solrizer 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,54 +0,0 @@
1
-
2
- require 'active-fedora'
3
-
4
-
5
module Solrizer
  # Thin convenience wrappers around the Fedora repository and the Solr index
  # exposed through ActiveFedora.
  class Repository
    # Registers the Fedora repository and the Solr service.
    #
    # Reads the FEDORA_URL and FEDORA_SOLR_URL constants; neither is defined
    # in this class, so they must be supplied by the surrounding application.
    #
    # NOTE(review): this is an instance method while every other method in
    # this class is a class method -- confirm that is intentional.
    def initialize_repository
      Fedora::Repository.register(FEDORA_URL)
      ActiveFedora::SolrService.register(FEDORA_SOLR_URL)
    end

    # Returns the ids of the Solr hits for the fixed query
    # "active_fedora_model_field:Document".
    #
    # The list is not exhaustive: it only contains what Solr returns for
    # that query with the given :rows value.
    #
    # @param num_docs value sent to Solr as the :rows option
    # @return [Array] the SOLR_DOCUMENT_ID entry of each hit, in hit order
    def self.get_pids(num_docs)
      solr_results = ActiveFedora::SolrService.instance.conn.query(
        "active_fedora_model_field:Document", { :rows => num_docs }
      )
      solr_results.hits.map { |hit| hit[SOLR_DOCUMENT_ID] }
    end

    # Loads the object identified by +pid+ through ActiveFedora.
    #
    # @param pid identifier handed to ActiveFedora::Base.load_instance
    # @return whatever ActiveFedora::Base.load_instance returns for +pid+
    def self.get_object(pid)
      ActiveFedora::Base.load_instance(pid)
    end

    # Lists the datastream names of +obj+.
    #
    # @param obj object responding to #datastreams with a hash-like value
    # @return the keys of obj.datastreams
    def self.get_datastreams(obj)
      obj.datastreams.keys
    end

    # Looks up a single datastream of +obj+ by name.
    #
    # @param obj object responding to #datastreams
    # @param ds_name key of the wanted datastream
    # @return the datastream stored under +ds_name+, or nil when the lookup
    #   raises or the key is absent
    def self.get_datastream(obj, ds_name)
      obj.datastreams[ds_name]
    rescue StandardError
      # Deliberate best-effort lookup: any StandardError (for example +obj+
      # being nil) is reported to the caller as "no datastream".
      nil
    end
  end
end
@@ -1,8 +0,0 @@
1
- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
2
- <rdf:Description rdf:about="info:fedora/demo:multipurpose-objects-model_and_sdef">
3
- <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/fedora-system:ContentModel-3.0"/>
4
- <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:SaltDocument"/>
5
- <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:JP2Document"/>
6
- <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:DCDocument"/>
7
- </rdf:Description>
8
- </rdf:RDF>
@@ -1,16 +0,0 @@
1
- id: id
2
- date: _date
3
- string: _field
4
- text: _field
5
- symbol: _field
6
- integer: _field
7
- long: _field
8
- boolean: _field
9
- float: _field
10
- double: _field
11
- facet: _facet
12
- display: _display
13
- sort: _sort
14
- unstemmed_search: _unstem_search
15
-
16
-
@@ -1,18 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
- require 'solrizer'
3
-
4
# Spec for Solrizer::Indexer#index against an object fetched through
# Solrizer::Repository. The single example is marked pending until the
# fixture strategy described in its message is settled.
describe Solrizer::Indexer do
  before do
    @indexer = Solrizer::Indexer.new
  end

  describe "index" do
    it "should update solr with the metadata from the given object" do
      pending "Got to decide if/how to handle fixtures in this gem. Probably should just mock out Fedora & Solr entirely."
      fedora_object = Solrizer::Repository.get_object("druid:sb733gr4073")
      @indexer.index(fedora_object)
    end
  end
end
@@ -1,127 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
- require 'solrizer'
3
-
4
# Unit specs for Solrizer::Indexer: #generate_dates on a Solr::Document and
# the class-level .solrize hash-to-document conversion. The connection and
# the extractor are stubbed out, so no Fedora or Solr instance is contacted
# by the setup itself.
describe Solrizer::Indexer do
  before(:each) do
    Solrizer::Indexer.any_instance.stubs(:connect).returns("foo")

    # Build the document before stubbing: the argument to `returns` is
    # evaluated when the stub is declared, so assigning @solr_doc afterwards
    # would make the stub return nil.
    @solr_doc = Solr::Document.new

    @extractor = mock("Extractor")
    @extractor.stubs(:html_content_to_solr).returns(@solr_doc)

    # Earlier mock-based variant of the document, kept for reference:
    # @solr_doc = mock('solr_doc')
    # @solr_doc.stubs(:<<)
    # @solr_doc.stubs(:[])

    Solrizer::Extractor.expects(:new).returns(@extractor)
    @indexer = Solrizer::Indexer.new
  end

  describe "#generate_dates" do
    it "should still give 9999-99-99 date if the solr document does not have a date_t field" do
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      solr_result[:date_t].should == "9999-99-99"
      solr_result[:month_facet].should == "99"
      solr_result[:day_facet].should == '99'
    end

    # The unparseable value is left in date_t; only the facets fall back to "99".
    it "should still give 9999-99-99 date if the solr_doc[:date_t] is not valid date in YYYY-MM-DD format " do
      @solr_doc << Solr::Field.new(:date_t => "Unknown")
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      solr_result[:date_t].should == "Unknown"
      solr_result[:month_facet].should == "99"
      solr_result[:day_facet].should == '99'
    end

    # Month 13 is expected to fall back to "99" while the day is kept.
    it "should give month and dates even if the :date_t is not a valid date but is in YYYY-MM-DD format " do
      @solr_doc << Solr::Field.new(:date_t => "0000-13-11")
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      solr_result[:date_t].should == "0000-13-11"
      solr_result[:month_facet].should == "99"
      solr_result[:day_facet].should == '11'
    end

    it "should give month and day when in a valid date format" do
      @solr_doc << Solr::Field.new(:date_t => "1978-04-11")
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      solr_result[:date_t].should == "1978-04-11"
      solr_result[:month_facet].should == "04"
      solr_result[:day_facet].should == '11'
    end

    it "should still give two digit strings even if the month/day is single digit" do
      @solr_doc << Solr::Field.new(:date_t => "1978-4-1")
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      solr_result[:date_t].should == "1978-4-1"
      solr_result[:month_facet].should == "04"
      solr_result[:day_facet].should == '01'
    end
  end

  describe "#solrize" do
    it "should convert a hash to a solr doc" do
      example_hash = {
        "box"=>"Box 51A",
        "city"=>["Ann Arbor", "Hyderabad", "Palo Alto"],
        "person"=>["ELLIE ENGELMORE", "Reddy", "EDWARD FEIGENBAUM"],
        "title"=>"Letter from Ellie Engelmore to Professor K. C. Reddy",
        "series"=>"eaf7000",
        "folder"=>"Folder 15",
        "technology"=>["artificial intelligence"],
        "year"=>"1985",
        "organization"=>[
          "Heuristic Programming Project",
          "Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder",
          "Professor K. C. Reddy School of Mathematics and Computer/Information Sciences"
        ],
        "collection"=>"e-a-feigenbaum-collection",
        "state"=>["Michigan", "California"]
      }

      example_result = Solrizer::Indexer.solrize(example_hash)
      example_result.should be_kind_of Solr::Document

      # Multi-valued fields are checked through the inspect output, computed once.
      inspected = example_result.inspect
      example_hash.each_pair do |key, values|
        if values.is_a?(String)
          example_result["#{key}_facet"].should == values
        else
          values.each do |v|
            inspected.include?("@name=\"#{key}_facet\"").should be_true
            inspected.include?("@value=\"#{v}\"").should be_true
          end
        end
      end
    end

    it "should handle hashes with facets listed in a sub-hash" do
      simple_hash = { :facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]} }
      result = Solrizer::Indexer.solrize(simple_hash)
      result.should be_kind_of Solr::Document
      result["technology_facet"].should == "t1"
      result.inspect.include?('@boost=nil').should be_true
      result.inspect.include?('@name="technology_facet"').should be_true
      result.inspect.include?('@value="t2"').should be_true
      result["company_facet"].should == "c1"
      result["person_facet"].should == "p1"
      result.inspect.include?('@name="person_facet"').should be_true
      result.inspect.include?('@value="p2"').should be_true
    end

    it "should create symbols from the :symbols subhash" do
      simple_hash = {
        :facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]},
        :symbols => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}
      }
      result = Solrizer::Indexer.solrize(simple_hash)
      result.should be_kind_of Solr::Document
      result["technology_s"].should == "t1"
      result.inspect.include?('@name="technology_s"').should be_true
      result.inspect.include?('@value="t2"').should be_true

      result["company_s"].should == "c1"
      result["person_s"].should == "p1"
      result.inspect.include?('@name="person_s"').should be_true
      result.inspect.include?('@value="p2"').should be_true
    end
  end
end
@@ -1,42 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
-
3
# Specs for Solrizer::Solrizer: #solrize must hand the (loaded) object to the
# indexer, and #solrize_objects must call #solrize once per entry returned by
# Fedora::Repository#find_objects. All collaborators are mocked.
describe Solrizer::Solrizer do
  before do
    @solrizer = Solrizer::Solrizer.new
  end

  describe "solrize" do
    it "should trigger the indexer for the provided object" do
      active_fedora_object = ActiveFedora::Base.new
      @solrizer.indexer.expects(:index).with(active_fedora_object)
      @solrizer.solrize(active_fedora_object)
    end

    it "should work with Fedora::FedoraObject objects" do
      fedora_object = Fedora::FedoraObject.new(:pid=>"my:pid", :label=>"my label")
      # The object is expected to be reloaded through ActiveFedora by its pid.
      ActiveFedora::Base.expects(:load_instance).with(fedora_object.pid).returns(fedora_object)
      @solrizer.indexer.expects(:index).with(fedora_object)
      @solrizer.solrize(fedora_object)
    end

    it "should load the object if only a pid is provided" do
      loaded_object = mock("my object")
      loaded_object.stubs(:pid)
      loaded_object.stubs(:label)
      loaded_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})

      ActiveFedora::Base.expects(:load_instance).with("_PID_").returns(loaded_object)
      @solrizer.indexer.expects(:index).with(loaded_object)
      @solrizer.solrize("_PID_")
    end
  end

  describe "solrize_objects" do
    it "should call solrize for each object returned by Fedora::Repository.find_objects" do
      found = [["pid1"], ["pid2"], ["pid3"]]
      Fedora::Repository.any_instance.expects(:find_objects).returns(found)
      found.each do |entry|
        @solrizer.expects(:solrize).with(entry)
      end
      @solrizer.solrize_objects
    end
  end
end