solrizer 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,54 +0,0 @@
1
-
2
- require 'active-fedora'
3
-
4
-
5
- module Solrizer
6
- class Repository
7
-
8
- #
9
- # This method initializes the fedora repository and solr instance
10
- #
11
- def initialize_repository
12
- Fedora::Repository.register( FEDORA_URL )
13
- ActiveFedora::SolrService.register( FEDORA_SOLR_URL )
14
- end
15
-
16
- #
17
- # This method retrieves a comprehensive list of unique ids in the fedora repository
18
- #
19
- def self.get_pids( num_docs )
20
- solr_results = ActiveFedora::SolrService.instance.conn.query( "active_fedora_model_field:Document", { :rows => num_docs } )
21
- id_array = []
22
- solr_results.hits.each do |hit|
23
- id_array << hit[SOLR_DOCUMENT_ID]
24
- end
25
- return id_array
26
- end
27
-
28
- #
29
- # This method retrieves the object associated with the given unique id
30
- #
31
- def self.get_object( pid )
32
- object = ActiveFedora::Base.load_instance( pid )
33
- end
34
-
35
- #
36
- # This method retrieves a comprehensive list of datastreams for the given object
37
- #
38
- def self.get_datastreams( obj )
39
- ds_keys = obj.datastreams.keys
40
- end
41
-
42
- #
43
- # This method retrieves the datastream for the given object with the given datastream name
44
- #
45
- def self.get_datastream( obj, ds_name )
46
- begin
47
- obj.datastreams[ ds_name ]
48
- rescue
49
- return nil
50
- end
51
- end
52
-
53
- end
54
- end
@@ -1,8 +0,0 @@
1
- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
2
- <rdf:Description rdf:about="info:fedora/demo:multipurpose-objects-model_and_sdef">
3
- <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/fedora-system:ContentModel-3.0"/>
4
- <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:SaltDocument"/>
5
- <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:JP2Document"/>
6
- <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:DCDocument"/>
7
- </rdf:Description>
8
- </rdf:RDF>
@@ -1,16 +0,0 @@
1
- id: id
2
- date: _date
3
- string: _field
4
- text: _field
5
- symbol: _field
6
- integer: _field
7
- long: _field
8
- boolean: _field
9
- float: _field
10
- double: _field
11
- facet: _facet
12
- display: _display
13
- sort: _sort
14
- unstemmed_search: _unstem_search
15
-
16
-
@@ -1,18 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
- require 'solrizer'
3
-
4
- describe Solrizer::Indexer do
5
-
6
- before(:each) do
7
- @indexer = Solrizer::Indexer.new
8
- end
9
-
10
- describe "index" do
11
- it "should update solr with the metadata from the given object" do
12
- pending "Got to decide if/how to handle fixtures in this gem. Probably should just mock out Fedora & Solr entirely."
13
- obj = Solrizer::Repository.get_object( "druid:sb733gr4073" )
14
- @indexer.index( obj )
15
- end
16
- end
17
-
18
- end
@@ -1,127 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
- require 'solrizer'
3
-
4
- describe Solrizer::Indexer do
5
-
6
- before(:each) do
7
- Solrizer::Indexer.any_instance.stubs(:connect).returns("foo")
8
-
9
- @extractor = mock("Extractor")
10
- @extractor.stubs(:html_content_to_solr).returns(@solr_doc)
11
- # @solr_doc = mock('solr_doc')
12
- # @solr_doc.stubs(:<<)
13
- # @solr_doc.stubs(:[])
14
-
15
- @solr_doc = Solr::Document.new
16
-
17
- Solrizer::Extractor.expects(:new).returns(@extractor)
18
- @indexer = Solrizer::Indexer.new
19
-
20
- end
21
-
22
- describe "#generate_dates" do
23
- it "should still give 9999-99-99 date if the solr document does not have a date_t field" do
24
-
25
- solr_result = @indexer.generate_dates(@solr_doc)
26
- solr_result.should be_kind_of Solr::Document
27
- solr_result[:date_t].should == "9999-99-99"
28
- solr_result[:month_facet].should == "99"
29
- solr_result[:day_facet].should == '99'
30
-
31
- end
32
-
33
- it "should still give 9999-99-99 date if the solr_doc[:date_t] is not valid date in YYYY-MM-DD format " do
34
-
35
- @solr_doc << Solr::Field.new(:date_t => "Unknown")
36
- solr_result = @indexer.generate_dates(@solr_doc)
37
- solr_result.should be_kind_of Solr::Document
38
- solr_result[:date_t].should == "Unknown"
39
- solr_result[:month_facet].should == "99"
40
- solr_result[:day_facet].should == '99'
41
-
42
- end
43
-
44
- it "should give month and dates even if the :date_t is not a valid date but is in YYYY-MM-DD format " do
45
-
46
- @solr_doc << Solr::Field.new(:date_t => "0000-13-11")
47
- solr_result = @indexer.generate_dates(@solr_doc)
48
- solr_result.should be_kind_of Solr::Document
49
- solr_result[:date_t].should == "0000-13-11"
50
- solr_result[:month_facet].should == "99"
51
- solr_result[:day_facet].should == '11'
52
- end
53
-
54
- it "should give month and day when in a valid date format" do
55
- @solr_doc << Solr::Field.new(:date_t => "1978-04-11")
56
- solr_result = @indexer.generate_dates(@solr_doc)
57
- solr_result.should be_kind_of Solr::Document
58
- solr_result[:date_t].should == "1978-04-11"
59
- solr_result[:month_facet].should == "04"
60
- solr_result[:day_facet].should == '11'
61
-
62
- end
63
-
64
- it "should still give two digit strings even if the month/day is single digit" do
65
-
66
- @solr_doc << Solr::Field.new(:date_t => "1978-4-1")
67
- solr_result = @indexer.generate_dates(@solr_doc)
68
- solr_result.should be_kind_of Solr::Document
69
- solr_result[:date_t].should == "1978-4-1"
70
- solr_result[:month_facet].should == "04"
71
- solr_result[:day_facet].should == '01'
72
-
73
- end
74
-
75
- end
76
-
77
-
78
-
79
- describe "#solrize" do
80
- it "should convert a hash to a solr doc" do
81
- example_hash = {"box"=>"Box 51A", "city"=>["Ann Arbor", "Hyderabad", "Palo Alto"], "person"=>["ELLIE ENGELMORE", "Reddy", "EDWARD FEIGENBAUM"], "title"=>"Letter from Ellie Engelmore to Professor K. C. Reddy", "series"=>"eaf7000", "folder"=>"Folder 15", "technology"=>["artificial intelligence"], "year"=>"1985", "organization"=>["Heuristic Programming Project", "Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder", "Professor K. C. Reddy School of Mathematics and Computer/Information Sciences"], "collection"=>"e-a-feigenbaum-collection", "state"=>["Michigan", "California"]}
82
-
83
- example_result = Solrizer::Indexer.solrize( example_hash )
84
- example_result.should be_kind_of Solr::Document
85
- example_hash.each_pair do |key,values|
86
- if values.class == String
87
- example_result["#{key}_facet"].should == values
88
- else
89
- values.each do |v|
90
- example_result.inspect.include?("@name=\"#{key}_facet\"").should be_true
91
- example_result.inspect.include?("@value=\"#{v}\"").should be_true
92
- end
93
- end
94
- end
95
- end
96
-
97
- it "should handle hashes with facets listed in a sub-hash" do
98
- simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
99
- result = Solrizer::Indexer.solrize( simple_hash )
100
- result.should be_kind_of Solr::Document
101
- result["technology_facet"].should == "t1"
102
- result.inspect.include?('@boost=nil').should be_true
103
- result.inspect.include?('@name="technology_facet"').should be_true
104
- result.inspect.include?('@value="t2"').should be_true
105
- result["company_facet"].should == "c1"
106
- result["person_facet"].should == "p1"
107
- result.inspect.include?('@name="person_facet"').should be_true
108
- result.inspect.include?('@value="p2"').should be_true
109
-
110
- end
111
-
112
- it "should create symbols from the :symbols subhash" do
113
- simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}, :symbols=>{'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
114
- result = Solrizer::Indexer.solrize( simple_hash )
115
- result.should be_kind_of Solr::Document
116
- result["technology_s"].should == "t1"
117
- result.inspect.include?('@name="technology_s"').should be_true
118
- result.inspect.include?('@value="t2"').should be_true
119
-
120
- result["company_s"].should == "c1"
121
- result["person_s"].should == "p1"
122
- result.inspect.include?('@name="person_s"').should be_true
123
- result.inspect.include?('@value="p2"').should be_true
124
-
125
- end
126
- end
127
- end
@@ -1,42 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
-
3
- describe Solrizer::Solrizer do
4
-
5
- before(:each) do
6
- @solrizer = Solrizer::Solrizer.new
7
- end
8
-
9
- describe "solrize" do
10
- it "should trigger the indexer for the provided object" do
11
- sample_obj = ActiveFedora::Base.new
12
- @solrizer.indexer.expects(:index).with( sample_obj )
13
- @solrizer.solrize( sample_obj )
14
- end
15
- it "should work with Fedora::FedoraObject objects" do
16
- mock_object = Fedora::FedoraObject.new(:pid=>"my:pid", :label=>"my label")
17
- ActiveFedora::Base.expects(:load_instance).with( mock_object.pid ).returns(mock_object)
18
- @solrizer.indexer.expects(:index).with( mock_object )
19
- @solrizer.solrize( mock_object )
20
- end
21
- it "should load the object if only a pid is provided" do
22
- mock_object = mock("my object")
23
- mock_object.stubs(:pid)
24
- mock_object.stubs(:label)
25
- mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
26
-
27
- ActiveFedora::Base.expects(:load_instance).with( "_PID_" ).returns(mock_object)
28
- @solrizer.indexer.expects(:index).with(mock_object)
29
- @solrizer.solrize("_PID_")
30
- end
31
-
32
- end
33
-
34
- describe "solrize_objects" do
35
- it "should call solrize for each object returned by Fedora::Repository.find_objects" do
36
- objects = [["pid1"], ["pid2"], ["pid3"]]
37
- Fedora::Repository.any_instance.expects(:find_objects).returns(objects)
38
- objects.each {|object| @solrizer.expects(:solrize).with( object ) }
39
- @solrizer.solrize_objects
40
- end
41
- end
42
- end