solrizer 0.1.0.pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ --exclude "spec/*,gems/*"
2
+ --rails
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,16 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'solrizer'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+   # Use mocha as the mocking framework for the specs.
9
+   config.mock_with :mocha
10
+
11
+   # Returns a newly opened File for `file` inside the 'fixtures' directory located next to this helper.
12
+   def fixture(file)
13
+     File.new(File.join(File.dirname(__FILE__), 'fixtures', file))
14
+   end
15
+   # NOTE: the File returned by fixture is not closed by this helper.
16
+ end
@@ -0,0 +1,50 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'solrizer'
3
+
4
+ describe Solrizer::Extractor do
5
+
6
+   before(:each) do # every example gets its own Extractor instance
7
+     @extractor = Solrizer::Extractor.new
8
+   end
9
+
10
+   describe ".xml_to_solr" do
11
+     it "should turn simple xml into a solr document" do
12
+       desc_meta = fixture("druid-bv448hq0314-descMetadata.xml")
13
+       result = @extractor.xml_to_solr(desc_meta)
14
+       result[:type_t].should == "text"
15
+       result[:medium_t].should == "Paper Document"
16
+       result[:rights_t].should == "Presumed under copyright. Do not publish."
17
+       result[:date_t].should == "1985-12-30"
18
+       result[:format_t].should == "application/tiff"
19
+       result[:title_t].should == "This is a Sample Title"
20
+       result[:publisher_t].should == "Sample Unversity"
21
+       # result[:format_t] above is compared against a single value, so the other format value is checked by searching the inspect output.
22
+       # ... and a hacky way of making sure that it added a field for each of the format values (tiff and pdf)
23
+       result.inspect.include?('@value="application/tiff"').should be_true
24
+       result.inspect.include?('@value="application/pdf"').should be_true
25
+     end
26
+   end
27
+
28
+   describe "extract_rels_ext" do
29
+     it "should extract the content model of the RELS-EXT datastream of a Fedora object and set hydra_type using hydra_types mapping" do
30
+       rels_ext = fixture("rels_ext_cmodel.xml")
31
+       result = @extractor.extract_rels_ext( rels_ext )
32
+       result[:cmodel_t].should == "info:fedora/fedora-system:ContentModel-3.0"
33
+       result[:hydra_type_t].should == "salt_document"
34
+
35
+       # ... and a hacky way of making sure that fields were also added for the additional values (presumably a second content model and a second hydra type -- confirm against the fixture)
36
+       result.inspect.include?('@value="info:fedora/afmodel:SaltDocument"').should be_true
37
+       result.inspect.include?('@value="jp2_document"').should be_true
38
+     end
39
+   end
40
+
41
+   describe "extract_hydra_types" do
42
+     it "should extract the hydra_type of a Fedora object" do
43
+       rels_ext = fixture("rels_ext_cmodel.xml")
44
+       result = @extractor.extract_rels_ext( rels_ext ) # NOTE(review): group is named extract_hydra_types but the example calls extract_rels_ext -- confirm this is intended
45
+       result[:hydra_type_t].should == "salt_document"
46
+     end
47
+   end
48
+
49
+
50
+ end
@@ -0,0 +1,127 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'solrizer'
3
+
4
+ describe Solrizer::Indexer do
5
+
6
+ before(:each) do
7
+ Solrizer::Indexer.any_instance.stubs(:connect).returns("foo")
8
+
9
+ @extractor = mock("Extractor")
10
+ @extractor.stubs(:html_content_to_solr).returns(@solr_doc)
11
+ # @solr_doc = mock('solr_doc')
12
+ # @solr_doc.stubs(:<<)
13
+ # @solr_doc.stubs(:[])
14
+
15
+ @solr_doc = Solr::Document.new
16
+
17
+ Solrizer::Extractor.expects(:new).returns(@extractor)
18
+ @indexer = Solrizer::Indexer.new
19
+
20
+ end
21
+
22
+   describe "#generate_dates" do
23
+     it "should still give 9999-99-99 date if the solr document does not have a date_t field" do
24
+       # Empty document: date_t is expected to come back as the "9999-99-99" placeholder with "99" facets.
25
+       solr_result = @indexer.generate_dates(@solr_doc)
26
+       solr_result.should be_kind_of Solr::Document
27
+       solr_result[:date_t].should == "9999-99-99"
28
+       solr_result[:month_facet].should == "99"
29
+       solr_result[:day_facet].should == '99'
30
+
31
+     end
32
+
33
+     it "should still give 9999-99-99 date if the solr_doc[:date_t] is not valid date in YYYY-MM-DD format " do
34
+       # Unparseable value: date_t is expected to stay "Unknown" (not 9999-99-99) while both facets fall back to "99".
35
+       @solr_doc << Solr::Field.new(:date_t => "Unknown")
36
+       solr_result = @indexer.generate_dates(@solr_doc)
37
+       solr_result.should be_kind_of Solr::Document
38
+       solr_result[:date_t].should == "Unknown"
39
+       solr_result[:month_facet].should == "99"
40
+       solr_result[:day_facet].should == '99'
41
+
42
+     end
43
+
44
+     it "should give month and dates even if the :date_t is not a valid date but is in YYYY-MM-DD format " do
45
+       # Month 13 is out of range, so only month_facet is expected to fall back to "99"; the day is kept.
46
+       @solr_doc << Solr::Field.new(:date_t => "0000-13-11")
47
+       solr_result = @indexer.generate_dates(@solr_doc)
48
+       solr_result.should be_kind_of Solr::Document
49
+       solr_result[:date_t].should == "0000-13-11"
50
+       solr_result[:month_facet].should == "99"
51
+       solr_result[:day_facet].should == '11'
52
+     end
53
+     # Well-formed date: month and day are expected to be copied into the facets unchanged.
54
+     it "should give month and day when in a valid date format" do
55
+       @solr_doc << Solr::Field.new(:date_t => "1978-04-11")
56
+       solr_result = @indexer.generate_dates(@solr_doc)
57
+       solr_result.should be_kind_of Solr::Document
58
+       solr_result[:date_t].should == "1978-04-11"
59
+       solr_result[:month_facet].should == "04"
60
+       solr_result[:day_facet].should == '11'
61
+
62
+     end
63
+
64
+     it "should still give two digit strings even if the month/day is single digit" do
65
+       # Single-digit month/day: the facets are expected zero-padded, date_t itself is left as given.
66
+       @solr_doc << Solr::Field.new(:date_t => "1978-4-1")
67
+       solr_result = @indexer.generate_dates(@solr_doc)
68
+       solr_result.should be_kind_of Solr::Document
69
+       solr_result[:date_t].should == "1978-4-1"
70
+       solr_result[:month_facet].should == "04"
71
+       solr_result[:day_facet].should == '01'
72
+
73
+     end
74
+
75
+   end
76
+
77
+
78
+
79
+   describe "#solrize" do
80
+     it "should convert a hash to a solr doc" do
81
+       example_hash = {"box"=>"Box 51A", "city"=>["Ann Arbor", "Hyderabad", "Palo Alto"], "person"=>["ELLIE ENGELMORE", "Reddy", "EDWARD FEIGENBAUM"], "title"=>"Letter from Ellie Engelmore to Professor K. C. Reddy", "series"=>"eaf7000", "folder"=>"Folder 15", "technology"=>["artificial intelligence"], "year"=>"1985", "organization"=>["Heuristic Programming Project", "Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder", "Professor K. C. Reddy School of Mathematics and Computer/Information Sciences"], "collection"=>"e-a-feigenbaum-collection", "state"=>["Michigan", "California"]}
82
+       # String values are compared through []; array values are looked up in the inspect output, one "<key>_facet" name and one value per element.
83
+       example_result = Solrizer::Indexer.solrize( example_hash )
84
+       example_result.should be_kind_of Solr::Document
85
+       example_hash.each_pair do |key,values|
86
+         if values.class == String
87
+           example_result["#{key}_facet"].should == values
88
+         else
89
+           values.each do |v|
90
+             example_result.inspect.include?("@name=\"#{key}_facet\"").should be_true
91
+             example_result.inspect.include?("@value=\"#{v}\"").should be_true
92
+           end
93
+         end
94
+       end
95
+     end
96
+     # Here the facet values are nested under :facets; array-valued keys are compared via [] against their first element only.
97
+     it "should handle hashes with facets listed in a sub-hash" do
98
+       simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
99
+       result = Solrizer::Indexer.solrize( simple_hash )
100
+       result.should be_kind_of Solr::Document
101
+       result["technology_facet"].should == "t1"
102
+       result.inspect.include?('@boost=nil').should be_true
103
+       result.inspect.include?('@name="technology_facet"').should be_true
104
+       result.inspect.include?('@value="t2"').should be_true
105
+       result["company_facet"].should == "c1"
106
+       result["person_facet"].should == "p1"
107
+       result.inspect.include?('@name="person_facet"').should be_true
108
+       result.inspect.include?('@value="p2"').should be_true
109
+
110
+     end
111
+     # The :symbols sub-hash is expected to produce "<key>_s" fields; the input also carries the same values under :facets.
112
+     it "should create symbols from the :symbols subhash" do
113
+       simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}, :symbols=>{'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
114
+       result = Solrizer::Indexer.solrize( simple_hash )
115
+       result.should be_kind_of Solr::Document
116
+       result["technology_s"].should == "t1"
117
+       result.inspect.include?('@name="technology_s"').should be_true
118
+       result.inspect.include?('@value="t2"').should be_true
119
+       # NOTE(review): the @value checks could also be satisfied by fields built from :facets -- confirm this is strict enough.
120
+       result["company_s"].should == "c1"
121
+       result["person_s"].should == "p1"
122
+       result.inspect.include?('@name="person_s"').should be_true
123
+       result.inspect.include?('@value="p2"').should be_true
124
+
125
+     end
126
+   end
127
+ end
@@ -0,0 +1,42 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'solrizer'
3
+
4
+ describe Solrizer::Solrizer do
5
+
6
+   before(:each) do # every example gets its own Solrizer instance
7
+     @solrizer = Solrizer::Solrizer.new
8
+   end
9
+
10
+   describe "solrize" do
11
+     it "should trigger the indexer for the provided object" do
12
+       # sample_obj = ActiveFedora::Base.new  (a mock that answers kind_of?(ActiveFedora::Base) with true is used instead)
13
+       mock_object = mock("my object")
14
+       mock_object.expects(:kind_of?).with(ActiveFedora::Base).returns(true)
15
+       mock_object.stubs(:pid)
16
+       mock_object.stubs(:label)
17
+       mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
18
+       ActiveFedora::Base.expects(:load_instance).never # an object was passed in, so nothing should be loaded
19
+       @solrizer.indexer.expects(:index).with( mock_object )
20
+       @solrizer.solrize( mock_object )
21
+     end
22
+     it "should still load the object if only a pid is provided" do
23
+       mock_object = mock("my object")
24
+       mock_object.stubs(:pid)
25
+       mock_object.stubs(:label)
26
+       mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
27
+       # Only a pid string is passed, so the object is expected to be fetched via load_instance and then handed to the indexer.
28
+       ActiveFedora::Base.expects(:load_instance).with( "_PID_" ).returns(mock_object)
29
+       @solrizer.indexer.expects(:index).with(mock_object)
30
+       @solrizer.solrize("_PID_")
31
+     end
32
+   end
33
+
34
+   describe "solrize_objects" do
35
+     it "should call solrize for each pid returned by solr" do
36
+       pids = [["pid1"], ["pid2"], ["pid3"]] # each entry is a one-element array, not a bare string
37
+       Solrizer::Repository.expects(:get_pids).returns(pids)
38
+       pids.each {|pid| @solrizer.expects(:solrize).with( pid ) } # NOTE(review): solrize is expected to receive the one-element array itself -- confirm this matches what solrize_objects passes
39
+       @solrizer.solrize_objects
40
+     end
41
+   end
42
+ end
metadata ADDED
@@ -0,0 +1,126 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: solrizer
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: true
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ - pre2
10
+ version: 0.1.0.pre2
11
+ platform: ruby
12
+ authors:
13
+ - Matt Zumwalt
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-05-15 00:00:00 -05:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: active-fedora
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ">"
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 1
30
+ - 1
31
+ - 3
32
+ version: 1.1.3
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: rspec
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ segments:
43
+ - 1
44
+ - 2
45
+ - 9
46
+ version: 1.2.9
47
+ type: :development
48
+ version_requirements: *id002
49
+ description: Use solrizer to populate solr indexes from Fedora repository content or from other sources. You can run solrizer from within your apps, using the provided rake tasks, or as a JMS listener
50
+ email: matt.zumwalt@yourmediashelf.com
51
+ executables: []
52
+
53
+ extensions: []
54
+
55
+ extra_rdoc_files:
56
+ - LICENSE
57
+ - README.rdoc
58
+ files:
59
+ - .gitignore
60
+ - LICENSE
61
+ - README.rdoc
62
+ - Rakefile
63
+ - VERSION
64
+ - config/fedora.yml
65
+ - config/hydra_types.yml
66
+ - config/solr.yml
67
+ - lib/solrizer.rb
68
+ - lib/solrizer/configuration.rb
69
+ - lib/solrizer/extractor.rb
70
+ - lib/solrizer/indexer.rb
71
+ - lib/solrizer/main.rb
72
+ - lib/solrizer/replicator.rb
73
+ - lib/solrizer/repository.rb
74
+ - lib/tasks/solrizer.rake
75
+ - solrizer.gemspec
76
+ - spec/fixtures/druid-bv448hq0314-descMetadata.xml
77
+ - spec/fixtures/druid-bv448hq0314-extProperties.xml
78
+ - spec/fixtures/druid-cm234kq4672-extProperties.xml
79
+ - spec/fixtures/druid-cm234kq4672-stories.xml
80
+ - spec/fixtures/druid-hc513kw4806-descMetadata.xml
81
+ - spec/fixtures/rels_ext_cmodel.xml
82
+ - spec/integration/indexer_spec.rb
83
+ - spec/rcov.opts
84
+ - spec/spec.opts
85
+ - spec/spec_helper.rb
86
+ - spec/units/extractor_spec.rb
87
+ - spec/units/indexer_spec.rb
88
+ - spec/units/shelver_spec.rb
89
+ has_rdoc: true
90
+ homepage: http://github.com/projecthydra/solrizer
91
+ licenses: []
92
+
93
+ post_install_message:
94
+ rdoc_options:
95
+ - --charset=UTF-8
96
+ require_paths:
97
+ - lib
98
+ required_ruby_version: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ segments:
103
+ - 0
104
+ version: "0"
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">"
108
+ - !ruby/object:Gem::Version
109
+ segments:
110
+ - 1
111
+ - 3
112
+ - 1
113
+ version: 1.3.1
114
+ requirements: []
115
+
116
+ rubyforge_project:
117
+ rubygems_version: 1.3.6
118
+ signing_key:
119
+ specification_version: 3
120
+ summary: A utility for building solr indexes, usually from Fedora repository content.
121
+ test_files:
122
+ - spec/integration/indexer_spec.rb
123
+ - spec/spec_helper.rb
124
+ - spec/units/extractor_spec.rb
125
+ - spec/units/indexer_spec.rb
126
+ - spec/units/shelver_spec.rb