solrizer 0.1.0.pre2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,2 @@
1
+ --exclude "spec/*,gems/*"
2
+ --rails
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,16 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'solrizer'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ config.mock_with :mocha
10
+
11
+
12
+ def fixture(file)
13
+ File.new(File.join(File.dirname(__FILE__), 'fixtures', file))
14
+ end
15
+
16
+ end
@@ -0,0 +1,50 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'solrizer'
3
+
4
+ describe Solrizer::Extractor do
5
+
6
+ before(:each) do
7
+ @extractor = Solrizer::Extractor.new
8
+ end
9
+
10
+ describe ".xml_to_solr" do
11
+ it "should turn simple xml into a solr document" do
12
+ desc_meta = fixture("druid-bv448hq0314-descMetadata.xml")
13
+ result = @extractor.xml_to_solr(desc_meta)
14
+ result[:type_t].should == "text"
15
+ result[:medium_t].should == "Paper Document"
16
+ result[:rights_t].should == "Presumed under copyright. Do not publish."
17
+ result[:date_t].should == "1985-12-30"
18
+ result[:format_t].should == "application/tiff"
19
+ result[:title_t].should == "This is a Sample Title"
20
+ result[:publisher_t].should == "Sample Unversity"
21
+
22
+ # ... and a hacky way of making sure that it added a field for each of the format values (application/tiff and application/pdf), not just the first one
23
+ result.inspect.include?('@value="application/tiff"').should be_true
24
+ result.inspect.include?('@value="application/pdf"').should be_true
25
+ end
26
+ end
27
+
28
+ describe "extract_rels_ext" do
29
+ it "should extract the content model of the RELS-EXT datastream of a Fedora object and set hydra_type using hydra_types mapping" do
30
+ rels_ext = fixture("rels_ext_cmodel.xml")
31
+ result = @extractor.extract_rels_ext( rels_ext )
32
+ result[:cmodel_t].should == "info:fedora/fedora-system:ContentModel-3.0"
33
+ result[:hydra_type_t].should == "salt_document"
34
+
35
+ # ... and a hacky way of making sure that it added a field for each of the additional content model and hydra_type values
36
+ result.inspect.include?('@value="info:fedora/afmodel:SaltDocument"').should be_true
37
+ result.inspect.include?('@value="jp2_document"').should be_true
38
+ end
39
+ end
40
+
41
+ describe "extract_hydra_types" do
42
+ it "should extract the hydra_type of a Fedora object" do
43
+ rels_ext = fixture("rels_ext_cmodel.xml")
44
+ result = @extractor.extract_rels_ext( rels_ext )
45
+ result[:hydra_type_t].should == "salt_document"
46
+ end
47
+ end
48
+
49
+
50
+ end
@@ -0,0 +1,127 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'solrizer'
3
+
4
+ describe Solrizer::Indexer do
5
+
6
+ before(:each) do
7
+ Solrizer::Indexer.any_instance.stubs(:connect).returns("foo")
8
+
9
+ @extractor = mock("Extractor")
10
+ @extractor.stubs(:html_content_to_solr).returns(@solr_doc)
11
+ # @solr_doc = mock('solr_doc')
12
+ # @solr_doc.stubs(:<<)
13
+ # @solr_doc.stubs(:[])
14
+
15
+ @solr_doc = Solr::Document.new
16
+
17
+ Solrizer::Extractor.expects(:new).returns(@extractor)
18
+ @indexer = Solrizer::Indexer.new
19
+
20
+ end
21
+
22
+ describe "#generate_dates" do
23
+ it "should still give 9999-99-99 date if the solr document does not have a date_t field" do
24
+
25
+ solr_result = @indexer.generate_dates(@solr_doc)
26
+ solr_result.should be_kind_of Solr::Document
27
+ solr_result[:date_t].should == "9999-99-99"
28
+ solr_result[:month_facet].should == "99"
29
+ solr_result[:day_facet].should == '99'
30
+
31
+ end
32
+
33
+ it "should still give 9999-99-99 date if the solr_doc[:date_t] is not valid date in YYYY-MM-DD format " do
34
+
35
+ @solr_doc << Solr::Field.new(:date_t => "Unknown")
36
+ solr_result = @indexer.generate_dates(@solr_doc)
37
+ solr_result.should be_kind_of Solr::Document
38
+ solr_result[:date_t].should == "Unknown"
39
+ solr_result[:month_facet].should == "99"
40
+ solr_result[:day_facet].should == '99'
41
+
42
+ end
43
+
44
+ it "should give month and dates even if the :date_t is not a valid date but is in YYYY-MM-DD format " do
45
+
46
+ @solr_doc << Solr::Field.new(:date_t => "0000-13-11")
47
+ solr_result = @indexer.generate_dates(@solr_doc)
48
+ solr_result.should be_kind_of Solr::Document
49
+ solr_result[:date_t].should == "0000-13-11"
50
+ solr_result[:month_facet].should == "99"
51
+ solr_result[:day_facet].should == '11'
52
+ end
53
+
54
+ it "should give month and day when in a valid date format" do
55
+ @solr_doc << Solr::Field.new(:date_t => "1978-04-11")
56
+ solr_result = @indexer.generate_dates(@solr_doc)
57
+ solr_result.should be_kind_of Solr::Document
58
+ solr_result[:date_t].should == "1978-04-11"
59
+ solr_result[:month_facet].should == "04"
60
+ solr_result[:day_facet].should == '11'
61
+
62
+ end
63
+
64
+ it "should still give two digit strings even if the month/day is single digit" do
65
+
66
+ @solr_doc << Solr::Field.new(:date_t => "1978-4-1")
67
+ solr_result = @indexer.generate_dates(@solr_doc)
68
+ solr_result.should be_kind_of Solr::Document
69
+ solr_result[:date_t].should == "1978-4-1"
70
+ solr_result[:month_facet].should == "04"
71
+ solr_result[:day_facet].should == '01'
72
+
73
+ end
74
+
75
+ end
76
+
77
+
78
+
79
+ describe "#solrize" do
80
+ it "should convert a hash to a solr doc" do
81
+ example_hash = {"box"=>"Box 51A", "city"=>["Ann Arbor", "Hyderabad", "Palo Alto"], "person"=>["ELLIE ENGELMORE", "Reddy", "EDWARD FEIGENBAUM"], "title"=>"Letter from Ellie Engelmore to Professor K. C. Reddy", "series"=>"eaf7000", "folder"=>"Folder 15", "technology"=>["artificial intelligence"], "year"=>"1985", "organization"=>["Heuristic Programming Project", "Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder", "Professor K. C. Reddy School of Mathematics and Computer/Information Sciences"], "collection"=>"e-a-feigenbaum-collection", "state"=>["Michigan", "California"]}
82
+
83
+ example_result = Solrizer::Indexer.solrize( example_hash )
84
+ example_result.should be_kind_of Solr::Document
85
+ example_hash.each_pair do |key,values|
86
+ if values.class == String
87
+ example_result["#{key}_facet"].should == values
88
+ else
89
+ values.each do |v|
90
+ example_result.inspect.include?("@name=\"#{key}_facet\"").should be_true
91
+ example_result.inspect.include?("@value=\"#{v}\"").should be_true
92
+ end
93
+ end
94
+ end
95
+ end
96
+
97
+ it "should handle hashes with facets listed in a sub-hash" do
98
+ simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
99
+ result = Solrizer::Indexer.solrize( simple_hash )
100
+ result.should be_kind_of Solr::Document
101
+ result["technology_facet"].should == "t1"
102
+ result.inspect.include?('@boost=nil').should be_true
103
+ result.inspect.include?('@name="technology_facet"').should be_true
104
+ result.inspect.include?('@value="t2"').should be_true
105
+ result["company_facet"].should == "c1"
106
+ result["person_facet"].should == "p1"
107
+ result.inspect.include?('@name="person_facet"').should be_true
108
+ result.inspect.include?('@value="p2"').should be_true
109
+
110
+ end
111
+
112
+ it "should create symbols from the :symbols subhash" do
113
+ simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}, :symbols=>{'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
114
+ result = Solrizer::Indexer.solrize( simple_hash )
115
+ result.should be_kind_of Solr::Document
116
+ result["technology_s"].should == "t1"
117
+ result.inspect.include?('@name="technology_s"').should be_true
118
+ result.inspect.include?('@value="t2"').should be_true
119
+
120
+ result["company_s"].should == "c1"
121
+ result["person_s"].should == "p1"
122
+ result.inspect.include?('@name="person_s"').should be_true
123
+ result.inspect.include?('@value="p2"').should be_true
124
+
125
+ end
126
+ end
127
+ end
@@ -0,0 +1,42 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'solrizer'
3
+
4
+ describe Solrizer::Solrizer do
5
+
6
+ before(:each) do
7
+ @solrizer = Solrizer::Solrizer.new
8
+ end
9
+
10
+ describe "solrize" do
11
+ it "should trigger the indexer for the provided object" do
12
+ # sample_obj = ActiveFedora::Base.new
13
+ mock_object = mock("my object")
14
+ mock_object.expects(:kind_of?).with(ActiveFedora::Base).returns(true)
15
+ mock_object.stubs(:pid)
16
+ mock_object.stubs(:label)
17
+ mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
18
+ ActiveFedora::Base.expects(:load_instance).never
19
+ @solrizer.indexer.expects(:index).with( mock_object )
20
+ @solrizer.solrize( mock_object )
21
+ end
22
+ it "should still load the object if only a pid is provided" do
23
+ mock_object = mock("my object")
24
+ mock_object.stubs(:pid)
25
+ mock_object.stubs(:label)
26
+ mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
27
+
28
+ ActiveFedora::Base.expects(:load_instance).with( "_PID_" ).returns(mock_object)
29
+ @solrizer.indexer.expects(:index).with(mock_object)
30
+ @solrizer.solrize("_PID_")
31
+ end
32
+ end
33
+
34
+ describe "solrize_objects" do
35
+ it "should call solrize for each pid returned by solr" do
36
+ pids = [["pid1"], ["pid2"], ["pid3"]]
37
+ Solrizer::Repository.expects(:get_pids).returns(pids)
38
+ pids.each {|pid| @solrizer.expects(:solrize).with( pid ) }
39
+ @solrizer.solrize_objects
40
+ end
41
+ end
42
+ end
metadata ADDED
@@ -0,0 +1,126 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: solrizer
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: true
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ - pre2
10
+ version: 0.1.0.pre2
11
+ platform: ruby
12
+ authors:
13
+ - Matt Zumwalt
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-05-15 00:00:00 -05:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: active-fedora
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ">"
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 1
30
+ - 1
31
+ - 3
32
+ version: 1.1.3
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: rspec
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ segments:
43
+ - 1
44
+ - 2
45
+ - 9
46
+ version: 1.2.9
47
+ type: :development
48
+ version_requirements: *id002
49
+ description: Use solrizer to populate solr indexes from Fedora repository content or from other sources. You can run solrizer from within your apps, using the provided rake tasks, or as a JMS listener
50
+ email: matt.zumwalt@yourmediashelf.com
51
+ executables: []
52
+
53
+ extensions: []
54
+
55
+ extra_rdoc_files:
56
+ - LICENSE
57
+ - README.rdoc
58
+ files:
59
+ - .gitignore
60
+ - LICENSE
61
+ - README.rdoc
62
+ - Rakefile
63
+ - VERSION
64
+ - config/fedora.yml
65
+ - config/hydra_types.yml
66
+ - config/solr.yml
67
+ - lib/solrizer.rb
68
+ - lib/solrizer/configuration.rb
69
+ - lib/solrizer/extractor.rb
70
+ - lib/solrizer/indexer.rb
71
+ - lib/solrizer/main.rb
72
+ - lib/solrizer/replicator.rb
73
+ - lib/solrizer/repository.rb
74
+ - lib/tasks/solrizer.rake
75
+ - solrizer.gemspec
76
+ - spec/fixtures/druid-bv448hq0314-descMetadata.xml
77
+ - spec/fixtures/druid-bv448hq0314-extProperties.xml
78
+ - spec/fixtures/druid-cm234kq4672-extProperties.xml
79
+ - spec/fixtures/druid-cm234kq4672-stories.xml
80
+ - spec/fixtures/druid-hc513kw4806-descMetadata.xml
81
+ - spec/fixtures/rels_ext_cmodel.xml
82
+ - spec/integration/indexer_spec.rb
83
+ - spec/rcov.opts
84
+ - spec/spec.opts
85
+ - spec/spec_helper.rb
86
+ - spec/units/extractor_spec.rb
87
+ - spec/units/indexer_spec.rb
88
+ - spec/units/shelver_spec.rb
89
+ has_rdoc: true
90
+ homepage: http://github.com/projecthydra/solrizer
91
+ licenses: []
92
+
93
+ post_install_message:
94
+ rdoc_options:
95
+ - --charset=UTF-8
96
+ require_paths:
97
+ - lib
98
+ required_ruby_version: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ segments:
103
+ - 0
104
+ version: "0"
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">"
108
+ - !ruby/object:Gem::Version
109
+ segments:
110
+ - 1
111
+ - 3
112
+ - 1
113
+ version: 1.3.1
114
+ requirements: []
115
+
116
+ rubyforge_project:
117
+ rubygems_version: 1.3.6
118
+ signing_key:
119
+ specification_version: 3
120
+ summary: A utility for building solr indexes, usually from Fedora repository content.
121
+ test_files:
122
+ - spec/integration/indexer_spec.rb
123
+ - spec/spec_helper.rb
124
+ - spec/units/extractor_spec.rb
125
+ - spec/units/indexer_spec.rb
126
+ - spec/units/shelver_spec.rb