solrizer 0.1.0.pre2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +19 -0
- data/LICENSE +20 -0
- data/README.rdoc +17 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/config/fedora.yml +16 -0
- data/config/hydra_types.yml +4 -0
- data/config/solr.yml +7 -0
- data/lib/solrizer.rb +101 -0
- data/lib/solrizer/configuration.rb +8 -0
- data/lib/solrizer/extractor.rb +89 -0
- data/lib/solrizer/indexer.rb +261 -0
- data/lib/solrizer/main.rb +17 -0
- data/lib/solrizer/replicator.rb +143 -0
- data/lib/solrizer/repository.rb +54 -0
- data/lib/tasks/solrizer.rake +33 -0
- data/solrizer.gemspec +80 -0
- data/spec/fixtures/druid-bv448hq0314-descMetadata.xml +11 -0
- data/spec/fixtures/druid-bv448hq0314-extProperties.xml +52 -0
- data/spec/fixtures/druid-cm234kq4672-extProperties.xml +5 -0
- data/spec/fixtures/druid-cm234kq4672-stories.xml +17 -0
- data/spec/fixtures/druid-hc513kw4806-descMetadata.xml +11 -0
- data/spec/fixtures/rels_ext_cmodel.xml +8 -0
- data/spec/integration/indexer_spec.rb +18 -0
- data/spec/rcov.opts +2 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +16 -0
- data/spec/units/extractor_spec.rb +50 -0
- data/spec/units/indexer_spec.rb +127 -0
- data/spec/units/shelver_spec.rb +42 -0
- metadata +126 -0
data/spec/rcov.opts
ADDED
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
2
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
|
+
require 'solrizer'
|
4
|
+
require 'spec'
|
5
|
+
require 'spec/autorun'
|
6
|
+
|
7
|
+
Spec::Runner.configure do |config|
|
8
|
+
|
9
|
+
config.mock_with :mocha
|
10
|
+
|
11
|
+
|
12
|
+
# Opens the named file from the "fixtures" directory located beside this
# helper and returns the resulting File object. The handle is not closed here.
def fixture(file)
  fixtures_dir = File.join(File.dirname(__FILE__), 'fixtures')
  fixture_path = File.join(fixtures_dir, file)
  File.new(fixture_path)
end
|
15
|
+
|
16
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
require 'solrizer'
|
3
|
+
|
4
|
+
describe Solrizer::Extractor do
|
5
|
+
|
6
|
+
before(:each) do
|
7
|
+
@extractor = Solrizer::Extractor.new
|
8
|
+
end
|
9
|
+
|
10
|
+
describe ".xml_to_solr" do
|
11
|
+
it "should turn simple xml into a solr document" do
|
12
|
+
desc_meta = fixture("druid-bv448hq0314-descMetadata.xml")
|
13
|
+
result = @extractor.xml_to_solr(desc_meta)
|
14
|
+
result[:type_t].should == "text"
|
15
|
+
result[:medium_t].should == "Paper Document"
|
16
|
+
result[:rights_t].should == "Presumed under copyright. Do not publish."
|
17
|
+
result[:date_t].should == "1985-12-30"
|
18
|
+
result[:format_t].should == "application/tiff"
|
19
|
+
result[:title_t].should == "This is a Sample Title"
|
20
|
+
result[:publisher_t].should == "Sample Unversity"
|
21
|
+
|
22
|
+
# ... and a hacky way of making sure that it added a field for each of the format values (not just the first one)
|
23
|
+
result.inspect.include?('@value="application/tiff"').should be_true
|
24
|
+
result.inspect.include?('@value="application/pdf"').should be_true
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe "extract_rels_ext" do
|
29
|
+
it "should extract the content model of the RELS-EXT datastream of a Fedora object and set hydra_type using hydra_types mapping" do
|
30
|
+
rels_ext = fixture("rels_ext_cmodel.xml")
|
31
|
+
result = @extractor.extract_rels_ext( rels_ext )
|
32
|
+
result[:cmodel_t].should == "info:fedora/fedora-system:ContentModel-3.0"
|
33
|
+
result[:hydra_type_t].should == "salt_document"
|
34
|
+
|
35
|
+
# ... and a hacky way of making sure that it added a field for each additional content model and hydra_type value
|
36
|
+
result.inspect.include?('@value="info:fedora/afmodel:SaltDocument"').should be_true
|
37
|
+
result.inspect.include?('@value="jp2_document"').should be_true
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe "extract_hydra_types" do
|
42
|
+
it "should extract the hydra_type of a Fedora object" do
|
43
|
+
rels_ext = fixture("rels_ext_cmodel.xml")
|
44
|
+
result = @extractor.extract_rels_ext( rels_ext )
|
45
|
+
result[:hydra_type_t].should == "salt_document"
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
|
50
|
+
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
require 'solrizer'
|
3
|
+
|
4
|
+
describe Solrizer::Indexer do
|
5
|
+
|
6
|
+
# Per-example setup: builds an Indexer whose `connect` call and Extractor are
# replaced by Mocha doubles, plus an empty Solr document for the examples to fill.
before(:each) do
  # Every Indexer instance gets a stubbed `connect` that just returns "foo"
  # (presumably so that constructing one does not need a live connection).
  Solrizer::Indexer.any_instance.stubs(:connect).returns("foo")

  # The document must exist BEFORE it is handed to `returns` below; assigning
  # it afterwards meant the stub captured nil instead of the document.
  # @solr_doc = mock('solr_doc')
  # @solr_doc.stubs(:<<)
  # @solr_doc.stubs(:[])
  @solr_doc = Solr::Document.new

  @extractor = mock("Extractor")
  @extractor.stubs(:html_content_to_solr).returns(@solr_doc)

  # Extractor.new is expected to be called once while the Indexer is built;
  # it hands back the mock extractor instead of a real one.
  Solrizer::Extractor.expects(:new).returns(@extractor)
  @indexer = Solrizer::Indexer.new
end
|
21
|
+
|
22
|
+
describe "#generate_dates" do
|
23
|
+
it "should still give 9999-99-99 date if the solr document does not have a date_t field" do
|
24
|
+
|
25
|
+
solr_result = @indexer.generate_dates(@solr_doc)
|
26
|
+
solr_result.should be_kind_of Solr::Document
|
27
|
+
solr_result[:date_t].should == "9999-99-99"
|
28
|
+
solr_result[:month_facet].should == "99"
|
29
|
+
solr_result[:day_facet].should == '99'
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
# A date_t value that is not in YYYY-MM-DD form is expected to be left
# untouched, while the month and day facets both fall back to "99".
it "should keep date_t and give 99 for the month and day facets if the solr_doc[:date_t] is not valid date in YYYY-MM-DD format" do
  @solr_doc << Solr::Field.new(:date_t => "Unknown")
  solr_result = @indexer.generate_dates(@solr_doc)
  solr_result.should be_kind_of Solr::Document
  # The original (unparseable) value is preserved, not replaced by 9999-99-99.
  solr_result[:date_t].should == "Unknown"
  solr_result[:month_facet].should == "99"
  solr_result[:day_facet].should == '99'
end
|
43
|
+
|
44
|
+
it "should give month and dates even if the :date_t is not a valid date but is in YYYY-MM-DD format " do
|
45
|
+
|
46
|
+
@solr_doc << Solr::Field.new(:date_t => "0000-13-11")
|
47
|
+
solr_result = @indexer.generate_dates(@solr_doc)
|
48
|
+
solr_result.should be_kind_of Solr::Document
|
49
|
+
solr_result[:date_t].should == "0000-13-11"
|
50
|
+
solr_result[:month_facet].should == "99"
|
51
|
+
solr_result[:day_facet].should == '11'
|
52
|
+
end
|
53
|
+
|
54
|
+
it "should give month and day when in a valid date format" do
|
55
|
+
@solr_doc << Solr::Field.new(:date_t => "1978-04-11")
|
56
|
+
solr_result = @indexer.generate_dates(@solr_doc)
|
57
|
+
solr_result.should be_kind_of Solr::Document
|
58
|
+
solr_result[:date_t].should == "1978-04-11"
|
59
|
+
solr_result[:month_facet].should == "04"
|
60
|
+
solr_result[:day_facet].should == '11'
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
it "should still give two digit strings even if the month/day is single digit" do
|
65
|
+
|
66
|
+
@solr_doc << Solr::Field.new(:date_t => "1978-4-1")
|
67
|
+
solr_result = @indexer.generate_dates(@solr_doc)
|
68
|
+
solr_result.should be_kind_of Solr::Document
|
69
|
+
solr_result[:date_t].should == "1978-4-1"
|
70
|
+
solr_result[:month_facet].should == "04"
|
71
|
+
solr_result[:day_facet].should == '01'
|
72
|
+
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
describe "#solrize" do
|
80
|
+
it "should convert a hash to a solr doc" do
|
81
|
+
example_hash = {"box"=>"Box 51A", "city"=>["Ann Arbor", "Hyderabad", "Palo Alto"], "person"=>["ELLIE ENGELMORE", "Reddy", "EDWARD FEIGENBAUM"], "title"=>"Letter from Ellie Engelmore to Professor K. C. Reddy", "series"=>"eaf7000", "folder"=>"Folder 15", "technology"=>["artificial intelligence"], "year"=>"1985", "organization"=>["Heuristic Programming Project", "Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder", "Professor K. C. Reddy School of Mathematics and Computer/Information Sciences"], "collection"=>"e-a-feigenbaum-collection", "state"=>["Michigan", "California"]}
|
82
|
+
|
83
|
+
example_result = Solrizer::Indexer.solrize( example_hash )
|
84
|
+
example_result.should be_kind_of Solr::Document
|
85
|
+
example_hash.each_pair do |key,values|
|
86
|
+
if values.class == String
|
87
|
+
example_result["#{key}_facet"].should == values
|
88
|
+
else
|
89
|
+
values.each do |v|
|
90
|
+
example_result.inspect.include?("@name=\"#{key}_facet\"").should be_true
|
91
|
+
example_result.inspect.include?("@value=\"#{v}\"").should be_true
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
it "should handle hashes with facets listed in a sub-hash" do
|
98
|
+
simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
|
99
|
+
result = Solrizer::Indexer.solrize( simple_hash )
|
100
|
+
result.should be_kind_of Solr::Document
|
101
|
+
result["technology_facet"].should == "t1"
|
102
|
+
result.inspect.include?('@boost=nil').should be_true
|
103
|
+
result.inspect.include?('@name="technology_facet"').should be_true
|
104
|
+
result.inspect.include?('@value="t2"').should be_true
|
105
|
+
result["company_facet"].should == "c1"
|
106
|
+
result["person_facet"].should == "p1"
|
107
|
+
result.inspect.include?('@name="person_facet"').should be_true
|
108
|
+
result.inspect.include?('@value="p2"').should be_true
|
109
|
+
|
110
|
+
end
|
111
|
+
|
112
|
+
it "should create symbols from the :symbols subhash" do
|
113
|
+
simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}, :symbols=>{'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
|
114
|
+
result = Solrizer::Indexer.solrize( simple_hash )
|
115
|
+
result.should be_kind_of Solr::Document
|
116
|
+
result["technology_s"].should == "t1"
|
117
|
+
result.inspect.include?('@name="technology_s"').should be_true
|
118
|
+
result.inspect.include?('@value="t2"').should be_true
|
119
|
+
|
120
|
+
result["company_s"].should == "c1"
|
121
|
+
result["person_s"].should == "p1"
|
122
|
+
result.inspect.include?('@name="person_s"').should be_true
|
123
|
+
result.inspect.include?('@value="p2"').should be_true
|
124
|
+
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
require 'solrizer'
|
3
|
+
|
4
|
+
describe Solrizer::Solrizer do
|
5
|
+
|
6
|
+
before(:each) do
|
7
|
+
@solrizer = Solrizer::Solrizer.new
|
8
|
+
end
|
9
|
+
|
10
|
+
describe "solrize" do
|
11
|
+
it "should trigger the indexer for the provided object" do
|
12
|
+
# sample_obj = ActiveFedora::Base.new
|
13
|
+
mock_object = mock("my object")
|
14
|
+
mock_object.expects(:kind_of?).with(ActiveFedora::Base).returns(true)
|
15
|
+
mock_object.stubs(:pid)
|
16
|
+
mock_object.stubs(:label)
|
17
|
+
mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
|
18
|
+
ActiveFedora::Base.expects(:load_instance).never
|
19
|
+
@solrizer.indexer.expects(:index).with( mock_object )
|
20
|
+
@solrizer.solrize( mock_object )
|
21
|
+
end
|
22
|
+
it "should still load the object if only a pid is provided" do
|
23
|
+
mock_object = mock("my object")
|
24
|
+
mock_object.stubs(:pid)
|
25
|
+
mock_object.stubs(:label)
|
26
|
+
mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
|
27
|
+
|
28
|
+
ActiveFedora::Base.expects(:load_instance).with( "_PID_" ).returns(mock_object)
|
29
|
+
@solrizer.indexer.expects(:index).with(mock_object)
|
30
|
+
@solrizer.solrize("_PID_")
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe "solrize_objects" do
|
35
|
+
it "should call solrize for each pid returned by solr" do
|
36
|
+
pids = [["pid1"], ["pid2"], ["pid3"]]
|
37
|
+
Solrizer::Repository.expects(:get_pids).returns(pids)
|
38
|
+
pids.each {|pid| @solrizer.expects(:solrize).with( pid ) }
|
39
|
+
@solrizer.solrize_objects
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
metadata
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: solrizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: true
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
- pre2
|
10
|
+
version: 0.1.0.pre2
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Matt Zumwalt
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-05-15 00:00:00 -05:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: active-fedora
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
requirements:
|
26
|
+
- - ">"
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 1
|
30
|
+
- 1
|
31
|
+
- 3
|
32
|
+
version: 1.1.3
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: rspec
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
segments:
|
43
|
+
- 1
|
44
|
+
- 2
|
45
|
+
- 9
|
46
|
+
version: 1.2.9
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id002
|
49
|
+
description: Use solrizer to populate solr indexes from Fedora repository content or from other sources. You can run solrizer from within your apps, using the provided rake tasks, or as a JMS listener
|
50
|
+
email: matt.zumwalt@yourmediashelf.com
|
51
|
+
executables: []
|
52
|
+
|
53
|
+
extensions: []
|
54
|
+
|
55
|
+
extra_rdoc_files:
|
56
|
+
- LICENSE
|
57
|
+
- README.rdoc
|
58
|
+
files:
|
59
|
+
- .gitignore
|
60
|
+
- LICENSE
|
61
|
+
- README.rdoc
|
62
|
+
- Rakefile
|
63
|
+
- VERSION
|
64
|
+
- config/fedora.yml
|
65
|
+
- config/hydra_types.yml
|
66
|
+
- config/solr.yml
|
67
|
+
- lib/solrizer.rb
|
68
|
+
- lib/solrizer/configuration.rb
|
69
|
+
- lib/solrizer/extractor.rb
|
70
|
+
- lib/solrizer/indexer.rb
|
71
|
+
- lib/solrizer/main.rb
|
72
|
+
- lib/solrizer/replicator.rb
|
73
|
+
- lib/solrizer/repository.rb
|
74
|
+
- lib/tasks/solrizer.rake
|
75
|
+
- solrizer.gemspec
|
76
|
+
- spec/fixtures/druid-bv448hq0314-descMetadata.xml
|
77
|
+
- spec/fixtures/druid-bv448hq0314-extProperties.xml
|
78
|
+
- spec/fixtures/druid-cm234kq4672-extProperties.xml
|
79
|
+
- spec/fixtures/druid-cm234kq4672-stories.xml
|
80
|
+
- spec/fixtures/druid-hc513kw4806-descMetadata.xml
|
81
|
+
- spec/fixtures/rels_ext_cmodel.xml
|
82
|
+
- spec/integration/indexer_spec.rb
|
83
|
+
- spec/rcov.opts
|
84
|
+
- spec/spec.opts
|
85
|
+
- spec/spec_helper.rb
|
86
|
+
- spec/units/extractor_spec.rb
|
87
|
+
- spec/units/indexer_spec.rb
|
88
|
+
- spec/units/shelver_spec.rb
|
89
|
+
has_rdoc: true
|
90
|
+
homepage: http://github.com/projecthydra/solrizer
|
91
|
+
licenses: []
|
92
|
+
|
93
|
+
post_install_message:
|
94
|
+
rdoc_options:
|
95
|
+
- --charset=UTF-8
|
96
|
+
require_paths:
|
97
|
+
- lib
|
98
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
segments:
|
103
|
+
- 0
|
104
|
+
version: "0"
|
105
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - ">"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
segments:
|
110
|
+
- 1
|
111
|
+
- 3
|
112
|
+
- 1
|
113
|
+
version: 1.3.1
|
114
|
+
requirements: []
|
115
|
+
|
116
|
+
rubyforge_project:
|
117
|
+
rubygems_version: 1.3.6
|
118
|
+
signing_key:
|
119
|
+
specification_version: 3
|
120
|
+
summary: A utility for building solr indexes, usually from Fedora repository content.
|
121
|
+
test_files:
|
122
|
+
- spec/integration/indexer_spec.rb
|
123
|
+
- spec/spec_helper.rb
|
124
|
+
- spec/units/extractor_spec.rb
|
125
|
+
- spec/units/indexer_spec.rb
|
126
|
+
- spec/units/shelver_spec.rb
|