solrizer-fedora 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.textile +6 -0
- data/VERSION +1 -1
- data/lib/solrizer/fedora/indexer.rb +12 -8
- data/lib/solrizer/fedora/solrizer.rb +11 -6
- data/solrizer-fedora.gemspec +2 -2
- data/spec/units/fedora_indexer_spec.rb +73 -14
- data/spec/units/fedora_solrizer_spec.rb +7 -1
- metadata +4 -4
data/History.textile
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
h2. 1.0.3
|
2
|
+
|
3
|
+
smarter loading of config files
|
4
|
+
switched solrize_objects to only intercept errors if it's given an optional parameter of :suppress_errors=>true
|
5
|
+
cleaner command line output
|
6
|
+
|
1
7
|
h2. 1.0.2
|
2
8
|
|
3
9
|
switched solrize_objects to only intercept errors if it's given an optional parameter of :suppress_errors=>true
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.2
|
1
|
+
1.0.3
|
@@ -48,7 +48,7 @@ class Indexer
|
|
48
48
|
if ActiveFedora.fedora_config.empty?
|
49
49
|
ActiveFedora.init
|
50
50
|
end
|
51
|
-
|
51
|
+
|
52
52
|
if defined?(Blacklight)
|
53
53
|
solr_config = Blacklight.solr_config
|
54
54
|
else
|
@@ -59,9 +59,12 @@ class Indexer
|
|
59
59
|
solr_config = yaml[RAILS_ENV]
|
60
60
|
puts solr_config.inspect
|
61
61
|
else
|
62
|
-
config_path = File.join(
|
63
|
-
|
64
|
-
|
62
|
+
config_path = File.join("config","solr.yml")
|
63
|
+
unless File.exist?(config_path)
|
64
|
+
config_path = File.join(File.dirname(__FILE__), "..", "..", "..", "config", "solr.yml")
|
65
|
+
end
|
66
|
+
logger.debug "SOLRIZER: reading config from " + config_path.inspect
|
67
|
+
yaml = YAML.load(File.open(config_path))
|
65
68
|
|
66
69
|
if ENV["environment"].nil?
|
67
70
|
environment = "development"
|
@@ -70,7 +73,7 @@ class Indexer
|
|
70
73
|
end #if
|
71
74
|
|
72
75
|
solr_config = yaml[environment]
|
73
|
-
|
76
|
+
logger.debug "SOLRIZER solr_config:" + solr_config.inspect
|
74
77
|
end #if defined?(RAILS_ROOT)
|
75
78
|
|
76
79
|
end #if defined?(Blacklight)
|
@@ -86,12 +89,13 @@ class Indexer
|
|
86
89
|
else
|
87
90
|
raise
|
88
91
|
end
|
92
|
+
|
89
93
|
@solr = RSolr.connect :url => url
|
90
94
|
# @connection = Solr::Connection.new(url, :autocommit => :on )
|
91
95
|
|
92
|
-
rescue
|
93
|
-
|
94
|
-
|
96
|
+
rescue RuntimeError => e
|
97
|
+
logger.debug "Unable to establish SOLR Connection with #{solr_config.inspect}. Failed with #{e.message}"
|
98
|
+
raise URI::InvalidURIError
|
95
99
|
end
|
96
100
|
|
97
101
|
#
|
@@ -31,9 +31,12 @@ class Solrizer
|
|
31
31
|
@indexer = Indexer.new( :index_full_text=>@index_full_text )
|
32
32
|
end
|
33
33
|
|
34
|
+
# Solrize the given Fedora object's full-text and facets into the search index
|
34
35
|
#
|
35
|
-
#
|
36
|
-
#
|
36
|
+
# @param [String or ActiveFedora::Base] obj the object to solrize
|
37
|
+
# @param [Hash] opts optional parameters
|
38
|
+
# @example Suppress errors using :suppress_errors option
|
39
|
+
# solrizer.solrize("my:pid", :suppress_errors=>true)
|
37
40
|
def solrize( obj, opts={} )
|
38
41
|
# retrieve the Fedora object based on the given unique id
|
39
42
|
|
@@ -80,10 +83,12 @@ class Solrizer
|
|
80
83
|
|
81
84
|
end
|
82
85
|
|
86
|
+
# Retrieve a comprehensive list of all the unique identifiers in Fedora and
|
87
|
+
# solrize each object's full-text and facets into the search index
|
83
88
|
#
|
84
|
-
#
|
85
|
-
#
|
86
|
-
def solrize_objects
|
89
|
+
# @example Suppress errors using :suppress_errors option
|
90
|
+
# solrizer.solrize_objects( :suppress_errors=>true )
|
91
|
+
def solrize_objects(opts={})
|
87
92
|
# retrieve a list of all the pids in the fedora repository
|
88
93
|
num_docs = 1000000 # modify this number to guarantee that all the objects are retrieved from the repository
|
89
94
|
puts "WARNING: You have turned off indexing of Full Text content. Be sure to re-run indexer with @@index_full_text set to true in main.rb" if index_full_text == false
|
@@ -94,7 +99,7 @@ class Solrizer
|
|
94
99
|
|
95
100
|
puts "Shelving #{objects.length} Fedora objects"
|
96
101
|
objects.each do |object|
|
97
|
-
solrize( object )
|
102
|
+
solrize( object, opts )
|
98
103
|
end
|
99
104
|
|
100
105
|
else
|
data/solrizer-fedora.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{solrizer-fedora}
|
8
|
-
s.version = "1.0.2"
|
8
|
+
s.version = "1.0.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Matt Zumwalt"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-05-03}
|
13
13
|
s.description = %q{An extension to projecthydra/solrizer that provides utilities for loading objects from Fedora Repositories and creating solr documents from them.}
|
14
14
|
s.email = %q{matt.zumwalt@yourmediashelf.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -4,23 +4,82 @@ require "solrizer/fedora"
|
|
4
4
|
|
5
5
|
describe Solrizer::Fedora::Indexer do
|
6
6
|
|
7
|
-
before(:
|
8
|
-
|
7
|
+
before(:all) do
|
8
|
+
if !defined?(Blacklight)
|
9
|
+
class Blacklight
|
10
|
+
def self.is_stub?
|
11
|
+
true
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
9
16
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
# @solr_doc.stubs(:<<)
|
14
|
-
# @solr_doc.stubs(:[])
|
15
|
-
|
16
|
-
@solr_doc = Hash.new
|
17
|
-
|
18
|
-
Solrizer::Extractor.expects(:new).returns(@extractor)
|
19
|
-
@indexer = Solrizer::Fedora::Indexer.new
|
20
|
-
|
21
|
-
end
|
17
|
+
after(:all) do
|
18
|
+
Object.instance_eval {remove_const :Blacklight} unless !Blacklight.respond_to?(:is_stub?)
|
19
|
+
end
|
22
20
|
|
21
|
+
before(:each) do
|
22
|
+
@extractor = mock("Extractor")
|
23
|
+
@extractor.stubs(:html_content_to_solr).returns(@solr_doc)
|
24
|
+
# @solr_doc = mock('solr_doc')
|
25
|
+
# @solr_doc.stubs(:<<)
|
26
|
+
# @solr_doc.stubs(:[])
|
27
|
+
|
28
|
+
@solr_doc = Hash.new
|
29
|
+
|
30
|
+
Solrizer::Extractor.stubs(:new).returns(@extractor)
|
31
|
+
end
|
32
|
+
|
33
|
+
describe "#new" do
|
34
|
+
it "should return a URL from solr_config if the config has a :url" do
|
35
|
+
Blacklight.stubs(:solr_config).returns({:url => "http://foo.com:8080/solr"})
|
36
|
+
@indexer = Solrizer::Fedora::Indexer.new
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should return a URL from solr_config if the config has a 'url' " do
|
40
|
+
Blacklight.stubs(:solr_config).returns({'url' => "http://foo.com:8080/solr"})
|
41
|
+
@indexer = Solrizer::Fedora::Indexer.new
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should raise an error if the solr_config does not have a :url" do
|
45
|
+
Blacklight.stubs(:solr_config).returns({'boosh' => "http://foo.com:8080/solr"})
|
46
|
+
lambda { Solrizer::Fedora::Indexer.new }.should raise_error(URI::InvalidURIError)
|
47
|
+
end
|
48
|
+
|
49
|
+
it "should return a fulltext URL if solr_config has a fulltext url defined" do
|
50
|
+
Blacklight.stubs(:solr_config).returns({'fulltext' =>{ 'url' => "http://foo.com:8080/solr"}})
|
51
|
+
@indexer = Solrizer::Fedora::Indexer.new(:index_full_text => true)
|
52
|
+
end
|
53
|
+
|
54
|
+
it "should return a fulltext URL if solr_config has a default url defined" do
|
55
|
+
Blacklight.stubs(:solr_config).returns({'default' =>{ 'url' => "http://foo.com:8080/solr"}})
|
56
|
+
@indexer = Solrizer::Fedora::Indexer.new(:index_full_text => false)
|
57
|
+
end
|
58
|
+
|
59
|
+
# it "should find the solr.yml even if Blacklight is not loaded and RAILS is not loaded" do
|
60
|
+
# pending "Need to unset Blacklight in order to make this work..."
|
61
|
+
# # Store RAILS_ROOT if it's set
|
62
|
+
# if defined? RAILS_ROOT
|
63
|
+
# temp_rails_root = RAILS_ROOT
|
64
|
+
# Object.send(:remove_const, :RAILS_ROOT)
|
65
|
+
# end
|
66
|
+
# sample_config = {'development' => {'solr'=> {'url' => "http://noblacklight.norails.edu:8080/solr"}}}
|
67
|
+
# YAML.stubs(:load).returns(sample_config)
|
68
|
+
# ActiveFedora.stubs(:init)
|
69
|
+
# @indexer = Solrizer::Fedora::Indexer.new
|
70
|
+
# # Re-set RAILS_ROOT if it was stored
|
71
|
+
# unless temp_rails_root.nil?
|
72
|
+
# RAILS_ROOT = temp_rails_root
|
73
|
+
# end
|
74
|
+
# end
|
75
|
+
end
|
76
|
+
|
23
77
|
describe "#generate_dates" do
|
78
|
+
before(:each) do
|
79
|
+
Solrizer::Fedora::Indexer.any_instance.stubs(:connect).returns("foo")
|
80
|
+
@indexer = Solrizer::Fedora::Indexer.new
|
81
|
+
end
|
82
|
+
|
24
83
|
it "should still give 9999-99-99 date if the solr document does not have a date_t field" do
|
25
84
|
|
26
85
|
solr_result = @indexer.generate_dates(@solr_doc)
|
@@ -35,8 +35,14 @@ describe Solrizer::Fedora::Solrizer do
|
|
35
35
|
it "should call solrize for each object returned by Fedora::Repository.find_objects" do
|
36
36
|
objects = [["pid1"], ["pid2"], ["pid3"]]
|
37
37
|
Fedora::Repository.any_instance.expects(:find_objects).returns(objects)
|
38
|
-
objects.each {|object| @solrizer.expects(:solrize).with( object ) }
|
38
|
+
objects.each {|object| @solrizer.expects(:solrize).with( object, {} ) }
|
39
39
|
@solrizer.solrize_objects
|
40
40
|
end
|
41
|
+
it "should pass optional suppress_errors argument into .solrize method" do
|
42
|
+
objects = [["pid1"], ["pid2"], ["pid3"]]
|
43
|
+
Fedora::Repository.any_instance.expects(:find_objects).returns(objects)
|
44
|
+
objects.each {|object| @solrizer.expects(:solrize).with( object, :suppress_errors => true ) }
|
45
|
+
@solrizer.solrize_objects( :suppress_errors => true )
|
46
|
+
end
|
41
47
|
end
|
42
48
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: solrizer-fedora
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
- 2
|
10
|
-
version: 1.0.2
|
9
|
+
- 3
|
10
|
+
version: 1.0.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matt Zumwalt
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-05-03 00:00:00 -05:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|