solrizer-fedora 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/History.textile +6 -0
- data/VERSION +1 -1
- data/lib/solrizer/fedora/indexer.rb +12 -8
- data/lib/solrizer/fedora/solrizer.rb +11 -6
- data/solrizer-fedora.gemspec +2 -2
- data/spec/units/fedora_indexer_spec.rb +73 -14
- data/spec/units/fedora_solrizer_spec.rb +7 -1
- metadata +4 -4
data/History.textile
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
h2. 1.0.3
|
2
|
+
|
3
|
+
smarter loading of config files
|
4
|
+
switched solrize_objects to only intercept errors if it's given an optional parameter of :suppress_errors=>true
|
5
|
+
cleaner command line output
|
6
|
+
|
1
7
|
h2. 1.0.2
|
2
8
|
|
3
9
|
switched solrize_objects to only intercept errors if it's given an optional parameter of :suppress_errors=>true
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.2
|
1
|
+
1.0.3
|
@@ -48,7 +48,7 @@ class Indexer
|
|
48
48
|
if ActiveFedora.fedora_config.empty?
|
49
49
|
ActiveFedora.init
|
50
50
|
end
|
51
|
-
|
51
|
+
|
52
52
|
if defined?(Blacklight)
|
53
53
|
solr_config = Blacklight.solr_config
|
54
54
|
else
|
@@ -59,9 +59,12 @@ class Indexer
|
|
59
59
|
solr_config = yaml[RAILS_ENV]
|
60
60
|
puts solr_config.inspect
|
61
61
|
else
|
62
|
-
config_path = File.join(
|
63
|
-
|
64
|
-
|
62
|
+
config_path = File.join("config","solr.yml")
|
63
|
+
unless File.exist?(config_path)
|
64
|
+
config_path = File.join(File.dirname(__FILE__), "..", "..", "..", "config", "solr.yml")
|
65
|
+
end
|
66
|
+
logger.debug "SOLRIZER: reading config from " + config_path.inspect
|
67
|
+
yaml = YAML.load(File.open(config_path))
|
65
68
|
|
66
69
|
if ENV["environment"].nil?
|
67
70
|
environment = "development"
|
@@ -70,7 +73,7 @@ class Indexer
|
|
70
73
|
end #if
|
71
74
|
|
72
75
|
solr_config = yaml[environment]
|
73
|
-
|
76
|
+
logger.debug "SOLRIZER solr_config:" + solr_config.inspect
|
74
77
|
end #if defined?(RAILS_ROOT)
|
75
78
|
|
76
79
|
end #if defined?(Blacklight)
|
@@ -86,12 +89,13 @@ class Indexer
|
|
86
89
|
else
|
87
90
|
raise
|
88
91
|
end
|
92
|
+
|
89
93
|
@solr = RSolr.connect :url => url
|
90
94
|
# @connection = Solr::Connection.new(url, :autocommit => :on )
|
91
95
|
|
92
|
-
rescue
|
93
|
-
|
94
|
-
|
96
|
+
rescue RuntimeError => e
|
97
|
+
logger.debug "Unable to establish SOLR Connection with #{solr_config.inspect}. Failed with #{e.message}"
|
98
|
+
raise URI::InvalidURIError
|
95
99
|
end
|
96
100
|
|
97
101
|
#
|
@@ -31,9 +31,12 @@ class Solrizer
|
|
31
31
|
@indexer = Indexer.new( :index_full_text=>@index_full_text )
|
32
32
|
end
|
33
33
|
|
34
|
+
# Solrize the given Fedora object's full-text and facets into the search index
|
34
35
|
#
|
35
|
-
#
|
36
|
-
#
|
36
|
+
# @param [String or ActiveFedora::Base] obj the object to solrize
|
37
|
+
# @param [Hash] opts optional parameters
|
38
|
+
# @example Suppress errors using :suppress_errors option
|
39
|
+
# solrizer.solrize("my:pid", :suppress_errors=>true)
|
37
40
|
def solrize( obj, opts={} )
|
38
41
|
# retrieve the Fedora object based on the given unique id
|
39
42
|
|
@@ -80,10 +83,12 @@ class Solrizer
|
|
80
83
|
|
81
84
|
end
|
82
85
|
|
86
|
+
# Retrieve a comprehensive list of all the unique identifiers in Fedora and
|
87
|
+
# solrize each object's full-text and facets into the search index
|
83
88
|
#
|
84
|
-
#
|
85
|
-
#
|
86
|
-
def solrize_objects
|
89
|
+
# @example Suppress errors using :suppress_errors option
|
90
|
+
# solrizer.solrize_objects( :suppress_errors=>true )
|
91
|
+
def solrize_objects(opts={})
|
87
92
|
# retrieve a list of all the pids in the fedora repository
|
88
93
|
num_docs = 1000000 # modify this number to guarantee that all the objects are retrieved from the repository
|
89
94
|
puts "WARNING: You have turned off indexing of Full Text content. Be sure to re-run indexer with @@index_full_text set to true in main.rb" if index_full_text == false
|
@@ -94,7 +99,7 @@ class Solrizer
|
|
94
99
|
|
95
100
|
puts "Shelving #{objects.length} Fedora objects"
|
96
101
|
objects.each do |object|
|
97
|
-
solrize( object )
|
102
|
+
solrize( object, opts )
|
98
103
|
end
|
99
104
|
|
100
105
|
else
|
data/solrizer-fedora.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{solrizer-fedora}
|
8
|
-
s.version = "1.0.2"
|
8
|
+
s.version = "1.0.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Matt Zumwalt"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-05-03}
|
13
13
|
s.description = %q{An extension to projecthydra/solrizer that provides utilities for loading objects from Fedora Repositories and creating solr documents from them.}
|
14
14
|
s.email = %q{matt.zumwalt@yourmediashelf.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -4,23 +4,82 @@ require "solrizer/fedora"
|
|
4
4
|
|
5
5
|
describe Solrizer::Fedora::Indexer do
|
6
6
|
|
7
|
-
before(:each) do
|
8
|
-
|
7
|
+
before(:all) do
|
8
|
+
if !defined?(Blacklight)
|
9
|
+
class Blacklight
|
10
|
+
def self.is_stub?
|
11
|
+
true
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
9
16
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
# @solr_doc.stubs(:<<)
|
14
|
-
# @solr_doc.stubs(:[])
|
15
|
-
|
16
|
-
@solr_doc = Hash.new
|
17
|
-
|
18
|
-
Solrizer::Extractor.expects(:new).returns(@extractor)
|
19
|
-
@indexer = Solrizer::Fedora::Indexer.new
|
20
|
-
|
21
|
-
end
|
17
|
+
after(:all) do
|
18
|
+
Object.instance_eval {remove_const :Blacklight} unless !Blacklight.respond_to?(:is_stub?)
|
19
|
+
end
|
22
20
|
|
21
|
+
before(:each) do
|
22
|
+
@extractor = mock("Extractor")
|
23
|
+
@extractor.stubs(:html_content_to_solr).returns(@solr_doc)
|
24
|
+
# @solr_doc = mock('solr_doc')
|
25
|
+
# @solr_doc.stubs(:<<)
|
26
|
+
# @solr_doc.stubs(:[])
|
27
|
+
|
28
|
+
@solr_doc = Hash.new
|
29
|
+
|
30
|
+
Solrizer::Extractor.stubs(:new).returns(@extractor)
|
31
|
+
end
|
32
|
+
|
33
|
+
describe "#new" do
|
34
|
+
it "should return a URL from solr_config if the config has a :url" do
|
35
|
+
Blacklight.stubs(:solr_config).returns({:url => "http://foo.com:8080/solr"})
|
36
|
+
@indexer = Solrizer::Fedora::Indexer.new
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should return a URL from solr_config if the config has a 'url' " do
|
40
|
+
Blacklight.stubs(:solr_config).returns({'url' => "http://foo.com:8080/solr"})
|
41
|
+
@indexer = Solrizer::Fedora::Indexer.new
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should raise an error if the solr_config does not have a :url" do
|
45
|
+
Blacklight.stubs(:solr_config).returns({'boosh' => "http://foo.com:8080/solr"})
|
46
|
+
lambda { Solrizer::Fedora::Indexer.new }.should raise_error(URI::InvalidURIError)
|
47
|
+
end
|
48
|
+
|
49
|
+
it "should return a fulltext URL if solr_config has a fulltext url defined" do
|
50
|
+
Blacklight.stubs(:solr_config).returns({'fulltext' =>{ 'url' => "http://foo.com:8080/solr"}})
|
51
|
+
@indexer = Solrizer::Fedora::Indexer.new(:index_full_text => true)
|
52
|
+
end
|
53
|
+
|
54
|
+
it "should return a fulltext URL if solr_config has a default url defined" do
|
55
|
+
Blacklight.stubs(:solr_config).returns({'default' =>{ 'url' => "http://foo.com:8080/solr"}})
|
56
|
+
@indexer = Solrizer::Fedora::Indexer.new(:index_full_text => false)
|
57
|
+
end
|
58
|
+
|
59
|
+
# it "should find the solr.yml even if Blacklight is not loaded and RAILS is not loaded" do
|
60
|
+
# pending "Need to unset Blacklight in order to make this work..."
|
61
|
+
# # Store RAILS_ROOT if it's set
|
62
|
+
# if defined? RAILS_ROOT
|
63
|
+
# temp_rails_root = RAILS_ROOT
|
64
|
+
# Object.send(:remove_const, :RAILS_ROOT)
|
65
|
+
# end
|
66
|
+
# sample_config = {'development' => {'solr'=> {'url' => "http://noblacklight.norails.edu:8080/solr"}}}
|
67
|
+
# YAML.stubs(:load).returns(sample_config)
|
68
|
+
# ActiveFedora.stubs(:init)
|
69
|
+
# @indexer = Solrizer::Fedora::Indexer.new
|
70
|
+
# # Re-set RAILS_ROOT if it was stored
|
71
|
+
# unless temp_rails_root.nil?
|
72
|
+
# RAILS_ROOT = temp_rails_root
|
73
|
+
# end
|
74
|
+
# end
|
75
|
+
end
|
76
|
+
|
23
77
|
describe "#generate_dates" do
|
78
|
+
before(:each) do
|
79
|
+
Solrizer::Fedora::Indexer.any_instance.stubs(:connect).returns("foo")
|
80
|
+
@indexer = Solrizer::Fedora::Indexer.new
|
81
|
+
end
|
82
|
+
|
24
83
|
it "should still give 9999-99-99 date if the solr document does not have a date_t field" do
|
25
84
|
|
26
85
|
solr_result = @indexer.generate_dates(@solr_doc)
|
@@ -35,8 +35,14 @@ describe Solrizer::Fedora::Solrizer do
|
|
35
35
|
it "should call solrize for each object returned by Fedora::Repository.find_objects" do
|
36
36
|
objects = [["pid1"], ["pid2"], ["pid3"]]
|
37
37
|
Fedora::Repository.any_instance.expects(:find_objects).returns(objects)
|
38
|
-
objects.each {|object| @solrizer.expects(:solrize).with( object ) }
|
38
|
+
objects.each {|object| @solrizer.expects(:solrize).with( object, {} ) }
|
39
39
|
@solrizer.solrize_objects
|
40
40
|
end
|
41
|
+
it "should pass optional suppress_errors argument into .solrize method" do
|
42
|
+
objects = [["pid1"], ["pid2"], ["pid3"]]
|
43
|
+
Fedora::Repository.any_instance.expects(:find_objects).returns(objects)
|
44
|
+
objects.each {|object| @solrizer.expects(:solrize).with( object, :suppress_errors => true ) }
|
45
|
+
@solrizer.solrize_objects( :suppress_errors => true )
|
46
|
+
end
|
41
47
|
end
|
42
48
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: solrizer-fedora
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
- 2
|
10
|
-
version: 1.0.2
|
9
|
+
- 3
|
10
|
+
version: 1.0.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matt Zumwalt
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-05-03 00:00:00 -05:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|