harvestdor-indexer 0.0.3 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/README.rdoc +4 -0
- data/Rakefile +2 -1
- data/harvestdor-indexer.gemspec +1 -1
- data/lib/harvestdor-indexer.rb +112 -27
- data/lib/harvestdor-indexer/version.rb +1 -1
- data/spec/unit/harvestdor-indexer_spec.rb +45 -61
- metadata +19 -33
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OGYzNWY3ZWNkM2VlNzdhY2YyN2UxZGQ3NzM2MTQyOTRmYzRiZDVjMw==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
YTE4YWFhMWEzOGQ0NDJhMDdlNGRlNGFiNjNiMjE1ZWJlNWMyNzQ2Yg==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ODdlZDM2MTNkYmYzODNlZmVhY2I0YzJjNGRhMGE3MjMyYzNjNTZiYTJmNDMy
|
10
|
+
ZWI1ZTUyZjZmNWFlNTY3NGUzZGMzMzVlMDY0MTQxNjRiNzRlM2U0OGI3MzU4
|
11
|
+
NTZhNWUwMjE2MjNjMDJjODU4MTA3YzVjOGY5NzQzZGNkMGE1ZDM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MTYyMzMzNDA2OTM5NWU4ZjdlNGQxYzc3ODU5MTBmNjI5MGM0OTM3NjNkZDdm
|
14
|
+
OWIwY2M1Yjk5OTEyOGIxNGFmYmEwMGNmZDhkZmQ4MmM5MWQxMDAzMmZhMTdl
|
15
|
+
YmNkODM1MDZmMTI4OThhYmFlYWFlMTc5MzEwYzBlMTIyNzg4Yzk=
|
data/README.rdoc
CHANGED
@@ -108,6 +108,10 @@ I suggest you run your code on harvestdor-dev, as it is already set up to be abl
|
|
108
108
|
|
109
109
|
== Releases
|
110
110
|
|
111
|
+
* <b>0.0.7</b> adding additional logging of error, success counts, and time to index and harvest
|
112
|
+
* <b>0.0.6</b> tweak error handling for public xml pieces
|
113
|
+
* <b>0.0.5</b> make rake release a no-op
|
114
|
+
* <b>0.0.4</b> add confstruct runtime dependency
|
111
115
|
* <b>0.0.3</b> add methods for public_xml, content_metadata, identity_metadata ...
|
112
116
|
* <b>0.0.2</b> better model code for index method (thanks, Bess!)
|
113
117
|
* <b>0.0.1</b> initial commit
|
data/Rakefile
CHANGED
data/harvestdor-indexer.gemspec
CHANGED
@@ -24,7 +24,7 @@ Gem::Specification.new do |gem|
|
|
24
24
|
gem.add_dependency 'stanford-mods'
|
25
25
|
|
26
26
|
# Runtime dependencies
|
27
|
-
|
27
|
+
gem.add_runtime_dependency 'confstruct'
|
28
28
|
|
29
29
|
# Development dependencies
|
30
30
|
# Bundler will install these gems too if you've checked out harvestdor-indexer source from git and run 'bundle install'
|
data/lib/harvestdor-indexer.rb
CHANGED
@@ -15,7 +15,15 @@ module Harvestdor
|
|
15
15
|
# Base class to harvest from DOR via harvestdor gem and then index
|
16
16
|
class Indexer
|
17
17
|
|
18
|
+
attr_accessor :error_count, :success_count, :max_retries
|
19
|
+
attr_accessor :total_time_to_parse,:total_time_to_solr
|
20
|
+
|
18
21
|
def initialize yml_path, options = {}
|
22
|
+
@success_count=0 # the number of objects successfully indexed
|
23
|
+
@error_count=0 # the number of objects that failed
|
24
|
+
@max_retries=5 # the number of times to retry an object
|
25
|
+
@total_time_to_solr=0
|
26
|
+
@total_time_to_parse=0
|
19
27
|
@yml_path = yml_path
|
20
28
|
config.configure(YAML.load_file(yml_path)) if yml_path
|
21
29
|
config.configure options
|
@@ -35,19 +43,62 @@ module Harvestdor
|
|
35
43
|
# create a Solr profiling document for each druid
|
36
44
|
# write the result to the Solr index
|
37
45
|
def harvest_and_index
|
46
|
+
start_time=Time.now
|
47
|
+
logger.info("Started harvest_and_index at #{start_time}")
|
38
48
|
if whitelist.empty?
|
39
49
|
druids.each { |druid| index druid }
|
40
50
|
else
|
41
51
|
whitelist.each { |druid| index druid }
|
42
52
|
end
|
43
53
|
solr_client.commit
|
44
|
-
|
54
|
+
total_time=elapsed_time(start_time)
|
55
|
+
total_objects=@success_count+@error_count
|
56
|
+
logger.info("Finished harvest_and_index at #{Time.now}: final Solr commit returned")
|
57
|
+
logger.info("Total elapsed time for harvest and index: #{(total_time/60.0)} minutes")
|
58
|
+
logger.info("Avg solr commit time per object (successful): #{@total_time_to_solr/@success_count} seconds") unless (@total_time_to_solr == 0 || @success_count == 0)
|
59
|
+
logger.info("Avg solr commit time per object (all): #{@total_time_to_solr/total_objects} seconds") unless (@total_time_to_solr == 0 || @error_count == 0 || total_objects == 0)
|
60
|
+
logger.info("Avg parse time per object (successful): #{@total_time_to_parse/@success_count} seconds") unless (@total_time_to_parse == 0 || @success_count == 0)
|
61
|
+
logger.info("Avg parse time per object (all): #{@total_time_to_parse/total_objects} seconds") unless (@total_time_to_parse == 0 || @error_count == 0 || total_objects == 0)
|
62
|
+
logger.info("Avg complete index time per object (successful): #{total_time/@success_count} seconds") unless (@success_count == 0)
|
63
|
+
logger.info("Avg complete index time per object (all): #{total_time/total_objects} seconds") unless (@error_count == 0 || total_object == 0)
|
64
|
+
logger.info("Successful count: #{@success_count}")
|
65
|
+
logger.info("Error count: #{@error_count}")
|
66
|
+
logger.info("Total records processed: #{total_objects}")
|
45
67
|
end
|
46
68
|
|
47
69
|
# return Array of druids contained in the OAI harvest indicated by OAI params in yml configuration file
|
48
70
|
# @return [Array<String>] or enumeration over it, if block is given. (strings are druids, e.g. ab123cd1234)
|
49
71
|
def druids
|
50
|
-
@druids
|
72
|
+
if @druids.nil?
|
73
|
+
start_time=Time.now
|
74
|
+
logger.info("Starting OAI harvest of druids at #{start_time}.")
|
75
|
+
@druids = harvestdor_client.druids_via_oai
|
76
|
+
logger.info("Completed OAI harves of druids at #{Time.now}. Found #{@druids.size} druids. Total elapsed time for OAI harvest = #{elapsed_time(start_time,:minutes)} minutes")
|
77
|
+
end
|
78
|
+
return @druids
|
79
|
+
end
|
80
|
+
|
81
|
+
#add the document to solr, retry if an error occurs
|
82
|
+
def solr_add(doc, id, do_retry=true)
|
83
|
+
#if do_retry is false, skip retrying
|
84
|
+
tries=do_retry ? 0 : 999
|
85
|
+
while tries < @max_retries
|
86
|
+
begin
|
87
|
+
tries+=1
|
88
|
+
solr_client.add(doc)
|
89
|
+
#return if successful
|
90
|
+
return
|
91
|
+
rescue => e
|
92
|
+
if tries<@max_retries
|
93
|
+
logger.warn "#{id}: #{e.message}, retrying"
|
94
|
+
else
|
95
|
+
@error_count+=1
|
96
|
+
logger.error "Failed saving #{id}: #{e.message}"
|
97
|
+
logger.error e.backtrace
|
98
|
+
return
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
51
102
|
end
|
52
103
|
|
53
104
|
# create Solr doc for the druid and add it to Solr, unless it is on the blacklist.
|
@@ -59,6 +110,8 @@ module Harvestdor
|
|
59
110
|
logger.fatal("You must override the index method to transform druids into Solr docs and add them to Solr")
|
60
111
|
|
61
112
|
begin
|
113
|
+
start_time=Time.now
|
114
|
+
logger.info("About to index #{druid} at #{start_time}")
|
62
115
|
#logger.debug "About to index #{druid}"
|
63
116
|
doc_hash = {}
|
64
117
|
doc_hash[:id] = druid
|
@@ -69,10 +122,12 @@ module Harvestdor
|
|
69
122
|
|
70
123
|
solr_client.add(doc_hash)
|
71
124
|
|
72
|
-
|
125
|
+
logger.info("Indexed #{druid} in #{elapsed_time(start_time)} seconds")
|
126
|
+
@success_count+=1
|
73
127
|
# TODO: provide call to code to update DOR object's workflow datastream??
|
74
128
|
rescue => e
|
75
|
-
|
129
|
+
@error_count+=1
|
130
|
+
logger.error "Failed to index #{druid} in #{elapsed_time(start_time)} seconds: #{e.message}"
|
76
131
|
end
|
77
132
|
end
|
78
133
|
end
|
@@ -81,7 +136,9 @@ module Harvestdor
|
|
81
136
|
# @param [String] druid e.g. ab123cd4567
|
82
137
|
# @return [Stanford::Mods::Record] created from the MODS xml for the druid
|
83
138
|
def smods_rec druid
|
139
|
+
start_time=Time.now
|
84
140
|
ng_doc = harvestdor_client.mods druid
|
141
|
+
logger.info("Fetched MODs for #{druid} in #{elapsed_time(start_time)} seconds")
|
85
142
|
raise "Empty MODS metadata for #{druid}: #{ng_doc.to_xml}" if ng_doc.root.xpath('//text()').empty?
|
86
143
|
mods_rec = Stanford::Mods::Record.new
|
87
144
|
mods_rec.from_nk_node(ng_doc.root)
|
@@ -92,45 +149,59 @@ module Harvestdor
|
|
92
149
|
# @param [String] druid e.g. ab123cd4567
|
93
150
|
# @return [Nokogiri::XML::Document] the public xml for the DOR object
|
94
151
|
def public_xml druid
|
152
|
+
start_time=Time.now
|
95
153
|
ng_doc = harvestdor_client.public_xml druid
|
154
|
+
logger.info("Fetched public_xml for #{druid} in #{elapsed_time(start_time)} seconds")
|
96
155
|
raise "No public xml for #{druid}" if !ng_doc
|
97
156
|
raise "Empty public xml for #{druid}: #{ng_doc.to_xml}" if ng_doc.root.xpath('//text()').empty?
|
98
157
|
ng_doc
|
99
158
|
end
|
100
|
-
|
101
|
-
# the contentMetadata for this DOR object, from the purl public xml
|
102
|
-
# @param [
|
159
|
+
|
160
|
+
# the contentMetadata for this DOR object, ultimately from the purl public xml
|
161
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
162
|
+
# a Nokogiri::XML::Document containing the public_xml for an object
|
103
163
|
# @return [Nokogiri::XML::Document] the contentMetadata for the DOR object
|
104
|
-
def content_metadata
|
105
|
-
|
106
|
-
|
164
|
+
def content_metadata object
|
165
|
+
start_time=Time.now
|
166
|
+
ng_doc = harvestdor_client.content_metadata object
|
167
|
+
logger.info("Fetched content_metadata in #{elapsed_time(start_time)} seconds")
|
168
|
+
raise "No contentMetadata for #{object.inspect}" if !ng_doc || ng_doc.children.empty?
|
107
169
|
ng_doc
|
108
170
|
end
|
109
|
-
|
110
|
-
# the identityMetadata for this DOR object, from the purl public xml
|
111
|
-
# @param [
|
171
|
+
|
172
|
+
# the identityMetadata for this DOR object, ultimately from the purl public xml
|
173
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
174
|
+
# a Nokogiri::XML::Document containing the public_xml for an object
|
112
175
|
# @return [Nokogiri::XML::Document] the identityMetadata for the DOR object
|
113
|
-
def identity_metadata
|
114
|
-
|
115
|
-
|
176
|
+
def identity_metadata object
|
177
|
+
start_time=Time.now
|
178
|
+
ng_doc = harvestdor_client.identity_metadata object
|
179
|
+
logger.info("Fetched identity_metadata in #{elapsed_time(start_time)} seconds")
|
180
|
+
raise "No identityMetadata for #{object.inspect}" if !ng_doc || ng_doc.children.empty?
|
116
181
|
ng_doc
|
117
182
|
end
|
118
183
|
|
119
|
-
# the rightsMetadata for this DOR object, from the purl public xml
|
120
|
-
# @param [
|
184
|
+
# the rightsMetadata for this DOR object, ultimately from the purl public xml
|
185
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
186
|
+
# a Nokogiri::XML::Document containing the public_xml for an object
|
121
187
|
# @return [Nokogiri::XML::Document] the rightsMetadata for the DOR object
|
122
|
-
def rights_metadata
|
123
|
-
|
124
|
-
|
188
|
+
def rights_metadata object
|
189
|
+
start_time=Time.now
|
190
|
+
ng_doc = harvestdor_client.rights_metadata object
|
191
|
+
logger.info("Fetched rights_metadata in #{elapsed_time(start_time)} seconds")
|
192
|
+
raise "No rightsMetadata for #{object.inspect}" if !ng_doc || ng_doc.children.empty?
|
125
193
|
ng_doc
|
126
194
|
end
|
127
195
|
|
128
|
-
# the RDF for this DOR object, from the purl public xml
|
129
|
-
# @param [
|
196
|
+
# the RDF for this DOR object, ultimately from the purl public xml
|
197
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
198
|
+
# a Nokogiri::XML::Document containing the public_xml for an object
|
130
199
|
# @return [Nokogiri::XML::Document] the RDF for the DOR object
|
131
|
-
def rdf
|
132
|
-
|
133
|
-
|
200
|
+
def rdf object
|
201
|
+
start_time=Time.now
|
202
|
+
ng_doc = harvestdor_client.rdf object
|
203
|
+
logger.info("Fetched rdf in #{elapsed_time(start_time)} seconds")
|
204
|
+
raise "No RDF for #{object.inspect}" if !ng_doc || ng_doc.children.empty?
|
134
205
|
ng_doc
|
135
206
|
end
|
136
207
|
|
@@ -162,6 +233,20 @@ module Harvestdor
|
|
162
233
|
@harvestdor_client ||= Harvestdor::Client.new({:config_yml_path => @yml_path})
|
163
234
|
end
|
164
235
|
|
236
|
+
def elapsed_time(start_time,units=:seconds)
|
237
|
+
elapsed_seconds=Time.now-start_time
|
238
|
+
case units
|
239
|
+
when :seconds
|
240
|
+
return elapsed_seconds.round(2)
|
241
|
+
when :minutes
|
242
|
+
return (elapsed_seconds/60.0).round(1)
|
243
|
+
when :hours
|
244
|
+
return (elapsed_seconds/3600.0).round(2)
|
245
|
+
else
|
246
|
+
return elapsed_seconds
|
247
|
+
end
|
248
|
+
end
|
249
|
+
|
165
250
|
# populate @blacklist as an Array of druids ('oo000oo0000') that will NOT be processed
|
166
251
|
# by reading the File at the indicated path
|
167
252
|
# @param [String] path - path of file containing a list of druids
|
@@ -200,7 +285,7 @@ module Harvestdor
|
|
200
285
|
logger.fatal msg
|
201
286
|
raise msg
|
202
287
|
end
|
203
|
-
|
288
|
+
|
204
289
|
# Global, memoized, lazy initialized instance of a logger
|
205
290
|
# @param [String] log_dir directory in which to write the log file
|
206
291
|
# @param [String] log_name name of log file
|
@@ -13,13 +13,29 @@ describe Harvestdor::Indexer do
|
|
13
13
|
@whitelist_path = File.join(File.dirname(__FILE__), "../config/ap_whitelist.txt")
|
14
14
|
end
|
15
15
|
|
16
|
+
describe "access methods" do
|
17
|
+
it "initializes success count" do
|
18
|
+
@indexer.success_count.should == 0
|
19
|
+
end
|
20
|
+
it "initializes error count" do
|
21
|
+
@indexer.error_count.should == 0
|
22
|
+
end
|
23
|
+
it "initializes max_retries" do
|
24
|
+
@indexer.max_retries.should == 5
|
25
|
+
end
|
26
|
+
it "allows overriding of max_retries" do
|
27
|
+
@indexer.max_retries=6
|
28
|
+
@indexer.max_retries.should == 6
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
16
32
|
describe "logging" do
|
17
33
|
it "should write the log file to the directory indicated by log_dir" do
|
18
34
|
@indexer.logger.info("indexer_spec logging test message")
|
19
35
|
File.exists?(File.join(@yaml['log_dir'], @yaml['log_name'])).should == true
|
20
36
|
end
|
21
37
|
end
|
22
|
-
|
38
|
+
|
23
39
|
it "should initialize the harvestdor_client from the config" do
|
24
40
|
@hdor_client.should be_an_instance_of(Harvestdor::Client)
|
25
41
|
@hdor_client.config.default_set.should == @yaml['default_set']
|
@@ -32,7 +48,7 @@ describe Harvestdor::Indexer do
|
|
32
48
|
}
|
33
49
|
end
|
34
50
|
it "should call druids_via_oai and then call :add on rsolr connection" do
|
35
|
-
@
|
51
|
+
@indexer.should_receive(:druids).and_return([@fake_druid])
|
36
52
|
@indexer.solr_client.should_receive(:add).with(@doc_hash)
|
37
53
|
@indexer.solr_client.should_receive(:commit)
|
38
54
|
@indexer.harvest_and_index
|
@@ -76,7 +92,7 @@ describe Harvestdor::Indexer do
|
|
76
92
|
end
|
77
93
|
|
78
94
|
it "druids method should call druids_via_oai method on harvestdor_client" do
|
79
|
-
@hdor_client.should_receive(:druids_via_oai)
|
95
|
+
@hdor_client.should_receive(:druids_via_oai).and_return([@fake_druid])
|
80
96
|
@indexer.druids
|
81
97
|
end
|
82
98
|
|
@@ -129,16 +145,13 @@ describe Harvestdor::Indexer do
|
|
129
145
|
@hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(Nokogiri::XML("<publicObject/>"))
|
130
146
|
expect { @indexer.public_xml(@fake_druid) }.to raise_error(RuntimeError, Regexp.new("^Empty public xml for #{@fake_druid}: <"))
|
131
147
|
end
|
132
|
-
it "raises Harvestdor::Errors::MissingPurlPage if there is no purl page for the druid" do
|
133
|
-
expect { @indexer.public_xml(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingPurlPage)
|
134
|
-
end
|
135
148
|
it "raises error if there is no public_xml page for the druid" do
|
136
149
|
@hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(nil)
|
137
150
|
expect { @indexer.public_xml(@fake_druid) }.to raise_error(RuntimeError, "No public xml for #{@fake_druid}")
|
138
151
|
end
|
139
152
|
end
|
140
153
|
context "#content_metadata" do
|
141
|
-
it "returns a Nokogiri::XML::Document derived from the public xml" do
|
154
|
+
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
142
155
|
Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
|
143
156
|
cm = @indexer.content_metadata(@fake_druid)
|
144
157
|
cm.should be_kind_of(Nokogiri::XML::Document)
|
@@ -147,25 +160,23 @@ describe Harvestdor::Indexer do
|
|
147
160
|
cm.root.attributes['objectId'].text.should == @fake_druid
|
148
161
|
cm.root.text.strip.should == 'foo'
|
149
162
|
end
|
150
|
-
it "
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
163
|
+
it "if passed a Nokogiri::XML::Document of the public xml, it does no fetch" do
|
164
|
+
URI::HTTP.any_instance.should_not_receive(:open)
|
165
|
+
@hdor_client.should_receive(:content_metadata).and_call_original
|
166
|
+
cm = @indexer.content_metadata(@ng_pub_xml)
|
167
|
+
cm.should be_kind_of(Nokogiri::XML::Document)
|
168
|
+
cm.root.should_not == nil
|
169
|
+
cm.root.name.should == 'contentMetadata'
|
170
|
+
cm.root.attributes['objectId'].text.should == @fake_druid
|
171
|
+
cm.root.text.strip.should == 'foo'
|
157
172
|
end
|
158
173
|
it "raises RuntimeError if nil is returned by Harvestdor::Client.contentMetadata for the druid" do
|
159
174
|
@hdor_client.should_receive(:content_metadata).with(@fake_druid).and_return(nil)
|
160
|
-
expect { @indexer.content_metadata(@fake_druid) }.to raise_error(RuntimeError, "No contentMetadata for #{@fake_druid}")
|
161
|
-
end
|
162
|
-
it "raises MissingContentMetadata error if there is no contentMetadata in the public_xml for the druid" do
|
163
|
-
URI::HTTP.any_instance.should_receive(:open)
|
164
|
-
expect { @indexer.content_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingContentMetadata)
|
175
|
+
expect { @indexer.content_metadata(@fake_druid) }.to raise_error(RuntimeError, "No contentMetadata for \"#{@fake_druid}\"")
|
165
176
|
end
|
166
177
|
end
|
167
178
|
context "#identity_metadata" do
|
168
|
-
it "returns a Nokogiri::XML::Document derived from the public xml" do
|
179
|
+
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
169
180
|
Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
|
170
181
|
im = @indexer.identity_metadata(@fake_druid)
|
171
182
|
im.should be_kind_of(Nokogiri::XML::Document)
|
@@ -173,25 +184,22 @@ describe Harvestdor::Indexer do
|
|
173
184
|
im.root.name.should == 'identityMetadata'
|
174
185
|
im.root.text.strip.should == "druid:#{@fake_druid}"
|
175
186
|
end
|
176
|
-
it "
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
187
|
+
it "if passed a Nokogiri::XML::Document of the public xml, it does no fetch" do
|
188
|
+
URI::HTTP.any_instance.should_not_receive(:open)
|
189
|
+
@hdor_client.should_receive(:identity_metadata).and_call_original
|
190
|
+
im = @indexer.identity_metadata(@ng_pub_xml)
|
191
|
+
im.should be_kind_of(Nokogiri::XML::Document)
|
192
|
+
im.root.should_not == nil
|
193
|
+
im.root.name.should == 'identityMetadata'
|
194
|
+
im.root.text.strip.should == "druid:#{@fake_druid}"
|
183
195
|
end
|
184
196
|
it "raises RuntimeError if nil is returned by Harvestdor::Client.identityMetadata for the druid" do
|
185
197
|
@hdor_client.should_receive(:identity_metadata).with(@fake_druid).and_return(nil)
|
186
|
-
expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(RuntimeError, "No identityMetadata for #{@fake_druid}")
|
187
|
-
end
|
188
|
-
it "raises MissingIdentityMetadata error if there is no identityMetadata in the public_xml for the druid" do
|
189
|
-
URI::HTTP.any_instance.should_receive(:open)
|
190
|
-
expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingIdentityMetadata)
|
198
|
+
expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(RuntimeError, "No identityMetadata for \"#{@fake_druid}\"")
|
191
199
|
end
|
192
200
|
end
|
193
201
|
context "#rights_metadata" do
|
194
|
-
it "returns a Nokogiri::XML::Document derived from the public xml" do
|
202
|
+
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
195
203
|
Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
|
196
204
|
im = @indexer.rights_metadata(@fake_druid)
|
197
205
|
im.should be_kind_of(Nokogiri::XML::Document)
|
@@ -199,25 +207,13 @@ describe Harvestdor::Indexer do
|
|
199
207
|
im.root.name.should == 'rightsMetadata'
|
200
208
|
im.root.text.strip.should == "bar"
|
201
209
|
end
|
202
|
-
it "raises Harvestdor::Errors::MissingPurlPage if there is no purl page for the druid" do
|
203
|
-
expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingPurlPage)
|
204
|
-
end
|
205
|
-
it "should raise exception if there is no rightsMetadata in the public xml" do
|
206
|
-
pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@cntnt_md_xml}</publicObject>"
|
207
|
-
Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(Nokogiri::XML(pub_xml))
|
208
|
-
expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(RuntimeError, "No rightsMetadata for #{@fake_druid}")
|
209
|
-
end
|
210
210
|
it "raises RuntimeError if nil is returned by Harvestdor::Client.rightsMetadata for the druid" do
|
211
211
|
@hdor_client.should_receive(:rights_metadata).with(@fake_druid).and_return(nil)
|
212
|
-
expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(RuntimeError, "No rightsMetadata for #{@fake_druid}")
|
213
|
-
end
|
214
|
-
it "raises MissingRightsMetadata error if there is no rightsMetadata in the public_xml for the druid" do
|
215
|
-
URI::HTTP.any_instance.should_receive(:open)
|
216
|
-
expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingRightsMetadata)
|
212
|
+
expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(RuntimeError, "No rightsMetadata for \"#{@fake_druid}\"")
|
217
213
|
end
|
218
214
|
end
|
219
215
|
context "#rdf" do
|
220
|
-
it "returns a Nokogiri::XML::Document derived from the public xml" do
|
216
|
+
it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
|
221
217
|
Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
|
222
218
|
im = @indexer.rdf(@fake_druid)
|
223
219
|
im.should be_kind_of(Nokogiri::XML::Document)
|
@@ -225,23 +221,11 @@ describe Harvestdor::Indexer do
|
|
225
221
|
im.root.name.should == 'RDF'
|
226
222
|
im.root.text.strip.should == "relationship!"
|
227
223
|
end
|
228
|
-
it "raises Harvestdor::Errors::MissingPurlPage if there is no purl page for the druid" do
|
229
|
-
expect { @indexer.rdf(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingPurlPage)
|
230
|
-
end
|
231
|
-
it "should raise exception if there is no rdf in the public xml" do
|
232
|
-
pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@cntnt_md_xml}</publicObject>"
|
233
|
-
Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(Nokogiri::XML(pub_xml))
|
234
|
-
expect { @indexer.rdf(@fake_druid) }.to raise_error(RuntimeError, "No RDF for #{@fake_druid}")
|
235
|
-
end
|
236
224
|
it "raises RuntimeError if nil is returned by Harvestdor::Client.rdf for the druid" do
|
237
225
|
@hdor_client.should_receive(:rdf).with(@fake_druid).and_return(nil)
|
238
|
-
expect { @indexer.rdf(@fake_druid) }.to raise_error(RuntimeError, "No RDF for #{@fake_druid}")
|
239
|
-
end
|
240
|
-
it "raises MissingRDF error if there is no rdf in the public_xml for the druid" do
|
241
|
-
URI::HTTP.any_instance.should_receive(:open)
|
242
|
-
expect { @indexer.rdf(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingRDF)
|
226
|
+
expect { @indexer.rdf(@fake_druid) }.to raise_error(RuntimeError, "No RDF for \"#{@fake_druid}\"")
|
243
227
|
end
|
244
|
-
end
|
228
|
+
end
|
245
229
|
end
|
246
230
|
|
247
231
|
context "blacklist" do
|
metadata
CHANGED
@@ -1,20 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: harvestdor-indexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.10
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Naomi Dushay
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-
|
11
|
+
date: 2013-10-18 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rsolr
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
17
|
- - ! '>='
|
20
18
|
- !ruby/object:Gem::Version
|
@@ -22,7 +20,6 @@ dependencies:
|
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
24
|
- - ! '>='
|
28
25
|
- !ruby/object:Gem::Version
|
@@ -30,7 +27,6 @@ dependencies:
|
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: harvestdor
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
31
|
- - ! '>='
|
36
32
|
- !ruby/object:Gem::Version
|
@@ -38,7 +34,6 @@ dependencies:
|
|
38
34
|
type: :runtime
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
38
|
- - ! '>='
|
44
39
|
- !ruby/object:Gem::Version
|
@@ -46,7 +41,6 @@ dependencies:
|
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: stanford-mods
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
45
|
- - ! '>='
|
52
46
|
- !ruby/object:Gem::Version
|
@@ -54,7 +48,20 @@ dependencies:
|
|
54
48
|
type: :runtime
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: confstruct
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
58
65
|
requirements:
|
59
66
|
- - ! '>='
|
60
67
|
- !ruby/object:Gem::Version
|
@@ -62,7 +69,6 @@ dependencies:
|
|
62
69
|
- !ruby/object:Gem::Dependency
|
63
70
|
name: lyberteam-gems-devel
|
64
71
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
72
|
requirements:
|
67
73
|
- - ! '>='
|
68
74
|
- !ruby/object:Gem::Version
|
@@ -70,7 +76,6 @@ dependencies:
|
|
70
76
|
type: :development
|
71
77
|
prerelease: false
|
72
78
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
79
|
requirements:
|
75
80
|
- - ! '>='
|
76
81
|
- !ruby/object:Gem::Version
|
@@ -78,7 +83,6 @@ dependencies:
|
|
78
83
|
- !ruby/object:Gem::Dependency
|
79
84
|
name: rake
|
80
85
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
86
|
requirements:
|
83
87
|
- - ! '>='
|
84
88
|
- !ruby/object:Gem::Version
|
@@ -86,7 +90,6 @@ dependencies:
|
|
86
90
|
type: :development
|
87
91
|
prerelease: false
|
88
92
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
93
|
requirements:
|
91
94
|
- - ! '>='
|
92
95
|
- !ruby/object:Gem::Version
|
@@ -94,7 +97,6 @@ dependencies:
|
|
94
97
|
- !ruby/object:Gem::Dependency
|
95
98
|
name: rdoc
|
96
99
|
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
100
|
requirements:
|
99
101
|
- - ! '>='
|
100
102
|
- !ruby/object:Gem::Version
|
@@ -102,7 +104,6 @@ dependencies:
|
|
102
104
|
type: :development
|
103
105
|
prerelease: false
|
104
106
|
version_requirements: !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
107
|
requirements:
|
107
108
|
- - ! '>='
|
108
109
|
- !ruby/object:Gem::Version
|
@@ -110,7 +111,6 @@ dependencies:
|
|
110
111
|
- !ruby/object:Gem::Dependency
|
111
112
|
name: yard
|
112
113
|
requirement: !ruby/object:Gem::Requirement
|
113
|
-
none: false
|
114
114
|
requirements:
|
115
115
|
- - ! '>='
|
116
116
|
- !ruby/object:Gem::Version
|
@@ -118,7 +118,6 @@ dependencies:
|
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
none: false
|
122
121
|
requirements:
|
123
122
|
- - ! '>='
|
124
123
|
- !ruby/object:Gem::Version
|
@@ -126,7 +125,6 @@ dependencies:
|
|
126
125
|
- !ruby/object:Gem::Dependency
|
127
126
|
name: rspec
|
128
127
|
requirement: !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
128
|
requirements:
|
131
129
|
- - ! '>='
|
132
130
|
- !ruby/object:Gem::Version
|
@@ -134,7 +132,6 @@ dependencies:
|
|
134
132
|
type: :development
|
135
133
|
prerelease: false
|
136
134
|
version_requirements: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
135
|
requirements:
|
139
136
|
- - ! '>='
|
140
137
|
- !ruby/object:Gem::Version
|
@@ -142,7 +139,6 @@ dependencies:
|
|
142
139
|
- !ruby/object:Gem::Dependency
|
143
140
|
name: simplecov
|
144
141
|
requirement: !ruby/object:Gem::Requirement
|
145
|
-
none: false
|
146
142
|
requirements:
|
147
143
|
- - ! '>='
|
148
144
|
- !ruby/object:Gem::Version
|
@@ -150,7 +146,6 @@ dependencies:
|
|
150
146
|
type: :development
|
151
147
|
prerelease: false
|
152
148
|
version_requirements: !ruby/object:Gem::Requirement
|
153
|
-
none: false
|
154
149
|
requirements:
|
155
150
|
- - ! '>='
|
156
151
|
- !ruby/object:Gem::Version
|
@@ -158,7 +153,6 @@ dependencies:
|
|
158
153
|
- !ruby/object:Gem::Dependency
|
159
154
|
name: simplecov-rcov
|
160
155
|
requirement: !ruby/object:Gem::Requirement
|
161
|
-
none: false
|
162
156
|
requirements:
|
163
157
|
- - ! '>='
|
164
158
|
- !ruby/object:Gem::Version
|
@@ -166,7 +160,6 @@ dependencies:
|
|
166
160
|
type: :development
|
167
161
|
prerelease: false
|
168
162
|
version_requirements: !ruby/object:Gem::Requirement
|
169
|
-
none: false
|
170
163
|
requirements:
|
171
164
|
- - ! '>='
|
172
165
|
- !ruby/object:Gem::Version
|
@@ -196,33 +189,26 @@ files:
|
|
196
189
|
- spec/unit/harvestdor-indexer_spec.rb
|
197
190
|
homepage: https://consul.stanford.edu/display/chimera/Chimera+project
|
198
191
|
licenses: []
|
192
|
+
metadata: {}
|
199
193
|
post_install_message:
|
200
194
|
rdoc_options: []
|
201
195
|
require_paths:
|
202
196
|
- lib
|
203
197
|
required_ruby_version: !ruby/object:Gem::Requirement
|
204
|
-
none: false
|
205
198
|
requirements:
|
206
199
|
- - ! '>='
|
207
200
|
- !ruby/object:Gem::Version
|
208
201
|
version: '0'
|
209
|
-
segments:
|
210
|
-
- 0
|
211
|
-
hash: -2920299245033359379
|
212
202
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
213
|
-
none: false
|
214
203
|
requirements:
|
215
204
|
- - ! '>='
|
216
205
|
- !ruby/object:Gem::Version
|
217
206
|
version: '0'
|
218
|
-
segments:
|
219
|
-
- 0
|
220
|
-
hash: -2920299245033359379
|
221
207
|
requirements: []
|
222
208
|
rubyforge_project:
|
223
|
-
rubygems_version:
|
209
|
+
rubygems_version: 2.0.7
|
224
210
|
signing_key:
|
225
|
-
specification_version:
|
211
|
+
specification_version: 4
|
226
212
|
summary: Harvest DOR object metadata and index it to Solr
|
227
213
|
test_files:
|
228
214
|
- spec/config/ap.yml
|