harvestdor-indexer 0.0.3 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ OGYzNWY3ZWNkM2VlNzdhY2YyN2UxZGQ3NzM2MTQyOTRmYzRiZDVjMw==
5
+ data.tar.gz: !binary |-
6
+ YTE4YWFhMWEzOGQ0NDJhMDdlNGRlNGFiNjNiMjE1ZWJlNWMyNzQ2Yg==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ODdlZDM2MTNkYmYzODNlZmVhY2I0YzJjNGRhMGE3MjMyYzNjNTZiYTJmNDMy
10
+ ZWI1ZTUyZjZmNWFlNTY3NGUzZGMzMzVlMDY0MTQxNjRiNzRlM2U0OGI3MzU4
11
+ NTZhNWUwMjE2MjNjMDJjODU4MTA3YzVjOGY5NzQzZGNkMGE1ZDM=
12
+ data.tar.gz: !binary |-
13
+ MTYyMzMzNDA2OTM5NWU4ZjdlNGQxYzc3ODU5MTBmNjI5MGM0OTM3NjNkZDdm
14
+ OWIwY2M1Yjk5OTEyOGIxNGFmYmEwMGNmZDhkZmQ4MmM5MWQxMDAzMmZhMTdl
15
+ YmNkODM1MDZmMTI4OThhYmFlYWFlMTc5MzEwYzBlMTIyNzg4Yzk=
@@ -108,6 +108,10 @@ I suggest you run your code on harvestdor-dev, as it is already set up to be abl
108
108
 
109
109
  == Releases
110
110
 
111
+ * <b>0.0.7</b> adding additional logging of error, success counts, and time to index and harvest
112
+ * <b>0.0.6</b> tweak error handling for public xml pieces
113
+ * <b>0.0.5</b> make rake release a no-op
114
+ * <b>0.0.4</b> add confstruct runtime dependency
111
115
  * <b>0.0.3</b> add methods for public_xml, content_metadata, identity_metadata ...
112
116
  * <b>0.0.2</b> better model code for index method (thanks, Bess!)
113
117
  * <b>0.0.1</b> initial commit
data/Rakefile CHANGED
@@ -19,7 +19,8 @@ rescue Bundler::BundlerError => e
19
19
  end
20
20
 
21
21
  desc "DO NOT USE! use dlss_release"
22
- task :release
22
+ task :release do
23
+ end
23
24
 
24
25
  task :default => :ci
25
26
 
@@ -24,7 +24,7 @@ Gem::Specification.new do |gem|
24
24
  gem.add_dependency 'stanford-mods'
25
25
 
26
26
  # Runtime dependencies
27
- # gem.add_runtime_dependency 'nokogiri'
27
+ gem.add_runtime_dependency 'confstruct'
28
28
 
29
29
  # Development dependencies
30
30
  # Bundler will install these gems too if you've checked out harvestdor-indexer source from git and run 'bundle install'
@@ -15,7 +15,15 @@ module Harvestdor
15
15
  # Base class to harvest from DOR via harvestdor gem and then index
16
16
  class Indexer
17
17
 
18
+ attr_accessor :error_count, :success_count, :max_retries
19
+ attr_accessor :total_time_to_parse,:total_time_to_solr
20
+
18
21
  def initialize yml_path, options = {}
22
+ @success_count=0 # the number of objects successfully indexed
23
+ @error_count=0 # the number of objects that failed
24
+ @max_retries=5 # the number of times to retry an object
25
+ @total_time_to_solr=0
26
+ @total_time_to_parse=0
19
27
  @yml_path = yml_path
20
28
  config.configure(YAML.load_file(yml_path)) if yml_path
21
29
  config.configure options
@@ -35,19 +43,62 @@ module Harvestdor
35
43
  # create a Solr profiling document for each druid
36
44
  # write the result to the Solr index
37
45
  def harvest_and_index
46
+ start_time=Time.now
47
+ logger.info("Started harvest_and_index at #{start_time}")
38
48
  if whitelist.empty?
39
49
  druids.each { |druid| index druid }
40
50
  else
41
51
  whitelist.each { |druid| index druid }
42
52
  end
43
53
  solr_client.commit
44
- logger.info("Finished processing: final Solr commit returned.")
54
+ total_time=elapsed_time(start_time)
55
+ total_objects=@success_count+@error_count
56
+ logger.info("Finished harvest_and_index at #{Time.now}: final Solr commit returned")
57
+ logger.info("Total elapsed time for harvest and index: #{(total_time/60.0)} minutes")
58
+ logger.info("Avg solr commit time per object (successful): #{@total_time_to_solr/@success_count} seconds") unless (@total_time_to_solr == 0 || @success_count == 0)
59
+ logger.info("Avg solr commit time per object (all): #{@total_time_to_solr/total_objects} seconds") unless (@total_time_to_solr == 0 || @error_count == 0 || total_objects == 0)
60
+ logger.info("Avg parse time per object (successful): #{@total_time_to_parse/@success_count} seconds") unless (@total_time_to_parse == 0 || @success_count == 0)
61
+ logger.info("Avg parse time per object (all): #{@total_time_to_parse/total_objects} seconds") unless (@total_time_to_parse == 0 || @error_count == 0 || total_objects == 0)
62
+ logger.info("Avg complete index time per object (successful): #{total_time/@success_count} seconds") unless (@success_count == 0)
63
+ logger.info("Avg complete index time per object (all): #{total_time/total_objects} seconds") unless (@error_count == 0 || total_objects == 0)
64
+ logger.info("Successful count: #{@success_count}")
65
+ logger.info("Error count: #{@error_count}")
66
+ logger.info("Total records processed: #{total_objects}")
45
67
  end
46
68
 
47
69
  # return Array of druids contained in the OAI harvest indicated by OAI params in yml configuration file
48
70
  # @return [Array<String>] or enumeration over it, if block is given. (strings are druids, e.g. ab123cd1234)
49
71
  def druids
50
- @druids ||= harvestdor_client.druids_via_oai
72
+ if @druids.nil?
73
+ start_time=Time.now
74
+ logger.info("Starting OAI harvest of druids at #{start_time}.")
75
+ @druids = harvestdor_client.druids_via_oai
76
+ logger.info("Completed OAI harvest of druids at #{Time.now}. Found #{@druids.size} druids. Total elapsed time for OAI harvest = #{elapsed_time(start_time,:minutes)} minutes")
77
+ end
78
+ return @druids
79
+ end
80
+
81
+ #add the document to solr, retry if an error occurs
82
+ def solr_add(doc, id, do_retry=true)
83
+ #if do_retry is false, skip retrying
84
+ tries=do_retry ? 0 : 999
85
+ while tries < @max_retries
86
+ begin
87
+ tries+=1
88
+ solr_client.add(doc)
89
+ #return if successful
90
+ return
91
+ rescue => e
92
+ if tries<@max_retries
93
+ logger.warn "#{id}: #{e.message}, retrying"
94
+ else
95
+ @error_count+=1
96
+ logger.error "Failed saving #{id}: #{e.message}"
97
+ logger.error e.backtrace
98
+ return
99
+ end
100
+ end
101
+ end
51
102
  end
52
103
 
53
104
  # create Solr doc for the druid and add it to Solr, unless it is on the blacklist.
@@ -59,6 +110,8 @@ module Harvestdor
59
110
  logger.fatal("You must override the index method to transform druids into Solr docs and add them to Solr")
60
111
 
61
112
  begin
113
+ start_time=Time.now
114
+ logger.info("About to index #{druid} at #{start_time}")
62
115
  #logger.debug "About to index #{druid}"
63
116
  doc_hash = {}
64
117
  doc_hash[:id] = druid
@@ -69,10 +122,12 @@ module Harvestdor
69
122
 
70
123
  solr_client.add(doc_hash)
71
124
 
72
- # logger.debug("Just created Solr doc for #{druid}")
125
+ logger.info("Indexed #{druid} in #{elapsed_time(start_time)} seconds")
126
+ @success_count+=1
73
127
  # TODO: provide call to code to update DOR object's workflow datastream??
74
128
  rescue => e
75
- logger.error "Failed to index #{druid}: #{e.message}"
129
+ @error_count+=1
130
+ logger.error "Failed to index #{druid} in #{elapsed_time(start_time)} seconds: #{e.message}"
76
131
  end
77
132
  end
78
133
  end
@@ -81,7 +136,9 @@ module Harvestdor
81
136
  # @param [String] druid e.g. ab123cd4567
82
137
  # @return [Stanford::Mods::Record] created from the MODS xml for the druid
83
138
  def smods_rec druid
139
+ start_time=Time.now
84
140
  ng_doc = harvestdor_client.mods druid
141
+ logger.info("Fetched MODs for #{druid} in #{elapsed_time(start_time)} seconds")
85
142
  raise "Empty MODS metadata for #{druid}: #{ng_doc.to_xml}" if ng_doc.root.xpath('//text()').empty?
86
143
  mods_rec = Stanford::Mods::Record.new
87
144
  mods_rec.from_nk_node(ng_doc.root)
@@ -92,45 +149,59 @@ module Harvestdor
92
149
  # @param [String] druid e.g. ab123cd4567
93
150
  # @return [Nokogiri::XML::Document] the public xml for the DOR object
94
151
  def public_xml druid
152
+ start_time=Time.now
95
153
  ng_doc = harvestdor_client.public_xml druid
154
+ logger.info("Fetched public_xml for #{druid} in #{elapsed_time(start_time)} seconds")
96
155
  raise "No public xml for #{druid}" if !ng_doc
97
156
  raise "Empty public xml for #{druid}: #{ng_doc.to_xml}" if ng_doc.root.xpath('//text()').empty?
98
157
  ng_doc
99
158
  end
100
-
101
- # the contentMetadata for this DOR object, from the purl public xml
102
- # @param [String] druid e.g. ab123cd4567
159
+
160
+ # the contentMetadata for this DOR object, ultimately from the purl public xml
161
+ # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
162
+ # a Nokogiri::XML::Document containing the public_xml for an object
103
163
  # @return [Nokogiri::XML::Document] the contentMetadata for the DOR object
104
- def content_metadata druid
105
- ng_doc = harvestdor_client.content_metadata druid
106
- raise "No contentMetadata for #{druid}" if !ng_doc || !ng_doc.root
164
+ def content_metadata object
165
+ start_time=Time.now
166
+ ng_doc = harvestdor_client.content_metadata object
167
+ logger.info("Fetched content_metadata in #{elapsed_time(start_time)} seconds")
168
+ raise "No contentMetadata for #{object.inspect}" if !ng_doc || ng_doc.children.empty?
107
169
  ng_doc
108
170
  end
109
-
110
- # the identityMetadata for this DOR object, from the purl public xml
111
- # @param [String] druid e.g. ab123cd4567
171
+
172
+ # the identityMetadata for this DOR object, ultimately from the purl public xml
173
+ # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
174
+ # a Nokogiri::XML::Document containing the public_xml for an object
112
175
  # @return [Nokogiri::XML::Document] the identityMetadata for the DOR object
113
- def identity_metadata druid
114
- ng_doc = harvestdor_client.identity_metadata druid
115
- raise "No identityMetadata for #{druid}" if !ng_doc || !ng_doc.root
176
+ def identity_metadata object
177
+ start_time=Time.now
178
+ ng_doc = harvestdor_client.identity_metadata object
179
+ logger.info("Fetched identity_metadata in #{elapsed_time(start_time)} seconds")
180
+ raise "No identityMetadata for #{object.inspect}" if !ng_doc || ng_doc.children.empty?
116
181
  ng_doc
117
182
  end
118
183
 
119
- # the rightsMetadata for this DOR object, from the purl public xml
120
- # @param [String] druid e.g. ab123cd4567
184
+ # the rightsMetadata for this DOR object, ultimately from the purl public xml
185
+ # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
186
+ # a Nokogiri::XML::Document containing the public_xml for an object
121
187
  # @return [Nokogiri::XML::Document] the rightsMetadata for the DOR object
122
- def rights_metadata druid
123
- ng_doc = harvestdor_client.rights_metadata druid
124
- raise "No rightsMetadata for #{druid}" if !ng_doc || !ng_doc.root
188
+ def rights_metadata object
189
+ start_time=Time.now
190
+ ng_doc = harvestdor_client.rights_metadata object
191
+ logger.info("Fetched rights_metadata in #{elapsed_time(start_time)} seconds")
192
+ raise "No rightsMetadata for #{object.inspect}" if !ng_doc || ng_doc.children.empty?
125
193
  ng_doc
126
194
  end
127
195
 
128
- # the RDF for this DOR object, from the purl public xml
129
- # @param [String] druid e.g. ab123cd4567
196
+ # the RDF for this DOR object, ultimately from the purl public xml
197
+ # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
198
+ # a Nokogiri::XML::Document containing the public_xml for an object
130
199
  # @return [Nokogiri::XML::Document] the RDF for the DOR object
131
- def rdf druid
132
- ng_doc = harvestdor_client.rdf druid
133
- raise "No RDF for #{druid}" if !ng_doc || !ng_doc.root
200
+ def rdf object
201
+ start_time=Time.now
202
+ ng_doc = harvestdor_client.rdf object
203
+ logger.info("Fetched rdf in #{elapsed_time(start_time)} seconds")
204
+ raise "No RDF for #{object.inspect}" if !ng_doc || ng_doc.children.empty?
134
205
  ng_doc
135
206
  end
136
207
 
@@ -162,6 +233,20 @@ module Harvestdor
162
233
  @harvestdor_client ||= Harvestdor::Client.new({:config_yml_path => @yml_path})
163
234
  end
164
235
 
236
+ def elapsed_time(start_time,units=:seconds)
237
+ elapsed_seconds=Time.now-start_time
238
+ case units
239
+ when :seconds
240
+ return elapsed_seconds.round(2)
241
+ when :minutes
242
+ return (elapsed_seconds/60.0).round(1)
243
+ when :hours
244
+ return (elapsed_seconds/3600.0).round(2)
245
+ else
246
+ return elapsed_seconds
247
+ end
248
+ end
249
+
165
250
  # populate @blacklist as an Array of druids ('oo000oo0000') that will NOT be processed
166
251
  # by reading the File at the indicated path
167
252
  # @param [String] path - path of file containing a list of druids
@@ -200,7 +285,7 @@ module Harvestdor
200
285
  logger.fatal msg
201
286
  raise msg
202
287
  end
203
-
288
+
204
289
  # Global, memoized, lazy initialized instance of a logger
205
290
  # @param [String] log_dir directory in which to write the log file
206
291
  # @param [String] log_name name of log file
@@ -1,6 +1,6 @@
1
1
  module Harvestdor
2
2
  class Indexer
3
3
  # this is the Ruby Gem version
4
- VERSION = "0.0.3"
4
+ VERSION = "0.0.10"
5
5
  end
6
6
  end
@@ -13,13 +13,29 @@ describe Harvestdor::Indexer do
13
13
  @whitelist_path = File.join(File.dirname(__FILE__), "../config/ap_whitelist.txt")
14
14
  end
15
15
 
16
+ describe "access methods" do
17
+ it "initializes success count" do
18
+ @indexer.success_count.should == 0
19
+ end
20
+ it "initializes error count" do
21
+ @indexer.error_count.should == 0
22
+ end
23
+ it "initializes max_retries" do
24
+ @indexer.max_retries.should == 5
25
+ end
26
+ it "allows overriding of max_retries" do
27
+ @indexer.max_retries=6
28
+ @indexer.max_retries.should == 6
29
+ end
30
+ end
31
+
16
32
  describe "logging" do
17
33
  it "should write the log file to the directory indicated by log_dir" do
18
34
  @indexer.logger.info("indexer_spec logging test message")
19
35
  File.exists?(File.join(@yaml['log_dir'], @yaml['log_name'])).should == true
20
36
  end
21
37
  end
22
-
38
+
23
39
  it "should initialize the harvestdor_client from the config" do
24
40
  @hdor_client.should be_an_instance_of(Harvestdor::Client)
25
41
  @hdor_client.config.default_set.should == @yaml['default_set']
@@ -32,7 +48,7 @@ describe Harvestdor::Indexer do
32
48
  }
33
49
  end
34
50
  it "should call druids_via_oai and then call :add on rsolr connection" do
35
- @hdor_client.should_receive(:druids_via_oai).and_return([@fake_druid])
51
+ @indexer.should_receive(:druids).and_return([@fake_druid])
36
52
  @indexer.solr_client.should_receive(:add).with(@doc_hash)
37
53
  @indexer.solr_client.should_receive(:commit)
38
54
  @indexer.harvest_and_index
@@ -76,7 +92,7 @@ describe Harvestdor::Indexer do
76
92
  end
77
93
 
78
94
  it "druids method should call druids_via_oai method on harvestdor_client" do
79
- @hdor_client.should_receive(:druids_via_oai)
95
+ @hdor_client.should_receive(:druids_via_oai).and_return([@fake_druid])
80
96
  @indexer.druids
81
97
  end
82
98
 
@@ -129,16 +145,13 @@ describe Harvestdor::Indexer do
129
145
  @hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(Nokogiri::XML("<publicObject/>"))
130
146
  expect { @indexer.public_xml(@fake_druid) }.to raise_error(RuntimeError, Regexp.new("^Empty public xml for #{@fake_druid}: <"))
131
147
  end
132
- it "raises Harvestdor::Errors::MissingPurlPage if there is no purl page for the druid" do
133
- expect { @indexer.public_xml(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingPurlPage)
134
- end
135
148
  it "raises error if there is no public_xml page for the druid" do
136
149
  @hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(nil)
137
150
  expect { @indexer.public_xml(@fake_druid) }.to raise_error(RuntimeError, "No public xml for #{@fake_druid}")
138
151
  end
139
152
  end
140
153
  context "#content_metadata" do
141
- it "returns a Nokogiri::XML::Document derived from the public xml" do
154
+ it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
142
155
  Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
143
156
  cm = @indexer.content_metadata(@fake_druid)
144
157
  cm.should be_kind_of(Nokogiri::XML::Document)
@@ -147,25 +160,23 @@ describe Harvestdor::Indexer do
147
160
  cm.root.attributes['objectId'].text.should == @fake_druid
148
161
  cm.root.text.strip.should == 'foo'
149
162
  end
150
- it "raises Harvestdor::Errors::MissingPurlPage if there is no purl page for the druid" do
151
- expect { @indexer.content_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingPurlPage)
152
- end
153
- it "should raise exception if there is no contentMetadata in the public xml" do
154
- pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@id_md_xml}</publicObject>"
155
- Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(Nokogiri::XML(pub_xml))
156
- expect { @indexer.content_metadata(@fake_druid) }.to raise_error(RuntimeError, "No contentMetadata for #{@fake_druid}")
163
+ it "if passed a Nokogiri::XML::Document of the public xml, it does no fetch" do
164
+ URI::HTTP.any_instance.should_not_receive(:open)
165
+ @hdor_client.should_receive(:content_metadata).and_call_original
166
+ cm = @indexer.content_metadata(@ng_pub_xml)
167
+ cm.should be_kind_of(Nokogiri::XML::Document)
168
+ cm.root.should_not == nil
169
+ cm.root.name.should == 'contentMetadata'
170
+ cm.root.attributes['objectId'].text.should == @fake_druid
171
+ cm.root.text.strip.should == 'foo'
157
172
  end
158
173
  it "raises RuntimeError if nil is returned by Harvestdor::Client.contentMetadata for the druid" do
159
174
  @hdor_client.should_receive(:content_metadata).with(@fake_druid).and_return(nil)
160
- expect { @indexer.content_metadata(@fake_druid) }.to raise_error(RuntimeError, "No contentMetadata for #{@fake_druid}")
161
- end
162
- it "raises MissingContentMetadata error if there is no contentMetadata in the public_xml for the druid" do
163
- URI::HTTP.any_instance.should_receive(:open)
164
- expect { @indexer.content_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingContentMetadata)
175
+ expect { @indexer.content_metadata(@fake_druid) }.to raise_error(RuntimeError, "No contentMetadata for \"#{@fake_druid}\"")
165
176
  end
166
177
  end
167
178
  context "#identity_metadata" do
168
- it "returns a Nokogiri::XML::Document derived from the public xml" do
179
+ it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
169
180
  Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
170
181
  im = @indexer.identity_metadata(@fake_druid)
171
182
  im.should be_kind_of(Nokogiri::XML::Document)
@@ -173,25 +184,22 @@ describe Harvestdor::Indexer do
173
184
  im.root.name.should == 'identityMetadata'
174
185
  im.root.text.strip.should == "druid:#{@fake_druid}"
175
186
  end
176
- it "raises Harvestdor::Errors::MissingPurlPage if there is no purl page for the druid" do
177
- expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingPurlPage)
178
- end
179
- it "should raise exception if there is no identityMetadata in the public xml" do
180
- pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@cntnt_md_xml}</publicObject>"
181
- Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(Nokogiri::XML(pub_xml))
182
- expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(RuntimeError, "No identityMetadata for #{@fake_druid}")
187
+ it "if passed a Nokogiri::XML::Document of the public xml, it does no fetch" do
188
+ URI::HTTP.any_instance.should_not_receive(:open)
189
+ @hdor_client.should_receive(:identity_metadata).and_call_original
190
+ im = @indexer.identity_metadata(@ng_pub_xml)
191
+ im.should be_kind_of(Nokogiri::XML::Document)
192
+ im.root.should_not == nil
193
+ im.root.name.should == 'identityMetadata'
194
+ im.root.text.strip.should == "druid:#{@fake_druid}"
183
195
  end
184
196
  it "raises RuntimeError if nil is returned by Harvestdor::Client.identityMetadata for the druid" do
185
197
  @hdor_client.should_receive(:identity_metadata).with(@fake_druid).and_return(nil)
186
- expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(RuntimeError, "No identityMetadata for #{@fake_druid}")
187
- end
188
- it "raises MissingIdentityMetadata error if there is no identityMetadata in the public_xml for the druid" do
189
- URI::HTTP.any_instance.should_receive(:open)
190
- expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingIdentityMetadata)
198
+ expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(RuntimeError, "No identityMetadata for \"#{@fake_druid}\"")
191
199
  end
192
200
  end
193
201
  context "#rights_metadata" do
194
- it "returns a Nokogiri::XML::Document derived from the public xml" do
202
+ it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
195
203
  Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
196
204
  im = @indexer.rights_metadata(@fake_druid)
197
205
  im.should be_kind_of(Nokogiri::XML::Document)
@@ -199,25 +207,13 @@ describe Harvestdor::Indexer do
199
207
  im.root.name.should == 'rightsMetadata'
200
208
  im.root.text.strip.should == "bar"
201
209
  end
202
- it "raises Harvestdor::Errors::MissingPurlPage if there is no purl page for the druid" do
203
- expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingPurlPage)
204
- end
205
- it "should raise exception if there is no rightsMetadata in the public xml" do
206
- pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@cntnt_md_xml}</publicObject>"
207
- Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(Nokogiri::XML(pub_xml))
208
- expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(RuntimeError, "No rightsMetadata for #{@fake_druid}")
209
- end
210
210
  it "raises RuntimeError if nil is returned by Harvestdor::Client.rightsMetadata for the druid" do
211
211
  @hdor_client.should_receive(:rights_metadata).with(@fake_druid).and_return(nil)
212
- expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(RuntimeError, "No rightsMetadata for #{@fake_druid}")
213
- end
214
- it "raises MissingRightsMetadata error if there is no rightsMetadata in the public_xml for the druid" do
215
- URI::HTTP.any_instance.should_receive(:open)
216
- expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingRightsMetadata)
212
+ expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(RuntimeError, "No rightsMetadata for \"#{@fake_druid}\"")
217
213
  end
218
214
  end
219
215
  context "#rdf" do
220
- it "returns a Nokogiri::XML::Document derived from the public xml" do
216
+ it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
221
217
  Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
222
218
  im = @indexer.rdf(@fake_druid)
223
219
  im.should be_kind_of(Nokogiri::XML::Document)
@@ -225,23 +221,11 @@ describe Harvestdor::Indexer do
225
221
  im.root.name.should == 'RDF'
226
222
  im.root.text.strip.should == "relationship!"
227
223
  end
228
- it "raises Harvestdor::Errors::MissingPurlPage if there is no purl page for the druid" do
229
- expect { @indexer.rdf(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingPurlPage)
230
- end
231
- it "should raise exception if there is no rdf in the public xml" do
232
- pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@cntnt_md_xml}</publicObject>"
233
- Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(Nokogiri::XML(pub_xml))
234
- expect { @indexer.rdf(@fake_druid) }.to raise_error(RuntimeError, "No RDF for #{@fake_druid}")
235
- end
236
224
  it "raises RuntimeError if nil is returned by Harvestdor::Client.rdf for the druid" do
237
225
  @hdor_client.should_receive(:rdf).with(@fake_druid).and_return(nil)
238
- expect { @indexer.rdf(@fake_druid) }.to raise_error(RuntimeError, "No RDF for #{@fake_druid}")
239
- end
240
- it "raises MissingRDF error if there is no rdf in the public_xml for the druid" do
241
- URI::HTTP.any_instance.should_receive(:open)
242
- expect { @indexer.rdf(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingRDF)
226
+ expect { @indexer.rdf(@fake_druid) }.to raise_error(RuntimeError, "No RDF for \"#{@fake_druid}\"")
243
227
  end
244
- end
228
+ end
245
229
  end
246
230
 
247
231
  context "blacklist" do
metadata CHANGED
@@ -1,20 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: harvestdor-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
5
- prerelease:
4
+ version: 0.0.10
6
5
  platform: ruby
7
6
  authors:
8
7
  - Naomi Dushay
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-03-08 00:00:00.000000000 Z
11
+ date: 2013-10-18 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rsolr
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - ! '>='
20
18
  - !ruby/object:Gem::Version
@@ -22,7 +20,6 @@ dependencies:
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - ! '>='
28
25
  - !ruby/object:Gem::Version
@@ -30,7 +27,6 @@ dependencies:
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: harvestdor
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
31
  - - ! '>='
36
32
  - !ruby/object:Gem::Version
@@ -38,7 +34,6 @@ dependencies:
38
34
  type: :runtime
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
38
  - - ! '>='
44
39
  - !ruby/object:Gem::Version
@@ -46,7 +41,6 @@ dependencies:
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: stanford-mods
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
45
  - - ! '>='
52
46
  - !ruby/object:Gem::Version
@@ -54,7 +48,20 @@ dependencies:
54
48
  type: :runtime
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: confstruct
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
58
65
  requirements:
59
66
  - - ! '>='
60
67
  - !ruby/object:Gem::Version
@@ -62,7 +69,6 @@ dependencies:
62
69
  - !ruby/object:Gem::Dependency
63
70
  name: lyberteam-gems-devel
64
71
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
72
  requirements:
67
73
  - - ! '>='
68
74
  - !ruby/object:Gem::Version
@@ -70,7 +76,6 @@ dependencies:
70
76
  type: :development
71
77
  prerelease: false
72
78
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
79
  requirements:
75
80
  - - ! '>='
76
81
  - !ruby/object:Gem::Version
@@ -78,7 +83,6 @@ dependencies:
78
83
  - !ruby/object:Gem::Dependency
79
84
  name: rake
80
85
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
86
  requirements:
83
87
  - - ! '>='
84
88
  - !ruby/object:Gem::Version
@@ -86,7 +90,6 @@ dependencies:
86
90
  type: :development
87
91
  prerelease: false
88
92
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
93
  requirements:
91
94
  - - ! '>='
92
95
  - !ruby/object:Gem::Version
@@ -94,7 +97,6 @@ dependencies:
94
97
  - !ruby/object:Gem::Dependency
95
98
  name: rdoc
96
99
  requirement: !ruby/object:Gem::Requirement
97
- none: false
98
100
  requirements:
99
101
  - - ! '>='
100
102
  - !ruby/object:Gem::Version
@@ -102,7 +104,6 @@ dependencies:
102
104
  type: :development
103
105
  prerelease: false
104
106
  version_requirements: !ruby/object:Gem::Requirement
105
- none: false
106
107
  requirements:
107
108
  - - ! '>='
108
109
  - !ruby/object:Gem::Version
@@ -110,7 +111,6 @@ dependencies:
110
111
  - !ruby/object:Gem::Dependency
111
112
  name: yard
112
113
  requirement: !ruby/object:Gem::Requirement
113
- none: false
114
114
  requirements:
115
115
  - - ! '>='
116
116
  - !ruby/object:Gem::Version
@@ -118,7 +118,6 @@ dependencies:
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
- none: false
122
121
  requirements:
123
122
  - - ! '>='
124
123
  - !ruby/object:Gem::Version
@@ -126,7 +125,6 @@ dependencies:
126
125
  - !ruby/object:Gem::Dependency
127
126
  name: rspec
128
127
  requirement: !ruby/object:Gem::Requirement
129
- none: false
130
128
  requirements:
131
129
  - - ! '>='
132
130
  - !ruby/object:Gem::Version
@@ -134,7 +132,6 @@ dependencies:
134
132
  type: :development
135
133
  prerelease: false
136
134
  version_requirements: !ruby/object:Gem::Requirement
137
- none: false
138
135
  requirements:
139
136
  - - ! '>='
140
137
  - !ruby/object:Gem::Version
@@ -142,7 +139,6 @@ dependencies:
142
139
  - !ruby/object:Gem::Dependency
143
140
  name: simplecov
144
141
  requirement: !ruby/object:Gem::Requirement
145
- none: false
146
142
  requirements:
147
143
  - - ! '>='
148
144
  - !ruby/object:Gem::Version
@@ -150,7 +146,6 @@ dependencies:
150
146
  type: :development
151
147
  prerelease: false
152
148
  version_requirements: !ruby/object:Gem::Requirement
153
- none: false
154
149
  requirements:
155
150
  - - ! '>='
156
151
  - !ruby/object:Gem::Version
@@ -158,7 +153,6 @@ dependencies:
158
153
  - !ruby/object:Gem::Dependency
159
154
  name: simplecov-rcov
160
155
  requirement: !ruby/object:Gem::Requirement
161
- none: false
162
156
  requirements:
163
157
  - - ! '>='
164
158
  - !ruby/object:Gem::Version
@@ -166,7 +160,6 @@ dependencies:
166
160
  type: :development
167
161
  prerelease: false
168
162
  version_requirements: !ruby/object:Gem::Requirement
169
- none: false
170
163
  requirements:
171
164
  - - ! '>='
172
165
  - !ruby/object:Gem::Version
@@ -196,33 +189,26 @@ files:
196
189
  - spec/unit/harvestdor-indexer_spec.rb
197
190
  homepage: https://consul.stanford.edu/display/chimera/Chimera+project
198
191
  licenses: []
192
+ metadata: {}
199
193
  post_install_message:
200
194
  rdoc_options: []
201
195
  require_paths:
202
196
  - lib
203
197
  required_ruby_version: !ruby/object:Gem::Requirement
204
- none: false
205
198
  requirements:
206
199
  - - ! '>='
207
200
  - !ruby/object:Gem::Version
208
201
  version: '0'
209
- segments:
210
- - 0
211
- hash: -2920299245033359379
212
202
  required_rubygems_version: !ruby/object:Gem::Requirement
213
- none: false
214
203
  requirements:
215
204
  - - ! '>='
216
205
  - !ruby/object:Gem::Version
217
206
  version: '0'
218
- segments:
219
- - 0
220
- hash: -2920299245033359379
221
207
  requirements: []
222
208
  rubyforge_project:
223
- rubygems_version: 1.8.24
209
+ rubygems_version: 2.0.7
224
210
  signing_key:
225
- specification_version: 3
211
+ specification_version: 4
226
212
  summary: Harvest DOR object metadata and index it to Solr
227
213
  test_files:
228
214
  - spec/config/ap.yml