harvestdor-indexer 0.0.3 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ OGYzNWY3ZWNkM2VlNzdhY2YyN2UxZGQ3NzM2MTQyOTRmYzRiZDVjMw==
5
+ data.tar.gz: !binary |-
6
+ YTE4YWFhMWEzOGQ0NDJhMDdlNGRlNGFiNjNiMjE1ZWJlNWMyNzQ2Yg==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ODdlZDM2MTNkYmYzODNlZmVhY2I0YzJjNGRhMGE3MjMyYzNjNTZiYTJmNDMy
10
+ ZWI1ZTUyZjZmNWFlNTY3NGUzZGMzMzVlMDY0MTQxNjRiNzRlM2U0OGI3MzU4
11
+ NTZhNWUwMjE2MjNjMDJjODU4MTA3YzVjOGY5NzQzZGNkMGE1ZDM=
12
+ data.tar.gz: !binary |-
13
+ MTYyMzMzNDA2OTM5NWU4ZjdlNGQxYzc3ODU5MTBmNjI5MGM0OTM3NjNkZDdm
14
+ OWIwY2M1Yjk5OTEyOGIxNGFmYmEwMGNmZDhkZmQ4MmM5MWQxMDAzMmZhMTdl
15
+ YmNkODM1MDZmMTI4OThhYmFlYWFlMTc5MzEwYzBlMTIyNzg4Yzk=
@@ -108,6 +108,10 @@ I suggest you run your code on harvestdor-dev, as it is already set up to be abl
108
108
 
109
109
  == Releases
110
110
 
111
+ * <b>0.0.7</b> adding additional logging of error, success counts, and time to index and harvest
112
+ * <b>0.0.6</b> tweak error handling for public xml pieces
113
+ * <b>0.0.5</b> make rake release a no-op
114
+ * <b>0.0.4</b> add confstruct runtime dependency
111
115
  * <b>0.0.3</b> add methods for public_xml, content_metadata, identity_metadata ...
112
116
  * <b>0.0.2</b> better model code for index method (thanks, Bess!)
113
117
  * <b>0.0.1</b> initial commit
data/Rakefile CHANGED
@@ -19,7 +19,8 @@ rescue Bundler::BundlerError => e
19
19
  end
20
20
 
21
21
  desc "DO NOT USE! use dlss_release"
22
- task :release
22
+ task :release do
23
+ end
23
24
 
24
25
  task :default => :ci
25
26
 
@@ -24,7 +24,7 @@ Gem::Specification.new do |gem|
24
24
  gem.add_dependency 'stanford-mods'
25
25
 
26
26
  # Runtime dependencies
27
- # gem.add_runtime_dependency 'nokogiri'
27
+ gem.add_runtime_dependency 'confstruct'
28
28
 
29
29
  # Development dependencies
30
30
  # Bundler will install these gems too if you've checked out solrmarc-wrapper source from git and run 'bundle install'
@@ -15,7 +15,15 @@ module Harvestdor
15
15
  # Base class to harvest from DOR via harvestdor gem and then index
16
16
  class Indexer
17
17
 
18
+ attr_accessor :error_count, :success_count, :max_retries
19
+ attr_accessor :total_time_to_parse,:total_time_to_solr
20
+
18
21
  def initialize yml_path, options = {}
22
+ @success_count=0 # the number of objects successfully indexed
23
+ @error_count=0 # the number of objects that failed
24
+ @max_retries=5 # the number of times to retry an object
25
+ @total_time_to_solr=0
26
+ @total_time_to_parse=0
19
27
  @yml_path = yml_path
20
28
  config.configure(YAML.load_file(yml_path)) if yml_path
21
29
  config.configure options
@@ -35,19 +43,62 @@ module Harvestdor
35
43
  # create a Solr profiling document for each druid
36
44
  # write the result to the Solr index
37
45
  def harvest_and_index
46
+ start_time=Time.now
47
+ logger.info("Started harvest_and_index at #{start_time}")
38
48
  if whitelist.empty?
39
49
  druids.each { |druid| index druid }
40
50
  else
41
51
  whitelist.each { |druid| index druid }
42
52
  end
43
53
  solr_client.commit
44
- logger.info("Finished processing: final Solr commit returned.")
54
+ total_time=elapsed_time(start_time)
55
+ total_objects=@success_count+@error_count
56
+ logger.info("Finished harvest_and_index at #{Time.now}: final Solr commit returned")
57
+ logger.info("Total elapsed time for harvest and index: #{(total_time/60.0)} minutes")
58
+ logger.info("Avg solr commit time per object (successful): #{@total_time_to_solr/@success_count} seconds") unless (@total_time_to_solr == 0 || @success_count == 0)
59
+ logger.info("Avg solr commit time per object (all): #{@total_time_to_solr/total_objects} seconds") unless (@total_time_to_solr == 0 || @error_count == 0 || total_objects == 0)
60
+ logger.info("Avg parse time per object (successful): #{@total_time_to_parse/@success_count} seconds") unless (@total_time_to_parse == 0 || @success_count == 0)
61
+ logger.info("Avg parse time per object (all): #{@total_time_to_parse/total_objects} seconds") unless (@total_time_to_parse == 0 || @error_count == 0 || total_objects == 0)
62
+ logger.info("Avg complete index time per object (successful): #{total_time/@success_count} seconds") unless (@success_count == 0)
63
+ logger.info("Avg complete index time per object (all): #{total_time/total_objects} seconds") unless (@error_count == 0 || total_object == 0)
64
+ logger.info("Successful count: #{@success_count}")
65
+ logger.info("Error count: #{@error_count}")
66
+ logger.info("Total records processed: #{total_objects}")
45
67
  end
46
68
 
47
69
  # return Array of druids contained in the OAI harvest indicated by OAI params in yml configuration file
48
70
  # @return [Array<String>] or enumeration over it, if block is given. (strings are druids, e.g. ab123cd1234)
49
71
  def druids
50
- @druids ||= harvestdor_client.druids_via_oai
72
+ if @druids.nil?
73
+ start_time=Time.now
74
+ logger.info("Starting OAI harvest of druids at #{start_time}.")
75
+ @druids = harvestdor_client.druids_via_oai
76
+ logger.info("Completed OAI harves of druids at #{Time.now}. Found #{@druids.size} druids. Total elapsed time for OAI harvest = #{elapsed_time(start_time,:minutes)} minutes")
77
+ end
78
+ return @druids
79
+ end
80
+
81
+ #add the document to solr, retry if an error occurs
82
+ def solr_add(doc, id, do_retry=true)
83
+ #if do_retry is false, skip retrying
84
+ tries=do_retry ? 0 : 999
85
+ while tries < @max_retries
86
+ begin
87
+ tries+=1
88
+ solr_client.add(doc)
89
+ #return if successful
90
+ return
91
+ rescue => e
92
+ if tries<@max_retries
93
+ logger.warn "#{id}: #{e.message}, retrying"
94
+ else
95
+ @error_count+=1
96
+ logger.error "Failed saving #{id}: #{e.message}"
97
+ logger.error e.backtrace
98
+ return
99
+ end
100
+ end
101
+ end
51
102
  end
52
103
 
53
104
  # create Solr doc for the druid and add it to Solr, unless it is on the blacklist.
@@ -59,6 +110,8 @@ module Harvestdor
59
110
  logger.fatal("You must override the index method to transform druids into Solr docs and add them to Solr")
60
111
 
61
112
  begin
113
+ start_time=Time.now
114
+ logger.info("About to index #{druid} at #{start_time}")
62
115
  #logger.debug "About to index #{druid}"
63
116
  doc_hash = {}
64
117
  doc_hash[:id] = druid
@@ -69,10 +122,12 @@ module Harvestdor
69
122
 
70
123
  solr_client.add(doc_hash)
71
124
 
72
- # logger.debug("Just created Solr doc for #{druid}")
125
+ logger.info("Indexed #{druid} in #{elapsed_time(start_time)} seconds")
126
+ @success_count+=1
73
127
  # TODO: provide call to code to update DOR object's workflow datastream??
74
128
  rescue => e
75
- logger.error "Failed to index #{druid}: #{e.message}"
129
+ @error_count+=1
130
+ logger.error "Failed to index #{druid} in #{elapsed_time(start_time)} seconds: #{e.message}"
76
131
  end
77
132
  end
78
133
  end
@@ -81,7 +136,9 @@ module Harvestdor
81
136
  # @param [String] druid e.g. ab123cd4567
82
137
  # @return [Stanford::Mods::Record] created from the MODS xml for the druid
83
138
  def smods_rec druid
139
+ start_time=Time.now
84
140
  ng_doc = harvestdor_client.mods druid
141
+ logger.info("Fetched MODs for #{druid} in #{elapsed_time(start_time)} seconds")
85
142
  raise "Empty MODS metadata for #{druid}: #{ng_doc.to_xml}" if ng_doc.root.xpath('//text()').empty?
86
143
  mods_rec = Stanford::Mods::Record.new
87
144
  mods_rec.from_nk_node(ng_doc.root)
@@ -92,45 +149,59 @@ module Harvestdor
92
149
  # @param [String] druid e.g. ab123cd4567
93
150
  # @return [Nokogiri::XML::Document] the public xml for the DOR object
94
151
  def public_xml druid
152
+ start_time=Time.now
95
153
  ng_doc = harvestdor_client.public_xml druid
154
+ logger.info("Fetched public_xml for #{druid} in #{elapsed_time(start_time)} seconds")
96
155
  raise "No public xml for #{druid}" if !ng_doc
97
156
  raise "Empty public xml for #{druid}: #{ng_doc.to_xml}" if ng_doc.root.xpath('//text()').empty?
98
157
  ng_doc
99
158
  end
100
-
101
- # the contentMetadata for this DOR object, from the purl public xml
102
- # @param [String] druid e.g. ab123cd4567
159
+
160
+ # the contentMetadata for this DOR object, ultimately from the purl public xml
161
+ # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
162
+ # a Nokogiri::XML::Document containing the public_xml for an object
103
163
  # @return [Nokogiri::XML::Document] the contentMetadata for the DOR object
104
- def content_metadata druid
105
- ng_doc = harvestdor_client.content_metadata druid
106
- raise "No contentMetadata for #{druid}" if !ng_doc || !ng_doc.root
164
+ def content_metadata object
165
+ start_time=Time.now
166
+ ng_doc = harvestdor_client.content_metadata object
167
+ logger.info("Fetched content_metadata in #{elapsed_time(start_time)} seconds")
168
+ raise "No contentMetadata for #{object.inspect}" if !ng_doc || ng_doc.children.empty?
107
169
  ng_doc
108
170
  end
109
-
110
- # the identityMetadata for this DOR object, from the purl public xml
111
- # @param [String] druid e.g. ab123cd4567
171
+
172
+ # the identityMetadata for this DOR object, ultimately from the purl public xml
173
+ # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
174
+ # a Nokogiri::XML::Document containing the public_xml for an object
112
175
  # @return [Nokogiri::XML::Document] the identityMetadata for the DOR object
113
- def identity_metadata druid
114
- ng_doc = harvestdor_client.identity_metadata druid
115
- raise "No identityMetadata for #{druid}" if !ng_doc || !ng_doc.root
176
+ def identity_metadata object
177
+ start_time=Time.now
178
+ ng_doc = harvestdor_client.identity_metadata object
179
+ logger.info("Fetched identity_metadata in #{elapsed_time(start_time)} seconds")
180
+ raise "No identityMetadata for #{object.inspect}" if !ng_doc || ng_doc.children.empty?
116
181
  ng_doc
117
182
  end
118
183
 
119
- # the rightsMetadata for this DOR object, from the purl public xml
120
- # @param [String] druid e.g. ab123cd4567
184
+ # the rightsMetadata for this DOR object, ultimately from the purl public xml
185
+ # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
186
+ # a Nokogiri::XML::Document containing the public_xml for an object
121
187
  # @return [Nokogiri::XML::Document] the rightsMetadata for the DOR object
122
- def rights_metadata druid
123
- ng_doc = harvestdor_client.rights_metadata druid
124
- raise "No rightsMetadata for #{druid}" if !ng_doc || !ng_doc.root
188
+ def rights_metadata object
189
+ start_time=Time.now
190
+ ng_doc = harvestdor_client.rights_metadata object
191
+ logger.info("Fetched rights_metadata in #{elapsed_time(start_time)} seconds")
192
+ raise "No rightsMetadata for #{object.inspect}" if !ng_doc || ng_doc.children.empty?
125
193
  ng_doc
126
194
  end
127
195
 
128
- # the RDF for this DOR object, from the purl public xml
129
- # @param [String] druid e.g. ab123cd4567
196
+ # the RDF for this DOR object, ultimately from the purl public xml
197
+ # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
198
+ # a Nokogiri::XML::Document containing the public_xml for an object
130
199
  # @return [Nokogiri::XML::Document] the RDF for the DOR object
131
- def rdf druid
132
- ng_doc = harvestdor_client.rdf druid
133
- raise "No RDF for #{druid}" if !ng_doc || !ng_doc.root
200
+ def rdf object
201
+ start_time=Time.now
202
+ ng_doc = harvestdor_client.rdf object
203
+ logger.info("Fetched rdf in #{elapsed_time(start_time)} seconds")
204
+ raise "No RDF for #{object.inspect}" if !ng_doc || ng_doc.children.empty?
134
205
  ng_doc
135
206
  end
136
207
 
@@ -162,6 +233,20 @@ module Harvestdor
162
233
  @harvestdor_client ||= Harvestdor::Client.new({:config_yml_path => @yml_path})
163
234
  end
164
235
 
236
+ def elapsed_time(start_time,units=:seconds)
237
+ elapsed_seconds=Time.now-start_time
238
+ case units
239
+ when :seconds
240
+ return elapsed_seconds.round(2)
241
+ when :minutes
242
+ return (elapsed_seconds/60.0).round(1)
243
+ when :hours
244
+ return (elapsed_seconds/3600.0).round(2)
245
+ else
246
+ return elapsed_seconds
247
+ end
248
+ end
249
+
165
250
  # populate @blacklist as an Array of druids ('oo000oo0000') that will NOT be processed
166
251
  # by reading the File at the indicated path
167
252
  # @param [String] path - path of file containing a list of druids
@@ -200,7 +285,7 @@ module Harvestdor
200
285
  logger.fatal msg
201
286
  raise msg
202
287
  end
203
-
288
+
204
289
  # Global, memoized, lazy initialized instance of a logger
205
290
  # @param [String] log_dir directory for to get log file
206
291
  # @param [String] log_name name of log file
@@ -1,6 +1,6 @@
1
1
  module Harvestdor
2
2
  class Indexer
3
3
  # this is the Ruby Gem version
4
- VERSION = "0.0.3"
4
+ VERSION = "0.0.10"
5
5
  end
6
6
  end
@@ -13,13 +13,29 @@ describe Harvestdor::Indexer do
13
13
  @whitelist_path = File.join(File.dirname(__FILE__), "../config/ap_whitelist.txt")
14
14
  end
15
15
 
16
+ describe "access methods" do
17
+ it "initializes success count" do
18
+ @indexer.success_count.should == 0
19
+ end
20
+ it "initializes error count" do
21
+ @indexer.error_count.should == 0
22
+ end
23
+ it "initializes max_retries" do
24
+ @indexer.max_retries.should == 5
25
+ end
26
+ it "allows overriding of max_retries" do
27
+ @indexer.max_retries=6
28
+ @indexer.max_retries.should == 6
29
+ end
30
+ end
31
+
16
32
  describe "logging" do
17
33
  it "should write the log file to the directory indicated by log_dir" do
18
34
  @indexer.logger.info("indexer_spec logging test message")
19
35
  File.exists?(File.join(@yaml['log_dir'], @yaml['log_name'])).should == true
20
36
  end
21
37
  end
22
-
38
+
23
39
  it "should initialize the harvestdor_client from the config" do
24
40
  @hdor_client.should be_an_instance_of(Harvestdor::Client)
25
41
  @hdor_client.config.default_set.should == @yaml['default_set']
@@ -32,7 +48,7 @@ describe Harvestdor::Indexer do
32
48
  }
33
49
  end
34
50
  it "should call druids_via_oai and then call :add on rsolr connection" do
35
- @hdor_client.should_receive(:druids_via_oai).and_return([@fake_druid])
51
+ @indexer.should_receive(:druids).and_return([@fake_druid])
36
52
  @indexer.solr_client.should_receive(:add).with(@doc_hash)
37
53
  @indexer.solr_client.should_receive(:commit)
38
54
  @indexer.harvest_and_index
@@ -76,7 +92,7 @@ describe Harvestdor::Indexer do
76
92
  end
77
93
 
78
94
  it "druids method should call druids_via_oai method on harvestdor_client" do
79
- @hdor_client.should_receive(:druids_via_oai)
95
+ @hdor_client.should_receive(:druids_via_oai).and_return([@fake_druid])
80
96
  @indexer.druids
81
97
  end
82
98
 
@@ -129,16 +145,13 @@ describe Harvestdor::Indexer do
129
145
  @hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(Nokogiri::XML("<publicObject/>"))
130
146
  expect { @indexer.public_xml(@fake_druid) }.to raise_error(RuntimeError, Regexp.new("^Empty public xml for #{@fake_druid}: <"))
131
147
  end
132
- it "raises Harvestdor::Errors::MissingPurlPage if there is no purl page for the druid" do
133
- expect { @indexer.public_xml(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingPurlPage)
134
- end
135
148
  it "raises error if there is no public_xml page for the druid" do
136
149
  @hdor_client.should_receive(:public_xml).with(@fake_druid).and_return(nil)
137
150
  expect { @indexer.public_xml(@fake_druid) }.to raise_error(RuntimeError, "No public xml for #{@fake_druid}")
138
151
  end
139
152
  end
140
153
  context "#content_metadata" do
141
- it "returns a Nokogiri::XML::Document derived from the public xml" do
154
+ it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
142
155
  Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
143
156
  cm = @indexer.content_metadata(@fake_druid)
144
157
  cm.should be_kind_of(Nokogiri::XML::Document)
@@ -147,25 +160,23 @@ describe Harvestdor::Indexer do
147
160
  cm.root.attributes['objectId'].text.should == @fake_druid
148
161
  cm.root.text.strip.should == 'foo'
149
162
  end
150
- it "raises Harvestdor::Errors::MissingPurlPage if there is no purl page for the druid" do
151
- expect { @indexer.content_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingPurlPage)
152
- end
153
- it "should raise exception if there is no contentMetadata in the public xml" do
154
- pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@id_md_xml}</publicObject>"
155
- Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(Nokogiri::XML(pub_xml))
156
- expect { @indexer.content_metadata(@fake_druid) }.to raise_error(RuntimeError, "No contentMetadata for #{@fake_druid}")
163
+ it "if passed a Nokogiri::XML::Document of the public xml, it does no fetch" do
164
+ URI::HTTP.any_instance.should_not_receive(:open)
165
+ @hdor_client.should_receive(:content_metadata).and_call_original
166
+ cm = @indexer.content_metadata(@ng_pub_xml)
167
+ cm.should be_kind_of(Nokogiri::XML::Document)
168
+ cm.root.should_not == nil
169
+ cm.root.name.should == 'contentMetadata'
170
+ cm.root.attributes['objectId'].text.should == @fake_druid
171
+ cm.root.text.strip.should == 'foo'
157
172
  end
158
173
  it "raises RuntimeError if nil is returned by Harvestdor::Client.contentMetadata for the druid" do
159
174
  @hdor_client.should_receive(:content_metadata).with(@fake_druid).and_return(nil)
160
- expect { @indexer.content_metadata(@fake_druid) }.to raise_error(RuntimeError, "No contentMetadata for #{@fake_druid}")
161
- end
162
- it "raises MissingContentMetadata error if there is no contentMetadata in the public_xml for the druid" do
163
- URI::HTTP.any_instance.should_receive(:open)
164
- expect { @indexer.content_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingContentMetadata)
175
+ expect { @indexer.content_metadata(@fake_druid) }.to raise_error(RuntimeError, "No contentMetadata for \"#{@fake_druid}\"")
165
176
  end
166
177
  end
167
178
  context "#identity_metadata" do
168
- it "returns a Nokogiri::XML::Document derived from the public xml" do
179
+ it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
169
180
  Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
170
181
  im = @indexer.identity_metadata(@fake_druid)
171
182
  im.should be_kind_of(Nokogiri::XML::Document)
@@ -173,25 +184,22 @@ describe Harvestdor::Indexer do
173
184
  im.root.name.should == 'identityMetadata'
174
185
  im.root.text.strip.should == "druid:#{@fake_druid}"
175
186
  end
176
- it "raises Harvestdor::Errors::MissingPurlPage if there is no purl page for the druid" do
177
- expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingPurlPage)
178
- end
179
- it "should raise exception if there is no identityMetadata in the public xml" do
180
- pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@cntnt_md_xml}</publicObject>"
181
- Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(Nokogiri::XML(pub_xml))
182
- expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(RuntimeError, "No identityMetadata for #{@fake_druid}")
187
+ it "if passed a Nokogiri::XML::Document of the public xml, it does no fetch" do
188
+ URI::HTTP.any_instance.should_not_receive(:open)
189
+ @hdor_client.should_receive(:identity_metadata).and_call_original
190
+ im = @indexer.identity_metadata(@ng_pub_xml)
191
+ im.should be_kind_of(Nokogiri::XML::Document)
192
+ im.root.should_not == nil
193
+ im.root.name.should == 'identityMetadata'
194
+ im.root.text.strip.should == "druid:#{@fake_druid}"
183
195
  end
184
196
  it "raises RuntimeError if nil is returned by Harvestdor::Client.identityMetadata for the druid" do
185
197
  @hdor_client.should_receive(:identity_metadata).with(@fake_druid).and_return(nil)
186
- expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(RuntimeError, "No identityMetadata for #{@fake_druid}")
187
- end
188
- it "raises MissingIdentityMetadata error if there is no identityMetadata in the public_xml for the druid" do
189
- URI::HTTP.any_instance.should_receive(:open)
190
- expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingIdentityMetadata)
198
+ expect { @indexer.identity_metadata(@fake_druid) }.to raise_error(RuntimeError, "No identityMetadata for \"#{@fake_druid}\"")
191
199
  end
192
200
  end
193
201
  context "#rights_metadata" do
194
- it "returns a Nokogiri::XML::Document derived from the public xml" do
202
+ it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
195
203
  Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
196
204
  im = @indexer.rights_metadata(@fake_druid)
197
205
  im.should be_kind_of(Nokogiri::XML::Document)
@@ -199,25 +207,13 @@ describe Harvestdor::Indexer do
199
207
  im.root.name.should == 'rightsMetadata'
200
208
  im.root.text.strip.should == "bar"
201
209
  end
202
- it "raises Harvestdor::Errors::MissingPurlPage if there is no purl page for the druid" do
203
- expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingPurlPage)
204
- end
205
- it "should raise exception if there is no rightsMetadata in the public xml" do
206
- pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@cntnt_md_xml}</publicObject>"
207
- Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(Nokogiri::XML(pub_xml))
208
- expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(RuntimeError, "No rightsMetadata for #{@fake_druid}")
209
- end
210
210
  it "raises RuntimeError if nil is returned by Harvestdor::Client.rightsMetadata for the druid" do
211
211
  @hdor_client.should_receive(:rights_metadata).with(@fake_druid).and_return(nil)
212
- expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(RuntimeError, "No rightsMetadata for #{@fake_druid}")
213
- end
214
- it "raises MissingRightsMetadata error if there is no rightsMetadata in the public_xml for the druid" do
215
- URI::HTTP.any_instance.should_receive(:open)
216
- expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingRightsMetadata)
212
+ expect { @indexer.rights_metadata(@fake_druid) }.to raise_error(RuntimeError, "No rightsMetadata for \"#{@fake_druid}\"")
217
213
  end
218
214
  end
219
215
  context "#rdf" do
220
- it "returns a Nokogiri::XML::Document derived from the public xml" do
216
+ it "returns a Nokogiri::XML::Document derived from the public xml if a druid is passed" do
221
217
  Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(@ng_pub_xml)
222
218
  im = @indexer.rdf(@fake_druid)
223
219
  im.should be_kind_of(Nokogiri::XML::Document)
@@ -225,23 +221,11 @@ describe Harvestdor::Indexer do
225
221
  im.root.name.should == 'RDF'
226
222
  im.root.text.strip.should == "relationship!"
227
223
  end
228
- it "raises Harvestdor::Errors::MissingPurlPage if there is no purl page for the druid" do
229
- expect { @indexer.rdf(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingPurlPage)
230
- end
231
- it "should raise exception if there is no rdf in the public xml" do
232
- pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@cntnt_md_xml}</publicObject>"
233
- Harvestdor.stub(:public_xml).with(@fake_druid, @indexer.config.purl).and_return(Nokogiri::XML(pub_xml))
234
- expect { @indexer.rdf(@fake_druid) }.to raise_error(RuntimeError, "No RDF for #{@fake_druid}")
235
- end
236
224
  it "raises RuntimeError if nil is returned by Harvestdor::Client.rdf for the druid" do
237
225
  @hdor_client.should_receive(:rdf).with(@fake_druid).and_return(nil)
238
- expect { @indexer.rdf(@fake_druid) }.to raise_error(RuntimeError, "No RDF for #{@fake_druid}")
239
- end
240
- it "raises MissingRDF error if there is no rdf in the public_xml for the druid" do
241
- URI::HTTP.any_instance.should_receive(:open)
242
- expect { @indexer.rdf(@fake_druid) }.to raise_error(Harvestdor::Errors::MissingRDF)
226
+ expect { @indexer.rdf(@fake_druid) }.to raise_error(RuntimeError, "No RDF for \"#{@fake_druid}\"")
243
227
  end
244
- end
228
+ end
245
229
  end
246
230
 
247
231
  context "blacklist" do
metadata CHANGED
@@ -1,20 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: harvestdor-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
5
- prerelease:
4
+ version: 0.0.10
6
5
  platform: ruby
7
6
  authors:
8
7
  - Naomi Dushay
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-03-08 00:00:00.000000000 Z
11
+ date: 2013-10-18 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rsolr
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - ! '>='
20
18
  - !ruby/object:Gem::Version
@@ -22,7 +20,6 @@ dependencies:
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - ! '>='
28
25
  - !ruby/object:Gem::Version
@@ -30,7 +27,6 @@ dependencies:
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: harvestdor
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
31
  - - ! '>='
36
32
  - !ruby/object:Gem::Version
@@ -38,7 +34,6 @@ dependencies:
38
34
  type: :runtime
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
38
  - - ! '>='
44
39
  - !ruby/object:Gem::Version
@@ -46,7 +41,6 @@ dependencies:
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: stanford-mods
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
45
  - - ! '>='
52
46
  - !ruby/object:Gem::Version
@@ -54,7 +48,20 @@ dependencies:
54
48
  type: :runtime
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: confstruct
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
58
65
  requirements:
59
66
  - - ! '>='
60
67
  - !ruby/object:Gem::Version
@@ -62,7 +69,6 @@ dependencies:
62
69
  - !ruby/object:Gem::Dependency
63
70
  name: lyberteam-gems-devel
64
71
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
72
  requirements:
67
73
  - - ! '>='
68
74
  - !ruby/object:Gem::Version
@@ -70,7 +76,6 @@ dependencies:
70
76
  type: :development
71
77
  prerelease: false
72
78
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
79
  requirements:
75
80
  - - ! '>='
76
81
  - !ruby/object:Gem::Version
@@ -78,7 +83,6 @@ dependencies:
78
83
  - !ruby/object:Gem::Dependency
79
84
  name: rake
80
85
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
86
  requirements:
83
87
  - - ! '>='
84
88
  - !ruby/object:Gem::Version
@@ -86,7 +90,6 @@ dependencies:
86
90
  type: :development
87
91
  prerelease: false
88
92
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
93
  requirements:
91
94
  - - ! '>='
92
95
  - !ruby/object:Gem::Version
@@ -94,7 +97,6 @@ dependencies:
94
97
  - !ruby/object:Gem::Dependency
95
98
  name: rdoc
96
99
  requirement: !ruby/object:Gem::Requirement
97
- none: false
98
100
  requirements:
99
101
  - - ! '>='
100
102
  - !ruby/object:Gem::Version
@@ -102,7 +104,6 @@ dependencies:
102
104
  type: :development
103
105
  prerelease: false
104
106
  version_requirements: !ruby/object:Gem::Requirement
105
- none: false
106
107
  requirements:
107
108
  - - ! '>='
108
109
  - !ruby/object:Gem::Version
@@ -110,7 +111,6 @@ dependencies:
110
111
  - !ruby/object:Gem::Dependency
111
112
  name: yard
112
113
  requirement: !ruby/object:Gem::Requirement
113
- none: false
114
114
  requirements:
115
115
  - - ! '>='
116
116
  - !ruby/object:Gem::Version
@@ -118,7 +118,6 @@ dependencies:
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
- none: false
122
121
  requirements:
123
122
  - - ! '>='
124
123
  - !ruby/object:Gem::Version
@@ -126,7 +125,6 @@ dependencies:
126
125
  - !ruby/object:Gem::Dependency
127
126
  name: rspec
128
127
  requirement: !ruby/object:Gem::Requirement
129
- none: false
130
128
  requirements:
131
129
  - - ! '>='
132
130
  - !ruby/object:Gem::Version
@@ -134,7 +132,6 @@ dependencies:
134
132
  type: :development
135
133
  prerelease: false
136
134
  version_requirements: !ruby/object:Gem::Requirement
137
- none: false
138
135
  requirements:
139
136
  - - ! '>='
140
137
  - !ruby/object:Gem::Version
@@ -142,7 +139,6 @@ dependencies:
142
139
  - !ruby/object:Gem::Dependency
143
140
  name: simplecov
144
141
  requirement: !ruby/object:Gem::Requirement
145
- none: false
146
142
  requirements:
147
143
  - - ! '>='
148
144
  - !ruby/object:Gem::Version
@@ -150,7 +146,6 @@ dependencies:
150
146
  type: :development
151
147
  prerelease: false
152
148
  version_requirements: !ruby/object:Gem::Requirement
153
- none: false
154
149
  requirements:
155
150
  - - ! '>='
156
151
  - !ruby/object:Gem::Version
@@ -158,7 +153,6 @@ dependencies:
158
153
  - !ruby/object:Gem::Dependency
159
154
  name: simplecov-rcov
160
155
  requirement: !ruby/object:Gem::Requirement
161
- none: false
162
156
  requirements:
163
157
  - - ! '>='
164
158
  - !ruby/object:Gem::Version
@@ -166,7 +160,6 @@ dependencies:
166
160
  type: :development
167
161
  prerelease: false
168
162
  version_requirements: !ruby/object:Gem::Requirement
169
- none: false
170
163
  requirements:
171
164
  - - ! '>='
172
165
  - !ruby/object:Gem::Version
@@ -196,33 +189,26 @@ files:
196
189
  - spec/unit/harvestdor-indexer_spec.rb
197
190
  homepage: https://consul.stanford.edu/display/chimera/Chimera+project
198
191
  licenses: []
192
+ metadata: {}
199
193
  post_install_message:
200
194
  rdoc_options: []
201
195
  require_paths:
202
196
  - lib
203
197
  required_ruby_version: !ruby/object:Gem::Requirement
204
- none: false
205
198
  requirements:
206
199
  - - ! '>='
207
200
  - !ruby/object:Gem::Version
208
201
  version: '0'
209
- segments:
210
- - 0
211
- hash: -2920299245033359379
212
202
  required_rubygems_version: !ruby/object:Gem::Requirement
213
- none: false
214
203
  requirements:
215
204
  - - ! '>='
216
205
  - !ruby/object:Gem::Version
217
206
  version: '0'
218
- segments:
219
- - 0
220
- hash: -2920299245033359379
221
207
  requirements: []
222
208
  rubyforge_project:
223
- rubygems_version: 1.8.24
209
+ rubygems_version: 2.0.7
224
210
  signing_key:
225
- specification_version: 3
211
+ specification_version: 4
226
212
  summary: Harvest DOR object metadata and index it to Solr
227
213
  test_files:
228
214
  - spec/config/ap.yml