oai 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/README.md +19 -4
  3. data/Rakefile +7 -0
  4. data/bin/oai +0 -2
  5. data/examples/models/file_model.rb +2 -2
  6. data/lib/oai/client/response.rb +8 -8
  7. data/lib/oai/client.rb +34 -10
  8. data/lib/oai/exception.rb +46 -38
  9. data/lib/oai/harvester/config.rb +1 -1
  10. data/lib/oai/harvester/harvest.rb +37 -25
  11. data/lib/oai/harvester/logging.rb +3 -5
  12. data/lib/oai/harvester.rb +4 -1
  13. data/lib/oai/provider/model/activerecord_caching_wrapper.rb +5 -8
  14. data/lib/oai/provider/model/activerecord_wrapper.rb +41 -25
  15. data/lib/oai/provider/model.rb +1 -1
  16. data/lib/oai/provider/response/list_records.rb +12 -0
  17. data/lib/oai/provider/response.rb +7 -4
  18. data/lib/oai/provider/resumption_token.rb +70 -21
  19. data/lib/oai/provider.rb +129 -7
  20. data/test/activerecord_provider/database/0001_oaipmh_tables.rb +7 -1
  21. data/test/activerecord_provider/helpers/providers.rb +10 -1
  22. data/test/activerecord_provider/helpers/transactional_test_case.rb +2 -1
  23. data/test/activerecord_provider/models/dc_field.rb +8 -0
  24. data/test/activerecord_provider/models/dc_lang.rb +3 -0
  25. data/test/activerecord_provider/models/exclusive_set_dc_field.rb +6 -0
  26. data/test/activerecord_provider/tc_activerecord_wrapper.rb +63 -0
  27. data/test/activerecord_provider/tc_ar_provider.rb +54 -26
  28. data/test/activerecord_provider/tc_ar_sets_provider.rb +10 -9
  29. data/test/activerecord_provider/tc_caching_paging_provider.rb +9 -7
  30. data/test/activerecord_provider/tc_simple_paging_provider.rb +28 -7
  31. data/test/client/tc_exception.rb +1 -1
  32. data/test/client/tc_get_record.rb +1 -1
  33. data/test/client/tc_http_client.rb +2 -2
  34. data/test/client/tc_libxml.rb +1 -1
  35. data/test/client/tc_utf8_escaping.rb +8 -1
  36. data/test/harvester/tc_harvest.rb +42 -0
  37. data/test/harvester/test_helper_harvester.rb +6 -0
  38. data/test/provider/models.rb +3 -3
  39. data/test/provider/tc_functional_tokens.rb +17 -11
  40. data/test/provider/tc_instance_provider.rb +41 -0
  41. data/test/provider/tc_provider.rb +26 -0
  42. data/test/provider/tc_resumption_tokens.rb +6 -0
  43. data/test/provider/test_helper_provider.rb +17 -0
  44. metadata +28 -17
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 74c8bf29b2463c130d3ec55f04a433fb828041fba20d9df6045bfec0ac03dbf3
4
- data.tar.gz: 8cc85a82a2d1006499baa2a80a238eb50c04b7e9c33e64b0aa5e19daa7afabdd
3
+ metadata.gz: c0a80559d64aa25add07953e978e93f9abfbc89e03c67bf306e79f0d7b18b0f7
4
+ data.tar.gz: 7989ff1c6dec95c3965cb3adc1d0bc156d250b6129002be50193443994d03caf
5
5
  SHA512:
6
- metadata.gz: fd82dfd51917453cea94d63549975bf60cb928c7c619262cb017a79b5dccfe6ae5d02e6a938aeaf4e3577417aa3644319d5e8c59ed9cdd3a992c4db49c886e1d
7
- data.tar.gz: 4cfb1c2ca5ec4b81fbe9e21462714801e4aa2c36ce58ecbb01facafd0cbcd7e354ba66d0df53f1789bc19dc9d8eb795cc220605381c9d2d6c24d1a611f6a3505
6
+ metadata.gz: db8b7e74c65625da8b47b35ef23a7e620777dfefb0a6a8d63a6a7314534dbfe46606bf863aef4a839087547280700519f35e3b10bdf10bf70ba93959ae3125e2
7
+ data.tar.gz: ad14343ae9a61b6516f7e079db4afc0b7eddbb5e70d8d962221842a5fe4f8b49da63d37dc8eeb7cfe9e163617d4d2a3cae93e37e8943b784df7d0dc1eb1a7cce
data/README.md CHANGED
@@ -1,9 +1,8 @@
1
1
  ruby-oai
2
2
  ========
3
+ [![Build Status](https://github.com/code4lib/ruby-oai/workflows/CI/badge.svg)](https://github.com/code4lib/ruby-oai/actions)
3
4
 
4
- [![Build Status](https://travis-ci.org/code4lib/ruby-oai.svg?branch=master)](https://travis-ci.org/code4lib/ruby-oai)
5
-
6
- [![Gem Version](https://badge.fury.io/rb/kithe.svg)](https://badge.fury.io/rb/oai)
5
+ [![Gem Version](https://badge.fury.io/rb/oai.svg)](https://badge.fury.io/rb/oai)
7
6
 
8
7
  ruby-oai is an Open Archives Protocol for Metadata Harvesting (OAI-PMH)
9
8
  library for Ruby. [OAI-PMH](http://openarchives.org) is a somewhat
@@ -47,6 +46,22 @@ For example to initiate a ListRecords request to pubmed you can:
47
46
  end
48
47
  ```
49
48
 
49
+ ### Retry-After
50
+ This library depends on faraday, but allows a wide range of versions. Depending on the client application's installed version of faraday, there may be different middleware libraries required to support automatically retrying requests that are rate limited/denied with a `Retry-After` header. The OAI client can, however, accept an externally configured faraday http client for handling this. For example, to retry on `429 Too Many Requests`:
51
+
52
+ ```ruby
53
+ require 'oai'
54
+ require 'faraday_middleware' # if using faraday version < 2
55
+ http_client = Faraday.new do |conn|
56
+ conn.request(:retry, max: 5, retry_statuses: 429)
57
+ conn.response(:follow_redirects, limit: 5)
58
+ conn.adapter :net_http
59
+ end
60
+ client = OAI::Client.new(base_url, http: http_client)
61
+ opts = {from:'2012-03-01', until:'2012-04-01', metadata_prefix:'oai_dc'}
62
+ puts client.list_records(opts).full.count
63
+ ```
64
+
50
65
  See {OAI::Client} for more details
51
66
 
52
67
  Server
@@ -97,7 +112,7 @@ There are also convenience tasks to run subsets of tests.
97
112
  We use [appraisal](https://github.com/thoughtbot/appraisal) to test ActiveRecord-related functionality under multiple versions of ActiveRecord. While the above commands will test with latest ActiveRecord (allowed in our .gemspec development dependency), you can test under a particular version defined in the [Appraisals](./Appraisals) file like so:
98
113
 
99
114
  $ bundle exec appraisal rails-52 rake test
100
- $ bundle exec appraisal rails-60 rake test
115
+ $ bundle exec appraisal rails-70 rake test
101
116
 
102
117
  If you run into trouble with appraisal's gemfiles getting out of date and bundler complaining,
103
118
  try:
data/Rakefile CHANGED
@@ -34,6 +34,13 @@ namespace :test do
34
34
  t.warning = false
35
35
  end
36
36
 
37
+ Rake::TestTask.new('harvester') do |t|
38
+ t.libs << ['lib', 'test/harvester']
39
+ t.pattern = 'test/harvester/tc_*.rb'
40
+ #t.verbose = true
41
+ t.warning = false
42
+ end
43
+
37
44
  Rake::TestTask.new('provider') do |t|
38
45
  t.libs << ['lib', 'test/provider']
39
46
  t.pattern = 'test/provider/tc_*.rb'
data/bin/oai CHANGED
@@ -5,8 +5,6 @@
5
5
 
6
6
  require 'optparse'
7
7
 
8
- DIRECTORY_LAYOUT = "%Y/%m".freeze
9
-
10
8
  require 'oai/harvester'
11
9
 
12
10
  include OAI::Harvester
data/examples/models/file_model.rb CHANGED
@@ -45,8 +45,8 @@ class FileModel < OAI::Provider::Model
45
45
  case selector
46
46
  when :all
47
47
  records = Dir["#{@directory}/*.xml"].sort.collect do |file|
48
- File.new(file) unless File.stat(file).mtime.utc < opts[:from] or
49
- File.stat(file).mtime.utc > opts[:until]
48
+ File.new(file) unless File.stat(file).mtime.utc < opts[:from].to_time or
49
+ File.stat(file).mtime.utc > opts[:until].to_time
50
50
  end
51
51
  records
52
52
  else
data/lib/oai/client/response.rb CHANGED
@@ -37,16 +37,16 @@ module OAI
37
37
  message = error.content
38
38
  code = ""
39
39
  if defined?(error.property) == nil
40
- code = error.attributes['code']
41
- else
42
- begin
43
- code = error["code"]
44
- rescue
45
- code = error.property('code')
46
- end
40
+ code = error.attributes['code']
41
+ else
42
+ begin
43
+ code = error["code"]
44
+ rescue
45
+ code = error.property('code')
47
46
  end
47
+ end
48
48
  end
49
- raise OAI::Exception.new(message, code)
49
+ raise OAI::Exception.for(message: message, code: code)
50
50
  end
51
51
 
52
52
  end
data/lib/oai/client.rb CHANGED
@@ -54,7 +54,7 @@ module OAI
54
54
  # <http://www.openarchives.org/OAI/openarchivesprotocol.html>.
55
55
 
56
56
  class Client
57
-
57
+ UNESCAPED_AMPERSAND = /&(?!(?:amp|lt|gt|quot|apos|\#\d+);)/
58
58
  # The constructor which must be passed a valid base url for an oai
59
59
  # service:
60
60
  #
@@ -95,7 +95,8 @@ module OAI
95
95
  follow_redirects = 5 if follow_redirects == true
96
96
 
97
97
  if follow_redirects
98
- require 'faraday_middleware'
98
+ require 'faraday/follow_redirects'
99
+ builder.use Faraday::FollowRedirects::Middleware
99
100
  builder.response :follow_redirects, :limit => follow_redirects.to_i
100
101
  end
101
102
  builder.adapter :net_http
@@ -197,12 +198,9 @@ module OAI
197
198
  do_resumable(OAI::ListSetsResponse, 'ListSets', opts)
198
199
  end
199
200
 
200
- private
201
-
202
- def do_request(verb, opts = nil)
203
- # fire off the request and return appropriate DOM object
204
- uri = build_uri(verb, opts)
205
- xml = strip_invalid_utf_8_chars(get(uri))
201
+ def sanitize_xml(xml)
202
+ xml = strip_invalid_utf_8_chars(xml)
203
+ xml = strip_invalid_xml_chars(xml)
206
204
  if @parser == 'libxml'
207
205
  # remove default namespace for oai-pmh since libxml
208
206
  # isn't able to use our xpaths to get at them
@@ -210,7 +208,15 @@ module OAI
210
208
  xml = xml.gsub(
211
209
  /xmlns=\"http:\/\/www.openarchives.org\/OAI\/.\..\/\"/, '')
212
210
  end
213
- return load_document(xml)
211
+ xml
212
+ end
213
+
214
+ private
215
+
216
+ def do_request(verb, opts = nil)
217
+ # fire off the request and return appropriate DOM object
218
+ uri = build_uri(verb, opts)
219
+ return load_document(get(uri))
214
220
  end
215
221
 
216
222
  def do_resumable(responseClass, verb, opts)
@@ -240,6 +246,7 @@ module OAI
240
246
  end
241
247
 
242
248
  def load_document(xml)
249
+ xml = sanitize_xml(xml)
243
250
  case @parser
244
251
  when 'libxml'
245
252
  begin
@@ -330,7 +337,16 @@ module OAI
330
337
  # Regex is from WebCollab:
331
338
  # http://webcollab.sourceforge.net/unicode.html
332
339
  def strip_invalid_utf_8_chars(xml)
333
- xml && xml.gsub(/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]
340
+ return nil unless xml
341
+
342
+ # If it's in a specific encoding other than BINARY, it may trigger
343
+ # an exception to try to gsub these illegal bytes. Temporarily
344
+ # put it in BINARY. NOTE: We're not totally sure what's going on
345
+ # with encodings in this gem in general, it might not be totally reasonable.
346
+ orig_encoding = xml.encoding
347
+ xml.force_encoding("BINARY")
348
+
349
+ xml = xml.gsub(/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]
334
350
  | [\x00-\x7F][\x80-\xBF]+
335
351
  | ([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*
336
352
  | [\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})
@@ -338,7 +354,15 @@ module OAI
338
354
  | (?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/x, '?')\
339
355
  .gsub(/\xE0[\x80-\x9F][\x80-\xBF]
340
356
  | \xED[\xA0-\xBF][\x80-\xBF]/,'?')
357
+
358
+ xml.force_encoding(orig_encoding)
359
+
360
+ xml
341
361
  end
342
362
 
363
+ def strip_invalid_xml_chars(xml)
364
+ return xml unless xml =~ UNESCAPED_AMPERSAND
365
+ xml.gsub(UNESCAPED_AMPERSAND, '&amp;')
366
+ end
343
367
  end
344
368
  end
data/lib/oai/exception.rb CHANGED
@@ -4,72 +4,80 @@ module OAI
4
4
  # messages will be wrapped in an XML response to the client.
5
5
 
6
6
  class Exception < RuntimeError
7
+ CODE = nil
8
+ MESSAGE = nil
9
+
7
10
  attr_reader :code
8
11
 
9
- def initialize(message, code = nil)
10
- super(message)
11
- @code = code
12
+ @@codes = {}
13
+
14
+ def self.register_exception_code(code, exception_class)
15
+ @@codes[code] = exception_class if exception_class.superclass == OAI::Exception
16
+ end
17
+
18
+ def self.for(message: nil, code: nil)
19
+ @@codes.fetch(code, Exception).new(message)
20
+ end
21
+
22
+ def initialize(message = nil, code = nil)
23
+ super(message || self.class::MESSAGE)
24
+ @code = code || self.class::CODE
12
25
  end
13
26
  end
14
27
 
15
28
  class ArgumentException < Exception
16
- def initialize()
17
- super('The request includes ' \
29
+ CODE = 'badArgument'
30
+ MESSAGE = 'The request includes ' \
18
31
  'illegal arguments, is missing required arguments, includes a ' \
19
- 'repeated argument, or values for arguments have an illegal syntax.',
20
- 'badArgument')
21
- end
32
+ 'repeated argument, or values for arguments have an illegal syntax.'
33
+ register_exception_code(CODE, self)
22
34
  end
23
35
 
24
36
  class VerbException < Exception
25
- def initialize()
26
- super('Value of the verb argument is not a legal OAI-PMH '\
27
- 'verb, the verb argument is missing, or the verb argument is repeated.',
28
- 'badVerb')
29
- end
37
+ CODE = 'badVerb'
38
+ MESSAGE = 'Value of the verb argument is not a legal OAI-PMH '\
39
+ 'verb, the verb argument is missing, or the verb argument is repeated.'
40
+ register_exception_code(CODE, self)
30
41
  end
31
42
 
32
43
  class FormatException < Exception
33
- def initialize()
34
- super('The metadata format identified by '\
44
+ CODE = 'cannotDisseminateFormat'
45
+ MESSAGE = 'The metadata format identified by '\
35
46
  'the value given for the metadataPrefix argument is not supported '\
36
- 'by the item or by the repository.', 'cannotDisseminateFormat')
37
- end
47
+ 'by the item or by the repository.'
48
+ register_exception_code(CODE, self)
38
49
  end
39
50
 
40
51
  class IdException < Exception
41
- def initialize()
42
- super('The value of the identifier argument is '\
43
- 'unknown or illegal in this repository.', 'idDoesNotExist')
44
- end
52
+ CODE = 'idDoesNotExist'
53
+ MESSAGE = 'The value of the identifier argument is '\
54
+ 'unknown or illegal in this repository.'
55
+ register_exception_code(CODE, self)
45
56
  end
46
57
 
47
58
  class NoMatchException < Exception
48
- def initialize()
49
- super('The combination of the values of the from, '\
50
- 'until, set and metadataPrefix arguments results in an empty list.',
51
- 'noRecordsMatch')
52
- end
59
+ CODE = 'noRecordsMatch'
60
+ MESSAGE = 'The combination of the values of the from, '\
61
+ 'until, set and metadataPrefix arguments results in an empty list.'
62
+ register_exception_code(CODE, self)
53
63
  end
54
64
 
55
65
  class MetadataFormatException < Exception
56
- def initialize()
57
- super('There are no metadata formats available '\
58
- 'for the specified item.', 'noMetadataFormats')
59
- end
66
+ CODE = 'noMetadataFormats'
67
+ MESSAGE = 'There are no metadata formats available '\
68
+ 'for the specified item.'
69
+ register_exception_code(CODE, self)
60
70
  end
61
71
 
62
72
  class SetException < Exception
63
- def initialize()
64
- super('This repository does not support sets.', 'noSetHierarchy')
65
- end
73
+ CODE = 'noSetHierarchy'
74
+ MESSAGE = 'This repository does not support sets.'
75
+ register_exception_code(CODE, self)
66
76
  end
67
77
 
68
78
  class ResumptionTokenException < Exception
69
- def initialize()
70
- super('The value of the resumptionToken argument is invalid or expired.',
71
- 'badResumptionToken')
72
- end
79
+ CODE = 'badResumptionToken'
80
+ MESSAGE = 'The value of the resumptionToken argument is invalid or expired.'
81
+ register_exception_code(CODE, self)
73
82
  end
74
-
75
83
  end
data/lib/oai/harvester/config.rb CHANGED
@@ -5,7 +5,7 @@
5
5
  module OAI
6
6
  module Harvester
7
7
 
8
- LOW_RESOLUTION = "YYYY-MM-DD"
8
+ LOW_RESOLUTION = OAI::Const::Granularity::LOW
9
9
 
10
10
  class Config < OpenStruct
11
11
 
data/lib/oai/harvester/harvest.rb CHANGED
@@ -3,14 +3,16 @@
3
3
 
4
4
  module OAI
5
5
  module Harvester
6
-
7
6
  class Harvest
7
+ DIRECTORY_LAYOUT = "%Y/%m".freeze
8
8
 
9
- def initialize(config = nil, directory = nil, date = nil)
9
+ def initialize(config = nil, directory = nil, date = nil, to = nil)
10
10
  @config = config || Config.load
11
11
  @directory = directory || @config.storage
12
12
  @from = date
13
13
  @from.freeze
14
+ @until = to
15
+ @until.freeze
14
16
  @parser = defined?(XML::Document) ? 'libxml' : 'rexml'
15
17
  end
16
18
 
@@ -30,9 +32,13 @@ module OAI
30
32
 
31
33
  def harvest(site)
32
34
  opts = build_options_hash(@config.sites[site])
33
- harvest_time = Time.now.utc
35
+ if @until
36
+ harvest_time = @until.to_time.utc
37
+ else
38
+ harvest_time = Time.now.utc
39
+ end
34
40
 
35
- if "YYYY-MM-DD" == granularity(opts[:url])
41
+ if OAI::Const::Granularity::LOW == granularity(opts[:url])
36
42
  opts[:until] = harvest_time.strftime("%Y-%m-%d")
37
43
  opts[:from] = @from.strftime("%Y-%m-%d") if @from
38
44
  else
@@ -43,22 +49,27 @@ module OAI
43
49
  # Allow a from date to be passed in
44
50
  opts[:from] = earliest(opts[:url]) unless opts[:from]
45
51
  opts.delete(:set) if 'all' == opts[:set]
46
-
47
52
  begin
48
53
  # Connect, and download
49
54
  file, records = call(opts.delete(:url), opts)
50
55
 
51
- # Move document to storage directory
52
- dir = File.join(@directory, date_based_directory(harvest_time))
53
- FileUtils.mkdir_p dir
54
- FileUtils.mv(file.path,
55
- File.join(dir, "#{site}-#{filename(Time.parse(opts[:from]),
56
- harvest_time)}.xml.gz"))
56
+ # Move document to storage directory if configured
57
+ if @directory
58
+ directory_layout = @config.layouts[site] if @config.layouts
59
+ dir = File.join(@directory, date_based_directory(harvest_time, directory_layout))
60
+ FileUtils.mkdir_p dir
61
+ FileUtils.mv(file.path,
62
+ File.join(dir, "#{site}-#{filename(Time.parse(opts[:from]),
63
+ harvest_time)}.xml.gz"))
64
+ else
65
+ puts "no configured destination for temp file" if @interactive
66
+ end
57
67
  @config.sites[site]['last'] = harvest_time
58
- rescue
59
- raise $! unless $!.respond_to?(:code)
60
- raise $! if not @interactive || "noRecordsMatch" != $!.code
61
- puts "No new records available"
68
+ rescue OAI::NoMatchException
69
+ puts "No new records available" if @interactive
70
+ rescue OAI::Exception => ex
71
+ raise ex if not @interactive
72
+ puts ex.message
62
73
  end
63
74
  end
64
75
 
@@ -69,15 +80,15 @@ module OAI
69
80
  records = 0;
70
81
  client = OAI::Client.new(url, :parser => @parser)
71
82
  provider_config = client.identify
72
-
83
+
73
84
  file = Tempfile.new('oai_data')
74
85
  gz = Zlib::GzipWriter.new(file)
75
86
  gz << "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
76
87
  gz << "<records>"
77
88
  begin
78
89
  response = client.list_records(options)
79
- get_records(response.doc).each do |rec|
80
- gz << rec
90
+ response.each do |rec|
91
+ gz << rec._source
81
92
  records += 1
82
93
  end
83
94
  puts "#{records} records retrieved" if @interactive
@@ -89,8 +100,8 @@ module OAI
89
100
  puts "\nresumption token recieved, continuing" if @interactive
90
101
  response = client.list_records(:resumption_token =>
91
102
  response.resumption_token)
92
- get_records(response.doc).each do |rec|
93
- gz << rec
103
+ response.each do |rec|
104
+ gz << rec._source
94
105
  records += 1
95
106
  end
96
107
  puts "#{records} records retrieved" if @interactive
@@ -118,8 +129,9 @@ module OAI
118
129
  options
119
130
  end
120
131
 
121
- def date_based_directory(time)
122
- "#{time.strftime(DIRECTORY_LAYOUT)}"
132
+ def date_based_directory(time, directory_layout = nil)
133
+ directory_layout ||= Harvest::DIRECTORY_LAYOUT
134
+ "#{time.strftime(directory_layout)}"
123
135
  end
124
136
 
125
137
  def filename(from_time, until_time)
@@ -127,7 +139,7 @@ module OAI
127
139
  "#{from_time.strftime(format)}_til_#{until_time.strftime(format)}"\
128
140
  "_at_#{until_time.strftime('%H-%M-%S')}"
129
141
  end
130
-
142
+
131
143
  def granularity(url)
132
144
  client = OAI::Client.new url
133
145
  client.identify.granularity
@@ -137,7 +149,7 @@ module OAI
137
149
  def earliest(url)
138
150
  client = OAI::Client.new url
139
151
  identify = client.identify
140
- if "YYYY-MM-DD" == identify.granularity
152
+ if OAI::Const::Granularity::LOW == identify.granularity
141
153
  Time.parse(identify.earliest_datestamp).strftime("%Y-%m-%d")
142
154
  else
143
155
  Time.parse(identify.earliest_datestamp).xmlschema
@@ -147,4 +159,4 @@ module OAI
147
159
  end
148
160
 
149
161
  end
150
- end
162
+ end
data/lib/oai/harvester/logging.rb CHANGED
@@ -11,10 +11,9 @@ module OAI
11
11
  def initialize(*args)
12
12
  orig_init(*args)
13
13
  @summary = []
14
- @logger = Logger.new(File.join(@config.logfile, "harvester.log"),
15
- shift_age = 'weekly') if @config.logfile
14
+ @logger = @config.logfile ? Logger.new(File.join(@config.logfile, "harvester.log"), 'weekly') : Logger.new(STDOUT)
16
15
  @logger.datetime_format = "%Y-%m-%d %H:%M"
17
-
16
+
18
17
  # Turn off logging if no logging directory is specified.
19
18
  @logger.level = Logger::FATAL unless @config.logfile
20
19
  end
@@ -24,8 +23,7 @@ module OAI
24
23
  @logger.info { "Starting regular harvest" }
25
24
  orig_start(sites)
26
25
  begin
27
- OAI::Harvester::
28
- Mailer.send(@config.mail_server, @config.email, @summary)
26
+ OAI::Harvester::Mailer.send(@config.mail_server, @config.email, @summary) if @config.email
29
27
  rescue
30
28
  @logger.error { "Error sending out summary email: #{$!}"}
31
29
  end
data/lib/oai/harvester.rb CHANGED
@@ -7,9 +7,12 @@ require 'logger'
7
7
  require 'fileutils'
8
8
  require 'ostruct'
9
9
  require 'readline'
10
- require 'chronic'
11
10
  require 'socket'
12
11
 
12
+ if not defined?(OAI::Const::VERBS)
13
+ require 'oai/constants'
14
+ end
15
+
13
16
  require 'oai/client'
14
17
  require 'oai/harvester/config'
15
18
  require 'oai/harvester/harvest'
data/lib/oai/provider/model/activerecord_caching_wrapper.rb CHANGED
@@ -78,13 +78,7 @@ module OAI::Provider
78
78
  raise ResumptionTokenException.new unless @limit
79
79
 
80
80
  token = ResumptionToken.parse(token_string)
81
- total = model.where(token_conditions(token)).count
82
-
83
- if token.last * @limit + @limit < total
84
- select_partial(token)
85
- else
86
- select_partial(token).records
87
- end
81
+ select_partial(token)
88
82
  end
89
83
 
90
84
  # select a subset of the result set, and return it with a
@@ -102,10 +96,13 @@ module OAI::Provider
102
96
 
103
97
  raise ResumptionTokenException.new unless oaitoken
104
98
 
99
+ total = model.where(token_conditions(token)).count
100
+ # token offset should be nil if this is the last set
101
+ offset = (token.last * @limit + @limit >= total) ? nil : token.last + 1
105
102
  PartialResult.new(
106
103
  hydrate_records(
107
104
  oaitoken.entries.limit(@limit).offset(token.last * @limit)),
108
- token.next(token.last + 1)
105
+ token.next(offset)
109
106
  )
110
107
  end
111
108