oai 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +19 -4
- data/Rakefile +7 -0
- data/bin/oai +0 -2
- data/examples/models/file_model.rb +2 -2
- data/lib/oai/client/response.rb +8 -8
- data/lib/oai/client.rb +34 -10
- data/lib/oai/exception.rb +46 -38
- data/lib/oai/harvester/config.rb +1 -1
- data/lib/oai/harvester/harvest.rb +37 -25
- data/lib/oai/harvester/logging.rb +3 -5
- data/lib/oai/harvester.rb +4 -1
- data/lib/oai/provider/model/activerecord_caching_wrapper.rb +5 -8
- data/lib/oai/provider/model/activerecord_wrapper.rb +41 -25
- data/lib/oai/provider/model.rb +1 -1
- data/lib/oai/provider/response/list_records.rb +12 -0
- data/lib/oai/provider/response.rb +7 -4
- data/lib/oai/provider/resumption_token.rb +70 -21
- data/lib/oai/provider.rb +129 -7
- data/test/activerecord_provider/database/0001_oaipmh_tables.rb +7 -1
- data/test/activerecord_provider/helpers/providers.rb +10 -1
- data/test/activerecord_provider/helpers/transactional_test_case.rb +2 -1
- data/test/activerecord_provider/models/dc_field.rb +8 -0
- data/test/activerecord_provider/models/dc_lang.rb +3 -0
- data/test/activerecord_provider/models/exclusive_set_dc_field.rb +6 -0
- data/test/activerecord_provider/tc_activerecord_wrapper.rb +63 -0
- data/test/activerecord_provider/tc_ar_provider.rb +54 -26
- data/test/activerecord_provider/tc_ar_sets_provider.rb +10 -9
- data/test/activerecord_provider/tc_caching_paging_provider.rb +9 -7
- data/test/activerecord_provider/tc_simple_paging_provider.rb +28 -7
- data/test/client/tc_exception.rb +1 -1
- data/test/client/tc_get_record.rb +1 -1
- data/test/client/tc_http_client.rb +2 -2
- data/test/client/tc_libxml.rb +1 -1
- data/test/client/tc_utf8_escaping.rb +8 -1
- data/test/harvester/tc_harvest.rb +42 -0
- data/test/harvester/test_helper_harvester.rb +6 -0
- data/test/provider/models.rb +3 -3
- data/test/provider/tc_functional_tokens.rb +17 -11
- data/test/provider/tc_instance_provider.rb +41 -0
- data/test/provider/tc_provider.rb +26 -0
- data/test/provider/tc_resumption_tokens.rb +6 -0
- data/test/provider/test_helper_provider.rb +17 -0
- metadata +28 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0a80559d64aa25add07953e978e93f9abfbc89e03c67bf306e79f0d7b18b0f7
|
4
|
+
data.tar.gz: 7989ff1c6dec95c3965cb3adc1d0bc156d250b6129002be50193443994d03caf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db8b7e74c65625da8b47b35ef23a7e620777dfefb0a6a8d63a6a7314534dbfe46606bf863aef4a839087547280700519f35e3b10bdf10bf70ba93959ae3125e2
|
7
|
+
data.tar.gz: ad14343ae9a61b6516f7e079db4afc0b7eddbb5e70d8d962221842a5fe4f8b49da63d37dc8eeb7cfe9e163617d4d2a3cae93e37e8943b784df7d0dc1eb1a7cce
|
data/README.md
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
ruby-oai
|
2
2
|
========
|
3
|
+
[](https://github.com/code4lib/ruby-oai/actions)
|
3
4
|
|
4
|
-
[](https://badge.fury.io/rb/oai)
|
5
|
+
[](https://badge.fury.io/rb/oai)
|
7
6
|
|
8
7
|
ruby-oai is an Open Archives Protocol for Metadata Harvesting (OAI-PMH)
|
9
8
|
library for Ruby. [OAI-PMH](http://openarchives.org) is a somewhat
|
@@ -47,6 +46,22 @@ For example to initiate a ListRecords request to pubmed you can:
|
|
47
46
|
end
|
48
47
|
```
|
49
48
|
|
49
|
+
### Retry-After
|
50
|
+
This library depends on faraday, but allows a wide range of versions. Depending on the client application's installed version of faraday, there may be different middleware libraries required to support automatically retrying requests that are rate limited/denied with a `Retry-After` header. The OAI client can, however, accept an externally configured faraday http client for handling this. For example, to retry on `429 Too Many Requests`:
|
51
|
+
|
52
|
+
```ruby
|
53
|
+
require 'oai'
|
54
|
+
require 'faraday_middleware' # if using faraday version < 2
|
55
|
+
http_client = Faraday.new do |conn|
|
56
|
+
conn.request(:retry, max: 5, retry_statuses: 429)
|
57
|
+
conn.response(:follow_redirects, limit: 5)
|
58
|
+
conn.adapter :net_http
|
59
|
+
end
|
60
|
+
client = OAI::Client.new(base_url, http: http_client)
|
61
|
+
opts = {from:'2012-03-01', until:'2012-04-01', metadata_prefix:'oai_dc'}
|
62
|
+
puts client.list_records(opts).full.count
|
63
|
+
```
|
64
|
+
|
50
65
|
See {OAI::Client} for more details
|
51
66
|
|
52
67
|
Server
|
@@ -97,7 +112,7 @@ There are also convenience tasks to run subsets of tests.
|
|
97
112
|
We use [appraisal](https://github.com/thoughtbot/appraisal) to test ActiveRecord-related functionality under multiple versions of ActiveRecord. While the above commands will test with latest ActiveRecord (allowed in our .gemspec development dependency), you can test under a particular version defined in the [Appraisals](./Appraisals) file like so:
|
98
113
|
|
99
114
|
$ bundle exec appraisal rails-52 rake test
|
100
|
-
$ bundle exec appraisal rails-
|
115
|
+
$ bundle exec appraisal rails-70 rake test
|
101
116
|
|
102
117
|
If you run into trouble with appraisal's gemfiles getting out of date and bundler complaining,
|
103
118
|
try:
|
data/Rakefile
CHANGED
@@ -34,6 +34,13 @@ namespace :test do
|
|
34
34
|
t.warning = false
|
35
35
|
end
|
36
36
|
|
37
|
+
Rake::TestTask.new('harvester') do |t|
|
38
|
+
t.libs << ['lib', 'test/harvester']
|
39
|
+
t.pattern = 'test/harvester/tc_*.rb'
|
40
|
+
#t.verbose = true
|
41
|
+
t.warning = false
|
42
|
+
end
|
43
|
+
|
37
44
|
Rake::TestTask.new('provider') do |t|
|
38
45
|
t.libs << ['lib', 'test/provider']
|
39
46
|
t.pattern = 'test/provider/tc_*.rb'
|
data/bin/oai
CHANGED
@@ -45,8 +45,8 @@ class FileModel < OAI::Provider::Model
|
|
45
45
|
case selector
|
46
46
|
when :all
|
47
47
|
records = Dir["#{@directory}/*.xml"].sort.collect do |file|
|
48
|
-
File.new(file) unless File.stat(file).mtime.utc < opts[:from] or
|
49
|
-
File.stat(file).mtime.utc > opts[:until]
|
48
|
+
File.new(file) unless File.stat(file).mtime.utc < opts[:from].to_time or
|
49
|
+
File.stat(file).mtime.utc > opts[:until].to_time
|
50
50
|
end
|
51
51
|
records
|
52
52
|
else
|
data/lib/oai/client/response.rb
CHANGED
@@ -37,16 +37,16 @@ module OAI
|
|
37
37
|
message = error.content
|
38
38
|
code = ""
|
39
39
|
if defined?(error.property) == nil
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end
|
40
|
+
code = error.attributes['code']
|
41
|
+
else
|
42
|
+
begin
|
43
|
+
code = error["code"]
|
44
|
+
rescue
|
45
|
+
code = error.property('code')
|
47
46
|
end
|
47
|
+
end
|
48
48
|
end
|
49
|
-
raise OAI::Exception.
|
49
|
+
raise OAI::Exception.for(message: message, code: code)
|
50
50
|
end
|
51
51
|
|
52
52
|
end
|
data/lib/oai/client.rb
CHANGED
@@ -54,7 +54,7 @@ module OAI
|
|
54
54
|
# <http://www.openarchives.org/OAI/openarchivesprotocol.html>.
|
55
55
|
|
56
56
|
class Client
|
57
|
-
|
57
|
+
UNESCAPED_AMPERSAND = /&(?!(?:amp|lt|gt|quot|apos|\#\d+);)/
|
58
58
|
# The constructor which must be passed a valid base url for an oai
|
59
59
|
# service:
|
60
60
|
#
|
@@ -95,7 +95,8 @@ module OAI
|
|
95
95
|
follow_redirects = 5 if follow_redirects == true
|
96
96
|
|
97
97
|
if follow_redirects
|
98
|
-
require '
|
98
|
+
require 'faraday/follow_redirects'
|
99
|
+
builder.use Faraday::FollowRedirects::Middleware
|
99
100
|
builder.response :follow_redirects, :limit => follow_redirects.to_i
|
100
101
|
end
|
101
102
|
builder.adapter :net_http
|
@@ -197,12 +198,9 @@ module OAI
|
|
197
198
|
do_resumable(OAI::ListSetsResponse, 'ListSets', opts)
|
198
199
|
end
|
199
200
|
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
# fire off the request and return appropriate DOM object
|
204
|
-
uri = build_uri(verb, opts)
|
205
|
-
xml = strip_invalid_utf_8_chars(get(uri))
|
201
|
+
def sanitize_xml(xml)
|
202
|
+
xml = strip_invalid_utf_8_chars(xml)
|
203
|
+
xml = strip_invalid_xml_chars(xml)
|
206
204
|
if @parser == 'libxml'
|
207
205
|
# remove default namespace for oai-pmh since libxml
|
208
206
|
# isn't able to use our xpaths to get at them
|
@@ -210,7 +208,15 @@ module OAI
|
|
210
208
|
xml = xml.gsub(
|
211
209
|
/xmlns=\"http:\/\/www.openarchives.org\/OAI\/.\..\/\"/, '')
|
212
210
|
end
|
213
|
-
|
211
|
+
xml
|
212
|
+
end
|
213
|
+
|
214
|
+
private
|
215
|
+
|
216
|
+
def do_request(verb, opts = nil)
|
217
|
+
# fire off the request and return appropriate DOM object
|
218
|
+
uri = build_uri(verb, opts)
|
219
|
+
return load_document(get(uri))
|
214
220
|
end
|
215
221
|
|
216
222
|
def do_resumable(responseClass, verb, opts)
|
@@ -240,6 +246,7 @@ module OAI
|
|
240
246
|
end
|
241
247
|
|
242
248
|
def load_document(xml)
|
249
|
+
xml = sanitize_xml(xml)
|
243
250
|
case @parser
|
244
251
|
when 'libxml'
|
245
252
|
begin
|
@@ -330,7 +337,16 @@ module OAI
|
|
330
337
|
# Regex is from WebCollab:
|
331
338
|
# http://webcollab.sourceforge.net/unicode.html
|
332
339
|
def strip_invalid_utf_8_chars(xml)
|
333
|
-
|
340
|
+
return nil unless xml
|
341
|
+
|
342
|
+
# If it's in a specific encoding other than BINARY, it may trigger
|
343
|
+
# an exception to try to gsub these illegal bytes. Temporarily
|
344
|
+
# put it in BINARY. NOTE: We're not totally sure what's going on
|
345
|
+
# with encodings in this gem in general, it might not be totally reasonable.
|
346
|
+
orig_encoding = xml.encoding
|
347
|
+
xml.force_encoding("BINARY")
|
348
|
+
|
349
|
+
xml = xml.gsub(/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]
|
334
350
|
| [\x00-\x7F][\x80-\xBF]+
|
335
351
|
| ([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*
|
336
352
|
| [\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})
|
@@ -338,7 +354,15 @@ module OAI
|
|
338
354
|
| (?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/x, '?')\
|
339
355
|
.gsub(/\xE0[\x80-\x9F][\x80-\xBF]
|
340
356
|
| \xED[\xA0-\xBF][\x80-\xBF]/,'?')
|
357
|
+
|
358
|
+
xml.force_encoding(orig_encoding)
|
359
|
+
|
360
|
+
xml
|
341
361
|
end
|
342
362
|
|
363
|
+
def strip_invalid_xml_chars(xml)
|
364
|
+
return xml unless xml =~ UNESCAPED_AMPERSAND
|
365
|
+
xml.gsub(UNESCAPED_AMPERSAND, '&amp;')
|
366
|
+
end
|
343
367
|
end
|
344
368
|
end
|
data/lib/oai/exception.rb
CHANGED
@@ -4,72 +4,80 @@ module OAI
|
|
4
4
|
# messages will be wrapped in an XML response to the client.
|
5
5
|
|
6
6
|
class Exception < RuntimeError
|
7
|
+
CODE = nil
|
8
|
+
MESSAGE = nil
|
9
|
+
|
7
10
|
attr_reader :code
|
8
11
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
+
@@codes = {}
|
13
|
+
|
14
|
+
def self.register_exception_code(code, exception_class)
|
15
|
+
@@codes[code] = exception_class if exception_class.superclass == OAI::Exception
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.for(message: nil, code: nil)
|
19
|
+
@@codes.fetch(code, Exception).new(message)
|
20
|
+
end
|
21
|
+
|
22
|
+
def initialize(message = nil, code = nil)
|
23
|
+
super(message || self.class::MESSAGE)
|
24
|
+
@code = code || self.class::CODE
|
12
25
|
end
|
13
26
|
end
|
14
27
|
|
15
28
|
class ArgumentException < Exception
|
16
|
-
|
17
|
-
|
29
|
+
CODE = 'badArgument'
|
30
|
+
MESSAGE = 'The request includes ' \
|
18
31
|
'illegal arguments, is missing required arguments, includes a ' \
|
19
|
-
'repeated argument, or values for arguments have an illegal syntax.'
|
20
|
-
|
21
|
-
end
|
32
|
+
'repeated argument, or values for arguments have an illegal syntax.'
|
33
|
+
register_exception_code(CODE, self)
|
22
34
|
end
|
23
35
|
|
24
36
|
class VerbException < Exception
|
25
|
-
|
26
|
-
|
27
|
-
'verb, the verb argument is missing, or the verb argument is repeated.'
|
28
|
-
|
29
|
-
end
|
37
|
+
CODE = 'badVerb'
|
38
|
+
MESSAGE = 'Value of the verb argument is not a legal OAI-PMH '\
|
39
|
+
'verb, the verb argument is missing, or the verb argument is repeated.'
|
40
|
+
register_exception_code(CODE, self)
|
30
41
|
end
|
31
42
|
|
32
43
|
class FormatException < Exception
|
33
|
-
|
34
|
-
|
44
|
+
CODE = 'cannotDisseminateFormat'
|
45
|
+
MESSAGE = 'The metadata format identified by '\
|
35
46
|
'the value given for the metadataPrefix argument is not supported '\
|
36
|
-
'by the item or by the repository.'
|
37
|
-
|
47
|
+
'by the item or by the repository.'
|
48
|
+
register_exception_code(CODE, self)
|
38
49
|
end
|
39
50
|
|
40
51
|
class IdException < Exception
|
41
|
-
|
42
|
-
|
43
|
-
'unknown or illegal in this repository.'
|
44
|
-
|
52
|
+
CODE = 'idDoesNotExist'
|
53
|
+
MESSAGE = 'The value of the identifier argument is '\
|
54
|
+
'unknown or illegal in this repository.'
|
55
|
+
register_exception_code(CODE, self)
|
45
56
|
end
|
46
57
|
|
47
58
|
class NoMatchException < Exception
|
48
|
-
|
49
|
-
|
50
|
-
'until, set and metadataPrefix arguments results in an empty list.'
|
51
|
-
|
52
|
-
end
|
59
|
+
CODE = 'noRecordsMatch'
|
60
|
+
MESSAGE = 'The combination of the values of the from, '\
|
61
|
+
'until, set and metadataPrefix arguments results in an empty list.'
|
62
|
+
register_exception_code(CODE, self)
|
53
63
|
end
|
54
64
|
|
55
65
|
class MetadataFormatException < Exception
|
56
|
-
|
57
|
-
|
58
|
-
'for the specified item.'
|
59
|
-
|
66
|
+
CODE = 'noMetadataFormats'
|
67
|
+
MESSAGE = 'There are no metadata formats available '\
|
68
|
+
'for the specified item.'
|
69
|
+
register_exception_code(CODE, self)
|
60
70
|
end
|
61
71
|
|
62
72
|
class SetException < Exception
|
63
|
-
|
64
|
-
|
65
|
-
|
73
|
+
CODE = 'noSetHierarchy'
|
74
|
+
MESSAGE = 'This repository does not support sets.'
|
75
|
+
register_exception_code(CODE, self)
|
66
76
|
end
|
67
77
|
|
68
78
|
class ResumptionTokenException < Exception
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
end
|
79
|
+
CODE = 'badResumptionToken'
|
80
|
+
MESSAGE = 'The value of the resumptionToken argument is invalid or expired.'
|
81
|
+
register_exception_code(CODE, self)
|
73
82
|
end
|
74
|
-
|
75
83
|
end
|
data/lib/oai/harvester/config.rb
CHANGED
@@ -3,14 +3,16 @@
|
|
3
3
|
|
4
4
|
module OAI
|
5
5
|
module Harvester
|
6
|
-
|
7
6
|
class Harvest
|
7
|
+
DIRECTORY_LAYOUT = "%Y/%m".freeze
|
8
8
|
|
9
|
-
def initialize(config = nil, directory = nil, date = nil)
|
9
|
+
def initialize(config = nil, directory = nil, date = nil, to = nil)
|
10
10
|
@config = config || Config.load
|
11
11
|
@directory = directory || @config.storage
|
12
12
|
@from = date
|
13
13
|
@from.freeze
|
14
|
+
@until = to
|
15
|
+
@until.freeze
|
14
16
|
@parser = defined?(XML::Document) ? 'libxml' : 'rexml'
|
15
17
|
end
|
16
18
|
|
@@ -30,9 +32,13 @@ module OAI
|
|
30
32
|
|
31
33
|
def harvest(site)
|
32
34
|
opts = build_options_hash(@config.sites[site])
|
33
|
-
|
35
|
+
if @until
|
36
|
+
harvest_time = @until.to_time.utc
|
37
|
+
else
|
38
|
+
harvest_time = Time.now.utc
|
39
|
+
end
|
34
40
|
|
35
|
-
if
|
41
|
+
if OAI::Const::Granularity::LOW == granularity(opts[:url])
|
36
42
|
opts[:until] = harvest_time.strftime("%Y-%m-%d")
|
37
43
|
opts[:from] = @from.strftime("%Y-%m-%d") if @from
|
38
44
|
else
|
@@ -43,22 +49,27 @@ module OAI
|
|
43
49
|
# Allow a from date to be passed in
|
44
50
|
opts[:from] = earliest(opts[:url]) unless opts[:from]
|
45
51
|
opts.delete(:set) if 'all' == opts[:set]
|
46
|
-
|
47
52
|
begin
|
48
53
|
# Connect, and download
|
49
54
|
file, records = call(opts.delete(:url), opts)
|
50
55
|
|
51
|
-
# Move document to storage directory
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
56
|
+
# Move document to storage directory if configured
|
57
|
+
if @directory
|
58
|
+
directory_layout = @config.layouts[site] if @config.layouts
|
59
|
+
dir = File.join(@directory, date_based_directory(harvest_time, directory_layout))
|
60
|
+
FileUtils.mkdir_p dir
|
61
|
+
FileUtils.mv(file.path,
|
62
|
+
File.join(dir, "#{site}-#{filename(Time.parse(opts[:from]),
|
63
|
+
harvest_time)}.xml.gz"))
|
64
|
+
else
|
65
|
+
puts "no configured destination for temp file" if @interactive
|
66
|
+
end
|
57
67
|
@config.sites[site]['last'] = harvest_time
|
58
|
-
rescue
|
59
|
-
|
60
|
-
|
61
|
-
|
68
|
+
rescue OAI::NoMatchException
|
69
|
+
puts "No new records available" if @interactive
|
70
|
+
rescue OAI::Exception => ex
|
71
|
+
raise ex if not @interactive
|
72
|
+
puts ex.message
|
62
73
|
end
|
63
74
|
end
|
64
75
|
|
@@ -69,15 +80,15 @@ module OAI
|
|
69
80
|
records = 0;
|
70
81
|
client = OAI::Client.new(url, :parser => @parser)
|
71
82
|
provider_config = client.identify
|
72
|
-
|
83
|
+
|
73
84
|
file = Tempfile.new('oai_data')
|
74
85
|
gz = Zlib::GzipWriter.new(file)
|
75
86
|
gz << "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
|
76
87
|
gz << "<records>"
|
77
88
|
begin
|
78
89
|
response = client.list_records(options)
|
79
|
-
|
80
|
-
gz << rec
|
90
|
+
response.each do |rec|
|
91
|
+
gz << rec._source
|
81
92
|
records += 1
|
82
93
|
end
|
83
94
|
puts "#{records} records retrieved" if @interactive
|
@@ -89,8 +100,8 @@ module OAI
|
|
89
100
|
puts "\nresumption token recieved, continuing" if @interactive
|
90
101
|
response = client.list_records(:resumption_token =>
|
91
102
|
response.resumption_token)
|
92
|
-
|
93
|
-
gz << rec
|
103
|
+
response.each do |rec|
|
104
|
+
gz << rec._source
|
94
105
|
records += 1
|
95
106
|
end
|
96
107
|
puts "#{records} records retrieved" if @interactive
|
@@ -118,8 +129,9 @@ module OAI
|
|
118
129
|
options
|
119
130
|
end
|
120
131
|
|
121
|
-
def date_based_directory(time)
|
122
|
-
|
132
|
+
def date_based_directory(time, directory_layout = nil)
|
133
|
+
directory_layout ||= Harvest::DIRECTORY_LAYOUT
|
134
|
+
"#{time.strftime(directory_layout)}"
|
123
135
|
end
|
124
136
|
|
125
137
|
def filename(from_time, until_time)
|
@@ -127,7 +139,7 @@ module OAI
|
|
127
139
|
"#{from_time.strftime(format)}_til_#{until_time.strftime(format)}"\
|
128
140
|
"_at_#{until_time.strftime('%H-%M-%S')}"
|
129
141
|
end
|
130
|
-
|
142
|
+
|
131
143
|
def granularity(url)
|
132
144
|
client = OAI::Client.new url
|
133
145
|
client.identify.granularity
|
@@ -137,7 +149,7 @@ module OAI
|
|
137
149
|
def earliest(url)
|
138
150
|
client = OAI::Client.new url
|
139
151
|
identify = client.identify
|
140
|
-
if
|
152
|
+
if OAI::Const::Granularity::LOW == identify.granularity
|
141
153
|
Time.parse(identify.earliest_datestamp).strftime("%Y-%m-%d")
|
142
154
|
else
|
143
155
|
Time.parse(identify.earliest_datestamp).xmlschema
|
@@ -147,4 +159,4 @@ module OAI
|
|
147
159
|
end
|
148
160
|
|
149
161
|
end
|
150
|
-
end
|
162
|
+
end
|
@@ -11,10 +11,9 @@ module OAI
|
|
11
11
|
def initialize(*args)
|
12
12
|
orig_init(*args)
|
13
13
|
@summary = []
|
14
|
-
@logger = Logger.new(File.join(@config.logfile, "harvester.log"),
|
15
|
-
shift_age = 'weekly') if @config.logfile
|
14
|
+
@logger = @config.logfile ? Logger.new(File.join(@config.logfile, "harvester.log"), 'weekly') : Logger.new(STDOUT)
|
16
15
|
@logger.datetime_format = "%Y-%m-%d %H:%M"
|
17
|
-
|
16
|
+
|
18
17
|
# Turn off logging if no logging directory is specified.
|
19
18
|
@logger.level = Logger::FATAL unless @config.logfile
|
20
19
|
end
|
@@ -24,8 +23,7 @@ module OAI
|
|
24
23
|
@logger.info { "Starting regular harvest" }
|
25
24
|
orig_start(sites)
|
26
25
|
begin
|
27
|
-
OAI::Harvester::
|
28
|
-
Mailer.send(@config.mail_server, @config.email, @summary)
|
26
|
+
OAI::Harvester::Mailer.send(@config.mail_server, @config.email, @summary) if @config.email
|
29
27
|
rescue
|
30
28
|
@logger.error { "Error sending out summary email: #{$!}"}
|
31
29
|
end
|
data/lib/oai/harvester.rb
CHANGED
@@ -7,9 +7,12 @@ require 'logger'
|
|
7
7
|
require 'fileutils'
|
8
8
|
require 'ostruct'
|
9
9
|
require 'readline'
|
10
|
-
require 'chronic'
|
11
10
|
require 'socket'
|
12
11
|
|
12
|
+
if not defined?(OAI::Const::VERBS)
|
13
|
+
require 'oai/constants'
|
14
|
+
end
|
15
|
+
|
13
16
|
require 'oai/client'
|
14
17
|
require 'oai/harvester/config'
|
15
18
|
require 'oai/harvester/harvest'
|
@@ -78,13 +78,7 @@ module OAI::Provider
|
|
78
78
|
raise ResumptionTokenException.new unless @limit
|
79
79
|
|
80
80
|
token = ResumptionToken.parse(token_string)
|
81
|
-
|
82
|
-
|
83
|
-
if token.last * @limit + @limit < total
|
84
|
-
select_partial(token)
|
85
|
-
else
|
86
|
-
select_partial(token).records
|
87
|
-
end
|
81
|
+
select_partial(token)
|
88
82
|
end
|
89
83
|
|
90
84
|
# select a subset of the result set, and return it with a
|
@@ -102,10 +96,13 @@ module OAI::Provider
|
|
102
96
|
|
103
97
|
raise ResumptionTokenException.new unless oaitoken
|
104
98
|
|
99
|
+
total = model.where(token_conditions(token)).count
|
100
|
+
# token offset should be nil if this is the last set
|
101
|
+
offset = (token.last * @limit + @limit >= total) ? nil : token.last + 1
|
105
102
|
PartialResult.new(
|
106
103
|
hydrate_records(
|
107
104
|
oaitoken.entries.limit(@limit).offset(token.last * @limit)),
|
108
|
-
token.next(
|
105
|
+
token.next(offset)
|
109
106
|
)
|
110
107
|
end
|
111
108
|
|