harvestdor 0.0.14 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.hound.yml +2 -0
- data/.rubocop.yml +3 -0
- data/.rubocop_todo.yml +249 -0
- data/.travis.yml +3 -1
- data/Gemfile +0 -4
- data/README.rdoc +13 -58
- data/Rakefile +10 -6
- data/harvestdor.gemspec +9 -14
- data/lib/harvestdor/client.rb +110 -0
- data/lib/harvestdor/errors.rb +8 -8
- data/lib/harvestdor/purl_xml.rb +13 -72
- data/lib/harvestdor/version.rb +1 -1
- data/lib/harvestdor.rb +10 -106
- data/spec/config/example.yml +16 -0
- data/spec/harvestdor_client_spec.rb +7 -65
- data/spec/purl_xml_spec.rb +16 -16
- metadata +53 -29
- data/lib/harvestdor/oai_harvest.rb +0 -115
- data/spec/config/oai.yml +0 -37
- data/spec/harvestdor_spec.rb +0 -23
- data/spec/oai_harvest_spec.rb +0 -220
- data/spec/oai_integration_spec.rb +0 -139
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: harvestdor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-10-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: oai
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 0.3.0
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 0.3.0
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: faraday
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -122,6 +108,48 @@ dependencies:
|
|
122
108
|
- - ">="
|
123
109
|
- !ruby/object:Gem::Version
|
124
110
|
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: coveralls
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rubocop
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: rubocop-rspec
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
125
153
|
- !ruby/object:Gem::Dependency
|
126
154
|
name: vcr
|
127
155
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,8 +178,7 @@ dependencies:
|
|
150
178
|
- - ">="
|
151
179
|
- !ruby/object:Gem::Version
|
152
180
|
version: '0'
|
153
|
-
description: Harvest DOR object metadata
|
154
|
-
rdf:resource="info:fedora/druid:hy787xj5878") and dates
|
181
|
+
description: Harvest DOR object metadata from a Stanford public purl page
|
155
182
|
email:
|
156
183
|
- ndushay@stanford.edu
|
157
184
|
executables: []
|
@@ -160,6 +187,9 @@ extra_rdoc_files: []
|
|
160
187
|
files:
|
161
188
|
- ".coveralls.yml"
|
162
189
|
- ".gitignore"
|
190
|
+
- ".hound.yml"
|
191
|
+
- ".rubocop.yml"
|
192
|
+
- ".rubocop_todo.yml"
|
163
193
|
- ".travis.yml"
|
164
194
|
- ".yardopts"
|
165
195
|
- Gemfile
|
@@ -168,11 +198,11 @@ files:
|
|
168
198
|
- Rakefile
|
169
199
|
- harvestdor.gemspec
|
170
200
|
- lib/harvestdor.rb
|
201
|
+
- lib/harvestdor/client.rb
|
171
202
|
- lib/harvestdor/errors.rb
|
172
|
-
- lib/harvestdor/oai_harvest.rb
|
173
203
|
- lib/harvestdor/purl_xml.rb
|
174
204
|
- lib/harvestdor/version.rb
|
175
|
-
- spec/config/
|
205
|
+
- spec/config/example.yml
|
176
206
|
- spec/fixtures/vcr_cassettes/content_metadata.yml
|
177
207
|
- spec/fixtures/vcr_cassettes/dc.yml
|
178
208
|
- spec/fixtures/vcr_cassettes/headers.yml
|
@@ -189,12 +219,9 @@ files:
|
|
189
219
|
- spec/fixtures/vcr_cassettes/records.yml
|
190
220
|
- spec/fixtures/vcr_cassettes/rights_metadata.yml
|
191
221
|
- spec/harvestdor_client_spec.rb
|
192
|
-
- spec/harvestdor_spec.rb
|
193
|
-
- spec/oai_harvest_spec.rb
|
194
|
-
- spec/oai_integration_spec.rb
|
195
222
|
- spec/purl_xml_spec.rb
|
196
223
|
- spec/spec_helper.rb
|
197
|
-
homepage:
|
224
|
+
homepage: ''
|
198
225
|
licenses: []
|
199
226
|
metadata: {}
|
200
227
|
post_install_message:
|
@@ -213,12 +240,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
213
240
|
version: '0'
|
214
241
|
requirements: []
|
215
242
|
rubyforge_project:
|
216
|
-
rubygems_version: 2.
|
243
|
+
rubygems_version: 2.4.3
|
217
244
|
signing_key:
|
218
245
|
specification_version: 4
|
219
246
|
summary: Harvest DOR object metadata
|
220
247
|
test_files:
|
221
|
-
- spec/config/
|
248
|
+
- spec/config/example.yml
|
222
249
|
- spec/fixtures/vcr_cassettes/content_metadata.yml
|
223
250
|
- spec/fixtures/vcr_cassettes/dc.yml
|
224
251
|
- spec/fixtures/vcr_cassettes/headers.yml
|
@@ -235,9 +262,6 @@ test_files:
|
|
235
262
|
- spec/fixtures/vcr_cassettes/records.yml
|
236
263
|
- spec/fixtures/vcr_cassettes/rights_metadata.yml
|
237
264
|
- spec/harvestdor_client_spec.rb
|
238
|
-
- spec/harvestdor_spec.rb
|
239
|
-
- spec/oai_harvest_spec.rb
|
240
|
-
- spec/oai_integration_spec.rb
|
241
265
|
- spec/purl_xml_spec.rb
|
242
266
|
- spec/spec_helper.rb
|
243
267
|
has_rdoc:
|
@@ -1,115 +0,0 @@
|
|
1
|
-
require 'oai'
|
2
|
-
|
3
|
-
module Harvestdor
|
4
|
-
|
5
|
-
# Mixin: methods to perform an OAI harvest and iterate over results
|
6
|
-
class Client
|
7
|
-
|
8
|
-
# return Array of OAI::Records from the OAI harvest indicated by OAI params (metadata_prefix, from, until, set)
|
9
|
-
# @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
|
10
|
-
# @return [Array<OAI::Record>] or enumeration over it, if block is given
|
11
|
-
def oai_records oai_args = {}
|
12
|
-
return to_enum(:oai_records, oai_args).to_a unless block_given?
|
13
|
-
|
14
|
-
harvest(:list_records, scrub_oai_args(oai_args)) do |oai_rec|
|
15
|
-
yield oai_rec
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
# return Array of OAI::Headers from the OAI harvest indicated by OAI params (metadata_prefix, from, until, set)
|
20
|
-
# @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
|
21
|
-
# @return [Array<OAI::Header>] or enumeration over it, if block is given
|
22
|
-
def oai_headers oai_args = {}
|
23
|
-
return to_enum(:oai_headers, oai_args).to_a unless block_given?
|
24
|
-
|
25
|
-
harvest(:list_identifiers, scrub_oai_args(oai_args)) do |oai_hdr|
|
26
|
-
yield oai_hdr
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
# return Array of druids contained in the OAI harvest indicated by OAI params (metadata_prefix, from, until, set)
|
31
|
-
# @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
|
32
|
-
# @return [Array<String>] or enumeration over it, if block is given
|
33
|
-
def druids_via_oai oai_args = {}
|
34
|
-
return to_enum(:druids_via_oai, oai_args).to_a unless block_given?
|
35
|
-
|
36
|
-
harvest(:list_identifiers, scrub_oai_args(oai_args)) do |oai_hdr|
|
37
|
-
yield Harvestdor.druid(oai_hdr)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
# get a single OAI record using a get_record OAI request
|
42
|
-
# @param [String] druid (which will be turned into OAI identifier)
|
43
|
-
# @param [String] md_prefix the OAI metadata prefix determining which metadata will be in the retrieved OAI::Record object
|
44
|
-
# @return [OAI::Record] record object retrieved from OAI server
|
45
|
-
def oai_record druid, md_prefix = 'mods'
|
46
|
-
prefix = md_prefix ? md_prefix : config.default_metadata_prefix
|
47
|
-
oai_client.get_record({:identifier => "oai:searchworks.stanford.edu/druid:#{druid}", :metadata_prefix => prefix}).record
|
48
|
-
end
|
49
|
-
|
50
|
-
protected #---------------------------------------------------------------------
|
51
|
-
|
52
|
-
# @param [Hash] oai_args Hash of OAI params (metadata_prefix, from, until, set) to be used in lieu of config default values
|
53
|
-
# @return [Hash] OAI params (metadata_prefix, from, until, set) cleaned up for making harvest request
|
54
|
-
def scrub_oai_args oai_args = {}
|
55
|
-
scrubbed_args={}
|
56
|
-
scrubbed_args[:metadata_prefix] = oai_args.keys.include?(:metadata_prefix) ? oai_args[:metadata_prefix] : config.default_metadata_prefix
|
57
|
-
scrubbed_args[:from] = oai_args.keys.include?(:from) ? oai_args[:from] : config.default_from_date
|
58
|
-
scrubbed_args[:until] = oai_args.keys.include?(:until) ? oai_args[:until] : config.default_until_date
|
59
|
-
scrubbed_args[:set] = oai_args.keys.include?(:set) ? oai_args[:set] : config.default_set
|
60
|
-
scrubbed_args.each { |k, v|
|
61
|
-
scrubbed_args.delete(k) if v.nil? || v == ''
|
62
|
-
}
|
63
|
-
scrubbed_args
|
64
|
-
end
|
65
|
-
|
66
|
-
# harvest OAI headers or OAI records and return a response object with one entry for each record/header retrieved
|
67
|
-
# follows resumption tokens (i.e. chunks are all present in result)
|
68
|
-
# @param [Symbol] verb :list_identifiers or :list_records
|
69
|
-
# @param [Hash] oai_args OAI params (metadata_prefix, from, until, set) used for request
|
70
|
-
# @return response to OAI request, as one enumerable object
|
71
|
-
# TODO: This could be moved into ruby-oai?
|
72
|
-
def harvest (verb, oai_args, &block)
|
73
|
-
response = oai_client.send verb, oai_args
|
74
|
-
while response && response.entries.size > 0
|
75
|
-
response.entries.each &block
|
76
|
-
|
77
|
-
token = response.resumption_token
|
78
|
-
if token.nil? or token.empty?
|
79
|
-
break
|
80
|
-
else
|
81
|
-
response = oai_client.send(verb, :resumption_token => token)
|
82
|
-
end
|
83
|
-
end
|
84
|
-
rescue Faraday::Error::TimeoutError => e
|
85
|
-
logger.error "No response from OAI Provider"
|
86
|
-
logger.error e
|
87
|
-
raise e
|
88
|
-
rescue OAI::Exception => e
|
89
|
-
# possibly unnecessary after ruby-oai 0.0.14
|
90
|
-
logger.error "Received unexpected OAI::Exception"
|
91
|
-
logger.error e
|
92
|
-
raise e
|
93
|
-
end
|
94
|
-
|
95
|
-
end # class OaiHarvester
|
96
|
-
|
97
|
-
end # module Harvestdor
|
98
|
-
|
99
|
-
module OAI
|
100
|
-
class Client
|
101
|
-
# monkey patch to adjust timeouts
|
102
|
-
# Do the actual HTTP get, following any temporary redirects
|
103
|
-
def get(uri)
|
104
|
-
# OLD: response = @http_client.get uri
|
105
|
-
response = @http_client.get do |req|
|
106
|
-
req.url uri
|
107
|
-
# FIXME: hard-coded default settings in harvestdor are used here
|
108
|
-
# values are in seconds
|
109
|
-
req.options[:timeout] = Harvestdor::Client.default_config.http_options.timeout # open/read timeout
|
110
|
-
req.options[:open_timeout] = Harvestdor::Client.default_config.http_options.open_timeout # connection open timeout
|
111
|
-
end
|
112
|
-
response.body
|
113
|
-
end
|
114
|
-
end
|
115
|
-
end
|
data/spec/config/oai.yml
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
# log_dir: directory for log file (default logs, relative to harvestdor gem path)
|
2
|
-
log_dir: spec/test_logs
|
3
|
-
|
4
|
-
# log_name: name of log file (default: harvestdor.log)
|
5
|
-
|
6
|
-
# purl: url for the DOR purl server (used to get ContentMetadata, etc.)
|
7
|
-
# purl: http://purl-test.stanford.edu
|
8
|
-
|
9
|
-
# ---------- OAI harvesting parameters -----------
|
10
|
-
|
11
|
-
# oai_client_debug: true for OAI::Client debug mode (default: false)
|
12
|
-
|
13
|
-
# oai_repository_url: URL of the OAI data provider
|
14
|
-
oai_repository_url: https://dor-oaiprovider-test.stanford.edu/oai
|
15
|
-
|
16
|
-
# default_metadata_prefix: default metadata prefix to be used for harvesting (default: mods)
|
17
|
-
# can be overridden on calls to harvest_ids and harvest_records
|
18
|
-
default_metadata_prefix: mods
|
19
|
-
|
20
|
-
# default_from_date: default from date for harvest (default: nil)
|
21
|
-
# can be overridden on calls to harvest_ids and harvest_records
|
22
|
-
default_from_date: '2012-11-01'
|
23
|
-
|
24
|
-
# default_until_date: default until date for harvest (default: nil)
|
25
|
-
# can be overridden on calls to harvest_ids and harvest_records
|
26
|
-
|
27
|
-
# default_set: default set for harvest (default: nil)
|
28
|
-
# can be overridden on calls to harvest_ids and harvest_records
|
29
|
-
|
30
|
-
# Additional options to pass to Faraday http client (https://github.com/technoweenie/faraday)
|
31
|
-
# timeouts are in seconds; timeout -> open/read, open_timeout -> connection open
|
32
|
-
http_options:
|
33
|
-
ssl:
|
34
|
-
verify: false
|
35
|
-
request:
|
36
|
-
timeout: 121
|
37
|
-
open_timeout: 122
|
data/spec/harvestdor_spec.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe Harvestdor do
|
4
|
-
|
5
|
-
context "#druid" do
|
6
|
-
it "should return the druid part of an oai identifier" do
|
7
|
-
expect(Harvestdor.druid('oai:searchworks.stanford.edu/druid:foo')).to eql('foo')
|
8
|
-
end
|
9
|
-
it "should work with OAI::Header as argument" do
|
10
|
-
header = OAI::Header.new(nil)
|
11
|
-
header.identifier = 'oai:searchworks.stanford.edu/druid:foo'
|
12
|
-
expect(Harvestdor.druid(header)).to eql('foo')
|
13
|
-
end
|
14
|
-
it "should work with OAI::Record as argument" do
|
15
|
-
oai_rec = OAI::Record.new(nil)
|
16
|
-
header = OAI::Header.new(nil)
|
17
|
-
header.identifier = 'oai:searchworks.stanford.edu/druid:foo'
|
18
|
-
oai_rec.header = header
|
19
|
-
expect(Harvestdor.druid(oai_rec)).to eql('foo')
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
end
|
data/spec/oai_harvest_spec.rb
DELETED
@@ -1,220 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe 'Harvestdor::Client oai harvesting' do
|
4
|
-
before(:all) do
|
5
|
-
@harvestdor_client = Harvestdor::Client.new
|
6
|
-
@oai_arg_defaults = {:metadata_prefix => @harvestdor_client.config.default_metadata_prefix,
|
7
|
-
:from => @harvestdor_client.config.default_from_date,
|
8
|
-
:until => @harvestdor_client.config.default_until_date,
|
9
|
-
:set => @harvestdor_client.config.default_set }
|
10
|
-
end
|
11
|
-
|
12
|
-
describe "druids_via_oai" do
|
13
|
-
before(:each) do
|
14
|
-
oai_response = double('oai_response')
|
15
|
-
allow(oai_response).to receive(:entries).and_return(['foo', 'bar'])
|
16
|
-
allow(oai_response).to receive(:resumption_token).and_return('')
|
17
|
-
allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
|
18
|
-
oai_response
|
19
|
-
}
|
20
|
-
end
|
21
|
-
it "should return druids" do
|
22
|
-
header1 = OAI::Header.new(nil)
|
23
|
-
header1.identifier = 'oai:searchworks.stanford.edu/druid:foo'
|
24
|
-
header2 = OAI::Header.new(nil)
|
25
|
-
header2.identifier = 'oai:searchworks.stanford.edu/druid:bar'
|
26
|
-
oai_response = double('oai_response')
|
27
|
-
allow(oai_response).to receive(:entries).and_return([header1, header2])
|
28
|
-
expect(@harvestdor_client.druids_via_oai).to eql(['foo', 'bar'])
|
29
|
-
end
|
30
|
-
it "should have results viewable as an array" do
|
31
|
-
expect(@harvestdor_client.druids_via_oai).to be_an_instance_of(Array)
|
32
|
-
end
|
33
|
-
it "should have enumerable results" do
|
34
|
-
expect(@harvestdor_client.druids_via_oai).to respond_to(:each, :count)
|
35
|
-
end
|
36
|
-
it "should yield to a passed block" do
|
37
|
-
expect { |b| @harvestdor_client.druids_via_oai(&b) }.to yield_successive_args('foo', 'bar')
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
describe "oai_records" do
|
42
|
-
before(:each) do
|
43
|
-
@oai_response = double('oai_response')
|
44
|
-
allow(@oai_response).to receive(:entries).and_return([1, 2])
|
45
|
-
allow(@oai_response).to receive(:resumption_token).and_return('')
|
46
|
-
allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
|
47
|
-
@oai_response
|
48
|
-
}
|
49
|
-
end
|
50
|
-
it "should return OAI::Record objects" do
|
51
|
-
header1 = OAI::Header.new(nil)
|
52
|
-
header1.identifier = 'oai:searchworks.stanford.edu/druid:foo'
|
53
|
-
oai_rec1 = OAI::Record.new(nil)
|
54
|
-
oai_rec1.header = header1
|
55
|
-
header2 = OAI::Header.new(nil)
|
56
|
-
header2.identifier = 'oai:searchworks.stanford.edu/druid:bar'
|
57
|
-
oai_rec2 = OAI::Record.new(nil)
|
58
|
-
oai_rec2.header = header2
|
59
|
-
allow(@oai_response).to receive(:entries).and_return([oai_rec1, oai_rec2])
|
60
|
-
expect(@harvestdor_client.oai_records).to eql([oai_rec1, oai_rec2])
|
61
|
-
end
|
62
|
-
it "should have results viewable as an array" do
|
63
|
-
expect(@harvestdor_client.oai_records).to be_an_instance_of(Array)
|
64
|
-
end
|
65
|
-
it "should have enumerable results" do
|
66
|
-
expect(@harvestdor_client.oai_records).to respond_to(:each, :count)
|
67
|
-
end
|
68
|
-
it "should yield to a passed block" do
|
69
|
-
expect { |b| @harvestdor_client.oai_records(&b) }.to yield_successive_args(1, 2)
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
describe "oai_headers" do
|
74
|
-
before(:each) do
|
75
|
-
@oai_response = double('oai_response')
|
76
|
-
allow(@oai_response).to receive(:entries).and_return([1, 2])
|
77
|
-
allow(@oai_response).to receive(:resumption_token).and_return('')
|
78
|
-
allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
|
79
|
-
@oai_response
|
80
|
-
}
|
81
|
-
end
|
82
|
-
it "should return OAI::Header objects" do
|
83
|
-
header1 = OAI::Header.new(nil)
|
84
|
-
header1.identifier = 'oai:searchworks.stanford.edu/druid:foo'
|
85
|
-
header2 = OAI::Header.new(nil)
|
86
|
-
header2.identifier = 'oai:searchworks.stanford.edu/druid:bar'
|
87
|
-
allow(@oai_response).to receive(:entries).and_return([header1, header2])
|
88
|
-
expect(@harvestdor_client.oai_headers).to eql([header1, header2])
|
89
|
-
end
|
90
|
-
it "should have results viewable as an array" do
|
91
|
-
expect(@harvestdor_client.oai_headers).to be_an_instance_of(Array)
|
92
|
-
end
|
93
|
-
it "should have enumerable results" do
|
94
|
-
expect(@harvestdor_client.oai_headers).to respond_to(:each, :count)
|
95
|
-
end
|
96
|
-
it "should yield to a passed block" do
|
97
|
-
expect { |b| @harvestdor_client.oai_headers(&b) }.to yield_successive_args(1, 2)
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
describe "oai_record (single record request)" do
|
102
|
-
it "should return OAI::Record object" do
|
103
|
-
oai_rec = OAI::Record.new(nil)
|
104
|
-
oai_resp = double('oai_response')
|
105
|
-
allow(oai_resp).to receive(:record).and_return(oai_rec)
|
106
|
-
allow(@harvestdor_client.oai_client).to receive(:get_record){
|
107
|
-
oai_resp
|
108
|
-
}
|
109
|
-
expect(@harvestdor_client.oai_record('druid')).to eql(oai_rec)
|
110
|
-
expect(@harvestdor_client.oai_record('druid', 'mods')).to eql(oai_rec)
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
describe "scrub_oai_args" do
|
115
|
-
before(:all) do
|
116
|
-
@expected_oai_args = @oai_arg_defaults.dup
|
117
|
-
@expected_oai_args.each { |k, v|
|
118
|
-
@expected_oai_args.delete(k) if v.nil? || v.size == 0
|
119
|
-
}
|
120
|
-
|
121
|
-
end
|
122
|
-
it "should use client's default values for OAI arguments if they are not present in the method param hash" do
|
123
|
-
expect(@harvestdor_client.send(:scrub_oai_args)).to eql(@expected_oai_args)
|
124
|
-
end
|
125
|
-
it "should use OAI arguments from the method param hash if they are present" do
|
126
|
-
passed_options = {:metadata_prefix => 'mods', :from => '2012-11-30'}
|
127
|
-
expect(@harvestdor_client.send(:scrub_oai_args, passed_options)).to eql(@expected_oai_args.merge(passed_options))
|
128
|
-
end
|
129
|
-
it "should use nil value for option when it is passed in options hash" do
|
130
|
-
client = Harvestdor::Client.new({:default_from_date => '2012-01-01'})
|
131
|
-
expect(client.config.default_from_date).to eql('2012-01-01')
|
132
|
-
passed_options = {:from => nil}
|
133
|
-
expect(client.send(:scrub_oai_args, passed_options)[:from]).to eql(nil)
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
describe "harvest" do
|
138
|
-
it "should perform a list_records OAI request when first arg is true" do
|
139
|
-
oai_response = double('oai_response')
|
140
|
-
allow(oai_response).to receive(:entries).and_return([])
|
141
|
-
allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
|
142
|
-
oai_response
|
143
|
-
}
|
144
|
-
expect(@harvestdor_client.oai_client).to receive(:list_records)
|
145
|
-
@harvestdor_client.send(:harvest, :list_records, {})
|
146
|
-
end
|
147
|
-
|
148
|
-
it "should perform a list_identifiers OAI request when first arg is false" do
|
149
|
-
oai_response = double('oai_response')
|
150
|
-
allow(oai_response).to receive(:entries).and_return([])
|
151
|
-
allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
|
152
|
-
oai_response
|
153
|
-
}
|
154
|
-
expect(@harvestdor_client.oai_client).to receive(:list_identifiers)
|
155
|
-
@harvestdor_client.send(:harvest, :list_identifiers, {})
|
156
|
-
end
|
157
|
-
|
158
|
-
it "should use passed OAI arguments" do
|
159
|
-
oai_response = double('oai_response')
|
160
|
-
allow(oai_response).to receive(:entries).and_return([])
|
161
|
-
allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
|
162
|
-
oai_response
|
163
|
-
}
|
164
|
-
oai_options_hash = {:metadata_prefix => 'mods', :from => '2012-11-30'}
|
165
|
-
expect(@harvestdor_client.oai_client).to receive(:list_identifiers).with(oai_options_hash)
|
166
|
-
@harvestdor_client.send(:harvest, :list_identifiers, oai_options_hash)
|
167
|
-
end
|
168
|
-
|
169
|
-
it "should yield to a passed block" do
|
170
|
-
oai_response = double('oai_response')
|
171
|
-
allow(oai_response).to receive(:entries).and_return([1, 2])
|
172
|
-
allow(oai_response).to receive(:resumption_token).and_return('')
|
173
|
-
allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
|
174
|
-
oai_response
|
175
|
-
}
|
176
|
-
expect { |b| @harvestdor_client.send(:harvest, :list_records, {}, &b) }.to yield_successive_args(1, 2)
|
177
|
-
end
|
178
|
-
|
179
|
-
context "resumption tokens" do
|
180
|
-
it "should stop processing when no records/headers are received" do
|
181
|
-
oai_response = double('oai_response')
|
182
|
-
allow(oai_response).to receive(:entries).and_return([])
|
183
|
-
allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
|
184
|
-
oai_response
|
185
|
-
}
|
186
|
-
|
187
|
-
i = 0
|
188
|
-
@harvestdor_client.send(:harvest, :list_records, {}) { |record| i += 1 }
|
189
|
-
expect(i).to eql(0)
|
190
|
-
end
|
191
|
-
|
192
|
-
it "should stop processing when the resumption token is empty" do
|
193
|
-
oai_response_with_token = double('oai_response')
|
194
|
-
allow(oai_response_with_token).to receive(:entries).and_return([1,2,3,4,5])
|
195
|
-
allow(oai_response_with_token).to receive(:resumption_token).and_return('')
|
196
|
-
allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
|
197
|
-
oai_response_with_token
|
198
|
-
}
|
199
|
-
|
200
|
-
i = 0
|
201
|
-
@harvestdor_client.send(:harvest, :list_records, {}) { |record| i += 1 }
|
202
|
-
expect(i).to eql(5)
|
203
|
-
end
|
204
|
-
|
205
|
-
it "should stop processing when there was no resumption token" do
|
206
|
-
oai_response_with_token = double('oai_response')
|
207
|
-
allow(oai_response_with_token).to receive(:entries).and_return([1,2,3,4,5])
|
208
|
-
allow(oai_response_with_token).to receive(:resumption_token).and_return(nil)
|
209
|
-
allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
|
210
|
-
oai_response_with_token
|
211
|
-
}
|
212
|
-
|
213
|
-
i = 0
|
214
|
-
@harvestdor_client.send(:harvest, :list_records, {}) { |record| i += 1 }
|
215
|
-
expect(i).to eql(5)
|
216
|
-
end
|
217
|
-
end # resumption tokens
|
218
|
-
end
|
219
|
-
|
220
|
-
end
|
@@ -1,139 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require 'spec_helper'
|
3
|
-
|
4
|
-
describe 'Harvestdor::Client OAI Harvesting Integration Tests', :integration => true do
|
5
|
-
|
6
|
-
before(:all) do
|
7
|
-
@config_yml_path = File.join(File.dirname(__FILE__), "config", "oai.yml")
|
8
|
-
end
|
9
|
-
|
10
|
-
context "test OAI server" do
|
11
|
-
before(:all) do
|
12
|
-
@test_hclient ||= Harvestdor::Client.new({:config_yml_path => @config_yml_path, :oai_client_debug => 'true', :oai_repository_url => 'https://dor-oaiprovider-test.stanford.edu/oai'})
|
13
|
-
end
|
14
|
-
context "withOUT resumption tokens" do
|
15
|
-
before(:all) do
|
16
|
-
@oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_governed_by_hy787xj5878'}
|
17
|
-
end
|
18
|
-
it "should be able to harvest headers" do
|
19
|
-
VCR.use_cassette('headers') do
|
20
|
-
headers = @test_hclient.oai_headers(@oai_args)
|
21
|
-
expect(headers).to be_an_instance_of(Array)
|
22
|
-
expect(headers.size).to be > 0
|
23
|
-
expect(headers.size).to be < 50 # no resumption token
|
24
|
-
expect(headers.first).to be_an_instance_of(OAI::Header)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
it "should be able to harvest records" do
|
28
|
-
VCR.use_cassette('records') do
|
29
|
-
records = @test_hclient.oai_records(@oai_args)
|
30
|
-
expect(records).to be_an_instance_of(Array)
|
31
|
-
expect(records.size).to be > 0
|
32
|
-
expect(records.size).to be < 50 # no resumption token
|
33
|
-
expect(records.first).to be_an_instance_of(OAI::Record)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
context "with resumption tokens" do
|
38
|
-
before(:all) do
|
39
|
-
@oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_member_of_kh678dr8608'}
|
40
|
-
end
|
41
|
-
it "should be able to harvest headers" do
|
42
|
-
skip "need to find small set > 50 on test"
|
43
|
-
headers = @test_hclient.oai_headers(@oai_args)
|
44
|
-
expect(headers).to be_an_instance_of(Array)
|
45
|
-
expect(headers.size).to be > 50
|
46
|
-
expect(headers.first).to be_an_instance_of(OAI::Header)
|
47
|
-
end
|
48
|
-
it "should be able to harvest records" do
|
49
|
-
skip "need to find small set > 50 on test"
|
50
|
-
records = @test_hclient.harvest_records(@oai_args)
|
51
|
-
expect(records).to be_an_instance_of(Array)
|
52
|
-
expect(records.size).to be > 50
|
53
|
-
expect(records.first).to be_an_instance_of(OAI::Record)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
context "oai_record (single record request)" do
|
57
|
-
before(:all) do
|
58
|
-
VCR.use_cassette('jt959wc5586_test') do
|
59
|
-
@rec = @test_hclient.oai_record('jt959wc5586')
|
60
|
-
end
|
61
|
-
end
|
62
|
-
it "should get a single OAI::Record object" do
|
63
|
-
expect(@rec).to be_an_instance_of(OAI::Record)
|
64
|
-
end
|
65
|
-
it "should keep utf-8 encoded characters intact" do
|
66
|
-
xml = Nokogiri::XML(@rec.metadata.to_s)
|
67
|
-
xml.remove_namespaces!
|
68
|
-
expect(xml.root.xpath('/metadata/mods/titleInfo/subTitle').text).to match /^recueil complet des débats législatifs & politiques des chambres françaises/
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
context "production OAI server" do
|
74
|
-
before(:all) do
|
75
|
-
@prod_hclient ||= Harvestdor::Client.new({:config_yml_path => @config_yml_path, :oai_repository_url => 'https://dor-oaiprovider-prod.stanford.edu/oai'})
|
76
|
-
end
|
77
|
-
context "withOUT resumption tokens" do
|
78
|
-
before(:all) do
|
79
|
-
# Reid-Dennis: 47 objects
|
80
|
-
@oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_governed_by_sd064kn5856'}
|
81
|
-
end
|
82
|
-
it "should be able to harvest headers" do
|
83
|
-
VCR.use_cassette('prod_headers') do
|
84
|
-
headers = @prod_hclient.oai_headers(@oai_args)
|
85
|
-
expect(headers).to be_an_instance_of(Array)
|
86
|
-
expect(headers.size).to be > 0
|
87
|
-
expect(headers.size).to be < 50 # no resumption token
|
88
|
-
expect(headers.first).to be_an_instance_of(OAI::Header)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
it "should be able to harvest records" do
|
92
|
-
VCR.use_cassette('prod_records') do
|
93
|
-
records = @prod_hclient.oai_records(@oai_args)
|
94
|
-
expect(records).to be_an_instance_of(Array)
|
95
|
-
expect(records.size).to be > 0
|
96
|
-
expect(records.size).to be < 50 # no resumption token
|
97
|
-
expect(records.first).to be_an_instance_of(OAI::Record)
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|
101
|
-
context "with resumption tokens" do
|
102
|
-
before(:all) do
|
103
|
-
# Archives Parlementaires - 8x objects
|
104
|
-
@oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_member_of_collection_jh957jy1101'}
|
105
|
-
end
|
106
|
-
it "should be able to harvest headers" do
|
107
|
-
VCR.use_cassette('headers_with_resumption') do
|
108
|
-
headers = @prod_hclient.oai_headers(@oai_args)
|
109
|
-
expect(headers).to be_an_instance_of(Array)
|
110
|
-
expect(headers.size).to be > 50
|
111
|
-
expect(headers.first).to be_an_instance_of(OAI::Header)
|
112
|
-
end
|
113
|
-
end
|
114
|
-
it "should be able to harvest records" do
|
115
|
-
skip "the request always seems to time out"
|
116
|
-
records = @prod_hclient.oai_records(@oai_args)
|
117
|
-
expect(records).to be_an_instance_of(Array)
|
118
|
-
expect(records.size).to be > 50
|
119
|
-
expect(records.first).to be_an_instance_of(OAI::Record)
|
120
|
-
end
|
121
|
-
end
|
122
|
-
context "oai_record (single record request)" do
|
123
|
-
before(:all) do
|
124
|
-
VCR.use_cassette('jt959wc5586_prod') do
|
125
|
-
@rec = @prod_hclient.oai_record('jt959wc5586')
|
126
|
-
end
|
127
|
-
end
|
128
|
-
it "should get a single OAI::Record object" do
|
129
|
-
expect(@rec).to be_an_instance_of(OAI::Record)
|
130
|
-
end
|
131
|
-
it "should keep utf-8 encoded characters intact" do
|
132
|
-
xml = Nokogiri::XML(@rec.metadata.to_s)
|
133
|
-
xml.remove_namespaces!
|
134
|
-
expect(xml.root.xpath('/metadata/mods/titleInfo/subTitle').text).to match /^recueil complet des débats législatifs & politiques des chambres françaises/
|
135
|
-
end
|
136
|
-
end
|
137
|
-
end
|
138
|
-
|
139
|
-
end
|