harvestdor 0.0.14 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.hound.yml +2 -0
- data/.rubocop.yml +3 -0
- data/.rubocop_todo.yml +249 -0
- data/.travis.yml +3 -1
- data/Gemfile +0 -4
- data/README.rdoc +13 -58
- data/Rakefile +10 -6
- data/harvestdor.gemspec +9 -14
- data/lib/harvestdor/client.rb +110 -0
- data/lib/harvestdor/errors.rb +8 -8
- data/lib/harvestdor/purl_xml.rb +13 -72
- data/lib/harvestdor/version.rb +1 -1
- data/lib/harvestdor.rb +10 -106
- data/spec/config/example.yml +16 -0
- data/spec/harvestdor_client_spec.rb +7 -65
- data/spec/purl_xml_spec.rb +16 -16
- metadata +53 -29
- data/lib/harvestdor/oai_harvest.rb +0 -115
- data/spec/config/oai.yml +0 -37
- data/spec/harvestdor_spec.rb +0 -23
- data/spec/oai_harvest_spec.rb +0 -220
- data/spec/oai_integration_spec.rb +0 -139
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: harvestdor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-10-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: oai
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 0.3.0
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 0.3.0
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: faraday
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -122,6 +108,48 @@ dependencies:
|
|
122
108
|
- - ">="
|
123
109
|
- !ruby/object:Gem::Version
|
124
110
|
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: coveralls
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rubocop
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: rubocop-rspec
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
125
153
|
- !ruby/object:Gem::Dependency
|
126
154
|
name: vcr
|
127
155
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,8 +178,7 @@ dependencies:
|
|
150
178
|
- - ">="
|
151
179
|
- !ruby/object:Gem::Version
|
152
180
|
version: '0'
|
153
|
-
description: Harvest DOR object metadata
|
154
|
-
rdf:resource="info:fedora/druid:hy787xj5878") and dates
|
181
|
+
description: Harvest DOR object metadata from a Stanford public purl page
|
155
182
|
email:
|
156
183
|
- ndushay@stanford.edu
|
157
184
|
executables: []
|
@@ -160,6 +187,9 @@ extra_rdoc_files: []
|
|
160
187
|
files:
|
161
188
|
- ".coveralls.yml"
|
162
189
|
- ".gitignore"
|
190
|
+
- ".hound.yml"
|
191
|
+
- ".rubocop.yml"
|
192
|
+
- ".rubocop_todo.yml"
|
163
193
|
- ".travis.yml"
|
164
194
|
- ".yardopts"
|
165
195
|
- Gemfile
|
@@ -168,11 +198,11 @@ files:
|
|
168
198
|
- Rakefile
|
169
199
|
- harvestdor.gemspec
|
170
200
|
- lib/harvestdor.rb
|
201
|
+
- lib/harvestdor/client.rb
|
171
202
|
- lib/harvestdor/errors.rb
|
172
|
-
- lib/harvestdor/oai_harvest.rb
|
173
203
|
- lib/harvestdor/purl_xml.rb
|
174
204
|
- lib/harvestdor/version.rb
|
175
|
-
- spec/config/
|
205
|
+
- spec/config/example.yml
|
176
206
|
- spec/fixtures/vcr_cassettes/content_metadata.yml
|
177
207
|
- spec/fixtures/vcr_cassettes/dc.yml
|
178
208
|
- spec/fixtures/vcr_cassettes/headers.yml
|
@@ -189,12 +219,9 @@ files:
|
|
189
219
|
- spec/fixtures/vcr_cassettes/records.yml
|
190
220
|
- spec/fixtures/vcr_cassettes/rights_metadata.yml
|
191
221
|
- spec/harvestdor_client_spec.rb
|
192
|
-
- spec/harvestdor_spec.rb
|
193
|
-
- spec/oai_harvest_spec.rb
|
194
|
-
- spec/oai_integration_spec.rb
|
195
222
|
- spec/purl_xml_spec.rb
|
196
223
|
- spec/spec_helper.rb
|
197
|
-
homepage:
|
224
|
+
homepage: ''
|
198
225
|
licenses: []
|
199
226
|
metadata: {}
|
200
227
|
post_install_message:
|
@@ -213,12 +240,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
213
240
|
version: '0'
|
214
241
|
requirements: []
|
215
242
|
rubyforge_project:
|
216
|
-
rubygems_version: 2.
|
243
|
+
rubygems_version: 2.4.3
|
217
244
|
signing_key:
|
218
245
|
specification_version: 4
|
219
246
|
summary: Harvest DOR object metadata
|
220
247
|
test_files:
|
221
|
-
- spec/config/
|
248
|
+
- spec/config/example.yml
|
222
249
|
- spec/fixtures/vcr_cassettes/content_metadata.yml
|
223
250
|
- spec/fixtures/vcr_cassettes/dc.yml
|
224
251
|
- spec/fixtures/vcr_cassettes/headers.yml
|
@@ -235,9 +262,6 @@ test_files:
|
|
235
262
|
- spec/fixtures/vcr_cassettes/records.yml
|
236
263
|
- spec/fixtures/vcr_cassettes/rights_metadata.yml
|
237
264
|
- spec/harvestdor_client_spec.rb
|
238
|
-
- spec/harvestdor_spec.rb
|
239
|
-
- spec/oai_harvest_spec.rb
|
240
|
-
- spec/oai_integration_spec.rb
|
241
265
|
- spec/purl_xml_spec.rb
|
242
266
|
- spec/spec_helper.rb
|
243
267
|
has_rdoc:
|
@@ -1,115 +0,0 @@
|
|
1
|
-
require 'oai'
|
2
|
-
|
3
|
-
module Harvestdor
|
4
|
-
|
5
|
-
# Mixin: methods to perform an OAI harvest and iterate over results
|
6
|
-
class Client
|
7
|
-
|
8
|
-
# return Array of OAI::Records from the OAI harvest indicated by OAI params (metadata_prefix, from, until, set)
|
9
|
-
# @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
|
10
|
-
# @return [Array<OAI::Record>] or enumeration over it, if block is given
|
11
|
-
def oai_records oai_args = {}
|
12
|
-
return to_enum(:oai_records, oai_args).to_a unless block_given?
|
13
|
-
|
14
|
-
harvest(:list_records, scrub_oai_args(oai_args)) do |oai_rec|
|
15
|
-
yield oai_rec
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
# return Array of OAI::Headers from the OAI harvest indicated by OAI params (metadata_prefix, from, until, set)
|
20
|
-
# @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
|
21
|
-
# @return [Array<OAI::Header>] or enumeration over it, if block is given
|
22
|
-
def oai_headers oai_args = {}
|
23
|
-
return to_enum(:oai_headers, oai_args).to_a unless block_given?
|
24
|
-
|
25
|
-
harvest(:list_identifiers, scrub_oai_args(oai_args)) do |oai_hdr|
|
26
|
-
yield oai_hdr
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
# return Array of druids contained in the OAI harvest indicated by OAI params (metadata_prefix, from, until, set)
|
31
|
-
# @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
|
32
|
-
# @return [Array<String>] or enumeration over it, if block is given
|
33
|
-
def druids_via_oai oai_args = {}
|
34
|
-
return to_enum(:druids_via_oai, oai_args).to_a unless block_given?
|
35
|
-
|
36
|
-
harvest(:list_identifiers, scrub_oai_args(oai_args)) do |oai_hdr|
|
37
|
-
yield Harvestdor.druid(oai_hdr)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
# get a single OAI record using a get_record OAI request
|
42
|
-
# @param [String] druid (which will be turned into OAI identifier)
|
43
|
-
# @param [String] md_prefix the OAI metadata prefix determining which metadata will be in the retrieved OAI::Record object
|
44
|
-
# @return [OAI::Record] record object retrieved from OAI server
|
45
|
-
def oai_record druid, md_prefix = 'mods'
|
46
|
-
prefix = md_prefix ? md_prefix : config.default_metadata_prefix
|
47
|
-
oai_client.get_record({:identifier => "oai:searchworks.stanford.edu/druid:#{druid}", :metadata_prefix => prefix}).record
|
48
|
-
end
|
49
|
-
|
50
|
-
protected #---------------------------------------------------------------------
|
51
|
-
|
52
|
-
# @param [Hash] oai_args Hash of OAI params (metadata_prefix, from, until, set) to be used in lieu of config default values
|
53
|
-
# @return [Hash] OAI params (metadata_prefix, from, until, set) cleaned up for making harvest request
|
54
|
-
def scrub_oai_args oai_args = {}
|
55
|
-
scrubbed_args={}
|
56
|
-
scrubbed_args[:metadata_prefix] = oai_args.keys.include?(:metadata_prefix) ? oai_args[:metadata_prefix] : config.default_metadata_prefix
|
57
|
-
scrubbed_args[:from] = oai_args.keys.include?(:from) ? oai_args[:from] : config.default_from_date
|
58
|
-
scrubbed_args[:until] = oai_args.keys.include?(:until) ? oai_args[:until] : config.default_until_date
|
59
|
-
scrubbed_args[:set] = oai_args.keys.include?(:set) ? oai_args[:set] : config.default_set
|
60
|
-
scrubbed_args.each { |k, v|
|
61
|
-
scrubbed_args.delete(k) if v.nil? || v == ''
|
62
|
-
}
|
63
|
-
scrubbed_args
|
64
|
-
end
|
65
|
-
|
66
|
-
# harvest OAI headers or OAI records and return a response object with one entry for each record/header retrieved
|
67
|
-
# follows resumption tokens (i.e. chunks are all present in result)
|
68
|
-
# @param [Symbol] verb :list_identifiers or :list_records
|
69
|
-
# @param [Hash] oai_args OAI params (metadata_prefix, from, until, set) used for request
|
70
|
-
# @return response to OAI request, as one enumerable object
|
71
|
-
# TODO: This could be moved into ruby-oai?
|
72
|
-
def harvest (verb, oai_args, &block)
|
73
|
-
response = oai_client.send verb, oai_args
|
74
|
-
while response && response.entries.size > 0
|
75
|
-
response.entries.each &block
|
76
|
-
|
77
|
-
token = response.resumption_token
|
78
|
-
if token.nil? or token.empty?
|
79
|
-
break
|
80
|
-
else
|
81
|
-
response = oai_client.send(verb, :resumption_token => token)
|
82
|
-
end
|
83
|
-
end
|
84
|
-
rescue Faraday::Error::TimeoutError => e
|
85
|
-
logger.error "No response from OAI Provider"
|
86
|
-
logger.error e
|
87
|
-
raise e
|
88
|
-
rescue OAI::Exception => e
|
89
|
-
# possibly unnecessary after ruby-oai 0.0.14
|
90
|
-
logger.error "Received unexpected OAI::Exception"
|
91
|
-
logger.error e
|
92
|
-
raise e
|
93
|
-
end
|
94
|
-
|
95
|
-
end # class OaiHarvester
|
96
|
-
|
97
|
-
end # module Harvestdor
|
98
|
-
|
99
|
-
module OAI
|
100
|
-
class Client
|
101
|
-
# monkey patch to adjust timeouts
|
102
|
-
# Do the actual HTTP get, following any temporary redirects
|
103
|
-
def get(uri)
|
104
|
-
# OLD: response = @http_client.get uri
|
105
|
-
response = @http_client.get do |req|
|
106
|
-
req.url uri
|
107
|
-
# FIXME: hard-coded default settings in harvestdor are used here
|
108
|
-
# values are in seconds
|
109
|
-
req.options[:timeout] = Harvestdor::Client.default_config.http_options.timeout # open/read timeout
|
110
|
-
req.options[:open_timeout] = Harvestdor::Client.default_config.http_options.open_timeout # connection open timeout
|
111
|
-
end
|
112
|
-
response.body
|
113
|
-
end
|
114
|
-
end
|
115
|
-
end
|
data/spec/config/oai.yml
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
# log_dir: directory for log file (default logs, relative to harvestdor gem path)
|
2
|
-
log_dir: spec/test_logs
|
3
|
-
|
4
|
-
# log_name: name of log file (default: harvestdor.log)
|
5
|
-
|
6
|
-
# purl: url for the DOR purl server (used to get ContentMetadata, etc.)
|
7
|
-
# purl: http://purl-test.stanford.edu
|
8
|
-
|
9
|
-
# ---------- OAI harvesting parameters -----------
|
10
|
-
|
11
|
-
# oai_client_debug: true for OAI::Client debug mode (default: false)
|
12
|
-
|
13
|
-
# oai_repository_url: URL of the OAI data provider
|
14
|
-
oai_repository_url: https://dor-oaiprovider-test.stanford.edu/oai
|
15
|
-
|
16
|
-
# default_metadata_prefix: default metadata prefix to be used for harvesting (default: mods)
|
17
|
-
# can be overridden on calls to harvest_ids and harvest_records
|
18
|
-
default_metadata_prefix: mods
|
19
|
-
|
20
|
-
# default_from_date: default from date for harvest (default: nil)
|
21
|
-
# can be overridden on calls to harvest_ids and harvest_records
|
22
|
-
default_from_date: '2012-11-01'
|
23
|
-
|
24
|
-
# default_until_date: default until date for harvest (default: nil)
|
25
|
-
# can be overridden on calls to harvest_ids and harvest_records
|
26
|
-
|
27
|
-
# default_set: default set for harvest (default: nil)
|
28
|
-
# can be overridden on calls to harvest_ids and harvest_records
|
29
|
-
|
30
|
-
# Additional options to pass to Faraday http client (https://github.com/technoweenie/faraday)
|
31
|
-
# timeouts are in seconds; timeout -> open/read, open_timeout -> connection open
|
32
|
-
http_options:
|
33
|
-
ssl:
|
34
|
-
verify: false
|
35
|
-
request:
|
36
|
-
timeout: 121
|
37
|
-
open_timeout: 122
|
data/spec/harvestdor_spec.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe Harvestdor do
|
4
|
-
|
5
|
-
context "#druid" do
|
6
|
-
it "should return the druid part of an oai identifier" do
|
7
|
-
expect(Harvestdor.druid('oai:searchworks.stanford.edu/druid:foo')).to eql('foo')
|
8
|
-
end
|
9
|
-
it "should work with OAI::Header as argument" do
|
10
|
-
header = OAI::Header.new(nil)
|
11
|
-
header.identifier = 'oai:searchworks.stanford.edu/druid:foo'
|
12
|
-
expect(Harvestdor.druid(header)).to eql('foo')
|
13
|
-
end
|
14
|
-
it "should work with OAI::Record as argument" do
|
15
|
-
oai_rec = OAI::Record.new(nil)
|
16
|
-
header = OAI::Header.new(nil)
|
17
|
-
header.identifier = 'oai:searchworks.stanford.edu/druid:foo'
|
18
|
-
oai_rec.header = header
|
19
|
-
expect(Harvestdor.druid(oai_rec)).to eql('foo')
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
end
|
data/spec/oai_harvest_spec.rb
DELETED
@@ -1,220 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe 'Harvestdor::Client oai harvesting' do
|
4
|
-
before(:all) do
|
5
|
-
@harvestdor_client = Harvestdor::Client.new
|
6
|
-
@oai_arg_defaults = {:metadata_prefix => @harvestdor_client.config.default_metadata_prefix,
|
7
|
-
:from => @harvestdor_client.config.default_from_date,
|
8
|
-
:until => @harvestdor_client.config.default_until_date,
|
9
|
-
:set => @harvestdor_client.config.default_set }
|
10
|
-
end
|
11
|
-
|
12
|
-
describe "druids_via_oai" do
|
13
|
-
before(:each) do
|
14
|
-
oai_response = double('oai_response')
|
15
|
-
allow(oai_response).to receive(:entries).and_return(['foo', 'bar'])
|
16
|
-
allow(oai_response).to receive(:resumption_token).and_return('')
|
17
|
-
allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
|
18
|
-
oai_response
|
19
|
-
}
|
20
|
-
end
|
21
|
-
it "should return druids" do
|
22
|
-
header1 = OAI::Header.new(nil)
|
23
|
-
header1.identifier = 'oai:searchworks.stanford.edu/druid:foo'
|
24
|
-
header2 = OAI::Header.new(nil)
|
25
|
-
header2.identifier = 'oai:searchworks.stanford.edu/druid:bar'
|
26
|
-
oai_response = double('oai_response')
|
27
|
-
allow(oai_response).to receive(:entries).and_return([header1, header2])
|
28
|
-
expect(@harvestdor_client.druids_via_oai).to eql(['foo', 'bar'])
|
29
|
-
end
|
30
|
-
it "should have results viewable as an array" do
|
31
|
-
expect(@harvestdor_client.druids_via_oai).to be_an_instance_of(Array)
|
32
|
-
end
|
33
|
-
it "should have enumerable results" do
|
34
|
-
expect(@harvestdor_client.druids_via_oai).to respond_to(:each, :count)
|
35
|
-
end
|
36
|
-
it "should yield to a passed block" do
|
37
|
-
expect { |b| @harvestdor_client.druids_via_oai(&b) }.to yield_successive_args('foo', 'bar')
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
describe "oai_records" do
|
42
|
-
before(:each) do
|
43
|
-
@oai_response = double('oai_response')
|
44
|
-
allow(@oai_response).to receive(:entries).and_return([1, 2])
|
45
|
-
allow(@oai_response).to receive(:resumption_token).and_return('')
|
46
|
-
allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
|
47
|
-
@oai_response
|
48
|
-
}
|
49
|
-
end
|
50
|
-
it "should return OAI::Record objects" do
|
51
|
-
header1 = OAI::Header.new(nil)
|
52
|
-
header1.identifier = 'oai:searchworks.stanford.edu/druid:foo'
|
53
|
-
oai_rec1 = OAI::Record.new(nil)
|
54
|
-
oai_rec1.header = header1
|
55
|
-
header2 = OAI::Header.new(nil)
|
56
|
-
header2.identifier = 'oai:searchworks.stanford.edu/druid:bar'
|
57
|
-
oai_rec2 = OAI::Record.new(nil)
|
58
|
-
oai_rec2.header = header2
|
59
|
-
allow(@oai_response).to receive(:entries).and_return([oai_rec1, oai_rec2])
|
60
|
-
expect(@harvestdor_client.oai_records).to eql([oai_rec1, oai_rec2])
|
61
|
-
end
|
62
|
-
it "should have results viewable as an array" do
|
63
|
-
expect(@harvestdor_client.oai_records).to be_an_instance_of(Array)
|
64
|
-
end
|
65
|
-
it "should have enumerable results" do
|
66
|
-
expect(@harvestdor_client.oai_records).to respond_to(:each, :count)
|
67
|
-
end
|
68
|
-
it "should yield to a passed block" do
|
69
|
-
expect { |b| @harvestdor_client.oai_records(&b) }.to yield_successive_args(1, 2)
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
describe "oai_headers" do
|
74
|
-
before(:each) do
|
75
|
-
@oai_response = double('oai_response')
|
76
|
-
allow(@oai_response).to receive(:entries).and_return([1, 2])
|
77
|
-
allow(@oai_response).to receive(:resumption_token).and_return('')
|
78
|
-
allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
|
79
|
-
@oai_response
|
80
|
-
}
|
81
|
-
end
|
82
|
-
it "should return OAI::Header objects" do
|
83
|
-
header1 = OAI::Header.new(nil)
|
84
|
-
header1.identifier = 'oai:searchworks.stanford.edu/druid:foo'
|
85
|
-
header2 = OAI::Header.new(nil)
|
86
|
-
header2.identifier = 'oai:searchworks.stanford.edu/druid:bar'
|
87
|
-
allow(@oai_response).to receive(:entries).and_return([header1, header2])
|
88
|
-
expect(@harvestdor_client.oai_headers).to eql([header1, header2])
|
89
|
-
end
|
90
|
-
it "should have results viewable as an array" do
|
91
|
-
expect(@harvestdor_client.oai_headers).to be_an_instance_of(Array)
|
92
|
-
end
|
93
|
-
it "should have enumerable results" do
|
94
|
-
expect(@harvestdor_client.oai_headers).to respond_to(:each, :count)
|
95
|
-
end
|
96
|
-
it "should yield to a passed block" do
|
97
|
-
expect { |b| @harvestdor_client.oai_headers(&b) }.to yield_successive_args(1, 2)
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
describe "oai_record (single record request)" do
|
102
|
-
it "should return OAI::Record object" do
|
103
|
-
oai_rec = OAI::Record.new(nil)
|
104
|
-
oai_resp = double('oai_response')
|
105
|
-
allow(oai_resp).to receive(:record).and_return(oai_rec)
|
106
|
-
allow(@harvestdor_client.oai_client).to receive(:get_record){
|
107
|
-
oai_resp
|
108
|
-
}
|
109
|
-
expect(@harvestdor_client.oai_record('druid')).to eql(oai_rec)
|
110
|
-
expect(@harvestdor_client.oai_record('druid', 'mods')).to eql(oai_rec)
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
describe "scrub_oai_args" do
|
115
|
-
before(:all) do
|
116
|
-
@expected_oai_args = @oai_arg_defaults.dup
|
117
|
-
@expected_oai_args.each { |k, v|
|
118
|
-
@expected_oai_args.delete(k) if v.nil? || v.size == 0
|
119
|
-
}
|
120
|
-
|
121
|
-
end
|
122
|
-
it "should use client's default values for OAI arguments if they are not present in the method param hash" do
|
123
|
-
expect(@harvestdor_client.send(:scrub_oai_args)).to eql(@expected_oai_args)
|
124
|
-
end
|
125
|
-
it "should use OAI arguments from the method param hash if they are present" do
|
126
|
-
passed_options = {:metadata_prefix => 'mods', :from => '2012-11-30'}
|
127
|
-
expect(@harvestdor_client.send(:scrub_oai_args, passed_options)).to eql(@expected_oai_args.merge(passed_options))
|
128
|
-
end
|
129
|
-
it "should use nil value for option when it is passed in options hash" do
|
130
|
-
client = Harvestdor::Client.new({:default_from_date => '2012-01-01'})
|
131
|
-
expect(client.config.default_from_date).to eql('2012-01-01')
|
132
|
-
passed_options = {:from => nil}
|
133
|
-
expect(client.send(:scrub_oai_args, passed_options)[:from]).to eql(nil)
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
describe "harvest" do
|
138
|
-
it "should perform a list_records OAI request when first arg is true" do
|
139
|
-
oai_response = double('oai_response')
|
140
|
-
allow(oai_response).to receive(:entries).and_return([])
|
141
|
-
allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
|
142
|
-
oai_response
|
143
|
-
}
|
144
|
-
expect(@harvestdor_client.oai_client).to receive(:list_records)
|
145
|
-
@harvestdor_client.send(:harvest, :list_records, {})
|
146
|
-
end
|
147
|
-
|
148
|
-
it "should perform a list_identifiers OAI request when first arg is false" do
|
149
|
-
oai_response = double('oai_response')
|
150
|
-
allow(oai_response).to receive(:entries).and_return([])
|
151
|
-
allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
|
152
|
-
oai_response
|
153
|
-
}
|
154
|
-
expect(@harvestdor_client.oai_client).to receive(:list_identifiers)
|
155
|
-
@harvestdor_client.send(:harvest, :list_identifiers, {})
|
156
|
-
end
|
157
|
-
|
158
|
-
it "should use passed OAI arguments" do
|
159
|
-
oai_response = double('oai_response')
|
160
|
-
allow(oai_response).to receive(:entries).and_return([])
|
161
|
-
allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
|
162
|
-
oai_response
|
163
|
-
}
|
164
|
-
oai_options_hash = {:metadata_prefix => 'mods', :from => '2012-11-30'}
|
165
|
-
expect(@harvestdor_client.oai_client).to receive(:list_identifiers).with(oai_options_hash)
|
166
|
-
@harvestdor_client.send(:harvest, :list_identifiers, oai_options_hash)
|
167
|
-
end
|
168
|
-
|
169
|
-
it "should yield to a passed block" do
|
170
|
-
oai_response = double('oai_response')
|
171
|
-
allow(oai_response).to receive(:entries).and_return([1, 2])
|
172
|
-
allow(oai_response).to receive(:resumption_token).and_return('')
|
173
|
-
allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
|
174
|
-
oai_response
|
175
|
-
}
|
176
|
-
expect { |b| @harvestdor_client.send(:harvest, :list_records, {}, &b) }.to yield_successive_args(1, 2)
|
177
|
-
end
|
178
|
-
|
179
|
-
context "resumption tokens" do
|
180
|
-
it "should stop processing when no records/headers are received" do
|
181
|
-
oai_response = double('oai_response')
|
182
|
-
allow(oai_response).to receive(:entries).and_return([])
|
183
|
-
allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
|
184
|
-
oai_response
|
185
|
-
}
|
186
|
-
|
187
|
-
i = 0
|
188
|
-
@harvestdor_client.send(:harvest, :list_records, {}) { |record| i += 1 }
|
189
|
-
expect(i).to eql(0)
|
190
|
-
end
|
191
|
-
|
192
|
-
it "should stop processing when the resumption token is empty" do
|
193
|
-
oai_response_with_token = double('oai_response')
|
194
|
-
allow(oai_response_with_token).to receive(:entries).and_return([1,2,3,4,5])
|
195
|
-
allow(oai_response_with_token).to receive(:resumption_token).and_return('')
|
196
|
-
allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
|
197
|
-
oai_response_with_token
|
198
|
-
}
|
199
|
-
|
200
|
-
i = 0
|
201
|
-
@harvestdor_client.send(:harvest, :list_records, {}) { |record| i += 1 }
|
202
|
-
expect(i).to eql(5)
|
203
|
-
end
|
204
|
-
|
205
|
-
it "should stop processing when there was no resumption token" do
|
206
|
-
oai_response_with_token = double('oai_response')
|
207
|
-
allow(oai_response_with_token).to receive(:entries).and_return([1,2,3,4,5])
|
208
|
-
allow(oai_response_with_token).to receive(:resumption_token).and_return(nil)
|
209
|
-
allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
|
210
|
-
oai_response_with_token
|
211
|
-
}
|
212
|
-
|
213
|
-
i = 0
|
214
|
-
@harvestdor_client.send(:harvest, :list_records, {}) { |record| i += 1 }
|
215
|
-
expect(i).to eql(5)
|
216
|
-
end
|
217
|
-
end # resumption tokens
|
218
|
-
end
|
219
|
-
|
220
|
-
end
|
@@ -1,139 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require 'spec_helper'
|
3
|
-
|
4
|
-
describe 'Harvestdor::Client OAI Harvesting Integration Tests', :integration => true do
|
5
|
-
|
6
|
-
before(:all) do
|
7
|
-
@config_yml_path = File.join(File.dirname(__FILE__), "config", "oai.yml")
|
8
|
-
end
|
9
|
-
|
10
|
-
context "test OAI server" do
|
11
|
-
before(:all) do
|
12
|
-
@test_hclient ||= Harvestdor::Client.new({:config_yml_path => @config_yml_path, :oai_client_debug => 'true', :oai_repository_url => 'https://dor-oaiprovider-test.stanford.edu/oai'})
|
13
|
-
end
|
14
|
-
context "withOUT resumption tokens" do
|
15
|
-
before(:all) do
|
16
|
-
@oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_governed_by_hy787xj5878'}
|
17
|
-
end
|
18
|
-
it "should be able to harvest headers" do
|
19
|
-
VCR.use_cassette('headers') do
|
20
|
-
headers = @test_hclient.oai_headers(@oai_args)
|
21
|
-
expect(headers).to be_an_instance_of(Array)
|
22
|
-
expect(headers.size).to be > 0
|
23
|
-
expect(headers.size).to be < 50 # no resumption token
|
24
|
-
expect(headers.first).to be_an_instance_of(OAI::Header)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
it "should be able to harvest records" do
|
28
|
-
VCR.use_cassette('records') do
|
29
|
-
records = @test_hclient.oai_records(@oai_args)
|
30
|
-
expect(records).to be_an_instance_of(Array)
|
31
|
-
expect(records.size).to be > 0
|
32
|
-
expect(records.size).to be < 50 # no resumption token
|
33
|
-
expect(records.first).to be_an_instance_of(OAI::Record)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
context "with resumption tokens" do
|
38
|
-
before(:all) do
|
39
|
-
@oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_member_of_kh678dr8608'}
|
40
|
-
end
|
41
|
-
it "should be able to harvest headers" do
|
42
|
-
skip "need to find small set > 50 on test"
|
43
|
-
headers = @test_hclient.oai_headers(@oai_args)
|
44
|
-
expect(headers).to be_an_instance_of(Array)
|
45
|
-
expect(headers.size).to be > 50
|
46
|
-
expect(headers.first).to be_an_instance_of(OAI::Header)
|
47
|
-
end
|
48
|
-
it "should be able to harvest records" do
|
49
|
-
skip "need to find small set > 50 on test"
|
50
|
-
records = @test_hclient.harvest_records(@oai_args)
|
51
|
-
expect(records).to be_an_instance_of(Array)
|
52
|
-
expect(records.size).to be > 50
|
53
|
-
expect(records.first).to be_an_instance_of(OAI::Record)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
context "oai_record (single record request)" do
|
57
|
-
before(:all) do
|
58
|
-
VCR.use_cassette('jt959wc5586_test') do
|
59
|
-
@rec = @test_hclient.oai_record('jt959wc5586')
|
60
|
-
end
|
61
|
-
end
|
62
|
-
it "should get a single OAI::Record object" do
|
63
|
-
expect(@rec).to be_an_instance_of(OAI::Record)
|
64
|
-
end
|
65
|
-
it "should keep utf-8 encoded characters intact" do
|
66
|
-
xml = Nokogiri::XML(@rec.metadata.to_s)
|
67
|
-
xml.remove_namespaces!
|
68
|
-
expect(xml.root.xpath('/metadata/mods/titleInfo/subTitle').text).to match /^recueil complet des débats législatifs & politiques des chambres françaises/
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
context "production OAI server" do
|
74
|
-
before(:all) do
|
75
|
-
@prod_hclient ||= Harvestdor::Client.new({:config_yml_path => @config_yml_path, :oai_repository_url => 'https://dor-oaiprovider-prod.stanford.edu/oai'})
|
76
|
-
end
|
77
|
-
context "withOUT resumption tokens" do
|
78
|
-
before(:all) do
|
79
|
-
# Reid-Dennis: 47 objects
|
80
|
-
@oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_governed_by_sd064kn5856'}
|
81
|
-
end
|
82
|
-
it "should be able to harvest headers" do
|
83
|
-
VCR.use_cassette('prod_headers') do
|
84
|
-
headers = @prod_hclient.oai_headers(@oai_args)
|
85
|
-
expect(headers).to be_an_instance_of(Array)
|
86
|
-
expect(headers.size).to be > 0
|
87
|
-
expect(headers.size).to be < 50 # no resumption token
|
88
|
-
expect(headers.first).to be_an_instance_of(OAI::Header)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
it "should be able to harvest records" do
|
92
|
-
VCR.use_cassette('prod_records') do
|
93
|
-
records = @prod_hclient.oai_records(@oai_args)
|
94
|
-
expect(records).to be_an_instance_of(Array)
|
95
|
-
expect(records.size).to be > 0
|
96
|
-
expect(records.size).to be < 50 # no resumption token
|
97
|
-
expect(records.first).to be_an_instance_of(OAI::Record)
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|
101
|
-
context "with resumption tokens" do
|
102
|
-
before(:all) do
|
103
|
-
# Archives Parlementaires - 8x objects
|
104
|
-
@oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_member_of_collection_jh957jy1101'}
|
105
|
-
end
|
106
|
-
it "should be able to harvest headers" do
|
107
|
-
VCR.use_cassette('headers_with_resumption') do
|
108
|
-
headers = @prod_hclient.oai_headers(@oai_args)
|
109
|
-
expect(headers).to be_an_instance_of(Array)
|
110
|
-
expect(headers.size).to be > 50
|
111
|
-
expect(headers.first).to be_an_instance_of(OAI::Header)
|
112
|
-
end
|
113
|
-
end
|
114
|
-
it "should be able to harvest records" do
|
115
|
-
skip "the request always seems to time out"
|
116
|
-
records = @prod_hclient.oai_records(@oai_args)
|
117
|
-
expect(records).to be_an_instance_of(Array)
|
118
|
-
expect(records.size).to be > 50
|
119
|
-
expect(records.first).to be_an_instance_of(OAI::Record)
|
120
|
-
end
|
121
|
-
end
|
122
|
-
context "oai_record (single record request)" do
|
123
|
-
before(:all) do
|
124
|
-
VCR.use_cassette('jt959wc5586_prod') do
|
125
|
-
@rec = @prod_hclient.oai_record('jt959wc5586')
|
126
|
-
end
|
127
|
-
end
|
128
|
-
it "should get a single OAI::Record object" do
|
129
|
-
expect(@rec).to be_an_instance_of(OAI::Record)
|
130
|
-
end
|
131
|
-
it "should keep utf-8 encoded characters intact" do
|
132
|
-
xml = Nokogiri::XML(@rec.metadata.to_s)
|
133
|
-
xml.remove_namespaces!
|
134
|
-
expect(xml.root.xpath('/metadata/mods/titleInfo/subTitle').text).to match /^recueil complet des débats législatifs & politiques des chambres françaises/
|
135
|
-
end
|
136
|
-
end
|
137
|
-
end
|
138
|
-
|
139
|
-
end
|