harvestdor 0.0.14 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: harvestdor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-29 00:00:00.000000000 Z
11
+ date: 2015-10-23 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: oai
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: 0.3.0
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: 0.3.0
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: faraday
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -122,6 +108,48 @@ dependencies:
122
108
  - - ">="
123
109
  - !ruby/object:Gem::Version
124
110
  version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: coveralls
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rubocop
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: rubocop-rspec
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
125
153
  - !ruby/object:Gem::Dependency
126
154
  name: vcr
127
155
  requirement: !ruby/object:Gem::Requirement
@@ -150,8 +178,7 @@ dependencies:
150
178
  - - ">="
151
179
  - !ruby/object:Gem::Version
152
180
  version: '0'
153
- description: Harvest DOR object metadata via a relationship (e.g. hydra:isGovernedBy
154
- rdf:resource="info:fedora/druid:hy787xj5878") and dates
181
+ description: Harvest DOR object metadata from a Stanford public purl page
155
182
  email:
156
183
  - ndushay@stanford.edu
157
184
  executables: []
@@ -160,6 +187,9 @@ extra_rdoc_files: []
160
187
  files:
161
188
  - ".coveralls.yml"
162
189
  - ".gitignore"
190
+ - ".hound.yml"
191
+ - ".rubocop.yml"
192
+ - ".rubocop_todo.yml"
163
193
  - ".travis.yml"
164
194
  - ".yardopts"
165
195
  - Gemfile
@@ -168,11 +198,11 @@ files:
168
198
  - Rakefile
169
199
  - harvestdor.gemspec
170
200
  - lib/harvestdor.rb
201
+ - lib/harvestdor/client.rb
171
202
  - lib/harvestdor/errors.rb
172
- - lib/harvestdor/oai_harvest.rb
173
203
  - lib/harvestdor/purl_xml.rb
174
204
  - lib/harvestdor/version.rb
175
- - spec/config/oai.yml
205
+ - spec/config/example.yml
176
206
  - spec/fixtures/vcr_cassettes/content_metadata.yml
177
207
  - spec/fixtures/vcr_cassettes/dc.yml
178
208
  - spec/fixtures/vcr_cassettes/headers.yml
@@ -189,12 +219,9 @@ files:
189
219
  - spec/fixtures/vcr_cassettes/records.yml
190
220
  - spec/fixtures/vcr_cassettes/rights_metadata.yml
191
221
  - spec/harvestdor_client_spec.rb
192
- - spec/harvestdor_spec.rb
193
- - spec/oai_harvest_spec.rb
194
- - spec/oai_integration_spec.rb
195
222
  - spec/purl_xml_spec.rb
196
223
  - spec/spec_helper.rb
197
- homepage: https://consul.stanford.edu/display/chimera/Chimera+project
224
+ homepage: ''
198
225
  licenses: []
199
226
  metadata: {}
200
227
  post_install_message:
@@ -213,12 +240,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
213
240
  version: '0'
214
241
  requirements: []
215
242
  rubyforge_project:
216
- rubygems_version: 2.2.2
243
+ rubygems_version: 2.4.3
217
244
  signing_key:
218
245
  specification_version: 4
219
246
  summary: Harvest DOR object metadata
220
247
  test_files:
221
- - spec/config/oai.yml
248
+ - spec/config/example.yml
222
249
  - spec/fixtures/vcr_cassettes/content_metadata.yml
223
250
  - spec/fixtures/vcr_cassettes/dc.yml
224
251
  - spec/fixtures/vcr_cassettes/headers.yml
@@ -235,9 +262,6 @@ test_files:
235
262
  - spec/fixtures/vcr_cassettes/records.yml
236
263
  - spec/fixtures/vcr_cassettes/rights_metadata.yml
237
264
  - spec/harvestdor_client_spec.rb
238
- - spec/harvestdor_spec.rb
239
- - spec/oai_harvest_spec.rb
240
- - spec/oai_integration_spec.rb
241
265
  - spec/purl_xml_spec.rb
242
266
  - spec/spec_helper.rb
243
267
  has_rdoc:
@@ -1,115 +0,0 @@
1
- require 'oai'
2
-
3
- module Harvestdor
4
-
5
- # Mixin: methods to perform an OAI harvest and iterate over results
6
- class Client
7
-
8
- # return Array of OAI::Records from the OAI harvest indicated by OAI params (metadata_prefix, from, until, set)
9
- # @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
10
- # @return [Array<OAI::Record>] or enumeration over it, if block is given
11
- def oai_records oai_args = {}
12
- return to_enum(:oai_records, oai_args).to_a unless block_given?
13
-
14
- harvest(:list_records, scrub_oai_args(oai_args)) do |oai_rec|
15
- yield oai_rec
16
- end
17
- end
18
-
19
- # return Array of OAI::Headers from the OAI harvest indicated by OAI params (metadata_prefix, from, until, set)
20
- # @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
21
- # @return [Array<OAI::Header>] or enumeration over it, if block is given
22
- def oai_headers oai_args = {}
23
- return to_enum(:oai_headers, oai_args).to_a unless block_given?
24
-
25
- harvest(:list_identifiers, scrub_oai_args(oai_args)) do |oai_hdr|
26
- yield oai_hdr
27
- end
28
- end
29
-
30
- # return Array of druids contained in the OAI harvest indicated by OAI params (metadata_prefix, from, until, set)
31
- # @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
32
- # @return [Array<String>] or enumeration over it, if block is given
33
- def druids_via_oai oai_args = {}
34
- return to_enum(:druids_via_oai, oai_args).to_a unless block_given?
35
-
36
- harvest(:list_identifiers, scrub_oai_args(oai_args)) do |oai_hdr|
37
- yield Harvestdor.druid(oai_hdr)
38
- end
39
- end
40
-
41
- # get a single OAI record using a get_record OAI request
42
- # @param [String] druid (which will be turned into OAI identifier)
43
- # @param [String] md_prefix the OAI metadata prefix determining which metadata will be in the retrieved OAI::Record object
44
- # @return [OAI::Record] record object retrieved from OAI server
45
- def oai_record druid, md_prefix = 'mods'
46
- prefix = md_prefix ? md_prefix : config.default_metadata_prefix
47
- oai_client.get_record({:identifier => "oai:searchworks.stanford.edu/druid:#{druid}", :metadata_prefix => prefix}).record
48
- end
49
-
50
- protected #---------------------------------------------------------------------
51
-
52
- # @param [Hash] oai_args Hash of OAI params (metadata_prefix, from, until, set) to be used in lieu of config default values
53
- # @return [Hash] OAI params (metadata_prefix, from, until, set) cleaned up for making harvest request
54
- def scrub_oai_args oai_args = {}
55
- scrubbed_args={}
56
- scrubbed_args[:metadata_prefix] = oai_args.keys.include?(:metadata_prefix) ? oai_args[:metadata_prefix] : config.default_metadata_prefix
57
- scrubbed_args[:from] = oai_args.keys.include?(:from) ? oai_args[:from] : config.default_from_date
58
- scrubbed_args[:until] = oai_args.keys.include?(:until) ? oai_args[:until] : config.default_until_date
59
- scrubbed_args[:set] = oai_args.keys.include?(:set) ? oai_args[:set] : config.default_set
60
- scrubbed_args.each { |k, v|
61
- scrubbed_args.delete(k) if v.nil? || v == ''
62
- }
63
- scrubbed_args
64
- end
65
-
66
- # harvest OAI headers or OAI records and return a response object with one entry for each record/header retrieved
67
- # follows resumption tokens (i.e. chunks are all present in result)
68
- # @param [Symbol] verb :list_identifiers or :list_records
69
- # @param [Hash] oai_args OAI params (metadata_prefix, from, until, set) used for request
70
- # @return response to OAI request, as one enumerable object
71
- # TODO: This could be moved into ruby-oai?
72
- def harvest (verb, oai_args, &block)
73
- response = oai_client.send verb, oai_args
74
- while response && response.entries.size > 0
75
- response.entries.each &block
76
-
77
- token = response.resumption_token
78
- if token.nil? or token.empty?
79
- break
80
- else
81
- response = oai_client.send(verb, :resumption_token => token)
82
- end
83
- end
84
- rescue Faraday::Error::TimeoutError => e
85
- logger.error "No response from OAI Provider"
86
- logger.error e
87
- raise e
88
- rescue OAI::Exception => e
89
- # possibly unnecessary after ruby-oai 0.0.14
90
- logger.error "Received unexpected OAI::Exception"
91
- logger.error e
92
- raise e
93
- end
94
-
95
- end # class OaiHarvester
96
-
97
- end # module Harvestdor
98
-
99
- module OAI
100
- class Client
101
- # monkey patch to adjust timeouts
102
- # Do the actual HTTP get, following any temporary redirects
103
- def get(uri)
104
- # OLD: response = @http_client.get uri
105
- response = @http_client.get do |req|
106
- req.url uri
107
- # FIXME: hard-coded default settings in harvestdor are used here
108
- # values are in seconds
109
- req.options[:timeout] = Harvestdor::Client.default_config.http_options.timeout # open/read timeout
110
- req.options[:open_timeout] = Harvestdor::Client.default_config.http_options.open_timeout # connection open timeout
111
- end
112
- response.body
113
- end
114
- end
115
- end
data/spec/config/oai.yml DELETED
@@ -1,37 +0,0 @@
1
- # log_dir: directory for log file (default logs, relative to harvestdor gem path)
2
- log_dir: spec/test_logs
3
-
4
- # log_name: name of log file (default: harvestdor.log)
5
-
6
- # purl: url for the DOR purl server (used to get ContentMetadata, etc.)
7
- # purl: http://purl-test.stanford.edu
8
-
9
- # ---------- OAI harvesting parameters -----------
10
-
11
- # oai_client_debug: true for OAI::Client debug mode (default: false)
12
-
13
- # oai_repository_url: URL of the OAI data provider
14
- oai_repository_url: https://dor-oaiprovider-test.stanford.edu/oai
15
-
16
- # default_metadata_prefix: default metadata prefix to be used for harvesting (default: mods)
17
- # can be overridden on calls to harvest_ids and harvest_records
18
- default_metadata_prefix: mods
19
-
20
- # default_from_date: default from date for harvest (default: nil)
21
- # can be overridden on calls to harvest_ids and harvest_records
22
- default_from_date: '2012-11-01'
23
-
24
- # default_until_date: default until date for harvest (default: nil)
25
- # can be overridden on calls to harvest_ids and harvest_records
26
-
27
- # default_set: default set for harvest (default: nil)
28
- # can be overridden on calls to harvest_ids and harvest_records
29
-
30
- # Additional options to pass to Faraday http client (https://github.com/technoweenie/faraday)
31
- # timeouts are in seconds; timeout -> open/read, open_timeout -> connection open
32
- http_options:
33
- ssl:
34
- verify: false
35
- request:
36
- timeout: 121
37
- open_timeout: 122
@@ -1,23 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe Harvestdor do
4
-
5
- context "#druid" do
6
- it "should return the druid part of an oai identifier" do
7
- expect(Harvestdor.druid('oai:searchworks.stanford.edu/druid:foo')).to eql('foo')
8
- end
9
- it "should work with OAI::Header as argument" do
10
- header = OAI::Header.new(nil)
11
- header.identifier = 'oai:searchworks.stanford.edu/druid:foo'
12
- expect(Harvestdor.druid(header)).to eql('foo')
13
- end
14
- it "should work with OAI::Record as argument" do
15
- oai_rec = OAI::Record.new(nil)
16
- header = OAI::Header.new(nil)
17
- header.identifier = 'oai:searchworks.stanford.edu/druid:foo'
18
- oai_rec.header = header
19
- expect(Harvestdor.druid(oai_rec)).to eql('foo')
20
- end
21
- end
22
-
23
- end
@@ -1,220 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe 'Harvestdor::Client oai harvesting' do
4
- before(:all) do
5
- @harvestdor_client = Harvestdor::Client.new
6
- @oai_arg_defaults = {:metadata_prefix => @harvestdor_client.config.default_metadata_prefix,
7
- :from => @harvestdor_client.config.default_from_date,
8
- :until => @harvestdor_client.config.default_until_date,
9
- :set => @harvestdor_client.config.default_set }
10
- end
11
-
12
- describe "druids_via_oai" do
13
- before(:each) do
14
- oai_response = double('oai_response')
15
- allow(oai_response).to receive(:entries).and_return(['foo', 'bar'])
16
- allow(oai_response).to receive(:resumption_token).and_return('')
17
- allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
18
- oai_response
19
- }
20
- end
21
- it "should return druids" do
22
- header1 = OAI::Header.new(nil)
23
- header1.identifier = 'oai:searchworks.stanford.edu/druid:foo'
24
- header2 = OAI::Header.new(nil)
25
- header2.identifier = 'oai:searchworks.stanford.edu/druid:bar'
26
- oai_response = double('oai_response')
27
- allow(oai_response).to receive(:entries).and_return([header1, header2])
28
- expect(@harvestdor_client.druids_via_oai).to eql(['foo', 'bar'])
29
- end
30
- it "should have results viewable as an array" do
31
- expect(@harvestdor_client.druids_via_oai).to be_an_instance_of(Array)
32
- end
33
- it "should have enumerable results" do
34
- expect(@harvestdor_client.druids_via_oai).to respond_to(:each, :count)
35
- end
36
- it "should yield to a passed block" do
37
- expect { |b| @harvestdor_client.druids_via_oai(&b) }.to yield_successive_args('foo', 'bar')
38
- end
39
- end
40
-
41
- describe "oai_records" do
42
- before(:each) do
43
- @oai_response = double('oai_response')
44
- allow(@oai_response).to receive(:entries).and_return([1, 2])
45
- allow(@oai_response).to receive(:resumption_token).and_return('')
46
- allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
47
- @oai_response
48
- }
49
- end
50
- it "should return OAI::Record objects" do
51
- header1 = OAI::Header.new(nil)
52
- header1.identifier = 'oai:searchworks.stanford.edu/druid:foo'
53
- oai_rec1 = OAI::Record.new(nil)
54
- oai_rec1.header = header1
55
- header2 = OAI::Header.new(nil)
56
- header2.identifier = 'oai:searchworks.stanford.edu/druid:bar'
57
- oai_rec2 = OAI::Record.new(nil)
58
- oai_rec2.header = header2
59
- allow(@oai_response).to receive(:entries).and_return([oai_rec1, oai_rec2])
60
- expect(@harvestdor_client.oai_records).to eql([oai_rec1, oai_rec2])
61
- end
62
- it "should have results viewable as an array" do
63
- expect(@harvestdor_client.oai_records).to be_an_instance_of(Array)
64
- end
65
- it "should have enumerable results" do
66
- expect(@harvestdor_client.oai_records).to respond_to(:each, :count)
67
- end
68
- it "should yield to a passed block" do
69
- expect { |b| @harvestdor_client.oai_records(&b) }.to yield_successive_args(1, 2)
70
- end
71
- end
72
-
73
- describe "oai_headers" do
74
- before(:each) do
75
- @oai_response = double('oai_response')
76
- allow(@oai_response).to receive(:entries).and_return([1, 2])
77
- allow(@oai_response).to receive(:resumption_token).and_return('')
78
- allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
79
- @oai_response
80
- }
81
- end
82
- it "should return OAI::Header objects" do
83
- header1 = OAI::Header.new(nil)
84
- header1.identifier = 'oai:searchworks.stanford.edu/druid:foo'
85
- header2 = OAI::Header.new(nil)
86
- header2.identifier = 'oai:searchworks.stanford.edu/druid:bar'
87
- allow(@oai_response).to receive(:entries).and_return([header1, header2])
88
- expect(@harvestdor_client.oai_headers).to eql([header1, header2])
89
- end
90
- it "should have results viewable as an array" do
91
- expect(@harvestdor_client.oai_headers).to be_an_instance_of(Array)
92
- end
93
- it "should have enumerable results" do
94
- expect(@harvestdor_client.oai_headers).to respond_to(:each, :count)
95
- end
96
- it "should yield to a passed block" do
97
- expect { |b| @harvestdor_client.oai_headers(&b) }.to yield_successive_args(1, 2)
98
- end
99
- end
100
-
101
- describe "oai_record (single record request)" do
102
- it "should return OAI::Record object" do
103
- oai_rec = OAI::Record.new(nil)
104
- oai_resp = double('oai_response')
105
- allow(oai_resp).to receive(:record).and_return(oai_rec)
106
- allow(@harvestdor_client.oai_client).to receive(:get_record){
107
- oai_resp
108
- }
109
- expect(@harvestdor_client.oai_record('druid')).to eql(oai_rec)
110
- expect(@harvestdor_client.oai_record('druid', 'mods')).to eql(oai_rec)
111
- end
112
- end
113
-
114
- describe "scrub_oai_args" do
115
- before(:all) do
116
- @expected_oai_args = @oai_arg_defaults.dup
117
- @expected_oai_args.each { |k, v|
118
- @expected_oai_args.delete(k) if v.nil? || v.size == 0
119
- }
120
-
121
- end
122
- it "should use client's default values for OAI arguments if they are not present in the method param hash" do
123
- expect(@harvestdor_client.send(:scrub_oai_args)).to eql(@expected_oai_args)
124
- end
125
- it "should use OAI arguments from the method param hash if they are present" do
126
- passed_options = {:metadata_prefix => 'mods', :from => '2012-11-30'}
127
- expect(@harvestdor_client.send(:scrub_oai_args, passed_options)).to eql(@expected_oai_args.merge(passed_options))
128
- end
129
- it "should use nil value for option when it is passed in options hash" do
130
- client = Harvestdor::Client.new({:default_from_date => '2012-01-01'})
131
- expect(client.config.default_from_date).to eql('2012-01-01')
132
- passed_options = {:from => nil}
133
- expect(client.send(:scrub_oai_args, passed_options)[:from]).to eql(nil)
134
- end
135
- end
136
-
137
- describe "harvest" do
138
- it "should perform a list_records OAI request when first arg is true" do
139
- oai_response = double('oai_response')
140
- allow(oai_response).to receive(:entries).and_return([])
141
- allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
142
- oai_response
143
- }
144
- expect(@harvestdor_client.oai_client).to receive(:list_records)
145
- @harvestdor_client.send(:harvest, :list_records, {})
146
- end
147
-
148
- it "should perform a list_identifiers OAI request when first arg is false" do
149
- oai_response = double('oai_response')
150
- allow(oai_response).to receive(:entries).and_return([])
151
- allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
152
- oai_response
153
- }
154
- expect(@harvestdor_client.oai_client).to receive(:list_identifiers)
155
- @harvestdor_client.send(:harvest, :list_identifiers, {})
156
- end
157
-
158
- it "should use passed OAI arguments" do
159
- oai_response = double('oai_response')
160
- allow(oai_response).to receive(:entries).and_return([])
161
- allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
162
- oai_response
163
- }
164
- oai_options_hash = {:metadata_prefix => 'mods', :from => '2012-11-30'}
165
- expect(@harvestdor_client.oai_client).to receive(:list_identifiers).with(oai_options_hash)
166
- @harvestdor_client.send(:harvest, :list_identifiers, oai_options_hash)
167
- end
168
-
169
- it "should yield to a passed block" do
170
- oai_response = double('oai_response')
171
- allow(oai_response).to receive(:entries).and_return([1, 2])
172
- allow(oai_response).to receive(:resumption_token).and_return('')
173
- allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
174
- oai_response
175
- }
176
- expect { |b| @harvestdor_client.send(:harvest, :list_records, {}, &b) }.to yield_successive_args(1, 2)
177
- end
178
-
179
- context "resumption tokens" do
180
- it "should stop processing when no records/headers are received" do
181
- oai_response = double('oai_response')
182
- allow(oai_response).to receive(:entries).and_return([])
183
- allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
184
- oai_response
185
- }
186
-
187
- i = 0
188
- @harvestdor_client.send(:harvest, :list_records, {}) { |record| i += 1 }
189
- expect(i).to eql(0)
190
- end
191
-
192
- it "should stop processing when the resumption token is empty" do
193
- oai_response_with_token = double('oai_response')
194
- allow(oai_response_with_token).to receive(:entries).and_return([1,2,3,4,5])
195
- allow(oai_response_with_token).to receive(:resumption_token).and_return('')
196
- allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
197
- oai_response_with_token
198
- }
199
-
200
- i = 0
201
- @harvestdor_client.send(:harvest, :list_records, {}) { |record| i += 1 }
202
- expect(i).to eql(5)
203
- end
204
-
205
- it "should stop processing when there was no resumption token" do
206
- oai_response_with_token = double('oai_response')
207
- allow(oai_response_with_token).to receive(:entries).and_return([1,2,3,4,5])
208
- allow(oai_response_with_token).to receive(:resumption_token).and_return(nil)
209
- allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
210
- oai_response_with_token
211
- }
212
-
213
- i = 0
214
- @harvestdor_client.send(:harvest, :list_records, {}) { |record| i += 1 }
215
- expect(i).to eql(5)
216
- end
217
- end # resumption tokens
218
- end
219
-
220
- end
@@ -1,139 +0,0 @@
1
- # encoding: utf-8
2
- require 'spec_helper'
3
-
4
- describe 'Harvestdor::Client OAI Harvesting Integration Tests', :integration => true do
5
-
6
- before(:all) do
7
- @config_yml_path = File.join(File.dirname(__FILE__), "config", "oai.yml")
8
- end
9
-
10
- context "test OAI server" do
11
- before(:all) do
12
- @test_hclient ||= Harvestdor::Client.new({:config_yml_path => @config_yml_path, :oai_client_debug => 'true', :oai_repository_url => 'https://dor-oaiprovider-test.stanford.edu/oai'})
13
- end
14
- context "withOUT resumption tokens" do
15
- before(:all) do
16
- @oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_governed_by_hy787xj5878'}
17
- end
18
- it "should be able to harvest headers" do
19
- VCR.use_cassette('headers') do
20
- headers = @test_hclient.oai_headers(@oai_args)
21
- expect(headers).to be_an_instance_of(Array)
22
- expect(headers.size).to be > 0
23
- expect(headers.size).to be < 50 # no resumption token
24
- expect(headers.first).to be_an_instance_of(OAI::Header)
25
- end
26
- end
27
- it "should be able to harvest records" do
28
- VCR.use_cassette('records') do
29
- records = @test_hclient.oai_records(@oai_args)
30
- expect(records).to be_an_instance_of(Array)
31
- expect(records.size).to be > 0
32
- expect(records.size).to be < 50 # no resumption token
33
- expect(records.first).to be_an_instance_of(OAI::Record)
34
- end
35
- end
36
- end
37
- context "with resumption tokens" do
38
- before(:all) do
39
- @oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_member_of_kh678dr8608'}
40
- end
41
- it "should be able to harvest headers" do
42
- skip "need to find small set > 50 on test"
43
- headers = @test_hclient.oai_headers(@oai_args)
44
- expect(headers).to be_an_instance_of(Array)
45
- expect(headers.size).to be > 50
46
- expect(headers.first).to be_an_instance_of(OAI::Header)
47
- end
48
- it "should be able to harvest records" do
49
- skip "need to find small set > 50 on test"
50
- records = @test_hclient.harvest_records(@oai_args)
51
- expect(records).to be_an_instance_of(Array)
52
- expect(records.size).to be > 50
53
- expect(records.first).to be_an_instance_of(OAI::Record)
54
- end
55
- end
56
- context "oai_record (single record request)" do
57
- before(:all) do
58
- VCR.use_cassette('jt959wc5586_test') do
59
- @rec = @test_hclient.oai_record('jt959wc5586')
60
- end
61
- end
62
- it "should get a single OAI::Record object" do
63
- expect(@rec).to be_an_instance_of(OAI::Record)
64
- end
65
- it "should keep utf-8 encoded characters intact" do
66
- xml = Nokogiri::XML(@rec.metadata.to_s)
67
- xml.remove_namespaces!
68
- expect(xml.root.xpath('/metadata/mods/titleInfo/subTitle').text).to match /^recueil complet des débats législatifs & politiques des chambres françaises/
69
- end
70
- end
71
- end
72
-
73
- context "production OAI server" do
74
- before(:all) do
75
- @prod_hclient ||= Harvestdor::Client.new({:config_yml_path => @config_yml_path, :oai_repository_url => 'https://dor-oaiprovider-prod.stanford.edu/oai'})
76
- end
77
- context "withOUT resumption tokens" do
78
- before(:all) do
79
- # Reid-Dennis: 47 objects
80
- @oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_governed_by_sd064kn5856'}
81
- end
82
- it "should be able to harvest headers" do
83
- VCR.use_cassette('prod_headers') do
84
- headers = @prod_hclient.oai_headers(@oai_args)
85
- expect(headers).to be_an_instance_of(Array)
86
- expect(headers.size).to be > 0
87
- expect(headers.size).to be < 50 # no resumption token
88
- expect(headers.first).to be_an_instance_of(OAI::Header)
89
- end
90
- end
91
- it "should be able to harvest records" do
92
- VCR.use_cassette('prod_records') do
93
- records = @prod_hclient.oai_records(@oai_args)
94
- expect(records).to be_an_instance_of(Array)
95
- expect(records.size).to be > 0
96
- expect(records.size).to be < 50 # no resumption token
97
- expect(records.first).to be_an_instance_of(OAI::Record)
98
- end
99
- end
100
- end
101
- context "with resumption tokens" do
102
- before(:all) do
103
- # Archives Parlementaires - 8x objects
104
- @oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_member_of_collection_jh957jy1101'}
105
- end
106
- it "should be able to harvest headers" do
107
- VCR.use_cassette('headers_with_resumption') do
108
- headers = @prod_hclient.oai_headers(@oai_args)
109
- expect(headers).to be_an_instance_of(Array)
110
- expect(headers.size).to be > 50
111
- expect(headers.first).to be_an_instance_of(OAI::Header)
112
- end
113
- end
114
- it "should be able to harvest records" do
115
- skip "the request always seems to time out"
116
- records = @prod_hclient.oai_records(@oai_args)
117
- expect(records).to be_an_instance_of(Array)
118
- expect(records.size).to be > 50
119
- expect(records.first).to be_an_instance_of(OAI::Record)
120
- end
121
- end
122
- context "oai_record (single record request)" do
123
- before(:all) do
124
- VCR.use_cassette('jt959wc5586_prod') do
125
- @rec = @prod_hclient.oai_record('jt959wc5586')
126
- end
127
- end
128
- it "should get a single OAI::Record object" do
129
- expect(@rec).to be_an_instance_of(OAI::Record)
130
- end
131
- it "should keep utf-8 encoded characters intact" do
132
- xml = Nokogiri::XML(@rec.metadata.to_s)
133
- xml.remove_namespaces!
134
- expect(xml.root.xpath('/metadata/mods/titleInfo/subTitle').text).to match /^recueil complet des débats législatifs & politiques des chambres françaises/
135
- end
136
- end
137
- end
138
-
139
- end