harvestdor 0.0.14 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: harvestdor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-29 00:00:00.000000000 Z
11
+ date: 2015-10-23 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: oai
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: 0.3.0
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: 0.3.0
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: faraday
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -122,6 +108,48 @@ dependencies:
122
108
  - - ">="
123
109
  - !ruby/object:Gem::Version
124
110
  version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: coveralls
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rubocop
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: rubocop-rspec
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
125
153
  - !ruby/object:Gem::Dependency
126
154
  name: vcr
127
155
  requirement: !ruby/object:Gem::Requirement
@@ -150,8 +178,7 @@ dependencies:
150
178
  - - ">="
151
179
  - !ruby/object:Gem::Version
152
180
  version: '0'
153
- description: Harvest DOR object metadata via a relationship (e.g. hydra:isGovernedBy
154
- rdf:resource="info:fedora/druid:hy787xj5878") and dates
181
+ description: Harvest DOR object metadata from a Stanford public purl page
155
182
  email:
156
183
  - ndushay@stanford.edu
157
184
  executables: []
@@ -160,6 +187,9 @@ extra_rdoc_files: []
160
187
  files:
161
188
  - ".coveralls.yml"
162
189
  - ".gitignore"
190
+ - ".hound.yml"
191
+ - ".rubocop.yml"
192
+ - ".rubocop_todo.yml"
163
193
  - ".travis.yml"
164
194
  - ".yardopts"
165
195
  - Gemfile
@@ -168,11 +198,11 @@ files:
168
198
  - Rakefile
169
199
  - harvestdor.gemspec
170
200
  - lib/harvestdor.rb
201
+ - lib/harvestdor/client.rb
171
202
  - lib/harvestdor/errors.rb
172
- - lib/harvestdor/oai_harvest.rb
173
203
  - lib/harvestdor/purl_xml.rb
174
204
  - lib/harvestdor/version.rb
175
- - spec/config/oai.yml
205
+ - spec/config/example.yml
176
206
  - spec/fixtures/vcr_cassettes/content_metadata.yml
177
207
  - spec/fixtures/vcr_cassettes/dc.yml
178
208
  - spec/fixtures/vcr_cassettes/headers.yml
@@ -189,12 +219,9 @@ files:
189
219
  - spec/fixtures/vcr_cassettes/records.yml
190
220
  - spec/fixtures/vcr_cassettes/rights_metadata.yml
191
221
  - spec/harvestdor_client_spec.rb
192
- - spec/harvestdor_spec.rb
193
- - spec/oai_harvest_spec.rb
194
- - spec/oai_integration_spec.rb
195
222
  - spec/purl_xml_spec.rb
196
223
  - spec/spec_helper.rb
197
- homepage: https://consul.stanford.edu/display/chimera/Chimera+project
224
+ homepage: ''
198
225
  licenses: []
199
226
  metadata: {}
200
227
  post_install_message:
@@ -213,12 +240,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
213
240
  version: '0'
214
241
  requirements: []
215
242
  rubyforge_project:
216
- rubygems_version: 2.2.2
243
+ rubygems_version: 2.4.3
217
244
  signing_key:
218
245
  specification_version: 4
219
246
  summary: Harvest DOR object metadata
220
247
  test_files:
221
- - spec/config/oai.yml
248
+ - spec/config/example.yml
222
249
  - spec/fixtures/vcr_cassettes/content_metadata.yml
223
250
  - spec/fixtures/vcr_cassettes/dc.yml
224
251
  - spec/fixtures/vcr_cassettes/headers.yml
@@ -235,9 +262,6 @@ test_files:
235
262
  - spec/fixtures/vcr_cassettes/records.yml
236
263
  - spec/fixtures/vcr_cassettes/rights_metadata.yml
237
264
  - spec/harvestdor_client_spec.rb
238
- - spec/harvestdor_spec.rb
239
- - spec/oai_harvest_spec.rb
240
- - spec/oai_integration_spec.rb
241
265
  - spec/purl_xml_spec.rb
242
266
  - spec/spec_helper.rb
243
267
  has_rdoc:
data/lib/harvestdor/oai_harvest.rb DELETED
@@ -1,115 +0,0 @@
1
- require 'oai'
2
-
3
- module Harvestdor
4
-
5
- # Mixin: methods to perform an OAI harvest and iterate over results
6
- class Client
7
-
8
- # return Array of OAI::Records from the OAI harvest indicated by OAI params (metadata_prefix, from, until, set)
9
- # @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
10
- # @return [Array<OAI::Record>] or enumeration over it, if block is given
11
- def oai_records oai_args = {}
12
- return to_enum(:oai_records, oai_args).to_a unless block_given?
13
-
14
- harvest(:list_records, scrub_oai_args(oai_args)) do |oai_rec|
15
- yield oai_rec
16
- end
17
- end
18
-
19
- # return Array of OAI::Headers from the OAI harvest indicated by OAI params (metadata_prefix, from, until, set)
20
- # @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
21
- # @return [Array<OAI::Header>] or enumeration over it, if block is given
22
- def oai_headers oai_args = {}
23
- return to_enum(:oai_headers, oai_args).to_a unless block_given?
24
-
25
- harvest(:list_identifiers, scrub_oai_args(oai_args)) do |oai_hdr|
26
- yield oai_hdr
27
- end
28
- end
29
-
30
- # return Array of druids contained in the OAI harvest indicated by OAI params (metadata_prefix, from, until, set)
31
- # @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
32
- # @return [Array<String>] or enumeration over it, if block is given
33
- def druids_via_oai oai_args = {}
34
- return to_enum(:druids_via_oai, oai_args).to_a unless block_given?
35
-
36
- harvest(:list_identifiers, scrub_oai_args(oai_args)) do |oai_hdr|
37
- yield Harvestdor.druid(oai_hdr)
38
- end
39
- end
40
-
41
- # get a single OAI record using a get_record OAI request
42
- # @param [String] druid (which will be turned into OAI identifier)
43
- # @param [String] md_prefix the OAI metadata prefix determining which metadata will be in the retrieved OAI::Record object
44
- # @return [OAI::Record] record object retrieved from OAI server
45
- def oai_record druid, md_prefix = 'mods'
46
- prefix = md_prefix ? md_prefix : config.default_metadata_prefix
47
- oai_client.get_record({:identifier => "oai:searchworks.stanford.edu/druid:#{druid}", :metadata_prefix => prefix}).record
48
- end
49
-
50
- protected #---------------------------------------------------------------------
51
-
52
- # @param [Hash] oai_args Hash of OAI params (metadata_prefix, from, until, set) to be used in lieu of config default values
53
- # @return [Hash] OAI params (metadata_prefix, from, until, set) cleaned up for making harvest request
54
- def scrub_oai_args oai_args = {}
55
- scrubbed_args={}
56
- scrubbed_args[:metadata_prefix] = oai_args.keys.include?(:metadata_prefix) ? oai_args[:metadata_prefix] : config.default_metadata_prefix
57
- scrubbed_args[:from] = oai_args.keys.include?(:from) ? oai_args[:from] : config.default_from_date
58
- scrubbed_args[:until] = oai_args.keys.include?(:until) ? oai_args[:until] : config.default_until_date
59
- scrubbed_args[:set] = oai_args.keys.include?(:set) ? oai_args[:set] : config.default_set
60
- scrubbed_args.each { |k, v|
61
- scrubbed_args.delete(k) if v.nil? || v == ''
62
- }
63
- scrubbed_args
64
- end
65
-
66
- # harvest OAI headers or OAI records and return a response object with one entry for each record/header retrieved
67
- # follows resumption tokens (i.e. chunks are all present in result)
68
- # @param [Symbol] verb :list_identifiers or :list_records
69
- # @param [Hash] oai_args OAI params (metadata_prefix, from, until, set) used for request
70
- # @return response to OAI request, as one enumerable object
71
- # TODO: This could be moved into ruby-oai?
72
- def harvest (verb, oai_args, &block)
73
- response = oai_client.send verb, oai_args
74
- while response && response.entries.size > 0
75
- response.entries.each &block
76
-
77
- token = response.resumption_token
78
- if token.nil? or token.empty?
79
- break
80
- else
81
- response = oai_client.send(verb, :resumption_token => token)
82
- end
83
- end
84
- rescue Faraday::Error::TimeoutError => e
85
- logger.error "No response from OAI Provider"
86
- logger.error e
87
- raise e
88
- rescue OAI::Exception => e
89
- # possibly unnecessary after ruby-oai 0.0.14
90
- logger.error "Received unexpected OAI::Exception"
91
- logger.error e
92
- raise e
93
- end
94
-
95
- end # class OaiHarvester
96
-
97
- end # module Harvestdor
98
-
99
- module OAI
100
- class Client
101
- # monkey patch to adjust timeouts
102
- # Do the actual HTTP get, following any temporary redirects
103
- def get(uri)
104
- # OLD: response = @http_client.get uri
105
- response = @http_client.get do |req|
106
- req.url uri
107
- # FIXME: hard-coded default settings in harvestdor are used here
108
- # values are in seconds
109
- req.options[:timeout] = Harvestdor::Client.default_config.http_options.timeout # open/read timeout
110
- req.options[:open_timeout] = Harvestdor::Client.default_config.http_options.open_timeout # connection open timeout
111
- end
112
- response.body
113
- end
114
- end
115
- end
data/spec/config/oai.yml DELETED
@@ -1,37 +0,0 @@
1
- # log_dir: directory for log file (default logs, relative to harvestdor gem path)
2
- log_dir: spec/test_logs
3
-
4
- # log_name: name of log file (default: harvestdor.log)
5
-
6
- # purl: url for the DOR purl server (used to get ContentMetadata, etc.)
7
- # purl: http://purl-test.stanford.edu
8
-
9
- # ---------- OAI harvesting parameters -----------
10
-
11
- # oai_client_debug: true for OAI::Client debug mode (default: false)
12
-
13
- # oai_repository_url: URL of the OAI data provider
14
- oai_repository_url: https://dor-oaiprovider-test.stanford.edu/oai
15
-
16
- # default_metadata_prefix: default metadata prefix to be used for harvesting (default: mods)
17
- # can be overridden on calls to harvest_ids and harvest_records
18
- default_metadata_prefix: mods
19
-
20
- # default_from_date: default from date for harvest (default: nil)
21
- # can be overridden on calls to harvest_ids and harvest_records
22
- default_from_date: '2012-11-01'
23
-
24
- # default_until_date: default until date for harvest (default: nil)
25
- # can be overridden on calls to harvest_ids and harvest_records
26
-
27
- # default_set: default set for harvest (default: nil)
28
- # can be overridden on calls to harvest_ids and harvest_records
29
-
30
- # Additional options to pass to Faraday http client (https://github.com/technoweenie/faraday)
31
- # timeouts are in seconds; timeout -> open/read, open_timeout -> connection open
32
- http_options:
33
- ssl:
34
- verify: false
35
- request:
36
- timeout: 121
37
- open_timeout: 122
data/spec/harvestdor_spec.rb DELETED
@@ -1,23 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe Harvestdor do
4
-
5
- context "#druid" do
6
- it "should return the druid part of an oai identifier" do
7
- expect(Harvestdor.druid('oai:searchworks.stanford.edu/druid:foo')).to eql('foo')
8
- end
9
- it "should work with OAI::Header as argument" do
10
- header = OAI::Header.new(nil)
11
- header.identifier = 'oai:searchworks.stanford.edu/druid:foo'
12
- expect(Harvestdor.druid(header)).to eql('foo')
13
- end
14
- it "should work with OAI::Record as argument" do
15
- oai_rec = OAI::Record.new(nil)
16
- header = OAI::Header.new(nil)
17
- header.identifier = 'oai:searchworks.stanford.edu/druid:foo'
18
- oai_rec.header = header
19
- expect(Harvestdor.druid(oai_rec)).to eql('foo')
20
- end
21
- end
22
-
23
- end
data/spec/oai_harvest_spec.rb DELETED
@@ -1,220 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe 'Harvestdor::Client oai harvesting' do
4
- before(:all) do
5
- @harvestdor_client = Harvestdor::Client.new
6
- @oai_arg_defaults = {:metadata_prefix => @harvestdor_client.config.default_metadata_prefix,
7
- :from => @harvestdor_client.config.default_from_date,
8
- :until => @harvestdor_client.config.default_until_date,
9
- :set => @harvestdor_client.config.default_set }
10
- end
11
-
12
- describe "druids_via_oai" do
13
- before(:each) do
14
- oai_response = double('oai_response')
15
- allow(oai_response).to receive(:entries).and_return(['foo', 'bar'])
16
- allow(oai_response).to receive(:resumption_token).and_return('')
17
- allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
18
- oai_response
19
- }
20
- end
21
- it "should return druids" do
22
- header1 = OAI::Header.new(nil)
23
- header1.identifier = 'oai:searchworks.stanford.edu/druid:foo'
24
- header2 = OAI::Header.new(nil)
25
- header2.identifier = 'oai:searchworks.stanford.edu/druid:bar'
26
- oai_response = double('oai_response')
27
- allow(oai_response).to receive(:entries).and_return([header1, header2])
28
- expect(@harvestdor_client.druids_via_oai).to eql(['foo', 'bar'])
29
- end
30
- it "should have results viewable as an array" do
31
- expect(@harvestdor_client.druids_via_oai).to be_an_instance_of(Array)
32
- end
33
- it "should have enumerable results" do
34
- expect(@harvestdor_client.druids_via_oai).to respond_to(:each, :count)
35
- end
36
- it "should yield to a passed block" do
37
- expect { |b| @harvestdor_client.druids_via_oai(&b) }.to yield_successive_args('foo', 'bar')
38
- end
39
- end
40
-
41
- describe "oai_records" do
42
- before(:each) do
43
- @oai_response = double('oai_response')
44
- allow(@oai_response).to receive(:entries).and_return([1, 2])
45
- allow(@oai_response).to receive(:resumption_token).and_return('')
46
- allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
47
- @oai_response
48
- }
49
- end
50
- it "should return OAI::Record objects" do
51
- header1 = OAI::Header.new(nil)
52
- header1.identifier = 'oai:searchworks.stanford.edu/druid:foo'
53
- oai_rec1 = OAI::Record.new(nil)
54
- oai_rec1.header = header1
55
- header2 = OAI::Header.new(nil)
56
- header2.identifier = 'oai:searchworks.stanford.edu/druid:bar'
57
- oai_rec2 = OAI::Record.new(nil)
58
- oai_rec2.header = header2
59
- allow(@oai_response).to receive(:entries).and_return([oai_rec1, oai_rec2])
60
- expect(@harvestdor_client.oai_records).to eql([oai_rec1, oai_rec2])
61
- end
62
- it "should have results viewable as an array" do
63
- expect(@harvestdor_client.oai_records).to be_an_instance_of(Array)
64
- end
65
- it "should have enumerable results" do
66
- expect(@harvestdor_client.oai_records).to respond_to(:each, :count)
67
- end
68
- it "should yield to a passed block" do
69
- expect { |b| @harvestdor_client.oai_records(&b) }.to yield_successive_args(1, 2)
70
- end
71
- end
72
-
73
- describe "oai_headers" do
74
- before(:each) do
75
- @oai_response = double('oai_response')
76
- allow(@oai_response).to receive(:entries).and_return([1, 2])
77
- allow(@oai_response).to receive(:resumption_token).and_return('')
78
- allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
79
- @oai_response
80
- }
81
- end
82
- it "should return OAI::Header objects" do
83
- header1 = OAI::Header.new(nil)
84
- header1.identifier = 'oai:searchworks.stanford.edu/druid:foo'
85
- header2 = OAI::Header.new(nil)
86
- header2.identifier = 'oai:searchworks.stanford.edu/druid:bar'
87
- allow(@oai_response).to receive(:entries).and_return([header1, header2])
88
- expect(@harvestdor_client.oai_headers).to eql([header1, header2])
89
- end
90
- it "should have results viewable as an array" do
91
- expect(@harvestdor_client.oai_headers).to be_an_instance_of(Array)
92
- end
93
- it "should have enumerable results" do
94
- expect(@harvestdor_client.oai_headers).to respond_to(:each, :count)
95
- end
96
- it "should yield to a passed block" do
97
- expect { |b| @harvestdor_client.oai_headers(&b) }.to yield_successive_args(1, 2)
98
- end
99
- end
100
-
101
- describe "oai_record (single record request)" do
102
- it "should return OAI::Record object" do
103
- oai_rec = OAI::Record.new(nil)
104
- oai_resp = double('oai_response')
105
- allow(oai_resp).to receive(:record).and_return(oai_rec)
106
- allow(@harvestdor_client.oai_client).to receive(:get_record){
107
- oai_resp
108
- }
109
- expect(@harvestdor_client.oai_record('druid')).to eql(oai_rec)
110
- expect(@harvestdor_client.oai_record('druid', 'mods')).to eql(oai_rec)
111
- end
112
- end
113
-
114
- describe "scrub_oai_args" do
115
- before(:all) do
116
- @expected_oai_args = @oai_arg_defaults.dup
117
- @expected_oai_args.each { |k, v|
118
- @expected_oai_args.delete(k) if v.nil? || v.size == 0
119
- }
120
-
121
- end
122
- it "should use client's default values for OAI arguments if they are not present in the method param hash" do
123
- expect(@harvestdor_client.send(:scrub_oai_args)).to eql(@expected_oai_args)
124
- end
125
- it "should use OAI arguments from the method param hash if they are present" do
126
- passed_options = {:metadata_prefix => 'mods', :from => '2012-11-30'}
127
- expect(@harvestdor_client.send(:scrub_oai_args, passed_options)).to eql(@expected_oai_args.merge(passed_options))
128
- end
129
- it "should use nil value for option when it is passed in options hash" do
130
- client = Harvestdor::Client.new({:default_from_date => '2012-01-01'})
131
- expect(client.config.default_from_date).to eql('2012-01-01')
132
- passed_options = {:from => nil}
133
- expect(client.send(:scrub_oai_args, passed_options)[:from]).to eql(nil)
134
- end
135
- end
136
-
137
- describe "harvest" do
138
- it "should perform a list_records OAI request when first arg is true" do
139
- oai_response = double('oai_response')
140
- allow(oai_response).to receive(:entries).and_return([])
141
- allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
142
- oai_response
143
- }
144
- expect(@harvestdor_client.oai_client).to receive(:list_records)
145
- @harvestdor_client.send(:harvest, :list_records, {})
146
- end
147
-
148
- it "should perform a list_identifiers OAI request when first arg is false" do
149
- oai_response = double('oai_response')
150
- allow(oai_response).to receive(:entries).and_return([])
151
- allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
152
- oai_response
153
- }
154
- expect(@harvestdor_client.oai_client).to receive(:list_identifiers)
155
- @harvestdor_client.send(:harvest, :list_identifiers, {})
156
- end
157
-
158
- it "should use passed OAI arguments" do
159
- oai_response = double('oai_response')
160
- allow(oai_response).to receive(:entries).and_return([])
161
- allow(@harvestdor_client.oai_client).to receive(:list_identifiers).with(an_instance_of(Hash)) {
162
- oai_response
163
- }
164
- oai_options_hash = {:metadata_prefix => 'mods', :from => '2012-11-30'}
165
- expect(@harvestdor_client.oai_client).to receive(:list_identifiers).with(oai_options_hash)
166
- @harvestdor_client.send(:harvest, :list_identifiers, oai_options_hash)
167
- end
168
-
169
- it "should yield to a passed block" do
170
- oai_response = double('oai_response')
171
- allow(oai_response).to receive(:entries).and_return([1, 2])
172
- allow(oai_response).to receive(:resumption_token).and_return('')
173
- allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
174
- oai_response
175
- }
176
- expect { |b| @harvestdor_client.send(:harvest, :list_records, {}, &b) }.to yield_successive_args(1, 2)
177
- end
178
-
179
- context "resumption tokens" do
180
- it "should stop processing when no records/headers are received" do
181
- oai_response = double('oai_response')
182
- allow(oai_response).to receive(:entries).and_return([])
183
- allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
184
- oai_response
185
- }
186
-
187
- i = 0
188
- @harvestdor_client.send(:harvest, :list_records, {}) { |record| i += 1 }
189
- expect(i).to eql(0)
190
- end
191
-
192
- it "should stop processing when the resumption token is empty" do
193
- oai_response_with_token = double('oai_response')
194
- allow(oai_response_with_token).to receive(:entries).and_return([1,2,3,4,5])
195
- allow(oai_response_with_token).to receive(:resumption_token).and_return('')
196
- allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
197
- oai_response_with_token
198
- }
199
-
200
- i = 0
201
- @harvestdor_client.send(:harvest, :list_records, {}) { |record| i += 1 }
202
- expect(i).to eql(5)
203
- end
204
-
205
- it "should stop processing when there was no resumption token" do
206
- oai_response_with_token = double('oai_response')
207
- allow(oai_response_with_token).to receive(:entries).and_return([1,2,3,4,5])
208
- allow(oai_response_with_token).to receive(:resumption_token).and_return(nil)
209
- allow(@harvestdor_client.oai_client).to receive(:list_records).with(an_instance_of(Hash)) {
210
- oai_response_with_token
211
- }
212
-
213
- i = 0
214
- @harvestdor_client.send(:harvest, :list_records, {}) { |record| i += 1 }
215
- expect(i).to eql(5)
216
- end
217
- end # resumption tokens
218
- end
219
-
220
- end
data/spec/oai_integration_spec.rb DELETED
@@ -1,139 +0,0 @@
1
- # encoding: utf-8
2
- require 'spec_helper'
3
-
4
- describe 'Harvestdor::Client OAI Harvesting Integration Tests', :integration => true do
5
-
6
- before(:all) do
7
- @config_yml_path = File.join(File.dirname(__FILE__), "config", "oai.yml")
8
- end
9
-
10
- context "test OAI server" do
11
- before(:all) do
12
- @test_hclient ||= Harvestdor::Client.new({:config_yml_path => @config_yml_path, :oai_client_debug => 'true', :oai_repository_url => 'https://dor-oaiprovider-test.stanford.edu/oai'})
13
- end
14
- context "withOUT resumption tokens" do
15
- before(:all) do
16
- @oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_governed_by_hy787xj5878'}
17
- end
18
- it "should be able to harvest headers" do
19
- VCR.use_cassette('headers') do
20
- headers = @test_hclient.oai_headers(@oai_args)
21
- expect(headers).to be_an_instance_of(Array)
22
- expect(headers.size).to be > 0
23
- expect(headers.size).to be < 50 # no resumption token
24
- expect(headers.first).to be_an_instance_of(OAI::Header)
25
- end
26
- end
27
- it "should be able to harvest records" do
28
- VCR.use_cassette('records') do
29
- records = @test_hclient.oai_records(@oai_args)
30
- expect(records).to be_an_instance_of(Array)
31
- expect(records.size).to be > 0
32
- expect(records.size).to be < 50 # no resumption token
33
- expect(records.first).to be_an_instance_of(OAI::Record)
34
- end
35
- end
36
- end
37
- context "with resumption tokens" do
38
- before(:all) do
39
- @oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_member_of_kh678dr8608'}
40
- end
41
- it "should be able to harvest headers" do
42
- skip "need to find small set > 50 on test"
43
- headers = @test_hclient.oai_headers(@oai_args)
44
- expect(headers).to be_an_instance_of(Array)
45
- expect(headers.size).to be > 50
46
- expect(headers.first).to be_an_instance_of(OAI::Header)
47
- end
48
- it "should be able to harvest records" do
49
- skip "need to find small set > 50 on test"
50
- records = @test_hclient.harvest_records(@oai_args)
51
- expect(records).to be_an_instance_of(Array)
52
- expect(records.size).to be > 50
53
- expect(records.first).to be_an_instance_of(OAI::Record)
54
- end
55
- end
56
- context "oai_record (single record request)" do
57
- before(:all) do
58
- VCR.use_cassette('jt959wc5586_test') do
59
- @rec = @test_hclient.oai_record('jt959wc5586')
60
- end
61
- end
62
- it "should get a single OAI::Record object" do
63
- expect(@rec).to be_an_instance_of(OAI::Record)
64
- end
65
- it "should keep utf-8 encoded characters intact" do
66
- xml = Nokogiri::XML(@rec.metadata.to_s)
67
- xml.remove_namespaces!
68
- expect(xml.root.xpath('/metadata/mods/titleInfo/subTitle').text).to match /^recueil complet des débats législatifs & politiques des chambres françaises/
69
- end
70
- end
71
- end
72
-
73
- context "production OAI server" do
74
- before(:all) do
75
- @prod_hclient ||= Harvestdor::Client.new({:config_yml_path => @config_yml_path, :oai_repository_url => 'https://dor-oaiprovider-prod.stanford.edu/oai'})
76
- end
77
- context "withOUT resumption tokens" do
78
- before(:all) do
79
- # Reid-Dennis: 47 objects
80
- @oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_governed_by_sd064kn5856'}
81
- end
82
- it "should be able to harvest headers" do
83
- VCR.use_cassette('prod_headers') do
84
- headers = @prod_hclient.oai_headers(@oai_args)
85
- expect(headers).to be_an_instance_of(Array)
86
- expect(headers.size).to be > 0
87
- expect(headers.size).to be < 50 # no resumption token
88
- expect(headers.first).to be_an_instance_of(OAI::Header)
89
- end
90
- end
91
- it "should be able to harvest records" do
92
- VCR.use_cassette('prod_records') do
93
- records = @prod_hclient.oai_records(@oai_args)
94
- expect(records).to be_an_instance_of(Array)
95
- expect(records.size).to be > 0
96
- expect(records.size).to be < 50 # no resumption token
97
- expect(records.first).to be_an_instance_of(OAI::Record)
98
- end
99
- end
100
- end
101
- context "with resumption tokens" do
102
- before(:all) do
103
- # Archives Parlementaires - 8x objects
104
- @oai_args = {:metadata_prefix => 'mods', :from => nil, :until => nil, :set => 'is_member_of_collection_jh957jy1101'}
105
- end
106
- it "should be able to harvest headers" do
107
- VCR.use_cassette('headers_with_resumption') do
108
- headers = @prod_hclient.oai_headers(@oai_args)
109
- expect(headers).to be_an_instance_of(Array)
110
- expect(headers.size).to be > 50
111
- expect(headers.first).to be_an_instance_of(OAI::Header)
112
- end
113
- end
114
- it "should be able to harvest records" do
115
- skip "the request always seems to time out"
116
- records = @prod_hclient.oai_records(@oai_args)
117
- expect(records).to be_an_instance_of(Array)
118
- expect(records.size).to be > 50
119
- expect(records.first).to be_an_instance_of(OAI::Record)
120
- end
121
- end
122
- context "oai_record (single record request)" do
123
- before(:all) do
124
- VCR.use_cassette('jt959wc5586_prod') do
125
- @rec = @prod_hclient.oai_record('jt959wc5586')
126
- end
127
- end
128
- it "should get a single OAI::Record object" do
129
- expect(@rec).to be_an_instance_of(OAI::Record)
130
- end
131
- it "should keep utf-8 encoded characters intact" do
132
- xml = Nokogiri::XML(@rec.metadata.to_s)
133
- xml.remove_namespaces!
134
- expect(xml.root.xpath('/metadata/mods/titleInfo/subTitle').text).to match /^recueil complet des débats législatifs & politiques des chambres françaises/
135
- end
136
- end
137
- end
138
-
139
- end