harvestdor-indexer 2.4.0 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2615efed3ea7e94441d7457e39cd5bc3503a5cd9
4
- data.tar.gz: 508ff570b19083f8f78ea29d3f820c09f698f392
3
+ metadata.gz: 7c1cecc67cfdfbd0f57a40c1bc667b775143d237
4
+ data.tar.gz: 7a0452f584a683dd7f9146b5e1d165fa7c276341
5
5
  SHA512:
6
- metadata.gz: 8960f3c459bd794532ddc94e0d80d31f5f85692f9e08eca0a540054290054dfeee9082c2bc68f9d3ee5e5e516be03b43fdebe8a2fba9834c47a8e21dfc2ea65a
7
- data.tar.gz: 10cf83435837099f544094927182de39a2ae624e66cdff7f0477158de923d22cd27c174d5f1d8127ead92df9f4162a3f3fc50a03bc98e7ce68eb59e94393b804
6
+ metadata.gz: ca9a9d59768d8125528e8770a0251a6b8b9480833264046b462ef7e07fb72ca4fe27ab490e35f00ac08fba14e895506e9d3a55aaa893e2b411f4924e78461997
7
+ data.tar.gz: 7238bca995fc84045046e3891cd3c45f02e49eccf18fda48622811cd2d76ae53db445dc265087e1d79acc258c958ce05eaf0da0697f49c8c30865cd74af3ac8a
@@ -2,7 +2,7 @@ language: ruby
2
2
  sudo: false
3
3
  script: rake
4
4
  rvm:
5
- - 2.2.3
5
+ - 2.3.1
6
6
 
7
7
  notifications:
8
8
  email:
@@ -24,6 +24,7 @@ Gem::Specification.new do |gem|
24
24
  gem.add_dependency 'dor-fetcher', '=1.0.5'
25
25
  gem.add_dependency 'activesupport'
26
26
  gem.add_dependency 'parallel'
27
+ gem.add_dependency 'faraday'
27
28
 
28
29
  # Runtime dependencies
29
30
  gem.add_runtime_dependency 'confstruct'
@@ -36,8 +37,8 @@ Gem::Specification.new do |gem|
36
37
  # tests
37
38
  gem.add_development_dependency 'rspec', '~> 3.0'
38
39
  gem.add_development_dependency 'coveralls'
39
- gem.add_development_dependency 'rubocop'
40
- gem.add_development_dependency 'rubocop-rspec'
40
+ gem.add_development_dependency 'rubocop', '0.42.0'
41
+ gem.add_development_dependency 'rubocop-rspec', '1.5.1'
41
42
  gem.add_development_dependency 'vcr'
42
43
  gem.add_development_dependency 'webmock'
43
44
 
@@ -20,6 +20,7 @@ module Harvestdor
20
20
  # Base class to harvest from DOR via harvestdor gem and then index
21
21
  class Indexer
22
22
  require 'harvestdor/indexer/metrics'
23
+ require 'harvestdor/indexer/purl_fetcher'
23
24
  require 'harvestdor/indexer/resource'
24
25
  require 'harvestdor/indexer/solr'
25
26
 
@@ -142,6 +143,12 @@ module Harvestdor
142
143
  @dor_fetcher_client ||= DorFetcher::Client.new(config.dor_fetcher)
143
144
  end
144
145
 
146
+ def purl_fetcher_client
147
+ return unless config.purl_fetcher
148
+
149
+ @purl_fetcher_client ||= Harvestdor::Indexer::PurlFetcher.new(config.purl_fetcher)
150
+ end
151
+
145
152
  def solr
146
153
  @solr ||= Harvestdor::Indexer::Solr.new self, config.solr.to_hash
147
154
  end
@@ -167,4 +174,4 @@ module Harvestdor
167
174
  raise msg
168
175
  end
169
176
  end # Indexer class
170
- end # Harvestdor module
177
+ end # Harvestdor module
@@ -0,0 +1,37 @@
1
+ module Harvestdor
2
+ class Indexer
3
+ # Client for working with the PURL Fetcher API
4
+ class PurlFetcher
5
+ attr_reader :config
6
+
7
+ def initialize(config = {})
8
+ @config = config
9
+ end
10
+
11
+ def druids_from_collection(collection)
12
+ return to_enum(:druids_from_collection, collection) unless block_given?
13
+
14
+ page = 1
15
+
16
+ loop do
17
+ response = client.get("/collections/#{collection}/purls", page: page, per_page: 100)
18
+ data = JSON.parse(response.body)
19
+
20
+ break if data['purls'].blank?
21
+
22
+ data['purls'].each { |d| yield d['druid'] }
23
+
24
+ page += 1
25
+
26
+ break if data['pages']['next_page'].nil?
27
+ end
28
+ end
29
+
30
+ private
31
+
32
+ def client
33
+ @client ||= Faraday.new(config)
34
+ end
35
+ end
36
+ end
37
+ end
@@ -14,6 +14,14 @@ module Harvestdor
14
14
  @options = options
15
15
  end
16
16
 
17
+ def namespaced_druid
18
+ if druid =~ /^druid:/
19
+ druid
20
+ else
21
+ "druid:#{druid}"
22
+ end
23
+ end
24
+
17
25
  # @return [String] string of form oo123oo1234
18
26
  def bare_druid
19
27
  @bare_druid ||= druid.gsub('druid:', '')
@@ -29,6 +37,10 @@ module Harvestdor
29
37
  indexer.dor_fetcher_client
30
38
  end
31
39
 
40
+ def purl_fetcher_client
41
+ indexer.purl_fetcher_client
42
+ end
43
+
32
44
  ##
33
45
  # Get the logger
34
46
  def logger
@@ -66,7 +78,7 @@ module Harvestdor
66
78
  return [] unless collection?
67
79
 
68
80
  # return an enumerator, with an estimated size of the collection
69
- return to_enum(:items) { items_druids.length } unless block_given?
81
+ return to_enum(:items) { items_druids.count } unless block_given?
70
82
 
71
83
  items_druids.each do |x|
72
84
  yield Harvestdor::Indexer::Resource.new(indexer, x)
@@ -74,7 +86,12 @@ module Harvestdor
74
86
  end
75
87
 
76
88
  def items_druids
77
- @items_druids ||= dor_fetcher_client.druid_array(dor_fetcher_client.get_collection(bare_druid, {}))
89
+ if purl_fetcher_client
90
+ # we don't need to memoize purl_fetcher_client, since it natively uses enumerables
91
+ purl_fetcher_client.druids_from_collection(namespaced_druid)
92
+ else
93
+ @items_druids ||= dor_fetcher_client.druid_array(dor_fetcher_client.get_collection(bare_druid, {}))
94
+ end
78
95
  end
79
96
 
80
97
  # given a druid, get its objectLabel from its purl page identityMetadata
@@ -167,4 +184,4 @@ module Harvestdor
167
184
  druid.hash ^ indexer.hash
168
185
  end
169
186
  end
170
- end
187
+ end
@@ -1,6 +1,6 @@
1
1
  module Harvestdor
2
2
  class Indexer
3
3
  # this is the Ruby Gem version
4
- VERSION = '2.4.0'.freeze
4
+ VERSION = '2.5.0'.freeze
5
5
  end
6
6
  end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
+ describe Harvestdor::Indexer::PurlFetcher do
4
+ describe '#druids_from_collection' do
5
+ let(:client) { subject.send(:client) }
6
+
7
+ before do
8
+ allow(client).to receive(:get).with('/collections/druid:oo000oo0000/purls', page: 1, per_page: 100).and_return(
9
+ instance_double(Faraday::Response, body: { purls: [{ druid: 'druid:oo000oo0001' }], pages: { next_page: 2 } }.to_json)
10
+ )
11
+ allow(client).to receive(:get).with('/collections/druid:oo000oo0000/purls', page: 2, per_page: 100).and_return(
12
+ instance_double(Faraday::Response, body: { purls: [{ druid: 'druid:oo000oo0002' }], pages: { next_page: 3 } }.to_json)
13
+ )
14
+ allow(client).to receive(:get).with('/collections/druid:oo000oo0000/purls', page: 3, per_page: 100).and_return(
15
+ instance_double(Faraday::Response, body: { purls: [{ druid: 'druid:oo000oo0003' }], pages: { next_page: nil } }.to_json)
16
+ )
17
+ end
18
+
19
+ it 'returns an enumerable of druids from a collection' do
20
+ results = subject.druids_from_collection('druid:oo000oo0000')
21
+
22
+ expect(results.to_a).to match_array %w(druid:oo000oo0001 druid:oo000oo0002 druid:oo000oo0003)
23
+ end
24
+ end
25
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: harvestdor-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.0
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-07-27 00:00:00.000000000 Z
13
+ date: 2017-11-10 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rsolr
@@ -110,6 +110,20 @@ dependencies:
110
110
  - - ">="
111
111
  - !ruby/object:Gem::Version
112
112
  version: '0'
113
+ - !ruby/object:Gem::Dependency
114
+ name: faraday
115
+ requirement: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ type: :runtime
121
+ prerelease: false
122
+ version_requirements: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
113
127
  - !ruby/object:Gem::Dependency
114
128
  name: confstruct
115
129
  requirement: !ruby/object:Gem::Requirement
@@ -198,30 +212,30 @@ dependencies:
198
212
  name: rubocop
199
213
  requirement: !ruby/object:Gem::Requirement
200
214
  requirements:
201
- - - ">="
215
+ - - '='
202
216
  - !ruby/object:Gem::Version
203
- version: '0'
217
+ version: 0.42.0
204
218
  type: :development
205
219
  prerelease: false
206
220
  version_requirements: !ruby/object:Gem::Requirement
207
221
  requirements:
208
- - - ">="
222
+ - - '='
209
223
  - !ruby/object:Gem::Version
210
- version: '0'
224
+ version: 0.42.0
211
225
  - !ruby/object:Gem::Dependency
212
226
  name: rubocop-rspec
213
227
  requirement: !ruby/object:Gem::Requirement
214
228
  requirements:
215
- - - ">="
229
+ - - '='
216
230
  - !ruby/object:Gem::Version
217
- version: '0'
231
+ version: 1.5.1
218
232
  type: :development
219
233
  prerelease: false
220
234
  version_requirements: !ruby/object:Gem::Requirement
221
235
  requirements:
222
- - - ">="
236
+ - - '='
223
237
  - !ruby/object:Gem::Version
224
- version: '0'
238
+ version: 1.5.1
225
239
  - !ruby/object:Gem::Dependency
226
240
  name: vcr
227
241
  requirement: !ruby/object:Gem::Requirement
@@ -274,6 +288,7 @@ files:
274
288
  - lib/harvestdor-indexer.rb
275
289
  - lib/harvestdor/indexer.rb
276
290
  - lib/harvestdor/indexer/metrics.rb
291
+ - lib/harvestdor/indexer/purl_fetcher.rb
277
292
  - lib/harvestdor/indexer/resource.rb
278
293
  - lib/harvestdor/indexer/solr.rb
279
294
  - lib/harvestdor/indexer/version.rb
@@ -289,6 +304,7 @@ files:
289
304
  - spec/unit/harvestdor-indexer-solr_spec.rb
290
305
  - spec/unit/harvestdor-indexer_spec.rb
291
306
  - spec/unit/harvestdor/indexer/metrics_spec.rb
307
+ - spec/unit/harvestdor/indexer/purl_fetcher_spec.rb
292
308
  homepage: https://github.com/sul-dlss/harvestdor-indexer
293
309
  licenses: []
294
310
  metadata: {}
@@ -308,7 +324,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
308
324
  version: '0'
309
325
  requirements: []
310
326
  rubyforge_project:
311
- rubygems_version: 2.6.4
327
+ rubygems_version: 2.6.11
312
328
  signing_key:
313
329
  specification_version: 4
314
330
  summary: Harvest DOR object metadata and index it to Solr
@@ -325,3 +341,4 @@ test_files:
325
341
  - spec/unit/harvestdor-indexer-solr_spec.rb
326
342
  - spec/unit/harvestdor-indexer_spec.rb
327
343
  - spec/unit/harvestdor/indexer/metrics_spec.rb
344
+ - spec/unit/harvestdor/indexer/purl_fetcher_spec.rb