harvestdor-indexer 2.4.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2615efed3ea7e94441d7457e39cd5bc3503a5cd9
4
- data.tar.gz: 508ff570b19083f8f78ea29d3f820c09f698f392
3
+ metadata.gz: 7c1cecc67cfdfbd0f57a40c1bc667b775143d237
4
+ data.tar.gz: 7a0452f584a683dd7f9146b5e1d165fa7c276341
5
5
  SHA512:
6
- metadata.gz: 8960f3c459bd794532ddc94e0d80d31f5f85692f9e08eca0a540054290054dfeee9082c2bc68f9d3ee5e5e516be03b43fdebe8a2fba9834c47a8e21dfc2ea65a
7
- data.tar.gz: 10cf83435837099f544094927182de39a2ae624e66cdff7f0477158de923d22cd27c174d5f1d8127ead92df9f4162a3f3fc50a03bc98e7ce68eb59e94393b804
6
+ metadata.gz: ca9a9d59768d8125528e8770a0251a6b8b9480833264046b462ef7e07fb72ca4fe27ab490e35f00ac08fba14e895506e9d3a55aaa893e2b411f4924e78461997
7
+ data.tar.gz: 7238bca995fc84045046e3891cd3c45f02e49eccf18fda48622811cd2d76ae53db445dc265087e1d79acc258c958ce05eaf0da0697f49c8c30865cd74af3ac8a
@@ -2,7 +2,7 @@ language: ruby
2
2
  sudo: false
3
3
  script: rake
4
4
  rvm:
5
- - 2.2.3
5
+ - 2.3.1
6
6
 
7
7
  notifications:
8
8
  email:
@@ -24,6 +24,7 @@ Gem::Specification.new do |gem|
24
24
  gem.add_dependency 'dor-fetcher', '=1.0.5'
25
25
  gem.add_dependency 'activesupport'
26
26
  gem.add_dependency 'parallel'
27
+ gem.add_dependency 'faraday'
27
28
 
28
29
  # Runtime dependencies
29
30
  gem.add_runtime_dependency 'confstruct'
@@ -36,8 +37,8 @@ Gem::Specification.new do |gem|
36
37
  # tests
37
38
  gem.add_development_dependency 'rspec', '~> 3.0'
38
39
  gem.add_development_dependency 'coveralls'
39
- gem.add_development_dependency 'rubocop'
40
- gem.add_development_dependency 'rubocop-rspec'
40
+ gem.add_development_dependency 'rubocop', '0.42.0'
41
+ gem.add_development_dependency 'rubocop-rspec', '1.5.1'
41
42
  gem.add_development_dependency 'vcr'
42
43
  gem.add_development_dependency 'webmock'
43
44
 
@@ -20,6 +20,7 @@ module Harvestdor
20
20
  # Base class to harvest from DOR via harvestdor gem and then index
21
21
  class Indexer
22
22
  require 'harvestdor/indexer/metrics'
23
+ require 'harvestdor/indexer/purl_fetcher'
23
24
  require 'harvestdor/indexer/resource'
24
25
  require 'harvestdor/indexer/solr'
25
26
 
@@ -142,6 +143,12 @@ module Harvestdor
142
143
  @dor_fetcher_client ||= DorFetcher::Client.new(config.dor_fetcher)
143
144
  end
144
145
 
146
+ def purl_fetcher_client
147
+ return unless config.purl_fetcher
148
+
149
+ @purl_fetcher_client ||= Harvestdor::Indexer::PurlFetcher.new(config.purl_fetcher)
150
+ end
151
+
145
152
  def solr
146
153
  @solr ||= Harvestdor::Indexer::Solr.new self, config.solr.to_hash
147
154
  end
@@ -167,4 +174,4 @@ module Harvestdor
167
174
  raise msg
168
175
  end
169
176
  end # Indexer class
170
- end # Harvestdor module
177
+ end # Harvestdor module
@@ -0,0 +1,37 @@
1
+ module Harvestdor
2
+ class Indexer
3
+ # Client for working with the PURL Fetcher API
4
+ class PurlFetcher
5
+ attr_reader :config
6
+
7
+ def initialize(config = {})
8
+ @config = config
9
+ end
10
+
11
+ def druids_from_collection(collection)
12
+ return to_enum(:druids_from_collection, collection) unless block_given?
13
+
14
+ page = 1
15
+
16
+ loop do
17
+ response = client.get("/collections/#{collection}/purls", page: page, per_page: 100)
18
+ data = JSON.parse(response.body)
19
+
20
+ break if data['purls'].blank?
21
+
22
+ data['purls'].each { |d| yield d['druid'] }
23
+
24
+ page += 1
25
+
26
+ break if data['pages']['next_page'].nil?
27
+ end
28
+ end
29
+
30
+ private
31
+
32
+ def client
33
+ @client ||= Faraday.new(config)
34
+ end
35
+ end
36
+ end
37
+ end
@@ -14,6 +14,14 @@ module Harvestdor
14
14
  @options = options
15
15
  end
16
16
 
17
+ def namespaced_druid
18
+ if druid =~ /^druid:/
19
+ druid
20
+ else
21
+ "druid:#{druid}"
22
+ end
23
+ end
24
+
17
25
  # @return [String] string of form oo123oo1234
18
26
  def bare_druid
19
27
  @bare_druid ||= druid.gsub('druid:', '')
@@ -29,6 +37,10 @@ module Harvestdor
29
37
  indexer.dor_fetcher_client
30
38
  end
31
39
 
40
+ def purl_fetcher_client
41
+ indexer.purl_fetcher_client
42
+ end
43
+
32
44
  ##
33
45
  # Get the logger
34
46
  def logger
@@ -66,7 +78,7 @@ module Harvestdor
66
78
  return [] unless collection?
67
79
 
68
80
  # return an enumerator, with an estimated size of the collection
69
- return to_enum(:items) { items_druids.length } unless block_given?
81
+ return to_enum(:items) { items_druids.count } unless block_given?
70
82
 
71
83
  items_druids.each do |x|
72
84
  yield Harvestdor::Indexer::Resource.new(indexer, x)
@@ -74,7 +86,12 @@ module Harvestdor
74
86
  end
75
87
 
76
88
  def items_druids
77
- @items_druids ||= dor_fetcher_client.druid_array(dor_fetcher_client.get_collection(bare_druid, {}))
89
+ if purl_fetcher_client
90
+ # we don't need to memoize purl_fetcher_client, since it natively uses enumerables
91
+ purl_fetcher_client.druids_from_collection(namespaced_druid)
92
+ else
93
+ @items_druids ||= dor_fetcher_client.druid_array(dor_fetcher_client.get_collection(bare_druid, {}))
94
+ end
78
95
  end
79
96
 
80
97
  # given a druid, get its objectLabel from its purl page identityMetadata
@@ -167,4 +184,4 @@ module Harvestdor
167
184
  druid.hash ^ indexer.hash
168
185
  end
169
186
  end
170
- end
187
+ end
@@ -1,6 +1,6 @@
1
1
  module Harvestdor
2
2
  class Indexer
3
3
  # this is the Ruby Gem version
4
- VERSION = '2.4.0'.freeze
4
+ VERSION = '2.5.0'.freeze
5
5
  end
6
6
  end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
+ describe Harvestdor::Indexer::PurlFetcher do
4
+ describe '#druids_from_collection' do
5
+ let(:client) { subject.send(:client) }
6
+
7
+ before do
8
+ allow(client).to receive(:get).with('/collections/druid:oo000oo0000/purls', page: 1, per_page: 100).and_return(
9
+ instance_double(Faraday::Response, body: { purls: [{ druid: 'druid:oo000oo0001' }], pages: { next_page: 2 } }.to_json)
10
+ )
11
+ allow(client).to receive(:get).with('/collections/druid:oo000oo0000/purls', page: 2, per_page: 100).and_return(
12
+ instance_double(Faraday::Response, body: { purls: [{ druid: 'druid:oo000oo0002' }], pages: { next_page: 3 } }.to_json)
13
+ )
14
+ allow(client).to receive(:get).with('/collections/druid:oo000oo0000/purls', page: 3, per_page: 100).and_return(
15
+ instance_double(Faraday::Response, body: { purls: [{ druid: 'druid:oo000oo0003' }], pages: { next_page: nil } }.to_json)
16
+ )
17
+ end
18
+
19
+ it 'returns an enumerable of druids from a collection' do
20
+ results = subject.druids_from_collection('druid:oo000oo0000')
21
+
22
+ expect(results.to_a).to match_array %w(druid:oo000oo0001 druid:oo000oo0002 druid:oo000oo0003)
23
+ end
24
+ end
25
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: harvestdor-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.0
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-07-27 00:00:00.000000000 Z
13
+ date: 2017-11-10 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rsolr
@@ -110,6 +110,20 @@ dependencies:
110
110
  - - ">="
111
111
  - !ruby/object:Gem::Version
112
112
  version: '0'
113
+ - !ruby/object:Gem::Dependency
114
+ name: faraday
115
+ requirement: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ type: :runtime
121
+ prerelease: false
122
+ version_requirements: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
113
127
  - !ruby/object:Gem::Dependency
114
128
  name: confstruct
115
129
  requirement: !ruby/object:Gem::Requirement
@@ -198,30 +212,30 @@ dependencies:
198
212
  name: rubocop
199
213
  requirement: !ruby/object:Gem::Requirement
200
214
  requirements:
201
- - - ">="
215
+ - - '='
202
216
  - !ruby/object:Gem::Version
203
- version: '0'
217
+ version: 0.42.0
204
218
  type: :development
205
219
  prerelease: false
206
220
  version_requirements: !ruby/object:Gem::Requirement
207
221
  requirements:
208
- - - ">="
222
+ - - '='
209
223
  - !ruby/object:Gem::Version
210
- version: '0'
224
+ version: 0.42.0
211
225
  - !ruby/object:Gem::Dependency
212
226
  name: rubocop-rspec
213
227
  requirement: !ruby/object:Gem::Requirement
214
228
  requirements:
215
- - - ">="
229
+ - - '='
216
230
  - !ruby/object:Gem::Version
217
- version: '0'
231
+ version: 1.5.1
218
232
  type: :development
219
233
  prerelease: false
220
234
  version_requirements: !ruby/object:Gem::Requirement
221
235
  requirements:
222
- - - ">="
236
+ - - '='
223
237
  - !ruby/object:Gem::Version
224
- version: '0'
238
+ version: 1.5.1
225
239
  - !ruby/object:Gem::Dependency
226
240
  name: vcr
227
241
  requirement: !ruby/object:Gem::Requirement
@@ -274,6 +288,7 @@ files:
274
288
  - lib/harvestdor-indexer.rb
275
289
  - lib/harvestdor/indexer.rb
276
290
  - lib/harvestdor/indexer/metrics.rb
291
+ - lib/harvestdor/indexer/purl_fetcher.rb
277
292
  - lib/harvestdor/indexer/resource.rb
278
293
  - lib/harvestdor/indexer/solr.rb
279
294
  - lib/harvestdor/indexer/version.rb
@@ -289,6 +304,7 @@ files:
289
304
  - spec/unit/harvestdor-indexer-solr_spec.rb
290
305
  - spec/unit/harvestdor-indexer_spec.rb
291
306
  - spec/unit/harvestdor/indexer/metrics_spec.rb
307
+ - spec/unit/harvestdor/indexer/purl_fetcher_spec.rb
292
308
  homepage: https://github.com/sul-dlss/harvestdor-indexer
293
309
  licenses: []
294
310
  metadata: {}
@@ -308,7 +324,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
308
324
  version: '0'
309
325
  requirements: []
310
326
  rubyforge_project:
311
- rubygems_version: 2.6.4
327
+ rubygems_version: 2.6.11
312
328
  signing_key:
313
329
  specification_version: 4
314
330
  summary: Harvest DOR object metadata and index it to Solr
@@ -325,3 +341,4 @@ test_files:
325
341
  - spec/unit/harvestdor-indexer-solr_spec.rb
326
342
  - spec/unit/harvestdor-indexer_spec.rb
327
343
  - spec/unit/harvestdor/indexer/metrics_spec.rb
344
+ - spec/unit/harvestdor/indexer/purl_fetcher_spec.rb