harvestdor-indexer 2.4.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/harvestdor-indexer.gemspec +3 -2
- data/lib/harvestdor/indexer.rb +8 -1
- data/lib/harvestdor/indexer/purl_fetcher.rb +37 -0
- data/lib/harvestdor/indexer/resource.rb +20 -3
- data/lib/harvestdor/indexer/version.rb +1 -1
- data/spec/unit/harvestdor/indexer/purl_fetcher_spec.rb +25 -0
- metadata +28 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7c1cecc67cfdfbd0f57a40c1bc667b775143d237
|
4
|
+
data.tar.gz: 7a0452f584a683dd7f9146b5e1d165fa7c276341
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca9a9d59768d8125528e8770a0251a6b8b9480833264046b462ef7e07fb72ca4fe27ab490e35f00ac08fba14e895506e9d3a55aaa893e2b411f4924e78461997
|
7
|
+
data.tar.gz: 7238bca995fc84045046e3891cd3c45f02e49eccf18fda48622811cd2d76ae53db445dc265087e1d79acc258c958ce05eaf0da0697f49c8c30865cd74af3ac8a
|
data/.travis.yml
CHANGED
data/harvestdor-indexer.gemspec
CHANGED
@@ -24,6 +24,7 @@ Gem::Specification.new do |gem|
|
|
24
24
|
gem.add_dependency 'dor-fetcher', '=1.0.5'
|
25
25
|
gem.add_dependency 'activesupport'
|
26
26
|
gem.add_dependency 'parallel'
|
27
|
+
gem.add_dependency 'faraday'
|
27
28
|
|
28
29
|
# Runtime dependencies
|
29
30
|
gem.add_runtime_dependency 'confstruct'
|
@@ -36,8 +37,8 @@ Gem::Specification.new do |gem|
|
|
36
37
|
# tests
|
37
38
|
gem.add_development_dependency 'rspec', '~> 3.0'
|
38
39
|
gem.add_development_dependency 'coveralls'
|
39
|
-
gem.add_development_dependency 'rubocop'
|
40
|
-
gem.add_development_dependency 'rubocop-rspec'
|
40
|
+
gem.add_development_dependency 'rubocop', '0.42.0'
|
41
|
+
gem.add_development_dependency 'rubocop-rspec', '1.5.1'
|
41
42
|
gem.add_development_dependency 'vcr'
|
42
43
|
gem.add_development_dependency 'webmock'
|
43
44
|
|
data/lib/harvestdor/indexer.rb
CHANGED
@@ -20,6 +20,7 @@ module Harvestdor
|
|
20
20
|
# Base class to harvest from DOR via harvestdor gem and then index
|
21
21
|
class Indexer
|
22
22
|
require 'harvestdor/indexer/metrics'
|
23
|
+
require 'harvestdor/indexer/purl_fetcher'
|
23
24
|
require 'harvestdor/indexer/resource'
|
24
25
|
require 'harvestdor/indexer/solr'
|
25
26
|
|
@@ -142,6 +143,12 @@ module Harvestdor
|
|
142
143
|
@dor_fetcher_client ||= DorFetcher::Client.new(config.dor_fetcher)
|
143
144
|
end
|
144
145
|
|
146
|
+
def purl_fetcher_client
|
147
|
+
return unless config.purl_fetcher
|
148
|
+
|
149
|
+
@purl_fetcher_client ||= Harvestdor::Indexer::PurlFetcher.new(config.purl_fetcher)
|
150
|
+
end
|
151
|
+
|
145
152
|
def solr
|
146
153
|
@solr ||= Harvestdor::Indexer::Solr.new self, config.solr.to_hash
|
147
154
|
end
|
@@ -167,4 +174,4 @@ module Harvestdor
|
|
167
174
|
raise msg
|
168
175
|
end
|
169
176
|
end # Indexer class
|
170
|
-
end # Harvestdor module
|
177
|
+
end # Harvestdor module
|
data/lib/harvestdor/indexer/purl_fetcher.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
module Harvestdor
|
2
|
+
class Indexer
|
3
|
+
# Client for working with the PURL Fetcher API
|
4
|
+
class PurlFetcher
|
5
|
+
attr_reader :config
|
6
|
+
|
7
|
+
def initialize(config = {})
|
8
|
+
@config = config
|
9
|
+
end
|
10
|
+
|
11
|
+
def druids_from_collection(collection)
|
12
|
+
return to_enum(:druids_from_collection, collection) unless block_given?
|
13
|
+
|
14
|
+
page = 1
|
15
|
+
|
16
|
+
loop do
|
17
|
+
response = client.get("/collections/#{collection}/purls", page: page, per_page: 100)
|
18
|
+
data = JSON.parse(response.body)
|
19
|
+
|
20
|
+
break if data['purls'].blank?
|
21
|
+
|
22
|
+
data['purls'].each { |d| yield d['druid'] }
|
23
|
+
|
24
|
+
page += 1
|
25
|
+
|
26
|
+
break if data['pages']['next_page'].nil?
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def client
|
33
|
+
@client ||= Faraday.new(config)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/harvestdor/indexer/resource.rb
CHANGED
@@ -14,6 +14,14 @@ module Harvestdor
|
|
14
14
|
@options = options
|
15
15
|
end
|
16
16
|
|
17
|
+
def namespaced_druid
|
18
|
+
if druid =~ /^druid:/
|
19
|
+
druid
|
20
|
+
else
|
21
|
+
"druid:#{druid}"
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
17
25
|
# @return [String] string of form oo123oo1234
|
18
26
|
def bare_druid
|
19
27
|
@bare_druid ||= druid.gsub('druid:', '')
|
@@ -29,6 +37,10 @@ module Harvestdor
|
|
29
37
|
indexer.dor_fetcher_client
|
30
38
|
end
|
31
39
|
|
40
|
+
def purl_fetcher_client
|
41
|
+
indexer.purl_fetcher_client
|
42
|
+
end
|
43
|
+
|
32
44
|
##
|
33
45
|
# Get the logger
|
34
46
|
def logger
|
@@ -66,7 +78,7 @@ module Harvestdor
|
|
66
78
|
return [] unless collection?
|
67
79
|
|
68
80
|
# return an enumerator, with an estimated size of the collection
|
69
|
-
return to_enum(:items) { items_druids.
|
81
|
+
return to_enum(:items) { items_druids.count } unless block_given?
|
70
82
|
|
71
83
|
items_druids.each do |x|
|
72
84
|
yield Harvestdor::Indexer::Resource.new(indexer, x)
|
@@ -74,7 +86,12 @@ module Harvestdor
|
|
74
86
|
end
|
75
87
|
|
76
88
|
def items_druids
|
77
|
-
|
89
|
+
if purl_fetcher_client
|
90
|
+
# we don't need to memoize purl_fetcher_client, since it natively uses enumerables
|
91
|
+
purl_fetcher_client.druids_from_collection(namespaced_druid)
|
92
|
+
else
|
93
|
+
@items_druids ||= dor_fetcher_client.druid_array(dor_fetcher_client.get_collection(bare_druid, {}))
|
94
|
+
end
|
78
95
|
end
|
79
96
|
|
80
97
|
# given a druid, get its objectLabel from its purl page identityMetadata
|
@@ -167,4 +184,4 @@ module Harvestdor
|
|
167
184
|
druid.hash ^ indexer.hash
|
168
185
|
end
|
169
186
|
end
|
170
|
-
end
|
187
|
+
end
|
data/spec/unit/harvestdor/indexer/purl_fetcher_spec.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Harvestdor::Indexer::PurlFetcher do
|
4
|
+
describe '#druids_from_collection' do
|
5
|
+
let(:client) { subject.send(:client) }
|
6
|
+
|
7
|
+
before do
|
8
|
+
allow(client).to receive(:get).with('/collections/druid:oo000oo0000/purls', page: 1, per_page: 100).and_return(
|
9
|
+
instance_double(Faraday::Response, body: { purls: [{ druid: 'druid:oo000oo0001' }], pages: { next_page: 2 } }.to_json)
|
10
|
+
)
|
11
|
+
allow(client).to receive(:get).with('/collections/druid:oo000oo0000/purls', page: 2, per_page: 100).and_return(
|
12
|
+
instance_double(Faraday::Response, body: { purls: [{ druid: 'druid:oo000oo0002' }], pages: { next_page: 3 } }.to_json)
|
13
|
+
)
|
14
|
+
allow(client).to receive(:get).with('/collections/druid:oo000oo0000/purls', page: 3, per_page: 100).and_return(
|
15
|
+
instance_double(Faraday::Response, body: { purls: [{ druid: 'druid:oo000oo0003' }], pages: { next_page: nil } }.to_json)
|
16
|
+
)
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'returns an enumerable of druids from a collection' do
|
20
|
+
results = subject.druids_from_collection('druid:oo000oo0000')
|
21
|
+
|
22
|
+
expect(results.to_a).to match_array %w(druid:oo000oo0001 druid:oo000oo0002 druid:oo000oo0003)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: harvestdor-indexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.0
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2017-11-10 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rsolr
|
@@ -110,6 +110,20 @@ dependencies:
|
|
110
110
|
- - ">="
|
111
111
|
- !ruby/object:Gem::Version
|
112
112
|
version: '0'
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: faraday
|
115
|
+
requirement: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - ">="
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
120
|
+
type: :runtime
|
121
|
+
prerelease: false
|
122
|
+
version_requirements: !ruby/object:Gem::Requirement
|
123
|
+
requirements:
|
124
|
+
- - ">="
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: '0'
|
113
127
|
- !ruby/object:Gem::Dependency
|
114
128
|
name: confstruct
|
115
129
|
requirement: !ruby/object:Gem::Requirement
|
@@ -198,30 +212,30 @@ dependencies:
|
|
198
212
|
name: rubocop
|
199
213
|
requirement: !ruby/object:Gem::Requirement
|
200
214
|
requirements:
|
201
|
-
- - ">="
|
215
|
+
- - '='
|
202
216
|
- !ruby/object:Gem::Version
|
203
|
-
version: '0'
|
217
|
+
version: 0.42.0
|
204
218
|
type: :development
|
205
219
|
prerelease: false
|
206
220
|
version_requirements: !ruby/object:Gem::Requirement
|
207
221
|
requirements:
|
208
|
-
- - ">="
|
222
|
+
- - '='
|
209
223
|
- !ruby/object:Gem::Version
|
210
|
-
version: '0'
|
224
|
+
version: 0.42.0
|
211
225
|
- !ruby/object:Gem::Dependency
|
212
226
|
name: rubocop-rspec
|
213
227
|
requirement: !ruby/object:Gem::Requirement
|
214
228
|
requirements:
|
215
|
-
- - ">="
|
229
|
+
- - '='
|
216
230
|
- !ruby/object:Gem::Version
|
217
|
-
version: '0'
|
231
|
+
version: 1.5.1
|
218
232
|
type: :development
|
219
233
|
prerelease: false
|
220
234
|
version_requirements: !ruby/object:Gem::Requirement
|
221
235
|
requirements:
|
222
|
-
- - ">="
|
236
|
+
- - '='
|
223
237
|
- !ruby/object:Gem::Version
|
224
|
-
version: '0'
|
238
|
+
version: 1.5.1
|
225
239
|
- !ruby/object:Gem::Dependency
|
226
240
|
name: vcr
|
227
241
|
requirement: !ruby/object:Gem::Requirement
|
@@ -274,6 +288,7 @@ files:
|
|
274
288
|
- lib/harvestdor-indexer.rb
|
275
289
|
- lib/harvestdor/indexer.rb
|
276
290
|
- lib/harvestdor/indexer/metrics.rb
|
291
|
+
- lib/harvestdor/indexer/purl_fetcher.rb
|
277
292
|
- lib/harvestdor/indexer/resource.rb
|
278
293
|
- lib/harvestdor/indexer/solr.rb
|
279
294
|
- lib/harvestdor/indexer/version.rb
|
@@ -289,6 +304,7 @@ files:
|
|
289
304
|
- spec/unit/harvestdor-indexer-solr_spec.rb
|
290
305
|
- spec/unit/harvestdor-indexer_spec.rb
|
291
306
|
- spec/unit/harvestdor/indexer/metrics_spec.rb
|
307
|
+
- spec/unit/harvestdor/indexer/purl_fetcher_spec.rb
|
292
308
|
homepage: https://github.com/sul-dlss/harvestdor-indexer
|
293
309
|
licenses: []
|
294
310
|
metadata: {}
|
@@ -308,7 +324,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
308
324
|
version: '0'
|
309
325
|
requirements: []
|
310
326
|
rubyforge_project:
|
311
|
-
rubygems_version: 2.6.
|
327
|
+
rubygems_version: 2.6.11
|
312
328
|
signing_key:
|
313
329
|
specification_version: 4
|
314
330
|
summary: Harvest DOR object metadata and index it to Solr
|
@@ -325,3 +341,4 @@ test_files:
|
|
325
341
|
- spec/unit/harvestdor-indexer-solr_spec.rb
|
326
342
|
- spec/unit/harvestdor-indexer_spec.rb
|
327
343
|
- spec/unit/harvestdor/indexer/metrics_spec.rb
|
344
|
+
- spec/unit/harvestdor/indexer/purl_fetcher_spec.rb
|