harvestdor-indexer 2.4.0 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/harvestdor-indexer.gemspec +3 -2
- data/lib/harvestdor/indexer.rb +8 -1
- data/lib/harvestdor/indexer/purl_fetcher.rb +37 -0
- data/lib/harvestdor/indexer/resource.rb +20 -3
- data/lib/harvestdor/indexer/version.rb +1 -1
- data/spec/unit/harvestdor/indexer/purl_fetcher_spec.rb +25 -0
- metadata +28 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7c1cecc67cfdfbd0f57a40c1bc667b775143d237
|
4
|
+
data.tar.gz: 7a0452f584a683dd7f9146b5e1d165fa7c276341
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca9a9d59768d8125528e8770a0251a6b8b9480833264046b462ef7e07fb72ca4fe27ab490e35f00ac08fba14e895506e9d3a55aaa893e2b411f4924e78461997
|
7
|
+
data.tar.gz: 7238bca995fc84045046e3891cd3c45f02e49eccf18fda48622811cd2d76ae53db445dc265087e1d79acc258c958ce05eaf0da0697f49c8c30865cd74af3ac8a
|
data/.travis.yml
CHANGED
data/harvestdor-indexer.gemspec
CHANGED
@@ -24,6 +24,7 @@ Gem::Specification.new do |gem|
|
|
24
24
|
gem.add_dependency 'dor-fetcher', '=1.0.5'
|
25
25
|
gem.add_dependency 'activesupport'
|
26
26
|
gem.add_dependency 'parallel'
|
27
|
+
gem.add_dependency 'faraday'
|
27
28
|
|
28
29
|
# Runtime dependencies
|
29
30
|
gem.add_runtime_dependency 'confstruct'
|
@@ -36,8 +37,8 @@ Gem::Specification.new do |gem|
|
|
36
37
|
# tests
|
37
38
|
gem.add_development_dependency 'rspec', '~> 3.0'
|
38
39
|
gem.add_development_dependency 'coveralls'
|
39
|
-
gem.add_development_dependency 'rubocop'
|
40
|
-
gem.add_development_dependency 'rubocop-rspec'
|
40
|
+
gem.add_development_dependency 'rubocop', '0.42.0'
|
41
|
+
gem.add_development_dependency 'rubocop-rspec', '1.5.1'
|
41
42
|
gem.add_development_dependency 'vcr'
|
42
43
|
gem.add_development_dependency 'webmock'
|
43
44
|
|
data/lib/harvestdor/indexer.rb
CHANGED
@@ -20,6 +20,7 @@ module Harvestdor
|
|
20
20
|
# Base class to harvest from DOR via harvestdor gem and then index
|
21
21
|
class Indexer
|
22
22
|
require 'harvestdor/indexer/metrics'
|
23
|
+
require 'harvestdor/indexer/purl_fetcher'
|
23
24
|
require 'harvestdor/indexer/resource'
|
24
25
|
require 'harvestdor/indexer/solr'
|
25
26
|
|
@@ -142,6 +143,12 @@ module Harvestdor
|
|
142
143
|
@dor_fetcher_client ||= DorFetcher::Client.new(config.dor_fetcher)
|
143
144
|
end
|
144
145
|
|
146
|
+
def purl_fetcher_client
|
147
|
+
return unless config.purl_fetcher
|
148
|
+
|
149
|
+
@purl_fetcher_client ||= Harvestdor::Indexer::PurlFetcher.new(config.purl_fetcher)
|
150
|
+
end
|
151
|
+
|
145
152
|
def solr
|
146
153
|
@solr ||= Harvestdor::Indexer::Solr.new self, config.solr.to_hash
|
147
154
|
end
|
@@ -167,4 +174,4 @@ module Harvestdor
|
|
167
174
|
raise msg
|
168
175
|
end
|
169
176
|
end # Indexer class
|
170
|
-
end # Harvestdor module
|
177
|
+
end # Harvestdor module
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Harvestdor
|
2
|
+
class Indexer
|
3
|
+
# Client for working with the PURL Fetcher API
|
4
|
+
class PurlFetcher
|
5
|
+
attr_reader :config
|
6
|
+
|
7
|
+
def initialize(config = {})
|
8
|
+
@config = config
|
9
|
+
end
|
10
|
+
|
11
|
+
def druids_from_collection(collection)
|
12
|
+
return to_enum(:druids_from_collection, collection) unless block_given?
|
13
|
+
|
14
|
+
page = 1
|
15
|
+
|
16
|
+
loop do
|
17
|
+
response = client.get("/collections/#{collection}/purls", page: page, per_page: 100)
|
18
|
+
data = JSON.parse(response.body)
|
19
|
+
|
20
|
+
break if data['purls'].blank?
|
21
|
+
|
22
|
+
data['purls'].each { |d| yield d['druid'] }
|
23
|
+
|
24
|
+
page += 1
|
25
|
+
|
26
|
+
break if data['pages']['next_page'].nil?
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def client
|
33
|
+
@client ||= Faraday.new(config)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -14,6 +14,14 @@ module Harvestdor
|
|
14
14
|
@options = options
|
15
15
|
end
|
16
16
|
|
17
|
+
def namespaced_druid
|
18
|
+
if druid =~ /^druid:/
|
19
|
+
druid
|
20
|
+
else
|
21
|
+
"druid:#{druid}"
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
17
25
|
# @return [String] string of form oo123oo1234
|
18
26
|
def bare_druid
|
19
27
|
@bare_druid ||= druid.gsub('druid:', '')
|
@@ -29,6 +37,10 @@ module Harvestdor
|
|
29
37
|
indexer.dor_fetcher_client
|
30
38
|
end
|
31
39
|
|
40
|
+
def purl_fetcher_client
|
41
|
+
indexer.purl_fetcher_client
|
42
|
+
end
|
43
|
+
|
32
44
|
##
|
33
45
|
# Get the logger
|
34
46
|
def logger
|
@@ -66,7 +78,7 @@ module Harvestdor
|
|
66
78
|
return [] unless collection?
|
67
79
|
|
68
80
|
# return an enumerator, with an estimated size of the collection
|
69
|
-
return to_enum(:items) { items_druids.
|
81
|
+
return to_enum(:items) { items_druids.count } unless block_given?
|
70
82
|
|
71
83
|
items_druids.each do |x|
|
72
84
|
yield Harvestdor::Indexer::Resource.new(indexer, x)
|
@@ -74,7 +86,12 @@ module Harvestdor
|
|
74
86
|
end
|
75
87
|
|
76
88
|
def items_druids
|
77
|
-
|
89
|
+
if purl_fetcher_client
|
90
|
+
# we don't need to memoize purl_fetcher_client, since it natively uses enumerables
|
91
|
+
purl_fetcher_client.druids_from_collection(namespaced_druid)
|
92
|
+
else
|
93
|
+
@items_druids ||= dor_fetcher_client.druid_array(dor_fetcher_client.get_collection(bare_druid, {}))
|
94
|
+
end
|
78
95
|
end
|
79
96
|
|
80
97
|
# given a druid, get its objectLabel from its purl page identityMetadata
|
@@ -167,4 +184,4 @@ module Harvestdor
|
|
167
184
|
druid.hash ^ indexer.hash
|
168
185
|
end
|
169
186
|
end
|
170
|
-
end
|
187
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Harvestdor::Indexer::PurlFetcher do
|
4
|
+
describe '#druids_from_collection' do
|
5
|
+
let(:client) { subject.send(:client) }
|
6
|
+
|
7
|
+
before do
|
8
|
+
allow(client).to receive(:get).with('/collections/druid:oo000oo0000/purls', page: 1, per_page: 100).and_return(
|
9
|
+
instance_double(Faraday::Response, body: { purls: [{ druid: 'druid:oo000oo0001' }], pages: { next_page: 2 } }.to_json)
|
10
|
+
)
|
11
|
+
allow(client).to receive(:get).with('/collections/druid:oo000oo0000/purls', page: 2, per_page: 100).and_return(
|
12
|
+
instance_double(Faraday::Response, body: { purls: [{ druid: 'druid:oo000oo0002' }], pages: { next_page: 3 } }.to_json)
|
13
|
+
)
|
14
|
+
allow(client).to receive(:get).with('/collections/druid:oo000oo0000/purls', page: 3, per_page: 100).and_return(
|
15
|
+
instance_double(Faraday::Response, body: { purls: [{ druid: 'druid:oo000oo0003' }], pages: { next_page: nil } }.to_json)
|
16
|
+
)
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'returns an enumerable of druids from a collection' do
|
20
|
+
results = subject.druids_from_collection('druid:oo000oo0000')
|
21
|
+
|
22
|
+
expect(results.to_a).to match_array %w(druid:oo000oo0001 druid:oo000oo0002 druid:oo000oo0003)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: harvestdor-indexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.0
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2017-11-10 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rsolr
|
@@ -110,6 +110,20 @@ dependencies:
|
|
110
110
|
- - ">="
|
111
111
|
- !ruby/object:Gem::Version
|
112
112
|
version: '0'
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: faraday
|
115
|
+
requirement: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - ">="
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
120
|
+
type: :runtime
|
121
|
+
prerelease: false
|
122
|
+
version_requirements: !ruby/object:Gem::Requirement
|
123
|
+
requirements:
|
124
|
+
- - ">="
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: '0'
|
113
127
|
- !ruby/object:Gem::Dependency
|
114
128
|
name: confstruct
|
115
129
|
requirement: !ruby/object:Gem::Requirement
|
@@ -198,30 +212,30 @@ dependencies:
|
|
198
212
|
name: rubocop
|
199
213
|
requirement: !ruby/object:Gem::Requirement
|
200
214
|
requirements:
|
201
|
-
- - ">="
|
215
|
+
- - '='
|
202
216
|
- !ruby/object:Gem::Version
|
203
|
-
version: '0'
|
217
|
+
version: 0.42.0
|
204
218
|
type: :development
|
205
219
|
prerelease: false
|
206
220
|
version_requirements: !ruby/object:Gem::Requirement
|
207
221
|
requirements:
|
208
|
-
- - ">="
|
222
|
+
- - '='
|
209
223
|
- !ruby/object:Gem::Version
|
210
|
-
version: '0'
|
224
|
+
version: 0.42.0
|
211
225
|
- !ruby/object:Gem::Dependency
|
212
226
|
name: rubocop-rspec
|
213
227
|
requirement: !ruby/object:Gem::Requirement
|
214
228
|
requirements:
|
215
|
-
- - ">="
|
229
|
+
- - '='
|
216
230
|
- !ruby/object:Gem::Version
|
217
|
-
version: '0'
|
231
|
+
version: 1.5.1
|
218
232
|
type: :development
|
219
233
|
prerelease: false
|
220
234
|
version_requirements: !ruby/object:Gem::Requirement
|
221
235
|
requirements:
|
222
|
-
- - ">="
|
236
|
+
- - '='
|
223
237
|
- !ruby/object:Gem::Version
|
224
|
-
version: '0'
|
238
|
+
version: 1.5.1
|
225
239
|
- !ruby/object:Gem::Dependency
|
226
240
|
name: vcr
|
227
241
|
requirement: !ruby/object:Gem::Requirement
|
@@ -274,6 +288,7 @@ files:
|
|
274
288
|
- lib/harvestdor-indexer.rb
|
275
289
|
- lib/harvestdor/indexer.rb
|
276
290
|
- lib/harvestdor/indexer/metrics.rb
|
291
|
+
- lib/harvestdor/indexer/purl_fetcher.rb
|
277
292
|
- lib/harvestdor/indexer/resource.rb
|
278
293
|
- lib/harvestdor/indexer/solr.rb
|
279
294
|
- lib/harvestdor/indexer/version.rb
|
@@ -289,6 +304,7 @@ files:
|
|
289
304
|
- spec/unit/harvestdor-indexer-solr_spec.rb
|
290
305
|
- spec/unit/harvestdor-indexer_spec.rb
|
291
306
|
- spec/unit/harvestdor/indexer/metrics_spec.rb
|
307
|
+
- spec/unit/harvestdor/indexer/purl_fetcher_spec.rb
|
292
308
|
homepage: https://github.com/sul-dlss/harvestdor-indexer
|
293
309
|
licenses: []
|
294
310
|
metadata: {}
|
@@ -308,7 +324,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
308
324
|
version: '0'
|
309
325
|
requirements: []
|
310
326
|
rubyforge_project:
|
311
|
-
rubygems_version: 2.6.
|
327
|
+
rubygems_version: 2.6.11
|
312
328
|
signing_key:
|
313
329
|
specification_version: 4
|
314
330
|
summary: Harvest DOR object metadata and index it to Solr
|
@@ -325,3 +341,4 @@ test_files:
|
|
325
341
|
- spec/unit/harvestdor-indexer-solr_spec.rb
|
326
342
|
- spec/unit/harvestdor-indexer_spec.rb
|
327
343
|
- spec/unit/harvestdor/indexer/metrics_spec.rb
|
344
|
+
- spec/unit/harvestdor/indexer/purl_fetcher_spec.rb
|