purl_fetcher-client 0.2.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +27 -0
- data/lib/purl_fetcher/client/deletes_reader.rb +6 -3
- data/lib/purl_fetcher/client/public_xml_record.rb +64 -8
- data/lib/purl_fetcher/client/reader.rb +14 -4
- data/lib/purl_fetcher/client/version.rb +1 -1
- data/purl_fetcher-client.gemspec +3 -1
- metadata +41 -13
- data/.travis.yml +0 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b80b63b29ffbed55055e58d2d063f4496f84015b1130a6908e82e4d91a18c94d
|
|
4
|
+
data.tar.gz: 3d9691d8e9939f17a8d4f3a66a03bb3644463a3ffcc9604b8558242614f4e0a6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: '0484a2254521f201fed8d95cf65ab85a3310111c6be46c6d13ecb4e7c40cec3d807861b42c47dc389f34c242918733dce7c21b66beb872d062112bbfe8674dd0'
|
|
7
|
+
data.tar.gz: ff5cdc4fe8d78eedeba1594e8aa47e87bfb6186337d4f1042f928a56f0a6b854487e7acfba5398e306401a123cb710ae0ac4bc6b9b11b157a81d20550b789570
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ master ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ master ]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
tests:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v2
|
|
14
|
+
- name: Set up Ruby
|
|
15
|
+
uses: ruby/setup-ruby@v1
|
|
16
|
+
with:
|
|
17
|
+
ruby-version: 3.0
|
|
18
|
+
- uses: actions/cache@v2
|
|
19
|
+
with:
|
|
20
|
+
path: vendor/bundle
|
|
21
|
+
key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }}
|
|
22
|
+
restore-keys: |
|
|
23
|
+
${{ runner.os }}-gems-
|
|
24
|
+
- name: Bundle install
|
|
25
|
+
run: bundle install
|
|
26
|
+
- name: Run tests
|
|
27
|
+
run: bundle exec rake
|
|
@@ -4,15 +4,18 @@ class PurlFetcher::Client::DeletesReader < PurlFetcher::Client::Reader
|
|
|
4
4
|
return to_enum(:each) unless block_given?
|
|
5
5
|
|
|
6
6
|
deletes(first_modified: first_modified).each do |change|
|
|
7
|
-
|
|
7
|
+
|
|
8
|
+
public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
|
|
9
|
+
|
|
10
|
+
yield public_xml, change, self
|
|
8
11
|
end
|
|
9
12
|
|
|
10
13
|
changes(first_modified: first_modified, target: target).each do |change|
|
|
11
|
-
|
|
14
|
+
public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
|
|
12
15
|
|
|
13
16
|
next unless target.nil? || (change['false_targets'] && change['false_targets'].include?(target)) || (settings['skip_if_catkey'] && record.catkey)
|
|
14
17
|
|
|
15
|
-
yield
|
|
18
|
+
yield public_xml, change, self
|
|
16
19
|
end
|
|
17
20
|
end
|
|
18
21
|
end
|
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
require 'nokogiri'
|
|
2
2
|
require 'stanford-mods'
|
|
3
|
+
require 'mods_display'
|
|
4
|
+
require 'dor/rights_auth'
|
|
3
5
|
|
|
4
6
|
module PurlFetcher::Client
|
|
5
7
|
class PublicXmlRecord
|
|
6
8
|
attr_reader :druid, :options
|
|
7
9
|
|
|
8
10
|
def self.fetch(url)
|
|
9
|
-
if defined?(
|
|
10
|
-
Manticore.get(url)
|
|
11
|
+
if defined?(JRUBY_VERSION)
|
|
12
|
+
response = Manticore.get(url)
|
|
13
|
+
response.body if response.code == 200
|
|
11
14
|
else
|
|
12
|
-
HTTP.get(url)
|
|
15
|
+
response = HTTP.get(url)
|
|
16
|
+
response.body if response.status.ok?
|
|
13
17
|
end
|
|
14
18
|
end
|
|
15
19
|
|
|
@@ -42,10 +46,18 @@ module PurlFetcher::Client
|
|
|
42
46
|
end
|
|
43
47
|
end
|
|
44
48
|
|
|
49
|
+
def mods_display
|
|
50
|
+
@mods_display ||= ModsDisplay::HTML.new(stanford_mods)
|
|
51
|
+
end
|
|
52
|
+
|
|
45
53
|
def public_xml
|
|
46
54
|
@public_xml ||= self.class.fetch(purl_base_url + "/#{druid}.xml")
|
|
47
55
|
end
|
|
48
56
|
|
|
57
|
+
def public_xml?
|
|
58
|
+
!!public_xml
|
|
59
|
+
end
|
|
60
|
+
|
|
49
61
|
def public_xml_doc
|
|
50
62
|
@public_xml_doc ||= Nokogiri::XML(public_xml)
|
|
51
63
|
end
|
|
@@ -54,7 +66,14 @@ module PurlFetcher::Client
|
|
|
54
66
|
@mods ||= if public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').any?
|
|
55
67
|
public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').first
|
|
56
68
|
else
|
|
57
|
-
|
|
69
|
+
if defined?(Honeybadger)
|
|
70
|
+
Honeybadger.notify(
|
|
71
|
+
'Unable to find MODS in the public xml; falling back to stand-along mods document',
|
|
72
|
+
context: { druid: druid }
|
|
73
|
+
)
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
Nokogiri::XML(self.class.fetch(purl_base_url + "/#{druid}.mods"))
|
|
58
77
|
end
|
|
59
78
|
end
|
|
60
79
|
|
|
@@ -111,16 +130,22 @@ module PurlFetcher::Client
|
|
|
111
130
|
|
|
112
131
|
def collections
|
|
113
132
|
@collections ||= predicate_druids('isMemberOfCollection').map do |druid|
|
|
114
|
-
PublicXmlRecord.new(druid,
|
|
133
|
+
PublicXmlRecord.new(druid, options)
|
|
115
134
|
end
|
|
116
135
|
end
|
|
117
136
|
|
|
118
137
|
def constituents
|
|
119
138
|
@constituents ||= predicate_druids('isConstituentOf').map do |druid|
|
|
120
|
-
PublicXmlRecord.new(druid,
|
|
139
|
+
PublicXmlRecord.new(druid, options)
|
|
121
140
|
end
|
|
122
141
|
end
|
|
123
142
|
|
|
143
|
+
def items(&block)
|
|
144
|
+
return [] unless is_collection
|
|
145
|
+
|
|
146
|
+
purl_fetcher_client.collection_members(druid, &block)
|
|
147
|
+
end
|
|
148
|
+
|
|
124
149
|
# the thumbnail in publicXML properly URI encoded, including the slash separator
|
|
125
150
|
# @return [String] thumb filename with druid prepended, e.g. oo000oo0001%2Ffilename%20withspace.jp2
|
|
126
151
|
def encoded_thumb
|
|
@@ -128,7 +153,7 @@ module PurlFetcher::Client
|
|
|
128
153
|
return unless thumb
|
|
129
154
|
thumb_druid=thumb.split('/').first # the druid (before the first slash)
|
|
130
155
|
thumb_filename=thumb.split(/[a-zA-Z]{2}[0-9]{3}[a-zA-Z]{2}[0-9]{4}[\/]/).last # everything after the druid
|
|
131
|
-
"#{thumb_druid}%2F#{
|
|
156
|
+
"#{thumb_druid}%2F#{ERB::Util.url_encode(thumb_filename)}"
|
|
132
157
|
end
|
|
133
158
|
|
|
134
159
|
# get the druids from predicate relationships in rels-ext from public_xml
|
|
@@ -142,8 +167,39 @@ module PurlFetcher::Client
|
|
|
142
167
|
end
|
|
143
168
|
end
|
|
144
169
|
|
|
170
|
+
def druid_tree
|
|
171
|
+
druid.match(/(..)(...)(..)(....)/).captures.join('/')
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
def rights_xml
|
|
175
|
+
@rights_xml ||= public_xml_doc.xpath('//rightsMetadata').to_s
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
def rights
|
|
179
|
+
@rights ||= ::Dor::RightsAuth.parse(rights_xml)
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
def public?
|
|
183
|
+
rights.world_unrestricted?
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
def stanford_only?
|
|
187
|
+
rights.stanford_only_unrestricted?
|
|
188
|
+
end
|
|
189
|
+
|
|
145
190
|
def purl_base_url
|
|
146
|
-
options[:purl_url] || 'https://purl.stanford.edu'
|
|
191
|
+
options[:purl_url]&.sub(%r{/$}, '') || 'https://purl.stanford.edu'
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
def purl_fetcher_api_endpoint
|
|
195
|
+
options[:purl_fetcher_url] || 'https://purl-fetcher.stanford.edu'
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
def purl_fetcher_client
|
|
199
|
+
@purl_fetcher_client ||= PurlFetcher::Client::Reader.new(
|
|
200
|
+
nil,
|
|
201
|
+
'purl_fetcher.api_endpoint' => purl_fetcher_api_endpoint
|
|
202
|
+
)
|
|
147
203
|
end
|
|
148
204
|
end
|
|
149
205
|
end
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
class PurlFetcher::Client::Reader
|
|
2
2
|
include Enumerable
|
|
3
|
-
attr_reader :input_stream, :settings
|
|
3
|
+
attr_reader :input_stream, :settings, :range
|
|
4
4
|
|
|
5
5
|
def initialize(input_stream, settings = {})
|
|
6
6
|
@settings = settings
|
|
7
7
|
@input_stream = input_stream
|
|
8
|
+
@range = {}
|
|
8
9
|
end
|
|
9
10
|
|
|
10
11
|
def each
|
|
@@ -15,7 +16,15 @@ class PurlFetcher::Client::Reader
|
|
|
15
16
|
|
|
16
17
|
public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
|
|
17
18
|
|
|
18
|
-
yield public_xml, change,
|
|
19
|
+
yield public_xml, change, self
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def collection_members(druid)
|
|
24
|
+
return to_enum(:collection_members, druid) unless block_given?
|
|
25
|
+
|
|
26
|
+
paginated_get("/collections/druid:#{druid.sub(/^druid:/, '')}/purls", 'purls').each do |obj, _meta|
|
|
27
|
+
yield PurlFetcher::Client::PublicXmlRecord.new(obj['druid'].sub('druid:', ''), settings), obj, self
|
|
19
28
|
end
|
|
20
29
|
end
|
|
21
30
|
|
|
@@ -68,18 +77,19 @@ class PurlFetcher::Client::Reader
|
|
|
68
77
|
def paginated_get(path, accessor, options = {})
|
|
69
78
|
Enumerator.new do |yielder|
|
|
70
79
|
params = options.dup
|
|
71
|
-
per_page = params.delete(:per_page) {
|
|
80
|
+
per_page = params.delete(:per_page) { 1000 }
|
|
72
81
|
page = params.delete(:page) { 1 }
|
|
73
82
|
max = params.delete(:max) { 1_000_000 }
|
|
74
83
|
total = 0
|
|
75
84
|
|
|
76
85
|
loop do
|
|
77
86
|
data = get(path, { per_page: per_page, page: page }.merge(params))
|
|
87
|
+
@range = data['range']
|
|
78
88
|
|
|
79
89
|
total += data[accessor].length
|
|
80
90
|
|
|
81
91
|
data[accessor].each do |element|
|
|
82
|
-
yielder.yield element,
|
|
92
|
+
yielder.yield element, self
|
|
83
93
|
end
|
|
84
94
|
|
|
85
95
|
page = data['pages']['next_page']
|
data/purl_fetcher-client.gemspec
CHANGED
|
@@ -23,8 +23,10 @@ Gem::Specification.new do |spec|
|
|
|
23
23
|
spec.add_dependency 'http'
|
|
24
24
|
spec.add_dependency 'nokogiri'
|
|
25
25
|
spec.add_dependency 'stanford-mods'
|
|
26
|
+
spec.add_dependency 'dor-rights-auth'
|
|
27
|
+
spec.add_dependency 'mods_display', '>= 1.0.0.alpha1'
|
|
26
28
|
|
|
27
29
|
spec.add_development_dependency "bundler"
|
|
28
|
-
spec.add_development_dependency "rake"
|
|
30
|
+
spec.add_development_dependency "rake"
|
|
29
31
|
spec.add_development_dependency "rspec", "~> 3.0"
|
|
30
32
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: purl_fetcher-client
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Chris Beer
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2022-01-13 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: http
|
|
@@ -52,6 +52,34 @@ dependencies:
|
|
|
52
52
|
- - ">="
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
54
|
version: '0'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: dor-rights-auth
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ">="
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0'
|
|
62
|
+
type: :runtime
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - ">="
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '0'
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: mods_display
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - ">="
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: 1.0.0.alpha1
|
|
76
|
+
type: :runtime
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - ">="
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: 1.0.0.alpha1
|
|
55
83
|
- !ruby/object:Gem::Dependency
|
|
56
84
|
name: bundler
|
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -70,16 +98,16 @@ dependencies:
|
|
|
70
98
|
name: rake
|
|
71
99
|
requirement: !ruby/object:Gem::Requirement
|
|
72
100
|
requirements:
|
|
73
|
-
- - "
|
|
101
|
+
- - ">="
|
|
74
102
|
- !ruby/object:Gem::Version
|
|
75
|
-
version: '
|
|
103
|
+
version: '0'
|
|
76
104
|
type: :development
|
|
77
105
|
prerelease: false
|
|
78
106
|
version_requirements: !ruby/object:Gem::Requirement
|
|
79
107
|
requirements:
|
|
80
|
-
- - "
|
|
108
|
+
- - ">="
|
|
81
109
|
- !ruby/object:Gem::Version
|
|
82
|
-
version: '
|
|
110
|
+
version: '0'
|
|
83
111
|
- !ruby/object:Gem::Dependency
|
|
84
112
|
name: rspec
|
|
85
113
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -94,16 +122,16 @@ dependencies:
|
|
|
94
122
|
- - "~>"
|
|
95
123
|
- !ruby/object:Gem::Version
|
|
96
124
|
version: '3.0'
|
|
97
|
-
description:
|
|
125
|
+
description:
|
|
98
126
|
email:
|
|
99
127
|
- cabeer@stanford.edu
|
|
100
128
|
executables: []
|
|
101
129
|
extensions: []
|
|
102
130
|
extra_rdoc_files: []
|
|
103
131
|
files:
|
|
132
|
+
- ".github/workflows/ruby.yml"
|
|
104
133
|
- ".gitignore"
|
|
105
134
|
- ".rspec"
|
|
106
|
-
- ".travis.yml"
|
|
107
135
|
- CODE_OF_CONDUCT.md
|
|
108
136
|
- Gemfile
|
|
109
137
|
- README.md
|
|
@@ -116,10 +144,10 @@ files:
|
|
|
116
144
|
- lib/purl_fetcher/client/reader.rb
|
|
117
145
|
- lib/purl_fetcher/client/version.rb
|
|
118
146
|
- purl_fetcher-client.gemspec
|
|
119
|
-
homepage:
|
|
147
|
+
homepage:
|
|
120
148
|
licenses: []
|
|
121
149
|
metadata: {}
|
|
122
|
-
post_install_message:
|
|
150
|
+
post_install_message:
|
|
123
151
|
rdoc_options: []
|
|
124
152
|
require_paths:
|
|
125
153
|
- lib
|
|
@@ -134,8 +162,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
134
162
|
- !ruby/object:Gem::Version
|
|
135
163
|
version: '0'
|
|
136
164
|
requirements: []
|
|
137
|
-
rubygems_version: 3.
|
|
138
|
-
signing_key:
|
|
165
|
+
rubygems_version: 3.2.32
|
|
166
|
+
signing_key:
|
|
139
167
|
specification_version: 4
|
|
140
168
|
summary: Traject-compatible reader implementation for streaming data from purl-fetcher
|
|
141
169
|
test_files: []
|