purl_fetcher-client 0.2.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6ec6b5027a5b19369098f11455f0d3244ff3758d5469bd91998b63f2f80ccf80
4
- data.tar.gz: fbfa4939cba5c30ea7f31d98923e6a48114dc0db5e4d29494a1062e5d1199e64
3
+ metadata.gz: b80b63b29ffbed55055e58d2d063f4496f84015b1130a6908e82e4d91a18c94d
4
+ data.tar.gz: 3d9691d8e9939f17a8d4f3a66a03bb3644463a3ffcc9604b8558242614f4e0a6
5
5
  SHA512:
6
- metadata.gz: 5d6a1d548fc27b0eb7496f2f3603ef10acb22f7931e32899442b1f027d36a9b4b6d8f18bc517dd0d220cb9dff4cc3a56c79b58d9441d9a540d6bafc6d3f821ba
7
- data.tar.gz: f7aa278c16d5d1da2b38fb9ea9f11adbe4fe3cd4d2fc262b8f00cbfd00ee1a6ebdd933a2493c069904fc82bcd0e63feb0ad7598f76315838bbeaf9dd002f8a95
6
+ metadata.gz: '0484a2254521f201fed8d95cf65ab85a3310111c6be46c6d13ecb4e7c40cec3d807861b42c47dc389f34c242918733dce7c21b66beb872d062112bbfe8674dd0'
7
+ data.tar.gz: ff5cdc4fe8d78eedeba1594e8aa47e87bfb6186337d4f1042f928a56f0a6b854487e7acfba5398e306401a123cb710ae0ac4bc6b9b11b157a81d20550b789570
@@ -0,0 +1,27 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ master ]
6
+ pull_request:
7
+ branches: [ master ]
8
+
9
+ jobs:
10
+ tests:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ - name: Set up Ruby
15
+ uses: ruby/setup-ruby@v1
16
+ with:
17
+ ruby-version: 3.0
18
+ - uses: actions/cache@v2
19
+ with:
20
+ path: vendor/bundle
21
+ key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }}
22
+ restore-keys: |
23
+ ${{ runner.os }}-gems-
24
+ - name: Bundle install
25
+ run: bundle install
26
+ - name: Run tests
27
+ run: bundle exec rake
@@ -4,15 +4,18 @@ class PurlFetcher::Client::DeletesReader < PurlFetcher::Client::Reader
4
4
  return to_enum(:each) unless block_given?
5
5
 
6
6
  deletes(first_modified: first_modified).each do |change|
7
- yield PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
7
+
8
+ public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
9
+
10
+ yield public_xml, change, self
8
11
  end
9
12
 
10
13
  changes(first_modified: first_modified, target: target).each do |change|
11
- record = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
14
+ public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
12
15
 
13
16
  next unless target.nil? || (change['false_targets'] && change['false_targets'].include?(target)) || (settings['skip_if_catkey'] && record.catkey)
14
17
 
15
- yield record
18
+ yield public_xml, change, self
16
19
  end
17
20
  end
18
21
  end
@@ -1,15 +1,19 @@
1
1
  require 'nokogiri'
2
2
  require 'stanford-mods'
3
+ require 'mods_display'
4
+ require 'dor/rights_auth'
3
5
 
4
6
  module PurlFetcher::Client
5
7
  class PublicXmlRecord
6
8
  attr_reader :druid, :options
7
9
 
8
10
  def self.fetch(url)
9
- if defined?(Manticore)
10
- Manticore.get(url).body
11
+ if defined?(JRUBY_VERSION)
12
+ response = Manticore.get(url)
13
+ response.body if response.code == 200
11
14
  else
12
- HTTP.get(url).body
15
+ response = HTTP.get(url)
16
+ response.body if response.status.ok?
13
17
  end
14
18
  end
15
19
 
@@ -42,10 +46,18 @@ module PurlFetcher::Client
42
46
  end
43
47
  end
44
48
 
49
+ def mods_display
50
+ @mods_display ||= ModsDisplay::HTML.new(stanford_mods)
51
+ end
52
+
45
53
  def public_xml
46
54
  @public_xml ||= self.class.fetch(purl_base_url + "/#{druid}.xml")
47
55
  end
48
56
 
57
+ def public_xml?
58
+ !!public_xml
59
+ end
60
+
49
61
  def public_xml_doc
50
62
  @public_xml_doc ||= Nokogiri::XML(public_xml)
51
63
  end
@@ -54,7 +66,14 @@ module PurlFetcher::Client
54
66
  @mods ||= if public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').any?
55
67
  public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').first
56
68
  else
57
- self.class.fetch(purl_base_url + "#{druid}.mods")
69
+ if defined?(Honeybadger)
70
+ Honeybadger.notify(
71
+ 'Unable to find MODS in the public xml; falling back to stand-along mods document',
72
+ context: { druid: druid }
73
+ )
74
+ end
75
+
76
+ Nokogiri::XML(self.class.fetch(purl_base_url + "/#{druid}.mods"))
58
77
  end
59
78
  end
60
79
 
@@ -111,16 +130,22 @@ module PurlFetcher::Client
111
130
 
112
131
  def collections
113
132
  @collections ||= predicate_druids('isMemberOfCollection').map do |druid|
114
- PublicXmlRecord.new(druid, settings)
133
+ PublicXmlRecord.new(druid, options)
115
134
  end
116
135
  end
117
136
 
118
137
  def constituents
119
138
  @constituents ||= predicate_druids('isConstituentOf').map do |druid|
120
- PublicXmlRecord.new(druid, settings)
139
+ PublicXmlRecord.new(druid, options)
121
140
  end
122
141
  end
123
142
 
143
+ def items(&block)
144
+ return [] unless is_collection
145
+
146
+ purl_fetcher_client.collection_members(druid, &block)
147
+ end
148
+
124
149
  # the thumbnail in publicXML properly URI encoded, including the slash separator
125
150
  # @return [String] thumb filename with druid prepended, e.g. oo000oo0001%2Ffilename%20withspace.jp2
126
151
  def encoded_thumb
@@ -128,7 +153,7 @@ module PurlFetcher::Client
128
153
  return unless thumb
129
154
  thumb_druid=thumb.split('/').first # the druid (before the first slash)
130
155
  thumb_filename=thumb.split(/[a-zA-Z]{2}[0-9]{3}[a-zA-Z]{2}[0-9]{4}[\/]/).last # everything after the druid
131
- "#{thumb_druid}%2F#{URI.escape(thumb_filename)}"
156
+ "#{thumb_druid}%2F#{ERB::Util.url_encode(thumb_filename)}"
132
157
  end
133
158
 
134
159
  # get the druids from predicate relationships in rels-ext from public_xml
@@ -142,8 +167,39 @@ module PurlFetcher::Client
142
167
  end
143
168
  end
144
169
 
170
+ def druid_tree
171
+ druid.match(/(..)(...)(..)(....)/).captures.join('/')
172
+ end
173
+
174
+ def rights_xml
175
+ @rights_xml ||= public_xml_doc.xpath('//rightsMetadata').to_s
176
+ end
177
+
178
+ def rights
179
+ @rights ||= ::Dor::RightsAuth.parse(rights_xml)
180
+ end
181
+
182
+ def public?
183
+ rights.world_unrestricted?
184
+ end
185
+
186
+ def stanford_only?
187
+ rights.stanford_only_unrestricted?
188
+ end
189
+
145
190
  def purl_base_url
146
- options[:purl_url] || 'https://purl.stanford.edu'
191
+ options[:purl_url]&.sub(%r{/$}, '') || 'https://purl.stanford.edu'
192
+ end
193
+
194
+ def purl_fetcher_api_endpoint
195
+ options[:purl_fetcher_url] || 'https://purl-fetcher.stanford.edu'
196
+ end
197
+
198
+ def purl_fetcher_client
199
+ @purl_fetcher_client ||= PurlFetcher::Client::Reader.new(
200
+ nil,
201
+ 'purl_fetcher.api_endpoint' => purl_fetcher_api_endpoint
202
+ )
147
203
  end
148
204
  end
149
205
  end
@@ -1,10 +1,11 @@
1
1
  class PurlFetcher::Client::Reader
2
2
  include Enumerable
3
- attr_reader :input_stream, :settings
3
+ attr_reader :input_stream, :settings, :range
4
4
 
5
5
  def initialize(input_stream, settings = {})
6
6
  @settings = settings
7
7
  @input_stream = input_stream
8
+ @range = {}
8
9
  end
9
10
 
10
11
  def each
@@ -15,7 +16,15 @@ class PurlFetcher::Client::Reader
15
16
 
16
17
  public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
17
18
 
18
- yield public_xml, change, meta
19
+ yield public_xml, change, self
20
+ end
21
+ end
22
+
23
+ def collection_members(druid)
24
+ return to_enum(:collection_members, druid) unless block_given?
25
+
26
+ paginated_get("/collections/druid:#{druid.sub(/^druid:/, '')}/purls", 'purls').each do |obj, _meta|
27
+ yield PurlFetcher::Client::PublicXmlRecord.new(obj['druid'].sub('druid:', ''), settings), obj, self
19
28
  end
20
29
  end
21
30
 
@@ -68,18 +77,19 @@ class PurlFetcher::Client::Reader
68
77
  def paginated_get(path, accessor, options = {})
69
78
  Enumerator.new do |yielder|
70
79
  params = options.dup
71
- per_page = params.delete(:per_page) { 100 }
80
+ per_page = params.delete(:per_page) { 1000 }
72
81
  page = params.delete(:page) { 1 }
73
82
  max = params.delete(:max) { 1_000_000 }
74
83
  total = 0
75
84
 
76
85
  loop do
77
86
  data = get(path, { per_page: per_page, page: page }.merge(params))
87
+ @range = data['range']
78
88
 
79
89
  total += data[accessor].length
80
90
 
81
91
  data[accessor].each do |element|
82
- yielder.yield element, { 'range' => data['range'] || {} }
92
+ yielder.yield element, self
83
93
  end
84
94
 
85
95
  page = data['pages']['next_page']
@@ -1,5 +1,5 @@
1
1
  module PurlFetcher
2
2
  module Client
3
- VERSION = "0.2.0"
3
+ VERSION = "0.5.0"
4
4
  end
5
5
  end
@@ -23,8 +23,10 @@ Gem::Specification.new do |spec|
23
23
  spec.add_dependency 'http'
24
24
  spec.add_dependency 'nokogiri'
25
25
  spec.add_dependency 'stanford-mods'
26
+ spec.add_dependency 'dor-rights-auth'
27
+ spec.add_dependency 'mods_display', '>= 1.0.0.alpha1'
26
28
 
27
29
  spec.add_development_dependency "bundler"
28
- spec.add_development_dependency "rake", "~> 10.0"
30
+ spec.add_development_dependency "rake"
29
31
  spec.add_development_dependency "rspec", "~> 3.0"
30
32
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: purl_fetcher-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Beer
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-05-29 00:00:00.000000000 Z
11
+ date: 2022-01-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: http
@@ -52,6 +52,34 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: dor-rights-auth
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: mods_display
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: 1.0.0.alpha1
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 1.0.0.alpha1
55
83
  - !ruby/object:Gem::Dependency
56
84
  name: bundler
57
85
  requirement: !ruby/object:Gem::Requirement
@@ -70,16 +98,16 @@ dependencies:
70
98
  name: rake
71
99
  requirement: !ruby/object:Gem::Requirement
72
100
  requirements:
73
- - - "~>"
101
+ - - ">="
74
102
  - !ruby/object:Gem::Version
75
- version: '10.0'
103
+ version: '0'
76
104
  type: :development
77
105
  prerelease: false
78
106
  version_requirements: !ruby/object:Gem::Requirement
79
107
  requirements:
80
- - - "~>"
108
+ - - ">="
81
109
  - !ruby/object:Gem::Version
82
- version: '10.0'
110
+ version: '0'
83
111
  - !ruby/object:Gem::Dependency
84
112
  name: rspec
85
113
  requirement: !ruby/object:Gem::Requirement
@@ -94,16 +122,16 @@ dependencies:
94
122
  - - "~>"
95
123
  - !ruby/object:Gem::Version
96
124
  version: '3.0'
97
- description:
125
+ description:
98
126
  email:
99
127
  - cabeer@stanford.edu
100
128
  executables: []
101
129
  extensions: []
102
130
  extra_rdoc_files: []
103
131
  files:
132
+ - ".github/workflows/ruby.yml"
104
133
  - ".gitignore"
105
134
  - ".rspec"
106
- - ".travis.yml"
107
135
  - CODE_OF_CONDUCT.md
108
136
  - Gemfile
109
137
  - README.md
@@ -116,10 +144,10 @@ files:
116
144
  - lib/purl_fetcher/client/reader.rb
117
145
  - lib/purl_fetcher/client/version.rb
118
146
  - purl_fetcher-client.gemspec
119
- homepage:
147
+ homepage:
120
148
  licenses: []
121
149
  metadata: {}
122
- post_install_message:
150
+ post_install_message:
123
151
  rdoc_options: []
124
152
  require_paths:
125
153
  - lib
@@ -134,8 +162,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
134
162
  - !ruby/object:Gem::Version
135
163
  version: '0'
136
164
  requirements: []
137
- rubygems_version: 3.0.3
138
- signing_key:
165
+ rubygems_version: 3.2.32
166
+ signing_key:
139
167
  specification_version: 4
140
168
  summary: Traject-compatible reader implementation for streaming data from purl-fetcher
141
169
  test_files: []
data/.travis.yml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- sudo: false
3
- language: ruby
4
- cache: bundler
5
- rvm:
6
- - 2.4.1
7
- before_install: gem install bundler -v 1.16.3