purl_fetcher-client 0.2.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6ec6b5027a5b19369098f11455f0d3244ff3758d5469bd91998b63f2f80ccf80
4
- data.tar.gz: fbfa4939cba5c30ea7f31d98923e6a48114dc0db5e4d29494a1062e5d1199e64
3
+ metadata.gz: b80b63b29ffbed55055e58d2d063f4496f84015b1130a6908e82e4d91a18c94d
4
+ data.tar.gz: 3d9691d8e9939f17a8d4f3a66a03bb3644463a3ffcc9604b8558242614f4e0a6
5
5
  SHA512:
6
- metadata.gz: 5d6a1d548fc27b0eb7496f2f3603ef10acb22f7931e32899442b1f027d36a9b4b6d8f18bc517dd0d220cb9dff4cc3a56c79b58d9441d9a540d6bafc6d3f821ba
7
- data.tar.gz: f7aa278c16d5d1da2b38fb9ea9f11adbe4fe3cd4d2fc262b8f00cbfd00ee1a6ebdd933a2493c069904fc82bcd0e63feb0ad7598f76315838bbeaf9dd002f8a95
6
+ metadata.gz: '0484a2254521f201fed8d95cf65ab85a3310111c6be46c6d13ecb4e7c40cec3d807861b42c47dc389f34c242918733dce7c21b66beb872d062112bbfe8674dd0'
7
+ data.tar.gz: ff5cdc4fe8d78eedeba1594e8aa47e87bfb6186337d4f1042f928a56f0a6b854487e7acfba5398e306401a123cb710ae0ac4bc6b9b11b157a81d20550b789570
@@ -0,0 +1,27 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ master ]
6
+ pull_request:
7
+ branches: [ master ]
8
+
9
+ jobs:
10
+ tests:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ - name: Set up Ruby
15
+ uses: ruby/setup-ruby@v1
16
+ with:
17
+ ruby-version: 3.0
18
+ - uses: actions/cache@v2
19
+ with:
20
+ path: vendor/bundle
21
+ key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }}
22
+ restore-keys: |
23
+ ${{ runner.os }}-gems-
24
+ - name: Bundle install
25
+ run: bundle install
26
+ - name: Run tests
27
+ run: bundle exec rake
@@ -4,15 +4,18 @@ class PurlFetcher::Client::DeletesReader < PurlFetcher::Client::Reader
4
4
  return to_enum(:each) unless block_given?
5
5
 
6
6
  deletes(first_modified: first_modified).each do |change|
7
- yield PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
7
+
8
+ public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
9
+
10
+ yield public_xml, change, self
8
11
  end
9
12
 
10
13
  changes(first_modified: first_modified, target: target).each do |change|
11
- record = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
14
+ public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
12
15
 
13
16
  next unless target.nil? || (change['false_targets'] && change['false_targets'].include?(target)) || (settings['skip_if_catkey'] && record.catkey)
14
17
 
15
- yield record
18
+ yield public_xml, change, self
16
19
  end
17
20
  end
18
21
  end
@@ -1,15 +1,19 @@
1
1
  require 'nokogiri'
2
2
  require 'stanford-mods'
3
+ require 'mods_display'
4
+ require 'dor/rights_auth'
3
5
 
4
6
  module PurlFetcher::Client
5
7
  class PublicXmlRecord
6
8
  attr_reader :druid, :options
7
9
 
8
10
  def self.fetch(url)
9
- if defined?(Manticore)
10
- Manticore.get(url).body
11
+ if defined?(JRUBY_VERSION)
12
+ response = Manticore.get(url)
13
+ response.body if response.code == 200
11
14
  else
12
- HTTP.get(url).body
15
+ response = HTTP.get(url)
16
+ response.body if response.status.ok?
13
17
  end
14
18
  end
15
19
 
@@ -42,10 +46,18 @@ module PurlFetcher::Client
42
46
  end
43
47
  end
44
48
 
49
+ def mods_display
50
+ @mods_display ||= ModsDisplay::HTML.new(stanford_mods)
51
+ end
52
+
45
53
  def public_xml
46
54
  @public_xml ||= self.class.fetch(purl_base_url + "/#{druid}.xml")
47
55
  end
48
56
 
57
+ def public_xml?
58
+ !!public_xml
59
+ end
60
+
49
61
  def public_xml_doc
50
62
  @public_xml_doc ||= Nokogiri::XML(public_xml)
51
63
  end
@@ -54,7 +66,14 @@ module PurlFetcher::Client
54
66
  @mods ||= if public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').any?
55
67
  public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').first
56
68
  else
57
- self.class.fetch(purl_base_url + "#{druid}.mods")
69
+ if defined?(Honeybadger)
70
+ Honeybadger.notify(
71
+ 'Unable to find MODS in the public xml; falling back to stand-along mods document',
72
+ context: { druid: druid }
73
+ )
74
+ end
75
+
76
+ Nokogiri::XML(self.class.fetch(purl_base_url + "/#{druid}.mods"))
58
77
  end
59
78
  end
60
79
 
@@ -111,16 +130,22 @@ module PurlFetcher::Client
111
130
 
112
131
  def collections
113
132
  @collections ||= predicate_druids('isMemberOfCollection').map do |druid|
114
- PublicXmlRecord.new(druid, settings)
133
+ PublicXmlRecord.new(druid, options)
115
134
  end
116
135
  end
117
136
 
118
137
  def constituents
119
138
  @constituents ||= predicate_druids('isConstituentOf').map do |druid|
120
- PublicXmlRecord.new(druid, settings)
139
+ PublicXmlRecord.new(druid, options)
121
140
  end
122
141
  end
123
142
 
143
+ def items(&block)
144
+ return [] unless is_collection
145
+
146
+ purl_fetcher_client.collection_members(druid, &block)
147
+ end
148
+
124
149
  # the thumbnail in publicXML properly URI encoded, including the slash separator
125
150
  # @return [String] thumb filename with druid prepended, e.g. oo000oo0001%2Ffilename%20withspace.jp2
126
151
  def encoded_thumb
@@ -128,7 +153,7 @@ module PurlFetcher::Client
128
153
  return unless thumb
129
154
  thumb_druid=thumb.split('/').first # the druid (before the first slash)
130
155
  thumb_filename=thumb.split(/[a-zA-Z]{2}[0-9]{3}[a-zA-Z]{2}[0-9]{4}[\/]/).last # everything after the druid
131
- "#{thumb_druid}%2F#{URI.escape(thumb_filename)}"
156
+ "#{thumb_druid}%2F#{ERB::Util.url_encode(thumb_filename)}"
132
157
  end
133
158
 
134
159
  # get the druids from predicate relationships in rels-ext from public_xml
@@ -142,8 +167,39 @@ module PurlFetcher::Client
142
167
  end
143
168
  end
144
169
 
170
+ def druid_tree
171
+ druid.match(/(..)(...)(..)(....)/).captures.join('/')
172
+ end
173
+
174
+ def rights_xml
175
+ @rights_xml ||= public_xml_doc.xpath('//rightsMetadata').to_s
176
+ end
177
+
178
+ def rights
179
+ @rights ||= ::Dor::RightsAuth.parse(rights_xml)
180
+ end
181
+
182
+ def public?
183
+ rights.world_unrestricted?
184
+ end
185
+
186
+ def stanford_only?
187
+ rights.stanford_only_unrestricted?
188
+ end
189
+
145
190
  def purl_base_url
146
- options[:purl_url] || 'https://purl.stanford.edu'
191
+ options[:purl_url]&.sub(%r{/$}, '') || 'https://purl.stanford.edu'
192
+ end
193
+
194
+ def purl_fetcher_api_endpoint
195
+ options[:purl_fetcher_url] || 'https://purl-fetcher.stanford.edu'
196
+ end
197
+
198
+ def purl_fetcher_client
199
+ @purl_fetcher_client ||= PurlFetcher::Client::Reader.new(
200
+ nil,
201
+ 'purl_fetcher.api_endpoint' => purl_fetcher_api_endpoint
202
+ )
147
203
  end
148
204
  end
149
205
  end
@@ -1,10 +1,11 @@
1
1
  class PurlFetcher::Client::Reader
2
2
  include Enumerable
3
- attr_reader :input_stream, :settings
3
+ attr_reader :input_stream, :settings, :range
4
4
 
5
5
  def initialize(input_stream, settings = {})
6
6
  @settings = settings
7
7
  @input_stream = input_stream
8
+ @range = {}
8
9
  end
9
10
 
10
11
  def each
@@ -15,7 +16,15 @@ class PurlFetcher::Client::Reader
15
16
 
16
17
  public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
17
18
 
18
- yield public_xml, change, meta
19
+ yield public_xml, change, self
20
+ end
21
+ end
22
+
23
+ def collection_members(druid)
24
+ return to_enum(:collection_members, druid) unless block_given?
25
+
26
+ paginated_get("/collections/druid:#{druid.sub(/^druid:/, '')}/purls", 'purls').each do |obj, _meta|
27
+ yield PurlFetcher::Client::PublicXmlRecord.new(obj['druid'].sub('druid:', ''), settings), obj, self
19
28
  end
20
29
  end
21
30
 
@@ -68,18 +77,19 @@ class PurlFetcher::Client::Reader
68
77
  def paginated_get(path, accessor, options = {})
69
78
  Enumerator.new do |yielder|
70
79
  params = options.dup
71
- per_page = params.delete(:per_page) { 100 }
80
+ per_page = params.delete(:per_page) { 1000 }
72
81
  page = params.delete(:page) { 1 }
73
82
  max = params.delete(:max) { 1_000_000 }
74
83
  total = 0
75
84
 
76
85
  loop do
77
86
  data = get(path, { per_page: per_page, page: page }.merge(params))
87
+ @range = data['range']
78
88
 
79
89
  total += data[accessor].length
80
90
 
81
91
  data[accessor].each do |element|
82
- yielder.yield element, { 'range' => data['range'] || {} }
92
+ yielder.yield element, self
83
93
  end
84
94
 
85
95
  page = data['pages']['next_page']
@@ -1,5 +1,5 @@
1
1
  module PurlFetcher
2
2
  module Client
3
- VERSION = "0.2.0"
3
+ VERSION = "0.5.0"
4
4
  end
5
5
  end
@@ -23,8 +23,10 @@ Gem::Specification.new do |spec|
23
23
  spec.add_dependency 'http'
24
24
  spec.add_dependency 'nokogiri'
25
25
  spec.add_dependency 'stanford-mods'
26
+ spec.add_dependency 'dor-rights-auth'
27
+ spec.add_dependency 'mods_display', '>= 1.0.0.alpha1'
26
28
 
27
29
  spec.add_development_dependency "bundler"
28
- spec.add_development_dependency "rake", "~> 10.0"
30
+ spec.add_development_dependency "rake"
29
31
  spec.add_development_dependency "rspec", "~> 3.0"
30
32
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: purl_fetcher-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Beer
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-05-29 00:00:00.000000000 Z
11
+ date: 2022-01-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: http
@@ -52,6 +52,34 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: dor-rights-auth
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: mods_display
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: 1.0.0.alpha1
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 1.0.0.alpha1
55
83
  - !ruby/object:Gem::Dependency
56
84
  name: bundler
57
85
  requirement: !ruby/object:Gem::Requirement
@@ -70,16 +98,16 @@ dependencies:
70
98
  name: rake
71
99
  requirement: !ruby/object:Gem::Requirement
72
100
  requirements:
73
- - - "~>"
101
+ - - ">="
74
102
  - !ruby/object:Gem::Version
75
- version: '10.0'
103
+ version: '0'
76
104
  type: :development
77
105
  prerelease: false
78
106
  version_requirements: !ruby/object:Gem::Requirement
79
107
  requirements:
80
- - - "~>"
108
+ - - ">="
81
109
  - !ruby/object:Gem::Version
82
- version: '10.0'
110
+ version: '0'
83
111
  - !ruby/object:Gem::Dependency
84
112
  name: rspec
85
113
  requirement: !ruby/object:Gem::Requirement
@@ -94,16 +122,16 @@ dependencies:
94
122
  - - "~>"
95
123
  - !ruby/object:Gem::Version
96
124
  version: '3.0'
97
- description:
125
+ description:
98
126
  email:
99
127
  - cabeer@stanford.edu
100
128
  executables: []
101
129
  extensions: []
102
130
  extra_rdoc_files: []
103
131
  files:
132
+ - ".github/workflows/ruby.yml"
104
133
  - ".gitignore"
105
134
  - ".rspec"
106
- - ".travis.yml"
107
135
  - CODE_OF_CONDUCT.md
108
136
  - Gemfile
109
137
  - README.md
@@ -116,10 +144,10 @@ files:
116
144
  - lib/purl_fetcher/client/reader.rb
117
145
  - lib/purl_fetcher/client/version.rb
118
146
  - purl_fetcher-client.gemspec
119
- homepage:
147
+ homepage:
120
148
  licenses: []
121
149
  metadata: {}
122
- post_install_message:
150
+ post_install_message:
123
151
  rdoc_options: []
124
152
  require_paths:
125
153
  - lib
@@ -134,8 +162,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
134
162
  - !ruby/object:Gem::Version
135
163
  version: '0'
136
164
  requirements: []
137
- rubygems_version: 3.0.3
138
- signing_key:
165
+ rubygems_version: 3.2.32
166
+ signing_key:
139
167
  specification_version: 4
140
168
  summary: Traject-compatible reader implementation for streaming data from purl-fetcher
141
169
  test_files: []
data/.travis.yml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- sudo: false
3
- language: ruby
4
- cache: bundler
5
- rvm:
6
- - 2.4.1
7
- before_install: gem install bundler -v 1.16.3