purl_fetcher-client 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b80b63b29ffbed55055e58d2d063f4496f84015b1130a6908e82e4d91a18c94d
4
- data.tar.gz: 3d9691d8e9939f17a8d4f3a66a03bb3644463a3ffcc9604b8558242614f4e0a6
3
+ metadata.gz: cade5ac0289e6a39d9d9dca571bea47032391fa774d1356e56dbada402e6bb5e
4
+ data.tar.gz: 043a7f61d74b733bf2fd12bf26704ff25af6539f10c5731ae4843f11c2376c1a
5
5
  SHA512:
6
- metadata.gz: '0484a2254521f201fed8d95cf65ab85a3310111c6be46c6d13ecb4e7c40cec3d807861b42c47dc389f34c242918733dce7c21b66beb872d062112bbfe8674dd0'
7
- data.tar.gz: ff5cdc4fe8d78eedeba1594e8aa47e87bfb6186337d4f1042f928a56f0a6b854487e7acfba5398e306401a123cb710ae0ac4bc6b9b11b157a81d20550b789570
6
+ metadata.gz: fa1bce8d7e6ef5090a19d8cf8ecf42eeeb24f2c5ef59345c77ded4b50752f01e1b18307dd65aafcd0aa8c4873210becf6fcdfacb5cdd6179034c023a609d3218
7
+ data.tar.gz: 8f76a7eecfff70435931364f827571733cc6f4d9e7cc90d8f65276580eab0dbcd5cd8eddd5a146b3e7fe016b2f4d69a3dff177e77e941ef7bbe47e49b975d3dc
@@ -2,9 +2,9 @@ name: CI
2
2
 
3
3
  on:
4
4
  push:
5
- branches: [ master ]
5
+ branches: [ main ]
6
6
  pull_request:
7
- branches: [ master ]
7
+ branches: [ main ]
8
8
 
9
9
  jobs:
10
10
  tests:
data/README.md CHANGED
@@ -36,4 +36,4 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERN
36
36
 
37
37
  ## Code of Conduct
38
38
 
39
- Everyone interacting in the PurlFetcher::Client project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/purl_fetcher-client/blob/master/CODE_OF_CONDUCT.md).
39
+ Everyone interacting in the PurlFetcher::Client project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/purl_fetcher-client/blob/main/CODE_OF_CONDUCT.md).
@@ -1,77 +1,48 @@
1
1
  class PurlFetcher::Client::Reader
2
2
  include Enumerable
3
- attr_reader :input_stream, :settings, :range
3
+ attr_reader :host, :conn, :range
4
4
 
5
- def initialize(input_stream, settings = {})
6
- @settings = settings
7
- @input_stream = input_stream
5
+ def initialize(host: 'https://purl-fetcher.stanford.edu', conn: nil)
6
+ @host = host
7
+ @conn = conn || Faraday.new(host) do |f|
8
+ f.response :json
9
+ end
8
10
  @range = {}
9
11
  end
10
12
 
11
- def each
12
- return to_enum(:each) unless block_given?
13
-
14
- changes(first_modified: first_modified, target: target).each do |change, meta|
15
- next unless target.nil? || (change['true_targets'] && change['true_targets'].include?(target))
16
-
17
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
18
-
19
- yield public_xml, change, self
20
- end
21
- end
22
-
23
13
  def collection_members(druid)
24
14
  return to_enum(:collection_members, druid) unless block_given?
25
15
 
26
- paginated_get("/collections/druid:#{druid.sub(/^druid:/, '')}/purls", 'purls').each do |obj, _meta|
27
- yield PurlFetcher::Client::PublicXmlRecord.new(obj['druid'].sub('druid:', ''), settings), obj, self
16
+ paginated_get("/collections/druid:#{druid.delete_prefix('druid:')}/purls", 'purls').each do |obj, _meta|
17
+ yield obj['druid'].delete_prefix('druid:')
28
18
  end
29
19
  end
30
20
 
31
21
  private
32
22
 
33
- def first_modified
34
- settings['purl_fetcher.first_modified']
35
- end
36
-
37
- def target
38
- settings['purl_fetcher.target']
39
- end
40
-
41
- ##
42
- # @return [Enumerator]
43
- def changes(params = {})
44
- paginated_get('/docs/changes', 'changes', params)
45
- end
46
-
47
- ##
48
- # @return [Enumerator]
49
- def deletes(params = {})
50
- paginated_get('/docs/deletes', 'deletes', params)
51
- end
52
-
53
23
  ##
54
24
  # @return [Hash] a parsed JSON hash
55
- def get(path, params = {})
56
- JSON.parse(fetch(settings.fetch('purl_fetcher.api_endpoint', 'https://purl-fetcher.stanford.edu') + path, params))
57
- end
25
+ def fetch(path, params)
26
+ response = conn.get(path, params: params)
58
27
 
59
- def fetch(url, params)
60
- if defined?(Manticore)
61
- Manticore.get(url, query: params).body
62
- else
63
- HTTP.get(url, params: params).body
28
+ unless response.success?
29
+ if defined?(Honeybadger)
30
+ Honeybadger.context({ path:, params:, response_code: response.code, body: response.body })
31
+ end
32
+ raise PurlFetcher::Client::ResponseError, "Unsuccessful response from purl-fetcher"
64
33
  end
34
+
35
+ response.body
65
36
  end
66
37
 
67
38
  ##
68
39
  # For performance, an enumerable object is returned.
69
40
  #
70
41
  # @example operating on each of the results as they come in
71
- # paginated_get('/docs/changes', 'changes').map { |v| puts v.inspect }
42
+ # paginated_get('/collections/druid:123/purls', 'purls').map { |v| puts v.inspect }
72
43
  #
73
44
  # @example getting all of the results and converting to an array
74
- # paginated_get('/docs/changes', 'changes').to_a
45
+ # paginated_get('/collections/druid:123/purls', 'purls').to_a
75
46
  #
76
47
  # @return [Enumerator] an enumerable object
77
48
  def paginated_get(path, accessor, options = {})
@@ -83,7 +54,7 @@ class PurlFetcher::Client::Reader
83
54
  total = 0
84
55
 
85
56
  loop do
86
- data = get(path, { per_page: per_page, page: page }.merge(params))
57
+ data = fetch(path, { per_page: per_page, page: page }.merge(params))
87
58
  @range = data['range']
88
59
 
89
60
  total += data[accessor].length
@@ -1,5 +1,5 @@
1
1
  module PurlFetcher
2
2
  module Client
3
- VERSION = "0.5.0"
3
+ VERSION = "1.0.0"
4
4
  end
5
5
  end
@@ -1,15 +1,14 @@
1
1
  require "purl_fetcher/client/version"
2
- require 'http'
3
- begin
4
- require 'manticore' if defined? JRUBY_VERSION
5
- rescue LoadError
6
- end
2
+ require 'faraday'
7
3
 
8
4
  module PurlFetcher
9
5
  module Client
10
- require 'purl_fetcher/client/public_xml_record'
11
6
  require 'purl_fetcher/client/reader'
12
- require 'purl_fetcher/client/deletes_reader'
13
- # Your code goes here...
7
+
8
+ # General error originating in PurlFetcher::Client
9
+ class Error < StandardError; end
10
+
11
+ # Raised when the response from the server is not successful
12
+ class ResponseError < Error; end
14
13
  end
15
14
  end
@@ -20,13 +20,11 @@ Gem::Specification.new do |spec|
20
20
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
21
  spec.require_paths = ["lib"]
22
22
 
23
- spec.add_dependency 'http'
24
- spec.add_dependency 'nokogiri'
25
- spec.add_dependency 'stanford-mods'
26
- spec.add_dependency 'dor-rights-auth'
27
- spec.add_dependency 'mods_display', '>= 1.0.0.alpha1'
23
+ spec.add_dependency 'faraday', '~> 2.1'
28
24
 
29
25
  spec.add_development_dependency "bundler"
26
+ spec.add_development_dependency "debug"
30
27
  spec.add_development_dependency "rake"
31
28
  spec.add_development_dependency "rspec", "~> 3.0"
29
+ spec.add_development_dependency "webmock"
32
30
  end
metadata CHANGED
@@ -1,37 +1,37 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: purl_fetcher-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Beer
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-13 00:00:00.000000000 Z
11
+ date: 2024-04-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: http
14
+ name: faraday
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '2.1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: nokogiri
28
+ name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
- type: :runtime
34
+ type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
@@ -39,13 +39,13 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: stanford-mods
42
+ name: debug
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
- type: :runtime
48
+ type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
@@ -53,13 +53,13 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: dor-rights-auth
56
+ name: rake
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
- type: :runtime
62
+ type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
@@ -67,35 +67,21 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: mods_display
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: 1.0.0.alpha1
76
- type: :runtime
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: 1.0.0.alpha1
83
- - !ruby/object:Gem::Dependency
84
- name: bundler
70
+ name: rspec
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
- - - ">="
73
+ - - "~>"
88
74
  - !ruby/object:Gem::Version
89
- version: '0'
75
+ version: '3.0'
90
76
  type: :development
91
77
  prerelease: false
92
78
  version_requirements: !ruby/object:Gem::Requirement
93
79
  requirements:
94
- - - ">="
80
+ - - "~>"
95
81
  - !ruby/object:Gem::Version
96
- version: '0'
82
+ version: '3.0'
97
83
  - !ruby/object:Gem::Dependency
98
- name: rake
84
+ name: webmock
99
85
  requirement: !ruby/object:Gem::Requirement
100
86
  requirements:
101
87
  - - ">="
@@ -108,20 +94,6 @@ dependencies:
108
94
  - - ">="
109
95
  - !ruby/object:Gem::Version
110
96
  version: '0'
111
- - !ruby/object:Gem::Dependency
112
- name: rspec
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - "~>"
116
- - !ruby/object:Gem::Version
117
- version: '3.0'
118
- type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - "~>"
123
- - !ruby/object:Gem::Version
124
- version: '3.0'
125
97
  description:
126
98
  email:
127
99
  - cabeer@stanford.edu
@@ -139,8 +111,6 @@ files:
139
111
  - bin/console
140
112
  - bin/setup
141
113
  - lib/purl_fetcher/client.rb
142
- - lib/purl_fetcher/client/deletes_reader.rb
143
- - lib/purl_fetcher/client/public_xml_record.rb
144
114
  - lib/purl_fetcher/client/reader.rb
145
115
  - lib/purl_fetcher/client/version.rb
146
116
  - purl_fetcher-client.gemspec
@@ -162,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
162
132
  - !ruby/object:Gem::Version
163
133
  version: '0'
164
134
  requirements: []
165
- rubygems_version: 3.2.32
135
+ rubygems_version: 3.4.19
166
136
  signing_key:
167
137
  specification_version: 4
168
138
  summary: Traject-compatible reader implementation for streaming data from purl-fetcher
@@ -1,21 +0,0 @@
1
- class PurlFetcher::Client::DeletesReader < PurlFetcher::Client::Reader
2
- # Enumerate objects that should be deleted.
3
- def each
4
- return to_enum(:each) unless block_given?
5
-
6
- deletes(first_modified: first_modified).each do |change|
7
-
8
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
9
-
10
- yield public_xml, change, self
11
- end
12
-
13
- changes(first_modified: first_modified, target: target).each do |change|
14
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
15
-
16
- next unless target.nil? || (change['false_targets'] && change['false_targets'].include?(target)) || (settings['skip_if_catkey'] && record.catkey)
17
-
18
- yield public_xml, change, self
19
- end
20
- end
21
- end
@@ -1,205 +0,0 @@
1
- require 'nokogiri'
2
- require 'stanford-mods'
3
- require 'mods_display'
4
- require 'dor/rights_auth'
5
-
6
- module PurlFetcher::Client
7
- class PublicXmlRecord
8
- attr_reader :druid, :options
9
-
10
- def self.fetch(url)
11
- if defined?(JRUBY_VERSION)
12
- response = Manticore.get(url)
13
- response.body if response.code == 200
14
- else
15
- response = HTTP.get(url)
16
- response.body if response.status.ok?
17
- end
18
- end
19
-
20
- def initialize(druid, options = {})
21
- @druid = druid
22
- @options = options
23
- end
24
-
25
- def searchworks_id
26
- catkey.nil? ? druid : catkey
27
- end
28
-
29
- # @return catkey value from the DOR identity_metadata, or nil if there is no catkey
30
- def catkey
31
- get_value(public_xml_doc.xpath("/publicObject/identityMetadata/otherId[@name='catkey']"))
32
- end
33
-
34
- # @return objectLabel value from the DOR identity_metadata, or nil if there is no barcode
35
- def label
36
- get_value(public_xml_doc.xpath('/publicObject/identityMetadata/objectLabel'))
37
- end
38
-
39
- def get_value(node)
40
- (node && node.first) ? node.first.content : nil
41
- end
42
-
43
- def stanford_mods
44
- @smods_rec ||= Stanford::Mods::Record.new.tap do |smods_rec|
45
- smods_rec.from_str(mods.to_s)
46
- end
47
- end
48
-
49
- def mods_display
50
- @mods_display ||= ModsDisplay::HTML.new(stanford_mods)
51
- end
52
-
53
- def public_xml
54
- @public_xml ||= self.class.fetch(purl_base_url + "/#{druid}.xml")
55
- end
56
-
57
- def public_xml?
58
- !!public_xml
59
- end
60
-
61
- def public_xml_doc
62
- @public_xml_doc ||= Nokogiri::XML(public_xml)
63
- end
64
-
65
- def mods
66
- @mods ||= if public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').any?
67
- public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').first
68
- else
69
- if defined?(Honeybadger)
70
- Honeybadger.notify(
71
- 'Unable to find MODS in the public xml; falling back to stand-along mods document',
72
- context: { druid: druid }
73
- )
74
- end
75
-
76
- Nokogiri::XML(self.class.fetch(purl_base_url + "/#{druid}.mods"))
77
- end
78
- end
79
-
80
- # @return true if the identityMetadata has <objectType>collection</objectType>, false otherwise
81
- def is_collection
82
- object_type_nodes = public_xml_doc.xpath('//objectType')
83
- object_type_nodes.find_index { |n| %w(collection set).include? n.text.downcase }
84
- end
85
-
86
- # value is used to tell SearchWorks UI app of specific display needs for objects
87
- # this comes from the <thumb> element in publicXML or the first image found (as parsed by discovery-indexer)
88
- # @return [String] filename or nil if none found
89
- def thumb
90
- return if is_collection
91
- encoded_thumb if %w(book image manuscript map webarchive-seed).include?(dor_content_type)
92
- end
93
-
94
- # the value of the type attribute for a DOR object's contentMetadata
95
- # more info about these values is here:
96
- # https://consul.stanford.edu/display/chimera/DOR+content+types%2C+resource+types+and+interpretive+metadata
97
- # https://consul.stanford.edu/display/chimera/Summary+of+Content+Types%2C+Resource+Types+and+their+behaviors
98
- # @return [String]
99
- def dor_content_type
100
- public_xml_doc.xpath('//contentMetadata/@type').text
101
- end
102
-
103
- # the thumbnail in publicXML, falling back to the first image if no thumb node is found
104
- # @return [String] thumb filename with druid prepended, e.g. oo000oo0001/filename withspace.jp2
105
- def parse_thumb
106
- unless public_xml_doc.nil?
107
- thumb = public_xml_doc.xpath('//thumb')
108
- # first try and parse what is in the thumb node of publicXML, but fallback to the first image if needed
109
- if thumb.size == 1
110
- thumb.first.content
111
- elsif thumb.size == 0 && parse_sw_image_ids.size > 0
112
- parse_sw_image_ids.first
113
- else
114
- nil
115
- end
116
- end
117
- end
118
-
119
- # the druid and id attribute of resource/file and objectId and fileId of the
120
- # resource/externalFile elements that match the image, page, or thumb resource type, including extension
121
- # Also, prepends the corresponding druid and / specifically for Searchworks use
122
- # @return [Array<String>] filenames
123
- def parse_sw_image_ids
124
- public_xml_doc.xpath('//resource[@type="page" or @type="image" or @type="thumb"]').map do |node|
125
- node.xpath('./file[@mimetype="image/jp2"]/@id').map{ |x| "#{@druid.gsub('druid:','')}/" + x } << node.xpath('./externalFile[@mimetype="image/jp2"]').map do |y|
126
- "#{y.attributes['objectId'].text.split(':').last}" + "/" + "#{y.attributes['fileId']}"
127
- end
128
- end.flatten
129
- end
130
-
131
- def collections
132
- @collections ||= predicate_druids('isMemberOfCollection').map do |druid|
133
- PublicXmlRecord.new(druid, options)
134
- end
135
- end
136
-
137
- def constituents
138
- @constituents ||= predicate_druids('isConstituentOf').map do |druid|
139
- PublicXmlRecord.new(druid, options)
140
- end
141
- end
142
-
143
- def items(&block)
144
- return [] unless is_collection
145
-
146
- purl_fetcher_client.collection_members(druid, &block)
147
- end
148
-
149
- # the thumbnail in publicXML properly URI encoded, including the slash separator
150
- # @return [String] thumb filename with druid prepended, e.g. oo000oo0001%2Ffilename%20withspace.jp2
151
- def encoded_thumb
152
- thumb=parse_thumb
153
- return unless thumb
154
- thumb_druid=thumb.split('/').first # the druid (before the first slash)
155
- thumb_filename=thumb.split(/[a-zA-Z]{2}[0-9]{3}[a-zA-Z]{2}[0-9]{4}[\/]/).last # everything after the druid
156
- "#{thumb_druid}%2F#{ERB::Util.url_encode(thumb_filename)}"
157
- end
158
-
159
- # get the druids from predicate relationships in rels-ext from public_xml
160
- # @return [Array<String>, nil] the druids (e.g. ww123yy1234) from the rdf:resource of the predicate relationships, or nil if none
161
- def predicate_druids(predicate, predicate_ns = 'info:fedora/fedora-system:def/relations-external#')
162
- ns_hash = { 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'pred_ns' => predicate_ns }
163
- xpth = "/publicObject/rdf:RDF/rdf:Description/pred_ns:#{predicate}/@rdf:resource"
164
- pred_nodes = public_xml_doc.xpath(xpth, ns_hash)
165
- pred_nodes.reject { |n| n.value.empty? }.map do |n|
166
- n.value.split('druid:').last
167
- end
168
- end
169
-
170
- def druid_tree
171
- druid.match(/(..)(...)(..)(....)/).captures.join('/')
172
- end
173
-
174
- def rights_xml
175
- @rights_xml ||= public_xml_doc.xpath('//rightsMetadata').to_s
176
- end
177
-
178
- def rights
179
- @rights ||= ::Dor::RightsAuth.parse(rights_xml)
180
- end
181
-
182
- def public?
183
- rights.world_unrestricted?
184
- end
185
-
186
- def stanford_only?
187
- rights.stanford_only_unrestricted?
188
- end
189
-
190
- def purl_base_url
191
- options[:purl_url]&.sub(%r{/$}, '') || 'https://purl.stanford.edu'
192
- end
193
-
194
- def purl_fetcher_api_endpoint
195
- options[:purl_fetcher_url] || 'https://purl-fetcher.stanford.edu'
196
- end
197
-
198
- def purl_fetcher_client
199
- @purl_fetcher_client ||= PurlFetcher::Client::Reader.new(
200
- nil,
201
- 'purl_fetcher.api_endpoint' => purl_fetcher_api_endpoint
202
- )
203
- end
204
- end
205
- end