purl_fetcher-client 0.4.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3b158de0a0c55f35eda9d832c34969edda80ce26bc5802e56217beae5309c9a4
4
- data.tar.gz: c26835ca9f955e74e5dd07d6807b66754e3ce69d7f1f4174e483955fedbb9233
3
+ metadata.gz: cade5ac0289e6a39d9d9dca571bea47032391fa774d1356e56dbada402e6bb5e
4
+ data.tar.gz: 043a7f61d74b733bf2fd12bf26704ff25af6539f10c5731ae4843f11c2376c1a
5
5
  SHA512:
6
- metadata.gz: 86b9b87edbcf9505316a6d3674c04710376ef974c4d6a09fb869befb8147935ff4849cd71a09d8e7f15d0619f5338f0393cb587316948777a2c942e7fe170467
7
- data.tar.gz: 65d148438df919cc085ee88e966182ec5d42581266dda51e415c1e47737d4dac8f23541bf4f7984eea464f0f18c2f1f3565578187dc6370900ac41156f04a6ac
6
+ metadata.gz: fa1bce8d7e6ef5090a19d8cf8ecf42eeeb24f2c5ef59345c77ded4b50752f01e1b18307dd65aafcd0aa8c4873210becf6fcdfacb5cdd6179034c023a609d3218
7
+ data.tar.gz: 8f76a7eecfff70435931364f827571733cc6f4d9e7cc90d8f65276580eab0dbcd5cd8eddd5a146b3e7fe016b2f4d69a3dff177e77e941ef7bbe47e49b975d3dc
@@ -2,9 +2,9 @@ name: CI
2
2
 
3
3
  on:
4
4
  push:
5
- branches: [ master ]
5
+ branches: [ main ]
6
6
  pull_request:
7
- branches: [ master ]
7
+ branches: [ main ]
8
8
 
9
9
  jobs:
10
10
  tests:
data/README.md CHANGED
@@ -36,4 +36,4 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERN
36
36
 
37
37
  ## Code of Conduct
38
38
 
39
- Everyone interacting in the PurlFetcher::Client project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/purl_fetcher-client/blob/master/CODE_OF_CONDUCT.md).
39
+ Everyone interacting in the PurlFetcher::Client project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/purl_fetcher-client/blob/main/CODE_OF_CONDUCT.md).
@@ -1,77 +1,48 @@
1
1
  class PurlFetcher::Client::Reader
2
2
  include Enumerable
3
- attr_reader :input_stream, :settings, :range
3
+ attr_reader :host, :conn, :range
4
4
 
5
- def initialize(input_stream, settings = {})
6
- @settings = settings
7
- @input_stream = input_stream
5
+ def initialize(host: 'https://purl-fetcher.stanford.edu', conn: nil)
6
+ @host = host
7
+ @conn = conn || Faraday.new(host) do |f|
8
+ f.response :json
9
+ end
8
10
  @range = {}
9
11
  end
10
12
 
11
- def each
12
- return to_enum(:each) unless block_given?
13
-
14
- changes(first_modified: first_modified, target: target).each do |change, meta|
15
- next unless target.nil? || (change['true_targets'] && change['true_targets'].include?(target))
16
-
17
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
18
-
19
- yield public_xml, change, self
20
- end
21
- end
22
-
23
13
  def collection_members(druid)
24
14
  return to_enum(:collection_members, druid) unless block_given?
25
15
 
26
- paginated_get("/collections/druid:#{druid.sub(/^druid:/, '')}/purls", 'purls').each do |obj, _meta|
27
- yield PurlFetcher::Client::PublicXmlRecord.new(obj['druid'].sub('druid:', ''), settings), obj, self
16
+ paginated_get("/collections/druid:#{druid.delete_prefix('druid:')}/purls", 'purls').each do |obj, _meta|
17
+ yield obj['druid'].delete_prefix('druid:')
28
18
  end
29
19
  end
30
20
 
31
21
  private
32
22
 
33
- def first_modified
34
- settings['purl_fetcher.first_modified']
35
- end
36
-
37
- def target
38
- settings['purl_fetcher.target']
39
- end
40
-
41
- ##
42
- # @return [Enumerator]
43
- def changes(params = {})
44
- paginated_get('/docs/changes', 'changes', params)
45
- end
46
-
47
- ##
48
- # @return [Enumerator]
49
- def deletes(params = {})
50
- paginated_get('/docs/deletes', 'deletes', params)
51
- end
52
-
53
23
  ##
54
24
  # @return [Hash] a parsed JSON hash
55
- def get(path, params = {})
56
- JSON.parse(fetch(settings.fetch('purl_fetcher.api_endpoint', 'https://purl-fetcher.stanford.edu') + path, params))
57
- end
25
+ def fetch(path, params)
26
+ response = conn.get(path, params: params)
58
27
 
59
- def fetch(url, params)
60
- if defined?(Manticore)
61
- Manticore.get(url, query: params).body
62
- else
63
- HTTP.get(url, params: params).body
28
+ unless response.success?
29
+ if defined?(Honeybadger)
30
+ Honeybadger.context({ path:, params:, response_code: response.code, body: response.body })
31
+ end
32
+ raise PurlFetcher::Client::ResponseError, "Unsuccessful response from purl-fetcher"
64
33
  end
34
+
35
+ response.body
65
36
  end
66
37
 
67
38
  ##
68
39
  # For performance, and enumberable object is returned.
69
40
  #
70
41
  # @example operating on each of the results as they come in
71
- # paginated_get('/docs/changes', 'changes').map { |v| puts v.inspect }
42
+ # paginated_get('/docs/collections/druid:123', 'purls').map { |v| puts v.inspect }
72
43
  #
73
44
  # @example getting all of the results and converting to an array
74
- # paginated_get('/docs/changes', 'changes').to_a
45
+ # paginated_get('/docs/collections/druid:123', 'purls').to_a
75
46
  #
76
47
  # @return [Enumerator] an enumberable object
77
48
  def paginated_get(path, accessor, options = {})
@@ -83,7 +54,7 @@ class PurlFetcher::Client::Reader
83
54
  total = 0
84
55
 
85
56
  loop do
86
- data = get(path, { per_page: per_page, page: page }.merge(params))
57
+ data = fetch(path, { per_page: per_page, page: page }.merge(params))
87
58
  @range = data['range']
88
59
 
89
60
  total += data[accessor].length
@@ -1,5 +1,5 @@
1
1
  module PurlFetcher
2
2
  module Client
3
- VERSION = "0.4.1"
3
+ VERSION = "1.0.0"
4
4
  end
5
5
  end
@@ -1,15 +1,14 @@
1
1
  require "purl_fetcher/client/version"
2
- require 'http'
3
- begin
4
- require 'manticore' if defined? JRUBY_VERSION
5
- rescue LoadError
6
- end
2
+ require 'faraday'
7
3
 
8
4
  module PurlFetcher
9
5
  module Client
10
- require 'purl_fetcher/client/public_xml_record'
11
6
  require 'purl_fetcher/client/reader'
12
- require 'purl_fetcher/client/deletes_reader'
13
- # Your code goes here...
7
+
8
+ # General error originating in PurlFetcher::Client
9
+ class Error < StandardError; end
10
+
11
+ # Raised when the response from the server is not successful
12
+ class ResponseError < Error; end
14
13
  end
15
14
  end
@@ -20,13 +20,11 @@ Gem::Specification.new do |spec|
20
20
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
21
  spec.require_paths = ["lib"]
22
22
 
23
- spec.add_dependency 'http'
24
- spec.add_dependency 'nokogiri'
25
- spec.add_dependency 'stanford-mods'
26
- spec.add_dependency 'dor-rights-auth'
27
- spec.add_dependency 'mods_display'
23
+ spec.add_dependency 'faraday', '~> 2.1'
28
24
 
29
25
  spec.add_development_dependency "bundler"
26
+ spec.add_development_dependency "debug"
30
27
  spec.add_development_dependency "rake"
31
28
  spec.add_development_dependency "rspec", "~> 3.0"
29
+ spec.add_development_dependency "webmock"
32
30
  end
metadata CHANGED
@@ -1,51 +1,37 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: purl_fetcher-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Beer
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-03-05 00:00:00.000000000 Z
11
+ date: 2024-04-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: http
14
+ name: faraday
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
- - !ruby/object:Gem::Dependency
28
- name: nokogiri
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
17
+ - - "~>"
32
18
  - !ruby/object:Gem::Version
33
- version: '0'
19
+ version: '2.1'
34
20
  type: :runtime
35
21
  prerelease: false
36
22
  version_requirements: !ruby/object:Gem::Requirement
37
23
  requirements:
38
- - - ">="
24
+ - - "~>"
39
25
  - !ruby/object:Gem::Version
40
- version: '0'
26
+ version: '2.1'
41
27
  - !ruby/object:Gem::Dependency
42
- name: stanford-mods
28
+ name: bundler
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
31
  - - ">="
46
32
  - !ruby/object:Gem::Version
47
33
  version: '0'
48
- type: :runtime
34
+ type: :development
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
@@ -53,13 +39,13 @@ dependencies:
53
39
  - !ruby/object:Gem::Version
54
40
  version: '0'
55
41
  - !ruby/object:Gem::Dependency
56
- name: dor-rights-auth
42
+ name: debug
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
45
  - - ">="
60
46
  - !ruby/object:Gem::Version
61
47
  version: '0'
62
- type: :runtime
48
+ type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
@@ -67,13 +53,13 @@ dependencies:
67
53
  - !ruby/object:Gem::Version
68
54
  version: '0'
69
55
  - !ruby/object:Gem::Dependency
70
- name: mods_display
56
+ name: rake
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
59
  - - ">="
74
60
  - !ruby/object:Gem::Version
75
61
  version: '0'
76
- type: :runtime
62
+ type: :development
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
@@ -81,21 +67,21 @@ dependencies:
81
67
  - !ruby/object:Gem::Version
82
68
  version: '0'
83
69
  - !ruby/object:Gem::Dependency
84
- name: bundler
70
+ name: rspec
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
- - - ">="
73
+ - - "~>"
88
74
  - !ruby/object:Gem::Version
89
- version: '0'
75
+ version: '3.0'
90
76
  type: :development
91
77
  prerelease: false
92
78
  version_requirements: !ruby/object:Gem::Requirement
93
79
  requirements:
94
- - - ">="
80
+ - - "~>"
95
81
  - !ruby/object:Gem::Version
96
- version: '0'
82
+ version: '3.0'
97
83
  - !ruby/object:Gem::Dependency
98
- name: rake
84
+ name: webmock
99
85
  requirement: !ruby/object:Gem::Requirement
100
86
  requirements:
101
87
  - - ">="
@@ -108,20 +94,6 @@ dependencies:
108
94
  - - ">="
109
95
  - !ruby/object:Gem::Version
110
96
  version: '0'
111
- - !ruby/object:Gem::Dependency
112
- name: rspec
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - "~>"
116
- - !ruby/object:Gem::Version
117
- version: '3.0'
118
- type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - "~>"
123
- - !ruby/object:Gem::Version
124
- version: '3.0'
125
97
  description:
126
98
  email:
127
99
  - cabeer@stanford.edu
@@ -139,8 +111,6 @@ files:
139
111
  - bin/console
140
112
  - bin/setup
141
113
  - lib/purl_fetcher/client.rb
142
- - lib/purl_fetcher/client/deletes_reader.rb
143
- - lib/purl_fetcher/client/public_xml_record.rb
144
114
  - lib/purl_fetcher/client/reader.rb
145
115
  - lib/purl_fetcher/client/version.rb
146
116
  - purl_fetcher-client.gemspec
@@ -162,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
162
132
  - !ruby/object:Gem::Version
163
133
  version: '0'
164
134
  requirements: []
165
- rubygems_version: 3.2.3
135
+ rubygems_version: 3.4.19
166
136
  signing_key:
167
137
  specification_version: 4
168
138
  summary: Traject-compatible reader implementation for streaming data from purl-fetcher
@@ -1,21 +0,0 @@
1
- class PurlFetcher::Client::DeletesReader < PurlFetcher::Client::Reader
2
- # Enumerate objects that should be deleted.
3
- def each
4
- return to_enum(:each) unless block_given?
5
-
6
- deletes(first_modified: first_modified).each do |change|
7
-
8
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
9
-
10
- yield public_xml, change, self
11
- end
12
-
13
- changes(first_modified: first_modified, target: target).each do |change|
14
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
15
-
16
- next unless target.nil? || (change['false_targets'] && change['false_targets'].include?(target)) || (settings['skip_if_catkey'] && record.catkey)
17
-
18
- yield public_xml, change, self
19
- end
20
- end
21
- end
@@ -1,214 +0,0 @@
1
- require 'nokogiri'
2
- require 'stanford-mods'
3
- require 'mods_display'
4
- require 'dor/rights_auth'
5
-
6
- module PurlFetcher::Client
7
- class PublicXmlRecord
8
- include ModsDisplay::ModelExtension
9
- include ModsDisplay::ControllerExtension
10
-
11
- mods_xml_source do |model|
12
- model.mods.to_s
13
- end
14
- configure_mods_display do
15
- end
16
-
17
- attr_reader :druid, :options
18
-
19
- def self.fetch(url)
20
- if defined?(JRUBY_VERSION)
21
- response = Manticore.get(url)
22
- response.body if response.code == 200
23
- else
24
- response = HTTP.get(url)
25
- response.body if response.status.ok?
26
- end
27
- end
28
-
29
- def initialize(druid, options = {})
30
- @druid = druid
31
- @options = options
32
- end
33
-
34
- def searchworks_id
35
- catkey.nil? ? druid : catkey
36
- end
37
-
38
- # @return catkey value from the DOR identity_metadata, or nil if there is no catkey
39
- def catkey
40
- get_value(public_xml_doc.xpath("/publicObject/identityMetadata/otherId[@name='catkey']"))
41
- end
42
-
43
- # @return objectLabel value from the DOR identity_metadata, or nil if there is no barcode
44
- def label
45
- get_value(public_xml_doc.xpath('/publicObject/identityMetadata/objectLabel'))
46
- end
47
-
48
- def get_value(node)
49
- (node && node.first) ? node.first.content : nil
50
- end
51
-
52
- def stanford_mods
53
- @smods_rec ||= Stanford::Mods::Record.new.tap do |smods_rec|
54
- smods_rec.from_str(mods.to_s)
55
- end
56
- end
57
-
58
- def mods_display
59
- @mods_display ||= render_mods_display(self)
60
- end
61
-
62
- def public_xml
63
- @public_xml ||= self.class.fetch(purl_base_url + "/#{druid}.xml")
64
- end
65
-
66
- def public_xml?
67
- !!public_xml
68
- end
69
-
70
- def public_xml_doc
71
- @public_xml_doc ||= Nokogiri::XML(public_xml)
72
- end
73
-
74
- def mods
75
- @mods ||= if public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').any?
76
- public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').first
77
- else
78
- if defined?(Honeybadger)
79
- Honeybadger.notify(
80
- 'Unable to find MODS in the public xml; falling back to stand-along mods document',
81
- context: { druid: druid }
82
- )
83
- end
84
-
85
- Nokogiri::XML(self.class.fetch(purl_base_url + "/#{druid}.mods"))
86
- end
87
- end
88
-
89
- # @return true if the identityMetadata has <objectType>collection</objectType>, false otherwise
90
- def is_collection
91
- object_type_nodes = public_xml_doc.xpath('//objectType')
92
- object_type_nodes.find_index { |n| %w(collection set).include? n.text.downcase }
93
- end
94
-
95
- # value is used to tell SearchWorks UI app of specific display needs for objects
96
- # this comes from the <thumb> element in publicXML or the first image found (as parsed by discovery-indexer)
97
- # @return [String] filename or nil if none found
98
- def thumb
99
- return if is_collection
100
- encoded_thumb if %w(book image manuscript map webarchive-seed).include?(dor_content_type)
101
- end
102
-
103
- # the value of the type attribute for a DOR object's contentMetadata
104
- # more info about these values is here:
105
- # https://consul.stanford.edu/display/chimera/DOR+content+types%2C+resource+types+and+interpretive+metadata
106
- # https://consul.stanford.edu/display/chimera/Summary+of+Content+Types%2C+Resource+Types+and+their+behaviors
107
- # @return [String]
108
- def dor_content_type
109
- public_xml_doc.xpath('//contentMetadata/@type').text
110
- end
111
-
112
- # the thumbnail in publicXML, falling back to the first image if no thumb node is found
113
- # @return [String] thumb filename with druid prepended, e.g. oo000oo0001/filename withspace.jp2
114
- def parse_thumb
115
- unless public_xml_doc.nil?
116
- thumb = public_xml_doc.xpath('//thumb')
117
- # first try and parse what is in the thumb node of publicXML, but fallback to the first image if needed
118
- if thumb.size == 1
119
- thumb.first.content
120
- elsif thumb.size == 0 && parse_sw_image_ids.size > 0
121
- parse_sw_image_ids.first
122
- else
123
- nil
124
- end
125
- end
126
- end
127
-
128
- # the druid and id attribute of resource/file and objectId and fileId of the
129
- # resource/externalFile elements that match the image, page, or thumb resource type, including extension
130
- # Also, prepends the corresponding druid and / specifically for Searchworks use
131
- # @return [Array<String>] filenames
132
- def parse_sw_image_ids
133
- public_xml_doc.xpath('//resource[@type="page" or @type="image" or @type="thumb"]').map do |node|
134
- node.xpath('./file[@mimetype="image/jp2"]/@id').map{ |x| "#{@druid.gsub('druid:','')}/" + x } << node.xpath('./externalFile[@mimetype="image/jp2"]').map do |y|
135
- "#{y.attributes['objectId'].text.split(':').last}" + "/" + "#{y.attributes['fileId']}"
136
- end
137
- end.flatten
138
- end
139
-
140
- def collections
141
- @collections ||= predicate_druids('isMemberOfCollection').map do |druid|
142
- PublicXmlRecord.new(druid, options)
143
- end
144
- end
145
-
146
- def constituents
147
- @constituents ||= predicate_druids('isConstituentOf').map do |druid|
148
- PublicXmlRecord.new(druid, options)
149
- end
150
- end
151
-
152
- def items(&block)
153
- return [] unless is_collection
154
-
155
- purl_fetcher_client.collection_members(druid, &block)
156
- end
157
-
158
- # the thumbnail in publicXML properly URI encoded, including the slash separator
159
- # @return [String] thumb filename with druid prepended, e.g. oo000oo0001%2Ffilename%20withspace.jp2
160
- def encoded_thumb
161
- thumb=parse_thumb
162
- return unless thumb
163
- thumb_druid=thumb.split('/').first # the druid (before the first slash)
164
- thumb_filename=thumb.split(/[a-zA-Z]{2}[0-9]{3}[a-zA-Z]{2}[0-9]{4}[\/]/).last # everything after the druid
165
- "#{thumb_druid}%2F#{ERB::Util.url_encode(thumb_filename)}"
166
- end
167
-
168
- # get the druids from predicate relationships in rels-ext from public_xml
169
- # @return [Array<String>, nil] the druids (e.g. ww123yy1234) from the rdf:resource of the predicate relationships, or nil if none
170
- def predicate_druids(predicate, predicate_ns = 'info:fedora/fedora-system:def/relations-external#')
171
- ns_hash = { 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'pred_ns' => predicate_ns }
172
- xpth = "/publicObject/rdf:RDF/rdf:Description/pred_ns:#{predicate}/@rdf:resource"
173
- pred_nodes = public_xml_doc.xpath(xpth, ns_hash)
174
- pred_nodes.reject { |n| n.value.empty? }.map do |n|
175
- n.value.split('druid:').last
176
- end
177
- end
178
-
179
- def druid_tree
180
- druid.match(/(..)(...)(..)(....)/).captures.join('/')
181
- end
182
-
183
- def rights_xml
184
- @rights_xml ||= public_xml_doc.xpath('//rightsMetadata').to_s
185
- end
186
-
187
- def rights
188
- @rights ||= ::Dor::RightsAuth.parse(rights_xml)
189
- end
190
-
191
- def public?
192
- rights.world_unrestricted?
193
- end
194
-
195
- def stanford_only?
196
- rights.stanford_only_unrestricted?
197
- end
198
-
199
- def purl_base_url
200
- options[:purl_url]&.sub(%r{/$}, '') || 'https://purl.stanford.edu'
201
- end
202
-
203
- def purl_fetcher_api_endpoint
204
- options[:purl_fetcher_url] || 'https://purl-fetcher.stanford.edu'
205
- end
206
-
207
- def purl_fetcher_client
208
- @purl_fetcher_client ||= PurlFetcher::Client::Reader.new(
209
- nil,
210
- 'purl_fetcher.api_endpoint' => purl_fetcher_api_endpoint
211
- )
212
- end
213
- end
214
- end