purl_fetcher-client 0.5.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b80b63b29ffbed55055e58d2d063f4496f84015b1130a6908e82e4d91a18c94d
4
- data.tar.gz: 3d9691d8e9939f17a8d4f3a66a03bb3644463a3ffcc9604b8558242614f4e0a6
3
+ metadata.gz: cade5ac0289e6a39d9d9dca571bea47032391fa774d1356e56dbada402e6bb5e
4
+ data.tar.gz: 043a7f61d74b733bf2fd12bf26704ff25af6539f10c5731ae4843f11c2376c1a
5
5
  SHA512:
6
- metadata.gz: '0484a2254521f201fed8d95cf65ab85a3310111c6be46c6d13ecb4e7c40cec3d807861b42c47dc389f34c242918733dce7c21b66beb872d062112bbfe8674dd0'
7
- data.tar.gz: ff5cdc4fe8d78eedeba1594e8aa47e87bfb6186337d4f1042f928a56f0a6b854487e7acfba5398e306401a123cb710ae0ac4bc6b9b11b157a81d20550b789570
6
+ metadata.gz: fa1bce8d7e6ef5090a19d8cf8ecf42eeeb24f2c5ef59345c77ded4b50752f01e1b18307dd65aafcd0aa8c4873210becf6fcdfacb5cdd6179034c023a609d3218
7
+ data.tar.gz: 8f76a7eecfff70435931364f827571733cc6f4d9e7cc90d8f65276580eab0dbcd5cd8eddd5a146b3e7fe016b2f4d69a3dff177e77e941ef7bbe47e49b975d3dc
@@ -2,9 +2,9 @@ name: CI
2
2
 
3
3
  on:
4
4
  push:
5
- branches: [ master ]
5
+ branches: [ main ]
6
6
  pull_request:
7
- branches: [ master ]
7
+ branches: [ main ]
8
8
 
9
9
  jobs:
10
10
  tests:
data/README.md CHANGED
@@ -36,4 +36,4 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERN
36
36
 
37
37
  ## Code of Conduct
38
38
 
39
- Everyone interacting in the PurlFetcher::Client project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/purl_fetcher-client/blob/master/CODE_OF_CONDUCT.md).
39
+ Everyone interacting in the PurlFetcher::Client project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/purl_fetcher-client/blob/main/CODE_OF_CONDUCT.md).
@@ -1,77 +1,48 @@
1
1
  class PurlFetcher::Client::Reader
2
2
  include Enumerable
3
- attr_reader :input_stream, :settings, :range
3
+ attr_reader :host, :conn, :range
4
4
 
5
- def initialize(input_stream, settings = {})
6
- @settings = settings
7
- @input_stream = input_stream
5
+ def initialize(host: 'https://purl-fetcher.stanford.edu', conn: nil)
6
+ @host = host
7
+ @conn = conn || Faraday.new(host) do |f|
8
+ f.response :json
9
+ end
8
10
  @range = {}
9
11
  end
10
12
 
11
- def each
12
- return to_enum(:each) unless block_given?
13
-
14
- changes(first_modified: first_modified, target: target).each do |change, meta|
15
- next unless target.nil? || (change['true_targets'] && change['true_targets'].include?(target))
16
-
17
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
18
-
19
- yield public_xml, change, self
20
- end
21
- end
22
-
23
13
  def collection_members(druid)
24
14
  return to_enum(:collection_members, druid) unless block_given?
25
15
 
26
- paginated_get("/collections/druid:#{druid.sub(/^druid:/, '')}/purls", 'purls').each do |obj, _meta|
27
- yield PurlFetcher::Client::PublicXmlRecord.new(obj['druid'].sub('druid:', ''), settings), obj, self
16
+ paginated_get("/collections/druid:#{druid.delete_prefix('druid:')}/purls", 'purls').each do |obj, _meta|
17
+ yield obj['druid'].delete_prefix('druid:')
28
18
  end
29
19
  end
30
20
 
31
21
  private
32
22
 
33
- def first_modified
34
- settings['purl_fetcher.first_modified']
35
- end
36
-
37
- def target
38
- settings['purl_fetcher.target']
39
- end
40
-
41
- ##
42
- # @return [Enumerator]
43
- def changes(params = {})
44
- paginated_get('/docs/changes', 'changes', params)
45
- end
46
-
47
- ##
48
- # @return [Enumerator]
49
- def deletes(params = {})
50
- paginated_get('/docs/deletes', 'deletes', params)
51
- end
52
-
53
23
  ##
54
24
  # @return [Hash] a parsed JSON hash
55
- def get(path, params = {})
56
- JSON.parse(fetch(settings.fetch('purl_fetcher.api_endpoint', 'https://purl-fetcher.stanford.edu') + path, params))
57
- end
25
+ def fetch(path, params)
26
+ response = conn.get(path, params: params)
58
27
 
59
- def fetch(url, params)
60
- if defined?(Manticore)
61
- Manticore.get(url, query: params).body
62
- else
63
- HTTP.get(url, params: params).body
28
+ unless response.success?
29
+ if defined?(Honeybadger)
30
+ Honeybadger.context({ path:, params:, response_code: response.code, body: response.body })
31
+ end
32
+ raise PurlFetcher::Client::ResponseError, "Unsuccessful response from purl-fetcher"
64
33
  end
34
+
35
+ response.body
65
36
  end
66
37
 
67
38
  ##
68
39
  # For performance, an enumerable object is returned.
69
40
  #
70
41
  # @example operating on each of the results as they come in
71
- # paginated_get('/docs/changes', 'changes').map { |v| puts v.inspect }
42
+ # paginated_get('/docs/collections/druid:123', 'purls').map { |v| puts v.inspect }
72
43
  #
73
44
  # @example getting all of the results and converting to an array
74
- # paginated_get('/docs/changes', 'changes').to_a
45
+ # paginated_get('/docs/collections/druid:123', 'purls').to_a
75
46
  #
76
47
  # @return [Enumerator] an enumerable object
77
48
  def paginated_get(path, accessor, options = {})
@@ -83,7 +54,7 @@ class PurlFetcher::Client::Reader
83
54
  total = 0
84
55
 
85
56
  loop do
86
- data = get(path, { per_page: per_page, page: page }.merge(params))
57
+ data = fetch(path, { per_page: per_page, page: page }.merge(params))
87
58
  @range = data['range']
88
59
 
89
60
  total += data[accessor].length
@@ -1,5 +1,5 @@
1
1
  module PurlFetcher
2
2
  module Client
3
- VERSION = "0.5.0"
3
+ VERSION = "1.0.0"
4
4
  end
5
5
  end
@@ -1,15 +1,14 @@
1
1
  require "purl_fetcher/client/version"
2
- require 'http'
3
- begin
4
- require 'manticore' if defined? JRUBY_VERSION
5
- rescue LoadError
6
- end
2
+ require 'faraday'
7
3
 
8
4
  module PurlFetcher
9
5
  module Client
10
- require 'purl_fetcher/client/public_xml_record'
11
6
  require 'purl_fetcher/client/reader'
12
- require 'purl_fetcher/client/deletes_reader'
13
- # Your code goes here...
7
+
8
+ # General error originating in PurlFetcher::Client
9
+ class Error < StandardError; end
10
+
11
+ # Raised when the response from the server is not successful
12
+ class ResponseError < Error; end
14
13
  end
15
14
  end
@@ -20,13 +20,11 @@ Gem::Specification.new do |spec|
20
20
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
21
  spec.require_paths = ["lib"]
22
22
 
23
- spec.add_dependency 'http'
24
- spec.add_dependency 'nokogiri'
25
- spec.add_dependency 'stanford-mods'
26
- spec.add_dependency 'dor-rights-auth'
27
- spec.add_dependency 'mods_display', '>= 1.0.0.alpha1'
23
+ spec.add_dependency 'faraday', '~> 2.1'
28
24
 
29
25
  spec.add_development_dependency "bundler"
26
+ spec.add_development_dependency "debug"
30
27
  spec.add_development_dependency "rake"
31
28
  spec.add_development_dependency "rspec", "~> 3.0"
29
+ spec.add_development_dependency "webmock"
32
30
  end
metadata CHANGED
@@ -1,37 +1,37 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: purl_fetcher-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Beer
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-13 00:00:00.000000000 Z
11
+ date: 2024-04-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: http
14
+ name: faraday
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '2.1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: nokogiri
28
+ name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
- type: :runtime
34
+ type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
@@ -39,13 +39,13 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: stanford-mods
42
+ name: debug
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
- type: :runtime
48
+ type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
@@ -53,13 +53,13 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: dor-rights-auth
56
+ name: rake
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
- type: :runtime
62
+ type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
@@ -67,35 +67,21 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: mods_display
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: 1.0.0.alpha1
76
- type: :runtime
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: 1.0.0.alpha1
83
- - !ruby/object:Gem::Dependency
84
- name: bundler
70
+ name: rspec
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
- - - ">="
73
+ - - "~>"
88
74
  - !ruby/object:Gem::Version
89
- version: '0'
75
+ version: '3.0'
90
76
  type: :development
91
77
  prerelease: false
92
78
  version_requirements: !ruby/object:Gem::Requirement
93
79
  requirements:
94
- - - ">="
80
+ - - "~>"
95
81
  - !ruby/object:Gem::Version
96
- version: '0'
82
+ version: '3.0'
97
83
  - !ruby/object:Gem::Dependency
98
- name: rake
84
+ name: webmock
99
85
  requirement: !ruby/object:Gem::Requirement
100
86
  requirements:
101
87
  - - ">="
@@ -108,20 +94,6 @@ dependencies:
108
94
  - - ">="
109
95
  - !ruby/object:Gem::Version
110
96
  version: '0'
111
- - !ruby/object:Gem::Dependency
112
- name: rspec
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - "~>"
116
- - !ruby/object:Gem::Version
117
- version: '3.0'
118
- type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - "~>"
123
- - !ruby/object:Gem::Version
124
- version: '3.0'
125
97
  description:
126
98
  email:
127
99
  - cabeer@stanford.edu
@@ -139,8 +111,6 @@ files:
139
111
  - bin/console
140
112
  - bin/setup
141
113
  - lib/purl_fetcher/client.rb
142
- - lib/purl_fetcher/client/deletes_reader.rb
143
- - lib/purl_fetcher/client/public_xml_record.rb
144
114
  - lib/purl_fetcher/client/reader.rb
145
115
  - lib/purl_fetcher/client/version.rb
146
116
  - purl_fetcher-client.gemspec
@@ -162,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
162
132
  - !ruby/object:Gem::Version
163
133
  version: '0'
164
134
  requirements: []
165
- rubygems_version: 3.2.32
135
+ rubygems_version: 3.4.19
166
136
  signing_key:
167
137
  specification_version: 4
168
138
  summary: Traject-compatible reader implementation for streaming data from purl-fetcher
@@ -1,21 +0,0 @@
1
- class PurlFetcher::Client::DeletesReader < PurlFetcher::Client::Reader
2
- # Enumerate objects that should be deleted.
3
- def each
4
- return to_enum(:each) unless block_given?
5
-
6
- deletes(first_modified: first_modified).each do |change|
7
-
8
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
9
-
10
- yield public_xml, change, self
11
- end
12
-
13
- changes(first_modified: first_modified, target: target).each do |change|
14
- public_xml = PurlFetcher::Client::PublicXmlRecord.new(change['druid'].sub('druid:', ''), settings)
15
-
16
- next unless target.nil? || (change['false_targets'] && change['false_targets'].include?(target)) || (settings['skip_if_catkey'] && record.catkey)
17
-
18
- yield public_xml, change, self
19
- end
20
- end
21
- end
@@ -1,205 +0,0 @@
1
- require 'nokogiri'
2
- require 'stanford-mods'
3
- require 'mods_display'
4
- require 'dor/rights_auth'
5
-
6
- module PurlFetcher::Client
7
- class PublicXmlRecord
8
- attr_reader :druid, :options
9
-
10
- def self.fetch(url)
11
- if defined?(JRUBY_VERSION)
12
- response = Manticore.get(url)
13
- response.body if response.code == 200
14
- else
15
- response = HTTP.get(url)
16
- response.body if response.status.ok?
17
- end
18
- end
19
-
20
- def initialize(druid, options = {})
21
- @druid = druid
22
- @options = options
23
- end
24
-
25
- def searchworks_id
26
- catkey.nil? ? druid : catkey
27
- end
28
-
29
- # @return catkey value from the DOR identity_metadata, or nil if there is no catkey
30
- def catkey
31
- get_value(public_xml_doc.xpath("/publicObject/identityMetadata/otherId[@name='catkey']"))
32
- end
33
-
34
- # @return objectLabel value from the DOR identity_metadata, or nil if there is no barcode
35
- def label
36
- get_value(public_xml_doc.xpath('/publicObject/identityMetadata/objectLabel'))
37
- end
38
-
39
- def get_value(node)
40
- (node && node.first) ? node.first.content : nil
41
- end
42
-
43
- def stanford_mods
44
- @smods_rec ||= Stanford::Mods::Record.new.tap do |smods_rec|
45
- smods_rec.from_str(mods.to_s)
46
- end
47
- end
48
-
49
- def mods_display
50
- @mods_display ||= ModsDisplay::HTML.new(stanford_mods)
51
- end
52
-
53
- def public_xml
54
- @public_xml ||= self.class.fetch(purl_base_url + "/#{druid}.xml")
55
- end
56
-
57
- def public_xml?
58
- !!public_xml
59
- end
60
-
61
- def public_xml_doc
62
- @public_xml_doc ||= Nokogiri::XML(public_xml)
63
- end
64
-
65
- def mods
66
- @mods ||= if public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').any?
67
- public_xml_doc.xpath('/publicObject/mods:mods', mods: 'http://www.loc.gov/mods/v3').first
68
- else
69
- if defined?(Honeybadger)
70
- Honeybadger.notify(
71
- 'Unable to find MODS in the public xml; falling back to stand-along mods document',
72
- context: { druid: druid }
73
- )
74
- end
75
-
76
- Nokogiri::XML(self.class.fetch(purl_base_url + "/#{druid}.mods"))
77
- end
78
- end
79
-
80
- # @return true if the identityMetadata has <objectType>collection</objectType>, false otherwise
81
- def is_collection
82
- object_type_nodes = public_xml_doc.xpath('//objectType')
83
- object_type_nodes.find_index { |n| %w(collection set).include? n.text.downcase }
84
- end
85
-
86
- # value is used to tell SearchWorks UI app of specific display needs for objects
87
- # this comes from the <thumb> element in publicXML or the first image found (as parsed by discovery-indexer)
88
- # @return [String] filename or nil if none found
89
- def thumb
90
- return if is_collection
91
- encoded_thumb if %w(book image manuscript map webarchive-seed).include?(dor_content_type)
92
- end
93
-
94
- # the value of the type attribute for a DOR object's contentMetadata
95
- # more info about these values is here:
96
- # https://consul.stanford.edu/display/chimera/DOR+content+types%2C+resource+types+and+interpretive+metadata
97
- # https://consul.stanford.edu/display/chimera/Summary+of+Content+Types%2C+Resource+Types+and+their+behaviors
98
- # @return [String]
99
- def dor_content_type
100
- public_xml_doc.xpath('//contentMetadata/@type').text
101
- end
102
-
103
- # the thumbnail in publicXML, falling back to the first image if no thumb node is found
104
- # @return [String] thumb filename with druid prepended, e.g. oo000oo0001/filename withspace.jp2
105
- def parse_thumb
106
- unless public_xml_doc.nil?
107
- thumb = public_xml_doc.xpath('//thumb')
108
- # first try and parse what is in the thumb node of publicXML, but fallback to the first image if needed
109
- if thumb.size == 1
110
- thumb.first.content
111
- elsif thumb.size == 0 && parse_sw_image_ids.size > 0
112
- parse_sw_image_ids.first
113
- else
114
- nil
115
- end
116
- end
117
- end
118
-
119
- # the druid and id attribute of resource/file and objectId and fileId of the
120
- # resource/externalFile elements that match the image, page, or thumb resource type, including extension
121
- # Also, prepends the corresponding druid and / specifically for Searchworks use
122
- # @return [Array<String>] filenames
123
- def parse_sw_image_ids
124
- public_xml_doc.xpath('//resource[@type="page" or @type="image" or @type="thumb"]').map do |node|
125
- node.xpath('./file[@mimetype="image/jp2"]/@id').map{ |x| "#{@druid.gsub('druid:','')}/" + x } << node.xpath('./externalFile[@mimetype="image/jp2"]').map do |y|
126
- "#{y.attributes['objectId'].text.split(':').last}" + "/" + "#{y.attributes['fileId']}"
127
- end
128
- end.flatten
129
- end
130
-
131
- def collections
132
- @collections ||= predicate_druids('isMemberOfCollection').map do |druid|
133
- PublicXmlRecord.new(druid, options)
134
- end
135
- end
136
-
137
- def constituents
138
- @constituents ||= predicate_druids('isConstituentOf').map do |druid|
139
- PublicXmlRecord.new(druid, options)
140
- end
141
- end
142
-
143
- def items(&block)
144
- return [] unless is_collection
145
-
146
- purl_fetcher_client.collection_members(druid, &block)
147
- end
148
-
149
- # the thumbnail in publicXML properly URI encoded, including the slash separator
150
- # @return [String] thumb filename with druid prepended, e.g. oo000oo0001%2Ffilename%20withspace.jp2
151
- def encoded_thumb
152
- thumb=parse_thumb
153
- return unless thumb
154
- thumb_druid=thumb.split('/').first # the druid (before the first slash)
155
- thumb_filename=thumb.split(/[a-zA-Z]{2}[0-9]{3}[a-zA-Z]{2}[0-9]{4}[\/]/).last # everything after the druid
156
- "#{thumb_druid}%2F#{ERB::Util.url_encode(thumb_filename)}"
157
- end
158
-
159
- # get the druids from predicate relationships in rels-ext from public_xml
160
- # @return [Array<String>, nil] the druids (e.g. ww123yy1234) from the rdf:resource of the predicate relationships, or nil if none
161
- def predicate_druids(predicate, predicate_ns = 'info:fedora/fedora-system:def/relations-external#')
162
- ns_hash = { 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'pred_ns' => predicate_ns }
163
- xpth = "/publicObject/rdf:RDF/rdf:Description/pred_ns:#{predicate}/@rdf:resource"
164
- pred_nodes = public_xml_doc.xpath(xpth, ns_hash)
165
- pred_nodes.reject { |n| n.value.empty? }.map do |n|
166
- n.value.split('druid:').last
167
- end
168
- end
169
-
170
- def druid_tree
171
- druid.match(/(..)(...)(..)(....)/).captures.join('/')
172
- end
173
-
174
- def rights_xml
175
- @rights_xml ||= public_xml_doc.xpath('//rightsMetadata').to_s
176
- end
177
-
178
- def rights
179
- @rights ||= ::Dor::RightsAuth.parse(rights_xml)
180
- end
181
-
182
- def public?
183
- rights.world_unrestricted?
184
- end
185
-
186
- def stanford_only?
187
- rights.stanford_only_unrestricted?
188
- end
189
-
190
- def purl_base_url
191
- options[:purl_url]&.sub(%r{/$}, '') || 'https://purl.stanford.edu'
192
- end
193
-
194
- def purl_fetcher_api_endpoint
195
- options[:purl_fetcher_url] || 'https://purl-fetcher.stanford.edu'
196
- end
197
-
198
- def purl_fetcher_client
199
- @purl_fetcher_client ||= PurlFetcher::Client::Reader.new(
200
- nil,
201
- 'purl_fetcher.api_endpoint' => purl_fetcher_api_endpoint
202
- )
203
- end
204
- end
205
- end