preservation-client 7.3.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 164a8df77b290866471041f9da4a8cd1950e54e3caf5df094e4201c997b6c1c7
4
- data.tar.gz: 93d3fd16f175e7ecdee4538b917698c1be483c9d8233b6d8b1a79306b4be3567
3
+ metadata.gz: c187f939eb6d782795bb422308b7417f024db4c4e9d4d36e0685d7060c4150a0
4
+ data.tar.gz: 9c3f855e31677b447712ba3c35d67e0ab1127b83a469fc17664be82ac0b28f70
5
5
  SHA512:
6
- metadata.gz: ad80fe3e37727f57c9e28ab83cac8f058013a9764cce950ab46a535eac845cffd7a5e9b6fcb8149849d87810d319aed4305a1661f9ad29760c5d2d47f4b7d56f
7
- data.tar.gz: c7e0a0cef3816863d13b3019ccd21f9465e07236a14f058cf748ddfaa19a51f08b3d0f39fb7a3024025495d961b16bd85400e2566ad03b09efc4b52b3ebaee11
6
+ metadata.gz: 9800102447b72c245667cd69cc90c97878a11a28663cb56012d21678355cc4e4e90dccc6e98a8481cab53ec64507e66a5968ef63d01b083b10959e79424bb54b
7
+ data.tar.gz: ab270e3e62cba84ed0e560b63ab392fd48c07854b35f37abb0e0251d471d78e48df5a7cf3d8a78fb93c06a7450ea1656d99624ac1ff283e799a5f18e410c7dec
data/.circleci/config.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  version: 2.1
2
2
  orbs:
3
- ruby-rails: sul-dlss/ruby-rails@4.8.0
3
+ ruby-rails: sul-dlss/ruby-rails@4.12.0
4
4
  workflows:
5
5
  build:
6
6
  jobs:
data/.rubocop.yml CHANGED
@@ -45,7 +45,7 @@ RSpec/ExampleLength:
45
45
 
46
46
  # most tests should test a single thing
47
47
  RSpec/MultipleExpectations:
48
- Max: 5 # default 1
48
+ Enabled: false
49
49
 
50
50
  RSpec/MultipleMemoizedHelpers:
51
51
  Enabled: false
data/Gemfile.lock CHANGED
@@ -1,9 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- preservation-client (7.3.0)
4
+ preservation-client (8.0.0)
5
5
  activesupport (>= 4.2)
6
6
  faraday (~> 2.0)
7
+ faraday-retry (~> 2.0)
7
8
  moab-versioning (>= 5.0.0, < 7)
8
9
  zeitwerk (~> 2.1)
9
10
 
@@ -31,7 +32,7 @@ GEM
31
32
  byebug (13.0.0)
32
33
  reline (>= 0.6.0)
33
34
  coderay (1.1.3)
34
- concurrent-ruby (1.3.6)
35
+ concurrent-ruby (1.3.7)
35
36
  connection_pool (3.0.2)
36
37
  crack (1.0.1)
37
38
  bigdecimal
@@ -40,17 +41,19 @@ GEM
40
41
  docile (1.4.1)
41
42
  drb (2.2.3)
42
43
  druid-tools (3.0.0)
43
- faraday (2.14.2)
44
+ faraday (2.14.3)
44
45
  faraday-net_http (>= 2.0, < 3.5)
45
46
  json
46
47
  logger
47
- faraday-net_http (3.4.2)
48
+ faraday-net_http (3.4.4)
48
49
  net-http (~> 0.5)
50
+ faraday-retry (2.4.0)
51
+ faraday (~> 2.0)
49
52
  hashdiff (1.2.1)
50
- i18n (1.14.8)
53
+ i18n (1.15.2)
51
54
  concurrent-ruby (~> 1.0)
52
55
  io-console (0.8.2)
53
- json (2.19.5)
56
+ json (2.20.0)
54
57
  language_server-protocol (3.17.0.5)
55
58
  lint_roller (1.1.0)
56
59
  logger (1.7.0)
@@ -65,11 +68,11 @@ GEM
65
68
  nokogiri-happymapper
66
69
  net-http (0.9.1)
67
70
  uri (>= 0.11.1)
68
- nokogiri (1.19.3-arm64-darwin)
71
+ nokogiri (1.19.4-arm64-darwin)
69
72
  racc (~> 1.4)
70
- nokogiri (1.19.3-x86_64-darwin)
73
+ nokogiri (1.19.4-x86_64-darwin)
71
74
  racc (~> 1.4)
72
- nokogiri (1.19.3-x86_64-linux-gnu)
75
+ nokogiri (1.19.4-x86_64-linux-gnu)
73
76
  racc (~> 1.4)
74
77
  nokogiri-happymapper (0.10.1)
75
78
  nokogiri (~> 1.5)
@@ -106,7 +109,7 @@ GEM
106
109
  diff-lcs (>= 1.2.0, < 2.0)
107
110
  rspec-support (~> 3.13.0)
108
111
  rspec-support (3.13.7)
109
- rubocop (1.86.2)
112
+ rubocop (1.88.0)
110
113
  json (~> 2.3)
111
114
  language_server-protocol (~> 3.17.0.2)
112
115
  lint_roller (~> 1.1.0)
@@ -123,9 +126,10 @@ GEM
123
126
  rubocop-rake (0.7.1)
124
127
  lint_roller (~> 1.1)
125
128
  rubocop (>= 1.72.1)
126
- rubocop-rspec (3.9.0)
129
+ rubocop-rspec (3.10.2)
127
130
  lint_roller (~> 1.1)
128
- rubocop (~> 1.81)
131
+ regexp_parser (>= 2.0)
132
+ rubocop (~> 1.86, >= 1.86.2)
129
133
  ruby-progressbar (1.13.0)
130
134
  securerandom (0.4.1)
131
135
  simplecov (0.22.0)
@@ -144,7 +148,7 @@ GEM
144
148
  addressable (>= 2.8.0)
145
149
  crack (>= 0.3.2)
146
150
  hashdiff (>= 0.4.0, < 2.0.0)
147
- zeitwerk (2.7.5)
151
+ zeitwerk (2.8.2)
148
152
 
149
153
  PLATFORMS
150
154
  arm64-darwin-23
data/README.md CHANGED
@@ -60,6 +60,11 @@ See https://github.com/sul-dlss/preservation_catalog#api for info on obtaining a
60
60
 
61
61
  Note that the preservation service is behind a firewall.
62
62
 
63
+ ### Retries
64
+ HTTP GET requests will be automatically retried for certain errors. The number of retries (`retry_max`) and the interval between retries (`retry_interval`) can be specified as part of the configuration of the client.
65
+
66
+ Note that there is special retry behavior (cleaning up files on failure) for `content_to_file`, but it uses the same configuration.
67
+
63
68
  ## API Coverage
64
69
 
65
70
  - druids may be with or without the "druid:" prefix - 'oo000oo0000' or 'druid:oo000oo0000'
@@ -87,6 +92,27 @@ Note that the preservation service is behind a firewall.
87
92
  - `client.objects.metadata(druid: 'oo000oo0000', filepath: 'identityMetadata.xml', version: '8')` - returns contents of identityMetadata.xml in version 8 of Moab object
88
93
  - `client.objects.signature_catalog('oo000oo0000')` - returns latest Moab::SignatureCatalog from Moab
89
94
 
95
+ ### Download content files safely to disk
96
+
97
+ - `client.objects.content_to_file(...)` streams content to a tempfile in the destination directory, verifies integrity when requested, and atomically replaces the destination on success.
98
+
99
+ ```ruby
100
+ client.objects.content_to_file(
101
+ druid: 'oo000oo0000',
102
+ filepath: 'my_file.pdf',
103
+ destination_filepath: '/tmp/my_file.pdf',
104
+ version: '1',
105
+ expected_md5: 'ffc0cc90e4215e0a3d822b04a8eab980'
106
+ )
107
+ ```
108
+
109
+ Behavior notes:
110
+
111
+ - Retries transient failures (`ConnectionFailedError`, HTTP 5xx).
112
+ - Does not retry other errors or integrity failures.
113
+ - Raises `Preservation::Client::IntegrityError` on MD5 mismatch.
114
+ - Leaves no temp files behind on success or failure, and never promotes partial downloads to the destination path.
115
+
90
116
  ### Validate the Moab
91
117
 
92
118
  - `client.objects.validate_moab(druid: 'oo000oo0000')` - validates the Moab object; used by preservationWF to ensure we have a valid Moab before replicating to various preservation endpoints
@@ -1,13 +1,22 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'digest'
4
+ require 'fileutils'
3
5
  require 'moab'
6
+ require 'tempfile'
4
7
 
5
8
  # NOTE: this class makes use of data structures from moab-versioning gem,
6
9
  # but it does NOT directly access any preservation storage roots
7
10
  module Preservation
8
11
  class Client
9
12
  # API calls that are about Preserved Objects
10
- class Objects < VersionedApiService
13
+ class Objects < VersionedApiService # rubocop:disable Metrics/ClassLength
14
+ def initialize(connection:, streaming_connection:, retry_max:, retry_interval:, api_version: DEFAULT_API_VERSION)
15
+ super(connection: connection, streaming_connection: streaming_connection, api_version: api_version)
16
+ @retry_max = retry_max
17
+ @retry_interval = retry_interval
18
+ end
19
+
11
20
  # @param [String] druid - with or without prefix: 'druid:bb123cd4567' OR 'bb123cd4567'
12
21
  # @return [Hash] the checksums and filesize for the druid
13
22
  def checksum(druid:)
@@ -60,6 +69,36 @@ module Preservation
60
69
  file(druid, 'content', filepath, version, on_data: on_data)
61
70
  end
62
71
 
72
+ # retrieve a content file from a Moab object and write it to destination atomically
73
+ # @param [String] druid - with or without prefix: 'druid:ab123cd4567' OR 'ab123cd4567'
74
+ # @param [String] filepath - the path of the file relative to the moab content directory
75
+ # @param [String] destination_filepath - absolute or relative path to desired destination file
76
+ # @param [String] version - the version of the file requested (defaults to nil for latest version)
77
+ # @param [String, nil] expected_md5 - optional expected md5 checksum for integrity validation
78
+ # @param [Integer, nil] max - number of retry attempts after the initial attempt (defaults to the client's configured retry_max when nil)
79
+ # @param [Float, nil] interval - base delay in seconds for exponential retry backoff (defaults to the client's configured retry_interval when nil)
80
+ # @raise [Preservation::Client::IntegrityError] if the expected_md5 is provided and does not match the actual md5
81
+ # @raise [Preservation::Client::NotFoundError] if the specified file is not found
82
+ # @raise [Preservation::Client::Error] for other errors encountered during download
83
+ def content_to_file(druid:, filepath:, destination_filepath:, version: nil, expected_md5: nil, # rubocop:disable Metrics/ParameterLists
84
+ max: nil, interval: nil)
85
+ with_retries(max: max || @retry_max, interval: interval || @retry_interval) do
86
+ temp_filepath = nil
87
+
88
+ begin
89
+ temp_filepath = download_to_tempfile(druid: druid, filepath: filepath,
90
+ destination_filepath: destination_filepath,
91
+ version: version)
92
+ verify_md5!(filepath: temp_filepath, expected_md5: expected_md5) if expected_md5
93
+
94
+ File.rename(temp_filepath, destination_filepath)
95
+ temp_filepath = nil
96
+ ensure
97
+ cleanup_tempfile(temp_filepath)
98
+ end
99
+ end
100
+ end
101
+
63
102
  # retrieve a manifest file from a Moab object
64
103
  # @param [String] druid - with or without prefix: 'druid:ab123cd4567' OR 'ab123cd4567'
65
104
  # @param [String] filepath - the path of the file relative to the moab manifest directory
@@ -104,6 +143,70 @@ module Preservation
104
143
  def file(druid, category, filepath, version, on_data: nil)
105
144
  get("objects/#{druid}/file", { category: category, filepath: filepath, version: version }, on_data: on_data)
106
145
  end
146
+
147
+ def with_retries(max:, interval:)
148
+ attempt = 0
149
+
150
+ begin
151
+ yield
152
+ rescue StandardError => e
153
+ raise if !retryable_error?(e) || attempt >= max
154
+
155
+ sleep(interval.to_f * (Client::RETRY_BACKOFF_FACTOR**attempt))
156
+ attempt += 1
157
+ retry
158
+ end
159
+ end
160
+
161
+ def download_to_tempfile(druid:, filepath:, destination_filepath:, version: nil)
162
+ destination_dir = File.dirname(destination_filepath)
163
+ FileUtils.mkdir_p(destination_dir)
164
+
165
+ tempfile = Tempfile.create(['preservation-client-', '.tmp'], destination_dir)
166
+ tempfile.binmode
167
+ temp_filepath = tempfile.path
168
+
169
+ begin
170
+ content(druid: druid, filepath: filepath, version: version,
171
+ on_data: proc do |chunk, _size, _env|
172
+ tempfile.write(chunk)
173
+ end)
174
+ tempfile.flush
175
+ tempfile.fsync
176
+ rescue StandardError
177
+ cleanup_tempfile(temp_filepath)
178
+ raise
179
+ ensure
180
+ tempfile.close
181
+ end
182
+
183
+ temp_filepath
184
+ end
185
+
186
+ def verify_md5!(filepath:, expected_md5:)
187
+ actual_md5 = Digest::MD5.file(filepath).hexdigest
188
+ return if actual_md5.casecmp?(expected_md5)
189
+
190
+ raise IntegrityError,
191
+ "Downloaded file md5 mismatch for #{filepath}: expected #{expected_md5}, got #{actual_md5}"
192
+ end
193
+
194
+ def retryable_error?(error)
195
+ return true if error.is_a?(ConnectionFailedError)
196
+
197
+ return true if error.is_a?(Error) && (500..599).cover?(error.status)
198
+
199
+ false
200
+ end
201
+
202
+ def cleanup_tempfile(path)
203
+ return if path.nil?
204
+ return unless File.exist?(path)
205
+
206
+ File.delete(path)
207
+ rescue Errno::ENOENT
208
+ nil
209
+ end
107
210
  end
108
211
  end
109
212
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Preservation
4
4
  class Client
5
- VERSION = '7.3.0'
5
+ VERSION = '8.0.0'
6
6
  end
7
7
  end
@@ -4,14 +4,15 @@ module Preservation
4
4
  class Client
5
5
  # @abstract API calls to a versioned endpoint
6
6
  class VersionedApiService
7
- def initialize(connection:, api_version: DEFAULT_API_VERSION)
7
+ def initialize(connection:, api_version: DEFAULT_API_VERSION, streaming_connection: nil)
8
8
  @connection = connection
9
9
  @api_version = api_version
10
+ @streaming_connection = streaming_connection
10
11
  end
11
12
 
12
13
  private
13
14
 
14
- attr_reader :connection, :api_version
15
+ attr_reader :connection, :api_version, :streaming_connection
15
16
 
16
17
  # @param path [String] path to be appended to connection url (no leading slash)
17
18
  def get_json(path, object_id)
@@ -42,12 +43,12 @@ module Preservation
42
43
  return http_response(:get, path, params) unless on_data
43
44
 
44
45
  req_url = "#{api_version}/#{path}"
45
- connection.get("#{api_version}/#{path}", params) do |req|
46
+ (streaming_connection || connection).get("#{api_version}/#{path}", params) do |req|
46
47
  req.options.on_data = proc do |chunk, size, env|
47
48
  if env.status >= 300
48
49
  errmsg = "Preservation::Client.#{caller_locations.first.label} " \
49
50
  "got #{env.status} from Preservation at #{req_url}"
50
- raise http_exception_class(env.status), errmsg
51
+ raise http_exception_class(env.status).new(errmsg, status: env.status)
51
52
  end
52
53
  on_data.call(chunk, size, env)
53
54
  end
@@ -103,7 +104,7 @@ module Preservation
103
104
  rescue Faraday::Error => e
104
105
  errmsg = "Preservation::Client.#{caller_locations.first.label} " \
105
106
  "got #{e.response[:status]} from Preservation at #{req_url}: #{e.response[:body]}"
106
- raise http_exception_class(e.response[:status]), errmsg
107
+ raise http_exception_class(e.response[:status]).new(errmsg, status: e.response[:status])
107
108
  end
108
109
 
109
110
  # @param status_code [Integer] the HTTP status code to translate to an exception class
@@ -4,6 +4,7 @@ require 'active_support/core_ext/hash/indifferent_access'
4
4
  require 'active_support/core_ext/module/delegation'
5
5
  require 'active_support/core_ext/object/blank'
6
6
  require 'faraday'
7
+ require 'faraday/retry'
7
8
  require 'singleton'
8
9
  require 'zeitwerk'
9
10
 
@@ -15,7 +16,15 @@ loader.setup
15
16
  module Preservation
16
17
  # REST API client wrapper for PreservationCatalog with error handling
17
18
  class Client
18
- class Error < StandardError; end
19
+ # Base error class for preservation-client errors
20
+ class Error < StandardError
21
+ attr_reader :status
22
+
23
+ def initialize(message = nil, status: nil)
24
+ super(message)
25
+ @status = status
26
+ end
27
+ end
19
28
 
20
29
  # Error raised when server returns 404 Not Found
21
30
  class NotFoundError < Error; end
@@ -34,6 +43,9 @@ module Preservation
34
43
  # timeouts
35
44
  class ConnectionFailedError < Error; end
36
45
 
46
+ # Error raised when downloaded file integrity verification fails
47
+ class IntegrityError < Error; end
48
+
37
49
  Object = Struct.new('Object', :druid, :current_version, :ok_on_local_storage) do
38
50
  def ok_on_local_storage?
39
51
  ok_on_local_storage
@@ -42,13 +54,18 @@ module Preservation
42
54
 
43
55
  DEFAULT_API_VERSION = 'v1'
44
56
  DEFAULT_TIMEOUT = 300
57
+ DEFAULT_RETRY_MAX = 3
58
+ DEFAULT_RETRY_INTERVAL = 0.5
59
+ RETRY_BACKOFF_FACTOR = 2
45
60
  TOKEN_HEADER = 'Authorization'
46
61
 
47
62
  include Singleton
48
63
 
49
64
  # @return [Preservation::Client::Objects] an instance of the `Client::Objects` class
50
65
  def objects
51
- @objects ||= Objects.new(connection: connection, api_version: DEFAULT_API_VERSION)
66
+ @objects ||= Objects.new(connection: connection, streaming_connection: streaming_connection,
67
+ retry_max: retry_max, retry_interval: retry_interval,
68
+ api_version: DEFAULT_API_VERSION)
52
69
  end
53
70
 
54
71
  # @return [Preservation::Client::Catalog] an instance of the `Client::Catalog` class
@@ -60,13 +77,19 @@ module Preservation
60
77
  # @param [String] url the endpoint URL
61
78
  # @param [String] token a bearer token for HTTP authentication
62
79
  # @param [Integer] read_timeout the value in seconds of the read timeout
63
- def configure(url:, token:, read_timeout: DEFAULT_TIMEOUT)
80
+ # @param [Integer] retry_max number of retry attempts for GET requests
81
+ # @param [Float] retry_interval base delay in seconds between retries (exponential backoff)
82
+ def configure(url:, token:, read_timeout: DEFAULT_TIMEOUT,
83
+ retry_max: DEFAULT_RETRY_MAX, retry_interval: DEFAULT_RETRY_INTERVAL)
64
84
  instance.url = url
65
85
  instance.token = token
66
86
  instance.read_timeout = read_timeout
87
+ instance.retry_max = retry_max
88
+ instance.retry_interval = retry_interval
67
89
 
68
- # Force connection to be re-established when `.configure` is called
90
+ # Force connections to be re-established when `.configure` is called
69
91
  instance.connection = nil
92
+ instance.streaming_connection = nil
70
93
 
71
94
  self
72
95
  end
@@ -74,7 +97,7 @@ module Preservation
74
97
  delegate :objects, :update, to: :instance
75
98
  end
76
99
 
77
- attr_writer :connection, :read_timeout, :token, :url
100
+ attr_writer :connection, :read_timeout, :retry_interval, :retry_max, :streaming_connection, :token, :url
78
101
 
79
102
  delegate :update, to: :catalog
80
103
 
@@ -92,11 +115,35 @@ module Preservation
92
115
  @read_timeout || raise(Error, 'read timeout has not been configured')
93
116
  end
94
117
 
118
+ def retry_max
119
+ @retry_max || raise(Error, 'retry_max has not been configured')
120
+ end
121
+
122
+ def retry_interval
123
+ @retry_interval || raise(Error, 'retry_interval has not been configured')
124
+ end
125
+
95
126
  def connection
96
- @connection ||= Faraday.new(url, request: { read_timeout: read_timeout }) do |builder|
127
+ @connection ||= build_connection(with_retry: true)
128
+ end
129
+
130
+ def streaming_connection
131
+ @streaming_connection ||= build_connection(with_retry: false)
132
+ end
133
+
134
+ def build_connection(with_retry: true) # rubocop:disable Metrics/AbcSize
135
+ Faraday.new(url, request: { read_timeout: read_timeout }) do |builder|
97
136
  builder.use ErrorFaradayMiddleware
137
+ if with_retry
138
+ builder.request :retry, max: retry_max,
139
+ interval: retry_interval,
140
+ backoff_factor: RETRY_BACKOFF_FACTOR,
141
+ methods: [:get],
142
+ exceptions: Faraday::Retry::Middleware::DEFAULT_EXCEPTIONS +
143
+ [Faraday::ConnectionFailed, Faraday::SSLError, Faraday::ServerError]
144
+ end
98
145
  builder.use Faraday::Request::UrlEncoded
99
- builder.use Faraday::Response::RaiseError # raise exceptions on 40x, 50x responses
146
+ builder.use Faraday::Response::RaiseError
100
147
  builder.adapter Faraday.default_adapter
101
148
  builder.headers[:user_agent] = user_agent
102
149
  builder.headers[TOKEN_HEADER] = "Bearer #{token}"
@@ -31,6 +31,7 @@ Gem::Specification.new do |spec|
31
31
 
32
32
  spec.add_dependency 'activesupport', '>= 4.2'
33
33
  spec.add_dependency 'faraday', '~> 2.0'
34
+ spec.add_dependency 'faraday-retry', '~> 2.0'
34
35
  spec.add_dependency 'moab-versioning', '>= 5.0.0', '< 7'
35
36
  spec.add_dependency 'zeitwerk', '~> 2.1'
36
37
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: preservation-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 7.3.0
4
+ version: 8.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2026-05-18 00:00:00.000000000 Z
10
+ date: 2026-06-29 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: activesupport
@@ -37,6 +37,20 @@ dependencies:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
39
  version: '2.0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: faraday-retry
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '2.0'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '2.0'
40
54
  - !ruby/object:Gem::Dependency
41
55
  name: moab-versioning
42
56
  requirement: !ruby/object:Gem::Requirement