preservation-client 7.3.0 → 7.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 164a8df77b290866471041f9da4a8cd1950e54e3caf5df094e4201c997b6c1c7
4
- data.tar.gz: 93d3fd16f175e7ecdee4538b917698c1be483c9d8233b6d8b1a79306b4be3567
3
+ metadata.gz: 4d817dbb112d807e3b070711015fef2a49f7b750152a2295be3035ae50656ee6
4
+ data.tar.gz: d68da61d268e666137004c72ce3418d9a3e46694e82007dc540819b1456baa35
5
5
  SHA512:
6
- metadata.gz: ad80fe3e37727f57c9e28ab83cac8f058013a9764cce950ab46a535eac845cffd7a5e9b6fcb8149849d87810d319aed4305a1661f9ad29760c5d2d47f4b7d56f
7
- data.tar.gz: c7e0a0cef3816863d13b3019ccd21f9465e07236a14f058cf748ddfaa19a51f08b3d0f39fb7a3024025495d961b16bd85400e2566ad03b09efc4b52b3ebaee11
6
+ metadata.gz: 1a10922191104b4055c087b00209e4fe8c03608fa821349e3c50d2f8ec14e403c072d6347efdf038d854f517254511906bcf6ed688d89d073fa3daa072960f03
7
+ data.tar.gz: 87a123f701eca1f7e7791c42684aa291e0801a1a9d29c26ea08f683cae8f696e02d2d6535cf2b199e240defd885d298ddbcbae490024728cdebbbf07832c09af
data/.rubocop.yml CHANGED
@@ -45,7 +45,7 @@ RSpec/ExampleLength:
45
45
 
46
46
  # most tests should test a single thing
47
47
  RSpec/MultipleExpectations:
48
- Max: 5 # default 1
48
+ Enabled: false
49
49
 
50
50
  RSpec/MultipleMemoizedHelpers:
51
51
  Enabled: false
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- preservation-client (7.3.0)
4
+ preservation-client (7.4.0)
5
5
  activesupport (>= 4.2)
6
6
  faraday (~> 2.0)
7
7
  moab-versioning (>= 5.0.0, < 7)
data/README.md CHANGED
@@ -87,6 +87,27 @@ Note that the preservation service is behind a firewall.
87
87
  - `client.objects.metadata(druid: 'oo000oo0000', filepath: 'identityMetadata.xml', version: '8')` - returns contents of identityMetadata.xml in version 8 of Moab object
88
88
  - `client.objects.signature_catalog('oo000oo0000')` - returns latest Moab::SignatureCatalog from Moab
89
89
 
90
+ ### Download content files safely to disk
91
+
92
+ - `client.objects.content_to_file(...)` streams content to a tempfile in the destination directory, verifies integrity when requested, and atomically replaces the destination on success.
93
+
94
+ ```ruby
95
+ client.objects.content_to_file(
96
+ druid: 'oo000oo0000',
97
+ filepath: 'my_file.pdf',
98
+ destination_filepath: '/tmp/my_file.pdf',
99
+ version: '1',
100
+ expected_md5: 'ffc0cc90e4215e0a3d822b04a8eab980'
101
+ )
102
+ ```
103
+
104
+ Behavior notes:
105
+
106
+ - Retries transient failures (`ConnectionFailedError`, HTTP 5xx).
107
+ - Does not retry other errors or integrity failures.
108
+ - Raises `Preservation::Client::IntegrityError` on MD5 mismatch.
109
+ - Leaves no temp files behind: on success the temp file is renamed onto the destination path, on failure it is deleted, and a partial download is never promoted to the destination path.
110
+
90
111
  ### Validate the Moab
91
112
 
92
113
  - `client.objects.validate_moab(druid: 'ooo000oo0000')` - validates the Moab object; used by preservationWF to ensure we have a valid Moab before replicating to various preservation endpoints
@@ -1,13 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'digest'
4
+ require 'fileutils'
3
5
  require 'moab'
6
+ require 'tempfile'
4
7
 
5
8
  # NOTE: this class makes use of data structures from moab-versioning gem,
6
9
  # but it does NOT directly access any preservation storage roots
7
10
  module Preservation
8
11
  class Client
9
12
  # API calls that are about Preserved Objects
10
- class Objects < VersionedApiService
13
+ class Objects < VersionedApiService # rubocop:disable Metrics/ClassLength
11
14
  # @param [String] druid - with or without prefix: 'druid:bb123cd4567' OR 'bb123cd4567'
12
15
  # @return [Hash] the checksums and filesize for the druid
13
16
  def checksum(druid:)
@@ -60,6 +63,36 @@ module Preservation
60
63
  file(druid, 'content', filepath, version, on_data: on_data)
61
64
  end
62
65
 
66
+ # retrieve a content file from a Moab object and write it to destination atomically
67
+ # @param [String] druid - with or without prefix: 'druid:ab123cd4567' OR 'ab123cd4567'
68
+ # @param [String] filepath - the path of the file relative to the moab content directory
69
+ # @param [String] destination_filepath - absolute or relative path to desired destination file
70
+ # @param [String] version - the version of the file requested (defaults to nil for latest version)
71
+ # @param [String, nil] expected_md5 - optional expected md5 checksum for integrity validation
72
+ # @param [Integer] max_retries - number of retry attempts after the initial attempt
73
+ # @param [Float] delay_seconds - base delay for retry backoff
74
+ # @raise [Preservation::Client::IntegrityError] if the expected_md5 is provided and does not match the actual md5
75
+ # @raise [Preservation::Client::NotFoundError] if the specified file is not found
76
+ # @raise [Preservation::Client::Error] for other errors encountered during download
77
def content_to_file(druid:, filepath:, destination_filepath:, version: nil, expected_md5: nil, # rubocop:disable Metrics/ParameterLists
                    max_retries: 3, delay_seconds: 0.5)
  # Each retry attempt starts over with its own staged file.
  with_retries(max_retries: max_retries, delay_seconds: delay_seconds) do
    staged_path = nil

    begin
      staged_path = download_to_tempfile(druid: druid, filepath: filepath, version: version,
                                         destination_filepath: destination_filepath)
      # The checksum is verified only when the caller supplied one.
      verify_md5!(filepath: staged_path, expected_md5: expected_md5) if expected_md5

      # Promote the fully written (and, if requested, verified) file.
      File.rename(staged_path, destination_filepath)
      # Nothing is left to clean up once the staged file has been moved.
      staged_path = nil
    ensure
      cleanup_tempfile(staged_path)
    end
  end
end
95
+
63
96
  # retrieve a manifest file from a Moab object
64
97
  # @param [String] druid - with or without prefix: 'druid:ab123cd4567' OR 'ab123cd4567'
65
98
  # @param [String] filepath - the path of the file relative to the moab manifest directory
@@ -104,6 +137,71 @@ module Preservation
104
137
  def file(druid, category, filepath, version, on_data: nil)
105
138
  get("objects/#{druid}/file", { category: category, filepath: filepath, version: version }, on_data: on_data)
106
139
  end
140
+
141
# Runs the block, re-running it when it raises an error accepted by
# retryable_error?.
#
# @param max_retries [Integer] retries allowed after the initial attempt
# @param delay_seconds [Numeric] base delay; the pause before retry N is delay_seconds * N
# @yield the operation to attempt
# @return [Object] the block's value from the first attempt that succeeds
# @raise [StandardError] the block's own error when it is not retryable or
#   when max_retries has been used up
def with_retries(max_retries:, delay_seconds:)
  attempt = 0

  begin
    yield
  rescue StandardError => e
    raise if !retryable_error?(e) || attempt >= max_retries

    attempt += 1
    pause = delay_seconds.to_f * attempt
    # Kernel#sleep raises ArgumentError for a negative interval, which would
    # replace the error being retried; a zero or negative delay means
    # "retry immediately".
    sleep(pause) if pause.positive?
    retry
  end
end
155
+
156
# Streams a content file into a new temp file created next to the
# destination and returns that temp file's path.
#
# @param druid [String] passed through to `content`
# @param filepath [String] passed through to `content`
# @param destination_filepath [String] final destination; only its directory
#   is used here (created if missing) to hold the temp file
# @param version [String, nil] passed through to `content`
# @return [String] path of the closed, fully written temp file
# @raise [Exception] whatever `content` or the file operations raise; the
#   temp file is removed before the exception propagates
def download_to_tempfile(druid:, filepath:, destination_filepath:, version: nil)
  destination_dir = File.dirname(destination_filepath)
  FileUtils.mkdir_p(destination_dir)

  tempfile = Tempfile.create(['preservation-client-', '.tmp'], destination_dir)
  temp_filepath = tempfile.path
  completed = false

  begin
    tempfile.binmode
    content(druid: druid, filepath: filepath, version: version,
            on_data: proc { |chunk, _size, _env| tempfile.write(chunk) })
    tempfile.flush
    tempfile.fsync
    completed = true
  ensure
    # Close before deleting, and clean up in `ensure` rather than
    # `rescue StandardError` so that non-StandardError exceptions such as
    # Interrupt do not leave a partial file behind: the caller never
    # receives the path when this method raises.
    tempfile.close unless tempfile.closed?
    cleanup_tempfile(temp_filepath) unless completed
  end

  temp_filepath
end
180
+
181
# Compares the MD5 digest of the file at filepath with expected_md5,
# ignoring letter case.
#
# @return [nil] when the digests match
# @raise [IntegrityError] when they differ
def verify_md5!(filepath:, expected_md5:)
  computed = Digest::MD5.file(filepath).hexdigest
  matches = computed.casecmp?(expected_md5)

  raise IntegrityError, "Downloaded file md5 mismatch for #{filepath}: expected #{expected_md5}, got #{computed}" unless matches
end
188
+
189
# Decides whether a failed attempt is worth repeating: connection failures
# always are, other client errors only when they carry a 5xx status.
#
# @param error [Exception]
# @return [Boolean]
def retryable_error?(error)
  case error
  when ConnectionFailedError then true
  when Error then (500..599).cover?(error.status)
  else false
  end
end
196
+
197
# Deletes the file at path when a path was given and the file exists.
# A file that disappears between the check and the delete is ignored.
def cleanup_tempfile(path)
  File.delete(path) if path && File.exist?(path)
rescue Errno::ENOENT
  nil
end
107
205
  end
108
206
  end
109
207
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Preservation
4
4
  class Client
5
- VERSION = '7.3.0'
5
+ VERSION = '7.4.0'
6
6
  end
7
7
  end
@@ -47,7 +47,7 @@ module Preservation
47
47
  if env.status >= 300
48
48
  errmsg = "Preservation::Client.#{caller_locations.first.label} " \
49
49
  "got #{env.status} from Preservation at #{req_url}"
50
- raise http_exception_class(env.status), errmsg
50
+ raise http_exception_class(env.status).new(errmsg, status: env.status)
51
51
  end
52
52
  on_data.call(chunk, size, env)
53
53
  end
@@ -103,7 +103,7 @@ module Preservation
103
103
  rescue Faraday::Error => e
104
104
  errmsg = "Preservation::Client.#{caller_locations.first.label} " \
105
105
  "got #{e.response[:status]} from Preservation at #{req_url}: #{e.response[:body]}"
106
- raise http_exception_class(e.response[:status]), errmsg
106
+ raise http_exception_class(e.response[:status]).new(errmsg, status: e.response[:status])
107
107
  end
108
108
 
109
109
  # @param status_code [Integer] the HTTP status code to translate to an exception class
@@ -15,7 +15,15 @@ loader.setup
15
15
  module Preservation
16
16
  # REST API client wrapper for PreservationCatalog with error handling
17
17
  class Client
18
- class Error < StandardError; end
18
+ # Base error class for preservation-client errors
19
class Error < StandardError
  # Status supplied at construction time (nil when none was given).
  attr_reader :status

  # @param message [String, nil] forwarded to StandardError
  # @param status [Object, nil] stored as-is and exposed through #status
  def initialize(message = nil, status: nil)
    @status = status
    super(message)
  end
end
19
27
 
20
28
  # Error raised when server returns 404 Not Found
21
29
  class NotFoundError < Error; end
@@ -34,6 +42,9 @@ module Preservation
34
42
  # timeouts
35
43
  class ConnectionFailedError < Error; end
36
44
 
45
+ # Error raised when downloaded file integrity verification fails
46
+ class IntegrityError < Error; end
47
+
37
48
  Object = Struct.new('Object', :druid, :current_version, :ok_on_local_storage) do
38
49
  def ok_on_local_storage?
39
50
  ok_on_local_storage
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: preservation-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 7.3.0
4
+ version: 7.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay