preservation-client 7.3.0 → 7.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/Gemfile.lock +1 -1
- data/README.md +21 -0
- data/lib/preservation/client/objects.rb +99 -1
- data/lib/preservation/client/version.rb +1 -1
- data/lib/preservation/client/versioned_api_service.rb +2 -2
- data/lib/preservation/client.rb +12 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4d817dbb112d807e3b070711015fef2a49f7b750152a2295be3035ae50656ee6
|
|
4
|
+
data.tar.gz: d68da61d268e666137004c72ce3418d9a3e46694e82007dc540819b1456baa35
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1a10922191104b4055c087b00209e4fe8c03608fa821349e3c50d2f8ec14e403c072d6347efdf038d854f517254511906bcf6ed688d89d073fa3daa072960f03
|
|
7
|
+
data.tar.gz: 87a123f701eca1f7e7791c42684aa291e0801a1a9d29c26ea08f683cae8f696e02d2d6535cf2b199e240defd885d298ddbcbae490024728cdebbbf07832c09af
|
data/.rubocop.yml
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
|
@@ -87,6 +87,27 @@ Note that the preservation service is behind a firewall.
|
|
|
87
87
|
- `client.objects.metadata(druid: 'oo000oo0000', filepath: 'identityMetadata.xml', version: '8')` - returns contents of identityMetadata.xml in version 8 of Moab object
|
|
88
88
|
- `client.objects.signature_catalog('oo000oo0000')` - returns latest Moab::SignatureCatalog from Moab
|
|
89
89
|
|
|
90
|
+
### Download content files safely to disk
|
|
91
|
+
|
|
92
|
+
- `client.objects.content_to_file(...)` streams content to a tempfile in the destination directory, verifies integrity when requested, and atomically replaces the destination on success.
|
|
93
|
+
|
|
94
|
+
```ruby
|
|
95
|
+
client.objects.content_to_file(
|
|
96
|
+
druid: 'oo000oo0000',
|
|
97
|
+
filepath: 'my_file.pdf',
|
|
98
|
+
destination_filepath: '/tmp/my_file.pdf',
|
|
99
|
+
version: '1',
|
|
100
|
+
expected_md5: 'ffc0cc90e4215e0a3d822b04a8eab980'
|
|
101
|
+
)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Behavior notes:
|
|
105
|
+
|
|
106
|
+
- Retries transient failures (`ConnectionFailedError`, HTTP 5xx).
|
|
107
|
+
- Does not retry other errors or integrity failures.
|
|
108
|
+
- Raises `Preservation::Client::IntegrityError` on MD5 mismatch.
|
|
109
|
+
- Leaves no temp files behind on success or failure, and never promotes partial downloads to the destination path.
|
|
110
|
+
|
|
90
111
|
### Validate the Moab
|
|
91
112
|
|
|
92
113
|
- `client.objects.validate_moab(druid: 'oo000oo0000')` - validates the Moab object; used by preservationWF to ensure we have a valid Moab before replicating to various preservation endpoints
|
|
data/lib/preservation/client/objects.rb
CHANGED
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'digest'
|
|
4
|
+
require 'fileutils'
|
|
3
5
|
require 'moab'
|
|
6
|
+
require 'tempfile'
|
|
4
7
|
|
|
5
8
|
# NOTE: this class makes use of data structures from moab-versioning gem,
|
|
6
9
|
# but it does NOT directly access any preservation storage roots
|
|
7
10
|
module Preservation
|
|
8
11
|
class Client
|
|
9
12
|
# API calls that are about Preserved Objects
|
|
10
|
-
class Objects < VersionedApiService
|
|
13
|
+
class Objects < VersionedApiService # rubocop:disable Metrics/ClassLength
|
|
11
14
|
# @param [String] druid - with or without prefix: 'druid:bb123cd4567' OR 'bb123cd4567'
|
|
12
15
|
# @return [Hash] the checksums and filesize for the druid
|
|
13
16
|
def checksum(druid:)
|
|
@@ -60,6 +63,36 @@ module Preservation
|
|
|
60
63
|
file(druid, 'content', filepath, version, on_data: on_data)
|
|
61
64
|
end
|
|
62
65
|
|
|
66
|
+
# retrieve a content file from a Moab object and write it to destination atomically
|
|
67
|
+
# @param [String] druid - with or without prefix: 'druid:ab123cd4567' OR 'ab123cd4567'
|
|
68
|
+
# @param [String] filepath - the path of the file relative to the moab content directory
|
|
69
|
+
# @param [String] destination_filepath - absolute or relative path to desired destination file
|
|
70
|
+
# @param [String] version - the version of the file requested (defaults to nil for latest version)
|
|
71
|
+
# @param [String, nil] expected_md5 - optional expected md5 checksum for integrity validation
|
|
72
|
+
# @param [Integer] max_retries - number of retry attempts after the initial attempt
|
|
73
|
+
# @param [Float] delay_seconds - base delay in seconds for retry backoff; the wait before retry N is delay_seconds * N
|
|
74
|
+
# @raise [Preservation::Client::IntegrityError] if the expected_md5 is provided and does not match the actual md5
|
|
75
|
+
# @raise [Preservation::Client::NotFoundError] if the specified file is not found
|
|
76
|
+
# @raise [Preservation::Client::Error] for other errors encountered during download
|
|
77
|
+
def content_to_file(druid:, filepath:, destination_filepath:, version: nil, expected_md5: nil, # rubocop:disable Metrics/ParameterLists
|
|
78
|
+
max_retries: 3, delay_seconds: 0.5)
|
|
79
|
+
with_retries(max_retries: max_retries, delay_seconds: delay_seconds) do
|
|
80
|
+
temp_filepath = nil
|
|
81
|
+
|
|
82
|
+
begin
|
|
83
|
+
temp_filepath = download_to_tempfile(druid: druid, filepath: filepath,
|
|
84
|
+
destination_filepath: destination_filepath,
|
|
85
|
+
version: version)
|
|
86
|
+
verify_md5!(filepath: temp_filepath, expected_md5: expected_md5) if expected_md5
|
|
87
|
+
|
|
88
|
+
File.rename(temp_filepath, destination_filepath)
|
|
89
|
+
temp_filepath = nil
|
|
90
|
+
ensure
|
|
91
|
+
cleanup_tempfile(temp_filepath)
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
|
|
63
96
|
# retrieve a manifest file from a Moab object
|
|
64
97
|
# @param [String] druid - with or without prefix: 'druid:ab123cd4567' OR 'ab123cd4567'
|
|
65
98
|
# @param [String] filepath - the path of the file relative to the moab manifest directory
|
|
@@ -104,6 +137,71 @@ module Preservation
|
|
|
104
137
|
def file(druid, category, filepath, version, on_data: nil)
|
|
105
138
|
get("objects/#{druid}/file", { category: category, filepath: filepath, version: version }, on_data: on_data)
|
|
106
139
|
end
|
|
140
|
+
|
|
141
|
+
def with_retries(max_retries:, delay_seconds:)
|
|
142
|
+
attempt = 0
|
|
143
|
+
|
|
144
|
+
begin
|
|
145
|
+
yield
|
|
146
|
+
rescue StandardError => e
|
|
147
|
+
raise if !retryable_error?(e) || attempt >= max_retries
|
|
148
|
+
|
|
149
|
+
sleep_seconds = delay_seconds.to_f * (attempt + 1)
|
|
150
|
+
sleep(sleep_seconds) unless sleep_seconds.nil?
|
|
151
|
+
attempt += 1
|
|
152
|
+
retry
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
def download_to_tempfile(druid:, filepath:, destination_filepath:, version: nil)
|
|
157
|
+
destination_dir = File.dirname(destination_filepath)
|
|
158
|
+
FileUtils.mkdir_p(destination_dir)
|
|
159
|
+
|
|
160
|
+
tempfile = Tempfile.create(['preservation-client-', '.tmp'], destination_dir)
|
|
161
|
+
tempfile.binmode
|
|
162
|
+
temp_filepath = tempfile.path
|
|
163
|
+
|
|
164
|
+
begin
|
|
165
|
+
content(druid: druid, filepath: filepath, version: version,
|
|
166
|
+
on_data: proc do |chunk, _size, _env|
|
|
167
|
+
tempfile.write(chunk)
|
|
168
|
+
end)
|
|
169
|
+
tempfile.flush
|
|
170
|
+
tempfile.fsync
|
|
171
|
+
rescue StandardError
|
|
172
|
+
cleanup_tempfile(temp_filepath)
|
|
173
|
+
raise
|
|
174
|
+
ensure
|
|
175
|
+
tempfile.close
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
temp_filepath
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
def verify_md5!(filepath:, expected_md5:)
|
|
182
|
+
actual_md5 = Digest::MD5.file(filepath).hexdigest
|
|
183
|
+
return if actual_md5.casecmp?(expected_md5)
|
|
184
|
+
|
|
185
|
+
raise IntegrityError,
|
|
186
|
+
"Downloaded file md5 mismatch for #{filepath}: expected #{expected_md5}, got #{actual_md5}"
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
def retryable_error?(error)
|
|
190
|
+
return true if error.is_a?(ConnectionFailedError)
|
|
191
|
+
|
|
192
|
+
return true if error.is_a?(Error) && (500..599).cover?(error.status)
|
|
193
|
+
|
|
194
|
+
false
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
def cleanup_tempfile(path)
|
|
198
|
+
return if path.nil?
|
|
199
|
+
return unless File.exist?(path)
|
|
200
|
+
|
|
201
|
+
File.delete(path)
|
|
202
|
+
rescue Errno::ENOENT
|
|
203
|
+
nil
|
|
204
|
+
end
|
|
107
205
|
end
|
|
108
206
|
end
|
|
109
207
|
end
|
|
data/lib/preservation/client/versioned_api_service.rb
CHANGED
|
@@ -47,7 +47,7 @@ module Preservation
|
|
|
47
47
|
if env.status >= 300
|
|
48
48
|
errmsg = "Preservation::Client.#{caller_locations.first.label} " \
|
|
49
49
|
"got #{env.status} from Preservation at #{req_url}"
|
|
50
|
-
raise http_exception_class(env.status), errmsg
|
|
50
|
+
raise http_exception_class(env.status).new(errmsg, status: env.status)
|
|
51
51
|
end
|
|
52
52
|
on_data.call(chunk, size, env)
|
|
53
53
|
end
|
|
@@ -103,7 +103,7 @@ module Preservation
|
|
|
103
103
|
rescue Faraday::Error => e
|
|
104
104
|
errmsg = "Preservation::Client.#{caller_locations.first.label} " \
|
|
105
105
|
"got #{e.response[:status]} from Preservation at #{req_url}: #{e.response[:body]}"
|
|
106
|
-
raise http_exception_class(e.response[:status]), errmsg
|
|
106
|
+
raise http_exception_class(e.response[:status]).new(errmsg, status: e.response[:status])
|
|
107
107
|
end
|
|
108
108
|
|
|
109
109
|
# @param status_code [Integer] the HTTP status code to translate to an exception class
|
data/lib/preservation/client.rb
CHANGED
|
@@ -15,7 +15,15 @@ loader.setup
|
|
|
15
15
|
module Preservation
|
|
16
16
|
# REST API client wrapper for PreservationCatalog with error handling
|
|
17
17
|
class Client
|
|
18
|
-
class Error < StandardError; end
|
|
18
|
+
# Base error class for preservation-client errors
|
|
19
|
+
class Error < StandardError
|
|
20
|
+
attr_reader :status
|
|
21
|
+
|
|
22
|
+
def initialize(message = nil, status: nil)
|
|
23
|
+
super(message)
|
|
24
|
+
@status = status
|
|
25
|
+
end
|
|
26
|
+
end
|
|
19
27
|
|
|
20
28
|
# Error raised when server returns 404 Not Found
|
|
21
29
|
class NotFoundError < Error; end
|
|
@@ -34,6 +42,9 @@ module Preservation
|
|
|
34
42
|
# timeouts
|
|
35
43
|
class ConnectionFailedError < Error; end
|
|
36
44
|
|
|
45
|
+
# Error raised when downloaded file integrity verification fails
|
|
46
|
+
class IntegrityError < Error; end
|
|
47
|
+
|
|
37
48
|
Object = Struct.new('Object', :druid, :current_version, :ok_on_local_storage) do
|
|
38
49
|
def ok_on_local_storage?
|
|
39
50
|
ok_on_local_storage
|