atlas_rb 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7e6020b2b0ed84ff61541cdcc4ac4ef2634a4ee4573540c5e10888cab26ec2df
4
- data.tar.gz: e18077da93ef29644e95cd94bc231bd42497d7bc21e05b5f97cd27df92346a6f
3
+ metadata.gz: decab10cfb0c67c68615a9695a8c5084aa1bd349e199dccd77a76d024a3e7ba3
4
+ data.tar.gz: 90c4db9a3ffd80e9f226334f4862b9982534e431a3a6eb58746bfa96442ee701
5
5
  SHA512:
6
- metadata.gz: '068bedce232453c430a4e55c3f252f24a11b3710bc6870cc3061f9d3ec183c39600a5c814147d26ec054684afa5e3a7bc07ffc7d98a676b63904e5a11fc36343'
7
- data.tar.gz: 546dd7050f56a82cd66f54bc38d0d003cc1099f1b998b64255a06ba31ee54d9fbb84a799d56bc46a635b68dd97bb9b87abbb5a94f09bcf2c539220bbb40278c8
6
+ metadata.gz: 7fc789c2d47882f6d93664de27f7ab2f2a0d7d43957f4244436600e4247a332c5f8b3df030a5b11fb064187bd55439c0417a04974ca321df88861d47e0da2f15
7
+ data.tar.gz: 7189e8bcc3b4d483f623cfdc7a0881f41735646ef085f05a1418436b927fc84c2be514844ea44572c0d2c85f48e057be3a86d5774ac55bf8e09ae3ac4238fc4a
data/.version CHANGED
@@ -1 +1 @@
1
- 1.5.0
1
+ 1.6.0
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- atlas_rb (1.5.0)
4
+ atlas_rb (1.6.0)
5
5
  faraday (~> 2.7)
6
6
  faraday-follow_redirects (~> 0.3.0)
7
7
  faraday-multipart (~> 1)
data/lib/atlas_rb/blob.rb CHANGED
@@ -24,11 +24,15 @@ module AtlasRb
24
24
  # header. Falls through to {AtlasRb.config}.default_on_behalf_of when
25
25
  # omitted.
26
26
  # @return [Hash] the `"blob"` object, already unwrapped — typically
27
- # includes `"id"`, `"original_filename"`, `"size"`, and a download URL.
27
+ # includes `"id"`, `"original_filename"`, `"size"`, `"digest"` (the
28
+ # recorded fixity digest `"sha512:<hex>"`, or `nil` for a Blob with no
29
+ # held bytes — reconciliation compares this against the v1 manifest
30
+ # without re-downloading), and a download URL.
28
31
  #
29
32
  # @example
30
33
  # AtlasRb::Blob.find("b-321")
31
- # # => { "id" => "b-321", "original_filename" => "scan.pdf", ... }
34
+ # # => { "id" => "b-321", "original_filename" => "scan.pdf",
35
+ # # "digest" => "sha512:9f86d0…", ... }
32
36
  def self.find(id, nuid: nil, on_behalf_of: nil)
33
37
  AtlasRb::Mash.new(JSON.parse(
34
38
  connection({}, nuid, on_behalf_of: on_behalf_of).get(ROUTE + id)&.body
@@ -82,33 +86,44 @@ module AtlasRb
82
86
  # @param idempotency_key [String, nil] optional UUID. A repeat call with
83
87
  # the same key returns the originally-created Blob instead of creating
84
88
  # a new one. See {AtlasRb::Work.create} for full semantics.
89
+ # @param expected_digest [String, nil] optional verify-on-ingest checksum,
90
+ # `"<algorithm>:<hexvalue>"` (sha512/sha256/sha1/md5, e.g.
91
+ # `"sha256:abc…"`). Atlas hashes the uploaded bytes **before** persisting
92
+ # and raises {AtlasRb::FixityMismatchError} (HTTP 422) on a mismatch or an
93
+ # unsupported algorithm — nothing is left behind on rejection.
85
94
  # @param nuid [String, nil] optional acting user's NUID. On the relay-signing
86
95
  # path it is signed into the assertion `sub`; on the BYO-JWT (`ATLAS_JWT`)
87
96
  # path it is ignored (identity lives in the token).
88
97
  # @param on_behalf_of [String, nil] optional NUID for the `On-Behalf-Of`
89
98
  # header. Falls through to {AtlasRb.config}.default_on_behalf_of when
90
99
  # omitted.
91
- # @return [Hash] the created `"blob"` payload, including its `"id"`.
100
+ # @return [Hash] the created `"blob"` payload, including its `"id"` and
101
+ # `"digest"` (the recorded fixity digest, `"sha512:<hex>"`).
102
+ # @raise [AtlasRb::FixityMismatchError] if `expected_digest` was supplied and
103
+ # the uploaded bytes did not match (or the algorithm is unsupported).
104
+ #
105
+ # @note Streams the file (FD closed deterministically); a multi-GB upload is
106
+ # not buffered in memory. See {AtlasRb::FaradayHelper#with_file_part}.
92
107
  #
93
108
  # @example
94
109
  # AtlasRb::Blob.create("w-789", "/tmp/upload.tmp", "final_thesis.pdf")
95
110
  # # => { "id" => "b-321", "original_filename" => "final_thesis.pdf", ... }
96
111
  #
97
- # @example Retry-safe bulk-deposit create
112
+ # @example Retry-safe bulk-deposit create with fixity verification
98
113
  # key = SecureRandom.uuid
99
114
  # AtlasRb::Blob.create("w-789", "/tmp/upload.tmp", "thesis.pdf",
100
- # idempotency_key: key)
101
- def self.create(id, blob_path, original_filename, idempotency_key: nil, nuid: nil, on_behalf_of: nil)
102
- payload = { work_id: id,
103
- original_filename: original_filename,
104
- binary: Faraday::Multipart::FilePart.new(File.open(blob_path),
105
- "application/octet-stream",
106
- File.basename(blob_path)) }
115
+ # idempotency_key: key, expected_digest: "sha256:#{sha}")
116
+ def self.create(id, blob_path, original_filename, expected_digest: nil,
117
+ idempotency_key: nil, nuid: nil, on_behalf_of: nil)
118
+ with_file_part(blob_path) do |part|
119
+ payload = { work_id: id, original_filename: original_filename, binary: part }
120
+ payload[:expected_digest] = expected_digest if expected_digest
107
121
 
108
- AtlasRb::Mash.new(JSON.parse(
109
- multipart(nuid, on_behalf_of: on_behalf_of, idempotency_key: idempotency_key)
110
- .post(ROUTE, payload)&.body
111
- ))['blob']
122
+ AtlasRb::Mash.new(JSON.parse(
123
+ multipart(nuid, on_behalf_of: on_behalf_of, idempotency_key: idempotency_key)
124
+ .post(ROUTE, payload)&.body
125
+ ))['blob']
126
+ end
112
127
  end
113
128
 
114
129
  # Delete a Blob (the bytes *and* the metadata record).
@@ -136,23 +151,32 @@ module AtlasRb
136
151
  #
137
152
  # @param id [String] the Blob ID.
138
153
  # @param blob_path [String] path to the replacement binary on disk.
154
+ # @param expected_digest [String, nil] optional verify-on-ingest checksum,
155
+ # `"<algorithm>:<hexvalue>"`. A mismatch or unsupported algorithm is rejected with HTTP 422, raised as {AtlasRb::FixityMismatchError}.
139
156
  # @param nuid [String, nil] optional acting user's NUID. On the relay-signing
140
157
  # path it is signed into the assertion `sub`; on the BYO-JWT (`ATLAS_JWT`)
141
158
  # path it is ignored (identity lives in the token).
142
159
  # @param on_behalf_of [String, nil] optional NUID for the `On-Behalf-Of`
143
160
  # header. Falls through to {AtlasRb.config}.default_on_behalf_of when
144
161
  # omitted.
145
- # @return [Hash] the parsed JSON response from the patch.
162
+ # @return [Hash] the parsed JSON response from the patch (the updated
163
+ # `"blob"`, with a refreshed `"digest"` for the new revision).
164
+ # @raise [AtlasRb::FixityMismatchError] if `expected_digest` was supplied and
165
+ # the uploaded bytes did not match (or the algorithm is unsupported).
166
+ #
167
+ # @note Streams the file with the FD closed deterministically — see {.create}.
146
168
  #
147
169
  # @example
148
170
  # AtlasRb::Blob.update("b-321", "/tmp/revised.pdf")
149
- def self.update(id, blob_path, nuid: nil, on_behalf_of: nil)
150
- payload = { binary: Faraday::Multipart::FilePart.new(File.open(blob_path),
151
- "application/octet-stream",
152
- File.basename(blob_path)) }
153
- AtlasRb::Mash.new(JSON.parse(
154
- multipart(nuid, on_behalf_of: on_behalf_of).patch(ROUTE + id, payload)&.body
155
- ))
171
+ def self.update(id, blob_path, expected_digest: nil, nuid: nil, on_behalf_of: nil)
172
+ with_file_part(blob_path) do |part|
173
+ payload = { binary: part }
174
+ payload[:expected_digest] = expected_digest if expected_digest
175
+
176
+ AtlasRb::Mash.new(JSON.parse(
177
+ multipart(nuid, on_behalf_of: on_behalf_of).patch(ROUTE + id, payload)&.body
178
+ ))
179
+ end
156
180
  end
157
181
  end
158
182
  end
@@ -134,6 +134,41 @@ module AtlasRb
134
134
  end
135
135
  end
136
136
 
137
+ # Raised when Atlas rejects a binary upload's verify-on-ingest check with a
138
+ # `422` carrying a fixity discriminator — `fixity_mismatch` (the uploaded
139
+ # bytes don't match the supplied `expected_digest`) or
140
+ # `unsupported_digest_algorithm` (a malformed/unknown `expected_digest`).
141
+ # Fires on `POST /files`, `PATCH /files/:id`, and `PATCH /file_sets/:id`.
142
+ #
143
+ # The upload sibling of {ReparentError} / {LinkedMemberError}; same shape,
144
+ # same rationale — without it the `["blob"]` / `["file_set"]` unwrap would
145
+ # return `nil` on the 422 and discard the signal a migration needs to tell a
146
+ # corrupted transfer from a clean one. Atlas rejects *before* persisting, so
147
+ # nothing is left behind to clean up.
148
+ #
149
+ # rescue AtlasRb::FixityMismatchError => e
150
+ # # e.code == "fixity_mismatch": re-fetch the source, retry, or quarantine
151
+ #
152
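+ # @note {AtlasRb::Middleware::RaiseOnResourceError} leaves a `403` on an upload path (`/files...`, `/file_sets...`) untranslated, so it does not raise {ForbiddenError} for authorization failures there.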
153
+ class FixityMismatchError < Error
154
+ # @return [String, nil] the machine-readable error code from the envelope
155
+ # (`"fixity_mismatch"` or `"unsupported_digest_algorithm"`).
156
+ attr_reader :code
157
+
158
+ # @return [String, nil] the rejected resource's ID, from the envelope (the
159
+ # FileSet on the attach path; may be nil on `POST /files`).
160
+ attr_reader :resource_id
161
+
162
+ # @param message [String] human-readable rejection description.
163
+ # @param code [String, nil] the envelope's `error` discriminator.
164
+ # @param resource_id [String, nil] the rejected resource's ID.
165
+ def initialize(message, code: nil, resource_id: nil)
166
+ super(message)
167
+ @code = code
168
+ @resource_id = resource_id
169
+ end
170
+ end
171
+
137
172
  # Raised when Atlas refuses a re-parent, linked-member, or Compilation
138
173
  # request with an HTTP `403`, whose envelope is
139
174
  # `{ "error", "action", "subject" }`. Lets callers distinguish "you may
@@ -124,11 +124,38 @@ module AtlasRb
124
124
  headers: headers
125
125
  ) do |f|
126
126
  f.use AtlasRb::Middleware::RaiseOnStaleResource
127
+ # Translate Atlas's verify-on-ingest 422 (fixity_mismatch /
128
+ # unsupported_digest_algorithm) into a typed FixityMismatchError —
129
+ # the JSON-connection path already carries this; uploads need it too.
130
+ f.use AtlasRb::Middleware::RaiseOnResourceError
127
131
  f.request :multipart
128
132
  f.request :url_encoded
129
133
  end
130
134
  end
131
135
 
136
+ # Build a streaming multipart FilePart for `blob_path`, run the request
137
+ # inside the block, and close the underlying File handle deterministically
138
+ # afterward (on success or exception). The handle must stay open *during*
139
+ # the request — Faraday reads it while posting — so it can't be closed
140
+ # before the call; an unclosed handle leaks a descriptor per upload, which
141
+ # exhausts FDs across a TB migration of millions of files.
142
+ #
143
+ # Streaming/memory: faraday-multipart wraps the part in a streaming
144
+ # CompositeReadIO and the default net_http adapter sends it via
145
+ # `request.body_stream` (Content-Length known), so a multi-GB file uploads
146
+ # without being buffered into a String in memory. (Swapping the host app's
147
+ # default Faraday adapter to a buffering one would regress this.)
148
+ #
149
+ # @param blob_path [String] path to the binary on disk.
150
+ # @yieldparam part [Faraday::Multipart::FilePart] the streaming part.
151
+ # @return the block's return value.
152
+ def with_file_part(blob_path)
153
+ File.open(blob_path, "rb") do |io|
154
+ yield Faraday::Multipart::FilePart.new(io, "application/octet-stream",
155
+ File.basename(blob_path))
156
+ end
157
+ end
158
+
132
159
  # Build a Faraday connection authenticated as the Atlas `:system`
133
160
  # fixture for system-context calls (SSO user provisioning, etc.).
134
161
  #
@@ -98,30 +98,52 @@ module AtlasRb
98
98
  # Attach (or replace) the binary content backing this FileSet.
99
99
  #
100
100
  # The body is uploaded as `application/octet-stream` regardless of the
101
- # file's true type — Atlas inspects the content server-side. To upload
102
- # a binary blob *plus* an original filename, use {Blob.create} directly
103
- # against the underlying `/files/` endpoint.
101
+ # file's true type — Atlas inspects the content server-side. This is the
102
+ # ordered/classified-slot attach used after {.create} cuts the slot.
104
103
  #
105
104
  # @param id [String] the FileSet ID.
106
105
  # @param blob_path [String] path to the binary file on disk.
106
+ # @param original_filename [String, nil] the user-facing filename Atlas
107
+ # should record on the resulting Blob (e.g. the v1 `"page-0001.tif"`);
108
+ # preserved separately from the temp `File.basename(blob_path)`.
109
+ # @param expected_digest [String, nil] optional verify-on-ingest checksum,
110
+ # `"<algorithm>:<hexvalue>"`. A mismatch or unsupported algorithm is rejected with HTTP 422, raised as {AtlasRb::FixityMismatchError}.
111
+ # @param idempotency_key [String, nil] optional UUID. A repeat call with the
112
+ # same key returns the FileSet with its already-attached Blob **without
113
+ # recopying the bytes** (and 410 if it was tombstoned in the interim). See
114
+ # {AtlasRb::Work.create} for full semantics.
107
115
  # @param nuid [String, nil] optional acting user's NUID. On the relay-signing
108
116
  # path it is signed into the assertion `sub`; on the BYO-JWT (`ATLAS_JWT`)
109
117
  # path it is ignored (identity lives in the token).
110
118
  # @param on_behalf_of [String, nil] optional NUID for the `On-Behalf-Of`
111
119
  # header. Falls through to {AtlasRb.config}.default_on_behalf_of when
112
120
  # omitted.
113
- # @return [Hash] the parsed JSON response from the patch.
121
+ # @return [Hash] the parsed JSON response from the patch (the `"file_set"`).
122
+ # @raise [AtlasRb::FixityMismatchError] if `expected_digest` was supplied and
123
+ # the uploaded bytes did not match (or the algorithm is unsupported).
124
+ #
125
+ # @note Streams the file with the FD closed deterministically — see
126
+ # {Blob.create} / {AtlasRb::FaradayHelper#with_file_part}.
114
127
  #
115
128
  # @example
116
129
  # AtlasRb::FileSet.update("fs-001", "/tmp/article.pdf")
117
- def self.update(id, blob_path, nuid: nil, on_behalf_of: nil)
118
- # Need to figure out blob vs XML
119
- payload = { binary: Faraday::Multipart::FilePart.new(File.open(blob_path),
120
- "application/octet-stream",
121
- File.basename(blob_path)) }
122
- AtlasRb::Mash.new(JSON.parse(
123
- multipart(nuid, on_behalf_of: on_behalf_of).patch(ROUTE + id, payload)&.body
124
- ))
130
+ #
131
+ # @example Resumable, filename-preserving migration attach
132
+ # AtlasRb::FileSet.update(page["id"], "/tmp/p1.tif",
133
+ # original_filename: "page-0001.tif",
134
+ # idempotency_key: key)
135
+ def self.update(id, blob_path, original_filename: nil, expected_digest: nil,
136
+ idempotency_key: nil, nuid: nil, on_behalf_of: nil)
137
+ with_file_part(blob_path) do |part|
138
+ payload = { binary: part }
139
+ payload[:original_filename] = original_filename if original_filename
140
+ payload[:expected_digest] = expected_digest if expected_digest
141
+
142
+ AtlasRb::Mash.new(JSON.parse(
143
+ multipart(nuid, on_behalf_of: on_behalf_of, idempotency_key: idempotency_key)
144
+ .patch(ROUTE + id, payload)&.body
145
+ ))
146
+ end
125
147
  end
126
148
 
127
149
  # Persist the per-page IIIF image-service pointer on a FileSet.
@@ -15,39 +15,53 @@ module AtlasRb
15
15
  # {RaiseOnStaleResource}.
16
16
  #
17
17
  # It is intentionally narrow — it only fires on the re-parent
18
- # (`.../parent`) and linked-member (`.../linked_members...`) write paths
19
- # and the Compilation surface (`/compilations...`), and only on
20
- # `403` / `422` bodies carrying an `error` discriminator.
18
+ # (`.../parent`) and linked-member (`.../linked_members...`) write paths,
19
+ # the Compilation surface (`/compilations...`), and binary uploads
20
+ # (`/files...`, `/file_sets...`), and only on `403` / `422` bodies carrying
21
+ # an `error` discriminator. The upload branch is further gated on a fixity
22
+ # discriminator ({FIXITY_CODES}), so a `422` on those paths with any other
23
+ # `error` (or `403`s on uploads, which stay raw) passes through untouched.
21
24
  # Everything else (other paths, other statuses, a `422` whose body uses a
22
25
  # different discriminator such as `tombstone`'s `code: "has_live_children"`)
23
26
  # passes through untouched, so atlas_rb stays a thin Faraday binding that
24
27
  # translates only the wire signals callers genuinely need to discriminate.
25
28
  #
26
29
  # Mapping:
27
- # - `403` (any covered path) → {AtlasRb::ForbiddenError} (`error`/`action`/`subject`)
30
+ # - `403` on a re-parent/linked/Compilation path → {AtlasRb::ForbiddenError}
28
31
  # - `422` on `.../parent` → {AtlasRb::ReparentError} (`error`/`resource_id`)
29
32
  # - `422` on `.../linked_members...` → {AtlasRb::LinkedMemberError}
30
33
  # - `422` on `/compilations...` → {AtlasRb::CompilationError}
34
+ # - `422` + a fixity discriminator on `/files...` / `/file_sets...` →
35
+ # {AtlasRb::FixityMismatchError}
31
36
  class RaiseOnResourceError < Faraday::Middleware
37
+ # Upload-path `422` discriminators this middleware translates; any other
38
+ # `error` on those paths passes through (Atlas owns these as a wire contract).
39
+ FIXITY_CODES = %w[fixity_mismatch unsupported_digest_algorithm].freeze
40
+
32
41
  # @param env [Faraday::Env] the completed response environment.
33
- # @raise [AtlasRb::ForbiddenError] on a 403 to a covered path.
42
+ # @raise [AtlasRb::ForbiddenError] on a 403 to a re-parent/linked/Compilation path.
34
43
  # @raise [AtlasRb::ReparentError] on a 422 to a re-parent path.
35
44
  # @raise [AtlasRb::LinkedMemberError] on a 422 to a linked-member path.
36
45
  # @raise [AtlasRb::CompilationError] on a 422 to a Compilation path.
46
+ # @raise [AtlasRb::FixityMismatchError] on a 422 + fixity discriminator to an upload path.
37
47
  # @return [void]
38
48
  def on_complete(env)
39
- return unless env.status == 403 || env.status == 422
49
+ return unless [403, 422].include?(env.status)
40
50
 
41
51
  path = env.url&.path.to_s
42
52
  reparent = path.end_with?("/parent")
43
53
  linked = path.include?("/linked_members")
44
54
  compilation = path.start_with?("/compilations")
45
- return unless reparent || linked || compilation
55
+ upload = path.start_with?("/files") || path.start_with?("/file_sets")
56
+ return unless reparent || linked || compilation || upload
46
57
 
47
58
  body = parse_json(env.body)
48
59
  return unless body.is_a?(Hash) && body["error"]
49
60
 
50
61
  if env.status == 403
62
+ # 403s on upload paths stay raw — acting-as/authz isn't an upload concern here.
63
+ return unless reparent || linked || compilation
64
+
51
65
  raise AtlasRb::ForbiddenError.new(
52
66
  body["message"] || "Atlas refused the request",
53
67
  code: body["error"],
@@ -66,12 +80,18 @@ module AtlasRb
66
80
  code: body["error"],
67
81
  resource_id: body["resource_id"]
68
82
  )
69
- else
83
+ elsif compilation
70
84
  raise AtlasRb::CompilationError.new(
71
85
  body["message"] || "Atlas rejected the compilation write",
72
86
  code: body["error"],
73
87
  resource_id: body["resource_id"]
74
88
  )
89
+ elsif FIXITY_CODES.include?(body["error"])
90
+ raise AtlasRb::FixityMismatchError.new(
91
+ body["message"] || "Atlas rejected the upload (fixity)",
92
+ code: body["error"],
93
+ resource_id: body["resource_id"]
94
+ )
75
95
  end
76
96
  end
77
97
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: atlas_rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Cliff
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-06-15 00:00:00.000000000 Z
11
+ date: 2026-06-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday