@de-otio/trellis 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/dist/env.d.ts +168 -0
  2. package/dist/env.d.ts.map +1 -1
  3. package/dist/env.js +155 -0
  4. package/dist/env.js.map +1 -1
  5. package/dist/lambda/media-completion-worker.d.ts +175 -0
  6. package/dist/lambda/media-completion-worker.d.ts.map +1 -0
  7. package/dist/lambda/media-completion-worker.js +373 -0
  8. package/dist/lambda/media-completion-worker.js.map +1 -0
  9. package/dist/lambda/media-processing-worker.d.ts +172 -1
  10. package/dist/lambda/media-processing-worker.d.ts.map +1 -1
  11. package/dist/lambda/media-processing-worker.js +343 -49
  12. package/dist/lambda/media-processing-worker.js.map +1 -1
  13. package/dist/lib/exif-stripper.d.ts +37 -22
  14. package/dist/lib/exif-stripper.d.ts.map +1 -1
  15. package/dist/lib/exif-stripper.js +101 -41
  16. package/dist/lib/exif-stripper.js.map +1 -1
  17. package/dist/lib/media/cas-keys.d.ts +63 -0
  18. package/dist/lib/media/cas-keys.d.ts.map +1 -0
  19. package/dist/lib/media/cas-keys.js +102 -0
  20. package/dist/lib/media/cas-keys.js.map +1 -0
  21. package/dist/lib/media/classify-worker-error.d.ts +48 -0
  22. package/dist/lib/media/classify-worker-error.d.ts.map +1 -0
  23. package/dist/lib/media/classify-worker-error.js +319 -0
  24. package/dist/lib/media/classify-worker-error.js.map +1 -0
  25. package/dist/lib/media/dedupe-key.d.ts +29 -0
  26. package/dist/lib/media/dedupe-key.d.ts.map +1 -0
  27. package/dist/lib/media/dedupe-key.js +49 -0
  28. package/dist/lib/media/dedupe-key.js.map +1 -0
  29. package/dist/lib/media/duration-cap.d.ts +30 -0
  30. package/dist/lib/media/duration-cap.d.ts.map +1 -0
  31. package/dist/lib/media/duration-cap.js +37 -0
  32. package/dist/lib/media/duration-cap.js.map +1 -0
  33. package/dist/lib/media/ffmpeg-args.d.ts +83 -0
  34. package/dist/lib/media/ffmpeg-args.d.ts.map +1 -0
  35. package/dist/lib/media/ffmpeg-args.js +119 -0
  36. package/dist/lib/media/ffmpeg-args.js.map +1 -0
  37. package/dist/lib/media/media-ports.d.ts +126 -0
  38. package/dist/lib/media/media-ports.d.ts.map +1 -0
  39. package/dist/lib/media/media-ports.js +129 -0
  40. package/dist/lib/media/media-ports.js.map +1 -0
  41. package/dist/lib/media/media-upsert.d.ts +55 -0
  42. package/dist/lib/media/media-upsert.d.ts.map +1 -0
  43. package/dist/lib/media/media-upsert.js +38 -0
  44. package/dist/lib/media/media-upsert.js.map +1 -0
  45. package/dist/lib/media/moderation-provider.d.ts +111 -0
  46. package/dist/lib/media/moderation-provider.d.ts.map +1 -0
  47. package/dist/lib/media/moderation-provider.js +130 -0
  48. package/dist/lib/media/moderation-provider.js.map +1 -0
  49. package/dist/lib/media/moderation-resolved-payload.d.ts +48 -0
  50. package/dist/lib/media/moderation-resolved-payload.d.ts.map +1 -0
  51. package/dist/lib/media/moderation-resolved-payload.js +37 -0
  52. package/dist/lib/media/moderation-resolved-payload.js.map +1 -0
  53. package/dist/lib/media/moderation-status.d.ts +98 -0
  54. package/dist/lib/media/moderation-status.d.ts.map +1 -0
  55. package/dist/lib/media/moderation-status.js +122 -0
  56. package/dist/lib/media/moderation-status.js.map +1 -0
  57. package/dist/lib/media/processing-types.d.ts +45 -0
  58. package/dist/lib/media/processing-types.d.ts.map +1 -0
  59. package/dist/lib/media/processing-types.js +9 -0
  60. package/dist/lib/media/processing-types.js.map +1 -0
  61. package/dist/lib/media/promote-decision.d.ts +64 -0
  62. package/dist/lib/media/promote-decision.d.ts.map +1 -0
  63. package/dist/lib/media/promote-decision.js +76 -0
  64. package/dist/lib/media/promote-decision.js.map +1 -0
  65. package/dist/lib/media/quota-check.d.ts +22 -0
  66. package/dist/lib/media/quota-check.d.ts.map +1 -0
  67. package/dist/lib/media/quota-check.js +42 -0
  68. package/dist/lib/media/quota-check.js.map +1 -0
  69. package/dist/lib/media/quota-types.d.ts +15 -0
  70. package/dist/lib/media/quota-types.d.ts.map +1 -0
  71. package/dist/lib/media/quota-types.js +9 -0
  72. package/dist/lib/media/quota-types.js.map +1 -0
  73. package/dist/lib/media/route-upload.d.ts +58 -0
  74. package/dist/lib/media/route-upload.d.ts.map +1 -0
  75. package/dist/lib/media/route-upload.js +80 -0
  76. package/dist/lib/media/route-upload.js.map +1 -0
  77. package/dist/lib/media/serve-gate.d.ts +51 -0
  78. package/dist/lib/media/serve-gate.d.ts.map +1 -0
  79. package/dist/lib/media/serve-gate.js +68 -0
  80. package/dist/lib/media/serve-gate.js.map +1 -0
  81. package/dist/lib/media/tenant-resolution.d.ts +42 -0
  82. package/dist/lib/media/tenant-resolution.d.ts.map +1 -0
  83. package/dist/lib/media/tenant-resolution.js +45 -0
  84. package/dist/lib/media/tenant-resolution.js.map +1 -0
  85. package/dist/lib/media/text-moderation.d.ts +28 -0
  86. package/dist/lib/media/text-moderation.d.ts.map +1 -0
  87. package/dist/lib/media/text-moderation.js +62 -0
  88. package/dist/lib/media/text-moderation.js.map +1 -0
  89. package/dist/lib/media/track-verdict.d.ts +45 -0
  90. package/dist/lib/media/track-verdict.d.ts.map +1 -0
  91. package/dist/lib/media/track-verdict.js +52 -0
  92. package/dist/lib/media/track-verdict.js.map +1 -0
  93. package/dist/lib/media/transcript-moderation.d.ts +47 -0
  94. package/dist/lib/media/transcript-moderation.d.ts.map +1 -0
  95. package/dist/lib/media/transcript-moderation.js +70 -0
  96. package/dist/lib/media/transcript-moderation.js.map +1 -0
  97. package/dist/lib/media-handler.d.ts.map +1 -1
  98. package/dist/lib/media-handler.js +15 -9
  99. package/dist/lib/media-handler.js.map +1 -1
  100. package/dist/lib/post-handler.d.ts.map +1 -1
  101. package/dist/lib/post-handler.js +4 -1
  102. package/dist/lib/post-handler.js.map +1 -1
  103. package/dist/lib/routes/media.d.ts +21 -0
  104. package/dist/lib/routes/media.d.ts.map +1 -1
  105. package/dist/lib/routes/media.js +584 -483
  106. package/dist/lib/routes/media.js.map +1 -1
  107. package/dist/lib/services/image-normalizer.d.ts +64 -6
  108. package/dist/lib/services/image-normalizer.d.ts.map +1 -1
  109. package/dist/lib/services/image-normalizer.js +88 -6
  110. package/dist/lib/services/image-normalizer.js.map +1 -1
  111. package/dist/lib/services/media-upload-service.d.ts +2 -2
  112. package/dist/lib/services/media-upload-service.d.ts.map +1 -1
  113. package/dist/lib/services/media-upload-service.js +22 -21
  114. package/dist/lib/services/media-upload-service.js.map +1 -1
  115. package/dist/lib/tenant-scope.d.ts.map +1 -1
  116. package/dist/lib/tenant-scope.js +16 -1
  117. package/dist/lib/tenant-scope.js.map +1 -1
  118. package/package.json +2 -1
  119. package/prisma/migrations/20260625000000_media_tenant_scope_and_moderation_status/migration.sql +49 -0
  120. package/prisma/migrations/20260625000001_p0b_moderation_jobs/migration.sql +73 -0
  121. package/prisma/schema.prisma +95 -17
  122. package/src/lambda/media-completion-worker.ts +567 -0
  123. package/src/lambda/media-processing-worker.ts +508 -59
@@ -0,0 +1,567 @@
1
+ // media-completion-worker.ts — imperative SHELL for the standardized media
2
+ // job-completion SQS queue (B2).
3
+ //
4
+ // One queue drains the completion notifications of BOTH async moderation
5
+ // tracks of a media object:
6
+ //
7
+ // - VISUAL: the image/video moderation provider finishes a job and publishes
8
+ // a completion to SNS, which is fanned into this SQS queue. The SNS envelope
9
+ // carries `{ Message: "<json>" }` whose inner JSON has a `JobId`.
10
+ // - AUDIO: speech-to-text (Transcribe) finishes and emits an EventBridge event
11
+ // fanned into this SQS queue, whose `detail` carries `TranscriptionJobName`.
12
+ //
13
+ // THREAT MODEL: the SQS message body is an UNTRUSTED POINTER. A replay, a forged
14
+ // body, or a spoofed verdict must not move a media object toward "approved". So
15
+ // the worker treats the body as nothing more than a job-id pointer and re-fetches
16
+ // the authoritative state from the provider. The body's own verdict/status (if
17
+ // any) is ALWAYS ignored.
18
+ //
19
+ // SAFETY ORDER (fixed, never reordered):
20
+ // 0. parse body -> jobId ONLY
21
+ // 1. DEDUPE FIRST (after the read-only job + media lookups that supply the dedupe-key inputs): insert ProcessedModerationMessage(dedupeKey); if it already
22
+ // exists, ack-drop (idempotent no-op) BEFORE any side effect.
23
+ // 2. RE-FETCH authoritative track state from the provider (visual) or the
24
+ // transcription seam (audio). Derive THIS track's decision SOLELY from the
25
+ // re-fetched result. Non-terminal / failed / unknown => errored (fail closed).
26
+ // 3. Using the MediaModerationJob row (looked up by jobId, read-only, ahead of the dedupe claim) => mediaId + track: persist this
27
+ // track's decision. Read the OTHER track's decision. Build both TrackOutcomes.
28
+ // 4. decidePromotion({ visual, audio, currentStatus, casObjectPresent }).
29
+ // casObjectPresent = the cleaned bytes are available to serve: TRUE iff the
30
+ // cas/ key exists (a prior promote) OR the cleaned STAGING key exists (the
31
+ // processing worker left them there, pre-promote).
32
+ // 5. APPLY in fixed order:
33
+ // a. if shouldPromote: copyObject(STAGING -> cas) — promote the CLEANED
34
+ // STAGING bytes (the exact bytes that were moderated), NEVER the raw
35
+ // pending upload. Then best-effort deleteObject(pending) (raw-original
36
+ // cleanup) AND deleteObject(staging) (staging cleanup) — tolerate
37
+ // already-deleted on both. cas/ thus only ever holds APPROVED cleaned
38
+ // bytes ("cleaned-staging, promote-on-approval").
39
+ // b. if shouldPersistStatus: persist transition.status.
40
+ // c. if shouldEmitResolved: emit moderation.resolved with
41
+ // moderationResolvedPayload(mediaId, status) — ready|not-ready ONLY.
42
+ // An illegal transition (transition.ok === false) is ack-dropped, never DLQ.
43
+ //
44
+ // THRESHOLD SNAPSHOT: when (re)interpreting the re-fetched verdict we use the
45
+ // threshold snapshot stored ON THE JOB ROW (job.thresholdSnapshot), never live
46
+ // Env — so a config edit landing between the original submission and a (re)delivery
47
+ // cannot flip a replayed verdict. The reinterpreter is an injected pure function
48
+ // (no operational numbers live in this PUBLIC tarball).
49
+ //
50
+ // This is the imperative shell: it sequences I/O and delegates EVERY decision to
51
+ // the pure functional-core units (decidePromotion, combineTrackVerdicts,
52
+ // transcriptToModerationDecision, deriveDedupeKey, moderationResolvedPayload).
53
+
54
+ import type { SQSHandler, SQSRecord } from "aws-lambda";
55
+
56
+ import { decidePromotion } from "../lib/media/promote-decision.js";
57
+ import { casKey, pendingKey, isCasKeyError } from "../lib/media/cas-keys.js";
58
+ import type { TrackOutcome, Track } from "../lib/media/track-verdict.js";
59
+ import { deriveDedupeKey } from "../lib/media/dedupe-key.js";
60
+ import { moderationResolvedPayload } from "../lib/media/moderation-resolved-payload.js";
61
+ import { transcriptToModerationDecision } from "../lib/media/transcript-moderation.js";
62
+ import type { TextModerationProvider } from "../lib/media/text-moderation.js";
63
+ import type { StoragePort, TranscribePort } from "../lib/media/media-ports.js";
64
+ import type {
65
+ MediaModerationProvider,
66
+ ModerationVerdict,
67
+ } from "../lib/media/moderation-provider.js";
68
+ import type {
69
+ ModerationDecision,
70
+ ModerationStatus,
71
+ } from "../lib/media/moderation-status.js";
72
+
73
+ // ---------------------------------------------------------------------------
74
+ // Narrow data shapes the shell needs from persistence. These mirror the Prisma
75
+ // MediaModerationJob / MediaFile / ProcessedModerationMessage rows at the I/O
76
+ // boundary, but are declared structurally so this module does NOT import the
77
+ // generated Prisma client (matching the seam discipline elsewhere in media/).
78
+ // ---------------------------------------------------------------------------
79
+
80
/**
 * A moderation-job row as the shell sees it, addressed by the provider's job
 * id. Declared structurally so this module does not import generated client
 * types.
 */
export interface ModerationJobRow {
  /** Id of the media object this job moderates. */
  readonly mediaId: string;
  /** The moderation track this job belongs to. */
  readonly track: Track;
  /** Opaque JSON threshold snapshot captured when the job was submitted. */
  readonly thresholdSnapshot: unknown;
}
87
+
88
/**
 * Persistence seam for the completion worker. The shell performs all of its
 * database I/O through these methods and never reaches around them to a
 * concrete client.
 */
export interface CompletionStore {
  /**
   * Try to claim a completion message for processing.
   *
   * Resolves `true` when THIS call inserted the dedupe row (first delivery) and
   * `false` when the row was already there (duplicate; the caller must
   * ack-drop). Implementations are expected to use an INSERT ... ON CONFLICT DO
   * NOTHING so the claim is atomic across concurrent deliveries.
   */
  claimMessage(dedupeKey: string): Promise<boolean>;

  /** Find the job row for a provider job id; resolves `null` when unknown. */
  findJobByJobId(jobId: string): Promise<ModerationJobRow | null>;

  /** Record the resolved decision of the track that owns `jobId`. */
  persistTrackDecision(jobId: string, decision: ModerationDecision): Promise<void>;

  /**
   * Report the sibling track's state for a media object: its decision when the
   * sibling job exists and has resolved, otherwise a `state` that tells an
   * existing-but-unresolved job apart from no job at all.
   */
  readOtherTrack(mediaId: string, thisTrack: Track): Promise<OtherTrackState>;

  /**
   * Read the media object's current moderation status together with the
   * identity columns the storage keys are derived from. `null` when missing.
   */
  findMedia(mediaId: string): Promise<MediaCoords | null>;

  /** Store a new moderation status on the media object. */
  persistMediaStatus(mediaId: string, status: ModerationStatus): Promise<void>;
}
129
+
130
/**
 * State of the sibling track, from which its {@link TrackOutcome} is built.
 *
 * | state     | meaning                                                  |
 * | --------- | -------------------------------------------------------- |
 * | `decided` | the sibling track has a resolved decision                |
 * | `pending` | the sibling job exists but has not resolved yet          |
 * | `absent`  | the media object has no job for the sibling track at all |
 */
export type OtherTrackState =
  | { readonly state: "decided"; readonly decision: ModerationDecision }
  | { readonly state: "pending" }
  | { readonly state: "absent" };
140
+
141
/**
 * What the shell needs to know about a media object in order to gate promotion
 * and address its storage keys.
 *
 * The store hands back the RAW identity columns only; the shell turns them into
 * storage keys itself so the store never assembles a key string.
 */
export interface MediaCoords {
  /** Moderation status currently persisted for the object. */
  readonly moderationStatus: ModerationStatus;
  /** Owning tenant; an input to the cas-keys builders. */
  readonly tenantId: string;
  /** Upload session id; addresses the raw pending and cleaned staging keys. */
  readonly uploadId: string;
  /**
   * 64-char lowercase SHA-256 of the CLEANED (post-transcode) bytes, as
   * persisted by the processing worker. Addresses the cas/ key. This is NOT the
   * upload-time uploadId placeholder.
   */
  readonly contentHash: string;
}
159
+
160
+ // ---------------------------------------------------------------------------
161
+ // Injected pure helpers (no operational numbers in this tarball).
162
+ // ---------------------------------------------------------------------------
163
+
164
/**
 * Injected pure function that turns a re-fetched VISUAL verdict into a
 * moderation decision.
 *
 * It is supplied by the consuming app rather than implemented here, so this
 * public module carries no operational thresholds. The second argument is the
 * threshold SNAPSHOT stored at submission time (never live Env), which keeps a
 * config edit between deliveries from flipping a replayed verdict.
 *
 * Contract for implementers: total and fail-closed. Anything uncertain maps to
 * `"review"`, never to `"approved"`.
 */
export type VisualVerdictReinterpreter = (
  verdict: ModerationVerdict,
  thresholdSnapshot: unknown,
) => ModerationDecision;
176
+
177
/** The dependency bundle the per-record processor is bound to. */
export interface CompletionDeps {
  /** Persistence seam (dedupe claims, job/media rows, status writes). */
  readonly store: CompletionStore;
  /** Visual moderation provider, re-queried for the authoritative verdict. */
  readonly moderation: MediaModerationProvider;
  /** Transcription seam, re-queried for the audio track. */
  readonly transcribe: TranscribePort;
  /** Text moderation provider applied to a completed transcript. */
  readonly textModeration: TextModerationProvider;
  /** Object-storage seam used to head, copy and delete objects. */
  readonly storage: StoragePort;
  /** Re-interprets a visual verdict against the job's threshold snapshot. */
  readonly reinterpretVisual: VisualVerdictReinterpreter;
  /**
   * Emits the anti-oracle resolved event (ready | not-ready only).
   * Best-effort; must not throw to ack.
   */
  readonly emitResolved: (payload: {
    readonly mediaId: string;
    readonly status: "ready" | "not-ready";
  }) => Promise<void>;
  /** Optional structured logger; every level is itself optional. */
  readonly log?: {
    info?: (msg: string, data?: unknown) => void;
    warn?: (msg: string, data?: unknown) => void;
    error?: (msg: string, data?: unknown) => void;
  };
}
197
+
198
/**
 * Result of processing a single SQS record.
 *
 * Only `retry` sends the message back to the queue (and, eventually, to the
 * DLQ). Every other kind is an ack-drop: either an idempotent no-op or a
 * fail-closed terminal outcome.
 */
export type RecordOutcome =
  // Dedupe hit: this completion was already processed.
  | { readonly kind: "duplicate" }
  // No job id in the body, or the job is unknown: fail-closed drop.
  | { readonly kind: "unroutable" }
  // Replay against a terminal status: ack-drop, never DLQ.
  | { readonly kind: "illegal-transition" }
  // The decision was applied and `status` is the resulting status.
  | { readonly kind: "applied"; readonly status: ModerationStatus }
  // Transient I/O problem: return the message to the queue.
  | { readonly kind: "retry"; readonly reason: string };
209
+
210
+ // ---------------------------------------------------------------------------
211
+ // Body parsing — extract ONLY the job id from an untrusted pointer.
212
+ // ---------------------------------------------------------------------------
213
+
214
/**
 * Recover the provider job id (and the track it implies) from an untrusted
 * completion message body. Any verdict or status embedded in the body is
 * ignored; only the id is read.
 *
 * Recognised shapes, checked in this order:
 *   1. AUDIO  — `{ "detail": { "TranscriptionJobName": "..." } }` (EventBridge)
 *               or a top-level `{ "TranscriptionJobName": "..." }`.
 *   2. VISUAL — a top-level `{ "JobId": "..." }`.
 *   3. VISUAL — `{ "Message": "<json string>" }` (SNS envelope) whose inner
 *               JSON object carries `JobId`.
 *
 * @param body Raw SQS message body.
 * @returns The job id plus its track, or `null` when no id can be recovered, so
 *   the caller can ack-drop a permanently malformed pointer instead of
 *   DLQ-looping it.
 *
 * Never throws: both JSON.parse calls are guarded.
 */
export function extractJobPointer(
  body: string,
): { readonly jobId: string; readonly track: Track } | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(body);
  } catch {
    return null;
  }
  if (typeof parsed !== "object" || parsed === null) {
    return null;
  }
  const envelope = parsed as Record<string, unknown>;

  // AUDIO: prefer the EventBridge `detail` object, then the top-level field.
  const detailValue = envelope.detail;
  const eventDetail =
    detailValue !== null && typeof detailValue === "object"
      ? (detailValue as Record<string, unknown>)
      : undefined;
  const audioJobName =
    pickString(eventDetail?.TranscriptionJobName) ??
    pickString(envelope.TranscriptionJobName);
  if (audioJobName !== null) {
    return { jobId: audioJobName, track: "AUDIO" };
  }

  // VISUAL, direct form: the body itself carries `JobId`.
  const topLevelJobId = pickString(envelope.JobId);
  if (topLevelJobId !== null) {
    return { jobId: topLevelJobId, track: "VISUAL" };
  }

  // VISUAL, SNS form: `Message` is a JSON string that carries `JobId`.
  const wrapped = pickString(envelope.Message);
  if (wrapped === null) {
    return null;
  }
  let unwrapped: unknown;
  try {
    unwrapped = JSON.parse(wrapped);
  } catch {
    return null;
  }
  if (typeof unwrapped !== "object" || unwrapped === null) {
    return null;
  }
  const nestedJobId = pickString((unwrapped as Record<string, unknown>).JobId);
  return nestedJobId === null ? null : { jobId: nestedJobId, track: "VISUAL" };
}
279
+
280
/** Pass a non-empty string through unchanged; map every other value to null. */
function pickString(v: unknown): string | null {
  if (typeof v !== "string") {
    return null;
  }
  return v.length > 0 ? v : null;
}
284
+
285
+ // ---------------------------------------------------------------------------
286
+ // Re-fetch authoritative track decision (fail-closed).
287
+ // ---------------------------------------------------------------------------
288
+
289
/**
 * Re-fetch THIS track's authoritative decision from the provider, ignoring any
 * verdict carried by the message body. Fail-closed: every path that cannot
 * produce one of the three known decisions resolves `null`, which the caller
 * records as an errored track outcome (never `"approved"`).
 *
 * The track is taken from the persisted job row. `pointer.track` is derived
 * from the untrusted body, so if it disagrees with `job.track` the pointer is
 * treated as forged or misrouted and no provider is queried.
 *
 * VISUAL: `getVideoModeration(jobId)` is called and a non-object result yields
 * `null`; otherwise the verdict is re-interpreted against the JOB's threshold
 * snapshot (not live Env).
 *
 * AUDIO: `getTranscription(jobId)` is called; only a result whose status is
 * `"COMPLETED"` is passed to `transcriptToModerationDecision`. Any other status
 * yields `null`.
 *
 * @param pointer Job id and track recovered from the message body (untrusted).
 * @param job     The persisted job row for `pointer.jobId` (authoritative).
 * @param deps    Provider seams and the injected visual re-interpreter.
 * @returns The decision, or `null` for errored / unknown / mismatched input.
 *   Rejections from the provider seams propagate to the caller.
 */
export async function refetchTrackDecision(
  pointer: { readonly jobId: string; readonly track: Track },
  job: ModerationJobRow,
  deps: CompletionDeps,
): Promise<ModerationDecision | null> {
  // Never let the body pick the provider: the job row decides the track.
  if (job.track !== pointer.track) {
    deps.log?.warn?.("completion: pointer track does not match job row — errored", {
      jobId: pointer.jobId,
    });
    return null;
  }

  if (job.track === "VISUAL") {
    const verdict = await deps.moderation.getVideoModeration(pointer.jobId);
    if (verdict == null || typeof verdict !== "object") return null;
    const decision = deps.reinterpretVisual(verdict, job.thresholdSnapshot);
    return normalizeDecision(decision);
  }

  // AUDIO
  const res = await deps.transcribe.getTranscription(pointer.jobId);
  if (res == null || res.status !== "COMPLETED") {
    // Non-terminal or failed transcription — fail closed (errored).
    return null;
  }
  // NOTE(review): a COMPLETED result with no transcript is moderated as the
  // empty string. Confirm the port only omits `transcript` for genuinely
  // silent audio, otherwise this should fail closed instead.
  const transcript = res.transcript ?? "";
  const decision = await transcriptToModerationDecision(
    transcript,
    deps.textModeration,
  );
  return normalizeDecision(decision);
}
327
+
328
/**
 * Whitelist filter for decisions: `"approved"`, `"review"` and `"quarantine"`
 * pass through; every other value fails closed to `null`.
 */
function normalizeDecision(d: unknown): ModerationDecision | null {
  if (d === "approved") return d;
  if (d === "review") return d;
  if (d === "quarantine") return d;
  return null;
}
332
+
333
+ // ---------------------------------------------------------------------------
334
+ // Build a TrackOutcome from a (possibly null) decision.
335
+ // ---------------------------------------------------------------------------
336
+
337
/**
 * Wrap this track's re-fetched decision as a TrackOutcome: `null` (failed or
 * unknown) becomes `errored`, anything else becomes `decided`.
 */
function outcomeFromDecision(d: ModerationDecision | null): TrackOutcome {
  if (d === null) {
    return { state: "errored" };
  }
  return { state: "decided", decision: d };
}
341
+
342
/**
 * Translate the sibling track's persisted state into a TrackOutcome.
 *
 * `decided` and `absent` map one-to-one. `pending`, and any state this function
 * does not recognise, becomes `errored`: a sibling job that has not resolved is
 * not positive evidence of approval, so the mapping fails closed.
 */
function outcomeFromOther(other: OtherTrackState): TrackOutcome {
  if (other.state === "decided") {
    return { state: "decided", decision: other.decision };
  }
  if (other.state === "absent") {
    return { state: "absent" };
  }
  return { state: "errored" };
}
357
+
358
+ // ---------------------------------------------------------------------------
359
+ // Per-record processor (the heart; mock-friendly).
360
+ // ---------------------------------------------------------------------------
361
+
362
/**
 * Process one completion message end to end.
 *
 * Sequence: parse the pointer, look up the job and media rows (read-only),
 * derive the storage keys (pure), claim the dedupe key, re-fetch this track's
 * authoritative decision, persist it, combine it with the sibling track,
 * decide, then apply promote -> persist status -> emit.
 *
 * @param body Raw SQS message body. Treated purely as an untrusted job-id
 *   pointer; any verdict it carries is ignored.
 * @param deps Injected persistence / provider / storage seams.
 * @returns A {@link RecordOutcome}. Only `kind: "retry"` should be surfaced to
 *   SQS as a batch-item failure. Rejections from the seams are not caught here
 *   (except the best-effort deletes and the resolved-event emit); the handler
 *   converts them to a retry.
 *
 * NOTE(review): the dedupe row is claimed BEFORE the re-fetch and apply steps.
 * If any of those steps throws, the handler returns the message to the queue,
 * but the redelivery then finds the dedupe row and is ack-dropped as a
 * duplicate. Making post-claim failures retryable needs a way to release the
 * claim, which the store seam does not offer.
 */
export async function processCompletion(
  body: string,
  deps: CompletionDeps,
): Promise<RecordOutcome> {
  // 0. Extract ONLY the job id from the untrusted pointer.
  const pointer = extractJobPointer(body);
  if (pointer === null) {
    deps.log?.warn?.("completion: no job pointer in body — dropping");
    return { kind: "unroutable" };
  }

  // 1. Look up the job row first — needed for the threshold snapshot and the
  //    media id. (Read-only; no side effect yet.)
  const job = await deps.store.findJobByJobId(pointer.jobId);
  if (job === null) {
    deps.log?.warn?.("completion: unknown jobId — dropping", {
      jobId: pointer.jobId,
    });
    return { kind: "unroutable" };
  }

  // The pointer's track is inferred from the untrusted body. The job row is
  // the authority, so a disagreement means the pointer is forged or misrouted:
  // drop it before it can persist a decision or read the wrong sibling track.
  if (job.track !== pointer.track) {
    deps.log?.warn?.("completion: pointer track does not match job row — dropping", {
      jobId: pointer.jobId,
    });
    return { kind: "unroutable" };
  }

  // Resolve the media row (read-only) to obtain the content hash that scopes
  // the dedupe key, plus the identity columns the storage keys are built from.
  const media = await deps.store.findMedia(job.mediaId);
  if (media === null) {
    deps.log?.warn?.("completion: media row missing — dropping", {
      mediaId: job.mediaId,
    });
    return { kind: "unroutable" };
  }

  // Derive the storage keys BEFORE claiming the dedupe key. Derivation is pure,
  // and doing it here means the retry for a malformed identity is returned
  // while the message is still unclaimed, so the redelivery is not swallowed
  // as a duplicate.
  const casK = casKey(media.tenantId, media.contentHash);
  const pendingK = pendingKey(media.tenantId, media.uploadId);
  if (isCasKeyError(casK) || isCasKeyError(pendingK)) {
    deps.log?.error?.("completion: un-addressable media identity — retry", {
      mediaId: job.mediaId,
    });
    return { kind: "retry", reason: "cas-key-error" };
  }
  // The cleaned bytes live at the STAGING key until promoted to cas/.
  // NOTE(review): this key is hand-assembled here rather than produced by a
  // cas-keys builder; confirm it matches what the processing worker writes.
  const stagingK = `processing/${media.tenantId}/${media.uploadId}`;

  // 2. DEDUPE FIRST — before any side effect. The key binds the content hash,
  //    the jobId and the track, so a redelivery of the SAME completion is a
  //    no-op.
  const dedupeKey = deriveDedupeKey({
    contentHash: media.contentHash,
    jobId: pointer.jobId,
    track: pointer.track,
  });
  const claimed = await deps.store.claimMessage(dedupeKey);
  if (!claimed) {
    deps.log?.info?.("completion: duplicate delivery — ack-drop", {
      jobId: pointer.jobId,
    });
    return { kind: "duplicate" };
  }

  // 3. RE-FETCH authoritative state for THIS track (body verdict ignored).
  const thisDecision = await refetchTrackDecision(pointer, job, deps);

  // Persist this track's decision. An errored re-fetch (null) writes nothing
  // to the row but still contributes an `errored` outcome to the combine.
  if (thisDecision !== null) {
    await deps.store.persistTrackDecision(pointer.jobId, thisDecision);
  }

  // Read the OTHER track's state and slot both outcomes by track.
  const other = await deps.store.readOtherTrack(job.mediaId, pointer.track);
  const thisOutcome = outcomeFromDecision(thisDecision);
  const otherOutcome = outcomeFromOther(other);

  const visual = pointer.track === "VISUAL" ? thisOutcome : otherOutcome;
  const audio = pointer.track === "AUDIO" ? thisOutcome : otherOutcome;

  // 4. Decide (pure). The cleaned bytes count as present when they exist in
  //    cas/ (after a prior promote) OR at the staging key (before promote).
  //    The staging head is skipped when the cas/ object already exists.
  const casPresent =
    (await deps.storage.headObject(casK)).exists ||
    (await deps.storage.headObject(stagingK)).exists;
  const action = decidePromotion({
    visual,
    audio,
    currentStatus: media.moderationStatus,
    casObjectPresent: casPresent,
  });

  // An illegal transition (e.g. replay on a terminal status) is an idempotent
  // ack-drop — NEVER a DLQ.
  if (action.transition.ok === false) {
    deps.log?.info?.("completion: illegal/absorbing transition — ack-drop", {
      mediaId: job.mediaId,
      from: media.moderationStatus,
    });
    return { kind: "illegal-transition" };
  }

  const nextStatusValue = action.transition.status;

  // 5. APPLY in fixed order: promote -> persist -> emit.

  // 5a. PROMOTE: copy the CLEANED STAGING bytes to cas/ — never the raw pending
  //     upload — then best-effort remove both the raw original and the staging
  //     copy. A copy failure is NOT tolerated and propagates to the caller.
  if (action.shouldPromote) {
    await deps.storage.copyObject(stagingK, casK);
    // Best-effort raw-original cleanup; a failed delete is logged, not thrown.
    try {
      await deps.storage.deleteObject(pendingK);
    } catch (err) {
      deps.log?.warn?.("completion: pending delete tolerated", {
        mediaId: job.mediaId,
        error: String(err),
      });
    }
    // Best-effort staging cleanup. Same tolerance.
    try {
      await deps.storage.deleteObject(stagingK);
    } catch (err) {
      deps.log?.warn?.("completion: staging delete tolerated", {
        mediaId: job.mediaId,
        error: String(err),
      });
    }
  }

  // 5b. PERSIST the new status.
  if (action.shouldPersistStatus) {
    await deps.store.persistMediaStatus(job.mediaId, nextStatusValue);
  }

  // 5c. EMIT the anti-oracle resolved event (ready|not-ready ONLY). The emit is
  //     best-effort: the status is already persisted, so a failing emitter is
  //     logged instead of failing the record.
  if (action.shouldEmitResolved) {
    const payload = moderationResolvedPayload(job.mediaId, nextStatusValue);
    try {
      await deps.emitResolved(payload);
    } catch (err) {
      deps.log?.warn?.("completion: resolved emit failed — tolerated", {
        mediaId: job.mediaId,
        error: String(err),
      });
    }
  }

  deps.log?.info?.("completion: applied", {
    mediaId: job.mediaId,
    status: nextStatusValue,
  });
  return { kind: "applied", status: nextStatusValue };
}
521
+
522
+ // ---------------------------------------------------------------------------
523
+ // SQS handler — the thin adapter. Wiring of concrete adapters (Prisma,
524
+ // Rekognition/Transcribe SDK clients, S3, the event emitter, the threshold-
525
+ // snapshot reinterpreter) is done by the consuming app at startup and bound into
526
+ // `buildDeps`; THIS file ships SDK-free except for the lambda type import.
527
+ // ---------------------------------------------------------------------------
528
+
529
/**
 * Create the SQS handler around a set of injected dependencies. The consuming
 * app passes concrete adapters; tests usually skip the handler and call
 * {@link processCompletion} with mocks.
 *
 * Records are processed one at a time, in order. A record whose outcome is
 * `kind: "retry"`, or whose processing throws, is reported as a batch-item
 * failure so SQS retries / DLQs it. Every other outcome acks the message.
 *
 * @param deps Dependencies forwarded unchanged to {@link processCompletion}.
 * @returns A handler that resolves a `batchItemFailures` response when at least
 *   one record failed, and `undefined` otherwise.
 */
export function makeHandler(deps: CompletionDeps): SQSHandler {
  return async (event) => {
    const retryIds: string[] = [];

    for (const { body, messageId } of event.Records as SQSRecord[]) {
      try {
        const result = await processCompletion(body, deps);
        if (result.kind !== "retry") {
          continue;
        }
        deps.log?.error?.("completion: retry", {
          messageId,
          reason: result.reason,
        });
        retryIds.push(messageId);
      } catch (failure) {
        // Anything thrown is treated as transient and sent back to the queue.
        deps.log?.error?.("completion: unexpected failure — retry", {
          messageId,
          error: String(failure),
        });
        retryIds.push(messageId);
      }
    }

    if (retryIds.length === 0) {
      return;
    }
    return {
      batchItemFailures: retryIds.map((itemIdentifier) => ({ itemIdentifier })),
    };
  };
}