@de-otio/trellis 0.11.0 → 0.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/env.d.ts +168 -0
- package/dist/env.d.ts.map +1 -1
- package/dist/env.js +155 -0
- package/dist/env.js.map +1 -1
- package/dist/lambda/media-completion-worker.d.ts +175 -0
- package/dist/lambda/media-completion-worker.d.ts.map +1 -0
- package/dist/lambda/media-completion-worker.js +373 -0
- package/dist/lambda/media-completion-worker.js.map +1 -0
- package/dist/lambda/media-processing-worker.d.ts +172 -1
- package/dist/lambda/media-processing-worker.d.ts.map +1 -1
- package/dist/lambda/media-processing-worker.js +343 -49
- package/dist/lambda/media-processing-worker.js.map +1 -1
- package/dist/lib/exif-stripper.d.ts +37 -22
- package/dist/lib/exif-stripper.d.ts.map +1 -1
- package/dist/lib/exif-stripper.js +101 -41
- package/dist/lib/exif-stripper.js.map +1 -1
- package/dist/lib/media/cas-keys.d.ts +63 -0
- package/dist/lib/media/cas-keys.d.ts.map +1 -0
- package/dist/lib/media/cas-keys.js +102 -0
- package/dist/lib/media/cas-keys.js.map +1 -0
- package/dist/lib/media/classify-worker-error.d.ts +48 -0
- package/dist/lib/media/classify-worker-error.d.ts.map +1 -0
- package/dist/lib/media/classify-worker-error.js +319 -0
- package/dist/lib/media/classify-worker-error.js.map +1 -0
- package/dist/lib/media/dedupe-key.d.ts +29 -0
- package/dist/lib/media/dedupe-key.d.ts.map +1 -0
- package/dist/lib/media/dedupe-key.js +49 -0
- package/dist/lib/media/dedupe-key.js.map +1 -0
- package/dist/lib/media/duration-cap.d.ts +30 -0
- package/dist/lib/media/duration-cap.d.ts.map +1 -0
- package/dist/lib/media/duration-cap.js +37 -0
- package/dist/lib/media/duration-cap.js.map +1 -0
- package/dist/lib/media/ffmpeg-args.d.ts +83 -0
- package/dist/lib/media/ffmpeg-args.d.ts.map +1 -0
- package/dist/lib/media/ffmpeg-args.js +119 -0
- package/dist/lib/media/ffmpeg-args.js.map +1 -0
- package/dist/lib/media/media-ports.d.ts +126 -0
- package/dist/lib/media/media-ports.d.ts.map +1 -0
- package/dist/lib/media/media-ports.js +129 -0
- package/dist/lib/media/media-ports.js.map +1 -0
- package/dist/lib/media/media-upsert.d.ts +55 -0
- package/dist/lib/media/media-upsert.d.ts.map +1 -0
- package/dist/lib/media/media-upsert.js +38 -0
- package/dist/lib/media/media-upsert.js.map +1 -0
- package/dist/lib/media/moderation-provider.d.ts +111 -0
- package/dist/lib/media/moderation-provider.d.ts.map +1 -0
- package/dist/lib/media/moderation-provider.js +130 -0
- package/dist/lib/media/moderation-provider.js.map +1 -0
- package/dist/lib/media/moderation-resolved-payload.d.ts +48 -0
- package/dist/lib/media/moderation-resolved-payload.d.ts.map +1 -0
- package/dist/lib/media/moderation-resolved-payload.js +37 -0
- package/dist/lib/media/moderation-resolved-payload.js.map +1 -0
- package/dist/lib/media/moderation-status.d.ts +98 -0
- package/dist/lib/media/moderation-status.d.ts.map +1 -0
- package/dist/lib/media/moderation-status.js +122 -0
- package/dist/lib/media/moderation-status.js.map +1 -0
- package/dist/lib/media/processing-types.d.ts +45 -0
- package/dist/lib/media/processing-types.d.ts.map +1 -0
- package/dist/lib/media/processing-types.js +9 -0
- package/dist/lib/media/processing-types.js.map +1 -0
- package/dist/lib/media/promote-decision.d.ts +64 -0
- package/dist/lib/media/promote-decision.d.ts.map +1 -0
- package/dist/lib/media/promote-decision.js +76 -0
- package/dist/lib/media/promote-decision.js.map +1 -0
- package/dist/lib/media/quota-check.d.ts +22 -0
- package/dist/lib/media/quota-check.d.ts.map +1 -0
- package/dist/lib/media/quota-check.js +42 -0
- package/dist/lib/media/quota-check.js.map +1 -0
- package/dist/lib/media/quota-types.d.ts +15 -0
- package/dist/lib/media/quota-types.d.ts.map +1 -0
- package/dist/lib/media/quota-types.js +9 -0
- package/dist/lib/media/quota-types.js.map +1 -0
- package/dist/lib/media/route-upload.d.ts +58 -0
- package/dist/lib/media/route-upload.d.ts.map +1 -0
- package/dist/lib/media/route-upload.js +80 -0
- package/dist/lib/media/route-upload.js.map +1 -0
- package/dist/lib/media/serve-gate.d.ts +51 -0
- package/dist/lib/media/serve-gate.d.ts.map +1 -0
- package/dist/lib/media/serve-gate.js +68 -0
- package/dist/lib/media/serve-gate.js.map +1 -0
- package/dist/lib/media/tenant-resolution.d.ts +42 -0
- package/dist/lib/media/tenant-resolution.d.ts.map +1 -0
- package/dist/lib/media/tenant-resolution.js +45 -0
- package/dist/lib/media/tenant-resolution.js.map +1 -0
- package/dist/lib/media/text-moderation.d.ts +28 -0
- package/dist/lib/media/text-moderation.d.ts.map +1 -0
- package/dist/lib/media/text-moderation.js +62 -0
- package/dist/lib/media/text-moderation.js.map +1 -0
- package/dist/lib/media/track-verdict.d.ts +45 -0
- package/dist/lib/media/track-verdict.d.ts.map +1 -0
- package/dist/lib/media/track-verdict.js +52 -0
- package/dist/lib/media/track-verdict.js.map +1 -0
- package/dist/lib/media/transcript-moderation.d.ts +47 -0
- package/dist/lib/media/transcript-moderation.d.ts.map +1 -0
- package/dist/lib/media/transcript-moderation.js +70 -0
- package/dist/lib/media/transcript-moderation.js.map +1 -0
- package/dist/lib/media-handler.d.ts.map +1 -1
- package/dist/lib/media-handler.js +15 -9
- package/dist/lib/media-handler.js.map +1 -1
- package/dist/lib/post-handler.d.ts.map +1 -1
- package/dist/lib/post-handler.js +4 -1
- package/dist/lib/post-handler.js.map +1 -1
- package/dist/lib/route-helpers.d.ts.map +1 -1
- package/dist/lib/route-helpers.js +9 -1
- package/dist/lib/route-helpers.js.map +1 -1
- package/dist/lib/routes/media.d.ts +21 -0
- package/dist/lib/routes/media.d.ts.map +1 -1
- package/dist/lib/routes/media.js +584 -483
- package/dist/lib/routes/media.js.map +1 -1
- package/dist/lib/services/image-normalizer.d.ts +64 -6
- package/dist/lib/services/image-normalizer.d.ts.map +1 -1
- package/dist/lib/services/image-normalizer.js +88 -6
- package/dist/lib/services/image-normalizer.js.map +1 -1
- package/dist/lib/services/media-upload-service.d.ts +2 -2
- package/dist/lib/services/media-upload-service.d.ts.map +1 -1
- package/dist/lib/services/media-upload-service.js +22 -21
- package/dist/lib/services/media-upload-service.js.map +1 -1
- package/dist/lib/tenant-scope.d.ts.map +1 -1
- package/dist/lib/tenant-scope.js +16 -1
- package/dist/lib/tenant-scope.js.map +1 -1
- package/package.json +2 -1
- package/prisma/migrations/20260625000000_media_tenant_scope_and_moderation_status/migration.sql +49 -0
- package/prisma/migrations/20260625000001_p0b_moderation_jobs/migration.sql +73 -0
- package/prisma/schema.prisma +95 -17
- package/src/lambda/media-completion-worker.ts +567 -0
- package/src/lambda/media-processing-worker.ts +508 -59
|
@@ -1,71 +1,520 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
// media-processing-worker.ts — the P0b media-processing orchestration SHELL.
|
|
2
|
+
//
|
|
3
|
+
// This is the imperative shell over the pure functional-core media units. It is
|
|
4
|
+
// NOT itself a functional-core unit: it performs I/O (object storage, transcode,
|
|
5
|
+
// transcription, moderation, DB writes). BUT all of that I/O arrives through
|
|
6
|
+
// INJECTED capability seams (TranscodePort / StoragePort / TranscribePort /
|
|
7
|
+
// MediaModerationProvider) and a Prisma-shaped persistence port, so the
|
|
8
|
+
// orchestration logic is exercised in unit tests against the B0 in-memory Mocks
|
|
9
|
+
// — no real cloud, no real encoder, no real DB.
|
|
10
|
+
//
|
|
11
|
+
// Per the seam discipline (see lib/media/media-ports.ts and
|
|
12
|
+
// lib/media/moderation-provider.ts): CORE ships the interfaces + mocks; the
|
|
13
|
+
// consuming app (Skybber) injects the concrete cloud adapters at startup via
|
|
14
|
+
// `setMediaProcessingDeps()`. Until they are injected, the handler fails CLOSED
|
|
15
|
+
// (throws → SQS retry), never silently approves or drops work.
|
|
16
|
+
//
|
|
17
|
+
// Fail-closed posture, end to end:
|
|
18
|
+
// - A key that is not a well-formed `pending/{tenant}/{upload}` key is dropped
|
|
19
|
+
// (ack) and NEVER written under — the re-trigger-loop guard.
|
|
20
|
+
// - The tenant is re-derived FROM THE ROW, and the triggering key must equal
|
|
21
|
+
// pendingKey(rowTenant, uploadId); a mismatch is a hard reject (poison →
|
|
22
|
+
// REVIEW + ack), so a forged/odd key cannot make us moderate the wrong cas/.
|
|
23
|
+
// - Over-cap duration is poison → REVIEW + ack (no transcode attempted).
|
|
24
|
+
// - The worker ONLY starts moderation jobs + persists their jobIds; it never
|
|
25
|
+
// fetches verdicts (a separate poller owns fan-in). Moderation runs on the
|
|
26
|
+
// cleaned bytes at the STAGING key, NOT the raw pending upload — and the
|
|
27
|
+
// cleaned bytes are NOT written to cas/ here. cas/ is the CDN-served prefix,
|
|
28
|
+
// so it must only ever hold APPROVED cleaned bytes; the completion worker
|
|
29
|
+
// promotes staging -> cas/ on approval ("cleaned-staging, promote-on-approval").
|
|
30
|
+
// - classifyWorkerError() splits permanent media/payload defects (poison →
|
|
31
|
+
// REVIEW + ack, no DLQ loop) from transient infra faults (retryable → throw
|
|
32
|
+
// → SQS retry → DLQ + alert backstop).
|
|
33
|
+
|
|
34
|
+
import type { SQSHandler, SQSRecord, SQSBatchResponse } from "aws-lambda";
|
|
3
35
|
import { Logger } from "@aws-lambda-powertools/logger";
|
|
36
|
+
import { createHash } from "node:crypto";
|
|
4
37
|
|
|
5
|
-
|
|
38
|
+
import {
|
|
39
|
+
pendingKey,
|
|
40
|
+
casKey,
|
|
41
|
+
isCasKeyError,
|
|
42
|
+
} from "../lib/media/cas-keys.js";
|
|
43
|
+
import { exceedsDurationCap } from "../lib/media/duration-cap.js";
|
|
44
|
+
import { classifyWorkerError } from "../lib/media/classify-worker-error.js";
|
|
45
|
+
import type { Track } from "../lib/media/track-verdict.js";
|
|
46
|
+
import type {
|
|
47
|
+
StoragePort,
|
|
48
|
+
TranscodePort,
|
|
49
|
+
TranscribePort,
|
|
50
|
+
} from "../lib/media/media-ports.js";
|
|
51
|
+
import type {
|
|
52
|
+
MediaModerationProvider,
|
|
53
|
+
S3Ref,
|
|
54
|
+
} from "../lib/media/moderation-provider.js";
|
|
55
|
+
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
// Injected persistence + config seams
|
|
58
|
+
// ---------------------------------------------------------------------------
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* The minimal MediaFile row shape the worker reads. Re-declared (not imported
|
|
62
|
+
* from the Prisma client) so this module compiles in worktrees that have not
|
|
63
|
+
* regenerated the client, mirroring moderation-status.ts's discipline. The
|
|
64
|
+
* shell maps the real Prisma row to this shape at the persistence-port boundary.
|
|
65
|
+
*/
|
|
66
|
+
export interface MediaFileRow {
|
|
67
|
+
readonly id: string;
|
|
68
|
+
readonly tenantId: string;
|
|
69
|
+
readonly uploadId: string | null;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* A copy of the operative moderation thresholds, snapshotted at job-submission
|
|
74
|
+
* time so historical decisions stay auditable after a threshold change. The
|
|
75
|
+
* shape mirrors Env.media.thresholds; the worker treats it as an opaque JSON
|
|
76
|
+
* blob and never reads individual values (no compiled threshold logic here).
|
|
77
|
+
*/
|
|
78
|
+
export type ThresholdSnapshot = Record<
|
|
79
|
+
string,
|
|
80
|
+
{ review: number; quarantine: number }
|
|
81
|
+
>;
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* The persistence operations the worker needs, narrowed to exactly what it
|
|
85
|
+
* uses. Implemented in production by a thin Prisma adapter; in tests by an
|
|
86
|
+
* in-memory fake. Keeping this narrow keeps the worker testable without the
|
|
87
|
+
* generated client and documents the worker's full DB surface in one place.
|
|
88
|
+
*/
|
|
89
|
+
export interface MediaPersistencePort {
|
|
90
|
+
/** Load the MediaFile row for an upload session, or null if none exists. */
|
|
91
|
+
findMediaByUploadId(uploadId: string): Promise<MediaFileRow | null>;
|
|
92
|
+
/** Persist a started per-track moderation job with its threshold snapshot. */
|
|
93
|
+
createModerationJob(input: {
|
|
94
|
+
mediaId: string;
|
|
95
|
+
track: Track;
|
|
96
|
+
jobId: string;
|
|
97
|
+
thresholdSnapshot: ThresholdSnapshot;
|
|
98
|
+
}): Promise<void>;
|
|
99
|
+
/**
|
|
100
|
+
* Persist the REAL content identity of the cleaned bytes onto the MediaFile
|
|
101
|
+
* row, replacing the upload-time `uploadId` placeholder contentHash with the
|
|
102
|
+
* SHA-256 of the transcoded output and recording the future serve key. The
|
|
103
|
+
* completion worker derives the promote target (`cas/{tenant}/{hash}`) from
|
|
104
|
+
* this persisted `contentHash`, so this write MUST happen before moderation
|
|
105
|
+
* fans in — otherwise the object can never promote.
|
|
106
|
+
*/
|
|
107
|
+
persistCleanedContent(
|
|
108
|
+
mediaId: string,
|
|
109
|
+
content: { contentHash: string; originalKey: string },
|
|
110
|
+
): Promise<void>;
|
|
111
|
+
/** Drive a media object's moderationStatus to REVIEW (poison path). */
|
|
112
|
+
markMediaForReview(mediaId: string): Promise<void>;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* The slice of Env.media this worker consumes. Operational parameters arrive
|
|
117
|
+
* here as VALUES sourced from Env.media — never as literals in this file.
|
|
118
|
+
*/
|
|
119
|
+
export interface MediaProcessingConfig {
|
|
120
|
+
/** Hard duration cap (seconds). From Env.media.maxDurationSeconds. */
|
|
121
|
+
readonly maxDurationSeconds: number;
|
|
122
|
+
/** Current operative thresholds, snapshotted onto each started job. */
|
|
123
|
+
readonly thresholds: ThresholdSnapshot;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
/**
|
|
127
|
+
* All capability seams the orchestration core binds to. The handler builds this
|
|
128
|
+
* from the injected concrete adapters; tests build it from the B0 Mocks + an
|
|
129
|
+
* in-memory persistence fake.
|
|
130
|
+
*/
|
|
131
|
+
export interface MediaProcessingDeps {
|
|
132
|
+
readonly storage: StoragePort;
|
|
133
|
+
readonly transcode: TranscodePort;
|
|
134
|
+
readonly transcribe: TranscribePort;
|
|
135
|
+
readonly moderation: MediaModerationProvider;
|
|
136
|
+
readonly persistence: MediaPersistencePort;
|
|
137
|
+
readonly config: MediaProcessingConfig;
|
|
138
|
+
/** The object-storage bucket handle moderation/transcription refs carry. */
|
|
139
|
+
readonly bucket: string;
|
|
140
|
+
/**
|
|
141
|
+
* Deterministic job-name factory for transcription/idempotency. Injected so
|
|
142
|
+
* the shell stays free of Date.now/Math.random in tests; production passes a
|
|
143
|
+
* uuid/time-based generator. `seed` is a stable per-call input (the cleaned staging key).
|
|
144
|
+
*/
|
|
145
|
+
readonly newJobName: (seed: string) => string;
|
|
146
|
+
readonly logger: Pick<Logger, "info" | "warn" | "error">;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
// ---------------------------------------------------------------------------
|
|
150
|
+
// Per-record outcome
|
|
151
|
+
// ---------------------------------------------------------------------------
|
|
152
|
+
|
|
153
|
+
/**
|
|
154
|
+
* The disposition of one SQS record after orchestration.
|
|
155
|
+
*
|
|
156
|
+
* - `ack` — remove from the queue (success, drop-non-pending, or poison routed
|
|
157
|
+
* to REVIEW). A poison ack carries `poison: true` for observability.
|
|
158
|
+
* - `fail` — leave on the queue for SQS to retry (transient/retryable fault).
|
|
159
|
+
* The handler maps this to a batchItemFailure.
|
|
160
|
+
*/
|
|
161
|
+
export type RecordOutcome =
|
|
162
|
+
| { readonly disposition: "ack"; readonly reason: string; readonly poison?: boolean }
|
|
163
|
+
| { readonly disposition: "fail"; readonly reason: string };
|
|
164
|
+
|
|
165
|
+
// ---------------------------------------------------------------------------
|
|
166
|
+
// Key parsing — pending/{tenantId}/{uploadId}
|
|
167
|
+
// ---------------------------------------------------------------------------
|
|
168
|
+
|
|
169
|
+
/**
|
|
170
|
+
* Parse a triggering key as a `pending/{tenantId}/{uploadId}` key, validating
|
|
171
|
+
* the FORM by round-tripping the parsed parts back through `pendingKey()`. A
|
|
172
|
+
* key only parses if rebuilding it from its parts yields the identical string —
|
|
173
|
+
* so a path-traversal payload, extra segments, or a malformed id can never pass
|
|
174
|
+
* (cas-keys.ts owns the anchored allowlists).
|
|
175
|
+
*
|
|
176
|
+
* @returns the {tenantId, uploadId} when the key is a canonical pending key,
|
|
177
|
+
* or null for ANY other key (which the caller ack-drops; we never
|
|
178
|
+
* write outputs under pending/, so a non-pending key is not our work).
|
|
179
|
+
*/
|
|
180
|
+
export function parsePendingKey(
|
|
181
|
+
key: string,
|
|
182
|
+
): { tenantId: string; uploadId: string } | null {
|
|
183
|
+
const parts = key.split("/");
|
|
184
|
+
if (parts.length !== 3 || parts[0] !== "pending") {
|
|
185
|
+
return null;
|
|
186
|
+
}
|
|
187
|
+
const tenantId = parts[1];
|
|
188
|
+
const uploadId = parts[2];
|
|
189
|
+
const rebuilt = pendingKey(tenantId, uploadId);
|
|
190
|
+
if (isCasKeyError(rebuilt) || rebuilt !== key) {
|
|
191
|
+
return null;
|
|
192
|
+
}
|
|
193
|
+
return { tenantId, uploadId };
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// ---------------------------------------------------------------------------
|
|
197
|
+
// S3-event-over-SQS extraction
|
|
198
|
+
// ---------------------------------------------------------------------------
|
|
199
|
+
|
|
200
|
+
/** Every object key referenced by one SQS record's S3 event notification. */
|
|
201
|
+
export function extractObjectKeys(recordBody: string): string[] {
|
|
202
|
+
const parsed = JSON.parse(recordBody) as {
|
|
203
|
+
Records?: Array<{ s3?: { object?: { key?: string } } }>;
|
|
204
|
+
};
|
|
205
|
+
const s3Records = parsed.Records ?? [];
|
|
206
|
+
const keys: string[] = [];
|
|
207
|
+
for (const r of s3Records) {
|
|
208
|
+
const raw = r?.s3?.object?.key;
|
|
209
|
+
if (typeof raw === "string") {
|
|
210
|
+
// S3 URL-encodes keys and uses '+' for spaces in notifications.
|
|
211
|
+
keys.push(decodeURIComponent(raw.replace(/\+/g, " ")));
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
return keys;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
// ---------------------------------------------------------------------------
|
|
218
|
+
// Typed errors the orchestration core throws internally
|
|
219
|
+
// ---------------------------------------------------------------------------
|
|
220
|
+
|
|
221
|
+
/** A permanent payload defect: the key did not match the row's tenant/upload. */
|
|
222
|
+
class KeyTenantMismatchError extends Error {
|
|
223
|
+
constructor() {
|
|
224
|
+
// The name is in classify-worker-error's poison fragment set ("validation").
|
|
225
|
+
super("media key/tenant validation mismatch: triggering key does not match the row");
|
|
226
|
+
this.name = "ValidationError";
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
/** A permanent payload defect: the probed duration exceeds the configured cap. */
|
|
231
|
+
class DurationCapExceededError extends Error {
|
|
232
|
+
constructor() {
|
|
233
|
+
super("media duration cap exceeded");
|
|
234
|
+
this.name = "DurationCapExceeded";
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
// ---------------------------------------------------------------------------
|
|
239
|
+
// Orchestration core — testable against the B0 Mocks
|
|
240
|
+
// ---------------------------------------------------------------------------
|
|
241
|
+
|
|
242
|
+
/**
|
|
243
|
+
* Orchestrate processing for ONE already-extracted object key.
|
|
244
|
+
*
|
|
245
|
+
* Steps (every uncertainty fails closed; nothing here can yield APPROVED):
|
|
246
|
+
* 1. Reject any key that is not a canonical `pending/{tenant}/{upload}` key —
|
|
247
|
+
* ack-drop it; outputs are NEVER written under pending/.
|
|
248
|
+
* 2. Load the MediaFile row by uploadId; re-derive tenant FROM THE ROW and
|
|
249
|
+
* assert pendingKey(rowTenant, uploadId) === the triggering key. Mismatch
|
|
250
|
+
* (or missing/uploadId-less row) is a hard reject → REVIEW + ack (ack only when no row exists to mark).
|
|
251
|
+
* 3. Probe duration; over-cap ⇒ poison ⇒ REVIEW + ack (no transcode).
|
|
252
|
+
* 4. Transcode-and-discard ⇒ cleaned bytes at the STAGING key (read back from
|
|
253
|
+
* the cleaned key). The cleaned bytes are NOT written to cas/ here — cas/ is
|
|
254
|
+
* the CDN-served prefix and must hold only APPROVED bytes (promotion happens
|
|
255
|
+
* in the completion worker).
|
|
256
|
+
* 5. Hash the cleaned bytes ⇒ realHash; PERSIST {contentHash: realHash,
|
|
257
|
+
* originalKey: casKey(tenant, realHash)} onto the row, replacing the
|
|
258
|
+
* upload-time uploadId placeholder so the completion worker can derive the
|
|
259
|
+
* promote target.
|
|
260
|
+
* 6. START moderation on the cleaned STAGING object (NOT the raw pending upload,
|
|
261
|
+
* NOT a cas/ key) — moderation must run on EXACTLY the bytes that will be
|
|
262
|
+
* served: provider.startVideoModeration ⇒ persist VISUAL job (+ threshold
|
|
263
|
+
* snapshot); transcribe.startTranscription ⇒ persist AUDIO job (+ snapshot).
|
|
264
|
+
* The worker only STARTS jobs + persists jobIds; it never fetches verdicts.
|
|
265
|
+
*/
|
|
266
|
+
export async function processObjectKey(
|
|
267
|
+
triggeringKey: string,
|
|
268
|
+
deps: MediaProcessingDeps,
|
|
269
|
+
): Promise<RecordOutcome> {
|
|
270
|
+
try {
|
|
271
|
+
// --- 1. Pending-key form gate (re-trigger-loop guard). ---
|
|
272
|
+
const parsed = parsePendingKey(triggeringKey);
|
|
273
|
+
if (parsed === null) {
|
|
274
|
+
deps.logger.info("Dropping non-pending key (not our work)", {
|
|
275
|
+
key: triggeringKey,
|
|
276
|
+
});
|
|
277
|
+
return { disposition: "ack", reason: "non-pending-key" };
|
|
278
|
+
}
|
|
279
|
+
const { uploadId } = parsed;
|
|
280
|
+
|
|
281
|
+
// --- 2. Load row; re-derive tenant FROM THE ROW; assert key match. ---
|
|
282
|
+
const row = await deps.persistence.findMediaByUploadId(uploadId);
|
|
283
|
+
if (row === null || row.uploadId === null) {
|
|
284
|
+
// No row, or a row that lost its upload session — cannot certify this
|
|
285
|
+
// object. Permanent w.r.t. these bytes: fail closed to human review.
|
|
286
|
+
throw new KeyTenantMismatchError();
|
|
287
|
+
}
|
|
288
|
+
const rowTenant = row.tenantId;
|
|
289
|
+
const expectedKey = pendingKey(rowTenant, uploadId);
|
|
290
|
+
if (isCasKeyError(expectedKey) || expectedKey !== triggeringKey) {
|
|
291
|
+
// The triggering key's tenant segment disagrees with the owning tenant,
|
|
292
|
+
// OR the row's tenant is itself malformed. Either way: hard reject.
|
|
293
|
+
throw new KeyTenantMismatchError();
|
|
294
|
+
}
|
|
6
295
|
|
|
7
|
-
|
|
296
|
+
// --- 3. Duration cap (probe BEFORE transcoding — cost + abuse guard). ---
|
|
297
|
+
const probed = await deps.transcode.probeDurationSeconds(triggeringKey);
|
|
298
|
+
if (exceedsDurationCap(probed, deps.config.maxDurationSeconds)) {
|
|
299
|
+
throw new DurationCapExceededError();
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
// --- 4. Transcode-and-discard ⇒ cleaned bytes. ---
|
|
303
|
+
// The cleaned output is written to a transient staging key OUTSIDE pending/
|
|
304
|
+
// (so re-uploading the cleaned bytes can never re-trigger this worker).
|
|
305
|
+
const cleanedStagingKey = `processing/${rowTenant}/${uploadId}`;
|
|
306
|
+
const posterStagingKey = `processing/${rowTenant}/${uploadId}.poster`;
|
|
307
|
+
const transcodeResult = await deps.transcode.transcodeVideo({
|
|
308
|
+
inputPath: triggeringKey,
|
|
309
|
+
outputPath: cleanedStagingKey,
|
|
310
|
+
posterPath: posterStagingKey,
|
|
311
|
+
maxDurationSeconds: deps.config.maxDurationSeconds,
|
|
312
|
+
});
|
|
313
|
+
const cleanedStagingKeyOut = transcodeResult.cleanedPath;
|
|
314
|
+
const cleanedBytes = await deps.storage.getObject(cleanedStagingKeyOut);
|
|
315
|
+
|
|
316
|
+
// --- 5. Hash the CLEANED bytes ⇒ real content identity; persist it. ---
|
|
317
|
+
// We do NOT write the cleaned bytes to cas/ here: they already live at the
|
|
318
|
+
// STAGING key, and cas/ (the CDN-served prefix) must only ever hold APPROVED
|
|
319
|
+
// bytes. We persist the real hash + future serve key so the completion
|
|
320
|
+
// worker can promote staging -> cas/ on approval.
|
|
321
|
+
const contentHash = createHash("sha256").update(cleanedBytes).digest("hex");
|
|
322
|
+
const cleanedCasKey = casKey(rowTenant, contentHash);
|
|
323
|
+
if (isCasKeyError(cleanedCasKey)) {
|
|
324
|
+
// The hash/tenant failed the CAS allowlist — a permanent defect in our own
|
|
325
|
+
// derivation inputs (e.g. a malformed tenant that slipped the row check).
|
|
326
|
+
// Fail closed: route to review rather than serve un-addressable bytes.
|
|
327
|
+
throw new KeyTenantMismatchError();
|
|
328
|
+
}
|
|
329
|
+
// Replace the upload-time uploadId placeholder contentHash with the REAL
|
|
330
|
+
// hash and record the future serve key (cas/{tenant}/{hash}).
|
|
331
|
+
await deps.persistence.persistCleanedContent(row.id, {
|
|
332
|
+
contentHash,
|
|
333
|
+
originalKey: cleanedCasKey,
|
|
334
|
+
});
|
|
8
335
|
|
|
9
|
-
|
|
10
|
-
|
|
336
|
+
// --- 6. START moderation on the CLEANED STAGING object (the exact bytes ---
|
|
337
|
+
// that will be served), NOT the raw pending upload and NOT a cas/ key.
|
|
338
|
+
const stagingRef: S3Ref = { bucket: deps.bucket, key: cleanedStagingKeyOut };
|
|
11
339
|
|
|
340
|
+
const visual = await deps.moderation.startVideoModeration(stagingRef);
|
|
341
|
+
await deps.persistence.createModerationJob({
|
|
342
|
+
mediaId: row.id,
|
|
343
|
+
track: "VISUAL",
|
|
344
|
+
jobId: visual.jobId,
|
|
345
|
+
// Snapshot the CURRENT operative thresholds onto the job at submission.
|
|
346
|
+
thresholdSnapshot: deps.config.thresholds,
|
|
347
|
+
});
|
|
348
|
+
|
|
349
|
+
const audio = await deps.transcribe.startTranscription({
|
|
350
|
+
key: cleanedStagingKeyOut,
|
|
351
|
+
jobName: deps.newJobName(cleanedStagingKeyOut),
|
|
352
|
+
});
|
|
353
|
+
await deps.persistence.createModerationJob({
|
|
354
|
+
mediaId: row.id,
|
|
355
|
+
track: "AUDIO",
|
|
356
|
+
jobId: audio.jobId,
|
|
357
|
+
thresholdSnapshot: deps.config.thresholds,
|
|
358
|
+
});
|
|
359
|
+
|
|
360
|
+
deps.logger.info("Started per-track moderation jobs", {
|
|
361
|
+
mediaId: row.id,
|
|
362
|
+
stagingKey: cleanedStagingKeyOut,
|
|
363
|
+
casKey: cleanedCasKey,
|
|
364
|
+
visualJobId: visual.jobId,
|
|
365
|
+
audioJobId: audio.jobId,
|
|
366
|
+
});
|
|
367
|
+
|
|
368
|
+
return { disposition: "ack", reason: "started-moderation" };
|
|
369
|
+
} catch (err) {
|
|
370
|
+
// Single classification point: poison ⇒ REVIEW + ack; retryable ⇒ fail.
|
|
371
|
+
const klass = classifyWorkerError(err);
|
|
372
|
+
if (klass === "poison") {
|
|
373
|
+
// Best-effort route to REVIEW. If we can identify the row, mark it; if we
|
|
374
|
+
// cannot (e.g. the failure was the row lookup itself), there is nothing to
|
|
375
|
+
// mark and the ack simply drops a message that would loop forever.
|
|
376
|
+
const reviewReason = await routePoisonToReview(triggeringKey, deps, err);
|
|
377
|
+
return { disposition: "ack", reason: reviewReason, poison: true };
|
|
378
|
+
}
|
|
379
|
+
deps.logger.error("Retryable media-processing fault — letting SQS retry", {
|
|
380
|
+
key: triggeringKey,
|
|
381
|
+
error: err,
|
|
382
|
+
});
|
|
383
|
+
return { disposition: "fail", reason: "retryable" };
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
/**
|
|
388
|
+
* Best-effort: drive the owning MediaFile to REVIEW for a poison failure. Never
|
|
389
|
+
* throws — a failure to mark must not convert a poison ack into an infinite
|
|
390
|
+
* retry. Returns an observability reason string.
|
|
391
|
+
*/
|
|
392
|
+
async function routePoisonToReview(
|
|
393
|
+
triggeringKey: string,
|
|
394
|
+
deps: MediaProcessingDeps,
|
|
395
|
+
cause: unknown,
|
|
396
|
+
): Promise<string> {
|
|
397
|
+
deps.logger.warn("Poison media — routing to REVIEW + ack", {
|
|
398
|
+
key: triggeringKey,
|
|
399
|
+
error: cause,
|
|
400
|
+
});
|
|
401
|
+
const parsed = parsePendingKey(triggeringKey);
|
|
402
|
+
if (parsed === null) {
|
|
403
|
+
return "poison-no-row";
|
|
404
|
+
}
|
|
405
|
+
try {
|
|
406
|
+
const row = await deps.persistence.findMediaByUploadId(parsed.uploadId);
|
|
407
|
+
if (row === null) {
|
|
408
|
+
return "poison-no-row";
|
|
409
|
+
}
|
|
410
|
+
await deps.persistence.markMediaForReview(row.id);
|
|
411
|
+
return "poison-review";
|
|
412
|
+
} catch (markErr) {
|
|
413
|
+
deps.logger.error("Failed to mark poison media for REVIEW (acking anyway)", {
|
|
414
|
+
key: triggeringKey,
|
|
415
|
+
error: markErr,
|
|
416
|
+
});
|
|
417
|
+
return "poison-mark-failed";
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
/**
|
|
422
|
+
* Process one SQS record (which may carry several S3 object keys). The record
|
|
423
|
+
* fails (SQS retry) iff ANY of its keys produced a retryable fault; otherwise
|
|
424
|
+
* it is acked. Per-key poison is acked, never failed.
|
|
425
|
+
*/
|
|
426
|
+
export async function processRecord(
|
|
427
|
+
record: SQSRecord,
|
|
428
|
+
deps: MediaProcessingDeps,
|
|
429
|
+
): Promise<RecordOutcome> {
|
|
430
|
+
let keys: string[];
|
|
431
|
+
try {
|
|
432
|
+
keys = extractObjectKeys(record.body);
|
|
433
|
+
} catch (err) {
|
|
434
|
+
// A body we cannot even parse is a permanent payload defect (poison): a
|
|
435
|
+
// retry re-parses the same bytes to the same failure. Ack to avoid a loop.
|
|
436
|
+
deps.logger.warn("Unparseable SQS record body — acking as poison", {
|
|
437
|
+
messageId: record.messageId,
|
|
438
|
+
error: err,
|
|
439
|
+
});
|
|
440
|
+
return { disposition: "ack", reason: "unparseable-body", poison: true };
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
for (const key of keys) {
|
|
444
|
+
const outcome = await processObjectKey(key, deps);
|
|
445
|
+
if (outcome.disposition === "fail") {
|
|
446
|
+
// First retryable key fails the whole record; SQS redelivers it. Already-
|
|
447
|
+
// started keys are idempotent on the dedupe path (deriveDedupeKey).
|
|
448
|
+
return outcome;
|
|
449
|
+
}
|
|
450
|
+
}
|
|
451
|
+
return { disposition: "ack", reason: "record-complete" };
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
// ---------------------------------------------------------------------------
|
|
455
|
+
// Deps injection seam (consuming app wires concrete adapters at startup)
|
|
456
|
+
// ---------------------------------------------------------------------------
|
|
457
|
+
|
|
458
|
+
let injectedDeps: MediaProcessingDeps | undefined;
|
|
459
|
+
|
|
460
|
+
/**
|
|
461
|
+
* Inject the concrete media-processing seams. The consuming app (Skybber) calls
|
|
462
|
+
* this once at Lambda cold start with its ffmpeg/MediaConvert TranscodePort, S3
|
|
463
|
+
* StoragePort, Transcribe TranscribePort, injected MediaModerationProvider, and
|
|
464
|
+
* a Prisma-backed MediaPersistencePort. Core ships NO concrete adapters.
|
|
465
|
+
*/
|
|
466
|
+
export function setMediaProcessingDeps(deps: MediaProcessingDeps): void {
|
|
467
|
+
injectedDeps = deps;
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
/** Test helper: clear injected deps between cases. */
|
|
471
|
+
export function __resetMediaProcessingDeps(): void {
|
|
472
|
+
injectedDeps = undefined;
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
// Module-level logger used by the SQS handler below, tagged with this
// worker's service name.
const logger = new Logger({ serviceName: "media-processing-worker" });
|
|
476
|
+
|
|
477
|
+
/**
 * SQS entry point for the media-processing worker.
 *
 * Records are handed to `processRecord` one at a time, in order. A record's
 * messageId is reported as a batch item failure only when its outcome has
 * disposition `"fail"` (a retryable fault) or when `processRecord` throws
 * unexpectedly. Every other record (success / drop / poison→REVIEW) is acked
 * by being left out of the response, which preserves
 * `reportBatchItemFailures` semantics.
 *
 * The handler fails CLOSED when no deps have been injected: it logs and
 * throws, so the whole batch is retried rather than silently dropped. An
 * un-wired worker must never ack-drop real uploads.
 *
 * @param event - The SQS batch event whose `Records` are processed.
 * @returns The batch response listing the messageIds that must be redelivered.
 * @throws Error when setMediaProcessingDeps() has not been called.
 */
export const handler: SQSHandler = async (event): Promise<SQSBatchResponse> => {
  const deps = injectedDeps;
  if (deps === undefined) {
    // No backend wired ⇒ retry the batch, never drop it. The consuming app
    // is responsible for calling setMediaProcessingDeps() at startup.
    logger.error(
      "media-processing-worker invoked with no injected deps — refusing to" +
        " process. Call setMediaProcessingDeps() at cold start.",
    );
    throw new Error("media-processing-worker: deps not injected");
  }

  const retryable: { itemIdentifier: string }[] = [];

  for (const record of event.Records) {
    let result: RecordOutcome;
    try {
      result = await processRecord(record, deps);
    } catch (caught) {
      // processRecord is designed not to throw. If it does anyway, err on
      // the side of a retry; the DLQ + alert is the backstop.
      logger.error("Unexpected throw from processRecord — retrying record", {
        messageId: record.messageId,
        error: caught,
      });
      retryable.push({ itemIdentifier: record.messageId });
      continue;
    }

    // Anything other than a retryable fault is acked by omission.
    if (result.disposition !== "fail") {
      continue;
    }
    retryable.push({ itemIdentifier: record.messageId });
  }

  return { batchItemFailures: retryable };
};
|