@de-otio/trellis 0.10.11 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. package/dist/env.d.ts +232 -0
  2. package/dist/env.d.ts.map +1 -1
  3. package/dist/env.js +221 -0
  4. package/dist/env.js.map +1 -1
  5. package/dist/index.d.ts +1 -0
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +3 -0
  8. package/dist/index.js.map +1 -1
  9. package/dist/lambda/media-completion-worker.d.ts +175 -0
  10. package/dist/lambda/media-completion-worker.d.ts.map +1 -0
  11. package/dist/lambda/media-completion-worker.js +373 -0
  12. package/dist/lambda/media-completion-worker.js.map +1 -0
  13. package/dist/lambda/media-processing-worker.d.ts +172 -1
  14. package/dist/lambda/media-processing-worker.d.ts.map +1 -1
  15. package/dist/lambda/media-processing-worker.js +343 -49
  16. package/dist/lambda/media-processing-worker.js.map +1 -1
  17. package/dist/lib/app.d.ts.map +1 -1
  18. package/dist/lib/app.js +5 -0
  19. package/dist/lib/app.js.map +1 -1
  20. package/dist/lib/encrypted-settings/config.d.ts +13 -0
  21. package/dist/lib/encrypted-settings/config.d.ts.map +1 -0
  22. package/dist/lib/encrypted-settings/config.js +19 -0
  23. package/dist/lib/encrypted-settings/config.js.map +1 -0
  24. package/dist/lib/encrypted-settings/encrypted-settings-handler.d.ts +57 -0
  25. package/dist/lib/encrypted-settings/encrypted-settings-handler.d.ts.map +1 -0
  26. package/dist/lib/encrypted-settings/encrypted-settings-handler.js +178 -0
  27. package/dist/lib/encrypted-settings/encrypted-settings-handler.js.map +1 -0
  28. package/dist/lib/encrypted-settings/encrypted-settings-store.d.ts +110 -0
  29. package/dist/lib/encrypted-settings/encrypted-settings-store.d.ts.map +1 -0
  30. package/dist/lib/encrypted-settings/encrypted-settings-store.js +103 -0
  31. package/dist/lib/encrypted-settings/encrypted-settings-store.js.map +1 -0
  32. package/dist/lib/encrypted-settings/types.d.ts +26 -0
  33. package/dist/lib/encrypted-settings/types.d.ts.map +1 -0
  34. package/dist/lib/encrypted-settings/types.js +27 -0
  35. package/dist/lib/encrypted-settings/types.js.map +1 -0
  36. package/dist/lib/exif-stripper.d.ts +37 -22
  37. package/dist/lib/exif-stripper.d.ts.map +1 -1
  38. package/dist/lib/exif-stripper.js +101 -41
  39. package/dist/lib/exif-stripper.js.map +1 -1
  40. package/dist/lib/media/cas-keys.d.ts +63 -0
  41. package/dist/lib/media/cas-keys.d.ts.map +1 -0
  42. package/dist/lib/media/cas-keys.js +102 -0
  43. package/dist/lib/media/cas-keys.js.map +1 -0
  44. package/dist/lib/media/classify-worker-error.d.ts +48 -0
  45. package/dist/lib/media/classify-worker-error.d.ts.map +1 -0
  46. package/dist/lib/media/classify-worker-error.js +319 -0
  47. package/dist/lib/media/classify-worker-error.js.map +1 -0
  48. package/dist/lib/media/dedupe-key.d.ts +29 -0
  49. package/dist/lib/media/dedupe-key.d.ts.map +1 -0
  50. package/dist/lib/media/dedupe-key.js +49 -0
  51. package/dist/lib/media/dedupe-key.js.map +1 -0
  52. package/dist/lib/media/duration-cap.d.ts +30 -0
  53. package/dist/lib/media/duration-cap.d.ts.map +1 -0
  54. package/dist/lib/media/duration-cap.js +37 -0
  55. package/dist/lib/media/duration-cap.js.map +1 -0
  56. package/dist/lib/media/ffmpeg-args.d.ts +83 -0
  57. package/dist/lib/media/ffmpeg-args.d.ts.map +1 -0
  58. package/dist/lib/media/ffmpeg-args.js +119 -0
  59. package/dist/lib/media/ffmpeg-args.js.map +1 -0
  60. package/dist/lib/media/media-ports.d.ts +126 -0
  61. package/dist/lib/media/media-ports.d.ts.map +1 -0
  62. package/dist/lib/media/media-ports.js +129 -0
  63. package/dist/lib/media/media-ports.js.map +1 -0
  64. package/dist/lib/media/media-upsert.d.ts +55 -0
  65. package/dist/lib/media/media-upsert.d.ts.map +1 -0
  66. package/dist/lib/media/media-upsert.js +38 -0
  67. package/dist/lib/media/media-upsert.js.map +1 -0
  68. package/dist/lib/media/moderation-provider.d.ts +111 -0
  69. package/dist/lib/media/moderation-provider.d.ts.map +1 -0
  70. package/dist/lib/media/moderation-provider.js +130 -0
  71. package/dist/lib/media/moderation-provider.js.map +1 -0
  72. package/dist/lib/media/moderation-resolved-payload.d.ts +48 -0
  73. package/dist/lib/media/moderation-resolved-payload.d.ts.map +1 -0
  74. package/dist/lib/media/moderation-resolved-payload.js +37 -0
  75. package/dist/lib/media/moderation-resolved-payload.js.map +1 -0
  76. package/dist/lib/media/moderation-status.d.ts +98 -0
  77. package/dist/lib/media/moderation-status.d.ts.map +1 -0
  78. package/dist/lib/media/moderation-status.js +122 -0
  79. package/dist/lib/media/moderation-status.js.map +1 -0
  80. package/dist/lib/media/processing-types.d.ts +45 -0
  81. package/dist/lib/media/processing-types.d.ts.map +1 -0
  82. package/dist/lib/media/processing-types.js +9 -0
  83. package/dist/lib/media/processing-types.js.map +1 -0
  84. package/dist/lib/media/promote-decision.d.ts +64 -0
  85. package/dist/lib/media/promote-decision.d.ts.map +1 -0
  86. package/dist/lib/media/promote-decision.js +76 -0
  87. package/dist/lib/media/promote-decision.js.map +1 -0
  88. package/dist/lib/media/quota-check.d.ts +22 -0
  89. package/dist/lib/media/quota-check.d.ts.map +1 -0
  90. package/dist/lib/media/quota-check.js +42 -0
  91. package/dist/lib/media/quota-check.js.map +1 -0
  92. package/dist/lib/media/quota-types.d.ts +15 -0
  93. package/dist/lib/media/quota-types.d.ts.map +1 -0
  94. package/dist/lib/media/quota-types.js +9 -0
  95. package/dist/lib/media/quota-types.js.map +1 -0
  96. package/dist/lib/media/route-upload.d.ts +58 -0
  97. package/dist/lib/media/route-upload.d.ts.map +1 -0
  98. package/dist/lib/media/route-upload.js +80 -0
  99. package/dist/lib/media/route-upload.js.map +1 -0
  100. package/dist/lib/media/serve-gate.d.ts +51 -0
  101. package/dist/lib/media/serve-gate.d.ts.map +1 -0
  102. package/dist/lib/media/serve-gate.js +68 -0
  103. package/dist/lib/media/serve-gate.js.map +1 -0
  104. package/dist/lib/media/tenant-resolution.d.ts +42 -0
  105. package/dist/lib/media/tenant-resolution.d.ts.map +1 -0
  106. package/dist/lib/media/tenant-resolution.js +45 -0
  107. package/dist/lib/media/tenant-resolution.js.map +1 -0
  108. package/dist/lib/media/text-moderation.d.ts +28 -0
  109. package/dist/lib/media/text-moderation.d.ts.map +1 -0
  110. package/dist/lib/media/text-moderation.js +62 -0
  111. package/dist/lib/media/text-moderation.js.map +1 -0
  112. package/dist/lib/media/track-verdict.d.ts +45 -0
  113. package/dist/lib/media/track-verdict.d.ts.map +1 -0
  114. package/dist/lib/media/track-verdict.js +52 -0
  115. package/dist/lib/media/track-verdict.js.map +1 -0
  116. package/dist/lib/media/transcript-moderation.d.ts +47 -0
  117. package/dist/lib/media/transcript-moderation.d.ts.map +1 -0
  118. package/dist/lib/media/transcript-moderation.js +70 -0
  119. package/dist/lib/media/transcript-moderation.js.map +1 -0
  120. package/dist/lib/media-handler.d.ts.map +1 -1
  121. package/dist/lib/media-handler.js +15 -9
  122. package/dist/lib/media-handler.js.map +1 -1
  123. package/dist/lib/notification-handler.d.ts +11 -4
  124. package/dist/lib/notification-handler.d.ts.map +1 -1
  125. package/dist/lib/notification-handler.js +161 -29
  126. package/dist/lib/notification-handler.js.map +1 -1
  127. package/dist/lib/post-handler.d.ts.map +1 -1
  128. package/dist/lib/post-handler.js +4 -1
  129. package/dist/lib/post-handler.js.map +1 -1
  130. package/dist/lib/realtime/block-store.d.ts +61 -0
  131. package/dist/lib/realtime/block-store.d.ts.map +1 -0
  132. package/dist/lib/realtime/block-store.js +0 -0
  133. package/dist/lib/realtime/block-store.js.map +1 -0
  134. package/dist/lib/realtime/channel.d.ts +34 -0
  135. package/dist/lib/realtime/channel.d.ts.map +1 -0
  136. package/dist/lib/realtime/channel.js +100 -0
  137. package/dist/lib/realtime/channel.js.map +1 -0
  138. package/dist/lib/realtime/delivery-policy.d.ts +51 -0
  139. package/dist/lib/realtime/delivery-policy.d.ts.map +1 -0
  140. package/dist/lib/realtime/delivery-policy.js +98 -0
  141. package/dist/lib/realtime/delivery-policy.js.map +1 -0
  142. package/dist/lib/realtime/index.d.ts +21 -0
  143. package/dist/lib/realtime/index.d.ts.map +1 -0
  144. package/dist/lib/realtime/index.js +39 -0
  145. package/dist/lib/realtime/index.js.map +1 -0
  146. package/dist/lib/realtime/no-op-transport.d.ts +10 -0
  147. package/dist/lib/realtime/no-op-transport.d.ts.map +1 -0
  148. package/dist/lib/realtime/no-op-transport.js +44 -0
  149. package/dist/lib/realtime/no-op-transport.js.map +1 -0
  150. package/dist/lib/realtime/poll-transport.d.ts +11 -0
  151. package/dist/lib/realtime/poll-transport.d.ts.map +1 -0
  152. package/dist/lib/realtime/poll-transport.js +68 -0
  153. package/dist/lib/realtime/poll-transport.js.map +1 -0
  154. package/dist/lib/realtime/push-notifier.d.ts +39 -0
  155. package/dist/lib/realtime/push-notifier.d.ts.map +1 -0
  156. package/dist/lib/realtime/push-notifier.js +76 -0
  157. package/dist/lib/realtime/push-notifier.js.map +1 -0
  158. package/dist/lib/realtime/realtime-transport.d.ts +2 -0
  159. package/dist/lib/realtime/realtime-transport.d.ts.map +1 -0
  160. package/dist/lib/realtime/realtime-transport.js +23 -0
  161. package/dist/lib/realtime/realtime-transport.js.map +1 -0
  162. package/dist/lib/realtime/setting-store.d.ts +30 -0
  163. package/dist/lib/realtime/setting-store.d.ts.map +1 -0
  164. package/dist/lib/realtime/setting-store.js +0 -0
  165. package/dist/lib/realtime/setting-store.js.map +1 -0
  166. package/dist/lib/realtime/types.d.ts +200 -0
  167. package/dist/lib/realtime/types.d.ts.map +1 -0
  168. package/dist/lib/realtime/types.js +61 -0
  169. package/dist/lib/realtime/types.js.map +1 -0
  170. package/dist/lib/routes/index.d.ts.map +1 -1
  171. package/dist/lib/routes/index.js +3 -0
  172. package/dist/lib/routes/index.js.map +1 -1
  173. package/dist/lib/routes/media.d.ts +21 -0
  174. package/dist/lib/routes/media.d.ts.map +1 -1
  175. package/dist/lib/routes/media.js +584 -483
  176. package/dist/lib/routes/media.js.map +1 -1
  177. package/dist/lib/routes/settings.d.ts +17 -0
  178. package/dist/lib/routes/settings.d.ts.map +1 -0
  179. package/dist/lib/routes/settings.js +187 -0
  180. package/dist/lib/routes/settings.js.map +1 -0
  181. package/dist/lib/services/image-normalizer.d.ts +64 -6
  182. package/dist/lib/services/image-normalizer.d.ts.map +1 -1
  183. package/dist/lib/services/image-normalizer.js +88 -6
  184. package/dist/lib/services/image-normalizer.js.map +1 -1
  185. package/dist/lib/services/media-upload-service.d.ts +2 -2
  186. package/dist/lib/services/media-upload-service.d.ts.map +1 -1
  187. package/dist/lib/services/media-upload-service.js +22 -21
  188. package/dist/lib/services/media-upload-service.js.map +1 -1
  189. package/dist/lib/tenant-scope.d.ts.map +1 -1
  190. package/dist/lib/tenant-scope.js +18 -1
  191. package/dist/lib/tenant-scope.js.map +1 -1
  192. package/package.json +23 -22
  193. package/prisma/migrations/20260620051144_add_encrypted_user_settings/migration.sql +24 -0
  194. package/prisma/migrations/20260620120000_add_blocked_users/migration.sql +29 -0
  195. package/prisma/migrations/20260625000000_media_tenant_scope_and_moderation_status/migration.sql +49 -0
  196. package/prisma/migrations/20260625000001_p0b_moderation_jobs/migration.sql +73 -0
  197. package/prisma/schema.prisma +133 -15
  198. package/src/lambda/media-completion-worker.ts +567 -0
  199. package/src/lambda/media-processing-worker.ts +508 -59
@@ -1,71 +1,520 @@
1
- import type { SQSHandler } from "aws-lambda";
2
- import { S3Client, GetObjectCommand, PutObjectCommand } from "@aws-sdk/client-s3";
1
+ // media-processing-worker.ts — the P0b media-processing orchestration SHELL.
2
+ //
3
+ // This is the imperative shell over the pure functional-core media units. It is
4
+ // NOT itself a functional-core unit: it performs I/O (object storage, transcode,
5
+ // transcription, moderation, DB writes). BUT all of that I/O arrives through
6
+ // INJECTED capability seams (TranscodePort / StoragePort / TranscribePort /
7
+ // MediaModerationProvider) and a Prisma-shaped persistence port, so the
8
+ // orchestration logic is exercised in unit tests against the B0 in-memory Mocks
9
+ // — no real cloud, no real encoder, no real DB.
10
+ //
11
+ // Per the seam discipline (see lib/media/media-ports.ts and
12
+ // lib/media/moderation-provider.ts): CORE ships the interfaces + mocks; the
13
+ // consuming app (Skybber) injects the concrete cloud adapters at startup via
14
+ // `setMediaProcessingDeps()`. Until they are injected, the handler fails CLOSED
15
+ // (throws → SQS retry), never silently approves or drops work.
16
+ //
17
+ // Fail-closed posture, end to end:
18
+ // - A key that is not a well-formed `pending/{tenant}/{upload}` key is dropped
19
+ // (ack) and NEVER written under — the re-trigger-loop guard.
20
+ // - The tenant is re-derived FROM THE ROW, and the triggering key must equal
21
+ // pendingKey(rowTenant, uploadId); a mismatch is a hard reject (poison →
22
+ // REVIEW + ack), so a forged/odd key cannot make us moderate the wrong cas/.
23
+ // - Over-cap duration is poison → REVIEW + ack (no transcode attempted).
24
+ // - The worker ONLY starts moderation jobs + persists their jobIds; it never
25
+ // fetches verdicts (a separate poller owns fan-in). Moderation runs on the
26
+ // cleaned bytes at the STAGING key, NOT the raw pending upload — and the
27
+ // cleaned bytes are NOT written to cas/ here. cas/ is the CDN-served prefix,
28
+ // so it must only ever hold APPROVED cleaned bytes; the completion worker
29
+ // promotes staging -> cas/ on approval ("cleaned-staging, promote-on-approval").
30
+ // - classifyWorkerError() splits permanent media/payload defects (poison →
31
+ // REVIEW + ack, no DLQ loop) from transient infra faults (retryable → throw
32
+ // → SQS retry → DLQ + alert backstop).
33
+
34
+ import type { SQSHandler, SQSRecord, SQSBatchResponse } from "aws-lambda";
3
35
  import { Logger } from "@aws-lambda-powertools/logger";
36
+ import { createHash } from "node:crypto";
4
37
 
5
- const logger = new Logger({ serviceName: "media-processing-worker" });
38
+ import {
39
+ pendingKey,
40
+ casKey,
41
+ isCasKeyError,
42
+ } from "../lib/media/cas-keys.js";
43
+ import { exceedsDurationCap } from "../lib/media/duration-cap.js";
44
+ import { classifyWorkerError } from "../lib/media/classify-worker-error.js";
45
+ import type { Track } from "../lib/media/track-verdict.js";
46
+ import type {
47
+ StoragePort,
48
+ TranscodePort,
49
+ TranscribePort,
50
+ } from "../lib/media/media-ports.js";
51
+ import type {
52
+ MediaModerationProvider,
53
+ S3Ref,
54
+ } from "../lib/media/moderation-provider.js";
55
+
56
+ // ---------------------------------------------------------------------------
57
+ // Injected persistence + config seams
58
+ // ---------------------------------------------------------------------------
59
+
60
+ /**
61
+ * The minimal MediaFile row shape the worker reads. Re-declared (not imported
62
+ * from the Prisma client) so this module compiles in worktrees that have not
63
+ * regenerated the client, mirroring moderation-status.ts's discipline. The
64
+ * shell maps the real Prisma row to this shape at the persistence-port boundary.
65
+ */
66
+ export interface MediaFileRow {
67
+ readonly id: string;
68
+ readonly tenantId: string;
69
+ readonly uploadId: string | null;
70
+ }
71
+
72
+ /**
73
+ * A copy of the operative moderation thresholds, snapshotted at job-submission
74
+ * time so historical decisions stay auditable after a threshold change. The
75
+ * shape mirrors Env.media.thresholds; the worker treats it as an opaque JSON
76
+ * blob and never reads individual values (no compiled threshold logic here).
77
+ */
78
+ export type ThresholdSnapshot = Record<
79
+ string,
80
+ { review: number; quarantine: number }
81
+ >;
82
+
83
+ /**
84
+ * The persistence operations the worker needs, narrowed to exactly what it
85
+ * uses. Implemented in production by a thin Prisma adapter; in tests by an
86
+ * in-memory fake. Keeping this narrow keeps the worker testable without the
87
+ * generated client and documents the worker's full DB surface in one place.
88
+ */
89
+ export interface MediaPersistencePort {
90
+ /** Load the MediaFile row for an upload session, or null if none exists. */
91
+ findMediaByUploadId(uploadId: string): Promise<MediaFileRow | null>;
92
+ /** Persist a started per-track moderation job with its threshold snapshot. */
93
+ createModerationJob(input: {
94
+ mediaId: string;
95
+ track: Track;
96
+ jobId: string;
97
+ thresholdSnapshot: ThresholdSnapshot;
98
+ }): Promise<void>;
99
+ /**
100
+ * Persist the REAL content identity of the cleaned bytes onto the MediaFile
101
+ * row, replacing the upload-time `uploadId` placeholder contentHash with the
102
+ * SHA-256 of the transcoded output and recording the future serve key. The
103
+ * completion worker derives the promote target (`cas/{tenant}/{hash}`) from
104
+ * this persisted `contentHash`, so this write MUST happen before moderation
105
+ * fans in — otherwise the object can never promote.
106
+ */
107
+ persistCleanedContent(
108
+ mediaId: string,
109
+ content: { contentHash: string; originalKey: string },
110
+ ): Promise<void>;
111
+ /** Drive a media object's moderationStatus to REVIEW (poison path). */
112
+ markMediaForReview(mediaId: string): Promise<void>;
113
+ }
114
+
115
+ /**
116
+ * The slice of Env.media this worker consumes. Operational parameters arrive
117
+ * here as VALUES sourced from Env.media — never as literals in this file.
118
+ */
119
+ export interface MediaProcessingConfig {
120
+ /** Hard duration cap (seconds). From Env.media.maxDurationSeconds. */
121
+ readonly maxDurationSeconds: number;
122
+ /** Current operative thresholds, snapshotted onto each started job. */
123
+ readonly thresholds: ThresholdSnapshot;
124
+ }
125
+
126
+ /**
127
+ * All capability seams the orchestration core binds to. The handler builds this
128
+ * from the injected concrete adapters; tests build it from the B0 Mocks + an
129
+ * in-memory persistence fake.
130
+ */
131
+ export interface MediaProcessingDeps {
132
+ readonly storage: StoragePort;
133
+ readonly transcode: TranscodePort;
134
+ readonly transcribe: TranscribePort;
135
+ readonly moderation: MediaModerationProvider;
136
+ readonly persistence: MediaPersistencePort;
137
+ readonly config: MediaProcessingConfig;
138
+ /** The object-storage bucket handle moderation/transcription refs carry. */
139
+ readonly bucket: string;
140
+ /**
141
+ * Deterministic job-name factory for transcription/idempotency. Injected so
142
+ * the shell stays free of Date.now/Math.random in tests; production passes a
143
+ * uuid/time-based generator. `seed` is a stable per-call input (the cas key).
144
+ */
145
+ readonly newJobName: (seed: string) => string;
146
+ readonly logger: Pick<Logger, "info" | "warn" | "error">;
147
+ }
148
+
149
+ // ---------------------------------------------------------------------------
150
+ // Per-record outcome
151
+ // ---------------------------------------------------------------------------
152
+
153
+ /**
154
+ * The disposition of one SQS record after orchestration.
155
+ *
156
+ * - `ack` — remove from the queue (success, drop-non-pending, or poison routed
157
+ * to REVIEW). A poison ack carries `poison: true` for observability.
158
+ * - `fail` — leave on the queue for SQS to retry (transient/retryable fault).
159
+ * The handler maps this to a batchItemFailure.
160
+ */
161
+ export type RecordOutcome =
162
+ | { readonly disposition: "ack"; readonly reason: string; readonly poison?: boolean }
163
+ | { readonly disposition: "fail"; readonly reason: string };
164
+
165
+ // ---------------------------------------------------------------------------
166
+ // Key parsing — pending/{tenantId}/{uploadId}
167
+ // ---------------------------------------------------------------------------
168
+
169
+ /**
170
+ * Parse a triggering key as a `pending/{tenantId}/{uploadId}` key, validating
171
+ * the FORM by round-tripping the parsed parts back through `pendingKey()`. A
172
+ * key only parses if rebuilding it from its parts yields the identical string —
173
+ * so a path-traversal payload, extra segments, or a malformed id can never pass
174
+ * (cas-keys.ts owns the anchored allowlists).
175
+ *
176
+ * @returns the {tenantId, uploadId} when the key is a canonical pending key,
177
+ * or null for ANY other key (which the caller ack-drops; we never
178
+ * write outputs under pending/, so a non-pending key is not our work).
179
+ */
180
+ export function parsePendingKey(
181
+ key: string,
182
+ ): { tenantId: string; uploadId: string } | null {
183
+ const parts = key.split("/");
184
+ if (parts.length !== 3 || parts[0] !== "pending") {
185
+ return null;
186
+ }
187
+ const tenantId = parts[1];
188
+ const uploadId = parts[2];
189
+ const rebuilt = pendingKey(tenantId, uploadId);
190
+ if (isCasKeyError(rebuilt) || rebuilt !== key) {
191
+ return null;
192
+ }
193
+ return { tenantId, uploadId };
194
+ }
195
+
196
+ // ---------------------------------------------------------------------------
197
+ // S3-event-over-SQS extraction
198
+ // ---------------------------------------------------------------------------
199
+
200
+ /** Every object key referenced by one SQS record's S3 event notification. */
201
+ export function extractObjectKeys(recordBody: string): string[] {
202
+ const parsed = JSON.parse(recordBody) as {
203
+ Records?: Array<{ s3?: { object?: { key?: string } } }>;
204
+ };
205
+ const s3Records = parsed.Records ?? [];
206
+ const keys: string[] = [];
207
+ for (const r of s3Records) {
208
+ const raw = r?.s3?.object?.key;
209
+ if (typeof raw === "string") {
210
+ // S3 URL-encodes keys and uses '+' for spaces in notifications.
211
+ keys.push(decodeURIComponent(raw.replace(/\+/g, " ")));
212
+ }
213
+ }
214
+ return keys;
215
+ }
216
+
217
+ // ---------------------------------------------------------------------------
218
+ // Typed errors the orchestration core throws internally
219
+ // ---------------------------------------------------------------------------
220
+
221
+ /** A permanent payload defect: the key did not match the row's tenant/upload. */
222
+ class KeyTenantMismatchError extends Error {
223
+ constructor() {
224
+ // The name is in classify-worker-error's poison fragment set ("validation").
225
+ super("media key/tenant validation mismatch: triggering key does not match the row");
226
+ this.name = "ValidationError";
227
+ }
228
+ }
229
+
230
+ /** A permanent payload defect: the probed duration exceeds the configured cap. */
231
+ class DurationCapExceededError extends Error {
232
+ constructor() {
233
+ super("media duration cap exceeded");
234
+ this.name = "DurationCapExceeded";
235
+ }
236
+ }
237
+
238
+ // ---------------------------------------------------------------------------
239
+ // Orchestration core — testable against the B0 Mocks
240
+ // ---------------------------------------------------------------------------
241
+
242
+ /**
243
+ * Orchestrate processing for ONE already-extracted object key.
244
+ *
245
+ * Steps (every uncertainty fails closed; nothing here can yield APPROVED):
246
+ * 1. Reject any key that is not a canonical `pending/{tenant}/{upload}` key —
247
+ * ack-drop it; outputs are NEVER written under pending/.
248
+ * 2. Load the MediaFile row by uploadId; re-derive tenant FROM THE ROW and
249
+ * assert pendingKey(rowTenant, uploadId) === the triggering key. Mismatch
250
+ * (or missing/uploadId-less row) is a hard reject → REVIEW + ack.
251
+ * 3. Probe duration; over-cap ⇒ poison ⇒ REVIEW + ack (no transcode).
252
+ * 4. Transcode-and-discard ⇒ cleaned bytes at the STAGING key (read back from
253
+ * the cleaned key). The cleaned bytes are NOT written to cas/ here — cas/ is
254
+ * the CDN-served prefix and must hold only APPROVED bytes (promotion happens
255
+ * in the completion worker).
256
+ * 5. Hash the cleaned bytes ⇒ realHash; PERSIST {contentHash: realHash,
257
+ * originalKey: casKey(tenant, realHash)} onto the row, replacing the
258
+ * upload-time uploadId placeholder so the completion worker can derive the
259
+ * promote target.
260
+ * 6. START moderation on the cleaned STAGING object (NOT the raw pending upload,
261
+ * NOT a cas/ key) — moderation must run on EXACTLY the bytes that will be
262
+ * served: provider.startVideoModeration ⇒ persist VISUAL job (+ threshold
263
+ * snapshot); transcribe.startTranscription ⇒ persist AUDIO job (+ snapshot).
264
+ * The worker only STARTS jobs + persists jobIds; it never fetches verdicts.
265
+ */
266
+ export async function processObjectKey(
267
+ triggeringKey: string,
268
+ deps: MediaProcessingDeps,
269
+ ): Promise<RecordOutcome> {
270
+ try {
271
+ // --- 1. Pending-key form gate (re-trigger-loop guard). ---
272
+ const parsed = parsePendingKey(triggeringKey);
273
+ if (parsed === null) {
274
+ deps.logger.info("Dropping non-pending key (not our work)", {
275
+ key: triggeringKey,
276
+ });
277
+ return { disposition: "ack", reason: "non-pending-key" };
278
+ }
279
+ const { uploadId } = parsed;
280
+
281
+ // --- 2. Load row; re-derive tenant FROM THE ROW; assert key match. ---
282
+ const row = await deps.persistence.findMediaByUploadId(uploadId);
283
+ if (row === null || row.uploadId === null) {
284
+ // No row, or a row that lost its upload session — cannot certify this
285
+ // object. Permanent w.r.t. these bytes: fail closed to human review.
286
+ throw new KeyTenantMismatchError();
287
+ }
288
+ const rowTenant = row.tenantId;
289
+ const expectedKey = pendingKey(rowTenant, uploadId);
290
+ if (isCasKeyError(expectedKey) || expectedKey !== triggeringKey) {
291
+ // The triggering key's tenant segment disagrees with the owning tenant,
292
+ // OR the row's tenant is itself malformed. Either way: hard reject.
293
+ throw new KeyTenantMismatchError();
294
+ }
6
295
 
7
- const s3 = new S3Client({ region: process.env.AWS_REGION });
296
+ // --- 3. Duration cap (probe BEFORE transcoding — cost + abuse guard). ---
297
+ const probed = await deps.transcode.probeDurationSeconds(triggeringKey);
298
+ if (exceedsDurationCap(probed, deps.config.maxDurationSeconds)) {
299
+ throw new DurationCapExceededError();
300
+ }
301
+
302
+ // --- 4. Transcode-and-discard ⇒ cleaned bytes. ---
303
+ // The cleaned output is written to a transient staging key OUTSIDE pending/
304
+ // (so re-uploading the cleaned bytes can never re-trigger this worker).
305
+ const cleanedStagingKey = `processing/${rowTenant}/${uploadId}`;
306
+ const posterStagingKey = `processing/${rowTenant}/${uploadId}.poster`;
307
+ const transcodeResult = await deps.transcode.transcodeVideo({
308
+ inputPath: triggeringKey,
309
+ outputPath: cleanedStagingKey,
310
+ posterPath: posterStagingKey,
311
+ maxDurationSeconds: deps.config.maxDurationSeconds,
312
+ });
313
+ const cleanedStagingKeyOut = transcodeResult.cleanedPath;
314
+ const cleanedBytes = await deps.storage.getObject(cleanedStagingKeyOut);
315
+
316
+ // --- 5. Hash the CLEANED bytes ⇒ real content identity; persist it. ---
317
+ // We do NOT write the cleaned bytes to cas/ here: they already live at the
318
+ // STAGING key, and cas/ (the CDN-served prefix) must only ever hold APPROVED
319
+ // bytes. We persist the real hash + future serve key so the completion
320
+ // worker can promote staging -> cas/ on approval.
321
+ const contentHash = createHash("sha256").update(cleanedBytes).digest("hex");
322
+ const cleanedCasKey = casKey(rowTenant, contentHash);
323
+ if (isCasKeyError(cleanedCasKey)) {
324
+ // The hash/tenant failed the CAS allowlist — a permanent defect in our own
325
+ // derivation inputs (e.g. a malformed tenant that slipped the row check).
326
+ // Fail closed: route to review rather than serve un-addressable bytes.
327
+ throw new KeyTenantMismatchError();
328
+ }
329
+ // Replace the upload-time uploadId placeholder contentHash with the REAL
330
+ // hash and record the future serve key (cas/{tenant}/{hash}).
331
+ await deps.persistence.persistCleanedContent(row.id, {
332
+ contentHash,
333
+ originalKey: cleanedCasKey,
334
+ });
8
335
 
9
- export const handler: SQSHandler = async (event) => {
10
- const failedIds: string[] = [];
336
+ // --- 6. START moderation on the CLEANED STAGING object (the exact bytes ---
337
+ // that will be served), NOT the raw pending upload and NOT a cas/ key.
338
+ const stagingRef: S3Ref = { bucket: deps.bucket, key: cleanedStagingKeyOut };
11
339
 
340
+ const visual = await deps.moderation.startVideoModeration(stagingRef);
341
+ await deps.persistence.createModerationJob({
342
+ mediaId: row.id,
343
+ track: "VISUAL",
344
+ jobId: visual.jobId,
345
+ // Snapshot the CURRENT operative thresholds onto the job at submission.
346
+ thresholdSnapshot: deps.config.thresholds,
347
+ });
348
+
349
+ const audio = await deps.transcribe.startTranscription({
350
+ key: cleanedStagingKeyOut,
351
+ jobName: deps.newJobName(cleanedStagingKeyOut),
352
+ });
353
+ await deps.persistence.createModerationJob({
354
+ mediaId: row.id,
355
+ track: "AUDIO",
356
+ jobId: audio.jobId,
357
+ thresholdSnapshot: deps.config.thresholds,
358
+ });
359
+
360
+ deps.logger.info("Started per-track moderation jobs", {
361
+ mediaId: row.id,
362
+ stagingKey: cleanedStagingKeyOut,
363
+ casKey: cleanedCasKey,
364
+ visualJobId: visual.jobId,
365
+ audioJobId: audio.jobId,
366
+ });
367
+
368
+ return { disposition: "ack", reason: "started-moderation" };
369
+ } catch (err) {
370
+ // Single classification point: poison ⇒ REVIEW + ack; retryable ⇒ fail.
371
+ const klass = classifyWorkerError(err);
372
+ if (klass === "poison") {
373
+ // Best-effort route to REVIEW. If we can identify the row, mark it; if we
374
+ // cannot (e.g. the failure was the row lookup itself), there is nothing to
375
+ // mark and the ack simply drops a message that would loop forever.
376
+ const reviewReason = await routePoisonToReview(triggeringKey, deps, err);
377
+ return { disposition: "ack", reason: reviewReason, poison: true };
378
+ }
379
+ deps.logger.error("Retryable media-processing fault — letting SQS retry", {
380
+ key: triggeringKey,
381
+ error: err,
382
+ });
383
+ return { disposition: "fail", reason: "retryable" };
384
+ }
385
+ }
386
+
387
+ /**
388
+ * Best-effort: drive the owning MediaFile to REVIEW for a poison failure. Never
389
+ * throws — a failure to mark must not convert a poison ack into an infinite
390
+ * retry. Returns an observability reason string.
391
+ */
392
+ async function routePoisonToReview(
393
+ triggeringKey: string,
394
+ deps: MediaProcessingDeps,
395
+ cause: unknown,
396
+ ): Promise<string> {
397
+ deps.logger.warn("Poison media — routing to REVIEW + ack", {
398
+ key: triggeringKey,
399
+ error: cause,
400
+ });
401
+ const parsed = parsePendingKey(triggeringKey);
402
+ if (parsed === null) {
403
+ return "poison-no-row";
404
+ }
405
+ try {
406
+ const row = await deps.persistence.findMediaByUploadId(parsed.uploadId);
407
+ if (row === null) {
408
+ return "poison-no-row";
409
+ }
410
+ await deps.persistence.markMediaForReview(row.id);
411
+ return "poison-review";
412
+ } catch (markErr) {
413
+ deps.logger.error("Failed to mark poison media for REVIEW (acking anyway)", {
414
+ key: triggeringKey,
415
+ error: markErr,
416
+ });
417
+ return "poison-mark-failed";
418
+ }
419
+ }
420
+
421
/**
 * Handle a single SQS record, whose body may reference several S3 object keys.
 *
 * Keys are processed one at a time, in order. As soon as one key reports a
 * retryable fault the record is failed (SQS will redeliver it) and the
 * remaining keys are left for that redelivery. If no key fails, the record is
 * acked. A body that cannot be parsed is acked as poison, because retrying
 * would re-parse the same bytes to the same failure.
 *
 * @param record - The SQS record to process.
 * @param deps - Injected media-processing seams.
 * @returns The record-level outcome (`"ack"` or `"fail"` disposition).
 */
export async function processRecord(
  record: SQSRecord,
  deps: MediaProcessingDeps,
): Promise<RecordOutcome> {
  let objectKeys: string[];
  try {
    objectKeys = extractObjectKeys(record.body);
  } catch (parseErr) {
    // Permanent payload defect: ack it so it cannot loop through the queue.
    deps.logger.warn("Unparseable SQS record body — acking as poison", {
      messageId: record.messageId,
      error: parseErr,
    });
    return { disposition: "ack", reason: "unparseable-body", poison: true };
  }

  for (let i = 0; i < objectKeys.length; i += 1) {
    const keyOutcome = await processObjectKey(objectKeys[i], deps);
    if (keyOutcome.disposition !== "fail") {
      continue;
    }
    // First retryable key fails the whole record; keys that were already
    // started are idempotent on the dedupe path (deriveDedupeKey).
    return keyOutcome;
  }

  return { disposition: "ack", reason: "record-complete" };
}
453
+
454
// ---------------------------------------------------------------------------
// Dependency-injection seam: the consuming app supplies the concrete adapters
// at startup; nothing in this module constructs them.
// ---------------------------------------------------------------------------

/** Module-level holder for the injected deps; `undefined` until they are set. */
let injectedDeps: MediaProcessingDeps | undefined;
459
+
460
/**
 * Register the concrete media-processing seams used by this worker.
 *
 * Intended to be called once by the consuming app (Skybber) at Lambda cold
 * start, passing its ffmpeg/MediaConvert TranscodePort, S3 StoragePort,
 * Transcribe TranscribePort, injected MediaModerationProvider and a
 * Prisma-backed MediaPersistencePort. Core itself ships no concrete adapters.
 * A later call replaces whatever was registered before.
 *
 * @param deps - The fully wired dependency bundle to use from now on.
 */
export function setMediaProcessingDeps(deps: MediaProcessingDeps): void {
  injectedDeps = deps;
}
469
+
470
/**
 * Test helper: forget any previously injected deps so each test case starts
 * from the un-wired state.
 */
export function __resetMediaProcessingDeps(): void {
  injectedDeps = undefined;
}
474
+
475
/** Module-level logger, tagged with this worker's service name. */
const logger = new Logger({ serviceName: "media-processing-worker" });
476
+
477
/**
 * SQS entry point for the media-processing worker.
 *
 * Keeps `reportBatchItemFailures` semantics: the response lists only the
 * messageIds of records that produced a retryable fault. Every other record
 * (success / drop / poison→REVIEW) is acked simply by not being listed.
 *
 * Fails CLOSED when no deps have been injected: the handler logs and throws,
 * so the whole batch is retried instead of being silently dropped. An un-wired
 * worker must never ack-drop real uploads.
 *
 * @param event - The SQS batch delivered to the Lambda.
 * @returns The batch response naming the records to redeliver (possibly none).
 * @throws Error if `setMediaProcessingDeps()` has not been called.
 */
export const handler: SQSHandler = async (event): Promise<SQSBatchResponse> => {
  const deps = injectedDeps;
  if (deps === undefined) {
    // No backend wired ⇒ refuse to process so the batch is retried, never
    // dropped. The consuming app must call setMediaProcessingDeps() at startup.
    logger.error(
      "media-processing-worker invoked with no injected deps — refusing to" +
        " process. Call setMediaProcessingDeps() at cold start.",
    );
    throw new Error("media-processing-worker: deps not injected");
  }

  // messageIds of the records that SQS should redeliver.
  const retryMessageIds: string[] = [];

  for (const record of event.Records) {
    let recordOutcome: RecordOutcome;
    try {
      recordOutcome = await processRecord(record, deps);
    } catch (err) {
      // processRecord is designed not to throw. If it does anyway, treat the
      // record as retryable (fail closed; DLQ + alert is the backstop).
      logger.error("Unexpected throw from processRecord — retrying record", {
        messageId: record.messageId,
        error: err,
      });
      retryMessageIds.push(record.messageId);
      continue;
    }

    if (recordOutcome.disposition === "fail") {
      retryMessageIds.push(record.messageId);
    }
  }

  return {
    batchItemFailures: retryMessageIds.map((itemIdentifier) => ({ itemIdentifier })),
  };
};