@mux/ai 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,26 +1,10 @@
1
- import pRetry, { AbortError } from 'p-retry';
2
- import Mux from '@mux/mux-node';
3
- import OpenAI from 'openai';
4
- import Anthropic from '@anthropic-ai/sdk';
5
- import { z } from 'zod';
6
- import { zodTextFormat } from 'openai/helpers/zod';
7
- import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
8
- import { Upload } from '@aws-sdk/lib-storage';
9
- import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
10
-
1
+ "use strict";
2
+ var __create = Object.create;
11
3
  var __defProp = Object.defineProperty;
12
4
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
13
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
14
7
  var __hasOwnProp = Object.prototype.hasOwnProperty;
15
- var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
16
- get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
17
- }) : x)(function(x) {
18
- if (typeof require !== "undefined") return require.apply(this, arguments);
19
- throw Error('Dynamic require of "' + x + '" is not supported');
20
- });
21
- var __esm = (fn, res) => function __init() {
22
- return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
23
- };
24
8
  var __export = (target, all) => {
25
9
  for (var name in all)
26
10
  __defProp(target, name, { get: all[name], enumerable: true });
@@ -33,19 +17,702 @@ var __copyProps = (to, from, except, desc) => {
33
17
  }
34
18
  return to;
35
19
  };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
36
28
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
37
29
 
38
- // src/utils/image-download.ts
39
- var image_download_exports = {};
40
- __export(image_download_exports, {
41
- downloadImageAsBase64: () => downloadImageAsBase64,
42
- downloadImagesAsBase64: () => downloadImagesAsBase64,
43
- uploadImageToAnthropicFiles: () => uploadImageToAnthropicFiles
30
+ // src/index.ts
31
+ var index_exports = {};
32
+ __export(index_exports, {
33
+ primitives: () => primitives_exports,
34
+ version: () => version,
35
+ workflows: () => workflows_exports
36
+ });
37
+ module.exports = __toCommonJS(index_exports);
38
+
39
+ // src/primitives/index.ts
40
+ var primitives_exports = {};
41
+ __export(primitives_exports, {
42
+ DEFAULT_STORYBOARD_WIDTH: () => DEFAULT_STORYBOARD_WIDTH,
43
+ buildTranscriptUrl: () => buildTranscriptUrl,
44
+ chunkByTokens: () => chunkByTokens,
45
+ chunkText: () => chunkText,
46
+ chunkVTTCues: () => chunkVTTCues,
47
+ estimateTokenCount: () => estimateTokenCount,
48
+ extractTextFromVTT: () => extractTextFromVTT,
49
+ extractTimestampedTranscript: () => extractTimestampedTranscript,
50
+ fetchTranscriptForAsset: () => fetchTranscriptForAsset,
51
+ findCaptionTrack: () => findCaptionTrack,
52
+ getReadyTextTracks: () => getReadyTextTracks,
53
+ getStoryboardUrl: () => getStoryboardUrl,
54
+ getThumbnailUrls: () => getThumbnailUrls,
55
+ parseVTTCues: () => parseVTTCues,
56
+ vttTimestampToSeconds: () => vttTimestampToSeconds
57
+ });
58
+
59
+ // src/lib/url-signing.ts
60
+ var import_mux_node = __toESM(require("@mux/mux-node"));
61
+
62
+ // src/env.ts
63
+ var import_node_path = __toESM(require("path"));
64
+ var import_dotenv = require("dotenv");
65
+ var import_dotenv_expand = require("dotenv-expand");
66
+ var import_zod = require("zod");
67
+ (0, import_dotenv_expand.expand)((0, import_dotenv.config)({
68
+ path: import_node_path.default.resolve(
69
+ process.cwd(),
70
+ process.env.NODE_ENV === "test" ? ".env.test" : ".env"
71
+ )
72
+ }));
73
/**
 * Builds a zod schema for an optional string setting.
 *
 * Blank or whitespace-only strings are converted to `undefined` before
 * validation, so an empty value is treated the same as a missing one.
 *
 * @param {string} description - Text attached to the schema via `.describe()`.
 * @param {string} [message] - Message passed to the `min(1)` check.
 * @returns The described, preprocessed zod schema.
 */
function optionalString(description, message) {
  const blankToUndefined = (value) => {
    if (typeof value === "string" && value.trim().length === 0) {
      return void 0;
    }
    return value;
  };
  const innerSchema = import_zod.z.string().trim().min(1, message).optional();
  return import_zod.z.preprocess(blankToUndefined, innerSchema).describe(description);
}
79
/**
 * Builds a zod schema for a required, non-blank string setting.
 *
 * String inputs are trimmed first; a string that is empty after trimming
 * becomes `undefined`, so it fails the required-string validation.
 * Non-string inputs are passed to the validator untouched.
 *
 * @param {string} description - Text attached to the schema via `.describe()`.
 * @param {string} [message] - Message passed to the `min(1)` check.
 * @returns The described, preprocessed zod schema.
 */
function requiredString(description, message) {
  const normalize = (value) => {
    if (typeof value !== "string") {
      return value;
    }
    return value.trim().length > 0 ? value.trim() : void 0;
  };
  const innerSchema = import_zod.z.string().trim().min(1, message);
  return import_zod.z.preprocess(normalize, innerSchema).describe(description);
}
85
// Schema for the environment variables this bundle reads.
//
// Every credential is optional at load time. The Mux token pair used to be
// required here, which made parseEnv() terminate the host process on import
// whenever MUX_TOKEN_ID / MUX_TOKEN_SECRET were not set -- even for callers
// that pass muxTokenId / muxTokenSecret through options. validateCredentials()
// and createMuxClient() already reject missing Mux credentials at call time
// with a descriptive error, and createSigningClient() already tolerates an
// unset value, so enforcing presence here only blocked the options path.
var EnvSchema = import_zod.z.object({
  NODE_ENV: import_zod.z.string().default("development").describe("Runtime environment."),
  MUX_TOKEN_ID: optionalString("Mux access token ID.", "Required to access Mux APIs"),
  MUX_TOKEN_SECRET: optionalString("Mux access token secret.", "Required to access Mux APIs"),
  MUX_SIGNING_KEY: optionalString("Mux signing key ID for signed playback URLs.", "Used to sign playback URLs"),
  MUX_PRIVATE_KEY: optionalString("Mux signing private key for signed playback URLs.", "Used to sign playback URLs"),
  OPENAI_API_KEY: optionalString("OpenAI API key for OpenAI-backed workflows.", "OpenAI API key"),
  ANTHROPIC_API_KEY: optionalString("Anthropic API key for Claude-backed workflows.", "Anthropic API key"),
  GOOGLE_GENERATIVE_AI_API_KEY: optionalString("Google Generative AI API key for Gemini-backed workflows.", "Google Generative AI API key"),
  ELEVENLABS_API_KEY: optionalString("ElevenLabs API key for audio translation.", "ElevenLabs API key"),
  HIVE_API_KEY: optionalString("Hive Visual Moderation API key.", "Hive API key"),
  S3_ENDPOINT: optionalString("S3-compatible endpoint for uploads.", "S3 endpoint"),
  S3_REGION: optionalString("S3 region (defaults to 'auto' when omitted)."),
  S3_BUCKET: optionalString("Bucket used for caption and audio uploads.", "S3 bucket"),
  S3_ACCESS_KEY_ID: optionalString("Access key ID for S3-compatible uploads.", "S3 access key id"),
  S3_SECRET_ACCESS_KEY: optionalString("Secret access key for S3-compatible uploads.", "S3 secret access key")
});
102
/**
 * Validates `process.env` against `EnvSchema`.
 *
 * On validation failure the per-field errors are printed to stderr as
 * pretty-printed JSON and the process exits with status 1.
 *
 * @returns The parsed environment object produced by the schema.
 */
function parseEnv() {
  const result = EnvSchema.safeParse(process.env);
  if (result.success) {
    return result.data;
  }
  console.error("\u274C Invalid env:");
  const fieldErrors = result.error.flatten().fieldErrors;
  console.error(JSON.stringify(fieldErrors, null, 2));
  process.exit(1);
  return result.data;
}
111
+ var env = parseEnv();
112
+ var env_default = env;
113
+
114
+ // src/lib/url-signing.ts
115
/**
 * Resolves the key pair used for signing playback URLs.
 *
 * Values on `config2` win; otherwise the MUX_SIGNING_KEY / MUX_PRIVATE_KEY
 * environment values are used.
 *
 * @param {{ muxSigningKey?: string, muxPrivateKey?: string }} config2
 * @returns {{ keyId: string, keySecret: string } | undefined} The signing
 *   context, or `undefined` when either half of the pair is missing.
 */
function resolveSigningContext(config2) {
  const { muxSigningKey, muxPrivateKey } = config2;
  const keyId = muxSigningKey ?? env_default.MUX_SIGNING_KEY;
  const keySecret = muxPrivateKey ?? env_default.MUX_PRIVATE_KEY;
  const hasBoth = Boolean(keyId) && Boolean(keySecret);
  return hasBoth ? { keyId, keySecret } : void 0;
}
123
/**
 * Creates a Mux SDK client configured with the JWT signing key pair from
 * `context`.
 *
 * @param {{ keyId: string, keySecret: string }} context - Signing key pair.
 * @returns A new Mux client instance.
 */
function createSigningClient(context) {
  const { keyId, keySecret } = context;
  const clientOptions = {
    // The token pair is not needed for signing, but the SDK requires the
    // fields; fall back to empty strings since only the jwt helper is used.
    tokenId: env_default.MUX_TOKEN_ID || "",
    tokenSecret: env_default.MUX_TOKEN_SECRET || "",
    jwtSigningKey: keyId,
    jwtPrivateKey: keySecret
  };
  return new import_mux_node.default(clientOptions);
}
133
/**
 * Signs a playback ID and returns the value produced by the Mux client's
 * `jwt.signPlaybackId`.
 *
 * @param {string} playbackId - Playback ID to sign.
 * @param {object} context - Signing context; `expiration` falls back to "1h".
 * @param {string} [type="video"] - Token type forwarded to the SDK.
 * @param {object} [params] - Extra claims; every value is stringified.
 * @returns {Promise<*>} Whatever `client.jwt.signPlaybackId` resolves to.
 */
async function signPlaybackId(playbackId, context, type = "video", params) {
  const client = createSigningClient(context);
  let stringParams = void 0;
  if (params) {
    const stringified = Object.entries(params).map(([key, value]) => [key, String(value)]);
    stringParams = Object.fromEntries(stringified);
  }
  const signOptions = {
    type,
    expiration: context.expiration || "1h",
    params: stringParams
  };
  return client.jwt.signPlaybackId(playbackId, signOptions);
}
144
/**
 * Appends a signed `token` query parameter to `url`.
 *
 * @param {string} url - URL to sign; may already contain a query string.
 * @param {string} playbackId - Playback ID the token is issued for.
 * @param {object} context - Signing context passed to `signPlaybackId`.
 * @param {string} [type="video"] - Token type.
 * @param {object} [params] - Extra claims forwarded to `signPlaybackId`.
 * @returns {Promise<string>} The URL with `token=<jwt>` appended.
 */
async function signUrl(url, playbackId, context, type = "video", params) {
  const token = await signPlaybackId(playbackId, context, type, params);
  // Continue an existing query string with "&", otherwise start one with "?".
  let separator = "?";
  if (url.includes("?")) {
    separator = "&";
  }
  return `${url}${separator}token=${token}`;
}
149
+
150
+ // src/primitives/storyboards.ts
151
+ var DEFAULT_STORYBOARD_WIDTH = 640;
152
/**
 * Builds the storyboard image URL for a playback ID.
 *
 * Without a signing context the width is sent as a plain query parameter.
 * With one, the width is handed to `signUrl` as a token parameter instead.
 *
 * @param {string} playbackId
 * @param {number} [width=DEFAULT_STORYBOARD_WIDTH]
 * @param {object} [signingContext]
 * @returns {Promise<string>} The storyboard URL.
 */
async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, signingContext) {
  const baseUrl = `https://image.mux.com/${playbackId}/storyboard.png`;
  if (!signingContext) {
    return `${baseUrl}?width=${width}`;
  }
  return signUrl(baseUrl, playbackId, signingContext, "storyboard", { width });
}
159
+
160
+ // src/primitives/text-chunking.ts
161
/**
 * Roughly estimates the number of tokens in `text` from its
 * whitespace-separated word count, using a ratio of 0.75 words per token.
 *
 * Empty or whitespace-only text yields 0. Previously it yielded 2, because
 * splitting an empty string still produces one (empty) element.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count, rounded up.
 */
function estimateTokenCount(text) {
  const trimmed = text.trim();
  if (trimmed.length === 0) {
    return 0;
  }
  const words = trimmed.split(/\s+/).length;
  return Math.ceil(words / 0.75);
}
165
/**
 * Splits `text` into word-based chunks of roughly `maxTokens` tokens each,
 * with consecutive chunks sharing roughly `overlapTokens` tokens.
 *
 * Token budgets are converted to word counts with the same 0.75
 * words-per-token ratio that `estimateTokenCount` uses.
 *
 * The window always advances by at least one word. Previously an overlap
 * greater than or equal to the chunk size (or a `maxTokens` too small to fit
 * a single word) stopped the loop after the first chunk and silently dropped
 * the remaining text.
 *
 * @param {string} text - Text to split.
 * @param {number} maxTokens - Approximate token budget per chunk.
 * @param {number} [overlapTokens=0] - Approximate tokens shared between
 *   neighbouring chunks; clamped to leave at least one new word per chunk.
 * @returns {Array<{id: string, text: string, tokenCount: number}>}
 */
function chunkByTokens(text, maxTokens, overlapTokens = 0) {
  const trimmed = text.trim();
  if (!trimmed) {
    return [];
  }
  const words = trimmed.split(/\s+/);
  // `|| 0` turns NaN (non-numeric input) into 0 before clamping.
  const wordsPerChunk = Math.max(1, Math.floor(maxTokens * 0.75) || 0);
  const requestedOverlap = Math.max(0, Math.floor(overlapTokens * 0.75) || 0);
  const overlapWords = Math.min(requestedOverlap, wordsPerChunk - 1);
  const step = wordsPerChunk - overlapWords;
  const chunks = [];
  for (let start = 0; start < words.length; start += step) {
    const chunkText2 = words.slice(start, start + wordsPerChunk).join(" ");
    chunks.push({
      id: `chunk-${chunks.length}`,
      text: chunkText2,
      tokenCount: estimateTokenCount(chunkText2)
    });
  }
  return chunks;
}
195
/**
 * Combines a non-empty list of cues into a single chunk record.
 *
 * The chunk text is the cue texts joined with single spaces; the time range
 * runs from the first cue's start to the last cue's end.
 *
 * @param {Array<{text: string, startTime: number, endTime: number}>} cues
 * @param {number} index - Used to build the `chunk-<index>` id.
 * @returns {{id: string, text: string, tokenCount: number, startTime: number, endTime: number}}
 */
function createChunkFromCues(cues, index) {
  const pieces = [];
  for (const cue of cues) {
    pieces.push(cue.text);
  }
  const text = pieces.join(" ");
  const tokenCount = estimateTokenCount(text);
  const firstCue = cues[0];
  const lastCue = cues[cues.length - 1];
  return {
    id: `chunk-${index}`,
    text,
    tokenCount,
    startTime: firstCue.startTime,
    endTime: lastCue.endTime
  };
}
205
/**
 * Groups cues into chunks whose estimated token count stays within
 * `maxTokens`, repeating up to `overlapCues` trailing cues at the start of
 * the next chunk for context.
 *
 * A single cue larger than `maxTokens` still gets a chunk of its own.
 *
 * Fix: the carried-over overlap is now bounded. It always drops at least one
 * cue from the chunk just emitted, and it is shortened further until the
 * incoming cue fits the budget. Previously, when a chunk held no more than
 * `overlapCues` cues, the whole chunk was carried over, so every following
 * chunk repeated all earlier cues and grew past `maxTokens`.
 *
 * @param {Array<{text: string, startTime: number, endTime: number}>} cues
 * @param {number} maxTokens - Token budget per chunk.
 * @param {number} [overlapCues=2] - Maximum cues repeated between chunks.
 * @returns {Array<object>} Chunks built by `createChunkFromCues`.
 */
function chunkVTTCues(cues, maxTokens, overlapCues = 2) {
  if (cues.length === 0) {
    return [];
  }
  const chunks = [];
  // `|| 0` turns NaN into 0; negative values mean "no overlap".
  const maxOverlap = Math.max(0, Math.floor(overlapCues) || 0);
  let currentCues = [];
  let currentTokens = 0;
  for (const cue of cues) {
    const cueTokens = estimateTokenCount(cue.text);
    if (currentCues.length > 0 && currentTokens + cueTokens > maxTokens) {
      chunks.push(createChunkFromCues(currentCues, chunks.length));
      // Never carry the entire previous chunk forward.
      const keep = Math.min(maxOverlap, currentCues.length - 1);
      currentCues = keep > 0 ? currentCues.slice(-keep) : [];
      currentTokens = currentCues.reduce(
        (sum, c) => sum + estimateTokenCount(c.text),
        0
      );
      // Shed the oldest overlap cues until the incoming cue fits.
      while (currentCues.length > 0 && currentTokens + cueTokens > maxTokens) {
        currentTokens -= estimateTokenCount(currentCues[0].text);
        currentCues = currentCues.slice(1);
      }
    }
    currentCues.push(cue);
    currentTokens += cueTokens;
  }
  if (currentCues.length > 0) {
    chunks.push(createChunkFromCues(currentCues, chunks.length));
  }
  return chunks;
}
233
/**
 * Chunks `text` according to `strategy`.
 *
 * Only the "token" strategy is implemented; it delegates to `chunkByTokens`
 * with `strategy.maxTokens` and `strategy.overlap` (default 0).
 *
 * @param {string} text
 * @param {{type: string, maxTokens?: number, overlap?: number}} strategy
 * @returns {Array<object>} The chunks produced by the selected strategy.
 * @throws {Error} When `strategy.type` is not a supported strategy.
 */
function chunkText(text, strategy) {
  if (strategy.type === "token") {
    const overlap = strategy.overlap ?? 0;
    return chunkByTokens(text, strategy.maxTokens, overlap);
  }
  const exhaustiveCheck = strategy;
  throw new Error(`Unsupported chunking strategy: ${exhaustiveCheck}`);
}
244
+
245
+ // src/primitives/thumbnails.ts
246
/**
 * Builds thumbnail URLs sampled across a video.
 *
 * For durations of 50 seconds or less, five evenly spaced timestamps are
 * taken (rounded to whole seconds). Longer videos are sampled every
 * `interval` seconds starting at 0.
 *
 * Fixes:
 * - A non-positive or non-numeric `interval` is rejected with a RangeError;
 *   it previously made the sampling loop run forever.
 * - Rounded timestamps for very short videos are clamped to 0 and
 *   de-duplicated, so the same frame is not requested several times.
 *
 * @param {string} playbackId - Mux playback ID.
 * @param {number} duration - Video duration in seconds.
 * @param {{interval?: number, width?: number, signingContext?: object}} [options]
 * @returns {Promise<string[]>} One URL per sampled timestamp, in time order.
 * @throws {RangeError} (as a rejection) when `interval` is not a positive number.
 */
async function getThumbnailUrls(playbackId, duration, options = {}) {
  const { interval = 10, width = 640, signingContext } = options;
  // Written as a negated comparison so NaN and non-numeric values are rejected too.
  if (!(interval > 0)) {
    throw new RangeError(`interval must be a positive number of seconds, received ${interval}`);
  }
  let timestamps = [];
  if (duration <= 50) {
    const spacing = duration / 6;
    for (let i = 1; i <= 5; i++) {
      timestamps.push(Math.max(0, Math.round(i * spacing)));
    }
    // Rounding collapses neighbouring samples on short clips.
    timestamps = [...new Set(timestamps)];
  } else {
    for (let time = 0; time < duration; time += interval) {
      timestamps.push(time);
    }
  }
  const baseUrl = `https://image.mux.com/${playbackId}/thumbnail.png`;
  const urlPromises = timestamps.map(async (time) => {
    if (signingContext) {
      return signUrl(baseUrl, playbackId, signingContext, "thumbnail", { time, width });
    }
    return `${baseUrl}?time=${time}&width=${width}`;
  });
  return Promise.all(urlPromises);
}
268
+
269
+ // src/primitives/transcripts.ts
270
/**
 * Returns the asset's tracks whose `type` is "text" and `status` is "ready".
 *
 * @param {{tracks?: Array<{type?: string, status?: string}>}} asset
 * @returns {Array<object>} Matching tracks; empty when the asset has none.
 */
function getReadyTextTracks(asset) {
  const allTracks = asset.tracks || [];
  const isReadyText = (track) => track.type === "text" && track.status === "ready";
  return allTracks.filter(isReadyText);
}
275
/**
 * Picks a ready text track from the asset.
 *
 * With no `languageCode`, the first ready text track is returned. With one,
 * only tracks whose `text_type` is "subtitles" and whose `language_code`
 * equals it are considered.
 *
 * @param {object} asset - Asset whose tracks are searched.
 * @param {string} [languageCode] - Language code to match.
 * @returns {object | undefined} The selected track, or `undefined`.
 */
function findCaptionTrack(asset, languageCode) {
  const readyTracks = getReadyTextTracks(asset);
  if (readyTracks.length === 0) {
    return void 0;
  }
  if (languageCode) {
    const matchesLanguage = (track) => track.text_type === "subtitles" && track.language_code === languageCode;
    return readyTracks.find(matchesLanguage);
  }
  return readyTracks[0];
}
286
/**
 * Extracts the spoken text from WebVTT content as one whitespace-normalised
 * string, with markup tags such as `<v Name>` or `<b>` removed.
 *
 * The content is walked block by block (blocks are separated by blank lines):
 * - a block that starts with WEBVTT, NOTE, STYLE or REGION is skipped
 *   entirely, including continuation lines such as header metadata, comment
 *   text and CSS;
 * - timing lines (containing "-->") are skipped;
 * - a block's first line is treated as a cue identifier only when the line
 *   right after it is a timing line.
 *
 * Fixes: single-word caption lines ("Yes", "Okay", "42") used to be dropped
 * because any space-free line was assumed to be a cue identifier, while
 * "WEBVTT <title>" headers, header metadata and the bodies of NOTE/STYLE
 * blocks leaked into the transcript.
 *
 * @param {string} vttContent - Raw WebVTT text.
 * @returns {string} The caption text, or "" for blank input.
 */
function extractTextFromVTT(vttContent) {
  if (!vttContent.trim()) {
    return "";
  }
  const lines = vttContent.split("\n");
  const textLines = [];
  let atBlockStart = true;
  let skippingBlock = false;
  for (let i = 0; i < lines.length; i++) {
    // trim() also removes the "\r" left behind by CRLF line endings.
    const line = lines[i].trim();
    if (!line) {
      atBlockStart = true;
      skippingBlock = false;
      continue;
    }
    if (line.includes("-->")) {
      // A timing line starts the cue payload, even directly after a header.
      atBlockStart = false;
      skippingBlock = false;
      continue;
    }
    if (skippingBlock) {
      continue;
    }
    if (atBlockStart) {
      atBlockStart = false;
      if (/^(WEBVTT|NOTE|STYLE|REGION)(\s|$)/.test(line)) {
        skippingBlock = true;
        continue;
      }
      const nextLine = lines[i + 1];
      if (nextLine !== void 0 && nextLine.includes("-->")) {
        // Cue identifier: sits directly above the timing line.
        continue;
      }
    }
    const cleanLine = line.replace(/<[^>]*>/g, "").trim();
    if (cleanLine) {
      textLines.push(cleanLine);
    }
  }
  return textLines.join(" ").replace(/\s+/g, " ").trim();
}
313
/**
 * Converts a WebVTT timestamp to seconds.
 *
 * Accepts both `HH:MM:SS.mmm` and the hour-less `MM:SS.mmm` form. The
 * hour-less form previously returned 0. Components that cannot be parsed
 * count as 0, and any other number of ":"-separated parts yields 0.
 *
 * @param {string} timestamp - Timestamp text, e.g. "00:01:02.500".
 * @returns {number} Seconds, including the fractional part.
 */
function vttTimestampToSeconds(timestamp) {
  const parts = timestamp.trim().split(":");
  if (parts.length < 2 || parts.length > 3) {
    return 0;
  }
  // Read from the right so the optional hours field is simply absent.
  const seconds = Number.parseFloat(parts[parts.length - 1]) || 0;
  const minutes = Number.parseInt(parts[parts.length - 2], 10) || 0;
  const hours = parts.length === 3 ? Number.parseInt(parts[0], 10) || 0 : 0;
  return hours * 3600 + minutes * 60 + seconds;
}
322
/**
 * Renders WebVTT content as one line per cue in the form "[<seconds>s] text",
 * where <seconds> is the cue's start time rounded down.
 *
 * A cue's text is every non-blank line that directly follows its timing line
 * (up to the next blank or timing line), joined with spaces and with markup
 * tags removed.
 *
 * Fixes: only the first payload line used to be kept, and a cue with an
 * empty payload skipped ahead over blank lines and used the next cue's
 * identifier or timing line as its text.
 *
 * @param {string} vttContent - Raw WebVTT text.
 * @returns {string} Newline-separated transcript, or "" for blank input.
 */
function extractTimestampedTranscript(vttContent) {
  if (!vttContent.trim()) {
    return "";
  }
  const lines = vttContent.split("\n");
  const segments = [];
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (!line.includes("-->")) {
      continue;
    }
    const startTime = line.split("-->")[0].trim();
    const timeInSeconds = vttTimestampToSeconds(startTime);
    const textLines = [];
    let j = i + 1;
    // The payload ends at the first blank line or at the next timing line.
    while (j < lines.length && lines[j].trim() && !lines[j].includes("-->")) {
      const cleanLine = lines[j].trim().replace(/<[^>]*>/g, "").trim();
      if (cleanLine) {
        textLines.push(cleanLine);
      }
      j++;
    }
    if (textLines.length > 0) {
      segments.push({ time: timeInSeconds, text: textLines.join(" ") });
    }
  }
  return segments.map((segment) => `[${Math.floor(segment.time)}s] ${segment.text}`).join("\n");
}
347
/**
 * Parses WebVTT content into cue objects.
 *
 * For every timing line, the start and end timestamps are converted with
 * `vttTimestampToSeconds` (cue settings after the end timestamp are
 * ignored). The cue text is the following lines up to the next blank or
 * timing line, with markup tags removed and joined with spaces. Cues
 * without text are omitted.
 *
 * Fix: the timing line is split on "-->" with any surrounding whitespace.
 * Previously a line that was not separated by exactly " --> " (tabs, no
 * spaces, or a bare "-->") threw a TypeError. Timing lines missing either
 * timestamp are now skipped.
 *
 * @param {string} vttContent - Raw WebVTT text.
 * @returns {Array<{startTime: number, endTime: number, text: string}>}
 */
function parseVTTCues(vttContent) {
  if (!vttContent.trim()) {
    return [];
  }
  const lines = vttContent.split("\n");
  const cues = [];
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (!line.includes("-->")) {
      continue;
    }
    const [startStr = "", afterArrow = ""] = line.split("-->").map((s) => s.trim());
    // Anything after the end timestamp is cue settings (e.g. "align:start").
    const endStr = afterArrow.split(/\s+/)[0];
    if (!startStr || !endStr) {
      continue;
    }
    const startTime = vttTimestampToSeconds(startStr);
    const endTime = vttTimestampToSeconds(endStr);
    const textLines = [];
    let j = i + 1;
    while (j < lines.length && lines[j].trim() && !lines[j].includes("-->")) {
      const cleanLine = lines[j].trim().replace(/<[^>]*>/g, "");
      if (cleanLine) {
        textLines.push(cleanLine);
      }
      j++;
    }
    if (textLines.length > 0) {
      cues.push({
        startTime,
        endTime,
        text: textLines.join(" ")
      });
    }
  }
  return cues;
}
377
/**
 * Builds the URL of a text track's VTT file.
 *
 * @param {string} playbackId
 * @param {string} trackId
 * @param {object} [signingContext] - When given, the URL is passed through
 *   `signUrl` with the "video" token type.
 * @returns {Promise<string>} The plain or signed transcript URL.
 */
async function buildTranscriptUrl(playbackId, trackId, signingContext) {
  const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
  if (!signingContext) {
    return baseUrl;
  }
  return signUrl(baseUrl, playbackId, signingContext, "video");
}
384
/**
 * Fetches the transcript of an asset's caption track.
 *
 * The result always has `transcriptText`; `track` and `transcriptUrl` are
 * included once they are known. A missing track, a track without an id, a
 * non-OK HTTP response, or a failed request all yield an empty
 * `transcriptText` (request failures are logged with `console.warn`).
 *
 * @param {object} asset - Asset whose tracks are searched.
 * @param {string} playbackId - Playback ID used to build the transcript URL.
 * @param {{languageCode?: string, cleanTranscript?: boolean, signingContext?: object}} [options]
 *   `cleanTranscript` (default true) selects plain text over raw VTT.
 * @returns {Promise<{transcriptText: string, transcriptUrl?: string, track?: object}>}
 */
async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
  const { languageCode, cleanTranscript = true, signingContext } = options;
  const track = findCaptionTrack(asset, languageCode);
  if (!track) {
    return { transcriptText: "" };
  }
  if (!track.id) {
    return { transcriptText: "", track };
  }
  const transcriptUrl = await buildTranscriptUrl(playbackId, track.id, signingContext);
  const withText = (transcriptText) => ({ transcriptText, transcriptUrl, track });
  try {
    const response = await fetch(transcriptUrl);
    if (!response.ok) {
      return withText("");
    }
    const rawVtt = await response.text();
    if (cleanTranscript) {
      return withText(extractTextFromVTT(rawVtt));
    }
    return withText(rawVtt);
  } catch (error) {
    console.warn("Failed to fetch transcript:", error);
    return withText("");
  }
}
407
+
408
+ // src/workflows/index.ts
409
+ var workflows_exports = {};
410
+ __export(workflows_exports, {
411
+ SUMMARY_KEYWORD_LIMIT: () => SUMMARY_KEYWORD_LIMIT,
412
+ burnedInCaptionsSchema: () => burnedInCaptionsSchema,
413
+ chapterSchema: () => chapterSchema,
414
+ chaptersSchema: () => chaptersSchema,
415
+ generateChapters: () => generateChapters,
416
+ generateVideoEmbeddings: () => generateVideoEmbeddings,
417
+ getModerationScores: () => getModerationScores,
418
+ getSummaryAndTags: () => getSummaryAndTags,
419
+ hasBurnedInCaptions: () => hasBurnedInCaptions,
420
+ summarySchema: () => summarySchema,
421
+ translateAudio: () => translateAudio,
422
+ translateCaptions: () => translateCaptions,
423
+ translationSchema: () => translationSchema
44
424
  });
425
+
426
+ // src/workflows/burned-in-captions.ts
427
+ var import_ai = require("ai");
428
+
429
+ // node_modules/dedent/dist/dedent.mjs
430
+ function ownKeys(object, enumerableOnly) {
431
+ var keys = Object.keys(object);
432
+ if (Object.getOwnPropertySymbols) {
433
+ var symbols = Object.getOwnPropertySymbols(object);
434
+ enumerableOnly && (symbols = symbols.filter(function(sym) {
435
+ return Object.getOwnPropertyDescriptor(object, sym).enumerable;
436
+ })), keys.push.apply(keys, symbols);
437
+ }
438
+ return keys;
439
+ }
440
+ function _objectSpread(target) {
441
+ for (var i = 1; i < arguments.length; i++) {
442
+ var source = null != arguments[i] ? arguments[i] : {};
443
+ i % 2 ? ownKeys(Object(source), true).forEach(function(key) {
444
+ _defineProperty(target, key, source[key]);
445
+ }) : Object.getOwnPropertyDescriptors ? Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)) : ownKeys(Object(source)).forEach(function(key) {
446
+ Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
447
+ });
448
+ }
449
+ return target;
450
+ }
451
+ function _defineProperty(obj, key, value) {
452
+ key = _toPropertyKey(key);
453
+ if (key in obj) {
454
+ Object.defineProperty(obj, key, { value, enumerable: true, configurable: true, writable: true });
455
+ } else {
456
+ obj[key] = value;
457
+ }
458
+ return obj;
459
+ }
460
+ function _toPropertyKey(arg) {
461
+ var key = _toPrimitive(arg, "string");
462
+ return typeof key === "symbol" ? key : String(key);
463
+ }
464
+ function _toPrimitive(input, hint) {
465
+ if (typeof input !== "object" || input === null) return input;
466
+ var prim = input[Symbol.toPrimitive];
467
+ if (prim !== void 0) {
468
+ var res = prim.call(input, hint || "default");
469
+ if (typeof res !== "object") return res;
470
+ throw new TypeError("@@toPrimitive must return a primitive value.");
471
+ }
472
+ return (hint === "string" ? String : Number)(input);
473
+ }
474
+ var dedent = createDedent({});
475
+ var dedent_default = dedent;
476
+ function createDedent(options) {
477
+ dedent2.withOptions = (newOptions) => createDedent(_objectSpread(_objectSpread({}, options), newOptions));
478
+ return dedent2;
479
+ function dedent2(strings, ...values) {
480
+ const raw = typeof strings === "string" ? [strings] : strings.raw;
481
+ const {
482
+ alignValues = false,
483
+ escapeSpecialCharacters = Array.isArray(strings),
484
+ trimWhitespace = true
485
+ } = options;
486
+ let result = "";
487
+ for (let i = 0; i < raw.length; i++) {
488
+ let next = raw[i];
489
+ if (escapeSpecialCharacters) {
490
+ next = next.replace(/\\\n[ \t]*/g, "").replace(/\\`/g, "`").replace(/\\\$/g, "$").replace(/\\\{/g, "{");
491
+ }
492
+ result += next;
493
+ if (i < values.length) {
494
+ const value = alignValues ? alignValue(values[i], result) : values[i];
495
+ result += value;
496
+ }
497
+ }
498
+ const lines = result.split("\n");
499
+ let mindent = null;
500
+ for (const l of lines) {
501
+ const m = l.match(/^(\s+)\S+/);
502
+ if (m) {
503
+ const indent = m[1].length;
504
+ if (!mindent) {
505
+ mindent = indent;
506
+ } else {
507
+ mindent = Math.min(mindent, indent);
508
+ }
509
+ }
510
+ }
511
+ if (mindent !== null) {
512
+ const m = mindent;
513
+ result = lines.map((l) => l[0] === " " || l[0] === " " ? l.slice(m) : l).join("\n");
514
+ }
515
+ if (trimWhitespace) {
516
+ result = result.trim();
517
+ }
518
+ if (escapeSpecialCharacters) {
519
+ result = result.replace(/\\n/g, "\n");
520
+ }
521
+ return result;
522
+ }
523
+ }
524
+ function alignValue(value, precedingText) {
525
+ if (typeof value !== "string" || !value.includes("\n")) {
526
+ return value;
527
+ }
528
+ const currentLine = precedingText.slice(precedingText.lastIndexOf("\n") + 1);
529
+ const indentMatch = currentLine.match(/^(\s+)/);
530
+ if (indentMatch) {
531
+ const indent = indentMatch[1];
532
+ return value.replace(/\n/g, `
533
+ ${indent}`);
534
+ }
535
+ return value;
536
+ }
537
+
538
+ // src/workflows/burned-in-captions.ts
539
+ var import_zod2 = require("zod");
540
+
541
+ // src/lib/client-factory.ts
542
+ var import_mux_node2 = __toESM(require("@mux/mux-node"));
543
+
544
+ // src/lib/providers.ts
545
+ var import_anthropic = require("@ai-sdk/anthropic");
546
+ var import_google = require("@ai-sdk/google");
547
+ var import_openai = require("@ai-sdk/openai");
548
+ var DEFAULT_LANGUAGE_MODELS = {
549
+ openai: "gpt-5-mini",
550
+ anthropic: "claude-haiku-4-5",
551
+ google: "gemini-2.5-flash"
552
+ };
553
+ var DEFAULT_EMBEDDING_MODELS = {
554
+ openai: "text-embedding-3-small",
555
+ google: "gemini-embedding-001"
556
+ };
557
/**
 * Returns `value` when it is truthy, otherwise throws an error that names
 * the missing setting.
 *
 * @param {*} value - The resolved setting.
 * @param {string} name - Setting name used in the error message.
 * @returns {*} `value`, unchanged.
 * @throws {Error} When `value` is falsy.
 */
function requireEnv(value, name) {
  if (value) {
    return value;
  }
  throw new Error(`Missing ${name}. Set ${name} in your environment or pass it in options.`);
}
563
/**
 * Resolves the language model to use for a workflow.
 *
 * The provider defaults to "openai" and the model id to that provider's
 * entry in `DEFAULT_LANGUAGE_MODELS`. The API key comes from the matching
 * option, falling back to the environment.
 *
 * @param {{provider?: string, model?: string, openaiApiKey?: string, anthropicApiKey?: string, googleApiKey?: string}} [options]
 * @returns {{provider: string, modelId: string, model: *}} The resolved
 *   provider, model id and the model handle created by the provider factory.
 * @throws {Error} When the provider's API key is missing or the provider is
 *   not supported.
 */
function resolveLanguageModel(options = {}) {
  const provider = options.provider || "openai";
  const modelId = options.model || DEFAULT_LANGUAGE_MODELS[provider];
  if (provider === "openai") {
    const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
    requireEnv(apiKey, "OPENAI_API_KEY");
    const { createOpenAI } = import_openai;
    const openai = createOpenAI({ apiKey });
    return { provider, modelId, model: openai(modelId) };
  }
  if (provider === "anthropic") {
    const apiKey = options.anthropicApiKey ?? env_default.ANTHROPIC_API_KEY;
    requireEnv(apiKey, "ANTHROPIC_API_KEY");
    const { createAnthropic } = import_anthropic;
    const anthropic = createAnthropic({ apiKey });
    return { provider, modelId, model: anthropic(modelId) };
  }
  if (provider === "google") {
    const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
    requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
    const { createGoogleGenerativeAI } = import_google;
    const google = createGoogleGenerativeAI({ apiKey });
    return { provider, modelId, model: google(modelId) };
  }
  const exhaustiveCheck = provider;
  throw new Error(`Unsupported provider: ${exhaustiveCheck}`);
}
609
/**
 * Resolves the embedding model to use.
 *
 * The provider defaults to "openai" and the model id to that provider's
 * entry in `DEFAULT_EMBEDDING_MODELS`. Only "openai" and "google" are
 * handled; the API key comes from the matching option, falling back to the
 * environment.
 *
 * @param {{provider?: string, model?: string, openaiApiKey?: string, googleApiKey?: string}} [options]
 * @returns {{provider: string, modelId: string, model: *}} The resolved
 *   provider, model id and embedding model handle.
 * @throws {Error} When the provider's API key is missing or the provider is
 *   not supported for embeddings.
 */
function resolveEmbeddingModel(options = {}) {
  const provider = options.provider || "openai";
  const modelId = options.model || DEFAULT_EMBEDDING_MODELS[provider];
  if (provider === "openai") {
    const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
    requireEnv(apiKey, "OPENAI_API_KEY");
    const { createOpenAI } = import_openai;
    const openai = createOpenAI({ apiKey });
    return { provider, modelId, model: openai.embedding(modelId) };
  }
  if (provider === "google") {
    const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
    requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
    const { createGoogleGenerativeAI } = import_google;
    const google = createGoogleGenerativeAI({ apiKey });
    return { provider, modelId, model: google.textEmbeddingModel(modelId) };
  }
  const exhaustiveCheck = provider;
  throw new Error(`Unsupported embedding provider: ${exhaustiveCheck}`);
}
643
+
644
+ // src/lib/client-factory.ts
645
/**
 * Resolves credentials from `options` (falling back to the environment) and
 * checks that the ones needed for this call are present.
 *
 * Mux credentials are always required. An AI provider key is required only
 * for the provider named by `requiredProvider`.
 *
 * @param {object} options - May carry muxTokenId, muxTokenSecret,
 *   openaiApiKey, anthropicApiKey and googleApiKey.
 * @param {string} [requiredProvider] - "openai", "anthropic" or "google".
 * @returns {{muxTokenId: string, muxTokenSecret: string, openaiApiKey?: string, anthropicApiKey?: string, googleApiKey?: string}}
 * @throws {Error} When a required credential is missing.
 */
function validateCredentials(options, requiredProvider) {
  const credentials = {
    muxTokenId: options.muxTokenId ?? env_default.MUX_TOKEN_ID,
    muxTokenSecret: options.muxTokenSecret ?? env_default.MUX_TOKEN_SECRET,
    openaiApiKey: options.openaiApiKey ?? env_default.OPENAI_API_KEY,
    anthropicApiKey: options.anthropicApiKey ?? env_default.ANTHROPIC_API_KEY,
    googleApiKey: options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY
  };
  if (!credentials.muxTokenId || !credentials.muxTokenSecret) {
    throw new Error(
      "Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables."
    );
  }
  // Checked in the same order as before: openai, anthropic, google.
  const providerChecks = [
    ["openai", credentials.openaiApiKey, "OpenAI API key is required. Provide openaiApiKey in options or set OPENAI_API_KEY environment variable."],
    ["anthropic", credentials.anthropicApiKey, "Anthropic API key is required. Provide anthropicApiKey in options or set ANTHROPIC_API_KEY environment variable."],
    ["google", credentials.googleApiKey, "Google Generative AI API key is required. Provide googleApiKey in options or set GOOGLE_GENERATIVE_AI_API_KEY environment variable."]
  ];
  for (const [providerName, apiKey, errorMessage] of providerChecks) {
    if (requiredProvider === providerName && !apiKey) {
      throw new Error(errorMessage);
    }
  }
  return credentials;
}
679
/**
 * Creates a Mux SDK client from resolved credentials.
 *
 * @param {{muxTokenId?: string, muxTokenSecret?: string}} credentials
 * @returns A new Mux client instance.
 * @throws {Error} When either Mux credential is missing.
 */
function createMuxClient(credentials) {
  const { muxTokenId, muxTokenSecret } = credentials;
  const hasCredentials = Boolean(muxTokenId) && Boolean(muxTokenSecret);
  if (!hasCredentials) {
    throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
  }
  return new import_mux_node2.default({
    tokenId: muxTokenId,
    tokenSecret: muxTokenSecret
  });
}
688
/**
 * Builds everything a workflow needs: validated credentials, the resolved
 * language model and a Mux client.
 *
 * The provider is taken from the `provider` argument, then
 * `options.provider`, then defaults to "openai".
 *
 * @param {object} options - Workflow options (credentials, provider, model).
 * @param {string} [provider] - Provider override.
 * @returns {{mux: *, languageModel: object, credentials: object}}
 * @throws {Error} When required credentials are missing or the provider is
 *   unsupported.
 */
function createWorkflowClients(options, provider) {
  const providerToUse = provider || options.provider || "openai";
  const credentials = validateCredentials(options, providerToUse);
  const modelOptions = { ...options, provider: providerToUse };
  const languageModel = resolveLanguageModel(modelOptions);
  const mux = createMuxClient(credentials);
  return { mux, languageModel, credentials };
}
701
+
702
+ // src/lib/image-download.ts
703
+ var import_node_buffer = require("buffer");
704
+ var import_p_retry = __toESM(require("p-retry"));
705
+ var DEFAULT_OPTIONS = {
706
+ timeout: 1e4,
707
+ retries: 3,
708
+ retryDelay: 1e3,
709
+ maxRetryDelay: 1e4,
710
+ exponentialBackoff: true
711
+ };
45
712
  async function downloadImageAsBase64(url, options = {}) {
46
713
  const opts = { ...DEFAULT_OPTIONS, ...options };
47
714
  let attemptCount = 0;
48
- return pRetry(
715
+ return (0, import_p_retry.default)(
49
716
  async () => {
50
717
  attemptCount++;
51
718
  const controller = new AbortController();
@@ -60,18 +727,18 @@ async function downloadImageAsBase64(url, options = {}) {
60
727
  clearTimeout(timeoutId);
61
728
  if (!response.ok) {
62
729
  if (response.status >= 400 && response.status < 500 && response.status !== 429) {
63
- throw new AbortError(`HTTP ${response.status}: ${response.statusText}`);
730
+ throw new import_p_retry.AbortError(`HTTP ${response.status}: ${response.statusText}`);
64
731
  }
65
732
  throw new Error(`HTTP ${response.status}: ${response.statusText}`);
66
733
  }
67
734
  const contentType = response.headers.get("content-type");
68
735
  if (!contentType?.startsWith("image/")) {
69
- throw new AbortError(`Invalid content type: ${contentType}. Expected image/*`);
736
+ throw new import_p_retry.AbortError(`Invalid content type: ${contentType}. Expected image/*`);
70
737
  }
71
738
  const arrayBuffer = await response.arrayBuffer();
72
- const buffer = Buffer.from(arrayBuffer);
739
+ const buffer = import_node_buffer.Buffer.from(arrayBuffer);
73
740
  if (buffer.length === 0) {
74
- throw new AbortError("Downloaded image is empty");
741
+ throw new import_p_retry.AbortError("Downloaded image is empty");
75
742
  }
76
743
  const base64Data = `data:${contentType};base64,${buffer.toString("base64")}`;
77
744
  return {
@@ -84,7 +751,7 @@ async function downloadImageAsBase64(url, options = {}) {
84
751
  };
85
752
  } catch (error) {
86
753
  clearTimeout(timeoutId);
87
- if (error instanceof AbortError) {
754
+ if (error instanceof import_p_retry.AbortError) {
88
755
  throw error;
89
756
  }
90
757
  if (error instanceof Error) {
@@ -122,68 +789,548 @@ async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
122
789
  }
123
790
  return results;
124
791
  }
125
- async function uploadImageToAnthropicFiles(url, anthropicApiKey, options = {}) {
126
- const downloadResult = await downloadImageAsBase64(url, options);
127
- const formData = new FormData();
128
- const imageBlob = new Blob([downloadResult.buffer], {
129
- type: downloadResult.contentType
130
- });
131
- const extension = downloadResult.contentType.split("/")[1] || "png";
132
- formData.append("file", imageBlob, `image.${extension}`);
133
- const response = await fetch("https://api.anthropic.com/v1/files", {
134
- method: "POST",
135
- headers: {
136
- "x-api-key": anthropicApiKey,
137
- "anthropic-version": "2023-06-01",
138
- "anthropic-beta": "files-api-2025-04-14"
139
- // Don't set Content-Type header - let fetch set it with boundary for multipart
140
- },
141
- body: formData
142
- });
143
- if (!response.ok) {
144
- const errorText = await response.text();
145
- throw new Error(`Anthropic Files API error: ${response.status} ${response.statusText} - ${errorText}`);
792
+
793
+ // src/lib/mux-assets.ts
794
/**
 * Picks the playback ID to use for an asset, preferring a public one over a
 * signed one.
 *
 * For each policy (in preference order) only the FIRST entry with that policy
 * is considered; if that entry has no usable `id`, the next policy is tried.
 *
 * @param {object} asset - Asset object; `asset.playback_ids` may be missing.
 * @returns {{ id: string, policy: "public" | "signed" }} The chosen playback ID.
 * @throws {Error} When neither a public nor a signed playback ID is available.
 */
function getPlaybackId(asset) {
  const candidates = asset.playback_ids || [];
  for (const policy of ["public", "signed"]) {
    const match = candidates.find((entry) => entry.policy === policy);
    if (match?.id) {
      return { id: match.id, policy };
    }
  }
  throw new Error(
    "No public or signed playback ID found for this asset. A public or signed playback ID is required. DRM playback IDs are not currently supported."
  );
}
808
/**
 * Retrieves an asset through the given Mux client and resolves which playback
 * ID should be used for it.
 *
 * @param {object} mux - Client exposing `video.assets.retrieve(assetId)`.
 * @param {string} assetId - Identifier passed to `retrieve`.
 * @returns {Promise<{ asset: object, playbackId: string, policy: string }>}
 *   The retrieved asset together with the selected playback ID and its policy.
 * @throws {Error} Propagates the error from `getPlaybackId` when no usable
 *   playback ID exists.
 */
async function getPlaybackIdForAsset(mux, assetId) {
  const asset = await mux.video.assets.retrieve(assetId);
  const selected = getPlaybackId(asset);
  return {
    asset,
    playbackId: selected.id,
    policy: selected.policy
  };
}
813
+
814
+ // src/lib/prompt-builder.ts
815
/**
 * Renders a prompt section as an XML-style element:
 *
 *   <tag attr="value">
 *   escaped content
 *   </tag>
 *
 * Tag and attribute names are validated against a conservative XML-name
 * pattern; content and attribute values are entity-escaped so they cannot
 * break out of the element.
 *
 * @param {{ tag: string, content: string, attributes?: Record<string, string> }} section
 *   Section to render.
 * @returns {string} The rendered element, or "" when the content is empty or
 *   whitespace-only.
 * @throws {Error} When the tag or any attribute key is not a valid XML name.
 */
function renderSection(section) {
  const { tag, content, attributes } = section;
  const XML_NAME_PATTERN = /^[A-Z_][\w.:-]*$/i;
  const assertValidXmlName = (name, context) => {
    if (!XML_NAME_PATTERN.test(name)) {
      throw new Error(`Invalid XML ${context} name: "${name}"`);
    }
  };
  // "&" must be escaped first so the entities produced below are not re-escaped.
  // ">" maps to exactly "&gt;" (no trailing characters), otherwise every ">" in
  // the input would leak a stray "/" into the rendered prompt.
  const escapeXmlText = (value) => value.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
  const escapeXmlAttribute = (value) => escapeXmlText(value).replace(/"/g, "&quot;");
  // Empty sections are dropped entirely (callers filter out falsy results).
  if (!content.trim()) {
    return "";
  }
  assertValidXmlName(tag, "tag");
  const attrString = attributes ? ` ${Object.entries(attributes).map(([key, value]) => {
    assertValidXmlName(key, "attribute");
    return `${key}="${escapeXmlAttribute(value)}"`;
  }).join(" ")}` : "";
  const safeContent = escapeXmlText(content.trim());
  return `<${tag}${attrString}>\n${safeContent}\n</${tag}>`;
}
838
+ function resolveSection(defaultSection, override) {
839
+ if (override === void 0) {
840
+ return defaultSection;
841
+ }
842
+ if (typeof override === "string") {
843
+ return { ...defaultSection, content: override };
146
844
  }
147
- const fileResult = await response.json();
845
+ return override;
846
+ }
847
+ function createPromptBuilder(config2) {
848
+ const { template, sectionOrder } = config2;
849
+ const getSection = (section, override) => {
850
+ const resolved = resolveSection(template[section], override);
851
+ return renderSection(resolved);
852
+ };
853
+ const build = (overrides) => {
854
+ const sections = sectionOrder.map((sectionKey) => getSection(sectionKey, overrides?.[sectionKey])).filter(Boolean);
855
+ return sections.join("\n\n");
856
+ };
857
+ const buildWithContext = (overrides, additionalSections) => {
858
+ const basePrompt = build(overrides);
859
+ if (!additionalSections?.length) {
860
+ return basePrompt;
861
+ }
862
+ const additional = additionalSections.map(renderSection).filter(Boolean).join("\n\n");
863
+ return additional ? `${basePrompt}
864
+
865
+ ${additional}` : basePrompt;
866
+ };
148
867
  return {
149
- fileId: fileResult.id,
150
- url: downloadResult.url,
151
- contentType: downloadResult.contentType,
152
- sizeBytes: downloadResult.sizeBytes
868
+ template,
869
+ build,
870
+ buildWithContext,
871
+ getSection
153
872
  };
154
873
  }
155
- var DEFAULT_OPTIONS;
156
- var init_image_download = __esm({
157
- "src/utils/image-download.ts"() {
158
- DEFAULT_OPTIONS = {
159
- timeout: 1e4,
160
- retries: 3,
161
- retryDelay: 1e3,
162
- maxRetryDelay: 1e4,
163
- exponentialBackoff: true
164
- };
165
- }
874
/**
 * Builds a `transcript` prompt section.
 *
 * @param {string} transcriptText - Text placed in the section's content.
 * @param {string} [format="plain text"] - Value of the section's `format` attribute.
 * @returns {{ tag: string, content: string, attributes: { format: string } }}
 *   Section descriptor with tag "transcript".
 */
function createTranscriptSection(transcriptText, format = "plain text") {
  const attributes = { format };
  const section = { tag: "transcript", content: transcriptText, attributes };
  return section;
}
881
/**
 * Builds a `tone` prompt section.
 *
 * @param {string} instruction - Text placed in the section's content.
 * @returns {{ tag: string, content: string }} Section descriptor with tag "tone".
 */
function createToneSection(instruction) {
  const section = { tag: "tone", content: instruction };
  return section;
}
887
+
888
+ // src/workflows/burned-in-captions.ts
889
+ var burnedInCaptionsSchema = import_zod2.z.object({
890
+ hasBurnedInCaptions: import_zod2.z.boolean(),
891
+ confidence: import_zod2.z.number().min(0).max(1),
892
+ detectedLanguage: import_zod2.z.string().nullable()
166
893
  });
894
+ var SYSTEM_PROMPT = dedent_default`
895
+ <role>
896
+ You are an expert at analyzing video frames to detect burned-in captions (also called open captions or hardcoded subtitles).
897
+ These are text overlays that are permanently embedded in the video image, common on TikTok, Instagram Reels, and other social media platforms.
898
+ </role>
167
899
 
168
- // src/moderation.ts
169
- init_image_download();
170
- var DEFAULT_THRESHOLDS = {
171
- sexual: 0.7,
172
- violence: 0.8
173
- };
174
- async function processConcurrently(items, processor, maxConcurrent = 5) {
175
- const results = [];
176
- for (let i = 0; i < items.length; i += maxConcurrent) {
177
- const batch = items.slice(i, i + maxConcurrent);
178
- const batchPromises = batch.map(processor);
179
- const batchResults = await Promise.all(batchPromises);
180
- results.push(...batchResults);
181
- }
182
- return results;
183
- }
184
- var HIVE_SEXUAL_CATEGORIES = [
185
- "general_nsfw",
186
- "general_suggestive",
900
+ <critical_note>
901
+ Burned-in captions must appear consistently across MOST frames in the storyboard.
902
+ Text appearing in only 1-2 frames at the end is typically marketing copy, taglines, or end-cards - NOT burned-in captions.
903
+ </critical_note>
904
+
905
+ <confidence_scoring>
906
+ Use this rubric to determine your confidence score (0.0-1.0):
907
+
908
+ - Score 1.0: Definitive captions - text overlays visible in most frames, consistent positioning, content changes between frames indicating dialogue/narration, clear caption-style formatting
909
+ - Score 0.7-0.9: Strong evidence - captions visible across multiple frames with consistent placement, but minor ambiguity (e.g., some frames unclear, atypical styling)
910
+ - Score 0.4-0.6: Moderate evidence - text present in several frames but uncertain classification (e.g., could be captions or persistent on-screen graphics, ambiguous formatting)
911
+ - Score 0.1-0.3: Weak evidence - minimal text detected, appears in only a few frames, likely marketing copy or end-cards rather than captions
912
+ - Score 0.0: No captions - no text overlays detected, or text is clearly not captions (logos, watermarks, scene content, single end-card)
913
+ </confidence_scoring>
914
+
915
+ <context>
916
+ You receive storyboard images containing multiple sequential frames extracted from a video.
917
+ These frames are arranged in a grid and represent the visual progression of the content over time.
918
+ Read frames left-to-right, top-to-bottom to understand the temporal sequence.
919
+ </context>
920
+
921
+ <capabilities>
922
+ - Detect and analyze text overlays in video frames
923
+ - Distinguish between captions and other text elements (marketing, logos, UI)
924
+ - Identify language of detected caption text
925
+ - Assess confidence in caption detection
926
+ </capabilities>
927
+
928
+ <constraints>
929
+ - Only classify as burned-in captions when evidence is clear across multiple frames
930
+ - Base decisions on observable visual evidence
931
+ - Return structured data matching the requested schema
932
+ </constraints>`;
933
+ var burnedInCaptionsPromptBuilder = createPromptBuilder({
934
+ template: {
935
+ task: {
936
+ tag: "task",
937
+ content: dedent_default`
938
+ Analyze the provided video storyboard to detect burned-in captions (hardcoded subtitles).
939
+ Count frames with text vs no text, note position consistency and whether text changes across frames.
940
+ Decide if captions exist, with confidence (0.0-1.0) and detected language if any.`
941
+ },
942
+ analysisSteps: {
943
+ tag: "analysis_steps",
944
+ content: dedent_default`
945
+ 1. COUNT how many frames contain text overlays vs. how many don't
946
+ 2. Check if text appears in consistent positions across multiple frames
947
+ 3. Verify text changes content between frames (indicating dialogue/narration)
948
+ 4. Ensure text has caption-style formatting (contrasting colors, readable fonts)
949
+ 5. If captions are detected, identify the language of the text`
950
+ },
951
+ positiveIndicators: {
952
+ tag: "classify_as_captions",
953
+ content: dedent_default`
954
+ ONLY classify as burned-in captions if:
955
+ - Text appears in multiple frames (not just 1-2 end frames)
956
+ - Text positioning is consistent across those frames
957
+ - Content suggests dialogue, narration, or subtitles (not marketing)
958
+ - Formatting looks like captions (not graphics/logos)`
959
+ },
960
+ negativeIndicators: {
961
+ tag: "not_captions",
962
+ content: dedent_default`
963
+ DO NOT classify as burned-in captions:
964
+ - Marketing taglines appearing only in final 1-2 frames
965
+ - Single words or phrases that don't change between frames
966
+ - Graphics, logos, watermarks, or UI elements
967
+ - Text that's part of the original scene content
968
+ - End-cards with calls-to-action or brand messaging`
969
+ }
970
+ },
971
+ sectionOrder: ["task", "analysisSteps", "positiveIndicators", "negativeIndicators"]
972
+ });
973
/**
 * Builds the user prompt for burned-in caption detection.
 *
 * @param {object} [promptOverrides] - Per-section overrides forwarded unchanged
 *   to `burnedInCaptionsPromptBuilder.build`.
 * @returns {string} The value returned by the prompt builder.
 */
function buildUserPrompt(promptOverrides) {
  const prompt = burnedInCaptionsPromptBuilder.build(promptOverrides);
  return prompt;
}
976
+ var DEFAULT_PROVIDER = "openai";
977
/**
 * Analyzes an asset's storyboard image with a language model to decide whether
 * the video contains burned-in captions.
 *
 * Flow: build the user prompt, create the workflow clients, resolve the
 * asset's playback ID, build the storyboard URL (width 640), then submit the
 * storyboard either by URL or as a downloaded base64 data URL.
 *
 * @param {string} assetId - Asset whose storyboard is analyzed.
 * @param {object} [options] - Workflow options. Recognized here: `provider`
 *   (defaults to DEFAULT_PROVIDER), `model`, `imageSubmissionMode`
 *   ("url" by default, or "base64"), `imageDownloadOptions`,
 *   `promptOverrides`, `abortSignal`. Remaining keys are forwarded to
 *   `createWorkflowClients`; the full options object is passed to
 *   `resolveSigningContext`.
 * @returns {Promise<{ assetId: string, hasBurnedInCaptions: boolean,
 *   confidence: number, detectedLanguage: (string|null), storyboardUrl: string,
 *   usage: object }>} Detection result plus token usage.
 * @throws {Error} When the playback ID is signed but no signing context is
 *   available, or when the model returns no result object.
 */
async function hasBurnedInCaptions(assetId, options = {}) {
  const {
    provider = DEFAULT_PROVIDER,
    model,
    imageSubmissionMode = "url",
    imageDownloadOptions,
    promptOverrides,
    ...clientConfig
  } = options;
  const userPrompt = buildUserPrompt(promptOverrides);
  const clients = createWorkflowClients({ ...clientConfig, model }, provider);
  const { playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
  const signingContext = resolveSigningContext(options);
  const isSigned = policy === "signed";
  if (isSigned && !signingContext) {
    throw new Error(
      "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
    );
  }
  const imageUrl = await getStoryboardUrl(playbackId, 640, isSigned ? signingContext : void 0);

  // In base64 mode the storyboard is downloaded first and sent inline as a data
  // URL; otherwise the model receives the storyboard URL itself.
  const imagePayload = imageSubmissionMode === "base64"
    ? (await downloadImageAsBase64(imageUrl, imageDownloadOptions)).base64Data
    : imageUrl;

  const response = await (0, import_ai.generateObject)({
    model: clients.languageModel.model,
    schema: burnedInCaptionsSchema,
    abortSignal: options.abortSignal,
    experimental_telemetry: { isEnabled: true },
    messages: [
      { role: "system", content: SYSTEM_PROMPT },
      {
        role: "user",
        content: [
          { type: "text", text: userPrompt },
          { type: "image", image: imagePayload }
        ]
      }
    ]
  });

  const result = response.object;
  const tokenUsage = response.usage;
  const usage = {
    inputTokens: tokenUsage.inputTokens,
    outputTokens: tokenUsage.outputTokens,
    totalTokens: tokenUsage.totalTokens,
    reasoningTokens: tokenUsage.reasoningTokens,
    cachedInputTokens: tokenUsage.cachedInputTokens
  };
  if (!result) {
    throw new Error("No analysis result received from AI provider");
  }
  return {
    assetId,
    hasBurnedInCaptions: result.hasBurnedInCaptions ?? false,
    confidence: result.confidence ?? 0,
    detectedLanguage: result.detectedLanguage ?? null,
    storyboardUrl: imageUrl,
    usage
  };
}
1049
+
1050
+ // src/workflows/chapters.ts
1051
+ var import_ai2 = require("ai");
1052
+ var import_zod3 = require("zod");
1053
+
1054
+ // src/lib/retry.ts
1055
+ var DEFAULT_RETRY_OPTIONS = {
1056
+ maxRetries: 3,
1057
+ baseDelay: 2e3,
1058
+ maxDelay: 1e4
1059
+ };
1060
/**
 * Default retry predicate: retry only when the error message contains
 * "Timeout while downloading".
 *
 * @param {Error} error - The error raised by the failed attempt.
 * @param {number} _attempt - Attempt number (unused).
 * @returns {boolean} True when the failure should be retried.
 */
function defaultShouldRetry(error, _attempt) {
  const { message } = error;
  if (!message) {
    return false;
  }
  return Boolean(message.includes("Timeout while downloading"));
}
1063
/**
 * Computes the wait before the next retry: exponential backoff
 * (`baseDelay * 2^(attempt - 1)`) scaled by a random factor in [0.5, 1),
 * capped at `maxDelay`.
 *
 * @param {number} attempt - 1-based attempt number.
 * @param {number} baseDelay - Delay for the first attempt, before jitter.
 * @param {number} maxDelay - Upper bound for the returned delay.
 * @returns {number} Delay, in the same unit as the inputs.
 */
function calculateDelay(attempt, baseDelay, maxDelay) {
  const backoff = baseDelay * Math.pow(2, attempt - 1);
  const jitterFactor = 0.5 + Math.random() * 0.5;
  const jittered = backoff * jitterFactor;
  return jittered < maxDelay ? jittered : Math.min(jittered, maxDelay);
}
1068
/**
 * Runs `fn`, retrying failed attempts with jittered exponential backoff.
 *
 * Up to `maxRetries + 1` attempts are made. A failure is rethrown immediately
 * when it happens on the final attempt or when `shouldRetry` rejects it;
 * `shouldRetry` is not consulted on the final attempt. Non-Error rejections
 * are wrapped in an Error built from `String(value)`.
 *
 * @param {() => Promise<any>} fn - Operation to execute.
 * @param {object} [options]
 * @param {number} [options.maxRetries] - Retries after the first attempt.
 * @param {number} [options.baseDelay] - Base delay handed to `calculateDelay`.
 * @param {number} [options.maxDelay] - Delay cap handed to `calculateDelay`.
 * @param {(error: Error, attempt: number) => boolean} [options.shouldRetry]
 *   Predicate receiving the error and the 1-based number of the failed attempt.
 * @returns {Promise<any>} The value resolved by `fn`.
 * @throws {Error} The last failure, or a generic error when no attempt ran.
 */
async function withRetry(fn, {
  maxRetries = DEFAULT_RETRY_OPTIONS.maxRetries,
  baseDelay = DEFAULT_RETRY_OPTIONS.baseDelay,
  maxDelay = DEFAULT_RETRY_OPTIONS.maxDelay,
  shouldRetry = defaultShouldRetry
} = {}) {
  let lastError;
  let attempt = 0;
  while (attempt <= maxRetries) {
    const attemptNumber = attempt + 1;
    try {
      return await fn();
    } catch (failure) {
      lastError = failure instanceof Error ? failure : new Error(String(failure));
      if (attempt === maxRetries) {
        throw lastError;
      }
      if (!shouldRetry(lastError, attemptNumber)) {
        throw lastError;
      }
      const waitMs = calculateDelay(attemptNumber, baseDelay, maxDelay);
      console.warn(
        `Attempt ${attemptNumber} failed: ${lastError.message}. Retrying in ${Math.round(waitMs)}ms...`
      );
      await new Promise((resolve) => setTimeout(resolve, waitMs));
    }
    attempt += 1;
  }
  // Only reachable when the loop body never ran (e.g. a negative maxRetries).
  throw lastError || new Error("Retry failed with unknown error");
}
1093
+
1094
+ // src/workflows/chapters.ts
1095
+ var chapterSchema = import_zod3.z.object({
1096
+ startTime: import_zod3.z.number(),
1097
+ title: import_zod3.z.string()
1098
+ });
1099
+ var chaptersSchema = import_zod3.z.object({
1100
+ chapters: import_zod3.z.array(chapterSchema)
1101
+ });
1102
+ var DEFAULT_PROVIDER2 = "openai";
1103
+ var SYSTEM_PROMPT2 = `Your role is to segment the following captions into chunked chapters, summarising each chapter with a title.
1104
+
1105
+ Analyze the transcript and create logical chapter breaks based on topic changes, major transitions, or distinct sections of content. Each chapter should represent a meaningful segment of the video.
1106
+
1107
+ You must respond with valid JSON in exactly this format:
1108
+ {
1109
+ "chapters": [
1110
+ {"startTime": 0, "title": "Introduction"},
1111
+ {"startTime": 45.5, "title": "Main Topic Discussion"},
1112
+ {"startTime": 120.0, "title": "Conclusion"}
1113
+ ]
1114
+ }
1115
+
1116
+ Important rules:
1117
+ - startTime must be in seconds (not HH:MM:SS format)
1118
+ - Always start with startTime: 0 for the first chapter
1119
+ - Create 3-8 chapters depending on content length and natural breaks
1120
+ - Chapter titles should be concise and descriptive
1121
+ - Do not include any text before or after the JSON
1122
+ - The JSON must be valid and parseable`;
1123
/**
 * Generates chapter markers for an asset from its caption track using a
 * language model.
 *
 * The transcript is fetched with timestamps intact, sent to the model together
 * with SYSTEM_PROMPT2, and the returned chapters are filtered, sorted by
 * start time, and normalized so the first chapter starts at 0.
 *
 * @param {string} assetId - Asset to generate chapters for.
 * @param {string} languageCode - Language of the caption track to use.
 * @param {object} [options] - Workflow options. Recognized here: `provider`
 *   (defaults to DEFAULT_PROVIDER2), `model`, `abortSignal`. The full object
 *   is also forwarded to `createWorkflowClients` and `resolveSigningContext`.
 * @returns {Promise<{ assetId: string, languageCode: string,
 *   chapters: Array<{ startTime: number, title: string }> }>}
 * @throws {Error} When signing credentials are missing for a signed playback
 *   ID, no usable caption track/content exists, the model call fails, or no
 *   valid chapters are produced.
 */
async function generateChapters(assetId, languageCode, options = {}) {
  const { provider = DEFAULT_PROVIDER2, model, abortSignal } = options;
  const clients = createWorkflowClients({ ...options, model }, provider);
  const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
  const signingContext = resolveSigningContext(options);
  const isSigned = policy === "signed";
  if (isSigned && !signingContext) {
    throw new Error(
      "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
    );
  }

  const transcriptResult = await fetchTranscriptForAsset(assetData, playbackId, {
    languageCode,
    // Timestamps are needed for chapter segmentation, so skip cleaning.
    cleanTranscript: false,
    signingContext: isSigned ? signingContext : void 0
  });
  if (!(transcriptResult.track && transcriptResult.transcriptText)) {
    const languageCodes = getReadyTextTracks(assetData).map((track) => track.language_code);
    const availableLanguages = languageCodes.filter(Boolean).join(", ");
    throw new Error(
      `No caption track found for language '${languageCode}'. Available languages: ${availableLanguages || "none"}`
    );
  }

  const timestampedTranscript = extractTimestampedTranscript(transcriptResult.transcriptText);
  if (!timestampedTranscript) {
    throw new Error("No usable content found in caption track");
  }

  const requestChapters = () => (0, import_ai2.generateObject)({
    model: clients.languageModel.model,
    schema: chaptersSchema,
    abortSignal,
    messages: [
      { role: "system", content: SYSTEM_PROMPT2 },
      { role: "user", content: timestampedTranscript }
    ]
  });

  let chaptersData = null;
  try {
    const response = await withRetry(requestChapters);
    chaptersData = response.object;
  } catch (error) {
    const reason = error instanceof Error ? error.message : "Unknown error";
    throw new Error(
      `Failed to generate chapters with ${provider}: ${reason}`
    );
  }
  if (!chaptersData?.chapters) {
    throw new Error("No chapters generated from AI response");
  }

  const isWellFormed = (chapter) => typeof chapter.startTime === "number" && typeof chapter.title === "string";
  const byStartTime = (a, b) => a.startTime - b.startTime;
  const validChapters = chaptersData.chapters.filter(isWellFormed).sort(byStartTime);
  if (validChapters.length === 0) {
    throw new Error("No valid chapters found in AI response");
  }

  // The earliest chapter is pinned to the very start of the video.
  const [firstChapter] = validChapters;
  if (firstChapter.startTime !== 0) {
    firstChapter.startTime = 0;
  }
  return {
    assetId,
    languageCode,
    chapters: validChapters
  };
}
1190
+
1191
+ // src/workflows/embeddings.ts
1192
+ var import_ai3 = require("ai");
1193
+ var DEFAULT_PROVIDER3 = "openai";
1194
+ var DEFAULT_CHUNKING_STRATEGY = {
1195
+ type: "token",
1196
+ maxTokens: 500,
1197
+ overlap: 100
1198
+ };
1199
+ var DEFAULT_BATCH_SIZE = 5;
1200
/**
 * Computes the element-wise mean of a list of embedding vectors.
 *
 * The output length is taken from the first vector; every vector is read at
 * each of those indices.
 *
 * @param {number[][]} embeddings - Vectors to average.
 * @returns {number[]} The averaged vector, or [] when no vectors are given.
 */
function averageEmbeddings(embeddings) {
  if (embeddings.length === 0) {
    return [];
  }
  const dimensions = embeddings[0].length;
  return Array.from({ length: dimensions }, (_, axis) => {
    let total = 0;
    for (const vector of embeddings) {
      total += vector[axis];
    }
    return total / embeddings.length;
  });
}
1216
/**
 * Embeds transcript chunks in batches: chunks within a batch are embedded in
 * parallel, batches run one after another. Each embed call goes through
 * `withRetry`.
 *
 * @param {Array<{ id: *, text: string, startTime: *, endTime: *, tokenCount: * }>} chunks
 *   Chunks to embed.
 * @param {*} model - Embedding model handed to the `embed` call.
 * @param {number} batchSize - Number of chunks embedded concurrently.
 * @param {AbortSignal} [abortSignal] - Forwarded to each `embed` call.
 * @returns {Promise<Array<{ chunkId: *, embedding: *, metadata: object }>>}
 *   One entry per chunk, in input order.
 */
async function generateChunkEmbeddings(chunks, model, batchSize, abortSignal) {
  const embedChunk = async (chunk) => {
    const { embedding } = await withRetry(
      () => (0, import_ai3.embed)({
        model,
        value: chunk.text,
        abortSignal
      })
    );
    const metadata = {
      startTime: chunk.startTime,
      endTime: chunk.endTime,
      tokenCount: chunk.tokenCount
    };
    return { chunkId: chunk.id, embedding, metadata };
  };

  const collected = [];
  let offset = 0;
  while (offset < chunks.length) {
    const batch = chunks.slice(offset, offset + batchSize);
    const embedded = await Promise.all(batch.map((chunk) => embedChunk(chunk)));
    collected.push(...embedded);
    offset += batchSize;
  }
  return collected;
}
1244
/**
 * Generates embeddings for an asset's transcript: fetches the transcript,
 * splits it into chunks, embeds every chunk, and also returns the
 * element-wise average of all chunk embeddings.
 *
 * @param {string} assetId - Asset whose transcript is embedded.
 * @param {object} [options] - Workflow options. Recognized here: `provider`
 *   (defaults to DEFAULT_PROVIDER3), `model`, `languageCode`,
 *   `chunkingStrategy` (defaults to DEFAULT_CHUNKING_STRATEGY; type "vtt"
 *   switches to cue-based chunking on the uncleaned transcript), `batchSize`
 *   (defaults to DEFAULT_BATCH_SIZE), `abortSignal`. The full object is also
 *   passed to the credential, model and signing helpers.
 * @returns {Promise<object>} `{ assetId, chunks, averagedEmbedding, provider,
 *   model, metadata }` where `metadata` holds chunk/token totals, the
 *   serialized chunking strategy, the embedding dimension count and an ISO
 *   timestamp.
 * @throws {Error} When signing credentials are missing for a signed playback
 *   ID, no caption track or transcript text is available, chunking yields
 *   nothing, or embedding generation fails.
 */
async function generateVideoEmbeddings(assetId, options = {}) {
  const {
    provider = DEFAULT_PROVIDER3,
    model,
    languageCode,
    chunkingStrategy = DEFAULT_CHUNKING_STRATEGY,
    batchSize = DEFAULT_BATCH_SIZE,
    abortSignal
  } = options;

  // Credentials are validated against "google" only for that provider; every
  // other provider value is validated as "openai".
  const credentialProvider = provider === "google" ? "google" : "openai";
  const credentials = validateCredentials(options, credentialProvider);
  const muxClient = createMuxClient(credentials);
  const embeddingModel = resolveEmbeddingModel({ ...options, provider, model });

  const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(muxClient, assetId);
  const signingContext = resolveSigningContext(options);
  const isSigned = policy === "signed";
  if (isSigned && !signingContext) {
    throw new Error(
      "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
    );
  }

  const useVttChunking = chunkingStrategy.type === "vtt";
  const transcriptResult = await fetchTranscriptForAsset(assetData, playbackId, {
    languageCode,
    cleanTranscript: !useVttChunking,
    signingContext: isSigned ? signingContext : void 0
  });
  if (!(transcriptResult.track && transcriptResult.transcriptText)) {
    const languageCodes = getReadyTextTracks(assetData).map((track) => track.language_code);
    const availableLanguages = languageCodes.filter(Boolean).join(", ");
    throw new Error(
      `No caption track found${languageCode ? ` for language '${languageCode}'` : ""}. Available languages: ${availableLanguages || "none"}`
    );
  }

  const { transcriptText } = transcriptResult;
  if (!transcriptText.trim()) {
    throw new Error("Transcript is empty");
  }

  let chunks;
  if (useVttChunking) {
    chunks = chunkVTTCues(
      parseVTTCues(transcriptText),
      chunkingStrategy.maxTokens,
      chunkingStrategy.overlapCues
    );
  } else {
    chunks = chunkText(transcriptText, chunkingStrategy);
  }
  if (chunks.length === 0) {
    throw new Error("No chunks generated from transcript");
  }

  let chunkEmbeddings;
  try {
    chunkEmbeddings = await generateChunkEmbeddings(chunks, embeddingModel.model, batchSize, abortSignal);
  } catch (error) {
    const reason = error instanceof Error ? error.message : "Unknown error";
    throw new Error(
      `Failed to generate embeddings with ${provider}: ${reason}`
    );
  }
  if (chunkEmbeddings.length === 0) {
    throw new Error("No embeddings generated");
  }

  const vectors = chunkEmbeddings.map((entry) => entry.embedding);
  const averagedEmbedding = averageEmbeddings(vectors);
  const totalTokens = chunks.reduce((sum, chunk) => sum + chunk.tokenCount, 0);
  const metadata = {
    totalChunks: chunks.length,
    totalTokens,
    chunkingStrategy: JSON.stringify(chunkingStrategy),
    embeddingDimensions: chunkEmbeddings[0].embedding.length,
    generatedAt: (/* @__PURE__ */ new Date()).toISOString()
  };
  return {
    assetId,
    chunks: chunkEmbeddings,
    averagedEmbedding,
    provider,
    model: embeddingModel.modelId,
    metadata
  };
}
1323
+
1324
+ // src/workflows/moderation.ts
1325
+ var DEFAULT_THRESHOLDS = {
1326
+ sexual: 0.7,
1327
+ violence: 0.8
1328
+ };
1329
+ var DEFAULT_PROVIDER4 = "openai";
1330
+ var HIVE_ENDPOINT = "https://api.thehive.ai/api/v2/task/sync";
1331
+ var HIVE_SEXUAL_CATEGORIES = [
1332
+ "general_nsfw",
1333
+ "general_suggestive",
187
1334
  "yes_sexual_activity",
188
1335
  "female_underwear",
189
1336
  "male_underwear",
@@ -216,280 +1363,183 @@ var HIVE_VIOLENCE_CATEGORIES = [
216
1363
  "fights",
217
1364
  "garm_death_injury_or_military_conflict"
218
1365
  ];
219
- function getThumbnailUrls(playbackId, duration, options = {}) {
220
- const { interval = 10, width = 640 } = options;
221
- const timestamps = [];
222
- if (duration <= 50) {
223
- const spacing = duration / 6;
224
- for (let i = 1; i <= 5; i++) {
225
- timestamps.push(Math.round(i * spacing));
226
- }
227
- } else {
228
- for (let time = 0; time < duration; time += interval) {
229
- timestamps.push(time);
230
- }
1366
+ async function processConcurrently(items, processor, maxConcurrent = 5) {
1367
+ const results = [];
1368
+ for (let i = 0; i < items.length; i += maxConcurrent) {
1369
+ const batch = items.slice(i, i + maxConcurrent);
1370
+ const batchPromises = batch.map(processor);
1371
+ const batchResults = await Promise.all(batchPromises);
1372
+ results.push(...batchResults);
231
1373
  }
232
- return timestamps.map(
233
- (time) => `https://image.mux.com/${playbackId}/thumbnail.png?time=${time}&width=${width}`
234
- );
1374
+ return results;
235
1375
  }
236
- async function requestOpenAIModeration(imageUrls, openaiClient, model, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
237
- if (submissionMode === "base64") {
1376
+ async function requestOpenAIModeration(imageUrls, apiKey, model, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
1377
+ const targetUrls = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map(
1378
+ (img) => ({ url: img.url, image: img.base64Data })
1379
+ ) : imageUrls.map((url) => ({ url, image: url }));
1380
+ const moderate = async (entry) => {
238
1381
  try {
239
- const downloadResults = await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent);
240
- const processor2 = async (downloadResult) => {
241
- try {
242
- const moderation = await openaiClient.moderations.create({
243
- model,
244
- input: [
245
- {
246
- type: "image_url",
247
- image_url: {
248
- url: downloadResult.base64Data
249
- // Use base64 data URI
250
- }
1382
+ const res = await fetch("https://api.openai.com/v1/moderations", {
1383
+ method: "POST",
1384
+ headers: {
1385
+ "Content-Type": "application/json",
1386
+ "Authorization": `Bearer ${apiKey}`
1387
+ },
1388
+ body: JSON.stringify({
1389
+ model,
1390
+ input: [
1391
+ {
1392
+ type: "image_url",
1393
+ image_url: {
1394
+ url: entry.image
251
1395
  }
252
- ]
253
- });
254
- const categoryScores = moderation.results[0].category_scores;
255
- return {
256
- url: downloadResult.url,
257
- // Return original URL for tracking
258
- sexual: categoryScores.sexual || 0,
259
- violence: categoryScores.violence || 0,
260
- error: false
261
- };
262
- } catch (error) {
263
- console.error(`Failed to moderate downloaded image ${downloadResult.url}:`, error);
264
- return {
265
- url: downloadResult.url,
266
- sexual: 0,
267
- violence: 0,
268
- error: true
269
- };
270
- }
271
- };
272
- return processConcurrently(downloadResults, processor2, maxConcurrent);
273
- } catch (error) {
274
- console.error("Failed to download images for base64 submission:", error);
275
- return imageUrls.map((url) => ({
276
- url,
277
- sexual: 0,
278
- violence: 0,
279
- error: true
280
- }));
281
- }
282
- }
283
- const processor = async (url) => {
284
- try {
285
- const moderation = await openaiClient.moderations.create({
286
- model,
287
- input: [
288
- {
289
- type: "image_url",
290
- image_url: {
291
- url
292
1396
  }
293
- }
294
- ]
1397
+ ]
1398
+ })
295
1399
  });
296
- const categoryScores = moderation.results[0].category_scores;
1400
+ const json = await res.json();
1401
+ if (!res.ok) {
1402
+ throw new Error(
1403
+ `OpenAI moderation error: ${res.status} ${res.statusText} - ${JSON.stringify(json)}`
1404
+ );
1405
+ }
1406
+ const categoryScores = json.results?.[0]?.category_scores || {};
297
1407
  return {
298
- url,
1408
+ url: entry.url,
299
1409
  sexual: categoryScores.sexual || 0,
300
1410
  violence: categoryScores.violence || 0,
301
1411
  error: false
302
1412
  };
303
1413
  } catch (error) {
304
- console.error("Failed to moderate image:", error);
1414
+ console.error("OpenAI moderation failed:", error);
305
1415
  return {
306
- url,
1416
+ url: entry.url,
307
1417
  sexual: 0,
308
1418
  violence: 0,
309
1419
  error: true
310
1420
  };
311
1421
  }
312
1422
  };
313
- return processConcurrently(imageUrls, processor, maxConcurrent);
1423
+ return processConcurrently(targetUrls, moderate, maxConcurrent);
314
1424
  }
315
- async function requestHiveModeration(imageUrls, hiveApiKey, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
316
- if (submissionMode === "base64") {
317
- try {
318
- const downloadResults = await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent);
319
- const processor2 = async (downloadResult) => {
320
- try {
321
- const formData = new FormData();
322
- const imageBlob = new Blob([downloadResult.buffer], {
323
- type: downloadResult.contentType
324
- });
325
- const extension = downloadResult.contentType.split("/")[1] || "png";
326
- formData.append("media", imageBlob, `image.${extension}`);
327
- const response = await fetch("https://api.thehive.ai/api/v2/task/sync", {
328
- method: "POST",
329
- headers: {
330
- "Authorization": `Token ${hiveApiKey}`
331
- // Don't set Content-Type header - let fetch set it with boundary for multipart
332
- },
333
- body: formData
334
- });
335
- if (!response.ok) {
336
- throw new Error(`Hive API error: ${response.statusText}`);
337
- }
338
- const hiveResult = await response.json();
339
- const classes = hiveResult.status?.[0]?.response?.output?.[0]?.classes || [];
340
- const scoreMap = Object.fromEntries(classes.map((c) => [c.class, c.score]));
341
- const sexualScores = HIVE_SEXUAL_CATEGORIES.map(
342
- (category) => scoreMap[category] || 0
343
- );
344
- const violenceScores = HIVE_VIOLENCE_CATEGORIES.map(
345
- (category) => scoreMap[category] || 0
346
- );
347
- return {
348
- url: downloadResult.url,
349
- // Return original URL for tracking
350
- sexual: Math.max(...sexualScores, 0),
351
- violence: Math.max(...violenceScores, 0),
352
- error: false
353
- };
354
- } catch (error) {
355
- console.error(`Failed to moderate uploaded image ${downloadResult.url}:`, error);
356
- return {
357
- url: downloadResult.url,
358
- sexual: 0,
359
- violence: 0,
360
- error: true
361
- };
362
- }
363
- };
364
- return processConcurrently(downloadResults, processor2, maxConcurrent);
365
- } catch (error) {
366
- console.error("Failed to download images for Hive multipart upload:", error);
367
- return imageUrls.map((url) => ({
368
- url,
369
- sexual: 0,
370
- violence: 0,
371
- error: true
372
- }));
1425
/**
 * Returns the highest score among the requested categories in a list of
 * `{ class, score }` entries.
 *
 * Categories that are absent (or have a falsy score) count as 0, and the
 * result is never below 0.
 *
 * @param {Array<{ class: string, score: number }>} classes - Scored classes.
 * @param {string[]} categoryNames - Class names to consider.
 * @returns {number} Maximum score across `categoryNames`, at least 0.
 */
function getHiveCategoryScores(classes, categoryNames) {
  const scoreByClass = Object.fromEntries(
    classes.map((entry) => [entry.class, entry.score])
  );
  return categoryNames.reduce(
    (highest, category) => Math.max(highest, scoreByClass[category] || 0),
    0
  );
}
1432
+ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
1433
+ const targets = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map((img) => ({
1434
+ url: img.url,
1435
+ source: {
1436
+ kind: "file",
1437
+ buffer: img.buffer,
1438
+ contentType: img.contentType
373
1439
  }
374
- }
375
- const processor = async (url) => {
1440
+ })) : imageUrls.map((url) => ({
1441
+ url,
1442
+ source: { kind: "url", value: url }
1443
+ }));
1444
+ const moderate = async (entry) => {
376
1445
  try {
377
- const response = await fetch("https://api.thehive.ai/api/v2/task/sync", {
1446
+ const formData = new FormData();
1447
+ if (entry.source.kind === "url") {
1448
+ formData.append("url", entry.source.value);
1449
+ } else {
1450
+ const extension = entry.source.contentType.split("/")[1] || "jpg";
1451
+ const blob = new Blob([entry.source.buffer], {
1452
+ type: entry.source.contentType
1453
+ });
1454
+ formData.append("media", blob, `thumbnail.${extension}`);
1455
+ }
1456
+ const res = await fetch(HIVE_ENDPOINT, {
378
1457
  method: "POST",
379
1458
  headers: {
380
- "Authorization": `Token ${hiveApiKey}`,
381
- "Content-Type": "application/json"
1459
+ Accept: "application/json",
1460
+ Authorization: `Token ${apiKey}`
382
1461
  },
383
- body: JSON.stringify({ url })
1462
+ body: formData
384
1463
  });
385
- if (!response.ok) {
386
- throw new Error(`Hive API error: ${response.statusText}`);
1464
+ const json = await res.json().catch(() => void 0);
1465
+ if (!res.ok) {
1466
+ throw new Error(
1467
+ `Hive moderation error: ${res.status} ${res.statusText} - ${JSON.stringify(json)}`
1468
+ );
387
1469
  }
388
- const hiveResult = await response.json();
389
- const classes = hiveResult.status?.[0]?.response?.output?.[0]?.classes || [];
390
- const scoreMap = Object.fromEntries(classes.map((c) => [c.class, c.score]));
391
- const sexualScores = HIVE_SEXUAL_CATEGORIES.map(
392
- (category) => scoreMap[category] || 0
393
- );
394
- const violenceScores = HIVE_VIOLENCE_CATEGORIES.map(
395
- (category) => scoreMap[category] || 0
396
- );
1470
+ const classes = json?.status?.[0]?.response?.output?.[0]?.classes || [];
397
1471
  return {
398
- url,
399
- sexual: Math.max(...sexualScores, 0),
400
- violence: Math.max(...violenceScores, 0),
1472
+ url: entry.url,
1473
+ sexual: getHiveCategoryScores(classes, HIVE_SEXUAL_CATEGORIES),
1474
+ violence: getHiveCategoryScores(classes, HIVE_VIOLENCE_CATEGORIES),
401
1475
  error: false
402
1476
  };
403
1477
  } catch (error) {
404
- console.error("Failed to moderate image with Hive:", error);
1478
+ console.error("Hive moderation failed:", error);
405
1479
  return {
406
- url,
1480
+ url: entry.url,
407
1481
  sexual: 0,
408
1482
  violence: 0,
409
1483
  error: true
410
1484
  };
411
1485
  }
412
1486
  };
413
- return processConcurrently(imageUrls, processor, maxConcurrent);
1487
+ return processConcurrently(targets, moderate, maxConcurrent);
414
1488
  }
415
1489
  async function getModerationScores(assetId, options = {}) {
416
1490
  const {
417
- provider = "openai",
418
- model = "omni-moderation-latest",
1491
+ provider = DEFAULT_PROVIDER4,
1492
+ model = provider === "openai" ? "omni-moderation-latest" : void 0,
419
1493
  thresholds = DEFAULT_THRESHOLDS,
420
1494
  thumbnailInterval = 10,
421
1495
  thumbnailWidth = 640,
422
1496
  maxConcurrent = 5,
423
1497
  imageSubmissionMode = "url",
424
- imageDownloadOptions,
425
- muxTokenId,
426
- muxTokenSecret,
427
- openaiApiKey,
428
- ...config
1498
+ imageDownloadOptions
429
1499
  } = options;
430
- if (provider !== "openai" && provider !== "hive") {
431
- throw new Error("Only OpenAI and Hive providers are currently supported");
432
- }
433
- const muxId = muxTokenId || process.env.MUX_TOKEN_ID;
434
- const muxSecret = muxTokenSecret || process.env.MUX_TOKEN_SECRET;
435
- const openaiKey = openaiApiKey || process.env.OPENAI_API_KEY;
436
- const hiveKey = options.hiveApiKey || process.env.HIVE_API_KEY;
437
- if (!muxId || !muxSecret) {
438
- throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
439
- }
440
- if (provider === "openai" && !openaiKey) {
441
- throw new Error("OpenAI API key is required for OpenAI provider. Provide openaiApiKey in options or set OPENAI_API_KEY environment variable.");
442
- }
443
- if (provider === "hive" && !hiveKey) {
444
- throw new Error("Hive API key is required for Hive provider. Provide hiveApiKey in options or set HIVE_API_KEY environment variable.");
445
- }
446
- const mux = new Mux({
447
- tokenId: muxId,
448
- tokenSecret: muxSecret
449
- });
450
- let openaiClient;
451
- if (provider === "openai") {
452
- openaiClient = new OpenAI({
453
- apiKey: openaiKey
454
- });
455
- }
456
- let assetData;
457
- try {
458
- const asset = await mux.video.assets.retrieve(assetId);
459
- assetData = asset;
460
- } catch (error) {
461
- throw new Error(`Failed to fetch asset from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
462
- }
463
- const publicPlaybackIds = assetData.playback_ids?.filter((pid) => pid.policy === "public") || [];
464
- if (publicPlaybackIds.length === 0) {
465
- throw new Error("No public playback IDs found for this asset. Moderation requires public playback access.");
1500
+ const credentials = validateCredentials(options, provider === "openai" ? "openai" : void 0);
1501
+ const muxClient = createMuxClient(credentials);
1502
+ const { asset, playbackId, policy } = await getPlaybackIdForAsset(muxClient, assetId);
1503
+ const duration = asset.duration || 0;
1504
+ const signingContext = resolveSigningContext(options);
1505
+ if (policy === "signed" && !signingContext) {
1506
+ throw new Error(
1507
+ "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
1508
+ );
466
1509
  }
467
- const playbackId = publicPlaybackIds[0].id;
468
- const duration = assetData.duration || 0;
469
- const thumbnailUrls = getThumbnailUrls(playbackId, duration, {
1510
+ const thumbnailUrls = await getThumbnailUrls(playbackId, duration, {
470
1511
  interval: thumbnailInterval,
471
- width: thumbnailWidth
1512
+ width: thumbnailWidth,
1513
+ signingContext: policy === "signed" ? signingContext : void 0
472
1514
  });
473
1515
  let thumbnailScores;
474
1516
  if (provider === "openai") {
1517
+ const apiKey = credentials.openaiApiKey;
1518
+ if (!apiKey) {
1519
+ throw new Error("OpenAI API key is required for moderation. Set OPENAI_API_KEY or pass openaiApiKey.");
1520
+ }
475
1521
  thumbnailScores = await requestOpenAIModeration(
476
1522
  thumbnailUrls,
477
- openaiClient,
478
- model,
1523
+ apiKey,
1524
+ model || "omni-moderation-latest",
479
1525
  maxConcurrent,
480
1526
  imageSubmissionMode,
481
1527
  imageDownloadOptions
482
1528
  );
483
1529
  } else if (provider === "hive") {
1530
+ const hiveApiKey = options.hiveApiKey || env_default.HIVE_API_KEY;
1531
+ if (!hiveApiKey) {
1532
+ throw new Error("Hive API key is required for moderation. Set HIVE_API_KEY or pass hiveApiKey.");
1533
+ }
484
1534
  thumbnailScores = await requestHiveModeration(
485
1535
  thumbnailUrls,
486
- hiveKey,
1536
+ hiveApiKey,
487
1537
  maxConcurrent,
488
1538
  imageSubmissionMode,
489
1539
  imageDownloadOptions
490
1540
  );
491
1541
  } else {
492
- throw new Error("Unsupported provider");
1542
+ throw new Error(`Unsupported moderation provider: ${provider}`);
493
1543
  }
494
1544
  const maxSexual = Math.max(...thumbnailScores.map((s) => s.sexual));
495
1545
  const maxViolence = Math.max(...thumbnailScores.map((s) => s.violence));
@@ -506,521 +1556,297 @@ async function getModerationScores(assetId, options = {}) {
506
1556
  };
507
1557
  }
508
1558
 
509
- // src/summarization.ts
510
- init_image_download();
1559
+ // src/workflows/summarization.ts
1560
+ var import_ai4 = require("ai");
1561
+ var import_zod4 = require("zod");
1562
+ var SUMMARY_KEYWORD_LIMIT = 10;
1563
+ var summarySchema = import_zod4.z.object({
1564
+ keywords: import_zod4.z.array(import_zod4.z.string()),
1565
+ title: import_zod4.z.string(),
1566
+ description: import_zod4.z.string()
1567
+ });
1568
+ var TONE_INSTRUCTIONS = {
1569
+ normal: "Provide a clear, straightforward analysis.",
1570
+ sassy: "Answer with a sassy, playful attitude and personality.",
1571
+ professional: "Provide a professional, executive-level analysis suitable for business reporting."
1572
+ };
1573
+ var summarizationPromptBuilder = createPromptBuilder({
1574
+ template: {
1575
+ task: {
1576
+ tag: "task",
1577
+ content: "Analyze the storyboard frames and generate metadata that captures the essence of the video content."
1578
+ },
1579
+ title: {
1580
+ tag: "title_requirements",
1581
+ content: dedent_default`
1582
+ A short, compelling headline that immediately communicates the subject or action.
1583
+ Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
1584
+ Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
1585
+ Use active, specific language.`
1586
+ },
1587
+ description: {
1588
+ tag: "description_requirements",
1589
+ content: dedent_default`
1590
+ A concise summary (2-4 sentences) that describes what happens across the video.
1591
+ Cover the main subjects, actions, setting, and any notable progression visible across frames.
1592
+ Write in present tense. Be specific about observable details rather than making assumptions.
1593
+ If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
1594
+ },
1595
+ keywords: {
1596
+ tag: "keywords_requirements",
1597
+ content: dedent_default`
1598
+ Specific, searchable terms (up to 10) that capture:
1599
+ - Primary subjects (people, animals, objects)
1600
+ - Actions and activities being performed
1601
+ - Setting and environment
1602
+ - Notable objects or tools
1603
+ - Style or genre (if applicable)
1604
+ Prefer concrete nouns and action verbs over abstract concepts.
1605
+ Use lowercase. Avoid redundant or overly generic terms like "video" or "content".`
1606
+ },
1607
+ qualityGuidelines: {
1608
+ tag: "quality_guidelines",
1609
+ content: dedent_default`
1610
+ - Examine all frames to understand the full context and progression
1611
+ - Be precise: "golden retriever" is better than "dog" when identifiable
1612
+ - Capture the narrative: what begins, develops, and concludes
1613
+ - Balance brevity with informativeness`
1614
+ }
1615
+ },
1616
+ sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
1617
+ });
1618
+ var SYSTEM_PROMPT3 = dedent_default`
1619
+ <role>
1620
+ You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
1621
+ </role>
511
1622
 
512
- // src/utils/vtt-parser.ts
513
- function extractTextFromVTT(vttContent) {
514
- if (!vttContent.trim()) {
515
- return "";
516
- }
517
- const lines = vttContent.split("\n");
518
- const textLines = [];
519
- for (let i = 0; i < lines.length; i++) {
520
- const line = lines[i].trim();
521
- if (!line) continue;
522
- if (line === "WEBVTT") continue;
523
- if (line.startsWith("NOTE ")) continue;
524
- if (line.includes("-->")) continue;
525
- if (/^[\d\w-]+$/.test(line) && !line.includes(" ")) continue;
526
- if (line.startsWith("STYLE") || line.startsWith("REGION")) continue;
527
- const cleanLine = line.replace(/<[^>]*>/g, "").trim();
528
- if (cleanLine) {
529
- textLines.push(cleanLine);
530
- }
531
- }
532
- return textLines.join(" ").replace(/\s+/g, " ").trim();
533
- }
1623
+ <context>
1624
+ You receive storyboard images containing multiple sequential frames extracted from a video.
1625
+ These frames are arranged in a grid and represent the visual progression of the content over time.
1626
+ Read frames left-to-right, top-to-bottom to understand the temporal sequence.
1627
+ </context>
534
1628
 
535
- // src/summarization.ts
536
- var summarySchema = z.object({
537
- keywords: z.array(z.string()).max(10),
538
- title: z.string().max(100),
539
- description: z.string().max(1e3)
540
- });
541
- var DEFAULT_PROMPT = "Generate a short title (max 100 characters) and description (max 500 characters) for what happens. Start immediately with the action or subject - never reference that this is a video, content, or storyboard. Example: Title: 'Cooking Pasta Tutorial' Description: 'Someone cooks pasta by boiling water and adding noodles.'";
542
- var ANTHROPIC_JSON_PROMPT = `You must respond with valid JSON in exactly this format:
543
- {
544
- "title": "Your title here (max 100 characters)",
545
- "description": "Your description here (max 500 characters)",
546
- "keywords": ["keyword1", "keyword2", "keyword3"]
547
- }
1629
+ <transcript_guidance>
1630
+ When a transcript is provided alongside the storyboard:
1631
+ - Use it to understand spoken content, dialogue, narration, and audio context
1632
+ - Correlate transcript content with visual frames to build a complete picture
1633
+ - Extract key terminology, names, and specific language used by speakers
1634
+ - Let the transcript inform keyword selection, especially for topics not visually obvious
1635
+ - Prioritize visual content for the description, but enrich it with transcript insights
1636
+ - If transcript and visuals conflict, trust the visual evidence
1637
+ </transcript_guidance>
548
1638
 
549
- Do not include any text before or after the JSON. The JSON must be valid and parseable.`;
550
- async function getSummaryAndTags(assetId, promptOrOptions, options) {
551
- let prompt;
552
- let actualOptions;
553
- if (typeof promptOrOptions === "string") {
554
- prompt = promptOrOptions;
555
- actualOptions = options || {};
556
- } else {
557
- prompt = DEFAULT_PROMPT;
558
- actualOptions = promptOrOptions || {};
1639
+ <capabilities>
1640
+ - Extract meaning from visual sequences
1641
+ - Identify subjects, actions, settings, and narrative arcs
1642
+ - Generate accurate, searchable metadata
1643
+ - Synthesize visual and transcript information when provided
1644
+ </capabilities>
1645
+
1646
+ <constraints>
1647
+ - Only describe what is clearly observable in the frames or explicitly stated in the transcript
1648
+ - Do not fabricate details or make unsupported assumptions
1649
+ - Return structured data matching the requested schema
1650
+ </constraints>`;
1651
+ function buildUserPrompt2({
1652
+ tone,
1653
+ transcriptText,
1654
+ isCleanTranscript = true,
1655
+ promptOverrides
1656
+ }) {
1657
+ const contextSections = [createToneSection(TONE_INSTRUCTIONS[tone])];
1658
+ if (transcriptText) {
1659
+ const format = isCleanTranscript ? "plain text" : "WebVTT";
1660
+ contextSections.push(createTranscriptSection(transcriptText, format));
1661
+ }
1662
+ return summarizationPromptBuilder.buildWithContext(promptOverrides, contextSections);
1663
+ }
1664
+ var DEFAULT_PROVIDER5 = "openai";
1665
+ var DEFAULT_TONE = "normal";
1666
+ function normalizeKeywords(keywords) {
1667
+ if (!Array.isArray(keywords) || keywords.length === 0) {
1668
+ return [];
1669
+ }
1670
+ const uniqueLowercase = /* @__PURE__ */ new Set();
1671
+ const normalized = [];
1672
+ for (const keyword of keywords) {
1673
+ const trimmed = keyword?.trim();
1674
+ if (!trimmed) {
1675
+ continue;
1676
+ }
1677
+ const lower = trimmed.toLowerCase();
1678
+ if (uniqueLowercase.has(lower)) {
1679
+ continue;
1680
+ }
1681
+ uniqueLowercase.add(lower);
1682
+ normalized.push(trimmed);
1683
+ if (normalized.length === SUMMARY_KEYWORD_LIMIT) {
1684
+ break;
1685
+ }
559
1686
  }
1687
+ return normalized;
1688
+ }
1689
+ async function getSummaryAndTags(assetId, options) {
560
1690
  const {
561
- provider = "openai",
1691
+ provider = DEFAULT_PROVIDER5,
562
1692
  model,
563
- tone = "normal",
1693
+ tone = DEFAULT_TONE,
564
1694
  includeTranscript = true,
565
1695
  cleanTranscript = true,
566
1696
  imageSubmissionMode = "url",
567
1697
  imageDownloadOptions,
568
- muxTokenId,
569
- muxTokenSecret,
570
- openaiApiKey,
571
- anthropicApiKey,
572
- ...config
573
- } = actualOptions;
574
- const defaultModel = provider === "anthropic" ? "claude-3-5-haiku-20241022" : "gpt-4o-mini";
575
- const finalModel = model || defaultModel;
576
- const muxId = muxTokenId || process.env.MUX_TOKEN_ID;
577
- const muxSecret = muxTokenSecret || process.env.MUX_TOKEN_SECRET;
578
- const openaiKey = openaiApiKey || process.env.OPENAI_API_KEY;
579
- const anthropicKey = anthropicApiKey || process.env.ANTHROPIC_API_KEY;
580
- if (!muxId || !muxSecret) {
581
- throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
582
- }
583
- if (provider === "openai" && !openaiKey) {
584
- throw new Error("OpenAI API key is required. Provide openaiApiKey in options or set OPENAI_API_KEY environment variable.");
585
- }
586
- if (provider === "anthropic" && !anthropicKey) {
587
- throw new Error("Anthropic API key is required. Provide anthropicApiKey in options or set ANTHROPIC_API_KEY environment variable.");
1698
+ abortSignal,
1699
+ promptOverrides
1700
+ } = options ?? {};
1701
+ const clients = createWorkflowClients(
1702
+ { ...options, model },
1703
+ provider
1704
+ );
1705
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
1706
+ const signingContext = resolveSigningContext(options ?? {});
1707
+ if (policy === "signed" && !signingContext) {
1708
+ throw new Error(
1709
+ "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
1710
+ );
588
1711
  }
589
- const mux = new Mux({
590
- tokenId: muxId,
591
- tokenSecret: muxSecret
1712
+ const transcriptText = includeTranscript ? (await fetchTranscriptForAsset(assetData, playbackId, {
1713
+ cleanTranscript,
1714
+ signingContext: policy === "signed" ? signingContext : void 0
1715
+ })).transcriptText : "";
1716
+ const userPrompt = buildUserPrompt2({
1717
+ tone,
1718
+ transcriptText,
1719
+ isCleanTranscript: cleanTranscript,
1720
+ promptOverrides
592
1721
  });
593
- let openaiClient;
594
- let anthropicClient;
595
- if (provider === "openai") {
596
- openaiClient = new OpenAI({
597
- apiKey: openaiKey
598
- });
599
- } else if (provider === "anthropic") {
600
- anthropicClient = new Anthropic({
601
- apiKey: anthropicKey
602
- });
603
- }
604
- let assetData;
605
- try {
606
- const asset = await mux.video.assets.retrieve(assetId);
607
- assetData = asset;
608
- } catch (error) {
609
- throw new Error(`Failed to fetch asset from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
610
- }
611
- const playbackId = assetData.playback_ids?.[0]?.id;
612
- if (!playbackId) {
613
- throw new Error("No playback ID found for this asset");
614
- }
615
- let transcriptText = "";
616
- if (includeTranscript && assetData.tracks) {
617
- const textTrack = assetData.tracks.find(
618
- (track) => track.type === "text" && track.status === "ready"
619
- );
620
- if (textTrack) {
621
- const transcriptUrl = `https://stream.mux.com/${playbackId}/text/${textTrack.id}.vtt`;
622
- try {
623
- const transcriptResponse = await fetch(transcriptUrl);
624
- if (transcriptResponse.ok) {
625
- const rawVttContent = await transcriptResponse.text();
626
- transcriptText = cleanTranscript ? extractTextFromVTT(rawVttContent) : rawVttContent;
1722
+ const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
1723
+ const analyzeStoryboard = async (imageDataUrl) => {
1724
+ const response = await (0, import_ai4.generateObject)({
1725
+ model: clients.languageModel.model,
1726
+ schema: summarySchema,
1727
+ abortSignal,
1728
+ messages: [
1729
+ {
1730
+ role: "system",
1731
+ content: SYSTEM_PROMPT3
1732
+ },
1733
+ {
1734
+ role: "user",
1735
+ content: [
1736
+ { type: "text", text: userPrompt },
1737
+ { type: "image", image: imageDataUrl }
1738
+ ]
627
1739
  }
628
- } catch (error) {
629
- console.warn("Failed to fetch transcript:", error);
630
- }
631
- }
632
- }
633
- let toneInstruction = "";
634
- switch (tone) {
635
- case "sassy":
636
- toneInstruction = " Answer with a sassy, playful attitude and personality.";
637
- break;
638
- case "professional":
639
- toneInstruction = " Provide a professional, executive-level analysis suitable for business reporting.";
640
- break;
641
- default:
642
- toneInstruction = " Provide a clear, straightforward analysis.";
643
- }
644
- let contextualPrompt = prompt + toneInstruction;
645
- if (transcriptText) {
646
- const transcriptType = cleanTranscript ? "transcript" : "WebVTT transcript";
647
- contextualPrompt += ` Use the following ${transcriptType} for additional context: "${transcriptText}"`;
648
- }
649
- const imageUrl = `https://image.mux.com/${playbackId}/storyboard.png?width=640`;
1740
+ ]
1741
+ });
1742
+ return response.object;
1743
+ };
650
1744
  let aiAnalysis = null;
651
- let retryAttempt = 0;
652
- const maxRetries = 3;
653
- if (provider === "openai") {
654
- if (imageSubmissionMode === "base64") {
655
- try {
656
- const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
657
- const response = await openaiClient.responses.parse({
658
- model: finalModel,
659
- input: [
660
- {
661
- role: "system",
662
- content: "You are an image analysis tool. You will be given a storyboard image from a video showing multiple frames/scenes, and be expected to return structured data about the contents across all the frames."
663
- },
664
- {
665
- role: "user",
666
- content: [
667
- {
668
- type: "input_text",
669
- text: contextualPrompt
670
- },
671
- {
672
- type: "input_image",
673
- image_url: downloadResult.base64Data,
674
- // Use base64 data URI
675
- detail: "high"
676
- }
677
- ]
678
- }
679
- ],
680
- text: {
681
- format: zodTextFormat(summarySchema, "analysis")
682
- }
683
- });
684
- aiAnalysis = response.output_parsed;
685
- } catch (error) {
686
- throw new Error(`Failed to analyze video content with OpenAI in base64 mode: ${error instanceof Error ? error.message : "Unknown error"}`);
687
- }
688
- } else {
689
- while (retryAttempt <= maxRetries) {
690
- try {
691
- const response = await openaiClient.responses.parse({
692
- model: finalModel,
693
- input: [
694
- {
695
- role: "system",
696
- content: "You are an image analysis tool. You will be given a storyboard image from a video showing multiple frames/scenes, and be expected to return structured data about the contents across all the frames."
697
- },
698
- {
699
- role: "user",
700
- content: [
701
- {
702
- type: "input_text",
703
- text: contextualPrompt
704
- },
705
- {
706
- type: "input_image",
707
- image_url: imageUrl,
708
- detail: "high"
709
- }
710
- ]
711
- }
712
- ],
713
- text: {
714
- format: zodTextFormat(summarySchema, "analysis")
715
- }
716
- });
717
- aiAnalysis = response.output_parsed;
718
- break;
719
- } catch (error) {
720
- const isTimeoutError = error instanceof Error && error.message && error.message.includes("Timeout while downloading");
721
- if (isTimeoutError && retryAttempt < maxRetries) {
722
- await new Promise((resolve) => setTimeout(resolve, 5e3));
723
- retryAttempt++;
724
- continue;
725
- }
726
- throw new Error(`Failed to analyze video content with OpenAI: ${error instanceof Error ? error.message : "Unknown error"}`);
727
- }
728
- }
729
- }
730
- } else if (provider === "anthropic") {
731
- const anthropicPrompt = `${contextualPrompt}
732
-
733
- ${ANTHROPIC_JSON_PROMPT}`;
1745
+ try {
734
1746
  if (imageSubmissionMode === "base64") {
735
- try {
736
- const fileUploadResult = await uploadImageToAnthropicFiles(imageUrl, anthropicKey, imageDownloadOptions);
737
- const response = await anthropicClient.messages.create({
738
- model: finalModel,
739
- max_tokens: 1e3,
740
- messages: [
741
- {
742
- role: "user",
743
- content: [
744
- {
745
- type: "image",
746
- source: {
747
- type: "file",
748
- file_id: fileUploadResult.fileId
749
- }
750
- // Type assertion for Files API support
751
- },
752
- {
753
- type: "text",
754
- text: anthropicPrompt
755
- }
756
- ]
757
- }
758
- ]
759
- }, {
760
- headers: {
761
- "anthropic-beta": "files-api-2025-04-14"
762
- }
763
- });
764
- const content = response.content[0];
765
- if (content.type === "text") {
766
- const jsonText = content.text.trim();
767
- try {
768
- aiAnalysis = JSON.parse(jsonText);
769
- } catch (parseError) {
770
- throw new Error(`Failed to parse JSON response from Anthropic: ${parseError instanceof Error ? parseError.message : "Unknown error"}`);
771
- }
772
- } else {
773
- throw new Error("Unexpected response type from Anthropic");
774
- }
775
- } catch (error) {
776
- const errorMessage = error instanceof Error ? error.message : "Unknown error";
777
- throw new Error(`Failed to analyze video content with Anthropic Files API: ${errorMessage}`);
778
- }
1747
+ const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
1748
+ aiAnalysis = await analyzeStoryboard(downloadResult.base64Data);
779
1749
  } else {
780
- while (retryAttempt <= maxRetries) {
781
- try {
782
- const response = await anthropicClient.messages.create({
783
- model: finalModel,
784
- max_tokens: 1e3,
785
- messages: [
786
- {
787
- role: "user",
788
- content: [
789
- {
790
- type: "image",
791
- source: {
792
- type: "url",
793
- url: imageUrl
794
- }
795
- // Type assertion to work around SDK type definitions
796
- },
797
- {
798
- type: "text",
799
- text: anthropicPrompt
800
- }
801
- ]
802
- }
803
- ]
804
- });
805
- const content = response.content[0];
806
- if (content.type === "text") {
807
- const jsonText = content.text.trim();
808
- try {
809
- aiAnalysis = JSON.parse(jsonText);
810
- break;
811
- } catch (parseError) {
812
- if (retryAttempt < maxRetries) {
813
- console.warn(`Failed to parse JSON from Anthropic (attempt ${retryAttempt + 1}):`, jsonText);
814
- retryAttempt++;
815
- await new Promise((resolve) => setTimeout(resolve, 2e3));
816
- continue;
817
- }
818
- throw new Error(`Failed to parse JSON response from Anthropic: ${parseError instanceof Error ? parseError.message : "Unknown error"}`);
819
- }
820
- } else {
821
- throw new Error("Unexpected response type from Anthropic");
822
- }
823
- } catch (error) {
824
- if (retryAttempt < maxRetries) {
825
- await new Promise((resolve) => setTimeout(resolve, 5e3));
826
- retryAttempt++;
827
- continue;
828
- }
829
- throw new Error(`Failed to analyze video content with Anthropic: ${error instanceof Error ? error.message : "Unknown error"}`);
830
- }
831
- }
1750
+ aiAnalysis = await withRetry(() => analyzeStoryboard(imageUrl));
832
1751
  }
833
- } else {
834
- throw new Error(`Unsupported provider: ${provider}`);
1752
+ } catch (error) {
1753
+ throw new Error(
1754
+ `Failed to analyze video content with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
1755
+ );
835
1756
  }
836
1757
  if (!aiAnalysis) {
837
- throw new Error("No analysis result received from AI provider");
1758
+ throw new Error(`Failed to analyze video content for asset ${assetId}`);
1759
+ }
1760
+ if (!aiAnalysis.title) {
1761
+ throw new Error(`Failed to generate title for asset ${assetId}`);
1762
+ }
1763
+ if (!aiAnalysis.description) {
1764
+ throw new Error(`Failed to generate description for asset ${assetId}`);
838
1765
  }
839
1766
  return {
840
1767
  assetId,
841
- title: aiAnalysis.title || "No title available",
842
- description: aiAnalysis.description || "No description available",
843
- tags: aiAnalysis.keywords || [],
1768
+ title: aiAnalysis.title,
1769
+ description: aiAnalysis.description,
1770
+ tags: normalizeKeywords(aiAnalysis.keywords),
844
1771
  storyboardUrl: imageUrl
845
1772
  };
846
1773
  }
847
- async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options = {}) {
848
- const {
849
- provider = "anthropic",
850
- model = "claude-sonnet-4-20250514",
851
- muxTokenId,
852
- muxTokenSecret,
853
- anthropicApiKey,
854
- ...config
855
- } = options;
856
- if (provider !== "anthropic") {
857
- throw new Error("Only Anthropic provider is currently supported for translation");
858
- }
859
- const muxId = muxTokenId || process.env.MUX_TOKEN_ID;
860
- const muxSecret = muxTokenSecret || process.env.MUX_TOKEN_SECRET;
861
- const anthropicKey = anthropicApiKey || process.env.ANTHROPIC_API_KEY;
862
- const s3Endpoint = options.s3Endpoint || process.env.S3_ENDPOINT;
863
- const s3Region = options.s3Region || process.env.S3_REGION || "auto";
864
- const s3Bucket = options.s3Bucket || process.env.S3_BUCKET;
865
- const s3AccessKeyId = options.s3AccessKeyId || process.env.S3_ACCESS_KEY_ID;
866
- const s3SecretAccessKey = options.s3SecretAccessKey || process.env.S3_SECRET_ACCESS_KEY;
867
- const uploadToMux = options.uploadToMux !== false;
868
- if (!muxId || !muxSecret) {
869
- throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
870
- }
871
- if (!anthropicKey) {
872
- throw new Error("Anthropic API key is required. Provide anthropicApiKey in options or set ANTHROPIC_API_KEY environment variable.");
873
- }
874
- if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
875
- throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
876
- }
877
- const mux = new Mux({
878
- tokenId: muxId,
879
- tokenSecret: muxSecret
880
- });
881
- const anthropicClient = new Anthropic({
882
- apiKey: anthropicKey
883
- });
884
- let assetData;
885
- try {
886
- const asset = await mux.video.assets.retrieve(assetId);
887
- assetData = asset;
888
- } catch (error) {
889
- throw new Error(`Failed to fetch asset from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
890
- }
891
- const playbackId = assetData.playback_ids?.[0]?.id;
892
- if (!playbackId) {
893
- throw new Error("No playback ID found for this asset");
894
- }
895
- if (!assetData.tracks) {
896
- throw new Error("No tracks found for this asset");
897
- }
898
- const sourceTextTrack = assetData.tracks.find(
899
- (track) => track.type === "text" && track.status === "ready" && track.language_code === fromLanguageCode
900
- );
901
- if (!sourceTextTrack) {
902
- throw new Error(`No ready text track found with language code '${fromLanguageCode}' for this asset`);
903
- }
904
- const vttUrl = `https://stream.mux.com/${playbackId}/text/${sourceTextTrack.id}.vtt`;
905
- let vttContent;
906
- try {
907
- const vttResponse = await fetch(vttUrl);
908
- if (!vttResponse.ok) {
909
- throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
910
- }
911
- vttContent = await vttResponse.text();
912
- } catch (error) {
913
- throw new Error(`Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`);
914
- }
915
- console.log(`\u2705 Found VTT content for language '${fromLanguageCode}'`);
916
- let translatedVtt;
917
- try {
918
- const response = await anthropicClient.messages.create({
919
- model,
920
- max_tokens: 4e3,
921
- messages: [
922
- {
923
- role: "user",
924
- content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Return the translated VTT in JSON format with the key 'translation'. Preserve all timestamps and VTT formatting exactly as they appear.
925
1774
 
926
- ${vttContent}`
927
- }
928
- ]
929
- });
930
- const content = response.content[0];
931
- if (content.type === "text") {
932
- const responseText = content.text.trim();
933
- try {
934
- const cleanedResponse = responseText.replace(/```json/g, "").replace(/```/g, "").trim();
935
- const parsed = JSON.parse(cleanedResponse);
936
- translatedVtt = parsed.translation;
937
- } catch (parseError) {
938
- throw new Error(`Failed to parse JSON response from Anthropic: ${parseError instanceof Error ? parseError.message : "Unknown error"}`);
939
- }
940
- } else {
941
- throw new Error("Unexpected response type from Anthropic");
942
- }
943
- } catch (error) {
944
- throw new Error(`Failed to translate VTT with Anthropic: ${error instanceof Error ? error.message : "Unknown error"}`);
945
- }
946
- console.log(`
947
- \u2705 Translation completed successfully!`);
948
- if (!uploadToMux) {
949
- console.log(`\u2705 VTT translated to ${toLanguageCode} successfully!`);
950
- return {
951
- assetId,
952
- sourceLanguageCode: fromLanguageCode,
953
- targetLanguageCode: toLanguageCode,
954
- originalVtt: vttContent,
955
- translatedVtt
956
- };
957
- }
958
- console.log("\u{1F4E4} Uploading translated VTT to S3-compatible storage...");
959
- const s3Client = new S3Client({
960
- region: s3Region,
961
- endpoint: s3Endpoint,
962
- credentials: {
963
- accessKeyId: s3AccessKeyId,
964
- secretAccessKey: s3SecretAccessKey
965
- },
966
- forcePathStyle: true
967
- // Often needed for non-AWS S3 services
968
- });
969
- const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
970
- let presignedUrl;
1775
+ // src/workflows/translate-audio.ts
1776
+ var import_client_s3 = require("@aws-sdk/client-s3");
1777
+ var import_lib_storage = require("@aws-sdk/lib-storage");
1778
+ var import_s3_request_presigner = require("@aws-sdk/s3-request-presigner");
1779
+ var import_mux_node3 = __toESM(require("@mux/mux-node"));
1780
+ var STATIC_RENDITION_POLL_INTERVAL_MS = 5e3;
1781
+ var STATIC_RENDITION_MAX_ATTEMPTS = 36;
1782
+ var delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
1783
+ function getReadyAudioStaticRendition(asset) {
1784
+ const files = asset.static_renditions?.files;
1785
+ if (!files || files.length === 0) {
1786
+ return void 0;
1787
+ }
1788
+ return files.find(
1789
+ (rendition) => rendition.name === "audio.m4a" && rendition.status === "ready"
1790
+ );
1791
+ }
1792
+ var hasReadyAudioStaticRendition = (asset) => Boolean(getReadyAudioStaticRendition(asset));
1793
+ async function requestStaticRenditionCreation(muxClient, assetId) {
1794
+ console.log("\u{1F4FC} Requesting static rendition from Mux...");
971
1795
  try {
972
- const upload = new Upload({
973
- client: s3Client,
974
- params: {
975
- Bucket: s3Bucket,
976
- Key: vttKey,
977
- Body: translatedVtt,
978
- ContentType: "text/vtt"
979
- }
980
- });
981
- await upload.done();
982
- console.log(`\u2705 VTT uploaded successfully to: ${vttKey}`);
983
- const getObjectCommand = new GetObjectCommand({
984
- Bucket: s3Bucket,
985
- Key: vttKey
986
- });
987
- presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
988
- expiresIn: 3600
989
- // 1 hour
1796
+ await muxClient.video.assets.createStaticRendition(assetId, {
1797
+ resolution: "audio-only"
990
1798
  });
991
- console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
1799
+ console.log("\u{1F4FC} Static rendition request accepted by Mux.");
992
1800
  } catch (error) {
993
- throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
1801
+ const statusCode = error?.status ?? error?.statusCode;
1802
+ const messages = error?.error?.messages;
1803
+ const alreadyDefined = messages?.some((message2) => message2.toLowerCase().includes("already defined")) ?? error?.message?.toLowerCase().includes("already defined");
1804
+ if (statusCode === 409 || alreadyDefined) {
1805
+ console.log("\u2139\uFE0F Static rendition already requested. Waiting for it to finish...");
1806
+ return;
1807
+ }
1808
+ const message = error instanceof Error ? error.message : "Unknown error";
1809
+ throw new Error(`Failed to request static rendition from Mux: ${message}`);
994
1810
  }
995
- console.log("\u{1F4F9} Adding translated track to Mux asset...");
996
- let uploadedTrackId;
997
- try {
998
- const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(toLanguageCode) || toLanguageCode.toUpperCase();
999
- const trackName = `${languageName} (auto-translated)`;
1000
- const trackResponse = await mux.video.assets.createTrack(assetId, {
1001
- type: "text",
1002
- text_type: "subtitles",
1003
- language_code: toLanguageCode,
1004
- name: trackName,
1005
- url: presignedUrl
1006
- });
1007
- uploadedTrackId = trackResponse.id;
1008
- console.log(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
1009
- console.log(`\u{1F4CB} Track name: "${trackName}"`);
1010
- } catch (error) {
1011
- console.warn(`\u26A0\uFE0F Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
1012
- console.log("\u{1F517} You can manually add the track using this presigned URL:");
1013
- console.log(presignedUrl);
1811
+ }
1812
+ async function waitForAudioStaticRendition({
1813
+ assetId,
1814
+ muxClient,
1815
+ initialAsset
1816
+ }) {
1817
+ let currentAsset = initialAsset;
1818
+ if (hasReadyAudioStaticRendition(currentAsset)) {
1819
+ return currentAsset;
1820
+ }
1821
+ const status = currentAsset.static_renditions?.status ?? "not_requested";
1822
+ if (status === "not_requested" || status === void 0) {
1823
+ await requestStaticRenditionCreation(muxClient, assetId);
1824
+ } else if (status === "errored") {
1825
+ console.log("\u26A0\uFE0F Previous static rendition request errored. Creating a new one...");
1826
+ await requestStaticRenditionCreation(muxClient, assetId);
1827
+ } else {
1828
+ console.log(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
1829
+ }
1830
+ for (let attempt = 1; attempt <= STATIC_RENDITION_MAX_ATTEMPTS; attempt++) {
1831
+ await delay(STATIC_RENDITION_POLL_INTERVAL_MS);
1832
+ currentAsset = await muxClient.video.assets.retrieve(assetId);
1833
+ if (hasReadyAudioStaticRendition(currentAsset)) {
1834
+ console.log("\u2705 Audio static rendition is ready!");
1835
+ return currentAsset;
1836
+ }
1837
+ const currentStatus = currentAsset.static_renditions?.status || "unknown";
1838
+ console.log(
1839
+ `\u231B Waiting for static rendition (attempt ${attempt}/${STATIC_RENDITION_MAX_ATTEMPTS}) \u2192 ${currentStatus}`
1840
+ );
1841
+ if (currentStatus === "errored") {
1842
+ throw new Error(
1843
+ "Mux failed to create the static rendition for this asset. Please check the asset in the Mux dashboard."
1844
+ );
1845
+ }
1014
1846
  }
1015
- return {
1016
- assetId,
1017
- sourceLanguageCode: fromLanguageCode,
1018
- targetLanguageCode: toLanguageCode,
1019
- originalVtt: vttContent,
1020
- translatedVtt,
1021
- uploadedTrackId,
1022
- presignedUrl
1023
- };
1847
+ throw new Error(
1848
+ "Timed out waiting for the static rendition to become ready. Please try again in a moment."
1849
+ );
1024
1850
  }
1025
1851
  async function translateAudio(assetId, toLanguageCode, options = {}) {
1026
1852
  const {
@@ -1030,20 +1856,19 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
1030
1856
  muxTokenId,
1031
1857
  muxTokenSecret,
1032
1858
  elevenLabsApiKey,
1033
- uploadToMux = true,
1034
- ...config
1859
+ uploadToMux = true
1035
1860
  } = options;
1036
1861
  if (provider !== "elevenlabs") {
1037
1862
  throw new Error("Only ElevenLabs provider is currently supported for audio translation");
1038
1863
  }
1039
- const muxId = muxTokenId || process.env.MUX_TOKEN_ID;
1040
- const muxSecret = muxTokenSecret || process.env.MUX_TOKEN_SECRET;
1041
- const elevenLabsKey = elevenLabsApiKey || process.env.ELEVENLABS_API_KEY;
1042
- const s3Endpoint = options.s3Endpoint || process.env.S3_ENDPOINT;
1043
- const s3Region = options.s3Region || process.env.S3_REGION || "auto";
1044
- const s3Bucket = options.s3Bucket || process.env.S3_BUCKET;
1045
- const s3AccessKeyId = options.s3AccessKeyId || process.env.S3_ACCESS_KEY_ID;
1046
- const s3SecretAccessKey = options.s3SecretAccessKey || process.env.S3_SECRET_ACCESS_KEY;
1864
+ const muxId = muxTokenId ?? env_default.MUX_TOKEN_ID;
1865
+ const muxSecret = muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
1866
+ const elevenLabsKey = elevenLabsApiKey ?? env_default.ELEVENLABS_API_KEY;
1867
+ const s3Endpoint = options.s3Endpoint ?? env_default.S3_ENDPOINT;
1868
+ const s3Region = options.s3Region ?? env_default.S3_REGION ?? "auto";
1869
+ const s3Bucket = options.s3Bucket ?? env_default.S3_BUCKET;
1870
+ const s3AccessKeyId = options.s3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
1871
+ const s3SecretAccessKey = options.s3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
1047
1872
  if (!muxId || !muxSecret) {
1048
1873
  throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
1049
1874
  }
@@ -1053,33 +1878,38 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
1053
1878
  if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
1054
1879
  throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
1055
1880
  }
1056
- const mux = new Mux({
1881
+ const mux = new import_mux_node3.default({
1057
1882
  tokenId: muxId,
1058
1883
  tokenSecret: muxSecret
1059
1884
  });
1060
1885
  console.log(`\u{1F3AC} Fetching Mux asset: ${assetId}`);
1061
- let assetData;
1062
- try {
1063
- const asset = await mux.video.assets.retrieve(assetId);
1064
- assetData = asset;
1065
- } catch (error) {
1066
- throw new Error(`Failed to fetch asset from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
1886
+ const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(mux, assetId);
1887
+ const signingContext = resolveSigningContext(options);
1888
+ if (policy === "signed" && !signingContext) {
1889
+ throw new Error(
1890
+ "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
1891
+ );
1067
1892
  }
1068
1893
  console.log("\u{1F50D} Checking for audio-only static rendition...");
1069
- if (!assetData.static_renditions || !assetData.static_renditions.files) {
1070
- throw new Error("No static renditions found for this asset");
1071
- }
1072
- const staticRenditionFiles = assetData.static_renditions.files;
1073
- if (staticRenditionFiles.length === 0) {
1074
- throw new Error("No static rendition files found for this asset");
1894
+ let currentAsset = initialAsset;
1895
+ if (!hasReadyAudioStaticRendition(currentAsset)) {
1896
+ console.log("\u274C No ready audio static rendition found. Requesting one now...");
1897
+ currentAsset = await waitForAudioStaticRendition({
1898
+ assetId,
1899
+ muxClient: mux,
1900
+ initialAsset: currentAsset
1901
+ });
1075
1902
  }
1076
- const audioRendition = staticRenditionFiles.find(
1077
- (rendition) => rendition.name === "audio.m4a" && rendition.status === "ready"
1078
- );
1903
+ const audioRendition = getReadyAudioStaticRendition(currentAsset);
1079
1904
  if (!audioRendition) {
1080
- throw new Error("No ready audio-only static rendition found for this asset. Please ensure the asset has an audio.m4a static rendition.");
1905
+ throw new Error(
1906
+ "Unable to obtain an audio-only static rendition for this asset. Please verify static renditions are enabled in Mux."
1907
+ );
1908
+ }
1909
+ let audioUrl = `https://stream.mux.com/${playbackId}/audio.m4a`;
1910
+ if (policy === "signed" && signingContext) {
1911
+ audioUrl = await signUrl(audioUrl, playbackId, signingContext, "video");
1081
1912
  }
1082
- const audioUrl = `https://stream.mux.com/${assetData.playback_ids?.[0]?.id}/audio.m4a`;
1083
1913
  console.log(`\u2705 Found audio rendition: ${audioUrl}`);
1084
1914
  console.log(`\u{1F399}\uFE0F Creating ElevenLabs dubbing job (auto-detect \u2192 ${toLanguageCode})`);
1085
1915
  let dubbingId;
@@ -1168,7 +1998,7 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
1168
1998
  throw new Error(`Failed to download dubbed audio: ${error instanceof Error ? error.message : "Unknown error"}`);
1169
1999
  }
1170
2000
  console.log("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
1171
- const s3Client = new S3Client({
2001
+ const s3Client = new import_client_s3.S3Client({
1172
2002
  region: s3Region,
1173
2003
  endpoint: s3Endpoint,
1174
2004
  credentials: {
@@ -1180,7 +2010,7 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
1180
2010
  const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
1181
2011
  let presignedUrl;
1182
2012
  try {
1183
- const upload = new Upload({
2013
+ const upload = new import_lib_storage.Upload({
1184
2014
  client: s3Client,
1185
2015
  params: {
1186
2016
  Bucket: s3Bucket,
@@ -1191,11 +2021,11 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
1191
2021
  });
1192
2022
  await upload.done();
1193
2023
  console.log(`\u2705 Audio uploaded successfully to: ${audioKey}`);
1194
- const getObjectCommand = new GetObjectCommand({
2024
+ const getObjectCommand = new import_client_s3.GetObjectCommand({
1195
2025
  Bucket: s3Bucket,
1196
2026
  Key: audioKey
1197
2027
  });
1198
- presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
2028
+ presignedUrl = await (0, import_s3_request_presigner.getSignedUrl)(s3Client, getObjectCommand, {
1199
2029
  expiresIn: 3600
1200
2030
  // 1 hour
1201
2031
  });
@@ -1230,529 +2060,179 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
1230
2060
  presignedUrl
1231
2061
  };
1232
2062
  }
1233
- var chaptersSchema = z.object({
1234
- chapters: z.array(z.object({
1235
- startTime: z.number(),
1236
- title: z.string()
1237
- }))
1238
- });
1239
- var DEFAULT_SYSTEM_PROMPT = `Your role is to segment the following captions into chunked chapters, summarising each chapter with a title.
1240
-
1241
- Analyze the transcript and create logical chapter breaks based on topic changes, major transitions, or distinct sections of content. Each chapter should represent a meaningful segment of the video.
1242
2063
 
1243
- You must respond with valid JSON in exactly this format:
1244
- {
1245
- "chapters": [
1246
- {"startTime": 0, "title": "Introduction"},
1247
- {"startTime": 45.5, "title": "Main Topic Discussion"},
1248
- {"startTime": 120.0, "title": "Conclusion"}
1249
- ]
1250
- }
1251
-
1252
- Important rules:
1253
- - startTime must be in seconds (not HH:MM:SS format)
1254
- - Always start with startTime: 0 for the first chapter
1255
- - Create 3-8 chapters depending on content length and natural breaks
1256
- - Chapter titles should be concise and descriptive
1257
- - Do not include any text before or after the JSON
1258
- - The JSON must be valid and parseable`;
1259
- var ANTHROPIC_JSON_PROMPT2 = `You must respond with valid JSON in exactly this format:
1260
- {
1261
- "chapters": [
1262
- {"startTime": 0, "title": "Chapter title here"},
1263
- {"startTime": 45.5, "title": "Another chapter title"}
1264
- ]
1265
- }
1266
-
1267
- Do not include any text before or after the JSON. The JSON must be valid and parseable.`;
1268
- function vttTimestampToSeconds(timestamp) {
1269
- const parts = timestamp.split(":");
1270
- if (parts.length !== 3) return 0;
1271
- const hours = parseInt(parts[0], 10) || 0;
1272
- const minutes = parseInt(parts[1], 10) || 0;
1273
- const seconds = parseFloat(parts[2]) || 0;
1274
- return hours * 3600 + minutes * 60 + seconds;
1275
- }
1276
- function extractTimestampsFromVTT(vttContent) {
1277
- if (!vttContent.trim()) {
1278
- return "";
1279
- }
1280
- const lines = vttContent.split("\n");
1281
- const segments = [];
1282
- for (let i = 0; i < lines.length; i++) {
1283
- const line = lines[i].trim();
1284
- if (line.includes("-->")) {
1285
- const startTime = line.split(" --> ")[0].trim();
1286
- const timeInSeconds = vttTimestampToSeconds(startTime);
1287
- let j = i + 1;
1288
- while (j < lines.length && !lines[j].trim()) {
1289
- j++;
1290
- }
1291
- if (j < lines.length) {
1292
- const text = lines[j].trim().replace(/<[^>]*>/g, "");
1293
- if (text) {
1294
- segments.push({ time: timeInSeconds, text });
1295
- }
1296
- }
1297
- }
1298
- }
1299
- return segments.map((segment) => `[${Math.floor(segment.time)}s] ${segment.text}`).join("\n");
1300
- }
1301
- async function generateChapters(assetId, languageCode, options = {}) {
2064
+ // src/workflows/translate-captions.ts
2065
+ var import_client_s32 = require("@aws-sdk/client-s3");
2066
+ var import_lib_storage2 = require("@aws-sdk/lib-storage");
2067
+ var import_s3_request_presigner2 = require("@aws-sdk/s3-request-presigner");
2068
+ var import_ai5 = require("ai");
2069
+ var import_zod5 = require("zod");
2070
+ var translationSchema = import_zod5.z.object({
2071
+ translation: import_zod5.z.string()
2072
+ });
2073
+ var DEFAULT_PROVIDER6 = "openai";
2074
+ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options) {
1302
2075
  const {
1303
- provider = "openai",
2076
+ provider = DEFAULT_PROVIDER6,
1304
2077
  model,
1305
- muxTokenId,
1306
- muxTokenSecret,
1307
- openaiApiKey,
1308
- anthropicApiKey,
1309
- ...config
2078
+ s3Endpoint: providedS3Endpoint,
2079
+ s3Region: providedS3Region,
2080
+ s3Bucket: providedS3Bucket,
2081
+ s3AccessKeyId: providedS3AccessKeyId,
2082
+ s3SecretAccessKey: providedS3SecretAccessKey,
2083
+ uploadToMux: uploadToMuxOption,
2084
+ ...clientConfig
1310
2085
  } = options;
1311
- const defaultModel = provider === "anthropic" ? "claude-3-5-haiku-20241022" : "gpt-4o-mini";
1312
- const finalModel = model || defaultModel;
1313
- const muxId = muxTokenId || process.env.MUX_TOKEN_ID;
1314
- const muxSecret = muxTokenSecret || process.env.MUX_TOKEN_SECRET;
1315
- const openaiKey = openaiApiKey || process.env.OPENAI_API_KEY;
1316
- const anthropicKey = anthropicApiKey || process.env.ANTHROPIC_API_KEY;
1317
- if (!muxId || !muxSecret) {
1318
- throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
1319
- }
1320
- if (provider === "openai" && !openaiKey) {
1321
- throw new Error("OpenAI API key is required for OpenAI provider. Provide openaiApiKey in options or set OPENAI_API_KEY environment variable.");
1322
- }
1323
- if (provider === "anthropic" && !anthropicKey) {
1324
- throw new Error("Anthropic API key is required for Anthropic provider. Provide anthropicApiKey in options or set ANTHROPIC_API_KEY environment variable.");
1325
- }
1326
- const mux = new Mux({
1327
- tokenId: muxId,
1328
- tokenSecret: muxSecret
1329
- });
1330
- let openaiClient;
1331
- let anthropicClient;
1332
- if (provider === "openai") {
1333
- openaiClient = new OpenAI({
1334
- apiKey: openaiKey
1335
- });
1336
- } else if (provider === "anthropic") {
1337
- anthropicClient = new Anthropic({
1338
- apiKey: anthropicKey
1339
- });
1340
- }
1341
- let assetData;
1342
- try {
1343
- const asset = await mux.video.assets.retrieve(assetId);
1344
- assetData = asset;
1345
- } catch (error) {
1346
- throw new Error(`Failed to fetch asset from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
2086
+ const resolvedProvider = provider;
2087
+ const s3Endpoint = providedS3Endpoint ?? env_default.S3_ENDPOINT;
2088
+ const s3Region = providedS3Region ?? env_default.S3_REGION ?? "auto";
2089
+ const s3Bucket = providedS3Bucket ?? env_default.S3_BUCKET;
2090
+ const s3AccessKeyId = providedS3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
2091
+ const s3SecretAccessKey = providedS3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
2092
+ const uploadToMux = uploadToMuxOption !== false;
2093
+ const clients = createWorkflowClients(
2094
+ { ...clientConfig, provider: resolvedProvider, model },
2095
+ resolvedProvider
2096
+ );
2097
+ if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
2098
+ throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
1347
2099
  }
1348
- const playbackId = assetData.playback_ids?.[0]?.id;
1349
- if (!playbackId) {
1350
- throw new Error("No playback ID found for this asset");
2100
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
2101
+ const signingContext = resolveSigningContext(options);
2102
+ if (policy === "signed" && !signingContext) {
2103
+ throw new Error(
2104
+ "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
2105
+ );
1351
2106
  }
1352
2107
  if (!assetData.tracks) {
1353
2108
  throw new Error("No tracks found for this asset");
1354
2109
  }
1355
- const captionTrack = assetData.tracks.find(
1356
- (track) => track.type === "text" && track.status === "ready" && track.text_type === "subtitles" && track.language_code === languageCode
2110
+ const sourceTextTrack = assetData.tracks.find(
2111
+ (track) => track.type === "text" && track.status === "ready" && track.language_code === fromLanguageCode
1357
2112
  );
1358
- if (!captionTrack) {
1359
- throw new Error(`No caption track found for language '${languageCode}'. Available languages: ${assetData.tracks.filter((t) => t.type === "text").map((t) => t.language_code).join(", ")}`);
2113
+ if (!sourceTextTrack) {
2114
+ throw new Error(`No ready text track found with language code '${fromLanguageCode}' for this asset`);
2115
+ }
2116
+ let vttUrl = `https://stream.mux.com/${playbackId}/text/${sourceTextTrack.id}.vtt`;
2117
+ if (policy === "signed" && signingContext) {
2118
+ vttUrl = await signUrl(vttUrl, playbackId, signingContext, "video");
1360
2119
  }
1361
- const transcriptUrl = `https://stream.mux.com/${playbackId}/text/${captionTrack.id}.vtt`;
1362
2120
  let vttContent;
1363
2121
  try {
1364
- const transcriptResponse = await fetch(transcriptUrl);
1365
- if (!transcriptResponse.ok) {
1366
- throw new Error(`Failed to fetch VTT: ${transcriptResponse.statusText}`);
2122
+ const vttResponse = await fetch(vttUrl);
2123
+ if (!vttResponse.ok) {
2124
+ throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
1367
2125
  }
1368
- vttContent = await transcriptResponse.text();
2126
+ vttContent = await vttResponse.text();
1369
2127
  } catch (error) {
1370
- throw new Error(`Failed to fetch caption track: ${error instanceof Error ? error.message : "Unknown error"}`);
1371
- }
1372
- const timestampedTranscript = extractTimestampsFromVTT(vttContent);
1373
- if (!timestampedTranscript) {
1374
- throw new Error("No usable content found in caption track");
2128
+ throw new Error(`Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`);
1375
2129
  }
1376
- let chaptersData = null;
1377
- if (provider === "openai") {
1378
- try {
1379
- const response = await openaiClient.responses.parse({
1380
- model: finalModel,
1381
- input: [
1382
- {
1383
- role: "system",
1384
- content: DEFAULT_SYSTEM_PROMPT
1385
- },
1386
- {
1387
- role: "user",
1388
- content: [
1389
- {
1390
- type: "input_text",
1391
- text: timestampedTranscript
1392
- }
1393
- ]
1394
- }
1395
- ],
1396
- text: {
1397
- format: zodTextFormat(chaptersSchema, "chapters")
1398
- }
1399
- });
1400
- chaptersData = response.output_parsed;
1401
- } catch (error) {
1402
- throw new Error(`Failed to generate chapters with OpenAI: ${error instanceof Error ? error.message : "Unknown error"}`);
1403
- }
1404
- } else if (provider === "anthropic") {
1405
- const anthropicPrompt = `${DEFAULT_SYSTEM_PROMPT}
1406
-
1407
- ${ANTHROPIC_JSON_PROMPT2}
2130
+ console.log(`\u2705 Found VTT content for language '${fromLanguageCode}'`);
2131
+ let translatedVtt;
2132
+ try {
2133
+ const response = await (0, import_ai5.generateObject)({
2134
+ model: clients.languageModel.model,
2135
+ schema: translationSchema,
2136
+ abortSignal: options.abortSignal,
2137
+ messages: [
2138
+ {
2139
+ role: "user",
2140
+ content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
1408
2141
 
1409
- Transcript:
1410
- ${timestampedTranscript}`;
1411
- try {
1412
- const response = await anthropicClient.messages.create({
1413
- model: finalModel,
1414
- max_tokens: 2e3,
1415
- messages: [
1416
- {
1417
- role: "user",
1418
- content: anthropicPrompt
1419
- }
1420
- ]
1421
- });
1422
- const content = response.content[0];
1423
- if (content.type === "text") {
1424
- const jsonText = content.text.trim();
1425
- try {
1426
- chaptersData = JSON.parse(jsonText);
1427
- } catch (parseError) {
1428
- throw new Error(`Failed to parse JSON response from Anthropic: ${parseError instanceof Error ? parseError.message : "Unknown error"}`);
2142
+ ${vttContent}`
1429
2143
  }
1430
- } else {
1431
- throw new Error("Unexpected response type from Anthropic");
1432
- }
1433
- } catch (error) {
1434
- throw new Error(`Failed to generate chapters with Anthropic: ${error instanceof Error ? error.message : "Unknown error"}`);
1435
- }
1436
- } else {
1437
- throw new Error(`Unsupported provider: ${provider}`);
1438
- }
1439
- if (!chaptersData || !chaptersData.chapters) {
1440
- throw new Error("No chapters generated from AI response");
1441
- }
1442
- const validChapters = chaptersData.chapters.filter((chapter) => typeof chapter.startTime === "number" && typeof chapter.title === "string").sort((a, b) => a.startTime - b.startTime);
1443
- if (validChapters.length === 0) {
1444
- throw new Error("No valid chapters found in AI response");
1445
- }
1446
- if (validChapters[0].startTime !== 0) {
1447
- validChapters[0].startTime = 0;
1448
- }
1449
- return {
1450
- assetId,
1451
- languageCode,
1452
- chapters: validChapters
1453
- };
1454
- }
1455
-
1456
- // src/utils/storyboard-processor.ts
1457
- init_image_download();
1458
- async function getAssetInfo(assetId, options) {
1459
- const muxId = options.muxTokenId || process.env.MUX_TOKEN_ID;
1460
- const muxSecret = options.muxTokenSecret || process.env.MUX_TOKEN_SECRET;
1461
- if (!muxId || !muxSecret) {
1462
- throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
2144
+ ]
2145
+ });
2146
+ translatedVtt = response.object.translation;
2147
+ } catch (error) {
2148
+ throw new Error(`Failed to translate VTT with ${resolvedProvider}: ${error instanceof Error ? error.message : "Unknown error"}`);
1463
2149
  }
1464
- const mux = new Mux({
1465
- tokenId: muxId,
1466
- tokenSecret: muxSecret
1467
- });
1468
- try {
1469
- const asset = await mux.video.assets.retrieve(assetId);
1470
- const playbackId = asset.playback_ids?.[0]?.id;
1471
- if (!playbackId) {
1472
- throw new Error("No playback ID found for this asset");
1473
- }
2150
+ console.log(`
2151
+ \u2705 Translation completed successfully!`);
2152
+ if (!uploadToMux) {
2153
+ console.log(`\u2705 VTT translated to ${toLanguageCode} successfully!`);
1474
2154
  return {
1475
- playbackId,
1476
- duration: asset.duration || void 0
2155
+ assetId,
2156
+ sourceLanguageCode: fromLanguageCode,
2157
+ targetLanguageCode: toLanguageCode,
2158
+ originalVtt: vttContent,
2159
+ translatedVtt
1477
2160
  };
1478
- } catch (error) {
1479
- throw new Error(`Failed to fetch asset from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
1480
2161
  }
1481
- }
1482
- async function processStoryboardWithAnthropic(imageUrl, prompt, options) {
1483
- const { apiKey, model, responseParser, imageSubmissionMode = "url", imageDownloadOptions, maxRetries = 3 } = options;
1484
- const anthropicClient = new Anthropic({ apiKey });
1485
- let retryAttempt = 0;
1486
- if (imageSubmissionMode === "base64") {
1487
- try {
1488
- const fileUploadResult = await uploadImageToAnthropicFiles(imageUrl, apiKey, imageDownloadOptions);
1489
- const response = await anthropicClient.messages.create({
1490
- model,
1491
- max_tokens: 1e3,
1492
- messages: [
1493
- {
1494
- role: "user",
1495
- content: [
1496
- {
1497
- type: "image",
1498
- source: {
1499
- type: "file",
1500
- file_id: fileUploadResult.fileId
1501
- }
1502
- // Type assertion for Files API support
1503
- },
1504
- {
1505
- type: "text",
1506
- text: prompt
1507
- }
1508
- ]
1509
- }
1510
- ]
1511
- }, {
1512
- headers: {
1513
- "anthropic-beta": "files-api-2025-04-14"
1514
- }
1515
- });
1516
- return responseParser(response);
1517
- } catch (error) {
1518
- const errorMessage = error instanceof Error ? error.message : "Unknown error";
1519
- throw new Error(`Failed to process storyboard with Anthropic Files API: ${errorMessage}`);
1520
- }
1521
- } else {
1522
- while (retryAttempt <= maxRetries) {
1523
- try {
1524
- const response = await anthropicClient.messages.create({
1525
- model,
1526
- max_tokens: 1e3,
1527
- messages: [
1528
- {
1529
- role: "user",
1530
- content: [
1531
- {
1532
- type: "image",
1533
- source: {
1534
- type: "url",
1535
- url: imageUrl
1536
- }
1537
- // Type assertion to work around SDK type definitions
1538
- },
1539
- {
1540
- type: "text",
1541
- text: prompt
1542
- }
1543
- ]
1544
- }
1545
- ]
1546
- });
1547
- return responseParser(response);
1548
- } catch (error) {
1549
- if (retryAttempt < maxRetries) {
1550
- await new Promise((resolve) => setTimeout(resolve, 5e3));
1551
- retryAttempt++;
1552
- continue;
1553
- }
1554
- throw new Error(`Failed to process storyboard with Anthropic: ${error instanceof Error ? error.message : "Unknown error"}`);
1555
- }
1556
- }
1557
- }
1558
- throw new Error("All retry attempts failed");
1559
- }
1560
-
1561
- // src/burned-in-captions.ts
1562
- var burnedInCaptionsSchema = z.object({
1563
- hasBurnedInCaptions: z.boolean(),
1564
- confidence: z.number().min(0).max(1),
1565
- detectedLanguage: z.string().nullable()
1566
- });
1567
- var ANTHROPIC_SYSTEM_PROMPT = `You are an expert at analyzing video frames to detect burned-in captions (also called open captions or hardcoded subtitles). These are text overlays permanently embedded in video images, common on social media platforms.
1568
-
1569
- Key principles:
1570
- 1. Burned-in captions appear across multiple frames throughout the video timeline
1571
- 2. End-cards and marketing text appear only in final frames
1572
- 3. Captions have consistent positioning and caption-style formatting
1573
- 4. Caption text typically changes between frames (dialogue/narration)
1574
-
1575
- Analysis approach:
1576
- - Look for text overlays distributed across different parts of the timeline
1577
- - Distinguish between dialogue captions vs. marketing end-cards
1578
- - Consider text positioning, formatting, and content patterns`;
1579
- var ANTHROPIC_USER_PROMPT = `Analyze this storyboard for burned-in captions:
1580
-
1581
- 1. Examine each frame from left to right (timeline order)
1582
- 2. Note which frames have text overlays and their positions
1583
- 3. Determine the pattern:
1584
- - Text scattered across timeline = likely captions
1585
- - Text only in final 1-2 frames = likely end-card/marketing
1586
-
1587
- Classification rules:
1588
- - If text appears in 3+ frames distributed throughout timeline \u2192 burned-in captions
1589
- - If text appears only in final frames \u2192 NOT burned-in captions
1590
- - Look for dialogue-style content vs. marketing taglines
1591
-
1592
- Analyze and classify with confidence level.`;
1593
- var ANTHROPIC_JSON_PROMPT3 = `Apply the frame analysis above.
1594
-
1595
- Key rule: Text appearing only in final 2-3 frames = NOT captions. Text distributed throughout timeline = captions.
1596
-
1597
- Respond ONLY with valid JSON:
1598
- {
1599
- "hasBurnedInCaptions": true/false,
1600
- "confidence": 0.85,
1601
- "detectedLanguage": "English" (or null if no captions or language unclear)
1602
- }
1603
-
1604
- Do not include any text before or after the JSON. The JSON must be valid and parseable.`;
1605
- async function hasBurnedInCaptions(assetId, options = {}) {
1606
- const {
1607
- provider = "openai",
1608
- model,
1609
- imageSubmissionMode = "url",
1610
- imageDownloadOptions,
1611
- muxTokenId,
1612
- muxTokenSecret,
1613
- openaiApiKey,
1614
- anthropicApiKey,
1615
- ...config
1616
- } = options;
1617
- const defaultModel = provider === "anthropic" ? "claude-3-5-haiku-20241022" : "gpt-4o-mini";
1618
- const finalModel = model || defaultModel;
1619
- const openaiKey = openaiApiKey || process.env.OPENAI_API_KEY;
1620
- const anthropicKey = anthropicApiKey || process.env.ANTHROPIC_API_KEY;
1621
- if (provider === "openai" && !openaiKey) {
1622
- throw new Error("OpenAI API key is required for OpenAI provider. Provide openaiApiKey in options or set OPENAI_API_KEY environment variable.");
1623
- }
1624
- if (provider === "anthropic" && !anthropicKey) {
1625
- throw new Error("Anthropic API key is required for Anthropic provider. Provide anthropicApiKey in options or set ANTHROPIC_API_KEY environment variable.");
1626
- }
1627
- const storyboardOptions = {
1628
- muxTokenId,
1629
- muxTokenSecret};
1630
- const assetInfo = await getAssetInfo(assetId, storyboardOptions);
1631
- const imageUrl = `https://image.mux.com/${assetInfo.playbackId}/storyboard.png?width=640`;
1632
- let analysisResult = null;
1633
- if (provider === "openai") {
1634
- const OpenAI5 = __require("openai").default;
1635
- const openaiClient = new OpenAI5({ apiKey: openaiKey });
1636
- if (imageSubmissionMode === "base64") {
1637
- const { downloadImageAsBase64: downloadImageAsBase642 } = (init_image_download(), __toCommonJS(image_download_exports));
1638
- const downloadResult = await downloadImageAsBase642(imageUrl, imageDownloadOptions);
1639
- const response = await openaiClient.responses.parse({
1640
- model: finalModel,
1641
- input: [
1642
- {
1643
- role: "system",
1644
- content: ANTHROPIC_SYSTEM_PROMPT
1645
- },
1646
- {
1647
- role: "user",
1648
- content: [
1649
- {
1650
- type: "input_text",
1651
- text: ANTHROPIC_USER_PROMPT
1652
- },
1653
- {
1654
- type: "input_image",
1655
- image_url: downloadResult.base64Data,
1656
- detail: "high"
1657
- }
1658
- ]
1659
- }
1660
- ],
1661
- text: {
1662
- format: zodTextFormat(burnedInCaptionsSchema, "analysis")
1663
- }
1664
- });
1665
- analysisResult = response.output_parsed;
1666
- } else {
1667
- let retryAttempt = 0;
1668
- const maxRetries = 3;
1669
- while (retryAttempt <= maxRetries) {
1670
- try {
1671
- const response = await openaiClient.responses.parse({
1672
- model: finalModel,
1673
- input: [
1674
- {
1675
- role: "system",
1676
- content: ANTHROPIC_SYSTEM_PROMPT
1677
- },
1678
- {
1679
- role: "user",
1680
- content: [
1681
- {
1682
- type: "input_text",
1683
- text: ANTHROPIC_USER_PROMPT
1684
- },
1685
- {
1686
- type: "input_image",
1687
- image_url: imageUrl,
1688
- detail: "high"
1689
- }
1690
- ]
1691
- }
1692
- ],
1693
- text: {
1694
- format: zodTextFormat(burnedInCaptionsSchema, "analysis")
1695
- }
1696
- });
1697
- analysisResult = response.output_parsed;
1698
- break;
1699
- } catch (error) {
1700
- const isTimeoutError = error instanceof Error && error.message && error.message.includes("Timeout while downloading");
1701
- if (isTimeoutError && retryAttempt < maxRetries) {
1702
- await new Promise((resolve) => setTimeout(resolve, 5e3));
1703
- retryAttempt++;
1704
- continue;
1705
- }
1706
- throw new Error(`Failed to analyze storyboard with OpenAI: ${error instanceof Error ? error.message : "Unknown error"}`);
1707
- }
1708
- }
1709
- }
1710
- } else if (provider === "anthropic") {
1711
- const anthropicPrompt = `${ANTHROPIC_USER_PROMPT}
1712
-
1713
- ${ANTHROPIC_JSON_PROMPT3}`;
1714
- const responseParser = (response) => {
1715
- const content = response.content[0];
1716
- if (content.type === "text") {
1717
- const jsonText = content.text.trim();
1718
- try {
1719
- return JSON.parse(jsonText);
1720
- } catch (parseError) {
1721
- throw new Error(`Failed to parse JSON response from Anthropic: ${parseError instanceof Error ? parseError.message : "Unknown error"}`);
1722
- }
1723
- } else {
1724
- throw new Error("Unexpected response type from Anthropic");
1725
- }
1726
- };
1727
- analysisResult = await processStoryboardWithAnthropic(
1728
- imageUrl,
1729
- anthropicPrompt,
1730
- {
1731
- apiKey: anthropicKey,
1732
- model: finalModel,
1733
- responseParser,
1734
- imageSubmissionMode,
1735
- imageDownloadOptions
2162
+ console.log("\u{1F4E4} Uploading translated VTT to S3-compatible storage...");
2163
+ const s3Client = new import_client_s32.S3Client({
2164
+ region: s3Region,
2165
+ endpoint: s3Endpoint,
2166
+ credentials: {
2167
+ accessKeyId: s3AccessKeyId,
2168
+ secretAccessKey: s3SecretAccessKey
2169
+ },
2170
+ forcePathStyle: true
2171
+ // Often needed for non-AWS S3 services
2172
+ });
2173
+ const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
2174
+ let presignedUrl;
2175
+ try {
2176
+ const upload = new import_lib_storage2.Upload({
2177
+ client: s3Client,
2178
+ params: {
2179
+ Bucket: s3Bucket,
2180
+ Key: vttKey,
2181
+ Body: translatedVtt,
2182
+ ContentType: "text/vtt"
1736
2183
  }
1737
- );
1738
- } else {
1739
- throw new Error(`Unsupported provider: ${provider}`);
2184
+ });
2185
+ await upload.done();
2186
+ console.log(`\u2705 VTT uploaded successfully to: ${vttKey}`);
2187
+ const getObjectCommand = new import_client_s32.GetObjectCommand({
2188
+ Bucket: s3Bucket,
2189
+ Key: vttKey
2190
+ });
2191
+ presignedUrl = await (0, import_s3_request_presigner2.getSignedUrl)(s3Client, getObjectCommand, {
2192
+ expiresIn: 3600
2193
+ // 1 hour
2194
+ });
2195
+ console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
2196
+ } catch (error) {
2197
+ throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
1740
2198
  }
1741
- if (!analysisResult) {
1742
- throw new Error("No analysis result received from AI provider");
2199
+ console.log("\u{1F4F9} Adding translated track to Mux asset...");
2200
+ let uploadedTrackId;
2201
+ try {
2202
+ const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(toLanguageCode) || toLanguageCode.toUpperCase();
2203
+ const trackName = `${languageName} (auto-translated)`;
2204
+ const trackResponse = await clients.mux.video.assets.createTrack(assetId, {
2205
+ type: "text",
2206
+ text_type: "subtitles",
2207
+ language_code: toLanguageCode,
2208
+ name: trackName,
2209
+ url: presignedUrl
2210
+ });
2211
+ uploadedTrackId = trackResponse.id;
2212
+ console.log(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
2213
+ console.log(`\u{1F4CB} Track name: "${trackName}"`);
2214
+ } catch (error) {
2215
+ console.warn(`\u26A0\uFE0F Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
2216
+ console.log("\u{1F517} You can manually add the track using this presigned URL:");
2217
+ console.log(presignedUrl);
1743
2218
  }
1744
2219
  return {
1745
2220
  assetId,
1746
- hasBurnedInCaptions: analysisResult.hasBurnedInCaptions ?? false,
1747
- confidence: analysisResult.confidence ?? 0,
1748
- detectedLanguage: analysisResult.detectedLanguage ?? null,
1749
- storyboardUrl: imageUrl
2221
+ sourceLanguageCode: fromLanguageCode,
2222
+ targetLanguageCode: toLanguageCode,
2223
+ originalVtt: vttContent,
2224
+ translatedVtt,
2225
+ uploadedTrackId,
2226
+ presignedUrl
1750
2227
  };
1751
2228
  }
1752
2229
 
1753
2230
  // src/index.ts
1754
2231
  var version = "0.1.0";
1755
-
1756
- export { generateChapters, getModerationScores, getSummaryAndTags, hasBurnedInCaptions, translateAudio, translateCaptions, version };
1757
- //# sourceMappingURL=index.js.map
2232
+ // Annotate the CommonJS export names for ESM import in node:
2233
+ 0 && (module.exports = {
2234
+ primitives,
2235
+ version,
2236
+ workflows
2237
+ });
1758
2238
  //# sourceMappingURL=index.js.map