@mux/ai 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,194 +1,25 @@
1
- "use strict";
2
- var __create = Object.create;
3
- var __defProp = Object.defineProperty;
4
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
- var __getOwnPropNames = Object.getOwnPropertyNames;
6
- var __getProtoOf = Object.getPrototypeOf;
7
- var __hasOwnProp = Object.prototype.hasOwnProperty;
8
- var __export = (target, all) => {
9
- for (var name in all)
10
- __defProp(target, name, { get: all[name], enumerable: true });
11
- };
12
- var __copyProps = (to, from, except, desc) => {
13
- if (from && typeof from === "object" || typeof from === "function") {
14
- for (let key of __getOwnPropNames(from))
15
- if (!__hasOwnProp.call(to, key) && key !== except)
16
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
- }
18
- return to;
19
- };
20
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
- // If the importer is in node compatibility mode or this is not an ESM
22
- // file that has been converted to a CommonJS file using a Babel-
23
- // compatible transform (i.e. "__esModule" has not been set), then set
24
- // "default" to the CommonJS "module.exports" for node compatibility.
25
- isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
- mod
27
- ));
28
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
-
30
- // src/workflows/index.ts
31
- var workflows_exports = {};
32
- __export(workflows_exports, {
33
- SUMMARY_KEYWORD_LIMIT: () => SUMMARY_KEYWORD_LIMIT,
34
- burnedInCaptionsSchema: () => burnedInCaptionsSchema,
35
- chapterSchema: () => chapterSchema,
36
- chaptersSchema: () => chaptersSchema,
37
- generateChapters: () => generateChapters,
38
- generateVideoEmbeddings: () => generateVideoEmbeddings,
39
- getModerationScores: () => getModerationScores,
40
- getSummaryAndTags: () => getSummaryAndTags,
41
- hasBurnedInCaptions: () => hasBurnedInCaptions,
42
- summarySchema: () => summarySchema,
43
- translateAudio: () => translateAudio,
44
- translateCaptions: () => translateCaptions,
45
- translationSchema: () => translationSchema
46
- });
47
- module.exports = __toCommonJS(workflows_exports);
48
-
49
- // src/workflows/burned-in-captions.ts
50
- var import_ai = require("ai");
51
-
52
- // node_modules/dedent/dist/dedent.mjs
53
- function ownKeys(object, enumerableOnly) {
54
- var keys = Object.keys(object);
55
- if (Object.getOwnPropertySymbols) {
56
- var symbols = Object.getOwnPropertySymbols(object);
57
- enumerableOnly && (symbols = symbols.filter(function(sym) {
58
- return Object.getOwnPropertyDescriptor(object, sym).enumerable;
59
- })), keys.push.apply(keys, symbols);
60
- }
61
- return keys;
62
- }
63
- function _objectSpread(target) {
64
- for (var i = 1; i < arguments.length; i++) {
65
- var source = null != arguments[i] ? arguments[i] : {};
66
- i % 2 ? ownKeys(Object(source), true).forEach(function(key) {
67
- _defineProperty(target, key, source[key]);
68
- }) : Object.getOwnPropertyDescriptors ? Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)) : ownKeys(Object(source)).forEach(function(key) {
69
- Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
70
- });
71
- }
72
- return target;
73
- }
74
- function _defineProperty(obj, key, value) {
75
- key = _toPropertyKey(key);
76
- if (key in obj) {
77
- Object.defineProperty(obj, key, { value, enumerable: true, configurable: true, writable: true });
78
- } else {
79
- obj[key] = value;
80
- }
81
- return obj;
82
- }
83
- function _toPropertyKey(arg) {
84
- var key = _toPrimitive(arg, "string");
85
- return typeof key === "symbol" ? key : String(key);
86
- }
87
- function _toPrimitive(input, hint) {
88
- if (typeof input !== "object" || input === null) return input;
89
- var prim = input[Symbol.toPrimitive];
90
- if (prim !== void 0) {
91
- var res = prim.call(input, hint || "default");
92
- if (typeof res !== "object") return res;
93
- throw new TypeError("@@toPrimitive must return a primitive value.");
94
- }
95
- return (hint === "string" ? String : Number)(input);
96
- }
97
- var dedent = createDedent({});
98
- var dedent_default = dedent;
99
- function createDedent(options) {
100
- dedent2.withOptions = (newOptions) => createDedent(_objectSpread(_objectSpread({}, options), newOptions));
101
- return dedent2;
102
- function dedent2(strings, ...values) {
103
- const raw = typeof strings === "string" ? [strings] : strings.raw;
104
- const {
105
- alignValues = false,
106
- escapeSpecialCharacters = Array.isArray(strings),
107
- trimWhitespace = true
108
- } = options;
109
- let result = "";
110
- for (let i = 0; i < raw.length; i++) {
111
- let next = raw[i];
112
- if (escapeSpecialCharacters) {
113
- next = next.replace(/\\\n[ \t]*/g, "").replace(/\\`/g, "`").replace(/\\\$/g, "$").replace(/\\\{/g, "{");
114
- }
115
- result += next;
116
- if (i < values.length) {
117
- const value = alignValues ? alignValue(values[i], result) : values[i];
118
- result += value;
119
- }
120
- }
121
- const lines = result.split("\n");
122
- let mindent = null;
123
- for (const l of lines) {
124
- const m = l.match(/^(\s+)\S+/);
125
- if (m) {
126
- const indent = m[1].length;
127
- if (!mindent) {
128
- mindent = indent;
129
- } else {
130
- mindent = Math.min(mindent, indent);
131
- }
132
- }
133
- }
134
- if (mindent !== null) {
135
- const m = mindent;
136
- result = lines.map((l) => l[0] === " " || l[0] === " " ? l.slice(m) : l).join("\n");
137
- }
138
- if (trimWhitespace) {
139
- result = result.trim();
140
- }
141
- if (escapeSpecialCharacters) {
142
- result = result.replace(/\\n/g, "\n");
143
- }
144
- return result;
145
- }
146
- }
147
- function alignValue(value, precedingText) {
148
- if (typeof value !== "string" || !value.includes("\n")) {
149
- return value;
150
- }
151
- const currentLine = precedingText.slice(precedingText.lastIndexOf("\n") + 1);
152
- const indentMatch = currentLine.match(/^(\s+)/);
153
- if (indentMatch) {
154
- const indent = indentMatch[1];
155
- return value.replace(/\n/g, `
156
- ${indent}`);
157
- }
158
- return value;
159
- }
160
-
161
1
  // src/workflows/burned-in-captions.ts
162
- var import_zod2 = require("zod");
163
-
164
- // src/lib/client-factory.ts
165
- var import_mux_node = __toESM(require("@mux/mux-node"));
2
+ import { generateObject } from "ai";
3
+ import dedent from "dedent";
4
+ import { z as z2 } from "zod";
166
5
 
167
6
  // src/env.ts
168
- var import_node_path = __toESM(require("path"));
169
- var import_dotenv = require("dotenv");
170
- var import_dotenv_expand = require("dotenv-expand");
171
- var import_zod = require("zod");
172
- (0, import_dotenv_expand.expand)((0, import_dotenv.config)({
173
- path: import_node_path.default.resolve(
174
- process.cwd(),
175
- process.env.NODE_ENV === "test" ? ".env.test" : ".env"
176
- )
177
- }));
7
+ import { z } from "zod";
8
+ import "dotenv/config";
178
9
  function optionalString(description, message) {
179
- return import_zod.z.preprocess(
10
+ return z.preprocess(
180
11
  (value) => typeof value === "string" && value.trim().length === 0 ? void 0 : value,
181
- import_zod.z.string().trim().min(1, message).optional()
12
+ z.string().trim().min(1, message).optional()
182
13
  ).describe(description);
183
14
  }
184
15
  function requiredString(description, message) {
185
- return import_zod.z.preprocess(
16
+ return z.preprocess(
186
17
  (value) => typeof value === "string" ? value.trim().length > 0 ? value.trim() : void 0 : value,
187
- import_zod.z.string().trim().min(1, message)
18
+ z.string().trim().min(1, message)
188
19
  ).describe(description);
189
20
  }
190
- var EnvSchema = import_zod.z.object({
191
- NODE_ENV: import_zod.z.string().default("development").describe("Runtime environment."),
21
+ var EnvSchema = z.object({
22
+ NODE_ENV: z.string().default("development").describe("Runtime environment."),
192
23
  MUX_TOKEN_ID: requiredString("Mux access token ID.", "Required to access Mux APIs"),
193
24
  MUX_TOKEN_SECRET: requiredString("Mux access token secret.", "Required to access Mux APIs"),
194
25
  MUX_SIGNING_KEY: optionalString("Mux signing key ID for signed playback URLs.", "Used to sign playback URLs"),
@@ -217,12 +48,12 @@ var env = parseEnv();
217
48
  var env_default = env;
218
49
 
219
50
  // src/lib/providers.ts
220
- var import_anthropic = require("@ai-sdk/anthropic");
221
- var import_google = require("@ai-sdk/google");
222
- var import_openai = require("@ai-sdk/openai");
51
+ import { createAnthropic } from "@ai-sdk/anthropic";
52
+ import { createGoogleGenerativeAI } from "@ai-sdk/google";
53
+ import { createOpenAI } from "@ai-sdk/openai";
223
54
  var DEFAULT_LANGUAGE_MODELS = {
224
- openai: "gpt-5-mini",
225
- anthropic: "claude-haiku-4-5",
55
+ openai: "gpt-5.1",
56
+ anthropic: "claude-sonnet-4-5",
226
57
  google: "gemini-2.5-flash"
227
58
  };
228
59
  var DEFAULT_EMBEDDING_MODELS = {
@@ -235,6 +66,52 @@ function requireEnv(value, name) {
235
66
  }
236
67
  return value;
237
68
  }
69
+ function createLanguageModelFromConfig(provider, modelId, credentials) {
70
+ switch (provider) {
71
+ case "openai": {
72
+ const apiKey = credentials.openaiApiKey;
73
+ requireEnv(apiKey, "OPENAI_API_KEY");
74
+ const openai = createOpenAI({ apiKey });
75
+ return openai(modelId);
76
+ }
77
+ case "anthropic": {
78
+ const apiKey = credentials.anthropicApiKey;
79
+ requireEnv(apiKey, "ANTHROPIC_API_KEY");
80
+ const anthropic = createAnthropic({ apiKey });
81
+ return anthropic(modelId);
82
+ }
83
+ case "google": {
84
+ const apiKey = credentials.googleApiKey;
85
+ requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
86
+ const google = createGoogleGenerativeAI({ apiKey });
87
+ return google(modelId);
88
+ }
89
+ default: {
90
+ const exhaustiveCheck = provider;
91
+ throw new Error(`Unsupported provider: ${exhaustiveCheck}`);
92
+ }
93
+ }
94
+ }
95
+ function createEmbeddingModelFromConfig(provider, modelId, credentials) {
96
+ switch (provider) {
97
+ case "openai": {
98
+ const apiKey = credentials.openaiApiKey;
99
+ requireEnv(apiKey, "OPENAI_API_KEY");
100
+ const openai = createOpenAI({ apiKey });
101
+ return openai.embedding(modelId);
102
+ }
103
+ case "google": {
104
+ const apiKey = credentials.googleApiKey;
105
+ requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
106
+ const google = createGoogleGenerativeAI({ apiKey });
107
+ return google.textEmbeddingModel(modelId);
108
+ }
109
+ default: {
110
+ const exhaustiveCheck = provider;
111
+ throw new Error(`Unsupported embedding provider: ${exhaustiveCheck}`);
112
+ }
113
+ }
114
+ }
238
115
  function resolveLanguageModel(options = {}) {
239
116
  const provider = options.provider || "openai";
240
117
  const modelId = options.model || DEFAULT_LANGUAGE_MODELS[provider];
@@ -242,7 +119,7 @@ function resolveLanguageModel(options = {}) {
242
119
  case "openai": {
243
120
  const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
244
121
  requireEnv(apiKey, "OPENAI_API_KEY");
245
- const openai = (0, import_openai.createOpenAI)({
122
+ const openai = createOpenAI({
246
123
  apiKey
247
124
  });
248
125
  return {
@@ -254,7 +131,7 @@ function resolveLanguageModel(options = {}) {
254
131
  case "anthropic": {
255
132
  const apiKey = options.anthropicApiKey ?? env_default.ANTHROPIC_API_KEY;
256
133
  requireEnv(apiKey, "ANTHROPIC_API_KEY");
257
- const anthropic = (0, import_anthropic.createAnthropic)({
134
+ const anthropic = createAnthropic({
258
135
  apiKey
259
136
  });
260
137
  return {
@@ -266,7 +143,7 @@ function resolveLanguageModel(options = {}) {
266
143
  case "google": {
267
144
  const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
268
145
  requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
269
- const google = (0, import_google.createGoogleGenerativeAI)({
146
+ const google = createGoogleGenerativeAI({
270
147
  apiKey
271
148
  });
272
149
  return {
@@ -288,7 +165,7 @@ function resolveEmbeddingModel(options = {}) {
288
165
  case "openai": {
289
166
  const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
290
167
  requireEnv(apiKey, "OPENAI_API_KEY");
291
- const openai = (0, import_openai.createOpenAI)({
168
+ const openai = createOpenAI({
292
169
  apiKey
293
170
  });
294
171
  return {
@@ -300,7 +177,7 @@ function resolveEmbeddingModel(options = {}) {
300
177
  case "google": {
301
178
  const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
302
179
  requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
303
- const google = (0, import_google.createGoogleGenerativeAI)({
180
+ const google = createGoogleGenerativeAI({
304
181
  apiKey
305
182
  });
306
183
  return {
@@ -317,7 +194,8 @@ function resolveEmbeddingModel(options = {}) {
317
194
  }
318
195
 
319
196
  // src/lib/client-factory.ts
320
- function validateCredentials(options, requiredProvider) {
197
+ async function validateCredentials(options, requiredProvider) {
198
+ "use step";
321
199
  const muxTokenId = options.muxTokenId ?? env_default.MUX_TOKEN_ID;
322
200
  const muxTokenSecret = options.muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
323
201
  const openaiApiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
@@ -351,32 +229,24 @@ function validateCredentials(options, requiredProvider) {
351
229
  googleApiKey
352
230
  };
353
231
  }
354
- function createMuxClient(credentials) {
355
- if (!credentials.muxTokenId || !credentials.muxTokenSecret) {
356
- throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
357
- }
358
- return new import_mux_node.default({
359
- tokenId: credentials.muxTokenId,
360
- tokenSecret: credentials.muxTokenSecret
361
- });
362
- }
363
- function createWorkflowClients(options, provider) {
232
+ async function createWorkflowConfig(options, provider) {
233
+ "use step";
364
234
  const providerToUse = provider || options.provider || "openai";
365
- const credentials = validateCredentials(options, providerToUse);
366
- const languageModel = resolveLanguageModel({
235
+ const credentials = await validateCredentials(options, providerToUse);
236
+ const resolved = resolveLanguageModel({
367
237
  ...options,
368
238
  provider: providerToUse
369
239
  });
370
240
  return {
371
- mux: createMuxClient(credentials),
372
- languageModel,
373
- credentials
241
+ credentials,
242
+ provider: resolved.provider,
243
+ modelId: resolved.modelId
374
244
  };
375
245
  }
376
246
 
377
247
  // src/lib/image-download.ts
378
- var import_node_buffer = require("buffer");
379
- var import_p_retry = __toESM(require("p-retry"));
248
+ import { Buffer } from "buffer";
249
+ import pRetry, { AbortError } from "p-retry";
380
250
  var DEFAULT_OPTIONS = {
381
251
  timeout: 1e4,
382
252
  retries: 3,
@@ -385,9 +255,10 @@ var DEFAULT_OPTIONS = {
385
255
  exponentialBackoff: true
386
256
  };
387
257
  async function downloadImageAsBase64(url, options = {}) {
258
+ "use step";
388
259
  const opts = { ...DEFAULT_OPTIONS, ...options };
389
260
  let attemptCount = 0;
390
- return (0, import_p_retry.default)(
261
+ return pRetry(
391
262
  async () => {
392
263
  attemptCount++;
393
264
  const controller = new AbortController();
@@ -402,18 +273,18 @@ async function downloadImageAsBase64(url, options = {}) {
402
273
  clearTimeout(timeoutId);
403
274
  if (!response.ok) {
404
275
  if (response.status >= 400 && response.status < 500 && response.status !== 429) {
405
- throw new import_p_retry.AbortError(`HTTP ${response.status}: ${response.statusText}`);
276
+ throw new AbortError(`HTTP ${response.status}: ${response.statusText}`);
406
277
  }
407
278
  throw new Error(`HTTP ${response.status}: ${response.statusText}`);
408
279
  }
409
280
  const contentType = response.headers.get("content-type");
410
281
  if (!contentType?.startsWith("image/")) {
411
- throw new import_p_retry.AbortError(`Invalid content type: ${contentType}. Expected image/*`);
282
+ throw new AbortError(`Invalid content type: ${contentType}. Expected image/*`);
412
283
  }
413
284
  const arrayBuffer = await response.arrayBuffer();
414
- const buffer = import_node_buffer.Buffer.from(arrayBuffer);
285
+ const buffer = Buffer.from(arrayBuffer);
415
286
  if (buffer.length === 0) {
416
- throw new import_p_retry.AbortError("Downloaded image is empty");
287
+ throw new AbortError("Downloaded image is empty");
417
288
  }
418
289
  const base64Data = `data:${contentType};base64,${buffer.toString("base64")}`;
419
290
  return {
@@ -426,7 +297,7 @@ async function downloadImageAsBase64(url, options = {}) {
426
297
  };
427
298
  } catch (error) {
428
299
  clearTimeout(timeoutId);
429
- if (error instanceof import_p_retry.AbortError) {
300
+ if (error instanceof AbortError) {
430
301
  throw error;
431
302
  }
432
303
  if (error instanceof Error) {
@@ -455,6 +326,7 @@ async function downloadImageAsBase64(url, options = {}) {
455
326
  );
456
327
  }
457
328
  async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
329
+ "use step";
458
330
  const results = [];
459
331
  for (let i = 0; i < urls.length; i += maxConcurrent) {
460
332
  const batch = urls.slice(i, i + maxConcurrent);
@@ -466,6 +338,7 @@ async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
466
338
  }
467
339
 
468
340
  // src/lib/mux-assets.ts
341
+ import Mux from "@mux/mux-node";
469
342
  function getPlaybackId(asset) {
470
343
  const playbackIds = asset.playback_ids || [];
471
344
  const publicPlaybackId = playbackIds.find((pid) => pid.policy === "public");
@@ -480,7 +353,12 @@ function getPlaybackId(asset) {
480
353
  "No public or signed playback ID found for this asset. A public or signed playback ID is required. DRM playback IDs are not currently supported."
481
354
  );
482
355
  }
483
- async function getPlaybackIdForAsset(mux, assetId) {
356
+ async function getPlaybackIdForAsset(credentials, assetId) {
357
+ "use step";
358
+ const mux = new Mux({
359
+ tokenId: credentials.muxTokenId,
360
+ tokenSecret: credentials.muxTokenSecret
361
+ });
484
362
  const asset = await mux.video.assets.retrieve(assetId);
485
363
  const { id: playbackId, policy } = getPlaybackId(asset);
486
364
  return { asset, playbackId, policy };
@@ -519,8 +397,8 @@ function resolveSection(defaultSection, override) {
519
397
  }
520
398
  return override;
521
399
  }
522
- function createPromptBuilder(config2) {
523
- const { template, sectionOrder } = config2;
400
+ function createPromptBuilder(config) {
401
+ const { template, sectionOrder } = config;
524
402
  const getSection = (section, override) => {
525
403
  const resolved = resolveSection(template[section], override);
526
404
  return renderSection(resolved);
@@ -561,17 +439,18 @@ function createToneSection(instruction) {
561
439
  }
562
440
 
563
441
  // src/lib/url-signing.ts
564
- var import_mux_node2 = __toESM(require("@mux/mux-node"));
565
- function resolveSigningContext(config2) {
566
- const keyId = config2.muxSigningKey ?? env_default.MUX_SIGNING_KEY;
567
- const keySecret = config2.muxPrivateKey ?? env_default.MUX_PRIVATE_KEY;
442
+ import Mux2 from "@mux/mux-node";
443
+ async function resolveSigningContext(config) {
444
+ "use step";
445
+ const keyId = config.muxSigningKey ?? env_default.MUX_SIGNING_KEY;
446
+ const keySecret = config.muxPrivateKey ?? env_default.MUX_PRIVATE_KEY;
568
447
  if (!keyId || !keySecret) {
569
448
  return void 0;
570
449
  }
571
450
  return { keyId, keySecret };
572
451
  }
573
452
  function createSigningClient(context) {
574
- return new import_mux_node2.default({
453
+ return new Mux2({
575
454
  // These are not needed for signing, but the SDK requires them
576
455
  // Using empty strings as we only need the jwt functionality
577
456
  tokenId: env_default.MUX_TOKEN_ID || "",
@@ -581,6 +460,7 @@ function createSigningClient(context) {
581
460
  });
582
461
  }
583
462
  async function signPlaybackId(playbackId, context, type = "video", params) {
463
+ "use step";
584
464
  const client = createSigningClient(context);
585
465
  const stringParams = params ? Object.fromEntries(
586
466
  Object.entries(params).map(([key, value]) => [key, String(value)])
@@ -592,6 +472,7 @@ async function signPlaybackId(playbackId, context, type = "video", params) {
592
472
  });
593
473
  }
594
474
  async function signUrl(url, playbackId, context, type = "video", params) {
475
+ "use step";
595
476
  const token = await signPlaybackId(playbackId, context, type, params);
596
477
  const separator = url.includes("?") ? "&" : "?";
597
478
  return `${url}${separator}token=${token}`;
@@ -600,6 +481,7 @@ async function signUrl(url, playbackId, context, type = "video", params) {
600
481
  // src/primitives/storyboards.ts
601
482
  var DEFAULT_STORYBOARD_WIDTH = 640;
602
483
  async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, signingContext) {
484
+ "use step";
603
485
  const baseUrl = `https://image.mux.com/${playbackId}/storyboard.png`;
604
486
  if (signingContext) {
605
487
  return signUrl(baseUrl, playbackId, signingContext, "storyboard", { width });
@@ -608,12 +490,12 @@ async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, si
608
490
  }
609
491
 
610
492
  // src/workflows/burned-in-captions.ts
611
- var burnedInCaptionsSchema = import_zod2.z.object({
612
- hasBurnedInCaptions: import_zod2.z.boolean(),
613
- confidence: import_zod2.z.number().min(0).max(1),
614
- detectedLanguage: import_zod2.z.string().nullable()
493
+ var burnedInCaptionsSchema = z2.object({
494
+ hasBurnedInCaptions: z2.boolean(),
495
+ confidence: z2.number().min(0).max(1),
496
+ detectedLanguage: z2.string().nullable()
615
497
  });
616
- var SYSTEM_PROMPT = dedent_default`
498
+ var SYSTEM_PROMPT = dedent`
617
499
  <role>
618
500
  You are an expert at analyzing video frames to detect burned-in captions (also called open captions or hardcoded subtitles).
619
501
  These are text overlays that are permanently embedded in the video image, common on TikTok, Instagram Reels, and other social media platforms.
@@ -656,14 +538,14 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
656
538
  template: {
657
539
  task: {
658
540
  tag: "task",
659
- content: dedent_default`
541
+ content: dedent`
660
542
  Analyze the provided video storyboard to detect burned-in captions (hardcoded subtitles).
661
543
  Count frames with text vs no text, note position consistency and whether text changes across frames.
662
544
  Decide if captions exist, with confidence (0.0-1.0) and detected language if any.`
663
545
  },
664
546
  analysisSteps: {
665
547
  tag: "analysis_steps",
666
- content: dedent_default`
548
+ content: dedent`
667
549
  1. COUNT how many frames contain text overlays vs. how many don't
668
550
  2. Check if text appears in consistent positions across multiple frames
669
551
  3. Verify text changes content between frames (indicating dialogue/narration)
@@ -672,7 +554,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
672
554
  },
673
555
  positiveIndicators: {
674
556
  tag: "classify_as_captions",
675
- content: dedent_default`
557
+ content: dedent`
676
558
  ONLY classify as burned-in captions if:
677
559
  - Text appears in multiple frames (not just 1-2 end frames)
678
560
  - Text positioning is consistent across those frames
@@ -681,7 +563,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
681
563
  },
682
564
  negativeIndicators: {
683
565
  tag: "not_captions",
684
- content: dedent_default`
566
+ content: dedent`
685
567
  DO NOT classify as burned-in captions:
686
568
  - Marketing taglines appearing only in final 1-2 frames
687
569
  - Single words or phrases that don't change between frames
@@ -696,65 +578,97 @@ function buildUserPrompt(promptOverrides) {
696
578
  return burnedInCaptionsPromptBuilder.build(promptOverrides);
697
579
  }
698
580
  var DEFAULT_PROVIDER = "openai";
581
+ async function fetchImageAsBase64(imageUrl, imageDownloadOptions) {
582
+ "use step";
583
+ const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
584
+ return downloadResult.base64Data;
585
+ }
586
+ async function analyzeStoryboard({
587
+ imageDataUrl,
588
+ provider,
589
+ modelId,
590
+ credentials,
591
+ userPrompt,
592
+ systemPrompt
593
+ }) {
594
+ "use step";
595
+ const model = createLanguageModelFromConfig(
596
+ provider,
597
+ modelId,
598
+ credentials
599
+ );
600
+ const response = await generateObject({
601
+ model,
602
+ schema: burnedInCaptionsSchema,
603
+ experimental_telemetry: { isEnabled: true },
604
+ messages: [
605
+ {
606
+ role: "system",
607
+ content: systemPrompt
608
+ },
609
+ {
610
+ role: "user",
611
+ content: [
612
+ { type: "text", text: userPrompt },
613
+ { type: "image", image: imageDataUrl }
614
+ ]
615
+ }
616
+ ]
617
+ });
618
+ return {
619
+ result: response.object,
620
+ usage: {
621
+ inputTokens: response.usage.inputTokens,
622
+ outputTokens: response.usage.outputTokens,
623
+ totalTokens: response.usage.totalTokens,
624
+ reasoningTokens: response.usage.reasoningTokens,
625
+ cachedInputTokens: response.usage.cachedInputTokens
626
+ }
627
+ };
628
+ }
699
629
  async function hasBurnedInCaptions(assetId, options = {}) {
630
+ "use workflow";
700
631
  const {
701
632
  provider = DEFAULT_PROVIDER,
702
633
  model,
703
634
  imageSubmissionMode = "url",
704
635
  imageDownloadOptions,
705
636
  promptOverrides,
706
- ...config2
637
+ ...config
707
638
  } = options;
708
639
  const userPrompt = buildUserPrompt(promptOverrides);
709
- const clients = createWorkflowClients(
710
- { ...config2, model },
640
+ const workflowConfig = await createWorkflowConfig(
641
+ { ...config, model },
711
642
  provider
712
643
  );
713
- const { playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
714
- const signingContext = resolveSigningContext(options);
644
+ const { playbackId, policy } = await getPlaybackIdForAsset(workflowConfig.credentials, assetId);
645
+ const signingContext = await resolveSigningContext(options);
715
646
  if (policy === "signed" && !signingContext) {
716
647
  throw new Error(
717
648
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
718
649
  );
719
650
  }
720
651
  const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
721
- const analyzeStoryboard = async (imageDataUrl) => {
722
- const response = await (0, import_ai.generateObject)({
723
- model: clients.languageModel.model,
724
- schema: burnedInCaptionsSchema,
725
- abortSignal: options.abortSignal,
726
- experimental_telemetry: { isEnabled: true },
727
- messages: [
728
- {
729
- role: "system",
730
- content: SYSTEM_PROMPT
731
- },
732
- {
733
- role: "user",
734
- content: [
735
- { type: "text", text: userPrompt },
736
- { type: "image", image: imageDataUrl }
737
- ]
738
- }
739
- ]
740
- });
741
- return {
742
- result: response.object,
743
- usage: {
744
- inputTokens: response.usage.inputTokens,
745
- outputTokens: response.usage.outputTokens,
746
- totalTokens: response.usage.totalTokens,
747
- reasoningTokens: response.usage.reasoningTokens,
748
- cachedInputTokens: response.usage.cachedInputTokens
749
- }
750
- };
751
- };
752
652
  let analysisResponse;
753
653
  if (imageSubmissionMode === "base64") {
754
- const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
755
- analysisResponse = await analyzeStoryboard(downloadResult.base64Data);
654
+ const base64Data = await fetchImageAsBase64(imageUrl, imageDownloadOptions);
655
+ analysisResponse = await analyzeStoryboard({
656
+ imageDataUrl: base64Data,
657
+ provider: workflowConfig.provider,
658
+ modelId: workflowConfig.modelId,
659
+ credentials: workflowConfig.credentials,
660
+ userPrompt,
661
+ systemPrompt: SYSTEM_PROMPT
662
+ });
756
663
  } else {
757
- analysisResponse = await analyzeStoryboard(imageUrl);
664
+ analysisResponse = await analyzeStoryboard({
665
+ imageDataUrl: imageUrl,
666
+ provider: workflowConfig.provider,
667
+ modelId: workflowConfig.modelId,
668
+ credentials: workflowConfig.credentials,
669
+ userPrompt,
670
+ systemPrompt: SYSTEM_PROMPT
671
+ });
758
672
  }
759
673
  if (!analysisResponse.result) {
760
674
  throw new Error("No analysis result received from AI provider");
@@ -770,8 +684,8 @@ async function hasBurnedInCaptions(assetId, options = {}) {
770
684
  }
771
685
 
772
686
  // src/workflows/chapters.ts
773
- var import_ai2 = require("ai");
774
- var import_zod3 = require("zod");
687
+ import { generateObject as generateObject2 } from "ai";
688
+ import { z as z3 } from "zod";
775
689
 
776
690
  // src/lib/retry.ts
777
691
  var DEFAULT_RETRY_OPTIONS = {
@@ -803,11 +717,11 @@ async function withRetry(fn, {
803
717
  if (isLastAttempt || !shouldRetry(lastError, attempt + 1)) {
804
718
  throw lastError;
805
719
  }
806
- const delay2 = calculateDelay(attempt + 1, baseDelay, maxDelay);
720
+ const delay = calculateDelay(attempt + 1, baseDelay, maxDelay);
807
721
  console.warn(
808
- `Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(delay2)}ms...`
722
+ `Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(delay)}ms...`
809
723
  );
810
- await new Promise((resolve) => setTimeout(resolve, delay2));
724
+ await new Promise((resolve) => setTimeout(resolve, delay));
811
725
  }
812
726
  }
813
727
  throw lastError || new Error("Retry failed with unknown error");
@@ -922,6 +836,7 @@ function parseVTTCues(vttContent) {
922
836
  return cues;
923
837
  }
924
838
  async function buildTranscriptUrl(playbackId, trackId, signingContext) {
839
+ "use step";
925
840
  const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
926
841
  if (signingContext) {
927
842
  return signUrl(baseUrl, playbackId, signingContext, "video");
@@ -929,6 +844,7 @@ async function buildTranscriptUrl(playbackId, trackId, signingContext) {
929
844
  return baseUrl;
930
845
  }
931
846
  async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
847
+ "use step";
932
848
  const { languageCode, cleanTranscript = true, signingContext } = options;
933
849
  const track = findCaptionTrack(asset, languageCode);
934
850
  if (!track) {
@@ -953,14 +869,44 @@ async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
953
869
  }
954
870
 
955
871
  // src/workflows/chapters.ts
956
- var chapterSchema = import_zod3.z.object({
957
- startTime: import_zod3.z.number(),
958
- title: import_zod3.z.string()
872
+ var chapterSchema = z3.object({
873
+ startTime: z3.number(),
874
+ title: z3.string()
959
875
  });
960
- var chaptersSchema = import_zod3.z.object({
961
- chapters: import_zod3.z.array(chapterSchema)
876
+ var chaptersSchema = z3.object({
877
+ chapters: z3.array(chapterSchema)
962
878
  });
963
- var DEFAULT_PROVIDER2 = "openai";
879
+ async function generateChaptersWithAI({
880
+ provider,
881
+ modelId,
882
+ credentials,
883
+ timestampedTranscript,
884
+ systemPrompt
885
+ }) {
886
+ "use step";
887
+ const model = createLanguageModelFromConfig(
888
+ provider,
889
+ modelId,
890
+ credentials
891
+ );
892
+ const response = await withRetry(
893
+ () => generateObject2({
894
+ model,
895
+ schema: chaptersSchema,
896
+ messages: [
897
+ {
898
+ role: "system",
899
+ content: systemPrompt
900
+ },
901
+ {
902
+ role: "user",
903
+ content: timestampedTranscript
904
+ }
905
+ ]
906
+ })
907
+ );
908
+ return response.object;
909
+ }
964
910
  var SYSTEM_PROMPT2 = `Your role is to segment the following captions into chunked chapters, summarising each chapter with a title.
965
911
 
966
912
  Analyze the transcript and create logical chapter breaks based on topic changes, major transitions, or distinct sections of content. Each chapter should represent a meaningful segment of the video.
@@ -982,10 +928,11 @@ Important rules:
982
928
  - Do not include any text before or after the JSON
983
929
  - The JSON must be valid and parseable`;
984
930
  async function generateChapters(assetId, languageCode, options = {}) {
985
- const { provider = DEFAULT_PROVIDER2, model, abortSignal } = options;
986
- const clients = createWorkflowClients({ ...options, model }, provider);
987
- const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
988
- const signingContext = resolveSigningContext(options);
931
+ "use workflow";
932
+ const { provider = "openai", model } = options;
933
+ const config = await createWorkflowConfig({ ...options, model }, provider);
934
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(config.credentials, assetId);
935
+ const signingContext = await resolveSigningContext(options);
989
936
  if (policy === "signed" && !signingContext) {
990
937
  throw new Error(
991
938
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1009,24 +956,13 @@ async function generateChapters(assetId, languageCode, options = {}) {
1009
956
  }
1010
957
  let chaptersData = null;
1011
958
  try {
1012
- const response = await withRetry(
1013
- () => (0, import_ai2.generateObject)({
1014
- model: clients.languageModel.model,
1015
- schema: chaptersSchema,
1016
- abortSignal,
1017
- messages: [
1018
- {
1019
- role: "system",
1020
- content: SYSTEM_PROMPT2
1021
- },
1022
- {
1023
- role: "user",
1024
- content: timestampedTranscript
1025
- }
1026
- ]
1027
- })
1028
- );
1029
- chaptersData = response.object;
959
+ chaptersData = await generateChaptersWithAI({
960
+ provider: config.provider,
961
+ modelId: config.modelId,
962
+ credentials: config.credentials,
963
+ timestampedTranscript,
964
+ systemPrompt: SYSTEM_PROMPT2
965
+ });
1030
966
  } catch (error) {
1031
967
  throw new Error(
1032
968
  `Failed to generate chapters with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
@@ -1050,7 +986,7 @@ async function generateChapters(assetId, languageCode, options = {}) {
1050
986
  }
1051
987
 
1052
988
  // src/workflows/embeddings.ts
1053
- var import_ai3 = require("ai");
989
+ import { embed } from "ai";
1054
990
 
1055
991
  // src/primitives/text-chunking.ts
1056
992
  function estimateTokenCount(text) {
@@ -1138,13 +1074,6 @@ function chunkText(text, strategy) {
1138
1074
  }
1139
1075
 
1140
1076
  // src/workflows/embeddings.ts
1141
- var DEFAULT_PROVIDER3 = "openai";
1142
- var DEFAULT_CHUNKING_STRATEGY = {
1143
- type: "token",
1144
- maxTokens: 500,
1145
- overlap: 100
1146
- };
1147
- var DEFAULT_BATCH_SIZE = 5;
1148
1077
  function averageEmbeddings(embeddings) {
1149
1078
  if (embeddings.length === 0) {
1150
1079
  return [];
@@ -1161,51 +1090,46 @@ function averageEmbeddings(embeddings) {
1161
1090
  }
1162
1091
  return averaged;
1163
1092
  }
1164
- async function generateChunkEmbeddings(chunks, model, batchSize, abortSignal) {
1165
- const results = [];
1166
- for (let i = 0; i < chunks.length; i += batchSize) {
1167
- const batch = chunks.slice(i, i + batchSize);
1168
- const batchResults = await Promise.all(
1169
- batch.map(async (chunk) => {
1170
- const response = await withRetry(
1171
- () => (0, import_ai3.embed)({
1172
- model,
1173
- value: chunk.text,
1174
- abortSignal
1175
- })
1176
- );
1177
- return {
1178
- chunkId: chunk.id,
1179
- embedding: response.embedding,
1180
- metadata: {
1181
- startTime: chunk.startTime,
1182
- endTime: chunk.endTime,
1183
- tokenCount: chunk.tokenCount
1184
- }
1185
- };
1186
- })
1187
- );
1188
- results.push(...batchResults);
1189
- }
1190
- return results;
1093
+ async function generateSingleChunkEmbedding({
1094
+ chunk,
1095
+ provider,
1096
+ modelId,
1097
+ credentials
1098
+ }) {
1099
+ "use step";
1100
+ const model = createEmbeddingModelFromConfig(provider, modelId, credentials);
1101
+ const response = await withRetry(
1102
+ () => embed({
1103
+ model,
1104
+ value: chunk.text
1105
+ })
1106
+ );
1107
+ return {
1108
+ chunkId: chunk.id,
1109
+ embedding: response.embedding,
1110
+ metadata: {
1111
+ startTime: chunk.startTime,
1112
+ endTime: chunk.endTime,
1113
+ tokenCount: chunk.tokenCount
1114
+ }
1115
+ };
1191
1116
  }
1192
1117
  async function generateVideoEmbeddings(assetId, options = {}) {
1118
+ "use workflow";
1193
1119
  const {
1194
- provider = DEFAULT_PROVIDER3,
1120
+ provider = "openai",
1195
1121
  model,
1196
1122
  languageCode,
1197
- chunkingStrategy = DEFAULT_CHUNKING_STRATEGY,
1198
- batchSize = DEFAULT_BATCH_SIZE,
1199
- abortSignal
1123
+ chunkingStrategy = { type: "token", maxTokens: 500, overlap: 100 },
1124
+ batchSize = 5
1200
1125
  } = options;
1201
- const credentials = validateCredentials(options, provider === "google" ? "google" : "openai");
1202
- const muxClient = createMuxClient(credentials);
1126
+ const credentials = await validateCredentials(options, provider === "google" ? "google" : "openai");
1203
1127
  const embeddingModel = resolveEmbeddingModel({ ...options, provider, model });
1204
1128
  const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
1205
- muxClient,
1129
+ credentials,
1206
1130
  assetId
1207
1131
  );
1208
- const signingContext = resolveSigningContext(options);
1132
+ const signingContext = await resolveSigningContext(options);
1209
1133
  if (policy === "signed" && !signingContext) {
1210
1134
  throw new Error(
1211
1135
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1235,14 +1159,22 @@ async function generateVideoEmbeddings(assetId, options = {}) {
1235
1159
  if (chunks.length === 0) {
1236
1160
  throw new Error("No chunks generated from transcript");
1237
1161
  }
1238
- let chunkEmbeddings;
1162
+ const chunkEmbeddings = [];
1239
1163
  try {
1240
- chunkEmbeddings = await generateChunkEmbeddings(
1241
- chunks,
1242
- embeddingModel.model,
1243
- batchSize,
1244
- abortSignal
1245
- );
1164
+ for (let i = 0; i < chunks.length; i += batchSize) {
1165
+ const batch = chunks.slice(i, i + batchSize);
1166
+ const batchResults = await Promise.all(
1167
+ batch.map(
1168
+ (chunk) => generateSingleChunkEmbedding({
1169
+ chunk,
1170
+ provider: embeddingModel.provider,
1171
+ modelId: embeddingModel.modelId,
1172
+ credentials
1173
+ })
1174
+ )
1175
+ );
1176
+ chunkEmbeddings.push(...batchResults);
1177
+ }
1246
1178
  } catch (error) {
1247
1179
  throw new Error(
1248
1180
  `Failed to generate embeddings with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
@@ -1271,6 +1203,7 @@ async function generateVideoEmbeddings(assetId, options = {}) {
1271
1203
 
1272
1204
  // src/primitives/thumbnails.ts
1273
1205
  async function getThumbnailUrls(playbackId, duration, options = {}) {
1206
+ "use step";
1274
1207
  const { interval = 10, width = 640, signingContext } = options;
1275
1208
  const timestamps = [];
1276
1209
  if (duration <= 50) {
@@ -1298,7 +1231,7 @@ var DEFAULT_THRESHOLDS = {
1298
1231
  sexual: 0.7,
1299
1232
  violence: 0.8
1300
1233
  };
1301
- var DEFAULT_PROVIDER4 = "openai";
1234
+ var DEFAULT_PROVIDER2 = "openai";
1302
1235
  var HIVE_ENDPOINT = "https://api.thehive.ai/api/v2/task/sync";
1303
1236
  var HIVE_SEXUAL_CATEGORIES = [
1304
1237
  "general_nsfw",
@@ -1336,6 +1269,7 @@ var HIVE_VIOLENCE_CATEGORIES = [
1336
1269
  "garm_death_injury_or_military_conflict"
1337
1270
  ];
1338
1271
  async function processConcurrently(items, processor, maxConcurrent = 5) {
1272
+ "use step";
1339
1273
  const results = [];
1340
1274
  for (let i = 0; i < items.length; i += maxConcurrent) {
1341
1275
  const batch = items.slice(i, i + maxConcurrent);
@@ -1346,19 +1280,21 @@ async function processConcurrently(items, processor, maxConcurrent = 5) {
1346
1280
  return results;
1347
1281
  }
1348
1282
  async function requestOpenAIModeration(imageUrls, apiKey, model, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
1283
+ "use step";
1349
1284
  const targetUrls = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map(
1350
- (img) => ({ url: img.url, image: img.base64Data })
1351
- ) : imageUrls.map((url) => ({ url, image: url }));
1285
+ (img) => ({ url: img.url, image: img.base64Data, apiKey, model })
1286
+ ) : imageUrls.map((url) => ({ url, image: url, apiKey, model }));
1352
1287
  const moderate = async (entry) => {
1288
+ "use step";
1353
1289
  try {
1354
1290
  const res = await fetch("https://api.openai.com/v1/moderations", {
1355
1291
  method: "POST",
1356
1292
  headers: {
1357
1293
  "Content-Type": "application/json",
1358
- "Authorization": `Bearer ${apiKey}`
1294
+ "Authorization": `Bearer ${entry.apiKey}`
1359
1295
  },
1360
1296
  body: JSON.stringify({
1361
- model,
1297
+ model: entry.model,
1362
1298
  input: [
1363
1299
  {
1364
1300
  type: "image_url",
@@ -1402,6 +1338,7 @@ function getHiveCategoryScores(classes, categoryNames) {
1402
1338
  return Math.max(...scores, 0);
1403
1339
  }
1404
1340
  async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
1341
+ "use step";
1405
1342
  const targets = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map((img) => ({
1406
1343
  url: img.url,
1407
1344
  source: {
@@ -1414,6 +1351,7 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
1414
1351
  source: { kind: "url", value: url }
1415
1352
  }));
1416
1353
  const moderate = async (entry) => {
1354
+ "use step";
1417
1355
  try {
1418
1356
  const formData = new FormData();
1419
1357
  if (entry.source.kind === "url") {
@@ -1459,8 +1397,9 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
1459
1397
  return processConcurrently(targets, moderate, maxConcurrent);
1460
1398
  }
1461
1399
  async function getModerationScores(assetId, options = {}) {
1400
+ "use workflow";
1462
1401
  const {
1463
- provider = DEFAULT_PROVIDER4,
1402
+ provider = DEFAULT_PROVIDER2,
1464
1403
  model = provider === "openai" ? "omni-moderation-latest" : void 0,
1465
1404
  thresholds = DEFAULT_THRESHOLDS,
1466
1405
  thumbnailInterval = 10,
@@ -1469,11 +1408,10 @@ async function getModerationScores(assetId, options = {}) {
1469
1408
  imageSubmissionMode = "url",
1470
1409
  imageDownloadOptions
1471
1410
  } = options;
1472
- const credentials = validateCredentials(options, provider === "openai" ? "openai" : void 0);
1473
- const muxClient = createMuxClient(credentials);
1474
- const { asset, playbackId, policy } = await getPlaybackIdForAsset(muxClient, assetId);
1411
+ const credentials = await validateCredentials(options, provider === "openai" ? "openai" : void 0);
1412
+ const { asset, playbackId, policy } = await getPlaybackIdForAsset(credentials, assetId);
1475
1413
  const duration = asset.duration || 0;
1476
- const signingContext = resolveSigningContext(options);
1414
+ const signingContext = await resolveSigningContext(options);
1477
1415
  if (policy === "signed" && !signingContext) {
1478
1416
  throw new Error(
1479
1417
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1529,17 +1467,18 @@ async function getModerationScores(assetId, options = {}) {
1529
1467
  }
1530
1468
 
1531
1469
  // src/workflows/summarization.ts
1532
- var import_ai4 = require("ai");
1533
- var import_zod4 = require("zod");
1470
+ import { generateObject as generateObject3 } from "ai";
1471
+ import dedent2 from "dedent";
1472
+ import { z as z4 } from "zod";
1534
1473
  var SUMMARY_KEYWORD_LIMIT = 10;
1535
- var summarySchema = import_zod4.z.object({
1536
- keywords: import_zod4.z.array(import_zod4.z.string()),
1537
- title: import_zod4.z.string(),
1538
- description: import_zod4.z.string()
1474
+ var summarySchema = z4.object({
1475
+ keywords: z4.array(z4.string()),
1476
+ title: z4.string(),
1477
+ description: z4.string()
1539
1478
  });
1540
1479
  var TONE_INSTRUCTIONS = {
1541
1480
  normal: "Provide a clear, straightforward analysis.",
1542
- sassy: "Answer with a sassy, playful attitude and personality.",
1481
+ sassy: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
1543
1482
  professional: "Provide a professional, executive-level analysis suitable for business reporting."
1544
1483
  };
1545
1484
  var summarizationPromptBuilder = createPromptBuilder({
@@ -1550,7 +1489,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1550
1489
  },
1551
1490
  title: {
1552
1491
  tag: "title_requirements",
1553
- content: dedent_default`
1492
+ content: dedent2`
1554
1493
  A short, compelling headline that immediately communicates the subject or action.
1555
1494
  Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
1556
1495
  Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
@@ -1558,7 +1497,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1558
1497
  },
1559
1498
  description: {
1560
1499
  tag: "description_requirements",
1561
- content: dedent_default`
1500
+ content: dedent2`
1562
1501
  A concise summary (2-4 sentences) that describes what happens across the video.
1563
1502
  Cover the main subjects, actions, setting, and any notable progression visible across frames.
1564
1503
  Write in present tense. Be specific about observable details rather than making assumptions.
@@ -1566,7 +1505,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1566
1505
  },
1567
1506
  keywords: {
1568
1507
  tag: "keywords_requirements",
1569
- content: dedent_default`
1508
+ content: dedent2`
1570
1509
  Specific, searchable terms (up to 10) that capture:
1571
1510
  - Primary subjects (people, animals, objects)
1572
1511
  - Actions and activities being performed
@@ -1578,7 +1517,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1578
1517
  },
1579
1518
  qualityGuidelines: {
1580
1519
  tag: "quality_guidelines",
1581
- content: dedent_default`
1520
+ content: dedent2`
1582
1521
  - Examine all frames to understand the full context and progression
1583
1522
  - Be precise: "golden retriever" is better than "dog" when identifiable
1584
1523
  - Capture the narrative: what begins, develops, and concludes
@@ -1587,7 +1526,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1587
1526
  },
1588
1527
  sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
1589
1528
  });
1590
- var SYSTEM_PROMPT3 = dedent_default`
1529
+ var SYSTEM_PROMPT3 = dedent2`
1591
1530
  <role>
1592
1531
  You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
1593
1532
  </role>
@@ -1619,7 +1558,29 @@ var SYSTEM_PROMPT3 = dedent_default`
1619
1558
  - Only describe what is clearly observable in the frames or explicitly stated in the transcript
1620
1559
  - Do not fabricate details or make unsupported assumptions
1621
1560
  - Return structured data matching the requested schema
1622
- </constraints>`;
1561
+ </constraints>
1562
+
1563
+ <tone_guidance>
1564
+ Pay special attention to the <tone> section and lean heavily into those instructions.
1565
+ Adapt your entire analysis and writing style to match the specified tone - this should influence
1566
+ your word choice, personality, formality level, and overall presentation of the content.
1567
+ The tone instructions are not suggestions but core requirements for how you should express yourself.
1568
+ </tone_guidance>
1569
+
1570
+ <language_guidelines>
1571
+ AVOID these meta-descriptive phrases that reference the medium rather than the content:
1572
+ - "The image shows..." / "The storyboard shows..."
1573
+ - "In this video..." / "This video features..."
1574
+ - "The frames depict..." / "The footage shows..."
1575
+ - "We can see..." / "You can see..."
1576
+ - "The clip shows..." / "The scene shows..."
1577
+
1578
+ INSTEAD, describe the content directly:
1579
+ - BAD: "The video shows a chef preparing a meal"
1580
+ - GOOD: "A chef prepares a meal in a professional kitchen"
1581
+
1582
+ Write as if describing reality, not describing a recording of reality.
1583
+ </language_guidelines>`;
1623
1584
  function buildUserPrompt2({
1624
1585
  tone,
1625
1586
  transcriptText,
@@ -1633,8 +1594,41 @@ function buildUserPrompt2({
1633
1594
  }
1634
1595
  return summarizationPromptBuilder.buildWithContext(promptOverrides, contextSections);
1635
1596
  }
1636
- var DEFAULT_PROVIDER5 = "openai";
1637
- var DEFAULT_TONE = "normal";
1597
+ async function analyzeStoryboard2(imageDataUrl, workflowConfig, userPrompt, systemPrompt) {
1598
+ "use step";
1599
+ const model = createLanguageModelFromConfig(
1600
+ workflowConfig.provider,
1601
+ workflowConfig.modelId,
1602
+ workflowConfig.credentials
1603
+ );
1604
+ const response = await generateObject3({
1605
+ model,
1606
+ schema: summarySchema,
1607
+ messages: [
1608
+ {
1609
+ role: "system",
1610
+ content: systemPrompt
1611
+ },
1612
+ {
1613
+ role: "user",
1614
+ content: [
1615
+ { type: "text", text: userPrompt },
1616
+ { type: "image", image: imageDataUrl }
1617
+ ]
1618
+ }
1619
+ ]
1620
+ });
1621
+ return {
1622
+ result: response.object,
1623
+ usage: {
1624
+ inputTokens: response.usage.inputTokens,
1625
+ outputTokens: response.usage.outputTokens,
1626
+ totalTokens: response.usage.totalTokens,
1627
+ reasoningTokens: response.usage.reasoningTokens,
1628
+ cachedInputTokens: response.usage.cachedInputTokens
1629
+ }
1630
+ };
1631
+ }
1638
1632
  function normalizeKeywords(keywords) {
1639
1633
  if (!Array.isArray(keywords) || keywords.length === 0) {
1640
1634
  return [];
@@ -1659,23 +1653,24 @@ function normalizeKeywords(keywords) {
1659
1653
  return normalized;
1660
1654
  }
1661
1655
  async function getSummaryAndTags(assetId, options) {
1656
+ "use workflow";
1662
1657
  const {
1663
- provider = DEFAULT_PROVIDER5,
1658
+ provider = "openai",
1664
1659
  model,
1665
- tone = DEFAULT_TONE,
1660
+ tone = "normal",
1666
1661
  includeTranscript = true,
1667
1662
  cleanTranscript = true,
1668
1663
  imageSubmissionMode = "url",
1669
1664
  imageDownloadOptions,
1670
- abortSignal,
1665
+ abortSignal: _abortSignal,
1671
1666
  promptOverrides
1672
1667
  } = options ?? {};
1673
- const clients = createWorkflowClients(
1668
+ const config = await createWorkflowConfig(
1674
1669
  { ...options, model },
1675
1670
  provider
1676
1671
  );
1677
- const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
1678
- const signingContext = resolveSigningContext(options ?? {});
1672
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(config.credentials, assetId);
1673
+ const signingContext = await resolveSigningContext(options ?? {});
1679
1674
  if (policy === "signed" && !signingContext) {
1680
1675
  throw new Error(
1681
1676
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1692,66 +1687,212 @@ async function getSummaryAndTags(assetId, options) {
1692
1687
  promptOverrides
1693
1688
  });
1694
1689
  const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
1695
- const analyzeStoryboard = async (imageDataUrl) => {
1696
- const response = await (0, import_ai4.generateObject)({
1697
- model: clients.languageModel.model,
1698
- schema: summarySchema,
1699
- abortSignal,
1700
- messages: [
1701
- {
1702
- role: "system",
1703
- content: SYSTEM_PROMPT3
1704
- },
1705
- {
1706
- role: "user",
1707
- content: [
1708
- { type: "text", text: userPrompt },
1709
- { type: "image", image: imageDataUrl }
1710
- ]
1711
- }
1712
- ]
1713
- });
1714
- return response.object;
1715
- };
1716
- let aiAnalysis = null;
1690
+ let analysisResponse;
1717
1691
  try {
1718
1692
  if (imageSubmissionMode === "base64") {
1719
1693
  const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
1720
- aiAnalysis = await analyzeStoryboard(downloadResult.base64Data);
1694
+ analysisResponse = await analyzeStoryboard2(
1695
+ downloadResult.base64Data,
1696
+ config,
1697
+ userPrompt,
1698
+ SYSTEM_PROMPT3
1699
+ );
1721
1700
  } else {
1722
- aiAnalysis = await withRetry(() => analyzeStoryboard(imageUrl));
1701
+ analysisResponse = await withRetry(() => analyzeStoryboard2(imageUrl, config, userPrompt, SYSTEM_PROMPT3));
1723
1702
  }
1724
1703
  } catch (error) {
1725
1704
  throw new Error(
1726
1705
  `Failed to analyze video content with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
1727
1706
  );
1728
1707
  }
1729
- if (!aiAnalysis) {
1708
+ if (!analysisResponse.result) {
1730
1709
  throw new Error(`Failed to analyze video content for asset ${assetId}`);
1731
1710
  }
1732
- if (!aiAnalysis.title) {
1711
+ if (!analysisResponse.result.title) {
1733
1712
  throw new Error(`Failed to generate title for asset ${assetId}`);
1734
1713
  }
1735
- if (!aiAnalysis.description) {
1714
+ if (!analysisResponse.result.description) {
1736
1715
  throw new Error(`Failed to generate description for asset ${assetId}`);
1737
1716
  }
1738
1717
  return {
1739
1718
  assetId,
1740
- title: aiAnalysis.title,
1741
- description: aiAnalysis.description,
1742
- tags: normalizeKeywords(aiAnalysis.keywords),
1743
- storyboardUrl: imageUrl
1719
+ title: analysisResponse.result.title,
1720
+ description: analysisResponse.result.description,
1721
+ tags: normalizeKeywords(analysisResponse.result.keywords),
1722
+ storyboardUrl: imageUrl,
1723
+ usage: analysisResponse.usage,
1724
+ transcriptText: transcriptText || void 0
1744
1725
  };
1745
1726
  }
1746
1727
 
1747
1728
  // src/workflows/translate-audio.ts
1748
- var import_client_s3 = require("@aws-sdk/client-s3");
1749
- var import_lib_storage = require("@aws-sdk/lib-storage");
1750
- var import_s3_request_presigner = require("@aws-sdk/s3-request-presigner");
1751
- var import_mux_node3 = __toESM(require("@mux/mux-node"));
1729
+ import Mux3 from "@mux/mux-node";
1730
+
1731
+ // src/lib/language-codes.ts
1732
+ var ISO639_1_TO_3 = {
1733
+ // Major world languages
1734
+ en: "eng",
1735
+ // English
1736
+ es: "spa",
1737
+ // Spanish
1738
+ fr: "fra",
1739
+ // French
1740
+ de: "deu",
1741
+ // German
1742
+ it: "ita",
1743
+ // Italian
1744
+ pt: "por",
1745
+ // Portuguese
1746
+ ru: "rus",
1747
+ // Russian
1748
+ zh: "zho",
1749
+ // Chinese
1750
+ ja: "jpn",
1751
+ // Japanese
1752
+ ko: "kor",
1753
+ // Korean
1754
+ ar: "ara",
1755
+ // Arabic
1756
+ hi: "hin",
1757
+ // Hindi
1758
+ // European languages
1759
+ nl: "nld",
1760
+ // Dutch
1761
+ pl: "pol",
1762
+ // Polish
1763
+ sv: "swe",
1764
+ // Swedish
1765
+ da: "dan",
1766
+ // Danish
1767
+ no: "nor",
1768
+ // Norwegian
1769
+ fi: "fin",
1770
+ // Finnish
1771
+ el: "ell",
1772
+ // Greek
1773
+ cs: "ces",
1774
+ // Czech
1775
+ hu: "hun",
1776
+ // Hungarian
1777
+ ro: "ron",
1778
+ // Romanian
1779
+ bg: "bul",
1780
+ // Bulgarian
1781
+ hr: "hrv",
1782
+ // Croatian
1783
+ sk: "slk",
1784
+ // Slovak
1785
+ sl: "slv",
1786
+ // Slovenian
1787
+ uk: "ukr",
1788
+ // Ukrainian
1789
+ tr: "tur",
1790
+ // Turkish
1791
+ // Asian languages
1792
+ th: "tha",
1793
+ // Thai
1794
+ vi: "vie",
1795
+ // Vietnamese
1796
+ id: "ind",
1797
+ // Indonesian
1798
+ ms: "msa",
1799
+ // Malay
1800
+ tl: "tgl",
1801
+ // Tagalog/Filipino
1802
+ // Other languages
1803
+ he: "heb",
1804
+ // Hebrew
1805
+ fa: "fas",
1806
+ // Persian/Farsi
1807
+ bn: "ben",
1808
+ // Bengali
1809
+ ta: "tam",
1810
+ // Tamil
1811
+ te: "tel",
1812
+ // Telugu
1813
+ mr: "mar",
1814
+ // Marathi
1815
+ gu: "guj",
1816
+ // Gujarati
1817
+ kn: "kan",
1818
+ // Kannada
1819
+ ml: "mal",
1820
+ // Malayalam
1821
+ pa: "pan",
1822
+ // Punjabi
1823
+ ur: "urd",
1824
+ // Urdu
1825
+ sw: "swa",
1826
+ // Swahili
1827
+ af: "afr",
1828
+ // Afrikaans
1829
+ ca: "cat",
1830
+ // Catalan
1831
+ eu: "eus",
1832
+ // Basque
1833
+ gl: "glg",
1834
+ // Galician
1835
+ is: "isl",
1836
+ // Icelandic
1837
+ et: "est",
1838
+ // Estonian
1839
+ lv: "lav",
1840
+ // Latvian
1841
+ lt: "lit"
1842
+ // Lithuanian
1843
+ };
1844
+ var ISO639_3_TO_1 = Object.fromEntries(
1845
+ Object.entries(ISO639_1_TO_3).map(([iso1, iso3]) => [iso3, iso1])
1846
+ );
1847
+ function toISO639_3(code) {
1848
+ const normalized = code.toLowerCase().trim();
1849
+ if (normalized.length === 3) {
1850
+ return normalized;
1851
+ }
1852
+ return ISO639_1_TO_3[normalized] ?? normalized;
1853
+ }
1854
+ function toISO639_1(code) {
1855
+ const normalized = code.toLowerCase().trim();
1856
+ if (normalized.length === 2) {
1857
+ return normalized;
1858
+ }
1859
+ return ISO639_3_TO_1[normalized] ?? normalized;
1860
+ }
1861
+ function getLanguageCodePair(code) {
1862
+ const normalized = code.toLowerCase().trim();
1863
+ if (normalized.length === 2) {
1864
+ return {
1865
+ iso639_1: normalized,
1866
+ iso639_3: toISO639_3(normalized)
1867
+ };
1868
+ } else if (normalized.length === 3) {
1869
+ return {
1870
+ iso639_1: toISO639_1(normalized),
1871
+ iso639_3: normalized
1872
+ };
1873
+ }
1874
+ return {
1875
+ iso639_1: normalized,
1876
+ iso639_3: normalized
1877
+ };
1878
+ }
1879
+ function getLanguageName(code) {
1880
+ const iso639_1 = toISO639_1(code);
1881
+ try {
1882
+ const displayNames = new Intl.DisplayNames(["en"], { type: "language" });
1883
+ return displayNames.of(iso639_1) ?? code.toUpperCase();
1884
+ } catch {
1885
+ return code.toUpperCase();
1886
+ }
1887
+ }
1888
+
1889
+ // src/workflows/translate-audio.ts
1752
1890
  var STATIC_RENDITION_POLL_INTERVAL_MS = 5e3;
1753
1891
  var STATIC_RENDITION_MAX_ATTEMPTS = 36;
1754
- var delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
1892
+ async function sleep(ms) {
1893
+ "use step";
1894
+ await new Promise((resolve) => setTimeout(resolve, ms));
1895
+ }
1755
1896
  function getReadyAudioStaticRendition(asset) {
1756
1897
  const files = asset.static_renditions?.files;
1757
1898
  if (!files || files.length === 0) {
@@ -1762,19 +1903,21 @@ function getReadyAudioStaticRendition(asset) {
1762
1903
  );
1763
1904
  }
1764
1905
  var hasReadyAudioStaticRendition = (asset) => Boolean(getReadyAudioStaticRendition(asset));
1765
- async function requestStaticRenditionCreation(muxClient, assetId) {
1766
- console.log("\u{1F4FC} Requesting static rendition from Mux...");
1906
+ async function requestStaticRenditionCreation(credentials, assetId) {
1907
+ "use step";
1908
+ const mux = new Mux3({
1909
+ tokenId: credentials.muxTokenId,
1910
+ tokenSecret: credentials.muxTokenSecret
1911
+ });
1767
1912
  try {
1768
- await muxClient.video.assets.createStaticRendition(assetId, {
1913
+ await mux.video.assets.createStaticRendition(assetId, {
1769
1914
  resolution: "audio-only"
1770
1915
  });
1771
- console.log("\u{1F4FC} Static rendition request accepted by Mux.");
1772
1916
  } catch (error) {
1773
1917
  const statusCode = error?.status ?? error?.statusCode;
1774
1918
  const messages = error?.error?.messages;
1775
1919
  const alreadyDefined = messages?.some((message2) => message2.toLowerCase().includes("already defined")) ?? error?.message?.toLowerCase().includes("already defined");
1776
1920
  if (statusCode === 409 || alreadyDefined) {
1777
- console.log("\u2139\uFE0F Static rendition already requested. Waiting for it to finish...");
1778
1921
  return;
1779
1922
  }
1780
1923
  const message = error instanceof Error ? error.message : "Unknown error";
@@ -1783,31 +1926,34 @@ async function requestStaticRenditionCreation(muxClient, assetId) {
1783
1926
  }
1784
1927
  async function waitForAudioStaticRendition({
1785
1928
  assetId,
1786
- muxClient,
1929
+ credentials,
1787
1930
  initialAsset
1788
1931
  }) {
1932
+ "use step";
1933
+ const mux = new Mux3({
1934
+ tokenId: credentials.muxTokenId,
1935
+ tokenSecret: credentials.muxTokenSecret
1936
+ });
1789
1937
  let currentAsset = initialAsset;
1790
1938
  if (hasReadyAudioStaticRendition(currentAsset)) {
1791
1939
  return currentAsset;
1792
1940
  }
1793
1941
  const status = currentAsset.static_renditions?.status ?? "not_requested";
1794
1942
  if (status === "not_requested" || status === void 0) {
1795
- await requestStaticRenditionCreation(muxClient, assetId);
1943
+ await requestStaticRenditionCreation(credentials, assetId);
1796
1944
  } else if (status === "errored") {
1797
- console.log("\u26A0\uFE0F Previous static rendition request errored. Creating a new one...");
1798
- await requestStaticRenditionCreation(muxClient, assetId);
1945
+ await requestStaticRenditionCreation(credentials, assetId);
1799
1946
  } else {
1800
- console.log(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
1947
+ console.warn(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
1801
1948
  }
1802
1949
  for (let attempt = 1; attempt <= STATIC_RENDITION_MAX_ATTEMPTS; attempt++) {
1803
- await delay(STATIC_RENDITION_POLL_INTERVAL_MS);
1804
- currentAsset = await muxClient.video.assets.retrieve(assetId);
1950
+ await sleep(STATIC_RENDITION_POLL_INTERVAL_MS);
1951
+ currentAsset = await mux.video.assets.retrieve(assetId);
1805
1952
  if (hasReadyAudioStaticRendition(currentAsset)) {
1806
- console.log("\u2705 Audio static rendition is ready!");
1807
1953
  return currentAsset;
1808
1954
  }
1809
1955
  const currentStatus = currentAsset.static_renditions?.status || "unknown";
1810
- console.log(
1956
+ console.warn(
1811
1957
  `\u231B Waiting for static rendition (attempt ${attempt}/${STATIC_RENDITION_MAX_ATTEMPTS}) \u2192 ${currentStatus}`
1812
1958
  );
1813
1959
  if (currentStatus === "errored") {
@@ -1820,55 +1966,180 @@ async function waitForAudioStaticRendition({
1820
1966
  "Timed out waiting for the static rendition to become ready. Please try again in a moment."
1821
1967
  );
1822
1968
  }
1969
+ async function fetchAudioFromMux(audioUrl) {
1970
+ "use step";
1971
+ const audioResponse = await fetch(audioUrl);
1972
+ if (!audioResponse.ok) {
1973
+ throw new Error(`Failed to fetch audio file: ${audioResponse.statusText}`);
1974
+ }
1975
+ return audioResponse.arrayBuffer();
1976
+ }
1977
+ async function createElevenLabsDubbingJob({
1978
+ audioBuffer,
1979
+ assetId,
1980
+ elevenLabsLangCode,
1981
+ elevenLabsApiKey,
1982
+ numSpeakers
1983
+ }) {
1984
+ "use step";
1985
+ const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
1986
+ const formData = new FormData();
1987
+ formData.append("file", audioBlob);
1988
+ formData.append("target_lang", elevenLabsLangCode);
1989
+ formData.append("num_speakers", numSpeakers.toString());
1990
+ formData.append("name", `Mux Asset ${assetId} - auto to ${elevenLabsLangCode}`);
1991
+ const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
1992
+ method: "POST",
1993
+ headers: {
1994
+ "xi-api-key": elevenLabsApiKey
1995
+ },
1996
+ body: formData
1997
+ });
1998
+ if (!dubbingResponse.ok) {
1999
+ throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
2000
+ }
2001
+ const dubbingData = await dubbingResponse.json();
2002
+ return dubbingData.dubbing_id;
2003
+ }
2004
+ async function checkElevenLabsDubbingStatus({
2005
+ dubbingId,
2006
+ elevenLabsApiKey
2007
+ }) {
2008
+ "use step";
2009
+ const statusResponse = await fetch(`https://api.elevenlabs.io/v1/dubbing/${dubbingId}`, {
2010
+ headers: {
2011
+ "xi-api-key": elevenLabsApiKey
2012
+ }
2013
+ });
2014
+ if (!statusResponse.ok) {
2015
+ throw new Error(`Status check failed: ${statusResponse.statusText}`);
2016
+ }
2017
+ const statusData = await statusResponse.json();
2018
+ return {
2019
+ status: statusData.status,
2020
+ targetLanguages: statusData.target_languages ?? []
2021
+ };
2022
+ }
2023
+ async function downloadDubbedAudioFromElevenLabs({
2024
+ dubbingId,
2025
+ languageCode,
2026
+ elevenLabsApiKey
2027
+ }) {
2028
+ "use step";
2029
+ const audioUrl = `https://api.elevenlabs.io/v1/dubbing/${dubbingId}/audio/${languageCode}`;
2030
+ const audioResponse = await fetch(audioUrl, {
2031
+ headers: {
2032
+ "xi-api-key": elevenLabsApiKey
2033
+ }
2034
+ });
2035
+ if (!audioResponse.ok) {
2036
+ throw new Error(`Failed to fetch dubbed audio: ${audioResponse.statusText}`);
2037
+ }
2038
+ return audioResponse.arrayBuffer();
2039
+ }
2040
+ async function uploadDubbedAudioToS3({
2041
+ dubbedAudioBuffer,
2042
+ assetId,
2043
+ toLanguageCode,
2044
+ s3Endpoint,
2045
+ s3Region,
2046
+ s3Bucket,
2047
+ s3AccessKeyId,
2048
+ s3SecretAccessKey
2049
+ }) {
2050
+ "use step";
2051
+ const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
2052
+ const { Upload } = await import("@aws-sdk/lib-storage");
2053
+ const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
2054
+ const s3Client = new S3Client({
2055
+ region: s3Region,
2056
+ endpoint: s3Endpoint,
2057
+ credentials: {
2058
+ accessKeyId: s3AccessKeyId,
2059
+ secretAccessKey: s3SecretAccessKey
2060
+ },
2061
+ forcePathStyle: true
2062
+ });
2063
+ const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
2064
+ const upload = new Upload({
2065
+ client: s3Client,
2066
+ params: {
2067
+ Bucket: s3Bucket,
2068
+ Key: audioKey,
2069
+ Body: new Uint8Array(dubbedAudioBuffer),
2070
+ ContentType: "audio/mp4"
2071
+ }
2072
+ });
2073
+ await upload.done();
2074
+ const getObjectCommand = new GetObjectCommand({
2075
+ Bucket: s3Bucket,
2076
+ Key: audioKey
2077
+ });
2078
+ const presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
2079
+ expiresIn: 3600
2080
+ // 1 hour
2081
+ });
2082
+ console.warn(`\u2705 Audio uploaded successfully to: ${audioKey}`);
2083
+ console.warn(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
2084
+ return presignedUrl;
2085
+ }
2086
+ async function createAudioTrackOnMux(credentials, assetId, languageCode, presignedUrl) {
2087
+ "use step";
2088
+ const mux = new Mux3({
2089
+ tokenId: credentials.muxTokenId,
2090
+ tokenSecret: credentials.muxTokenSecret
2091
+ });
2092
+ const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(languageCode) || languageCode.toUpperCase();
2093
+ const trackName = `${languageName} (auto-dubbed)`;
2094
+ const trackResponse = await mux.video.assets.createTrack(assetId, {
2095
+ type: "audio",
2096
+ language_code: languageCode,
2097
+ name: trackName,
2098
+ url: presignedUrl
2099
+ });
2100
+ if (!trackResponse.id) {
2101
+ throw new Error("Failed to create audio track: no track ID returned from Mux");
2102
+ }
2103
+ return trackResponse.id;
2104
+ }
1823
2105
  async function translateAudio(assetId, toLanguageCode, options = {}) {
2106
+ "use workflow";
1824
2107
  const {
1825
2108
  provider = "elevenlabs",
1826
2109
  numSpeakers = 0,
1827
2110
  // 0 = auto-detect
1828
- muxTokenId,
1829
- muxTokenSecret,
1830
2111
  elevenLabsApiKey,
1831
2112
  uploadToMux = true
1832
2113
  } = options;
1833
2114
  if (provider !== "elevenlabs") {
1834
2115
  throw new Error("Only ElevenLabs provider is currently supported for audio translation");
1835
2116
  }
1836
- const muxId = muxTokenId ?? env_default.MUX_TOKEN_ID;
1837
- const muxSecret = muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
2117
+ const credentials = await validateCredentials(options);
1838
2118
  const elevenLabsKey = elevenLabsApiKey ?? env_default.ELEVENLABS_API_KEY;
1839
2119
  const s3Endpoint = options.s3Endpoint ?? env_default.S3_ENDPOINT;
1840
2120
  const s3Region = options.s3Region ?? env_default.S3_REGION ?? "auto";
1841
2121
  const s3Bucket = options.s3Bucket ?? env_default.S3_BUCKET;
1842
2122
  const s3AccessKeyId = options.s3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
1843
2123
  const s3SecretAccessKey = options.s3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
1844
- if (!muxId || !muxSecret) {
1845
- throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
1846
- }
1847
2124
  if (!elevenLabsKey) {
1848
2125
  throw new Error("ElevenLabs API key is required. Provide elevenLabsApiKey in options or set ELEVENLABS_API_KEY environment variable.");
1849
2126
  }
1850
2127
  if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
1851
2128
  throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
1852
2129
  }
1853
- const mux = new import_mux_node3.default({
1854
- tokenId: muxId,
1855
- tokenSecret: muxSecret
1856
- });
1857
- console.log(`\u{1F3AC} Fetching Mux asset: ${assetId}`);
1858
- const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(mux, assetId);
1859
- const signingContext = resolveSigningContext(options);
2130
+ const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(credentials, assetId);
2131
+ const signingContext = await resolveSigningContext(options);
1860
2132
  if (policy === "signed" && !signingContext) {
1861
2133
  throw new Error(
1862
2134
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
1863
2135
  );
1864
2136
  }
1865
- console.log("\u{1F50D} Checking for audio-only static rendition...");
1866
2137
  let currentAsset = initialAsset;
1867
2138
  if (!hasReadyAudioStaticRendition(currentAsset)) {
1868
- console.log("\u274C No ready audio static rendition found. Requesting one now...");
2139
+ console.warn("\u274C No ready audio static rendition found. Requesting one now...");
1869
2140
  currentAsset = await waitForAudioStaticRendition({
1870
2141
  assetId,
1871
- muxClient: mux,
2142
+ credentials,
1872
2143
  initialAsset: currentAsset
1873
2144
  });
1874
2145
  }
@@ -1882,58 +2153,44 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
1882
2153
  if (policy === "signed" && signingContext) {
1883
2154
  audioUrl = await signUrl(audioUrl, playbackId, signingContext, "video");
1884
2155
  }
1885
- console.log(`\u2705 Found audio rendition: ${audioUrl}`);
1886
- console.log(`\u{1F399}\uFE0F Creating ElevenLabs dubbing job (auto-detect \u2192 ${toLanguageCode})`);
2156
+ console.warn("\u{1F399}\uFE0F Fetching audio from Mux...");
2157
+ let audioBuffer;
2158
+ try {
2159
+ audioBuffer = await fetchAudioFromMux(audioUrl);
2160
+ } catch (error) {
2161
+ throw new Error(`Failed to fetch audio from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
2162
+ }
2163
+ console.warn("\u{1F399}\uFE0F Creating dubbing job in ElevenLabs...");
2164
+ const elevenLabsLangCode = toISO639_3(toLanguageCode);
2165
+ console.warn(`\u{1F50D} Creating dubbing job for asset ${assetId} with language code: ${elevenLabsLangCode}`);
1887
2166
  let dubbingId;
1888
2167
  try {
1889
- const audioResponse = await fetch(audioUrl);
1890
- if (!audioResponse.ok) {
1891
- throw new Error(`Failed to fetch audio file: ${audioResponse.statusText}`);
1892
- }
1893
- const audioBuffer = await audioResponse.arrayBuffer();
1894
- const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
1895
- const audioFile = audioBlob;
1896
- const formData = new FormData();
1897
- formData.append("file", audioFile);
1898
- formData.append("target_lang", toLanguageCode);
1899
- formData.append("num_speakers", numSpeakers.toString());
1900
- formData.append("name", `Mux Asset ${assetId} - auto to ${toLanguageCode}`);
1901
- const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
1902
- method: "POST",
1903
- headers: {
1904
- "xi-api-key": elevenLabsKey
1905
- },
1906
- body: formData
2168
+ dubbingId = await createElevenLabsDubbingJob({
2169
+ audioBuffer,
2170
+ assetId,
2171
+ elevenLabsLangCode,
2172
+ elevenLabsApiKey: elevenLabsKey,
2173
+ numSpeakers
1907
2174
  });
1908
- if (!dubbingResponse.ok) {
1909
- throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
1910
- }
1911
- const dubbingData = await dubbingResponse.json();
1912
- dubbingId = dubbingData.dubbing_id;
1913
- console.log(`\u2705 Dubbing job created: ${dubbingId}`);
1914
- console.log(`\u23F1\uFE0F Expected duration: ${dubbingData.expected_duration_sec}s`);
2175
+ console.warn(`\u2705 Dubbing job created with ID: ${dubbingId}`);
1915
2176
  } catch (error) {
1916
2177
  throw new Error(`Failed to create ElevenLabs dubbing job: ${error instanceof Error ? error.message : "Unknown error"}`);
1917
2178
  }
1918
- console.log("\u23F3 Waiting for dubbing to complete...");
2179
+ console.warn("\u23F3 Waiting for dubbing to complete...");
1919
2180
  let dubbingStatus = "dubbing";
1920
2181
  let pollAttempts = 0;
1921
2182
  const maxPollAttempts = 180;
2183
+ let targetLanguages = [];
1922
2184
  while (dubbingStatus === "dubbing" && pollAttempts < maxPollAttempts) {
1923
- await new Promise((resolve) => setTimeout(resolve, 1e4));
2185
+ await sleep(1e4);
1924
2186
  pollAttempts++;
1925
2187
  try {
1926
- const statusResponse = await fetch(`https://api.elevenlabs.io/v1/dubbing/${dubbingId}`, {
1927
- headers: {
1928
- "xi-api-key": elevenLabsKey
1929
- }
2188
+ const statusResult = await checkElevenLabsDubbingStatus({
2189
+ dubbingId,
2190
+ elevenLabsApiKey: elevenLabsKey
1930
2191
  });
1931
- if (!statusResponse.ok) {
1932
- throw new Error(`Status check failed: ${statusResponse.statusText}`);
1933
- }
1934
- const statusData = await statusResponse.json();
1935
- dubbingStatus = statusData.status;
1936
- console.log(`\u{1F4CA} Status check ${pollAttempts}: ${dubbingStatus}`);
2192
+ dubbingStatus = statusResult.status;
2193
+ targetLanguages = statusResult.targetLanguages;
1937
2194
  if (dubbingStatus === "failed") {
1938
2195
  throw new Error("ElevenLabs dubbing job failed");
1939
2196
  }
@@ -1944,89 +2201,77 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
1944
2201
  if (dubbingStatus !== "dubbed") {
1945
2202
  throw new Error(`Dubbing job timed out or failed. Final status: ${dubbingStatus}`);
1946
2203
  }
1947
- console.log("\u2705 Dubbing completed successfully!");
2204
+ console.warn("\u2705 Dubbing completed successfully!");
1948
2205
  if (!uploadToMux) {
2206
+ const targetLanguage2 = getLanguageCodePair(toLanguageCode);
1949
2207
  return {
1950
2208
  assetId,
1951
- targetLanguageCode: toLanguageCode,
2209
+ targetLanguageCode: targetLanguage2.iso639_1,
2210
+ targetLanguage: targetLanguage2,
1952
2211
  dubbingId
1953
2212
  };
1954
2213
  }
1955
- console.log("\u{1F4E5} Downloading dubbed audio from ElevenLabs...");
2214
+ console.warn("\u{1F4E5} Downloading dubbed audio from ElevenLabs...");
1956
2215
  let dubbedAudioBuffer;
1957
2216
  try {
1958
- const audioUrl2 = `https://api.elevenlabs.io/v1/dubbing/${dubbingId}/audio/${toLanguageCode}`;
1959
- const audioResponse = await fetch(audioUrl2, {
1960
- headers: {
1961
- "xi-api-key": elevenLabsKey
1962
- }
1963
- });
1964
- if (!audioResponse.ok) {
1965
- throw new Error(`Failed to fetch dubbed audio: ${audioResponse.statusText}`);
2217
+ const requestedLangCode = toISO639_3(toLanguageCode);
2218
+ let downloadLangCode = targetLanguages.find(
2219
+ (lang) => lang === requestedLangCode
2220
+ ) ?? targetLanguages.find(
2221
+ (lang) => lang.toLowerCase() === requestedLangCode.toLowerCase()
2222
+ );
2223
+ if (!downloadLangCode && targetLanguages.length > 0) {
2224
+ downloadLangCode = targetLanguages[0];
2225
+ console.warn(`\u26A0\uFE0F Requested language "${requestedLangCode}" not found in target_languages. Using "${downloadLangCode}" instead.`);
2226
+ }
2227
+ if (!downloadLangCode) {
2228
+ downloadLangCode = requestedLangCode;
2229
+ console.warn(`\u26A0\uFE0F No target_languages available from ElevenLabs status. Using requested language code: ${requestedLangCode}`);
1966
2230
  }
1967
- dubbedAudioBuffer = await audioResponse.arrayBuffer();
1968
- console.log(`\u2705 Downloaded dubbed audio (${dubbedAudioBuffer.byteLength} bytes)`);
2231
+ dubbedAudioBuffer = await downloadDubbedAudioFromElevenLabs({
2232
+ dubbingId,
2233
+ languageCode: downloadLangCode,
2234
+ elevenLabsApiKey: elevenLabsKey
2235
+ });
2236
+ console.warn("\u2705 Dubbed audio downloaded successfully!");
1969
2237
  } catch (error) {
1970
2238
  throw new Error(`Failed to download dubbed audio: ${error instanceof Error ? error.message : "Unknown error"}`);
1971
2239
  }
1972
- console.log("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
1973
- const s3Client = new import_client_s3.S3Client({
1974
- region: s3Region,
1975
- endpoint: s3Endpoint,
1976
- credentials: {
1977
- accessKeyId: s3AccessKeyId,
1978
- secretAccessKey: s3SecretAccessKey
1979
- },
1980
- forcePathStyle: true
1981
- });
1982
- const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
2240
+ console.warn("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
1983
2241
  let presignedUrl;
1984
2242
  try {
1985
- const upload = new import_lib_storage.Upload({
1986
- client: s3Client,
1987
- params: {
1988
- Bucket: s3Bucket,
1989
- Key: audioKey,
1990
- Body: new Uint8Array(dubbedAudioBuffer),
1991
- ContentType: "audio/mp4"
1992
- }
1993
- });
1994
- await upload.done();
1995
- console.log(`\u2705 Audio uploaded successfully to: ${audioKey}`);
1996
- const getObjectCommand = new import_client_s3.GetObjectCommand({
1997
- Bucket: s3Bucket,
1998
- Key: audioKey
1999
- });
2000
- presignedUrl = await (0, import_s3_request_presigner.getSignedUrl)(s3Client, getObjectCommand, {
2001
- expiresIn: 3600
2002
- // 1 hour
2243
+ presignedUrl = await uploadDubbedAudioToS3({
2244
+ dubbedAudioBuffer,
2245
+ assetId,
2246
+ toLanguageCode,
2247
+ s3Endpoint,
2248
+ s3Region,
2249
+ s3Bucket,
2250
+ s3AccessKeyId,
2251
+ s3SecretAccessKey
2003
2252
  });
2004
- console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
2005
2253
  } catch (error) {
2006
2254
  throw new Error(`Failed to upload audio to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
2007
2255
  }
2008
- console.log("\u{1F3AC} Adding translated audio track to Mux asset...");
2256
+ console.warn("\u{1F4F9} Adding dubbed audio track to Mux asset...");
2009
2257
  let uploadedTrackId;
2258
+ const muxLangCode = toISO639_1(toLanguageCode);
2010
2259
  try {
2011
- const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(toLanguageCode) || toLanguageCode.toUpperCase();
2260
+ uploadedTrackId = await createAudioTrackOnMux(credentials, assetId, muxLangCode, presignedUrl);
2261
+ const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(muxLangCode) || muxLangCode.toUpperCase();
2012
2262
  const trackName = `${languageName} (auto-dubbed)`;
2013
- const trackResponse = await mux.video.assets.createTrack(assetId, {
2014
- type: "audio",
2015
- language_code: toLanguageCode,
2016
- name: trackName,
2017
- url: presignedUrl
2018
- });
2019
- uploadedTrackId = trackResponse.id;
2020
- console.log(`\u2705 Audio track added to Mux asset with ID: ${uploadedTrackId}`);
2021
- console.log(`\u{1F3B5} Track name: "${trackName}"`);
2263
+ console.warn(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
2264
+ console.warn(`\u{1F4CB} Track name: "${trackName}"`);
2022
2265
  } catch (error) {
2023
2266
  console.warn(`\u26A0\uFE0F Failed to add audio track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
2024
- console.log("\u{1F517} You can manually add the track using this presigned URL:");
2025
- console.log(presignedUrl);
2267
+ console.warn("\u{1F517} You can manually add the track using this presigned URL:");
2268
+ console.warn(presignedUrl);
2026
2269
  }
2270
+ const targetLanguage = getLanguageCodePair(toLanguageCode);
2027
2271
  return {
2028
2272
  assetId,
2029
- targetLanguageCode: toLanguageCode,
2273
+ targetLanguageCode: targetLanguage.iso639_1,
2274
+ targetLanguage,
2030
2275
  dubbingId,
2031
2276
  uploadedTrackId,
2032
2277
  presignedUrl
@@ -2034,43 +2279,149 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
2034
2279
  }
2035
2280
 
2036
2281
  // src/workflows/translate-captions.ts
2037
- var import_client_s32 = require("@aws-sdk/client-s3");
2038
- var import_lib_storage2 = require("@aws-sdk/lib-storage");
2039
- var import_s3_request_presigner2 = require("@aws-sdk/s3-request-presigner");
2040
- var import_ai5 = require("ai");
2041
- var import_zod5 = require("zod");
2042
- var translationSchema = import_zod5.z.object({
2043
- translation: import_zod5.z.string()
2282
+ import Mux4 from "@mux/mux-node";
2283
+ import { generateObject as generateObject4 } from "ai";
2284
+ import { z as z5 } from "zod";
2285
+ var translationSchema = z5.object({
2286
+ translation: z5.string()
2044
2287
  });
2045
- var DEFAULT_PROVIDER6 = "openai";
2288
+ async function fetchVttFromMux(vttUrl) {
2289
+ "use step";
2290
+ const vttResponse = await fetch(vttUrl);
2291
+ if (!vttResponse.ok) {
2292
+ throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
2293
+ }
2294
+ return vttResponse.text();
2295
+ }
2296
+ async function translateVttWithAI({
2297
+ vttContent,
2298
+ fromLanguageCode,
2299
+ toLanguageCode,
2300
+ provider,
2301
+ modelId,
2302
+ credentials,
2303
+ abortSignal
2304
+ }) {
2305
+ "use step";
2306
+ const languageModel = createLanguageModelFromConfig(
2307
+ provider,
2308
+ modelId,
2309
+ credentials
2310
+ );
2311
+ const response = await generateObject4({
2312
+ model: languageModel,
2313
+ schema: translationSchema,
2314
+ abortSignal,
2315
+ messages: [
2316
+ {
2317
+ role: "user",
2318
+ content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
2319
+
2320
+ ${vttContent}`
2321
+ }
2322
+ ]
2323
+ });
2324
+ return {
2325
+ translatedVtt: response.object.translation,
2326
+ usage: {
2327
+ inputTokens: response.usage.inputTokens,
2328
+ outputTokens: response.usage.outputTokens,
2329
+ totalTokens: response.usage.totalTokens,
2330
+ reasoningTokens: response.usage.reasoningTokens,
2331
+ cachedInputTokens: response.usage.cachedInputTokens
2332
+ }
2333
+ };
2334
+ }
2335
+ async function uploadVttToS3({
2336
+ translatedVtt,
2337
+ assetId,
2338
+ fromLanguageCode,
2339
+ toLanguageCode,
2340
+ s3Endpoint,
2341
+ s3Region,
2342
+ s3Bucket,
2343
+ s3AccessKeyId,
2344
+ s3SecretAccessKey
2345
+ }) {
2346
+ "use step";
2347
+ const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
2348
+ const { Upload } = await import("@aws-sdk/lib-storage");
2349
+ const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
2350
+ const s3Client = new S3Client({
2351
+ region: s3Region,
2352
+ endpoint: s3Endpoint,
2353
+ credentials: {
2354
+ accessKeyId: s3AccessKeyId,
2355
+ secretAccessKey: s3SecretAccessKey
2356
+ },
2357
+ forcePathStyle: true
2358
+ });
2359
+ const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
2360
+ const upload = new Upload({
2361
+ client: s3Client,
2362
+ params: {
2363
+ Bucket: s3Bucket,
2364
+ Key: vttKey,
2365
+ Body: translatedVtt,
2366
+ ContentType: "text/vtt"
2367
+ }
2368
+ });
2369
+ await upload.done();
2370
+ const getObjectCommand = new GetObjectCommand({
2371
+ Bucket: s3Bucket,
2372
+ Key: vttKey
2373
+ });
2374
+ const presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
2375
+ expiresIn: 3600
2376
+ // 1 hour
2377
+ });
2378
+ return presignedUrl;
2379
+ }
2380
+ async function createTextTrackOnMux(credentials, assetId, languageCode, trackName, presignedUrl) {
2381
+ "use step";
2382
+ const mux = new Mux4({
2383
+ tokenId: credentials.muxTokenId,
2384
+ tokenSecret: credentials.muxTokenSecret
2385
+ });
2386
+ const trackResponse = await mux.video.assets.createTrack(assetId, {
2387
+ type: "text",
2388
+ text_type: "subtitles",
2389
+ language_code: languageCode,
2390
+ name: trackName,
2391
+ url: presignedUrl
2392
+ });
2393
+ if (!trackResponse.id) {
2394
+ throw new Error("Failed to create text track: no track ID returned from Mux");
2395
+ }
2396
+ return trackResponse.id;
2397
+ }
2046
2398
  async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options) {
2399
+ "use workflow";
2047
2400
  const {
2048
- provider = DEFAULT_PROVIDER6,
2401
+ provider = "openai",
2049
2402
  model,
2050
2403
  s3Endpoint: providedS3Endpoint,
2051
2404
  s3Region: providedS3Region,
2052
2405
  s3Bucket: providedS3Bucket,
2053
2406
  s3AccessKeyId: providedS3AccessKeyId,
2054
2407
  s3SecretAccessKey: providedS3SecretAccessKey,
2055
- uploadToMux: uploadToMuxOption,
2056
- ...clientConfig
2408
+ uploadToMux: uploadToMuxOption
2057
2409
  } = options;
2058
- const resolvedProvider = provider;
2059
2410
  const s3Endpoint = providedS3Endpoint ?? env_default.S3_ENDPOINT;
2060
2411
  const s3Region = providedS3Region ?? env_default.S3_REGION ?? "auto";
2061
2412
  const s3Bucket = providedS3Bucket ?? env_default.S3_BUCKET;
2062
2413
  const s3AccessKeyId = providedS3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
2063
2414
  const s3SecretAccessKey = providedS3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
2064
2415
  const uploadToMux = uploadToMuxOption !== false;
2065
- const clients = createWorkflowClients(
2066
- { ...clientConfig, provider: resolvedProvider, model },
2067
- resolvedProvider
2416
+ const config = await createWorkflowConfig(
2417
+ { ...options, model },
2418
+ provider
2068
2419
  );
2069
2420
  if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
2070
2421
  throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
2071
2422
  }
2072
- const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
2073
- const signingContext = resolveSigningContext(options);
2423
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(config.credentials, assetId);
2424
+ const signingContext = await resolveSigningContext(options);
2074
2425
  if (policy === "signed" && !signingContext) {
2075
2426
  throw new Error(
2076
2427
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -2091,115 +2442,79 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
2091
2442
  }
2092
2443
  let vttContent;
2093
2444
  try {
2094
- const vttResponse = await fetch(vttUrl);
2095
- if (!vttResponse.ok) {
2096
- throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
2097
- }
2098
- vttContent = await vttResponse.text();
2445
+ vttContent = await fetchVttFromMux(vttUrl);
2099
2446
  } catch (error) {
2100
2447
  throw new Error(`Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`);
2101
2448
  }
2102
- console.log(`\u2705 Found VTT content for language '${fromLanguageCode}'`);
2103
2449
  let translatedVtt;
2450
+ let usage;
2104
2451
  try {
2105
- const response = await (0, import_ai5.generateObject)({
2106
- model: clients.languageModel.model,
2107
- schema: translationSchema,
2108
- abortSignal: options.abortSignal,
2109
- messages: [
2110
- {
2111
- role: "user",
2112
- content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
2113
-
2114
- ${vttContent}`
2115
- }
2116
- ]
2452
+ const result = await translateVttWithAI({
2453
+ vttContent,
2454
+ fromLanguageCode,
2455
+ toLanguageCode,
2456
+ provider: config.provider,
2457
+ modelId: config.modelId,
2458
+ credentials: config.credentials,
2459
+ abortSignal: options.abortSignal
2117
2460
  });
2118
- translatedVtt = response.object.translation;
2461
+ translatedVtt = result.translatedVtt;
2462
+ usage = result.usage;
2119
2463
  } catch (error) {
2120
- throw new Error(`Failed to translate VTT with ${resolvedProvider}: ${error instanceof Error ? error.message : "Unknown error"}`);
2464
+ throw new Error(`Failed to translate VTT with ${config.provider}: ${error instanceof Error ? error.message : "Unknown error"}`);
2121
2465
  }
2122
- console.log(`
2123
- \u2705 Translation completed successfully!`);
2466
+ const sourceLanguage = getLanguageCodePair(fromLanguageCode);
2467
+ const targetLanguage = getLanguageCodePair(toLanguageCode);
2124
2468
  if (!uploadToMux) {
2125
- console.log(`\u2705 VTT translated to ${toLanguageCode} successfully!`);
2126
2469
  return {
2127
2470
  assetId,
2128
2471
  sourceLanguageCode: fromLanguageCode,
2129
2472
  targetLanguageCode: toLanguageCode,
2473
+ sourceLanguage,
2474
+ targetLanguage,
2130
2475
  originalVtt: vttContent,
2131
- translatedVtt
2476
+ translatedVtt,
2477
+ usage
2132
2478
  };
2133
2479
  }
2134
- console.log("\u{1F4E4} Uploading translated VTT to S3-compatible storage...");
2135
- const s3Client = new import_client_s32.S3Client({
2136
- region: s3Region,
2137
- endpoint: s3Endpoint,
2138
- credentials: {
2139
- accessKeyId: s3AccessKeyId,
2140
- secretAccessKey: s3SecretAccessKey
2141
- },
2142
- forcePathStyle: true
2143
- // Often needed for non-AWS S3 services
2144
- });
2145
- const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
2146
2480
  let presignedUrl;
2147
2481
  try {
2148
- const upload = new import_lib_storage2.Upload({
2149
- client: s3Client,
2150
- params: {
2151
- Bucket: s3Bucket,
2152
- Key: vttKey,
2153
- Body: translatedVtt,
2154
- ContentType: "text/vtt"
2155
- }
2156
- });
2157
- await upload.done();
2158
- console.log(`\u2705 VTT uploaded successfully to: ${vttKey}`);
2159
- const getObjectCommand = new import_client_s32.GetObjectCommand({
2160
- Bucket: s3Bucket,
2161
- Key: vttKey
2162
- });
2163
- presignedUrl = await (0, import_s3_request_presigner2.getSignedUrl)(s3Client, getObjectCommand, {
2164
- expiresIn: 3600
2165
- // 1 hour
2482
+ presignedUrl = await uploadVttToS3({
2483
+ translatedVtt,
2484
+ assetId,
2485
+ fromLanguageCode,
2486
+ toLanguageCode,
2487
+ s3Endpoint,
2488
+ s3Region,
2489
+ s3Bucket,
2490
+ s3AccessKeyId,
2491
+ s3SecretAccessKey
2166
2492
  });
2167
- console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
2168
2493
  } catch (error) {
2169
2494
  throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
2170
2495
  }
2171
- console.log("\u{1F4F9} Adding translated track to Mux asset...");
2172
2496
  let uploadedTrackId;
2173
2497
  try {
2174
- const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(toLanguageCode) || toLanguageCode.toUpperCase();
2498
+ const languageName = getLanguageName(toLanguageCode);
2175
2499
  const trackName = `${languageName} (auto-translated)`;
2176
- const trackResponse = await clients.mux.video.assets.createTrack(assetId, {
2177
- type: "text",
2178
- text_type: "subtitles",
2179
- language_code: toLanguageCode,
2180
- name: trackName,
2181
- url: presignedUrl
2182
- });
2183
- uploadedTrackId = trackResponse.id;
2184
- console.log(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
2185
- console.log(`\u{1F4CB} Track name: "${trackName}"`);
2500
+ uploadedTrackId = await createTextTrackOnMux(config.credentials, assetId, toLanguageCode, trackName, presignedUrl);
2186
2501
  } catch (error) {
2187
- console.warn(`\u26A0\uFE0F Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
2188
- console.log("\u{1F517} You can manually add the track using this presigned URL:");
2189
- console.log(presignedUrl);
2502
+ console.warn(`Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
2190
2503
  }
2191
2504
  return {
2192
2505
  assetId,
2193
2506
  sourceLanguageCode: fromLanguageCode,
2194
2507
  targetLanguageCode: toLanguageCode,
2508
+ sourceLanguage,
2509
+ targetLanguage,
2195
2510
  originalVtt: vttContent,
2196
2511
  translatedVtt,
2197
2512
  uploadedTrackId,
2198
- presignedUrl
2513
+ presignedUrl,
2514
+ usage
2199
2515
  };
2200
2516
  }
2201
- // Annotate the CommonJS export names for ESM import in node:
2202
- 0 && (module.exports = {
2517
+ export {
2203
2518
  SUMMARY_KEYWORD_LIMIT,
2204
2519
  burnedInCaptionsSchema,
2205
2520
  chapterSchema,
@@ -2213,5 +2528,5 @@ ${vttContent}`
2213
2528
  translateAudio,
2214
2529
  translateCaptions,
2215
2530
  translationSchema
2216
- });
2531
+ };
2217
2532
  //# sourceMappingURL=index.js.map