@mux/ai 0.1.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,194 +1,25 @@
1
- "use strict";
2
- var __create = Object.create;
3
- var __defProp = Object.defineProperty;
4
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
- var __getOwnPropNames = Object.getOwnPropertyNames;
6
- var __getProtoOf = Object.getPrototypeOf;
7
- var __hasOwnProp = Object.prototype.hasOwnProperty;
8
- var __export = (target, all) => {
9
- for (var name in all)
10
- __defProp(target, name, { get: all[name], enumerable: true });
11
- };
12
- var __copyProps = (to, from, except, desc) => {
13
- if (from && typeof from === "object" || typeof from === "function") {
14
- for (let key of __getOwnPropNames(from))
15
- if (!__hasOwnProp.call(to, key) && key !== except)
16
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
- }
18
- return to;
19
- };
20
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
- // If the importer is in node compatibility mode or this is not an ESM
22
- // file that has been converted to a CommonJS file using a Babel-
23
- // compatible transform (i.e. "__esModule" has not been set), then set
24
- // "default" to the CommonJS "module.exports" for node compatibility.
25
- isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
- mod
27
- ));
28
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
-
30
- // src/workflows/index.ts
31
- var workflows_exports = {};
32
- __export(workflows_exports, {
33
- SUMMARY_KEYWORD_LIMIT: () => SUMMARY_KEYWORD_LIMIT,
34
- burnedInCaptionsSchema: () => burnedInCaptionsSchema,
35
- chapterSchema: () => chapterSchema,
36
- chaptersSchema: () => chaptersSchema,
37
- generateChapters: () => generateChapters,
38
- generateVideoEmbeddings: () => generateVideoEmbeddings,
39
- getModerationScores: () => getModerationScores,
40
- getSummaryAndTags: () => getSummaryAndTags,
41
- hasBurnedInCaptions: () => hasBurnedInCaptions,
42
- summarySchema: () => summarySchema,
43
- translateAudio: () => translateAudio,
44
- translateCaptions: () => translateCaptions,
45
- translationSchema: () => translationSchema
46
- });
47
- module.exports = __toCommonJS(workflows_exports);
48
-
49
1
  // src/workflows/burned-in-captions.ts
50
- var import_ai = require("ai");
51
-
52
- // node_modules/dedent/dist/dedent.mjs
53
- function ownKeys(object, enumerableOnly) {
54
- var keys = Object.keys(object);
55
- if (Object.getOwnPropertySymbols) {
56
- var symbols = Object.getOwnPropertySymbols(object);
57
- enumerableOnly && (symbols = symbols.filter(function(sym) {
58
- return Object.getOwnPropertyDescriptor(object, sym).enumerable;
59
- })), keys.push.apply(keys, symbols);
60
- }
61
- return keys;
62
- }
63
- function _objectSpread(target) {
64
- for (var i = 1; i < arguments.length; i++) {
65
- var source = null != arguments[i] ? arguments[i] : {};
66
- i % 2 ? ownKeys(Object(source), true).forEach(function(key) {
67
- _defineProperty(target, key, source[key]);
68
- }) : Object.getOwnPropertyDescriptors ? Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)) : ownKeys(Object(source)).forEach(function(key) {
69
- Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
70
- });
71
- }
72
- return target;
73
- }
74
- function _defineProperty(obj, key, value) {
75
- key = _toPropertyKey(key);
76
- if (key in obj) {
77
- Object.defineProperty(obj, key, { value, enumerable: true, configurable: true, writable: true });
78
- } else {
79
- obj[key] = value;
80
- }
81
- return obj;
82
- }
83
- function _toPropertyKey(arg) {
84
- var key = _toPrimitive(arg, "string");
85
- return typeof key === "symbol" ? key : String(key);
86
- }
87
- function _toPrimitive(input, hint) {
88
- if (typeof input !== "object" || input === null) return input;
89
- var prim = input[Symbol.toPrimitive];
90
- if (prim !== void 0) {
91
- var res = prim.call(input, hint || "default");
92
- if (typeof res !== "object") return res;
93
- throw new TypeError("@@toPrimitive must return a primitive value.");
94
- }
95
- return (hint === "string" ? String : Number)(input);
96
- }
97
- var dedent = createDedent({});
98
- var dedent_default = dedent;
99
- function createDedent(options) {
100
- dedent2.withOptions = (newOptions) => createDedent(_objectSpread(_objectSpread({}, options), newOptions));
101
- return dedent2;
102
- function dedent2(strings, ...values) {
103
- const raw = typeof strings === "string" ? [strings] : strings.raw;
104
- const {
105
- alignValues = false,
106
- escapeSpecialCharacters = Array.isArray(strings),
107
- trimWhitespace = true
108
- } = options;
109
- let result = "";
110
- for (let i = 0; i < raw.length; i++) {
111
- let next = raw[i];
112
- if (escapeSpecialCharacters) {
113
- next = next.replace(/\\\n[ \t]*/g, "").replace(/\\`/g, "`").replace(/\\\$/g, "$").replace(/\\\{/g, "{");
114
- }
115
- result += next;
116
- if (i < values.length) {
117
- const value = alignValues ? alignValue(values[i], result) : values[i];
118
- result += value;
119
- }
120
- }
121
- const lines = result.split("\n");
122
- let mindent = null;
123
- for (const l of lines) {
124
- const m = l.match(/^(\s+)\S+/);
125
- if (m) {
126
- const indent = m[1].length;
127
- if (!mindent) {
128
- mindent = indent;
129
- } else {
130
- mindent = Math.min(mindent, indent);
131
- }
132
- }
133
- }
134
- if (mindent !== null) {
135
- const m = mindent;
136
- result = lines.map((l) => l[0] === " " || l[0] === " " ? l.slice(m) : l).join("\n");
137
- }
138
- if (trimWhitespace) {
139
- result = result.trim();
140
- }
141
- if (escapeSpecialCharacters) {
142
- result = result.replace(/\\n/g, "\n");
143
- }
144
- return result;
145
- }
146
- }
147
- function alignValue(value, precedingText) {
148
- if (typeof value !== "string" || !value.includes("\n")) {
149
- return value;
150
- }
151
- const currentLine = precedingText.slice(precedingText.lastIndexOf("\n") + 1);
152
- const indentMatch = currentLine.match(/^(\s+)/);
153
- if (indentMatch) {
154
- const indent = indentMatch[1];
155
- return value.replace(/\n/g, `
156
- ${indent}`);
157
- }
158
- return value;
159
- }
160
-
161
- // src/workflows/burned-in-captions.ts
162
- var import_zod2 = require("zod");
163
-
164
- // src/lib/client-factory.ts
165
- var import_mux_node = __toESM(require("@mux/mux-node"));
2
+ import { generateObject } from "ai";
3
+ import dedent from "dedent";
4
+ import { z as z2 } from "zod";
166
5
 
167
6
  // src/env.ts
168
- var import_node_path = __toESM(require("path"));
169
- var import_dotenv = require("dotenv");
170
- var import_dotenv_expand = require("dotenv-expand");
171
- var import_zod = require("zod");
172
- (0, import_dotenv_expand.expand)((0, import_dotenv.config)({
173
- path: import_node_path.default.resolve(
174
- process.cwd(),
175
- process.env.NODE_ENV === "test" ? ".env.test" : ".env"
176
- )
177
- }));
7
+ import { z } from "zod";
8
+ import "dotenv/config";
178
9
  function optionalString(description, message) {
179
- return import_zod.z.preprocess(
10
+ return z.preprocess(
180
11
  (value) => typeof value === "string" && value.trim().length === 0 ? void 0 : value,
181
- import_zod.z.string().trim().min(1, message).optional()
12
+ z.string().trim().min(1, message).optional()
182
13
  ).describe(description);
183
14
  }
184
15
  function requiredString(description, message) {
185
- return import_zod.z.preprocess(
16
+ return z.preprocess(
186
17
  (value) => typeof value === "string" ? value.trim().length > 0 ? value.trim() : void 0 : value,
187
- import_zod.z.string().trim().min(1, message)
18
+ z.string().trim().min(1, message)
188
19
  ).describe(description);
189
20
  }
190
- var EnvSchema = import_zod.z.object({
191
- NODE_ENV: import_zod.z.string().default("development").describe("Runtime environment."),
21
+ var EnvSchema = z.object({
22
+ NODE_ENV: z.string().default("development").describe("Runtime environment."),
192
23
  MUX_TOKEN_ID: requiredString("Mux access token ID.", "Required to access Mux APIs"),
193
24
  MUX_TOKEN_SECRET: requiredString("Mux access token secret.", "Required to access Mux APIs"),
194
25
  MUX_SIGNING_KEY: optionalString("Mux signing key ID for signed playback URLs.", "Used to sign playback URLs"),
@@ -217,12 +48,12 @@ var env = parseEnv();
217
48
  var env_default = env;
218
49
 
219
50
  // src/lib/providers.ts
220
- var import_anthropic = require("@ai-sdk/anthropic");
221
- var import_google = require("@ai-sdk/google");
222
- var import_openai = require("@ai-sdk/openai");
51
+ import { createAnthropic } from "@ai-sdk/anthropic";
52
+ import { createGoogleGenerativeAI } from "@ai-sdk/google";
53
+ import { createOpenAI } from "@ai-sdk/openai";
223
54
  var DEFAULT_LANGUAGE_MODELS = {
224
- openai: "gpt-5-mini",
225
- anthropic: "claude-haiku-4-5",
55
+ openai: "gpt-5.1",
56
+ anthropic: "claude-sonnet-4-5",
226
57
  google: "gemini-2.5-flash"
227
58
  };
228
59
  var DEFAULT_EMBEDDING_MODELS = {
@@ -235,14 +66,60 @@ function requireEnv(value, name) {
235
66
  }
236
67
  return value;
237
68
  }
69
+ function createLanguageModelFromConfig(provider, modelId) {
70
+ switch (provider) {
71
+ case "openai": {
72
+ const apiKey = env_default.OPENAI_API_KEY;
73
+ requireEnv(apiKey, "OPENAI_API_KEY");
74
+ const openai = createOpenAI({ apiKey });
75
+ return openai(modelId);
76
+ }
77
+ case "anthropic": {
78
+ const apiKey = env_default.ANTHROPIC_API_KEY;
79
+ requireEnv(apiKey, "ANTHROPIC_API_KEY");
80
+ const anthropic = createAnthropic({ apiKey });
81
+ return anthropic(modelId);
82
+ }
83
+ case "google": {
84
+ const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
85
+ requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
86
+ const google = createGoogleGenerativeAI({ apiKey });
87
+ return google(modelId);
88
+ }
89
+ default: {
90
+ const exhaustiveCheck = provider;
91
+ throw new Error(`Unsupported provider: ${exhaustiveCheck}`);
92
+ }
93
+ }
94
+ }
95
+ function createEmbeddingModelFromConfig(provider, modelId) {
96
+ switch (provider) {
97
+ case "openai": {
98
+ const apiKey = env_default.OPENAI_API_KEY;
99
+ requireEnv(apiKey, "OPENAI_API_KEY");
100
+ const openai = createOpenAI({ apiKey });
101
+ return openai.embedding(modelId);
102
+ }
103
+ case "google": {
104
+ const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
105
+ requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
106
+ const google = createGoogleGenerativeAI({ apiKey });
107
+ return google.textEmbeddingModel(modelId);
108
+ }
109
+ default: {
110
+ const exhaustiveCheck = provider;
111
+ throw new Error(`Unsupported embedding provider: ${exhaustiveCheck}`);
112
+ }
113
+ }
114
+ }
238
115
  function resolveLanguageModel(options = {}) {
239
116
  const provider = options.provider || "openai";
240
117
  const modelId = options.model || DEFAULT_LANGUAGE_MODELS[provider];
241
118
  switch (provider) {
242
119
  case "openai": {
243
- const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
120
+ const apiKey = env_default.OPENAI_API_KEY;
244
121
  requireEnv(apiKey, "OPENAI_API_KEY");
245
- const openai = (0, import_openai.createOpenAI)({
122
+ const openai = createOpenAI({
246
123
  apiKey
247
124
  });
248
125
  return {
@@ -252,9 +129,9 @@ function resolveLanguageModel(options = {}) {
252
129
  };
253
130
  }
254
131
  case "anthropic": {
255
- const apiKey = options.anthropicApiKey ?? env_default.ANTHROPIC_API_KEY;
132
+ const apiKey = env_default.ANTHROPIC_API_KEY;
256
133
  requireEnv(apiKey, "ANTHROPIC_API_KEY");
257
- const anthropic = (0, import_anthropic.createAnthropic)({
134
+ const anthropic = createAnthropic({
258
135
  apiKey
259
136
  });
260
137
  return {
@@ -264,9 +141,9 @@ function resolveLanguageModel(options = {}) {
264
141
  };
265
142
  }
266
143
  case "google": {
267
- const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
144
+ const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
268
145
  requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
269
- const google = (0, import_google.createGoogleGenerativeAI)({
146
+ const google = createGoogleGenerativeAI({
270
147
  apiKey
271
148
  });
272
149
  return {
@@ -286,9 +163,9 @@ function resolveEmbeddingModel(options = {}) {
286
163
  const modelId = options.model || DEFAULT_EMBEDDING_MODELS[provider];
287
164
  switch (provider) {
288
165
  case "openai": {
289
- const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
166
+ const apiKey = env_default.OPENAI_API_KEY;
290
167
  requireEnv(apiKey, "OPENAI_API_KEY");
291
- const openai = (0, import_openai.createOpenAI)({
168
+ const openai = createOpenAI({
292
169
  apiKey
293
170
  });
294
171
  return {
@@ -298,9 +175,9 @@ function resolveEmbeddingModel(options = {}) {
298
175
  };
299
176
  }
300
177
  case "google": {
301
- const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
178
+ const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
302
179
  requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
303
- const google = (0, import_google.createGoogleGenerativeAI)({
180
+ const google = createGoogleGenerativeAI({
304
181
  apiKey
305
182
  });
306
183
  return {
@@ -317,12 +194,45 @@ function resolveEmbeddingModel(options = {}) {
317
194
  }
318
195
 
319
196
  // src/lib/client-factory.ts
320
- function validateCredentials(options, requiredProvider) {
321
- const muxTokenId = options.muxTokenId ?? env_default.MUX_TOKEN_ID;
322
- const muxTokenSecret = options.muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
323
- const openaiApiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
324
- const anthropicApiKey = options.anthropicApiKey ?? env_default.ANTHROPIC_API_KEY;
325
- const googleApiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
197
+ function getMuxCredentialsFromEnv() {
198
+ const muxTokenId = env_default.MUX_TOKEN_ID;
199
+ const muxTokenSecret = env_default.MUX_TOKEN_SECRET;
200
+ if (!muxTokenId || !muxTokenSecret) {
201
+ throw new Error(
202
+ "Mux credentials are required. Set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables."
203
+ );
204
+ }
205
+ return { muxTokenId, muxTokenSecret };
206
+ }
207
+ function getApiKeyFromEnv(provider) {
208
+ const envVarMap = {
209
+ openai: env_default.OPENAI_API_KEY,
210
+ anthropic: env_default.ANTHROPIC_API_KEY,
211
+ google: env_default.GOOGLE_GENERATIVE_AI_API_KEY,
212
+ hive: env_default.HIVE_API_KEY,
213
+ elevenlabs: env_default.ELEVENLABS_API_KEY
214
+ };
215
+ const apiKey = envVarMap[provider];
216
+ if (!apiKey) {
217
+ const envVarNames = {
218
+ openai: "OPENAI_API_KEY",
219
+ anthropic: "ANTHROPIC_API_KEY",
220
+ google: "GOOGLE_GENERATIVE_AI_API_KEY",
221
+ hive: "HIVE_API_KEY",
222
+ elevenlabs: "ELEVENLABS_API_KEY"
223
+ };
224
+ throw new Error(
225
+ `${provider} API key is required. Set ${envVarNames[provider]} environment variable.`
226
+ );
227
+ }
228
+ return apiKey;
229
+ }
230
+ async function validateCredentials(requiredProvider) {
231
+ const muxTokenId = env_default.MUX_TOKEN_ID;
232
+ const muxTokenSecret = env_default.MUX_TOKEN_SECRET;
233
+ const openaiApiKey = env_default.OPENAI_API_KEY;
234
+ const anthropicApiKey = env_default.ANTHROPIC_API_KEY;
235
+ const googleApiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
326
236
  if (!muxTokenId || !muxTokenSecret) {
327
237
  throw new Error(
328
238
  "Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables."
@@ -351,32 +261,23 @@ function validateCredentials(options, requiredProvider) {
351
261
  googleApiKey
352
262
  };
353
263
  }
354
- function createMuxClient(credentials) {
355
- if (!credentials.muxTokenId || !credentials.muxTokenSecret) {
356
- throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
357
- }
358
- return new import_mux_node.default({
359
- tokenId: credentials.muxTokenId,
360
- tokenSecret: credentials.muxTokenSecret
361
- });
362
- }
363
- function createWorkflowClients(options, provider) {
264
+ async function createWorkflowConfig(options, provider) {
364
265
  const providerToUse = provider || options.provider || "openai";
365
- const credentials = validateCredentials(options, providerToUse);
366
- const languageModel = resolveLanguageModel({
266
+ const credentials = await validateCredentials(providerToUse);
267
+ const resolved = resolveLanguageModel({
367
268
  ...options,
368
269
  provider: providerToUse
369
270
  });
370
271
  return {
371
- mux: createMuxClient(credentials),
372
- languageModel,
373
- credentials
272
+ credentials,
273
+ provider: resolved.provider,
274
+ modelId: resolved.modelId
374
275
  };
375
276
  }
376
277
 
377
278
  // src/lib/image-download.ts
378
- var import_node_buffer = require("buffer");
379
- var import_p_retry = __toESM(require("p-retry"));
279
+ import { Buffer } from "buffer";
280
+ import pRetry, { AbortError } from "p-retry";
380
281
  var DEFAULT_OPTIONS = {
381
282
  timeout: 1e4,
382
283
  retries: 3,
@@ -385,9 +286,10 @@ var DEFAULT_OPTIONS = {
385
286
  exponentialBackoff: true
386
287
  };
387
288
  async function downloadImageAsBase64(url, options = {}) {
289
+ "use step";
388
290
  const opts = { ...DEFAULT_OPTIONS, ...options };
389
291
  let attemptCount = 0;
390
- return (0, import_p_retry.default)(
292
+ return pRetry(
391
293
  async () => {
392
294
  attemptCount++;
393
295
  const controller = new AbortController();
@@ -402,18 +304,18 @@ async function downloadImageAsBase64(url, options = {}) {
402
304
  clearTimeout(timeoutId);
403
305
  if (!response.ok) {
404
306
  if (response.status >= 400 && response.status < 500 && response.status !== 429) {
405
- throw new import_p_retry.AbortError(`HTTP ${response.status}: ${response.statusText}`);
307
+ throw new AbortError(`HTTP ${response.status}: ${response.statusText}`);
406
308
  }
407
309
  throw new Error(`HTTP ${response.status}: ${response.statusText}`);
408
310
  }
409
311
  const contentType = response.headers.get("content-type");
410
312
  if (!contentType?.startsWith("image/")) {
411
- throw new import_p_retry.AbortError(`Invalid content type: ${contentType}. Expected image/*`);
313
+ throw new AbortError(`Invalid content type: ${contentType}. Expected image/*`);
412
314
  }
413
315
  const arrayBuffer = await response.arrayBuffer();
414
- const buffer = import_node_buffer.Buffer.from(arrayBuffer);
316
+ const buffer = Buffer.from(arrayBuffer);
415
317
  if (buffer.length === 0) {
416
- throw new import_p_retry.AbortError("Downloaded image is empty");
318
+ throw new AbortError("Downloaded image is empty");
417
319
  }
418
320
  const base64Data = `data:${contentType};base64,${buffer.toString("base64")}`;
419
321
  return {
@@ -426,7 +328,7 @@ async function downloadImageAsBase64(url, options = {}) {
426
328
  };
427
329
  } catch (error) {
428
330
  clearTimeout(timeoutId);
429
- if (error instanceof import_p_retry.AbortError) {
331
+ if (error instanceof AbortError) {
430
332
  throw error;
431
333
  }
432
334
  if (error instanceof Error) {
@@ -455,6 +357,7 @@ async function downloadImageAsBase64(url, options = {}) {
455
357
  );
456
358
  }
457
359
  async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
360
+ "use step";
458
361
  const results = [];
459
362
  for (let i = 0; i < urls.length; i += maxConcurrent) {
460
363
  const batch = urls.slice(i, i + maxConcurrent);
@@ -466,6 +369,7 @@ async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
466
369
  }
467
370
 
468
371
  // src/lib/mux-assets.ts
372
+ import Mux from "@mux/mux-node";
469
373
  function getPlaybackId(asset) {
470
374
  const playbackIds = asset.playback_ids || [];
471
375
  const publicPlaybackId = playbackIds.find((pid) => pid.policy === "public");
@@ -480,7 +384,13 @@ function getPlaybackId(asset) {
480
384
  "No public or signed playback ID found for this asset. A public or signed playback ID is required. DRM playback IDs are not currently supported."
481
385
  );
482
386
  }
483
- async function getPlaybackIdForAsset(mux, assetId) {
387
+ async function getPlaybackIdForAsset(assetId) {
388
+ "use step";
389
+ const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
390
+ const mux = new Mux({
391
+ tokenId: muxTokenId,
392
+ tokenSecret: muxTokenSecret
393
+ });
484
394
  const asset = await mux.video.assets.retrieve(assetId);
485
395
  const { id: playbackId, policy } = getPlaybackId(asset);
486
396
  return { asset, playbackId, policy };
@@ -519,8 +429,8 @@ function resolveSection(defaultSection, override) {
519
429
  }
520
430
  return override;
521
431
  }
522
- function createPromptBuilder(config2) {
523
- const { template, sectionOrder } = config2;
432
+ function createPromptBuilder(config) {
433
+ const { template, sectionOrder } = config;
524
434
  const getSection = (section, override) => {
525
435
  const resolved = resolveSection(template[section], override);
526
436
  return renderSection(resolved);
@@ -561,17 +471,17 @@ function createToneSection(instruction) {
561
471
  }
562
472
 
563
473
  // src/lib/url-signing.ts
564
- var import_mux_node2 = __toESM(require("@mux/mux-node"));
565
- function resolveSigningContext(config2) {
566
- const keyId = config2.muxSigningKey ?? env_default.MUX_SIGNING_KEY;
567
- const keySecret = config2.muxPrivateKey ?? env_default.MUX_PRIVATE_KEY;
474
+ import Mux2 from "@mux/mux-node";
475
+ function getMuxSigningContextFromEnv() {
476
+ const keyId = env_default.MUX_SIGNING_KEY;
477
+ const keySecret = env_default.MUX_PRIVATE_KEY;
568
478
  if (!keyId || !keySecret) {
569
479
  return void 0;
570
480
  }
571
481
  return { keyId, keySecret };
572
482
  }
573
483
  function createSigningClient(context) {
574
- return new import_mux_node2.default({
484
+ return new Mux2({
575
485
  // These are not needed for signing, but the SDK requires them
576
486
  // Using empty strings as we only need the jwt functionality
577
487
  tokenId: env_default.MUX_TOKEN_ID || "",
@@ -581,6 +491,7 @@ function createSigningClient(context) {
581
491
  });
582
492
  }
583
493
  async function signPlaybackId(playbackId, context, type = "video", params) {
494
+ "use step";
584
495
  const client = createSigningClient(context);
585
496
  const stringParams = params ? Object.fromEntries(
586
497
  Object.entries(params).map(([key, value]) => [key, String(value)])
@@ -592,6 +503,7 @@ async function signPlaybackId(playbackId, context, type = "video", params) {
592
503
  });
593
504
  }
594
505
  async function signUrl(url, playbackId, context, type = "video", params) {
506
+ "use step";
595
507
  const token = await signPlaybackId(playbackId, context, type, params);
596
508
  const separator = url.includes("?") ? "&" : "?";
597
509
  return `${url}${separator}token=${token}`;
@@ -599,21 +511,23 @@ async function signUrl(url, playbackId, context, type = "video", params) {
599
511
 
600
512
  // src/primitives/storyboards.ts
601
513
  var DEFAULT_STORYBOARD_WIDTH = 640;
602
- async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, signingContext) {
514
+ async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, shouldSign = false) {
515
+ "use step";
603
516
  const baseUrl = `https://image.mux.com/${playbackId}/storyboard.png`;
604
- if (signingContext) {
517
+ if (shouldSign) {
518
+ const signingContext = getMuxSigningContextFromEnv();
605
519
  return signUrl(baseUrl, playbackId, signingContext, "storyboard", { width });
606
520
  }
607
521
  return `${baseUrl}?width=${width}`;
608
522
  }
609
523
 
610
524
  // src/workflows/burned-in-captions.ts
611
- var burnedInCaptionsSchema = import_zod2.z.object({
612
- hasBurnedInCaptions: import_zod2.z.boolean(),
613
- confidence: import_zod2.z.number().min(0).max(1),
614
- detectedLanguage: import_zod2.z.string().nullable()
525
+ var burnedInCaptionsSchema = z2.object({
526
+ hasBurnedInCaptions: z2.boolean(),
527
+ confidence: z2.number().min(0).max(1),
528
+ detectedLanguage: z2.string().nullable()
615
529
  });
616
- var SYSTEM_PROMPT = dedent_default`
530
+ var SYSTEM_PROMPT = dedent`
617
531
  <role>
618
532
  You are an expert at analyzing video frames to detect burned-in captions (also called open captions or hardcoded subtitles).
619
533
  These are text overlays that are permanently embedded in the video image, common on TikTok, Instagram Reels, and other social media platforms.
@@ -656,14 +570,14 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
656
570
  template: {
657
571
  task: {
658
572
  tag: "task",
659
- content: dedent_default`
573
+ content: dedent`
660
574
  Analyze the provided video storyboard to detect burned-in captions (hardcoded subtitles).
661
575
  Count frames with text vs no text, note position consistency and whether text changes across frames.
662
576
  Decide if captions exist, with confidence (0.0-1.0) and detected language if any.`
663
577
  },
664
578
  analysisSteps: {
665
579
  tag: "analysis_steps",
666
- content: dedent_default`
580
+ content: dedent`
667
581
  1. COUNT how many frames contain text overlays vs. how many don't
668
582
  2. Check if text appears in consistent positions across multiple frames
669
583
  3. Verify text changes content between frames (indicating dialogue/narration)
@@ -672,7 +586,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
672
586
  },
673
587
  positiveIndicators: {
674
588
  tag: "classify_as_captions",
675
- content: dedent_default`
589
+ content: dedent`
676
590
  ONLY classify as burned-in captions if:
677
591
  - Text appears in multiple frames (not just 1-2 end frames)
678
592
  - Text positioning is consistent across those frames
@@ -681,7 +595,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
681
595
  },
682
596
  negativeIndicators: {
683
597
  tag: "not_captions",
684
- content: dedent_default`
598
+ content: dedent`
685
599
  DO NOT classify as burned-in captions:
686
600
  - Marketing taglines appearing only in final 1-2 frames
687
601
  - Single words or phrases that don't change between frames
@@ -696,65 +610,90 @@ function buildUserPrompt(promptOverrides) {
696
610
  return burnedInCaptionsPromptBuilder.build(promptOverrides);
697
611
  }
698
612
  var DEFAULT_PROVIDER = "openai";
613
+ async function fetchImageAsBase64(imageUrl, imageDownloadOptions) {
614
+ "use step";
615
+ const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
616
+ return downloadResult.base64Data;
617
+ }
618
+ async function analyzeStoryboard({
619
+ imageDataUrl,
620
+ provider,
621
+ modelId,
622
+ userPrompt,
623
+ systemPrompt
624
+ }) {
625
+ "use step";
626
+ const model = createLanguageModelFromConfig(provider, modelId);
627
+ const response = await generateObject({
628
+ model,
629
+ schema: burnedInCaptionsSchema,
630
+ experimental_telemetry: { isEnabled: true },
631
+ messages: [
632
+ {
633
+ role: "system",
634
+ content: systemPrompt
635
+ },
636
+ {
637
+ role: "user",
638
+ content: [
639
+ { type: "text", text: userPrompt },
640
+ { type: "image", image: imageDataUrl }
641
+ ]
642
+ }
643
+ ]
644
+ });
645
+ return {
646
+ result: response.object,
647
+ usage: {
648
+ inputTokens: response.usage.inputTokens,
649
+ outputTokens: response.usage.outputTokens,
650
+ totalTokens: response.usage.totalTokens,
651
+ reasoningTokens: response.usage.reasoningTokens,
652
+ cachedInputTokens: response.usage.cachedInputTokens
653
+ }
654
+ };
655
+ }
699
656
  async function hasBurnedInCaptions(assetId, options = {}) {
657
+ "use workflow";
700
658
  const {
701
659
  provider = DEFAULT_PROVIDER,
702
660
  model,
703
661
  imageSubmissionMode = "url",
704
662
  imageDownloadOptions,
705
663
  promptOverrides,
706
- ...config2
664
+ ...config
707
665
  } = options;
708
666
  const userPrompt = buildUserPrompt(promptOverrides);
709
- const clients = createWorkflowClients(
710
- { ...config2, model },
667
+ const workflowConfig = await createWorkflowConfig(
668
+ { ...config, model },
711
669
  provider
712
670
  );
713
- const { playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
714
- const signingContext = resolveSigningContext(options);
671
+ const { playbackId, policy } = await getPlaybackIdForAsset(assetId);
672
+ const signingContext = getMuxSigningContextFromEnv();
715
673
  if (policy === "signed" && !signingContext) {
716
674
  throw new Error(
717
675
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
718
676
  );
719
677
  }
720
- const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
721
- const analyzeStoryboard = async (imageDataUrl) => {
722
- const response = await (0, import_ai.generateObject)({
723
- model: clients.languageModel.model,
724
- schema: burnedInCaptionsSchema,
725
- abortSignal: options.abortSignal,
726
- experimental_telemetry: { isEnabled: true },
727
- messages: [
728
- {
729
- role: "system",
730
- content: SYSTEM_PROMPT
731
- },
732
- {
733
- role: "user",
734
- content: [
735
- { type: "text", text: userPrompt },
736
- { type: "image", image: imageDataUrl }
737
- ]
738
- }
739
- ]
740
- });
741
- return {
742
- result: response.object,
743
- usage: {
744
- inputTokens: response.usage.inputTokens,
745
- outputTokens: response.usage.outputTokens,
746
- totalTokens: response.usage.totalTokens,
747
- reasoningTokens: response.usage.reasoningTokens,
748
- cachedInputTokens: response.usage.cachedInputTokens
749
- }
750
- };
751
- };
678
+ const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed");
752
679
  let analysisResponse;
753
680
  if (imageSubmissionMode === "base64") {
754
- const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
755
- analysisResponse = await analyzeStoryboard(downloadResult.base64Data);
681
+ const base64Data = await fetchImageAsBase64(imageUrl, imageDownloadOptions);
682
+ analysisResponse = await analyzeStoryboard({
683
+ imageDataUrl: base64Data,
684
+ provider: workflowConfig.provider,
685
+ modelId: workflowConfig.modelId,
686
+ userPrompt,
687
+ systemPrompt: SYSTEM_PROMPT
688
+ });
756
689
  } else {
757
- analysisResponse = await analyzeStoryboard(imageUrl);
690
+ analysisResponse = await analyzeStoryboard({
691
+ imageDataUrl: imageUrl,
692
+ provider: workflowConfig.provider,
693
+ modelId: workflowConfig.modelId,
694
+ userPrompt,
695
+ systemPrompt: SYSTEM_PROMPT
696
+ });
758
697
  }
759
698
  if (!analysisResponse.result) {
760
699
  throw new Error("No analysis result received from AI provider");
@@ -770,8 +709,8 @@ async function hasBurnedInCaptions(assetId, options = {}) {
770
709
  }
771
710
 
772
711
  // src/workflows/chapters.ts
773
- var import_ai2 = require("ai");
774
- var import_zod3 = require("zod");
712
+ import { generateObject as generateObject2 } from "ai";
713
+ import { z as z3 } from "zod";
775
714
 
776
715
  // src/lib/retry.ts
777
716
  var DEFAULT_RETRY_OPTIONS = {
@@ -803,11 +742,11 @@ async function withRetry(fn, {
803
742
  if (isLastAttempt || !shouldRetry(lastError, attempt + 1)) {
804
743
  throw lastError;
805
744
  }
806
- const delay2 = calculateDelay(attempt + 1, baseDelay, maxDelay);
745
+ const delay = calculateDelay(attempt + 1, baseDelay, maxDelay);
807
746
  console.warn(
808
- `Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(delay2)}ms...`
747
+ `Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(delay)}ms...`
809
748
  );
810
- await new Promise((resolve) => setTimeout(resolve, delay2));
749
+ await new Promise((resolve) => setTimeout(resolve, delay));
811
750
  }
812
751
  }
813
752
  throw lastError || new Error("Retry failed with unknown error");
@@ -921,15 +860,18 @@ function parseVTTCues(vttContent) {
921
860
  }
922
861
  return cues;
923
862
  }
924
- async function buildTranscriptUrl(playbackId, trackId, signingContext) {
863
+ async function buildTranscriptUrl(playbackId, trackId, shouldSign = false) {
864
+ "use step";
925
865
  const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
926
- if (signingContext) {
866
+ if (shouldSign) {
867
+ const signingContext = getMuxSigningContextFromEnv();
927
868
  return signUrl(baseUrl, playbackId, signingContext, "video");
928
869
  }
929
870
  return baseUrl;
930
871
  }
931
872
  async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
932
- const { languageCode, cleanTranscript = true, signingContext } = options;
873
+ "use step";
874
+ const { languageCode, cleanTranscript = true, shouldSign } = options;
933
875
  const track = findCaptionTrack(asset, languageCode);
934
876
  if (!track) {
935
877
  return { transcriptText: "" };
@@ -937,7 +879,7 @@ async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
937
879
  if (!track.id) {
938
880
  return { transcriptText: "", track };
939
881
  }
940
- const transcriptUrl = await buildTranscriptUrl(playbackId, track.id, signingContext);
882
+ const transcriptUrl = await buildTranscriptUrl(playbackId, track.id, shouldSign);
941
883
  try {
942
884
  const response = await fetch(transcriptUrl);
943
885
  if (!response.ok) {
@@ -953,14 +895,39 @@ async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
953
895
  }
954
896
 
955
897
  // src/workflows/chapters.ts
956
- var chapterSchema = import_zod3.z.object({
957
- startTime: import_zod3.z.number(),
958
- title: import_zod3.z.string()
898
+ var chapterSchema = z3.object({
899
+ startTime: z3.number(),
900
+ title: z3.string()
959
901
  });
960
- var chaptersSchema = import_zod3.z.object({
961
- chapters: import_zod3.z.array(chapterSchema)
902
+ var chaptersSchema = z3.object({
903
+ chapters: z3.array(chapterSchema)
962
904
  });
963
- var DEFAULT_PROVIDER2 = "openai";
905
+ async function generateChaptersWithAI({
906
+ provider,
907
+ modelId,
908
+ timestampedTranscript,
909
+ systemPrompt
910
+ }) {
911
+ "use step";
912
+ const model = createLanguageModelFromConfig(provider, modelId);
913
+ const response = await withRetry(
914
+ () => generateObject2({
915
+ model,
916
+ schema: chaptersSchema,
917
+ messages: [
918
+ {
919
+ role: "system",
920
+ content: systemPrompt
921
+ },
922
+ {
923
+ role: "user",
924
+ content: timestampedTranscript
925
+ }
926
+ ]
927
+ })
928
+ );
929
+ return response.object;
930
+ }
964
931
  var SYSTEM_PROMPT2 = `Your role is to segment the following captions into chunked chapters, summarising each chapter with a title.
965
932
 
966
933
  Analyze the transcript and create logical chapter breaks based on topic changes, major transitions, or distinct sections of content. Each chapter should represent a meaningful segment of the video.
@@ -982,10 +949,11 @@ Important rules:
982
949
  - Do not include any text before or after the JSON
983
950
  - The JSON must be valid and parseable`;
984
951
  async function generateChapters(assetId, languageCode, options = {}) {
985
- const { provider = DEFAULT_PROVIDER2, model, abortSignal } = options;
986
- const clients = createWorkflowClients({ ...options, model }, provider);
987
- const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
988
- const signingContext = resolveSigningContext(options);
952
+ "use workflow";
953
+ const { provider = "openai", model } = options;
954
+ const config = await createWorkflowConfig({ ...options, model }, provider);
955
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
956
+ const signingContext = getMuxSigningContextFromEnv();
989
957
  if (policy === "signed" && !signingContext) {
990
958
  throw new Error(
991
959
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -995,7 +963,7 @@ async function generateChapters(assetId, languageCode, options = {}) {
995
963
  languageCode,
996
964
  cleanTranscript: false,
997
965
  // keep timestamps for chapter segmentation
998
- signingContext: policy === "signed" ? signingContext : void 0
966
+ shouldSign: policy === "signed"
999
967
  });
1000
968
  if (!transcriptResult.track || !transcriptResult.transcriptText) {
1001
969
  const availableLanguages = getReadyTextTracks(assetData).map((t) => t.language_code).filter(Boolean).join(", ");
@@ -1009,24 +977,12 @@ async function generateChapters(assetId, languageCode, options = {}) {
1009
977
  }
1010
978
  let chaptersData = null;
1011
979
  try {
1012
- const response = await withRetry(
1013
- () => (0, import_ai2.generateObject)({
1014
- model: clients.languageModel.model,
1015
- schema: chaptersSchema,
1016
- abortSignal,
1017
- messages: [
1018
- {
1019
- role: "system",
1020
- content: SYSTEM_PROMPT2
1021
- },
1022
- {
1023
- role: "user",
1024
- content: timestampedTranscript
1025
- }
1026
- ]
1027
- })
1028
- );
1029
- chaptersData = response.object;
980
+ chaptersData = await generateChaptersWithAI({
981
+ provider: config.provider,
982
+ modelId: config.modelId,
983
+ timestampedTranscript,
984
+ systemPrompt: SYSTEM_PROMPT2
985
+ });
1030
986
  } catch (error) {
1031
987
  throw new Error(
1032
988
  `Failed to generate chapters with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
@@ -1050,7 +1006,7 @@ async function generateChapters(assetId, languageCode, options = {}) {
1050
1006
  }
1051
1007
 
1052
1008
  // src/workflows/embeddings.ts
1053
- var import_ai3 = require("ai");
1009
+ import { embed } from "ai";
1054
1010
 
1055
1011
  // src/primitives/text-chunking.ts
1056
1012
  function estimateTokenCount(text) {
@@ -1138,13 +1094,6 @@ function chunkText(text, strategy) {
1138
1094
  }
1139
1095
 
1140
1096
  // src/workflows/embeddings.ts
1141
- var DEFAULT_PROVIDER3 = "openai";
1142
- var DEFAULT_CHUNKING_STRATEGY = {
1143
- type: "token",
1144
- maxTokens: 500,
1145
- overlap: 100
1146
- };
1147
- var DEFAULT_BATCH_SIZE = 5;
1148
1097
  function averageEmbeddings(embeddings) {
1149
1098
  if (embeddings.length === 0) {
1150
1099
  return [];
@@ -1161,51 +1110,41 @@ function averageEmbeddings(embeddings) {
1161
1110
  }
1162
1111
  return averaged;
1163
1112
  }
1164
- async function generateChunkEmbeddings(chunks, model, batchSize, abortSignal) {
1165
- const results = [];
1166
- for (let i = 0; i < chunks.length; i += batchSize) {
1167
- const batch = chunks.slice(i, i + batchSize);
1168
- const batchResults = await Promise.all(
1169
- batch.map(async (chunk) => {
1170
- const response = await withRetry(
1171
- () => (0, import_ai3.embed)({
1172
- model,
1173
- value: chunk.text,
1174
- abortSignal
1175
- })
1176
- );
1177
- return {
1178
- chunkId: chunk.id,
1179
- embedding: response.embedding,
1180
- metadata: {
1181
- startTime: chunk.startTime,
1182
- endTime: chunk.endTime,
1183
- tokenCount: chunk.tokenCount
1184
- }
1185
- };
1186
- })
1187
- );
1188
- results.push(...batchResults);
1189
- }
1190
- return results;
1113
+ async function generateSingleChunkEmbedding({
1114
+ chunk,
1115
+ provider,
1116
+ modelId
1117
+ }) {
1118
+ "use step";
1119
+ const model = createEmbeddingModelFromConfig(provider, modelId);
1120
+ const response = await withRetry(
1121
+ () => embed({
1122
+ model,
1123
+ value: chunk.text
1124
+ })
1125
+ );
1126
+ return {
1127
+ chunkId: chunk.id,
1128
+ embedding: response.embedding,
1129
+ metadata: {
1130
+ startTime: chunk.startTime,
1131
+ endTime: chunk.endTime,
1132
+ tokenCount: chunk.tokenCount
1133
+ }
1134
+ };
1191
1135
  }
1192
1136
  async function generateVideoEmbeddings(assetId, options = {}) {
1137
+ "use workflow";
1193
1138
  const {
1194
- provider = DEFAULT_PROVIDER3,
1139
+ provider = "openai",
1195
1140
  model,
1196
1141
  languageCode,
1197
- chunkingStrategy = DEFAULT_CHUNKING_STRATEGY,
1198
- batchSize = DEFAULT_BATCH_SIZE,
1199
- abortSignal
1142
+ chunkingStrategy = { type: "token", maxTokens: 500, overlap: 100 },
1143
+ batchSize = 5
1200
1144
  } = options;
1201
- const credentials = validateCredentials(options, provider === "google" ? "google" : "openai");
1202
- const muxClient = createMuxClient(credentials);
1203
1145
  const embeddingModel = resolveEmbeddingModel({ ...options, provider, model });
1204
- const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
1205
- muxClient,
1206
- assetId
1207
- );
1208
- const signingContext = resolveSigningContext(options);
1146
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
1147
+ const signingContext = getMuxSigningContextFromEnv();
1209
1148
  if (policy === "signed" && !signingContext) {
1210
1149
  throw new Error(
1211
1150
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1215,7 +1154,7 @@ async function generateVideoEmbeddings(assetId, options = {}) {
1215
1154
  const transcriptResult = await fetchTranscriptForAsset(assetData, playbackId, {
1216
1155
  languageCode,
1217
1156
  cleanTranscript: !useVttChunking,
1218
- signingContext: policy === "signed" ? signingContext : void 0
1157
+ shouldSign: policy === "signed"
1219
1158
  });
1220
1159
  if (!transcriptResult.track || !transcriptResult.transcriptText) {
1221
1160
  const availableLanguages = getReadyTextTracks(assetData).map((t) => t.language_code).filter(Boolean).join(", ");
@@ -1235,14 +1174,21 @@ async function generateVideoEmbeddings(assetId, options = {}) {
1235
1174
  if (chunks.length === 0) {
1236
1175
  throw new Error("No chunks generated from transcript");
1237
1176
  }
1238
- let chunkEmbeddings;
1177
+ const chunkEmbeddings = [];
1239
1178
  try {
1240
- chunkEmbeddings = await generateChunkEmbeddings(
1241
- chunks,
1242
- embeddingModel.model,
1243
- batchSize,
1244
- abortSignal
1245
- );
1179
+ for (let i = 0; i < chunks.length; i += batchSize) {
1180
+ const batch = chunks.slice(i, i + batchSize);
1181
+ const batchResults = await Promise.all(
1182
+ batch.map(
1183
+ (chunk) => generateSingleChunkEmbedding({
1184
+ chunk,
1185
+ provider: embeddingModel.provider,
1186
+ modelId: embeddingModel.modelId
1187
+ })
1188
+ )
1189
+ );
1190
+ chunkEmbeddings.push(...batchResults);
1191
+ }
1246
1192
  } catch (error) {
1247
1193
  throw new Error(
1248
1194
  `Failed to generate embeddings with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
@@ -1271,7 +1217,8 @@ async function generateVideoEmbeddings(assetId, options = {}) {
1271
1217
 
1272
1218
  // src/primitives/thumbnails.ts
1273
1219
  async function getThumbnailUrls(playbackId, duration, options = {}) {
1274
- const { interval = 10, width = 640, signingContext } = options;
1220
+ "use step";
1221
+ const { interval = 10, width = 640, shouldSign = false } = options;
1275
1222
  const timestamps = [];
1276
1223
  if (duration <= 50) {
1277
1224
  const spacing = duration / 6;
@@ -1285,7 +1232,8 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
1285
1232
  }
1286
1233
  const baseUrl = `https://image.mux.com/${playbackId}/thumbnail.png`;
1287
1234
  const urlPromises = timestamps.map(async (time) => {
1288
- if (signingContext) {
1235
+ if (shouldSign) {
1236
+ const signingContext = getMuxSigningContextFromEnv();
1289
1237
  return signUrl(baseUrl, playbackId, signingContext, "thumbnail", { time, width });
1290
1238
  }
1291
1239
  return `${baseUrl}?time=${time}&width=${width}`;
@@ -1298,7 +1246,7 @@ var DEFAULT_THRESHOLDS = {
1298
1246
  sexual: 0.7,
1299
1247
  violence: 0.8
1300
1248
  };
1301
- var DEFAULT_PROVIDER4 = "openai";
1249
+ var DEFAULT_PROVIDER2 = "openai";
1302
1250
  var HIVE_ENDPOINT = "https://api.thehive.ai/api/v2/task/sync";
1303
1251
  var HIVE_SEXUAL_CATEGORIES = [
1304
1252
  "general_nsfw",
@@ -1336,6 +1284,7 @@ var HIVE_VIOLENCE_CATEGORIES = [
1336
1284
  "garm_death_injury_or_military_conflict"
1337
1285
  ];
1338
1286
  async function processConcurrently(items, processor, maxConcurrent = 5) {
1287
+ "use step";
1339
1288
  const results = [];
1340
1289
  for (let i = 0; i < items.length; i += maxConcurrent) {
1341
1290
  const batch = items.slice(i, i + maxConcurrent);
@@ -1345,11 +1294,14 @@ async function processConcurrently(items, processor, maxConcurrent = 5) {
1345
1294
  }
1346
1295
  return results;
1347
1296
  }
1348
- async function requestOpenAIModeration(imageUrls, apiKey, model, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
1297
+ async function requestOpenAIModeration(imageUrls, model, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
1298
+ "use step";
1349
1299
  const targetUrls = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map(
1350
- (img) => ({ url: img.url, image: img.base64Data })
1351
- ) : imageUrls.map((url) => ({ url, image: url }));
1300
+ (img) => ({ url: img.url, image: img.base64Data, model })
1301
+ ) : imageUrls.map((url) => ({ url, image: url, model }));
1352
1302
  const moderate = async (entry) => {
1303
+ "use step";
1304
+ const apiKey = getApiKeyFromEnv("openai");
1353
1305
  try {
1354
1306
  const res = await fetch("https://api.openai.com/v1/moderations", {
1355
1307
  method: "POST",
@@ -1358,7 +1310,7 @@ async function requestOpenAIModeration(imageUrls, apiKey, model, maxConcurrent =
1358
1310
  "Authorization": `Bearer ${apiKey}`
1359
1311
  },
1360
1312
  body: JSON.stringify({
1361
- model,
1313
+ model: entry.model,
1362
1314
  input: [
1363
1315
  {
1364
1316
  type: "image_url",
@@ -1401,7 +1353,8 @@ function getHiveCategoryScores(classes, categoryNames) {
1401
1353
  const scores = categoryNames.map((category) => scoreMap[category] || 0);
1402
1354
  return Math.max(...scores, 0);
1403
1355
  }
1404
- async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
1356
+ async function requestHiveModeration(imageUrls, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
1357
+ "use step";
1405
1358
  const targets = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map((img) => ({
1406
1359
  url: img.url,
1407
1360
  source: {
@@ -1414,6 +1367,8 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
1414
1367
  source: { kind: "url", value: url }
1415
1368
  }));
1416
1369
  const moderate = async (entry) => {
1370
+ "use step";
1371
+ const apiKey = getApiKeyFromEnv("hive");
1417
1372
  try {
1418
1373
  const formData = new FormData();
1419
1374
  if (entry.source.kind === "url") {
@@ -1459,8 +1414,9 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
1459
1414
  return processConcurrently(targets, moderate, maxConcurrent);
1460
1415
  }
1461
1416
  async function getModerationScores(assetId, options = {}) {
1417
+ "use workflow";
1462
1418
  const {
1463
- provider = DEFAULT_PROVIDER4,
1419
+ provider = DEFAULT_PROVIDER2,
1464
1420
  model = provider === "openai" ? "omni-moderation-latest" : void 0,
1465
1421
  thresholds = DEFAULT_THRESHOLDS,
1466
1422
  thumbnailInterval = 10,
@@ -1469,11 +1425,9 @@ async function getModerationScores(assetId, options = {}) {
1469
1425
  imageSubmissionMode = "url",
1470
1426
  imageDownloadOptions
1471
1427
  } = options;
1472
- const credentials = validateCredentials(options, provider === "openai" ? "openai" : void 0);
1473
- const muxClient = createMuxClient(credentials);
1474
- const { asset, playbackId, policy } = await getPlaybackIdForAsset(muxClient, assetId);
1428
+ const { asset, playbackId, policy } = await getPlaybackIdForAsset(assetId);
1475
1429
  const duration = asset.duration || 0;
1476
- const signingContext = resolveSigningContext(options);
1430
+ const signingContext = getMuxSigningContextFromEnv();
1477
1431
  if (policy === "signed" && !signingContext) {
1478
1432
  throw new Error(
1479
1433
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1482,30 +1436,20 @@ async function getModerationScores(assetId, options = {}) {
1482
1436
  const thumbnailUrls = await getThumbnailUrls(playbackId, duration, {
1483
1437
  interval: thumbnailInterval,
1484
1438
  width: thumbnailWidth,
1485
- signingContext: policy === "signed" ? signingContext : void 0
1439
+ shouldSign: policy === "signed"
1486
1440
  });
1487
1441
  let thumbnailScores;
1488
1442
  if (provider === "openai") {
1489
- const apiKey = credentials.openaiApiKey;
1490
- if (!apiKey) {
1491
- throw new Error("OpenAI API key is required for moderation. Set OPENAI_API_KEY or pass openaiApiKey.");
1492
- }
1493
1443
  thumbnailScores = await requestOpenAIModeration(
1494
1444
  thumbnailUrls,
1495
- apiKey,
1496
1445
  model || "omni-moderation-latest",
1497
1446
  maxConcurrent,
1498
1447
  imageSubmissionMode,
1499
1448
  imageDownloadOptions
1500
1449
  );
1501
1450
  } else if (provider === "hive") {
1502
- const hiveApiKey = options.hiveApiKey || env_default.HIVE_API_KEY;
1503
- if (!hiveApiKey) {
1504
- throw new Error("Hive API key is required for moderation. Set HIVE_API_KEY or pass hiveApiKey.");
1505
- }
1506
1451
  thumbnailScores = await requestHiveModeration(
1507
1452
  thumbnailUrls,
1508
- hiveApiKey,
1509
1453
  maxConcurrent,
1510
1454
  imageSubmissionMode,
1511
1455
  imageDownloadOptions
@@ -1529,17 +1473,18 @@ async function getModerationScores(assetId, options = {}) {
1529
1473
  }
1530
1474
 
1531
1475
  // src/workflows/summarization.ts
1532
- var import_ai4 = require("ai");
1533
- var import_zod4 = require("zod");
1476
+ import { generateObject as generateObject3 } from "ai";
1477
+ import dedent2 from "dedent";
1478
+ import { z as z4 } from "zod";
1534
1479
  var SUMMARY_KEYWORD_LIMIT = 10;
1535
- var summarySchema = import_zod4.z.object({
1536
- keywords: import_zod4.z.array(import_zod4.z.string()),
1537
- title: import_zod4.z.string(),
1538
- description: import_zod4.z.string()
1480
+ var summarySchema = z4.object({
1481
+ keywords: z4.array(z4.string()),
1482
+ title: z4.string(),
1483
+ description: z4.string()
1539
1484
  });
1540
1485
  var TONE_INSTRUCTIONS = {
1541
- normal: "Provide a clear, straightforward analysis.",
1542
- sassy: "Answer with a sassy, playful attitude and personality.",
1486
+ neutral: "Provide a clear, straightforward analysis.",
1487
+ playful: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
1543
1488
  professional: "Provide a professional, executive-level analysis suitable for business reporting."
1544
1489
  };
1545
1490
  var summarizationPromptBuilder = createPromptBuilder({
@@ -1550,7 +1495,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1550
1495
  },
1551
1496
  title: {
1552
1497
  tag: "title_requirements",
1553
- content: dedent_default`
1498
+ content: dedent2`
1554
1499
  A short, compelling headline that immediately communicates the subject or action.
1555
1500
  Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
1556
1501
  Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
@@ -1558,7 +1503,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1558
1503
  },
1559
1504
  description: {
1560
1505
  tag: "description_requirements",
1561
- content: dedent_default`
1506
+ content: dedent2`
1562
1507
  A concise summary (2-4 sentences) that describes what happens across the video.
1563
1508
  Cover the main subjects, actions, setting, and any notable progression visible across frames.
1564
1509
  Write in present tense. Be specific about observable details rather than making assumptions.
@@ -1566,7 +1511,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1566
1511
  },
1567
1512
  keywords: {
1568
1513
  tag: "keywords_requirements",
1569
- content: dedent_default`
1514
+ content: dedent2`
1570
1515
  Specific, searchable terms (up to 10) that capture:
1571
1516
  - Primary subjects (people, animals, objects)
1572
1517
  - Actions and activities being performed
@@ -1578,7 +1523,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1578
1523
  },
1579
1524
  qualityGuidelines: {
1580
1525
  tag: "quality_guidelines",
1581
- content: dedent_default`
1526
+ content: dedent2`
1582
1527
  - Examine all frames to understand the full context and progression
1583
1528
  - Be precise: "golden retriever" is better than "dog" when identifiable
1584
1529
  - Capture the narrative: what begins, develops, and concludes
@@ -1587,7 +1532,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1587
1532
  },
1588
1533
  sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
1589
1534
  });
1590
- var SYSTEM_PROMPT3 = dedent_default`
1535
+ var SYSTEM_PROMPT3 = dedent2`
1591
1536
  <role>
1592
1537
  You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
1593
1538
  </role>
@@ -1619,7 +1564,29 @@ var SYSTEM_PROMPT3 = dedent_default`
1619
1564
  - Only describe what is clearly observable in the frames or explicitly stated in the transcript
1620
1565
  - Do not fabricate details or make unsupported assumptions
1621
1566
  - Return structured data matching the requested schema
1622
- </constraints>`;
1567
+ </constraints>
1568
+
1569
+ <tone_guidance>
1570
+ Pay special attention to the <tone> section and lean heavily into those instructions.
1571
+ Adapt your entire analysis and writing style to match the specified tone - this should influence
1572
+ your word choice, personality, formality level, and overall presentation of the content.
1573
+ The tone instructions are not suggestions but core requirements for how you should express yourself.
1574
+ </tone_guidance>
1575
+
1576
+ <language_guidelines>
1577
+ AVOID these meta-descriptive phrases that reference the medium rather than the content:
1578
+ - "The image shows..." / "The storyboard shows..."
1579
+ - "In this video..." / "This video features..."
1580
+ - "The frames depict..." / "The footage shows..."
1581
+ - "We can see..." / "You can see..."
1582
+ - "The clip shows..." / "The scene shows..."
1583
+
1584
+ INSTEAD, describe the content directly:
1585
+ - BAD: "The video shows a chef preparing a meal"
1586
+ - GOOD: "A chef prepares a meal in a professional kitchen"
1587
+
1588
+ Write as if describing reality, not describing a recording of reality.
1589
+ </language_guidelines>`;
1623
1590
  function buildUserPrompt2({
1624
1591
  tone,
1625
1592
  transcriptText,
@@ -1633,8 +1600,37 @@ function buildUserPrompt2({
1633
1600
  }
1634
1601
  return summarizationPromptBuilder.buildWithContext(promptOverrides, contextSections);
1635
1602
  }
1636
- var DEFAULT_PROVIDER5 = "openai";
1637
- var DEFAULT_TONE = "normal";
1603
+ async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, systemPrompt) {
1604
+ "use step";
1605
+ const model = createLanguageModelFromConfig(provider, modelId);
1606
+ const response = await generateObject3({
1607
+ model,
1608
+ schema: summarySchema,
1609
+ messages: [
1610
+ {
1611
+ role: "system",
1612
+ content: systemPrompt
1613
+ },
1614
+ {
1615
+ role: "user",
1616
+ content: [
1617
+ { type: "text", text: userPrompt },
1618
+ { type: "image", image: imageDataUrl }
1619
+ ]
1620
+ }
1621
+ ]
1622
+ });
1623
+ return {
1624
+ result: response.object,
1625
+ usage: {
1626
+ inputTokens: response.usage.inputTokens,
1627
+ outputTokens: response.usage.outputTokens,
1628
+ totalTokens: response.usage.totalTokens,
1629
+ reasoningTokens: response.usage.reasoningTokens,
1630
+ cachedInputTokens: response.usage.cachedInputTokens
1631
+ }
1632
+ };
1633
+ }
1638
1634
  function normalizeKeywords(keywords) {
1639
1635
  if (!Array.isArray(keywords) || keywords.length === 0) {
1640
1636
  return [];
@@ -1659,23 +1655,24 @@ function normalizeKeywords(keywords) {
1659
1655
  return normalized;
1660
1656
  }
1661
1657
  async function getSummaryAndTags(assetId, options) {
1658
+ "use workflow";
1662
1659
  const {
1663
- provider = DEFAULT_PROVIDER5,
1660
+ provider = "openai",
1664
1661
  model,
1665
- tone = DEFAULT_TONE,
1662
+ tone = "neutral",
1666
1663
  includeTranscript = true,
1667
1664
  cleanTranscript = true,
1668
1665
  imageSubmissionMode = "url",
1669
1666
  imageDownloadOptions,
1670
- abortSignal,
1667
+ abortSignal: _abortSignal,
1671
1668
  promptOverrides
1672
1669
  } = options ?? {};
1673
- const clients = createWorkflowClients(
1670
+ const config = await createWorkflowConfig(
1674
1671
  { ...options, model },
1675
1672
  provider
1676
1673
  );
1677
- const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
1678
- const signingContext = resolveSigningContext(options ?? {});
1674
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
1675
+ const signingContext = getMuxSigningContextFromEnv();
1679
1676
  if (policy === "signed" && !signingContext) {
1680
1677
  throw new Error(
1681
1678
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1683,7 +1680,7 @@ async function getSummaryAndTags(assetId, options) {
1683
1680
  }
1684
1681
  const transcriptText = includeTranscript ? (await fetchTranscriptForAsset(assetData, playbackId, {
1685
1682
  cleanTranscript,
1686
- signingContext: policy === "signed" ? signingContext : void 0
1683
+ shouldSign: policy === "signed"
1687
1684
  })).transcriptText : "";
1688
1685
  const userPrompt = buildUserPrompt2({
1689
1686
  tone,
@@ -1691,67 +1688,214 @@ async function getSummaryAndTags(assetId, options) {
1691
1688
  isCleanTranscript: cleanTranscript,
1692
1689
  promptOverrides
1693
1690
  });
1694
- const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
1695
- const analyzeStoryboard = async (imageDataUrl) => {
1696
- const response = await (0, import_ai4.generateObject)({
1697
- model: clients.languageModel.model,
1698
- schema: summarySchema,
1699
- abortSignal,
1700
- messages: [
1701
- {
1702
- role: "system",
1703
- content: SYSTEM_PROMPT3
1704
- },
1705
- {
1706
- role: "user",
1707
- content: [
1708
- { type: "text", text: userPrompt },
1709
- { type: "image", image: imageDataUrl }
1710
- ]
1711
- }
1712
- ]
1713
- });
1714
- return response.object;
1715
- };
1716
- let aiAnalysis = null;
1691
+ const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed");
1692
+ let analysisResponse;
1717
1693
  try {
1718
1694
  if (imageSubmissionMode === "base64") {
1719
1695
  const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
1720
- aiAnalysis = await analyzeStoryboard(downloadResult.base64Data);
1696
+ analysisResponse = await analyzeStoryboard2(
1697
+ downloadResult.base64Data,
1698
+ config.provider,
1699
+ config.modelId,
1700
+ userPrompt,
1701
+ SYSTEM_PROMPT3
1702
+ );
1721
1703
  } else {
1722
- aiAnalysis = await withRetry(() => analyzeStoryboard(imageUrl));
1704
+ analysisResponse = await withRetry(() => analyzeStoryboard2(imageUrl, config.provider, config.modelId, userPrompt, SYSTEM_PROMPT3));
1723
1705
  }
1724
1706
  } catch (error) {
1725
1707
  throw new Error(
1726
1708
  `Failed to analyze video content with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
1727
1709
  );
1728
1710
  }
1729
- if (!aiAnalysis) {
1711
+ if (!analysisResponse.result) {
1730
1712
  throw new Error(`Failed to analyze video content for asset ${assetId}`);
1731
1713
  }
1732
- if (!aiAnalysis.title) {
1714
+ if (!analysisResponse.result.title) {
1733
1715
  throw new Error(`Failed to generate title for asset ${assetId}`);
1734
1716
  }
1735
- if (!aiAnalysis.description) {
1717
+ if (!analysisResponse.result.description) {
1736
1718
  throw new Error(`Failed to generate description for asset ${assetId}`);
1737
1719
  }
1738
1720
  return {
1739
1721
  assetId,
1740
- title: aiAnalysis.title,
1741
- description: aiAnalysis.description,
1742
- tags: normalizeKeywords(aiAnalysis.keywords),
1743
- storyboardUrl: imageUrl
1722
+ title: analysisResponse.result.title,
1723
+ description: analysisResponse.result.description,
1724
+ tags: normalizeKeywords(analysisResponse.result.keywords),
1725
+ storyboardUrl: imageUrl,
1726
+ usage: analysisResponse.usage,
1727
+ transcriptText: transcriptText || void 0
1744
1728
  };
1745
1729
  }
1746
1730
 
1747
1731
  // src/workflows/translate-audio.ts
1748
- var import_client_s3 = require("@aws-sdk/client-s3");
1749
- var import_lib_storage = require("@aws-sdk/lib-storage");
1750
- var import_s3_request_presigner = require("@aws-sdk/s3-request-presigner");
1751
- var import_mux_node3 = __toESM(require("@mux/mux-node"));
1732
+ import Mux3 from "@mux/mux-node";
1733
+
1734
+ // src/lib/language-codes.ts
1735
+ var ISO639_1_TO_3 = {
1736
+ // Major world languages
1737
+ en: "eng",
1738
+ // English
1739
+ es: "spa",
1740
+ // Spanish
1741
+ fr: "fra",
1742
+ // French
1743
+ de: "deu",
1744
+ // German
1745
+ it: "ita",
1746
+ // Italian
1747
+ pt: "por",
1748
+ // Portuguese
1749
+ ru: "rus",
1750
+ // Russian
1751
+ zh: "zho",
1752
+ // Chinese
1753
+ ja: "jpn",
1754
+ // Japanese
1755
+ ko: "kor",
1756
+ // Korean
1757
+ ar: "ara",
1758
+ // Arabic
1759
+ hi: "hin",
1760
+ // Hindi
1761
+ // European languages
1762
+ nl: "nld",
1763
+ // Dutch
1764
+ pl: "pol",
1765
+ // Polish
1766
+ sv: "swe",
1767
+ // Swedish
1768
+ da: "dan",
1769
+ // Danish
1770
+ no: "nor",
1771
+ // Norwegian
1772
+ fi: "fin",
1773
+ // Finnish
1774
+ el: "ell",
1775
+ // Greek
1776
+ cs: "ces",
1777
+ // Czech
1778
+ hu: "hun",
1779
+ // Hungarian
1780
+ ro: "ron",
1781
+ // Romanian
1782
+ bg: "bul",
1783
+ // Bulgarian
1784
+ hr: "hrv",
1785
+ // Croatian
1786
+ sk: "slk",
1787
+ // Slovak
1788
+ sl: "slv",
1789
+ // Slovenian
1790
+ uk: "ukr",
1791
+ // Ukrainian
1792
+ tr: "tur",
1793
+ // Turkish
1794
+ // Asian languages
1795
+ th: "tha",
1796
+ // Thai
1797
+ vi: "vie",
1798
+ // Vietnamese
1799
+ id: "ind",
1800
+ // Indonesian
1801
+ ms: "msa",
1802
+ // Malay
1803
+ tl: "tgl",
1804
+ // Tagalog/Filipino
1805
+ // Other languages
1806
+ he: "heb",
1807
+ // Hebrew
1808
+ fa: "fas",
1809
+ // Persian/Farsi
1810
+ bn: "ben",
1811
+ // Bengali
1812
+ ta: "tam",
1813
+ // Tamil
1814
+ te: "tel",
1815
+ // Telugu
1816
+ mr: "mar",
1817
+ // Marathi
1818
+ gu: "guj",
1819
+ // Gujarati
1820
+ kn: "kan",
1821
+ // Kannada
1822
+ ml: "mal",
1823
+ // Malayalam
1824
+ pa: "pan",
1825
+ // Punjabi
1826
+ ur: "urd",
1827
+ // Urdu
1828
+ sw: "swa",
1829
+ // Swahili
1830
+ af: "afr",
1831
+ // Afrikaans
1832
+ ca: "cat",
1833
+ // Catalan
1834
+ eu: "eus",
1835
+ // Basque
1836
+ gl: "glg",
1837
+ // Galician
1838
+ is: "isl",
1839
+ // Icelandic
1840
+ et: "est",
1841
+ // Estonian
1842
+ lv: "lav",
1843
+ // Latvian
1844
+ lt: "lit"
1845
+ // Lithuanian
1846
+ };
1847
+ var ISO639_3_TO_1 = Object.fromEntries(
1848
+ Object.entries(ISO639_1_TO_3).map(([iso1, iso3]) => [iso3, iso1])
1849
+ );
1850
+ function toISO639_3(code) {
1851
+ const normalized = code.toLowerCase().trim();
1852
+ if (normalized.length === 3) {
1853
+ return normalized;
1854
+ }
1855
+ return ISO639_1_TO_3[normalized] ?? normalized;
1856
+ }
1857
+ function toISO639_1(code) {
1858
+ const normalized = code.toLowerCase().trim();
1859
+ if (normalized.length === 2) {
1860
+ return normalized;
1861
+ }
1862
+ return ISO639_3_TO_1[normalized] ?? normalized;
1863
+ }
1864
+ function getLanguageCodePair(code) {
1865
+ const normalized = code.toLowerCase().trim();
1866
+ if (normalized.length === 2) {
1867
+ return {
1868
+ iso639_1: normalized,
1869
+ iso639_3: toISO639_3(normalized)
1870
+ };
1871
+ } else if (normalized.length === 3) {
1872
+ return {
1873
+ iso639_1: toISO639_1(normalized),
1874
+ iso639_3: normalized
1875
+ };
1876
+ }
1877
+ return {
1878
+ iso639_1: normalized,
1879
+ iso639_3: normalized
1880
+ };
1881
+ }
1882
+ function getLanguageName(code) {
1883
+ const iso639_1 = toISO639_1(code);
1884
+ try {
1885
+ const displayNames = new Intl.DisplayNames(["en"], { type: "language" });
1886
+ return displayNames.of(iso639_1) ?? code.toUpperCase();
1887
+ } catch {
1888
+ return code.toUpperCase();
1889
+ }
1890
+ }
1891
+
1892
+ // src/workflows/translate-audio.ts
1752
1893
  var STATIC_RENDITION_POLL_INTERVAL_MS = 5e3;
1753
1894
  var STATIC_RENDITION_MAX_ATTEMPTS = 36;
1754
- var delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
1895
+ async function sleep(ms) {
1896
+ "use step";
1897
+ await new Promise((resolve) => setTimeout(resolve, ms));
1898
+ }
1755
1899
  function getReadyAudioStaticRendition(asset) {
1756
1900
  const files = asset.static_renditions?.files;
1757
1901
  if (!files || files.length === 0) {
@@ -1762,19 +1906,22 @@ function getReadyAudioStaticRendition(asset) {
1762
1906
  );
1763
1907
  }
1764
1908
  var hasReadyAudioStaticRendition = (asset) => Boolean(getReadyAudioStaticRendition(asset));
1765
- async function requestStaticRenditionCreation(muxClient, assetId) {
1766
- console.log("\u{1F4FC} Requesting static rendition from Mux...");
1909
+ async function requestStaticRenditionCreation(assetId) {
1910
+ "use step";
1911
+ const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
1912
+ const mux = new Mux3({
1913
+ tokenId: muxTokenId,
1914
+ tokenSecret: muxTokenSecret
1915
+ });
1767
1916
  try {
1768
- await muxClient.video.assets.createStaticRendition(assetId, {
1917
+ await mux.video.assets.createStaticRendition(assetId, {
1769
1918
  resolution: "audio-only"
1770
1919
  });
1771
- console.log("\u{1F4FC} Static rendition request accepted by Mux.");
1772
1920
  } catch (error) {
1773
1921
  const statusCode = error?.status ?? error?.statusCode;
1774
1922
  const messages = error?.error?.messages;
1775
1923
  const alreadyDefined = messages?.some((message2) => message2.toLowerCase().includes("already defined")) ?? error?.message?.toLowerCase().includes("already defined");
1776
1924
  if (statusCode === 409 || alreadyDefined) {
1777
- console.log("\u2139\uFE0F Static rendition already requested. Waiting for it to finish...");
1778
1925
  return;
1779
1926
  }
1780
1927
  const message = error instanceof Error ? error.message : "Unknown error";
@@ -1783,31 +1930,34 @@ async function requestStaticRenditionCreation(muxClient, assetId) {
1783
1930
  }
1784
1931
  async function waitForAudioStaticRendition({
1785
1932
  assetId,
1786
- muxClient,
1787
1933
  initialAsset
1788
1934
  }) {
1935
+ "use step";
1936
+ const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
1937
+ const mux = new Mux3({
1938
+ tokenId: muxTokenId,
1939
+ tokenSecret: muxTokenSecret
1940
+ });
1789
1941
  let currentAsset = initialAsset;
1790
1942
  if (hasReadyAudioStaticRendition(currentAsset)) {
1791
1943
  return currentAsset;
1792
1944
  }
1793
1945
  const status = currentAsset.static_renditions?.status ?? "not_requested";
1794
1946
  if (status === "not_requested" || status === void 0) {
1795
- await requestStaticRenditionCreation(muxClient, assetId);
1947
+ await requestStaticRenditionCreation(assetId);
1796
1948
  } else if (status === "errored") {
1797
- console.log("\u26A0\uFE0F Previous static rendition request errored. Creating a new one...");
1798
- await requestStaticRenditionCreation(muxClient, assetId);
1949
+ await requestStaticRenditionCreation(assetId);
1799
1950
  } else {
1800
- console.log(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
1951
+ console.warn(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
1801
1952
  }
1802
1953
  for (let attempt = 1; attempt <= STATIC_RENDITION_MAX_ATTEMPTS; attempt++) {
1803
- await delay(STATIC_RENDITION_POLL_INTERVAL_MS);
1804
- currentAsset = await muxClient.video.assets.retrieve(assetId);
1954
+ await sleep(STATIC_RENDITION_POLL_INTERVAL_MS);
1955
+ currentAsset = await mux.video.assets.retrieve(assetId);
1805
1956
  if (hasReadyAudioStaticRendition(currentAsset)) {
1806
- console.log("\u2705 Audio static rendition is ready!");
1807
1957
  return currentAsset;
1808
1958
  }
1809
1959
  const currentStatus = currentAsset.static_renditions?.status || "unknown";
1810
- console.log(
1960
+ console.warn(
1811
1961
  `\u231B Waiting for static rendition (attempt ${attempt}/${STATIC_RENDITION_MAX_ATTEMPTS}) \u2192 ${currentStatus}`
1812
1962
  );
1813
1963
  if (currentStatus === "errored") {
@@ -1820,55 +1970,179 @@ async function waitForAudioStaticRendition({
1820
1970
  "Timed out waiting for the static rendition to become ready. Please try again in a moment."
1821
1971
  );
1822
1972
  }
1973
+ async function fetchAudioFromMux(audioUrl) {
1974
+ "use step";
1975
+ const audioResponse = await fetch(audioUrl);
1976
+ if (!audioResponse.ok) {
1977
+ throw new Error(`Failed to fetch audio file: ${audioResponse.statusText}`);
1978
+ }
1979
+ return audioResponse.arrayBuffer();
1980
+ }
1981
+ async function createElevenLabsDubbingJob({
1982
+ audioBuffer,
1983
+ assetId,
1984
+ elevenLabsLangCode,
1985
+ numSpeakers
1986
+ }) {
1987
+ "use step";
1988
+ const elevenLabsApiKey = getApiKeyFromEnv("elevenlabs");
1989
+ const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
1990
+ const formData = new FormData();
1991
+ formData.append("file", audioBlob);
1992
+ formData.append("target_lang", elevenLabsLangCode);
1993
+ formData.append("num_speakers", numSpeakers.toString());
1994
+ formData.append("name", `Mux Asset ${assetId} - auto to ${elevenLabsLangCode}`);
1995
+ const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
1996
+ method: "POST",
1997
+ headers: {
1998
+ "xi-api-key": elevenLabsApiKey
1999
+ },
2000
+ body: formData
2001
+ });
2002
+ if (!dubbingResponse.ok) {
2003
+ throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
2004
+ }
2005
+ const dubbingData = await dubbingResponse.json();
2006
+ return dubbingData.dubbing_id;
2007
+ }
2008
+ async function checkElevenLabsDubbingStatus({
2009
+ dubbingId
2010
+ }) {
2011
+ "use step";
2012
+ const elevenLabsApiKey = getApiKeyFromEnv("elevenlabs");
2013
+ const statusResponse = await fetch(`https://api.elevenlabs.io/v1/dubbing/${dubbingId}`, {
2014
+ headers: {
2015
+ "xi-api-key": elevenLabsApiKey
2016
+ }
2017
+ });
2018
+ if (!statusResponse.ok) {
2019
+ throw new Error(`Status check failed: ${statusResponse.statusText}`);
2020
+ }
2021
+ const statusData = await statusResponse.json();
2022
+ return {
2023
+ status: statusData.status,
2024
+ targetLanguages: statusData.target_languages ?? []
2025
+ };
2026
+ }
2027
+ async function downloadDubbedAudioFromElevenLabs({
2028
+ dubbingId,
2029
+ languageCode
2030
+ }) {
2031
+ "use step";
2032
+ const elevenLabsApiKey = getApiKeyFromEnv("elevenlabs");
2033
+ const audioUrl = `https://api.elevenlabs.io/v1/dubbing/${dubbingId}/audio/${languageCode}`;
2034
+ const audioResponse = await fetch(audioUrl, {
2035
+ headers: {
2036
+ "xi-api-key": elevenLabsApiKey
2037
+ }
2038
+ });
2039
+ if (!audioResponse.ok) {
2040
+ throw new Error(`Failed to fetch dubbed audio: ${audioResponse.statusText}`);
2041
+ }
2042
+ return audioResponse.arrayBuffer();
2043
+ }
2044
+ async function uploadDubbedAudioToS3({
2045
+ dubbedAudioBuffer,
2046
+ assetId,
2047
+ toLanguageCode,
2048
+ s3Endpoint,
2049
+ s3Region,
2050
+ s3Bucket
2051
+ }) {
2052
+ "use step";
2053
+ const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
2054
+ const { Upload } = await import("@aws-sdk/lib-storage");
2055
+ const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
2056
+ const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
2057
+ const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
2058
+ const s3Client = new S3Client({
2059
+ region: s3Region,
2060
+ endpoint: s3Endpoint,
2061
+ credentials: {
2062
+ accessKeyId: s3AccessKeyId,
2063
+ secretAccessKey: s3SecretAccessKey
2064
+ },
2065
+ forcePathStyle: true
2066
+ });
2067
+ const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
2068
+ const upload = new Upload({
2069
+ client: s3Client,
2070
+ params: {
2071
+ Bucket: s3Bucket,
2072
+ Key: audioKey,
2073
+ Body: new Uint8Array(dubbedAudioBuffer),
2074
+ ContentType: "audio/mp4"
2075
+ }
2076
+ });
2077
+ await upload.done();
2078
+ const getObjectCommand = new GetObjectCommand({
2079
+ Bucket: s3Bucket,
2080
+ Key: audioKey
2081
+ });
2082
+ const presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
2083
+ expiresIn: 3600
2084
+ // 1 hour
2085
+ });
2086
+ console.warn(`\u2705 Audio uploaded successfully to: ${audioKey}`);
2087
+ console.warn(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
2088
+ return presignedUrl;
2089
+ }
2090
+ async function createAudioTrackOnMux(assetId, languageCode, presignedUrl) {
2091
+ "use step";
2092
+ const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
2093
+ const mux = new Mux3({
2094
+ tokenId: muxTokenId,
2095
+ tokenSecret: muxTokenSecret
2096
+ });
2097
+ const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(languageCode) || languageCode.toUpperCase();
2098
+ const trackName = `${languageName} (auto-dubbed)`;
2099
+ const trackResponse = await mux.video.assets.createTrack(assetId, {
2100
+ type: "audio",
2101
+ language_code: languageCode,
2102
+ name: trackName,
2103
+ url: presignedUrl
2104
+ });
2105
+ if (!trackResponse.id) {
2106
+ throw new Error("Failed to create audio track: no track ID returned from Mux");
2107
+ }
2108
+ return trackResponse.id;
2109
+ }
1823
2110
  async function translateAudio(assetId, toLanguageCode, options = {}) {
2111
+ "use workflow";
1824
2112
  const {
1825
2113
  provider = "elevenlabs",
1826
2114
  numSpeakers = 0,
1827
2115
  // 0 = auto-detect
1828
- muxTokenId,
1829
- muxTokenSecret,
1830
2116
  elevenLabsApiKey,
1831
2117
  uploadToMux = true
1832
2118
  } = options;
1833
2119
  if (provider !== "elevenlabs") {
1834
2120
  throw new Error("Only ElevenLabs provider is currently supported for audio translation");
1835
2121
  }
1836
- const muxId = muxTokenId ?? env_default.MUX_TOKEN_ID;
1837
- const muxSecret = muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
1838
2122
  const elevenLabsKey = elevenLabsApiKey ?? env_default.ELEVENLABS_API_KEY;
1839
2123
  const s3Endpoint = options.s3Endpoint ?? env_default.S3_ENDPOINT;
1840
2124
  const s3Region = options.s3Region ?? env_default.S3_REGION ?? "auto";
1841
2125
  const s3Bucket = options.s3Bucket ?? env_default.S3_BUCKET;
1842
- const s3AccessKeyId = options.s3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
1843
- const s3SecretAccessKey = options.s3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
1844
- if (!muxId || !muxSecret) {
1845
- throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
1846
- }
2126
+ const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
2127
+ const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
1847
2128
  if (!elevenLabsKey) {
1848
2129
  throw new Error("ElevenLabs API key is required. Provide elevenLabsApiKey in options or set ELEVENLABS_API_KEY environment variable.");
1849
2130
  }
1850
2131
  if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
1851
2132
  throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
1852
2133
  }
1853
- const mux = new import_mux_node3.default({
1854
- tokenId: muxId,
1855
- tokenSecret: muxSecret
1856
- });
1857
- console.log(`\u{1F3AC} Fetching Mux asset: ${assetId}`);
1858
- const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(mux, assetId);
1859
- const signingContext = resolveSigningContext(options);
2134
+ const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(assetId);
2135
+ const signingContext = getMuxSigningContextFromEnv();
1860
2136
  if (policy === "signed" && !signingContext) {
1861
2137
  throw new Error(
1862
2138
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
1863
2139
  );
1864
2140
  }
1865
- console.log("\u{1F50D} Checking for audio-only static rendition...");
1866
2141
  let currentAsset = initialAsset;
1867
2142
  if (!hasReadyAudioStaticRendition(currentAsset)) {
1868
- console.log("\u274C No ready audio static rendition found. Requesting one now...");
2143
+ console.warn("\u274C No ready audio static rendition found. Requesting one now...");
1869
2144
  currentAsset = await waitForAudioStaticRendition({
1870
2145
  assetId,
1871
- muxClient: mux,
1872
2146
  initialAsset: currentAsset
1873
2147
  });
1874
2148
  }
@@ -1882,58 +2156,42 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
1882
2156
  if (policy === "signed" && signingContext) {
1883
2157
  audioUrl = await signUrl(audioUrl, playbackId, signingContext, "video");
1884
2158
  }
1885
- console.log(`\u2705 Found audio rendition: ${audioUrl}`);
1886
- console.log(`\u{1F399}\uFE0F Creating ElevenLabs dubbing job (auto-detect \u2192 ${toLanguageCode})`);
2159
+ console.warn("\u{1F399}\uFE0F Fetching audio from Mux...");
2160
+ let audioBuffer;
2161
+ try {
2162
+ audioBuffer = await fetchAudioFromMux(audioUrl);
2163
+ } catch (error) {
2164
+ throw new Error(`Failed to fetch audio from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
2165
+ }
2166
+ console.warn("\u{1F399}\uFE0F Creating dubbing job in ElevenLabs...");
2167
+ const elevenLabsLangCode = toISO639_3(toLanguageCode);
2168
+ console.warn(`\u{1F50D} Creating dubbing job for asset ${assetId} with language code: ${elevenLabsLangCode}`);
1887
2169
  let dubbingId;
1888
2170
  try {
1889
- const audioResponse = await fetch(audioUrl);
1890
- if (!audioResponse.ok) {
1891
- throw new Error(`Failed to fetch audio file: ${audioResponse.statusText}`);
1892
- }
1893
- const audioBuffer = await audioResponse.arrayBuffer();
1894
- const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
1895
- const audioFile = audioBlob;
1896
- const formData = new FormData();
1897
- formData.append("file", audioFile);
1898
- formData.append("target_lang", toLanguageCode);
1899
- formData.append("num_speakers", numSpeakers.toString());
1900
- formData.append("name", `Mux Asset ${assetId} - auto to ${toLanguageCode}`);
1901
- const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
1902
- method: "POST",
1903
- headers: {
1904
- "xi-api-key": elevenLabsKey
1905
- },
1906
- body: formData
2171
+ dubbingId = await createElevenLabsDubbingJob({
2172
+ audioBuffer,
2173
+ assetId,
2174
+ elevenLabsLangCode,
2175
+ numSpeakers
1907
2176
  });
1908
- if (!dubbingResponse.ok) {
1909
- throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
1910
- }
1911
- const dubbingData = await dubbingResponse.json();
1912
- dubbingId = dubbingData.dubbing_id;
1913
- console.log(`\u2705 Dubbing job created: ${dubbingId}`);
1914
- console.log(`\u23F1\uFE0F Expected duration: ${dubbingData.expected_duration_sec}s`);
2177
+ console.warn(`\u2705 Dubbing job created with ID: ${dubbingId}`);
1915
2178
  } catch (error) {
1916
2179
  throw new Error(`Failed to create ElevenLabs dubbing job: ${error instanceof Error ? error.message : "Unknown error"}`);
1917
2180
  }
1918
- console.log("\u23F3 Waiting for dubbing to complete...");
2181
+ console.warn("\u23F3 Waiting for dubbing to complete...");
1919
2182
  let dubbingStatus = "dubbing";
1920
2183
  let pollAttempts = 0;
1921
2184
  const maxPollAttempts = 180;
2185
+ let targetLanguages = [];
1922
2186
  while (dubbingStatus === "dubbing" && pollAttempts < maxPollAttempts) {
1923
- await new Promise((resolve) => setTimeout(resolve, 1e4));
2187
+ await sleep(1e4);
1924
2188
  pollAttempts++;
1925
2189
  try {
1926
- const statusResponse = await fetch(`https://api.elevenlabs.io/v1/dubbing/${dubbingId}`, {
1927
- headers: {
1928
- "xi-api-key": elevenLabsKey
1929
- }
2190
+ const statusResult = await checkElevenLabsDubbingStatus({
2191
+ dubbingId
1930
2192
  });
1931
- if (!statusResponse.ok) {
1932
- throw new Error(`Status check failed: ${statusResponse.statusText}`);
1933
- }
1934
- const statusData = await statusResponse.json();
1935
- dubbingStatus = statusData.status;
1936
- console.log(`\u{1F4CA} Status check ${pollAttempts}: ${dubbingStatus}`);
2193
+ dubbingStatus = statusResult.status;
2194
+ targetLanguages = statusResult.targetLanguages;
1937
2195
  if (dubbingStatus === "failed") {
1938
2196
  throw new Error("ElevenLabs dubbing job failed");
1939
2197
  }
@@ -1944,89 +2202,74 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
1944
2202
  if (dubbingStatus !== "dubbed") {
1945
2203
  throw new Error(`Dubbing job timed out or failed. Final status: ${dubbingStatus}`);
1946
2204
  }
1947
- console.log("\u2705 Dubbing completed successfully!");
2205
+ console.warn("\u2705 Dubbing completed successfully!");
1948
2206
  if (!uploadToMux) {
2207
+ const targetLanguage2 = getLanguageCodePair(toLanguageCode);
1949
2208
  return {
1950
2209
  assetId,
1951
- targetLanguageCode: toLanguageCode,
2210
+ targetLanguageCode: targetLanguage2.iso639_1,
2211
+ targetLanguage: targetLanguage2,
1952
2212
  dubbingId
1953
2213
  };
1954
2214
  }
1955
- console.log("\u{1F4E5} Downloading dubbed audio from ElevenLabs...");
2215
+ console.warn("\u{1F4E5} Downloading dubbed audio from ElevenLabs...");
1956
2216
  let dubbedAudioBuffer;
1957
2217
  try {
1958
- const audioUrl2 = `https://api.elevenlabs.io/v1/dubbing/${dubbingId}/audio/${toLanguageCode}`;
1959
- const audioResponse = await fetch(audioUrl2, {
1960
- headers: {
1961
- "xi-api-key": elevenLabsKey
1962
- }
1963
- });
1964
- if (!audioResponse.ok) {
1965
- throw new Error(`Failed to fetch dubbed audio: ${audioResponse.statusText}`);
2218
+ const requestedLangCode = toISO639_3(toLanguageCode);
2219
+ let downloadLangCode = targetLanguages.find(
2220
+ (lang) => lang === requestedLangCode
2221
+ ) ?? targetLanguages.find(
2222
+ (lang) => lang.toLowerCase() === requestedLangCode.toLowerCase()
2223
+ );
2224
+ if (!downloadLangCode && targetLanguages.length > 0) {
2225
+ downloadLangCode = targetLanguages[0];
2226
+ console.warn(`\u26A0\uFE0F Requested language "${requestedLangCode}" not found in target_languages. Using "${downloadLangCode}" instead.`);
2227
+ }
2228
+ if (!downloadLangCode) {
2229
+ downloadLangCode = requestedLangCode;
2230
+ console.warn(`\u26A0\uFE0F No target_languages available from ElevenLabs status. Using requested language code: ${requestedLangCode}`);
1966
2231
  }
1967
- dubbedAudioBuffer = await audioResponse.arrayBuffer();
1968
- console.log(`\u2705 Downloaded dubbed audio (${dubbedAudioBuffer.byteLength} bytes)`);
2232
+ dubbedAudioBuffer = await downloadDubbedAudioFromElevenLabs({
2233
+ dubbingId,
2234
+ languageCode: downloadLangCode
2235
+ });
2236
+ console.warn("\u2705 Dubbed audio downloaded successfully!");
1969
2237
  } catch (error) {
1970
2238
  throw new Error(`Failed to download dubbed audio: ${error instanceof Error ? error.message : "Unknown error"}`);
1971
2239
  }
1972
- console.log("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
1973
- const s3Client = new import_client_s3.S3Client({
1974
- region: s3Region,
1975
- endpoint: s3Endpoint,
1976
- credentials: {
1977
- accessKeyId: s3AccessKeyId,
1978
- secretAccessKey: s3SecretAccessKey
1979
- },
1980
- forcePathStyle: true
1981
- });
1982
- const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
2240
+ console.warn("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
1983
2241
  let presignedUrl;
1984
2242
  try {
1985
- const upload = new import_lib_storage.Upload({
1986
- client: s3Client,
1987
- params: {
1988
- Bucket: s3Bucket,
1989
- Key: audioKey,
1990
- Body: new Uint8Array(dubbedAudioBuffer),
1991
- ContentType: "audio/mp4"
1992
- }
1993
- });
1994
- await upload.done();
1995
- console.log(`\u2705 Audio uploaded successfully to: ${audioKey}`);
1996
- const getObjectCommand = new import_client_s3.GetObjectCommand({
1997
- Bucket: s3Bucket,
1998
- Key: audioKey
1999
- });
2000
- presignedUrl = await (0, import_s3_request_presigner.getSignedUrl)(s3Client, getObjectCommand, {
2001
- expiresIn: 3600
2002
- // 1 hour
2243
+ presignedUrl = await uploadDubbedAudioToS3({
2244
+ dubbedAudioBuffer,
2245
+ assetId,
2246
+ toLanguageCode,
2247
+ s3Endpoint,
2248
+ s3Region,
2249
+ s3Bucket
2003
2250
  });
2004
- console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
2005
2251
  } catch (error) {
2006
2252
  throw new Error(`Failed to upload audio to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
2007
2253
  }
2008
- console.log("\u{1F3AC} Adding translated audio track to Mux asset...");
2254
+ console.warn("\u{1F4F9} Adding dubbed audio track to Mux asset...");
2009
2255
  let uploadedTrackId;
2256
+ const muxLangCode = toISO639_1(toLanguageCode);
2010
2257
  try {
2011
- const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(toLanguageCode) || toLanguageCode.toUpperCase();
2258
+ uploadedTrackId = await createAudioTrackOnMux(assetId, muxLangCode, presignedUrl);
2259
+ const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(muxLangCode) || muxLangCode.toUpperCase();
2012
2260
  const trackName = `${languageName} (auto-dubbed)`;
2013
- const trackResponse = await mux.video.assets.createTrack(assetId, {
2014
- type: "audio",
2015
- language_code: toLanguageCode,
2016
- name: trackName,
2017
- url: presignedUrl
2018
- });
2019
- uploadedTrackId = trackResponse.id;
2020
- console.log(`\u2705 Audio track added to Mux asset with ID: ${uploadedTrackId}`);
2021
- console.log(`\u{1F3B5} Track name: "${trackName}"`);
2261
+ console.warn(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
2262
+ console.warn(`\u{1F4CB} Track name: "${trackName}"`);
2022
2263
  } catch (error) {
2023
2264
  console.warn(`\u26A0\uFE0F Failed to add audio track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
2024
- console.log("\u{1F517} You can manually add the track using this presigned URL:");
2025
- console.log(presignedUrl);
2265
+ console.warn("\u{1F517} You can manually add the track using this presigned URL:");
2266
+ console.warn(presignedUrl);
2026
2267
  }
2268
+ const targetLanguage = getLanguageCodePair(toLanguageCode);
2027
2269
  return {
2028
2270
  assetId,
2029
- targetLanguageCode: toLanguageCode,
2271
+ targetLanguageCode: targetLanguage.iso639_1,
2272
+ targetLanguage,
2030
2273
  dubbingId,
2031
2274
  uploadedTrackId,
2032
2275
  presignedUrl
@@ -2034,43 +2277,143 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
2034
2277
  }
2035
2278
 
2036
2279
  // src/workflows/translate-captions.ts
2037
- var import_client_s32 = require("@aws-sdk/client-s3");
2038
- var import_lib_storage2 = require("@aws-sdk/lib-storage");
2039
- var import_s3_request_presigner2 = require("@aws-sdk/s3-request-presigner");
2040
- var import_ai5 = require("ai");
2041
- var import_zod5 = require("zod");
2042
- var translationSchema = import_zod5.z.object({
2043
- translation: import_zod5.z.string()
2280
+ import Mux4 from "@mux/mux-node";
2281
+ import { generateObject as generateObject4 } from "ai";
2282
+ import { z as z5 } from "zod";
2283
+ var translationSchema = z5.object({
2284
+ translation: z5.string()
2044
2285
  });
2045
- var DEFAULT_PROVIDER6 = "openai";
2286
+ async function fetchVttFromMux(vttUrl) {
2287
+ "use step";
2288
+ const vttResponse = await fetch(vttUrl);
2289
+ if (!vttResponse.ok) {
2290
+ throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
2291
+ }
2292
+ return vttResponse.text();
2293
+ }
2294
+ async function translateVttWithAI({
2295
+ vttContent,
2296
+ fromLanguageCode,
2297
+ toLanguageCode,
2298
+ provider,
2299
+ modelId,
2300
+ abortSignal
2301
+ }) {
2302
+ "use step";
2303
+ const languageModel = createLanguageModelFromConfig(provider, modelId);
2304
+ const response = await generateObject4({
2305
+ model: languageModel,
2306
+ schema: translationSchema,
2307
+ abortSignal,
2308
+ messages: [
2309
+ {
2310
+ role: "user",
2311
+ content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
2312
+
2313
+ ${vttContent}`
2314
+ }
2315
+ ]
2316
+ });
2317
+ return {
2318
+ translatedVtt: response.object.translation,
2319
+ usage: {
2320
+ inputTokens: response.usage.inputTokens,
2321
+ outputTokens: response.usage.outputTokens,
2322
+ totalTokens: response.usage.totalTokens,
2323
+ reasoningTokens: response.usage.reasoningTokens,
2324
+ cachedInputTokens: response.usage.cachedInputTokens
2325
+ }
2326
+ };
2327
+ }
2328
+ async function uploadVttToS3({
2329
+ translatedVtt,
2330
+ assetId,
2331
+ fromLanguageCode,
2332
+ toLanguageCode,
2333
+ s3Endpoint,
2334
+ s3Region,
2335
+ s3Bucket
2336
+ }) {
2337
+ "use step";
2338
+ const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
2339
+ const { Upload } = await import("@aws-sdk/lib-storage");
2340
+ const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
2341
+ const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
2342
+ const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
2343
+ const s3Client = new S3Client({
2344
+ region: s3Region,
2345
+ endpoint: s3Endpoint,
2346
+ credentials: {
2347
+ accessKeyId: s3AccessKeyId,
2348
+ secretAccessKey: s3SecretAccessKey
2349
+ },
2350
+ forcePathStyle: true
2351
+ });
2352
+ const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
2353
+ const upload = new Upload({
2354
+ client: s3Client,
2355
+ params: {
2356
+ Bucket: s3Bucket,
2357
+ Key: vttKey,
2358
+ Body: translatedVtt,
2359
+ ContentType: "text/vtt"
2360
+ }
2361
+ });
2362
+ await upload.done();
2363
+ const getObjectCommand = new GetObjectCommand({
2364
+ Bucket: s3Bucket,
2365
+ Key: vttKey
2366
+ });
2367
+ const presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
2368
+ expiresIn: 3600
2369
+ // 1 hour
2370
+ });
2371
+ return presignedUrl;
2372
+ }
2373
+ async function createTextTrackOnMux(assetId, languageCode, trackName, presignedUrl) {
2374
+ "use step";
2375
+ const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
2376
+ const mux = new Mux4({
2377
+ tokenId: muxTokenId,
2378
+ tokenSecret: muxTokenSecret
2379
+ });
2380
+ const trackResponse = await mux.video.assets.createTrack(assetId, {
2381
+ type: "text",
2382
+ text_type: "subtitles",
2383
+ language_code: languageCode,
2384
+ name: trackName,
2385
+ url: presignedUrl
2386
+ });
2387
+ if (!trackResponse.id) {
2388
+ throw new Error("Failed to create text track: no track ID returned from Mux");
2389
+ }
2390
+ return trackResponse.id;
2391
+ }
2046
2392
  async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options) {
2393
+ "use workflow";
2047
2394
  const {
2048
- provider = DEFAULT_PROVIDER6,
2395
+ provider = "openai",
2049
2396
  model,
2050
2397
  s3Endpoint: providedS3Endpoint,
2051
2398
  s3Region: providedS3Region,
2052
2399
  s3Bucket: providedS3Bucket,
2053
- s3AccessKeyId: providedS3AccessKeyId,
2054
- s3SecretAccessKey: providedS3SecretAccessKey,
2055
- uploadToMux: uploadToMuxOption,
2056
- ...clientConfig
2400
+ uploadToMux: uploadToMuxOption
2057
2401
  } = options;
2058
- const resolvedProvider = provider;
2059
2402
  const s3Endpoint = providedS3Endpoint ?? env_default.S3_ENDPOINT;
2060
2403
  const s3Region = providedS3Region ?? env_default.S3_REGION ?? "auto";
2061
2404
  const s3Bucket = providedS3Bucket ?? env_default.S3_BUCKET;
2062
- const s3AccessKeyId = providedS3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
2063
- const s3SecretAccessKey = providedS3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
2405
+ const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
2406
+ const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
2064
2407
  const uploadToMux = uploadToMuxOption !== false;
2065
- const clients = createWorkflowClients(
2066
- { ...clientConfig, provider: resolvedProvider, model },
2067
- resolvedProvider
2408
+ const config = await createWorkflowConfig(
2409
+ { ...options, model },
2410
+ provider
2068
2411
  );
2069
2412
  if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
2070
2413
  throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
2071
2414
  }
2072
- const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
2073
- const signingContext = resolveSigningContext(options);
2415
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
2416
+ const signingContext = getMuxSigningContextFromEnv();
2074
2417
  if (policy === "signed" && !signingContext) {
2075
2418
  throw new Error(
2076
2419
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -2091,115 +2434,76 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
2091
2434
  }
2092
2435
  let vttContent;
2093
2436
  try {
2094
- const vttResponse = await fetch(vttUrl);
2095
- if (!vttResponse.ok) {
2096
- throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
2097
- }
2098
- vttContent = await vttResponse.text();
2437
+ vttContent = await fetchVttFromMux(vttUrl);
2099
2438
  } catch (error) {
2100
2439
  throw new Error(`Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`);
2101
2440
  }
2102
- console.log(`\u2705 Found VTT content for language '${fromLanguageCode}'`);
2103
2441
  let translatedVtt;
2442
+ let usage;
2104
2443
  try {
2105
- const response = await (0, import_ai5.generateObject)({
2106
- model: clients.languageModel.model,
2107
- schema: translationSchema,
2108
- abortSignal: options.abortSignal,
2109
- messages: [
2110
- {
2111
- role: "user",
2112
- content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
2113
-
2114
- ${vttContent}`
2115
- }
2116
- ]
2444
+ const result = await translateVttWithAI({
2445
+ vttContent,
2446
+ fromLanguageCode,
2447
+ toLanguageCode,
2448
+ provider: config.provider,
2449
+ modelId: config.modelId,
2450
+ abortSignal: options.abortSignal
2117
2451
  });
2118
- translatedVtt = response.object.translation;
2452
+ translatedVtt = result.translatedVtt;
2453
+ usage = result.usage;
2119
2454
  } catch (error) {
2120
- throw new Error(`Failed to translate VTT with ${resolvedProvider}: ${error instanceof Error ? error.message : "Unknown error"}`);
2455
+ throw new Error(`Failed to translate VTT with ${config.provider}: ${error instanceof Error ? error.message : "Unknown error"}`);
2121
2456
  }
2122
- console.log(`
2123
- \u2705 Translation completed successfully!`);
2457
+ const sourceLanguage = getLanguageCodePair(fromLanguageCode);
2458
+ const targetLanguage = getLanguageCodePair(toLanguageCode);
2124
2459
  if (!uploadToMux) {
2125
- console.log(`\u2705 VTT translated to ${toLanguageCode} successfully!`);
2126
2460
  return {
2127
2461
  assetId,
2128
2462
  sourceLanguageCode: fromLanguageCode,
2129
2463
  targetLanguageCode: toLanguageCode,
2464
+ sourceLanguage,
2465
+ targetLanguage,
2130
2466
  originalVtt: vttContent,
2131
- translatedVtt
2467
+ translatedVtt,
2468
+ usage
2132
2469
  };
2133
2470
  }
2134
- console.log("\u{1F4E4} Uploading translated VTT to S3-compatible storage...");
2135
- const s3Client = new import_client_s32.S3Client({
2136
- region: s3Region,
2137
- endpoint: s3Endpoint,
2138
- credentials: {
2139
- accessKeyId: s3AccessKeyId,
2140
- secretAccessKey: s3SecretAccessKey
2141
- },
2142
- forcePathStyle: true
2143
- // Often needed for non-AWS S3 services
2144
- });
2145
- const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
2146
2471
  let presignedUrl;
2147
2472
  try {
2148
- const upload = new import_lib_storage2.Upload({
2149
- client: s3Client,
2150
- params: {
2151
- Bucket: s3Bucket,
2152
- Key: vttKey,
2153
- Body: translatedVtt,
2154
- ContentType: "text/vtt"
2155
- }
2156
- });
2157
- await upload.done();
2158
- console.log(`\u2705 VTT uploaded successfully to: ${vttKey}`);
2159
- const getObjectCommand = new import_client_s32.GetObjectCommand({
2160
- Bucket: s3Bucket,
2161
- Key: vttKey
2162
- });
2163
- presignedUrl = await (0, import_s3_request_presigner2.getSignedUrl)(s3Client, getObjectCommand, {
2164
- expiresIn: 3600
2165
- // 1 hour
2473
+ presignedUrl = await uploadVttToS3({
2474
+ translatedVtt,
2475
+ assetId,
2476
+ fromLanguageCode,
2477
+ toLanguageCode,
2478
+ s3Endpoint,
2479
+ s3Region,
2480
+ s3Bucket
2166
2481
  });
2167
- console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
2168
2482
  } catch (error) {
2169
2483
  throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
2170
2484
  }
2171
- console.log("\u{1F4F9} Adding translated track to Mux asset...");
2172
2485
  let uploadedTrackId;
2173
2486
  try {
2174
- const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(toLanguageCode) || toLanguageCode.toUpperCase();
2487
+ const languageName = getLanguageName(toLanguageCode);
2175
2488
  const trackName = `${languageName} (auto-translated)`;
2176
- const trackResponse = await clients.mux.video.assets.createTrack(assetId, {
2177
- type: "text",
2178
- text_type: "subtitles",
2179
- language_code: toLanguageCode,
2180
- name: trackName,
2181
- url: presignedUrl
2182
- });
2183
- uploadedTrackId = trackResponse.id;
2184
- console.log(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
2185
- console.log(`\u{1F4CB} Track name: "${trackName}"`);
2489
+ uploadedTrackId = await createTextTrackOnMux(assetId, toLanguageCode, trackName, presignedUrl);
2186
2490
  } catch (error) {
2187
- console.warn(`\u26A0\uFE0F Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
2188
- console.log("\u{1F517} You can manually add the track using this presigned URL:");
2189
- console.log(presignedUrl);
2491
+ console.warn(`Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
2190
2492
  }
2191
2493
  return {
2192
2494
  assetId,
2193
2495
  sourceLanguageCode: fromLanguageCode,
2194
2496
  targetLanguageCode: toLanguageCode,
2497
+ sourceLanguage,
2498
+ targetLanguage,
2195
2499
  originalVtt: vttContent,
2196
2500
  translatedVtt,
2197
2501
  uploadedTrackId,
2198
- presignedUrl
2502
+ presignedUrl,
2503
+ usage
2199
2504
  };
2200
2505
  }
2201
- // Annotate the CommonJS export names for ESM import in node:
2202
- 0 && (module.exports = {
2506
+ export {
2203
2507
  SUMMARY_KEYWORD_LIMIT,
2204
2508
  burnedInCaptionsSchema,
2205
2509
  chapterSchema,
@@ -2213,5 +2517,5 @@ ${vttContent}`
2213
2517
  translateAudio,
2214
2518
  translateCaptions,
2215
2519
  translationSchema
2216
- });
2520
+ };
2217
2521
  //# sourceMappingURL=index.js.map