@mux/ai 0.1.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,40 +1,8 @@
1
- "use strict";
2
- var __create = Object.create;
3
1
  var __defProp = Object.defineProperty;
4
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
- var __getOwnPropNames = Object.getOwnPropertyNames;
6
- var __getProtoOf = Object.getPrototypeOf;
7
- var __hasOwnProp = Object.prototype.hasOwnProperty;
8
2
  var __export = (target, all) => {
9
3
  for (var name in all)
10
4
  __defProp(target, name, { get: all[name], enumerable: true });
11
5
  };
12
- var __copyProps = (to, from, except, desc) => {
13
- if (from && typeof from === "object" || typeof from === "function") {
14
- for (let key of __getOwnPropNames(from))
15
- if (!__hasOwnProp.call(to, key) && key !== except)
16
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
- }
18
- return to;
19
- };
20
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
- // If the importer is in node compatibility mode or this is not an ESM
22
- // file that has been converted to a CommonJS file using a Babel-
23
- // compatible transform (i.e. "__esModule" has not been set), then set
24
- // "default" to the CommonJS "module.exports" for node compatibility.
25
- isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
- mod
27
- ));
28
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
-
30
- // src/index.ts
31
- var index_exports = {};
32
- __export(index_exports, {
33
- primitives: () => primitives_exports,
34
- version: () => version,
35
- workflows: () => workflows_exports
36
- });
37
- module.exports = __toCommonJS(index_exports);
38
6
 
39
7
  // src/primitives/index.ts
40
8
  var primitives_exports = {};
@@ -57,33 +25,25 @@ __export(primitives_exports, {
57
25
  });
58
26
 
59
27
  // src/lib/url-signing.ts
60
- var import_mux_node = __toESM(require("@mux/mux-node"));
28
+ import Mux from "@mux/mux-node";
61
29
 
62
30
  // src/env.ts
63
- var import_node_path = __toESM(require("path"));
64
- var import_dotenv = require("dotenv");
65
- var import_dotenv_expand = require("dotenv-expand");
66
- var import_zod = require("zod");
67
- (0, import_dotenv_expand.expand)((0, import_dotenv.config)({
68
- path: import_node_path.default.resolve(
69
- process.cwd(),
70
- process.env.NODE_ENV === "test" ? ".env.test" : ".env"
71
- )
72
- }));
31
+ import { z } from "zod";
32
+ import "dotenv/config";
73
33
  function optionalString(description, message) {
74
- return import_zod.z.preprocess(
34
+ return z.preprocess(
75
35
  (value) => typeof value === "string" && value.trim().length === 0 ? void 0 : value,
76
- import_zod.z.string().trim().min(1, message).optional()
36
+ z.string().trim().min(1, message).optional()
77
37
  ).describe(description);
78
38
  }
79
39
  function requiredString(description, message) {
80
- return import_zod.z.preprocess(
40
+ return z.preprocess(
81
41
  (value) => typeof value === "string" ? value.trim().length > 0 ? value.trim() : void 0 : value,
82
- import_zod.z.string().trim().min(1, message)
42
+ z.string().trim().min(1, message)
83
43
  ).describe(description);
84
44
  }
85
- var EnvSchema = import_zod.z.object({
86
- NODE_ENV: import_zod.z.string().default("development").describe("Runtime environment."),
45
+ var EnvSchema = z.object({
46
+ NODE_ENV: z.string().default("development").describe("Runtime environment."),
87
47
  MUX_TOKEN_ID: requiredString("Mux access token ID.", "Required to access Mux APIs"),
88
48
  MUX_TOKEN_SECRET: requiredString("Mux access token secret.", "Required to access Mux APIs"),
89
49
  MUX_SIGNING_KEY: optionalString("Mux signing key ID for signed playback URLs.", "Used to sign playback URLs"),
@@ -112,16 +72,16 @@ var env = parseEnv();
112
72
  var env_default = env;
113
73
 
114
74
  // src/lib/url-signing.ts
115
- function resolveSigningContext(config2) {
116
- const keyId = config2.muxSigningKey ?? env_default.MUX_SIGNING_KEY;
117
- const keySecret = config2.muxPrivateKey ?? env_default.MUX_PRIVATE_KEY;
75
+ function getMuxSigningContextFromEnv() {
76
+ const keyId = env_default.MUX_SIGNING_KEY;
77
+ const keySecret = env_default.MUX_PRIVATE_KEY;
118
78
  if (!keyId || !keySecret) {
119
79
  return void 0;
120
80
  }
121
81
  return { keyId, keySecret };
122
82
  }
123
83
  function createSigningClient(context) {
124
- return new import_mux_node.default({
84
+ return new Mux({
125
85
  // These are not needed for signing, but the SDK requires them
126
86
  // Using empty strings as we only need the jwt functionality
127
87
  tokenId: env_default.MUX_TOKEN_ID || "",
@@ -131,6 +91,7 @@ function createSigningClient(context) {
131
91
  });
132
92
  }
133
93
  async function signPlaybackId(playbackId, context, type = "video", params) {
94
+ "use step";
134
95
  const client = createSigningClient(context);
135
96
  const stringParams = params ? Object.fromEntries(
136
97
  Object.entries(params).map(([key, value]) => [key, String(value)])
@@ -142,6 +103,7 @@ async function signPlaybackId(playbackId, context, type = "video", params) {
142
103
  });
143
104
  }
144
105
  async function signUrl(url, playbackId, context, type = "video", params) {
106
+ "use step";
145
107
  const token = await signPlaybackId(playbackId, context, type, params);
146
108
  const separator = url.includes("?") ? "&" : "?";
147
109
  return `${url}${separator}token=${token}`;
@@ -149,9 +111,11 @@ async function signUrl(url, playbackId, context, type = "video", params) {
149
111
 
150
112
  // src/primitives/storyboards.ts
151
113
  var DEFAULT_STORYBOARD_WIDTH = 640;
152
- async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, signingContext) {
114
+ async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, shouldSign = false) {
115
+ "use step";
153
116
  const baseUrl = `https://image.mux.com/${playbackId}/storyboard.png`;
154
- if (signingContext) {
117
+ if (shouldSign) {
118
+ const signingContext = getMuxSigningContextFromEnv();
155
119
  return signUrl(baseUrl, playbackId, signingContext, "storyboard", { width });
156
120
  }
157
121
  return `${baseUrl}?width=${width}`;
@@ -244,7 +208,8 @@ function chunkText(text, strategy) {
244
208
 
245
209
  // src/primitives/thumbnails.ts
246
210
  async function getThumbnailUrls(playbackId, duration, options = {}) {
247
- const { interval = 10, width = 640, signingContext } = options;
211
+ "use step";
212
+ const { interval = 10, width = 640, shouldSign = false } = options;
248
213
  const timestamps = [];
249
214
  if (duration <= 50) {
250
215
  const spacing = duration / 6;
@@ -258,7 +223,8 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
258
223
  }
259
224
  const baseUrl = `https://image.mux.com/${playbackId}/thumbnail.png`;
260
225
  const urlPromises = timestamps.map(async (time) => {
261
- if (signingContext) {
226
+ if (shouldSign) {
227
+ const signingContext = getMuxSigningContextFromEnv();
262
228
  return signUrl(baseUrl, playbackId, signingContext, "thumbnail", { time, width });
263
229
  }
264
230
  return `${baseUrl}?time=${time}&width=${width}`;
@@ -374,15 +340,18 @@ function parseVTTCues(vttContent) {
374
340
  }
375
341
  return cues;
376
342
  }
377
- async function buildTranscriptUrl(playbackId, trackId, signingContext) {
343
+ async function buildTranscriptUrl(playbackId, trackId, shouldSign = false) {
344
+ "use step";
378
345
  const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
379
- if (signingContext) {
346
+ if (shouldSign) {
347
+ const signingContext = getMuxSigningContextFromEnv();
380
348
  return signUrl(baseUrl, playbackId, signingContext, "video");
381
349
  }
382
350
  return baseUrl;
383
351
  }
384
352
  async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
385
- const { languageCode, cleanTranscript = true, signingContext } = options;
353
+ "use step";
354
+ const { languageCode, cleanTranscript = true, shouldSign } = options;
386
355
  const track = findCaptionTrack(asset, languageCode);
387
356
  if (!track) {
388
357
  return { transcriptText: "" };
@@ -390,7 +359,7 @@ async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
390
359
  if (!track.id) {
391
360
  return { transcriptText: "", track };
392
361
  }
393
- const transcriptUrl = await buildTranscriptUrl(playbackId, track.id, signingContext);
362
+ const transcriptUrl = await buildTranscriptUrl(playbackId, track.id, shouldSign);
394
363
  try {
395
364
  const response = await fetch(transcriptUrl);
396
365
  if (!response.ok) {
@@ -424,130 +393,17 @@ __export(workflows_exports, {
424
393
  });
425
394
 
426
395
  // src/workflows/burned-in-captions.ts
427
- var import_ai = require("ai");
428
-
429
- // node_modules/dedent/dist/dedent.mjs
430
- function ownKeys(object, enumerableOnly) {
431
- var keys = Object.keys(object);
432
- if (Object.getOwnPropertySymbols) {
433
- var symbols = Object.getOwnPropertySymbols(object);
434
- enumerableOnly && (symbols = symbols.filter(function(sym) {
435
- return Object.getOwnPropertyDescriptor(object, sym).enumerable;
436
- })), keys.push.apply(keys, symbols);
437
- }
438
- return keys;
439
- }
440
- function _objectSpread(target) {
441
- for (var i = 1; i < arguments.length; i++) {
442
- var source = null != arguments[i] ? arguments[i] : {};
443
- i % 2 ? ownKeys(Object(source), true).forEach(function(key) {
444
- _defineProperty(target, key, source[key]);
445
- }) : Object.getOwnPropertyDescriptors ? Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)) : ownKeys(Object(source)).forEach(function(key) {
446
- Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
447
- });
448
- }
449
- return target;
450
- }
451
- function _defineProperty(obj, key, value) {
452
- key = _toPropertyKey(key);
453
- if (key in obj) {
454
- Object.defineProperty(obj, key, { value, enumerable: true, configurable: true, writable: true });
455
- } else {
456
- obj[key] = value;
457
- }
458
- return obj;
459
- }
460
- function _toPropertyKey(arg) {
461
- var key = _toPrimitive(arg, "string");
462
- return typeof key === "symbol" ? key : String(key);
463
- }
464
- function _toPrimitive(input, hint) {
465
- if (typeof input !== "object" || input === null) return input;
466
- var prim = input[Symbol.toPrimitive];
467
- if (prim !== void 0) {
468
- var res = prim.call(input, hint || "default");
469
- if (typeof res !== "object") return res;
470
- throw new TypeError("@@toPrimitive must return a primitive value.");
471
- }
472
- return (hint === "string" ? String : Number)(input);
473
- }
474
- var dedent = createDedent({});
475
- var dedent_default = dedent;
476
- function createDedent(options) {
477
- dedent2.withOptions = (newOptions) => createDedent(_objectSpread(_objectSpread({}, options), newOptions));
478
- return dedent2;
479
- function dedent2(strings, ...values) {
480
- const raw = typeof strings === "string" ? [strings] : strings.raw;
481
- const {
482
- alignValues = false,
483
- escapeSpecialCharacters = Array.isArray(strings),
484
- trimWhitespace = true
485
- } = options;
486
- let result = "";
487
- for (let i = 0; i < raw.length; i++) {
488
- let next = raw[i];
489
- if (escapeSpecialCharacters) {
490
- next = next.replace(/\\\n[ \t]*/g, "").replace(/\\`/g, "`").replace(/\\\$/g, "$").replace(/\\\{/g, "{");
491
- }
492
- result += next;
493
- if (i < values.length) {
494
- const value = alignValues ? alignValue(values[i], result) : values[i];
495
- result += value;
496
- }
497
- }
498
- const lines = result.split("\n");
499
- let mindent = null;
500
- for (const l of lines) {
501
- const m = l.match(/^(\s+)\S+/);
502
- if (m) {
503
- const indent = m[1].length;
504
- if (!mindent) {
505
- mindent = indent;
506
- } else {
507
- mindent = Math.min(mindent, indent);
508
- }
509
- }
510
- }
511
- if (mindent !== null) {
512
- const m = mindent;
513
- result = lines.map((l) => l[0] === " " || l[0] === " " ? l.slice(m) : l).join("\n");
514
- }
515
- if (trimWhitespace) {
516
- result = result.trim();
517
- }
518
- if (escapeSpecialCharacters) {
519
- result = result.replace(/\\n/g, "\n");
520
- }
521
- return result;
522
- }
523
- }
524
- function alignValue(value, precedingText) {
525
- if (typeof value !== "string" || !value.includes("\n")) {
526
- return value;
527
- }
528
- const currentLine = precedingText.slice(precedingText.lastIndexOf("\n") + 1);
529
- const indentMatch = currentLine.match(/^(\s+)/);
530
- if (indentMatch) {
531
- const indent = indentMatch[1];
532
- return value.replace(/\n/g, `
533
- ${indent}`);
534
- }
535
- return value;
536
- }
537
-
538
- // src/workflows/burned-in-captions.ts
539
- var import_zod2 = require("zod");
540
-
541
- // src/lib/client-factory.ts
542
- var import_mux_node2 = __toESM(require("@mux/mux-node"));
396
+ import { generateObject } from "ai";
397
+ import dedent from "dedent";
398
+ import { z as z2 } from "zod";
543
399
 
544
400
  // src/lib/providers.ts
545
- var import_anthropic = require("@ai-sdk/anthropic");
546
- var import_google = require("@ai-sdk/google");
547
- var import_openai = require("@ai-sdk/openai");
401
+ import { createAnthropic } from "@ai-sdk/anthropic";
402
+ import { createGoogleGenerativeAI } from "@ai-sdk/google";
403
+ import { createOpenAI } from "@ai-sdk/openai";
548
404
  var DEFAULT_LANGUAGE_MODELS = {
549
- openai: "gpt-5-mini",
550
- anthropic: "claude-haiku-4-5",
405
+ openai: "gpt-5.1",
406
+ anthropic: "claude-sonnet-4-5",
551
407
  google: "gemini-2.5-flash"
552
408
  };
553
409
  var DEFAULT_EMBEDDING_MODELS = {
@@ -560,14 +416,60 @@ function requireEnv(value, name) {
560
416
  }
561
417
  return value;
562
418
  }
419
+ function createLanguageModelFromConfig(provider, modelId) {
420
+ switch (provider) {
421
+ case "openai": {
422
+ const apiKey = env_default.OPENAI_API_KEY;
423
+ requireEnv(apiKey, "OPENAI_API_KEY");
424
+ const openai = createOpenAI({ apiKey });
425
+ return openai(modelId);
426
+ }
427
+ case "anthropic": {
428
+ const apiKey = env_default.ANTHROPIC_API_KEY;
429
+ requireEnv(apiKey, "ANTHROPIC_API_KEY");
430
+ const anthropic = createAnthropic({ apiKey });
431
+ return anthropic(modelId);
432
+ }
433
+ case "google": {
434
+ const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
435
+ requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
436
+ const google = createGoogleGenerativeAI({ apiKey });
437
+ return google(modelId);
438
+ }
439
+ default: {
440
+ const exhaustiveCheck = provider;
441
+ throw new Error(`Unsupported provider: ${exhaustiveCheck}`);
442
+ }
443
+ }
444
+ }
445
+ function createEmbeddingModelFromConfig(provider, modelId) {
446
+ switch (provider) {
447
+ case "openai": {
448
+ const apiKey = env_default.OPENAI_API_KEY;
449
+ requireEnv(apiKey, "OPENAI_API_KEY");
450
+ const openai = createOpenAI({ apiKey });
451
+ return openai.embedding(modelId);
452
+ }
453
+ case "google": {
454
+ const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
455
+ requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
456
+ const google = createGoogleGenerativeAI({ apiKey });
457
+ return google.textEmbeddingModel(modelId);
458
+ }
459
+ default: {
460
+ const exhaustiveCheck = provider;
461
+ throw new Error(`Unsupported embedding provider: ${exhaustiveCheck}`);
462
+ }
463
+ }
464
+ }
563
465
  function resolveLanguageModel(options = {}) {
564
466
  const provider = options.provider || "openai";
565
467
  const modelId = options.model || DEFAULT_LANGUAGE_MODELS[provider];
566
468
  switch (provider) {
567
469
  case "openai": {
568
- const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
470
+ const apiKey = env_default.OPENAI_API_KEY;
569
471
  requireEnv(apiKey, "OPENAI_API_KEY");
570
- const openai = (0, import_openai.createOpenAI)({
472
+ const openai = createOpenAI({
571
473
  apiKey
572
474
  });
573
475
  return {
@@ -577,9 +479,9 @@ function resolveLanguageModel(options = {}) {
577
479
  };
578
480
  }
579
481
  case "anthropic": {
580
- const apiKey = options.anthropicApiKey ?? env_default.ANTHROPIC_API_KEY;
482
+ const apiKey = env_default.ANTHROPIC_API_KEY;
581
483
  requireEnv(apiKey, "ANTHROPIC_API_KEY");
582
- const anthropic = (0, import_anthropic.createAnthropic)({
484
+ const anthropic = createAnthropic({
583
485
  apiKey
584
486
  });
585
487
  return {
@@ -589,9 +491,9 @@ function resolveLanguageModel(options = {}) {
589
491
  };
590
492
  }
591
493
  case "google": {
592
- const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
494
+ const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
593
495
  requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
594
- const google = (0, import_google.createGoogleGenerativeAI)({
496
+ const google = createGoogleGenerativeAI({
595
497
  apiKey
596
498
  });
597
499
  return {
@@ -611,9 +513,9 @@ function resolveEmbeddingModel(options = {}) {
611
513
  const modelId = options.model || DEFAULT_EMBEDDING_MODELS[provider];
612
514
  switch (provider) {
613
515
  case "openai": {
614
- const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
516
+ const apiKey = env_default.OPENAI_API_KEY;
615
517
  requireEnv(apiKey, "OPENAI_API_KEY");
616
- const openai = (0, import_openai.createOpenAI)({
518
+ const openai = createOpenAI({
617
519
  apiKey
618
520
  });
619
521
  return {
@@ -623,9 +525,9 @@ function resolveEmbeddingModel(options = {}) {
623
525
  };
624
526
  }
625
527
  case "google": {
626
- const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
528
+ const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
627
529
  requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
628
- const google = (0, import_google.createGoogleGenerativeAI)({
530
+ const google = createGoogleGenerativeAI({
629
531
  apiKey
630
532
  });
631
533
  return {
@@ -642,12 +544,45 @@ function resolveEmbeddingModel(options = {}) {
642
544
  }
643
545
 
644
546
  // src/lib/client-factory.ts
645
- function validateCredentials(options, requiredProvider) {
646
- const muxTokenId = options.muxTokenId ?? env_default.MUX_TOKEN_ID;
647
- const muxTokenSecret = options.muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
648
- const openaiApiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
649
- const anthropicApiKey = options.anthropicApiKey ?? env_default.ANTHROPIC_API_KEY;
650
- const googleApiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
547
+ function getMuxCredentialsFromEnv() {
548
+ const muxTokenId = env_default.MUX_TOKEN_ID;
549
+ const muxTokenSecret = env_default.MUX_TOKEN_SECRET;
550
+ if (!muxTokenId || !muxTokenSecret) {
551
+ throw new Error(
552
+ "Mux credentials are required. Set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables."
553
+ );
554
+ }
555
+ return { muxTokenId, muxTokenSecret };
556
+ }
557
+ function getApiKeyFromEnv(provider) {
558
+ const envVarMap = {
559
+ openai: env_default.OPENAI_API_KEY,
560
+ anthropic: env_default.ANTHROPIC_API_KEY,
561
+ google: env_default.GOOGLE_GENERATIVE_AI_API_KEY,
562
+ hive: env_default.HIVE_API_KEY,
563
+ elevenlabs: env_default.ELEVENLABS_API_KEY
564
+ };
565
+ const apiKey = envVarMap[provider];
566
+ if (!apiKey) {
567
+ const envVarNames = {
568
+ openai: "OPENAI_API_KEY",
569
+ anthropic: "ANTHROPIC_API_KEY",
570
+ google: "GOOGLE_GENERATIVE_AI_API_KEY",
571
+ hive: "HIVE_API_KEY",
572
+ elevenlabs: "ELEVENLABS_API_KEY"
573
+ };
574
+ throw new Error(
575
+ `${provider} API key is required. Set ${envVarNames[provider]} environment variable.`
576
+ );
577
+ }
578
+ return apiKey;
579
+ }
580
+ async function validateCredentials(requiredProvider) {
581
+ const muxTokenId = env_default.MUX_TOKEN_ID;
582
+ const muxTokenSecret = env_default.MUX_TOKEN_SECRET;
583
+ const openaiApiKey = env_default.OPENAI_API_KEY;
584
+ const anthropicApiKey = env_default.ANTHROPIC_API_KEY;
585
+ const googleApiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
651
586
  if (!muxTokenId || !muxTokenSecret) {
652
587
  throw new Error(
653
588
  "Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables."
@@ -676,32 +611,23 @@ function validateCredentials(options, requiredProvider) {
676
611
  googleApiKey
677
612
  };
678
613
  }
679
- function createMuxClient(credentials) {
680
- if (!credentials.muxTokenId || !credentials.muxTokenSecret) {
681
- throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
682
- }
683
- return new import_mux_node2.default({
684
- tokenId: credentials.muxTokenId,
685
- tokenSecret: credentials.muxTokenSecret
686
- });
687
- }
688
- function createWorkflowClients(options, provider) {
614
+ async function createWorkflowConfig(options, provider) {
689
615
  const providerToUse = provider || options.provider || "openai";
690
- const credentials = validateCredentials(options, providerToUse);
691
- const languageModel = resolveLanguageModel({
616
+ const credentials = await validateCredentials(providerToUse);
617
+ const resolved = resolveLanguageModel({
692
618
  ...options,
693
619
  provider: providerToUse
694
620
  });
695
621
  return {
696
- mux: createMuxClient(credentials),
697
- languageModel,
698
- credentials
622
+ credentials,
623
+ provider: resolved.provider,
624
+ modelId: resolved.modelId
699
625
  };
700
626
  }
701
627
 
702
628
  // src/lib/image-download.ts
703
- var import_node_buffer = require("buffer");
704
- var import_p_retry = __toESM(require("p-retry"));
629
+ import { Buffer as Buffer2 } from "buffer";
630
+ import pRetry, { AbortError } from "p-retry";
705
631
  var DEFAULT_OPTIONS = {
706
632
  timeout: 1e4,
707
633
  retries: 3,
@@ -710,9 +636,10 @@ var DEFAULT_OPTIONS = {
710
636
  exponentialBackoff: true
711
637
  };
712
638
  async function downloadImageAsBase64(url, options = {}) {
639
+ "use step";
713
640
  const opts = { ...DEFAULT_OPTIONS, ...options };
714
641
  let attemptCount = 0;
715
- return (0, import_p_retry.default)(
642
+ return pRetry(
716
643
  async () => {
717
644
  attemptCount++;
718
645
  const controller = new AbortController();
@@ -727,18 +654,18 @@ async function downloadImageAsBase64(url, options = {}) {
727
654
  clearTimeout(timeoutId);
728
655
  if (!response.ok) {
729
656
  if (response.status >= 400 && response.status < 500 && response.status !== 429) {
730
- throw new import_p_retry.AbortError(`HTTP ${response.status}: ${response.statusText}`);
657
+ throw new AbortError(`HTTP ${response.status}: ${response.statusText}`);
731
658
  }
732
659
  throw new Error(`HTTP ${response.status}: ${response.statusText}`);
733
660
  }
734
661
  const contentType = response.headers.get("content-type");
735
662
  if (!contentType?.startsWith("image/")) {
736
- throw new import_p_retry.AbortError(`Invalid content type: ${contentType}. Expected image/*`);
663
+ throw new AbortError(`Invalid content type: ${contentType}. Expected image/*`);
737
664
  }
738
665
  const arrayBuffer = await response.arrayBuffer();
739
- const buffer = import_node_buffer.Buffer.from(arrayBuffer);
666
+ const buffer = Buffer2.from(arrayBuffer);
740
667
  if (buffer.length === 0) {
741
- throw new import_p_retry.AbortError("Downloaded image is empty");
668
+ throw new AbortError("Downloaded image is empty");
742
669
  }
743
670
  const base64Data = `data:${contentType};base64,${buffer.toString("base64")}`;
744
671
  return {
@@ -751,7 +678,7 @@ async function downloadImageAsBase64(url, options = {}) {
751
678
  };
752
679
  } catch (error) {
753
680
  clearTimeout(timeoutId);
754
- if (error instanceof import_p_retry.AbortError) {
681
+ if (error instanceof AbortError) {
755
682
  throw error;
756
683
  }
757
684
  if (error instanceof Error) {
@@ -780,6 +707,7 @@ async function downloadImageAsBase64(url, options = {}) {
780
707
  );
781
708
  }
782
709
  async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
710
+ "use step";
783
711
  const results = [];
784
712
  for (let i = 0; i < urls.length; i += maxConcurrent) {
785
713
  const batch = urls.slice(i, i + maxConcurrent);
@@ -791,6 +719,7 @@ async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
791
719
  }
792
720
 
793
721
  // src/lib/mux-assets.ts
722
+ import Mux2 from "@mux/mux-node";
794
723
  function getPlaybackId(asset) {
795
724
  const playbackIds = asset.playback_ids || [];
796
725
  const publicPlaybackId = playbackIds.find((pid) => pid.policy === "public");
@@ -805,7 +734,13 @@ function getPlaybackId(asset) {
805
734
  "No public or signed playback ID found for this asset. A public or signed playback ID is required. DRM playback IDs are not currently supported."
806
735
  );
807
736
  }
808
- async function getPlaybackIdForAsset(mux, assetId) {
737
+ async function getPlaybackIdForAsset(assetId) {
738
+ "use step";
739
+ const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
740
+ const mux = new Mux2({
741
+ tokenId: muxTokenId,
742
+ tokenSecret: muxTokenSecret
743
+ });
809
744
  const asset = await mux.video.assets.retrieve(assetId);
810
745
  const { id: playbackId, policy } = getPlaybackId(asset);
811
746
  return { asset, playbackId, policy };
@@ -844,8 +779,8 @@ function resolveSection(defaultSection, override) {
844
779
  }
845
780
  return override;
846
781
  }
847
- function createPromptBuilder(config2) {
848
- const { template, sectionOrder } = config2;
782
+ function createPromptBuilder(config) {
783
+ const { template, sectionOrder } = config;
849
784
  const getSection = (section, override) => {
850
785
  const resolved = resolveSection(template[section], override);
851
786
  return renderSection(resolved);
@@ -886,12 +821,12 @@ function createToneSection(instruction) {
886
821
  }
887
822
 
888
823
  // src/workflows/burned-in-captions.ts
889
- var burnedInCaptionsSchema = import_zod2.z.object({
890
- hasBurnedInCaptions: import_zod2.z.boolean(),
891
- confidence: import_zod2.z.number().min(0).max(1),
892
- detectedLanguage: import_zod2.z.string().nullable()
824
+ var burnedInCaptionsSchema = z2.object({
825
+ hasBurnedInCaptions: z2.boolean(),
826
+ confidence: z2.number().min(0).max(1),
827
+ detectedLanguage: z2.string().nullable()
893
828
  });
894
- var SYSTEM_PROMPT = dedent_default`
829
+ var SYSTEM_PROMPT = dedent`
895
830
  <role>
896
831
  You are an expert at analyzing video frames to detect burned-in captions (also called open captions or hardcoded subtitles).
897
832
  These are text overlays that are permanently embedded in the video image, common on TikTok, Instagram Reels, and other social media platforms.
@@ -934,14 +869,14 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
934
869
  template: {
935
870
  task: {
936
871
  tag: "task",
937
- content: dedent_default`
872
+ content: dedent`
938
873
  Analyze the provided video storyboard to detect burned-in captions (hardcoded subtitles).
939
874
  Count frames with text vs no text, note position consistency and whether text changes across frames.
940
875
  Decide if captions exist, with confidence (0.0-1.0) and detected language if any.`
941
876
  },
942
877
  analysisSteps: {
943
878
  tag: "analysis_steps",
944
- content: dedent_default`
879
+ content: dedent`
945
880
  1. COUNT how many frames contain text overlays vs. how many don't
946
881
  2. Check if text appears in consistent positions across multiple frames
947
882
  3. Verify text changes content between frames (indicating dialogue/narration)
@@ -950,7 +885,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
950
885
  },
951
886
  positiveIndicators: {
952
887
  tag: "classify_as_captions",
953
- content: dedent_default`
888
+ content: dedent`
954
889
  ONLY classify as burned-in captions if:
955
890
  - Text appears in multiple frames (not just 1-2 end frames)
956
891
  - Text positioning is consistent across those frames
@@ -959,7 +894,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
959
894
  },
960
895
  negativeIndicators: {
961
896
  tag: "not_captions",
962
- content: dedent_default`
897
+ content: dedent`
963
898
  DO NOT classify as burned-in captions:
964
899
  - Marketing taglines appearing only in final 1-2 frames
965
900
  - Single words or phrases that don't change between frames
@@ -974,65 +909,90 @@ function buildUserPrompt(promptOverrides) {
974
909
  return burnedInCaptionsPromptBuilder.build(promptOverrides);
975
910
  }
976
911
  var DEFAULT_PROVIDER = "openai";
912
+ async function fetchImageAsBase64(imageUrl, imageDownloadOptions) {
913
+ "use step";
914
+ const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
915
+ return downloadResult.base64Data;
916
+ }
917
+ async function analyzeStoryboard({
918
+ imageDataUrl,
919
+ provider,
920
+ modelId,
921
+ userPrompt,
922
+ systemPrompt
923
+ }) {
924
+ "use step";
925
+ const model = createLanguageModelFromConfig(provider, modelId);
926
+ const response = await generateObject({
927
+ model,
928
+ schema: burnedInCaptionsSchema,
929
+ experimental_telemetry: { isEnabled: true },
930
+ messages: [
931
+ {
932
+ role: "system",
933
+ content: systemPrompt
934
+ },
935
+ {
936
+ role: "user",
937
+ content: [
938
+ { type: "text", text: userPrompt },
939
+ { type: "image", image: imageDataUrl }
940
+ ]
941
+ }
942
+ ]
943
+ });
944
+ return {
945
+ result: response.object,
946
+ usage: {
947
+ inputTokens: response.usage.inputTokens,
948
+ outputTokens: response.usage.outputTokens,
949
+ totalTokens: response.usage.totalTokens,
950
+ reasoningTokens: response.usage.reasoningTokens,
951
+ cachedInputTokens: response.usage.cachedInputTokens
952
+ }
953
+ };
954
+ }
977
955
  async function hasBurnedInCaptions(assetId, options = {}) {
956
+ "use workflow";
978
957
  const {
979
958
  provider = DEFAULT_PROVIDER,
980
959
  model,
981
960
  imageSubmissionMode = "url",
982
961
  imageDownloadOptions,
983
962
  promptOverrides,
984
- ...config2
963
+ ...config
985
964
  } = options;
986
965
  const userPrompt = buildUserPrompt(promptOverrides);
987
- const clients = createWorkflowClients(
988
- { ...config2, model },
966
+ const workflowConfig = await createWorkflowConfig(
967
+ { ...config, model },
989
968
  provider
990
969
  );
991
- const { playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
992
- const signingContext = resolveSigningContext(options);
970
+ const { playbackId, policy } = await getPlaybackIdForAsset(assetId);
971
+ const signingContext = getMuxSigningContextFromEnv();
993
972
  if (policy === "signed" && !signingContext) {
994
973
  throw new Error(
995
974
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
996
975
  );
997
976
  }
998
- const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
999
- const analyzeStoryboard = async (imageDataUrl) => {
1000
- const response = await (0, import_ai.generateObject)({
1001
- model: clients.languageModel.model,
1002
- schema: burnedInCaptionsSchema,
1003
- abortSignal: options.abortSignal,
1004
- experimental_telemetry: { isEnabled: true },
1005
- messages: [
1006
- {
1007
- role: "system",
1008
- content: SYSTEM_PROMPT
1009
- },
1010
- {
1011
- role: "user",
1012
- content: [
1013
- { type: "text", text: userPrompt },
1014
- { type: "image", image: imageDataUrl }
1015
- ]
1016
- }
1017
- ]
1018
- });
1019
- return {
1020
- result: response.object,
1021
- usage: {
1022
- inputTokens: response.usage.inputTokens,
1023
- outputTokens: response.usage.outputTokens,
1024
- totalTokens: response.usage.totalTokens,
1025
- reasoningTokens: response.usage.reasoningTokens,
1026
- cachedInputTokens: response.usage.cachedInputTokens
1027
- }
1028
- };
1029
- };
977
+ const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed");
1030
978
  let analysisResponse;
1031
979
  if (imageSubmissionMode === "base64") {
1032
- const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
1033
- analysisResponse = await analyzeStoryboard(downloadResult.base64Data);
980
+ const base64Data = await fetchImageAsBase64(imageUrl, imageDownloadOptions);
981
+ analysisResponse = await analyzeStoryboard({
982
+ imageDataUrl: base64Data,
983
+ provider: workflowConfig.provider,
984
+ modelId: workflowConfig.modelId,
985
+ userPrompt,
986
+ systemPrompt: SYSTEM_PROMPT
987
+ });
1034
988
  } else {
1035
- analysisResponse = await analyzeStoryboard(imageUrl);
989
+ analysisResponse = await analyzeStoryboard({
990
+ imageDataUrl: imageUrl,
991
+ provider: workflowConfig.provider,
992
+ modelId: workflowConfig.modelId,
993
+ userPrompt,
994
+ systemPrompt: SYSTEM_PROMPT
995
+ });
1036
996
  }
1037
997
  if (!analysisResponse.result) {
1038
998
  throw new Error("No analysis result received from AI provider");
@@ -1048,8 +1008,8 @@ async function hasBurnedInCaptions(assetId, options = {}) {
1048
1008
  }
1049
1009
 
1050
1010
  // src/workflows/chapters.ts
1051
- var import_ai2 = require("ai");
1052
- var import_zod3 = require("zod");
1011
+ import { generateObject as generateObject2 } from "ai";
1012
+ import { z as z3 } from "zod";
1053
1013
 
1054
1014
  // src/lib/retry.ts
1055
1015
  var DEFAULT_RETRY_OPTIONS = {
@@ -1081,25 +1041,50 @@ async function withRetry(fn, {
1081
1041
  if (isLastAttempt || !shouldRetry(lastError, attempt + 1)) {
1082
1042
  throw lastError;
1083
1043
  }
1084
- const delay2 = calculateDelay(attempt + 1, baseDelay, maxDelay);
1044
+ const delay = calculateDelay(attempt + 1, baseDelay, maxDelay);
1085
1045
  console.warn(
1086
- `Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(delay2)}ms...`
1046
+ `Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(delay)}ms...`
1087
1047
  );
1088
- await new Promise((resolve) => setTimeout(resolve, delay2));
1048
+ await new Promise((resolve) => setTimeout(resolve, delay));
1089
1049
  }
1090
1050
  }
1091
1051
  throw lastError || new Error("Retry failed with unknown error");
1092
1052
  }
1093
1053
 
1094
1054
  // src/workflows/chapters.ts
1095
- var chapterSchema = import_zod3.z.object({
1096
- startTime: import_zod3.z.number(),
1097
- title: import_zod3.z.string()
1055
+ var chapterSchema = z3.object({
1056
+ startTime: z3.number(),
1057
+ title: z3.string()
1098
1058
  });
1099
- var chaptersSchema = import_zod3.z.object({
1100
- chapters: import_zod3.z.array(chapterSchema)
1059
+ var chaptersSchema = z3.object({
1060
+ chapters: z3.array(chapterSchema)
1101
1061
  });
1102
- var DEFAULT_PROVIDER2 = "openai";
1062
+ async function generateChaptersWithAI({
1063
+ provider,
1064
+ modelId,
1065
+ timestampedTranscript,
1066
+ systemPrompt
1067
+ }) {
1068
+ "use step";
1069
+ const model = createLanguageModelFromConfig(provider, modelId);
1070
+ const response = await withRetry(
1071
+ () => generateObject2({
1072
+ model,
1073
+ schema: chaptersSchema,
1074
+ messages: [
1075
+ {
1076
+ role: "system",
1077
+ content: systemPrompt
1078
+ },
1079
+ {
1080
+ role: "user",
1081
+ content: timestampedTranscript
1082
+ }
1083
+ ]
1084
+ })
1085
+ );
1086
+ return response.object;
1087
+ }
1103
1088
  var SYSTEM_PROMPT2 = `Your role is to segment the following captions into chunked chapters, summarising each chapter with a title.
1104
1089
 
1105
1090
  Analyze the transcript and create logical chapter breaks based on topic changes, major transitions, or distinct sections of content. Each chapter should represent a meaningful segment of the video.
@@ -1121,10 +1106,11 @@ Important rules:
1121
1106
  - Do not include any text before or after the JSON
1122
1107
  - The JSON must be valid and parseable`;
1123
1108
  async function generateChapters(assetId, languageCode, options = {}) {
1124
- const { provider = DEFAULT_PROVIDER2, model, abortSignal } = options;
1125
- const clients = createWorkflowClients({ ...options, model }, provider);
1126
- const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
1127
- const signingContext = resolveSigningContext(options);
1109
+ "use workflow";
1110
+ const { provider = "openai", model } = options;
1111
+ const config = await createWorkflowConfig({ ...options, model }, provider);
1112
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
1113
+ const signingContext = getMuxSigningContextFromEnv();
1128
1114
  if (policy === "signed" && !signingContext) {
1129
1115
  throw new Error(
1130
1116
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1134,7 +1120,7 @@ async function generateChapters(assetId, languageCode, options = {}) {
1134
1120
  languageCode,
1135
1121
  cleanTranscript: false,
1136
1122
  // keep timestamps for chapter segmentation
1137
- signingContext: policy === "signed" ? signingContext : void 0
1123
+ shouldSign: policy === "signed"
1138
1124
  });
1139
1125
  if (!transcriptResult.track || !transcriptResult.transcriptText) {
1140
1126
  const availableLanguages = getReadyTextTracks(assetData).map((t) => t.language_code).filter(Boolean).join(", ");
@@ -1148,24 +1134,12 @@ async function generateChapters(assetId, languageCode, options = {}) {
1148
1134
  }
1149
1135
  let chaptersData = null;
1150
1136
  try {
1151
- const response = await withRetry(
1152
- () => (0, import_ai2.generateObject)({
1153
- model: clients.languageModel.model,
1154
- schema: chaptersSchema,
1155
- abortSignal,
1156
- messages: [
1157
- {
1158
- role: "system",
1159
- content: SYSTEM_PROMPT2
1160
- },
1161
- {
1162
- role: "user",
1163
- content: timestampedTranscript
1164
- }
1165
- ]
1166
- })
1167
- );
1168
- chaptersData = response.object;
1137
+ chaptersData = await generateChaptersWithAI({
1138
+ provider: config.provider,
1139
+ modelId: config.modelId,
1140
+ timestampedTranscript,
1141
+ systemPrompt: SYSTEM_PROMPT2
1142
+ });
1169
1143
  } catch (error) {
1170
1144
  throw new Error(
1171
1145
  `Failed to generate chapters with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
@@ -1189,14 +1163,7 @@ async function generateChapters(assetId, languageCode, options = {}) {
1189
1163
  }
1190
1164
 
1191
1165
  // src/workflows/embeddings.ts
1192
- var import_ai3 = require("ai");
1193
- var DEFAULT_PROVIDER3 = "openai";
1194
- var DEFAULT_CHUNKING_STRATEGY = {
1195
- type: "token",
1196
- maxTokens: 500,
1197
- overlap: 100
1198
- };
1199
- var DEFAULT_BATCH_SIZE = 5;
1166
+ import { embed } from "ai";
1200
1167
  function averageEmbeddings(embeddings) {
1201
1168
  if (embeddings.length === 0) {
1202
1169
  return [];
@@ -1213,51 +1180,41 @@ function averageEmbeddings(embeddings) {
1213
1180
  }
1214
1181
  return averaged;
1215
1182
  }
1216
- async function generateChunkEmbeddings(chunks, model, batchSize, abortSignal) {
1217
- const results = [];
1218
- for (let i = 0; i < chunks.length; i += batchSize) {
1219
- const batch = chunks.slice(i, i + batchSize);
1220
- const batchResults = await Promise.all(
1221
- batch.map(async (chunk) => {
1222
- const response = await withRetry(
1223
- () => (0, import_ai3.embed)({
1224
- model,
1225
- value: chunk.text,
1226
- abortSignal
1227
- })
1228
- );
1229
- return {
1230
- chunkId: chunk.id,
1231
- embedding: response.embedding,
1232
- metadata: {
1233
- startTime: chunk.startTime,
1234
- endTime: chunk.endTime,
1235
- tokenCount: chunk.tokenCount
1236
- }
1237
- };
1238
- })
1239
- );
1240
- results.push(...batchResults);
1241
- }
1242
- return results;
1183
+ async function generateSingleChunkEmbedding({
1184
+ chunk,
1185
+ provider,
1186
+ modelId
1187
+ }) {
1188
+ "use step";
1189
+ const model = createEmbeddingModelFromConfig(provider, modelId);
1190
+ const response = await withRetry(
1191
+ () => embed({
1192
+ model,
1193
+ value: chunk.text
1194
+ })
1195
+ );
1196
+ return {
1197
+ chunkId: chunk.id,
1198
+ embedding: response.embedding,
1199
+ metadata: {
1200
+ startTime: chunk.startTime,
1201
+ endTime: chunk.endTime,
1202
+ tokenCount: chunk.tokenCount
1203
+ }
1204
+ };
1243
1205
  }
1244
1206
  async function generateVideoEmbeddings(assetId, options = {}) {
1207
+ "use workflow";
1245
1208
  const {
1246
- provider = DEFAULT_PROVIDER3,
1209
+ provider = "openai",
1247
1210
  model,
1248
1211
  languageCode,
1249
- chunkingStrategy = DEFAULT_CHUNKING_STRATEGY,
1250
- batchSize = DEFAULT_BATCH_SIZE,
1251
- abortSignal
1212
+ chunkingStrategy = { type: "token", maxTokens: 500, overlap: 100 },
1213
+ batchSize = 5
1252
1214
  } = options;
1253
- const credentials = validateCredentials(options, provider === "google" ? "google" : "openai");
1254
- const muxClient = createMuxClient(credentials);
1255
1215
  const embeddingModel = resolveEmbeddingModel({ ...options, provider, model });
1256
- const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
1257
- muxClient,
1258
- assetId
1259
- );
1260
- const signingContext = resolveSigningContext(options);
1216
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
1217
+ const signingContext = getMuxSigningContextFromEnv();
1261
1218
  if (policy === "signed" && !signingContext) {
1262
1219
  throw new Error(
1263
1220
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1267,7 +1224,7 @@ async function generateVideoEmbeddings(assetId, options = {}) {
1267
1224
  const transcriptResult = await fetchTranscriptForAsset(assetData, playbackId, {
1268
1225
  languageCode,
1269
1226
  cleanTranscript: !useVttChunking,
1270
- signingContext: policy === "signed" ? signingContext : void 0
1227
+ shouldSign: policy === "signed"
1271
1228
  });
1272
1229
  if (!transcriptResult.track || !transcriptResult.transcriptText) {
1273
1230
  const availableLanguages = getReadyTextTracks(assetData).map((t) => t.language_code).filter(Boolean).join(", ");
@@ -1287,14 +1244,21 @@ async function generateVideoEmbeddings(assetId, options = {}) {
1287
1244
  if (chunks.length === 0) {
1288
1245
  throw new Error("No chunks generated from transcript");
1289
1246
  }
1290
- let chunkEmbeddings;
1247
+ const chunkEmbeddings = [];
1291
1248
  try {
1292
- chunkEmbeddings = await generateChunkEmbeddings(
1293
- chunks,
1294
- embeddingModel.model,
1295
- batchSize,
1296
- abortSignal
1297
- );
1249
+ for (let i = 0; i < chunks.length; i += batchSize) {
1250
+ const batch = chunks.slice(i, i + batchSize);
1251
+ const batchResults = await Promise.all(
1252
+ batch.map(
1253
+ (chunk) => generateSingleChunkEmbedding({
1254
+ chunk,
1255
+ provider: embeddingModel.provider,
1256
+ modelId: embeddingModel.modelId
1257
+ })
1258
+ )
1259
+ );
1260
+ chunkEmbeddings.push(...batchResults);
1261
+ }
1298
1262
  } catch (error) {
1299
1263
  throw new Error(
1300
1264
  `Failed to generate embeddings with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
@@ -1326,7 +1290,7 @@ var DEFAULT_THRESHOLDS = {
1326
1290
  sexual: 0.7,
1327
1291
  violence: 0.8
1328
1292
  };
1329
- var DEFAULT_PROVIDER4 = "openai";
1293
+ var DEFAULT_PROVIDER2 = "openai";
1330
1294
  var HIVE_ENDPOINT = "https://api.thehive.ai/api/v2/task/sync";
1331
1295
  var HIVE_SEXUAL_CATEGORIES = [
1332
1296
  "general_nsfw",
@@ -1364,6 +1328,7 @@ var HIVE_VIOLENCE_CATEGORIES = [
1364
1328
  "garm_death_injury_or_military_conflict"
1365
1329
  ];
1366
1330
  async function processConcurrently(items, processor, maxConcurrent = 5) {
1331
+ "use step";
1367
1332
  const results = [];
1368
1333
  for (let i = 0; i < items.length; i += maxConcurrent) {
1369
1334
  const batch = items.slice(i, i + maxConcurrent);
@@ -1373,11 +1338,14 @@ async function processConcurrently(items, processor, maxConcurrent = 5) {
1373
1338
  }
1374
1339
  return results;
1375
1340
  }
1376
- async function requestOpenAIModeration(imageUrls, apiKey, model, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
1341
+ async function requestOpenAIModeration(imageUrls, model, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
1342
+ "use step";
1377
1343
  const targetUrls = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map(
1378
- (img) => ({ url: img.url, image: img.base64Data })
1379
- ) : imageUrls.map((url) => ({ url, image: url }));
1344
+ (img) => ({ url: img.url, image: img.base64Data, model })
1345
+ ) : imageUrls.map((url) => ({ url, image: url, model }));
1380
1346
  const moderate = async (entry) => {
1347
+ "use step";
1348
+ const apiKey = getApiKeyFromEnv("openai");
1381
1349
  try {
1382
1350
  const res = await fetch("https://api.openai.com/v1/moderations", {
1383
1351
  method: "POST",
@@ -1386,7 +1354,7 @@ async function requestOpenAIModeration(imageUrls, apiKey, model, maxConcurrent =
1386
1354
  "Authorization": `Bearer ${apiKey}`
1387
1355
  },
1388
1356
  body: JSON.stringify({
1389
- model,
1357
+ model: entry.model,
1390
1358
  input: [
1391
1359
  {
1392
1360
  type: "image_url",
@@ -1429,7 +1397,8 @@ function getHiveCategoryScores(classes, categoryNames) {
1429
1397
  const scores = categoryNames.map((category) => scoreMap[category] || 0);
1430
1398
  return Math.max(...scores, 0);
1431
1399
  }
1432
- async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
1400
+ async function requestHiveModeration(imageUrls, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
1401
+ "use step";
1433
1402
  const targets = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map((img) => ({
1434
1403
  url: img.url,
1435
1404
  source: {
@@ -1442,6 +1411,8 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
1442
1411
  source: { kind: "url", value: url }
1443
1412
  }));
1444
1413
  const moderate = async (entry) => {
1414
+ "use step";
1415
+ const apiKey = getApiKeyFromEnv("hive");
1445
1416
  try {
1446
1417
  const formData = new FormData();
1447
1418
  if (entry.source.kind === "url") {
@@ -1487,8 +1458,9 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
1487
1458
  return processConcurrently(targets, moderate, maxConcurrent);
1488
1459
  }
1489
1460
  async function getModerationScores(assetId, options = {}) {
1461
+ "use workflow";
1490
1462
  const {
1491
- provider = DEFAULT_PROVIDER4,
1463
+ provider = DEFAULT_PROVIDER2,
1492
1464
  model = provider === "openai" ? "omni-moderation-latest" : void 0,
1493
1465
  thresholds = DEFAULT_THRESHOLDS,
1494
1466
  thumbnailInterval = 10,
@@ -1497,11 +1469,9 @@ async function getModerationScores(assetId, options = {}) {
1497
1469
  imageSubmissionMode = "url",
1498
1470
  imageDownloadOptions
1499
1471
  } = options;
1500
- const credentials = validateCredentials(options, provider === "openai" ? "openai" : void 0);
1501
- const muxClient = createMuxClient(credentials);
1502
- const { asset, playbackId, policy } = await getPlaybackIdForAsset(muxClient, assetId);
1472
+ const { asset, playbackId, policy } = await getPlaybackIdForAsset(assetId);
1503
1473
  const duration = asset.duration || 0;
1504
- const signingContext = resolveSigningContext(options);
1474
+ const signingContext = getMuxSigningContextFromEnv();
1505
1475
  if (policy === "signed" && !signingContext) {
1506
1476
  throw new Error(
1507
1477
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1510,30 +1480,20 @@ async function getModerationScores(assetId, options = {}) {
1510
1480
  const thumbnailUrls = await getThumbnailUrls(playbackId, duration, {
1511
1481
  interval: thumbnailInterval,
1512
1482
  width: thumbnailWidth,
1513
- signingContext: policy === "signed" ? signingContext : void 0
1483
+ shouldSign: policy === "signed"
1514
1484
  });
1515
1485
  let thumbnailScores;
1516
1486
  if (provider === "openai") {
1517
- const apiKey = credentials.openaiApiKey;
1518
- if (!apiKey) {
1519
- throw new Error("OpenAI API key is required for moderation. Set OPENAI_API_KEY or pass openaiApiKey.");
1520
- }
1521
1487
  thumbnailScores = await requestOpenAIModeration(
1522
1488
  thumbnailUrls,
1523
- apiKey,
1524
1489
  model || "omni-moderation-latest",
1525
1490
  maxConcurrent,
1526
1491
  imageSubmissionMode,
1527
1492
  imageDownloadOptions
1528
1493
  );
1529
1494
  } else if (provider === "hive") {
1530
- const hiveApiKey = options.hiveApiKey || env_default.HIVE_API_KEY;
1531
- if (!hiveApiKey) {
1532
- throw new Error("Hive API key is required for moderation. Set HIVE_API_KEY or pass hiveApiKey.");
1533
- }
1534
1495
  thumbnailScores = await requestHiveModeration(
1535
1496
  thumbnailUrls,
1536
- hiveApiKey,
1537
1497
  maxConcurrent,
1538
1498
  imageSubmissionMode,
1539
1499
  imageDownloadOptions
@@ -1557,17 +1517,18 @@ async function getModerationScores(assetId, options = {}) {
1557
1517
  }
1558
1518
 
1559
1519
  // src/workflows/summarization.ts
1560
- var import_ai4 = require("ai");
1561
- var import_zod4 = require("zod");
1520
+ import { generateObject as generateObject3 } from "ai";
1521
+ import dedent2 from "dedent";
1522
+ import { z as z4 } from "zod";
1562
1523
  var SUMMARY_KEYWORD_LIMIT = 10;
1563
- var summarySchema = import_zod4.z.object({
1564
- keywords: import_zod4.z.array(import_zod4.z.string()),
1565
- title: import_zod4.z.string(),
1566
- description: import_zod4.z.string()
1524
+ var summarySchema = z4.object({
1525
+ keywords: z4.array(z4.string()),
1526
+ title: z4.string(),
1527
+ description: z4.string()
1567
1528
  });
1568
1529
  var TONE_INSTRUCTIONS = {
1569
- normal: "Provide a clear, straightforward analysis.",
1570
- sassy: "Answer with a sassy, playful attitude and personality.",
1530
+ neutral: "Provide a clear, straightforward analysis.",
1531
+ playful: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
1571
1532
  professional: "Provide a professional, executive-level analysis suitable for business reporting."
1572
1533
  };
1573
1534
  var summarizationPromptBuilder = createPromptBuilder({
@@ -1578,7 +1539,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1578
1539
  },
1579
1540
  title: {
1580
1541
  tag: "title_requirements",
1581
- content: dedent_default`
1542
+ content: dedent2`
1582
1543
  A short, compelling headline that immediately communicates the subject or action.
1583
1544
  Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
1584
1545
  Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
@@ -1586,7 +1547,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1586
1547
  },
1587
1548
  description: {
1588
1549
  tag: "description_requirements",
1589
- content: dedent_default`
1550
+ content: dedent2`
1590
1551
  A concise summary (2-4 sentences) that describes what happens across the video.
1591
1552
  Cover the main subjects, actions, setting, and any notable progression visible across frames.
1592
1553
  Write in present tense. Be specific about observable details rather than making assumptions.
@@ -1594,7 +1555,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1594
1555
  },
1595
1556
  keywords: {
1596
1557
  tag: "keywords_requirements",
1597
- content: dedent_default`
1558
+ content: dedent2`
1598
1559
  Specific, searchable terms (up to 10) that capture:
1599
1560
  - Primary subjects (people, animals, objects)
1600
1561
  - Actions and activities being performed
@@ -1606,7 +1567,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1606
1567
  },
1607
1568
  qualityGuidelines: {
1608
1569
  tag: "quality_guidelines",
1609
- content: dedent_default`
1570
+ content: dedent2`
1610
1571
  - Examine all frames to understand the full context and progression
1611
1572
  - Be precise: "golden retriever" is better than "dog" when identifiable
1612
1573
  - Capture the narrative: what begins, develops, and concludes
@@ -1615,7 +1576,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1615
1576
  },
1616
1577
  sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
1617
1578
  });
1618
- var SYSTEM_PROMPT3 = dedent_default`
1579
+ var SYSTEM_PROMPT3 = dedent2`
1619
1580
  <role>
1620
1581
  You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
1621
1582
  </role>
@@ -1647,7 +1608,29 @@ var SYSTEM_PROMPT3 = dedent_default`
1647
1608
  - Only describe what is clearly observable in the frames or explicitly stated in the transcript
1648
1609
  - Do not fabricate details or make unsupported assumptions
1649
1610
  - Return structured data matching the requested schema
1650
- </constraints>`;
1611
+ </constraints>
1612
+
1613
+ <tone_guidance>
1614
+ Pay special attention to the <tone> section and lean heavily into those instructions.
1615
+ Adapt your entire analysis and writing style to match the specified tone - this should influence
1616
+ your word choice, personality, formality level, and overall presentation of the content.
1617
+ The tone instructions are not suggestions but core requirements for how you should express yourself.
1618
+ </tone_guidance>
1619
+
1620
+ <language_guidelines>
1621
+ AVOID these meta-descriptive phrases that reference the medium rather than the content:
1622
+ - "The image shows..." / "The storyboard shows..."
1623
+ - "In this video..." / "This video features..."
1624
+ - "The frames depict..." / "The footage shows..."
1625
+ - "We can see..." / "You can see..."
1626
+ - "The clip shows..." / "The scene shows..."
1627
+
1628
+ INSTEAD, describe the content directly:
1629
+ - BAD: "The video shows a chef preparing a meal"
1630
+ - GOOD: "A chef prepares a meal in a professional kitchen"
1631
+
1632
+ Write as if describing reality, not describing a recording of reality.
1633
+ </language_guidelines>`;
1651
1634
  function buildUserPrompt2({
1652
1635
  tone,
1653
1636
  transcriptText,
@@ -1661,8 +1644,37 @@ function buildUserPrompt2({
1661
1644
  }
1662
1645
  return summarizationPromptBuilder.buildWithContext(promptOverrides, contextSections);
1663
1646
  }
1664
- var DEFAULT_PROVIDER5 = "openai";
1665
- var DEFAULT_TONE = "normal";
1647
+ async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, systemPrompt) {
1648
+ "use step";
1649
+ const model = createLanguageModelFromConfig(provider, modelId);
1650
+ const response = await generateObject3({
1651
+ model,
1652
+ schema: summarySchema,
1653
+ messages: [
1654
+ {
1655
+ role: "system",
1656
+ content: systemPrompt
1657
+ },
1658
+ {
1659
+ role: "user",
1660
+ content: [
1661
+ { type: "text", text: userPrompt },
1662
+ { type: "image", image: imageDataUrl }
1663
+ ]
1664
+ }
1665
+ ]
1666
+ });
1667
+ return {
1668
+ result: response.object,
1669
+ usage: {
1670
+ inputTokens: response.usage.inputTokens,
1671
+ outputTokens: response.usage.outputTokens,
1672
+ totalTokens: response.usage.totalTokens,
1673
+ reasoningTokens: response.usage.reasoningTokens,
1674
+ cachedInputTokens: response.usage.cachedInputTokens
1675
+ }
1676
+ };
1677
+ }
1666
1678
  function normalizeKeywords(keywords) {
1667
1679
  if (!Array.isArray(keywords) || keywords.length === 0) {
1668
1680
  return [];
@@ -1687,23 +1699,24 @@ function normalizeKeywords(keywords) {
1687
1699
  return normalized;
1688
1700
  }
1689
1701
  async function getSummaryAndTags(assetId, options) {
1702
+ "use workflow";
1690
1703
  const {
1691
- provider = DEFAULT_PROVIDER5,
1704
+ provider = "openai",
1692
1705
  model,
1693
- tone = DEFAULT_TONE,
1706
+ tone = "neutral",
1694
1707
  includeTranscript = true,
1695
1708
  cleanTranscript = true,
1696
1709
  imageSubmissionMode = "url",
1697
1710
  imageDownloadOptions,
1698
- abortSignal,
1711
+ abortSignal: _abortSignal,
1699
1712
  promptOverrides
1700
1713
  } = options ?? {};
1701
- const clients = createWorkflowClients(
1714
+ const config = await createWorkflowConfig(
1702
1715
  { ...options, model },
1703
1716
  provider
1704
1717
  );
1705
- const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
1706
- const signingContext = resolveSigningContext(options ?? {});
1718
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
1719
+ const signingContext = getMuxSigningContextFromEnv();
1707
1720
  if (policy === "signed" && !signingContext) {
1708
1721
  throw new Error(
1709
1722
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1711,7 +1724,7 @@ async function getSummaryAndTags(assetId, options) {
1711
1724
  }
1712
1725
  const transcriptText = includeTranscript ? (await fetchTranscriptForAsset(assetData, playbackId, {
1713
1726
  cleanTranscript,
1714
- signingContext: policy === "signed" ? signingContext : void 0
1727
+ shouldSign: policy === "signed"
1715
1728
  })).transcriptText : "";
1716
1729
  const userPrompt = buildUserPrompt2({
1717
1730
  tone,
@@ -1719,67 +1732,214 @@ async function getSummaryAndTags(assetId, options) {
1719
1732
  isCleanTranscript: cleanTranscript,
1720
1733
  promptOverrides
1721
1734
  });
1722
- const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
1723
- const analyzeStoryboard = async (imageDataUrl) => {
1724
- const response = await (0, import_ai4.generateObject)({
1725
- model: clients.languageModel.model,
1726
- schema: summarySchema,
1727
- abortSignal,
1728
- messages: [
1729
- {
1730
- role: "system",
1731
- content: SYSTEM_PROMPT3
1732
- },
1733
- {
1734
- role: "user",
1735
- content: [
1736
- { type: "text", text: userPrompt },
1737
- { type: "image", image: imageDataUrl }
1738
- ]
1739
- }
1740
- ]
1741
- });
1742
- return response.object;
1743
- };
1744
- let aiAnalysis = null;
1735
+ const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed");
1736
+ let analysisResponse;
1745
1737
  try {
1746
1738
  if (imageSubmissionMode === "base64") {
1747
1739
  const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
1748
- aiAnalysis = await analyzeStoryboard(downloadResult.base64Data);
1740
+ analysisResponse = await analyzeStoryboard2(
1741
+ downloadResult.base64Data,
1742
+ config.provider,
1743
+ config.modelId,
1744
+ userPrompt,
1745
+ SYSTEM_PROMPT3
1746
+ );
1749
1747
  } else {
1750
- aiAnalysis = await withRetry(() => analyzeStoryboard(imageUrl));
1748
+ analysisResponse = await withRetry(() => analyzeStoryboard2(imageUrl, config.provider, config.modelId, userPrompt, SYSTEM_PROMPT3));
1751
1749
  }
1752
1750
  } catch (error) {
1753
1751
  throw new Error(
1754
1752
  `Failed to analyze video content with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
1755
1753
  );
1756
1754
  }
1757
- if (!aiAnalysis) {
1755
+ if (!analysisResponse.result) {
1758
1756
  throw new Error(`Failed to analyze video content for asset ${assetId}`);
1759
1757
  }
1760
- if (!aiAnalysis.title) {
1758
+ if (!analysisResponse.result.title) {
1761
1759
  throw new Error(`Failed to generate title for asset ${assetId}`);
1762
1760
  }
1763
- if (!aiAnalysis.description) {
1761
+ if (!analysisResponse.result.description) {
1764
1762
  throw new Error(`Failed to generate description for asset ${assetId}`);
1765
1763
  }
1766
1764
  return {
1767
1765
  assetId,
1768
- title: aiAnalysis.title,
1769
- description: aiAnalysis.description,
1770
- tags: normalizeKeywords(aiAnalysis.keywords),
1771
- storyboardUrl: imageUrl
1766
+ title: analysisResponse.result.title,
1767
+ description: analysisResponse.result.description,
1768
+ tags: normalizeKeywords(analysisResponse.result.keywords),
1769
+ storyboardUrl: imageUrl,
1770
+ usage: analysisResponse.usage,
1771
+ transcriptText: transcriptText || void 0
1772
1772
  };
1773
1773
  }
1774
1774
 
1775
1775
  // src/workflows/translate-audio.ts
1776
- var import_client_s3 = require("@aws-sdk/client-s3");
1777
- var import_lib_storage = require("@aws-sdk/lib-storage");
1778
- var import_s3_request_presigner = require("@aws-sdk/s3-request-presigner");
1779
- var import_mux_node3 = __toESM(require("@mux/mux-node"));
1776
+ import Mux3 from "@mux/mux-node";
1777
+
1778
+ // src/lib/language-codes.ts
1779
+ var ISO639_1_TO_3 = {
1780
+ // Major world languages
1781
+ en: "eng",
1782
+ // English
1783
+ es: "spa",
1784
+ // Spanish
1785
+ fr: "fra",
1786
+ // French
1787
+ de: "deu",
1788
+ // German
1789
+ it: "ita",
1790
+ // Italian
1791
+ pt: "por",
1792
+ // Portuguese
1793
+ ru: "rus",
1794
+ // Russian
1795
+ zh: "zho",
1796
+ // Chinese
1797
+ ja: "jpn",
1798
+ // Japanese
1799
+ ko: "kor",
1800
+ // Korean
1801
+ ar: "ara",
1802
+ // Arabic
1803
+ hi: "hin",
1804
+ // Hindi
1805
+ // European languages
1806
+ nl: "nld",
1807
+ // Dutch
1808
+ pl: "pol",
1809
+ // Polish
1810
+ sv: "swe",
1811
+ // Swedish
1812
+ da: "dan",
1813
+ // Danish
1814
+ no: "nor",
1815
+ // Norwegian
1816
+ fi: "fin",
1817
+ // Finnish
1818
+ el: "ell",
1819
+ // Greek
1820
+ cs: "ces",
1821
+ // Czech
1822
+ hu: "hun",
1823
+ // Hungarian
1824
+ ro: "ron",
1825
+ // Romanian
1826
+ bg: "bul",
1827
+ // Bulgarian
1828
+ hr: "hrv",
1829
+ // Croatian
1830
+ sk: "slk",
1831
+ // Slovak
1832
+ sl: "slv",
1833
+ // Slovenian
1834
+ uk: "ukr",
1835
+ // Ukrainian
1836
+ tr: "tur",
1837
+ // Turkish
1838
+ // Asian languages
1839
+ th: "tha",
1840
+ // Thai
1841
+ vi: "vie",
1842
+ // Vietnamese
1843
+ id: "ind",
1844
+ // Indonesian
1845
+ ms: "msa",
1846
+ // Malay
1847
+ tl: "tgl",
1848
+ // Tagalog/Filipino
1849
+ // Other languages
1850
+ he: "heb",
1851
+ // Hebrew
1852
+ fa: "fas",
1853
+ // Persian/Farsi
1854
+ bn: "ben",
1855
+ // Bengali
1856
+ ta: "tam",
1857
+ // Tamil
1858
+ te: "tel",
1859
+ // Telugu
1860
+ mr: "mar",
1861
+ // Marathi
1862
+ gu: "guj",
1863
+ // Gujarati
1864
+ kn: "kan",
1865
+ // Kannada
1866
+ ml: "mal",
1867
+ // Malayalam
1868
+ pa: "pan",
1869
+ // Punjabi
1870
+ ur: "urd",
1871
+ // Urdu
1872
+ sw: "swa",
1873
+ // Swahili
1874
+ af: "afr",
1875
+ // Afrikaans
1876
+ ca: "cat",
1877
+ // Catalan
1878
+ eu: "eus",
1879
+ // Basque
1880
+ gl: "glg",
1881
+ // Galician
1882
+ is: "isl",
1883
+ // Icelandic
1884
+ et: "est",
1885
+ // Estonian
1886
+ lv: "lav",
1887
+ // Latvian
1888
+ lt: "lit"
1889
+ // Lithuanian
1890
+ };
1891
+ var ISO639_3_TO_1 = Object.fromEntries(
1892
+ Object.entries(ISO639_1_TO_3).map(([iso1, iso3]) => [iso3, iso1])
1893
+ );
1894
+ function toISO639_3(code) {
1895
+ const normalized = code.toLowerCase().trim();
1896
+ if (normalized.length === 3) {
1897
+ return normalized;
1898
+ }
1899
+ return ISO639_1_TO_3[normalized] ?? normalized;
1900
+ }
1901
+ function toISO639_1(code) {
1902
+ const normalized = code.toLowerCase().trim();
1903
+ if (normalized.length === 2) {
1904
+ return normalized;
1905
+ }
1906
+ return ISO639_3_TO_1[normalized] ?? normalized;
1907
+ }
1908
+ function getLanguageCodePair(code) {
1909
+ const normalized = code.toLowerCase().trim();
1910
+ if (normalized.length === 2) {
1911
+ return {
1912
+ iso639_1: normalized,
1913
+ iso639_3: toISO639_3(normalized)
1914
+ };
1915
+ } else if (normalized.length === 3) {
1916
+ return {
1917
+ iso639_1: toISO639_1(normalized),
1918
+ iso639_3: normalized
1919
+ };
1920
+ }
1921
+ return {
1922
+ iso639_1: normalized,
1923
+ iso639_3: normalized
1924
+ };
1925
+ }
1926
+ function getLanguageName(code) {
1927
+ const iso639_1 = toISO639_1(code);
1928
+ try {
1929
+ const displayNames = new Intl.DisplayNames(["en"], { type: "language" });
1930
+ return displayNames.of(iso639_1) ?? code.toUpperCase();
1931
+ } catch {
1932
+ return code.toUpperCase();
1933
+ }
1934
+ }
1935
+
1936
+ // src/workflows/translate-audio.ts
1780
1937
  var STATIC_RENDITION_POLL_INTERVAL_MS = 5e3;
1781
1938
  var STATIC_RENDITION_MAX_ATTEMPTS = 36;
1782
- var delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
1939
+ async function sleep(ms) {
1940
+ "use step";
1941
+ await new Promise((resolve) => setTimeout(resolve, ms));
1942
+ }
1783
1943
  function getReadyAudioStaticRendition(asset) {
1784
1944
  const files = asset.static_renditions?.files;
1785
1945
  if (!files || files.length === 0) {
@@ -1790,19 +1950,22 @@ function getReadyAudioStaticRendition(asset) {
1790
1950
  );
1791
1951
  }
1792
1952
  var hasReadyAudioStaticRendition = (asset) => Boolean(getReadyAudioStaticRendition(asset));
1793
- async function requestStaticRenditionCreation(muxClient, assetId) {
1794
- console.log("\u{1F4FC} Requesting static rendition from Mux...");
1953
+ async function requestStaticRenditionCreation(assetId) {
1954
+ "use step";
1955
+ const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
1956
+ const mux = new Mux3({
1957
+ tokenId: muxTokenId,
1958
+ tokenSecret: muxTokenSecret
1959
+ });
1795
1960
  try {
1796
- await muxClient.video.assets.createStaticRendition(assetId, {
1961
+ await mux.video.assets.createStaticRendition(assetId, {
1797
1962
  resolution: "audio-only"
1798
1963
  });
1799
- console.log("\u{1F4FC} Static rendition request accepted by Mux.");
1800
1964
  } catch (error) {
1801
1965
  const statusCode = error?.status ?? error?.statusCode;
1802
1966
  const messages = error?.error?.messages;
1803
1967
  const alreadyDefined = messages?.some((message2) => message2.toLowerCase().includes("already defined")) ?? error?.message?.toLowerCase().includes("already defined");
1804
1968
  if (statusCode === 409 || alreadyDefined) {
1805
- console.log("\u2139\uFE0F Static rendition already requested. Waiting for it to finish...");
1806
1969
  return;
1807
1970
  }
1808
1971
  const message = error instanceof Error ? error.message : "Unknown error";
@@ -1811,31 +1974,34 @@ async function requestStaticRenditionCreation(muxClient, assetId) {
1811
1974
  }
1812
1975
  async function waitForAudioStaticRendition({
1813
1976
  assetId,
1814
- muxClient,
1815
1977
  initialAsset
1816
1978
  }) {
1979
+ "use step";
1980
+ const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
1981
+ const mux = new Mux3({
1982
+ tokenId: muxTokenId,
1983
+ tokenSecret: muxTokenSecret
1984
+ });
1817
1985
  let currentAsset = initialAsset;
1818
1986
  if (hasReadyAudioStaticRendition(currentAsset)) {
1819
1987
  return currentAsset;
1820
1988
  }
1821
1989
  const status = currentAsset.static_renditions?.status ?? "not_requested";
1822
1990
  if (status === "not_requested" || status === void 0) {
1823
- await requestStaticRenditionCreation(muxClient, assetId);
1991
+ await requestStaticRenditionCreation(assetId);
1824
1992
  } else if (status === "errored") {
1825
- console.log("\u26A0\uFE0F Previous static rendition request errored. Creating a new one...");
1826
- await requestStaticRenditionCreation(muxClient, assetId);
1993
+ await requestStaticRenditionCreation(assetId);
1827
1994
  } else {
1828
- console.log(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
1995
+ console.warn(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
1829
1996
  }
1830
1997
  for (let attempt = 1; attempt <= STATIC_RENDITION_MAX_ATTEMPTS; attempt++) {
1831
- await delay(STATIC_RENDITION_POLL_INTERVAL_MS);
1832
- currentAsset = await muxClient.video.assets.retrieve(assetId);
1998
+ await sleep(STATIC_RENDITION_POLL_INTERVAL_MS);
1999
+ currentAsset = await mux.video.assets.retrieve(assetId);
1833
2000
  if (hasReadyAudioStaticRendition(currentAsset)) {
1834
- console.log("\u2705 Audio static rendition is ready!");
1835
2001
  return currentAsset;
1836
2002
  }
1837
2003
  const currentStatus = currentAsset.static_renditions?.status || "unknown";
1838
- console.log(
2004
+ console.warn(
1839
2005
  `\u231B Waiting for static rendition (attempt ${attempt}/${STATIC_RENDITION_MAX_ATTEMPTS}) \u2192 ${currentStatus}`
1840
2006
  );
1841
2007
  if (currentStatus === "errored") {
@@ -1848,55 +2014,179 @@ async function waitForAudioStaticRendition({
1848
2014
  "Timed out waiting for the static rendition to become ready. Please try again in a moment."
1849
2015
  );
1850
2016
  }
2017
+ async function fetchAudioFromMux(audioUrl) {
2018
+ "use step";
2019
+ const audioResponse = await fetch(audioUrl);
2020
+ if (!audioResponse.ok) {
2021
+ throw new Error(`Failed to fetch audio file: ${audioResponse.statusText}`);
2022
+ }
2023
+ return audioResponse.arrayBuffer();
2024
+ }
2025
+ async function createElevenLabsDubbingJob({
2026
+ audioBuffer,
2027
+ assetId,
2028
+ elevenLabsLangCode,
2029
+ numSpeakers
2030
+ }) {
2031
+ "use step";
2032
+ const elevenLabsApiKey = getApiKeyFromEnv("elevenlabs");
2033
+ const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
2034
+ const formData = new FormData();
2035
+ formData.append("file", audioBlob);
2036
+ formData.append("target_lang", elevenLabsLangCode);
2037
+ formData.append("num_speakers", numSpeakers.toString());
2038
+ formData.append("name", `Mux Asset ${assetId} - auto to ${elevenLabsLangCode}`);
2039
+ const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
2040
+ method: "POST",
2041
+ headers: {
2042
+ "xi-api-key": elevenLabsApiKey
2043
+ },
2044
+ body: formData
2045
+ });
2046
+ if (!dubbingResponse.ok) {
2047
+ throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
2048
+ }
2049
+ const dubbingData = await dubbingResponse.json();
2050
+ return dubbingData.dubbing_id;
2051
+ }
2052
+ async function checkElevenLabsDubbingStatus({
2053
+ dubbingId
2054
+ }) {
2055
+ "use step";
2056
+ const elevenLabsApiKey = getApiKeyFromEnv("elevenlabs");
2057
+ const statusResponse = await fetch(`https://api.elevenlabs.io/v1/dubbing/${dubbingId}`, {
2058
+ headers: {
2059
+ "xi-api-key": elevenLabsApiKey
2060
+ }
2061
+ });
2062
+ if (!statusResponse.ok) {
2063
+ throw new Error(`Status check failed: ${statusResponse.statusText}`);
2064
+ }
2065
+ const statusData = await statusResponse.json();
2066
+ return {
2067
+ status: statusData.status,
2068
+ targetLanguages: statusData.target_languages ?? []
2069
+ };
2070
+ }
2071
+ async function downloadDubbedAudioFromElevenLabs({
2072
+ dubbingId,
2073
+ languageCode
2074
+ }) {
2075
+ "use step";
2076
+ const elevenLabsApiKey = getApiKeyFromEnv("elevenlabs");
2077
+ const audioUrl = `https://api.elevenlabs.io/v1/dubbing/${dubbingId}/audio/${languageCode}`;
2078
+ const audioResponse = await fetch(audioUrl, {
2079
+ headers: {
2080
+ "xi-api-key": elevenLabsApiKey
2081
+ }
2082
+ });
2083
+ if (!audioResponse.ok) {
2084
+ throw new Error(`Failed to fetch dubbed audio: ${audioResponse.statusText}`);
2085
+ }
2086
+ return audioResponse.arrayBuffer();
2087
+ }
2088
+ async function uploadDubbedAudioToS3({
2089
+ dubbedAudioBuffer,
2090
+ assetId,
2091
+ toLanguageCode,
2092
+ s3Endpoint,
2093
+ s3Region,
2094
+ s3Bucket
2095
+ }) {
2096
+ "use step";
2097
+ const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
2098
+ const { Upload } = await import("@aws-sdk/lib-storage");
2099
+ const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
2100
+ const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
2101
+ const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
2102
+ const s3Client = new S3Client({
2103
+ region: s3Region,
2104
+ endpoint: s3Endpoint,
2105
+ credentials: {
2106
+ accessKeyId: s3AccessKeyId,
2107
+ secretAccessKey: s3SecretAccessKey
2108
+ },
2109
+ forcePathStyle: true
2110
+ });
2111
+ const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
2112
+ const upload = new Upload({
2113
+ client: s3Client,
2114
+ params: {
2115
+ Bucket: s3Bucket,
2116
+ Key: audioKey,
2117
+ Body: new Uint8Array(dubbedAudioBuffer),
2118
+ ContentType: "audio/mp4"
2119
+ }
2120
+ });
2121
+ await upload.done();
2122
+ const getObjectCommand = new GetObjectCommand({
2123
+ Bucket: s3Bucket,
2124
+ Key: audioKey
2125
+ });
2126
+ const presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
2127
+ expiresIn: 3600
2128
+ // 1 hour
2129
+ });
2130
+ console.warn(`\u2705 Audio uploaded successfully to: ${audioKey}`);
2131
+ console.warn(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
2132
+ return presignedUrl;
2133
+ }
2134
+ async function createAudioTrackOnMux(assetId, languageCode, presignedUrl) {
2135
+ "use step";
2136
+ const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
2137
+ const mux = new Mux3({
2138
+ tokenId: muxTokenId,
2139
+ tokenSecret: muxTokenSecret
2140
+ });
2141
+ const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(languageCode) || languageCode.toUpperCase();
2142
+ const trackName = `${languageName} (auto-dubbed)`;
2143
+ const trackResponse = await mux.video.assets.createTrack(assetId, {
2144
+ type: "audio",
2145
+ language_code: languageCode,
2146
+ name: trackName,
2147
+ url: presignedUrl
2148
+ });
2149
+ if (!trackResponse.id) {
2150
+ throw new Error("Failed to create audio track: no track ID returned from Mux");
2151
+ }
2152
+ return trackResponse.id;
2153
+ }
1851
2154
  async function translateAudio(assetId, toLanguageCode, options = {}) {
2155
+ "use workflow";
1852
2156
  const {
1853
2157
  provider = "elevenlabs",
1854
2158
  numSpeakers = 0,
1855
2159
  // 0 = auto-detect
1856
- muxTokenId,
1857
- muxTokenSecret,
1858
2160
  elevenLabsApiKey,
1859
2161
  uploadToMux = true
1860
2162
  } = options;
1861
2163
  if (provider !== "elevenlabs") {
1862
2164
  throw new Error("Only ElevenLabs provider is currently supported for audio translation");
1863
2165
  }
1864
- const muxId = muxTokenId ?? env_default.MUX_TOKEN_ID;
1865
- const muxSecret = muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
1866
2166
  const elevenLabsKey = elevenLabsApiKey ?? env_default.ELEVENLABS_API_KEY;
1867
2167
  const s3Endpoint = options.s3Endpoint ?? env_default.S3_ENDPOINT;
1868
2168
  const s3Region = options.s3Region ?? env_default.S3_REGION ?? "auto";
1869
2169
  const s3Bucket = options.s3Bucket ?? env_default.S3_BUCKET;
1870
- const s3AccessKeyId = options.s3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
1871
- const s3SecretAccessKey = options.s3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
1872
- if (!muxId || !muxSecret) {
1873
- throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
1874
- }
2170
+ const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
2171
+ const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
1875
2172
  if (!elevenLabsKey) {
1876
2173
  throw new Error("ElevenLabs API key is required. Provide elevenLabsApiKey in options or set ELEVENLABS_API_KEY environment variable.");
1877
2174
  }
1878
2175
  if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
1879
2176
  throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
1880
2177
  }
1881
- const mux = new import_mux_node3.default({
1882
- tokenId: muxId,
1883
- tokenSecret: muxSecret
1884
- });
1885
- console.log(`\u{1F3AC} Fetching Mux asset: ${assetId}`);
1886
- const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(mux, assetId);
1887
- const signingContext = resolveSigningContext(options);
2178
+ const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(assetId);
2179
+ const signingContext = getMuxSigningContextFromEnv();
1888
2180
  if (policy === "signed" && !signingContext) {
1889
2181
  throw new Error(
1890
2182
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
1891
2183
  );
1892
2184
  }
1893
- console.log("\u{1F50D} Checking for audio-only static rendition...");
1894
2185
  let currentAsset = initialAsset;
1895
2186
  if (!hasReadyAudioStaticRendition(currentAsset)) {
1896
- console.log("\u274C No ready audio static rendition found. Requesting one now...");
2187
+ console.warn("\u274C No ready audio static rendition found. Requesting one now...");
1897
2188
  currentAsset = await waitForAudioStaticRendition({
1898
2189
  assetId,
1899
- muxClient: mux,
1900
2190
  initialAsset: currentAsset
1901
2191
  });
1902
2192
  }
@@ -1910,58 +2200,42 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
1910
2200
  if (policy === "signed" && signingContext) {
1911
2201
  audioUrl = await signUrl(audioUrl, playbackId, signingContext, "video");
1912
2202
  }
1913
- console.log(`\u2705 Found audio rendition: ${audioUrl}`);
1914
- console.log(`\u{1F399}\uFE0F Creating ElevenLabs dubbing job (auto-detect \u2192 ${toLanguageCode})`);
2203
+ console.warn("\u{1F399}\uFE0F Fetching audio from Mux...");
2204
+ let audioBuffer;
2205
+ try {
2206
+ audioBuffer = await fetchAudioFromMux(audioUrl);
2207
+ } catch (error) {
2208
+ throw new Error(`Failed to fetch audio from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
2209
+ }
2210
+ console.warn("\u{1F399}\uFE0F Creating dubbing job in ElevenLabs...");
2211
+ const elevenLabsLangCode = toISO639_3(toLanguageCode);
2212
+ console.warn(`\u{1F50D} Creating dubbing job for asset ${assetId} with language code: ${elevenLabsLangCode}`);
1915
2213
  let dubbingId;
1916
2214
  try {
1917
- const audioResponse = await fetch(audioUrl);
1918
- if (!audioResponse.ok) {
1919
- throw new Error(`Failed to fetch audio file: ${audioResponse.statusText}`);
1920
- }
1921
- const audioBuffer = await audioResponse.arrayBuffer();
1922
- const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
1923
- const audioFile = audioBlob;
1924
- const formData = new FormData();
1925
- formData.append("file", audioFile);
1926
- formData.append("target_lang", toLanguageCode);
1927
- formData.append("num_speakers", numSpeakers.toString());
1928
- formData.append("name", `Mux Asset ${assetId} - auto to ${toLanguageCode}`);
1929
- const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
1930
- method: "POST",
1931
- headers: {
1932
- "xi-api-key": elevenLabsKey
1933
- },
1934
- body: formData
2215
+ dubbingId = await createElevenLabsDubbingJob({
2216
+ audioBuffer,
2217
+ assetId,
2218
+ elevenLabsLangCode,
2219
+ numSpeakers
1935
2220
  });
1936
- if (!dubbingResponse.ok) {
1937
- throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
1938
- }
1939
- const dubbingData = await dubbingResponse.json();
1940
- dubbingId = dubbingData.dubbing_id;
1941
- console.log(`\u2705 Dubbing job created: ${dubbingId}`);
1942
- console.log(`\u23F1\uFE0F Expected duration: ${dubbingData.expected_duration_sec}s`);
2221
+ console.warn(`\u2705 Dubbing job created with ID: ${dubbingId}`);
1943
2222
  } catch (error) {
1944
2223
  throw new Error(`Failed to create ElevenLabs dubbing job: ${error instanceof Error ? error.message : "Unknown error"}`);
1945
2224
  }
1946
- console.log("\u23F3 Waiting for dubbing to complete...");
2225
+ console.warn("\u23F3 Waiting for dubbing to complete...");
1947
2226
  let dubbingStatus = "dubbing";
1948
2227
  let pollAttempts = 0;
1949
2228
  const maxPollAttempts = 180;
2229
+ let targetLanguages = [];
1950
2230
  while (dubbingStatus === "dubbing" && pollAttempts < maxPollAttempts) {
1951
- await new Promise((resolve) => setTimeout(resolve, 1e4));
2231
+ await sleep(1e4);
1952
2232
  pollAttempts++;
1953
2233
  try {
1954
- const statusResponse = await fetch(`https://api.elevenlabs.io/v1/dubbing/${dubbingId}`, {
1955
- headers: {
1956
- "xi-api-key": elevenLabsKey
1957
- }
2234
+ const statusResult = await checkElevenLabsDubbingStatus({
2235
+ dubbingId
1958
2236
  });
1959
- if (!statusResponse.ok) {
1960
- throw new Error(`Status check failed: ${statusResponse.statusText}`);
1961
- }
1962
- const statusData = await statusResponse.json();
1963
- dubbingStatus = statusData.status;
1964
- console.log(`\u{1F4CA} Status check ${pollAttempts}: ${dubbingStatus}`);
2237
+ dubbingStatus = statusResult.status;
2238
+ targetLanguages = statusResult.targetLanguages;
1965
2239
  if (dubbingStatus === "failed") {
1966
2240
  throw new Error("ElevenLabs dubbing job failed");
1967
2241
  }
@@ -1972,89 +2246,74 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
1972
2246
  if (dubbingStatus !== "dubbed") {
1973
2247
  throw new Error(`Dubbing job timed out or failed. Final status: ${dubbingStatus}`);
1974
2248
  }
1975
- console.log("\u2705 Dubbing completed successfully!");
2249
+ console.warn("\u2705 Dubbing completed successfully!");
1976
2250
  if (!uploadToMux) {
2251
+ const targetLanguage2 = getLanguageCodePair(toLanguageCode);
1977
2252
  return {
1978
2253
  assetId,
1979
- targetLanguageCode: toLanguageCode,
2254
+ targetLanguageCode: targetLanguage2.iso639_1,
2255
+ targetLanguage: targetLanguage2,
1980
2256
  dubbingId
1981
2257
  };
1982
2258
  }
1983
- console.log("\u{1F4E5} Downloading dubbed audio from ElevenLabs...");
2259
+ console.warn("\u{1F4E5} Downloading dubbed audio from ElevenLabs...");
1984
2260
  let dubbedAudioBuffer;
1985
2261
  try {
1986
- const audioUrl2 = `https://api.elevenlabs.io/v1/dubbing/${dubbingId}/audio/${toLanguageCode}`;
1987
- const audioResponse = await fetch(audioUrl2, {
1988
- headers: {
1989
- "xi-api-key": elevenLabsKey
1990
- }
1991
- });
1992
- if (!audioResponse.ok) {
1993
- throw new Error(`Failed to fetch dubbed audio: ${audioResponse.statusText}`);
2262
+ const requestedLangCode = toISO639_3(toLanguageCode);
2263
+ let downloadLangCode = targetLanguages.find(
2264
+ (lang) => lang === requestedLangCode
2265
+ ) ?? targetLanguages.find(
2266
+ (lang) => lang.toLowerCase() === requestedLangCode.toLowerCase()
2267
+ );
2268
+ if (!downloadLangCode && targetLanguages.length > 0) {
2269
+ downloadLangCode = targetLanguages[0];
2270
+ console.warn(`\u26A0\uFE0F Requested language "${requestedLangCode}" not found in target_languages. Using "${downloadLangCode}" instead.`);
2271
+ }
2272
+ if (!downloadLangCode) {
2273
+ downloadLangCode = requestedLangCode;
2274
+ console.warn(`\u26A0\uFE0F No target_languages available from ElevenLabs status. Using requested language code: ${requestedLangCode}`);
1994
2275
  }
1995
- dubbedAudioBuffer = await audioResponse.arrayBuffer();
1996
- console.log(`\u2705 Downloaded dubbed audio (${dubbedAudioBuffer.byteLength} bytes)`);
2276
+ dubbedAudioBuffer = await downloadDubbedAudioFromElevenLabs({
2277
+ dubbingId,
2278
+ languageCode: downloadLangCode
2279
+ });
2280
+ console.warn("\u2705 Dubbed audio downloaded successfully!");
1997
2281
  } catch (error) {
1998
2282
  throw new Error(`Failed to download dubbed audio: ${error instanceof Error ? error.message : "Unknown error"}`);
1999
2283
  }
2000
- console.log("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
2001
- const s3Client = new import_client_s3.S3Client({
2002
- region: s3Region,
2003
- endpoint: s3Endpoint,
2004
- credentials: {
2005
- accessKeyId: s3AccessKeyId,
2006
- secretAccessKey: s3SecretAccessKey
2007
- },
2008
- forcePathStyle: true
2009
- });
2010
- const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
2284
+ console.warn("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
2011
2285
  let presignedUrl;
2012
2286
  try {
2013
- const upload = new import_lib_storage.Upload({
2014
- client: s3Client,
2015
- params: {
2016
- Bucket: s3Bucket,
2017
- Key: audioKey,
2018
- Body: new Uint8Array(dubbedAudioBuffer),
2019
- ContentType: "audio/mp4"
2020
- }
2021
- });
2022
- await upload.done();
2023
- console.log(`\u2705 Audio uploaded successfully to: ${audioKey}`);
2024
- const getObjectCommand = new import_client_s3.GetObjectCommand({
2025
- Bucket: s3Bucket,
2026
- Key: audioKey
2027
- });
2028
- presignedUrl = await (0, import_s3_request_presigner.getSignedUrl)(s3Client, getObjectCommand, {
2029
- expiresIn: 3600
2030
- // 1 hour
2287
+ presignedUrl = await uploadDubbedAudioToS3({
2288
+ dubbedAudioBuffer,
2289
+ assetId,
2290
+ toLanguageCode,
2291
+ s3Endpoint,
2292
+ s3Region,
2293
+ s3Bucket
2031
2294
  });
2032
- console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
2033
2295
  } catch (error) {
2034
2296
  throw new Error(`Failed to upload audio to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
2035
2297
  }
2036
- console.log("\u{1F3AC} Adding translated audio track to Mux asset...");
2298
+ console.warn("\u{1F4F9} Adding dubbed audio track to Mux asset...");
2037
2299
  let uploadedTrackId;
2300
+ const muxLangCode = toISO639_1(toLanguageCode);
2038
2301
  try {
2039
- const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(toLanguageCode) || toLanguageCode.toUpperCase();
2302
+ uploadedTrackId = await createAudioTrackOnMux(assetId, muxLangCode, presignedUrl);
2303
+ const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(muxLangCode) || muxLangCode.toUpperCase();
2040
2304
  const trackName = `${languageName} (auto-dubbed)`;
2041
- const trackResponse = await mux.video.assets.createTrack(assetId, {
2042
- type: "audio",
2043
- language_code: toLanguageCode,
2044
- name: trackName,
2045
- url: presignedUrl
2046
- });
2047
- uploadedTrackId = trackResponse.id;
2048
- console.log(`\u2705 Audio track added to Mux asset with ID: ${uploadedTrackId}`);
2049
- console.log(`\u{1F3B5} Track name: "${trackName}"`);
2305
+ console.warn(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
2306
+ console.warn(`\u{1F4CB} Track name: "${trackName}"`);
2050
2307
  } catch (error) {
2051
2308
  console.warn(`\u26A0\uFE0F Failed to add audio track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
2052
- console.log("\u{1F517} You can manually add the track using this presigned URL:");
2053
- console.log(presignedUrl);
2309
+ console.warn("\u{1F517} You can manually add the track using this presigned URL:");
2310
+ console.warn(presignedUrl);
2054
2311
  }
2312
+ const targetLanguage = getLanguageCodePair(toLanguageCode);
2055
2313
  return {
2056
2314
  assetId,
2057
- targetLanguageCode: toLanguageCode,
2315
+ targetLanguageCode: targetLanguage.iso639_1,
2316
+ targetLanguage,
2058
2317
  dubbingId,
2059
2318
  uploadedTrackId,
2060
2319
  presignedUrl
@@ -2062,43 +2321,143 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
2062
2321
  }
2063
2322
 
2064
2323
  // src/workflows/translate-captions.ts
2065
- var import_client_s32 = require("@aws-sdk/client-s3");
2066
- var import_lib_storage2 = require("@aws-sdk/lib-storage");
2067
- var import_s3_request_presigner2 = require("@aws-sdk/s3-request-presigner");
2068
- var import_ai5 = require("ai");
2069
- var import_zod5 = require("zod");
2070
- var translationSchema = import_zod5.z.object({
2071
- translation: import_zod5.z.string()
2324
+ import Mux4 from "@mux/mux-node";
2325
+ import { generateObject as generateObject4 } from "ai";
2326
+ import { z as z5 } from "zod";
2327
+ var translationSchema = z5.object({
2328
+ translation: z5.string()
2072
2329
  });
2073
- var DEFAULT_PROVIDER6 = "openai";
2330
+ async function fetchVttFromMux(vttUrl) {
2331
+ "use step";
2332
+ const vttResponse = await fetch(vttUrl);
2333
+ if (!vttResponse.ok) {
2334
+ throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
2335
+ }
2336
+ return vttResponse.text();
2337
+ }
2338
+ async function translateVttWithAI({
2339
+ vttContent,
2340
+ fromLanguageCode,
2341
+ toLanguageCode,
2342
+ provider,
2343
+ modelId,
2344
+ abortSignal
2345
+ }) {
2346
+ "use step";
2347
+ const languageModel = createLanguageModelFromConfig(provider, modelId);
2348
+ const response = await generateObject4({
2349
+ model: languageModel,
2350
+ schema: translationSchema,
2351
+ abortSignal,
2352
+ messages: [
2353
+ {
2354
+ role: "user",
2355
+ content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
2356
+
2357
+ ${vttContent}`
2358
+ }
2359
+ ]
2360
+ });
2361
+ return {
2362
+ translatedVtt: response.object.translation,
2363
+ usage: {
2364
+ inputTokens: response.usage.inputTokens,
2365
+ outputTokens: response.usage.outputTokens,
2366
+ totalTokens: response.usage.totalTokens,
2367
+ reasoningTokens: response.usage.reasoningTokens,
2368
+ cachedInputTokens: response.usage.cachedInputTokens
2369
+ }
2370
+ };
2371
+ }
2372
+ async function uploadVttToS3({
2373
+ translatedVtt,
2374
+ assetId,
2375
+ fromLanguageCode,
2376
+ toLanguageCode,
2377
+ s3Endpoint,
2378
+ s3Region,
2379
+ s3Bucket
2380
+ }) {
2381
+ "use step";
2382
+ const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
2383
+ const { Upload } = await import("@aws-sdk/lib-storage");
2384
+ const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
2385
+ const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
2386
+ const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
2387
+ const s3Client = new S3Client({
2388
+ region: s3Region,
2389
+ endpoint: s3Endpoint,
2390
+ credentials: {
2391
+ accessKeyId: s3AccessKeyId,
2392
+ secretAccessKey: s3SecretAccessKey
2393
+ },
2394
+ forcePathStyle: true
2395
+ });
2396
+ const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
2397
+ const upload = new Upload({
2398
+ client: s3Client,
2399
+ params: {
2400
+ Bucket: s3Bucket,
2401
+ Key: vttKey,
2402
+ Body: translatedVtt,
2403
+ ContentType: "text/vtt"
2404
+ }
2405
+ });
2406
+ await upload.done();
2407
+ const getObjectCommand = new GetObjectCommand({
2408
+ Bucket: s3Bucket,
2409
+ Key: vttKey
2410
+ });
2411
+ const presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
2412
+ expiresIn: 3600
2413
+ // 1 hour
2414
+ });
2415
+ return presignedUrl;
2416
+ }
2417
+ async function createTextTrackOnMux(assetId, languageCode, trackName, presignedUrl) {
2418
+ "use step";
2419
+ const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
2420
+ const mux = new Mux4({
2421
+ tokenId: muxTokenId,
2422
+ tokenSecret: muxTokenSecret
2423
+ });
2424
+ const trackResponse = await mux.video.assets.createTrack(assetId, {
2425
+ type: "text",
2426
+ text_type: "subtitles",
2427
+ language_code: languageCode,
2428
+ name: trackName,
2429
+ url: presignedUrl
2430
+ });
2431
+ if (!trackResponse.id) {
2432
+ throw new Error("Failed to create text track: no track ID returned from Mux");
2433
+ }
2434
+ return trackResponse.id;
2435
+ }
2074
2436
  async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options) {
2437
+ "use workflow";
2075
2438
  const {
2076
- provider = DEFAULT_PROVIDER6,
2439
+ provider = "openai",
2077
2440
  model,
2078
2441
  s3Endpoint: providedS3Endpoint,
2079
2442
  s3Region: providedS3Region,
2080
2443
  s3Bucket: providedS3Bucket,
2081
- s3AccessKeyId: providedS3AccessKeyId,
2082
- s3SecretAccessKey: providedS3SecretAccessKey,
2083
- uploadToMux: uploadToMuxOption,
2084
- ...clientConfig
2444
+ uploadToMux: uploadToMuxOption
2085
2445
  } = options;
2086
- const resolvedProvider = provider;
2087
2446
  const s3Endpoint = providedS3Endpoint ?? env_default.S3_ENDPOINT;
2088
2447
  const s3Region = providedS3Region ?? env_default.S3_REGION ?? "auto";
2089
2448
  const s3Bucket = providedS3Bucket ?? env_default.S3_BUCKET;
2090
- const s3AccessKeyId = providedS3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
2091
- const s3SecretAccessKey = providedS3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
2449
+ const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
2450
+ const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
2092
2451
  const uploadToMux = uploadToMuxOption !== false;
2093
- const clients = createWorkflowClients(
2094
- { ...clientConfig, provider: resolvedProvider, model },
2095
- resolvedProvider
2452
+ const config = await createWorkflowConfig(
2453
+ { ...options, model },
2454
+ provider
2096
2455
  );
2097
2456
  if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
2098
2457
  throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
2099
2458
  }
2100
- const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
2101
- const signingContext = resolveSigningContext(options);
2459
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
2460
+ const signingContext = getMuxSigningContextFromEnv();
2102
2461
  if (policy === "signed" && !signingContext) {
2103
2462
  throw new Error(
2104
2463
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -2119,120 +2478,81 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
2119
2478
  }
2120
2479
  let vttContent;
2121
2480
  try {
2122
- const vttResponse = await fetch(vttUrl);
2123
- if (!vttResponse.ok) {
2124
- throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
2125
- }
2126
- vttContent = await vttResponse.text();
2481
+ vttContent = await fetchVttFromMux(vttUrl);
2127
2482
  } catch (error) {
2128
2483
  throw new Error(`Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`);
2129
2484
  }
2130
- console.log(`\u2705 Found VTT content for language '${fromLanguageCode}'`);
2131
2485
  let translatedVtt;
2486
+ let usage;
2132
2487
  try {
2133
- const response = await (0, import_ai5.generateObject)({
2134
- model: clients.languageModel.model,
2135
- schema: translationSchema,
2136
- abortSignal: options.abortSignal,
2137
- messages: [
2138
- {
2139
- role: "user",
2140
- content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
2141
-
2142
- ${vttContent}`
2143
- }
2144
- ]
2488
+ const result = await translateVttWithAI({
2489
+ vttContent,
2490
+ fromLanguageCode,
2491
+ toLanguageCode,
2492
+ provider: config.provider,
2493
+ modelId: config.modelId,
2494
+ abortSignal: options.abortSignal
2145
2495
  });
2146
- translatedVtt = response.object.translation;
2496
+ translatedVtt = result.translatedVtt;
2497
+ usage = result.usage;
2147
2498
  } catch (error) {
2148
- throw new Error(`Failed to translate VTT with ${resolvedProvider}: ${error instanceof Error ? error.message : "Unknown error"}`);
2499
+ throw new Error(`Failed to translate VTT with ${config.provider}: ${error instanceof Error ? error.message : "Unknown error"}`);
2149
2500
  }
2150
- console.log(`
2151
- \u2705 Translation completed successfully!`);
2501
+ const sourceLanguage = getLanguageCodePair(fromLanguageCode);
2502
+ const targetLanguage = getLanguageCodePair(toLanguageCode);
2152
2503
  if (!uploadToMux) {
2153
- console.log(`\u2705 VTT translated to ${toLanguageCode} successfully!`);
2154
2504
  return {
2155
2505
  assetId,
2156
2506
  sourceLanguageCode: fromLanguageCode,
2157
2507
  targetLanguageCode: toLanguageCode,
2508
+ sourceLanguage,
2509
+ targetLanguage,
2158
2510
  originalVtt: vttContent,
2159
- translatedVtt
2511
+ translatedVtt,
2512
+ usage
2160
2513
  };
2161
2514
  }
2162
- console.log("\u{1F4E4} Uploading translated VTT to S3-compatible storage...");
2163
- const s3Client = new import_client_s32.S3Client({
2164
- region: s3Region,
2165
- endpoint: s3Endpoint,
2166
- credentials: {
2167
- accessKeyId: s3AccessKeyId,
2168
- secretAccessKey: s3SecretAccessKey
2169
- },
2170
- forcePathStyle: true
2171
- // Often needed for non-AWS S3 services
2172
- });
2173
- const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
2174
2515
  let presignedUrl;
2175
2516
  try {
2176
- const upload = new import_lib_storage2.Upload({
2177
- client: s3Client,
2178
- params: {
2179
- Bucket: s3Bucket,
2180
- Key: vttKey,
2181
- Body: translatedVtt,
2182
- ContentType: "text/vtt"
2183
- }
2184
- });
2185
- await upload.done();
2186
- console.log(`\u2705 VTT uploaded successfully to: ${vttKey}`);
2187
- const getObjectCommand = new import_client_s32.GetObjectCommand({
2188
- Bucket: s3Bucket,
2189
- Key: vttKey
2190
- });
2191
- presignedUrl = await (0, import_s3_request_presigner2.getSignedUrl)(s3Client, getObjectCommand, {
2192
- expiresIn: 3600
2193
- // 1 hour
2517
+ presignedUrl = await uploadVttToS3({
2518
+ translatedVtt,
2519
+ assetId,
2520
+ fromLanguageCode,
2521
+ toLanguageCode,
2522
+ s3Endpoint,
2523
+ s3Region,
2524
+ s3Bucket
2194
2525
  });
2195
- console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
2196
2526
  } catch (error) {
2197
2527
  throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
2198
2528
  }
2199
- console.log("\u{1F4F9} Adding translated track to Mux asset...");
2200
2529
  let uploadedTrackId;
2201
2530
  try {
2202
- const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(toLanguageCode) || toLanguageCode.toUpperCase();
2531
+ const languageName = getLanguageName(toLanguageCode);
2203
2532
  const trackName = `${languageName} (auto-translated)`;
2204
- const trackResponse = await clients.mux.video.assets.createTrack(assetId, {
2205
- type: "text",
2206
- text_type: "subtitles",
2207
- language_code: toLanguageCode,
2208
- name: trackName,
2209
- url: presignedUrl
2210
- });
2211
- uploadedTrackId = trackResponse.id;
2212
- console.log(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
2213
- console.log(`\u{1F4CB} Track name: "${trackName}"`);
2533
+ uploadedTrackId = await createTextTrackOnMux(assetId, toLanguageCode, trackName, presignedUrl);
2214
2534
  } catch (error) {
2215
- console.warn(`\u26A0\uFE0F Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
2216
- console.log("\u{1F517} You can manually add the track using this presigned URL:");
2217
- console.log(presignedUrl);
2535
+ console.warn(`Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
2218
2536
  }
2219
2537
  return {
2220
2538
  assetId,
2221
2539
  sourceLanguageCode: fromLanguageCode,
2222
2540
  targetLanguageCode: toLanguageCode,
2541
+ sourceLanguage,
2542
+ targetLanguage,
2223
2543
  originalVtt: vttContent,
2224
2544
  translatedVtt,
2225
2545
  uploadedTrackId,
2226
- presignedUrl
2546
+ presignedUrl,
2547
+ usage
2227
2548
  };
2228
2549
  }
2229
2550
 
2230
2551
  // src/index.ts
2231
2552
  var version = "0.1.0";
2232
- // Annotate the CommonJS export names for ESM import in node:
2233
- 0 && (module.exports = {
2234
- primitives,
2553
+ export {
2554
+ primitives_exports as primitives,
2235
2555
  version,
2236
- workflows
2237
- });
2556
+ workflows_exports as workflows
2557
+ };
2238
2558
  //# sourceMappingURL=index.js.map