@mux/ai 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,40 +1,8 @@
1
- "use strict";
2
- var __create = Object.create;
3
1
  var __defProp = Object.defineProperty;
4
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
- var __getOwnPropNames = Object.getOwnPropertyNames;
6
- var __getProtoOf = Object.getPrototypeOf;
7
- var __hasOwnProp = Object.prototype.hasOwnProperty;
8
2
  var __export = (target, all) => {
9
3
  for (var name in all)
10
4
  __defProp(target, name, { get: all[name], enumerable: true });
11
5
  };
12
- var __copyProps = (to, from, except, desc) => {
13
- if (from && typeof from === "object" || typeof from === "function") {
14
- for (let key of __getOwnPropNames(from))
15
- if (!__hasOwnProp.call(to, key) && key !== except)
16
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
- }
18
- return to;
19
- };
20
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
- // If the importer is in node compatibility mode or this is not an ESM
22
- // file that has been converted to a CommonJS file using a Babel-
23
- // compatible transform (i.e. "__esModule" has not been set), then set
24
- // "default" to the CommonJS "module.exports" for node compatibility.
25
- isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
- mod
27
- ));
28
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
-
30
- // src/index.ts
31
- var index_exports = {};
32
- __export(index_exports, {
33
- primitives: () => primitives_exports,
34
- version: () => version,
35
- workflows: () => workflows_exports
36
- });
37
- module.exports = __toCommonJS(index_exports);
38
6
 
39
7
  // src/primitives/index.ts
40
8
  var primitives_exports = {};
@@ -57,33 +25,25 @@ __export(primitives_exports, {
57
25
  });
58
26
 
59
27
  // src/lib/url-signing.ts
60
- var import_mux_node = __toESM(require("@mux/mux-node"));
28
+ import Mux from "@mux/mux-node";
61
29
 
62
30
  // src/env.ts
63
- var import_node_path = __toESM(require("path"));
64
- var import_dotenv = require("dotenv");
65
- var import_dotenv_expand = require("dotenv-expand");
66
- var import_zod = require("zod");
67
- (0, import_dotenv_expand.expand)((0, import_dotenv.config)({
68
- path: import_node_path.default.resolve(
69
- process.cwd(),
70
- process.env.NODE_ENV === "test" ? ".env.test" : ".env"
71
- )
72
- }));
31
+ import { z } from "zod";
32
+ import "dotenv/config";
73
33
  function optionalString(description, message) {
74
- return import_zod.z.preprocess(
34
+ return z.preprocess(
75
35
  (value) => typeof value === "string" && value.trim().length === 0 ? void 0 : value,
76
- import_zod.z.string().trim().min(1, message).optional()
36
+ z.string().trim().min(1, message).optional()
77
37
  ).describe(description);
78
38
  }
79
39
  function requiredString(description, message) {
80
- return import_zod.z.preprocess(
40
+ return z.preprocess(
81
41
  (value) => typeof value === "string" ? value.trim().length > 0 ? value.trim() : void 0 : value,
82
- import_zod.z.string().trim().min(1, message)
42
+ z.string().trim().min(1, message)
83
43
  ).describe(description);
84
44
  }
85
- var EnvSchema = import_zod.z.object({
86
- NODE_ENV: import_zod.z.string().default("development").describe("Runtime environment."),
45
+ var EnvSchema = z.object({
46
+ NODE_ENV: z.string().default("development").describe("Runtime environment."),
87
47
  MUX_TOKEN_ID: requiredString("Mux access token ID.", "Required to access Mux APIs"),
88
48
  MUX_TOKEN_SECRET: requiredString("Mux access token secret.", "Required to access Mux APIs"),
89
49
  MUX_SIGNING_KEY: optionalString("Mux signing key ID for signed playback URLs.", "Used to sign playback URLs"),
@@ -112,16 +72,17 @@ var env = parseEnv();
112
72
  var env_default = env;
113
73
 
114
74
  // src/lib/url-signing.ts
115
- function resolveSigningContext(config2) {
116
- const keyId = config2.muxSigningKey ?? env_default.MUX_SIGNING_KEY;
117
- const keySecret = config2.muxPrivateKey ?? env_default.MUX_PRIVATE_KEY;
75
+ async function resolveSigningContext(config) {
76
+ "use step";
77
+ const keyId = config.muxSigningKey ?? env_default.MUX_SIGNING_KEY;
78
+ const keySecret = config.muxPrivateKey ?? env_default.MUX_PRIVATE_KEY;
118
79
  if (!keyId || !keySecret) {
119
80
  return void 0;
120
81
  }
121
82
  return { keyId, keySecret };
122
83
  }
123
84
  function createSigningClient(context) {
124
- return new import_mux_node.default({
85
+ return new Mux({
125
86
  // These are not needed for signing, but the SDK requires them
126
87
  // Using empty strings as we only need the jwt functionality
127
88
  tokenId: env_default.MUX_TOKEN_ID || "",
@@ -131,6 +92,7 @@ function createSigningClient(context) {
131
92
  });
132
93
  }
133
94
  async function signPlaybackId(playbackId, context, type = "video", params) {
95
+ "use step";
134
96
  const client = createSigningClient(context);
135
97
  const stringParams = params ? Object.fromEntries(
136
98
  Object.entries(params).map(([key, value]) => [key, String(value)])
@@ -142,6 +104,7 @@ async function signPlaybackId(playbackId, context, type = "video", params) {
142
104
  });
143
105
  }
144
106
  async function signUrl(url, playbackId, context, type = "video", params) {
107
+ "use step";
145
108
  const token = await signPlaybackId(playbackId, context, type, params);
146
109
  const separator = url.includes("?") ? "&" : "?";
147
110
  return `${url}${separator}token=${token}`;
@@ -150,6 +113,7 @@ async function signUrl(url, playbackId, context, type = "video", params) {
150
113
  // src/primitives/storyboards.ts
151
114
  var DEFAULT_STORYBOARD_WIDTH = 640;
152
115
  async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, signingContext) {
116
+ "use step";
153
117
  const baseUrl = `https://image.mux.com/${playbackId}/storyboard.png`;
154
118
  if (signingContext) {
155
119
  return signUrl(baseUrl, playbackId, signingContext, "storyboard", { width });
@@ -244,6 +208,7 @@ function chunkText(text, strategy) {
244
208
 
245
209
  // src/primitives/thumbnails.ts
246
210
  async function getThumbnailUrls(playbackId, duration, options = {}) {
211
+ "use step";
247
212
  const { interval = 10, width = 640, signingContext } = options;
248
213
  const timestamps = [];
249
214
  if (duration <= 50) {
@@ -375,6 +340,7 @@ function parseVTTCues(vttContent) {
375
340
  return cues;
376
341
  }
377
342
  async function buildTranscriptUrl(playbackId, trackId, signingContext) {
343
+ "use step";
378
344
  const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
379
345
  if (signingContext) {
380
346
  return signUrl(baseUrl, playbackId, signingContext, "video");
@@ -382,6 +348,7 @@ async function buildTranscriptUrl(playbackId, trackId, signingContext) {
382
348
  return baseUrl;
383
349
  }
384
350
  async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
351
+ "use step";
385
352
  const { languageCode, cleanTranscript = true, signingContext } = options;
386
353
  const track = findCaptionTrack(asset, languageCode);
387
354
  if (!track) {
@@ -424,130 +391,17 @@ __export(workflows_exports, {
424
391
  });
425
392
 
426
393
  // src/workflows/burned-in-captions.ts
427
- var import_ai = require("ai");
428
-
429
- // node_modules/dedent/dist/dedent.mjs
430
- function ownKeys(object, enumerableOnly) {
431
- var keys = Object.keys(object);
432
- if (Object.getOwnPropertySymbols) {
433
- var symbols = Object.getOwnPropertySymbols(object);
434
- enumerableOnly && (symbols = symbols.filter(function(sym) {
435
- return Object.getOwnPropertyDescriptor(object, sym).enumerable;
436
- })), keys.push.apply(keys, symbols);
437
- }
438
- return keys;
439
- }
440
- function _objectSpread(target) {
441
- for (var i = 1; i < arguments.length; i++) {
442
- var source = null != arguments[i] ? arguments[i] : {};
443
- i % 2 ? ownKeys(Object(source), true).forEach(function(key) {
444
- _defineProperty(target, key, source[key]);
445
- }) : Object.getOwnPropertyDescriptors ? Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)) : ownKeys(Object(source)).forEach(function(key) {
446
- Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
447
- });
448
- }
449
- return target;
450
- }
451
- function _defineProperty(obj, key, value) {
452
- key = _toPropertyKey(key);
453
- if (key in obj) {
454
- Object.defineProperty(obj, key, { value, enumerable: true, configurable: true, writable: true });
455
- } else {
456
- obj[key] = value;
457
- }
458
- return obj;
459
- }
460
- function _toPropertyKey(arg) {
461
- var key = _toPrimitive(arg, "string");
462
- return typeof key === "symbol" ? key : String(key);
463
- }
464
- function _toPrimitive(input, hint) {
465
- if (typeof input !== "object" || input === null) return input;
466
- var prim = input[Symbol.toPrimitive];
467
- if (prim !== void 0) {
468
- var res = prim.call(input, hint || "default");
469
- if (typeof res !== "object") return res;
470
- throw new TypeError("@@toPrimitive must return a primitive value.");
471
- }
472
- return (hint === "string" ? String : Number)(input);
473
- }
474
- var dedent = createDedent({});
475
- var dedent_default = dedent;
476
- function createDedent(options) {
477
- dedent2.withOptions = (newOptions) => createDedent(_objectSpread(_objectSpread({}, options), newOptions));
478
- return dedent2;
479
- function dedent2(strings, ...values) {
480
- const raw = typeof strings === "string" ? [strings] : strings.raw;
481
- const {
482
- alignValues = false,
483
- escapeSpecialCharacters = Array.isArray(strings),
484
- trimWhitespace = true
485
- } = options;
486
- let result = "";
487
- for (let i = 0; i < raw.length; i++) {
488
- let next = raw[i];
489
- if (escapeSpecialCharacters) {
490
- next = next.replace(/\\\n[ \t]*/g, "").replace(/\\`/g, "`").replace(/\\\$/g, "$").replace(/\\\{/g, "{");
491
- }
492
- result += next;
493
- if (i < values.length) {
494
- const value = alignValues ? alignValue(values[i], result) : values[i];
495
- result += value;
496
- }
497
- }
498
- const lines = result.split("\n");
499
- let mindent = null;
500
- for (const l of lines) {
501
- const m = l.match(/^(\s+)\S+/);
502
- if (m) {
503
- const indent = m[1].length;
504
- if (!mindent) {
505
- mindent = indent;
506
- } else {
507
- mindent = Math.min(mindent, indent);
508
- }
509
- }
510
- }
511
- if (mindent !== null) {
512
- const m = mindent;
513
- result = lines.map((l) => l[0] === " " || l[0] === " " ? l.slice(m) : l).join("\n");
514
- }
515
- if (trimWhitespace) {
516
- result = result.trim();
517
- }
518
- if (escapeSpecialCharacters) {
519
- result = result.replace(/\\n/g, "\n");
520
- }
521
- return result;
522
- }
523
- }
524
- function alignValue(value, precedingText) {
525
- if (typeof value !== "string" || !value.includes("\n")) {
526
- return value;
527
- }
528
- const currentLine = precedingText.slice(precedingText.lastIndexOf("\n") + 1);
529
- const indentMatch = currentLine.match(/^(\s+)/);
530
- if (indentMatch) {
531
- const indent = indentMatch[1];
532
- return value.replace(/\n/g, `
533
- ${indent}`);
534
- }
535
- return value;
536
- }
537
-
538
- // src/workflows/burned-in-captions.ts
539
- var import_zod2 = require("zod");
540
-
541
- // src/lib/client-factory.ts
542
- var import_mux_node2 = __toESM(require("@mux/mux-node"));
394
+ import { generateObject } from "ai";
395
+ import dedent from "dedent";
396
+ import { z as z2 } from "zod";
543
397
 
544
398
  // src/lib/providers.ts
545
- var import_anthropic = require("@ai-sdk/anthropic");
546
- var import_google = require("@ai-sdk/google");
547
- var import_openai = require("@ai-sdk/openai");
399
+ import { createAnthropic } from "@ai-sdk/anthropic";
400
+ import { createGoogleGenerativeAI } from "@ai-sdk/google";
401
+ import { createOpenAI } from "@ai-sdk/openai";
548
402
  var DEFAULT_LANGUAGE_MODELS = {
549
- openai: "gpt-5-mini",
550
- anthropic: "claude-haiku-4-5",
403
+ openai: "gpt-5.1",
404
+ anthropic: "claude-sonnet-4-5",
551
405
  google: "gemini-2.5-flash"
552
406
  };
553
407
  var DEFAULT_EMBEDDING_MODELS = {
@@ -560,6 +414,52 @@ function requireEnv(value, name) {
560
414
  }
561
415
  return value;
562
416
  }
417
+ function createLanguageModelFromConfig(provider, modelId, credentials) {
418
+ switch (provider) {
419
+ case "openai": {
420
+ const apiKey = credentials.openaiApiKey;
421
+ requireEnv(apiKey, "OPENAI_API_KEY");
422
+ const openai = createOpenAI({ apiKey });
423
+ return openai(modelId);
424
+ }
425
+ case "anthropic": {
426
+ const apiKey = credentials.anthropicApiKey;
427
+ requireEnv(apiKey, "ANTHROPIC_API_KEY");
428
+ const anthropic = createAnthropic({ apiKey });
429
+ return anthropic(modelId);
430
+ }
431
+ case "google": {
432
+ const apiKey = credentials.googleApiKey;
433
+ requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
434
+ const google = createGoogleGenerativeAI({ apiKey });
435
+ return google(modelId);
436
+ }
437
+ default: {
438
+ const exhaustiveCheck = provider;
439
+ throw new Error(`Unsupported provider: ${exhaustiveCheck}`);
440
+ }
441
+ }
442
+ }
443
+ function createEmbeddingModelFromConfig(provider, modelId, credentials) {
444
+ switch (provider) {
445
+ case "openai": {
446
+ const apiKey = credentials.openaiApiKey;
447
+ requireEnv(apiKey, "OPENAI_API_KEY");
448
+ const openai = createOpenAI({ apiKey });
449
+ return openai.embedding(modelId);
450
+ }
451
+ case "google": {
452
+ const apiKey = credentials.googleApiKey;
453
+ requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
454
+ const google = createGoogleGenerativeAI({ apiKey });
455
+ return google.textEmbeddingModel(modelId);
456
+ }
457
+ default: {
458
+ const exhaustiveCheck = provider;
459
+ throw new Error(`Unsupported embedding provider: ${exhaustiveCheck}`);
460
+ }
461
+ }
462
+ }
563
463
  function resolveLanguageModel(options = {}) {
564
464
  const provider = options.provider || "openai";
565
465
  const modelId = options.model || DEFAULT_LANGUAGE_MODELS[provider];
@@ -567,7 +467,7 @@ function resolveLanguageModel(options = {}) {
567
467
  case "openai": {
568
468
  const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
569
469
  requireEnv(apiKey, "OPENAI_API_KEY");
570
- const openai = (0, import_openai.createOpenAI)({
470
+ const openai = createOpenAI({
571
471
  apiKey
572
472
  });
573
473
  return {
@@ -579,7 +479,7 @@ function resolveLanguageModel(options = {}) {
579
479
  case "anthropic": {
580
480
  const apiKey = options.anthropicApiKey ?? env_default.ANTHROPIC_API_KEY;
581
481
  requireEnv(apiKey, "ANTHROPIC_API_KEY");
582
- const anthropic = (0, import_anthropic.createAnthropic)({
482
+ const anthropic = createAnthropic({
583
483
  apiKey
584
484
  });
585
485
  return {
@@ -591,7 +491,7 @@ function resolveLanguageModel(options = {}) {
591
491
  case "google": {
592
492
  const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
593
493
  requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
594
- const google = (0, import_google.createGoogleGenerativeAI)({
494
+ const google = createGoogleGenerativeAI({
595
495
  apiKey
596
496
  });
597
497
  return {
@@ -613,7 +513,7 @@ function resolveEmbeddingModel(options = {}) {
613
513
  case "openai": {
614
514
  const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
615
515
  requireEnv(apiKey, "OPENAI_API_KEY");
616
- const openai = (0, import_openai.createOpenAI)({
516
+ const openai = createOpenAI({
617
517
  apiKey
618
518
  });
619
519
  return {
@@ -625,7 +525,7 @@ function resolveEmbeddingModel(options = {}) {
625
525
  case "google": {
626
526
  const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
627
527
  requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
628
- const google = (0, import_google.createGoogleGenerativeAI)({
528
+ const google = createGoogleGenerativeAI({
629
529
  apiKey
630
530
  });
631
531
  return {
@@ -642,7 +542,8 @@ function resolveEmbeddingModel(options = {}) {
642
542
  }
643
543
 
644
544
  // src/lib/client-factory.ts
645
- function validateCredentials(options, requiredProvider) {
545
+ async function validateCredentials(options, requiredProvider) {
546
+ "use step";
646
547
  const muxTokenId = options.muxTokenId ?? env_default.MUX_TOKEN_ID;
647
548
  const muxTokenSecret = options.muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
648
549
  const openaiApiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
@@ -676,32 +577,24 @@ function validateCredentials(options, requiredProvider) {
676
577
  googleApiKey
677
578
  };
678
579
  }
679
- function createMuxClient(credentials) {
680
- if (!credentials.muxTokenId || !credentials.muxTokenSecret) {
681
- throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
682
- }
683
- return new import_mux_node2.default({
684
- tokenId: credentials.muxTokenId,
685
- tokenSecret: credentials.muxTokenSecret
686
- });
687
- }
688
- function createWorkflowClients(options, provider) {
580
+ async function createWorkflowConfig(options, provider) {
581
+ "use step";
689
582
  const providerToUse = provider || options.provider || "openai";
690
- const credentials = validateCredentials(options, providerToUse);
691
- const languageModel = resolveLanguageModel({
583
+ const credentials = await validateCredentials(options, providerToUse);
584
+ const resolved = resolveLanguageModel({
692
585
  ...options,
693
586
  provider: providerToUse
694
587
  });
695
588
  return {
696
- mux: createMuxClient(credentials),
697
- languageModel,
698
- credentials
589
+ credentials,
590
+ provider: resolved.provider,
591
+ modelId: resolved.modelId
699
592
  };
700
593
  }
701
594
 
702
595
  // src/lib/image-download.ts
703
- var import_node_buffer = require("buffer");
704
- var import_p_retry = __toESM(require("p-retry"));
596
+ import { Buffer as Buffer2 } from "buffer";
597
+ import pRetry, { AbortError } from "p-retry";
705
598
  var DEFAULT_OPTIONS = {
706
599
  timeout: 1e4,
707
600
  retries: 3,
@@ -710,9 +603,10 @@ var DEFAULT_OPTIONS = {
710
603
  exponentialBackoff: true
711
604
  };
712
605
  async function downloadImageAsBase64(url, options = {}) {
606
+ "use step";
713
607
  const opts = { ...DEFAULT_OPTIONS, ...options };
714
608
  let attemptCount = 0;
715
- return (0, import_p_retry.default)(
609
+ return pRetry(
716
610
  async () => {
717
611
  attemptCount++;
718
612
  const controller = new AbortController();
@@ -727,18 +621,18 @@ async function downloadImageAsBase64(url, options = {}) {
727
621
  clearTimeout(timeoutId);
728
622
  if (!response.ok) {
729
623
  if (response.status >= 400 && response.status < 500 && response.status !== 429) {
730
- throw new import_p_retry.AbortError(`HTTP ${response.status}: ${response.statusText}`);
624
+ throw new AbortError(`HTTP ${response.status}: ${response.statusText}`);
731
625
  }
732
626
  throw new Error(`HTTP ${response.status}: ${response.statusText}`);
733
627
  }
734
628
  const contentType = response.headers.get("content-type");
735
629
  if (!contentType?.startsWith("image/")) {
736
- throw new import_p_retry.AbortError(`Invalid content type: ${contentType}. Expected image/*`);
630
+ throw new AbortError(`Invalid content type: ${contentType}. Expected image/*`);
737
631
  }
738
632
  const arrayBuffer = await response.arrayBuffer();
739
- const buffer = import_node_buffer.Buffer.from(arrayBuffer);
633
+ const buffer = Buffer2.from(arrayBuffer);
740
634
  if (buffer.length === 0) {
741
- throw new import_p_retry.AbortError("Downloaded image is empty");
635
+ throw new AbortError("Downloaded image is empty");
742
636
  }
743
637
  const base64Data = `data:${contentType};base64,${buffer.toString("base64")}`;
744
638
  return {
@@ -751,7 +645,7 @@ async function downloadImageAsBase64(url, options = {}) {
751
645
  };
752
646
  } catch (error) {
753
647
  clearTimeout(timeoutId);
754
- if (error instanceof import_p_retry.AbortError) {
648
+ if (error instanceof AbortError) {
755
649
  throw error;
756
650
  }
757
651
  if (error instanceof Error) {
@@ -780,6 +674,7 @@ async function downloadImageAsBase64(url, options = {}) {
780
674
  );
781
675
  }
782
676
  async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
677
+ "use step";
783
678
  const results = [];
784
679
  for (let i = 0; i < urls.length; i += maxConcurrent) {
785
680
  const batch = urls.slice(i, i + maxConcurrent);
@@ -791,6 +686,7 @@ async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
791
686
  }
792
687
 
793
688
  // src/lib/mux-assets.ts
689
+ import Mux2 from "@mux/mux-node";
794
690
  function getPlaybackId(asset) {
795
691
  const playbackIds = asset.playback_ids || [];
796
692
  const publicPlaybackId = playbackIds.find((pid) => pid.policy === "public");
@@ -805,7 +701,12 @@ function getPlaybackId(asset) {
805
701
  "No public or signed playback ID found for this asset. A public or signed playback ID is required. DRM playback IDs are not currently supported."
806
702
  );
807
703
  }
808
- async function getPlaybackIdForAsset(mux, assetId) {
704
+ async function getPlaybackIdForAsset(credentials, assetId) {
705
+ "use step";
706
+ const mux = new Mux2({
707
+ tokenId: credentials.muxTokenId,
708
+ tokenSecret: credentials.muxTokenSecret
709
+ });
809
710
  const asset = await mux.video.assets.retrieve(assetId);
810
711
  const { id: playbackId, policy } = getPlaybackId(asset);
811
712
  return { asset, playbackId, policy };
@@ -844,8 +745,8 @@ function resolveSection(defaultSection, override) {
844
745
  }
845
746
  return override;
846
747
  }
847
- function createPromptBuilder(config2) {
848
- const { template, sectionOrder } = config2;
748
+ function createPromptBuilder(config) {
749
+ const { template, sectionOrder } = config;
849
750
  const getSection = (section, override) => {
850
751
  const resolved = resolveSection(template[section], override);
851
752
  return renderSection(resolved);
@@ -886,12 +787,12 @@ function createToneSection(instruction) {
886
787
  }
887
788
 
888
789
  // src/workflows/burned-in-captions.ts
889
- var burnedInCaptionsSchema = import_zod2.z.object({
890
- hasBurnedInCaptions: import_zod2.z.boolean(),
891
- confidence: import_zod2.z.number().min(0).max(1),
892
- detectedLanguage: import_zod2.z.string().nullable()
790
+ var burnedInCaptionsSchema = z2.object({
791
+ hasBurnedInCaptions: z2.boolean(),
792
+ confidence: z2.number().min(0).max(1),
793
+ detectedLanguage: z2.string().nullable()
893
794
  });
894
- var SYSTEM_PROMPT = dedent_default`
795
+ var SYSTEM_PROMPT = dedent`
895
796
  <role>
896
797
  You are an expert at analyzing video frames to detect burned-in captions (also called open captions or hardcoded subtitles).
897
798
  These are text overlays that are permanently embedded in the video image, common on TikTok, Instagram Reels, and other social media platforms.
@@ -934,14 +835,14 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
934
835
  template: {
935
836
  task: {
936
837
  tag: "task",
937
- content: dedent_default`
838
+ content: dedent`
938
839
  Analyze the provided video storyboard to detect burned-in captions (hardcoded subtitles).
939
840
  Count frames with text vs no text, note position consistency and whether text changes across frames.
940
841
  Decide if captions exist, with confidence (0.0-1.0) and detected language if any.`
941
842
  },
942
843
  analysisSteps: {
943
844
  tag: "analysis_steps",
944
- content: dedent_default`
845
+ content: dedent`
945
846
  1. COUNT how many frames contain text overlays vs. how many don't
946
847
  2. Check if text appears in consistent positions across multiple frames
947
848
  3. Verify text changes content between frames (indicating dialogue/narration)
@@ -950,7 +851,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
950
851
  },
951
852
  positiveIndicators: {
952
853
  tag: "classify_as_captions",
953
- content: dedent_default`
854
+ content: dedent`
954
855
  ONLY classify as burned-in captions if:
955
856
  - Text appears in multiple frames (not just 1-2 end frames)
956
857
  - Text positioning is consistent across those frames
@@ -959,7 +860,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
959
860
  },
960
861
  negativeIndicators: {
961
862
  tag: "not_captions",
962
- content: dedent_default`
863
+ content: dedent`
963
864
  DO NOT classify as burned-in captions:
964
865
  - Marketing taglines appearing only in final 1-2 frames
965
866
  - Single words or phrases that don't change between frames
@@ -974,65 +875,97 @@ function buildUserPrompt(promptOverrides) {
974
875
  return burnedInCaptionsPromptBuilder.build(promptOverrides);
975
876
  }
976
877
  var DEFAULT_PROVIDER = "openai";
878
+ async function fetchImageAsBase64(imageUrl, imageDownloadOptions) {
879
+ "use step";
880
+ const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
881
+ return downloadResult.base64Data;
882
+ }
883
+ async function analyzeStoryboard({
884
+ imageDataUrl,
885
+ provider,
886
+ modelId,
887
+ credentials,
888
+ userPrompt,
889
+ systemPrompt
890
+ }) {
891
+ "use step";
892
+ const model = createLanguageModelFromConfig(
893
+ provider,
894
+ modelId,
895
+ credentials
896
+ );
897
+ const response = await generateObject({
898
+ model,
899
+ schema: burnedInCaptionsSchema,
900
+ experimental_telemetry: { isEnabled: true },
901
+ messages: [
902
+ {
903
+ role: "system",
904
+ content: systemPrompt
905
+ },
906
+ {
907
+ role: "user",
908
+ content: [
909
+ { type: "text", text: userPrompt },
910
+ { type: "image", image: imageDataUrl }
911
+ ]
912
+ }
913
+ ]
914
+ });
915
+ return {
916
+ result: response.object,
917
+ usage: {
918
+ inputTokens: response.usage.inputTokens,
919
+ outputTokens: response.usage.outputTokens,
920
+ totalTokens: response.usage.totalTokens,
921
+ reasoningTokens: response.usage.reasoningTokens,
922
+ cachedInputTokens: response.usage.cachedInputTokens
923
+ }
924
+ };
925
+ }
977
926
  async function hasBurnedInCaptions(assetId, options = {}) {
927
+ "use workflow";
978
928
  const {
979
929
  provider = DEFAULT_PROVIDER,
980
930
  model,
981
931
  imageSubmissionMode = "url",
982
932
  imageDownloadOptions,
983
933
  promptOverrides,
984
- ...config2
934
+ ...config
985
935
  } = options;
986
936
  const userPrompt = buildUserPrompt(promptOverrides);
987
- const clients = createWorkflowClients(
988
- { ...config2, model },
937
+ const workflowConfig = await createWorkflowConfig(
938
+ { ...config, model },
989
939
  provider
990
940
  );
991
- const { playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
992
- const signingContext = resolveSigningContext(options);
941
+ const { playbackId, policy } = await getPlaybackIdForAsset(workflowConfig.credentials, assetId);
942
+ const signingContext = await resolveSigningContext(options);
993
943
  if (policy === "signed" && !signingContext) {
994
944
  throw new Error(
995
945
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
996
946
  );
997
947
  }
998
948
  const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
999
- const analyzeStoryboard = async (imageDataUrl) => {
1000
- const response = await (0, import_ai.generateObject)({
1001
- model: clients.languageModel.model,
1002
- schema: burnedInCaptionsSchema,
1003
- abortSignal: options.abortSignal,
1004
- experimental_telemetry: { isEnabled: true },
1005
- messages: [
1006
- {
1007
- role: "system",
1008
- content: SYSTEM_PROMPT
1009
- },
1010
- {
1011
- role: "user",
1012
- content: [
1013
- { type: "text", text: userPrompt },
1014
- { type: "image", image: imageDataUrl }
1015
- ]
1016
- }
1017
- ]
1018
- });
1019
- return {
1020
- result: response.object,
1021
- usage: {
1022
- inputTokens: response.usage.inputTokens,
1023
- outputTokens: response.usage.outputTokens,
1024
- totalTokens: response.usage.totalTokens,
1025
- reasoningTokens: response.usage.reasoningTokens,
1026
- cachedInputTokens: response.usage.cachedInputTokens
1027
- }
1028
- };
1029
- };
1030
949
  let analysisResponse;
1031
950
  if (imageSubmissionMode === "base64") {
1032
- const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
1033
- analysisResponse = await analyzeStoryboard(downloadResult.base64Data);
951
+ const base64Data = await fetchImageAsBase64(imageUrl, imageDownloadOptions);
952
+ analysisResponse = await analyzeStoryboard({
953
+ imageDataUrl: base64Data,
954
+ provider: workflowConfig.provider,
955
+ modelId: workflowConfig.modelId,
956
+ credentials: workflowConfig.credentials,
957
+ userPrompt,
958
+ systemPrompt: SYSTEM_PROMPT
959
+ });
1034
960
  } else {
1035
- analysisResponse = await analyzeStoryboard(imageUrl);
961
+ analysisResponse = await analyzeStoryboard({
962
+ imageDataUrl: imageUrl,
963
+ provider: workflowConfig.provider,
964
+ modelId: workflowConfig.modelId,
965
+ credentials: workflowConfig.credentials,
966
+ userPrompt,
967
+ systemPrompt: SYSTEM_PROMPT
968
+ });
1036
969
  }
1037
970
  if (!analysisResponse.result) {
1038
971
  throw new Error("No analysis result received from AI provider");
@@ -1048,8 +981,8 @@ async function hasBurnedInCaptions(assetId, options = {}) {
1048
981
  }
1049
982
 
1050
983
  // src/workflows/chapters.ts
1051
- var import_ai2 = require("ai");
1052
- var import_zod3 = require("zod");
984
+ import { generateObject as generateObject2 } from "ai";
985
+ import { z as z3 } from "zod";
1053
986
 
1054
987
  // src/lib/retry.ts
1055
988
  var DEFAULT_RETRY_OPTIONS = {
@@ -1081,25 +1014,55 @@ async function withRetry(fn, {
1081
1014
  if (isLastAttempt || !shouldRetry(lastError, attempt + 1)) {
1082
1015
  throw lastError;
1083
1016
  }
1084
- const delay2 = calculateDelay(attempt + 1, baseDelay, maxDelay);
1017
+ const delay = calculateDelay(attempt + 1, baseDelay, maxDelay);
1085
1018
  console.warn(
1086
- `Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(delay2)}ms...`
1019
+ `Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(delay)}ms...`
1087
1020
  );
1088
- await new Promise((resolve) => setTimeout(resolve, delay2));
1021
+ await new Promise((resolve) => setTimeout(resolve, delay));
1089
1022
  }
1090
1023
  }
1091
1024
  throw lastError || new Error("Retry failed with unknown error");
1092
1025
  }
1093
1026
 
1094
1027
  // src/workflows/chapters.ts
1095
- var chapterSchema = import_zod3.z.object({
1096
- startTime: import_zod3.z.number(),
1097
- title: import_zod3.z.string()
1028
+ var chapterSchema = z3.object({
1029
+ startTime: z3.number(),
1030
+ title: z3.string()
1098
1031
  });
1099
- var chaptersSchema = import_zod3.z.object({
1100
- chapters: import_zod3.z.array(chapterSchema)
1032
+ var chaptersSchema = z3.object({
1033
+ chapters: z3.array(chapterSchema)
1101
1034
  });
1102
- var DEFAULT_PROVIDER2 = "openai";
1035
+ async function generateChaptersWithAI({
1036
+ provider,
1037
+ modelId,
1038
+ credentials,
1039
+ timestampedTranscript,
1040
+ systemPrompt
1041
+ }) {
1042
+ "use step";
1043
+ const model = createLanguageModelFromConfig(
1044
+ provider,
1045
+ modelId,
1046
+ credentials
1047
+ );
1048
+ const response = await withRetry(
1049
+ () => generateObject2({
1050
+ model,
1051
+ schema: chaptersSchema,
1052
+ messages: [
1053
+ {
1054
+ role: "system",
1055
+ content: systemPrompt
1056
+ },
1057
+ {
1058
+ role: "user",
1059
+ content: timestampedTranscript
1060
+ }
1061
+ ]
1062
+ })
1063
+ );
1064
+ return response.object;
1065
+ }
1103
1066
  var SYSTEM_PROMPT2 = `Your role is to segment the following captions into chunked chapters, summarising each chapter with a title.
1104
1067
 
1105
1068
  Analyze the transcript and create logical chapter breaks based on topic changes, major transitions, or distinct sections of content. Each chapter should represent a meaningful segment of the video.
@@ -1121,10 +1084,11 @@ Important rules:
1121
1084
  - Do not include any text before or after the JSON
1122
1085
  - The JSON must be valid and parseable`;
1123
1086
  async function generateChapters(assetId, languageCode, options = {}) {
1124
- const { provider = DEFAULT_PROVIDER2, model, abortSignal } = options;
1125
- const clients = createWorkflowClients({ ...options, model }, provider);
1126
- const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
1127
- const signingContext = resolveSigningContext(options);
1087
+ "use workflow";
1088
+ const { provider = "openai", model } = options;
1089
+ const config = await createWorkflowConfig({ ...options, model }, provider);
1090
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(config.credentials, assetId);
1091
+ const signingContext = await resolveSigningContext(options);
1128
1092
  if (policy === "signed" && !signingContext) {
1129
1093
  throw new Error(
1130
1094
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1148,24 +1112,13 @@ async function generateChapters(assetId, languageCode, options = {}) {
1148
1112
  }
1149
1113
  let chaptersData = null;
1150
1114
  try {
1151
- const response = await withRetry(
1152
- () => (0, import_ai2.generateObject)({
1153
- model: clients.languageModel.model,
1154
- schema: chaptersSchema,
1155
- abortSignal,
1156
- messages: [
1157
- {
1158
- role: "system",
1159
- content: SYSTEM_PROMPT2
1160
- },
1161
- {
1162
- role: "user",
1163
- content: timestampedTranscript
1164
- }
1165
- ]
1166
- })
1167
- );
1168
- chaptersData = response.object;
1115
+ chaptersData = await generateChaptersWithAI({
1116
+ provider: config.provider,
1117
+ modelId: config.modelId,
1118
+ credentials: config.credentials,
1119
+ timestampedTranscript,
1120
+ systemPrompt: SYSTEM_PROMPT2
1121
+ });
1169
1122
  } catch (error) {
1170
1123
  throw new Error(
1171
1124
  `Failed to generate chapters with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
@@ -1189,14 +1142,7 @@ async function generateChapters(assetId, languageCode, options = {}) {
1189
1142
  }
1190
1143
 
1191
1144
  // src/workflows/embeddings.ts
1192
- var import_ai3 = require("ai");
1193
- var DEFAULT_PROVIDER3 = "openai";
1194
- var DEFAULT_CHUNKING_STRATEGY = {
1195
- type: "token",
1196
- maxTokens: 500,
1197
- overlap: 100
1198
- };
1199
- var DEFAULT_BATCH_SIZE = 5;
1145
+ import { embed } from "ai";
1200
1146
  function averageEmbeddings(embeddings) {
1201
1147
  if (embeddings.length === 0) {
1202
1148
  return [];
@@ -1213,51 +1159,46 @@ function averageEmbeddings(embeddings) {
1213
1159
  }
1214
1160
  return averaged;
1215
1161
  }
1216
- async function generateChunkEmbeddings(chunks, model, batchSize, abortSignal) {
1217
- const results = [];
1218
- for (let i = 0; i < chunks.length; i += batchSize) {
1219
- const batch = chunks.slice(i, i + batchSize);
1220
- const batchResults = await Promise.all(
1221
- batch.map(async (chunk) => {
1222
- const response = await withRetry(
1223
- () => (0, import_ai3.embed)({
1224
- model,
1225
- value: chunk.text,
1226
- abortSignal
1227
- })
1228
- );
1229
- return {
1230
- chunkId: chunk.id,
1231
- embedding: response.embedding,
1232
- metadata: {
1233
- startTime: chunk.startTime,
1234
- endTime: chunk.endTime,
1235
- tokenCount: chunk.tokenCount
1236
- }
1237
- };
1238
- })
1239
- );
1240
- results.push(...batchResults);
1241
- }
1242
- return results;
1162
+ async function generateSingleChunkEmbedding({
1163
+ chunk,
1164
+ provider,
1165
+ modelId,
1166
+ credentials
1167
+ }) {
1168
+ "use step";
1169
+ const model = createEmbeddingModelFromConfig(provider, modelId, credentials);
1170
+ const response = await withRetry(
1171
+ () => embed({
1172
+ model,
1173
+ value: chunk.text
1174
+ })
1175
+ );
1176
+ return {
1177
+ chunkId: chunk.id,
1178
+ embedding: response.embedding,
1179
+ metadata: {
1180
+ startTime: chunk.startTime,
1181
+ endTime: chunk.endTime,
1182
+ tokenCount: chunk.tokenCount
1183
+ }
1184
+ };
1243
1185
  }
1244
1186
  async function generateVideoEmbeddings(assetId, options = {}) {
1187
+ "use workflow";
1245
1188
  const {
1246
- provider = DEFAULT_PROVIDER3,
1189
+ provider = "openai",
1247
1190
  model,
1248
1191
  languageCode,
1249
- chunkingStrategy = DEFAULT_CHUNKING_STRATEGY,
1250
- batchSize = DEFAULT_BATCH_SIZE,
1251
- abortSignal
1192
+ chunkingStrategy = { type: "token", maxTokens: 500, overlap: 100 },
1193
+ batchSize = 5
1252
1194
  } = options;
1253
- const credentials = validateCredentials(options, provider === "google" ? "google" : "openai");
1254
- const muxClient = createMuxClient(credentials);
1195
+ const credentials = await validateCredentials(options, provider === "google" ? "google" : "openai");
1255
1196
  const embeddingModel = resolveEmbeddingModel({ ...options, provider, model });
1256
1197
  const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
1257
- muxClient,
1198
+ credentials,
1258
1199
  assetId
1259
1200
  );
1260
- const signingContext = resolveSigningContext(options);
1201
+ const signingContext = await resolveSigningContext(options);
1261
1202
  if (policy === "signed" && !signingContext) {
1262
1203
  throw new Error(
1263
1204
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1287,14 +1228,22 @@ async function generateVideoEmbeddings(assetId, options = {}) {
1287
1228
  if (chunks.length === 0) {
1288
1229
  throw new Error("No chunks generated from transcript");
1289
1230
  }
1290
- let chunkEmbeddings;
1231
+ const chunkEmbeddings = [];
1291
1232
  try {
1292
- chunkEmbeddings = await generateChunkEmbeddings(
1293
- chunks,
1294
- embeddingModel.model,
1295
- batchSize,
1296
- abortSignal
1297
- );
1233
+ for (let i = 0; i < chunks.length; i += batchSize) {
1234
+ const batch = chunks.slice(i, i + batchSize);
1235
+ const batchResults = await Promise.all(
1236
+ batch.map(
1237
+ (chunk) => generateSingleChunkEmbedding({
1238
+ chunk,
1239
+ provider: embeddingModel.provider,
1240
+ modelId: embeddingModel.modelId,
1241
+ credentials
1242
+ })
1243
+ )
1244
+ );
1245
+ chunkEmbeddings.push(...batchResults);
1246
+ }
1298
1247
  } catch (error) {
1299
1248
  throw new Error(
1300
1249
  `Failed to generate embeddings with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
@@ -1326,7 +1275,7 @@ var DEFAULT_THRESHOLDS = {
1326
1275
  sexual: 0.7,
1327
1276
  violence: 0.8
1328
1277
  };
1329
- var DEFAULT_PROVIDER4 = "openai";
1278
+ var DEFAULT_PROVIDER2 = "openai";
1330
1279
  var HIVE_ENDPOINT = "https://api.thehive.ai/api/v2/task/sync";
1331
1280
  var HIVE_SEXUAL_CATEGORIES = [
1332
1281
  "general_nsfw",
@@ -1364,6 +1313,7 @@ var HIVE_VIOLENCE_CATEGORIES = [
1364
1313
  "garm_death_injury_or_military_conflict"
1365
1314
  ];
1366
1315
  async function processConcurrently(items, processor, maxConcurrent = 5) {
1316
+ "use step";
1367
1317
  const results = [];
1368
1318
  for (let i = 0; i < items.length; i += maxConcurrent) {
1369
1319
  const batch = items.slice(i, i + maxConcurrent);
@@ -1374,19 +1324,21 @@ async function processConcurrently(items, processor, maxConcurrent = 5) {
1374
1324
  return results;
1375
1325
  }
1376
1326
  async function requestOpenAIModeration(imageUrls, apiKey, model, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
1327
+ "use step";
1377
1328
  const targetUrls = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map(
1378
- (img) => ({ url: img.url, image: img.base64Data })
1379
- ) : imageUrls.map((url) => ({ url, image: url }));
1329
+ (img) => ({ url: img.url, image: img.base64Data, apiKey, model })
1330
+ ) : imageUrls.map((url) => ({ url, image: url, apiKey, model }));
1380
1331
  const moderate = async (entry) => {
1332
+ "use step";
1381
1333
  try {
1382
1334
  const res = await fetch("https://api.openai.com/v1/moderations", {
1383
1335
  method: "POST",
1384
1336
  headers: {
1385
1337
  "Content-Type": "application/json",
1386
- "Authorization": `Bearer ${apiKey}`
1338
+ "Authorization": `Bearer ${entry.apiKey}`
1387
1339
  },
1388
1340
  body: JSON.stringify({
1389
- model,
1341
+ model: entry.model,
1390
1342
  input: [
1391
1343
  {
1392
1344
  type: "image_url",
@@ -1430,6 +1382,7 @@ function getHiveCategoryScores(classes, categoryNames) {
1430
1382
  return Math.max(...scores, 0);
1431
1383
  }
1432
1384
  async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
1385
+ "use step";
1433
1386
  const targets = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map((img) => ({
1434
1387
  url: img.url,
1435
1388
  source: {
@@ -1442,6 +1395,7 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
1442
1395
  source: { kind: "url", value: url }
1443
1396
  }));
1444
1397
  const moderate = async (entry) => {
1398
+ "use step";
1445
1399
  try {
1446
1400
  const formData = new FormData();
1447
1401
  if (entry.source.kind === "url") {
@@ -1487,8 +1441,9 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
1487
1441
  return processConcurrently(targets, moderate, maxConcurrent);
1488
1442
  }
1489
1443
  async function getModerationScores(assetId, options = {}) {
1444
+ "use workflow";
1490
1445
  const {
1491
- provider = DEFAULT_PROVIDER4,
1446
+ provider = DEFAULT_PROVIDER2,
1492
1447
  model = provider === "openai" ? "omni-moderation-latest" : void 0,
1493
1448
  thresholds = DEFAULT_THRESHOLDS,
1494
1449
  thumbnailInterval = 10,
@@ -1497,11 +1452,10 @@ async function getModerationScores(assetId, options = {}) {
1497
1452
  imageSubmissionMode = "url",
1498
1453
  imageDownloadOptions
1499
1454
  } = options;
1500
- const credentials = validateCredentials(options, provider === "openai" ? "openai" : void 0);
1501
- const muxClient = createMuxClient(credentials);
1502
- const { asset, playbackId, policy } = await getPlaybackIdForAsset(muxClient, assetId);
1455
+ const credentials = await validateCredentials(options, provider === "openai" ? "openai" : void 0);
1456
+ const { asset, playbackId, policy } = await getPlaybackIdForAsset(credentials, assetId);
1503
1457
  const duration = asset.duration || 0;
1504
- const signingContext = resolveSigningContext(options);
1458
+ const signingContext = await resolveSigningContext(options);
1505
1459
  if (policy === "signed" && !signingContext) {
1506
1460
  throw new Error(
1507
1461
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1557,17 +1511,18 @@ async function getModerationScores(assetId, options = {}) {
1557
1511
  }
1558
1512
 
1559
1513
  // src/workflows/summarization.ts
1560
- var import_ai4 = require("ai");
1561
- var import_zod4 = require("zod");
1514
+ import { generateObject as generateObject3 } from "ai";
1515
+ import dedent2 from "dedent";
1516
+ import { z as z4 } from "zod";
1562
1517
  var SUMMARY_KEYWORD_LIMIT = 10;
1563
- var summarySchema = import_zod4.z.object({
1564
- keywords: import_zod4.z.array(import_zod4.z.string()),
1565
- title: import_zod4.z.string(),
1566
- description: import_zod4.z.string()
1518
+ var summarySchema = z4.object({
1519
+ keywords: z4.array(z4.string()),
1520
+ title: z4.string(),
1521
+ description: z4.string()
1567
1522
  });
1568
1523
  var TONE_INSTRUCTIONS = {
1569
1524
  normal: "Provide a clear, straightforward analysis.",
1570
- sassy: "Answer with a sassy, playful attitude and personality.",
1525
+ sassy: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
1571
1526
  professional: "Provide a professional, executive-level analysis suitable for business reporting."
1572
1527
  };
1573
1528
  var summarizationPromptBuilder = createPromptBuilder({
@@ -1578,7 +1533,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1578
1533
  },
1579
1534
  title: {
1580
1535
  tag: "title_requirements",
1581
- content: dedent_default`
1536
+ content: dedent2`
1582
1537
  A short, compelling headline that immediately communicates the subject or action.
1583
1538
  Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
1584
1539
  Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
@@ -1586,7 +1541,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1586
1541
  },
1587
1542
  description: {
1588
1543
  tag: "description_requirements",
1589
- content: dedent_default`
1544
+ content: dedent2`
1590
1545
  A concise summary (2-4 sentences) that describes what happens across the video.
1591
1546
  Cover the main subjects, actions, setting, and any notable progression visible across frames.
1592
1547
  Write in present tense. Be specific about observable details rather than making assumptions.
@@ -1594,7 +1549,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1594
1549
  },
1595
1550
  keywords: {
1596
1551
  tag: "keywords_requirements",
1597
- content: dedent_default`
1552
+ content: dedent2`
1598
1553
  Specific, searchable terms (up to 10) that capture:
1599
1554
  - Primary subjects (people, animals, objects)
1600
1555
  - Actions and activities being performed
@@ -1606,7 +1561,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1606
1561
  },
1607
1562
  qualityGuidelines: {
1608
1563
  tag: "quality_guidelines",
1609
- content: dedent_default`
1564
+ content: dedent2`
1610
1565
  - Examine all frames to understand the full context and progression
1611
1566
  - Be precise: "golden retriever" is better than "dog" when identifiable
1612
1567
  - Capture the narrative: what begins, develops, and concludes
@@ -1615,7 +1570,7 @@ var summarizationPromptBuilder = createPromptBuilder({
1615
1570
  },
1616
1571
  sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
1617
1572
  });
1618
- var SYSTEM_PROMPT3 = dedent_default`
1573
+ var SYSTEM_PROMPT3 = dedent2`
1619
1574
  <role>
1620
1575
  You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
1621
1576
  </role>
@@ -1647,7 +1602,29 @@ var SYSTEM_PROMPT3 = dedent_default`
1647
1602
  - Only describe what is clearly observable in the frames or explicitly stated in the transcript
1648
1603
  - Do not fabricate details or make unsupported assumptions
1649
1604
  - Return structured data matching the requested schema
1650
- </constraints>`;
1605
+ </constraints>
1606
+
1607
+ <tone_guidance>
1608
+ Pay special attention to the <tone> section and lean heavily into those instructions.
1609
+ Adapt your entire analysis and writing style to match the specified tone - this should influence
1610
+ your word choice, personality, formality level, and overall presentation of the content.
1611
+ The tone instructions are not suggestions but core requirements for how you should express yourself.
1612
+ </tone_guidance>
1613
+
1614
+ <language_guidelines>
1615
+ AVOID these meta-descriptive phrases that reference the medium rather than the content:
1616
+ - "The image shows..." / "The storyboard shows..."
1617
+ - "In this video..." / "This video features..."
1618
+ - "The frames depict..." / "The footage shows..."
1619
+ - "We can see..." / "You can see..."
1620
+ - "The clip shows..." / "The scene shows..."
1621
+
1622
+ INSTEAD, describe the content directly:
1623
+ - BAD: "The video shows a chef preparing a meal"
1624
+ - GOOD: "A chef prepares a meal in a professional kitchen"
1625
+
1626
+ Write as if describing reality, not describing a recording of reality.
1627
+ </language_guidelines>`;
1651
1628
  function buildUserPrompt2({
1652
1629
  tone,
1653
1630
  transcriptText,
@@ -1661,8 +1638,41 @@ function buildUserPrompt2({
1661
1638
  }
1662
1639
  return summarizationPromptBuilder.buildWithContext(promptOverrides, contextSections);
1663
1640
  }
1664
- var DEFAULT_PROVIDER5 = "openai";
1665
- var DEFAULT_TONE = "normal";
1641
+ async function analyzeStoryboard2(imageDataUrl, workflowConfig, userPrompt, systemPrompt) {
1642
+ "use step";
1643
+ const model = createLanguageModelFromConfig(
1644
+ workflowConfig.provider,
1645
+ workflowConfig.modelId,
1646
+ workflowConfig.credentials
1647
+ );
1648
+ const response = await generateObject3({
1649
+ model,
1650
+ schema: summarySchema,
1651
+ messages: [
1652
+ {
1653
+ role: "system",
1654
+ content: systemPrompt
1655
+ },
1656
+ {
1657
+ role: "user",
1658
+ content: [
1659
+ { type: "text", text: userPrompt },
1660
+ { type: "image", image: imageDataUrl }
1661
+ ]
1662
+ }
1663
+ ]
1664
+ });
1665
+ return {
1666
+ result: response.object,
1667
+ usage: {
1668
+ inputTokens: response.usage.inputTokens,
1669
+ outputTokens: response.usage.outputTokens,
1670
+ totalTokens: response.usage.totalTokens,
1671
+ reasoningTokens: response.usage.reasoningTokens,
1672
+ cachedInputTokens: response.usage.cachedInputTokens
1673
+ }
1674
+ };
1675
+ }
1666
1676
  function normalizeKeywords(keywords) {
1667
1677
  if (!Array.isArray(keywords) || keywords.length === 0) {
1668
1678
  return [];
@@ -1687,23 +1697,24 @@ function normalizeKeywords(keywords) {
1687
1697
  return normalized;
1688
1698
  }
1689
1699
  async function getSummaryAndTags(assetId, options) {
1700
+ "use workflow";
1690
1701
  const {
1691
- provider = DEFAULT_PROVIDER5,
1702
+ provider = "openai",
1692
1703
  model,
1693
- tone = DEFAULT_TONE,
1704
+ tone = "normal",
1694
1705
  includeTranscript = true,
1695
1706
  cleanTranscript = true,
1696
1707
  imageSubmissionMode = "url",
1697
1708
  imageDownloadOptions,
1698
- abortSignal,
1709
+ abortSignal: _abortSignal,
1699
1710
  promptOverrides
1700
1711
  } = options ?? {};
1701
- const clients = createWorkflowClients(
1712
+ const config = await createWorkflowConfig(
1702
1713
  { ...options, model },
1703
1714
  provider
1704
1715
  );
1705
- const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
1706
- const signingContext = resolveSigningContext(options ?? {});
1716
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(config.credentials, assetId);
1717
+ const signingContext = await resolveSigningContext(options ?? {});
1707
1718
  if (policy === "signed" && !signingContext) {
1708
1719
  throw new Error(
1709
1720
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -1720,66 +1731,212 @@ async function getSummaryAndTags(assetId, options) {
1720
1731
  promptOverrides
1721
1732
  });
1722
1733
  const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
1723
- const analyzeStoryboard = async (imageDataUrl) => {
1724
- const response = await (0, import_ai4.generateObject)({
1725
- model: clients.languageModel.model,
1726
- schema: summarySchema,
1727
- abortSignal,
1728
- messages: [
1729
- {
1730
- role: "system",
1731
- content: SYSTEM_PROMPT3
1732
- },
1733
- {
1734
- role: "user",
1735
- content: [
1736
- { type: "text", text: userPrompt },
1737
- { type: "image", image: imageDataUrl }
1738
- ]
1739
- }
1740
- ]
1741
- });
1742
- return response.object;
1743
- };
1744
- let aiAnalysis = null;
1734
+ let analysisResponse;
1745
1735
  try {
1746
1736
  if (imageSubmissionMode === "base64") {
1747
1737
  const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
1748
- aiAnalysis = await analyzeStoryboard(downloadResult.base64Data);
1738
+ analysisResponse = await analyzeStoryboard2(
1739
+ downloadResult.base64Data,
1740
+ config,
1741
+ userPrompt,
1742
+ SYSTEM_PROMPT3
1743
+ );
1749
1744
  } else {
1750
- aiAnalysis = await withRetry(() => analyzeStoryboard(imageUrl));
1745
+ analysisResponse = await withRetry(() => analyzeStoryboard2(imageUrl, config, userPrompt, SYSTEM_PROMPT3));
1751
1746
  }
1752
1747
  } catch (error) {
1753
1748
  throw new Error(
1754
1749
  `Failed to analyze video content with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
1755
1750
  );
1756
1751
  }
1757
- if (!aiAnalysis) {
1752
+ if (!analysisResponse.result) {
1758
1753
  throw new Error(`Failed to analyze video content for asset ${assetId}`);
1759
1754
  }
1760
- if (!aiAnalysis.title) {
1755
+ if (!analysisResponse.result.title) {
1761
1756
  throw new Error(`Failed to generate title for asset ${assetId}`);
1762
1757
  }
1763
- if (!aiAnalysis.description) {
1758
+ if (!analysisResponse.result.description) {
1764
1759
  throw new Error(`Failed to generate description for asset ${assetId}`);
1765
1760
  }
1766
1761
  return {
1767
1762
  assetId,
1768
- title: aiAnalysis.title,
1769
- description: aiAnalysis.description,
1770
- tags: normalizeKeywords(aiAnalysis.keywords),
1771
- storyboardUrl: imageUrl
1763
+ title: analysisResponse.result.title,
1764
+ description: analysisResponse.result.description,
1765
+ tags: normalizeKeywords(analysisResponse.result.keywords),
1766
+ storyboardUrl: imageUrl,
1767
+ usage: analysisResponse.usage,
1768
+ transcriptText: transcriptText || void 0
1772
1769
  };
1773
1770
  }
1774
1771
 
1775
1772
  // src/workflows/translate-audio.ts
1776
- var import_client_s3 = require("@aws-sdk/client-s3");
1777
- var import_lib_storage = require("@aws-sdk/lib-storage");
1778
- var import_s3_request_presigner = require("@aws-sdk/s3-request-presigner");
1779
- var import_mux_node3 = __toESM(require("@mux/mux-node"));
1773
+ import Mux3 from "@mux/mux-node";
1774
+
1775
+ // src/lib/language-codes.ts
1776
+ var ISO639_1_TO_3 = {
1777
+ // Major world languages
1778
+ en: "eng",
1779
+ // English
1780
+ es: "spa",
1781
+ // Spanish
1782
+ fr: "fra",
1783
+ // French
1784
+ de: "deu",
1785
+ // German
1786
+ it: "ita",
1787
+ // Italian
1788
+ pt: "por",
1789
+ // Portuguese
1790
+ ru: "rus",
1791
+ // Russian
1792
+ zh: "zho",
1793
+ // Chinese
1794
+ ja: "jpn",
1795
+ // Japanese
1796
+ ko: "kor",
1797
+ // Korean
1798
+ ar: "ara",
1799
+ // Arabic
1800
+ hi: "hin",
1801
+ // Hindi
1802
+ // European languages
1803
+ nl: "nld",
1804
+ // Dutch
1805
+ pl: "pol",
1806
+ // Polish
1807
+ sv: "swe",
1808
+ // Swedish
1809
+ da: "dan",
1810
+ // Danish
1811
+ no: "nor",
1812
+ // Norwegian
1813
+ fi: "fin",
1814
+ // Finnish
1815
+ el: "ell",
1816
+ // Greek
1817
+ cs: "ces",
1818
+ // Czech
1819
+ hu: "hun",
1820
+ // Hungarian
1821
+ ro: "ron",
1822
+ // Romanian
1823
+ bg: "bul",
1824
+ // Bulgarian
1825
+ hr: "hrv",
1826
+ // Croatian
1827
+ sk: "slk",
1828
+ // Slovak
1829
+ sl: "slv",
1830
+ // Slovenian
1831
+ uk: "ukr",
1832
+ // Ukrainian
1833
+ tr: "tur",
1834
+ // Turkish
1835
+ // Asian languages
1836
+ th: "tha",
1837
+ // Thai
1838
+ vi: "vie",
1839
+ // Vietnamese
1840
+ id: "ind",
1841
+ // Indonesian
1842
+ ms: "msa",
1843
+ // Malay
1844
+ tl: "tgl",
1845
+ // Tagalog/Filipino
1846
+ // Other languages
1847
+ he: "heb",
1848
+ // Hebrew
1849
+ fa: "fas",
1850
+ // Persian/Farsi
1851
+ bn: "ben",
1852
+ // Bengali
1853
+ ta: "tam",
1854
+ // Tamil
1855
+ te: "tel",
1856
+ // Telugu
1857
+ mr: "mar",
1858
+ // Marathi
1859
+ gu: "guj",
1860
+ // Gujarati
1861
+ kn: "kan",
1862
+ // Kannada
1863
+ ml: "mal",
1864
+ // Malayalam
1865
+ pa: "pan",
1866
+ // Punjabi
1867
+ ur: "urd",
1868
+ // Urdu
1869
+ sw: "swa",
1870
+ // Swahili
1871
+ af: "afr",
1872
+ // Afrikaans
1873
+ ca: "cat",
1874
+ // Catalan
1875
+ eu: "eus",
1876
+ // Basque
1877
+ gl: "glg",
1878
+ // Galician
1879
+ is: "isl",
1880
+ // Icelandic
1881
+ et: "est",
1882
+ // Estonian
1883
+ lv: "lav",
1884
+ // Latvian
1885
+ lt: "lit"
1886
+ // Lithuanian
1887
+ };
1888
+ var ISO639_3_TO_1 = Object.fromEntries(
1889
+ Object.entries(ISO639_1_TO_3).map(([iso1, iso3]) => [iso3, iso1])
1890
+ );
1891
+ function toISO639_3(code) {
1892
+ const normalized = code.toLowerCase().trim();
1893
+ if (normalized.length === 3) {
1894
+ return normalized;
1895
+ }
1896
+ return ISO639_1_TO_3[normalized] ?? normalized;
1897
+ }
1898
+ function toISO639_1(code) {
1899
+ const normalized = code.toLowerCase().trim();
1900
+ if (normalized.length === 2) {
1901
+ return normalized;
1902
+ }
1903
+ return ISO639_3_TO_1[normalized] ?? normalized;
1904
+ }
1905
+ function getLanguageCodePair(code) {
1906
+ const normalized = code.toLowerCase().trim();
1907
+ if (normalized.length === 2) {
1908
+ return {
1909
+ iso639_1: normalized,
1910
+ iso639_3: toISO639_3(normalized)
1911
+ };
1912
+ } else if (normalized.length === 3) {
1913
+ return {
1914
+ iso639_1: toISO639_1(normalized),
1915
+ iso639_3: normalized
1916
+ };
1917
+ }
1918
+ return {
1919
+ iso639_1: normalized,
1920
+ iso639_3: normalized
1921
+ };
1922
+ }
1923
+ function getLanguageName(code) {
1924
+ const iso639_1 = toISO639_1(code);
1925
+ try {
1926
+ const displayNames = new Intl.DisplayNames(["en"], { type: "language" });
1927
+ return displayNames.of(iso639_1) ?? code.toUpperCase();
1928
+ } catch {
1929
+ return code.toUpperCase();
1930
+ }
1931
+ }
1932
+
1933
+ // src/workflows/translate-audio.ts
1780
1934
  var STATIC_RENDITION_POLL_INTERVAL_MS = 5e3;
1781
1935
  var STATIC_RENDITION_MAX_ATTEMPTS = 36;
1782
- var delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
1936
+ async function sleep(ms) {
1937
+ "use step";
1938
+ await new Promise((resolve) => setTimeout(resolve, ms));
1939
+ }
1783
1940
  function getReadyAudioStaticRendition(asset) {
1784
1941
  const files = asset.static_renditions?.files;
1785
1942
  if (!files || files.length === 0) {
@@ -1790,19 +1947,21 @@ function getReadyAudioStaticRendition(asset) {
1790
1947
  );
1791
1948
  }
1792
1949
  var hasReadyAudioStaticRendition = (asset) => Boolean(getReadyAudioStaticRendition(asset));
1793
- async function requestStaticRenditionCreation(muxClient, assetId) {
1794
- console.log("\u{1F4FC} Requesting static rendition from Mux...");
1950
+ async function requestStaticRenditionCreation(credentials, assetId) {
1951
+ "use step";
1952
+ const mux = new Mux3({
1953
+ tokenId: credentials.muxTokenId,
1954
+ tokenSecret: credentials.muxTokenSecret
1955
+ });
1795
1956
  try {
1796
- await muxClient.video.assets.createStaticRendition(assetId, {
1957
+ await mux.video.assets.createStaticRendition(assetId, {
1797
1958
  resolution: "audio-only"
1798
1959
  });
1799
- console.log("\u{1F4FC} Static rendition request accepted by Mux.");
1800
1960
  } catch (error) {
1801
1961
  const statusCode = error?.status ?? error?.statusCode;
1802
1962
  const messages = error?.error?.messages;
1803
1963
  const alreadyDefined = messages?.some((message2) => message2.toLowerCase().includes("already defined")) ?? error?.message?.toLowerCase().includes("already defined");
1804
1964
  if (statusCode === 409 || alreadyDefined) {
1805
- console.log("\u2139\uFE0F Static rendition already requested. Waiting for it to finish...");
1806
1965
  return;
1807
1966
  }
1808
1967
  const message = error instanceof Error ? error.message : "Unknown error";
@@ -1811,31 +1970,34 @@ async function requestStaticRenditionCreation(muxClient, assetId) {
1811
1970
  }
1812
1971
  async function waitForAudioStaticRendition({
1813
1972
  assetId,
1814
- muxClient,
1973
+ credentials,
1815
1974
  initialAsset
1816
1975
  }) {
1976
+ "use step";
1977
+ const mux = new Mux3({
1978
+ tokenId: credentials.muxTokenId,
1979
+ tokenSecret: credentials.muxTokenSecret
1980
+ });
1817
1981
  let currentAsset = initialAsset;
1818
1982
  if (hasReadyAudioStaticRendition(currentAsset)) {
1819
1983
  return currentAsset;
1820
1984
  }
1821
1985
  const status = currentAsset.static_renditions?.status ?? "not_requested";
1822
1986
  if (status === "not_requested" || status === void 0) {
1823
- await requestStaticRenditionCreation(muxClient, assetId);
1987
+ await requestStaticRenditionCreation(credentials, assetId);
1824
1988
  } else if (status === "errored") {
1825
- console.log("\u26A0\uFE0F Previous static rendition request errored. Creating a new one...");
1826
- await requestStaticRenditionCreation(muxClient, assetId);
1989
+ await requestStaticRenditionCreation(credentials, assetId);
1827
1990
  } else {
1828
- console.log(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
1991
+ console.warn(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
1829
1992
  }
1830
1993
  for (let attempt = 1; attempt <= STATIC_RENDITION_MAX_ATTEMPTS; attempt++) {
1831
- await delay(STATIC_RENDITION_POLL_INTERVAL_MS);
1832
- currentAsset = await muxClient.video.assets.retrieve(assetId);
1994
+ await sleep(STATIC_RENDITION_POLL_INTERVAL_MS);
1995
+ currentAsset = await mux.video.assets.retrieve(assetId);
1833
1996
  if (hasReadyAudioStaticRendition(currentAsset)) {
1834
- console.log("\u2705 Audio static rendition is ready!");
1835
1997
  return currentAsset;
1836
1998
  }
1837
1999
  const currentStatus = currentAsset.static_renditions?.status || "unknown";
1838
- console.log(
2000
+ console.warn(
1839
2001
  `\u231B Waiting for static rendition (attempt ${attempt}/${STATIC_RENDITION_MAX_ATTEMPTS}) \u2192 ${currentStatus}`
1840
2002
  );
1841
2003
  if (currentStatus === "errored") {
@@ -1848,55 +2010,180 @@ async function waitForAudioStaticRendition({
1848
2010
  "Timed out waiting for the static rendition to become ready. Please try again in a moment."
1849
2011
  );
1850
2012
  }
2013
+ async function fetchAudioFromMux(audioUrl) {
2014
+ "use step";
2015
+ const audioResponse = await fetch(audioUrl);
2016
+ if (!audioResponse.ok) {
2017
+ throw new Error(`Failed to fetch audio file: ${audioResponse.statusText}`);
2018
+ }
2019
+ return audioResponse.arrayBuffer();
2020
+ }
2021
+ async function createElevenLabsDubbingJob({
2022
+ audioBuffer,
2023
+ assetId,
2024
+ elevenLabsLangCode,
2025
+ elevenLabsApiKey,
2026
+ numSpeakers
2027
+ }) {
2028
+ "use step";
2029
+ const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
2030
+ const formData = new FormData();
2031
+ formData.append("file", audioBlob);
2032
+ formData.append("target_lang", elevenLabsLangCode);
2033
+ formData.append("num_speakers", numSpeakers.toString());
2034
+ formData.append("name", `Mux Asset ${assetId} - auto to ${elevenLabsLangCode}`);
2035
+ const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
2036
+ method: "POST",
2037
+ headers: {
2038
+ "xi-api-key": elevenLabsApiKey
2039
+ },
2040
+ body: formData
2041
+ });
2042
+ if (!dubbingResponse.ok) {
2043
+ throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
2044
+ }
2045
+ const dubbingData = await dubbingResponse.json();
2046
+ return dubbingData.dubbing_id;
2047
+ }
2048
+ async function checkElevenLabsDubbingStatus({
2049
+ dubbingId,
2050
+ elevenLabsApiKey
2051
+ }) {
2052
+ "use step";
2053
+ const statusResponse = await fetch(`https://api.elevenlabs.io/v1/dubbing/${dubbingId}`, {
2054
+ headers: {
2055
+ "xi-api-key": elevenLabsApiKey
2056
+ }
2057
+ });
2058
+ if (!statusResponse.ok) {
2059
+ throw new Error(`Status check failed: ${statusResponse.statusText}`);
2060
+ }
2061
+ const statusData = await statusResponse.json();
2062
+ return {
2063
+ status: statusData.status,
2064
+ targetLanguages: statusData.target_languages ?? []
2065
+ };
2066
+ }
2067
+ async function downloadDubbedAudioFromElevenLabs({
2068
+ dubbingId,
2069
+ languageCode,
2070
+ elevenLabsApiKey
2071
+ }) {
2072
+ "use step";
2073
+ const audioUrl = `https://api.elevenlabs.io/v1/dubbing/${dubbingId}/audio/${languageCode}`;
2074
+ const audioResponse = await fetch(audioUrl, {
2075
+ headers: {
2076
+ "xi-api-key": elevenLabsApiKey
2077
+ }
2078
+ });
2079
+ if (!audioResponse.ok) {
2080
+ throw new Error(`Failed to fetch dubbed audio: ${audioResponse.statusText}`);
2081
+ }
2082
+ return audioResponse.arrayBuffer();
2083
+ }
2084
+ async function uploadDubbedAudioToS3({
2085
+ dubbedAudioBuffer,
2086
+ assetId,
2087
+ toLanguageCode,
2088
+ s3Endpoint,
2089
+ s3Region,
2090
+ s3Bucket,
2091
+ s3AccessKeyId,
2092
+ s3SecretAccessKey
2093
+ }) {
2094
+ "use step";
2095
+ const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
2096
+ const { Upload } = await import("@aws-sdk/lib-storage");
2097
+ const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
2098
+ const s3Client = new S3Client({
2099
+ region: s3Region,
2100
+ endpoint: s3Endpoint,
2101
+ credentials: {
2102
+ accessKeyId: s3AccessKeyId,
2103
+ secretAccessKey: s3SecretAccessKey
2104
+ },
2105
+ forcePathStyle: true
2106
+ });
2107
+ const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
2108
+ const upload = new Upload({
2109
+ client: s3Client,
2110
+ params: {
2111
+ Bucket: s3Bucket,
2112
+ Key: audioKey,
2113
+ Body: new Uint8Array(dubbedAudioBuffer),
2114
+ ContentType: "audio/mp4"
2115
+ }
2116
+ });
2117
+ await upload.done();
2118
+ const getObjectCommand = new GetObjectCommand({
2119
+ Bucket: s3Bucket,
2120
+ Key: audioKey
2121
+ });
2122
+ const presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
2123
+ expiresIn: 3600
2124
+ // 1 hour
2125
+ });
2126
+ console.warn(`\u2705 Audio uploaded successfully to: ${audioKey}`);
2127
+ console.warn(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
2128
+ return presignedUrl;
2129
+ }
2130
+ async function createAudioTrackOnMux(credentials, assetId, languageCode, presignedUrl) {
2131
+ "use step";
2132
+ const mux = new Mux3({
2133
+ tokenId: credentials.muxTokenId,
2134
+ tokenSecret: credentials.muxTokenSecret
2135
+ });
2136
+ const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(languageCode) || languageCode.toUpperCase();
2137
+ const trackName = `${languageName} (auto-dubbed)`;
2138
+ const trackResponse = await mux.video.assets.createTrack(assetId, {
2139
+ type: "audio",
2140
+ language_code: languageCode,
2141
+ name: trackName,
2142
+ url: presignedUrl
2143
+ });
2144
+ if (!trackResponse.id) {
2145
+ throw new Error("Failed to create audio track: no track ID returned from Mux");
2146
+ }
2147
+ return trackResponse.id;
2148
+ }
1851
2149
  async function translateAudio(assetId, toLanguageCode, options = {}) {
2150
+ "use workflow";
1852
2151
  const {
1853
2152
  provider = "elevenlabs",
1854
2153
  numSpeakers = 0,
1855
2154
  // 0 = auto-detect
1856
- muxTokenId,
1857
- muxTokenSecret,
1858
2155
  elevenLabsApiKey,
1859
2156
  uploadToMux = true
1860
2157
  } = options;
1861
2158
  if (provider !== "elevenlabs") {
1862
2159
  throw new Error("Only ElevenLabs provider is currently supported for audio translation");
1863
2160
  }
1864
- const muxId = muxTokenId ?? env_default.MUX_TOKEN_ID;
1865
- const muxSecret = muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
2161
+ const credentials = await validateCredentials(options);
1866
2162
  const elevenLabsKey = elevenLabsApiKey ?? env_default.ELEVENLABS_API_KEY;
1867
2163
  const s3Endpoint = options.s3Endpoint ?? env_default.S3_ENDPOINT;
1868
2164
  const s3Region = options.s3Region ?? env_default.S3_REGION ?? "auto";
1869
2165
  const s3Bucket = options.s3Bucket ?? env_default.S3_BUCKET;
1870
2166
  const s3AccessKeyId = options.s3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
1871
2167
  const s3SecretAccessKey = options.s3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
1872
- if (!muxId || !muxSecret) {
1873
- throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
1874
- }
1875
2168
  if (!elevenLabsKey) {
1876
2169
  throw new Error("ElevenLabs API key is required. Provide elevenLabsApiKey in options or set ELEVENLABS_API_KEY environment variable.");
1877
2170
  }
1878
2171
  if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
1879
2172
  throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
1880
2173
  }
1881
- const mux = new import_mux_node3.default({
1882
- tokenId: muxId,
1883
- tokenSecret: muxSecret
1884
- });
1885
- console.log(`\u{1F3AC} Fetching Mux asset: ${assetId}`);
1886
- const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(mux, assetId);
1887
- const signingContext = resolveSigningContext(options);
2174
+ const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(credentials, assetId);
2175
+ const signingContext = await resolveSigningContext(options);
1888
2176
  if (policy === "signed" && !signingContext) {
1889
2177
  throw new Error(
1890
2178
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
1891
2179
  );
1892
2180
  }
1893
- console.log("\u{1F50D} Checking for audio-only static rendition...");
1894
2181
  let currentAsset = initialAsset;
1895
2182
  if (!hasReadyAudioStaticRendition(currentAsset)) {
1896
- console.log("\u274C No ready audio static rendition found. Requesting one now...");
2183
+ console.warn("\u274C No ready audio static rendition found. Requesting one now...");
1897
2184
  currentAsset = await waitForAudioStaticRendition({
1898
2185
  assetId,
1899
- muxClient: mux,
2186
+ credentials,
1900
2187
  initialAsset: currentAsset
1901
2188
  });
1902
2189
  }
@@ -1910,58 +2197,44 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
1910
2197
  if (policy === "signed" && signingContext) {
1911
2198
  audioUrl = await signUrl(audioUrl, playbackId, signingContext, "video");
1912
2199
  }
1913
- console.log(`\u2705 Found audio rendition: ${audioUrl}`);
1914
- console.log(`\u{1F399}\uFE0F Creating ElevenLabs dubbing job (auto-detect \u2192 ${toLanguageCode})`);
2200
+ console.warn("\u{1F399}\uFE0F Fetching audio from Mux...");
2201
+ let audioBuffer;
2202
+ try {
2203
+ audioBuffer = await fetchAudioFromMux(audioUrl);
2204
+ } catch (error) {
2205
+ throw new Error(`Failed to fetch audio from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
2206
+ }
2207
+ console.warn("\u{1F399}\uFE0F Creating dubbing job in ElevenLabs...");
2208
+ const elevenLabsLangCode = toISO639_3(toLanguageCode);
2209
+ console.warn(`\u{1F50D} Creating dubbing job for asset ${assetId} with language code: ${elevenLabsLangCode}`);
1915
2210
  let dubbingId;
1916
2211
  try {
1917
- const audioResponse = await fetch(audioUrl);
1918
- if (!audioResponse.ok) {
1919
- throw new Error(`Failed to fetch audio file: ${audioResponse.statusText}`);
1920
- }
1921
- const audioBuffer = await audioResponse.arrayBuffer();
1922
- const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
1923
- const audioFile = audioBlob;
1924
- const formData = new FormData();
1925
- formData.append("file", audioFile);
1926
- formData.append("target_lang", toLanguageCode);
1927
- formData.append("num_speakers", numSpeakers.toString());
1928
- formData.append("name", `Mux Asset ${assetId} - auto to ${toLanguageCode}`);
1929
- const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
1930
- method: "POST",
1931
- headers: {
1932
- "xi-api-key": elevenLabsKey
1933
- },
1934
- body: formData
2212
+ dubbingId = await createElevenLabsDubbingJob({
2213
+ audioBuffer,
2214
+ assetId,
2215
+ elevenLabsLangCode,
2216
+ elevenLabsApiKey: elevenLabsKey,
2217
+ numSpeakers
1935
2218
  });
1936
- if (!dubbingResponse.ok) {
1937
- throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
1938
- }
1939
- const dubbingData = await dubbingResponse.json();
1940
- dubbingId = dubbingData.dubbing_id;
1941
- console.log(`\u2705 Dubbing job created: ${dubbingId}`);
1942
- console.log(`\u23F1\uFE0F Expected duration: ${dubbingData.expected_duration_sec}s`);
2219
+ console.warn(`\u2705 Dubbing job created with ID: ${dubbingId}`);
1943
2220
  } catch (error) {
1944
2221
  throw new Error(`Failed to create ElevenLabs dubbing job: ${error instanceof Error ? error.message : "Unknown error"}`);
1945
2222
  }
1946
- console.log("\u23F3 Waiting for dubbing to complete...");
2223
+ console.warn("\u23F3 Waiting for dubbing to complete...");
1947
2224
  let dubbingStatus = "dubbing";
1948
2225
  let pollAttempts = 0;
1949
2226
  const maxPollAttempts = 180;
2227
+ let targetLanguages = [];
1950
2228
  while (dubbingStatus === "dubbing" && pollAttempts < maxPollAttempts) {
1951
- await new Promise((resolve) => setTimeout(resolve, 1e4));
2229
+ await sleep(1e4);
1952
2230
  pollAttempts++;
1953
2231
  try {
1954
- const statusResponse = await fetch(`https://api.elevenlabs.io/v1/dubbing/${dubbingId}`, {
1955
- headers: {
1956
- "xi-api-key": elevenLabsKey
1957
- }
2232
+ const statusResult = await checkElevenLabsDubbingStatus({
2233
+ dubbingId,
2234
+ elevenLabsApiKey: elevenLabsKey
1958
2235
  });
1959
- if (!statusResponse.ok) {
1960
- throw new Error(`Status check failed: ${statusResponse.statusText}`);
1961
- }
1962
- const statusData = await statusResponse.json();
1963
- dubbingStatus = statusData.status;
1964
- console.log(`\u{1F4CA} Status check ${pollAttempts}: ${dubbingStatus}`);
2236
+ dubbingStatus = statusResult.status;
2237
+ targetLanguages = statusResult.targetLanguages;
1965
2238
  if (dubbingStatus === "failed") {
1966
2239
  throw new Error("ElevenLabs dubbing job failed");
1967
2240
  }
@@ -1972,89 +2245,77 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
1972
2245
  if (dubbingStatus !== "dubbed") {
1973
2246
  throw new Error(`Dubbing job timed out or failed. Final status: ${dubbingStatus}`);
1974
2247
  }
1975
- console.log("\u2705 Dubbing completed successfully!");
2248
+ console.warn("\u2705 Dubbing completed successfully!");
1976
2249
  if (!uploadToMux) {
2250
+ const targetLanguage2 = getLanguageCodePair(toLanguageCode);
1977
2251
  return {
1978
2252
  assetId,
1979
- targetLanguageCode: toLanguageCode,
2253
+ targetLanguageCode: targetLanguage2.iso639_1,
2254
+ targetLanguage: targetLanguage2,
1980
2255
  dubbingId
1981
2256
  };
1982
2257
  }
1983
- console.log("\u{1F4E5} Downloading dubbed audio from ElevenLabs...");
2258
+ console.warn("\u{1F4E5} Downloading dubbed audio from ElevenLabs...");
1984
2259
  let dubbedAudioBuffer;
1985
2260
  try {
1986
- const audioUrl2 = `https://api.elevenlabs.io/v1/dubbing/${dubbingId}/audio/${toLanguageCode}`;
1987
- const audioResponse = await fetch(audioUrl2, {
1988
- headers: {
1989
- "xi-api-key": elevenLabsKey
1990
- }
1991
- });
1992
- if (!audioResponse.ok) {
1993
- throw new Error(`Failed to fetch dubbed audio: ${audioResponse.statusText}`);
2261
+ const requestedLangCode = toISO639_3(toLanguageCode);
2262
+ let downloadLangCode = targetLanguages.find(
2263
+ (lang) => lang === requestedLangCode
2264
+ ) ?? targetLanguages.find(
2265
+ (lang) => lang.toLowerCase() === requestedLangCode.toLowerCase()
2266
+ );
2267
+ if (!downloadLangCode && targetLanguages.length > 0) {
2268
+ downloadLangCode = targetLanguages[0];
2269
+ console.warn(`\u26A0\uFE0F Requested language "${requestedLangCode}" not found in target_languages. Using "${downloadLangCode}" instead.`);
2270
+ }
2271
+ if (!downloadLangCode) {
2272
+ downloadLangCode = requestedLangCode;
2273
+ console.warn(`\u26A0\uFE0F No target_languages available from ElevenLabs status. Using requested language code: ${requestedLangCode}`);
1994
2274
  }
1995
- dubbedAudioBuffer = await audioResponse.arrayBuffer();
1996
- console.log(`\u2705 Downloaded dubbed audio (${dubbedAudioBuffer.byteLength} bytes)`);
2275
+ dubbedAudioBuffer = await downloadDubbedAudioFromElevenLabs({
2276
+ dubbingId,
2277
+ languageCode: downloadLangCode,
2278
+ elevenLabsApiKey: elevenLabsKey
2279
+ });
2280
+ console.warn("\u2705 Dubbed audio downloaded successfully!");
1997
2281
  } catch (error) {
1998
2282
  throw new Error(`Failed to download dubbed audio: ${error instanceof Error ? error.message : "Unknown error"}`);
1999
2283
  }
2000
- console.log("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
2001
- const s3Client = new import_client_s3.S3Client({
2002
- region: s3Region,
2003
- endpoint: s3Endpoint,
2004
- credentials: {
2005
- accessKeyId: s3AccessKeyId,
2006
- secretAccessKey: s3SecretAccessKey
2007
- },
2008
- forcePathStyle: true
2009
- });
2010
- const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
2284
+ console.warn("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
2011
2285
  let presignedUrl;
2012
2286
  try {
2013
- const upload = new import_lib_storage.Upload({
2014
- client: s3Client,
2015
- params: {
2016
- Bucket: s3Bucket,
2017
- Key: audioKey,
2018
- Body: new Uint8Array(dubbedAudioBuffer),
2019
- ContentType: "audio/mp4"
2020
- }
2021
- });
2022
- await upload.done();
2023
- console.log(`\u2705 Audio uploaded successfully to: ${audioKey}`);
2024
- const getObjectCommand = new import_client_s3.GetObjectCommand({
2025
- Bucket: s3Bucket,
2026
- Key: audioKey
2027
- });
2028
- presignedUrl = await (0, import_s3_request_presigner.getSignedUrl)(s3Client, getObjectCommand, {
2029
- expiresIn: 3600
2030
- // 1 hour
2287
+ presignedUrl = await uploadDubbedAudioToS3({
2288
+ dubbedAudioBuffer,
2289
+ assetId,
2290
+ toLanguageCode,
2291
+ s3Endpoint,
2292
+ s3Region,
2293
+ s3Bucket,
2294
+ s3AccessKeyId,
2295
+ s3SecretAccessKey
2031
2296
  });
2032
- console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
2033
2297
  } catch (error) {
2034
2298
  throw new Error(`Failed to upload audio to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
2035
2299
  }
2036
- console.log("\u{1F3AC} Adding translated audio track to Mux asset...");
2300
+ console.warn("\u{1F4F9} Adding dubbed audio track to Mux asset...");
2037
2301
  let uploadedTrackId;
2302
+ const muxLangCode = toISO639_1(toLanguageCode);
2038
2303
  try {
2039
- const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(toLanguageCode) || toLanguageCode.toUpperCase();
2304
+ uploadedTrackId = await createAudioTrackOnMux(credentials, assetId, muxLangCode, presignedUrl);
2305
+ const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(muxLangCode) || muxLangCode.toUpperCase();
2040
2306
  const trackName = `${languageName} (auto-dubbed)`;
2041
- const trackResponse = await mux.video.assets.createTrack(assetId, {
2042
- type: "audio",
2043
- language_code: toLanguageCode,
2044
- name: trackName,
2045
- url: presignedUrl
2046
- });
2047
- uploadedTrackId = trackResponse.id;
2048
- console.log(`\u2705 Audio track added to Mux asset with ID: ${uploadedTrackId}`);
2049
- console.log(`\u{1F3B5} Track name: "${trackName}"`);
2307
+ console.warn(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
2308
+ console.warn(`\u{1F4CB} Track name: "${trackName}"`);
2050
2309
  } catch (error) {
2051
2310
  console.warn(`\u26A0\uFE0F Failed to add audio track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
2052
- console.log("\u{1F517} You can manually add the track using this presigned URL:");
2053
- console.log(presignedUrl);
2311
+ console.warn("\u{1F517} You can manually add the track using this presigned URL:");
2312
+ console.warn(presignedUrl);
2054
2313
  }
2314
+ const targetLanguage = getLanguageCodePair(toLanguageCode);
2055
2315
  return {
2056
2316
  assetId,
2057
- targetLanguageCode: toLanguageCode,
2317
+ targetLanguageCode: targetLanguage.iso639_1,
2318
+ targetLanguage,
2058
2319
  dubbingId,
2059
2320
  uploadedTrackId,
2060
2321
  presignedUrl
@@ -2062,43 +2323,149 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
2062
2323
  }
2063
2324
 
2064
2325
  // src/workflows/translate-captions.ts
2065
- var import_client_s32 = require("@aws-sdk/client-s3");
2066
- var import_lib_storage2 = require("@aws-sdk/lib-storage");
2067
- var import_s3_request_presigner2 = require("@aws-sdk/s3-request-presigner");
2068
- var import_ai5 = require("ai");
2069
- var import_zod5 = require("zod");
2070
- var translationSchema = import_zod5.z.object({
2071
- translation: import_zod5.z.string()
2326
+ import Mux4 from "@mux/mux-node";
2327
+ import { generateObject as generateObject4 } from "ai";
2328
+ import { z as z5 } from "zod";
2329
+ var translationSchema = z5.object({
2330
+ translation: z5.string()
2072
2331
  });
2073
- var DEFAULT_PROVIDER6 = "openai";
2332
+ async function fetchVttFromMux(vttUrl) {
2333
+ "use step";
2334
+ const vttResponse = await fetch(vttUrl);
2335
+ if (!vttResponse.ok) {
2336
+ throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
2337
+ }
2338
+ return vttResponse.text();
2339
+ }
2340
+ async function translateVttWithAI({
2341
+ vttContent,
2342
+ fromLanguageCode,
2343
+ toLanguageCode,
2344
+ provider,
2345
+ modelId,
2346
+ credentials,
2347
+ abortSignal
2348
+ }) {
2349
+ "use step";
2350
+ const languageModel = createLanguageModelFromConfig(
2351
+ provider,
2352
+ modelId,
2353
+ credentials
2354
+ );
2355
+ const response = await generateObject4({
2356
+ model: languageModel,
2357
+ schema: translationSchema,
2358
+ abortSignal,
2359
+ messages: [
2360
+ {
2361
+ role: "user",
2362
+ content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
2363
+
2364
+ ${vttContent}`
2365
+ }
2366
+ ]
2367
+ });
2368
+ return {
2369
+ translatedVtt: response.object.translation,
2370
+ usage: {
2371
+ inputTokens: response.usage.inputTokens,
2372
+ outputTokens: response.usage.outputTokens,
2373
+ totalTokens: response.usage.totalTokens,
2374
+ reasoningTokens: response.usage.reasoningTokens,
2375
+ cachedInputTokens: response.usage.cachedInputTokens
2376
+ }
2377
+ };
2378
+ }
2379
+ async function uploadVttToS3({
2380
+ translatedVtt,
2381
+ assetId,
2382
+ fromLanguageCode,
2383
+ toLanguageCode,
2384
+ s3Endpoint,
2385
+ s3Region,
2386
+ s3Bucket,
2387
+ s3AccessKeyId,
2388
+ s3SecretAccessKey
2389
+ }) {
2390
+ "use step";
2391
+ const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
2392
+ const { Upload } = await import("@aws-sdk/lib-storage");
2393
+ const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
2394
+ const s3Client = new S3Client({
2395
+ region: s3Region,
2396
+ endpoint: s3Endpoint,
2397
+ credentials: {
2398
+ accessKeyId: s3AccessKeyId,
2399
+ secretAccessKey: s3SecretAccessKey
2400
+ },
2401
+ forcePathStyle: true
2402
+ });
2403
+ const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
2404
+ const upload = new Upload({
2405
+ client: s3Client,
2406
+ params: {
2407
+ Bucket: s3Bucket,
2408
+ Key: vttKey,
2409
+ Body: translatedVtt,
2410
+ ContentType: "text/vtt"
2411
+ }
2412
+ });
2413
+ await upload.done();
2414
+ const getObjectCommand = new GetObjectCommand({
2415
+ Bucket: s3Bucket,
2416
+ Key: vttKey
2417
+ });
2418
+ const presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
2419
+ expiresIn: 3600
2420
+ // 1 hour
2421
+ });
2422
+ return presignedUrl;
2423
+ }
2424
+ async function createTextTrackOnMux(credentials, assetId, languageCode, trackName, presignedUrl) {
2425
+ "use step";
2426
+ const mux = new Mux4({
2427
+ tokenId: credentials.muxTokenId,
2428
+ tokenSecret: credentials.muxTokenSecret
2429
+ });
2430
+ const trackResponse = await mux.video.assets.createTrack(assetId, {
2431
+ type: "text",
2432
+ text_type: "subtitles",
2433
+ language_code: languageCode,
2434
+ name: trackName,
2435
+ url: presignedUrl
2436
+ });
2437
+ if (!trackResponse.id) {
2438
+ throw new Error("Failed to create text track: no track ID returned from Mux");
2439
+ }
2440
+ return trackResponse.id;
2441
+ }
2074
2442
  async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options) {
2443
+ "use workflow";
2075
2444
  const {
2076
- provider = DEFAULT_PROVIDER6,
2445
+ provider = "openai",
2077
2446
  model,
2078
2447
  s3Endpoint: providedS3Endpoint,
2079
2448
  s3Region: providedS3Region,
2080
2449
  s3Bucket: providedS3Bucket,
2081
2450
  s3AccessKeyId: providedS3AccessKeyId,
2082
2451
  s3SecretAccessKey: providedS3SecretAccessKey,
2083
- uploadToMux: uploadToMuxOption,
2084
- ...clientConfig
2452
+ uploadToMux: uploadToMuxOption
2085
2453
  } = options;
2086
- const resolvedProvider = provider;
2087
2454
  const s3Endpoint = providedS3Endpoint ?? env_default.S3_ENDPOINT;
2088
2455
  const s3Region = providedS3Region ?? env_default.S3_REGION ?? "auto";
2089
2456
  const s3Bucket = providedS3Bucket ?? env_default.S3_BUCKET;
2090
2457
  const s3AccessKeyId = providedS3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
2091
2458
  const s3SecretAccessKey = providedS3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
2092
2459
  const uploadToMux = uploadToMuxOption !== false;
2093
- const clients = createWorkflowClients(
2094
- { ...clientConfig, provider: resolvedProvider, model },
2095
- resolvedProvider
2460
+ const config = await createWorkflowConfig(
2461
+ { ...options, model },
2462
+ provider
2096
2463
  );
2097
2464
  if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
2098
2465
  throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
2099
2466
  }
2100
- const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
2101
- const signingContext = resolveSigningContext(options);
2467
+ const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(config.credentials, assetId);
2468
+ const signingContext = await resolveSigningContext(options);
2102
2469
  if (policy === "signed" && !signingContext) {
2103
2470
  throw new Error(
2104
2471
  "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
@@ -2119,120 +2486,84 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
2119
2486
  }
2120
2487
  let vttContent;
2121
2488
  try {
2122
- const vttResponse = await fetch(vttUrl);
2123
- if (!vttResponse.ok) {
2124
- throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
2125
- }
2126
- vttContent = await vttResponse.text();
2489
+ vttContent = await fetchVttFromMux(vttUrl);
2127
2490
  } catch (error) {
2128
2491
  throw new Error(`Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`);
2129
2492
  }
2130
- console.log(`\u2705 Found VTT content for language '${fromLanguageCode}'`);
2131
2493
  let translatedVtt;
2494
+ let usage;
2132
2495
  try {
2133
- const response = await (0, import_ai5.generateObject)({
2134
- model: clients.languageModel.model,
2135
- schema: translationSchema,
2136
- abortSignal: options.abortSignal,
2137
- messages: [
2138
- {
2139
- role: "user",
2140
- content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
2141
-
2142
- ${vttContent}`
2143
- }
2144
- ]
2496
+ const result = await translateVttWithAI({
2497
+ vttContent,
2498
+ fromLanguageCode,
2499
+ toLanguageCode,
2500
+ provider: config.provider,
2501
+ modelId: config.modelId,
2502
+ credentials: config.credentials,
2503
+ abortSignal: options.abortSignal
2145
2504
  });
2146
- translatedVtt = response.object.translation;
2505
+ translatedVtt = result.translatedVtt;
2506
+ usage = result.usage;
2147
2507
  } catch (error) {
2148
- throw new Error(`Failed to translate VTT with ${resolvedProvider}: ${error instanceof Error ? error.message : "Unknown error"}`);
2508
+ throw new Error(`Failed to translate VTT with ${config.provider}: ${error instanceof Error ? error.message : "Unknown error"}`);
2149
2509
  }
2150
- console.log(`
2151
- \u2705 Translation completed successfully!`);
2510
+ const sourceLanguage = getLanguageCodePair(fromLanguageCode);
2511
+ const targetLanguage = getLanguageCodePair(toLanguageCode);
2152
2512
  if (!uploadToMux) {
2153
- console.log(`\u2705 VTT translated to ${toLanguageCode} successfully!`);
2154
2513
  return {
2155
2514
  assetId,
2156
2515
  sourceLanguageCode: fromLanguageCode,
2157
2516
  targetLanguageCode: toLanguageCode,
2517
+ sourceLanguage,
2518
+ targetLanguage,
2158
2519
  originalVtt: vttContent,
2159
- translatedVtt
2520
+ translatedVtt,
2521
+ usage
2160
2522
  };
2161
2523
  }
2162
- console.log("\u{1F4E4} Uploading translated VTT to S3-compatible storage...");
2163
- const s3Client = new import_client_s32.S3Client({
2164
- region: s3Region,
2165
- endpoint: s3Endpoint,
2166
- credentials: {
2167
- accessKeyId: s3AccessKeyId,
2168
- secretAccessKey: s3SecretAccessKey
2169
- },
2170
- forcePathStyle: true
2171
- // Often needed for non-AWS S3 services
2172
- });
2173
- const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
2174
2524
  let presignedUrl;
2175
2525
  try {
2176
- const upload = new import_lib_storage2.Upload({
2177
- client: s3Client,
2178
- params: {
2179
- Bucket: s3Bucket,
2180
- Key: vttKey,
2181
- Body: translatedVtt,
2182
- ContentType: "text/vtt"
2183
- }
2184
- });
2185
- await upload.done();
2186
- console.log(`\u2705 VTT uploaded successfully to: ${vttKey}`);
2187
- const getObjectCommand = new import_client_s32.GetObjectCommand({
2188
- Bucket: s3Bucket,
2189
- Key: vttKey
2190
- });
2191
- presignedUrl = await (0, import_s3_request_presigner2.getSignedUrl)(s3Client, getObjectCommand, {
2192
- expiresIn: 3600
2193
- // 1 hour
2526
+ presignedUrl = await uploadVttToS3({
2527
+ translatedVtt,
2528
+ assetId,
2529
+ fromLanguageCode,
2530
+ toLanguageCode,
2531
+ s3Endpoint,
2532
+ s3Region,
2533
+ s3Bucket,
2534
+ s3AccessKeyId,
2535
+ s3SecretAccessKey
2194
2536
  });
2195
- console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
2196
2537
  } catch (error) {
2197
2538
  throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
2198
2539
  }
2199
- console.log("\u{1F4F9} Adding translated track to Mux asset...");
2200
2540
  let uploadedTrackId;
2201
2541
  try {
2202
- const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(toLanguageCode) || toLanguageCode.toUpperCase();
2542
+ const languageName = getLanguageName(toLanguageCode);
2203
2543
  const trackName = `${languageName} (auto-translated)`;
2204
- const trackResponse = await clients.mux.video.assets.createTrack(assetId, {
2205
- type: "text",
2206
- text_type: "subtitles",
2207
- language_code: toLanguageCode,
2208
- name: trackName,
2209
- url: presignedUrl
2210
- });
2211
- uploadedTrackId = trackResponse.id;
2212
- console.log(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
2213
- console.log(`\u{1F4CB} Track name: "${trackName}"`);
2544
+ uploadedTrackId = await createTextTrackOnMux(config.credentials, assetId, toLanguageCode, trackName, presignedUrl);
2214
2545
  } catch (error) {
2215
- console.warn(`\u26A0\uFE0F Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
2216
- console.log("\u{1F517} You can manually add the track using this presigned URL:");
2217
- console.log(presignedUrl);
2546
+ console.warn(`Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
2218
2547
  }
2219
2548
  return {
2220
2549
  assetId,
2221
2550
  sourceLanguageCode: fromLanguageCode,
2222
2551
  targetLanguageCode: toLanguageCode,
2552
+ sourceLanguage,
2553
+ targetLanguage,
2223
2554
  originalVtt: vttContent,
2224
2555
  translatedVtt,
2225
2556
  uploadedTrackId,
2226
- presignedUrl
2557
+ presignedUrl,
2558
+ usage
2227
2559
  };
2228
2560
  }
2229
2561
 
2230
2562
  // src/index.ts
2231
2563
  var version = "0.1.0";
2232
- // Annotate the CommonJS export names for ESM import in node:
2233
- 0 && (module.exports = {
2234
- primitives,
2564
+ export {
2565
+ primitives_exports as primitives,
2235
2566
  version,
2236
- workflows
2237
- });
2567
+ workflows_exports as workflows
2568
+ };
2238
2569
  //# sourceMappingURL=index.js.map