@mux/ai 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +226 -821
- package/dist/{index-Bnv7tv90.d.ts → index-CMZYZcj6.d.ts} +122 -3
- package/dist/index.d.ts +1 -1
- package/dist/index.js +955 -624
- package/dist/index.js.map +1 -1
- package/dist/primitives/index.js +18 -71
- package/dist/primitives/index.js.map +1 -1
- package/dist/workflows/index.d.ts +1 -1
- package/dist/workflows/index.js +953 -638
- package/dist/workflows/index.js.map +1 -1
- package/package.json +21 -23
- package/dist/index-BNnz9P_5.d.mts +0 -144
- package/dist/index-vJ5r2FNm.d.mts +0 -477
- package/dist/index.d.mts +0 -13
- package/dist/index.mjs +0 -2205
- package/dist/index.mjs.map +0 -1
- package/dist/primitives/index.d.mts +0 -3
- package/dist/primitives/index.mjs +0 -358
- package/dist/primitives/index.mjs.map +0 -1
- package/dist/types-ktXDZ93V.d.mts +0 -137
- package/dist/workflows/index.d.mts +0 -8
- package/dist/workflows/index.mjs +0 -2168
- package/dist/workflows/index.mjs.map +0 -1
package/dist/index.js
CHANGED
|
@@ -1,40 +1,8 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __create = Object.create;
|
|
3
1
|
var __defProp = Object.defineProperty;
|
|
4
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
2
|
var __export = (target, all) => {
|
|
9
3
|
for (var name in all)
|
|
10
4
|
__defProp(target, name, { get: all[name], enumerable: true });
|
|
11
5
|
};
|
|
12
|
-
var __copyProps = (to, from, except, desc) => {
|
|
13
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
14
|
-
for (let key of __getOwnPropNames(from))
|
|
15
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
16
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
17
|
-
}
|
|
18
|
-
return to;
|
|
19
|
-
};
|
|
20
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
-
mod
|
|
27
|
-
));
|
|
28
|
-
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
|
-
|
|
30
|
-
// src/index.ts
|
|
31
|
-
var index_exports = {};
|
|
32
|
-
__export(index_exports, {
|
|
33
|
-
primitives: () => primitives_exports,
|
|
34
|
-
version: () => version,
|
|
35
|
-
workflows: () => workflows_exports
|
|
36
|
-
});
|
|
37
|
-
module.exports = __toCommonJS(index_exports);
|
|
38
6
|
|
|
39
7
|
// src/primitives/index.ts
|
|
40
8
|
var primitives_exports = {};
|
|
@@ -57,33 +25,25 @@ __export(primitives_exports, {
|
|
|
57
25
|
});
|
|
58
26
|
|
|
59
27
|
// src/lib/url-signing.ts
|
|
60
|
-
|
|
28
|
+
import Mux from "@mux/mux-node";
|
|
61
29
|
|
|
62
30
|
// src/env.ts
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
var import_dotenv_expand = require("dotenv-expand");
|
|
66
|
-
var import_zod = require("zod");
|
|
67
|
-
(0, import_dotenv_expand.expand)((0, import_dotenv.config)({
|
|
68
|
-
path: import_node_path.default.resolve(
|
|
69
|
-
process.cwd(),
|
|
70
|
-
process.env.NODE_ENV === "test" ? ".env.test" : ".env"
|
|
71
|
-
)
|
|
72
|
-
}));
|
|
31
|
+
import { z } from "zod";
|
|
32
|
+
import "dotenv/config";
|
|
73
33
|
function optionalString(description, message) {
|
|
74
|
-
return
|
|
34
|
+
return z.preprocess(
|
|
75
35
|
(value) => typeof value === "string" && value.trim().length === 0 ? void 0 : value,
|
|
76
|
-
|
|
36
|
+
z.string().trim().min(1, message).optional()
|
|
77
37
|
).describe(description);
|
|
78
38
|
}
|
|
79
39
|
function requiredString(description, message) {
|
|
80
|
-
return
|
|
40
|
+
return z.preprocess(
|
|
81
41
|
(value) => typeof value === "string" ? value.trim().length > 0 ? value.trim() : void 0 : value,
|
|
82
|
-
|
|
42
|
+
z.string().trim().min(1, message)
|
|
83
43
|
).describe(description);
|
|
84
44
|
}
|
|
85
|
-
var EnvSchema =
|
|
86
|
-
NODE_ENV:
|
|
45
|
+
var EnvSchema = z.object({
|
|
46
|
+
NODE_ENV: z.string().default("development").describe("Runtime environment."),
|
|
87
47
|
MUX_TOKEN_ID: requiredString("Mux access token ID.", "Required to access Mux APIs"),
|
|
88
48
|
MUX_TOKEN_SECRET: requiredString("Mux access token secret.", "Required to access Mux APIs"),
|
|
89
49
|
MUX_SIGNING_KEY: optionalString("Mux signing key ID for signed playback URLs.", "Used to sign playback URLs"),
|
|
@@ -112,16 +72,17 @@ var env = parseEnv();
|
|
|
112
72
|
var env_default = env;
|
|
113
73
|
|
|
114
74
|
// src/lib/url-signing.ts
|
|
115
|
-
function resolveSigningContext(
|
|
116
|
-
|
|
117
|
-
const
|
|
75
|
+
async function resolveSigningContext(config) {
|
|
76
|
+
"use step";
|
|
77
|
+
const keyId = config.muxSigningKey ?? env_default.MUX_SIGNING_KEY;
|
|
78
|
+
const keySecret = config.muxPrivateKey ?? env_default.MUX_PRIVATE_KEY;
|
|
118
79
|
if (!keyId || !keySecret) {
|
|
119
80
|
return void 0;
|
|
120
81
|
}
|
|
121
82
|
return { keyId, keySecret };
|
|
122
83
|
}
|
|
123
84
|
function createSigningClient(context) {
|
|
124
|
-
return new
|
|
85
|
+
return new Mux({
|
|
125
86
|
// These are not needed for signing, but the SDK requires them
|
|
126
87
|
// Using empty strings as we only need the jwt functionality
|
|
127
88
|
tokenId: env_default.MUX_TOKEN_ID || "",
|
|
@@ -131,6 +92,7 @@ function createSigningClient(context) {
|
|
|
131
92
|
});
|
|
132
93
|
}
|
|
133
94
|
async function signPlaybackId(playbackId, context, type = "video", params) {
|
|
95
|
+
"use step";
|
|
134
96
|
const client = createSigningClient(context);
|
|
135
97
|
const stringParams = params ? Object.fromEntries(
|
|
136
98
|
Object.entries(params).map(([key, value]) => [key, String(value)])
|
|
@@ -142,6 +104,7 @@ async function signPlaybackId(playbackId, context, type = "video", params) {
|
|
|
142
104
|
});
|
|
143
105
|
}
|
|
144
106
|
async function signUrl(url, playbackId, context, type = "video", params) {
|
|
107
|
+
"use step";
|
|
145
108
|
const token = await signPlaybackId(playbackId, context, type, params);
|
|
146
109
|
const separator = url.includes("?") ? "&" : "?";
|
|
147
110
|
return `${url}${separator}token=${token}`;
|
|
@@ -150,6 +113,7 @@ async function signUrl(url, playbackId, context, type = "video", params) {
|
|
|
150
113
|
// src/primitives/storyboards.ts
|
|
151
114
|
var DEFAULT_STORYBOARD_WIDTH = 640;
|
|
152
115
|
async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, signingContext) {
|
|
116
|
+
"use step";
|
|
153
117
|
const baseUrl = `https://image.mux.com/${playbackId}/storyboard.png`;
|
|
154
118
|
if (signingContext) {
|
|
155
119
|
return signUrl(baseUrl, playbackId, signingContext, "storyboard", { width });
|
|
@@ -244,6 +208,7 @@ function chunkText(text, strategy) {
|
|
|
244
208
|
|
|
245
209
|
// src/primitives/thumbnails.ts
|
|
246
210
|
async function getThumbnailUrls(playbackId, duration, options = {}) {
|
|
211
|
+
"use step";
|
|
247
212
|
const { interval = 10, width = 640, signingContext } = options;
|
|
248
213
|
const timestamps = [];
|
|
249
214
|
if (duration <= 50) {
|
|
@@ -375,6 +340,7 @@ function parseVTTCues(vttContent) {
|
|
|
375
340
|
return cues;
|
|
376
341
|
}
|
|
377
342
|
async function buildTranscriptUrl(playbackId, trackId, signingContext) {
|
|
343
|
+
"use step";
|
|
378
344
|
const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
|
|
379
345
|
if (signingContext) {
|
|
380
346
|
return signUrl(baseUrl, playbackId, signingContext, "video");
|
|
@@ -382,6 +348,7 @@ async function buildTranscriptUrl(playbackId, trackId, signingContext) {
|
|
|
382
348
|
return baseUrl;
|
|
383
349
|
}
|
|
384
350
|
async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
|
|
351
|
+
"use step";
|
|
385
352
|
const { languageCode, cleanTranscript = true, signingContext } = options;
|
|
386
353
|
const track = findCaptionTrack(asset, languageCode);
|
|
387
354
|
if (!track) {
|
|
@@ -424,130 +391,17 @@ __export(workflows_exports, {
|
|
|
424
391
|
});
|
|
425
392
|
|
|
426
393
|
// src/workflows/burned-in-captions.ts
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
function ownKeys(object, enumerableOnly) {
|
|
431
|
-
var keys = Object.keys(object);
|
|
432
|
-
if (Object.getOwnPropertySymbols) {
|
|
433
|
-
var symbols = Object.getOwnPropertySymbols(object);
|
|
434
|
-
enumerableOnly && (symbols = symbols.filter(function(sym) {
|
|
435
|
-
return Object.getOwnPropertyDescriptor(object, sym).enumerable;
|
|
436
|
-
})), keys.push.apply(keys, symbols);
|
|
437
|
-
}
|
|
438
|
-
return keys;
|
|
439
|
-
}
|
|
440
|
-
function _objectSpread(target) {
|
|
441
|
-
for (var i = 1; i < arguments.length; i++) {
|
|
442
|
-
var source = null != arguments[i] ? arguments[i] : {};
|
|
443
|
-
i % 2 ? ownKeys(Object(source), true).forEach(function(key) {
|
|
444
|
-
_defineProperty(target, key, source[key]);
|
|
445
|
-
}) : Object.getOwnPropertyDescriptors ? Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)) : ownKeys(Object(source)).forEach(function(key) {
|
|
446
|
-
Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
|
|
447
|
-
});
|
|
448
|
-
}
|
|
449
|
-
return target;
|
|
450
|
-
}
|
|
451
|
-
function _defineProperty(obj, key, value) {
|
|
452
|
-
key = _toPropertyKey(key);
|
|
453
|
-
if (key in obj) {
|
|
454
|
-
Object.defineProperty(obj, key, { value, enumerable: true, configurable: true, writable: true });
|
|
455
|
-
} else {
|
|
456
|
-
obj[key] = value;
|
|
457
|
-
}
|
|
458
|
-
return obj;
|
|
459
|
-
}
|
|
460
|
-
function _toPropertyKey(arg) {
|
|
461
|
-
var key = _toPrimitive(arg, "string");
|
|
462
|
-
return typeof key === "symbol" ? key : String(key);
|
|
463
|
-
}
|
|
464
|
-
function _toPrimitive(input, hint) {
|
|
465
|
-
if (typeof input !== "object" || input === null) return input;
|
|
466
|
-
var prim = input[Symbol.toPrimitive];
|
|
467
|
-
if (prim !== void 0) {
|
|
468
|
-
var res = prim.call(input, hint || "default");
|
|
469
|
-
if (typeof res !== "object") return res;
|
|
470
|
-
throw new TypeError("@@toPrimitive must return a primitive value.");
|
|
471
|
-
}
|
|
472
|
-
return (hint === "string" ? String : Number)(input);
|
|
473
|
-
}
|
|
474
|
-
var dedent = createDedent({});
|
|
475
|
-
var dedent_default = dedent;
|
|
476
|
-
function createDedent(options) {
|
|
477
|
-
dedent2.withOptions = (newOptions) => createDedent(_objectSpread(_objectSpread({}, options), newOptions));
|
|
478
|
-
return dedent2;
|
|
479
|
-
function dedent2(strings, ...values) {
|
|
480
|
-
const raw = typeof strings === "string" ? [strings] : strings.raw;
|
|
481
|
-
const {
|
|
482
|
-
alignValues = false,
|
|
483
|
-
escapeSpecialCharacters = Array.isArray(strings),
|
|
484
|
-
trimWhitespace = true
|
|
485
|
-
} = options;
|
|
486
|
-
let result = "";
|
|
487
|
-
for (let i = 0; i < raw.length; i++) {
|
|
488
|
-
let next = raw[i];
|
|
489
|
-
if (escapeSpecialCharacters) {
|
|
490
|
-
next = next.replace(/\\\n[ \t]*/g, "").replace(/\\`/g, "`").replace(/\\\$/g, "$").replace(/\\\{/g, "{");
|
|
491
|
-
}
|
|
492
|
-
result += next;
|
|
493
|
-
if (i < values.length) {
|
|
494
|
-
const value = alignValues ? alignValue(values[i], result) : values[i];
|
|
495
|
-
result += value;
|
|
496
|
-
}
|
|
497
|
-
}
|
|
498
|
-
const lines = result.split("\n");
|
|
499
|
-
let mindent = null;
|
|
500
|
-
for (const l of lines) {
|
|
501
|
-
const m = l.match(/^(\s+)\S+/);
|
|
502
|
-
if (m) {
|
|
503
|
-
const indent = m[1].length;
|
|
504
|
-
if (!mindent) {
|
|
505
|
-
mindent = indent;
|
|
506
|
-
} else {
|
|
507
|
-
mindent = Math.min(mindent, indent);
|
|
508
|
-
}
|
|
509
|
-
}
|
|
510
|
-
}
|
|
511
|
-
if (mindent !== null) {
|
|
512
|
-
const m = mindent;
|
|
513
|
-
result = lines.map((l) => l[0] === " " || l[0] === " " ? l.slice(m) : l).join("\n");
|
|
514
|
-
}
|
|
515
|
-
if (trimWhitespace) {
|
|
516
|
-
result = result.trim();
|
|
517
|
-
}
|
|
518
|
-
if (escapeSpecialCharacters) {
|
|
519
|
-
result = result.replace(/\\n/g, "\n");
|
|
520
|
-
}
|
|
521
|
-
return result;
|
|
522
|
-
}
|
|
523
|
-
}
|
|
524
|
-
function alignValue(value, precedingText) {
|
|
525
|
-
if (typeof value !== "string" || !value.includes("\n")) {
|
|
526
|
-
return value;
|
|
527
|
-
}
|
|
528
|
-
const currentLine = precedingText.slice(precedingText.lastIndexOf("\n") + 1);
|
|
529
|
-
const indentMatch = currentLine.match(/^(\s+)/);
|
|
530
|
-
if (indentMatch) {
|
|
531
|
-
const indent = indentMatch[1];
|
|
532
|
-
return value.replace(/\n/g, `
|
|
533
|
-
${indent}`);
|
|
534
|
-
}
|
|
535
|
-
return value;
|
|
536
|
-
}
|
|
537
|
-
|
|
538
|
-
// src/workflows/burned-in-captions.ts
|
|
539
|
-
var import_zod2 = require("zod");
|
|
540
|
-
|
|
541
|
-
// src/lib/client-factory.ts
|
|
542
|
-
var import_mux_node2 = __toESM(require("@mux/mux-node"));
|
|
394
|
+
import { generateObject } from "ai";
|
|
395
|
+
import dedent from "dedent";
|
|
396
|
+
import { z as z2 } from "zod";
|
|
543
397
|
|
|
544
398
|
// src/lib/providers.ts
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
399
|
+
import { createAnthropic } from "@ai-sdk/anthropic";
|
|
400
|
+
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
|
401
|
+
import { createOpenAI } from "@ai-sdk/openai";
|
|
548
402
|
var DEFAULT_LANGUAGE_MODELS = {
|
|
549
|
-
openai: "gpt-5
|
|
550
|
-
anthropic: "claude-
|
|
403
|
+
openai: "gpt-5.1",
|
|
404
|
+
anthropic: "claude-sonnet-4-5",
|
|
551
405
|
google: "gemini-2.5-flash"
|
|
552
406
|
};
|
|
553
407
|
var DEFAULT_EMBEDDING_MODELS = {
|
|
@@ -560,6 +414,52 @@ function requireEnv(value, name) {
|
|
|
560
414
|
}
|
|
561
415
|
return value;
|
|
562
416
|
}
|
|
417
|
+
function createLanguageModelFromConfig(provider, modelId, credentials) {
|
|
418
|
+
switch (provider) {
|
|
419
|
+
case "openai": {
|
|
420
|
+
const apiKey = credentials.openaiApiKey;
|
|
421
|
+
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
422
|
+
const openai = createOpenAI({ apiKey });
|
|
423
|
+
return openai(modelId);
|
|
424
|
+
}
|
|
425
|
+
case "anthropic": {
|
|
426
|
+
const apiKey = credentials.anthropicApiKey;
|
|
427
|
+
requireEnv(apiKey, "ANTHROPIC_API_KEY");
|
|
428
|
+
const anthropic = createAnthropic({ apiKey });
|
|
429
|
+
return anthropic(modelId);
|
|
430
|
+
}
|
|
431
|
+
case "google": {
|
|
432
|
+
const apiKey = credentials.googleApiKey;
|
|
433
|
+
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
434
|
+
const google = createGoogleGenerativeAI({ apiKey });
|
|
435
|
+
return google(modelId);
|
|
436
|
+
}
|
|
437
|
+
default: {
|
|
438
|
+
const exhaustiveCheck = provider;
|
|
439
|
+
throw new Error(`Unsupported provider: ${exhaustiveCheck}`);
|
|
440
|
+
}
|
|
441
|
+
}
|
|
442
|
+
}
|
|
443
|
+
function createEmbeddingModelFromConfig(provider, modelId, credentials) {
|
|
444
|
+
switch (provider) {
|
|
445
|
+
case "openai": {
|
|
446
|
+
const apiKey = credentials.openaiApiKey;
|
|
447
|
+
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
448
|
+
const openai = createOpenAI({ apiKey });
|
|
449
|
+
return openai.embedding(modelId);
|
|
450
|
+
}
|
|
451
|
+
case "google": {
|
|
452
|
+
const apiKey = credentials.googleApiKey;
|
|
453
|
+
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
454
|
+
const google = createGoogleGenerativeAI({ apiKey });
|
|
455
|
+
return google.textEmbeddingModel(modelId);
|
|
456
|
+
}
|
|
457
|
+
default: {
|
|
458
|
+
const exhaustiveCheck = provider;
|
|
459
|
+
throw new Error(`Unsupported embedding provider: ${exhaustiveCheck}`);
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
}
|
|
563
463
|
function resolveLanguageModel(options = {}) {
|
|
564
464
|
const provider = options.provider || "openai";
|
|
565
465
|
const modelId = options.model || DEFAULT_LANGUAGE_MODELS[provider];
|
|
@@ -567,7 +467,7 @@ function resolveLanguageModel(options = {}) {
|
|
|
567
467
|
case "openai": {
|
|
568
468
|
const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
|
|
569
469
|
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
570
|
-
const openai =
|
|
470
|
+
const openai = createOpenAI({
|
|
571
471
|
apiKey
|
|
572
472
|
});
|
|
573
473
|
return {
|
|
@@ -579,7 +479,7 @@ function resolveLanguageModel(options = {}) {
|
|
|
579
479
|
case "anthropic": {
|
|
580
480
|
const apiKey = options.anthropicApiKey ?? env_default.ANTHROPIC_API_KEY;
|
|
581
481
|
requireEnv(apiKey, "ANTHROPIC_API_KEY");
|
|
582
|
-
const anthropic =
|
|
482
|
+
const anthropic = createAnthropic({
|
|
583
483
|
apiKey
|
|
584
484
|
});
|
|
585
485
|
return {
|
|
@@ -591,7 +491,7 @@ function resolveLanguageModel(options = {}) {
|
|
|
591
491
|
case "google": {
|
|
592
492
|
const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
|
|
593
493
|
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
594
|
-
const google =
|
|
494
|
+
const google = createGoogleGenerativeAI({
|
|
595
495
|
apiKey
|
|
596
496
|
});
|
|
597
497
|
return {
|
|
@@ -613,7 +513,7 @@ function resolveEmbeddingModel(options = {}) {
|
|
|
613
513
|
case "openai": {
|
|
614
514
|
const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
|
|
615
515
|
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
616
|
-
const openai =
|
|
516
|
+
const openai = createOpenAI({
|
|
617
517
|
apiKey
|
|
618
518
|
});
|
|
619
519
|
return {
|
|
@@ -625,7 +525,7 @@ function resolveEmbeddingModel(options = {}) {
|
|
|
625
525
|
case "google": {
|
|
626
526
|
const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
|
|
627
527
|
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
628
|
-
const google =
|
|
528
|
+
const google = createGoogleGenerativeAI({
|
|
629
529
|
apiKey
|
|
630
530
|
});
|
|
631
531
|
return {
|
|
@@ -642,7 +542,8 @@ function resolveEmbeddingModel(options = {}) {
|
|
|
642
542
|
}
|
|
643
543
|
|
|
644
544
|
// src/lib/client-factory.ts
|
|
645
|
-
function validateCredentials(options, requiredProvider) {
|
|
545
|
+
async function validateCredentials(options, requiredProvider) {
|
|
546
|
+
"use step";
|
|
646
547
|
const muxTokenId = options.muxTokenId ?? env_default.MUX_TOKEN_ID;
|
|
647
548
|
const muxTokenSecret = options.muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
|
|
648
549
|
const openaiApiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
|
|
@@ -676,32 +577,24 @@ function validateCredentials(options, requiredProvider) {
|
|
|
676
577
|
googleApiKey
|
|
677
578
|
};
|
|
678
579
|
}
|
|
679
|
-
function
|
|
680
|
-
|
|
681
|
-
throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
|
|
682
|
-
}
|
|
683
|
-
return new import_mux_node2.default({
|
|
684
|
-
tokenId: credentials.muxTokenId,
|
|
685
|
-
tokenSecret: credentials.muxTokenSecret
|
|
686
|
-
});
|
|
687
|
-
}
|
|
688
|
-
function createWorkflowClients(options, provider) {
|
|
580
|
+
async function createWorkflowConfig(options, provider) {
|
|
581
|
+
"use step";
|
|
689
582
|
const providerToUse = provider || options.provider || "openai";
|
|
690
|
-
const credentials = validateCredentials(options, providerToUse);
|
|
691
|
-
const
|
|
583
|
+
const credentials = await validateCredentials(options, providerToUse);
|
|
584
|
+
const resolved = resolveLanguageModel({
|
|
692
585
|
...options,
|
|
693
586
|
provider: providerToUse
|
|
694
587
|
});
|
|
695
588
|
return {
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
589
|
+
credentials,
|
|
590
|
+
provider: resolved.provider,
|
|
591
|
+
modelId: resolved.modelId
|
|
699
592
|
};
|
|
700
593
|
}
|
|
701
594
|
|
|
702
595
|
// src/lib/image-download.ts
|
|
703
|
-
|
|
704
|
-
|
|
596
|
+
import { Buffer as Buffer2 } from "buffer";
|
|
597
|
+
import pRetry, { AbortError } from "p-retry";
|
|
705
598
|
var DEFAULT_OPTIONS = {
|
|
706
599
|
timeout: 1e4,
|
|
707
600
|
retries: 3,
|
|
@@ -710,9 +603,10 @@ var DEFAULT_OPTIONS = {
|
|
|
710
603
|
exponentialBackoff: true
|
|
711
604
|
};
|
|
712
605
|
async function downloadImageAsBase64(url, options = {}) {
|
|
606
|
+
"use step";
|
|
713
607
|
const opts = { ...DEFAULT_OPTIONS, ...options };
|
|
714
608
|
let attemptCount = 0;
|
|
715
|
-
return (
|
|
609
|
+
return pRetry(
|
|
716
610
|
async () => {
|
|
717
611
|
attemptCount++;
|
|
718
612
|
const controller = new AbortController();
|
|
@@ -727,18 +621,18 @@ async function downloadImageAsBase64(url, options = {}) {
|
|
|
727
621
|
clearTimeout(timeoutId);
|
|
728
622
|
if (!response.ok) {
|
|
729
623
|
if (response.status >= 400 && response.status < 500 && response.status !== 429) {
|
|
730
|
-
throw new
|
|
624
|
+
throw new AbortError(`HTTP ${response.status}: ${response.statusText}`);
|
|
731
625
|
}
|
|
732
626
|
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
733
627
|
}
|
|
734
628
|
const contentType = response.headers.get("content-type");
|
|
735
629
|
if (!contentType?.startsWith("image/")) {
|
|
736
|
-
throw new
|
|
630
|
+
throw new AbortError(`Invalid content type: ${contentType}. Expected image/*`);
|
|
737
631
|
}
|
|
738
632
|
const arrayBuffer = await response.arrayBuffer();
|
|
739
|
-
const buffer =
|
|
633
|
+
const buffer = Buffer2.from(arrayBuffer);
|
|
740
634
|
if (buffer.length === 0) {
|
|
741
|
-
throw new
|
|
635
|
+
throw new AbortError("Downloaded image is empty");
|
|
742
636
|
}
|
|
743
637
|
const base64Data = `data:${contentType};base64,${buffer.toString("base64")}`;
|
|
744
638
|
return {
|
|
@@ -751,7 +645,7 @@ async function downloadImageAsBase64(url, options = {}) {
|
|
|
751
645
|
};
|
|
752
646
|
} catch (error) {
|
|
753
647
|
clearTimeout(timeoutId);
|
|
754
|
-
if (error instanceof
|
|
648
|
+
if (error instanceof AbortError) {
|
|
755
649
|
throw error;
|
|
756
650
|
}
|
|
757
651
|
if (error instanceof Error) {
|
|
@@ -780,6 +674,7 @@ async function downloadImageAsBase64(url, options = {}) {
|
|
|
780
674
|
);
|
|
781
675
|
}
|
|
782
676
|
async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
|
|
677
|
+
"use step";
|
|
783
678
|
const results = [];
|
|
784
679
|
for (let i = 0; i < urls.length; i += maxConcurrent) {
|
|
785
680
|
const batch = urls.slice(i, i + maxConcurrent);
|
|
@@ -791,6 +686,7 @@ async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
|
|
|
791
686
|
}
|
|
792
687
|
|
|
793
688
|
// src/lib/mux-assets.ts
|
|
689
|
+
import Mux2 from "@mux/mux-node";
|
|
794
690
|
function getPlaybackId(asset) {
|
|
795
691
|
const playbackIds = asset.playback_ids || [];
|
|
796
692
|
const publicPlaybackId = playbackIds.find((pid) => pid.policy === "public");
|
|
@@ -805,7 +701,12 @@ function getPlaybackId(asset) {
|
|
|
805
701
|
"No public or signed playback ID found for this asset. A public or signed playback ID is required. DRM playback IDs are not currently supported."
|
|
806
702
|
);
|
|
807
703
|
}
|
|
808
|
-
async function getPlaybackIdForAsset(
|
|
704
|
+
async function getPlaybackIdForAsset(credentials, assetId) {
|
|
705
|
+
"use step";
|
|
706
|
+
const mux = new Mux2({
|
|
707
|
+
tokenId: credentials.muxTokenId,
|
|
708
|
+
tokenSecret: credentials.muxTokenSecret
|
|
709
|
+
});
|
|
809
710
|
const asset = await mux.video.assets.retrieve(assetId);
|
|
810
711
|
const { id: playbackId, policy } = getPlaybackId(asset);
|
|
811
712
|
return { asset, playbackId, policy };
|
|
@@ -844,8 +745,8 @@ function resolveSection(defaultSection, override) {
|
|
|
844
745
|
}
|
|
845
746
|
return override;
|
|
846
747
|
}
|
|
847
|
-
function createPromptBuilder(
|
|
848
|
-
const { template, sectionOrder } =
|
|
748
|
+
function createPromptBuilder(config) {
|
|
749
|
+
const { template, sectionOrder } = config;
|
|
849
750
|
const getSection = (section, override) => {
|
|
850
751
|
const resolved = resolveSection(template[section], override);
|
|
851
752
|
return renderSection(resolved);
|
|
@@ -886,12 +787,12 @@ function createToneSection(instruction) {
|
|
|
886
787
|
}
|
|
887
788
|
|
|
888
789
|
// src/workflows/burned-in-captions.ts
|
|
889
|
-
var burnedInCaptionsSchema =
|
|
890
|
-
hasBurnedInCaptions:
|
|
891
|
-
confidence:
|
|
892
|
-
detectedLanguage:
|
|
790
|
+
var burnedInCaptionsSchema = z2.object({
|
|
791
|
+
hasBurnedInCaptions: z2.boolean(),
|
|
792
|
+
confidence: z2.number().min(0).max(1),
|
|
793
|
+
detectedLanguage: z2.string().nullable()
|
|
893
794
|
});
|
|
894
|
-
var SYSTEM_PROMPT =
|
|
795
|
+
var SYSTEM_PROMPT = dedent`
|
|
895
796
|
<role>
|
|
896
797
|
You are an expert at analyzing video frames to detect burned-in captions (also called open captions or hardcoded subtitles).
|
|
897
798
|
These are text overlays that are permanently embedded in the video image, common on TikTok, Instagram Reels, and other social media platforms.
|
|
@@ -934,14 +835,14 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
|
|
|
934
835
|
template: {
|
|
935
836
|
task: {
|
|
936
837
|
tag: "task",
|
|
937
|
-
content:
|
|
838
|
+
content: dedent`
|
|
938
839
|
Analyze the provided video storyboard to detect burned-in captions (hardcoded subtitles).
|
|
939
840
|
Count frames with text vs no text, note position consistency and whether text changes across frames.
|
|
940
841
|
Decide if captions exist, with confidence (0.0-1.0) and detected language if any.`
|
|
941
842
|
},
|
|
942
843
|
analysisSteps: {
|
|
943
844
|
tag: "analysis_steps",
|
|
944
|
-
content:
|
|
845
|
+
content: dedent`
|
|
945
846
|
1. COUNT how many frames contain text overlays vs. how many don't
|
|
946
847
|
2. Check if text appears in consistent positions across multiple frames
|
|
947
848
|
3. Verify text changes content between frames (indicating dialogue/narration)
|
|
@@ -950,7 +851,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
|
|
|
950
851
|
},
|
|
951
852
|
positiveIndicators: {
|
|
952
853
|
tag: "classify_as_captions",
|
|
953
|
-
content:
|
|
854
|
+
content: dedent`
|
|
954
855
|
ONLY classify as burned-in captions if:
|
|
955
856
|
- Text appears in multiple frames (not just 1-2 end frames)
|
|
956
857
|
- Text positioning is consistent across those frames
|
|
@@ -959,7 +860,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
|
|
|
959
860
|
},
|
|
960
861
|
negativeIndicators: {
|
|
961
862
|
tag: "not_captions",
|
|
962
|
-
content:
|
|
863
|
+
content: dedent`
|
|
963
864
|
DO NOT classify as burned-in captions:
|
|
964
865
|
- Marketing taglines appearing only in final 1-2 frames
|
|
965
866
|
- Single words or phrases that don't change between frames
|
|
@@ -974,65 +875,97 @@ function buildUserPrompt(promptOverrides) {
|
|
|
974
875
|
return burnedInCaptionsPromptBuilder.build(promptOverrides);
|
|
975
876
|
}
|
|
976
877
|
var DEFAULT_PROVIDER = "openai";
|
|
878
|
+
async function fetchImageAsBase64(imageUrl, imageDownloadOptions) {
|
|
879
|
+
"use step";
|
|
880
|
+
const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
|
|
881
|
+
return downloadResult.base64Data;
|
|
882
|
+
}
|
|
883
|
+
async function analyzeStoryboard({
|
|
884
|
+
imageDataUrl,
|
|
885
|
+
provider,
|
|
886
|
+
modelId,
|
|
887
|
+
credentials,
|
|
888
|
+
userPrompt,
|
|
889
|
+
systemPrompt
|
|
890
|
+
}) {
|
|
891
|
+
"use step";
|
|
892
|
+
const model = createLanguageModelFromConfig(
|
|
893
|
+
provider,
|
|
894
|
+
modelId,
|
|
895
|
+
credentials
|
|
896
|
+
);
|
|
897
|
+
const response = await generateObject({
|
|
898
|
+
model,
|
|
899
|
+
schema: burnedInCaptionsSchema,
|
|
900
|
+
experimental_telemetry: { isEnabled: true },
|
|
901
|
+
messages: [
|
|
902
|
+
{
|
|
903
|
+
role: "system",
|
|
904
|
+
content: systemPrompt
|
|
905
|
+
},
|
|
906
|
+
{
|
|
907
|
+
role: "user",
|
|
908
|
+
content: [
|
|
909
|
+
{ type: "text", text: userPrompt },
|
|
910
|
+
{ type: "image", image: imageDataUrl }
|
|
911
|
+
]
|
|
912
|
+
}
|
|
913
|
+
]
|
|
914
|
+
});
|
|
915
|
+
return {
|
|
916
|
+
result: response.object,
|
|
917
|
+
usage: {
|
|
918
|
+
inputTokens: response.usage.inputTokens,
|
|
919
|
+
outputTokens: response.usage.outputTokens,
|
|
920
|
+
totalTokens: response.usage.totalTokens,
|
|
921
|
+
reasoningTokens: response.usage.reasoningTokens,
|
|
922
|
+
cachedInputTokens: response.usage.cachedInputTokens
|
|
923
|
+
}
|
|
924
|
+
};
|
|
925
|
+
}
|
|
977
926
|
async function hasBurnedInCaptions(assetId, options = {}) {
|
|
927
|
+
"use workflow";
|
|
978
928
|
const {
|
|
979
929
|
provider = DEFAULT_PROVIDER,
|
|
980
930
|
model,
|
|
981
931
|
imageSubmissionMode = "url",
|
|
982
932
|
imageDownloadOptions,
|
|
983
933
|
promptOverrides,
|
|
984
|
-
...
|
|
934
|
+
...config
|
|
985
935
|
} = options;
|
|
986
936
|
const userPrompt = buildUserPrompt(promptOverrides);
|
|
987
|
-
const
|
|
988
|
-
{ ...
|
|
937
|
+
const workflowConfig = await createWorkflowConfig(
|
|
938
|
+
{ ...config, model },
|
|
989
939
|
provider
|
|
990
940
|
);
|
|
991
|
-
const { playbackId, policy } = await getPlaybackIdForAsset(
|
|
992
|
-
const signingContext = resolveSigningContext(options);
|
|
941
|
+
const { playbackId, policy } = await getPlaybackIdForAsset(workflowConfig.credentials, assetId);
|
|
942
|
+
const signingContext = await resolveSigningContext(options);
|
|
993
943
|
if (policy === "signed" && !signingContext) {
|
|
994
944
|
throw new Error(
|
|
995
945
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
996
946
|
);
|
|
997
947
|
}
|
|
998
948
|
const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
|
|
999
|
-
const analyzeStoryboard = async (imageDataUrl) => {
|
|
1000
|
-
const response = await (0, import_ai.generateObject)({
|
|
1001
|
-
model: clients.languageModel.model,
|
|
1002
|
-
schema: burnedInCaptionsSchema,
|
|
1003
|
-
abortSignal: options.abortSignal,
|
|
1004
|
-
experimental_telemetry: { isEnabled: true },
|
|
1005
|
-
messages: [
|
|
1006
|
-
{
|
|
1007
|
-
role: "system",
|
|
1008
|
-
content: SYSTEM_PROMPT
|
|
1009
|
-
},
|
|
1010
|
-
{
|
|
1011
|
-
role: "user",
|
|
1012
|
-
content: [
|
|
1013
|
-
{ type: "text", text: userPrompt },
|
|
1014
|
-
{ type: "image", image: imageDataUrl }
|
|
1015
|
-
]
|
|
1016
|
-
}
|
|
1017
|
-
]
|
|
1018
|
-
});
|
|
1019
|
-
return {
|
|
1020
|
-
result: response.object,
|
|
1021
|
-
usage: {
|
|
1022
|
-
inputTokens: response.usage.inputTokens,
|
|
1023
|
-
outputTokens: response.usage.outputTokens,
|
|
1024
|
-
totalTokens: response.usage.totalTokens,
|
|
1025
|
-
reasoningTokens: response.usage.reasoningTokens,
|
|
1026
|
-
cachedInputTokens: response.usage.cachedInputTokens
|
|
1027
|
-
}
|
|
1028
|
-
};
|
|
1029
|
-
};
|
|
1030
949
|
let analysisResponse;
|
|
1031
950
|
if (imageSubmissionMode === "base64") {
|
|
1032
|
-
const
|
|
1033
|
-
analysisResponse = await analyzeStoryboard(
|
|
951
|
+
const base64Data = await fetchImageAsBase64(imageUrl, imageDownloadOptions);
|
|
952
|
+
analysisResponse = await analyzeStoryboard({
|
|
953
|
+
imageDataUrl: base64Data,
|
|
954
|
+
provider: workflowConfig.provider,
|
|
955
|
+
modelId: workflowConfig.modelId,
|
|
956
|
+
credentials: workflowConfig.credentials,
|
|
957
|
+
userPrompt,
|
|
958
|
+
systemPrompt: SYSTEM_PROMPT
|
|
959
|
+
});
|
|
1034
960
|
} else {
|
|
1035
|
-
analysisResponse = await analyzeStoryboard(
|
|
961
|
+
analysisResponse = await analyzeStoryboard({
|
|
962
|
+
imageDataUrl: imageUrl,
|
|
963
|
+
provider: workflowConfig.provider,
|
|
964
|
+
modelId: workflowConfig.modelId,
|
|
965
|
+
credentials: workflowConfig.credentials,
|
|
966
|
+
userPrompt,
|
|
967
|
+
systemPrompt: SYSTEM_PROMPT
|
|
968
|
+
});
|
|
1036
969
|
}
|
|
1037
970
|
if (!analysisResponse.result) {
|
|
1038
971
|
throw new Error("No analysis result received from AI provider");
|
|
@@ -1048,8 +981,8 @@ async function hasBurnedInCaptions(assetId, options = {}) {
|
|
|
1048
981
|
}
|
|
1049
982
|
|
|
1050
983
|
// src/workflows/chapters.ts
|
|
1051
|
-
|
|
1052
|
-
|
|
984
|
+
import { generateObject as generateObject2 } from "ai";
|
|
985
|
+
import { z as z3 } from "zod";
|
|
1053
986
|
|
|
1054
987
|
// src/lib/retry.ts
|
|
1055
988
|
var DEFAULT_RETRY_OPTIONS = {
|
|
@@ -1081,25 +1014,55 @@ async function withRetry(fn, {
|
|
|
1081
1014
|
if (isLastAttempt || !shouldRetry(lastError, attempt + 1)) {
|
|
1082
1015
|
throw lastError;
|
|
1083
1016
|
}
|
|
1084
|
-
const
|
|
1017
|
+
const delay = calculateDelay(attempt + 1, baseDelay, maxDelay);
|
|
1085
1018
|
console.warn(
|
|
1086
|
-
`Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(
|
|
1019
|
+
`Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(delay)}ms...`
|
|
1087
1020
|
);
|
|
1088
|
-
await new Promise((resolve) => setTimeout(resolve,
|
|
1021
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
1089
1022
|
}
|
|
1090
1023
|
}
|
|
1091
1024
|
throw lastError || new Error("Retry failed with unknown error");
|
|
1092
1025
|
}
|
|
1093
1026
|
|
|
1094
1027
|
// src/workflows/chapters.ts
|
|
1095
|
-
var chapterSchema =
|
|
1096
|
-
startTime:
|
|
1097
|
-
title:
|
|
1028
|
+
var chapterSchema = z3.object({
|
|
1029
|
+
startTime: z3.number(),
|
|
1030
|
+
title: z3.string()
|
|
1098
1031
|
});
|
|
1099
|
-
var chaptersSchema =
|
|
1100
|
-
chapters:
|
|
1032
|
+
var chaptersSchema = z3.object({
|
|
1033
|
+
chapters: z3.array(chapterSchema)
|
|
1101
1034
|
});
|
|
1102
|
-
|
|
1035
|
+
async function generateChaptersWithAI({
|
|
1036
|
+
provider,
|
|
1037
|
+
modelId,
|
|
1038
|
+
credentials,
|
|
1039
|
+
timestampedTranscript,
|
|
1040
|
+
systemPrompt
|
|
1041
|
+
}) {
|
|
1042
|
+
"use step";
|
|
1043
|
+
const model = createLanguageModelFromConfig(
|
|
1044
|
+
provider,
|
|
1045
|
+
modelId,
|
|
1046
|
+
credentials
|
|
1047
|
+
);
|
|
1048
|
+
const response = await withRetry(
|
|
1049
|
+
() => generateObject2({
|
|
1050
|
+
model,
|
|
1051
|
+
schema: chaptersSchema,
|
|
1052
|
+
messages: [
|
|
1053
|
+
{
|
|
1054
|
+
role: "system",
|
|
1055
|
+
content: systemPrompt
|
|
1056
|
+
},
|
|
1057
|
+
{
|
|
1058
|
+
role: "user",
|
|
1059
|
+
content: timestampedTranscript
|
|
1060
|
+
}
|
|
1061
|
+
]
|
|
1062
|
+
})
|
|
1063
|
+
);
|
|
1064
|
+
return response.object;
|
|
1065
|
+
}
|
|
1103
1066
|
var SYSTEM_PROMPT2 = `Your role is to segment the following captions into chunked chapters, summarising each chapter with a title.
|
|
1104
1067
|
|
|
1105
1068
|
Analyze the transcript and create logical chapter breaks based on topic changes, major transitions, or distinct sections of content. Each chapter should represent a meaningful segment of the video.
|
|
@@ -1121,10 +1084,11 @@ Important rules:
|
|
|
1121
1084
|
- Do not include any text before or after the JSON
|
|
1122
1085
|
- The JSON must be valid and parseable`;
|
|
1123
1086
|
async function generateChapters(assetId, languageCode, options = {}) {
|
|
1124
|
-
|
|
1125
|
-
const
|
|
1126
|
-
const
|
|
1127
|
-
const
|
|
1087
|
+
"use workflow";
|
|
1088
|
+
const { provider = "openai", model } = options;
|
|
1089
|
+
const config = await createWorkflowConfig({ ...options, model }, provider);
|
|
1090
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(config.credentials, assetId);
|
|
1091
|
+
const signingContext = await resolveSigningContext(options);
|
|
1128
1092
|
if (policy === "signed" && !signingContext) {
|
|
1129
1093
|
throw new Error(
|
|
1130
1094
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1148,24 +1112,13 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
1148
1112
|
}
|
|
1149
1113
|
let chaptersData = null;
|
|
1150
1114
|
try {
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
role: "system",
|
|
1159
|
-
content: SYSTEM_PROMPT2
|
|
1160
|
-
},
|
|
1161
|
-
{
|
|
1162
|
-
role: "user",
|
|
1163
|
-
content: timestampedTranscript
|
|
1164
|
-
}
|
|
1165
|
-
]
|
|
1166
|
-
})
|
|
1167
|
-
);
|
|
1168
|
-
chaptersData = response.object;
|
|
1115
|
+
chaptersData = await generateChaptersWithAI({
|
|
1116
|
+
provider: config.provider,
|
|
1117
|
+
modelId: config.modelId,
|
|
1118
|
+
credentials: config.credentials,
|
|
1119
|
+
timestampedTranscript,
|
|
1120
|
+
systemPrompt: SYSTEM_PROMPT2
|
|
1121
|
+
});
|
|
1169
1122
|
} catch (error) {
|
|
1170
1123
|
throw new Error(
|
|
1171
1124
|
`Failed to generate chapters with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
@@ -1189,14 +1142,7 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
1189
1142
|
}
|
|
1190
1143
|
|
|
1191
1144
|
// src/workflows/embeddings.ts
|
|
1192
|
-
|
|
1193
|
-
var DEFAULT_PROVIDER3 = "openai";
|
|
1194
|
-
var DEFAULT_CHUNKING_STRATEGY = {
|
|
1195
|
-
type: "token",
|
|
1196
|
-
maxTokens: 500,
|
|
1197
|
-
overlap: 100
|
|
1198
|
-
};
|
|
1199
|
-
var DEFAULT_BATCH_SIZE = 5;
|
|
1145
|
+
import { embed } from "ai";
|
|
1200
1146
|
function averageEmbeddings(embeddings) {
|
|
1201
1147
|
if (embeddings.length === 0) {
|
|
1202
1148
|
return [];
|
|
@@ -1213,51 +1159,46 @@ function averageEmbeddings(embeddings) {
|
|
|
1213
1159
|
}
|
|
1214
1160
|
return averaged;
|
|
1215
1161
|
}
|
|
1216
|
-
async function
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
);
|
|
1240
|
-
results.push(...batchResults);
|
|
1241
|
-
}
|
|
1242
|
-
return results;
|
|
1162
|
+
async function generateSingleChunkEmbedding({
|
|
1163
|
+
chunk,
|
|
1164
|
+
provider,
|
|
1165
|
+
modelId,
|
|
1166
|
+
credentials
|
|
1167
|
+
}) {
|
|
1168
|
+
"use step";
|
|
1169
|
+
const model = createEmbeddingModelFromConfig(provider, modelId, credentials);
|
|
1170
|
+
const response = await withRetry(
|
|
1171
|
+
() => embed({
|
|
1172
|
+
model,
|
|
1173
|
+
value: chunk.text
|
|
1174
|
+
})
|
|
1175
|
+
);
|
|
1176
|
+
return {
|
|
1177
|
+
chunkId: chunk.id,
|
|
1178
|
+
embedding: response.embedding,
|
|
1179
|
+
metadata: {
|
|
1180
|
+
startTime: chunk.startTime,
|
|
1181
|
+
endTime: chunk.endTime,
|
|
1182
|
+
tokenCount: chunk.tokenCount
|
|
1183
|
+
}
|
|
1184
|
+
};
|
|
1243
1185
|
}
|
|
1244
1186
|
async function generateVideoEmbeddings(assetId, options = {}) {
|
|
1187
|
+
"use workflow";
|
|
1245
1188
|
const {
|
|
1246
|
-
provider =
|
|
1189
|
+
provider = "openai",
|
|
1247
1190
|
model,
|
|
1248
1191
|
languageCode,
|
|
1249
|
-
chunkingStrategy =
|
|
1250
|
-
batchSize =
|
|
1251
|
-
abortSignal
|
|
1192
|
+
chunkingStrategy = { type: "token", maxTokens: 500, overlap: 100 },
|
|
1193
|
+
batchSize = 5
|
|
1252
1194
|
} = options;
|
|
1253
|
-
const credentials = validateCredentials(options, provider === "google" ? "google" : "openai");
|
|
1254
|
-
const muxClient = createMuxClient(credentials);
|
|
1195
|
+
const credentials = await validateCredentials(options, provider === "google" ? "google" : "openai");
|
|
1255
1196
|
const embeddingModel = resolveEmbeddingModel({ ...options, provider, model });
|
|
1256
1197
|
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
|
|
1257
|
-
|
|
1198
|
+
credentials,
|
|
1258
1199
|
assetId
|
|
1259
1200
|
);
|
|
1260
|
-
const signingContext = resolveSigningContext(options);
|
|
1201
|
+
const signingContext = await resolveSigningContext(options);
|
|
1261
1202
|
if (policy === "signed" && !signingContext) {
|
|
1262
1203
|
throw new Error(
|
|
1263
1204
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1287,14 +1228,22 @@ async function generateVideoEmbeddings(assetId, options = {}) {
|
|
|
1287
1228
|
if (chunks.length === 0) {
|
|
1288
1229
|
throw new Error("No chunks generated from transcript");
|
|
1289
1230
|
}
|
|
1290
|
-
|
|
1231
|
+
const chunkEmbeddings = [];
|
|
1291
1232
|
try {
|
|
1292
|
-
|
|
1293
|
-
chunks,
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1233
|
+
for (let i = 0; i < chunks.length; i += batchSize) {
|
|
1234
|
+
const batch = chunks.slice(i, i + batchSize);
|
|
1235
|
+
const batchResults = await Promise.all(
|
|
1236
|
+
batch.map(
|
|
1237
|
+
(chunk) => generateSingleChunkEmbedding({
|
|
1238
|
+
chunk,
|
|
1239
|
+
provider: embeddingModel.provider,
|
|
1240
|
+
modelId: embeddingModel.modelId,
|
|
1241
|
+
credentials
|
|
1242
|
+
})
|
|
1243
|
+
)
|
|
1244
|
+
);
|
|
1245
|
+
chunkEmbeddings.push(...batchResults);
|
|
1246
|
+
}
|
|
1298
1247
|
} catch (error) {
|
|
1299
1248
|
throw new Error(
|
|
1300
1249
|
`Failed to generate embeddings with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
@@ -1326,7 +1275,7 @@ var DEFAULT_THRESHOLDS = {
|
|
|
1326
1275
|
sexual: 0.7,
|
|
1327
1276
|
violence: 0.8
|
|
1328
1277
|
};
|
|
1329
|
-
var
|
|
1278
|
+
var DEFAULT_PROVIDER2 = "openai";
|
|
1330
1279
|
var HIVE_ENDPOINT = "https://api.thehive.ai/api/v2/task/sync";
|
|
1331
1280
|
var HIVE_SEXUAL_CATEGORIES = [
|
|
1332
1281
|
"general_nsfw",
|
|
@@ -1364,6 +1313,7 @@ var HIVE_VIOLENCE_CATEGORIES = [
|
|
|
1364
1313
|
"garm_death_injury_or_military_conflict"
|
|
1365
1314
|
];
|
|
1366
1315
|
async function processConcurrently(items, processor, maxConcurrent = 5) {
|
|
1316
|
+
"use step";
|
|
1367
1317
|
const results = [];
|
|
1368
1318
|
for (let i = 0; i < items.length; i += maxConcurrent) {
|
|
1369
1319
|
const batch = items.slice(i, i + maxConcurrent);
|
|
@@ -1374,19 +1324,21 @@ async function processConcurrently(items, processor, maxConcurrent = 5) {
|
|
|
1374
1324
|
return results;
|
|
1375
1325
|
}
|
|
1376
1326
|
async function requestOpenAIModeration(imageUrls, apiKey, model, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
|
|
1327
|
+
"use step";
|
|
1377
1328
|
const targetUrls = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map(
|
|
1378
|
-
(img) => ({ url: img.url, image: img.base64Data })
|
|
1379
|
-
) : imageUrls.map((url) => ({ url, image: url }));
|
|
1329
|
+
(img) => ({ url: img.url, image: img.base64Data, apiKey, model })
|
|
1330
|
+
) : imageUrls.map((url) => ({ url, image: url, apiKey, model }));
|
|
1380
1331
|
const moderate = async (entry) => {
|
|
1332
|
+
"use step";
|
|
1381
1333
|
try {
|
|
1382
1334
|
const res = await fetch("https://api.openai.com/v1/moderations", {
|
|
1383
1335
|
method: "POST",
|
|
1384
1336
|
headers: {
|
|
1385
1337
|
"Content-Type": "application/json",
|
|
1386
|
-
"Authorization": `Bearer ${apiKey}`
|
|
1338
|
+
"Authorization": `Bearer ${entry.apiKey}`
|
|
1387
1339
|
},
|
|
1388
1340
|
body: JSON.stringify({
|
|
1389
|
-
model,
|
|
1341
|
+
model: entry.model,
|
|
1390
1342
|
input: [
|
|
1391
1343
|
{
|
|
1392
1344
|
type: "image_url",
|
|
@@ -1430,6 +1382,7 @@ function getHiveCategoryScores(classes, categoryNames) {
|
|
|
1430
1382
|
return Math.max(...scores, 0);
|
|
1431
1383
|
}
|
|
1432
1384
|
async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
|
|
1385
|
+
"use step";
|
|
1433
1386
|
const targets = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map((img) => ({
|
|
1434
1387
|
url: img.url,
|
|
1435
1388
|
source: {
|
|
@@ -1442,6 +1395,7 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
|
|
|
1442
1395
|
source: { kind: "url", value: url }
|
|
1443
1396
|
}));
|
|
1444
1397
|
const moderate = async (entry) => {
|
|
1398
|
+
"use step";
|
|
1445
1399
|
try {
|
|
1446
1400
|
const formData = new FormData();
|
|
1447
1401
|
if (entry.source.kind === "url") {
|
|
@@ -1487,8 +1441,9 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
|
|
|
1487
1441
|
return processConcurrently(targets, moderate, maxConcurrent);
|
|
1488
1442
|
}
|
|
1489
1443
|
async function getModerationScores(assetId, options = {}) {
|
|
1444
|
+
"use workflow";
|
|
1490
1445
|
const {
|
|
1491
|
-
provider =
|
|
1446
|
+
provider = DEFAULT_PROVIDER2,
|
|
1492
1447
|
model = provider === "openai" ? "omni-moderation-latest" : void 0,
|
|
1493
1448
|
thresholds = DEFAULT_THRESHOLDS,
|
|
1494
1449
|
thumbnailInterval = 10,
|
|
@@ -1497,11 +1452,10 @@ async function getModerationScores(assetId, options = {}) {
|
|
|
1497
1452
|
imageSubmissionMode = "url",
|
|
1498
1453
|
imageDownloadOptions
|
|
1499
1454
|
} = options;
|
|
1500
|
-
const credentials = validateCredentials(options, provider === "openai" ? "openai" : void 0);
|
|
1501
|
-
const
|
|
1502
|
-
const { asset, playbackId, policy } = await getPlaybackIdForAsset(muxClient, assetId);
|
|
1455
|
+
const credentials = await validateCredentials(options, provider === "openai" ? "openai" : void 0);
|
|
1456
|
+
const { asset, playbackId, policy } = await getPlaybackIdForAsset(credentials, assetId);
|
|
1503
1457
|
const duration = asset.duration || 0;
|
|
1504
|
-
const signingContext = resolveSigningContext(options);
|
|
1458
|
+
const signingContext = await resolveSigningContext(options);
|
|
1505
1459
|
if (policy === "signed" && !signingContext) {
|
|
1506
1460
|
throw new Error(
|
|
1507
1461
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1557,17 +1511,18 @@ async function getModerationScores(assetId, options = {}) {
|
|
|
1557
1511
|
}
|
|
1558
1512
|
|
|
1559
1513
|
// src/workflows/summarization.ts
|
|
1560
|
-
|
|
1561
|
-
|
|
1514
|
+
import { generateObject as generateObject3 } from "ai";
|
|
1515
|
+
import dedent2 from "dedent";
|
|
1516
|
+
import { z as z4 } from "zod";
|
|
1562
1517
|
var SUMMARY_KEYWORD_LIMIT = 10;
|
|
1563
|
-
var summarySchema =
|
|
1564
|
-
keywords:
|
|
1565
|
-
title:
|
|
1566
|
-
description:
|
|
1518
|
+
var summarySchema = z4.object({
|
|
1519
|
+
keywords: z4.array(z4.string()),
|
|
1520
|
+
title: z4.string(),
|
|
1521
|
+
description: z4.string()
|
|
1567
1522
|
});
|
|
1568
1523
|
var TONE_INSTRUCTIONS = {
|
|
1569
1524
|
normal: "Provide a clear, straightforward analysis.",
|
|
1570
|
-
sassy: "Answer with
|
|
1525
|
+
sassy: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
|
|
1571
1526
|
professional: "Provide a professional, executive-level analysis suitable for business reporting."
|
|
1572
1527
|
};
|
|
1573
1528
|
var summarizationPromptBuilder = createPromptBuilder({
|
|
@@ -1578,7 +1533,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1578
1533
|
},
|
|
1579
1534
|
title: {
|
|
1580
1535
|
tag: "title_requirements",
|
|
1581
|
-
content:
|
|
1536
|
+
content: dedent2`
|
|
1582
1537
|
A short, compelling headline that immediately communicates the subject or action.
|
|
1583
1538
|
Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
|
|
1584
1539
|
Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
|
|
@@ -1586,7 +1541,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1586
1541
|
},
|
|
1587
1542
|
description: {
|
|
1588
1543
|
tag: "description_requirements",
|
|
1589
|
-
content:
|
|
1544
|
+
content: dedent2`
|
|
1590
1545
|
A concise summary (2-4 sentences) that describes what happens across the video.
|
|
1591
1546
|
Cover the main subjects, actions, setting, and any notable progression visible across frames.
|
|
1592
1547
|
Write in present tense. Be specific about observable details rather than making assumptions.
|
|
@@ -1594,7 +1549,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1594
1549
|
},
|
|
1595
1550
|
keywords: {
|
|
1596
1551
|
tag: "keywords_requirements",
|
|
1597
|
-
content:
|
|
1552
|
+
content: dedent2`
|
|
1598
1553
|
Specific, searchable terms (up to 10) that capture:
|
|
1599
1554
|
- Primary subjects (people, animals, objects)
|
|
1600
1555
|
- Actions and activities being performed
|
|
@@ -1606,7 +1561,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1606
1561
|
},
|
|
1607
1562
|
qualityGuidelines: {
|
|
1608
1563
|
tag: "quality_guidelines",
|
|
1609
|
-
content:
|
|
1564
|
+
content: dedent2`
|
|
1610
1565
|
- Examine all frames to understand the full context and progression
|
|
1611
1566
|
- Be precise: "golden retriever" is better than "dog" when identifiable
|
|
1612
1567
|
- Capture the narrative: what begins, develops, and concludes
|
|
@@ -1615,7 +1570,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1615
1570
|
},
|
|
1616
1571
|
sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
|
|
1617
1572
|
});
|
|
1618
|
-
var SYSTEM_PROMPT3 =
|
|
1573
|
+
var SYSTEM_PROMPT3 = dedent2`
|
|
1619
1574
|
<role>
|
|
1620
1575
|
You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
|
|
1621
1576
|
</role>
|
|
@@ -1647,7 +1602,29 @@ var SYSTEM_PROMPT3 = dedent_default`
|
|
|
1647
1602
|
- Only describe what is clearly observable in the frames or explicitly stated in the transcript
|
|
1648
1603
|
- Do not fabricate details or make unsupported assumptions
|
|
1649
1604
|
- Return structured data matching the requested schema
|
|
1650
|
-
</constraints
|
|
1605
|
+
</constraints>
|
|
1606
|
+
|
|
1607
|
+
<tone_guidance>
|
|
1608
|
+
Pay special attention to the <tone> section and lean heavily into those instructions.
|
|
1609
|
+
Adapt your entire analysis and writing style to match the specified tone - this should influence
|
|
1610
|
+
your word choice, personality, formality level, and overall presentation of the content.
|
|
1611
|
+
The tone instructions are not suggestions but core requirements for how you should express yourself.
|
|
1612
|
+
</tone_guidance>
|
|
1613
|
+
|
|
1614
|
+
<language_guidelines>
|
|
1615
|
+
AVOID these meta-descriptive phrases that reference the medium rather than the content:
|
|
1616
|
+
- "The image shows..." / "The storyboard shows..."
|
|
1617
|
+
- "In this video..." / "This video features..."
|
|
1618
|
+
- "The frames depict..." / "The footage shows..."
|
|
1619
|
+
- "We can see..." / "You can see..."
|
|
1620
|
+
- "The clip shows..." / "The scene shows..."
|
|
1621
|
+
|
|
1622
|
+
INSTEAD, describe the content directly:
|
|
1623
|
+
- BAD: "The video shows a chef preparing a meal"
|
|
1624
|
+
- GOOD: "A chef prepares a meal in a professional kitchen"
|
|
1625
|
+
|
|
1626
|
+
Write as if describing reality, not describing a recording of reality.
|
|
1627
|
+
</language_guidelines>`;
|
|
1651
1628
|
function buildUserPrompt2({
|
|
1652
1629
|
tone,
|
|
1653
1630
|
transcriptText,
|
|
@@ -1661,8 +1638,41 @@ function buildUserPrompt2({
|
|
|
1661
1638
|
}
|
|
1662
1639
|
return summarizationPromptBuilder.buildWithContext(promptOverrides, contextSections);
|
|
1663
1640
|
}
|
|
1664
|
-
|
|
1665
|
-
|
|
1641
|
+
async function analyzeStoryboard2(imageDataUrl, workflowConfig, userPrompt, systemPrompt) {
|
|
1642
|
+
"use step";
|
|
1643
|
+
const model = createLanguageModelFromConfig(
|
|
1644
|
+
workflowConfig.provider,
|
|
1645
|
+
workflowConfig.modelId,
|
|
1646
|
+
workflowConfig.credentials
|
|
1647
|
+
);
|
|
1648
|
+
const response = await generateObject3({
|
|
1649
|
+
model,
|
|
1650
|
+
schema: summarySchema,
|
|
1651
|
+
messages: [
|
|
1652
|
+
{
|
|
1653
|
+
role: "system",
|
|
1654
|
+
content: systemPrompt
|
|
1655
|
+
},
|
|
1656
|
+
{
|
|
1657
|
+
role: "user",
|
|
1658
|
+
content: [
|
|
1659
|
+
{ type: "text", text: userPrompt },
|
|
1660
|
+
{ type: "image", image: imageDataUrl }
|
|
1661
|
+
]
|
|
1662
|
+
}
|
|
1663
|
+
]
|
|
1664
|
+
});
|
|
1665
|
+
return {
|
|
1666
|
+
result: response.object,
|
|
1667
|
+
usage: {
|
|
1668
|
+
inputTokens: response.usage.inputTokens,
|
|
1669
|
+
outputTokens: response.usage.outputTokens,
|
|
1670
|
+
totalTokens: response.usage.totalTokens,
|
|
1671
|
+
reasoningTokens: response.usage.reasoningTokens,
|
|
1672
|
+
cachedInputTokens: response.usage.cachedInputTokens
|
|
1673
|
+
}
|
|
1674
|
+
};
|
|
1675
|
+
}
|
|
1666
1676
|
function normalizeKeywords(keywords) {
|
|
1667
1677
|
if (!Array.isArray(keywords) || keywords.length === 0) {
|
|
1668
1678
|
return [];
|
|
@@ -1687,23 +1697,24 @@ function normalizeKeywords(keywords) {
|
|
|
1687
1697
|
return normalized;
|
|
1688
1698
|
}
|
|
1689
1699
|
async function getSummaryAndTags(assetId, options) {
|
|
1700
|
+
"use workflow";
|
|
1690
1701
|
const {
|
|
1691
|
-
provider =
|
|
1702
|
+
provider = "openai",
|
|
1692
1703
|
model,
|
|
1693
|
-
tone =
|
|
1704
|
+
tone = "normal",
|
|
1694
1705
|
includeTranscript = true,
|
|
1695
1706
|
cleanTranscript = true,
|
|
1696
1707
|
imageSubmissionMode = "url",
|
|
1697
1708
|
imageDownloadOptions,
|
|
1698
|
-
abortSignal,
|
|
1709
|
+
abortSignal: _abortSignal,
|
|
1699
1710
|
promptOverrides
|
|
1700
1711
|
} = options ?? {};
|
|
1701
|
-
const
|
|
1712
|
+
const config = await createWorkflowConfig(
|
|
1702
1713
|
{ ...options, model },
|
|
1703
1714
|
provider
|
|
1704
1715
|
);
|
|
1705
|
-
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
|
|
1706
|
-
const signingContext = resolveSigningContext(options ?? {});
|
|
1716
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(config.credentials, assetId);
|
|
1717
|
+
const signingContext = await resolveSigningContext(options ?? {});
|
|
1707
1718
|
if (policy === "signed" && !signingContext) {
|
|
1708
1719
|
throw new Error(
|
|
1709
1720
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1720,66 +1731,212 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
1720
1731
|
promptOverrides
|
|
1721
1732
|
});
|
|
1722
1733
|
const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
|
|
1723
|
-
|
|
1724
|
-
const response = await (0, import_ai4.generateObject)({
|
|
1725
|
-
model: clients.languageModel.model,
|
|
1726
|
-
schema: summarySchema,
|
|
1727
|
-
abortSignal,
|
|
1728
|
-
messages: [
|
|
1729
|
-
{
|
|
1730
|
-
role: "system",
|
|
1731
|
-
content: SYSTEM_PROMPT3
|
|
1732
|
-
},
|
|
1733
|
-
{
|
|
1734
|
-
role: "user",
|
|
1735
|
-
content: [
|
|
1736
|
-
{ type: "text", text: userPrompt },
|
|
1737
|
-
{ type: "image", image: imageDataUrl }
|
|
1738
|
-
]
|
|
1739
|
-
}
|
|
1740
|
-
]
|
|
1741
|
-
});
|
|
1742
|
-
return response.object;
|
|
1743
|
-
};
|
|
1744
|
-
let aiAnalysis = null;
|
|
1734
|
+
let analysisResponse;
|
|
1745
1735
|
try {
|
|
1746
1736
|
if (imageSubmissionMode === "base64") {
|
|
1747
1737
|
const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
|
|
1748
|
-
|
|
1738
|
+
analysisResponse = await analyzeStoryboard2(
|
|
1739
|
+
downloadResult.base64Data,
|
|
1740
|
+
config,
|
|
1741
|
+
userPrompt,
|
|
1742
|
+
SYSTEM_PROMPT3
|
|
1743
|
+
);
|
|
1749
1744
|
} else {
|
|
1750
|
-
|
|
1745
|
+
analysisResponse = await withRetry(() => analyzeStoryboard2(imageUrl, config, userPrompt, SYSTEM_PROMPT3));
|
|
1751
1746
|
}
|
|
1752
1747
|
} catch (error) {
|
|
1753
1748
|
throw new Error(
|
|
1754
1749
|
`Failed to analyze video content with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
1755
1750
|
);
|
|
1756
1751
|
}
|
|
1757
|
-
if (!
|
|
1752
|
+
if (!analysisResponse.result) {
|
|
1758
1753
|
throw new Error(`Failed to analyze video content for asset ${assetId}`);
|
|
1759
1754
|
}
|
|
1760
|
-
if (!
|
|
1755
|
+
if (!analysisResponse.result.title) {
|
|
1761
1756
|
throw new Error(`Failed to generate title for asset ${assetId}`);
|
|
1762
1757
|
}
|
|
1763
|
-
if (!
|
|
1758
|
+
if (!analysisResponse.result.description) {
|
|
1764
1759
|
throw new Error(`Failed to generate description for asset ${assetId}`);
|
|
1765
1760
|
}
|
|
1766
1761
|
return {
|
|
1767
1762
|
assetId,
|
|
1768
|
-
title:
|
|
1769
|
-
description:
|
|
1770
|
-
tags: normalizeKeywords(
|
|
1771
|
-
storyboardUrl: imageUrl
|
|
1763
|
+
title: analysisResponse.result.title,
|
|
1764
|
+
description: analysisResponse.result.description,
|
|
1765
|
+
tags: normalizeKeywords(analysisResponse.result.keywords),
|
|
1766
|
+
storyboardUrl: imageUrl,
|
|
1767
|
+
usage: analysisResponse.usage,
|
|
1768
|
+
transcriptText: transcriptText || void 0
|
|
1772
1769
|
};
|
|
1773
1770
|
}
|
|
1774
1771
|
|
|
1775
1772
|
// src/workflows/translate-audio.ts
|
|
1776
|
-
|
|
1777
|
-
|
|
1778
|
-
|
|
1779
|
-
var
|
|
1773
|
+
import Mux3 from "@mux/mux-node";
|
|
1774
|
+
|
|
1775
|
+
// src/lib/language-codes.ts
|
|
1776
|
+
var ISO639_1_TO_3 = {
|
|
1777
|
+
// Major world languages
|
|
1778
|
+
en: "eng",
|
|
1779
|
+
// English
|
|
1780
|
+
es: "spa",
|
|
1781
|
+
// Spanish
|
|
1782
|
+
fr: "fra",
|
|
1783
|
+
// French
|
|
1784
|
+
de: "deu",
|
|
1785
|
+
// German
|
|
1786
|
+
it: "ita",
|
|
1787
|
+
// Italian
|
|
1788
|
+
pt: "por",
|
|
1789
|
+
// Portuguese
|
|
1790
|
+
ru: "rus",
|
|
1791
|
+
// Russian
|
|
1792
|
+
zh: "zho",
|
|
1793
|
+
// Chinese
|
|
1794
|
+
ja: "jpn",
|
|
1795
|
+
// Japanese
|
|
1796
|
+
ko: "kor",
|
|
1797
|
+
// Korean
|
|
1798
|
+
ar: "ara",
|
|
1799
|
+
// Arabic
|
|
1800
|
+
hi: "hin",
|
|
1801
|
+
// Hindi
|
|
1802
|
+
// European languages
|
|
1803
|
+
nl: "nld",
|
|
1804
|
+
// Dutch
|
|
1805
|
+
pl: "pol",
|
|
1806
|
+
// Polish
|
|
1807
|
+
sv: "swe",
|
|
1808
|
+
// Swedish
|
|
1809
|
+
da: "dan",
|
|
1810
|
+
// Danish
|
|
1811
|
+
no: "nor",
|
|
1812
|
+
// Norwegian
|
|
1813
|
+
fi: "fin",
|
|
1814
|
+
// Finnish
|
|
1815
|
+
el: "ell",
|
|
1816
|
+
// Greek
|
|
1817
|
+
cs: "ces",
|
|
1818
|
+
// Czech
|
|
1819
|
+
hu: "hun",
|
|
1820
|
+
// Hungarian
|
|
1821
|
+
ro: "ron",
|
|
1822
|
+
// Romanian
|
|
1823
|
+
bg: "bul",
|
|
1824
|
+
// Bulgarian
|
|
1825
|
+
hr: "hrv",
|
|
1826
|
+
// Croatian
|
|
1827
|
+
sk: "slk",
|
|
1828
|
+
// Slovak
|
|
1829
|
+
sl: "slv",
|
|
1830
|
+
// Slovenian
|
|
1831
|
+
uk: "ukr",
|
|
1832
|
+
// Ukrainian
|
|
1833
|
+
tr: "tur",
|
|
1834
|
+
// Turkish
|
|
1835
|
+
// Asian languages
|
|
1836
|
+
th: "tha",
|
|
1837
|
+
// Thai
|
|
1838
|
+
vi: "vie",
|
|
1839
|
+
// Vietnamese
|
|
1840
|
+
id: "ind",
|
|
1841
|
+
// Indonesian
|
|
1842
|
+
ms: "msa",
|
|
1843
|
+
// Malay
|
|
1844
|
+
tl: "tgl",
|
|
1845
|
+
// Tagalog/Filipino
|
|
1846
|
+
// Other languages
|
|
1847
|
+
he: "heb",
|
|
1848
|
+
// Hebrew
|
|
1849
|
+
fa: "fas",
|
|
1850
|
+
// Persian/Farsi
|
|
1851
|
+
bn: "ben",
|
|
1852
|
+
// Bengali
|
|
1853
|
+
ta: "tam",
|
|
1854
|
+
// Tamil
|
|
1855
|
+
te: "tel",
|
|
1856
|
+
// Telugu
|
|
1857
|
+
mr: "mar",
|
|
1858
|
+
// Marathi
|
|
1859
|
+
gu: "guj",
|
|
1860
|
+
// Gujarati
|
|
1861
|
+
kn: "kan",
|
|
1862
|
+
// Kannada
|
|
1863
|
+
ml: "mal",
|
|
1864
|
+
// Malayalam
|
|
1865
|
+
pa: "pan",
|
|
1866
|
+
// Punjabi
|
|
1867
|
+
ur: "urd",
|
|
1868
|
+
// Urdu
|
|
1869
|
+
sw: "swa",
|
|
1870
|
+
// Swahili
|
|
1871
|
+
af: "afr",
|
|
1872
|
+
// Afrikaans
|
|
1873
|
+
ca: "cat",
|
|
1874
|
+
// Catalan
|
|
1875
|
+
eu: "eus",
|
|
1876
|
+
// Basque
|
|
1877
|
+
gl: "glg",
|
|
1878
|
+
// Galician
|
|
1879
|
+
is: "isl",
|
|
1880
|
+
// Icelandic
|
|
1881
|
+
et: "est",
|
|
1882
|
+
// Estonian
|
|
1883
|
+
lv: "lav",
|
|
1884
|
+
// Latvian
|
|
1885
|
+
lt: "lit"
|
|
1886
|
+
// Lithuanian
|
|
1887
|
+
};
|
|
1888
|
+
var ISO639_3_TO_1 = Object.fromEntries(
|
|
1889
|
+
Object.entries(ISO639_1_TO_3).map(([iso1, iso3]) => [iso3, iso1])
|
|
1890
|
+
);
|
|
1891
|
+
function toISO639_3(code) {
|
|
1892
|
+
const normalized = code.toLowerCase().trim();
|
|
1893
|
+
if (normalized.length === 3) {
|
|
1894
|
+
return normalized;
|
|
1895
|
+
}
|
|
1896
|
+
return ISO639_1_TO_3[normalized] ?? normalized;
|
|
1897
|
+
}
|
|
1898
|
+
function toISO639_1(code) {
|
|
1899
|
+
const normalized = code.toLowerCase().trim();
|
|
1900
|
+
if (normalized.length === 2) {
|
|
1901
|
+
return normalized;
|
|
1902
|
+
}
|
|
1903
|
+
return ISO639_3_TO_1[normalized] ?? normalized;
|
|
1904
|
+
}
|
|
1905
|
+
function getLanguageCodePair(code) {
|
|
1906
|
+
const normalized = code.toLowerCase().trim();
|
|
1907
|
+
if (normalized.length === 2) {
|
|
1908
|
+
return {
|
|
1909
|
+
iso639_1: normalized,
|
|
1910
|
+
iso639_3: toISO639_3(normalized)
|
|
1911
|
+
};
|
|
1912
|
+
} else if (normalized.length === 3) {
|
|
1913
|
+
return {
|
|
1914
|
+
iso639_1: toISO639_1(normalized),
|
|
1915
|
+
iso639_3: normalized
|
|
1916
|
+
};
|
|
1917
|
+
}
|
|
1918
|
+
return {
|
|
1919
|
+
iso639_1: normalized,
|
|
1920
|
+
iso639_3: normalized
|
|
1921
|
+
};
|
|
1922
|
+
}
|
|
1923
|
+
function getLanguageName(code) {
|
|
1924
|
+
const iso639_1 = toISO639_1(code);
|
|
1925
|
+
try {
|
|
1926
|
+
const displayNames = new Intl.DisplayNames(["en"], { type: "language" });
|
|
1927
|
+
return displayNames.of(iso639_1) ?? code.toUpperCase();
|
|
1928
|
+
} catch {
|
|
1929
|
+
return code.toUpperCase();
|
|
1930
|
+
}
|
|
1931
|
+
}
|
|
1932
|
+
|
|
1933
|
+
// src/workflows/translate-audio.ts
|
|
1780
1934
|
var STATIC_RENDITION_POLL_INTERVAL_MS = 5e3;
|
|
1781
1935
|
var STATIC_RENDITION_MAX_ATTEMPTS = 36;
|
|
1782
|
-
|
|
1936
|
+
async function sleep(ms) {
|
|
1937
|
+
"use step";
|
|
1938
|
+
await new Promise((resolve) => setTimeout(resolve, ms));
|
|
1939
|
+
}
|
|
1783
1940
|
function getReadyAudioStaticRendition(asset) {
|
|
1784
1941
|
const files = asset.static_renditions?.files;
|
|
1785
1942
|
if (!files || files.length === 0) {
|
|
@@ -1790,19 +1947,21 @@ function getReadyAudioStaticRendition(asset) {
|
|
|
1790
1947
|
);
|
|
1791
1948
|
}
|
|
1792
1949
|
var hasReadyAudioStaticRendition = (asset) => Boolean(getReadyAudioStaticRendition(asset));
|
|
1793
|
-
async function requestStaticRenditionCreation(
|
|
1794
|
-
|
|
1950
|
+
async function requestStaticRenditionCreation(credentials, assetId) {
|
|
1951
|
+
"use step";
|
|
1952
|
+
const mux = new Mux3({
|
|
1953
|
+
tokenId: credentials.muxTokenId,
|
|
1954
|
+
tokenSecret: credentials.muxTokenSecret
|
|
1955
|
+
});
|
|
1795
1956
|
try {
|
|
1796
|
-
await
|
|
1957
|
+
await mux.video.assets.createStaticRendition(assetId, {
|
|
1797
1958
|
resolution: "audio-only"
|
|
1798
1959
|
});
|
|
1799
|
-
console.log("\u{1F4FC} Static rendition request accepted by Mux.");
|
|
1800
1960
|
} catch (error) {
|
|
1801
1961
|
const statusCode = error?.status ?? error?.statusCode;
|
|
1802
1962
|
const messages = error?.error?.messages;
|
|
1803
1963
|
const alreadyDefined = messages?.some((message2) => message2.toLowerCase().includes("already defined")) ?? error?.message?.toLowerCase().includes("already defined");
|
|
1804
1964
|
if (statusCode === 409 || alreadyDefined) {
|
|
1805
|
-
console.log("\u2139\uFE0F Static rendition already requested. Waiting for it to finish...");
|
|
1806
1965
|
return;
|
|
1807
1966
|
}
|
|
1808
1967
|
const message = error instanceof Error ? error.message : "Unknown error";
|
|
@@ -1811,31 +1970,34 @@ async function requestStaticRenditionCreation(muxClient, assetId) {
|
|
|
1811
1970
|
}
|
|
1812
1971
|
async function waitForAudioStaticRendition({
|
|
1813
1972
|
assetId,
|
|
1814
|
-
|
|
1973
|
+
credentials,
|
|
1815
1974
|
initialAsset
|
|
1816
1975
|
}) {
|
|
1976
|
+
"use step";
|
|
1977
|
+
const mux = new Mux3({
|
|
1978
|
+
tokenId: credentials.muxTokenId,
|
|
1979
|
+
tokenSecret: credentials.muxTokenSecret
|
|
1980
|
+
});
|
|
1817
1981
|
let currentAsset = initialAsset;
|
|
1818
1982
|
if (hasReadyAudioStaticRendition(currentAsset)) {
|
|
1819
1983
|
return currentAsset;
|
|
1820
1984
|
}
|
|
1821
1985
|
const status = currentAsset.static_renditions?.status ?? "not_requested";
|
|
1822
1986
|
if (status === "not_requested" || status === void 0) {
|
|
1823
|
-
await requestStaticRenditionCreation(
|
|
1987
|
+
await requestStaticRenditionCreation(credentials, assetId);
|
|
1824
1988
|
} else if (status === "errored") {
|
|
1825
|
-
|
|
1826
|
-
await requestStaticRenditionCreation(muxClient, assetId);
|
|
1989
|
+
await requestStaticRenditionCreation(credentials, assetId);
|
|
1827
1990
|
} else {
|
|
1828
|
-
console.
|
|
1991
|
+
console.warn(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
|
|
1829
1992
|
}
|
|
1830
1993
|
for (let attempt = 1; attempt <= STATIC_RENDITION_MAX_ATTEMPTS; attempt++) {
|
|
1831
|
-
await
|
|
1832
|
-
currentAsset = await
|
|
1994
|
+
await sleep(STATIC_RENDITION_POLL_INTERVAL_MS);
|
|
1995
|
+
currentAsset = await mux.video.assets.retrieve(assetId);
|
|
1833
1996
|
if (hasReadyAudioStaticRendition(currentAsset)) {
|
|
1834
|
-
console.log("\u2705 Audio static rendition is ready!");
|
|
1835
1997
|
return currentAsset;
|
|
1836
1998
|
}
|
|
1837
1999
|
const currentStatus = currentAsset.static_renditions?.status || "unknown";
|
|
1838
|
-
console.
|
|
2000
|
+
console.warn(
|
|
1839
2001
|
`\u231B Waiting for static rendition (attempt ${attempt}/${STATIC_RENDITION_MAX_ATTEMPTS}) \u2192 ${currentStatus}`
|
|
1840
2002
|
);
|
|
1841
2003
|
if (currentStatus === "errored") {
|
|
@@ -1848,55 +2010,180 @@ async function waitForAudioStaticRendition({
|
|
|
1848
2010
|
"Timed out waiting for the static rendition to become ready. Please try again in a moment."
|
|
1849
2011
|
);
|
|
1850
2012
|
}
|
|
2013
|
+
async function fetchAudioFromMux(audioUrl) {
|
|
2014
|
+
"use step";
|
|
2015
|
+
const audioResponse = await fetch(audioUrl);
|
|
2016
|
+
if (!audioResponse.ok) {
|
|
2017
|
+
throw new Error(`Failed to fetch audio file: ${audioResponse.statusText}`);
|
|
2018
|
+
}
|
|
2019
|
+
return audioResponse.arrayBuffer();
|
|
2020
|
+
}
|
|
2021
|
+
async function createElevenLabsDubbingJob({
|
|
2022
|
+
audioBuffer,
|
|
2023
|
+
assetId,
|
|
2024
|
+
elevenLabsLangCode,
|
|
2025
|
+
elevenLabsApiKey,
|
|
2026
|
+
numSpeakers
|
|
2027
|
+
}) {
|
|
2028
|
+
"use step";
|
|
2029
|
+
const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
|
|
2030
|
+
const formData = new FormData();
|
|
2031
|
+
formData.append("file", audioBlob);
|
|
2032
|
+
formData.append("target_lang", elevenLabsLangCode);
|
|
2033
|
+
formData.append("num_speakers", numSpeakers.toString());
|
|
2034
|
+
formData.append("name", `Mux Asset ${assetId} - auto to ${elevenLabsLangCode}`);
|
|
2035
|
+
const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
|
|
2036
|
+
method: "POST",
|
|
2037
|
+
headers: {
|
|
2038
|
+
"xi-api-key": elevenLabsApiKey
|
|
2039
|
+
},
|
|
2040
|
+
body: formData
|
|
2041
|
+
});
|
|
2042
|
+
if (!dubbingResponse.ok) {
|
|
2043
|
+
throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
|
|
2044
|
+
}
|
|
2045
|
+
const dubbingData = await dubbingResponse.json();
|
|
2046
|
+
return dubbingData.dubbing_id;
|
|
2047
|
+
}
|
|
2048
|
+
async function checkElevenLabsDubbingStatus({
|
|
2049
|
+
dubbingId,
|
|
2050
|
+
elevenLabsApiKey
|
|
2051
|
+
}) {
|
|
2052
|
+
"use step";
|
|
2053
|
+
const statusResponse = await fetch(`https://api.elevenlabs.io/v1/dubbing/${dubbingId}`, {
|
|
2054
|
+
headers: {
|
|
2055
|
+
"xi-api-key": elevenLabsApiKey
|
|
2056
|
+
}
|
|
2057
|
+
});
|
|
2058
|
+
if (!statusResponse.ok) {
|
|
2059
|
+
throw new Error(`Status check failed: ${statusResponse.statusText}`);
|
|
2060
|
+
}
|
|
2061
|
+
const statusData = await statusResponse.json();
|
|
2062
|
+
return {
|
|
2063
|
+
status: statusData.status,
|
|
2064
|
+
targetLanguages: statusData.target_languages ?? []
|
|
2065
|
+
};
|
|
2066
|
+
}
|
|
2067
|
+
async function downloadDubbedAudioFromElevenLabs({
|
|
2068
|
+
dubbingId,
|
|
2069
|
+
languageCode,
|
|
2070
|
+
elevenLabsApiKey
|
|
2071
|
+
}) {
|
|
2072
|
+
"use step";
|
|
2073
|
+
const audioUrl = `https://api.elevenlabs.io/v1/dubbing/${dubbingId}/audio/${languageCode}`;
|
|
2074
|
+
const audioResponse = await fetch(audioUrl, {
|
|
2075
|
+
headers: {
|
|
2076
|
+
"xi-api-key": elevenLabsApiKey
|
|
2077
|
+
}
|
|
2078
|
+
});
|
|
2079
|
+
if (!audioResponse.ok) {
|
|
2080
|
+
throw new Error(`Failed to fetch dubbed audio: ${audioResponse.statusText}`);
|
|
2081
|
+
}
|
|
2082
|
+
return audioResponse.arrayBuffer();
|
|
2083
|
+
}
|
|
2084
|
+
async function uploadDubbedAudioToS3({
|
|
2085
|
+
dubbedAudioBuffer,
|
|
2086
|
+
assetId,
|
|
2087
|
+
toLanguageCode,
|
|
2088
|
+
s3Endpoint,
|
|
2089
|
+
s3Region,
|
|
2090
|
+
s3Bucket,
|
|
2091
|
+
s3AccessKeyId,
|
|
2092
|
+
s3SecretAccessKey
|
|
2093
|
+
}) {
|
|
2094
|
+
"use step";
|
|
2095
|
+
const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
|
|
2096
|
+
const { Upload } = await import("@aws-sdk/lib-storage");
|
|
2097
|
+
const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
|
|
2098
|
+
const s3Client = new S3Client({
|
|
2099
|
+
region: s3Region,
|
|
2100
|
+
endpoint: s3Endpoint,
|
|
2101
|
+
credentials: {
|
|
2102
|
+
accessKeyId: s3AccessKeyId,
|
|
2103
|
+
secretAccessKey: s3SecretAccessKey
|
|
2104
|
+
},
|
|
2105
|
+
forcePathStyle: true
|
|
2106
|
+
});
|
|
2107
|
+
const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
|
|
2108
|
+
const upload = new Upload({
|
|
2109
|
+
client: s3Client,
|
|
2110
|
+
params: {
|
|
2111
|
+
Bucket: s3Bucket,
|
|
2112
|
+
Key: audioKey,
|
|
2113
|
+
Body: new Uint8Array(dubbedAudioBuffer),
|
|
2114
|
+
ContentType: "audio/mp4"
|
|
2115
|
+
}
|
|
2116
|
+
});
|
|
2117
|
+
await upload.done();
|
|
2118
|
+
const getObjectCommand = new GetObjectCommand({
|
|
2119
|
+
Bucket: s3Bucket,
|
|
2120
|
+
Key: audioKey
|
|
2121
|
+
});
|
|
2122
|
+
const presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
|
|
2123
|
+
expiresIn: 3600
|
|
2124
|
+
// 1 hour
|
|
2125
|
+
});
|
|
2126
|
+
console.warn(`\u2705 Audio uploaded successfully to: ${audioKey}`);
|
|
2127
|
+
console.warn(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
|
|
2128
|
+
return presignedUrl;
|
|
2129
|
+
}
|
|
2130
|
+
async function createAudioTrackOnMux(credentials, assetId, languageCode, presignedUrl) {
|
|
2131
|
+
"use step";
|
|
2132
|
+
const mux = new Mux3({
|
|
2133
|
+
tokenId: credentials.muxTokenId,
|
|
2134
|
+
tokenSecret: credentials.muxTokenSecret
|
|
2135
|
+
});
|
|
2136
|
+
const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(languageCode) || languageCode.toUpperCase();
|
|
2137
|
+
const trackName = `${languageName} (auto-dubbed)`;
|
|
2138
|
+
const trackResponse = await mux.video.assets.createTrack(assetId, {
|
|
2139
|
+
type: "audio",
|
|
2140
|
+
language_code: languageCode,
|
|
2141
|
+
name: trackName,
|
|
2142
|
+
url: presignedUrl
|
|
2143
|
+
});
|
|
2144
|
+
if (!trackResponse.id) {
|
|
2145
|
+
throw new Error("Failed to create audio track: no track ID returned from Mux");
|
|
2146
|
+
}
|
|
2147
|
+
return trackResponse.id;
|
|
2148
|
+
}
|
|
1851
2149
|
async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
2150
|
+
"use workflow";
|
|
1852
2151
|
const {
|
|
1853
2152
|
provider = "elevenlabs",
|
|
1854
2153
|
numSpeakers = 0,
|
|
1855
2154
|
// 0 = auto-detect
|
|
1856
|
-
muxTokenId,
|
|
1857
|
-
muxTokenSecret,
|
|
1858
2155
|
elevenLabsApiKey,
|
|
1859
2156
|
uploadToMux = true
|
|
1860
2157
|
} = options;
|
|
1861
2158
|
if (provider !== "elevenlabs") {
|
|
1862
2159
|
throw new Error("Only ElevenLabs provider is currently supported for audio translation");
|
|
1863
2160
|
}
|
|
1864
|
-
const
|
|
1865
|
-
const muxSecret = muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
|
|
2161
|
+
const credentials = await validateCredentials(options);
|
|
1866
2162
|
const elevenLabsKey = elevenLabsApiKey ?? env_default.ELEVENLABS_API_KEY;
|
|
1867
2163
|
const s3Endpoint = options.s3Endpoint ?? env_default.S3_ENDPOINT;
|
|
1868
2164
|
const s3Region = options.s3Region ?? env_default.S3_REGION ?? "auto";
|
|
1869
2165
|
const s3Bucket = options.s3Bucket ?? env_default.S3_BUCKET;
|
|
1870
2166
|
const s3AccessKeyId = options.s3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
|
|
1871
2167
|
const s3SecretAccessKey = options.s3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
|
|
1872
|
-
if (!muxId || !muxSecret) {
|
|
1873
|
-
throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
|
|
1874
|
-
}
|
|
1875
2168
|
if (!elevenLabsKey) {
|
|
1876
2169
|
throw new Error("ElevenLabs API key is required. Provide elevenLabsApiKey in options or set ELEVENLABS_API_KEY environment variable.");
|
|
1877
2170
|
}
|
|
1878
2171
|
if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
|
|
1879
2172
|
throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
|
|
1880
2173
|
}
|
|
1881
|
-
const
|
|
1882
|
-
|
|
1883
|
-
tokenSecret: muxSecret
|
|
1884
|
-
});
|
|
1885
|
-
console.log(`\u{1F3AC} Fetching Mux asset: ${assetId}`);
|
|
1886
|
-
const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(mux, assetId);
|
|
1887
|
-
const signingContext = resolveSigningContext(options);
|
|
2174
|
+
const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(credentials, assetId);
|
|
2175
|
+
const signingContext = await resolveSigningContext(options);
|
|
1888
2176
|
if (policy === "signed" && !signingContext) {
|
|
1889
2177
|
throw new Error(
|
|
1890
2178
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
1891
2179
|
);
|
|
1892
2180
|
}
|
|
1893
|
-
console.log("\u{1F50D} Checking for audio-only static rendition...");
|
|
1894
2181
|
let currentAsset = initialAsset;
|
|
1895
2182
|
if (!hasReadyAudioStaticRendition(currentAsset)) {
|
|
1896
|
-
console.
|
|
2183
|
+
console.warn("\u274C No ready audio static rendition found. Requesting one now...");
|
|
1897
2184
|
currentAsset = await waitForAudioStaticRendition({
|
|
1898
2185
|
assetId,
|
|
1899
|
-
|
|
2186
|
+
credentials,
|
|
1900
2187
|
initialAsset: currentAsset
|
|
1901
2188
|
});
|
|
1902
2189
|
}
|
|
@@ -1910,58 +2197,44 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
1910
2197
|
if (policy === "signed" && signingContext) {
|
|
1911
2198
|
audioUrl = await signUrl(audioUrl, playbackId, signingContext, "video");
|
|
1912
2199
|
}
|
|
1913
|
-
console.
|
|
1914
|
-
|
|
2200
|
+
console.warn("\u{1F399}\uFE0F Fetching audio from Mux...");
|
|
2201
|
+
let audioBuffer;
|
|
2202
|
+
try {
|
|
2203
|
+
audioBuffer = await fetchAudioFromMux(audioUrl);
|
|
2204
|
+
} catch (error) {
|
|
2205
|
+
throw new Error(`Failed to fetch audio from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2206
|
+
}
|
|
2207
|
+
console.warn("\u{1F399}\uFE0F Creating dubbing job in ElevenLabs...");
|
|
2208
|
+
const elevenLabsLangCode = toISO639_3(toLanguageCode);
|
|
2209
|
+
console.warn(`\u{1F50D} Creating dubbing job for asset ${assetId} with language code: ${elevenLabsLangCode}`);
|
|
1915
2210
|
let dubbingId;
|
|
1916
2211
|
try {
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
|
|
1922
|
-
|
|
1923
|
-
const audioFile = audioBlob;
|
|
1924
|
-
const formData = new FormData();
|
|
1925
|
-
formData.append("file", audioFile);
|
|
1926
|
-
formData.append("target_lang", toLanguageCode);
|
|
1927
|
-
formData.append("num_speakers", numSpeakers.toString());
|
|
1928
|
-
formData.append("name", `Mux Asset ${assetId} - auto to ${toLanguageCode}`);
|
|
1929
|
-
const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
|
|
1930
|
-
method: "POST",
|
|
1931
|
-
headers: {
|
|
1932
|
-
"xi-api-key": elevenLabsKey
|
|
1933
|
-
},
|
|
1934
|
-
body: formData
|
|
2212
|
+
dubbingId = await createElevenLabsDubbingJob({
|
|
2213
|
+
audioBuffer,
|
|
2214
|
+
assetId,
|
|
2215
|
+
elevenLabsLangCode,
|
|
2216
|
+
elevenLabsApiKey: elevenLabsKey,
|
|
2217
|
+
numSpeakers
|
|
1935
2218
|
});
|
|
1936
|
-
|
|
1937
|
-
throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
|
|
1938
|
-
}
|
|
1939
|
-
const dubbingData = await dubbingResponse.json();
|
|
1940
|
-
dubbingId = dubbingData.dubbing_id;
|
|
1941
|
-
console.log(`\u2705 Dubbing job created: ${dubbingId}`);
|
|
1942
|
-
console.log(`\u23F1\uFE0F Expected duration: ${dubbingData.expected_duration_sec}s`);
|
|
2219
|
+
console.warn(`\u2705 Dubbing job created with ID: ${dubbingId}`);
|
|
1943
2220
|
} catch (error) {
|
|
1944
2221
|
throw new Error(`Failed to create ElevenLabs dubbing job: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
1945
2222
|
}
|
|
1946
|
-
console.
|
|
2223
|
+
console.warn("\u23F3 Waiting for dubbing to complete...");
|
|
1947
2224
|
let dubbingStatus = "dubbing";
|
|
1948
2225
|
let pollAttempts = 0;
|
|
1949
2226
|
const maxPollAttempts = 180;
|
|
2227
|
+
let targetLanguages = [];
|
|
1950
2228
|
while (dubbingStatus === "dubbing" && pollAttempts < maxPollAttempts) {
|
|
1951
|
-
await
|
|
2229
|
+
await sleep(1e4);
|
|
1952
2230
|
pollAttempts++;
|
|
1953
2231
|
try {
|
|
1954
|
-
const
|
|
1955
|
-
|
|
1956
|
-
|
|
1957
|
-
}
|
|
2232
|
+
const statusResult = await checkElevenLabsDubbingStatus({
|
|
2233
|
+
dubbingId,
|
|
2234
|
+
elevenLabsApiKey: elevenLabsKey
|
|
1958
2235
|
});
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
}
|
|
1962
|
-
const statusData = await statusResponse.json();
|
|
1963
|
-
dubbingStatus = statusData.status;
|
|
1964
|
-
console.log(`\u{1F4CA} Status check ${pollAttempts}: ${dubbingStatus}`);
|
|
2236
|
+
dubbingStatus = statusResult.status;
|
|
2237
|
+
targetLanguages = statusResult.targetLanguages;
|
|
1965
2238
|
if (dubbingStatus === "failed") {
|
|
1966
2239
|
throw new Error("ElevenLabs dubbing job failed");
|
|
1967
2240
|
}
|
|
@@ -1972,89 +2245,77 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
1972
2245
|
if (dubbingStatus !== "dubbed") {
|
|
1973
2246
|
throw new Error(`Dubbing job timed out or failed. Final status: ${dubbingStatus}`);
|
|
1974
2247
|
}
|
|
1975
|
-
console.
|
|
2248
|
+
console.warn("\u2705 Dubbing completed successfully!");
|
|
1976
2249
|
if (!uploadToMux) {
|
|
2250
|
+
const targetLanguage2 = getLanguageCodePair(toLanguageCode);
|
|
1977
2251
|
return {
|
|
1978
2252
|
assetId,
|
|
1979
|
-
targetLanguageCode:
|
|
2253
|
+
targetLanguageCode: targetLanguage2.iso639_1,
|
|
2254
|
+
targetLanguage: targetLanguage2,
|
|
1980
2255
|
dubbingId
|
|
1981
2256
|
};
|
|
1982
2257
|
}
|
|
1983
|
-
console.
|
|
2258
|
+
console.warn("\u{1F4E5} Downloading dubbed audio from ElevenLabs...");
|
|
1984
2259
|
let dubbedAudioBuffer;
|
|
1985
2260
|
try {
|
|
1986
|
-
const
|
|
1987
|
-
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
|
|
1992
|
-
if (!
|
|
1993
|
-
|
|
2261
|
+
const requestedLangCode = toISO639_3(toLanguageCode);
|
|
2262
|
+
let downloadLangCode = targetLanguages.find(
|
|
2263
|
+
(lang) => lang === requestedLangCode
|
|
2264
|
+
) ?? targetLanguages.find(
|
|
2265
|
+
(lang) => lang.toLowerCase() === requestedLangCode.toLowerCase()
|
|
2266
|
+
);
|
|
2267
|
+
if (!downloadLangCode && targetLanguages.length > 0) {
|
|
2268
|
+
downloadLangCode = targetLanguages[0];
|
|
2269
|
+
console.warn(`\u26A0\uFE0F Requested language "${requestedLangCode}" not found in target_languages. Using "${downloadLangCode}" instead.`);
|
|
2270
|
+
}
|
|
2271
|
+
if (!downloadLangCode) {
|
|
2272
|
+
downloadLangCode = requestedLangCode;
|
|
2273
|
+
console.warn(`\u26A0\uFE0F No target_languages available from ElevenLabs status. Using requested language code: ${requestedLangCode}`);
|
|
1994
2274
|
}
|
|
1995
|
-
dubbedAudioBuffer = await
|
|
1996
|
-
|
|
2275
|
+
dubbedAudioBuffer = await downloadDubbedAudioFromElevenLabs({
|
|
2276
|
+
dubbingId,
|
|
2277
|
+
languageCode: downloadLangCode,
|
|
2278
|
+
elevenLabsApiKey: elevenLabsKey
|
|
2279
|
+
});
|
|
2280
|
+
console.warn("\u2705 Dubbed audio downloaded successfully!");
|
|
1997
2281
|
} catch (error) {
|
|
1998
2282
|
throw new Error(`Failed to download dubbed audio: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
1999
2283
|
}
|
|
2000
|
-
console.
|
|
2001
|
-
const s3Client = new import_client_s3.S3Client({
|
|
2002
|
-
region: s3Region,
|
|
2003
|
-
endpoint: s3Endpoint,
|
|
2004
|
-
credentials: {
|
|
2005
|
-
accessKeyId: s3AccessKeyId,
|
|
2006
|
-
secretAccessKey: s3SecretAccessKey
|
|
2007
|
-
},
|
|
2008
|
-
forcePathStyle: true
|
|
2009
|
-
});
|
|
2010
|
-
const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
|
|
2284
|
+
console.warn("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
|
|
2011
2285
|
let presignedUrl;
|
|
2012
2286
|
try {
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
|
|
2016
|
-
|
|
2017
|
-
|
|
2018
|
-
|
|
2019
|
-
|
|
2020
|
-
|
|
2021
|
-
|
|
2022
|
-
await upload.done();
|
|
2023
|
-
console.log(`\u2705 Audio uploaded successfully to: ${audioKey}`);
|
|
2024
|
-
const getObjectCommand = new import_client_s3.GetObjectCommand({
|
|
2025
|
-
Bucket: s3Bucket,
|
|
2026
|
-
Key: audioKey
|
|
2027
|
-
});
|
|
2028
|
-
presignedUrl = await (0, import_s3_request_presigner.getSignedUrl)(s3Client, getObjectCommand, {
|
|
2029
|
-
expiresIn: 3600
|
|
2030
|
-
// 1 hour
|
|
2287
|
+
presignedUrl = await uploadDubbedAudioToS3({
|
|
2288
|
+
dubbedAudioBuffer,
|
|
2289
|
+
assetId,
|
|
2290
|
+
toLanguageCode,
|
|
2291
|
+
s3Endpoint,
|
|
2292
|
+
s3Region,
|
|
2293
|
+
s3Bucket,
|
|
2294
|
+
s3AccessKeyId,
|
|
2295
|
+
s3SecretAccessKey
|
|
2031
2296
|
});
|
|
2032
|
-
console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
|
|
2033
2297
|
} catch (error) {
|
|
2034
2298
|
throw new Error(`Failed to upload audio to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2035
2299
|
}
|
|
2036
|
-
console.
|
|
2300
|
+
console.warn("\u{1F4F9} Adding dubbed audio track to Mux asset...");
|
|
2037
2301
|
let uploadedTrackId;
|
|
2302
|
+
const muxLangCode = toISO639_1(toLanguageCode);
|
|
2038
2303
|
try {
|
|
2039
|
-
|
|
2304
|
+
uploadedTrackId = await createAudioTrackOnMux(credentials, assetId, muxLangCode, presignedUrl);
|
|
2305
|
+
const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(muxLangCode) || muxLangCode.toUpperCase();
|
|
2040
2306
|
const trackName = `${languageName} (auto-dubbed)`;
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
language_code: toLanguageCode,
|
|
2044
|
-
name: trackName,
|
|
2045
|
-
url: presignedUrl
|
|
2046
|
-
});
|
|
2047
|
-
uploadedTrackId = trackResponse.id;
|
|
2048
|
-
console.log(`\u2705 Audio track added to Mux asset with ID: ${uploadedTrackId}`);
|
|
2049
|
-
console.log(`\u{1F3B5} Track name: "${trackName}"`);
|
|
2307
|
+
console.warn(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
|
|
2308
|
+
console.warn(`\u{1F4CB} Track name: "${trackName}"`);
|
|
2050
2309
|
} catch (error) {
|
|
2051
2310
|
console.warn(`\u26A0\uFE0F Failed to add audio track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2052
|
-
console.
|
|
2053
|
-
console.
|
|
2311
|
+
console.warn("\u{1F517} You can manually add the track using this presigned URL:");
|
|
2312
|
+
console.warn(presignedUrl);
|
|
2054
2313
|
}
|
|
2314
|
+
const targetLanguage = getLanguageCodePair(toLanguageCode);
|
|
2055
2315
|
return {
|
|
2056
2316
|
assetId,
|
|
2057
|
-
targetLanguageCode:
|
|
2317
|
+
targetLanguageCode: targetLanguage.iso639_1,
|
|
2318
|
+
targetLanguage,
|
|
2058
2319
|
dubbingId,
|
|
2059
2320
|
uploadedTrackId,
|
|
2060
2321
|
presignedUrl
|
|
@@ -2062,43 +2323,149 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
2062
2323
|
}
|
|
2063
2324
|
|
|
2064
2325
|
// src/workflows/translate-captions.ts
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
|
|
2068
|
-
var
|
|
2069
|
-
|
|
2070
|
-
var translationSchema = import_zod5.z.object({
|
|
2071
|
-
translation: import_zod5.z.string()
|
|
2326
|
+
import Mux4 from "@mux/mux-node";
|
|
2327
|
+
import { generateObject as generateObject4 } from "ai";
|
|
2328
|
+
import { z as z5 } from "zod";
|
|
2329
|
+
var translationSchema = z5.object({
|
|
2330
|
+
translation: z5.string()
|
|
2072
2331
|
});
|
|
2073
|
-
|
|
2332
|
+
async function fetchVttFromMux(vttUrl) {
|
|
2333
|
+
"use step";
|
|
2334
|
+
const vttResponse = await fetch(vttUrl);
|
|
2335
|
+
if (!vttResponse.ok) {
|
|
2336
|
+
throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
|
|
2337
|
+
}
|
|
2338
|
+
return vttResponse.text();
|
|
2339
|
+
}
|
|
2340
|
+
async function translateVttWithAI({
|
|
2341
|
+
vttContent,
|
|
2342
|
+
fromLanguageCode,
|
|
2343
|
+
toLanguageCode,
|
|
2344
|
+
provider,
|
|
2345
|
+
modelId,
|
|
2346
|
+
credentials,
|
|
2347
|
+
abortSignal
|
|
2348
|
+
}) {
|
|
2349
|
+
"use step";
|
|
2350
|
+
const languageModel = createLanguageModelFromConfig(
|
|
2351
|
+
provider,
|
|
2352
|
+
modelId,
|
|
2353
|
+
credentials
|
|
2354
|
+
);
|
|
2355
|
+
const response = await generateObject4({
|
|
2356
|
+
model: languageModel,
|
|
2357
|
+
schema: translationSchema,
|
|
2358
|
+
abortSignal,
|
|
2359
|
+
messages: [
|
|
2360
|
+
{
|
|
2361
|
+
role: "user",
|
|
2362
|
+
content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
|
|
2363
|
+
|
|
2364
|
+
${vttContent}`
|
|
2365
|
+
}
|
|
2366
|
+
]
|
|
2367
|
+
});
|
|
2368
|
+
return {
|
|
2369
|
+
translatedVtt: response.object.translation,
|
|
2370
|
+
usage: {
|
|
2371
|
+
inputTokens: response.usage.inputTokens,
|
|
2372
|
+
outputTokens: response.usage.outputTokens,
|
|
2373
|
+
totalTokens: response.usage.totalTokens,
|
|
2374
|
+
reasoningTokens: response.usage.reasoningTokens,
|
|
2375
|
+
cachedInputTokens: response.usage.cachedInputTokens
|
|
2376
|
+
}
|
|
2377
|
+
};
|
|
2378
|
+
}
|
|
2379
|
+
async function uploadVttToS3({
|
|
2380
|
+
translatedVtt,
|
|
2381
|
+
assetId,
|
|
2382
|
+
fromLanguageCode,
|
|
2383
|
+
toLanguageCode,
|
|
2384
|
+
s3Endpoint,
|
|
2385
|
+
s3Region,
|
|
2386
|
+
s3Bucket,
|
|
2387
|
+
s3AccessKeyId,
|
|
2388
|
+
s3SecretAccessKey
|
|
2389
|
+
}) {
|
|
2390
|
+
"use step";
|
|
2391
|
+
const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
|
|
2392
|
+
const { Upload } = await import("@aws-sdk/lib-storage");
|
|
2393
|
+
const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
|
|
2394
|
+
const s3Client = new S3Client({
|
|
2395
|
+
region: s3Region,
|
|
2396
|
+
endpoint: s3Endpoint,
|
|
2397
|
+
credentials: {
|
|
2398
|
+
accessKeyId: s3AccessKeyId,
|
|
2399
|
+
secretAccessKey: s3SecretAccessKey
|
|
2400
|
+
},
|
|
2401
|
+
forcePathStyle: true
|
|
2402
|
+
});
|
|
2403
|
+
const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
|
|
2404
|
+
const upload = new Upload({
|
|
2405
|
+
client: s3Client,
|
|
2406
|
+
params: {
|
|
2407
|
+
Bucket: s3Bucket,
|
|
2408
|
+
Key: vttKey,
|
|
2409
|
+
Body: translatedVtt,
|
|
2410
|
+
ContentType: "text/vtt"
|
|
2411
|
+
}
|
|
2412
|
+
});
|
|
2413
|
+
await upload.done();
|
|
2414
|
+
const getObjectCommand = new GetObjectCommand({
|
|
2415
|
+
Bucket: s3Bucket,
|
|
2416
|
+
Key: vttKey
|
|
2417
|
+
});
|
|
2418
|
+
const presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
|
|
2419
|
+
expiresIn: 3600
|
|
2420
|
+
// 1 hour
|
|
2421
|
+
});
|
|
2422
|
+
return presignedUrl;
|
|
2423
|
+
}
|
|
2424
|
+
async function createTextTrackOnMux(credentials, assetId, languageCode, trackName, presignedUrl) {
|
|
2425
|
+
"use step";
|
|
2426
|
+
const mux = new Mux4({
|
|
2427
|
+
tokenId: credentials.muxTokenId,
|
|
2428
|
+
tokenSecret: credentials.muxTokenSecret
|
|
2429
|
+
});
|
|
2430
|
+
const trackResponse = await mux.video.assets.createTrack(assetId, {
|
|
2431
|
+
type: "text",
|
|
2432
|
+
text_type: "subtitles",
|
|
2433
|
+
language_code: languageCode,
|
|
2434
|
+
name: trackName,
|
|
2435
|
+
url: presignedUrl
|
|
2436
|
+
});
|
|
2437
|
+
if (!trackResponse.id) {
|
|
2438
|
+
throw new Error("Failed to create text track: no track ID returned from Mux");
|
|
2439
|
+
}
|
|
2440
|
+
return trackResponse.id;
|
|
2441
|
+
}
|
|
2074
2442
|
async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options) {
|
|
2443
|
+
"use workflow";
|
|
2075
2444
|
const {
|
|
2076
|
-
provider =
|
|
2445
|
+
provider = "openai",
|
|
2077
2446
|
model,
|
|
2078
2447
|
s3Endpoint: providedS3Endpoint,
|
|
2079
2448
|
s3Region: providedS3Region,
|
|
2080
2449
|
s3Bucket: providedS3Bucket,
|
|
2081
2450
|
s3AccessKeyId: providedS3AccessKeyId,
|
|
2082
2451
|
s3SecretAccessKey: providedS3SecretAccessKey,
|
|
2083
|
-
uploadToMux: uploadToMuxOption
|
|
2084
|
-
...clientConfig
|
|
2452
|
+
uploadToMux: uploadToMuxOption
|
|
2085
2453
|
} = options;
|
|
2086
|
-
const resolvedProvider = provider;
|
|
2087
2454
|
const s3Endpoint = providedS3Endpoint ?? env_default.S3_ENDPOINT;
|
|
2088
2455
|
const s3Region = providedS3Region ?? env_default.S3_REGION ?? "auto";
|
|
2089
2456
|
const s3Bucket = providedS3Bucket ?? env_default.S3_BUCKET;
|
|
2090
2457
|
const s3AccessKeyId = providedS3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
|
|
2091
2458
|
const s3SecretAccessKey = providedS3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
|
|
2092
2459
|
const uploadToMux = uploadToMuxOption !== false;
|
|
2093
|
-
const
|
|
2094
|
-
{ ...
|
|
2095
|
-
|
|
2460
|
+
const config = await createWorkflowConfig(
|
|
2461
|
+
{ ...options, model },
|
|
2462
|
+
provider
|
|
2096
2463
|
);
|
|
2097
2464
|
if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
|
|
2098
2465
|
throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
|
|
2099
2466
|
}
|
|
2100
|
-
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
|
|
2101
|
-
const signingContext = resolveSigningContext(options);
|
|
2467
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(config.credentials, assetId);
|
|
2468
|
+
const signingContext = await resolveSigningContext(options);
|
|
2102
2469
|
if (policy === "signed" && !signingContext) {
|
|
2103
2470
|
throw new Error(
|
|
2104
2471
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -2119,120 +2486,84 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
|
|
|
2119
2486
|
}
|
|
2120
2487
|
let vttContent;
|
|
2121
2488
|
try {
|
|
2122
|
-
|
|
2123
|
-
if (!vttResponse.ok) {
|
|
2124
|
-
throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
|
|
2125
|
-
}
|
|
2126
|
-
vttContent = await vttResponse.text();
|
|
2489
|
+
vttContent = await fetchVttFromMux(vttUrl);
|
|
2127
2490
|
} catch (error) {
|
|
2128
2491
|
throw new Error(`Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2129
2492
|
}
|
|
2130
|
-
console.log(`\u2705 Found VTT content for language '${fromLanguageCode}'`);
|
|
2131
2493
|
let translatedVtt;
|
|
2494
|
+
let usage;
|
|
2132
2495
|
try {
|
|
2133
|
-
const
|
|
2134
|
-
|
|
2135
|
-
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
|
|
2139
|
-
|
|
2140
|
-
|
|
2141
|
-
|
|
2142
|
-
${vttContent}`
|
|
2143
|
-
}
|
|
2144
|
-
]
|
|
2496
|
+
const result = await translateVttWithAI({
|
|
2497
|
+
vttContent,
|
|
2498
|
+
fromLanguageCode,
|
|
2499
|
+
toLanguageCode,
|
|
2500
|
+
provider: config.provider,
|
|
2501
|
+
modelId: config.modelId,
|
|
2502
|
+
credentials: config.credentials,
|
|
2503
|
+
abortSignal: options.abortSignal
|
|
2145
2504
|
});
|
|
2146
|
-
translatedVtt =
|
|
2505
|
+
translatedVtt = result.translatedVtt;
|
|
2506
|
+
usage = result.usage;
|
|
2147
2507
|
} catch (error) {
|
|
2148
|
-
throw new Error(`Failed to translate VTT with ${
|
|
2508
|
+
throw new Error(`Failed to translate VTT with ${config.provider}: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2149
2509
|
}
|
|
2150
|
-
|
|
2151
|
-
|
|
2510
|
+
const sourceLanguage = getLanguageCodePair(fromLanguageCode);
|
|
2511
|
+
const targetLanguage = getLanguageCodePair(toLanguageCode);
|
|
2152
2512
|
if (!uploadToMux) {
|
|
2153
|
-
console.log(`\u2705 VTT translated to ${toLanguageCode} successfully!`);
|
|
2154
2513
|
return {
|
|
2155
2514
|
assetId,
|
|
2156
2515
|
sourceLanguageCode: fromLanguageCode,
|
|
2157
2516
|
targetLanguageCode: toLanguageCode,
|
|
2517
|
+
sourceLanguage,
|
|
2518
|
+
targetLanguage,
|
|
2158
2519
|
originalVtt: vttContent,
|
|
2159
|
-
translatedVtt
|
|
2520
|
+
translatedVtt,
|
|
2521
|
+
usage
|
|
2160
2522
|
};
|
|
2161
2523
|
}
|
|
2162
|
-
console.log("\u{1F4E4} Uploading translated VTT to S3-compatible storage...");
|
|
2163
|
-
const s3Client = new import_client_s32.S3Client({
|
|
2164
|
-
region: s3Region,
|
|
2165
|
-
endpoint: s3Endpoint,
|
|
2166
|
-
credentials: {
|
|
2167
|
-
accessKeyId: s3AccessKeyId,
|
|
2168
|
-
secretAccessKey: s3SecretAccessKey
|
|
2169
|
-
},
|
|
2170
|
-
forcePathStyle: true
|
|
2171
|
-
// Often needed for non-AWS S3 services
|
|
2172
|
-
});
|
|
2173
|
-
const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
|
|
2174
2524
|
let presignedUrl;
|
|
2175
2525
|
try {
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
|
|
2185
|
-
|
|
2186
|
-
console.log(`\u2705 VTT uploaded successfully to: ${vttKey}`);
|
|
2187
|
-
const getObjectCommand = new import_client_s32.GetObjectCommand({
|
|
2188
|
-
Bucket: s3Bucket,
|
|
2189
|
-
Key: vttKey
|
|
2190
|
-
});
|
|
2191
|
-
presignedUrl = await (0, import_s3_request_presigner2.getSignedUrl)(s3Client, getObjectCommand, {
|
|
2192
|
-
expiresIn: 3600
|
|
2193
|
-
// 1 hour
|
|
2526
|
+
presignedUrl = await uploadVttToS3({
|
|
2527
|
+
translatedVtt,
|
|
2528
|
+
assetId,
|
|
2529
|
+
fromLanguageCode,
|
|
2530
|
+
toLanguageCode,
|
|
2531
|
+
s3Endpoint,
|
|
2532
|
+
s3Region,
|
|
2533
|
+
s3Bucket,
|
|
2534
|
+
s3AccessKeyId,
|
|
2535
|
+
s3SecretAccessKey
|
|
2194
2536
|
});
|
|
2195
|
-
console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
|
|
2196
2537
|
} catch (error) {
|
|
2197
2538
|
throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2198
2539
|
}
|
|
2199
|
-
console.log("\u{1F4F9} Adding translated track to Mux asset...");
|
|
2200
2540
|
let uploadedTrackId;
|
|
2201
2541
|
try {
|
|
2202
|
-
const languageName =
|
|
2542
|
+
const languageName = getLanguageName(toLanguageCode);
|
|
2203
2543
|
const trackName = `${languageName} (auto-translated)`;
|
|
2204
|
-
|
|
2205
|
-
type: "text",
|
|
2206
|
-
text_type: "subtitles",
|
|
2207
|
-
language_code: toLanguageCode,
|
|
2208
|
-
name: trackName,
|
|
2209
|
-
url: presignedUrl
|
|
2210
|
-
});
|
|
2211
|
-
uploadedTrackId = trackResponse.id;
|
|
2212
|
-
console.log(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
|
|
2213
|
-
console.log(`\u{1F4CB} Track name: "${trackName}"`);
|
|
2544
|
+
uploadedTrackId = await createTextTrackOnMux(config.credentials, assetId, toLanguageCode, trackName, presignedUrl);
|
|
2214
2545
|
} catch (error) {
|
|
2215
|
-
console.warn(
|
|
2216
|
-
console.log("\u{1F517} You can manually add the track using this presigned URL:");
|
|
2217
|
-
console.log(presignedUrl);
|
|
2546
|
+
console.warn(`Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2218
2547
|
}
|
|
2219
2548
|
return {
|
|
2220
2549
|
assetId,
|
|
2221
2550
|
sourceLanguageCode: fromLanguageCode,
|
|
2222
2551
|
targetLanguageCode: toLanguageCode,
|
|
2552
|
+
sourceLanguage,
|
|
2553
|
+
targetLanguage,
|
|
2223
2554
|
originalVtt: vttContent,
|
|
2224
2555
|
translatedVtt,
|
|
2225
2556
|
uploadedTrackId,
|
|
2226
|
-
presignedUrl
|
|
2557
|
+
presignedUrl,
|
|
2558
|
+
usage
|
|
2227
2559
|
};
|
|
2228
2560
|
}
|
|
2229
2561
|
|
|
2230
2562
|
// src/index.ts
|
|
2231
2563
|
var version = "0.1.0";
|
|
2232
|
-
|
|
2233
|
-
|
|
2234
|
-
primitives,
|
|
2564
|
+
export {
|
|
2565
|
+
primitives_exports as primitives,
|
|
2235
2566
|
version,
|
|
2236
|
-
workflows
|
|
2237
|
-
}
|
|
2567
|
+
workflows_exports as workflows
|
|
2568
|
+
};
|
|
2238
2569
|
//# sourceMappingURL=index.js.map
|