@mux/ai 0.1.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +346 -86
- package/dist/{index-DyTSka2R.d.ts → index-BcNDGOI6.d.ts} +12 -24
- package/dist/{index-Bnv7tv90.d.ts → index-D3fZHu0h.d.ts} +124 -13
- package/dist/index.d.ts +3 -3
- package/dist/index.js +989 -669
- package/dist/index.js.map +1 -1
- package/dist/primitives/index.d.ts +2 -2
- package/dist/primitives/index.js +37 -79
- package/dist/primitives/index.js.map +1 -1
- package/dist/{types-ktXDZ93V.d.mts → types-DzOQNn9R.d.ts} +3 -25
- package/dist/workflows/index.d.ts +2 -2
- package/dist/workflows/index.js +987 -683
- package/dist/workflows/index.js.map +1 -1
- package/package.json +34 -36
- package/dist/index-BNnz9P_5.d.mts +0 -144
- package/dist/index-vJ5r2FNm.d.mts +0 -477
- package/dist/index.d.mts +0 -13
- package/dist/index.mjs +0 -2205
- package/dist/index.mjs.map +0 -1
- package/dist/primitives/index.d.mts +0 -3
- package/dist/primitives/index.mjs +0 -358
- package/dist/primitives/index.mjs.map +0 -1
- package/dist/types-ktXDZ93V.d.ts +0 -137
- package/dist/workflows/index.d.mts +0 -8
- package/dist/workflows/index.mjs +0 -2168
- package/dist/workflows/index.mjs.map +0 -1
package/dist/index.js
CHANGED
|
@@ -1,40 +1,8 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __create = Object.create;
|
|
3
1
|
var __defProp = Object.defineProperty;
|
|
4
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
2
|
var __export = (target, all) => {
|
|
9
3
|
for (var name in all)
|
|
10
4
|
__defProp(target, name, { get: all[name], enumerable: true });
|
|
11
5
|
};
|
|
12
|
-
var __copyProps = (to, from, except, desc) => {
|
|
13
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
14
|
-
for (let key of __getOwnPropNames(from))
|
|
15
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
16
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
17
|
-
}
|
|
18
|
-
return to;
|
|
19
|
-
};
|
|
20
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
-
mod
|
|
27
|
-
));
|
|
28
|
-
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
|
-
|
|
30
|
-
// src/index.ts
|
|
31
|
-
var index_exports = {};
|
|
32
|
-
__export(index_exports, {
|
|
33
|
-
primitives: () => primitives_exports,
|
|
34
|
-
version: () => version,
|
|
35
|
-
workflows: () => workflows_exports
|
|
36
|
-
});
|
|
37
|
-
module.exports = __toCommonJS(index_exports);
|
|
38
6
|
|
|
39
7
|
// src/primitives/index.ts
|
|
40
8
|
var primitives_exports = {};
|
|
@@ -57,33 +25,25 @@ __export(primitives_exports, {
|
|
|
57
25
|
});
|
|
58
26
|
|
|
59
27
|
// src/lib/url-signing.ts
|
|
60
|
-
|
|
28
|
+
import Mux from "@mux/mux-node";
|
|
61
29
|
|
|
62
30
|
// src/env.ts
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
var import_dotenv_expand = require("dotenv-expand");
|
|
66
|
-
var import_zod = require("zod");
|
|
67
|
-
(0, import_dotenv_expand.expand)((0, import_dotenv.config)({
|
|
68
|
-
path: import_node_path.default.resolve(
|
|
69
|
-
process.cwd(),
|
|
70
|
-
process.env.NODE_ENV === "test" ? ".env.test" : ".env"
|
|
71
|
-
)
|
|
72
|
-
}));
|
|
31
|
+
import { z } from "zod";
|
|
32
|
+
import "dotenv/config";
|
|
73
33
|
function optionalString(description, message) {
|
|
74
|
-
return
|
|
34
|
+
return z.preprocess(
|
|
75
35
|
(value) => typeof value === "string" && value.trim().length === 0 ? void 0 : value,
|
|
76
|
-
|
|
36
|
+
z.string().trim().min(1, message).optional()
|
|
77
37
|
).describe(description);
|
|
78
38
|
}
|
|
79
39
|
function requiredString(description, message) {
|
|
80
|
-
return
|
|
40
|
+
return z.preprocess(
|
|
81
41
|
(value) => typeof value === "string" ? value.trim().length > 0 ? value.trim() : void 0 : value,
|
|
82
|
-
|
|
42
|
+
z.string().trim().min(1, message)
|
|
83
43
|
).describe(description);
|
|
84
44
|
}
|
|
85
|
-
var EnvSchema =
|
|
86
|
-
NODE_ENV:
|
|
45
|
+
var EnvSchema = z.object({
|
|
46
|
+
NODE_ENV: z.string().default("development").describe("Runtime environment."),
|
|
87
47
|
MUX_TOKEN_ID: requiredString("Mux access token ID.", "Required to access Mux APIs"),
|
|
88
48
|
MUX_TOKEN_SECRET: requiredString("Mux access token secret.", "Required to access Mux APIs"),
|
|
89
49
|
MUX_SIGNING_KEY: optionalString("Mux signing key ID for signed playback URLs.", "Used to sign playback URLs"),
|
|
@@ -112,16 +72,16 @@ var env = parseEnv();
|
|
|
112
72
|
var env_default = env;
|
|
113
73
|
|
|
114
74
|
// src/lib/url-signing.ts
|
|
115
|
-
function
|
|
116
|
-
const keyId =
|
|
117
|
-
const keySecret =
|
|
75
|
+
function getMuxSigningContextFromEnv() {
|
|
76
|
+
const keyId = env_default.MUX_SIGNING_KEY;
|
|
77
|
+
const keySecret = env_default.MUX_PRIVATE_KEY;
|
|
118
78
|
if (!keyId || !keySecret) {
|
|
119
79
|
return void 0;
|
|
120
80
|
}
|
|
121
81
|
return { keyId, keySecret };
|
|
122
82
|
}
|
|
123
83
|
function createSigningClient(context) {
|
|
124
|
-
return new
|
|
84
|
+
return new Mux({
|
|
125
85
|
// These are not needed for signing, but the SDK requires them
|
|
126
86
|
// Using empty strings as we only need the jwt functionality
|
|
127
87
|
tokenId: env_default.MUX_TOKEN_ID || "",
|
|
@@ -131,6 +91,7 @@ function createSigningClient(context) {
|
|
|
131
91
|
});
|
|
132
92
|
}
|
|
133
93
|
async function signPlaybackId(playbackId, context, type = "video", params) {
|
|
94
|
+
"use step";
|
|
134
95
|
const client = createSigningClient(context);
|
|
135
96
|
const stringParams = params ? Object.fromEntries(
|
|
136
97
|
Object.entries(params).map(([key, value]) => [key, String(value)])
|
|
@@ -142,6 +103,7 @@ async function signPlaybackId(playbackId, context, type = "video", params) {
|
|
|
142
103
|
});
|
|
143
104
|
}
|
|
144
105
|
async function signUrl(url, playbackId, context, type = "video", params) {
|
|
106
|
+
"use step";
|
|
145
107
|
const token = await signPlaybackId(playbackId, context, type, params);
|
|
146
108
|
const separator = url.includes("?") ? "&" : "?";
|
|
147
109
|
return `${url}${separator}token=${token}`;
|
|
@@ -149,9 +111,11 @@ async function signUrl(url, playbackId, context, type = "video", params) {
|
|
|
149
111
|
|
|
150
112
|
// src/primitives/storyboards.ts
|
|
151
113
|
var DEFAULT_STORYBOARD_WIDTH = 640;
|
|
152
|
-
async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH,
|
|
114
|
+
async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, shouldSign = false) {
|
|
115
|
+
"use step";
|
|
153
116
|
const baseUrl = `https://image.mux.com/${playbackId}/storyboard.png`;
|
|
154
|
-
if (
|
|
117
|
+
if (shouldSign) {
|
|
118
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
155
119
|
return signUrl(baseUrl, playbackId, signingContext, "storyboard", { width });
|
|
156
120
|
}
|
|
157
121
|
return `${baseUrl}?width=${width}`;
|
|
@@ -244,7 +208,8 @@ function chunkText(text, strategy) {
|
|
|
244
208
|
|
|
245
209
|
// src/primitives/thumbnails.ts
|
|
246
210
|
async function getThumbnailUrls(playbackId, duration, options = {}) {
|
|
247
|
-
|
|
211
|
+
"use step";
|
|
212
|
+
const { interval = 10, width = 640, shouldSign = false } = options;
|
|
248
213
|
const timestamps = [];
|
|
249
214
|
if (duration <= 50) {
|
|
250
215
|
const spacing = duration / 6;
|
|
@@ -258,7 +223,8 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
|
|
|
258
223
|
}
|
|
259
224
|
const baseUrl = `https://image.mux.com/${playbackId}/thumbnail.png`;
|
|
260
225
|
const urlPromises = timestamps.map(async (time) => {
|
|
261
|
-
if (
|
|
226
|
+
if (shouldSign) {
|
|
227
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
262
228
|
return signUrl(baseUrl, playbackId, signingContext, "thumbnail", { time, width });
|
|
263
229
|
}
|
|
264
230
|
return `${baseUrl}?time=${time}&width=${width}`;
|
|
@@ -374,15 +340,18 @@ function parseVTTCues(vttContent) {
|
|
|
374
340
|
}
|
|
375
341
|
return cues;
|
|
376
342
|
}
|
|
377
|
-
async function buildTranscriptUrl(playbackId, trackId,
|
|
343
|
+
async function buildTranscriptUrl(playbackId, trackId, shouldSign = false) {
|
|
344
|
+
"use step";
|
|
378
345
|
const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
|
|
379
|
-
if (
|
|
346
|
+
if (shouldSign) {
|
|
347
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
380
348
|
return signUrl(baseUrl, playbackId, signingContext, "video");
|
|
381
349
|
}
|
|
382
350
|
return baseUrl;
|
|
383
351
|
}
|
|
384
352
|
async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
|
|
385
|
-
|
|
353
|
+
"use step";
|
|
354
|
+
const { languageCode, cleanTranscript = true, shouldSign } = options;
|
|
386
355
|
const track = findCaptionTrack(asset, languageCode);
|
|
387
356
|
if (!track) {
|
|
388
357
|
return { transcriptText: "" };
|
|
@@ -390,7 +359,7 @@ async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
|
|
|
390
359
|
if (!track.id) {
|
|
391
360
|
return { transcriptText: "", track };
|
|
392
361
|
}
|
|
393
|
-
const transcriptUrl = await buildTranscriptUrl(playbackId, track.id,
|
|
362
|
+
const transcriptUrl = await buildTranscriptUrl(playbackId, track.id, shouldSign);
|
|
394
363
|
try {
|
|
395
364
|
const response = await fetch(transcriptUrl);
|
|
396
365
|
if (!response.ok) {
|
|
@@ -424,130 +393,17 @@ __export(workflows_exports, {
|
|
|
424
393
|
});
|
|
425
394
|
|
|
426
395
|
// src/workflows/burned-in-captions.ts
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
function ownKeys(object, enumerableOnly) {
|
|
431
|
-
var keys = Object.keys(object);
|
|
432
|
-
if (Object.getOwnPropertySymbols) {
|
|
433
|
-
var symbols = Object.getOwnPropertySymbols(object);
|
|
434
|
-
enumerableOnly && (symbols = symbols.filter(function(sym) {
|
|
435
|
-
return Object.getOwnPropertyDescriptor(object, sym).enumerable;
|
|
436
|
-
})), keys.push.apply(keys, symbols);
|
|
437
|
-
}
|
|
438
|
-
return keys;
|
|
439
|
-
}
|
|
440
|
-
function _objectSpread(target) {
|
|
441
|
-
for (var i = 1; i < arguments.length; i++) {
|
|
442
|
-
var source = null != arguments[i] ? arguments[i] : {};
|
|
443
|
-
i % 2 ? ownKeys(Object(source), true).forEach(function(key) {
|
|
444
|
-
_defineProperty(target, key, source[key]);
|
|
445
|
-
}) : Object.getOwnPropertyDescriptors ? Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)) : ownKeys(Object(source)).forEach(function(key) {
|
|
446
|
-
Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
|
|
447
|
-
});
|
|
448
|
-
}
|
|
449
|
-
return target;
|
|
450
|
-
}
|
|
451
|
-
function _defineProperty(obj, key, value) {
|
|
452
|
-
key = _toPropertyKey(key);
|
|
453
|
-
if (key in obj) {
|
|
454
|
-
Object.defineProperty(obj, key, { value, enumerable: true, configurable: true, writable: true });
|
|
455
|
-
} else {
|
|
456
|
-
obj[key] = value;
|
|
457
|
-
}
|
|
458
|
-
return obj;
|
|
459
|
-
}
|
|
460
|
-
function _toPropertyKey(arg) {
|
|
461
|
-
var key = _toPrimitive(arg, "string");
|
|
462
|
-
return typeof key === "symbol" ? key : String(key);
|
|
463
|
-
}
|
|
464
|
-
function _toPrimitive(input, hint) {
|
|
465
|
-
if (typeof input !== "object" || input === null) return input;
|
|
466
|
-
var prim = input[Symbol.toPrimitive];
|
|
467
|
-
if (prim !== void 0) {
|
|
468
|
-
var res = prim.call(input, hint || "default");
|
|
469
|
-
if (typeof res !== "object") return res;
|
|
470
|
-
throw new TypeError("@@toPrimitive must return a primitive value.");
|
|
471
|
-
}
|
|
472
|
-
return (hint === "string" ? String : Number)(input);
|
|
473
|
-
}
|
|
474
|
-
var dedent = createDedent({});
|
|
475
|
-
var dedent_default = dedent;
|
|
476
|
-
function createDedent(options) {
|
|
477
|
-
dedent2.withOptions = (newOptions) => createDedent(_objectSpread(_objectSpread({}, options), newOptions));
|
|
478
|
-
return dedent2;
|
|
479
|
-
function dedent2(strings, ...values) {
|
|
480
|
-
const raw = typeof strings === "string" ? [strings] : strings.raw;
|
|
481
|
-
const {
|
|
482
|
-
alignValues = false,
|
|
483
|
-
escapeSpecialCharacters = Array.isArray(strings),
|
|
484
|
-
trimWhitespace = true
|
|
485
|
-
} = options;
|
|
486
|
-
let result = "";
|
|
487
|
-
for (let i = 0; i < raw.length; i++) {
|
|
488
|
-
let next = raw[i];
|
|
489
|
-
if (escapeSpecialCharacters) {
|
|
490
|
-
next = next.replace(/\\\n[ \t]*/g, "").replace(/\\`/g, "`").replace(/\\\$/g, "$").replace(/\\\{/g, "{");
|
|
491
|
-
}
|
|
492
|
-
result += next;
|
|
493
|
-
if (i < values.length) {
|
|
494
|
-
const value = alignValues ? alignValue(values[i], result) : values[i];
|
|
495
|
-
result += value;
|
|
496
|
-
}
|
|
497
|
-
}
|
|
498
|
-
const lines = result.split("\n");
|
|
499
|
-
let mindent = null;
|
|
500
|
-
for (const l of lines) {
|
|
501
|
-
const m = l.match(/^(\s+)\S+/);
|
|
502
|
-
if (m) {
|
|
503
|
-
const indent = m[1].length;
|
|
504
|
-
if (!mindent) {
|
|
505
|
-
mindent = indent;
|
|
506
|
-
} else {
|
|
507
|
-
mindent = Math.min(mindent, indent);
|
|
508
|
-
}
|
|
509
|
-
}
|
|
510
|
-
}
|
|
511
|
-
if (mindent !== null) {
|
|
512
|
-
const m = mindent;
|
|
513
|
-
result = lines.map((l) => l[0] === " " || l[0] === " " ? l.slice(m) : l).join("\n");
|
|
514
|
-
}
|
|
515
|
-
if (trimWhitespace) {
|
|
516
|
-
result = result.trim();
|
|
517
|
-
}
|
|
518
|
-
if (escapeSpecialCharacters) {
|
|
519
|
-
result = result.replace(/\\n/g, "\n");
|
|
520
|
-
}
|
|
521
|
-
return result;
|
|
522
|
-
}
|
|
523
|
-
}
|
|
524
|
-
function alignValue(value, precedingText) {
|
|
525
|
-
if (typeof value !== "string" || !value.includes("\n")) {
|
|
526
|
-
return value;
|
|
527
|
-
}
|
|
528
|
-
const currentLine = precedingText.slice(precedingText.lastIndexOf("\n") + 1);
|
|
529
|
-
const indentMatch = currentLine.match(/^(\s+)/);
|
|
530
|
-
if (indentMatch) {
|
|
531
|
-
const indent = indentMatch[1];
|
|
532
|
-
return value.replace(/\n/g, `
|
|
533
|
-
${indent}`);
|
|
534
|
-
}
|
|
535
|
-
return value;
|
|
536
|
-
}
|
|
537
|
-
|
|
538
|
-
// src/workflows/burned-in-captions.ts
|
|
539
|
-
var import_zod2 = require("zod");
|
|
540
|
-
|
|
541
|
-
// src/lib/client-factory.ts
|
|
542
|
-
var import_mux_node2 = __toESM(require("@mux/mux-node"));
|
|
396
|
+
import { generateObject } from "ai";
|
|
397
|
+
import dedent from "dedent";
|
|
398
|
+
import { z as z2 } from "zod";
|
|
543
399
|
|
|
544
400
|
// src/lib/providers.ts
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
401
|
+
import { createAnthropic } from "@ai-sdk/anthropic";
|
|
402
|
+
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
|
403
|
+
import { createOpenAI } from "@ai-sdk/openai";
|
|
548
404
|
var DEFAULT_LANGUAGE_MODELS = {
|
|
549
|
-
openai: "gpt-5
|
|
550
|
-
anthropic: "claude-
|
|
405
|
+
openai: "gpt-5.1",
|
|
406
|
+
anthropic: "claude-sonnet-4-5",
|
|
551
407
|
google: "gemini-2.5-flash"
|
|
552
408
|
};
|
|
553
409
|
var DEFAULT_EMBEDDING_MODELS = {
|
|
@@ -560,14 +416,60 @@ function requireEnv(value, name) {
|
|
|
560
416
|
}
|
|
561
417
|
return value;
|
|
562
418
|
}
|
|
419
|
+
function createLanguageModelFromConfig(provider, modelId) {
|
|
420
|
+
switch (provider) {
|
|
421
|
+
case "openai": {
|
|
422
|
+
const apiKey = env_default.OPENAI_API_KEY;
|
|
423
|
+
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
424
|
+
const openai = createOpenAI({ apiKey });
|
|
425
|
+
return openai(modelId);
|
|
426
|
+
}
|
|
427
|
+
case "anthropic": {
|
|
428
|
+
const apiKey = env_default.ANTHROPIC_API_KEY;
|
|
429
|
+
requireEnv(apiKey, "ANTHROPIC_API_KEY");
|
|
430
|
+
const anthropic = createAnthropic({ apiKey });
|
|
431
|
+
return anthropic(modelId);
|
|
432
|
+
}
|
|
433
|
+
case "google": {
|
|
434
|
+
const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
|
|
435
|
+
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
436
|
+
const google = createGoogleGenerativeAI({ apiKey });
|
|
437
|
+
return google(modelId);
|
|
438
|
+
}
|
|
439
|
+
default: {
|
|
440
|
+
const exhaustiveCheck = provider;
|
|
441
|
+
throw new Error(`Unsupported provider: ${exhaustiveCheck}`);
|
|
442
|
+
}
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
function createEmbeddingModelFromConfig(provider, modelId) {
|
|
446
|
+
switch (provider) {
|
|
447
|
+
case "openai": {
|
|
448
|
+
const apiKey = env_default.OPENAI_API_KEY;
|
|
449
|
+
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
450
|
+
const openai = createOpenAI({ apiKey });
|
|
451
|
+
return openai.embedding(modelId);
|
|
452
|
+
}
|
|
453
|
+
case "google": {
|
|
454
|
+
const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
|
|
455
|
+
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
456
|
+
const google = createGoogleGenerativeAI({ apiKey });
|
|
457
|
+
return google.textEmbeddingModel(modelId);
|
|
458
|
+
}
|
|
459
|
+
default: {
|
|
460
|
+
const exhaustiveCheck = provider;
|
|
461
|
+
throw new Error(`Unsupported embedding provider: ${exhaustiveCheck}`);
|
|
462
|
+
}
|
|
463
|
+
}
|
|
464
|
+
}
|
|
563
465
|
function resolveLanguageModel(options = {}) {
|
|
564
466
|
const provider = options.provider || "openai";
|
|
565
467
|
const modelId = options.model || DEFAULT_LANGUAGE_MODELS[provider];
|
|
566
468
|
switch (provider) {
|
|
567
469
|
case "openai": {
|
|
568
|
-
const apiKey =
|
|
470
|
+
const apiKey = env_default.OPENAI_API_KEY;
|
|
569
471
|
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
570
|
-
const openai =
|
|
472
|
+
const openai = createOpenAI({
|
|
571
473
|
apiKey
|
|
572
474
|
});
|
|
573
475
|
return {
|
|
@@ -577,9 +479,9 @@ function resolveLanguageModel(options = {}) {
|
|
|
577
479
|
};
|
|
578
480
|
}
|
|
579
481
|
case "anthropic": {
|
|
580
|
-
const apiKey =
|
|
482
|
+
const apiKey = env_default.ANTHROPIC_API_KEY;
|
|
581
483
|
requireEnv(apiKey, "ANTHROPIC_API_KEY");
|
|
582
|
-
const anthropic =
|
|
484
|
+
const anthropic = createAnthropic({
|
|
583
485
|
apiKey
|
|
584
486
|
});
|
|
585
487
|
return {
|
|
@@ -589,9 +491,9 @@ function resolveLanguageModel(options = {}) {
|
|
|
589
491
|
};
|
|
590
492
|
}
|
|
591
493
|
case "google": {
|
|
592
|
-
const apiKey =
|
|
494
|
+
const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
|
|
593
495
|
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
594
|
-
const google =
|
|
496
|
+
const google = createGoogleGenerativeAI({
|
|
595
497
|
apiKey
|
|
596
498
|
});
|
|
597
499
|
return {
|
|
@@ -611,9 +513,9 @@ function resolveEmbeddingModel(options = {}) {
|
|
|
611
513
|
const modelId = options.model || DEFAULT_EMBEDDING_MODELS[provider];
|
|
612
514
|
switch (provider) {
|
|
613
515
|
case "openai": {
|
|
614
|
-
const apiKey =
|
|
516
|
+
const apiKey = env_default.OPENAI_API_KEY;
|
|
615
517
|
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
616
|
-
const openai =
|
|
518
|
+
const openai = createOpenAI({
|
|
617
519
|
apiKey
|
|
618
520
|
});
|
|
619
521
|
return {
|
|
@@ -623,9 +525,9 @@ function resolveEmbeddingModel(options = {}) {
|
|
|
623
525
|
};
|
|
624
526
|
}
|
|
625
527
|
case "google": {
|
|
626
|
-
const apiKey =
|
|
528
|
+
const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
|
|
627
529
|
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
628
|
-
const google =
|
|
530
|
+
const google = createGoogleGenerativeAI({
|
|
629
531
|
apiKey
|
|
630
532
|
});
|
|
631
533
|
return {
|
|
@@ -642,12 +544,45 @@ function resolveEmbeddingModel(options = {}) {
|
|
|
642
544
|
}
|
|
643
545
|
|
|
644
546
|
// src/lib/client-factory.ts
|
|
645
|
-
function
|
|
646
|
-
const muxTokenId =
|
|
647
|
-
const muxTokenSecret =
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
547
|
+
function getMuxCredentialsFromEnv() {
|
|
548
|
+
const muxTokenId = env_default.MUX_TOKEN_ID;
|
|
549
|
+
const muxTokenSecret = env_default.MUX_TOKEN_SECRET;
|
|
550
|
+
if (!muxTokenId || !muxTokenSecret) {
|
|
551
|
+
throw new Error(
|
|
552
|
+
"Mux credentials are required. Set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables."
|
|
553
|
+
);
|
|
554
|
+
}
|
|
555
|
+
return { muxTokenId, muxTokenSecret };
|
|
556
|
+
}
|
|
557
|
+
function getApiKeyFromEnv(provider) {
|
|
558
|
+
const envVarMap = {
|
|
559
|
+
openai: env_default.OPENAI_API_KEY,
|
|
560
|
+
anthropic: env_default.ANTHROPIC_API_KEY,
|
|
561
|
+
google: env_default.GOOGLE_GENERATIVE_AI_API_KEY,
|
|
562
|
+
hive: env_default.HIVE_API_KEY,
|
|
563
|
+
elevenlabs: env_default.ELEVENLABS_API_KEY
|
|
564
|
+
};
|
|
565
|
+
const apiKey = envVarMap[provider];
|
|
566
|
+
if (!apiKey) {
|
|
567
|
+
const envVarNames = {
|
|
568
|
+
openai: "OPENAI_API_KEY",
|
|
569
|
+
anthropic: "ANTHROPIC_API_KEY",
|
|
570
|
+
google: "GOOGLE_GENERATIVE_AI_API_KEY",
|
|
571
|
+
hive: "HIVE_API_KEY",
|
|
572
|
+
elevenlabs: "ELEVENLABS_API_KEY"
|
|
573
|
+
};
|
|
574
|
+
throw new Error(
|
|
575
|
+
`${provider} API key is required. Set ${envVarNames[provider]} environment variable.`
|
|
576
|
+
);
|
|
577
|
+
}
|
|
578
|
+
return apiKey;
|
|
579
|
+
}
|
|
580
|
+
async function validateCredentials(requiredProvider) {
|
|
581
|
+
const muxTokenId = env_default.MUX_TOKEN_ID;
|
|
582
|
+
const muxTokenSecret = env_default.MUX_TOKEN_SECRET;
|
|
583
|
+
const openaiApiKey = env_default.OPENAI_API_KEY;
|
|
584
|
+
const anthropicApiKey = env_default.ANTHROPIC_API_KEY;
|
|
585
|
+
const googleApiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
|
|
651
586
|
if (!muxTokenId || !muxTokenSecret) {
|
|
652
587
|
throw new Error(
|
|
653
588
|
"Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables."
|
|
@@ -676,32 +611,23 @@ function validateCredentials(options, requiredProvider) {
|
|
|
676
611
|
googleApiKey
|
|
677
612
|
};
|
|
678
613
|
}
|
|
679
|
-
function
|
|
680
|
-
if (!credentials.muxTokenId || !credentials.muxTokenSecret) {
|
|
681
|
-
throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
|
|
682
|
-
}
|
|
683
|
-
return new import_mux_node2.default({
|
|
684
|
-
tokenId: credentials.muxTokenId,
|
|
685
|
-
tokenSecret: credentials.muxTokenSecret
|
|
686
|
-
});
|
|
687
|
-
}
|
|
688
|
-
function createWorkflowClients(options, provider) {
|
|
614
|
+
async function createWorkflowConfig(options, provider) {
|
|
689
615
|
const providerToUse = provider || options.provider || "openai";
|
|
690
|
-
const credentials = validateCredentials(
|
|
691
|
-
const
|
|
616
|
+
const credentials = await validateCredentials(providerToUse);
|
|
617
|
+
const resolved = resolveLanguageModel({
|
|
692
618
|
...options,
|
|
693
619
|
provider: providerToUse
|
|
694
620
|
});
|
|
695
621
|
return {
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
622
|
+
credentials,
|
|
623
|
+
provider: resolved.provider,
|
|
624
|
+
modelId: resolved.modelId
|
|
699
625
|
};
|
|
700
626
|
}
|
|
701
627
|
|
|
702
628
|
// src/lib/image-download.ts
|
|
703
|
-
|
|
704
|
-
|
|
629
|
+
import { Buffer as Buffer2 } from "buffer";
|
|
630
|
+
import pRetry, { AbortError } from "p-retry";
|
|
705
631
|
var DEFAULT_OPTIONS = {
|
|
706
632
|
timeout: 1e4,
|
|
707
633
|
retries: 3,
|
|
@@ -710,9 +636,10 @@ var DEFAULT_OPTIONS = {
|
|
|
710
636
|
exponentialBackoff: true
|
|
711
637
|
};
|
|
712
638
|
async function downloadImageAsBase64(url, options = {}) {
|
|
639
|
+
"use step";
|
|
713
640
|
const opts = { ...DEFAULT_OPTIONS, ...options };
|
|
714
641
|
let attemptCount = 0;
|
|
715
|
-
return (
|
|
642
|
+
return pRetry(
|
|
716
643
|
async () => {
|
|
717
644
|
attemptCount++;
|
|
718
645
|
const controller = new AbortController();
|
|
@@ -727,18 +654,18 @@ async function downloadImageAsBase64(url, options = {}) {
|
|
|
727
654
|
clearTimeout(timeoutId);
|
|
728
655
|
if (!response.ok) {
|
|
729
656
|
if (response.status >= 400 && response.status < 500 && response.status !== 429) {
|
|
730
|
-
throw new
|
|
657
|
+
throw new AbortError(`HTTP ${response.status}: ${response.statusText}`);
|
|
731
658
|
}
|
|
732
659
|
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
733
660
|
}
|
|
734
661
|
const contentType = response.headers.get("content-type");
|
|
735
662
|
if (!contentType?.startsWith("image/")) {
|
|
736
|
-
throw new
|
|
663
|
+
throw new AbortError(`Invalid content type: ${contentType}. Expected image/*`);
|
|
737
664
|
}
|
|
738
665
|
const arrayBuffer = await response.arrayBuffer();
|
|
739
|
-
const buffer =
|
|
666
|
+
const buffer = Buffer2.from(arrayBuffer);
|
|
740
667
|
if (buffer.length === 0) {
|
|
741
|
-
throw new
|
|
668
|
+
throw new AbortError("Downloaded image is empty");
|
|
742
669
|
}
|
|
743
670
|
const base64Data = `data:${contentType};base64,${buffer.toString("base64")}`;
|
|
744
671
|
return {
|
|
@@ -751,7 +678,7 @@ async function downloadImageAsBase64(url, options = {}) {
|
|
|
751
678
|
};
|
|
752
679
|
} catch (error) {
|
|
753
680
|
clearTimeout(timeoutId);
|
|
754
|
-
if (error instanceof
|
|
681
|
+
if (error instanceof AbortError) {
|
|
755
682
|
throw error;
|
|
756
683
|
}
|
|
757
684
|
if (error instanceof Error) {
|
|
@@ -780,6 +707,7 @@ async function downloadImageAsBase64(url, options = {}) {
|
|
|
780
707
|
);
|
|
781
708
|
}
|
|
782
709
|
async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
|
|
710
|
+
"use step";
|
|
783
711
|
const results = [];
|
|
784
712
|
for (let i = 0; i < urls.length; i += maxConcurrent) {
|
|
785
713
|
const batch = urls.slice(i, i + maxConcurrent);
|
|
@@ -791,6 +719,7 @@ async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
|
|
|
791
719
|
}
|
|
792
720
|
|
|
793
721
|
// src/lib/mux-assets.ts
|
|
722
|
+
import Mux2 from "@mux/mux-node";
|
|
794
723
|
function getPlaybackId(asset) {
|
|
795
724
|
const playbackIds = asset.playback_ids || [];
|
|
796
725
|
const publicPlaybackId = playbackIds.find((pid) => pid.policy === "public");
|
|
@@ -805,7 +734,13 @@ function getPlaybackId(asset) {
|
|
|
805
734
|
"No public or signed playback ID found for this asset. A public or signed playback ID is required. DRM playback IDs are not currently supported."
|
|
806
735
|
);
|
|
807
736
|
}
|
|
808
|
-
async function getPlaybackIdForAsset(
|
|
737
|
+
async function getPlaybackIdForAsset(assetId) {
|
|
738
|
+
"use step";
|
|
739
|
+
const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
|
|
740
|
+
const mux = new Mux2({
|
|
741
|
+
tokenId: muxTokenId,
|
|
742
|
+
tokenSecret: muxTokenSecret
|
|
743
|
+
});
|
|
809
744
|
const asset = await mux.video.assets.retrieve(assetId);
|
|
810
745
|
const { id: playbackId, policy } = getPlaybackId(asset);
|
|
811
746
|
return { asset, playbackId, policy };
|
|
@@ -844,8 +779,8 @@ function resolveSection(defaultSection, override) {
|
|
|
844
779
|
}
|
|
845
780
|
return override;
|
|
846
781
|
}
|
|
847
|
-
function createPromptBuilder(
|
|
848
|
-
const { template, sectionOrder } =
|
|
782
|
+
function createPromptBuilder(config) {
|
|
783
|
+
const { template, sectionOrder } = config;
|
|
849
784
|
const getSection = (section, override) => {
|
|
850
785
|
const resolved = resolveSection(template[section], override);
|
|
851
786
|
return renderSection(resolved);
|
|
@@ -886,12 +821,12 @@ function createToneSection(instruction) {
|
|
|
886
821
|
}
|
|
887
822
|
|
|
888
823
|
// src/workflows/burned-in-captions.ts
|
|
889
|
-
var burnedInCaptionsSchema =
|
|
890
|
-
hasBurnedInCaptions:
|
|
891
|
-
confidence:
|
|
892
|
-
detectedLanguage:
|
|
824
|
+
var burnedInCaptionsSchema = z2.object({
|
|
825
|
+
hasBurnedInCaptions: z2.boolean(),
|
|
826
|
+
confidence: z2.number().min(0).max(1),
|
|
827
|
+
detectedLanguage: z2.string().nullable()
|
|
893
828
|
});
|
|
894
|
-
var SYSTEM_PROMPT =
|
|
829
|
+
var SYSTEM_PROMPT = dedent`
|
|
895
830
|
<role>
|
|
896
831
|
You are an expert at analyzing video frames to detect burned-in captions (also called open captions or hardcoded subtitles).
|
|
897
832
|
These are text overlays that are permanently embedded in the video image, common on TikTok, Instagram Reels, and other social media platforms.
|
|
@@ -934,14 +869,14 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
|
|
|
934
869
|
template: {
|
|
935
870
|
task: {
|
|
936
871
|
tag: "task",
|
|
937
|
-
content:
|
|
872
|
+
content: dedent`
|
|
938
873
|
Analyze the provided video storyboard to detect burned-in captions (hardcoded subtitles).
|
|
939
874
|
Count frames with text vs no text, note position consistency and whether text changes across frames.
|
|
940
875
|
Decide if captions exist, with confidence (0.0-1.0) and detected language if any.`
|
|
941
876
|
},
|
|
942
877
|
analysisSteps: {
|
|
943
878
|
tag: "analysis_steps",
|
|
944
|
-
content:
|
|
879
|
+
content: dedent`
|
|
945
880
|
1. COUNT how many frames contain text overlays vs. how many don't
|
|
946
881
|
2. Check if text appears in consistent positions across multiple frames
|
|
947
882
|
3. Verify text changes content between frames (indicating dialogue/narration)
|
|
@@ -950,7 +885,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
|
|
|
950
885
|
},
|
|
951
886
|
positiveIndicators: {
|
|
952
887
|
tag: "classify_as_captions",
|
|
953
|
-
content:
|
|
888
|
+
content: dedent`
|
|
954
889
|
ONLY classify as burned-in captions if:
|
|
955
890
|
- Text appears in multiple frames (not just 1-2 end frames)
|
|
956
891
|
- Text positioning is consistent across those frames
|
|
@@ -959,7 +894,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
|
|
|
959
894
|
},
|
|
960
895
|
negativeIndicators: {
|
|
961
896
|
tag: "not_captions",
|
|
962
|
-
content:
|
|
897
|
+
content: dedent`
|
|
963
898
|
DO NOT classify as burned-in captions:
|
|
964
899
|
- Marketing taglines appearing only in final 1-2 frames
|
|
965
900
|
- Single words or phrases that don't change between frames
|
|
@@ -974,65 +909,90 @@ function buildUserPrompt(promptOverrides) {
|
|
|
974
909
|
return burnedInCaptionsPromptBuilder.build(promptOverrides);
|
|
975
910
|
}
|
|
976
911
|
var DEFAULT_PROVIDER = "openai";
|
|
912
|
+
async function fetchImageAsBase64(imageUrl, imageDownloadOptions) {
|
|
913
|
+
"use step";
|
|
914
|
+
const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
|
|
915
|
+
return downloadResult.base64Data;
|
|
916
|
+
}
|
|
917
|
+
async function analyzeStoryboard({
|
|
918
|
+
imageDataUrl,
|
|
919
|
+
provider,
|
|
920
|
+
modelId,
|
|
921
|
+
userPrompt,
|
|
922
|
+
systemPrompt
|
|
923
|
+
}) {
|
|
924
|
+
"use step";
|
|
925
|
+
const model = createLanguageModelFromConfig(provider, modelId);
|
|
926
|
+
const response = await generateObject({
|
|
927
|
+
model,
|
|
928
|
+
schema: burnedInCaptionsSchema,
|
|
929
|
+
experimental_telemetry: { isEnabled: true },
|
|
930
|
+
messages: [
|
|
931
|
+
{
|
|
932
|
+
role: "system",
|
|
933
|
+
content: systemPrompt
|
|
934
|
+
},
|
|
935
|
+
{
|
|
936
|
+
role: "user",
|
|
937
|
+
content: [
|
|
938
|
+
{ type: "text", text: userPrompt },
|
|
939
|
+
{ type: "image", image: imageDataUrl }
|
|
940
|
+
]
|
|
941
|
+
}
|
|
942
|
+
]
|
|
943
|
+
});
|
|
944
|
+
return {
|
|
945
|
+
result: response.object,
|
|
946
|
+
usage: {
|
|
947
|
+
inputTokens: response.usage.inputTokens,
|
|
948
|
+
outputTokens: response.usage.outputTokens,
|
|
949
|
+
totalTokens: response.usage.totalTokens,
|
|
950
|
+
reasoningTokens: response.usage.reasoningTokens,
|
|
951
|
+
cachedInputTokens: response.usage.cachedInputTokens
|
|
952
|
+
}
|
|
953
|
+
};
|
|
954
|
+
}
|
|
977
955
|
async function hasBurnedInCaptions(assetId, options = {}) {
|
|
956
|
+
"use workflow";
|
|
978
957
|
const {
|
|
979
958
|
provider = DEFAULT_PROVIDER,
|
|
980
959
|
model,
|
|
981
960
|
imageSubmissionMode = "url",
|
|
982
961
|
imageDownloadOptions,
|
|
983
962
|
promptOverrides,
|
|
984
|
-
...
|
|
963
|
+
...config
|
|
985
964
|
} = options;
|
|
986
965
|
const userPrompt = buildUserPrompt(promptOverrides);
|
|
987
|
-
const
|
|
988
|
-
{ ...
|
|
966
|
+
const workflowConfig = await createWorkflowConfig(
|
|
967
|
+
{ ...config, model },
|
|
989
968
|
provider
|
|
990
969
|
);
|
|
991
|
-
const { playbackId, policy } = await getPlaybackIdForAsset(
|
|
992
|
-
const signingContext =
|
|
970
|
+
const { playbackId, policy } = await getPlaybackIdForAsset(assetId);
|
|
971
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
993
972
|
if (policy === "signed" && !signingContext) {
|
|
994
973
|
throw new Error(
|
|
995
974
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
996
975
|
);
|
|
997
976
|
}
|
|
998
|
-
const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed"
|
|
999
|
-
const analyzeStoryboard = async (imageDataUrl) => {
|
|
1000
|
-
const response = await (0, import_ai.generateObject)({
|
|
1001
|
-
model: clients.languageModel.model,
|
|
1002
|
-
schema: burnedInCaptionsSchema,
|
|
1003
|
-
abortSignal: options.abortSignal,
|
|
1004
|
-
experimental_telemetry: { isEnabled: true },
|
|
1005
|
-
messages: [
|
|
1006
|
-
{
|
|
1007
|
-
role: "system",
|
|
1008
|
-
content: SYSTEM_PROMPT
|
|
1009
|
-
},
|
|
1010
|
-
{
|
|
1011
|
-
role: "user",
|
|
1012
|
-
content: [
|
|
1013
|
-
{ type: "text", text: userPrompt },
|
|
1014
|
-
{ type: "image", image: imageDataUrl }
|
|
1015
|
-
]
|
|
1016
|
-
}
|
|
1017
|
-
]
|
|
1018
|
-
});
|
|
1019
|
-
return {
|
|
1020
|
-
result: response.object,
|
|
1021
|
-
usage: {
|
|
1022
|
-
inputTokens: response.usage.inputTokens,
|
|
1023
|
-
outputTokens: response.usage.outputTokens,
|
|
1024
|
-
totalTokens: response.usage.totalTokens,
|
|
1025
|
-
reasoningTokens: response.usage.reasoningTokens,
|
|
1026
|
-
cachedInputTokens: response.usage.cachedInputTokens
|
|
1027
|
-
}
|
|
1028
|
-
};
|
|
1029
|
-
};
|
|
977
|
+
const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed");
|
|
1030
978
|
let analysisResponse;
|
|
1031
979
|
if (imageSubmissionMode === "base64") {
|
|
1032
|
-
const
|
|
1033
|
-
analysisResponse = await analyzeStoryboard(
|
|
980
|
+
const base64Data = await fetchImageAsBase64(imageUrl, imageDownloadOptions);
|
|
981
|
+
analysisResponse = await analyzeStoryboard({
|
|
982
|
+
imageDataUrl: base64Data,
|
|
983
|
+
provider: workflowConfig.provider,
|
|
984
|
+
modelId: workflowConfig.modelId,
|
|
985
|
+
userPrompt,
|
|
986
|
+
systemPrompt: SYSTEM_PROMPT
|
|
987
|
+
});
|
|
1034
988
|
} else {
|
|
1035
|
-
analysisResponse = await analyzeStoryboard(
|
|
989
|
+
analysisResponse = await analyzeStoryboard({
|
|
990
|
+
imageDataUrl: imageUrl,
|
|
991
|
+
provider: workflowConfig.provider,
|
|
992
|
+
modelId: workflowConfig.modelId,
|
|
993
|
+
userPrompt,
|
|
994
|
+
systemPrompt: SYSTEM_PROMPT
|
|
995
|
+
});
|
|
1036
996
|
}
|
|
1037
997
|
if (!analysisResponse.result) {
|
|
1038
998
|
throw new Error("No analysis result received from AI provider");
|
|
@@ -1048,8 +1008,8 @@ async function hasBurnedInCaptions(assetId, options = {}) {
|
|
|
1048
1008
|
}
|
|
1049
1009
|
|
|
1050
1010
|
// src/workflows/chapters.ts
|
|
1051
|
-
|
|
1052
|
-
|
|
1011
|
+
import { generateObject as generateObject2 } from "ai";
|
|
1012
|
+
import { z as z3 } from "zod";
|
|
1053
1013
|
|
|
1054
1014
|
// src/lib/retry.ts
|
|
1055
1015
|
var DEFAULT_RETRY_OPTIONS = {
|
|
@@ -1081,25 +1041,50 @@ async function withRetry(fn, {
|
|
|
1081
1041
|
if (isLastAttempt || !shouldRetry(lastError, attempt + 1)) {
|
|
1082
1042
|
throw lastError;
|
|
1083
1043
|
}
|
|
1084
|
-
const
|
|
1044
|
+
const delay = calculateDelay(attempt + 1, baseDelay, maxDelay);
|
|
1085
1045
|
console.warn(
|
|
1086
|
-
`Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(
|
|
1046
|
+
`Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(delay)}ms...`
|
|
1087
1047
|
);
|
|
1088
|
-
await new Promise((resolve) => setTimeout(resolve,
|
|
1048
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
1089
1049
|
}
|
|
1090
1050
|
}
|
|
1091
1051
|
throw lastError || new Error("Retry failed with unknown error");
|
|
1092
1052
|
}
|
|
1093
1053
|
|
|
1094
1054
|
// src/workflows/chapters.ts
|
|
1095
|
-
var chapterSchema =
|
|
1096
|
-
startTime:
|
|
1097
|
-
title:
|
|
1055
|
+
var chapterSchema = z3.object({
|
|
1056
|
+
startTime: z3.number(),
|
|
1057
|
+
title: z3.string()
|
|
1098
1058
|
});
|
|
1099
|
-
var chaptersSchema =
|
|
1100
|
-
chapters:
|
|
1059
|
+
var chaptersSchema = z3.object({
|
|
1060
|
+
chapters: z3.array(chapterSchema)
|
|
1101
1061
|
});
|
|
1102
|
-
|
|
1062
|
+
async function generateChaptersWithAI({
|
|
1063
|
+
provider,
|
|
1064
|
+
modelId,
|
|
1065
|
+
timestampedTranscript,
|
|
1066
|
+
systemPrompt
|
|
1067
|
+
}) {
|
|
1068
|
+
"use step";
|
|
1069
|
+
const model = createLanguageModelFromConfig(provider, modelId);
|
|
1070
|
+
const response = await withRetry(
|
|
1071
|
+
() => generateObject2({
|
|
1072
|
+
model,
|
|
1073
|
+
schema: chaptersSchema,
|
|
1074
|
+
messages: [
|
|
1075
|
+
{
|
|
1076
|
+
role: "system",
|
|
1077
|
+
content: systemPrompt
|
|
1078
|
+
},
|
|
1079
|
+
{
|
|
1080
|
+
role: "user",
|
|
1081
|
+
content: timestampedTranscript
|
|
1082
|
+
}
|
|
1083
|
+
]
|
|
1084
|
+
})
|
|
1085
|
+
);
|
|
1086
|
+
return response.object;
|
|
1087
|
+
}
|
|
1103
1088
|
var SYSTEM_PROMPT2 = `Your role is to segment the following captions into chunked chapters, summarising each chapter with a title.
|
|
1104
1089
|
|
|
1105
1090
|
Analyze the transcript and create logical chapter breaks based on topic changes, major transitions, or distinct sections of content. Each chapter should represent a meaningful segment of the video.
|
|
@@ -1121,10 +1106,11 @@ Important rules:
|
|
|
1121
1106
|
- Do not include any text before or after the JSON
|
|
1122
1107
|
- The JSON must be valid and parseable`;
|
|
1123
1108
|
async function generateChapters(assetId, languageCode, options = {}) {
|
|
1124
|
-
|
|
1125
|
-
const
|
|
1126
|
-
const
|
|
1127
|
-
const
|
|
1109
|
+
"use workflow";
|
|
1110
|
+
const { provider = "openai", model } = options;
|
|
1111
|
+
const config = await createWorkflowConfig({ ...options, model }, provider);
|
|
1112
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
|
|
1113
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
1128
1114
|
if (policy === "signed" && !signingContext) {
|
|
1129
1115
|
throw new Error(
|
|
1130
1116
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1134,7 +1120,7 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
1134
1120
|
languageCode,
|
|
1135
1121
|
cleanTranscript: false,
|
|
1136
1122
|
// keep timestamps for chapter segmentation
|
|
1137
|
-
|
|
1123
|
+
shouldSign: policy === "signed"
|
|
1138
1124
|
});
|
|
1139
1125
|
if (!transcriptResult.track || !transcriptResult.transcriptText) {
|
|
1140
1126
|
const availableLanguages = getReadyTextTracks(assetData).map((t) => t.language_code).filter(Boolean).join(", ");
|
|
@@ -1148,24 +1134,12 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
1148
1134
|
}
|
|
1149
1135
|
let chaptersData = null;
|
|
1150
1136
|
try {
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
{
|
|
1158
|
-
role: "system",
|
|
1159
|
-
content: SYSTEM_PROMPT2
|
|
1160
|
-
},
|
|
1161
|
-
{
|
|
1162
|
-
role: "user",
|
|
1163
|
-
content: timestampedTranscript
|
|
1164
|
-
}
|
|
1165
|
-
]
|
|
1166
|
-
})
|
|
1167
|
-
);
|
|
1168
|
-
chaptersData = response.object;
|
|
1137
|
+
chaptersData = await generateChaptersWithAI({
|
|
1138
|
+
provider: config.provider,
|
|
1139
|
+
modelId: config.modelId,
|
|
1140
|
+
timestampedTranscript,
|
|
1141
|
+
systemPrompt: SYSTEM_PROMPT2
|
|
1142
|
+
});
|
|
1169
1143
|
} catch (error) {
|
|
1170
1144
|
throw new Error(
|
|
1171
1145
|
`Failed to generate chapters with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
@@ -1189,14 +1163,7 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
1189
1163
|
}
|
|
1190
1164
|
|
|
1191
1165
|
// src/workflows/embeddings.ts
|
|
1192
|
-
|
|
1193
|
-
var DEFAULT_PROVIDER3 = "openai";
|
|
1194
|
-
var DEFAULT_CHUNKING_STRATEGY = {
|
|
1195
|
-
type: "token",
|
|
1196
|
-
maxTokens: 500,
|
|
1197
|
-
overlap: 100
|
|
1198
|
-
};
|
|
1199
|
-
var DEFAULT_BATCH_SIZE = 5;
|
|
1166
|
+
import { embed } from "ai";
|
|
1200
1167
|
function averageEmbeddings(embeddings) {
|
|
1201
1168
|
if (embeddings.length === 0) {
|
|
1202
1169
|
return [];
|
|
@@ -1213,51 +1180,41 @@ function averageEmbeddings(embeddings) {
|
|
|
1213
1180
|
}
|
|
1214
1181
|
return averaged;
|
|
1215
1182
|
}
|
|
1216
|
-
async function
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
})
|
|
1239
|
-
);
|
|
1240
|
-
results.push(...batchResults);
|
|
1241
|
-
}
|
|
1242
|
-
return results;
|
|
1183
|
+
async function generateSingleChunkEmbedding({
|
|
1184
|
+
chunk,
|
|
1185
|
+
provider,
|
|
1186
|
+
modelId
|
|
1187
|
+
}) {
|
|
1188
|
+
"use step";
|
|
1189
|
+
const model = createEmbeddingModelFromConfig(provider, modelId);
|
|
1190
|
+
const response = await withRetry(
|
|
1191
|
+
() => embed({
|
|
1192
|
+
model,
|
|
1193
|
+
value: chunk.text
|
|
1194
|
+
})
|
|
1195
|
+
);
|
|
1196
|
+
return {
|
|
1197
|
+
chunkId: chunk.id,
|
|
1198
|
+
embedding: response.embedding,
|
|
1199
|
+
metadata: {
|
|
1200
|
+
startTime: chunk.startTime,
|
|
1201
|
+
endTime: chunk.endTime,
|
|
1202
|
+
tokenCount: chunk.tokenCount
|
|
1203
|
+
}
|
|
1204
|
+
};
|
|
1243
1205
|
}
|
|
1244
1206
|
async function generateVideoEmbeddings(assetId, options = {}) {
|
|
1207
|
+
"use workflow";
|
|
1245
1208
|
const {
|
|
1246
|
-
provider =
|
|
1209
|
+
provider = "openai",
|
|
1247
1210
|
model,
|
|
1248
1211
|
languageCode,
|
|
1249
|
-
chunkingStrategy =
|
|
1250
|
-
batchSize =
|
|
1251
|
-
abortSignal
|
|
1212
|
+
chunkingStrategy = { type: "token", maxTokens: 500, overlap: 100 },
|
|
1213
|
+
batchSize = 5
|
|
1252
1214
|
} = options;
|
|
1253
|
-
const credentials = validateCredentials(options, provider === "google" ? "google" : "openai");
|
|
1254
|
-
const muxClient = createMuxClient(credentials);
|
|
1255
1215
|
const embeddingModel = resolveEmbeddingModel({ ...options, provider, model });
|
|
1256
|
-
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
|
|
1257
|
-
|
|
1258
|
-
assetId
|
|
1259
|
-
);
|
|
1260
|
-
const signingContext = resolveSigningContext(options);
|
|
1216
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
|
|
1217
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
1261
1218
|
if (policy === "signed" && !signingContext) {
|
|
1262
1219
|
throw new Error(
|
|
1263
1220
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1267,7 +1224,7 @@ async function generateVideoEmbeddings(assetId, options = {}) {
|
|
|
1267
1224
|
const transcriptResult = await fetchTranscriptForAsset(assetData, playbackId, {
|
|
1268
1225
|
languageCode,
|
|
1269
1226
|
cleanTranscript: !useVttChunking,
|
|
1270
|
-
|
|
1227
|
+
shouldSign: policy === "signed"
|
|
1271
1228
|
});
|
|
1272
1229
|
if (!transcriptResult.track || !transcriptResult.transcriptText) {
|
|
1273
1230
|
const availableLanguages = getReadyTextTracks(assetData).map((t) => t.language_code).filter(Boolean).join(", ");
|
|
@@ -1287,14 +1244,21 @@ async function generateVideoEmbeddings(assetId, options = {}) {
|
|
|
1287
1244
|
if (chunks.length === 0) {
|
|
1288
1245
|
throw new Error("No chunks generated from transcript");
|
|
1289
1246
|
}
|
|
1290
|
-
|
|
1247
|
+
const chunkEmbeddings = [];
|
|
1291
1248
|
try {
|
|
1292
|
-
|
|
1293
|
-
chunks,
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1249
|
+
for (let i = 0; i < chunks.length; i += batchSize) {
|
|
1250
|
+
const batch = chunks.slice(i, i + batchSize);
|
|
1251
|
+
const batchResults = await Promise.all(
|
|
1252
|
+
batch.map(
|
|
1253
|
+
(chunk) => generateSingleChunkEmbedding({
|
|
1254
|
+
chunk,
|
|
1255
|
+
provider: embeddingModel.provider,
|
|
1256
|
+
modelId: embeddingModel.modelId
|
|
1257
|
+
})
|
|
1258
|
+
)
|
|
1259
|
+
);
|
|
1260
|
+
chunkEmbeddings.push(...batchResults);
|
|
1261
|
+
}
|
|
1298
1262
|
} catch (error) {
|
|
1299
1263
|
throw new Error(
|
|
1300
1264
|
`Failed to generate embeddings with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
@@ -1326,7 +1290,7 @@ var DEFAULT_THRESHOLDS = {
|
|
|
1326
1290
|
sexual: 0.7,
|
|
1327
1291
|
violence: 0.8
|
|
1328
1292
|
};
|
|
1329
|
-
var
|
|
1293
|
+
var DEFAULT_PROVIDER2 = "openai";
|
|
1330
1294
|
var HIVE_ENDPOINT = "https://api.thehive.ai/api/v2/task/sync";
|
|
1331
1295
|
var HIVE_SEXUAL_CATEGORIES = [
|
|
1332
1296
|
"general_nsfw",
|
|
@@ -1364,6 +1328,7 @@ var HIVE_VIOLENCE_CATEGORIES = [
|
|
|
1364
1328
|
"garm_death_injury_or_military_conflict"
|
|
1365
1329
|
];
|
|
1366
1330
|
async function processConcurrently(items, processor, maxConcurrent = 5) {
|
|
1331
|
+
"use step";
|
|
1367
1332
|
const results = [];
|
|
1368
1333
|
for (let i = 0; i < items.length; i += maxConcurrent) {
|
|
1369
1334
|
const batch = items.slice(i, i + maxConcurrent);
|
|
@@ -1373,11 +1338,14 @@ async function processConcurrently(items, processor, maxConcurrent = 5) {
|
|
|
1373
1338
|
}
|
|
1374
1339
|
return results;
|
|
1375
1340
|
}
|
|
1376
|
-
async function requestOpenAIModeration(imageUrls,
|
|
1341
|
+
async function requestOpenAIModeration(imageUrls, model, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
|
|
1342
|
+
"use step";
|
|
1377
1343
|
const targetUrls = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map(
|
|
1378
|
-
(img) => ({ url: img.url, image: img.base64Data })
|
|
1379
|
-
) : imageUrls.map((url) => ({ url, image: url }));
|
|
1344
|
+
(img) => ({ url: img.url, image: img.base64Data, model })
|
|
1345
|
+
) : imageUrls.map((url) => ({ url, image: url, model }));
|
|
1380
1346
|
const moderate = async (entry) => {
|
|
1347
|
+
"use step";
|
|
1348
|
+
const apiKey = getApiKeyFromEnv("openai");
|
|
1381
1349
|
try {
|
|
1382
1350
|
const res = await fetch("https://api.openai.com/v1/moderations", {
|
|
1383
1351
|
method: "POST",
|
|
@@ -1386,7 +1354,7 @@ async function requestOpenAIModeration(imageUrls, apiKey, model, maxConcurrent =
|
|
|
1386
1354
|
"Authorization": `Bearer ${apiKey}`
|
|
1387
1355
|
},
|
|
1388
1356
|
body: JSON.stringify({
|
|
1389
|
-
model,
|
|
1357
|
+
model: entry.model,
|
|
1390
1358
|
input: [
|
|
1391
1359
|
{
|
|
1392
1360
|
type: "image_url",
|
|
@@ -1429,7 +1397,8 @@ function getHiveCategoryScores(classes, categoryNames) {
|
|
|
1429
1397
|
const scores = categoryNames.map((category) => scoreMap[category] || 0);
|
|
1430
1398
|
return Math.max(...scores, 0);
|
|
1431
1399
|
}
|
|
1432
|
-
async function requestHiveModeration(imageUrls,
|
|
1400
|
+
async function requestHiveModeration(imageUrls, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
|
|
1401
|
+
"use step";
|
|
1433
1402
|
const targets = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map((img) => ({
|
|
1434
1403
|
url: img.url,
|
|
1435
1404
|
source: {
|
|
@@ -1442,6 +1411,8 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
|
|
|
1442
1411
|
source: { kind: "url", value: url }
|
|
1443
1412
|
}));
|
|
1444
1413
|
const moderate = async (entry) => {
|
|
1414
|
+
"use step";
|
|
1415
|
+
const apiKey = getApiKeyFromEnv("hive");
|
|
1445
1416
|
try {
|
|
1446
1417
|
const formData = new FormData();
|
|
1447
1418
|
if (entry.source.kind === "url") {
|
|
@@ -1487,8 +1458,9 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
|
|
|
1487
1458
|
return processConcurrently(targets, moderate, maxConcurrent);
|
|
1488
1459
|
}
|
|
1489
1460
|
async function getModerationScores(assetId, options = {}) {
|
|
1461
|
+
"use workflow";
|
|
1490
1462
|
const {
|
|
1491
|
-
provider =
|
|
1463
|
+
provider = DEFAULT_PROVIDER2,
|
|
1492
1464
|
model = provider === "openai" ? "omni-moderation-latest" : void 0,
|
|
1493
1465
|
thresholds = DEFAULT_THRESHOLDS,
|
|
1494
1466
|
thumbnailInterval = 10,
|
|
@@ -1497,11 +1469,9 @@ async function getModerationScores(assetId, options = {}) {
|
|
|
1497
1469
|
imageSubmissionMode = "url",
|
|
1498
1470
|
imageDownloadOptions
|
|
1499
1471
|
} = options;
|
|
1500
|
-
const
|
|
1501
|
-
const muxClient = createMuxClient(credentials);
|
|
1502
|
-
const { asset, playbackId, policy } = await getPlaybackIdForAsset(muxClient, assetId);
|
|
1472
|
+
const { asset, playbackId, policy } = await getPlaybackIdForAsset(assetId);
|
|
1503
1473
|
const duration = asset.duration || 0;
|
|
1504
|
-
const signingContext =
|
|
1474
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
1505
1475
|
if (policy === "signed" && !signingContext) {
|
|
1506
1476
|
throw new Error(
|
|
1507
1477
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1510,30 +1480,20 @@ async function getModerationScores(assetId, options = {}) {
|
|
|
1510
1480
|
const thumbnailUrls = await getThumbnailUrls(playbackId, duration, {
|
|
1511
1481
|
interval: thumbnailInterval,
|
|
1512
1482
|
width: thumbnailWidth,
|
|
1513
|
-
|
|
1483
|
+
shouldSign: policy === "signed"
|
|
1514
1484
|
});
|
|
1515
1485
|
let thumbnailScores;
|
|
1516
1486
|
if (provider === "openai") {
|
|
1517
|
-
const apiKey = credentials.openaiApiKey;
|
|
1518
|
-
if (!apiKey) {
|
|
1519
|
-
throw new Error("OpenAI API key is required for moderation. Set OPENAI_API_KEY or pass openaiApiKey.");
|
|
1520
|
-
}
|
|
1521
1487
|
thumbnailScores = await requestOpenAIModeration(
|
|
1522
1488
|
thumbnailUrls,
|
|
1523
|
-
apiKey,
|
|
1524
1489
|
model || "omni-moderation-latest",
|
|
1525
1490
|
maxConcurrent,
|
|
1526
1491
|
imageSubmissionMode,
|
|
1527
1492
|
imageDownloadOptions
|
|
1528
1493
|
);
|
|
1529
1494
|
} else if (provider === "hive") {
|
|
1530
|
-
const hiveApiKey = options.hiveApiKey || env_default.HIVE_API_KEY;
|
|
1531
|
-
if (!hiveApiKey) {
|
|
1532
|
-
throw new Error("Hive API key is required for moderation. Set HIVE_API_KEY or pass hiveApiKey.");
|
|
1533
|
-
}
|
|
1534
1495
|
thumbnailScores = await requestHiveModeration(
|
|
1535
1496
|
thumbnailUrls,
|
|
1536
|
-
hiveApiKey,
|
|
1537
1497
|
maxConcurrent,
|
|
1538
1498
|
imageSubmissionMode,
|
|
1539
1499
|
imageDownloadOptions
|
|
@@ -1557,17 +1517,18 @@ async function getModerationScores(assetId, options = {}) {
|
|
|
1557
1517
|
}
|
|
1558
1518
|
|
|
1559
1519
|
// src/workflows/summarization.ts
|
|
1560
|
-
|
|
1561
|
-
|
|
1520
|
+
import { generateObject as generateObject3 } from "ai";
|
|
1521
|
+
import dedent2 from "dedent";
|
|
1522
|
+
import { z as z4 } from "zod";
|
|
1562
1523
|
var SUMMARY_KEYWORD_LIMIT = 10;
|
|
1563
|
-
var summarySchema =
|
|
1564
|
-
keywords:
|
|
1565
|
-
title:
|
|
1566
|
-
description:
|
|
1524
|
+
var summarySchema = z4.object({
|
|
1525
|
+
keywords: z4.array(z4.string()),
|
|
1526
|
+
title: z4.string(),
|
|
1527
|
+
description: z4.string()
|
|
1567
1528
|
});
|
|
1568
1529
|
var TONE_INSTRUCTIONS = {
|
|
1569
|
-
|
|
1570
|
-
|
|
1530
|
+
neutral: "Provide a clear, straightforward analysis.",
|
|
1531
|
+
playful: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
|
|
1571
1532
|
professional: "Provide a professional, executive-level analysis suitable for business reporting."
|
|
1572
1533
|
};
|
|
1573
1534
|
var summarizationPromptBuilder = createPromptBuilder({
|
|
@@ -1578,7 +1539,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1578
1539
|
},
|
|
1579
1540
|
title: {
|
|
1580
1541
|
tag: "title_requirements",
|
|
1581
|
-
content:
|
|
1542
|
+
content: dedent2`
|
|
1582
1543
|
A short, compelling headline that immediately communicates the subject or action.
|
|
1583
1544
|
Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
|
|
1584
1545
|
Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
|
|
@@ -1586,7 +1547,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1586
1547
|
},
|
|
1587
1548
|
description: {
|
|
1588
1549
|
tag: "description_requirements",
|
|
1589
|
-
content:
|
|
1550
|
+
content: dedent2`
|
|
1590
1551
|
A concise summary (2-4 sentences) that describes what happens across the video.
|
|
1591
1552
|
Cover the main subjects, actions, setting, and any notable progression visible across frames.
|
|
1592
1553
|
Write in present tense. Be specific about observable details rather than making assumptions.
|
|
@@ -1594,7 +1555,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1594
1555
|
},
|
|
1595
1556
|
keywords: {
|
|
1596
1557
|
tag: "keywords_requirements",
|
|
1597
|
-
content:
|
|
1558
|
+
content: dedent2`
|
|
1598
1559
|
Specific, searchable terms (up to 10) that capture:
|
|
1599
1560
|
- Primary subjects (people, animals, objects)
|
|
1600
1561
|
- Actions and activities being performed
|
|
@@ -1606,7 +1567,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1606
1567
|
},
|
|
1607
1568
|
qualityGuidelines: {
|
|
1608
1569
|
tag: "quality_guidelines",
|
|
1609
|
-
content:
|
|
1570
|
+
content: dedent2`
|
|
1610
1571
|
- Examine all frames to understand the full context and progression
|
|
1611
1572
|
- Be precise: "golden retriever" is better than "dog" when identifiable
|
|
1612
1573
|
- Capture the narrative: what begins, develops, and concludes
|
|
@@ -1615,7 +1576,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1615
1576
|
},
|
|
1616
1577
|
sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
|
|
1617
1578
|
});
|
|
1618
|
-
var SYSTEM_PROMPT3 =
|
|
1579
|
+
var SYSTEM_PROMPT3 = dedent2`
|
|
1619
1580
|
<role>
|
|
1620
1581
|
You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
|
|
1621
1582
|
</role>
|
|
@@ -1647,7 +1608,29 @@ var SYSTEM_PROMPT3 = dedent_default`
|
|
|
1647
1608
|
- Only describe what is clearly observable in the frames or explicitly stated in the transcript
|
|
1648
1609
|
- Do not fabricate details or make unsupported assumptions
|
|
1649
1610
|
- Return structured data matching the requested schema
|
|
1650
|
-
</constraints
|
|
1611
|
+
</constraints>
|
|
1612
|
+
|
|
1613
|
+
<tone_guidance>
|
|
1614
|
+
Pay special attention to the <tone> section and lean heavily into those instructions.
|
|
1615
|
+
Adapt your entire analysis and writing style to match the specified tone - this should influence
|
|
1616
|
+
your word choice, personality, formality level, and overall presentation of the content.
|
|
1617
|
+
The tone instructions are not suggestions but core requirements for how you should express yourself.
|
|
1618
|
+
</tone_guidance>
|
|
1619
|
+
|
|
1620
|
+
<language_guidelines>
|
|
1621
|
+
AVOID these meta-descriptive phrases that reference the medium rather than the content:
|
|
1622
|
+
- "The image shows..." / "The storyboard shows..."
|
|
1623
|
+
- "In this video..." / "This video features..."
|
|
1624
|
+
- "The frames depict..." / "The footage shows..."
|
|
1625
|
+
- "We can see..." / "You can see..."
|
|
1626
|
+
- "The clip shows..." / "The scene shows..."
|
|
1627
|
+
|
|
1628
|
+
INSTEAD, describe the content directly:
|
|
1629
|
+
- BAD: "The video shows a chef preparing a meal"
|
|
1630
|
+
- GOOD: "A chef prepares a meal in a professional kitchen"
|
|
1631
|
+
|
|
1632
|
+
Write as if describing reality, not describing a recording of reality.
|
|
1633
|
+
</language_guidelines>`;
|
|
1651
1634
|
function buildUserPrompt2({
|
|
1652
1635
|
tone,
|
|
1653
1636
|
transcriptText,
|
|
@@ -1661,8 +1644,37 @@ function buildUserPrompt2({
|
|
|
1661
1644
|
}
|
|
1662
1645
|
return summarizationPromptBuilder.buildWithContext(promptOverrides, contextSections);
|
|
1663
1646
|
}
|
|
1664
|
-
|
|
1665
|
-
|
|
1647
|
+
async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, systemPrompt) {
|
|
1648
|
+
"use step";
|
|
1649
|
+
const model = createLanguageModelFromConfig(provider, modelId);
|
|
1650
|
+
const response = await generateObject3({
|
|
1651
|
+
model,
|
|
1652
|
+
schema: summarySchema,
|
|
1653
|
+
messages: [
|
|
1654
|
+
{
|
|
1655
|
+
role: "system",
|
|
1656
|
+
content: systemPrompt
|
|
1657
|
+
},
|
|
1658
|
+
{
|
|
1659
|
+
role: "user",
|
|
1660
|
+
content: [
|
|
1661
|
+
{ type: "text", text: userPrompt },
|
|
1662
|
+
{ type: "image", image: imageDataUrl }
|
|
1663
|
+
]
|
|
1664
|
+
}
|
|
1665
|
+
]
|
|
1666
|
+
});
|
|
1667
|
+
return {
|
|
1668
|
+
result: response.object,
|
|
1669
|
+
usage: {
|
|
1670
|
+
inputTokens: response.usage.inputTokens,
|
|
1671
|
+
outputTokens: response.usage.outputTokens,
|
|
1672
|
+
totalTokens: response.usage.totalTokens,
|
|
1673
|
+
reasoningTokens: response.usage.reasoningTokens,
|
|
1674
|
+
cachedInputTokens: response.usage.cachedInputTokens
|
|
1675
|
+
}
|
|
1676
|
+
};
|
|
1677
|
+
}
|
|
1666
1678
|
function normalizeKeywords(keywords) {
|
|
1667
1679
|
if (!Array.isArray(keywords) || keywords.length === 0) {
|
|
1668
1680
|
return [];
|
|
@@ -1687,23 +1699,24 @@ function normalizeKeywords(keywords) {
|
|
|
1687
1699
|
return normalized;
|
|
1688
1700
|
}
|
|
1689
1701
|
async function getSummaryAndTags(assetId, options) {
|
|
1702
|
+
"use workflow";
|
|
1690
1703
|
const {
|
|
1691
|
-
provider =
|
|
1704
|
+
provider = "openai",
|
|
1692
1705
|
model,
|
|
1693
|
-
tone =
|
|
1706
|
+
tone = "neutral",
|
|
1694
1707
|
includeTranscript = true,
|
|
1695
1708
|
cleanTranscript = true,
|
|
1696
1709
|
imageSubmissionMode = "url",
|
|
1697
1710
|
imageDownloadOptions,
|
|
1698
|
-
abortSignal,
|
|
1711
|
+
abortSignal: _abortSignal,
|
|
1699
1712
|
promptOverrides
|
|
1700
1713
|
} = options ?? {};
|
|
1701
|
-
const
|
|
1714
|
+
const config = await createWorkflowConfig(
|
|
1702
1715
|
{ ...options, model },
|
|
1703
1716
|
provider
|
|
1704
1717
|
);
|
|
1705
|
-
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
|
|
1706
|
-
const signingContext =
|
|
1718
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
|
|
1719
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
1707
1720
|
if (policy === "signed" && !signingContext) {
|
|
1708
1721
|
throw new Error(
|
|
1709
1722
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1711,7 +1724,7 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
1711
1724
|
}
|
|
1712
1725
|
const transcriptText = includeTranscript ? (await fetchTranscriptForAsset(assetData, playbackId, {
|
|
1713
1726
|
cleanTranscript,
|
|
1714
|
-
|
|
1727
|
+
shouldSign: policy === "signed"
|
|
1715
1728
|
})).transcriptText : "";
|
|
1716
1729
|
const userPrompt = buildUserPrompt2({
|
|
1717
1730
|
tone,
|
|
@@ -1719,67 +1732,214 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
1719
1732
|
isCleanTranscript: cleanTranscript,
|
|
1720
1733
|
promptOverrides
|
|
1721
1734
|
});
|
|
1722
|
-
const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed"
|
|
1723
|
-
|
|
1724
|
-
const response = await (0, import_ai4.generateObject)({
|
|
1725
|
-
model: clients.languageModel.model,
|
|
1726
|
-
schema: summarySchema,
|
|
1727
|
-
abortSignal,
|
|
1728
|
-
messages: [
|
|
1729
|
-
{
|
|
1730
|
-
role: "system",
|
|
1731
|
-
content: SYSTEM_PROMPT3
|
|
1732
|
-
},
|
|
1733
|
-
{
|
|
1734
|
-
role: "user",
|
|
1735
|
-
content: [
|
|
1736
|
-
{ type: "text", text: userPrompt },
|
|
1737
|
-
{ type: "image", image: imageDataUrl }
|
|
1738
|
-
]
|
|
1739
|
-
}
|
|
1740
|
-
]
|
|
1741
|
-
});
|
|
1742
|
-
return response.object;
|
|
1743
|
-
};
|
|
1744
|
-
let aiAnalysis = null;
|
|
1735
|
+
const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed");
|
|
1736
|
+
let analysisResponse;
|
|
1745
1737
|
try {
|
|
1746
1738
|
if (imageSubmissionMode === "base64") {
|
|
1747
1739
|
const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
|
|
1748
|
-
|
|
1740
|
+
analysisResponse = await analyzeStoryboard2(
|
|
1741
|
+
downloadResult.base64Data,
|
|
1742
|
+
config.provider,
|
|
1743
|
+
config.modelId,
|
|
1744
|
+
userPrompt,
|
|
1745
|
+
SYSTEM_PROMPT3
|
|
1746
|
+
);
|
|
1749
1747
|
} else {
|
|
1750
|
-
|
|
1748
|
+
analysisResponse = await withRetry(() => analyzeStoryboard2(imageUrl, config.provider, config.modelId, userPrompt, SYSTEM_PROMPT3));
|
|
1751
1749
|
}
|
|
1752
1750
|
} catch (error) {
|
|
1753
1751
|
throw new Error(
|
|
1754
1752
|
`Failed to analyze video content with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
1755
1753
|
);
|
|
1756
1754
|
}
|
|
1757
|
-
if (!
|
|
1755
|
+
if (!analysisResponse.result) {
|
|
1758
1756
|
throw new Error(`Failed to analyze video content for asset ${assetId}`);
|
|
1759
1757
|
}
|
|
1760
|
-
if (!
|
|
1758
|
+
if (!analysisResponse.result.title) {
|
|
1761
1759
|
throw new Error(`Failed to generate title for asset ${assetId}`);
|
|
1762
1760
|
}
|
|
1763
|
-
if (!
|
|
1761
|
+
if (!analysisResponse.result.description) {
|
|
1764
1762
|
throw new Error(`Failed to generate description for asset ${assetId}`);
|
|
1765
1763
|
}
|
|
1766
1764
|
return {
|
|
1767
1765
|
assetId,
|
|
1768
|
-
title:
|
|
1769
|
-
description:
|
|
1770
|
-
tags: normalizeKeywords(
|
|
1771
|
-
storyboardUrl: imageUrl
|
|
1766
|
+
title: analysisResponse.result.title,
|
|
1767
|
+
description: analysisResponse.result.description,
|
|
1768
|
+
tags: normalizeKeywords(analysisResponse.result.keywords),
|
|
1769
|
+
storyboardUrl: imageUrl,
|
|
1770
|
+
usage: analysisResponse.usage,
|
|
1771
|
+
transcriptText: transcriptText || void 0
|
|
1772
1772
|
};
|
|
1773
1773
|
}
|
|
1774
1774
|
|
|
1775
1775
|
// src/workflows/translate-audio.ts
|
|
1776
|
-
|
|
1777
|
-
|
|
1778
|
-
|
|
1779
|
-
var
|
|
1776
|
+
import Mux3 from "@mux/mux-node";
|
|
1777
|
+
|
|
1778
|
+
// src/lib/language-codes.ts
|
|
1779
|
+
var ISO639_1_TO_3 = {
|
|
1780
|
+
// Major world languages
|
|
1781
|
+
en: "eng",
|
|
1782
|
+
// English
|
|
1783
|
+
es: "spa",
|
|
1784
|
+
// Spanish
|
|
1785
|
+
fr: "fra",
|
|
1786
|
+
// French
|
|
1787
|
+
de: "deu",
|
|
1788
|
+
// German
|
|
1789
|
+
it: "ita",
|
|
1790
|
+
// Italian
|
|
1791
|
+
pt: "por",
|
|
1792
|
+
// Portuguese
|
|
1793
|
+
ru: "rus",
|
|
1794
|
+
// Russian
|
|
1795
|
+
zh: "zho",
|
|
1796
|
+
// Chinese
|
|
1797
|
+
ja: "jpn",
|
|
1798
|
+
// Japanese
|
|
1799
|
+
ko: "kor",
|
|
1800
|
+
// Korean
|
|
1801
|
+
ar: "ara",
|
|
1802
|
+
// Arabic
|
|
1803
|
+
hi: "hin",
|
|
1804
|
+
// Hindi
|
|
1805
|
+
// European languages
|
|
1806
|
+
nl: "nld",
|
|
1807
|
+
// Dutch
|
|
1808
|
+
pl: "pol",
|
|
1809
|
+
// Polish
|
|
1810
|
+
sv: "swe",
|
|
1811
|
+
// Swedish
|
|
1812
|
+
da: "dan",
|
|
1813
|
+
// Danish
|
|
1814
|
+
no: "nor",
|
|
1815
|
+
// Norwegian
|
|
1816
|
+
fi: "fin",
|
|
1817
|
+
// Finnish
|
|
1818
|
+
el: "ell",
|
|
1819
|
+
// Greek
|
|
1820
|
+
cs: "ces",
|
|
1821
|
+
// Czech
|
|
1822
|
+
hu: "hun",
|
|
1823
|
+
// Hungarian
|
|
1824
|
+
ro: "ron",
|
|
1825
|
+
// Romanian
|
|
1826
|
+
bg: "bul",
|
|
1827
|
+
// Bulgarian
|
|
1828
|
+
hr: "hrv",
|
|
1829
|
+
// Croatian
|
|
1830
|
+
sk: "slk",
|
|
1831
|
+
// Slovak
|
|
1832
|
+
sl: "slv",
|
|
1833
|
+
// Slovenian
|
|
1834
|
+
uk: "ukr",
|
|
1835
|
+
// Ukrainian
|
|
1836
|
+
tr: "tur",
|
|
1837
|
+
// Turkish
|
|
1838
|
+
// Asian languages
|
|
1839
|
+
th: "tha",
|
|
1840
|
+
// Thai
|
|
1841
|
+
vi: "vie",
|
|
1842
|
+
// Vietnamese
|
|
1843
|
+
id: "ind",
|
|
1844
|
+
// Indonesian
|
|
1845
|
+
ms: "msa",
|
|
1846
|
+
// Malay
|
|
1847
|
+
tl: "tgl",
|
|
1848
|
+
// Tagalog/Filipino
|
|
1849
|
+
// Other languages
|
|
1850
|
+
he: "heb",
|
|
1851
|
+
// Hebrew
|
|
1852
|
+
fa: "fas",
|
|
1853
|
+
// Persian/Farsi
|
|
1854
|
+
bn: "ben",
|
|
1855
|
+
// Bengali
|
|
1856
|
+
ta: "tam",
|
|
1857
|
+
// Tamil
|
|
1858
|
+
te: "tel",
|
|
1859
|
+
// Telugu
|
|
1860
|
+
mr: "mar",
|
|
1861
|
+
// Marathi
|
|
1862
|
+
gu: "guj",
|
|
1863
|
+
// Gujarati
|
|
1864
|
+
kn: "kan",
|
|
1865
|
+
// Kannada
|
|
1866
|
+
ml: "mal",
|
|
1867
|
+
// Malayalam
|
|
1868
|
+
pa: "pan",
|
|
1869
|
+
// Punjabi
|
|
1870
|
+
ur: "urd",
|
|
1871
|
+
// Urdu
|
|
1872
|
+
sw: "swa",
|
|
1873
|
+
// Swahili
|
|
1874
|
+
af: "afr",
|
|
1875
|
+
// Afrikaans
|
|
1876
|
+
ca: "cat",
|
|
1877
|
+
// Catalan
|
|
1878
|
+
eu: "eus",
|
|
1879
|
+
// Basque
|
|
1880
|
+
gl: "glg",
|
|
1881
|
+
// Galician
|
|
1882
|
+
is: "isl",
|
|
1883
|
+
// Icelandic
|
|
1884
|
+
et: "est",
|
|
1885
|
+
// Estonian
|
|
1886
|
+
lv: "lav",
|
|
1887
|
+
// Latvian
|
|
1888
|
+
lt: "lit"
|
|
1889
|
+
// Lithuanian
|
|
1890
|
+
};
|
|
1891
|
+
var ISO639_3_TO_1 = Object.fromEntries(
|
|
1892
|
+
Object.entries(ISO639_1_TO_3).map(([iso1, iso3]) => [iso3, iso1])
|
|
1893
|
+
);
|
|
1894
|
+
function toISO639_3(code) {
|
|
1895
|
+
const normalized = code.toLowerCase().trim();
|
|
1896
|
+
if (normalized.length === 3) {
|
|
1897
|
+
return normalized;
|
|
1898
|
+
}
|
|
1899
|
+
return ISO639_1_TO_3[normalized] ?? normalized;
|
|
1900
|
+
}
|
|
1901
|
+
function toISO639_1(code) {
|
|
1902
|
+
const normalized = code.toLowerCase().trim();
|
|
1903
|
+
if (normalized.length === 2) {
|
|
1904
|
+
return normalized;
|
|
1905
|
+
}
|
|
1906
|
+
return ISO639_3_TO_1[normalized] ?? normalized;
|
|
1907
|
+
}
|
|
1908
|
+
function getLanguageCodePair(code) {
|
|
1909
|
+
const normalized = code.toLowerCase().trim();
|
|
1910
|
+
if (normalized.length === 2) {
|
|
1911
|
+
return {
|
|
1912
|
+
iso639_1: normalized,
|
|
1913
|
+
iso639_3: toISO639_3(normalized)
|
|
1914
|
+
};
|
|
1915
|
+
} else if (normalized.length === 3) {
|
|
1916
|
+
return {
|
|
1917
|
+
iso639_1: toISO639_1(normalized),
|
|
1918
|
+
iso639_3: normalized
|
|
1919
|
+
};
|
|
1920
|
+
}
|
|
1921
|
+
return {
|
|
1922
|
+
iso639_1: normalized,
|
|
1923
|
+
iso639_3: normalized
|
|
1924
|
+
};
|
|
1925
|
+
}
|
|
1926
|
+
function getLanguageName(code) {
|
|
1927
|
+
const iso639_1 = toISO639_1(code);
|
|
1928
|
+
try {
|
|
1929
|
+
const displayNames = new Intl.DisplayNames(["en"], { type: "language" });
|
|
1930
|
+
return displayNames.of(iso639_1) ?? code.toUpperCase();
|
|
1931
|
+
} catch {
|
|
1932
|
+
return code.toUpperCase();
|
|
1933
|
+
}
|
|
1934
|
+
}
|
|
1935
|
+
|
|
1936
|
+
// src/workflows/translate-audio.ts
|
|
1780
1937
|
var STATIC_RENDITION_POLL_INTERVAL_MS = 5e3;
|
|
1781
1938
|
var STATIC_RENDITION_MAX_ATTEMPTS = 36;
|
|
1782
|
-
|
|
1939
|
+
async function sleep(ms) {
|
|
1940
|
+
"use step";
|
|
1941
|
+
await new Promise((resolve) => setTimeout(resolve, ms));
|
|
1942
|
+
}
|
|
1783
1943
|
function getReadyAudioStaticRendition(asset) {
|
|
1784
1944
|
const files = asset.static_renditions?.files;
|
|
1785
1945
|
if (!files || files.length === 0) {
|
|
@@ -1790,19 +1950,22 @@ function getReadyAudioStaticRendition(asset) {
|
|
|
1790
1950
|
);
|
|
1791
1951
|
}
|
|
1792
1952
|
var hasReadyAudioStaticRendition = (asset) => Boolean(getReadyAudioStaticRendition(asset));
|
|
1793
|
-
async function requestStaticRenditionCreation(
|
|
1794
|
-
|
|
1953
|
+
async function requestStaticRenditionCreation(assetId) {
|
|
1954
|
+
"use step";
|
|
1955
|
+
const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
|
|
1956
|
+
const mux = new Mux3({
|
|
1957
|
+
tokenId: muxTokenId,
|
|
1958
|
+
tokenSecret: muxTokenSecret
|
|
1959
|
+
});
|
|
1795
1960
|
try {
|
|
1796
|
-
await
|
|
1961
|
+
await mux.video.assets.createStaticRendition(assetId, {
|
|
1797
1962
|
resolution: "audio-only"
|
|
1798
1963
|
});
|
|
1799
|
-
console.log("\u{1F4FC} Static rendition request accepted by Mux.");
|
|
1800
1964
|
} catch (error) {
|
|
1801
1965
|
const statusCode = error?.status ?? error?.statusCode;
|
|
1802
1966
|
const messages = error?.error?.messages;
|
|
1803
1967
|
const alreadyDefined = messages?.some((message2) => message2.toLowerCase().includes("already defined")) ?? error?.message?.toLowerCase().includes("already defined");
|
|
1804
1968
|
if (statusCode === 409 || alreadyDefined) {
|
|
1805
|
-
console.log("\u2139\uFE0F Static rendition already requested. Waiting for it to finish...");
|
|
1806
1969
|
return;
|
|
1807
1970
|
}
|
|
1808
1971
|
const message = error instanceof Error ? error.message : "Unknown error";
|
|
@@ -1811,31 +1974,34 @@ async function requestStaticRenditionCreation(muxClient, assetId) {
|
|
|
1811
1974
|
}
|
|
1812
1975
|
async function waitForAudioStaticRendition({
|
|
1813
1976
|
assetId,
|
|
1814
|
-
muxClient,
|
|
1815
1977
|
initialAsset
|
|
1816
1978
|
}) {
|
|
1979
|
+
"use step";
|
|
1980
|
+
const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
|
|
1981
|
+
const mux = new Mux3({
|
|
1982
|
+
tokenId: muxTokenId,
|
|
1983
|
+
tokenSecret: muxTokenSecret
|
|
1984
|
+
});
|
|
1817
1985
|
let currentAsset = initialAsset;
|
|
1818
1986
|
if (hasReadyAudioStaticRendition(currentAsset)) {
|
|
1819
1987
|
return currentAsset;
|
|
1820
1988
|
}
|
|
1821
1989
|
const status = currentAsset.static_renditions?.status ?? "not_requested";
|
|
1822
1990
|
if (status === "not_requested" || status === void 0) {
|
|
1823
|
-
await requestStaticRenditionCreation(
|
|
1991
|
+
await requestStaticRenditionCreation(assetId);
|
|
1824
1992
|
} else if (status === "errored") {
|
|
1825
|
-
|
|
1826
|
-
await requestStaticRenditionCreation(muxClient, assetId);
|
|
1993
|
+
await requestStaticRenditionCreation(assetId);
|
|
1827
1994
|
} else {
|
|
1828
|
-
console.
|
|
1995
|
+
console.warn(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
|
|
1829
1996
|
}
|
|
1830
1997
|
for (let attempt = 1; attempt <= STATIC_RENDITION_MAX_ATTEMPTS; attempt++) {
|
|
1831
|
-
await
|
|
1832
|
-
currentAsset = await
|
|
1998
|
+
await sleep(STATIC_RENDITION_POLL_INTERVAL_MS);
|
|
1999
|
+
currentAsset = await mux.video.assets.retrieve(assetId);
|
|
1833
2000
|
if (hasReadyAudioStaticRendition(currentAsset)) {
|
|
1834
|
-
console.log("\u2705 Audio static rendition is ready!");
|
|
1835
2001
|
return currentAsset;
|
|
1836
2002
|
}
|
|
1837
2003
|
const currentStatus = currentAsset.static_renditions?.status || "unknown";
|
|
1838
|
-
console.
|
|
2004
|
+
console.warn(
|
|
1839
2005
|
`\u231B Waiting for static rendition (attempt ${attempt}/${STATIC_RENDITION_MAX_ATTEMPTS}) \u2192 ${currentStatus}`
|
|
1840
2006
|
);
|
|
1841
2007
|
if (currentStatus === "errored") {
|
|
@@ -1848,55 +2014,179 @@ async function waitForAudioStaticRendition({
|
|
|
1848
2014
|
"Timed out waiting for the static rendition to become ready. Please try again in a moment."
|
|
1849
2015
|
);
|
|
1850
2016
|
}
|
|
2017
|
+
async function fetchAudioFromMux(audioUrl) {
|
|
2018
|
+
"use step";
|
|
2019
|
+
const audioResponse = await fetch(audioUrl);
|
|
2020
|
+
if (!audioResponse.ok) {
|
|
2021
|
+
throw new Error(`Failed to fetch audio file: ${audioResponse.statusText}`);
|
|
2022
|
+
}
|
|
2023
|
+
return audioResponse.arrayBuffer();
|
|
2024
|
+
}
|
|
2025
|
+
async function createElevenLabsDubbingJob({
|
|
2026
|
+
audioBuffer,
|
|
2027
|
+
assetId,
|
|
2028
|
+
elevenLabsLangCode,
|
|
2029
|
+
numSpeakers
|
|
2030
|
+
}) {
|
|
2031
|
+
"use step";
|
|
2032
|
+
const elevenLabsApiKey = getApiKeyFromEnv("elevenlabs");
|
|
2033
|
+
const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
|
|
2034
|
+
const formData = new FormData();
|
|
2035
|
+
formData.append("file", audioBlob);
|
|
2036
|
+
formData.append("target_lang", elevenLabsLangCode);
|
|
2037
|
+
formData.append("num_speakers", numSpeakers.toString());
|
|
2038
|
+
formData.append("name", `Mux Asset ${assetId} - auto to ${elevenLabsLangCode}`);
|
|
2039
|
+
const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
|
|
2040
|
+
method: "POST",
|
|
2041
|
+
headers: {
|
|
2042
|
+
"xi-api-key": elevenLabsApiKey
|
|
2043
|
+
},
|
|
2044
|
+
body: formData
|
|
2045
|
+
});
|
|
2046
|
+
if (!dubbingResponse.ok) {
|
|
2047
|
+
throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
|
|
2048
|
+
}
|
|
2049
|
+
const dubbingData = await dubbingResponse.json();
|
|
2050
|
+
return dubbingData.dubbing_id;
|
|
2051
|
+
}
|
|
2052
|
+
async function checkElevenLabsDubbingStatus({
|
|
2053
|
+
dubbingId
|
|
2054
|
+
}) {
|
|
2055
|
+
"use step";
|
|
2056
|
+
const elevenLabsApiKey = getApiKeyFromEnv("elevenlabs");
|
|
2057
|
+
const statusResponse = await fetch(`https://api.elevenlabs.io/v1/dubbing/${dubbingId}`, {
|
|
2058
|
+
headers: {
|
|
2059
|
+
"xi-api-key": elevenLabsApiKey
|
|
2060
|
+
}
|
|
2061
|
+
});
|
|
2062
|
+
if (!statusResponse.ok) {
|
|
2063
|
+
throw new Error(`Status check failed: ${statusResponse.statusText}`);
|
|
2064
|
+
}
|
|
2065
|
+
const statusData = await statusResponse.json();
|
|
2066
|
+
return {
|
|
2067
|
+
status: statusData.status,
|
|
2068
|
+
targetLanguages: statusData.target_languages ?? []
|
|
2069
|
+
};
|
|
2070
|
+
}
|
|
2071
|
+
async function downloadDubbedAudioFromElevenLabs({
|
|
2072
|
+
dubbingId,
|
|
2073
|
+
languageCode
|
|
2074
|
+
}) {
|
|
2075
|
+
"use step";
|
|
2076
|
+
const elevenLabsApiKey = getApiKeyFromEnv("elevenlabs");
|
|
2077
|
+
const audioUrl = `https://api.elevenlabs.io/v1/dubbing/${dubbingId}/audio/${languageCode}`;
|
|
2078
|
+
const audioResponse = await fetch(audioUrl, {
|
|
2079
|
+
headers: {
|
|
2080
|
+
"xi-api-key": elevenLabsApiKey
|
|
2081
|
+
}
|
|
2082
|
+
});
|
|
2083
|
+
if (!audioResponse.ok) {
|
|
2084
|
+
throw new Error(`Failed to fetch dubbed audio: ${audioResponse.statusText}`);
|
|
2085
|
+
}
|
|
2086
|
+
return audioResponse.arrayBuffer();
|
|
2087
|
+
}
|
|
2088
|
+
async function uploadDubbedAudioToS3({
|
|
2089
|
+
dubbedAudioBuffer,
|
|
2090
|
+
assetId,
|
|
2091
|
+
toLanguageCode,
|
|
2092
|
+
s3Endpoint,
|
|
2093
|
+
s3Region,
|
|
2094
|
+
s3Bucket
|
|
2095
|
+
}) {
|
|
2096
|
+
"use step";
|
|
2097
|
+
const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
|
|
2098
|
+
const { Upload } = await import("@aws-sdk/lib-storage");
|
|
2099
|
+
const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
|
|
2100
|
+
const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
|
|
2101
|
+
const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
|
|
2102
|
+
const s3Client = new S3Client({
|
|
2103
|
+
region: s3Region,
|
|
2104
|
+
endpoint: s3Endpoint,
|
|
2105
|
+
credentials: {
|
|
2106
|
+
accessKeyId: s3AccessKeyId,
|
|
2107
|
+
secretAccessKey: s3SecretAccessKey
|
|
2108
|
+
},
|
|
2109
|
+
forcePathStyle: true
|
|
2110
|
+
});
|
|
2111
|
+
const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
|
|
2112
|
+
const upload = new Upload({
|
|
2113
|
+
client: s3Client,
|
|
2114
|
+
params: {
|
|
2115
|
+
Bucket: s3Bucket,
|
|
2116
|
+
Key: audioKey,
|
|
2117
|
+
Body: new Uint8Array(dubbedAudioBuffer),
|
|
2118
|
+
ContentType: "audio/mp4"
|
|
2119
|
+
}
|
|
2120
|
+
});
|
|
2121
|
+
await upload.done();
|
|
2122
|
+
const getObjectCommand = new GetObjectCommand({
|
|
2123
|
+
Bucket: s3Bucket,
|
|
2124
|
+
Key: audioKey
|
|
2125
|
+
});
|
|
2126
|
+
const presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
|
|
2127
|
+
expiresIn: 3600
|
|
2128
|
+
// 1 hour
|
|
2129
|
+
});
|
|
2130
|
+
console.warn(`\u2705 Audio uploaded successfully to: ${audioKey}`);
|
|
2131
|
+
console.warn(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
|
|
2132
|
+
return presignedUrl;
|
|
2133
|
+
}
|
|
2134
|
+
async function createAudioTrackOnMux(assetId, languageCode, presignedUrl) {
|
|
2135
|
+
"use step";
|
|
2136
|
+
const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
|
|
2137
|
+
const mux = new Mux3({
|
|
2138
|
+
tokenId: muxTokenId,
|
|
2139
|
+
tokenSecret: muxTokenSecret
|
|
2140
|
+
});
|
|
2141
|
+
const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(languageCode) || languageCode.toUpperCase();
|
|
2142
|
+
const trackName = `${languageName} (auto-dubbed)`;
|
|
2143
|
+
const trackResponse = await mux.video.assets.createTrack(assetId, {
|
|
2144
|
+
type: "audio",
|
|
2145
|
+
language_code: languageCode,
|
|
2146
|
+
name: trackName,
|
|
2147
|
+
url: presignedUrl
|
|
2148
|
+
});
|
|
2149
|
+
if (!trackResponse.id) {
|
|
2150
|
+
throw new Error("Failed to create audio track: no track ID returned from Mux");
|
|
2151
|
+
}
|
|
2152
|
+
return trackResponse.id;
|
|
2153
|
+
}
|
|
1851
2154
|
async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
2155
|
+
"use workflow";
|
|
1852
2156
|
const {
|
|
1853
2157
|
provider = "elevenlabs",
|
|
1854
2158
|
numSpeakers = 0,
|
|
1855
2159
|
// 0 = auto-detect
|
|
1856
|
-
muxTokenId,
|
|
1857
|
-
muxTokenSecret,
|
|
1858
2160
|
elevenLabsApiKey,
|
|
1859
2161
|
uploadToMux = true
|
|
1860
2162
|
} = options;
|
|
1861
2163
|
if (provider !== "elevenlabs") {
|
|
1862
2164
|
throw new Error("Only ElevenLabs provider is currently supported for audio translation");
|
|
1863
2165
|
}
|
|
1864
|
-
const muxId = muxTokenId ?? env_default.MUX_TOKEN_ID;
|
|
1865
|
-
const muxSecret = muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
|
|
1866
2166
|
const elevenLabsKey = elevenLabsApiKey ?? env_default.ELEVENLABS_API_KEY;
|
|
1867
2167
|
const s3Endpoint = options.s3Endpoint ?? env_default.S3_ENDPOINT;
|
|
1868
2168
|
const s3Region = options.s3Region ?? env_default.S3_REGION ?? "auto";
|
|
1869
2169
|
const s3Bucket = options.s3Bucket ?? env_default.S3_BUCKET;
|
|
1870
|
-
const s3AccessKeyId =
|
|
1871
|
-
const s3SecretAccessKey =
|
|
1872
|
-
if (!muxId || !muxSecret) {
|
|
1873
|
-
throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
|
|
1874
|
-
}
|
|
2170
|
+
const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
|
|
2171
|
+
const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
|
|
1875
2172
|
if (!elevenLabsKey) {
|
|
1876
2173
|
throw new Error("ElevenLabs API key is required. Provide elevenLabsApiKey in options or set ELEVENLABS_API_KEY environment variable.");
|
|
1877
2174
|
}
|
|
1878
2175
|
if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
|
|
1879
2176
|
throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
|
|
1880
2177
|
}
|
|
1881
|
-
const
|
|
1882
|
-
|
|
1883
|
-
tokenSecret: muxSecret
|
|
1884
|
-
});
|
|
1885
|
-
console.log(`\u{1F3AC} Fetching Mux asset: ${assetId}`);
|
|
1886
|
-
const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(mux, assetId);
|
|
1887
|
-
const signingContext = resolveSigningContext(options);
|
|
2178
|
+
const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(assetId);
|
|
2179
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
1888
2180
|
if (policy === "signed" && !signingContext) {
|
|
1889
2181
|
throw new Error(
|
|
1890
2182
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
1891
2183
|
);
|
|
1892
2184
|
}
|
|
1893
|
-
console.log("\u{1F50D} Checking for audio-only static rendition...");
|
|
1894
2185
|
let currentAsset = initialAsset;
|
|
1895
2186
|
if (!hasReadyAudioStaticRendition(currentAsset)) {
|
|
1896
|
-
console.
|
|
2187
|
+
console.warn("\u274C No ready audio static rendition found. Requesting one now...");
|
|
1897
2188
|
currentAsset = await waitForAudioStaticRendition({
|
|
1898
2189
|
assetId,
|
|
1899
|
-
muxClient: mux,
|
|
1900
2190
|
initialAsset: currentAsset
|
|
1901
2191
|
});
|
|
1902
2192
|
}
|
|
@@ -1910,58 +2200,42 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
1910
2200
|
if (policy === "signed" && signingContext) {
|
|
1911
2201
|
audioUrl = await signUrl(audioUrl, playbackId, signingContext, "video");
|
|
1912
2202
|
}
|
|
1913
|
-
console.
|
|
1914
|
-
|
|
2203
|
+
console.warn("\u{1F399}\uFE0F Fetching audio from Mux...");
|
|
2204
|
+
let audioBuffer;
|
|
2205
|
+
try {
|
|
2206
|
+
audioBuffer = await fetchAudioFromMux(audioUrl);
|
|
2207
|
+
} catch (error) {
|
|
2208
|
+
throw new Error(`Failed to fetch audio from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2209
|
+
}
|
|
2210
|
+
console.warn("\u{1F399}\uFE0F Creating dubbing job in ElevenLabs...");
|
|
2211
|
+
const elevenLabsLangCode = toISO639_3(toLanguageCode);
|
|
2212
|
+
console.warn(`\u{1F50D} Creating dubbing job for asset ${assetId} with language code: ${elevenLabsLangCode}`);
|
|
1915
2213
|
let dubbingId;
|
|
1916
2214
|
try {
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
|
|
1922
|
-
const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
|
|
1923
|
-
const audioFile = audioBlob;
|
|
1924
|
-
const formData = new FormData();
|
|
1925
|
-
formData.append("file", audioFile);
|
|
1926
|
-
formData.append("target_lang", toLanguageCode);
|
|
1927
|
-
formData.append("num_speakers", numSpeakers.toString());
|
|
1928
|
-
formData.append("name", `Mux Asset ${assetId} - auto to ${toLanguageCode}`);
|
|
1929
|
-
const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
|
|
1930
|
-
method: "POST",
|
|
1931
|
-
headers: {
|
|
1932
|
-
"xi-api-key": elevenLabsKey
|
|
1933
|
-
},
|
|
1934
|
-
body: formData
|
|
2215
|
+
dubbingId = await createElevenLabsDubbingJob({
|
|
2216
|
+
audioBuffer,
|
|
2217
|
+
assetId,
|
|
2218
|
+
elevenLabsLangCode,
|
|
2219
|
+
numSpeakers
|
|
1935
2220
|
});
|
|
1936
|
-
|
|
1937
|
-
throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
|
|
1938
|
-
}
|
|
1939
|
-
const dubbingData = await dubbingResponse.json();
|
|
1940
|
-
dubbingId = dubbingData.dubbing_id;
|
|
1941
|
-
console.log(`\u2705 Dubbing job created: ${dubbingId}`);
|
|
1942
|
-
console.log(`\u23F1\uFE0F Expected duration: ${dubbingData.expected_duration_sec}s`);
|
|
2221
|
+
console.warn(`\u2705 Dubbing job created with ID: ${dubbingId}`);
|
|
1943
2222
|
} catch (error) {
|
|
1944
2223
|
throw new Error(`Failed to create ElevenLabs dubbing job: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
1945
2224
|
}
|
|
1946
|
-
console.
|
|
2225
|
+
console.warn("\u23F3 Waiting for dubbing to complete...");
|
|
1947
2226
|
let dubbingStatus = "dubbing";
|
|
1948
2227
|
let pollAttempts = 0;
|
|
1949
2228
|
const maxPollAttempts = 180;
|
|
2229
|
+
let targetLanguages = [];
|
|
1950
2230
|
while (dubbingStatus === "dubbing" && pollAttempts < maxPollAttempts) {
|
|
1951
|
-
await
|
|
2231
|
+
await sleep(1e4);
|
|
1952
2232
|
pollAttempts++;
|
|
1953
2233
|
try {
|
|
1954
|
-
const
|
|
1955
|
-
|
|
1956
|
-
"xi-api-key": elevenLabsKey
|
|
1957
|
-
}
|
|
2234
|
+
const statusResult = await checkElevenLabsDubbingStatus({
|
|
2235
|
+
dubbingId
|
|
1958
2236
|
});
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
}
|
|
1962
|
-
const statusData = await statusResponse.json();
|
|
1963
|
-
dubbingStatus = statusData.status;
|
|
1964
|
-
console.log(`\u{1F4CA} Status check ${pollAttempts}: ${dubbingStatus}`);
|
|
2237
|
+
dubbingStatus = statusResult.status;
|
|
2238
|
+
targetLanguages = statusResult.targetLanguages;
|
|
1965
2239
|
if (dubbingStatus === "failed") {
|
|
1966
2240
|
throw new Error("ElevenLabs dubbing job failed");
|
|
1967
2241
|
}
|
|
@@ -1972,89 +2246,74 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
1972
2246
|
if (dubbingStatus !== "dubbed") {
|
|
1973
2247
|
throw new Error(`Dubbing job timed out or failed. Final status: ${dubbingStatus}`);
|
|
1974
2248
|
}
|
|
1975
|
-
console.
|
|
2249
|
+
console.warn("\u2705 Dubbing completed successfully!");
|
|
1976
2250
|
if (!uploadToMux) {
|
|
2251
|
+
const targetLanguage2 = getLanguageCodePair(toLanguageCode);
|
|
1977
2252
|
return {
|
|
1978
2253
|
assetId,
|
|
1979
|
-
targetLanguageCode:
|
|
2254
|
+
targetLanguageCode: targetLanguage2.iso639_1,
|
|
2255
|
+
targetLanguage: targetLanguage2,
|
|
1980
2256
|
dubbingId
|
|
1981
2257
|
};
|
|
1982
2258
|
}
|
|
1983
|
-
console.
|
|
2259
|
+
console.warn("\u{1F4E5} Downloading dubbed audio from ElevenLabs...");
|
|
1984
2260
|
let dubbedAudioBuffer;
|
|
1985
2261
|
try {
|
|
1986
|
-
const
|
|
1987
|
-
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
|
|
1992
|
-
if (!
|
|
1993
|
-
|
|
2262
|
+
const requestedLangCode = toISO639_3(toLanguageCode);
|
|
2263
|
+
let downloadLangCode = targetLanguages.find(
|
|
2264
|
+
(lang) => lang === requestedLangCode
|
|
2265
|
+
) ?? targetLanguages.find(
|
|
2266
|
+
(lang) => lang.toLowerCase() === requestedLangCode.toLowerCase()
|
|
2267
|
+
);
|
|
2268
|
+
if (!downloadLangCode && targetLanguages.length > 0) {
|
|
2269
|
+
downloadLangCode = targetLanguages[0];
|
|
2270
|
+
console.warn(`\u26A0\uFE0F Requested language "${requestedLangCode}" not found in target_languages. Using "${downloadLangCode}" instead.`);
|
|
2271
|
+
}
|
|
2272
|
+
if (!downloadLangCode) {
|
|
2273
|
+
downloadLangCode = requestedLangCode;
|
|
2274
|
+
console.warn(`\u26A0\uFE0F No target_languages available from ElevenLabs status. Using requested language code: ${requestedLangCode}`);
|
|
1994
2275
|
}
|
|
1995
|
-
dubbedAudioBuffer = await
|
|
1996
|
-
|
|
2276
|
+
dubbedAudioBuffer = await downloadDubbedAudioFromElevenLabs({
|
|
2277
|
+
dubbingId,
|
|
2278
|
+
languageCode: downloadLangCode
|
|
2279
|
+
});
|
|
2280
|
+
console.warn("\u2705 Dubbed audio downloaded successfully!");
|
|
1997
2281
|
} catch (error) {
|
|
1998
2282
|
throw new Error(`Failed to download dubbed audio: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
1999
2283
|
}
|
|
2000
|
-
console.
|
|
2001
|
-
const s3Client = new import_client_s3.S3Client({
|
|
2002
|
-
region: s3Region,
|
|
2003
|
-
endpoint: s3Endpoint,
|
|
2004
|
-
credentials: {
|
|
2005
|
-
accessKeyId: s3AccessKeyId,
|
|
2006
|
-
secretAccessKey: s3SecretAccessKey
|
|
2007
|
-
},
|
|
2008
|
-
forcePathStyle: true
|
|
2009
|
-
});
|
|
2010
|
-
const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
|
|
2284
|
+
console.warn("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
|
|
2011
2285
|
let presignedUrl;
|
|
2012
2286
|
try {
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
|
|
2016
|
-
|
|
2017
|
-
|
|
2018
|
-
|
|
2019
|
-
|
|
2020
|
-
}
|
|
2021
|
-
});
|
|
2022
|
-
await upload.done();
|
|
2023
|
-
console.log(`\u2705 Audio uploaded successfully to: ${audioKey}`);
|
|
2024
|
-
const getObjectCommand = new import_client_s3.GetObjectCommand({
|
|
2025
|
-
Bucket: s3Bucket,
|
|
2026
|
-
Key: audioKey
|
|
2027
|
-
});
|
|
2028
|
-
presignedUrl = await (0, import_s3_request_presigner.getSignedUrl)(s3Client, getObjectCommand, {
|
|
2029
|
-
expiresIn: 3600
|
|
2030
|
-
// 1 hour
|
|
2287
|
+
presignedUrl = await uploadDubbedAudioToS3({
|
|
2288
|
+
dubbedAudioBuffer,
|
|
2289
|
+
assetId,
|
|
2290
|
+
toLanguageCode,
|
|
2291
|
+
s3Endpoint,
|
|
2292
|
+
s3Region,
|
|
2293
|
+
s3Bucket
|
|
2031
2294
|
});
|
|
2032
|
-
console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
|
|
2033
2295
|
} catch (error) {
|
|
2034
2296
|
throw new Error(`Failed to upload audio to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2035
2297
|
}
|
|
2036
|
-
console.
|
|
2298
|
+
console.warn("\u{1F4F9} Adding dubbed audio track to Mux asset...");
|
|
2037
2299
|
let uploadedTrackId;
|
|
2300
|
+
const muxLangCode = toISO639_1(toLanguageCode);
|
|
2038
2301
|
try {
|
|
2039
|
-
|
|
2302
|
+
uploadedTrackId = await createAudioTrackOnMux(assetId, muxLangCode, presignedUrl);
|
|
2303
|
+
const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(muxLangCode) || muxLangCode.toUpperCase();
|
|
2040
2304
|
const trackName = `${languageName} (auto-dubbed)`;
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
language_code: toLanguageCode,
|
|
2044
|
-
name: trackName,
|
|
2045
|
-
url: presignedUrl
|
|
2046
|
-
});
|
|
2047
|
-
uploadedTrackId = trackResponse.id;
|
|
2048
|
-
console.log(`\u2705 Audio track added to Mux asset with ID: ${uploadedTrackId}`);
|
|
2049
|
-
console.log(`\u{1F3B5} Track name: "${trackName}"`);
|
|
2305
|
+
console.warn(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
|
|
2306
|
+
console.warn(`\u{1F4CB} Track name: "${trackName}"`);
|
|
2050
2307
|
} catch (error) {
|
|
2051
2308
|
console.warn(`\u26A0\uFE0F Failed to add audio track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2052
|
-
console.
|
|
2053
|
-
console.
|
|
2309
|
+
console.warn("\u{1F517} You can manually add the track using this presigned URL:");
|
|
2310
|
+
console.warn(presignedUrl);
|
|
2054
2311
|
}
|
|
2312
|
+
const targetLanguage = getLanguageCodePair(toLanguageCode);
|
|
2055
2313
|
return {
|
|
2056
2314
|
assetId,
|
|
2057
|
-
targetLanguageCode:
|
|
2315
|
+
targetLanguageCode: targetLanguage.iso639_1,
|
|
2316
|
+
targetLanguage,
|
|
2058
2317
|
dubbingId,
|
|
2059
2318
|
uploadedTrackId,
|
|
2060
2319
|
presignedUrl
|
|
@@ -2062,43 +2321,143 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
2062
2321
|
}
|
|
2063
2322
|
|
|
2064
2323
|
// src/workflows/translate-captions.ts
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
|
|
2068
|
-
var
|
|
2069
|
-
|
|
2070
|
-
var translationSchema = import_zod5.z.object({
|
|
2071
|
-
translation: import_zod5.z.string()
|
|
2324
|
+
import Mux4 from "@mux/mux-node";
|
|
2325
|
+
import { generateObject as generateObject4 } from "ai";
|
|
2326
|
+
import { z as z5 } from "zod";
|
|
2327
|
+
var translationSchema = z5.object({
|
|
2328
|
+
translation: z5.string()
|
|
2072
2329
|
});
|
|
2073
|
-
|
|
2330
|
+
async function fetchVttFromMux(vttUrl) {
|
|
2331
|
+
"use step";
|
|
2332
|
+
const vttResponse = await fetch(vttUrl);
|
|
2333
|
+
if (!vttResponse.ok) {
|
|
2334
|
+
throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
|
|
2335
|
+
}
|
|
2336
|
+
return vttResponse.text();
|
|
2337
|
+
}
|
|
2338
|
+
async function translateVttWithAI({
|
|
2339
|
+
vttContent,
|
|
2340
|
+
fromLanguageCode,
|
|
2341
|
+
toLanguageCode,
|
|
2342
|
+
provider,
|
|
2343
|
+
modelId,
|
|
2344
|
+
abortSignal
|
|
2345
|
+
}) {
|
|
2346
|
+
"use step";
|
|
2347
|
+
const languageModel = createLanguageModelFromConfig(provider, modelId);
|
|
2348
|
+
const response = await generateObject4({
|
|
2349
|
+
model: languageModel,
|
|
2350
|
+
schema: translationSchema,
|
|
2351
|
+
abortSignal,
|
|
2352
|
+
messages: [
|
|
2353
|
+
{
|
|
2354
|
+
role: "user",
|
|
2355
|
+
content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
|
|
2356
|
+
|
|
2357
|
+
${vttContent}`
|
|
2358
|
+
}
|
|
2359
|
+
]
|
|
2360
|
+
});
|
|
2361
|
+
return {
|
|
2362
|
+
translatedVtt: response.object.translation,
|
|
2363
|
+
usage: {
|
|
2364
|
+
inputTokens: response.usage.inputTokens,
|
|
2365
|
+
outputTokens: response.usage.outputTokens,
|
|
2366
|
+
totalTokens: response.usage.totalTokens,
|
|
2367
|
+
reasoningTokens: response.usage.reasoningTokens,
|
|
2368
|
+
cachedInputTokens: response.usage.cachedInputTokens
|
|
2369
|
+
}
|
|
2370
|
+
};
|
|
2371
|
+
}
|
|
2372
|
+
async function uploadVttToS3({
|
|
2373
|
+
translatedVtt,
|
|
2374
|
+
assetId,
|
|
2375
|
+
fromLanguageCode,
|
|
2376
|
+
toLanguageCode,
|
|
2377
|
+
s3Endpoint,
|
|
2378
|
+
s3Region,
|
|
2379
|
+
s3Bucket
|
|
2380
|
+
}) {
|
|
2381
|
+
"use step";
|
|
2382
|
+
const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
|
|
2383
|
+
const { Upload } = await import("@aws-sdk/lib-storage");
|
|
2384
|
+
const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
|
|
2385
|
+
const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
|
|
2386
|
+
const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
|
|
2387
|
+
const s3Client = new S3Client({
|
|
2388
|
+
region: s3Region,
|
|
2389
|
+
endpoint: s3Endpoint,
|
|
2390
|
+
credentials: {
|
|
2391
|
+
accessKeyId: s3AccessKeyId,
|
|
2392
|
+
secretAccessKey: s3SecretAccessKey
|
|
2393
|
+
},
|
|
2394
|
+
forcePathStyle: true
|
|
2395
|
+
});
|
|
2396
|
+
const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
|
|
2397
|
+
const upload = new Upload({
|
|
2398
|
+
client: s3Client,
|
|
2399
|
+
params: {
|
|
2400
|
+
Bucket: s3Bucket,
|
|
2401
|
+
Key: vttKey,
|
|
2402
|
+
Body: translatedVtt,
|
|
2403
|
+
ContentType: "text/vtt"
|
|
2404
|
+
}
|
|
2405
|
+
});
|
|
2406
|
+
await upload.done();
|
|
2407
|
+
const getObjectCommand = new GetObjectCommand({
|
|
2408
|
+
Bucket: s3Bucket,
|
|
2409
|
+
Key: vttKey
|
|
2410
|
+
});
|
|
2411
|
+
const presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
|
|
2412
|
+
expiresIn: 3600
|
|
2413
|
+
// 1 hour
|
|
2414
|
+
});
|
|
2415
|
+
return presignedUrl;
|
|
2416
|
+
}
|
|
2417
|
+
/**
 * Attaches a subtitle text track to a Mux asset from a fetchable VTT URL.
 *
 * @param {string} assetId - Mux asset to attach the track to.
 * @param {string} languageCode - BCP-47 language code for the new track.
 * @param {string} trackName - Human-readable track label.
 * @param {string} presignedUrl - URL Mux can download the VTT file from.
 * @returns {Promise<string>} ID of the created track.
 * @throws {Error} If Mux responds without a track ID.
 */
async function createTextTrackOnMux(assetId, languageCode, trackName, presignedUrl) {
  "use step";
  // Authenticate with Mux using credentials resolved from the environment.
  const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
  const muxClient = new Mux4({
    tokenId: muxTokenId,
    tokenSecret: muxTokenSecret
  });
  const track = await muxClient.video.assets.createTrack(assetId, {
    type: "text",
    text_type: "subtitles",
    language_code: languageCode,
    name: trackName,
    url: presignedUrl
  });
  // Guard against a malformed success response: callers rely on a concrete track ID.
  if (!track.id) {
    throw new Error("Failed to create text track: no track ID returned from Mux");
  }
  return track.id;
}
|
|
2074
2436
|
async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options) {
|
|
2437
|
+
"use workflow";
|
|
2075
2438
|
const {
|
|
2076
|
-
provider =
|
|
2439
|
+
provider = "openai",
|
|
2077
2440
|
model,
|
|
2078
2441
|
s3Endpoint: providedS3Endpoint,
|
|
2079
2442
|
s3Region: providedS3Region,
|
|
2080
2443
|
s3Bucket: providedS3Bucket,
|
|
2081
|
-
|
|
2082
|
-
s3SecretAccessKey: providedS3SecretAccessKey,
|
|
2083
|
-
uploadToMux: uploadToMuxOption,
|
|
2084
|
-
...clientConfig
|
|
2444
|
+
uploadToMux: uploadToMuxOption
|
|
2085
2445
|
} = options;
|
|
2086
|
-
const resolvedProvider = provider;
|
|
2087
2446
|
const s3Endpoint = providedS3Endpoint ?? env_default.S3_ENDPOINT;
|
|
2088
2447
|
const s3Region = providedS3Region ?? env_default.S3_REGION ?? "auto";
|
|
2089
2448
|
const s3Bucket = providedS3Bucket ?? env_default.S3_BUCKET;
|
|
2090
|
-
const s3AccessKeyId =
|
|
2091
|
-
const s3SecretAccessKey =
|
|
2449
|
+
const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
|
|
2450
|
+
const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
|
|
2092
2451
|
const uploadToMux = uploadToMuxOption !== false;
|
|
2093
|
-
const
|
|
2094
|
-
{ ...
|
|
2095
|
-
|
|
2452
|
+
const config = await createWorkflowConfig(
|
|
2453
|
+
{ ...options, model },
|
|
2454
|
+
provider
|
|
2096
2455
|
);
|
|
2097
2456
|
if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
|
|
2098
2457
|
throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
|
|
2099
2458
|
}
|
|
2100
|
-
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
|
|
2101
|
-
const signingContext =
|
|
2459
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
|
|
2460
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
2102
2461
|
if (policy === "signed" && !signingContext) {
|
|
2103
2462
|
throw new Error(
|
|
2104
2463
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -2119,120 +2478,81 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
|
|
|
2119
2478
|
}
|
|
2120
2479
|
let vttContent;
|
|
2121
2480
|
try {
|
|
2122
|
-
|
|
2123
|
-
if (!vttResponse.ok) {
|
|
2124
|
-
throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
|
|
2125
|
-
}
|
|
2126
|
-
vttContent = await vttResponse.text();
|
|
2481
|
+
vttContent = await fetchVttFromMux(vttUrl);
|
|
2127
2482
|
} catch (error) {
|
|
2128
2483
|
throw new Error(`Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2129
2484
|
}
|
|
2130
|
-
console.log(`\u2705 Found VTT content for language '${fromLanguageCode}'`);
|
|
2131
2485
|
let translatedVtt;
|
|
2486
|
+
let usage;
|
|
2132
2487
|
try {
|
|
2133
|
-
const
|
|
2134
|
-
|
|
2135
|
-
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
|
|
2139
|
-
|
|
2140
|
-
content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
|
|
2141
|
-
|
|
2142
|
-
${vttContent}`
|
|
2143
|
-
}
|
|
2144
|
-
]
|
|
2488
|
+
const result = await translateVttWithAI({
|
|
2489
|
+
vttContent,
|
|
2490
|
+
fromLanguageCode,
|
|
2491
|
+
toLanguageCode,
|
|
2492
|
+
provider: config.provider,
|
|
2493
|
+
modelId: config.modelId,
|
|
2494
|
+
abortSignal: options.abortSignal
|
|
2145
2495
|
});
|
|
2146
|
-
translatedVtt =
|
|
2496
|
+
translatedVtt = result.translatedVtt;
|
|
2497
|
+
usage = result.usage;
|
|
2147
2498
|
} catch (error) {
|
|
2148
|
-
throw new Error(`Failed to translate VTT with ${
|
|
2499
|
+
throw new Error(`Failed to translate VTT with ${config.provider}: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2149
2500
|
}
|
|
2150
|
-
|
|
2151
|
-
|
|
2501
|
+
const sourceLanguage = getLanguageCodePair(fromLanguageCode);
|
|
2502
|
+
const targetLanguage = getLanguageCodePair(toLanguageCode);
|
|
2152
2503
|
if (!uploadToMux) {
|
|
2153
|
-
console.log(`\u2705 VTT translated to ${toLanguageCode} successfully!`);
|
|
2154
2504
|
return {
|
|
2155
2505
|
assetId,
|
|
2156
2506
|
sourceLanguageCode: fromLanguageCode,
|
|
2157
2507
|
targetLanguageCode: toLanguageCode,
|
|
2508
|
+
sourceLanguage,
|
|
2509
|
+
targetLanguage,
|
|
2158
2510
|
originalVtt: vttContent,
|
|
2159
|
-
translatedVtt
|
|
2511
|
+
translatedVtt,
|
|
2512
|
+
usage
|
|
2160
2513
|
};
|
|
2161
2514
|
}
|
|
2162
|
-
console.log("\u{1F4E4} Uploading translated VTT to S3-compatible storage...");
|
|
2163
|
-
const s3Client = new import_client_s32.S3Client({
|
|
2164
|
-
region: s3Region,
|
|
2165
|
-
endpoint: s3Endpoint,
|
|
2166
|
-
credentials: {
|
|
2167
|
-
accessKeyId: s3AccessKeyId,
|
|
2168
|
-
secretAccessKey: s3SecretAccessKey
|
|
2169
|
-
},
|
|
2170
|
-
forcePathStyle: true
|
|
2171
|
-
// Often needed for non-AWS S3 services
|
|
2172
|
-
});
|
|
2173
|
-
const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
|
|
2174
2515
|
let presignedUrl;
|
|
2175
2516
|
try {
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
});
|
|
2185
|
-
await upload.done();
|
|
2186
|
-
console.log(`\u2705 VTT uploaded successfully to: ${vttKey}`);
|
|
2187
|
-
const getObjectCommand = new import_client_s32.GetObjectCommand({
|
|
2188
|
-
Bucket: s3Bucket,
|
|
2189
|
-
Key: vttKey
|
|
2190
|
-
});
|
|
2191
|
-
presignedUrl = await (0, import_s3_request_presigner2.getSignedUrl)(s3Client, getObjectCommand, {
|
|
2192
|
-
expiresIn: 3600
|
|
2193
|
-
// 1 hour
|
|
2517
|
+
presignedUrl = await uploadVttToS3({
|
|
2518
|
+
translatedVtt,
|
|
2519
|
+
assetId,
|
|
2520
|
+
fromLanguageCode,
|
|
2521
|
+
toLanguageCode,
|
|
2522
|
+
s3Endpoint,
|
|
2523
|
+
s3Region,
|
|
2524
|
+
s3Bucket
|
|
2194
2525
|
});
|
|
2195
|
-
console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
|
|
2196
2526
|
} catch (error) {
|
|
2197
2527
|
throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2198
2528
|
}
|
|
2199
|
-
console.log("\u{1F4F9} Adding translated track to Mux asset...");
|
|
2200
2529
|
let uploadedTrackId;
|
|
2201
2530
|
try {
|
|
2202
|
-
const languageName =
|
|
2531
|
+
const languageName = getLanguageName(toLanguageCode);
|
|
2203
2532
|
const trackName = `${languageName} (auto-translated)`;
|
|
2204
|
-
|
|
2205
|
-
type: "text",
|
|
2206
|
-
text_type: "subtitles",
|
|
2207
|
-
language_code: toLanguageCode,
|
|
2208
|
-
name: trackName,
|
|
2209
|
-
url: presignedUrl
|
|
2210
|
-
});
|
|
2211
|
-
uploadedTrackId = trackResponse.id;
|
|
2212
|
-
console.log(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
|
|
2213
|
-
console.log(`\u{1F4CB} Track name: "${trackName}"`);
|
|
2533
|
+
uploadedTrackId = await createTextTrackOnMux(assetId, toLanguageCode, trackName, presignedUrl);
|
|
2214
2534
|
} catch (error) {
|
|
2215
|
-
console.warn(
|
|
2216
|
-
console.log("\u{1F517} You can manually add the track using this presigned URL:");
|
|
2217
|
-
console.log(presignedUrl);
|
|
2535
|
+
console.warn(`Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2218
2536
|
}
|
|
2219
2537
|
return {
|
|
2220
2538
|
assetId,
|
|
2221
2539
|
sourceLanguageCode: fromLanguageCode,
|
|
2222
2540
|
targetLanguageCode: toLanguageCode,
|
|
2541
|
+
sourceLanguage,
|
|
2542
|
+
targetLanguage,
|
|
2223
2543
|
originalVtt: vttContent,
|
|
2224
2544
|
translatedVtt,
|
|
2225
2545
|
uploadedTrackId,
|
|
2226
|
-
presignedUrl
|
|
2546
|
+
presignedUrl,
|
|
2547
|
+
usage
|
|
2227
2548
|
};
|
|
2228
2549
|
}
|
|
2229
2550
|
|
|
2230
2551
|
// src/index.ts
|
|
2231
2552
|
var version = "0.1.0";
|
|
2232
|
-
|
|
2233
|
-
|
|
2234
|
-
primitives,
|
|
2553
|
+
export {
|
|
2554
|
+
primitives_exports as primitives,
|
|
2235
2555
|
version,
|
|
2236
|
-
workflows
|
|
2237
|
-
}
|
|
2556
|
+
workflows_exports as workflows
|
|
2557
|
+
};
|
|
2238
2558
|
//# sourceMappingURL=index.js.map
|