@mux/ai 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +294 -79
- package/dist/{index-Bnv7tv90.d.ts → index-CMZYZcj6.d.ts} +122 -3
- package/dist/index.d.ts +1 -1
- package/dist/index.js +955 -624
- package/dist/index.js.map +1 -1
- package/dist/primitives/index.js +18 -71
- package/dist/primitives/index.js.map +1 -1
- package/dist/workflows/index.d.ts +1 -1
- package/dist/workflows/index.js +953 -638
- package/dist/workflows/index.js.map +1 -1
- package/package.json +21 -23
- package/dist/index-BNnz9P_5.d.mts +0 -144
- package/dist/index-vJ5r2FNm.d.mts +0 -477
- package/dist/index.d.mts +0 -13
- package/dist/index.mjs +0 -2205
- package/dist/index.mjs.map +0 -1
- package/dist/primitives/index.d.mts +0 -3
- package/dist/primitives/index.mjs +0 -358
- package/dist/primitives/index.mjs.map +0 -1
- package/dist/types-ktXDZ93V.d.mts +0 -137
- package/dist/workflows/index.d.mts +0 -8
- package/dist/workflows/index.mjs +0 -2168
- package/dist/workflows/index.mjs.map +0 -1
package/dist/workflows/index.js
CHANGED
|
@@ -1,194 +1,25 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __create = Object.create;
|
|
3
|
-
var __defProp = Object.defineProperty;
|
|
4
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
-
var __export = (target, all) => {
|
|
9
|
-
for (var name in all)
|
|
10
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
11
|
-
};
|
|
12
|
-
var __copyProps = (to, from, except, desc) => {
|
|
13
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
14
|
-
for (let key of __getOwnPropNames(from))
|
|
15
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
16
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
17
|
-
}
|
|
18
|
-
return to;
|
|
19
|
-
};
|
|
20
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
-
mod
|
|
27
|
-
));
|
|
28
|
-
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
|
-
|
|
30
|
-
// src/workflows/index.ts
|
|
31
|
-
var workflows_exports = {};
|
|
32
|
-
__export(workflows_exports, {
|
|
33
|
-
SUMMARY_KEYWORD_LIMIT: () => SUMMARY_KEYWORD_LIMIT,
|
|
34
|
-
burnedInCaptionsSchema: () => burnedInCaptionsSchema,
|
|
35
|
-
chapterSchema: () => chapterSchema,
|
|
36
|
-
chaptersSchema: () => chaptersSchema,
|
|
37
|
-
generateChapters: () => generateChapters,
|
|
38
|
-
generateVideoEmbeddings: () => generateVideoEmbeddings,
|
|
39
|
-
getModerationScores: () => getModerationScores,
|
|
40
|
-
getSummaryAndTags: () => getSummaryAndTags,
|
|
41
|
-
hasBurnedInCaptions: () => hasBurnedInCaptions,
|
|
42
|
-
summarySchema: () => summarySchema,
|
|
43
|
-
translateAudio: () => translateAudio,
|
|
44
|
-
translateCaptions: () => translateCaptions,
|
|
45
|
-
translationSchema: () => translationSchema
|
|
46
|
-
});
|
|
47
|
-
module.exports = __toCommonJS(workflows_exports);
|
|
48
|
-
|
|
49
|
-
// src/workflows/burned-in-captions.ts
|
|
50
|
-
var import_ai = require("ai");
|
|
51
|
-
|
|
52
|
-
// node_modules/dedent/dist/dedent.mjs
|
|
53
|
-
function ownKeys(object, enumerableOnly) {
|
|
54
|
-
var keys = Object.keys(object);
|
|
55
|
-
if (Object.getOwnPropertySymbols) {
|
|
56
|
-
var symbols = Object.getOwnPropertySymbols(object);
|
|
57
|
-
enumerableOnly && (symbols = symbols.filter(function(sym) {
|
|
58
|
-
return Object.getOwnPropertyDescriptor(object, sym).enumerable;
|
|
59
|
-
})), keys.push.apply(keys, symbols);
|
|
60
|
-
}
|
|
61
|
-
return keys;
|
|
62
|
-
}
|
|
63
|
-
function _objectSpread(target) {
|
|
64
|
-
for (var i = 1; i < arguments.length; i++) {
|
|
65
|
-
var source = null != arguments[i] ? arguments[i] : {};
|
|
66
|
-
i % 2 ? ownKeys(Object(source), true).forEach(function(key) {
|
|
67
|
-
_defineProperty(target, key, source[key]);
|
|
68
|
-
}) : Object.getOwnPropertyDescriptors ? Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)) : ownKeys(Object(source)).forEach(function(key) {
|
|
69
|
-
Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
|
|
70
|
-
});
|
|
71
|
-
}
|
|
72
|
-
return target;
|
|
73
|
-
}
|
|
74
|
-
function _defineProperty(obj, key, value) {
|
|
75
|
-
key = _toPropertyKey(key);
|
|
76
|
-
if (key in obj) {
|
|
77
|
-
Object.defineProperty(obj, key, { value, enumerable: true, configurable: true, writable: true });
|
|
78
|
-
} else {
|
|
79
|
-
obj[key] = value;
|
|
80
|
-
}
|
|
81
|
-
return obj;
|
|
82
|
-
}
|
|
83
|
-
function _toPropertyKey(arg) {
|
|
84
|
-
var key = _toPrimitive(arg, "string");
|
|
85
|
-
return typeof key === "symbol" ? key : String(key);
|
|
86
|
-
}
|
|
87
|
-
function _toPrimitive(input, hint) {
|
|
88
|
-
if (typeof input !== "object" || input === null) return input;
|
|
89
|
-
var prim = input[Symbol.toPrimitive];
|
|
90
|
-
if (prim !== void 0) {
|
|
91
|
-
var res = prim.call(input, hint || "default");
|
|
92
|
-
if (typeof res !== "object") return res;
|
|
93
|
-
throw new TypeError("@@toPrimitive must return a primitive value.");
|
|
94
|
-
}
|
|
95
|
-
return (hint === "string" ? String : Number)(input);
|
|
96
|
-
}
|
|
97
|
-
var dedent = createDedent({});
|
|
98
|
-
var dedent_default = dedent;
|
|
99
|
-
function createDedent(options) {
|
|
100
|
-
dedent2.withOptions = (newOptions) => createDedent(_objectSpread(_objectSpread({}, options), newOptions));
|
|
101
|
-
return dedent2;
|
|
102
|
-
function dedent2(strings, ...values) {
|
|
103
|
-
const raw = typeof strings === "string" ? [strings] : strings.raw;
|
|
104
|
-
const {
|
|
105
|
-
alignValues = false,
|
|
106
|
-
escapeSpecialCharacters = Array.isArray(strings),
|
|
107
|
-
trimWhitespace = true
|
|
108
|
-
} = options;
|
|
109
|
-
let result = "";
|
|
110
|
-
for (let i = 0; i < raw.length; i++) {
|
|
111
|
-
let next = raw[i];
|
|
112
|
-
if (escapeSpecialCharacters) {
|
|
113
|
-
next = next.replace(/\\\n[ \t]*/g, "").replace(/\\`/g, "`").replace(/\\\$/g, "$").replace(/\\\{/g, "{");
|
|
114
|
-
}
|
|
115
|
-
result += next;
|
|
116
|
-
if (i < values.length) {
|
|
117
|
-
const value = alignValues ? alignValue(values[i], result) : values[i];
|
|
118
|
-
result += value;
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
const lines = result.split("\n");
|
|
122
|
-
let mindent = null;
|
|
123
|
-
for (const l of lines) {
|
|
124
|
-
const m = l.match(/^(\s+)\S+/);
|
|
125
|
-
if (m) {
|
|
126
|
-
const indent = m[1].length;
|
|
127
|
-
if (!mindent) {
|
|
128
|
-
mindent = indent;
|
|
129
|
-
} else {
|
|
130
|
-
mindent = Math.min(mindent, indent);
|
|
131
|
-
}
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
if (mindent !== null) {
|
|
135
|
-
const m = mindent;
|
|
136
|
-
result = lines.map((l) => l[0] === " " || l[0] === " " ? l.slice(m) : l).join("\n");
|
|
137
|
-
}
|
|
138
|
-
if (trimWhitespace) {
|
|
139
|
-
result = result.trim();
|
|
140
|
-
}
|
|
141
|
-
if (escapeSpecialCharacters) {
|
|
142
|
-
result = result.replace(/\\n/g, "\n");
|
|
143
|
-
}
|
|
144
|
-
return result;
|
|
145
|
-
}
|
|
146
|
-
}
|
|
147
|
-
function alignValue(value, precedingText) {
|
|
148
|
-
if (typeof value !== "string" || !value.includes("\n")) {
|
|
149
|
-
return value;
|
|
150
|
-
}
|
|
151
|
-
const currentLine = precedingText.slice(precedingText.lastIndexOf("\n") + 1);
|
|
152
|
-
const indentMatch = currentLine.match(/^(\s+)/);
|
|
153
|
-
if (indentMatch) {
|
|
154
|
-
const indent = indentMatch[1];
|
|
155
|
-
return value.replace(/\n/g, `
|
|
156
|
-
${indent}`);
|
|
157
|
-
}
|
|
158
|
-
return value;
|
|
159
|
-
}
|
|
160
|
-
|
|
161
1
|
// src/workflows/burned-in-captions.ts
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
var import_mux_node = __toESM(require("@mux/mux-node"));
|
|
2
|
+
import { generateObject } from "ai";
|
|
3
|
+
import dedent from "dedent";
|
|
4
|
+
import { z as z2 } from "zod";
|
|
166
5
|
|
|
167
6
|
// src/env.ts
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
var import_dotenv_expand = require("dotenv-expand");
|
|
171
|
-
var import_zod = require("zod");
|
|
172
|
-
(0, import_dotenv_expand.expand)((0, import_dotenv.config)({
|
|
173
|
-
path: import_node_path.default.resolve(
|
|
174
|
-
process.cwd(),
|
|
175
|
-
process.env.NODE_ENV === "test" ? ".env.test" : ".env"
|
|
176
|
-
)
|
|
177
|
-
}));
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
import "dotenv/config";
|
|
178
9
|
function optionalString(description, message) {
|
|
179
|
-
return
|
|
10
|
+
return z.preprocess(
|
|
180
11
|
(value) => typeof value === "string" && value.trim().length === 0 ? void 0 : value,
|
|
181
|
-
|
|
12
|
+
z.string().trim().min(1, message).optional()
|
|
182
13
|
).describe(description);
|
|
183
14
|
}
|
|
184
15
|
function requiredString(description, message) {
|
|
185
|
-
return
|
|
16
|
+
return z.preprocess(
|
|
186
17
|
(value) => typeof value === "string" ? value.trim().length > 0 ? value.trim() : void 0 : value,
|
|
187
|
-
|
|
18
|
+
z.string().trim().min(1, message)
|
|
188
19
|
).describe(description);
|
|
189
20
|
}
|
|
190
|
-
var EnvSchema =
|
|
191
|
-
NODE_ENV:
|
|
21
|
+
var EnvSchema = z.object({
|
|
22
|
+
NODE_ENV: z.string().default("development").describe("Runtime environment."),
|
|
192
23
|
MUX_TOKEN_ID: requiredString("Mux access token ID.", "Required to access Mux APIs"),
|
|
193
24
|
MUX_TOKEN_SECRET: requiredString("Mux access token secret.", "Required to access Mux APIs"),
|
|
194
25
|
MUX_SIGNING_KEY: optionalString("Mux signing key ID for signed playback URLs.", "Used to sign playback URLs"),
|
|
@@ -217,12 +48,12 @@ var env = parseEnv();
|
|
|
217
48
|
var env_default = env;
|
|
218
49
|
|
|
219
50
|
// src/lib/providers.ts
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
51
|
+
import { createAnthropic } from "@ai-sdk/anthropic";
|
|
52
|
+
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
|
53
|
+
import { createOpenAI } from "@ai-sdk/openai";
|
|
223
54
|
var DEFAULT_LANGUAGE_MODELS = {
|
|
224
|
-
openai: "gpt-5
|
|
225
|
-
anthropic: "claude-
|
|
55
|
+
openai: "gpt-5.1",
|
|
56
|
+
anthropic: "claude-sonnet-4-5",
|
|
226
57
|
google: "gemini-2.5-flash"
|
|
227
58
|
};
|
|
228
59
|
var DEFAULT_EMBEDDING_MODELS = {
|
|
@@ -235,6 +66,52 @@ function requireEnv(value, name) {
|
|
|
235
66
|
}
|
|
236
67
|
return value;
|
|
237
68
|
}
|
|
69
|
+
function createLanguageModelFromConfig(provider, modelId, credentials) {
|
|
70
|
+
switch (provider) {
|
|
71
|
+
case "openai": {
|
|
72
|
+
const apiKey = credentials.openaiApiKey;
|
|
73
|
+
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
74
|
+
const openai = createOpenAI({ apiKey });
|
|
75
|
+
return openai(modelId);
|
|
76
|
+
}
|
|
77
|
+
case "anthropic": {
|
|
78
|
+
const apiKey = credentials.anthropicApiKey;
|
|
79
|
+
requireEnv(apiKey, "ANTHROPIC_API_KEY");
|
|
80
|
+
const anthropic = createAnthropic({ apiKey });
|
|
81
|
+
return anthropic(modelId);
|
|
82
|
+
}
|
|
83
|
+
case "google": {
|
|
84
|
+
const apiKey = credentials.googleApiKey;
|
|
85
|
+
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
86
|
+
const google = createGoogleGenerativeAI({ apiKey });
|
|
87
|
+
return google(modelId);
|
|
88
|
+
}
|
|
89
|
+
default: {
|
|
90
|
+
const exhaustiveCheck = provider;
|
|
91
|
+
throw new Error(`Unsupported provider: ${exhaustiveCheck}`);
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
function createEmbeddingModelFromConfig(provider, modelId, credentials) {
|
|
96
|
+
switch (provider) {
|
|
97
|
+
case "openai": {
|
|
98
|
+
const apiKey = credentials.openaiApiKey;
|
|
99
|
+
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
100
|
+
const openai = createOpenAI({ apiKey });
|
|
101
|
+
return openai.embedding(modelId);
|
|
102
|
+
}
|
|
103
|
+
case "google": {
|
|
104
|
+
const apiKey = credentials.googleApiKey;
|
|
105
|
+
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
106
|
+
const google = createGoogleGenerativeAI({ apiKey });
|
|
107
|
+
return google.textEmbeddingModel(modelId);
|
|
108
|
+
}
|
|
109
|
+
default: {
|
|
110
|
+
const exhaustiveCheck = provider;
|
|
111
|
+
throw new Error(`Unsupported embedding provider: ${exhaustiveCheck}`);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
238
115
|
function resolveLanguageModel(options = {}) {
|
|
239
116
|
const provider = options.provider || "openai";
|
|
240
117
|
const modelId = options.model || DEFAULT_LANGUAGE_MODELS[provider];
|
|
@@ -242,7 +119,7 @@ function resolveLanguageModel(options = {}) {
|
|
|
242
119
|
case "openai": {
|
|
243
120
|
const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
|
|
244
121
|
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
245
|
-
const openai =
|
|
122
|
+
const openai = createOpenAI({
|
|
246
123
|
apiKey
|
|
247
124
|
});
|
|
248
125
|
return {
|
|
@@ -254,7 +131,7 @@ function resolveLanguageModel(options = {}) {
|
|
|
254
131
|
case "anthropic": {
|
|
255
132
|
const apiKey = options.anthropicApiKey ?? env_default.ANTHROPIC_API_KEY;
|
|
256
133
|
requireEnv(apiKey, "ANTHROPIC_API_KEY");
|
|
257
|
-
const anthropic =
|
|
134
|
+
const anthropic = createAnthropic({
|
|
258
135
|
apiKey
|
|
259
136
|
});
|
|
260
137
|
return {
|
|
@@ -266,7 +143,7 @@ function resolveLanguageModel(options = {}) {
|
|
|
266
143
|
case "google": {
|
|
267
144
|
const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
|
|
268
145
|
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
269
|
-
const google =
|
|
146
|
+
const google = createGoogleGenerativeAI({
|
|
270
147
|
apiKey
|
|
271
148
|
});
|
|
272
149
|
return {
|
|
@@ -288,7 +165,7 @@ function resolveEmbeddingModel(options = {}) {
|
|
|
288
165
|
case "openai": {
|
|
289
166
|
const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
|
|
290
167
|
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
291
|
-
const openai =
|
|
168
|
+
const openai = createOpenAI({
|
|
292
169
|
apiKey
|
|
293
170
|
});
|
|
294
171
|
return {
|
|
@@ -300,7 +177,7 @@ function resolveEmbeddingModel(options = {}) {
|
|
|
300
177
|
case "google": {
|
|
301
178
|
const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
|
|
302
179
|
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
303
|
-
const google =
|
|
180
|
+
const google = createGoogleGenerativeAI({
|
|
304
181
|
apiKey
|
|
305
182
|
});
|
|
306
183
|
return {
|
|
@@ -317,7 +194,8 @@ function resolveEmbeddingModel(options = {}) {
|
|
|
317
194
|
}
|
|
318
195
|
|
|
319
196
|
// src/lib/client-factory.ts
|
|
320
|
-
function validateCredentials(options, requiredProvider) {
|
|
197
|
+
async function validateCredentials(options, requiredProvider) {
|
|
198
|
+
"use step";
|
|
321
199
|
const muxTokenId = options.muxTokenId ?? env_default.MUX_TOKEN_ID;
|
|
322
200
|
const muxTokenSecret = options.muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
|
|
323
201
|
const openaiApiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
|
|
@@ -351,32 +229,24 @@ function validateCredentials(options, requiredProvider) {
|
|
|
351
229
|
googleApiKey
|
|
352
230
|
};
|
|
353
231
|
}
|
|
354
|
-
function
|
|
355
|
-
|
|
356
|
-
throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
|
|
357
|
-
}
|
|
358
|
-
return new import_mux_node.default({
|
|
359
|
-
tokenId: credentials.muxTokenId,
|
|
360
|
-
tokenSecret: credentials.muxTokenSecret
|
|
361
|
-
});
|
|
362
|
-
}
|
|
363
|
-
function createWorkflowClients(options, provider) {
|
|
232
|
+
async function createWorkflowConfig(options, provider) {
|
|
233
|
+
"use step";
|
|
364
234
|
const providerToUse = provider || options.provider || "openai";
|
|
365
|
-
const credentials = validateCredentials(options, providerToUse);
|
|
366
|
-
const
|
|
235
|
+
const credentials = await validateCredentials(options, providerToUse);
|
|
236
|
+
const resolved = resolveLanguageModel({
|
|
367
237
|
...options,
|
|
368
238
|
provider: providerToUse
|
|
369
239
|
});
|
|
370
240
|
return {
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
241
|
+
credentials,
|
|
242
|
+
provider: resolved.provider,
|
|
243
|
+
modelId: resolved.modelId
|
|
374
244
|
};
|
|
375
245
|
}
|
|
376
246
|
|
|
377
247
|
// src/lib/image-download.ts
|
|
378
|
-
|
|
379
|
-
|
|
248
|
+
import { Buffer } from "buffer";
|
|
249
|
+
import pRetry, { AbortError } from "p-retry";
|
|
380
250
|
var DEFAULT_OPTIONS = {
|
|
381
251
|
timeout: 1e4,
|
|
382
252
|
retries: 3,
|
|
@@ -385,9 +255,10 @@ var DEFAULT_OPTIONS = {
|
|
|
385
255
|
exponentialBackoff: true
|
|
386
256
|
};
|
|
387
257
|
async function downloadImageAsBase64(url, options = {}) {
|
|
258
|
+
"use step";
|
|
388
259
|
const opts = { ...DEFAULT_OPTIONS, ...options };
|
|
389
260
|
let attemptCount = 0;
|
|
390
|
-
return (
|
|
261
|
+
return pRetry(
|
|
391
262
|
async () => {
|
|
392
263
|
attemptCount++;
|
|
393
264
|
const controller = new AbortController();
|
|
@@ -402,18 +273,18 @@ async function downloadImageAsBase64(url, options = {}) {
|
|
|
402
273
|
clearTimeout(timeoutId);
|
|
403
274
|
if (!response.ok) {
|
|
404
275
|
if (response.status >= 400 && response.status < 500 && response.status !== 429) {
|
|
405
|
-
throw new
|
|
276
|
+
throw new AbortError(`HTTP ${response.status}: ${response.statusText}`);
|
|
406
277
|
}
|
|
407
278
|
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
408
279
|
}
|
|
409
280
|
const contentType = response.headers.get("content-type");
|
|
410
281
|
if (!contentType?.startsWith("image/")) {
|
|
411
|
-
throw new
|
|
282
|
+
throw new AbortError(`Invalid content type: ${contentType}. Expected image/*`);
|
|
412
283
|
}
|
|
413
284
|
const arrayBuffer = await response.arrayBuffer();
|
|
414
|
-
const buffer =
|
|
285
|
+
const buffer = Buffer.from(arrayBuffer);
|
|
415
286
|
if (buffer.length === 0) {
|
|
416
|
-
throw new
|
|
287
|
+
throw new AbortError("Downloaded image is empty");
|
|
417
288
|
}
|
|
418
289
|
const base64Data = `data:${contentType};base64,${buffer.toString("base64")}`;
|
|
419
290
|
return {
|
|
@@ -426,7 +297,7 @@ async function downloadImageAsBase64(url, options = {}) {
|
|
|
426
297
|
};
|
|
427
298
|
} catch (error) {
|
|
428
299
|
clearTimeout(timeoutId);
|
|
429
|
-
if (error instanceof
|
|
300
|
+
if (error instanceof AbortError) {
|
|
430
301
|
throw error;
|
|
431
302
|
}
|
|
432
303
|
if (error instanceof Error) {
|
|
@@ -455,6 +326,7 @@ async function downloadImageAsBase64(url, options = {}) {
|
|
|
455
326
|
);
|
|
456
327
|
}
|
|
457
328
|
async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
|
|
329
|
+
"use step";
|
|
458
330
|
const results = [];
|
|
459
331
|
for (let i = 0; i < urls.length; i += maxConcurrent) {
|
|
460
332
|
const batch = urls.slice(i, i + maxConcurrent);
|
|
@@ -466,6 +338,7 @@ async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
|
|
|
466
338
|
}
|
|
467
339
|
|
|
468
340
|
// src/lib/mux-assets.ts
|
|
341
|
+
import Mux from "@mux/mux-node";
|
|
469
342
|
function getPlaybackId(asset) {
|
|
470
343
|
const playbackIds = asset.playback_ids || [];
|
|
471
344
|
const publicPlaybackId = playbackIds.find((pid) => pid.policy === "public");
|
|
@@ -480,7 +353,12 @@ function getPlaybackId(asset) {
|
|
|
480
353
|
"No public or signed playback ID found for this asset. A public or signed playback ID is required. DRM playback IDs are not currently supported."
|
|
481
354
|
);
|
|
482
355
|
}
|
|
483
|
-
async function getPlaybackIdForAsset(
|
|
356
|
+
async function getPlaybackIdForAsset(credentials, assetId) {
|
|
357
|
+
"use step";
|
|
358
|
+
const mux = new Mux({
|
|
359
|
+
tokenId: credentials.muxTokenId,
|
|
360
|
+
tokenSecret: credentials.muxTokenSecret
|
|
361
|
+
});
|
|
484
362
|
const asset = await mux.video.assets.retrieve(assetId);
|
|
485
363
|
const { id: playbackId, policy } = getPlaybackId(asset);
|
|
486
364
|
return { asset, playbackId, policy };
|
|
@@ -519,8 +397,8 @@ function resolveSection(defaultSection, override) {
|
|
|
519
397
|
}
|
|
520
398
|
return override;
|
|
521
399
|
}
|
|
522
|
-
function createPromptBuilder(
|
|
523
|
-
const { template, sectionOrder } =
|
|
400
|
+
function createPromptBuilder(config) {
|
|
401
|
+
const { template, sectionOrder } = config;
|
|
524
402
|
const getSection = (section, override) => {
|
|
525
403
|
const resolved = resolveSection(template[section], override);
|
|
526
404
|
return renderSection(resolved);
|
|
@@ -561,17 +439,18 @@ function createToneSection(instruction) {
|
|
|
561
439
|
}
|
|
562
440
|
|
|
563
441
|
// src/lib/url-signing.ts
|
|
564
|
-
|
|
565
|
-
function resolveSigningContext(
|
|
566
|
-
|
|
567
|
-
const
|
|
442
|
+
import Mux2 from "@mux/mux-node";
|
|
443
|
+
async function resolveSigningContext(config) {
|
|
444
|
+
"use step";
|
|
445
|
+
const keyId = config.muxSigningKey ?? env_default.MUX_SIGNING_KEY;
|
|
446
|
+
const keySecret = config.muxPrivateKey ?? env_default.MUX_PRIVATE_KEY;
|
|
568
447
|
if (!keyId || !keySecret) {
|
|
569
448
|
return void 0;
|
|
570
449
|
}
|
|
571
450
|
return { keyId, keySecret };
|
|
572
451
|
}
|
|
573
452
|
function createSigningClient(context) {
|
|
574
|
-
return new
|
|
453
|
+
return new Mux2({
|
|
575
454
|
// These are not needed for signing, but the SDK requires them
|
|
576
455
|
// Using empty strings as we only need the jwt functionality
|
|
577
456
|
tokenId: env_default.MUX_TOKEN_ID || "",
|
|
@@ -581,6 +460,7 @@ function createSigningClient(context) {
|
|
|
581
460
|
});
|
|
582
461
|
}
|
|
583
462
|
async function signPlaybackId(playbackId, context, type = "video", params) {
|
|
463
|
+
"use step";
|
|
584
464
|
const client = createSigningClient(context);
|
|
585
465
|
const stringParams = params ? Object.fromEntries(
|
|
586
466
|
Object.entries(params).map(([key, value]) => [key, String(value)])
|
|
@@ -592,6 +472,7 @@ async function signPlaybackId(playbackId, context, type = "video", params) {
|
|
|
592
472
|
});
|
|
593
473
|
}
|
|
594
474
|
async function signUrl(url, playbackId, context, type = "video", params) {
|
|
475
|
+
"use step";
|
|
595
476
|
const token = await signPlaybackId(playbackId, context, type, params);
|
|
596
477
|
const separator = url.includes("?") ? "&" : "?";
|
|
597
478
|
return `${url}${separator}token=${token}`;
|
|
@@ -600,6 +481,7 @@ async function signUrl(url, playbackId, context, type = "video", params) {
|
|
|
600
481
|
// src/primitives/storyboards.ts
|
|
601
482
|
var DEFAULT_STORYBOARD_WIDTH = 640;
|
|
602
483
|
async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, signingContext) {
|
|
484
|
+
"use step";
|
|
603
485
|
const baseUrl = `https://image.mux.com/${playbackId}/storyboard.png`;
|
|
604
486
|
if (signingContext) {
|
|
605
487
|
return signUrl(baseUrl, playbackId, signingContext, "storyboard", { width });
|
|
@@ -608,12 +490,12 @@ async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, si
|
|
|
608
490
|
}
|
|
609
491
|
|
|
610
492
|
// src/workflows/burned-in-captions.ts
|
|
611
|
-
var burnedInCaptionsSchema =
|
|
612
|
-
hasBurnedInCaptions:
|
|
613
|
-
confidence:
|
|
614
|
-
detectedLanguage:
|
|
493
|
+
var burnedInCaptionsSchema = z2.object({
|
|
494
|
+
hasBurnedInCaptions: z2.boolean(),
|
|
495
|
+
confidence: z2.number().min(0).max(1),
|
|
496
|
+
detectedLanguage: z2.string().nullable()
|
|
615
497
|
});
|
|
616
|
-
var SYSTEM_PROMPT =
|
|
498
|
+
var SYSTEM_PROMPT = dedent`
|
|
617
499
|
<role>
|
|
618
500
|
You are an expert at analyzing video frames to detect burned-in captions (also called open captions or hardcoded subtitles).
|
|
619
501
|
These are text overlays that are permanently embedded in the video image, common on TikTok, Instagram Reels, and other social media platforms.
|
|
@@ -656,14 +538,14 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
|
|
|
656
538
|
template: {
|
|
657
539
|
task: {
|
|
658
540
|
tag: "task",
|
|
659
|
-
content:
|
|
541
|
+
content: dedent`
|
|
660
542
|
Analyze the provided video storyboard to detect burned-in captions (hardcoded subtitles).
|
|
661
543
|
Count frames with text vs no text, note position consistency and whether text changes across frames.
|
|
662
544
|
Decide if captions exist, with confidence (0.0-1.0) and detected language if any.`
|
|
663
545
|
},
|
|
664
546
|
analysisSteps: {
|
|
665
547
|
tag: "analysis_steps",
|
|
666
|
-
content:
|
|
548
|
+
content: dedent`
|
|
667
549
|
1. COUNT how many frames contain text overlays vs. how many don't
|
|
668
550
|
2. Check if text appears in consistent positions across multiple frames
|
|
669
551
|
3. Verify text changes content between frames (indicating dialogue/narration)
|
|
@@ -672,7 +554,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
|
|
|
672
554
|
},
|
|
673
555
|
positiveIndicators: {
|
|
674
556
|
tag: "classify_as_captions",
|
|
675
|
-
content:
|
|
557
|
+
content: dedent`
|
|
676
558
|
ONLY classify as burned-in captions if:
|
|
677
559
|
- Text appears in multiple frames (not just 1-2 end frames)
|
|
678
560
|
- Text positioning is consistent across those frames
|
|
@@ -681,7 +563,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
|
|
|
681
563
|
},
|
|
682
564
|
negativeIndicators: {
|
|
683
565
|
tag: "not_captions",
|
|
684
|
-
content:
|
|
566
|
+
content: dedent`
|
|
685
567
|
DO NOT classify as burned-in captions:
|
|
686
568
|
- Marketing taglines appearing only in final 1-2 frames
|
|
687
569
|
- Single words or phrases that don't change between frames
|
|
@@ -696,65 +578,97 @@ function buildUserPrompt(promptOverrides) {
|
|
|
696
578
|
return burnedInCaptionsPromptBuilder.build(promptOverrides);
|
|
697
579
|
}
|
|
698
580
|
var DEFAULT_PROVIDER = "openai";
|
|
581
|
+
async function fetchImageAsBase64(imageUrl, imageDownloadOptions) {
|
|
582
|
+
"use step";
|
|
583
|
+
const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
|
|
584
|
+
return downloadResult.base64Data;
|
|
585
|
+
}
|
|
586
|
+
async function analyzeStoryboard({
|
|
587
|
+
imageDataUrl,
|
|
588
|
+
provider,
|
|
589
|
+
modelId,
|
|
590
|
+
credentials,
|
|
591
|
+
userPrompt,
|
|
592
|
+
systemPrompt
|
|
593
|
+
}) {
|
|
594
|
+
"use step";
|
|
595
|
+
const model = createLanguageModelFromConfig(
|
|
596
|
+
provider,
|
|
597
|
+
modelId,
|
|
598
|
+
credentials
|
|
599
|
+
);
|
|
600
|
+
const response = await generateObject({
|
|
601
|
+
model,
|
|
602
|
+
schema: burnedInCaptionsSchema,
|
|
603
|
+
experimental_telemetry: { isEnabled: true },
|
|
604
|
+
messages: [
|
|
605
|
+
{
|
|
606
|
+
role: "system",
|
|
607
|
+
content: systemPrompt
|
|
608
|
+
},
|
|
609
|
+
{
|
|
610
|
+
role: "user",
|
|
611
|
+
content: [
|
|
612
|
+
{ type: "text", text: userPrompt },
|
|
613
|
+
{ type: "image", image: imageDataUrl }
|
|
614
|
+
]
|
|
615
|
+
}
|
|
616
|
+
]
|
|
617
|
+
});
|
|
618
|
+
return {
|
|
619
|
+
result: response.object,
|
|
620
|
+
usage: {
|
|
621
|
+
inputTokens: response.usage.inputTokens,
|
|
622
|
+
outputTokens: response.usage.outputTokens,
|
|
623
|
+
totalTokens: response.usage.totalTokens,
|
|
624
|
+
reasoningTokens: response.usage.reasoningTokens,
|
|
625
|
+
cachedInputTokens: response.usage.cachedInputTokens
|
|
626
|
+
}
|
|
627
|
+
};
|
|
628
|
+
}
|
|
699
629
|
async function hasBurnedInCaptions(assetId, options = {}) {
|
|
630
|
+
"use workflow";
|
|
700
631
|
const {
|
|
701
632
|
provider = DEFAULT_PROVIDER,
|
|
702
633
|
model,
|
|
703
634
|
imageSubmissionMode = "url",
|
|
704
635
|
imageDownloadOptions,
|
|
705
636
|
promptOverrides,
|
|
706
|
-
...
|
|
637
|
+
...config
|
|
707
638
|
} = options;
|
|
708
639
|
const userPrompt = buildUserPrompt(promptOverrides);
|
|
709
|
-
const
|
|
710
|
-
{ ...
|
|
640
|
+
const workflowConfig = await createWorkflowConfig(
|
|
641
|
+
{ ...config, model },
|
|
711
642
|
provider
|
|
712
643
|
);
|
|
713
|
-
const { playbackId, policy } = await getPlaybackIdForAsset(
|
|
714
|
-
const signingContext = resolveSigningContext(options);
|
|
644
|
+
const { playbackId, policy } = await getPlaybackIdForAsset(workflowConfig.credentials, assetId);
|
|
645
|
+
const signingContext = await resolveSigningContext(options);
|
|
715
646
|
if (policy === "signed" && !signingContext) {
|
|
716
647
|
throw new Error(
|
|
717
648
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
718
649
|
);
|
|
719
650
|
}
|
|
720
651
|
const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
|
|
721
|
-
const analyzeStoryboard = async (imageDataUrl) => {
|
|
722
|
-
const response = await (0, import_ai.generateObject)({
|
|
723
|
-
model: clients.languageModel.model,
|
|
724
|
-
schema: burnedInCaptionsSchema,
|
|
725
|
-
abortSignal: options.abortSignal,
|
|
726
|
-
experimental_telemetry: { isEnabled: true },
|
|
727
|
-
messages: [
|
|
728
|
-
{
|
|
729
|
-
role: "system",
|
|
730
|
-
content: SYSTEM_PROMPT
|
|
731
|
-
},
|
|
732
|
-
{
|
|
733
|
-
role: "user",
|
|
734
|
-
content: [
|
|
735
|
-
{ type: "text", text: userPrompt },
|
|
736
|
-
{ type: "image", image: imageDataUrl }
|
|
737
|
-
]
|
|
738
|
-
}
|
|
739
|
-
]
|
|
740
|
-
});
|
|
741
|
-
return {
|
|
742
|
-
result: response.object,
|
|
743
|
-
usage: {
|
|
744
|
-
inputTokens: response.usage.inputTokens,
|
|
745
|
-
outputTokens: response.usage.outputTokens,
|
|
746
|
-
totalTokens: response.usage.totalTokens,
|
|
747
|
-
reasoningTokens: response.usage.reasoningTokens,
|
|
748
|
-
cachedInputTokens: response.usage.cachedInputTokens
|
|
749
|
-
}
|
|
750
|
-
};
|
|
751
|
-
};
|
|
752
652
|
let analysisResponse;
|
|
753
653
|
if (imageSubmissionMode === "base64") {
|
|
754
|
-
const
|
|
755
|
-
analysisResponse = await analyzeStoryboard(
|
|
654
|
+
const base64Data = await fetchImageAsBase64(imageUrl, imageDownloadOptions);
|
|
655
|
+
analysisResponse = await analyzeStoryboard({
|
|
656
|
+
imageDataUrl: base64Data,
|
|
657
|
+
provider: workflowConfig.provider,
|
|
658
|
+
modelId: workflowConfig.modelId,
|
|
659
|
+
credentials: workflowConfig.credentials,
|
|
660
|
+
userPrompt,
|
|
661
|
+
systemPrompt: SYSTEM_PROMPT
|
|
662
|
+
});
|
|
756
663
|
} else {
|
|
757
|
-
analysisResponse = await analyzeStoryboard(
|
|
664
|
+
analysisResponse = await analyzeStoryboard({
|
|
665
|
+
imageDataUrl: imageUrl,
|
|
666
|
+
provider: workflowConfig.provider,
|
|
667
|
+
modelId: workflowConfig.modelId,
|
|
668
|
+
credentials: workflowConfig.credentials,
|
|
669
|
+
userPrompt,
|
|
670
|
+
systemPrompt: SYSTEM_PROMPT
|
|
671
|
+
});
|
|
758
672
|
}
|
|
759
673
|
if (!analysisResponse.result) {
|
|
760
674
|
throw new Error("No analysis result received from AI provider");
|
|
@@ -770,8 +684,8 @@ async function hasBurnedInCaptions(assetId, options = {}) {
|
|
|
770
684
|
}
|
|
771
685
|
|
|
772
686
|
// src/workflows/chapters.ts
|
|
773
|
-
|
|
774
|
-
|
|
687
|
+
import { generateObject as generateObject2 } from "ai";
|
|
688
|
+
import { z as z3 } from "zod";
|
|
775
689
|
|
|
776
690
|
// src/lib/retry.ts
|
|
777
691
|
var DEFAULT_RETRY_OPTIONS = {
|
|
@@ -803,11 +717,11 @@ async function withRetry(fn, {
|
|
|
803
717
|
if (isLastAttempt || !shouldRetry(lastError, attempt + 1)) {
|
|
804
718
|
throw lastError;
|
|
805
719
|
}
|
|
806
|
-
const
|
|
720
|
+
const delay = calculateDelay(attempt + 1, baseDelay, maxDelay);
|
|
807
721
|
console.warn(
|
|
808
|
-
`Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(
|
|
722
|
+
`Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(delay)}ms...`
|
|
809
723
|
);
|
|
810
|
-
await new Promise((resolve) => setTimeout(resolve,
|
|
724
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
811
725
|
}
|
|
812
726
|
}
|
|
813
727
|
throw lastError || new Error("Retry failed with unknown error");
|
|
@@ -922,6 +836,7 @@ function parseVTTCues(vttContent) {
|
|
|
922
836
|
return cues;
|
|
923
837
|
}
|
|
924
838
|
async function buildTranscriptUrl(playbackId, trackId, signingContext) {
|
|
839
|
+
"use step";
|
|
925
840
|
const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
|
|
926
841
|
if (signingContext) {
|
|
927
842
|
return signUrl(baseUrl, playbackId, signingContext, "video");
|
|
@@ -929,6 +844,7 @@ async function buildTranscriptUrl(playbackId, trackId, signingContext) {
|
|
|
929
844
|
return baseUrl;
|
|
930
845
|
}
|
|
931
846
|
async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
|
|
847
|
+
"use step";
|
|
932
848
|
const { languageCode, cleanTranscript = true, signingContext } = options;
|
|
933
849
|
const track = findCaptionTrack(asset, languageCode);
|
|
934
850
|
if (!track) {
|
|
@@ -953,14 +869,44 @@ async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
|
|
|
953
869
|
}
|
|
954
870
|
|
|
955
871
|
// src/workflows/chapters.ts
|
|
956
|
-
var chapterSchema =
|
|
957
|
-
startTime:
|
|
958
|
-
title:
|
|
872
|
+
var chapterSchema = z3.object({
|
|
873
|
+
startTime: z3.number(),
|
|
874
|
+
title: z3.string()
|
|
959
875
|
});
|
|
960
|
-
var chaptersSchema =
|
|
961
|
-
chapters:
|
|
876
|
+
var chaptersSchema = z3.object({
|
|
877
|
+
chapters: z3.array(chapterSchema)
|
|
962
878
|
});
|
|
963
|
-
|
|
879
|
+
async function generateChaptersWithAI({
|
|
880
|
+
provider,
|
|
881
|
+
modelId,
|
|
882
|
+
credentials,
|
|
883
|
+
timestampedTranscript,
|
|
884
|
+
systemPrompt
|
|
885
|
+
}) {
|
|
886
|
+
"use step";
|
|
887
|
+
const model = createLanguageModelFromConfig(
|
|
888
|
+
provider,
|
|
889
|
+
modelId,
|
|
890
|
+
credentials
|
|
891
|
+
);
|
|
892
|
+
const response = await withRetry(
|
|
893
|
+
() => generateObject2({
|
|
894
|
+
model,
|
|
895
|
+
schema: chaptersSchema,
|
|
896
|
+
messages: [
|
|
897
|
+
{
|
|
898
|
+
role: "system",
|
|
899
|
+
content: systemPrompt
|
|
900
|
+
},
|
|
901
|
+
{
|
|
902
|
+
role: "user",
|
|
903
|
+
content: timestampedTranscript
|
|
904
|
+
}
|
|
905
|
+
]
|
|
906
|
+
})
|
|
907
|
+
);
|
|
908
|
+
return response.object;
|
|
909
|
+
}
|
|
964
910
|
var SYSTEM_PROMPT2 = `Your role is to segment the following captions into chunked chapters, summarising each chapter with a title.
|
|
965
911
|
|
|
966
912
|
Analyze the transcript and create logical chapter breaks based on topic changes, major transitions, or distinct sections of content. Each chapter should represent a meaningful segment of the video.
|
|
@@ -982,10 +928,11 @@ Important rules:
|
|
|
982
928
|
- Do not include any text before or after the JSON
|
|
983
929
|
- The JSON must be valid and parseable`;
|
|
984
930
|
async function generateChapters(assetId, languageCode, options = {}) {
|
|
985
|
-
|
|
986
|
-
const
|
|
987
|
-
const
|
|
988
|
-
const
|
|
931
|
+
"use workflow";
|
|
932
|
+
const { provider = "openai", model } = options;
|
|
933
|
+
const config = await createWorkflowConfig({ ...options, model }, provider);
|
|
934
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(config.credentials, assetId);
|
|
935
|
+
const signingContext = await resolveSigningContext(options);
|
|
989
936
|
if (policy === "signed" && !signingContext) {
|
|
990
937
|
throw new Error(
|
|
991
938
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1009,24 +956,13 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
1009
956
|
}
|
|
1010
957
|
let chaptersData = null;
|
|
1011
958
|
try {
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
role: "system",
|
|
1020
|
-
content: SYSTEM_PROMPT2
|
|
1021
|
-
},
|
|
1022
|
-
{
|
|
1023
|
-
role: "user",
|
|
1024
|
-
content: timestampedTranscript
|
|
1025
|
-
}
|
|
1026
|
-
]
|
|
1027
|
-
})
|
|
1028
|
-
);
|
|
1029
|
-
chaptersData = response.object;
|
|
959
|
+
chaptersData = await generateChaptersWithAI({
|
|
960
|
+
provider: config.provider,
|
|
961
|
+
modelId: config.modelId,
|
|
962
|
+
credentials: config.credentials,
|
|
963
|
+
timestampedTranscript,
|
|
964
|
+
systemPrompt: SYSTEM_PROMPT2
|
|
965
|
+
});
|
|
1030
966
|
} catch (error) {
|
|
1031
967
|
throw new Error(
|
|
1032
968
|
`Failed to generate chapters with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
@@ -1050,7 +986,7 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
1050
986
|
}
|
|
1051
987
|
|
|
1052
988
|
// src/workflows/embeddings.ts
|
|
1053
|
-
|
|
989
|
+
import { embed } from "ai";
|
|
1054
990
|
|
|
1055
991
|
// src/primitives/text-chunking.ts
|
|
1056
992
|
function estimateTokenCount(text) {
|
|
@@ -1138,13 +1074,6 @@ function chunkText(text, strategy) {
|
|
|
1138
1074
|
}
|
|
1139
1075
|
|
|
1140
1076
|
// src/workflows/embeddings.ts
|
|
1141
|
-
var DEFAULT_PROVIDER3 = "openai";
|
|
1142
|
-
var DEFAULT_CHUNKING_STRATEGY = {
|
|
1143
|
-
type: "token",
|
|
1144
|
-
maxTokens: 500,
|
|
1145
|
-
overlap: 100
|
|
1146
|
-
};
|
|
1147
|
-
var DEFAULT_BATCH_SIZE = 5;
|
|
1148
1077
|
function averageEmbeddings(embeddings) {
|
|
1149
1078
|
if (embeddings.length === 0) {
|
|
1150
1079
|
return [];
|
|
@@ -1161,51 +1090,46 @@ function averageEmbeddings(embeddings) {
|
|
|
1161
1090
|
}
|
|
1162
1091
|
return averaged;
|
|
1163
1092
|
}
|
|
1164
|
-
async function
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
);
|
|
1188
|
-
results.push(...batchResults);
|
|
1189
|
-
}
|
|
1190
|
-
return results;
|
|
1093
|
+
async function generateSingleChunkEmbedding({
|
|
1094
|
+
chunk,
|
|
1095
|
+
provider,
|
|
1096
|
+
modelId,
|
|
1097
|
+
credentials
|
|
1098
|
+
}) {
|
|
1099
|
+
"use step";
|
|
1100
|
+
const model = createEmbeddingModelFromConfig(provider, modelId, credentials);
|
|
1101
|
+
const response = await withRetry(
|
|
1102
|
+
() => embed({
|
|
1103
|
+
model,
|
|
1104
|
+
value: chunk.text
|
|
1105
|
+
})
|
|
1106
|
+
);
|
|
1107
|
+
return {
|
|
1108
|
+
chunkId: chunk.id,
|
|
1109
|
+
embedding: response.embedding,
|
|
1110
|
+
metadata: {
|
|
1111
|
+
startTime: chunk.startTime,
|
|
1112
|
+
endTime: chunk.endTime,
|
|
1113
|
+
tokenCount: chunk.tokenCount
|
|
1114
|
+
}
|
|
1115
|
+
};
|
|
1191
1116
|
}
|
|
1192
1117
|
async function generateVideoEmbeddings(assetId, options = {}) {
|
|
1118
|
+
"use workflow";
|
|
1193
1119
|
const {
|
|
1194
|
-
provider =
|
|
1120
|
+
provider = "openai",
|
|
1195
1121
|
model,
|
|
1196
1122
|
languageCode,
|
|
1197
|
-
chunkingStrategy =
|
|
1198
|
-
batchSize =
|
|
1199
|
-
abortSignal
|
|
1123
|
+
chunkingStrategy = { type: "token", maxTokens: 500, overlap: 100 },
|
|
1124
|
+
batchSize = 5
|
|
1200
1125
|
} = options;
|
|
1201
|
-
const credentials = validateCredentials(options, provider === "google" ? "google" : "openai");
|
|
1202
|
-
const muxClient = createMuxClient(credentials);
|
|
1126
|
+
const credentials = await validateCredentials(options, provider === "google" ? "google" : "openai");
|
|
1203
1127
|
const embeddingModel = resolveEmbeddingModel({ ...options, provider, model });
|
|
1204
1128
|
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
|
|
1205
|
-
|
|
1129
|
+
credentials,
|
|
1206
1130
|
assetId
|
|
1207
1131
|
);
|
|
1208
|
-
const signingContext = resolveSigningContext(options);
|
|
1132
|
+
const signingContext = await resolveSigningContext(options);
|
|
1209
1133
|
if (policy === "signed" && !signingContext) {
|
|
1210
1134
|
throw new Error(
|
|
1211
1135
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1235,14 +1159,22 @@ async function generateVideoEmbeddings(assetId, options = {}) {
|
|
|
1235
1159
|
if (chunks.length === 0) {
|
|
1236
1160
|
throw new Error("No chunks generated from transcript");
|
|
1237
1161
|
}
|
|
1238
|
-
|
|
1162
|
+
const chunkEmbeddings = [];
|
|
1239
1163
|
try {
|
|
1240
|
-
|
|
1241
|
-
chunks,
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1164
|
+
for (let i = 0; i < chunks.length; i += batchSize) {
|
|
1165
|
+
const batch = chunks.slice(i, i + batchSize);
|
|
1166
|
+
const batchResults = await Promise.all(
|
|
1167
|
+
batch.map(
|
|
1168
|
+
(chunk) => generateSingleChunkEmbedding({
|
|
1169
|
+
chunk,
|
|
1170
|
+
provider: embeddingModel.provider,
|
|
1171
|
+
modelId: embeddingModel.modelId,
|
|
1172
|
+
credentials
|
|
1173
|
+
})
|
|
1174
|
+
)
|
|
1175
|
+
);
|
|
1176
|
+
chunkEmbeddings.push(...batchResults);
|
|
1177
|
+
}
|
|
1246
1178
|
} catch (error) {
|
|
1247
1179
|
throw new Error(
|
|
1248
1180
|
`Failed to generate embeddings with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
@@ -1271,6 +1203,7 @@ async function generateVideoEmbeddings(assetId, options = {}) {
|
|
|
1271
1203
|
|
|
1272
1204
|
// src/primitives/thumbnails.ts
|
|
1273
1205
|
async function getThumbnailUrls(playbackId, duration, options = {}) {
|
|
1206
|
+
"use step";
|
|
1274
1207
|
const { interval = 10, width = 640, signingContext } = options;
|
|
1275
1208
|
const timestamps = [];
|
|
1276
1209
|
if (duration <= 50) {
|
|
@@ -1298,7 +1231,7 @@ var DEFAULT_THRESHOLDS = {
|
|
|
1298
1231
|
sexual: 0.7,
|
|
1299
1232
|
violence: 0.8
|
|
1300
1233
|
};
|
|
1301
|
-
var
|
|
1234
|
+
var DEFAULT_PROVIDER2 = "openai";
|
|
1302
1235
|
var HIVE_ENDPOINT = "https://api.thehive.ai/api/v2/task/sync";
|
|
1303
1236
|
var HIVE_SEXUAL_CATEGORIES = [
|
|
1304
1237
|
"general_nsfw",
|
|
@@ -1336,6 +1269,7 @@ var HIVE_VIOLENCE_CATEGORIES = [
|
|
|
1336
1269
|
"garm_death_injury_or_military_conflict"
|
|
1337
1270
|
];
|
|
1338
1271
|
async function processConcurrently(items, processor, maxConcurrent = 5) {
|
|
1272
|
+
"use step";
|
|
1339
1273
|
const results = [];
|
|
1340
1274
|
for (let i = 0; i < items.length; i += maxConcurrent) {
|
|
1341
1275
|
const batch = items.slice(i, i + maxConcurrent);
|
|
@@ -1346,19 +1280,21 @@ async function processConcurrently(items, processor, maxConcurrent = 5) {
|
|
|
1346
1280
|
return results;
|
|
1347
1281
|
}
|
|
1348
1282
|
async function requestOpenAIModeration(imageUrls, apiKey, model, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
|
|
1283
|
+
"use step";
|
|
1349
1284
|
const targetUrls = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map(
|
|
1350
|
-
(img) => ({ url: img.url, image: img.base64Data })
|
|
1351
|
-
) : imageUrls.map((url) => ({ url, image: url }));
|
|
1285
|
+
(img) => ({ url: img.url, image: img.base64Data, apiKey, model })
|
|
1286
|
+
) : imageUrls.map((url) => ({ url, image: url, apiKey, model }));
|
|
1352
1287
|
const moderate = async (entry) => {
|
|
1288
|
+
"use step";
|
|
1353
1289
|
try {
|
|
1354
1290
|
const res = await fetch("https://api.openai.com/v1/moderations", {
|
|
1355
1291
|
method: "POST",
|
|
1356
1292
|
headers: {
|
|
1357
1293
|
"Content-Type": "application/json",
|
|
1358
|
-
"Authorization": `Bearer ${apiKey}`
|
|
1294
|
+
"Authorization": `Bearer ${entry.apiKey}`
|
|
1359
1295
|
},
|
|
1360
1296
|
body: JSON.stringify({
|
|
1361
|
-
model,
|
|
1297
|
+
model: entry.model,
|
|
1362
1298
|
input: [
|
|
1363
1299
|
{
|
|
1364
1300
|
type: "image_url",
|
|
@@ -1402,6 +1338,7 @@ function getHiveCategoryScores(classes, categoryNames) {
|
|
|
1402
1338
|
return Math.max(...scores, 0);
|
|
1403
1339
|
}
|
|
1404
1340
|
async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
|
|
1341
|
+
"use step";
|
|
1405
1342
|
const targets = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map((img) => ({
|
|
1406
1343
|
url: img.url,
|
|
1407
1344
|
source: {
|
|
@@ -1414,6 +1351,7 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
|
|
|
1414
1351
|
source: { kind: "url", value: url }
|
|
1415
1352
|
}));
|
|
1416
1353
|
const moderate = async (entry) => {
|
|
1354
|
+
"use step";
|
|
1417
1355
|
try {
|
|
1418
1356
|
const formData = new FormData();
|
|
1419
1357
|
if (entry.source.kind === "url") {
|
|
@@ -1459,8 +1397,9 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
|
|
|
1459
1397
|
return processConcurrently(targets, moderate, maxConcurrent);
|
|
1460
1398
|
}
|
|
1461
1399
|
async function getModerationScores(assetId, options = {}) {
|
|
1400
|
+
"use workflow";
|
|
1462
1401
|
const {
|
|
1463
|
-
provider =
|
|
1402
|
+
provider = DEFAULT_PROVIDER2,
|
|
1464
1403
|
model = provider === "openai" ? "omni-moderation-latest" : void 0,
|
|
1465
1404
|
thresholds = DEFAULT_THRESHOLDS,
|
|
1466
1405
|
thumbnailInterval = 10,
|
|
@@ -1469,11 +1408,10 @@ async function getModerationScores(assetId, options = {}) {
|
|
|
1469
1408
|
imageSubmissionMode = "url",
|
|
1470
1409
|
imageDownloadOptions
|
|
1471
1410
|
} = options;
|
|
1472
|
-
const credentials = validateCredentials(options, provider === "openai" ? "openai" : void 0);
|
|
1473
|
-
const
|
|
1474
|
-
const { asset, playbackId, policy } = await getPlaybackIdForAsset(muxClient, assetId);
|
|
1411
|
+
const credentials = await validateCredentials(options, provider === "openai" ? "openai" : void 0);
|
|
1412
|
+
const { asset, playbackId, policy } = await getPlaybackIdForAsset(credentials, assetId);
|
|
1475
1413
|
const duration = asset.duration || 0;
|
|
1476
|
-
const signingContext = resolveSigningContext(options);
|
|
1414
|
+
const signingContext = await resolveSigningContext(options);
|
|
1477
1415
|
if (policy === "signed" && !signingContext) {
|
|
1478
1416
|
throw new Error(
|
|
1479
1417
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1529,17 +1467,18 @@ async function getModerationScores(assetId, options = {}) {
|
|
|
1529
1467
|
}
|
|
1530
1468
|
|
|
1531
1469
|
// src/workflows/summarization.ts
|
|
1532
|
-
|
|
1533
|
-
|
|
1470
|
+
import { generateObject as generateObject3 } from "ai";
|
|
1471
|
+
import dedent2 from "dedent";
|
|
1472
|
+
import { z as z4 } from "zod";
|
|
1534
1473
|
var SUMMARY_KEYWORD_LIMIT = 10;
|
|
1535
|
-
var summarySchema =
|
|
1536
|
-
keywords:
|
|
1537
|
-
title:
|
|
1538
|
-
description:
|
|
1474
|
+
var summarySchema = z4.object({
|
|
1475
|
+
keywords: z4.array(z4.string()),
|
|
1476
|
+
title: z4.string(),
|
|
1477
|
+
description: z4.string()
|
|
1539
1478
|
});
|
|
1540
1479
|
var TONE_INSTRUCTIONS = {
|
|
1541
1480
|
normal: "Provide a clear, straightforward analysis.",
|
|
1542
|
-
sassy: "Answer with
|
|
1481
|
+
sassy: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
|
|
1543
1482
|
professional: "Provide a professional, executive-level analysis suitable for business reporting."
|
|
1544
1483
|
};
|
|
1545
1484
|
var summarizationPromptBuilder = createPromptBuilder({
|
|
@@ -1550,7 +1489,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1550
1489
|
},
|
|
1551
1490
|
title: {
|
|
1552
1491
|
tag: "title_requirements",
|
|
1553
|
-
content:
|
|
1492
|
+
content: dedent2`
|
|
1554
1493
|
A short, compelling headline that immediately communicates the subject or action.
|
|
1555
1494
|
Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
|
|
1556
1495
|
Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
|
|
@@ -1558,7 +1497,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1558
1497
|
},
|
|
1559
1498
|
description: {
|
|
1560
1499
|
tag: "description_requirements",
|
|
1561
|
-
content:
|
|
1500
|
+
content: dedent2`
|
|
1562
1501
|
A concise summary (2-4 sentences) that describes what happens across the video.
|
|
1563
1502
|
Cover the main subjects, actions, setting, and any notable progression visible across frames.
|
|
1564
1503
|
Write in present tense. Be specific about observable details rather than making assumptions.
|
|
@@ -1566,7 +1505,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1566
1505
|
},
|
|
1567
1506
|
keywords: {
|
|
1568
1507
|
tag: "keywords_requirements",
|
|
1569
|
-
content:
|
|
1508
|
+
content: dedent2`
|
|
1570
1509
|
Specific, searchable terms (up to 10) that capture:
|
|
1571
1510
|
- Primary subjects (people, animals, objects)
|
|
1572
1511
|
- Actions and activities being performed
|
|
@@ -1578,7 +1517,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1578
1517
|
},
|
|
1579
1518
|
qualityGuidelines: {
|
|
1580
1519
|
tag: "quality_guidelines",
|
|
1581
|
-
content:
|
|
1520
|
+
content: dedent2`
|
|
1582
1521
|
- Examine all frames to understand the full context and progression
|
|
1583
1522
|
- Be precise: "golden retriever" is better than "dog" when identifiable
|
|
1584
1523
|
- Capture the narrative: what begins, develops, and concludes
|
|
@@ -1587,7 +1526,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1587
1526
|
},
|
|
1588
1527
|
sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
|
|
1589
1528
|
});
|
|
1590
|
-
var SYSTEM_PROMPT3 =
|
|
1529
|
+
var SYSTEM_PROMPT3 = dedent2`
|
|
1591
1530
|
<role>
|
|
1592
1531
|
You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
|
|
1593
1532
|
</role>
|
|
@@ -1619,7 +1558,29 @@ var SYSTEM_PROMPT3 = dedent_default`
|
|
|
1619
1558
|
- Only describe what is clearly observable in the frames or explicitly stated in the transcript
|
|
1620
1559
|
- Do not fabricate details or make unsupported assumptions
|
|
1621
1560
|
- Return structured data matching the requested schema
|
|
1622
|
-
</constraints
|
|
1561
|
+
</constraints>
|
|
1562
|
+
|
|
1563
|
+
<tone_guidance>
|
|
1564
|
+
Pay special attention to the <tone> section and lean heavily into those instructions.
|
|
1565
|
+
Adapt your entire analysis and writing style to match the specified tone - this should influence
|
|
1566
|
+
your word choice, personality, formality level, and overall presentation of the content.
|
|
1567
|
+
The tone instructions are not suggestions but core requirements for how you should express yourself.
|
|
1568
|
+
</tone_guidance>
|
|
1569
|
+
|
|
1570
|
+
<language_guidelines>
|
|
1571
|
+
AVOID these meta-descriptive phrases that reference the medium rather than the content:
|
|
1572
|
+
- "The image shows..." / "The storyboard shows..."
|
|
1573
|
+
- "In this video..." / "This video features..."
|
|
1574
|
+
- "The frames depict..." / "The footage shows..."
|
|
1575
|
+
- "We can see..." / "You can see..."
|
|
1576
|
+
- "The clip shows..." / "The scene shows..."
|
|
1577
|
+
|
|
1578
|
+
INSTEAD, describe the content directly:
|
|
1579
|
+
- BAD: "The video shows a chef preparing a meal"
|
|
1580
|
+
- GOOD: "A chef prepares a meal in a professional kitchen"
|
|
1581
|
+
|
|
1582
|
+
Write as if describing reality, not describing a recording of reality.
|
|
1583
|
+
</language_guidelines>`;
|
|
1623
1584
|
function buildUserPrompt2({
|
|
1624
1585
|
tone,
|
|
1625
1586
|
transcriptText,
|
|
@@ -1633,8 +1594,41 @@ function buildUserPrompt2({
|
|
|
1633
1594
|
}
|
|
1634
1595
|
return summarizationPromptBuilder.buildWithContext(promptOverrides, contextSections);
|
|
1635
1596
|
}
|
|
1636
|
-
|
|
1637
|
-
|
|
1597
|
+
async function analyzeStoryboard2(imageDataUrl, workflowConfig, userPrompt, systemPrompt) {
|
|
1598
|
+
"use step";
|
|
1599
|
+
const model = createLanguageModelFromConfig(
|
|
1600
|
+
workflowConfig.provider,
|
|
1601
|
+
workflowConfig.modelId,
|
|
1602
|
+
workflowConfig.credentials
|
|
1603
|
+
);
|
|
1604
|
+
const response = await generateObject3({
|
|
1605
|
+
model,
|
|
1606
|
+
schema: summarySchema,
|
|
1607
|
+
messages: [
|
|
1608
|
+
{
|
|
1609
|
+
role: "system",
|
|
1610
|
+
content: systemPrompt
|
|
1611
|
+
},
|
|
1612
|
+
{
|
|
1613
|
+
role: "user",
|
|
1614
|
+
content: [
|
|
1615
|
+
{ type: "text", text: userPrompt },
|
|
1616
|
+
{ type: "image", image: imageDataUrl }
|
|
1617
|
+
]
|
|
1618
|
+
}
|
|
1619
|
+
]
|
|
1620
|
+
});
|
|
1621
|
+
return {
|
|
1622
|
+
result: response.object,
|
|
1623
|
+
usage: {
|
|
1624
|
+
inputTokens: response.usage.inputTokens,
|
|
1625
|
+
outputTokens: response.usage.outputTokens,
|
|
1626
|
+
totalTokens: response.usage.totalTokens,
|
|
1627
|
+
reasoningTokens: response.usage.reasoningTokens,
|
|
1628
|
+
cachedInputTokens: response.usage.cachedInputTokens
|
|
1629
|
+
}
|
|
1630
|
+
};
|
|
1631
|
+
}
|
|
1638
1632
|
function normalizeKeywords(keywords) {
|
|
1639
1633
|
if (!Array.isArray(keywords) || keywords.length === 0) {
|
|
1640
1634
|
return [];
|
|
@@ -1659,23 +1653,24 @@ function normalizeKeywords(keywords) {
|
|
|
1659
1653
|
return normalized;
|
|
1660
1654
|
}
|
|
1661
1655
|
async function getSummaryAndTags(assetId, options) {
|
|
1656
|
+
"use workflow";
|
|
1662
1657
|
const {
|
|
1663
|
-
provider =
|
|
1658
|
+
provider = "openai",
|
|
1664
1659
|
model,
|
|
1665
|
-
tone =
|
|
1660
|
+
tone = "normal",
|
|
1666
1661
|
includeTranscript = true,
|
|
1667
1662
|
cleanTranscript = true,
|
|
1668
1663
|
imageSubmissionMode = "url",
|
|
1669
1664
|
imageDownloadOptions,
|
|
1670
|
-
abortSignal,
|
|
1665
|
+
abortSignal: _abortSignal,
|
|
1671
1666
|
promptOverrides
|
|
1672
1667
|
} = options ?? {};
|
|
1673
|
-
const
|
|
1668
|
+
const config = await createWorkflowConfig(
|
|
1674
1669
|
{ ...options, model },
|
|
1675
1670
|
provider
|
|
1676
1671
|
);
|
|
1677
|
-
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
|
|
1678
|
-
const signingContext = resolveSigningContext(options ?? {});
|
|
1672
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(config.credentials, assetId);
|
|
1673
|
+
const signingContext = await resolveSigningContext(options ?? {});
|
|
1679
1674
|
if (policy === "signed" && !signingContext) {
|
|
1680
1675
|
throw new Error(
|
|
1681
1676
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1692,66 +1687,212 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
1692
1687
|
promptOverrides
|
|
1693
1688
|
});
|
|
1694
1689
|
const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
|
|
1695
|
-
|
|
1696
|
-
const response = await (0, import_ai4.generateObject)({
|
|
1697
|
-
model: clients.languageModel.model,
|
|
1698
|
-
schema: summarySchema,
|
|
1699
|
-
abortSignal,
|
|
1700
|
-
messages: [
|
|
1701
|
-
{
|
|
1702
|
-
role: "system",
|
|
1703
|
-
content: SYSTEM_PROMPT3
|
|
1704
|
-
},
|
|
1705
|
-
{
|
|
1706
|
-
role: "user",
|
|
1707
|
-
content: [
|
|
1708
|
-
{ type: "text", text: userPrompt },
|
|
1709
|
-
{ type: "image", image: imageDataUrl }
|
|
1710
|
-
]
|
|
1711
|
-
}
|
|
1712
|
-
]
|
|
1713
|
-
});
|
|
1714
|
-
return response.object;
|
|
1715
|
-
};
|
|
1716
|
-
let aiAnalysis = null;
|
|
1690
|
+
let analysisResponse;
|
|
1717
1691
|
try {
|
|
1718
1692
|
if (imageSubmissionMode === "base64") {
|
|
1719
1693
|
const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
|
|
1720
|
-
|
|
1694
|
+
analysisResponse = await analyzeStoryboard2(
|
|
1695
|
+
downloadResult.base64Data,
|
|
1696
|
+
config,
|
|
1697
|
+
userPrompt,
|
|
1698
|
+
SYSTEM_PROMPT3
|
|
1699
|
+
);
|
|
1721
1700
|
} else {
|
|
1722
|
-
|
|
1701
|
+
analysisResponse = await withRetry(() => analyzeStoryboard2(imageUrl, config, userPrompt, SYSTEM_PROMPT3));
|
|
1723
1702
|
}
|
|
1724
1703
|
} catch (error) {
|
|
1725
1704
|
throw new Error(
|
|
1726
1705
|
`Failed to analyze video content with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
1727
1706
|
);
|
|
1728
1707
|
}
|
|
1729
|
-
if (!
|
|
1708
|
+
if (!analysisResponse.result) {
|
|
1730
1709
|
throw new Error(`Failed to analyze video content for asset ${assetId}`);
|
|
1731
1710
|
}
|
|
1732
|
-
if (!
|
|
1711
|
+
if (!analysisResponse.result.title) {
|
|
1733
1712
|
throw new Error(`Failed to generate title for asset ${assetId}`);
|
|
1734
1713
|
}
|
|
1735
|
-
if (!
|
|
1714
|
+
if (!analysisResponse.result.description) {
|
|
1736
1715
|
throw new Error(`Failed to generate description for asset ${assetId}`);
|
|
1737
1716
|
}
|
|
1738
1717
|
return {
|
|
1739
1718
|
assetId,
|
|
1740
|
-
title:
|
|
1741
|
-
description:
|
|
1742
|
-
tags: normalizeKeywords(
|
|
1743
|
-
storyboardUrl: imageUrl
|
|
1719
|
+
title: analysisResponse.result.title,
|
|
1720
|
+
description: analysisResponse.result.description,
|
|
1721
|
+
tags: normalizeKeywords(analysisResponse.result.keywords),
|
|
1722
|
+
storyboardUrl: imageUrl,
|
|
1723
|
+
usage: analysisResponse.usage,
|
|
1724
|
+
transcriptText: transcriptText || void 0
|
|
1744
1725
|
};
|
|
1745
1726
|
}
|
|
1746
1727
|
|
|
1747
1728
|
// src/workflows/translate-audio.ts
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
var
|
|
1729
|
+
import Mux3 from "@mux/mux-node";
|
|
1730
|
+
|
|
1731
|
+
// src/lib/language-codes.ts
|
|
1732
|
+
var ISO639_1_TO_3 = {
|
|
1733
|
+
// Major world languages
|
|
1734
|
+
en: "eng",
|
|
1735
|
+
// English
|
|
1736
|
+
es: "spa",
|
|
1737
|
+
// Spanish
|
|
1738
|
+
fr: "fra",
|
|
1739
|
+
// French
|
|
1740
|
+
de: "deu",
|
|
1741
|
+
// German
|
|
1742
|
+
it: "ita",
|
|
1743
|
+
// Italian
|
|
1744
|
+
pt: "por",
|
|
1745
|
+
// Portuguese
|
|
1746
|
+
ru: "rus",
|
|
1747
|
+
// Russian
|
|
1748
|
+
zh: "zho",
|
|
1749
|
+
// Chinese
|
|
1750
|
+
ja: "jpn",
|
|
1751
|
+
// Japanese
|
|
1752
|
+
ko: "kor",
|
|
1753
|
+
// Korean
|
|
1754
|
+
ar: "ara",
|
|
1755
|
+
// Arabic
|
|
1756
|
+
hi: "hin",
|
|
1757
|
+
// Hindi
|
|
1758
|
+
// European languages
|
|
1759
|
+
nl: "nld",
|
|
1760
|
+
// Dutch
|
|
1761
|
+
pl: "pol",
|
|
1762
|
+
// Polish
|
|
1763
|
+
sv: "swe",
|
|
1764
|
+
// Swedish
|
|
1765
|
+
da: "dan",
|
|
1766
|
+
// Danish
|
|
1767
|
+
no: "nor",
|
|
1768
|
+
// Norwegian
|
|
1769
|
+
fi: "fin",
|
|
1770
|
+
// Finnish
|
|
1771
|
+
el: "ell",
|
|
1772
|
+
// Greek
|
|
1773
|
+
cs: "ces",
|
|
1774
|
+
// Czech
|
|
1775
|
+
hu: "hun",
|
|
1776
|
+
// Hungarian
|
|
1777
|
+
ro: "ron",
|
|
1778
|
+
// Romanian
|
|
1779
|
+
bg: "bul",
|
|
1780
|
+
// Bulgarian
|
|
1781
|
+
hr: "hrv",
|
|
1782
|
+
// Croatian
|
|
1783
|
+
sk: "slk",
|
|
1784
|
+
// Slovak
|
|
1785
|
+
sl: "slv",
|
|
1786
|
+
// Slovenian
|
|
1787
|
+
uk: "ukr",
|
|
1788
|
+
// Ukrainian
|
|
1789
|
+
tr: "tur",
|
|
1790
|
+
// Turkish
|
|
1791
|
+
// Asian languages
|
|
1792
|
+
th: "tha",
|
|
1793
|
+
// Thai
|
|
1794
|
+
vi: "vie",
|
|
1795
|
+
// Vietnamese
|
|
1796
|
+
id: "ind",
|
|
1797
|
+
// Indonesian
|
|
1798
|
+
ms: "msa",
|
|
1799
|
+
// Malay
|
|
1800
|
+
tl: "tgl",
|
|
1801
|
+
// Tagalog/Filipino
|
|
1802
|
+
// Other languages
|
|
1803
|
+
he: "heb",
|
|
1804
|
+
// Hebrew
|
|
1805
|
+
fa: "fas",
|
|
1806
|
+
// Persian/Farsi
|
|
1807
|
+
bn: "ben",
|
|
1808
|
+
// Bengali
|
|
1809
|
+
ta: "tam",
|
|
1810
|
+
// Tamil
|
|
1811
|
+
te: "tel",
|
|
1812
|
+
// Telugu
|
|
1813
|
+
mr: "mar",
|
|
1814
|
+
// Marathi
|
|
1815
|
+
gu: "guj",
|
|
1816
|
+
// Gujarati
|
|
1817
|
+
kn: "kan",
|
|
1818
|
+
// Kannada
|
|
1819
|
+
ml: "mal",
|
|
1820
|
+
// Malayalam
|
|
1821
|
+
pa: "pan",
|
|
1822
|
+
// Punjabi
|
|
1823
|
+
ur: "urd",
|
|
1824
|
+
// Urdu
|
|
1825
|
+
sw: "swa",
|
|
1826
|
+
// Swahili
|
|
1827
|
+
af: "afr",
|
|
1828
|
+
// Afrikaans
|
|
1829
|
+
ca: "cat",
|
|
1830
|
+
// Catalan
|
|
1831
|
+
eu: "eus",
|
|
1832
|
+
// Basque
|
|
1833
|
+
gl: "glg",
|
|
1834
|
+
// Galician
|
|
1835
|
+
is: "isl",
|
|
1836
|
+
// Icelandic
|
|
1837
|
+
et: "est",
|
|
1838
|
+
// Estonian
|
|
1839
|
+
lv: "lav",
|
|
1840
|
+
// Latvian
|
|
1841
|
+
lt: "lit"
|
|
1842
|
+
// Lithuanian
|
|
1843
|
+
};
|
|
1844
|
+
var ISO639_3_TO_1 = Object.fromEntries(
|
|
1845
|
+
Object.entries(ISO639_1_TO_3).map(([iso1, iso3]) => [iso3, iso1])
|
|
1846
|
+
);
|
|
1847
|
+
function toISO639_3(code) {
|
|
1848
|
+
const normalized = code.toLowerCase().trim();
|
|
1849
|
+
if (normalized.length === 3) {
|
|
1850
|
+
return normalized;
|
|
1851
|
+
}
|
|
1852
|
+
return ISO639_1_TO_3[normalized] ?? normalized;
|
|
1853
|
+
}
|
|
1854
|
+
function toISO639_1(code) {
|
|
1855
|
+
const normalized = code.toLowerCase().trim();
|
|
1856
|
+
if (normalized.length === 2) {
|
|
1857
|
+
return normalized;
|
|
1858
|
+
}
|
|
1859
|
+
return ISO639_3_TO_1[normalized] ?? normalized;
|
|
1860
|
+
}
|
|
1861
|
+
function getLanguageCodePair(code) {
|
|
1862
|
+
const normalized = code.toLowerCase().trim();
|
|
1863
|
+
if (normalized.length === 2) {
|
|
1864
|
+
return {
|
|
1865
|
+
iso639_1: normalized,
|
|
1866
|
+
iso639_3: toISO639_3(normalized)
|
|
1867
|
+
};
|
|
1868
|
+
} else if (normalized.length === 3) {
|
|
1869
|
+
return {
|
|
1870
|
+
iso639_1: toISO639_1(normalized),
|
|
1871
|
+
iso639_3: normalized
|
|
1872
|
+
};
|
|
1873
|
+
}
|
|
1874
|
+
return {
|
|
1875
|
+
iso639_1: normalized,
|
|
1876
|
+
iso639_3: normalized
|
|
1877
|
+
};
|
|
1878
|
+
}
|
|
1879
|
+
function getLanguageName(code) {
|
|
1880
|
+
const iso639_1 = toISO639_1(code);
|
|
1881
|
+
try {
|
|
1882
|
+
const displayNames = new Intl.DisplayNames(["en"], { type: "language" });
|
|
1883
|
+
return displayNames.of(iso639_1) ?? code.toUpperCase();
|
|
1884
|
+
} catch {
|
|
1885
|
+
return code.toUpperCase();
|
|
1886
|
+
}
|
|
1887
|
+
}
|
|
1888
|
+
|
|
1889
|
+
// src/workflows/translate-audio.ts
|
|
1752
1890
|
var STATIC_RENDITION_POLL_INTERVAL_MS = 5e3;
|
|
1753
1891
|
var STATIC_RENDITION_MAX_ATTEMPTS = 36;
|
|
1754
|
-
|
|
1892
|
+
async function sleep(ms) {
|
|
1893
|
+
"use step";
|
|
1894
|
+
await new Promise((resolve) => setTimeout(resolve, ms));
|
|
1895
|
+
}
|
|
1755
1896
|
function getReadyAudioStaticRendition(asset) {
|
|
1756
1897
|
const files = asset.static_renditions?.files;
|
|
1757
1898
|
if (!files || files.length === 0) {
|
|
@@ -1762,19 +1903,21 @@ function getReadyAudioStaticRendition(asset) {
|
|
|
1762
1903
|
);
|
|
1763
1904
|
}
|
|
1764
1905
|
var hasReadyAudioStaticRendition = (asset) => Boolean(getReadyAudioStaticRendition(asset));
|
|
1765
|
-
async function requestStaticRenditionCreation(
|
|
1766
|
-
|
|
1906
|
+
async function requestStaticRenditionCreation(credentials, assetId) {
|
|
1907
|
+
"use step";
|
|
1908
|
+
const mux = new Mux3({
|
|
1909
|
+
tokenId: credentials.muxTokenId,
|
|
1910
|
+
tokenSecret: credentials.muxTokenSecret
|
|
1911
|
+
});
|
|
1767
1912
|
try {
|
|
1768
|
-
await
|
|
1913
|
+
await mux.video.assets.createStaticRendition(assetId, {
|
|
1769
1914
|
resolution: "audio-only"
|
|
1770
1915
|
});
|
|
1771
|
-
console.log("\u{1F4FC} Static rendition request accepted by Mux.");
|
|
1772
1916
|
} catch (error) {
|
|
1773
1917
|
const statusCode = error?.status ?? error?.statusCode;
|
|
1774
1918
|
const messages = error?.error?.messages;
|
|
1775
1919
|
const alreadyDefined = messages?.some((message2) => message2.toLowerCase().includes("already defined")) ?? error?.message?.toLowerCase().includes("already defined");
|
|
1776
1920
|
if (statusCode === 409 || alreadyDefined) {
|
|
1777
|
-
console.log("\u2139\uFE0F Static rendition already requested. Waiting for it to finish...");
|
|
1778
1921
|
return;
|
|
1779
1922
|
}
|
|
1780
1923
|
const message = error instanceof Error ? error.message : "Unknown error";
|
|
@@ -1783,31 +1926,34 @@ async function requestStaticRenditionCreation(muxClient, assetId) {
|
|
|
1783
1926
|
}
|
|
1784
1927
|
async function waitForAudioStaticRendition({
|
|
1785
1928
|
assetId,
|
|
1786
|
-
|
|
1929
|
+
credentials,
|
|
1787
1930
|
initialAsset
|
|
1788
1931
|
}) {
|
|
1932
|
+
"use step";
|
|
1933
|
+
const mux = new Mux3({
|
|
1934
|
+
tokenId: credentials.muxTokenId,
|
|
1935
|
+
tokenSecret: credentials.muxTokenSecret
|
|
1936
|
+
});
|
|
1789
1937
|
let currentAsset = initialAsset;
|
|
1790
1938
|
if (hasReadyAudioStaticRendition(currentAsset)) {
|
|
1791
1939
|
return currentAsset;
|
|
1792
1940
|
}
|
|
1793
1941
|
const status = currentAsset.static_renditions?.status ?? "not_requested";
|
|
1794
1942
|
if (status === "not_requested" || status === void 0) {
|
|
1795
|
-
await requestStaticRenditionCreation(
|
|
1943
|
+
await requestStaticRenditionCreation(credentials, assetId);
|
|
1796
1944
|
} else if (status === "errored") {
|
|
1797
|
-
|
|
1798
|
-
await requestStaticRenditionCreation(muxClient, assetId);
|
|
1945
|
+
await requestStaticRenditionCreation(credentials, assetId);
|
|
1799
1946
|
} else {
|
|
1800
|
-
console.
|
|
1947
|
+
console.warn(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
|
|
1801
1948
|
}
|
|
1802
1949
|
for (let attempt = 1; attempt <= STATIC_RENDITION_MAX_ATTEMPTS; attempt++) {
|
|
1803
|
-
await
|
|
1804
|
-
currentAsset = await
|
|
1950
|
+
await sleep(STATIC_RENDITION_POLL_INTERVAL_MS);
|
|
1951
|
+
currentAsset = await mux.video.assets.retrieve(assetId);
|
|
1805
1952
|
if (hasReadyAudioStaticRendition(currentAsset)) {
|
|
1806
|
-
console.log("\u2705 Audio static rendition is ready!");
|
|
1807
1953
|
return currentAsset;
|
|
1808
1954
|
}
|
|
1809
1955
|
const currentStatus = currentAsset.static_renditions?.status || "unknown";
|
|
1810
|
-
console.
|
|
1956
|
+
console.warn(
|
|
1811
1957
|
`\u231B Waiting for static rendition (attempt ${attempt}/${STATIC_RENDITION_MAX_ATTEMPTS}) \u2192 ${currentStatus}`
|
|
1812
1958
|
);
|
|
1813
1959
|
if (currentStatus === "errored") {
|
|
@@ -1820,55 +1966,180 @@ async function waitForAudioStaticRendition({
|
|
|
1820
1966
|
"Timed out waiting for the static rendition to become ready. Please try again in a moment."
|
|
1821
1967
|
);
|
|
1822
1968
|
}
|
|
1969
|
+
// Download the audio rendition at `audioUrl` and return its raw bytes.
// Throws when the HTTP response is not 2xx.
async function fetchAudioFromMux(audioUrl) {
  "use step";
  const response = await fetch(audioUrl);
  if (response.ok) {
    return response.arrayBuffer();
  }
  throw new Error(`Failed to fetch audio file: ${response.statusText}`);
}
|
|
1977
|
+
// Submit an audio buffer to the ElevenLabs dubbing API and return the new
// job's dubbing_id. Throws on a non-2xx API response.
async function createElevenLabsDubbingJob({
  audioBuffer,
  assetId,
  elevenLabsLangCode,
  elevenLabsApiKey,
  numSpeakers
}) {
  "use step";
  const form = new FormData();
  form.append("file", new Blob([audioBuffer], { type: "audio/mp4" }));
  form.append("target_lang", elevenLabsLangCode);
  form.append("num_speakers", numSpeakers.toString());
  form.append("name", `Mux Asset ${assetId} - auto to ${elevenLabsLangCode}`);
  const response = await fetch("https://api.elevenlabs.io/v1/dubbing", {
    method: "POST",
    headers: { "xi-api-key": elevenLabsApiKey },
    body: form
  });
  if (!response.ok) {
    throw new Error(`ElevenLabs API error: ${response.statusText}`);
  }
  const { dubbing_id: dubbingId } = await response.json();
  return dubbingId;
}
|
|
2004
|
+
// Query the ElevenLabs dubbing job status endpoint.
// Returns the job status string plus the target_languages list (empty array
// when the API omits it). Throws on a non-2xx response.
async function checkElevenLabsDubbingStatus({
  dubbingId,
  elevenLabsApiKey
}) {
  "use step";
  const url = `https://api.elevenlabs.io/v1/dubbing/${dubbingId}`;
  const response = await fetch(url, {
    headers: { "xi-api-key": elevenLabsApiKey }
  });
  if (!response.ok) {
    throw new Error(`Status check failed: ${response.statusText}`);
  }
  const body = await response.json();
  return {
    status: body.status,
    targetLanguages: body.target_languages ?? []
  };
}
|
|
2023
|
+
// Fetch the rendered dubbed audio for one target language of a dubbing job.
// Returns the raw bytes; throws on a non-2xx response.
async function downloadDubbedAudioFromElevenLabs({
  dubbingId,
  languageCode,
  elevenLabsApiKey
}) {
  "use step";
  const response = await fetch(
    `https://api.elevenlabs.io/v1/dubbing/${dubbingId}/audio/${languageCode}`,
    { headers: { "xi-api-key": elevenLabsApiKey } }
  );
  if (!response.ok) {
    throw new Error(`Failed to fetch dubbed audio: ${response.statusText}`);
  }
  return response.arrayBuffer();
}
|
|
2040
|
+
// Upload the dubbed audio bytes to S3-compatible storage and return a
// presigned GET URL (1 hour expiry) that Mux can ingest the track from.
async function uploadDubbedAudioToS3({
  dubbedAudioBuffer,
  assetId,
  toLanguageCode,
  s3Endpoint,
  s3Region,
  s3Bucket,
  s3AccessKeyId,
  s3SecretAccessKey
}) {
  "use step";
  // AWS SDK modules are loaded lazily so they are only paid for inside this step.
  const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
  const { Upload } = await import("@aws-sdk/lib-storage");
  const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
  const client = new S3Client({
    region: s3Region,
    endpoint: s3Endpoint,
    credentials: {
      accessKeyId: s3AccessKeyId,
      secretAccessKey: s3SecretAccessKey
    },
    forcePathStyle: true
  });
  const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
  // Multipart-capable upload of the audio bytes.
  await new Upload({
    client,
    params: {
      Bucket: s3Bucket,
      Key: audioKey,
      Body: new Uint8Array(dubbedAudioBuffer),
      ContentType: "audio/mp4"
    }
  }).done();
  const presignedUrl = await getSignedUrl(
    client,
    new GetObjectCommand({ Bucket: s3Bucket, Key: audioKey }),
    {
      expiresIn: 3600
      // 1 hour
    }
  );
  console.warn(`\u2705 Audio uploaded successfully to: ${audioKey}`);
  console.warn(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
  return presignedUrl;
}
|
|
2086
|
+
/**
 * Create a new audio track on a Mux asset from a presigned audio URL.
 *
 * @param {{muxTokenId: string, muxTokenSecret: string}} credentials - Mux API token pair.
 * @param {string} assetId - ID of the Mux asset the track is attached to.
 * @param {string} languageCode - Language code used for the track's language_code
 *   (ISO 639-1 per the callers visible in this file — confirm upstream).
 * @param {string} presignedUrl - Fetchable URL of the dubbed audio file.
 * @returns {Promise<string>} The ID of the created track.
 * @throws {Error} If Mux does not return a track ID.
 */
async function createAudioTrackOnMux(credentials, assetId, languageCode, presignedUrl) {
  "use step";
  const mux = new Mux3({
    tokenId: credentials.muxTokenId,
    tokenSecret: credentials.muxTokenSecret
  });
  // Resolve a human-readable track name. Intl.DisplayNames#of throws a
  // RangeError on structurally invalid codes, so guard it (matching the
  // getLanguageName helper elsewhere in this file) and fall back to the
  // upper-cased code instead of failing before the Mux call.
  let languageName;
  try {
    languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(languageCode) || languageCode.toUpperCase();
  } catch {
    languageName = languageCode.toUpperCase();
  }
  const trackName = `${languageName} (auto-dubbed)`;
  const trackResponse = await mux.video.assets.createTrack(assetId, {
    type: "audio",
    language_code: languageCode,
    name: trackName,
    url: presignedUrl
  });
  if (!trackResponse.id) {
    throw new Error("Failed to create audio track: no track ID returned from Mux");
  }
  return trackResponse.id;
}
|
|
1823
2105
|
async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
2106
|
+
"use workflow";
|
|
1824
2107
|
const {
|
|
1825
2108
|
provider = "elevenlabs",
|
|
1826
2109
|
numSpeakers = 0,
|
|
1827
2110
|
// 0 = auto-detect
|
|
1828
|
-
muxTokenId,
|
|
1829
|
-
muxTokenSecret,
|
|
1830
2111
|
elevenLabsApiKey,
|
|
1831
2112
|
uploadToMux = true
|
|
1832
2113
|
} = options;
|
|
1833
2114
|
if (provider !== "elevenlabs") {
|
|
1834
2115
|
throw new Error("Only ElevenLabs provider is currently supported for audio translation");
|
|
1835
2116
|
}
|
|
1836
|
-
const
|
|
1837
|
-
const muxSecret = muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
|
|
2117
|
+
const credentials = await validateCredentials(options);
|
|
1838
2118
|
const elevenLabsKey = elevenLabsApiKey ?? env_default.ELEVENLABS_API_KEY;
|
|
1839
2119
|
const s3Endpoint = options.s3Endpoint ?? env_default.S3_ENDPOINT;
|
|
1840
2120
|
const s3Region = options.s3Region ?? env_default.S3_REGION ?? "auto";
|
|
1841
2121
|
const s3Bucket = options.s3Bucket ?? env_default.S3_BUCKET;
|
|
1842
2122
|
const s3AccessKeyId = options.s3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
|
|
1843
2123
|
const s3SecretAccessKey = options.s3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
|
|
1844
|
-
if (!muxId || !muxSecret) {
|
|
1845
|
-
throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
|
|
1846
|
-
}
|
|
1847
2124
|
if (!elevenLabsKey) {
|
|
1848
2125
|
throw new Error("ElevenLabs API key is required. Provide elevenLabsApiKey in options or set ELEVENLABS_API_KEY environment variable.");
|
|
1849
2126
|
}
|
|
1850
2127
|
if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
|
|
1851
2128
|
throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
|
|
1852
2129
|
}
|
|
1853
|
-
const
|
|
1854
|
-
|
|
1855
|
-
tokenSecret: muxSecret
|
|
1856
|
-
});
|
|
1857
|
-
console.log(`\u{1F3AC} Fetching Mux asset: ${assetId}`);
|
|
1858
|
-
const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(mux, assetId);
|
|
1859
|
-
const signingContext = resolveSigningContext(options);
|
|
2130
|
+
const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(credentials, assetId);
|
|
2131
|
+
const signingContext = await resolveSigningContext(options);
|
|
1860
2132
|
if (policy === "signed" && !signingContext) {
|
|
1861
2133
|
throw new Error(
|
|
1862
2134
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
1863
2135
|
);
|
|
1864
2136
|
}
|
|
1865
|
-
console.log("\u{1F50D} Checking for audio-only static rendition...");
|
|
1866
2137
|
let currentAsset = initialAsset;
|
|
1867
2138
|
if (!hasReadyAudioStaticRendition(currentAsset)) {
|
|
1868
|
-
console.
|
|
2139
|
+
console.warn("\u274C No ready audio static rendition found. Requesting one now...");
|
|
1869
2140
|
currentAsset = await waitForAudioStaticRendition({
|
|
1870
2141
|
assetId,
|
|
1871
|
-
|
|
2142
|
+
credentials,
|
|
1872
2143
|
initialAsset: currentAsset
|
|
1873
2144
|
});
|
|
1874
2145
|
}
|
|
@@ -1882,58 +2153,44 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
1882
2153
|
if (policy === "signed" && signingContext) {
|
|
1883
2154
|
audioUrl = await signUrl(audioUrl, playbackId, signingContext, "video");
|
|
1884
2155
|
}
|
|
1885
|
-
console.
|
|
1886
|
-
|
|
2156
|
+
console.warn("\u{1F399}\uFE0F Fetching audio from Mux...");
|
|
2157
|
+
let audioBuffer;
|
|
2158
|
+
try {
|
|
2159
|
+
audioBuffer = await fetchAudioFromMux(audioUrl);
|
|
2160
|
+
} catch (error) {
|
|
2161
|
+
throw new Error(`Failed to fetch audio from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2162
|
+
}
|
|
2163
|
+
console.warn("\u{1F399}\uFE0F Creating dubbing job in ElevenLabs...");
|
|
2164
|
+
const elevenLabsLangCode = toISO639_3(toLanguageCode);
|
|
2165
|
+
console.warn(`\u{1F50D} Creating dubbing job for asset ${assetId} with language code: ${elevenLabsLangCode}`);
|
|
1887
2166
|
let dubbingId;
|
|
1888
2167
|
try {
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
const audioFile = audioBlob;
|
|
1896
|
-
const formData = new FormData();
|
|
1897
|
-
formData.append("file", audioFile);
|
|
1898
|
-
formData.append("target_lang", toLanguageCode);
|
|
1899
|
-
formData.append("num_speakers", numSpeakers.toString());
|
|
1900
|
-
formData.append("name", `Mux Asset ${assetId} - auto to ${toLanguageCode}`);
|
|
1901
|
-
const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
|
|
1902
|
-
method: "POST",
|
|
1903
|
-
headers: {
|
|
1904
|
-
"xi-api-key": elevenLabsKey
|
|
1905
|
-
},
|
|
1906
|
-
body: formData
|
|
2168
|
+
dubbingId = await createElevenLabsDubbingJob({
|
|
2169
|
+
audioBuffer,
|
|
2170
|
+
assetId,
|
|
2171
|
+
elevenLabsLangCode,
|
|
2172
|
+
elevenLabsApiKey: elevenLabsKey,
|
|
2173
|
+
numSpeakers
|
|
1907
2174
|
});
|
|
1908
|
-
|
|
1909
|
-
throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
|
|
1910
|
-
}
|
|
1911
|
-
const dubbingData = await dubbingResponse.json();
|
|
1912
|
-
dubbingId = dubbingData.dubbing_id;
|
|
1913
|
-
console.log(`\u2705 Dubbing job created: ${dubbingId}`);
|
|
1914
|
-
console.log(`\u23F1\uFE0F Expected duration: ${dubbingData.expected_duration_sec}s`);
|
|
2175
|
+
console.warn(`\u2705 Dubbing job created with ID: ${dubbingId}`);
|
|
1915
2176
|
} catch (error) {
|
|
1916
2177
|
throw new Error(`Failed to create ElevenLabs dubbing job: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
1917
2178
|
}
|
|
1918
|
-
console.
|
|
2179
|
+
console.warn("\u23F3 Waiting for dubbing to complete...");
|
|
1919
2180
|
let dubbingStatus = "dubbing";
|
|
1920
2181
|
let pollAttempts = 0;
|
|
1921
2182
|
const maxPollAttempts = 180;
|
|
2183
|
+
let targetLanguages = [];
|
|
1922
2184
|
while (dubbingStatus === "dubbing" && pollAttempts < maxPollAttempts) {
|
|
1923
|
-
await
|
|
2185
|
+
await sleep(1e4);
|
|
1924
2186
|
pollAttempts++;
|
|
1925
2187
|
try {
|
|
1926
|
-
const
|
|
1927
|
-
|
|
1928
|
-
|
|
1929
|
-
}
|
|
2188
|
+
const statusResult = await checkElevenLabsDubbingStatus({
|
|
2189
|
+
dubbingId,
|
|
2190
|
+
elevenLabsApiKey: elevenLabsKey
|
|
1930
2191
|
});
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
}
|
|
1934
|
-
const statusData = await statusResponse.json();
|
|
1935
|
-
dubbingStatus = statusData.status;
|
|
1936
|
-
console.log(`\u{1F4CA} Status check ${pollAttempts}: ${dubbingStatus}`);
|
|
2192
|
+
dubbingStatus = statusResult.status;
|
|
2193
|
+
targetLanguages = statusResult.targetLanguages;
|
|
1937
2194
|
if (dubbingStatus === "failed") {
|
|
1938
2195
|
throw new Error("ElevenLabs dubbing job failed");
|
|
1939
2196
|
}
|
|
@@ -1944,89 +2201,77 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
1944
2201
|
if (dubbingStatus !== "dubbed") {
|
|
1945
2202
|
throw new Error(`Dubbing job timed out or failed. Final status: ${dubbingStatus}`);
|
|
1946
2203
|
}
|
|
1947
|
-
console.
|
|
2204
|
+
console.warn("\u2705 Dubbing completed successfully!");
|
|
1948
2205
|
if (!uploadToMux) {
|
|
2206
|
+
const targetLanguage2 = getLanguageCodePair(toLanguageCode);
|
|
1949
2207
|
return {
|
|
1950
2208
|
assetId,
|
|
1951
|
-
targetLanguageCode:
|
|
2209
|
+
targetLanguageCode: targetLanguage2.iso639_1,
|
|
2210
|
+
targetLanguage: targetLanguage2,
|
|
1952
2211
|
dubbingId
|
|
1953
2212
|
};
|
|
1954
2213
|
}
|
|
1955
|
-
console.
|
|
2214
|
+
console.warn("\u{1F4E5} Downloading dubbed audio from ElevenLabs...");
|
|
1956
2215
|
let dubbedAudioBuffer;
|
|
1957
2216
|
try {
|
|
1958
|
-
const
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
if (!
|
|
1965
|
-
|
|
2217
|
+
const requestedLangCode = toISO639_3(toLanguageCode);
|
|
2218
|
+
let downloadLangCode = targetLanguages.find(
|
|
2219
|
+
(lang) => lang === requestedLangCode
|
|
2220
|
+
) ?? targetLanguages.find(
|
|
2221
|
+
(lang) => lang.toLowerCase() === requestedLangCode.toLowerCase()
|
|
2222
|
+
);
|
|
2223
|
+
if (!downloadLangCode && targetLanguages.length > 0) {
|
|
2224
|
+
downloadLangCode = targetLanguages[0];
|
|
2225
|
+
console.warn(`\u26A0\uFE0F Requested language "${requestedLangCode}" not found in target_languages. Using "${downloadLangCode}" instead.`);
|
|
2226
|
+
}
|
|
2227
|
+
if (!downloadLangCode) {
|
|
2228
|
+
downloadLangCode = requestedLangCode;
|
|
2229
|
+
console.warn(`\u26A0\uFE0F No target_languages available from ElevenLabs status. Using requested language code: ${requestedLangCode}`);
|
|
1966
2230
|
}
|
|
1967
|
-
dubbedAudioBuffer = await
|
|
1968
|
-
|
|
2231
|
+
dubbedAudioBuffer = await downloadDubbedAudioFromElevenLabs({
|
|
2232
|
+
dubbingId,
|
|
2233
|
+
languageCode: downloadLangCode,
|
|
2234
|
+
elevenLabsApiKey: elevenLabsKey
|
|
2235
|
+
});
|
|
2236
|
+
console.warn("\u2705 Dubbed audio downloaded successfully!");
|
|
1969
2237
|
} catch (error) {
|
|
1970
2238
|
throw new Error(`Failed to download dubbed audio: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
1971
2239
|
}
|
|
1972
|
-
console.
|
|
1973
|
-
const s3Client = new import_client_s3.S3Client({
|
|
1974
|
-
region: s3Region,
|
|
1975
|
-
endpoint: s3Endpoint,
|
|
1976
|
-
credentials: {
|
|
1977
|
-
accessKeyId: s3AccessKeyId,
|
|
1978
|
-
secretAccessKey: s3SecretAccessKey
|
|
1979
|
-
},
|
|
1980
|
-
forcePathStyle: true
|
|
1981
|
-
});
|
|
1982
|
-
const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
|
|
2240
|
+
console.warn("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
|
|
1983
2241
|
let presignedUrl;
|
|
1984
2242
|
try {
|
|
1985
|
-
|
|
1986
|
-
|
|
1987
|
-
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
|
|
1992
|
-
|
|
1993
|
-
|
|
1994
|
-
await upload.done();
|
|
1995
|
-
console.log(`\u2705 Audio uploaded successfully to: ${audioKey}`);
|
|
1996
|
-
const getObjectCommand = new import_client_s3.GetObjectCommand({
|
|
1997
|
-
Bucket: s3Bucket,
|
|
1998
|
-
Key: audioKey
|
|
1999
|
-
});
|
|
2000
|
-
presignedUrl = await (0, import_s3_request_presigner.getSignedUrl)(s3Client, getObjectCommand, {
|
|
2001
|
-
expiresIn: 3600
|
|
2002
|
-
// 1 hour
|
|
2243
|
+
presignedUrl = await uploadDubbedAudioToS3({
|
|
2244
|
+
dubbedAudioBuffer,
|
|
2245
|
+
assetId,
|
|
2246
|
+
toLanguageCode,
|
|
2247
|
+
s3Endpoint,
|
|
2248
|
+
s3Region,
|
|
2249
|
+
s3Bucket,
|
|
2250
|
+
s3AccessKeyId,
|
|
2251
|
+
s3SecretAccessKey
|
|
2003
2252
|
});
|
|
2004
|
-
console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
|
|
2005
2253
|
} catch (error) {
|
|
2006
2254
|
throw new Error(`Failed to upload audio to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2007
2255
|
}
|
|
2008
|
-
console.
|
|
2256
|
+
console.warn("\u{1F4F9} Adding dubbed audio track to Mux asset...");
|
|
2009
2257
|
let uploadedTrackId;
|
|
2258
|
+
const muxLangCode = toISO639_1(toLanguageCode);
|
|
2010
2259
|
try {
|
|
2011
|
-
|
|
2260
|
+
uploadedTrackId = await createAudioTrackOnMux(credentials, assetId, muxLangCode, presignedUrl);
|
|
2261
|
+
const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(muxLangCode) || muxLangCode.toUpperCase();
|
|
2012
2262
|
const trackName = `${languageName} (auto-dubbed)`;
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
language_code: toLanguageCode,
|
|
2016
|
-
name: trackName,
|
|
2017
|
-
url: presignedUrl
|
|
2018
|
-
});
|
|
2019
|
-
uploadedTrackId = trackResponse.id;
|
|
2020
|
-
console.log(`\u2705 Audio track added to Mux asset with ID: ${uploadedTrackId}`);
|
|
2021
|
-
console.log(`\u{1F3B5} Track name: "${trackName}"`);
|
|
2263
|
+
console.warn(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
|
|
2264
|
+
console.warn(`\u{1F4CB} Track name: "${trackName}"`);
|
|
2022
2265
|
} catch (error) {
|
|
2023
2266
|
console.warn(`\u26A0\uFE0F Failed to add audio track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2024
|
-
console.
|
|
2025
|
-
console.
|
|
2267
|
+
console.warn("\u{1F517} You can manually add the track using this presigned URL:");
|
|
2268
|
+
console.warn(presignedUrl);
|
|
2026
2269
|
}
|
|
2270
|
+
const targetLanguage = getLanguageCodePair(toLanguageCode);
|
|
2027
2271
|
return {
|
|
2028
2272
|
assetId,
|
|
2029
|
-
targetLanguageCode:
|
|
2273
|
+
targetLanguageCode: targetLanguage.iso639_1,
|
|
2274
|
+
targetLanguage,
|
|
2030
2275
|
dubbingId,
|
|
2031
2276
|
uploadedTrackId,
|
|
2032
2277
|
presignedUrl
|
|
@@ -2034,43 +2279,149 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
2034
2279
|
}
|
|
2035
2280
|
|
|
2036
2281
|
// src/workflows/translate-captions.ts
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
var
|
|
2041
|
-
|
|
2042
|
-
var translationSchema = import_zod5.z.object({
|
|
2043
|
-
translation: import_zod5.z.string()
|
|
2282
|
+
import Mux4 from "@mux/mux-node";
|
|
2283
|
+
import { generateObject as generateObject4 } from "ai";
|
|
2284
|
+
import { z as z5 } from "zod";
|
|
2285
|
+
var translationSchema = z5.object({
|
|
2286
|
+
translation: z5.string()
|
|
2044
2287
|
});
|
|
2045
|
-
|
|
2288
|
+
// Download the VTT caption file at `vttUrl` and return its text content.
// Throws when the HTTP response is not 2xx.
async function fetchVttFromMux(vttUrl) {
  "use step";
  const response = await fetch(vttUrl);
  if (response.ok) {
    return response.text();
  }
  throw new Error(`Failed to fetch VTT file: ${response.statusText}`);
}
|
|
2296
|
+
// Translate a VTT caption file with the configured language model, keeping
// timestamps and formatting intact. Returns the translated VTT plus the
// token-usage accounting reported by the model call.
async function translateVttWithAI({
  vttContent,
  fromLanguageCode,
  toLanguageCode,
  provider,
  modelId,
  credentials,
  abortSignal
}) {
  "use step";
  const model = createLanguageModelFromConfig(provider, modelId, credentials);
  const prompt = `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.

${vttContent}`;
  const response = await generateObject4({
    model,
    schema: translationSchema,
    abortSignal,
    messages: [
      {
        role: "user",
        content: prompt
      }
    ]
  });
  const { usage } = response;
  return {
    translatedVtt: response.object.translation,
    usage: {
      inputTokens: usage.inputTokens,
      outputTokens: usage.outputTokens,
      totalTokens: usage.totalTokens,
      reasoningTokens: usage.reasoningTokens,
      cachedInputTokens: usage.cachedInputTokens
    }
  };
}
|
|
2335
|
+
// Upload a translated VTT file to S3-compatible storage and return a
// presigned GET URL (1 hour expiry) for Mux to ingest the text track from.
async function uploadVttToS3({
  translatedVtt,
  assetId,
  fromLanguageCode,
  toLanguageCode,
  s3Endpoint,
  s3Region,
  s3Bucket,
  s3AccessKeyId,
  s3SecretAccessKey
}) {
  "use step";
  // AWS SDK modules are loaded lazily so they are only paid for inside this step.
  const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
  const { Upload } = await import("@aws-sdk/lib-storage");
  const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
  const client = new S3Client({
    region: s3Region,
    endpoint: s3Endpoint,
    credentials: {
      accessKeyId: s3AccessKeyId,
      secretAccessKey: s3SecretAccessKey
    },
    forcePathStyle: true
  });
  const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
  await new Upload({
    client,
    params: {
      Bucket: s3Bucket,
      Key: vttKey,
      Body: translatedVtt,
      ContentType: "text/vtt"
    }
  }).done();
  return getSignedUrl(
    client,
    new GetObjectCommand({ Bucket: s3Bucket, Key: vttKey }),
    {
      expiresIn: 3600
      // 1 hour
    }
  );
}
|
|
2380
|
+
// Attach a subtitles text track to a Mux asset from a presigned VTT URL.
// Returns the created track's ID; throws if Mux returns no ID.
async function createTextTrackOnMux(credentials, assetId, languageCode, trackName, presignedUrl) {
  "use step";
  const mux = new Mux4({
    tokenId: credentials.muxTokenId,
    tokenSecret: credentials.muxTokenSecret
  });
  const track = await mux.video.assets.createTrack(assetId, {
    type: "text",
    text_type: "subtitles",
    language_code: languageCode,
    name: trackName,
    url: presignedUrl
  });
  if (track.id) {
    return track.id;
  }
  throw new Error("Failed to create text track: no track ID returned from Mux");
}
|
|
2046
2398
|
async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options) {
|
|
2399
|
+
"use workflow";
|
|
2047
2400
|
const {
|
|
2048
|
-
provider =
|
|
2401
|
+
provider = "openai",
|
|
2049
2402
|
model,
|
|
2050
2403
|
s3Endpoint: providedS3Endpoint,
|
|
2051
2404
|
s3Region: providedS3Region,
|
|
2052
2405
|
s3Bucket: providedS3Bucket,
|
|
2053
2406
|
s3AccessKeyId: providedS3AccessKeyId,
|
|
2054
2407
|
s3SecretAccessKey: providedS3SecretAccessKey,
|
|
2055
|
-
uploadToMux: uploadToMuxOption
|
|
2056
|
-
...clientConfig
|
|
2408
|
+
uploadToMux: uploadToMuxOption
|
|
2057
2409
|
} = options;
|
|
2058
|
-
const resolvedProvider = provider;
|
|
2059
2410
|
const s3Endpoint = providedS3Endpoint ?? env_default.S3_ENDPOINT;
|
|
2060
2411
|
const s3Region = providedS3Region ?? env_default.S3_REGION ?? "auto";
|
|
2061
2412
|
const s3Bucket = providedS3Bucket ?? env_default.S3_BUCKET;
|
|
2062
2413
|
const s3AccessKeyId = providedS3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
|
|
2063
2414
|
const s3SecretAccessKey = providedS3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
|
|
2064
2415
|
const uploadToMux = uploadToMuxOption !== false;
|
|
2065
|
-
const
|
|
2066
|
-
{ ...
|
|
2067
|
-
|
|
2416
|
+
const config = await createWorkflowConfig(
|
|
2417
|
+
{ ...options, model },
|
|
2418
|
+
provider
|
|
2068
2419
|
);
|
|
2069
2420
|
if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
|
|
2070
2421
|
throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
|
|
2071
2422
|
}
|
|
2072
|
-
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
|
|
2073
|
-
const signingContext = resolveSigningContext(options);
|
|
2423
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(config.credentials, assetId);
|
|
2424
|
+
const signingContext = await resolveSigningContext(options);
|
|
2074
2425
|
if (policy === "signed" && !signingContext) {
|
|
2075
2426
|
throw new Error(
|
|
2076
2427
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -2091,115 +2442,79 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
|
|
|
2091
2442
|
}
|
|
2092
2443
|
let vttContent;
|
|
2093
2444
|
try {
|
|
2094
|
-
|
|
2095
|
-
if (!vttResponse.ok) {
|
|
2096
|
-
throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
|
|
2097
|
-
}
|
|
2098
|
-
vttContent = await vttResponse.text();
|
|
2445
|
+
vttContent = await fetchVttFromMux(vttUrl);
|
|
2099
2446
|
} catch (error) {
|
|
2100
2447
|
throw new Error(`Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2101
2448
|
}
|
|
2102
|
-
console.log(`\u2705 Found VTT content for language '${fromLanguageCode}'`);
|
|
2103
2449
|
let translatedVtt;
|
|
2450
|
+
let usage;
|
|
2104
2451
|
try {
|
|
2105
|
-
const
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2114
|
-
${vttContent}`
|
|
2115
|
-
}
|
|
2116
|
-
]
|
|
2452
|
+
const result = await translateVttWithAI({
|
|
2453
|
+
vttContent,
|
|
2454
|
+
fromLanguageCode,
|
|
2455
|
+
toLanguageCode,
|
|
2456
|
+
provider: config.provider,
|
|
2457
|
+
modelId: config.modelId,
|
|
2458
|
+
credentials: config.credentials,
|
|
2459
|
+
abortSignal: options.abortSignal
|
|
2117
2460
|
});
|
|
2118
|
-
translatedVtt =
|
|
2461
|
+
translatedVtt = result.translatedVtt;
|
|
2462
|
+
usage = result.usage;
|
|
2119
2463
|
} catch (error) {
|
|
2120
|
-
throw new Error(`Failed to translate VTT with ${
|
|
2464
|
+
throw new Error(`Failed to translate VTT with ${config.provider}: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2121
2465
|
}
|
|
2122
|
-
|
|
2123
|
-
|
|
2466
|
+
const sourceLanguage = getLanguageCodePair(fromLanguageCode);
|
|
2467
|
+
const targetLanguage = getLanguageCodePair(toLanguageCode);
|
|
2124
2468
|
if (!uploadToMux) {
|
|
2125
|
-
console.log(`\u2705 VTT translated to ${toLanguageCode} successfully!`);
|
|
2126
2469
|
return {
|
|
2127
2470
|
assetId,
|
|
2128
2471
|
sourceLanguageCode: fromLanguageCode,
|
|
2129
2472
|
targetLanguageCode: toLanguageCode,
|
|
2473
|
+
sourceLanguage,
|
|
2474
|
+
targetLanguage,
|
|
2130
2475
|
originalVtt: vttContent,
|
|
2131
|
-
translatedVtt
|
|
2476
|
+
translatedVtt,
|
|
2477
|
+
usage
|
|
2132
2478
|
};
|
|
2133
2479
|
}
|
|
2134
|
-
console.log("\u{1F4E4} Uploading translated VTT to S3-compatible storage...");
|
|
2135
|
-
const s3Client = new import_client_s32.S3Client({
|
|
2136
|
-
region: s3Region,
|
|
2137
|
-
endpoint: s3Endpoint,
|
|
2138
|
-
credentials: {
|
|
2139
|
-
accessKeyId: s3AccessKeyId,
|
|
2140
|
-
secretAccessKey: s3SecretAccessKey
|
|
2141
|
-
},
|
|
2142
|
-
forcePathStyle: true
|
|
2143
|
-
// Often needed for non-AWS S3 services
|
|
2144
|
-
});
|
|
2145
|
-
const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
|
|
2146
2480
|
let presignedUrl;
|
|
2147
2481
|
try {
|
|
2148
|
-
|
|
2149
|
-
|
|
2150
|
-
|
|
2151
|
-
|
|
2152
|
-
|
|
2153
|
-
|
|
2154
|
-
|
|
2155
|
-
|
|
2156
|
-
|
|
2157
|
-
|
|
2158
|
-
console.log(`\u2705 VTT uploaded successfully to: ${vttKey}`);
|
|
2159
|
-
const getObjectCommand = new import_client_s32.GetObjectCommand({
|
|
2160
|
-
Bucket: s3Bucket,
|
|
2161
|
-
Key: vttKey
|
|
2162
|
-
});
|
|
2163
|
-
presignedUrl = await (0, import_s3_request_presigner2.getSignedUrl)(s3Client, getObjectCommand, {
|
|
2164
|
-
expiresIn: 3600
|
|
2165
|
-
// 1 hour
|
|
2482
|
+
presignedUrl = await uploadVttToS3({
|
|
2483
|
+
translatedVtt,
|
|
2484
|
+
assetId,
|
|
2485
|
+
fromLanguageCode,
|
|
2486
|
+
toLanguageCode,
|
|
2487
|
+
s3Endpoint,
|
|
2488
|
+
s3Region,
|
|
2489
|
+
s3Bucket,
|
|
2490
|
+
s3AccessKeyId,
|
|
2491
|
+
s3SecretAccessKey
|
|
2166
2492
|
});
|
|
2167
|
-
console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
|
|
2168
2493
|
} catch (error) {
|
|
2169
2494
|
throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2170
2495
|
}
|
|
2171
|
-
console.log("\u{1F4F9} Adding translated track to Mux asset...");
|
|
2172
2496
|
let uploadedTrackId;
|
|
2173
2497
|
try {
|
|
2174
|
-
const languageName =
|
|
2498
|
+
const languageName = getLanguageName(toLanguageCode);
|
|
2175
2499
|
const trackName = `${languageName} (auto-translated)`;
|
|
2176
|
-
|
|
2177
|
-
type: "text",
|
|
2178
|
-
text_type: "subtitles",
|
|
2179
|
-
language_code: toLanguageCode,
|
|
2180
|
-
name: trackName,
|
|
2181
|
-
url: presignedUrl
|
|
2182
|
-
});
|
|
2183
|
-
uploadedTrackId = trackResponse.id;
|
|
2184
|
-
console.log(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
|
|
2185
|
-
console.log(`\u{1F4CB} Track name: "${trackName}"`);
|
|
2500
|
+
uploadedTrackId = await createTextTrackOnMux(config.credentials, assetId, toLanguageCode, trackName, presignedUrl);
|
|
2186
2501
|
} catch (error) {
|
|
2187
|
-
console.warn(
|
|
2188
|
-
console.log("\u{1F517} You can manually add the track using this presigned URL:");
|
|
2189
|
-
console.log(presignedUrl);
|
|
2502
|
+
console.warn(`Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2190
2503
|
}
|
|
2191
2504
|
return {
|
|
2192
2505
|
assetId,
|
|
2193
2506
|
sourceLanguageCode: fromLanguageCode,
|
|
2194
2507
|
targetLanguageCode: toLanguageCode,
|
|
2508
|
+
sourceLanguage,
|
|
2509
|
+
targetLanguage,
|
|
2195
2510
|
originalVtt: vttContent,
|
|
2196
2511
|
translatedVtt,
|
|
2197
2512
|
uploadedTrackId,
|
|
2198
|
-
presignedUrl
|
|
2513
|
+
presignedUrl,
|
|
2514
|
+
usage
|
|
2199
2515
|
};
|
|
2200
2516
|
}
|
|
2201
|
-
|
|
2202
|
-
0 && (module.exports = {
|
|
2517
|
+
export {
|
|
2203
2518
|
SUMMARY_KEYWORD_LIMIT,
|
|
2204
2519
|
burnedInCaptionsSchema,
|
|
2205
2520
|
chapterSchema,
|
|
@@ -2213,5 +2528,5 @@ ${vttContent}`
|
|
|
2213
2528
|
translateAudio,
|
|
2214
2529
|
translateCaptions,
|
|
2215
2530
|
translationSchema
|
|
2216
|
-
}
|
|
2531
|
+
};
|
|
2217
2532
|
//# sourceMappingURL=index.js.map
|