@mux/ai 0.1.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +346 -86
- package/dist/{index-DyTSka2R.d.ts → index-BcNDGOI6.d.ts} +12 -24
- package/dist/{index-Bnv7tv90.d.ts → index-D3fZHu0h.d.ts} +124 -13
- package/dist/index.d.ts +3 -3
- package/dist/index.js +989 -669
- package/dist/index.js.map +1 -1
- package/dist/primitives/index.d.ts +2 -2
- package/dist/primitives/index.js +37 -79
- package/dist/primitives/index.js.map +1 -1
- package/dist/{types-ktXDZ93V.d.mts → types-DzOQNn9R.d.ts} +3 -25
- package/dist/workflows/index.d.ts +2 -2
- package/dist/workflows/index.js +987 -683
- package/dist/workflows/index.js.map +1 -1
- package/package.json +34 -36
- package/dist/index-BNnz9P_5.d.mts +0 -144
- package/dist/index-vJ5r2FNm.d.mts +0 -477
- package/dist/index.d.mts +0 -13
- package/dist/index.mjs +0 -2205
- package/dist/index.mjs.map +0 -1
- package/dist/primitives/index.d.mts +0 -3
- package/dist/primitives/index.mjs +0 -358
- package/dist/primitives/index.mjs.map +0 -1
- package/dist/types-ktXDZ93V.d.ts +0 -137
- package/dist/workflows/index.d.mts +0 -8
- package/dist/workflows/index.mjs +0 -2168
- package/dist/workflows/index.mjs.map +0 -1
package/dist/workflows/index.js
CHANGED
|
@@ -1,194 +1,25 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __create = Object.create;
|
|
3
|
-
var __defProp = Object.defineProperty;
|
|
4
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
-
var __export = (target, all) => {
|
|
9
|
-
for (var name in all)
|
|
10
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
11
|
-
};
|
|
12
|
-
var __copyProps = (to, from, except, desc) => {
|
|
13
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
14
|
-
for (let key of __getOwnPropNames(from))
|
|
15
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
16
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
17
|
-
}
|
|
18
|
-
return to;
|
|
19
|
-
};
|
|
20
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
-
mod
|
|
27
|
-
));
|
|
28
|
-
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
|
-
|
|
30
|
-
// src/workflows/index.ts
|
|
31
|
-
var workflows_exports = {};
|
|
32
|
-
__export(workflows_exports, {
|
|
33
|
-
SUMMARY_KEYWORD_LIMIT: () => SUMMARY_KEYWORD_LIMIT,
|
|
34
|
-
burnedInCaptionsSchema: () => burnedInCaptionsSchema,
|
|
35
|
-
chapterSchema: () => chapterSchema,
|
|
36
|
-
chaptersSchema: () => chaptersSchema,
|
|
37
|
-
generateChapters: () => generateChapters,
|
|
38
|
-
generateVideoEmbeddings: () => generateVideoEmbeddings,
|
|
39
|
-
getModerationScores: () => getModerationScores,
|
|
40
|
-
getSummaryAndTags: () => getSummaryAndTags,
|
|
41
|
-
hasBurnedInCaptions: () => hasBurnedInCaptions,
|
|
42
|
-
summarySchema: () => summarySchema,
|
|
43
|
-
translateAudio: () => translateAudio,
|
|
44
|
-
translateCaptions: () => translateCaptions,
|
|
45
|
-
translationSchema: () => translationSchema
|
|
46
|
-
});
|
|
47
|
-
module.exports = __toCommonJS(workflows_exports);
|
|
48
|
-
|
|
49
1
|
// src/workflows/burned-in-captions.ts
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
function ownKeys(object, enumerableOnly) {
|
|
54
|
-
var keys = Object.keys(object);
|
|
55
|
-
if (Object.getOwnPropertySymbols) {
|
|
56
|
-
var symbols = Object.getOwnPropertySymbols(object);
|
|
57
|
-
enumerableOnly && (symbols = symbols.filter(function(sym) {
|
|
58
|
-
return Object.getOwnPropertyDescriptor(object, sym).enumerable;
|
|
59
|
-
})), keys.push.apply(keys, symbols);
|
|
60
|
-
}
|
|
61
|
-
return keys;
|
|
62
|
-
}
|
|
63
|
-
function _objectSpread(target) {
|
|
64
|
-
for (var i = 1; i < arguments.length; i++) {
|
|
65
|
-
var source = null != arguments[i] ? arguments[i] : {};
|
|
66
|
-
i % 2 ? ownKeys(Object(source), true).forEach(function(key) {
|
|
67
|
-
_defineProperty(target, key, source[key]);
|
|
68
|
-
}) : Object.getOwnPropertyDescriptors ? Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)) : ownKeys(Object(source)).forEach(function(key) {
|
|
69
|
-
Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
|
|
70
|
-
});
|
|
71
|
-
}
|
|
72
|
-
return target;
|
|
73
|
-
}
|
|
74
|
-
function _defineProperty(obj, key, value) {
|
|
75
|
-
key = _toPropertyKey(key);
|
|
76
|
-
if (key in obj) {
|
|
77
|
-
Object.defineProperty(obj, key, { value, enumerable: true, configurable: true, writable: true });
|
|
78
|
-
} else {
|
|
79
|
-
obj[key] = value;
|
|
80
|
-
}
|
|
81
|
-
return obj;
|
|
82
|
-
}
|
|
83
|
-
function _toPropertyKey(arg) {
|
|
84
|
-
var key = _toPrimitive(arg, "string");
|
|
85
|
-
return typeof key === "symbol" ? key : String(key);
|
|
86
|
-
}
|
|
87
|
-
function _toPrimitive(input, hint) {
|
|
88
|
-
if (typeof input !== "object" || input === null) return input;
|
|
89
|
-
var prim = input[Symbol.toPrimitive];
|
|
90
|
-
if (prim !== void 0) {
|
|
91
|
-
var res = prim.call(input, hint || "default");
|
|
92
|
-
if (typeof res !== "object") return res;
|
|
93
|
-
throw new TypeError("@@toPrimitive must return a primitive value.");
|
|
94
|
-
}
|
|
95
|
-
return (hint === "string" ? String : Number)(input);
|
|
96
|
-
}
|
|
97
|
-
var dedent = createDedent({});
|
|
98
|
-
var dedent_default = dedent;
|
|
99
|
-
function createDedent(options) {
|
|
100
|
-
dedent2.withOptions = (newOptions) => createDedent(_objectSpread(_objectSpread({}, options), newOptions));
|
|
101
|
-
return dedent2;
|
|
102
|
-
function dedent2(strings, ...values) {
|
|
103
|
-
const raw = typeof strings === "string" ? [strings] : strings.raw;
|
|
104
|
-
const {
|
|
105
|
-
alignValues = false,
|
|
106
|
-
escapeSpecialCharacters = Array.isArray(strings),
|
|
107
|
-
trimWhitespace = true
|
|
108
|
-
} = options;
|
|
109
|
-
let result = "";
|
|
110
|
-
for (let i = 0; i < raw.length; i++) {
|
|
111
|
-
let next = raw[i];
|
|
112
|
-
if (escapeSpecialCharacters) {
|
|
113
|
-
next = next.replace(/\\\n[ \t]*/g, "").replace(/\\`/g, "`").replace(/\\\$/g, "$").replace(/\\\{/g, "{");
|
|
114
|
-
}
|
|
115
|
-
result += next;
|
|
116
|
-
if (i < values.length) {
|
|
117
|
-
const value = alignValues ? alignValue(values[i], result) : values[i];
|
|
118
|
-
result += value;
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
const lines = result.split("\n");
|
|
122
|
-
let mindent = null;
|
|
123
|
-
for (const l of lines) {
|
|
124
|
-
const m = l.match(/^(\s+)\S+/);
|
|
125
|
-
if (m) {
|
|
126
|
-
const indent = m[1].length;
|
|
127
|
-
if (!mindent) {
|
|
128
|
-
mindent = indent;
|
|
129
|
-
} else {
|
|
130
|
-
mindent = Math.min(mindent, indent);
|
|
131
|
-
}
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
if (mindent !== null) {
|
|
135
|
-
const m = mindent;
|
|
136
|
-
result = lines.map((l) => l[0] === " " || l[0] === " " ? l.slice(m) : l).join("\n");
|
|
137
|
-
}
|
|
138
|
-
if (trimWhitespace) {
|
|
139
|
-
result = result.trim();
|
|
140
|
-
}
|
|
141
|
-
if (escapeSpecialCharacters) {
|
|
142
|
-
result = result.replace(/\\n/g, "\n");
|
|
143
|
-
}
|
|
144
|
-
return result;
|
|
145
|
-
}
|
|
146
|
-
}
|
|
147
|
-
function alignValue(value, precedingText) {
|
|
148
|
-
if (typeof value !== "string" || !value.includes("\n")) {
|
|
149
|
-
return value;
|
|
150
|
-
}
|
|
151
|
-
const currentLine = precedingText.slice(precedingText.lastIndexOf("\n") + 1);
|
|
152
|
-
const indentMatch = currentLine.match(/^(\s+)/);
|
|
153
|
-
if (indentMatch) {
|
|
154
|
-
const indent = indentMatch[1];
|
|
155
|
-
return value.replace(/\n/g, `
|
|
156
|
-
${indent}`);
|
|
157
|
-
}
|
|
158
|
-
return value;
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
// src/workflows/burned-in-captions.ts
|
|
162
|
-
var import_zod2 = require("zod");
|
|
163
|
-
|
|
164
|
-
// src/lib/client-factory.ts
|
|
165
|
-
var import_mux_node = __toESM(require("@mux/mux-node"));
|
|
2
|
+
import { generateObject } from "ai";
|
|
3
|
+
import dedent from "dedent";
|
|
4
|
+
import { z as z2 } from "zod";
|
|
166
5
|
|
|
167
6
|
// src/env.ts
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
var import_dotenv_expand = require("dotenv-expand");
|
|
171
|
-
var import_zod = require("zod");
|
|
172
|
-
(0, import_dotenv_expand.expand)((0, import_dotenv.config)({
|
|
173
|
-
path: import_node_path.default.resolve(
|
|
174
|
-
process.cwd(),
|
|
175
|
-
process.env.NODE_ENV === "test" ? ".env.test" : ".env"
|
|
176
|
-
)
|
|
177
|
-
}));
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
import "dotenv/config";
|
|
178
9
|
function optionalString(description, message) {
|
|
179
|
-
return
|
|
10
|
+
return z.preprocess(
|
|
180
11
|
(value) => typeof value === "string" && value.trim().length === 0 ? void 0 : value,
|
|
181
|
-
|
|
12
|
+
z.string().trim().min(1, message).optional()
|
|
182
13
|
).describe(description);
|
|
183
14
|
}
|
|
184
15
|
function requiredString(description, message) {
|
|
185
|
-
return
|
|
16
|
+
return z.preprocess(
|
|
186
17
|
(value) => typeof value === "string" ? value.trim().length > 0 ? value.trim() : void 0 : value,
|
|
187
|
-
|
|
18
|
+
z.string().trim().min(1, message)
|
|
188
19
|
).describe(description);
|
|
189
20
|
}
|
|
190
|
-
var EnvSchema =
|
|
191
|
-
NODE_ENV:
|
|
21
|
+
var EnvSchema = z.object({
|
|
22
|
+
NODE_ENV: z.string().default("development").describe("Runtime environment."),
|
|
192
23
|
MUX_TOKEN_ID: requiredString("Mux access token ID.", "Required to access Mux APIs"),
|
|
193
24
|
MUX_TOKEN_SECRET: requiredString("Mux access token secret.", "Required to access Mux APIs"),
|
|
194
25
|
MUX_SIGNING_KEY: optionalString("Mux signing key ID for signed playback URLs.", "Used to sign playback URLs"),
|
|
@@ -217,12 +48,12 @@ var env = parseEnv();
|
|
|
217
48
|
var env_default = env;
|
|
218
49
|
|
|
219
50
|
// src/lib/providers.ts
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
51
|
+
import { createAnthropic } from "@ai-sdk/anthropic";
|
|
52
|
+
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
|
53
|
+
import { createOpenAI } from "@ai-sdk/openai";
|
|
223
54
|
var DEFAULT_LANGUAGE_MODELS = {
|
|
224
|
-
openai: "gpt-5
|
|
225
|
-
anthropic: "claude-
|
|
55
|
+
openai: "gpt-5.1",
|
|
56
|
+
anthropic: "claude-sonnet-4-5",
|
|
226
57
|
google: "gemini-2.5-flash"
|
|
227
58
|
};
|
|
228
59
|
var DEFAULT_EMBEDDING_MODELS = {
|
|
@@ -235,14 +66,60 @@ function requireEnv(value, name) {
|
|
|
235
66
|
}
|
|
236
67
|
return value;
|
|
237
68
|
}
|
|
69
|
+
function createLanguageModelFromConfig(provider, modelId) {
|
|
70
|
+
switch (provider) {
|
|
71
|
+
case "openai": {
|
|
72
|
+
const apiKey = env_default.OPENAI_API_KEY;
|
|
73
|
+
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
74
|
+
const openai = createOpenAI({ apiKey });
|
|
75
|
+
return openai(modelId);
|
|
76
|
+
}
|
|
77
|
+
case "anthropic": {
|
|
78
|
+
const apiKey = env_default.ANTHROPIC_API_KEY;
|
|
79
|
+
requireEnv(apiKey, "ANTHROPIC_API_KEY");
|
|
80
|
+
const anthropic = createAnthropic({ apiKey });
|
|
81
|
+
return anthropic(modelId);
|
|
82
|
+
}
|
|
83
|
+
case "google": {
|
|
84
|
+
const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
|
|
85
|
+
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
86
|
+
const google = createGoogleGenerativeAI({ apiKey });
|
|
87
|
+
return google(modelId);
|
|
88
|
+
}
|
|
89
|
+
default: {
|
|
90
|
+
const exhaustiveCheck = provider;
|
|
91
|
+
throw new Error(`Unsupported provider: ${exhaustiveCheck}`);
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
function createEmbeddingModelFromConfig(provider, modelId) {
|
|
96
|
+
switch (provider) {
|
|
97
|
+
case "openai": {
|
|
98
|
+
const apiKey = env_default.OPENAI_API_KEY;
|
|
99
|
+
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
100
|
+
const openai = createOpenAI({ apiKey });
|
|
101
|
+
return openai.embedding(modelId);
|
|
102
|
+
}
|
|
103
|
+
case "google": {
|
|
104
|
+
const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
|
|
105
|
+
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
106
|
+
const google = createGoogleGenerativeAI({ apiKey });
|
|
107
|
+
return google.textEmbeddingModel(modelId);
|
|
108
|
+
}
|
|
109
|
+
default: {
|
|
110
|
+
const exhaustiveCheck = provider;
|
|
111
|
+
throw new Error(`Unsupported embedding provider: ${exhaustiveCheck}`);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
238
115
|
function resolveLanguageModel(options = {}) {
|
|
239
116
|
const provider = options.provider || "openai";
|
|
240
117
|
const modelId = options.model || DEFAULT_LANGUAGE_MODELS[provider];
|
|
241
118
|
switch (provider) {
|
|
242
119
|
case "openai": {
|
|
243
|
-
const apiKey =
|
|
120
|
+
const apiKey = env_default.OPENAI_API_KEY;
|
|
244
121
|
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
245
|
-
const openai =
|
|
122
|
+
const openai = createOpenAI({
|
|
246
123
|
apiKey
|
|
247
124
|
});
|
|
248
125
|
return {
|
|
@@ -252,9 +129,9 @@ function resolveLanguageModel(options = {}) {
|
|
|
252
129
|
};
|
|
253
130
|
}
|
|
254
131
|
case "anthropic": {
|
|
255
|
-
const apiKey =
|
|
132
|
+
const apiKey = env_default.ANTHROPIC_API_KEY;
|
|
256
133
|
requireEnv(apiKey, "ANTHROPIC_API_KEY");
|
|
257
|
-
const anthropic =
|
|
134
|
+
const anthropic = createAnthropic({
|
|
258
135
|
apiKey
|
|
259
136
|
});
|
|
260
137
|
return {
|
|
@@ -264,9 +141,9 @@ function resolveLanguageModel(options = {}) {
|
|
|
264
141
|
};
|
|
265
142
|
}
|
|
266
143
|
case "google": {
|
|
267
|
-
const apiKey =
|
|
144
|
+
const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
|
|
268
145
|
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
269
|
-
const google =
|
|
146
|
+
const google = createGoogleGenerativeAI({
|
|
270
147
|
apiKey
|
|
271
148
|
});
|
|
272
149
|
return {
|
|
@@ -286,9 +163,9 @@ function resolveEmbeddingModel(options = {}) {
|
|
|
286
163
|
const modelId = options.model || DEFAULT_EMBEDDING_MODELS[provider];
|
|
287
164
|
switch (provider) {
|
|
288
165
|
case "openai": {
|
|
289
|
-
const apiKey =
|
|
166
|
+
const apiKey = env_default.OPENAI_API_KEY;
|
|
290
167
|
requireEnv(apiKey, "OPENAI_API_KEY");
|
|
291
|
-
const openai =
|
|
168
|
+
const openai = createOpenAI({
|
|
292
169
|
apiKey
|
|
293
170
|
});
|
|
294
171
|
return {
|
|
@@ -298,9 +175,9 @@ function resolveEmbeddingModel(options = {}) {
|
|
|
298
175
|
};
|
|
299
176
|
}
|
|
300
177
|
case "google": {
|
|
301
|
-
const apiKey =
|
|
178
|
+
const apiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
|
|
302
179
|
requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
|
|
303
|
-
const google =
|
|
180
|
+
const google = createGoogleGenerativeAI({
|
|
304
181
|
apiKey
|
|
305
182
|
});
|
|
306
183
|
return {
|
|
@@ -317,12 +194,45 @@ function resolveEmbeddingModel(options = {}) {
|
|
|
317
194
|
}
|
|
318
195
|
|
|
319
196
|
// src/lib/client-factory.ts
|
|
320
|
-
function
|
|
321
|
-
const muxTokenId =
|
|
322
|
-
const muxTokenSecret =
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
197
|
+
function getMuxCredentialsFromEnv() {
|
|
198
|
+
const muxTokenId = env_default.MUX_TOKEN_ID;
|
|
199
|
+
const muxTokenSecret = env_default.MUX_TOKEN_SECRET;
|
|
200
|
+
if (!muxTokenId || !muxTokenSecret) {
|
|
201
|
+
throw new Error(
|
|
202
|
+
"Mux credentials are required. Set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables."
|
|
203
|
+
);
|
|
204
|
+
}
|
|
205
|
+
return { muxTokenId, muxTokenSecret };
|
|
206
|
+
}
|
|
207
|
+
function getApiKeyFromEnv(provider) {
|
|
208
|
+
const envVarMap = {
|
|
209
|
+
openai: env_default.OPENAI_API_KEY,
|
|
210
|
+
anthropic: env_default.ANTHROPIC_API_KEY,
|
|
211
|
+
google: env_default.GOOGLE_GENERATIVE_AI_API_KEY,
|
|
212
|
+
hive: env_default.HIVE_API_KEY,
|
|
213
|
+
elevenlabs: env_default.ELEVENLABS_API_KEY
|
|
214
|
+
};
|
|
215
|
+
const apiKey = envVarMap[provider];
|
|
216
|
+
if (!apiKey) {
|
|
217
|
+
const envVarNames = {
|
|
218
|
+
openai: "OPENAI_API_KEY",
|
|
219
|
+
anthropic: "ANTHROPIC_API_KEY",
|
|
220
|
+
google: "GOOGLE_GENERATIVE_AI_API_KEY",
|
|
221
|
+
hive: "HIVE_API_KEY",
|
|
222
|
+
elevenlabs: "ELEVENLABS_API_KEY"
|
|
223
|
+
};
|
|
224
|
+
throw new Error(
|
|
225
|
+
`${provider} API key is required. Set ${envVarNames[provider]} environment variable.`
|
|
226
|
+
);
|
|
227
|
+
}
|
|
228
|
+
return apiKey;
|
|
229
|
+
}
|
|
230
|
+
async function validateCredentials(requiredProvider) {
|
|
231
|
+
const muxTokenId = env_default.MUX_TOKEN_ID;
|
|
232
|
+
const muxTokenSecret = env_default.MUX_TOKEN_SECRET;
|
|
233
|
+
const openaiApiKey = env_default.OPENAI_API_KEY;
|
|
234
|
+
const anthropicApiKey = env_default.ANTHROPIC_API_KEY;
|
|
235
|
+
const googleApiKey = env_default.GOOGLE_GENERATIVE_AI_API_KEY;
|
|
326
236
|
if (!muxTokenId || !muxTokenSecret) {
|
|
327
237
|
throw new Error(
|
|
328
238
|
"Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables."
|
|
@@ -351,32 +261,23 @@ function validateCredentials(options, requiredProvider) {
|
|
|
351
261
|
googleApiKey
|
|
352
262
|
};
|
|
353
263
|
}
|
|
354
|
-
function
|
|
355
|
-
if (!credentials.muxTokenId || !credentials.muxTokenSecret) {
|
|
356
|
-
throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
|
|
357
|
-
}
|
|
358
|
-
return new import_mux_node.default({
|
|
359
|
-
tokenId: credentials.muxTokenId,
|
|
360
|
-
tokenSecret: credentials.muxTokenSecret
|
|
361
|
-
});
|
|
362
|
-
}
|
|
363
|
-
function createWorkflowClients(options, provider) {
|
|
264
|
+
async function createWorkflowConfig(options, provider) {
|
|
364
265
|
const providerToUse = provider || options.provider || "openai";
|
|
365
|
-
const credentials = validateCredentials(
|
|
366
|
-
const
|
|
266
|
+
const credentials = await validateCredentials(providerToUse);
|
|
267
|
+
const resolved = resolveLanguageModel({
|
|
367
268
|
...options,
|
|
368
269
|
provider: providerToUse
|
|
369
270
|
});
|
|
370
271
|
return {
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
272
|
+
credentials,
|
|
273
|
+
provider: resolved.provider,
|
|
274
|
+
modelId: resolved.modelId
|
|
374
275
|
};
|
|
375
276
|
}
|
|
376
277
|
|
|
377
278
|
// src/lib/image-download.ts
|
|
378
|
-
|
|
379
|
-
|
|
279
|
+
import { Buffer } from "buffer";
|
|
280
|
+
import pRetry, { AbortError } from "p-retry";
|
|
380
281
|
var DEFAULT_OPTIONS = {
|
|
381
282
|
timeout: 1e4,
|
|
382
283
|
retries: 3,
|
|
@@ -385,9 +286,10 @@ var DEFAULT_OPTIONS = {
|
|
|
385
286
|
exponentialBackoff: true
|
|
386
287
|
};
|
|
387
288
|
async function downloadImageAsBase64(url, options = {}) {
|
|
289
|
+
"use step";
|
|
388
290
|
const opts = { ...DEFAULT_OPTIONS, ...options };
|
|
389
291
|
let attemptCount = 0;
|
|
390
|
-
return (
|
|
292
|
+
return pRetry(
|
|
391
293
|
async () => {
|
|
392
294
|
attemptCount++;
|
|
393
295
|
const controller = new AbortController();
|
|
@@ -402,18 +304,18 @@ async function downloadImageAsBase64(url, options = {}) {
|
|
|
402
304
|
clearTimeout(timeoutId);
|
|
403
305
|
if (!response.ok) {
|
|
404
306
|
if (response.status >= 400 && response.status < 500 && response.status !== 429) {
|
|
405
|
-
throw new
|
|
307
|
+
throw new AbortError(`HTTP ${response.status}: ${response.statusText}`);
|
|
406
308
|
}
|
|
407
309
|
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
408
310
|
}
|
|
409
311
|
const contentType = response.headers.get("content-type");
|
|
410
312
|
if (!contentType?.startsWith("image/")) {
|
|
411
|
-
throw new
|
|
313
|
+
throw new AbortError(`Invalid content type: ${contentType}. Expected image/*`);
|
|
412
314
|
}
|
|
413
315
|
const arrayBuffer = await response.arrayBuffer();
|
|
414
|
-
const buffer =
|
|
316
|
+
const buffer = Buffer.from(arrayBuffer);
|
|
415
317
|
if (buffer.length === 0) {
|
|
416
|
-
throw new
|
|
318
|
+
throw new AbortError("Downloaded image is empty");
|
|
417
319
|
}
|
|
418
320
|
const base64Data = `data:${contentType};base64,${buffer.toString("base64")}`;
|
|
419
321
|
return {
|
|
@@ -426,7 +328,7 @@ async function downloadImageAsBase64(url, options = {}) {
|
|
|
426
328
|
};
|
|
427
329
|
} catch (error) {
|
|
428
330
|
clearTimeout(timeoutId);
|
|
429
|
-
if (error instanceof
|
|
331
|
+
if (error instanceof AbortError) {
|
|
430
332
|
throw error;
|
|
431
333
|
}
|
|
432
334
|
if (error instanceof Error) {
|
|
@@ -455,6 +357,7 @@ async function downloadImageAsBase64(url, options = {}) {
|
|
|
455
357
|
);
|
|
456
358
|
}
|
|
457
359
|
async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
|
|
360
|
+
"use step";
|
|
458
361
|
const results = [];
|
|
459
362
|
for (let i = 0; i < urls.length; i += maxConcurrent) {
|
|
460
363
|
const batch = urls.slice(i, i + maxConcurrent);
|
|
@@ -466,6 +369,7 @@ async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
|
|
|
466
369
|
}
|
|
467
370
|
|
|
468
371
|
// src/lib/mux-assets.ts
|
|
372
|
+
import Mux from "@mux/mux-node";
|
|
469
373
|
function getPlaybackId(asset) {
|
|
470
374
|
const playbackIds = asset.playback_ids || [];
|
|
471
375
|
const publicPlaybackId = playbackIds.find((pid) => pid.policy === "public");
|
|
@@ -480,7 +384,13 @@ function getPlaybackId(asset) {
|
|
|
480
384
|
"No public or signed playback ID found for this asset. A public or signed playback ID is required. DRM playback IDs are not currently supported."
|
|
481
385
|
);
|
|
482
386
|
}
|
|
483
|
-
async function getPlaybackIdForAsset(
|
|
387
|
+
async function getPlaybackIdForAsset(assetId) {
|
|
388
|
+
"use step";
|
|
389
|
+
const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
|
|
390
|
+
const mux = new Mux({
|
|
391
|
+
tokenId: muxTokenId,
|
|
392
|
+
tokenSecret: muxTokenSecret
|
|
393
|
+
});
|
|
484
394
|
const asset = await mux.video.assets.retrieve(assetId);
|
|
485
395
|
const { id: playbackId, policy } = getPlaybackId(asset);
|
|
486
396
|
return { asset, playbackId, policy };
|
|
@@ -519,8 +429,8 @@ function resolveSection(defaultSection, override) {
|
|
|
519
429
|
}
|
|
520
430
|
return override;
|
|
521
431
|
}
|
|
522
|
-
function createPromptBuilder(
|
|
523
|
-
const { template, sectionOrder } =
|
|
432
|
+
function createPromptBuilder(config) {
|
|
433
|
+
const { template, sectionOrder } = config;
|
|
524
434
|
const getSection = (section, override) => {
|
|
525
435
|
const resolved = resolveSection(template[section], override);
|
|
526
436
|
return renderSection(resolved);
|
|
@@ -561,17 +471,17 @@ function createToneSection(instruction) {
|
|
|
561
471
|
}
|
|
562
472
|
|
|
563
473
|
// src/lib/url-signing.ts
|
|
564
|
-
|
|
565
|
-
function
|
|
566
|
-
const keyId =
|
|
567
|
-
const keySecret =
|
|
474
|
+
import Mux2 from "@mux/mux-node";
|
|
475
|
+
function getMuxSigningContextFromEnv() {
|
|
476
|
+
const keyId = env_default.MUX_SIGNING_KEY;
|
|
477
|
+
const keySecret = env_default.MUX_PRIVATE_KEY;
|
|
568
478
|
if (!keyId || !keySecret) {
|
|
569
479
|
return void 0;
|
|
570
480
|
}
|
|
571
481
|
return { keyId, keySecret };
|
|
572
482
|
}
|
|
573
483
|
function createSigningClient(context) {
|
|
574
|
-
return new
|
|
484
|
+
return new Mux2({
|
|
575
485
|
// These are not needed for signing, but the SDK requires them
|
|
576
486
|
// Using empty strings as we only need the jwt functionality
|
|
577
487
|
tokenId: env_default.MUX_TOKEN_ID || "",
|
|
@@ -581,6 +491,7 @@ function createSigningClient(context) {
|
|
|
581
491
|
});
|
|
582
492
|
}
|
|
583
493
|
async function signPlaybackId(playbackId, context, type = "video", params) {
|
|
494
|
+
"use step";
|
|
584
495
|
const client = createSigningClient(context);
|
|
585
496
|
const stringParams = params ? Object.fromEntries(
|
|
586
497
|
Object.entries(params).map(([key, value]) => [key, String(value)])
|
|
@@ -592,6 +503,7 @@ async function signPlaybackId(playbackId, context, type = "video", params) {
|
|
|
592
503
|
});
|
|
593
504
|
}
|
|
594
505
|
async function signUrl(url, playbackId, context, type = "video", params) {
|
|
506
|
+
"use step";
|
|
595
507
|
const token = await signPlaybackId(playbackId, context, type, params);
|
|
596
508
|
const separator = url.includes("?") ? "&" : "?";
|
|
597
509
|
return `${url}${separator}token=${token}`;
|
|
@@ -599,21 +511,23 @@ async function signUrl(url, playbackId, context, type = "video", params) {
|
|
|
599
511
|
|
|
600
512
|
// src/primitives/storyboards.ts
|
|
601
513
|
var DEFAULT_STORYBOARD_WIDTH = 640;
|
|
602
|
-
async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH,
|
|
514
|
+
async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, shouldSign = false) {
|
|
515
|
+
"use step";
|
|
603
516
|
const baseUrl = `https://image.mux.com/${playbackId}/storyboard.png`;
|
|
604
|
-
if (
|
|
517
|
+
if (shouldSign) {
|
|
518
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
605
519
|
return signUrl(baseUrl, playbackId, signingContext, "storyboard", { width });
|
|
606
520
|
}
|
|
607
521
|
return `${baseUrl}?width=${width}`;
|
|
608
522
|
}
|
|
609
523
|
|
|
610
524
|
// src/workflows/burned-in-captions.ts
|
|
611
|
-
var burnedInCaptionsSchema =
|
|
612
|
-
hasBurnedInCaptions:
|
|
613
|
-
confidence:
|
|
614
|
-
detectedLanguage:
|
|
525
|
+
var burnedInCaptionsSchema = z2.object({
|
|
526
|
+
hasBurnedInCaptions: z2.boolean(),
|
|
527
|
+
confidence: z2.number().min(0).max(1),
|
|
528
|
+
detectedLanguage: z2.string().nullable()
|
|
615
529
|
});
|
|
616
|
-
var SYSTEM_PROMPT =
|
|
530
|
+
var SYSTEM_PROMPT = dedent`
|
|
617
531
|
<role>
|
|
618
532
|
You are an expert at analyzing video frames to detect burned-in captions (also called open captions or hardcoded subtitles).
|
|
619
533
|
These are text overlays that are permanently embedded in the video image, common on TikTok, Instagram Reels, and other social media platforms.
|
|
@@ -656,14 +570,14 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
|
|
|
656
570
|
template: {
|
|
657
571
|
task: {
|
|
658
572
|
tag: "task",
|
|
659
|
-
content:
|
|
573
|
+
content: dedent`
|
|
660
574
|
Analyze the provided video storyboard to detect burned-in captions (hardcoded subtitles).
|
|
661
575
|
Count frames with text vs no text, note position consistency and whether text changes across frames.
|
|
662
576
|
Decide if captions exist, with confidence (0.0-1.0) and detected language if any.`
|
|
663
577
|
},
|
|
664
578
|
analysisSteps: {
|
|
665
579
|
tag: "analysis_steps",
|
|
666
|
-
content:
|
|
580
|
+
content: dedent`
|
|
667
581
|
1. COUNT how many frames contain text overlays vs. how many don't
|
|
668
582
|
2. Check if text appears in consistent positions across multiple frames
|
|
669
583
|
3. Verify text changes content between frames (indicating dialogue/narration)
|
|
@@ -672,7 +586,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
|
|
|
672
586
|
},
|
|
673
587
|
positiveIndicators: {
|
|
674
588
|
tag: "classify_as_captions",
|
|
675
|
-
content:
|
|
589
|
+
content: dedent`
|
|
676
590
|
ONLY classify as burned-in captions if:
|
|
677
591
|
- Text appears in multiple frames (not just 1-2 end frames)
|
|
678
592
|
- Text positioning is consistent across those frames
|
|
@@ -681,7 +595,7 @@ var burnedInCaptionsPromptBuilder = createPromptBuilder({
|
|
|
681
595
|
},
|
|
682
596
|
negativeIndicators: {
|
|
683
597
|
tag: "not_captions",
|
|
684
|
-
content:
|
|
598
|
+
content: dedent`
|
|
685
599
|
DO NOT classify as burned-in captions:
|
|
686
600
|
- Marketing taglines appearing only in final 1-2 frames
|
|
687
601
|
- Single words or phrases that don't change between frames
|
|
@@ -696,65 +610,90 @@ function buildUserPrompt(promptOverrides) {
|
|
|
696
610
|
return burnedInCaptionsPromptBuilder.build(promptOverrides);
|
|
697
611
|
}
|
|
698
612
|
var DEFAULT_PROVIDER = "openai";
|
|
613
|
+
async function fetchImageAsBase64(imageUrl, imageDownloadOptions) {
|
|
614
|
+
"use step";
|
|
615
|
+
const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
|
|
616
|
+
return downloadResult.base64Data;
|
|
617
|
+
}
|
|
618
|
+
async function analyzeStoryboard({
|
|
619
|
+
imageDataUrl,
|
|
620
|
+
provider,
|
|
621
|
+
modelId,
|
|
622
|
+
userPrompt,
|
|
623
|
+
systemPrompt
|
|
624
|
+
}) {
|
|
625
|
+
"use step";
|
|
626
|
+
const model = createLanguageModelFromConfig(provider, modelId);
|
|
627
|
+
const response = await generateObject({
|
|
628
|
+
model,
|
|
629
|
+
schema: burnedInCaptionsSchema,
|
|
630
|
+
experimental_telemetry: { isEnabled: true },
|
|
631
|
+
messages: [
|
|
632
|
+
{
|
|
633
|
+
role: "system",
|
|
634
|
+
content: systemPrompt
|
|
635
|
+
},
|
|
636
|
+
{
|
|
637
|
+
role: "user",
|
|
638
|
+
content: [
|
|
639
|
+
{ type: "text", text: userPrompt },
|
|
640
|
+
{ type: "image", image: imageDataUrl }
|
|
641
|
+
]
|
|
642
|
+
}
|
|
643
|
+
]
|
|
644
|
+
});
|
|
645
|
+
return {
|
|
646
|
+
result: response.object,
|
|
647
|
+
usage: {
|
|
648
|
+
inputTokens: response.usage.inputTokens,
|
|
649
|
+
outputTokens: response.usage.outputTokens,
|
|
650
|
+
totalTokens: response.usage.totalTokens,
|
|
651
|
+
reasoningTokens: response.usage.reasoningTokens,
|
|
652
|
+
cachedInputTokens: response.usage.cachedInputTokens
|
|
653
|
+
}
|
|
654
|
+
};
|
|
655
|
+
}
|
|
699
656
|
async function hasBurnedInCaptions(assetId, options = {}) {
|
|
657
|
+
"use workflow";
|
|
700
658
|
const {
|
|
701
659
|
provider = DEFAULT_PROVIDER,
|
|
702
660
|
model,
|
|
703
661
|
imageSubmissionMode = "url",
|
|
704
662
|
imageDownloadOptions,
|
|
705
663
|
promptOverrides,
|
|
706
|
-
...
|
|
664
|
+
...config
|
|
707
665
|
} = options;
|
|
708
666
|
const userPrompt = buildUserPrompt(promptOverrides);
|
|
709
|
-
const
|
|
710
|
-
{ ...
|
|
667
|
+
const workflowConfig = await createWorkflowConfig(
|
|
668
|
+
{ ...config, model },
|
|
711
669
|
provider
|
|
712
670
|
);
|
|
713
|
-
const { playbackId, policy } = await getPlaybackIdForAsset(
|
|
714
|
-
const signingContext =
|
|
671
|
+
const { playbackId, policy } = await getPlaybackIdForAsset(assetId);
|
|
672
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
715
673
|
if (policy === "signed" && !signingContext) {
|
|
716
674
|
throw new Error(
|
|
717
675
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
718
676
|
);
|
|
719
677
|
}
|
|
720
|
-
const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed"
|
|
721
|
-
const analyzeStoryboard = async (imageDataUrl) => {
|
|
722
|
-
const response = await (0, import_ai.generateObject)({
|
|
723
|
-
model: clients.languageModel.model,
|
|
724
|
-
schema: burnedInCaptionsSchema,
|
|
725
|
-
abortSignal: options.abortSignal,
|
|
726
|
-
experimental_telemetry: { isEnabled: true },
|
|
727
|
-
messages: [
|
|
728
|
-
{
|
|
729
|
-
role: "system",
|
|
730
|
-
content: SYSTEM_PROMPT
|
|
731
|
-
},
|
|
732
|
-
{
|
|
733
|
-
role: "user",
|
|
734
|
-
content: [
|
|
735
|
-
{ type: "text", text: userPrompt },
|
|
736
|
-
{ type: "image", image: imageDataUrl }
|
|
737
|
-
]
|
|
738
|
-
}
|
|
739
|
-
]
|
|
740
|
-
});
|
|
741
|
-
return {
|
|
742
|
-
result: response.object,
|
|
743
|
-
usage: {
|
|
744
|
-
inputTokens: response.usage.inputTokens,
|
|
745
|
-
outputTokens: response.usage.outputTokens,
|
|
746
|
-
totalTokens: response.usage.totalTokens,
|
|
747
|
-
reasoningTokens: response.usage.reasoningTokens,
|
|
748
|
-
cachedInputTokens: response.usage.cachedInputTokens
|
|
749
|
-
}
|
|
750
|
-
};
|
|
751
|
-
};
|
|
678
|
+
const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed");
|
|
752
679
|
let analysisResponse;
|
|
753
680
|
if (imageSubmissionMode === "base64") {
|
|
754
|
-
const
|
|
755
|
-
analysisResponse = await analyzeStoryboard(
|
|
681
|
+
const base64Data = await fetchImageAsBase64(imageUrl, imageDownloadOptions);
|
|
682
|
+
analysisResponse = await analyzeStoryboard({
|
|
683
|
+
imageDataUrl: base64Data,
|
|
684
|
+
provider: workflowConfig.provider,
|
|
685
|
+
modelId: workflowConfig.modelId,
|
|
686
|
+
userPrompt,
|
|
687
|
+
systemPrompt: SYSTEM_PROMPT
|
|
688
|
+
});
|
|
756
689
|
} else {
|
|
757
|
-
analysisResponse = await analyzeStoryboard(
|
|
690
|
+
analysisResponse = await analyzeStoryboard({
|
|
691
|
+
imageDataUrl: imageUrl,
|
|
692
|
+
provider: workflowConfig.provider,
|
|
693
|
+
modelId: workflowConfig.modelId,
|
|
694
|
+
userPrompt,
|
|
695
|
+
systemPrompt: SYSTEM_PROMPT
|
|
696
|
+
});
|
|
758
697
|
}
|
|
759
698
|
if (!analysisResponse.result) {
|
|
760
699
|
throw new Error("No analysis result received from AI provider");
|
|
@@ -770,8 +709,8 @@ async function hasBurnedInCaptions(assetId, options = {}) {
|
|
|
770
709
|
}
|
|
771
710
|
|
|
772
711
|
// src/workflows/chapters.ts
|
|
773
|
-
|
|
774
|
-
|
|
712
|
+
import { generateObject as generateObject2 } from "ai";
|
|
713
|
+
import { z as z3 } from "zod";
|
|
775
714
|
|
|
776
715
|
// src/lib/retry.ts
|
|
777
716
|
var DEFAULT_RETRY_OPTIONS = {
|
|
@@ -803,11 +742,11 @@ async function withRetry(fn, {
|
|
|
803
742
|
if (isLastAttempt || !shouldRetry(lastError, attempt + 1)) {
|
|
804
743
|
throw lastError;
|
|
805
744
|
}
|
|
806
|
-
const
|
|
745
|
+
const delay = calculateDelay(attempt + 1, baseDelay, maxDelay);
|
|
807
746
|
console.warn(
|
|
808
|
-
`Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(
|
|
747
|
+
`Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(delay)}ms...`
|
|
809
748
|
);
|
|
810
|
-
await new Promise((resolve) => setTimeout(resolve,
|
|
749
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
811
750
|
}
|
|
812
751
|
}
|
|
813
752
|
throw lastError || new Error("Retry failed with unknown error");
|
|
@@ -921,15 +860,18 @@ function parseVTTCues(vttContent) {
|
|
|
921
860
|
}
|
|
922
861
|
return cues;
|
|
923
862
|
}
|
|
924
|
-
async function buildTranscriptUrl(playbackId, trackId,
|
|
863
|
+
async function buildTranscriptUrl(playbackId, trackId, shouldSign = false) {
|
|
864
|
+
"use step";
|
|
925
865
|
const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
|
|
926
|
-
if (
|
|
866
|
+
if (shouldSign) {
|
|
867
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
927
868
|
return signUrl(baseUrl, playbackId, signingContext, "video");
|
|
928
869
|
}
|
|
929
870
|
return baseUrl;
|
|
930
871
|
}
|
|
931
872
|
async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
|
|
932
|
-
|
|
873
|
+
"use step";
|
|
874
|
+
const { languageCode, cleanTranscript = true, shouldSign } = options;
|
|
933
875
|
const track = findCaptionTrack(asset, languageCode);
|
|
934
876
|
if (!track) {
|
|
935
877
|
return { transcriptText: "" };
|
|
@@ -937,7 +879,7 @@ async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
|
|
|
937
879
|
if (!track.id) {
|
|
938
880
|
return { transcriptText: "", track };
|
|
939
881
|
}
|
|
940
|
-
const transcriptUrl = await buildTranscriptUrl(playbackId, track.id,
|
|
882
|
+
const transcriptUrl = await buildTranscriptUrl(playbackId, track.id, shouldSign);
|
|
941
883
|
try {
|
|
942
884
|
const response = await fetch(transcriptUrl);
|
|
943
885
|
if (!response.ok) {
|
|
@@ -953,14 +895,39 @@ async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
|
|
|
953
895
|
}
|
|
954
896
|
|
|
955
897
|
// src/workflows/chapters.ts
|
|
956
|
-
var chapterSchema =
|
|
957
|
-
startTime:
|
|
958
|
-
title:
|
|
898
|
+
var chapterSchema = z3.object({
|
|
899
|
+
startTime: z3.number(),
|
|
900
|
+
title: z3.string()
|
|
959
901
|
});
|
|
960
|
-
var chaptersSchema =
|
|
961
|
-
chapters:
|
|
902
|
+
var chaptersSchema = z3.object({
|
|
903
|
+
chapters: z3.array(chapterSchema)
|
|
962
904
|
});
|
|
963
|
-
|
|
905
|
+
async function generateChaptersWithAI({
|
|
906
|
+
provider,
|
|
907
|
+
modelId,
|
|
908
|
+
timestampedTranscript,
|
|
909
|
+
systemPrompt
|
|
910
|
+
}) {
|
|
911
|
+
"use step";
|
|
912
|
+
const model = createLanguageModelFromConfig(provider, modelId);
|
|
913
|
+
const response = await withRetry(
|
|
914
|
+
() => generateObject2({
|
|
915
|
+
model,
|
|
916
|
+
schema: chaptersSchema,
|
|
917
|
+
messages: [
|
|
918
|
+
{
|
|
919
|
+
role: "system",
|
|
920
|
+
content: systemPrompt
|
|
921
|
+
},
|
|
922
|
+
{
|
|
923
|
+
role: "user",
|
|
924
|
+
content: timestampedTranscript
|
|
925
|
+
}
|
|
926
|
+
]
|
|
927
|
+
})
|
|
928
|
+
);
|
|
929
|
+
return response.object;
|
|
930
|
+
}
|
|
964
931
|
var SYSTEM_PROMPT2 = `Your role is to segment the following captions into chunked chapters, summarising each chapter with a title.
|
|
965
932
|
|
|
966
933
|
Analyze the transcript and create logical chapter breaks based on topic changes, major transitions, or distinct sections of content. Each chapter should represent a meaningful segment of the video.
|
|
@@ -982,10 +949,11 @@ Important rules:
|
|
|
982
949
|
- Do not include any text before or after the JSON
|
|
983
950
|
- The JSON must be valid and parseable`;
|
|
984
951
|
async function generateChapters(assetId, languageCode, options = {}) {
|
|
985
|
-
|
|
986
|
-
const
|
|
987
|
-
const
|
|
988
|
-
const
|
|
952
|
+
"use workflow";
|
|
953
|
+
const { provider = "openai", model } = options;
|
|
954
|
+
const config = await createWorkflowConfig({ ...options, model }, provider);
|
|
955
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
|
|
956
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
989
957
|
if (policy === "signed" && !signingContext) {
|
|
990
958
|
throw new Error(
|
|
991
959
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -995,7 +963,7 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
995
963
|
languageCode,
|
|
996
964
|
cleanTranscript: false,
|
|
997
965
|
// keep timestamps for chapter segmentation
|
|
998
|
-
|
|
966
|
+
shouldSign: policy === "signed"
|
|
999
967
|
});
|
|
1000
968
|
if (!transcriptResult.track || !transcriptResult.transcriptText) {
|
|
1001
969
|
const availableLanguages = getReadyTextTracks(assetData).map((t) => t.language_code).filter(Boolean).join(", ");
|
|
@@ -1009,24 +977,12 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
1009
977
|
}
|
|
1010
978
|
let chaptersData = null;
|
|
1011
979
|
try {
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
{
|
|
1019
|
-
role: "system",
|
|
1020
|
-
content: SYSTEM_PROMPT2
|
|
1021
|
-
},
|
|
1022
|
-
{
|
|
1023
|
-
role: "user",
|
|
1024
|
-
content: timestampedTranscript
|
|
1025
|
-
}
|
|
1026
|
-
]
|
|
1027
|
-
})
|
|
1028
|
-
);
|
|
1029
|
-
chaptersData = response.object;
|
|
980
|
+
chaptersData = await generateChaptersWithAI({
|
|
981
|
+
provider: config.provider,
|
|
982
|
+
modelId: config.modelId,
|
|
983
|
+
timestampedTranscript,
|
|
984
|
+
systemPrompt: SYSTEM_PROMPT2
|
|
985
|
+
});
|
|
1030
986
|
} catch (error) {
|
|
1031
987
|
throw new Error(
|
|
1032
988
|
`Failed to generate chapters with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
@@ -1050,7 +1006,7 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
1050
1006
|
}
|
|
1051
1007
|
|
|
1052
1008
|
// src/workflows/embeddings.ts
|
|
1053
|
-
|
|
1009
|
+
import { embed } from "ai";
|
|
1054
1010
|
|
|
1055
1011
|
// src/primitives/text-chunking.ts
|
|
1056
1012
|
function estimateTokenCount(text) {
|
|
@@ -1138,13 +1094,6 @@ function chunkText(text, strategy) {
|
|
|
1138
1094
|
}
|
|
1139
1095
|
|
|
1140
1096
|
// src/workflows/embeddings.ts
|
|
1141
|
-
var DEFAULT_PROVIDER3 = "openai";
|
|
1142
|
-
var DEFAULT_CHUNKING_STRATEGY = {
|
|
1143
|
-
type: "token",
|
|
1144
|
-
maxTokens: 500,
|
|
1145
|
-
overlap: 100
|
|
1146
|
-
};
|
|
1147
|
-
var DEFAULT_BATCH_SIZE = 5;
|
|
1148
1097
|
function averageEmbeddings(embeddings) {
|
|
1149
1098
|
if (embeddings.length === 0) {
|
|
1150
1099
|
return [];
|
|
@@ -1161,51 +1110,41 @@ function averageEmbeddings(embeddings) {
|
|
|
1161
1110
|
}
|
|
1162
1111
|
return averaged;
|
|
1163
1112
|
}
|
|
1164
|
-
async function
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
})
|
|
1187
|
-
);
|
|
1188
|
-
results.push(...batchResults);
|
|
1189
|
-
}
|
|
1190
|
-
return results;
|
|
1113
|
+
async function generateSingleChunkEmbedding({
|
|
1114
|
+
chunk,
|
|
1115
|
+
provider,
|
|
1116
|
+
modelId
|
|
1117
|
+
}) {
|
|
1118
|
+
"use step";
|
|
1119
|
+
const model = createEmbeddingModelFromConfig(provider, modelId);
|
|
1120
|
+
const response = await withRetry(
|
|
1121
|
+
() => embed({
|
|
1122
|
+
model,
|
|
1123
|
+
value: chunk.text
|
|
1124
|
+
})
|
|
1125
|
+
);
|
|
1126
|
+
return {
|
|
1127
|
+
chunkId: chunk.id,
|
|
1128
|
+
embedding: response.embedding,
|
|
1129
|
+
metadata: {
|
|
1130
|
+
startTime: chunk.startTime,
|
|
1131
|
+
endTime: chunk.endTime,
|
|
1132
|
+
tokenCount: chunk.tokenCount
|
|
1133
|
+
}
|
|
1134
|
+
};
|
|
1191
1135
|
}
|
|
1192
1136
|
async function generateVideoEmbeddings(assetId, options = {}) {
|
|
1137
|
+
"use workflow";
|
|
1193
1138
|
const {
|
|
1194
|
-
provider =
|
|
1139
|
+
provider = "openai",
|
|
1195
1140
|
model,
|
|
1196
1141
|
languageCode,
|
|
1197
|
-
chunkingStrategy =
|
|
1198
|
-
batchSize =
|
|
1199
|
-
abortSignal
|
|
1142
|
+
chunkingStrategy = { type: "token", maxTokens: 500, overlap: 100 },
|
|
1143
|
+
batchSize = 5
|
|
1200
1144
|
} = options;
|
|
1201
|
-
const credentials = validateCredentials(options, provider === "google" ? "google" : "openai");
|
|
1202
|
-
const muxClient = createMuxClient(credentials);
|
|
1203
1145
|
const embeddingModel = resolveEmbeddingModel({ ...options, provider, model });
|
|
1204
|
-
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
|
|
1205
|
-
|
|
1206
|
-
assetId
|
|
1207
|
-
);
|
|
1208
|
-
const signingContext = resolveSigningContext(options);
|
|
1146
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
|
|
1147
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
1209
1148
|
if (policy === "signed" && !signingContext) {
|
|
1210
1149
|
throw new Error(
|
|
1211
1150
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1215,7 +1154,7 @@ async function generateVideoEmbeddings(assetId, options = {}) {
|
|
|
1215
1154
|
const transcriptResult = await fetchTranscriptForAsset(assetData, playbackId, {
|
|
1216
1155
|
languageCode,
|
|
1217
1156
|
cleanTranscript: !useVttChunking,
|
|
1218
|
-
|
|
1157
|
+
shouldSign: policy === "signed"
|
|
1219
1158
|
});
|
|
1220
1159
|
if (!transcriptResult.track || !transcriptResult.transcriptText) {
|
|
1221
1160
|
const availableLanguages = getReadyTextTracks(assetData).map((t) => t.language_code).filter(Boolean).join(", ");
|
|
@@ -1235,14 +1174,21 @@ async function generateVideoEmbeddings(assetId, options = {}) {
|
|
|
1235
1174
|
if (chunks.length === 0) {
|
|
1236
1175
|
throw new Error("No chunks generated from transcript");
|
|
1237
1176
|
}
|
|
1238
|
-
|
|
1177
|
+
const chunkEmbeddings = [];
|
|
1239
1178
|
try {
|
|
1240
|
-
|
|
1241
|
-
chunks,
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1179
|
+
for (let i = 0; i < chunks.length; i += batchSize) {
|
|
1180
|
+
const batch = chunks.slice(i, i + batchSize);
|
|
1181
|
+
const batchResults = await Promise.all(
|
|
1182
|
+
batch.map(
|
|
1183
|
+
(chunk) => generateSingleChunkEmbedding({
|
|
1184
|
+
chunk,
|
|
1185
|
+
provider: embeddingModel.provider,
|
|
1186
|
+
modelId: embeddingModel.modelId
|
|
1187
|
+
})
|
|
1188
|
+
)
|
|
1189
|
+
);
|
|
1190
|
+
chunkEmbeddings.push(...batchResults);
|
|
1191
|
+
}
|
|
1246
1192
|
} catch (error) {
|
|
1247
1193
|
throw new Error(
|
|
1248
1194
|
`Failed to generate embeddings with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
@@ -1271,7 +1217,8 @@ async function generateVideoEmbeddings(assetId, options = {}) {
|
|
|
1271
1217
|
|
|
1272
1218
|
// src/primitives/thumbnails.ts
|
|
1273
1219
|
async function getThumbnailUrls(playbackId, duration, options = {}) {
|
|
1274
|
-
|
|
1220
|
+
"use step";
|
|
1221
|
+
const { interval = 10, width = 640, shouldSign = false } = options;
|
|
1275
1222
|
const timestamps = [];
|
|
1276
1223
|
if (duration <= 50) {
|
|
1277
1224
|
const spacing = duration / 6;
|
|
@@ -1285,7 +1232,8 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
|
|
|
1285
1232
|
}
|
|
1286
1233
|
const baseUrl = `https://image.mux.com/${playbackId}/thumbnail.png`;
|
|
1287
1234
|
const urlPromises = timestamps.map(async (time) => {
|
|
1288
|
-
if (
|
|
1235
|
+
if (shouldSign) {
|
|
1236
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
1289
1237
|
return signUrl(baseUrl, playbackId, signingContext, "thumbnail", { time, width });
|
|
1290
1238
|
}
|
|
1291
1239
|
return `${baseUrl}?time=${time}&width=${width}`;
|
|
@@ -1298,7 +1246,7 @@ var DEFAULT_THRESHOLDS = {
|
|
|
1298
1246
|
sexual: 0.7,
|
|
1299
1247
|
violence: 0.8
|
|
1300
1248
|
};
|
|
1301
|
-
var
|
|
1249
|
+
var DEFAULT_PROVIDER2 = "openai";
|
|
1302
1250
|
var HIVE_ENDPOINT = "https://api.thehive.ai/api/v2/task/sync";
|
|
1303
1251
|
var HIVE_SEXUAL_CATEGORIES = [
|
|
1304
1252
|
"general_nsfw",
|
|
@@ -1336,6 +1284,7 @@ var HIVE_VIOLENCE_CATEGORIES = [
|
|
|
1336
1284
|
"garm_death_injury_or_military_conflict"
|
|
1337
1285
|
];
|
|
1338
1286
|
async function processConcurrently(items, processor, maxConcurrent = 5) {
|
|
1287
|
+
"use step";
|
|
1339
1288
|
const results = [];
|
|
1340
1289
|
for (let i = 0; i < items.length; i += maxConcurrent) {
|
|
1341
1290
|
const batch = items.slice(i, i + maxConcurrent);
|
|
@@ -1345,11 +1294,14 @@ async function processConcurrently(items, processor, maxConcurrent = 5) {
|
|
|
1345
1294
|
}
|
|
1346
1295
|
return results;
|
|
1347
1296
|
}
|
|
1348
|
-
async function requestOpenAIModeration(imageUrls,
|
|
1297
|
+
async function requestOpenAIModeration(imageUrls, model, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
|
|
1298
|
+
"use step";
|
|
1349
1299
|
const targetUrls = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map(
|
|
1350
|
-
(img) => ({ url: img.url, image: img.base64Data })
|
|
1351
|
-
) : imageUrls.map((url) => ({ url, image: url }));
|
|
1300
|
+
(img) => ({ url: img.url, image: img.base64Data, model })
|
|
1301
|
+
) : imageUrls.map((url) => ({ url, image: url, model }));
|
|
1352
1302
|
const moderate = async (entry) => {
|
|
1303
|
+
"use step";
|
|
1304
|
+
const apiKey = getApiKeyFromEnv("openai");
|
|
1353
1305
|
try {
|
|
1354
1306
|
const res = await fetch("https://api.openai.com/v1/moderations", {
|
|
1355
1307
|
method: "POST",
|
|
@@ -1358,7 +1310,7 @@ async function requestOpenAIModeration(imageUrls, apiKey, model, maxConcurrent =
|
|
|
1358
1310
|
"Authorization": `Bearer ${apiKey}`
|
|
1359
1311
|
},
|
|
1360
1312
|
body: JSON.stringify({
|
|
1361
|
-
model,
|
|
1313
|
+
model: entry.model,
|
|
1362
1314
|
input: [
|
|
1363
1315
|
{
|
|
1364
1316
|
type: "image_url",
|
|
@@ -1401,7 +1353,8 @@ function getHiveCategoryScores(classes, categoryNames) {
|
|
|
1401
1353
|
const scores = categoryNames.map((category) => scoreMap[category] || 0);
|
|
1402
1354
|
return Math.max(...scores, 0);
|
|
1403
1355
|
}
|
|
1404
|
-
async function requestHiveModeration(imageUrls,
|
|
1356
|
+
async function requestHiveModeration(imageUrls, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
|
|
1357
|
+
"use step";
|
|
1405
1358
|
const targets = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map((img) => ({
|
|
1406
1359
|
url: img.url,
|
|
1407
1360
|
source: {
|
|
@@ -1414,6 +1367,8 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
|
|
|
1414
1367
|
source: { kind: "url", value: url }
|
|
1415
1368
|
}));
|
|
1416
1369
|
const moderate = async (entry) => {
|
|
1370
|
+
"use step";
|
|
1371
|
+
const apiKey = getApiKeyFromEnv("hive");
|
|
1417
1372
|
try {
|
|
1418
1373
|
const formData = new FormData();
|
|
1419
1374
|
if (entry.source.kind === "url") {
|
|
@@ -1459,8 +1414,9 @@ async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submi
|
|
|
1459
1414
|
return processConcurrently(targets, moderate, maxConcurrent);
|
|
1460
1415
|
}
|
|
1461
1416
|
async function getModerationScores(assetId, options = {}) {
|
|
1417
|
+
"use workflow";
|
|
1462
1418
|
const {
|
|
1463
|
-
provider =
|
|
1419
|
+
provider = DEFAULT_PROVIDER2,
|
|
1464
1420
|
model = provider === "openai" ? "omni-moderation-latest" : void 0,
|
|
1465
1421
|
thresholds = DEFAULT_THRESHOLDS,
|
|
1466
1422
|
thumbnailInterval = 10,
|
|
@@ -1469,11 +1425,9 @@ async function getModerationScores(assetId, options = {}) {
|
|
|
1469
1425
|
imageSubmissionMode = "url",
|
|
1470
1426
|
imageDownloadOptions
|
|
1471
1427
|
} = options;
|
|
1472
|
-
const
|
|
1473
|
-
const muxClient = createMuxClient(credentials);
|
|
1474
|
-
const { asset, playbackId, policy } = await getPlaybackIdForAsset(muxClient, assetId);
|
|
1428
|
+
const { asset, playbackId, policy } = await getPlaybackIdForAsset(assetId);
|
|
1475
1429
|
const duration = asset.duration || 0;
|
|
1476
|
-
const signingContext =
|
|
1430
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
1477
1431
|
if (policy === "signed" && !signingContext) {
|
|
1478
1432
|
throw new Error(
|
|
1479
1433
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1482,30 +1436,20 @@ async function getModerationScores(assetId, options = {}) {
|
|
|
1482
1436
|
const thumbnailUrls = await getThumbnailUrls(playbackId, duration, {
|
|
1483
1437
|
interval: thumbnailInterval,
|
|
1484
1438
|
width: thumbnailWidth,
|
|
1485
|
-
|
|
1439
|
+
shouldSign: policy === "signed"
|
|
1486
1440
|
});
|
|
1487
1441
|
let thumbnailScores;
|
|
1488
1442
|
if (provider === "openai") {
|
|
1489
|
-
const apiKey = credentials.openaiApiKey;
|
|
1490
|
-
if (!apiKey) {
|
|
1491
|
-
throw new Error("OpenAI API key is required for moderation. Set OPENAI_API_KEY or pass openaiApiKey.");
|
|
1492
|
-
}
|
|
1493
1443
|
thumbnailScores = await requestOpenAIModeration(
|
|
1494
1444
|
thumbnailUrls,
|
|
1495
|
-
apiKey,
|
|
1496
1445
|
model || "omni-moderation-latest",
|
|
1497
1446
|
maxConcurrent,
|
|
1498
1447
|
imageSubmissionMode,
|
|
1499
1448
|
imageDownloadOptions
|
|
1500
1449
|
);
|
|
1501
1450
|
} else if (provider === "hive") {
|
|
1502
|
-
const hiveApiKey = options.hiveApiKey || env_default.HIVE_API_KEY;
|
|
1503
|
-
if (!hiveApiKey) {
|
|
1504
|
-
throw new Error("Hive API key is required for moderation. Set HIVE_API_KEY or pass hiveApiKey.");
|
|
1505
|
-
}
|
|
1506
1451
|
thumbnailScores = await requestHiveModeration(
|
|
1507
1452
|
thumbnailUrls,
|
|
1508
|
-
hiveApiKey,
|
|
1509
1453
|
maxConcurrent,
|
|
1510
1454
|
imageSubmissionMode,
|
|
1511
1455
|
imageDownloadOptions
|
|
@@ -1529,17 +1473,18 @@ async function getModerationScores(assetId, options = {}) {
|
|
|
1529
1473
|
}
|
|
1530
1474
|
|
|
1531
1475
|
// src/workflows/summarization.ts
|
|
1532
|
-
|
|
1533
|
-
|
|
1476
|
+
import { generateObject as generateObject3 } from "ai";
|
|
1477
|
+
import dedent2 from "dedent";
|
|
1478
|
+
import { z as z4 } from "zod";
|
|
1534
1479
|
var SUMMARY_KEYWORD_LIMIT = 10;
|
|
1535
|
-
var summarySchema =
|
|
1536
|
-
keywords:
|
|
1537
|
-
title:
|
|
1538
|
-
description:
|
|
1480
|
+
var summarySchema = z4.object({
|
|
1481
|
+
keywords: z4.array(z4.string()),
|
|
1482
|
+
title: z4.string(),
|
|
1483
|
+
description: z4.string()
|
|
1539
1484
|
});
|
|
1540
1485
|
var TONE_INSTRUCTIONS = {
|
|
1541
|
-
|
|
1542
|
-
|
|
1486
|
+
neutral: "Provide a clear, straightforward analysis.",
|
|
1487
|
+
playful: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
|
|
1543
1488
|
professional: "Provide a professional, executive-level analysis suitable for business reporting."
|
|
1544
1489
|
};
|
|
1545
1490
|
var summarizationPromptBuilder = createPromptBuilder({
|
|
@@ -1550,7 +1495,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1550
1495
|
},
|
|
1551
1496
|
title: {
|
|
1552
1497
|
tag: "title_requirements",
|
|
1553
|
-
content:
|
|
1498
|
+
content: dedent2`
|
|
1554
1499
|
A short, compelling headline that immediately communicates the subject or action.
|
|
1555
1500
|
Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
|
|
1556
1501
|
Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
|
|
@@ -1558,7 +1503,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1558
1503
|
},
|
|
1559
1504
|
description: {
|
|
1560
1505
|
tag: "description_requirements",
|
|
1561
|
-
content:
|
|
1506
|
+
content: dedent2`
|
|
1562
1507
|
A concise summary (2-4 sentences) that describes what happens across the video.
|
|
1563
1508
|
Cover the main subjects, actions, setting, and any notable progression visible across frames.
|
|
1564
1509
|
Write in present tense. Be specific about observable details rather than making assumptions.
|
|
@@ -1566,7 +1511,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1566
1511
|
},
|
|
1567
1512
|
keywords: {
|
|
1568
1513
|
tag: "keywords_requirements",
|
|
1569
|
-
content:
|
|
1514
|
+
content: dedent2`
|
|
1570
1515
|
Specific, searchable terms (up to 10) that capture:
|
|
1571
1516
|
- Primary subjects (people, animals, objects)
|
|
1572
1517
|
- Actions and activities being performed
|
|
@@ -1578,7 +1523,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1578
1523
|
},
|
|
1579
1524
|
qualityGuidelines: {
|
|
1580
1525
|
tag: "quality_guidelines",
|
|
1581
|
-
content:
|
|
1526
|
+
content: dedent2`
|
|
1582
1527
|
- Examine all frames to understand the full context and progression
|
|
1583
1528
|
- Be precise: "golden retriever" is better than "dog" when identifiable
|
|
1584
1529
|
- Capture the narrative: what begins, develops, and concludes
|
|
@@ -1587,7 +1532,7 @@ var summarizationPromptBuilder = createPromptBuilder({
|
|
|
1587
1532
|
},
|
|
1588
1533
|
sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
|
|
1589
1534
|
});
|
|
1590
|
-
var SYSTEM_PROMPT3 =
|
|
1535
|
+
var SYSTEM_PROMPT3 = dedent2`
|
|
1591
1536
|
<role>
|
|
1592
1537
|
You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
|
|
1593
1538
|
</role>
|
|
@@ -1619,7 +1564,29 @@ var SYSTEM_PROMPT3 = dedent_default`
|
|
|
1619
1564
|
- Only describe what is clearly observable in the frames or explicitly stated in the transcript
|
|
1620
1565
|
- Do not fabricate details or make unsupported assumptions
|
|
1621
1566
|
- Return structured data matching the requested schema
|
|
1622
|
-
</constraints
|
|
1567
|
+
</constraints>
|
|
1568
|
+
|
|
1569
|
+
<tone_guidance>
|
|
1570
|
+
Pay special attention to the <tone> section and lean heavily into those instructions.
|
|
1571
|
+
Adapt your entire analysis and writing style to match the specified tone - this should influence
|
|
1572
|
+
your word choice, personality, formality level, and overall presentation of the content.
|
|
1573
|
+
The tone instructions are not suggestions but core requirements for how you should express yourself.
|
|
1574
|
+
</tone_guidance>
|
|
1575
|
+
|
|
1576
|
+
<language_guidelines>
|
|
1577
|
+
AVOID these meta-descriptive phrases that reference the medium rather than the content:
|
|
1578
|
+
- "The image shows..." / "The storyboard shows..."
|
|
1579
|
+
- "In this video..." / "This video features..."
|
|
1580
|
+
- "The frames depict..." / "The footage shows..."
|
|
1581
|
+
- "We can see..." / "You can see..."
|
|
1582
|
+
- "The clip shows..." / "The scene shows..."
|
|
1583
|
+
|
|
1584
|
+
INSTEAD, describe the content directly:
|
|
1585
|
+
- BAD: "The video shows a chef preparing a meal"
|
|
1586
|
+
- GOOD: "A chef prepares a meal in a professional kitchen"
|
|
1587
|
+
|
|
1588
|
+
Write as if describing reality, not describing a recording of reality.
|
|
1589
|
+
</language_guidelines>`;
|
|
1623
1590
|
function buildUserPrompt2({
|
|
1624
1591
|
tone,
|
|
1625
1592
|
transcriptText,
|
|
@@ -1633,8 +1600,37 @@ function buildUserPrompt2({
|
|
|
1633
1600
|
}
|
|
1634
1601
|
return summarizationPromptBuilder.buildWithContext(promptOverrides, contextSections);
|
|
1635
1602
|
}
|
|
1636
|
-
|
|
1637
|
-
|
|
1603
|
+
async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, systemPrompt) {
|
|
1604
|
+
"use step";
|
|
1605
|
+
const model = createLanguageModelFromConfig(provider, modelId);
|
|
1606
|
+
const response = await generateObject3({
|
|
1607
|
+
model,
|
|
1608
|
+
schema: summarySchema,
|
|
1609
|
+
messages: [
|
|
1610
|
+
{
|
|
1611
|
+
role: "system",
|
|
1612
|
+
content: systemPrompt
|
|
1613
|
+
},
|
|
1614
|
+
{
|
|
1615
|
+
role: "user",
|
|
1616
|
+
content: [
|
|
1617
|
+
{ type: "text", text: userPrompt },
|
|
1618
|
+
{ type: "image", image: imageDataUrl }
|
|
1619
|
+
]
|
|
1620
|
+
}
|
|
1621
|
+
]
|
|
1622
|
+
});
|
|
1623
|
+
return {
|
|
1624
|
+
result: response.object,
|
|
1625
|
+
usage: {
|
|
1626
|
+
inputTokens: response.usage.inputTokens,
|
|
1627
|
+
outputTokens: response.usage.outputTokens,
|
|
1628
|
+
totalTokens: response.usage.totalTokens,
|
|
1629
|
+
reasoningTokens: response.usage.reasoningTokens,
|
|
1630
|
+
cachedInputTokens: response.usage.cachedInputTokens
|
|
1631
|
+
}
|
|
1632
|
+
};
|
|
1633
|
+
}
|
|
1638
1634
|
function normalizeKeywords(keywords) {
|
|
1639
1635
|
if (!Array.isArray(keywords) || keywords.length === 0) {
|
|
1640
1636
|
return [];
|
|
@@ -1659,23 +1655,24 @@ function normalizeKeywords(keywords) {
|
|
|
1659
1655
|
return normalized;
|
|
1660
1656
|
}
|
|
1661
1657
|
async function getSummaryAndTags(assetId, options) {
|
|
1658
|
+
"use workflow";
|
|
1662
1659
|
const {
|
|
1663
|
-
provider =
|
|
1660
|
+
provider = "openai",
|
|
1664
1661
|
model,
|
|
1665
|
-
tone =
|
|
1662
|
+
tone = "neutral",
|
|
1666
1663
|
includeTranscript = true,
|
|
1667
1664
|
cleanTranscript = true,
|
|
1668
1665
|
imageSubmissionMode = "url",
|
|
1669
1666
|
imageDownloadOptions,
|
|
1670
|
-
abortSignal,
|
|
1667
|
+
abortSignal: _abortSignal,
|
|
1671
1668
|
promptOverrides
|
|
1672
1669
|
} = options ?? {};
|
|
1673
|
-
const
|
|
1670
|
+
const config = await createWorkflowConfig(
|
|
1674
1671
|
{ ...options, model },
|
|
1675
1672
|
provider
|
|
1676
1673
|
);
|
|
1677
|
-
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
|
|
1678
|
-
const signingContext =
|
|
1674
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
|
|
1675
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
1679
1676
|
if (policy === "signed" && !signingContext) {
|
|
1680
1677
|
throw new Error(
|
|
1681
1678
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -1683,7 +1680,7 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
1683
1680
|
}
|
|
1684
1681
|
const transcriptText = includeTranscript ? (await fetchTranscriptForAsset(assetData, playbackId, {
|
|
1685
1682
|
cleanTranscript,
|
|
1686
|
-
|
|
1683
|
+
shouldSign: policy === "signed"
|
|
1687
1684
|
})).transcriptText : "";
|
|
1688
1685
|
const userPrompt = buildUserPrompt2({
|
|
1689
1686
|
tone,
|
|
@@ -1691,67 +1688,214 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
1691
1688
|
isCleanTranscript: cleanTranscript,
|
|
1692
1689
|
promptOverrides
|
|
1693
1690
|
});
|
|
1694
|
-
const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed"
|
|
1695
|
-
|
|
1696
|
-
const response = await (0, import_ai4.generateObject)({
|
|
1697
|
-
model: clients.languageModel.model,
|
|
1698
|
-
schema: summarySchema,
|
|
1699
|
-
abortSignal,
|
|
1700
|
-
messages: [
|
|
1701
|
-
{
|
|
1702
|
-
role: "system",
|
|
1703
|
-
content: SYSTEM_PROMPT3
|
|
1704
|
-
},
|
|
1705
|
-
{
|
|
1706
|
-
role: "user",
|
|
1707
|
-
content: [
|
|
1708
|
-
{ type: "text", text: userPrompt },
|
|
1709
|
-
{ type: "image", image: imageDataUrl }
|
|
1710
|
-
]
|
|
1711
|
-
}
|
|
1712
|
-
]
|
|
1713
|
-
});
|
|
1714
|
-
return response.object;
|
|
1715
|
-
};
|
|
1716
|
-
let aiAnalysis = null;
|
|
1691
|
+
const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed");
|
|
1692
|
+
let analysisResponse;
|
|
1717
1693
|
try {
|
|
1718
1694
|
if (imageSubmissionMode === "base64") {
|
|
1719
1695
|
const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
|
|
1720
|
-
|
|
1696
|
+
analysisResponse = await analyzeStoryboard2(
|
|
1697
|
+
downloadResult.base64Data,
|
|
1698
|
+
config.provider,
|
|
1699
|
+
config.modelId,
|
|
1700
|
+
userPrompt,
|
|
1701
|
+
SYSTEM_PROMPT3
|
|
1702
|
+
);
|
|
1721
1703
|
} else {
|
|
1722
|
-
|
|
1704
|
+
analysisResponse = await withRetry(() => analyzeStoryboard2(imageUrl, config.provider, config.modelId, userPrompt, SYSTEM_PROMPT3));
|
|
1723
1705
|
}
|
|
1724
1706
|
} catch (error) {
|
|
1725
1707
|
throw new Error(
|
|
1726
1708
|
`Failed to analyze video content with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
1727
1709
|
);
|
|
1728
1710
|
}
|
|
1729
|
-
if (!
|
|
1711
|
+
if (!analysisResponse.result) {
|
|
1730
1712
|
throw new Error(`Failed to analyze video content for asset ${assetId}`);
|
|
1731
1713
|
}
|
|
1732
|
-
if (!
|
|
1714
|
+
if (!analysisResponse.result.title) {
|
|
1733
1715
|
throw new Error(`Failed to generate title for asset ${assetId}`);
|
|
1734
1716
|
}
|
|
1735
|
-
if (!
|
|
1717
|
+
if (!analysisResponse.result.description) {
|
|
1736
1718
|
throw new Error(`Failed to generate description for asset ${assetId}`);
|
|
1737
1719
|
}
|
|
1738
1720
|
return {
|
|
1739
1721
|
assetId,
|
|
1740
|
-
title:
|
|
1741
|
-
description:
|
|
1742
|
-
tags: normalizeKeywords(
|
|
1743
|
-
storyboardUrl: imageUrl
|
|
1722
|
+
title: analysisResponse.result.title,
|
|
1723
|
+
description: analysisResponse.result.description,
|
|
1724
|
+
tags: normalizeKeywords(analysisResponse.result.keywords),
|
|
1725
|
+
storyboardUrl: imageUrl,
|
|
1726
|
+
usage: analysisResponse.usage,
|
|
1727
|
+
transcriptText: transcriptText || void 0
|
|
1744
1728
|
};
|
|
1745
1729
|
}
|
|
1746
1730
|
|
|
1747
1731
|
// src/workflows/translate-audio.ts
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
var
|
|
1732
|
+
import Mux3 from "@mux/mux-node";
|
|
1733
|
+
|
|
1734
|
+
// src/lib/language-codes.ts
|
|
1735
|
+
var ISO639_1_TO_3 = {
|
|
1736
|
+
// Major world languages
|
|
1737
|
+
en: "eng",
|
|
1738
|
+
// English
|
|
1739
|
+
es: "spa",
|
|
1740
|
+
// Spanish
|
|
1741
|
+
fr: "fra",
|
|
1742
|
+
// French
|
|
1743
|
+
de: "deu",
|
|
1744
|
+
// German
|
|
1745
|
+
it: "ita",
|
|
1746
|
+
// Italian
|
|
1747
|
+
pt: "por",
|
|
1748
|
+
// Portuguese
|
|
1749
|
+
ru: "rus",
|
|
1750
|
+
// Russian
|
|
1751
|
+
zh: "zho",
|
|
1752
|
+
// Chinese
|
|
1753
|
+
ja: "jpn",
|
|
1754
|
+
// Japanese
|
|
1755
|
+
ko: "kor",
|
|
1756
|
+
// Korean
|
|
1757
|
+
ar: "ara",
|
|
1758
|
+
// Arabic
|
|
1759
|
+
hi: "hin",
|
|
1760
|
+
// Hindi
|
|
1761
|
+
// European languages
|
|
1762
|
+
nl: "nld",
|
|
1763
|
+
// Dutch
|
|
1764
|
+
pl: "pol",
|
|
1765
|
+
// Polish
|
|
1766
|
+
sv: "swe",
|
|
1767
|
+
// Swedish
|
|
1768
|
+
da: "dan",
|
|
1769
|
+
// Danish
|
|
1770
|
+
no: "nor",
|
|
1771
|
+
// Norwegian
|
|
1772
|
+
fi: "fin",
|
|
1773
|
+
// Finnish
|
|
1774
|
+
el: "ell",
|
|
1775
|
+
// Greek
|
|
1776
|
+
cs: "ces",
|
|
1777
|
+
// Czech
|
|
1778
|
+
hu: "hun",
|
|
1779
|
+
// Hungarian
|
|
1780
|
+
ro: "ron",
|
|
1781
|
+
// Romanian
|
|
1782
|
+
bg: "bul",
|
|
1783
|
+
// Bulgarian
|
|
1784
|
+
hr: "hrv",
|
|
1785
|
+
// Croatian
|
|
1786
|
+
sk: "slk",
|
|
1787
|
+
// Slovak
|
|
1788
|
+
sl: "slv",
|
|
1789
|
+
// Slovenian
|
|
1790
|
+
uk: "ukr",
|
|
1791
|
+
// Ukrainian
|
|
1792
|
+
tr: "tur",
|
|
1793
|
+
// Turkish
|
|
1794
|
+
// Asian languages
|
|
1795
|
+
th: "tha",
|
|
1796
|
+
// Thai
|
|
1797
|
+
vi: "vie",
|
|
1798
|
+
// Vietnamese
|
|
1799
|
+
id: "ind",
|
|
1800
|
+
// Indonesian
|
|
1801
|
+
ms: "msa",
|
|
1802
|
+
// Malay
|
|
1803
|
+
tl: "tgl",
|
|
1804
|
+
// Tagalog/Filipino
|
|
1805
|
+
// Other languages
|
|
1806
|
+
he: "heb",
|
|
1807
|
+
// Hebrew
|
|
1808
|
+
fa: "fas",
|
|
1809
|
+
// Persian/Farsi
|
|
1810
|
+
bn: "ben",
|
|
1811
|
+
// Bengali
|
|
1812
|
+
ta: "tam",
|
|
1813
|
+
// Tamil
|
|
1814
|
+
te: "tel",
|
|
1815
|
+
// Telugu
|
|
1816
|
+
mr: "mar",
|
|
1817
|
+
// Marathi
|
|
1818
|
+
gu: "guj",
|
|
1819
|
+
// Gujarati
|
|
1820
|
+
kn: "kan",
|
|
1821
|
+
// Kannada
|
|
1822
|
+
ml: "mal",
|
|
1823
|
+
// Malayalam
|
|
1824
|
+
pa: "pan",
|
|
1825
|
+
// Punjabi
|
|
1826
|
+
ur: "urd",
|
|
1827
|
+
// Urdu
|
|
1828
|
+
sw: "swa",
|
|
1829
|
+
// Swahili
|
|
1830
|
+
af: "afr",
|
|
1831
|
+
// Afrikaans
|
|
1832
|
+
ca: "cat",
|
|
1833
|
+
// Catalan
|
|
1834
|
+
eu: "eus",
|
|
1835
|
+
// Basque
|
|
1836
|
+
gl: "glg",
|
|
1837
|
+
// Galician
|
|
1838
|
+
is: "isl",
|
|
1839
|
+
// Icelandic
|
|
1840
|
+
et: "est",
|
|
1841
|
+
// Estonian
|
|
1842
|
+
lv: "lav",
|
|
1843
|
+
// Latvian
|
|
1844
|
+
lt: "lit"
|
|
1845
|
+
// Lithuanian
|
|
1846
|
+
};
|
|
1847
|
+
var ISO639_3_TO_1 = Object.fromEntries(
|
|
1848
|
+
Object.entries(ISO639_1_TO_3).map(([iso1, iso3]) => [iso3, iso1])
|
|
1849
|
+
);
|
|
1850
|
+
function toISO639_3(code) {
|
|
1851
|
+
const normalized = code.toLowerCase().trim();
|
|
1852
|
+
if (normalized.length === 3) {
|
|
1853
|
+
return normalized;
|
|
1854
|
+
}
|
|
1855
|
+
return ISO639_1_TO_3[normalized] ?? normalized;
|
|
1856
|
+
}
|
|
1857
|
+
function toISO639_1(code) {
|
|
1858
|
+
const normalized = code.toLowerCase().trim();
|
|
1859
|
+
if (normalized.length === 2) {
|
|
1860
|
+
return normalized;
|
|
1861
|
+
}
|
|
1862
|
+
return ISO639_3_TO_1[normalized] ?? normalized;
|
|
1863
|
+
}
|
|
1864
|
+
function getLanguageCodePair(code) {
|
|
1865
|
+
const normalized = code.toLowerCase().trim();
|
|
1866
|
+
if (normalized.length === 2) {
|
|
1867
|
+
return {
|
|
1868
|
+
iso639_1: normalized,
|
|
1869
|
+
iso639_3: toISO639_3(normalized)
|
|
1870
|
+
};
|
|
1871
|
+
} else if (normalized.length === 3) {
|
|
1872
|
+
return {
|
|
1873
|
+
iso639_1: toISO639_1(normalized),
|
|
1874
|
+
iso639_3: normalized
|
|
1875
|
+
};
|
|
1876
|
+
}
|
|
1877
|
+
return {
|
|
1878
|
+
iso639_1: normalized,
|
|
1879
|
+
iso639_3: normalized
|
|
1880
|
+
};
|
|
1881
|
+
}
|
|
1882
|
+
function getLanguageName(code) {
|
|
1883
|
+
const iso639_1 = toISO639_1(code);
|
|
1884
|
+
try {
|
|
1885
|
+
const displayNames = new Intl.DisplayNames(["en"], { type: "language" });
|
|
1886
|
+
return displayNames.of(iso639_1) ?? code.toUpperCase();
|
|
1887
|
+
} catch {
|
|
1888
|
+
return code.toUpperCase();
|
|
1889
|
+
}
|
|
1890
|
+
}
|
|
1891
|
+
|
|
1892
|
+
// src/workflows/translate-audio.ts
|
|
1752
1893
|
var STATIC_RENDITION_POLL_INTERVAL_MS = 5e3;
|
|
1753
1894
|
var STATIC_RENDITION_MAX_ATTEMPTS = 36;
|
|
1754
|
-
|
|
1895
|
+
async function sleep(ms) {
|
|
1896
|
+
"use step";
|
|
1897
|
+
await new Promise((resolve) => setTimeout(resolve, ms));
|
|
1898
|
+
}
|
|
1755
1899
|
function getReadyAudioStaticRendition(asset) {
|
|
1756
1900
|
const files = asset.static_renditions?.files;
|
|
1757
1901
|
if (!files || files.length === 0) {
|
|
@@ -1762,19 +1906,22 @@ function getReadyAudioStaticRendition(asset) {
|
|
|
1762
1906
|
);
|
|
1763
1907
|
}
|
|
1764
1908
|
var hasReadyAudioStaticRendition = (asset) => Boolean(getReadyAudioStaticRendition(asset));
|
|
1765
|
-
async function requestStaticRenditionCreation(
|
|
1766
|
-
|
|
1909
|
+
async function requestStaticRenditionCreation(assetId) {
|
|
1910
|
+
"use step";
|
|
1911
|
+
const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
|
|
1912
|
+
const mux = new Mux3({
|
|
1913
|
+
tokenId: muxTokenId,
|
|
1914
|
+
tokenSecret: muxTokenSecret
|
|
1915
|
+
});
|
|
1767
1916
|
try {
|
|
1768
|
-
await
|
|
1917
|
+
await mux.video.assets.createStaticRendition(assetId, {
|
|
1769
1918
|
resolution: "audio-only"
|
|
1770
1919
|
});
|
|
1771
|
-
console.log("\u{1F4FC} Static rendition request accepted by Mux.");
|
|
1772
1920
|
} catch (error) {
|
|
1773
1921
|
const statusCode = error?.status ?? error?.statusCode;
|
|
1774
1922
|
const messages = error?.error?.messages;
|
|
1775
1923
|
const alreadyDefined = messages?.some((message2) => message2.toLowerCase().includes("already defined")) ?? error?.message?.toLowerCase().includes("already defined");
|
|
1776
1924
|
if (statusCode === 409 || alreadyDefined) {
|
|
1777
|
-
console.log("\u2139\uFE0F Static rendition already requested. Waiting for it to finish...");
|
|
1778
1925
|
return;
|
|
1779
1926
|
}
|
|
1780
1927
|
const message = error instanceof Error ? error.message : "Unknown error";
|
|
@@ -1783,31 +1930,34 @@ async function requestStaticRenditionCreation(muxClient, assetId) {
|
|
|
1783
1930
|
}
|
|
1784
1931
|
async function waitForAudioStaticRendition({
|
|
1785
1932
|
assetId,
|
|
1786
|
-
muxClient,
|
|
1787
1933
|
initialAsset
|
|
1788
1934
|
}) {
|
|
1935
|
+
"use step";
|
|
1936
|
+
const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
|
|
1937
|
+
const mux = new Mux3({
|
|
1938
|
+
tokenId: muxTokenId,
|
|
1939
|
+
tokenSecret: muxTokenSecret
|
|
1940
|
+
});
|
|
1789
1941
|
let currentAsset = initialAsset;
|
|
1790
1942
|
if (hasReadyAudioStaticRendition(currentAsset)) {
|
|
1791
1943
|
return currentAsset;
|
|
1792
1944
|
}
|
|
1793
1945
|
const status = currentAsset.static_renditions?.status ?? "not_requested";
|
|
1794
1946
|
if (status === "not_requested" || status === void 0) {
|
|
1795
|
-
await requestStaticRenditionCreation(
|
|
1947
|
+
await requestStaticRenditionCreation(assetId);
|
|
1796
1948
|
} else if (status === "errored") {
|
|
1797
|
-
|
|
1798
|
-
await requestStaticRenditionCreation(muxClient, assetId);
|
|
1949
|
+
await requestStaticRenditionCreation(assetId);
|
|
1799
1950
|
} else {
|
|
1800
|
-
console.
|
|
1951
|
+
console.warn(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
|
|
1801
1952
|
}
|
|
1802
1953
|
for (let attempt = 1; attempt <= STATIC_RENDITION_MAX_ATTEMPTS; attempt++) {
|
|
1803
|
-
await
|
|
1804
|
-
currentAsset = await
|
|
1954
|
+
await sleep(STATIC_RENDITION_POLL_INTERVAL_MS);
|
|
1955
|
+
currentAsset = await mux.video.assets.retrieve(assetId);
|
|
1805
1956
|
if (hasReadyAudioStaticRendition(currentAsset)) {
|
|
1806
|
-
console.log("\u2705 Audio static rendition is ready!");
|
|
1807
1957
|
return currentAsset;
|
|
1808
1958
|
}
|
|
1809
1959
|
const currentStatus = currentAsset.static_renditions?.status || "unknown";
|
|
1810
|
-
console.
|
|
1960
|
+
console.warn(
|
|
1811
1961
|
`\u231B Waiting for static rendition (attempt ${attempt}/${STATIC_RENDITION_MAX_ATTEMPTS}) \u2192 ${currentStatus}`
|
|
1812
1962
|
);
|
|
1813
1963
|
if (currentStatus === "errored") {
|
|
@@ -1820,55 +1970,179 @@ async function waitForAudioStaticRendition({
|
|
|
1820
1970
|
"Timed out waiting for the static rendition to become ready. Please try again in a moment."
|
|
1821
1971
|
);
|
|
1822
1972
|
}
|
|
1973
|
+
async function fetchAudioFromMux(audioUrl) {
|
|
1974
|
+
"use step";
|
|
1975
|
+
const audioResponse = await fetch(audioUrl);
|
|
1976
|
+
if (!audioResponse.ok) {
|
|
1977
|
+
throw new Error(`Failed to fetch audio file: ${audioResponse.statusText}`);
|
|
1978
|
+
}
|
|
1979
|
+
return audioResponse.arrayBuffer();
|
|
1980
|
+
}
|
|
1981
|
+
async function createElevenLabsDubbingJob({
|
|
1982
|
+
audioBuffer,
|
|
1983
|
+
assetId,
|
|
1984
|
+
elevenLabsLangCode,
|
|
1985
|
+
numSpeakers
|
|
1986
|
+
}) {
|
|
1987
|
+
"use step";
|
|
1988
|
+
const elevenLabsApiKey = getApiKeyFromEnv("elevenlabs");
|
|
1989
|
+
const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
|
|
1990
|
+
const formData = new FormData();
|
|
1991
|
+
formData.append("file", audioBlob);
|
|
1992
|
+
formData.append("target_lang", elevenLabsLangCode);
|
|
1993
|
+
formData.append("num_speakers", numSpeakers.toString());
|
|
1994
|
+
formData.append("name", `Mux Asset ${assetId} - auto to ${elevenLabsLangCode}`);
|
|
1995
|
+
const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
|
|
1996
|
+
method: "POST",
|
|
1997
|
+
headers: {
|
|
1998
|
+
"xi-api-key": elevenLabsApiKey
|
|
1999
|
+
},
|
|
2000
|
+
body: formData
|
|
2001
|
+
});
|
|
2002
|
+
if (!dubbingResponse.ok) {
|
|
2003
|
+
throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
|
|
2004
|
+
}
|
|
2005
|
+
const dubbingData = await dubbingResponse.json();
|
|
2006
|
+
return dubbingData.dubbing_id;
|
|
2007
|
+
}
|
|
2008
|
+
async function checkElevenLabsDubbingStatus({
|
|
2009
|
+
dubbingId
|
|
2010
|
+
}) {
|
|
2011
|
+
"use step";
|
|
2012
|
+
const elevenLabsApiKey = getApiKeyFromEnv("elevenlabs");
|
|
2013
|
+
const statusResponse = await fetch(`https://api.elevenlabs.io/v1/dubbing/${dubbingId}`, {
|
|
2014
|
+
headers: {
|
|
2015
|
+
"xi-api-key": elevenLabsApiKey
|
|
2016
|
+
}
|
|
2017
|
+
});
|
|
2018
|
+
if (!statusResponse.ok) {
|
|
2019
|
+
throw new Error(`Status check failed: ${statusResponse.statusText}`);
|
|
2020
|
+
}
|
|
2021
|
+
const statusData = await statusResponse.json();
|
|
2022
|
+
return {
|
|
2023
|
+
status: statusData.status,
|
|
2024
|
+
targetLanguages: statusData.target_languages ?? []
|
|
2025
|
+
};
|
|
2026
|
+
}
|
|
2027
|
+
async function downloadDubbedAudioFromElevenLabs({
|
|
2028
|
+
dubbingId,
|
|
2029
|
+
languageCode
|
|
2030
|
+
}) {
|
|
2031
|
+
"use step";
|
|
2032
|
+
const elevenLabsApiKey = getApiKeyFromEnv("elevenlabs");
|
|
2033
|
+
const audioUrl = `https://api.elevenlabs.io/v1/dubbing/${dubbingId}/audio/${languageCode}`;
|
|
2034
|
+
const audioResponse = await fetch(audioUrl, {
|
|
2035
|
+
headers: {
|
|
2036
|
+
"xi-api-key": elevenLabsApiKey
|
|
2037
|
+
}
|
|
2038
|
+
});
|
|
2039
|
+
if (!audioResponse.ok) {
|
|
2040
|
+
throw new Error(`Failed to fetch dubbed audio: ${audioResponse.statusText}`);
|
|
2041
|
+
}
|
|
2042
|
+
return audioResponse.arrayBuffer();
|
|
2043
|
+
}
|
|
2044
|
+
async function uploadDubbedAudioToS3({
|
|
2045
|
+
dubbedAudioBuffer,
|
|
2046
|
+
assetId,
|
|
2047
|
+
toLanguageCode,
|
|
2048
|
+
s3Endpoint,
|
|
2049
|
+
s3Region,
|
|
2050
|
+
s3Bucket
|
|
2051
|
+
}) {
|
|
2052
|
+
"use step";
|
|
2053
|
+
const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
|
|
2054
|
+
const { Upload } = await import("@aws-sdk/lib-storage");
|
|
2055
|
+
const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
|
|
2056
|
+
const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
|
|
2057
|
+
const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
|
|
2058
|
+
const s3Client = new S3Client({
|
|
2059
|
+
region: s3Region,
|
|
2060
|
+
endpoint: s3Endpoint,
|
|
2061
|
+
credentials: {
|
|
2062
|
+
accessKeyId: s3AccessKeyId,
|
|
2063
|
+
secretAccessKey: s3SecretAccessKey
|
|
2064
|
+
},
|
|
2065
|
+
forcePathStyle: true
|
|
2066
|
+
});
|
|
2067
|
+
const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
|
|
2068
|
+
const upload = new Upload({
|
|
2069
|
+
client: s3Client,
|
|
2070
|
+
params: {
|
|
2071
|
+
Bucket: s3Bucket,
|
|
2072
|
+
Key: audioKey,
|
|
2073
|
+
Body: new Uint8Array(dubbedAudioBuffer),
|
|
2074
|
+
ContentType: "audio/mp4"
|
|
2075
|
+
}
|
|
2076
|
+
});
|
|
2077
|
+
await upload.done();
|
|
2078
|
+
const getObjectCommand = new GetObjectCommand({
|
|
2079
|
+
Bucket: s3Bucket,
|
|
2080
|
+
Key: audioKey
|
|
2081
|
+
});
|
|
2082
|
+
const presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
|
|
2083
|
+
expiresIn: 3600
|
|
2084
|
+
// 1 hour
|
|
2085
|
+
});
|
|
2086
|
+
console.warn(`\u2705 Audio uploaded successfully to: ${audioKey}`);
|
|
2087
|
+
console.warn(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
|
|
2088
|
+
return presignedUrl;
|
|
2089
|
+
}
|
|
2090
|
+
async function createAudioTrackOnMux(assetId, languageCode, presignedUrl) {
|
|
2091
|
+
"use step";
|
|
2092
|
+
const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
|
|
2093
|
+
const mux = new Mux3({
|
|
2094
|
+
tokenId: muxTokenId,
|
|
2095
|
+
tokenSecret: muxTokenSecret
|
|
2096
|
+
});
|
|
2097
|
+
const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(languageCode) || languageCode.toUpperCase();
|
|
2098
|
+
const trackName = `${languageName} (auto-dubbed)`;
|
|
2099
|
+
const trackResponse = await mux.video.assets.createTrack(assetId, {
|
|
2100
|
+
type: "audio",
|
|
2101
|
+
language_code: languageCode,
|
|
2102
|
+
name: trackName,
|
|
2103
|
+
url: presignedUrl
|
|
2104
|
+
});
|
|
2105
|
+
if (!trackResponse.id) {
|
|
2106
|
+
throw new Error("Failed to create audio track: no track ID returned from Mux");
|
|
2107
|
+
}
|
|
2108
|
+
return trackResponse.id;
|
|
2109
|
+
}
|
|
1823
2110
|
async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
2111
|
+
"use workflow";
|
|
1824
2112
|
const {
|
|
1825
2113
|
provider = "elevenlabs",
|
|
1826
2114
|
numSpeakers = 0,
|
|
1827
2115
|
// 0 = auto-detect
|
|
1828
|
-
muxTokenId,
|
|
1829
|
-
muxTokenSecret,
|
|
1830
2116
|
elevenLabsApiKey,
|
|
1831
2117
|
uploadToMux = true
|
|
1832
2118
|
} = options;
|
|
1833
2119
|
if (provider !== "elevenlabs") {
|
|
1834
2120
|
throw new Error("Only ElevenLabs provider is currently supported for audio translation");
|
|
1835
2121
|
}
|
|
1836
|
-
const muxId = muxTokenId ?? env_default.MUX_TOKEN_ID;
|
|
1837
|
-
const muxSecret = muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
|
|
1838
2122
|
const elevenLabsKey = elevenLabsApiKey ?? env_default.ELEVENLABS_API_KEY;
|
|
1839
2123
|
const s3Endpoint = options.s3Endpoint ?? env_default.S3_ENDPOINT;
|
|
1840
2124
|
const s3Region = options.s3Region ?? env_default.S3_REGION ?? "auto";
|
|
1841
2125
|
const s3Bucket = options.s3Bucket ?? env_default.S3_BUCKET;
|
|
1842
|
-
const s3AccessKeyId =
|
|
1843
|
-
const s3SecretAccessKey =
|
|
1844
|
-
if (!muxId || !muxSecret) {
|
|
1845
|
-
throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
|
|
1846
|
-
}
|
|
2126
|
+
const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
|
|
2127
|
+
const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
|
|
1847
2128
|
if (!elevenLabsKey) {
|
|
1848
2129
|
throw new Error("ElevenLabs API key is required. Provide elevenLabsApiKey in options or set ELEVENLABS_API_KEY environment variable.");
|
|
1849
2130
|
}
|
|
1850
2131
|
if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
|
|
1851
2132
|
throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
|
|
1852
2133
|
}
|
|
1853
|
-
const
|
|
1854
|
-
|
|
1855
|
-
tokenSecret: muxSecret
|
|
1856
|
-
});
|
|
1857
|
-
console.log(`\u{1F3AC} Fetching Mux asset: ${assetId}`);
|
|
1858
|
-
const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(mux, assetId);
|
|
1859
|
-
const signingContext = resolveSigningContext(options);
|
|
2134
|
+
const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(assetId);
|
|
2135
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
1860
2136
|
if (policy === "signed" && !signingContext) {
|
|
1861
2137
|
throw new Error(
|
|
1862
2138
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
1863
2139
|
);
|
|
1864
2140
|
}
|
|
1865
|
-
console.log("\u{1F50D} Checking for audio-only static rendition...");
|
|
1866
2141
|
let currentAsset = initialAsset;
|
|
1867
2142
|
if (!hasReadyAudioStaticRendition(currentAsset)) {
|
|
1868
|
-
console.
|
|
2143
|
+
console.warn("\u274C No ready audio static rendition found. Requesting one now...");
|
|
1869
2144
|
currentAsset = await waitForAudioStaticRendition({
|
|
1870
2145
|
assetId,
|
|
1871
|
-
muxClient: mux,
|
|
1872
2146
|
initialAsset: currentAsset
|
|
1873
2147
|
});
|
|
1874
2148
|
}
|
|
@@ -1882,58 +2156,42 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
1882
2156
|
if (policy === "signed" && signingContext) {
|
|
1883
2157
|
audioUrl = await signUrl(audioUrl, playbackId, signingContext, "video");
|
|
1884
2158
|
}
|
|
1885
|
-
console.
|
|
1886
|
-
|
|
2159
|
+
console.warn("\u{1F399}\uFE0F Fetching audio from Mux...");
|
|
2160
|
+
let audioBuffer;
|
|
2161
|
+
try {
|
|
2162
|
+
audioBuffer = await fetchAudioFromMux(audioUrl);
|
|
2163
|
+
} catch (error) {
|
|
2164
|
+
throw new Error(`Failed to fetch audio from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2165
|
+
}
|
|
2166
|
+
console.warn("\u{1F399}\uFE0F Creating dubbing job in ElevenLabs...");
|
|
2167
|
+
const elevenLabsLangCode = toISO639_3(toLanguageCode);
|
|
2168
|
+
console.warn(`\u{1F50D} Creating dubbing job for asset ${assetId} with language code: ${elevenLabsLangCode}`);
|
|
1887
2169
|
let dubbingId;
|
|
1888
2170
|
try {
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
|
|
1895
|
-
const audioFile = audioBlob;
|
|
1896
|
-
const formData = new FormData();
|
|
1897
|
-
formData.append("file", audioFile);
|
|
1898
|
-
formData.append("target_lang", toLanguageCode);
|
|
1899
|
-
formData.append("num_speakers", numSpeakers.toString());
|
|
1900
|
-
formData.append("name", `Mux Asset ${assetId} - auto to ${toLanguageCode}`);
|
|
1901
|
-
const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
|
|
1902
|
-
method: "POST",
|
|
1903
|
-
headers: {
|
|
1904
|
-
"xi-api-key": elevenLabsKey
|
|
1905
|
-
},
|
|
1906
|
-
body: formData
|
|
2171
|
+
dubbingId = await createElevenLabsDubbingJob({
|
|
2172
|
+
audioBuffer,
|
|
2173
|
+
assetId,
|
|
2174
|
+
elevenLabsLangCode,
|
|
2175
|
+
numSpeakers
|
|
1907
2176
|
});
|
|
1908
|
-
|
|
1909
|
-
throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
|
|
1910
|
-
}
|
|
1911
|
-
const dubbingData = await dubbingResponse.json();
|
|
1912
|
-
dubbingId = dubbingData.dubbing_id;
|
|
1913
|
-
console.log(`\u2705 Dubbing job created: ${dubbingId}`);
|
|
1914
|
-
console.log(`\u23F1\uFE0F Expected duration: ${dubbingData.expected_duration_sec}s`);
|
|
2177
|
+
console.warn(`\u2705 Dubbing job created with ID: ${dubbingId}`);
|
|
1915
2178
|
} catch (error) {
|
|
1916
2179
|
throw new Error(`Failed to create ElevenLabs dubbing job: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
1917
2180
|
}
|
|
1918
|
-
console.
|
|
2181
|
+
console.warn("\u23F3 Waiting for dubbing to complete...");
|
|
1919
2182
|
let dubbingStatus = "dubbing";
|
|
1920
2183
|
let pollAttempts = 0;
|
|
1921
2184
|
const maxPollAttempts = 180;
|
|
2185
|
+
let targetLanguages = [];
|
|
1922
2186
|
while (dubbingStatus === "dubbing" && pollAttempts < maxPollAttempts) {
|
|
1923
|
-
await
|
|
2187
|
+
await sleep(1e4);
|
|
1924
2188
|
pollAttempts++;
|
|
1925
2189
|
try {
|
|
1926
|
-
const
|
|
1927
|
-
|
|
1928
|
-
"xi-api-key": elevenLabsKey
|
|
1929
|
-
}
|
|
2190
|
+
const statusResult = await checkElevenLabsDubbingStatus({
|
|
2191
|
+
dubbingId
|
|
1930
2192
|
});
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
}
|
|
1934
|
-
const statusData = await statusResponse.json();
|
|
1935
|
-
dubbingStatus = statusData.status;
|
|
1936
|
-
console.log(`\u{1F4CA} Status check ${pollAttempts}: ${dubbingStatus}`);
|
|
2193
|
+
dubbingStatus = statusResult.status;
|
|
2194
|
+
targetLanguages = statusResult.targetLanguages;
|
|
1937
2195
|
if (dubbingStatus === "failed") {
|
|
1938
2196
|
throw new Error("ElevenLabs dubbing job failed");
|
|
1939
2197
|
}
|
|
@@ -1944,89 +2202,74 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
1944
2202
|
if (dubbingStatus !== "dubbed") {
|
|
1945
2203
|
throw new Error(`Dubbing job timed out or failed. Final status: ${dubbingStatus}`);
|
|
1946
2204
|
}
|
|
1947
|
-
console.
|
|
2205
|
+
console.warn("\u2705 Dubbing completed successfully!");
|
|
1948
2206
|
if (!uploadToMux) {
|
|
2207
|
+
const targetLanguage2 = getLanguageCodePair(toLanguageCode);
|
|
1949
2208
|
return {
|
|
1950
2209
|
assetId,
|
|
1951
|
-
targetLanguageCode:
|
|
2210
|
+
targetLanguageCode: targetLanguage2.iso639_1,
|
|
2211
|
+
targetLanguage: targetLanguage2,
|
|
1952
2212
|
dubbingId
|
|
1953
2213
|
};
|
|
1954
2214
|
}
|
|
1955
|
-
console.
|
|
2215
|
+
console.warn("\u{1F4E5} Downloading dubbed audio from ElevenLabs...");
|
|
1956
2216
|
let dubbedAudioBuffer;
|
|
1957
2217
|
try {
|
|
1958
|
-
const
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
if (!
|
|
1965
|
-
|
|
2218
|
+
const requestedLangCode = toISO639_3(toLanguageCode);
|
|
2219
|
+
let downloadLangCode = targetLanguages.find(
|
|
2220
|
+
(lang) => lang === requestedLangCode
|
|
2221
|
+
) ?? targetLanguages.find(
|
|
2222
|
+
(lang) => lang.toLowerCase() === requestedLangCode.toLowerCase()
|
|
2223
|
+
);
|
|
2224
|
+
if (!downloadLangCode && targetLanguages.length > 0) {
|
|
2225
|
+
downloadLangCode = targetLanguages[0];
|
|
2226
|
+
console.warn(`\u26A0\uFE0F Requested language "${requestedLangCode}" not found in target_languages. Using "${downloadLangCode}" instead.`);
|
|
2227
|
+
}
|
|
2228
|
+
if (!downloadLangCode) {
|
|
2229
|
+
downloadLangCode = requestedLangCode;
|
|
2230
|
+
console.warn(`\u26A0\uFE0F No target_languages available from ElevenLabs status. Using requested language code: ${requestedLangCode}`);
|
|
1966
2231
|
}
|
|
1967
|
-
dubbedAudioBuffer = await
|
|
1968
|
-
|
|
2232
|
+
dubbedAudioBuffer = await downloadDubbedAudioFromElevenLabs({
|
|
2233
|
+
dubbingId,
|
|
2234
|
+
languageCode: downloadLangCode
|
|
2235
|
+
});
|
|
2236
|
+
console.warn("\u2705 Dubbed audio downloaded successfully!");
|
|
1969
2237
|
} catch (error) {
|
|
1970
2238
|
throw new Error(`Failed to download dubbed audio: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
1971
2239
|
}
|
|
1972
|
-
console.
|
|
1973
|
-
const s3Client = new import_client_s3.S3Client({
|
|
1974
|
-
region: s3Region,
|
|
1975
|
-
endpoint: s3Endpoint,
|
|
1976
|
-
credentials: {
|
|
1977
|
-
accessKeyId: s3AccessKeyId,
|
|
1978
|
-
secretAccessKey: s3SecretAccessKey
|
|
1979
|
-
},
|
|
1980
|
-
forcePathStyle: true
|
|
1981
|
-
});
|
|
1982
|
-
const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
|
|
2240
|
+
console.warn("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
|
|
1983
2241
|
let presignedUrl;
|
|
1984
2242
|
try {
|
|
1985
|
-
|
|
1986
|
-
|
|
1987
|
-
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
|
|
1992
|
-
}
|
|
1993
|
-
});
|
|
1994
|
-
await upload.done();
|
|
1995
|
-
console.log(`\u2705 Audio uploaded successfully to: ${audioKey}`);
|
|
1996
|
-
const getObjectCommand = new import_client_s3.GetObjectCommand({
|
|
1997
|
-
Bucket: s3Bucket,
|
|
1998
|
-
Key: audioKey
|
|
1999
|
-
});
|
|
2000
|
-
presignedUrl = await (0, import_s3_request_presigner.getSignedUrl)(s3Client, getObjectCommand, {
|
|
2001
|
-
expiresIn: 3600
|
|
2002
|
-
// 1 hour
|
|
2243
|
+
presignedUrl = await uploadDubbedAudioToS3({
|
|
2244
|
+
dubbedAudioBuffer,
|
|
2245
|
+
assetId,
|
|
2246
|
+
toLanguageCode,
|
|
2247
|
+
s3Endpoint,
|
|
2248
|
+
s3Region,
|
|
2249
|
+
s3Bucket
|
|
2003
2250
|
});
|
|
2004
|
-
console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
|
|
2005
2251
|
} catch (error) {
|
|
2006
2252
|
throw new Error(`Failed to upload audio to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2007
2253
|
}
|
|
2008
|
-
console.
|
|
2254
|
+
console.warn("\u{1F4F9} Adding dubbed audio track to Mux asset...");
|
|
2009
2255
|
let uploadedTrackId;
|
|
2256
|
+
const muxLangCode = toISO639_1(toLanguageCode);
|
|
2010
2257
|
try {
|
|
2011
|
-
|
|
2258
|
+
uploadedTrackId = await createAudioTrackOnMux(assetId, muxLangCode, presignedUrl);
|
|
2259
|
+
const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(muxLangCode) || muxLangCode.toUpperCase();
|
|
2012
2260
|
const trackName = `${languageName} (auto-dubbed)`;
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
language_code: toLanguageCode,
|
|
2016
|
-
name: trackName,
|
|
2017
|
-
url: presignedUrl
|
|
2018
|
-
});
|
|
2019
|
-
uploadedTrackId = trackResponse.id;
|
|
2020
|
-
console.log(`\u2705 Audio track added to Mux asset with ID: ${uploadedTrackId}`);
|
|
2021
|
-
console.log(`\u{1F3B5} Track name: "${trackName}"`);
|
|
2261
|
+
console.warn(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
|
|
2262
|
+
console.warn(`\u{1F4CB} Track name: "${trackName}"`);
|
|
2022
2263
|
} catch (error) {
|
|
2023
2264
|
console.warn(`\u26A0\uFE0F Failed to add audio track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2024
|
-
console.
|
|
2025
|
-
console.
|
|
2265
|
+
console.warn("\u{1F517} You can manually add the track using this presigned URL:");
|
|
2266
|
+
console.warn(presignedUrl);
|
|
2026
2267
|
}
|
|
2268
|
+
const targetLanguage = getLanguageCodePair(toLanguageCode);
|
|
2027
2269
|
return {
|
|
2028
2270
|
assetId,
|
|
2029
|
-
targetLanguageCode:
|
|
2271
|
+
targetLanguageCode: targetLanguage.iso639_1,
|
|
2272
|
+
targetLanguage,
|
|
2030
2273
|
dubbingId,
|
|
2031
2274
|
uploadedTrackId,
|
|
2032
2275
|
presignedUrl
|
|
@@ -2034,43 +2277,143 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
2034
2277
|
}
|
|
2035
2278
|
|
|
2036
2279
|
// src/workflows/translate-captions.ts
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
var
|
|
2041
|
-
|
|
2042
|
-
var translationSchema = import_zod5.z.object({
|
|
2043
|
-
translation: import_zod5.z.string()
|
|
2280
|
+
import Mux4 from "@mux/mux-node";
|
|
2281
|
+
import { generateObject as generateObject4 } from "ai";
|
|
2282
|
+
import { z as z5 } from "zod";
|
|
2283
|
+
var translationSchema = z5.object({
|
|
2284
|
+
translation: z5.string()
|
|
2044
2285
|
});
|
|
2045
|
-
|
|
2286
|
+
async function fetchVttFromMux(vttUrl) {
|
|
2287
|
+
"use step";
|
|
2288
|
+
const vttResponse = await fetch(vttUrl);
|
|
2289
|
+
if (!vttResponse.ok) {
|
|
2290
|
+
throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
|
|
2291
|
+
}
|
|
2292
|
+
return vttResponse.text();
|
|
2293
|
+
}
|
|
2294
|
+
async function translateVttWithAI({
|
|
2295
|
+
vttContent,
|
|
2296
|
+
fromLanguageCode,
|
|
2297
|
+
toLanguageCode,
|
|
2298
|
+
provider,
|
|
2299
|
+
modelId,
|
|
2300
|
+
abortSignal
|
|
2301
|
+
}) {
|
|
2302
|
+
"use step";
|
|
2303
|
+
const languageModel = createLanguageModelFromConfig(provider, modelId);
|
|
2304
|
+
const response = await generateObject4({
|
|
2305
|
+
model: languageModel,
|
|
2306
|
+
schema: translationSchema,
|
|
2307
|
+
abortSignal,
|
|
2308
|
+
messages: [
|
|
2309
|
+
{
|
|
2310
|
+
role: "user",
|
|
2311
|
+
content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
|
|
2312
|
+
|
|
2313
|
+
${vttContent}`
|
|
2314
|
+
}
|
|
2315
|
+
]
|
|
2316
|
+
});
|
|
2317
|
+
return {
|
|
2318
|
+
translatedVtt: response.object.translation,
|
|
2319
|
+
usage: {
|
|
2320
|
+
inputTokens: response.usage.inputTokens,
|
|
2321
|
+
outputTokens: response.usage.outputTokens,
|
|
2322
|
+
totalTokens: response.usage.totalTokens,
|
|
2323
|
+
reasoningTokens: response.usage.reasoningTokens,
|
|
2324
|
+
cachedInputTokens: response.usage.cachedInputTokens
|
|
2325
|
+
}
|
|
2326
|
+
};
|
|
2327
|
+
}
|
|
2328
|
+
async function uploadVttToS3({
|
|
2329
|
+
translatedVtt,
|
|
2330
|
+
assetId,
|
|
2331
|
+
fromLanguageCode,
|
|
2332
|
+
toLanguageCode,
|
|
2333
|
+
s3Endpoint,
|
|
2334
|
+
s3Region,
|
|
2335
|
+
s3Bucket
|
|
2336
|
+
}) {
|
|
2337
|
+
"use step";
|
|
2338
|
+
const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
|
|
2339
|
+
const { Upload } = await import("@aws-sdk/lib-storage");
|
|
2340
|
+
const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
|
|
2341
|
+
const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
|
|
2342
|
+
const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
|
|
2343
|
+
const s3Client = new S3Client({
|
|
2344
|
+
region: s3Region,
|
|
2345
|
+
endpoint: s3Endpoint,
|
|
2346
|
+
credentials: {
|
|
2347
|
+
accessKeyId: s3AccessKeyId,
|
|
2348
|
+
secretAccessKey: s3SecretAccessKey
|
|
2349
|
+
},
|
|
2350
|
+
forcePathStyle: true
|
|
2351
|
+
});
|
|
2352
|
+
const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
|
|
2353
|
+
const upload = new Upload({
|
|
2354
|
+
client: s3Client,
|
|
2355
|
+
params: {
|
|
2356
|
+
Bucket: s3Bucket,
|
|
2357
|
+
Key: vttKey,
|
|
2358
|
+
Body: translatedVtt,
|
|
2359
|
+
ContentType: "text/vtt"
|
|
2360
|
+
}
|
|
2361
|
+
});
|
|
2362
|
+
await upload.done();
|
|
2363
|
+
const getObjectCommand = new GetObjectCommand({
|
|
2364
|
+
Bucket: s3Bucket,
|
|
2365
|
+
Key: vttKey
|
|
2366
|
+
});
|
|
2367
|
+
const presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
|
|
2368
|
+
expiresIn: 3600
|
|
2369
|
+
// 1 hour
|
|
2370
|
+
});
|
|
2371
|
+
return presignedUrl;
|
|
2372
|
+
}
|
|
2373
|
+
/**
 * Attach an externally hosted VTT file to a Mux asset as a subtitle track.
 *
 * Runs as an isolated workflow step ("use step"); Mux credentials are
 * resolved from the environment on each invocation.
 *
 * @param {string} assetId - Mux asset to attach the track to.
 * @param {string} languageCode - BCP-47 language code for the subtitles.
 * @param {string} trackName - Human-readable track label shown by players.
 * @param {string} presignedUrl - Publicly fetchable URL of the VTT file.
 * @returns {Promise<string>} The ID of the newly created text track.
 * @throws {Error} If Mux does not return a track ID.
 */
async function createTextTrackOnMux(assetId, languageCode, trackName, presignedUrl) {
  "use step";
  const { muxTokenId, muxTokenSecret } = getMuxCredentialsFromEnv();
  const mux = new Mux4({ tokenId: muxTokenId, tokenSecret: muxTokenSecret });
  const track = await mux.video.assets.createTrack(assetId, {
    type: "text",
    text_type: "subtitles",
    language_code: languageCode,
    name: trackName,
    url: presignedUrl
  });
  if (!track.id) {
    throw new Error("Failed to create text track: no track ID returned from Mux");
  }
  return track.id;
}
|
|
2046
2392
|
async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options) {
|
|
2393
|
+
"use workflow";
|
|
2047
2394
|
const {
|
|
2048
|
-
provider =
|
|
2395
|
+
provider = "openai",
|
|
2049
2396
|
model,
|
|
2050
2397
|
s3Endpoint: providedS3Endpoint,
|
|
2051
2398
|
s3Region: providedS3Region,
|
|
2052
2399
|
s3Bucket: providedS3Bucket,
|
|
2053
|
-
|
|
2054
|
-
s3SecretAccessKey: providedS3SecretAccessKey,
|
|
2055
|
-
uploadToMux: uploadToMuxOption,
|
|
2056
|
-
...clientConfig
|
|
2400
|
+
uploadToMux: uploadToMuxOption
|
|
2057
2401
|
} = options;
|
|
2058
|
-
const resolvedProvider = provider;
|
|
2059
2402
|
const s3Endpoint = providedS3Endpoint ?? env_default.S3_ENDPOINT;
|
|
2060
2403
|
const s3Region = providedS3Region ?? env_default.S3_REGION ?? "auto";
|
|
2061
2404
|
const s3Bucket = providedS3Bucket ?? env_default.S3_BUCKET;
|
|
2062
|
-
const s3AccessKeyId =
|
|
2063
|
-
const s3SecretAccessKey =
|
|
2405
|
+
const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
|
|
2406
|
+
const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
|
|
2064
2407
|
const uploadToMux = uploadToMuxOption !== false;
|
|
2065
|
-
const
|
|
2066
|
-
{ ...
|
|
2067
|
-
|
|
2408
|
+
const config = await createWorkflowConfig(
|
|
2409
|
+
{ ...options, model },
|
|
2410
|
+
provider
|
|
2068
2411
|
);
|
|
2069
2412
|
if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
|
|
2070
2413
|
throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
|
|
2071
2414
|
}
|
|
2072
|
-
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
|
|
2073
|
-
const signingContext =
|
|
2415
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId);
|
|
2416
|
+
const signingContext = getMuxSigningContextFromEnv();
|
|
2074
2417
|
if (policy === "signed" && !signingContext) {
|
|
2075
2418
|
throw new Error(
|
|
2076
2419
|
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
@@ -2091,115 +2434,76 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
|
|
|
2091
2434
|
}
|
|
2092
2435
|
let vttContent;
|
|
2093
2436
|
try {
|
|
2094
|
-
|
|
2095
|
-
if (!vttResponse.ok) {
|
|
2096
|
-
throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
|
|
2097
|
-
}
|
|
2098
|
-
vttContent = await vttResponse.text();
|
|
2437
|
+
vttContent = await fetchVttFromMux(vttUrl);
|
|
2099
2438
|
} catch (error) {
|
|
2100
2439
|
throw new Error(`Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2101
2440
|
}
|
|
2102
|
-
console.log(`\u2705 Found VTT content for language '${fromLanguageCode}'`);
|
|
2103
2441
|
let translatedVtt;
|
|
2442
|
+
let usage;
|
|
2104
2443
|
try {
|
|
2105
|
-
const
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
|
|
2113
|
-
|
|
2114
|
-
${vttContent}`
|
|
2115
|
-
}
|
|
2116
|
-
]
|
|
2444
|
+
const result = await translateVttWithAI({
|
|
2445
|
+
vttContent,
|
|
2446
|
+
fromLanguageCode,
|
|
2447
|
+
toLanguageCode,
|
|
2448
|
+
provider: config.provider,
|
|
2449
|
+
modelId: config.modelId,
|
|
2450
|
+
abortSignal: options.abortSignal
|
|
2117
2451
|
});
|
|
2118
|
-
translatedVtt =
|
|
2452
|
+
translatedVtt = result.translatedVtt;
|
|
2453
|
+
usage = result.usage;
|
|
2119
2454
|
} catch (error) {
|
|
2120
|
-
throw new Error(`Failed to translate VTT with ${
|
|
2455
|
+
throw new Error(`Failed to translate VTT with ${config.provider}: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2121
2456
|
}
|
|
2122
|
-
|
|
2123
|
-
|
|
2457
|
+
const sourceLanguage = getLanguageCodePair(fromLanguageCode);
|
|
2458
|
+
const targetLanguage = getLanguageCodePair(toLanguageCode);
|
|
2124
2459
|
if (!uploadToMux) {
|
|
2125
|
-
console.log(`\u2705 VTT translated to ${toLanguageCode} successfully!`);
|
|
2126
2460
|
return {
|
|
2127
2461
|
assetId,
|
|
2128
2462
|
sourceLanguageCode: fromLanguageCode,
|
|
2129
2463
|
targetLanguageCode: toLanguageCode,
|
|
2464
|
+
sourceLanguage,
|
|
2465
|
+
targetLanguage,
|
|
2130
2466
|
originalVtt: vttContent,
|
|
2131
|
-
translatedVtt
|
|
2467
|
+
translatedVtt,
|
|
2468
|
+
usage
|
|
2132
2469
|
};
|
|
2133
2470
|
}
|
|
2134
|
-
console.log("\u{1F4E4} Uploading translated VTT to S3-compatible storage...");
|
|
2135
|
-
const s3Client = new import_client_s32.S3Client({
|
|
2136
|
-
region: s3Region,
|
|
2137
|
-
endpoint: s3Endpoint,
|
|
2138
|
-
credentials: {
|
|
2139
|
-
accessKeyId: s3AccessKeyId,
|
|
2140
|
-
secretAccessKey: s3SecretAccessKey
|
|
2141
|
-
},
|
|
2142
|
-
forcePathStyle: true
|
|
2143
|
-
// Often needed for non-AWS S3 services
|
|
2144
|
-
});
|
|
2145
|
-
const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
|
|
2146
2471
|
let presignedUrl;
|
|
2147
2472
|
try {
|
|
2148
|
-
|
|
2149
|
-
|
|
2150
|
-
|
|
2151
|
-
|
|
2152
|
-
|
|
2153
|
-
|
|
2154
|
-
|
|
2155
|
-
|
|
2156
|
-
});
|
|
2157
|
-
await upload.done();
|
|
2158
|
-
console.log(`\u2705 VTT uploaded successfully to: ${vttKey}`);
|
|
2159
|
-
const getObjectCommand = new import_client_s32.GetObjectCommand({
|
|
2160
|
-
Bucket: s3Bucket,
|
|
2161
|
-
Key: vttKey
|
|
2162
|
-
});
|
|
2163
|
-
presignedUrl = await (0, import_s3_request_presigner2.getSignedUrl)(s3Client, getObjectCommand, {
|
|
2164
|
-
expiresIn: 3600
|
|
2165
|
-
// 1 hour
|
|
2473
|
+
presignedUrl = await uploadVttToS3({
|
|
2474
|
+
translatedVtt,
|
|
2475
|
+
assetId,
|
|
2476
|
+
fromLanguageCode,
|
|
2477
|
+
toLanguageCode,
|
|
2478
|
+
s3Endpoint,
|
|
2479
|
+
s3Region,
|
|
2480
|
+
s3Bucket
|
|
2166
2481
|
});
|
|
2167
|
-
console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
|
|
2168
2482
|
} catch (error) {
|
|
2169
2483
|
throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2170
2484
|
}
|
|
2171
|
-
console.log("\u{1F4F9} Adding translated track to Mux asset...");
|
|
2172
2485
|
let uploadedTrackId;
|
|
2173
2486
|
try {
|
|
2174
|
-
const languageName =
|
|
2487
|
+
const languageName = getLanguageName(toLanguageCode);
|
|
2175
2488
|
const trackName = `${languageName} (auto-translated)`;
|
|
2176
|
-
|
|
2177
|
-
type: "text",
|
|
2178
|
-
text_type: "subtitles",
|
|
2179
|
-
language_code: toLanguageCode,
|
|
2180
|
-
name: trackName,
|
|
2181
|
-
url: presignedUrl
|
|
2182
|
-
});
|
|
2183
|
-
uploadedTrackId = trackResponse.id;
|
|
2184
|
-
console.log(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
|
|
2185
|
-
console.log(`\u{1F4CB} Track name: "${trackName}"`);
|
|
2489
|
+
uploadedTrackId = await createTextTrackOnMux(assetId, toLanguageCode, trackName, presignedUrl);
|
|
2186
2490
|
} catch (error) {
|
|
2187
|
-
console.warn(
|
|
2188
|
-
console.log("\u{1F517} You can manually add the track using this presigned URL:");
|
|
2189
|
-
console.log(presignedUrl);
|
|
2491
|
+
console.warn(`Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2190
2492
|
}
|
|
2191
2493
|
return {
|
|
2192
2494
|
assetId,
|
|
2193
2495
|
sourceLanguageCode: fromLanguageCode,
|
|
2194
2496
|
targetLanguageCode: toLanguageCode,
|
|
2497
|
+
sourceLanguage,
|
|
2498
|
+
targetLanguage,
|
|
2195
2499
|
originalVtt: vttContent,
|
|
2196
2500
|
translatedVtt,
|
|
2197
2501
|
uploadedTrackId,
|
|
2198
|
-
presignedUrl
|
|
2502
|
+
presignedUrl,
|
|
2503
|
+
usage
|
|
2199
2504
|
};
|
|
2200
2505
|
}
|
|
2201
|
-
|
|
2202
|
-
0 && (module.exports = {
|
|
2506
|
+
export {
|
|
2203
2507
|
SUMMARY_KEYWORD_LIMIT,
|
|
2204
2508
|
burnedInCaptionsSchema,
|
|
2205
2509
|
chapterSchema,
|
|
@@ -2213,5 +2517,5 @@ ${vttContent}`
|
|
|
2213
2517
|
translateAudio,
|
|
2214
2518
|
translateCaptions,
|
|
2215
2519
|
translationSchema
|
|
2216
|
-
}
|
|
2520
|
+
};
|
|
2217
2521
|
//# sourceMappingURL=index.js.map
|