@mux/ai 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +294 -79
- package/dist/{index-Bnv7tv90.d.ts → index-CMZYZcj6.d.ts} +122 -3
- package/dist/index.d.ts +1 -1
- package/dist/index.js +955 -624
- package/dist/index.js.map +1 -1
- package/dist/primitives/index.js +18 -71
- package/dist/primitives/index.js.map +1 -1
- package/dist/workflows/index.d.ts +1 -1
- package/dist/workflows/index.js +953 -638
- package/dist/workflows/index.js.map +1 -1
- package/package.json +21 -23
- package/dist/index-BNnz9P_5.d.mts +0 -144
- package/dist/index-vJ5r2FNm.d.mts +0 -477
- package/dist/index.d.mts +0 -13
- package/dist/index.mjs +0 -2205
- package/dist/index.mjs.map +0 -1
- package/dist/primitives/index.d.mts +0 -3
- package/dist/primitives/index.mjs +0 -358
- package/dist/primitives/index.mjs.map +0 -1
- package/dist/types-ktXDZ93V.d.mts +0 -137
- package/dist/workflows/index.d.mts +0 -8
- package/dist/workflows/index.mjs +0 -2168
- package/dist/workflows/index.mjs.map +0 -1
package/dist/index.mjs
DELETED
|
@@ -1,2205 +0,0 @@
|
|
|
1
|
-
// esbuild-style bundler helpers: expose module members on a namespace
// object as live, enumerable getters (so late bindings are visible).
var __defProp = Object.defineProperty;
var __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};
|
|
6
|
-
|
|
7
|
-
// src/primitives/index.ts
|
|
8
|
-
// src/primitives/index.ts
// Namespace object for the `primitives` sub-module. Populated with live
// getter re-exports via __export so consumers always see current bindings.
var primitives_exports = {};
__export(primitives_exports, {
  DEFAULT_STORYBOARD_WIDTH: () => DEFAULT_STORYBOARD_WIDTH,
  buildTranscriptUrl: () => buildTranscriptUrl,
  chunkByTokens: () => chunkByTokens,
  chunkText: () => chunkText,
  chunkVTTCues: () => chunkVTTCues,
  estimateTokenCount: () => estimateTokenCount,
  extractTextFromVTT: () => extractTextFromVTT,
  extractTimestampedTranscript: () => extractTimestampedTranscript,
  fetchTranscriptForAsset: () => fetchTranscriptForAsset,
  findCaptionTrack: () => findCaptionTrack,
  getReadyTextTracks: () => getReadyTextTracks,
  getStoryboardUrl: () => getStoryboardUrl,
  getThumbnailUrls: () => getThumbnailUrls,
  parseVTTCues: () => parseVTTCues,
  vttTimestampToSeconds: () => vttTimestampToSeconds
});
|
|
26
|
-
|
|
27
|
-
// src/lib/url-signing.ts
|
|
28
|
-
import Mux from "@mux/mux-node";
|
|
29
|
-
|
|
30
|
-
// src/env.ts
|
|
31
|
-
import path from "path";
|
|
32
|
-
import { config } from "dotenv";
|
|
33
|
-
import { expand } from "dotenv-expand";
|
|
34
|
-
import { z } from "zod";
|
|
35
|
-
// src/env.ts — environment loading and validation; runs at module import.
// Load .env (or .env.test when NODE_ENV === "test") from the process CWD,
// with ${VAR} interpolation provided by dotenv-expand.
expand(config({
  path: path.resolve(
    process.cwd(),
    process.env.NODE_ENV === "test" ? ".env.test" : ".env"
  )
}));
// Schema helper: optional string where empty/whitespace-only input is
// treated as "unset" (undefined) rather than a validation failure.
function optionalString(description, message) {
  return z.preprocess(
    (value) => typeof value === "string" && value.trim().length === 0 ? void 0 : value,
    z.string().trim().min(1, message).optional()
  ).describe(description);
}
// Schema helper: required non-empty string; whitespace-only input is coerced
// to undefined so the min-length `message` fires during validation.
function requiredString(description, message) {
  return z.preprocess(
    (value) => typeof value === "string" ? value.trim().length > 0 ? value.trim() : void 0 : value,
    z.string().trim().min(1, message)
  ).describe(description);
}
// Only the Mux token pair is mandatory; every provider/storage key is
// optional here and validated by the workflow that actually needs it.
var EnvSchema = z.object({
  NODE_ENV: z.string().default("development").describe("Runtime environment."),
  MUX_TOKEN_ID: requiredString("Mux access token ID.", "Required to access Mux APIs"),
  MUX_TOKEN_SECRET: requiredString("Mux access token secret.", "Required to access Mux APIs"),
  MUX_SIGNING_KEY: optionalString("Mux signing key ID for signed playback URLs.", "Used to sign playback URLs"),
  MUX_PRIVATE_KEY: optionalString("Mux signing private key for signed playback URLs.", "Used to sign playback URLs"),
  OPENAI_API_KEY: optionalString("OpenAI API key for OpenAI-backed workflows.", "OpenAI API key"),
  ANTHROPIC_API_KEY: optionalString("Anthropic API key for Claude-backed workflows.", "Anthropic API key"),
  GOOGLE_GENERATIVE_AI_API_KEY: optionalString("Google Generative AI API key for Gemini-backed workflows.", "Google Generative AI API key"),
  ELEVENLABS_API_KEY: optionalString("ElevenLabs API key for audio translation.", "ElevenLabs API key"),
  HIVE_API_KEY: optionalString("Hive Visual Moderation API key.", "Hive API key"),
  S3_ENDPOINT: optionalString("S3-compatible endpoint for uploads.", "S3 endpoint"),
  S3_REGION: optionalString("S3 region (defaults to 'auto' when omitted)."),
  S3_BUCKET: optionalString("Bucket used for caption and audio uploads.", "S3 bucket"),
  S3_ACCESS_KEY_ID: optionalString("Access key ID for S3-compatible uploads.", "S3 access key id"),
  S3_SECRET_ACCESS_KEY: optionalString("Secret access key for S3-compatible uploads.", "S3 secret access key")
});
// Validate process.env against the schema, printing field errors on failure.
// NOTE(review): process.exit(1) at import time is aggressive for library
// code — confirm consumers are expected to tolerate a hard exit here.
function parseEnv() {
  const parsedEnv = EnvSchema.safeParse(process.env);
  if (!parsedEnv.success) {
    console.error("\u274C Invalid env:");
    console.error(JSON.stringify(parsedEnv.error.flatten().fieldErrors, null, 2));
    process.exit(1);
  }
  return parsedEnv.data;
}
var env = parseEnv();
var env_default = env;
|
|
81
|
-
|
|
82
|
-
// src/lib/url-signing.ts
|
|
83
|
-
// src/lib/url-signing.ts
// Resolve the URL-signing key pair from explicit config, falling back to
// MUX_SIGNING_KEY / MUX_PRIVATE_KEY env vars. Returns undefined when either
// half is missing; callers then emit unsigned URLs.
function resolveSigningContext(config2) {
  const keyId = config2.muxSigningKey ?? env_default.MUX_SIGNING_KEY;
  const keySecret = config2.muxPrivateKey ?? env_default.MUX_PRIVATE_KEY;
  if (!keyId || !keySecret) {
    return void 0;
  }
  return { keyId, keySecret };
}
|
|
91
|
-
// Build a Mux SDK client configured only for JWT signing with the given
// key pair. Token credentials fall back to empty strings because the SDK
// constructor requires them even though signing does not use them.
function createSigningClient(context) {
  return new Mux({
    // These are not needed for signing, but the SDK requires them
    // Using empty strings as we only need the jwt functionality
    tokenId: env_default.MUX_TOKEN_ID || "",
    tokenSecret: env_default.MUX_TOKEN_SECRET || "",
    jwtSigningKey: context.keyId,
    jwtPrivateKey: context.keySecret
  });
}
|
|
101
|
-
// Produce a signed playback token for `playbackId`. Extra `params` are
// stringified because the SDK's jwt helper expects string values.
// `context.expiration` defaults to "1h" when not set.
async function signPlaybackId(playbackId, context, type = "video", params) {
  const client = createSigningClient(context);
  const stringParams = params ? Object.fromEntries(
    Object.entries(params).map(([key, value]) => [key, String(value)])
  ) : void 0;
  return client.jwt.signPlaybackId(playbackId, {
    type,
    expiration: context.expiration || "1h",
    params: stringParams
  });
}
// Append a signed `token=` query parameter to `url`, choosing "?" or "&"
// based on whether the URL already carries a query string.
async function signUrl(url, playbackId, context, type = "video", params) {
  const token = await signPlaybackId(playbackId, context, type, params);
  const separator = url.includes("?") ? "&" : "?";
  return `${url}${separator}token=${token}`;
}
|
|
117
|
-
|
|
118
|
-
// src/primitives/storyboards.ts
|
|
119
|
-
// src/primitives/storyboards.ts
var DEFAULT_STORYBOARD_WIDTH = 640;
/**
 * Build the storyboard image URL for a playback ID.
 * When a signing context is supplied the width is signed into the token;
 * otherwise it is appended as a plain query parameter.
 */
async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, signingContext) {
  const baseUrl = `https://image.mux.com/${playbackId}/storyboard.png`;
  if (!signingContext) {
    return `${baseUrl}?width=${width}`;
  }
  return signUrl(baseUrl, playbackId, signingContext, "storyboard", { width });
}
|
|
127
|
-
|
|
128
|
-
// src/primitives/text-chunking.ts
|
|
129
|
-
// src/primitives/text-chunking.ts
/**
 * Rough token estimate using the ~0.75 words-per-token heuristic.
 * Note: a whitespace-only string still counts as one "word" because
 * "".split(/\s+/) yields [""], matching the established behavior.
 */
function estimateTokenCount(text) {
  const words = text.trim().split(/\s+/);
  return Math.ceil(words.length / 0.75);
}
/**
 * Split plain text into chunks bounded by an estimated token budget,
 * optionally overlapping consecutive chunks by `overlapTokens`.
 */
function chunkByTokens(text, maxTokens, overlapTokens = 0) {
  const trimmed = text.trim();
  if (!trimmed) {
    return [];
  }
  const words = trimmed.split(/\s+/);
  const wordsPerChunk = Math.floor(maxTokens * 0.75);
  const overlapWords = Math.floor(overlapTokens * 0.75);
  const step = wordsPerChunk - overlapWords;
  const chunks = [];
  let index = 0;
  let position = 0;
  while (position < words.length) {
    const joined = words.slice(position, position + wordsPerChunk).join(" ");
    chunks.push({
      id: `chunk-${index}`,
      text: joined,
      tokenCount: estimateTokenCount(joined)
    });
    position += step;
    index++;
    // Safety valve: a non-positive step (overlap >= chunk size) would
    // otherwise loop forever.
    if (position <= (index - 1) * step) {
      break;
    }
  }
  return chunks;
}
/** Build a single chunk record covering a contiguous run of VTT cues. */
function createChunkFromCues(cues, index) {
  const text = cues.map((cue) => cue.text).join(" ");
  return {
    id: `chunk-${index}`,
    text,
    tokenCount: estimateTokenCount(text),
    startTime: cues[0].startTime,
    endTime: cues.at(-1).endTime
  };
}
/**
 * Group parsed VTT cues into token-bounded chunks, carrying the last
 * `overlapCues` cues forward into the next chunk for context.
 */
function chunkVTTCues(cues, maxTokens, overlapCues = 2) {
  if (cues.length === 0) {
    return [];
  }
  const chunks = [];
  let window = [];
  let windowTokens = 0;
  let nextIndex = 0;
  for (const cue of cues) {
    const cueTokens = estimateTokenCount(cue.text);
    if (window.length > 0 && windowTokens + cueTokens > maxTokens) {
      chunks.push(createChunkFromCues(window, nextIndex));
      nextIndex++;
      window = window.slice(Math.max(0, window.length - overlapCues));
      windowTokens = window.reduce(
        (sum, c) => sum + estimateTokenCount(c.text),
        0
      );
    }
    window.push(cue);
    windowTokens += cueTokens;
  }
  if (window.length > 0) {
    chunks.push(createChunkFromCues(window, nextIndex));
  }
  return chunks;
}
|
|
201
|
-
/**
 * Dispatch text chunking by strategy. Only the "token" strategy is
 * currently supported; anything else throws.
 */
function chunkText(text, strategy) {
  if (strategy.type === "token") {
    return chunkByTokens(text, strategy.maxTokens, strategy.overlap ?? 0);
  }
  // Runtime exhaustiveness guard for unknown strategy types.
  const exhaustiveCheck = strategy;
  throw new Error(`Unsupported chunking strategy: ${exhaustiveCheck}`);
}
|
|
212
|
-
|
|
213
|
-
// src/primitives/thumbnails.ts
|
|
214
|
-
// src/primitives/thumbnails.ts
/**
 * Build thumbnail URLs for a playback ID across the video's duration.
 * Videos of 50s or less get 5 evenly spaced timestamps (never at 0);
 * longer videos get one every `interval` seconds starting at 0.
 * URLs are signed per-timestamp when a signing context is provided.
 */
async function getThumbnailUrls(playbackId, duration, options = {}) {
  const { interval = 10, width = 640, signingContext } = options;
  let timestamps;
  if (duration <= 50) {
    const spacing = duration / 6;
    timestamps = Array.from({ length: 5 }, (_, i) => Math.round((i + 1) * spacing));
  } else {
    timestamps = [];
    for (let time = 0; time < duration; time += interval) {
      timestamps.push(time);
    }
  }
  const baseUrl = `https://image.mux.com/${playbackId}/thumbnail.png`;
  return Promise.all(
    timestamps.map((time) =>
      signingContext
        ? signUrl(baseUrl, playbackId, signingContext, "thumbnail", { time, width })
        : `${baseUrl}?time=${time}&width=${width}`
    )
  );
}
|
|
236
|
-
|
|
237
|
-
// src/primitives/transcripts.ts
|
|
238
|
-
// src/primitives/transcripts.ts
/** Return every text track on the asset whose status is "ready". */
function getReadyTextTracks(asset) {
  const allTracks = asset.tracks || [];
  return allTracks.filter((t) => t.type === "text" && t.status === "ready");
}
/**
 * Pick a caption track: without a language code, the first ready text
 * track wins; with one, only a "subtitles" track in that language matches.
 * Returns undefined when nothing qualifies.
 */
function findCaptionTrack(asset, languageCode) {
  const readyTracks = getReadyTextTracks(asset);
  if (readyTracks.length === 0) {
    return undefined;
  }
  if (!languageCode) {
    return readyTracks[0];
  }
  return readyTracks.find(
    (t) => t.text_type === "subtitles" && t.language_code === languageCode
  );
}
|
|
254
|
-
/**
 * Flatten raw WebVTT content into a single line of caption text.
 * Headers, NOTE/STYLE/REGION blocks, timing lines, and cue identifiers
 * are dropped; inline tags are stripped; whitespace is normalized.
 */
function extractTextFromVTT(vttContent) {
  if (!vttContent.trim()) {
    return "";
  }
  const collected = [];
  for (const rawLine of vttContent.split("\n")) {
    const line = rawLine.trim();
    if (!line || line === "WEBVTT") continue;
    if (line.startsWith("NOTE ") || line.startsWith("STYLE") || line.startsWith("REGION")) continue;
    if (line.includes("-->")) continue;
    // Heuristic: a lone word/identifier with no spaces is treated as a cue
    // ID rather than caption text (this also drops one-word caption lines).
    if (/^[\w-]+$/.test(line) && !line.includes(" ")) continue;
    const cleaned = line.replace(/<[^>]*>/g, "").trim();
    if (cleaned) {
      collected.push(cleaned);
    }
  }
  return collected.join(" ").replace(/\s+/g, " ").trim();
}
|
|
281
|
-
/**
 * Convert a WebVTT cue timestamp to seconds.
 *
 * Accepts both spec forms: "HH:MM:SS.mmm" and the short "MM:SS.mmm"
 * (the hours component is optional in WebVTT; the previous implementation
 * returned 0 for short-form stamps). Returns 0 for anything unparseable,
 * preserving the original lenient behavior.
 */
function vttTimestampToSeconds(timestamp) {
  const parts = timestamp.split(":");
  if (parts.length === 3) {
    const hours = Number.parseInt(parts[0], 10) || 0;
    const minutes = Number.parseInt(parts[1], 10) || 0;
    const seconds = Number.parseFloat(parts[2]) || 0;
    return hours * 3600 + minutes * 60 + seconds;
  }
  if (parts.length === 2) {
    // Short form without hours, e.g. "01:05.500".
    const minutes = Number.parseInt(parts[0], 10) || 0;
    const seconds = Number.parseFloat(parts[1]) || 0;
    return minutes * 60 + seconds;
  }
  return 0;
}
/**
 * Render "[<floor(seconds)>s] <text>" lines, one per cue, from raw VTT.
 * Only the first non-empty line after each timing line is captured;
 * extra lines of a multi-line cue are ignored (pre-existing behavior).
 */
function extractTimestampedTranscript(vttContent) {
  if (!vttContent.trim()) {
    return "";
  }
  const lines = vttContent.split("\n");
  const segments = [];
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (line.includes("-->")) {
      const startTime = line.split(" --> ")[0].trim();
      const timeInSeconds = vttTimestampToSeconds(startTime);
      // Skip blank lines between the timing line and the cue text.
      let j = i + 1;
      while (j < lines.length && !lines[j].trim()) {
        j++;
      }
      if (j < lines.length) {
        const text = lines[j].trim().replace(/<[^>]*>/g, "");
        if (text) {
          segments.push({ time: timeInSeconds, text });
        }
      }
    }
  }
  return segments.map((segment) => `[${Math.floor(segment.time)}s] ${segment.text}`).join("\n");
}
/**
 * Parse raw VTT content into { startTime, endTime, text } cue objects.
 * Cue settings after the end stamp (e.g. "align:start") are ignored,
 * inline tags are stripped, and multi-line cue text is joined with spaces.
 * Cues with no surviving text are omitted.
 */
function parseVTTCues(vttContent) {
  if (!vttContent.trim()) {
    return [];
  }
  const lines = vttContent.split("\n");
  const cues = [];
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (line.includes("-->")) {
      const [startStr, endStr] = line.split(" --> ").map((s) => s.trim());
      const startTime = vttTimestampToSeconds(startStr);
      // Drop trailing cue settings after the end timestamp.
      const endTime = vttTimestampToSeconds(endStr.split(" ")[0]);
      const textLines = [];
      let j = i + 1;
      while (j < lines.length && lines[j].trim() && !lines[j].includes("-->")) {
        const cleanLine = lines[j].trim().replace(/<[^>]*>/g, "");
        if (cleanLine) {
          textLines.push(cleanLine);
        }
        j++;
      }
      if (textLines.length > 0) {
        cues.push({
          startTime,
          endTime,
          text: textLines.join(" ")
        });
      }
    }
  }
  return cues;
}
|
|
345
|
-
/**
 * Build the URL of a text track's VTT file on stream.mux.com, signing it
 * (with type "video") when a signing context is supplied.
 */
async function buildTranscriptUrl(playbackId, trackId, signingContext) {
  const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
  return signingContext
    ? signUrl(baseUrl, playbackId, signingContext, "video")
    : baseUrl;
}
|
|
352
|
-
// Fetch the transcript VTT for an asset's caption track.
// Returns { transcriptText, transcriptUrl?, track? }; transcriptText is ""
// whenever no suitable track exists, the track has no id, the fetch fails,
// or the response is not OK (best-effort — fetch errors are warned, not
// thrown). When cleanTranscript is true (default) the VTT is flattened to
// plain text; otherwise the raw VTT body is returned.
async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
  const { languageCode, cleanTranscript = true, signingContext } = options;
  const track = findCaptionTrack(asset, languageCode);
  if (!track) {
    return { transcriptText: "" };
  }
  if (!track.id) {
    return { transcriptText: "", track };
  }
  const transcriptUrl = await buildTranscriptUrl(playbackId, track.id, signingContext);
  try {
    const response = await fetch(transcriptUrl);
    if (!response.ok) {
      return { transcriptText: "", transcriptUrl, track };
    }
    const rawVtt = await response.text();
    const transcriptText = cleanTranscript ? extractTextFromVTT(rawVtt) : rawVtt;
    return { transcriptText, transcriptUrl, track };
  } catch (error) {
    console.warn("Failed to fetch transcript:", error);
    return { transcriptText: "", transcriptUrl, track };
  }
}
|
|
375
|
-
|
|
376
|
-
// src/workflows/index.ts
|
|
377
|
-
// src/workflows/index.ts
// Namespace object for the `workflows` sub-module; live getter re-exports
// via __export, mirroring primitives_exports above.
var workflows_exports = {};
__export(workflows_exports, {
  SUMMARY_KEYWORD_LIMIT: () => SUMMARY_KEYWORD_LIMIT,
  burnedInCaptionsSchema: () => burnedInCaptionsSchema,
  chapterSchema: () => chapterSchema,
  chaptersSchema: () => chaptersSchema,
  generateChapters: () => generateChapters,
  generateVideoEmbeddings: () => generateVideoEmbeddings,
  getModerationScores: () => getModerationScores,
  getSummaryAndTags: () => getSummaryAndTags,
  hasBurnedInCaptions: () => hasBurnedInCaptions,
  summarySchema: () => summarySchema,
  translateAudio: () => translateAudio,
  translateCaptions: () => translateCaptions,
  translationSchema: () => translationSchema
});
|
|
393
|
-
|
|
394
|
-
// src/workflows/burned-in-captions.ts
|
|
395
|
-
import { generateObject } from "ai";
|
|
396
|
-
|
|
397
|
-
// node_modules/dedent/dist/dedent.mjs
|
|
398
|
-
// node_modules/dedent/dist/dedent.mjs — vendored (bundled) copy of `dedent`.
// The helpers below are Babel-generated object-spread/property helpers;
// kept byte-identical to the bundled output.
// Collect own enumerable string keys (plus, optionally only-enumerable
// symbol keys) of `object`.
function ownKeys(object, enumerableOnly) {
  var keys = Object.keys(object);
  if (Object.getOwnPropertySymbols) {
    var symbols = Object.getOwnPropertySymbols(object);
    enumerableOnly && (symbols = symbols.filter(function(sym) {
      return Object.getOwnPropertyDescriptor(object, sym).enumerable;
    })), keys.push.apply(keys, symbols);
  }
  return keys;
}
// Babel objectSpread2: merge each source into `target`, preserving
// property descriptors where the engine supports it.
function _objectSpread(target) {
  for (var i = 1; i < arguments.length; i++) {
    var source = null != arguments[i] ? arguments[i] : {};
    i % 2 ? ownKeys(Object(source), true).forEach(function(key) {
      _defineProperty(target, key, source[key]);
    }) : Object.getOwnPropertyDescriptors ? Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)) : ownKeys(Object(source)).forEach(function(key) {
      Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
    });
  }
  return target;
}
// Define (or plainly assign) one property on `obj`, normalizing the key.
function _defineProperty(obj, key, value) {
  key = _toPropertyKey(key);
  if (key in obj) {
    Object.defineProperty(obj, key, { value, enumerable: true, configurable: true, writable: true });
  } else {
    obj[key] = value;
  }
  return obj;
}
// Normalize a property key to a string or symbol.
function _toPropertyKey(arg) {
  var key = _toPrimitive(arg, "string");
  return typeof key === "symbol" ? key : String(key);
}
// ES ToPrimitive: honor Symbol.toPrimitive before falling back to
// String()/Number() conversion.
function _toPrimitive(input, hint) {
  if (typeof input !== "object" || input === null) return input;
  var prim = input[Symbol.toPrimitive];
  if (prim !== void 0) {
    var res = prim.call(input, hint || "default");
    if (typeof res !== "object") return res;
    throw new TypeError("@@toPrimitive must return a primitive value.");
  }
  return (hint === "string" ? String : Number)(input);
}
|
|
442
|
-
// Vendored `dedent` core. `createDedent` is hoisted, so calling it before
// its declaration here is safe.
var dedent = createDedent({});
var dedent_default = dedent;
// Build a dedent tag function bound to `options`; withOptions returns a
// new tag with merged options.
function createDedent(options) {
  dedent2.withOptions = (newOptions) => createDedent(_objectSpread(_objectSpread({}, options), newOptions));
  return dedent2;
  // Template tag: unescape special characters, splice values, strip the
  // minimum common indentation, then optionally trim.
  function dedent2(strings, ...values) {
    const raw = typeof strings === "string" ? [strings] : strings.raw;
    const {
      alignValues = false,
      escapeSpecialCharacters = Array.isArray(strings),
      trimWhitespace = true
    } = options;
    let result = "";
    for (let i = 0; i < raw.length; i++) {
      let next = raw[i];
      if (escapeSpecialCharacters) {
        next = next.replace(/\\\n[ \t]*/g, "").replace(/\\`/g, "`").replace(/\\\$/g, "$").replace(/\\\{/g, "{");
      }
      result += next;
      if (i < values.length) {
        const value = alignValues ? alignValue(values[i], result) : values[i];
        result += value;
      }
    }
    const lines = result.split("\n");
    let mindent = null;
    // Find the smallest leading-whitespace width among non-blank lines.
    for (const l of lines) {
      const m = l.match(/^(\s+)\S+/);
      if (m) {
        const indent = m[1].length;
        if (!mindent) {
          mindent = indent;
        } else {
          mindent = Math.min(mindent, indent);
        }
      }
    }
    if (mindent !== null) {
      const m = mindent;
      // NOTE(review): both comparands below render as a plain space; in
      // upstream dedent the second is a tab ("\t") — likely lost in
      // bundling/rendering. Confirm against the upstream source.
      result = lines.map((l) => l[0] === " " || l[0] === " " ? l.slice(m) : l).join("\n");
    }
    if (trimWhitespace) {
      result = result.trim();
    }
    if (escapeSpecialCharacters) {
      result = result.replace(/\\n/g, "\n");
    }
    return result;
  }
}
|
|
492
|
-
/**
 * Re-indent a multi-line interpolated value so its continuation lines line
 * up with the column where interpolation started. Non-strings and
 * single-line strings pass through untouched.
 */
function alignValue(value, precedingText) {
  const isMultilineString = typeof value === "string" && value.includes("\n");
  if (!isMultilineString) {
    return value;
  }
  const lastNewline = precedingText.lastIndexOf("\n");
  const currentLine = precedingText.slice(lastNewline + 1);
  const leadingWhitespace = currentLine.match(/^(\s+)/);
  if (!leadingWhitespace) {
    return value;
  }
  return value.replace(/\n/g, `\n${leadingWhitespace[1]}`);
}
|
|
505
|
-
|
|
506
|
-
// src/workflows/burned-in-captions.ts
|
|
507
|
-
import { z as z2 } from "zod";
|
|
508
|
-
|
|
509
|
-
// src/lib/client-factory.ts
|
|
510
|
-
import Mux2 from "@mux/mux-node";
|
|
511
|
-
|
|
512
|
-
// src/lib/providers.ts
|
|
513
|
-
import { createAnthropic } from "@ai-sdk/anthropic";
|
|
514
|
-
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
|
515
|
-
import { createOpenAI } from "@ai-sdk/openai";
|
|
516
|
-
// src/lib/providers.ts
// Default language-model IDs used when the caller does not pass a model.
var DEFAULT_LANGUAGE_MODELS = {
  openai: "gpt-5-mini",
  anthropic: "claude-haiku-4-5",
  google: "gemini-2.5-flash"
};
// Default embedding-model IDs. No Anthropic entry — resolveEmbeddingModel
// below only handles openai and google.
var DEFAULT_EMBEDDING_MODELS = {
  openai: "text-embedding-3-small",
  google: "gemini-embedding-001"
};
|
|
525
|
-
/**
 * Assert that a credential value is present, throwing an actionable error
 * naming the env var when it is missing. Returns the value unchanged.
 */
function requireEnv(value, name) {
  if (value) {
    return value;
  }
  throw new Error(`Missing ${name}. Set ${name} in your environment or pass it in options.`);
}
|
|
531
|
-
// Resolve an AI SDK language model for the requested provider (default
// "openai"), preferring an explicit API key from options over the matching
// env var. Throws via requireEnv when the key is absent.
function resolveLanguageModel(options = {}) {
  const provider = options.provider || "openai";
  const modelId = options.model || DEFAULT_LANGUAGE_MODELS[provider];
  switch (provider) {
    case "openai": {
      const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
      requireEnv(apiKey, "OPENAI_API_KEY");
      const openai = createOpenAI({
        apiKey
      });
      return {
        provider,
        modelId,
        model: openai(modelId)
      };
    }
    case "anthropic": {
      const apiKey = options.anthropicApiKey ?? env_default.ANTHROPIC_API_KEY;
      requireEnv(apiKey, "ANTHROPIC_API_KEY");
      const anthropic = createAnthropic({
        apiKey
      });
      return {
        provider,
        modelId,
        model: anthropic(modelId)
      };
    }
    case "google": {
      const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
      requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
      const google = createGoogleGenerativeAI({
        apiKey
      });
      return {
        provider,
        modelId,
        model: google(modelId)
      };
    }
    default: {
      // Runtime exhaustiveness guard for unexpected provider values.
      const exhaustiveCheck = provider;
      throw new Error(`Unsupported provider: ${exhaustiveCheck}`);
    }
  }
}
|
|
577
|
-
// Resolve an AI SDK embedding model. Mirrors resolveLanguageModel but only
// supports openai and google (Anthropic has no embedding entry above).
function resolveEmbeddingModel(options = {}) {
  const provider = options.provider || "openai";
  const modelId = options.model || DEFAULT_EMBEDDING_MODELS[provider];
  switch (provider) {
    case "openai": {
      const apiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
      requireEnv(apiKey, "OPENAI_API_KEY");
      const openai = createOpenAI({
        apiKey
      });
      return {
        provider,
        modelId,
        model: openai.embedding(modelId)
      };
    }
    case "google": {
      const apiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
      requireEnv(apiKey, "GOOGLE_GENERATIVE_AI_API_KEY");
      const google = createGoogleGenerativeAI({
        apiKey
      });
      return {
        provider,
        modelId,
        model: google.textEmbeddingModel(modelId)
      };
    }
    default: {
      // Runtime exhaustiveness guard for unexpected provider values.
      const exhaustiveCheck = provider;
      throw new Error(`Unsupported embedding provider: ${exhaustiveCheck}`);
    }
  }
}
|
|
611
|
-
|
|
612
|
-
// src/lib/client-factory.ts
|
|
613
|
-
// src/lib/client-factory.ts
// Merge explicit options with env fallbacks and fail fast with actionable
// messages when the Mux token pair or the required provider's key is
// missing. Returns the full resolved credential set.
function validateCredentials(options, requiredProvider) {
  const muxTokenId = options.muxTokenId ?? env_default.MUX_TOKEN_ID;
  const muxTokenSecret = options.muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
  const openaiApiKey = options.openaiApiKey ?? env_default.OPENAI_API_KEY;
  const anthropicApiKey = options.anthropicApiKey ?? env_default.ANTHROPIC_API_KEY;
  const googleApiKey = options.googleApiKey ?? env_default.GOOGLE_GENERATIVE_AI_API_KEY;
  if (!muxTokenId || !muxTokenSecret) {
    throw new Error(
      "Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables."
    );
  }
  if (requiredProvider === "openai" && !openaiApiKey) {
    throw new Error(
      "OpenAI API key is required. Provide openaiApiKey in options or set OPENAI_API_KEY environment variable."
    );
  }
  if (requiredProvider === "anthropic" && !anthropicApiKey) {
    throw new Error(
      "Anthropic API key is required. Provide anthropicApiKey in options or set ANTHROPIC_API_KEY environment variable."
    );
  }
  if (requiredProvider === "google" && !googleApiKey) {
    throw new Error(
      "Google Generative AI API key is required. Provide googleApiKey in options or set GOOGLE_GENERATIVE_AI_API_KEY environment variable."
    );
  }
  return {
    muxTokenId,
    muxTokenSecret,
    openaiApiKey,
    anthropicApiKey,
    googleApiKey
  };
}
|
|
647
|
-
// Construct a Mux SDK client from validated credentials. Re-checks the
// token pair defensively even though validateCredentials already did.
function createMuxClient(credentials) {
  if (!credentials.muxTokenId || !credentials.muxTokenSecret) {
    throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
  }
  return new Mux2({
    tokenId: credentials.muxTokenId,
    tokenSecret: credentials.muxTokenSecret
  });
}
|
|
656
|
-
// Bundle everything a workflow needs: a Mux client, a resolved language
// model, and the validated credentials. The explicit `provider` argument
// overrides options.provider; both default to "openai".
function createWorkflowClients(options, provider) {
  const providerToUse = provider || options.provider || "openai";
  const credentials = validateCredentials(options, providerToUse);
  const languageModel = resolveLanguageModel({
    ...options,
    provider: providerToUse
  });
  return {
    mux: createMuxClient(credentials),
    languageModel,
    credentials
  };
}
|
|
669
|
-
|
|
670
|
-
// src/lib/image-download.ts
|
|
671
|
-
import { Buffer as Buffer2 } from "buffer";
|
|
672
|
-
import pRetry, { AbortError } from "p-retry";
|
|
673
|
-
// src/lib/image-download.ts
// Retry/timeout defaults for image downloads (all times in milliseconds).
var DEFAULT_OPTIONS = {
  timeout: 1e4,
  retries: 3,
  retryDelay: 1e3,
  maxRetryDelay: 1e4,
  exponentialBackoff: true
};
/**
 * Download a single image and return it as a data: URL plus metadata
 * (buffer, content type, size, attempt count).
 *
 * Retries with (optionally exponential, jittered) backoff via p-retry.
 * Non-retryable failures abort immediately through p-retry's AbortError:
 * 4xx statuses other than 429, a non-image content type, or an empty body.
 * Each attempt is bounded by `opts.timeout` using an AbortController; a
 * timeout is rethrown as a regular Error so it remains retryable.
 */
async function downloadImageAsBase64(url, options = {}) {
  const opts = { ...DEFAULT_OPTIONS, ...options };
  let attemptCount = 0;
  return pRetry(
    async () => {
      attemptCount++;
      const controller = new AbortController();
      const timeoutId = setTimeout(() => controller.abort(), opts.timeout);
      try {
        const response = await fetch(url, {
          signal: controller.signal,
          headers: {
            "User-Agent": "@mux/ai image downloader"
          }
        });
        clearTimeout(timeoutId);
        if (!response.ok) {
          if (response.status >= 400 && response.status < 500 && response.status !== 429) {
            throw new AbortError(`HTTP ${response.status}: ${response.statusText}`);
          }
          throw new Error(`HTTP ${response.status}: ${response.statusText}`);
        }
        const contentType = response.headers.get("content-type");
        if (!contentType?.startsWith("image/")) {
          throw new AbortError(`Invalid content type: ${contentType}. Expected image/*`);
        }
        const arrayBuffer = await response.arrayBuffer();
        const buffer = Buffer2.from(arrayBuffer);
        if (buffer.length === 0) {
          throw new AbortError("Downloaded image is empty");
        }
        const base64Data = `data:${contentType};base64,${buffer.toString("base64")}`;
        return {
          base64Data,
          buffer,
          url,
          contentType,
          sizeBytes: buffer.length,
          attempts: attemptCount
        };
      } catch (error) {
        clearTimeout(timeoutId);
        if (error instanceof AbortError) {
          throw error;
        }
        if (error instanceof Error) {
          // fetch's abort surfaces as a DOMException named "AbortError";
          // convert it to a plain Error so the attempt stays retryable.
          if (error.name === "AbortError") {
            throw new Error(`Request timeout after ${opts.timeout}ms`);
          }
          throw new Error(`Download failed: ${error.message}`);
        }
        throw new Error("Unknown download error");
      }
    },
    {
      retries: opts.retries,
      minTimeout: opts.retryDelay,
      maxTimeout: opts.maxRetryDelay,
      factor: opts.exponentialBackoff ? 2 : 1,
      randomize: true,
      // Add jitter to prevent thundering herd
      onFailedAttempt: (error) => {
        console.warn(`Image download attempt ${error.attemptNumber} failed for ${url}`);
        if (error.retriesLeft > 0) {
          console.warn(`Retrying... (${error.retriesLeft} attempts left)`);
        }
      }
    }
  );
}
|
|
750
|
-
/**
 * Download several images as base64 data URLs, at most `maxConcurrent` at a time.
 *
 * Results preserve the order of `urls`. A failure in any download of a batch
 * rejects the whole call (Promise.all fail-fast), matching the single-image
 * downloader's retry/error semantics.
 *
 * @param {string[]} urls - Image URLs to fetch.
 * @param {object} [options] - Options forwarded to downloadImageAsBase64.
 * @param {number} [maxConcurrent=5] - Concurrency window size.
 * @returns {Promise<Array>} Download results, one per input URL.
 */
async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
  const downloaded = [];
  let offset = 0;
  while (offset < urls.length) {
    const window = urls.slice(offset, offset + maxConcurrent);
    const windowResults = await Promise.all(
      window.map((imageUrl) => downloadImageAsBase64(imageUrl, options))
    );
    downloaded.push(...windowResults);
    offset += maxConcurrent;
  }
  return downloaded;
}
|
|
760
|
-
|
|
761
|
-
// src/lib/mux-assets.ts
|
|
762
|
-
/**
 * Pick a usable playback ID from a Mux asset, preferring "public" over "signed".
 *
 * @param {{playback_ids?: Array<{id?: string, policy?: string}>}} asset - Mux asset record.
 * @returns {{id: string, policy: "public"|"signed"}} The selected playback ID.
 * @throws {Error} when the asset has neither a public nor a signed playback ID
 *   (e.g. only DRM IDs, which are unsupported).
 */
function getPlaybackId(asset) {
  const candidates = asset.playback_ids || [];
  // Preference order: public first, then signed.
  for (const policy of ["public", "signed"]) {
    const match = candidates.find((pid) => pid.policy === policy);
    if (match?.id) {
      return { id: match.id, policy };
    }
  }
  throw new Error(
    "No public or signed playback ID found for this asset. A public or signed playback ID is required. DRM playback IDs are not currently supported."
  );
}
|
|
776
|
-
/**
 * Retrieve a Mux asset and resolve its preferred playback ID.
 *
 * @param {object} mux - Mux SDK client (uses mux.video.assets.retrieve).
 * @param {string} assetId - Mux asset ID.
 * @returns {Promise<{asset: object, playbackId: string, policy: string}>}
 * @throws {Error} if the asset has no public or signed playback ID.
 */
async function getPlaybackIdForAsset(mux, assetId) {
  const asset = await mux.video.assets.retrieve(assetId);
  const resolved = getPlaybackId(asset);
  return { asset, playbackId: resolved.id, policy: resolved.policy };
}
|
|
781
|
-
|
|
782
|
-
// src/lib/prompt-builder.ts
|
|
783
|
-
/**
 * Render a prompt section as an XML-style element.
 *
 * Blank content renders as "" so empty sections can be filtered out by
 * callers. Tag and attribute names are validated against a conservative
 * XML-name pattern; text and attribute values are entity-escaped so section
 * content cannot break the surrounding markup.
 *
 * Fix: restores proper XML entity escaping. The previous replacements mapped
 * characters to themselves (and ">" to the nonsense ">/"), leaving the output
 * unescaped.
 *
 * @param {{tag: string, content: string, attributes?: Record<string, string>}} section
 * @returns {string} `<tag attr="...">\ncontent\n</tag>`, or "" for blank content.
 * @throws {Error} if the tag or an attribute key is not a valid XML name.
 */
function renderSection(section) {
  const { tag, content, attributes } = section;
  const XML_NAME_PATTERN = /^[A-Z_][\w.:-]*$/i;
  const assertValidXmlName = (name, context) => {
    if (!XML_NAME_PATTERN.test(name)) {
      throw new Error(`Invalid XML ${context} name: "${name}"`);
    }
  };
  // '&' must be escaped first so previously produced entities are not re-escaped.
  const escapeXmlText = (value) => value.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
  const escapeXmlAttribute = (value) => escapeXmlText(value).replace(/"/g, "&quot;");
  if (!content.trim()) {
    return "";
  }
  assertValidXmlName(tag, "tag");
  const attrString = attributes ? ` ${Object.entries(attributes).map(([key, value]) => {
    assertValidXmlName(key, "attribute");
    return `${key}="${escapeXmlAttribute(value)}"`;
  }).join(" ")}` : "";
  const safeContent = escapeXmlText(content.trim());
  return `<${tag}${attrString}>
${safeContent}
</${tag}>`;
}
|
|
806
|
-
/**
 * Merge a caller-supplied section override with the template default.
 *
 * - undefined override: the default section is used as-is.
 * - string override: replaces only the default section's content.
 * - object override: used verbatim, replacing the default entirely.
 *
 * @param {object} defaultSection - Template section definition.
 * @param {string|object|undefined} override - Optional per-call override.
 * @returns {object} The resolved section.
 */
function resolveSection(defaultSection, override) {
  if (typeof override === "string") {
    return { ...defaultSection, content: override };
  }
  return override === void 0 ? defaultSection : override;
}
|
|
815
|
-
/**
 * Create a prompt builder around a section template and an ordered key list.
 *
 * Returned API:
 * - getSection(key, override): render one template section with an optional override.
 * - build(overrides): render the ordered sections, dropping blanks, joined by blank lines.
 * - buildWithContext(overrides, additionalSections): build() plus extra ad-hoc
 *   sections appended after the base prompt.
 *
 * @param {{template: object, sectionOrder: string[]}} config2
 * @returns {{template: object, build: Function, buildWithContext: Function, getSection: Function}}
 */
function createPromptBuilder(config2) {
  const { template, sectionOrder } = config2;
  const getSection = (sectionKey, override) =>
    renderSection(resolveSection(template[sectionKey], override));
  const build = (overrides) =>
    sectionOrder
      .map((key) => getSection(key, overrides?.[key]))
      .filter(Boolean)
      .join("\n\n");
  const buildWithContext = (overrides, additionalSections) => {
    const basePrompt = build(overrides);
    if (!additionalSections?.length) {
      return basePrompt;
    }
    const extra = additionalSections.map(renderSection).filter(Boolean).join("\n\n");
    if (!extra) {
      return basePrompt;
    }
    return `${basePrompt}\n\n${extra}`;
  };
  return { template, build, buildWithContext, getSection };
}
|
|
842
|
-
/**
 * Build a <transcript> prompt section carrying the transcript text and its format.
 *
 * @param {string} transcriptText - Transcript body to embed in the prompt.
 * @param {string} [format="plain text"] - Format label emitted as an attribute.
 * @returns {{tag: string, content: string, attributes: {format: string}}}
 */
function createTranscriptSection(transcriptText, format = "plain text") {
  const attributes = { format };
  return { tag: "transcript", content: transcriptText, attributes };
}
|
|
849
|
-
/**
 * Build a <tone> prompt section from a tone instruction string.
 *
 * @param {string} instruction - Tone/style guidance for the model.
 * @returns {{tag: string, content: string}}
 */
function createToneSection(instruction) {
  const section = { tag: "tone", content: instruction };
  return section;
}
|
|
855
|
-
|
|
856
|
-
// src/workflows/burned-in-captions.ts
|
|
857
|
-
var burnedInCaptionsSchema = z2.object({
|
|
858
|
-
hasBurnedInCaptions: z2.boolean(),
|
|
859
|
-
confidence: z2.number().min(0).max(1),
|
|
860
|
-
detectedLanguage: z2.string().nullable()
|
|
861
|
-
});
|
|
862
|
-
var SYSTEM_PROMPT = dedent_default`
|
|
863
|
-
<role>
|
|
864
|
-
You are an expert at analyzing video frames to detect burned-in captions (also called open captions or hardcoded subtitles).
|
|
865
|
-
These are text overlays that are permanently embedded in the video image, common on TikTok, Instagram Reels, and other social media platforms.
|
|
866
|
-
</role>
|
|
867
|
-
|
|
868
|
-
<critical_note>
|
|
869
|
-
Burned-in captions must appear consistently across MOST frames in the storyboard.
|
|
870
|
-
Text appearing in only 1-2 frames at the end is typically marketing copy, taglines, or end-cards - NOT burned-in captions.
|
|
871
|
-
</critical_note>
|
|
872
|
-
|
|
873
|
-
<confidence_scoring>
|
|
874
|
-
Use this rubric to determine your confidence score (0.0-1.0):
|
|
875
|
-
|
|
876
|
-
- Score 1.0: Definitive captions - text overlays visible in most frames, consistent positioning, content changes between frames indicating dialogue/narration, clear caption-style formatting
|
|
877
|
-
- Score 0.7-0.9: Strong evidence - captions visible across multiple frames with consistent placement, but minor ambiguity (e.g., some frames unclear, atypical styling)
|
|
878
|
-
- Score 0.4-0.6: Moderate evidence - text present in several frames but uncertain classification (e.g., could be captions or persistent on-screen graphics, ambiguous formatting)
|
|
879
|
-
- Score 0.1-0.3: Weak evidence - minimal text detected, appears in only a few frames, likely marketing copy or end-cards rather than captions
|
|
880
|
-
- Score 0.0: No captions - no text overlays detected, or text is clearly not captions (logos, watermarks, scene content, single end-card)
|
|
881
|
-
</confidence_scoring>
|
|
882
|
-
|
|
883
|
-
<context>
|
|
884
|
-
You receive storyboard images containing multiple sequential frames extracted from a video.
|
|
885
|
-
These frames are arranged in a grid and represent the visual progression of the content over time.
|
|
886
|
-
Read frames left-to-right, top-to-bottom to understand the temporal sequence.
|
|
887
|
-
</context>
|
|
888
|
-
|
|
889
|
-
<capabilities>
|
|
890
|
-
- Detect and analyze text overlays in video frames
|
|
891
|
-
- Distinguish between captions and other text elements (marketing, logos, UI)
|
|
892
|
-
- Identify language of detected caption text
|
|
893
|
-
- Assess confidence in caption detection
|
|
894
|
-
</capabilities>
|
|
895
|
-
|
|
896
|
-
<constraints>
|
|
897
|
-
- Only classify as burned-in captions when evidence is clear across multiple frames
|
|
898
|
-
- Base decisions on observable visual evidence
|
|
899
|
-
- Return structured data matching the requested schema
|
|
900
|
-
</constraints>`;
|
|
901
|
-
var burnedInCaptionsPromptBuilder = createPromptBuilder({
|
|
902
|
-
template: {
|
|
903
|
-
task: {
|
|
904
|
-
tag: "task",
|
|
905
|
-
content: dedent_default`
|
|
906
|
-
Analyze the provided video storyboard to detect burned-in captions (hardcoded subtitles).
|
|
907
|
-
Count frames with text vs no text, note position consistency and whether text changes across frames.
|
|
908
|
-
Decide if captions exist, with confidence (0.0-1.0) and detected language if any.`
|
|
909
|
-
},
|
|
910
|
-
analysisSteps: {
|
|
911
|
-
tag: "analysis_steps",
|
|
912
|
-
content: dedent_default`
|
|
913
|
-
1. COUNT how many frames contain text overlays vs. how many don't
|
|
914
|
-
2. Check if text appears in consistent positions across multiple frames
|
|
915
|
-
3. Verify text changes content between frames (indicating dialogue/narration)
|
|
916
|
-
4. Ensure text has caption-style formatting (contrasting colors, readable fonts)
|
|
917
|
-
5. If captions are detected, identify the language of the text`
|
|
918
|
-
},
|
|
919
|
-
positiveIndicators: {
|
|
920
|
-
tag: "classify_as_captions",
|
|
921
|
-
content: dedent_default`
|
|
922
|
-
ONLY classify as burned-in captions if:
|
|
923
|
-
- Text appears in multiple frames (not just 1-2 end frames)
|
|
924
|
-
- Text positioning is consistent across those frames
|
|
925
|
-
- Content suggests dialogue, narration, or subtitles (not marketing)
|
|
926
|
-
- Formatting looks like captions (not graphics/logos)`
|
|
927
|
-
},
|
|
928
|
-
negativeIndicators: {
|
|
929
|
-
tag: "not_captions",
|
|
930
|
-
content: dedent_default`
|
|
931
|
-
DO NOT classify as burned-in captions:
|
|
932
|
-
- Marketing taglines appearing only in final 1-2 frames
|
|
933
|
-
- Single words or phrases that don't change between frames
|
|
934
|
-
- Graphics, logos, watermarks, or UI elements
|
|
935
|
-
- Text that's part of the original scene content
|
|
936
|
-
- End-cards with calls-to-action or brand messaging`
|
|
937
|
-
}
|
|
938
|
-
},
|
|
939
|
-
sectionOrder: ["task", "analysisSteps", "positiveIndicators", "negativeIndicators"]
|
|
940
|
-
});
|
|
941
|
-
// Build the user prompt for burned-in caption analysis, applying any
// per-call section overrides to the default template sections.
function buildUserPrompt(promptOverrides) {
  return burnedInCaptionsPromptBuilder.build(promptOverrides);
}
|
|
944
|
-
var DEFAULT_PROVIDER = "openai";
|
|
945
|
-
/**
 * Detect burned-in (hardcoded) captions in a Mux asset by sending a
 * storyboard image to a vision-capable language model.
 *
 * Flow: resolve a playback ID for the asset, build a (possibly signed)
 * storyboard URL, submit it (by URL or inline base64) with the caption
 * detection prompts, and return the structured verdict plus token usage.
 *
 * @param {string} assetId - Mux asset ID to analyze.
 * @param {object} [options] - Provider/model selection, credentials,
 *   imageSubmissionMode ("url" | "base64"), imageDownloadOptions,
 *   promptOverrides, abortSignal, and client config.
 * @returns {Promise<object>} { assetId, hasBurnedInCaptions, confidence,
 *   detectedLanguage, storyboardUrl, usage }.
 * @throws {Error} when a signed playback ID is found but no signing
 *   credentials are available, or when the provider returns no result.
 */
async function hasBurnedInCaptions(assetId, options = {}) {
  const {
    provider = DEFAULT_PROVIDER,
    model,
    imageSubmissionMode = "url",
    imageDownloadOptions,
    promptOverrides,
    ...config2
  } = options;
  const userPrompt = buildUserPrompt(promptOverrides);
  const clients = createWorkflowClients(
    { ...config2, model },
    provider
  );
  const { playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
  // Signed playback requires signing credentials up front; fail fast if absent.
  const signingContext = resolveSigningContext(options);
  if (policy === "signed" && !signingContext) {
    throw new Error(
      "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
    );
  }
  // 640px-wide storyboard: a frame grid summarizing the whole video.
  const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
  // Run structured extraction against the model; returns the parsed object
  // plus normalized token-usage counters.
  const analyzeStoryboard = async (imageDataUrl) => {
    const response = await generateObject({
      model: clients.languageModel.model,
      schema: burnedInCaptionsSchema,
      abortSignal: options.abortSignal,
      experimental_telemetry: { isEnabled: true },
      messages: [
        {
          role: "system",
          content: SYSTEM_PROMPT
        },
        {
          role: "user",
          content: [
            { type: "text", text: userPrompt },
            { type: "image", image: imageDataUrl }
          ]
        }
      ]
    });
    return {
      result: response.object,
      usage: {
        inputTokens: response.usage.inputTokens,
        outputTokens: response.usage.outputTokens,
        totalTokens: response.usage.totalTokens,
        reasoningTokens: response.usage.reasoningTokens,
        cachedInputTokens: response.usage.cachedInputTokens
      }
    };
  };
  let analysisResponse;
  if (imageSubmissionMode === "base64") {
    // Inline submission: download the storyboard ourselves and send a data URL
    // (useful when the provider cannot fetch the storyboard URL directly).
    const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
    analysisResponse = await analyzeStoryboard(downloadResult.base64Data);
  } else {
    analysisResponse = await analyzeStoryboard(imageUrl);
  }
  if (!analysisResponse.result) {
    throw new Error("No analysis result received from AI provider");
  }
  // Defensive defaults in case the provider omits schema fields.
  return {
    assetId,
    hasBurnedInCaptions: analysisResponse.result.hasBurnedInCaptions ?? false,
    confidence: analysisResponse.result.confidence ?? 0,
    detectedLanguage: analysisResponse.result.detectedLanguage ?? null,
    storyboardUrl: imageUrl,
    usage: analysisResponse.usage
  };
}
|
|
1017
|
-
|
|
1018
|
-
// src/workflows/chapters.ts
|
|
1019
|
-
import { generateObject as generateObject2 } from "ai";
|
|
1020
|
-
import { z as z3 } from "zod";
|
|
1021
|
-
|
|
1022
|
-
// src/lib/retry.ts
|
|
1023
|
-
var DEFAULT_RETRY_OPTIONS = {
|
|
1024
|
-
maxRetries: 3,
|
|
1025
|
-
baseDelay: 2e3,
|
|
1026
|
-
maxDelay: 1e4
|
|
1027
|
-
};
|
|
1028
|
-
/**
 * Default retry predicate: only retry download-timeout errors.
 *
 * @param {Error} error - The error thrown by the attempted operation.
 * @param {number} _attempt - 1-based attempt number (unused here).
 * @returns {boolean} true if the error message mentions a download timeout.
 */
function defaultShouldRetry(error, _attempt) {
  const message = error.message;
  if (!message) {
    return false;
  }
  return message.includes("Timeout while downloading");
}
|
|
1031
|
-
/**
 * Compute a retry backoff delay: exponential in the attempt number, with
 * 50-100% random jitter, capped at maxDelay.
 *
 * @param {number} attempt - 1-based attempt number.
 * @param {number} baseDelay - Delay for the first attempt, in ms.
 * @param {number} maxDelay - Upper bound on the returned delay, in ms.
 * @returns {number} Delay in milliseconds.
 */
function calculateDelay(attempt, baseDelay, maxDelay) {
  const uncapped = baseDelay * 2 ** (attempt - 1);
  // Jitter factor in [0.5, 1.0) spreads retries to avoid thundering herd.
  const jitterFactor = 0.5 + Math.random() * 0.5;
  return Math.min(uncapped * jitterFactor, maxDelay);
}
|
|
1036
|
-
/**
 * Run an async operation with retries and jittered exponential backoff.
 *
 * Attempts fn up to maxRetries + 1 times. After each failure the error is
 * normalized to an Error instance and passed to shouldRetry(error, nextAttempt);
 * a false result, or exhausting the attempts, rethrows the last error.
 *
 * @param {Function} fn - Async operation to attempt.
 * @param {object} [opts]
 * @param {number} [opts.maxRetries] - Extra attempts after the first.
 * @param {number} [opts.baseDelay] - Base backoff in ms.
 * @param {number} [opts.maxDelay] - Backoff cap in ms.
 * @param {Function} [opts.shouldRetry] - Predicate deciding whether to retry.
 * @returns {Promise<*>} The first successful result of fn.
 * @throws {Error} The last failure when retries are exhausted or declined.
 */
async function withRetry(fn, {
  maxRetries = DEFAULT_RETRY_OPTIONS.maxRetries,
  baseDelay = DEFAULT_RETRY_OPTIONS.baseDelay,
  maxDelay = DEFAULT_RETRY_OPTIONS.maxDelay,
  shouldRetry = defaultShouldRetry
} = {}) {
  let lastError;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await fn();
    } catch (caught) {
      // Normalize non-Error throws so callers always see an Error instance.
      lastError = caught instanceof Error ? caught : new Error(String(caught));
      const outOfAttempts = attempt === maxRetries;
      if (outOfAttempts || !shouldRetry(lastError, attempt + 1)) {
        throw lastError;
      }
      const waitMs = calculateDelay(attempt + 1, baseDelay, maxDelay);
      console.warn(
        `Attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(waitMs)}ms...`
      );
      await new Promise((resolve) => setTimeout(resolve, waitMs));
    }
  }
  // Unreachable in practice (the loop either returns or throws), kept as a guard.
  throw lastError || new Error("Retry failed with unknown error");
}
|
|
1061
|
-
|
|
1062
|
-
// src/workflows/chapters.ts
|
|
1063
|
-
var chapterSchema = z3.object({
|
|
1064
|
-
startTime: z3.number(),
|
|
1065
|
-
title: z3.string()
|
|
1066
|
-
});
|
|
1067
|
-
var chaptersSchema = z3.object({
|
|
1068
|
-
chapters: z3.array(chapterSchema)
|
|
1069
|
-
});
|
|
1070
|
-
var DEFAULT_PROVIDER2 = "openai";
|
|
1071
|
-
var SYSTEM_PROMPT2 = `Your role is to segment the following captions into chunked chapters, summarising each chapter with a title.
|
|
1072
|
-
|
|
1073
|
-
Analyze the transcript and create logical chapter breaks based on topic changes, major transitions, or distinct sections of content. Each chapter should represent a meaningful segment of the video.
|
|
1074
|
-
|
|
1075
|
-
You must respond with valid JSON in exactly this format:
|
|
1076
|
-
{
|
|
1077
|
-
"chapters": [
|
|
1078
|
-
{"startTime": 0, "title": "Introduction"},
|
|
1079
|
-
{"startTime": 45.5, "title": "Main Topic Discussion"},
|
|
1080
|
-
{"startTime": 120.0, "title": "Conclusion"}
|
|
1081
|
-
]
|
|
1082
|
-
}
|
|
1083
|
-
|
|
1084
|
-
Important rules:
|
|
1085
|
-
- startTime must be in seconds (not HH:MM:SS format)
|
|
1086
|
-
- Always start with startTime: 0 for the first chapter
|
|
1087
|
-
- Create 3-8 chapters depending on content length and natural breaks
|
|
1088
|
-
- Chapter titles should be concise and descriptive
|
|
1089
|
-
- Do not include any text before or after the JSON
|
|
1090
|
-
- The JSON must be valid and parseable`;
|
|
1091
|
-
/**
 * Generate chapter markers for a Mux asset from its caption track.
 *
 * Flow: resolve the asset's playback ID, fetch the caption track for the
 * requested language (timestamps preserved), extract a timestamped transcript,
 * then ask the language model for a structured list of { startTime, title }
 * chapters. Chapters are validated, sorted, and the first one is clamped to 0.
 *
 * @param {string} assetId - Mux asset ID.
 * @param {string} languageCode - Caption-track language to use.
 * @param {object} [options] - Provider/model selection, credentials, abortSignal.
 * @returns {Promise<{assetId: string, languageCode: string, chapters: Array}>}
 * @throws {Error} on missing signing credentials for signed playback, missing
 *   caption track, empty transcript, provider failure, or no valid chapters.
 */
async function generateChapters(assetId, languageCode, options = {}) {
  const { provider = DEFAULT_PROVIDER2, model, abortSignal } = options;
  const clients = createWorkflowClients({ ...options, model }, provider);
  const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
  // Signed playback requires signing credentials up front; fail fast if absent.
  const signingContext = resolveSigningContext(options);
  if (policy === "signed" && !signingContext) {
    throw new Error(
      "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
    );
  }
  const transcriptResult = await fetchTranscriptForAsset(assetData, playbackId, {
    languageCode,
    cleanTranscript: false,
    // keep timestamps for chapter segmentation
    signingContext: policy === "signed" ? signingContext : void 0
  });
  if (!transcriptResult.track || !transcriptResult.transcriptText) {
    // Report which caption languages exist to make the error actionable.
    const availableLanguages = getReadyTextTracks(assetData).map((t) => t.language_code).filter(Boolean).join(", ");
    throw new Error(
      `No caption track found for language '${languageCode}'. Available languages: ${availableLanguages || "none"}`
    );
  }
  const timestampedTranscript = extractTimestampedTranscript(transcriptResult.transcriptText);
  if (!timestampedTranscript) {
    throw new Error("No usable content found in caption track");
  }
  let chaptersData = null;
  try {
    // withRetry covers transient provider failures (e.g. download timeouts).
    const response = await withRetry(
      () => generateObject2({
        model: clients.languageModel.model,
        schema: chaptersSchema,
        abortSignal,
        messages: [
          {
            role: "system",
            content: SYSTEM_PROMPT2
          },
          {
            role: "user",
            content: timestampedTranscript
          }
        ]
      })
    );
    chaptersData = response.object;
  } catch (error) {
    throw new Error(
      `Failed to generate chapters with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
    );
  }
  if (!chaptersData || !chaptersData.chapters) {
    throw new Error("No chapters generated from AI response");
  }
  // Keep only well-typed chapters and order them by start time.
  const validChapters = chaptersData.chapters.filter((chapter) => typeof chapter.startTime === "number" && typeof chapter.title === "string").sort((a, b) => a.startTime - b.startTime);
  if (validChapters.length === 0) {
    throw new Error("No valid chapters found in AI response");
  }
  // The first chapter must start at the beginning of the video.
  if (validChapters[0].startTime !== 0) {
    validChapters[0].startTime = 0;
  }
  return {
    assetId,
    languageCode,
    chapters: validChapters
  };
}
|
|
1158
|
-
|
|
1159
|
-
// src/workflows/embeddings.ts
|
|
1160
|
-
import { embed } from "ai";
|
|
1161
|
-
var DEFAULT_PROVIDER3 = "openai";
|
|
1162
|
-
var DEFAULT_CHUNKING_STRATEGY = {
|
|
1163
|
-
type: "token",
|
|
1164
|
-
maxTokens: 500,
|
|
1165
|
-
overlap: 100
|
|
1166
|
-
};
|
|
1167
|
-
var DEFAULT_BATCH_SIZE = 5;
|
|
1168
|
-
/**
 * Element-wise average of a list of embedding vectors.
 *
 * All vectors are assumed to share the dimensionality of the first one
 * — TODO confirm; extra dimensions in later vectors are ignored.
 *
 * @param {number[][]} embeddings - Embedding vectors to average.
 * @returns {number[]} The averaged vector, or [] for empty input.
 */
function averageEmbeddings(embeddings) {
  const count = embeddings.length;
  if (count === 0) {
    return [];
  }
  const dims = embeddings[0].length;
  return Array.from({ length: dims }, (_, i) => {
    let total = 0;
    for (const vector of embeddings) {
      total += vector[i];
    }
    return total / count;
  });
}
|
|
1184
|
-
/**
 * Embed transcript chunks in batches of `batchSize`, with per-chunk retries.
 *
 * Each chunk's text is embedded via the `embed` call (wrapped in withRetry);
 * results keep the input order and carry chunk timing/token metadata.
 * A persistent failure for any chunk rejects the whole call.
 *
 * @param {Array<{id: *, text: string, startTime: *, endTime: *, tokenCount: number}>} chunks
 * @param {object} model - Embedding model handle passed through to embed().
 * @param {number} batchSize - Number of chunks embedded concurrently.
 * @param {AbortSignal} [abortSignal] - Optional cancellation signal.
 * @returns {Promise<Array<{chunkId: *, embedding: number[], metadata: object}>>}
 */
async function generateChunkEmbeddings(chunks, model, batchSize, abortSignal) {
  const results = [];
  for (let i = 0; i < chunks.length; i += batchSize) {
    const batch = chunks.slice(i, i + batchSize);
    const batchResults = await Promise.all(
      batch.map(async (chunk) => {
        const response = await withRetry(
          () => embed({
            model,
            value: chunk.text,
            abortSignal
          })
        );
        return {
          chunkId: chunk.id,
          embedding: response.embedding,
          metadata: {
            startTime: chunk.startTime,
            endTime: chunk.endTime,
            tokenCount: chunk.tokenCount
          }
        };
      })
    );
    results.push(...batchResults);
  }
  return results;
}
|
|
1212
|
-
/**
 * Generate embeddings for a Mux asset's transcript.
 *
 * Flow: validate credentials and resolve the embedding model, fetch the
 * asset's caption track, chunk the transcript (token-based or VTT-cue-based
 * per chunkingStrategy), embed the chunks in batches, and return per-chunk
 * embeddings plus an element-wise averaged whole-video embedding.
 *
 * @param {string} assetId - Mux asset ID.
 * @param {object} [options] - provider/model, languageCode, chunkingStrategy,
 *   batchSize, abortSignal, credentials and signing options.
 * @returns {Promise<object>} { assetId, chunks, averagedEmbedding, provider,
 *   model, metadata }.
 * @throws {Error} on missing signing credentials for signed playback, missing
 *   caption track, empty transcript, no chunks, or embedding failure.
 */
async function generateVideoEmbeddings(assetId, options = {}) {
  const {
    provider = DEFAULT_PROVIDER3,
    model,
    languageCode,
    chunkingStrategy = DEFAULT_CHUNKING_STRATEGY,
    batchSize = DEFAULT_BATCH_SIZE,
    abortSignal
  } = options;
  // Only OpenAI and Google embedding providers are credential-checked here.
  const credentials = validateCredentials(options, provider === "google" ? "google" : "openai");
  const muxClient = createMuxClient(credentials);
  const embeddingModel = resolveEmbeddingModel({ ...options, provider, model });
  const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(
    muxClient,
    assetId
  );
  // Signed playback requires signing credentials up front; fail fast if absent.
  const signingContext = resolveSigningContext(options);
  if (policy === "signed" && !signingContext) {
    throw new Error(
      "Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
    );
  }
  // VTT chunking needs the raw cue timestamps, so skip transcript cleaning.
  const useVttChunking = chunkingStrategy.type === "vtt";
  const transcriptResult = await fetchTranscriptForAsset(assetData, playbackId, {
    languageCode,
    cleanTranscript: !useVttChunking,
    signingContext: policy === "signed" ? signingContext : void 0
  });
  if (!transcriptResult.track || !transcriptResult.transcriptText) {
    // Report which caption languages exist to make the error actionable.
    const availableLanguages = getReadyTextTracks(assetData).map((t) => t.language_code).filter(Boolean).join(", ");
    throw new Error(
      `No caption track found${languageCode ? ` for language '${languageCode}'` : ""}. Available languages: ${availableLanguages || "none"}`
    );
  }
  const transcriptText = transcriptResult.transcriptText;
  if (!transcriptText.trim()) {
    throw new Error("Transcript is empty");
  }
  const chunks = useVttChunking ? chunkVTTCues(
    parseVTTCues(transcriptText),
    chunkingStrategy.maxTokens,
    chunkingStrategy.overlapCues
  ) : chunkText(transcriptText, chunkingStrategy);
  if (chunks.length === 0) {
    throw new Error("No chunks generated from transcript");
  }
  let chunkEmbeddings;
  try {
    chunkEmbeddings = await generateChunkEmbeddings(
      chunks,
      embeddingModel.model,
      batchSize,
      abortSignal
    );
  } catch (error) {
    throw new Error(
      `Failed to generate embeddings with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
    );
  }
  if (chunkEmbeddings.length === 0) {
    throw new Error("No embeddings generated");
  }
  // Whole-video representation: element-wise mean of all chunk embeddings.
  const averagedEmbedding = averageEmbeddings(chunkEmbeddings.map((ce) => ce.embedding));
  const totalTokens = chunks.reduce((sum, chunk) => sum + chunk.tokenCount, 0);
  return {
    assetId,
    chunks: chunkEmbeddings,
    averagedEmbedding,
    provider,
    model: embeddingModel.modelId,
    metadata: {
      totalChunks: chunks.length,
      totalTokens,
      chunkingStrategy: JSON.stringify(chunkingStrategy),
      embeddingDimensions: chunkEmbeddings[0].embedding.length,
      generatedAt: (/* @__PURE__ */ new Date()).toISOString()
    }
  };
}
|
|
1291
|
-
|
|
1292
|
-
// src/workflows/moderation.ts
|
|
1293
|
-
var DEFAULT_THRESHOLDS = {
|
|
1294
|
-
sexual: 0.7,
|
|
1295
|
-
violence: 0.8
|
|
1296
|
-
};
|
|
1297
|
-
var DEFAULT_PROVIDER4 = "openai";
|
|
1298
|
-
var HIVE_ENDPOINT = "https://api.thehive.ai/api/v2/task/sync";
|
|
1299
|
-
var HIVE_SEXUAL_CATEGORIES = [
|
|
1300
|
-
"general_nsfw",
|
|
1301
|
-
"general_suggestive",
|
|
1302
|
-
"yes_sexual_activity",
|
|
1303
|
-
"female_underwear",
|
|
1304
|
-
"male_underwear",
|
|
1305
|
-
"bra",
|
|
1306
|
-
"panties",
|
|
1307
|
-
"sex_toys",
|
|
1308
|
-
"nudity_female",
|
|
1309
|
-
"nudity_male",
|
|
1310
|
-
"cleavage",
|
|
1311
|
-
"swimwear"
|
|
1312
|
-
];
|
|
1313
|
-
var HIVE_VIOLENCE_CATEGORIES = [
|
|
1314
|
-
"gun_in_hand",
|
|
1315
|
-
"gun_not_in_hand",
|
|
1316
|
-
"animated_gun",
|
|
1317
|
-
"knife_in_hand",
|
|
1318
|
-
"knife_not_in_hand",
|
|
1319
|
-
"culinary_knife_not_in_hand",
|
|
1320
|
-
"culinary_knife_in_hand",
|
|
1321
|
-
"very_bloody",
|
|
1322
|
-
"a_little_bloody",
|
|
1323
|
-
"other_blood",
|
|
1324
|
-
"hanging",
|
|
1325
|
-
"noose",
|
|
1326
|
-
"human_corpse",
|
|
1327
|
-
"animated_corpse",
|
|
1328
|
-
"emaciated_body",
|
|
1329
|
-
"self_harm",
|
|
1330
|
-
"animal_abuse",
|
|
1331
|
-
"fights",
|
|
1332
|
-
"garm_death_injury_or_military_conflict"
|
|
1333
|
-
];
|
|
1334
|
-
/**
 * Map an async processor over items, at most `maxConcurrent` at a time.
 *
 * Results preserve input order. A rejection in any batch rejects the whole
 * call (Promise.all fail-fast).
 *
 * @param {Array} items - Inputs to process.
 * @param {Function} processor - Async mapper applied to each item.
 * @param {number} [maxConcurrent=5] - Concurrency window size.
 * @returns {Promise<Array>} Processor results in input order.
 */
async function processConcurrently(items, processor, maxConcurrent = 5) {
  const collected = [];
  let start = 0;
  while (start < items.length) {
    const slice = items.slice(start, start + maxConcurrent);
    const settled = await Promise.all(slice.map(processor));
    collected.push(...settled);
    start += maxConcurrent;
  }
  return collected;
}
|
|
1344
|
-
/**
 * Score thumbnail images with the OpenAI moderations endpoint.
 *
 * In "base64" submission mode the images are downloaded locally first and
 * sent inline as data URLs; otherwise the URLs are passed through for OpenAI
 * to fetch. Per-image failures are logged and reported as zero scores with
 * error=true rather than failing the whole batch.
 *
 * @param {string[]} imageUrls - Thumbnail URLs to moderate.
 * @param {string} apiKey - OpenAI API key.
 * @param {string} model - Moderation model name.
 * @param {number} [maxConcurrent=5] - Concurrent request limit.
 * @param {"url"|"base64"} [submissionMode="url"] - How images are submitted.
 * @param {object} [downloadOptions] - Options for the base64 download path.
 * @returns {Promise<Array<{url: string, sexual: number, violence: number, error: boolean}>>}
 */
async function requestOpenAIModeration(imageUrls, apiKey, model, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
  const targetUrls = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map(
    (img) => ({ url: img.url, image: img.base64Data })
  ) : imageUrls.map((url) => ({ url, image: url }));
  // Moderate a single image; never throws — errors are converted to a
  // zero-score result so batch processing can continue.
  const moderate = async (entry) => {
    try {
      const res = await fetch("https://api.openai.com/v1/moderations", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Authorization": `Bearer ${apiKey}`
        },
        body: JSON.stringify({
          model,
          input: [
            {
              type: "image_url",
              image_url: {
                url: entry.image
              }
            }
          ]
        })
      });
      // Parse the body before the ok-check so error responses can be surfaced.
      const json = await res.json();
      if (!res.ok) {
        throw new Error(
          `OpenAI moderation error: ${res.status} ${res.statusText} - ${JSON.stringify(json)}`
        );
      }
      const categoryScores = json.results?.[0]?.category_scores || {};
      return {
        url: entry.url,
        sexual: categoryScores.sexual || 0,
        violence: categoryScores.violence || 0,
        error: false
      };
    } catch (error) {
      console.error("OpenAI moderation failed:", error);
      return {
        url: entry.url,
        sexual: 0,
        violence: 0,
        error: true
      };
    }
  };
  return processConcurrently(targetUrls, moderate, maxConcurrent);
}
|
|
1393
|
-
/**
 * Reduce Hive class scores to a single score: the maximum score among the
 * given category names, or 0 when none of them are present.
 *
 * @param {Array<{class: string, score: number}>} classes - Hive class results.
 * @param {string[]} categoryNames - Category names to consider.
 * @returns {number} Highest matching score, floored at 0.
 */
function getHiveCategoryScores(classes, categoryNames) {
  const scoreByClass = new Map(classes.map((c) => [c.class, c.score]));
  let highest = 0;
  for (const category of categoryNames) {
    const score = scoreByClass.get(category) || 0;
    if (score > highest) {
      highest = score;
    }
  }
  return highest;
}
|
|
1400
|
-
/**
 * Score thumbnail images with the Hive visual moderation API.
 *
 * In "base64" submission mode the images are downloaded locally and uploaded
 * as multipart file parts; otherwise Hive is given the URL to fetch. Class
 * scores are collapsed into two numbers (sexual, violence) via the category
 * lists. Per-image failures are logged and reported as zero scores with
 * error=true rather than failing the whole batch.
 *
 * @param {string[]} imageUrls - Thumbnail URLs to moderate.
 * @param {string} apiKey - Hive API token.
 * @param {number} [maxConcurrent=5] - Concurrent request limit.
 * @param {"url"|"base64"} [submissionMode="url"] - How images are submitted.
 * @param {object} [downloadOptions] - Options for the base64 download path.
 * @returns {Promise<Array<{url: string, sexual: number, violence: number, error: boolean}>>}
 */
async function requestHiveModeration(imageUrls, apiKey, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
  const targets = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map((img) => ({
    url: img.url,
    source: {
      kind: "file",
      buffer: img.buffer,
      contentType: img.contentType
    }
  })) : imageUrls.map((url) => ({
    url,
    source: { kind: "url", value: url }
  }));
  // Moderate a single image; never throws — errors are converted to a
  // zero-score result so batch processing can continue.
  const moderate = async (entry) => {
    try {
      const formData = new FormData();
      if (entry.source.kind === "url") {
        formData.append("url", entry.source.value);
      } else {
        // Derive a file extension from the MIME subtype for the upload name.
        const extension = entry.source.contentType.split("/")[1] || "jpg";
        const blob = new Blob([entry.source.buffer], {
          type: entry.source.contentType
        });
        formData.append("media", blob, `thumbnail.${extension}`);
      }
      const res = await fetch(HIVE_ENDPOINT, {
        method: "POST",
        headers: {
          Accept: "application/json",
          Authorization: `Token ${apiKey}`
        },
        body: formData
      });
      // Body parse failures are tolerated so the status check below still runs.
      const json = await res.json().catch(() => void 0);
      if (!res.ok) {
        throw new Error(
          `Hive moderation error: ${res.status} ${res.statusText} - ${JSON.stringify(json)}`
        );
      }
      const classes = json?.status?.[0]?.response?.output?.[0]?.classes || [];
      return {
        url: entry.url,
        sexual: getHiveCategoryScores(classes, HIVE_SEXUAL_CATEGORIES),
        violence: getHiveCategoryScores(classes, HIVE_VIOLENCE_CATEGORIES),
        error: false
      };
    } catch (error) {
      console.error("Hive moderation failed:", error);
      return {
        url: entry.url,
        sexual: 0,
        violence: 0,
        error: true
      };
    }
  };
  return processConcurrently(targets, moderate, maxConcurrent);
}
|
|
1457
|
-
async function getModerationScores(assetId, options = {}) {
|
|
1458
|
-
const {
|
|
1459
|
-
provider = DEFAULT_PROVIDER4,
|
|
1460
|
-
model = provider === "openai" ? "omni-moderation-latest" : void 0,
|
|
1461
|
-
thresholds = DEFAULT_THRESHOLDS,
|
|
1462
|
-
thumbnailInterval = 10,
|
|
1463
|
-
thumbnailWidth = 640,
|
|
1464
|
-
maxConcurrent = 5,
|
|
1465
|
-
imageSubmissionMode = "url",
|
|
1466
|
-
imageDownloadOptions
|
|
1467
|
-
} = options;
|
|
1468
|
-
const credentials = validateCredentials(options, provider === "openai" ? "openai" : void 0);
|
|
1469
|
-
const muxClient = createMuxClient(credentials);
|
|
1470
|
-
const { asset, playbackId, policy } = await getPlaybackIdForAsset(muxClient, assetId);
|
|
1471
|
-
const duration = asset.duration || 0;
|
|
1472
|
-
const signingContext = resolveSigningContext(options);
|
|
1473
|
-
if (policy === "signed" && !signingContext) {
|
|
1474
|
-
throw new Error(
|
|
1475
|
-
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
1476
|
-
);
|
|
1477
|
-
}
|
|
1478
|
-
const thumbnailUrls = await getThumbnailUrls(playbackId, duration, {
|
|
1479
|
-
interval: thumbnailInterval,
|
|
1480
|
-
width: thumbnailWidth,
|
|
1481
|
-
signingContext: policy === "signed" ? signingContext : void 0
|
|
1482
|
-
});
|
|
1483
|
-
let thumbnailScores;
|
|
1484
|
-
if (provider === "openai") {
|
|
1485
|
-
const apiKey = credentials.openaiApiKey;
|
|
1486
|
-
if (!apiKey) {
|
|
1487
|
-
throw new Error("OpenAI API key is required for moderation. Set OPENAI_API_KEY or pass openaiApiKey.");
|
|
1488
|
-
}
|
|
1489
|
-
thumbnailScores = await requestOpenAIModeration(
|
|
1490
|
-
thumbnailUrls,
|
|
1491
|
-
apiKey,
|
|
1492
|
-
model || "omni-moderation-latest",
|
|
1493
|
-
maxConcurrent,
|
|
1494
|
-
imageSubmissionMode,
|
|
1495
|
-
imageDownloadOptions
|
|
1496
|
-
);
|
|
1497
|
-
} else if (provider === "hive") {
|
|
1498
|
-
const hiveApiKey = options.hiveApiKey || env_default.HIVE_API_KEY;
|
|
1499
|
-
if (!hiveApiKey) {
|
|
1500
|
-
throw new Error("Hive API key is required for moderation. Set HIVE_API_KEY or pass hiveApiKey.");
|
|
1501
|
-
}
|
|
1502
|
-
thumbnailScores = await requestHiveModeration(
|
|
1503
|
-
thumbnailUrls,
|
|
1504
|
-
hiveApiKey,
|
|
1505
|
-
maxConcurrent,
|
|
1506
|
-
imageSubmissionMode,
|
|
1507
|
-
imageDownloadOptions
|
|
1508
|
-
);
|
|
1509
|
-
} else {
|
|
1510
|
-
throw new Error(`Unsupported moderation provider: ${provider}`);
|
|
1511
|
-
}
|
|
1512
|
-
const maxSexual = Math.max(...thumbnailScores.map((s) => s.sexual));
|
|
1513
|
-
const maxViolence = Math.max(...thumbnailScores.map((s) => s.violence));
|
|
1514
|
-
const finalThresholds = { ...DEFAULT_THRESHOLDS, ...thresholds };
|
|
1515
|
-
return {
|
|
1516
|
-
assetId,
|
|
1517
|
-
thumbnailScores,
|
|
1518
|
-
maxScores: {
|
|
1519
|
-
sexual: maxSexual,
|
|
1520
|
-
violence: maxViolence
|
|
1521
|
-
},
|
|
1522
|
-
exceedsThreshold: maxSexual > finalThresholds.sexual || maxViolence > finalThresholds.violence,
|
|
1523
|
-
thresholds: finalThresholds
|
|
1524
|
-
};
|
|
1525
|
-
}
|
|
1526
|
-
|
|
1527
|
-
// src/workflows/summarization.ts
|
|
1528
|
-
import { generateObject as generateObject3 } from "ai";
|
|
1529
|
-
import { z as z4 } from "zod";
|
|
1530
|
-
var SUMMARY_KEYWORD_LIMIT = 10;
|
|
1531
|
-
var summarySchema = z4.object({
|
|
1532
|
-
keywords: z4.array(z4.string()),
|
|
1533
|
-
title: z4.string(),
|
|
1534
|
-
description: z4.string()
|
|
1535
|
-
});
|
|
1536
|
-
var TONE_INSTRUCTIONS = {
|
|
1537
|
-
normal: "Provide a clear, straightforward analysis.",
|
|
1538
|
-
sassy: "Answer with a sassy, playful attitude and personality.",
|
|
1539
|
-
professional: "Provide a professional, executive-level analysis suitable for business reporting."
|
|
1540
|
-
};
|
|
1541
|
-
var summarizationPromptBuilder = createPromptBuilder({
|
|
1542
|
-
template: {
|
|
1543
|
-
task: {
|
|
1544
|
-
tag: "task",
|
|
1545
|
-
content: "Analyze the storyboard frames and generate metadata that captures the essence of the video content."
|
|
1546
|
-
},
|
|
1547
|
-
title: {
|
|
1548
|
-
tag: "title_requirements",
|
|
1549
|
-
content: dedent_default`
|
|
1550
|
-
A short, compelling headline that immediately communicates the subject or action.
|
|
1551
|
-
Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
|
|
1552
|
-
Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
|
|
1553
|
-
Use active, specific language.`
|
|
1554
|
-
},
|
|
1555
|
-
description: {
|
|
1556
|
-
tag: "description_requirements",
|
|
1557
|
-
content: dedent_default`
|
|
1558
|
-
A concise summary (2-4 sentences) that describes what happens across the video.
|
|
1559
|
-
Cover the main subjects, actions, setting, and any notable progression visible across frames.
|
|
1560
|
-
Write in present tense. Be specific about observable details rather than making assumptions.
|
|
1561
|
-
If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
|
|
1562
|
-
},
|
|
1563
|
-
keywords: {
|
|
1564
|
-
tag: "keywords_requirements",
|
|
1565
|
-
content: dedent_default`
|
|
1566
|
-
Specific, searchable terms (up to 10) that capture:
|
|
1567
|
-
- Primary subjects (people, animals, objects)
|
|
1568
|
-
- Actions and activities being performed
|
|
1569
|
-
- Setting and environment
|
|
1570
|
-
- Notable objects or tools
|
|
1571
|
-
- Style or genre (if applicable)
|
|
1572
|
-
Prefer concrete nouns and action verbs over abstract concepts.
|
|
1573
|
-
Use lowercase. Avoid redundant or overly generic terms like "video" or "content".`
|
|
1574
|
-
},
|
|
1575
|
-
qualityGuidelines: {
|
|
1576
|
-
tag: "quality_guidelines",
|
|
1577
|
-
content: dedent_default`
|
|
1578
|
-
- Examine all frames to understand the full context and progression
|
|
1579
|
-
- Be precise: "golden retriever" is better than "dog" when identifiable
|
|
1580
|
-
- Capture the narrative: what begins, develops, and concludes
|
|
1581
|
-
- Balance brevity with informativeness`
|
|
1582
|
-
}
|
|
1583
|
-
},
|
|
1584
|
-
sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
|
|
1585
|
-
});
|
|
1586
|
-
var SYSTEM_PROMPT3 = dedent_default`
|
|
1587
|
-
<role>
|
|
1588
|
-
You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
|
|
1589
|
-
</role>
|
|
1590
|
-
|
|
1591
|
-
<context>
|
|
1592
|
-
You receive storyboard images containing multiple sequential frames extracted from a video.
|
|
1593
|
-
These frames are arranged in a grid and represent the visual progression of the content over time.
|
|
1594
|
-
Read frames left-to-right, top-to-bottom to understand the temporal sequence.
|
|
1595
|
-
</context>
|
|
1596
|
-
|
|
1597
|
-
<transcript_guidance>
|
|
1598
|
-
When a transcript is provided alongside the storyboard:
|
|
1599
|
-
- Use it to understand spoken content, dialogue, narration, and audio context
|
|
1600
|
-
- Correlate transcript content with visual frames to build a complete picture
|
|
1601
|
-
- Extract key terminology, names, and specific language used by speakers
|
|
1602
|
-
- Let the transcript inform keyword selection, especially for topics not visually obvious
|
|
1603
|
-
- Prioritize visual content for the description, but enrich it with transcript insights
|
|
1604
|
-
- If transcript and visuals conflict, trust the visual evidence
|
|
1605
|
-
</transcript_guidance>
|
|
1606
|
-
|
|
1607
|
-
<capabilities>
|
|
1608
|
-
- Extract meaning from visual sequences
|
|
1609
|
-
- Identify subjects, actions, settings, and narrative arcs
|
|
1610
|
-
- Generate accurate, searchable metadata
|
|
1611
|
-
- Synthesize visual and transcript information when provided
|
|
1612
|
-
</capabilities>
|
|
1613
|
-
|
|
1614
|
-
<constraints>
|
|
1615
|
-
- Only describe what is clearly observable in the frames or explicitly stated in the transcript
|
|
1616
|
-
- Do not fabricate details or make unsupported assumptions
|
|
1617
|
-
- Return structured data matching the requested schema
|
|
1618
|
-
</constraints>`;
|
|
1619
|
-
function buildUserPrompt2({
|
|
1620
|
-
tone,
|
|
1621
|
-
transcriptText,
|
|
1622
|
-
isCleanTranscript = true,
|
|
1623
|
-
promptOverrides
|
|
1624
|
-
}) {
|
|
1625
|
-
const contextSections = [createToneSection(TONE_INSTRUCTIONS[tone])];
|
|
1626
|
-
if (transcriptText) {
|
|
1627
|
-
const format = isCleanTranscript ? "plain text" : "WebVTT";
|
|
1628
|
-
contextSections.push(createTranscriptSection(transcriptText, format));
|
|
1629
|
-
}
|
|
1630
|
-
return summarizationPromptBuilder.buildWithContext(promptOverrides, contextSections);
|
|
1631
|
-
}
|
|
1632
|
-
var DEFAULT_PROVIDER5 = "openai";
|
|
1633
|
-
var DEFAULT_TONE = "normal";
|
|
1634
|
-
function normalizeKeywords(keywords) {
|
|
1635
|
-
if (!Array.isArray(keywords) || keywords.length === 0) {
|
|
1636
|
-
return [];
|
|
1637
|
-
}
|
|
1638
|
-
const uniqueLowercase = /* @__PURE__ */ new Set();
|
|
1639
|
-
const normalized = [];
|
|
1640
|
-
for (const keyword of keywords) {
|
|
1641
|
-
const trimmed = keyword?.trim();
|
|
1642
|
-
if (!trimmed) {
|
|
1643
|
-
continue;
|
|
1644
|
-
}
|
|
1645
|
-
const lower = trimmed.toLowerCase();
|
|
1646
|
-
if (uniqueLowercase.has(lower)) {
|
|
1647
|
-
continue;
|
|
1648
|
-
}
|
|
1649
|
-
uniqueLowercase.add(lower);
|
|
1650
|
-
normalized.push(trimmed);
|
|
1651
|
-
if (normalized.length === SUMMARY_KEYWORD_LIMIT) {
|
|
1652
|
-
break;
|
|
1653
|
-
}
|
|
1654
|
-
}
|
|
1655
|
-
return normalized;
|
|
1656
|
-
}
|
|
1657
|
-
async function getSummaryAndTags(assetId, options) {
|
|
1658
|
-
const {
|
|
1659
|
-
provider = DEFAULT_PROVIDER5,
|
|
1660
|
-
model,
|
|
1661
|
-
tone = DEFAULT_TONE,
|
|
1662
|
-
includeTranscript = true,
|
|
1663
|
-
cleanTranscript = true,
|
|
1664
|
-
imageSubmissionMode = "url",
|
|
1665
|
-
imageDownloadOptions,
|
|
1666
|
-
abortSignal,
|
|
1667
|
-
promptOverrides
|
|
1668
|
-
} = options ?? {};
|
|
1669
|
-
const clients = createWorkflowClients(
|
|
1670
|
-
{ ...options, model },
|
|
1671
|
-
provider
|
|
1672
|
-
);
|
|
1673
|
-
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
|
|
1674
|
-
const signingContext = resolveSigningContext(options ?? {});
|
|
1675
|
-
if (policy === "signed" && !signingContext) {
|
|
1676
|
-
throw new Error(
|
|
1677
|
-
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
1678
|
-
);
|
|
1679
|
-
}
|
|
1680
|
-
const transcriptText = includeTranscript ? (await fetchTranscriptForAsset(assetData, playbackId, {
|
|
1681
|
-
cleanTranscript,
|
|
1682
|
-
signingContext: policy === "signed" ? signingContext : void 0
|
|
1683
|
-
})).transcriptText : "";
|
|
1684
|
-
const userPrompt = buildUserPrompt2({
|
|
1685
|
-
tone,
|
|
1686
|
-
transcriptText,
|
|
1687
|
-
isCleanTranscript: cleanTranscript,
|
|
1688
|
-
promptOverrides
|
|
1689
|
-
});
|
|
1690
|
-
const imageUrl = await getStoryboardUrl(playbackId, 640, policy === "signed" ? signingContext : void 0);
|
|
1691
|
-
const analyzeStoryboard = async (imageDataUrl) => {
|
|
1692
|
-
const response = await generateObject3({
|
|
1693
|
-
model: clients.languageModel.model,
|
|
1694
|
-
schema: summarySchema,
|
|
1695
|
-
abortSignal,
|
|
1696
|
-
messages: [
|
|
1697
|
-
{
|
|
1698
|
-
role: "system",
|
|
1699
|
-
content: SYSTEM_PROMPT3
|
|
1700
|
-
},
|
|
1701
|
-
{
|
|
1702
|
-
role: "user",
|
|
1703
|
-
content: [
|
|
1704
|
-
{ type: "text", text: userPrompt },
|
|
1705
|
-
{ type: "image", image: imageDataUrl }
|
|
1706
|
-
]
|
|
1707
|
-
}
|
|
1708
|
-
]
|
|
1709
|
-
});
|
|
1710
|
-
return response.object;
|
|
1711
|
-
};
|
|
1712
|
-
let aiAnalysis = null;
|
|
1713
|
-
try {
|
|
1714
|
-
if (imageSubmissionMode === "base64") {
|
|
1715
|
-
const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
|
|
1716
|
-
aiAnalysis = await analyzeStoryboard(downloadResult.base64Data);
|
|
1717
|
-
} else {
|
|
1718
|
-
aiAnalysis = await withRetry(() => analyzeStoryboard(imageUrl));
|
|
1719
|
-
}
|
|
1720
|
-
} catch (error) {
|
|
1721
|
-
throw new Error(
|
|
1722
|
-
`Failed to analyze video content with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
1723
|
-
);
|
|
1724
|
-
}
|
|
1725
|
-
if (!aiAnalysis) {
|
|
1726
|
-
throw new Error(`Failed to analyze video content for asset ${assetId}`);
|
|
1727
|
-
}
|
|
1728
|
-
if (!aiAnalysis.title) {
|
|
1729
|
-
throw new Error(`Failed to generate title for asset ${assetId}`);
|
|
1730
|
-
}
|
|
1731
|
-
if (!aiAnalysis.description) {
|
|
1732
|
-
throw new Error(`Failed to generate description for asset ${assetId}`);
|
|
1733
|
-
}
|
|
1734
|
-
return {
|
|
1735
|
-
assetId,
|
|
1736
|
-
title: aiAnalysis.title,
|
|
1737
|
-
description: aiAnalysis.description,
|
|
1738
|
-
tags: normalizeKeywords(aiAnalysis.keywords),
|
|
1739
|
-
storyboardUrl: imageUrl
|
|
1740
|
-
};
|
|
1741
|
-
}
|
|
1742
|
-
|
|
1743
|
-
// src/workflows/translate-audio.ts
|
|
1744
|
-
import { GetObjectCommand, S3Client } from "@aws-sdk/client-s3";
|
|
1745
|
-
import { Upload } from "@aws-sdk/lib-storage";
|
|
1746
|
-
import { getSignedUrl } from "@aws-sdk/s3-request-presigner";
|
|
1747
|
-
import Mux3 from "@mux/mux-node";
|
|
1748
|
-
var STATIC_RENDITION_POLL_INTERVAL_MS = 5e3;
|
|
1749
|
-
var STATIC_RENDITION_MAX_ATTEMPTS = 36;
|
|
1750
|
-
var delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
|
|
1751
|
-
function getReadyAudioStaticRendition(asset) {
|
|
1752
|
-
const files = asset.static_renditions?.files;
|
|
1753
|
-
if (!files || files.length === 0) {
|
|
1754
|
-
return void 0;
|
|
1755
|
-
}
|
|
1756
|
-
return files.find(
|
|
1757
|
-
(rendition) => rendition.name === "audio.m4a" && rendition.status === "ready"
|
|
1758
|
-
);
|
|
1759
|
-
}
|
|
1760
|
-
var hasReadyAudioStaticRendition = (asset) => Boolean(getReadyAudioStaticRendition(asset));
|
|
1761
|
-
async function requestStaticRenditionCreation(muxClient, assetId) {
|
|
1762
|
-
console.log("\u{1F4FC} Requesting static rendition from Mux...");
|
|
1763
|
-
try {
|
|
1764
|
-
await muxClient.video.assets.createStaticRendition(assetId, {
|
|
1765
|
-
resolution: "audio-only"
|
|
1766
|
-
});
|
|
1767
|
-
console.log("\u{1F4FC} Static rendition request accepted by Mux.");
|
|
1768
|
-
} catch (error) {
|
|
1769
|
-
const statusCode = error?.status ?? error?.statusCode;
|
|
1770
|
-
const messages = error?.error?.messages;
|
|
1771
|
-
const alreadyDefined = messages?.some((message2) => message2.toLowerCase().includes("already defined")) ?? error?.message?.toLowerCase().includes("already defined");
|
|
1772
|
-
if (statusCode === 409 || alreadyDefined) {
|
|
1773
|
-
console.log("\u2139\uFE0F Static rendition already requested. Waiting for it to finish...");
|
|
1774
|
-
return;
|
|
1775
|
-
}
|
|
1776
|
-
const message = error instanceof Error ? error.message : "Unknown error";
|
|
1777
|
-
throw new Error(`Failed to request static rendition from Mux: ${message}`);
|
|
1778
|
-
}
|
|
1779
|
-
}
|
|
1780
|
-
async function waitForAudioStaticRendition({
|
|
1781
|
-
assetId,
|
|
1782
|
-
muxClient,
|
|
1783
|
-
initialAsset
|
|
1784
|
-
}) {
|
|
1785
|
-
let currentAsset = initialAsset;
|
|
1786
|
-
if (hasReadyAudioStaticRendition(currentAsset)) {
|
|
1787
|
-
return currentAsset;
|
|
1788
|
-
}
|
|
1789
|
-
const status = currentAsset.static_renditions?.status ?? "not_requested";
|
|
1790
|
-
if (status === "not_requested" || status === void 0) {
|
|
1791
|
-
await requestStaticRenditionCreation(muxClient, assetId);
|
|
1792
|
-
} else if (status === "errored") {
|
|
1793
|
-
console.log("\u26A0\uFE0F Previous static rendition request errored. Creating a new one...");
|
|
1794
|
-
await requestStaticRenditionCreation(muxClient, assetId);
|
|
1795
|
-
} else {
|
|
1796
|
-
console.log(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
|
|
1797
|
-
}
|
|
1798
|
-
for (let attempt = 1; attempt <= STATIC_RENDITION_MAX_ATTEMPTS; attempt++) {
|
|
1799
|
-
await delay(STATIC_RENDITION_POLL_INTERVAL_MS);
|
|
1800
|
-
currentAsset = await muxClient.video.assets.retrieve(assetId);
|
|
1801
|
-
if (hasReadyAudioStaticRendition(currentAsset)) {
|
|
1802
|
-
console.log("\u2705 Audio static rendition is ready!");
|
|
1803
|
-
return currentAsset;
|
|
1804
|
-
}
|
|
1805
|
-
const currentStatus = currentAsset.static_renditions?.status || "unknown";
|
|
1806
|
-
console.log(
|
|
1807
|
-
`\u231B Waiting for static rendition (attempt ${attempt}/${STATIC_RENDITION_MAX_ATTEMPTS}) \u2192 ${currentStatus}`
|
|
1808
|
-
);
|
|
1809
|
-
if (currentStatus === "errored") {
|
|
1810
|
-
throw new Error(
|
|
1811
|
-
"Mux failed to create the static rendition for this asset. Please check the asset in the Mux dashboard."
|
|
1812
|
-
);
|
|
1813
|
-
}
|
|
1814
|
-
}
|
|
1815
|
-
throw new Error(
|
|
1816
|
-
"Timed out waiting for the static rendition to become ready. Please try again in a moment."
|
|
1817
|
-
);
|
|
1818
|
-
}
|
|
1819
|
-
async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
1820
|
-
const {
|
|
1821
|
-
provider = "elevenlabs",
|
|
1822
|
-
numSpeakers = 0,
|
|
1823
|
-
// 0 = auto-detect
|
|
1824
|
-
muxTokenId,
|
|
1825
|
-
muxTokenSecret,
|
|
1826
|
-
elevenLabsApiKey,
|
|
1827
|
-
uploadToMux = true
|
|
1828
|
-
} = options;
|
|
1829
|
-
if (provider !== "elevenlabs") {
|
|
1830
|
-
throw new Error("Only ElevenLabs provider is currently supported for audio translation");
|
|
1831
|
-
}
|
|
1832
|
-
const muxId = muxTokenId ?? env_default.MUX_TOKEN_ID;
|
|
1833
|
-
const muxSecret = muxTokenSecret ?? env_default.MUX_TOKEN_SECRET;
|
|
1834
|
-
const elevenLabsKey = elevenLabsApiKey ?? env_default.ELEVENLABS_API_KEY;
|
|
1835
|
-
const s3Endpoint = options.s3Endpoint ?? env_default.S3_ENDPOINT;
|
|
1836
|
-
const s3Region = options.s3Region ?? env_default.S3_REGION ?? "auto";
|
|
1837
|
-
const s3Bucket = options.s3Bucket ?? env_default.S3_BUCKET;
|
|
1838
|
-
const s3AccessKeyId = options.s3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
|
|
1839
|
-
const s3SecretAccessKey = options.s3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
|
|
1840
|
-
if (!muxId || !muxSecret) {
|
|
1841
|
-
throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
|
|
1842
|
-
}
|
|
1843
|
-
if (!elevenLabsKey) {
|
|
1844
|
-
throw new Error("ElevenLabs API key is required. Provide elevenLabsApiKey in options or set ELEVENLABS_API_KEY environment variable.");
|
|
1845
|
-
}
|
|
1846
|
-
if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
|
|
1847
|
-
throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
|
|
1848
|
-
}
|
|
1849
|
-
const mux = new Mux3({
|
|
1850
|
-
tokenId: muxId,
|
|
1851
|
-
tokenSecret: muxSecret
|
|
1852
|
-
});
|
|
1853
|
-
console.log(`\u{1F3AC} Fetching Mux asset: ${assetId}`);
|
|
1854
|
-
const { asset: initialAsset, playbackId, policy } = await getPlaybackIdForAsset(mux, assetId);
|
|
1855
|
-
const signingContext = resolveSigningContext(options);
|
|
1856
|
-
if (policy === "signed" && !signingContext) {
|
|
1857
|
-
throw new Error(
|
|
1858
|
-
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
1859
|
-
);
|
|
1860
|
-
}
|
|
1861
|
-
console.log("\u{1F50D} Checking for audio-only static rendition...");
|
|
1862
|
-
let currentAsset = initialAsset;
|
|
1863
|
-
if (!hasReadyAudioStaticRendition(currentAsset)) {
|
|
1864
|
-
console.log("\u274C No ready audio static rendition found. Requesting one now...");
|
|
1865
|
-
currentAsset = await waitForAudioStaticRendition({
|
|
1866
|
-
assetId,
|
|
1867
|
-
muxClient: mux,
|
|
1868
|
-
initialAsset: currentAsset
|
|
1869
|
-
});
|
|
1870
|
-
}
|
|
1871
|
-
const audioRendition = getReadyAudioStaticRendition(currentAsset);
|
|
1872
|
-
if (!audioRendition) {
|
|
1873
|
-
throw new Error(
|
|
1874
|
-
"Unable to obtain an audio-only static rendition for this asset. Please verify static renditions are enabled in Mux."
|
|
1875
|
-
);
|
|
1876
|
-
}
|
|
1877
|
-
let audioUrl = `https://stream.mux.com/${playbackId}/audio.m4a`;
|
|
1878
|
-
if (policy === "signed" && signingContext) {
|
|
1879
|
-
audioUrl = await signUrl(audioUrl, playbackId, signingContext, "video");
|
|
1880
|
-
}
|
|
1881
|
-
console.log(`\u2705 Found audio rendition: ${audioUrl}`);
|
|
1882
|
-
console.log(`\u{1F399}\uFE0F Creating ElevenLabs dubbing job (auto-detect \u2192 ${toLanguageCode})`);
|
|
1883
|
-
let dubbingId;
|
|
1884
|
-
try {
|
|
1885
|
-
const audioResponse = await fetch(audioUrl);
|
|
1886
|
-
if (!audioResponse.ok) {
|
|
1887
|
-
throw new Error(`Failed to fetch audio file: ${audioResponse.statusText}`);
|
|
1888
|
-
}
|
|
1889
|
-
const audioBuffer = await audioResponse.arrayBuffer();
|
|
1890
|
-
const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
|
|
1891
|
-
const audioFile = audioBlob;
|
|
1892
|
-
const formData = new FormData();
|
|
1893
|
-
formData.append("file", audioFile);
|
|
1894
|
-
formData.append("target_lang", toLanguageCode);
|
|
1895
|
-
formData.append("num_speakers", numSpeakers.toString());
|
|
1896
|
-
formData.append("name", `Mux Asset ${assetId} - auto to ${toLanguageCode}`);
|
|
1897
|
-
const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
|
|
1898
|
-
method: "POST",
|
|
1899
|
-
headers: {
|
|
1900
|
-
"xi-api-key": elevenLabsKey
|
|
1901
|
-
},
|
|
1902
|
-
body: formData
|
|
1903
|
-
});
|
|
1904
|
-
if (!dubbingResponse.ok) {
|
|
1905
|
-
throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
|
|
1906
|
-
}
|
|
1907
|
-
const dubbingData = await dubbingResponse.json();
|
|
1908
|
-
dubbingId = dubbingData.dubbing_id;
|
|
1909
|
-
console.log(`\u2705 Dubbing job created: ${dubbingId}`);
|
|
1910
|
-
console.log(`\u23F1\uFE0F Expected duration: ${dubbingData.expected_duration_sec}s`);
|
|
1911
|
-
} catch (error) {
|
|
1912
|
-
throw new Error(`Failed to create ElevenLabs dubbing job: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
1913
|
-
}
|
|
1914
|
-
console.log("\u23F3 Waiting for dubbing to complete...");
|
|
1915
|
-
let dubbingStatus = "dubbing";
|
|
1916
|
-
let pollAttempts = 0;
|
|
1917
|
-
const maxPollAttempts = 180;
|
|
1918
|
-
while (dubbingStatus === "dubbing" && pollAttempts < maxPollAttempts) {
|
|
1919
|
-
await new Promise((resolve) => setTimeout(resolve, 1e4));
|
|
1920
|
-
pollAttempts++;
|
|
1921
|
-
try {
|
|
1922
|
-
const statusResponse = await fetch(`https://api.elevenlabs.io/v1/dubbing/${dubbingId}`, {
|
|
1923
|
-
headers: {
|
|
1924
|
-
"xi-api-key": elevenLabsKey
|
|
1925
|
-
}
|
|
1926
|
-
});
|
|
1927
|
-
if (!statusResponse.ok) {
|
|
1928
|
-
throw new Error(`Status check failed: ${statusResponse.statusText}`);
|
|
1929
|
-
}
|
|
1930
|
-
const statusData = await statusResponse.json();
|
|
1931
|
-
dubbingStatus = statusData.status;
|
|
1932
|
-
console.log(`\u{1F4CA} Status check ${pollAttempts}: ${dubbingStatus}`);
|
|
1933
|
-
if (dubbingStatus === "failed") {
|
|
1934
|
-
throw new Error("ElevenLabs dubbing job failed");
|
|
1935
|
-
}
|
|
1936
|
-
} catch (error) {
|
|
1937
|
-
throw new Error(`Failed to check dubbing status: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
1938
|
-
}
|
|
1939
|
-
}
|
|
1940
|
-
if (dubbingStatus !== "dubbed") {
|
|
1941
|
-
throw new Error(`Dubbing job timed out or failed. Final status: ${dubbingStatus}`);
|
|
1942
|
-
}
|
|
1943
|
-
console.log("\u2705 Dubbing completed successfully!");
|
|
1944
|
-
if (!uploadToMux) {
|
|
1945
|
-
return {
|
|
1946
|
-
assetId,
|
|
1947
|
-
targetLanguageCode: toLanguageCode,
|
|
1948
|
-
dubbingId
|
|
1949
|
-
};
|
|
1950
|
-
}
|
|
1951
|
-
console.log("\u{1F4E5} Downloading dubbed audio from ElevenLabs...");
|
|
1952
|
-
let dubbedAudioBuffer;
|
|
1953
|
-
try {
|
|
1954
|
-
const audioUrl2 = `https://api.elevenlabs.io/v1/dubbing/${dubbingId}/audio/${toLanguageCode}`;
|
|
1955
|
-
const audioResponse = await fetch(audioUrl2, {
|
|
1956
|
-
headers: {
|
|
1957
|
-
"xi-api-key": elevenLabsKey
|
|
1958
|
-
}
|
|
1959
|
-
});
|
|
1960
|
-
if (!audioResponse.ok) {
|
|
1961
|
-
throw new Error(`Failed to fetch dubbed audio: ${audioResponse.statusText}`);
|
|
1962
|
-
}
|
|
1963
|
-
dubbedAudioBuffer = await audioResponse.arrayBuffer();
|
|
1964
|
-
console.log(`\u2705 Downloaded dubbed audio (${dubbedAudioBuffer.byteLength} bytes)`);
|
|
1965
|
-
} catch (error) {
|
|
1966
|
-
throw new Error(`Failed to download dubbed audio: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
1967
|
-
}
|
|
1968
|
-
console.log("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
|
|
1969
|
-
const s3Client = new S3Client({
|
|
1970
|
-
region: s3Region,
|
|
1971
|
-
endpoint: s3Endpoint,
|
|
1972
|
-
credentials: {
|
|
1973
|
-
accessKeyId: s3AccessKeyId,
|
|
1974
|
-
secretAccessKey: s3SecretAccessKey
|
|
1975
|
-
},
|
|
1976
|
-
forcePathStyle: true
|
|
1977
|
-
});
|
|
1978
|
-
const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
|
|
1979
|
-
let presignedUrl;
|
|
1980
|
-
try {
|
|
1981
|
-
const upload = new Upload({
|
|
1982
|
-
client: s3Client,
|
|
1983
|
-
params: {
|
|
1984
|
-
Bucket: s3Bucket,
|
|
1985
|
-
Key: audioKey,
|
|
1986
|
-
Body: new Uint8Array(dubbedAudioBuffer),
|
|
1987
|
-
ContentType: "audio/mp4"
|
|
1988
|
-
}
|
|
1989
|
-
});
|
|
1990
|
-
await upload.done();
|
|
1991
|
-
console.log(`\u2705 Audio uploaded successfully to: ${audioKey}`);
|
|
1992
|
-
const getObjectCommand = new GetObjectCommand({
|
|
1993
|
-
Bucket: s3Bucket,
|
|
1994
|
-
Key: audioKey
|
|
1995
|
-
});
|
|
1996
|
-
presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
|
|
1997
|
-
expiresIn: 3600
|
|
1998
|
-
// 1 hour
|
|
1999
|
-
});
|
|
2000
|
-
console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
|
|
2001
|
-
} catch (error) {
|
|
2002
|
-
throw new Error(`Failed to upload audio to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2003
|
-
}
|
|
2004
|
-
console.log("\u{1F3AC} Adding translated audio track to Mux asset...");
|
|
2005
|
-
let uploadedTrackId;
|
|
2006
|
-
try {
|
|
2007
|
-
const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(toLanguageCode) || toLanguageCode.toUpperCase();
|
|
2008
|
-
const trackName = `${languageName} (auto-dubbed)`;
|
|
2009
|
-
const trackResponse = await mux.video.assets.createTrack(assetId, {
|
|
2010
|
-
type: "audio",
|
|
2011
|
-
language_code: toLanguageCode,
|
|
2012
|
-
name: trackName,
|
|
2013
|
-
url: presignedUrl
|
|
2014
|
-
});
|
|
2015
|
-
uploadedTrackId = trackResponse.id;
|
|
2016
|
-
console.log(`\u2705 Audio track added to Mux asset with ID: ${uploadedTrackId}`);
|
|
2017
|
-
console.log(`\u{1F3B5} Track name: "${trackName}"`);
|
|
2018
|
-
} catch (error) {
|
|
2019
|
-
console.warn(`\u26A0\uFE0F Failed to add audio track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2020
|
-
console.log("\u{1F517} You can manually add the track using this presigned URL:");
|
|
2021
|
-
console.log(presignedUrl);
|
|
2022
|
-
}
|
|
2023
|
-
return {
|
|
2024
|
-
assetId,
|
|
2025
|
-
targetLanguageCode: toLanguageCode,
|
|
2026
|
-
dubbingId,
|
|
2027
|
-
uploadedTrackId,
|
|
2028
|
-
presignedUrl
|
|
2029
|
-
};
|
|
2030
|
-
}
|
|
2031
|
-
|
|
2032
|
-
// src/workflows/translate-captions.ts
|
|
2033
|
-
import { GetObjectCommand as GetObjectCommand2, S3Client as S3Client2 } from "@aws-sdk/client-s3";
|
|
2034
|
-
import { Upload as Upload2 } from "@aws-sdk/lib-storage";
|
|
2035
|
-
import { getSignedUrl as getSignedUrl2 } from "@aws-sdk/s3-request-presigner";
|
|
2036
|
-
import { generateObject as generateObject4 } from "ai";
|
|
2037
|
-
import { z as z5 } from "zod";
|
|
2038
|
-
var translationSchema = z5.object({
|
|
2039
|
-
translation: z5.string()
|
|
2040
|
-
});
|
|
2041
|
-
var DEFAULT_PROVIDER6 = "openai";
|
|
2042
|
-
async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options) {
|
|
2043
|
-
const {
|
|
2044
|
-
provider = DEFAULT_PROVIDER6,
|
|
2045
|
-
model,
|
|
2046
|
-
s3Endpoint: providedS3Endpoint,
|
|
2047
|
-
s3Region: providedS3Region,
|
|
2048
|
-
s3Bucket: providedS3Bucket,
|
|
2049
|
-
s3AccessKeyId: providedS3AccessKeyId,
|
|
2050
|
-
s3SecretAccessKey: providedS3SecretAccessKey,
|
|
2051
|
-
uploadToMux: uploadToMuxOption,
|
|
2052
|
-
...clientConfig
|
|
2053
|
-
} = options;
|
|
2054
|
-
const resolvedProvider = provider;
|
|
2055
|
-
const s3Endpoint = providedS3Endpoint ?? env_default.S3_ENDPOINT;
|
|
2056
|
-
const s3Region = providedS3Region ?? env_default.S3_REGION ?? "auto";
|
|
2057
|
-
const s3Bucket = providedS3Bucket ?? env_default.S3_BUCKET;
|
|
2058
|
-
const s3AccessKeyId = providedS3AccessKeyId ?? env_default.S3_ACCESS_KEY_ID;
|
|
2059
|
-
const s3SecretAccessKey = providedS3SecretAccessKey ?? env_default.S3_SECRET_ACCESS_KEY;
|
|
2060
|
-
const uploadToMux = uploadToMuxOption !== false;
|
|
2061
|
-
const clients = createWorkflowClients(
|
|
2062
|
-
{ ...clientConfig, provider: resolvedProvider, model },
|
|
2063
|
-
resolvedProvider
|
|
2064
|
-
);
|
|
2065
|
-
if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
|
|
2066
|
-
throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
|
|
2067
|
-
}
|
|
2068
|
-
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(clients.mux, assetId);
|
|
2069
|
-
const signingContext = resolveSigningContext(options);
|
|
2070
|
-
if (policy === "signed" && !signingContext) {
|
|
2071
|
-
throw new Error(
|
|
2072
|
-
"Signed playback ID requires signing credentials. Provide muxSigningKey and muxPrivateKey in options or set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
2073
|
-
);
|
|
2074
|
-
}
|
|
2075
|
-
if (!assetData.tracks) {
|
|
2076
|
-
throw new Error("No tracks found for this asset");
|
|
2077
|
-
}
|
|
2078
|
-
const sourceTextTrack = assetData.tracks.find(
|
|
2079
|
-
(track) => track.type === "text" && track.status === "ready" && track.language_code === fromLanguageCode
|
|
2080
|
-
);
|
|
2081
|
-
if (!sourceTextTrack) {
|
|
2082
|
-
throw new Error(`No ready text track found with language code '${fromLanguageCode}' for this asset`);
|
|
2083
|
-
}
|
|
2084
|
-
let vttUrl = `https://stream.mux.com/${playbackId}/text/${sourceTextTrack.id}.vtt`;
|
|
2085
|
-
if (policy === "signed" && signingContext) {
|
|
2086
|
-
vttUrl = await signUrl(vttUrl, playbackId, signingContext, "video");
|
|
2087
|
-
}
|
|
2088
|
-
let vttContent;
|
|
2089
|
-
try {
|
|
2090
|
-
const vttResponse = await fetch(vttUrl);
|
|
2091
|
-
if (!vttResponse.ok) {
|
|
2092
|
-
throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
|
|
2093
|
-
}
|
|
2094
|
-
vttContent = await vttResponse.text();
|
|
2095
|
-
} catch (error) {
|
|
2096
|
-
throw new Error(`Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2097
|
-
}
|
|
2098
|
-
console.log(`\u2705 Found VTT content for language '${fromLanguageCode}'`);
|
|
2099
|
-
let translatedVtt;
|
|
2100
|
-
try {
|
|
2101
|
-
const response = await generateObject4({
|
|
2102
|
-
model: clients.languageModel.model,
|
|
2103
|
-
schema: translationSchema,
|
|
2104
|
-
abortSignal: options.abortSignal,
|
|
2105
|
-
messages: [
|
|
2106
|
-
{
|
|
2107
|
-
role: "user",
|
|
2108
|
-
content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
|
|
2109
|
-
|
|
2110
|
-
${vttContent}`
|
|
2111
|
-
}
|
|
2112
|
-
]
|
|
2113
|
-
});
|
|
2114
|
-
translatedVtt = response.object.translation;
|
|
2115
|
-
} catch (error) {
|
|
2116
|
-
throw new Error(`Failed to translate VTT with ${resolvedProvider}: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2117
|
-
}
|
|
2118
|
-
console.log(`
|
|
2119
|
-
\u2705 Translation completed successfully!`);
|
|
2120
|
-
if (!uploadToMux) {
|
|
2121
|
-
console.log(`\u2705 VTT translated to ${toLanguageCode} successfully!`);
|
|
2122
|
-
return {
|
|
2123
|
-
assetId,
|
|
2124
|
-
sourceLanguageCode: fromLanguageCode,
|
|
2125
|
-
targetLanguageCode: toLanguageCode,
|
|
2126
|
-
originalVtt: vttContent,
|
|
2127
|
-
translatedVtt
|
|
2128
|
-
};
|
|
2129
|
-
}
|
|
2130
|
-
console.log("\u{1F4E4} Uploading translated VTT to S3-compatible storage...");
|
|
2131
|
-
const s3Client = new S3Client2({
|
|
2132
|
-
region: s3Region,
|
|
2133
|
-
endpoint: s3Endpoint,
|
|
2134
|
-
credentials: {
|
|
2135
|
-
accessKeyId: s3AccessKeyId,
|
|
2136
|
-
secretAccessKey: s3SecretAccessKey
|
|
2137
|
-
},
|
|
2138
|
-
forcePathStyle: true
|
|
2139
|
-
// Often needed for non-AWS S3 services
|
|
2140
|
-
});
|
|
2141
|
-
const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
|
|
2142
|
-
let presignedUrl;
|
|
2143
|
-
try {
|
|
2144
|
-
const upload = new Upload2({
|
|
2145
|
-
client: s3Client,
|
|
2146
|
-
params: {
|
|
2147
|
-
Bucket: s3Bucket,
|
|
2148
|
-
Key: vttKey,
|
|
2149
|
-
Body: translatedVtt,
|
|
2150
|
-
ContentType: "text/vtt"
|
|
2151
|
-
}
|
|
2152
|
-
});
|
|
2153
|
-
await upload.done();
|
|
2154
|
-
console.log(`\u2705 VTT uploaded successfully to: ${vttKey}`);
|
|
2155
|
-
const getObjectCommand = new GetObjectCommand2({
|
|
2156
|
-
Bucket: s3Bucket,
|
|
2157
|
-
Key: vttKey
|
|
2158
|
-
});
|
|
2159
|
-
presignedUrl = await getSignedUrl2(s3Client, getObjectCommand, {
|
|
2160
|
-
expiresIn: 3600
|
|
2161
|
-
// 1 hour
|
|
2162
|
-
});
|
|
2163
|
-
console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
|
|
2164
|
-
} catch (error) {
|
|
2165
|
-
throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2166
|
-
}
|
|
2167
|
-
console.log("\u{1F4F9} Adding translated track to Mux asset...");
|
|
2168
|
-
let uploadedTrackId;
|
|
2169
|
-
try {
|
|
2170
|
-
const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(toLanguageCode) || toLanguageCode.toUpperCase();
|
|
2171
|
-
const trackName = `${languageName} (auto-translated)`;
|
|
2172
|
-
const trackResponse = await clients.mux.video.assets.createTrack(assetId, {
|
|
2173
|
-
type: "text",
|
|
2174
|
-
text_type: "subtitles",
|
|
2175
|
-
language_code: toLanguageCode,
|
|
2176
|
-
name: trackName,
|
|
2177
|
-
url: presignedUrl
|
|
2178
|
-
});
|
|
2179
|
-
uploadedTrackId = trackResponse.id;
|
|
2180
|
-
console.log(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
|
|
2181
|
-
console.log(`\u{1F4CB} Track name: "${trackName}"`);
|
|
2182
|
-
} catch (error) {
|
|
2183
|
-
console.warn(`\u26A0\uFE0F Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
2184
|
-
console.log("\u{1F517} You can manually add the track using this presigned URL:");
|
|
2185
|
-
console.log(presignedUrl);
|
|
2186
|
-
}
|
|
2187
|
-
return {
|
|
2188
|
-
assetId,
|
|
2189
|
-
sourceLanguageCode: fromLanguageCode,
|
|
2190
|
-
targetLanguageCode: toLanguageCode,
|
|
2191
|
-
originalVtt: vttContent,
|
|
2192
|
-
translatedVtt,
|
|
2193
|
-
uploadedTrackId,
|
|
2194
|
-
presignedUrl
|
|
2195
|
-
};
|
|
2196
|
-
}
|
|
2197
|
-
|
|
2198
|
-
// src/index.ts
|
|
2199
|
-
var version = "0.1.0";
|
|
2200
|
-
export {
|
|
2201
|
-
primitives_exports as primitives,
|
|
2202
|
-
version,
|
|
2203
|
-
workflows_exports as workflows
|
|
2204
|
-
};
|
|
2205
|
-
//# sourceMappingURL=index.mjs.map
|