@mux/ai 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1773 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +164 -0
- package/dist/index.d.ts +164 -9
- package/dist/index.js +1757 -8
- package/dist/index.js.map +1 -1
- package/package.json +8 -6
- package/dist/audio-translation.d.ts +0 -21
- package/dist/audio-translation.d.ts.map +0 -1
- package/dist/audio-translation.js +0 -229
- package/dist/audio-translation.js.map +0 -1
- package/dist/burned-in-captions.d.ts +0 -19
- package/dist/burned-in-captions.d.ts.map +0 -1
- package/dist/burned-in-captions.js +0 -243
- package/dist/burned-in-captions.js.map +0 -1
- package/dist/chapters.d.ts +0 -18
- package/dist/chapters.d.ts.map +0 -1
- package/dist/chapters.js +0 -255
- package/dist/chapters.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/moderation.d.ts +0 -39
- package/dist/moderation.d.ts.map +0 -1
- package/dist/moderation.js +0 -341
- package/dist/moderation.js.map +0 -1
- package/dist/summarization.d.ts +0 -26
- package/dist/summarization.d.ts.map +0 -1
- package/dist/summarization.js +0 -337
- package/dist/summarization.js.map +0 -1
- package/dist/translation.d.ts +0 -22
- package/dist/translation.d.ts.map +0 -1
- package/dist/translation.js +0 -196
- package/dist/translation.js.map +0 -1
- package/dist/types.d.ts +0 -12
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js +0 -2
- package/dist/types.js.map +0 -1
- package/dist/utils/image-download.d.ts +0 -65
- package/dist/utils/image-download.d.ts.map +0 -1
- package/dist/utils/image-download.js +0 -150
- package/dist/utils/image-download.js.map +0 -1
- package/dist/utils/storyboard-processor.d.ts +0 -40
- package/dist/utils/storyboard-processor.d.ts.map +0 -1
- package/dist/utils/storyboard-processor.js +0 -202
- package/dist/utils/storyboard-processor.js.map +0 -1
- package/dist/utils/vtt-parser.d.ts +0 -8
- package/dist/utils/vtt-parser.d.ts.map +0 -1
- package/dist/utils/vtt-parser.js +0 -43
- package/dist/utils/vtt-parser.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -1,9 +1,1758 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
1
|
+
import pRetry, { AbortError } from 'p-retry';
|
|
2
|
+
import Mux from '@mux/mux-node';
|
|
3
|
+
import OpenAI from 'openai';
|
|
4
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
5
|
+
import { z } from 'zod';
|
|
6
|
+
import { zodTextFormat } from 'openai/helpers/zod';
|
|
7
|
+
import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
|
|
8
|
+
import { Upload } from '@aws-sdk/lib-storage';
|
|
9
|
+
import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
|
|
10
|
+
|
|
11
|
+
// ---------------------------------------------------------------------------
// esbuild-generated CommonJS-interop helpers (bundler output — do not hand-edit).
// ---------------------------------------------------------------------------
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Shim for dynamic require() inside ESM output: delegates to a real `require`
// when one exists, otherwise throws at call time.
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
  get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
}) : x)(function(x) {
  if (typeof require !== "undefined") return require.apply(this, arguments);
  throw Error('Dynamic require of "' + x + '" is not supported');
});
// Lazy module initializer: runs `fn` once on first call, caches the result.
var __esm = (fn, res) => function __init() {
  return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};
// Registers live getter-based exports on `target`.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copies own properties from `from` to `to` as getters (skipping `except`).
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/utils/image-download.ts
// Export registry for the bundled image-download module.
var image_download_exports = {};
__export(image_download_exports, {
  downloadImageAsBase64: () => downloadImageAsBase64,
  downloadImagesAsBase64: () => downloadImagesAsBase64,
  uploadImageToAnthropicFiles: () => uploadImageToAnthropicFiles
});
|
|
45
|
+
/**
 * Download an image over HTTP with timeout + retry, returning it both as a
 * base64 data URI and as a raw Buffer.
 *
 * Retries transient failures (5xx, 429, network errors) with optional
 * exponential backoff; permanently aborts on non-retryable conditions
 * (other 4xx responses, non-image content type, empty body).
 *
 * @param {string} url - Image URL to fetch.
 * @param {object} [options] - Overrides for DEFAULT_OPTIONS (timeout, retries,
 *   retryDelay, maxRetryDelay, exponentialBackoff).
 * @returns {Promise<{base64Data: string, buffer: Buffer, url: string,
 *   contentType: string, sizeBytes: number, attempts: number}>}
 * @throws {Error} After retries are exhausted or on a non-retryable failure.
 */
async function downloadImageAsBase64(url, options = {}) {
  const opts = { ...DEFAULT_OPTIONS, ...options };
  let attemptCount = 0;
  return pRetry(
    async () => {
      attemptCount++;
      // Per-attempt timeout via AbortController; cleared on completion.
      const controller = new AbortController();
      const timeoutId = setTimeout(() => controller.abort(), opts.timeout);
      try {
        const response = await fetch(url, {
          signal: controller.signal,
          headers: {
            "User-Agent": "@mux/ai image downloader"
          }
        });
        clearTimeout(timeoutId);
        if (!response.ok) {
          // Client errors (except 429) will never succeed on retry, so
          // AbortError tells p-retry to stop immediately.
          if (response.status >= 400 && response.status < 500 && response.status !== 429) {
            throw new AbortError(`HTTP ${response.status}: ${response.statusText}`);
          }
          throw new Error(`HTTP ${response.status}: ${response.statusText}`);
        }
        const contentType = response.headers.get("content-type");
        if (!contentType?.startsWith("image/")) {
          throw new AbortError(`Invalid content type: ${contentType}. Expected image/*`);
        }
        const arrayBuffer = await response.arrayBuffer();
        const buffer = Buffer.from(arrayBuffer);
        if (buffer.length === 0) {
          throw new AbortError("Downloaded image is empty");
        }
        const base64Data = `data:${contentType};base64,${buffer.toString("base64")}`;
        return {
          base64Data,
          buffer,
          url,
          contentType,
          sizeBytes: buffer.length,
          attempts: attemptCount
        };
      } catch (error) {
        clearTimeout(timeoutId);
        // p-retry's AbortError passes through untouched (non-retryable).
        if (error instanceof AbortError) {
          throw error;
        }
        if (error instanceof Error) {
          if (error.name === "AbortError") {
            // fetch was aborted by our timeout timer above.
            // FIX: preserve the original error as `cause` instead of discarding it.
            throw new Error(`Request timeout after ${opts.timeout}ms`, { cause: error });
          }
          throw new Error(`Download failed: ${error.message}`, { cause: error });
        }
        throw new Error("Unknown download error");
      }
    },
    {
      retries: opts.retries,
      minTimeout: opts.retryDelay,
      maxTimeout: opts.maxRetryDelay,
      factor: opts.exponentialBackoff ? 2 : 1,
      randomize: true,
      // Add jitter to prevent thundering herd
      onFailedAttempt: (error) => {
        console.warn(`Image download attempt ${error.attemptNumber} failed for ${url}`);
        if (error.retriesLeft > 0) {
          console.warn(`Retrying... (${error.retriesLeft} attempts left)`);
        }
      }
    }
  );
}
|
|
115
|
+
/**
 * Download several images as base64 data URIs, at most `maxConcurrent` at a
 * time. Each batch completes fully before the next begins, and results are
 * returned in the same order as `urls`.
 *
 * @param {string[]} urls - Image URLs to download.
 * @param {object} [options] - Per-download options (see downloadImageAsBase64).
 * @param {number} [maxConcurrent=5] - Maximum simultaneous downloads.
 * @returns {Promise<Array>} Download results, one per URL.
 */
async function downloadImagesAsBase64(urls, options = {}, maxConcurrent = 5) {
  const downloaded = [];
  let cursor = 0;
  while (cursor < urls.length) {
    const chunk = urls.slice(cursor, cursor + maxConcurrent);
    const chunkResults = await Promise.all(
      chunk.map((imageUrl) => downloadImageAsBase64(imageUrl, options))
    );
    downloaded.push(...chunkResults);
    cursor += maxConcurrent;
  }
  return downloaded;
}
|
|
125
|
+
/**
 * Download an image and upload it to the Anthropic Files API, returning the
 * resulting file id plus basic metadata about the image.
 *
 * @param {string} url - Source image URL.
 * @param {string} anthropicApiKey - API key sent as `x-api-key`.
 * @param {object} [options] - Download options (see downloadImageAsBase64).
 * @returns {Promise<{fileId: string, url: string, contentType: string, sizeBytes: number}>}
 * @throws {Error} When the Files API responds with a non-2xx status.
 */
async function uploadImageToAnthropicFiles(url, anthropicApiKey, options = {}) {
  const downloaded = await downloadImageAsBase64(url, options);
  // Derive a filename extension from the MIME subtype (fallback: png).
  const extension = downloaded.contentType.split("/")[1] || "png";
  const payload = new FormData();
  payload.append(
    "file",
    new Blob([downloaded.buffer], { type: downloaded.contentType }),
    `image.${extension}`
  );
  const response = await fetch("https://api.anthropic.com/v1/files", {
    method: "POST",
    headers: {
      "x-api-key": anthropicApiKey,
      "anthropic-version": "2023-06-01",
      "anthropic-beta": "files-api-2025-04-14"
      // Don't set Content-Type header - let fetch set it with boundary for multipart
    },
    body: payload
  });
  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Anthropic Files API error: ${response.status} ${response.statusText} - ${errorText}`);
  }
  const fileResult = await response.json();
  return {
    fileId: fileResult.id,
    url: downloaded.url,
    contentType: downloaded.contentType,
    sizeBytes: downloaded.sizeBytes
  };
}
|
|
155
|
+
// Lazily-initialized defaults for image downloads (populated on first
// init_image_download() call via the esbuild __esm wrapper).
var DEFAULT_OPTIONS;
var init_image_download = __esm({
  "src/utils/image-download.ts"() {
    DEFAULT_OPTIONS = {
      timeout: 1e4,        // per-request timeout in ms (10s)
      retries: 3,          // retry attempts after the initial request
      retryDelay: 1e3,     // initial backoff in ms
      maxRetryDelay: 1e4,  // backoff ceiling in ms
      exponentialBackoff: true
    };
  }
});
|
|
167
|
+
|
|
168
|
+
// src/moderation.ts
// Ensure the image-download module's DEFAULT_OPTIONS are initialized before use.
init_image_download();
// Default score thresholds (0..1) above which a video is flagged by
// getModerationScores; callers can override per category.
var DEFAULT_THRESHOLDS = {
  sexual: 0.7,
  violence: 0.8
};
|
|
174
|
+
/**
 * Apply an async `processor` to `items` in fixed-size batches, awaiting each
 * batch before starting the next. Output order matches input order.
 *
 * @param {Array} items - Inputs to process.
 * @param {Function} processor - Async mapper applied to each item.
 * @param {number} [maxConcurrent=5] - Batch size / concurrency limit.
 * @returns {Promise<Array>} Processor results in input order.
 */
async function processConcurrently(items, processor, maxConcurrent = 5) {
  const output = [];
  let offset = 0;
  while (offset < items.length) {
    const window = items.slice(offset, offset + maxConcurrent);
    const settled = await Promise.all(window.map(processor));
    output.push(...settled);
    offset += maxConcurrent;
  }
  return output;
}
|
|
184
|
+
// Hive visual-moderation class names that contribute to the "sexual" score.
// The max score across these classes is reported per thumbnail.
var HIVE_SEXUAL_CATEGORIES = [
  "general_nsfw",
  "general_suggestive",
  "yes_sexual_activity",
  "female_underwear",
  "male_underwear",
  "bra",
  "panties",
  "sex_toys",
  "nudity_female",
  "nudity_male",
  "cleavage",
  "swimwear"
];
// Hive class names that contribute to the "violence" score (same max rule).
var HIVE_VIOLENCE_CATEGORIES = [
  "gun_in_hand",
  "gun_not_in_hand",
  "animated_gun",
  "knife_in_hand",
  "knife_not_in_hand",
  "culinary_knife_not_in_hand",
  "culinary_knife_in_hand",
  "very_bloody",
  "a_little_bloody",
  "other_blood",
  "hanging",
  "noose",
  "human_corpse",
  "animated_corpse",
  "emaciated_body",
  "self_harm",
  "animal_abuse",
  "fights",
  "garm_death_injury_or_military_conflict"
];
|
|
219
|
+
/**
 * Build Mux image-service thumbnail URLs sampled across an asset's duration.
 *
 * Assets up to 50 seconds get exactly 5 frames spread evenly through the
 * middle of the video (avoiding the very start/end); longer assets get one
 * frame every `interval` seconds starting at t=0.
 *
 * @param {string} playbackId - Public Mux playback ID.
 * @param {number} duration - Asset duration in seconds.
 * @param {{interval?: number, width?: number}} [options]
 * @returns {string[]} Thumbnail PNG URLs.
 */
function getThumbnailUrls(playbackId, duration, options = {}) {
  const { interval = 10, width = 640 } = options;
  let sampleTimes;
  if (duration <= 50) {
    // Five evenly spaced samples at 1/6 .. 5/6 of the duration.
    const step = duration / 6;
    sampleTimes = Array.from({ length: 5 }, (_, idx) => Math.round((idx + 1) * step));
  } else {
    sampleTimes = [];
    for (let t = 0; t < duration; t += interval) {
      sampleTimes.push(t);
    }
  }
  return sampleTimes.map(
    (t) => `https://image.mux.com/${playbackId}/thumbnail.png?time=${t}&width=${width}`
  );
}
|
|
236
|
+
/**
 * Run OpenAI image moderation over a list of thumbnail URLs.
 *
 * In "url" mode the URLs are sent directly; in "base64" mode the images are
 * downloaded first and submitted as data URIs. Per-image failures fail open:
 * the result carries zero scores with `error: true` rather than rejecting.
 *
 * @param {string[]} imageUrls - Thumbnail URLs to moderate.
 * @param {OpenAI} openaiClient - Configured OpenAI client.
 * @param {string} model - Moderation model name.
 * @param {number} [maxConcurrent=5] - Concurrency limit.
 * @param {"url"|"base64"} [submissionMode="url"] - How images are submitted.
 * @param {object} [downloadOptions] - Options for base64 downloads.
 * @returns {Promise<Array<{url: string, sexual: number, violence: number, error: boolean}>>}
 */
async function requestOpenAIModeration(imageUrls, openaiClient, model, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
  // FIX: the base64 and URL paths previously duplicated the entire moderation
  // call + score mapping; factored into one helper. `trackingUrl` is reported
  // back to the caller, `inputUrl` is what is actually sent (URL or data URI).
  const moderateOne = async (trackingUrl, inputUrl, errorLabel) => {
    try {
      const moderation = await openaiClient.moderations.create({
        model,
        input: [
          {
            type: "image_url",
            image_url: {
              url: inputUrl
            }
          }
        ]
      });
      const categoryScores = moderation.results[0].category_scores;
      return {
        url: trackingUrl,
        sexual: categoryScores.sexual || 0,
        violence: categoryScores.violence || 0,
        error: false
      };
    } catch (error) {
      console.error(errorLabel, error);
      // Fail open with zero scores, but flag the error for callers to inspect.
      return {
        url: trackingUrl,
        sexual: 0,
        violence: 0,
        error: true
      };
    }
  };
  if (submissionMode === "base64") {
    try {
      const downloadResults = await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent);
      return processConcurrently(
        downloadResults,
        (downloadResult) => moderateOne(
          downloadResult.url,
          downloadResult.base64Data,
          `Failed to moderate downloaded image ${downloadResult.url}:`
        ),
        maxConcurrent
      );
    } catch (error) {
      // The batch download itself failed: mark every image as errored.
      console.error("Failed to download images for base64 submission:", error);
      return imageUrls.map((url) => ({
        url,
        sexual: 0,
        violence: 0,
        error: true
      }));
    }
  }
  return processConcurrently(
    imageUrls,
    (url) => moderateOne(url, url, "Failed to moderate image:"),
    maxConcurrent
  );
}
|
|
315
|
+
/**
 * Run Hive visual moderation over a list of thumbnail URLs.
 *
 * In "url" mode the URL is posted as JSON to Hive's sync endpoint; in
 * "base64" mode images are downloaded and uploaded as multipart form data.
 * Per-image failures fail open: zero scores with `error: true`.
 *
 * @param {string[]} imageUrls - Thumbnail URLs to moderate.
 * @param {string} hiveApiKey - Hive API token.
 * @param {number} [maxConcurrent=5] - Concurrency limit.
 * @param {"url"|"base64"} [submissionMode="url"] - How images are submitted.
 * @param {object} [downloadOptions] - Options for base64 downloads.
 * @returns {Promise<Array<{url: string, sexual: number, violence: number, error: boolean}>>}
 */
async function requestHiveModeration(imageUrls, hiveApiKey, maxConcurrent = 5, submissionMode = "url", downloadOptions) {
  // FIX: both submission paths previously duplicated this response parsing
  // verbatim; factored into one helper. Collapses Hive's nested class list
  // into the max score per category group (0 when a class is absent).
  const extractScores = (hiveResult) => {
    const classes = hiveResult.status?.[0]?.response?.output?.[0]?.classes || [];
    const scoreMap = Object.fromEntries(classes.map((c) => [c.class, c.score]));
    const maxOf = (categories) => Math.max(...categories.map((category) => scoreMap[category] || 0), 0);
    return {
      sexual: maxOf(HIVE_SEXUAL_CATEGORIES),
      violence: maxOf(HIVE_VIOLENCE_CATEGORIES)
    };
  };
  if (submissionMode === "base64") {
    try {
      const downloadResults = await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent);
      const processor2 = async (downloadResult) => {
        try {
          const formData = new FormData();
          const imageBlob = new Blob([downloadResult.buffer], {
            type: downloadResult.contentType
          });
          const extension = downloadResult.contentType.split("/")[1] || "png";
          formData.append("media", imageBlob, `image.${extension}`);
          const response = await fetch("https://api.thehive.ai/api/v2/task/sync", {
            method: "POST",
            headers: {
              "Authorization": `Token ${hiveApiKey}`
              // Don't set Content-Type header - let fetch set it with boundary for multipart
            },
            body: formData
          });
          if (!response.ok) {
            throw new Error(`Hive API error: ${response.statusText}`);
          }
          const hiveResult = await response.json();
          const { sexual, violence } = extractScores(hiveResult);
          return {
            url: downloadResult.url,
            // Return original URL for tracking
            sexual,
            violence,
            error: false
          };
        } catch (error) {
          console.error(`Failed to moderate uploaded image ${downloadResult.url}:`, error);
          return {
            url: downloadResult.url,
            sexual: 0,
            violence: 0,
            error: true
          };
        }
      };
      return processConcurrently(downloadResults, processor2, maxConcurrent);
    } catch (error) {
      // The batch download itself failed: mark every image as errored.
      console.error("Failed to download images for Hive multipart upload:", error);
      return imageUrls.map((url) => ({
        url,
        sexual: 0,
        violence: 0,
        error: true
      }));
    }
  }
  const processor = async (url) => {
    try {
      const response = await fetch("https://api.thehive.ai/api/v2/task/sync", {
        method: "POST",
        headers: {
          "Authorization": `Token ${hiveApiKey}`,
          "Content-Type": "application/json"
        },
        body: JSON.stringify({ url })
      });
      if (!response.ok) {
        throw new Error(`Hive API error: ${response.statusText}`);
      }
      const hiveResult = await response.json();
      const { sexual, violence } = extractScores(hiveResult);
      return {
        url,
        sexual,
        violence,
        error: false
      };
    } catch (error) {
      console.error("Failed to moderate image with Hive:", error);
      return {
        url,
        sexual: 0,
        violence: 0,
        error: true
      };
    }
  };
  return processConcurrently(imageUrls, processor, maxConcurrent);
}
|
|
415
|
+
/**
 * Moderate a Mux asset by sampling thumbnails and scoring them with the
 * configured provider (OpenAI or Hive).
 *
 * Resolves the asset via the Mux API, builds thumbnail URLs from its first
 * public playback ID, moderates each thumbnail, and returns per-thumbnail
 * scores plus the max score per category and a threshold verdict.
 *
 * @param {string} assetId - Mux asset ID.
 * @param {object} [options] - Provider, model, thresholds, thumbnail sampling,
 *   concurrency, submission mode, and credential overrides.
 * @returns {Promise<object>} { assetId, thumbnailScores, maxScores,
 *   exceedsThreshold, thresholds }.
 * @throws {Error} On unsupported provider, missing credentials, Mux fetch
 *   failure, or an asset with no public playback ID.
 */
async function getModerationScores(assetId, options = {}) {
  const {
    provider = "openai",
    model = "omni-moderation-latest",
    thresholds = DEFAULT_THRESHOLDS,
    thumbnailInterval = 10,
    thumbnailWidth = 640,
    maxConcurrent = 5,
    imageSubmissionMode = "url",
    imageDownloadOptions,
    muxTokenId,
    muxTokenSecret,
    openaiApiKey,
    ...config
  } = options;
  if (provider !== "openai" && provider !== "hive") {
    throw new Error("Only OpenAI and Hive providers are currently supported");
  }
  // Credentials: explicit options win over environment variables.
  const muxId = muxTokenId || process.env.MUX_TOKEN_ID;
  const muxSecret = muxTokenSecret || process.env.MUX_TOKEN_SECRET;
  const openaiKey = openaiApiKey || process.env.OPENAI_API_KEY;
  // NOTE: hiveApiKey is read off `options` directly rather than destructured above.
  const hiveKey = options.hiveApiKey || process.env.HIVE_API_KEY;
  if (!muxId || !muxSecret) {
    throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
  }
  if (provider === "openai" && !openaiKey) {
    throw new Error("OpenAI API key is required for OpenAI provider. Provide openaiApiKey in options or set OPENAI_API_KEY environment variable.");
  }
  if (provider === "hive" && !hiveKey) {
    throw new Error("Hive API key is required for Hive provider. Provide hiveApiKey in options or set HIVE_API_KEY environment variable.");
  }
  const mux = new Mux({
    tokenId: muxId,
    tokenSecret: muxSecret
  });
  // OpenAI client is only constructed when that provider is selected.
  let openaiClient;
  if (provider === "openai") {
    openaiClient = new OpenAI({
      apiKey: openaiKey
    });
  }
  let assetData;
  try {
    const asset = await mux.video.assets.retrieve(assetId);
    assetData = asset;
  } catch (error) {
    throw new Error(`Failed to fetch asset from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  // Moderation thumbnails are fetched from image.mux.com, which requires a
  // public (unsigned) playback policy.
  const publicPlaybackIds = assetData.playback_ids?.filter((pid) => pid.policy === "public") || [];
  if (publicPlaybackIds.length === 0) {
    throw new Error("No public playback IDs found for this asset. Moderation requires public playback access.");
  }
  const playbackId = publicPlaybackIds[0].id;
  const duration = assetData.duration || 0;
  const thumbnailUrls = getThumbnailUrls(playbackId, duration, {
    interval: thumbnailInterval,
    width: thumbnailWidth
  });
  let thumbnailScores;
  if (provider === "openai") {
    thumbnailScores = await requestOpenAIModeration(
      thumbnailUrls,
      openaiClient,
      model,
      maxConcurrent,
      imageSubmissionMode,
      imageDownloadOptions
    );
  } else if (provider === "hive") {
    thumbnailScores = await requestHiveModeration(
      thumbnailUrls,
      hiveKey,
      maxConcurrent,
      imageSubmissionMode,
      imageDownloadOptions
    );
  } else {
    // Unreachable given the provider check above; kept as a defensive guard.
    throw new Error("Unsupported provider");
  }
  // The asset's score per category is the max across all sampled thumbnails.
  // NOTE(review): thumbnails that errored contribute 0, so widespread provider
  // failures fail open — check `error` flags in thumbnailScores if that matters.
  const maxSexual = Math.max(...thumbnailScores.map((s) => s.sexual));
  const maxViolence = Math.max(...thumbnailScores.map((s) => s.violence));
  // Merge caller thresholds over defaults so partial overrides work.
  const finalThresholds = { ...DEFAULT_THRESHOLDS, ...thresholds };
  return {
    assetId,
    thumbnailScores,
    maxScores: {
      sexual: maxSexual,
      violence: maxViolence
    },
    exceedsThreshold: maxSexual > finalThresholds.sexual || maxViolence > finalThresholds.violence,
    thresholds: finalThresholds
  };
}
|
|
508
|
+
|
|
509
|
+
// src/summarization.ts
|
|
510
|
+
init_image_download();
|
|
511
|
+
|
|
512
|
+
// src/utils/vtt-parser.ts
|
|
513
|
+
/**
 * Extract the spoken text from a WebVTT document as one plain-text string.
 *
 * Strips the WEBVTT header, NOTE comments, STYLE/REGION blocks, timing lines,
 * cue identifiers, and inline markup tags (<v ...>, <c>, etc.), then joins the
 * remaining cue text with single spaces.
 *
 * @param {string} vttContent - Raw WebVTT file contents.
 * @returns {string} Concatenated caption text ("" for blank input).
 */
function extractTextFromVTT(vttContent) {
  if (!vttContent.trim()) {
    return "";
  }
  const lines = vttContent.split("\n");
  const textLines = [];
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (!line) continue;
    if (line === "WEBVTT") continue;
    if (line.startsWith("NOTE ")) continue;
    if (line.includes("-->")) continue;
    if (line.startsWith("STYLE") || line.startsWith("REGION")) continue;
    // FIX: a cue identifier is a line immediately followed by a timing line.
    // The old shape-only check (/^[\d\w-]+$/ with no space) also dropped
    // legitimate single-word captions such as "Hello".
    const nextLine = (lines[i + 1] || "").trim();
    if (/^[\w-]+$/.test(line) && nextLine.includes("-->")) continue;
    // Strip inline markup tags, keep the text content.
    const cleanLine = line.replace(/<[^>]*>/g, "").trim();
    if (cleanLine) {
      textLines.push(cleanLine);
    }
  }
  return textLines.join(" ").replace(/\s+/g, " ").trim();
}
|
|
534
|
+
|
|
535
|
+
// src/summarization.ts
|
|
536
|
+
// Structured-output schema for video summaries (used with OpenAI's
// zodTextFormat parsing and as the expected shape of Anthropic JSON replies).
// NOTE(review): the schema allows descriptions up to 1000 chars while both
// prompts below ask for max 500 — confirm which limit is intended.
var summarySchema = z.object({
  keywords: z.array(z.string()).max(10),
  title: z.string().max(100),
  description: z.string().max(1e3)
});
// Default instruction when the caller does not supply a custom prompt.
var DEFAULT_PROMPT = "Generate a short title (max 100 characters) and description (max 500 characters) for what happens. Start immediately with the action or subject - never reference that this is a video, content, or storyboard. Example: Title: 'Cooking Pasta Tutorial' Description: 'Someone cooks pasta by boiling water and adding noodles.'";
// Appended for Anthropic, which has no structured-output parser here: forces
// a bare-JSON reply that is then JSON.parse'd by the caller.
var ANTHROPIC_JSON_PROMPT = `You must respond with valid JSON in exactly this format:
{
  "title": "Your title here (max 100 characters)",
  "description": "Your description here (max 500 characters)",
  "keywords": ["keyword1", "keyword2", "keyword3"]
}

Do not include any text before or after the JSON. The JSON must be valid and parseable.`;
|
|
550
|
+
async function getSummaryAndTags(assetId, promptOrOptions, options) {
|
|
551
|
+
let prompt;
|
|
552
|
+
let actualOptions;
|
|
553
|
+
if (typeof promptOrOptions === "string") {
|
|
554
|
+
prompt = promptOrOptions;
|
|
555
|
+
actualOptions = options || {};
|
|
556
|
+
} else {
|
|
557
|
+
prompt = DEFAULT_PROMPT;
|
|
558
|
+
actualOptions = promptOrOptions || {};
|
|
559
|
+
}
|
|
560
|
+
const {
|
|
561
|
+
provider = "openai",
|
|
562
|
+
model,
|
|
563
|
+
tone = "normal",
|
|
564
|
+
includeTranscript = true,
|
|
565
|
+
cleanTranscript = true,
|
|
566
|
+
imageSubmissionMode = "url",
|
|
567
|
+
imageDownloadOptions,
|
|
568
|
+
muxTokenId,
|
|
569
|
+
muxTokenSecret,
|
|
570
|
+
openaiApiKey,
|
|
571
|
+
anthropicApiKey,
|
|
572
|
+
...config
|
|
573
|
+
} = actualOptions;
|
|
574
|
+
const defaultModel = provider === "anthropic" ? "claude-3-5-haiku-20241022" : "gpt-4o-mini";
|
|
575
|
+
const finalModel = model || defaultModel;
|
|
576
|
+
const muxId = muxTokenId || process.env.MUX_TOKEN_ID;
|
|
577
|
+
const muxSecret = muxTokenSecret || process.env.MUX_TOKEN_SECRET;
|
|
578
|
+
const openaiKey = openaiApiKey || process.env.OPENAI_API_KEY;
|
|
579
|
+
const anthropicKey = anthropicApiKey || process.env.ANTHROPIC_API_KEY;
|
|
580
|
+
if (!muxId || !muxSecret) {
|
|
581
|
+
throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
|
|
582
|
+
}
|
|
583
|
+
if (provider === "openai" && !openaiKey) {
|
|
584
|
+
throw new Error("OpenAI API key is required. Provide openaiApiKey in options or set OPENAI_API_KEY environment variable.");
|
|
585
|
+
}
|
|
586
|
+
if (provider === "anthropic" && !anthropicKey) {
|
|
587
|
+
throw new Error("Anthropic API key is required. Provide anthropicApiKey in options or set ANTHROPIC_API_KEY environment variable.");
|
|
588
|
+
}
|
|
589
|
+
const mux = new Mux({
|
|
590
|
+
tokenId: muxId,
|
|
591
|
+
tokenSecret: muxSecret
|
|
592
|
+
});
|
|
593
|
+
let openaiClient;
|
|
594
|
+
let anthropicClient;
|
|
595
|
+
if (provider === "openai") {
|
|
596
|
+
openaiClient = new OpenAI({
|
|
597
|
+
apiKey: openaiKey
|
|
598
|
+
});
|
|
599
|
+
} else if (provider === "anthropic") {
|
|
600
|
+
anthropicClient = new Anthropic({
|
|
601
|
+
apiKey: anthropicKey
|
|
602
|
+
});
|
|
603
|
+
}
|
|
604
|
+
let assetData;
|
|
605
|
+
try {
|
|
606
|
+
const asset = await mux.video.assets.retrieve(assetId);
|
|
607
|
+
assetData = asset;
|
|
608
|
+
} catch (error) {
|
|
609
|
+
throw new Error(`Failed to fetch asset from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
610
|
+
}
|
|
611
|
+
const playbackId = assetData.playback_ids?.[0]?.id;
|
|
612
|
+
if (!playbackId) {
|
|
613
|
+
throw new Error("No playback ID found for this asset");
|
|
614
|
+
}
|
|
615
|
+
let transcriptText = "";
|
|
616
|
+
if (includeTranscript && assetData.tracks) {
|
|
617
|
+
const textTrack = assetData.tracks.find(
|
|
618
|
+
(track) => track.type === "text" && track.status === "ready"
|
|
619
|
+
);
|
|
620
|
+
if (textTrack) {
|
|
621
|
+
const transcriptUrl = `https://stream.mux.com/${playbackId}/text/${textTrack.id}.vtt`;
|
|
622
|
+
try {
|
|
623
|
+
const transcriptResponse = await fetch(transcriptUrl);
|
|
624
|
+
if (transcriptResponse.ok) {
|
|
625
|
+
const rawVttContent = await transcriptResponse.text();
|
|
626
|
+
transcriptText = cleanTranscript ? extractTextFromVTT(rawVttContent) : rawVttContent;
|
|
627
|
+
}
|
|
628
|
+
} catch (error) {
|
|
629
|
+
console.warn("Failed to fetch transcript:", error);
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
}
|
|
633
|
+
let toneInstruction = "";
|
|
634
|
+
switch (tone) {
|
|
635
|
+
case "sassy":
|
|
636
|
+
toneInstruction = " Answer with a sassy, playful attitude and personality.";
|
|
637
|
+
break;
|
|
638
|
+
case "professional":
|
|
639
|
+
toneInstruction = " Provide a professional, executive-level analysis suitable for business reporting.";
|
|
640
|
+
break;
|
|
641
|
+
default:
|
|
642
|
+
toneInstruction = " Provide a clear, straightforward analysis.";
|
|
643
|
+
}
|
|
644
|
+
let contextualPrompt = prompt + toneInstruction;
|
|
645
|
+
if (transcriptText) {
|
|
646
|
+
const transcriptType = cleanTranscript ? "transcript" : "WebVTT transcript";
|
|
647
|
+
contextualPrompt += ` Use the following ${transcriptType} for additional context: "${transcriptText}"`;
|
|
648
|
+
}
|
|
649
|
+
const imageUrl = `https://image.mux.com/${playbackId}/storyboard.png?width=640`;
|
|
650
|
+
let aiAnalysis = null;
|
|
651
|
+
let retryAttempt = 0;
|
|
652
|
+
const maxRetries = 3;
|
|
653
|
+
if (provider === "openai") {
|
|
654
|
+
if (imageSubmissionMode === "base64") {
|
|
655
|
+
try {
|
|
656
|
+
const downloadResult = await downloadImageAsBase64(imageUrl, imageDownloadOptions);
|
|
657
|
+
const response = await openaiClient.responses.parse({
|
|
658
|
+
model: finalModel,
|
|
659
|
+
input: [
|
|
660
|
+
{
|
|
661
|
+
role: "system",
|
|
662
|
+
content: "You are an image analysis tool. You will be given a storyboard image from a video showing multiple frames/scenes, and be expected to return structured data about the contents across all the frames."
|
|
663
|
+
},
|
|
664
|
+
{
|
|
665
|
+
role: "user",
|
|
666
|
+
content: [
|
|
667
|
+
{
|
|
668
|
+
type: "input_text",
|
|
669
|
+
text: contextualPrompt
|
|
670
|
+
},
|
|
671
|
+
{
|
|
672
|
+
type: "input_image",
|
|
673
|
+
image_url: downloadResult.base64Data,
|
|
674
|
+
// Use base64 data URI
|
|
675
|
+
detail: "high"
|
|
676
|
+
}
|
|
677
|
+
]
|
|
678
|
+
}
|
|
679
|
+
],
|
|
680
|
+
text: {
|
|
681
|
+
format: zodTextFormat(summarySchema, "analysis")
|
|
682
|
+
}
|
|
683
|
+
});
|
|
684
|
+
aiAnalysis = response.output_parsed;
|
|
685
|
+
} catch (error) {
|
|
686
|
+
throw new Error(`Failed to analyze video content with OpenAI in base64 mode: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
687
|
+
}
|
|
688
|
+
} else {
|
|
689
|
+
while (retryAttempt <= maxRetries) {
|
|
690
|
+
try {
|
|
691
|
+
const response = await openaiClient.responses.parse({
|
|
692
|
+
model: finalModel,
|
|
693
|
+
input: [
|
|
694
|
+
{
|
|
695
|
+
role: "system",
|
|
696
|
+
content: "You are an image analysis tool. You will be given a storyboard image from a video showing multiple frames/scenes, and be expected to return structured data about the contents across all the frames."
|
|
697
|
+
},
|
|
698
|
+
{
|
|
699
|
+
role: "user",
|
|
700
|
+
content: [
|
|
701
|
+
{
|
|
702
|
+
type: "input_text",
|
|
703
|
+
text: contextualPrompt
|
|
704
|
+
},
|
|
705
|
+
{
|
|
706
|
+
type: "input_image",
|
|
707
|
+
image_url: imageUrl,
|
|
708
|
+
detail: "high"
|
|
709
|
+
}
|
|
710
|
+
]
|
|
711
|
+
}
|
|
712
|
+
],
|
|
713
|
+
text: {
|
|
714
|
+
format: zodTextFormat(summarySchema, "analysis")
|
|
715
|
+
}
|
|
716
|
+
});
|
|
717
|
+
aiAnalysis = response.output_parsed;
|
|
718
|
+
break;
|
|
719
|
+
} catch (error) {
|
|
720
|
+
const isTimeoutError = error instanceof Error && error.message && error.message.includes("Timeout while downloading");
|
|
721
|
+
if (isTimeoutError && retryAttempt < maxRetries) {
|
|
722
|
+
await new Promise((resolve) => setTimeout(resolve, 5e3));
|
|
723
|
+
retryAttempt++;
|
|
724
|
+
continue;
|
|
725
|
+
}
|
|
726
|
+
throw new Error(`Failed to analyze video content with OpenAI: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
727
|
+
}
|
|
728
|
+
}
|
|
729
|
+
}
|
|
730
|
+
} else if (provider === "anthropic") {
|
|
731
|
+
const anthropicPrompt = `${contextualPrompt}
|
|
732
|
+
|
|
733
|
+
${ANTHROPIC_JSON_PROMPT}`;
|
|
734
|
+
if (imageSubmissionMode === "base64") {
|
|
735
|
+
try {
|
|
736
|
+
const fileUploadResult = await uploadImageToAnthropicFiles(imageUrl, anthropicKey, imageDownloadOptions);
|
|
737
|
+
const response = await anthropicClient.messages.create({
|
|
738
|
+
model: finalModel,
|
|
739
|
+
max_tokens: 1e3,
|
|
740
|
+
messages: [
|
|
741
|
+
{
|
|
742
|
+
role: "user",
|
|
743
|
+
content: [
|
|
744
|
+
{
|
|
745
|
+
type: "image",
|
|
746
|
+
source: {
|
|
747
|
+
type: "file",
|
|
748
|
+
file_id: fileUploadResult.fileId
|
|
749
|
+
}
|
|
750
|
+
// Type assertion for Files API support
|
|
751
|
+
},
|
|
752
|
+
{
|
|
753
|
+
type: "text",
|
|
754
|
+
text: anthropicPrompt
|
|
755
|
+
}
|
|
756
|
+
]
|
|
757
|
+
}
|
|
758
|
+
]
|
|
759
|
+
}, {
|
|
760
|
+
headers: {
|
|
761
|
+
"anthropic-beta": "files-api-2025-04-14"
|
|
762
|
+
}
|
|
763
|
+
});
|
|
764
|
+
const content = response.content[0];
|
|
765
|
+
if (content.type === "text") {
|
|
766
|
+
const jsonText = content.text.trim();
|
|
767
|
+
try {
|
|
768
|
+
aiAnalysis = JSON.parse(jsonText);
|
|
769
|
+
} catch (parseError) {
|
|
770
|
+
throw new Error(`Failed to parse JSON response from Anthropic: ${parseError instanceof Error ? parseError.message : "Unknown error"}`);
|
|
771
|
+
}
|
|
772
|
+
} else {
|
|
773
|
+
throw new Error("Unexpected response type from Anthropic");
|
|
774
|
+
}
|
|
775
|
+
} catch (error) {
|
|
776
|
+
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
|
777
|
+
throw new Error(`Failed to analyze video content with Anthropic Files API: ${errorMessage}`);
|
|
778
|
+
}
|
|
779
|
+
} else {
|
|
780
|
+
while (retryAttempt <= maxRetries) {
|
|
781
|
+
try {
|
|
782
|
+
const response = await anthropicClient.messages.create({
|
|
783
|
+
model: finalModel,
|
|
784
|
+
max_tokens: 1e3,
|
|
785
|
+
messages: [
|
|
786
|
+
{
|
|
787
|
+
role: "user",
|
|
788
|
+
content: [
|
|
789
|
+
{
|
|
790
|
+
type: "image",
|
|
791
|
+
source: {
|
|
792
|
+
type: "url",
|
|
793
|
+
url: imageUrl
|
|
794
|
+
}
|
|
795
|
+
// Type assertion to work around SDK type definitions
|
|
796
|
+
},
|
|
797
|
+
{
|
|
798
|
+
type: "text",
|
|
799
|
+
text: anthropicPrompt
|
|
800
|
+
}
|
|
801
|
+
]
|
|
802
|
+
}
|
|
803
|
+
]
|
|
804
|
+
});
|
|
805
|
+
const content = response.content[0];
|
|
806
|
+
if (content.type === "text") {
|
|
807
|
+
const jsonText = content.text.trim();
|
|
808
|
+
try {
|
|
809
|
+
aiAnalysis = JSON.parse(jsonText);
|
|
810
|
+
break;
|
|
811
|
+
} catch (parseError) {
|
|
812
|
+
if (retryAttempt < maxRetries) {
|
|
813
|
+
console.warn(`Failed to parse JSON from Anthropic (attempt ${retryAttempt + 1}):`, jsonText);
|
|
814
|
+
retryAttempt++;
|
|
815
|
+
await new Promise((resolve) => setTimeout(resolve, 2e3));
|
|
816
|
+
continue;
|
|
817
|
+
}
|
|
818
|
+
throw new Error(`Failed to parse JSON response from Anthropic: ${parseError instanceof Error ? parseError.message : "Unknown error"}`);
|
|
819
|
+
}
|
|
820
|
+
} else {
|
|
821
|
+
throw new Error("Unexpected response type from Anthropic");
|
|
822
|
+
}
|
|
823
|
+
} catch (error) {
|
|
824
|
+
if (retryAttempt < maxRetries) {
|
|
825
|
+
await new Promise((resolve) => setTimeout(resolve, 5e3));
|
|
826
|
+
retryAttempt++;
|
|
827
|
+
continue;
|
|
828
|
+
}
|
|
829
|
+
throw new Error(`Failed to analyze video content with Anthropic: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
830
|
+
}
|
|
831
|
+
}
|
|
832
|
+
}
|
|
833
|
+
} else {
|
|
834
|
+
throw new Error(`Unsupported provider: ${provider}`);
|
|
835
|
+
}
|
|
836
|
+
if (!aiAnalysis) {
|
|
837
|
+
throw new Error("No analysis result received from AI provider");
|
|
838
|
+
}
|
|
839
|
+
return {
|
|
840
|
+
assetId,
|
|
841
|
+
title: aiAnalysis.title || "No title available",
|
|
842
|
+
description: aiAnalysis.description || "No description available",
|
|
843
|
+
tags: aiAnalysis.keywords || [],
|
|
844
|
+
storyboardUrl: imageUrl
|
|
845
|
+
};
|
|
846
|
+
}
|
|
847
|
+
/**
 * Translates an existing Mux caption track into another language via Anthropic,
 * then (optionally) uploads the translated VTT to S3-compatible storage and
 * attaches it to the Mux asset as a new subtitle track.
 *
 * Pipeline: retrieve asset -> find ready source text track -> fetch its VTT ->
 * translate with Anthropic (JSON response with a 'translation' key) ->
 * upload to S3 -> presign URL -> mux createTrack.
 *
 * @param {string} assetId - Mux asset ID.
 * @param {string} fromLanguageCode - Language code of the existing text track
 *   (matched exactly against track.language_code).
 * @param {string} toLanguageCode - Target language code for the translation.
 * @param {object} [options] - Credentials, model selection, S3 config, and
 *   uploadToMux toggle (defaults to true; set false to only return the VTT).
 * @returns {Promise<object>} assetId, source/target language codes, original
 *   and translated VTT, and (when uploading) uploadedTrackId + presignedUrl.
 * @throws {Error} On missing credentials/config, any failed network step,
 *   unparseable Anthropic output, or S3 upload failure.
 */
async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options = {}) {
  const {
    provider = "anthropic",
    model = "claude-sonnet-4-20250514",
    muxTokenId,
    muxTokenSecret,
    anthropicApiKey,
    ...config
  } = options;
  // Only Anthropic is wired up for caption translation today.
  if (provider !== "anthropic") {
    throw new Error("Only Anthropic provider is currently supported for translation");
  }
  // Explicit options win; environment variables are the fallback.
  const muxId = muxTokenId || process.env.MUX_TOKEN_ID;
  const muxSecret = muxTokenSecret || process.env.MUX_TOKEN_SECRET;
  const anthropicKey = anthropicApiKey || process.env.ANTHROPIC_API_KEY;
  const s3Endpoint = options.s3Endpoint || process.env.S3_ENDPOINT;
  const s3Region = options.s3Region || process.env.S3_REGION || "auto";
  const s3Bucket = options.s3Bucket || process.env.S3_BUCKET;
  const s3AccessKeyId = options.s3AccessKeyId || process.env.S3_ACCESS_KEY_ID;
  const s3SecretAccessKey = options.s3SecretAccessKey || process.env.S3_SECRET_ACCESS_KEY;
  // Defaults to true; anything other than explicit `false` enables upload.
  const uploadToMux = options.uploadToMux !== false;
  if (!muxId || !muxSecret) {
    throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
  }
  if (!anthropicKey) {
    throw new Error("Anthropic API key is required. Provide anthropicApiKey in options or set ANTHROPIC_API_KEY environment variable.");
  }
  // S3 config is only mandatory when we will push the result back to Mux.
  if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
    throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
  }
  const mux = new Mux({
    tokenId: muxId,
    tokenSecret: muxSecret
  });
  const anthropicClient = new Anthropic({
    apiKey: anthropicKey
  });
  let assetData;
  try {
    const asset = await mux.video.assets.retrieve(assetId);
    assetData = asset;
  } catch (error) {
    throw new Error(`Failed to fetch asset from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  // A playback ID is required to build the public stream.mux.com VTT URL.
  const playbackId = assetData.playback_ids?.[0]?.id;
  if (!playbackId) {
    throw new Error("No playback ID found for this asset");
  }
  if (!assetData.tracks) {
    throw new Error("No tracks found for this asset");
  }
  // Source must be a *ready* text track in the requested source language.
  const sourceTextTrack = assetData.tracks.find(
    (track) => track.type === "text" && track.status === "ready" && track.language_code === fromLanguageCode
  );
  if (!sourceTextTrack) {
    throw new Error(`No ready text track found with language code '${fromLanguageCode}' for this asset`);
  }
  const vttUrl = `https://stream.mux.com/${playbackId}/text/${sourceTextTrack.id}.vtt`;
  let vttContent;
  try {
    const vttResponse = await fetch(vttUrl);
    if (!vttResponse.ok) {
      throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
    }
    vttContent = await vttResponse.text();
  } catch (error) {
    throw new Error(`Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  console.log(`\u2705 Found VTT content for language '${fromLanguageCode}'`);
  let translatedVtt;
  try {
    const response = await anthropicClient.messages.create({
      model,
      // NOTE(review): 4000 output tokens may truncate long subtitle files;
      // confirm this limit against expected VTT sizes.
      max_tokens: 4e3,
      messages: [
        {
          role: "user",
          content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Return the translated VTT in JSON format with the key 'translation'. Preserve all timestamps and VTT formatting exactly as they appear.

${vttContent}`
        }
      ]
    });
    const content = response.content[0];
    if (content.type === "text") {
      const responseText = content.text.trim();
      try {
        // Strip markdown code fences the model sometimes wraps JSON in.
        const cleanedResponse = responseText.replace(/```json/g, "").replace(/```/g, "").trim();
        const parsed = JSON.parse(cleanedResponse);
        translatedVtt = parsed.translation;
      } catch (parseError) {
        throw new Error(`Failed to parse JSON response from Anthropic: ${parseError instanceof Error ? parseError.message : "Unknown error"}`);
      }
    } else {
      throw new Error("Unexpected response type from Anthropic");
    }
  } catch (error) {
    throw new Error(`Failed to translate VTT with Anthropic: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  console.log(`
\u2705 Translation completed successfully!`);
  // Caller opted out of the upload path: return the translated text only.
  if (!uploadToMux) {
    console.log(`\u2705 VTT translated to ${toLanguageCode} successfully!`);
    return {
      assetId,
      sourceLanguageCode: fromLanguageCode,
      targetLanguageCode: toLanguageCode,
      originalVtt: vttContent,
      translatedVtt
    };
  }
  console.log("\u{1F4E4} Uploading translated VTT to S3-compatible storage...");
  const s3Client = new S3Client({
    region: s3Region,
    endpoint: s3Endpoint,
    credentials: {
      accessKeyId: s3AccessKeyId,
      secretAccessKey: s3SecretAccessKey
    },
    forcePathStyle: true
    // Often needed for non-AWS S3 services
  });
  // Timestamped key avoids collisions on repeated translations of one asset.
  const vttKey = `translations/${assetId}/${fromLanguageCode}-to-${toLanguageCode}-${Date.now()}.vtt`;
  let presignedUrl;
  try {
    const upload = new Upload({
      client: s3Client,
      params: {
        Bucket: s3Bucket,
        Key: vttKey,
        Body: translatedVtt,
        ContentType: "text/vtt"
      }
    });
    await upload.done();
    console.log(`\u2705 VTT uploaded successfully to: ${vttKey}`);
    // Presign a temporary GET URL so Mux can ingest from a private bucket.
    const getObjectCommand = new GetObjectCommand({
      Bucket: s3Bucket,
      Key: vttKey
    });
    presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
      expiresIn: 3600
      // 1 hour
    });
    console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
  } catch (error) {
    throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  console.log("\u{1F4F9} Adding translated track to Mux asset...");
  let uploadedTrackId;
  try {
    // Human-readable track label, e.g. "Spanish (auto-translated)".
    const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(toLanguageCode) || toLanguageCode.toUpperCase();
    const trackName = `${languageName} (auto-translated)`;
    const trackResponse = await mux.video.assets.createTrack(assetId, {
      type: "text",
      text_type: "subtitles",
      language_code: toLanguageCode,
      name: trackName,
      url: presignedUrl
    });
    uploadedTrackId = trackResponse.id;
    console.log(`\u2705 Track added to Mux asset with ID: ${uploadedTrackId}`);
    console.log(`\u{1F4CB} Track name: "${trackName}"`);
  } catch (error) {
    // Deliberate best-effort: the translation/upload succeeded, so surface the
    // presigned URL for manual attachment rather than failing the whole call.
    console.warn(`\u26A0\uFE0F Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
    console.log("\u{1F517} You can manually add the track using this presigned URL:");
    console.log(presignedUrl);
  }
  return {
    assetId,
    sourceLanguageCode: fromLanguageCode,
    targetLanguageCode: toLanguageCode,
    originalVtt: vttContent,
    translatedVtt,
    uploadedTrackId,
    presignedUrl
  };
}
|
|
1025
|
+
/**
 * Dubs a Mux asset's audio into another language with ElevenLabs and
 * (optionally) attaches the dubbed audio back to the asset as a new track.
 *
 * Pipeline: retrieve asset -> require a ready "audio.m4a" static rendition ->
 * download it -> create ElevenLabs dubbing job (source language auto-detect)
 * -> poll until "dubbed" -> download result -> upload to S3 -> presign URL ->
 * mux createTrack (type "audio").
 *
 * @param {string} assetId - Mux asset ID.
 * @param {string} toLanguageCode - Target dub language code (passed as
 *   ElevenLabs `target_lang` and Mux `language_code`).
 * @param {object} [options] - Credentials, numSpeakers (0 = auto-detect),
 *   S3 config, and uploadToMux toggle (default true).
 * @returns {Promise<object>} assetId, targetLanguageCode, dubbingId, and
 *   (when uploading) uploadedTrackId + presignedUrl.
 * @throws {Error} On missing credentials/config, missing audio rendition,
 *   dubbing failure/timeout, or any failed network/upload step.
 */
async function translateAudio(assetId, toLanguageCode, options = {}) {
  const {
    provider = "elevenlabs",
    numSpeakers = 0,
    // 0 = auto-detect
    muxTokenId,
    muxTokenSecret,
    elevenLabsApiKey,
    uploadToMux = true,
    ...config
  } = options;
  // Only ElevenLabs is wired up for dubbing today.
  if (provider !== "elevenlabs") {
    throw new Error("Only ElevenLabs provider is currently supported for audio translation");
  }
  // Explicit options win; environment variables are the fallback.
  const muxId = muxTokenId || process.env.MUX_TOKEN_ID;
  const muxSecret = muxTokenSecret || process.env.MUX_TOKEN_SECRET;
  const elevenLabsKey = elevenLabsApiKey || process.env.ELEVENLABS_API_KEY;
  const s3Endpoint = options.s3Endpoint || process.env.S3_ENDPOINT;
  const s3Region = options.s3Region || process.env.S3_REGION || "auto";
  const s3Bucket = options.s3Bucket || process.env.S3_BUCKET;
  const s3AccessKeyId = options.s3AccessKeyId || process.env.S3_ACCESS_KEY_ID;
  const s3SecretAccessKey = options.s3SecretAccessKey || process.env.S3_SECRET_ACCESS_KEY;
  if (!muxId || !muxSecret) {
    throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
  }
  if (!elevenLabsKey) {
    throw new Error("ElevenLabs API key is required. Provide elevenLabsApiKey in options or set ELEVENLABS_API_KEY environment variable.");
  }
  // S3 config is only mandatory when we will push the result back to Mux.
  if (uploadToMux && (!s3Endpoint || !s3Bucket || !s3AccessKeyId || !s3SecretAccessKey)) {
    throw new Error("S3 configuration is required for uploading to Mux. Provide s3Endpoint, s3Bucket, s3AccessKeyId, and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables.");
  }
  const mux = new Mux({
    tokenId: muxId,
    tokenSecret: muxSecret
  });
  console.log(`\u{1F3AC} Fetching Mux asset: ${assetId}`);
  let assetData;
  try {
    const asset = await mux.video.assets.retrieve(assetId);
    assetData = asset;
  } catch (error) {
    throw new Error(`Failed to fetch asset from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  console.log("\u{1F50D} Checking for audio-only static rendition...");
  if (!assetData.static_renditions || !assetData.static_renditions.files) {
    throw new Error("No static renditions found for this asset");
  }
  const staticRenditionFiles = assetData.static_renditions.files;
  if (staticRenditionFiles.length === 0) {
    throw new Error("No static rendition files found for this asset");
  }
  // The dubbing input must be the pre-rendered audio-only file.
  const audioRendition = staticRenditionFiles.find(
    (rendition) => rendition.name === "audio.m4a" && rendition.status === "ready"
  );
  if (!audioRendition) {
    throw new Error("No ready audio-only static rendition found for this asset. Please ensure the asset has an audio.m4a static rendition.");
  }
  const audioUrl = `https://stream.mux.com/${assetData.playback_ids?.[0]?.id}/audio.m4a`;
  console.log(`\u2705 Found audio rendition: ${audioUrl}`);
  console.log(`\u{1F399}\uFE0F Creating ElevenLabs dubbing job (auto-detect \u2192 ${toLanguageCode})`);
  let dubbingId;
  try {
    const audioResponse = await fetch(audioUrl);
    if (!audioResponse.ok) {
      throw new Error(`Failed to fetch audio file: ${audioResponse.statusText}`);
    }
    const audioBuffer = await audioResponse.arrayBuffer();
    const audioBlob = new Blob([audioBuffer], { type: "audio/mp4" });
    const audioFile = audioBlob;
    // ElevenLabs dubbing API expects a multipart upload.
    const formData = new FormData();
    formData.append("file", audioFile);
    formData.append("target_lang", toLanguageCode);
    formData.append("num_speakers", numSpeakers.toString());
    formData.append("name", `Mux Asset ${assetId} - auto to ${toLanguageCode}`);
    const dubbingResponse = await fetch("https://api.elevenlabs.io/v1/dubbing", {
      method: "POST",
      headers: {
        "xi-api-key": elevenLabsKey
      },
      body: formData
    });
    if (!dubbingResponse.ok) {
      throw new Error(`ElevenLabs API error: ${dubbingResponse.statusText}`);
    }
    const dubbingData = await dubbingResponse.json();
    dubbingId = dubbingData.dubbing_id;
    console.log(`\u2705 Dubbing job created: ${dubbingId}`);
    console.log(`\u23F1\uFE0F Expected duration: ${dubbingData.expected_duration_sec}s`);
  } catch (error) {
    throw new Error(`Failed to create ElevenLabs dubbing job: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  console.log("\u23F3 Waiting for dubbing to complete...");
  // Poll every 10s, up to 180 attempts (~30 minutes total).
  let dubbingStatus = "dubbing";
  let pollAttempts = 0;
  const maxPollAttempts = 180;
  while (dubbingStatus === "dubbing" && pollAttempts < maxPollAttempts) {
    await new Promise((resolve) => setTimeout(resolve, 1e4));
    pollAttempts++;
    try {
      const statusResponse = await fetch(`https://api.elevenlabs.io/v1/dubbing/${dubbingId}`, {
        headers: {
          "xi-api-key": elevenLabsKey
        }
      });
      if (!statusResponse.ok) {
        throw new Error(`Status check failed: ${statusResponse.statusText}`);
      }
      const statusData = await statusResponse.json();
      dubbingStatus = statusData.status;
      console.log(`\u{1F4CA} Status check ${pollAttempts}: ${dubbingStatus}`);
      if (dubbingStatus === "failed") {
        throw new Error("ElevenLabs dubbing job failed");
      }
    } catch (error) {
      // NOTE(review): a "failed" status throw above is re-wrapped here as a
      // "Failed to check dubbing status" message, which slightly obscures it.
      throw new Error(`Failed to check dubbing status: ${error instanceof Error ? error.message : "Unknown error"}`);
    }
  }
  // Any terminal state other than "dubbed" (including loop timeout) is fatal.
  if (dubbingStatus !== "dubbed") {
    throw new Error(`Dubbing job timed out or failed. Final status: ${dubbingStatus}`);
  }
  console.log("\u2705 Dubbing completed successfully!");
  // Caller opted out of the upload path: return job identifiers only.
  if (!uploadToMux) {
    return {
      assetId,
      targetLanguageCode: toLanguageCode,
      dubbingId
    };
  }
  console.log("\u{1F4E5} Downloading dubbed audio from ElevenLabs...");
  let dubbedAudioBuffer;
  try {
    const audioUrl2 = `https://api.elevenlabs.io/v1/dubbing/${dubbingId}/audio/${toLanguageCode}`;
    const audioResponse = await fetch(audioUrl2, {
      headers: {
        "xi-api-key": elevenLabsKey
      }
    });
    if (!audioResponse.ok) {
      throw new Error(`Failed to fetch dubbed audio: ${audioResponse.statusText}`);
    }
    dubbedAudioBuffer = await audioResponse.arrayBuffer();
    console.log(`\u2705 Downloaded dubbed audio (${dubbedAudioBuffer.byteLength} bytes)`);
  } catch (error) {
    throw new Error(`Failed to download dubbed audio: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  console.log("\u{1F4E4} Uploading dubbed audio to S3-compatible storage...");
  const s3Client = new S3Client({
    region: s3Region,
    endpoint: s3Endpoint,
    credentials: {
      accessKeyId: s3AccessKeyId,
      secretAccessKey: s3SecretAccessKey
    },
    forcePathStyle: true
  });
  // Timestamped key avoids collisions on repeated dubs of one asset.
  const audioKey = `audio-translations/${assetId}/auto-to-${toLanguageCode}-${Date.now()}.m4a`;
  let presignedUrl;
  try {
    const upload = new Upload({
      client: s3Client,
      params: {
        Bucket: s3Bucket,
        Key: audioKey,
        Body: new Uint8Array(dubbedAudioBuffer),
        ContentType: "audio/mp4"
      }
    });
    await upload.done();
    console.log(`\u2705 Audio uploaded successfully to: ${audioKey}`);
    // Presign a temporary GET URL so Mux can ingest from a private bucket.
    const getObjectCommand = new GetObjectCommand({
      Bucket: s3Bucket,
      Key: audioKey
    });
    presignedUrl = await getSignedUrl(s3Client, getObjectCommand, {
      expiresIn: 3600
      // 1 hour
    });
    console.log(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
  } catch (error) {
    throw new Error(`Failed to upload audio to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  console.log("\u{1F3AC} Adding translated audio track to Mux asset...");
  let uploadedTrackId;
  try {
    // Human-readable track label, e.g. "French (auto-dubbed)".
    const languageName = new Intl.DisplayNames(["en"], { type: "language" }).of(toLanguageCode) || toLanguageCode.toUpperCase();
    const trackName = `${languageName} (auto-dubbed)`;
    const trackResponse = await mux.video.assets.createTrack(assetId, {
      type: "audio",
      language_code: toLanguageCode,
      name: trackName,
      url: presignedUrl
    });
    uploadedTrackId = trackResponse.id;
    console.log(`\u2705 Audio track added to Mux asset with ID: ${uploadedTrackId}`);
    console.log(`\u{1F3B5} Track name: "${trackName}"`);
  } catch (error) {
    // Deliberate best-effort: dubbing/upload succeeded, so surface the
    // presigned URL for manual attachment rather than failing the whole call.
    console.warn(`\u26A0\uFE0F Failed to add audio track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
    console.log("\u{1F517} You can manually add the track using this presigned URL:");
    console.log(presignedUrl);
  }
  return {
    assetId,
    targetLanguageCode: toLanguageCode,
    dubbingId,
    uploadedTrackId,
    presignedUrl
  };
}
|
|
1233
|
+
// Zod schema for the structured chapter list returned by the AI providers:
// each entry pairs a start time (seconds, per the prompt rules below) with a
// short chapter title. Used with zodTextFormat for OpenAI structured output.
var chaptersSchema = z.object({
  chapters: z.object({
    startTime: z.number(),
    title: z.string()
  }).array()
});
|
|
1239
|
+
var DEFAULT_SYSTEM_PROMPT = `Your role is to segment the following captions into chunked chapters, summarising each chapter with a title.
|
|
1240
|
+
|
|
1241
|
+
Analyze the transcript and create logical chapter breaks based on topic changes, major transitions, or distinct sections of content. Each chapter should represent a meaningful segment of the video.
|
|
1242
|
+
|
|
1243
|
+
You must respond with valid JSON in exactly this format:
|
|
1244
|
+
{
|
|
1245
|
+
"chapters": [
|
|
1246
|
+
{"startTime": 0, "title": "Introduction"},
|
|
1247
|
+
{"startTime": 45.5, "title": "Main Topic Discussion"},
|
|
1248
|
+
{"startTime": 120.0, "title": "Conclusion"}
|
|
1249
|
+
]
|
|
1250
|
+
}
|
|
1251
|
+
|
|
1252
|
+
Important rules:
|
|
1253
|
+
- startTime must be in seconds (not HH:MM:SS format)
|
|
1254
|
+
- Always start with startTime: 0 for the first chapter
|
|
1255
|
+
- Create 3-8 chapters depending on content length and natural breaks
|
|
1256
|
+
- Chapter titles should be concise and descriptive
|
|
1257
|
+
- Do not include any text before or after the JSON
|
|
1258
|
+
- The JSON must be valid and parseable`;
|
|
1259
|
+
var ANTHROPIC_JSON_PROMPT2 = `You must respond with valid JSON in exactly this format:
|
|
1260
|
+
{
|
|
1261
|
+
"chapters": [
|
|
1262
|
+
{"startTime": 0, "title": "Chapter title here"},
|
|
1263
|
+
{"startTime": 45.5, "title": "Another chapter title"}
|
|
1264
|
+
]
|
|
1265
|
+
}
|
|
1266
|
+
|
|
1267
|
+
Do not include any text before or after the JSON. The JSON must be valid and parseable.`;
|
|
1268
|
+
/**
 * Converts a WebVTT cue timestamp to seconds.
 *
 * WebVTT permits both `HH:MM:SS.mmm` and the shorter `MM:SS.mmm` form
 * (hours are optional per the spec); the original implementation returned 0
 * for the two-part form, silently zeroing every cue in such files.
 *
 * @param {string} timestamp - e.g. "00:01:30.500" or "01:30.500".
 * @returns {number} Time in seconds (fractional), or 0 if unparseable.
 */
function vttTimestampToSeconds(timestamp) {
  const parts = timestamp.split(":");
  if (parts.length === 3) {
    // HH:MM:SS.mmm — `|| 0` guards against NaN from malformed components.
    const hours = parseInt(parts[0], 10) || 0;
    const minutes = parseInt(parts[1], 10) || 0;
    const seconds = parseFloat(parts[2]) || 0;
    return hours * 3600 + minutes * 60 + seconds;
  }
  if (parts.length === 2) {
    // MM:SS.mmm — hours omitted, as allowed by WebVTT.
    const minutes = parseInt(parts[0], 10) || 0;
    const seconds = parseFloat(parts[1]) || 0;
    return minutes * 60 + seconds;
  }
  // Unrecognized shape: treat as the start of the file.
  return 0;
}
|
|
1276
|
+
/**
 * Flattens a WebVTT file into a timestamped plain-text transcript of the form
 * "[12s] cue text" (one line per cue), used as the LLM input for chaptering.
 *
 * Fix: cue lines were detected with `includes("-->")` but then split on the
 * literal `" --> "`. WebVTT allows tabs or multiple spaces around the arrow,
 * so such cues previously fed garbage into the timestamp parser. We now split
 * on the arrow itself and trim.
 *
 * NOTE: only the first non-empty line after each timing line is kept;
 * additional lines of multi-line cues are dropped (original behavior).
 *
 * @param {string} vttContent - Raw WebVTT file contents.
 * @returns {string} Newline-joined "[Ns] text" lines, or "" for blank input.
 */
function extractTimestampsFromVTT(vttContent) {
  if (!vttContent.trim()) {
    return "";
  }
  const lines = vttContent.split("\n");
  const segments = [];
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (line.includes("-->")) {
      // Split on the arrow itself so any surrounding whitespace (spaces or
      // tabs, both valid WebVTT) is tolerated.
      const startTime = line.split("-->")[0].trim();
      const timeInSeconds = vttTimestampToSeconds(startTime);
      // Skip blank lines between the timing line and the cue text.
      let j = i + 1;
      while (j < lines.length && !lines[j].trim()) {
        j++;
      }
      if (j < lines.length) {
        // Strip inline VTT/HTML-style tags like <v Speaker> or <i>.
        const text = lines[j].trim().replace(/<[^>]*>/g, "");
        if (text) {
          segments.push({ time: timeInSeconds, text });
        }
      }
    }
  }
  return segments.map((segment) => `[${Math.floor(segment.time)}s] ${segment.text}`).join("\n");
}
|
|
1301
|
+
/**
 * Generates chapter markers for a Mux asset by feeding its caption track
 * (as a timestamped transcript) to OpenAI or Anthropic.
 *
 * Pipeline: retrieve asset -> find ready subtitle track in `languageCode` ->
 * fetch its VTT -> flatten via extractTimestampsFromVTT -> ask the model for
 * a JSON chapter list -> validate/sort -> return.
 *
 * @param {string} assetId - Mux asset ID.
 * @param {string} languageCode - Language of the caption track to use
 *   (matched exactly against track.language_code).
 * @param {object} [options] - provider ("openai" default, or "anthropic"),
 *   model override, and Mux/OpenAI/Anthropic credentials.
 * @returns {Promise<object>} { assetId, languageCode, chapters } where
 *   chapters is a startTime-sorted array of { startTime, title }.
 * @throws {Error} On missing credentials, missing track, failed fetches,
 *   unsupported provider, or unusable AI output.
 */
async function generateChapters(assetId, languageCode, options = {}) {
  const {
    provider = "openai",
    model,
    muxTokenId,
    muxTokenSecret,
    openaiApiKey,
    anthropicApiKey,
    ...config
  } = options;
  // Per-provider default models; `model` in options overrides.
  const defaultModel = provider === "anthropic" ? "claude-3-5-haiku-20241022" : "gpt-4o-mini";
  const finalModel = model || defaultModel;
  // Explicit options win; environment variables are the fallback.
  const muxId = muxTokenId || process.env.MUX_TOKEN_ID;
  const muxSecret = muxTokenSecret || process.env.MUX_TOKEN_SECRET;
  const openaiKey = openaiApiKey || process.env.OPENAI_API_KEY;
  const anthropicKey = anthropicApiKey || process.env.ANTHROPIC_API_KEY;
  if (!muxId || !muxSecret) {
    throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
  }
  // Only the selected provider's key is required.
  if (provider === "openai" && !openaiKey) {
    throw new Error("OpenAI API key is required for OpenAI provider. Provide openaiApiKey in options or set OPENAI_API_KEY environment variable.");
  }
  if (provider === "anthropic" && !anthropicKey) {
    throw new Error("Anthropic API key is required for Anthropic provider. Provide anthropicApiKey in options or set ANTHROPIC_API_KEY environment variable.");
  }
  const mux = new Mux({
    tokenId: muxId,
    tokenSecret: muxSecret
  });
  // Lazily construct only the client we will actually use.
  let openaiClient;
  let anthropicClient;
  if (provider === "openai") {
    openaiClient = new OpenAI({
      apiKey: openaiKey
    });
  } else if (provider === "anthropic") {
    anthropicClient = new Anthropic({
      apiKey: anthropicKey
    });
  }
  let assetData;
  try {
    const asset = await mux.video.assets.retrieve(assetId);
    assetData = asset;
  } catch (error) {
    throw new Error(`Failed to fetch asset from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  // A playback ID is required to build the public stream.mux.com VTT URL.
  const playbackId = assetData.playback_ids?.[0]?.id;
  if (!playbackId) {
    throw new Error("No playback ID found for this asset");
  }
  if (!assetData.tracks) {
    throw new Error("No tracks found for this asset");
  }
  // Must be a *ready* subtitle text track in the requested language.
  const captionTrack = assetData.tracks.find(
    (track) => track.type === "text" && track.status === "ready" && track.text_type === "subtitles" && track.language_code === languageCode
  );
  if (!captionTrack) {
    throw new Error(`No caption track found for language '${languageCode}'. Available languages: ${assetData.tracks.filter((t) => t.type === "text").map((t) => t.language_code).join(", ")}`);
  }
  const transcriptUrl = `https://stream.mux.com/${playbackId}/text/${captionTrack.id}.vtt`;
  let vttContent;
  try {
    const transcriptResponse = await fetch(transcriptUrl);
    if (!transcriptResponse.ok) {
      throw new Error(`Failed to fetch VTT: ${transcriptResponse.statusText}`);
    }
    vttContent = await transcriptResponse.text();
  } catch (error) {
    throw new Error(`Failed to fetch caption track: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  // Flatten cues into "[Ns] text" lines so the model sees real timings.
  const timestampedTranscript = extractTimestampsFromVTT(vttContent);
  if (!timestampedTranscript) {
    throw new Error("No usable content found in caption track");
  }
  let chaptersData = null;
  if (provider === "openai") {
    try {
      // Structured output: zodTextFormat enforces the chaptersSchema shape.
      const response = await openaiClient.responses.parse({
        model: finalModel,
        input: [
          {
            role: "system",
            content: DEFAULT_SYSTEM_PROMPT
          },
          {
            role: "user",
            content: [
              {
                type: "input_text",
                text: timestampedTranscript
              }
            ]
          }
        ],
        text: {
          format: zodTextFormat(chaptersSchema, "chapters")
        }
      });
      chaptersData = response.output_parsed;
    } catch (error) {
      throw new Error(`Failed to generate chapters with OpenAI: ${error instanceof Error ? error.message : "Unknown error"}`);
    }
  } else if (provider === "anthropic") {
    // Anthropic path: no structured-output mode here, so the JSON contract is
    // restated in the prompt and the reply is parsed manually below.
    const anthropicPrompt = `${DEFAULT_SYSTEM_PROMPT}

${ANTHROPIC_JSON_PROMPT2}

Transcript:
${timestampedTranscript}`;
    try {
      const response = await anthropicClient.messages.create({
        model: finalModel,
        max_tokens: 2e3,
        messages: [
          {
            role: "user",
            content: anthropicPrompt
          }
        ]
      });
      const content = response.content[0];
      if (content.type === "text") {
        const jsonText = content.text.trim();
        try {
          chaptersData = JSON.parse(jsonText);
        } catch (parseError) {
          throw new Error(`Failed to parse JSON response from Anthropic: ${parseError instanceof Error ? parseError.message : "Unknown error"}`);
        }
      } else {
        throw new Error("Unexpected response type from Anthropic");
      }
    } catch (error) {
      throw new Error(`Failed to generate chapters with Anthropic: ${error instanceof Error ? error.message : "Unknown error"}`);
    }
  } else {
    throw new Error(`Unsupported provider: ${provider}`);
  }
  if (!chaptersData || !chaptersData.chapters) {
    throw new Error("No chapters generated from AI response");
  }
  // Keep only well-typed entries and order them chronologically.
  const validChapters = chaptersData.chapters.filter((chapter) => typeof chapter.startTime === "number" && typeof chapter.title === "string").sort((a, b) => a.startTime - b.startTime);
  if (validChapters.length === 0) {
    throw new Error("No valid chapters found in AI response");
  }
  // Force the first chapter to start at 0, per the prompt's rules.
  if (validChapters[0].startTime !== 0) {
    validChapters[0].startTime = 0;
  }
  return {
    assetId,
    languageCode,
    chapters: validChapters
  };
}
|
|
1455
|
+
|
|
1456
|
+
// src/utils/storyboard-processor.ts
init_image_download();
/**
 * Resolves a Mux asset ID to the first public playback ID (needed to build
 * storyboard/image URLs) plus the asset duration, if known.
 *
 * @param {string} assetId - Mux asset identifier.
 * @param {{ muxTokenId?: string, muxTokenSecret?: string }} options - Credentials;
 *   falls back to MUX_TOKEN_ID / MUX_TOKEN_SECRET environment variables.
 * @returns {Promise<{ playbackId: string, duration: number | undefined }>}
 * @throws {Error} When credentials are missing, the Mux API call fails, or the
 *   asset has no playback IDs.
 */
async function getAssetInfo(assetId, options) {
  const muxId = options.muxTokenId || process.env.MUX_TOKEN_ID;
  const muxSecret = options.muxTokenSecret || process.env.MUX_TOKEN_SECRET;
  if (!muxId || !muxSecret) {
    throw new Error("Mux credentials are required. Provide muxTokenId and muxTokenSecret in options or set MUX_TOKEN_ID and MUX_TOKEN_SECRET environment variables.");
  }
  const mux = new Mux({
    tokenId: muxId,
    tokenSecret: muxSecret
  });
  let asset;
  try {
    asset = await mux.video.assets.retrieve(assetId);
  } catch (error) {
    // Only genuine API/transport failures are wrapped here. (Previously the
    // "No playback ID" error was thrown inside this try and re-wrapped as a
    // misleading "Failed to fetch asset from Mux: ..." message.)
    throw new Error(`Failed to fetch asset from Mux: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  const playbackId = asset.playback_ids?.[0]?.id;
  if (!playbackId) {
    throw new Error("No playback ID found for this asset");
  }
  return {
    playbackId,
    // `??` instead of `||`: a zero-second duration is a valid value and
    // should not be collapsed to undefined.
    duration: asset.duration ?? void 0
  };
}
|
|
1482
|
+
/**
 * Sends a Mux storyboard image plus a text prompt to the Anthropic Messages
 * API and returns whatever `options.responseParser` extracts from the reply.
 *
 * Two submission modes:
 *  - "base64": uploads the image via the Anthropic Files API (beta header
 *    required) and references it by file_id. Single attempt, no retries.
 *  - "url" (default): passes the image URL directly, retrying on failure.
 *
 * NOTE(review): `maxRetries` is destructured but only honored in the "url"
 * branch; the "base64" branch makes exactly one attempt — confirm whether
 * that asymmetry is intentional.
 * NOTE(review): the "url" branch retries on ANY error (flat 5s delay), not
 * just transient ones, and the trailing "All retry attempts failed" throw is
 * unreachable (the catch either continues or throws first).
 *
 * @param {string} imageUrl - Publicly reachable storyboard image URL.
 * @param {string} prompt - Full text prompt to accompany the image.
 * @param {{ apiKey: string, model: string, responseParser: Function,
 *           imageSubmissionMode?: "url"|"base64", imageDownloadOptions?: object,
 *           maxRetries?: number }} options
 * @returns {Promise<*>} Whatever `responseParser(response)` returns.
 * @throws {Error} Wrapped with context when the API call or parsing fails.
 */
async function processStoryboardWithAnthropic(imageUrl, prompt, options) {
  const { apiKey, model, responseParser, imageSubmissionMode = "url", imageDownloadOptions, maxRetries = 3 } = options;
  const anthropicClient = new Anthropic({ apiKey });
  let retryAttempt = 0;
  if (imageSubmissionMode === "base64") {
    try {
      // Upload once, then reference the stored file by ID in the message.
      const fileUploadResult = await uploadImageToAnthropicFiles(imageUrl, apiKey, imageDownloadOptions);
      const response = await anthropicClient.messages.create({
        model,
        max_tokens: 1e3,
        messages: [
          {
            role: "user",
            content: [
              {
                type: "image",
                source: {
                  type: "file",
                  file_id: fileUploadResult.fileId
                }
                // Type assertion for Files API support
              },
              {
                type: "text",
                text: prompt
              }
            ]
          }
        ]
      }, {
        headers: {
          // Files API is in beta; this opt-in header is required.
          "anthropic-beta": "files-api-2025-04-14"
        }
      });
      return responseParser(response);
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : "Unknown error";
      throw new Error(`Failed to process storyboard with Anthropic Files API: ${errorMessage}`);
    }
  } else {
    // URL mode: attempt up to maxRetries + 1 times with a fixed 5s backoff.
    while (retryAttempt <= maxRetries) {
      try {
        const response = await anthropicClient.messages.create({
          model,
          max_tokens: 1e3,
          messages: [
            {
              role: "user",
              content: [
                {
                  type: "image",
                  source: {
                    type: "url",
                    url: imageUrl
                  }
                  // Type assertion to work around SDK type definitions
                },
                {
                  type: "text",
                  text: prompt
                }
              ]
            }
          ]
        });
        return responseParser(response);
      } catch (error) {
        if (retryAttempt < maxRetries) {
          // Fixed 5-second delay between attempts (no exponential backoff).
          await new Promise((resolve) => setTimeout(resolve, 5e3));
          retryAttempt++;
          continue;
        }
        throw new Error(`Failed to process storyboard with Anthropic: ${error instanceof Error ? error.message : "Unknown error"}`);
      }
    }
    // Unreachable: the loop either returns or throws from the final attempt.
    throw new Error("All retry attempts failed");
  }
}
|
|
1560
|
+
|
|
1561
|
+
// src/burned-in-captions.ts
|
|
1562
|
+
// Structured-output schema for burned-in caption detection. Used both for
// OpenAI structured parsing (via zodTextFormat) and as the documented shape
// of the JSON the Anthropic prompt asks for.
var burnedInCaptionsSchema = z.object({
  hasBurnedInCaptions: z.boolean(),
  confidence: z.number().min(0).max(1),  // 0..1 confidence score
  detectedLanguage: z.string().nullable()  // null when no captions / unclear
});
// System prompt describing the detection task. NOTE(review): despite the
// ANTHROPIC_ prefix, this is also sent to OpenAI in hasBurnedInCaptions.
var ANTHROPIC_SYSTEM_PROMPT = `You are an expert at analyzing video frames to detect burned-in captions (also called open captions or hardcoded subtitles). These are text overlays permanently embedded in video images, common on social media platforms.

Key principles:
1. Burned-in captions appear across multiple frames throughout the video timeline
2. End-cards and marketing text appear only in final frames
3. Captions have consistent positioning and caption-style formatting
4. Caption text typically changes between frames (dialogue/narration)

Analysis approach:
- Look for text overlays distributed across different parts of the timeline
- Distinguish between dialogue captions vs. marketing end-cards
- Consider text positioning, formatting, and content patterns`;
// User-turn instructions walking the model through the storyboard frames.
var ANTHROPIC_USER_PROMPT = `Analyze this storyboard for burned-in captions:

1. Examine each frame from left to right (timeline order)
2. Note which frames have text overlays and their positions
3. Determine the pattern:
- Text scattered across timeline = likely captions
- Text only in final 1-2 frames = likely end-card/marketing

Classification rules:
- If text appears in 3+ frames distributed throughout timeline \u2192 burned-in captions
- If text appears only in final frames \u2192 NOT burned-in captions
- Look for dialogue-style content vs. marketing taglines

Analyze and classify with confidence level.`;
// Appended to the Anthropic prompt only: forces a bare-JSON reply matching
// burnedInCaptionsSchema, since Anthropic lacks the structured-parse call.
var ANTHROPIC_JSON_PROMPT3 = `Apply the frame analysis above.

Key rule: Text appearing only in final 2-3 frames = NOT captions. Text distributed throughout timeline = captions.

Respond ONLY with valid JSON:
{
"hasBurnedInCaptions": true/false,
"confidence": 0.85,
"detectedLanguage": "English" (or null if no captions or language unclear)
}

Do not include any text before or after the JSON. The JSON must be valid and parseable.`;
|
|
1605
|
+
/**
 * Detects burned-in (hardcoded) captions in a Mux asset by sending its
 * storyboard image to an AI vision model.
 *
 * @param {string} assetId - Mux asset identifier.
 * @param {object} [options]
 * @param {"openai"|"anthropic"} [options.provider="openai"] - Vision provider.
 * @param {string} [options.model] - Overrides the provider's default model.
 * @param {"url"|"base64"} [options.imageSubmissionMode="url"] - Send the image
 *   by URL, or download/upload it first.
 * @param {object} [options.imageDownloadOptions] - Passed to the image downloader.
 * @param {string} [options.muxTokenId] / [options.muxTokenSecret] - Mux credentials.
 * @param {string} [options.openaiApiKey] / [options.anthropicApiKey] - Provider keys;
 *   fall back to OPENAI_API_KEY / ANTHROPIC_API_KEY environment variables.
 * @returns {Promise<{ assetId: string, hasBurnedInCaptions: boolean,
 *   confidence: number, detectedLanguage: string|null, storyboardUrl: string }>}
 * @throws {Error} On missing keys, unsupported provider, or analysis failure.
 */
async function hasBurnedInCaptions(assetId, options = {}) {
  const {
    provider = "openai",
    model,
    imageSubmissionMode = "url",
    imageDownloadOptions,
    muxTokenId,
    muxTokenSecret,
    openaiApiKey,
    anthropicApiKey
  } = options;
  const defaultModel = provider === "anthropic" ? "claude-3-5-haiku-20241022" : "gpt-4o-mini";
  const finalModel = model || defaultModel;
  const openaiKey = openaiApiKey || process.env.OPENAI_API_KEY;
  const anthropicKey = anthropicApiKey || process.env.ANTHROPIC_API_KEY;
  if (provider === "openai" && !openaiKey) {
    throw new Error("OpenAI API key is required for OpenAI provider. Provide openaiApiKey in options or set OPENAI_API_KEY environment variable.");
  }
  if (provider === "anthropic" && !anthropicKey) {
    throw new Error("Anthropic API key is required for Anthropic provider. Provide anthropicApiKey in options or set ANTHROPIC_API_KEY environment variable.");
  }
  const assetInfo = await getAssetInfo(assetId, { muxTokenId, muxTokenSecret });
  // 640px-wide storyboard: a tiled contact sheet of frames in timeline order.
  const imageUrl = `https://image.mux.com/${assetInfo.playbackId}/storyboard.png?width=640`;
  let analysisResult = null;
  if (provider === "openai") {
    const OpenAI5 = __require("openai").default;
    const openaiClient = new OpenAI5({ apiKey: openaiKey });
    // Shared request builder: the base64 and url paths differ only in the
    // image_url payload (data URI vs. remote URL).
    // NOTE: the ANTHROPIC_* prompt constants are intentionally reused here.
    const analyzeWithOpenAI = async (imageInput) => {
      const response = await openaiClient.responses.parse({
        model: finalModel,
        input: [
          {
            role: "system",
            content: ANTHROPIC_SYSTEM_PROMPT
          },
          {
            role: "user",
            content: [
              {
                type: "input_text",
                text: ANTHROPIC_USER_PROMPT
              },
              {
                type: "input_image",
                image_url: imageInput,
                detail: "high"
              }
            ]
          }
        ],
        text: {
          format: zodTextFormat(burnedInCaptionsSchema, "analysis")
        }
      });
      return response.output_parsed;
    };
    if (imageSubmissionMode === "base64") {
      try {
        const { downloadImageAsBase64: downloadImageAsBase642 } = (init_image_download(), __toCommonJS(image_download_exports));
        const downloadResult = await downloadImageAsBase642(imageUrl, imageDownloadOptions);
        analysisResult = await analyzeWithOpenAI(downloadResult.base64Data);
      } catch (error) {
        // Wrap with the same context the url path provides (previously raw
        // download/parse errors escaped here with no context).
        throw new Error(`Failed to analyze storyboard with OpenAI: ${error instanceof Error ? error.message : "Unknown error"}`);
      }
    } else {
      // URL mode: OpenAI fetches the image itself, which can time out on
      // large storyboards; retry only those timeouts with a 5s delay.
      let retryAttempt = 0;
      const maxRetries = 3;
      while (retryAttempt <= maxRetries) {
        try {
          analysisResult = await analyzeWithOpenAI(imageUrl);
          break;
        } catch (error) {
          const isTimeoutError = error instanceof Error && error.message && error.message.includes("Timeout while downloading");
          if (isTimeoutError && retryAttempt < maxRetries) {
            await new Promise((resolve) => setTimeout(resolve, 5e3));
            retryAttempt++;
            continue;
          }
          throw new Error(`Failed to analyze storyboard with OpenAI: ${error instanceof Error ? error.message : "Unknown error"}`);
        }
      }
    }
  } else if (provider === "anthropic") {
    // Anthropic has no structured-parse endpoint, so the JSON contract is
    // spelled out in the prompt and the reply is parsed by hand.
    const anthropicPrompt = `${ANTHROPIC_USER_PROMPT}

${ANTHROPIC_JSON_PROMPT3}`;
    const responseParser = (response) => {
      const content = response.content[0];
      if (content.type === "text") {
        const jsonText = content.text.trim();
        try {
          return JSON.parse(jsonText);
        } catch (parseError) {
          throw new Error(`Failed to parse JSON response from Anthropic: ${parseError instanceof Error ? parseError.message : "Unknown error"}`);
        }
      } else {
        throw new Error("Unexpected response type from Anthropic");
      }
    };
    analysisResult = await processStoryboardWithAnthropic(
      imageUrl,
      anthropicPrompt,
      {
        apiKey: anthropicKey,
        model: finalModel,
        responseParser,
        imageSubmissionMode,
        imageDownloadOptions
      }
    );
  } else {
    throw new Error(`Unsupported provider: ${provider}`);
  }
  if (!analysisResult) {
    throw new Error("No analysis result received from AI provider");
  }
  return {
    assetId,
    // Defensive defaults in case a hand-parsed Anthropic reply omits fields.
    hasBurnedInCaptions: analysisResult.hasBurnedInCaptions ?? false,
    confidence: analysisResult.confidence ?? 0,
    detectedLanguage: analysisResult.detectedLanguage ?? null,
    storyboardUrl: imageUrl
  };
}
|
|
1752
|
+
|
|
1753
|
+
// src/index.ts
// Library version; keep in sync with package.json. NOTE(review): the bundle
// previously exported the stale "0.1.0" while package.json shipped 0.1.2.
var version = "0.1.2";
|
|
1755
|
+
|
|
1756
|
+
export { generateChapters, getModerationScores, getSummaryAndTags, hasBurnedInCaptions, translateAudio, translateCaptions, version };
|
|
1757
|
+
//# sourceMappingURL=index.js.map
|
|
9
1758
|
//# sourceMappingURL=index.js.map
|