@mux/ai 0.9.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -1
- package/dist/{index-CkJStzYO.d.ts → index-BapL6paa.d.ts} +175 -12
- package/dist/{index-Nxf6BaBO.d.ts → index-DLhfJsOd.d.ts} +130 -4
- package/dist/index.d.ts +3 -3
- package/dist/index.js +1400 -172
- package/dist/index.js.map +1 -1
- package/dist/primitives/index.d.ts +1 -1
- package/dist/primitives/index.js +476 -15
- package/dist/primitives/index.js.map +1 -1
- package/dist/workflows/index.d.ts +1 -1
- package/dist/workflows/index.js +1739 -657
- package/dist/workflows/index.js.map +1 -1
- package/package.json +2 -1
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ var __export = (target, all) => {
|
|
|
5
5
|
};
|
|
6
6
|
|
|
7
7
|
// package.json
|
|
8
|
-
var version = "0.
|
|
8
|
+
var version = "0.11.0";
|
|
9
9
|
|
|
10
10
|
// src/env.ts
|
|
11
11
|
import { z } from "zod";
|
|
@@ -783,9 +783,14 @@ var primitives_exports = {};
|
|
|
783
783
|
__export(primitives_exports, {
|
|
784
784
|
DEFAULT_STORYBOARD_WIDTH: () => DEFAULT_STORYBOARD_WIDTH,
|
|
785
785
|
buildTranscriptUrl: () => buildTranscriptUrl,
|
|
786
|
+
buildVttFromCueBlocks: () => buildVttFromCueBlocks,
|
|
787
|
+
buildVttFromTranslatedCueBlocks: () => buildVttFromTranslatedCueBlocks,
|
|
786
788
|
chunkByTokens: () => chunkByTokens,
|
|
787
789
|
chunkText: () => chunkText,
|
|
788
790
|
chunkVTTCues: () => chunkVTTCues,
|
|
791
|
+
chunkVTTCuesByBudget: () => chunkVTTCuesByBudget,
|
|
792
|
+
chunkVTTCuesByDuration: () => chunkVTTCuesByDuration,
|
|
793
|
+
concatenateVttSegments: () => concatenateVttSegments,
|
|
789
794
|
estimateTokenCount: () => estimateTokenCount,
|
|
790
795
|
extractTextFromVTT: () => extractTextFromVTT,
|
|
791
796
|
extractTimestampedTranscript: () => extractTimestampedTranscript,
|
|
@@ -798,11 +803,16 @@ __export(primitives_exports, {
|
|
|
798
803
|
getHotspotsForPlaybackId: () => getHotspotsForPlaybackId,
|
|
799
804
|
getHotspotsForVideo: () => getHotspotsForVideo,
|
|
800
805
|
getReadyTextTracks: () => getReadyTextTracks,
|
|
806
|
+
getShotsForAsset: () => getShotsForAsset,
|
|
801
807
|
getStoryboardUrl: () => getStoryboardUrl,
|
|
802
808
|
getThumbnailUrls: () => getThumbnailUrls,
|
|
803
809
|
parseVTTCues: () => parseVTTCues,
|
|
810
|
+
replaceCueText: () => replaceCueText,
|
|
811
|
+
requestShotsForAsset: () => requestShotsForAsset,
|
|
804
812
|
secondsToTimestamp: () => secondsToTimestamp,
|
|
805
|
-
|
|
813
|
+
splitVttPreambleAndCueBlocks: () => splitVttPreambleAndCueBlocks,
|
|
814
|
+
vttTimestampToSeconds: () => vttTimestampToSeconds,
|
|
815
|
+
waitForShotsForAsset: () => waitForShotsForAsset
|
|
806
816
|
});
|
|
807
817
|
|
|
808
818
|
// src/lib/providers.ts
|
|
@@ -1075,6 +1085,142 @@ async function fetchHotspots(identifierType, id, options) {
|
|
|
1075
1085
|
return transformHotspotResponse(response);
|
|
1076
1086
|
}
|
|
1077
1087
|
|
|
1088
|
+
// src/primitives/shots.ts
|
|
1089
|
+
var DEFAULT_POLL_INTERVAL_MS = 2e3;
|
|
1090
|
+
var MIN_POLL_INTERVAL_MS = 1e3;
|
|
1091
|
+
var DEFAULT_MAX_ATTEMPTS = 60;
|
|
1092
|
+
var SHOTS_ALREADY_REQUESTED_MESSAGE = "shots generation has already been requested";
|
|
1093
|
+
function getShotsPath(assetId) {
|
|
1094
|
+
return `/video/v1/assets/${assetId}/shots`;
|
|
1095
|
+
}
|
|
1096
|
+
function mapManifestShots(shots) {
|
|
1097
|
+
return shots.map((shot, index) => {
|
|
1098
|
+
const { startTime, imageUrl } = shot;
|
|
1099
|
+
if (typeof startTime !== "number" || !Number.isFinite(startTime)) {
|
|
1100
|
+
throw new TypeError(`Invalid shot startTime in shots manifest at index ${index}`);
|
|
1101
|
+
}
|
|
1102
|
+
if (typeof imageUrl !== "string" || imageUrl.length === 0) {
|
|
1103
|
+
throw new TypeError(`Invalid shot imageUrl in shots manifest at index ${index}`);
|
|
1104
|
+
}
|
|
1105
|
+
return {
|
|
1106
|
+
startTime,
|
|
1107
|
+
imageUrl
|
|
1108
|
+
};
|
|
1109
|
+
});
|
|
1110
|
+
}
|
|
1111
|
+
async function fetchShotsFromManifest(shotsManifestUrl) {
|
|
1112
|
+
const response = await fetch(shotsManifestUrl);
|
|
1113
|
+
if (!response.ok) {
|
|
1114
|
+
throw new Error(
|
|
1115
|
+
`Failed to fetch shots manifest: ${response.status} ${response.statusText}`
|
|
1116
|
+
);
|
|
1117
|
+
}
|
|
1118
|
+
const manifest = await response.json();
|
|
1119
|
+
if (!Array.isArray(manifest.shots)) {
|
|
1120
|
+
throw new TypeError("Invalid shots manifest response: missing shots array");
|
|
1121
|
+
}
|
|
1122
|
+
return mapManifestShots(manifest.shots);
|
|
1123
|
+
}
|
|
1124
|
+
async function transformShotsResponse(response) {
|
|
1125
|
+
switch (response.data.status) {
|
|
1126
|
+
case "pending":
|
|
1127
|
+
return {
|
|
1128
|
+
status: "pending",
|
|
1129
|
+
createdAt: response.data.created_at
|
|
1130
|
+
};
|
|
1131
|
+
case "errored":
|
|
1132
|
+
return {
|
|
1133
|
+
status: "errored",
|
|
1134
|
+
createdAt: response.data.created_at,
|
|
1135
|
+
error: response.data.error
|
|
1136
|
+
};
|
|
1137
|
+
case "completed":
|
|
1138
|
+
return {
|
|
1139
|
+
status: "completed",
|
|
1140
|
+
createdAt: response.data.created_at,
|
|
1141
|
+
shots: await fetchShotsFromManifest(response.data.shots_manifest_url)
|
|
1142
|
+
};
|
|
1143
|
+
default: {
|
|
1144
|
+
const exhaustiveCheck = response.data;
|
|
1145
|
+
throw new Error(`Unsupported shots response: ${JSON.stringify(exhaustiveCheck)}`);
|
|
1146
|
+
}
|
|
1147
|
+
}
|
|
1148
|
+
}
|
|
1149
|
+
function sleep(ms) {
|
|
1150
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
1151
|
+
}
|
|
1152
|
+
function isShotsAlreadyRequestedError(error) {
|
|
1153
|
+
const statusCode = error?.status ?? error?.statusCode;
|
|
1154
|
+
const messages = error?.error?.messages;
|
|
1155
|
+
const lowerCaseMessages = messages?.map((message) => message.toLowerCase()) ?? [];
|
|
1156
|
+
const errorMessage = error instanceof Error ? error.message.toLowerCase() : "";
|
|
1157
|
+
return statusCode === 400 && (lowerCaseMessages.some((message) => message.includes(SHOTS_ALREADY_REQUESTED_MESSAGE)) || errorMessage.includes(SHOTS_ALREADY_REQUESTED_MESSAGE));
|
|
1158
|
+
}
|
|
1159
|
+
async function requestShotsForAsset(assetId, options = {}) {
|
|
1160
|
+
"use step";
|
|
1161
|
+
const { credentials } = options;
|
|
1162
|
+
const muxClient = await getMuxClientFromEnv(credentials);
|
|
1163
|
+
const mux = await muxClient.createClient();
|
|
1164
|
+
const response = await mux.post(
|
|
1165
|
+
getShotsPath(assetId),
|
|
1166
|
+
{ body: {} }
|
|
1167
|
+
);
|
|
1168
|
+
const result = await transformShotsResponse(response);
|
|
1169
|
+
if (result.status !== "pending") {
|
|
1170
|
+
throw new Error(
|
|
1171
|
+
`Expected pending status after requesting shots for asset '${assetId}', received '${result.status}'`
|
|
1172
|
+
);
|
|
1173
|
+
}
|
|
1174
|
+
return result;
|
|
1175
|
+
}
|
|
1176
|
+
async function getShotsForAsset(assetId, options = {}) {
|
|
1177
|
+
"use step";
|
|
1178
|
+
const { credentials } = options;
|
|
1179
|
+
const muxClient = await getMuxClientFromEnv(credentials);
|
|
1180
|
+
const mux = await muxClient.createClient();
|
|
1181
|
+
const response = await mux.get(
|
|
1182
|
+
getShotsPath(assetId)
|
|
1183
|
+
);
|
|
1184
|
+
return await transformShotsResponse(response);
|
|
1185
|
+
}
|
|
1186
|
+
async function waitForShotsForAsset(assetId, options = {}) {
|
|
1187
|
+
"use step";
|
|
1188
|
+
const {
|
|
1189
|
+
pollIntervalMs = DEFAULT_POLL_INTERVAL_MS,
|
|
1190
|
+
maxAttempts = DEFAULT_MAX_ATTEMPTS,
|
|
1191
|
+
createIfMissing = true,
|
|
1192
|
+
credentials
|
|
1193
|
+
} = options;
|
|
1194
|
+
if (createIfMissing) {
|
|
1195
|
+
try {
|
|
1196
|
+
await requestShotsForAsset(assetId, { credentials });
|
|
1197
|
+
} catch (error) {
|
|
1198
|
+
if (!isShotsAlreadyRequestedError(error)) {
|
|
1199
|
+
throw error;
|
|
1200
|
+
}
|
|
1201
|
+
}
|
|
1202
|
+
}
|
|
1203
|
+
const normalizedMaxAttempts = Math.max(1, maxAttempts);
|
|
1204
|
+
const normalizedPollIntervalMs = Math.max(MIN_POLL_INTERVAL_MS, pollIntervalMs);
|
|
1205
|
+
let lastStatus;
|
|
1206
|
+
for (let attempt = 0; attempt < normalizedMaxAttempts; attempt++) {
|
|
1207
|
+
const result = await getShotsForAsset(assetId, { credentials });
|
|
1208
|
+
lastStatus = result.status;
|
|
1209
|
+
if (result.status === "completed") {
|
|
1210
|
+
return result;
|
|
1211
|
+
}
|
|
1212
|
+
if (result.status === "errored") {
|
|
1213
|
+
throw new Error(`Shots generation errored for asset '${assetId}'`);
|
|
1214
|
+
}
|
|
1215
|
+
if (attempt < normalizedMaxAttempts - 1) {
|
|
1216
|
+
await sleep(normalizedPollIntervalMs);
|
|
1217
|
+
}
|
|
1218
|
+
}
|
|
1219
|
+
throw new Error(
|
|
1220
|
+
`Timed out waiting for shots for asset '${assetId}' after ${normalizedMaxAttempts} attempts. Last status: ${lastStatus ?? "unknown"}`
|
|
1221
|
+
);
|
|
1222
|
+
}
|
|
1223
|
+
|
|
1078
1224
|
// src/lib/mux-image-url.ts
|
|
1079
1225
|
var DEFAULT_MUX_IMAGE_ORIGIN = "https://image.mux.com";
|
|
1080
1226
|
function normalizeMuxImageOrigin(value) {
|
|
@@ -1162,6 +1308,14 @@ async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, sh
|
|
|
1162
1308
|
}
|
|
1163
1309
|
|
|
1164
1310
|
// src/primitives/text-chunking.ts
|
|
1311
|
+
var DEFAULT_MIN_CHUNK_DURATION_RATIO = 2 / 3;
|
|
1312
|
+
var DEFAULT_BOUNDARY_LOOKAHEAD_CUES = 12;
|
|
1313
|
+
var DEFAULT_BOUNDARY_PAUSE_SECONDS = 1.25;
|
|
1314
|
+
var STRONG_BOUNDARY_SCORE = 4;
|
|
1315
|
+
var PREFERRED_BOUNDARY_WINDOW_SECONDS = 5 * 60;
|
|
1316
|
+
var SENTENCE_BOUNDARY_REGEX = /[.!?]["')\]]*$/;
|
|
1317
|
+
var CLAUSE_BOUNDARY_REGEX = /[,;:]["')\]]*$/;
|
|
1318
|
+
var NEXT_SENTENCE_START_REGEX = /^[A-Z0-9"'([{]/;
|
|
1165
1319
|
function estimateTokenCount(text) {
|
|
1166
1320
|
const words = text.trim().split(/\s+/).length;
|
|
1167
1321
|
return Math.ceil(words / 0.75);
|
|
@@ -1234,6 +1388,151 @@ function chunkVTTCues(cues, maxTokens, overlapCues = 2) {
|
|
|
1234
1388
|
}
|
|
1235
1389
|
return chunks;
|
|
1236
1390
|
}
|
|
1391
|
+
function scoreCueBoundary(cues, index, boundaryPauseSeconds) {
|
|
1392
|
+
const cue = cues[index];
|
|
1393
|
+
const nextCue = cues[index + 1];
|
|
1394
|
+
if (!nextCue) {
|
|
1395
|
+
return Number.POSITIVE_INFINITY;
|
|
1396
|
+
}
|
|
1397
|
+
const trimmedText = cue.text.trim();
|
|
1398
|
+
let score = 0;
|
|
1399
|
+
if (SENTENCE_BOUNDARY_REGEX.test(trimmedText)) {
|
|
1400
|
+
score += 4;
|
|
1401
|
+
} else if (CLAUSE_BOUNDARY_REGEX.test(trimmedText)) {
|
|
1402
|
+
score += 2;
|
|
1403
|
+
}
|
|
1404
|
+
if (nextCue.startTime - cue.endTime >= boundaryPauseSeconds) {
|
|
1405
|
+
score += 2;
|
|
1406
|
+
}
|
|
1407
|
+
if (NEXT_SENTENCE_START_REGEX.test(nextCue.text.trim())) {
|
|
1408
|
+
score += 1;
|
|
1409
|
+
}
|
|
1410
|
+
return score;
|
|
1411
|
+
}
|
|
1412
|
+
function chunkVTTCuesByBudget(cues, options) {
|
|
1413
|
+
if (cues.length === 0) {
|
|
1414
|
+
return [];
|
|
1415
|
+
}
|
|
1416
|
+
const maxCuesPerChunk = Math.max(1, options.maxCuesPerChunk);
|
|
1417
|
+
let maxTextTokensPerChunk = Number.POSITIVE_INFINITY;
|
|
1418
|
+
if (options.maxTextTokensPerChunk) {
|
|
1419
|
+
maxTextTokensPerChunk = Math.max(1, options.maxTextTokensPerChunk);
|
|
1420
|
+
}
|
|
1421
|
+
const chunks = [];
|
|
1422
|
+
let chunkIndex = 0;
|
|
1423
|
+
let cueStartIndex = 0;
|
|
1424
|
+
let currentTokenCount = 0;
|
|
1425
|
+
for (let cueIndex = 0; cueIndex < cues.length; cueIndex++) {
|
|
1426
|
+
const cue = cues[cueIndex];
|
|
1427
|
+
const cueTokenCount = estimateTokenCount(cue.text);
|
|
1428
|
+
const currentCueCount = cueIndex - cueStartIndex;
|
|
1429
|
+
const wouldExceedCueCount = currentCueCount >= maxCuesPerChunk;
|
|
1430
|
+
const wouldExceedTokenCount = currentCueCount > 0 && currentTokenCount + cueTokenCount > maxTextTokensPerChunk;
|
|
1431
|
+
if (wouldExceedCueCount || wouldExceedTokenCount) {
|
|
1432
|
+
chunks.push({
|
|
1433
|
+
id: `chunk-${chunkIndex}`,
|
|
1434
|
+
cueStartIndex,
|
|
1435
|
+
cueEndIndex: cueIndex - 1,
|
|
1436
|
+
cueCount: cueIndex - cueStartIndex,
|
|
1437
|
+
startTime: cues[cueStartIndex].startTime,
|
|
1438
|
+
endTime: cues[cueIndex - 1].endTime
|
|
1439
|
+
});
|
|
1440
|
+
cueStartIndex = cueIndex;
|
|
1441
|
+
currentTokenCount = 0;
|
|
1442
|
+
chunkIndex++;
|
|
1443
|
+
}
|
|
1444
|
+
currentTokenCount += cueTokenCount;
|
|
1445
|
+
}
|
|
1446
|
+
chunks.push({
|
|
1447
|
+
id: `chunk-${chunkIndex}`,
|
|
1448
|
+
cueStartIndex,
|
|
1449
|
+
cueEndIndex: cues.length - 1,
|
|
1450
|
+
cueCount: cues.length - cueStartIndex,
|
|
1451
|
+
startTime: cues[cueStartIndex].startTime,
|
|
1452
|
+
endTime: cues[cues.length - 1].endTime
|
|
1453
|
+
});
|
|
1454
|
+
return chunks;
|
|
1455
|
+
}
|
|
1456
|
+
function chunkVTTCuesByDuration(cues, options) {
|
|
1457
|
+
if (cues.length === 0) {
|
|
1458
|
+
return [];
|
|
1459
|
+
}
|
|
1460
|
+
const targetChunkDurationSeconds = Math.max(1, options.targetChunkDurationSeconds);
|
|
1461
|
+
const maxChunkDurationSeconds = Math.max(targetChunkDurationSeconds, options.maxChunkDurationSeconds);
|
|
1462
|
+
const minChunkDurationSeconds = Math.min(
|
|
1463
|
+
targetChunkDurationSeconds,
|
|
1464
|
+
Math.max(
|
|
1465
|
+
1,
|
|
1466
|
+
options.minChunkDurationSeconds ?? Math.floor(targetChunkDurationSeconds * DEFAULT_MIN_CHUNK_DURATION_RATIO)
|
|
1467
|
+
)
|
|
1468
|
+
);
|
|
1469
|
+
const boundaryLookaheadCues = Math.max(1, options.boundaryLookaheadCues ?? DEFAULT_BOUNDARY_LOOKAHEAD_CUES);
|
|
1470
|
+
const boundaryPauseSeconds = options.boundaryPauseSeconds ?? DEFAULT_BOUNDARY_PAUSE_SECONDS;
|
|
1471
|
+
const preferredBoundaryStartSeconds = Math.max(
|
|
1472
|
+
minChunkDurationSeconds,
|
|
1473
|
+
targetChunkDurationSeconds - Math.min(PREFERRED_BOUNDARY_WINDOW_SECONDS, targetChunkDurationSeconds / 6)
|
|
1474
|
+
);
|
|
1475
|
+
const chunks = [];
|
|
1476
|
+
let chunkIndex = 0;
|
|
1477
|
+
let cueStartIndex = 0;
|
|
1478
|
+
while (cueStartIndex < cues.length) {
|
|
1479
|
+
const chunkStartTime = cues[cueStartIndex].startTime;
|
|
1480
|
+
let cueEndIndex = cueStartIndex;
|
|
1481
|
+
let bestBoundaryIndex = -1;
|
|
1482
|
+
let bestBoundaryScore = -1;
|
|
1483
|
+
let bestPreferredBoundaryIndex = -1;
|
|
1484
|
+
let bestPreferredBoundaryScore = -1;
|
|
1485
|
+
while (cueEndIndex < cues.length) {
|
|
1486
|
+
const cue = cues[cueEndIndex];
|
|
1487
|
+
const currentDuration = cue.endTime - chunkStartTime;
|
|
1488
|
+
if (currentDuration >= minChunkDurationSeconds) {
|
|
1489
|
+
const boundaryScore = scoreCueBoundary(cues, cueEndIndex, boundaryPauseSeconds);
|
|
1490
|
+
if (boundaryScore >= bestBoundaryScore) {
|
|
1491
|
+
bestBoundaryIndex = cueEndIndex;
|
|
1492
|
+
bestBoundaryScore = boundaryScore;
|
|
1493
|
+
}
|
|
1494
|
+
if (currentDuration >= preferredBoundaryStartSeconds && boundaryScore >= bestPreferredBoundaryScore) {
|
|
1495
|
+
bestPreferredBoundaryIndex = cueEndIndex;
|
|
1496
|
+
bestPreferredBoundaryScore = boundaryScore;
|
|
1497
|
+
}
|
|
1498
|
+
}
|
|
1499
|
+
const nextCue = cues[cueEndIndex + 1];
|
|
1500
|
+
if (!nextCue) {
|
|
1501
|
+
break;
|
|
1502
|
+
}
|
|
1503
|
+
const nextDuration = nextCue.endTime - chunkStartTime;
|
|
1504
|
+
const lookaheadExceeded = cueEndIndex - cueStartIndex >= boundaryLookaheadCues;
|
|
1505
|
+
const preferredBoundaryIndex = bestPreferredBoundaryIndex >= cueStartIndex ? bestPreferredBoundaryIndex : bestBoundaryIndex;
|
|
1506
|
+
const preferredBoundaryScore = bestPreferredBoundaryIndex >= cueStartIndex ? bestPreferredBoundaryScore : bestBoundaryScore;
|
|
1507
|
+
if (currentDuration >= targetChunkDurationSeconds) {
|
|
1508
|
+
if (preferredBoundaryIndex >= cueStartIndex && preferredBoundaryScore >= STRONG_BOUNDARY_SCORE) {
|
|
1509
|
+
cueEndIndex = preferredBoundaryIndex;
|
|
1510
|
+
break;
|
|
1511
|
+
}
|
|
1512
|
+
if (nextDuration > maxChunkDurationSeconds || lookaheadExceeded) {
|
|
1513
|
+
cueEndIndex = preferredBoundaryIndex >= cueStartIndex ? preferredBoundaryIndex : cueEndIndex;
|
|
1514
|
+
break;
|
|
1515
|
+
}
|
|
1516
|
+
}
|
|
1517
|
+
if (nextDuration > maxChunkDurationSeconds) {
|
|
1518
|
+
cueEndIndex = preferredBoundaryIndex >= cueStartIndex ? preferredBoundaryIndex : cueEndIndex;
|
|
1519
|
+
break;
|
|
1520
|
+
}
|
|
1521
|
+
cueEndIndex++;
|
|
1522
|
+
}
|
|
1523
|
+
chunks.push({
|
|
1524
|
+
id: `chunk-${chunkIndex}`,
|
|
1525
|
+
cueStartIndex,
|
|
1526
|
+
cueEndIndex,
|
|
1527
|
+
cueCount: cueEndIndex - cueStartIndex + 1,
|
|
1528
|
+
startTime: cues[cueStartIndex].startTime,
|
|
1529
|
+
endTime: cues[cueEndIndex].endTime
|
|
1530
|
+
});
|
|
1531
|
+
cueStartIndex = cueEndIndex + 1;
|
|
1532
|
+
chunkIndex++;
|
|
1533
|
+
}
|
|
1534
|
+
return chunks;
|
|
1535
|
+
}
|
|
1237
1536
|
function chunkText(text, strategy) {
|
|
1238
1537
|
switch (strategy.type) {
|
|
1239
1538
|
case "token": {
|
|
@@ -1275,10 +1574,8 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
|
|
|
1275
1574
|
}
|
|
1276
1575
|
const baseUrl = getMuxThumbnailBaseUrl(playbackId);
|
|
1277
1576
|
const urlPromises = timestamps.map(async (time) => {
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
}
|
|
1281
|
-
return `${baseUrl}?time=${time}&width=${width}`;
|
|
1577
|
+
const url = shouldSign ? await signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials) : `${baseUrl}?time=${time}&width=${width}`;
|
|
1578
|
+
return { url, time };
|
|
1282
1579
|
});
|
|
1283
1580
|
return Promise.all(urlPromises);
|
|
1284
1581
|
}
|
|
@@ -1300,24 +1597,82 @@ function findCaptionTrack(asset, languageCode) {
|
|
|
1300
1597
|
(track) => track.text_type === "subtitles" && track.language_code === languageCode
|
|
1301
1598
|
);
|
|
1302
1599
|
}
|
|
1600
|
+
function normalizeLineEndings(value) {
|
|
1601
|
+
return value.replace(/\r\n/g, "\n");
|
|
1602
|
+
}
|
|
1603
|
+
function isTimingLine(line) {
|
|
1604
|
+
return line.includes("-->");
|
|
1605
|
+
}
|
|
1606
|
+
function parseNumericCueIdentifier(line) {
|
|
1607
|
+
if (!/^\d+$/.test(line)) {
|
|
1608
|
+
return null;
|
|
1609
|
+
}
|
|
1610
|
+
return Number.parseInt(line, 10);
|
|
1611
|
+
}
|
|
1612
|
+
function isLikelyTitledCueIdentifier(line) {
|
|
1613
|
+
return /^\d+\s+-\s+\S.*$/.test(line);
|
|
1614
|
+
}
|
|
1615
|
+
function isLikelyCueIdentifier({
|
|
1616
|
+
line,
|
|
1617
|
+
nextLine,
|
|
1618
|
+
previousCueIdentifier
|
|
1619
|
+
}) {
|
|
1620
|
+
if (!line || !nextLine || !isTimingLine(nextLine)) {
|
|
1621
|
+
return false;
|
|
1622
|
+
}
|
|
1623
|
+
const numericIdentifier = parseNumericCueIdentifier(line);
|
|
1624
|
+
if (numericIdentifier !== null) {
|
|
1625
|
+
if (previousCueIdentifier === null || previousCueIdentifier === void 0) {
|
|
1626
|
+
return numericIdentifier === 1;
|
|
1627
|
+
}
|
|
1628
|
+
return numericIdentifier === previousCueIdentifier + 1;
|
|
1629
|
+
}
|
|
1630
|
+
return isLikelyTitledCueIdentifier(line);
|
|
1631
|
+
}
|
|
1632
|
+
function getCueIdentifierLineIndex(lines, timingLineIndex, previousCueIdentifier) {
|
|
1633
|
+
const identifierIndex = timingLineIndex - 1;
|
|
1634
|
+
if (identifierIndex < 0) {
|
|
1635
|
+
return -1;
|
|
1636
|
+
}
|
|
1637
|
+
const candidate = lines[identifierIndex].trim();
|
|
1638
|
+
if (!candidate || isTimingLine(candidate)) {
|
|
1639
|
+
return -1;
|
|
1640
|
+
}
|
|
1641
|
+
return isLikelyCueIdentifier({
|
|
1642
|
+
line: candidate,
|
|
1643
|
+
nextLine: lines[timingLineIndex]?.trim(),
|
|
1644
|
+
previousCueIdentifier
|
|
1645
|
+
}) ? identifierIndex : -1;
|
|
1646
|
+
}
|
|
1303
1647
|
function extractTextFromVTT(vttContent) {
|
|
1304
1648
|
if (!vttContent.trim()) {
|
|
1305
1649
|
return "";
|
|
1306
1650
|
}
|
|
1307
1651
|
const lines = vttContent.split("\n");
|
|
1308
1652
|
const textLines = [];
|
|
1653
|
+
let previousCueIdentifier = null;
|
|
1654
|
+
let isInsideNoteBlock = false;
|
|
1309
1655
|
for (let i = 0; i < lines.length; i++) {
|
|
1310
1656
|
const line = lines[i].trim();
|
|
1311
|
-
|
|
1657
|
+
const nextLine = lines[i + 1]?.trim();
|
|
1658
|
+
if (!line) {
|
|
1659
|
+
isInsideNoteBlock = false;
|
|
1660
|
+
continue;
|
|
1661
|
+
}
|
|
1662
|
+
if (isInsideNoteBlock)
|
|
1312
1663
|
continue;
|
|
1313
1664
|
if (line === "WEBVTT")
|
|
1314
1665
|
continue;
|
|
1315
|
-
if (line.startsWith("NOTE "))
|
|
1666
|
+
if (line === "NOTE" || line.startsWith("NOTE ")) {
|
|
1667
|
+
isInsideNoteBlock = true;
|
|
1316
1668
|
continue;
|
|
1317
|
-
|
|
1669
|
+
}
|
|
1670
|
+
if (isTimingLine(line))
|
|
1318
1671
|
continue;
|
|
1319
|
-
if (
|
|
1672
|
+
if (isLikelyCueIdentifier({ line, nextLine, previousCueIdentifier })) {
|
|
1673
|
+
previousCueIdentifier = parseNumericCueIdentifier(line);
|
|
1320
1674
|
continue;
|
|
1675
|
+
}
|
|
1321
1676
|
if (line.startsWith("STYLE") || line.startsWith("REGION"))
|
|
1322
1677
|
continue;
|
|
1323
1678
|
const cleanLine = line.replace(/<[^>]*>/g, "").trim();
|
|
@@ -1376,20 +1731,34 @@ function parseVTTCues(vttContent) {
|
|
|
1376
1731
|
return [];
|
|
1377
1732
|
const lines = vttContent.split("\n");
|
|
1378
1733
|
const cues = [];
|
|
1734
|
+
let previousCueIdentifier = null;
|
|
1379
1735
|
for (let i = 0; i < lines.length; i++) {
|
|
1380
1736
|
const line = lines[i].trim();
|
|
1381
|
-
if (line
|
|
1737
|
+
if (isTimingLine(line)) {
|
|
1382
1738
|
const [startStr, endStr] = line.split(" --> ").map((s) => s.trim());
|
|
1383
1739
|
const startTime = vttTimestampToSeconds(startStr);
|
|
1384
1740
|
const endTime = vttTimestampToSeconds(endStr.split(" ")[0]);
|
|
1385
|
-
const
|
|
1741
|
+
const currentCueIdentifierLine = lines[i - 1]?.trim() ?? "";
|
|
1742
|
+
const currentCueIdentifier = isLikelyCueIdentifier({
|
|
1743
|
+
line: currentCueIdentifierLine,
|
|
1744
|
+
nextLine: line,
|
|
1745
|
+
previousCueIdentifier
|
|
1746
|
+
}) ? parseNumericCueIdentifier(currentCueIdentifierLine) : null;
|
|
1747
|
+
const rawTextLines = [];
|
|
1386
1748
|
let j = i + 1;
|
|
1387
|
-
while (j < lines.length && lines[j].trim() && !lines[j].
|
|
1388
|
-
|
|
1389
|
-
if (cleanLine)
|
|
1390
|
-
textLines.push(cleanLine);
|
|
1749
|
+
while (j < lines.length && lines[j].trim() && !isTimingLine(lines[j].trim())) {
|
|
1750
|
+
rawTextLines.push(lines[j].trim());
|
|
1391
1751
|
j++;
|
|
1392
1752
|
}
|
|
1753
|
+
const trailingNumericLine = parseNumericCueIdentifier(rawTextLines.at(-1) ?? "");
|
|
1754
|
+
if (trailingNumericLine !== null && isLikelyCueIdentifier({
|
|
1755
|
+
line: rawTextLines.at(-1) ?? "",
|
|
1756
|
+
nextLine: lines[j]?.trim(),
|
|
1757
|
+
previousCueIdentifier: currentCueIdentifier
|
|
1758
|
+
}) && rawTextLines.length > 1) {
|
|
1759
|
+
rawTextLines.pop();
|
|
1760
|
+
}
|
|
1761
|
+
const textLines = rawTextLines.map((textLine) => textLine.replace(/<[^>]*>/g, "")).filter(Boolean);
|
|
1393
1762
|
if (textLines.length > 0) {
|
|
1394
1763
|
cues.push({
|
|
1395
1764
|
startTime,
|
|
@@ -1397,10 +1766,102 @@ function parseVTTCues(vttContent) {
|
|
|
1397
1766
|
text: textLines.join(" ")
|
|
1398
1767
|
});
|
|
1399
1768
|
}
|
|
1769
|
+
previousCueIdentifier = currentCueIdentifier;
|
|
1400
1770
|
}
|
|
1401
1771
|
}
|
|
1402
1772
|
return cues;
|
|
1403
1773
|
}
|
|
1774
|
+
function splitVttPreambleAndCueBlocks(vttContent) {
|
|
1775
|
+
const normalizedContent = normalizeLineEndings(vttContent).trim();
|
|
1776
|
+
if (!normalizedContent) {
|
|
1777
|
+
return {
|
|
1778
|
+
preamble: "WEBVTT",
|
|
1779
|
+
cueBlocks: []
|
|
1780
|
+
};
|
|
1781
|
+
}
|
|
1782
|
+
const rawBlocks = normalizedContent.split(/\n{2,}/).map((block) => block.trim()).filter(Boolean);
|
|
1783
|
+
const cueBlockStartIndex = rawBlocks.findIndex((block) => block.includes("-->"));
|
|
1784
|
+
if (cueBlockStartIndex === -1) {
|
|
1785
|
+
return {
|
|
1786
|
+
preamble: normalizedContent.startsWith("WEBVTT") ? normalizedContent : `WEBVTT
|
|
1787
|
+
|
|
1788
|
+
${normalizedContent}`,
|
|
1789
|
+
cueBlocks: []
|
|
1790
|
+
};
|
|
1791
|
+
}
|
|
1792
|
+
const hasMergedCueBlocks = rawBlocks.slice(cueBlockStartIndex).some((block) => (block.match(/-->/g) ?? []).length > 1);
|
|
1793
|
+
if (hasMergedCueBlocks) {
|
|
1794
|
+
const lines = normalizedContent.split("\n");
|
|
1795
|
+
const timingLineIndices = lines.map((line, index) => isTimingLine(line.trim()) ? index : -1).filter((index) => index >= 0);
|
|
1796
|
+
let previousCueIdentifier = null;
|
|
1797
|
+
const firstCueStartIndex = getCueIdentifierLineIndex(lines, timingLineIndices[0], previousCueIdentifier);
|
|
1798
|
+
const preambleEndIndex = firstCueStartIndex >= 0 ? firstCueStartIndex : timingLineIndices[0];
|
|
1799
|
+
const preamble2 = lines.slice(0, preambleEndIndex).join("\n").trim() || "WEBVTT";
|
|
1800
|
+
const cueBlocks2 = timingLineIndices.map((timingLineIndex, index) => {
|
|
1801
|
+
const cueIdentifierLineIndex = getCueIdentifierLineIndex(lines, timingLineIndex, previousCueIdentifier);
|
|
1802
|
+
const cueStartIndex = cueIdentifierLineIndex >= 0 ? cueIdentifierLineIndex : timingLineIndex;
|
|
1803
|
+
const currentCueIdentifier = cueIdentifierLineIndex >= 0 ? parseNumericCueIdentifier(lines[cueIdentifierLineIndex].trim()) : null;
|
|
1804
|
+
const nextTimingLineIndex = timingLineIndices[index + 1] ?? lines.length;
|
|
1805
|
+
let cueEndIndex = nextTimingLineIndex - 1;
|
|
1806
|
+
while (cueEndIndex > timingLineIndex && !lines[cueEndIndex].trim()) {
|
|
1807
|
+
cueEndIndex--;
|
|
1808
|
+
}
|
|
1809
|
+
const nextCueIdentifierLineIndex = index < timingLineIndices.length - 1 ? getCueIdentifierLineIndex(lines, nextTimingLineIndex, currentCueIdentifier) : -1;
|
|
1810
|
+
if (nextCueIdentifierLineIndex === cueEndIndex) {
|
|
1811
|
+
cueEndIndex--;
|
|
1812
|
+
}
|
|
1813
|
+
while (cueEndIndex > timingLineIndex && !lines[cueEndIndex].trim()) {
|
|
1814
|
+
cueEndIndex--;
|
|
1815
|
+
}
|
|
1816
|
+
previousCueIdentifier = currentCueIdentifier;
|
|
1817
|
+
return lines.slice(cueStartIndex, cueEndIndex + 1).join("\n").trim();
|
|
1818
|
+
});
|
|
1819
|
+
return {
|
|
1820
|
+
preamble: preamble2,
|
|
1821
|
+
cueBlocks: cueBlocks2
|
|
1822
|
+
};
|
|
1823
|
+
}
|
|
1824
|
+
const preambleBlocks = rawBlocks.slice(0, cueBlockStartIndex);
|
|
1825
|
+
const cueBlocks = rawBlocks.slice(cueBlockStartIndex);
|
|
1826
|
+
const preamble = preambleBlocks.length > 0 ? preambleBlocks.join("\n\n") : "WEBVTT";
|
|
1827
|
+
return {
|
|
1828
|
+
preamble,
|
|
1829
|
+
cueBlocks
|
|
1830
|
+
};
|
|
1831
|
+
}
|
|
1832
|
+
function buildVttFromCueBlocks(cueBlocks, preamble = "WEBVTT") {
|
|
1833
|
+
if (cueBlocks.length === 0) {
|
|
1834
|
+
return `${preamble.trim()}
|
|
1835
|
+
`;
|
|
1836
|
+
}
|
|
1837
|
+
return `${preamble.trim()}
|
|
1838
|
+
|
|
1839
|
+
${cueBlocks.map((block) => block.trim()).join("\n\n")}
|
|
1840
|
+
`;
|
|
1841
|
+
}
|
|
1842
|
+
function replaceCueText(cueBlock, translatedText) {
|
|
1843
|
+
const lines = normalizeLineEndings(cueBlock).split("\n").map((line) => line.trim()).filter(Boolean);
|
|
1844
|
+
const timingLineIndex = lines.findIndex((line) => line.includes("-->"));
|
|
1845
|
+
if (timingLineIndex === -1) {
|
|
1846
|
+
throw new Error("Cue block is missing a timestamp line");
|
|
1847
|
+
}
|
|
1848
|
+
const headerLines = lines.slice(0, timingLineIndex + 1);
|
|
1849
|
+
const translatedLines = normalizeLineEndings(translatedText).split("\n").map((line) => line.trim()).filter(Boolean);
|
|
1850
|
+
return [...headerLines, ...translatedLines].join("\n");
|
|
1851
|
+
}
|
|
1852
|
+
function buildVttFromTranslatedCueBlocks(cueBlocks, translatedTexts, preamble = "WEBVTT") {
|
|
1853
|
+
if (cueBlocks.length !== translatedTexts.length) {
|
|
1854
|
+
throw new Error(`Expected ${cueBlocks.length} translated cues, received ${translatedTexts.length}`);
|
|
1855
|
+
}
|
|
1856
|
+
return buildVttFromCueBlocks(
|
|
1857
|
+
cueBlocks.map((cueBlock, index) => replaceCueText(cueBlock, translatedTexts[index])),
|
|
1858
|
+
preamble
|
|
1859
|
+
);
|
|
1860
|
+
}
|
|
1861
|
+
function concatenateVttSegments(segments, preamble = "WEBVTT") {
|
|
1862
|
+
const cueBlocks = segments.flatMap((segment) => splitVttPreambleAndCueBlocks(segment).cueBlocks);
|
|
1863
|
+
return buildVttFromCueBlocks(cueBlocks, preamble);
|
|
1864
|
+
}
|
|
1404
1865
|
async function buildTranscriptUrl(playbackId, trackId, shouldSign = false, credentials) {
|
|
1405
1866
|
"use step";
|
|
1406
1867
|
const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
|
|
@@ -1463,21 +1924,33 @@ async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
|
|
|
1463
1924
|
// src/workflows/index.ts
|
|
1464
1925
|
var workflows_exports = {};
|
|
1465
1926
|
__export(workflows_exports, {
|
|
1927
|
+
DEFAULT_DESCRIPTION_LENGTH: () => DEFAULT_DESCRIPTION_LENGTH,
|
|
1928
|
+
DEFAULT_SUMMARY_KEYWORD_LIMIT: () => DEFAULT_SUMMARY_KEYWORD_LIMIT,
|
|
1929
|
+
DEFAULT_TITLE_LENGTH: () => DEFAULT_TITLE_LENGTH,
|
|
1466
1930
|
HIVE_SEXUAL_CATEGORIES: () => HIVE_SEXUAL_CATEGORIES,
|
|
1467
1931
|
HIVE_VIOLENCE_CATEGORIES: () => HIVE_VIOLENCE_CATEGORIES,
|
|
1468
|
-
|
|
1932
|
+
aggregateTokenUsage: () => aggregateTokenUsage,
|
|
1933
|
+
applyOverrideLists: () => applyOverrideLists,
|
|
1934
|
+
applyReplacements: () => applyReplacements,
|
|
1469
1935
|
askQuestions: () => askQuestions,
|
|
1936
|
+
buildReplacementRegex: () => buildReplacementRegex,
|
|
1470
1937
|
burnedInCaptionsSchema: () => burnedInCaptionsSchema,
|
|
1938
|
+
censorVttContent: () => censorVttContent,
|
|
1471
1939
|
chapterSchema: () => chapterSchema,
|
|
1472
1940
|
chaptersSchema: () => chaptersSchema,
|
|
1941
|
+
createReplacer: () => createReplacer,
|
|
1942
|
+
editCaptions: () => editCaptions,
|
|
1473
1943
|
generateChapters: () => generateChapters,
|
|
1474
1944
|
generateEmbeddings: () => generateEmbeddings,
|
|
1475
1945
|
generateVideoEmbeddings: () => generateVideoEmbeddings,
|
|
1476
1946
|
getModerationScores: () => getModerationScores,
|
|
1477
1947
|
getSummaryAndTags: () => getSummaryAndTags,
|
|
1478
1948
|
hasBurnedInCaptions: () => hasBurnedInCaptions,
|
|
1949
|
+
profanityDetectionSchema: () => profanityDetectionSchema,
|
|
1479
1950
|
questionAnswerSchema: () => questionAnswerSchema,
|
|
1951
|
+
shouldSplitChunkTranslationError: () => shouldSplitChunkTranslationError,
|
|
1480
1952
|
summarySchema: () => summarySchema,
|
|
1953
|
+
transformCueText: () => transformCueText,
|
|
1481
1954
|
translateAudio: () => translateAudio,
|
|
1482
1955
|
translateCaptions: () => translateCaptions,
|
|
1483
1956
|
translationSchema: () => translationSchema
|
|
@@ -2610,32 +3083,455 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
2610
3083
|
credentials
|
|
2611
3084
|
});
|
|
2612
3085
|
} catch (error) {
|
|
2613
|
-
throw new Error(
|
|
2614
|
-
`Failed to generate chapters with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
2615
|
-
);
|
|
2616
|
-
}
|
|
2617
|
-
if (!chaptersData || !chaptersData.chapters) {
|
|
2618
|
-
throw new Error("No chapters generated from AI response");
|
|
2619
|
-
}
|
|
2620
|
-
const { chapters: chaptersPayload, usage } = chaptersData;
|
|
2621
|
-
const validChapters = chaptersPayload.chapters.filter((chapter) => typeof chapter.startTime === "number" && typeof chapter.title === "string").sort((a, b) => a.startTime - b.startTime);
|
|
2622
|
-
if (validChapters.length === 0) {
|
|
2623
|
-
throw new Error("No valid chapters found in AI response");
|
|
3086
|
+
throw new Error(
|
|
3087
|
+
`Failed to generate chapters with ${provider}: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
3088
|
+
);
|
|
3089
|
+
}
|
|
3090
|
+
if (!chaptersData || !chaptersData.chapters) {
|
|
3091
|
+
throw new Error("No chapters generated from AI response");
|
|
3092
|
+
}
|
|
3093
|
+
const { chapters: chaptersPayload, usage } = chaptersData;
|
|
3094
|
+
const validChapters = chaptersPayload.chapters.filter((chapter) => typeof chapter.startTime === "number" && typeof chapter.title === "string").sort((a, b) => a.startTime - b.startTime);
|
|
3095
|
+
if (validChapters.length === 0) {
|
|
3096
|
+
throw new Error("No valid chapters found in AI response");
|
|
3097
|
+
}
|
|
3098
|
+
if (validChapters[0].startTime !== 0) {
|
|
3099
|
+
validChapters[0].startTime = 0;
|
|
3100
|
+
}
|
|
3101
|
+
const usageWithMetadata = {
|
|
3102
|
+
...usage,
|
|
3103
|
+
metadata: {
|
|
3104
|
+
...usage?.metadata,
|
|
3105
|
+
assetDurationSeconds
|
|
3106
|
+
}
|
|
3107
|
+
};
|
|
3108
|
+
return {
|
|
3109
|
+
assetId,
|
|
3110
|
+
languageCode,
|
|
3111
|
+
chapters: validChapters,
|
|
3112
|
+
usage: usageWithMetadata
|
|
3113
|
+
};
|
|
3114
|
+
}
|
|
3115
|
+
|
|
3116
|
+
// src/workflows/edit-captions.ts
|
|
3117
|
+
import { generateText as generateText4, Output as Output4 } from "ai";
|
|
3118
|
+
import dedent4 from "dedent";
|
|
3119
|
+
import { z as z5 } from "zod";
|
|
3120
|
+
|
|
3121
|
+
// src/lib/mux-tracks.ts
|
|
3122
|
+
// Downloads the raw WebVTT document for a Mux text track.
// Throws when the HTTP response is not OK; otherwise resolves to the VTT body text.
async function fetchVttFromMux(vttUrl) {
  "use step";
  const response = await fetch(vttUrl);
  if (!response.ok) {
    throw new Error(`Failed to fetch VTT file: ${response.statusText}`);
  }
  return response.text();
}
|
|
3130
|
+
// Registers a new sidecar subtitle track on a Mux asset, pointing Mux at a
// presigned URL from which it ingests the VTT file. Resolves to the new track id.
async function createTextTrackOnMux(assetId, languageCode, trackName, presignedUrl, credentials) {
  "use step";
  const muxClient = await resolveMuxClient(credentials);
  const mux = await muxClient.createClient();
  const track = await mux.video.assets.createTrack(assetId, {
    type: "text",
    text_type: "subtitles",
    language_code: languageCode,
    name: trackName,
    url: presignedUrl
  });
  const createdTrackId = track.id;
  if (!createdTrackId) {
    throw new Error("Failed to create text track: no track ID returned from Mux");
  }
  return createdTrackId;
}
|
|
3146
|
+
|
|
3147
|
+
// src/lib/storage-adapter.ts
|
|
3148
|
+
// Validates that both S3 credential halves are present.
// Returns them as a pair, or throws with setup guidance when either is missing/empty.
function requireCredentials(accessKeyId, secretAccessKey) {
  const incomplete = !accessKeyId || !secretAccessKey;
  if (incomplete) {
    throw new Error(
      "S3 credentials are required for default storage operations. Provide S3_ACCESS_KEY_ID and S3_SECRET_ACCESS_KEY or pass options.storageAdapter."
    );
  }
  return { accessKeyId, secretAccessKey };
}
|
|
3156
|
+
// Writes an object either through a caller-supplied storage adapter or,
// when none is given, through the built-in S3 path (which requires credentials).
async function putObjectWithStorageAdapter(input, adapter) {
  if (adapter) {
    await adapter.putObject(input);
    return;
  }
  // No adapter: fall back to direct S3 and insist on credentials.
  const { accessKeyId, secretAccessKey } = requireCredentials(input.accessKeyId, input.secretAccessKey);
  const { endpoint, region, bucket, key, body, contentType } = input;
  await putObjectToS3({
    accessKeyId,
    secretAccessKey,
    endpoint,
    region,
    bucket,
    key,
    body,
    contentType
  });
}
|
|
3173
|
+
// Produces a presigned GET URL via the caller's storage adapter when provided,
// otherwise via the built-in S3 signer (which requires credentials).
async function createPresignedGetUrlWithStorageAdapter(input, adapter) {
  if (adapter) {
    return adapter.createPresignedGetUrl(input);
  }
  const { accessKeyId, secretAccessKey } = requireCredentials(input.accessKeyId, input.secretAccessKey);
  const { endpoint, region, bucket, key, expiresInSeconds } = input;
  return createPresignedGetUrl({
    accessKeyId,
    secretAccessKey,
    endpoint,
    region,
    bucket,
    key,
    expiresInSeconds
  });
}
|
|
3188
|
+
|
|
3189
|
+
// src/workflows/edit-captions.ts
|
|
3190
|
+
// Structured-output schema for the profanity-detection LLM call:
// a flat list of the exact profane strings found in the transcript
// (detection only — censoring is applied separately by censorVttContent).
var profanityDetectionSchema = z5.object({
  profanity: z5.array(z5.string()).describe(
    "Unique profane words or short phrases exactly as they appear in the transcript text. Include each distinct form only once (e.g., if 'fuck' and 'fucking' both appear, list both)."
  )
});
// System prompt for the detection call. Instructs the model to return matches
// verbatim (unmodified/uncensored) so downstream regex replacement can find them.
var SYSTEM_PROMPT3 = dedent4`
You are a content moderation assistant. Your task is to identify profane, vulgar, or obscene
words and phrases in subtitle text. Return ONLY the exact profane words or phrases as they appear
in the text. Do not modify, censor, or paraphrase them. Do not include words that are merely
informal or slang but not profane. Focus on words that would be bleeped on broadcast television.`;
|
|
3200
|
+
// Walks a WebVTT document line by line and applies `transform(line, cueStartSeconds)`
// to every cue-text line. Timestamp lines (containing "-->") start a cue and record
// its start time; a blank line ends the cue; all other lines (header, cue ids,
// blank separators) pass through untouched.
function transformCueText(rawVtt, transform) {
  const output = [];
  let insideCue = false;
  let cueStartSeconds = 0;
  for (const line of rawVtt.split("\n")) {
    if (line.includes("-->")) {
      // Cue timing line: remember the start time for the cue-text lines below it.
      const [startStamp] = line.split("-->");
      cueStartSeconds = vttTimestampToSeconds(startStamp.trim());
      insideCue = true;
      output.push(line);
    } else if (line.trim() === "") {
      // Blank line terminates the current cue block.
      insideCue = false;
      output.push(line);
    } else if (insideCue) {
      output.push(transform(line, cueStartSeconds));
    } else {
      output.push(line);
    }
  }
  return output.join("\n");
}
|
|
3222
|
+
// Compiles a single case-insensitive, word-bounded alternation regex that matches
// any of the given words/phrases. Empty strings are dropped; returns null when no
// usable words remain. Candidates are ordered longest-first so longer phrases win
// over their shorter prefixes in the alternation.
function buildReplacementRegex(words) {
  const candidates = words.filter((word) => word.length > 0);
  if (candidates.length === 0) {
    return null;
  }
  candidates.sort((a, b) => b.length - a.length);
  const alternation = candidates
    .map((word) => word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join("|");
  return new RegExp(`\\b(?:${alternation})\\b`, "gi");
}
|
|
3231
|
+
// Returns the match-replacement function for a censoring mode:
//   "blank"  -> bracketed underscores of equal length, e.g. "[____]"
//   "remove" -> empty string (deletes the match)
//   "mask"   -> question marks of equal length, e.g. "????"
// An unrecognized mode yields undefined, matching the original switch fall-through.
function createReplacer(mode) {
  const replacers = new Map([
    ["blank", (match) => `[${"_".repeat(match.length)}]`],
    ["remove", () => ""],
    ["mask", (match) => "?".repeat(match.length)]
  ]);
  return replacers.get(mode);
}
|
|
3241
|
+
// Censors the given profanity list inside a VTT document's cue text using the
// chosen mode ("blank" | "remove" | "mask"). Returns the rewritten VTT plus a
// log of every replacement with the cue start time it occurred in.
function censorVttContent(rawVtt, profanity, mode) {
  // Nothing to censor, or only empty strings were supplied.
  const regex = profanity.length > 0 ? buildReplacementRegex(profanity) : null;
  if (!regex) {
    return { censoredVtt: rawVtt, replacements: [] };
  }
  const replace = createReplacer(mode);
  const log = [];
  const censoredVtt = transformCueText(rawVtt, (line, cueStartTime) =>
    line.replace(regex, (match) => {
      const after = replace(match);
      log.push({ cueStartTime, before: match, after });
      return after;
    })
  );
  return { censoredVtt, replacements: log };
}
|
|
3260
|
+
// Merges the AI-detected profanity list with user overrides:
// appends alwaysCensor entries not already present (case-insensitive),
// then drops anything on the neverCensor list (case-insensitive).
// Original casing of kept entries is preserved.
function applyOverrideLists(detected, alwaysCensor, neverCensor) {
  const known = new Set(detected.map((word) => word.toLowerCase()));
  const additions = alwaysCensor.filter((word) => {
    const lower = word.toLowerCase();
    if (known.has(lower)) {
      return false;
    }
    known.add(lower); // also dedupes repeats within alwaysCensor itself
    return true;
  });
  const excluded = new Set(neverCensor.map((word) => word.toLowerCase()));
  return [...detected, ...additions].filter((word) => !excluded.has(word.toLowerCase()));
}
|
|
3273
|
+
// Applies caller-supplied find/replace rules to a VTT document's cue text.
// Matching is case-sensitive and word-bounded. Returns the rewritten VTT plus a
// record of every substitution with the cue start time it occurred in.
//
// Fix: the original rebuilt each rule's RegExp inside the per-line transform
// callback, compiling O(lines x rules) regexes. The escape + compile is
// loop-invariant, so each rule is now compiled exactly once up front.
function applyReplacements(rawVtt, replacements) {
  // Empty search strings would match everywhere; drop them.
  const activeRules = replacements.filter((r) => r.find.length > 0);
  if (activeRules.length === 0) {
    return { editedVtt: rawVtt, replacements: [] };
  }
  // Pre-compile one word-bounded regex per rule.
  const compiled = activeRules.map(({ find, replace }) => ({
    regex: new RegExp(`\\b${find.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\b`, "g"),
    replace
  }));
  const records = [];
  const editedVtt = transformCueText(rawVtt, (line, cueStartTime) => {
    let result = line;
    for (const { regex, replace } of compiled) {
      // String.replace with a global regex always scans from index 0,
      // so reusing the compiled regex across lines is safe.
      result = result.replace(regex, (match) => {
        records.push({ cueStartTime, before: match, after: replace });
        return replace;
      });
    }
    return result;
  });
  return { editedVtt, replacements: records };
}
|
|
3293
|
+
// Asks the configured language model to list every profane word/phrase found in
// the plain-text transcript (verbatim, per SYSTEM_PROMPT3). Returns the detected
// strings together with the token-usage accounting from the AI SDK response.
async function identifyProfanityWithAI({
  plainText,
  provider,
  modelId,
  credentials
}) {
  "use step";
  const model = await createLanguageModelFromConfig(provider, modelId, credentials);
  const userPrompt = `Identify all profane words and phrases in the following subtitle transcript. Return each unique profane word or phrase exactly as it appears in the text.

<transcript>
${plainText}
</transcript>`;
  const response = await generateText4({
    model,
    output: Output4.object({ schema: profanityDetectionSchema }),
    messages: [
      { role: "system", content: SYSTEM_PROMPT3 },
      { role: "user", content: userPrompt }
    ]
  });
  const { inputTokens, outputTokens, totalTokens, reasoningTokens, cachedInputTokens } = response.usage;
  return {
    profanity: response.output.profanity,
    usage: {
      inputTokens,
      outputTokens,
      totalTokens,
      reasoningTokens,
      cachedInputTokens
    }
  };
}
|
|
3330
|
+
// Uploads the edited VTT to storage under a timestamped key and returns a
// presigned GET URL for it (default expiry 86400s). Credentials are read from
// the environment; both operations defer to the storage adapter when supplied.
async function uploadEditedVttToS3({
  editedVtt,
  assetId,
  trackId,
  s3Endpoint,
  s3Region,
  s3Bucket,
  storageAdapter,
  s3SignedUrlExpirySeconds
}) {
  "use step";
  const accessKeyId = env_default.S3_ACCESS_KEY_ID;
  const secretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
  // Date.now() in the key keeps repeated edits of the same track from colliding.
  const vttKey = `edited/${assetId}/${trackId}-edited-${Date.now()}.vtt`;
  const location = {
    accessKeyId,
    secretAccessKey,
    endpoint: s3Endpoint,
    region: s3Region,
    bucket: s3Bucket,
    key: vttKey
  };
  await putObjectWithStorageAdapter(
    { ...location, body: editedVtt, contentType: "text/vtt" },
    storageAdapter
  );
  return createPresignedGetUrlWithStorageAdapter(
    { ...location, expiresInSeconds: s3SignedUrlExpirySeconds ?? 86400 },
    storageAdapter
  );
}
|
|
3364
|
+
// Removes a text track from a Mux asset (used to drop the original subtitle
// track after an edited replacement has been attached).
async function deleteTrackOnMux(assetId, trackId, credentials) {
  "use step";
  const muxClient = await resolveMuxClient(credentials);
  const api = await muxClient.createClient();
  await api.video.assets.deleteTrack(assetId, trackId);
}
|
|
3370
|
+
/**
 * Workflow: edits an existing Mux subtitle track by AI-driven profanity
 * censoring and/or static find/replace rules, then (optionally) uploads the
 * edited VTT to storage and attaches it to the asset as a new text track.
 *
 * Requires at least one of options.autoCensorProfanity / options.replacements;
 * `provider` is mandatory when auto-censoring is requested. Upload-to-Mux and
 * delete-original both default to true unless explicitly set to false.
 */
async function editCaptions(assetId, trackId, options) {
  "use workflow";
  const {
    provider,
    model,
    autoCensorProfanity: autoCensorOption,
    replacements: replacementsOption,
    deleteOriginalTrack,
    uploadToMux: uploadToMuxOption,
    s3Endpoint: providedS3Endpoint,
    s3Region: providedS3Region,
    s3Bucket: providedS3Bucket,
    trackNameSuffix,
    storageAdapter,
    credentials
  } = options;
  // --- Option validation: must have something to do, and a model to do it with.
  const hasAutoCensor = !!autoCensorOption;
  const hasReplacements = !!replacementsOption && replacementsOption.length > 0;
  if (!hasAutoCensor && !hasReplacements) {
    throw new Error("At least one of autoCensorProfanity or replacements must be provided.");
  }
  if (autoCensorOption && !provider) {
    throw new Error("provider is required when using autoCensorProfanity.");
  }
  // Both behaviors are opt-out: only an explicit `false` disables them.
  const deleteOriginal = deleteOriginalTrack !== false;
  const uploadToMux = uploadToMuxOption !== false;
  // Storage settings fall back to environment variables.
  const s3Endpoint = providedS3Endpoint ?? env_default.S3_ENDPOINT;
  const s3Region = providedS3Region ?? env_default.S3_REGION ?? "auto";
  const s3Bucket = providedS3Bucket ?? env_default.S3_BUCKET;
  const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
  const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
  // Storage is only needed when the result gets pushed back to Mux; a custom
  // storageAdapter removes the need for raw S3 credentials.
  if (uploadToMux && (!s3Endpoint || !s3Bucket || !storageAdapter && (!s3AccessKeyId || !s3SecretAccessKey))) {
    throw new Error(
      "Storage configuration is required for uploading to Mux. Provide s3Endpoint and s3Bucket. If no storageAdapter is supplied, also provide s3AccessKeyId and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables."
    );
  }
  // --- Resolve asset, playback ID, and (if the playback policy is signed) signing keys.
  const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId, credentials);
  const assetDurationSeconds = getAssetDurationSecondsFromAsset(assetData);
  const signingContext = await resolveMuxSigningContext(credentials);
  if (policy === "signed" && !signingContext) {
    throw new Error(
      "Signed playback ID requires signing credentials. Set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
    );
  }
  // --- Locate the source subtitle track among the asset's ready text tracks.
  const readyTextTracks = getReadyTextTracks(assetData);
  const sourceTrack = readyTextTracks.find((t) => t.id === trackId);
  if (!sourceTrack) {
    const availableTrackIds = readyTextTracks.map((t) => t.id).filter(Boolean).join(", ");
    throw new Error(
      `Track '${trackId}' not found or not ready on asset '${assetId}'. Available track IDs: ${availableTrackIds || "none"}`
    );
  }
  // --- Fetch the track's VTT content.
  const vttUrl = await buildTranscriptUrl(playbackId, trackId, policy === "signed", credentials);
  let vttContent;
  try {
    vttContent = await fetchVttFromMux(vttUrl);
  } catch (error) {
    throw new Error(`Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  // editedVtt accumulates both edit passes (auto-censor first, then replacements).
  let editedVtt = vttContent;
  let totalReplacementCount = 0;
  let autoCensorResult;
  let usage;
  // --- Pass 1 (optional): AI profanity detection + censoring.
  if (autoCensorOption) {
    const { mode = "blank", alwaysCensor = [], neverCensor = [] } = autoCensorOption;
    const plainText = extractTextFromVTT(vttContent);
    if (!plainText.trim()) {
      throw new Error("Track transcript is empty; nothing to censor.");
    }
    const modelConfig = resolveLanguageModelConfig({
      ...options,
      provider,
      model
    });
    let detectedProfanity;
    try {
      const result = await identifyProfanityWithAI({
        plainText,
        provider: modelConfig.provider,
        modelId: modelConfig.modelId,
        credentials
      });
      detectedProfanity = result.profanity;
      usage = result.usage;
    } catch (error) {
      throw new Error(`Failed to detect profanity with ${modelConfig.provider}: ${error instanceof Error ? error.message : "Unknown error"}`);
    }
    // Merge user override lists with the AI detections, then censor.
    const finalProfanity = applyOverrideLists(detectedProfanity, alwaysCensor, neverCensor);
    const { censoredVtt, replacements: censorReplacements } = censorVttContent(editedVtt, finalProfanity, mode);
    editedVtt = censoredVtt;
    totalReplacementCount += censorReplacements.length;
    autoCensorResult = { replacements: censorReplacements };
  }
  // --- Pass 2 (optional): static find/replace rules over the (possibly censored) VTT.
  let replacementsResult;
  if (replacementsOption && replacementsOption.length > 0) {
    const { editedVtt: afterReplacements, replacements: staticReplacements } = applyReplacements(editedVtt, replacementsOption);
    editedVtt = afterReplacements;
    totalReplacementCount += staticReplacements.length;
    replacementsResult = { replacements: staticReplacements };
  }
  // Attach the asset duration to the usage record (when an AI call was made).
  const usageWithMetadata = usage ? {
    ...usage,
    metadata: {
      assetDurationSeconds
    }
  } : void 0;
  // --- Early exit: caller only wanted the edited VTT, not a Mux round-trip.
  if (!uploadToMux) {
    return {
      assetId,
      trackId,
      originalVtt: vttContent,
      editedVtt,
      totalReplacementCount,
      autoCensorProfanity: autoCensorResult,
      replacements: replacementsResult,
      usage: usageWithMetadata
    };
  }
  // --- Upload edited VTT to storage and get a presigned URL for Mux ingestion.
  let presignedUrl;
  try {
    presignedUrl = await uploadEditedVttToS3({
      editedVtt,
      assetId,
      trackId,
      s3Endpoint,
      s3Region,
      s3Bucket,
      storageAdapter,
      s3SignedUrlExpirySeconds: options.s3SignedUrlExpirySeconds
    });
  } catch (error) {
    throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  // --- Attach the edited VTT as a new track. A failure here is non-fatal
  // (warn only) so the caller still receives the edited VTT and presigned URL.
  let uploadedTrackId;
  try {
    const languageCode = sourceTrack.language_code || "en";
    const suffix = trackNameSuffix ?? "edited";
    const trackName = `${sourceTrack.name || "Subtitles"} (${suffix})`;
    uploadedTrackId = await createTextTrackOnMux(
      assetId,
      languageCode,
      trackName,
      presignedUrl,
      credentials
    );
  } catch (error) {
    console.warn(`Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  // --- Only delete the original track once the replacement exists; again non-fatal.
  if (deleteOriginal && uploadedTrackId) {
    try {
      await deleteTrackOnMux(assetId, trackId, credentials);
    } catch (error) {
      console.warn(`Failed to delete original track: ${error instanceof Error ? error.message : "Unknown error"}`);
    }
  }
  return {
    assetId,
    trackId,
    originalVtt: vttContent,
    editedVtt,
    totalReplacementCount,
    autoCensorProfanity: autoCensorResult,
    replacements: replacementsResult,
    uploadedTrackId,
    presignedUrl,
    usage: usageWithMetadata
  };
}
|
|
@@ -2924,6 +3820,7 @@ async function moderateImageWithOpenAI(entry) {
|
|
|
2924
3820
|
const categoryScores = json.results?.[0]?.category_scores || {};
|
|
2925
3821
|
return {
|
|
2926
3822
|
url: entry.url,
|
|
3823
|
+
time: entry.time,
|
|
2927
3824
|
sexual: categoryScores.sexual || 0,
|
|
2928
3825
|
violence: categoryScores.violence || 0,
|
|
2929
3826
|
error: false
|
|
@@ -2932,6 +3829,7 @@ async function moderateImageWithOpenAI(entry) {
|
|
|
2932
3829
|
console.error("OpenAI moderation failed:", error);
|
|
2933
3830
|
return {
|
|
2934
3831
|
url: entry.url,
|
|
3832
|
+
time: entry.time,
|
|
2935
3833
|
sexual: 0,
|
|
2936
3834
|
violence: 0,
|
|
2937
3835
|
error: true,
|
|
@@ -2939,11 +3837,13 @@ async function moderateImageWithOpenAI(entry) {
|
|
|
2939
3837
|
};
|
|
2940
3838
|
}
|
|
2941
3839
|
}
|
|
2942
|
-
async function requestOpenAIModeration(
|
|
3840
|
+
// Runs OpenAI moderation over a batch of thumbnail images, preserving each
// image's `time` so scores can be mapped back to timestamps. In "base64" mode
// the images are downloaded first and submitted inline; otherwise the URLs are
// submitted directly. Work is fanned out with bounded concurrency.
async function requestOpenAIModeration(images, model, maxConcurrent = 5, submissionMode = "url", downloadOptions, credentials) {
  "use step";
  let entries;
  if (submissionMode === "base64") {
    const urls = images.map((img) => img.url);
    // Downloaded results only carry the URL, so recover `time` via lookup.
    const timeByUrl = new Map(images.map((img) => [img.url, img.time]));
    const downloaded = await downloadImagesAsBase64(urls, downloadOptions, maxConcurrent);
    entries = downloaded.map((img) => ({
      url: img.url,
      time: timeByUrl.get(img.url),
      image: img.base64Data,
      model,
      credentials
    }));
  } else {
    entries = images.map((img) => ({
      url: img.url,
      time: img.time,
      image: img.url,
      model,
      credentials
    }));
  }
  return processConcurrently(entries, moderateImageWithOpenAI, maxConcurrent);
}
|
|
2949
3849
|
async function requestOpenAITextModeration(text, model, url, credentials) {
|
|
@@ -3088,6 +3988,7 @@ async function moderateImageWithHive(entry) {
|
|
|
3088
3988
|
const violence = getHiveCategoryScores(classes, HIVE_VIOLENCE_CATEGORIES);
|
|
3089
3989
|
return {
|
|
3090
3990
|
url: entry.url,
|
|
3991
|
+
time: entry.time,
|
|
3091
3992
|
sexual,
|
|
3092
3993
|
violence,
|
|
3093
3994
|
error: false
|
|
@@ -3095,6 +3996,7 @@ async function moderateImageWithHive(entry) {
|
|
|
3095
3996
|
} catch (error) {
|
|
3096
3997
|
return {
|
|
3097
3998
|
url: entry.url,
|
|
3999
|
+
time: entry.time,
|
|
3098
4000
|
sexual: 0,
|
|
3099
4001
|
violence: 0,
|
|
3100
4002
|
error: true,
|
|
@@ -3102,19 +4004,23 @@ async function moderateImageWithHive(entry) {
|
|
|
3102
4004
|
};
|
|
3103
4005
|
}
|
|
3104
4006
|
}
|
|
3105
|
-
async function requestHiveModeration(
|
|
4007
|
+
async function requestHiveModeration(images, maxConcurrent = 5, submissionMode = "url", downloadOptions, credentials) {
|
|
3106
4008
|
"use step";
|
|
4009
|
+
const imageUrls = images.map((img) => img.url);
|
|
4010
|
+
const timeByUrl = new Map(images.map((img) => [img.url, img.time]));
|
|
3107
4011
|
const targets = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map((img) => ({
|
|
3108
4012
|
url: img.url,
|
|
4013
|
+
time: timeByUrl.get(img.url),
|
|
3109
4014
|
source: {
|
|
3110
4015
|
kind: "file",
|
|
3111
4016
|
buffer: img.buffer,
|
|
3112
4017
|
contentType: img.contentType
|
|
3113
4018
|
},
|
|
3114
4019
|
credentials
|
|
3115
|
-
})) :
|
|
3116
|
-
url,
|
|
3117
|
-
|
|
4020
|
+
})) : images.map((img) => ({
|
|
4021
|
+
url: img.url,
|
|
4022
|
+
time: img.time,
|
|
4023
|
+
source: { kind: "url", value: img.url },
|
|
3118
4024
|
credentials
|
|
3119
4025
|
}));
|
|
3120
4026
|
return await processConcurrently(targets, moderateImageWithHive, maxConcurrent);
|
|
@@ -3125,10 +4031,8 @@ async function getThumbnailUrlsFromTimestamps(playbackId, timestampsMs, options)
|
|
|
3125
4031
|
const baseUrl = getMuxThumbnailBaseUrl(playbackId);
|
|
3126
4032
|
const urlPromises = timestampsMs.map(async (tsMs) => {
|
|
3127
4033
|
const time = Number((tsMs / 1e3).toFixed(2));
|
|
3128
|
-
|
|
3129
|
-
|
|
3130
|
-
}
|
|
3131
|
-
return `${baseUrl}?time=${time}&width=${width}`;
|
|
4034
|
+
const url = shouldSign ? await signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials) : `${baseUrl}?time=${time}&width=${width}`;
|
|
4035
|
+
return { url, time };
|
|
3132
4036
|
});
|
|
3133
4037
|
return Promise.all(urlPromises);
|
|
3134
4038
|
}
|
|
@@ -3279,16 +4183,18 @@ async function getModerationScores(assetId, options = {}) {
|
|
|
3279
4183
|
}
|
|
3280
4184
|
|
|
3281
4185
|
// src/workflows/summarization.ts
|
|
3282
|
-
import { generateText as
|
|
3283
|
-
import
|
|
3284
|
-
import { z as
|
|
3285
|
-
var
|
|
3286
|
-
var
|
|
3287
|
-
|
|
3288
|
-
|
|
3289
|
-
|
|
4186
|
+
import { generateText as generateText5, Output as Output5 } from "ai";
|
|
4187
|
+
import dedent5 from "dedent";
|
|
4188
|
+
import { z as z6 } from "zod";
|
|
4189
|
+
var DEFAULT_SUMMARY_KEYWORD_LIMIT = 10;
|
|
4190
|
+
var DEFAULT_TITLE_LENGTH = 10;
|
|
4191
|
+
var DEFAULT_DESCRIPTION_LENGTH = 50;
|
|
4192
|
+
var summarySchema = z6.object({
|
|
4193
|
+
keywords: z6.array(z6.string()),
|
|
4194
|
+
title: z6.string(),
|
|
4195
|
+
description: z6.string()
|
|
3290
4196
|
}).strict();
|
|
3291
|
-
var SUMMARY_OUTPUT =
|
|
4197
|
+
var SUMMARY_OUTPUT = Output5.object({
|
|
3292
4198
|
name: "summary_metadata",
|
|
3293
4199
|
description: "Structured summary with title, description, and keywords.",
|
|
3294
4200
|
schema: summarySchema
|
|
@@ -3299,10 +4205,49 @@ var TONE_INSTRUCTIONS = {
|
|
|
3299
4205
|
playful: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
|
|
3300
4206
|
professional: "Provide a professional, executive-level analysis suitable for business reporting."
|
|
3301
4207
|
};
|
|
4208
|
+
var DESCRIPTION_LENGTH_THRESHOLD_SMALL = 25;
|
|
4209
|
+
var DESCRIPTION_LENGTH_THRESHOLD_LARGE = 100;
|
|
4210
|
+
// Builds the description-requirements prompt text for the summarization
// builders. The guidance scales with the requested word count — terse below
// DESCRIPTION_LENGTH_THRESHOLD_SMALL, thorough above
// DESCRIPTION_LENGTH_THRESHOLD_LARGE, balanced in between — and is phrased
// for either "video" or audio content.
function buildDescriptionGuidance(wordCount, contentType) {
  const isVideo = contentType === "video";
  const size = wordCount < DESCRIPTION_LENGTH_THRESHOLD_SMALL ? "small" : wordCount > DESCRIPTION_LENGTH_THRESHOLD_LARGE ? "large" : "medium";
  if (size === "small") {
    return isVideo ? dedent5`A brief summary of the video in approximately ${wordCount} words.
Focus on the single most important subject or action.
Write in present tense.` : dedent5`A brief summary of the audio content in approximately ${wordCount} words.
Focus on the single most important topic or theme.
Write in present tense.`;
  }
  if (size === "large") {
    return isVideo ? dedent5`A detailed summary that describes what happens across the video.
Aim for approximately ${wordCount} words, and you may use multiple sentences.
Be thorough: cover subjects, actions, setting, progression, and any notable details visible across frames.
Write in present tense. Be specific about observable details rather than making assumptions.
If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.` : dedent5`A detailed summary that describes the audio content.
Aim for approximately ${wordCount} words, and you may use multiple sentences.
Be thorough: cover topics, speakers, themes, progression, and any notable insights.
Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
Focus on the spoken content and any key insights, dialogue, or narrative elements.`;
  }
  return isVideo ? dedent5`A summary that describes what happens across the video.
Aim for approximately ${wordCount} words, and you may use multiple sentences.
Cover the main subjects, actions, setting, and any notable progression visible across frames.
Write in present tense. Be specific about observable details rather than making assumptions.
If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.` : dedent5`A summary that describes the audio content.
Aim for approximately ${wordCount} words, and you may use multiple sentences.
Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
Focus on the spoken content and any key insights, dialogue, or narrative elements.`;
}
|
|
3302
4248
|
function createSummarizationBuilder({ titleLength, descriptionLength, tagCount } = {}) {
|
|
3303
|
-
const titleBrevity =
|
|
3304
|
-
const
|
|
3305
|
-
const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
|
|
4249
|
+
const titleBrevity = `Aim for approximately ${titleLength ?? DEFAULT_TITLE_LENGTH} words.`;
|
|
4250
|
+
const keywordLimit = tagCount ?? DEFAULT_SUMMARY_KEYWORD_LIMIT;
|
|
3306
4251
|
return createPromptBuilder({
|
|
3307
4252
|
template: {
|
|
3308
4253
|
task: {
|
|
@@ -3311,7 +4256,7 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
|
|
|
3311
4256
|
},
|
|
3312
4257
|
title: {
|
|
3313
4258
|
tag: "title_requirements",
|
|
3314
|
-
content:
|
|
4259
|
+
content: dedent5`
|
|
3315
4260
|
A short, compelling headline that immediately communicates the subject or action.
|
|
3316
4261
|
${titleBrevity} Think of how a news headline or video card title would read.
|
|
3317
4262
|
Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
|
|
@@ -3319,15 +4264,11 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
|
|
|
3319
4264
|
},
|
|
3320
4265
|
description: {
|
|
3321
4266
|
tag: "description_requirements",
|
|
3322
|
-
content:
|
|
3323
|
-
A concise summary (${descConstraint}) that describes what happens across the video.
|
|
3324
|
-
Cover the main subjects, actions, setting, and any notable progression visible across frames.
|
|
3325
|
-
Write in present tense. Be specific about observable details rather than making assumptions.
|
|
3326
|
-
If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
|
|
4267
|
+
content: buildDescriptionGuidance(descriptionLength ?? DEFAULT_DESCRIPTION_LENGTH, "video")
|
|
3327
4268
|
},
|
|
3328
4269
|
keywords: {
|
|
3329
4270
|
tag: "keywords_requirements",
|
|
3330
|
-
content:
|
|
4271
|
+
content: dedent5`
|
|
3331
4272
|
Specific, searchable terms (up to ${keywordLimit}) that capture:
|
|
3332
4273
|
- Primary subjects (people, animals, objects)
|
|
3333
4274
|
- Actions and activities being performed
|
|
@@ -3339,7 +4280,7 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
|
|
|
3339
4280
|
},
|
|
3340
4281
|
qualityGuidelines: {
|
|
3341
4282
|
tag: "quality_guidelines",
|
|
3342
|
-
content:
|
|
4283
|
+
content: dedent5`
|
|
3343
4284
|
- Examine all frames to understand the full context and progression
|
|
3344
4285
|
- Be precise: "golden retriever" is better than "dog" when identifiable
|
|
3345
4286
|
- Capture the narrative: what begins, develops, and concludes
|
|
@@ -3350,9 +4291,8 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
|
|
|
3350
4291
|
});
|
|
3351
4292
|
}
|
|
3352
4293
|
function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {}) {
|
|
3353
|
-
const titleBrevity =
|
|
3354
|
-
const
|
|
3355
|
-
const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
|
|
4294
|
+
const titleBrevity = `Aim for approximately ${titleLength ?? DEFAULT_TITLE_LENGTH} words.`;
|
|
4295
|
+
const keywordLimit = tagCount ?? DEFAULT_SUMMARY_KEYWORD_LIMIT;
|
|
3356
4296
|
return createPromptBuilder({
|
|
3357
4297
|
template: {
|
|
3358
4298
|
task: {
|
|
@@ -3361,7 +4301,7 @@ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {
|
|
|
3361
4301
|
},
|
|
3362
4302
|
title: {
|
|
3363
4303
|
tag: "title_requirements",
|
|
3364
|
-
content:
|
|
4304
|
+
content: dedent5`
|
|
3365
4305
|
A short, compelling headline that immediately communicates the subject or topic.
|
|
3366
4306
|
${titleBrevity} Think of how a podcast title or audio description would read.
|
|
3367
4307
|
Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
|
|
@@ -3369,15 +4309,11 @@ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {
|
|
|
3369
4309
|
},
|
|
3370
4310
|
description: {
|
|
3371
4311
|
tag: "description_requirements",
|
|
3372
|
-
content:
|
|
3373
|
-
A concise summary (${descConstraint}) that describes the audio content.
|
|
3374
|
-
Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
|
|
3375
|
-
Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
|
|
3376
|
-
Focus on the spoken content and any key insights, dialogue, or narrative elements.`
|
|
4312
|
+
content: buildDescriptionGuidance(descriptionLength ?? DEFAULT_DESCRIPTION_LENGTH, "audio")
|
|
3377
4313
|
},
|
|
3378
4314
|
keywords: {
|
|
3379
4315
|
tag: "keywords_requirements",
|
|
3380
|
-
content:
|
|
4316
|
+
content: dedent5`
|
|
3381
4317
|
Specific, searchable terms (up to ${keywordLimit}) that capture:
|
|
3382
4318
|
- Primary topics and themes
|
|
3383
4319
|
- Speakers or presenters (if named)
|
|
@@ -3389,7 +4325,7 @@ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {
|
|
|
3389
4325
|
},
|
|
3390
4326
|
qualityGuidelines: {
|
|
3391
4327
|
tag: "quality_guidelines",
|
|
3392
|
-
content:
|
|
4328
|
+
content: dedent5`
|
|
3393
4329
|
- Analyze the full transcript to understand context and themes
|
|
3394
4330
|
- Be precise: use specific terminology when mentioned
|
|
3395
4331
|
- Capture the narrative: what is introduced, discussed, and concluded
|
|
@@ -3399,7 +4335,7 @@ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {
|
|
|
3399
4335
|
sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
|
|
3400
4336
|
});
|
|
3401
4337
|
}
|
|
3402
|
-
var
|
|
4338
|
+
var SYSTEM_PROMPT4 = dedent5`
|
|
3403
4339
|
<role>
|
|
3404
4340
|
You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
|
|
3405
4341
|
</role>
|
|
@@ -3456,7 +4392,7 @@ var SYSTEM_PROMPT3 = dedent4`
|
|
|
3456
4392
|
|
|
3457
4393
|
Write as if describing reality, not describing a recording of reality.
|
|
3458
4394
|
</language_guidelines>`;
|
|
3459
|
-
var AUDIO_ONLY_SYSTEM_PROMPT =
|
|
4395
|
+
var AUDIO_ONLY_SYSTEM_PROMPT = dedent5`
|
|
3460
4396
|
<role>
|
|
3461
4397
|
You are an audio content analyst specializing in transcript analysis and metadata generation.
|
|
3462
4398
|
</role>
|
|
@@ -3524,6 +4460,11 @@ function buildUserPrompt4({
|
|
|
3524
4460
|
const contextSections = [createToneSection(TONE_INSTRUCTIONS[tone])];
|
|
3525
4461
|
if (languageName) {
|
|
3526
4462
|
contextSections.push(createLanguageSection(languageName));
|
|
4463
|
+
} else {
|
|
4464
|
+
contextSections.push({
|
|
4465
|
+
tag: "language",
|
|
4466
|
+
content: "Respond in English. Never switch languages to satisfy length constraints."
|
|
4467
|
+
});
|
|
3527
4468
|
}
|
|
3528
4469
|
if (transcriptText) {
|
|
3529
4470
|
const format = isCleanTranscript ? "plain text" : "WebVTT";
|
|
@@ -3536,7 +4477,7 @@ function buildUserPrompt4({
|
|
|
3536
4477
|
async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, systemPrompt, credentials) {
|
|
3537
4478
|
"use step";
|
|
3538
4479
|
const model = await createLanguageModelFromConfig(provider, modelId, credentials);
|
|
3539
|
-
const response = await
|
|
4480
|
+
const response = await generateText5({
|
|
3540
4481
|
model,
|
|
3541
4482
|
output: SUMMARY_OUTPUT,
|
|
3542
4483
|
messages: [
|
|
@@ -3571,7 +4512,7 @@ async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, s
|
|
|
3571
4512
|
async function analyzeAudioOnly(provider, modelId, userPrompt, systemPrompt, credentials) {
|
|
3572
4513
|
"use step";
|
|
3573
4514
|
const model = await createLanguageModelFromConfig(provider, modelId, credentials);
|
|
3574
|
-
const response = await
|
|
4515
|
+
const response = await generateText5({
|
|
3575
4516
|
model,
|
|
3576
4517
|
output: SUMMARY_OUTPUT,
|
|
3577
4518
|
messages: [
|
|
@@ -3600,7 +4541,7 @@ async function analyzeAudioOnly(provider, modelId, userPrompt, systemPrompt, cre
|
|
|
3600
4541
|
}
|
|
3601
4542
|
};
|
|
3602
4543
|
}
|
|
3603
|
-
function normalizeKeywords(keywords, limit =
|
|
4544
|
+
function normalizeKeywords(keywords, limit = DEFAULT_SUMMARY_KEYWORD_LIMIT) {
|
|
3604
4545
|
if (!Array.isArray(keywords) || keywords.length === 0) {
|
|
3605
4546
|
return [];
|
|
3606
4547
|
}
|
|
@@ -3687,7 +4628,7 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
3687
4628
|
});
|
|
3688
4629
|
let analysisResponse;
|
|
3689
4630
|
let imageUrl;
|
|
3690
|
-
const systemPrompt = isAudioOnly ? AUDIO_ONLY_SYSTEM_PROMPT :
|
|
4631
|
+
const systemPrompt = isAudioOnly ? AUDIO_ONLY_SYSTEM_PROMPT : SYSTEM_PROMPT4;
|
|
3691
4632
|
try {
|
|
3692
4633
|
if (isAudioOnly) {
|
|
3693
4634
|
analysisResponse = await analyzeAudioOnly(
|
|
@@ -3740,7 +4681,7 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
3740
4681
|
assetId,
|
|
3741
4682
|
title: analysisResponse.result.title,
|
|
3742
4683
|
description: analysisResponse.result.description,
|
|
3743
|
-
tags: normalizeKeywords(analysisResponse.result.keywords, tagCount ??
|
|
4684
|
+
tags: normalizeKeywords(analysisResponse.result.keywords, tagCount ?? DEFAULT_SUMMARY_KEYWORD_LIMIT),
|
|
3744
4685
|
storyboardUrl: imageUrl,
|
|
3745
4686
|
// undefined for audio-only assets
|
|
3746
4687
|
usage: {
|
|
@@ -3753,52 +4694,10 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
3753
4694
|
};
|
|
3754
4695
|
}
|
|
3755
4696
|
|
|
3756
|
-
// src/lib/storage-adapter.ts
|
|
3757
|
-
function requireCredentials(accessKeyId, secretAccessKey) {
|
|
3758
|
-
if (!accessKeyId || !secretAccessKey) {
|
|
3759
|
-
throw new Error(
|
|
3760
|
-
"S3 credentials are required for default storage operations. Provide S3_ACCESS_KEY_ID and S3_SECRET_ACCESS_KEY or pass options.storageAdapter."
|
|
3761
|
-
);
|
|
3762
|
-
}
|
|
3763
|
-
return { accessKeyId, secretAccessKey };
|
|
3764
|
-
}
|
|
3765
|
-
async function putObjectWithStorageAdapter(input, adapter) {
|
|
3766
|
-
if (adapter) {
|
|
3767
|
-
await adapter.putObject(input);
|
|
3768
|
-
return;
|
|
3769
|
-
}
|
|
3770
|
-
const credentials = requireCredentials(input.accessKeyId, input.secretAccessKey);
|
|
3771
|
-
await putObjectToS3({
|
|
3772
|
-
accessKeyId: credentials.accessKeyId,
|
|
3773
|
-
secretAccessKey: credentials.secretAccessKey,
|
|
3774
|
-
endpoint: input.endpoint,
|
|
3775
|
-
region: input.region,
|
|
3776
|
-
bucket: input.bucket,
|
|
3777
|
-
key: input.key,
|
|
3778
|
-
body: input.body,
|
|
3779
|
-
contentType: input.contentType
|
|
3780
|
-
});
|
|
3781
|
-
}
|
|
3782
|
-
async function createPresignedGetUrlWithStorageAdapter(input, adapter) {
|
|
3783
|
-
if (adapter) {
|
|
3784
|
-
return adapter.createPresignedGetUrl(input);
|
|
3785
|
-
}
|
|
3786
|
-
const credentials = requireCredentials(input.accessKeyId, input.secretAccessKey);
|
|
3787
|
-
return createPresignedGetUrl({
|
|
3788
|
-
accessKeyId: credentials.accessKeyId,
|
|
3789
|
-
secretAccessKey: credentials.secretAccessKey,
|
|
3790
|
-
endpoint: input.endpoint,
|
|
3791
|
-
region: input.region,
|
|
3792
|
-
bucket: input.bucket,
|
|
3793
|
-
key: input.key,
|
|
3794
|
-
expiresInSeconds: input.expiresInSeconds
|
|
3795
|
-
});
|
|
3796
|
-
}
|
|
3797
|
-
|
|
3798
4697
|
// src/workflows/translate-audio.ts
|
|
3799
4698
|
var STATIC_RENDITION_POLL_INTERVAL_MS = 5e3;
|
|
3800
4699
|
var STATIC_RENDITION_MAX_ATTEMPTS = 36;
|
|
3801
|
-
async function
|
|
4700
|
+
async function sleep2(ms) {
|
|
3802
4701
|
"use step";
|
|
3803
4702
|
await new Promise((resolve) => setTimeout(resolve, ms));
|
|
3804
4703
|
}
|
|
@@ -3864,7 +4763,7 @@ async function waitForAudioStaticRendition({
|
|
|
3864
4763
|
console.warn(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
|
|
3865
4764
|
}
|
|
3866
4765
|
for (let attempt = 1; attempt <= STATIC_RENDITION_MAX_ATTEMPTS; attempt++) {
|
|
3867
|
-
await
|
|
4766
|
+
await sleep2(STATIC_RENDITION_POLL_INTERVAL_MS);
|
|
3868
4767
|
currentAsset = await mux.video.assets.retrieve(assetId);
|
|
3869
4768
|
if (hasReadyAudioStaticRendition(currentAsset)) {
|
|
3870
4769
|
return currentAsset;
|
|
@@ -3971,7 +4870,8 @@ async function uploadDubbedAudioToS3({
|
|
|
3971
4870
|
s3Endpoint,
|
|
3972
4871
|
s3Region,
|
|
3973
4872
|
s3Bucket,
|
|
3974
|
-
storageAdapter
|
|
4873
|
+
storageAdapter,
|
|
4874
|
+
s3SignedUrlExpirySeconds
|
|
3975
4875
|
}) {
|
|
3976
4876
|
"use step";
|
|
3977
4877
|
const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
|
|
@@ -3994,10 +4894,11 @@ async function uploadDubbedAudioToS3({
|
|
|
3994
4894
|
region: s3Region,
|
|
3995
4895
|
bucket: s3Bucket,
|
|
3996
4896
|
key: audioKey,
|
|
3997
|
-
expiresInSeconds:
|
|
4897
|
+
expiresInSeconds: s3SignedUrlExpirySeconds ?? 86400
|
|
3998
4898
|
}, storageAdapter);
|
|
4899
|
+
const expiryHours = Math.round((s3SignedUrlExpirySeconds ?? 86400) / 3600);
|
|
3999
4900
|
console.warn(`\u2705 Audio uploaded successfully to: ${audioKey}`);
|
|
4000
|
-
console.warn(`\u{1F517} Generated presigned URL (expires in 1
|
|
4901
|
+
console.warn(`\u{1F517} Generated presigned URL (expires in ${expiryHours} hour${expiryHours === 1 ? "" : "s"})`);
|
|
4001
4902
|
return presignedUrl;
|
|
4002
4903
|
}
|
|
4003
4904
|
async function createAudioTrackOnMux(assetId, languageCode, presignedUrl, credentials) {
|
|
@@ -4096,7 +4997,7 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
4096
4997
|
const maxPollAttempts = 180;
|
|
4097
4998
|
let targetLanguages = [];
|
|
4098
4999
|
while (dubbingStatus === "dubbing" && pollAttempts < maxPollAttempts) {
|
|
4099
|
-
await
|
|
5000
|
+
await sleep2(1e4);
|
|
4100
5001
|
pollAttempts++;
|
|
4101
5002
|
try {
|
|
4102
5003
|
const statusResult = await checkElevenLabsDubbingStatus({
|
|
@@ -4161,7 +5062,8 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
4161
5062
|
s3Endpoint,
|
|
4162
5063
|
s3Region,
|
|
4163
5064
|
s3Bucket,
|
|
4164
|
-
storageAdapter: effectiveStorageAdapter
|
|
5065
|
+
storageAdapter: effectiveStorageAdapter,
|
|
5066
|
+
s3SignedUrlExpirySeconds: options.s3SignedUrlExpirySeconds
|
|
4165
5067
|
});
|
|
4166
5068
|
} catch (error) {
|
|
4167
5069
|
throw new Error(`Failed to upload audio to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
@@ -4197,19 +5099,186 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
4197
5099
|
}
|
|
4198
5100
|
|
|
4199
5101
|
// src/workflows/translate-captions.ts
|
|
4200
|
-
import {
|
|
4201
|
-
|
|
4202
|
-
|
|
4203
|
-
|
|
5102
|
+
import {
|
|
5103
|
+
APICallError,
|
|
5104
|
+
generateText as generateText6,
|
|
5105
|
+
NoObjectGeneratedError,
|
|
5106
|
+
Output as Output6,
|
|
5107
|
+
RetryError,
|
|
5108
|
+
TypeValidationError
|
|
5109
|
+
} from "ai";
|
|
5110
|
+
import dedent6 from "dedent";
|
|
5111
|
+
import { z as z7 } from "zod";
|
|
5112
|
+
var translationSchema = z7.object({
|
|
5113
|
+
translation: z7.string()
|
|
4204
5114
|
});
|
|
4205
|
-
var
|
|
4206
|
-
|
|
4207
|
-
|
|
4208
|
-
|
|
4209
|
-
|
|
4210
|
-
|
|
5115
|
+
var SYSTEM_PROMPT5 = dedent6`
|
|
5116
|
+
You are a subtitle translation expert. Translate VTT subtitle files to the target language specified by the user.
|
|
5117
|
+
You may receive either a full VTT file or a chunk from a larger VTT.
|
|
5118
|
+
Preserve all timestamps, cue ordering, and VTT formatting exactly as they appear.
|
|
5119
|
+
Return JSON with a single key "translation" containing the translated VTT content.
|
|
5120
|
+
`;
|
|
5121
|
+
var CUE_TRANSLATION_SYSTEM_PROMPT = dedent6`
|
|
5122
|
+
You are a subtitle translation expert.
|
|
5123
|
+
You will receive a sequence of subtitle cues extracted from a VTT file.
|
|
5124
|
+
Translate the cues to the requested target language while preserving their original order.
|
|
5125
|
+
Treat the cue list as continuous context so the translation reads naturally across adjacent lines.
|
|
5126
|
+
Return JSON with a single key "translations" containing exactly one translated string for each input cue.
|
|
5127
|
+
Do not merge, split, omit, reorder, or add cues.
|
|
5128
|
+
`;
|
|
5129
|
+
var DEFAULT_TRANSLATION_CHUNKING = {
|
|
5130
|
+
enabled: true,
|
|
5131
|
+
minimumAssetDurationSeconds: 30 * 60,
|
|
5132
|
+
targetChunkDurationSeconds: 30 * 60,
|
|
5133
|
+
maxConcurrentTranslations: 4,
|
|
5134
|
+
maxCuesPerChunk: 80,
|
|
5135
|
+
maxCueTextTokensPerChunk: 2e3
|
|
5136
|
+
};
|
|
5137
|
+
var TOKEN_USAGE_FIELDS = [
|
|
5138
|
+
"inputTokens",
|
|
5139
|
+
"outputTokens",
|
|
5140
|
+
"totalTokens",
|
|
5141
|
+
"reasoningTokens",
|
|
5142
|
+
"cachedInputTokens"
|
|
5143
|
+
];
|
|
5144
|
+
var TranslationChunkValidationError = class extends Error {
|
|
5145
|
+
constructor(message) {
|
|
5146
|
+
super(message);
|
|
5147
|
+
this.name = "TranslationChunkValidationError";
|
|
4211
5148
|
}
|
|
4212
|
-
|
|
5149
|
+
};
|
|
5150
|
+
function isTranslationChunkValidationError(error) {
|
|
5151
|
+
return error instanceof TranslationChunkValidationError;
|
|
5152
|
+
}
|
|
5153
|
+
function isProviderServiceError(error) {
|
|
5154
|
+
if (!error) {
|
|
5155
|
+
return false;
|
|
5156
|
+
}
|
|
5157
|
+
if (RetryError.isInstance(error)) {
|
|
5158
|
+
return isProviderServiceError(error.lastError);
|
|
5159
|
+
}
|
|
5160
|
+
if (APICallError.isInstance(error)) {
|
|
5161
|
+
return true;
|
|
5162
|
+
}
|
|
5163
|
+
if (error instanceof Error && "cause" in error) {
|
|
5164
|
+
return isProviderServiceError(error.cause);
|
|
5165
|
+
}
|
|
5166
|
+
return false;
|
|
5167
|
+
}
|
|
5168
|
+
function shouldSplitChunkTranslationError(error) {
|
|
5169
|
+
if (isProviderServiceError(error)) {
|
|
5170
|
+
return false;
|
|
5171
|
+
}
|
|
5172
|
+
return NoObjectGeneratedError.isInstance(error) || TypeValidationError.isInstance(error) || isTranslationChunkValidationError(error);
|
|
5173
|
+
}
|
|
5174
|
+
function isDefinedTokenUsageValue(value) {
|
|
5175
|
+
return typeof value === "number";
|
|
5176
|
+
}
|
|
5177
|
+
function resolveTranslationChunkingOptions(options) {
|
|
5178
|
+
const targetChunkDurationSeconds = Math.max(
|
|
5179
|
+
1,
|
|
5180
|
+
options?.targetChunkDurationSeconds ?? DEFAULT_TRANSLATION_CHUNKING.targetChunkDurationSeconds
|
|
5181
|
+
);
|
|
5182
|
+
return {
|
|
5183
|
+
enabled: options?.enabled ?? DEFAULT_TRANSLATION_CHUNKING.enabled,
|
|
5184
|
+
minimumAssetDurationSeconds: Math.max(
|
|
5185
|
+
1,
|
|
5186
|
+
options?.minimumAssetDurationSeconds ?? DEFAULT_TRANSLATION_CHUNKING.minimumAssetDurationSeconds
|
|
5187
|
+
),
|
|
5188
|
+
targetChunkDurationSeconds,
|
|
5189
|
+
maxConcurrentTranslations: Math.max(
|
|
5190
|
+
1,
|
|
5191
|
+
options?.maxConcurrentTranslations ?? DEFAULT_TRANSLATION_CHUNKING.maxConcurrentTranslations
|
|
5192
|
+
),
|
|
5193
|
+
maxCuesPerChunk: Math.max(
|
|
5194
|
+
1,
|
|
5195
|
+
options?.maxCuesPerChunk ?? DEFAULT_TRANSLATION_CHUNKING.maxCuesPerChunk
|
|
5196
|
+
),
|
|
5197
|
+
maxCueTextTokensPerChunk: Math.max(
|
|
5198
|
+
1,
|
|
5199
|
+
options?.maxCueTextTokensPerChunk ?? DEFAULT_TRANSLATION_CHUNKING.maxCueTextTokensPerChunk
|
|
5200
|
+
)
|
|
5201
|
+
};
|
|
5202
|
+
}
|
|
5203
|
+
function aggregateTokenUsage(usages) {
|
|
5204
|
+
return TOKEN_USAGE_FIELDS.reduce((aggregate, field) => {
|
|
5205
|
+
const values = usages.map((usage) => usage[field]).filter(isDefinedTokenUsageValue);
|
|
5206
|
+
if (values.length > 0) {
|
|
5207
|
+
aggregate[field] = values.reduce((total, value) => total + value, 0);
|
|
5208
|
+
}
|
|
5209
|
+
return aggregate;
|
|
5210
|
+
}, {});
|
|
5211
|
+
}
|
|
5212
|
+
function createTranslationChunkRequest(id, cues, cueBlocks) {
|
|
5213
|
+
return {
|
|
5214
|
+
id,
|
|
5215
|
+
cueCount: cues.length,
|
|
5216
|
+
startTime: cues[0].startTime,
|
|
5217
|
+
endTime: cues[cues.length - 1].endTime,
|
|
5218
|
+
cues,
|
|
5219
|
+
cueBlocks
|
|
5220
|
+
};
|
|
5221
|
+
}
|
|
5222
|
+
function splitTranslationChunkRequestByBudget(id, cues, cueBlocks, maxCuesPerChunk, maxCueTextTokensPerChunk) {
|
|
5223
|
+
const chunks = chunkVTTCuesByBudget(cues, {
|
|
5224
|
+
maxCuesPerChunk,
|
|
5225
|
+
maxTextTokensPerChunk: maxCueTextTokensPerChunk
|
|
5226
|
+
});
|
|
5227
|
+
return chunks.map(
|
|
5228
|
+
(chunk, index) => createTranslationChunkRequest(
|
|
5229
|
+
chunks.length === 1 ? id : `${id}-part-${index}`,
|
|
5230
|
+
cues.slice(chunk.cueStartIndex, chunk.cueEndIndex + 1),
|
|
5231
|
+
cueBlocks.slice(chunk.cueStartIndex, chunk.cueEndIndex + 1)
|
|
5232
|
+
)
|
|
5233
|
+
);
|
|
5234
|
+
}
|
|
5235
|
+
function buildTranslationChunkRequests(vttContent, assetDurationSeconds, chunkingOptions) {
|
|
5236
|
+
const resolvedChunking = resolveTranslationChunkingOptions(chunkingOptions);
|
|
5237
|
+
const cues = parseVTTCues(vttContent);
|
|
5238
|
+
if (cues.length === 0) {
|
|
5239
|
+
return null;
|
|
5240
|
+
}
|
|
5241
|
+
const { preamble, cueBlocks } = splitVttPreambleAndCueBlocks(vttContent);
|
|
5242
|
+
if (cueBlocks.length !== cues.length) {
|
|
5243
|
+
console.warn(
|
|
5244
|
+
`Falling back to full-VTT caption translation because cue block count (${cueBlocks.length}) does not match parsed cue count (${cues.length}).`
|
|
5245
|
+
);
|
|
5246
|
+
return null;
|
|
5247
|
+
}
|
|
5248
|
+
if (!resolvedChunking.enabled) {
|
|
5249
|
+
return {
|
|
5250
|
+
preamble,
|
|
5251
|
+
chunks: [
|
|
5252
|
+
createTranslationChunkRequest("chunk-0", cues, cueBlocks)
|
|
5253
|
+
]
|
|
5254
|
+
};
|
|
5255
|
+
}
|
|
5256
|
+
if (typeof assetDurationSeconds !== "number" || assetDurationSeconds < resolvedChunking.minimumAssetDurationSeconds) {
|
|
5257
|
+
return {
|
|
5258
|
+
preamble,
|
|
5259
|
+
chunks: [
|
|
5260
|
+
createTranslationChunkRequest("chunk-0", cues, cueBlocks)
|
|
5261
|
+
]
|
|
5262
|
+
};
|
|
5263
|
+
}
|
|
5264
|
+
const targetChunkDurationSeconds = resolvedChunking.targetChunkDurationSeconds;
|
|
5265
|
+
const durationChunks = chunkVTTCuesByDuration(cues, {
|
|
5266
|
+
targetChunkDurationSeconds,
|
|
5267
|
+
maxChunkDurationSeconds: Math.max(targetChunkDurationSeconds, Math.round(targetChunkDurationSeconds * (7 / 6))),
|
|
5268
|
+
minChunkDurationSeconds: Math.max(1, Math.round(targetChunkDurationSeconds * (2 / 3)))
|
|
5269
|
+
});
|
|
5270
|
+
return {
|
|
5271
|
+
preamble,
|
|
5272
|
+
chunks: durationChunks.flatMap(
|
|
5273
|
+
(chunk) => splitTranslationChunkRequestByBudget(
|
|
5274
|
+
chunk.id,
|
|
5275
|
+
cues.slice(chunk.cueStartIndex, chunk.cueEndIndex + 1),
|
|
5276
|
+
cueBlocks.slice(chunk.cueStartIndex, chunk.cueEndIndex + 1),
|
|
5277
|
+
resolvedChunking.maxCuesPerChunk,
|
|
5278
|
+
resolvedChunking.maxCueTextTokensPerChunk
|
|
5279
|
+
)
|
|
5280
|
+
)
|
|
5281
|
+
};
|
|
4213
5282
|
}
|
|
4214
5283
|
async function translateVttWithAI({
|
|
4215
5284
|
vttContent,
|
|
@@ -4221,13 +5290,13 @@ async function translateVttWithAI({
|
|
|
4221
5290
|
}) {
|
|
4222
5291
|
"use step";
|
|
4223
5292
|
const model = await createLanguageModelFromConfig(provider, modelId, credentials);
|
|
4224
|
-
const response = await
|
|
5293
|
+
const response = await generateText6({
|
|
4225
5294
|
model,
|
|
4226
|
-
output:
|
|
5295
|
+
output: Output6.object({ schema: translationSchema }),
|
|
4227
5296
|
messages: [
|
|
4228
5297
|
{
|
|
4229
5298
|
role: "system",
|
|
4230
|
-
content:
|
|
5299
|
+
content: SYSTEM_PROMPT5
|
|
4231
5300
|
},
|
|
4232
5301
|
{
|
|
4233
5302
|
role: "user",
|
|
@@ -4248,6 +5317,176 @@ ${vttContent}`
|
|
|
4248
5317
|
}
|
|
4249
5318
|
};
|
|
4250
5319
|
}
|
|
5320
|
+
async function translateCueChunkWithAI({
|
|
5321
|
+
cues,
|
|
5322
|
+
fromLanguageCode,
|
|
5323
|
+
toLanguageCode,
|
|
5324
|
+
provider,
|
|
5325
|
+
modelId,
|
|
5326
|
+
credentials
|
|
5327
|
+
}) {
|
|
5328
|
+
"use step";
|
|
5329
|
+
const model = await createLanguageModelFromConfig(provider, modelId, credentials);
|
|
5330
|
+
const schema = z7.object({
|
|
5331
|
+
translations: z7.array(z7.string().min(1)).length(cues.length)
|
|
5332
|
+
});
|
|
5333
|
+
const cuePayload = cues.map((cue, index) => ({
|
|
5334
|
+
index,
|
|
5335
|
+
startTime: cue.startTime,
|
|
5336
|
+
endTime: cue.endTime,
|
|
5337
|
+
text: cue.text
|
|
5338
|
+
}));
|
|
5339
|
+
const response = await generateText6({
|
|
5340
|
+
model,
|
|
5341
|
+
output: Output6.object({ schema }),
|
|
5342
|
+
messages: [
|
|
5343
|
+
{
|
|
5344
|
+
role: "system",
|
|
5345
|
+
content: CUE_TRANSLATION_SYSTEM_PROMPT
|
|
5346
|
+
},
|
|
5347
|
+
{
|
|
5348
|
+
role: "user",
|
|
5349
|
+
content: `Translate from ${fromLanguageCode} to ${toLanguageCode}.
|
|
5350
|
+
Return exactly ${cues.length} translated cues in the same order as the input.
|
|
5351
|
+
|
|
5352
|
+
${JSON.stringify(cuePayload, null, 2)}`
|
|
5353
|
+
}
|
|
5354
|
+
]
|
|
5355
|
+
});
|
|
5356
|
+
return {
|
|
5357
|
+
translations: response.output.translations,
|
|
5358
|
+
usage: {
|
|
5359
|
+
inputTokens: response.usage.inputTokens,
|
|
5360
|
+
outputTokens: response.usage.outputTokens,
|
|
5361
|
+
totalTokens: response.usage.totalTokens,
|
|
5362
|
+
reasoningTokens: response.usage.reasoningTokens,
|
|
5363
|
+
cachedInputTokens: response.usage.cachedInputTokens
|
|
5364
|
+
}
|
|
5365
|
+
};
|
|
5366
|
+
}
|
|
5367
|
+
function splitTranslationChunkAtMidpoint(chunk) {
|
|
5368
|
+
const midpoint = Math.floor(chunk.cueCount / 2);
|
|
5369
|
+
if (midpoint <= 0 || midpoint >= chunk.cueCount) {
|
|
5370
|
+
throw new Error(`Cannot split chunk ${chunk.id} with cueCount=${chunk.cueCount}`);
|
|
5371
|
+
}
|
|
5372
|
+
return [
|
|
5373
|
+
createTranslationChunkRequest(
|
|
5374
|
+
`${chunk.id}-a`,
|
|
5375
|
+
chunk.cues.slice(0, midpoint),
|
|
5376
|
+
chunk.cueBlocks.slice(0, midpoint)
|
|
5377
|
+
),
|
|
5378
|
+
createTranslationChunkRequest(
|
|
5379
|
+
`${chunk.id}-b`,
|
|
5380
|
+
chunk.cues.slice(midpoint),
|
|
5381
|
+
chunk.cueBlocks.slice(midpoint)
|
|
5382
|
+
)
|
|
5383
|
+
];
|
|
5384
|
+
}
|
|
5385
|
+
async function translateChunkWithFallback({
|
|
5386
|
+
chunk,
|
|
5387
|
+
fromLanguageCode,
|
|
5388
|
+
toLanguageCode,
|
|
5389
|
+
provider,
|
|
5390
|
+
modelId,
|
|
5391
|
+
credentials
|
|
5392
|
+
}) {
|
|
5393
|
+
"use step";
|
|
5394
|
+
try {
|
|
5395
|
+
const result = await translateCueChunkWithAI({
|
|
5396
|
+
cues: chunk.cues,
|
|
5397
|
+
fromLanguageCode,
|
|
5398
|
+
toLanguageCode,
|
|
5399
|
+
provider,
|
|
5400
|
+
modelId,
|
|
5401
|
+
credentials
|
|
5402
|
+
});
|
|
5403
|
+
if (result.translations.length !== chunk.cueCount) {
|
|
5404
|
+
throw new TranslationChunkValidationError(
|
|
5405
|
+
`Chunk ${chunk.id} returned ${result.translations.length} cues, expected ${chunk.cueCount} for ${Math.round(chunk.startTime)}s-${Math.round(chunk.endTime)}s`
|
|
5406
|
+
);
|
|
5407
|
+
}
|
|
5408
|
+
return {
|
|
5409
|
+
translatedVtt: buildVttFromTranslatedCueBlocks(chunk.cueBlocks, result.translations),
|
|
5410
|
+
usage: result.usage
|
|
5411
|
+
};
|
|
5412
|
+
} catch (error) {
|
|
5413
|
+
if (!shouldSplitChunkTranslationError(error) || chunk.cueCount <= 1) {
|
|
5414
|
+
throw new Error(
|
|
5415
|
+
`Chunk ${chunk.id} failed for ${Math.round(chunk.startTime)}s-${Math.round(chunk.endTime)}s: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
5416
|
+
);
|
|
5417
|
+
}
|
|
5418
|
+
const [leftChunk, rightChunk] = splitTranslationChunkAtMidpoint(chunk);
|
|
5419
|
+
const [leftResult, rightResult] = await Promise.all([
|
|
5420
|
+
translateChunkWithFallback({
|
|
5421
|
+
chunk: leftChunk,
|
|
5422
|
+
fromLanguageCode,
|
|
5423
|
+
toLanguageCode,
|
|
5424
|
+
provider,
|
|
5425
|
+
modelId,
|
|
5426
|
+
credentials
|
|
5427
|
+
}),
|
|
5428
|
+
translateChunkWithFallback({
|
|
5429
|
+
chunk: rightChunk,
|
|
5430
|
+
fromLanguageCode,
|
|
5431
|
+
toLanguageCode,
|
|
5432
|
+
provider,
|
|
5433
|
+
modelId,
|
|
5434
|
+
credentials
|
|
5435
|
+
})
|
|
5436
|
+
]);
|
|
5437
|
+
return {
|
|
5438
|
+
translatedVtt: concatenateVttSegments([leftResult.translatedVtt, rightResult.translatedVtt]),
|
|
5439
|
+
usage: aggregateTokenUsage([leftResult.usage, rightResult.usage])
|
|
5440
|
+
};
|
|
5441
|
+
}
|
|
5442
|
+
}
|
|
5443
|
+
async function translateCaptionTrack({
|
|
5444
|
+
vttContent,
|
|
5445
|
+
assetDurationSeconds,
|
|
5446
|
+
fromLanguageCode,
|
|
5447
|
+
toLanguageCode,
|
|
5448
|
+
provider,
|
|
5449
|
+
modelId,
|
|
5450
|
+
credentials,
|
|
5451
|
+
chunking
|
|
5452
|
+
}) {
|
|
5453
|
+
"use step";
|
|
5454
|
+
const chunkPlan = buildTranslationChunkRequests(vttContent, assetDurationSeconds, chunking);
|
|
5455
|
+
if (!chunkPlan) {
|
|
5456
|
+
return translateVttWithAI({
|
|
5457
|
+
vttContent,
|
|
5458
|
+
fromLanguageCode,
|
|
5459
|
+
toLanguageCode,
|
|
5460
|
+
provider,
|
|
5461
|
+
modelId,
|
|
5462
|
+
credentials
|
|
5463
|
+
});
|
|
5464
|
+
}
|
|
5465
|
+
const resolvedChunking = resolveTranslationChunkingOptions(chunking);
|
|
5466
|
+
const translatedSegments = [];
|
|
5467
|
+
const usageByChunk = [];
|
|
5468
|
+
for (let index = 0; index < chunkPlan.chunks.length; index += resolvedChunking.maxConcurrentTranslations) {
|
|
5469
|
+
const batch = chunkPlan.chunks.slice(index, index + resolvedChunking.maxConcurrentTranslations);
|
|
5470
|
+
const batchResults = await Promise.all(
|
|
5471
|
+
batch.map(
|
|
5472
|
+
(chunk) => translateChunkWithFallback({
|
|
5473
|
+
chunk,
|
|
5474
|
+
fromLanguageCode,
|
|
5475
|
+
toLanguageCode,
|
|
5476
|
+
provider,
|
|
5477
|
+
modelId,
|
|
5478
|
+
credentials
|
|
5479
|
+
})
|
|
5480
|
+
)
|
|
5481
|
+
);
|
|
5482
|
+
translatedSegments.push(...batchResults.map((result) => result.translatedVtt));
|
|
5483
|
+
usageByChunk.push(...batchResults.map((result) => result.usage));
|
|
5484
|
+
}
|
|
5485
|
+
return {
|
|
5486
|
+
translatedVtt: concatenateVttSegments(translatedSegments, chunkPlan.preamble),
|
|
5487
|
+
usage: aggregateTokenUsage(usageByChunk)
|
|
5488
|
+
};
|
|
5489
|
+
}
|
|
4251
5490
|
async function uploadVttToS3({
|
|
4252
5491
|
translatedVtt,
|
|
4253
5492
|
assetId,
|
|
@@ -4256,7 +5495,8 @@ async function uploadVttToS3({
|
|
|
4256
5495
|
s3Endpoint,
|
|
4257
5496
|
s3Region,
|
|
4258
5497
|
s3Bucket,
|
|
4259
|
-
storageAdapter
|
|
5498
|
+
storageAdapter,
|
|
5499
|
+
s3SignedUrlExpirySeconds
|
|
4260
5500
|
}) {
|
|
4261
5501
|
"use step";
|
|
4262
5502
|
const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
|
|
@@ -4279,25 +5519,9 @@ async function uploadVttToS3({
|
|
|
4279
5519
|
region: s3Region,
|
|
4280
5520
|
bucket: s3Bucket,
|
|
4281
5521
|
key: vttKey,
|
|
4282
|
-
expiresInSeconds:
|
|
5522
|
+
expiresInSeconds: s3SignedUrlExpirySeconds ?? 86400
|
|
4283
5523
|
}, storageAdapter);
|
|
4284
5524
|
}
|
|
4285
|
-
async function createTextTrackOnMux(assetId, languageCode, trackName, presignedUrl, credentials) {
|
|
4286
|
-
"use step";
|
|
4287
|
-
const muxClient = await resolveMuxClient(credentials);
|
|
4288
|
-
const mux = await muxClient.createClient();
|
|
4289
|
-
const trackResponse = await mux.video.assets.createTrack(assetId, {
|
|
4290
|
-
type: "text",
|
|
4291
|
-
text_type: "subtitles",
|
|
4292
|
-
language_code: languageCode,
|
|
4293
|
-
name: trackName,
|
|
4294
|
-
url: presignedUrl
|
|
4295
|
-
});
|
|
4296
|
-
if (!trackResponse.id) {
|
|
4297
|
-
throw new Error("Failed to create text track: no track ID returned from Mux");
|
|
4298
|
-
}
|
|
4299
|
-
return trackResponse.id;
|
|
4300
|
-
}
|
|
4301
5525
|
async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options) {
|
|
4302
5526
|
"use workflow";
|
|
4303
5527
|
const {
|
|
@@ -4308,7 +5532,8 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
|
|
|
4308
5532
|
s3Bucket: providedS3Bucket,
|
|
4309
5533
|
uploadToMux: uploadToMuxOption,
|
|
4310
5534
|
storageAdapter,
|
|
4311
|
-
credentials: providedCredentials
|
|
5535
|
+
credentials: providedCredentials,
|
|
5536
|
+
chunking
|
|
4312
5537
|
} = options;
|
|
4313
5538
|
const credentials = providedCredentials;
|
|
4314
5539
|
const effectiveStorageAdapter = storageAdapter;
|
|
@@ -4369,13 +5594,15 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
|
|
|
4369
5594
|
let translatedVtt;
|
|
4370
5595
|
let usage;
|
|
4371
5596
|
try {
|
|
4372
|
-
const result = await
|
|
5597
|
+
const result = await translateCaptionTrack({
|
|
4373
5598
|
vttContent,
|
|
5599
|
+
assetDurationSeconds,
|
|
4374
5600
|
fromLanguageCode,
|
|
4375
5601
|
toLanguageCode,
|
|
4376
5602
|
provider: modelConfig.provider,
|
|
4377
5603
|
modelId: modelConfig.modelId,
|
|
4378
|
-
credentials
|
|
5604
|
+
credentials,
|
|
5605
|
+
chunking
|
|
4379
5606
|
});
|
|
4380
5607
|
translatedVtt = result.translatedVtt;
|
|
4381
5608
|
usage = result.usage;
|
|
@@ -4412,7 +5639,8 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
|
|
|
4412
5639
|
s3Endpoint,
|
|
4413
5640
|
s3Region,
|
|
4414
5641
|
s3Bucket,
|
|
4415
|
-
storageAdapter: effectiveStorageAdapter
|
|
5642
|
+
storageAdapter: effectiveStorageAdapter,
|
|
5643
|
+
s3SignedUrlExpirySeconds: options.s3SignedUrlExpirySeconds
|
|
4416
5644
|
});
|
|
4417
5645
|
} catch (error) {
|
|
4418
5646
|
throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|