@mux/ai 0.8.2 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{index-Nxf6BaBO.d.ts → index-C8-E3VR9.d.ts} +59 -4
- package/dist/{index-DP02N3iR.d.ts → index-CA7bG50u.d.ts} +41 -2
- package/dist/index.d.ts +3 -3
- package/dist/index.js +908 -199
- package/dist/index.js.map +1 -1
- package/dist/primitives/index.d.ts +1 -1
- package/dist/primitives/index.js +336 -14
- package/dist/primitives/index.js.map +1 -1
- package/dist/workflows/index.d.ts +1 -1
- package/dist/workflows/index.js +900 -198
- package/dist/workflows/index.js.map +1 -1
- package/package.json +1 -1
package/dist/workflows/index.js
CHANGED
|
@@ -837,6 +837,12 @@ function createToneSection(instruction) {
|
|
|
837
837
|
content: instruction
|
|
838
838
|
};
|
|
839
839
|
}
|
|
840
|
+
/**
 * Builds the prompt section that tells the model which language to answer in.
 *
 * @param {string} languageName - Language name interpolated into the instruction.
 * @returns {{ tag: string, content: string }} Section tagged "language".
 */
function createLanguageSection(languageName) {
  const content = `All output (title, description, keywords, chapter titles) MUST be written in ${languageName}.`;
  return { tag: "language", content };
}
|
|
840
846
|
|
|
841
847
|
// src/lib/retry.ts
|
|
842
848
|
var DEFAULT_RETRY_OPTIONS = {
|
|
@@ -981,24 +987,82 @@ function findCaptionTrack(asset, languageCode) {
|
|
|
981
987
|
(track) => track.text_type === "subtitles" && track.language_code === languageCode
|
|
982
988
|
);
|
|
983
989
|
}
|
|
990
|
+
/**
 * Converts every line terminator in `value` to a single "\n".
 *
 * WebVTT accepts CRLF, LF and a lone CR as line terminators, so a bare "\r"
 * is normalized as well as "\r\n"; otherwise CR-only files would be treated
 * as one long line by callers that split on "\n".
 *
 * @param {string} value - Text that may contain mixed line endings.
 * @returns {string} The text with LF-only line endings.
 */
function normalizeLineEndings(value) {
  // `\r\n?` consumes CRLF as one terminator and also matches a standalone CR.
  return value.replace(/\r\n?/g, "\n");
}
|
|
993
|
+
/**
 * Reports whether a line contains the "-->" arrow that separates a cue's
 * start and end timestamps.
 *
 * @param {string} line - A single line of VTT text.
 * @returns {boolean} True when the arrow occurs anywhere in the line.
 */
function isTimingLine(line) {
  const arrowPosition = line.indexOf("-->");
  return arrowPosition !== -1;
}
|
|
996
|
+
/**
 * Parses a cue identifier that consists solely of decimal digits.
 *
 * @param {string} line - Candidate identifier line (expected to be trimmed already).
 * @returns {number|null} The parsed integer, or null when the line is not all digits.
 */
function parseNumericCueIdentifier(line) {
  const isAllDigits = /^\d+$/.test(line);
  return isAllDigits ? Number.parseInt(line, 10) : null;
}
|
|
1002
|
+
/**
 * Reports whether a line looks like a titled cue identifier of the form
 * "<digits> - <title>" (digits, whitespace, a dash, whitespace, then text).
 *
 * @param {string} line - Candidate identifier line.
 * @returns {boolean} True when the line matches that shape.
 */
function isLikelyTitledCueIdentifier(line) {
  const titledIdentifierPattern = /^\d+\s+-\s+\S.*$/;
  return titledIdentifierPattern.test(line);
}
|
|
1005
|
+
/**
 * Decides whether `line` is a cue identifier rather than cue text.
 *
 * A line qualifies only when it is non-empty and is immediately followed by a
 * timing line. Purely numeric lines must additionally continue the sequence:
 * 1 when no previous identifier is known, otherwise previous + 1. Non-numeric
 * lines qualify when they have the "<digits> - <title>" shape.
 *
 * @param {object} params
 * @param {string} params.line - Candidate identifier line.
 * @param {string} [params.nextLine] - The line that follows the candidate.
 * @param {number|null} [params.previousCueIdentifier] - Numeric identifier of the previous cue, if any.
 * @returns {boolean} True when the line should be treated as a cue identifier.
 */
function isLikelyCueIdentifier({ line, nextLine, previousCueIdentifier }) {
  const precedesTimingLine = Boolean(line) && Boolean(nextLine) && isTimingLine(nextLine);
  if (!precedesTimingLine) {
    return false;
  }

  const numericIdentifier = parseNumericCueIdentifier(line);
  if (numericIdentifier === null) {
    return isLikelyTitledCueIdentifier(line);
  }

  // Requiring the expected sequence number keeps numeric cue *text* (e.g. a
  // spoken "42") from being mistaken for an identifier.
  const expectedIdentifier = previousCueIdentifier == null ? 1 : previousCueIdentifier + 1;
  return numericIdentifier === expectedIdentifier;
}
|
|
1022
|
+
/**
 * Finds the index of the identifier line that belongs to the cue whose timing
 * line sits at `timingLineIndex`.
 *
 * @param {string[]} lines - All lines of the VTT document.
 * @param {number} timingLineIndex - Index of the cue's timing line.
 * @param {number|null} previousCueIdentifier - Numeric identifier of the preceding cue, if any.
 * @returns {number} Index of the identifier line, or -1 when the line above the
 *   timing line is missing, blank, another timing line, or not identifier-like.
 */
function getCueIdentifierLineIndex(lines, timingLineIndex, previousCueIdentifier) {
  const candidateIndex = timingLineIndex - 1;
  if (candidateIndex < 0) {
    return -1;
  }

  const candidateLine = lines[candidateIndex].trim();
  if (!candidateLine || isTimingLine(candidateLine)) {
    return -1;
  }

  const looksLikeIdentifier = isLikelyCueIdentifier({
    line: candidateLine,
    nextLine: lines[timingLineIndex]?.trim(),
    previousCueIdentifier
  });
  return looksLikeIdentifier ? candidateIndex : -1;
}
|
|
984
1037
|
function extractTextFromVTT(vttContent) {
|
|
985
1038
|
if (!vttContent.trim()) {
|
|
986
1039
|
return "";
|
|
987
1040
|
}
|
|
988
1041
|
const lines = vttContent.split("\n");
|
|
989
1042
|
const textLines = [];
|
|
1043
|
+
let previousCueIdentifier = null;
|
|
1044
|
+
let isInsideNoteBlock = false;
|
|
990
1045
|
for (let i = 0; i < lines.length; i++) {
|
|
991
1046
|
const line = lines[i].trim();
|
|
992
|
-
|
|
1047
|
+
const nextLine = lines[i + 1]?.trim();
|
|
1048
|
+
if (!line) {
|
|
1049
|
+
isInsideNoteBlock = false;
|
|
1050
|
+
continue;
|
|
1051
|
+
}
|
|
1052
|
+
if (isInsideNoteBlock)
|
|
993
1053
|
continue;
|
|
994
1054
|
if (line === "WEBVTT")
|
|
995
1055
|
continue;
|
|
996
|
-
if (line.startsWith("NOTE "))
|
|
1056
|
+
if (line === "NOTE" || line.startsWith("NOTE ")) {
|
|
1057
|
+
isInsideNoteBlock = true;
|
|
997
1058
|
continue;
|
|
998
|
-
|
|
1059
|
+
}
|
|
1060
|
+
if (isTimingLine(line))
|
|
999
1061
|
continue;
|
|
1000
|
-
if (
|
|
1062
|
+
if (isLikelyCueIdentifier({ line, nextLine, previousCueIdentifier })) {
|
|
1063
|
+
previousCueIdentifier = parseNumericCueIdentifier(line);
|
|
1001
1064
|
continue;
|
|
1065
|
+
}
|
|
1002
1066
|
if (line.startsWith("STYLE") || line.startsWith("REGION"))
|
|
1003
1067
|
continue;
|
|
1004
1068
|
const cleanLine = line.replace(/<[^>]*>/g, "").trim();
|
|
@@ -1047,20 +1111,34 @@ function parseVTTCues(vttContent) {
|
|
|
1047
1111
|
return [];
|
|
1048
1112
|
const lines = vttContent.split("\n");
|
|
1049
1113
|
const cues = [];
|
|
1114
|
+
let previousCueIdentifier = null;
|
|
1050
1115
|
for (let i = 0; i < lines.length; i++) {
|
|
1051
1116
|
const line = lines[i].trim();
|
|
1052
|
-
if (line
|
|
1117
|
+
if (isTimingLine(line)) {
|
|
1053
1118
|
const [startStr, endStr] = line.split(" --> ").map((s) => s.trim());
|
|
1054
1119
|
const startTime = vttTimestampToSeconds(startStr);
|
|
1055
1120
|
const endTime = vttTimestampToSeconds(endStr.split(" ")[0]);
|
|
1056
|
-
const
|
|
1121
|
+
const currentCueIdentifierLine = lines[i - 1]?.trim() ?? "";
|
|
1122
|
+
const currentCueIdentifier = isLikelyCueIdentifier({
|
|
1123
|
+
line: currentCueIdentifierLine,
|
|
1124
|
+
nextLine: line,
|
|
1125
|
+
previousCueIdentifier
|
|
1126
|
+
}) ? parseNumericCueIdentifier(currentCueIdentifierLine) : null;
|
|
1127
|
+
const rawTextLines = [];
|
|
1057
1128
|
let j = i + 1;
|
|
1058
|
-
while (j < lines.length && lines[j].trim() && !lines[j].
|
|
1059
|
-
|
|
1060
|
-
if (cleanLine)
|
|
1061
|
-
textLines.push(cleanLine);
|
|
1129
|
+
while (j < lines.length && lines[j].trim() && !isTimingLine(lines[j].trim())) {
|
|
1130
|
+
rawTextLines.push(lines[j].trim());
|
|
1062
1131
|
j++;
|
|
1063
1132
|
}
|
|
1133
|
+
const trailingNumericLine = parseNumericCueIdentifier(rawTextLines.at(-1) ?? "");
|
|
1134
|
+
if (trailingNumericLine !== null && isLikelyCueIdentifier({
|
|
1135
|
+
line: rawTextLines.at(-1) ?? "",
|
|
1136
|
+
nextLine: lines[j]?.trim(),
|
|
1137
|
+
previousCueIdentifier: currentCueIdentifier
|
|
1138
|
+
}) && rawTextLines.length > 1) {
|
|
1139
|
+
rawTextLines.pop();
|
|
1140
|
+
}
|
|
1141
|
+
const textLines = rawTextLines.map((textLine) => textLine.replace(/<[^>]*>/g, "")).filter(Boolean);
|
|
1064
1142
|
if (textLines.length > 0) {
|
|
1065
1143
|
cues.push({
|
|
1066
1144
|
startTime,
|
|
@@ -1068,10 +1146,102 @@ function parseVTTCues(vttContent) {
|
|
|
1068
1146
|
text: textLines.join(" ")
|
|
1069
1147
|
});
|
|
1070
1148
|
}
|
|
1149
|
+
previousCueIdentifier = currentCueIdentifier;
|
|
1071
1150
|
}
|
|
1072
1151
|
}
|
|
1073
1152
|
return cues;
|
|
1074
1153
|
}
|
|
1154
|
+
/**
 * Splits VTT text into its preamble (everything before the first cue) and the
 * list of cue blocks.
 *
 * Blocks are normally separated by blank lines. When any block from the first
 * cue onward holds more than one "-->" (cues that were not separated by a
 * blank line), the document is re-split line by line around each timing line.
 *
 * @param {string} vttContent - Raw VTT text.
 * @returns {{ preamble: string, cueBlocks: string[] }} Preamble (defaults to
 *   "WEBVTT" when nothing precedes the first cue) and the trimmed cue blocks.
 */
function splitVttPreambleAndCueBlocks(vttContent) {
  const content = normalizeLineEndings(vttContent).trim();
  if (!content) {
    return { preamble: "WEBVTT", cueBlocks: [] };
  }

  const blocks = content
    .split(/\n{2,}/)
    .map((block) => block.trim())
    .filter(Boolean);
  const firstCueBlockIndex = blocks.findIndex((block) => block.includes("-->"));

  // No cue at all: the whole document is preamble, prefixed with the header if absent.
  if (firstCueBlockIndex === -1) {
    const preamble = content.startsWith("WEBVTT") ? content : `WEBVTT\n\n${content}`;
    return { preamble, cueBlocks: [] };
  }

  const countTimingArrows = (block) => (block.match(/-->/g) ?? []).length;
  const hasMergedCueBlocks = blocks.slice(firstCueBlockIndex).some((block) => countTimingArrows(block) > 1);

  if (!hasMergedCueBlocks) {
    const preambleBlocks = blocks.slice(0, firstCueBlockIndex);
    return {
      preamble: preambleBlocks.length > 0 ? preambleBlocks.join("\n\n") : "WEBVTT",
      cueBlocks: blocks.slice(firstCueBlockIndex)
    };
  }

  // Merged cues: rebuild each cue from its timing line instead of trusting blank lines.
  const lines = content.split("\n");
  const timingLineIndices = [];
  lines.forEach((line, lineIndex) => {
    if (isTimingLine(line.trim())) {
      timingLineIndices.push(lineIndex);
    }
  });

  const firstIdentifierIndex = getCueIdentifierLineIndex(lines, timingLineIndices[0], null);
  const preambleEndIndex = firstIdentifierIndex >= 0 ? firstIdentifierIndex : timingLineIndices[0];
  const preamble = lines.slice(0, preambleEndIndex).join("\n").trim() || "WEBVTT";

  // Walks upward from `fromIndex` past blank lines, never going above `floorIndex`.
  const skipBlankLinesUpward = (fromIndex, floorIndex) => {
    let cursor = fromIndex;
    while (cursor > floorIndex && !lines[cursor].trim()) {
      cursor--;
    }
    return cursor;
  };

  const cueBlocks = [];
  let previousCueIdentifier = null;
  for (let position = 0; position < timingLineIndices.length; position++) {
    const timingLineIndex = timingLineIndices[position];
    const identifierIndex = getCueIdentifierLineIndex(lines, timingLineIndex, previousCueIdentifier);
    const hasIdentifier = identifierIndex >= 0;
    const cueStartIndex = hasIdentifier ? identifierIndex : timingLineIndex;
    const currentCueIdentifier = hasIdentifier ? parseNumericCueIdentifier(lines[identifierIndex].trim()) : null;

    const nextTimingLineIndex = timingLineIndices[position + 1] ?? lines.length;
    let cueEndIndex = skipBlankLinesUpward(nextTimingLineIndex - 1, timingLineIndex);

    // The last non-blank line before the next timing line may be the NEXT cue's
    // identifier; if so it does not belong to this cue's text.
    const isLastCue = position >= timingLineIndices.length - 1;
    const nextIdentifierIndex = isLastCue ? -1 : getCueIdentifierLineIndex(lines, nextTimingLineIndex, currentCueIdentifier);
    if (nextIdentifierIndex === cueEndIndex) {
      cueEndIndex--;
    }
    cueEndIndex = skipBlankLinesUpward(cueEndIndex, timingLineIndex);

    previousCueIdentifier = currentCueIdentifier;
    cueBlocks.push(lines.slice(cueStartIndex, cueEndIndex + 1).join("\n").trim());
  }

  return { preamble, cueBlocks };
}
|
|
1212
|
+
/**
 * Assembles a VTT document from a preamble and cue blocks.
 *
 * @param {string[]} cueBlocks - Cue blocks; each is trimmed before joining.
 * @param {string} [preamble="WEBVTT"] - Header text placed before the cues.
 * @returns {string} Trimmed preamble, then the cue blocks separated by blank
 *   lines, ending with a single trailing newline.
 */
function buildVttFromCueBlocks(cueBlocks, preamble = "WEBVTT") {
  if (cueBlocks.length === 0) {
    return `${preamble.trim()}\n`;
  }
  const header = preamble.trim();
  const body = cueBlocks.map((block) => block.trim()).join("\n\n");
  return `${header}\n\n${body}\n`;
}
|
|
1222
|
+
/**
 * Replaces the text payload of a cue block, keeping its identifier and timing
 * lines.
 *
 * @param {string} cueBlock - Original cue block (optional identifier, timing line, text).
 * @param {string} translatedText - Replacement text; may span several lines.
 * @returns {string} Header lines followed by the non-empty, trimmed replacement lines.
 * @throws {Error} When the cue block contains no line with "-->".
 */
function replaceCueText(cueBlock, translatedText) {
  // Every line is trimmed, so splitting on an optional CR + LF gives the same
  // lines as normalizing CRLF to LF first.
  const toNonEmptyLines = (text) => text
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter(Boolean);

  const cueLines = toNonEmptyLines(cueBlock);
  const timingLineIndex = cueLines.findIndex((line) => line.includes("-->"));
  if (timingLineIndex === -1) {
    throw new Error("Cue block is missing a timestamp line");
  }

  const headerLines = cueLines.slice(0, timingLineIndex + 1);
  return headerLines.concat(toNonEmptyLines(translatedText)).join("\n");
}
|
|
1232
|
+
/**
 * Builds a VTT document in which each cue's text is replaced by the
 * translation at the same index.
 *
 * @param {string[]} cueBlocks - Original cue blocks.
 * @param {string[]} translatedTexts - One replacement text per cue block.
 * @param {string} [preamble="WEBVTT"] - Header text placed before the cues.
 * @returns {string} The assembled VTT document.
 * @throws {Error} When the two arrays differ in length.
 */
function buildVttFromTranslatedCueBlocks(cueBlocks, translatedTexts, preamble = "WEBVTT") {
  const expectedCount = cueBlocks.length;
  const receivedCount = translatedTexts.length;
  if (expectedCount !== receivedCount) {
    throw new Error(`Expected ${expectedCount} translated cues, received ${receivedCount}`);
  }

  const translatedBlocks = cueBlocks.map(
    (cueBlock, index) => replaceCueText(cueBlock, translatedTexts[index])
  );
  return buildVttFromCueBlocks(translatedBlocks, preamble);
}
|
|
1241
|
+
/**
 * Merges several VTT segments into one document. Each segment's own preamble
 * is discarded; only its cue blocks are kept, in segment order.
 *
 * @param {string[]} segments - VTT documents to concatenate.
 * @param {string} [preamble="WEBVTT"] - Header text for the combined document.
 * @returns {string} The combined VTT document.
 */
function concatenateVttSegments(segments, preamble = "WEBVTT") {
  const allCueBlocks = [];
  for (const segment of segments) {
    const { cueBlocks } = splitVttPreambleAndCueBlocks(segment);
    for (const cueBlock of cueBlocks) {
      allCueBlocks.push(cueBlock);
    }
  }
  return buildVttFromCueBlocks(allCueBlocks, preamble);
}
|
|
1075
1245
|
async function buildTranscriptUrl(playbackId, trackId, shouldSign = false, credentials) {
|
|
1076
1246
|
"use step";
|
|
1077
1247
|
const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
|
|
@@ -1200,6 +1370,7 @@ var SYSTEM_PROMPT = dedent`
|
|
|
1200
1370
|
- Only describe observable evidence from frames or transcript
|
|
1201
1371
|
- Do not fabricate details or make unsupported assumptions
|
|
1202
1372
|
- Return structured data matching the requested schema exactly
|
|
1373
|
+
- Provide reasoning in the same language as the question
|
|
1203
1374
|
</constraints>
|
|
1204
1375
|
|
|
1205
1376
|
<language_guidelines>
|
|
@@ -1600,6 +1771,166 @@ async function hasBurnedInCaptions(assetId, options = {}) {
|
|
|
1600
1771
|
import { generateText as generateText3, Output as Output3 } from "ai";
|
|
1601
1772
|
import dedent3 from "dedent";
|
|
1602
1773
|
import { z as z4 } from "zod";
|
|
1774
|
+
|
|
1775
|
+
// src/lib/language-codes.ts
|
|
1776
|
+
// Lookup from two-letter ISO 639-1 codes to three-letter ISO 639-3 codes.
var ISO639_1_TO_3 = {
  // Major world languages
  en: "eng", // English
  es: "spa", // Spanish
  fr: "fra", // French
  de: "deu", // German
  it: "ita", // Italian
  pt: "por", // Portuguese
  ru: "rus", // Russian
  zh: "zho", // Chinese
  ja: "jpn", // Japanese
  ko: "kor", // Korean
  ar: "ara", // Arabic
  hi: "hin", // Hindi

  // European languages
  nl: "nld", // Dutch
  pl: "pol", // Polish
  sv: "swe", // Swedish
  da: "dan", // Danish
  no: "nor", // Norwegian
  fi: "fin", // Finnish
  el: "ell", // Greek
  cs: "ces", // Czech
  hu: "hun", // Hungarian
  ro: "ron", // Romanian
  bg: "bul", // Bulgarian
  hr: "hrv", // Croatian
  sk: "slk", // Slovak
  sl: "slv", // Slovenian
  uk: "ukr", // Ukrainian
  tr: "tur", // Turkish

  // Asian languages
  th: "tha", // Thai
  vi: "vie", // Vietnamese
  id: "ind", // Indonesian
  ms: "msa", // Malay
  tl: "tgl", // Tagalog/Filipino

  // Other languages
  he: "heb", // Hebrew
  fa: "fas", // Persian/Farsi
  bn: "ben", // Bengali
  ta: "tam", // Tamil
  te: "tel", // Telugu
  mr: "mar", // Marathi
  gu: "guj", // Gujarati
  kn: "kan", // Kannada
  ml: "mal", // Malayalam
  pa: "pan", // Punjabi
  ur: "urd", // Urdu
  sw: "swa", // Swahili
  af: "afr", // Afrikaans
  ca: "cat", // Catalan
  eu: "eus", // Basque
  gl: "glg", // Galician
  is: "isl", // Icelandic
  et: "est", // Estonian
  lv: "lav", // Latvian
  lt: "lit" // Lithuanian
};
|
|
1888
|
+
// Reverse lookup (ISO 639-3 -> ISO 639-1), derived from ISO639_1_TO_3 so the
// two tables cannot drift apart.
var ISO639_3_TO_1 = {};
for (const [iso1, iso3] of Object.entries(ISO639_1_TO_3)) {
  ISO639_3_TO_1[iso3] = iso1;
}
|
|
1891
|
+
/**
 * Converts a language code to its three-letter ISO 639-3 form.
 *
 * The input is lower-cased and trimmed. Codes that are already three
 * characters long are returned as-is; other codes are looked up in
 * ISO639_1_TO_3 and returned unchanged (normalized) when unknown.
 *
 * @param {string} code - Language code, e.g. "en" or "ENG".
 * @returns {string} The ISO 639-3 code, or the normalized input when no mapping exists.
 */
function toISO639_3(code) {
  const normalized = code.toLowerCase().trim();
  if (normalized.length === 3) {
    return normalized;
  }
  // Own-property check: a bare `table[key]` would return inherited members of
  // Object.prototype (e.g. "constructor") instead of a string.
  return Object.hasOwn(ISO639_1_TO_3, normalized) ? ISO639_1_TO_3[normalized] : normalized;
}
|
|
1898
|
+
/**
 * Converts a language code to its two-letter ISO 639-1 form.
 *
 * The input is lower-cased and trimmed. Codes that are already two characters
 * long are returned as-is; other codes are looked up in ISO639_3_TO_1 and
 * returned unchanged (normalized) when unknown.
 *
 * @param {string} code - Language code, e.g. "eng" or "EN".
 * @returns {string} The ISO 639-1 code, or the normalized input when no mapping exists.
 */
function toISO639_1(code) {
  const normalized = code.toLowerCase().trim();
  if (normalized.length === 2) {
    return normalized;
  }
  // Own-property check: a bare `table[key]` would return inherited members of
  // Object.prototype (e.g. "constructor", "toString") instead of a string.
  return Object.hasOwn(ISO639_3_TO_1, normalized) ? ISO639_3_TO_1[normalized] : normalized;
}
|
|
1905
|
+
/**
 * Returns both the ISO 639-1 and ISO 639-3 forms of a language code.
 *
 * The input is lower-cased and trimmed. A two-character code is kept as the
 * 639-1 value and converted for 639-3; a three-character code is kept as the
 * 639-3 value and converted for 639-1; any other length is returned unchanged
 * in both fields.
 *
 * @param {string} code - Language code in either form.
 * @returns {{ iso639_1: string, iso639_3: string }} The code pair.
 */
function getLanguageCodePair(code) {
  const normalized = code.toLowerCase().trim();
  switch (normalized.length) {
    case 2:
      return { iso639_1: normalized, iso639_3: toISO639_3(normalized) };
    case 3:
      return { iso639_1: toISO639_1(normalized), iso639_3: normalized };
    default:
      return { iso639_1: normalized, iso639_3: normalized };
  }
}
|
|
1923
|
+
/**
 * Resolves an English display name for a language code.
 *
 * The code is first converted with toISO639_1 and then passed to
 * Intl.DisplayNames. If no name comes back, or Intl throws, the original code
 * is returned upper-cased.
 *
 * @param {string} code - Language code in ISO 639-1 or ISO 639-3 form.
 * @returns {string} English language name, or the upper-cased input as a fallback.
 */
function getLanguageName(code) {
  const fallbackName = () => code.toUpperCase();
  const iso639_1 = toISO639_1(code);
  try {
    const englishNames = new Intl.DisplayNames(["en"], { type: "language" });
    const resolvedName = englishNames.of(iso639_1);
    return resolvedName ?? fallbackName();
  } catch {
    // Best-effort lookup: any Intl failure falls back to the raw code.
    return fallbackName();
  }
}
|
|
1932
|
+
|
|
1933
|
+
// src/workflows/chapters.ts
|
|
1603
1934
|
var chapterSchema = z4.object({
|
|
1604
1935
|
startTime: z4.number(),
|
|
1605
1936
|
title: z4.string()
|
|
@@ -1660,7 +1991,8 @@ var chapterSystemPromptBuilder = createPromptBuilder({
|
|
|
1660
1991
|
content: dedent3`
|
|
1661
1992
|
- Only use information present in the transcript
|
|
1662
1993
|
- Return structured data that matches the requested JSON schema
|
|
1663
|
-
- Do not add commentary or extra text outside the JSON
|
|
1994
|
+
- Do not add commentary or extra text outside the JSON
|
|
1995
|
+
- When a <language> section is provided, all chapter titles MUST be written in that language`
|
|
1664
1996
|
},
|
|
1665
1997
|
qualityGuidelines: {
|
|
1666
1998
|
tag: "quality_guidelines",
|
|
@@ -1708,7 +2040,7 @@ var chaptersPromptBuilder = createPromptBuilder({
|
|
|
1708
2040
|
content: dedent3`
|
|
1709
2041
|
- Keep titles concise and descriptive
|
|
1710
2042
|
- Avoid filler or generic labels like "Chapter 1"
|
|
1711
|
-
- Use the transcript's
|
|
2043
|
+
- Use the transcript's terminology`
|
|
1712
2044
|
}
|
|
1713
2045
|
},
|
|
1714
2046
|
sectionOrder: ["task", "outputFormat", "chapterGuidelines", "titleGuidelines"]
|
|
@@ -1717,7 +2049,8 @@ function buildUserPrompt3({
|
|
|
1717
2049
|
timestampedTranscript,
|
|
1718
2050
|
promptOverrides,
|
|
1719
2051
|
minChaptersPerHour = 3,
|
|
1720
|
-
maxChaptersPerHour = 8
|
|
2052
|
+
maxChaptersPerHour = 8,
|
|
2053
|
+
languageName
|
|
1721
2054
|
}) {
|
|
1722
2055
|
const contextSections = [
|
|
1723
2056
|
{
|
|
@@ -1726,6 +2059,9 @@ function buildUserPrompt3({
|
|
|
1726
2059
|
attributes: { format: "seconds" }
|
|
1727
2060
|
}
|
|
1728
2061
|
];
|
|
2062
|
+
if (languageName) {
|
|
2063
|
+
contextSections.push(createLanguageSection(languageName));
|
|
2064
|
+
}
|
|
1729
2065
|
const dynamicChapterGuidelines = dedent3`
|
|
1730
2066
|
- Create at least ${minChaptersPerHour} and at most ${maxChaptersPerHour} chapters per hour of content
|
|
1731
2067
|
- Use start times in seconds (not HH:MM:SS)
|
|
@@ -1745,7 +2081,8 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
1745
2081
|
promptOverrides,
|
|
1746
2082
|
minChaptersPerHour,
|
|
1747
2083
|
maxChaptersPerHour,
|
|
1748
|
-
credentials
|
|
2084
|
+
credentials,
|
|
2085
|
+
outputLanguageCode
|
|
1749
2086
|
} = options;
|
|
1750
2087
|
const modelConfig = resolveLanguageModelConfig({
|
|
1751
2088
|
...options,
|
|
@@ -1789,11 +2126,14 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
1789
2126
|
const contentLabel = isAudioOnly ? "transcript" : "caption track";
|
|
1790
2127
|
throw new Error(`No usable content found in ${contentLabel}`);
|
|
1791
2128
|
}
|
|
2129
|
+
const resolvedLanguageCode = outputLanguageCode && outputLanguageCode !== "auto" ? outputLanguageCode : transcriptResult.track?.language_code ?? languageCode;
|
|
2130
|
+
const languageName = resolvedLanguageCode ? getLanguageName(resolvedLanguageCode) : void 0;
|
|
1792
2131
|
const userPrompt = buildUserPrompt3({
|
|
1793
2132
|
timestampedTranscript,
|
|
1794
2133
|
promptOverrides,
|
|
1795
2134
|
minChaptersPerHour,
|
|
1796
|
-
maxChaptersPerHour
|
|
2135
|
+
maxChaptersPerHour,
|
|
2136
|
+
languageName
|
|
1797
2137
|
});
|
|
1798
2138
|
let chaptersData = null;
|
|
1799
2139
|
try {
|
|
@@ -1840,6 +2180,14 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
1840
2180
|
import { embed } from "ai";
|
|
1841
2181
|
|
|
1842
2182
|
// src/primitives/text-chunking.ts
|
|
2183
|
+
// Defaults used by chunkVTTCuesByDuration when the caller omits an option.

// Fraction of the target duration used as the minimum chunk duration.
var DEFAULT_MIN_CHUNK_DURATION_RATIO = 2 / 3;
// Default for the boundaryLookaheadCues option.
var DEFAULT_BOUNDARY_LOOKAHEAD_CUES = 12;
// Gap between consecutive cues (seconds) that scoreCueBoundary counts as a pause.
var DEFAULT_BOUNDARY_PAUSE_SECONDS = 1.25;

// Boundary score at or above which a chunk is cut as soon as the target duration is reached.
var STRONG_BOUNDARY_SCORE = 4;
// Upper bound (seconds) on the window before the target duration in which boundaries are preferred.
var PREFERRED_BOUNDARY_WINDOW_SECONDS = 5 * 60;

// Cue text ending in ., ! or ?, optionally followed by closing quotes/brackets.
var SENTENCE_BOUNDARY_REGEX = /[.!?]["')\]]*$/;
// Cue text ending in , ; or :, optionally followed by closing quotes/brackets.
var CLAUSE_BOUNDARY_REGEX = /[,;:]["')\]]*$/;
// Cue text starting with an ASCII capital, a digit, or an opening quote/bracket.
var NEXT_SENTENCE_START_REGEX = /^[A-Z0-9"'([{]/;
|
|
1843
2191
|
function estimateTokenCount(text) {
|
|
1844
2192
|
const words = text.trim().split(/\s+/).length;
|
|
1845
2193
|
return Math.ceil(words / 0.75);
|
|
@@ -1912,6 +2260,151 @@ function chunkVTTCues(cues, maxTokens, overlapCues = 2) {
|
|
|
1912
2260
|
}
|
|
1913
2261
|
return chunks;
|
|
1914
2262
|
}
|
|
2263
|
+
/**
 * Scores how good a chunk boundary would be directly after `cues[index]`.
 *
 * Scoring: +4 when the cue ends a sentence, otherwise +2 when it ends a
 * clause; +2 when the gap before the next cue is at least
 * `boundaryPauseSeconds`; +1 when the next cue looks like a sentence start.
 * The final cue always scores Infinity.
 *
 * @param {Array<{ text: string, startTime: number, endTime: number }>} cues - Parsed cues.
 * @param {number} index - Index of the cue that would end the chunk.
 * @param {number} boundaryPauseSeconds - Minimum gap, in seconds, that counts as a pause.
 * @returns {number} The boundary score (higher is better).
 */
function scoreCueBoundary(cues, index, boundaryPauseSeconds) {
  const currentCue = cues[index];
  const followingCue = cues[index + 1];
  if (!followingCue) {
    return Number.POSITIVE_INFINITY;
  }

  const currentText = currentCue.text.trim();
  const punctuationScore = SENTENCE_BOUNDARY_REGEX.test(currentText)
    ? 4
    : CLAUSE_BOUNDARY_REGEX.test(currentText) ? 2 : 0;

  const gapSeconds = followingCue.startTime - currentCue.endTime;
  const pauseScore = gapSeconds >= boundaryPauseSeconds ? 2 : 0;

  const nextStartScore = NEXT_SENTENCE_START_REGEX.test(followingCue.text.trim()) ? 1 : 0;

  return punctuationScore + pauseScore + nextStartScore;
}
|
|
2284
|
+
/**
 * Groups consecutive cues into chunks bounded by a cue count and, optionally,
 * an estimated token count.
 *
 * A chunk is closed before a cue that would push it past either limit. A
 * single cue is never split, so a chunk can exceed the token budget when one
 * cue alone is larger than the budget.
 *
 * @param {Array<{ text: string, startTime: number, endTime: number }>} cues - Parsed cues.
 * @param {object} options
 * @param {number} options.maxCuesPerChunk - Maximum cues per chunk (clamped to at least 1).
 * @param {number} [options.maxTextTokensPerChunk] - Token budget per chunk; no limit when falsy.
 * @returns {Array<{ id: string, cueStartIndex: number, cueEndIndex: number, cueCount: number, startTime: number, endTime: number }>}
 *   Chunk descriptors with inclusive cue index ranges; empty when `cues` is empty.
 */
function chunkVTTCuesByBudget(cues, options) {
  if (cues.length === 0) {
    return [];
  }

  const maxCuesPerChunk = Math.max(1, options.maxCuesPerChunk);
  const maxTextTokensPerChunk = options.maxTextTokensPerChunk
    ? Math.max(1, options.maxTextTokensPerChunk)
    : Number.POSITIVE_INFINITY;

  const chunks = [];
  let cueStartIndex = 0;
  let tokensInChunk = 0;

  // Records the chunk spanning cueStartIndex..lastCueIndex (inclusive).
  const closeChunk = (lastCueIndex) => {
    chunks.push({
      id: `chunk-${chunks.length}`,
      cueStartIndex,
      cueEndIndex: lastCueIndex,
      cueCount: lastCueIndex - cueStartIndex + 1,
      startTime: cues[cueStartIndex].startTime,
      endTime: cues[lastCueIndex].endTime
    });
  };

  cues.forEach((cue, cueIndex) => {
    const cueTokens = estimateTokenCount(cue.text);
    const cuesInChunk = cueIndex - cueStartIndex;
    const cueLimitReached = cuesInChunk >= maxCuesPerChunk;
    const tokenLimitExceeded = cuesInChunk > 0 && tokensInChunk + cueTokens > maxTextTokensPerChunk;

    if (cueLimitReached || tokenLimitExceeded) {
      closeChunk(cueIndex - 1);
      cueStartIndex = cueIndex;
      tokensInChunk = 0;
    }
    tokensInChunk += cueTokens;
  });

  // Whatever remains after the loop forms the final chunk.
  closeChunk(cues.length - 1);
  return chunks;
}
|
|
2328
|
+
/**
 * Groups consecutive cues into chunks of roughly `targetChunkDurationSeconds`,
 * preferring to cut where scoreCueBoundary reports a good boundary.
 *
 * While extending a chunk, the best-scoring boundary seen since the minimum
 * duration is tracked, along with the best one inside the preferred window
 * just before the target (ties go to the later cue). Once the target is
 * reached the chunk is cut at a strong boundary if one was seen; otherwise it
 * keeps growing until the cue lookahead is used up or the next cue would
 * exceed the maximum duration, and is then cut at the best boundary seen.
 *
 * @param {Array<{ text: string, startTime: number, endTime: number }>} cues - Parsed cues.
 * @param {object} options
 * @param {number} options.targetChunkDurationSeconds - Desired chunk length (at least 1).
 * @param {number} options.maxChunkDurationSeconds - Hard upper bound (at least the target).
 * @param {number} [options.minChunkDurationSeconds] - Earliest duration at which a cut is considered.
 * @param {number} [options.boundaryLookaheadCues] - Cue count after which the lookahead is exhausted.
 * @param {number} [options.boundaryPauseSeconds] - Gap that counts as a pause when scoring.
 * @returns {Array<{ id: string, cueStartIndex: number, cueEndIndex: number, cueCount: number, startTime: number, endTime: number }>}
 *   Chunk descriptors with inclusive cue index ranges; empty when `cues` is empty.
 */
function chunkVTTCuesByDuration(cues, options) {
  if (cues.length === 0) {
    return [];
  }

  const targetDuration = Math.max(1, options.targetChunkDurationSeconds);
  const maxDuration = Math.max(targetDuration, options.maxChunkDurationSeconds);
  const requestedMinDuration = options.minChunkDurationSeconds ?? Math.floor(targetDuration * DEFAULT_MIN_CHUNK_DURATION_RATIO);
  const minDuration = Math.min(targetDuration, Math.max(1, requestedMinDuration));
  const lookaheadCues = Math.max(1, options.boundaryLookaheadCues ?? DEFAULT_BOUNDARY_LOOKAHEAD_CUES);
  const pauseSeconds = options.boundaryPauseSeconds ?? DEFAULT_BOUNDARY_PAUSE_SECONDS;
  const preferredWindow = Math.min(PREFERRED_BOUNDARY_WINDOW_SECONDS, targetDuration / 6);
  const preferredStart = Math.max(minDuration, targetDuration - preferredWindow);

  const chunks = [];
  let cueStartIndex = 0;

  while (cueStartIndex < cues.length) {
    const chunkStartTime = cues[cueStartIndex].startTime;
    let cueEndIndex = cueStartIndex;
    let bestOverall = { index: -1, score: -1 };
    let bestPreferred = { index: -1, score: -1 };

    while (cueEndIndex < cues.length) {
      const elapsed = cues[cueEndIndex].endTime - chunkStartTime;

      if (elapsed >= minDuration) {
        const score = scoreCueBoundary(cues, cueEndIndex, pauseSeconds);
        // `>=` so that, among equal scores, the later boundary wins.
        if (score >= bestOverall.score) {
          bestOverall = { index: cueEndIndex, score };
        }
        if (elapsed >= preferredStart && score >= bestPreferred.score) {
          bestPreferred = { index: cueEndIndex, score };
        }
      }

      const nextCue = cues[cueEndIndex + 1];
      if (!nextCue) {
        break;
      }

      const elapsedWithNext = nextCue.endTime - chunkStartTime;
      const lookaheadExceeded = cueEndIndex - cueStartIndex >= lookaheadCues;
      const candidate = bestPreferred.index >= cueStartIndex ? bestPreferred : bestOverall;
      const hasCandidate = candidate.index >= cueStartIndex;
      const reachedTarget = elapsed >= targetDuration;
      const nextExceedsMax = elapsedWithNext > maxDuration;

      // Past the target with a strong boundary available: cut there immediately.
      if (reachedTarget && hasCandidate && candidate.score >= STRONG_BOUNDARY_SCORE) {
        cueEndIndex = candidate.index;
        break;
      }

      // Out of room (max duration) or out of patience (lookahead): cut at the
      // best boundary seen, or at the current cue when none was recorded.
      if (nextExceedsMax || (reachedTarget && lookaheadExceeded)) {
        if (hasCandidate) {
          cueEndIndex = candidate.index;
        }
        break;
      }

      cueEndIndex++;
    }

    chunks.push({
      id: `chunk-${chunks.length}`,
      cueStartIndex,
      cueEndIndex,
      cueCount: cueEndIndex - cueStartIndex + 1,
      startTime: cues[cueStartIndex].startTime,
      endTime: cues[cueEndIndex].endTime
    });
    cueStartIndex = cueEndIndex + 1;
  }

  return chunks;
}
|
|
1915
2408
|
function chunkText(text, strategy) {
|
|
1916
2409
|
switch (strategy.type) {
|
|
1917
2410
|
case "token": {
|
|
@@ -2167,10 +2660,8 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
|
|
|
2167
2660
|
}
|
|
2168
2661
|
const baseUrl = getMuxThumbnailBaseUrl(playbackId);
|
|
2169
2662
|
const urlPromises = timestamps.map(async (time) => {
|
|
2170
|
-
|
|
2171
|
-
|
|
2172
|
-
}
|
|
2173
|
-
return `${baseUrl}?time=${time}&width=${width}`;
|
|
2663
|
+
const url = shouldSign ? await signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials) : `${baseUrl}?time=${time}&width=${width}`;
|
|
2664
|
+
return { url, time };
|
|
2174
2665
|
});
|
|
2175
2666
|
return Promise.all(urlPromises);
|
|
2176
2667
|
}
|
|
@@ -2244,6 +2735,7 @@ async function moderateImageWithOpenAI(entry) {
|
|
|
2244
2735
|
const categoryScores = json.results?.[0]?.category_scores || {};
|
|
2245
2736
|
return {
|
|
2246
2737
|
url: entry.url,
|
|
2738
|
+
time: entry.time,
|
|
2247
2739
|
sexual: categoryScores.sexual || 0,
|
|
2248
2740
|
violence: categoryScores.violence || 0,
|
|
2249
2741
|
error: false
|
|
@@ -2252,6 +2744,7 @@ async function moderateImageWithOpenAI(entry) {
|
|
|
2252
2744
|
console.error("OpenAI moderation failed:", error);
|
|
2253
2745
|
return {
|
|
2254
2746
|
url: entry.url,
|
|
2747
|
+
time: entry.time,
|
|
2255
2748
|
sexual: 0,
|
|
2256
2749
|
violence: 0,
|
|
2257
2750
|
error: true,
|
|
@@ -2259,11 +2752,13 @@ async function moderateImageWithOpenAI(entry) {
|
|
|
2259
2752
|
};
|
|
2260
2753
|
}
|
|
2261
2754
|
}
|
|
2262
|
-
async function requestOpenAIModeration(
|
|
2755
|
+
/**
 * Runs moderateImageWithOpenAI over a set of thumbnails, carrying each
 * thumbnail's `time` through to its moderation entry.
 *
 * @param {Array<{ url: string, time: number }>} images - Thumbnails to moderate.
 * @param {string} model - Model identifier forwarded on each entry.
 * @param {number} [maxConcurrent=5] - Concurrency limit for downloads and moderation calls.
 * @param {string} [submissionMode="url"] - "base64" downloads the images first; any other value submits the URLs.
 * @param {object} [downloadOptions] - Passed to downloadImagesAsBase64 in "base64" mode.
 * @param {object} [credentials] - Forwarded on each entry.
 * @returns {Promise<Array>} Whatever processConcurrently resolves to for the entries.
 */
async function requestOpenAIModeration(images, model, maxConcurrent = 5, submissionMode = "url", downloadOptions, credentials) {
  "use step"; // NOTE(review): directive presumably consumed by the workflow tooling; must remain the first statement.
  let entries;
  if (submissionMode === "base64") {
    // Downloaded images are matched back to their timestamps by URL.
    const timeByUrl = new Map(images.map((img) => [img.url, img.time]));
    const imageUrls = images.map((img) => img.url);
    const downloaded = await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent);
    entries = downloaded.map((img) => ({
      url: img.url,
      time: timeByUrl.get(img.url),
      image: img.base64Data,
      model,
      credentials
    }));
  } else {
    entries = images.map((img) => ({
      url: img.url,
      time: img.time,
      image: img.url,
      model,
      credentials
    }));
  }
  return processConcurrently(entries, moderateImageWithOpenAI, maxConcurrent);
}
|
|
2269
2764
|
async function requestOpenAITextModeration(text, model, url, credentials) {
|
|
@@ -2408,6 +2903,7 @@ async function moderateImageWithHive(entry) {
|
|
|
2408
2903
|
const violence = getHiveCategoryScores(classes, HIVE_VIOLENCE_CATEGORIES);
|
|
2409
2904
|
return {
|
|
2410
2905
|
url: entry.url,
|
|
2906
|
+
time: entry.time,
|
|
2411
2907
|
sexual,
|
|
2412
2908
|
violence,
|
|
2413
2909
|
error: false
|
|
@@ -2415,6 +2911,7 @@ async function moderateImageWithHive(entry) {
|
|
|
2415
2911
|
} catch (error) {
|
|
2416
2912
|
return {
|
|
2417
2913
|
url: entry.url,
|
|
2914
|
+
time: entry.time,
|
|
2418
2915
|
sexual: 0,
|
|
2419
2916
|
violence: 0,
|
|
2420
2917
|
error: true,
|
|
@@ -2422,19 +2919,23 @@ async function moderateImageWithHive(entry) {
|
|
|
2422
2919
|
};
|
|
2423
2920
|
}
|
|
2424
2921
|
}
|
|
2425
|
-
async function requestHiveModeration(
|
|
2922
|
+
async function requestHiveModeration(images, maxConcurrent = 5, submissionMode = "url", downloadOptions, credentials) {
|
|
2426
2923
|
"use step";
|
|
2924
|
+
const imageUrls = images.map((img) => img.url);
|
|
2925
|
+
const timeByUrl = new Map(images.map((img) => [img.url, img.time]));
|
|
2427
2926
|
const targets = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map((img) => ({
|
|
2428
2927
|
url: img.url,
|
|
2928
|
+
time: timeByUrl.get(img.url),
|
|
2429
2929
|
source: {
|
|
2430
2930
|
kind: "file",
|
|
2431
2931
|
buffer: img.buffer,
|
|
2432
2932
|
contentType: img.contentType
|
|
2433
2933
|
},
|
|
2434
2934
|
credentials
|
|
2435
|
-
})) :
|
|
2436
|
-
url,
|
|
2437
|
-
|
|
2935
|
+
})) : images.map((img) => ({
|
|
2936
|
+
url: img.url,
|
|
2937
|
+
time: img.time,
|
|
2938
|
+
source: { kind: "url", value: img.url },
|
|
2438
2939
|
credentials
|
|
2439
2940
|
}));
|
|
2440
2941
|
return await processConcurrently(targets, moderateImageWithHive, maxConcurrent);
|
|
@@ -2445,10 +2946,8 @@ async function getThumbnailUrlsFromTimestamps(playbackId, timestampsMs, options)
|
|
|
2445
2946
|
const baseUrl = getMuxThumbnailBaseUrl(playbackId);
|
|
2446
2947
|
const urlPromises = timestampsMs.map(async (tsMs) => {
|
|
2447
2948
|
const time = Number((tsMs / 1e3).toFixed(2));
|
|
2448
|
-
|
|
2449
|
-
|
|
2450
|
-
}
|
|
2451
|
-
return `${baseUrl}?time=${time}&width=${width}`;
|
|
2949
|
+
const url = shouldSign ? await signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials) : `${baseUrl}?time=${time}&width=${width}`;
|
|
2950
|
+
return { url, time };
|
|
2452
2951
|
});
|
|
2453
2952
|
return Promise.all(urlPromises);
|
|
2454
2953
|
}
|
|
@@ -2752,6 +3251,7 @@ var SYSTEM_PROMPT3 = dedent4`
|
|
|
2752
3251
|
- Do not fabricate details or make unsupported assumptions
|
|
2753
3252
|
- Return structured data matching the requested schema
|
|
2754
3253
|
- Output only the JSON object; no markdown or extra text
|
|
3254
|
+
- When a <language> section is provided, all output text MUST be written in that language
|
|
2755
3255
|
</constraints>
|
|
2756
3256
|
|
|
2757
3257
|
<tone_guidance>
|
|
@@ -2806,6 +3306,7 @@ var AUDIO_ONLY_SYSTEM_PROMPT = dedent4`
|
|
|
2806
3306
|
- Return structured data matching the requested schema
|
|
2807
3307
|
- Focus entirely on audio/spoken content - there are no visual elements
|
|
2808
3308
|
- Output only the JSON object; no markdown or extra text
|
|
3309
|
+
- When a <language> section is provided, all output text MUST be written in that language
|
|
2809
3310
|
</constraints>
|
|
2810
3311
|
|
|
2811
3312
|
<tone_guidance>
|
|
@@ -2836,9 +3337,13 @@ function buildUserPrompt4({
|
|
|
2836
3337
|
isAudioOnly = false,
|
|
2837
3338
|
titleLength,
|
|
2838
3339
|
descriptionLength,
|
|
2839
|
-
tagCount
|
|
3340
|
+
tagCount,
|
|
3341
|
+
languageName
|
|
2840
3342
|
}) {
|
|
2841
3343
|
const contextSections = [createToneSection(TONE_INSTRUCTIONS[tone])];
|
|
3344
|
+
if (languageName) {
|
|
3345
|
+
contextSections.push(createLanguageSection(languageName));
|
|
3346
|
+
}
|
|
2842
3347
|
if (transcriptText) {
|
|
2843
3348
|
const format = isCleanTranscript ? "plain text" : "WebVTT";
|
|
2844
3349
|
contextSections.push(createTranscriptSection(transcriptText, format));
|
|
@@ -2951,7 +3456,8 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
2951
3456
|
credentials,
|
|
2952
3457
|
titleLength,
|
|
2953
3458
|
descriptionLength,
|
|
2954
|
-
tagCount
|
|
3459
|
+
tagCount,
|
|
3460
|
+
outputLanguageCode
|
|
2955
3461
|
} = options ?? {};
|
|
2956
3462
|
if (!VALID_TONES.includes(tone)) {
|
|
2957
3463
|
throw new Error(
|
|
@@ -2978,12 +3484,15 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
2978
3484
|
"Signed playback ID requires signing credentials. Set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
2979
3485
|
);
|
|
2980
3486
|
}
|
|
2981
|
-
const
|
|
3487
|
+
const transcriptResult = includeTranscript ? await fetchTranscriptForAsset(assetData, playbackId, {
|
|
2982
3488
|
cleanTranscript,
|
|
2983
3489
|
shouldSign: policy === "signed",
|
|
2984
3490
|
credentials: workflowCredentials,
|
|
2985
3491
|
required: isAudioOnly
|
|
2986
|
-
})
|
|
3492
|
+
}) : void 0;
|
|
3493
|
+
const transcriptText = transcriptResult?.transcriptText ?? "";
|
|
3494
|
+
const resolvedLanguageCode = outputLanguageCode && outputLanguageCode !== "auto" ? outputLanguageCode : transcriptResult?.track?.language_code ?? getReadyTextTracks(assetData)[0]?.language_code;
|
|
3495
|
+
const languageName = resolvedLanguageCode ? getLanguageName(resolvedLanguageCode) : void 0;
|
|
2987
3496
|
const userPrompt = buildUserPrompt4({
|
|
2988
3497
|
tone,
|
|
2989
3498
|
transcriptText,
|
|
@@ -2992,7 +3501,8 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
2992
3501
|
isAudioOnly,
|
|
2993
3502
|
titleLength,
|
|
2994
3503
|
descriptionLength,
|
|
2995
|
-
tagCount
|
|
3504
|
+
tagCount,
|
|
3505
|
+
languageName
|
|
2996
3506
|
});
|
|
2997
3507
|
let analysisResponse;
|
|
2998
3508
|
let imageUrl;
|
|
@@ -3062,164 +3572,6 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
3062
3572
|
};
|
|
3063
3573
|
}
|
|
3064
3574
|
|
|
3065
|
-
// src/lib/language-codes.ts
|
|
3066
|
-
/**
 * Two-letter ISO 639-1 code -> three-letter ISO 639-3 code.
 * Codes that are not listed here are passed through unchanged by the
 * conversion helpers that read this table.
 */
var ISO639_1_TO_3 = {
  // Major world languages
  en: "eng", // English
  es: "spa", // Spanish
  fr: "fra", // French
  de: "deu", // German
  it: "ita", // Italian
  pt: "por", // Portuguese
  ru: "rus", // Russian
  zh: "zho", // Chinese
  ja: "jpn", // Japanese
  ko: "kor", // Korean
  ar: "ara", // Arabic
  hi: "hin", // Hindi

  // European languages
  nl: "nld", // Dutch
  pl: "pol", // Polish
  sv: "swe", // Swedish
  da: "dan", // Danish
  no: "nor", // Norwegian
  fi: "fin", // Finnish
  el: "ell", // Greek
  cs: "ces", // Czech
  hu: "hun", // Hungarian
  ro: "ron", // Romanian
  bg: "bul", // Bulgarian
  hr: "hrv", // Croatian
  sk: "slk", // Slovak
  sl: "slv", // Slovenian
  uk: "ukr", // Ukrainian
  tr: "tur", // Turkish

  // Asian languages
  th: "tha", // Thai
  vi: "vie", // Vietnamese
  id: "ind", // Indonesian
  ms: "msa", // Malay
  tl: "tgl", // Tagalog/Filipino

  // Other languages
  he: "heb", // Hebrew
  fa: "fas", // Persian/Farsi
  bn: "ben", // Bengali
  ta: "tam", // Tamil
  te: "tel", // Telugu
  mr: "mar", // Marathi
  gu: "guj", // Gujarati
  kn: "kan", // Kannada
  ml: "mal", // Malayalam
  pa: "pan", // Punjabi
  ur: "urd", // Urdu
  sw: "swa", // Swahili
  af: "afr", // Afrikaans
  ca: "cat", // Catalan
  eu: "eus", // Basque
  gl: "glg", // Galician
  is: "isl", // Icelandic
  et: "est", // Estonian
  lv: "lav", // Latvian
  lt: "lit" // Lithuanian
};

/** Reverse table (ISO 639-3 -> ISO 639-1), derived from the forward table. */
var ISO639_3_TO_1 = Object.entries(ISO639_1_TO_3).reduce((inverse, [iso1, iso3]) => {
  inverse[iso3] = iso1;
  return inverse;
}, {});
|
|
3181
|
-
/**
 * Converts a language code to its three-letter ISO 639-3 form.
 *
 * The input is lower-cased and trimmed. A three-character code is returned
 * as-is; anything else is looked up in ISO639_1_TO_3 and, when no mapping
 * exists, returned in its normalized form.
 *
 * @param code - Language code string.
 * @returns The mapped ISO 639-3 code, or the normalized input when unmapped.
 */
function toISO639_3(code) {
  const normalized = code.toLowerCase().trim();
  if (normalized.length === 3) {
    return normalized;
  }
  // Own-property check: a bare `table[key]` would hand back inherited
  // Object.prototype members (e.g. "constructor") instead of falling through.
  return Object.prototype.hasOwnProperty.call(ISO639_1_TO_3, normalized) ? ISO639_1_TO_3[normalized] : normalized;
}
|
|
3188
|
-
/**
 * Converts a language code to its two-letter ISO 639-1 form.
 *
 * The input is lower-cased and trimmed. A two-character code is returned
 * as-is; anything else is looked up in ISO639_3_TO_1 and, when no mapping
 * exists, returned in its normalized form.
 *
 * @param code - Language code string.
 * @returns The mapped ISO 639-1 code, or the normalized input when unmapped.
 */
function toISO639_1(code) {
  const normalized = code.toLowerCase().trim();
  if (normalized.length === 2) {
    return normalized;
  }
  // Own-property check: a bare `table[key]` would hand back inherited
  // Object.prototype members (e.g. "constructor") instead of falling through.
  return Object.prototype.hasOwnProperty.call(ISO639_3_TO_1, normalized) ? ISO639_3_TO_1[normalized] : normalized;
}
|
|
3195
|
-
/**
 * Returns both the ISO 639-1 and ISO 639-3 forms of a language code.
 *
 * The input is lower-cased and trimmed; its length decides which side needs
 * converting. Codes that are neither two nor three characters long are
 * reported unchanged on both sides.
 *
 * @param code - Language code string.
 * @returns `{ iso639_1, iso639_3 }`.
 */
function getLanguageCodePair(code) {
  const normalized = code.toLowerCase().trim();
  switch (normalized.length) {
    case 2:
      return { iso639_1: normalized, iso639_3: toISO639_3(normalized) };
    case 3:
      return { iso639_1: toISO639_1(normalized), iso639_3: normalized };
    default:
      return { iso639_1: normalized, iso639_3: normalized };
  }
}
|
|
3213
|
-
/**
 * Resolves an English display name for a language code.
 *
 * The code is first reduced with `toISO639_1`, then looked up through
 * `Intl.DisplayNames`. If the lookup throws or yields nothing, the
 * upper-cased input code is returned instead.
 *
 * @param code - Language code string.
 * @returns The English language name, or `code.toUpperCase()` as a fallback.
 */
function getLanguageName(code) {
  const shortCode = toISO639_1(code);
  let resolvedName;
  try {
    resolvedName = new Intl.DisplayNames(["en"], { type: "language" }).of(shortCode);
  } catch {
    // Lookup failures are expected for unrecognized codes; use the fallback below.
    resolvedName = void 0;
  }
  return resolvedName ?? code.toUpperCase();
}
|
|
3222
|
-
|
|
3223
3575
|
// src/lib/s3-sigv4.ts
|
|
3224
3576
|
var AWS4_ALGORITHM = "AWS4-HMAC-SHA256";
|
|
3225
3577
|
var AWS4_REQUEST_TERMINATOR = "aws4_request";
|
|
@@ -3876,12 +4228,187 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
3876
4228
|
}
|
|
3877
4229
|
|
|
3878
4230
|
// src/workflows/translate-captions.ts
|
|
3879
|
-
import {
|
|
4231
|
+
import {
|
|
4232
|
+
APICallError,
|
|
4233
|
+
generateText as generateText5,
|
|
4234
|
+
NoObjectGeneratedError,
|
|
4235
|
+
Output as Output5,
|
|
4236
|
+
RetryError,
|
|
4237
|
+
TypeValidationError
|
|
4238
|
+
} from "ai";
|
|
4239
|
+
import dedent5 from "dedent";
|
|
3880
4240
|
import { z as z6 } from "zod";
|
|
3881
4241
|
var translationSchema = z6.object({
|
|
3882
4242
|
translation: z6.string()
|
|
3883
4243
|
});
|
|
3884
|
-
var SYSTEM_PROMPT4 =
|
|
4244
|
+
// System prompt for requests that hand the model VTT text (a whole file or a
// chunk of one) and expect `{ "translation": "<translated VTT>" }` back.
var SYSTEM_PROMPT4 = dedent5`
  You are a subtitle translation expert. Translate VTT subtitle files to the target language specified by the user.
  You may receive either a full VTT file or a chunk from a larger VTT.
  Preserve all timestamps, cue ordering, and VTT formatting exactly as they appear.
  Return JSON with a single key "translation" containing the translated VTT content.
`;

// System prompt for cue-by-cue translation: the model receives a list of cues
// and must answer with `{ "translations": [...] }`, one string per input cue.
var CUE_TRANSLATION_SYSTEM_PROMPT = dedent5`
  You are a subtitle translation expert.
  You will receive a sequence of subtitle cues extracted from a VTT file.
  Translate the cues to the requested target language while preserving their original order.
  Treat the cue list as continuous context so the translation reads naturally across adjacent lines.
  Return JSON with a single key "translations" containing exactly one translated string for each input cue.
  Do not merge, split, omit, reorder, or add cues.
`;
|
|
4258
|
+
/**
 * Fallback values for every caption-translation chunking option; each field
 * is used when the caller leaves the matching option unset.
 */
var DEFAULT_TRANSLATION_CHUNKING = {
  enabled: true,
  // 30 minutes, expressed in seconds.
  minimumAssetDurationSeconds: 1800,
  // 30 minutes, expressed in seconds.
  targetChunkDurationSeconds: 1800,
  maxConcurrentTranslations: 4,
  maxCuesPerChunk: 80,
  maxCueTextTokensPerChunk: 2000
};

/** Token-usage counters that are summed when usage objects are aggregated. */
var TOKEN_USAGE_FIELDS = [
  "inputTokens",
  "outputTokens",
  "totalTokens",
  "reasoningTokens",
  "cachedInputTokens"
];
|
|
4273
|
+
/**
 * Error raised when a translated chunk does not line up with its source cues
 * (for example, a different number of cues came back than were sent).
 */
var TranslationChunkValidationError = class extends Error {
  /**
   * @param message - Human-readable description of the mismatch.
   * @param options - Optional standard Error options (e.g. `{ cause }`),
   *   forwarded to `Error` so the underlying failure can be kept.
   */
  constructor(message, options) {
    super(message, options);
    this.name = "TranslationChunkValidationError";
  }
};

/**
 * Type guard for TranslationChunkValidationError.
 *
 * @param error - Any thrown value.
 * @returns `true` when `error` is a TranslationChunkValidationError instance.
 */
function isTranslationChunkValidationError(error) {
  return error instanceof TranslationChunkValidationError;
}
|
|
4282
|
+
/**
 * Reports whether an error ultimately stems from a provider API call.
 *
 * Walks the error chain: a RetryError is followed through `lastError`, any
 * other Error exposing `cause` is followed through `cause`, and the walk
 * succeeds as soon as an APICallError is found.
 *
 * @param error - Any thrown value (may be nullish).
 * @returns `true` if an APICallError is reachable through the chain.
 */
function isProviderServiceError(error) {
  // Track visited links so a self-referential cause chain terminates
  // instead of recursing without bound.
  const visited = new Set();
  let current = error;
  while (current && !visited.has(current)) {
    visited.add(current);
    if (RetryError.isInstance(current)) {
      current = current.lastError;
      continue;
    }
    if (APICallError.isInstance(current)) {
      return true;
    }
    if (current instanceof Error && "cause" in current) {
      current = current.cause;
      continue;
    }
    return false;
  }
  return false;
}
|
|
4297
|
+
/**
 * Decides whether a failed chunk translation should be retried as smaller chunks.
 *
 * Provider/service failures never qualify. Otherwise the error qualifies when
 * it is a NoObjectGeneratedError, a TypeValidationError, or a
 * TranslationChunkValidationError.
 *
 * @param error - The error thrown while translating a chunk.
 * @returns `true` when splitting the chunk is the appropriate reaction.
 */
function shouldSplitChunkTranslationError(error) {
  if (isProviderServiceError(error)) {
    return false;
  }
  if (NoObjectGeneratedError.isInstance(error)) {
    return true;
  }
  if (TypeValidationError.isInstance(error)) {
    return true;
  }
  return isTranslationChunkValidationError(error);
}
|
|
4303
|
+
/**
 * Tells whether a token-usage field holds a usable count.
 *
 * Only finite numbers qualify: `NaN` and `Infinity` are rejected so one bad
 * counter cannot corrupt a summed total. Non-number values are rejected too.
 *
 * @param value - Candidate usage value.
 * @returns `true` when `value` is a finite number.
 */
function isDefinedTokenUsageValue(value) {
  return Number.isFinite(value);
}
|
|
4306
|
+
/**
 * Resolves one numeric chunking option: uses `fallback` when the option is
 * nullish, enforces a minimum of 1, and falls back again when the result is
 * NaN (e.g. the caller passed NaN or a non-numeric string).
 */
function clampTranslationChunkingLimit(value, fallback) {
  const clamped = Math.max(1, value ?? fallback);
  return Number.isNaN(clamped) ? fallback : clamped;
}

/**
 * Fills in caption-translation chunking options from DEFAULT_TRANSLATION_CHUNKING.
 *
 * Every numeric option is floored at 1. A value that cannot be interpreted
 * as a number falls back to the default rather than propagating NaN, which
 * would otherwise make downstream batching loops do no work.
 *
 * @param options - Partial chunking options (may be undefined).
 * @returns A fully populated options object.
 */
function resolveTranslationChunkingOptions(options) {
  const defaults = DEFAULT_TRANSLATION_CHUNKING;
  return {
    enabled: options?.enabled ?? defaults.enabled,
    minimumAssetDurationSeconds: clampTranslationChunkingLimit(
      options?.minimumAssetDurationSeconds,
      defaults.minimumAssetDurationSeconds
    ),
    targetChunkDurationSeconds: clampTranslationChunkingLimit(
      options?.targetChunkDurationSeconds,
      defaults.targetChunkDurationSeconds
    ),
    maxConcurrentTranslations: clampTranslationChunkingLimit(
      options?.maxConcurrentTranslations,
      defaults.maxConcurrentTranslations
    ),
    maxCuesPerChunk: clampTranslationChunkingLimit(
      options?.maxCuesPerChunk,
      defaults.maxCuesPerChunk
    ),
    maxCueTextTokensPerChunk: clampTranslationChunkingLimit(
      options?.maxCueTextTokensPerChunk,
      defaults.maxCueTextTokensPerChunk
    )
  };
}
|
|
4332
|
+
/**
 * Sums token-usage objects field by field.
 *
 * For each name in TOKEN_USAGE_FIELDS, adds up the values accepted by
 * `isDefinedTokenUsageValue`. A field appears in the result only if at least
 * one usage object supplied a value for it. Nullish entries in `usages`
 * (e.g. a step that reported no usage) are skipped instead of throwing.
 *
 * @param usages - Iterable of usage objects; entries may be nullish.
 * @returns An object holding the per-field totals.
 */
function aggregateTokenUsage(usages) {
  const totals = {};
  for (const field of TOKEN_USAGE_FIELDS) {
    let sum = 0;
    let sawValue = false;
    for (const usage of usages) {
      const value = usage?.[field];
      if (isDefinedTokenUsageValue(value)) {
        sum += value;
        sawValue = true;
      }
    }
    // Leave the field out entirely when nobody reported it, rather than emitting 0.
    if (sawValue) {
      totals[field] = sum;
    }
  }
  return totals;
}
|
|
4341
|
+
/**
 * Packages a run of cues into a translation chunk request.
 *
 * @param id - Identifier for the chunk.
 * @param cues - Non-empty list of cues; `startTime` is read from the first
 *   and `endTime` from the last.
 * @param cueBlocks - Cue blocks carried along unchanged with the cues.
 * @returns `{ id, cueCount, startTime, endTime, cues, cueBlocks }`.
 * @throws {TypeError} When `cues` is empty, since no time range can be derived.
 */
function createTranslationChunkRequest(id, cues, cueBlocks) {
  if (cues.length === 0) {
    // Fail with a descriptive message instead of an opaque
    // "cannot read properties of undefined" from the accesses below.
    throw new TypeError(`Translation chunk ${id} must contain at least one cue`);
  }
  return {
    id,
    cueCount: cues.length,
    startTime: cues[0].startTime,
    endTime: cues[cues.length - 1].endTime,
    cues,
    cueBlocks
  };
}
|
|
4351
|
+
/**
 * Splits one run of cues into chunk requests that respect a cue-count and a
 * text-token budget, as decided by `chunkVTTCuesByBudget`.
 *
 * When the budget yields a single chunk it keeps `id`; otherwise each piece
 * is named `${id}-part-${index}`.
 *
 * @param id - Identifier of the run being split.
 * @param cues - Cues of the run.
 * @param cueBlocks - Cue blocks index-aligned with `cues`.
 * @param maxCuesPerChunk - Cue-count budget per chunk.
 * @param maxCueTextTokensPerChunk - Text-token budget per chunk.
 * @returns One chunk request per budget chunk, in order.
 */
function splitTranslationChunkRequestByBudget(id, cues, cueBlocks, maxCuesPerChunk, maxCueTextTokensPerChunk) {
  const budgetChunks = chunkVTTCuesByBudget(cues, {
    maxCuesPerChunk,
    maxTextTokensPerChunk: maxCueTextTokensPerChunk
  });
  const keepsOriginalId = budgetChunks.length === 1;
  return budgetChunks.map(({ cueStartIndex, cueEndIndex }, partIndex) => {
    // cueEndIndex is inclusive, slice() wants an exclusive end.
    const sliceEnd = cueEndIndex + 1;
    const partId = keepsOriginalId ? id : `${id}-part-${partIndex}`;
    return createTranslationChunkRequest(
      partId,
      cues.slice(cueStartIndex, sliceEnd),
      cueBlocks.slice(cueStartIndex, sliceEnd)
    );
  });
}
|
|
4364
|
+
/**
 * Plans how a VTT document is cut into translation chunk requests.
 *
 * Returns `null` (the caller then translates the whole VTT in one piece)
 * when no cues are parsed or when the number of cue blocks does not match
 * the number of parsed cues. Otherwise returns the VTT preamble plus:
 *  - a single "chunk-0" request when chunking is disabled, the asset duration
 *    is not a number, or it is below the configured minimum;
 *  - duration-based chunks, each further split by the cue/token budget, in
 *    every other case.
 *
 * @param vttContent - Source VTT text.
 * @param assetDurationSeconds - Asset duration in seconds, if known.
 * @param chunkingOptions - Partial chunking options.
 * @returns `{ preamble, chunks }`, or `null` to request the full-VTT fallback.
 */
function buildTranslationChunkRequests(vttContent, assetDurationSeconds, chunkingOptions) {
  const chunking = resolveTranslationChunkingOptions(chunkingOptions);
  const cues = parseVTTCues(vttContent);
  if (cues.length === 0) {
    return null;
  }

  const { preamble, cueBlocks } = splitVttPreambleAndCueBlocks(vttContent);
  if (cueBlocks.length !== cues.length) {
    console.warn(
      `Falling back to full-VTT caption translation because cue block count (${cueBlocks.length}) does not match parsed cue count (${cues.length}).`
    );
    return null;
  }

  const translateAsSingleChunk = !chunking.enabled || typeof assetDurationSeconds !== "number" || assetDurationSeconds < chunking.minimumAssetDurationSeconds;
  if (translateAsSingleChunk) {
    return {
      preamble,
      chunks: [createTranslationChunkRequest("chunk-0", cues, cueBlocks)]
    };
  }

  // Allow chunks to range from 2/3 to 7/6 of the target duration.
  const targetChunkDurationSeconds = chunking.targetChunkDurationSeconds;
  const maxChunkDurationSeconds = Math.max(targetChunkDurationSeconds, Math.round(targetChunkDurationSeconds * (7 / 6)));
  const minChunkDurationSeconds = Math.max(1, Math.round(targetChunkDurationSeconds * (2 / 3)));
  const durationChunks = chunkVTTCuesByDuration(cues, {
    targetChunkDurationSeconds,
    maxChunkDurationSeconds,
    minChunkDurationSeconds
  });

  const chunks = [];
  for (const durationChunk of durationChunks) {
    const sliceEnd = durationChunk.cueEndIndex + 1;
    chunks.push(
      ...splitTranslationChunkRequestByBudget(
        durationChunk.id,
        cues.slice(durationChunk.cueStartIndex, sliceEnd),
        cueBlocks.slice(durationChunk.cueStartIndex, sliceEnd),
        chunking.maxCuesPerChunk,
        chunking.maxCueTextTokensPerChunk
      )
    );
  }
  return { preamble, chunks };
}
|
|
3885
4412
|
async function fetchVttFromMux(vttUrl) {
|
|
3886
4413
|
"use step";
|
|
3887
4414
|
const vttResponse = await fetch(vttUrl);
|
|
@@ -3927,6 +4454,176 @@ ${vttContent}`
|
|
|
3927
4454
|
}
|
|
3928
4455
|
};
|
|
3929
4456
|
}
|
|
4457
|
+
/**
 * Translates a list of cues with a single structured model call.
 *
 * The model receives each cue's index, start/end time and text as JSON and
 * must reply with exactly `cues.length` non-empty strings; the reply schema
 * enforces that count.
 *
 * @param params.cues - Cues to translate.
 * @param params.fromLanguageCode - Source language code, inserted into the prompt.
 * @param params.toLanguageCode - Target language code, inserted into the prompt.
 * @param params.provider - Provider passed to `createLanguageModelFromConfig`.
 * @param params.modelId - Model id passed to `createLanguageModelFromConfig`.
 * @param params.credentials - Credentials passed to `createLanguageModelFromConfig`.
 * @returns `{ translations, usage }`, where `usage` carries the five token counters.
 */
async function translateCueChunkWithAI({
  cues,
  fromLanguageCode,
  toLanguageCode,
  provider,
  modelId,
  credentials
}) {
  "use step";
  const model = await createLanguageModelFromConfig(provider, modelId, credentials);
  const expectedCueCount = cues.length;
  const responseSchema = z6.object({
    translations: z6.array(z6.string().min(1)).length(expectedCueCount)
  });
  const cuePayload = cues.map((cue, index) => {
    const { startTime, endTime, text } = cue;
    return { index, startTime, endTime, text };
  });
  const userInstruction = `Translate from ${fromLanguageCode} to ${toLanguageCode}.
Return exactly ${cues.length} translated cues in the same order as the input.

${JSON.stringify(cuePayload, null, 2)}`;
  const response = await generateText5({
    model,
    output: Output5.object({ schema: responseSchema }),
    messages: [
      { role: "system", content: CUE_TRANSLATION_SYSTEM_PROMPT },
      { role: "user", content: userInstruction }
    ]
  });
  const { translations } = response.output;
  const { inputTokens, outputTokens, totalTokens, reasoningTokens, cachedInputTokens } = response.usage;
  return {
    translations,
    usage: { inputTokens, outputTokens, totalTokens, reasoningTokens, cachedInputTokens }
  };
}
|
|
4504
|
+
/**
 * Cuts a chunk request into two halves at `floor(cueCount / 2)`.
 *
 * The halves are named `${chunk.id}-a` and `${chunk.id}-b`; cues and cue
 * blocks are sliced at the same index so they stay aligned.
 *
 * @param chunk - Chunk request to split.
 * @returns A two-element array: the first half, then the second half.
 * @throws {Error} When the midpoint would leave one half empty.
 */
function splitTranslationChunkAtMidpoint(chunk) {
  const { id, cueCount, cues, cueBlocks } = chunk;
  const splitIndex = Math.floor(cueCount / 2);
  if (splitIndex <= 0 || splitIndex >= cueCount) {
    throw new Error(`Cannot split chunk ${chunk.id} with cueCount=${chunk.cueCount}`);
  }
  const halves = [
    { suffix: "a", from: 0, to: splitIndex },
    { suffix: "b", from: splitIndex, to: void 0 }
  ];
  return halves.map(
    ({ suffix, from, to }) => createTranslationChunkRequest(
      `${id}-${suffix}`,
      cues.slice(from, to),
      cueBlocks.slice(from, to)
    )
  );
}
|
|
4522
|
+
/**
 * Translates one chunk and, when the failure looks chunk-size related,
 * retries it as two halves.
 *
 * On success the translated cues are merged back into the chunk's cue
 * blocks. On failure:
 *  - if `shouldSplitChunkTranslationError` rejects the error, or the chunk
 *    holds at most one cue, an Error naming the chunk and its time range is
 *    thrown, with the original error attached as `cause`;
 *  - otherwise the chunk is halved, both halves are translated concurrently
 *    through this same function, and their VTT and token usage are combined.
 *
 * @param params.chunk - Chunk request to translate.
 * @param params.fromLanguageCode - Source language code.
 * @param params.toLanguageCode - Target language code.
 * @param params.provider - Model provider.
 * @param params.modelId - Model identifier.
 * @param params.credentials - Credentials forwarded to the model call.
 * @returns `{ translatedVtt, usage }` for the chunk.
 * @throws {Error} When the chunk cannot be translated and cannot be split further.
 */
async function translateChunkWithFallback({
  chunk,
  fromLanguageCode,
  toLanguageCode,
  provider,
  modelId,
  credentials
}) {
  "use step";
  const translationTarget = { fromLanguageCode, toLanguageCode, provider, modelId, credentials };
  try {
    const result = await translateCueChunkWithAI({ cues: chunk.cues, ...translationTarget });
    if (result.translations.length !== chunk.cueCount) {
      throw new TranslationChunkValidationError(
        `Chunk ${chunk.id} returned ${result.translations.length} cues, expected ${chunk.cueCount} for ${Math.round(chunk.startTime)}s-${Math.round(chunk.endTime)}s`
      );
    }
    return {
      translatedVtt: buildVttFromTranslatedCueBlocks(chunk.cueBlocks, result.translations),
      usage: result.usage
    };
  } catch (error) {
    if (!shouldSplitChunkTranslationError(error) || chunk.cueCount <= 1) {
      // Keep the underlying error reachable via `cause` so callers (and
      // cause-walking helpers) can still see what actually went wrong.
      throw new Error(
        `Chunk ${chunk.id} failed for ${Math.round(chunk.startTime)}s-${Math.round(chunk.endTime)}s: ${error instanceof Error ? error.message : "Unknown error"}`,
        { cause: error }
      );
    }
    const [leftChunk, rightChunk] = splitTranslationChunkAtMidpoint(chunk);
    const [leftResult, rightResult] = await Promise.all([
      translateChunkWithFallback({ chunk: leftChunk, ...translationTarget }),
      translateChunkWithFallback({ chunk: rightChunk, ...translationTarget })
    ]);
    return {
      translatedVtt: concatenateVttSegments([leftResult.translatedVtt, rightResult.translatedVtt]),
      usage: aggregateTokenUsage([leftResult.usage, rightResult.usage])
    };
  }
}
|
|
4580
|
+
/**
 * Translates a caption track, chunking it when a chunk plan is available.
 *
 * If `buildTranslationChunkRequests` returns no plan, the whole VTT is sent
 * through `translateVttWithAI`. Otherwise the planned chunks are translated
 * in consecutive batches of `maxConcurrentTranslations`, each batch running
 * concurrently, and the results are stitched together behind the original
 * preamble with their token usage summed.
 *
 * @param params.vttContent - Source VTT text.
 * @param params.assetDurationSeconds - Asset duration in seconds, if known.
 * @param params.fromLanguageCode - Source language code.
 * @param params.toLanguageCode - Target language code.
 * @param params.provider - Model provider.
 * @param params.modelId - Model identifier.
 * @param params.credentials - Credentials forwarded to the model calls.
 * @param params.chunking - Partial chunking options.
 * @returns `{ translatedVtt, usage }`.
 */
async function translateCaptionTrack({
  vttContent,
  assetDurationSeconds,
  fromLanguageCode,
  toLanguageCode,
  provider,
  modelId,
  credentials,
  chunking
}) {
  "use step";
  const chunkPlan = buildTranslationChunkRequests(vttContent, assetDurationSeconds, chunking);
  if (!chunkPlan) {
    return translateVttWithAI({
      vttContent,
      fromLanguageCode,
      toLanguageCode,
      provider,
      modelId,
      credentials
    });
  }

  const { maxConcurrentTranslations: batchSize } = resolveTranslationChunkingOptions(chunking);
  const translatedSegments = [];
  const usageByChunk = [];
  let offset = 0;
  while (offset < chunkPlan.chunks.length) {
    const batch = chunkPlan.chunks.slice(offset, offset + batchSize);
    const batchResults = await Promise.all(
      batch.map((chunk) => translateChunkWithFallback({
        chunk,
        fromLanguageCode,
        toLanguageCode,
        provider,
        modelId,
        credentials
      }))
    );
    // Results arrive in chunk order, so appending keeps the VTT in sequence.
    for (const { translatedVtt, usage } of batchResults) {
      translatedSegments.push(translatedVtt);
      usageByChunk.push(usage);
    }
    offset += batchSize;
  }

  return {
    translatedVtt: concatenateVttSegments(translatedSegments, chunkPlan.preamble),
    usage: aggregateTokenUsage(usageByChunk)
  };
}
|
|
3930
4627
|
async function uploadVttToS3({
|
|
3931
4628
|
translatedVtt,
|
|
3932
4629
|
assetId,
|
|
@@ -3987,7 +4684,8 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
|
|
|
3987
4684
|
s3Bucket: providedS3Bucket,
|
|
3988
4685
|
uploadToMux: uploadToMuxOption,
|
|
3989
4686
|
storageAdapter,
|
|
3990
|
-
credentials: providedCredentials
|
|
4687
|
+
credentials: providedCredentials,
|
|
4688
|
+
chunking
|
|
3991
4689
|
} = options;
|
|
3992
4690
|
const credentials = providedCredentials;
|
|
3993
4691
|
const effectiveStorageAdapter = storageAdapter;
|
|
@@ -4048,13 +4746,15 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
|
|
|
4048
4746
|
let translatedVtt;
|
|
4049
4747
|
let usage;
|
|
4050
4748
|
try {
|
|
4051
|
-
const result = await
|
|
4749
|
+
const result = await translateCaptionTrack({
|
|
4052
4750
|
vttContent,
|
|
4751
|
+
assetDurationSeconds,
|
|
4053
4752
|
fromLanguageCode,
|
|
4054
4753
|
toLanguageCode,
|
|
4055
4754
|
provider: modelConfig.provider,
|
|
4056
4755
|
modelId: modelConfig.modelId,
|
|
4057
|
-
credentials
|
|
4756
|
+
credentials,
|
|
4757
|
+
chunking
|
|
4058
4758
|
});
|
|
4059
4759
|
translatedVtt = result.translatedVtt;
|
|
4060
4760
|
usage = result.usage;
|
|
@@ -4127,6 +4827,7 @@ export {
|
|
|
4127
4827
|
HIVE_SEXUAL_CATEGORIES,
|
|
4128
4828
|
HIVE_VIOLENCE_CATEGORIES,
|
|
4129
4829
|
SUMMARY_KEYWORD_LIMIT,
|
|
4830
|
+
aggregateTokenUsage,
|
|
4130
4831
|
askQuestions,
|
|
4131
4832
|
burnedInCaptionsSchema,
|
|
4132
4833
|
chapterSchema,
|
|
@@ -4138,6 +4839,7 @@ export {
|
|
|
4138
4839
|
getSummaryAndTags,
|
|
4139
4840
|
hasBurnedInCaptions,
|
|
4140
4841
|
questionAnswerSchema,
|
|
4842
|
+
shouldSplitChunkTranslationError,
|
|
4141
4843
|
summarySchema,
|
|
4142
4844
|
translateAudio,
|
|
4143
4845
|
translateCaptions,
|