npm - @mux/ai - Versions diffs - 0.8.2 → 0.10.0 - Mend

@mux/ai 0.8.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/dist/{index-Nxf6BaBO.d.ts → index-C8-E3VR9.d.ts} +59 -4
package/dist/{index-DP02N3iR.d.ts → index-CA7bG50u.d.ts} +41 -2
package/dist/index.d.ts +3 -3
package/dist/index.js +908 -199
package/dist/index.js.map +1 -1
package/dist/primitives/index.d.ts +1 -1
package/dist/primitives/index.js +336 -14
package/dist/primitives/index.js.map +1 -1
package/dist/workflows/index.d.ts +1 -1
package/dist/workflows/index.js +900 -198
package/dist/workflows/index.js.map +1 -1
package/package.json +1 -1

package/dist/workflows/index.js CHANGED Viewed

@@ -837,6 +837,12 @@ function createToneSection(instruction) {
     content: instruction
   };
 }
+function createLanguageSection(languageName) {
+  return {
+    tag: "language",
+    content: `All output (title, description, keywords, chapter titles) MUST be written in ${languageName}.`
+  };
+}
 // src/lib/retry.ts
 var DEFAULT_RETRY_OPTIONS = {
@@ -981,24 +987,82 @@ function findCaptionTrack(asset, languageCode) {
     (track) => track.text_type === "subtitles" && track.language_code === languageCode
   );
 }
+function normalizeLineEndings(value) {
+  return value.replace(/\r\n/g, "\n");
+}
+function isTimingLine(line) {
+  return line.includes("-->");
+}
+function parseNumericCueIdentifier(line) {
+  if (!/^\d+$/.test(line)) {
+    return null;
+  }
+  return Number.parseInt(line, 10);
+}
+function isLikelyTitledCueIdentifier(line) {
+  return /^\d+\s+-\s+\S.*$/.test(line);
+}
+function isLikelyCueIdentifier({
+  line,
+  nextLine,
+  previousCueIdentifier
+}) {
+  if (!line || !nextLine || !isTimingLine(nextLine)) {
+    return false;
+  }
+  const numericIdentifier = parseNumericCueIdentifier(line);
+  if (numericIdentifier !== null) {
+    if (previousCueIdentifier === null || previousCueIdentifier === void 0) {
+      return numericIdentifier === 1;
+    }
+    return numericIdentifier === previousCueIdentifier + 1;
+  }
+  return isLikelyTitledCueIdentifier(line);
+}
+function getCueIdentifierLineIndex(lines, timingLineIndex, previousCueIdentifier) {
+  const identifierIndex = timingLineIndex - 1;
+  if (identifierIndex < 0) {
+    return -1;
+  }
+  const candidate = lines[identifierIndex].trim();
+  if (!candidate || isTimingLine(candidate)) {
+    return -1;
+  }
+  return isLikelyCueIdentifier({
+    line: candidate,
+    nextLine: lines[timingLineIndex]?.trim(),
+    previousCueIdentifier
+  }) ? identifierIndex : -1;
+}
 function extractTextFromVTT(vttContent) {
   if (!vttContent.trim()) {
     return "";
   }
   const lines = vttContent.split("\n");
   const textLines = [];
+  let previousCueIdentifier = null;
+  let isInsideNoteBlock = false;
   for (let i = 0; i < lines.length; i++) {
     const line = lines[i].trim();
-    if (!line)
+    const nextLine = lines[i + 1]?.trim();
+    if (!line) {
+      isInsideNoteBlock = false;
+      continue;
+    }
+    if (isInsideNoteBlock)
       continue;
     if (line === "WEBVTT")
       continue;
-    if (line.startsWith("NOTE "))
+    if (line === "NOTE" || line.startsWith("NOTE ")) {
+      isInsideNoteBlock = true;
       continue;
-    if (line.includes("-->"))
+    }
+    if (isTimingLine(line))
       continue;
-    if (/^[\w-]+$/.test(line) && !line.includes(" "))
+    if (isLikelyCueIdentifier({ line, nextLine, previousCueIdentifier })) {
+      previousCueIdentifier = parseNumericCueIdentifier(line);
       continue;
+    }
     if (line.startsWith("STYLE") || line.startsWith("REGION"))
       continue;
     const cleanLine = line.replace(/<[^>]*>/g, "").trim();
@@ -1047,20 +1111,34 @@ function parseVTTCues(vttContent) {
     return [];
   const lines = vttContent.split("\n");
   const cues = [];
+  let previousCueIdentifier = null;
   for (let i = 0; i < lines.length; i++) {
     const line = lines[i].trim();
-    if (line.includes("-->")) {
+    if (isTimingLine(line)) {
       const [startStr, endStr] = line.split(" --> ").map((s) => s.trim());
       const startTime = vttTimestampToSeconds(startStr);
       const endTime = vttTimestampToSeconds(endStr.split(" ")[0]);
-      const textLines = [];
+      const currentCueIdentifierLine = lines[i - 1]?.trim() ?? "";
+      const currentCueIdentifier = isLikelyCueIdentifier({
+        line: currentCueIdentifierLine,
+        nextLine: line,
+        previousCueIdentifier
+      }) ? parseNumericCueIdentifier(currentCueIdentifierLine) : null;
+      const rawTextLines = [];
       let j = i + 1;
-      while (j < lines.length && lines[j].trim() && !lines[j].includes("-->")) {
-        const cleanLine = lines[j].trim().replace(/<[^>]*>/g, "");
-        if (cleanLine)
-          textLines.push(cleanLine);
+      while (j < lines.length && lines[j].trim() && !isTimingLine(lines[j].trim())) {
+        rawTextLines.push(lines[j].trim());
         j++;
       }
+      const trailingNumericLine = parseNumericCueIdentifier(rawTextLines.at(-1) ?? "");
+      if (trailingNumericLine !== null && isLikelyCueIdentifier({
+        line: rawTextLines.at(-1) ?? "",
+        nextLine: lines[j]?.trim(),
+        previousCueIdentifier: currentCueIdentifier
+      }) && rawTextLines.length > 1) {
+        rawTextLines.pop();
+      }
+      const textLines = rawTextLines.map((textLine) => textLine.replace(/<[^>]*>/g, "")).filter(Boolean);
       if (textLines.length > 0) {
         cues.push({
           startTime,
@@ -1068,10 +1146,102 @@ function parseVTTCues(vttContent) {
           text: textLines.join(" ")
         });
       }
+      previousCueIdentifier = currentCueIdentifier;
     }
   }
   return cues;
 }
+function splitVttPreambleAndCueBlocks(vttContent) {
+  const normalizedContent = normalizeLineEndings(vttContent).trim();
+  if (!normalizedContent) {
+    return {
+      preamble: "WEBVTT",
+      cueBlocks: []
+    };
+  }
+  const rawBlocks = normalizedContent.split(/\n{2,}/).map((block) => block.trim()).filter(Boolean);
+  const cueBlockStartIndex = rawBlocks.findIndex((block) => block.includes("-->"));
+  if (cueBlockStartIndex === -1) {
+    return {
+      preamble: normalizedContent.startsWith("WEBVTT") ? normalizedContent : `WEBVTT
+${normalizedContent}`,
+      cueBlocks: []
+    };
+  }
+  const hasMergedCueBlocks = rawBlocks.slice(cueBlockStartIndex).some((block) => (block.match(/-->/g) ?? []).length > 1);
+  if (hasMergedCueBlocks) {
+    const lines = normalizedContent.split("\n");
+    const timingLineIndices = lines.map((line, index) => isTimingLine(line.trim()) ? index : -1).filter((index) => index >= 0);
+    let previousCueIdentifier = null;
+    const firstCueStartIndex = getCueIdentifierLineIndex(lines, timingLineIndices[0], previousCueIdentifier);
+    const preambleEndIndex = firstCueStartIndex >= 0 ? firstCueStartIndex : timingLineIndices[0];
+    const preamble2 = lines.slice(0, preambleEndIndex).join("\n").trim() || "WEBVTT";
+    const cueBlocks2 = timingLineIndices.map((timingLineIndex, index) => {
+      const cueIdentifierLineIndex = getCueIdentifierLineIndex(lines, timingLineIndex, previousCueIdentifier);
+      const cueStartIndex = cueIdentifierLineIndex >= 0 ? cueIdentifierLineIndex : timingLineIndex;
+      const currentCueIdentifier = cueIdentifierLineIndex >= 0 ? parseNumericCueIdentifier(lines[cueIdentifierLineIndex].trim()) : null;
+      const nextTimingLineIndex = timingLineIndices[index + 1] ?? lines.length;
+      let cueEndIndex = nextTimingLineIndex - 1;
+      while (cueEndIndex > timingLineIndex && !lines[cueEndIndex].trim()) {
+        cueEndIndex--;
+      }
+      const nextCueIdentifierLineIndex = index < timingLineIndices.length - 1 ? getCueIdentifierLineIndex(lines, nextTimingLineIndex, currentCueIdentifier) : -1;
+      if (nextCueIdentifierLineIndex === cueEndIndex) {
+        cueEndIndex--;
+      }
+      while (cueEndIndex > timingLineIndex && !lines[cueEndIndex].trim()) {
+        cueEndIndex--;
+      }
+      previousCueIdentifier = currentCueIdentifier;
+      return lines.slice(cueStartIndex, cueEndIndex + 1).join("\n").trim();
+    });
+    return {
+      preamble: preamble2,
+      cueBlocks: cueBlocks2
+    };
+  }
+  const preambleBlocks = rawBlocks.slice(0, cueBlockStartIndex);
+  const cueBlocks = rawBlocks.slice(cueBlockStartIndex);
+  const preamble = preambleBlocks.length > 0 ? preambleBlocks.join("\n\n") : "WEBVTT";
+  return {
+    preamble,
+    cueBlocks
+  };
+}
+function buildVttFromCueBlocks(cueBlocks, preamble = "WEBVTT") {
+  if (cueBlocks.length === 0) {
+    return `${preamble.trim()}
+`;
+  }
+  return `${preamble.trim()}
+${cueBlocks.map((block) => block.trim()).join("\n\n")}
+`;
+}
+function replaceCueText(cueBlock, translatedText) {
+  const lines = normalizeLineEndings(cueBlock).split("\n").map((line) => line.trim()).filter(Boolean);
+  const timingLineIndex = lines.findIndex((line) => line.includes("-->"));
+  if (timingLineIndex === -1) {
+    throw new Error("Cue block is missing a timestamp line");
+  }
+  const headerLines = lines.slice(0, timingLineIndex + 1);
+  const translatedLines = normalizeLineEndings(translatedText).split("\n").map((line) => line.trim()).filter(Boolean);
+  return [...headerLines, ...translatedLines].join("\n");
+}
+function buildVttFromTranslatedCueBlocks(cueBlocks, translatedTexts, preamble = "WEBVTT") {
+  if (cueBlocks.length !== translatedTexts.length) {
+    throw new Error(`Expected ${cueBlocks.length} translated cues, received ${translatedTexts.length}`);
+  }
+  return buildVttFromCueBlocks(
+    cueBlocks.map((cueBlock, index) => replaceCueText(cueBlock, translatedTexts[index])),
+    preamble
+  );
+}
+function concatenateVttSegments(segments, preamble = "WEBVTT") {
+  const cueBlocks = segments.flatMap((segment) => splitVttPreambleAndCueBlocks(segment).cueBlocks);
+  return buildVttFromCueBlocks(cueBlocks, preamble);
+}
 async function buildTranscriptUrl(playbackId, trackId, shouldSign = false, credentials) {
   "use step";
   const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
@@ -1200,6 +1370,7 @@ var SYSTEM_PROMPT = dedent`
     - Only describe observable evidence from frames or transcript
     - Do not fabricate details or make unsupported assumptions
     - Return structured data matching the requested schema exactly
+    - Provide reasoning in the same language as the question
   </constraints>
   <language_guidelines>
@@ -1600,6 +1771,166 @@ async function hasBurnedInCaptions(assetId, options = {}) {
 import { generateText as generateText3, Output as Output3 } from "ai";
 import dedent3 from "dedent";
 import { z as z4 } from "zod";
+// src/lib/language-codes.ts
+var ISO639_1_TO_3 = {
+  // Major world languages
+  en: "eng",
+  // English
+  es: "spa",
+  // Spanish
+  fr: "fra",
+  // French
+  de: "deu",
+  // German
+  it: "ita",
+  // Italian
+  pt: "por",
+  // Portuguese
+  ru: "rus",
+  // Russian
+  zh: "zho",
+  // Chinese
+  ja: "jpn",
+  // Japanese
+  ko: "kor",
+  // Korean
+  ar: "ara",
+  // Arabic
+  hi: "hin",
+  // Hindi
+  // European languages
+  nl: "nld",
+  // Dutch
+  pl: "pol",
+  // Polish
+  sv: "swe",
+  // Swedish
+  da: "dan",
+  // Danish
+  no: "nor",
+  // Norwegian
+  fi: "fin",
+  // Finnish
+  el: "ell",
+  // Greek
+  cs: "ces",
+  // Czech
+  hu: "hun",
+  // Hungarian
+  ro: "ron",
+  // Romanian
+  bg: "bul",
+  // Bulgarian
+  hr: "hrv",
+  // Croatian
+  sk: "slk",
+  // Slovak
+  sl: "slv",
+  // Slovenian
+  uk: "ukr",
+  // Ukrainian
+  tr: "tur",
+  // Turkish
+  // Asian languages
+  th: "tha",
+  // Thai
+  vi: "vie",
+  // Vietnamese
+  id: "ind",
+  // Indonesian
+  ms: "msa",
+  // Malay
+  tl: "tgl",
+  // Tagalog/Filipino
+  // Other languages
+  he: "heb",
+  // Hebrew
+  fa: "fas",
+  // Persian/Farsi
+  bn: "ben",
+  // Bengali
+  ta: "tam",
+  // Tamil
+  te: "tel",
+  // Telugu
+  mr: "mar",
+  // Marathi
+  gu: "guj",
+  // Gujarati
+  kn: "kan",
+  // Kannada
+  ml: "mal",
+  // Malayalam
+  pa: "pan",
+  // Punjabi
+  ur: "urd",
+  // Urdu
+  sw: "swa",
+  // Swahili
+  af: "afr",
+  // Afrikaans
+  ca: "cat",
+  // Catalan
+  eu: "eus",
+  // Basque
+  gl: "glg",
+  // Galician
+  is: "isl",
+  // Icelandic
+  et: "est",
+  // Estonian
+  lv: "lav",
+  // Latvian
+  lt: "lit"
+  // Lithuanian
+};
+var ISO639_3_TO_1 = Object.fromEntries(
+  Object.entries(ISO639_1_TO_3).map(([iso1, iso3]) => [iso3, iso1])
+);
+function toISO639_3(code) {
+  const normalized = code.toLowerCase().trim();
+  if (normalized.length === 3) {
+    return normalized;
+  }
+  return ISO639_1_TO_3[normalized] ?? normalized;
+}
+function toISO639_1(code) {
+  const normalized = code.toLowerCase().trim();
+  if (normalized.length === 2) {
+    return normalized;
+  }
+  return ISO639_3_TO_1[normalized] ?? normalized;
+}
+function getLanguageCodePair(code) {
+  const normalized = code.toLowerCase().trim();
+  if (normalized.length === 2) {
+    return {
+      iso639_1: normalized,
+      iso639_3: toISO639_3(normalized)
+    };
+  } else if (normalized.length === 3) {
+    return {
+      iso639_1: toISO639_1(normalized),
+      iso639_3: normalized
+    };
+  }
+  return {
+    iso639_1: normalized,
+    iso639_3: normalized
+  };
+}
+function getLanguageName(code) {
+  const iso639_1 = toISO639_1(code);
+  try {
+    const displayNames = new Intl.DisplayNames(["en"], { type: "language" });
+    return displayNames.of(iso639_1) ?? code.toUpperCase();
+  } catch {
+    return code.toUpperCase();
+  }
+}
+// src/workflows/chapters.ts
 var chapterSchema = z4.object({
   startTime: z4.number(),
   title: z4.string()
@@ -1660,7 +1991,8 @@ var chapterSystemPromptBuilder = createPromptBuilder({
       content: dedent3`
         - Only use information present in the transcript
         - Return structured data that matches the requested JSON schema
-        - Do not add commentary or extra text outside the JSON`
+        - Do not add commentary or extra text outside the JSON
+        - When a <language> section is provided, all chapter titles MUST be written in that language`
     },
     qualityGuidelines: {
       tag: "quality_guidelines",
@@ -1708,7 +2040,7 @@ var chaptersPromptBuilder = createPromptBuilder({
       content: dedent3`
         - Keep titles concise and descriptive
         - Avoid filler or generic labels like "Chapter 1"
-        - Use the transcript's language and terminology`
+        - Use the transcript's terminology`
     }
   },
   sectionOrder: ["task", "outputFormat", "chapterGuidelines", "titleGuidelines"]
@@ -1717,7 +2049,8 @@ function buildUserPrompt3({
   timestampedTranscript,
   promptOverrides,
   minChaptersPerHour = 3,
-  maxChaptersPerHour = 8
+  maxChaptersPerHour = 8,
+  languageName
 }) {
   const contextSections = [
     {
@@ -1726,6 +2059,9 @@ function buildUserPrompt3({
       attributes: { format: "seconds" }
     }
   ];
+  if (languageName) {
+    contextSections.push(createLanguageSection(languageName));
+  }
   const dynamicChapterGuidelines = dedent3`
     - Create at least ${minChaptersPerHour} and at most ${maxChaptersPerHour} chapters per hour of content
     - Use start times in seconds (not HH:MM:SS)
@@ -1745,7 +2081,8 @@ async function generateChapters(assetId, languageCode, options = {}) {
     promptOverrides,
     minChaptersPerHour,
     maxChaptersPerHour,
-    credentials
+    credentials,
+    outputLanguageCode
   } = options;
   const modelConfig = resolveLanguageModelConfig({
     ...options,
@@ -1789,11 +2126,14 @@ async function generateChapters(assetId, languageCode, options = {}) {
     const contentLabel = isAudioOnly ? "transcript" : "caption track";
     throw new Error(`No usable content found in ${contentLabel}`);
   }
+  const resolvedLanguageCode = outputLanguageCode && outputLanguageCode !== "auto" ? outputLanguageCode : transcriptResult.track?.language_code ?? languageCode;
+  const languageName = resolvedLanguageCode ? getLanguageName(resolvedLanguageCode) : void 0;
   const userPrompt = buildUserPrompt3({
     timestampedTranscript,
     promptOverrides,
     minChaptersPerHour,
-    maxChaptersPerHour
+    maxChaptersPerHour,
+    languageName
   });
   let chaptersData = null;
   try {
@@ -1840,6 +2180,14 @@ async function generateChapters(assetId, languageCode, options = {}) {
 import { embed } from "ai";
 // src/primitives/text-chunking.ts
+var DEFAULT_MIN_CHUNK_DURATION_RATIO = 2 / 3;
+var DEFAULT_BOUNDARY_LOOKAHEAD_CUES = 12;
+var DEFAULT_BOUNDARY_PAUSE_SECONDS = 1.25;
+var STRONG_BOUNDARY_SCORE = 4;
+var PREFERRED_BOUNDARY_WINDOW_SECONDS = 5 * 60;
+var SENTENCE_BOUNDARY_REGEX = /[.!?]["')\]]*$/;
+var CLAUSE_BOUNDARY_REGEX = /[,;:]["')\]]*$/;
+var NEXT_SENTENCE_START_REGEX = /^[A-Z0-9"'([{]/;
 function estimateTokenCount(text) {
   const words = text.trim().split(/\s+/).length;
   return Math.ceil(words / 0.75);
@@ -1912,6 +2260,151 @@ function chunkVTTCues(cues, maxTokens, overlapCues = 2) {
   }
   return chunks;
 }
+function scoreCueBoundary(cues, index, boundaryPauseSeconds) {
+  const cue = cues[index];
+  const nextCue = cues[index + 1];
+  if (!nextCue) {
+    return Number.POSITIVE_INFINITY;
+  }
+  const trimmedText = cue.text.trim();
+  let score = 0;
+  if (SENTENCE_BOUNDARY_REGEX.test(trimmedText)) {
+    score += 4;
+  } else if (CLAUSE_BOUNDARY_REGEX.test(trimmedText)) {
+    score += 2;
+  }
+  if (nextCue.startTime - cue.endTime >= boundaryPauseSeconds) {
+    score += 2;
+  }
+  if (NEXT_SENTENCE_START_REGEX.test(nextCue.text.trim())) {
+    score += 1;
+  }
+  return score;
+}
+function chunkVTTCuesByBudget(cues, options) {
+  if (cues.length === 0) {
+    return [];
+  }
+  const maxCuesPerChunk = Math.max(1, options.maxCuesPerChunk);
+  let maxTextTokensPerChunk = Number.POSITIVE_INFINITY;
+  if (options.maxTextTokensPerChunk) {
+    maxTextTokensPerChunk = Math.max(1, options.maxTextTokensPerChunk);
+  }
+  const chunks = [];
+  let chunkIndex = 0;
+  let cueStartIndex = 0;
+  let currentTokenCount = 0;
+  for (let cueIndex = 0; cueIndex < cues.length; cueIndex++) {
+    const cue = cues[cueIndex];
+    const cueTokenCount = estimateTokenCount(cue.text);
+    const currentCueCount = cueIndex - cueStartIndex;
+    const wouldExceedCueCount = currentCueCount >= maxCuesPerChunk;
+    const wouldExceedTokenCount = currentCueCount > 0 && currentTokenCount + cueTokenCount > maxTextTokensPerChunk;
+    if (wouldExceedCueCount || wouldExceedTokenCount) {
+      chunks.push({
+        id: `chunk-${chunkIndex}`,
+        cueStartIndex,
+        cueEndIndex: cueIndex - 1,
+        cueCount: cueIndex - cueStartIndex,
+        startTime: cues[cueStartIndex].startTime,
+        endTime: cues[cueIndex - 1].endTime
+      });
+      cueStartIndex = cueIndex;
+      currentTokenCount = 0;
+      chunkIndex++;
+    }
+    currentTokenCount += cueTokenCount;
+  }
+  chunks.push({
+    id: `chunk-${chunkIndex}`,
+    cueStartIndex,
+    cueEndIndex: cues.length - 1,
+    cueCount: cues.length - cueStartIndex,
+    startTime: cues[cueStartIndex].startTime,
+    endTime: cues[cues.length - 1].endTime
+  });
+  return chunks;
+}
+function chunkVTTCuesByDuration(cues, options) {
+  if (cues.length === 0) {
+    return [];
+  }
+  const targetChunkDurationSeconds = Math.max(1, options.targetChunkDurationSeconds);
+  const maxChunkDurationSeconds = Math.max(targetChunkDurationSeconds, options.maxChunkDurationSeconds);
+  const minChunkDurationSeconds = Math.min(
+    targetChunkDurationSeconds,
+    Math.max(
+      1,
+      options.minChunkDurationSeconds ?? Math.floor(targetChunkDurationSeconds * DEFAULT_MIN_CHUNK_DURATION_RATIO)
+    )
+  );
+  const boundaryLookaheadCues = Math.max(1, options.boundaryLookaheadCues ?? DEFAULT_BOUNDARY_LOOKAHEAD_CUES);
+  const boundaryPauseSeconds = options.boundaryPauseSeconds ?? DEFAULT_BOUNDARY_PAUSE_SECONDS;
+  const preferredBoundaryStartSeconds = Math.max(
+    minChunkDurationSeconds,
+    targetChunkDurationSeconds - Math.min(PREFERRED_BOUNDARY_WINDOW_SECONDS, targetChunkDurationSeconds / 6)
+  );
+  const chunks = [];
+  let chunkIndex = 0;
+  let cueStartIndex = 0;
+  while (cueStartIndex < cues.length) {
+    const chunkStartTime = cues[cueStartIndex].startTime;
+    let cueEndIndex = cueStartIndex;
+    let bestBoundaryIndex = -1;
+    let bestBoundaryScore = -1;
+    let bestPreferredBoundaryIndex = -1;
+    let bestPreferredBoundaryScore = -1;
+    while (cueEndIndex < cues.length) {
+      const cue = cues[cueEndIndex];
+      const currentDuration = cue.endTime - chunkStartTime;
+      if (currentDuration >= minChunkDurationSeconds) {
+        const boundaryScore = scoreCueBoundary(cues, cueEndIndex, boundaryPauseSeconds);
+        if (boundaryScore >= bestBoundaryScore) {
+          bestBoundaryIndex = cueEndIndex;
+          bestBoundaryScore = boundaryScore;
+        }
+        if (currentDuration >= preferredBoundaryStartSeconds && boundaryScore >= bestPreferredBoundaryScore) {
+          bestPreferredBoundaryIndex = cueEndIndex;
+          bestPreferredBoundaryScore = boundaryScore;
+        }
+      }
+      const nextCue = cues[cueEndIndex + 1];
+      if (!nextCue) {
+        break;
+      }
+      const nextDuration = nextCue.endTime - chunkStartTime;
+      const lookaheadExceeded = cueEndIndex - cueStartIndex >= boundaryLookaheadCues;
+      const preferredBoundaryIndex = bestPreferredBoundaryIndex >= cueStartIndex ? bestPreferredBoundaryIndex : bestBoundaryIndex;
+      const preferredBoundaryScore = bestPreferredBoundaryIndex >= cueStartIndex ? bestPreferredBoundaryScore : bestBoundaryScore;
+      if (currentDuration >= targetChunkDurationSeconds) {
+        if (preferredBoundaryIndex >= cueStartIndex && preferredBoundaryScore >= STRONG_BOUNDARY_SCORE) {
+          cueEndIndex = preferredBoundaryIndex;
+          break;
+        }
+        if (nextDuration > maxChunkDurationSeconds || lookaheadExceeded) {
+          cueEndIndex = preferredBoundaryIndex >= cueStartIndex ? preferredBoundaryIndex : cueEndIndex;
+          break;
+        }
+      }
+      if (nextDuration > maxChunkDurationSeconds) {
+        cueEndIndex = preferredBoundaryIndex >= cueStartIndex ? preferredBoundaryIndex : cueEndIndex;
+        break;
+      }
+      cueEndIndex++;
+    }
+    chunks.push({
+      id: `chunk-${chunkIndex}`,
+      cueStartIndex,
+      cueEndIndex,
+      cueCount: cueEndIndex - cueStartIndex + 1,
+      startTime: cues[cueStartIndex].startTime,
+      endTime: cues[cueEndIndex].endTime
+    });
+    cueStartIndex = cueEndIndex + 1;
+    chunkIndex++;
+  }
+  return chunks;
+}
 function chunkText(text, strategy) {
   switch (strategy.type) {
     case "token": {
@@ -2167,10 +2660,8 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
   }
   const baseUrl = getMuxThumbnailBaseUrl(playbackId);
   const urlPromises = timestamps.map(async (time) => {
-    if (shouldSign) {
-      return signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials);
-    }
-    return `${baseUrl}?time=${time}&width=${width}`;
+    const url = shouldSign ? await signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials) : `${baseUrl}?time=${time}&width=${width}`;
+    return { url, time };
   });
   return Promise.all(urlPromises);
 }
@@ -2244,6 +2735,7 @@ async function moderateImageWithOpenAI(entry) {
     const categoryScores = json.results?.[0]?.category_scores || {};
     return {
       url: entry.url,
+      time: entry.time,
       sexual: categoryScores.sexual || 0,
       violence: categoryScores.violence || 0,
       error: false
@@ -2252,6 +2744,7 @@ async function moderateImageWithOpenAI(entry) {
     console.error("OpenAI moderation failed:", error);
     return {
       url: entry.url,
+      time: entry.time,
       sexual: 0,
       violence: 0,
       error: true,
@@ -2259,11 +2752,13 @@ async function moderateImageWithOpenAI(entry) {
     };
   }
 }
-async function requestOpenAIModeration(imageUrls, model, maxConcurrent = 5, submissionMode = "url", downloadOptions, credentials) {
+async function requestOpenAIModeration(images, model, maxConcurrent = 5, submissionMode = "url", downloadOptions, credentials) {
   "use step";
+  const imageUrls = images.map((img) => img.url);
+  const timeByUrl = new Map(images.map((img) => [img.url, img.time]));
   const targetUrls = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map(
-    (img) => ({ url: img.url, image: img.base64Data, model, credentials })
-  ) : imageUrls.map((url) => ({ url, image: url, model, credentials }));
+    (img) => ({ url: img.url, time: timeByUrl.get(img.url), image: img.base64Data, model, credentials })
+  ) : images.map((img) => ({ url: img.url, time: img.time, image: img.url, model, credentials }));
   return processConcurrently(targetUrls, moderateImageWithOpenAI, maxConcurrent);
 }
 async function requestOpenAITextModeration(text, model, url, credentials) {
@@ -2408,6 +2903,7 @@ async function moderateImageWithHive(entry) {
     const violence = getHiveCategoryScores(classes, HIVE_VIOLENCE_CATEGORIES);
     return {
       url: entry.url,
+      time: entry.time,
       sexual,
       violence,
       error: false
@@ -2415,6 +2911,7 @@ async function moderateImageWithHive(entry) {
   } catch (error) {
     return {
       url: entry.url,
+      time: entry.time,
       sexual: 0,
       violence: 0,
       error: true,
@@ -2422,19 +2919,23 @@ async function moderateImageWithHive(entry) {
     };
   }
 }
-async function requestHiveModeration(imageUrls, maxConcurrent = 5, submissionMode = "url", downloadOptions, credentials) {
+async function requestHiveModeration(images, maxConcurrent = 5, submissionMode = "url", downloadOptions, credentials) {
   "use step";
+  const imageUrls = images.map((img) => img.url);
+  const timeByUrl = new Map(images.map((img) => [img.url, img.time]));
   const targets = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map((img) => ({
     url: img.url,
+    time: timeByUrl.get(img.url),
     source: {
       kind: "file",
       buffer: img.buffer,
       contentType: img.contentType
     },
     credentials
-  })) : imageUrls.map((url) => ({
-    url,
-    source: { kind: "url", value: url },
+  })) : images.map((img) => ({
+    url: img.url,
+    time: img.time,
+    source: { kind: "url", value: img.url },
     credentials
   }));
   return await processConcurrently(targets, moderateImageWithHive, maxConcurrent);
@@ -2445,10 +2946,8 @@ async function getThumbnailUrlsFromTimestamps(playbackId, timestampsMs, options)
   const baseUrl = getMuxThumbnailBaseUrl(playbackId);
   const urlPromises = timestampsMs.map(async (tsMs) => {
     const time = Number((tsMs / 1e3).toFixed(2));
-    if (shouldSign) {
-      return signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials);
-    }
-    return `${baseUrl}?time=${time}&width=${width}`;
+    const url = shouldSign ? await signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials) : `${baseUrl}?time=${time}&width=${width}`;
+    return { url, time };
   });
   return Promise.all(urlPromises);
 }
@@ -2752,6 +3251,7 @@ var SYSTEM_PROMPT3 = dedent4`
     - Do not fabricate details or make unsupported assumptions
     - Return structured data matching the requested schema
     - Output only the JSON object; no markdown or extra text
+    - When a <language> section is provided, all output text MUST be written in that language
   </constraints>
   <tone_guidance>
@@ -2806,6 +3306,7 @@ var AUDIO_ONLY_SYSTEM_PROMPT = dedent4`
     - Return structured data matching the requested schema
     - Focus entirely on audio/spoken content - there are no visual elements
     - Output only the JSON object; no markdown or extra text
+    - When a <language> section is provided, all output text MUST be written in that language
   </constraints>
   <tone_guidance>
@@ -2836,9 +3337,13 @@ function buildUserPrompt4({
   isAudioOnly = false,
   titleLength,
   descriptionLength,
-  tagCount
+  tagCount,
+  languageName
 }) {
   const contextSections = [createToneSection(TONE_INSTRUCTIONS[tone])];
+  if (languageName) {
+    contextSections.push(createLanguageSection(languageName));
+  }
   if (transcriptText) {
     const format = isCleanTranscript ? "plain text" : "WebVTT";
     contextSections.push(createTranscriptSection(transcriptText, format));
@@ -2951,7 +3456,8 @@ async function getSummaryAndTags(assetId, options) {
     credentials,
     titleLength,
     descriptionLength,
-    tagCount
+    tagCount,
+    outputLanguageCode
   } = options ?? {};
   if (!VALID_TONES.includes(tone)) {
     throw new Error(
@@ -2978,12 +3484,15 @@ async function getSummaryAndTags(assetId, options) {
       "Signed playback ID requires signing credentials. Set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
     );
   }
-  const transcriptText = includeTranscript ? (await fetchTranscriptForAsset(assetData, playbackId, {
+  const transcriptResult = includeTranscript ? await fetchTranscriptForAsset(assetData, playbackId, {
     cleanTranscript,
     shouldSign: policy === "signed",
     credentials: workflowCredentials,
     required: isAudioOnly
-  })).transcriptText : "";
+  }) : void 0;
+  const transcriptText = transcriptResult?.transcriptText ?? "";
+  const resolvedLanguageCode = outputLanguageCode && outputLanguageCode !== "auto" ? outputLanguageCode : transcriptResult?.track?.language_code ?? getReadyTextTracks(assetData)[0]?.language_code;
+  const languageName = resolvedLanguageCode ? getLanguageName(resolvedLanguageCode) : void 0;
   const userPrompt = buildUserPrompt4({
     tone,
     transcriptText,
@@ -2992,7 +3501,8 @@ async function getSummaryAndTags(assetId, options) {
     isAudioOnly,
     titleLength,
     descriptionLength,
-    tagCount
+    tagCount,
+    languageName
   });
   let analysisResponse;
   let imageUrl;
@@ -3062,164 +3572,6 @@ async function getSummaryAndTags(assetId, options) {
   };
 }
-// src/lib/language-codes.ts
-var ISO639_1_TO_3 = {
-  // Major world languages
-  en: "eng",
-  // English
-  es: "spa",
-  // Spanish
-  fr: "fra",
-  // French
-  de: "deu",
-  // German
-  it: "ita",
-  // Italian
-  pt: "por",
-  // Portuguese
-  ru: "rus",
-  // Russian
-  zh: "zho",
-  // Chinese
-  ja: "jpn",
-  // Japanese
-  ko: "kor",
-  // Korean
-  ar: "ara",
-  // Arabic
-  hi: "hin",
-  // Hindi
-  // European languages
-  nl: "nld",
-  // Dutch
-  pl: "pol",
-  // Polish
-  sv: "swe",
-  // Swedish
-  da: "dan",
-  // Danish
-  no: "nor",
-  // Norwegian
-  fi: "fin",
-  // Finnish
-  el: "ell",
-  // Greek
-  cs: "ces",
-  // Czech
-  hu: "hun",
-  // Hungarian
-  ro: "ron",
-  // Romanian
-  bg: "bul",
-  // Bulgarian
-  hr: "hrv",
-  // Croatian
-  sk: "slk",
-  // Slovak
-  sl: "slv",
-  // Slovenian
-  uk: "ukr",
-  // Ukrainian
-  tr: "tur",
-  // Turkish
-  // Asian languages
-  th: "tha",
-  // Thai
-  vi: "vie",
-  // Vietnamese
-  id: "ind",
-  // Indonesian
-  ms: "msa",
-  // Malay
-  tl: "tgl",
-  // Tagalog/Filipino
-  // Other languages
-  he: "heb",
-  // Hebrew
-  fa: "fas",
-  // Persian/Farsi
-  bn: "ben",
-  // Bengali
-  ta: "tam",
-  // Tamil
-  te: "tel",
-  // Telugu
-  mr: "mar",
-  // Marathi
-  gu: "guj",
-  // Gujarati
-  kn: "kan",
-  // Kannada
-  ml: "mal",
-  // Malayalam
-  pa: "pan",
-  // Punjabi
-  ur: "urd",
-  // Urdu
-  sw: "swa",
-  // Swahili
-  af: "afr",
-  // Afrikaans
-  ca: "cat",
-  // Catalan
-  eu: "eus",
-  // Basque
-  gl: "glg",
-  // Galician
-  is: "isl",
-  // Icelandic
-  et: "est",
-  // Estonian
-  lv: "lav",
-  // Latvian
-  lt: "lit"
-  // Lithuanian
-};
-var ISO639_3_TO_1 = Object.fromEntries(
-  Object.entries(ISO639_1_TO_3).map(([iso1, iso3]) => [iso3, iso1])
-);
-function toISO639_3(code) {
-  const normalized = code.toLowerCase().trim();
-  if (normalized.length === 3) {
-    return normalized;
-  }
-  return ISO639_1_TO_3[normalized] ?? normalized;
-}
-function toISO639_1(code) {
-  const normalized = code.toLowerCase().trim();
-  if (normalized.length === 2) {
-    return normalized;
-  }
-  return ISO639_3_TO_1[normalized] ?? normalized;
-}
-function getLanguageCodePair(code) {
-  const normalized = code.toLowerCase().trim();
-  if (normalized.length === 2) {
-    return {
-      iso639_1: normalized,
-      iso639_3: toISO639_3(normalized)
-    };
-  } else if (normalized.length === 3) {
-    return {
-      iso639_1: toISO639_1(normalized),
-      iso639_3: normalized
-    };
-  }
-  return {
-    iso639_1: normalized,
-    iso639_3: normalized
-  };
-}
-function getLanguageName(code) {
-  const iso639_1 = toISO639_1(code);
-  try {
-    const displayNames = new Intl.DisplayNames(["en"], { type: "language" });
-    return displayNames.of(iso639_1) ?? code.toUpperCase();
-  } catch {
-    return code.toUpperCase();
-  }
-}
 // src/lib/s3-sigv4.ts
 var AWS4_ALGORITHM = "AWS4-HMAC-SHA256";
 var AWS4_REQUEST_TERMINATOR = "aws4_request";
@@ -3876,12 +4228,187 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
 }
 // src/workflows/translate-captions.ts
-import { generateText as generateText5, Output as Output5 } from "ai";
+import {
+  APICallError,
+  generateText as generateText5,
+  NoObjectGeneratedError,
+  Output as Output5,
+  RetryError,
+  TypeValidationError
+} from "ai";
+import dedent5 from "dedent";
 import { z as z6 } from "zod";
 var translationSchema = z6.object({
   translation: z6.string()
 });
-var SYSTEM_PROMPT4 = 'You are a subtitle translation expert. Translate VTT subtitle files to the target language specified by the user. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT content.';
+var SYSTEM_PROMPT4 = dedent5`
+  You are a subtitle translation expert. Translate VTT subtitle files to the target language specified by the user.
+  You may receive either a full VTT file or a chunk from a larger VTT.
+  Preserve all timestamps, cue ordering, and VTT formatting exactly as they appear.
+  Return JSON with a single key "translation" containing the translated VTT content.
+`;
+var CUE_TRANSLATION_SYSTEM_PROMPT = dedent5`
+  You are a subtitle translation expert.
+  You will receive a sequence of subtitle cues extracted from a VTT file.
+  Translate the cues to the requested target language while preserving their original order.
+  Treat the cue list as continuous context so the translation reads naturally across adjacent lines.
+  Return JSON with a single key "translations" containing exactly one translated string for each input cue.
+  Do not merge, split, omit, reorder, or add cues.
+`;
+var DEFAULT_TRANSLATION_CHUNKING = {
+  enabled: true,
+  minimumAssetDurationSeconds: 30 * 60,
+  targetChunkDurationSeconds: 30 * 60,
+  maxConcurrentTranslations: 4,
+  maxCuesPerChunk: 80,
+  maxCueTextTokensPerChunk: 2e3
+};
+var TOKEN_USAGE_FIELDS = [
+  "inputTokens",
+  "outputTokens",
+  "totalTokens",
+  "reasoningTokens",
+  "cachedInputTokens"
+];
+var TranslationChunkValidationError = class extends Error {
+  constructor(message) {
+    super(message);
+    this.name = "TranslationChunkValidationError";
+  }
+};
+function isTranslationChunkValidationError(error) {
+  return error instanceof TranslationChunkValidationError;
+}
+function isProviderServiceError(error) {
+  if (!error) {
+    return false;
+  }
+  if (RetryError.isInstance(error)) {
+    return isProviderServiceError(error.lastError);
+  }
+  if (APICallError.isInstance(error)) {
+    return true;
+  }
+  if (error instanceof Error && "cause" in error) {
+    return isProviderServiceError(error.cause);
+  }
+  return false;
+}
+function shouldSplitChunkTranslationError(error) {
+  if (isProviderServiceError(error)) {
+    return false;
+  }
+  return NoObjectGeneratedError.isInstance(error) || TypeValidationError.isInstance(error) || isTranslationChunkValidationError(error);
+}
+function isDefinedTokenUsageValue(value) {
+  return typeof value === "number";
+}
+function resolveTranslationChunkingOptions(options) {
+  const targetChunkDurationSeconds = Math.max(
+    1,
+    options?.targetChunkDurationSeconds ?? DEFAULT_TRANSLATION_CHUNKING.targetChunkDurationSeconds
+  );
+  return {
+    enabled: options?.enabled ?? DEFAULT_TRANSLATION_CHUNKING.enabled,
+    minimumAssetDurationSeconds: Math.max(
+      1,
+      options?.minimumAssetDurationSeconds ?? DEFAULT_TRANSLATION_CHUNKING.minimumAssetDurationSeconds
+    ),
+    targetChunkDurationSeconds,
+    maxConcurrentTranslations: Math.max(
+      1,
+      options?.maxConcurrentTranslations ?? DEFAULT_TRANSLATION_CHUNKING.maxConcurrentTranslations
+    ),
+    maxCuesPerChunk: Math.max(
+      1,
+      options?.maxCuesPerChunk ?? DEFAULT_TRANSLATION_CHUNKING.maxCuesPerChunk
+    ),
+    maxCueTextTokensPerChunk: Math.max(
+      1,
+      options?.maxCueTextTokensPerChunk ?? DEFAULT_TRANSLATION_CHUNKING.maxCueTextTokensPerChunk
+    )
+  };
+}
+function aggregateTokenUsage(usages) {
+  return TOKEN_USAGE_FIELDS.reduce((aggregate, field) => {
+    const values = usages.map((usage) => usage[field]).filter(isDefinedTokenUsageValue);
+    if (values.length > 0) {
+      aggregate[field] = values.reduce((total, value) => total + value, 0);
+    }
+    return aggregate;
+  }, {});
+}
+function createTranslationChunkRequest(id, cues, cueBlocks) {
+  return {
+    id,
+    cueCount: cues.length,
+    startTime: cues[0].startTime,
+    endTime: cues[cues.length - 1].endTime,
+    cues,
+    cueBlocks
+  };
+}
+function splitTranslationChunkRequestByBudget(id, cues, cueBlocks, maxCuesPerChunk, maxCueTextTokensPerChunk) {
+  const chunks = chunkVTTCuesByBudget(cues, {
+    maxCuesPerChunk,
+    maxTextTokensPerChunk: maxCueTextTokensPerChunk
+  });
+  return chunks.map(
+    (chunk, index) => createTranslationChunkRequest(
+      chunks.length === 1 ? id : `${id}-part-${index}`,
+      cues.slice(chunk.cueStartIndex, chunk.cueEndIndex + 1),
+      cueBlocks.slice(chunk.cueStartIndex, chunk.cueEndIndex + 1)
+    )
+  );
+}
+function buildTranslationChunkRequests(vttContent, assetDurationSeconds, chunkingOptions) {
+  const resolvedChunking = resolveTranslationChunkingOptions(chunkingOptions);
+  const cues = parseVTTCues(vttContent);
+  if (cues.length === 0) {
+    return null;
+  }
+  const { preamble, cueBlocks } = splitVttPreambleAndCueBlocks(vttContent);
+  if (cueBlocks.length !== cues.length) {
+    console.warn(
+      `Falling back to full-VTT caption translation because cue block count (${cueBlocks.length}) does not match parsed cue count (${cues.length}).`
+    );
+    return null;
+  }
+  if (!resolvedChunking.enabled) {
+    return {
+      preamble,
+      chunks: [
+        createTranslationChunkRequest("chunk-0", cues, cueBlocks)
+      ]
+    };
+  }
+  if (typeof assetDurationSeconds !== "number" || assetDurationSeconds < resolvedChunking.minimumAssetDurationSeconds) {
+    return {
+      preamble,
+      chunks: [
+        createTranslationChunkRequest("chunk-0", cues, cueBlocks)
+      ]
+    };
+  }
+  const targetChunkDurationSeconds = resolvedChunking.targetChunkDurationSeconds;
+  const durationChunks = chunkVTTCuesByDuration(cues, {
+    targetChunkDurationSeconds,
+    maxChunkDurationSeconds: Math.max(targetChunkDurationSeconds, Math.round(targetChunkDurationSeconds * (7 / 6))),
+    minChunkDurationSeconds: Math.max(1, Math.round(targetChunkDurationSeconds * (2 / 3)))
+  });
+  return {
+    preamble,
+    chunks: durationChunks.flatMap(
+      (chunk) => splitTranslationChunkRequestByBudget(
+        chunk.id,
+        cues.slice(chunk.cueStartIndex, chunk.cueEndIndex + 1),
+        cueBlocks.slice(chunk.cueStartIndex, chunk.cueEndIndex + 1),
+        resolvedChunking.maxCuesPerChunk,
+        resolvedChunking.maxCueTextTokensPerChunk
+      )
+    )
+  };
+}
 async function fetchVttFromMux(vttUrl) {
   "use step";
   const vttResponse = await fetch(vttUrl);
@@ -3927,6 +4454,176 @@ ${vttContent}`
     }
   };
 }
+async function translateCueChunkWithAI({
+  cues,
+  fromLanguageCode,
+  toLanguageCode,
+  provider,
+  modelId,
+  credentials
+}) {
+  "use step";
+  const model = await createLanguageModelFromConfig(provider, modelId, credentials);
+  const schema = z6.object({
+    translations: z6.array(z6.string().min(1)).length(cues.length)
+  });
+  const cuePayload = cues.map((cue, index) => ({
+    index,
+    startTime: cue.startTime,
+    endTime: cue.endTime,
+    text: cue.text
+  }));
+  const response = await generateText5({
+    model,
+    output: Output5.object({ schema }),
+    messages: [
+      {
+        role: "system",
+        content: CUE_TRANSLATION_SYSTEM_PROMPT
+      },
+      {
+        role: "user",
+        content: `Translate from ${fromLanguageCode} to ${toLanguageCode}.
+Return exactly ${cues.length} translated cues in the same order as the input.
+${JSON.stringify(cuePayload, null, 2)}`
+      }
+    ]
+  });
+  return {
+    translations: response.output.translations,
+    usage: {
+      inputTokens: response.usage.inputTokens,
+      outputTokens: response.usage.outputTokens,
+      totalTokens: response.usage.totalTokens,
+      reasoningTokens: response.usage.reasoningTokens,
+      cachedInputTokens: response.usage.cachedInputTokens
+    }
+  };
+}
+function splitTranslationChunkAtMidpoint(chunk) {
+  const midpoint = Math.floor(chunk.cueCount / 2);
+  if (midpoint <= 0 || midpoint >= chunk.cueCount) {
+    throw new Error(`Cannot split chunk ${chunk.id} with cueCount=${chunk.cueCount}`);
+  }
+  return [
+    createTranslationChunkRequest(
+      `${chunk.id}-a`,
+      chunk.cues.slice(0, midpoint),
+      chunk.cueBlocks.slice(0, midpoint)
+    ),
+    createTranslationChunkRequest(
+      `${chunk.id}-b`,
+      chunk.cues.slice(midpoint),
+      chunk.cueBlocks.slice(midpoint)
+    )
+  ];
+}
+async function translateChunkWithFallback({
+  chunk,
+  fromLanguageCode,
+  toLanguageCode,
+  provider,
+  modelId,
+  credentials
+}) {
+  "use step";
+  try {
+    const result = await translateCueChunkWithAI({
+      cues: chunk.cues,
+      fromLanguageCode,
+      toLanguageCode,
+      provider,
+      modelId,
+      credentials
+    });
+    if (result.translations.length !== chunk.cueCount) {
+      throw new TranslationChunkValidationError(
+        `Chunk ${chunk.id} returned ${result.translations.length} cues, expected ${chunk.cueCount} for ${Math.round(chunk.startTime)}s-${Math.round(chunk.endTime)}s`
+      );
+    }
+    return {
+      translatedVtt: buildVttFromTranslatedCueBlocks(chunk.cueBlocks, result.translations),
+      usage: result.usage
+    };
+  } catch (error) {
+    if (!shouldSplitChunkTranslationError(error) || chunk.cueCount <= 1) {
+      throw new Error(
+        `Chunk ${chunk.id} failed for ${Math.round(chunk.startTime)}s-${Math.round(chunk.endTime)}s: ${error instanceof Error ? error.message : "Unknown error"}`
+      );
+    }
+    const [leftChunk, rightChunk] = splitTranslationChunkAtMidpoint(chunk);
+    const [leftResult, rightResult] = await Promise.all([
+      translateChunkWithFallback({
+        chunk: leftChunk,
+        fromLanguageCode,
+        toLanguageCode,
+        provider,
+        modelId,
+        credentials
+      }),
+      translateChunkWithFallback({
+        chunk: rightChunk,
+        fromLanguageCode,
+        toLanguageCode,
+        provider,
+        modelId,
+        credentials
+      })
+    ]);
+    return {
+      translatedVtt: concatenateVttSegments([leftResult.translatedVtt, rightResult.translatedVtt]),
+      usage: aggregateTokenUsage([leftResult.usage, rightResult.usage])
+    };
+  }
+}
+async function translateCaptionTrack({
+  vttContent,
+  assetDurationSeconds,
+  fromLanguageCode,
+  toLanguageCode,
+  provider,
+  modelId,
+  credentials,
+  chunking
+}) {
+  "use step";
+  const chunkPlan = buildTranslationChunkRequests(vttContent, assetDurationSeconds, chunking);
+  if (!chunkPlan) {
+    return translateVttWithAI({
+      vttContent,
+      fromLanguageCode,
+      toLanguageCode,
+      provider,
+      modelId,
+      credentials
+    });
+  }
+  const resolvedChunking = resolveTranslationChunkingOptions(chunking);
+  const translatedSegments = [];
+  const usageByChunk = [];
+  for (let index = 0; index < chunkPlan.chunks.length; index += resolvedChunking.maxConcurrentTranslations) {
+    const batch = chunkPlan.chunks.slice(index, index + resolvedChunking.maxConcurrentTranslations);
+    const batchResults = await Promise.all(
+      batch.map(
+        (chunk) => translateChunkWithFallback({
+          chunk,
+          fromLanguageCode,
+          toLanguageCode,
+          provider,
+          modelId,
+          credentials
+        })
+      )
+    );
+    translatedSegments.push(...batchResults.map((result) => result.translatedVtt));
+    usageByChunk.push(...batchResults.map((result) => result.usage));
+  }
+  return {
+    translatedVtt: concatenateVttSegments(translatedSegments, chunkPlan.preamble),
+    usage: aggregateTokenUsage(usageByChunk)
+  };
+}
 async function uploadVttToS3({
   translatedVtt,
   assetId,
@@ -3987,7 +4684,8 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
     s3Bucket: providedS3Bucket,
     uploadToMux: uploadToMuxOption,
     storageAdapter,
-    credentials: providedCredentials
+    credentials: providedCredentials,
+    chunking
   } = options;
   const credentials = providedCredentials;
   const effectiveStorageAdapter = storageAdapter;
@@ -4048,13 +4746,15 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
   let translatedVtt;
   let usage;
   try {
-    const result = await translateVttWithAI({
+    const result = await translateCaptionTrack({
       vttContent,
+      assetDurationSeconds,
       fromLanguageCode,
       toLanguageCode,
       provider: modelConfig.provider,
       modelId: modelConfig.modelId,
-      credentials
+      credentials,
+      chunking
     });
     translatedVtt = result.translatedVtt;
     usage = result.usage;
@@ -4127,6 +4827,7 @@ export {
   HIVE_SEXUAL_CATEGORIES,
   HIVE_VIOLENCE_CATEGORIES,
   SUMMARY_KEYWORD_LIMIT,
+  aggregateTokenUsage,
   askQuestions,
   burnedInCaptionsSchema,
   chapterSchema,
@@ -4138,6 +4839,7 @@ export {
   getSummaryAndTags,
   hasBurnedInCaptions,
   questionAnswerSchema,
+  shouldSplitChunkTranslationError,
   summarySchema,
   translateAudio,
   translateCaptions,