@mux/ai 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -987,24 +987,82 @@ function findCaptionTrack(asset, languageCode) {
987
987
  (track) => track.text_type === "subtitles" && track.language_code === languageCode
988
988
  );
989
989
  }
990
+ function normalizeLineEndings(value) {
991
+ return value.replace(/\r\n/g, "\n");
992
+ }
993
+ function isTimingLine(line) {
994
+ return line.includes("-->");
995
+ }
996
+ function parseNumericCueIdentifier(line) {
997
+ if (!/^\d+$/.test(line)) {
998
+ return null;
999
+ }
1000
+ return Number.parseInt(line, 10);
1001
+ }
1002
+ function isLikelyTitledCueIdentifier(line) {
1003
+ return /^\d+\s+-\s+\S.*$/.test(line);
1004
+ }
1005
+ function isLikelyCueIdentifier({
1006
+ line,
1007
+ nextLine,
1008
+ previousCueIdentifier
1009
+ }) {
1010
+ if (!line || !nextLine || !isTimingLine(nextLine)) {
1011
+ return false;
1012
+ }
1013
+ const numericIdentifier = parseNumericCueIdentifier(line);
1014
+ if (numericIdentifier !== null) {
1015
+ if (previousCueIdentifier === null || previousCueIdentifier === void 0) {
1016
+ return numericIdentifier === 1;
1017
+ }
1018
+ return numericIdentifier === previousCueIdentifier + 1;
1019
+ }
1020
+ return isLikelyTitledCueIdentifier(line);
1021
+ }
1022
+ function getCueIdentifierLineIndex(lines, timingLineIndex, previousCueIdentifier) {
1023
+ const identifierIndex = timingLineIndex - 1;
1024
+ if (identifierIndex < 0) {
1025
+ return -1;
1026
+ }
1027
+ const candidate = lines[identifierIndex].trim();
1028
+ if (!candidate || isTimingLine(candidate)) {
1029
+ return -1;
1030
+ }
1031
+ return isLikelyCueIdentifier({
1032
+ line: candidate,
1033
+ nextLine: lines[timingLineIndex]?.trim(),
1034
+ previousCueIdentifier
1035
+ }) ? identifierIndex : -1;
1036
+ }
990
1037
  function extractTextFromVTT(vttContent) {
991
1038
  if (!vttContent.trim()) {
992
1039
  return "";
993
1040
  }
994
1041
  const lines = vttContent.split("\n");
995
1042
  const textLines = [];
1043
+ let previousCueIdentifier = null;
1044
+ let isInsideNoteBlock = false;
996
1045
  for (let i = 0; i < lines.length; i++) {
997
1046
  const line = lines[i].trim();
998
- if (!line)
1047
+ const nextLine = lines[i + 1]?.trim();
1048
+ if (!line) {
1049
+ isInsideNoteBlock = false;
1050
+ continue;
1051
+ }
1052
+ if (isInsideNoteBlock)
999
1053
  continue;
1000
1054
  if (line === "WEBVTT")
1001
1055
  continue;
1002
- if (line.startsWith("NOTE "))
1056
+ if (line === "NOTE" || line.startsWith("NOTE ")) {
1057
+ isInsideNoteBlock = true;
1003
1058
  continue;
1004
- if (line.includes("-->"))
1059
+ }
1060
+ if (isTimingLine(line))
1005
1061
  continue;
1006
- if (/^[\w-]+$/.test(line) && !line.includes(" "))
1062
+ if (isLikelyCueIdentifier({ line, nextLine, previousCueIdentifier })) {
1063
+ previousCueIdentifier = parseNumericCueIdentifier(line);
1007
1064
  continue;
1065
+ }
1008
1066
  if (line.startsWith("STYLE") || line.startsWith("REGION"))
1009
1067
  continue;
1010
1068
  const cleanLine = line.replace(/<[^>]*>/g, "").trim();
@@ -1053,20 +1111,34 @@ function parseVTTCues(vttContent) {
1053
1111
  return [];
1054
1112
  const lines = vttContent.split("\n");
1055
1113
  const cues = [];
1114
+ let previousCueIdentifier = null;
1056
1115
  for (let i = 0; i < lines.length; i++) {
1057
1116
  const line = lines[i].trim();
1058
- if (line.includes("-->")) {
1117
+ if (isTimingLine(line)) {
1059
1118
  const [startStr, endStr] = line.split(" --> ").map((s) => s.trim());
1060
1119
  const startTime = vttTimestampToSeconds(startStr);
1061
1120
  const endTime = vttTimestampToSeconds(endStr.split(" ")[0]);
1062
- const textLines = [];
1121
+ const currentCueIdentifierLine = lines[i - 1]?.trim() ?? "";
1122
+ const currentCueIdentifier = isLikelyCueIdentifier({
1123
+ line: currentCueIdentifierLine,
1124
+ nextLine: line,
1125
+ previousCueIdentifier
1126
+ }) ? parseNumericCueIdentifier(currentCueIdentifierLine) : null;
1127
+ const rawTextLines = [];
1063
1128
  let j = i + 1;
1064
- while (j < lines.length && lines[j].trim() && !lines[j].includes("-->")) {
1065
- const cleanLine = lines[j].trim().replace(/<[^>]*>/g, "");
1066
- if (cleanLine)
1067
- textLines.push(cleanLine);
1129
+ while (j < lines.length && lines[j].trim() && !isTimingLine(lines[j].trim())) {
1130
+ rawTextLines.push(lines[j].trim());
1068
1131
  j++;
1069
1132
  }
1133
+ const trailingNumericLine = parseNumericCueIdentifier(rawTextLines.at(-1) ?? "");
1134
+ if (trailingNumericLine !== null && isLikelyCueIdentifier({
1135
+ line: rawTextLines.at(-1) ?? "",
1136
+ nextLine: lines[j]?.trim(),
1137
+ previousCueIdentifier: currentCueIdentifier
1138
+ }) && rawTextLines.length > 1) {
1139
+ rawTextLines.pop();
1140
+ }
1141
+ const textLines = rawTextLines.map((textLine) => textLine.replace(/<[^>]*>/g, "")).filter(Boolean);
1070
1142
  if (textLines.length > 0) {
1071
1143
  cues.push({
1072
1144
  startTime,
@@ -1074,10 +1146,102 @@ function parseVTTCues(vttContent) {
1074
1146
  text: textLines.join(" ")
1075
1147
  });
1076
1148
  }
1149
+ previousCueIdentifier = currentCueIdentifier;
1077
1150
  }
1078
1151
  }
1079
1152
  return cues;
1080
1153
  }
1154
+ function splitVttPreambleAndCueBlocks(vttContent) {
1155
+ const normalizedContent = normalizeLineEndings(vttContent).trim();
1156
+ if (!normalizedContent) {
1157
+ return {
1158
+ preamble: "WEBVTT",
1159
+ cueBlocks: []
1160
+ };
1161
+ }
1162
+ const rawBlocks = normalizedContent.split(/\n{2,}/).map((block) => block.trim()).filter(Boolean);
1163
+ const cueBlockStartIndex = rawBlocks.findIndex((block) => block.includes("-->"));
1164
+ if (cueBlockStartIndex === -1) {
1165
+ return {
1166
+ preamble: normalizedContent.startsWith("WEBVTT") ? normalizedContent : `WEBVTT
1167
+
1168
+ ${normalizedContent}`,
1169
+ cueBlocks: []
1170
+ };
1171
+ }
1172
+ const hasMergedCueBlocks = rawBlocks.slice(cueBlockStartIndex).some((block) => (block.match(/-->/g) ?? []).length > 1);
1173
+ if (hasMergedCueBlocks) {
1174
+ const lines = normalizedContent.split("\n");
1175
+ const timingLineIndices = lines.map((line, index) => isTimingLine(line.trim()) ? index : -1).filter((index) => index >= 0);
1176
+ let previousCueIdentifier = null;
1177
+ const firstCueStartIndex = getCueIdentifierLineIndex(lines, timingLineIndices[0], previousCueIdentifier);
1178
+ const preambleEndIndex = firstCueStartIndex >= 0 ? firstCueStartIndex : timingLineIndices[0];
1179
+ const preamble2 = lines.slice(0, preambleEndIndex).join("\n").trim() || "WEBVTT";
1180
+ const cueBlocks2 = timingLineIndices.map((timingLineIndex, index) => {
1181
+ const cueIdentifierLineIndex = getCueIdentifierLineIndex(lines, timingLineIndex, previousCueIdentifier);
1182
+ const cueStartIndex = cueIdentifierLineIndex >= 0 ? cueIdentifierLineIndex : timingLineIndex;
1183
+ const currentCueIdentifier = cueIdentifierLineIndex >= 0 ? parseNumericCueIdentifier(lines[cueIdentifierLineIndex].trim()) : null;
1184
+ const nextTimingLineIndex = timingLineIndices[index + 1] ?? lines.length;
1185
+ let cueEndIndex = nextTimingLineIndex - 1;
1186
+ while (cueEndIndex > timingLineIndex && !lines[cueEndIndex].trim()) {
1187
+ cueEndIndex--;
1188
+ }
1189
+ const nextCueIdentifierLineIndex = index < timingLineIndices.length - 1 ? getCueIdentifierLineIndex(lines, nextTimingLineIndex, currentCueIdentifier) : -1;
1190
+ if (nextCueIdentifierLineIndex === cueEndIndex) {
1191
+ cueEndIndex--;
1192
+ }
1193
+ while (cueEndIndex > timingLineIndex && !lines[cueEndIndex].trim()) {
1194
+ cueEndIndex--;
1195
+ }
1196
+ previousCueIdentifier = currentCueIdentifier;
1197
+ return lines.slice(cueStartIndex, cueEndIndex + 1).join("\n").trim();
1198
+ });
1199
+ return {
1200
+ preamble: preamble2,
1201
+ cueBlocks: cueBlocks2
1202
+ };
1203
+ }
1204
+ const preambleBlocks = rawBlocks.slice(0, cueBlockStartIndex);
1205
+ const cueBlocks = rawBlocks.slice(cueBlockStartIndex);
1206
+ const preamble = preambleBlocks.length > 0 ? preambleBlocks.join("\n\n") : "WEBVTT";
1207
+ return {
1208
+ preamble,
1209
+ cueBlocks
1210
+ };
1211
+ }
1212
+ function buildVttFromCueBlocks(cueBlocks, preamble = "WEBVTT") {
1213
+ if (cueBlocks.length === 0) {
1214
+ return `${preamble.trim()}
1215
+ `;
1216
+ }
1217
+ return `${preamble.trim()}
1218
+
1219
+ ${cueBlocks.map((block) => block.trim()).join("\n\n")}
1220
+ `;
1221
+ }
1222
+ function replaceCueText(cueBlock, translatedText) {
1223
+ const lines = normalizeLineEndings(cueBlock).split("\n").map((line) => line.trim()).filter(Boolean);
1224
+ const timingLineIndex = lines.findIndex((line) => line.includes("-->"));
1225
+ if (timingLineIndex === -1) {
1226
+ throw new Error("Cue block is missing a timestamp line");
1227
+ }
1228
+ const headerLines = lines.slice(0, timingLineIndex + 1);
1229
+ const translatedLines = normalizeLineEndings(translatedText).split("\n").map((line) => line.trim()).filter(Boolean);
1230
+ return [...headerLines, ...translatedLines].join("\n");
1231
+ }
1232
+ function buildVttFromTranslatedCueBlocks(cueBlocks, translatedTexts, preamble = "WEBVTT") {
1233
+ if (cueBlocks.length !== translatedTexts.length) {
1234
+ throw new Error(`Expected ${cueBlocks.length} translated cues, received ${translatedTexts.length}`);
1235
+ }
1236
+ return buildVttFromCueBlocks(
1237
+ cueBlocks.map((cueBlock, index) => replaceCueText(cueBlock, translatedTexts[index])),
1238
+ preamble
1239
+ );
1240
+ }
1241
+ function concatenateVttSegments(segments, preamble = "WEBVTT") {
1242
+ const cueBlocks = segments.flatMap((segment) => splitVttPreambleAndCueBlocks(segment).cueBlocks);
1243
+ return buildVttFromCueBlocks(cueBlocks, preamble);
1244
+ }
1081
1245
  async function buildTranscriptUrl(playbackId, trackId, shouldSign = false, credentials) {
1082
1246
  "use step";
1083
1247
  const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
@@ -2016,6 +2180,14 @@ async function generateChapters(assetId, languageCode, options = {}) {
2016
2180
  import { embed } from "ai";
2017
2181
 
2018
2182
  // src/primitives/text-chunking.ts
2183
+ var DEFAULT_MIN_CHUNK_DURATION_RATIO = 2 / 3;
2184
+ var DEFAULT_BOUNDARY_LOOKAHEAD_CUES = 12;
2185
+ var DEFAULT_BOUNDARY_PAUSE_SECONDS = 1.25;
2186
+ var STRONG_BOUNDARY_SCORE = 4;
2187
+ var PREFERRED_BOUNDARY_WINDOW_SECONDS = 5 * 60;
2188
+ var SENTENCE_BOUNDARY_REGEX = /[.!?]["')\]]*$/;
2189
+ var CLAUSE_BOUNDARY_REGEX = /[,;:]["')\]]*$/;
2190
+ var NEXT_SENTENCE_START_REGEX = /^[A-Z0-9"'([{]/;
2019
2191
  function estimateTokenCount(text) {
2020
2192
  const words = text.trim().split(/\s+/).length;
2021
2193
  return Math.ceil(words / 0.75);
@@ -2088,6 +2260,151 @@ function chunkVTTCues(cues, maxTokens, overlapCues = 2) {
2088
2260
  }
2089
2261
  return chunks;
2090
2262
  }
2263
+ function scoreCueBoundary(cues, index, boundaryPauseSeconds) {
2264
+ const cue = cues[index];
2265
+ const nextCue = cues[index + 1];
2266
+ if (!nextCue) {
2267
+ return Number.POSITIVE_INFINITY;
2268
+ }
2269
+ const trimmedText = cue.text.trim();
2270
+ let score = 0;
2271
+ if (SENTENCE_BOUNDARY_REGEX.test(trimmedText)) {
2272
+ score += 4;
2273
+ } else if (CLAUSE_BOUNDARY_REGEX.test(trimmedText)) {
2274
+ score += 2;
2275
+ }
2276
+ if (nextCue.startTime - cue.endTime >= boundaryPauseSeconds) {
2277
+ score += 2;
2278
+ }
2279
+ if (NEXT_SENTENCE_START_REGEX.test(nextCue.text.trim())) {
2280
+ score += 1;
2281
+ }
2282
+ return score;
2283
+ }
2284
+ function chunkVTTCuesByBudget(cues, options) {
2285
+ if (cues.length === 0) {
2286
+ return [];
2287
+ }
2288
+ const maxCuesPerChunk = Math.max(1, options.maxCuesPerChunk);
2289
+ let maxTextTokensPerChunk = Number.POSITIVE_INFINITY;
2290
+ if (options.maxTextTokensPerChunk) {
2291
+ maxTextTokensPerChunk = Math.max(1, options.maxTextTokensPerChunk);
2292
+ }
2293
+ const chunks = [];
2294
+ let chunkIndex = 0;
2295
+ let cueStartIndex = 0;
2296
+ let currentTokenCount = 0;
2297
+ for (let cueIndex = 0; cueIndex < cues.length; cueIndex++) {
2298
+ const cue = cues[cueIndex];
2299
+ const cueTokenCount = estimateTokenCount(cue.text);
2300
+ const currentCueCount = cueIndex - cueStartIndex;
2301
+ const wouldExceedCueCount = currentCueCount >= maxCuesPerChunk;
2302
+ const wouldExceedTokenCount = currentCueCount > 0 && currentTokenCount + cueTokenCount > maxTextTokensPerChunk;
2303
+ if (wouldExceedCueCount || wouldExceedTokenCount) {
2304
+ chunks.push({
2305
+ id: `chunk-${chunkIndex}`,
2306
+ cueStartIndex,
2307
+ cueEndIndex: cueIndex - 1,
2308
+ cueCount: cueIndex - cueStartIndex,
2309
+ startTime: cues[cueStartIndex].startTime,
2310
+ endTime: cues[cueIndex - 1].endTime
2311
+ });
2312
+ cueStartIndex = cueIndex;
2313
+ currentTokenCount = 0;
2314
+ chunkIndex++;
2315
+ }
2316
+ currentTokenCount += cueTokenCount;
2317
+ }
2318
+ chunks.push({
2319
+ id: `chunk-${chunkIndex}`,
2320
+ cueStartIndex,
2321
+ cueEndIndex: cues.length - 1,
2322
+ cueCount: cues.length - cueStartIndex,
2323
+ startTime: cues[cueStartIndex].startTime,
2324
+ endTime: cues[cues.length - 1].endTime
2325
+ });
2326
+ return chunks;
2327
+ }
2328
+ function chunkVTTCuesByDuration(cues, options) {
2329
+ if (cues.length === 0) {
2330
+ return [];
2331
+ }
2332
+ const targetChunkDurationSeconds = Math.max(1, options.targetChunkDurationSeconds);
2333
+ const maxChunkDurationSeconds = Math.max(targetChunkDurationSeconds, options.maxChunkDurationSeconds);
2334
+ const minChunkDurationSeconds = Math.min(
2335
+ targetChunkDurationSeconds,
2336
+ Math.max(
2337
+ 1,
2338
+ options.minChunkDurationSeconds ?? Math.floor(targetChunkDurationSeconds * DEFAULT_MIN_CHUNK_DURATION_RATIO)
2339
+ )
2340
+ );
2341
+ const boundaryLookaheadCues = Math.max(1, options.boundaryLookaheadCues ?? DEFAULT_BOUNDARY_LOOKAHEAD_CUES);
2342
+ const boundaryPauseSeconds = options.boundaryPauseSeconds ?? DEFAULT_BOUNDARY_PAUSE_SECONDS;
2343
+ const preferredBoundaryStartSeconds = Math.max(
2344
+ minChunkDurationSeconds,
2345
+ targetChunkDurationSeconds - Math.min(PREFERRED_BOUNDARY_WINDOW_SECONDS, targetChunkDurationSeconds / 6)
2346
+ );
2347
+ const chunks = [];
2348
+ let chunkIndex = 0;
2349
+ let cueStartIndex = 0;
2350
+ while (cueStartIndex < cues.length) {
2351
+ const chunkStartTime = cues[cueStartIndex].startTime;
2352
+ let cueEndIndex = cueStartIndex;
2353
+ let bestBoundaryIndex = -1;
2354
+ let bestBoundaryScore = -1;
2355
+ let bestPreferredBoundaryIndex = -1;
2356
+ let bestPreferredBoundaryScore = -1;
2357
+ while (cueEndIndex < cues.length) {
2358
+ const cue = cues[cueEndIndex];
2359
+ const currentDuration = cue.endTime - chunkStartTime;
2360
+ if (currentDuration >= minChunkDurationSeconds) {
2361
+ const boundaryScore = scoreCueBoundary(cues, cueEndIndex, boundaryPauseSeconds);
2362
+ if (boundaryScore >= bestBoundaryScore) {
2363
+ bestBoundaryIndex = cueEndIndex;
2364
+ bestBoundaryScore = boundaryScore;
2365
+ }
2366
+ if (currentDuration >= preferredBoundaryStartSeconds && boundaryScore >= bestPreferredBoundaryScore) {
2367
+ bestPreferredBoundaryIndex = cueEndIndex;
2368
+ bestPreferredBoundaryScore = boundaryScore;
2369
+ }
2370
+ }
2371
+ const nextCue = cues[cueEndIndex + 1];
2372
+ if (!nextCue) {
2373
+ break;
2374
+ }
2375
+ const nextDuration = nextCue.endTime - chunkStartTime;
2376
+ const lookaheadExceeded = cueEndIndex - cueStartIndex >= boundaryLookaheadCues;
2377
+ const preferredBoundaryIndex = bestPreferredBoundaryIndex >= cueStartIndex ? bestPreferredBoundaryIndex : bestBoundaryIndex;
2378
+ const preferredBoundaryScore = bestPreferredBoundaryIndex >= cueStartIndex ? bestPreferredBoundaryScore : bestBoundaryScore;
2379
+ if (currentDuration >= targetChunkDurationSeconds) {
2380
+ if (preferredBoundaryIndex >= cueStartIndex && preferredBoundaryScore >= STRONG_BOUNDARY_SCORE) {
2381
+ cueEndIndex = preferredBoundaryIndex;
2382
+ break;
2383
+ }
2384
+ if (nextDuration > maxChunkDurationSeconds || lookaheadExceeded) {
2385
+ cueEndIndex = preferredBoundaryIndex >= cueStartIndex ? preferredBoundaryIndex : cueEndIndex;
2386
+ break;
2387
+ }
2388
+ }
2389
+ if (nextDuration > maxChunkDurationSeconds) {
2390
+ cueEndIndex = preferredBoundaryIndex >= cueStartIndex ? preferredBoundaryIndex : cueEndIndex;
2391
+ break;
2392
+ }
2393
+ cueEndIndex++;
2394
+ }
2395
+ chunks.push({
2396
+ id: `chunk-${chunkIndex}`,
2397
+ cueStartIndex,
2398
+ cueEndIndex,
2399
+ cueCount: cueEndIndex - cueStartIndex + 1,
2400
+ startTime: cues[cueStartIndex].startTime,
2401
+ endTime: cues[cueEndIndex].endTime
2402
+ });
2403
+ cueStartIndex = cueEndIndex + 1;
2404
+ chunkIndex++;
2405
+ }
2406
+ return chunks;
2407
+ }
2091
2408
  function chunkText(text, strategy) {
2092
2409
  switch (strategy.type) {
2093
2410
  case "token": {
@@ -2343,10 +2660,8 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
2343
2660
  }
2344
2661
  const baseUrl = getMuxThumbnailBaseUrl(playbackId);
2345
2662
  const urlPromises = timestamps.map(async (time) => {
2346
- if (shouldSign) {
2347
- return signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials);
2348
- }
2349
- return `${baseUrl}?time=${time}&width=${width}`;
2663
+ const url = shouldSign ? await signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials) : `${baseUrl}?time=${time}&width=${width}`;
2664
+ return { url, time };
2350
2665
  });
2351
2666
  return Promise.all(urlPromises);
2352
2667
  }
@@ -2420,6 +2735,7 @@ async function moderateImageWithOpenAI(entry) {
2420
2735
  const categoryScores = json.results?.[0]?.category_scores || {};
2421
2736
  return {
2422
2737
  url: entry.url,
2738
+ time: entry.time,
2423
2739
  sexual: categoryScores.sexual || 0,
2424
2740
  violence: categoryScores.violence || 0,
2425
2741
  error: false
@@ -2428,6 +2744,7 @@ async function moderateImageWithOpenAI(entry) {
2428
2744
  console.error("OpenAI moderation failed:", error);
2429
2745
  return {
2430
2746
  url: entry.url,
2747
+ time: entry.time,
2431
2748
  sexual: 0,
2432
2749
  violence: 0,
2433
2750
  error: true,
@@ -2435,11 +2752,13 @@ async function moderateImageWithOpenAI(entry) {
2435
2752
  };
2436
2753
  }
2437
2754
  }
2438
- async function requestOpenAIModeration(imageUrls, model, maxConcurrent = 5, submissionMode = "url", downloadOptions, credentials) {
2755
+ async function requestOpenAIModeration(images, model, maxConcurrent = 5, submissionMode = "url", downloadOptions, credentials) {
2439
2756
  "use step";
2757
+ const imageUrls = images.map((img) => img.url);
2758
+ const timeByUrl = new Map(images.map((img) => [img.url, img.time]));
2440
2759
  const targetUrls = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map(
2441
- (img) => ({ url: img.url, image: img.base64Data, model, credentials })
2442
- ) : imageUrls.map((url) => ({ url, image: url, model, credentials }));
2760
+ (img) => ({ url: img.url, time: timeByUrl.get(img.url), image: img.base64Data, model, credentials })
2761
+ ) : images.map((img) => ({ url: img.url, time: img.time, image: img.url, model, credentials }));
2443
2762
  return processConcurrently(targetUrls, moderateImageWithOpenAI, maxConcurrent);
2444
2763
  }
2445
2764
  async function requestOpenAITextModeration(text, model, url, credentials) {
@@ -2584,6 +2903,7 @@ async function moderateImageWithHive(entry) {
2584
2903
  const violence = getHiveCategoryScores(classes, HIVE_VIOLENCE_CATEGORIES);
2585
2904
  return {
2586
2905
  url: entry.url,
2906
+ time: entry.time,
2587
2907
  sexual,
2588
2908
  violence,
2589
2909
  error: false
@@ -2591,6 +2911,7 @@ async function moderateImageWithHive(entry) {
2591
2911
  } catch (error) {
2592
2912
  return {
2593
2913
  url: entry.url,
2914
+ time: entry.time,
2594
2915
  sexual: 0,
2595
2916
  violence: 0,
2596
2917
  error: true,
@@ -2598,19 +2919,23 @@ async function moderateImageWithHive(entry) {
2598
2919
  };
2599
2920
  }
2600
2921
  }
2601
- async function requestHiveModeration(imageUrls, maxConcurrent = 5, submissionMode = "url", downloadOptions, credentials) {
2922
+ async function requestHiveModeration(images, maxConcurrent = 5, submissionMode = "url", downloadOptions, credentials) {
2602
2923
  "use step";
2924
+ const imageUrls = images.map((img) => img.url);
2925
+ const timeByUrl = new Map(images.map((img) => [img.url, img.time]));
2603
2926
  const targets = submissionMode === "base64" ? (await downloadImagesAsBase64(imageUrls, downloadOptions, maxConcurrent)).map((img) => ({
2604
2927
  url: img.url,
2928
+ time: timeByUrl.get(img.url),
2605
2929
  source: {
2606
2930
  kind: "file",
2607
2931
  buffer: img.buffer,
2608
2932
  contentType: img.contentType
2609
2933
  },
2610
2934
  credentials
2611
- })) : imageUrls.map((url) => ({
2612
- url,
2613
- source: { kind: "url", value: url },
2935
+ })) : images.map((img) => ({
2936
+ url: img.url,
2937
+ time: img.time,
2938
+ source: { kind: "url", value: img.url },
2614
2939
  credentials
2615
2940
  }));
2616
2941
  return await processConcurrently(targets, moderateImageWithHive, maxConcurrent);
@@ -2621,10 +2946,8 @@ async function getThumbnailUrlsFromTimestamps(playbackId, timestampsMs, options)
2621
2946
  const baseUrl = getMuxThumbnailBaseUrl(playbackId);
2622
2947
  const urlPromises = timestampsMs.map(async (tsMs) => {
2623
2948
  const time = Number((tsMs / 1e3).toFixed(2));
2624
- if (shouldSign) {
2625
- return signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials);
2626
- }
2627
- return `${baseUrl}?time=${time}&width=${width}`;
2949
+ const url = shouldSign ? await signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials) : `${baseUrl}?time=${time}&width=${width}`;
2950
+ return { url, time };
2628
2951
  });
2629
2952
  return Promise.all(urlPromises);
2630
2953
  }
@@ -3905,12 +4228,187 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
3905
4228
  }
3906
4229
 
3907
4230
  // src/workflows/translate-captions.ts
3908
- import { generateText as generateText5, Output as Output5 } from "ai";
4231
+ import {
4232
+ APICallError,
4233
+ generateText as generateText5,
4234
+ NoObjectGeneratedError,
4235
+ Output as Output5,
4236
+ RetryError,
4237
+ TypeValidationError
4238
+ } from "ai";
4239
+ import dedent5 from "dedent";
3909
4240
  import { z as z6 } from "zod";
3910
4241
  var translationSchema = z6.object({
3911
4242
  translation: z6.string()
3912
4243
  });
3913
- var SYSTEM_PROMPT4 = 'You are a subtitle translation expert. Translate VTT subtitle files to the target language specified by the user. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT content.';
4244
+ var SYSTEM_PROMPT4 = dedent5`
4245
+ You are a subtitle translation expert. Translate VTT subtitle files to the target language specified by the user.
4246
+ You may receive either a full VTT file or a chunk from a larger VTT.
4247
+ Preserve all timestamps, cue ordering, and VTT formatting exactly as they appear.
4248
+ Return JSON with a single key "translation" containing the translated VTT content.
4249
+ `;
4250
+ var CUE_TRANSLATION_SYSTEM_PROMPT = dedent5`
4251
+ You are a subtitle translation expert.
4252
+ You will receive a sequence of subtitle cues extracted from a VTT file.
4253
+ Translate the cues to the requested target language while preserving their original order.
4254
+ Treat the cue list as continuous context so the translation reads naturally across adjacent lines.
4255
+ Return JSON with a single key "translations" containing exactly one translated string for each input cue.
4256
+ Do not merge, split, omit, reorder, or add cues.
4257
+ `;
4258
+ var DEFAULT_TRANSLATION_CHUNKING = {
4259
+ enabled: true,
4260
+ minimumAssetDurationSeconds: 30 * 60,
4261
+ targetChunkDurationSeconds: 30 * 60,
4262
+ maxConcurrentTranslations: 4,
4263
+ maxCuesPerChunk: 80,
4264
+ maxCueTextTokensPerChunk: 2e3
4265
+ };
4266
+ var TOKEN_USAGE_FIELDS = [
4267
+ "inputTokens",
4268
+ "outputTokens",
4269
+ "totalTokens",
4270
+ "reasoningTokens",
4271
+ "cachedInputTokens"
4272
+ ];
4273
+ var TranslationChunkValidationError = class extends Error {
4274
+ constructor(message) {
4275
+ super(message);
4276
+ this.name = "TranslationChunkValidationError";
4277
+ }
4278
+ };
4279
+ function isTranslationChunkValidationError(error) {
4280
+ return error instanceof TranslationChunkValidationError;
4281
+ }
4282
+ function isProviderServiceError(error) {
4283
+ if (!error) {
4284
+ return false;
4285
+ }
4286
+ if (RetryError.isInstance(error)) {
4287
+ return isProviderServiceError(error.lastError);
4288
+ }
4289
+ if (APICallError.isInstance(error)) {
4290
+ return true;
4291
+ }
4292
+ if (error instanceof Error && "cause" in error) {
4293
+ return isProviderServiceError(error.cause);
4294
+ }
4295
+ return false;
4296
+ }
4297
+ function shouldSplitChunkTranslationError(error) {
4298
+ if (isProviderServiceError(error)) {
4299
+ return false;
4300
+ }
4301
+ return NoObjectGeneratedError.isInstance(error) || TypeValidationError.isInstance(error) || isTranslationChunkValidationError(error);
4302
+ }
4303
+ function isDefinedTokenUsageValue(value) {
4304
+ return typeof value === "number";
4305
+ }
4306
+ function resolveTranslationChunkingOptions(options) {
4307
+ const targetChunkDurationSeconds = Math.max(
4308
+ 1,
4309
+ options?.targetChunkDurationSeconds ?? DEFAULT_TRANSLATION_CHUNKING.targetChunkDurationSeconds
4310
+ );
4311
+ return {
4312
+ enabled: options?.enabled ?? DEFAULT_TRANSLATION_CHUNKING.enabled,
4313
+ minimumAssetDurationSeconds: Math.max(
4314
+ 1,
4315
+ options?.minimumAssetDurationSeconds ?? DEFAULT_TRANSLATION_CHUNKING.minimumAssetDurationSeconds
4316
+ ),
4317
+ targetChunkDurationSeconds,
4318
+ maxConcurrentTranslations: Math.max(
4319
+ 1,
4320
+ options?.maxConcurrentTranslations ?? DEFAULT_TRANSLATION_CHUNKING.maxConcurrentTranslations
4321
+ ),
4322
+ maxCuesPerChunk: Math.max(
4323
+ 1,
4324
+ options?.maxCuesPerChunk ?? DEFAULT_TRANSLATION_CHUNKING.maxCuesPerChunk
4325
+ ),
4326
+ maxCueTextTokensPerChunk: Math.max(
4327
+ 1,
4328
+ options?.maxCueTextTokensPerChunk ?? DEFAULT_TRANSLATION_CHUNKING.maxCueTextTokensPerChunk
4329
+ )
4330
+ };
4331
+ }
4332
+ function aggregateTokenUsage(usages) {
4333
+ return TOKEN_USAGE_FIELDS.reduce((aggregate, field) => {
4334
+ const values = usages.map((usage) => usage[field]).filter(isDefinedTokenUsageValue);
4335
+ if (values.length > 0) {
4336
+ aggregate[field] = values.reduce((total, value) => total + value, 0);
4337
+ }
4338
+ return aggregate;
4339
+ }, {});
4340
+ }
4341
+ function createTranslationChunkRequest(id, cues, cueBlocks) {
4342
+ return {
4343
+ id,
4344
+ cueCount: cues.length,
4345
+ startTime: cues[0].startTime,
4346
+ endTime: cues[cues.length - 1].endTime,
4347
+ cues,
4348
+ cueBlocks
4349
+ };
4350
+ }
4351
+ function splitTranslationChunkRequestByBudget(id, cues, cueBlocks, maxCuesPerChunk, maxCueTextTokensPerChunk) {
4352
+ const chunks = chunkVTTCuesByBudget(cues, {
4353
+ maxCuesPerChunk,
4354
+ maxTextTokensPerChunk: maxCueTextTokensPerChunk
4355
+ });
4356
+ return chunks.map(
4357
+ (chunk, index) => createTranslationChunkRequest(
4358
+ chunks.length === 1 ? id : `${id}-part-${index}`,
4359
+ cues.slice(chunk.cueStartIndex, chunk.cueEndIndex + 1),
4360
+ cueBlocks.slice(chunk.cueStartIndex, chunk.cueEndIndex + 1)
4361
+ )
4362
+ );
4363
+ }
4364
+ function buildTranslationChunkRequests(vttContent, assetDurationSeconds, chunkingOptions) {
4365
+ const resolvedChunking = resolveTranslationChunkingOptions(chunkingOptions);
4366
+ const cues = parseVTTCues(vttContent);
4367
+ if (cues.length === 0) {
4368
+ return null;
4369
+ }
4370
+ const { preamble, cueBlocks } = splitVttPreambleAndCueBlocks(vttContent);
4371
+ if (cueBlocks.length !== cues.length) {
4372
+ console.warn(
4373
+ `Falling back to full-VTT caption translation because cue block count (${cueBlocks.length}) does not match parsed cue count (${cues.length}).`
4374
+ );
4375
+ return null;
4376
+ }
4377
+ if (!resolvedChunking.enabled) {
4378
+ return {
4379
+ preamble,
4380
+ chunks: [
4381
+ createTranslationChunkRequest("chunk-0", cues, cueBlocks)
4382
+ ]
4383
+ };
4384
+ }
4385
+ if (typeof assetDurationSeconds !== "number" || assetDurationSeconds < resolvedChunking.minimumAssetDurationSeconds) {
4386
+ return {
4387
+ preamble,
4388
+ chunks: [
4389
+ createTranslationChunkRequest("chunk-0", cues, cueBlocks)
4390
+ ]
4391
+ };
4392
+ }
4393
+ const targetChunkDurationSeconds = resolvedChunking.targetChunkDurationSeconds;
4394
+ const durationChunks = chunkVTTCuesByDuration(cues, {
4395
+ targetChunkDurationSeconds,
4396
+ maxChunkDurationSeconds: Math.max(targetChunkDurationSeconds, Math.round(targetChunkDurationSeconds * (7 / 6))),
4397
+ minChunkDurationSeconds: Math.max(1, Math.round(targetChunkDurationSeconds * (2 / 3)))
4398
+ });
4399
+ return {
4400
+ preamble,
4401
+ chunks: durationChunks.flatMap(
4402
+ (chunk) => splitTranslationChunkRequestByBudget(
4403
+ chunk.id,
4404
+ cues.slice(chunk.cueStartIndex, chunk.cueEndIndex + 1),
4405
+ cueBlocks.slice(chunk.cueStartIndex, chunk.cueEndIndex + 1),
4406
+ resolvedChunking.maxCuesPerChunk,
4407
+ resolvedChunking.maxCueTextTokensPerChunk
4408
+ )
4409
+ )
4410
+ };
4411
+ }
3914
4412
  async function fetchVttFromMux(vttUrl) {
3915
4413
  "use step";
3916
4414
  const vttResponse = await fetch(vttUrl);
@@ -3956,6 +4454,176 @@ ${vttContent}`
3956
4454
  }
3957
4455
  };
3958
4456
  }
4457
/**
 * Translates one chunk of VTT cues with an LLM via structured output.
 *
 * @param {object} args
 * @param {Array<{startTime: *, endTime: *, text: string}>} args.cues - Cues to translate.
 * @param {string} args.fromLanguageCode - Source language code.
 * @param {string} args.toLanguageCode - Target language code.
 * @param {string} args.provider - Model provider identifier.
 * @param {string} args.modelId - Model identifier.
 * @param {object} args.credentials - Provider credentials.
 * @returns {Promise<{translations: string[], usage: object}>} Translated cue texts
 *   (same order as input) plus token-usage accounting from the model response.
 */
async function translateCueChunkWithAI({
  cues,
  fromLanguageCode,
  toLanguageCode,
  provider,
  modelId,
  credentials
}) {
  "use step";
  const model = await createLanguageModelFromConfig(provider, modelId, credentials);
  // Schema forces the model to return exactly one non-empty string per input cue,
  // so translations can be zipped back onto cue blocks by index.
  const schema = z6.object({
    translations: z6.array(z6.string().min(1)).length(cues.length)
  });
  // Include timing alongside text so the model has context, but only the text
  // is expected back.
  const cuePayload = cues.map((cue, index) => ({
    index,
    startTime: cue.startTime,
    endTime: cue.endTime,
    text: cue.text
  }));
  const response = await generateText5({
    model,
    output: Output5.object({ schema }),
    messages: [
      {
        role: "system",
        content: CUE_TRANSLATION_SYSTEM_PROMPT
      },
      {
        role: "user",
        content: `Translate from ${fromLanguageCode} to ${toLanguageCode}.
Return exactly ${cues.length} translated cues in the same order as the input.

${JSON.stringify(cuePayload, null, 2)}`
      }
    ]
  });
  return {
    translations: response.output.translations,
    usage: {
      inputTokens: response.usage.inputTokens,
      outputTokens: response.usage.outputTokens,
      totalTokens: response.usage.totalTokens,
      reasoningTokens: response.usage.reasoningTokens,
      cachedInputTokens: response.usage.cachedInputTokens
    }
  };
}
4504
/**
 * Splits a translation chunk into two halves at its cue midpoint.
 *
 * @param {object} chunk - Chunk request with `id`, `cueCount`, `cues`, `cueBlocks`.
 * @returns {[object, object]} Two new chunk requests, suffixed `-a` and `-b`.
 * @throws {Error} When the chunk has fewer than two cues and cannot be split.
 */
function splitTranslationChunkAtMidpoint(chunk) {
  const splitIndex = Math.floor(chunk.cueCount / 2);
  const canSplit = splitIndex > 0 && splitIndex < chunk.cueCount;
  if (!canSplit) {
    throw new Error(`Cannot split chunk ${chunk.id} with cueCount=${chunk.cueCount}`);
  }
  const firstHalf = createTranslationChunkRequest(
    `${chunk.id}-a`,
    chunk.cues.slice(0, splitIndex),
    chunk.cueBlocks.slice(0, splitIndex)
  );
  const secondHalf = createTranslationChunkRequest(
    `${chunk.id}-b`,
    chunk.cues.slice(splitIndex),
    chunk.cueBlocks.slice(splitIndex)
  );
  return [firstHalf, secondHalf];
}
4522
/**
 * Translates one chunk, recursively splitting it in half and retrying when the
 * failure is a splittable validation/size error.
 *
 * @param {object} args
 * @param {object} args.chunk - Chunk request (`id`, `cues`, `cueBlocks`, `cueCount`,
 *   `startTime`, `endTime`).
 * @param {string} args.fromLanguageCode - Source language code.
 * @param {string} args.toLanguageCode - Target language code.
 * @param {string} args.provider - Model provider identifier.
 * @param {string} args.modelId - Model identifier.
 * @param {object} args.credentials - Provider credentials.
 * @returns {Promise<{translatedVtt: string, usage: object}>} Translated VTT segment
 *   for this chunk plus aggregated token usage.
 * @throws {Error} When translation fails and the chunk cannot be split further;
 *   the original failure is attached as `cause`.
 */
async function translateChunkWithFallback({
  chunk,
  fromLanguageCode,
  toLanguageCode,
  provider,
  modelId,
  credentials
}) {
  "use step";
  try {
    const result = await translateCueChunkWithAI({
      cues: chunk.cues,
      fromLanguageCode,
      toLanguageCode,
      provider,
      modelId,
      credentials
    });
    // Defensive re-check: the schema should already enforce the count, but a
    // mismatch here would silently misalign translations with cue blocks.
    if (result.translations.length !== chunk.cueCount) {
      throw new TranslationChunkValidationError(
        `Chunk ${chunk.id} returned ${result.translations.length} cues, expected ${chunk.cueCount} for ${Math.round(chunk.startTime)}s-${Math.round(chunk.endTime)}s`
      );
    }
    return {
      translatedVtt: buildVttFromTranslatedCueBlocks(chunk.cueBlocks, result.translations),
      usage: result.usage
    };
  } catch (error) {
    if (!shouldSplitChunkTranslationError(error) || chunk.cueCount <= 1) {
      // Preserve the original error (stack, metadata) via `cause` instead of
      // flattening it into the message string only.
      throw new Error(
        `Chunk ${chunk.id} failed for ${Math.round(chunk.startTime)}s-${Math.round(chunk.endTime)}s: ${error instanceof Error ? error.message : "Unknown error"}`,
        { cause: error }
      );
    }
    // Split in half and translate both halves in parallel; recursion bottoms out
    // at cueCount <= 1 above.
    const [leftChunk, rightChunk] = splitTranslationChunkAtMidpoint(chunk);
    const [leftResult, rightResult] = await Promise.all([
      translateChunkWithFallback({
        chunk: leftChunk,
        fromLanguageCode,
        toLanguageCode,
        provider,
        modelId,
        credentials
      }),
      translateChunkWithFallback({
        chunk: rightChunk,
        fromLanguageCode,
        toLanguageCode,
        provider,
        modelId,
        credentials
      })
    ]);
    return {
      translatedVtt: concatenateVttSegments([leftResult.translatedVtt, rightResult.translatedVtt]),
      usage: aggregateTokenUsage([leftResult.usage, rightResult.usage])
    };
  }
}
4580
/**
 * Translates a full caption track, chunking it when a chunk plan is available
 * and falling back to single-request full-VTT translation otherwise.
 *
 * @param {object} args
 * @param {string} args.vttContent - Full WebVTT document to translate.
 * @param {number|undefined} args.assetDurationSeconds - Asset duration, when known.
 * @param {string} args.fromLanguageCode - Source language code.
 * @param {string} args.toLanguageCode - Target language code.
 * @param {string} args.provider - Model provider identifier.
 * @param {string} args.modelId - Model identifier.
 * @param {object} args.credentials - Provider credentials.
 * @param {object|undefined} args.chunking - Chunking option overrides.
 * @returns {Promise<{translatedVtt: string, usage: object}>} Reassembled translated
 *   VTT plus aggregated token usage across all chunks.
 */
async function translateCaptionTrack({
  vttContent,
  assetDurationSeconds,
  fromLanguageCode,
  toLanguageCode,
  provider,
  modelId,
  credentials,
  chunking
}) {
  "use step";
  const chunkPlan = buildTranslationChunkRequests(vttContent, assetDurationSeconds, chunking);
  if (chunkPlan === null) {
    // No usable chunk plan — translate the whole document in one request.
    return translateVttWithAI({
      vttContent,
      fromLanguageCode,
      toLanguageCode,
      provider,
      modelId,
      credentials
    });
  }
  const { maxConcurrentTranslations } = resolveTranslationChunkingOptions(chunking);
  const translatedSegments = [];
  const usageByChunk = [];
  // Process chunks in fixed-size batches so at most `maxConcurrentTranslations`
  // model calls are in flight at once.
  let cursor = 0;
  while (cursor < chunkPlan.chunks.length) {
    const batch = chunkPlan.chunks.slice(cursor, cursor + maxConcurrentTranslations);
    const batchResults = await Promise.all(
      batch.map(
        (chunk) => translateChunkWithFallback({
          chunk,
          fromLanguageCode,
          toLanguageCode,
          provider,
          modelId,
          credentials
        })
      )
    );
    for (const result of batchResults) {
      translatedSegments.push(result.translatedVtt);
      usageByChunk.push(result.usage);
    }
    cursor += maxConcurrentTranslations;
  }
  return {
    translatedVtt: concatenateVttSegments(translatedSegments, chunkPlan.preamble),
    usage: aggregateTokenUsage(usageByChunk)
  };
}
3959
4627
  async function uploadVttToS3({
3960
4628
  translatedVtt,
3961
4629
  assetId,
@@ -4016,7 +4684,8 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
4016
4684
  s3Bucket: providedS3Bucket,
4017
4685
  uploadToMux: uploadToMuxOption,
4018
4686
  storageAdapter,
4019
- credentials: providedCredentials
4687
+ credentials: providedCredentials,
4688
+ chunking
4020
4689
  } = options;
4021
4690
  const credentials = providedCredentials;
4022
4691
  const effectiveStorageAdapter = storageAdapter;
@@ -4077,13 +4746,15 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
4077
4746
  let translatedVtt;
4078
4747
  let usage;
4079
4748
  try {
4080
- const result = await translateVttWithAI({
4749
+ const result = await translateCaptionTrack({
4081
4750
  vttContent,
4751
+ assetDurationSeconds,
4082
4752
  fromLanguageCode,
4083
4753
  toLanguageCode,
4084
4754
  provider: modelConfig.provider,
4085
4755
  modelId: modelConfig.modelId,
4086
- credentials
4756
+ credentials,
4757
+ chunking
4087
4758
  });
4088
4759
  translatedVtt = result.translatedVtt;
4089
4760
  usage = result.usage;
@@ -4156,6 +4827,7 @@ export {
4156
4827
  HIVE_SEXUAL_CATEGORIES,
4157
4828
  HIVE_VIOLENCE_CATEGORIES,
4158
4829
  SUMMARY_KEYWORD_LIMIT,
4830
+ aggregateTokenUsage,
4159
4831
  askQuestions,
4160
4832
  burnedInCaptionsSchema,
4161
4833
  chapterSchema,
@@ -4167,6 +4839,7 @@ export {
4167
4839
  getSummaryAndTags,
4168
4840
  hasBurnedInCaptions,
4169
4841
  questionAnswerSchema,
4842
+ shouldSplitChunkTranslationError,
4170
4843
  summarySchema,
4171
4844
  translateAudio,
4172
4845
  translateCaptions,