@mux/ai 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,3 @@
1
- export { D as DEFAULT_STORYBOARD_WIDTH, H as HeatmapOptions, a as HeatmapResponse, b as Hotspot, c as HotspotOptions, d as HotspotResponse, T as ThumbnailOptions, e as TranscriptFetchOptions, f as TranscriptResult, V as VTTCue, g as buildTranscriptUrl, h as chunkByTokens, j as chunkText, k as chunkVTTCues, l as estimateTokenCount, m as extractTextFromVTT, n as extractTimestampedTranscript, o as fetchTranscriptForAsset, p as findCaptionTrack, q as getHeatmapForAsset, r as getHeatmapForPlaybackId, s as getHeatmapForVideo, t as getHotspotsForAsset, u as getHotspotsForPlaybackId, v as getHotspotsForVideo, w as getReadyTextTracks, x as getStoryboardUrl, y as getThumbnailUrls, z as parseVTTCues, A as secondsToTimestamp, B as vttTimestampToSeconds } from '../index-Nxf6BaBO.js';
1
+ export { C as CompletedShotsResult, D as DEFAULT_STORYBOARD_WIDTH, E as ErroredShotsResult, H as HeatmapOptions, a as HeatmapResponse, b as Hotspot, c as HotspotOptions, d as HotspotResponse, P as PendingShotsResult, S as Shot, e as ShotRequestOptions, f as ShotsResult, T as ThumbnailOptions, g as TranscriptFetchOptions, h as TranscriptResult, V as VTTCue, j as VTTCueBudgetChunkingOptions, k as VTTDurationChunk, l as VTTDurationChunkingOptions, W as WaitForShotsOptions, m as buildTranscriptUrl, n as buildVttFromCueBlocks, o as buildVttFromTranslatedCueBlocks, p as chunkByTokens, q as chunkText, r as chunkVTTCues, s as chunkVTTCuesByBudget, t as chunkVTTCuesByDuration, u as concatenateVttSegments, v as estimateTokenCount, w as extractTextFromVTT, x as extractTimestampedTranscript, y as fetchTranscriptForAsset, z as findCaptionTrack, A as getHeatmapForAsset, B as getHeatmapForPlaybackId, F as getHeatmapForVideo, G as getHotspotsForAsset, I as getHotspotsForPlaybackId, J as getHotspotsForVideo, K as getReadyTextTracks, L as getShotsForAsset, M as getStoryboardUrl, N as getThumbnailUrls, O as parseVTTCues, Q as replaceCueText, R as requestShotsForAsset, U as secondsToTimestamp, X as splitVttPreambleAndCueBlocks, Y as vttTimestampToSeconds, Z as waitForShotsForAsset } from '../index-DLhfJsOd.js';
2
2
  import '../types-BRbaGW3t.js';
3
3
  import '@mux/mux-node';
@@ -592,6 +592,142 @@ async function fetchHotspots(identifierType, id, options) {
592
592
  return transformHotspotResponse(response);
593
593
  }
594
594
 
595
// src/primitives/shots.ts
// Default delay between status polls in waitForShotsForAsset (2 seconds).
var DEFAULT_POLL_INTERVAL_MS = 2e3;
// Lower bound applied to caller-supplied poll intervals (1 second).
var MIN_POLL_INTERVAL_MS = 1e3;
// Default cap on polling iterations before waitForShotsForAsset times out.
var DEFAULT_MAX_ATTEMPTS = 60;
// Lowercased marker substring of the Mux 400 error returned when shots
// generation was already requested for an asset (matched case-insensitively).
var SHOTS_ALREADY_REQUESTED_MESSAGE = "shots generation has already been requested";
// Builds the Mux Video API path for an asset's shots resource.
function getShotsPath(assetId) {
  return `/video/v1/assets/${assetId}/shots`;
}
603
// Validates raw manifest entries and normalizes them to plain
// { startTime, imageUrl } shot objects (dropping any extra keys).
// Throws a TypeError identifying the first malformed entry by index.
function mapManifestShots(shots) {
  const normalized = [];
  for (let index = 0; index < shots.length; index++) {
    const { startTime, imageUrl } = shots[index];
    const startIsFiniteNumber = typeof startTime === "number" && Number.isFinite(startTime);
    if (!startIsFiniteNumber) {
      throw new TypeError(`Invalid shot startTime in shots manifest at index ${index}`);
    }
    const imageIsNonEmptyString = typeof imageUrl === "string" && imageUrl.length > 0;
    if (!imageIsNonEmptyString) {
      throw new TypeError(`Invalid shot imageUrl in shots manifest at index ${index}`);
    }
    normalized.push({ startTime, imageUrl });
  }
  return normalized;
}
618
// Downloads a shots manifest JSON document and returns its validated shots.
// Throws Error on a non-2xx HTTP response and TypeError when the payload is
// missing the `shots` array.
async function fetchShotsFromManifest(shotsManifestUrl) {
  const response = await fetch(shotsManifestUrl);
  if (response.ok === false) {
    const detail = `${response.status} ${response.statusText}`;
    throw new Error(`Failed to fetch shots manifest: ${detail}`);
  }
  const manifest = await response.json();
  const { shots } = manifest;
  if (!Array.isArray(shots)) {
    throw new TypeError("Invalid shots manifest response: missing shots array");
  }
  return mapManifestShots(shots);
}
631
// Maps a raw Mux shots API response into a discriminated result object.
// "completed" responses additionally resolve the shot list by fetching the
// shots manifest over HTTP; unknown statuses are rejected loudly.
async function transformShotsResponse(response) {
  const { status, created_at: createdAt } = response.data;
  if (status === "pending") {
    return { status: "pending", createdAt };
  }
  if (status === "errored") {
    return { status: "errored", createdAt, error: response.data.error };
  }
  if (status === "completed") {
    const shots = await fetchShotsFromManifest(response.data.shots_manifest_url);
    return { status: "completed", createdAt, shots };
  }
  throw new Error(`Unsupported shots response: ${JSON.stringify(response.data)}`);
}
656
// Resolves after roughly `ms` milliseconds; used to pace polling loops.
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
659
// Returns true when `error` is the Mux 400 response indicating that shots
// generation was already requested for the asset (a benign, ignorable error).
//
// Accepts both structured SDK errors (status/statusCode + error.messages)
// and plain Error instances whose message contains the marker text.
function isShotsAlreadyRequestedError(error) {
  const statusCode = error?.status ?? error?.statusCode;
  if (statusCode !== 400) {
    return false;
  }
  // Robustness fix: `error.error.messages` is not guaranteed to be an array
  // of strings on every error shape; the previous `messages?.map(...)`
  // crashed with a TypeError on non-array values, masking the real error.
  const rawMessages = error?.error?.messages;
  const messages = Array.isArray(rawMessages) ? rawMessages : [];
  const matchesStructuredMessage = messages.some(
    (message) => typeof message === "string" && message.toLowerCase().includes(SHOTS_ALREADY_REQUESTED_MESSAGE)
  );
  const errorMessage = error instanceof Error ? error.message.toLowerCase() : "";
  return matchesStructuredMessage || errorMessage.includes(SHOTS_ALREADY_REQUESTED_MESSAGE);
}
666
// Kicks off shots generation for a Mux asset by POSTing to its /shots
// endpoint. The API is expected to answer with a "pending" result; any other
// status is treated as a protocol violation and thrown.
// NOTE(review): the "use step" directive marks this as a durable workflow
// step — presumably consumed by a workflow runtime/compiler; confirm.
async function requestShotsForAsset(assetId, options = {}) {
  "use step";
  const { credentials } = options;
  // Credentials fall back to environment configuration inside the helper.
  const muxClient = await getMuxClientFromEnv(credentials);
  const mux = await muxClient.createClient();
  const response = await mux.post(
    getShotsPath(assetId),
    { body: {} }
  );
  const result = await transformShotsResponse(response);
  if (result.status !== "pending") {
    throw new Error(
      `Expected pending status after requesting shots for asset '${assetId}', received '${result.status}'`
    );
  }
  return result;
}
683
// Fetches the current shots generation status for a Mux asset and returns a
// pending/errored/completed result ("completed" results include the shots
// resolved from the manifest URL by transformShotsResponse).
// NOTE(review): "use step" marks this as a durable workflow step — confirm.
async function getShotsForAsset(assetId, options = {}) {
  "use step";
  const { credentials } = options;
  const muxClient = await getMuxClientFromEnv(credentials);
  const mux = await muxClient.createClient();
  const response = await mux.get(
    getShotsPath(assetId)
  );
  return await transformShotsResponse(response);
}
693
// Polls until shots generation for `assetId` completes, errors, or times out.
//
// When createIfMissing is true (default), first requests generation and
// tolerates the "already requested" 400 from the API. The poll interval is
// clamped to MIN_POLL_INTERVAL_MS and maxAttempts to at least 1.
// Throws on an "errored" status or after exhausting all attempts.
// NOTE(review): "use step" marks this as a durable workflow step — confirm.
async function waitForShotsForAsset(assetId, options = {}) {
  "use step";
  const {
    pollIntervalMs = DEFAULT_POLL_INTERVAL_MS,
    maxAttempts = DEFAULT_MAX_ATTEMPTS,
    createIfMissing = true,
    credentials
  } = options;
  if (createIfMissing) {
    try {
      await requestShotsForAsset(assetId, { credentials });
    } catch (error) {
      // A duplicate-request 400 means generation is already underway —
      // fall through to polling; rethrow anything else.
      if (!isShotsAlreadyRequestedError(error)) {
        throw error;
      }
    }
  }
  const normalizedMaxAttempts = Math.max(1, maxAttempts);
  const normalizedPollIntervalMs = Math.max(MIN_POLL_INTERVAL_MS, pollIntervalMs);
  let lastStatus;
  for (let attempt = 0; attempt < normalizedMaxAttempts; attempt++) {
    const result = await getShotsForAsset(assetId, { credentials });
    lastStatus = result.status;
    if (result.status === "completed") {
      return result;
    }
    if (result.status === "errored") {
      throw new Error(`Shots generation errored for asset '${assetId}'`);
    }
    // Skip the delay after the final attempt so the timeout surfaces promptly.
    if (attempt < normalizedMaxAttempts - 1) {
      await sleep(normalizedPollIntervalMs);
    }
  }
  throw new Error(
    `Timed out waiting for shots for asset '${assetId}' after ${normalizedMaxAttempts} attempts. Last status: ${lastStatus ?? "unknown"}`
  );
}
730
+
595
731
  // src/lib/mux-image-url.ts
596
732
  var DEFAULT_MUX_IMAGE_ORIGIN = "https://image.mux.com";
597
733
  function normalizeMuxImageOrigin(value) {
@@ -679,6 +815,14 @@ async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, sh
679
815
  }
680
816
 
681
817
  // src/primitives/text-chunking.ts
818
// Minimum chunk duration defaults to 2/3 of the target duration.
var DEFAULT_MIN_CHUNK_DURATION_RATIO = 2 / 3;
// How many cues past the target we scan for a good split point.
var DEFAULT_BOUNDARY_LOOKAHEAD_CUES = 12;
// A gap of at least this many seconds between cues counts as a pause boundary.
var DEFAULT_BOUNDARY_PAUSE_SECONDS = 1.25;
// Boundary score at/above which a split point is considered "strong".
var STRONG_BOUNDARY_SCORE = 4;
// Window (in seconds) before the target duration in which boundaries are preferred.
var PREFERRED_BOUNDARY_WINDOW_SECONDS = 5 * 60;
// Cue text ending with sentence punctuation, optionally followed by closing quotes/brackets.
var SENTENCE_BOUNDARY_REGEX = /[.!?]["')\]]*$/;
// Cue text ending with clause punctuation (comma/semicolon/colon).
var CLAUSE_BOUNDARY_REGEX = /[,;:]["')\]]*$/;
// Next cue starting with a capital, digit, or opening punctuation — likely a new sentence.
var NEXT_SENTENCE_START_REGEX = /^[A-Z0-9"'([{]/;
682
826
  function estimateTokenCount(text) {
683
827
  const words = text.trim().split(/\s+/).length;
684
828
  return Math.ceil(words / 0.75);
@@ -751,6 +895,151 @@ function chunkVTTCues(cues, maxTokens, overlapCues = 2) {
751
895
  }
752
896
  return chunks;
753
897
  }
898
// Scores how suitable it is to split a chunk AFTER cues[index].
// Sentence-ending text (+4) beats clause-ending text (+2); a pause of at
// least `boundaryPauseSeconds` before the next cue adds +2, and a next cue
// that looks like a new sentence adds +1. The final cue scores +Infinity
// because a chunk must end there.
function scoreCueBoundary(cues, index, boundaryPauseSeconds) {
  const current = cues[index];
  const following = cues[index + 1];
  if (!following) {
    return Number.POSITIVE_INFINITY;
  }
  const endingText = current.text.trim();
  const endsSentence = SENTENCE_BOUNDARY_REGEX.test(endingText);
  const endsClause = !endsSentence && CLAUSE_BOUNDARY_REGEX.test(endingText);
  let score = endsSentence ? 4 : endsClause ? 2 : 0;
  const gapSeconds = following.startTime - current.endTime;
  if (gapSeconds >= boundaryPauseSeconds) {
    score += 2;
  }
  if (NEXT_SENTENCE_START_REGEX.test(following.text.trim())) {
    score += 1;
  }
  return score;
}
919
// Splits cues into contiguous chunks bounded by a maximum cue count and an
// optional maximum token budget (estimated from cue text). Returns chunk
// descriptors (indices, counts, and start/end times) — never the text itself.
function chunkVTTCuesByBudget(cues, options) {
  if (cues.length === 0) {
    return [];
  }
  const cueLimit = Math.max(1, options.maxCuesPerChunk);
  // Falsy (absent/0) token budgets disable the token constraint entirely.
  const tokenLimit = options.maxTextTokensPerChunk
    ? Math.max(1, options.maxTextTokensPerChunk)
    : Number.POSITIVE_INFINITY;
  // Builds the descriptor for cues[first..last] inclusive.
  const buildChunk = (ordinal, first, last) => ({
    id: `chunk-${ordinal}`,
    cueStartIndex: first,
    cueEndIndex: last,
    cueCount: last - first + 1,
    startTime: cues[first].startTime,
    endTime: cues[last].endTime
  });
  const chunks = [];
  let firstCue = 0;
  let tokensSoFar = 0;
  for (let index = 0; index < cues.length; index++) {
    const cueTokens = estimateTokenCount(cues[index].text);
    const cuesSoFar = index - firstCue;
    const overCueLimit = cuesSoFar >= cueLimit;
    // A chunk always keeps at least one cue, even if it alone busts the budget.
    const overTokenLimit = cuesSoFar > 0 && tokensSoFar + cueTokens > tokenLimit;
    if (overCueLimit || overTokenLimit) {
      chunks.push(buildChunk(chunks.length, firstCue, index - 1));
      firstCue = index;
      tokensSoFar = 0;
    }
    tokensSoFar += cueTokens;
  }
  // Flush the trailing (possibly partial) chunk.
  chunks.push(buildChunk(chunks.length, firstCue, cues.length - 1));
  return chunks;
}
963
// Splits cues into chunks that aim for `targetChunkDurationSeconds` each,
// preferring to break at high-scoring cue boundaries (sentence ends, pauses)
// and capping chunks at `maxChunkDurationSeconds` where possible.
// Returns chunk descriptors (indices, counts, start/end times).
function chunkVTTCuesByDuration(cues, options) {
  if (cues.length === 0) {
    return [];
  }
  // Normalize duration limits so that 1 <= min <= target <= max.
  const targetChunkDurationSeconds = Math.max(1, options.targetChunkDurationSeconds);
  const maxChunkDurationSeconds = Math.max(targetChunkDurationSeconds, options.maxChunkDurationSeconds);
  const minChunkDurationSeconds = Math.min(
    targetChunkDurationSeconds,
    Math.max(
      1,
      options.minChunkDurationSeconds ?? Math.floor(targetChunkDurationSeconds * DEFAULT_MIN_CHUNK_DURATION_RATIO)
    )
  );
  const boundaryLookaheadCues = Math.max(1, options.boundaryLookaheadCues ?? DEFAULT_BOUNDARY_LOOKAHEAD_CUES);
  const boundaryPauseSeconds = options.boundaryPauseSeconds ?? DEFAULT_BOUNDARY_PAUSE_SECONDS;
  // Boundaries at/after this offset into a chunk count as "preferred"
  // (i.e. close enough to the target duration).
  const preferredBoundaryStartSeconds = Math.max(
    minChunkDurationSeconds,
    targetChunkDurationSeconds - Math.min(PREFERRED_BOUNDARY_WINDOW_SECONDS, targetChunkDurationSeconds / 6)
  );
  const chunks = [];
  let chunkIndex = 0;
  let cueStartIndex = 0;
  while (cueStartIndex < cues.length) {
    const chunkStartTime = cues[cueStartIndex].startTime;
    let cueEndIndex = cueStartIndex;
    // Track the best boundary seen anywhere past the minimum duration, and
    // separately the best boundary inside the preferred (near-target) window.
    let bestBoundaryIndex = -1;
    let bestBoundaryScore = -1;
    let bestPreferredBoundaryIndex = -1;
    let bestPreferredBoundaryScore = -1;
    while (cueEndIndex < cues.length) {
      const cue = cues[cueEndIndex];
      const currentDuration = cue.endTime - chunkStartTime;
      if (currentDuration >= minChunkDurationSeconds) {
        const boundaryScore = scoreCueBoundary(cues, cueEndIndex, boundaryPauseSeconds);
        // >= on ties: prefer the later of equally-scored boundaries.
        if (boundaryScore >= bestBoundaryScore) {
          bestBoundaryIndex = cueEndIndex;
          bestBoundaryScore = boundaryScore;
        }
        if (currentDuration >= preferredBoundaryStartSeconds && boundaryScore >= bestPreferredBoundaryScore) {
          bestPreferredBoundaryIndex = cueEndIndex;
          bestPreferredBoundaryScore = boundaryScore;
        }
      }
      const nextCue = cues[cueEndIndex + 1];
      if (!nextCue) {
        // Last cue — the final chunk ends here.
        break;
      }
      const nextDuration = nextCue.endTime - chunkStartTime;
      const lookaheadExceeded = cueEndIndex - cueStartIndex >= boundaryLookaheadCues;
      // Fall back to the best overall boundary when no preferred one exists.
      const preferredBoundaryIndex = bestPreferredBoundaryIndex >= cueStartIndex ? bestPreferredBoundaryIndex : bestBoundaryIndex;
      const preferredBoundaryScore = bestPreferredBoundaryIndex >= cueStartIndex ? bestPreferredBoundaryScore : bestBoundaryScore;
      if (currentDuration >= targetChunkDurationSeconds) {
        // Past the target: cut immediately at a strong boundary, otherwise
        // keep scanning until the hard cap or lookahead budget is reached.
        if (preferredBoundaryIndex >= cueStartIndex && preferredBoundaryScore >= STRONG_BOUNDARY_SCORE) {
          cueEndIndex = preferredBoundaryIndex;
          break;
        }
        if (nextDuration > maxChunkDurationSeconds || lookaheadExceeded) {
          cueEndIndex = preferredBoundaryIndex >= cueStartIndex ? preferredBoundaryIndex : cueEndIndex;
          break;
        }
      }
      // Even before the target, never let the next cue push past the hard cap.
      if (nextDuration > maxChunkDurationSeconds) {
        cueEndIndex = preferredBoundaryIndex >= cueStartIndex ? preferredBoundaryIndex : cueEndIndex;
        break;
      }
      cueEndIndex++;
    }
    chunks.push({
      id: `chunk-${chunkIndex}`,
      cueStartIndex,
      cueEndIndex,
      cueCount: cueEndIndex - cueStartIndex + 1,
      startTime: cues[cueStartIndex].startTime,
      endTime: cues[cueEndIndex].endTime
    });
    cueStartIndex = cueEndIndex + 1;
    chunkIndex++;
  }
  return chunks;
}
754
1043
  function chunkText(text, strategy) {
755
1044
  switch (strategy.type) {
756
1045
  case "token": {
@@ -792,10 +1081,8 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
792
1081
  }
793
1082
  const baseUrl = getMuxThumbnailBaseUrl(playbackId);
794
1083
  const urlPromises = timestamps.map(async (time) => {
795
- if (shouldSign) {
796
- return signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials);
797
- }
798
- return `${baseUrl}?time=${time}&width=${width}`;
1084
+ const url = shouldSign ? await signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials) : `${baseUrl}?time=${time}&width=${width}`;
1085
+ return { url, time };
799
1086
  });
800
1087
  return Promise.all(urlPromises);
801
1088
  }
@@ -817,24 +1104,82 @@ function findCaptionTrack(asset, languageCode) {
817
1104
  (track) => track.text_type === "subtitles" && track.language_code === languageCode
818
1105
  );
819
1106
  }
1107
// Converts Windows CRLF line endings to Unix LF.
function normalizeLineEndings(value) {
  return value.split("\r\n").join("\n");
}
1110
// A WebVTT timing line contains the "-->" arrow between start and end stamps.
function isTimingLine(line) {
  return line.indexOf("-->") !== -1;
}
1113
// Parses a purely numeric cue identifier line; returns null for anything else.
function parseNumericCueIdentifier(line) {
  const isAllDigits = /^\d+$/.test(line);
  return isAllDigits ? Number.parseInt(line, 10) : null;
}
1119
// Matches cue identifiers of the form "<number> - <title>" (whitespace-padded dash).
function isLikelyTitledCueIdentifier(line) {
  const TITLED_IDENTIFIER_PATTERN = /^\d+\s+-\s+\S.*$/;
  return TITLED_IDENTIFIER_PATTERN.test(line);
}
1122
// Heuristic: does `line` look like a cue identifier for the timing line that
// follows it? Numeric identifiers must continue the running sequence (or be 1
// when no previous identifier has been seen); anything non-numeric falls back
// to the "<number> - <title>" form.
function isLikelyCueIdentifier({
  line,
  nextLine,
  previousCueIdentifier
}) {
  const followedByTiming = Boolean(line) && Boolean(nextLine) && isTimingLine(nextLine);
  if (!followedByTiming) {
    return false;
  }
  const numericIdentifier = parseNumericCueIdentifier(line);
  if (numericIdentifier === null) {
    return isLikelyTitledCueIdentifier(line);
  }
  const expected = previousCueIdentifier == null ? 1 : previousCueIdentifier + 1;
  return numericIdentifier === expected;
}
1139
// Returns the index of the cue-identifier line directly above a timing line,
// or -1 when the preceding line is absent, blank, itself a timing line, or
// does not look like a cue identifier.
function getCueIdentifierLineIndex(lines, timingLineIndex, previousCueIdentifier) {
  const candidateIndex = timingLineIndex - 1;
  if (candidateIndex < 0) {
    return -1;
  }
  const candidate = lines[candidateIndex].trim();
  if (!candidate || isTimingLine(candidate)) {
    return -1;
  }
  const looksLikeIdentifier = isLikelyCueIdentifier({
    line: candidate,
    nextLine: lines[timingLineIndex]?.trim(),
    previousCueIdentifier
  });
  return looksLikeIdentifier ? candidateIndex : -1;
}
820
1154
  function extractTextFromVTT(vttContent) {
821
1155
  if (!vttContent.trim()) {
822
1156
  return "";
823
1157
  }
824
1158
  const lines = vttContent.split("\n");
825
1159
  const textLines = [];
1160
+ let previousCueIdentifier = null;
1161
+ let isInsideNoteBlock = false;
826
1162
  for (let i = 0; i < lines.length; i++) {
827
1163
  const line = lines[i].trim();
828
- if (!line)
1164
+ const nextLine = lines[i + 1]?.trim();
1165
+ if (!line) {
1166
+ isInsideNoteBlock = false;
1167
+ continue;
1168
+ }
1169
+ if (isInsideNoteBlock)
829
1170
  continue;
830
1171
  if (line === "WEBVTT")
831
1172
  continue;
832
- if (line.startsWith("NOTE "))
1173
+ if (line === "NOTE" || line.startsWith("NOTE ")) {
1174
+ isInsideNoteBlock = true;
833
1175
  continue;
834
- if (line.includes("-->"))
1176
+ }
1177
+ if (isTimingLine(line))
835
1178
  continue;
836
- if (/^[\w-]+$/.test(line) && !line.includes(" "))
1179
+ if (isLikelyCueIdentifier({ line, nextLine, previousCueIdentifier })) {
1180
+ previousCueIdentifier = parseNumericCueIdentifier(line);
837
1181
  continue;
1182
+ }
838
1183
  if (line.startsWith("STYLE") || line.startsWith("REGION"))
839
1184
  continue;
840
1185
  const cleanLine = line.replace(/<[^>]*>/g, "").trim();
@@ -893,20 +1238,34 @@ function parseVTTCues(vttContent) {
893
1238
  return [];
894
1239
  const lines = vttContent.split("\n");
895
1240
  const cues = [];
1241
+ let previousCueIdentifier = null;
896
1242
  for (let i = 0; i < lines.length; i++) {
897
1243
  const line = lines[i].trim();
898
- if (line.includes("-->")) {
1244
+ if (isTimingLine(line)) {
899
1245
  const [startStr, endStr] = line.split(" --> ").map((s) => s.trim());
900
1246
  const startTime = vttTimestampToSeconds(startStr);
901
1247
  const endTime = vttTimestampToSeconds(endStr.split(" ")[0]);
902
- const textLines = [];
1248
+ const currentCueIdentifierLine = lines[i - 1]?.trim() ?? "";
1249
+ const currentCueIdentifier = isLikelyCueIdentifier({
1250
+ line: currentCueIdentifierLine,
1251
+ nextLine: line,
1252
+ previousCueIdentifier
1253
+ }) ? parseNumericCueIdentifier(currentCueIdentifierLine) : null;
1254
+ const rawTextLines = [];
903
1255
  let j = i + 1;
904
- while (j < lines.length && lines[j].trim() && !lines[j].includes("-->")) {
905
- const cleanLine = lines[j].trim().replace(/<[^>]*>/g, "");
906
- if (cleanLine)
907
- textLines.push(cleanLine);
1256
+ while (j < lines.length && lines[j].trim() && !isTimingLine(lines[j].trim())) {
1257
+ rawTextLines.push(lines[j].trim());
908
1258
  j++;
909
1259
  }
1260
+ const trailingNumericLine = parseNumericCueIdentifier(rawTextLines.at(-1) ?? "");
1261
+ if (trailingNumericLine !== null && isLikelyCueIdentifier({
1262
+ line: rawTextLines.at(-1) ?? "",
1263
+ nextLine: lines[j]?.trim(),
1264
+ previousCueIdentifier: currentCueIdentifier
1265
+ }) && rawTextLines.length > 1) {
1266
+ rawTextLines.pop();
1267
+ }
1268
+ const textLines = rawTextLines.map((textLine) => textLine.replace(/<[^>]*>/g, "")).filter(Boolean);
910
1269
  if (textLines.length > 0) {
911
1270
  cues.push({
912
1271
  startTime,
@@ -914,10 +1273,102 @@ function parseVTTCues(vttContent) {
914
1273
  text: textLines.join(" ")
915
1274
  });
916
1275
  }
1276
+ previousCueIdentifier = currentCueIdentifier;
917
1277
  }
918
1278
  }
919
1279
  return cues;
920
1280
  }
1281
// Splits a VTT document into its preamble (WEBVTT header + metadata) and the
// list of cue-block strings. Handles three shapes: empty input, input with no
// cues, and documents whose blank-line separation has been lost ("merged"
// cue blocks), which are recovered line-by-line via timing-line positions.
function splitVttPreambleAndCueBlocks(vttContent) {
  const normalizedContent = normalizeLineEndings(vttContent).trim();
  if (!normalizedContent) {
    // Empty input still yields a valid (header-only) document.
    return {
      preamble: "WEBVTT",
      cueBlocks: []
    };
  }
  // Blocks are separated by one or more blank lines.
  const rawBlocks = normalizedContent.split(/\n{2,}/).map((block) => block.trim()).filter(Boolean);
  const cueBlockStartIndex = rawBlocks.findIndex((block) => block.includes("-->"));
  if (cueBlockStartIndex === -1) {
    // No cues at all: treat everything as preamble, prefixing WEBVTT if absent.
    return {
      preamble: normalizedContent.startsWith("WEBVTT") ? normalizedContent : `WEBVTT

${normalizedContent}`,
      cueBlocks: []
    };
  }
  // A "merged" block contains more than one timing line — blank-line
  // separators were lost, so blocks must be reconstructed per timing line.
  const hasMergedCueBlocks = rawBlocks.slice(cueBlockStartIndex).some((block) => (block.match(/-->/g) ?? []).length > 1);
  if (hasMergedCueBlocks) {
    const lines = normalizedContent.split("\n");
    const timingLineIndices = lines.map((line, index) => isTimingLine(line.trim()) ? index : -1).filter((index) => index >= 0);
    // Tracks the last numeric cue identifier so sequence checks stay valid.
    let previousCueIdentifier = null;
    const firstCueStartIndex = getCueIdentifierLineIndex(lines, timingLineIndices[0], previousCueIdentifier);
    // The preamble ends where the first cue (identifier or timing line) begins.
    const preambleEndIndex = firstCueStartIndex >= 0 ? firstCueStartIndex : timingLineIndices[0];
    const preamble2 = lines.slice(0, preambleEndIndex).join("\n").trim() || "WEBVTT";
    const cueBlocks2 = timingLineIndices.map((timingLineIndex, index) => {
      const cueIdentifierLineIndex = getCueIdentifierLineIndex(lines, timingLineIndex, previousCueIdentifier);
      const cueStartIndex = cueIdentifierLineIndex >= 0 ? cueIdentifierLineIndex : timingLineIndex;
      const currentCueIdentifier = cueIdentifierLineIndex >= 0 ? parseNumericCueIdentifier(lines[cueIdentifierLineIndex].trim()) : null;
      const nextTimingLineIndex = timingLineIndices[index + 1] ?? lines.length;
      let cueEndIndex = nextTimingLineIndex - 1;
      // Trim trailing blank lines off the cue's text span.
      while (cueEndIndex > timingLineIndex && !lines[cueEndIndex].trim()) {
        cueEndIndex--;
      }
      // If the last line actually belongs to the NEXT cue (its identifier),
      // exclude it from this block, then re-trim trailing blanks.
      const nextCueIdentifierLineIndex = index < timingLineIndices.length - 1 ? getCueIdentifierLineIndex(lines, nextTimingLineIndex, currentCueIdentifier) : -1;
      if (nextCueIdentifierLineIndex === cueEndIndex) {
        cueEndIndex--;
      }
      while (cueEndIndex > timingLineIndex && !lines[cueEndIndex].trim()) {
        cueEndIndex--;
      }
      previousCueIdentifier = currentCueIdentifier;
      return lines.slice(cueStartIndex, cueEndIndex + 1).join("\n").trim();
    });
    return {
      preamble: preamble2,
      cueBlocks: cueBlocks2
    };
  }
  // Well-formed document: everything before the first cue block is preamble.
  const preambleBlocks = rawBlocks.slice(0, cueBlockStartIndex);
  const cueBlocks = rawBlocks.slice(cueBlockStartIndex);
  const preamble = preambleBlocks.length > 0 ? preambleBlocks.join("\n\n") : "WEBVTT";
  return {
    preamble,
    cueBlocks
  };
}
1339
// Reassembles a VTT document from a preamble and cue blocks. Blocks are
// separated by blank lines and the output always ends with a newline.
function buildVttFromCueBlocks(cueBlocks, preamble = "WEBVTT") {
  const header = preamble.trim();
  if (cueBlocks.length === 0) {
    return `${header}\n`;
  }
  const body = cueBlocks.map((block) => block.trim()).join("\n\n");
  return `${header}\n\n${body}\n`;
}
1349
// Keeps a cue block's identifier/timing header while swapping in replacement
// (e.g. translated) text lines. Throws if the block has no "-->" timing line.
function replaceCueText(cueBlock, translatedText) {
  // Normalize CRLF, split, trim each line, and drop blanks.
  const toCleanLines = (value) =>
    value
      .replace(/\r\n/g, "\n")
      .split("\n")
      .map((line) => line.trim())
      .filter(Boolean);
  const blockLines = toCleanLines(cueBlock);
  const timingIndex = blockLines.findIndex((line) => line.includes("-->"));
  if (timingIndex === -1) {
    throw new Error("Cue block is missing a timestamp line");
  }
  const headerLines = blockLines.slice(0, timingIndex + 1);
  return headerLines.concat(toCleanLines(translatedText)).join("\n");
}
1359
// Applies one translated text per cue block, then rebuilds a full VTT
// document. Throws when translation and cue counts do not match.
function buildVttFromTranslatedCueBlocks(cueBlocks, translatedTexts, preamble = "WEBVTT") {
  if (cueBlocks.length !== translatedTexts.length) {
    throw new Error(`Expected ${cueBlocks.length} translated cues, received ${translatedTexts.length}`);
  }
  const translatedBlocks = cueBlocks.map(
    (cueBlock, index) => replaceCueText(cueBlock, translatedTexts[index])
  );
  return buildVttFromCueBlocks(translatedBlocks, preamble);
}
1368
// Merges several VTT documents into one by pooling their cue blocks under a
// single preamble (individual segment preambles are discarded).
function concatenateVttSegments(segments, preamble = "WEBVTT") {
  const pooledCueBlocks = [];
  for (const segment of segments) {
    pooledCueBlocks.push(...splitVttPreambleAndCueBlocks(segment).cueBlocks);
  }
  return buildVttFromCueBlocks(pooledCueBlocks, preamble);
}
921
1372
  async function buildTranscriptUrl(playbackId, trackId, shouldSign = false, credentials) {
922
1373
  "use step";
923
1374
  const baseUrl = `https://stream.mux.com/${playbackId}/text/${trackId}.vtt`;
@@ -979,9 +1430,14 @@ async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
979
1430
  export {
980
1431
  DEFAULT_STORYBOARD_WIDTH,
981
1432
  buildTranscriptUrl,
1433
+ buildVttFromCueBlocks,
1434
+ buildVttFromTranslatedCueBlocks,
982
1435
  chunkByTokens,
983
1436
  chunkText,
984
1437
  chunkVTTCues,
1438
+ chunkVTTCuesByBudget,
1439
+ chunkVTTCuesByDuration,
1440
+ concatenateVttSegments,
985
1441
  estimateTokenCount,
986
1442
  extractTextFromVTT,
987
1443
  extractTimestampedTranscript,
@@ -994,10 +1450,15 @@ export {
994
1450
  getHotspotsForPlaybackId,
995
1451
  getHotspotsForVideo,
996
1452
  getReadyTextTracks,
1453
+ getShotsForAsset,
997
1454
  getStoryboardUrl,
998
1455
  getThumbnailUrls,
999
1456
  parseVTTCues,
1457
+ replaceCueText,
1458
+ requestShotsForAsset,
1000
1459
  secondsToTimestamp,
1001
- vttTimestampToSeconds
1460
+ splitVttPreambleAndCueBlocks,
1461
+ vttTimestampToSeconds,
1462
+ waitForShotsForAsset
1002
1463
  };
1003
1464
  //# sourceMappingURL=index.js.map