@mux/ai 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ var __export = (target, all) => {
5
5
  };
6
6
 
7
7
  // package.json
8
- var version = "0.10.0";
8
+ var version = "0.11.0";
9
9
 
10
10
  // src/env.ts
11
11
  import { z } from "zod";
@@ -803,13 +803,16 @@ __export(primitives_exports, {
803
803
  getHotspotsForPlaybackId: () => getHotspotsForPlaybackId,
804
804
  getHotspotsForVideo: () => getHotspotsForVideo,
805
805
  getReadyTextTracks: () => getReadyTextTracks,
806
+ getShotsForAsset: () => getShotsForAsset,
806
807
  getStoryboardUrl: () => getStoryboardUrl,
807
808
  getThumbnailUrls: () => getThumbnailUrls,
808
809
  parseVTTCues: () => parseVTTCues,
809
810
  replaceCueText: () => replaceCueText,
811
+ requestShotsForAsset: () => requestShotsForAsset,
810
812
  secondsToTimestamp: () => secondsToTimestamp,
811
813
  splitVttPreambleAndCueBlocks: () => splitVttPreambleAndCueBlocks,
812
- vttTimestampToSeconds: () => vttTimestampToSeconds
814
+ vttTimestampToSeconds: () => vttTimestampToSeconds,
815
+ waitForShotsForAsset: () => waitForShotsForAsset
813
816
  });
814
817
 
815
818
  // src/lib/providers.ts
@@ -1082,6 +1085,142 @@ async function fetchHotspots(identifierType, id, options) {
1082
1085
  return transformHotspotResponse(response);
1083
1086
  }
1084
1087
 
1088
+ // src/primitives/shots.ts
1089
+ var DEFAULT_POLL_INTERVAL_MS = 2e3;
1090
+ var MIN_POLL_INTERVAL_MS = 1e3;
1091
+ var DEFAULT_MAX_ATTEMPTS = 60;
1092
+ var SHOTS_ALREADY_REQUESTED_MESSAGE = "shots generation has already been requested";
1093
/**
 * Builds the Mux REST path for an asset's shots resource.
 *
 * The asset ID is percent-encoded so an ID containing reserved URL characters
 * (`/`, `?`, `#`, ...) cannot change the request path or add a query string.
 *
 * @param {string} assetId - Mux asset identifier.
 * @returns {string} Path of the form `/video/v1/assets/{assetId}/shots`.
 */
function getShotsPath(assetId) {
  return `/video/v1/assets/${encodeURIComponent(assetId)}/shots`;
}
1096
/**
 * Validates the raw entries of a shots manifest and narrows each one to
 * `{ startTime, imageUrl }`.
 *
 * @param {Array<object>} shots - Raw `shots` array from the manifest.
 * @returns {Array<{ startTime: number, imageUrl: string }>} Validated shots, in input order.
 * @throws {TypeError} When an entry has a non-finite `startTime` or an empty /
 *   non-string `imageUrl`; the message names the offending index.
 */
function mapManifestShots(shots) {
  return shots.map((shot, index) => {
    // `?? {}` sends null/undefined entries to the indexed validation errors
    // below instead of failing on destructuring with no index in the message.
    const { startTime, imageUrl } = shot ?? {};
    if (typeof startTime !== "number" || !Number.isFinite(startTime)) {
      throw new TypeError(`Invalid shot startTime in shots manifest at index ${index}`);
    }
    if (typeof imageUrl !== "string" || imageUrl.length === 0) {
      throw new TypeError(`Invalid shot imageUrl in shots manifest at index ${index}`);
    }
    return {
      startTime,
      imageUrl
    };
  });
}
1111
/**
 * Downloads a shots manifest and returns its validated shots.
 *
 * @param {string} shotsManifestUrl - URL of the JSON manifest.
 * @returns {Promise<Array<{ startTime: number, imageUrl: string }>>} Shots mapped by `mapManifestShots`.
 * @throws {Error} When the HTTP response is not OK.
 * @throws {TypeError} When the body has no `shots` array (including a JSON
 *   `null` body) or an entry fails validation.
 */
async function fetchShotsFromManifest(shotsManifestUrl) {
  const response = await fetch(shotsManifestUrl);
  if (!response.ok) {
    throw new Error(
      `Failed to fetch shots manifest: ${response.status} ${response.statusText}`
    );
  }
  const manifest = await response.json();
  // Optional chaining: a body of JSON `null` must produce the descriptive
  // error below, not "Cannot read properties of null".
  if (!Array.isArray(manifest?.shots)) {
    throw new TypeError("Invalid shots manifest response: missing shots array");
  }
  return mapManifestShots(manifest.shots);
}
1124
/**
 * Converts a raw shots API response into the library's result shape.
 *
 * - `pending`   -> `{ status, createdAt }`
 * - `errored`   -> `{ status, createdAt, error }`
 * - `completed` -> `{ status, createdAt, shots }`, with shots loaded from
 *   `shots_manifest_url`
 *
 * @param {{ data: object }} response - Response whose `data.status` selects the branch.
 * @returns {Promise<object>} Normalized result for the reported status.
 * @throws {Error} When `data.status` is none of the three known values.
 */
async function transformShotsResponse(response) {
  const payload = response.data;
  const { status } = payload;
  if (status === "pending") {
    return { status: "pending", createdAt: payload.created_at };
  }
  if (status === "errored") {
    return {
      status: "errored",
      createdAt: payload.created_at,
      error: payload.error
    };
  }
  if (status === "completed") {
    const createdAt = payload.created_at;
    const shots = await fetchShotsFromManifest(payload.shots_manifest_url);
    return { status: "completed", createdAt, shots };
  }
  throw new Error(`Unsupported shots response: ${JSON.stringify(payload)}`);
}
1149
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
1152
/**
 * Reports whether `error` is the HTTP 400 returned when shots generation was
 * already requested for an asset.
 *
 * The status is read from `error.status` or `error.statusCode`. The marker
 * text is searched case-insensitively in `error.error.messages` and, for
 * `Error` instances, in `error.message`.
 *
 * This runs inside a `catch` block, so it must never throw: a `messages`
 * value that is not an array of strings is tolerated rather than calling
 * `.map` / `.toLowerCase` on it blindly.
 *
 * @param {unknown} error - Value caught from the shots request.
 * @returns {boolean} `true` only for a 400 carrying the already-requested message.
 */
function isShotsAlreadyRequestedError(error) {
  const statusCode = error?.status ?? error?.statusCode;
  if (statusCode !== 400) {
    return false;
  }
  const rawMessages = error?.error?.messages;
  // Accept a single string as a one-element list; ignore any other non-array shape.
  const messageList = typeof rawMessages === "string" ? [rawMessages] : Array.isArray(rawMessages) ? rawMessages : [];
  const candidates = messageList.filter((message) => typeof message === "string");
  if (error instanceof Error && typeof error.message === "string") {
    candidates.push(error.message);
  }
  return candidates.some((text) => text.toLowerCase().includes(SHOTS_ALREADY_REQUESTED_MESSAGE));
}
1159
/**
 * Requests shots generation for an asset by POSTing to its shots path.
 *
 * @param {string} assetId - Mux asset identifier.
 * @param {{ credentials?: object }} [options] - `credentials` is forwarded to `getMuxClientFromEnv`.
 * @returns {Promise<object>} The normalized `pending` result.
 * @throws {Error} When the normalized response status is anything other than `pending`.
 */
async function requestShotsForAsset(assetId, options = {}) {
  "use step";
  const clientFactory = await getMuxClientFromEnv(options.credentials);
  const client = await clientFactory.createClient();
  const rawResponse = await client.post(getShotsPath(assetId), { body: {} });
  const requested = await transformShotsResponse(rawResponse);
  if (requested.status === "pending") {
    return requested;
  }
  throw new Error(
    `Expected pending status after requesting shots for asset '${assetId}', received '${requested.status}'`
  );
}
1176
/**
 * Fetches the current shots state for an asset with a GET on its shots path.
 *
 * @param {string} assetId - Mux asset identifier.
 * @param {{ credentials?: object }} [options] - `credentials` is forwarded to `getMuxClientFromEnv`.
 * @returns {Promise<object>} The result normalized by `transformShotsResponse`.
 */
async function getShotsForAsset(assetId, options = {}) {
  "use step";
  const clientFactory = await getMuxClientFromEnv(options.credentials);
  const client = await clientFactory.createClient();
  const rawResponse = await client.get(getShotsPath(assetId));
  return await transformShotsResponse(rawResponse);
}
1186
/**
 * Polls an asset's shots until generation completes, errors, or the attempt
 * budget runs out.
 *
 * @param {string} assetId - Mux asset identifier.
 * @param {object} [options]
 * @param {number} [options.pollIntervalMs] - Delay between polls; raised to
 *   `MIN_POLL_INTERVAL_MS` when smaller. Defaults to `DEFAULT_POLL_INTERVAL_MS`.
 * @param {number} [options.maxAttempts] - Maximum number of polls (at least 1).
 *   Defaults to `DEFAULT_MAX_ATTEMPTS`.
 * @param {boolean} [options.createIfMissing=true] - Request generation first;
 *   an "already requested" error from that request is ignored.
 * @param {object} [options.credentials] - Forwarded to the request/get helpers.
 * @returns {Promise<object>} The `completed` result.
 * @throws {Error} When generation errors (the API error payload is attached as
 *   `cause`), when polling times out, or when the initial request fails for
 *   any reason other than "already requested".
 */
async function waitForShotsForAsset(assetId, options = {}) {
  "use step";
  const {
    pollIntervalMs = DEFAULT_POLL_INTERVAL_MS,
    maxAttempts = DEFAULT_MAX_ATTEMPTS,
    createIfMissing = true,
    credentials
  } = options;
  if (createIfMissing) {
    try {
      await requestShotsForAsset(assetId, { credentials });
    } catch (error) {
      if (!isShotsAlreadyRequestedError(error)) {
        throw error;
      }
    }
  }
  // Math.max propagates NaN. A NaN attempt count would skip the loop entirely
  // and report a bogus timeout; a NaN interval would bypass the minimum delay.
  // Fall back to the defaults in both cases.
  const clampedMaxAttempts = Math.max(1, maxAttempts);
  const normalizedMaxAttempts = Number.isNaN(clampedMaxAttempts) ? DEFAULT_MAX_ATTEMPTS : clampedMaxAttempts;
  const clampedPollIntervalMs = Math.max(MIN_POLL_INTERVAL_MS, pollIntervalMs);
  const normalizedPollIntervalMs = Number.isNaN(clampedPollIntervalMs) ? DEFAULT_POLL_INTERVAL_MS : clampedPollIntervalMs;
  let lastStatus;
  for (let attempt = 0; attempt < normalizedMaxAttempts; attempt++) {
    const result = await getShotsForAsset(assetId, { credentials });
    lastStatus = result.status;
    if (result.status === "completed") {
      return result;
    }
    if (result.status === "errored") {
      // Keep the message stable; expose the API error details through `cause`.
      throw new Error(`Shots generation errored for asset '${assetId}'`, { cause: result.error });
    }
    // No point sleeping after the final attempt.
    if (attempt < normalizedMaxAttempts - 1) {
      await sleep(normalizedPollIntervalMs);
    }
  }
  throw new Error(
    `Timed out waiting for shots for asset '${assetId}' after ${normalizedMaxAttempts} attempts. Last status: ${lastStatus ?? "unknown"}`
  );
}
1223
+
1085
1224
  // src/lib/mux-image-url.ts
1086
1225
  var DEFAULT_MUX_IMAGE_ORIGIN = "https://image.mux.com";
1087
1226
  function normalizeMuxImageOrigin(value) {
@@ -1785,23 +1924,33 @@ async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
1785
1924
  // src/workflows/index.ts
1786
1925
  var workflows_exports = {};
1787
1926
  __export(workflows_exports, {
1927
+ DEFAULT_DESCRIPTION_LENGTH: () => DEFAULT_DESCRIPTION_LENGTH,
1928
+ DEFAULT_SUMMARY_KEYWORD_LIMIT: () => DEFAULT_SUMMARY_KEYWORD_LIMIT,
1929
+ DEFAULT_TITLE_LENGTH: () => DEFAULT_TITLE_LENGTH,
1788
1930
  HIVE_SEXUAL_CATEGORIES: () => HIVE_SEXUAL_CATEGORIES,
1789
1931
  HIVE_VIOLENCE_CATEGORIES: () => HIVE_VIOLENCE_CATEGORIES,
1790
- SUMMARY_KEYWORD_LIMIT: () => SUMMARY_KEYWORD_LIMIT,
1791
1932
  aggregateTokenUsage: () => aggregateTokenUsage,
1933
+ applyOverrideLists: () => applyOverrideLists,
1934
+ applyReplacements: () => applyReplacements,
1792
1935
  askQuestions: () => askQuestions,
1936
+ buildReplacementRegex: () => buildReplacementRegex,
1793
1937
  burnedInCaptionsSchema: () => burnedInCaptionsSchema,
1938
+ censorVttContent: () => censorVttContent,
1794
1939
  chapterSchema: () => chapterSchema,
1795
1940
  chaptersSchema: () => chaptersSchema,
1941
+ createReplacer: () => createReplacer,
1942
+ editCaptions: () => editCaptions,
1796
1943
  generateChapters: () => generateChapters,
1797
1944
  generateEmbeddings: () => generateEmbeddings,
1798
1945
  generateVideoEmbeddings: () => generateVideoEmbeddings,
1799
1946
  getModerationScores: () => getModerationScores,
1800
1947
  getSummaryAndTags: () => getSummaryAndTags,
1801
1948
  hasBurnedInCaptions: () => hasBurnedInCaptions,
1949
+ profanityDetectionSchema: () => profanityDetectionSchema,
1802
1950
  questionAnswerSchema: () => questionAnswerSchema,
1803
1951
  shouldSplitChunkTranslationError: () => shouldSplitChunkTranslationError,
1804
1952
  summarySchema: () => summarySchema,
1953
+ transformCueText: () => transformCueText,
1805
1954
  translateAudio: () => translateAudio,
1806
1955
  translateCaptions: () => translateCaptions,
1807
1956
  translationSchema: () => translationSchema
@@ -2964,6 +3113,429 @@ async function generateChapters(assetId, languageCode, options = {}) {
2964
3113
  };
2965
3114
  }
2966
3115
 
3116
+ // src/workflows/edit-captions.ts
3117
+ import { generateText as generateText4, Output as Output4 } from "ai";
3118
+ import dedent4 from "dedent";
3119
+ import { z as z5 } from "zod";
3120
+
3121
+ // src/lib/mux-tracks.ts
3122
/**
 * Downloads a WebVTT file and returns its text.
 *
 * @param {string} vttUrl - URL of the VTT file.
 * @returns {Promise<string>} The response body as text.
 * @throws {Error} When the HTTP response is not OK. The message includes the
 *   numeric status because `statusText` alone can be empty, and to match the
 *   format used by `fetchShotsFromManifest`.
 */
async function fetchVttFromMux(vttUrl) {
  "use step";
  const vttResponse = await fetch(vttUrl);
  if (!vttResponse.ok) {
    throw new Error(`Failed to fetch VTT file: ${vttResponse.status} ${vttResponse.statusText}`);
  }
  return vttResponse.text();
}
3130
/**
 * Creates a subtitles text track on a Mux asset from a hosted VTT URL.
 *
 * @param {string} assetId - Asset to attach the track to.
 * @param {string} languageCode - Sent as the track's `language_code`.
 * @param {string} trackName - Sent as the track's `name`.
 * @param {string} presignedUrl - Sent as the track's `url`.
 * @param {object} [credentials] - Forwarded to `resolveMuxClient`.
 * @returns {Promise<string>} ID of the created track.
 * @throws {Error} When the create response carries no track ID.
 */
async function createTextTrackOnMux(assetId, languageCode, trackName, presignedUrl, credentials) {
  "use step";
  const clientFactory = await resolveMuxClient(credentials);
  const client = await clientFactory.createClient();
  const trackParams = {
    type: "text",
    text_type: "subtitles",
    language_code: languageCode,
    name: trackName,
    url: presignedUrl
  };
  const { id: createdTrackId } = await client.video.assets.createTrack(assetId, trackParams);
  if (createdTrackId) {
    return createdTrackId;
  }
  throw new Error("Failed to create text track: no track ID returned from Mux");
}
3146
+
3147
+ // src/lib/storage-adapter.ts
3148
/**
 * Ensures both S3 credential parts are present.
 *
 * @param {string | undefined} accessKeyId - S3 access key ID.
 * @param {string | undefined} secretAccessKey - S3 secret access key.
 * @returns {{ accessKeyId: string, secretAccessKey: string }} The same pair.
 * @throws {Error} When either value is missing or empty.
 */
function requireCredentials(accessKeyId, secretAccessKey) {
  const hasBoth = Boolean(accessKeyId) && Boolean(secretAccessKey);
  if (hasBoth) {
    return { accessKeyId, secretAccessKey };
  }
  throw new Error(
    "S3 credentials are required for default storage operations. Provide S3_ACCESS_KEY_ID and S3_SECRET_ACCESS_KEY or pass options.storageAdapter."
  );
}
3156
/**
 * Stores an object through the supplied adapter, or through the built-in S3
 * client when no adapter is given.
 *
 * @param {object} input - Object description (`endpoint`, `region`, `bucket`,
 *   `key`, `body`, `contentType`, plus S3 credentials for the default path).
 * @param {{ putObject: Function }} [adapter] - Custom storage adapter; receives `input` unchanged.
 * @returns {Promise<void>}
 * @throws {Error} On the default path when S3 credentials are missing.
 */
async function putObjectWithStorageAdapter(input, adapter) {
  if (!adapter) {
    const { accessKeyId, secretAccessKey } = requireCredentials(input.accessKeyId, input.secretAccessKey);
    const { endpoint, region, bucket, key, body, contentType } = input;
    await putObjectToS3({
      accessKeyId,
      secretAccessKey,
      endpoint,
      region,
      bucket,
      key,
      body,
      contentType
    });
    return;
  }
  await adapter.putObject(input);
}
3173
/**
 * Produces a presigned GET URL through the supplied adapter, or through the
 * built-in S3 signer when no adapter is given.
 *
 * @param {object} input - Object location (`endpoint`, `region`, `bucket`,
 *   `key`), `expiresInSeconds`, plus S3 credentials for the default path.
 * @param {{ createPresignedGetUrl: Function }} [adapter] - Custom storage adapter; receives `input` unchanged.
 * @returns {Promise<string>} Whatever the adapter or the built-in signer returns.
 * @throws {Error} On the default path when S3 credentials are missing.
 */
async function createPresignedGetUrlWithStorageAdapter(input, adapter) {
  if (!adapter) {
    const { accessKeyId, secretAccessKey } = requireCredentials(input.accessKeyId, input.secretAccessKey);
    const { endpoint, region, bucket, key, expiresInSeconds } = input;
    return createPresignedGetUrl({
      accessKeyId,
      secretAccessKey,
      endpoint,
      region,
      bucket,
      key,
      expiresInSeconds
    });
  }
  return adapter.createPresignedGetUrl(input);
}
3188
+
3189
+ // src/workflows/edit-captions.ts
3190
+ var profanityDetectionSchema = z5.object({
3191
+ profanity: z5.array(z5.string()).describe(
3192
+ "Unique profane words or short phrases exactly as they appear in the transcript text. Include each distinct form only once (e.g., if 'fuck' and 'fucking' both appear, list both)."
3193
+ )
3194
+ });
3195
+ var SYSTEM_PROMPT3 = dedent4`
3196
+ You are a content moderation assistant. Your task is to identify profane, vulgar, or obscene
3197
+ words and phrases in subtitle text. Return ONLY the exact profane words or phrases as they appear
3198
+ in the text. Do not modify, censor, or paraphrase them. Do not include words that are merely
3199
+ informal or slang but not profane. Focus on words that would be bleeped on broadcast television.`;
3200
/**
 * Applies `transform` to every cue-text line of a WebVTT document and leaves
 * all other lines (header, cue identifiers, timing lines, blank lines, and
 * blocks not preceded by a timing line) untouched.
 *
 * A line containing `-->` opens a cue and sets its start time; a blank line
 * closes the cue.
 *
 * @param {string} rawVtt - Full VTT document.
 * @param {(line: string, cueStartTime: number) => string} transform - Called
 *   once per cue-text line with the start time of the enclosing cue in seconds.
 * @returns {string} The document with transformed cue text, joined with `\n`.
 */
function transformCueText(rawVtt, transform) {
  const output = [];
  let insideCue = false;
  let cueStart = 0;
  for (const line of rawVtt.split("\n")) {
    if (line.includes("-->")) {
      const [startStamp] = line.split("-->");
      cueStart = vttTimestampToSeconds(startStamp.trim());
      insideCue = true;
      output.push(line);
      continue;
    }
    if (line.trim() === "") {
      insideCue = false;
      output.push(line);
      continue;
    }
    output.push(insideCue ? transform(line, cueStart) : line);
  }
  return output.join("\n");
}
3222
/**
 * Builds one case-insensitive, global regex that matches any of `words` as a
 * whole term.
 *
 * Boundaries are applied per term and are Unicode-aware. `\b` only knows
 * ASCII word characters, so it never matched terms that begin or end with a
 * non-word character ("a$$") or are written in non-ASCII scripts. Here an
 * edge gets a "no adjacent letter/mark/digit/underscore" lookaround only when
 * that edge of the term is itself such a character; other edges are left
 * unanchored.
 *
 * @param {string[]} words - Terms to match; empty strings are ignored.
 * @returns {RegExp | null} A regex with flags `giu`, or `null` when no usable term remains.
 */
function buildReplacementRegex(words) {
  const terms = words.filter((w) => w.length > 0);
  if (terms.length === 0) {
    return null;
  }
  // Longest first so the alternation prefers the longer of two overlapping terms.
  terms.sort((a, b) => b.length - a.length);
  const wordChar = /[\p{L}\p{M}\p{N}_]/u;
  const notAfterWordChar = "(?<![\\p{L}\\p{M}\\p{N}_])";
  const notBeforeWordChar = "(?![\\p{L}\\p{M}\\p{N}_])";
  const alternatives = terms.map((term) => {
    // Iterate by code point so astral characters are classified correctly.
    const codePoints = [...term];
    const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const lead = wordChar.test(codePoints[0]) ? notAfterWordChar : "";
    const trail = wordChar.test(codePoints[codePoints.length - 1]) ? notBeforeWordChar : "";
    return `${lead}${escaped}${trail}`;
  });
  return new RegExp(`(?:${alternatives.join("|")})`, "giu");
}
3231
/**
 * Returns the function that turns a matched term into its censored form.
 *
 * - `"blank"`  -> underscores of the same length wrapped in brackets
 * - `"remove"` -> empty string
 * - `"mask"`   -> question marks of the same length
 *
 * @param {"blank" | "remove" | "mask"} mode - Censoring style.
 * @returns {((match: string) => string) | undefined} The replacer, or
 *   `undefined` for an unrecognized mode.
 */
function createReplacer(mode) {
  if (mode === "blank") {
    return (match) => `[${"_".repeat(match.length)}]`;
  }
  if (mode === "remove") {
    return () => "";
  }
  if (mode === "mask") {
    return (match) => "?".repeat(match.length);
  }
  return undefined;
}
3241
/**
 * Censors every occurrence of the given terms in a VTT document's cue text.
 *
 * @param {string} rawVtt - Full VTT document.
 * @param {string[]} profanity - Terms to censor.
 * @param {"blank" | "remove" | "mask"} mode - Censoring style passed to `createReplacer`.
 * @returns {{ censoredVtt: string, replacements: Array<{ cueStartTime: number, before: string, after: string }> }}
 *   The edited document and one record per replacement made. The input is
 *   returned unchanged with an empty list when there is nothing to match.
 */
function censorVttContent(rawVtt, profanity, mode) {
  const regex = profanity.length === 0 ? null : buildReplacementRegex(profanity);
  if (!regex) {
    return { censoredVtt: rawVtt, replacements: [] };
  }
  const replacer = createReplacer(mode);
  const replacements = [];
  const censorLine = (line, cueStartTime) =>
    line.replace(regex, (before) => {
      const after = replacer(before);
      replacements.push({ cueStartTime, before, after });
      return after;
    });
  const censoredVtt = transformCueText(rawVtt, censorLine);
  return { censoredVtt, replacements };
}
3260
/**
 * Combines detected terms with caller overrides.
 *
 * `alwaysCensor` terms are appended unless a term with the same lowercase form
 * is already present. Every term whose lowercase form appears in `neverCensor`
 * is then removed. Comparison is case-insensitive; original casing is kept.
 *
 * @param {string[]} detected - Terms found by detection.
 * @param {Iterable<string>} alwaysCensor - Terms to force-include.
 * @param {string[]} neverCensor - Terms to exclude.
 * @returns {string[]} The final term list.
 */
function applyOverrideLists(detected, alwaysCensor, neverCensor) {
  const known = new Set();
  for (const term of detected) {
    known.add(term.toLowerCase());
  }
  const combined = detected.slice();
  for (const term of alwaysCensor) {
    const key = term.toLowerCase();
    if (known.has(key)) {
      continue;
    }
    known.add(key);
    combined.push(term);
  }
  const excluded = new Set(neverCensor.map((term) => term.toLowerCase()));
  return combined.filter((term) => !excluded.has(term.toLowerCase()));
}
3273
/**
 * Applies literal, case-sensitive, whole-word find/replace rules to the cue
 * text of a VTT document.
 *
 * Rules run in order on each cue line, so a later rule sees the output of
 * earlier ones. Rules with an empty `find` are ignored.
 *
 * @param {string} rawVtt - Full VTT document.
 * @param {Array<{ find: string, replace: string }>} replacements - Rules to apply.
 * @returns {{ editedVtt: string, replacements: Array<{ cueStartTime: number, before: string, after: string }> }}
 *   The edited document and one record per replacement made.
 */
function applyReplacements(rawVtt, replacements) {
  // Compile each rule once, not once per cue line. Reusing a global regex is
  // safe here because String.prototype.replace resets lastIndex itself.
  const rules = replacements.filter((r) => r.find.length > 0).map(({ find, replace }) => {
    const escaped = find.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    return { regex: new RegExp(`\\b${escaped}\\b`, "g"), replace };
  });
  if (rules.length === 0) {
    return { editedVtt: rawVtt, replacements: [] };
  }
  const records = [];
  const editedVtt = transformCueText(rawVtt, (line, cueStartTime) => {
    let result = line;
    for (const { regex, replace } of rules) {
      // A function replacer keeps `$` sequences in `replace` literal.
      result = result.replace(regex, (match) => {
        records.push({ cueStartTime, before: match, after: replace });
        return replace;
      });
    }
    return result;
  });
  return { editedVtt, replacements: records };
}
3293
/**
 * Asks a language model to list the profane words and phrases in a transcript.
 *
 * @param {object} params
 * @param {string} params.plainText - Transcript text embedded in the prompt.
 * @param {string} params.provider - Passed to `createLanguageModelFromConfig`.
 * @param {string} params.modelId - Passed to `createLanguageModelFromConfig`.
 * @param {object} [params.credentials] - Passed to `createLanguageModelFromConfig`.
 * @returns {Promise<{ profanity: string[], usage: object }>} The detected terms
 *   and the token usage reported for the call.
 */
async function identifyProfanityWithAI({
  plainText,
  provider,
  modelId,
  credentials
}) {
  "use step";
  const languageModel = await createLanguageModelFromConfig(provider, modelId, credentials);
  const userPrompt = `Identify all profane words and phrases in the following subtitle transcript. Return each unique profane word or phrase exactly as it appears in the text.

<transcript>
${plainText}
</transcript>`;
  const { output, usage } = await generateText4({
    model: languageModel,
    output: Output4.object({ schema: profanityDetectionSchema }),
    messages: [
      { role: "system", content: SYSTEM_PROMPT3 },
      { role: "user", content: userPrompt }
    ]
  });
  const { inputTokens, outputTokens, totalTokens, reasoningTokens, cachedInputTokens } = usage;
  return {
    profanity: output.profanity,
    usage: {
      inputTokens,
      outputTokens,
      totalTokens,
      reasoningTokens,
      cachedInputTokens
    }
  };
}
3330
/**
 * Uploads an edited VTT document to storage and returns a presigned GET URL
 * for it.
 *
 * The object key is `edited/{assetId}/{trackId}-edited-{timestamp}.vtt`.
 * S3 credentials are read from the environment config; both calls go through
 * `storageAdapter` when one is supplied.
 *
 * @param {object} params
 * @param {string} params.editedVtt - Document body to upload.
 * @param {string} params.assetId - Used in the object key.
 * @param {string} params.trackId - Used in the object key.
 * @param {string} params.s3Endpoint - Storage endpoint.
 * @param {string} params.s3Region - Storage region.
 * @param {string} params.s3Bucket - Target bucket.
 * @param {object} [params.storageAdapter] - Optional custom storage adapter.
 * @param {number} [params.s3SignedUrlExpirySeconds] - URL lifetime; 86400 when nullish.
 * @returns {Promise<string>} Presigned URL of the uploaded object.
 */
async function uploadEditedVttToS3({
  editedVtt,
  assetId,
  trackId,
  s3Endpoint,
  s3Region,
  s3Bucket,
  storageAdapter,
  s3SignedUrlExpirySeconds
}) {
  "use step";
  // Fields shared by the upload and the presign request.
  const objectLocation = {
    accessKeyId: env_default.S3_ACCESS_KEY_ID,
    secretAccessKey: env_default.S3_SECRET_ACCESS_KEY,
    endpoint: s3Endpoint,
    region: s3Region,
    bucket: s3Bucket,
    key: `edited/${assetId}/${trackId}-edited-${Date.now()}.vtt`
  };
  await putObjectWithStorageAdapter(
    { ...objectLocation, body: editedVtt, contentType: "text/vtt" },
    storageAdapter
  );
  return createPresignedGetUrlWithStorageAdapter(
    { ...objectLocation, expiresInSeconds: s3SignedUrlExpirySeconds ?? 86400 },
    storageAdapter
  );
}
3364
/**
 * Deletes a track from a Mux asset.
 *
 * @param {string} assetId - Asset that owns the track.
 * @param {string} trackId - Track to delete.
 * @param {object} [credentials] - Forwarded to `resolveMuxClient`.
 * @returns {Promise<void>}
 */
async function deleteTrackOnMux(assetId, trackId, credentials) {
  "use step";
  const clientFactory = await resolveMuxClient(credentials);
  const client = await clientFactory.createClient();
  await client.video.assets.deleteTrack(assetId, trackId);
}
3370
/**
 * Workflow: edits one ready text track of a Mux asset by censoring profanity
 * detected by a language model and/or applying static find/replace rules,
 * then optionally uploads the result as a new track.
 *
 * Steps, in order:
 *  1. Validate options and, when uploading, the storage configuration.
 *  2. Resolve the asset, its playback ID and the source track; fetch the VTT.
 *  3. If `autoCensorProfanity` is set, detect terms with the model, merge the
 *     `alwaysCensor` / `neverCensor` overrides and censor the cue text.
 *  4. If `replacements` is non-empty, apply them to the (possibly censored) VTT.
 *  5. Unless `uploadToMux` is `false`, upload the VTT to storage, create a new
 *     text track from the presigned URL and, unless `deleteOriginalTrack` is
 *     `false`, delete the source track once the new one exists.
 *
 * Track creation and original-track deletion are best-effort: failures are
 * logged with `console.warn` and do not reject the workflow. Errors that are
 * wrapped with a friendlier message keep the underlying error as `cause`.
 *
 * @param {string} assetId - Mux asset identifier.
 * @param {string} trackId - ID of the ready text track to edit.
 * @param {object} options - See the destructured fields below;
 *   `s3SignedUrlExpirySeconds` is also read from this object.
 * @returns {Promise<object>} `assetId`, `trackId`, `originalVtt`, `editedVtt`,
 *   `totalReplacementCount`, per-feature replacement records and `usage`; plus
 *   `uploadedTrackId` and `presignedUrl` when uploading.
 * @throws {Error} On invalid options, missing storage or signing
 *   configuration, an unknown or not-ready track, an empty transcript when
 *   auto-censoring, or a failed VTT fetch, detection call or storage upload.
 */
async function editCaptions(assetId, trackId, options) {
  "use workflow";
  const {
    provider,
    model,
    autoCensorProfanity: autoCensorOption,
    replacements: replacementsOption,
    deleteOriginalTrack,
    uploadToMux: uploadToMuxOption,
    s3Endpoint: providedS3Endpoint,
    s3Region: providedS3Region,
    s3Bucket: providedS3Bucket,
    trackNameSuffix,
    storageAdapter,
    credentials
  } = options;
  const hasAutoCensor = !!autoCensorOption;
  const hasReplacements = !!replacementsOption && replacementsOption.length > 0;
  if (!hasAutoCensor && !hasReplacements) {
    throw new Error("At least one of autoCensorProfanity or replacements must be provided.");
  }
  if (autoCensorOption && !provider) {
    throw new Error("provider is required when using autoCensorProfanity.");
  }
  // Both flags default to true; only an explicit `false` disables them.
  const deleteOriginal = deleteOriginalTrack !== false;
  const uploadToMux = uploadToMuxOption !== false;
  const s3Endpoint = providedS3Endpoint ?? env_default.S3_ENDPOINT;
  const s3Region = providedS3Region ?? env_default.S3_REGION ?? "auto";
  const s3Bucket = providedS3Bucket ?? env_default.S3_BUCKET;
  const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
  const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
  // Fail before any network work if the upload cannot possibly succeed.
  if (uploadToMux && (!s3Endpoint || !s3Bucket || !storageAdapter && (!s3AccessKeyId || !s3SecretAccessKey))) {
    throw new Error(
      "Storage configuration is required for uploading to Mux. Provide s3Endpoint and s3Bucket. If no storageAdapter is supplied, also provide s3AccessKeyId and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables."
    );
  }
  const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId, credentials);
  const assetDurationSeconds = getAssetDurationSecondsFromAsset(assetData);
  const signingContext = await resolveMuxSigningContext(credentials);
  if (policy === "signed" && !signingContext) {
    throw new Error(
      "Signed playback ID requires signing credentials. Set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
    );
  }
  const readyTextTracks = getReadyTextTracks(assetData);
  const sourceTrack = readyTextTracks.find((t) => t.id === trackId);
  if (!sourceTrack) {
    const availableTrackIds = readyTextTracks.map((t) => t.id).filter(Boolean).join(", ");
    throw new Error(
      `Track '${trackId}' not found or not ready on asset '${assetId}'. Available track IDs: ${availableTrackIds || "none"}`
    );
  }
  const vttUrl = await buildTranscriptUrl(playbackId, trackId, policy === "signed", credentials);
  let vttContent;
  try {
    vttContent = await fetchVttFromMux(vttUrl);
  } catch (error) {
    throw new Error(
      `Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`,
      { cause: error }
    );
  }
  let editedVtt = vttContent;
  let totalReplacementCount = 0;
  let autoCensorResult;
  let usage;
  if (autoCensorOption) {
    const { mode = "blank", alwaysCensor = [], neverCensor = [] } = autoCensorOption;
    const plainText = extractTextFromVTT(vttContent);
    if (!plainText.trim()) {
      throw new Error("Track transcript is empty; nothing to censor.");
    }
    const modelConfig = resolveLanguageModelConfig({
      ...options,
      provider,
      model
    });
    let detectedProfanity;
    try {
      const result = await identifyProfanityWithAI({
        plainText,
        provider: modelConfig.provider,
        modelId: modelConfig.modelId,
        credentials
      });
      detectedProfanity = result.profanity;
      usage = result.usage;
    } catch (error) {
      throw new Error(
        `Failed to detect profanity with ${modelConfig.provider}: ${error instanceof Error ? error.message : "Unknown error"}`,
        { cause: error }
      );
    }
    const finalProfanity = applyOverrideLists(detectedProfanity, alwaysCensor, neverCensor);
    const { censoredVtt, replacements: censorReplacements } = censorVttContent(editedVtt, finalProfanity, mode);
    editedVtt = censoredVtt;
    totalReplacementCount += censorReplacements.length;
    autoCensorResult = { replacements: censorReplacements };
  }
  let replacementsResult;
  if (replacementsOption && replacementsOption.length > 0) {
    // Static rules run after censoring, so they see the censored text.
    const { editedVtt: afterReplacements, replacements: staticReplacements } = applyReplacements(editedVtt, replacementsOption);
    editedVtt = afterReplacements;
    totalReplacementCount += staticReplacements.length;
    replacementsResult = { replacements: staticReplacements };
  }
  // Usage exists only when the model was called (auto-censor path).
  const usageWithMetadata = usage ? {
    ...usage,
    metadata: {
      assetDurationSeconds
    }
  } : void 0;
  if (!uploadToMux) {
    return {
      assetId,
      trackId,
      originalVtt: vttContent,
      editedVtt,
      totalReplacementCount,
      autoCensorProfanity: autoCensorResult,
      replacements: replacementsResult,
      usage: usageWithMetadata
    };
  }
  let presignedUrl;
  try {
    presignedUrl = await uploadEditedVttToS3({
      editedVtt,
      assetId,
      trackId,
      s3Endpoint,
      s3Region,
      s3Bucket,
      storageAdapter,
      s3SignedUrlExpirySeconds: options.s3SignedUrlExpirySeconds
    });
  } catch (error) {
    throw new Error(
      `Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`,
      { cause: error }
    );
  }
  let uploadedTrackId;
  try {
    const languageCode = sourceTrack.language_code || "en";
    const suffix = trackNameSuffix ?? "edited";
    const trackName = `${sourceTrack.name || "Subtitles"} (${suffix})`;
    uploadedTrackId = await createTextTrackOnMux(
      assetId,
      languageCode,
      trackName,
      presignedUrl,
      credentials
    );
  } catch (error) {
    // Best-effort: the caller still receives the edited VTT and presigned URL.
    console.warn(`Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
  // Delete the source track only once its replacement exists.
  if (deleteOriginal && uploadedTrackId) {
    try {
      await deleteTrackOnMux(assetId, trackId, credentials);
    } catch (error) {
      console.warn(`Failed to delete original track: ${error instanceof Error ? error.message : "Unknown error"}`);
    }
  }
  return {
    assetId,
    trackId,
    originalVtt: vttContent,
    editedVtt,
    totalReplacementCount,
    autoCensorProfanity: autoCensorResult,
    replacements: replacementsResult,
    uploadedTrackId,
    presignedUrl,
    usage: usageWithMetadata
  };
}
3538
+
2967
3539
  // src/workflows/embeddings.ts
2968
3540
  import { embed } from "ai";
2969
3541
  function averageEmbeddings(embeddings) {
@@ -3611,16 +4183,18 @@ async function getModerationScores(assetId, options = {}) {
3611
4183
  }
3612
4184
 
3613
4185
  // src/workflows/summarization.ts
3614
- import { generateText as generateText4, Output as Output4 } from "ai";
3615
- import dedent4 from "dedent";
3616
- import { z as z5 } from "zod";
3617
- var SUMMARY_KEYWORD_LIMIT = 10;
3618
- var summarySchema = z5.object({
3619
- keywords: z5.array(z5.string()),
3620
- title: z5.string(),
3621
- description: z5.string()
4186
+ import { generateText as generateText5, Output as Output5 } from "ai";
4187
+ import dedent5 from "dedent";
4188
+ import { z as z6 } from "zod";
4189
+ var DEFAULT_SUMMARY_KEYWORD_LIMIT = 10;
4190
+ var DEFAULT_TITLE_LENGTH = 10;
4191
+ var DEFAULT_DESCRIPTION_LENGTH = 50;
4192
+ var summarySchema = z6.object({
4193
+ keywords: z6.array(z6.string()),
4194
+ title: z6.string(),
4195
+ description: z6.string()
3622
4196
  }).strict();
3623
- var SUMMARY_OUTPUT = Output4.object({
4197
+ var SUMMARY_OUTPUT = Output5.object({
3624
4198
  name: "summary_metadata",
3625
4199
  description: "Structured summary with title, description, and keywords.",
3626
4200
  schema: summarySchema
@@ -3631,10 +4205,49 @@ var TONE_INSTRUCTIONS = {
3631
4205
  playful: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
3632
4206
  professional: "Provide a professional, executive-level analysis suitable for business reporting."
3633
4207
  };
4208
+ var DESCRIPTION_LENGTH_THRESHOLD_SMALL = 25;
4209
+ var DESCRIPTION_LENGTH_THRESHOLD_LARGE = 100;
4210
/**
 * Produces the prompt text describing how long and how detailed the generated
 * description should be.
 *
 * Three tiers are selected by `wordCount`: brief (below
 * `DESCRIPTION_LENGTH_THRESHOLD_SMALL`), detailed (above
 * `DESCRIPTION_LENGTH_THRESHOLD_LARGE`) and standard (everything else). Each
 * tier has a video wording and an audio wording.
 *
 * @param {number} wordCount - Target description length in words.
 * @param {string} contentType - `"video"` selects the video wording; any other
 *   value selects the audio wording.
 * @returns {string} Guidance text for the description section of the prompt.
 */
function buildDescriptionGuidance(wordCount, contentType) {
  const isVideo = contentType === "video";
  if (wordCount < DESCRIPTION_LENGTH_THRESHOLD_SMALL) {
    return isVideo ?
      dedent5`A brief summary of the video in approximately ${wordCount} words.
        Focus on the single most important subject or action.
        Write in present tense.` :
      dedent5`A brief summary of the audio content in approximately ${wordCount} words.
        Focus on the single most important topic or theme.
        Write in present tense.`;
  }
  if (wordCount > DESCRIPTION_LENGTH_THRESHOLD_LARGE) {
    return isVideo ?
      dedent5`A detailed summary that describes what happens across the video.
        Aim for approximately ${wordCount} words, and you may use multiple sentences.
        Be thorough: cover subjects, actions, setting, progression, and any notable details visible across frames.
        Write in present tense. Be specific about observable details rather than making assumptions.
        If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.` :
      dedent5`A detailed summary that describes the audio content.
        Aim for approximately ${wordCount} words, and you may use multiple sentences.
        Be thorough: cover topics, speakers, themes, progression, and any notable insights.
        Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
        Focus on the spoken content and any key insights, dialogue, or narrative elements.`;
  }
  return isVideo ?
    dedent5`A summary that describes what happens across the video.
      Aim for approximately ${wordCount} words, and you may use multiple sentences.
      Cover the main subjects, actions, setting, and any notable progression visible across frames.
      Write in present tense. Be specific about observable details rather than making assumptions.
      If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.` :
    dedent5`A summary that describes the audio content.
      Aim for approximately ${wordCount} words, and you may use multiple sentences.
      Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
      Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
      Focus on the spoken content and any key insights, dialogue, or narrative elements.`;
}
3634
4248
  function createSummarizationBuilder({ titleLength, descriptionLength, tagCount } = {}) {
3635
- const titleBrevity = titleLength != null ? `Aim for approximately ${titleLength} characters.` : "Aim for brevity - typically under 10 words.";
3636
- const descConstraint = descriptionLength != null ? `approximately ${descriptionLength} characters` : "2-4 sentences";
3637
- const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
4249
+ const titleBrevity = `Aim for approximately ${titleLength ?? DEFAULT_TITLE_LENGTH} words.`;
4250
+ const keywordLimit = tagCount ?? DEFAULT_SUMMARY_KEYWORD_LIMIT;
3638
4251
  return createPromptBuilder({
3639
4252
  template: {
3640
4253
  task: {
@@ -3643,7 +4256,7 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
3643
4256
  },
3644
4257
  title: {
3645
4258
  tag: "title_requirements",
3646
- content: dedent4`
4259
+ content: dedent5`
3647
4260
  A short, compelling headline that immediately communicates the subject or action.
3648
4261
  ${titleBrevity} Think of how a news headline or video card title would read.
3649
4262
  Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
@@ -3651,15 +4264,11 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
3651
4264
  },
3652
4265
  description: {
3653
4266
  tag: "description_requirements",
3654
- content: dedent4`
3655
- A concise summary (${descConstraint}) that describes what happens across the video.
3656
- Cover the main subjects, actions, setting, and any notable progression visible across frames.
3657
- Write in present tense. Be specific about observable details rather than making assumptions.
3658
- If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
4267
+ content: buildDescriptionGuidance(descriptionLength ?? DEFAULT_DESCRIPTION_LENGTH, "video")
3659
4268
  },
3660
4269
  keywords: {
3661
4270
  tag: "keywords_requirements",
3662
- content: dedent4`
4271
+ content: dedent5`
3663
4272
  Specific, searchable terms (up to ${keywordLimit}) that capture:
3664
4273
  - Primary subjects (people, animals, objects)
3665
4274
  - Actions and activities being performed
@@ -3671,7 +4280,7 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
3671
4280
  },
3672
4281
  qualityGuidelines: {
3673
4282
  tag: "quality_guidelines",
3674
- content: dedent4`
4283
+ content: dedent5`
3675
4284
  - Examine all frames to understand the full context and progression
3676
4285
  - Be precise: "golden retriever" is better than "dog" when identifiable
3677
4286
  - Capture the narrative: what begins, develops, and concludes
@@ -3682,9 +4291,8 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
3682
4291
  });
3683
4292
  }
3684
4293
  function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {}) {
3685
- const titleBrevity = titleLength != null ? `Aim for approximately ${titleLength} characters.` : "Aim for brevity - typically under 10 words.";
3686
- const descConstraint = descriptionLength != null ? `approximately ${descriptionLength} characters` : "2-4 sentences";
3687
- const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
4294
+ const titleBrevity = `Aim for approximately ${titleLength ?? DEFAULT_TITLE_LENGTH} words.`;
4295
+ const keywordLimit = tagCount ?? DEFAULT_SUMMARY_KEYWORD_LIMIT;
3688
4296
  return createPromptBuilder({
3689
4297
  template: {
3690
4298
  task: {
@@ -3693,7 +4301,7 @@ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {
3693
4301
  },
3694
4302
  title: {
3695
4303
  tag: "title_requirements",
3696
- content: dedent4`
4304
+ content: dedent5`
3697
4305
  A short, compelling headline that immediately communicates the subject or topic.
3698
4306
  ${titleBrevity} Think of how a podcast title or audio description would read.
3699
4307
  Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
@@ -3701,15 +4309,11 @@ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {
3701
4309
  },
3702
4310
  description: {
3703
4311
  tag: "description_requirements",
3704
- content: dedent4`
3705
- A concise summary (${descConstraint}) that describes the audio content.
3706
- Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
3707
- Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
3708
- Focus on the spoken content and any key insights, dialogue, or narrative elements.`
4312
+ content: buildDescriptionGuidance(descriptionLength ?? DEFAULT_DESCRIPTION_LENGTH, "audio")
3709
4313
  },
3710
4314
  keywords: {
3711
4315
  tag: "keywords_requirements",
3712
- content: dedent4`
4316
+ content: dedent5`
3713
4317
  Specific, searchable terms (up to ${keywordLimit}) that capture:
3714
4318
  - Primary topics and themes
3715
4319
  - Speakers or presenters (if named)
@@ -3721,7 +4325,7 @@ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {
3721
4325
  },
3722
4326
  qualityGuidelines: {
3723
4327
  tag: "quality_guidelines",
3724
- content: dedent4`
4328
+ content: dedent5`
3725
4329
  - Analyze the full transcript to understand context and themes
3726
4330
  - Be precise: use specific terminology when mentioned
3727
4331
  - Capture the narrative: what is introduced, discussed, and concluded
@@ -3731,7 +4335,7 @@ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {
3731
4335
  sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
3732
4336
  });
3733
4337
  }
3734
- var SYSTEM_PROMPT3 = dedent4`
4338
+ var SYSTEM_PROMPT4 = dedent5`
3735
4339
  <role>
3736
4340
  You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
3737
4341
  </role>
@@ -3788,7 +4392,7 @@ var SYSTEM_PROMPT3 = dedent4`
3788
4392
 
3789
4393
  Write as if describing reality, not describing a recording of reality.
3790
4394
  </language_guidelines>`;
3791
- var AUDIO_ONLY_SYSTEM_PROMPT = dedent4`
4395
+ var AUDIO_ONLY_SYSTEM_PROMPT = dedent5`
3792
4396
  <role>
3793
4397
  You are an audio content analyst specializing in transcript analysis and metadata generation.
3794
4398
  </role>
@@ -3856,6 +4460,11 @@ function buildUserPrompt4({
3856
4460
  const contextSections = [createToneSection(TONE_INSTRUCTIONS[tone])];
3857
4461
  if (languageName) {
3858
4462
  contextSections.push(createLanguageSection(languageName));
4463
+ } else {
4464
+ contextSections.push({
4465
+ tag: "language",
4466
+ content: "Respond in English. Never switch languages to satisfy length constraints."
4467
+ });
3859
4468
  }
3860
4469
  if (transcriptText) {
3861
4470
  const format = isCleanTranscript ? "plain text" : "WebVTT";
@@ -3868,7 +4477,7 @@ function buildUserPrompt4({
3868
4477
  async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, systemPrompt, credentials) {
3869
4478
  "use step";
3870
4479
  const model = await createLanguageModelFromConfig(provider, modelId, credentials);
3871
- const response = await generateText4({
4480
+ const response = await generateText5({
3872
4481
  model,
3873
4482
  output: SUMMARY_OUTPUT,
3874
4483
  messages: [
@@ -3903,7 +4512,7 @@ async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, s
3903
4512
  async function analyzeAudioOnly(provider, modelId, userPrompt, systemPrompt, credentials) {
3904
4513
  "use step";
3905
4514
  const model = await createLanguageModelFromConfig(provider, modelId, credentials);
3906
- const response = await generateText4({
4515
+ const response = await generateText5({
3907
4516
  model,
3908
4517
  output: SUMMARY_OUTPUT,
3909
4518
  messages: [
@@ -3932,7 +4541,7 @@ async function analyzeAudioOnly(provider, modelId, userPrompt, systemPrompt, cre
3932
4541
  }
3933
4542
  };
3934
4543
  }
3935
- function normalizeKeywords(keywords, limit = SUMMARY_KEYWORD_LIMIT) {
4544
+ function normalizeKeywords(keywords, limit = DEFAULT_SUMMARY_KEYWORD_LIMIT) {
3936
4545
  if (!Array.isArray(keywords) || keywords.length === 0) {
3937
4546
  return [];
3938
4547
  }
@@ -4019,7 +4628,7 @@ async function getSummaryAndTags(assetId, options) {
4019
4628
  });
4020
4629
  let analysisResponse;
4021
4630
  let imageUrl;
4022
- const systemPrompt = isAudioOnly ? AUDIO_ONLY_SYSTEM_PROMPT : SYSTEM_PROMPT3;
4631
+ const systemPrompt = isAudioOnly ? AUDIO_ONLY_SYSTEM_PROMPT : SYSTEM_PROMPT4;
4023
4632
  try {
4024
4633
  if (isAudioOnly) {
4025
4634
  analysisResponse = await analyzeAudioOnly(
@@ -4072,7 +4681,7 @@ async function getSummaryAndTags(assetId, options) {
4072
4681
  assetId,
4073
4682
  title: analysisResponse.result.title,
4074
4683
  description: analysisResponse.result.description,
4075
- tags: normalizeKeywords(analysisResponse.result.keywords, tagCount ?? SUMMARY_KEYWORD_LIMIT),
4684
+ tags: normalizeKeywords(analysisResponse.result.keywords, tagCount ?? DEFAULT_SUMMARY_KEYWORD_LIMIT),
4076
4685
  storyboardUrl: imageUrl,
4077
4686
  // undefined for audio-only assets
4078
4687
  usage: {
@@ -4085,52 +4694,10 @@ async function getSummaryAndTags(assetId, options) {
4085
4694
  };
4086
4695
  }
4087
4696
 
4088
- // src/lib/storage-adapter.ts
4089
- function requireCredentials(accessKeyId, secretAccessKey) {
4090
- if (!accessKeyId || !secretAccessKey) {
4091
- throw new Error(
4092
- "S3 credentials are required for default storage operations. Provide S3_ACCESS_KEY_ID and S3_SECRET_ACCESS_KEY or pass options.storageAdapter."
4093
- );
4094
- }
4095
- return { accessKeyId, secretAccessKey };
4096
- }
4097
- async function putObjectWithStorageAdapter(input, adapter) {
4098
- if (adapter) {
4099
- await adapter.putObject(input);
4100
- return;
4101
- }
4102
- const credentials = requireCredentials(input.accessKeyId, input.secretAccessKey);
4103
- await putObjectToS3({
4104
- accessKeyId: credentials.accessKeyId,
4105
- secretAccessKey: credentials.secretAccessKey,
4106
- endpoint: input.endpoint,
4107
- region: input.region,
4108
- bucket: input.bucket,
4109
- key: input.key,
4110
- body: input.body,
4111
- contentType: input.contentType
4112
- });
4113
- }
4114
- async function createPresignedGetUrlWithStorageAdapter(input, adapter) {
4115
- if (adapter) {
4116
- return adapter.createPresignedGetUrl(input);
4117
- }
4118
- const credentials = requireCredentials(input.accessKeyId, input.secretAccessKey);
4119
- return createPresignedGetUrl({
4120
- accessKeyId: credentials.accessKeyId,
4121
- secretAccessKey: credentials.secretAccessKey,
4122
- endpoint: input.endpoint,
4123
- region: input.region,
4124
- bucket: input.bucket,
4125
- key: input.key,
4126
- expiresInSeconds: input.expiresInSeconds
4127
- });
4128
- }
4129
-
4130
4697
  // src/workflows/translate-audio.ts
4131
4698
  var STATIC_RENDITION_POLL_INTERVAL_MS = 5e3;
4132
4699
  var STATIC_RENDITION_MAX_ATTEMPTS = 36;
4133
- async function sleep(ms) {
4700
+ async function sleep2(ms) {
4134
4701
  "use step";
4135
4702
  await new Promise((resolve) => setTimeout(resolve, ms));
4136
4703
  }
@@ -4196,7 +4763,7 @@ async function waitForAudioStaticRendition({
4196
4763
  console.warn(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
4197
4764
  }
4198
4765
  for (let attempt = 1; attempt <= STATIC_RENDITION_MAX_ATTEMPTS; attempt++) {
4199
- await sleep(STATIC_RENDITION_POLL_INTERVAL_MS);
4766
+ await sleep2(STATIC_RENDITION_POLL_INTERVAL_MS);
4200
4767
  currentAsset = await mux.video.assets.retrieve(assetId);
4201
4768
  if (hasReadyAudioStaticRendition(currentAsset)) {
4202
4769
  return currentAsset;
@@ -4303,7 +4870,8 @@ async function uploadDubbedAudioToS3({
4303
4870
  s3Endpoint,
4304
4871
  s3Region,
4305
4872
  s3Bucket,
4306
- storageAdapter
4873
+ storageAdapter,
4874
+ s3SignedUrlExpirySeconds
4307
4875
  }) {
4308
4876
  "use step";
4309
4877
  const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
@@ -4326,10 +4894,11 @@ async function uploadDubbedAudioToS3({
4326
4894
  region: s3Region,
4327
4895
  bucket: s3Bucket,
4328
4896
  key: audioKey,
4329
- expiresInSeconds: 3600
4897
+ expiresInSeconds: s3SignedUrlExpirySeconds ?? 86400
4330
4898
  }, storageAdapter);
4899
+ const expiryHours = Math.round((s3SignedUrlExpirySeconds ?? 86400) / 3600);
4331
4900
  console.warn(`\u2705 Audio uploaded successfully to: ${audioKey}`);
4332
- console.warn(`\u{1F517} Generated presigned URL (expires in 1 hour)`);
4901
+ console.warn(`\u{1F517} Generated presigned URL (expires in ${expiryHours} hour${expiryHours === 1 ? "" : "s"})`);
4333
4902
  return presignedUrl;
4334
4903
  }
4335
4904
  async function createAudioTrackOnMux(assetId, languageCode, presignedUrl, credentials) {
@@ -4428,7 +4997,7 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
4428
4997
  const maxPollAttempts = 180;
4429
4998
  let targetLanguages = [];
4430
4999
  while (dubbingStatus === "dubbing" && pollAttempts < maxPollAttempts) {
4431
- await sleep(1e4);
5000
+ await sleep2(1e4);
4432
5001
  pollAttempts++;
4433
5002
  try {
4434
5003
  const statusResult = await checkElevenLabsDubbingStatus({
@@ -4493,7 +5062,8 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
4493
5062
  s3Endpoint,
4494
5063
  s3Region,
4495
5064
  s3Bucket,
4496
- storageAdapter: effectiveStorageAdapter
5065
+ storageAdapter: effectiveStorageAdapter,
5066
+ s3SignedUrlExpirySeconds: options.s3SignedUrlExpirySeconds
4497
5067
  });
4498
5068
  } catch (error) {
4499
5069
  throw new Error(`Failed to upload audio to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
@@ -4531,24 +5101,24 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
4531
5101
  // src/workflows/translate-captions.ts
4532
5102
  import {
4533
5103
  APICallError,
4534
- generateText as generateText5,
5104
+ generateText as generateText6,
4535
5105
  NoObjectGeneratedError,
4536
- Output as Output5,
5106
+ Output as Output6,
4537
5107
  RetryError,
4538
5108
  TypeValidationError
4539
5109
  } from "ai";
4540
- import dedent5 from "dedent";
4541
- import { z as z6 } from "zod";
4542
- var translationSchema = z6.object({
4543
- translation: z6.string()
5110
+ import dedent6 from "dedent";
5111
+ import { z as z7 } from "zod";
5112
+ var translationSchema = z7.object({
5113
+ translation: z7.string()
4544
5114
  });
4545
- var SYSTEM_PROMPT4 = dedent5`
5115
+ var SYSTEM_PROMPT5 = dedent6`
4546
5116
  You are a subtitle translation expert. Translate VTT subtitle files to the target language specified by the user.
4547
5117
  You may receive either a full VTT file or a chunk from a larger VTT.
4548
5118
  Preserve all timestamps, cue ordering, and VTT formatting exactly as they appear.
4549
5119
  Return JSON with a single key "translation" containing the translated VTT content.
4550
5120
  `;
4551
- var CUE_TRANSLATION_SYSTEM_PROMPT = dedent5`
5121
+ var CUE_TRANSLATION_SYSTEM_PROMPT = dedent6`
4552
5122
  You are a subtitle translation expert.
4553
5123
  You will receive a sequence of subtitle cues extracted from a VTT file.
4554
5124
  Translate the cues to the requested target language while preserving their original order.
@@ -4710,14 +5280,6 @@ function buildTranslationChunkRequests(vttContent, assetDurationSeconds, chunkin
4710
5280
  )
4711
5281
  };
4712
5282
  }
4713
- async function fetchVttFromMux(vttUrl) {
4714
- "use step";
4715
- const vttResponse = await fetch(vttUrl);
4716
- if (!vttResponse.ok) {
4717
- throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
4718
- }
4719
- return vttResponse.text();
4720
- }
4721
5283
  async function translateVttWithAI({
4722
5284
  vttContent,
4723
5285
  fromLanguageCode,
@@ -4728,13 +5290,13 @@ async function translateVttWithAI({
4728
5290
  }) {
4729
5291
  "use step";
4730
5292
  const model = await createLanguageModelFromConfig(provider, modelId, credentials);
4731
- const response = await generateText5({
5293
+ const response = await generateText6({
4732
5294
  model,
4733
- output: Output5.object({ schema: translationSchema }),
5295
+ output: Output6.object({ schema: translationSchema }),
4734
5296
  messages: [
4735
5297
  {
4736
5298
  role: "system",
4737
- content: SYSTEM_PROMPT4
5299
+ content: SYSTEM_PROMPT5
4738
5300
  },
4739
5301
  {
4740
5302
  role: "user",
@@ -4765,8 +5327,8 @@ async function translateCueChunkWithAI({
4765
5327
  }) {
4766
5328
  "use step";
4767
5329
  const model = await createLanguageModelFromConfig(provider, modelId, credentials);
4768
- const schema = z6.object({
4769
- translations: z6.array(z6.string().min(1)).length(cues.length)
5330
+ const schema = z7.object({
5331
+ translations: z7.array(z7.string().min(1)).length(cues.length)
4770
5332
  });
4771
5333
  const cuePayload = cues.map((cue, index) => ({
4772
5334
  index,
@@ -4774,9 +5336,9 @@ async function translateCueChunkWithAI({
4774
5336
  endTime: cue.endTime,
4775
5337
  text: cue.text
4776
5338
  }));
4777
- const response = await generateText5({
5339
+ const response = await generateText6({
4778
5340
  model,
4779
- output: Output5.object({ schema }),
5341
+ output: Output6.object({ schema }),
4780
5342
  messages: [
4781
5343
  {
4782
5344
  role: "system",
@@ -4933,7 +5495,8 @@ async function uploadVttToS3({
4933
5495
  s3Endpoint,
4934
5496
  s3Region,
4935
5497
  s3Bucket,
4936
- storageAdapter
5498
+ storageAdapter,
5499
+ s3SignedUrlExpirySeconds
4937
5500
  }) {
4938
5501
  "use step";
4939
5502
  const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
@@ -4956,25 +5519,9 @@ async function uploadVttToS3({
4956
5519
  region: s3Region,
4957
5520
  bucket: s3Bucket,
4958
5521
  key: vttKey,
4959
- expiresInSeconds: 3600
5522
+ expiresInSeconds: s3SignedUrlExpirySeconds ?? 86400
4960
5523
  }, storageAdapter);
4961
5524
  }
4962
- async function createTextTrackOnMux(assetId, languageCode, trackName, presignedUrl, credentials) {
4963
- "use step";
4964
- const muxClient = await resolveMuxClient(credentials);
4965
- const mux = await muxClient.createClient();
4966
- const trackResponse = await mux.video.assets.createTrack(assetId, {
4967
- type: "text",
4968
- text_type: "subtitles",
4969
- language_code: languageCode,
4970
- name: trackName,
4971
- url: presignedUrl
4972
- });
4973
- if (!trackResponse.id) {
4974
- throw new Error("Failed to create text track: no track ID returned from Mux");
4975
- }
4976
- return trackResponse.id;
4977
- }
4978
5525
  async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options) {
4979
5526
  "use workflow";
4980
5527
  const {
@@ -5092,7 +5639,8 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
5092
5639
  s3Endpoint,
5093
5640
  s3Region,
5094
5641
  s3Bucket,
5095
- storageAdapter: effectiveStorageAdapter
5642
+ storageAdapter: effectiveStorageAdapter,
5643
+ s3SignedUrlExpirySeconds: options.s3SignedUrlExpirySeconds
5096
5644
  });
5097
5645
  } catch (error) {
5098
5646
  throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);