@mux/ai 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -1
- package/dist/{index-C8-E3VR9.d.ts → index-DLhfJsOd.d.ts} +73 -2
- package/dist/{index-CA7bG50u.d.ts → index-DyzifniY.d.ts} +167 -21
- package/dist/index.d.ts +3 -3
- package/dist/index.js +733 -153
- package/dist/index.js.map +1 -1
- package/dist/primitives/index.d.ts +1 -1
- package/dist/primitives/index.js +140 -1
- package/dist/primitives/index.js.map +1 -1
- package/dist/workflows/index.d.ts +1 -1
- package/dist/workflows/index.js +1980 -1539
- package/dist/workflows/index.js.map +1 -1
- package/package.json +2 -1
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ var __export = (target, all) => {
|
|
|
5
5
|
};
|
|
6
6
|
|
|
7
7
|
// package.json
|
|
8
|
-
var version = "0.10.0";
|
|
8
|
+
var version = "0.12.0";
|
|
9
9
|
|
|
10
10
|
// src/env.ts
|
|
11
11
|
import { z } from "zod";
|
|
@@ -803,13 +803,16 @@ __export(primitives_exports, {
|
|
|
803
803
|
getHotspotsForPlaybackId: () => getHotspotsForPlaybackId,
|
|
804
804
|
getHotspotsForVideo: () => getHotspotsForVideo,
|
|
805
805
|
getReadyTextTracks: () => getReadyTextTracks,
|
|
806
|
+
getShotsForAsset: () => getShotsForAsset,
|
|
806
807
|
getStoryboardUrl: () => getStoryboardUrl,
|
|
807
808
|
getThumbnailUrls: () => getThumbnailUrls,
|
|
808
809
|
parseVTTCues: () => parseVTTCues,
|
|
809
810
|
replaceCueText: () => replaceCueText,
|
|
811
|
+
requestShotsForAsset: () => requestShotsForAsset,
|
|
810
812
|
secondsToTimestamp: () => secondsToTimestamp,
|
|
811
813
|
splitVttPreambleAndCueBlocks: () => splitVttPreambleAndCueBlocks,
|
|
812
|
-
vttTimestampToSeconds: () => vttTimestampToSeconds
|
|
814
|
+
vttTimestampToSeconds: () => vttTimestampToSeconds,
|
|
815
|
+
waitForShotsForAsset: () => waitForShotsForAsset
|
|
813
816
|
});
|
|
814
817
|
|
|
815
818
|
// src/lib/providers.ts
|
|
@@ -1082,6 +1085,142 @@ async function fetchHotspots(identifierType, id, options) {
|
|
|
1082
1085
|
return transformHotspotResponse(response);
|
|
1083
1086
|
}
|
|
1084
1087
|
|
|
1088
|
+
// src/primitives/shots.ts
|
|
1089
|
+
var DEFAULT_POLL_INTERVAL_MS = 2e3;
|
|
1090
|
+
var MIN_POLL_INTERVAL_MS = 1e3;
|
|
1091
|
+
var DEFAULT_MAX_ATTEMPTS = 60;
|
|
1092
|
+
var SHOTS_ALREADY_REQUESTED_MESSAGE = "shots generation has already been requested";
|
|
1093
|
+
function getShotsPath(assetId) {
|
|
1094
|
+
return `/video/v1/assets/${assetId}/shots`;
|
|
1095
|
+
}
|
|
1096
|
+
function mapManifestShots(shots) {
|
|
1097
|
+
return shots.map((shot, index) => {
|
|
1098
|
+
const { startTime, imageUrl } = shot;
|
|
1099
|
+
if (typeof startTime !== "number" || !Number.isFinite(startTime)) {
|
|
1100
|
+
throw new TypeError(`Invalid shot startTime in shots manifest at index ${index}`);
|
|
1101
|
+
}
|
|
1102
|
+
if (typeof imageUrl !== "string" || imageUrl.length === 0) {
|
|
1103
|
+
throw new TypeError(`Invalid shot imageUrl in shots manifest at index ${index}`);
|
|
1104
|
+
}
|
|
1105
|
+
return {
|
|
1106
|
+
startTime,
|
|
1107
|
+
imageUrl
|
|
1108
|
+
};
|
|
1109
|
+
});
|
|
1110
|
+
}
|
|
1111
|
+
async function fetchShotsFromManifest(shotsManifestUrl) {
|
|
1112
|
+
const response = await fetch(shotsManifestUrl);
|
|
1113
|
+
if (!response.ok) {
|
|
1114
|
+
throw new Error(
|
|
1115
|
+
`Failed to fetch shots manifest: ${response.status} ${response.statusText}`
|
|
1116
|
+
);
|
|
1117
|
+
}
|
|
1118
|
+
const manifest = await response.json();
|
|
1119
|
+
if (!Array.isArray(manifest.shots)) {
|
|
1120
|
+
throw new TypeError("Invalid shots manifest response: missing shots array");
|
|
1121
|
+
}
|
|
1122
|
+
return mapManifestShots(manifest.shots);
|
|
1123
|
+
}
|
|
1124
|
+
async function transformShotsResponse(response) {
|
|
1125
|
+
switch (response.data.status) {
|
|
1126
|
+
case "pending":
|
|
1127
|
+
return {
|
|
1128
|
+
status: "pending",
|
|
1129
|
+
createdAt: response.data.created_at
|
|
1130
|
+
};
|
|
1131
|
+
case "errored":
|
|
1132
|
+
return {
|
|
1133
|
+
status: "errored",
|
|
1134
|
+
createdAt: response.data.created_at,
|
|
1135
|
+
error: response.data.error
|
|
1136
|
+
};
|
|
1137
|
+
case "completed":
|
|
1138
|
+
return {
|
|
1139
|
+
status: "completed",
|
|
1140
|
+
createdAt: response.data.created_at,
|
|
1141
|
+
shots: await fetchShotsFromManifest(response.data.shots_manifest_url)
|
|
1142
|
+
};
|
|
1143
|
+
default: {
|
|
1144
|
+
const exhaustiveCheck = response.data;
|
|
1145
|
+
throw new Error(`Unsupported shots response: ${JSON.stringify(exhaustiveCheck)}`);
|
|
1146
|
+
}
|
|
1147
|
+
}
|
|
1148
|
+
}
|
|
1149
|
+
function sleep(ms) {
|
|
1150
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
1151
|
+
}
|
|
1152
|
+
function isShotsAlreadyRequestedError(error) {
|
|
1153
|
+
const statusCode = error?.status ?? error?.statusCode;
|
|
1154
|
+
const messages = error?.error?.messages;
|
|
1155
|
+
const lowerCaseMessages = messages?.map((message) => message.toLowerCase()) ?? [];
|
|
1156
|
+
const errorMessage = error instanceof Error ? error.message.toLowerCase() : "";
|
|
1157
|
+
return statusCode === 400 && (lowerCaseMessages.some((message) => message.includes(SHOTS_ALREADY_REQUESTED_MESSAGE)) || errorMessage.includes(SHOTS_ALREADY_REQUESTED_MESSAGE));
|
|
1158
|
+
}
|
|
1159
|
+
async function requestShotsForAsset(assetId, options = {}) {
|
|
1160
|
+
"use step";
|
|
1161
|
+
const { credentials } = options;
|
|
1162
|
+
const muxClient = await getMuxClientFromEnv(credentials);
|
|
1163
|
+
const mux = await muxClient.createClient();
|
|
1164
|
+
const response = await mux.post(
|
|
1165
|
+
getShotsPath(assetId),
|
|
1166
|
+
{ body: {} }
|
|
1167
|
+
);
|
|
1168
|
+
const result = await transformShotsResponse(response);
|
|
1169
|
+
if (result.status !== "pending") {
|
|
1170
|
+
throw new Error(
|
|
1171
|
+
`Expected pending status after requesting shots for asset '${assetId}', received '${result.status}'`
|
|
1172
|
+
);
|
|
1173
|
+
}
|
|
1174
|
+
return result;
|
|
1175
|
+
}
|
|
1176
|
+
async function getShotsForAsset(assetId, options = {}) {
|
|
1177
|
+
"use step";
|
|
1178
|
+
const { credentials } = options;
|
|
1179
|
+
const muxClient = await getMuxClientFromEnv(credentials);
|
|
1180
|
+
const mux = await muxClient.createClient();
|
|
1181
|
+
const response = await mux.get(
|
|
1182
|
+
getShotsPath(assetId)
|
|
1183
|
+
);
|
|
1184
|
+
return await transformShotsResponse(response);
|
|
1185
|
+
}
|
|
1186
|
+
async function waitForShotsForAsset(assetId, options = {}) {
|
|
1187
|
+
"use step";
|
|
1188
|
+
const {
|
|
1189
|
+
pollIntervalMs = DEFAULT_POLL_INTERVAL_MS,
|
|
1190
|
+
maxAttempts = DEFAULT_MAX_ATTEMPTS,
|
|
1191
|
+
createIfMissing = true,
|
|
1192
|
+
credentials
|
|
1193
|
+
} = options;
|
|
1194
|
+
if (createIfMissing) {
|
|
1195
|
+
try {
|
|
1196
|
+
await requestShotsForAsset(assetId, { credentials });
|
|
1197
|
+
} catch (error) {
|
|
1198
|
+
if (!isShotsAlreadyRequestedError(error)) {
|
|
1199
|
+
throw error;
|
|
1200
|
+
}
|
|
1201
|
+
}
|
|
1202
|
+
}
|
|
1203
|
+
const normalizedMaxAttempts = Math.max(1, maxAttempts);
|
|
1204
|
+
const normalizedPollIntervalMs = Math.max(MIN_POLL_INTERVAL_MS, pollIntervalMs);
|
|
1205
|
+
let lastStatus;
|
|
1206
|
+
for (let attempt = 0; attempt < normalizedMaxAttempts; attempt++) {
|
|
1207
|
+
const result = await getShotsForAsset(assetId, { credentials });
|
|
1208
|
+
lastStatus = result.status;
|
|
1209
|
+
if (result.status === "completed") {
|
|
1210
|
+
return result;
|
|
1211
|
+
}
|
|
1212
|
+
if (result.status === "errored") {
|
|
1213
|
+
throw new Error(`Shots generation errored for asset '${assetId}'`);
|
|
1214
|
+
}
|
|
1215
|
+
if (attempt < normalizedMaxAttempts - 1) {
|
|
1216
|
+
await sleep(normalizedPollIntervalMs);
|
|
1217
|
+
}
|
|
1218
|
+
}
|
|
1219
|
+
throw new Error(
|
|
1220
|
+
`Timed out waiting for shots for asset '${assetId}' after ${normalizedMaxAttempts} attempts. Last status: ${lastStatus ?? "unknown"}`
|
|
1221
|
+
);
|
|
1222
|
+
}
|
|
1223
|
+
|
|
1085
1224
|
// src/lib/mux-image-url.ts
|
|
1086
1225
|
var DEFAULT_MUX_IMAGE_ORIGIN = "https://image.mux.com";
|
|
1087
1226
|
function normalizeMuxImageOrigin(value) {
|
|
@@ -1785,23 +1924,33 @@ async function fetchTranscriptForAsset(asset, playbackId, options = {}) {
|
|
|
1785
1924
|
// src/workflows/index.ts
|
|
1786
1925
|
var workflows_exports = {};
|
|
1787
1926
|
__export(workflows_exports, {
|
|
1927
|
+
DEFAULT_DESCRIPTION_LENGTH: () => DEFAULT_DESCRIPTION_LENGTH,
|
|
1928
|
+
DEFAULT_SUMMARY_KEYWORD_LIMIT: () => DEFAULT_SUMMARY_KEYWORD_LIMIT,
|
|
1929
|
+
DEFAULT_TITLE_LENGTH: () => DEFAULT_TITLE_LENGTH,
|
|
1788
1930
|
HIVE_SEXUAL_CATEGORIES: () => HIVE_SEXUAL_CATEGORIES,
|
|
1789
1931
|
HIVE_VIOLENCE_CATEGORIES: () => HIVE_VIOLENCE_CATEGORIES,
|
|
1790
|
-
SUMMARY_KEYWORD_LIMIT: () => SUMMARY_KEYWORD_LIMIT,
|
|
1791
1932
|
aggregateTokenUsage: () => aggregateTokenUsage,
|
|
1933
|
+
applyOverrideLists: () => applyOverrideLists,
|
|
1934
|
+
applyReplacements: () => applyReplacements,
|
|
1792
1935
|
askQuestions: () => askQuestions,
|
|
1936
|
+
buildReplacementRegex: () => buildReplacementRegex,
|
|
1793
1937
|
burnedInCaptionsSchema: () => burnedInCaptionsSchema,
|
|
1938
|
+
censorVttContent: () => censorVttContent,
|
|
1794
1939
|
chapterSchema: () => chapterSchema,
|
|
1795
1940
|
chaptersSchema: () => chaptersSchema,
|
|
1941
|
+
createReplacer: () => createReplacer,
|
|
1942
|
+
editCaptions: () => editCaptions,
|
|
1796
1943
|
generateChapters: () => generateChapters,
|
|
1797
1944
|
generateEmbeddings: () => generateEmbeddings,
|
|
1798
1945
|
generateVideoEmbeddings: () => generateVideoEmbeddings,
|
|
1799
1946
|
getModerationScores: () => getModerationScores,
|
|
1800
1947
|
getSummaryAndTags: () => getSummaryAndTags,
|
|
1801
1948
|
hasBurnedInCaptions: () => hasBurnedInCaptions,
|
|
1949
|
+
profanityDetectionSchema: () => profanityDetectionSchema,
|
|
1802
1950
|
questionAnswerSchema: () => questionAnswerSchema,
|
|
1803
1951
|
shouldSplitChunkTranslationError: () => shouldSplitChunkTranslationError,
|
|
1804
1952
|
summarySchema: () => summarySchema,
|
|
1953
|
+
transformCueText: () => transformCueText,
|
|
1805
1954
|
translateAudio: () => translateAudio,
|
|
1806
1955
|
translateCaptions: () => translateCaptions,
|
|
1807
1956
|
translationSchema: () => translationSchema
|
|
@@ -2092,12 +2241,14 @@ async function withRetry(fn, {
|
|
|
2092
2241
|
// src/workflows/ask-questions.ts
|
|
2093
2242
|
var questionAnswerSchema = z2.object({
|
|
2094
2243
|
question: z2.string(),
|
|
2095
|
-
answer: z2.string(),
|
|
2244
|
+
answer: z2.string().optional(),
|
|
2096
2245
|
confidence: z2.number(),
|
|
2097
|
-
reasoning: z2.string()
|
|
2246
|
+
reasoning: z2.string(),
|
|
2247
|
+
skipped: z2.boolean()
|
|
2098
2248
|
});
|
|
2249
|
+
var SKIP_SENTINEL = "__SKIPPED__";
|
|
2099
2250
|
function createAskQuestionsSchema(allowedAnswers) {
|
|
2100
|
-
const answerSchema = z2.enum(allowedAnswers);
|
|
2251
|
+
const answerSchema = z2.enum([...allowedAnswers, SKIP_SENTINEL]);
|
|
2101
2252
|
return z2.object({
|
|
2102
2253
|
answers: z2.array(
|
|
2103
2254
|
questionAnswerSchema.extend({
|
|
@@ -2153,8 +2304,32 @@ var SYSTEM_PROMPT = dedent`
|
|
|
2153
2304
|
- Be precise: cite specific frames, objects, actions, or transcript quotes
|
|
2154
2305
|
</answer_guidelines>
|
|
2155
2306
|
|
|
2307
|
+
<relevance_filtering>
|
|
2308
|
+
Before answering each question, assess whether it can be meaningfully
|
|
2309
|
+
answered based on the video storyboard and/or transcript. A question is
|
|
2310
|
+
relevant if it asks about something observable or inferable from the
|
|
2311
|
+
video content (visuals, audio, dialogue, setting, subjects, actions, etc.).
|
|
2312
|
+
|
|
2313
|
+
Mark a question as skipped (skipped: true) if it:
|
|
2314
|
+
- Is completely unrelated to video content (e.g., math, trivia, personal questions)
|
|
2315
|
+
- Asks about information that cannot be determined from storyboard frames or transcript
|
|
2316
|
+
- Is a general knowledge question with no connection to what is shown or said in the video
|
|
2317
|
+
- Attempts to use the system for non-video-analysis purposes
|
|
2318
|
+
|
|
2319
|
+
For skipped questions:
|
|
2320
|
+
- Set skipped to true
|
|
2321
|
+
- Set answer to "${SKIP_SENTINEL}"
|
|
2322
|
+
- Set confidence to 0
|
|
2323
|
+
- Use the reasoning field to explain why the question is not answerable
|
|
2324
|
+
from the video content
|
|
2325
|
+
|
|
2326
|
+
For borderline questions that are loosely related to the video content,
|
|
2327
|
+
still answer them but use a lower confidence score to reflect uncertainty.
|
|
2328
|
+
</relevance_filtering>
|
|
2329
|
+
|
|
2156
2330
|
<constraints>
|
|
2157
|
-
- You MUST answer every question with one of the allowed response options
|
|
2331
|
+
- You MUST answer every relevant question with one of the allowed response options
|
|
2332
|
+
- Skip irrelevant questions as described in relevance_filtering
|
|
2158
2333
|
- Only describe observable evidence from frames or transcript
|
|
2159
2334
|
- Do not fabricate details or make unsupported assumptions
|
|
2160
2335
|
- Return structured data matching the requested schema exactly
|
|
@@ -2230,14 +2405,7 @@ async function analyzeQuestionsWithStoryboard(imageDataUrl, provider, modelId, u
|
|
|
2230
2405
|
]
|
|
2231
2406
|
});
|
|
2232
2407
|
return {
|
|
2233
|
-
result: {
|
|
2234
|
-
answers: response.output.answers.map((answer) => ({
|
|
2235
|
-
...answer,
|
|
2236
|
-
// Strip numbering prefix (e.g., "1. " or "2. ") from questions
|
|
2237
|
-
question: answer.question.replace(/^\d+\.\s*/, ""),
|
|
2238
|
-
confidence: Math.min(1, Math.max(0, answer.confidence))
|
|
2239
|
-
}))
|
|
2240
|
-
},
|
|
2408
|
+
result: response.output,
|
|
2241
2409
|
usage: {
|
|
2242
2410
|
inputTokens: response.usage.inputTokens,
|
|
2243
2411
|
outputTokens: response.usage.outputTokens,
|
|
@@ -2343,9 +2511,20 @@ async function askQuestions(assetId, questions, options) {
|
|
|
2343
2511
|
`Expected ${questions.length} answers but received ${analysisResponse.result.answers.length}`
|
|
2344
2512
|
);
|
|
2345
2513
|
}
|
|
2514
|
+
const answers = analysisResponse.result.answers.map((raw) => {
|
|
2515
|
+
const isSkipped = raw.skipped || raw.answer === SKIP_SENTINEL;
|
|
2516
|
+
return {
|
|
2517
|
+
// Strip numbering prefix (e.g., "1. " or "2. ") from questions
|
|
2518
|
+
question: raw.question.replace(/^\d+\.\s*/, ""),
|
|
2519
|
+
confidence: isSkipped ? 0 : Math.min(1, Math.max(0, raw.confidence)),
|
|
2520
|
+
reasoning: raw.reasoning,
|
|
2521
|
+
skipped: isSkipped,
|
|
2522
|
+
...isSkipped ? {} : { answer: raw.answer }
|
|
2523
|
+
};
|
|
2524
|
+
});
|
|
2346
2525
|
return {
|
|
2347
2526
|
assetId,
|
|
2348
|
-
answers: analysisResponse.result.answers,
|
|
2527
|
+
answers,
|
|
2349
2528
|
storyboardUrl: imageUrl,
|
|
2350
2529
|
usage: {
|
|
2351
2530
|
...analysisResponse.usage,
|
|
@@ -2964,6 +3143,429 @@ async function generateChapters(assetId, languageCode, options = {}) {
|
|
|
2964
3143
|
};
|
|
2965
3144
|
}
|
|
2966
3145
|
|
|
3146
|
+
// src/workflows/edit-captions.ts
|
|
3147
|
+
import { generateText as generateText4, Output as Output4 } from "ai";
|
|
3148
|
+
import dedent4 from "dedent";
|
|
3149
|
+
import { z as z5 } from "zod";
|
|
3150
|
+
|
|
3151
|
+
// src/lib/mux-tracks.ts
|
|
3152
|
+
async function fetchVttFromMux(vttUrl) {
|
|
3153
|
+
"use step";
|
|
3154
|
+
const vttResponse = await fetch(vttUrl);
|
|
3155
|
+
if (!vttResponse.ok) {
|
|
3156
|
+
throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
|
|
3157
|
+
}
|
|
3158
|
+
return vttResponse.text();
|
|
3159
|
+
}
|
|
3160
|
+
async function createTextTrackOnMux(assetId, languageCode, trackName, presignedUrl, credentials) {
|
|
3161
|
+
"use step";
|
|
3162
|
+
const muxClient = await resolveMuxClient(credentials);
|
|
3163
|
+
const mux = await muxClient.createClient();
|
|
3164
|
+
const trackResponse = await mux.video.assets.createTrack(assetId, {
|
|
3165
|
+
type: "text",
|
|
3166
|
+
text_type: "subtitles",
|
|
3167
|
+
language_code: languageCode,
|
|
3168
|
+
name: trackName,
|
|
3169
|
+
url: presignedUrl
|
|
3170
|
+
});
|
|
3171
|
+
if (!trackResponse.id) {
|
|
3172
|
+
throw new Error("Failed to create text track: no track ID returned from Mux");
|
|
3173
|
+
}
|
|
3174
|
+
return trackResponse.id;
|
|
3175
|
+
}
|
|
3176
|
+
|
|
3177
|
+
// src/lib/storage-adapter.ts
|
|
3178
|
+
function requireCredentials(accessKeyId, secretAccessKey) {
|
|
3179
|
+
if (!accessKeyId || !secretAccessKey) {
|
|
3180
|
+
throw new Error(
|
|
3181
|
+
"S3 credentials are required for default storage operations. Provide S3_ACCESS_KEY_ID and S3_SECRET_ACCESS_KEY or pass options.storageAdapter."
|
|
3182
|
+
);
|
|
3183
|
+
}
|
|
3184
|
+
return { accessKeyId, secretAccessKey };
|
|
3185
|
+
}
|
|
3186
|
+
async function putObjectWithStorageAdapter(input, adapter) {
|
|
3187
|
+
if (adapter) {
|
|
3188
|
+
await adapter.putObject(input);
|
|
3189
|
+
return;
|
|
3190
|
+
}
|
|
3191
|
+
const credentials = requireCredentials(input.accessKeyId, input.secretAccessKey);
|
|
3192
|
+
await putObjectToS3({
|
|
3193
|
+
accessKeyId: credentials.accessKeyId,
|
|
3194
|
+
secretAccessKey: credentials.secretAccessKey,
|
|
3195
|
+
endpoint: input.endpoint,
|
|
3196
|
+
region: input.region,
|
|
3197
|
+
bucket: input.bucket,
|
|
3198
|
+
key: input.key,
|
|
3199
|
+
body: input.body,
|
|
3200
|
+
contentType: input.contentType
|
|
3201
|
+
});
|
|
3202
|
+
}
|
|
3203
|
+
async function createPresignedGetUrlWithStorageAdapter(input, adapter) {
|
|
3204
|
+
if (adapter) {
|
|
3205
|
+
return adapter.createPresignedGetUrl(input);
|
|
3206
|
+
}
|
|
3207
|
+
const credentials = requireCredentials(input.accessKeyId, input.secretAccessKey);
|
|
3208
|
+
return createPresignedGetUrl({
|
|
3209
|
+
accessKeyId: credentials.accessKeyId,
|
|
3210
|
+
secretAccessKey: credentials.secretAccessKey,
|
|
3211
|
+
endpoint: input.endpoint,
|
|
3212
|
+
region: input.region,
|
|
3213
|
+
bucket: input.bucket,
|
|
3214
|
+
key: input.key,
|
|
3215
|
+
expiresInSeconds: input.expiresInSeconds
|
|
3216
|
+
});
|
|
3217
|
+
}
|
|
3218
|
+
|
|
3219
|
+
// src/workflows/edit-captions.ts
|
|
3220
|
+
var profanityDetectionSchema = z5.object({
|
|
3221
|
+
profanity: z5.array(z5.string()).describe(
|
|
3222
|
+
"Unique profane words or short phrases exactly as they appear in the transcript text. Include each distinct form only once (e.g., if 'fuck' and 'fucking' both appear, list both)."
|
|
3223
|
+
)
|
|
3224
|
+
});
|
|
3225
|
+
var SYSTEM_PROMPT3 = dedent4`
|
|
3226
|
+
You are a content moderation assistant. Your task is to identify profane, vulgar, or obscene
|
|
3227
|
+
words and phrases in subtitle text. Return ONLY the exact profane words or phrases as they appear
|
|
3228
|
+
in the text. Do not modify, censor, or paraphrase them. Do not include words that are merely
|
|
3229
|
+
informal or slang but not profane. Focus on words that would be bleeped on broadcast television.`;
|
|
3230
|
+
function transformCueText(rawVtt, transform) {
|
|
3231
|
+
const lines = rawVtt.split("\n");
|
|
3232
|
+
let inCueText = false;
|
|
3233
|
+
let currentCueStartTime = 0;
|
|
3234
|
+
const transformed = lines.map((line) => {
|
|
3235
|
+
if (line.includes("-->")) {
|
|
3236
|
+
const startTimestamp = line.split("-->")[0].trim();
|
|
3237
|
+
currentCueStartTime = vttTimestampToSeconds(startTimestamp);
|
|
3238
|
+
inCueText = true;
|
|
3239
|
+
return line;
|
|
3240
|
+
}
|
|
3241
|
+
if (line.trim() === "") {
|
|
3242
|
+
inCueText = false;
|
|
3243
|
+
return line;
|
|
3244
|
+
}
|
|
3245
|
+
if (inCueText) {
|
|
3246
|
+
return transform(line, currentCueStartTime);
|
|
3247
|
+
}
|
|
3248
|
+
return line;
|
|
3249
|
+
});
|
|
3250
|
+
return transformed.join("\n");
|
|
3251
|
+
}
|
|
3252
|
+
function buildReplacementRegex(words) {
|
|
3253
|
+
const filtered = words.filter((w) => w.length > 0);
|
|
3254
|
+
if (filtered.length === 0)
|
|
3255
|
+
return null;
|
|
3256
|
+
filtered.sort((a, b) => b.length - a.length);
|
|
3257
|
+
const escaped = filtered.map((w) => w.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
|
|
3258
|
+
const pattern = escaped.join("|");
|
|
3259
|
+
return new RegExp(`\\b(?:${pattern})\\b`, "gi");
|
|
3260
|
+
}
|
|
3261
|
+
function createReplacer(mode) {
|
|
3262
|
+
switch (mode) {
|
|
3263
|
+
case "blank":
|
|
3264
|
+
return (match) => `[${"_".repeat(match.length)}]`;
|
|
3265
|
+
case "remove":
|
|
3266
|
+
return () => "";
|
|
3267
|
+
case "mask":
|
|
3268
|
+
return (match) => "?".repeat(match.length);
|
|
3269
|
+
}
|
|
3270
|
+
}
|
|
3271
|
+
function censorVttContent(rawVtt, profanity, mode) {
|
|
3272
|
+
if (profanity.length === 0) {
|
|
3273
|
+
return { censoredVtt: rawVtt, replacements: [] };
|
|
3274
|
+
}
|
|
3275
|
+
const regex = buildReplacementRegex(profanity);
|
|
3276
|
+
if (!regex) {
|
|
3277
|
+
return { censoredVtt: rawVtt, replacements: [] };
|
|
3278
|
+
}
|
|
3279
|
+
const replacer = createReplacer(mode);
|
|
3280
|
+
const replacements = [];
|
|
3281
|
+
const censoredVtt = transformCueText(rawVtt, (line, cueStartTime) => {
|
|
3282
|
+
return line.replace(regex, (match) => {
|
|
3283
|
+
const after = replacer(match);
|
|
3284
|
+
replacements.push({ cueStartTime, before: match, after });
|
|
3285
|
+
return after;
|
|
3286
|
+
});
|
|
3287
|
+
});
|
|
3288
|
+
return { censoredVtt, replacements };
|
|
3289
|
+
}
|
|
3290
|
+
function applyOverrideLists(detected, alwaysCensor, neverCensor) {
|
|
3291
|
+
const seen = new Set(detected.map((w) => w.toLowerCase()));
|
|
3292
|
+
const merged = [...detected];
|
|
3293
|
+
for (const word of alwaysCensor) {
|
|
3294
|
+
const lower = word.toLowerCase();
|
|
3295
|
+
if (!seen.has(lower)) {
|
|
3296
|
+
seen.add(lower);
|
|
3297
|
+
merged.push(word);
|
|
3298
|
+
}
|
|
3299
|
+
}
|
|
3300
|
+
const neverSet = new Set(neverCensor.map((w) => w.toLowerCase()));
|
|
3301
|
+
return merged.filter((w) => !neverSet.has(w.toLowerCase()));
|
|
3302
|
+
}
|
|
3303
|
+
function applyReplacements(rawVtt, replacements) {
|
|
3304
|
+
const filtered = replacements.filter((r) => r.find.length > 0);
|
|
3305
|
+
if (filtered.length === 0) {
|
|
3306
|
+
return { editedVtt: rawVtt, replacements: [] };
|
|
3307
|
+
}
|
|
3308
|
+
const records = [];
|
|
3309
|
+
const editedVtt = transformCueText(rawVtt, (line, cueStartTime) => {
|
|
3310
|
+
let result = line;
|
|
3311
|
+
for (const { find, replace } of filtered) {
|
|
3312
|
+
const escaped = find.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
3313
|
+
const regex = new RegExp(`\\b${escaped}\\b`, "g");
|
|
3314
|
+
result = result.replace(regex, (match) => {
|
|
3315
|
+
records.push({ cueStartTime, before: match, after: replace });
|
|
3316
|
+
return replace;
|
|
3317
|
+
});
|
|
3318
|
+
}
|
|
3319
|
+
return result;
|
|
3320
|
+
});
|
|
3321
|
+
return { editedVtt, replacements: records };
|
|
3322
|
+
}
|
|
3323
|
+
async function identifyProfanityWithAI({
|
|
3324
|
+
plainText,
|
|
3325
|
+
provider,
|
|
3326
|
+
modelId,
|
|
3327
|
+
credentials
|
|
3328
|
+
}) {
|
|
3329
|
+
"use step";
|
|
3330
|
+
const model = await createLanguageModelFromConfig(provider, modelId, credentials);
|
|
3331
|
+
const response = await generateText4({
|
|
3332
|
+
model,
|
|
3333
|
+
output: Output4.object({ schema: profanityDetectionSchema }),
|
|
3334
|
+
messages: [
|
|
3335
|
+
{
|
|
3336
|
+
role: "system",
|
|
3337
|
+
content: SYSTEM_PROMPT3
|
|
3338
|
+
},
|
|
3339
|
+
{
|
|
3340
|
+
role: "user",
|
|
3341
|
+
content: `Identify all profane words and phrases in the following subtitle transcript. Return each unique profane word or phrase exactly as it appears in the text.
|
|
3342
|
+
|
|
3343
|
+
<transcript>
|
|
3344
|
+
${plainText}
|
|
3345
|
+
</transcript>`
|
|
3346
|
+
}
|
|
3347
|
+
]
|
|
3348
|
+
});
|
|
3349
|
+
return {
|
|
3350
|
+
profanity: response.output.profanity,
|
|
3351
|
+
usage: {
|
|
3352
|
+
inputTokens: response.usage.inputTokens,
|
|
3353
|
+
outputTokens: response.usage.outputTokens,
|
|
3354
|
+
totalTokens: response.usage.totalTokens,
|
|
3355
|
+
reasoningTokens: response.usage.reasoningTokens,
|
|
3356
|
+
cachedInputTokens: response.usage.cachedInputTokens
|
|
3357
|
+
}
|
|
3358
|
+
};
|
|
3359
|
+
}
|
|
3360
|
+
async function uploadEditedVttToS3({
|
|
3361
|
+
editedVtt,
|
|
3362
|
+
assetId,
|
|
3363
|
+
trackId,
|
|
3364
|
+
s3Endpoint,
|
|
3365
|
+
s3Region,
|
|
3366
|
+
s3Bucket,
|
|
3367
|
+
storageAdapter,
|
|
3368
|
+
s3SignedUrlExpirySeconds
|
|
3369
|
+
}) {
|
|
3370
|
+
"use step";
|
|
3371
|
+
const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
|
|
3372
|
+
const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
|
|
3373
|
+
const vttKey = `edited/${assetId}/${trackId}-edited-${Date.now()}.vtt`;
|
|
3374
|
+
await putObjectWithStorageAdapter({
|
|
3375
|
+
accessKeyId: s3AccessKeyId,
|
|
3376
|
+
secretAccessKey: s3SecretAccessKey,
|
|
3377
|
+
endpoint: s3Endpoint,
|
|
3378
|
+
region: s3Region,
|
|
3379
|
+
bucket: s3Bucket,
|
|
3380
|
+
key: vttKey,
|
|
3381
|
+
body: editedVtt,
|
|
3382
|
+
contentType: "text/vtt"
|
|
3383
|
+
}, storageAdapter);
|
|
3384
|
+
return createPresignedGetUrlWithStorageAdapter({
|
|
3385
|
+
accessKeyId: s3AccessKeyId,
|
|
3386
|
+
secretAccessKey: s3SecretAccessKey,
|
|
3387
|
+
endpoint: s3Endpoint,
|
|
3388
|
+
region: s3Region,
|
|
3389
|
+
bucket: s3Bucket,
|
|
3390
|
+
key: vttKey,
|
|
3391
|
+
expiresInSeconds: s3SignedUrlExpirySeconds ?? 86400
|
|
3392
|
+
}, storageAdapter);
|
|
3393
|
+
}
|
|
3394
|
+
async function deleteTrackOnMux(assetId, trackId, credentials) {
|
|
3395
|
+
"use step";
|
|
3396
|
+
const muxClient = await resolveMuxClient(credentials);
|
|
3397
|
+
const mux = await muxClient.createClient();
|
|
3398
|
+
await mux.video.assets.deleteTrack(assetId, trackId);
|
|
3399
|
+
}
|
|
3400
|
+
async function editCaptions(assetId, trackId, options) {
|
|
3401
|
+
"use workflow";
|
|
3402
|
+
const {
|
|
3403
|
+
provider,
|
|
3404
|
+
model,
|
|
3405
|
+
autoCensorProfanity: autoCensorOption,
|
|
3406
|
+
replacements: replacementsOption,
|
|
3407
|
+
deleteOriginalTrack,
|
|
3408
|
+
uploadToMux: uploadToMuxOption,
|
|
3409
|
+
s3Endpoint: providedS3Endpoint,
|
|
3410
|
+
s3Region: providedS3Region,
|
|
3411
|
+
s3Bucket: providedS3Bucket,
|
|
3412
|
+
trackNameSuffix,
|
|
3413
|
+
storageAdapter,
|
|
3414
|
+
credentials
|
|
3415
|
+
} = options;
|
|
3416
|
+
const hasAutoCensor = !!autoCensorOption;
|
|
3417
|
+
const hasReplacements = !!replacementsOption && replacementsOption.length > 0;
|
|
3418
|
+
if (!hasAutoCensor && !hasReplacements) {
|
|
3419
|
+
throw new Error("At least one of autoCensorProfanity or replacements must be provided.");
|
|
3420
|
+
}
|
|
3421
|
+
if (autoCensorOption && !provider) {
|
|
3422
|
+
throw new Error("provider is required when using autoCensorProfanity.");
|
|
3423
|
+
}
|
|
3424
|
+
const deleteOriginal = deleteOriginalTrack !== false;
|
|
3425
|
+
const uploadToMux = uploadToMuxOption !== false;
|
|
3426
|
+
const s3Endpoint = providedS3Endpoint ?? env_default.S3_ENDPOINT;
|
|
3427
|
+
const s3Region = providedS3Region ?? env_default.S3_REGION ?? "auto";
|
|
3428
|
+
const s3Bucket = providedS3Bucket ?? env_default.S3_BUCKET;
|
|
3429
|
+
const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
|
|
3430
|
+
const s3SecretAccessKey = env_default.S3_SECRET_ACCESS_KEY;
|
|
3431
|
+
if (uploadToMux && (!s3Endpoint || !s3Bucket || !storageAdapter && (!s3AccessKeyId || !s3SecretAccessKey))) {
|
|
3432
|
+
throw new Error(
|
|
3433
|
+
"Storage configuration is required for uploading to Mux. Provide s3Endpoint and s3Bucket. If no storageAdapter is supplied, also provide s3AccessKeyId and s3SecretAccessKey in options or set S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY_ID, and S3_SECRET_ACCESS_KEY environment variables."
|
|
3434
|
+
);
|
|
3435
|
+
}
|
|
3436
|
+
const { asset: assetData, playbackId, policy } = await getPlaybackIdForAsset(assetId, credentials);
|
|
3437
|
+
const assetDurationSeconds = getAssetDurationSecondsFromAsset(assetData);
|
|
3438
|
+
const signingContext = await resolveMuxSigningContext(credentials);
|
|
3439
|
+
if (policy === "signed" && !signingContext) {
|
|
3440
|
+
throw new Error(
|
|
3441
|
+
"Signed playback ID requires signing credentials. Set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
|
|
3442
|
+
);
|
|
3443
|
+
}
|
|
3444
|
+
const readyTextTracks = getReadyTextTracks(assetData);
|
|
3445
|
+
const sourceTrack = readyTextTracks.find((t) => t.id === trackId);
|
|
3446
|
+
if (!sourceTrack) {
|
|
3447
|
+
const availableTrackIds = readyTextTracks.map((t) => t.id).filter(Boolean).join(", ");
|
|
3448
|
+
throw new Error(
|
|
3449
|
+
`Track '${trackId}' not found or not ready on asset '${assetId}'. Available track IDs: ${availableTrackIds || "none"}`
|
|
3450
|
+
);
|
|
3451
|
+
}
|
|
3452
|
+
const vttUrl = await buildTranscriptUrl(playbackId, trackId, policy === "signed", credentials);
|
|
3453
|
+
let vttContent;
|
|
3454
|
+
try {
|
|
3455
|
+
vttContent = await fetchVttFromMux(vttUrl);
|
|
3456
|
+
} catch (error) {
|
|
3457
|
+
throw new Error(`Failed to fetch VTT content: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
3458
|
+
}
|
|
3459
|
+
let editedVtt = vttContent;
|
|
3460
|
+
let totalReplacementCount = 0;
|
|
3461
|
+
let autoCensorResult;
|
|
3462
|
+
let usage;
|
|
3463
|
+
if (autoCensorOption) {
|
|
3464
|
+
const { mode = "blank", alwaysCensor = [], neverCensor = [] } = autoCensorOption;
|
|
3465
|
+
const plainText = extractTextFromVTT(vttContent);
|
|
3466
|
+
if (!plainText.trim()) {
|
|
3467
|
+
throw new Error("Track transcript is empty; nothing to censor.");
|
|
3468
|
+
}
|
|
3469
|
+
const modelConfig = resolveLanguageModelConfig({
|
|
3470
|
+
...options,
|
|
3471
|
+
provider,
|
|
3472
|
+
model
|
|
3473
|
+
});
|
|
3474
|
+
let detectedProfanity;
|
|
3475
|
+
try {
|
|
3476
|
+
const result = await identifyProfanityWithAI({
|
|
3477
|
+
plainText,
|
|
3478
|
+
provider: modelConfig.provider,
|
|
3479
|
+
modelId: modelConfig.modelId,
|
|
3480
|
+
credentials
|
|
3481
|
+
});
|
|
3482
|
+
detectedProfanity = result.profanity;
|
|
3483
|
+
usage = result.usage;
|
|
3484
|
+
} catch (error) {
|
|
3485
|
+
throw new Error(`Failed to detect profanity with ${modelConfig.provider}: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
3486
|
+
}
|
|
3487
|
+
const finalProfanity = applyOverrideLists(detectedProfanity, alwaysCensor, neverCensor);
|
|
3488
|
+
const { censoredVtt, replacements: censorReplacements } = censorVttContent(editedVtt, finalProfanity, mode);
|
|
3489
|
+
editedVtt = censoredVtt;
|
|
3490
|
+
totalReplacementCount += censorReplacements.length;
|
|
3491
|
+
autoCensorResult = { replacements: censorReplacements };
|
|
3492
|
+
}
|
|
3493
|
+
let replacementsResult;
|
|
3494
|
+
if (replacementsOption && replacementsOption.length > 0) {
|
|
3495
|
+
const { editedVtt: afterReplacements, replacements: staticReplacements } = applyReplacements(editedVtt, replacementsOption);
|
|
3496
|
+
editedVtt = afterReplacements;
|
|
3497
|
+
totalReplacementCount += staticReplacements.length;
|
|
3498
|
+
replacementsResult = { replacements: staticReplacements };
|
|
3499
|
+
}
|
|
3500
|
+
const usageWithMetadata = usage ? {
|
|
3501
|
+
...usage,
|
|
3502
|
+
metadata: {
|
|
3503
|
+
assetDurationSeconds
|
|
3504
|
+
}
|
|
3505
|
+
} : void 0;
|
|
3506
|
+
if (!uploadToMux) {
|
|
3507
|
+
return {
|
|
3508
|
+
assetId,
|
|
3509
|
+
trackId,
|
|
3510
|
+
originalVtt: vttContent,
|
|
3511
|
+
editedVtt,
|
|
3512
|
+
totalReplacementCount,
|
|
3513
|
+
autoCensorProfanity: autoCensorResult,
|
|
3514
|
+
replacements: replacementsResult,
|
|
3515
|
+
usage: usageWithMetadata
|
|
3516
|
+
};
|
|
3517
|
+
}
|
|
3518
|
+
let presignedUrl;
|
|
3519
|
+
try {
|
|
3520
|
+
presignedUrl = await uploadEditedVttToS3({
|
|
3521
|
+
editedVtt,
|
|
3522
|
+
assetId,
|
|
3523
|
+
trackId,
|
|
3524
|
+
s3Endpoint,
|
|
3525
|
+
s3Region,
|
|
3526
|
+
s3Bucket,
|
|
3527
|
+
storageAdapter,
|
|
3528
|
+
s3SignedUrlExpirySeconds: options.s3SignedUrlExpirySeconds
|
|
3529
|
+
});
|
|
3530
|
+
} catch (error) {
|
|
3531
|
+
throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
3532
|
+
}
|
|
3533
|
+
let uploadedTrackId;
|
|
3534
|
+
try {
|
|
3535
|
+
const languageCode = sourceTrack.language_code || "en";
|
|
3536
|
+
const suffix = trackNameSuffix ?? "edited";
|
|
3537
|
+
const trackName = `${sourceTrack.name || "Subtitles"} (${suffix})`;
|
|
3538
|
+
uploadedTrackId = await createTextTrackOnMux(
|
|
3539
|
+
assetId,
|
|
3540
|
+
languageCode,
|
|
3541
|
+
trackName,
|
|
3542
|
+
presignedUrl,
|
|
3543
|
+
credentials
|
|
3544
|
+
);
|
|
3545
|
+
} catch (error) {
|
|
3546
|
+
console.warn(`Failed to add track to Mux asset: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
3547
|
+
}
|
|
3548
|
+
if (deleteOriginal && uploadedTrackId) {
|
|
3549
|
+
try {
|
|
3550
|
+
await deleteTrackOnMux(assetId, trackId, credentials);
|
|
3551
|
+
} catch (error) {
|
|
3552
|
+
console.warn(`Failed to delete original track: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
3553
|
+
}
|
|
3554
|
+
}
|
|
3555
|
+
return {
|
|
3556
|
+
assetId,
|
|
3557
|
+
trackId,
|
|
3558
|
+
originalVtt: vttContent,
|
|
3559
|
+
editedVtt,
|
|
3560
|
+
totalReplacementCount,
|
|
3561
|
+
autoCensorProfanity: autoCensorResult,
|
|
3562
|
+
replacements: replacementsResult,
|
|
3563
|
+
uploadedTrackId,
|
|
3564
|
+
presignedUrl,
|
|
3565
|
+
usage: usageWithMetadata
|
|
3566
|
+
};
|
|
3567
|
+
}
|
|
3568
|
+
|
|
2967
3569
|
// src/workflows/embeddings.ts
|
|
2968
3570
|
import { embed } from "ai";
|
|
2969
3571
|
function averageEmbeddings(embeddings) {
|
|
@@ -3611,16 +4213,18 @@ async function getModerationScores(assetId, options = {}) {
|
|
|
3611
4213
|
}
|
|
3612
4214
|
|
|
3613
4215
|
// src/workflows/summarization.ts
|
|
3614
|
-
import { generateText as
|
|
3615
|
-
import
|
|
3616
|
-
import { z as
|
|
3617
|
-
var
|
|
3618
|
-
var
|
|
3619
|
-
|
|
3620
|
-
|
|
3621
|
-
|
|
4216
|
+
import { generateText as generateText5, Output as Output5 } from "ai";
|
|
4217
|
+
import dedent5 from "dedent";
|
|
4218
|
+
import { z as z6 } from "zod";
|
|
4219
|
+
var DEFAULT_SUMMARY_KEYWORD_LIMIT = 10;
|
|
4220
|
+
var DEFAULT_TITLE_LENGTH = 10;
|
|
4221
|
+
var DEFAULT_DESCRIPTION_LENGTH = 50;
|
|
4222
|
+
var summarySchema = z6.object({
|
|
4223
|
+
keywords: z6.array(z6.string()),
|
|
4224
|
+
title: z6.string(),
|
|
4225
|
+
description: z6.string()
|
|
3622
4226
|
}).strict();
|
|
3623
|
-
var SUMMARY_OUTPUT =
|
|
4227
|
+
var SUMMARY_OUTPUT = Output5.object({
|
|
3624
4228
|
name: "summary_metadata",
|
|
3625
4229
|
description: "Structured summary with title, description, and keywords.",
|
|
3626
4230
|
schema: summarySchema
|
|
@@ -3631,10 +4235,49 @@ var TONE_INSTRUCTIONS = {
|
|
|
3631
4235
|
playful: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
|
|
3632
4236
|
professional: "Provide a professional, executive-level analysis suitable for business reporting."
|
|
3633
4237
|
};
|
|
4238
|
+
var DESCRIPTION_LENGTH_THRESHOLD_SMALL = 25;
|
|
4239
|
+
var DESCRIPTION_LENGTH_THRESHOLD_LARGE = 100;
|
|
4240
|
+
function buildDescriptionGuidance(wordCount, contentType) {
|
|
4241
|
+
if (wordCount < DESCRIPTION_LENGTH_THRESHOLD_SMALL) {
|
|
4242
|
+
if (contentType === "video") {
|
|
4243
|
+
return dedent5`A brief summary of the video in no more than ${wordCount} words. Shorter is fine.
|
|
4244
|
+
Focus on the single most important subject or action.
|
|
4245
|
+
Write in present tense.`;
|
|
4246
|
+
}
|
|
4247
|
+
return dedent5`A brief summary of the audio content in no more than ${wordCount} words. Shorter is fine.
|
|
4248
|
+
Focus on the single most important topic or theme.
|
|
4249
|
+
Write in present tense.`;
|
|
4250
|
+
}
|
|
4251
|
+
if (wordCount > DESCRIPTION_LENGTH_THRESHOLD_LARGE) {
|
|
4252
|
+
if (contentType === "video") {
|
|
4253
|
+
return dedent5`A detailed summary that describes what happens across the video.
|
|
4254
|
+
Never exceed ${wordCount} words, but shorter is perfectly fine. You may use multiple sentences.
|
|
4255
|
+
Be thorough: cover subjects, actions, setting, progression, and any notable details visible across frames.
|
|
4256
|
+
Write in present tense. Be specific about observable details rather than making assumptions.
|
|
4257
|
+
If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`;
|
|
4258
|
+
}
|
|
4259
|
+
return dedent5`A detailed summary that describes the audio content.
|
|
4260
|
+
Never exceed ${wordCount} words, but shorter is perfectly fine. You may use multiple sentences.
|
|
4261
|
+
Be thorough: cover topics, speakers, themes, progression, and any notable insights.
|
|
4262
|
+
Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
|
|
4263
|
+
Focus on the spoken content and any key insights, dialogue, or narrative elements.`;
|
|
4264
|
+
}
|
|
4265
|
+
if (contentType === "video") {
|
|
4266
|
+
return dedent5`A summary that describes what happens across the video.
|
|
4267
|
+
Never exceed ${wordCount} words, but shorter is perfectly fine. You may use multiple sentences.
|
|
4268
|
+
Cover the main subjects, actions, setting, and any notable progression visible across frames.
|
|
4269
|
+
Write in present tense. Be specific about observable details rather than making assumptions.
|
|
4270
|
+
If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`;
|
|
4271
|
+
}
|
|
4272
|
+
return dedent5`A summary that describes the audio content.
|
|
4273
|
+
Never exceed ${wordCount} words, but shorter is perfectly fine. You may use multiple sentences.
|
|
4274
|
+
Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
|
|
4275
|
+
Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
|
|
4276
|
+
Focus on the spoken content and any key insights, dialogue, or narrative elements.`;
|
|
4277
|
+
}
|
|
3634
4278
|
function createSummarizationBuilder({ titleLength, descriptionLength, tagCount } = {}) {
|
|
3635
|
-
const
|
|
3636
|
-
const
|
|
3637
|
-
const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
|
|
4279
|
+
const titleLimit = titleLength ?? DEFAULT_TITLE_LENGTH;
|
|
4280
|
+
const keywordLimit = tagCount ?? DEFAULT_SUMMARY_KEYWORD_LIMIT;
|
|
3638
4281
|
return createPromptBuilder({
|
|
3639
4282
|
template: {
|
|
3640
4283
|
task: {
|
|
@@ -3643,23 +4286,20 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
|
|
|
3643
4286
|
},
|
|
3644
4287
|
title: {
|
|
3645
4288
|
tag: "title_requirements",
|
|
3646
|
-
content:
|
|
3647
|
-
A
|
|
3648
|
-
${
|
|
3649
|
-
|
|
3650
|
-
|
|
4289
|
+
content: dedent5`
|
|
4290
|
+
A concise, label-style title — not a sentence or description.
|
|
4291
|
+
Never exceed ${titleLimit} words, but shorter is better.
|
|
4292
|
+
Think of how a video card title, playlist entry, or file name would read — e.g. "Predator: Badlands Trailer" or "Chef Prepares Holiday Feast".
|
|
4293
|
+
Start with the primary subject or topic. Never begin with "A video of" or similar phrasing.
|
|
4294
|
+
Use specific nouns over lengthy descriptions. Avoid clauses, conjunctions, or narrative structure.`
|
|
3651
4295
|
},
|
|
3652
4296
|
description: {
|
|
3653
4297
|
tag: "description_requirements",
|
|
3654
|
-
content:
|
|
3655
|
-
A concise summary (${descConstraint}) that describes what happens across the video.
|
|
3656
|
-
Cover the main subjects, actions, setting, and any notable progression visible across frames.
|
|
3657
|
-
Write in present tense. Be specific about observable details rather than making assumptions.
|
|
3658
|
-
If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
|
|
4298
|
+
content: buildDescriptionGuidance(descriptionLength ?? DEFAULT_DESCRIPTION_LENGTH, "video")
|
|
3659
4299
|
},
|
|
3660
4300
|
keywords: {
|
|
3661
4301
|
tag: "keywords_requirements",
|
|
3662
|
-
content:
|
|
4302
|
+
content: dedent5`
|
|
3663
4303
|
Specific, searchable terms (up to ${keywordLimit}) that capture:
|
|
3664
4304
|
- Primary subjects (people, animals, objects)
|
|
3665
4305
|
- Actions and activities being performed
|
|
@@ -3671,7 +4311,7 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
|
|
|
3671
4311
|
},
|
|
3672
4312
|
qualityGuidelines: {
|
|
3673
4313
|
tag: "quality_guidelines",
|
|
3674
|
-
content:
|
|
4314
|
+
content: dedent5`
|
|
3675
4315
|
- Examine all frames to understand the full context and progression
|
|
3676
4316
|
- Be precise: "golden retriever" is better than "dog" when identifiable
|
|
3677
4317
|
- Capture the narrative: what begins, develops, and concludes
|
|
@@ -3682,9 +4322,8 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
|
|
|
3682
4322
|
});
|
|
3683
4323
|
}
|
|
3684
4324
|
function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {}) {
|
|
3685
|
-
const
|
|
3686
|
-
const
|
|
3687
|
-
const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
|
|
4325
|
+
const titleLimit = titleLength ?? DEFAULT_TITLE_LENGTH;
|
|
4326
|
+
const keywordLimit = tagCount ?? DEFAULT_SUMMARY_KEYWORD_LIMIT;
|
|
3688
4327
|
return createPromptBuilder({
|
|
3689
4328
|
template: {
|
|
3690
4329
|
task: {
|
|
@@ -3693,23 +4332,20 @@ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {
|
|
|
3693
4332
|
},
|
|
3694
4333
|
title: {
|
|
3695
4334
|
tag: "title_requirements",
|
|
3696
|
-
content:
|
|
3697
|
-
A
|
|
3698
|
-
${
|
|
3699
|
-
|
|
3700
|
-
|
|
4335
|
+
content: dedent5`
|
|
4336
|
+
A concise, label-style title — not a sentence or description.
|
|
4337
|
+
Never exceed ${titleLimit} words, but shorter is better.
|
|
4338
|
+
Think of how a podcast episode title or playlist entry would read — e.g. "Weekly News Roundup" or "Interview with Dr. Smith".
|
|
4339
|
+
Start with the primary subject or topic. Never begin with "An audio of" or similar phrasing.
|
|
4340
|
+
Use specific nouns over lengthy descriptions. Avoid clauses, conjunctions, or narrative structure.`
|
|
3701
4341
|
},
|
|
3702
4342
|
description: {
|
|
3703
4343
|
tag: "description_requirements",
|
|
3704
|
-
content:
|
|
3705
|
-
A concise summary (${descConstraint}) that describes the audio content.
|
|
3706
|
-
Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
|
|
3707
|
-
Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
|
|
3708
|
-
Focus on the spoken content and any key insights, dialogue, or narrative elements.`
|
|
4344
|
+
content: buildDescriptionGuidance(descriptionLength ?? DEFAULT_DESCRIPTION_LENGTH, "audio")
|
|
3709
4345
|
},
|
|
3710
4346
|
keywords: {
|
|
3711
4347
|
tag: "keywords_requirements",
|
|
3712
|
-
content:
|
|
4348
|
+
content: dedent5`
|
|
3713
4349
|
Specific, searchable terms (up to ${keywordLimit}) that capture:
|
|
3714
4350
|
- Primary topics and themes
|
|
3715
4351
|
- Speakers or presenters (if named)
|
|
@@ -3721,7 +4357,7 @@ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {
|
|
|
3721
4357
|
},
|
|
3722
4358
|
qualityGuidelines: {
|
|
3723
4359
|
tag: "quality_guidelines",
|
|
3724
|
-
content:
|
|
4360
|
+
content: dedent5`
|
|
3725
4361
|
- Analyze the full transcript to understand context and themes
|
|
3726
4362
|
- Be precise: use specific terminology when mentioned
|
|
3727
4363
|
- Capture the narrative: what is introduced, discussed, and concluded
|
|
@@ -3731,7 +4367,7 @@ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {
|
|
|
3731
4367
|
sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
|
|
3732
4368
|
});
|
|
3733
4369
|
}
|
|
3734
|
-
var
|
|
4370
|
+
var SYSTEM_PROMPT4 = dedent5`
|
|
3735
4371
|
<role>
|
|
3736
4372
|
You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
|
|
3737
4373
|
</role>
|
|
@@ -3788,7 +4424,7 @@ var SYSTEM_PROMPT3 = dedent4`
|
|
|
3788
4424
|
|
|
3789
4425
|
Write as if describing reality, not describing a recording of reality.
|
|
3790
4426
|
</language_guidelines>`;
|
|
3791
|
-
var AUDIO_ONLY_SYSTEM_PROMPT =
|
|
4427
|
+
var AUDIO_ONLY_SYSTEM_PROMPT = dedent5`
|
|
3792
4428
|
<role>
|
|
3793
4429
|
You are an audio content analyst specializing in transcript analysis and metadata generation.
|
|
3794
4430
|
</role>
|
|
@@ -3856,6 +4492,11 @@ function buildUserPrompt4({
|
|
|
3856
4492
|
const contextSections = [createToneSection(TONE_INSTRUCTIONS[tone])];
|
|
3857
4493
|
if (languageName) {
|
|
3858
4494
|
contextSections.push(createLanguageSection(languageName));
|
|
4495
|
+
} else {
|
|
4496
|
+
contextSections.push({
|
|
4497
|
+
tag: "language",
|
|
4498
|
+
content: "Respond in English. Never switch languages to satisfy length constraints."
|
|
4499
|
+
});
|
|
3859
4500
|
}
|
|
3860
4501
|
if (transcriptText) {
|
|
3861
4502
|
const format = isCleanTranscript ? "plain text" : "WebVTT";
|
|
@@ -3868,7 +4509,7 @@ function buildUserPrompt4({
|
|
|
3868
4509
|
async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, systemPrompt, credentials) {
|
|
3869
4510
|
"use step";
|
|
3870
4511
|
const model = await createLanguageModelFromConfig(provider, modelId, credentials);
|
|
3871
|
-
const response = await
|
|
4512
|
+
const response = await generateText5({
|
|
3872
4513
|
model,
|
|
3873
4514
|
output: SUMMARY_OUTPUT,
|
|
3874
4515
|
messages: [
|
|
@@ -3903,7 +4544,7 @@ async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, s
|
|
|
3903
4544
|
async function analyzeAudioOnly(provider, modelId, userPrompt, systemPrompt, credentials) {
|
|
3904
4545
|
"use step";
|
|
3905
4546
|
const model = await createLanguageModelFromConfig(provider, modelId, credentials);
|
|
3906
|
-
const response = await
|
|
4547
|
+
const response = await generateText5({
|
|
3907
4548
|
model,
|
|
3908
4549
|
output: SUMMARY_OUTPUT,
|
|
3909
4550
|
messages: [
|
|
@@ -3932,7 +4573,7 @@ async function analyzeAudioOnly(provider, modelId, userPrompt, systemPrompt, cre
|
|
|
3932
4573
|
}
|
|
3933
4574
|
};
|
|
3934
4575
|
}
|
|
3935
|
-
function normalizeKeywords(keywords, limit =
|
|
4576
|
+
function normalizeKeywords(keywords, limit = DEFAULT_SUMMARY_KEYWORD_LIMIT) {
|
|
3936
4577
|
if (!Array.isArray(keywords) || keywords.length === 0) {
|
|
3937
4578
|
return [];
|
|
3938
4579
|
}
|
|
@@ -4019,7 +4660,7 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
4019
4660
|
});
|
|
4020
4661
|
let analysisResponse;
|
|
4021
4662
|
let imageUrl;
|
|
4022
|
-
const systemPrompt = isAudioOnly ? AUDIO_ONLY_SYSTEM_PROMPT :
|
|
4663
|
+
const systemPrompt = isAudioOnly ? AUDIO_ONLY_SYSTEM_PROMPT : SYSTEM_PROMPT4;
|
|
4023
4664
|
try {
|
|
4024
4665
|
if (isAudioOnly) {
|
|
4025
4666
|
analysisResponse = await analyzeAudioOnly(
|
|
@@ -4072,7 +4713,7 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
4072
4713
|
assetId,
|
|
4073
4714
|
title: analysisResponse.result.title,
|
|
4074
4715
|
description: analysisResponse.result.description,
|
|
4075
|
-
tags: normalizeKeywords(analysisResponse.result.keywords, tagCount ??
|
|
4716
|
+
tags: normalizeKeywords(analysisResponse.result.keywords, tagCount ?? DEFAULT_SUMMARY_KEYWORD_LIMIT),
|
|
4076
4717
|
storyboardUrl: imageUrl,
|
|
4077
4718
|
// undefined for audio-only assets
|
|
4078
4719
|
usage: {
|
|
@@ -4085,52 +4726,10 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
4085
4726
|
};
|
|
4086
4727
|
}
|
|
4087
4728
|
|
|
4088
|
-
// src/lib/storage-adapter.ts
|
|
4089
|
-
function requireCredentials(accessKeyId, secretAccessKey) {
|
|
4090
|
-
if (!accessKeyId || !secretAccessKey) {
|
|
4091
|
-
throw new Error(
|
|
4092
|
-
"S3 credentials are required for default storage operations. Provide S3_ACCESS_KEY_ID and S3_SECRET_ACCESS_KEY or pass options.storageAdapter."
|
|
4093
|
-
);
|
|
4094
|
-
}
|
|
4095
|
-
return { accessKeyId, secretAccessKey };
|
|
4096
|
-
}
|
|
4097
|
-
async function putObjectWithStorageAdapter(input, adapter) {
|
|
4098
|
-
if (adapter) {
|
|
4099
|
-
await adapter.putObject(input);
|
|
4100
|
-
return;
|
|
4101
|
-
}
|
|
4102
|
-
const credentials = requireCredentials(input.accessKeyId, input.secretAccessKey);
|
|
4103
|
-
await putObjectToS3({
|
|
4104
|
-
accessKeyId: credentials.accessKeyId,
|
|
4105
|
-
secretAccessKey: credentials.secretAccessKey,
|
|
4106
|
-
endpoint: input.endpoint,
|
|
4107
|
-
region: input.region,
|
|
4108
|
-
bucket: input.bucket,
|
|
4109
|
-
key: input.key,
|
|
4110
|
-
body: input.body,
|
|
4111
|
-
contentType: input.contentType
|
|
4112
|
-
});
|
|
4113
|
-
}
|
|
4114
|
-
async function createPresignedGetUrlWithStorageAdapter(input, adapter) {
|
|
4115
|
-
if (adapter) {
|
|
4116
|
-
return adapter.createPresignedGetUrl(input);
|
|
4117
|
-
}
|
|
4118
|
-
const credentials = requireCredentials(input.accessKeyId, input.secretAccessKey);
|
|
4119
|
-
return createPresignedGetUrl({
|
|
4120
|
-
accessKeyId: credentials.accessKeyId,
|
|
4121
|
-
secretAccessKey: credentials.secretAccessKey,
|
|
4122
|
-
endpoint: input.endpoint,
|
|
4123
|
-
region: input.region,
|
|
4124
|
-
bucket: input.bucket,
|
|
4125
|
-
key: input.key,
|
|
4126
|
-
expiresInSeconds: input.expiresInSeconds
|
|
4127
|
-
});
|
|
4128
|
-
}
|
|
4129
|
-
|
|
4130
4729
|
// src/workflows/translate-audio.ts
|
|
4131
4730
|
var STATIC_RENDITION_POLL_INTERVAL_MS = 5e3;
|
|
4132
4731
|
var STATIC_RENDITION_MAX_ATTEMPTS = 36;
|
|
4133
|
-
async function
|
|
4732
|
+
async function sleep2(ms) {
|
|
4134
4733
|
"use step";
|
|
4135
4734
|
await new Promise((resolve) => setTimeout(resolve, ms));
|
|
4136
4735
|
}
|
|
@@ -4196,7 +4795,7 @@ async function waitForAudioStaticRendition({
|
|
|
4196
4795
|
console.warn(`\u2139\uFE0F Static rendition already ${status}. Waiting for it to finish...`);
|
|
4197
4796
|
}
|
|
4198
4797
|
for (let attempt = 1; attempt <= STATIC_RENDITION_MAX_ATTEMPTS; attempt++) {
|
|
4199
|
-
await
|
|
4798
|
+
await sleep2(STATIC_RENDITION_POLL_INTERVAL_MS);
|
|
4200
4799
|
currentAsset = await mux.video.assets.retrieve(assetId);
|
|
4201
4800
|
if (hasReadyAudioStaticRendition(currentAsset)) {
|
|
4202
4801
|
return currentAsset;
|
|
@@ -4303,7 +4902,8 @@ async function uploadDubbedAudioToS3({
|
|
|
4303
4902
|
s3Endpoint,
|
|
4304
4903
|
s3Region,
|
|
4305
4904
|
s3Bucket,
|
|
4306
|
-
storageAdapter
|
|
4905
|
+
storageAdapter,
|
|
4906
|
+
s3SignedUrlExpirySeconds
|
|
4307
4907
|
}) {
|
|
4308
4908
|
"use step";
|
|
4309
4909
|
const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
|
|
@@ -4326,10 +4926,11 @@ async function uploadDubbedAudioToS3({
|
|
|
4326
4926
|
region: s3Region,
|
|
4327
4927
|
bucket: s3Bucket,
|
|
4328
4928
|
key: audioKey,
|
|
4329
|
-
expiresInSeconds:
|
|
4929
|
+
expiresInSeconds: s3SignedUrlExpirySeconds ?? 86400
|
|
4330
4930
|
}, storageAdapter);
|
|
4931
|
+
const expiryHours = Math.round((s3SignedUrlExpirySeconds ?? 86400) / 3600);
|
|
4331
4932
|
console.warn(`\u2705 Audio uploaded successfully to: ${audioKey}`);
|
|
4332
|
-
console.warn(`\u{1F517} Generated presigned URL (expires in 1
|
|
4933
|
+
console.warn(`\u{1F517} Generated presigned URL (expires in ${expiryHours} hour${expiryHours === 1 ? "" : "s"})`);
|
|
4333
4934
|
return presignedUrl;
|
|
4334
4935
|
}
|
|
4335
4936
|
async function createAudioTrackOnMux(assetId, languageCode, presignedUrl, credentials) {
|
|
@@ -4428,7 +5029,7 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
4428
5029
|
const maxPollAttempts = 180;
|
|
4429
5030
|
let targetLanguages = [];
|
|
4430
5031
|
while (dubbingStatus === "dubbing" && pollAttempts < maxPollAttempts) {
|
|
4431
|
-
await
|
|
5032
|
+
await sleep2(1e4);
|
|
4432
5033
|
pollAttempts++;
|
|
4433
5034
|
try {
|
|
4434
5035
|
const statusResult = await checkElevenLabsDubbingStatus({
|
|
@@ -4493,7 +5094,8 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
4493
5094
|
s3Endpoint,
|
|
4494
5095
|
s3Region,
|
|
4495
5096
|
s3Bucket,
|
|
4496
|
-
storageAdapter: effectiveStorageAdapter
|
|
5097
|
+
storageAdapter: effectiveStorageAdapter,
|
|
5098
|
+
s3SignedUrlExpirySeconds: options.s3SignedUrlExpirySeconds
|
|
4497
5099
|
});
|
|
4498
5100
|
} catch (error) {
|
|
4499
5101
|
throw new Error(`Failed to upload audio to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
@@ -4531,24 +5133,24 @@ async function translateAudio(assetId, toLanguageCode, options = {}) {
|
|
|
4531
5133
|
// src/workflows/translate-captions.ts
|
|
4532
5134
|
import {
|
|
4533
5135
|
APICallError,
|
|
4534
|
-
generateText as
|
|
5136
|
+
generateText as generateText6,
|
|
4535
5137
|
NoObjectGeneratedError,
|
|
4536
|
-
Output as
|
|
5138
|
+
Output as Output6,
|
|
4537
5139
|
RetryError,
|
|
4538
5140
|
TypeValidationError
|
|
4539
5141
|
} from "ai";
|
|
4540
|
-
import
|
|
4541
|
-
import { z as
|
|
4542
|
-
var translationSchema =
|
|
4543
|
-
translation:
|
|
5142
|
+
import dedent6 from "dedent";
|
|
5143
|
+
import { z as z7 } from "zod";
|
|
5144
|
+
var translationSchema = z7.object({
|
|
5145
|
+
translation: z7.string()
|
|
4544
5146
|
});
|
|
4545
|
-
var
|
|
5147
|
+
var SYSTEM_PROMPT5 = dedent6`
|
|
4546
5148
|
You are a subtitle translation expert. Translate VTT subtitle files to the target language specified by the user.
|
|
4547
5149
|
You may receive either a full VTT file or a chunk from a larger VTT.
|
|
4548
5150
|
Preserve all timestamps, cue ordering, and VTT formatting exactly as they appear.
|
|
4549
5151
|
Return JSON with a single key "translation" containing the translated VTT content.
|
|
4550
5152
|
`;
|
|
4551
|
-
var CUE_TRANSLATION_SYSTEM_PROMPT =
|
|
5153
|
+
var CUE_TRANSLATION_SYSTEM_PROMPT = dedent6`
|
|
4552
5154
|
You are a subtitle translation expert.
|
|
4553
5155
|
You will receive a sequence of subtitle cues extracted from a VTT file.
|
|
4554
5156
|
Translate the cues to the requested target language while preserving their original order.
|
|
@@ -4710,14 +5312,6 @@ function buildTranslationChunkRequests(vttContent, assetDurationSeconds, chunkin
|
|
|
4710
5312
|
)
|
|
4711
5313
|
};
|
|
4712
5314
|
}
|
|
4713
|
-
async function fetchVttFromMux(vttUrl) {
|
|
4714
|
-
"use step";
|
|
4715
|
-
const vttResponse = await fetch(vttUrl);
|
|
4716
|
-
if (!vttResponse.ok) {
|
|
4717
|
-
throw new Error(`Failed to fetch VTT file: ${vttResponse.statusText}`);
|
|
4718
|
-
}
|
|
4719
|
-
return vttResponse.text();
|
|
4720
|
-
}
|
|
4721
5315
|
async function translateVttWithAI({
|
|
4722
5316
|
vttContent,
|
|
4723
5317
|
fromLanguageCode,
|
|
@@ -4728,13 +5322,13 @@ async function translateVttWithAI({
|
|
|
4728
5322
|
}) {
|
|
4729
5323
|
"use step";
|
|
4730
5324
|
const model = await createLanguageModelFromConfig(provider, modelId, credentials);
|
|
4731
|
-
const response = await
|
|
5325
|
+
const response = await generateText6({
|
|
4732
5326
|
model,
|
|
4733
|
-
output:
|
|
5327
|
+
output: Output6.object({ schema: translationSchema }),
|
|
4734
5328
|
messages: [
|
|
4735
5329
|
{
|
|
4736
5330
|
role: "system",
|
|
4737
|
-
content:
|
|
5331
|
+
content: SYSTEM_PROMPT5
|
|
4738
5332
|
},
|
|
4739
5333
|
{
|
|
4740
5334
|
role: "user",
|
|
@@ -4765,8 +5359,8 @@ async function translateCueChunkWithAI({
|
|
|
4765
5359
|
}) {
|
|
4766
5360
|
"use step";
|
|
4767
5361
|
const model = await createLanguageModelFromConfig(provider, modelId, credentials);
|
|
4768
|
-
const schema =
|
|
4769
|
-
translations:
|
|
5362
|
+
const schema = z7.object({
|
|
5363
|
+
translations: z7.array(z7.string().min(1)).length(cues.length)
|
|
4770
5364
|
});
|
|
4771
5365
|
const cuePayload = cues.map((cue, index) => ({
|
|
4772
5366
|
index,
|
|
@@ -4774,9 +5368,9 @@ async function translateCueChunkWithAI({
|
|
|
4774
5368
|
endTime: cue.endTime,
|
|
4775
5369
|
text: cue.text
|
|
4776
5370
|
}));
|
|
4777
|
-
const response = await
|
|
5371
|
+
const response = await generateText6({
|
|
4778
5372
|
model,
|
|
4779
|
-
output:
|
|
5373
|
+
output: Output6.object({ schema }),
|
|
4780
5374
|
messages: [
|
|
4781
5375
|
{
|
|
4782
5376
|
role: "system",
|
|
@@ -4933,7 +5527,8 @@ async function uploadVttToS3({
|
|
|
4933
5527
|
s3Endpoint,
|
|
4934
5528
|
s3Region,
|
|
4935
5529
|
s3Bucket,
|
|
4936
|
-
storageAdapter
|
|
5530
|
+
storageAdapter,
|
|
5531
|
+
s3SignedUrlExpirySeconds
|
|
4937
5532
|
}) {
|
|
4938
5533
|
"use step";
|
|
4939
5534
|
const s3AccessKeyId = env_default.S3_ACCESS_KEY_ID;
|
|
@@ -4956,25 +5551,9 @@ async function uploadVttToS3({
|
|
|
4956
5551
|
region: s3Region,
|
|
4957
5552
|
bucket: s3Bucket,
|
|
4958
5553
|
key: vttKey,
|
|
4959
|
-
expiresInSeconds:
|
|
5554
|
+
expiresInSeconds: s3SignedUrlExpirySeconds ?? 86400
|
|
4960
5555
|
}, storageAdapter);
|
|
4961
5556
|
}
|
|
4962
|
-
async function createTextTrackOnMux(assetId, languageCode, trackName, presignedUrl, credentials) {
|
|
4963
|
-
"use step";
|
|
4964
|
-
const muxClient = await resolveMuxClient(credentials);
|
|
4965
|
-
const mux = await muxClient.createClient();
|
|
4966
|
-
const trackResponse = await mux.video.assets.createTrack(assetId, {
|
|
4967
|
-
type: "text",
|
|
4968
|
-
text_type: "subtitles",
|
|
4969
|
-
language_code: languageCode,
|
|
4970
|
-
name: trackName,
|
|
4971
|
-
url: presignedUrl
|
|
4972
|
-
});
|
|
4973
|
-
if (!trackResponse.id) {
|
|
4974
|
-
throw new Error("Failed to create text track: no track ID returned from Mux");
|
|
4975
|
-
}
|
|
4976
|
-
return trackResponse.id;
|
|
4977
|
-
}
|
|
4978
5557
|
async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, options) {
|
|
4979
5558
|
"use workflow";
|
|
4980
5559
|
const {
|
|
@@ -5092,7 +5671,8 @@ async function translateCaptions(assetId, fromLanguageCode, toLanguageCode, opti
|
|
|
5092
5671
|
s3Endpoint,
|
|
5093
5672
|
s3Region,
|
|
5094
5673
|
s3Bucket,
|
|
5095
|
-
storageAdapter: effectiveStorageAdapter
|
|
5674
|
+
storageAdapter: effectiveStorageAdapter,
|
|
5675
|
+
s3SignedUrlExpirySeconds: options.s3SignedUrlExpirySeconds
|
|
5096
5676
|
});
|
|
5097
5677
|
} catch (error) {
|
|
5098
5678
|
throw new Error(`Failed to upload VTT to S3: ${error instanceof Error ? error.message : "Unknown error"}`);
|