@mux/ai 0.7.6 → 0.8.1

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -543,8 +543,44 @@ var DEFAULT_EMBEDDING_MODELS = {
543
543
  var LANGUAGE_MODELS = {
544
544
  openai: ["gpt-5.1", "gpt-5-mini"],
545
545
  anthropic: ["claude-sonnet-4-5"],
546
- google: ["gemini-3-flash-preview", "gemini-2.5-flash"]
546
+ google: ["gemini-3-flash-preview", "gemini-3.1-flash-lite-preview", "gemini-2.5-flash"]
547
547
  };
548
+ var LANGUAGE_MODEL_DEPRECATIONS = [
549
+ {
550
+ provider: "google",
551
+ modelId: "gemini-2.5-flash",
552
+ replacementModelId: "gemini-3.1-flash-lite-preview",
553
+ phase: "warn",
554
+ deprecatedOn: "2026-03-03",
555
+ sunsetOn: "2026-06-30",
556
+ reason: "Gemini 3.1 Flash-Lite Preview offers better quality/latency/cost balance in current evals."
557
+ }
558
+ ];
559
+ var warnedDeprecatedLanguageModels = /* @__PURE__ */ new Set();
560
+ function getLanguageModelDeprecation(provider, modelId) {
561
+ return LANGUAGE_MODEL_DEPRECATIONS.find(
562
+ (deprecation) => deprecation.provider === provider && deprecation.modelId === modelId
563
+ );
564
+ }
565
+ function maybeWarnOrThrowForDeprecatedLanguageModel(provider, modelId) {
566
+ const deprecation = getLanguageModelDeprecation(provider, modelId);
567
+ if (!deprecation) {
568
+ return;
569
+ }
570
+ const replacementText = deprecation.replacementModelId ? ` Use replacement provider="${provider}" model="${deprecation.replacementModelId}" instead.` : "";
571
+ const sunsetText = deprecation.sunsetOn ? ` Planned removal date: ${deprecation.sunsetOn}.` : "";
572
+ const reasonText = deprecation.reason ? ` Reason: ${deprecation.reason}` : "";
573
+ const message = deprecation.phase === "blocked" ? `Language model is no longer supported for provider="${provider}" model="${modelId}".${replacementText}${reasonText}` : `Language model is deprecated and in a grace period for provider="${provider}" model="${modelId}".${replacementText}${sunsetText}${reasonText}`;
574
+ if (deprecation.phase === "blocked") {
575
+ throw new Error(message);
576
+ }
577
+ const warningKey = `${provider}:${modelId}`;
578
+ if (warnedDeprecatedLanguageModels.has(warningKey)) {
579
+ return;
580
+ }
581
+ warnedDeprecatedLanguageModels.add(warningKey);
582
+ console.warn(message);
583
+ }
548
584
  function getDefaultEvalModelConfigs() {
549
585
  return Object.entries(DEFAULT_LANGUAGE_MODELS).map(([provider, modelId]) => ({ provider, modelId }));
550
586
  }
@@ -575,6 +611,7 @@ function parseEvalModelPair(value) {
575
611
  `Unsupported eval model "${modelId}" for provider "${provider}". Supported models: ${supportedModels.join(", ")}.`
576
612
  );
577
613
  }
614
+ maybeWarnOrThrowForDeprecatedLanguageModel(provider, modelId);
578
615
  return {
579
616
  provider,
580
617
  modelId
@@ -618,6 +655,7 @@ var EVAL_MODEL_CONFIGS = resolveEvalModelConfigsFromEnv();
618
655
  function resolveLanguageModelConfig(options = {}) {
619
656
  const provider = options.provider || "openai";
620
657
  const modelId = options.model || DEFAULT_LANGUAGE_MODELS[provider];
658
+ maybeWarnOrThrowForDeprecatedLanguageModel(provider, modelId);
621
659
  return { provider, modelId };
622
660
  }
623
661
  function resolveEmbeddingModelConfig(options = {}) {
@@ -626,6 +664,7 @@ function resolveEmbeddingModelConfig(options = {}) {
626
664
  return { provider, modelId };
627
665
  }
628
666
  async function createLanguageModelFromConfig(provider, modelId, credentials) {
667
+ maybeWarnOrThrowForDeprecatedLanguageModel(provider, modelId);
629
668
  switch (provider) {
630
669
  case "openai": {
631
670
  const apiKey = await resolveProviderApiKey("openai", credentials);
@@ -1170,16 +1209,6 @@ var SYSTEM_PROMPT = dedent`
1170
1209
  - GOOD: "A person runs through a park"
1171
1210
  - Be specific and evidence-based
1172
1211
  </language_guidelines>`;
1173
- function buildSystemPrompt(allowedAnswers) {
1174
- const answerList = allowedAnswers.map((answer) => `"${answer}"`).join(", ");
1175
- return `${SYSTEM_PROMPT}
1176
-
1177
- ${dedent`
1178
- <response_options>
1179
- Allowed answers: ${answerList}
1180
- </response_options>
1181
- `}`;
1182
- }
1183
1212
  var askQuestionsPromptBuilder = createPromptBuilder({
1184
1213
  template: {
1185
1214
  questions: {
@@ -1189,21 +1218,30 @@ var askQuestionsPromptBuilder = createPromptBuilder({
1189
1218
  },
1190
1219
  sectionOrder: ["questions"]
1191
1220
  });
1192
- function buildUserPrompt(questions, transcriptText, isCleanTranscript = true) {
1221
+ function buildUserPrompt(questions, allowedAnswers, transcriptText, isCleanTranscript = true) {
1193
1222
  const questionsList = questions.map((q, idx) => `${idx + 1}. ${q.question}`).join("\n");
1194
1223
  const questionsContent = dedent`
1195
1224
  Please answer the following yes/no questions about this video:
1196
1225
 
1197
1226
  ${questionsList}`;
1227
+ const answerList = allowedAnswers.map((answer) => `"${answer}"`).join(", ");
1228
+ const responseOptions = dedent`
1229
+ <response_options>
1230
+ Allowed answers: ${answerList}
1231
+ </response_options>`;
1232
+ const questionsSection = askQuestionsPromptBuilder.build({ questions: questionsContent });
1198
1233
  if (!transcriptText) {
1199
- return askQuestionsPromptBuilder.build({ questions: questionsContent });
1234
+ return `${questionsSection}
1235
+
1236
+ ${responseOptions}`;
1200
1237
  }
1201
1238
  const format = isCleanTranscript ? "plain text" : "WebVTT";
1202
- const transcriptSection = createTranscriptSection(transcriptText, format);
1203
- return askQuestionsPromptBuilder.buildWithContext(
1204
- { questions: questionsContent },
1205
- [transcriptSection]
1206
- );
1239
+ const transcriptSection = renderSection(createTranscriptSection(transcriptText, format));
1240
+ return `${transcriptSection}
1241
+
1242
+ ${questionsSection}
1243
+
1244
+ ${responseOptions}`;
1207
1245
  }
1208
1246
  async function fetchImageAsBase64(imageUrl, imageDownloadOptions) {
1209
1247
  "use step";
@@ -1299,8 +1337,8 @@ async function askQuestions(assetId, questions, options) {
1299
1337
  cleanTranscript,
1300
1338
  shouldSign: policy === "signed"
1301
1339
  })).transcriptText : "";
1302
- const userPrompt = buildUserPrompt(questions, transcriptText, cleanTranscript);
1303
- const systemPrompt = buildSystemPrompt(normalizedAnswerOptions);
1340
+ const userPrompt = buildUserPrompt(questions, allowedAnswers, transcriptText, cleanTranscript);
1341
+ const systemPrompt = SYSTEM_PROMPT;
1304
1342
  const imageUrl = await getStoryboardUrl(
1305
1343
  playbackId,
1306
1344
  storyboardWidth,
@@ -2139,7 +2177,7 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
2139
2177
 
2140
2178
  // src/workflows/moderation.ts
2141
2179
  var DEFAULT_THRESHOLDS = {
2142
- sexual: 0.7,
2180
+ sexual: 0.8,
2143
2181
  violence: 0.8
2144
2182
  };
2145
2183
  var DEFAULT_PROVIDER2 = "openai";
@@ -2581,96 +2619,106 @@ var TONE_INSTRUCTIONS = {
2581
2619
  playful: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
2582
2620
  professional: "Provide a professional, executive-level analysis suitable for business reporting."
2583
2621
  };
2584
- var summarizationPromptBuilder = createPromptBuilder({
2585
- template: {
2586
- task: {
2587
- tag: "task",
2588
- content: "Analyze the storyboard frames and generate metadata that captures the essence of the video content."
2589
- },
2590
- title: {
2591
- tag: "title_requirements",
2592
- content: dedent4`
2593
- A short, compelling headline that immediately communicates the subject or action.
2594
- Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
2595
- Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
2596
- Use active, specific language.`
2597
- },
2598
- description: {
2599
- tag: "description_requirements",
2600
- content: dedent4`
2601
- A concise summary (2-4 sentences) that describes what happens across the video.
2602
- Cover the main subjects, actions, setting, and any notable progression visible across frames.
2603
- Write in present tense. Be specific about observable details rather than making assumptions.
2604
- If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
2605
- },
2606
- keywords: {
2607
- tag: "keywords_requirements",
2608
- content: dedent4`
2609
- Specific, searchable terms (up to ${SUMMARY_KEYWORD_LIMIT}) that capture:
2610
- - Primary subjects (people, animals, objects)
2611
- - Actions and activities being performed
2612
- - Setting and environment
2613
- - Notable objects or tools
2614
- - Style or genre (if applicable)
2615
- Prefer concrete nouns and action verbs over abstract concepts.
2616
- Use lowercase. Avoid redundant or overly generic terms like "video" or "content".`
2617
- },
2618
- qualityGuidelines: {
2619
- tag: "quality_guidelines",
2620
- content: dedent4`
2621
- - Examine all frames to understand the full context and progression
2622
- - Be precise: "golden retriever" is better than "dog" when identifiable
2623
- - Capture the narrative: what begins, develops, and concludes
2624
- - Balance brevity with informativeness`
2625
- }
2626
- },
2627
- sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
2628
- });
2629
- var audioOnlyPromptBuilder = createPromptBuilder({
2630
- template: {
2631
- task: {
2632
- tag: "task",
2633
- content: "Analyze the transcript and generate metadata that captures the essence of the audio content."
2634
- },
2635
- title: {
2636
- tag: "title_requirements",
2637
- content: dedent4`
2638
- A short, compelling headline that immediately communicates the subject or topic.
2639
- Aim for brevity - typically under 10 words. Think of how a podcast title or audio description would read.
2640
- Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
2641
- Use active, specific language.`
2642
- },
2643
- description: {
2644
- tag: "description_requirements",
2645
- content: dedent4`
2646
- A concise summary (2-4 sentences) that describes the audio content.
2647
- Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
2648
- Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
2649
- Focus on the spoken content and any key insights, dialogue, or narrative elements.`
2622
+ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount } = {}) {
2623
+ const titleBrevity = titleLength != null ? `Aim for approximately ${titleLength} characters.` : "Aim for brevity - typically under 10 words.";
2624
+ const descConstraint = descriptionLength != null ? `approximately ${descriptionLength} characters` : "2-4 sentences";
2625
+ const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
2626
+ return createPromptBuilder({
2627
+ template: {
2628
+ task: {
2629
+ tag: "task",
2630
+ content: "Analyze the storyboard frames and generate metadata that captures the essence of the video content."
2631
+ },
2632
+ title: {
2633
+ tag: "title_requirements",
2634
+ content: dedent4`
2635
+ A short, compelling headline that immediately communicates the subject or action.
2636
+ ${titleBrevity} Think of how a news headline or video card title would read.
2637
+ Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
2638
+ Use active, specific language.`
2639
+ },
2640
+ description: {
2641
+ tag: "description_requirements",
2642
+ content: dedent4`
2643
+ A concise summary (${descConstraint}) that describes what happens across the video.
2644
+ Cover the main subjects, actions, setting, and any notable progression visible across frames.
2645
+ Write in present tense. Be specific about observable details rather than making assumptions.
2646
+ If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
2647
+ },
2648
+ keywords: {
2649
+ tag: "keywords_requirements",
2650
+ content: dedent4`
2651
+ Specific, searchable terms (up to ${keywordLimit}) that capture:
2652
+ - Primary subjects (people, animals, objects)
2653
+ - Actions and activities being performed
2654
+ - Setting and environment
2655
+ - Notable objects or tools
2656
+ - Style or genre (if applicable)
2657
+ Prefer concrete nouns and action verbs over abstract concepts.
2658
+ Use lowercase. Avoid redundant or overly generic terms like "video" or "content".`
2659
+ },
2660
+ qualityGuidelines: {
2661
+ tag: "quality_guidelines",
2662
+ content: dedent4`
2663
+ - Examine all frames to understand the full context and progression
2664
+ - Be precise: "golden retriever" is better than "dog" when identifiable
2665
+ - Capture the narrative: what begins, develops, and concludes
2666
+ - Balance brevity with informativeness`
2667
+ }
2650
2668
  },
2651
- keywords: {
2652
- tag: "keywords_requirements",
2653
- content: dedent4`
2654
- Specific, searchable terms (up to ${SUMMARY_KEYWORD_LIMIT}) that capture:
2655
- - Primary topics and themes
2656
- - Speakers or presenters (if named)
2657
- - Key concepts and terminology
2658
- - Content type (interview, lecture, music, etc.)
2659
- - Genre or style (if applicable)
2660
- Prefer concrete nouns and relevant terms over abstract concepts.
2661
- Use lowercase. Avoid redundant or overly generic terms like "audio" or "content".`
2669
+ sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
2670
+ });
2671
+ }
2672
+ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {}) {
2673
+ const titleBrevity = titleLength != null ? `Aim for approximately ${titleLength} characters.` : "Aim for brevity - typically under 10 words.";
2674
+ const descConstraint = descriptionLength != null ? `approximately ${descriptionLength} characters` : "2-4 sentences";
2675
+ const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
2676
+ return createPromptBuilder({
2677
+ template: {
2678
+ task: {
2679
+ tag: "task",
2680
+ content: "Analyze the transcript and generate metadata that captures the essence of the audio content."
2681
+ },
2682
+ title: {
2683
+ tag: "title_requirements",
2684
+ content: dedent4`
2685
+ A short, compelling headline that immediately communicates the subject or topic.
2686
+ ${titleBrevity} Think of how a podcast title or audio description would read.
2687
+ Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
2688
+ Use active, specific language.`
2689
+ },
2690
+ description: {
2691
+ tag: "description_requirements",
2692
+ content: dedent4`
2693
+ A concise summary (${descConstraint}) that describes the audio content.
2694
+ Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
2695
+ Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
2696
+ Focus on the spoken content and any key insights, dialogue, or narrative elements.`
2697
+ },
2698
+ keywords: {
2699
+ tag: "keywords_requirements",
2700
+ content: dedent4`
2701
+ Specific, searchable terms (up to ${keywordLimit}) that capture:
2702
+ - Primary topics and themes
2703
+ - Speakers or presenters (if named)
2704
+ - Key concepts and terminology
2705
+ - Content type (interview, lecture, music, etc.)
2706
+ - Genre or style (if applicable)
2707
+ Prefer concrete nouns and relevant terms over abstract concepts.
2708
+ Use lowercase. Avoid redundant or overly generic terms like "audio" or "content".`
2709
+ },
2710
+ qualityGuidelines: {
2711
+ tag: "quality_guidelines",
2712
+ content: dedent4`
2713
+ - Analyze the full transcript to understand context and themes
2714
+ - Be precise: use specific terminology when mentioned
2715
+ - Capture the narrative: what is introduced, discussed, and concluded
2716
+ - Balance brevity with informativeness`
2717
+ }
2662
2718
  },
2663
- qualityGuidelines: {
2664
- tag: "quality_guidelines",
2665
- content: dedent4`
2666
- - Analyze the full transcript to understand context and themes
2667
- - Be precise: use specific terminology when mentioned
2668
- - Capture the narrative: what is introduced, discussed, and concluded
2669
- - Balance brevity with informativeness`
2670
- }
2671
- },
2672
- sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
2673
- });
2719
+ sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
2720
+ });
2721
+ }
2674
2722
  var SYSTEM_PROMPT3 = dedent4`
2675
2723
  <role>
2676
2724
  You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
@@ -2785,14 +2833,18 @@ function buildUserPrompt4({
2785
2833
  transcriptText,
2786
2834
  isCleanTranscript = true,
2787
2835
  promptOverrides,
2788
- isAudioOnly = false
2836
+ isAudioOnly = false,
2837
+ titleLength,
2838
+ descriptionLength,
2839
+ tagCount
2789
2840
  }) {
2790
2841
  const contextSections = [createToneSection(TONE_INSTRUCTIONS[tone])];
2791
2842
  if (transcriptText) {
2792
2843
  const format = isCleanTranscript ? "plain text" : "WebVTT";
2793
2844
  contextSections.push(createTranscriptSection(transcriptText, format));
2794
2845
  }
2795
- const promptBuilder = isAudioOnly ? audioOnlyPromptBuilder : summarizationPromptBuilder;
2846
+ const constraints = { titleLength, descriptionLength, tagCount };
2847
+ const promptBuilder = isAudioOnly ? createAudioOnlyBuilder(constraints) : createSummarizationBuilder(constraints);
2796
2848
  return promptBuilder.buildWithContext(promptOverrides, contextSections);
2797
2849
  }
2798
2850
  async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, systemPrompt, credentials) {
@@ -2862,7 +2914,7 @@ async function analyzeAudioOnly(provider, modelId, userPrompt, systemPrompt, cre
2862
2914
  }
2863
2915
  };
2864
2916
  }
2865
- function normalizeKeywords(keywords) {
2917
+ function normalizeKeywords(keywords, limit = SUMMARY_KEYWORD_LIMIT) {
2866
2918
  if (!Array.isArray(keywords) || keywords.length === 0) {
2867
2919
  return [];
2868
2920
  }
@@ -2879,7 +2931,7 @@ function normalizeKeywords(keywords) {
2879
2931
  }
2880
2932
  uniqueLowercase.add(lower);
2881
2933
  normalized.push(trimmed);
2882
- if (normalized.length === SUMMARY_KEYWORD_LIMIT) {
2934
+ if (normalized.length === limit) {
2883
2935
  break;
2884
2936
  }
2885
2937
  }
@@ -2896,7 +2948,10 @@ async function getSummaryAndTags(assetId, options) {
2896
2948
  imageSubmissionMode = "url",
2897
2949
  imageDownloadOptions,
2898
2950
  promptOverrides,
2899
- credentials
2951
+ credentials,
2952
+ titleLength,
2953
+ descriptionLength,
2954
+ tagCount
2900
2955
  } = options ?? {};
2901
2956
  if (!VALID_TONES.includes(tone)) {
2902
2957
  throw new Error(
@@ -2934,7 +2989,10 @@ async function getSummaryAndTags(assetId, options) {
2934
2989
  transcriptText,
2935
2990
  isCleanTranscript: cleanTranscript,
2936
2991
  promptOverrides,
2937
- isAudioOnly
2992
+ isAudioOnly,
2993
+ titleLength,
2994
+ descriptionLength,
2995
+ tagCount
2938
2996
  });
2939
2997
  let analysisResponse;
2940
2998
  let imageUrl;
@@ -2991,7 +3049,7 @@ async function getSummaryAndTags(assetId, options) {
2991
3049
  assetId,
2992
3050
  title: analysisResponse.result.title,
2993
3051
  description: analysisResponse.result.description,
2994
- tags: normalizeKeywords(analysisResponse.result.keywords),
3052
+ tags: normalizeKeywords(analysisResponse.result.keywords, tagCount ?? SUMMARY_KEYWORD_LIMIT),
2995
3053
  storyboardUrl: imageUrl,
2996
3054
  // undefined for audio-only assets
2997
3055
  usage: {
@@ -3823,6 +3881,7 @@ import { z as z6 } from "zod";
3823
3881
  var translationSchema = z6.object({
3824
3882
  translation: z6.string()
3825
3883
  });
3884
+ var SYSTEM_PROMPT4 = 'You are a subtitle translation expert. Translate VTT subtitle files to the target language specified by the user. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT content.';
3826
3885
  async function fetchVttFromMux(vttUrl) {
3827
3886
  "use step";
3828
3887
  const vttResponse = await fetch(vttUrl);
@@ -3845,9 +3904,13 @@ async function translateVttWithAI({
3845
3904
  model,
3846
3905
  output: Output5.object({ schema: translationSchema }),
3847
3906
  messages: [
3907
+ {
3908
+ role: "system",
3909
+ content: SYSTEM_PROMPT4
3910
+ },
3848
3911
  {
3849
3912
  role: "user",
3850
- content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
3913
+ content: `Translate from ${fromLanguageCode} to ${toLanguageCode}:
3851
3914
 
3852
3915
  ${vttContent}`
3853
3916
  }