@mux/ai 0.7.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -138,6 +138,10 @@ var EnvSchema = z.object({
138
138
  ),
139
139
  MUX_SIGNING_KEY: optionalString("Mux signing key ID for signed playback URLs.", "Used to sign playback URLs"),
140
140
  MUX_PRIVATE_KEY: optionalString("Mux signing private key for signed playback URLs.", "Used to sign playback URLs"),
141
+ MUX_IMAGE_URL_OVERRIDE: optionalString(
142
+ "Override for Mux image base URL (defaults to https://image.mux.com).",
143
+ "Mux image URL override"
144
+ ),
141
145
  // Test-only helpers (used by this repo's integration tests)
142
146
  MUX_TEST_ASSET_ID: optionalString("Mux asset ID used by integration tests.", "Mux test asset id"),
143
147
  MUX_TEST_ASSET_ID_CHAPTERS: optionalString("Mux asset ID used by integration tests for chapters.", "Mux test asset id for chapters"),
@@ -835,6 +839,44 @@ async function withRetry(fn, {
835
839
  throw lastError || new Error("Retry failed with unknown error");
836
840
  }
837
841
 
842
+ // src/lib/mux-image-url.ts
843
+ var DEFAULT_MUX_IMAGE_ORIGIN = "https://image.mux.com";
844
/**
 * Normalizes a MUX_IMAGE_URL_OVERRIDE value into a bare URL origin.
 *
 * Accepts either a hostname (e.g. "image.example.mux.com", which is treated as
 * HTTPS) or a full origin (e.g. "https://image.example.mux.com").
 *
 * @param {string} value - Raw override value; surrounding whitespace is ignored.
 * @returns {string} The parsed origin (scheme, host and optional port).
 * @throws {Error} If the value cannot be parsed as a URL, uses a scheme other
 *   than http/https, or carries credentials, a query string, a hash fragment,
 *   or a path.
 */
function normalizeMuxImageOrigin(value) {
  const trimmed = value.trim();
  // A bare hostname has no scheme, so default it to HTTPS before parsing.
  const candidate = trimmed.includes("://") ? trimmed : `https://${trimmed}`;
  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    throw new Error(
      `Invalid MUX_IMAGE_URL_OVERRIDE. Provide a hostname like "image.example.mux.com" (or a URL origin such as "https://image.example.mux.com").`
    );
  }
  // `URL#origin` serializes to the literal string "null" for non-special
  // schemes (e.g. "foo://host"), which would otherwise be returned as if it
  // were a usable origin. Only HTTP(S) origins make sense for image URLs.
  if (parsed.protocol !== "https:" && parsed.protocol !== "http:") {
    throw new Error(
      'Invalid MUX_IMAGE_URL_OVERRIDE. Only "http://" and "https://" origins are supported.'
    );
  }
  const hasCredentials = Boolean(parsed.username || parsed.password);
  const hasPath = parsed.pathname !== "" && parsed.pathname !== "/";
  if (hasCredentials || parsed.search || parsed.hash || hasPath) {
    throw new Error(
      "Invalid MUX_IMAGE_URL_OVERRIDE. Only a hostname/origin is allowed (no credentials, query params, hash fragments, or path)."
    );
  }
  return parsed.origin;
}
862
/**
 * Resolves the origin used for Mux image URLs.
 *
 * @returns {string} The normalized MUX_IMAGE_URL_OVERRIDE when one is set,
 *   otherwise DEFAULT_MUX_IMAGE_ORIGIN.
 */
function getMuxImageOrigin() {
  const configuredOrigin = env_default.MUX_IMAGE_URL_OVERRIDE;
  // Any falsy value (unset or empty string) means "no override".
  return configuredOrigin ? normalizeMuxImageOrigin(configuredOrigin) : DEFAULT_MUX_IMAGE_ORIGIN;
}
869
/**
 * Builds the unsigned base URL of a PNG image asset for a playback ID.
 *
 * @param {string} playbackId - Playback ID placed in the first path segment.
 * @param {string} assetType - Image kind used as the file name (without ".png").
 * @returns {string} URL of the form `<origin>/<playbackId>/<assetType>.png`.
 */
function getMuxImageBaseUrl(playbackId, assetType) {
  const imageOrigin = getMuxImageOrigin();
  const imagePath = `${playbackId}/${assetType}.png`;
  return `${imageOrigin}/${imagePath}`;
}
873
/**
 * Returns the unsigned storyboard image base URL for a playback ID.
 *
 * @param {string} playbackId - Playback ID to build the URL for.
 * @returns {string} Result of getMuxImageBaseUrl for the "storyboard" asset type.
 */
function getMuxStoryboardBaseUrl(playbackId) {
  const assetType = "storyboard";
  return getMuxImageBaseUrl(playbackId, assetType);
}
876
/**
 * Returns the unsigned thumbnail image base URL for a playback ID.
 *
 * @param {string} playbackId - Playback ID to build the URL for.
 * @returns {string} Result of getMuxImageBaseUrl for the "thumbnail" asset type.
 */
function getMuxThumbnailBaseUrl(playbackId) {
  const assetType = "thumbnail";
  return getMuxImageBaseUrl(playbackId, assetType);
}
879
+
838
880
  // src/lib/url-signing.ts
839
881
  async function createSigningClient(context) {
840
882
  const { default: MuxClient } = await import("@mux/mux-node");
@@ -876,7 +918,7 @@ async function signUrl(url, playbackId, type = "video", params, credentials) {
876
918
  var DEFAULT_STORYBOARD_WIDTH = 640;
877
919
  async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, shouldSign = false, credentials) {
878
920
  "use step";
879
- const baseUrl = `https://image.mux.com/${playbackId}/storyboard.png`;
921
+ const baseUrl = getMuxStoryboardBaseUrl(playbackId);
880
922
  if (shouldSign) {
881
923
  return signUrl(baseUrl, playbackId, "storyboard", { width }, credentials);
882
924
  }
@@ -1128,16 +1170,6 @@ var SYSTEM_PROMPT = dedent`
1128
1170
  - GOOD: "A person runs through a park"
1129
1171
  - Be specific and evidence-based
1130
1172
  </language_guidelines>`;
1131
- function buildSystemPrompt(allowedAnswers) {
1132
- const answerList = allowedAnswers.map((answer) => `"${answer}"`).join(", ");
1133
- return `${SYSTEM_PROMPT}
1134
-
1135
- ${dedent`
1136
- <response_options>
1137
- Allowed answers: ${answerList}
1138
- </response_options>
1139
- `}`;
1140
- }
1141
1173
  var askQuestionsPromptBuilder = createPromptBuilder({
1142
1174
  template: {
1143
1175
  questions: {
@@ -1147,21 +1179,30 @@ var askQuestionsPromptBuilder = createPromptBuilder({
1147
1179
  },
1148
1180
  sectionOrder: ["questions"]
1149
1181
  });
1150
- function buildUserPrompt(questions, transcriptText, isCleanTranscript = true) {
1182
+ function buildUserPrompt(questions, allowedAnswers, transcriptText, isCleanTranscript = true) {
1151
1183
  const questionsList = questions.map((q, idx) => `${idx + 1}. ${q.question}`).join("\n");
1152
1184
  const questionsContent = dedent`
1153
1185
  Please answer the following yes/no questions about this video:
1154
1186
 
1155
1187
  ${questionsList}`;
1188
+ const answerList = allowedAnswers.map((answer) => `"${answer}"`).join(", ");
1189
+ const responseOptions = dedent`
1190
+ <response_options>
1191
+ Allowed answers: ${answerList}
1192
+ </response_options>`;
1193
+ const questionsSection = askQuestionsPromptBuilder.build({ questions: questionsContent });
1156
1194
  if (!transcriptText) {
1157
- return askQuestionsPromptBuilder.build({ questions: questionsContent });
1195
+ return `${questionsSection}
1196
+
1197
+ ${responseOptions}`;
1158
1198
  }
1159
1199
  const format = isCleanTranscript ? "plain text" : "WebVTT";
1160
- const transcriptSection = createTranscriptSection(transcriptText, format);
1161
- return askQuestionsPromptBuilder.buildWithContext(
1162
- { questions: questionsContent },
1163
- [transcriptSection]
1164
- );
1200
+ const transcriptSection = renderSection(createTranscriptSection(transcriptText, format));
1201
+ return `${transcriptSection}
1202
+
1203
+ ${questionsSection}
1204
+
1205
+ ${responseOptions}`;
1165
1206
  }
1166
1207
  async function fetchImageAsBase64(imageUrl, imageDownloadOptions) {
1167
1208
  "use step";
@@ -1257,8 +1298,8 @@ async function askQuestions(assetId, questions, options) {
1257
1298
  cleanTranscript,
1258
1299
  shouldSign: policy === "signed"
1259
1300
  })).transcriptText : "";
1260
- const userPrompt = buildUserPrompt(questions, transcriptText, cleanTranscript);
1261
- const systemPrompt = buildSystemPrompt(normalizedAnswerOptions);
1301
+ const userPrompt = buildUserPrompt(questions, allowedAnswers, transcriptText, cleanTranscript);
1302
+ const systemPrompt = SYSTEM_PROMPT;
1262
1303
  const imageUrl = await getStoryboardUrl(
1263
1304
  playbackId,
1264
1305
  storyboardWidth,
@@ -2085,7 +2126,7 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
2085
2126
  }
2086
2127
  timestamps = newTimestamps;
2087
2128
  }
2088
- const baseUrl = `https://image.mux.com/${playbackId}/thumbnail.png`;
2129
+ const baseUrl = getMuxThumbnailBaseUrl(playbackId);
2089
2130
  const urlPromises = timestamps.map(async (time) => {
2090
2131
  if (shouldSign) {
2091
2132
  return signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials);
@@ -2097,7 +2138,7 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
2097
2138
 
2098
2139
  // src/workflows/moderation.ts
2099
2140
  var DEFAULT_THRESHOLDS = {
2100
- sexual: 0.7,
2141
+ sexual: 0.8,
2101
2142
  violence: 0.8
2102
2143
  };
2103
2144
  var DEFAULT_PROVIDER2 = "openai";
@@ -2362,7 +2403,7 @@ async function requestHiveModeration(imageUrls, maxConcurrent = 5, submissionMod
2362
2403
  async function getThumbnailUrlsFromTimestamps(playbackId, timestampsMs, options) {
2363
2404
  "use step";
2364
2405
  const { width, shouldSign, credentials } = options;
2365
- const baseUrl = `https://image.mux.com/${playbackId}/thumbnail.png`;
2406
+ const baseUrl = getMuxThumbnailBaseUrl(playbackId);
2366
2407
  const urlPromises = timestampsMs.map(async (tsMs) => {
2367
2408
  const time = Number((tsMs / 1e3).toFixed(2));
2368
2409
  if (shouldSign) {
@@ -2539,96 +2580,106 @@ var TONE_INSTRUCTIONS = {
2539
2580
  playful: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
2540
2581
  professional: "Provide a professional, executive-level analysis suitable for business reporting."
2541
2582
  };
2542
- var summarizationPromptBuilder = createPromptBuilder({
2543
- template: {
2544
- task: {
2545
- tag: "task",
2546
- content: "Analyze the storyboard frames and generate metadata that captures the essence of the video content."
2547
- },
2548
- title: {
2549
- tag: "title_requirements",
2550
- content: dedent4`
2551
- A short, compelling headline that immediately communicates the subject or action.
2552
- Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
2553
- Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
2554
- Use active, specific language.`
2555
- },
2556
- description: {
2557
- tag: "description_requirements",
2558
- content: dedent4`
2559
- A concise summary (2-4 sentences) that describes what happens across the video.
2560
- Cover the main subjects, actions, setting, and any notable progression visible across frames.
2561
- Write in present tense. Be specific about observable details rather than making assumptions.
2562
- If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
2563
- },
2564
- keywords: {
2565
- tag: "keywords_requirements",
2566
- content: dedent4`
2567
- Specific, searchable terms (up to ${SUMMARY_KEYWORD_LIMIT}) that capture:
2568
- - Primary subjects (people, animals, objects)
2569
- - Actions and activities being performed
2570
- - Setting and environment
2571
- - Notable objects or tools
2572
- - Style or genre (if applicable)
2573
- Prefer concrete nouns and action verbs over abstract concepts.
2574
- Use lowercase. Avoid redundant or overly generic terms like "video" or "content".`
2575
- },
2576
- qualityGuidelines: {
2577
- tag: "quality_guidelines",
2578
- content: dedent4`
2579
- - Examine all frames to understand the full context and progression
2580
- - Be precise: "golden retriever" is better than "dog" when identifiable
2581
- - Capture the narrative: what begins, develops, and concludes
2582
- - Balance brevity with informativeness`
2583
- }
2584
- },
2585
- sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
2586
- });
2587
- var audioOnlyPromptBuilder = createPromptBuilder({
2588
- template: {
2589
- task: {
2590
- tag: "task",
2591
- content: "Analyze the transcript and generate metadata that captures the essence of the audio content."
2592
- },
2593
- title: {
2594
- tag: "title_requirements",
2595
- content: dedent4`
2596
- A short, compelling headline that immediately communicates the subject or topic.
2597
- Aim for brevity - typically under 10 words. Think of how a podcast title or audio description would read.
2598
- Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
2599
- Use active, specific language.`
2600
- },
2601
- description: {
2602
- tag: "description_requirements",
2603
- content: dedent4`
2604
- A concise summary (2-4 sentences) that describes the audio content.
2605
- Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
2606
- Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
2607
- Focus on the spoken content and any key insights, dialogue, or narrative elements.`
2583
+ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount } = {}) {
2584
+ const titleBrevity = titleLength != null ? `Aim for approximately ${titleLength} characters.` : "Aim for brevity - typically under 10 words.";
2585
+ const descConstraint = descriptionLength != null ? `approximately ${descriptionLength} characters` : "2-4 sentences";
2586
+ const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
2587
+ return createPromptBuilder({
2588
+ template: {
2589
+ task: {
2590
+ tag: "task",
2591
+ content: "Analyze the storyboard frames and generate metadata that captures the essence of the video content."
2592
+ },
2593
+ title: {
2594
+ tag: "title_requirements",
2595
+ content: dedent4`
2596
+ A short, compelling headline that immediately communicates the subject or action.
2597
+ ${titleBrevity} Think of how a news headline or video card title would read.
2598
+ Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
2599
+ Use active, specific language.`
2600
+ },
2601
+ description: {
2602
+ tag: "description_requirements",
2603
+ content: dedent4`
2604
+ A concise summary (${descConstraint}) that describes what happens across the video.
2605
+ Cover the main subjects, actions, setting, and any notable progression visible across frames.
2606
+ Write in present tense. Be specific about observable details rather than making assumptions.
2607
+ If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
2608
+ },
2609
+ keywords: {
2610
+ tag: "keywords_requirements",
2611
+ content: dedent4`
2612
+ Specific, searchable terms (up to ${keywordLimit}) that capture:
2613
+ - Primary subjects (people, animals, objects)
2614
+ - Actions and activities being performed
2615
+ - Setting and environment
2616
+ - Notable objects or tools
2617
+ - Style or genre (if applicable)
2618
+ Prefer concrete nouns and action verbs over abstract concepts.
2619
+ Use lowercase. Avoid redundant or overly generic terms like "video" or "content".`
2620
+ },
2621
+ qualityGuidelines: {
2622
+ tag: "quality_guidelines",
2623
+ content: dedent4`
2624
+ - Examine all frames to understand the full context and progression
2625
+ - Be precise: "golden retriever" is better than "dog" when identifiable
2626
+ - Capture the narrative: what begins, develops, and concludes
2627
+ - Balance brevity with informativeness`
2628
+ }
2608
2629
  },
2609
- keywords: {
2610
- tag: "keywords_requirements",
2611
- content: dedent4`
2612
- Specific, searchable terms (up to ${SUMMARY_KEYWORD_LIMIT}) that capture:
2613
- - Primary topics and themes
2614
- - Speakers or presenters (if named)
2615
- - Key concepts and terminology
2616
- - Content type (interview, lecture, music, etc.)
2617
- - Genre or style (if applicable)
2618
- Prefer concrete nouns and relevant terms over abstract concepts.
2619
- Use lowercase. Avoid redundant or overly generic terms like "audio" or "content".`
2630
+ sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
2631
+ });
2632
+ }
2633
+ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {}) {
2634
+ const titleBrevity = titleLength != null ? `Aim for approximately ${titleLength} characters.` : "Aim for brevity - typically under 10 words.";
2635
+ const descConstraint = descriptionLength != null ? `approximately ${descriptionLength} characters` : "2-4 sentences";
2636
+ const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
2637
+ return createPromptBuilder({
2638
+ template: {
2639
+ task: {
2640
+ tag: "task",
2641
+ content: "Analyze the transcript and generate metadata that captures the essence of the audio content."
2642
+ },
2643
+ title: {
2644
+ tag: "title_requirements",
2645
+ content: dedent4`
2646
+ A short, compelling headline that immediately communicates the subject or topic.
2647
+ ${titleBrevity} Think of how a podcast title or audio description would read.
2648
+ Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
2649
+ Use active, specific language.`
2650
+ },
2651
+ description: {
2652
+ tag: "description_requirements",
2653
+ content: dedent4`
2654
+ A concise summary (${descConstraint}) that describes the audio content.
2655
+ Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
2656
+ Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
2657
+ Focus on the spoken content and any key insights, dialogue, or narrative elements.`
2658
+ },
2659
+ keywords: {
2660
+ tag: "keywords_requirements",
2661
+ content: dedent4`
2662
+ Specific, searchable terms (up to ${keywordLimit}) that capture:
2663
+ - Primary topics and themes
2664
+ - Speakers or presenters (if named)
2665
+ - Key concepts and terminology
2666
+ - Content type (interview, lecture, music, etc.)
2667
+ - Genre or style (if applicable)
2668
+ Prefer concrete nouns and relevant terms over abstract concepts.
2669
+ Use lowercase. Avoid redundant or overly generic terms like "audio" or "content".`
2670
+ },
2671
+ qualityGuidelines: {
2672
+ tag: "quality_guidelines",
2673
+ content: dedent4`
2674
+ - Analyze the full transcript to understand context and themes
2675
+ - Be precise: use specific terminology when mentioned
2676
+ - Capture the narrative: what is introduced, discussed, and concluded
2677
+ - Balance brevity with informativeness`
2678
+ }
2620
2679
  },
2621
- qualityGuidelines: {
2622
- tag: "quality_guidelines",
2623
- content: dedent4`
2624
- - Analyze the full transcript to understand context and themes
2625
- - Be precise: use specific terminology when mentioned
2626
- - Capture the narrative: what is introduced, discussed, and concluded
2627
- - Balance brevity with informativeness`
2628
- }
2629
- },
2630
- sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
2631
- });
2680
+ sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
2681
+ });
2682
+ }
2632
2683
  var SYSTEM_PROMPT3 = dedent4`
2633
2684
  <role>
2634
2685
  You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
@@ -2743,14 +2794,18 @@ function buildUserPrompt4({
2743
2794
  transcriptText,
2744
2795
  isCleanTranscript = true,
2745
2796
  promptOverrides,
2746
- isAudioOnly = false
2797
+ isAudioOnly = false,
2798
+ titleLength,
2799
+ descriptionLength,
2800
+ tagCount
2747
2801
  }) {
2748
2802
  const contextSections = [createToneSection(TONE_INSTRUCTIONS[tone])];
2749
2803
  if (transcriptText) {
2750
2804
  const format = isCleanTranscript ? "plain text" : "WebVTT";
2751
2805
  contextSections.push(createTranscriptSection(transcriptText, format));
2752
2806
  }
2753
- const promptBuilder = isAudioOnly ? audioOnlyPromptBuilder : summarizationPromptBuilder;
2807
+ const constraints = { titleLength, descriptionLength, tagCount };
2808
+ const promptBuilder = isAudioOnly ? createAudioOnlyBuilder(constraints) : createSummarizationBuilder(constraints);
2754
2809
  return promptBuilder.buildWithContext(promptOverrides, contextSections);
2755
2810
  }
2756
2811
  async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, systemPrompt, credentials) {
@@ -2820,7 +2875,7 @@ async function analyzeAudioOnly(provider, modelId, userPrompt, systemPrompt, cre
2820
2875
  }
2821
2876
  };
2822
2877
  }
2823
- function normalizeKeywords(keywords) {
2878
+ function normalizeKeywords(keywords, limit = SUMMARY_KEYWORD_LIMIT) {
2824
2879
  if (!Array.isArray(keywords) || keywords.length === 0) {
2825
2880
  return [];
2826
2881
  }
@@ -2837,7 +2892,7 @@ function normalizeKeywords(keywords) {
2837
2892
  }
2838
2893
  uniqueLowercase.add(lower);
2839
2894
  normalized.push(trimmed);
2840
- if (normalized.length === SUMMARY_KEYWORD_LIMIT) {
2895
+ if (normalized.length === limit) {
2841
2896
  break;
2842
2897
  }
2843
2898
  }
@@ -2854,7 +2909,10 @@ async function getSummaryAndTags(assetId, options) {
2854
2909
  imageSubmissionMode = "url",
2855
2910
  imageDownloadOptions,
2856
2911
  promptOverrides,
2857
- credentials
2912
+ credentials,
2913
+ titleLength,
2914
+ descriptionLength,
2915
+ tagCount
2858
2916
  } = options ?? {};
2859
2917
  if (!VALID_TONES.includes(tone)) {
2860
2918
  throw new Error(
@@ -2892,7 +2950,10 @@ async function getSummaryAndTags(assetId, options) {
2892
2950
  transcriptText,
2893
2951
  isCleanTranscript: cleanTranscript,
2894
2952
  promptOverrides,
2895
- isAudioOnly
2953
+ isAudioOnly,
2954
+ titleLength,
2955
+ descriptionLength,
2956
+ tagCount
2896
2957
  });
2897
2958
  let analysisResponse;
2898
2959
  let imageUrl;
@@ -2949,7 +3010,7 @@ async function getSummaryAndTags(assetId, options) {
2949
3010
  assetId,
2950
3011
  title: analysisResponse.result.title,
2951
3012
  description: analysisResponse.result.description,
2952
- tags: normalizeKeywords(analysisResponse.result.keywords),
3013
+ tags: normalizeKeywords(analysisResponse.result.keywords, tagCount ?? SUMMARY_KEYWORD_LIMIT),
2953
3014
  storyboardUrl: imageUrl,
2954
3015
  // undefined for audio-only assets
2955
3016
  usage: {
@@ -3781,6 +3842,7 @@ import { z as z6 } from "zod";
3781
3842
  var translationSchema = z6.object({
3782
3843
  translation: z6.string()
3783
3844
  });
3845
+ var SYSTEM_PROMPT4 = 'You are a subtitle translation expert. Translate VTT subtitle files to the target language specified by the user. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT content.';
3784
3846
  async function fetchVttFromMux(vttUrl) {
3785
3847
  "use step";
3786
3848
  const vttResponse = await fetch(vttUrl);
@@ -3803,9 +3865,13 @@ async function translateVttWithAI({
3803
3865
  model,
3804
3866
  output: Output5.object({ schema: translationSchema }),
3805
3867
  messages: [
3868
+ {
3869
+ role: "system",
3870
+ content: SYSTEM_PROMPT4
3871
+ },
3806
3872
  {
3807
3873
  role: "user",
3808
- content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
3874
+ content: `Translate from ${fromLanguageCode} to ${toLanguageCode}:
3809
3875
 
3810
3876
  ${vttContent}`
3811
3877
  }