@mux/ai 0.7.6 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -523,6 +523,12 @@ interface SummarizationOptions extends MuxAIOptions {
523
523
  * Useful for customizing the AI's output for specific use cases (SEO, social media, etc.)
524
524
  */
525
525
  promptOverrides?: SummarizationPromptOverrides;
526
+ /** Desired title length in characters. */
527
+ titleLength?: number;
528
+ /** Desired description length in characters. */
529
+ descriptionLength?: number;
530
+ /** Desired number of tags. */
531
+ tagCount?: number;
526
532
  }
527
533
  declare function getSummaryAndTags(assetId: string, options?: SummarizationOptions): Promise<SummaryAndTagsResult>;
528
534
 
package/dist/index.d.ts CHANGED
@@ -2,14 +2,14 @@ import { W as WorkflowCredentials, S as StoragePutObjectInput, a as StoragePresi
2
2
  export { A as AssetTextTrack, C as ChunkEmbedding, b as ChunkingStrategy, E as Encrypted, c as EncryptedPayload, I as ImageSubmissionMode, M as MuxAIOptions, d as MuxAsset, P as PlaybackAsset, e as PlaybackPolicy, f as StorageAdapter, T as TextChunk, g as TokenChunkingConfig, h as TokenUsage, i as ToneType, U as UsageMetadata, V as VTTChunkingConfig, j as VideoEmbeddingsResult, k as WorkflowCredentialsInput, l as WorkflowMuxClient, m as decryptFromWorkflow, n as encryptForWorkflow } from './types-BRbaGW3t.js';
3
3
  import { WORKFLOW_SERIALIZE, WORKFLOW_DESERIALIZE } from '@workflow/serde';
4
4
  export { i as primitives } from './index-Nxf6BaBO.js';
5
- export { i as workflows } from './index-B0U9upb4.js';
5
+ export { i as workflows } from './index-DP02N3iR.js';
6
6
  import '@mux/mux-node';
7
7
  import 'zod';
8
8
  import '@ai-sdk/anthropic';
9
9
  import '@ai-sdk/google';
10
10
  import '@ai-sdk/openai';
11
11
 
12
- var version = "0.7.6";
12
+ var version = "0.8.0";
13
13
 
14
14
  /**
15
15
  * A function that returns workflow credentials, either synchronously or asynchronously.
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ var __export = (target, all) => {
5
5
  };
6
6
 
7
7
  // package.json
8
- var version = "0.7.6";
8
+ var version = "0.8.0";
9
9
 
10
10
  // src/env.ts
11
11
  import { z } from "zod";
@@ -1798,16 +1798,6 @@ var SYSTEM_PROMPT = dedent`
1798
1798
  - GOOD: "A person runs through a park"
1799
1799
  - Be specific and evidence-based
1800
1800
  </language_guidelines>`;
1801
- function buildSystemPrompt(allowedAnswers) {
1802
- const answerList = allowedAnswers.map((answer) => `"${answer}"`).join(", ");
1803
- return `${SYSTEM_PROMPT}
1804
-
1805
- ${dedent`
1806
- <response_options>
1807
- Allowed answers: ${answerList}
1808
- </response_options>
1809
- `}`;
1810
- }
1811
1801
  var askQuestionsPromptBuilder = createPromptBuilder({
1812
1802
  template: {
1813
1803
  questions: {
@@ -1817,21 +1807,30 @@ var askQuestionsPromptBuilder = createPromptBuilder({
1817
1807
  },
1818
1808
  sectionOrder: ["questions"]
1819
1809
  });
1820
- function buildUserPrompt(questions, transcriptText, isCleanTranscript = true) {
1810
+ function buildUserPrompt(questions, allowedAnswers, transcriptText, isCleanTranscript = true) {
1821
1811
  const questionsList = questions.map((q, idx) => `${idx + 1}. ${q.question}`).join("\n");
1822
1812
  const questionsContent = dedent`
1823
1813
  Please answer the following yes/no questions about this video:
1824
1814
 
1825
1815
  ${questionsList}`;
1816
+ const answerList = allowedAnswers.map((answer) => `"${answer}"`).join(", ");
1817
+ const responseOptions = dedent`
1818
+ <response_options>
1819
+ Allowed answers: ${answerList}
1820
+ </response_options>`;
1821
+ const questionsSection = askQuestionsPromptBuilder.build({ questions: questionsContent });
1826
1822
  if (!transcriptText) {
1827
- return askQuestionsPromptBuilder.build({ questions: questionsContent });
1823
+ return `${questionsSection}
1824
+
1825
+ ${responseOptions}`;
1828
1826
  }
1829
1827
  const format = isCleanTranscript ? "plain text" : "WebVTT";
1830
- const transcriptSection = createTranscriptSection(transcriptText, format);
1831
- return askQuestionsPromptBuilder.buildWithContext(
1832
- { questions: questionsContent },
1833
- [transcriptSection]
1834
- );
1828
+ const transcriptSection = renderSection(createTranscriptSection(transcriptText, format));
1829
+ return `${transcriptSection}
1830
+
1831
+ ${questionsSection}
1832
+
1833
+ ${responseOptions}`;
1835
1834
  }
1836
1835
  async function fetchImageAsBase64(imageUrl, imageDownloadOptions) {
1837
1836
  "use step";
@@ -1927,8 +1926,8 @@ async function askQuestions(assetId, questions, options) {
1927
1926
  cleanTranscript,
1928
1927
  shouldSign: policy === "signed"
1929
1928
  })).transcriptText : "";
1930
- const userPrompt = buildUserPrompt(questions, transcriptText, cleanTranscript);
1931
- const systemPrompt = buildSystemPrompt(normalizedAnswerOptions);
1929
+ const userPrompt = buildUserPrompt(questions, allowedAnswers, transcriptText, cleanTranscript);
1930
+ const systemPrompt = SYSTEM_PROMPT;
1932
1931
  const imageUrl = await getStoryboardUrl(
1933
1932
  playbackId,
1934
1933
  storyboardWidth,
@@ -2643,7 +2642,7 @@ function planSamplingTimestamps(options) {
2643
2642
 
2644
2643
  // src/workflows/moderation.ts
2645
2644
  var DEFAULT_THRESHOLDS = {
2646
- sexual: 0.7,
2645
+ sexual: 0.8,
2647
2646
  violence: 0.8
2648
2647
  };
2649
2648
  var DEFAULT_PROVIDER2 = "openai";
@@ -3085,96 +3084,106 @@ var TONE_INSTRUCTIONS = {
3085
3084
  playful: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
3086
3085
  professional: "Provide a professional, executive-level analysis suitable for business reporting."
3087
3086
  };
3088
- var summarizationPromptBuilder = createPromptBuilder({
3089
- template: {
3090
- task: {
3091
- tag: "task",
3092
- content: "Analyze the storyboard frames and generate metadata that captures the essence of the video content."
3093
- },
3094
- title: {
3095
- tag: "title_requirements",
3096
- content: dedent4`
3097
- A short, compelling headline that immediately communicates the subject or action.
3098
- Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
3099
- Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
3100
- Use active, specific language.`
3101
- },
3102
- description: {
3103
- tag: "description_requirements",
3104
- content: dedent4`
3105
- A concise summary (2-4 sentences) that describes what happens across the video.
3106
- Cover the main subjects, actions, setting, and any notable progression visible across frames.
3107
- Write in present tense. Be specific about observable details rather than making assumptions.
3108
- If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
3109
- },
3110
- keywords: {
3111
- tag: "keywords_requirements",
3112
- content: dedent4`
3113
- Specific, searchable terms (up to ${SUMMARY_KEYWORD_LIMIT}) that capture:
3114
- - Primary subjects (people, animals, objects)
3115
- - Actions and activities being performed
3116
- - Setting and environment
3117
- - Notable objects or tools
3118
- - Style or genre (if applicable)
3119
- Prefer concrete nouns and action verbs over abstract concepts.
3120
- Use lowercase. Avoid redundant or overly generic terms like "video" or "content".`
3121
- },
3122
- qualityGuidelines: {
3123
- tag: "quality_guidelines",
3124
- content: dedent4`
3125
- - Examine all frames to understand the full context and progression
3126
- - Be precise: "golden retriever" is better than "dog" when identifiable
3127
- - Capture the narrative: what begins, develops, and concludes
3128
- - Balance brevity with informativeness`
3129
- }
3130
- },
3131
- sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
3132
- });
3133
- var audioOnlyPromptBuilder = createPromptBuilder({
3134
- template: {
3135
- task: {
3136
- tag: "task",
3137
- content: "Analyze the transcript and generate metadata that captures the essence of the audio content."
3138
- },
3139
- title: {
3140
- tag: "title_requirements",
3141
- content: dedent4`
3142
- A short, compelling headline that immediately communicates the subject or topic.
3143
- Aim for brevity - typically under 10 words. Think of how a podcast title or audio description would read.
3144
- Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
3145
- Use active, specific language.`
3146
- },
3147
- description: {
3148
- tag: "description_requirements",
3149
- content: dedent4`
3150
- A concise summary (2-4 sentences) that describes the audio content.
3151
- Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
3152
- Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
3153
- Focus on the spoken content and any key insights, dialogue, or narrative elements.`
3087
+ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount } = {}) {
3088
+ const titleBrevity = titleLength != null ? `Aim for approximately ${titleLength} characters.` : "Aim for brevity - typically under 10 words.";
3089
+ const descConstraint = descriptionLength != null ? `approximately ${descriptionLength} characters` : "2-4 sentences";
3090
+ const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
3091
+ return createPromptBuilder({
3092
+ template: {
3093
+ task: {
3094
+ tag: "task",
3095
+ content: "Analyze the storyboard frames and generate metadata that captures the essence of the video content."
3096
+ },
3097
+ title: {
3098
+ tag: "title_requirements",
3099
+ content: dedent4`
3100
+ A short, compelling headline that immediately communicates the subject or action.
3101
+ ${titleBrevity} Think of how a news headline or video card title would read.
3102
+ Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
3103
+ Use active, specific language.`
3104
+ },
3105
+ description: {
3106
+ tag: "description_requirements",
3107
+ content: dedent4`
3108
+ A concise summary (${descConstraint}) that describes what happens across the video.
3109
+ Cover the main subjects, actions, setting, and any notable progression visible across frames.
3110
+ Write in present tense. Be specific about observable details rather than making assumptions.
3111
+ If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
3112
+ },
3113
+ keywords: {
3114
+ tag: "keywords_requirements",
3115
+ content: dedent4`
3116
+ Specific, searchable terms (up to ${keywordLimit}) that capture:
3117
+ - Primary subjects (people, animals, objects)
3118
+ - Actions and activities being performed
3119
+ - Setting and environment
3120
+ - Notable objects or tools
3121
+ - Style or genre (if applicable)
3122
+ Prefer concrete nouns and action verbs over abstract concepts.
3123
+ Use lowercase. Avoid redundant or overly generic terms like "video" or "content".`
3124
+ },
3125
+ qualityGuidelines: {
3126
+ tag: "quality_guidelines",
3127
+ content: dedent4`
3128
+ - Examine all frames to understand the full context and progression
3129
+ - Be precise: "golden retriever" is better than "dog" when identifiable
3130
+ - Capture the narrative: what begins, develops, and concludes
3131
+ - Balance brevity with informativeness`
3132
+ }
3154
3133
  },
3155
- keywords: {
3156
- tag: "keywords_requirements",
3157
- content: dedent4`
3158
- Specific, searchable terms (up to ${SUMMARY_KEYWORD_LIMIT}) that capture:
3159
- - Primary topics and themes
3160
- - Speakers or presenters (if named)
3161
- - Key concepts and terminology
3162
- - Content type (interview, lecture, music, etc.)
3163
- - Genre or style (if applicable)
3164
- Prefer concrete nouns and relevant terms over abstract concepts.
3165
- Use lowercase. Avoid redundant or overly generic terms like "audio" or "content".`
3134
+ sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
3135
+ });
3136
+ }
3137
+ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {}) {
3138
+ const titleBrevity = titleLength != null ? `Aim for approximately ${titleLength} characters.` : "Aim for brevity - typically under 10 words.";
3139
+ const descConstraint = descriptionLength != null ? `approximately ${descriptionLength} characters` : "2-4 sentences";
3140
+ const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
3141
+ return createPromptBuilder({
3142
+ template: {
3143
+ task: {
3144
+ tag: "task",
3145
+ content: "Analyze the transcript and generate metadata that captures the essence of the audio content."
3146
+ },
3147
+ title: {
3148
+ tag: "title_requirements",
3149
+ content: dedent4`
3150
+ A short, compelling headline that immediately communicates the subject or topic.
3151
+ ${titleBrevity} Think of how a podcast title or audio description would read.
3152
+ Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
3153
+ Use active, specific language.`
3154
+ },
3155
+ description: {
3156
+ tag: "description_requirements",
3157
+ content: dedent4`
3158
+ A concise summary (${descConstraint}) that describes the audio content.
3159
+ Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
3160
+ Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
3161
+ Focus on the spoken content and any key insights, dialogue, or narrative elements.`
3162
+ },
3163
+ keywords: {
3164
+ tag: "keywords_requirements",
3165
+ content: dedent4`
3166
+ Specific, searchable terms (up to ${keywordLimit}) that capture:
3167
+ - Primary topics and themes
3168
+ - Speakers or presenters (if named)
3169
+ - Key concepts and terminology
3170
+ - Content type (interview, lecture, music, etc.)
3171
+ - Genre or style (if applicable)
3172
+ Prefer concrete nouns and relevant terms over abstract concepts.
3173
+ Use lowercase. Avoid redundant or overly generic terms like "audio" or "content".`
3174
+ },
3175
+ qualityGuidelines: {
3176
+ tag: "quality_guidelines",
3177
+ content: dedent4`
3178
+ - Analyze the full transcript to understand context and themes
3179
+ - Be precise: use specific terminology when mentioned
3180
+ - Capture the narrative: what is introduced, discussed, and concluded
3181
+ - Balance brevity with informativeness`
3182
+ }
3166
3183
  },
3167
- qualityGuidelines: {
3168
- tag: "quality_guidelines",
3169
- content: dedent4`
3170
- - Analyze the full transcript to understand context and themes
3171
- - Be precise: use specific terminology when mentioned
3172
- - Capture the narrative: what is introduced, discussed, and concluded
3173
- - Balance brevity with informativeness`
3174
- }
3175
- },
3176
- sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
3177
- });
3184
+ sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
3185
+ });
3186
+ }
3178
3187
  var SYSTEM_PROMPT3 = dedent4`
3179
3188
  <role>
3180
3189
  You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
@@ -3289,14 +3298,18 @@ function buildUserPrompt4({
3289
3298
  transcriptText,
3290
3299
  isCleanTranscript = true,
3291
3300
  promptOverrides,
3292
- isAudioOnly = false
3301
+ isAudioOnly = false,
3302
+ titleLength,
3303
+ descriptionLength,
3304
+ tagCount
3293
3305
  }) {
3294
3306
  const contextSections = [createToneSection(TONE_INSTRUCTIONS[tone])];
3295
3307
  if (transcriptText) {
3296
3308
  const format = isCleanTranscript ? "plain text" : "WebVTT";
3297
3309
  contextSections.push(createTranscriptSection(transcriptText, format));
3298
3310
  }
3299
- const promptBuilder = isAudioOnly ? audioOnlyPromptBuilder : summarizationPromptBuilder;
3311
+ const constraints = { titleLength, descriptionLength, tagCount };
3312
+ const promptBuilder = isAudioOnly ? createAudioOnlyBuilder(constraints) : createSummarizationBuilder(constraints);
3300
3313
  return promptBuilder.buildWithContext(promptOverrides, contextSections);
3301
3314
  }
3302
3315
  async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, systemPrompt, credentials) {
@@ -3366,7 +3379,7 @@ async function analyzeAudioOnly(provider, modelId, userPrompt, systemPrompt, cre
3366
3379
  }
3367
3380
  };
3368
3381
  }
3369
- function normalizeKeywords(keywords) {
3382
+ function normalizeKeywords(keywords, limit = SUMMARY_KEYWORD_LIMIT) {
3370
3383
  if (!Array.isArray(keywords) || keywords.length === 0) {
3371
3384
  return [];
3372
3385
  }
@@ -3383,7 +3396,7 @@ function normalizeKeywords(keywords) {
3383
3396
  }
3384
3397
  uniqueLowercase.add(lower);
3385
3398
  normalized.push(trimmed);
3386
- if (normalized.length === SUMMARY_KEYWORD_LIMIT) {
3399
+ if (normalized.length === limit) {
3387
3400
  break;
3388
3401
  }
3389
3402
  }
@@ -3400,7 +3413,10 @@ async function getSummaryAndTags(assetId, options) {
3400
3413
  imageSubmissionMode = "url",
3401
3414
  imageDownloadOptions,
3402
3415
  promptOverrides,
3403
- credentials
3416
+ credentials,
3417
+ titleLength,
3418
+ descriptionLength,
3419
+ tagCount
3404
3420
  } = options ?? {};
3405
3421
  if (!VALID_TONES.includes(tone)) {
3406
3422
  throw new Error(
@@ -3438,7 +3454,10 @@ async function getSummaryAndTags(assetId, options) {
3438
3454
  transcriptText,
3439
3455
  isCleanTranscript: cleanTranscript,
3440
3456
  promptOverrides,
3441
- isAudioOnly
3457
+ isAudioOnly,
3458
+ titleLength,
3459
+ descriptionLength,
3460
+ tagCount
3442
3461
  });
3443
3462
  let analysisResponse;
3444
3463
  let imageUrl;
@@ -3495,7 +3514,7 @@ async function getSummaryAndTags(assetId, options) {
3495
3514
  assetId,
3496
3515
  title: analysisResponse.result.title,
3497
3516
  description: analysisResponse.result.description,
3498
- tags: normalizeKeywords(analysisResponse.result.keywords),
3517
+ tags: normalizeKeywords(analysisResponse.result.keywords, tagCount ?? SUMMARY_KEYWORD_LIMIT),
3499
3518
  storyboardUrl: imageUrl,
3500
3519
  // undefined for audio-only assets
3501
3520
  usage: {
@@ -4115,6 +4134,7 @@ import { z as z6 } from "zod";
4115
4134
  var translationSchema = z6.object({
4116
4135
  translation: z6.string()
4117
4136
  });
4137
+ var SYSTEM_PROMPT4 = 'You are a subtitle translation expert. Translate VTT subtitle files to the target language specified by the user. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT content.';
4118
4138
  async function fetchVttFromMux(vttUrl) {
4119
4139
  "use step";
4120
4140
  const vttResponse = await fetch(vttUrl);
@@ -4137,9 +4157,13 @@ async function translateVttWithAI({
4137
4157
  model,
4138
4158
  output: Output5.object({ schema: translationSchema }),
4139
4159
  messages: [
4160
+ {
4161
+ role: "system",
4162
+ content: SYSTEM_PROMPT4
4163
+ },
4140
4164
  {
4141
4165
  role: "user",
4142
- content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
4166
+ content: `Translate from ${fromLanguageCode} to ${toLanguageCode}:
4143
4167
 
4144
4168
  ${vttContent}`
4145
4169
  }