@mux/ai 0.7.6 → 0.8.1

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -543,8 +543,44 @@ var DEFAULT_EMBEDDING_MODELS = {
543
543
  var LANGUAGE_MODELS = {
544
544
  openai: ["gpt-5.1", "gpt-5-mini"],
545
545
  anthropic: ["claude-sonnet-4-5"],
546
- google: ["gemini-3-flash-preview", "gemini-2.5-flash"]
546
+ google: ["gemini-3-flash-preview", "gemini-3.1-flash-lite-preview", "gemini-2.5-flash"]
547
547
  };
548
+ var LANGUAGE_MODEL_DEPRECATIONS = [
549
+ {
550
+ provider: "google",
551
+ modelId: "gemini-2.5-flash",
552
+ replacementModelId: "gemini-3.1-flash-lite-preview",
553
+ phase: "warn",
554
+ deprecatedOn: "2026-03-03",
555
+ sunsetOn: "2026-06-30",
556
+ reason: "Gemini 3.1 Flash-Lite Preview offers better quality/latency/cost balance in current evals."
557
+ }
558
+ ];
559
+ var warnedDeprecatedLanguageModels = /* @__PURE__ */ new Set();
560
+ function getLanguageModelDeprecation(provider, modelId) {
561
+ return LANGUAGE_MODEL_DEPRECATIONS.find(
562
+ (deprecation) => deprecation.provider === provider && deprecation.modelId === modelId
563
+ );
564
+ }
565
+ function maybeWarnOrThrowForDeprecatedLanguageModel(provider, modelId) {
566
+ const deprecation = getLanguageModelDeprecation(provider, modelId);
567
+ if (!deprecation) {
568
+ return;
569
+ }
570
+ const replacementText = deprecation.replacementModelId ? ` Use replacement provider="${provider}" model="${deprecation.replacementModelId}" instead.` : "";
571
+ const sunsetText = deprecation.sunsetOn ? ` Planned removal date: ${deprecation.sunsetOn}.` : "";
572
+ const reasonText = deprecation.reason ? ` Reason: ${deprecation.reason}` : "";
573
+ const message = deprecation.phase === "blocked" ? `Language model is no longer supported for provider="${provider}" model="${modelId}".${replacementText}${reasonText}` : `Language model is deprecated and in a grace period for provider="${provider}" model="${modelId}".${replacementText}${sunsetText}${reasonText}`;
574
+ if (deprecation.phase === "blocked") {
575
+ throw new Error(message);
576
+ }
577
+ const warningKey = `${provider}:${modelId}`;
578
+ if (warnedDeprecatedLanguageModels.has(warningKey)) {
579
+ return;
580
+ }
581
+ warnedDeprecatedLanguageModels.add(warningKey);
582
+ console.warn(message);
583
+ }
548
584
  function getDefaultEvalModelConfigs() {
549
585
  return Object.entries(DEFAULT_LANGUAGE_MODELS).map(([provider, modelId]) => ({ provider, modelId }));
550
586
  }
@@ -575,6 +611,7 @@ function parseEvalModelPair(value) {
575
611
  `Unsupported eval model "${modelId}" for provider "${provider}". Supported models: ${supportedModels.join(", ")}.`
576
612
  );
577
613
  }
614
+ maybeWarnOrThrowForDeprecatedLanguageModel(provider, modelId);
578
615
  return {
579
616
  provider,
580
617
  modelId
@@ -618,6 +655,7 @@ var EVAL_MODEL_CONFIGS = resolveEvalModelConfigsFromEnv();
618
655
  function resolveLanguageModelConfig(options = {}) {
619
656
  const provider = options.provider || "openai";
620
657
  const modelId = options.model || DEFAULT_LANGUAGE_MODELS[provider];
658
+ maybeWarnOrThrowForDeprecatedLanguageModel(provider, modelId);
621
659
  return { provider, modelId };
622
660
  }
623
661
  function resolveEmbeddingModelConfig(options = {}) {
@@ -626,6 +664,7 @@ function resolveEmbeddingModelConfig(options = {}) {
626
664
  return { provider, modelId };
627
665
  }
628
666
  async function createLanguageModelFromConfig(provider, modelId, credentials) {
667
+ maybeWarnOrThrowForDeprecatedLanguageModel(provider, modelId);
629
668
  switch (provider) {
630
669
  case "openai": {
631
670
  const apiKey = await resolveProviderApiKey("openai", credentials);
@@ -1170,16 +1209,6 @@ var SYSTEM_PROMPT = dedent`
1170
1209
  - GOOD: "A person runs through a park"
1171
1210
  - Be specific and evidence-based
1172
1211
  </language_guidelines>`;
1173
- function buildSystemPrompt(allowedAnswers) {
1174
- const answerList = allowedAnswers.map((answer) => `"${answer}"`).join(", ");
1175
- return `${SYSTEM_PROMPT}
1176
-
1177
- ${dedent`
1178
- <response_options>
1179
- Allowed answers: ${answerList}
1180
- </response_options>
1181
- `}`;
1182
- }
1183
1212
  var askQuestionsPromptBuilder = createPromptBuilder({
1184
1213
  template: {
1185
1214
  questions: {
@@ -1189,21 +1218,30 @@ var askQuestionsPromptBuilder = createPromptBuilder({
1189
1218
  },
1190
1219
  sectionOrder: ["questions"]
1191
1220
  });
1192
- function buildUserPrompt(questions, transcriptText, isCleanTranscript = true) {
1221
+ function buildUserPrompt(questions, allowedAnswers, transcriptText, isCleanTranscript = true) {
1193
1222
  const questionsList = questions.map((q, idx) => `${idx + 1}. ${q.question}`).join("\n");
1194
1223
  const questionsContent = dedent`
1195
1224
  Please answer the following yes/no questions about this video:
1196
1225
 
1197
1226
  ${questionsList}`;
1227
+ const answerList = allowedAnswers.map((answer) => `"${answer}"`).join(", ");
1228
+ const responseOptions = dedent`
1229
+ <response_options>
1230
+ Allowed answers: ${answerList}
1231
+ </response_options>`;
1232
+ const questionsSection = askQuestionsPromptBuilder.build({ questions: questionsContent });
1198
1233
  if (!transcriptText) {
1199
- return askQuestionsPromptBuilder.build({ questions: questionsContent });
1234
+ return `${questionsSection}
1235
+
1236
+ ${responseOptions}`;
1200
1237
  }
1201
1238
  const format = isCleanTranscript ? "plain text" : "WebVTT";
1202
- const transcriptSection = createTranscriptSection(transcriptText, format);
1203
- return askQuestionsPromptBuilder.buildWithContext(
1204
- { questions: questionsContent },
1205
- [transcriptSection]
1206
- );
1239
+ const transcriptSection = renderSection(createTranscriptSection(transcriptText, format));
1240
+ return `${transcriptSection}
1241
+
1242
+ ${questionsSection}
1243
+
1244
+ ${responseOptions}`;
1207
1245
  }
1208
1246
  async function fetchImageAsBase64(imageUrl, imageDownloadOptions) {
1209
1247
  "use step";
@@ -1299,8 +1337,8 @@ async function askQuestions(assetId, questions, options) {
1299
1337
  cleanTranscript,
1300
1338
  shouldSign: policy === "signed"
1301
1339
  })).transcriptText : "";
1302
- const userPrompt = buildUserPrompt(questions, transcriptText, cleanTranscript);
1303
- const systemPrompt = buildSystemPrompt(normalizedAnswerOptions);
1340
+ const userPrompt = buildUserPrompt(questions, allowedAnswers, transcriptText, cleanTranscript);
1341
+ const systemPrompt = SYSTEM_PROMPT;
1304
1342
  const imageUrl = await getStoryboardUrl(
1305
1343
  playbackId,
1306
1344
  storyboardWidth,
@@ -2139,7 +2177,7 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
2139
2177
 
2140
2178
  // src/workflows/moderation.ts
2141
2179
  var DEFAULT_THRESHOLDS = {
2142
- sexual: 0.7,
2180
+ sexual: 0.8,
2143
2181
  violence: 0.8
2144
2182
  };
2145
2183
  var DEFAULT_PROVIDER2 = "openai";
@@ -2581,96 +2619,106 @@ var TONE_INSTRUCTIONS = {
2581
2619
  playful: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
2582
2620
  professional: "Provide a professional, executive-level analysis suitable for business reporting."
2583
2621
  };
2584
- var summarizationPromptBuilder = createPromptBuilder({
2585
- template: {
2586
- task: {
2587
- tag: "task",
2588
- content: "Analyze the storyboard frames and generate metadata that captures the essence of the video content."
2589
- },
2590
- title: {
2591
- tag: "title_requirements",
2592
- content: dedent4`
2593
- A short, compelling headline that immediately communicates the subject or action.
2594
- Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
2595
- Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
2596
- Use active, specific language.`
2597
- },
2598
- description: {
2599
- tag: "description_requirements",
2600
- content: dedent4`
2601
- A concise summary (2-4 sentences) that describes what happens across the video.
2602
- Cover the main subjects, actions, setting, and any notable progression visible across frames.
2603
- Write in present tense. Be specific about observable details rather than making assumptions.
2604
- If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
2605
- },
2606
- keywords: {
2607
- tag: "keywords_requirements",
2608
- content: dedent4`
2609
- Specific, searchable terms (up to ${SUMMARY_KEYWORD_LIMIT}) that capture:
2610
- - Primary subjects (people, animals, objects)
2611
- - Actions and activities being performed
2612
- - Setting and environment
2613
- - Notable objects or tools
2614
- - Style or genre (if applicable)
2615
- Prefer concrete nouns and action verbs over abstract concepts.
2616
- Use lowercase. Avoid redundant or overly generic terms like "video" or "content".`
2617
- },
2618
- qualityGuidelines: {
2619
- tag: "quality_guidelines",
2620
- content: dedent4`
2621
- - Examine all frames to understand the full context and progression
2622
- - Be precise: "golden retriever" is better than "dog" when identifiable
2623
- - Capture the narrative: what begins, develops, and concludes
2624
- - Balance brevity with informativeness`
2625
- }
2626
- },
2627
- sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
2628
- });
2629
- var audioOnlyPromptBuilder = createPromptBuilder({
2630
- template: {
2631
- task: {
2632
- tag: "task",
2633
- content: "Analyze the transcript and generate metadata that captures the essence of the audio content."
2634
- },
2635
- title: {
2636
- tag: "title_requirements",
2637
- content: dedent4`
2638
- A short, compelling headline that immediately communicates the subject or topic.
2639
- Aim for brevity - typically under 10 words. Think of how a podcast title or audio description would read.
2640
- Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
2641
- Use active, specific language.`
2642
- },
2643
- description: {
2644
- tag: "description_requirements",
2645
- content: dedent4`
2646
- A concise summary (2-4 sentences) that describes the audio content.
2647
- Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
2648
- Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
2649
- Focus on the spoken content and any key insights, dialogue, or narrative elements.`
2622
+ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount } = {}) {
2623
+ const titleBrevity = titleLength != null ? `Aim for approximately ${titleLength} characters.` : "Aim for brevity - typically under 10 words.";
2624
+ const descConstraint = descriptionLength != null ? `approximately ${descriptionLength} characters` : "2-4 sentences";
2625
+ const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
2626
+ return createPromptBuilder({
2627
+ template: {
2628
+ task: {
2629
+ tag: "task",
2630
+ content: "Analyze the storyboard frames and generate metadata that captures the essence of the video content."
2631
+ },
2632
+ title: {
2633
+ tag: "title_requirements",
2634
+ content: dedent4`
2635
+ A short, compelling headline that immediately communicates the subject or action.
2636
+ ${titleBrevity} Think of how a news headline or video card title would read.
2637
+ Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
2638
+ Use active, specific language.`
2639
+ },
2640
+ description: {
2641
+ tag: "description_requirements",
2642
+ content: dedent4`
2643
+ A concise summary (${descConstraint}) that describes what happens across the video.
2644
+ Cover the main subjects, actions, setting, and any notable progression visible across frames.
2645
+ Write in present tense. Be specific about observable details rather than making assumptions.
2646
+ If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
2647
+ },
2648
+ keywords: {
2649
+ tag: "keywords_requirements",
2650
+ content: dedent4`
2651
+ Specific, searchable terms (up to ${keywordLimit}) that capture:
2652
+ - Primary subjects (people, animals, objects)
2653
+ - Actions and activities being performed
2654
+ - Setting and environment
2655
+ - Notable objects or tools
2656
+ - Style or genre (if applicable)
2657
+ Prefer concrete nouns and action verbs over abstract concepts.
2658
+ Use lowercase. Avoid redundant or overly generic terms like "video" or "content".`
2659
+ },
2660
+ qualityGuidelines: {
2661
+ tag: "quality_guidelines",
2662
+ content: dedent4`
2663
+ - Examine all frames to understand the full context and progression
2664
+ - Be precise: "golden retriever" is better than "dog" when identifiable
2665
+ - Capture the narrative: what begins, develops, and concludes
2666
+ - Balance brevity with informativeness`
2667
+ }
2650
2668
  },
2651
- keywords: {
2652
- tag: "keywords_requirements",
2653
- content: dedent4`
2654
- Specific, searchable terms (up to ${SUMMARY_KEYWORD_LIMIT}) that capture:
2655
- - Primary topics and themes
2656
- - Speakers or presenters (if named)
2657
- - Key concepts and terminology
2658
- - Content type (interview, lecture, music, etc.)
2659
- - Genre or style (if applicable)
2660
- Prefer concrete nouns and relevant terms over abstract concepts.
2661
- Use lowercase. Avoid redundant or overly generic terms like "audio" or "content".`
2669
+ sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
2670
+ });
2671
+ }
2672
+ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {}) {
2673
+ const titleBrevity = titleLength != null ? `Aim for approximately ${titleLength} characters.` : "Aim for brevity - typically under 10 words.";
2674
+ const descConstraint = descriptionLength != null ? `approximately ${descriptionLength} characters` : "2-4 sentences";
2675
+ const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
2676
+ return createPromptBuilder({
2677
+ template: {
2678
+ task: {
2679
+ tag: "task",
2680
+ content: "Analyze the transcript and generate metadata that captures the essence of the audio content."
2681
+ },
2682
+ title: {
2683
+ tag: "title_requirements",
2684
+ content: dedent4`
2685
+ A short, compelling headline that immediately communicates the subject or topic.
2686
+ ${titleBrevity} Think of how a podcast title or audio description would read.
2687
+ Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
2688
+ Use active, specific language.`
2689
+ },
2690
+ description: {
2691
+ tag: "description_requirements",
2692
+ content: dedent4`
2693
+ A concise summary (${descConstraint}) that describes the audio content.
2694
+ Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
2695
+ Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
2696
+ Focus on the spoken content and any key insights, dialogue, or narrative elements.`
2697
+ },
2698
+ keywords: {
2699
+ tag: "keywords_requirements",
2700
+ content: dedent4`
2701
+ Specific, searchable terms (up to ${keywordLimit}) that capture:
2702
+ - Primary topics and themes
2703
+ - Speakers or presenters (if named)
2704
+ - Key concepts and terminology
2705
+ - Content type (interview, lecture, music, etc.)
2706
+ - Genre or style (if applicable)
2707
+ Prefer concrete nouns and relevant terms over abstract concepts.
2708
+ Use lowercase. Avoid redundant or overly generic terms like "audio" or "content".`
2709
+ },
2710
+ qualityGuidelines: {
2711
+ tag: "quality_guidelines",
2712
+ content: dedent4`
2713
+ - Analyze the full transcript to understand context and themes
2714
+ - Be precise: use specific terminology when mentioned
2715
+ - Capture the narrative: what is introduced, discussed, and concluded
2716
+ - Balance brevity with informativeness`
2717
+ }
2662
2718
  },
2663
- qualityGuidelines: {
2664
- tag: "quality_guidelines",
2665
- content: dedent4`
2666
- - Analyze the full transcript to understand context and themes
2667
- - Be precise: use specific terminology when mentioned
2668
- - Capture the narrative: what is introduced, discussed, and concluded
2669
- - Balance brevity with informativeness`
2670
- }
2671
- },
2672
- sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
2673
- });
2719
+ sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
2720
+ });
2721
+ }
2674
2722
  var SYSTEM_PROMPT3 = dedent4`
2675
2723
  <role>
2676
2724
  You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
@@ -2785,14 +2833,18 @@ function buildUserPrompt4({
2785
2833
  transcriptText,
2786
2834
  isCleanTranscript = true,
2787
2835
  promptOverrides,
2788
- isAudioOnly = false
2836
+ isAudioOnly = false,
2837
+ titleLength,
2838
+ descriptionLength,
2839
+ tagCount
2789
2840
  }) {
2790
2841
  const contextSections = [createToneSection(TONE_INSTRUCTIONS[tone])];
2791
2842
  if (transcriptText) {
2792
2843
  const format = isCleanTranscript ? "plain text" : "WebVTT";
2793
2844
  contextSections.push(createTranscriptSection(transcriptText, format));
2794
2845
  }
2795
- const promptBuilder = isAudioOnly ? audioOnlyPromptBuilder : summarizationPromptBuilder;
2846
+ const constraints = { titleLength, descriptionLength, tagCount };
2847
+ const promptBuilder = isAudioOnly ? createAudioOnlyBuilder(constraints) : createSummarizationBuilder(constraints);
2796
2848
  return promptBuilder.buildWithContext(promptOverrides, contextSections);
2797
2849
  }
2798
2850
  async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, systemPrompt, credentials) {
@@ -2862,7 +2914,7 @@ async function analyzeAudioOnly(provider, modelId, userPrompt, systemPrompt, cre
2862
2914
  }
2863
2915
  };
2864
2916
  }
2865
- function normalizeKeywords(keywords) {
2917
+ function normalizeKeywords(keywords, limit = SUMMARY_KEYWORD_LIMIT) {
2866
2918
  if (!Array.isArray(keywords) || keywords.length === 0) {
2867
2919
  return [];
2868
2920
  }
@@ -2879,7 +2931,7 @@ function normalizeKeywords(keywords) {
2879
2931
  }
2880
2932
  uniqueLowercase.add(lower);
2881
2933
  normalized.push(trimmed);
2882
- if (normalized.length === SUMMARY_KEYWORD_LIMIT) {
2934
+ if (normalized.length === limit) {
2883
2935
  break;
2884
2936
  }
2885
2937
  }
@@ -2896,7 +2948,10 @@ async function getSummaryAndTags(assetId, options) {
2896
2948
  imageSubmissionMode = "url",
2897
2949
  imageDownloadOptions,
2898
2950
  promptOverrides,
2899
- credentials
2951
+ credentials,
2952
+ titleLength,
2953
+ descriptionLength,
2954
+ tagCount
2900
2955
  } = options ?? {};
2901
2956
  if (!VALID_TONES.includes(tone)) {
2902
2957
  throw new Error(
@@ -2934,7 +2989,10 @@ async function getSummaryAndTags(assetId, options) {
2934
2989
  transcriptText,
2935
2990
  isCleanTranscript: cleanTranscript,
2936
2991
  promptOverrides,
2937
- isAudioOnly
2992
+ isAudioOnly,
2993
+ titleLength,
2994
+ descriptionLength,
2995
+ tagCount
2938
2996
  });
2939
2997
  let analysisResponse;
2940
2998
  let imageUrl;
@@ -2991,7 +3049,7 @@ async function getSummaryAndTags(assetId, options) {
2991
3049
  assetId,
2992
3050
  title: analysisResponse.result.title,
2993
3051
  description: analysisResponse.result.description,
2994
- tags: normalizeKeywords(analysisResponse.result.keywords),
3052
+ tags: normalizeKeywords(analysisResponse.result.keywords, tagCount ?? SUMMARY_KEYWORD_LIMIT),
2995
3053
  storyboardUrl: imageUrl,
2996
3054
  // undefined for audio-only assets
2997
3055
  usage: {
@@ -3823,6 +3881,7 @@ import { z as z6 } from "zod";
3823
3881
  var translationSchema = z6.object({
3824
3882
  translation: z6.string()
3825
3883
  });
3884
+ var SYSTEM_PROMPT4 = 'You are a subtitle translation expert. Translate VTT subtitle files to the target language specified by the user. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT content.';
3826
3885
  async function fetchVttFromMux(vttUrl) {
3827
3886
  "use step";
3828
3887
  const vttResponse = await fetch(vttUrl);
@@ -3845,9 +3904,13 @@ async function translateVttWithAI({
3845
3904
  model,
3846
3905
  output: Output5.object({ schema: translationSchema }),
3847
3906
  messages: [
3907
+ {
3908
+ role: "system",
3909
+ content: SYSTEM_PROMPT4
3910
+ },
3848
3911
  {
3849
3912
  role: "user",
3850
- content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
3913
+ content: `Translate from ${fromLanguageCode} to ${toLanguageCode}:
3851
3914
 
3852
3915
  ${vttContent}`
3853
3916
  }