npm - @mux/ai - Versions diffs - 0.7.5 → 0.8.0 - Mend

@mux/ai 0.7.5 → 0.8.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (10):

package/dist/{index-B0U9upb4.d.ts → index-DP02N3iR.d.ts} +6 -0
package/dist/index.d.ts +2 -2
package/dist/index.js +186 -120
package/dist/index.js.map +1 -1
package/dist/primitives/index.js +44 -2
package/dist/primitives/index.js.map +1 -1
package/dist/workflows/index.d.ts +1 -1
package/dist/workflows/index.js +185 -119
package/dist/workflows/index.js.map +1 -1
package/package.json +1 -1

package/dist/workflows/index.js (CHANGED)

@@ -138,6 +138,10 @@ var EnvSchema = z.object({
   ),
   MUX_SIGNING_KEY: optionalString("Mux signing key ID for signed playback URLs.", "Used to sign playback URLs"),
   MUX_PRIVATE_KEY: optionalString("Mux signing private key for signed playback URLs.", "Used to sign playback URLs"),
+  MUX_IMAGE_URL_OVERRIDE: optionalString(
+    "Override for Mux image base URL (defaults to https://image.mux.com).",
+    "Mux image URL override"
+  ),
   // Test-only helpers (used by this repo's integration tests)
   MUX_TEST_ASSET_ID: optionalString("Mux asset ID used by integration tests.", "Mux test asset id"),
   MUX_TEST_ASSET_ID_CHAPTERS: optionalString("Mux asset ID used by integration tests for chapters.", "Mux test asset id for chapters"),
@@ -835,6 +839,44 @@ async function withRetry(fn, {
   throw lastError || new Error("Retry failed with unknown error");
 }
+// src/lib/mux-image-url.ts
+var DEFAULT_MUX_IMAGE_ORIGIN = "https://image.mux.com";
+function normalizeMuxImageOrigin(value) {
+  const trimmed = value.trim();
+  const candidate = trimmed.includes("://") ? trimmed : `https://${trimmed}`;
+  let parsed;
+  try {
+    parsed = new URL(candidate);
+  } catch {
+    throw new Error(
+      `Invalid MUX_IMAGE_URL_OVERRIDE. Provide a hostname like "image.example.mux.com" (or a URL origin such as "https://image.example.mux.com").`
+    );
+  }
+  if (parsed.username || parsed.password || parsed.search || parsed.hash || parsed.pathname && parsed.pathname !== "/") {
+    throw new Error(
+      "Invalid MUX_IMAGE_URL_OVERRIDE. Only a hostname/origin is allowed (no credentials, query params, hash fragments, or path)."
+    );
+  }
+  return parsed.origin;
+}
+function getMuxImageOrigin() {
+  const override = env_default.MUX_IMAGE_URL_OVERRIDE;
+  if (!override) {
+    return DEFAULT_MUX_IMAGE_ORIGIN;
+  }
+  return normalizeMuxImageOrigin(override);
+}
+function getMuxImageBaseUrl(playbackId, assetType) {
+  const origin = getMuxImageOrigin();
+  return `${origin}/${playbackId}/${assetType}.png`;
+}
+function getMuxStoryboardBaseUrl(playbackId) {
+  return getMuxImageBaseUrl(playbackId, "storyboard");
+}
+function getMuxThumbnailBaseUrl(playbackId) {
+  return getMuxImageBaseUrl(playbackId, "thumbnail");
+}
 // src/lib/url-signing.ts
 async function createSigningClient(context) {
   const { default: MuxClient } = await import("@mux/mux-node");
@@ -876,7 +918,7 @@ async function signUrl(url, playbackId, type = "video", params, credentials) {
 var DEFAULT_STORYBOARD_WIDTH = 640;
 async function getStoryboardUrl(playbackId, width = DEFAULT_STORYBOARD_WIDTH, shouldSign = false, credentials) {
   "use step";
-  const baseUrl = `https://image.mux.com/${playbackId}/storyboard.png`;
+  const baseUrl = getMuxStoryboardBaseUrl(playbackId);
   if (shouldSign) {
     return signUrl(baseUrl, playbackId, "storyboard", { width }, credentials);
   }
@@ -1128,16 +1170,6 @@ var SYSTEM_PROMPT = dedent`
     - GOOD: "A person runs through a park"
     - Be specific and evidence-based
   </language_guidelines>`;
-function buildSystemPrompt(allowedAnswers) {
-  const answerList = allowedAnswers.map((answer) => `"${answer}"`).join(", ");
-  return `${SYSTEM_PROMPT}
-${dedent`
-    <response_options>
-      Allowed answers: ${answerList}
-    </response_options>
-  `}`;
-}
 var askQuestionsPromptBuilder = createPromptBuilder({
   template: {
     questions: {
@@ -1147,21 +1179,30 @@ var askQuestionsPromptBuilder = createPromptBuilder({
   },
   sectionOrder: ["questions"]
 });
-function buildUserPrompt(questions, transcriptText, isCleanTranscript = true) {
+function buildUserPrompt(questions, allowedAnswers, transcriptText, isCleanTranscript = true) {
   const questionsList = questions.map((q, idx) => `${idx + 1}. ${q.question}`).join("\n");
   const questionsContent = dedent`
     Please answer the following yes/no questions about this video:
     ${questionsList}`;
+  const answerList = allowedAnswers.map((answer) => `"${answer}"`).join(", ");
+  const responseOptions = dedent`
+    <response_options>
+      Allowed answers: ${answerList}
+    </response_options>`;
+  const questionsSection = askQuestionsPromptBuilder.build({ questions: questionsContent });
   if (!transcriptText) {
-    return askQuestionsPromptBuilder.build({ questions: questionsContent });
+    return `${questionsSection}
+${responseOptions}`;
   }
   const format = isCleanTranscript ? "plain text" : "WebVTT";
-  const transcriptSection = createTranscriptSection(transcriptText, format);
-  return askQuestionsPromptBuilder.buildWithContext(
-    { questions: questionsContent },
-    [transcriptSection]
-  );
+  const transcriptSection = renderSection(createTranscriptSection(transcriptText, format));
+  return `${transcriptSection}
+${questionsSection}
+${responseOptions}`;
 }
 async function fetchImageAsBase64(imageUrl, imageDownloadOptions) {
   "use step";
@@ -1257,8 +1298,8 @@ async function askQuestions(assetId, questions, options) {
     cleanTranscript,
     shouldSign: policy === "signed"
   })).transcriptText : "";
-  const userPrompt = buildUserPrompt(questions, transcriptText, cleanTranscript);
-  const systemPrompt = buildSystemPrompt(normalizedAnswerOptions);
+  const userPrompt = buildUserPrompt(questions, allowedAnswers, transcriptText, cleanTranscript);
+  const systemPrompt = SYSTEM_PROMPT;
   const imageUrl = await getStoryboardUrl(
     playbackId,
     storyboardWidth,
@@ -2085,7 +2126,7 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
     }
     timestamps = newTimestamps;
   }
-  const baseUrl = `https://image.mux.com/${playbackId}/thumbnail.png`;
+  const baseUrl = getMuxThumbnailBaseUrl(playbackId);
   const urlPromises = timestamps.map(async (time) => {
     if (shouldSign) {
       return signUrl(baseUrl, playbackId, "thumbnail", { time, width }, credentials);
@@ -2097,7 +2138,7 @@ async function getThumbnailUrls(playbackId, duration, options = {}) {
 // src/workflows/moderation.ts
 var DEFAULT_THRESHOLDS = {
-  sexual: 0.7,
+  sexual: 0.8,
   violence: 0.8
 };
 var DEFAULT_PROVIDER2 = "openai";
@@ -2362,7 +2403,7 @@ async function requestHiveModeration(imageUrls, maxConcurrent = 5, submissionMod
 async function getThumbnailUrlsFromTimestamps(playbackId, timestampsMs, options) {
   "use step";
   const { width, shouldSign, credentials } = options;
-  const baseUrl = `https://image.mux.com/${playbackId}/thumbnail.png`;
+  const baseUrl = getMuxThumbnailBaseUrl(playbackId);
   const urlPromises = timestampsMs.map(async (tsMs) => {
     const time = Number((tsMs / 1e3).toFixed(2));
     if (shouldSign) {
@@ -2539,96 +2580,106 @@ var TONE_INSTRUCTIONS = {
   playful: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
   professional: "Provide a professional, executive-level analysis suitable for business reporting."
 };
-var summarizationPromptBuilder = createPromptBuilder({
-  template: {
-    task: {
-      tag: "task",
-      content: "Analyze the storyboard frames and generate metadata that captures the essence of the video content."
-    },
-    title: {
-      tag: "title_requirements",
-      content: dedent4`
-        A short, compelling headline that immediately communicates the subject or action.
-        Aim for brevity - typically under 10 words. Think of how a news headline or video card title would read.
-        Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
-        Use active, specific language.`
-    },
-    description: {
-      tag: "description_requirements",
-      content: dedent4`
-        A concise summary (2-4 sentences) that describes what happens across the video.
-        Cover the main subjects, actions, setting, and any notable progression visible across frames.
-        Write in present tense. Be specific about observable details rather than making assumptions.
-        If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
-    },
-    keywords: {
-      tag: "keywords_requirements",
-      content: dedent4`
-        Specific, searchable terms (up to ${SUMMARY_KEYWORD_LIMIT}) that capture:
-        - Primary subjects (people, animals, objects)
-        - Actions and activities being performed
-        - Setting and environment
-        - Notable objects or tools
-        - Style or genre (if applicable)
-        Prefer concrete nouns and action verbs over abstract concepts.
-        Use lowercase. Avoid redundant or overly generic terms like "video" or "content".`
-    },
-    qualityGuidelines: {
-      tag: "quality_guidelines",
-      content: dedent4`
-        - Examine all frames to understand the full context and progression
-        - Be precise: "golden retriever" is better than "dog" when identifiable
-        - Capture the narrative: what begins, develops, and concludes
-        - Balance brevity with informativeness`
-    }
-  },
-  sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
-});
-var audioOnlyPromptBuilder = createPromptBuilder({
-  template: {
-    task: {
-      tag: "task",
-      content: "Analyze the transcript and generate metadata that captures the essence of the audio content."
-    },
-    title: {
-      tag: "title_requirements",
-      content: dedent4`
-        A short, compelling headline that immediately communicates the subject or topic.
-        Aim for brevity - typically under 10 words. Think of how a podcast title or audio description would read.
-        Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
-        Use active, specific language.`
-    },
-    description: {
-      tag: "description_requirements",
-      content: dedent4`
-        A concise summary (2-4 sentences) that describes the audio content.
-        Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
-        Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
-        Focus on the spoken content and any key insights, dialogue, or narrative elements.`
+function createSummarizationBuilder({ titleLength, descriptionLength, tagCount } = {}) {
+  const titleBrevity = titleLength != null ? `Aim for approximately ${titleLength} characters.` : "Aim for brevity - typically under 10 words.";
+  const descConstraint = descriptionLength != null ? `approximately ${descriptionLength} characters` : "2-4 sentences";
+  const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
+  return createPromptBuilder({
+    template: {
+      task: {
+        tag: "task",
+        content: "Analyze the storyboard frames and generate metadata that captures the essence of the video content."
+      },
+      title: {
+        tag: "title_requirements",
+        content: dedent4`
+          A short, compelling headline that immediately communicates the subject or action.
+          ${titleBrevity} Think of how a news headline or video card title would read.
+          Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
+          Use active, specific language.`
+      },
+      description: {
+        tag: "description_requirements",
+        content: dedent4`
+          A concise summary (${descConstraint}) that describes what happens across the video.
+          Cover the main subjects, actions, setting, and any notable progression visible across frames.
+          Write in present tense. Be specific about observable details rather than making assumptions.
+          If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
+      },
+      keywords: {
+        tag: "keywords_requirements",
+        content: dedent4`
+          Specific, searchable terms (up to ${keywordLimit}) that capture:
+          - Primary subjects (people, animals, objects)
+          - Actions and activities being performed
+          - Setting and environment
+          - Notable objects or tools
+          - Style or genre (if applicable)
+          Prefer concrete nouns and action verbs over abstract concepts.
+          Use lowercase. Avoid redundant or overly generic terms like "video" or "content".`
+      },
+      qualityGuidelines: {
+        tag: "quality_guidelines",
+        content: dedent4`
+          - Examine all frames to understand the full context and progression
+          - Be precise: "golden retriever" is better than "dog" when identifiable
+          - Capture the narrative: what begins, develops, and concludes
+          - Balance brevity with informativeness`
+      }
     },
-    keywords: {
-      tag: "keywords_requirements",
-      content: dedent4`
-        Specific, searchable terms (up to ${SUMMARY_KEYWORD_LIMIT}) that capture:
-        - Primary topics and themes
-        - Speakers or presenters (if named)
-        - Key concepts and terminology
-        - Content type (interview, lecture, music, etc.)
-        - Genre or style (if applicable)
-        Prefer concrete nouns and relevant terms over abstract concepts.
-        Use lowercase. Avoid redundant or overly generic terms like "audio" or "content".`
+    sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
+  });
+}
+function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {}) {
+  const titleBrevity = titleLength != null ? `Aim for approximately ${titleLength} characters.` : "Aim for brevity - typically under 10 words.";
+  const descConstraint = descriptionLength != null ? `approximately ${descriptionLength} characters` : "2-4 sentences";
+  const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
+  return createPromptBuilder({
+    template: {
+      task: {
+        tag: "task",
+        content: "Analyze the transcript and generate metadata that captures the essence of the audio content."
+      },
+      title: {
+        tag: "title_requirements",
+        content: dedent4`
+          A short, compelling headline that immediately communicates the subject or topic.
+          ${titleBrevity} Think of how a podcast title or audio description would read.
+          Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
+          Use active, specific language.`
+      },
+      description: {
+        tag: "description_requirements",
+        content: dedent4`
+          A concise summary (${descConstraint}) that describes the audio content.
+          Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
+          Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
+          Focus on the spoken content and any key insights, dialogue, or narrative elements.`
+      },
+      keywords: {
+        tag: "keywords_requirements",
+        content: dedent4`
+          Specific, searchable terms (up to ${keywordLimit}) that capture:
+          - Primary topics and themes
+          - Speakers or presenters (if named)
+          - Key concepts and terminology
+          - Content type (interview, lecture, music, etc.)
+          - Genre or style (if applicable)
+          Prefer concrete nouns and relevant terms over abstract concepts.
+          Use lowercase. Avoid redundant or overly generic terms like "audio" or "content".`
+      },
+      qualityGuidelines: {
+        tag: "quality_guidelines",
+        content: dedent4`
+          - Analyze the full transcript to understand context and themes
+          - Be precise: use specific terminology when mentioned
+          - Capture the narrative: what is introduced, discussed, and concluded
+          - Balance brevity with informativeness`
+      }
     },
-    qualityGuidelines: {
-      tag: "quality_guidelines",
-      content: dedent4`
-        - Analyze the full transcript to understand context and themes
-        - Be precise: use specific terminology when mentioned
-        - Capture the narrative: what is introduced, discussed, and concluded
-        - Balance brevity with informativeness`
-    }
-  },
-  sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
-});
+    sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
+  });
+}
 var SYSTEM_PROMPT3 = dedent4`
   <role>
     You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
@@ -2743,14 +2794,18 @@ function buildUserPrompt4({
   transcriptText,
   isCleanTranscript = true,
   promptOverrides,
-  isAudioOnly = false
+  isAudioOnly = false,
+  titleLength,
+  descriptionLength,
+  tagCount
 }) {
   const contextSections = [createToneSection(TONE_INSTRUCTIONS[tone])];
   if (transcriptText) {
     const format = isCleanTranscript ? "plain text" : "WebVTT";
     contextSections.push(createTranscriptSection(transcriptText, format));
   }
-  const promptBuilder = isAudioOnly ? audioOnlyPromptBuilder : summarizationPromptBuilder;
+  const constraints = { titleLength, descriptionLength, tagCount };
+  const promptBuilder = isAudioOnly ? createAudioOnlyBuilder(constraints) : createSummarizationBuilder(constraints);
   return promptBuilder.buildWithContext(promptOverrides, contextSections);
 }
 async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, systemPrompt, credentials) {
@@ -2820,7 +2875,7 @@ async function analyzeAudioOnly(provider, modelId, userPrompt, systemPrompt, cre
     }
   };
 }
-function normalizeKeywords(keywords) {
+function normalizeKeywords(keywords, limit = SUMMARY_KEYWORD_LIMIT) {
   if (!Array.isArray(keywords) || keywords.length === 0) {
     return [];
   }
@@ -2837,7 +2892,7 @@ function normalizeKeywords(keywords) {
     }
     uniqueLowercase.add(lower);
     normalized.push(trimmed);
-    if (normalized.length === SUMMARY_KEYWORD_LIMIT) {
+    if (normalized.length === limit) {
       break;
     }
   }
@@ -2854,7 +2909,10 @@ async function getSummaryAndTags(assetId, options) {
     imageSubmissionMode = "url",
     imageDownloadOptions,
     promptOverrides,
-    credentials
+    credentials,
+    titleLength,
+    descriptionLength,
+    tagCount
   } = options ?? {};
   if (!VALID_TONES.includes(tone)) {
     throw new Error(
@@ -2892,7 +2950,10 @@ async function getSummaryAndTags(assetId, options) {
     transcriptText,
     isCleanTranscript: cleanTranscript,
     promptOverrides,
-    isAudioOnly
+    isAudioOnly,
+    titleLength,
+    descriptionLength,
+    tagCount
   });
   let analysisResponse;
   let imageUrl;
@@ -2949,7 +3010,7 @@ async function getSummaryAndTags(assetId, options) {
     assetId,
     title: analysisResponse.result.title,
     description: analysisResponse.result.description,
-    tags: normalizeKeywords(analysisResponse.result.keywords),
+    tags: normalizeKeywords(analysisResponse.result.keywords, tagCount ?? SUMMARY_KEYWORD_LIMIT),
     storyboardUrl: imageUrl,
     // undefined for audio-only assets
     usage: {
@@ -3781,6 +3842,7 @@ import { z as z6 } from "zod";
 var translationSchema = z6.object({
   translation: z6.string()
 });
+var SYSTEM_PROMPT4 = 'You are a subtitle translation expert. Translate VTT subtitle files to the target language specified by the user. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT content.';
 async function fetchVttFromMux(vttUrl) {
   "use step";
   const vttResponse = await fetch(vttUrl);
@@ -3803,9 +3865,13 @@ async function translateVttWithAI({
     model,
     output: Output5.object({ schema: translationSchema }),
     messages: [
+      {
+        role: "system",
+        content: SYSTEM_PROMPT4
+      },
       {
         role: "user",
-        content: `Translate the following VTT subtitle file from ${fromLanguageCode} to ${toLanguageCode}. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT.
+        content: `Translate from ${fromLanguageCode} to ${toLanguageCode}:
 ${vttContent}`
       }