@mux/ai 0.7.6 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{index-B0U9upb4.d.ts → index-DP02N3iR.d.ts} +6 -0
- package/dist/index.d.ts +2 -2
- package/dist/index.js +141 -117
- package/dist/index.js.map +1 -1
- package/dist/workflows/index.d.ts +1 -1
- package/dist/workflows/index.js +140 -116
- package/dist/workflows/index.js.map +1 -1
- package/package.json +1 -1
|
@@ -523,6 +523,12 @@ interface SummarizationOptions extends MuxAIOptions {
|
|
|
523
523
|
* Useful for customizing the AI's output for specific use cases (SEO, social media, etc.)
|
|
524
524
|
*/
|
|
525
525
|
promptOverrides?: SummarizationPromptOverrides;
|
|
526
|
+
/** Desired title length in characters. */
|
|
527
|
+
titleLength?: number;
|
|
528
|
+
/** Desired description length in characters. */
|
|
529
|
+
descriptionLength?: number;
|
|
530
|
+
/** Desired number of tags. */
|
|
531
|
+
tagCount?: number;
|
|
526
532
|
}
|
|
527
533
|
declare function getSummaryAndTags(assetId: string, options?: SummarizationOptions): Promise<SummaryAndTagsResult>;
|
|
528
534
|
|
package/dist/index.d.ts
CHANGED
|
@@ -2,14 +2,14 @@ import { W as WorkflowCredentials, S as StoragePutObjectInput, a as StoragePresi
|
|
|
2
2
|
export { A as AssetTextTrack, C as ChunkEmbedding, b as ChunkingStrategy, E as Encrypted, c as EncryptedPayload, I as ImageSubmissionMode, M as MuxAIOptions, d as MuxAsset, P as PlaybackAsset, e as PlaybackPolicy, f as StorageAdapter, T as TextChunk, g as TokenChunkingConfig, h as TokenUsage, i as ToneType, U as UsageMetadata, V as VTTChunkingConfig, j as VideoEmbeddingsResult, k as WorkflowCredentialsInput, l as WorkflowMuxClient, m as decryptFromWorkflow, n as encryptForWorkflow } from './types-BRbaGW3t.js';
|
|
3
3
|
import { WORKFLOW_SERIALIZE, WORKFLOW_DESERIALIZE } from '@workflow/serde';
|
|
4
4
|
export { i as primitives } from './index-Nxf6BaBO.js';
|
|
5
|
-
export { i as workflows } from './index-B0U9upb4.js';
|
|
5
|
+
export { i as workflows } from './index-DP02N3iR.js';
|
|
6
6
|
import '@mux/mux-node';
|
|
7
7
|
import 'zod';
|
|
8
8
|
import '@ai-sdk/anthropic';
|
|
9
9
|
import '@ai-sdk/google';
|
|
10
10
|
import '@ai-sdk/openai';
|
|
11
11
|
|
|
12
|
-
var version = "0.7.6";
|
|
12
|
+
var version = "0.8.0";
|
|
13
13
|
|
|
14
14
|
/**
|
|
15
15
|
* A function that returns workflow credentials, either synchronously or asynchronously.
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ var __export = (target, all) => {
|
|
|
5
5
|
};
|
|
6
6
|
|
|
7
7
|
// package.json
|
|
8
|
-
var version = "0.7.6";
|
|
8
|
+
var version = "0.8.0";
|
|
9
9
|
|
|
10
10
|
// src/env.ts
|
|
11
11
|
import { z } from "zod";
|
|
@@ -1798,16 +1798,6 @@ var SYSTEM_PROMPT = dedent`
|
|
|
1798
1798
|
- GOOD: "A person runs through a park"
|
|
1799
1799
|
- Be specific and evidence-based
|
|
1800
1800
|
</language_guidelines>`;
|
|
1801
|
-
function buildSystemPrompt(allowedAnswers) {
|
|
1802
|
-
const answerList = allowedAnswers.map((answer) => `"${answer}"`).join(", ");
|
|
1803
|
-
return `${SYSTEM_PROMPT}
|
|
1804
|
-
|
|
1805
|
-
${dedent`
|
|
1806
|
-
<response_options>
|
|
1807
|
-
Allowed answers: ${answerList}
|
|
1808
|
-
</response_options>
|
|
1809
|
-
`}`;
|
|
1810
|
-
}
|
|
1811
1801
|
var askQuestionsPromptBuilder = createPromptBuilder({
|
|
1812
1802
|
template: {
|
|
1813
1803
|
questions: {
|
|
@@ -1817,21 +1807,30 @@ var askQuestionsPromptBuilder = createPromptBuilder({
|
|
|
1817
1807
|
},
|
|
1818
1808
|
sectionOrder: ["questions"]
|
|
1819
1809
|
});
|
|
1820
|
-
function buildUserPrompt(questions, transcriptText, isCleanTranscript = true) {
|
|
1810
|
+
function buildUserPrompt(questions, allowedAnswers, transcriptText, isCleanTranscript = true) {
|
|
1821
1811
|
const questionsList = questions.map((q, idx) => `${idx + 1}. ${q.question}`).join("\n");
|
|
1822
1812
|
const questionsContent = dedent`
|
|
1823
1813
|
Please answer the following yes/no questions about this video:
|
|
1824
1814
|
|
|
1825
1815
|
${questionsList}`;
|
|
1816
|
+
const answerList = allowedAnswers.map((answer) => `"${answer}"`).join(", ");
|
|
1817
|
+
const responseOptions = dedent`
|
|
1818
|
+
<response_options>
|
|
1819
|
+
Allowed answers: ${answerList}
|
|
1820
|
+
</response_options>`;
|
|
1821
|
+
const questionsSection = askQuestionsPromptBuilder.build({ questions: questionsContent });
|
|
1826
1822
|
if (!transcriptText) {
|
|
1827
|
-
return
|
|
1823
|
+
return `${questionsSection}
|
|
1824
|
+
|
|
1825
|
+
${responseOptions}`;
|
|
1828
1826
|
}
|
|
1829
1827
|
const format = isCleanTranscript ? "plain text" : "WebVTT";
|
|
1830
|
-
const transcriptSection = createTranscriptSection(transcriptText, format);
|
|
1831
|
-
return
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1828
|
+
const transcriptSection = renderSection(createTranscriptSection(transcriptText, format));
|
|
1829
|
+
return `${transcriptSection}
|
|
1830
|
+
|
|
1831
|
+
${questionsSection}
|
|
1832
|
+
|
|
1833
|
+
${responseOptions}`;
|
|
1835
1834
|
}
|
|
1836
1835
|
async function fetchImageAsBase64(imageUrl, imageDownloadOptions) {
|
|
1837
1836
|
"use step";
|
|
@@ -1927,8 +1926,8 @@ async function askQuestions(assetId, questions, options) {
|
|
|
1927
1926
|
cleanTranscript,
|
|
1928
1927
|
shouldSign: policy === "signed"
|
|
1929
1928
|
})).transcriptText : "";
|
|
1930
|
-
const userPrompt = buildUserPrompt(questions, transcriptText, cleanTranscript);
|
|
1931
|
-
const systemPrompt = buildSystemPrompt(allowedAnswers);
|
|
1929
|
+
const userPrompt = buildUserPrompt(questions, allowedAnswers, transcriptText, cleanTranscript);
|
|
1930
|
+
const systemPrompt = SYSTEM_PROMPT;
|
|
1932
1931
|
const imageUrl = await getStoryboardUrl(
|
|
1933
1932
|
playbackId,
|
|
1934
1933
|
storyboardWidth,
|
|
@@ -2643,7 +2642,7 @@ function planSamplingTimestamps(options) {
|
|
|
2643
2642
|
|
|
2644
2643
|
// src/workflows/moderation.ts
|
|
2645
2644
|
var DEFAULT_THRESHOLDS = {
|
|
2646
|
-
sexual: 0.
|
|
2645
|
+
sexual: 0.8,
|
|
2647
2646
|
violence: 0.8
|
|
2648
2647
|
};
|
|
2649
2648
|
var DEFAULT_PROVIDER2 = "openai";
|
|
@@ -3085,96 +3084,106 @@ var TONE_INSTRUCTIONS = {
|
|
|
3085
3084
|
playful: "Channel your inner diva! Answer with maximum sass, wit, and playful attitude. Don't hold back - be cheeky, clever, and delightfully snarky. Make it pop!",
|
|
3086
3085
|
professional: "Provide a professional, executive-level analysis suitable for business reporting."
|
|
3087
3086
|
};
|
|
3088
|
-
|
|
3089
|
-
|
|
3090
|
-
|
|
3091
|
-
|
|
3092
|
-
|
|
3093
|
-
|
|
3094
|
-
|
|
3095
|
-
|
|
3096
|
-
|
|
3097
|
-
|
|
3098
|
-
|
|
3099
|
-
|
|
3100
|
-
|
|
3101
|
-
|
|
3102
|
-
|
|
3103
|
-
|
|
3104
|
-
|
|
3105
|
-
|
|
3106
|
-
|
|
3107
|
-
|
|
3108
|
-
|
|
3109
|
-
|
|
3110
|
-
|
|
3111
|
-
|
|
3112
|
-
|
|
3113
|
-
|
|
3114
|
-
|
|
3115
|
-
|
|
3116
|
-
|
|
3117
|
-
|
|
3118
|
-
|
|
3119
|
-
|
|
3120
|
-
|
|
3121
|
-
|
|
3122
|
-
|
|
3123
|
-
|
|
3124
|
-
|
|
3125
|
-
|
|
3126
|
-
|
|
3127
|
-
|
|
3128
|
-
|
|
3129
|
-
|
|
3130
|
-
|
|
3131
|
-
|
|
3132
|
-
|
|
3133
|
-
|
|
3134
|
-
template: {
|
|
3135
|
-
task: {
|
|
3136
|
-
tag: "task",
|
|
3137
|
-
content: "Analyze the transcript and generate metadata that captures the essence of the audio content."
|
|
3138
|
-
},
|
|
3139
|
-
title: {
|
|
3140
|
-
tag: "title_requirements",
|
|
3141
|
-
content: dedent4`
|
|
3142
|
-
A short, compelling headline that immediately communicates the subject or topic.
|
|
3143
|
-
Aim for brevity - typically under 10 words. Think of how a podcast title or audio description would read.
|
|
3144
|
-
Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
|
|
3145
|
-
Use active, specific language.`
|
|
3146
|
-
},
|
|
3147
|
-
description: {
|
|
3148
|
-
tag: "description_requirements",
|
|
3149
|
-
content: dedent4`
|
|
3150
|
-
A concise summary (2-4 sentences) that describes the audio content.
|
|
3151
|
-
Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
|
|
3152
|
-
Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
|
|
3153
|
-
Focus on the spoken content and any key insights, dialogue, or narrative elements.`
|
|
3087
|
+
function createSummarizationBuilder({ titleLength, descriptionLength, tagCount } = {}) {
|
|
3088
|
+
const titleBrevity = titleLength != null ? `Aim for approximately ${titleLength} characters.` : "Aim for brevity - typically under 10 words.";
|
|
3089
|
+
const descConstraint = descriptionLength != null ? `approximately ${descriptionLength} characters` : "2-4 sentences";
|
|
3090
|
+
const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
|
|
3091
|
+
return createPromptBuilder({
|
|
3092
|
+
template: {
|
|
3093
|
+
task: {
|
|
3094
|
+
tag: "task",
|
|
3095
|
+
content: "Analyze the storyboard frames and generate metadata that captures the essence of the video content."
|
|
3096
|
+
},
|
|
3097
|
+
title: {
|
|
3098
|
+
tag: "title_requirements",
|
|
3099
|
+
content: dedent4`
|
|
3100
|
+
A short, compelling headline that immediately communicates the subject or action.
|
|
3101
|
+
${titleBrevity} Think of how a news headline or video card title would read.
|
|
3102
|
+
Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
|
|
3103
|
+
Use active, specific language.`
|
|
3104
|
+
},
|
|
3105
|
+
description: {
|
|
3106
|
+
tag: "description_requirements",
|
|
3107
|
+
content: dedent4`
|
|
3108
|
+
A concise summary (${descConstraint}) that describes what happens across the video.
|
|
3109
|
+
Cover the main subjects, actions, setting, and any notable progression visible across frames.
|
|
3110
|
+
Write in present tense. Be specific about observable details rather than making assumptions.
|
|
3111
|
+
If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`
|
|
3112
|
+
},
|
|
3113
|
+
keywords: {
|
|
3114
|
+
tag: "keywords_requirements",
|
|
3115
|
+
content: dedent4`
|
|
3116
|
+
Specific, searchable terms (up to ${keywordLimit}) that capture:
|
|
3117
|
+
- Primary subjects (people, animals, objects)
|
|
3118
|
+
- Actions and activities being performed
|
|
3119
|
+
- Setting and environment
|
|
3120
|
+
- Notable objects or tools
|
|
3121
|
+
- Style or genre (if applicable)
|
|
3122
|
+
Prefer concrete nouns and action verbs over abstract concepts.
|
|
3123
|
+
Use lowercase. Avoid redundant or overly generic terms like "video" or "content".`
|
|
3124
|
+
},
|
|
3125
|
+
qualityGuidelines: {
|
|
3126
|
+
tag: "quality_guidelines",
|
|
3127
|
+
content: dedent4`
|
|
3128
|
+
- Examine all frames to understand the full context and progression
|
|
3129
|
+
- Be precise: "golden retriever" is better than "dog" when identifiable
|
|
3130
|
+
- Capture the narrative: what begins, develops, and concludes
|
|
3131
|
+
- Balance brevity with informativeness`
|
|
3132
|
+
}
|
|
3154
3133
|
},
|
|
3155
|
-
|
|
3156
|
-
|
|
3157
|
-
|
|
3158
|
-
|
|
3159
|
-
|
|
3160
|
-
|
|
3161
|
-
|
|
3162
|
-
|
|
3163
|
-
|
|
3164
|
-
|
|
3165
|
-
|
|
3134
|
+
sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
|
|
3135
|
+
});
|
|
3136
|
+
}
|
|
3137
|
+
function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {}) {
|
|
3138
|
+
const titleBrevity = titleLength != null ? `Aim for approximately ${titleLength} characters.` : "Aim for brevity - typically under 10 words.";
|
|
3139
|
+
const descConstraint = descriptionLength != null ? `approximately ${descriptionLength} characters` : "2-4 sentences";
|
|
3140
|
+
const keywordLimit = tagCount ?? SUMMARY_KEYWORD_LIMIT;
|
|
3141
|
+
return createPromptBuilder({
|
|
3142
|
+
template: {
|
|
3143
|
+
task: {
|
|
3144
|
+
tag: "task",
|
|
3145
|
+
content: "Analyze the transcript and generate metadata that captures the essence of the audio content."
|
|
3146
|
+
},
|
|
3147
|
+
title: {
|
|
3148
|
+
tag: "title_requirements",
|
|
3149
|
+
content: dedent4`
|
|
3150
|
+
A short, compelling headline that immediately communicates the subject or topic.
|
|
3151
|
+
${titleBrevity} Think of how a podcast title or audio description would read.
|
|
3152
|
+
Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
|
|
3153
|
+
Use active, specific language.`
|
|
3154
|
+
},
|
|
3155
|
+
description: {
|
|
3156
|
+
tag: "description_requirements",
|
|
3157
|
+
content: dedent4`
|
|
3158
|
+
A concise summary (${descConstraint}) that describes the audio content.
|
|
3159
|
+
Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
|
|
3160
|
+
Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
|
|
3161
|
+
Focus on the spoken content and any key insights, dialogue, or narrative elements.`
|
|
3162
|
+
},
|
|
3163
|
+
keywords: {
|
|
3164
|
+
tag: "keywords_requirements",
|
|
3165
|
+
content: dedent4`
|
|
3166
|
+
Specific, searchable terms (up to ${keywordLimit}) that capture:
|
|
3167
|
+
- Primary topics and themes
|
|
3168
|
+
- Speakers or presenters (if named)
|
|
3169
|
+
- Key concepts and terminology
|
|
3170
|
+
- Content type (interview, lecture, music, etc.)
|
|
3171
|
+
- Genre or style (if applicable)
|
|
3172
|
+
Prefer concrete nouns and relevant terms over abstract concepts.
|
|
3173
|
+
Use lowercase. Avoid redundant or overly generic terms like "audio" or "content".`
|
|
3174
|
+
},
|
|
3175
|
+
qualityGuidelines: {
|
|
3176
|
+
tag: "quality_guidelines",
|
|
3177
|
+
content: dedent4`
|
|
3178
|
+
- Analyze the full transcript to understand context and themes
|
|
3179
|
+
- Be precise: use specific terminology when mentioned
|
|
3180
|
+
- Capture the narrative: what is introduced, discussed, and concluded
|
|
3181
|
+
- Balance brevity with informativeness`
|
|
3182
|
+
}
|
|
3166
3183
|
},
|
|
3167
|
-
|
|
3168
|
-
|
|
3169
|
-
|
|
3170
|
-
- Analyze the full transcript to understand context and themes
|
|
3171
|
-
- Be precise: use specific terminology when mentioned
|
|
3172
|
-
- Capture the narrative: what is introduced, discussed, and concluded
|
|
3173
|
-
- Balance brevity with informativeness`
|
|
3174
|
-
}
|
|
3175
|
-
},
|
|
3176
|
-
sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
|
|
3177
|
-
});
|
|
3184
|
+
sectionOrder: ["task", "title", "description", "keywords", "qualityGuidelines"]
|
|
3185
|
+
});
|
|
3186
|
+
}
|
|
3178
3187
|
var SYSTEM_PROMPT3 = dedent4`
|
|
3179
3188
|
<role>
|
|
3180
3189
|
You are a video content analyst specializing in storyboard interpretation and multimodal analysis.
|
|
@@ -3289,14 +3298,18 @@ function buildUserPrompt4({
|
|
|
3289
3298
|
transcriptText,
|
|
3290
3299
|
isCleanTranscript = true,
|
|
3291
3300
|
promptOverrides,
|
|
3292
|
-
isAudioOnly = false
|
|
3301
|
+
isAudioOnly = false,
|
|
3302
|
+
titleLength,
|
|
3303
|
+
descriptionLength,
|
|
3304
|
+
tagCount
|
|
3293
3305
|
}) {
|
|
3294
3306
|
const contextSections = [createToneSection(TONE_INSTRUCTIONS[tone])];
|
|
3295
3307
|
if (transcriptText) {
|
|
3296
3308
|
const format = isCleanTranscript ? "plain text" : "WebVTT";
|
|
3297
3309
|
contextSections.push(createTranscriptSection(transcriptText, format));
|
|
3298
3310
|
}
|
|
3299
|
-
const
|
|
3311
|
+
const constraints = { titleLength, descriptionLength, tagCount };
|
|
3312
|
+
const promptBuilder = isAudioOnly ? createAudioOnlyBuilder(constraints) : createSummarizationBuilder(constraints);
|
|
3300
3313
|
return promptBuilder.buildWithContext(promptOverrides, contextSections);
|
|
3301
3314
|
}
|
|
3302
3315
|
async function analyzeStoryboard2(imageDataUrl, provider, modelId, userPrompt, systemPrompt, credentials) {
|
|
@@ -3366,7 +3379,7 @@ async function analyzeAudioOnly(provider, modelId, userPrompt, systemPrompt, cre
|
|
|
3366
3379
|
}
|
|
3367
3380
|
};
|
|
3368
3381
|
}
|
|
3369
|
-
function normalizeKeywords(keywords) {
|
|
3382
|
+
function normalizeKeywords(keywords, limit = SUMMARY_KEYWORD_LIMIT) {
|
|
3370
3383
|
if (!Array.isArray(keywords) || keywords.length === 0) {
|
|
3371
3384
|
return [];
|
|
3372
3385
|
}
|
|
@@ -3383,7 +3396,7 @@ function normalizeKeywords(keywords) {
|
|
|
3383
3396
|
}
|
|
3384
3397
|
uniqueLowercase.add(lower);
|
|
3385
3398
|
normalized.push(trimmed);
|
|
3386
|
-
if (normalized.length === SUMMARY_KEYWORD_LIMIT) {
|
|
3399
|
+
if (normalized.length === limit) {
|
|
3387
3400
|
break;
|
|
3388
3401
|
}
|
|
3389
3402
|
}
|
|
@@ -3400,7 +3413,10 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
3400
3413
|
imageSubmissionMode = "url",
|
|
3401
3414
|
imageDownloadOptions,
|
|
3402
3415
|
promptOverrides,
|
|
3403
|
-
credentials
|
|
3416
|
+
credentials,
|
|
3417
|
+
titleLength,
|
|
3418
|
+
descriptionLength,
|
|
3419
|
+
tagCount
|
|
3404
3420
|
} = options ?? {};
|
|
3405
3421
|
if (!VALID_TONES.includes(tone)) {
|
|
3406
3422
|
throw new Error(
|
|
@@ -3438,7 +3454,10 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
3438
3454
|
transcriptText,
|
|
3439
3455
|
isCleanTranscript: cleanTranscript,
|
|
3440
3456
|
promptOverrides,
|
|
3441
|
-
isAudioOnly
|
|
3457
|
+
isAudioOnly,
|
|
3458
|
+
titleLength,
|
|
3459
|
+
descriptionLength,
|
|
3460
|
+
tagCount
|
|
3442
3461
|
});
|
|
3443
3462
|
let analysisResponse;
|
|
3444
3463
|
let imageUrl;
|
|
@@ -3495,7 +3514,7 @@ async function getSummaryAndTags(assetId, options) {
|
|
|
3495
3514
|
assetId,
|
|
3496
3515
|
title: analysisResponse.result.title,
|
|
3497
3516
|
description: analysisResponse.result.description,
|
|
3498
|
-
tags: normalizeKeywords(analysisResponse.result.keywords),
|
|
3517
|
+
tags: normalizeKeywords(analysisResponse.result.keywords, tagCount ?? SUMMARY_KEYWORD_LIMIT),
|
|
3499
3518
|
storyboardUrl: imageUrl,
|
|
3500
3519
|
// undefined for audio-only assets
|
|
3501
3520
|
usage: {
|
|
@@ -4115,6 +4134,7 @@ import { z as z6 } from "zod";
|
|
|
4115
4134
|
var translationSchema = z6.object({
|
|
4116
4135
|
translation: z6.string()
|
|
4117
4136
|
});
|
|
4137
|
+
var SYSTEM_PROMPT4 = 'You are a subtitle translation expert. Translate VTT subtitle files to the target language specified by the user. Preserve all timestamps and VTT formatting exactly as they appear. Return JSON with a single key "translation" containing the translated VTT content.';
|
|
4118
4138
|
async function fetchVttFromMux(vttUrl) {
|
|
4119
4139
|
"use step";
|
|
4120
4140
|
const vttResponse = await fetch(vttUrl);
|
|
@@ -4137,9 +4157,13 @@ async function translateVttWithAI({
|
|
|
4137
4157
|
model,
|
|
4138
4158
|
output: Output5.object({ schema: translationSchema }),
|
|
4139
4159
|
messages: [
|
|
4160
|
+
{
|
|
4161
|
+
role: "system",
|
|
4162
|
+
content: SYSTEM_PROMPT4
|
|
4163
|
+
},
|
|
4140
4164
|
{
|
|
4141
4165
|
role: "user",
|
|
4142
|
-
content: `Translate
|
|
4166
|
+
content: `Translate from ${fromLanguageCode} to ${toLanguageCode}:
|
|
4143
4167
|
|
|
4144
4168
|
${vttContent}`
|
|
4145
4169
|
}
|