@mux/ai 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{index-BapL6paa.d.ts → index-DyzifniY.d.ts} +21 -11
- package/dist/index.d.ts +2 -2
- package/dist/index.js +62 -30
- package/dist/index.js.map +1 -1
- package/dist/workflows/index.d.ts +1 -1
- package/dist/workflows/index.js +61 -29
- package/dist/workflows/index.js.map +1 -1
- package/package.json +1 -1
|
@@ -43,12 +43,14 @@ interface Question {
|
|
|
43
43
|
interface QuestionAnswer {
|
|
44
44
|
/** The original question */
|
|
45
45
|
question: string;
|
|
46
|
-
/** Answer selected from the allowed options */
|
|
47
|
-
answer: string;
|
|
48
|
-
/** Confidence score between 0 and 1 */
|
|
46
|
+
/** Answer selected from the allowed options. Undefined when skipped. */
|
|
47
|
+
answer?: string;
|
|
48
|
+
/** Confidence score between 0 and 1. Always 0 when skipped. */
|
|
49
49
|
confidence: number;
|
|
50
|
-
/** Reasoning explaining the answer */
|
|
50
|
+
/** Reasoning explaining the answer, or why the question was skipped */
|
|
51
51
|
reasoning: string;
|
|
52
|
+
/** Whether the question was skipped due to irrelevance to the video content */
|
|
53
|
+
skipped: boolean;
|
|
52
54
|
}
|
|
53
55
|
/** Configuration options for askQuestions workflow. */
|
|
54
56
|
interface AskQuestionsOptions extends MuxAIOptions {
|
|
@@ -82,22 +84,25 @@ interface AskQuestionsResult {
|
|
|
82
84
|
/** Raw transcript text used for analysis (when includeTranscript is true). */
|
|
83
85
|
transcriptText?: string;
|
|
84
86
|
}
|
|
85
|
-
/** Zod schema for a single answer. */
|
|
87
|
+
/** Zod schema for a single answer (matches the public QuestionAnswer interface). */
|
|
86
88
|
declare const questionAnswerSchema: z.ZodObject<{
|
|
87
89
|
question: z.ZodString;
|
|
88
|
-
answer: z.ZodString;
|
|
90
|
+
answer: z.ZodOptional<z.ZodString>;
|
|
89
91
|
confidence: z.ZodNumber;
|
|
90
92
|
reasoning: z.ZodString;
|
|
93
|
+
skipped: z.ZodBoolean;
|
|
91
94
|
}, "strip", z.ZodTypeAny, {
|
|
92
95
|
question: string;
|
|
93
|
-
answer: string;
|
|
94
96
|
confidence: number;
|
|
95
97
|
reasoning: string;
|
|
98
|
+
skipped: boolean;
|
|
99
|
+
answer?: string | undefined;
|
|
96
100
|
}, {
|
|
97
101
|
question: string;
|
|
98
|
-
answer: string;
|
|
99
102
|
confidence: number;
|
|
100
103
|
reasoning: string;
|
|
104
|
+
skipped: boolean;
|
|
105
|
+
answer?: string | undefined;
|
|
101
106
|
}>;
|
|
102
107
|
type QuestionAnswerType = z.infer<typeof questionAnswerSchema>;
|
|
103
108
|
declare function createAskQuestionsSchema(allowedAnswers: [string, ...string[]]): z.ZodObject<{
|
|
@@ -105,18 +110,21 @@ declare function createAskQuestionsSchema(allowedAnswers: [string, ...string[]])
|
|
|
105
110
|
question: z.ZodString;
|
|
106
111
|
confidence: z.ZodNumber;
|
|
107
112
|
reasoning: z.ZodString;
|
|
113
|
+
skipped: z.ZodBoolean;
|
|
108
114
|
} & {
|
|
109
|
-
answer: z.ZodEnum<[string, ...string[]]>;
|
|
115
|
+
answer: z.ZodEnum<[string, ...string[], "__SKIPPED__"]>;
|
|
110
116
|
}, "strip", z.ZodTypeAny, {
|
|
111
117
|
question: string;
|
|
112
118
|
answer: string;
|
|
113
119
|
confidence: number;
|
|
114
120
|
reasoning: string;
|
|
121
|
+
skipped: boolean;
|
|
115
122
|
}, {
|
|
116
123
|
question: string;
|
|
117
124
|
answer: string;
|
|
118
125
|
confidence: number;
|
|
119
126
|
reasoning: string;
|
|
127
|
+
skipped: boolean;
|
|
120
128
|
}>, "many">;
|
|
121
129
|
}, "strip", z.ZodTypeAny, {
|
|
122
130
|
answers: {
|
|
@@ -124,6 +132,7 @@ declare function createAskQuestionsSchema(allowedAnswers: [string, ...string[]])
|
|
|
124
132
|
answer: string;
|
|
125
133
|
confidence: number;
|
|
126
134
|
reasoning: string;
|
|
135
|
+
skipped: boolean;
|
|
127
136
|
}[];
|
|
128
137
|
}, {
|
|
129
138
|
answers: {
|
|
@@ -131,6 +140,7 @@ declare function createAskQuestionsSchema(allowedAnswers: [string, ...string[]])
|
|
|
131
140
|
answer: string;
|
|
132
141
|
confidence: number;
|
|
133
142
|
reasoning: string;
|
|
143
|
+
skipped: boolean;
|
|
134
144
|
}[];
|
|
135
145
|
}>;
|
|
136
146
|
type AskQuestionsSchema = ReturnType<typeof createAskQuestionsSchema>;
|
|
@@ -646,9 +656,9 @@ interface SummarizationOptions extends MuxAIOptions {
|
|
|
646
656
|
* Useful for customizing the AI's output for specific use cases (SEO, social media, etc.)
|
|
647
657
|
*/
|
|
648
658
|
promptOverrides?: SummarizationPromptOverrides;
|
|
649
|
-
/**
|
|
659
|
+
/** Maximum title length in words. Shorter titles are preferred. */
|
|
650
660
|
titleLength?: number;
|
|
651
|
-
/**
|
|
661
|
+
/** Maximum description length in words. Shorter descriptions are acceptable. */
|
|
652
662
|
descriptionLength?: number;
|
|
653
663
|
/** Desired number of tags. */
|
|
654
664
|
tagCount?: number;
|
package/dist/index.d.ts
CHANGED
|
@@ -2,14 +2,14 @@ import { W as WorkflowCredentials, S as StoragePutObjectInput, a as StoragePresi
|
|
|
2
2
|
export { A as AssetTextTrack, C as ChunkEmbedding, b as ChunkingStrategy, E as Encrypted, c as EncryptedPayload, I as ImageSubmissionMode, M as MuxAIOptions, d as MuxAsset, P as PlaybackAsset, e as PlaybackPolicy, f as StorageAdapter, T as TextChunk, g as TokenChunkingConfig, h as TokenUsage, i as ToneType, U as UsageMetadata, V as VTTChunkingConfig, j as VideoEmbeddingsResult, k as WorkflowCredentialsInput, l as WorkflowMuxClient, m as decryptFromWorkflow, n as encryptForWorkflow } from './types-BRbaGW3t.js';
|
|
3
3
|
import { WORKFLOW_SERIALIZE, WORKFLOW_DESERIALIZE } from '@workflow/serde';
|
|
4
4
|
export { i as primitives } from './index-DLhfJsOd.js';
|
|
5
|
-
export { i as workflows } from './index-BapL6paa.js';
|
|
5
|
+
export { i as workflows } from './index-DyzifniY.js';
|
|
6
6
|
import '@mux/mux-node';
|
|
7
7
|
import 'zod';
|
|
8
8
|
import '@ai-sdk/anthropic';
|
|
9
9
|
import '@ai-sdk/google';
|
|
10
10
|
import '@ai-sdk/openai';
|
|
11
11
|
|
|
12
|
-
var version = "0.11.0";
|
|
12
|
+
var version = "0.12.0";
|
|
13
13
|
|
|
14
14
|
/**
|
|
15
15
|
* A function that returns workflow credentials, either synchronously or asynchronously.
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ var __export = (target, all) => {
|
|
|
5
5
|
};
|
|
6
6
|
|
|
7
7
|
// package.json
|
|
8
|
-
var version = "0.11.0";
|
|
8
|
+
var version = "0.12.0";
|
|
9
9
|
|
|
10
10
|
// src/env.ts
|
|
11
11
|
import { z } from "zod";
|
|
@@ -2241,12 +2241,14 @@ async function withRetry(fn, {
|
|
|
2241
2241
|
// src/workflows/ask-questions.ts
|
|
2242
2242
|
var questionAnswerSchema = z2.object({
|
|
2243
2243
|
question: z2.string(),
|
|
2244
|
-
answer: z2.string(),
|
|
2244
|
+
answer: z2.string().optional(),
|
|
2245
2245
|
confidence: z2.number(),
|
|
2246
|
-
reasoning: z2.string()
|
|
2246
|
+
reasoning: z2.string(),
|
|
2247
|
+
skipped: z2.boolean()
|
|
2247
2248
|
});
|
|
2249
|
+
var SKIP_SENTINEL = "__SKIPPED__";
|
|
2248
2250
|
function createAskQuestionsSchema(allowedAnswers) {
|
|
2249
|
-
const answerSchema = z2.enum(allowedAnswers);
|
|
2251
|
+
const answerSchema = z2.enum([...allowedAnswers, SKIP_SENTINEL]);
|
|
2250
2252
|
return z2.object({
|
|
2251
2253
|
answers: z2.array(
|
|
2252
2254
|
questionAnswerSchema.extend({
|
|
@@ -2302,8 +2304,32 @@ var SYSTEM_PROMPT = dedent`
|
|
|
2302
2304
|
- Be precise: cite specific frames, objects, actions, or transcript quotes
|
|
2303
2305
|
</answer_guidelines>
|
|
2304
2306
|
|
|
2307
|
+
<relevance_filtering>
|
|
2308
|
+
Before answering each question, assess whether it can be meaningfully
|
|
2309
|
+
answered based on the video storyboard and/or transcript. A question is
|
|
2310
|
+
relevant if it asks about something observable or inferable from the
|
|
2311
|
+
video content (visuals, audio, dialogue, setting, subjects, actions, etc.).
|
|
2312
|
+
|
|
2313
|
+
Mark a question as skipped (skipped: true) if it:
|
|
2314
|
+
- Is completely unrelated to video content (e.g., math, trivia, personal questions)
|
|
2315
|
+
- Asks about information that cannot be determined from storyboard frames or transcript
|
|
2316
|
+
- Is a general knowledge question with no connection to what is shown or said in the video
|
|
2317
|
+
- Attempts to use the system for non-video-analysis purposes
|
|
2318
|
+
|
|
2319
|
+
For skipped questions:
|
|
2320
|
+
- Set skipped to true
|
|
2321
|
+
- Set answer to "${SKIP_SENTINEL}"
|
|
2322
|
+
- Set confidence to 0
|
|
2323
|
+
- Use the reasoning field to explain why the question is not answerable
|
|
2324
|
+
from the video content
|
|
2325
|
+
|
|
2326
|
+
For borderline questions that are loosely related to the video content,
|
|
2327
|
+
still answer them but use a lower confidence score to reflect uncertainty.
|
|
2328
|
+
</relevance_filtering>
|
|
2329
|
+
|
|
2305
2330
|
<constraints>
|
|
2306
|
-
- You MUST answer every question with one of the allowed response options
|
|
2331
|
+
- You MUST answer every relevant question with one of the allowed response options
|
|
2332
|
+
- Skip irrelevant questions as described in relevance_filtering
|
|
2307
2333
|
- Only describe observable evidence from frames or transcript
|
|
2308
2334
|
- Do not fabricate details or make unsupported assumptions
|
|
2309
2335
|
- Return structured data matching the requested schema exactly
|
|
@@ -2379,14 +2405,7 @@ async function analyzeQuestionsWithStoryboard(imageDataUrl, provider, modelId, u
|
|
|
2379
2405
|
]
|
|
2380
2406
|
});
|
|
2381
2407
|
return {
|
|
2382
|
-
result: {
|
|
2383
|
-
answers: response.output.answers.map((answer) => ({
|
|
2384
|
-
...answer,
|
|
2385
|
-
// Strip numbering prefix (e.g., "1. " or "2. ") from questions
|
|
2386
|
-
question: answer.question.replace(/^\d+\.\s*/, ""),
|
|
2387
|
-
confidence: Math.min(1, Math.max(0, answer.confidence))
|
|
2388
|
-
}))
|
|
2389
|
-
},
|
|
2408
|
+
result: response.output,
|
|
2390
2409
|
usage: {
|
|
2391
2410
|
inputTokens: response.usage.inputTokens,
|
|
2392
2411
|
outputTokens: response.usage.outputTokens,
|
|
@@ -2492,9 +2511,20 @@ async function askQuestions(assetId, questions, options) {
|
|
|
2492
2511
|
`Expected ${questions.length} answers but received ${analysisResponse.result.answers.length}`
|
|
2493
2512
|
);
|
|
2494
2513
|
}
|
|
2514
|
+
const answers = analysisResponse.result.answers.map((raw) => {
|
|
2515
|
+
const isSkipped = raw.skipped || raw.answer === SKIP_SENTINEL;
|
|
2516
|
+
return {
|
|
2517
|
+
// Strip numbering prefix (e.g., "1. " or "2. ") from questions
|
|
2518
|
+
question: raw.question.replace(/^\d+\.\s*/, ""),
|
|
2519
|
+
confidence: isSkipped ? 0 : Math.min(1, Math.max(0, raw.confidence)),
|
|
2520
|
+
reasoning: raw.reasoning,
|
|
2521
|
+
skipped: isSkipped,
|
|
2522
|
+
...isSkipped ? {} : { answer: raw.answer }
|
|
2523
|
+
};
|
|
2524
|
+
});
|
|
2495
2525
|
return {
|
|
2496
2526
|
assetId,
|
|
2497
|
-
answers
|
|
2527
|
+
answers,
|
|
2498
2528
|
storyboardUrl: imageUrl,
|
|
2499
2529
|
usage: {
|
|
2500
2530
|
...analysisResponse.usage,
|
|
@@ -4210,43 +4240,43 @@ var DESCRIPTION_LENGTH_THRESHOLD_LARGE = 100;
|
|
|
4210
4240
|
function buildDescriptionGuidance(wordCount, contentType) {
|
|
4211
4241
|
if (wordCount < DESCRIPTION_LENGTH_THRESHOLD_SMALL) {
|
|
4212
4242
|
if (contentType === "video") {
|
|
4213
|
-
return dedent5`A brief summary of the video in
|
|
4243
|
+
return dedent5`A brief summary of the video in no more than ${wordCount} words. Shorter is fine.
|
|
4214
4244
|
Focus on the single most important subject or action.
|
|
4215
4245
|
Write in present tense.`;
|
|
4216
4246
|
}
|
|
4217
|
-
return dedent5`A brief summary of the audio content in
|
|
4247
|
+
return dedent5`A brief summary of the audio content in no more than ${wordCount} words. Shorter is fine.
|
|
4218
4248
|
Focus on the single most important topic or theme.
|
|
4219
4249
|
Write in present tense.`;
|
|
4220
4250
|
}
|
|
4221
4251
|
if (wordCount > DESCRIPTION_LENGTH_THRESHOLD_LARGE) {
|
|
4222
4252
|
if (contentType === "video") {
|
|
4223
4253
|
return dedent5`A detailed summary that describes what happens across the video.
|
|
4224
|
-
|
|
4254
|
+
Never exceed ${wordCount} words, but shorter is perfectly fine. You may use multiple sentences.
|
|
4225
4255
|
Be thorough: cover subjects, actions, setting, progression, and any notable details visible across frames.
|
|
4226
4256
|
Write in present tense. Be specific about observable details rather than making assumptions.
|
|
4227
4257
|
If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`;
|
|
4228
4258
|
}
|
|
4229
4259
|
return dedent5`A detailed summary that describes the audio content.
|
|
4230
|
-
|
|
4260
|
+
Never exceed ${wordCount} words, but shorter is perfectly fine. You may use multiple sentences.
|
|
4231
4261
|
Be thorough: cover topics, speakers, themes, progression, and any notable insights.
|
|
4232
4262
|
Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
|
|
4233
4263
|
Focus on the spoken content and any key insights, dialogue, or narrative elements.`;
|
|
4234
4264
|
}
|
|
4235
4265
|
if (contentType === "video") {
|
|
4236
4266
|
return dedent5`A summary that describes what happens across the video.
|
|
4237
|
-
|
|
4267
|
+
Never exceed ${wordCount} words, but shorter is perfectly fine. You may use multiple sentences.
|
|
4238
4268
|
Cover the main subjects, actions, setting, and any notable progression visible across frames.
|
|
4239
4269
|
Write in present tense. Be specific about observable details rather than making assumptions.
|
|
4240
4270
|
If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`;
|
|
4241
4271
|
}
|
|
4242
4272
|
return dedent5`A summary that describes the audio content.
|
|
4243
|
-
|
|
4273
|
+
Never exceed ${wordCount} words, but shorter is perfectly fine. You may use multiple sentences.
|
|
4244
4274
|
Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
|
|
4245
4275
|
Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
|
|
4246
4276
|
Focus on the spoken content and any key insights, dialogue, or narrative elements.`;
|
|
4247
4277
|
}
|
|
4248
4278
|
function createSummarizationBuilder({ titleLength, descriptionLength, tagCount } = {}) {
|
|
4249
|
-
const
|
|
4279
|
+
const titleLimit = titleLength ?? DEFAULT_TITLE_LENGTH;
|
|
4250
4280
|
const keywordLimit = tagCount ?? DEFAULT_SUMMARY_KEYWORD_LIMIT;
|
|
4251
4281
|
return createPromptBuilder({
|
|
4252
4282
|
template: {
|
|
@@ -4257,10 +4287,11 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
|
|
|
4257
4287
|
title: {
|
|
4258
4288
|
tag: "title_requirements",
|
|
4259
4289
|
content: dedent5`
|
|
4260
|
-
A
|
|
4261
|
-
${
|
|
4262
|
-
|
|
4263
|
-
|
|
4290
|
+
A concise, label-style title — not a sentence or description.
|
|
4291
|
+
Never exceed ${titleLimit} words, but shorter is better.
|
|
4292
|
+
Think of how a video card title, playlist entry, or file name would read — e.g. "Predator: Badlands Trailer" or "Chef Prepares Holiday Feast".
|
|
4293
|
+
Start with the primary subject or topic. Never begin with "A video of" or similar phrasing.
|
|
4294
|
+
Use specific nouns over lengthy descriptions. Avoid clauses, conjunctions, or narrative structure.`
|
|
4264
4295
|
},
|
|
4265
4296
|
description: {
|
|
4266
4297
|
tag: "description_requirements",
|
|
@@ -4291,7 +4322,7 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
|
|
|
4291
4322
|
});
|
|
4292
4323
|
}
|
|
4293
4324
|
function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {}) {
|
|
4294
|
-
const
|
|
4325
|
+
const titleLimit = titleLength ?? DEFAULT_TITLE_LENGTH;
|
|
4295
4326
|
const keywordLimit = tagCount ?? DEFAULT_SUMMARY_KEYWORD_LIMIT;
|
|
4296
4327
|
return createPromptBuilder({
|
|
4297
4328
|
template: {
|
|
@@ -4302,10 +4333,11 @@ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {
|
|
|
4302
4333
|
title: {
|
|
4303
4334
|
tag: "title_requirements",
|
|
4304
4335
|
content: dedent5`
|
|
4305
|
-
A
|
|
4306
|
-
${
|
|
4307
|
-
|
|
4308
|
-
|
|
4336
|
+
A concise, label-style title — not a sentence or description.
|
|
4337
|
+
Never exceed ${titleLimit} words, but shorter is better.
|
|
4338
|
+
Think of how a podcast episode title or playlist entry would read — e.g. "Weekly News Roundup" or "Interview with Dr. Smith".
|
|
4339
|
+
Start with the primary subject or topic. Never begin with "An audio of" or similar phrasing.
|
|
4340
|
+
Use specific nouns over lengthy descriptions. Avoid clauses, conjunctions, or narrative structure.`
|
|
4309
4341
|
},
|
|
4310
4342
|
description: {
|
|
4311
4343
|
tag: "description_requirements",
|