@mux/ai 0.11.0 → 0.12.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -43,12 +43,14 @@ interface Question {
43
43
  interface QuestionAnswer {
44
44
  /** The original question */
45
45
  question: string;
46
- /** Answer selected from the allowed options */
47
- answer: string;
48
- /** Confidence score between 0 and 1 */
46
+ /** Answer selected from the allowed options. Undefined when skipped. */
47
+ answer?: string;
48
+ /** Confidence score between 0 and 1. Always 0 when skipped. */
49
49
  confidence: number;
50
- /** Reasoning explaining the answer based on observable evidence */
50
+ /** Reasoning explaining the answer, or why the question was skipped */
51
51
  reasoning: string;
52
+ /** Whether the question was skipped due to irrelevance to the video content */
53
+ skipped: boolean;
52
54
  }
53
55
  /** Configuration options for askQuestions workflow. */
54
56
  interface AskQuestionsOptions extends MuxAIOptions {
@@ -82,22 +84,25 @@ interface AskQuestionsResult {
82
84
  /** Raw transcript text used for analysis (when includeTranscript is true). */
83
85
  transcriptText?: string;
84
86
  }
85
- /** Zod schema for a single answer. */
87
+ /** Zod schema for a single answer (matches the public QuestionAnswer interface). */
86
88
  declare const questionAnswerSchema: z.ZodObject<{
87
89
  question: z.ZodString;
88
- answer: z.ZodString;
90
+ answer: z.ZodOptional<z.ZodString>;
89
91
  confidence: z.ZodNumber;
90
92
  reasoning: z.ZodString;
93
+ skipped: z.ZodBoolean;
91
94
  }, "strip", z.ZodTypeAny, {
92
95
  question: string;
93
- answer: string;
94
96
  confidence: number;
95
97
  reasoning: string;
98
+ skipped: boolean;
99
+ answer?: string | undefined;
96
100
  }, {
97
101
  question: string;
98
- answer: string;
99
102
  confidence: number;
100
103
  reasoning: string;
104
+ skipped: boolean;
105
+ answer?: string | undefined;
101
106
  }>;
102
107
  type QuestionAnswerType = z.infer<typeof questionAnswerSchema>;
103
108
  declare function createAskQuestionsSchema(allowedAnswers: [string, ...string[]]): z.ZodObject<{
@@ -105,18 +110,21 @@ declare function createAskQuestionsSchema(allowedAnswers: [string, ...string[]])
105
110
  question: z.ZodString;
106
111
  confidence: z.ZodNumber;
107
112
  reasoning: z.ZodString;
113
+ skipped: z.ZodBoolean;
108
114
  } & {
109
- answer: z.ZodEnum<[string, ...string[]]>;
115
+ answer: z.ZodEnum<[string, ...string[], "__SKIPPED__"]>;
110
116
  }, "strip", z.ZodTypeAny, {
111
117
  question: string;
112
118
  answer: string;
113
119
  confidence: number;
114
120
  reasoning: string;
121
+ skipped: boolean;
115
122
  }, {
116
123
  question: string;
117
124
  answer: string;
118
125
  confidence: number;
119
126
  reasoning: string;
127
+ skipped: boolean;
120
128
  }>, "many">;
121
129
  }, "strip", z.ZodTypeAny, {
122
130
  answers: {
@@ -124,6 +132,7 @@ declare function createAskQuestionsSchema(allowedAnswers: [string, ...string[]])
124
132
  answer: string;
125
133
  confidence: number;
126
134
  reasoning: string;
135
+ skipped: boolean;
127
136
  }[];
128
137
  }, {
129
138
  answers: {
@@ -131,6 +140,7 @@ declare function createAskQuestionsSchema(allowedAnswers: [string, ...string[]])
131
140
  answer: string;
132
141
  confidence: number;
133
142
  reasoning: string;
143
+ skipped: boolean;
134
144
  }[];
135
145
  }>;
136
146
  type AskQuestionsSchema = ReturnType<typeof createAskQuestionsSchema>;
@@ -646,9 +656,9 @@ interface SummarizationOptions extends MuxAIOptions {
646
656
  * Useful for customizing the AI's output for specific use cases (SEO, social media, etc.)
647
657
  */
648
658
  promptOverrides?: SummarizationPromptOverrides;
649
- /** Desired title length in words. */
659
+ /** Maximum title length in words. Shorter titles are preferred. */
650
660
  titleLength?: number;
651
- /** Desired description length in words. */
661
+ /** Maximum description length in words. Shorter descriptions are acceptable. */
652
662
  descriptionLength?: number;
653
663
  /** Desired number of tags. */
654
664
  tagCount?: number;
package/dist/index.d.ts CHANGED
@@ -2,14 +2,14 @@ import { W as WorkflowCredentials, S as StoragePutObjectInput, a as StoragePresi
2
2
  export { A as AssetTextTrack, C as ChunkEmbedding, b as ChunkingStrategy, E as Encrypted, c as EncryptedPayload, I as ImageSubmissionMode, M as MuxAIOptions, d as MuxAsset, P as PlaybackAsset, e as PlaybackPolicy, f as StorageAdapter, T as TextChunk, g as TokenChunkingConfig, h as TokenUsage, i as ToneType, U as UsageMetadata, V as VTTChunkingConfig, j as VideoEmbeddingsResult, k as WorkflowCredentialsInput, l as WorkflowMuxClient, m as decryptFromWorkflow, n as encryptForWorkflow } from './types-BRbaGW3t.js';
3
3
  import { WORKFLOW_SERIALIZE, WORKFLOW_DESERIALIZE } from '@workflow/serde';
4
4
  export { i as primitives } from './index-DLhfJsOd.js';
5
- export { i as workflows } from './index-BapL6paa.js';
5
+ export { i as workflows } from './index-DyzifniY.js';
6
6
  import '@mux/mux-node';
7
7
  import 'zod';
8
8
  import '@ai-sdk/anthropic';
9
9
  import '@ai-sdk/google';
10
10
  import '@ai-sdk/openai';
11
11
 
12
- var version = "0.11.0";
12
+ var version = "0.12.0";
13
13
 
14
14
  /**
15
15
  * A function that returns workflow credentials, either synchronously or asynchronously.
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ var __export = (target, all) => {
5
5
  };
6
6
 
7
7
  // package.json
8
- var version = "0.11.0";
8
+ var version = "0.12.0";
9
9
 
10
10
  // src/env.ts
11
11
  import { z } from "zod";
@@ -2241,12 +2241,14 @@ async function withRetry(fn, {
2241
2241
  // src/workflows/ask-questions.ts
2242
2242
  var questionAnswerSchema = z2.object({
2243
2243
  question: z2.string(),
2244
- answer: z2.string(),
2244
+ answer: z2.string().optional(),
2245
2245
  confidence: z2.number(),
2246
- reasoning: z2.string()
2246
+ reasoning: z2.string(),
2247
+ skipped: z2.boolean()
2247
2248
  });
2249
+ var SKIP_SENTINEL = "__SKIPPED__";
2248
2250
  function createAskQuestionsSchema(allowedAnswers) {
2249
- const answerSchema = z2.enum(allowedAnswers);
2251
+ const answerSchema = z2.enum([...allowedAnswers, SKIP_SENTINEL]);
2250
2252
  return z2.object({
2251
2253
  answers: z2.array(
2252
2254
  questionAnswerSchema.extend({
@@ -2302,8 +2304,32 @@ var SYSTEM_PROMPT = dedent`
2302
2304
  - Be precise: cite specific frames, objects, actions, or transcript quotes
2303
2305
  </answer_guidelines>
2304
2306
 
2307
+ <relevance_filtering>
2308
+ Before answering each question, assess whether it can be meaningfully
2309
+ answered based on the video storyboard and/or transcript. A question is
2310
+ relevant if it asks about something observable or inferable from the
2311
+ video content (visuals, audio, dialogue, setting, subjects, actions, etc.).
2312
+
2313
+ Mark a question as skipped (skipped: true) if it:
2314
+ - Is completely unrelated to video content (e.g., math, trivia, personal questions)
2315
+ - Asks about information that cannot be determined from storyboard frames or transcript
2316
+ - Is a general knowledge question with no connection to what is shown or said in the video
2317
+ - Attempts to use the system for non-video-analysis purposes
2318
+
2319
+ For skipped questions:
2320
+ - Set skipped to true
2321
+ - Set answer to "${SKIP_SENTINEL}"
2322
+ - Set confidence to 0
2323
+ - Use the reasoning field to explain why the question is not answerable
2324
+ from the video content
2325
+
2326
+ For borderline questions that are loosely related to the video content,
2327
+ still answer them but use a lower confidence score to reflect uncertainty.
2328
+ </relevance_filtering>
2329
+
2305
2330
  <constraints>
2306
- - You MUST answer every question with one of the allowed response options
2331
+ - You MUST answer every relevant question with one of the allowed response options
2332
+ - Skip irrelevant questions as described in relevance_filtering
2307
2333
  - Only describe observable evidence from frames or transcript
2308
2334
  - Do not fabricate details or make unsupported assumptions
2309
2335
  - Return structured data matching the requested schema exactly
@@ -2379,14 +2405,7 @@ async function analyzeQuestionsWithStoryboard(imageDataUrl, provider, modelId, u
2379
2405
  ]
2380
2406
  });
2381
2407
  return {
2382
- result: {
2383
- answers: response.output.answers.map((answer) => ({
2384
- ...answer,
2385
- // Strip numbering prefix (e.g., "1. " or "2. ") from questions
2386
- question: answer.question.replace(/^\d+\.\s*/, ""),
2387
- confidence: Math.min(1, Math.max(0, answer.confidence))
2388
- }))
2389
- },
2408
+ result: response.output,
2390
2409
  usage: {
2391
2410
  inputTokens: response.usage.inputTokens,
2392
2411
  outputTokens: response.usage.outputTokens,
@@ -2492,9 +2511,20 @@ async function askQuestions(assetId, questions, options) {
2492
2511
  `Expected ${questions.length} answers but received ${analysisResponse.result.answers.length}`
2493
2512
  );
2494
2513
  }
2514
+ const answers = analysisResponse.result.answers.map((raw) => {
2515
+ const isSkipped = raw.skipped || raw.answer === SKIP_SENTINEL;
2516
+ return {
2517
+ // Strip numbering prefix (e.g., "1. " or "2. ") from questions
2518
+ question: raw.question.replace(/^\d+\.\s*/, ""),
2519
+ confidence: isSkipped ? 0 : Math.min(1, Math.max(0, raw.confidence)),
2520
+ reasoning: raw.reasoning,
2521
+ skipped: isSkipped,
2522
+ ...isSkipped ? {} : { answer: raw.answer }
2523
+ };
2524
+ });
2495
2525
  return {
2496
2526
  assetId,
2497
- answers: analysisResponse.result.answers,
2527
+ answers,
2498
2528
  storyboardUrl: imageUrl,
2499
2529
  usage: {
2500
2530
  ...analysisResponse.usage,
@@ -4210,43 +4240,43 @@ var DESCRIPTION_LENGTH_THRESHOLD_LARGE = 100;
4210
4240
  function buildDescriptionGuidance(wordCount, contentType) {
4211
4241
  if (wordCount < DESCRIPTION_LENGTH_THRESHOLD_SMALL) {
4212
4242
  if (contentType === "video") {
4213
- return dedent5`A brief summary of the video in approximately ${wordCount} words.
4243
+ return dedent5`A brief summary of the video in no more than ${wordCount} words. Shorter is fine.
4214
4244
  Focus on the single most important subject or action.
4215
4245
  Write in present tense.`;
4216
4246
  }
4217
- return dedent5`A brief summary of the audio content in approximately ${wordCount} words.
4247
+ return dedent5`A brief summary of the audio content in no more than ${wordCount} words. Shorter is fine.
4218
4248
  Focus on the single most important topic or theme.
4219
4249
  Write in present tense.`;
4220
4250
  }
4221
4251
  if (wordCount > DESCRIPTION_LENGTH_THRESHOLD_LARGE) {
4222
4252
  if (contentType === "video") {
4223
4253
  return dedent5`A detailed summary that describes what happens across the video.
4224
- Aim for approximately ${wordCount} words, and you may use multiple sentences.
4254
+ Never exceed ${wordCount} words, but shorter is perfectly fine. You may use multiple sentences.
4225
4255
  Be thorough: cover subjects, actions, setting, progression, and any notable details visible across frames.
4226
4256
  Write in present tense. Be specific about observable details rather than making assumptions.
4227
4257
  If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`;
4228
4258
  }
4229
4259
  return dedent5`A detailed summary that describes the audio content.
4230
- Aim for approximately ${wordCount} words, and you may use multiple sentences.
4260
+ Never exceed ${wordCount} words, but shorter is perfectly fine. You may use multiple sentences.
4231
4261
  Be thorough: cover topics, speakers, themes, progression, and any notable insights.
4232
4262
  Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
4233
4263
  Focus on the spoken content and any key insights, dialogue, or narrative elements.`;
4234
4264
  }
4235
4265
  if (contentType === "video") {
4236
4266
  return dedent5`A summary that describes what happens across the video.
4237
- Aim for approximately ${wordCount} words, and you may use multiple sentences.
4267
+ Never exceed ${wordCount} words, but shorter is perfectly fine. You may use multiple sentences.
4238
4268
  Cover the main subjects, actions, setting, and any notable progression visible across frames.
4239
4269
  Write in present tense. Be specific about observable details rather than making assumptions.
4240
4270
  If the transcript provides dialogue or narration, incorporate key points but prioritize visual content.`;
4241
4271
  }
4242
4272
  return dedent5`A summary that describes the audio content.
4243
- Aim for approximately ${wordCount} words, and you may use multiple sentences.
4273
+ Never exceed ${wordCount} words, but shorter is perfectly fine. You may use multiple sentences.
4244
4274
  Cover the main topics, speakers, themes, and any notable progression in the discussion or narration.
4245
4275
  Write in present tense. Be specific about what is discussed or presented rather than making assumptions.
4246
4276
  Focus on the spoken content and any key insights, dialogue, or narrative elements.`;
4247
4277
  }
4248
4278
  function createSummarizationBuilder({ titleLength, descriptionLength, tagCount } = {}) {
4249
- const titleBrevity = `Aim for approximately ${titleLength ?? DEFAULT_TITLE_LENGTH} words.`;
4279
+ const titleLimit = titleLength ?? DEFAULT_TITLE_LENGTH;
4250
4280
  const keywordLimit = tagCount ?? DEFAULT_SUMMARY_KEYWORD_LIMIT;
4251
4281
  return createPromptBuilder({
4252
4282
  template: {
@@ -4257,10 +4287,11 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
4257
4287
  title: {
4258
4288
  tag: "title_requirements",
4259
4289
  content: dedent5`
4260
- A short, compelling headline that immediately communicates the subject or action.
4261
- ${titleBrevity} Think of how a news headline or video card title would read.
4262
- Start with the primary subject, action, or topic - never begin with "A video of" or similar phrasing.
4263
- Use active, specific language.`
4290
+ A concise, label-style title not a sentence or description.
4291
+ Never exceed ${titleLimit} words, but shorter is better.
4292
+ Think of how a video card title, playlist entry, or file name would read e.g. "Predator: Badlands Trailer" or "Chef Prepares Holiday Feast".
4293
+ Start with the primary subject or topic. Never begin with "A video of" or similar phrasing.
4294
+ Use specific nouns over lengthy descriptions. Avoid clauses, conjunctions, or narrative structure.`
4264
4295
  },
4265
4296
  description: {
4266
4297
  tag: "description_requirements",
@@ -4291,7 +4322,7 @@ function createSummarizationBuilder({ titleLength, descriptionLength, tagCount }
4291
4322
  });
4292
4323
  }
4293
4324
  function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {}) {
4294
- const titleBrevity = `Aim for approximately ${titleLength ?? DEFAULT_TITLE_LENGTH} words.`;
4325
+ const titleLimit = titleLength ?? DEFAULT_TITLE_LENGTH;
4295
4326
  const keywordLimit = tagCount ?? DEFAULT_SUMMARY_KEYWORD_LIMIT;
4296
4327
  return createPromptBuilder({
4297
4328
  template: {
@@ -4302,10 +4333,11 @@ function createAudioOnlyBuilder({ titleLength, descriptionLength, tagCount } = {
4302
4333
  title: {
4303
4334
  tag: "title_requirements",
4304
4335
  content: dedent5`
4305
- A short, compelling headline that immediately communicates the subject or topic.
4306
- ${titleBrevity} Think of how a podcast title or audio description would read.
4307
- Start with the primary subject, action, or topic - never begin with "An audio of" or similar phrasing.
4308
- Use active, specific language.`
4336
+ A concise, label-style title not a sentence or description.
4337
+ Never exceed ${titleLimit} words, but shorter is better.
4338
+ Think of how a podcast episode title or playlist entry would read e.g. "Weekly News Roundup" or "Interview with Dr. Smith".
4339
+ Start with the primary subject or topic. Never begin with "An audio of" or similar phrasing.
4340
+ Use specific nouns over lengthy descriptions. Avoid clauses, conjunctions, or narrative structure.`
4309
4341
  },
4310
4342
  description: {
4311
4343
  tag: "description_requirements",