@mux/ai 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -334,6 +334,12 @@ interface ChaptersOptions extends MuxAIOptions {
334
334
  * Defaults to 8.
335
335
  */
336
336
  maxChaptersPerHour?: number;
337
+ /**
338
+ * BCP 47 language code for chapter titles (e.g. "en", "fr", "ja").
339
+ * When omitted, auto-detects from the transcript track's language.
340
+ * Falls back to unconstrained (LLM decides) if no language metadata is available.
341
+ */
342
+ outputLanguageCode?: string;
337
343
  }
338
344
  /**
339
345
  * Sections of the chaptering system prompt that can be overridden.
@@ -529,6 +535,12 @@ interface SummarizationOptions extends MuxAIOptions {
529
535
  descriptionLength?: number;
530
536
  /** Desired number of tags. */
531
537
  tagCount?: number;
538
+ /**
539
+ * BCP 47 language code for the output (e.g. "en", "fr", "ja").
540
+ * When omitted, auto-detects from the transcript track's language.
541
+ * Falls back to unconstrained (LLM decides) if no language metadata is available.
542
+ */
543
+ outputLanguageCode?: string;
532
544
  }
533
545
  declare function getSummaryAndTags(assetId: string, options?: SummarizationOptions): Promise<SummaryAndTagsResult>;
534
546
 
package/dist/index.d.ts CHANGED
@@ -2,14 +2,14 @@ import { W as WorkflowCredentials, S as StoragePutObjectInput, a as StoragePresi
2
2
  export { A as AssetTextTrack, C as ChunkEmbedding, b as ChunkingStrategy, E as Encrypted, c as EncryptedPayload, I as ImageSubmissionMode, M as MuxAIOptions, d as MuxAsset, P as PlaybackAsset, e as PlaybackPolicy, f as StorageAdapter, T as TextChunk, g as TokenChunkingConfig, h as TokenUsage, i as ToneType, U as UsageMetadata, V as VTTChunkingConfig, j as VideoEmbeddingsResult, k as WorkflowCredentialsInput, l as WorkflowMuxClient, m as decryptFromWorkflow, n as encryptForWorkflow } from './types-BRbaGW3t.js';
3
3
  import { WORKFLOW_SERIALIZE, WORKFLOW_DESERIALIZE } from '@workflow/serde';
4
4
  export { i as primitives } from './index-Nxf6BaBO.js';
5
- export { i as workflows } from './index-DP02N3iR.js';
5
+ export { i as workflows } from './index-CkJStzYO.js';
6
6
  import '@mux/mux-node';
7
7
  import 'zod';
8
8
  import '@ai-sdk/anthropic';
9
9
  import '@ai-sdk/google';
10
10
  import '@ai-sdk/openai';
11
11
 
12
- var version = "0.8.1";
12
+ var version = "0.9.0";
13
13
 
14
14
  /**
15
15
  * A function that returns workflow credentials, either synchronously or asynchronously.
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ var __export = (target, all) => {
5
5
  };
6
6
 
7
7
  // package.json
8
- var version = "0.8.1";
8
+ var version = "0.9.0";
9
9
 
10
10
  // src/env.ts
11
11
  import { z } from "zod";
@@ -1718,6 +1718,12 @@ function createToneSection(instruction) {
1718
1718
  content: instruction
1719
1719
  };
1720
1720
  }
1721
+ function createLanguageSection(languageName) {
1722
+ return {
1723
+ tag: "language",
1724
+ content: `All output (title, description, keywords, chapter titles) MUST be written in ${languageName}.`
1725
+ };
1726
+ }
1721
1727
 
1722
1728
  // src/lib/retry.ts
1723
1729
  var DEFAULT_RETRY_OPTIONS = {
@@ -1828,6 +1834,7 @@ var SYSTEM_PROMPT = dedent`
1828
1834
  - Only describe observable evidence from frames or transcript
1829
1835
  - Do not fabricate details or make unsupported assumptions
1830
1836
  - Return structured data matching the requested schema exactly
1837
+ - Provide reasoning in the same language as the question
1831
1838
  </constraints>
1832
1839
 
1833
1840
  <language_guidelines>
@@ -2228,6 +2235,166 @@ async function hasBurnedInCaptions(assetId, options = {}) {
2228
2235
  import { generateText as generateText3, Output as Output3 } from "ai";
2229
2236
  import dedent3 from "dedent";
2230
2237
  import { z as z4 } from "zod";
2238
+
2239
+ // src/lib/language-codes.ts
2240
+ var ISO639_1_TO_3 = {
2241
+ // Major world languages
2242
+ en: "eng",
2243
+ // English
2244
+ es: "spa",
2245
+ // Spanish
2246
+ fr: "fra",
2247
+ // French
2248
+ de: "deu",
2249
+ // German
2250
+ it: "ita",
2251
+ // Italian
2252
+ pt: "por",
2253
+ // Portuguese
2254
+ ru: "rus",
2255
+ // Russian
2256
+ zh: "zho",
2257
+ // Chinese
2258
+ ja: "jpn",
2259
+ // Japanese
2260
+ ko: "kor",
2261
+ // Korean
2262
+ ar: "ara",
2263
+ // Arabic
2264
+ hi: "hin",
2265
+ // Hindi
2266
+ // European languages
2267
+ nl: "nld",
2268
+ // Dutch
2269
+ pl: "pol",
2270
+ // Polish
2271
+ sv: "swe",
2272
+ // Swedish
2273
+ da: "dan",
2274
+ // Danish
2275
+ no: "nor",
2276
+ // Norwegian
2277
+ fi: "fin",
2278
+ // Finnish
2279
+ el: "ell",
2280
+ // Greek
2281
+ cs: "ces",
2282
+ // Czech
2283
+ hu: "hun",
2284
+ // Hungarian
2285
+ ro: "ron",
2286
+ // Romanian
2287
+ bg: "bul",
2288
+ // Bulgarian
2289
+ hr: "hrv",
2290
+ // Croatian
2291
+ sk: "slk",
2292
+ // Slovak
2293
+ sl: "slv",
2294
+ // Slovenian
2295
+ uk: "ukr",
2296
+ // Ukrainian
2297
+ tr: "tur",
2298
+ // Turkish
2299
+ // Asian languages
2300
+ th: "tha",
2301
+ // Thai
2302
+ vi: "vie",
2303
+ // Vietnamese
2304
+ id: "ind",
2305
+ // Indonesian
2306
+ ms: "msa",
2307
+ // Malay
2308
+ tl: "tgl",
2309
+ // Tagalog/Filipino
2310
+ // Other languages
2311
+ he: "heb",
2312
+ // Hebrew
2313
+ fa: "fas",
2314
+ // Persian/Farsi
2315
+ bn: "ben",
2316
+ // Bengali
2317
+ ta: "tam",
2318
+ // Tamil
2319
+ te: "tel",
2320
+ // Telugu
2321
+ mr: "mar",
2322
+ // Marathi
2323
+ gu: "guj",
2324
+ // Gujarati
2325
+ kn: "kan",
2326
+ // Kannada
2327
+ ml: "mal",
2328
+ // Malayalam
2329
+ pa: "pan",
2330
+ // Punjabi
2331
+ ur: "urd",
2332
+ // Urdu
2333
+ sw: "swa",
2334
+ // Swahili
2335
+ af: "afr",
2336
+ // Afrikaans
2337
+ ca: "cat",
2338
+ // Catalan
2339
+ eu: "eus",
2340
+ // Basque
2341
+ gl: "glg",
2342
+ // Galician
2343
+ is: "isl",
2344
+ // Icelandic
2345
+ et: "est",
2346
+ // Estonian
2347
+ lv: "lav",
2348
+ // Latvian
2349
+ lt: "lit"
2350
+ // Lithuanian
2351
+ };
2352
+ var ISO639_3_TO_1 = Object.fromEntries(
2353
+ Object.entries(ISO639_1_TO_3).map(([iso1, iso3]) => [iso3, iso1])
2354
+ );
2355
+ function toISO639_3(code) {
2356
+ const normalized = code.toLowerCase().trim();
2357
+ if (normalized.length === 3) {
2358
+ return normalized;
2359
+ }
2360
+ return ISO639_1_TO_3[normalized] ?? normalized;
2361
+ }
2362
+ function toISO639_1(code) {
2363
+ const normalized = code.toLowerCase().trim();
2364
+ if (normalized.length === 2) {
2365
+ return normalized;
2366
+ }
2367
+ return ISO639_3_TO_1[normalized] ?? normalized;
2368
+ }
2369
+ function getLanguageCodePair(code) {
2370
+ const normalized = code.toLowerCase().trim();
2371
+ if (normalized.length === 2) {
2372
+ return {
2373
+ iso639_1: normalized,
2374
+ iso639_3: toISO639_3(normalized)
2375
+ };
2376
+ } else if (normalized.length === 3) {
2377
+ return {
2378
+ iso639_1: toISO639_1(normalized),
2379
+ iso639_3: normalized
2380
+ };
2381
+ }
2382
+ return {
2383
+ iso639_1: normalized,
2384
+ iso639_3: normalized
2385
+ };
2386
+ }
2387
+ function getLanguageName(code) {
2388
+ const iso639_1 = toISO639_1(code);
2389
+ try {
2390
+ const displayNames = new Intl.DisplayNames(["en"], { type: "language" });
2391
+ return displayNames.of(iso639_1) ?? code.toUpperCase();
2392
+ } catch {
2393
+ return code.toUpperCase();
2394
+ }
2395
+ }
2396
+
2397
+ // src/workflows/chapters.ts
2231
2398
  var chapterSchema = z4.object({
2232
2399
  startTime: z4.number(),
2233
2400
  title: z4.string()
@@ -2288,7 +2455,8 @@ var chapterSystemPromptBuilder = createPromptBuilder({
2288
2455
  content: dedent3`
2289
2456
  - Only use information present in the transcript
2290
2457
  - Return structured data that matches the requested JSON schema
2291
- - Do not add commentary or extra text outside the JSON`
2458
+ - Do not add commentary or extra text outside the JSON
2459
+ - When a <language> section is provided, all chapter titles MUST be written in that language`
2292
2460
  },
2293
2461
  qualityGuidelines: {
2294
2462
  tag: "quality_guidelines",
@@ -2336,7 +2504,7 @@ var chaptersPromptBuilder = createPromptBuilder({
2336
2504
  content: dedent3`
2337
2505
  - Keep titles concise and descriptive
2338
2506
  - Avoid filler or generic labels like "Chapter 1"
2339
- - Use the transcript's language and terminology`
2507
+ - Use the transcript's terminology`
2340
2508
  }
2341
2509
  },
2342
2510
  sectionOrder: ["task", "outputFormat", "chapterGuidelines", "titleGuidelines"]
@@ -2345,7 +2513,8 @@ function buildUserPrompt3({
2345
2513
  timestampedTranscript,
2346
2514
  promptOverrides,
2347
2515
  minChaptersPerHour = 3,
2348
- maxChaptersPerHour = 8
2516
+ maxChaptersPerHour = 8,
2517
+ languageName
2349
2518
  }) {
2350
2519
  const contextSections = [
2351
2520
  {
@@ -2354,6 +2523,9 @@ function buildUserPrompt3({
2354
2523
  attributes: { format: "seconds" }
2355
2524
  }
2356
2525
  ];
2526
+ if (languageName) {
2527
+ contextSections.push(createLanguageSection(languageName));
2528
+ }
2357
2529
  const dynamicChapterGuidelines = dedent3`
2358
2530
  - Create at least ${minChaptersPerHour} and at most ${maxChaptersPerHour} chapters per hour of content
2359
2531
  - Use start times in seconds (not HH:MM:SS)
@@ -2373,7 +2545,8 @@ async function generateChapters(assetId, languageCode, options = {}) {
2373
2545
  promptOverrides,
2374
2546
  minChaptersPerHour,
2375
2547
  maxChaptersPerHour,
2376
- credentials
2548
+ credentials,
2549
+ outputLanguageCode
2377
2550
  } = options;
2378
2551
  const modelConfig = resolveLanguageModelConfig({
2379
2552
  ...options,
@@ -2417,11 +2590,14 @@ async function generateChapters(assetId, languageCode, options = {}) {
2417
2590
  const contentLabel = isAudioOnly ? "transcript" : "caption track";
2418
2591
  throw new Error(`No usable content found in ${contentLabel}`);
2419
2592
  }
2593
+ const resolvedLanguageCode = outputLanguageCode && outputLanguageCode !== "auto" ? outputLanguageCode : transcriptResult.track?.language_code ?? languageCode;
2594
+ const languageName = resolvedLanguageCode ? getLanguageName(resolvedLanguageCode) : void 0;
2420
2595
  const userPrompt = buildUserPrompt3({
2421
2596
  timestampedTranscript,
2422
2597
  promptOverrides,
2423
2598
  minChaptersPerHour,
2424
- maxChaptersPerHour
2599
+ maxChaptersPerHour,
2600
+ languageName
2425
2601
  });
2426
2602
  let chaptersData = null;
2427
2603
  try {
@@ -3256,6 +3432,7 @@ var SYSTEM_PROMPT3 = dedent4`
3256
3432
  - Do not fabricate details or make unsupported assumptions
3257
3433
  - Return structured data matching the requested schema
3258
3434
  - Output only the JSON object; no markdown or extra text
3435
+ - When a <language> section is provided, all output text MUST be written in that language
3259
3436
  </constraints>
3260
3437
 
3261
3438
  <tone_guidance>
@@ -3310,6 +3487,7 @@ var AUDIO_ONLY_SYSTEM_PROMPT = dedent4`
3310
3487
  - Return structured data matching the requested schema
3311
3488
  - Focus entirely on audio/spoken content - there are no visual elements
3312
3489
  - Output only the JSON object; no markdown or extra text
3490
+ - When a <language> section is provided, all output text MUST be written in that language
3313
3491
  </constraints>
3314
3492
 
3315
3493
  <tone_guidance>
@@ -3340,9 +3518,13 @@ function buildUserPrompt4({
3340
3518
  isAudioOnly = false,
3341
3519
  titleLength,
3342
3520
  descriptionLength,
3343
- tagCount
3521
+ tagCount,
3522
+ languageName
3344
3523
  }) {
3345
3524
  const contextSections = [createToneSection(TONE_INSTRUCTIONS[tone])];
3525
+ if (languageName) {
3526
+ contextSections.push(createLanguageSection(languageName));
3527
+ }
3346
3528
  if (transcriptText) {
3347
3529
  const format = isCleanTranscript ? "plain text" : "WebVTT";
3348
3530
  contextSections.push(createTranscriptSection(transcriptText, format));
@@ -3455,7 +3637,8 @@ async function getSummaryAndTags(assetId, options) {
3455
3637
  credentials,
3456
3638
  titleLength,
3457
3639
  descriptionLength,
3458
- tagCount
3640
+ tagCount,
3641
+ outputLanguageCode
3459
3642
  } = options ?? {};
3460
3643
  if (!VALID_TONES.includes(tone)) {
3461
3644
  throw new Error(
@@ -3482,12 +3665,15 @@ async function getSummaryAndTags(assetId, options) {
3482
3665
  "Signed playback ID requires signing credentials. Set MUX_SIGNING_KEY and MUX_PRIVATE_KEY environment variables."
3483
3666
  );
3484
3667
  }
3485
- const transcriptText = includeTranscript ? (await fetchTranscriptForAsset(assetData, playbackId, {
3668
+ const transcriptResult = includeTranscript ? await fetchTranscriptForAsset(assetData, playbackId, {
3486
3669
  cleanTranscript,
3487
3670
  shouldSign: policy === "signed",
3488
3671
  credentials: workflowCredentials,
3489
3672
  required: isAudioOnly
3490
- })).transcriptText : "";
3673
+ }) : void 0;
3674
+ const transcriptText = transcriptResult?.transcriptText ?? "";
3675
+ const resolvedLanguageCode = outputLanguageCode && outputLanguageCode !== "auto" ? outputLanguageCode : transcriptResult?.track?.language_code ?? getReadyTextTracks(assetData)[0]?.language_code;
3676
+ const languageName = resolvedLanguageCode ? getLanguageName(resolvedLanguageCode) : void 0;
3491
3677
  const userPrompt = buildUserPrompt4({
3492
3678
  tone,
3493
3679
  transcriptText,
@@ -3496,7 +3682,8 @@ async function getSummaryAndTags(assetId, options) {
3496
3682
  isAudioOnly,
3497
3683
  titleLength,
3498
3684
  descriptionLength,
3499
- tagCount
3685
+ tagCount,
3686
+ languageName
3500
3687
  });
3501
3688
  let analysisResponse;
3502
3689
  let imageUrl;
@@ -3566,164 +3753,6 @@ async function getSummaryAndTags(assetId, options) {
3566
3753
  };
3567
3754
  }
3568
3755
 
3569
- // src/lib/language-codes.ts
3570
- var ISO639_1_TO_3 = {
3571
- // Major world languages
3572
- en: "eng",
3573
- // English
3574
- es: "spa",
3575
- // Spanish
3576
- fr: "fra",
3577
- // French
3578
- de: "deu",
3579
- // German
3580
- it: "ita",
3581
- // Italian
3582
- pt: "por",
3583
- // Portuguese
3584
- ru: "rus",
3585
- // Russian
3586
- zh: "zho",
3587
- // Chinese
3588
- ja: "jpn",
3589
- // Japanese
3590
- ko: "kor",
3591
- // Korean
3592
- ar: "ara",
3593
- // Arabic
3594
- hi: "hin",
3595
- // Hindi
3596
- // European languages
3597
- nl: "nld",
3598
- // Dutch
3599
- pl: "pol",
3600
- // Polish
3601
- sv: "swe",
3602
- // Swedish
3603
- da: "dan",
3604
- // Danish
3605
- no: "nor",
3606
- // Norwegian
3607
- fi: "fin",
3608
- // Finnish
3609
- el: "ell",
3610
- // Greek
3611
- cs: "ces",
3612
- // Czech
3613
- hu: "hun",
3614
- // Hungarian
3615
- ro: "ron",
3616
- // Romanian
3617
- bg: "bul",
3618
- // Bulgarian
3619
- hr: "hrv",
3620
- // Croatian
3621
- sk: "slk",
3622
- // Slovak
3623
- sl: "slv",
3624
- // Slovenian
3625
- uk: "ukr",
3626
- // Ukrainian
3627
- tr: "tur",
3628
- // Turkish
3629
- // Asian languages
3630
- th: "tha",
3631
- // Thai
3632
- vi: "vie",
3633
- // Vietnamese
3634
- id: "ind",
3635
- // Indonesian
3636
- ms: "msa",
3637
- // Malay
3638
- tl: "tgl",
3639
- // Tagalog/Filipino
3640
- // Other languages
3641
- he: "heb",
3642
- // Hebrew
3643
- fa: "fas",
3644
- // Persian/Farsi
3645
- bn: "ben",
3646
- // Bengali
3647
- ta: "tam",
3648
- // Tamil
3649
- te: "tel",
3650
- // Telugu
3651
- mr: "mar",
3652
- // Marathi
3653
- gu: "guj",
3654
- // Gujarati
3655
- kn: "kan",
3656
- // Kannada
3657
- ml: "mal",
3658
- // Malayalam
3659
- pa: "pan",
3660
- // Punjabi
3661
- ur: "urd",
3662
- // Urdu
3663
- sw: "swa",
3664
- // Swahili
3665
- af: "afr",
3666
- // Afrikaans
3667
- ca: "cat",
3668
- // Catalan
3669
- eu: "eus",
3670
- // Basque
3671
- gl: "glg",
3672
- // Galician
3673
- is: "isl",
3674
- // Icelandic
3675
- et: "est",
3676
- // Estonian
3677
- lv: "lav",
3678
- // Latvian
3679
- lt: "lit"
3680
- // Lithuanian
3681
- };
3682
- var ISO639_3_TO_1 = Object.fromEntries(
3683
- Object.entries(ISO639_1_TO_3).map(([iso1, iso3]) => [iso3, iso1])
3684
- );
3685
- function toISO639_3(code) {
3686
- const normalized = code.toLowerCase().trim();
3687
- if (normalized.length === 3) {
3688
- return normalized;
3689
- }
3690
- return ISO639_1_TO_3[normalized] ?? normalized;
3691
- }
3692
- function toISO639_1(code) {
3693
- const normalized = code.toLowerCase().trim();
3694
- if (normalized.length === 2) {
3695
- return normalized;
3696
- }
3697
- return ISO639_3_TO_1[normalized] ?? normalized;
3698
- }
3699
- function getLanguageCodePair(code) {
3700
- const normalized = code.toLowerCase().trim();
3701
- if (normalized.length === 2) {
3702
- return {
3703
- iso639_1: normalized,
3704
- iso639_3: toISO639_3(normalized)
3705
- };
3706
- } else if (normalized.length === 3) {
3707
- return {
3708
- iso639_1: toISO639_1(normalized),
3709
- iso639_3: normalized
3710
- };
3711
- }
3712
- return {
3713
- iso639_1: normalized,
3714
- iso639_3: normalized
3715
- };
3716
- }
3717
- function getLanguageName(code) {
3718
- const iso639_1 = toISO639_1(code);
3719
- try {
3720
- const displayNames = new Intl.DisplayNames(["en"], { type: "language" });
3721
- return displayNames.of(iso639_1) ?? code.toUpperCase();
3722
- } catch {
3723
- return code.toUpperCase();
3724
- }
3725
- }
3726
-
3727
3756
  // src/lib/storage-adapter.ts
3728
3757
  function requireCredentials(accessKeyId, secretAccessKey) {
3729
3758
  if (!accessKeyId || !secretAccessKey) {