@mux/ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +671 -0
  3. package/package.json +49 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Mux, Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,671 @@
1
+ # @mux/ai
2
+
3
+ AI-powered video analysis library for Mux, built in TypeScript.
4
+
5
+ ## Available Tools
6
+
7
+ | Function | Description | Providers | Default Models | Input | Output |
8
+ |----------|-------------|-----------|----------------|--------|--------|
9
+ | `getSummaryAndTags` | Generate titles, descriptions, and tags from a Mux video asset | OpenAI, Anthropic | `gpt-4o-mini`, `claude-3-5-haiku-20241022` | Asset ID + options | Title, description, tags, storyboard URL |
10
+ | `getModerationScores` | Analyze video thumbnails for inappropriate content | OpenAI, Hive | `omni-moderation-latest`, Hive Visual API | Asset ID + thresholds | Sexual/violence scores, flagged status |
11
+ | `hasBurnedInCaptions` | Detect burned-in captions (hardcoded subtitles) in video frames | OpenAI, Anthropic | `gpt-4o-mini`, `claude-3-5-haiku-20241022` | Asset ID + options | Boolean result, confidence, language |
12
+ | `generateChapters` | Generate AI-powered chapter markers from video captions | OpenAI, Anthropic | `gpt-4o-mini`, `claude-3-5-haiku-20241022` | Asset ID + language + options | Timestamped chapter list |
13
+ | `translateCaptions` | Translate video captions to different languages | Anthropic only | `claude-sonnet-4-20250514` | Asset ID + languages + S3 config | Translated VTT + Mux track ID |
14
+ | `translateAudio` | Create AI-dubbed audio tracks in different languages | ElevenLabs only | ElevenLabs Dubbing API | Asset ID + languages + S3 config | Dubbed audio + Mux track ID |
15
+
16
+ ## Features
17
+
18
+ - **Cost-Effective by Default**: Uses affordable models like `gpt-4o-mini` and `claude-3-5-haiku` to keep analysis costs low while maintaining high quality results
19
+ - **Multi-modal Analysis**: Combines storyboard images with video transcripts
20
+ - **Tone Control**: Normal, sassy, or professional analysis styles (summarization only)
21
+ - **Configurable Thresholds**: Custom sensitivity levels for content moderation
22
+ - **TypeScript**: Fully typed for excellent developer experience
23
+ - **Provider Choice**: Switch between OpenAI and Anthropic for different perspectives
24
+ - **Universal Language Support**: Automatic language name detection using `Intl.DisplayNames` for all ISO 639-1 codes
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ npm install @mux/ai
30
+ ```
31
+
32
+ ## Quick Start
33
+
34
+ ### Video Summarization
35
+
36
+ ```typescript
37
+ import { getSummaryAndTags } from '@mux/ai';
38
+
39
+ // Uses built-in optimized prompt
40
+ const result = await getSummaryAndTags('your-mux-asset-id', {
41
+ tone: 'professional'
42
+ });
43
+
44
+ console.log(result.title); // Short, descriptive title
45
+ console.log(result.description); // Detailed description
46
+ console.log(result.tags); // Array of relevant keywords
47
+ console.log(result.storyboardUrl); // URL to Mux storyboard
48
+
49
+ // Use base64 mode for improved reliability (works with both OpenAI and Anthropic)
50
+ const reliableResult = await getSummaryAndTags('your-mux-asset-id', {
51
+ provider: 'anthropic',
52
+ imageSubmissionMode: 'base64', // Uses Files API for Anthropic, base64 for OpenAI
53
+ imageDownloadOptions: {
54
+ timeout: 15000,
55
+ retries: 2,
56
+ retryDelay: 1000
57
+ },
58
+ tone: 'professional'
59
+ });
60
+ ```
61
+
62
+ ### Content Moderation
63
+
64
+ ```typescript
65
+ import { getModerationScores } from '@mux/ai';
66
+
67
+ // Analyze Mux video asset for inappropriate content (OpenAI default)
68
+ const result = await getModerationScores('your-mux-asset-id', {
69
+ thresholds: { sexual: 0.7, violence: 0.8 }
70
+ });
71
+
72
+ console.log(result.maxScores); // Highest scores across all thumbnails
73
+ console.log(result.exceedsThreshold); // true if content should be flagged
74
+ console.log(result.thumbnailScores); // Individual thumbnail results
75
+
76
+ // Or use Hive for moderation
77
+ const hiveResult = await getModerationScores('your-mux-asset-id', {
78
+ provider: 'hive',
79
+ thresholds: { sexual: 0.7, violence: 0.8 }
80
+ });
81
+
82
+ // Use base64 submission for improved reliability (downloads images locally)
83
+ const reliableResult = await getModerationScores('your-mux-asset-id', {
84
+ provider: 'openai',
85
+ imageSubmissionMode: 'base64',
86
+ imageDownloadOptions: {
87
+ timeout: 15000,
88
+ retries: 3,
89
+ retryDelay: 1000
90
+ }
91
+ });
92
+
93
+ // Hive also supports base64 mode (uses multipart upload)
94
+ const hiveReliableResult = await getModerationScores('your-mux-asset-id', {
95
+ provider: 'hive',
96
+ imageSubmissionMode: 'base64',
97
+ imageDownloadOptions: {
98
+ timeout: 15000,
99
+ retries: 2,
100
+ retryDelay: 1000
101
+ }
102
+ });
103
+ ```
104
+
105
+ ### Burned-in Caption Detection
106
+
107
+ ```typescript
108
+ import { hasBurnedInCaptions } from '@mux/ai';
109
+
110
+ // Detect burned-in captions (hardcoded subtitles) in video frames
111
+ const result = await hasBurnedInCaptions('your-mux-asset-id', {
112
+ provider: 'openai'
113
+ });
114
+
115
+ console.log(result.hasBurnedInCaptions); // true/false
116
+ console.log(result.confidence); // 0.0-1.0 confidence score
117
+ console.log(result.detectedLanguage); // Language if captions detected
118
+ console.log(result.storyboardUrl); // Video storyboard analyzed
119
+
120
+ // Compare providers
121
+ const anthropicResult = await hasBurnedInCaptions('your-mux-asset-id', {
122
+ provider: 'anthropic',
123
+ model: 'claude-3-5-haiku-20241022'
124
+ });
125
+
126
+ // Use base64 mode for improved reliability
127
+ const reliableResult = await hasBurnedInCaptions('your-mux-asset-id', {
128
+ provider: 'openai',
129
+ imageSubmissionMode: 'base64',
130
+ imageDownloadOptions: {
131
+ timeout: 15000,
132
+ retries: 3,
133
+ retryDelay: 1000
134
+ }
135
+ });
136
+ ```
137
+
138
+ #### Image Submission Modes
139
+
140
+ Choose between two methods for submitting images to AI providers:
141
+
142
+ **URL Mode (Default):**
143
+ - Fast initial response
144
+ - Lower bandwidth usage
145
+ - Relies on AI provider's image downloading
146
+ - May encounter timeouts with slow/unreliable image sources
147
+
148
+ **Base64 Mode (Recommended for Production):**
149
+ - Downloads images locally with robust retry logic
150
+ - Eliminates AI provider timeout issues
151
+ - Better control over slow TTFB and network issues
152
+ - Slightly higher bandwidth usage but more reliable results
153
+ - For OpenAI: submits images as base64 data URIs
154
+ - For Hive: uploads images via multipart/form-data (Hive doesn't support base64 data URIs)
155
+ - For Anthropic (summarization): uploads to Files API then references by file_id (no size limit)
156
+
157
+ ```typescript
158
+ // High reliability mode - recommended for production
159
+ const result = await getModerationScores(assetId, {
160
+ imageSubmissionMode: 'base64',
161
+ imageDownloadOptions: {
162
+ timeout: 15000, // 15s timeout per image
163
+ retries: 3, // Retry failed downloads 3x
164
+ retryDelay: 1000, // 1s base delay with exponential backoff
165
+ exponentialBackoff: true
166
+ }
167
+ });
168
+ ```
169
+
170
+ ### Caption Translation
171
+
172
+ ```typescript
173
+ import { translateCaptions } from '@mux/ai';
174
+
175
+ // Translate existing captions to Spanish and add as new track
176
+ const result = await translateCaptions(
177
+ 'your-mux-asset-id',
178
+ 'en', // from language
179
+ 'es', // to language
180
+ {
181
+ provider: 'anthropic',
182
+ model: 'claude-sonnet-4-20250514'
183
+ }
184
+ );
185
+
186
+ console.log(result.uploadedTrackId); // New Mux track ID
187
+ console.log(result.presignedUrl); // S3 file URL
188
+ console.log(result.translatedVtt); // Translated VTT content
189
+ ```
190
+
191
+ ### Video Chapters
192
+
193
+ ```typescript
194
+ import { generateChapters } from '@mux/ai';
195
+
196
+ // Generate AI-powered chapters from video captions
197
+ const result = await generateChapters('your-mux-asset-id', 'en', {
198
+ provider: 'openai'
199
+ });
200
+
201
+ console.log(result.chapters); // Array of {startTime: number, title: string}
202
+
203
+ // Use with Mux Player
204
+ const player = document.querySelector('mux-player');
205
+ player.addChapters(result.chapters);
206
+
207
+ // Compare providers
208
+ const anthropicResult = await generateChapters('your-mux-asset-id', 'en', {
209
+ provider: 'anthropic',
210
+ model: 'claude-3-5-haiku-20241022'
211
+ });
212
+ ```
213
+
214
+ ### Audio Dubbing
215
+
216
+ ```typescript
217
+ import { translateAudio } from '@mux/ai';
218
+
219
+ // Create AI-dubbed audio track and add to Mux asset
220
+ // Uses the default audio track on your asset; the language is auto-detected
221
+ const result = await translateAudio(
222
+ 'your-mux-asset-id',
223
+ 'es', // target language
224
+ {
225
+ provider: 'elevenlabs',
226
+ numSpeakers: 0 // Auto-detect speakers
227
+ }
228
+ );
229
+
230
+ console.log(result.dubbingId); // ElevenLabs dubbing job ID
231
+ console.log(result.uploadedTrackId); // New Mux audio track ID
232
+ console.log(result.presignedUrl); // S3 audio file URL
233
+ ```
234
+
235
+ ### Compare Summarization from Providers
236
+
237
+ ```typescript
238
+ import { getSummaryAndTags } from '@mux/ai';
239
+
240
+ // Compare different AI providers analyzing the same Mux video asset
241
+ const assetId = 'your-mux-asset-id';
242
+
243
+ // OpenAI analysis (default: gpt-4o-mini)
244
+ const openaiResult = await getSummaryAndTags(assetId, {
245
+ provider: 'openai',
246
+ tone: 'professional'
247
+ });
248
+
249
+ // Anthropic analysis (default: claude-3-5-haiku-20241022)
250
+ const anthropicResult = await getSummaryAndTags(assetId, {
251
+ provider: 'anthropic',
252
+ tone: 'professional'
253
+ });
254
+
255
+ // Compare results
256
+ console.log('OpenAI:', openaiResult.title);
257
+ console.log('Anthropic:', anthropicResult.title);
258
+ ```
259
+
260
+ ## Configuration
261
+
262
+ Set environment variables:
263
+
264
+ ```bash
265
+ MUX_TOKEN_ID=your_mux_token_id
266
+ MUX_TOKEN_SECRET=your_mux_token_secret
267
+ OPENAI_API_KEY=your_openai_api_key
268
+ ANTHROPIC_API_KEY=your_anthropic_api_key
269
+ ELEVENLABS_API_KEY=your_elevenlabs_api_key
270
+ HIVE_API_KEY=your_hive_api_key
271
+
272
+ # S3-Compatible Storage (required for translation & audio dubbing)
273
+ S3_ENDPOINT=https://your-s3-endpoint.com
274
+ S3_REGION=auto
275
+ S3_BUCKET=your-bucket-name
276
+ S3_ACCESS_KEY_ID=your-access-key
277
+ S3_SECRET_ACCESS_KEY=your-secret-key
278
+ ```
279
+
280
+ Or pass credentials directly:
281
+
282
+ ```typescript
283
+ const result = await getSummaryAndTags(assetId, {
284
+ muxTokenId: 'your-token-id',
285
+ muxTokenSecret: 'your-token-secret',
286
+ openaiApiKey: 'your-openai-key'
287
+ });
288
+ ```
289
+
290
+ ## API Reference
291
+
292
+ ### `getSummaryAndTags(assetId, options?)`
293
+
294
+ Analyzes a Mux video asset and returns AI-generated metadata.
295
+
296
+ **Parameters:**
297
+ - `assetId` (string) - Mux video asset ID
298
+ - `options` (optional) - Configuration options
299
+
300
+ **Options:**
301
+ - `provider?: 'openai' | 'anthropic'` - AI provider (default: 'openai')
302
+ - `tone?: 'normal' | 'sassy' | 'professional'` - Analysis tone (default: 'normal')
303
+ - `model?: string` - AI model to use (default: 'gpt-4o-mini' for OpenAI, 'claude-3-5-haiku-20241022' for Anthropic)
304
+ - `includeTranscript?: boolean` - Include video transcript in analysis (default: true)
305
+ - `cleanTranscript?: boolean` - Remove VTT timestamps and formatting from transcript (default: true)
306
+ - `imageSubmissionMode?: 'url' | 'base64'` - How to submit storyboard to AI providers (default: 'url')
307
+ - `imageDownloadOptions?: object` - Options for image download when using base64 mode
308
+ - `timeout?: number` - Request timeout in milliseconds (default: 10000)
309
+ - `retries?: number` - Maximum retry attempts (default: 3)
310
+ - `retryDelay?: number` - Base delay between retries in milliseconds (default: 1000)
311
+ - `maxRetryDelay?: number` - Maximum delay between retries in milliseconds (default: 10000)
312
+ - `exponentialBackoff?: boolean` - Whether to use exponential backoff (default: true)
313
+ - `muxTokenId?: string` - Mux API token ID
314
+ - `muxTokenSecret?: string` - Mux API token secret
315
+ - `openaiApiKey?: string` - OpenAI API key
316
+ - `anthropicApiKey?: string` - Anthropic API key
317
+
318
+ **Returns:**
319
+ ```typescript
320
+ {
321
+ assetId: string;
322
+ title: string; // Short title (max 100 chars)
323
+ description: string; // Detailed description
324
+ tags: string[]; // Relevant keywords
325
+ storyboardUrl: string; // Video storyboard URL
326
+ }
327
+ ```
328
+
329
+ ### `getModerationScores(assetId, options?)`
330
+
331
+ Analyzes video thumbnails for inappropriate content using OpenAI's moderation API or Hive's Visual Moderation API.
332
+
333
+ **Parameters:**
334
+ - `assetId` (string) - Mux video asset ID
335
+ - `options` (optional) - Configuration options
336
+
337
+ **Options:**
338
+ - `provider?: 'openai' | 'hive'` - Moderation provider (default: 'openai')
339
+ - `model?: string` - OpenAI model to use (default: 'omni-moderation-latest')
340
+ - `thresholds?: { sexual?: number; violence?: number }` - Custom thresholds (default: {sexual: 0.7, violence: 0.8})
341
+ - `thumbnailInterval?: number` - Seconds between thumbnails for long videos (default: 10)
342
+ - `thumbnailWidth?: number` - Thumbnail width in pixels (default: 640)
343
+ - `maxConcurrent?: number` - Maximum concurrent API requests (default: 5)
344
+ - `imageSubmissionMode?: 'url' | 'base64'` - How to submit images to AI providers (default: 'url')
345
+ - `imageDownloadOptions?: object` - Options for image download when using base64 mode
346
+ - `timeout?: number` - Request timeout in milliseconds (default: 10000)
347
+ - `retries?: number` - Maximum retry attempts (default: 3)
348
+ - `retryDelay?: number` - Base delay between retries in milliseconds (default: 1000)
349
+ - `maxRetryDelay?: number` - Maximum delay between retries in milliseconds (default: 10000)
350
+ - `exponentialBackoff?: boolean` - Whether to use exponential backoff (default: true)
351
+ - `muxTokenId/muxTokenSecret/openaiApiKey?: string` - API credentials
352
+ - `hiveApiKey?: string` - Hive API key (required for Hive provider)
353
+
354
+ **Returns:**
355
+ ```typescript
356
+ {
357
+ assetId: string;
358
+ thumbnailScores: Array<{ // Individual thumbnail results
359
+ url: string;
360
+ sexual: number; // 0-1 score
361
+ violence: number; // 0-1 score
362
+ error: boolean;
363
+ }>;
364
+ maxScores: { // Highest scores across all thumbnails
365
+ sexual: number;
366
+ violence: number;
367
+ };
368
+ exceedsThreshold: boolean; // true if content should be flagged
369
+ thresholds: { // Threshold values used
370
+ sexual: number;
371
+ violence: number;
372
+ };
373
+ }
374
+ ```
375
+
376
+ ### `hasBurnedInCaptions(assetId, options?)`
377
+
378
+ Analyzes video frames to detect burned-in captions (hardcoded subtitles) that are permanently embedded in the video image.
379
+
380
+ **Parameters:**
381
+ - `assetId` (string) - Mux video asset ID
382
+ - `options` (optional) - Configuration options
383
+
384
+ **Options:**
385
+ - `provider?: 'openai' | 'anthropic'` - AI provider (default: 'openai')
386
+ - `model?: string` - AI model to use (default: 'gpt-4o-mini' for OpenAI, 'claude-3-5-haiku-20241022' for Anthropic)
387
+ - `imageSubmissionMode?: 'url' | 'base64'` - How to submit storyboard to AI providers (default: 'url')
388
+ - `imageDownloadOptions?: object` - Options for image download when using base64 mode
389
+ - `timeout?: number` - Request timeout in milliseconds (default: 10000)
390
+ - `retries?: number` - Maximum retry attempts (default: 3)
391
+ - `retryDelay?: number` - Base delay between retries in milliseconds (default: 1000)
392
+ - `maxRetryDelay?: number` - Maximum delay between retries in milliseconds (default: 10000)
393
+ - `exponentialBackoff?: boolean` - Whether to use exponential backoff (default: true)
394
+ - `muxTokenId?: string` - Mux API token ID
395
+ - `muxTokenSecret?: string` - Mux API token secret
396
+ - `openaiApiKey?: string` - OpenAI API key
397
+ - `anthropicApiKey?: string` - Anthropic API key
398
+
399
+ **Returns:**
400
+ ```typescript
401
+ {
402
+ assetId: string;
403
+ hasBurnedInCaptions: boolean; // Whether burned-in captions were detected
404
+ confidence: number; // Confidence score (0.0-1.0)
405
+ detectedLanguage: string | null; // Language of detected captions, or null
406
+ storyboardUrl: string; // URL to analyzed storyboard
407
+ }
408
+ ```
409
+
410
+ **Detection Logic:**
411
+ - Analyzes video storyboard frames to identify text overlays
412
+ - Distinguishes between actual captions and marketing/end-card text
413
+ - Text appearing only in the final 1-2 frames is classified as marketing copy
414
+ - Caption text must appear across multiple frames throughout the timeline
415
+ - Both providers use optimized prompts to minimize false positives
416
+
417
+ ### `translateCaptions(assetId, fromLanguageCode, toLanguageCode, options?)`
418
+
419
+ Translates existing captions from one language to another and optionally adds them as a new track to the Mux asset.
420
+
421
+ **Parameters:**
422
+ - `assetId` (string) - Mux video asset ID
423
+ - `fromLanguageCode` (string) - Source language code (e.g., 'en', 'es', 'fr')
424
+ - `toLanguageCode` (string) - Target language code (e.g., 'es', 'fr', 'de')
425
+ - `options` (optional) - Configuration options
426
+
427
+ **Options:**
428
+ - `provider?: 'anthropic'` - AI provider (default: 'anthropic')
429
+ - `model?: string` - Model to use (default: 'claude-sonnet-4-20250514')
430
+ - `uploadToMux?: boolean` - Whether to upload translated track to Mux (default: true)
431
+ - `s3Endpoint?: string` - S3-compatible storage endpoint
432
+ - `s3Region?: string` - S3 region (default: 'auto')
433
+ - `s3Bucket?: string` - S3 bucket name
434
+ - `s3AccessKeyId?: string` - S3 access key ID
435
+ - `s3SecretAccessKey?: string` - S3 secret access key
436
+ - `muxTokenId/muxTokenSecret/anthropicApiKey?: string` - API credentials
437
+
438
+ **Returns:**
439
+ ```typescript
440
+ {
441
+ assetId: string;
442
+ sourceLanguageCode: string;
443
+ targetLanguageCode: string;
444
+ originalVtt: string; // Original VTT content
445
+ translatedVtt: string; // Translated VTT content
446
+ uploadedTrackId?: string; // Mux track ID (if uploaded)
447
+ presignedUrl?: string; // S3 presigned URL (expires in 1 hour)
448
+ }
449
+ ```
450
+
451
+ **Supported Languages:**
452
+ All ISO 639-1 language codes are automatically supported using `Intl.DisplayNames`. Examples: Spanish (es), French (fr), German (de), Italian (it), Portuguese (pt), Polish (pl), Japanese (ja), Korean (ko), Chinese (zh), Russian (ru), Arabic (ar), Hindi (hi), Thai (th), Swahili (sw), and many more.
453
+
454
+ ### `generateChapters(assetId, languageCode, options?)`
455
+
456
+ Generates AI-powered chapter markers by analyzing video captions. Creates logical chapter breaks based on topic changes and content transitions.
457
+
458
+ **Parameters:**
459
+ - `assetId` (string) - Mux video asset ID
460
+ - `languageCode` (string) - Language code for captions (e.g., 'en', 'es', 'fr')
461
+ - `options` (optional) - Configuration options
462
+
463
+ **Options:**
464
+ - `provider?: 'openai' | 'anthropic'` - AI provider (default: 'openai')
465
+ - `model?: string` - AI model to use (default: 'gpt-4o-mini' for OpenAI, 'claude-3-5-haiku-20241022' for Anthropic)
466
+ - `muxTokenId?: string` - Mux API token ID
467
+ - `muxTokenSecret?: string` - Mux API token secret
468
+ - `openaiApiKey?: string` - OpenAI API key
469
+ - `anthropicApiKey?: string` - Anthropic API key
470
+
471
+ **Returns:**
472
+ ```typescript
473
+ {
474
+ assetId: string;
475
+ languageCode: string;
476
+ chapters: Array<{
477
+ startTime: number; // Chapter start time in seconds
478
+ title: string; // Descriptive chapter title
479
+ }>;
480
+ }
481
+ ```
482
+
483
+ **Requirements:**
484
+ - Asset must have caption track in the specified language
485
+ - Caption track must be in 'ready' status
486
+ - Uses existing auto-generated or uploaded captions
487
+
488
+ **Example Output:**
489
+ ```javascript
490
+ // Perfect format for Mux Player
491
+ player.addChapters([
492
+ {startTime: 0, title: 'Introduction and Setup'},
493
+ {startTime: 45, title: 'Main Content Discussion'},
494
+ {startTime: 120, title: 'Conclusion'}
495
+ ]);
496
+ ```
497
+
498
+ ### `translateAudio(assetId, toLanguageCode, options?)`
499
+
500
+ Creates AI-dubbed audio tracks from existing video content using ElevenLabs voice cloning and translation. Uses the default audio track on your asset; the language is auto-detected.
501
+
502
+ **Parameters:**
503
+ - `assetId` (string) - Mux video asset ID (must have audio.m4a static rendition)
504
+ - `toLanguageCode` (string) - Target language code (e.g., 'es', 'fr', 'de')
505
+ - `options` (optional) - Configuration options
506
+
507
+ **Options:**
508
+ - `provider?: 'elevenlabs'` - AI provider (default: 'elevenlabs')
509
+ - `numSpeakers?: number` - Number of speakers (default: 0 for auto-detect)
510
+ - `uploadToMux?: boolean` - Whether to upload dubbed track to Mux (default: true)
511
+ - `s3Endpoint?: string` - S3-compatible storage endpoint
512
+ - `s3Region?: string` - S3 region (default: 'auto')
513
+ - `s3Bucket?: string` - S3 bucket name
514
+ - `s3AccessKeyId?: string` - S3 access key ID
515
+ - `s3SecretAccessKey?: string` - S3 secret access key
516
+ - `elevenLabsApiKey?: string` - ElevenLabs API key
517
+ - `muxTokenId/muxTokenSecret?: string` - API credentials
518
+
519
+ **Returns:**
520
+ ```typescript
521
+ {
522
+ assetId: string;
523
+ targetLanguageCode: string;
524
+ dubbingId: string; // ElevenLabs dubbing job ID
525
+ uploadedTrackId?: string; // Mux audio track ID (if uploaded)
526
+ presignedUrl?: string; // S3 presigned URL (expires in 1 hour)
527
+ }
528
+ ```
529
+
530
+ **Requirements:**
531
+ - Asset must have an `audio.m4a` static rendition
532
+ - ElevenLabs API key with Creator plan or higher
533
+ - S3-compatible storage for Mux ingestion
534
+
535
+ **Supported Languages:**
536
+ ElevenLabs supports 32+ languages with automatic language name detection via `Intl.DisplayNames`. Supported languages include English, Spanish, French, German, Italian, Portuguese, Polish, Japanese, Korean, Chinese, Russian, Arabic, Hindi, Thai, and many more. Track names are automatically generated (e.g., "Polish (auto-dubbed)").
537
+
538
+ ### Custom Prompts
539
+
540
+ Override the default summarization prompt:
541
+
542
+ ```typescript
543
+ const result = await getSummaryAndTags(
544
+ assetId,
545
+ 'Custom analysis prompt here',
546
+ { tone: 'professional' }
547
+ );
548
+ ```
549
+
550
+ ## Examples
551
+
552
+ See the `examples/` directory for complete working examples:
553
+
554
+ ### Summarization Examples
555
+ - **Basic Usage**: Default prompt with different tones
556
+ - **Custom Prompts**: Override default behavior
557
+ - **Tone Variations**: Compare analysis styles
558
+
559
+ ```bash
560
+ cd examples/summarization
561
+ npm install
562
+ npm run basic <your-asset-id>
563
+ npm run tones <your-asset-id>
564
+ npm run custom
565
+ ```
566
+
567
+ ### Moderation Examples
568
+ - **Basic Moderation**: Analyze content with default thresholds
569
+ - **Custom Thresholds**: Compare strict/default/permissive settings
570
+ - **Hive Provider**: Use Hive's Visual Moderation API
571
+ - **Provider Comparison**: Compare OpenAI vs Hive results side-by-side
572
+
573
+ ```bash
574
+ cd examples/moderation
575
+ npm install
576
+ npm run basic <your-asset-id>
577
+ npm run thresholds <your-asset-id>
578
+ npm run hive <your-asset-id>
579
+ npm run compare <your-asset-id>
580
+ ```
581
+
582
+ ### Burned-in Caption Examples
583
+ - **Basic Detection**: Detect burned-in captions with different AI providers
584
+ - **Provider Comparison**: Compare OpenAI vs Anthropic detection accuracy
585
+
586
+ ```bash
587
+ cd examples/burned-in-captions
588
+ npm install
589
+ npm run burned-in:basic <your-asset-id> [provider]
590
+ npm run compare <your-asset-id>
591
+ ```
592
+
593
+ ### Chapter Generation Examples
594
+ - **Basic Chapters**: Generate chapters with different AI providers
595
+ - **Provider Comparison**: Compare OpenAI vs Anthropic chapter generation
596
+
597
+ ```bash
598
+ cd examples/chapters
599
+ npm install
600
+ npm run chapters:basic <your-asset-id> [language-code] [provider]
601
+ npm run compare <your-asset-id> [language-code]
602
+ ```
603
+
604
+ ### Translation Examples
605
+ - **Basic Translation**: Translate captions and upload to Mux
606
+ - **Translation Only**: Translate without uploading to Mux
607
+
608
+ ```bash
609
+ cd examples/translation
610
+ npm install
611
+ npm run basic <your-asset-id> en es
612
+ npm run translation-only <your-asset-id> en fr
613
+ ```
614
+
615
+ **Translation Workflow:**
616
+ 1. Fetches existing captions from Mux asset
617
+ 2. Translates VTT content using Anthropic Claude
618
+ 3. Uploads translated VTT to S3-compatible storage
619
+ 4. Generates presigned URL (1-hour expiry)
620
+ 5. Adds new subtitle track to Mux asset
621
+ 6. Track name: "{Language} (auto-translated)"
622
+
623
+ ### Audio Dubbing Examples
624
+ - **Basic Dubbing**: Create AI-dubbed audio and upload to Mux
625
+ - **Dubbing Only**: Create dubbed audio without uploading to Mux
626
+
627
+ ```bash
628
+ cd examples/audio-translation
629
+ npm install
630
+ npm run basic <your-asset-id> es
631
+ npm run dubbing-only <your-asset-id> fr
632
+ ```
633
+
634
+ **Audio Dubbing Workflow:**
635
+ 1. Checks asset has audio.m4a static rendition
636
+ 2. Downloads default audio track from Mux
637
+ 3. Creates ElevenLabs dubbing job with automatic language detection
638
+ 4. Polls for completion (up to 30 minutes)
639
+ 5. Downloads dubbed audio file
640
+ 6. Uploads to S3-compatible storage
641
+ 7. Generates presigned URL (1-hour expiry)
642
+ 8. Adds new audio track to Mux asset
643
+ 9. Track name: "{Language} (auto-dubbed)"
644
+
645
+ ## S3-Compatible Storage
646
+
647
+ The translation and audio dubbing features require S3-compatible storage to temporarily host VTT and audio files for Mux ingestion. Supported providers include:
648
+
649
+ - **AWS S3** - Amazon's object storage
650
+ - **DigitalOcean Spaces** - S3-compatible with CDN
651
+ - **Cloudflare R2** - Zero egress fees
652
+ - **MinIO** - Self-hosted S3 alternative
653
+ - **Backblaze B2** - Cost-effective storage
654
+ - **Wasabi** - Hot cloud storage
655
+
656
+ **Why S3 Storage?**
657
+ Mux requires a publicly accessible URL to ingest subtitle tracks. The translation workflow:
658
+ 1. Uploads translated VTT to your S3 storage
659
+ 2. Generates a presigned URL for secure access
660
+ 3. Mux fetches the file using the presigned URL
661
+ 4. File remains in your storage for future use
662
+
663
+ ## Planned Features
664
+
665
+ - **Additional Translation Providers**: OpenAI GPT-4 support
666
+ - **Batch Translation**: Translate multiple assets at once
667
+ - **Custom Translation Prompts**: Override default translation behavior
668
+
669
+ ## License
670
+
671
+ MIT © Mux, Inc.
package/package.json ADDED
@@ -0,0 +1,49 @@
1
+ {
2
+ "name": "@mux/ai",
3
+ "version": "0.1.0",
4
+ "description": "AI library for Mux",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "scripts": {
8
+ "build": "tsc",
9
+ "dev": "tsc --watch",
10
+ "test": "jest",
11
+ "lint": "eslint src/**/*.ts",
12
+ "typecheck": "tsc --noEmit",
13
+ "chapters:basic": "npx ts-node examples/chapters/basic-example.ts",
14
+ "burned-in:basic": "npx ts-node examples/burned-in-captions/basic-example.ts"
15
+ },
16
+ "keywords": [
17
+ "mux",
18
+ "ai",
19
+ "typescript"
20
+ ],
21
+ "author": "Mux",
22
+ "license": "MIT",
23
+ "repository": {
24
+ "type": "git",
25
+ "url": "git+https://github.com/muxinc/mux-ai.git"
26
+ },
27
+ "files": [
28
+ "dist"
29
+ ],
30
+ "dependencies": {
31
+ "@anthropic-ai/sdk": "^0.28.0",
32
+ "@aws-sdk/client-s3": "^3.0.0",
33
+ "@aws-sdk/lib-storage": "^3.0.0",
34
+ "@aws-sdk/s3-request-presigner": "^3.0.0",
35
+ "@mux/mux-node": "^12.5.0",
36
+ "dotenv": "^17.2.2",
37
+ "openai": "^5.13.1",
38
+ "p-retry": "^7.0.0",
39
+ "zod": "^3.25.76"
40
+ },
41
+ "devDependencies": {
42
+ "@types/node": "^20.0.0",
43
+ "@typescript-eslint/eslint-plugin": "^6.0.0",
44
+ "@typescript-eslint/parser": "^6.0.0",
45
+ "eslint": "^8.0.0",
46
+ "jest": "^29.0.0",
47
+ "typescript": "^5.0.0"
48
+ }
49
+ }