@mux/ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +671 -0
- package/package.json +49 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Mux, Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,671 @@
|
|
|
1
|
+
# @mux/ai
|
|
2
|
+
|
|
3
|
+
AI-powered video analysis library for Mux, built in TypeScript.
|
|
4
|
+
|
|
5
|
+
## Available Tools
|
|
6
|
+
|
|
7
|
+
| Function | Description | Providers | Default Models | Input | Output |
|
|
8
|
+
|----------|-------------|-----------|----------------|--------|--------|
|
|
9
|
+
| `getSummaryAndTags` | Generate titles, descriptions, and tags from a Mux video asset | OpenAI, Anthropic | `gpt-4o-mini`, `claude-3-5-haiku-20241022` | Asset ID + options | Title, description, tags, storyboard URL |
|
|
10
|
+
| `getModerationScores` | Analyze video thumbnails for inappropriate content | OpenAI, Hive | `omni-moderation-latest`, Hive Visual API | Asset ID + thresholds | Sexual/violence scores, flagged status |
|
|
11
|
+
| `hasBurnedInCaptions` | Detect burned-in captions (hardcoded subtitles) in video frames | OpenAI, Anthropic | `gpt-4o-mini`, `claude-3-5-haiku-20241022` | Asset ID + options | Boolean result, confidence, language |
|
|
12
|
+
| `generateChapters` | Generate AI-powered chapter markers from video captions | OpenAI, Anthropic | `gpt-4o-mini`, `claude-3-5-haiku-20241022` | Asset ID + language + options | Timestamped chapter list |
|
|
13
|
+
| `translateCaptions` | Translate video captions to different languages | Anthropic only | `claude-sonnet-4-20250514` | Asset ID + languages + S3 config | Translated VTT + Mux track ID |
|
|
14
|
+
| `translateAudio` | Create AI-dubbed audio tracks in different languages | ElevenLabs only | ElevenLabs Dubbing API | Asset ID + languages + S3 config | Dubbed audio + Mux track ID |
|
|
15
|
+
|
|
16
|
+
## Features
|
|
17
|
+
|
|
18
|
+
- **Cost-Effective by Default**: Uses affordable models like `gpt-4o-mini` and `claude-3-5-haiku` to keep analysis costs low while maintaining high quality results
|
|
19
|
+
- **Multi-modal Analysis**: Combines storyboard images with video transcripts
|
|
20
|
+
- **Tone Control**: Normal, sassy, or professional analysis styles (summarization only)
|
|
21
|
+
- **Configurable Thresholds**: Custom sensitivity levels for content moderation
|
|
22
|
+
- **TypeScript**: Fully typed for excellent developer experience
|
|
23
|
+
- **Provider Choice**: Switch between OpenAI and Anthropic for different perspectives
|
|
24
|
+
- **Universal Language Support**: Automatic language name detection using `Intl.DisplayNames` for all ISO 639-1 codes
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
npm install @mux/ai
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Quick Start
|
|
33
|
+
|
|
34
|
+
### Video Summarization
|
|
35
|
+
|
|
36
|
+
```typescript
|
|
37
|
+
import { getSummaryAndTags } from '@mux/ai';
|
|
38
|
+
|
|
39
|
+
// Uses built-in optimized prompt
|
|
40
|
+
const result = await getSummaryAndTags('your-mux-asset-id', {
|
|
41
|
+
tone: 'professional'
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
console.log(result.title); // Short, descriptive title
|
|
45
|
+
console.log(result.description); // Detailed description
|
|
46
|
+
console.log(result.tags); // Array of relevant keywords
|
|
47
|
+
console.log(result.storyboardUrl); // URL to Mux storyboard
|
|
48
|
+
|
|
49
|
+
// Use base64 mode for improved reliability (works with both OpenAI and Anthropic)
|
|
50
|
+
const reliableResult = await getSummaryAndTags('your-mux-asset-id', {
|
|
51
|
+
provider: 'anthropic',
|
|
52
|
+
imageSubmissionMode: 'base64', // Uses Files API for Anthropic, base64 for OpenAI
|
|
53
|
+
imageDownloadOptions: {
|
|
54
|
+
timeout: 15000,
|
|
55
|
+
retries: 2,
|
|
56
|
+
retryDelay: 1000
|
|
57
|
+
},
|
|
58
|
+
tone: 'professional'
|
|
59
|
+
});
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Content Moderation
|
|
63
|
+
|
|
64
|
+
```typescript
|
|
65
|
+
import { getModerationScores } from '@mux/ai';
|
|
66
|
+
|
|
67
|
+
// Analyze Mux video asset for inappropriate content (OpenAI default)
|
|
68
|
+
const result = await getModerationScores('your-mux-asset-id', {
|
|
69
|
+
thresholds: { sexual: 0.7, violence: 0.8 }
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
console.log(result.maxScores); // Highest scores across all thumbnails
|
|
73
|
+
console.log(result.exceedsThreshold); // true if content should be flagged
|
|
74
|
+
console.log(result.thumbnailScores); // Individual thumbnail results
|
|
75
|
+
|
|
76
|
+
// Or use Hive for moderation
|
|
77
|
+
const hiveResult = await getModerationScores('your-mux-asset-id', {
|
|
78
|
+
provider: 'hive',
|
|
79
|
+
thresholds: { sexual: 0.7, violence: 0.8 }
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
// Use base64 submission for improved reliability (downloads images locally)
|
|
83
|
+
const reliableResult = await getModerationScores('your-mux-asset-id', {
|
|
84
|
+
provider: 'openai',
|
|
85
|
+
imageSubmissionMode: 'base64',
|
|
86
|
+
imageDownloadOptions: {
|
|
87
|
+
timeout: 15000,
|
|
88
|
+
retries: 3,
|
|
89
|
+
retryDelay: 1000
|
|
90
|
+
}
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
// Hive also supports base64 mode (uses multipart upload)
|
|
94
|
+
const hiveReliableResult = await getModerationScores('your-mux-asset-id', {
|
|
95
|
+
provider: 'hive',
|
|
96
|
+
imageSubmissionMode: 'base64',
|
|
97
|
+
imageDownloadOptions: {
|
|
98
|
+
timeout: 15000,
|
|
99
|
+
retries: 2,
|
|
100
|
+
retryDelay: 1000
|
|
101
|
+
}
|
|
102
|
+
});
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Burned-in Caption Detection
|
|
106
|
+
|
|
107
|
+
```typescript
|
|
108
|
+
import { hasBurnedInCaptions } from '@mux/ai';
|
|
109
|
+
|
|
110
|
+
// Detect burned-in captions (hardcoded subtitles) in video frames
|
|
111
|
+
const result = await hasBurnedInCaptions('your-mux-asset-id', {
|
|
112
|
+
provider: 'openai'
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
console.log(result.hasBurnedInCaptions); // true/false
|
|
116
|
+
console.log(result.confidence); // 0.0-1.0 confidence score
|
|
117
|
+
console.log(result.detectedLanguage); // Language if captions detected
|
|
118
|
+
console.log(result.storyboardUrl); // Video storyboard analyzed
|
|
119
|
+
|
|
120
|
+
// Compare providers
|
|
121
|
+
const anthropicResult = await hasBurnedInCaptions('your-mux-asset-id', {
|
|
122
|
+
provider: 'anthropic',
|
|
123
|
+
model: 'claude-3-5-haiku-20241022'
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
// Use base64 mode for improved reliability
|
|
127
|
+
const reliableResult = await hasBurnedInCaptions('your-mux-asset-id', {
|
|
128
|
+
provider: 'openai',
|
|
129
|
+
imageSubmissionMode: 'base64',
|
|
130
|
+
imageDownloadOptions: {
|
|
131
|
+
timeout: 15000,
|
|
132
|
+
retries: 3,
|
|
133
|
+
retryDelay: 1000
|
|
134
|
+
}
|
|
135
|
+
});
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
#### Image Submission Modes
|
|
139
|
+
|
|
140
|
+
Choose between two methods for submitting images to AI providers:
|
|
141
|
+
|
|
142
|
+
**URL Mode (Default):**
|
|
143
|
+
- Fast initial response
|
|
144
|
+
- Lower bandwidth usage
|
|
145
|
+
- Relies on AI provider's image downloading
|
|
146
|
+
- May encounter timeouts with slow/unreliable image sources
|
|
147
|
+
|
|
148
|
+
**Base64 Mode (Recommended for Production):**
|
|
149
|
+
- Downloads images locally with robust retry logic
|
|
150
|
+
- Eliminates AI provider timeout issues
|
|
151
|
+
- Better control over slow TTFB and network issues
|
|
152
|
+
- Slightly higher bandwidth usage but more reliable results
|
|
153
|
+
- For OpenAI: submits images as base64 data URIs
|
|
154
|
+
- For Hive: uploads images via multipart/form-data (Hive doesn't support base64 data URIs)
|
|
155
|
+
- For Anthropic (summarization): uploads to Files API then references by file_id (no size limit)
|
|
156
|
+
|
|
157
|
+
```typescript
|
|
158
|
+
// High reliability mode - recommended for production
|
|
159
|
+
const result = await getModerationScores(assetId, {
|
|
160
|
+
imageSubmissionMode: 'base64',
|
|
161
|
+
imageDownloadOptions: {
|
|
162
|
+
timeout: 15000, // 15s timeout per image
|
|
163
|
+
retries: 3, // Retry failed downloads 3x
|
|
164
|
+
retryDelay: 1000, // 1s base delay with exponential backoff
|
|
165
|
+
exponentialBackoff: true
|
|
166
|
+
}
|
|
167
|
+
});
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Caption Translation
|
|
171
|
+
|
|
172
|
+
```typescript
|
|
173
|
+
import { translateCaptions } from '@mux/ai';
|
|
174
|
+
|
|
175
|
+
// Translate existing captions to Spanish and add as new track
|
|
176
|
+
const result = await translateCaptions(
|
|
177
|
+
'your-mux-asset-id',
|
|
178
|
+
'en', // from language
|
|
179
|
+
'es', // to language
|
|
180
|
+
{
|
|
181
|
+
provider: 'anthropic',
|
|
182
|
+
model: 'claude-sonnet-4-20250514'
|
|
183
|
+
}
|
|
184
|
+
);
|
|
185
|
+
|
|
186
|
+
console.log(result.uploadedTrackId); // New Mux track ID
|
|
187
|
+
console.log(result.presignedUrl); // S3 file URL
|
|
188
|
+
console.log(result.translatedVtt); // Translated VTT content
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### Video Chapters
|
|
192
|
+
|
|
193
|
+
```typescript
|
|
194
|
+
import { generateChapters } from '@mux/ai';
|
|
195
|
+
|
|
196
|
+
// Generate AI-powered chapters from video captions
|
|
197
|
+
const result = await generateChapters('your-mux-asset-id', 'en', {
|
|
198
|
+
provider: 'openai'
|
|
199
|
+
});
|
|
200
|
+
|
|
201
|
+
console.log(result.chapters); // Array of {startTime: number, title: string}
|
|
202
|
+
|
|
203
|
+
// Use with Mux Player
|
|
204
|
+
const player = document.querySelector('mux-player');
|
|
205
|
+
player.addChapters(result.chapters);
|
|
206
|
+
|
|
207
|
+
// Compare providers
|
|
208
|
+
const anthropicResult = await generateChapters('your-mux-asset-id', 'en', {
|
|
209
|
+
provider: 'anthropic',
|
|
210
|
+
model: 'claude-3-5-haiku-20241022'
|
|
211
|
+
});
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Audio Dubbing
|
|
215
|
+
|
|
216
|
+
```typescript
|
|
217
|
+
import { translateAudio } from '@mux/ai';
|
|
218
|
+
|
|
219
|
+
// Create AI-dubbed audio track and add to Mux asset
|
|
220
|
+
// Uses the default audio track on your asset; the source language is auto-detected
|
|
221
|
+
const result = await translateAudio(
|
|
222
|
+
'your-mux-asset-id',
|
|
223
|
+
'es', // target language
|
|
224
|
+
{
|
|
225
|
+
provider: 'elevenlabs',
|
|
226
|
+
numSpeakers: 0 // Auto-detect speakers
|
|
227
|
+
}
|
|
228
|
+
);
|
|
229
|
+
|
|
230
|
+
console.log(result.dubbingId); // ElevenLabs dubbing job ID
|
|
231
|
+
console.log(result.uploadedTrackId); // New Mux audio track ID
|
|
232
|
+
console.log(result.presignedUrl); // S3 audio file URL
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
### Compare Summarization from Providers
|
|
236
|
+
|
|
237
|
+
```typescript
|
|
238
|
+
import { getSummaryAndTags } from '@mux/ai';
|
|
239
|
+
|
|
240
|
+
// Compare different AI providers analyzing the same Mux video asset
|
|
241
|
+
const assetId = 'your-mux-asset-id';
|
|
242
|
+
|
|
243
|
+
// OpenAI analysis (default: gpt-4o-mini)
|
|
244
|
+
const openaiResult = await getSummaryAndTags(assetId, {
|
|
245
|
+
provider: 'openai',
|
|
246
|
+
tone: 'professional'
|
|
247
|
+
});
|
|
248
|
+
|
|
249
|
+
// Anthropic analysis (default: claude-3-5-haiku-20241022)
|
|
250
|
+
const anthropicResult = await getSummaryAndTags(assetId, {
|
|
251
|
+
provider: 'anthropic',
|
|
252
|
+
tone: 'professional'
|
|
253
|
+
});
|
|
254
|
+
|
|
255
|
+
// Compare results
|
|
256
|
+
console.log('OpenAI:', openaiResult.title);
|
|
257
|
+
console.log('Anthropic:', anthropicResult.title);
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
## Configuration
|
|
261
|
+
|
|
262
|
+
Set environment variables:
|
|
263
|
+
|
|
264
|
+
```bash
|
|
265
|
+
MUX_TOKEN_ID=your_mux_token_id
|
|
266
|
+
MUX_TOKEN_SECRET=your_mux_token_secret
|
|
267
|
+
OPENAI_API_KEY=your_openai_api_key
|
|
268
|
+
ANTHROPIC_API_KEY=your_anthropic_api_key
|
|
269
|
+
ELEVENLABS_API_KEY=your_elevenlabs_api_key
|
|
270
|
+
HIVE_API_KEY=your_hive_api_key
|
|
271
|
+
|
|
272
|
+
# S3-Compatible Storage (required for translation & audio dubbing)
|
|
273
|
+
S3_ENDPOINT=https://your-s3-endpoint.com
|
|
274
|
+
S3_REGION=auto
|
|
275
|
+
S3_BUCKET=your-bucket-name
|
|
276
|
+
S3_ACCESS_KEY_ID=your-access-key
|
|
277
|
+
S3_SECRET_ACCESS_KEY=your-secret-key
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
Or pass credentials directly:
|
|
281
|
+
|
|
282
|
+
```typescript
|
|
283
|
+
const result = await getSummaryAndTags(assetId, {
|
|
284
|
+
muxTokenId: 'your-token-id',
|
|
285
|
+
muxTokenSecret: 'your-token-secret',
|
|
286
|
+
openaiApiKey: 'your-openai-key'
|
|
287
|
+
});
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## API Reference
|
|
291
|
+
|
|
292
|
+
### `getSummaryAndTags(assetId, options?)`
|
|
293
|
+
|
|
294
|
+
Analyzes a Mux video asset and returns AI-generated metadata.
|
|
295
|
+
|
|
296
|
+
**Parameters:**
|
|
297
|
+
- `assetId` (string) - Mux video asset ID
|
|
298
|
+
- `options` (optional) - Configuration options
|
|
299
|
+
|
|
300
|
+
**Options:**
|
|
301
|
+
- `provider?: 'openai' | 'anthropic'` - AI provider (default: 'openai')
|
|
302
|
+
- `tone?: 'normal' | 'sassy' | 'professional'` - Analysis tone (default: 'normal')
|
|
303
|
+
- `model?: string` - AI model to use (default: 'gpt-4o-mini' for OpenAI, 'claude-3-5-haiku-20241022' for Anthropic)
|
|
304
|
+
- `includeTranscript?: boolean` - Include video transcript in analysis (default: true)
|
|
305
|
+
- `cleanTranscript?: boolean` - Remove VTT timestamps and formatting from transcript (default: true)
|
|
306
|
+
- `imageSubmissionMode?: 'url' | 'base64'` - How to submit storyboard to AI providers (default: 'url')
|
|
307
|
+
- `imageDownloadOptions?: object` - Options for image download when using base64 mode
|
|
308
|
+
- `timeout?: number` - Request timeout in milliseconds (default: 10000)
|
|
309
|
+
- `retries?: number` - Maximum retry attempts (default: 3)
|
|
310
|
+
- `retryDelay?: number` - Base delay between retries in milliseconds (default: 1000)
|
|
311
|
+
- `maxRetryDelay?: number` - Maximum delay between retries in milliseconds (default: 10000)
|
|
312
|
+
- `exponentialBackoff?: boolean` - Whether to use exponential backoff (default: true)
|
|
313
|
+
- `muxTokenId?: string` - Mux API token ID
|
|
314
|
+
- `muxTokenSecret?: string` - Mux API token secret
|
|
315
|
+
- `openaiApiKey?: string` - OpenAI API key
|
|
316
|
+
- `anthropicApiKey?: string` - Anthropic API key
|
|
317
|
+
|
|
318
|
+
**Returns:**
|
|
319
|
+
```typescript
|
|
320
|
+
{
|
|
321
|
+
assetId: string;
|
|
322
|
+
title: string; // Short title (max 100 chars)
|
|
323
|
+
description: string; // Detailed description
|
|
324
|
+
tags: string[]; // Relevant keywords
|
|
325
|
+
storyboardUrl: string; // Video storyboard URL
|
|
326
|
+
}
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
### `getModerationScores(assetId, options?)`
|
|
330
|
+
|
|
331
|
+
Analyzes video thumbnails for inappropriate content using OpenAI's moderation API or Hive's Visual Moderation API.
|
|
332
|
+
|
|
333
|
+
**Parameters:**
|
|
334
|
+
- `assetId` (string) - Mux video asset ID
|
|
335
|
+
- `options` (optional) - Configuration options
|
|
336
|
+
|
|
337
|
+
**Options:**
|
|
338
|
+
- `provider?: 'openai' | 'hive'` - Moderation provider (default: 'openai')
|
|
339
|
+
- `model?: string` - OpenAI model to use (default: 'omni-moderation-latest')
|
|
340
|
+
- `thresholds?: { sexual?: number; violence?: number }` - Custom thresholds (default: {sexual: 0.7, violence: 0.8})
|
|
341
|
+
- `thumbnailInterval?: number` - Seconds between thumbnails for long videos (default: 10)
|
|
342
|
+
- `thumbnailWidth?: number` - Thumbnail width in pixels (default: 640)
|
|
343
|
+
- `maxConcurrent?: number` - Maximum concurrent API requests (default: 5)
|
|
344
|
+
- `imageSubmissionMode?: 'url' | 'base64'` - How to submit images to AI providers (default: 'url')
|
|
345
|
+
- `imageDownloadOptions?: object` - Options for image download when using base64 mode
|
|
346
|
+
- `timeout?: number` - Request timeout in milliseconds (default: 10000)
|
|
347
|
+
- `retries?: number` - Maximum retry attempts (default: 3)
|
|
348
|
+
- `retryDelay?: number` - Base delay between retries in milliseconds (default: 1000)
|
|
349
|
+
- `maxRetryDelay?: number` - Maximum delay between retries in milliseconds (default: 10000)
|
|
350
|
+
- `exponentialBackoff?: boolean` - Whether to use exponential backoff (default: true)
|
|
351
|
+
- `muxTokenId/muxTokenSecret/openaiApiKey?: string` - API credentials
|
|
352
|
+
- `hiveApiKey?: string` - Hive API key (required for Hive provider)
|
|
353
|
+
|
|
354
|
+
**Returns:**
|
|
355
|
+
```typescript
|
|
356
|
+
{
|
|
357
|
+
assetId: string;
|
|
358
|
+
thumbnailScores: Array<{ // Individual thumbnail results
|
|
359
|
+
url: string;
|
|
360
|
+
sexual: number; // 0-1 score
|
|
361
|
+
violence: number; // 0-1 score
|
|
362
|
+
error: boolean;
|
|
363
|
+
}>;
|
|
364
|
+
maxScores: { // Highest scores across all thumbnails
|
|
365
|
+
sexual: number;
|
|
366
|
+
violence: number;
|
|
367
|
+
};
|
|
368
|
+
exceedsThreshold: boolean; // true if content should be flagged
|
|
369
|
+
thresholds: { // Threshold values used
|
|
370
|
+
sexual: number;
|
|
371
|
+
violence: number;
|
|
372
|
+
};
|
|
373
|
+
}
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
### `hasBurnedInCaptions(assetId, options?)`
|
|
377
|
+
|
|
378
|
+
Analyzes video frames to detect burned-in captions (hardcoded subtitles) that are permanently embedded in the video image.
|
|
379
|
+
|
|
380
|
+
**Parameters:**
|
|
381
|
+
- `assetId` (string) - Mux video asset ID
|
|
382
|
+
- `options` (optional) - Configuration options
|
|
383
|
+
|
|
384
|
+
**Options:**
|
|
385
|
+
- `provider?: 'openai' | 'anthropic'` - AI provider (default: 'openai')
|
|
386
|
+
- `model?: string` - AI model to use (default: 'gpt-4o-mini' for OpenAI, 'claude-3-5-haiku-20241022' for Anthropic)
|
|
387
|
+
- `imageSubmissionMode?: 'url' | 'base64'` - How to submit storyboard to AI providers (default: 'url')
|
|
388
|
+
- `imageDownloadOptions?: object` - Options for image download when using base64 mode
|
|
389
|
+
- `timeout?: number` - Request timeout in milliseconds (default: 10000)
|
|
390
|
+
- `retries?: number` - Maximum retry attempts (default: 3)
|
|
391
|
+
- `retryDelay?: number` - Base delay between retries in milliseconds (default: 1000)
|
|
392
|
+
- `maxRetryDelay?: number` - Maximum delay between retries in milliseconds (default: 10000)
|
|
393
|
+
- `exponentialBackoff?: boolean` - Whether to use exponential backoff (default: true)
|
|
394
|
+
- `muxTokenId?: string` - Mux API token ID
|
|
395
|
+
- `muxTokenSecret?: string` - Mux API token secret
|
|
396
|
+
- `openaiApiKey?: string` - OpenAI API key
|
|
397
|
+
- `anthropicApiKey?: string` - Anthropic API key
|
|
398
|
+
|
|
399
|
+
**Returns:**
|
|
400
|
+
```typescript
|
|
401
|
+
{
|
|
402
|
+
assetId: string;
|
|
403
|
+
hasBurnedInCaptions: boolean; // Whether burned-in captions were detected
|
|
404
|
+
confidence: number; // Confidence score (0.0-1.0)
|
|
405
|
+
detectedLanguage: string | null; // Language of detected captions, or null
|
|
406
|
+
storyboardUrl: string; // URL to analyzed storyboard
|
|
407
|
+
}
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
**Detection Logic:**
|
|
411
|
+
- Analyzes video storyboard frames to identify text overlays
|
|
412
|
+
- Distinguishes between actual captions and marketing/end-card text
|
|
413
|
+
- Text appearing only in final 1-2 frames is classified as marketing copy
|
|
414
|
+
- Caption text must appear across multiple frames throughout the timeline
|
|
415
|
+
- Both providers use optimized prompts to minimize false positives
|
|
416
|
+
|
|
417
|
+
### `translateCaptions(assetId, fromLanguageCode, toLanguageCode, options?)`
|
|
418
|
+
|
|
419
|
+
Translates existing captions from one language to another and optionally adds them as a new track to the Mux asset.
|
|
420
|
+
|
|
421
|
+
**Parameters:**
|
|
422
|
+
- `assetId` (string) - Mux video asset ID
|
|
423
|
+
- `fromLanguageCode` (string) - Source language code (e.g., 'en', 'es', 'fr')
|
|
424
|
+
- `toLanguageCode` (string) - Target language code (e.g., 'es', 'fr', 'de')
|
|
425
|
+
- `options` (optional) - Configuration options
|
|
426
|
+
|
|
427
|
+
**Options:**
|
|
428
|
+
- `provider?: 'anthropic'` - AI provider (default: 'anthropic')
|
|
429
|
+
- `model?: string` - Model to use (default: 'claude-sonnet-4-20250514')
|
|
430
|
+
- `uploadToMux?: boolean` - Whether to upload translated track to Mux (default: true)
|
|
431
|
+
- `s3Endpoint?: string` - S3-compatible storage endpoint
|
|
432
|
+
- `s3Region?: string` - S3 region (default: 'auto')
|
|
433
|
+
- `s3Bucket?: string` - S3 bucket name
|
|
434
|
+
- `s3AccessKeyId?: string` - S3 access key ID
|
|
435
|
+
- `s3SecretAccessKey?: string` - S3 secret access key
|
|
436
|
+
- `muxTokenId/muxTokenSecret/anthropicApiKey?: string` - API credentials
|
|
437
|
+
|
|
438
|
+
**Returns:**
|
|
439
|
+
```typescript
|
|
440
|
+
{
|
|
441
|
+
assetId: string;
|
|
442
|
+
sourceLanguageCode: string;
|
|
443
|
+
targetLanguageCode: string;
|
|
444
|
+
originalVtt: string; // Original VTT content
|
|
445
|
+
translatedVtt: string; // Translated VTT content
|
|
446
|
+
uploadedTrackId?: string; // Mux track ID (if uploaded)
|
|
447
|
+
presignedUrl?: string; // S3 presigned URL (expires in 1 hour)
|
|
448
|
+
}
|
|
449
|
+
```
|
|
450
|
+
|
|
451
|
+
**Supported Languages:**
|
|
452
|
+
All ISO 639-1 language codes are automatically supported using `Intl.DisplayNames`. Examples: Spanish (es), French (fr), German (de), Italian (it), Portuguese (pt), Polish (pl), Japanese (ja), Korean (ko), Chinese (zh), Russian (ru), Arabic (ar), Hindi (hi), Thai (th), Swahili (sw), and many more.
|
|
453
|
+
|
|
454
|
+
### `generateChapters(assetId, languageCode, options?)`
|
|
455
|
+
|
|
456
|
+
Generates AI-powered chapter markers by analyzing video captions. Creates logical chapter breaks based on topic changes and content transitions.
|
|
457
|
+
|
|
458
|
+
**Parameters:**
|
|
459
|
+
- `assetId` (string) - Mux video asset ID
|
|
460
|
+
- `languageCode` (string) - Language code for captions (e.g., 'en', 'es', 'fr')
|
|
461
|
+
- `options` (optional) - Configuration options
|
|
462
|
+
|
|
463
|
+
**Options:**
|
|
464
|
+
- `provider?: 'openai' | 'anthropic'` - AI provider (default: 'openai')
|
|
465
|
+
- `model?: string` - AI model to use (default: 'gpt-4o-mini' for OpenAI, 'claude-3-5-haiku-20241022' for Anthropic)
|
|
466
|
+
- `muxTokenId?: string` - Mux API token ID
|
|
467
|
+
- `muxTokenSecret?: string` - Mux API token secret
|
|
468
|
+
- `openaiApiKey?: string` - OpenAI API key
|
|
469
|
+
- `anthropicApiKey?: string` - Anthropic API key
|
|
470
|
+
|
|
471
|
+
**Returns:**
|
|
472
|
+
```typescript
|
|
473
|
+
{
|
|
474
|
+
assetId: string;
|
|
475
|
+
languageCode: string;
|
|
476
|
+
chapters: Array<{
|
|
477
|
+
startTime: number; // Chapter start time in seconds
|
|
478
|
+
title: string; // Descriptive chapter title
|
|
479
|
+
}>;
|
|
480
|
+
}
|
|
481
|
+
```
|
|
482
|
+
|
|
483
|
+
**Requirements:**
|
|
484
|
+
- Asset must have caption track in the specified language
|
|
485
|
+
- Caption track must be in 'ready' status
|
|
486
|
+
- Uses existing auto-generated or uploaded captions
|
|
487
|
+
|
|
488
|
+
**Example Output:**
|
|
489
|
+
```javascript
|
|
490
|
+
// Perfect format for Mux Player
|
|
491
|
+
player.addChapters([
|
|
492
|
+
{startTime: 0, title: 'Introduction and Setup'},
|
|
493
|
+
{startTime: 45, title: 'Main Content Discussion'},
|
|
494
|
+
{startTime: 120, title: 'Conclusion'}
|
|
495
|
+
]);
|
|
496
|
+
```
|
|
497
|
+
|
|
498
|
+
### `translateAudio(assetId, toLanguageCode, options?)`
|
|
499
|
+
|
|
500
|
+
Creates AI-dubbed audio tracks from existing video content using ElevenLabs voice cloning and translation. Uses the default audio track on your asset; the source language is auto-detected.
|
|
501
|
+
|
|
502
|
+
**Parameters:**
|
|
503
|
+
- `assetId` (string) - Mux video asset ID (must have audio.m4a static rendition)
|
|
504
|
+
- `toLanguageCode` (string) - Target language code (e.g., 'es', 'fr', 'de')
|
|
505
|
+
- `options` (optional) - Configuration options
|
|
506
|
+
|
|
507
|
+
**Options:**
|
|
508
|
+
- `provider?: 'elevenlabs'` - AI provider (default: 'elevenlabs')
|
|
509
|
+
- `numSpeakers?: number` - Number of speakers (default: 0 for auto-detect)
|
|
510
|
+
- `uploadToMux?: boolean` - Whether to upload dubbed track to Mux (default: true)
|
|
511
|
+
- `s3Endpoint?: string` - S3-compatible storage endpoint
|
|
512
|
+
- `s3Region?: string` - S3 region (default: 'auto')
|
|
513
|
+
- `s3Bucket?: string` - S3 bucket name
|
|
514
|
+
- `s3AccessKeyId?: string` - S3 access key ID
|
|
515
|
+
- `s3SecretAccessKey?: string` - S3 secret access key
|
|
516
|
+
- `elevenLabsApiKey?: string` - ElevenLabs API key
|
|
517
|
+
- `muxTokenId/muxTokenSecret?: string` - API credentials
|
|
518
|
+
|
|
519
|
+
**Returns:**
|
|
520
|
+
```typescript
|
|
521
|
+
{
|
|
522
|
+
assetId: string;
|
|
523
|
+
targetLanguageCode: string;
|
|
524
|
+
dubbingId: string; // ElevenLabs dubbing job ID
|
|
525
|
+
uploadedTrackId?: string; // Mux audio track ID (if uploaded)
|
|
526
|
+
presignedUrl?: string; // S3 presigned URL (expires in 1 hour)
|
|
527
|
+
}
|
|
528
|
+
```
|
|
529
|
+
|
|
530
|
+
**Requirements:**
|
|
531
|
+
- Asset must have an `audio.m4a` static rendition
|
|
532
|
+
- ElevenLabs API key with Creator plan or higher
|
|
533
|
+
- S3-compatible storage for Mux ingestion
|
|
534
|
+
|
|
535
|
+
**Supported Languages:**
|
|
536
|
+
ElevenLabs supports 32+ languages with automatic language name detection via `Intl.DisplayNames`. Supported languages include English, Spanish, French, German, Italian, Portuguese, Polish, Japanese, Korean, Chinese, Russian, Arabic, Hindi, Thai, and many more. Track names are automatically generated (e.g., "Polish (auto-dubbed)").
|
|
537
|
+
|
|
538
|
+
### Custom Prompts
|
|
539
|
+
|
|
540
|
+
Override the default summarization prompt:
|
|
541
|
+
|
|
542
|
+
```typescript
|
|
543
|
+
const result = await getSummaryAndTags(
|
|
544
|
+
assetId,
|
|
545
|
+
'Custom analysis prompt here',
|
|
546
|
+
{ tone: 'professional' }
|
|
547
|
+
);
|
|
548
|
+
```
|
|
549
|
+
|
|
550
|
+
## Examples
|
|
551
|
+
|
|
552
|
+
See the `examples/` directory for complete working examples:
|
|
553
|
+
|
|
554
|
+
### Summarization Examples
|
|
555
|
+
- **Basic Usage**: Default prompt with different tones
|
|
556
|
+
- **Custom Prompts**: Override default behavior
|
|
557
|
+
- **Tone Variations**: Compare analysis styles
|
|
558
|
+
|
|
559
|
+
```bash
|
|
560
|
+
cd examples/summarization
|
|
561
|
+
npm install
|
|
562
|
+
npm run basic <your-asset-id>
|
|
563
|
+
npm run tones <your-asset-id>
|
|
564
|
+
npm run custom
|
|
565
|
+
```
|
|
566
|
+
|
|
567
|
+
### Moderation Examples
|
|
568
|
+
- **Basic Moderation**: Analyze content with default thresholds
|
|
569
|
+
- **Custom Thresholds**: Compare strict/default/permissive settings
|
|
570
|
+
- **Hive Provider**: Use Hive's Visual Moderation API
|
|
571
|
+
- **Provider Comparison**: Compare OpenAI vs Hive results side-by-side
|
|
572
|
+
|
|
573
|
+
```bash
|
|
574
|
+
cd examples/moderation
|
|
575
|
+
npm install
|
|
576
|
+
npm run basic <your-asset-id>
|
|
577
|
+
npm run thresholds <your-asset-id>
|
|
578
|
+
npm run hive <your-asset-id>
|
|
579
|
+
npm run compare <your-asset-id>
|
|
580
|
+
```
|
|
581
|
+
|
|
582
|
+
### Burned-in Caption Examples
|
|
583
|
+
- **Basic Detection**: Detect burned-in captions with different AI providers
|
|
584
|
+
- **Provider Comparison**: Compare OpenAI vs Anthropic detection accuracy
|
|
585
|
+
|
|
586
|
+
```bash
|
|
587
|
+
cd examples/burned-in-captions
|
|
588
|
+
npm install
|
|
589
|
+
npm run burned-in:basic <your-asset-id> [provider]
|
|
590
|
+
npm run compare <your-asset-id>
|
|
591
|
+
```
|
|
592
|
+
|
|
593
|
+
### Chapter Generation Examples
|
|
594
|
+
- **Basic Chapters**: Generate chapters with different AI providers
|
|
595
|
+
- **Provider Comparison**: Compare OpenAI vs Anthropic chapter generation
|
|
596
|
+
|
|
597
|
+
```bash
|
|
598
|
+
cd examples/chapters
|
|
599
|
+
npm install
|
|
600
|
+
npm run chapters:basic <your-asset-id> [language-code] [provider]
|
|
601
|
+
npm run compare <your-asset-id> [language-code]
|
|
602
|
+
```
|
|
603
|
+
|
|
604
|
+
### Translation Examples
|
|
605
|
+
- **Basic Translation**: Translate captions and upload to Mux
|
|
606
|
+
- **Translation Only**: Translate without uploading to Mux
|
|
607
|
+
|
|
608
|
+
```bash
|
|
609
|
+
cd examples/translation
|
|
610
|
+
npm install
|
|
611
|
+
npm run basic <your-asset-id> en es
|
|
612
|
+
npm run translation-only <your-asset-id> en fr
|
|
613
|
+
```
|
|
614
|
+
|
|
615
|
+
**Translation Workflow:**
|
|
616
|
+
1. Fetches existing captions from Mux asset
|
|
617
|
+
2. Translates VTT content using Anthropic Claude
|
|
618
|
+
3. Uploads translated VTT to S3-compatible storage
|
|
619
|
+
4. Generates presigned URL (1-hour expiry)
|
|
620
|
+
5. Adds new subtitle track to Mux asset
|
|
621
|
+
6. Track name: "{Language} (auto-translated)"
|
|
622
|
+
|
|
623
|
+
### Audio Dubbing Examples
|
|
624
|
+
- **Basic Dubbing**: Create AI-dubbed audio and upload to Mux
|
|
625
|
+
- **Dubbing Only**: Create dubbed audio without uploading to Mux
|
|
626
|
+
|
|
627
|
+
```bash
|
|
628
|
+
cd examples/audio-translation
|
|
629
|
+
npm install
|
|
630
|
+
npm run basic <your-asset-id> es
|
|
631
|
+
npm run dubbing-only <your-asset-id> fr
|
|
632
|
+
```
|
|
633
|
+
|
|
634
|
+
**Audio Dubbing Workflow:**
|
|
635
|
+
1. Checks asset has audio.m4a static rendition
|
|
636
|
+
2. Downloads default audio track from Mux
|
|
637
|
+
3. Creates ElevenLabs dubbing job with automatic language detection
|
|
638
|
+
4. Polls for completion (up to 30 minutes)
|
|
639
|
+
5. Downloads dubbed audio file
|
|
640
|
+
6. Uploads to S3-compatible storage
|
|
641
|
+
7. Generates presigned URL (1-hour expiry)
|
|
642
|
+
8. Adds new audio track to Mux asset
|
|
643
|
+
9. Track name: "{Language} (auto-dubbed)"
|
|
644
|
+
|
|
645
|
+
## S3-Compatible Storage
|
|
646
|
+
|
|
647
|
+
The translation and audio dubbing features require S3-compatible storage to temporarily host VTT and audio files for Mux ingestion. Supported providers include:
|
|
648
|
+
|
|
649
|
+
- **AWS S3** - Amazon's object storage
|
|
650
|
+
- **DigitalOcean Spaces** - S3-compatible with CDN
|
|
651
|
+
- **Cloudflare R2** - Zero egress fees
|
|
652
|
+
- **MinIO** - Self-hosted S3 alternative
|
|
653
|
+
- **Backblaze B2** - Cost-effective storage
|
|
654
|
+
- **Wasabi** - Hot cloud storage
|
|
655
|
+
|
|
656
|
+
**Why S3 Storage?**
|
|
657
|
+
Mux requires a publicly accessible URL to ingest subtitle tracks. The translation workflow:
|
|
658
|
+
1. Uploads translated VTT to your S3 storage
|
|
659
|
+
2. Generates a presigned URL for secure access
|
|
660
|
+
3. Mux fetches the file using the presigned URL
|
|
661
|
+
4. File remains in your storage for future use
|
|
662
|
+
|
|
663
|
+
## Planned Features
|
|
664
|
+
|
|
665
|
+
- **Additional Translation Providers**: OpenAI GPT-4 support
|
|
666
|
+
- **Batch Translation**: Translate multiple assets at once
|
|
667
|
+
- **Custom Translation Prompts**: Override default translation behavior
|
|
668
|
+
|
|
669
|
+
## License
|
|
670
|
+
|
|
671
|
+
MIT © Mux, Inc.
|
package/package.json
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@mux/ai",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "AI library for Mux",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"build": "tsc",
|
|
9
|
+
"dev": "tsc --watch",
|
|
10
|
+
"test": "jest",
|
|
11
|
+
"lint": "eslint src/**/*.ts",
|
|
12
|
+
"typecheck": "tsc --noEmit",
|
|
13
|
+
"chapters:basic": "npx ts-node examples/chapters/basic-example.ts",
|
|
14
|
+
"burned-in:basic": "npx ts-node examples/burned-in-captions/basic-example.ts"
|
|
15
|
+
},
|
|
16
|
+
"keywords": [
|
|
17
|
+
"mux",
|
|
18
|
+
"ai",
|
|
19
|
+
"typescript"
|
|
20
|
+
],
|
|
21
|
+
"author": "Mux",
|
|
22
|
+
"license": "MIT",
|
|
23
|
+
"repository": {
|
|
24
|
+
"type": "git",
|
|
25
|
+
"url": "git+https://github.com/muxinc/mux-ai.git"
|
|
26
|
+
},
|
|
27
|
+
"files": [
|
|
28
|
+
"dist"
|
|
29
|
+
],
|
|
30
|
+
"dependencies": {
|
|
31
|
+
"@anthropic-ai/sdk": "^0.28.0",
|
|
32
|
+
"@aws-sdk/client-s3": "^3.0.0",
|
|
33
|
+
"@aws-sdk/lib-storage": "^3.0.0",
|
|
34
|
+
"@aws-sdk/s3-request-presigner": "^3.0.0",
|
|
35
|
+
"@mux/mux-node": "^12.5.0",
|
|
36
|
+
"dotenv": "^17.2.2",
|
|
37
|
+
"openai": "^5.13.1",
|
|
38
|
+
"p-retry": "^7.0.0",
|
|
39
|
+
"zod": "^3.25.76"
|
|
40
|
+
},
|
|
41
|
+
"devDependencies": {
|
|
42
|
+
"@types/node": "^20.0.0",
|
|
43
|
+
"@typescript-eslint/eslint-plugin": "^6.0.0",
|
|
44
|
+
"@typescript-eslint/parser": "^6.0.0",
|
|
45
|
+
"eslint": "^8.0.0",
|
|
46
|
+
"jest": "^29.0.0",
|
|
47
|
+
"typescript": "^5.0.0"
|
|
48
|
+
}
|
|
49
|
+
}
|