@mux/ai 0.7.3 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +113 -390
- package/dist/{index-BMqnP1RV.d.ts → index-Bavk1Y8-.d.ts} +6 -0
- package/dist/index.d.ts +2 -2
- package/dist/index.js +107 -4
- package/dist/index.js.map +1 -1
- package/dist/primitives/index.js +85 -0
- package/dist/primitives/index.js.map +1 -1
- package/dist/workflows/index.d.ts +1 -1
- package/dist/workflows/index.js +106 -3
- package/dist/workflows/index.js.map +1 -1
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -1,210 +1,40 @@
|
|
|
1
|
-
# `@mux/ai`
|
|
1
|
+
# `@mux/ai`
|
|
2
2
|
|
|
3
3
|
[](https://www.npmjs.com/package/@mux/ai)
|
|
4
4
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
Easy to use, purpose-driven, cost effective, configurable **_workflow functions_** in a TypeScript SDK for building AI-powered video and audio workflows on the server, powered by [Mux](https://www.mux.com), with support for popular AI/LLM providers (OpenAI, Anthropic, Google).
|
|
7
7
|
|
|
8
|
-
`@mux/ai` does this by providing:
|
|
9
|
-
|
|
10
|
-
Easy to use, purpose-driven, cost effective, configurable **_workflow functions_** that integrate with a variety of popular AI/LLM providers (OpenAI, Anthropic, Google).
|
|
11
8
|
- **Examples:** [`getSummaryAndTags`](#video-summarization), [`getModerationScores`](#content-moderation), [`hasBurnedInCaptions`](#burned-in-caption-detection), [`generateChapters`](#chapter-generation), [`generateEmbeddings`](#search-with-embeddings), [`translateCaptions`](#caption-translation), [`translateAudio`](#audio-dubbing)
|
|
12
9
|
- Workflows automatically ship with `"use workflow"` [compatibility with Workflow DevKit](#compatability-with-workflow-devkit)
|
|
13
10
|
|
|
14
|
-
|
|
15
|
-
- **Examples:** `getStoryboardUrl`, `chunkVTTCues`, `fetchTranscriptForAsset`
|
|
16
|
-
|
|
17
|
-
# Usage
|
|
18
|
-
|
|
19
|
-
```ts
|
|
20
|
-
import { getSummaryAndTags } from "@mux/ai/workflows";
|
|
21
|
-
|
|
22
|
-
const result = await getSummaryAndTags("your-asset-id", {
|
|
23
|
-
provider: "openai",
|
|
24
|
-
tone: "professional",
|
|
25
|
-
includeTranscript: true
|
|
26
|
-
});
|
|
27
|
-
|
|
28
|
-
console.log(result.title); // "Getting Started with TypeScript"
|
|
29
|
-
console.log(result.description); // "A comprehensive guide to..."
|
|
30
|
-
console.log(result.tags); // ["typescript", "tutorial", "programming"]
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
> **⚠️ Important:** Many workflows rely on video transcripts for best results. Consider enabling [auto-generated captions](https://www.mux.com/docs/guides/add-autogenerated-captions-and-use-transcripts) on your Mux assets to unlock the full potential of transcript-based workflows like summarization, chapters, and embeddings.
|
|
34
|
-
|
|
35
|
-
# Quick Start
|
|
36
|
-
|
|
37
|
-
## Prerequisites
|
|
11
|
+
Turn your Mux video and audio assets into structured, actionable data — summaries, chapters, moderation scores, translations, embeddings, and more — with a single function call. `@mux/ai` handles fetching media data from Mux, formatting it for AI providers, and returning typed results so you can focus on building your product instead of wrangling prompts and media pipelines.
|
|
38
12
|
|
|
39
|
-
|
|
40
|
-
- A Mux account and necessary [credentials](#credentials---mux) for your environment (sign up [here](https://dashboard.mux.com/signup) for free!)
|
|
41
|
-
- Accounts and [credentials](#credentials---ai-providers) for any AI providers you intend to use for your workflows
|
|
42
|
-
- (For some workflows only) AWS S3 and [other credentials](#credentials---other)
|
|
13
|
+
## Quick Start
|
|
43
14
|
|
|
44
|
-
|
|
45
|
-
## Installation
|
|
15
|
+
### Install
|
|
46
16
|
|
|
47
17
|
```bash
|
|
48
18
|
npm install @mux/ai
|
|
49
19
|
```
|
|
50
20
|
|
|
51
|
-
|
|
21
|
+
### Configure
|
|
52
22
|
|
|
53
|
-
|
|
23
|
+
Add your credentials to a `.env` file (we support [dotenv](https://www.npmjs.com/package/dotenv)):
|
|
54
24
|
|
|
55
25
|
```bash
|
|
56
|
-
# Required
|
|
57
26
|
MUX_TOKEN_ID=your_mux_token_id
|
|
58
27
|
MUX_TOKEN_SECRET=your_mux_token_secret
|
|
59
|
-
|
|
60
|
-
# Needed if your assets _only_ have signed playback IDs
|
|
61
|
-
MUX_SIGNING_KEY=your_signing_key_id
|
|
62
|
-
MUX_PRIVATE_KEY=your_base64_encoded_private_key
|
|
63
|
-
|
|
64
|
-
# You only need to configure API keys for the AI platforms and workflows you're using
|
|
65
|
-
OPENAI_API_KEY=your_openai_api_key
|
|
66
|
-
ANTHROPIC_API_KEY=your_anthropic_api_key
|
|
67
|
-
GOOGLE_GENERATIVE_AI_API_KEY=your_google_api_key
|
|
68
|
-
ELEVENLABS_API_KEY=your_elevenlabs_api_key
|
|
69
|
-
|
|
70
|
-
# S3-Compatible Storage (required for translation & audio dubbing)
|
|
71
|
-
S3_ENDPOINT=https://your-s3-endpoint.com
|
|
72
|
-
S3_REGION=auto
|
|
73
|
-
S3_BUCKET=your-bucket-name
|
|
74
|
-
S3_ACCESS_KEY_ID=your-access-key
|
|
75
|
-
S3_SECRET_ACCESS_KEY=your-secret-key
|
|
28
|
+
OPENAI_API_KEY=your_openai_api_key # or ANTHROPIC_API_KEY, GOOGLE_GENERATIVE_AI_API_KEY
|
|
76
29
|
```
|
|
77
30
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
# Workflows
|
|
81
|
-
|
|
82
|
-
## Available pre-built workflows
|
|
83
|
-
|
|
84
|
-
| Workflow | Description | Providers | Default Models | Mux Asset Requirements | Cloud Infrastructure Requirements |
|
|
85
|
-
| ------------------------------------------------------------------------ | ----------------------------------------------------------------- | ------------------------- | ------------------------------------------------------------------ | ---------------------- | --------------------------------- |
|
|
86
|
-
| [`getSummaryAndTags`](./docs/WORKFLOWS.md#video-summarization)<br/>[API](./docs/API.md#getsummaryandtagsassetid-options) · [Source](./src/workflows/summarization.ts) | Generate titles, descriptions, and tags for an asset | OpenAI, Anthropic, Google | `gpt-5.1` (OpenAI), `claude-sonnet-4-5` (Anthropic), `gemini-3-flash-preview` (Google) | Video (required), Captions (optional) | None |
|
|
87
|
-
| [`getModerationScores`](./docs/WORKFLOWS.md#content-moderation)<br/>[API](./docs/API.md#getmoderationscoresassetid-options) · [Source](./src/workflows/moderation.ts) | Detect inappropriate (sexual or violent) content in an asset | OpenAI, Hive | `omni-moderation-latest` (OpenAI) or Hive visual moderation task | Video (required) | None |
|
|
88
|
-
| [`hasBurnedInCaptions`](./docs/WORKFLOWS.md#burned-in-caption-detection)<br/>[API](./docs/API.md#hasburnedincaptionsassetid-options) · [Source](./src/workflows/burned-in-captions.ts) | Detect burned-in captions (hardcoded subtitles) in an asset | OpenAI, Anthropic, Google | `gpt-5.1` (OpenAI), `claude-sonnet-4-5` (Anthropic), `gemini-3-flash-preview` (Google) | Video (required) | None |
|
|
89
|
-
| [`askQuestions`](./docs/WORKFLOWS.md#ask-questions)<br/>[API](./docs/API.md#askquestionsassetid-questions-options) · [Source](./src/workflows/ask-questions.ts) | Answer yes/no questions about an asset's content | OpenAI, Anthropic, Google | `gpt-5.1` (OpenAI), `claude-sonnet-4-5` (Anthropic), `gemini-3-flash-preview` (Google) | Video (required), Captions (optional) | None |
|
|
90
|
-
| [`generateChapters`](./docs/WORKFLOWS.md#chapter-generation)<br/>[API](./docs/API.md#generatechaptersassetid-languagecode-options) · [Source](./src/workflows/chapters.ts) | Generate chapter markers for an asset using the transcript | OpenAI, Anthropic, Google | `gpt-5.1` (OpenAI), `claude-sonnet-4-5` (Anthropic), `gemini-3-flash-preview` (Google) | Video or audio-only, Captions/Transcripts (required) | None |
|
|
91
|
-
| [`generateEmbeddings`](./docs/WORKFLOWS.md#embeddings)<br/>[API](./docs/API.md#generateembeddingsassetid-options) · [Source](./src/workflows/embeddings.ts) | Generate vector embeddings for an asset's transcript chunks | OpenAI, Google | `text-embedding-3-small` (OpenAI), `gemini-embedding-001` (Google) | Video or audio-only, Captions/Transcripts (required) | None |
|
|
92
|
-
| [`translateCaptions`](./docs/WORKFLOWS.md#caption-translation)<br/>[API](./docs/API.md#translatecaptionsassetid-fromlanguagecode-tolanguagecode-options) · [Source](./src/workflows/translate-captions.ts) | Translate an asset's captions into different languages | OpenAI, Anthropic, Google | `gpt-5.1` (OpenAI), `claude-sonnet-4-5` (Anthropic), `gemini-3-flash-preview` (Google) | Video or audio-only, Captions/Transcripts (required) | AWS S3 (if `uploadToMux=true`) |
|
|
93
|
-
| [`translateAudio`](./docs/WORKFLOWS.md#audio-dubbing)<br/>[API](./docs/API.md#translateaudioassetid-tolanguagecode-options) · [Source](./src/workflows/translate-audio.ts) | Create AI-dubbed audio tracks in different languages for an asset | ElevenLabs only | ElevenLabs Dubbing API | Video or audio-only, Audio (required) | AWS S3 (if `uploadToMux=true`) |
|
|
31
|
+
You only need credentials for the AI provider you're using. See the [Credentials guide](./docs/CREDENTIALS.md) for full setup details including signed playback, S3 storage, and all supported providers.
|
|
94
32
|
|
|
95
|
-
|
|
33
|
+
For multi-tenant apps or cases where you need to provide API keys at runtime rather than through environment variables, every workflow accepts a `credentials` option. You can also register a global credentials provider with `setWorkflowCredentialsProvider()` for dynamic key resolution (e.g. per-tenant secrets). When using [Workflow DevKit](https://useworkflow.dev), credentials can be [encrypted](./docs/WORKFLOW-ENCRYPTION.md) before crossing workflow boundaries so plaintext secrets never appear in serialized payloads.
|
|
96
34
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
Workflow DevKit serializes workflow inputs/outputs for observability. To avoid sending plaintext secrets through `start(...)`, encrypt credentials in the trigger host and decrypt them in workflow steps.
|
|
100
|
-
See the dedicated [Workflow Encryption guide](./docs/WORKFLOW-ENCRYPTION.md) for full setup and patterns.
|
|
101
|
-
|
|
102
|
-
If you are using Workflow DevKit in your project, then you must call workflow functions like this:
|
|
35
|
+
### Run Your First Workflow
|
|
103
36
|
|
|
104
37
|
```ts
|
|
105
|
-
import { start } from 'workflow/api';
|
|
106
|
-
import { getSummaryAndTags } from '@mux/ai/workflows';
|
|
107
|
-
|
|
108
|
-
const assetId = 'YOUR_ASSET_ID';
|
|
109
|
-
const run = await start(getSummaryAndTags, [assetId]);
|
|
110
|
-
|
|
111
|
-
// optionally, wait for the workflow run return value:
|
|
112
|
-
// const result = await run.returnValue
|
|
113
|
-
```
|
|
114
|
-
|
|
115
|
-
### Multi-tenant credentials with Workflow Dev Kit
|
|
116
|
-
|
|
117
|
-
Set a shared workflow secret key (base64-encoded 32-byte value) in your environment:
|
|
118
|
-
|
|
119
|
-
```bash
|
|
120
|
-
MUX_AI_WORKFLOW_SECRET_KEY=your_base64_32_byte_key
|
|
121
|
-
```
|
|
122
|
-
|
|
123
|
-
Then encrypt credentials before calling `start()`:
|
|
124
|
-
|
|
125
|
-
```ts
|
|
126
|
-
import { start } from "workflow/api";
|
|
127
|
-
import { encryptForWorkflow } from "@mux/ai";
|
|
128
|
-
import { getSummaryAndTags } from "@mux/ai/workflows";
|
|
129
|
-
|
|
130
|
-
const workflowKey = process.env.MUX_AI_WORKFLOW_SECRET_KEY!;
|
|
131
|
-
const encryptedCredentials = await encryptForWorkflow(
|
|
132
|
-
{
|
|
133
|
-
muxTokenId: "mux-token-id",
|
|
134
|
-
muxTokenSecret: "mux-token-secret",
|
|
135
|
-
openaiApiKey: "openai-api-key",
|
|
136
|
-
},
|
|
137
|
-
workflowKey,
|
|
138
|
-
);
|
|
139
|
-
|
|
140
|
-
const run = await start(getSummaryAndTags, [
|
|
141
|
-
"your-asset-id",
|
|
142
|
-
{
|
|
143
|
-
provider: "openai",
|
|
144
|
-
credentials: encryptedCredentials,
|
|
145
|
-
},
|
|
146
|
-
]);
|
|
147
|
-
```
|
|
148
|
-
|
|
149
|
-
For Mux tokens specifically, `setWorkflowCredentialsProvider(...)` (or environment variables) is still recommended so raw Mux secrets are never embedded in workflow input payloads.
|
|
150
|
-
|
|
151
|
-
You can also register a credential provider on the execution host to resolve secrets inside steps.
|
|
152
|
-
This is useful for dynamic key resolution, e.g. rotating keys or per-tenant secrets:
|
|
153
|
-
|
|
154
|
-
```ts
|
|
155
|
-
import {
|
|
156
|
-
setWorkflowCredentialsProvider,
|
|
157
|
-
} from "@mux/ai";
|
|
158
|
-
|
|
159
|
-
setWorkflowCredentialsProvider(async () => ({
|
|
160
|
-
muxTokenId: "mux-token-id",
|
|
161
|
-
muxTokenSecret: "mux-token-secret",
|
|
162
|
-
openaiApiKey: await getOpenAIKeyForTenant(),
|
|
163
|
-
}));
|
|
164
|
-
```
|
|
165
|
-
|
|
166
|
-
### Features of Workflow DevKit
|
|
167
|
-
|
|
168
|
-
- [Observability Dashboard](https://useworkflow.dev/docs/observability)
|
|
169
|
-
- [Control Flow Patterns](https://useworkflow.dev/docs/foundations/control-flow-patterns) like Parallel Execution.
|
|
170
|
-
- [Errors and Retrying](https://useworkflow.dev/docs/foundations/errors-and-retries)
|
|
171
|
-
- [Hooks and Webhooks](https://useworkflow.dev/docs/foundations/hooks)
|
|
172
|
-
- Patterns for building Agents with [Human in the Loop](https://useworkflow.dev/docs/ai/human-in-the-loop)
|
|
173
|
-
|
|
174
|
-
**Workflows can be nested**
|
|
175
|
-
|
|
176
|
-
```ts
|
|
177
|
-
import { start } from "workflow/api";
|
|
178
|
-
import { getSummaryAndTags } from '@mux/ai/workflows';
|
|
179
|
-
|
|
180
|
-
async function processVideoSummary (assetId: string) {
|
|
181
|
-
'use workflow'
|
|
182
|
-
|
|
183
|
-
const summary = await getSummaryAndTags(assetId);
|
|
184
|
-
const emailResp = await emailSummaryToAdmins(summary: summary);
|
|
185
|
-
|
|
186
|
-
return { assetId, summary, emailResp }
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
async function emailSummaryToAdmins (assetId: string) {
|
|
190
|
-
'use step';
|
|
191
|
-
return { sent: true }
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
//
|
|
195
|
-
// this will call the processVideoSummary workflow that is defined above
|
|
196
|
-
// in that workflow, it calls `getSummaryAndTags()` workflow
|
|
197
|
-
//
|
|
198
|
-
const run = await start(processVideoSummary, [assetId]);
|
|
199
|
-
```
|
|
200
|
-
|
|
201
|
-
## Example Workflows
|
|
202
|
-
|
|
203
|
-
### Video Summarization
|
|
204
|
-
|
|
205
|
-
Generate SEO-friendly titles, descriptions, and tags from your video content:
|
|
206
|
-
|
|
207
|
-
```typescript
|
|
208
38
|
import { getSummaryAndTags } from "@mux/ai/workflows";
|
|
209
39
|
|
|
210
40
|
const result = await getSummaryAndTags("your-asset-id", {
|
|
@@ -218,11 +48,42 @@ console.log(result.description); // "A comprehensive guide to..."
|
|
|
218
48
|
console.log(result.tags); // ["typescript", "tutorial", "programming"]
|
|
219
49
|
```
|
|
220
50
|
|
|
221
|
-
|
|
51
|
+
> **⚠️ Note:** Many workflows rely on transcripts for best results. Consider enabling [auto-generated captions](https://www.mux.com/docs/guides/add-autogenerated-captions-and-use-transcripts) on your Mux assets to unlock the full potential of transcript-based workflows like summarization, chapters, and embeddings. This applies to both video and audio-only assets.
|
|
52
|
+
|
|
53
|
+
## Why `@mux/ai`?
|
|
222
54
|
|
|
223
|
-
|
|
55
|
+
- **Pre-built workflows for media AI tasks.** Common multi-step operations (transcript access, frame analysis, LLM calls, and structured parsing) are available as high-level functions.
|
|
56
|
+
- **Support for video and audio assets.** The same workflows work with video and [audio-only assets](./docs/AUDIO-ONLY.md), including summarization, moderation, chaptering, and more.
|
|
57
|
+
- **Provider-flexible API.** Choose OpenAI, Anthropic, or Google through workflow options while keeping the same workflow interface.
|
|
58
|
+
- **Published evaluation coverage.** Workflows include [evals](./docs/EVALS.md) for quality, latency, and cost, with results [published publicly](https://evaluating-mux-ai.vercel.app/) on pushes to `main`.
|
|
59
|
+
- **Sensible default models.** Defaults (`gpt-5.1`, `claude-sonnet-4-5`, `gemini-3-flash-preview`) are selected to balance output quality and runtime cost.
|
|
60
|
+
- **Typed end-to-end.** Workflow inputs, options, and outputs are fully typed in TypeScript.
|
|
61
|
+
- **Operational defaults included.** Retry handling, error handling, signed playback support, and [Workflow DevKit](https://useworkflow.dev) compatibility are built in.
|
|
62
|
+
- **Prompt customization support.** Use `promptOverrides` to adjust sections of workflow prompts for your domain or product requirements.
|
|
63
|
+
- **Composable abstractions.** Start with full workflows and drop down to lower-level primitives when you need more control.
|
|
64
|
+
|
|
65
|
+
## Workflows
|
|
224
66
|
|
|
225
|
-
|
|
67
|
+
Workflows are high-level functions that handle complete media AI tasks end-to-end — fetching data from Mux, calling AI providers, and returning structured results. Most workflows support both video and audio-only assets.
|
|
68
|
+
|
|
69
|
+
| Workflow | What it does | Providers | Audio-only |
|
|
70
|
+
| --- | --- | --- | :---: |
|
|
71
|
+
| [`getSummaryAndTags`](./docs/WORKFLOWS.md#video-summarization) | Generate titles, descriptions, and tags | OpenAI, Anthropic, Google | Yes |
|
|
72
|
+
| [`getModerationScores`](./docs/WORKFLOWS.md#content-moderation) | Detect inappropriate content | OpenAI, Hive | Yes |
|
|
73
|
+
| [`hasBurnedInCaptions`](./docs/WORKFLOWS.md#burned-in-caption-detection) | Detect hardcoded subtitles in video frames | OpenAI, Anthropic, Google | — |
|
|
74
|
+
| [`askQuestions`](./docs/WORKFLOWS.md#ask-questions) | Answer yes/no questions about asset content | OpenAI, Anthropic, Google | — |
|
|
75
|
+
| [`generateChapters`](./docs/WORKFLOWS.md#chapter-generation) | Create chapter markers from transcripts | OpenAI, Anthropic, Google | Yes |
|
|
76
|
+
| [`generateEmbeddings`](./docs/WORKFLOWS.md#embeddings) | Generate vector embeddings for semantic search | OpenAI, Google | Yes |
|
|
77
|
+
| [`translateCaptions`](./docs/WORKFLOWS.md#caption-translation) | Translate captions into other languages | OpenAI, Anthropic, Google | Yes |
|
|
78
|
+
| [`translateAudio`](./docs/WORKFLOWS.md#audio-dubbing) | Create AI-dubbed audio tracks | ElevenLabs | Yes |
|
|
79
|
+
|
|
80
|
+
See the [Workflows guide](./docs/WORKFLOWS.md) for detailed documentation, options, and examples for each workflow. See the [API Reference](./docs/API.md) for complete parameter and return type details.
|
|
81
|
+
|
|
82
|
+
### Quick Examples
|
|
83
|
+
|
|
84
|
+
**Content moderation:**
|
|
85
|
+
|
|
86
|
+
```ts
|
|
226
87
|
import { getModerationScores } from "@mux/ai/workflows";
|
|
227
88
|
|
|
228
89
|
const result = await getModerationScores("your-asset-id", {
|
|
@@ -232,268 +93,130 @@ const result = await getModerationScores("your-asset-id", {
|
|
|
232
93
|
|
|
233
94
|
if (result.exceedsThreshold) {
|
|
234
95
|
console.log("Content flagged for review");
|
|
235
|
-
console.log(`Max scores: ${result.maxScores}`);
|
|
236
96
|
}
|
|
237
97
|
```
|
|
238
98
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
Create automatic chapter markers for better video navigation:
|
|
99
|
+
**Chapter generation:**
|
|
242
100
|
|
|
243
|
-
```
|
|
101
|
+
```ts
|
|
244
102
|
import { generateChapters } from "@mux/ai/workflows";
|
|
245
103
|
|
|
246
104
|
const result = await generateChapters("your-asset-id", "en", {
|
|
247
105
|
provider: "anthropic"
|
|
248
106
|
});
|
|
249
107
|
|
|
250
|
-
//
|
|
251
|
-
player.addChapters(result.chapters);
|
|
252
|
-
// [
|
|
253
|
-
// { startTime: 0, title: "Introduction" },
|
|
254
|
-
// { startTime: 45, title: "Main Content" },
|
|
255
|
-
// { startTime: 120, title: "Conclusion" }
|
|
256
|
-
// ]
|
|
108
|
+
// [{ startTime: 0, title: "Introduction" }, { startTime: 45, title: "Main Content" }, ...]
|
|
257
109
|
```
|
|
258
110
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
Generate embeddings for semantic search over transcripts:
|
|
111
|
+
**Semantic search embeddings:**
|
|
262
112
|
|
|
263
|
-
```
|
|
113
|
+
```ts
|
|
264
114
|
import { generateEmbeddings } from "@mux/ai/workflows";
|
|
265
115
|
|
|
266
116
|
const result = await generateEmbeddings("your-asset-id", {
|
|
267
117
|
provider: "openai",
|
|
268
|
-
|
|
269
|
-
chunkingStrategy: {
|
|
270
|
-
type: "token",
|
|
271
|
-
maxTokens: 500,
|
|
272
|
-
overlap: 100
|
|
273
|
-
}
|
|
118
|
+
chunkingStrategy: { type: "token", maxTokens: 500, overlap: 100 }
|
|
274
119
|
});
|
|
275
120
|
|
|
276
|
-
// Store embeddings in your vector database
|
|
277
121
|
for (const chunk of result.chunks) {
|
|
278
|
-
await vectorDB.insert({
|
|
279
|
-
embedding: chunk.embedding,
|
|
280
|
-
metadata: {
|
|
281
|
-
assetId: result.assetId,
|
|
282
|
-
startTime: chunk.metadata.startTime,
|
|
283
|
-
endTime: chunk.metadata.endTime
|
|
284
|
-
}
|
|
285
|
-
});
|
|
122
|
+
await vectorDB.insert({ embedding: chunk.embedding, startTime: chunk.metadata.startTime });
|
|
286
123
|
}
|
|
287
124
|
```
|
|
288
125
|
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
- **Cost-Effective by Default**: Uses affordable frontier models like `gpt-5.1`, `claude-sonnet-4-5`, and `gemini-3-flash-preview` to keep analysis costs low while maintaining high quality results
|
|
292
|
-
- **Multi-modal Analysis**: Combines storyboard images with video transcripts for richer understanding
|
|
293
|
-
- **Tone Control**: Choose between neutral, playful, or professional analysis styles for summarization
|
|
294
|
-
- **Prompt Customization**: Override specific prompt sections to tune workflows to your exact use case
|
|
295
|
-
- **Configurable Thresholds**: Set custom sensitivity levels for content moderation
|
|
296
|
-
- **Full TypeScript Support**: Comprehensive types for excellent developer experience and IDE autocomplete
|
|
297
|
-
- **Provider Flexibility**: Switch between OpenAI, Anthropic, Google, and other providers based on your needs
|
|
298
|
-
- **Composable Building Blocks**: Use primitives to fetch transcripts, thumbnails, and storyboards for custom workflows
|
|
299
|
-
- **Universal Language Support**: Automatic language name detection using `Intl.DisplayNames` for all ISO 639-1 codes
|
|
300
|
-
- **Production Ready**: Built-in retry logic, error handling, and edge case management
|
|
301
|
-
|
|
302
|
-
# Core Concepts
|
|
303
|
-
|
|
304
|
-
`@mux/ai` is built around two complementary abstractions:
|
|
305
|
-
|
|
306
|
-
## Workflows
|
|
307
|
-
|
|
308
|
-
**Workflows** are functions that handle complete video AI tasks end-to-end. Each workflow orchestrates the entire process: fetching video data from Mux (transcripts, thumbnails, storyboards), formatting it for AI providers, and returning structured results.
|
|
309
|
-
|
|
310
|
-
```typescript
|
|
311
|
-
import { getSummaryAndTags } from "@mux/ai/workflows";
|
|
312
|
-
|
|
313
|
-
const result = await getSummaryAndTags("asset-id", { provider: "openai" });
|
|
314
|
-
```
|
|
315
|
-
|
|
316
|
-
Use workflows when you need battle-tested solutions for common tasks like summarization, content moderation, chapter generation, or translation.
|
|
317
|
-
|
|
318
|
-
## Primitives
|
|
319
|
-
|
|
320
|
-
**Primitives** are low-level building blocks that give you direct access to Mux video data and utilities. They provide functions for fetching transcripts, storyboards, thumbnails, and processing text—perfect for building custom workflows.
|
|
321
|
-
|
|
322
|
-
```typescript
|
|
323
|
-
import { fetchTranscriptForAsset, getStoryboardUrl } from "@mux/ai/primitives";
|
|
324
|
-
|
|
325
|
-
const transcript = await fetchTranscriptForAsset("asset-id", "en");
|
|
326
|
-
const storyboard = getStoryboardUrl("playback-id", { width: 640 });
|
|
327
|
-
```
|
|
328
|
-
|
|
329
|
-
Use primitives when you need complete control over your AI prompts or want to build custom workflows not covered by the pre-built options.
|
|
330
|
-
|
|
331
|
-
## Package Structure
|
|
332
|
-
|
|
333
|
-
```typescript
|
|
334
|
-
// Import workflows
|
|
335
|
-
import { generateChapters } from "@mux/ai/workflows";
|
|
336
|
-
|
|
337
|
-
// Import primitives
|
|
338
|
-
import { fetchTranscriptForAsset } from "@mux/ai/primitives";
|
|
339
|
-
|
|
340
|
-
// Or import everything
|
|
341
|
-
import { workflows, primitives } from "@mux/ai";
|
|
342
|
-
```
|
|
343
|
-
|
|
344
|
-
# Credentials
|
|
345
|
-
|
|
346
|
-
You'll need to set up credentials for Mux as well as any AI provider you want to use for a particular workflow. In addition, some workflows will need other cloud-hosted access (e.g. cloud storage via AWS S3).
|
|
347
|
-
|
|
348
|
-
## Credentials - Mux
|
|
349
|
-
|
|
350
|
-
### Access Token (required)
|
|
351
|
-
|
|
352
|
-
All workflows require a Mux API access token to interact with your video assets. If you're already logged into the dashboard, you can [create a new access token here](https://dashboard.mux.com/settings/access-tokens).
|
|
353
|
-
|
|
354
|
-
**Required Permissions:**
|
|
355
|
-
- **Mux Video**: Read + Write access
|
|
356
|
-
- **Mux Data**: Read access
|
|
357
|
-
|
|
358
|
-
These permissions cover all current workflows. You can set these when creating your token in the dashboard.
|
|
359
|
-
|
|
360
|
-
> **💡 Tip:** For security reasons, consider creating a dedicated access token specifically for your AI workflows rather than reusing existing tokens.
|
|
361
|
-
|
|
362
|
-
### Signing Key (conditionally required)
|
|
363
|
-
|
|
364
|
-
If your Mux assets use [signed playback URLs](https://docs.mux.com/guides/secure-video-playback) for security, you'll need to provide signing credentials so `@mux/ai` can access the video data.
|
|
365
|
-
|
|
366
|
-
**When needed:** Only if your assets have signed playback policies enabled and no public playback ID.
|
|
367
|
-
|
|
368
|
-
**How to get:**
|
|
369
|
-
1. Go to [Settings > Signing Keys](https://dashboard.mux.com/settings/signing-keys) in your Mux dashboard
|
|
370
|
-
2. Create a new signing key or use an existing one
|
|
371
|
-
3. Save both the **Signing Key ID** and the **Base64-encoded Private Key**
|
|
372
|
-
|
|
373
|
-
**Configuration:**
|
|
374
|
-
```bash
|
|
375
|
-
MUX_SIGNING_KEY=your_signing_key_id
|
|
376
|
-
MUX_PRIVATE_KEY=your_base64_encoded_private_key
|
|
377
|
-
```
|
|
378
|
-
|
|
379
|
-
## Credentials - AI Providers
|
|
380
|
-
|
|
381
|
-
Different workflows support various AI providers. You only need to configure API keys for the providers you plan to use.
|
|
382
|
-
|
|
383
|
-
### OpenAI
|
|
126
|
+
## Prompt Customization
|
|
384
127
|
|
|
385
|
-
|
|
128
|
+
Every workflow prompt is built from a structured template of named sections. The `promptOverrides` option lets you swap out individual sections with your own instructions while keeping the battle-tested defaults for everything else — no need to rewrite entire prompts.
|
|
386
129
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
130
|
+
```ts
|
|
131
|
+
const result = await getSummaryAndTags(assetId, {
|
|
132
|
+
provider: "openai",
|
|
133
|
+
promptOverrides: {
|
|
134
|
+
title: "Create a search-optimized title (50-60 chars) with the primary keyword front-loaded.",
|
|
135
|
+
keywords: "Focus on high search volume terms and long-tail keyword phrases.",
|
|
136
|
+
// task, description, qualityGuidelines → keep defaults
|
|
137
|
+
},
|
|
138
|
+
});
|
|
391
139
|
```
|
|
392
140
|
|
|
393
|
-
|
|
141
|
+
This works with `getSummaryAndTags`, `generateChapters`, and `hasBurnedInCaptions`. The [Prompt Customization guide](./docs/PROMPT-CUSTOMIZATION.md) has ready-to-use presets for SEO, social media, e-commerce, and technical analysis, along with tips for writing effective overrides.
|
|
394
142
|
|
|
395
|
-
|
|
143
|
+
## Evaluations
|
|
396
144
|
|
|
397
|
-
|
|
145
|
+
Choosing between OpenAI, Anthropic, and Google for a given workflow isn't guesswork. Every workflow in `@mux/ai` ships with eval coverage that benchmarks providers and models against three dimensions:
|
|
398
146
|
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
### Google Generative AI
|
|
147
|
+
- **Efficacy** — Does it produce accurate, high-quality results?
|
|
148
|
+
- **Efficiency** — How fast is it and how many tokens does it consume?
|
|
149
|
+
- **Expense** — What does each request cost?
|
|
404
150
|
|
|
405
|
-
**
|
|
151
|
+
Evals run automatically on every push to `main` and results are published to a **[public dashboard](https://evaluating-mux-ai.vercel.app/)** so you can compare providers side-by-side before choosing one for your use case.
|
|
406
152
|
|
|
407
|
-
|
|
153
|
+
You can also run evals locally against your own assets:
|
|
408
154
|
|
|
409
155
|
```bash
|
|
410
|
-
|
|
156
|
+
npm run test:eval
|
|
411
157
|
```
|
|
412
158
|
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
**Used by:** `translateAudio` (audio dubbing)
|
|
416
|
-
|
|
417
|
-
**Get your API key:** [ElevenLabs API Keys](https://elevenlabs.io/app/settings/api-keys)
|
|
159
|
+
See the [Evaluations guide](./docs/EVALS.md) for details on the 3 E's framework, adding your own evals, and cross-provider testing.
|
|
418
160
|
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
```bash
|
|
422
|
-
ELEVENLABS_API_KEY=your_elevenlabs_api_key
|
|
423
|
-
```
|
|
424
|
-
|
|
425
|
-
### Hive
|
|
161
|
+
## Primitives
|
|
426
162
|
|
|
427
|
-
|
|
163
|
+
Primitives are low-level building blocks that give you direct access to Mux media data — transcripts, storyboards, thumbnails, and text chunking utilities. Use them when you need full control over your AI prompts or want to build custom workflows.
|
|
428
164
|
|
|
429
|
-
|
|
165
|
+
```ts
|
|
166
|
+
import { fetchTranscriptForAsset, getStoryboardUrl } from "@mux/ai/primitives";
|
|
430
167
|
|
|
431
|
-
|
|
432
|
-
|
|
168
|
+
const transcript = await fetchTranscriptForAsset(asset, playbackId, { languageCode: "en" });
|
|
169
|
+
const storyboard = getStoryboardUrl(playbackId, 640);
|
|
433
170
|
```
|
|
434
171
|
|
|
435
|
-
|
|
172
|
+
All pre-built workflows are composed from these primitives internally, so you can always drop down a level when you need to customize behavior.
|
|
436
173
|
|
|
437
|
-
|
|
174
|
+
See the [Primitives guide](./docs/PRIMITIVES.md) for the full list of available functions and examples of building custom workflows.
|
|
438
175
|
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
Translation workflows need temporary storage to upload translated files before attaching them to your Mux assets. Any S3-compatible storage service works (AWS S3, Cloudflare R2, DigitalOcean Spaces, etc.).
|
|
176
|
+
## Package Structure
|
|
442
177
|
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
3. Attach a policy with `s3:PutObject`, `s3:GetObject`, and `s3:PutObjectAcl` permissions for your bucket
|
|
178
|
+
```ts
|
|
179
|
+
// Import specific workflows
|
|
180
|
+
import { getSummaryAndTags, generateChapters } from "@mux/ai/workflows";
|
|
447
181
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
S3_ENDPOINT=https://s3.amazonaws.com # Or your S3-compatible endpoint
|
|
451
|
-
S3_REGION=us-east-1 # Your bucket region
|
|
452
|
-
S3_BUCKET=your-bucket-name
|
|
453
|
-
S3_ACCESS_KEY_ID=your-access-key
|
|
454
|
-
S3_SECRET_ACCESS_KEY=your-secret-key
|
|
455
|
-
```
|
|
182
|
+
// Import specific primitives
|
|
183
|
+
import { fetchTranscriptForAsset, getStoryboardUrl } from "@mux/ai/primitives";
|
|
456
184
|
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
S3_ENDPOINT=https://your-account-id.r2.cloudflarestorage.com
|
|
460
|
-
S3_REGION=auto
|
|
461
|
-
S3_BUCKET=your-bucket-name
|
|
462
|
-
S3_ACCESS_KEY_ID=your-r2-access-key
|
|
463
|
-
S3_SECRET_ACCESS_KEY=your-r2-secret-key
|
|
185
|
+
// Or import everything via namespace
|
|
186
|
+
import { workflows, primitives } from "@mux/ai";
|
|
464
187
|
```
|
|
465
188
|
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
## Full Documentation
|
|
469
|
-
|
|
470
|
-
- **[Workflows Guide](./docs/WORKFLOWS.md)** - Detailed guide to each pre-built workflow with examples
|
|
471
|
-
- **[API Reference](./docs/API.md)** - Complete API documentation for all functions, parameters, and return types
|
|
472
|
-
- **[Workflow Encryption](./docs/WORKFLOW-ENCRYPTION.md)** - Encrypting credentials across Workflow DevKit boundaries
|
|
473
|
-
- **[Storage Adapters](./docs/STORAGE-ADAPTERS.md)** - Using custom storage SDKs (AWS, Cloudflare R2, MinIO)
|
|
474
|
-
- **[Primitives Guide](./docs/PRIMITIVES.md)** - Low-level building blocks for custom workflows
|
|
475
|
-
- **[Examples](./docs/EXAMPLES.md)** - Running examples from the repository
|
|
189
|
+
## Prerequisites
|
|
476
190
|
|
|
477
|
-
|
|
191
|
+
- [Node.js](https://nodejs.org/en/download) (>= 21.0.0)
|
|
192
|
+
- A [Mux](https://dashboard.mux.com/signup) account (free to sign up)
|
|
193
|
+
- An API key for at least one supported AI provider
|
|
478
194
|
|
|
479
|
-
|
|
480
|
-
- **[Auto-generated Captions](https://www.mux.com/docs/guides/add-autogenerated-captions-and-use-transcripts)** - Enable transcripts for your assets
|
|
481
|
-
- **[GitHub Repository](https://github.com/muxinc/ai)** - Source code, issues, and contributions
|
|
482
|
-
- **[npm Package](https://www.npmjs.com/package/@mux/ai)** - Package page and version history
|
|
195
|
+
## Documentation
|
|
483
196
|
|
|
484
|
-
|
|
197
|
+
| Guide | Description |
|
|
198
|
+
| --- | --- |
|
|
199
|
+
| [Workflows](./docs/WORKFLOWS.md) | Detailed guide for each pre-built workflow with examples and options |
|
|
200
|
+
| [API Reference](./docs/API.md) | Complete API docs — all function signatures, parameters, and return types |
|
|
201
|
+
| [Primitives](./docs/PRIMITIVES.md) | Low-level building blocks for custom workflows |
|
|
202
|
+
| [Prompt Customization](./docs/PROMPT-CUSTOMIZATION.md) | Overriding prompt sections with `promptOverrides` for custom use cases |
|
|
203
|
+
| [Credentials](./docs/CREDENTIALS.md) | Setting up Mux, AI provider, and cloud storage credentials |
|
|
204
|
+
| [Workflow DevKit](./docs/WORKFLOW-DEVKIT.md) | Integration with Workflow DevKit for observability and orchestration |
|
|
205
|
+
| [Workflow Encryption](./docs/WORKFLOW-ENCRYPTION.md) | Encrypting credentials across Workflow DevKit boundaries |
|
|
206
|
+
| [Storage Adapters](./docs/STORAGE-ADAPTERS.md) | Using custom storage SDKs (AWS, Cloudflare R2, MinIO) |
|
|
207
|
+
| [Audio-Only Workflows](./docs/AUDIO-ONLY.md) | Working with audio-only assets (no video track) |
|
|
208
|
+
| [Evaluations](./docs/EVALS.md) | AI eval testing with the 3 E's framework — [public dashboard](https://evaluating-mux-ai.vercel.app/) |
|
|
209
|
+
| [Examples](./docs/EXAMPLES.md) | Running the example scripts from the repository |
|
|
485
210
|
|
|
486
|
-
|
|
211
|
+
### Additional Resources
|
|
487
212
|
|
|
488
|
-
|
|
213
|
+
- [Mux Video API Docs](https://docs.mux.com/guides/video) — Learn about Mux Video features
|
|
214
|
+
- [Auto-generated Captions](https://www.mux.com/docs/guides/add-autogenerated-captions-and-use-transcripts) — Enable transcripts for your assets
|
|
215
|
+
- [npm Package](https://www.npmjs.com/package/@mux/ai) — Package page and version history
|
|
489
216
|
|
|
490
|
-
|
|
491
|
-
- Running examples and tests
|
|
492
|
-
- Code style and conventions
|
|
493
|
-
- Submitting pull requests
|
|
494
|
-
- Reporting issues
|
|
217
|
+
## Contributing
|
|
495
218
|
|
|
496
|
-
|
|
219
|
+
We welcome contributions! Please see the [Contributing Guide](./CONTRIBUTING.md) for details on setting up your development environment, running tests, and submitting pull requests.
|
|
497
220
|
|
|
498
221
|
For questions or discussions, feel free to [open an issue](https://github.com/muxinc/ai/issues).
|
|
499
222
|
|
|
@@ -639,6 +639,12 @@ interface AudioTranslationResult {
|
|
|
639
639
|
interface AudioTranslationOptions extends MuxAIOptions {
|
|
640
640
|
/** Audio dubbing provider (currently ElevenLabs only). */
|
|
641
641
|
provider?: "elevenlabs";
|
|
642
|
+
/**
|
|
643
|
+
* Optional source language code for ElevenLabs `source_lang`.
|
|
644
|
+
* Accepts ISO 639-1 (e.g. "en") or ISO 639-3 (e.g. "eng").
|
|
645
|
+
* Defaults to auto-detect when omitted.
|
|
646
|
+
*/
|
|
647
|
+
fromLanguageCode?: string;
|
|
642
648
|
/** Number of speakers supplied to ElevenLabs (0 = auto-detect, default). */
|
|
643
649
|
numSpeakers?: number;
|
|
644
650
|
/** Optional override for the S3-compatible endpoint used for uploads. */
|
package/dist/index.d.ts
CHANGED
|
@@ -2,14 +2,14 @@ import { W as WorkflowCredentials, S as StoragePutObjectInput, a as StoragePresi
|
|
|
2
2
|
export { A as AssetTextTrack, C as ChunkEmbedding, b as ChunkingStrategy, E as Encrypted, c as EncryptedPayload, I as ImageSubmissionMode, M as MuxAIOptions, d as MuxAsset, P as PlaybackAsset, e as PlaybackPolicy, f as StorageAdapter, T as TextChunk, g as TokenChunkingConfig, h as TokenUsage, i as ToneType, U as UsageMetadata, V as VTTChunkingConfig, j as VideoEmbeddingsResult, k as WorkflowCredentialsInput, l as WorkflowMuxClient, m as decryptFromWorkflow, n as encryptForWorkflow } from './types-BQVi_wnh.js';
|
|
3
3
|
import { WORKFLOW_SERIALIZE, WORKFLOW_DESERIALIZE } from '@workflow/serde';
|
|
4
4
|
export { i as primitives } from './index-DZlygsvb.js';
|
|
5
|
-
export { i as workflows } from './index-
|
|
5
|
+
export { i as workflows } from './index-Bavk1Y8-.js';
|
|
6
6
|
import '@mux/mux-node';
|
|
7
7
|
import 'zod';
|
|
8
8
|
import '@ai-sdk/anthropic';
|
|
9
9
|
import '@ai-sdk/google';
|
|
10
10
|
import '@ai-sdk/openai';
|
|
11
11
|
|
|
12
|
-
var version = "0.7.
|
|
12
|
+
var version = "0.7.4";
|
|
13
13
|
|
|
14
14
|
/**
|
|
15
15
|
* A function that returns workflow credentials, either synchronously or asynchronously.
|