video-context-mcp-server 0.11.0-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +432 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +152 -0
- package/dist/index.js.map +1 -0
- package/dist/services/audio/assemblyAiClient.d.ts +28 -0
- package/dist/services/audio/assemblyAiClient.d.ts.map +1 -0
- package/dist/services/audio/assemblyAiClient.js +40 -0
- package/dist/services/audio/assemblyAiClient.js.map +1 -0
- package/dist/services/audio/deepgramClient.d.ts +23 -0
- package/dist/services/audio/deepgramClient.d.ts.map +1 -0
- package/dist/services/audio/deepgramClient.js +50 -0
- package/dist/services/audio/deepgramClient.js.map +1 -0
- package/dist/services/audio/groqAudioClient.d.ts +18 -0
- package/dist/services/audio/groqAudioClient.d.ts.map +1 -0
- package/dist/services/audio/groqAudioClient.js +43 -0
- package/dist/services/audio/groqAudioClient.js.map +1 -0
- package/dist/services/audioRouter.d.ts +38 -0
- package/dist/services/audioRouter.d.ts.map +1 -0
- package/dist/services/audioRouter.js +81 -0
- package/dist/services/audioRouter.js.map +1 -0
- package/dist/services/ffmpeg.d.ts +54 -0
- package/dist/services/ffmpeg.d.ts.map +1 -0
- package/dist/services/ffmpeg.js +188 -0
- package/dist/services/ffmpeg.js.map +1 -0
- package/dist/services/geminiClient.d.ts +55 -0
- package/dist/services/geminiClient.d.ts.map +1 -0
- package/dist/services/geminiClient.js +143 -0
- package/dist/services/geminiClient.js.map +1 -0
- package/dist/services/glmClient.d.ts +50 -0
- package/dist/services/glmClient.d.ts.map +1 -0
- package/dist/services/glmClient.js +196 -0
- package/dist/services/glmClient.js.map +1 -0
- package/dist/services/kimiClient.d.ts +45 -0
- package/dist/services/kimiClient.d.ts.map +1 -0
- package/dist/services/kimiClient.js +152 -0
- package/dist/services/kimiClient.js.map +1 -0
- package/dist/services/providerRouter.d.ts +40 -0
- package/dist/services/providerRouter.d.ts.map +1 -0
- package/dist/services/providerRouter.js +64 -0
- package/dist/services/providerRouter.js.map +1 -0
- package/dist/tools/analyzeVideo.d.ts +18 -0
- package/dist/tools/analyzeVideo.d.ts.map +1 -0
- package/dist/tools/analyzeVideo.js +153 -0
- package/dist/tools/analyzeVideo.js.map +1 -0
- package/dist/tools/extractFrames.d.ts +22 -0
- package/dist/tools/extractFrames.d.ts.map +1 -0
- package/dist/tools/extractFrames.js +82 -0
- package/dist/tools/extractFrames.js.map +1 -0
- package/dist/tools/getVideoInfo.d.ts +18 -0
- package/dist/tools/getVideoInfo.d.ts.map +1 -0
- package/dist/tools/getVideoInfo.js +52 -0
- package/dist/tools/getVideoInfo.js.map +1 -0
- package/dist/tools/searchTimestamp.d.ts +25 -0
- package/dist/tools/searchTimestamp.d.ts.map +1 -0
- package/dist/tools/searchTimestamp.js +152 -0
- package/dist/tools/searchTimestamp.js.map +1 -0
- package/dist/tools/summarizeVideo.d.ts +18 -0
- package/dist/tools/summarizeVideo.d.ts.map +1 -0
- package/dist/tools/summarizeVideo.js +208 -0
- package/dist/tools/summarizeVideo.js.map +1 -0
- package/dist/tools/transcribeVideo.d.ts +18 -0
- package/dist/tools/transcribeVideo.d.ts.map +1 -0
- package/dist/tools/transcribeVideo.js +106 -0
- package/dist/tools/transcribeVideo.js.map +1 -0
- package/dist/utils/audioUtils.d.ts +62 -0
- package/dist/utils/audioUtils.d.ts.map +1 -0
- package/dist/utils/audioUtils.js +153 -0
- package/dist/utils/audioUtils.js.map +1 -0
- package/dist/utils/base64.d.ts +35 -0
- package/dist/utils/base64.d.ts.map +1 -0
- package/dist/utils/base64.js +50 -0
- package/dist/utils/base64.js.map +1 -0
- package/dist/utils/logger.d.ts +23 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +34 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/tempFiles.d.ts +20 -0
- package/dist/utils/tempFiles.d.ts.map +1 -0
- package/dist/utils/tempFiles.js +31 -0
- package/dist/utils/tempFiles.js.map +1 -0
- package/dist/utils/videoUtils.d.ts +19 -0
- package/dist/utils/videoUtils.d.ts.map +1 -0
- package/dist/utils/videoUtils.js +38 -0
- package/dist/utils/videoUtils.js.map +1 -0
- package/package.json +64 -0
package/README.md
ADDED
|
@@ -0,0 +1,432 @@
|
|
|
1
|
+
# Video Context MCP Server
|
|
2
|
+
|
|
3
|
+
Video Context MCP Server is a Model Context Protocol (MCP) server that gives MCP-compatible coding assistants (such as GitHub Copilot in VS Code, Cursor, and Claude Code) the ability to understand and analyze video content.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 🎬 **Video Q&A** — Ask questions about video content and get AI-powered answers
|
|
8
|
+
- 📝 **Video Summarization** — Generate structured summaries with key scenes and timelines
|
|
9
|
+
- 🖼️ **Frame Extraction** — Extract frames at specific timestamps or intervals
|
|
10
|
+
- 🔍 **Timestamp Search** — Find the exact moment when something happens in a video
|
|
11
|
+
- 📊 **Video Metadata** — Get duration, resolution, fps, codec, and other technical details
|
|
12
|
+
- 🎙️ **Audio Transcription** — Transcribe speech from any video using Deepgram, AssemblyAI, Groq/Whisper, or Gemini
|
|
13
|
+
- 🔊 **Speaker Diarization** — Identify who said what (Deepgram and AssemblyAI)
|
|
14
|
+
- 🔄 **Multi-Backend Support** — Choose between Gemini (native multimodal), GLM-4.6V (cheap/free), or Kimi K2.5 (broader format support)
|
|
15
|
+
- 🎯 **Smart Video Handling** — Extracts keyframes from long videos to reduce token usage (when not using Gemini)
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
### Quick Start (Recommended for ordinary users)
|
|
20
|
+
|
|
21
|
+
#### 1. Prerequisites
|
|
22
|
+
|
|
23
|
+
- Node.js 18+
|
|
24
|
+
- VS Code with GitHub Copilot Chat enabled
|
|
25
|
+
|
|
26
|
+
#### 2. Get API keys
|
|
27
|
+
|
|
28
|
+
You'll need API keys for one or more **video** backends:
|
|
29
|
+
|
|
30
|
+
- **Gemini 3 Flash Preview (Google)**: [Get API Key](https://aistudio.google.com/app/apikey)
|
|
31
|
+
- **Kimi K2.5 (Moonshot AI)**: [Get API Key](https://platform.moonshot.ai)
|
|
32
|
+
- **GLM-4.6V (Z.AI)**: [Get API Key](https://z.ai/manage-apikey/apikey-list)
|
|
33
|
+
|
|
34
|
+
For **audio transcription** (`transcribe_video`), you'll also need at least one audio provider key:
|
|
35
|
+
|
|
36
|
+
- **Deepgram** (default): [Get API Key](https://console.deepgram.com/)
|
|
37
|
+
- **AssemblyAI**: [Get API Key](https://www.assemblyai.com/dashboard)
|
|
38
|
+
- **Groq** (free Whisper): [Get API Key](https://console.groq.com/)
|
|
39
|
+
- **Gemini** (reuse `GEMINI_API_KEY` above — no extra key needed)
|
|
40
|
+
|
|
41
|
+
#### 3. Install the MCP server
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
npm install -g video-context-mcp-server
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
This installs the executable command: `video-context-mcp`.
|
|
48
|
+
|
|
49
|
+
> **Tip:** Periodically re-run the above command to get the latest version:
|
|
50
|
+
>
|
|
51
|
+
> ```bash
|
|
52
|
+
> npm install -g video-context-mcp-server@latest
|
|
53
|
+
> ```
|
|
54
|
+
|
|
55
|
+
#### 4. Configure VS Code MCP
|
|
56
|
+
|
|
57
|
+
Create (or update) `.vscode/mcp.json` in your project/workspace:
|
|
58
|
+
|
|
59
|
+
```json
|
|
60
|
+
{
|
|
61
|
+
"servers": {
|
|
62
|
+
"videoMcp": {
|
|
63
|
+
"type": "stdio",
|
|
64
|
+
"command": "video-context-mcp",
|
|
65
|
+
"env": {
|
|
66
|
+
"GEMINI_API_KEY": "your-gemini-key",
|
|
67
|
+
"MOONSHOT_API_KEY": "your-moonshot-key",
|
|
68
|
+
"Z_AI_API_KEY": "your-zai-key",
|
|
69
|
+
"DEEPGRAM_API_KEY": "your-deepgram-key",
|
|
70
|
+
"ASSEMBLYAI_API_KEY": "your-assemblyai-key",
|
|
71
|
+
"GROQ_API_KEY": "your-groq-key"
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Open Copilot Chat in VS Code. The MCP server starts automatically when tools are needed.
|
|
79
|
+
|
|
80
|
+
### Configure Cursor MCP
|
|
81
|
+
|
|
82
|
+
Add this server to your Cursor MCP configuration (global or project-level):
|
|
83
|
+
|
|
84
|
+
```json
|
|
85
|
+
{
|
|
86
|
+
"mcpServers": {
|
|
87
|
+
"videoMcp": {
|
|
88
|
+
"command": "video-context-mcp",
|
|
89
|
+
"env": {
|
|
90
|
+
"GEMINI_API_KEY": "your-gemini-key",
|
|
91
|
+
"MOONSHOT_API_KEY": "your-moonshot-key",
|
|
92
|
+
"Z_AI_API_KEY": "your-zai-key",
|
|
93
|
+
"DEEPGRAM_API_KEY": "your-deepgram-key",
|
|
94
|
+
"ASSEMBLYAI_API_KEY": "your-assemblyai-key",
|
|
95
|
+
"GROQ_API_KEY": "your-groq-key"
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Notes:
|
|
103
|
+
|
|
104
|
+
- Run `npm install -g video-context-mcp-server` first if you haven't already.
|
|
105
|
+
- If you prefer not to install globally, use `npx -y video-context-mcp-server@latest` as the command (slower startup due to registry check).
|
|
106
|
+
- Set only the API keys for the providers you intend to use; keys for unused providers can be omitted.
|
|
107
|
+
|
|
108
|
+
### Configure Claude Code MCP
|
|
109
|
+
|
|
110
|
+
Use the Claude CLI to register the MCP server:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
claude mcp add videoMcp video-context-mcp \
|
|
114
|
+
--env GEMINI_API_KEY=your-gemini-key \
|
|
115
|
+
--env MOONSHOT_API_KEY=your-moonshot-key \
|
|
116
|
+
--env Z_AI_API_KEY=your-zai-key \
|
|
117
|
+
--env DEEPGRAM_API_KEY=your-deepgram-key \
|
|
118
|
+
--env ASSEMBLYAI_API_KEY=your-assemblyai-key \
|
|
119
|
+
--env GROQ_API_KEY=your-groq-key
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Then verify with:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
claude mcp list
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
If you prefer not to install globally, register via `npx` instead (slower startup due to registry check):
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
claude mcp add videoMcp npx -y video-context-mcp-server@latest \
|
|
132
|
+
--env GEMINI_API_KEY=your-gemini-key --env MOONSHOT_API_KEY=your-moonshot-key \
|
|
133
|
+
--env Z_AI_API_KEY=your-zai-key \
|
|
134
|
+
--env DEEPGRAM_API_KEY=your-deepgram-key \
|
|
135
|
+
--env ASSEMBLYAI_API_KEY=your-assemblyai-key \
|
|
136
|
+
--env GROQ_API_KEY=your-groq-key
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Troubleshooting Setup
|
|
140
|
+
|
|
141
|
+
- **`video-context-mcp: command not found`**
|
|
142
|
+
- Make sure Node.js is installed and available in your shell (`node -v`, `npm -v`).
|
|
143
|
+
- If installed globally, re-run: `npm install -g video-context-mcp-server`.
|
|
144
|
+
- If global binaries are not on PATH, use `npx -y video-context-mcp-server@latest` instead of `video-context-mcp`.
|
|
145
|
+
|
|
146
|
+
- **MCP server not appearing in client**
|
|
147
|
+
- Restart the client app after changing MCP configuration.
|
|
148
|
+
- Validate JSON syntax in your MCP config file.
|
|
149
|
+
- For Claude Code, verify registration with `claude mcp list`.
|
|
150
|
+
|
|
151
|
+
- **Missing API key errors**
|
|
152
|
+
- Set `GEMINI_API_KEY` for Gemini usage, `Z_AI_API_KEY` for GLM usage, and `MOONSHOT_API_KEY` for Kimi usage.
|
|
153
|
+
- For audio transcription, set `DEEPGRAM_API_KEY` (default), `ASSEMBLYAI_API_KEY`, `GROQ_API_KEY` (free tier), or reuse `GEMINI_API_KEY`. At least one audio key is needed to use `transcribe_video`.
|
|
154
|
+
- You can set only the keys for the providers you intend to use.
|
|
155
|
+
- For local files, if `provider=glm` is requested but `Z_AI_API_KEY` is missing, the server automatically falls back to Kimi (and then Gemini) when those keys are available.
|
|
156
|
+
- For remote `http(s)` video URLs, all three AI providers (`glm`, `kimi`, `gemini`) are supported. All tools automatically download remote videos to a temporary file before processing.
|
|
157
|
+
|
|
158
|
+
### Alternative: Run via npx without global install
|
|
159
|
+
|
|
160
|
+
If you prefer not to install globally, you can use `npx` instead. Note that this adds a startup delay because of the npm registry check on each run:
|
|
161
|
+
|
|
162
|
+
```json
|
|
163
|
+
{
|
|
164
|
+
"servers": {
|
|
165
|
+
"videoMcp": {
|
|
166
|
+
"type": "stdio",
|
|
167
|
+
"command": "npx",
|
|
168
|
+
"args": ["-y", "video-context-mcp-server@latest"],
|
|
169
|
+
"env": {
|
|
170
|
+
"GEMINI_API_KEY": "your-gemini-key",
|
|
171
|
+
"MOONSHOT_API_KEY": "your-moonshot-key",
|
|
172
|
+
"Z_AI_API_KEY": "your-zai-key",
|
|
173
|
+
"DEEPGRAM_API_KEY": "your-deepgram-key",
|
|
174
|
+
"ASSEMBLYAI_API_KEY": "your-assemblyai-key",
|
|
175
|
+
"GROQ_API_KEY": "your-groq-key"
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
The `@latest` tag ensures you always get the newest published version, at the cost of a network round-trip on every startup — this also means the `npx` approach self-updates automatically and requires no manual update step.
|
|
183
|
+
|
|
184
|
+
### For contributors: install from source
|
|
185
|
+
|
|
186
|
+
Clone this repository, then:
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
npm install
|
|
190
|
+
npm run build
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Then use this `.vscode/mcp.json` server command:
|
|
194
|
+
|
|
195
|
+
```json
|
|
196
|
+
{
|
|
197
|
+
"servers": {
|
|
198
|
+
"videoMcp": {
|
|
199
|
+
"type": "stdio",
|
|
200
|
+
"command": "node",
|
|
201
|
+
"args": ["${workspaceFolder}/dist/index.js"],
|
|
202
|
+
"env": {
|
|
203
|
+
"GEMINI_API_KEY": "your-gemini-key",
|
|
204
|
+
"MOONSHOT_API_KEY": "your-moonshot-key",
|
|
205
|
+
"Z_AI_API_KEY": "your-zai-key",
|
|
206
|
+
"DEEPGRAM_API_KEY": "your-deepgram-key",
|
|
207
|
+
"ASSEMBLYAI_API_KEY": "your-assemblyai-key",
|
|
208
|
+
"GROQ_API_KEY": "your-groq-key"
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### Debugging Behavior in VS Code
|
|
216
|
+
|
|
217
|
+
When your `.vscode/mcp.json` includes a `dev` block such as:
|
|
218
|
+
|
|
219
|
+
```jsonc
|
|
220
|
+
{
|
|
221
|
+
"servers": {
|
|
222
|
+
"videoMcp": {
|
|
223
|
+
"type": "stdio",
|
|
224
|
+
"command": "node",
|
|
225
|
+
"args": ["${workspaceFolder}/dist/index.js"],
|
|
226
|
+
"env": {
|
|
227
|
+
"GEMINI_API_KEY": "your-gemini-key",
|
|
228
|
+
"MOONSHOT_API_KEY": "your-moonshot-key",
|
|
229
|
+
"Z_AI_API_KEY": "your-zai-key",
|
|
230
|
+
"DEEPGRAM_API_KEY": "your-deepgram-key",
|
|
231
|
+
"ASSEMBLYAI_API_KEY": "your-assemblyai-key",
|
|
232
|
+
"GROQ_API_KEY": "your-groq-key",
|
|
233
|
+
},
|
|
234
|
+
"dev": {
|
|
235
|
+
"watch": "src/**/*.ts",
|
|
236
|
+
"debug": { "type": "node" },
|
|
237
|
+
},
|
|
238
|
+
},
|
|
239
|
+
},
|
|
240
|
+
}
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
you may see frequent logs in the **Output** panel under `MCP: videoMcp`.
|
|
244
|
+
|
|
245
|
+
This is expected in development mode:
|
|
246
|
+
|
|
247
|
+
- `watch` restarts/reloads the MCP server when TypeScript files change
|
|
248
|
+
- `debug` enables Node debug integration
|
|
249
|
+
- MCP protocol payloads (tool schemas, discovery events, lifecycle messages) are printed in that output channel
|
|
250
|
+
|
|
251
|
+
If you want less noise, remove either:
|
|
252
|
+
|
|
253
|
+
- `dev.debug` (keeps auto-watch, disables debug integration), or
|
|
254
|
+
- the full `dev` block (disables watch + debug behavior)
|
|
255
|
+
|
|
256
|
+
## Available Tools
|
|
257
|
+
|
|
258
|
+
| Tool | Description | Parameters |
|
|
259
|
+
| ------------------ | ------------------------------------ | --------------------------------------------------------------- |
|
|
260
|
+
| `analyze_video` | Ask questions about video content | `videoPath`, `question`, `provider?` |
|
|
261
|
+
| `summarize_video` | Generate a structured video summary | `videoPath`, `provider?` |
|
|
262
|
+
| `extract_frames` | Extract frames from a video | `videoPath`, `mode`, `count/intervalSec/timestamps` |
|
|
263
|
+
| `search_timestamp` | Find when something specific happens | `videoPath`, `query`, `provider?` |
|
|
264
|
+
| `get_video_info` | Get video metadata | `videoPath` |
|
|
265
|
+
| `transcribe_video` | Transcribe audio/speech from a video | `videoPath`, `provider?`, `language?`, `diarize?`, `translate?` |
|
|
266
|
+
|
|
267
|
+
### Path and Provider Constraints
|
|
268
|
+
|
|
269
|
+
- **All 6 tools** support both local files and remote `http(s)` URLs. Remote videos are automatically downloaded to a temporary file before processing.
|
|
270
|
+
- For remote `http(s)` URLs with AI-powered video tools (`analyze_video`, `summarize_video`, `search_timestamp`): all three providers (`provider=gemini`, `provider=glm`, `provider=kimi`) are supported. Remote videos are downloaded to a temp file before upload.
|
|
271
|
+
- For `transcribe_video`, all four audio providers (Deepgram, AssemblyAI, Groq, Gemini) support both local files and remote URLs.
|
|
272
|
+
- For local inputs, all tools accept normal filesystem paths or `file://` URIs (automatically normalized).
|
|
273
|
+
|
|
274
|
+
## Usage Examples
|
|
275
|
+
|
|
276
|
+
### Analyze Video
|
|
277
|
+
|
|
278
|
+
Ask Copilot Chat:
|
|
279
|
+
|
|
280
|
+
> "Analyze the video at `./demo.mp4` and tell me what happens in it"
|
|
281
|
+
|
|
282
|
+
### Summarize Video
|
|
283
|
+
|
|
284
|
+
> "Summarize the video at `./long-video.mp4`"
|
|
285
|
+
|
|
286
|
+
### Extract Frames
|
|
287
|
+
|
|
288
|
+
> "Extract 5 evenly-spaced frames from `./video.mp4`"
|
|
289
|
+
|
|
290
|
+
> "Extract a frame at timestamp 30 seconds from `./video.mp4`"
|
|
291
|
+
|
|
292
|
+
### Search Timestamp
|
|
293
|
+
|
|
294
|
+
> "In `./video.mp4`, at what timestamp does the person wave?"
|
|
295
|
+
|
|
296
|
+
### Get Video Info
|
|
297
|
+
|
|
298
|
+
> "Get the video info for `./video.mp4`"
|
|
299
|
+
|
|
300
|
+
### Transcribe Video
|
|
301
|
+
|
|
302
|
+
> "Transcribe the audio from `./meeting.mp4`"
|
|
303
|
+
|
|
304
|
+
> "Transcribe `./interview.mp4` with speaker diarization using AssemblyAI"
|
|
305
|
+
|
|
306
|
+
> "Transcribe this Spanish video and translate it to English: `./video.mp4`"
|
|
307
|
+
|
|
308
|
+
## Additional Guides
|
|
309
|
+
|
|
310
|
+
- [Screen Recording for Small File Sizes (Windows)](docs/screen-recording-small-files.md)
|
|
311
|
+
|
|
312
|
+
## Backend Comparison
|
|
313
|
+
|
|
314
|
+
| Feature | Gemini 3 Flash Preview | GLM-4.6V | Kimi K2.5 |
|
|
315
|
+
| -------------- | ---------------------------------------------- | ----------------------------- | ---------------------------------------------- |
|
|
316
|
+
| Video formats | mp4, mpeg, mov, avi, flv, mpg, webm, wmv, 3gpp | mp4, avi, mov, wmv, webm, m4v | mp4, mpeg, mov, avi, flv, mpg, webm, wmv, 3gpp |
|
|
317
|
+
| Price | Free tier available | $0.30 input / $0.90 output | $0.60 input / $3.00 output |
|
|
318
|
+
| Free tier | Yes | Yes (GLM-4.6V-Flash) | No |
|
|
319
|
+
| Context window | 1M tokens | 128K | 256K |
|
|
320
|
+
| Max file size | 2 GB | ~20 MB (base64) | 100 MB |
|
|
321
|
+
| Best for | Fallback only (inaccurate despite features) | **Default** (free tier) | Alternative to GLM |
|
|
322
|
+
|
|
323
|
+
**GLM-4.6V is the default backend** — it offers a free tier (GLM-4.6V-Flash), making it a good zero-cost starting point. Kimi K2.5 is a paid alternative with broader format support; accuracy between the two has not been systematically compared. Gemini 3 Flash Preview is used as the **last resort fallback** despite its superior technical features (1M token context, 2GB file size, native multimodal audio+video support) because it has **proven inaccurate for video content analysis** in practice. Set `VIDEO_MCP_DEFAULT_PROVIDER=kimi` or `VIDEO_MCP_DEFAULT_PROVIDER=gemini` to switch the default. This env default is used when a tool call omits the `provider` parameter.
|
|
324
|
+
|
|
325
|
+
When a provider's API key is missing, the tool automatically falls back to the next available provider in the ranking chain (**GLM → Kimi → Gemini**) and includes a notice in the response, e.g. `Provider used: kimi (fell back from glm)`.
|
|
326
|
+
|
|
327
|
+
## Environment Variables
|
|
328
|
+
|
|
329
|
+
| Variable | Description | Required |
|
|
330
|
+
| ------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------- |
|
|
331
|
+
| `GEMINI_API_KEY` | Google API key for Gemini 3 Flash Preview | Optional (required if using Gemini) |
|
|
332
|
+
| `MOONSHOT_API_KEY` | Moonshot AI API key for Kimi K2.5 | Optional (required if using Kimi) |
|
|
333
|
+
| `Z_AI_API_KEY` | Z.AI API key for GLM-4.6V | Optional (required if using GLM) |
|
|
334
|
+
| `VIDEO_MCP_DEFAULT_PROVIDER` | Default video backend (`gemini`, `glm`, `kimi`) | Optional (default: `glm`) |
|
|
335
|
+
| `VIDEO_MCP_MAX_FRAMES` | Max frames for summarization (GLM/Kimi only) | Optional (default: 20; clamped to 5-100) |
|
|
336
|
+
| `DEEPGRAM_API_KEY` | Deepgram API key for `transcribe_video` | Optional (required if using Deepgram) |
|
|
337
|
+
| `ASSEMBLYAI_API_KEY` | AssemblyAI API key for `transcribe_video` | Optional (required if using AssemblyAI) |
|
|
338
|
+
| `GROQ_API_KEY` | Groq API key for Whisper transcription via `transcribe_video` | Optional (required if using Groq) |
|
|
339
|
+
| `AUDIO_MCP_DEFAULT_PROVIDER` | Default audio provider; defaults to `deepgram`. Falls back in order: **deepgram → assemblyai → groq → gemini** when the selected provider's key is unavailable. A fallback notice is included in the response. | Optional (auto-selects from available keys) |
|
|
340
|
+
| `AUDIO_ENHANCE_VIDEO_ANALYSIS` | Controls audio transcript injection into GLM/Kimi `analyze_video`/`summarize_video` prompts. `auto` (default) — transcribes only when the video has a detected audio track; `true` — always attempt transcription; `false` — disabled. A confidence label (`high`/`medium`/`low`) is included in the injected header so the model can weight the transcript appropriately. Gemini is always skipped (handles audio natively). | Optional (default: `auto`) |
|
|
341
|
+
|
|
342
|
+
### Example Configuration
|
|
343
|
+
|
|
344
|
+
```json
|
|
345
|
+
{
|
|
346
|
+
"servers": {
|
|
347
|
+
"videoMcp": {
|
|
348
|
+
"type": "stdio",
|
|
349
|
+
"command": "video-context-mcp",
|
|
350
|
+
"env": {
|
|
351
|
+
"GEMINI_API_KEY": "your-gemini-key",
|
|
352
|
+
"Z_AI_API_KEY": "your-zai-key",
|
|
353
|
+
"MOONSHOT_API_KEY": "your-moonshot-key",
|
|
354
|
+
"DEEPGRAM_API_KEY": "your-deepgram-key",
|
|
355
|
+
"ASSEMBLYAI_API_KEY": "your-assemblyai-key",
|
|
356
|
+
"GROQ_API_KEY": "your-groq-key",
|
|
357
|
+
"VIDEO_MCP_DEFAULT_PROVIDER": "glm",
|
|
358
|
+
"AUDIO_ENHANCE_VIDEO_ANALYSIS": "auto"
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
> **Note:** `VIDEO_MCP_MAX_FRAMES` only applies when using **GLM or Kimi** as the provider. Gemini uploads the full video natively and ignores this setting. Add it to `env` only if you are running with GLM or Kimi (it has no effect with `VIDEO_MCP_DEFAULT_PROVIDER=gemini`).
|
|
366
|
+
|
|
367
|
+
## Development
|
|
368
|
+
|
|
369
|
+
```bash
|
|
370
|
+
# Install dependencies
|
|
371
|
+
npm install
|
|
372
|
+
|
|
373
|
+
# Run in development (auto-restart on changes)
|
|
374
|
+
npm run dev
|
|
375
|
+
|
|
376
|
+
# Build for production
|
|
377
|
+
npm run build
|
|
378
|
+
|
|
379
|
+
# Run type checking
|
|
380
|
+
npm run type-check
|
|
381
|
+
|
|
382
|
+
# Run linter
|
|
383
|
+
npm run lint
|
|
384
|
+
|
|
385
|
+
# Run automated tests
|
|
386
|
+
npm run test
|
|
387
|
+
|
|
388
|
+
# Run tests in watch mode
|
|
389
|
+
npm run test:watch
|
|
390
|
+
|
|
391
|
+
# Run tests with coverage
|
|
392
|
+
npm run test:coverage
|
|
393
|
+
|
|
394
|
+
# Format all files
|
|
395
|
+
npm run format
|
|
396
|
+
|
|
397
|
+
# Check formatting only
|
|
398
|
+
npm run format:check
|
|
399
|
+
|
|
400
|
+
# Lint + Type-check + Format + Build
|
|
401
|
+
npm run ltfb
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
## Architecture
|
|
405
|
+
|
|
406
|
+
```
|
|
407
|
+
video-mcp/
|
|
408
|
+
├── src/
|
|
409
|
+
│ ├── index.ts # MCP server entry point
|
|
410
|
+
│ ├── tools/ # MCP tool implementations
|
|
411
|
+
│ │ └── transcribeVideo.ts # Audio transcription tool
|
|
412
|
+
│ ├── services/ # Backend clients (Kimi, GLM, Gemini, ffmpeg)
|
|
413
|
+
│ │ └── audio/ # Audio provider clients (Deepgram, AssemblyAI, Groq)
|
|
414
|
+
│ └── utils/ # Helpers (temp files, base64, audio injection)
|
|
415
|
+
├── .vscode/
|
|
416
|
+
│ └── mcp.json # VS Code MCP configuration
|
|
417
|
+
├── docs/
|
|
418
|
+
│ └── technical/ # Technical documentation
|
|
419
|
+
└── .github/
|
|
420
|
+
└── copilot-instructions.md # Copilot AI assistant guidelines
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
## License
|
|
424
|
+
|
|
425
|
+
MIT
|
|
426
|
+
|
|
427
|
+
## Credits
|
|
428
|
+
|
|
429
|
+
- [MCP SDK](https://github.com/modelcontextprotocol/typescript-sdk) by Anthropic
|
|
430
|
+
- [Kimi K2.5](https://github.com/MoonshotAI/Kimi-K2.5) by Moonshot AI
|
|
431
|
+
- [GLM-4.6V](https://docs.z.ai/guides/vlm/glm-4.6v) by Z.AI
|
|
432
|
+
- [ffmpeg](https://ffmpeg.org/) for video processing
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":""}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
3
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
4
|
+
import { z } from 'zod';
|
|
5
|
+
// Import tool handlers
|
|
6
|
+
import { analyzeVideoTool } from './tools/analyzeVideo.js';
|
|
7
|
+
import { summarizeVideoTool } from './tools/summarizeVideo.js';
|
|
8
|
+
import { extractFramesTool } from './tools/extractFrames.js';
|
|
9
|
+
import { searchTimestampTool } from './tools/searchTimestamp.js';
|
|
10
|
+
import { getVideoInfoTool } from './tools/getVideoInfo.js';
|
|
11
|
+
import { transcribeVideoTool } from './tools/transcribeVideo.js';
|
|
12
|
+
import { setLoggerServer } from './utils/logger.js';
|
|
13
|
+
/**
|
|
14
|
+
* Main entry point for the Video Context MCP Server
|
|
15
|
+
* Creates an MCP server, registers all video analysis tools, and connects via stdio
|
|
16
|
+
*/
|
|
17
|
+
/**
 * Main entry point for the Video Context MCP Server.
 *
 * Creates the MCP server, wires up the shared logger, registers the six
 * video tools (analyze_video, summarize_video, extract_frames,
 * search_timestamp, get_video_info, transcribe_video) and connects the
 * server to a stdio transport.
 *
 * @returns A promise that resolves once the server is connected to the
 *   transport. Any error raised during setup or connection rejects the
 *   promise and is left for the caller to handle.
 */
async function main() {
    const server = new McpServer({
        name: 'video-mcp',
        // Advertised to MCP clients as the server version. Keep this in sync
        // with the "version" field of package.json (previously a stale
        // placeholder '1.0.0' was reported for the 0.11.0-beta package).
        version: '0.11.0-beta',
    }, {
        capabilities: {
            logging: {}, // Enable logging for progress reporting
        },
    });
    // Wire up the logger so tools can emit progress notifications
    setLoggerServer(server);

    // Schema fragments shared by several tools. The texts are identical to the
    // per-tool copies they replace, so the published tool schemas are unchanged.
    const videoProviderSchema = z
        .enum(['glm', 'kimi', 'gemini'])
        .optional()
        .describe("AI backend to use: 'glm' (GLM-4.6V, default), 'kimi' (Kimi K2.5), or 'gemini' (Gemini 3 Flash Preview)");
    const videoPathSchema = z
        .string()
        .describe('Path to the video file (local path or URL)');
    const videoPathOrUriSchema = z
        .string()
        .describe('Path to the video file (local path, file:// URI, or http(s) URL)');

    // Tool 1: analyze_video - Ask questions about video content
    server.registerTool('analyze_video', {
        title: 'Analyze Video',
        description: 'Ask questions about video content and get AI-powered answers. Supports both local files and URLs.',
        inputSchema: z.object({
            videoPath: videoPathSchema,
            question: z
                .string()
                .describe('Question to ask about the video content'),
            provider: videoProviderSchema,
        }),
    }, analyzeVideoTool);

    // Tool 2: summarize_video - Generate structured video summary
    server.registerTool('summarize_video', {
        title: 'Summarize Video',
        description: 'Generate a structured summary of the video including overview, key scenes, and timeline. For long videos (>5 min), extracts keyframes to reduce token usage (unless using Gemini, which processes natively).',
        inputSchema: z.object({
            videoPath: videoPathSchema,
            provider: videoProviderSchema,
        }),
    }, summarizeVideoTool);

    // Tool 3: extract_frames - Extract frames from video.
    // Exactly which of count / intervalSec / timestamps is needed depends on
    // `mode`; the schema marks all three optional, so that requirement is not
    // enforced here.
    server.registerTool('extract_frames', {
        title: 'Extract Frames',
        description: 'Extract frames from a video at specific timestamps or intervals. Supports local files (including file:// URIs) and remote http(s) URLs. No AI backend required.',
        inputSchema: z.object({
            videoPath: videoPathOrUriSchema,
            mode: z
                .enum(['even', 'interval', 'timestamps'])
                .describe("Extraction mode: 'even' (N evenly-spaced frames), 'interval' (every N seconds), or 'timestamps' (at specific times)"),
            count: z
                .number()
                .int()
                .min(1)
                .max(100)
                .optional()
                .describe("Number of frames to extract (required for 'even' mode)"),
            intervalSec: z
                .number()
                .min(0.1)
                .optional()
                .describe("Interval in seconds between frames (required for 'interval' mode)"),
            timestamps: z
                .array(z.number().min(0))
                .optional()
                .describe("Array of timestamps in seconds (required for 'timestamps' mode)"),
        }),
    }, extractFramesTool);

    // Tool 4: search_timestamp - Find when something happens in video
    server.registerTool('search_timestamp', {
        title: 'Search Timestamp',
        description: 'Find the timestamp when something specific happens in a video. Extracts frames and uses AI to locate the content. Supports local files (including file:// URIs) and remote http(s) URLs.',
        inputSchema: z.object({
            videoPath: videoPathOrUriSchema,
            query: z
                .string()
                .describe("What to search for, e.g., 'person waves', 'dog runs', 'car crash'"),
            provider: videoProviderSchema,
        }),
    }, searchTimestampTool);

    // Tool 5: get_video_info - Get video metadata
    server.registerTool('get_video_info', {
        title: 'Get Video Info',
        description: 'Get video metadata including duration, resolution, fps, codec, file size, and format. Supports local files (including file:// URIs) and remote http(s) URLs. No AI backend required.',
        inputSchema: z.object({
            videoPath: videoPathOrUriSchema,
        }),
    }, getVideoInfoTool);

    // Tool 6: transcribe_video - Transcribe audio from a video
    server.registerTool('transcribe_video', {
        title: 'Transcribe Video',
        description: 'Extract audio from a video and transcribe it using a dedicated speech-to-text provider (Deepgram, AssemblyAI, Groq/Whisper, or Gemini). Supports speaker diarization and translation to English.',
        inputSchema: z.object({
            videoPath: videoPathOrUriSchema,
            provider: z
                .enum(['deepgram', 'assemblyai', 'groq', 'gemini'])
                .optional()
                .describe("Audio provider to use: 'deepgram' (Nova-2, default), 'assemblyai' (Universal), 'groq' (Whisper-large-v3, free), or 'gemini'. Defaults to AUDIO_MCP_DEFAULT_PROVIDER env var or first available key."),
            language: z
                .string()
                .optional()
                .describe("BCP-47 language code, e.g. 'en', 'es', 'fr'. Auto-detected if omitted."),
            diarize: z
                .boolean()
                .optional()
                .describe('Enable speaker diarization (who said what). Supported by Deepgram and AssemblyAI only. Silently ignored for other providers.'),
            translate: z
                .boolean()
                .optional()
                .describe('Translate the transcript to English. Supported by Groq and Gemini only. Silently ignored for other providers.'),
        }),
    }, transcribeVideoTool);

    // Connect to the MCP client (VS Code, Cursor, Claude Code, ...) over stdio
    const transport = new StdioServerTransport();
    await server.connect(transport);
    // Server is now running, listening for tool calls from the connected client
}
|
|
148
|
+
/**
 * Last-resort handler for a rejection escaping `main()`.
 *
 * Writes the failure to stderr (stdout is left alone) and terminates the
 * process with exit code 1.
 *
 * @param startupError - The rejection reason produced by `main()`.
 */
function reportFatalStartupError(startupError) {
    console.error('Fatal error starting video-mcp server:', startupError);
    process.exit(1);
}

// Kick off the server; any unhandled failure during startup is fatal.
main().catch(reportFatalStartupError);
|
|
152
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAA;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAA;AAChF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AAEvB,uBAAuB;AACvB,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAA;AAC9D,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAA;AAC5D,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAA;AAChE,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AAC1D,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAA;AAChE,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAA;AAEnD;;;GAGG;AAEH,KAAK,UAAU,IAAI;IACjB,0CAA0C;IAC1C,MAAM,MAAM,GAAG,IAAI,SAAS,CAC1B;QACE,IAAI,EAAE,WAAW;QACjB,OAAO,EAAE,OAAO;KACjB,EACD;QACE,YAAY,EAAE;YACZ,OAAO,EAAE,EAAE,EAAE,wCAAwC;SACtD;KACF,CACF,CAAA;IAED,8DAA8D;IAC9D,eAAe,CAAC,MAAM,CAAC,CAAA;IAEvB,oCAAoC;IAEpC,4DAA4D;IAC5D,MAAM,CAAC,YAAY,CACjB,eAAe,EACf;QACE,KAAK,EAAE,eAAe;QACtB,WAAW,EACT,mGAAmG;QACrG,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;YACpB,SAAS,EAAE,CAAC;iBACT,MAAM,EAAE;iBACR,QAAQ,CAAC,4CAA4C,CAAC;YACzD,QAAQ,EAAE,CAAC;iBACR,MAAM,EAAE;iBACR,QAAQ,CAAC,yCAAyC,CAAC;YACtD,QAAQ,EAAE,CAAC;iBACR,IAAI,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;iBAC/B,QAAQ,EAAE;iBACV,QAAQ,CACP,wGAAwG,CACzG;SACJ,CAAC;KACH,EACD,gBAAgB,CACjB,CAAA;IAED,8DAA8D;IAC9D,MAAM,CAAC,YAAY,CACjB,iBAAiB,EACjB;QACE,KAAK,EAAE,iBAAiB;QACxB,WAAW,EACT,8MAA8M;QAChN,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;YACpB,SAAS,EAAE,CAAC;iBACT,MAAM,EAAE;iBACR,QAAQ,CAAC,4CAA4C,CAAC;YACzD,QAAQ,EAAE,CAAC;iBACR,IAAI,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;iBAC/B,QAAQ,EAAE;iBACV,QAAQ,CACP,wGAAwG,CACzG;SACJ,CAAC;KACH,EACD,kBAAkB,CACnB,CAAA;IAED,qDAAqD;IACrD,MAAM,CAAC,YAAY,CACjB,gBAAgB,EAChB;QACE,KAAK,EAAE,gBAAgB;QACvB,WAAW,EACT,iKAAiK;QACnK,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;YACpB,SAAS,EAAE,CAAC;iBACT,MAAM,EAAE;iBACR,QAAQ,CACP,kEAAkE,CACnE;YACH,IAAI,EAAE,CAAC;iBACJ,IAAI,CAAC,CAAC,MAAM,EAAE,UAAU,EAAE,YAAY,CAAC,CAAC;iBACxC,QAAQ,CACP,qHAAqH,CACtH;YACH,KAAK,EAAE,CAAC;iBACL,MAAM,EAAE;iBACR,GAAG,EAAE;iBACL,GAAG,CAAC,CAAC,CAAC;iBACN,GAAG,CAAC,GAAG,CAA
C;iBACR,QAAQ,EAAE;iBACV,QAAQ,CAAC,wDAAwD,CAAC;YACrE,WAAW,EAAE,CAAC;iBACX,MAAM,EAAE;iBACR,GAAG,CAAC,GAAG,CAAC;iBACR,QAAQ,EAAE;iBACV,QAAQ,CACP,mEAAmE,CACpE;YACH,UAAU,EAAE,CAAC;iBACV,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;iBACxB,QAAQ,EAAE;iBACV,QAAQ,CACP,iEAAiE,CAClE;SACJ,CAAC;KACH,EACD,iBAAiB,CAClB,CAAA;IAED,kEAAkE;IAClE,MAAM,CAAC,YAAY,CACjB,kBAAkB,EAClB;QACE,KAAK,EAAE,kBAAkB;QACzB,WAAW,EACT,0LAA0L;QAC5L,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;YACpB,SAAS,EAAE,CAAC;iBACT,MAAM,EAAE;iBACR,QAAQ,CACP,kEAAkE,CACnE;YACH,KAAK,EAAE,CAAC;iBACL,MAAM,EAAE;iBACR,QAAQ,CACP,mEAAmE,CACpE;YACH,QAAQ,EAAE,CAAC;iBACR,IAAI,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;iBAC/B,QAAQ,EAAE;iBACV,QAAQ,CACP,wGAAwG,CACzG;SACJ,CAAC;KACH,EACD,mBAAmB,CACpB,CAAA;IAED,8CAA8C;IAC9C,MAAM,CAAC,YAAY,CACjB,gBAAgB,EAChB;QACE,KAAK,EAAE,gBAAgB;QACvB,WAAW,EACT,sLAAsL;QACxL,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;YACpB,SAAS,EAAE,CAAC;iBACT,MAAM,EAAE;iBACR,QAAQ,CACP,kEAAkE,CACnE;SACJ,CAAC;KACH,EACD,gBAAgB,CACjB,CAAA;IAED,2DAA2D;IAC3D,MAAM,CAAC,YAAY,CACjB,kBAAkB,EAClB;QACE,KAAK,EAAE,kBAAkB;QACzB,WAAW,EACT,kMAAkM;QACpM,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;YACpB,SAAS,EAAE,CAAC;iBACT,MAAM,EAAE;iBACR,QAAQ,CACP,kEAAkE,CACnE;YACH,QAAQ,EAAE,CAAC;iBACR,IAAI,CAAC,CAAC,UAAU,EAAE,YAAY,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;iBAClD,QAAQ,EAAE;iBACV,QAAQ,CACP,qMAAqM,CACtM;YACH,QAAQ,EAAE,CAAC;iBACR,MAAM,EAAE;iBACR,QAAQ,EAAE;iBACV,QAAQ,CACP,wEAAwE,CACzE;YACH,OAAO,EAAE,CAAC;iBACP,OAAO,EAAE;iBACT,QAAQ,EAAE;iBACV,QAAQ,CACP,8HAA8H,CAC/H;YACH,SAAS,EAAE,CAAC;iBACT,OAAO,EAAE;iBACT,QAAQ,EAAE;iBACV,QAAQ,CACP,+GAA+G,CAChH;SACJ,CAAC;KACH,EACD,mBAAmB,CACpB,CAAA;IAED,yCAAyC;IACzC,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAA;IAC5C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;IAE/B,+DAA+D;AACjE,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACrB,OAAO,CAAC,KAAK,CAAC,wCAAwC,EAAE,KAAK,CAAC,CAAA;IAC9D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;AACjB,CAAC,CAAC,CAAA"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AssemblyAI Audio Client
|
|
3
|
+
* Transcribes audio files using AssemblyAI's Universal (best) model
|
|
4
|
+
* Supports speaker diarization and other audio intelligence features
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Optional settings accepted by `AssemblyAiClient.transcribe`.
 * Every field may be omitted.
 */
export interface AssemblyAiTranscribeOptions {
    /** Whether to request speaker diarization for the transcript. */
    diarize?: boolean;
    /**
     * Language of the audio.
     * NOTE(review): presumably a language code such as 'en' — confirm against the implementation.
     */
    language?: string;
}
|
|
10
|
+
/**
 * A single segment of a transcript.
 */
export interface TranscriptSegment {
    /** Text content of this segment. */
    text: string;
    /**
     * Speaker label for this segment, when one is present.
     * NOTE(review): presumably only populated when diarization is requested — confirm.
     */
    speaker?: string;
}
|
|
14
|
+
/**
 * Result returned by a transcription call.
 */
export interface TranscriptResult {
    /** Optional breakdown of the transcript into segments. */
    segments?: Array<TranscriptSegment>;
    /** The transcript text. */
    text: string;
}
|
|
18
|
+
/**
 * Client for transcribing audio files with AssemblyAI.
 */
export declare class AssemblyAiClient {
    /** Internal client handle; its type is not exposed by this declaration. */
    private client;

    /**
     * @param apiKey - API key used to construct the client
     *   (presumably the AssemblyAI key — confirm against the implementation).
     */
    constructor(apiKey: string);

    /**
     * Transcribe an audio file using AssemblyAI.
     *
     * @param audioPath - Local path to the audio file (.m4a, .mp3, etc.)
     * @param options - Transcription options; may be omitted.
     * @returns A promise resolving to the transcript result.
     */
    transcribe(
        audioPath: string,
        options?: AssemblyAiTranscribeOptions,
    ): Promise<TranscriptResult>;
}
|
|
28
|
+
//# sourceMappingURL=assemblyAiClient.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"assemblyAiClient.d.ts","sourceRoot":"","sources":["../../../src/services/audio/assemblyAiClient.ts"],"names":[],"mappings":"AAEA;;;;GAIG;AAEH,MAAM,WAAW,2BAA2B;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,OAAO,CAAC,EAAE,OAAO,CAAA;CAClB;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,IAAI,EAAE,MAAM,CAAA;CACb;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,QAAQ,CAAC,EAAE,iBAAiB,EAAE,CAAA;CAC/B;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAAY;gBAEd,MAAM,EAAE,MAAM;IAI1B;;;;OAIG;IACG,UAAU,CACd,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,2BAAgC,GACxC,OAAO,CAAC,gBAAgB,CAAC;CAkC7B"}
|