video-context-mcp-server 0.11.0-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/README.md +432 -0
  2. package/dist/index.d.ts +3 -0
  3. package/dist/index.d.ts.map +1 -0
  4. package/dist/index.js +152 -0
  5. package/dist/index.js.map +1 -0
  6. package/dist/services/audio/assemblyAiClient.d.ts +28 -0
  7. package/dist/services/audio/assemblyAiClient.d.ts.map +1 -0
  8. package/dist/services/audio/assemblyAiClient.js +40 -0
  9. package/dist/services/audio/assemblyAiClient.js.map +1 -0
  10. package/dist/services/audio/deepgramClient.d.ts +23 -0
  11. package/dist/services/audio/deepgramClient.d.ts.map +1 -0
  12. package/dist/services/audio/deepgramClient.js +50 -0
  13. package/dist/services/audio/deepgramClient.js.map +1 -0
  14. package/dist/services/audio/groqAudioClient.d.ts +18 -0
  15. package/dist/services/audio/groqAudioClient.d.ts.map +1 -0
  16. package/dist/services/audio/groqAudioClient.js +43 -0
  17. package/dist/services/audio/groqAudioClient.js.map +1 -0
  18. package/dist/services/audioRouter.d.ts +38 -0
  19. package/dist/services/audioRouter.d.ts.map +1 -0
  20. package/dist/services/audioRouter.js +81 -0
  21. package/dist/services/audioRouter.js.map +1 -0
  22. package/dist/services/ffmpeg.d.ts +54 -0
  23. package/dist/services/ffmpeg.d.ts.map +1 -0
  24. package/dist/services/ffmpeg.js +188 -0
  25. package/dist/services/ffmpeg.js.map +1 -0
  26. package/dist/services/geminiClient.d.ts +55 -0
  27. package/dist/services/geminiClient.d.ts.map +1 -0
  28. package/dist/services/geminiClient.js +143 -0
  29. package/dist/services/geminiClient.js.map +1 -0
  30. package/dist/services/glmClient.d.ts +50 -0
  31. package/dist/services/glmClient.d.ts.map +1 -0
  32. package/dist/services/glmClient.js +196 -0
  33. package/dist/services/glmClient.js.map +1 -0
  34. package/dist/services/kimiClient.d.ts +45 -0
  35. package/dist/services/kimiClient.d.ts.map +1 -0
  36. package/dist/services/kimiClient.js +152 -0
  37. package/dist/services/kimiClient.js.map +1 -0
  38. package/dist/services/providerRouter.d.ts +40 -0
  39. package/dist/services/providerRouter.d.ts.map +1 -0
  40. package/dist/services/providerRouter.js +64 -0
  41. package/dist/services/providerRouter.js.map +1 -0
  42. package/dist/tools/analyzeVideo.d.ts +18 -0
  43. package/dist/tools/analyzeVideo.d.ts.map +1 -0
  44. package/dist/tools/analyzeVideo.js +153 -0
  45. package/dist/tools/analyzeVideo.js.map +1 -0
  46. package/dist/tools/extractFrames.d.ts +22 -0
  47. package/dist/tools/extractFrames.d.ts.map +1 -0
  48. package/dist/tools/extractFrames.js +82 -0
  49. package/dist/tools/extractFrames.js.map +1 -0
  50. package/dist/tools/getVideoInfo.d.ts +18 -0
  51. package/dist/tools/getVideoInfo.d.ts.map +1 -0
  52. package/dist/tools/getVideoInfo.js +52 -0
  53. package/dist/tools/getVideoInfo.js.map +1 -0
  54. package/dist/tools/searchTimestamp.d.ts +25 -0
  55. package/dist/tools/searchTimestamp.d.ts.map +1 -0
  56. package/dist/tools/searchTimestamp.js +152 -0
  57. package/dist/tools/searchTimestamp.js.map +1 -0
  58. package/dist/tools/summarizeVideo.d.ts +18 -0
  59. package/dist/tools/summarizeVideo.d.ts.map +1 -0
  60. package/dist/tools/summarizeVideo.js +208 -0
  61. package/dist/tools/summarizeVideo.js.map +1 -0
  62. package/dist/tools/transcribeVideo.d.ts +18 -0
  63. package/dist/tools/transcribeVideo.d.ts.map +1 -0
  64. package/dist/tools/transcribeVideo.js +106 -0
  65. package/dist/tools/transcribeVideo.js.map +1 -0
  66. package/dist/utils/audioUtils.d.ts +62 -0
  67. package/dist/utils/audioUtils.d.ts.map +1 -0
  68. package/dist/utils/audioUtils.js +153 -0
  69. package/dist/utils/audioUtils.js.map +1 -0
  70. package/dist/utils/base64.d.ts +35 -0
  71. package/dist/utils/base64.d.ts.map +1 -0
  72. package/dist/utils/base64.js +50 -0
  73. package/dist/utils/base64.js.map +1 -0
  74. package/dist/utils/logger.d.ts +23 -0
  75. package/dist/utils/logger.d.ts.map +1 -0
  76. package/dist/utils/logger.js +34 -0
  77. package/dist/utils/logger.js.map +1 -0
  78. package/dist/utils/tempFiles.d.ts +20 -0
  79. package/dist/utils/tempFiles.d.ts.map +1 -0
  80. package/dist/utils/tempFiles.js +31 -0
  81. package/dist/utils/tempFiles.js.map +1 -0
  82. package/dist/utils/videoUtils.d.ts +19 -0
  83. package/dist/utils/videoUtils.d.ts.map +1 -0
  84. package/dist/utils/videoUtils.js +38 -0
  85. package/dist/utils/videoUtils.js.map +1 -0
  86. package/package.json +64 -0
package/README.md ADDED
@@ -0,0 +1,432 @@
1
+ # Video Context MCP Server
2
+
3
+ Video Context MCP Server is a Model Context Protocol (MCP) server that gives MCP-compatible coding assistants (such as GitHub Copilot in VS Code, Cursor, and Claude Code) the ability to understand and analyze video content.
4
+
5
+ ## Features
6
+
7
+ - 🎬 **Video Q&A** — Ask questions about video content and get AI-powered answers
8
+ - 📝 **Video Summarization** — Generate structured summaries with key scenes and timelines
9
+ - 🖼️ **Frame Extraction** — Extract frames at specific timestamps or intervals
10
+ - 🔍 **Timestamp Search** — Find the exact moment when something happens in a video
11
+ - 📊 **Video Metadata** — Get duration, resolution, fps, codec, and other technical details
12
+ - 🎙️ **Audio Transcription** — Transcribe speech from any video using Deepgram, AssemblyAI, Groq/Whisper, or Gemini
13
+ - 🔊 **Speaker Diarization** — Identify who said what (Deepgram and AssemblyAI)
14
+ - 🔄 **Multi-Backend Support** — Choose between Gemini (native multimodal), GLM-4.6V (cheap/free), or Kimi K2.5 (broader format support)
15
+ - 🎯 **Smart Video Handling** — Extracts keyframes from long videos to reduce token usage (when not using Gemini)
16
+
17
+ ## Installation
18
+
19
+ ### Quick Start (Recommended for most users)
20
+
21
+ #### 1. Prerequisites
22
+
23
+ - Node.js 18+
24
+ - VS Code with GitHub Copilot Chat enabled
25
+
26
+ #### 2. Get API keys
27
+
28
+ You'll need API keys for one or more **video** backends:
29
+
30
+ - **Gemini 3 Flash Preview (Google)**: [Get API Key](https://aistudio.google.com/app/apikey)
31
+ - **Kimi K2.5 (Moonshot AI)**: [Get API Key](https://platform.moonshot.ai)
32
+ - **GLM-4.6V (Z.AI)**: [Get API Key](https://z.ai/manage-apikey/apikey-list)
33
+
34
+ For **audio transcription** (`transcribe_video`), you'll also need at least one audio provider key:
35
+
36
+ - **Deepgram** (default): [Get API Key](https://console.deepgram.com/)
37
+ - **AssemblyAI**: [Get API Key](https://www.assemblyai.com/dashboard)
38
+ - **Groq** (free Whisper): [Get API Key](https://console.groq.com/)
39
+ - **Gemini** (reuse `GEMINI_API_KEY` above — no extra key needed)
40
+
41
+ #### 3. Install the MCP server
42
+
43
+ ```bash
44
+ npm install -g video-context-mcp-server
45
+ ```
46
+
47
+ This installs the executable command: `video-context-mcp`.
48
+
49
+ > **Tip:** Periodically re-run the above command to get the latest version:
50
+ >
51
+ > ```bash
52
+ > npm install -g video-context-mcp-server@latest
53
+ > ```
54
+
55
+ #### 4. Configure VS Code MCP
56
+
57
+ Create (or update) `.vscode/mcp.json` in your project/workspace:
58
+
59
+ ```json
60
+ {
61
+ "servers": {
62
+ "videoMcp": {
63
+ "type": "stdio",
64
+ "command": "video-context-mcp",
65
+ "env": {
66
+ "GEMINI_API_KEY": "your-gemini-key",
67
+ "MOONSHOT_API_KEY": "your-moonshot-key",
68
+ "Z_AI_API_KEY": "your-zai-key",
69
+ "DEEPGRAM_API_KEY": "your-deepgram-key",
70
+ "ASSEMBLYAI_API_KEY": "your-assemblyai-key",
71
+ "GROQ_API_KEY": "your-groq-key"
72
+ }
73
+ }
74
+ }
75
+ }
76
+ ```
77
+
78
+ Open Copilot Chat in VS Code. The MCP server starts automatically when tools are needed.
79
+
80
+ ### Configure Cursor MCP
81
+
82
+ Add this server to your Cursor MCP configuration (global or project-level):
83
+
84
+ ```json
85
+ {
86
+ "mcpServers": {
87
+ "videoMcp": {
88
+ "command": "video-context-mcp",
89
+ "env": {
90
+ "GEMINI_API_KEY": "your-gemini-key",
91
+ "MOONSHOT_API_KEY": "your-moonshot-key",
92
+ "Z_AI_API_KEY": "your-zai-key",
93
+ "DEEPGRAM_API_KEY": "your-deepgram-key",
94
+ "ASSEMBLYAI_API_KEY": "your-assemblyai-key",
95
+ "GROQ_API_KEY": "your-groq-key"
96
+ }
97
+ }
98
+ }
99
+ }
100
+ ```
101
+
102
+ Notes:
103
+
104
+ - Run `npm install -g video-context-mcp-server` first if you haven't already.
105
+ - If you prefer not to install globally, use `npx -y video-context-mcp-server@latest` as the command (slower startup due to registry check).
106
+ - Set only the API keys for the providers you intend to use.
107
+
108
+ ### Configure Claude Code MCP
109
+
110
+ Use the Claude CLI to register the MCP server:
111
+
112
+ ```bash
113
+ claude mcp add videoMcp video-context-mcp \
114
+ --env GEMINI_API_KEY=your-gemini-key \
115
+ --env MOONSHOT_API_KEY=your-moonshot-key \
116
+ --env Z_AI_API_KEY=your-zai-key \
117
+ --env DEEPGRAM_API_KEY=your-deepgram-key \
118
+ --env ASSEMBLYAI_API_KEY=your-assemblyai-key \
119
+ --env GROQ_API_KEY=your-groq-key
120
+ ```
121
+
122
+ Then verify with:
123
+
124
+ ```bash
125
+ claude mcp list
126
+ ```
127
+
128
+ If you prefer not to install globally, register via `npx` instead (slower startup due to registry check):
129
+
130
+ ```bash
131
+ claude mcp add videoMcp npx -y video-context-mcp-server@latest \
132
+ --env MOONSHOT_API_KEY=your-moonshot-key \
133
+ --env Z_AI_API_KEY=your-zai-key \
134
+ --env DEEPGRAM_API_KEY=your-deepgram-key \
135
+ --env ASSEMBLYAI_API_KEY=your-assemblyai-key \
136
+ --env GROQ_API_KEY=your-groq-key
137
+ ```
138
+
139
+ ### Troubleshooting Setup
140
+
141
+ - **`video-context-mcp: command not found`**
142
+ - Make sure Node.js is installed and available in your shell (`node -v`, `npm -v`).
143
+ - If installed globally, re-run: `npm install -g video-context-mcp-server`.
144
+ - If global binaries are not on PATH, use `npx -y video-context-mcp-server@latest` instead of `video-context-mcp`.
145
+
146
+ - **MCP server not appearing in client**
147
+ - Restart the client app after changing MCP configuration.
148
+ - Validate JSON syntax in your MCP config file.
149
+ - For Claude Code, verify registration with `claude mcp list`.
150
+
151
+ - **Missing API key errors**
152
+ - Set `GEMINI_API_KEY` for Gemini usage, `Z_AI_API_KEY` for GLM usage, and `MOONSHOT_API_KEY` for Kimi usage.
153
+ - For audio transcription, set `DEEPGRAM_API_KEY` (default), `ASSEMBLYAI_API_KEY`, `GROQ_API_KEY` (free tier), or reuse `GEMINI_API_KEY`. At least one audio key is needed to use `transcribe_video`.
154
+ - You can set only the keys for the providers you intend to use.
155
+ - For local files, if `provider=glm` is requested but `Z_AI_API_KEY` is missing, the server automatically falls back to Kimi (and then Gemini) when those keys are available.
156
+ - For remote `http(s)` video URLs, all three AI providers (`glm`, `kimi`, `gemini`) are supported. All tools automatically download remote videos to a temporary file before processing.
157
+
158
+ ### Alternative: Run via npx without global install
159
+
160
+ If you prefer not to install globally, you can use `npx` instead. Note this adds a startup delay due to the npm registry check on each run:
161
+
162
+ ```json
163
+ {
164
+ "servers": {
165
+ "videoMcp": {
166
+ "type": "stdio",
167
+ "command": "npx",
168
+ "args": ["-y", "video-context-mcp-server@latest"],
169
+ "env": {
170
+ "GEMINI_API_KEY": "your-gemini-key",
171
+ "MOONSHOT_API_KEY": "your-moonshot-key",
172
+ "Z_AI_API_KEY": "your-zai-key",
173
+ "DEEPGRAM_API_KEY": "your-deepgram-key",
174
+ "ASSEMBLYAI_API_KEY": "your-assemblyai-key",
175
+ "GROQ_API_KEY": "your-groq-key"
176
+ }
177
+ }
178
+ }
179
+ }
180
+ ```
181
+
182
+ Because `@latest` resolves to the newest published version on every startup (at the cost of one extra network round-trip), the `npx` approach self-updates automatically and requires no manual update step.
183
+
184
+ ### For contributors: install from source
185
+
186
+ Clone this repository, then:
187
+
188
+ ```bash
189
+ npm install
190
+ npm run build
191
+ ```
192
+
193
+ Then use this `.vscode/mcp.json` server command:
194
+
195
+ ```json
196
+ {
197
+ "servers": {
198
+ "videoMcp": {
199
+ "type": "stdio",
200
+ "command": "node",
201
+ "args": ["${workspaceFolder}/dist/index.js"],
202
+ "env": {
203
+ "GEMINI_API_KEY": "your-gemini-key",
204
+ "MOONSHOT_API_KEY": "your-moonshot-key",
205
+ "Z_AI_API_KEY": "your-zai-key",
206
+ "DEEPGRAM_API_KEY": "your-deepgram-key",
207
+ "ASSEMBLYAI_API_KEY": "your-assemblyai-key",
208
+ "GROQ_API_KEY": "your-groq-key"
209
+ }
210
+ }
211
+ }
212
+ }
213
+ ```
214
+
215
+ ### Debugging Behavior in VS Code
216
+
217
+ When your `.vscode/mcp.json` includes a `dev` block such as:
218
+
219
+ ```jsonc
220
+ {
221
+ "servers": {
222
+ "videoMcp": {
223
+ "type": "stdio",
224
+ "command": "node",
225
+ "args": ["${workspaceFolder}/dist/index.js"],
226
+ "env": {
227
+ "GEMINI_API_KEY": "your-gemini-key",
228
+ "MOONSHOT_API_KEY": "your-moonshot-key",
229
+ "Z_AI_API_KEY": "your-zai-key",
230
+ "DEEPGRAM_API_KEY": "your-deepgram-key",
231
+ "ASSEMBLYAI_API_KEY": "your-assemblyai-key",
232
+ "GROQ_API_KEY": "your-groq-key",
233
+ },
234
+ "dev": {
235
+ "watch": "src/**/*.ts",
236
+ "debug": { "type": "node" },
237
+ },
238
+ },
239
+ },
240
+ }
241
+ ```
242
+
243
+ you may see frequent logs in the **Output** panel under `MCP: videoMcp`.
244
+
245
+ This is expected in development mode:
246
+
247
+ - `watch` restarts/reloads the MCP server when TypeScript files change
248
+ - `debug` enables Node debug integration
249
+ - MCP protocol payloads (tool schemas, discovery events, lifecycle messages) are printed in that output channel
250
+
251
+ If you want less noise, remove either:
252
+
253
+ - `dev.debug` (keeps auto-watch, disables debug integration), or
254
+ - the full `dev` block (disables watch + debug behavior)
255
+
256
+ ## Available Tools
257
+
258
+ | Tool | Description | Parameters |
259
+ | ------------------ | ------------------------------------ | --------------------------------------------------------------- |
260
+ | `analyze_video` | Ask questions about video content | `videoPath`, `question`, `provider?` |
261
+ | `summarize_video` | Generate a structured video summary | `videoPath`, `provider?` |
262
+ | `extract_frames` | Extract frames from a video | `videoPath`, `mode`, `count/intervalSec/timestamps` |
263
+ | `search_timestamp` | Find when something specific happens | `videoPath`, `query`, `provider?` |
264
+ | `get_video_info` | Get video metadata | `videoPath` |
265
+ | `transcribe_video` | Transcribe audio/speech from a video | `videoPath`, `provider?`, `language?`, `diarize?`, `translate?` |
266
+
267
+ ### Path and Provider Constraints
268
+
269
+ - **All 6 tools** support both local files and remote `http(s)` URLs. Remote videos are automatically downloaded to a temporary file before processing.
270
+ - For remote `http(s)` URLs with AI-powered video tools (`analyze_video`, `summarize_video`, `search_timestamp`): all three providers (`provider=gemini`, `provider=glm`, `provider=kimi`) are supported. Remote videos are downloaded to a temp file before upload.
271
+ - For `transcribe_video`, all four audio providers (Deepgram, AssemblyAI, Groq, Gemini) support both local files and remote URLs.
272
+ - For local inputs, all tools accept normal filesystem paths or `file://` URIs (automatically normalized).
273
+
274
+ ## Usage Examples
275
+
276
+ ### Analyze Video
277
+
278
+ Ask Copilot Chat:
279
+
280
+ > "Analyze the video at `./demo.mp4` and tell me what happens in it"
281
+
282
+ ### Summarize Video
283
+
284
+ > "Summarize the video at `./long-video.mp4`"
285
+
286
+ ### Extract Frames
287
+
288
+ > "Extract 5 evenly-spaced frames from `./video.mp4`"
289
+
290
+ > "Extract a frame at timestamp 30 seconds from `./video.mp4`"
291
+
292
+ ### Search Timestamp
293
+
294
+ > "In `./video.mp4`, at what timestamp does the person wave?"
295
+
296
+ ### Get Video Info
297
+
298
+ > "Get the video info for `./video.mp4`"
299
+
300
+ ### Transcribe Video
301
+
302
+ > "Transcribe the audio from `./meeting.mp4`"
303
+
304
+ > "Transcribe `./interview.mp4` with speaker diarization using AssemblyAI"
305
+
306
+ > "Transcribe this Spanish video and translate it to English: `./video.mp4`"
307
+
308
+ ## Additional Guides
309
+
310
+ - [Screen Recording for Small File Sizes (Windows)](docs/screen-recording-small-files.md)
311
+
312
+ ## Backend Comparison
313
+
314
+ | Feature | Gemini 3 Flash Preview | GLM-4.6V | Kimi K2.5 |
315
+ | -------------- | ---------------------------------------------- | ----------------------------- | ---------------------------------------------- |
316
+ | Video formats | mp4, mpeg, mov, avi, flv, mpg, webm, wmv, 3gpp | mp4, avi, mov, wmv, webm, m4v | mp4, mpeg, mov, avi, flv, mpg, webm, wmv, 3gpp |
317
+ | Price | Free tier available | $0.30 input / $0.90 output | $0.60 input / $3.00 output |
318
+ | Free tier | Yes | Yes (GLM-4.6V-Flash) | No |
319
+ | Context window | 1M tokens | 128K | 256K |
320
+ | Max file size | 2 GB | ~20 MB (base64) | 100 MB |
321
+ | Best for | Fallback only (inaccurate despite features) | **Default** (free tier) | Alternative to GLM |
322
+
323
+ **GLM-4.6V is the default backend** — it offers a free tier (GLM-4.6V-Flash), making it a good zero-cost starting point. Kimi K2.5 is a paid alternative with broader format support; accuracy between the two has not been systematically compared. Gemini 3 Flash Preview is used as the **last resort fallback** despite its superior technical features (1M token context, 2GB file size, native multimodal audio+video support) because it has **proven inaccurate for video content analysis** in practice. Set `VIDEO_MCP_DEFAULT_PROVIDER=kimi` or `VIDEO_MCP_DEFAULT_PROVIDER=gemini` to switch the default. This env default is used when a tool call omits the `provider` parameter.
324
+
325
+ When a provider's API key is missing, the tool automatically falls back to the next available provider in the ranking chain (**GLM → Kimi → Gemini**) and includes a notice in the response, e.g. `Provider used: kimi (fell back from glm)`.
326
+
327
+ ## Environment Variables
328
+
329
+ | Variable | Description | Required |
330
+ | ------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------- |
331
+ | `GEMINI_API_KEY` | Google API key for Gemini 3 Flash Preview | Optional (required if using Gemini) |
332
+ | `MOONSHOT_API_KEY` | Moonshot AI API key for Kimi K2.5 | Optional (required if using Kimi) |
333
+ | `Z_AI_API_KEY` | Z.AI API key for GLM-4.6V | Optional (required if using GLM) |
334
+ | `VIDEO_MCP_DEFAULT_PROVIDER` | Default video backend (`gemini`, `glm`, `kimi`) | Optional (default: `glm`) |
335
+ | `VIDEO_MCP_MAX_FRAMES` | Max frames for summarization (GLM/Kimi only) | Optional (default: 20; clamped to 5-100) |
336
+ | `DEEPGRAM_API_KEY` | Deepgram API key for `transcribe_video` | Optional (required if using Deepgram) |
337
+ | `ASSEMBLYAI_API_KEY` | AssemblyAI API key for `transcribe_video` | Optional (required if using AssemblyAI) |
338
+ | `GROQ_API_KEY` | Groq API key for Whisper transcription via `transcribe_video` | Optional (required if using Groq) |
339
+ | `AUDIO_MCP_DEFAULT_PROVIDER` | Default audio provider; defaults to `deepgram`. Falls back in order: **deepgram → assemblyai → groq → gemini** when the selected provider's key is unavailable. A fallback notice is included in the response. | Optional (auto-selects from available keys) |
340
+ | `AUDIO_ENHANCE_VIDEO_ANALYSIS` | Controls audio transcript injection into GLM/Kimi `analyze_video`/`summarize_video` prompts. `auto` (default) — transcribes only when the video has a detected audio track; `true` — always attempt transcription; `false` — disabled. A confidence label (`high`/`medium`/`low`) is included in the injected header so the model can weight the transcript appropriately. Gemini is always skipped (handles audio natively). | Optional (default: `auto`) |
341
+
342
+ ### Example Configuration
343
+
344
+ ```json
345
+ {
346
+ "servers": {
347
+ "videoMcp": {
348
+ "type": "stdio",
349
+ "command": "video-context-mcp",
350
+ "env": {
351
+ "GEMINI_API_KEY": "your-gemini-key",
352
+ "Z_AI_API_KEY": "your-zai-key",
353
+ "MOONSHOT_API_KEY": "your-moonshot-key",
354
+ "DEEPGRAM_API_KEY": "your-deepgram-key",
355
+ "ASSEMBLYAI_API_KEY": "your-assemblyai-key",
356
+ "GROQ_API_KEY": "your-groq-key",
357
+ "VIDEO_MCP_DEFAULT_PROVIDER": "glm",
358
+ "AUDIO_ENHANCE_VIDEO_ANALYSIS": "auto"
359
+ }
360
+ }
361
+ }
362
+ }
363
+ ```
364
+
365
+ > **Note:** `VIDEO_MCP_MAX_FRAMES` only applies when using **GLM or Kimi** as the provider. Gemini uploads the full video natively and ignores this setting, so there is no need to set it when running with `VIDEO_MCP_DEFAULT_PROVIDER=gemini`.
366
+
367
+ ## Development
368
+
369
+ ```bash
370
+ # Install dependencies
371
+ npm install
372
+
373
+ # Run in development (auto-restart on changes)
374
+ npm run dev
375
+
376
+ # Build for production
377
+ npm run build
378
+
379
+ # Run type checking
380
+ npm run type-check
381
+
382
+ # Run linter
383
+ npm run lint
384
+
385
+ # Run automated tests
386
+ npm run test
387
+
388
+ # Run tests in watch mode
389
+ npm run test:watch
390
+
391
+ # Run tests with coverage
392
+ npm run test:coverage
393
+
394
+ # Format all files
395
+ npm run format
396
+
397
+ # Check formatting only
398
+ npm run format:check
399
+
400
+ # Lint + Type-check + Format + Build
401
+ npm run ltfb
402
+ ```
403
+
404
+ ## Architecture
405
+
406
+ ```
407
+ video-mcp/
408
+ ├── src/
409
+ │ ├── index.ts # MCP server entry point
410
+ │ ├── tools/ # MCP tool implementations
411
+ │ │ └── transcribeVideo.ts # Audio transcription tool
412
+ │ ├── services/ # Backend clients (Kimi, GLM, Gemini, ffmpeg)
413
+ │ │ └── audio/ # Audio provider clients (Deepgram, AssemblyAI, Groq)
414
+ │ └── utils/ # Helpers (temp files, base64, audio injection)
415
+ ├── .vscode/
416
+ │ └── mcp.json # VS Code MCP configuration
417
+ ├── docs/
418
+ │ └── technical/ # Technical documentation
419
+ └── .github/
420
+ └── copilot-instructions.md # Copilot AI assistant guidelines
421
+ ```
422
+
423
+ ## License
424
+
425
+ MIT
426
+
427
+ ## Credits
428
+
429
+ - [MCP SDK](https://github.com/modelcontextprotocol/typescript-sdk) by Anthropic
430
+ - [Kimi K2.5](https://github.com/MoonshotAI/Kimi-K2.5) by Moonshot AI
431
+ - [GLM-4.6V](https://docs.z.ai/guides/vlm/glm-4.6v) by Z.AI
432
+ - [ffmpeg](https://ffmpeg.org/) for video processing
@@ -0,0 +1,3 @@
#!/usr/bin/env node
// The compiled entry point is executable-only and has no public API;
// the empty `export {}` exists solely to mark this declaration file as a module.
export {};
//# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":""}
package/dist/index.js ADDED
@@ -0,0 +1,152 @@
#!/usr/bin/env node
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { z } from 'zod';
// Import tool handlers
import { analyzeVideoTool } from './tools/analyzeVideo.js';
import { summarizeVideoTool } from './tools/summarizeVideo.js';
import { extractFramesTool } from './tools/extractFrames.js';
import { searchTimestampTool } from './tools/searchTimestamp.js';
import { getVideoInfoTool } from './tools/getVideoInfo.js';
import { transcribeVideoTool } from './tools/transcribeVideo.js';
import { setLoggerServer } from './utils/logger.js';
/**
 * Main entry point for the Video Context MCP Server.
 * Creates an MCP server, registers all six video/audio tools, and connects via stdio.
 */
async function main() {
    // Create MCP server with name and version
    const server = new McpServer({
        name: 'video-mcp',
        // Fixed: previously hard-coded to '1.0.0', which did not match the
        // published package version (0.11.0-beta). Keep in sync with package.json.
        version: '0.11.0-beta',
    }, {
        capabilities: {
            logging: {}, // Enable logging for progress reporting
        },
    });
    // Wire up the logger so tools can emit progress notifications
    setLoggerServer(server);
    // NOTE(review): every registration below passes a ZodObject (z.object({...})) as
    // inputSchema; some MCP SDK versions type inputSchema as a raw Zod shape
    // ({ key: zodType }) instead — confirm against the SDK version pinned in package.json.
    // Register all video analysis tools
    // Tool 1: analyze_video - Ask questions about video content
    server.registerTool('analyze_video', {
        title: 'Analyze Video',
        description: 'Ask questions about video content and get AI-powered answers. Supports both local files and URLs.',
        inputSchema: z.object({
            videoPath: z
                .string()
                .describe('Path to the video file (local path or URL)'),
            question: z
                .string()
                .describe('Question to ask about the video content'),
            provider: z
                .enum(['glm', 'kimi', 'gemini'])
                .optional()
                .describe("AI backend to use: 'glm' (GLM-4.6V, default), 'kimi' (Kimi K2.5), or 'gemini' (Gemini 3 Flash Preview)"),
        }),
    }, analyzeVideoTool);
    // Tool 2: summarize_video - Generate structured video summary
    server.registerTool('summarize_video', {
        title: 'Summarize Video',
        description: 'Generate a structured summary of the video including overview, key scenes, and timeline. For long videos (>5 min), extracts keyframes to reduce token usage (unless using Gemini, which processes natively).',
        inputSchema: z.object({
            videoPath: z
                .string()
                .describe('Path to the video file (local path or URL)'),
            provider: z
                .enum(['glm', 'kimi', 'gemini'])
                .optional()
                .describe("AI backend to use: 'glm' (GLM-4.6V, default), 'kimi' (Kimi K2.5), or 'gemini' (Gemini 3 Flash Preview)"),
        }),
    }, summarizeVideoTool);
    // Tool 3: extract_frames - Extract frames from video (pure ffmpeg, no AI backend)
    server.registerTool('extract_frames', {
        title: 'Extract Frames',
        description: 'Extract frames from a video at specific timestamps or intervals. Supports local files (including file:// URIs) and remote http(s) URLs. No AI backend required.',
        inputSchema: z.object({
            videoPath: z
                .string()
                .describe('Path to the video file (local path, file:// URI, or http(s) URL)'),
            mode: z
                .enum(['even', 'interval', 'timestamps'])
                .describe("Extraction mode: 'even' (N evenly-spaced frames), 'interval' (every N seconds), or 'timestamps' (at specific times)"),
            count: z
                .number()
                .int()
                .min(1)
                .max(100)
                .optional()
                .describe("Number of frames to extract (required for 'even' mode)"),
            intervalSec: z
                .number()
                .min(0.1)
                .optional()
                .describe("Interval in seconds between frames (required for 'interval' mode)"),
            timestamps: z
                .array(z.number().min(0))
                .optional()
                .describe("Array of timestamps in seconds (required for 'timestamps' mode)"),
        }),
    }, extractFramesTool);
    // Tool 4: search_timestamp - Find when something happens in video
    server.registerTool('search_timestamp', {
        title: 'Search Timestamp',
        description: 'Find the timestamp when something specific happens in a video. Extracts frames and uses AI to locate the content. Supports local files (including file:// URIs) and remote http(s) URLs.',
        inputSchema: z.object({
            videoPath: z
                .string()
                .describe('Path to the video file (local path, file:// URI, or http(s) URL)'),
            query: z
                .string()
                .describe("What to search for, e.g., 'person waves', 'dog runs', 'car crash'"),
            provider: z
                .enum(['glm', 'kimi', 'gemini'])
                .optional()
                .describe("AI backend to use: 'glm' (GLM-4.6V, default), 'kimi' (Kimi K2.5), or 'gemini' (Gemini 3 Flash Preview)"),
        }),
    }, searchTimestampTool);
    // Tool 5: get_video_info - Get video metadata (pure ffprobe, no AI backend)
    server.registerTool('get_video_info', {
        title: 'Get Video Info',
        description: 'Get video metadata including duration, resolution, fps, codec, file size, and format. Supports local files (including file:// URIs) and remote http(s) URLs. No AI backend required.',
        inputSchema: z.object({
            videoPath: z
                .string()
                .describe('Path to the video file (local path, file:// URI, or http(s) URL)'),
        }),
    }, getVideoInfoTool);
    // Tool 6: transcribe_video - Transcribe audio from a video
    server.registerTool('transcribe_video', {
        title: 'Transcribe Video',
        description: 'Extract audio from a video and transcribe it using a dedicated speech-to-text provider (Deepgram, AssemblyAI, Groq/Whisper, or Gemini). Supports speaker diarization and translation to English.',
        inputSchema: z.object({
            videoPath: z
                .string()
                .describe('Path to the video file (local path, file:// URI, or http(s) URL)'),
            provider: z
                .enum(['deepgram', 'assemblyai', 'groq', 'gemini'])
                .optional()
                .describe("Audio provider to use: 'deepgram' (Nova-2, default), 'assemblyai' (Universal), 'groq' (Whisper-large-v3, free), or 'gemini'. Defaults to AUDIO_MCP_DEFAULT_PROVIDER env var or first available key."),
            language: z
                .string()
                .optional()
                .describe("BCP-47 language code, e.g. 'en', 'es', 'fr'. Auto-detected if omitted."),
            diarize: z
                .boolean()
                .optional()
                .describe('Enable speaker diarization (who said what). Supported by Deepgram and AssemblyAI only. Silently ignored for other providers.'),
            translate: z
                .boolean()
                .optional()
                .describe('Translate the transcript to English. Supported by Groq and Gemini only. Silently ignored for other providers.'),
        }),
    }, transcribeVideoTool);
    // Connect to VS Code via stdio transport
    const transport = new StdioServerTransport();
    await server.connect(transport);
    // Server is now running, listening for tool calls from Copilot
}
main().catch((error) => {
    console.error('Fatal error starting video-mcp server:', error);
    process.exit(1);
});
//# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAA;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAA;AAChF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AAEvB,uBAAuB;AACvB,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAA;AAC9D,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAA;AAC5D,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAA;AAChE,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AAC1D,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAA;AAChE,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAA;AAEnD;;;GAGG;AAEH,KAAK,UAAU,IAAI;IACjB,0CAA0C;IAC1C,MAAM,MAAM,GAAG,IAAI,SAAS,CAC1B;QACE,IAAI,EAAE,WAAW;QACjB,OAAO,EAAE,OAAO;KACjB,EACD;QACE,YAAY,EAAE;YACZ,OAAO,EAAE,EAAE,EAAE,wCAAwC;SACtD;KACF,CACF,CAAA;IAED,8DAA8D;IAC9D,eAAe,CAAC,MAAM,CAAC,CAAA;IAEvB,oCAAoC;IAEpC,4DAA4D;IAC5D,MAAM,CAAC,YAAY,CACjB,eAAe,EACf;QACE,KAAK,EAAE,eAAe;QACtB,WAAW,EACT,mGAAmG;QACrG,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;YACpB,SAAS,EAAE,CAAC;iBACT,MAAM,EAAE;iBACR,QAAQ,CAAC,4CAA4C,CAAC;YACzD,QAAQ,EAAE,CAAC;iBACR,MAAM,EAAE;iBACR,QAAQ,CAAC,yCAAyC,CAAC;YACtD,QAAQ,EAAE,CAAC;iBACR,IAAI,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;iBAC/B,QAAQ,EAAE;iBACV,QAAQ,CACP,wGAAwG,CACzG;SACJ,CAAC;KACH,EACD,gBAAgB,CACjB,CAAA;IAED,8DAA8D;IAC9D,MAAM,CAAC,YAAY,CACjB,iBAAiB,EACjB;QACE,KAAK,EAAE,iBAAiB;QACxB,WAAW,EACT,8MAA8M;QAChN,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;YACpB,SAAS,EAAE,CAAC;iBACT,MAAM,EAAE;iBACR,QAAQ,CAAC,4CAA4C,CAAC;YACzD,QAAQ,EAAE,CAAC;iBACR,IAAI,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;iBAC/B,QAAQ,EAAE;iBACV,QAAQ,CACP,wGAAwG,CACzG;SACJ,CAAC;KACH,EACD,kBAAkB,CACnB,CAAA;IAED,qDAAqD;IACrD,MAAM,CAAC,YAAY,CACjB,gBAAgB,EAChB;QACE,KAAK,EAAE,gBAAgB;QACvB,WAAW,EACT,iKAAiK;QACnK,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;YACpB,SAAS,EAAE,CAAC;iBACT,MAAM,EAAE;iBACR,QAAQ,CACP,kEAAkE,CACnE;YACH,IAAI,EAAE,CAAC;iBACJ,IAAI,CAAC,CAAC,MAAM,EAAE,UAAU,EAAE,YAAY,CAAC,CAAC;iBACxC,QAAQ,CACP,qHAAqH,CACtH;YACH,KAAK,EAAE,CAAC;iBACL,MAAM,EAAE;iBACR,GAAG,EAAE;iBACL,GAAG,CAAC,CAAC,CAAC;iBACN,GAAG,CAAC,GAAG,C
AAC;iBACR,QAAQ,EAAE;iBACV,QAAQ,CAAC,wDAAwD,CAAC;YACrE,WAAW,EAAE,CAAC;iBACX,MAAM,EAAE;iBACR,GAAG,CAAC,GAAG,CAAC;iBACR,QAAQ,EAAE;iBACV,QAAQ,CACP,mEAAmE,CACpE;YACH,UAAU,EAAE,CAAC;iBACV,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;iBACxB,QAAQ,EAAE;iBACV,QAAQ,CACP,iEAAiE,CAClE;SACJ,CAAC;KACH,EACD,iBAAiB,CAClB,CAAA;IAED,kEAAkE;IAClE,MAAM,CAAC,YAAY,CACjB,kBAAkB,EAClB;QACE,KAAK,EAAE,kBAAkB;QACzB,WAAW,EACT,0LAA0L;QAC5L,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;YACpB,SAAS,EAAE,CAAC;iBACT,MAAM,EAAE;iBACR,QAAQ,CACP,kEAAkE,CACnE;YACH,KAAK,EAAE,CAAC;iBACL,MAAM,EAAE;iBACR,QAAQ,CACP,mEAAmE,CACpE;YACH,QAAQ,EAAE,CAAC;iBACR,IAAI,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;iBAC/B,QAAQ,EAAE;iBACV,QAAQ,CACP,wGAAwG,CACzG;SACJ,CAAC;KACH,EACD,mBAAmB,CACpB,CAAA;IAED,8CAA8C;IAC9C,MAAM,CAAC,YAAY,CACjB,gBAAgB,EAChB;QACE,KAAK,EAAE,gBAAgB;QACvB,WAAW,EACT,sLAAsL;QACxL,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;YACpB,SAAS,EAAE,CAAC;iBACT,MAAM,EAAE;iBACR,QAAQ,CACP,kEAAkE,CACnE;SACJ,CAAC;KACH,EACD,gBAAgB,CACjB,CAAA;IAED,2DAA2D;IAC3D,MAAM,CAAC,YAAY,CACjB,kBAAkB,EAClB;QACE,KAAK,EAAE,kBAAkB;QACzB,WAAW,EACT,kMAAkM;QACpM,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;YACpB,SAAS,EAAE,CAAC;iBACT,MAAM,EAAE;iBACR,QAAQ,CACP,kEAAkE,CACnE;YACH,QAAQ,EAAE,CAAC;iBACR,IAAI,CAAC,CAAC,UAAU,EAAE,YAAY,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;iBAClD,QAAQ,EAAE;iBACV,QAAQ,CACP,qMAAqM,CACtM;YACH,QAAQ,EAAE,CAAC;iBACR,MAAM,EAAE;iBACR,QAAQ,EAAE;iBACV,QAAQ,CACP,wEAAwE,CACzE;YACH,OAAO,EAAE,CAAC;iBACP,OAAO,EAAE;iBACT,QAAQ,EAAE;iBACV,QAAQ,CACP,8HAA8H,CAC/H;YACH,SAAS,EAAE,CAAC;iBACT,OAAO,EAAE;iBACT,QAAQ,EAAE;iBACV,QAAQ,CACP,+GAA+G,CAChH;SACJ,CAAC;KACH,EACD,mBAAmB,CACpB,CAAA;IAED,yCAAyC;IACzC,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAA;IAC5C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;IAE/B,+DAA+D;AACjE,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACrB,OAAO,CAAC,KAAK,CAAC,wCAAwC,EAAE,KAAK,CAAC,CAAA;IAC9D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;AACjB,CAAC,CAAC,CAAA"}
@@ -0,0 +1,28 @@
/**
 * AssemblyAI Audio Client
 * Transcribes audio files using AssemblyAI's Universal (best) model
 * Supports speaker diarization and other audio intelligence features
 */
export interface AssemblyAiTranscribeOptions {
    // BCP-47 language code (e.g. 'en', 'es'); auto-detected when omitted
    language?: string;
    // Enable speaker diarization (label each segment with its speaker)
    diarize?: boolean;
}
export interface TranscriptSegment {
    // Speaker label for this segment; only populated when diarization was
    // requested — presumably AssemblyAI's 'A'/'B'/… labels, verify against client
    speaker?: string;
    // Transcribed text for this segment
    text: string;
}
export interface TranscriptResult {
    // Full transcript as a single string
    text: string;
    // Per-speaker segments; present when diarization produced them
    segments?: TranscriptSegment[];
}
export declare class AssemblyAiClient {
    private client;
    constructor(apiKey: string);
    /**
     * Transcribe an audio file using AssemblyAI
     * @param audioPath - Local path to the audio file (.m4a, .mp3, etc.)
     * @param options - Transcription options
     */
    transcribe(audioPath: string, options?: AssemblyAiTranscribeOptions): Promise<TranscriptResult>;
}
//# sourceMappingURL=assemblyAiClient.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"assemblyAiClient.d.ts","sourceRoot":"","sources":["../../../src/services/audio/assemblyAiClient.ts"],"names":[],"mappings":"AAEA;;;;GAIG;AAEH,MAAM,WAAW,2BAA2B;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,OAAO,CAAC,EAAE,OAAO,CAAA;CAClB;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,IAAI,EAAE,MAAM,CAAA;CACb;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,QAAQ,CAAC,EAAE,iBAAiB,EAAE,CAAA;CAC/B;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAAY;gBAEd,MAAM,EAAE,MAAM;IAI1B;;;;OAIG;IACG,UAAU,CACd,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,2BAAgC,GACxC,OAAO,CAAC,gBAAgB,CAAC;CAkC7B"}