video-context-mcp-server 1.2.2 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -7
- package/dist/generated/version.d.ts +1 -1
- package/dist/generated/version.js +1 -1
- package/dist/services/providerRouter.js +1 -1
- package/dist/services/qwenClient.d.ts +1 -1
- package/dist/services/qwenClient.js +3 -3
- package/dist/tools/schemas.js +3 -3
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -15,7 +15,7 @@ An MCP server that gives coding assistants (GitHub Copilot, Cursor, Claude Code)
|
|
|
15
15
|
- 🎙️ **Audio Transcription** — Transcribe speech with paragraph-level timestamps (`[MM:SS]`) or export as SRT/VTT subtitles and JSON using Deepgram, AssemblyAI, Groq/Whisper, or Gemini
|
|
16
16
|
- 🔊 **Speaker Diarization** — Identify who said what (Deepgram and AssemblyAI)
|
|
17
17
|
- 🔊 **Audio-Enhanced Analysis** — Auto-transcribes audio and injects transcripts into AI prompts for richer results (GLM/Kimi/Qwen)
|
|
18
|
-
- 🔄 **Multi-Provider Support** — Choose between GLM-4.6V, Qwen3.
|
|
18
|
+
- 🔄 **Multi-Provider Support** — Choose between GLM-4.6V, Qwen3.7, Kimi K2.6, Gemini, or MiMo-V2.5
|
|
19
19
|
- 🎯 **Smart Video Handling** — Extracts keyframes from long videos to reduce token usage
|
|
20
20
|
- 🗣️ **Text-to-Speech** ⚠️ _Experimental_ — Convert text to natural speech audio (MiniMax TTS)
|
|
21
21
|
- 🖼️ **Image Generation** ⚠️ _Experimental_ — Generate images from text prompts (MiniMax image-01)
|
|
@@ -417,7 +417,7 @@ Set all keys to get the full fallback chain. The server will try Gemini first, t
|
|
|
417
417
|
| ----------------------------------------- | ------------------- | --------------------------------------------------------------------------------------------- |
|
|
418
418
|
| **Gemini 3.5 Flash** (default, free-tier) | `GEMINI_API_KEY` | [Get key](https://aistudio.google.com/app/apikey) |
|
|
419
419
|
| **GLM-4.6V** (free-tier) | `Z_AI_API_KEY` | [Get key](https://z.ai/manage-apikey/apikey-list) |
|
|
420
|
-
| **Qwen3.
|
|
420
|
+
| **Qwen3.7** (paid) | `DASHSCOPE_API_KEY` | [Get key](https://modelstudio.console.alibabacloud.com/ap-southeast-1?tab=dashboard#/api-key) |
|
|
421
421
|
| **Kimi K2.6** (paid) | `MOONSHOT_API_KEY` | [Get key](https://platform.kimi.ai) |
|
|
422
422
|
| **MiMo-V2.5** (paid) | `MIMO_API_KEY` | [Get key](https://platform.xiaomimimo.com/#/console/api-keys) |
|
|
423
423
|
|
|
@@ -448,7 +448,7 @@ When an audio key is missing or an audio API call fails at runtime, tools automa
|
|
|
448
448
|
|
|
449
449
|
### Video Providers
|
|
450
450
|
|
|
451
|
-
| Feature | Gemini 3.5 Flash (default) | GLM-4.6V | Qwen3.
|
|
451
|
+
| Feature | Gemini 3.5 Flash (default) | GLM-4.6V | Qwen3.7 | Kimi K2.6 | MiMo-V2.5 |
|
|
452
452
|
| -------------- | ---------------------------------------------- | ------------------------------------------------------ | ------------------------------------------------------ | ---------------------------------------------- | ------------------------------------------------------ |
|
|
453
453
|
| Price | Free tier available | Free tier available (GLM-4.6V-Flash) | $0.50 input / $3.00 output per 1M tokens | $0.60 input / $3.00 output per 1M tokens | $0.40 input / $2.00 output per 1M tokens |
|
|
454
454
|
| Video formats | mp4, mpeg, mov, avi, flv, mpg, webm, wmv, 3gpp | mp4, avi, mov, wmv, webm, m4v | mp4, avi, mov, wmv, webm, m4v | mp4, mpeg, mov, avi, flv, mpg, webm, wmv, 3gpp | mp4, mov, avi, wmv |
|
|
@@ -456,7 +456,7 @@ When an audio key is missing or an audio API call fails at runtime, tools automa
|
|
|
456
456
|
| Max file size | 2 GB | ~12 MB base64 / frames fallback / **unlimited w/ S3↓** | ~10 MB base64 / frames fallback / **unlimited w/ S3↓** | 100 MB | ~10 MB base64 / frames fallback / **unlimited w/ S3↓** |
|
|
457
457
|
| Best for | **Default** — free, no card required | Free, no card required | SOTA agentic coding | Paid — broadest format support | Paid — thinking mode; multimodal |
|
|
458
458
|
|
|
459
|
-
**Gemini 3.5 Flash** is the default — it offers a free tier with no credit card required, 1M context window, and 2 GB file support. **GLM-4.6V** is the second fallback — also free with no card required. **Qwen3.
|
|
459
|
+
**Gemini 3.5 Flash** is the default — it offers a free tier with no credit card required, 1M context window, and 2 GB file support. **GLM-4.6V** is the second fallback — also free with no card required. **Qwen3.7** is a paid provider at $0.50 input / $3.00 output per 1M tokens with SOTA agentic coding performance. **Kimi K2.6** is a paid provider with the broadest video format support. **MiMo-V2.5** is Xiaomi's multimodal model with thinking mode support ($0.40 input / $2.00 output per 1M tokens).
|
|
460
460
|
|
|
461
461
|
Set `VIDEO_MCP_DEFAULT_PROVIDER=gemini`, `glm`, `qwen`, `kimi`, or `mimo` to change the default provider used when a tool call does not pass `provider`. If a tool call includes `provider`, that per-call value takes precedence.
|
|
462
462
|
|
|
@@ -465,7 +465,7 @@ Set `VIDEO_MCP_DEFAULT_PROVIDER=gemini`, `glm`, `qwen`, `kimi`, or `mimo` to cha
|
|
|
465
465
|
<details open>
|
|
466
466
|
<summary><strong>Automatic S3 relay: bypass the 10 MB local file limit with GLM, Qwen, and MiMo</strong></summary>
|
|
467
467
|
|
|
468
|
-
**GLM-4.6V**, **Qwen3.
|
|
468
|
+
**GLM-4.6V**, **Qwen3.7**, and **MiMo-V2.5** all accept direct video URLs, but base64-encoding a local file caps out at **10–12 MB**. Above that limit, the server first tries to fall back to an upload-capable provider (Gemini or Kimi) if one is available, then falls back to **frame-based analysis** as a last resort. For the best results on large local videos, set `AWS_S3_BUCKET` — the server uploads the full video to S3 and passes a presigned URL to GLM, Qwen, and MiMo, bypassing the base64 limit entirely and taking priority over both fallbacks. No manual upload step needed.
|
|
469
469
|
|
|
470
470
|
#### Why S3 works
|
|
471
471
|
|
|
@@ -691,7 +691,7 @@ Set `AUDIO_MCP_DEFAULT_PROVIDER` to change the default.
|
|
|
691
691
|
| Variable | Description | Default |
|
|
692
692
|
| ---------------------------- | --------------------------------------------------------------------------------------------------------------------- | -------- |
|
|
693
693
|
| `Z_AI_API_KEY` | Z.AI API key for GLM-4.6V | — |
|
|
694
|
-
| `DASHSCOPE_API_KEY` | Alibaba Cloud API key for Qwen3.
|
|
694
|
+
| `DASHSCOPE_API_KEY` | Alibaba Cloud API key for Qwen3.7 | — |
|
|
695
695
|
| `MOONSHOT_API_KEY` | Moonshot AI API key for Kimi K2.6 | — |
|
|
696
696
|
| `GEMINI_API_KEY` | Google API key for Gemini | — |
|
|
697
697
|
| `MIMO_API_KEY` | Xiaomi MiMo API key for MiMo-V2.5 | — |
|
|
@@ -1067,7 +1067,7 @@ Proprietary — All Rights Reserved. No part of this software may be copied, mod
|
|
|
1067
1067
|
- [MCP SDK](https://github.com/modelcontextprotocol/typescript-sdk) by Anthropic
|
|
1068
1068
|
- [Kimi K2.6](https://github.com/MoonshotAI/Kimi-K2.6) by Moonshot AI
|
|
1069
1069
|
- [GLM-4.6V](https://docs.z.ai/guides/vlm/glm-4.6v) by Z.AI
|
|
1070
|
-
- [Qwen3.
|
|
1070
|
+
- [Qwen3.7](https://bailian.console.alibabacloud.com/ap-southeast-1/) by Alibaba Cloud
|
|
1071
1071
|
- [MiMo-V2.5](https://platform.xiaomimimo.com/) by Xiaomi
|
|
1072
1072
|
- [Deepgram](https://www.deepgram.com/) for audio transcription
|
|
1073
1073
|
- [AssemblyAI](https://www.assemblyai.com/) for audio transcription
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const VERSION = "1.2.2";
|
|
1
|
+
export declare const VERSION = "1.2.3";
|
|
2
2
|
//# sourceMappingURL=version.d.ts.map
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Qwen3.
|
|
2
|
+
* Qwen3.7 Client
|
|
3
3
|
* Handles video analysis using Alibaba Cloud's DashScope API (OpenAI-compatible)
|
|
4
4
|
*/
|
|
5
5
|
const QWEN_BASE_URL = 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions';
|
|
6
|
-
const QWEN_MODEL = 'qwen3.
|
|
7
|
-
const QWEN_MODEL_FLASH = 'qwen3.
|
|
6
|
+
const QWEN_MODEL = 'qwen3.7-plus';
|
|
7
|
+
const QWEN_MODEL_FLASH = 'qwen3.7-plus';
|
|
8
8
|
/** Default fps for server-side frame extraction on video URL inputs. */
|
|
9
9
|
const QWEN_DEFAULT_FPS = 2.0;
|
|
10
10
|
/** Client-side request timeout in ms. Default: 4 min (just under DashScope's ~5 min server cap).
|
package/dist/tools/schemas.js
CHANGED
|
@@ -24,13 +24,13 @@ export const analyzeVideoSchema = z.object({
|
|
|
24
24
|
question: z.string().describe('Question to ask about the video content'),
|
|
25
25
|
provider: videoProviderEnum
|
|
26
26
|
.optional()
|
|
27
|
-
.describe("AI backend to use: 'gemini' (Gemini 3.5 Flash, default), 'glm' (GLM-4.6V), 'qwen' (Qwen3.
|
|
27
|
+
.describe("AI backend to use: 'gemini' (Gemini 3.5 Flash, default), 'glm' (GLM-4.6V), 'qwen' (Qwen3.7), 'kimi' (Kimi K2.6), or 'mimo' (MiMo-V2.5)"),
|
|
28
28
|
});
|
|
29
29
|
export const summarizeVideoSchema = z.object({
|
|
30
30
|
videoPath: z.string().describe('Path to the video file (local path or URL)'),
|
|
31
31
|
provider: videoProviderEnum
|
|
32
32
|
.optional()
|
|
33
|
-
.describe("AI backend to use: 'gemini' (Gemini 3.5 Flash, default), 'glm' (GLM-4.6V), 'qwen' (Qwen3.
|
|
33
|
+
.describe("AI backend to use: 'gemini' (Gemini 3.5 Flash, default), 'glm' (GLM-4.6V), 'qwen' (Qwen3.7), 'kimi' (Kimi K2.6), or 'mimo' (MiMo-V2.5)"),
|
|
34
34
|
});
|
|
35
35
|
export const extractFramesSchema = z.object({
|
|
36
36
|
videoPath: z
|
|
@@ -84,7 +84,7 @@ export const searchTimestampSchema = z.object({
|
|
|
84
84
|
.describe("What to search for, e.g., 'person waves', 'dog runs', 'car crash'"),
|
|
85
85
|
provider: videoProviderEnum
|
|
86
86
|
.optional()
|
|
87
|
-
.describe("AI backend to use: 'gemini' (Gemini 3.5 Flash, default), 'glm' (GLM-4.6V), 'qwen' (Qwen3.
|
|
87
|
+
.describe("AI backend to use: 'gemini' (Gemini 3.5 Flash, default), 'glm' (GLM-4.6V), 'qwen' (Qwen3.7), 'kimi' (Kimi K2.6), or 'mimo' (MiMo-V2.5)"),
|
|
88
88
|
});
|
|
89
89
|
export const getVideoInfoSchema = z.object({
|
|
90
90
|
videoPath: z
|
package/package.json
CHANGED