nvidia-nim-mcp 1.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,18 +1,20 @@
1
1
  # NVIDIA NIM MCP Server
2
2
 
3
- A production-ready **Model Context Protocol (MCP)** server for consuming **NVIDIA NIM** (NVIDIA Inference Microservices) models. Supports LLMs, embeddings, reranking, function calling, and vision models.
3
+ A production-ready **Model Context Protocol (MCP)** server for consuming **NVIDIA NIM** (NVIDIA Inference Microservices) models. Supports 50+ LLMs, multimodal models, image generation, embeddings, reranking, function calling, vision, and code-specialized models with rich metadata for intelligent agent selection.
4
4
 
5
5
  ---
6
6
 
7
7
  ## 🚀 Features
8
8
 
9
- - **7 MCP Tools**: chat completion, text generation, embeddings, reranking, function calling, model listing, and model info
10
- - **20+ Supported Models**: Llama 3.1/3.2, Mistral, Mixtral, Phi-3, Gemma 2, Qwen 2.5, Nemotron, and more
9
+ - **11 MCP Tools**: chat completion, text generation, embeddings, reranking, function calling, model listing, model info, **image generation**, **image analysis**, **multimodal tasks**, **model comparison**
10
+ - **50+ Supported Models**: Llama 3.1/3.2, Nemotron 3 Ultra (550B), MiniMax M3, Kimi K2.6 (1T), DeepSeek V4 Pro, GLM 5.1, Qwen 3.5 397B, Mistral Large 3 (675B), GPT-OSS 120B, DiffusionGemma, FLUX.1, SDXL, SD3, and more
11
+ - **Rich Model Metadata**: licensing, hardware requirements, benchmarks, image generation specs, reasoning modes, tags for agent selection
12
+ - **Advanced Filtering**: by commercial use, reasoning, vision, function calling, multimodal, context length, tags, hardware
11
13
  - **Production-Grade**: automatic retries with exponential backoff, per-minute rate limiting, structured JSON logging
12
14
  - **Type-Safe**: full TypeScript, Zod input validation on every tool
13
15
  - **Docker-Ready**: multi-stage Dockerfile with non-root user, health checks
14
16
  - **Configurable**: all settings via environment variables
15
- - **Multiple Distribution Formats**: NPM package, Docker image, standalone executable
17
+ - **Single Required Env**: Only `NVIDIA_API_KEY` required; all others have sensible defaults
16
18
 
17
19
  ---
18
20
 
@@ -81,17 +83,22 @@ Copy `.env.example` to `.env` and fill in your API key:
81
83
  cp .env.example .env
82
84
  ```
83
85
 
86
+ **Only `NVIDIA_API_KEY` is required** — all other variables have production-ready defaults:
87
+
84
88
  | Variable | Required | Default | Description |
85
89
  |---|---|---|---|
86
90
  | `NVIDIA_API_KEY` | ✅ | — | Your NVIDIA NGC API key |
87
91
  | `NVIDIA_NIM_BASE_URL` | ❌ | `https://integrate.api.nvidia.com/v1` | Base URL for NIM API |
88
- | `DEFAULT_MODEL` | ❌ | `z-ai/glm5` | Default model for completions (specialized in software development) |
92
+ | `DEFAULT_MODEL` | ❌ | `black-forest-labs/flux.1-dev` | Default model (best image generation) |
89
93
  | `MAX_REQUESTS_PER_MINUTE` | ❌ | `40` | Rate limit cap (NVIDIA API limit) |
90
94
  | `MAX_TOKENS_PER_REQUEST` | ❌ | `4096` | Hard cap on tokens per request |
91
95
  | `REQUEST_TIMEOUT_MS` | ❌ | `120000` | Request timeout (ms) |
92
96
  | `MAX_RETRIES` | ❌ | `3` | Max retry attempts on failure |
93
97
  | `RETRY_DELAY_MS` | ❌ | `1000` | Base delay between retries (ms) |
94
98
  | `LOG_LEVEL` | ❌ | `info` | `error\|warn\|info\|debug` |
99
+ | `ENABLE_IMAGE_GENERATION` | ❌ | `true` | Enable image generation tools |
100
+ | `ENABLE_VISION` | ❌ | `true` | Enable vision/multimodal tools |
101
+ | `ENABLE_MULTIMODAL` | ❌ | `true` | Enable multimodal task tools |
95
102
 
96
103
  ---
97
104
 
@@ -164,7 +171,6 @@ NVIDIA_API_KEY=nvapi-your-key ./dist/index.js
164
171
  "command": "nvidia-nim-mcp",
165
172
  "env": {
166
173
  "NVIDIA_API_KEY": "nvapi-your-key-here",
167
- "DEFAULT_MODEL": "z-ai/glm5",
168
174
  "LOG_LEVEL": "info"
169
175
  }
170
176
  }
@@ -181,7 +187,6 @@ NVIDIA_API_KEY=nvapi-your-key ./dist/index.js
181
187
  "args": ["nvidia-nim-mcp"],
182
188
  "env": {
183
189
  "NVIDIA_API_KEY": "nvapi-your-key-here",
184
- "DEFAULT_MODEL": "z-ai/glm5",
185
190
  "LOG_LEVEL": "info"
186
191
  }
187
192
  }
@@ -198,7 +203,6 @@ NVIDIA_API_KEY=nvapi-your-key ./dist/index.js
198
203
  "args": ["/absolute/path/to/nvidia-nim-mcp/dist/index.js"],
199
204
  "env": {
200
205
  "NVIDIA_API_KEY": "nvapi-your-key-here",
201
- "DEFAULT_MODEL": "z-ai/glm5",
202
206
  "LOG_LEVEL": "info"
203
207
  }
204
208
  }
@@ -215,12 +219,12 @@ Multi-turn conversation with any NIM LLM.
215
219
 
216
220
  ```json
217
221
  {
218
- "model": "z-ai/glm5",
222
+ "model": "nvidia/nemotron-3-ultra-550b-a55b",
219
223
  "messages": [
220
224
  { "role": "user", "content": "Explain quantum computing" }
221
225
  ],
222
226
  "temperature": 0.3,
223
- "max_tokens": 2048
227
+ "max_tokens": 4096
224
228
  }
225
229
  ```
226
230
 
@@ -230,7 +234,8 @@ Single-prompt text generation (simplified interface).
230
234
  ```json
231
235
  {
232
236
  "prompt": "Write a haiku about machine learning",
233
- "temperature": 0.5
237
+ "temperature": 0.5,
238
+ "max_tokens": 512
234
239
  }
235
240
  ```
236
241
 
@@ -261,7 +266,7 @@ Use NIM models with tool/function calling.
261
266
 
262
267
  ```json
263
268
  {
264
- "model": "z-ai/glm5",
269
+ "model": "z-ai/glm-5.1",
265
270
  "messages": [{ "role": "user", "content": "What's the weather in Paris?" }],
266
271
  "tools": [{
267
272
  "type": "function",
@@ -278,31 +283,162 @@ Use NIM models with tool/function calling.
278
283
  }
279
284
  ```
280
285
 
286
+ ### `generate_image`
287
+ Generate images from text prompts using FLUX.1, SDXL, SD3, DiffusionGemma.
288
+
289
+ ```json
290
+ {
291
+ "model": "black-forest-labs/flux.1-dev",
292
+ "prompt": "A photorealistic mountain landscape at sunset, 8K",
293
+ "width": 1024,
294
+ "height": 1024,
295
+ "steps": 30,
296
+ "cfg_scale": 3.5,
297
+ "sampler": "euler_a",
298
+ "scheduler": "simple"
299
+ }
300
+ ```
301
+
302
+ ### `analyze_image`
303
+ Analyze and describe images using vision/multimodal models.
304
+
305
+ ```json
306
+ {
307
+ "model": "moonshotai/kimi-k2.6",
308
+ "image_url": "https://example.com/image.jpg",
309
+ "prompt": "Describe this image in detail",
310
+ "detail": "high"
311
+ }
312
+ ```
313
+
314
+ ### `multimodal_task`
315
+ Perform multimodal tasks combining text and images.
316
+
317
+ ```json
318
+ {
319
+ "model": "minimaxai/minimax-m3",
320
+ "messages": [
321
+ {
322
+ "role": "user",
323
+ "content": [
324
+ { "type": "text", "text": "Analyze this chart" },
325
+ { "type": "image_url", "image_url": { "url": "https://example.com/chart.png" } }
326
+ ]
327
+ }
328
+ ],
329
+ "max_tokens": 2048
330
+ }
331
+ ```
332
+
281
333
  ### `list_models`
282
- List available models filtered by category.
334
+ List available models with rich metadata and advanced filtering.
283
335
 
284
336
  ```json
285
- { "category": "embedding" }
337
+ {
338
+ "category": "code",
339
+ "commercial_use": true,
340
+ "supports_reasoning": true,
341
+ "tags": ["coding", "agentic"],
342
+ "include_details": true
343
+ }
286
344
  ```
287
345
 
346
+ **Filter Options:**
347
+ - `category`: `language`, `embedding`, `reranking`, `vision`, `code`, `multimodal`, `image_generation`, `all`
348
+ - `commercial_use`: Filter by commercial license
349
+ - `supports_reasoning`: Filter by reasoning capability
350
+ - `supports_vision`: Filter by vision capability
351
+ - `supports_function_calling`: Filter by function calling
352
+ - `supports_multimodal`: Filter by multimodal input
353
+ - `min_context_length`: Minimum context window (tokens)
354
+ - `tags`: Filter by use case tags
355
+ - `hardware`: Filter by GPU type (Hopper, Blackwell, Ampere)
356
+ - `include_details`: Include full metadata (benchmarks, image specs, etc.)
357
+
288
358
  ### `get_model_info`
289
- Get details about a specific model.
359
+ Get complete metadata for a specific model.
290
360
 
291
361
  ```json
292
- { "model_id": "meta/llama-3.1-405b-instruct" }
362
+ { "model_id": "nvidia/nemotron-3-ultra-550b-a55b" }
293
363
  ```
294
364
 
295
- ---
365
+ **Returns:** licensing, hardware requirements, benchmarks, image gen specs, reasoning modes, tags, supported languages, etc.
366
+
367
+ ### `compare_models`
368
+ Compare 2-5 models side-by-side across all decision factors.
369
+
370
+ ```json
371
+ {
372
+ "model_ids": [
373
+ "nvidia/nemotron-3-ultra-550b-a55b",
374
+ "deepseek-ai/deepseek-v4-pro",
375
+ "moonshotai/kimi-k2.6",
376
+ "z-ai/glm-5.1"
377
+ ]
378
+ }
379
+ ```
380
+
381
+ **Returns:** Structured comparison table with licensing, hardware, benchmarks, capabilities, tags, image generation specs, etc.
296
382
 
297
- ## 📦 Supported Models
383
+ ---
298
384
 
299
- | Category | Models |
300
- |---|---|
301
- | **Language** | Llama 3.1 (8B/70B/405B), Mistral Large 2, Mixtral 8x22B/8x7B, Phi-3.5 Mini, Gemma 2 (9B/27B), Qwen 2.5 72B, Nemotron 70B, GLM-4 9B |
302
- | **Code** | Qwen 2.5 Coder 32B, **GLM-5** (default - specialized in software development & architecture) |
303
- | **Vision** | Llama 3.2 Vision (11B/90B) |
304
- | **Embeddings** | NV-Embed v1, NV-EmbedQA E5 v5, BGE-M3 |
305
- | **Reranking** | NV-RerankQA Mistral 4B v3 |
385
+ ## 📦 Supported Models (50+)
386
+
387
+ ### Language Models (Frontier Reasoning)
388
+
389
+ | Model | Parameters | Context | License | Commercial | Best For |
390
+ |---|---|---|---|---|---|
391
+ | `nvidia/nemotron-3-ultra-550b-a55b` | 550B (55B active) | 131K | OpenMDW-1.1 | ✅ | Frontier reasoning, coding, agentic, 1M context, multilingual |
392
+ | `nvidia/nemotron-3-ultra-550b-a55b-instruct` | 550B | 131K | OpenMDW-1.1 | ✅ | Instruction-tuned variant |
393
+ | `minimaxai/minimax-m3` | 428B (22B active) | 1M | Non-Commercial | ❌ | Multimodal, video (30min), 8hr coding, agentic |
394
+ | `moonshotai/kimi-k2.6` | 1T (32B active) | 256K | Modified MIT | ✅ | Long-horizon coding, 300 agents, vision, agentic |
395
+ | `deepseek-ai/deepseek-v4-pro` | 1.6T (49B active) | 1M | MIT | ✅ | Advanced coding, math, reasoning, 3 reasoning modes |
396
+ | `z-ai/glm-5.1` | 754B (DSA) | 131K | MIT | ✅ | Software engineering, agentic, SWE-Bench 58.4% |
397
+ | `qwen/qwen3.5-397b-a17b` | 397B (MoE) | 131K | Research | ❌ | Large-scale multilingual, multimodal |
398
+ | `mistralai/mistral-large-3-675b-instruct-2512` | 675B | 131K | Research | ❌ | Frontier reasoning, multimodal |
399
+ | `openai/gpt-oss-120b` | 120B | 131K | Apache 2.0 | ✅ | Open-weight, research, fine-tuning |
400
+ | `google/diffusiongemma-26b-a4b-it` | 25.2B (3.8B active) | 256K | Apache 2.0 | ✅ | Diffusion text gen, 35+ langs, fast, multimodal |
401
+
402
+ ### Code-Specialized Models
403
+
404
+ | Model | Parameters | Context | License | Commercial |
405
+ |---|---|---|---|---|
406
+ | `z-ai/glm-5.1` | 754B | 131K | MIT | ✅ |
407
+ | `z-ai/glm5` | - | 128K | Z.ai | ✅ |
408
+ | `qwen/qwen2.5-coder-32b-instruct` | 32B | 131K | Research | ❌ |
409
+
410
+ ### Multimodal / Vision Models
411
+
412
+ | Model | Parameters | Context | Vision | Video | License | Commercial |
413
+ |---|---|---|---|---|---|---|
414
+ | `meta/llama-3.2-90b-vision-instruct` | 90B | 128K | ✅ | ❌ | Llama 3.2 | ✅ |
415
+ | `meta/llama-3.2-11b-vision-instruct` | 11B | 128K | ✅ | ❌ | Llama 3.2 | ✅ |
416
+ | `nvidia/neva-22b` | 22B | 4K | ✅ | ❌ | NVIDIA | ✅ |
417
+ | `microsoft/phi-3.5-vision-instruct` | - | 128K | ✅ | ❌ | MIT | ✅ |
418
+ | `minimaxai/minimax-m3` | 428B | 1M | ✅ | ✅ (30min) | Non-Commercial | ❌ |
419
+ | `moonshotai/kimi-k2.6` | 1T | 256K | ✅ | ✅ | Modified MIT | ✅ |
420
+
421
+ ### Image Generation Models
422
+
423
+ | Model | Architecture | Resolutions | Aspect Ratios | Max Images | ControlNet | License | Commercial |
424
+ |---|---|---|---|---|---|---|---|
425
+ | `black-forest-labs/flux.1-dev` | Diffusion Transformer | 1024², 1152×896, 1344×768, 21:9 | 1:1, 16:9, 9:16, 4:3, 3:4, 21:9 | 1 | Canny, Depth | Apache 2.0* | ❌* |
426
+ | `black-forest-labs/flux.1-kontext-dev` | Diffusion Transformer | Same | Same | 1 | - | Apache 2.0* | ❌* |
427
+ | `nvidia/stable-diffusion-xl` | UNet + Attention | 1024², 1152×896, 1216×832 | 1:1, 16:9, 9:16, 4:3, 3:4 | 4 | - | SDXL 1.0 | ✅** |
428
+ | `stabilityai/sd-3-medium` | SD3 | Same | Same | 2 | - | Stability AI | ✅** |
429
+ | `nvidia/sdxl-turbo` | ADD | 512², 1024² | 1:1 | 4 | - | SDXL 1.0 | ✅** |
430
+
431
+ *\*Non-commercial default; commercial via contact*
432
+ **\*\*Requires Stability AI membership**
433
+
434
+ ### Embeddings & Reranking
435
+
436
+ | Model | Type | Context | Dimensions | License | Commercial |
437
+ |---|---|---|---|---|---|
438
+ | `nvidia/nv-embedqa-e5-v5` | Embedding | 512 | - | NVIDIA | ✅ |
439
+ | `nvidia/nv-embed-v1` | Embedding | 4096 | - | NVIDIA | ✅ |
440
+ | `baai/bge-m3` | Embedding | 8192 | - | MIT | ✅ |
441
+ | `nvidia/nv-rerankqa-mistral-4b-v3` | Reranking | 4096 | - | NVIDIA | ✅ |
306
442
 
307
443
  ---
308
444
 
@@ -319,13 +455,17 @@ Get details about a specific model.
319
455
  - [x] Docker multi-stage build (minimal image)
320
456
  - [x] Non-root Docker user
321
457
  - [x] Token cap enforcement
458
+ - [x] Single required env var (`NVIDIA_API_KEY`)
459
+ - [x] Feature flags for optional capabilities
460
+
461
+ ---
322
462
 
323
463
  ## 🧪 Testing
324
464
 
325
- The project includes a comprehensive test suite with over 60 tests covering:
465
+ The project includes a comprehensive test suite:
326
466
 
327
- - **Unit Tests**: Configuration, logging, model handling, and tool validation
328
- - **Integration Tests**: All 7 MCP tools with various input scenarios
467
+ - **Unit Tests**: Configuration, logging, model handling, tool validation
468
+ - **Integration Tests**: All 11 MCP tools with various input scenarios
329
469
  - **Error Handling**: Validation of edge cases and failure modes
330
470
  - **Schema Validation**: Zod-based input validation for all tools
331
471
 
@@ -345,7 +485,9 @@ npm test -- --watch
345
485
  npm test src/handlers.test.ts
346
486
  ```
347
487
 
348
- **Current Test Status**: ✅ All tests passing (62/62 tests)
488
+ **Current Test Status**: ✅ All tests passing (96 tests)
489
+
490
+ ---
349
491
 
350
492
  ## 🛠️ Development
351
493
 
@@ -378,9 +520,11 @@ npm test
378
520
  npm run check
379
521
  ```
380
522
 
523
+ ---
524
+
381
525
  ## 🤝 Contributing
382
526
 
383
- Contributions are welcome! Here's how you can contribute:
527
+ Contributions are welcome!
384
528
 
385
529
  1. **Fork the Repository**
386
530
  2. **Create a Feature Branch**: `git checkout -b feature/your-feature-name`
@@ -388,8 +532,8 @@ Contributions are welcome! Here's how you can contribute:
388
532
  4. **Add Tests**: Ensure new functionality is properly tested
389
533
  5. **Run Checks**: `npm run check` to verify code quality and tests
390
534
  6. **Commit Changes**: Use clear, descriptive commit messages
391
- 7. **Push to Your Fork**: `git push origin feature/your-feature-name`
392
- 8. **Open a Pull Request**: Describe your changes and their benefits
535
+ 7. **Push to Your Fork**: `git push origin feature/your-feature-name`
536
+ 8. **Open a Pull Request**: Describe your changes and their benefits
393
537
 
394
538
  ### Code Standards
395
539
 
@@ -407,9 +551,9 @@ Contributions are welcome! Here's how you can contribute:
407
551
  4. **Building**: Use `npm run build` to compile the project
408
552
  5. **Linting**: Run `npm run lint` to check code quality
409
553
 
410
- ## 📦 Packaging & Distribution
554
+ ---
411
555
 
412
- This project can be distributed and deployed in multiple formats:
556
+ ## 📦 Packaging & Distribution
413
557
 
414
558
  ### NPM Package
415
559
  - Published to npm registry for easy installation
@@ -429,6 +573,7 @@ This project can be distributed and deployed in multiple formats:
429
573
  - No installation required beyond Node.js
430
574
 
431
575
  ### Building Packages
576
+
432
577
  ```bash
433
578
  # Build the project
434
579
  npm run build
@@ -443,6 +588,8 @@ docker build -t nvidia-nim-mcp .
443
588
  npm run check && npm run build
444
589
  ```
445
590
 
591
+ ---
592
+
446
593
  ## 📄 License
447
594
 
448
- MIT
595
+ MIT
package/dist/client.d.ts CHANGED
@@ -7,6 +7,7 @@ export interface ContentPart {
7
7
  text?: string;
8
8
  image_url?: {
9
9
  url: string;
10
+ detail?: "low" | "high" | "auto";
10
11
  };
11
12
  }
12
13
  export interface ChatCompletionRequest {
@@ -27,6 +28,9 @@ export interface ChatCompletionRequest {
27
28
  name: string;
28
29
  };
29
30
  };
31
+ response_format?: {
32
+ type: "text" | "json_object";
33
+ };
30
34
  }
31
35
  export interface Tool {
32
36
  type: "function";
@@ -106,14 +110,64 @@ export interface RerankResponse {
106
110
  total_tokens: number;
107
111
  };
108
112
  }
113
+ export interface ImageGenerationRequest {
114
+ model: string;
115
+ prompt: string;
116
+ negative_prompt?: string;
117
+ width?: number;
118
+ height?: number;
119
+ num_images?: number;
120
+ steps?: number;
121
+ cfg_scale?: number;
122
+ seed?: number;
123
+ sampler?: string;
124
+ scheduler?: string;
125
+ response_format?: "url" | "b64_json";
126
+ }
127
+ export interface ImageGenerationResponse {
128
+ created: number;
129
+ model?: string;
130
+ data: Array<{
131
+ url?: string;
132
+ b64_json?: string;
133
+ revised_prompt?: string;
134
+ }>;
135
+ usage?: {
136
+ total_images: number;
137
+ };
138
+ }
139
+ export interface ImageAnalysisRequest {
140
+ model: string;
141
+ messages: ChatMessage[];
142
+ temperature?: number;
143
+ top_p?: number;
144
+ max_tokens?: number;
145
+ stream?: boolean;
146
+ }
109
147
  export declare class NIMClient {
110
- private readonly http;
111
- private readonly rateLimiter;
148
+ private readonly httpClient;
112
149
  constructor();
150
+ private createHttpClient;
113
151
  private normalizeError;
114
152
  chatCompletion(request: ChatCompletionRequest): Promise<ChatCompletionResponse>;
115
- chatCompletionStream(request: ChatCompletionRequest): AsyncGenerator<string>;
116
153
  embeddings(request: EmbeddingRequest): Promise<EmbeddingResponse>;
117
154
  rerank(request: RerankRequest): Promise<RerankResponse>;
155
+ generateImage(request: ImageGenerationRequest): Promise<ImageGenerationResponse>;
156
+ generateImageFluxSchnell(request: {
157
+ prompt: string;
158
+ width?: number;
159
+ height?: number;
160
+ seed?: number;
161
+ steps?: number;
162
+ }): Promise<ImageGenerationResponse>;
163
+ generateImageFluxKontext(request: {
164
+ prompt: string;
165
+ image: string;
166
+ aspect_ratio?: string;
167
+ steps?: number;
168
+ cfg_scale?: number;
169
+ seed?: number;
170
+ }): Promise<ImageGenerationResponse>;
171
+ analyzeImage(request: ImageAnalysisRequest): Promise<ChatCompletionResponse>;
118
172
  listModels(): Promise<string[]>;
119
173
  }