nvidia-nim-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example ADDED
@@ -0,0 +1,23 @@
1
+ # NVIDIA NIM Configuration
2
+ NVIDIA_API_KEY=nvapi-your-api-key-here
3
+ NVIDIA_NIM_BASE_URL=https://integrate.api.nvidia.com/v1
4
+
5
+ # Server Configuration
6
+ MCP_SERVER_NAME=nvidia-nim-mcp
7
+ MCP_SERVER_VERSION=1.0.0
8
+ LOG_LEVEL=info
9
+
10
+ # Rate Limiting
11
+ MAX_REQUESTS_PER_MINUTE=40
12
+ MAX_TOKENS_PER_REQUEST=4096
13
+ REQUEST_TIMEOUT_MS=120000
14
+
15
+ # Retry Configuration
16
+ MAX_RETRIES=3
17
+ RETRY_DELAY_MS=1000
18
+
19
+ # Default Model Settings
20
+ DEFAULT_MODEL=z-ai/glm5
21
+ DEFAULT_TEMPERATURE=0.3
22
+ DEFAULT_TOP_P=0.95
23
+ DEFAULT_MAX_TOKENS=4096
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,448 @@
1
+ # NVIDIA NIM MCP Server
2
+
3
+ A production-ready **Model Context Protocol (MCP)** server for consuming **NVIDIA NIM** (NVIDIA Inference Microservices) models. Supports LLMs, embeddings, reranking, function calling, and vision models.
4
+
5
+ ---
6
+
7
+ ## 🚀 Features
8
+
9
+ - **7 MCP Tools**: chat completion, text generation, embeddings, reranking, function calling, model listing, and model info
10
+ - **20+ Supported Models**: Llama 3.1/3.2, Mistral, Mixtral, Phi-3, Gemma 2, Qwen 2.5, Nemotron, and more
11
+ - **Production-Grade**: automatic retries with exponential backoff, per-minute rate limiting, structured JSON logging
12
+ - **Type-Safe**: full TypeScript, Zod input validation on every tool
13
+ - **Docker-Ready**: multi-stage Dockerfile with non-root user, health checks
14
+ - **Configurable**: all settings via environment variables
15
+ - **Multiple Distribution Formats**: NPM package, Docker image, standalone executable
16
+
17
+ ---
18
+
19
+ ## 📋 Prerequisites
20
+
21
+ - Node.js 18+ (for NPM installation) or Docker (for container deployment)
22
+ - An [NVIDIA NGC API key](https://build.nvidia.com) (`nvapi-...`)
23
+
24
+ ---
25
+
26
+ ## ⚙️ Installation
27
+
28
+ ### Option 1: NPM Global Installation (Recommended)
29
+
30
+ ```bash
31
+ # Install globally
32
+ npm install -g nvidia-nim-mcp
33
+
34
+ # Run directly
35
+ nvidia-nim-mcp
36
+ ```
37
+
38
+ ### Option 2: NPM Local Installation
39
+
40
+ ```bash
41
+ # Initialize your project
42
+ npm init -y
43
+
44
+ # Install locally
45
+ npm install nvidia-nim-mcp
46
+
47
+ # Run with npx
48
+ npx nvidia-nim-mcp
49
+ ```
50
+
51
+ ### Option 3: From Source
52
+
53
+ ```bash
54
+ # Clone / download the project
55
+ cd nvidia-nim-mcp
56
+
57
+ # Install dependencies
58
+ npm install
59
+
60
+ # Build TypeScript
61
+ npm run build
62
+ ```
63
+
64
+ ### Option 4: Docker
65
+
66
+ ```bash
67
+ # Pull from Docker Hub (when published)
68
+ docker pull nvidia-nim-mcp
69
+
70
+ # Or build locally
71
+ docker build -t nvidia-nim-mcp .
72
+ ```
73
+
74
+ ---
75
+
76
+ ## 🔑 Configuration
77
+
78
+ Copy `.env.example` to `.env` and fill in your API key:
79
+
80
+ ```bash
81
+ cp .env.example .env
82
+ ```
83
+
84
+ | Variable | Required | Default | Description |
85
+ |---|---|---|---|
86
+ | `NVIDIA_API_KEY` | ✅ | — | Your NVIDIA NGC API key |
87
+ | `NVIDIA_NIM_BASE_URL` | ❌ | `https://integrate.api.nvidia.com/v1` | Base URL for NIM API |
88
+ | `DEFAULT_MODEL` | ❌ | `z-ai/glm5` | Default model for completions (specialized in software development) |
89
+ | `MAX_REQUESTS_PER_MINUTE` | ❌ | `40` | Rate limit cap (NVIDIA API limit) |
90
+ | `MAX_TOKENS_PER_REQUEST` | ❌ | `4096` | Hard cap on tokens per request |
91
+ | `REQUEST_TIMEOUT_MS` | ❌ | `120000` | Request timeout (ms) |
92
+ | `MAX_RETRIES` | ❌ | `3` | Max retry attempts on failure |
93
+ | `RETRY_DELAY_MS` | ❌ | `1000` | Base delay between retries (ms) |
94
+ | `LOG_LEVEL` | ❌ | `info` | `error\|warn\|info\|debug` |
95
+
96
+ ---
97
+
98
+ ## 🚀 Running
99
+
100
+ ### NPM Global Installation
101
+ ```bash
102
+ # Run the server
103
+ nvidia-nim-mcp
104
+
105
+ # With custom environment variables
106
+ NVIDIA_API_KEY=nvapi-your-key LOG_LEVEL=debug nvidia-nim-mcp
107
+ ```
108
+
109
+ ### NPM Local Installation
110
+ ```bash
111
+ # Run with npx
112
+ npx nvidia-nim-mcp
113
+
114
+ # Or add to package.json scripts
115
+ # "scripts": { "start": "nvidia-nim-mcp" }
116
+ npm start
117
+ ```
118
+
119
+ ### From Source
120
+ ```bash
121
+ # Development mode with auto-reload
122
+ npm run dev
123
+
124
+ # Production mode (compiled)
125
+ npm run build && npm start
126
+ ```
127
+
128
+ ### Docker
129
+ ```bash
130
+ # Run with environment variables
131
+ docker run --rm \
132
+ -e NVIDIA_API_KEY=nvapi-your-key \
133
+ -e LOG_LEVEL=info \
134
+ nvidia-nim-mcp
135
+
136
+ # Run detached in the background (add `-p host:container` only if you need port mapping)
137
+ docker run -d \
138
+ --name nvidia-nim-mcp \
139
+ -e NVIDIA_API_KEY=nvapi-your-key \
140
+ nvidia-nim-mcp
141
+ ```
142
+
143
+ ### Standalone Executable
144
+ ```bash
145
+ # Make executable (if not already)
146
+ chmod +x dist/index.js
147
+
148
+ # Run directly
149
+ ./dist/index.js
150
+
151
+ # With environment variables
152
+ NVIDIA_API_KEY=nvapi-your-key ./dist/index.js
153
+ ```
154
+
155
+ ---
156
+
157
+ ## 🔧 MCP Client Configuration
158
+
159
+ ### For Global NPM Installation
160
+ ```json
161
+ {
162
+ "mcpServers": {
163
+ "nvidia-nim": {
164
+ "command": "nvidia-nim-mcp",
165
+ "env": {
166
+ "NVIDIA_API_KEY": "nvapi-your-key-here",
167
+ "DEFAULT_MODEL": "z-ai/glm5",
168
+ "LOG_LEVEL": "info"
169
+ }
170
+ }
171
+ }
172
+ }
173
+ ```
174
+
175
+ ### For Local NPM Installation
176
+ ```json
177
+ {
178
+ "mcpServers": {
179
+ "nvidia-nim": {
180
+ "command": "npx",
181
+ "args": ["nvidia-nim-mcp"],
182
+ "env": {
183
+ "NVIDIA_API_KEY": "nvapi-your-key-here",
184
+ "DEFAULT_MODEL": "z-ai/glm5",
185
+ "LOG_LEVEL": "info"
186
+ }
187
+ }
188
+ }
189
+ }
190
+ ```
191
+
192
+ ### For Direct Executable Path
193
+ ```json
194
+ {
195
+ "mcpServers": {
196
+ "nvidia-nim": {
197
+ "command": "node",
198
+ "args": ["/absolute/path/to/nvidia-nim-mcp/dist/index.js"],
199
+ "env": {
200
+ "NVIDIA_API_KEY": "nvapi-your-key-here",
201
+ "DEFAULT_MODEL": "z-ai/glm5",
202
+ "LOG_LEVEL": "info"
203
+ }
204
+ }
205
+ }
206
+ }
207
+ ```
208
+
209
+ ---
210
+
211
+ ## 🛠️ Available Tools
212
+
213
+ ### `chat_completion`
214
+ Multi-turn conversation with any NIM LLM.
215
+
216
+ ```json
217
+ {
218
+ "model": "z-ai/glm5",
219
+ "messages": [
220
+ { "role": "user", "content": "Explain quantum computing" }
221
+ ],
222
+ "temperature": 0.3,
223
+ "max_tokens": 2048
224
+ }
225
+ ```
226
+
227
+ ### `text_generation`
228
+ Single-prompt text generation (simplified interface).
229
+
230
+ ```json
231
+ {
232
+ "prompt": "Write a haiku about machine learning",
233
+ "temperature": 0.5
234
+ }
235
+ ```
236
+
237
+ ### `create_embeddings`
238
+ Convert text(s) to vector embeddings for RAG/search.
239
+
240
+ ```json
241
+ {
242
+ "model": "nvidia/nv-embed-v1",
243
+ "input": ["NVIDIA makes GPUs", "AI runs on GPUs"],
244
+ "truncate": "END"
245
+ }
246
+ ```
247
+
248
+ ### `rerank_passages`
249
+ Rerank passages by relevance to a query.
250
+
251
+ ```json
252
+ {
253
+ "query": "What is CUDA?",
254
+ "passages": ["CUDA is a GPU programming platform", "NIM serves AI models"],
255
+ "top_k": 3
256
+ }
257
+ ```
258
+
259
+ ### `function_calling`
260
+ Use NIM models with tool/function calling.
261
+
262
+ ```json
263
+ {
264
+ "model": "z-ai/glm5",
265
+ "messages": [{ "role": "user", "content": "What's the weather in Paris?" }],
266
+ "tools": [{
267
+ "type": "function",
268
+ "function": {
269
+ "name": "get_weather",
270
+ "description": "Get current weather",
271
+ "parameters": {
272
+ "type": "object",
273
+ "properties": { "city": { "type": "string" } },
274
+ "required": ["city"]
275
+ }
276
+ }
277
+ }]
278
+ }
279
+ ```
280
+
281
+ ### `list_models`
282
+ List available models filtered by category.
283
+
284
+ ```json
285
+ { "category": "embedding" }
286
+ ```
287
+
288
+ ### `get_model_info`
289
+ Get details about a specific model.
290
+
291
+ ```json
292
+ { "model_id": "meta/llama-3.1-405b-instruct" }
293
+ ```
294
+
295
+ ---
296
+
297
+ ## 📦 Supported Models
298
+
299
+ | Category | Models |
300
+ |---|---|
301
+ | **Language** | Llama 3.1 (8B/70B/405B), Mistral Large 2, Mixtral 8x22B/8x7B, Phi-3.5 Mini, Gemma 2 (9B/27B), Qwen 2.5 72B, Nemotron 70B, GLM-4 9B |
302
+ | **Code** | Qwen 2.5 Coder 32B, **GLM-5** (default - specialized in software development & architecture) |
303
+ | **Vision** | Llama 3.2 Vision (11B/90B) |
304
+ | **Embeddings** | NV-Embed v1, NV-EmbedQA E5 v5, BGE-M3 |
305
+ | **Reranking** | NV-RerankQA Mistral 4B v3 |
306
+
307
+ ---
308
+
309
+ ## 🏭 Production Checklist
310
+
311
+ - [x] Environment variable validation on startup
312
+ - [x] Exponential backoff retry (configurable)
313
+ - [x] Per-minute rate limiter
314
+ - [x] Request/response logging with Winston
315
+ - [x] Structured JSON logs in production
316
+ - [x] Zod input validation for all tools
317
+ - [x] Graceful shutdown (SIGINT/SIGTERM)
318
+ - [x] Unhandled exception/rejection handlers
319
+ - [x] Docker multi-stage build (minimal image)
320
+ - [x] Non-root Docker user
321
+ - [x] Token cap enforcement
322
+
323
+ ## 🧪 Testing
324
+
325
+ The project includes a comprehensive test suite with over 60 tests covering:
326
+
327
+ - **Unit Tests**: Configuration, logging, model handling, and tool validation
328
+ - **Integration Tests**: All 7 MCP tools with various input scenarios
329
+ - **Error Handling**: Validation of edge cases and failure modes
330
+ - **Schema Validation**: Zod-based input validation for all tools
331
+
332
+ ### Running Tests
333
+
334
+ ```bash
335
+ # Run all tests
336
+ npm test
337
+
338
+ # Run tests with coverage report
339
+ npm test -- --coverage
340
+
341
+ # Run tests in watch mode
342
+ npm test -- --watch
343
+
344
+ # Run specific test file
345
+ npm test src/handlers.test.ts
346
+ ```
347
+
348
+ **Current Test Status**: ✅ All tests passing (62/62 tests)
349
+
350
+ ## 🛠️ Development
351
+
352
+ ### Building the Project
353
+
354
+ ```bash
355
+ # Install dependencies
356
+ npm install
357
+
358
+ # Compile TypeScript to JavaScript
359
+ npm run build
360
+
361
+ # Clean build artifacts
362
+ npm run clean
363
+
364
+ # Development mode with auto-reload
365
+ npm run dev
366
+ ```
367
+
368
+ ### Code Quality
369
+
370
+ ```bash
371
+ # Run linter
372
+ npm run lint
373
+
374
+ # Run tests
375
+ npm test
376
+
377
+ # Run both linting and tests
378
+ npm run check
379
+ ```
380
+
381
+ ## 🤝 Contributing
382
+
383
+ Contributions are welcome! Here's how you can contribute:
384
+
385
+ 1. **Fork the Repository**
386
+ 2. **Create a Feature Branch**: `git checkout -b feature/your-feature-name`
387
+ 3. **Make Your Changes**: Follow the existing code style and patterns
388
+ 4. **Add Tests**: Ensure new functionality is properly tested
389
+ 5. **Run Checks**: `npm run check` to verify code quality and tests
390
+ 6. **Commit Changes**: Use clear, descriptive commit messages
391
+ 7. **Push to Your Fork**: `git push origin feature/your-feature-name`
392
+ 8. **Open a Pull Request**: Describe your changes and their benefits
393
+
394
+ ### Code Standards
395
+
396
+ - **TypeScript**: Strict type checking enabled
397
+ - **ESLint**: Code formatting and best practices
398
+ - **Zod**: Runtime validation for all external inputs
399
+ - **Testing**: Comprehensive test coverage for new features
400
+ - **Documentation**: Update README.md for user-facing changes
401
+
402
+ ### Development Workflow
403
+
404
+ 1. **Setup**: Follow the installation instructions
405
+ 2. **Development**: Use `npm run dev` for continuous development
406
+ 3. **Testing**: Run `npm test` to verify your changes
407
+ 4. **Building**: Use `npm run build` to compile the project
408
+ 5. **Linting**: Run `npm run lint` to check code quality
409
+
410
+ ## 📦 Packaging & Distribution
411
+
412
+ This project can be distributed and deployed in multiple formats:
413
+
414
+ ### NPM Package
415
+ - Published to the npm registry for easy installation
416
+ - Includes compiled JavaScript and TypeScript definitions
417
+ - Global and local installation options
418
+ - Runs as a standard CLI tool
419
+
420
+ ### Docker Image
421
+ - Multi-stage build for minimal image size
422
+ - Runs as non-root user for security
423
+ - Includes health check endpoint
424
+ - Easy deployment to containerized environments
425
+
426
+ ### Standalone Executable
427
+ - Self-contained JavaScript file with shebang
428
+ - Can be run directly on any system with Node.js
429
+ - No installation required beyond Node.js
430
+
431
+ ### Building Packages
432
+ ```bash
433
+ # Build the project
434
+ npm run build
435
+
436
+ # Create NPM package (.tgz)
437
+ npm pack
438
+
439
+ # Build Docker image
440
+ docker build -t nvidia-nim-mcp .
441
+
442
+ # All checks (lint, test, build)
443
+ npm run check && npm run build
444
+ ```
445
+
446
+ ## 📄 License
447
+
448
+ MIT
@@ -0,0 +1,119 @@
1
+ export interface ChatMessage { // One message of a conversation; element type of ChatCompletionRequest.messages.
2
+ role: "system" | "user" | "assistant"; // Author of the message; the type admits only these three roles.
3
+ content: string | ContentPart[]; // Either plain text or a list of typed parts (see ContentPart).
4
+ }
5
+ export interface ContentPart { // One typed piece of a multi-part ChatMessage.content array.
6
+ type: "text" | "image_url"; // Tag naming the kind of part.
7
+ text?: string; // Optional; presumably set when type is "text" (not enforced by this type) — confirm.
8
+ image_url?: { // Optional; presumably set when type is "image_url" (not enforced by this type) — confirm.
9
+ url: string; // Location of the image; accepted URL schemes are not visible here.
10
+ };
11
+ }
12
+ export interface ChatCompletionRequest { // Argument type of NIMClient.chatCompletion and NIMClient.chatCompletionStream.
13
+ model: string; // Model identifier string (required).
14
+ messages: ChatMessage[]; // Conversation messages (required).
15
+ temperature?: number; // Optional sampling temperature; valid range is not declared here.
16
+ top_p?: number; // Optional top-p sampling value; valid range is not declared here.
17
+ max_tokens?: number; // Optional token limit for the completion.
18
+ stream?: boolean; // Optional streaming flag; how the client uses it is not visible in this declaration.
19
+ stop?: string | string[]; // Optional stop sequence, single or multiple.
20
+ frequency_penalty?: number; // Optional; NOTE(review): semantics presumably follow the upstream chat API — confirm.
21
+ presence_penalty?: number; // Optional; NOTE(review): semantics presumably follow the upstream chat API — confirm.
22
+ seed?: number; // Optional seed value.
23
+ tools?: Tool[]; // Optional function definitions offered to the model (see Tool).
24
+ tool_choice?: "auto" | "none" | { // Optional: "auto", "none", or an object naming one specific function.
25
+ type: "function"; // Only the "function" variant is declared.
26
+ function: {
27
+ name: string; // Name of the function to select; presumably matches a Tool.function.name — confirm.
28
+ };
29
+ };
30
+ }
31
+ export interface Tool { // A function definition that can be listed in ChatCompletionRequest.tools.
32
+ type: "function"; // Only the "function" tool type is declared.
33
+ function: {
34
+ name: string; // Function name.
35
+ description: string; // Human-readable description; required by this type.
36
+ parameters: Record<string, unknown>; // Free-form object; presumably a JSON-Schema-style parameter description — confirm.
37
+ };
38
+ }
39
+ export interface ChatCompletionResponse { // Value that NIMClient.chatCompletion resolves to.
40
+ id: string; // Identifier of the completion.
41
+ object: string; // Object-type label supplied with the response.
42
+ created: number; // NOTE(review): presumably a Unix timestamp; unit is not declared here — confirm.
43
+ model: string; // Model identifier reported in the response.
44
+ choices: Choice[]; // Generated alternatives (see Choice).
45
+ usage: Usage; // Token accounting (see Usage).
46
+ }
47
+ export interface Choice { // One entry of ChatCompletionResponse.choices.
48
+ index: number; // Index of this choice; presumably its position in the choices array — confirm.
49
+ message: { // The generated message.
50
+ role: string; // Declared as a plain string, not the ChatMessage role union.
51
+ content: string | null; // May be null; presumably when the model answers with tool calls only — confirm.
52
+ tool_calls?: ToolCall[]; // Optional function invocations requested by the model (see ToolCall).
53
+ };
54
+ finish_reason: string; // Reason generation stopped; the set of possible values is not declared here.
55
+ }
56
+ export interface ToolCall { // One function invocation listed in Choice.message.tool_calls.
57
+ id: string; // Identifier of this call.
58
+ type: "function"; // Only the "function" variant is declared.
59
+ function: {
60
+ name: string; // Name of the function being called.
61
+ arguments: string; // Arguments as a string; presumably JSON-encoded, so parse and validate before use — confirm.
62
+ };
63
+ }
64
+ export interface Usage { // Token counts attached to a ChatCompletionResponse.
65
+ prompt_tokens: number; // Tokens counted for the prompt.
66
+ completion_tokens: number; // Tokens counted for the completion.
67
+ total_tokens: number; // Total count; presumably prompt_tokens + completion_tokens — confirm.
68
+ }
69
+ export interface EmbeddingRequest { // Argument type of NIMClient.embeddings.
70
+ model: string; // Embedding model identifier (required).
71
+ input: string | string[]; // A single text or a batch of texts.
72
+ encoding_format?: "float" | "base64"; // Optional output encoding selector.
73
+ truncate?: "NONE" | "START" | "END"; // Optional truncation mode; NOTE(review): presumably which end of over-long input is cut — confirm.
74
+ }
75
+ export interface EmbeddingResponse { // Value that NIMClient.embeddings resolves to.
76
+ object: string; // Object-type label supplied with the response.
77
+ data: Array<{ // One entry per returned embedding.
78
+ object: string; // Object-type label of the entry.
79
+ embedding: number[]; // The embedding vector, typed as a number array.
80
+ index: number; // Index of the entry; presumably the position of the matching input — confirm.
81
+ }>;
82
+ model: string; // Model identifier reported in the response.
83
+ usage: { // Token accounting; no completion_tokens field is declared for embeddings.
84
+ prompt_tokens: number;
85
+ total_tokens: number;
86
+ };
87
+ }
88
+ export interface RerankRequest { // Argument type of NIMClient.rerank.
89
+ model: string; // Reranking model identifier (required).
90
+ query: string; // Query text the passages are ranked against.
91
+ passages: Array<{ // Candidate passages, each wrapped in an object.
92
+ text: string; // Passage text.
93
+ }>;
94
+ truncate?: "NONE" | "END"; // Optional truncation mode; unlike EmbeddingRequest, "START" is not declared.
95
+ }
96
+ export interface RerankResponse { // Value that NIMClient.rerank resolves to.
97
+ rankings: Array<{ // One entry per ranked passage; ordering of entries is not declared here.
98
+ index: number; // Index of the passage; presumably its position in RerankRequest.passages — confirm.
99
+ logit: number; // Score for the passage; NOTE(review): presumably higher means more relevant — confirm.
100
+ passage: { // The passage this ranking refers to.
101
+ text: string;
102
+ };
103
+ }>;
104
+ usage: { // Token accounting for the rerank call.
105
+ prompt_tokens: number;
106
+ total_tokens: number;
107
+ };
108
+ }
109
+ export declare class NIMClient { // Client class declaration exposing chat, embedding, rerank and model-listing calls; implementation is not in this file.
110
+ private readonly http; // Private member; its type is erased in this declaration.
111
+ private readonly rateLimiter; // Private member; its type is erased in this declaration.
112
+ constructor(); // Takes no arguments; presumably reads configuration from elsewhere (e.g. environment) — confirm.
113
+ private normalizeError; // Private member; signature is not visible in this declaration.
114
+ chatCompletion(request: ChatCompletionRequest): Promise<ChatCompletionResponse>; // Resolves to the full chat completion response.
115
+ chatCompletionStream(request: ChatCompletionRequest): AsyncGenerator<string>; // Async generator of strings; presumably incremental text chunks — confirm.
116
+ embeddings(request: EmbeddingRequest): Promise<EmbeddingResponse>; // Resolves to the embeddings for the request input.
117
+ rerank(request: RerankRequest): Promise<RerankResponse>; // Resolves to rankings for the request passages.
118
+ listModels(): Promise<string[]>; // Resolves to an array of strings; presumably model identifiers — confirm.
119
+ }