nvidia-nim-mcp 1.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +182 -35
- package/dist/client.d.ts +57 -3
- package/dist/client.js +135 -56
- package/dist/config.d.ts +8 -3
- package/dist/config.js +16 -5
- package/dist/handlers.d.ts +4 -0
- package/dist/handlers.js +347 -7
- package/dist/logger.js +2 -2
- package/dist/models.d.ts +60 -1
- package/dist/models.js +910 -0
- package/dist/tools.d.ts +506 -61
- package/dist/tools.js +192 -8
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -1,18 +1,20 @@
|
|
|
1
1
|
# NVIDIA NIM MCP Server
|
|
2
2
|
|
|
3
|
-
A production-ready **Model Context Protocol (MCP)** server for consuming **NVIDIA NIM** (NVIDIA Inference Microservices) models. Supports LLMs, embeddings, reranking, function calling, and
|
|
3
|
+
A production-ready **Model Context Protocol (MCP)** server for consuming **NVIDIA NIM** (NVIDIA Inference Microservices) models. Supports 50+ LLMs, multimodal models, image generation, embeddings, reranking, function calling, vision, and code-specialized models with rich metadata for intelligent agent selection.
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
|
|
7
7
|
## 🚀 Features
|
|
8
8
|
|
|
9
|
-
- **
|
|
10
|
-
- **
|
|
9
|
+
- **11 MCP Tools**: chat completion, text generation, embeddings, reranking, function calling, model listing, model info, **image generation**, **image analysis**, **multimodal tasks**, **model comparison**
|
|
10
|
+
- **50+ Supported Models**: Llama 3.1/3.2, Nemotron 3 Ultra (550B), MiniMax M3, Kimi K2.6 (1T), DeepSeek V4 Pro, GLM 5.1, Qwen 3.5 397B, Mistral Large 3 (675B), GPT-OSS 120B, DiffusionGemma, FLUX.1, SDXL, SD3, and more
|
|
11
|
+
- **Rich Model Metadata**: licensing, hardware requirements, benchmarks, image generation specs, reasoning modes, tags for agent selection
|
|
12
|
+
- **Advanced Filtering**: by commercial use, reasoning, vision, function calling, multimodal, context length, tags, hardware
|
|
11
13
|
- **Production-Grade**: automatic retries with exponential backoff, per-minute rate limiting, structured JSON logging
|
|
12
14
|
- **Type-Safe**: full TypeScript, Zod input validation on every tool
|
|
13
15
|
- **Docker-Ready**: multi-stage Dockerfile with non-root user, health checks
|
|
14
16
|
- **Configurable**: all settings via environment variables
|
|
15
|
-
- **
|
|
17
|
+
- **Single Required Env Var**: only `NVIDIA_API_KEY` is required; all other settings have sensible defaults
|
|
16
18
|
|
|
17
19
|
---
|
|
18
20
|
|
|
@@ -81,17 +83,22 @@ Copy `.env.example` to `.env` and fill in your API key:
|
|
|
81
83
|
cp .env.example .env
|
|
82
84
|
```
|
|
83
85
|
|
|
86
|
+
**Only `NVIDIA_API_KEY` is required** — all other variables have production-ready defaults:
|
|
87
|
+
|
|
84
88
|
| Variable | Required | Default | Description |
|
|
85
89
|
|---|---|---|---|
|
|
86
90
|
| `NVIDIA_API_KEY` | ✅ | — | Your NVIDIA NGC API key |
|
|
87
91
|
| `NVIDIA_NIM_BASE_URL` | ❌ | `https://integrate.api.nvidia.com/v1` | Base URL for NIM API |
|
|
88
|
-
| `DEFAULT_MODEL` | ❌ | `
|
|
92
|
+
| `DEFAULT_MODEL` | ❌ | `black-forest-labs/flux.1-dev` | Default model (best image generation) |
|
|
89
93
|
| `MAX_REQUESTS_PER_MINUTE` | ❌ | `40` | Rate limit cap (NVIDIA API limit) |
|
|
90
94
|
| `MAX_TOKENS_PER_REQUEST` | ❌ | `4096` | Hard cap on tokens per request |
|
|
91
95
|
| `REQUEST_TIMEOUT_MS` | ❌ | `120000` | Request timeout (ms) |
|
|
92
96
|
| `MAX_RETRIES` | ❌ | `3` | Max retry attempts on failure |
|
|
93
97
|
| `RETRY_DELAY_MS` | ❌ | `1000` | Base delay between retries (ms) |
|
|
94
98
|
| `LOG_LEVEL` | ❌ | `info` | `error\|warn\|info\|debug` |
|
|
99
|
+
| `ENABLE_IMAGE_GENERATION` | ❌ | `true` | Enable image generation tools |
|
|
100
|
+
| `ENABLE_VISION` | ❌ | `true` | Enable vision/multimodal tools |
|
|
101
|
+
| `ENABLE_MULTIMODAL` | ❌ | `true` | Enable multimodal task tools |
|
|
95
102
|
|
|
96
103
|
---
|
|
97
104
|
|
|
@@ -164,7 +171,6 @@ NVIDIA_API_KEY=nvapi-your-key ./dist/index.js
|
|
|
164
171
|
"command": "nvidia-nim-mcp",
|
|
165
172
|
"env": {
|
|
166
173
|
"NVIDIA_API_KEY": "nvapi-your-key-here",
|
|
167
|
-
"DEFAULT_MODEL": "z-ai/glm5",
|
|
168
174
|
"LOG_LEVEL": "info"
|
|
169
175
|
}
|
|
170
176
|
}
|
|
@@ -181,7 +187,6 @@ NVIDIA_API_KEY=nvapi-your-key ./dist/index.js
|
|
|
181
187
|
"args": ["nvidia-nim-mcp"],
|
|
182
188
|
"env": {
|
|
183
189
|
"NVIDIA_API_KEY": "nvapi-your-key-here",
|
|
184
|
-
"DEFAULT_MODEL": "z-ai/glm5",
|
|
185
190
|
"LOG_LEVEL": "info"
|
|
186
191
|
}
|
|
187
192
|
}
|
|
@@ -198,7 +203,6 @@ NVIDIA_API_KEY=nvapi-your-key ./dist/index.js
|
|
|
198
203
|
"args": ["/absolute/path/to/nvidia-nim-mcp/dist/index.js"],
|
|
199
204
|
"env": {
|
|
200
205
|
"NVIDIA_API_KEY": "nvapi-your-key-here",
|
|
201
|
-
"DEFAULT_MODEL": "z-ai/glm5",
|
|
202
206
|
"LOG_LEVEL": "info"
|
|
203
207
|
}
|
|
204
208
|
}
|
|
@@ -215,12 +219,12 @@ Multi-turn conversation with any NIM LLM.
|
|
|
215
219
|
|
|
216
220
|
```json
|
|
217
221
|
{
|
|
218
|
-
"model": "
|
|
222
|
+
"model": "nvidia/nemotron-3-ultra-550b-a55b",
|
|
219
223
|
"messages": [
|
|
220
224
|
{ "role": "user", "content": "Explain quantum computing" }
|
|
221
225
|
],
|
|
222
226
|
"temperature": 0.3,
|
|
223
|
-
"max_tokens":
|
|
227
|
+
"max_tokens": 4096
|
|
224
228
|
}
|
|
225
229
|
```
|
|
226
230
|
|
|
@@ -230,7 +234,8 @@ Single-prompt text generation (simplified interface).
|
|
|
230
234
|
```json
|
|
231
235
|
{
|
|
232
236
|
"prompt": "Write a haiku about machine learning",
|
|
233
|
-
"temperature": 0.5
|
|
237
|
+
"temperature": 0.5,
|
|
238
|
+
"max_tokens": 512
|
|
234
239
|
}
|
|
235
240
|
```
|
|
236
241
|
|
|
@@ -261,7 +266,7 @@ Use NIM models with tool/function calling.
|
|
|
261
266
|
|
|
262
267
|
```json
|
|
263
268
|
{
|
|
264
|
-
"model": "z-ai/
|
|
269
|
+
"model": "z-ai/glm-5.1",
|
|
265
270
|
"messages": [{ "role": "user", "content": "What's the weather in Paris?" }],
|
|
266
271
|
"tools": [{
|
|
267
272
|
"type": "function",
|
|
@@ -278,31 +283,162 @@ Use NIM models with tool/function calling.
|
|
|
278
283
|
}
|
|
279
284
|
```
|
|
280
285
|
|
|
286
|
+
### `generate_image`
|
|
287
|
+
Generate images from text prompts using FLUX.1, SDXL, SD3, DiffusionGemma.
|
|
288
|
+
|
|
289
|
+
```json
|
|
290
|
+
{
|
|
291
|
+
"model": "black-forest-labs/flux.1-dev",
|
|
292
|
+
"prompt": "A photorealistic mountain landscape at sunset, 8K",
|
|
293
|
+
"width": 1024,
|
|
294
|
+
"height": 1024,
|
|
295
|
+
"steps": 30,
|
|
296
|
+
"cfg_scale": 3.5,
|
|
297
|
+
"sampler": "euler_a",
|
|
298
|
+
"scheduler": "simple"
|
|
299
|
+
}
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
### `analyze_image`
|
|
303
|
+
Analyze and describe images using vision/multimodal models.
|
|
304
|
+
|
|
305
|
+
```json
|
|
306
|
+
{
|
|
307
|
+
"model": "moonshotai/kimi-k2.6",
|
|
308
|
+
"image_url": "https://example.com/image.jpg",
|
|
309
|
+
"prompt": "Describe this image in detail",
|
|
310
|
+
"detail": "high"
|
|
311
|
+
}
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
### `multimodal_task`
|
|
315
|
+
Perform multimodal tasks combining text and images.
|
|
316
|
+
|
|
317
|
+
```json
|
|
318
|
+
{
|
|
319
|
+
"model": "minimaxai/minimax-m3",
|
|
320
|
+
"messages": [
|
|
321
|
+
{
|
|
322
|
+
"role": "user",
|
|
323
|
+
"content": [
|
|
324
|
+
{ "type": "text", "text": "Analyze this chart" },
|
|
325
|
+
{ "type": "image_url", "image_url": { "url": "https://example.com/chart.png" } }
|
|
326
|
+
]
|
|
327
|
+
}
|
|
328
|
+
],
|
|
329
|
+
"max_tokens": 2048
|
|
330
|
+
}
|
|
331
|
+
```
|
|
332
|
+
|
|
281
333
|
### `list_models`
|
|
282
|
-
List available models
|
|
334
|
+
List available models with rich metadata and advanced filtering.
|
|
283
335
|
|
|
284
336
|
```json
|
|
285
|
-
{
|
|
337
|
+
{
|
|
338
|
+
"category": "code",
|
|
339
|
+
"commercial_use": true,
|
|
340
|
+
"supports_reasoning": true,
|
|
341
|
+
"tags": ["coding", "agentic"],
|
|
342
|
+
"include_details": true
|
|
343
|
+
}
|
|
286
344
|
```
|
|
287
345
|
|
|
346
|
+
**Filter Options:**
|
|
347
|
+
- `category`: `language`, `embedding`, `reranking`, `vision`, `code`, `multimodal`, `image_generation`, `all`
|
|
348
|
+
- `commercial_use`: Filter by commercial license
|
|
349
|
+
- `supports_reasoning`: Filter by reasoning capability
|
|
350
|
+
- `supports_vision`: Filter by vision capability
|
|
351
|
+
- `supports_function_calling`: Filter by function calling
|
|
352
|
+
- `supports_multimodal`: Filter by multimodal input
|
|
353
|
+
- `min_context_length`: Minimum context window (tokens)
|
|
354
|
+
- `tags`: Filter by use case tags
|
|
355
|
+
- `hardware`: Filter by GPU type (Hopper, Blackwell, Ampere)
|
|
356
|
+
- `include_details`: Include full metadata (benchmarks, image specs, etc.)
|
|
357
|
+
|
|
288
358
|
### `get_model_info`
|
|
289
|
-
Get
|
|
359
|
+
Get complete metadata for a specific model.
|
|
290
360
|
|
|
291
361
|
```json
|
|
292
|
-
{ "model_id": "
|
|
362
|
+
{ "model_id": "nvidia/nemotron-3-ultra-550b-a55b" }
|
|
293
363
|
```
|
|
294
364
|
|
|
295
|
-
|
|
365
|
+
**Returns:** licensing, hardware requirements, benchmarks, image gen specs, reasoning modes, tags, supported languages, etc.
|
|
366
|
+
|
|
367
|
+
### `compare_models`
|
|
368
|
+
Compare 2-5 models side-by-side across all decision factors.
|
|
369
|
+
|
|
370
|
+
```json
|
|
371
|
+
{
|
|
372
|
+
"model_ids": [
|
|
373
|
+
"nvidia/nemotron-3-ultra-550b-a55b",
|
|
374
|
+
"deepseek-ai/deepseek-v4-pro",
|
|
375
|
+
"moonshotai/kimi-k2.6",
|
|
376
|
+
"z-ai/glm-5.1"
|
|
377
|
+
]
|
|
378
|
+
}
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
**Returns:** Structured comparison table with licensing, hardware, benchmarks, capabilities, tags, image generation specs, etc.
|
|
296
382
|
|
|
297
|
-
|
|
383
|
+
---
|
|
298
384
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
|
304
|
-
|
|
305
|
-
|
|
|
385
|
+
## 📦 Supported Models (50+)
|
|
386
|
+
|
|
387
|
+
### Language Models (Frontier Reasoning)
|
|
388
|
+
|
|
389
|
+
| Model | Parameters | Context | License | Commercial | Best For |
|
|
390
|
+
|---|---|---|---|---|---|
|
|
391
|
+
| `nvidia/nemotron-3-ultra-550b-a55b` | 550B (55B active) | 131K | OpenMDW-1.1 | ✅ | Frontier reasoning, coding, agentic, 1M context, multilingual |
|
|
392
|
+
| `nvidia/nemotron-3-ultra-550b-a55b-instruct` | 550B | 131K | OpenMDW-1.1 | ✅ | Instruction-tuned variant |
|
|
393
|
+
| `minimaxai/minimax-m3` | 428B (22B active) | 1M | Non-Commercial | ❌ | Multimodal, video (30min), 8hr coding, agentic |
|
|
394
|
+
| `moonshotai/kimi-k2.6` | 1T (32B active) | 256K | Modified MIT | ✅ | Long-horizon coding, 300 agents, vision, agentic |
|
|
395
|
+
| `deepseek-ai/deepseek-v4-pro` | 1.6T (49B active) | 1M | MIT | ✅ | Advanced coding, math, reasoning, 3 reasoning modes |
|
|
396
|
+
| `z-ai/glm-5.1` | 754B (DSA) | 131K | MIT | ✅ | Software engineering, agentic, SWE-Bench 58.4% |
|
|
397
|
+
| `qwen/qwen3.5-397b-a17b` | 397B (MoE) | 131K | Research | ❌ | Large-scale multilingual, multimodal |
|
|
398
|
+
| `mistralai/mistral-large-3-675b-instruct-2512` | 675B | 131K | Research | ❌ | Frontier reasoning, multimodal |
|
|
399
|
+
| `openai/gpt-oss-120b` | 120B | 131K | Apache 2.0 | ✅ | Open-weight, research, fine-tuning |
|
|
400
|
+
| `google/diffusiongemma-26b-a4b-it` | 25.2B (3.8B active) | 256K | Apache 2.0 | ✅ | Diffusion text gen, 35+ langs, fast, multimodal |
|
|
401
|
+
|
|
402
|
+
### Code-Specialized Models
|
|
403
|
+
|
|
404
|
+
| Model | Parameters | Context | License | Commercial |
|
|
405
|
+
|---|---|---|---|---|
|
|
406
|
+
| `z-ai/glm-5.1` | 754B | 131K | MIT | ✅ |
|
|
407
|
+
| `z-ai/glm5` | - | 128K | Z.ai | ✅ |
|
|
408
|
+
| `qwen/qwen2.5-coder-32b-instruct` | 32B | 131K | Research | ❌ |
|
|
409
|
+
|
|
410
|
+
### Multimodal / Vision Models
|
|
411
|
+
|
|
412
|
+
| Model | Parameters | Context | Vision | Video | License | Commercial |
|
|
413
|
+
|---|---|---|---|---|---|---|
|
|
414
|
+
| `meta/llama-3.2-90b-vision-instruct` | 90B | 128K | ✅ | ❌ | Llama 3.2 | ✅ |
|
|
415
|
+
| `meta/llama-3.2-11b-vision-instruct` | 11B | 128K | ✅ | ❌ | Llama 3.2 | ✅ |
|
|
416
|
+
| `nvidia/neva-22b` | 22B | 4K | ✅ | ❌ | NVIDIA | ✅ |
|
|
417
|
+
| `microsoft/phi-3.5-vision-instruct` | - | 128K | ✅ | ❌ | MIT | ✅ |
|
|
418
|
+
| `minimaxai/minimax-m3` | 428B | 1M | ✅ | ✅ (30min) | Non-Commercial | ❌ |
|
|
419
|
+
| `moonshotai/kimi-k2.6` | 1T | 256K | ✅ | ✅ | Modified MIT | ✅ |
|
|
420
|
+
|
|
421
|
+
### Image Generation Models
|
|
422
|
+
|
|
423
|
+
| Model | Architecture | Resolutions | Aspect Ratios | Max Images | ControlNet | License | Commercial |
|
|
424
|
+
|---|---|---|---|---|---|---|---|
|
|
425
|
+
| `black-forest-labs/flux.1-dev` | Diffusion Transformer | 1024², 1152×896, 1344×768, 21:9 | 1:1, 16:9, 9:16, 4:3, 3:4, 21:9 | 1 | Canny, Depth | FLUX.1 [dev] Non-Commercial* | ❌* |
|
|
426
|
+
| `black-forest-labs/flux.1-kontext-dev` | Diffusion Transformer | Same | Same | 1 | - | FLUX.1 [dev] Non-Commercial* | ❌* |
|
|
427
|
+
| `nvidia/stable-diffusion-xl` | UNet + Attention | 1024², 1152×896, 1216×832 | 1:1, 16:9, 9:16, 4:3, 3:4 | 4 | - | SDXL 1.0 | ✅** |
|
|
428
|
+
| `stabilityai/sd-3-medium` | SD3 | Same | Same | 2 | - | Stability AI | ✅** |
|
|
429
|
+
| `nvidia/sdxl-turbo` | ADD | 512², 1024² | 1:1 | 4 | - | SDXL 1.0 | ✅** |
|
|
430
|
+
|
|
431
|
+
*\*Non-commercial default; commercial via contact*
|
|
432
|
+
*\*\*Requires Stability AI membership*
|
|
433
|
+
|
|
434
|
+
### Embeddings & Reranking
|
|
435
|
+
|
|
436
|
+
| Model | Type | Context | Dimensions | License | Commercial |
|
|
437
|
+
|---|---|---|---|---|---|
|
|
438
|
+
| `nvidia/nv-embedqa-e5-v5` | Embedding | 512 | - | NVIDIA | ✅ |
|
|
439
|
+
| `nvidia/nv-embed-v1` | Embedding | 4096 | - | NVIDIA | ✅ |
|
|
440
|
+
| `baai/bge-m3` | Embedding | 8192 | - | MIT | ✅ |
|
|
441
|
+
| `nvidia/nv-rerankqa-mistral-4b-v3` | Reranking | 4096 | - | NVIDIA | ✅ |
|
|
306
442
|
|
|
307
443
|
---
|
|
308
444
|
|
|
@@ -319,13 +455,17 @@ Get details about a specific model.
|
|
|
319
455
|
- [x] Docker multi-stage build (minimal image)
|
|
320
456
|
- [x] Non-root Docker user
|
|
321
457
|
- [x] Token cap enforcement
|
|
458
|
+
- [x] Single required env var (`NVIDIA_API_KEY`)
|
|
459
|
+
- [x] Feature flags for optional capabilities
|
|
460
|
+
|
|
461
|
+
---
|
|
322
462
|
|
|
323
463
|
## 🧪 Testing
|
|
324
464
|
|
|
325
|
-
The project includes a comprehensive test suite
|
|
465
|
+
The project includes a comprehensive test suite:
|
|
326
466
|
|
|
327
|
-
- **Unit Tests**: Configuration, logging, model handling,
|
|
328
|
-
- **Integration Tests**: All
|
|
467
|
+
- **Unit Tests**: Configuration, logging, model handling, tool validation
|
|
468
|
+
- **Integration Tests**: All 11 MCP tools with various input scenarios
|
|
329
469
|
- **Error Handling**: Validation of edge cases and failure modes
|
|
330
470
|
- **Schema Validation**: Zod-based input validation for all tools
|
|
331
471
|
|
|
@@ -345,7 +485,9 @@ npm test -- --watch
|
|
|
345
485
|
npm test src/handlers.test.ts
|
|
346
486
|
```
|
|
347
487
|
|
|
348
|
-
**Current Test Status**: ✅ All tests passing (
|
|
488
|
+
**Current Test Status**: ✅ All tests passing (96 tests)
|
|
489
|
+
|
|
490
|
+
---
|
|
349
491
|
|
|
350
492
|
## 🛠️ Development
|
|
351
493
|
|
|
@@ -378,9 +520,11 @@ npm test
|
|
|
378
520
|
npm run check
|
|
379
521
|
```
|
|
380
522
|
|
|
523
|
+
---
|
|
524
|
+
|
|
381
525
|
## 🤝 Contributing
|
|
382
526
|
|
|
383
|
-
Contributions are welcome!
|
|
527
|
+
Contributions are welcome!
|
|
384
528
|
|
|
385
529
|
1. **Fork the Repository**
|
|
386
530
|
2. **Create a Feature Branch**: `git checkout -b feature/your-feature-name`
|
|
@@ -388,8 +532,8 @@ Contributions are welcome! Here's how you can contribute:
|
|
|
388
532
|
4. **Add Tests**: Ensure new functionality is properly tested
|
|
389
533
|
5. **Run Checks**: `npm run check` to verify code quality and tests
|
|
390
534
|
6. **Commit Changes**: Use clear, descriptive commit messages
|
|
391
|
-
|
|
392
|
-
|
|
535
|
+
7. **Push to Your Fork**: `git push origin feature/your-feature-name`
|
|
536
|
+
8. **Open a Pull Request**: Describe your changes and their benefits
|
|
393
537
|
|
|
394
538
|
### Code Standards
|
|
395
539
|
|
|
@@ -407,9 +551,9 @@ Contributions are welcome! Here's how you can contribute:
|
|
|
407
551
|
4. **Building**: Use `npm run build` to compile the project
|
|
408
552
|
5. **Linting**: Run `npm run lint` to check code quality
|
|
409
553
|
|
|
410
|
-
|
|
554
|
+
---
|
|
411
555
|
|
|
412
|
-
|
|
556
|
+
## 📦 Packaging & Distribution
|
|
413
557
|
|
|
414
558
|
### NPM Package
|
|
415
559
|
- Published to npm registry for easy installation
|
|
@@ -429,6 +573,7 @@ This project can be distributed and deployed in multiple formats:
|
|
|
429
573
|
- No installation required beyond Node.js
|
|
430
574
|
|
|
431
575
|
### Building Packages
|
|
576
|
+
|
|
432
577
|
```bash
|
|
433
578
|
# Build the project
|
|
434
579
|
npm run build
|
|
@@ -443,6 +588,8 @@ docker build -t nvidia-nim-mcp .
|
|
|
443
588
|
npm run check && npm run build
|
|
444
589
|
```
|
|
445
590
|
|
|
591
|
+
---
|
|
592
|
+
|
|
446
593
|
## 📄 License
|
|
447
594
|
|
|
448
|
-
MIT
|
|
595
|
+
MIT
|
package/dist/client.d.ts
CHANGED
|
@@ -7,6 +7,7 @@ export interface ContentPart {
|
|
|
7
7
|
text?: string;
|
|
8
8
|
image_url?: {
|
|
9
9
|
url: string;
|
|
10
|
+
detail?: "low" | "high" | "auto";
|
|
10
11
|
};
|
|
11
12
|
}
|
|
12
13
|
export interface ChatCompletionRequest {
|
|
@@ -27,6 +28,9 @@ export interface ChatCompletionRequest {
|
|
|
27
28
|
name: string;
|
|
28
29
|
};
|
|
29
30
|
};
|
|
31
|
+
response_format?: {
|
|
32
|
+
type: "text" | "json_object";
|
|
33
|
+
};
|
|
30
34
|
}
|
|
31
35
|
export interface Tool {
|
|
32
36
|
type: "function";
|
|
@@ -106,14 +110,64 @@ export interface RerankResponse {
|
|
|
106
110
|
total_tokens: number;
|
|
107
111
|
};
|
|
108
112
|
}
|
|
113
|
+
export interface ImageGenerationRequest {
|
|
114
|
+
model: string;
|
|
115
|
+
prompt: string;
|
|
116
|
+
negative_prompt?: string;
|
|
117
|
+
width?: number;
|
|
118
|
+
height?: number;
|
|
119
|
+
num_images?: number;
|
|
120
|
+
steps?: number;
|
|
121
|
+
cfg_scale?: number;
|
|
122
|
+
seed?: number;
|
|
123
|
+
sampler?: string;
|
|
124
|
+
scheduler?: string;
|
|
125
|
+
response_format?: "url" | "b64_json";
|
|
126
|
+
}
|
|
127
|
+
export interface ImageGenerationResponse {
|
|
128
|
+
created: number;
|
|
129
|
+
model?: string;
|
|
130
|
+
data: Array<{
|
|
131
|
+
url?: string;
|
|
132
|
+
b64_json?: string;
|
|
133
|
+
revised_prompt?: string;
|
|
134
|
+
}>;
|
|
135
|
+
usage?: {
|
|
136
|
+
total_images: number;
|
|
137
|
+
};
|
|
138
|
+
}
|
|
139
|
+
export interface ImageAnalysisRequest {
|
|
140
|
+
model: string;
|
|
141
|
+
messages: ChatMessage[];
|
|
142
|
+
temperature?: number;
|
|
143
|
+
top_p?: number;
|
|
144
|
+
max_tokens?: number;
|
|
145
|
+
stream?: boolean;
|
|
146
|
+
}
|
|
109
147
|
export declare class NIMClient {
|
|
110
|
-
private readonly
|
|
111
|
-
private readonly rateLimiter;
|
|
148
|
+
private readonly httpClient;
|
|
112
149
|
constructor();
|
|
150
|
+
private createHttpClient;
|
|
113
151
|
private normalizeError;
|
|
114
152
|
chatCompletion(request: ChatCompletionRequest): Promise<ChatCompletionResponse>;
|
|
115
|
-
chatCompletionStream(request: ChatCompletionRequest): AsyncGenerator<string>;
|
|
116
153
|
embeddings(request: EmbeddingRequest): Promise<EmbeddingResponse>;
|
|
117
154
|
rerank(request: RerankRequest): Promise<RerankResponse>;
|
|
155
|
+
generateImage(request: ImageGenerationRequest): Promise<ImageGenerationResponse>;
|
|
156
|
+
generateImageFluxSchnell(request: {
|
|
157
|
+
prompt: string;
|
|
158
|
+
width?: number;
|
|
159
|
+
height?: number;
|
|
160
|
+
seed?: number;
|
|
161
|
+
steps?: number;
|
|
162
|
+
}): Promise<ImageGenerationResponse>;
|
|
163
|
+
generateImageFluxKontext(request: {
|
|
164
|
+
prompt: string;
|
|
165
|
+
image: string;
|
|
166
|
+
aspect_ratio?: string;
|
|
167
|
+
steps?: number;
|
|
168
|
+
cfg_scale?: number;
|
|
169
|
+
seed?: number;
|
|
170
|
+
}): Promise<ImageGenerationResponse>;
|
|
171
|
+
analyzeImage(request: ImageAnalysisRequest): Promise<ChatCompletionResponse>;
|
|
118
172
|
listModels(): Promise<string[]>;
|
|
119
173
|
}
|