pi-nvidia-nim 1.1.18 → 1.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -14
- package/index.ts +241 -58
- package/package.json +3 -2
package/README.md
CHANGED
@@ -1,6 +1,6 @@
 # pi-nvidia-nim
 
-NVIDIA NIM API provider extension for [pi coding agent](https://github.com/badlogic/pi-mono) - access 100+ models from [build.nvidia.com](https://build.nvidia.com) including DeepSeek V3.2, Kimi K2.
+NVIDIA NIM API provider extension for [pi coding agent](https://github.com/badlogic/pi-mono) - access 100+ models from [build.nvidia.com](https://build.nvidia.com) including DeepSeek V4 Flash/Pro, DeepSeek V3.2, Kimi K2.6, MiniMax M2.1, GLM-5, GLM-4.7, Qwen3, Llama 4, and many more.
 
 https://github.com/user-attachments/assets/f44773e4-9bf8-4bb5-a9c0-d5938030701c
 
@@ -17,10 +17,14 @@ https://github.com/user-attachments/assets/f44773e4-9bf8-4bb5-a9c0-d5938030701c
 ### 2. Set Your API Key
 
 ```bash
+# Preferred by this extension
 export NVIDIA_NIM_API_KEY=nvapi-your-key-here
+
+# Also supported, matching NVIDIA's website examples
+export NVIDIA_API_KEY=nvapi-your-key-here
 ```
 
-Add
+Add one of these to your `~/.bashrc`, `~/.zshrc`, or shell profile to persist it.
 
 ### 3. Install the Extension
 
@@ -53,10 +57,10 @@ Once loaded, NVIDIA NIM models appear in the `/model` selector under the `nvidia
 
 ```bash
 # Use a specific NIM model directly
-pi --provider nvidia-nim --model "deepseek-ai/deepseek-
+pi --provider nvidia-nim --model "deepseek-ai/deepseek-v4-flash"
 
 # With thinking enabled
-pi --provider nvidia-nim --model "deepseek-ai/deepseek-
+pi --provider nvidia-nim --model "deepseek-ai/deepseek-v4-flash" --thinking high
 
 # Limit model cycling to NIM models
 pi --models "nvidia-nim/*"
@@ -70,11 +74,12 @@ NVIDIA NIM models use a non-standard `chat_template_kwargs` parameter to enable
 
 When you change the thinking level in pi (`Shift+Tab` to cycle), the extension:
 
-1. **Maps
+1. **Maps thinking levels** to values each NIM model accepts. For DeepSeek V4, `xhigh` maps to `max`; lower enabled levels use `high`.
 2. **Injects `chat_template_kwargs`** per model to actually enable thinking:
+   - DeepSeek V4: `{ thinking: true, reasoning_effort: "high" | "max" }`
    - DeepSeek V3.x, R1 distills: `{ thinking: true }`
    - GLM-5, GLM-4.7: `{ enable_thinking: true, clear_thinking: false }`
-   - Kimi K2.
+   - Kimi K2.6, K2-thinking: `{ thinking: true }`
    - Qwen3, QwQ: `{ enable_thinking: true }`
 3. **Explicitly disables thinking** when the level is "off" for models that think by default (e.g., GLM-5, GLM-4.7).
 4. **Uses `system` role** instead of `developer` for all NIM models - the `developer` role combined with `chat_template_kwargs` causes 500 errors on NIM.
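For illustration, a chat-completions request built for `deepseek-ai/deepseek-v4-flash` at pi's `xhigh` level would carry the injected kwargs roughly as follows (a sketch assembled from the mapping above; the message contents and the other field values are placeholders, not taken from this diff):

```ts
// Hypothetical request body after the extension's payload hook runs
// (xhigh on DeepSeek V4 Flash). Only chat_template_kwargs is NIM-specific.
const body = {
  model: "deepseek-ai/deepseek-v4-flash",
  messages: [
    { role: "system", content: "..." }, // system role, never developer, on NIM
    { role: "user", content: "..." },
  ],
  stream: true,
  chat_template_kwargs: { thinking: true, reasoning_effort: "max" }, // xhigh maps to max
  // top-level reasoning_effort is omitted: DeepSeek ignores it without the kwargs
};
```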
@@ -84,22 +89,25 @@ When you change the thinking level in pi (`Shift+Tab` to cycle), the extension:
 | pi Level | NIM Mapping | Effect |
 |----------|-------------|--------|
 | off | No kwargs (or explicit disable) | No reasoning output |
-| minimal |
-| low | low | Thinking enabled |
-| medium | medium | Thinking enabled |
+| minimal | low, or high for DeepSeek V4 | Thinking enabled |
+| low | low, or high for DeepSeek V4 | Thinking enabled |
+| medium | medium, or high for DeepSeek V4 | Thinking enabled |
 | high | high | Thinking enabled |
+| xhigh | high, or max for DeepSeek V4 | Maximum supported thinking |
 
 ## Available Models
 
-The extension ships with curated metadata for
+The extension ships with curated metadata for 42 featured models. At startup, it also queries the NVIDIA NIM API to discover additional models automatically.
 
 ### Featured Models
 
 | Model | Reasoning | Vision | Context |
 |-------|-----------|--------|---------|
+| `deepseek-ai/deepseek-v4-flash` | ✅ | | 1M |
+| `deepseek-ai/deepseek-v4-pro` | ✅ | | 1M |
 | `deepseek-ai/deepseek-v3.2` | ✅ | | 128K |
 | `deepseek-ai/deepseek-v3.1` | ✅ | | 128K |
-| `moonshotai/kimi-k2.
+| `moonshotai/kimi-k2.6` | ✅ | | 256K |
 | `moonshotai/kimi-k2-thinking` | ✅ | | 128K |
 | `minimaxai/minimax-m2.1` | | | 1M |
 | `z-ai/glm5` | ✅ | | 128K |
@@ -121,7 +129,7 @@ The extension ships with curated metadata for 39 featured models. At startup, it
 
 ### Tool Calling
 
-All major models support OpenAI-compatible tool calling. Tested and confirmed working with DeepSeek V3.2, GLM-5, GLM-4.7, Qwen3, Kimi K2.
+All major models support OpenAI-compatible tool calling. Tested and confirmed working with DeepSeek V4/V3.2, GLM-5, GLM-4.7, Qwen3, Kimi K2.6, and others.
 
 ## How It Works
 
@@ -130,13 +138,13 @@ This extension uses `pi.registerProvider()` to register NVIDIA NIM as a custom p
 The custom streamer:
 1. Intercepts the request payload via `onPayload` callback
 2. Injects `chat_template_kwargs` for models that need it to enable thinking
-3. Maps unsupported thinking levels (`minimal` → `low`)
+3. Maps unsupported thinking levels to NIM-compatible values (`minimal` → `low`; `xhigh` → `high` or DeepSeek V4 `max`)
 4. Suppresses `reasoning_effort` for models that don't respond to it (e.g., DeepSeek without kwargs)
 5. Uses the standard OpenAI SSE streaming format - pi already parses `reasoning_content` and `reasoning` fields from streaming deltas
 
 ## Configuration
 
-The only configuration needed is the `NVIDIA_NIM_API_KEY` environment variable. All models on NVIDIA NIM are free during the preview period (with rate limits).
+The only configuration needed is either the `NVIDIA_NIM_API_KEY` or `NVIDIA_API_KEY` environment variable. All models on NVIDIA NIM are free during the preview period (with rate limits).
 
 ## Notes
 
package/index.ts
CHANGED
@@ -6,7 +6,7 @@
  *
  * Setup:
  * 1. Get an API key from https://build.nvidia.com
- * 2. Export it: export NVIDIA_NIM_API_KEY=nvapi-...
+ * 2. Export it: export NVIDIA_NIM_API_KEY=nvapi-... (or NVIDIA_API_KEY=nvapi-...)
  * 3. Load the extension:
  *    pi -e ./path/to/pi-nvidia-nim
  *    # or install as a package:
@@ -22,17 +22,20 @@
  * parameters:
  *
  * - DeepSeek V3.x: `chat_template_kwargs: { thinking: true }`
+ * - DeepSeek V4: `chat_template_kwargs: { thinking: true, reasoning_effort: "high" | "max" }`
  * - GLM-5/4.7: `chat_template_kwargs: { enable_thinking: true, clear_thinking: false }`
  * - Kimi K2.5: `chat_template_kwargs: { thinking: true }` (also accepts reasoning_effort)
  * - Qwen3: `chat_template_kwargs: { enable_thinking: true }`
  *
- * NIM only accepts `reasoning_effort` values
- *
+ * NIM only accepts selected `reasoning_effort` values. The extension maps pi's
+ * provider-agnostic levels to the values each NIM model accepts.
  *
  * Some models (e.g., GLM-5, GLM-4.7) always produce reasoning output regardless of
  * thinking settings.
  */
 
+import { existsSync, readFileSync } from "node:fs";
+import { join } from "node:path";
 import type {
   Api,
   AssistantMessageEventStream,
@@ -41,7 +44,7 @@ import type {
   SimpleStreamOptions,
 } from "@mariozechner/pi-ai";
 import { streamSimpleOpenAICompletions } from "@mariozechner/pi-ai";
-import type
+import { getAgentDir, type ExtensionAPI } from "@mariozechner/pi-coding-agent";
 
 // =============================================================================
 // Constants
@@ -49,6 +52,8 @@ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
 
 const NVIDIA_NIM_BASE_URL = "https://integrate.api.nvidia.com/v1";
 const NVIDIA_NIM_API_KEY_ENV = "NVIDIA_NIM_API_KEY";
+const NVIDIA_API_KEY_ENV = "NVIDIA_API_KEY";
+const NVIDIA_API_KEY_ENV_NAMES = [NVIDIA_NIM_API_KEY_ENV, NVIDIA_API_KEY_ENV] as const;
 const PROVIDER_NAME = "nvidia-nim";
 
 // =============================================================================
@@ -70,10 +75,22 @@ interface ThinkingConfig {
   disableKwargs?: Record<string, unknown>;
   /** If true, also send reasoning_effort alongside chat_template_kwargs */
   sendReasoningEffort?: boolean;
+  /** If true, include a model-specific reasoning_effort inside chat_template_kwargs */
+  includeReasoningEffortInKwargs?: boolean;
 }
 
 const THINKING_CONFIGS: Record<string, ThinkingConfig> = {
   // DeepSeek models need chat_template_kwargs - reasoning_effort alone doesn't trigger thinking
+  "deepseek-ai/deepseek-v4-flash": {
+    enableKwargs: { thinking: true },
+    disableKwargs: { thinking: false },
+    includeReasoningEffortInKwargs: true,
+  },
+  "deepseek-ai/deepseek-v4-pro": {
+    enableKwargs: { thinking: true },
+    disableKwargs: { thinking: false },
+    includeReasoningEffortInKwargs: true,
+  },
   "deepseek-ai/deepseek-v3.2": {
     enableKwargs: { thinking: true },
     disableKwargs: { thinking: false },
@@ -112,7 +129,7 @@ const THINKING_CONFIGS: Record<string, ThinkingConfig> = {
     disableKwargs: { enable_thinking: false },
   },
   // Kimi models: chat_template_kwargs works, reasoning_effort also works
-  "moonshotai/kimi-k2.
+  "moonshotai/kimi-k2.6": {
     enableKwargs: { thinking: true },
     disableKwargs: { thinking: false },
     sendReasoningEffort: true,
@@ -236,6 +253,8 @@ const CONTEXT_WINDOWS: Record<string, number> = {
   "deepseek-ai/deepseek-v3.1": 131072,
   "deepseek-ai/deepseek-v3.1-terminus": 131072,
   "deepseek-ai/deepseek-v3.2": 131072,
+  "deepseek-ai/deepseek-v4-flash": 1048576,
+  "deepseek-ai/deepseek-v4-pro": 1048576,
   "deepseek-ai/deepseek-r1-distill-llama-8b": 131072,
   "deepseek-ai/deepseek-r1-distill-qwen-14b": 131072,
   "deepseek-ai/deepseek-r1-distill-qwen-32b": 131072,
@@ -245,7 +264,7 @@
   "moonshotai/kimi-k2-instruct": 131072,
   "moonshotai/kimi-k2-instruct-0905": 131072,
   "moonshotai/kimi-k2-thinking": 131072,
-  "moonshotai/kimi-k2.
+  "moonshotai/kimi-k2.6": 262144,
   // MiniMax
   "minimaxai/minimax-m2": 1048576,
   "minimaxai/minimax-m2.1": 1048576,
@@ -369,7 +388,9 @@ const MAX_TOKENS: Record<string, number> = {
   "deepseek-ai/deepseek-v3.1": 16384,
   "deepseek-ai/deepseek-v3.1-terminus": 16384,
   "deepseek-ai/deepseek-v3.2": 16384,
-  "
+  "deepseek-ai/deepseek-v4-flash": 16384,
+  "deepseek-ai/deepseek-v4-pro": 16384,
+  "moonshotai/kimi-k2.6": 16384,
   "moonshotai/kimi-k2-instruct": 8192,
   "moonshotai/kimi-k2-thinking": 16384,
   "minimaxai/minimax-m2": 8192,
@@ -393,10 +414,12 @@
 
 const FEATURED_MODELS = [
   // Flagship / frontier
+  "deepseek-ai/deepseek-v4-flash",
+  "deepseek-ai/deepseek-v4-pro",
   "deepseek-ai/deepseek-v3.2",
   "deepseek-ai/deepseek-v3.1",
   "deepseek-ai/deepseek-v3.1-terminus",
-  "moonshotai/kimi-k2.
+  "moonshotai/kimi-k2.6",
   "moonshotai/kimi-k2-thinking",
   "moonshotai/kimi-k2-instruct",
   "moonshotai/kimi-k2-instruct-0905",
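The context-window and max-token figures above are binary powers, which is how they round to the labels in the README table (a small arithmetic check for readers, not code from this package):

```ts
// 131072 = 128 * 1024, 262144 = 256 * 1024, 1048576 = 1024 * 1024.
const asLabel = (tokens: number): string =>
  tokens >= 1024 * 1024 ? `${tokens / (1024 * 1024)}M` : `${tokens / 1024}K`;

asLabel(1048576); // "1M"   (DeepSeek V4 Flash/Pro, MiniMax M2.x)
asLabel(262144);  // "256K" (Kimi K2.6)
asLabel(131072);  // "128K" (DeepSeek V3.x, GLM-5, ...)
```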
@@ -450,11 +473,148 @@
  * Custom streamSimple that wraps the standard OpenAI completions streamer.
  *
  * Fixes for NVIDIA NIM:
- * 1. Maps pi's
+ * 1. Maps pi's thinking levels to values accepted by NVIDIA NIM
  * 2. Strips reasoning_effort for models where it doesn't trigger thinking
  * 3. Injects chat_template_kwargs per model to actually enable thinking
  * 4. Uses onPayload callback to mutate request params before they're sent
  */
+type NimApiKeyEnvName = (typeof NVIDIA_API_KEY_ENV_NAMES)[number];
+type AuthStorageLike = {
+  get?: (provider: string) => unknown;
+};
+
+interface NimApiKeyCredential {
+  type: "api_key";
+  key: string;
+}
+
+function getNimApiKeyEnv(): NimApiKeyEnvName | undefined {
+  return NVIDIA_API_KEY_ENV_NAMES.find((envName) => !!process.env[envName]);
+}
+
+function getNimApiKey(): string | undefined {
+  const envName = getNimApiKeyEnv();
+  const apiKey = envName ? process.env[envName] : undefined;
+  return apiKey?.trim() || undefined;
+}
+
+function isNimApiKeyEnvName(value: string): value is NimApiKeyEnvName {
+  return NVIDIA_API_KEY_ENV_NAMES.includes(value as NimApiKeyEnvName);
+}
+
+function isNimApiKeyEnvValue(value: string): boolean {
+  return NVIDIA_API_KEY_ENV_NAMES.some((envName) => process.env[envName]?.trim() === value);
+}
+
+function isNimApiKeyCredential(credential: unknown): credential is NimApiKeyCredential {
+  return (
+    typeof credential === "object" &&
+    credential !== null &&
+    (credential as { type?: unknown }).type === "api_key" &&
+    typeof (credential as { key?: unknown }).key === "string"
+  );
+}
+
+function readStoredNimApiKeyConfig(): string | undefined {
+  try {
+    const authPath = join(getAgentDir(), "auth.json");
+    if (!existsSync(authPath)) return undefined;
+
+    const data = JSON.parse(readFileSync(authPath, "utf-8")) as Record<string, unknown>;
+    const credential = data[PROVIDER_NAME];
+    return isNimApiKeyCredential(credential) ? credential.key : undefined;
+  } catch {
+    return undefined;
+  }
+}
+
+function getStoredNimApiKeyConfig(authStorage?: AuthStorageLike): string | undefined {
+  if (authStorage) {
+    try {
+      const credential = authStorage.get?.(PROVIDER_NAME);
+      return isNimApiKeyCredential(credential) ? credential.key : undefined;
+    } catch {
+      return undefined;
+    }
+  }
+
+  return readStoredNimApiKeyConfig();
+}
+
+function hasStoredNimCommandCredential(authStorage?: AuthStorageLike): boolean {
+  return getStoredNimApiKeyConfig(authStorage)?.startsWith("!") ?? false;
+}
+
+function getStoredResolvedNimApiKey(authStorage?: AuthStorageLike): string | undefined {
+  const configuredApiKey = getStoredNimApiKeyConfig(authStorage)?.trim();
+  if (!configuredApiKey || configuredApiKey.startsWith("!")) return undefined;
+
+  const envValue = process.env[configuredApiKey]?.trim();
+  return envValue || configuredApiKey;
+}
+
+function normalizeResolvedNimApiKey(apiKey: string | undefined): string | undefined {
+  if (apiKey === undefined) return undefined;
+
+  const trimmed = apiKey.trim();
+  if (!trimmed) {
+    throw new Error("NVIDIA NIM API key resolved to an empty value.");
+  }
+
+  return trimmed;
+}
+
+function resolveNimApiKey(apiKey: string | undefined, authStorage?: AuthStorageLike): string | undefined {
+  const resolvedApiKey = normalizeResolvedNimApiKey(apiKey);
+  const hasStoredCommandCredential = hasStoredNimCommandCredential(authStorage);
+
+  if (
+    hasStoredCommandCredential &&
+    (!resolvedApiKey || isNimApiKeyEnvName(resolvedApiKey) || isNimApiKeyEnvValue(resolvedApiKey))
+  ) {
+    throw new Error("NVIDIA NIM API key command resolved to an empty value.");
+  }
+
+  const storedApiKey = getStoredResolvedNimApiKey(authStorage);
+  if (storedApiKey && (!resolvedApiKey || isNimApiKeyEnvName(resolvedApiKey))) return storedApiKey;
+
+  if (resolvedApiKey && !isNimApiKeyEnvName(resolvedApiKey)) return resolvedApiKey;
+
+  return getNimApiKey();
+}
+
+function resolveRequiredNimApiKey(apiKey: string | undefined): string {
+  const resolvedApiKey = resolveNimApiKey(apiKey);
+  if (resolvedApiKey) return resolvedApiKey;
+
+  throw new Error(
+    `NVIDIA NIM: no API key configured. Set ${NVIDIA_NIM_API_KEY_ENV} or ${NVIDIA_API_KEY_ENV}. ` +
+      `Get a free API key at https://build.nvidia.com and export it: ` +
+      `export ${NVIDIA_NIM_API_KEY_ENV}=nvapi-...`,
+  );
+}
+
+function mapNimTopLevelReasoning(reasoning: SimpleStreamOptions["reasoning"]): SimpleStreamOptions["reasoning"] {
+  if (reasoning === "minimal") return "low";
+  if (reasoning === "xhigh") return "high";
+  return reasoning;
+}
+
+function mapDeepSeekV4Reasoning(reasoning: SimpleStreamOptions["reasoning"]): "high" | "max" {
+  return reasoning === "xhigh" ? "max" : "high";
+}
+
+function buildThinkingKwargs(
+  thinkingConfig: ThinkingConfig,
+  reasoning: SimpleStreamOptions["reasoning"],
+): Record<string, unknown> {
+  const kwargs = { ...thinkingConfig.enableKwargs };
+  if (thinkingConfig.includeReasoningEffortInKwargs) {
+    kwargs.reasoning_effort = mapDeepSeekV4Reasoning(reasoning);
+  }
+  return kwargs;
+}
+
 function nimStreamSimple(
   model: Model<Api>,
   context: Context,
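Read as input and expected output, the new mapping helpers in this hunk resolve like this (illustrative calls that assume the functions above are in scope):

```ts
// Expected results, derived from the helper bodies in this hunk.
mapNimTopLevelReasoning("minimal"); // "low"  (NIM has no "minimal" effort level)
mapNimTopLevelReasoning("xhigh");   // "high" (top-level effort caps at "high")
mapDeepSeekV4Reasoning("high");     // "high"
mapDeepSeekV4Reasoning("xhigh");    // "max"  (only DeepSeek V4 kwargs go higher)
buildThinkingKwargs(
  { enableKwargs: { thinking: true }, includeReasoningEffortInKwargs: true },
  "xhigh",
); // { thinking: true, reasoning_effort: "max" }
```

For credentials, `resolveNimApiKey` prefers the stored `nvidia-nim` credential when the passed-in key is missing or is just one of the two env-var names, then uses the passed-in key, and finally falls back to whichever of `NVIDIA_NIM_API_KEY` or `NVIDIA_API_KEY` is set.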
@@ -464,12 +624,9 @@ function nimStreamSimple(
   const reasoning = options?.reasoning;
   const isThinkingEnabled = !!reasoning;
 
-  // Map
-  //
-
-  if (reasoning === "minimal") {
-    mappedReasoning = "low";
-  }
+  // Map provider-agnostic pi levels to NIM's accepted top-level values.
+  // Model-specific chat_template_kwargs may apply a different mapping below.
+  const mappedReasoning = mapNimTopLevelReasoning(reasoning);
 
   // For models that have a thinking config: we handle thinking via chat_template_kwargs.
   // Suppress reasoning_effort (set reasoning to undefined) unless the model explicitly
@@ -481,16 +638,9 @@
     effectiveReasoning = undefined;
   }
 
-  //
-  //
-  const nimApiKey =
-  if (!nimApiKey) {
-    throw new Error(
-      `NVIDIA NIM: NVIDIA_NIM_API_KEY environment variable is not set. ` +
-      `Get a free API key at https://build.nvidia.com and export it: ` +
-      `export NVIDIA_NIM_API_KEY=nvapi-...`
-    );
-  }
+  // Use pi's already-resolved provider key when available (auth.json, shell command,
+  // CLI override), and fall back to the two NVIDIA environment variable names.
+  const nimApiKey = resolveRequiredNimApiKey(options?.apiKey);
 
   const modifiedOptions: SimpleStreamOptions = {
     ...options,
@@ -502,7 +652,7 @@
       if (thinkingConfig) {
         if (isThinkingEnabled) {
           // Inject chat_template_kwargs to enable thinking
-          p.chat_template_kwargs = thinkingConfig
+          p.chat_template_kwargs = buildThinkingKwargs(thinkingConfig, reasoning);
         } else if (thinkingConfig.disableKwargs) {
           // Explicitly disable thinking (some models think by default, e.g. GLM-5/4.7)
           p.chat_template_kwargs = thinkingConfig.disableKwargs;
@@ -532,7 +682,7 @@
       }
 
       // Chain to original onPayload if present
-      options?.onPayload?.(params);
+      return options?.onPayload?.(params, model);
     },
   };
 
@@ -612,7 +762,34 @@ interface NimApiModel {
   owned_by: string;
 }
 
-
+type NimModelFetchResult =
+  | { ok: true; modelIds: string[] }
+  | { ok: false; reason: "auth" | "transient" | "invalid" | "network" | "other" };
+
+const NIM_DISCOVERY_CREDENTIAL_WARNING =
+  "NVIDIA NIM model discovery skipped: check your nvidia-nim credentials.";
+
+function sanitizeNimLogMessage(message: string): string {
+  return message.replace(/nvapi-[A-Za-z0-9._-]+/g, "nvapi-[REDACTED]");
+}
+
+function notifyNimDiscoveryCredentialWarning(ctx: any): void {
+  ctx?.ui?.notify?.(NIM_DISCOVERY_CREDENTIAL_WARNING, "warning");
+}
+
+async function resolveNimDiscoveryApiKey(ctx: any): Promise<string | undefined> {
+  try {
+    const apiKey = await ctx?.modelRegistry?.getApiKeyForProvider?.(PROVIDER_NAME);
+    return resolveNimApiKey(apiKey, ctx?.modelRegistry?.authStorage);
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    console.warn(`pi-nvidia-nim: ${sanitizeNimLogMessage(message)}`);
+    notifyNimDiscoveryCredentialWarning(ctx);
+    return undefined;
+  }
+}
+
+async function fetchNimModels(apiKey: string): Promise<NimModelFetchResult> {
   try {
     const response = await fetch(`${NVIDIA_NIM_BASE_URL}/models`, {
       headers: {
@@ -622,12 +799,25 @@ async function fetchNimModels(apiKey: string): Promise<string[]> {
       signal: AbortSignal.timeout(10000),
     });
 
-    if (
+    if (response.status === 401 || response.status === 403) {
+      return { ok: false, reason: "auth" };
+    }
+
+    if (response.status === 429 || response.status >= 500) {
+      return { ok: false, reason: "transient" };
+    }
+
+    if (!response.ok) return { ok: false, reason: "other" };
+
+    const data = (await response.json()) as { data?: NimApiModel[] };
+    if (!Array.isArray(data.data)) return { ok: false, reason: "invalid" };
 
-
-
+    return {
+      ok: true,
+      modelIds: data.data.map((m) => m.id).filter((id): id is string => typeof id === "string" && id.length > 0),
+    };
   } catch {
-    return
+    return { ok: false, reason: "network" };
   }
 }
 
@@ -636,27 +826,12 @@ async function fetchNimModels(apiKey: string): Promise<string[]> {
 // =============================================================================
 
 export default function (pi: ExtensionAPI) {
-
-
-  //
-  //
-  //
-  //
-  // never intended to use NVIDIA NIM. Skipping registration leaves the
-  // extension dormant: no NIM models appear in the picker, no provider is
-  // available to route to, and other providers (OpenAI, Anthropic, custom
-  // OpenAI-compatible endpoints in models.json, etc.) work normally.
-  //
-  // Re-enable by exporting the key:
-  //   export NVIDIA_NIM_API_KEY=nvapi-...
-  if (!process.env[NVIDIA_NIM_API_KEY_ENV]) {
-    // One-line, stderr, only on first load. Not fatal.
-    console.error(
-      `pi-nvidia-nim: ${NVIDIA_NIM_API_KEY_ENV} not set — extension dormant. ` +
-      `Set the key (https://build.nvidia.com) and reload pi to enable NVIDIA NIM models.`
-    );
-    return;
-  }
+  const providerApiKeyConfig = getNimApiKeyEnv() ?? NVIDIA_NIM_API_KEY_ENV;
+
+  // Always register the curated model list. The request path resolves credentials
+  // through pi first (CLI override, auth.json, shell command), then falls back to
+  // NVIDIA_NIM_API_KEY/NVIDIA_API_KEY. This keeps models available even when pi
+  // was launched by a shell that did not source ~/.bashrc or ~/.zshrc.
 
   // Build the curated model list
   const modelMap = new Map<string, NimModelEntry>();
@@ -672,7 +847,7 @@ export default function (pi: ExtensionAPI) {
 
   pi.registerProvider(PROVIDER_NAME, {
     baseUrl: NVIDIA_NIM_BASE_URL,
-    apiKey:
+    apiKey: providerApiKeyConfig,
    api: "openai-completions",
     authHeader: true,
     models: curatedModels,
@@ -681,11 +856,19 @@
 
   // On session start, discover additional models from the API
   pi.on("session_start", async (_event: any, ctx: any) => {
-    const apiKey =
-    if (!apiKey) return;
+    const apiKey = await resolveNimDiscoveryApiKey(ctx);
+    if (!apiKey) return;
 
     // Fetch live model list
-    const
+    const fetchResult = await fetchNimModels(apiKey);
+    if (!fetchResult.ok) {
+      if (fetchResult.reason === "auth") {
+        notifyNimDiscoveryCredentialWarning(ctx);
+      }
+      return;
+    }
+
+    const liveModelIds = fetchResult.modelIds;
     if (liveModelIds.length === 0) return;
 
     let newModelsAdded = 0;
@@ -706,7 +889,7 @@ export default function (pi: ExtensionAPI) {
     const allModels = Array.from(modelMap.values());
     ctx.modelRegistry.registerProvider(PROVIDER_NAME, {
       baseUrl: NVIDIA_NIM_BASE_URL,
-      apiKey: NVIDIA_NIM_API_KEY_ENV,
+      apiKey: getNimApiKeyEnv() ?? NVIDIA_NIM_API_KEY_ENV,
       api: "openai-completions",
       authHeader: true,
       models: allModels,
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "pi-nvidia-nim",
-  "version": "1.1.
+  "version": "1.1.19",
   "description": "NVIDIA NIM API provider extension for pi coding agent — access 100+ models from build.nvidia.com",
   "type": "module",
   "files": [
@@ -19,7 +19,8 @@
   "scripts": {
     "clean": "echo 'nothing to clean'",
     "build": "echo 'nothing to build'",
-    "check": "
+    "check": "tsc --noEmit",
+    "test": "node --test test/*.test.mjs"
   },
   "devDependencies": {
     "@mariozechner/pi-coding-agent": "^0.52.9",
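The new `test` script runs Node's built-in test runner over `test/*.test.mjs`. A minimal file such a script would pick up might look like this (hypothetical file name and assertion, shown only to illustrate the script; no test files appear in this diff):

```ts
// test/sanitize.test.mjs (hypothetical) - executed by `npm test` via `node --test`.
import test from "node:test";
import assert from "node:assert/strict";

test("nvapi keys are redacted from log messages", () => {
  // Restates the regex used by sanitizeNimLogMessage in index.ts so the test is self-contained.
  const sanitize = (msg) => msg.replace(/nvapi-[A-Za-z0-9._-]+/g, "nvapi-[REDACTED]");
  assert.equal(sanitize("discovery failed for nvapi-abc123"), "discovery failed for nvapi-[REDACTED]");
});
```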