pi-nvidia-nim 1.1.18 → 1.1.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -14
- package/index.ts +249 -58
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# pi-nvidia-nim
|
|
2
2
|
|
|
3
|
-
NVIDIA NIM API provider extension for [pi coding agent](https://github.com/badlogic/pi-mono) - access 100+ models from [build.nvidia.com](https://build.nvidia.com) including DeepSeek V3.2, Kimi K2.5, MiniMax M2.1, GLM-5, GLM-4.7, Qwen3, Llama 4, and many more.
|
|
3
|
+
NVIDIA NIM API provider extension for [pi coding agent](https://github.com/badlogic/pi-mono) - access 100+ models from [build.nvidia.com](https://build.nvidia.com) including DeepSeek V4 Flash/Pro, DeepSeek V3.2, Kimi K2.6, MiniMax M2.1, GLM-5, GLM-4.7, Qwen3, Llama 4, and many more.
|
|
4
4
|
|
|
5
5
|
https://github.com/user-attachments/assets/f44773e4-9bf8-4bb5-a9c0-d5938030701c
|
|
6
6
|
|
|
@@ -17,10 +17,14 @@ https://github.com/user-attachments/assets/f44773e4-9bf8-4bb5-a9c0-d5938030701c
|
|
|
17
17
|
### 2. Set Your API Key
|
|
18
18
|
|
|
19
19
|
```bash
|
|
20
|
+
# Preferred by this extension
|
|
20
21
|
export NVIDIA_NIM_API_KEY=nvapi-your-key-here
|
|
22
|
+
|
|
23
|
+
# Also supported, matching NVIDIA's website examples
|
|
24
|
+
export NVIDIA_API_KEY=nvapi-your-key-here
|
|
21
25
|
```
|
|
22
26
|
|
|
23
|
-
Add
|
|
27
|
+
Add one of these to your `~/.bashrc`, `~/.zshrc`, or shell profile to persist it.
|
|
24
28
|
|
|
25
29
|
### 3. Install the Extension
|
|
26
30
|
|
|
@@ -53,10 +57,10 @@ Once loaded, NVIDIA NIM models appear in the `/model` selector under the `nvidia
|
|
|
53
57
|
|
|
54
58
|
```bash
|
|
55
59
|
# Use a specific NIM model directly
|
|
56
|
-
pi --provider nvidia-nim --model "deepseek-ai/deepseek-
|
|
60
|
+
pi --provider nvidia-nim --model "deepseek-ai/deepseek-v4-flash"
|
|
57
61
|
|
|
58
62
|
# With thinking enabled
|
|
59
|
-
pi --provider nvidia-nim --model "deepseek-ai/deepseek-
|
|
63
|
+
pi --provider nvidia-nim --model "deepseek-ai/deepseek-v4-flash" --thinking high
|
|
60
64
|
|
|
61
65
|
# Limit model cycling to NIM models
|
|
62
66
|
pi --models "nvidia-nim/*"
|
|
@@ -70,11 +74,12 @@ NVIDIA NIM models use a non-standard `chat_template_kwargs` parameter to enable
|
|
|
70
74
|
|
|
71
75
|
When you change the thinking level in pi (`Shift+Tab` to cycle), the extension:
|
|
72
76
|
|
|
73
|
-
1. **Maps
|
|
77
|
+
1. **Maps thinking levels** to values each NIM model accepts. For DeepSeek V4, `xhigh` maps to `max`; lower enabled levels use `high`.
|
|
74
78
|
2. **Injects `chat_template_kwargs`** per model to actually enable thinking:
|
|
79
|
+
- DeepSeek V4: `{ thinking: true, reasoning_effort: "high" | "max" }`
|
|
75
80
|
- DeepSeek V3.x, R1 distills: `{ thinking: true }`
|
|
76
81
|
- GLM-5, GLM-4.7: `{ enable_thinking: true, clear_thinking: false }`
|
|
77
|
-
- Kimi K2.5, K2-thinking: `{ thinking: true }`
|
|
82
|
+
- Kimi K2.6, K2-thinking: `{ thinking: true }`
|
|
78
83
|
- Qwen3, QwQ: `{ enable_thinking: true }`
|
|
79
84
|
3. **Explicitly disables thinking** when the level is "off" for models that think by default (e.g., GLM-5, GLM-4.7).
|
|
80
85
|
4. **Uses `system` role** instead of `developer` for all NIM models - the `developer` role combined with `chat_template_kwargs` causes 500 errors on NIM.
|
|
@@ -84,22 +89,25 @@ When you change the thinking level in pi (`Shift+Tab` to cycle), the extension:
|
|
|
84
89
|
| pi Level | NIM Mapping | Effect |
|
|
85
90
|
|----------|-------------|--------|
|
|
86
91
|
| off | No kwargs (or explicit disable) | No reasoning output |
|
|
87
|
-
| minimal |
|
|
88
|
-
| low | low | Thinking enabled |
|
|
89
|
-
| medium | medium | Thinking enabled |
|
|
92
|
+
| minimal | low, or high for DeepSeek V4 | Thinking enabled |
|
|
93
|
+
| low | low, or high for DeepSeek V4 | Thinking enabled |
|
|
94
|
+
| medium | medium, or high for DeepSeek V4 | Thinking enabled |
|
|
90
95
|
| high | high | Thinking enabled |
|
|
96
|
+
| xhigh | high, or max for DeepSeek V4 | Maximum supported thinking |
|
|
91
97
|
|
|
92
98
|
## Available Models
|
|
93
99
|
|
|
94
|
-
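The extension ships with curated metadata for 39 featured models. At startup, it also queries the NVIDIA NIM API to discover additional models automatically.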
|
|
100
|
+
The extension ships with curated metadata for 42 featured models. At startup, it also queries the NVIDIA NIM API to discover additional models automatically.
|
|
95
101
|
|
|
96
102
|
### Featured Models
|
|
97
103
|
|
|
98
104
|
| Model | Reasoning | Vision | Context |
|
|
99
105
|
|-------|-----------|--------|---------|
|
|
106
|
+
| `deepseek-ai/deepseek-v4-flash` | ✅ | | 1M |
|
|
107
|
+
| `deepseek-ai/deepseek-v4-pro` | ✅ | | 1M |
|
|
100
108
|
| `deepseek-ai/deepseek-v3.2` | ✅ | | 128K |
|
|
101
109
|
| `deepseek-ai/deepseek-v3.1` | ✅ | | 128K |
|
|
102
|
-
| `moonshotai/kimi-k2.
|
|
110
|
+
| `moonshotai/kimi-k2.6` | ✅ | | 256K |
|
|
103
111
|
| `moonshotai/kimi-k2-thinking` | ✅ | | 128K |
|
|
104
112
|
| `minimaxai/minimax-m2.1` | | | 1M |
|
|
105
113
|
| `z-ai/glm5` | ✅ | | 128K |
|
|
@@ -121,7 +129,7 @@ The extension ships with curated metadata for 39 featured models. At startup, it
|
|
|
121
129
|
|
|
122
130
|
### Tool Calling
|
|
123
131
|
|
|
124
|
-
All major models support OpenAI-compatible tool calling. Tested and confirmed working with DeepSeek V3.2, GLM-5, GLM-4.7, Qwen3, Kimi K2.5, and others.
|
|
132
|
+
All major models support OpenAI-compatible tool calling. Tested and confirmed working with DeepSeek V4/V3.2, GLM-5, GLM-4.7, Qwen3, Kimi K2.6, and others.
|
|
125
133
|
|
|
126
134
|
## How It Works
|
|
127
135
|
|
|
@@ -130,13 +138,13 @@ This extension uses `pi.registerProvider()` to register NVIDIA NIM as a custom p
|
|
|
130
138
|
The custom streamer:
|
|
131
139
|
1. Intercepts the request payload via `onPayload` callback
|
|
132
140
|
2. Injects `chat_template_kwargs` for models that need it to enable thinking
|
|
133
|
-
3. Maps unsupported thinking levels (`minimal` → `low`)
|
|
141
|
+
3. Maps unsupported thinking levels to NIM-compatible values (`minimal` → `low`; `xhigh` → `high` or DeepSeek V4 `max`)
|
|
134
142
|
4. Suppresses `reasoning_effort` for models that don't respond to it (e.g., DeepSeek without kwargs)
|
|
135
143
|
5. Uses the standard OpenAI SSE streaming format - pi already parses `reasoning_content` and `reasoning` fields from streaming deltas
|
|
136
144
|
|
|
137
145
|
## Configuration
|
|
138
146
|
|
|
139
|
-
The only configuration needed is the `NVIDIA_NIM_API_KEY` environment variable. All models on NVIDIA NIM are free during the preview period (with rate limits).
|
|
147
|
+
The only configuration needed is either the `NVIDIA_NIM_API_KEY` or `NVIDIA_API_KEY` environment variable. All models on NVIDIA NIM are free during the preview period (with rate limits).
|
|
140
148
|
|
|
141
149
|
## Notes
|
|
142
150
|
|
package/index.ts
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*
|
|
7
7
|
* Setup:
|
|
8
8
|
* 1. Get an API key from https://build.nvidia.com
|
|
9
|
-
* 2. Export it: export NVIDIA_NIM_API_KEY=nvapi-...
|
|
9
|
+
* 2. Export it: export NVIDIA_NIM_API_KEY=nvapi-... (or NVIDIA_API_KEY=nvapi-...)
|
|
10
10
|
* 3. Load the extension:
|
|
11
11
|
* pi -e ./path/to/pi-nvidia-nim
|
|
12
12
|
* # or install as a package:
|
|
@@ -22,17 +22,20 @@
|
|
|
22
22
|
* parameters:
|
|
23
23
|
*
|
|
24
24
|
* - DeepSeek V3.x: `chat_template_kwargs: { thinking: true }`
|
|
25
|
+
* - DeepSeek V4: `chat_template_kwargs: { thinking: true, reasoning_effort: "high" | "max" }`
|
|
25
26
|
* - GLM-5/4.7: `chat_template_kwargs: { enable_thinking: true, clear_thinking: false }`
|
|
26
27
|
* - Kimi K2.5: `chat_template_kwargs: { thinking: true }` (also accepts reasoning_effort)
|
|
27
28
|
* - Qwen3: `chat_template_kwargs: { enable_thinking: true }`
|
|
28
29
|
*
|
|
29
|
-
* NIM only accepts `reasoning_effort` values
|
|
30
|
-
*
|
|
30
|
+
* NIM only accepts selected `reasoning_effort` values. The extension maps pi's
|
|
31
|
+
* provider-agnostic levels to the values each NIM model accepts.
|
|
31
32
|
*
|
|
32
33
|
* Some models (e.g., GLM-5, GLM-4.7) always produce reasoning output regardless of
|
|
33
34
|
* thinking settings.
|
|
34
35
|
*/
|
|
35
36
|
|
|
37
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
38
|
+
import { join } from "node:path";
|
|
36
39
|
import type {
|
|
37
40
|
Api,
|
|
38
41
|
AssistantMessageEventStream,
|
|
@@ -41,7 +44,7 @@ import type {
|
|
|
41
44
|
SimpleStreamOptions,
|
|
42
45
|
} from "@mariozechner/pi-ai";
|
|
43
46
|
import { streamSimpleOpenAICompletions } from "@mariozechner/pi-ai";
|
|
44
|
-
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
47
|
+
import { getAgentDir, type ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
45
48
|
|
|
46
49
|
// =============================================================================
|
|
47
50
|
// Constants
|
|
@@ -49,6 +52,8 @@ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
|
49
52
|
|
|
50
53
|
const NVIDIA_NIM_BASE_URL = "https://integrate.api.nvidia.com/v1";
|
|
51
54
|
const NVIDIA_NIM_API_KEY_ENV = "NVIDIA_NIM_API_KEY";
|
|
55
|
+
const NVIDIA_API_KEY_ENV = "NVIDIA_API_KEY";
|
|
56
|
+
const NVIDIA_API_KEY_ENV_NAMES = [NVIDIA_NIM_API_KEY_ENV, NVIDIA_API_KEY_ENV] as const;
|
|
52
57
|
const PROVIDER_NAME = "nvidia-nim";
|
|
53
58
|
|
|
54
59
|
// =============================================================================
|
|
@@ -70,10 +75,22 @@ interface ThinkingConfig {
|
|
|
70
75
|
disableKwargs?: Record<string, unknown>;
|
|
71
76
|
/** If true, also send reasoning_effort alongside chat_template_kwargs */
|
|
72
77
|
sendReasoningEffort?: boolean;
|
|
78
|
+
/** If true, include a model-specific reasoning_effort inside chat_template_kwargs */
|
|
79
|
+
includeReasoningEffortInKwargs?: boolean;
|
|
73
80
|
}
|
|
74
81
|
|
|
75
82
|
const THINKING_CONFIGS: Record<string, ThinkingConfig> = {
|
|
76
83
|
// DeepSeek models need chat_template_kwargs - reasoning_effort alone doesn't trigger thinking
|
|
84
|
+
"deepseek-ai/deepseek-v4-flash": {
|
|
85
|
+
enableKwargs: { thinking: true },
|
|
86
|
+
disableKwargs: { thinking: false },
|
|
87
|
+
includeReasoningEffortInKwargs: true,
|
|
88
|
+
},
|
|
89
|
+
"deepseek-ai/deepseek-v4-pro": {
|
|
90
|
+
enableKwargs: { thinking: true },
|
|
91
|
+
disableKwargs: { thinking: false },
|
|
92
|
+
includeReasoningEffortInKwargs: true,
|
|
93
|
+
},
|
|
77
94
|
"deepseek-ai/deepseek-v3.2": {
|
|
78
95
|
enableKwargs: { thinking: true },
|
|
79
96
|
disableKwargs: { thinking: false },
|
|
@@ -112,7 +129,7 @@ const THINKING_CONFIGS: Record<string, ThinkingConfig> = {
|
|
|
112
129
|
disableKwargs: { enable_thinking: false },
|
|
113
130
|
},
|
|
114
131
|
// Kimi models: chat_template_kwargs works, reasoning_effort also works
|
|
115
|
-
"moonshotai/kimi-k2.
|
|
132
|
+
"moonshotai/kimi-k2.6": {
|
|
116
133
|
enableKwargs: { thinking: true },
|
|
117
134
|
disableKwargs: { thinking: false },
|
|
118
135
|
sendReasoningEffort: true,
|
|
@@ -236,6 +253,8 @@ const CONTEXT_WINDOWS: Record<string, number> = {
|
|
|
236
253
|
"deepseek-ai/deepseek-v3.1": 131072,
|
|
237
254
|
"deepseek-ai/deepseek-v3.1-terminus": 131072,
|
|
238
255
|
"deepseek-ai/deepseek-v3.2": 131072,
|
|
256
|
+
"deepseek-ai/deepseek-v4-flash": 1048576,
|
|
257
|
+
"deepseek-ai/deepseek-v4-pro": 1048576,
|
|
239
258
|
"deepseek-ai/deepseek-r1-distill-llama-8b": 131072,
|
|
240
259
|
"deepseek-ai/deepseek-r1-distill-qwen-14b": 131072,
|
|
241
260
|
"deepseek-ai/deepseek-r1-distill-qwen-32b": 131072,
|
|
@@ -245,7 +264,7 @@ const CONTEXT_WINDOWS: Record<string, number> = {
|
|
|
245
264
|
"moonshotai/kimi-k2-instruct": 131072,
|
|
246
265
|
"moonshotai/kimi-k2-instruct-0905": 131072,
|
|
247
266
|
"moonshotai/kimi-k2-thinking": 131072,
|
|
248
|
-
"moonshotai/kimi-k2.
|
|
267
|
+
"moonshotai/kimi-k2.6": 262144,
|
|
249
268
|
// MiniMax
|
|
250
269
|
"minimaxai/minimax-m2": 1048576,
|
|
251
270
|
"minimaxai/minimax-m2.1": 1048576,
|
|
@@ -369,7 +388,9 @@ const MAX_TOKENS: Record<string, number> = {
|
|
|
369
388
|
"deepseek-ai/deepseek-v3.1": 16384,
|
|
370
389
|
"deepseek-ai/deepseek-v3.1-terminus": 16384,
|
|
371
390
|
"deepseek-ai/deepseek-v3.2": 16384,
|
|
372
|
-
"
|
|
391
|
+
"deepseek-ai/deepseek-v4-flash": 16384,
|
|
392
|
+
"deepseek-ai/deepseek-v4-pro": 16384,
|
|
393
|
+
"moonshotai/kimi-k2.6": 16384,
|
|
373
394
|
"moonshotai/kimi-k2-instruct": 8192,
|
|
374
395
|
"moonshotai/kimi-k2-thinking": 16384,
|
|
375
396
|
"minimaxai/minimax-m2": 8192,
|
|
@@ -393,10 +414,12 @@ const MAX_TOKENS: Record<string, number> = {
|
|
|
393
414
|
|
|
394
415
|
const FEATURED_MODELS = [
|
|
395
416
|
// Flagship / frontier
|
|
417
|
+
"deepseek-ai/deepseek-v4-flash",
|
|
418
|
+
"deepseek-ai/deepseek-v4-pro",
|
|
396
419
|
"deepseek-ai/deepseek-v3.2",
|
|
397
420
|
"deepseek-ai/deepseek-v3.1",
|
|
398
421
|
"deepseek-ai/deepseek-v3.1-terminus",
|
|
399
|
-
"moonshotai/kimi-k2.
|
|
422
|
+
"moonshotai/kimi-k2.6",
|
|
400
423
|
"moonshotai/kimi-k2-thinking",
|
|
401
424
|
"moonshotai/kimi-k2-instruct",
|
|
402
425
|
"moonshotai/kimi-k2-instruct-0905",
|
|
@@ -450,26 +473,168 @@ const FEATURED_MODELS = [
|
|
|
450
473
|
* Custom streamSimple that wraps the standard OpenAI completions streamer.
|
|
451
474
|
*
|
|
452
475
|
* Fixes for NVIDIA NIM:
|
|
453
|
-
* 1. Maps pi's
|
|
476
|
+
* 1. Maps pi's thinking levels to values accepted by NVIDIA NIM
|
|
454
477
|
* 2. Strips reasoning_effort for models where it doesn't trigger thinking
|
|
455
478
|
* 3. Injects chat_template_kwargs per model to actually enable thinking
|
|
456
479
|
* 4. Uses onPayload callback to mutate request params before they're sent
|
|
457
480
|
*/
|
|
481
|
+
type NimApiKeyEnvName = (typeof NVIDIA_API_KEY_ENV_NAMES)[number];
|
|
482
|
+
type AuthStorageLike = {
|
|
483
|
+
get?: (provider: string) => unknown;
|
|
484
|
+
};
|
|
485
|
+
|
|
486
|
+
interface NimApiKeyCredential {
|
|
487
|
+
type: "api_key";
|
|
488
|
+
key: string;
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
function getNimApiKeyEnv(): NimApiKeyEnvName | undefined {
|
|
492
|
+
return NVIDIA_API_KEY_ENV_NAMES.find((envName) => !!process.env[envName]);
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
function getNimApiKey(): string | undefined {
|
|
496
|
+
const envName = getNimApiKeyEnv();
|
|
497
|
+
const apiKey = envName ? process.env[envName] : undefined;
|
|
498
|
+
return apiKey?.trim() || undefined;
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
function isNimApiKeyEnvName(value: string): value is NimApiKeyEnvName {
|
|
502
|
+
return NVIDIA_API_KEY_ENV_NAMES.includes(value as NimApiKeyEnvName);
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
function isNimApiKeyEnvValue(value: string): boolean {
|
|
506
|
+
return NVIDIA_API_KEY_ENV_NAMES.some((envName) => process.env[envName]?.trim() === value);
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
function isNimApiKeyCredential(credential: unknown): credential is NimApiKeyCredential {
|
|
510
|
+
return (
|
|
511
|
+
typeof credential === "object" &&
|
|
512
|
+
credential !== null &&
|
|
513
|
+
(credential as { type?: unknown }).type === "api_key" &&
|
|
514
|
+
typeof (credential as { key?: unknown }).key === "string"
|
|
515
|
+
);
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
function readStoredNimApiKeyConfig(): string | undefined {
|
|
519
|
+
try {
|
|
520
|
+
const authPath = join(getAgentDir(), "auth.json");
|
|
521
|
+
if (!existsSync(authPath)) return undefined;
|
|
522
|
+
|
|
523
|
+
const data = JSON.parse(readFileSync(authPath, "utf-8")) as Record<string, unknown>;
|
|
524
|
+
const credential = data[PROVIDER_NAME];
|
|
525
|
+
return isNimApiKeyCredential(credential) ? credential.key : undefined;
|
|
526
|
+
} catch {
|
|
527
|
+
return undefined;
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
function getStoredNimApiKeyConfig(authStorage?: AuthStorageLike): string | undefined {
|
|
532
|
+
if (authStorage) {
|
|
533
|
+
try {
|
|
534
|
+
const credential = authStorage.get?.(PROVIDER_NAME);
|
|
535
|
+
return isNimApiKeyCredential(credential) ? credential.key : undefined;
|
|
536
|
+
} catch {
|
|
537
|
+
return undefined;
|
|
538
|
+
}
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
return readStoredNimApiKeyConfig();
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
function hasStoredNimCommandCredential(authStorage?: AuthStorageLike): boolean {
|
|
545
|
+
return getStoredNimApiKeyConfig(authStorage)?.startsWith("!") ?? false;
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
function getStoredResolvedNimApiKey(authStorage?: AuthStorageLike): string | undefined {
|
|
549
|
+
const configuredApiKey = getStoredNimApiKeyConfig(authStorage)?.trim();
|
|
550
|
+
if (!configuredApiKey || configuredApiKey.startsWith("!")) return undefined;
|
|
551
|
+
|
|
552
|
+
const envValue = process.env[configuredApiKey]?.trim();
|
|
553
|
+
return envValue || configuredApiKey;
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
function normalizeResolvedNimApiKey(apiKey: string | undefined): string | undefined {
|
|
557
|
+
if (apiKey === undefined) return undefined;
|
|
558
|
+
|
|
559
|
+
const trimmed = apiKey.trim();
|
|
560
|
+
if (!trimmed) {
|
|
561
|
+
throw new Error("NVIDIA NIM API key resolved to an empty value.");
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
return trimmed;
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
function resolveNimApiKey(apiKey: string | undefined, authStorage?: AuthStorageLike): string | undefined {
|
|
568
|
+
const resolvedApiKey = normalizeResolvedNimApiKey(apiKey);
|
|
569
|
+
const hasStoredCommandCredential = hasStoredNimCommandCredential(authStorage);
|
|
570
|
+
|
|
571
|
+
if (
|
|
572
|
+
hasStoredCommandCredential &&
|
|
573
|
+
(!resolvedApiKey || isNimApiKeyEnvName(resolvedApiKey) || isNimApiKeyEnvValue(resolvedApiKey))
|
|
574
|
+
) {
|
|
575
|
+
throw new Error("NVIDIA NIM API key command resolved to an empty value.");
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
const storedApiKey = getStoredResolvedNimApiKey(authStorage);
|
|
579
|
+
if (storedApiKey && (!resolvedApiKey || isNimApiKeyEnvName(resolvedApiKey))) return storedApiKey;
|
|
580
|
+
|
|
581
|
+
if (resolvedApiKey && !isNimApiKeyEnvName(resolvedApiKey)) return resolvedApiKey;
|
|
582
|
+
|
|
583
|
+
return getNimApiKey();
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
function resolveRequiredNimApiKey(apiKey: string | undefined): string {
|
|
587
|
+
const resolvedApiKey = resolveNimApiKey(apiKey);
|
|
588
|
+
if (resolvedApiKey) return resolvedApiKey;
|
|
589
|
+
|
|
590
|
+
throw new Error(
|
|
591
|
+
`NVIDIA NIM: no API key configured. Set ${NVIDIA_NIM_API_KEY_ENV} or ${NVIDIA_API_KEY_ENV}. ` +
|
|
592
|
+
`Get a free API key at https://build.nvidia.com and export it: ` +
|
|
593
|
+
`export ${NVIDIA_NIM_API_KEY_ENV}=nvapi-...`,
|
|
594
|
+
);
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
function mapNimTopLevelReasoning(reasoning: SimpleStreamOptions["reasoning"]): SimpleStreamOptions["reasoning"] {
|
|
598
|
+
if (reasoning === "minimal") return "low";
|
|
599
|
+
if (reasoning === "xhigh") return "high";
|
|
600
|
+
return reasoning;
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
function mapDeepSeekV4Reasoning(reasoning: SimpleStreamOptions["reasoning"]): "high" | "max" {
|
|
604
|
+
return reasoning === "xhigh" ? "max" : "high";
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
function buildThinkingKwargs(
|
|
608
|
+
thinkingConfig: ThinkingConfig,
|
|
609
|
+
reasoning: SimpleStreamOptions["reasoning"],
|
|
610
|
+
): Record<string, unknown> {
|
|
611
|
+
const kwargs = { ...thinkingConfig.enableKwargs };
|
|
612
|
+
if (thinkingConfig.includeReasoningEffortInKwargs) {
|
|
613
|
+
kwargs.reasoning_effort = mapDeepSeekV4Reasoning(reasoning);
|
|
614
|
+
}
|
|
615
|
+
return kwargs;
|
|
616
|
+
}
|
|
617
|
+
|
|
458
618
|
function nimStreamSimple(
|
|
459
619
|
model: Model<Api>,
|
|
460
620
|
context: Context,
|
|
461
621
|
options?: SimpleStreamOptions,
|
|
462
622
|
): AssistantMessageEventStream {
|
|
623
|
+
// pi-coding-agent registers streamSimple globally per `api` type (not per provider).
|
|
624
|
+
// This streamer is invoked for ALL openai-completions providers (e.g. openrouter, openai),
|
|
625
|
+
// not just nvidia-nim. Pass non-NIM calls through unchanged so we don't leak the
|
|
626
|
+
// NVIDIA_NIM_API_KEY into other providers' Authorization headers.
|
|
627
|
+
if (model.provider !== PROVIDER_NAME) {
|
|
628
|
+
return streamSimpleOpenAICompletions(model as Model<"openai-completions">, context, options);
|
|
629
|
+
}
|
|
630
|
+
|
|
463
631
|
const thinkingConfig = THINKING_CONFIGS[model.id];
|
|
464
632
|
const reasoning = options?.reasoning;
|
|
465
633
|
const isThinkingEnabled = !!reasoning;
|
|
466
634
|
|
|
467
|
-
// Map
|
|
468
|
-
//
|
|
469
|
-
|
|
470
|
-
if (reasoning === "minimal") {
|
|
471
|
-
mappedReasoning = "low";
|
|
472
|
-
}
|
|
635
|
+
// Map provider-agnostic pi levels to NIM's accepted top-level values.
|
|
636
|
+
// Model-specific chat_template_kwargs may apply a different mapping below.
|
|
637
|
+
const mappedReasoning = mapNimTopLevelReasoning(reasoning);
|
|
473
638
|
|
|
474
639
|
// For models that have a thinking config: we handle thinking via chat_template_kwargs.
|
|
475
640
|
// Suppress reasoning_effort (set reasoning to undefined) unless the model explicitly
|
|
@@ -481,16 +646,9 @@ function nimStreamSimple(
|
|
|
481
646
|
effectiveReasoning = undefined;
|
|
482
647
|
}
|
|
483
648
|
|
|
484
|
-
//
|
|
485
|
-
//
|
|
486
|
-
const nimApiKey =
|
|
487
|
-
if (!nimApiKey) {
|
|
488
|
-
throw new Error(
|
|
489
|
-
`NVIDIA NIM: NVIDIA_NIM_API_KEY environment variable is not set. ` +
|
|
490
|
-
`Get a free API key at https://build.nvidia.com and export it: ` +
|
|
491
|
-
`export NVIDIA_NIM_API_KEY=nvapi-...`
|
|
492
|
-
);
|
|
493
|
-
}
|
|
649
|
+
// Use pi's already-resolved provider key when available (auth.json, shell command,
|
|
650
|
+
// CLI override), and fall back to the two NVIDIA environment variable names.
|
|
651
|
+
const nimApiKey = resolveRequiredNimApiKey(options?.apiKey);
|
|
494
652
|
|
|
495
653
|
const modifiedOptions: SimpleStreamOptions = {
|
|
496
654
|
...options,
|
|
@@ -502,7 +660,7 @@ function nimStreamSimple(
|
|
|
502
660
|
if (thinkingConfig) {
|
|
503
661
|
if (isThinkingEnabled) {
|
|
504
662
|
// Inject chat_template_kwargs to enable thinking
|
|
505
|
-
p.chat_template_kwargs = thinkingConfig
|
|
663
|
+
p.chat_template_kwargs = buildThinkingKwargs(thinkingConfig, reasoning);
|
|
506
664
|
} else if (thinkingConfig.disableKwargs) {
|
|
507
665
|
// Explicitly disable thinking (some models think by default, e.g. GLM-5/4.7)
|
|
508
666
|
p.chat_template_kwargs = thinkingConfig.disableKwargs;
|
|
@@ -532,7 +690,7 @@ function nimStreamSimple(
|
|
|
532
690
|
}
|
|
533
691
|
|
|
534
692
|
// Chain to original onPayload if present
|
|
535
|
-
options?.onPayload?.(params);
|
|
693
|
+
return options?.onPayload?.(params, model);
|
|
536
694
|
},
|
|
537
695
|
};
|
|
538
696
|
|
|
@@ -612,7 +770,34 @@ interface NimApiModel {
|
|
|
612
770
|
owned_by: string;
|
|
613
771
|
}
|
|
614
772
|
|
|
615
|
-
|
|
773
|
+
type NimModelFetchResult =
|
|
774
|
+
| { ok: true; modelIds: string[] }
|
|
775
|
+
| { ok: false; reason: "auth" | "transient" | "invalid" | "network" | "other" };
|
|
776
|
+
|
|
777
|
+
const NIM_DISCOVERY_CREDENTIAL_WARNING =
|
|
778
|
+
"NVIDIA NIM model discovery skipped: check your nvidia-nim credentials.";
|
|
779
|
+
|
|
780
|
+
function sanitizeNimLogMessage(message: string): string {
|
|
781
|
+
return message.replace(/nvapi-[A-Za-z0-9._-]+/g, "nvapi-[REDACTED]");
|
|
782
|
+
}
|
|
783
|
+
|
|
784
|
+
function notifyNimDiscoveryCredentialWarning(ctx: any): void {
|
|
785
|
+
ctx?.ui?.notify?.(NIM_DISCOVERY_CREDENTIAL_WARNING, "warning");
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
async function resolveNimDiscoveryApiKey(ctx: any): Promise<string | undefined> {
|
|
789
|
+
try {
|
|
790
|
+
const apiKey = await ctx?.modelRegistry?.getApiKeyForProvider?.(PROVIDER_NAME);
|
|
791
|
+
return resolveNimApiKey(apiKey, ctx?.modelRegistry?.authStorage);
|
|
792
|
+
} catch (error) {
|
|
793
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
794
|
+
console.warn(`pi-nvidia-nim: ${sanitizeNimLogMessage(message)}`);
|
|
795
|
+
notifyNimDiscoveryCredentialWarning(ctx);
|
|
796
|
+
return undefined;
|
|
797
|
+
}
|
|
798
|
+
}
|
|
799
|
+
|
|
800
|
+
async function fetchNimModels(apiKey: string): Promise<NimModelFetchResult> {
|
|
616
801
|
try {
|
|
617
802
|
const response = await fetch(`${NVIDIA_NIM_BASE_URL}/models`, {
|
|
618
803
|
headers: {
|
|
@@ -622,12 +807,25 @@ async function fetchNimModels(apiKey: string): Promise<string[]> {
|
|
|
622
807
|
signal: AbortSignal.timeout(10000),
|
|
623
808
|
});
|
|
624
809
|
|
|
625
|
-
if (
|
|
810
|
+
if (response.status === 401 || response.status === 403) {
|
|
811
|
+
return { ok: false, reason: "auth" };
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
if (response.status === 429 || response.status >= 500) {
|
|
815
|
+
return { ok: false, reason: "transient" };
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
if (!response.ok) return { ok: false, reason: "other" };
|
|
819
|
+
|
|
820
|
+
const data = (await response.json()) as { data?: NimApiModel[] };
|
|
821
|
+
if (!Array.isArray(data.data)) return { ok: false, reason: "invalid" };
|
|
626
822
|
|
|
627
|
-
|
|
628
|
-
|
|
823
|
+
return {
|
|
824
|
+
ok: true,
|
|
825
|
+
modelIds: data.data.map((m) => m.id).filter((id): id is string => typeof id === "string" && id.length > 0),
|
|
826
|
+
};
|
|
629
827
|
} catch {
|
|
630
|
-
return
|
|
828
|
+
return { ok: false, reason: "network" };
|
|
631
829
|
}
|
|
632
830
|
}
|
|
633
831
|
|
|
@@ -636,27 +834,12 @@ async function fetchNimModels(apiKey: string): Promise<string[]> {
|
|
|
636
834
|
// =============================================================================
|
|
637
835
|
|
|
638
836
|
export default function (pi: ExtensionAPI) {
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
//
|
|
642
|
-
//
|
|
643
|
-
//
|
|
644
|
-
//
|
|
645
|
-
// never intended to use NVIDIA NIM. Skipping registration leaves the
|
|
646
|
-
// extension dormant: no NIM models appear in the picker, no provider is
|
|
647
|
-
// available to route to, and other providers (OpenAI, Anthropic, custom
|
|
648
|
-
// OpenAI-compatible endpoints in models.json, etc.) work normally.
|
|
649
|
-
//
|
|
650
|
-
// Re-enable by exporting the key:
|
|
651
|
-
// export NVIDIA_NIM_API_KEY=nvapi-...
|
|
652
|
-
if (!process.env[NVIDIA_NIM_API_KEY_ENV]) {
|
|
653
|
-
// One-line, stderr, only on first load. Not fatal.
|
|
654
|
-
console.error(
|
|
655
|
-
`pi-nvidia-nim: ${NVIDIA_NIM_API_KEY_ENV} not set — extension dormant. ` +
|
|
656
|
-
`Set the key (https://build.nvidia.com) and reload pi to enable NVIDIA NIM models.`
|
|
657
|
-
);
|
|
658
|
-
return;
|
|
659
|
-
}
|
|
837
|
+
const providerApiKeyConfig = getNimApiKeyEnv() ?? NVIDIA_NIM_API_KEY_ENV;
|
|
838
|
+
|
|
839
|
+
// Always register the curated model list. The request path resolves credentials
|
|
840
|
+
// through pi first (CLI override, auth.json, shell command), then falls back to
|
|
841
|
+
// NVIDIA_NIM_API_KEY/NVIDIA_API_KEY. This keeps models available even when pi
|
|
842
|
+
// was launched by a shell that did not source ~/.bashrc or ~/.zshrc.
|
|
660
843
|
|
|
661
844
|
// Build the curated model list
|
|
662
845
|
const modelMap = new Map<string, NimModelEntry>();
|
|
@@ -672,7 +855,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
672
855
|
|
|
673
856
|
pi.registerProvider(PROVIDER_NAME, {
|
|
674
857
|
baseUrl: NVIDIA_NIM_BASE_URL,
|
|
675
|
-
apiKey:
|
|
858
|
+
apiKey: providerApiKeyConfig,
|
|
676
859
|
api: "openai-completions",
|
|
677
860
|
authHeader: true,
|
|
678
861
|
models: curatedModels,
|
|
@@ -681,11 +864,19 @@ export default function (pi: ExtensionAPI) {
|
|
|
681
864
|
|
|
682
865
|
// On session start, discover additional models from the API
|
|
683
866
|
pi.on("session_start", async (_event: any, ctx: any) => {
|
|
684
|
-
const apiKey =
|
|
685
|
-
if (!apiKey) return;
|
|
867
|
+
const apiKey = await resolveNimDiscoveryApiKey(ctx);
|
|
868
|
+
if (!apiKey) return;
|
|
686
869
|
|
|
687
870
|
// Fetch live model list
|
|
688
|
-
const
|
|
871
|
+
const fetchResult = await fetchNimModels(apiKey);
|
|
872
|
+
if (!fetchResult.ok) {
|
|
873
|
+
if (fetchResult.reason === "auth") {
|
|
874
|
+
notifyNimDiscoveryCredentialWarning(ctx);
|
|
875
|
+
}
|
|
876
|
+
return;
|
|
877
|
+
}
|
|
878
|
+
|
|
879
|
+
const liveModelIds = fetchResult.modelIds;
|
|
689
880
|
if (liveModelIds.length === 0) return;
|
|
690
881
|
|
|
691
882
|
let newModelsAdded = 0;
|
|
@@ -706,7 +897,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
706
897
|
const allModels = Array.from(modelMap.values());
|
|
707
898
|
ctx.modelRegistry.registerProvider(PROVIDER_NAME, {
|
|
708
899
|
baseUrl: NVIDIA_NIM_BASE_URL,
|
|
709
|
-
apiKey: NVIDIA_NIM_API_KEY_ENV,
|
|
900
|
+
apiKey: getNimApiKeyEnv() ?? NVIDIA_NIM_API_KEY_ENV,
|
|
710
901
|
api: "openai-completions",
|
|
711
902
|
authHeader: true,
|
|
712
903
|
models: allModels,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-nvidia-nim",
|
|
3
|
-
"version": "1.1.
|
|
3
|
+
"version": "1.1.20",
|
|
4
4
|
"description": "NVIDIA NIM API provider extension for pi coding agent — access 100+ models from build.nvidia.com",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"files": [
|
|
@@ -19,7 +19,8 @@
|
|
|
19
19
|
"scripts": {
|
|
20
20
|
"clean": "echo 'nothing to clean'",
|
|
21
21
|
"build": "echo 'nothing to build'",
|
|
22
|
-
"check": "
|
|
22
|
+
"check": "tsc --noEmit",
|
|
23
|
+
"test": "node --test test/*.test.mjs"
|
|
23
24
|
},
|
|
24
25
|
"devDependencies": {
|
|
25
26
|
"@mariozechner/pi-coding-agent": "^0.52.9",
|