pi-nvidia-nim 1.1.15 → 1.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -5
- package/index.ts +15 -5
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# pi-nvidia-nim
|
|
2
2
|
|
|
3
|
-
NVIDIA NIM API provider extension for [pi coding agent](https://github.com/badlogic/pi-mono) - access 100+ models from [build.nvidia.com](https://build.nvidia.com) including DeepSeek V3.2, Kimi K2.5, MiniMax M2.1, GLM-4.7, Qwen3, Llama 4, and many more.
|
|
3
|
+
NVIDIA NIM API provider extension for [pi coding agent](https://github.com/badlogic/pi-mono) - access 100+ models from [build.nvidia.com](https://build.nvidia.com) including DeepSeek V3.2, Kimi K2.5, MiniMax M2.1, GLM-5, GLM-4.7, Qwen3, Llama 4, and many more.
|
|
4
4
|
|
|
5
5
|
https://github.com/user-attachments/assets/f44773e4-9bf8-4bb5-a9c0-d5938030701c
|
|
6
6
|
|
|
@@ -73,10 +73,10 @@ When you change the thinking level in pi (`Shift+Tab` to cycle), the extension:
|
|
|
73
73
|
1. **Maps `"minimal"` → `"low"`** - NIM only accepts `low`, `medium`, `high` (not `minimal`). Selecting "minimal" in pi works fine; it's silently mapped.
|
|
74
74
|
2. **Injects `chat_template_kwargs`** per model to actually enable thinking:
|
|
75
75
|
- DeepSeek V3.x, R1 distills: `{ thinking: true }`
|
|
76
|
-
- GLM-4.7: `{ enable_thinking: true, clear_thinking: false }`
|
|
76
|
+
- GLM-5, GLM-4.7: `{ enable_thinking: true, clear_thinking: false }`
|
|
77
77
|
- Kimi K2.5, K2-thinking: `{ thinking: true }`
|
|
78
78
|
- Qwen3, QwQ: `{ enable_thinking: true }`
|
|
79
|
-
3. **Explicitly disables thinking** when the level is "off" for models that think by default (e.g., GLM-4.7).
|
|
79
|
+
3. **Explicitly disables thinking** when the level is "off" for models that think by default (e.g., GLM-5, GLM-4.7).
|
|
80
80
|
4. **Uses `system` role** instead of `developer` for all NIM models - the `developer` role combined with `chat_template_kwargs` causes 500 errors on NIM.
|
|
81
81
|
|
|
82
82
|
### Supported thinking levels
|
|
@@ -91,7 +91,7 @@ When you change the thinking level in pi (`Shift+Tab` to cycle), the extension:
|
|
|
91
91
|
|
|
92
92
|
## Available Models
|
|
93
93
|
|
|
94
|
-
The extension ships with curated metadata for 38 featured models. At startup, it also queries the NVIDIA NIM API to discover additional models automatically.
|
|
94
|
+
The extension ships with curated metadata for 39 featured models. At startup, it also queries the NVIDIA NIM API to discover additional models automatically.
|
|
95
95
|
|
|
96
96
|
### Featured Models
|
|
97
97
|
|
|
@@ -102,6 +102,7 @@ The extension ships with curated metadata for 38 featured models. At startup, it
|
|
|
102
102
|
| `moonshotai/kimi-k2.5` | ✅ | | 256K |
|
|
103
103
|
| `moonshotai/kimi-k2-thinking` | ✅ | | 128K |
|
|
104
104
|
| `minimaxai/minimax-m2.1` | | | 1M |
|
|
105
|
+
| `z-ai/glm5` | ✅ | | 128K |
|
|
105
106
|
| `z-ai/glm4.7` | ✅ | | 128K |
|
|
106
107
|
| `openai/gpt-oss-120b` | | | 128K |
|
|
107
108
|
| `qwen/qwen3-coder-480b-a35b-instruct` | ✅ | | 256K |
|
|
@@ -120,7 +121,7 @@ The extension ships with curated metadata for 38 featured models. At startup, it
|
|
|
120
121
|
|
|
121
122
|
### Tool Calling
|
|
122
123
|
|
|
123
|
-
All major models support OpenAI-compatible tool calling. Tested and confirmed working with DeepSeek V3.2, GLM-4.7, Qwen3, Kimi K2.5, and others.
|
|
124
|
+
All major models support OpenAI-compatible tool calling. Tested and confirmed working with DeepSeek V3.2, GLM-5, GLM-4.7, Qwen3, Kimi K2.5, and others.
|
|
124
125
|
|
|
125
126
|
## How It Works
|
|
126
127
|
|
package/index.ts
CHANGED
|
@@ -22,14 +22,14 @@
|
|
|
22
22
|
* parameters:
|
|
23
23
|
*
|
|
24
24
|
* - DeepSeek V3.x: `chat_template_kwargs: { thinking: true }`
|
|
25
|
-
* - GLM-4.7: `chat_template_kwargs: { enable_thinking: true, clear_thinking: false }`
|
|
25
|
+
* - GLM-5/4.7: `chat_template_kwargs: { enable_thinking: true, clear_thinking: false }`
|
|
26
26
|
* - Kimi K2.5: `chat_template_kwargs: { thinking: true }` (also accepts reasoning_effort)
|
|
27
27
|
* - Qwen3: `chat_template_kwargs: { enable_thinking: true }`
|
|
28
28
|
*
|
|
29
29
|
* NIM only accepts `reasoning_effort` values of "low", "medium", "high" - NOT
|
|
30
30
|
* "minimal". The extension maps pi's "minimal" level to "low" automatically.
|
|
31
31
|
*
|
|
32
|
-
* Some models (e.g., GLM-4.7) always produce reasoning output regardless of
|
|
32
|
+
* Some models (e.g., GLM-5, GLM-4.7) always produce reasoning output regardless of
|
|
33
33
|
* thinking settings.
|
|
34
34
|
*/
|
|
35
35
|
|
|
@@ -60,7 +60,7 @@ const PROVIDER_NAME = "nvidia-nim";
|
|
|
60
60
|
* When a user enables thinking in pi (any level > off), we inject these kwargs
|
|
61
61
|
* into the request body. Models not listed here either:
|
|
62
62
|
* - Don't support thinking (non-reasoning models)
|
|
63
|
-
* - Always think regardless (GLM-4.7 without explicit kwargs)
|
|
63
|
+
* - Always think regardless (GLM models without explicit kwargs)
|
|
64
64
|
* - Work with standard reasoning_effort (rare on NIM)
|
|
65
65
|
*/
|
|
66
66
|
interface ThinkingConfig {
|
|
@@ -102,11 +102,15 @@ const THINKING_CONFIGS: Record<string, ThinkingConfig> = {
|
|
|
102
102
|
enableKwargs: { thinking: true },
|
|
103
103
|
disableKwargs: { thinking: false },
|
|
104
104
|
},
|
|
105
|
-
// GLM-4.7 (Z-AI) - thinks by default, but can be controlled
|
|
105
|
+
// GLM models (Z-AI) - think by default, but can be controlled
|
|
106
106
|
"z-ai/glm4.7": {
|
|
107
107
|
enableKwargs: { enable_thinking: true, clear_thinking: false },
|
|
108
108
|
disableKwargs: { enable_thinking: false },
|
|
109
109
|
},
|
|
110
|
+
"z-ai/glm5": {
|
|
111
|
+
enableKwargs: { enable_thinking: true, clear_thinking: false },
|
|
112
|
+
disableKwargs: { enable_thinking: false },
|
|
113
|
+
},
|
|
110
114
|
// Kimi models: chat_template_kwargs works, reasoning_effort also works
|
|
111
115
|
"moonshotai/kimi-k2.5": {
|
|
112
116
|
enableKwargs: { thinking: true },
|
|
@@ -245,6 +249,7 @@ const CONTEXT_WINDOWS: Record<string, number> = {
|
|
|
245
249
|
// MiniMax
|
|
246
250
|
"minimaxai/minimax-m2": 1048576,
|
|
247
251
|
"minimaxai/minimax-m2.1": 1048576,
|
|
252
|
+
"minimaxai/minimax-m2.7": 204800,
|
|
248
253
|
// Meta Llama
|
|
249
254
|
"meta/llama-3.1-405b-instruct": 131072,
|
|
250
255
|
"meta/llama-3.1-70b-instruct": 131072,
|
|
@@ -320,6 +325,7 @@ const CONTEXT_WINDOWS: Record<string, number> = {
|
|
|
320
325
|
"openai/gpt-oss-20b": 131072,
|
|
321
326
|
// Z-AI / GLM
|
|
322
327
|
"z-ai/glm4.7": 131072,
|
|
328
|
+
"z-ai/glm5": 131072,
|
|
323
329
|
// StepFun
|
|
324
330
|
"stepfun-ai/step-3.5-flash": 131072,
|
|
325
331
|
// ByteDance
|
|
@@ -368,9 +374,11 @@ const MAX_TOKENS: Record<string, number> = {
|
|
|
368
374
|
"moonshotai/kimi-k2-thinking": 16384,
|
|
369
375
|
"minimaxai/minimax-m2": 8192,
|
|
370
376
|
"minimaxai/minimax-m2.1": 8192,
|
|
377
|
+
"minimaxai/minimax-m2.7": 8192,
|
|
371
378
|
"meta/llama-4-maverick-17b-128e-instruct": 16384,
|
|
372
379
|
"meta/llama-4-scout-17b-16e-instruct": 16384,
|
|
373
380
|
"z-ai/glm4.7": 16384,
|
|
381
|
+
"z-ai/glm5": 16384,
|
|
374
382
|
"qwen/qwen3-coder-480b-a35b-instruct": 65536,
|
|
375
383
|
"nvidia/llama-3.1-nemotron-ultra-253b-v1": 32768,
|
|
376
384
|
"openai/gpt-oss-120b": 16384,
|
|
@@ -394,6 +402,8 @@ const FEATURED_MODELS = [
|
|
|
394
402
|
"moonshotai/kimi-k2-instruct-0905",
|
|
395
403
|
"minimaxai/minimax-m2.1",
|
|
396
404
|
"minimaxai/minimax-m2",
|
|
405
|
+
"minimaxai/minimax-m2.7",
|
|
406
|
+
"z-ai/glm5",
|
|
397
407
|
"z-ai/glm4.7",
|
|
398
408
|
"openai/gpt-oss-120b",
|
|
399
409
|
"openai/gpt-oss-20b",
|
|
@@ -494,7 +504,7 @@ function nimStreamSimple(
|
|
|
494
504
|
// Inject chat_template_kwargs to enable thinking
|
|
495
505
|
p.chat_template_kwargs = thinkingConfig.enableKwargs;
|
|
496
506
|
} else if (thinkingConfig.disableKwargs) {
|
|
497
|
-
// Explicitly disable thinking (some models think by default, e.g. GLM-4.7)
|
|
507
|
+
// Explicitly disable thinking (some models think by default, e.g. GLM-5/4.7)
|
|
498
508
|
p.chat_template_kwargs = thinkingConfig.disableKwargs;
|
|
499
509
|
}
|
|
500
510
|
}
|