pi-nvidia-nim 1.1.15 → 1.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +6 -5
  2. package/index.ts +15 -5
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # pi-nvidia-nim
2
2
 
3
- NVIDIA NIM API provider extension for [pi coding agent](https://github.com/badlogic/pi-mono) - access 100+ models from [build.nvidia.com](https://build.nvidia.com) including DeepSeek V3.2, Kimi K2.5, MiniMax M2.1, GLM-4.7, Qwen3, Llama 4, and many more.
3
+ NVIDIA NIM API provider extension for [pi coding agent](https://github.com/badlogic/pi-mono) - access 100+ models from [build.nvidia.com](https://build.nvidia.com) including DeepSeek V3.2, Kimi K2.5, MiniMax M2.1, GLM-5, GLM-4.7, Qwen3, Llama 4, and many more.
4
4
 
5
5
  https://github.com/user-attachments/assets/f44773e4-9bf8-4bb5-a9c0-d5938030701c
6
6
 
@@ -73,10 +73,10 @@ When you change the thinking level in pi (`Shift+Tab` to cycle), the extension:
73
73
  1. **Maps `"minimal"` → `"low"`** - NIM only accepts `low`, `medium`, `high` (not `minimal`). Selecting "minimal" in pi works fine; it's silently mapped.
74
74
  2. **Injects `chat_template_kwargs`** per model to actually enable thinking:
75
75
  - DeepSeek V3.x, R1 distills: `{ thinking: true }`
76
- - GLM-4.7: `{ enable_thinking: true, clear_thinking: false }`
76
+ - GLM-5, GLM-4.7: `{ enable_thinking: true, clear_thinking: false }`
77
77
  - Kimi K2.5, K2-thinking: `{ thinking: true }`
78
78
  - Qwen3, QwQ: `{ enable_thinking: true }`
79
- 3. **Explicitly disables thinking** when the level is "off" for models that think by default (e.g., GLM-4.7).
79
+ 3. **Explicitly disables thinking** when the level is "off" for models that think by default (e.g., GLM-5, GLM-4.7).
80
80
  4. **Uses `system` role** instead of `developer` for all NIM models - the `developer` role combined with `chat_template_kwargs` causes 500 errors on NIM.
81
81
 
82
82
  ### Supported thinking levels
@@ -91,7 +91,7 @@ When you change the thinking level in pi (`Shift+Tab` to cycle), the extension:
91
91
 
92
92
  ## Available Models
93
93
 
94
- The extension ships with curated metadata for 38 featured models. At startup, it also queries the NVIDIA NIM API to discover additional models automatically.
94
+ The extension ships with curated metadata for 39 featured models. At startup, it also queries the NVIDIA NIM API to discover additional models automatically.
95
95
 
96
96
  ### Featured Models
97
97
 
@@ -102,6 +102,7 @@ The extension ships with curated metadata for 38 featured models. At startup, it
102
102
  | `moonshotai/kimi-k2.5` | ✅ | | 256K |
103
103
  | `moonshotai/kimi-k2-thinking` | ✅ | | 128K |
104
104
  | `minimaxai/minimax-m2.1` | | | 1M |
105
+ | `z-ai/glm5` | ✅ | | 128K |
105
106
  | `z-ai/glm4.7` | ✅ | | 128K |
106
107
  | `openai/gpt-oss-120b` | | | 128K |
107
108
  | `qwen/qwen3-coder-480b-a35b-instruct` | ✅ | | 256K |
@@ -120,7 +121,7 @@ The extension ships with curated metadata for 38 featured models. At startup, it
120
121
 
121
122
  ### Tool Calling
122
123
 
123
- All major models support OpenAI-compatible tool calling. Tested and confirmed working with DeepSeek V3.2, GLM-4.7, Qwen3, Kimi K2.5, and others.
124
+ All major models support OpenAI-compatible tool calling. Tested and confirmed working with DeepSeek V3.2, GLM-5, GLM-4.7, Qwen3, Kimi K2.5, and others.
124
125
 
125
126
  ## How It Works
126
127
 
package/index.ts CHANGED
@@ -22,14 +22,14 @@
22
22
  * parameters:
23
23
  *
24
24
  * - DeepSeek V3.x: `chat_template_kwargs: { thinking: true }`
25
- * - GLM-4.7: `chat_template_kwargs: { enable_thinking: true, clear_thinking: false }`
25
+ * - GLM-5/4.7: `chat_template_kwargs: { enable_thinking: true, clear_thinking: false }`
26
26
  * - Kimi K2.5: `chat_template_kwargs: { thinking: true }` (also accepts reasoning_effort)
27
27
  * - Qwen3: `chat_template_kwargs: { enable_thinking: true }`
28
28
  *
29
29
  * NIM only accepts `reasoning_effort` values of "low", "medium", "high" - NOT
30
30
  * "minimal". The extension maps pi's "minimal" level to "low" automatically.
31
31
  *
32
- * Some models (e.g., GLM-4.7) always produce reasoning output regardless of
32
+ * Some models (e.g., GLM-5, GLM-4.7) produce reasoning output by default unless disabled via their
33
33
  * thinking settings.
34
34
  */
35
35
 
@@ -60,7 +60,7 @@ const PROVIDER_NAME = "nvidia-nim";
60
60
  * When a user enables thinking in pi (any level > off), we inject these kwargs
61
61
  * into the request body. Models not listed here either:
62
62
  * - Don't support thinking (non-reasoning models)
63
- * - Always think regardless (GLM-4.7 without explicit kwargs)
63
+ * - Always think regardless (GLM models without explicit kwargs)
64
64
  * - Work with standard reasoning_effort (rare on NIM)
65
65
  */
66
66
  interface ThinkingConfig {
@@ -102,11 +102,15 @@ const THINKING_CONFIGS: Record<string, ThinkingConfig> = {
102
102
  enableKwargs: { thinking: true },
103
103
  disableKwargs: { thinking: false },
104
104
  },
105
- // GLM-4.7 always thinks by default, but can be controlled
105
+ // GLM models (Z-AI) - think by default, but can be controlled
106
106
  "z-ai/glm4.7": {
107
107
  enableKwargs: { enable_thinking: true, clear_thinking: false },
108
108
  disableKwargs: { enable_thinking: false },
109
109
  },
110
+ "z-ai/glm5": {
111
+ enableKwargs: { enable_thinking: true, clear_thinking: false },
112
+ disableKwargs: { enable_thinking: false },
113
+ },
110
114
  // Kimi models: chat_template_kwargs works, reasoning_effort also works
111
115
  "moonshotai/kimi-k2.5": {
112
116
  enableKwargs: { thinking: true },
@@ -245,6 +249,7 @@ const CONTEXT_WINDOWS: Record<string, number> = {
245
249
  // MiniMax
246
250
  "minimaxai/minimax-m2": 1048576,
247
251
  "minimaxai/minimax-m2.1": 1048576,
252
+ "minimaxai/minimax-m2.7": 204800,
248
253
  // Meta Llama
249
254
  "meta/llama-3.1-405b-instruct": 131072,
250
255
  "meta/llama-3.1-70b-instruct": 131072,
@@ -320,6 +325,7 @@ const CONTEXT_WINDOWS: Record<string, number> = {
320
325
  "openai/gpt-oss-20b": 131072,
321
326
  // Z-AI / GLM
322
327
  "z-ai/glm4.7": 131072,
328
+ "z-ai/glm5": 131072,
323
329
  // StepFun
324
330
  "stepfun-ai/step-3.5-flash": 131072,
325
331
  // ByteDance
@@ -368,9 +374,11 @@ const MAX_TOKENS: Record<string, number> = {
368
374
  "moonshotai/kimi-k2-thinking": 16384,
369
375
  "minimaxai/minimax-m2": 8192,
370
376
  "minimaxai/minimax-m2.1": 8192,
377
+ "minimaxai/minimax-m2.7": 8192,
371
378
  "meta/llama-4-maverick-17b-128e-instruct": 16384,
372
379
  "meta/llama-4-scout-17b-16e-instruct": 16384,
373
380
  "z-ai/glm4.7": 16384,
381
+ "z-ai/glm5": 16384,
374
382
  "qwen/qwen3-coder-480b-a35b-instruct": 65536,
375
383
  "nvidia/llama-3.1-nemotron-ultra-253b-v1": 32768,
376
384
  "openai/gpt-oss-120b": 16384,
@@ -394,6 +402,8 @@ const FEATURED_MODELS = [
394
402
  "moonshotai/kimi-k2-instruct-0905",
395
403
  "minimaxai/minimax-m2.1",
396
404
  "minimaxai/minimax-m2",
405
+ "minimaxai/minimax-m2.7",
406
+ "z-ai/glm5",
397
407
  "z-ai/glm4.7",
398
408
  "openai/gpt-oss-120b",
399
409
  "openai/gpt-oss-20b",
@@ -494,7 +504,7 @@ function nimStreamSimple(
494
504
  // Inject chat_template_kwargs to enable thinking
495
505
  p.chat_template_kwargs = thinkingConfig.enableKwargs;
496
506
  } else if (thinkingConfig.disableKwargs) {
497
- // Explicitly disable thinking (some models think by default, e.g. GLM-4.7)
507
+ // Explicitly disable thinking (some models think by default, e.g. GLM-5/4.7)
498
508
  p.chat_template_kwargs = thinkingConfig.disableKwargs;
499
509
  }
500
510
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-nvidia-nim",
3
- "version": "1.1.15",
3
+ "version": "1.1.17",
4
4
  "description": "NVIDIA NIM API provider extension for pi coding agent — access 100+ models from build.nvidia.com",
5
5
  "type": "module",
6
6
  "files": [