pi-nvidia-nim 1.1.15 → 1.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -5
- package/index.ts +15 -5
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# pi-nvidia-nim
|
|
2
2
|
|
|
3
|
-
NVIDIA NIM API provider extension for [pi coding agent](https://github.com/badlogic/pi-mono) - access 100+ models from [build.nvidia.com](https://build.nvidia.com) including DeepSeek V3.2, Kimi K2.5, MiniMax M2.1, GLM-4.7, Qwen3, Llama 4, and many more.
|
|
3
|
+
NVIDIA NIM API provider extension for [pi coding agent](https://github.com/badlogic/pi-mono) - access 100+ models from [build.nvidia.com](https://build.nvidia.com) including DeepSeek V3.2, Kimi K2.5, MiniMax M2.1, GLM-5, GLM-4.7, Qwen3, Llama 4, and many more.
|
|
4
4
|
|
|
5
5
|
https://github.com/user-attachments/assets/f44773e4-9bf8-4bb5-a9c0-d5938030701c
|
|
6
6
|
|
|
@@ -73,10 +73,10 @@ When you change the thinking level in pi (`Shift+Tab` to cycle), the extension:
|
|
|
73
73
|
1. **Maps `"minimal"` → `"low"`** - NIM only accepts `low`, `medium`, `high` (not `minimal`). Selecting "minimal" in pi works fine; it's silently mapped.
|
|
74
74
|
2. **Injects `chat_template_kwargs`** per model to actually enable thinking:
|
|
75
75
|
- DeepSeek V3.x, R1 distills: `{ thinking: true }`
|
|
76
|
-
- GLM-4.7: `{ enable_thinking: true, clear_thinking: false }`
|
|
76
|
+
- GLM-5, GLM-4.7: `{ enable_thinking: true, clear_thinking: false }`
|
|
77
77
|
- Kimi K2.5, K2-thinking: `{ thinking: true }`
|
|
78
78
|
- Qwen3, QwQ: `{ enable_thinking: true }`
|
|
79
|
-
3. **Explicitly disables thinking** when the level is "off" for models that think by default (e.g., GLM-4.7).
|
|
79
|
+
3. **Explicitly disables thinking** when the level is "off" for models that think by default (e.g., GLM-5, GLM-4.7).
|
|
80
80
|
4. **Uses `system` role** instead of `developer` for all NIM models - the `developer` role combined with `chat_template_kwargs` causes 500 errors on NIM.
|
|
81
81
|
|
|
82
82
|
### Supported thinking levels
|
|
@@ -91,7 +91,7 @@ When you change the thinking level in pi (`Shift+Tab` to cycle), the extension:
|
|
|
91
91
|
|
|
92
92
|
## Available Models
|
|
93
93
|
|
|
94
|
-
The extension ships with curated metadata for 38 featured models. At startup, it also queries the NVIDIA NIM API to discover additional models automatically.
|
|
94
|
+
The extension ships with curated metadata for 39 featured models. At startup, it also queries the NVIDIA NIM API to discover additional models automatically.
|
|
95
95
|
|
|
96
96
|
### Featured Models
|
|
97
97
|
|
|
@@ -102,6 +102,7 @@ The extension ships with curated metadata for 38 featured models. At startup, it
|
|
|
102
102
|
| `moonshotai/kimi-k2.5` | ✅ | | 256K |
|
|
103
103
|
| `moonshotai/kimi-k2-thinking` | ✅ | | 128K |
|
|
104
104
|
| `minimaxai/minimax-m2.1` | | | 1M |
|
|
105
|
+
| `z-ai/glm5` | ✅ | | 128K |
|
|
105
106
|
| `z-ai/glm4.7` | ✅ | | 128K |
|
|
106
107
|
| `openai/gpt-oss-120b` | | | 128K |
|
|
107
108
|
| `qwen/qwen3-coder-480b-a35b-instruct` | ✅ | | 256K |
|
|
@@ -120,7 +121,7 @@ The extension ships with curated metadata for 38 featured models. At startup, it
|
|
|
120
121
|
|
|
121
122
|
### Tool Calling
|
|
122
123
|
|
|
123
|
-
All major models support OpenAI-compatible tool calling. Tested and confirmed working with DeepSeek V3.2, GLM-4.7, Qwen3, Kimi K2.5, and others.
|
|
124
|
+
All major models support OpenAI-compatible tool calling. Tested and confirmed working with DeepSeek V3.2, GLM-5, GLM-4.7, Qwen3, Kimi K2.5, and others.
|
|
124
125
|
|
|
125
126
|
## How It Works
|
|
126
127
|
|
package/index.ts
CHANGED
|
@@ -22,14 +22,14 @@
|
|
|
22
22
|
* parameters:
|
|
23
23
|
*
|
|
24
24
|
* - DeepSeek V3.x: `chat_template_kwargs: { thinking: true }`
|
|
25
|
-
* - GLM-4.7: `chat_template_kwargs: { enable_thinking: true, clear_thinking: false }`
|
|
25
|
+
* - GLM-5/4.7: `chat_template_kwargs: { enable_thinking: true, clear_thinking: false }`
|
|
26
26
|
* - Kimi K2.5: `chat_template_kwargs: { thinking: true }` (also accepts reasoning_effort)
|
|
27
27
|
* - Qwen3: `chat_template_kwargs: { enable_thinking: true }`
|
|
28
28
|
*
|
|
29
29
|
* NIM only accepts `reasoning_effort` values of "low", "medium", "high" - NOT
|
|
30
30
|
* "minimal". The extension maps pi's "minimal" level to "low" automatically.
|
|
31
31
|
*
|
|
32
|
-
* Some models (e.g., GLM-4.7) always produce reasoning output regardless of
|
|
32
|
+
* Some models (e.g., GLM-5, GLM-4.7) always produce reasoning output regardless of
|
|
33
33
|
* thinking settings.
|
|
34
34
|
*/
|
|
35
35
|
|
|
@@ -60,7 +60,7 @@ const PROVIDER_NAME = "nvidia-nim";
|
|
|
60
60
|
* When a user enables thinking in pi (any level > off), we inject these kwargs
|
|
61
61
|
* into the request body. Models not listed here either:
|
|
62
62
|
* - Don't support thinking (non-reasoning models)
|
|
63
|
-
* - Always think regardless (GLM-4.7 without explicit kwargs)
|
|
63
|
+
* - Always think regardless (GLM models without explicit kwargs)
|
|
64
64
|
* - Work with standard reasoning_effort (rare on NIM)
|
|
65
65
|
*/
|
|
66
66
|
interface ThinkingConfig {
|
|
@@ -102,11 +102,15 @@ const THINKING_CONFIGS: Record<string, ThinkingConfig> = {
|
|
|
102
102
|
enableKwargs: { thinking: true },
|
|
103
103
|
disableKwargs: { thinking: false },
|
|
104
104
|
},
|
|
105
|
-
// GLM-4.7 (Z-AI) - thinks by default, but can be controlled
|
|
105
|
+
// GLM models (Z-AI) - think by default, but can be controlled
|
|
106
106
|
"z-ai/glm4.7": {
|
|
107
107
|
enableKwargs: { enable_thinking: true, clear_thinking: false },
|
|
108
108
|
disableKwargs: { enable_thinking: false },
|
|
109
109
|
},
|
|
110
|
+
"z-ai/glm5": {
|
|
111
|
+
enableKwargs: { enable_thinking: true, clear_thinking: false },
|
|
112
|
+
disableKwargs: { enable_thinking: false },
|
|
113
|
+
},
|
|
110
114
|
// Kimi models: chat_template_kwargs works, reasoning_effort also works
|
|
111
115
|
"moonshotai/kimi-k2.5": {
|
|
112
116
|
enableKwargs: { thinking: true },
|
|
@@ -245,6 +249,7 @@ const CONTEXT_WINDOWS: Record<string, number> = {
|
|
|
245
249
|
// MiniMax
|
|
246
250
|
"minimaxai/minimax-m2": 1048576,
|
|
247
251
|
"minimaxai/minimax-m2.1": 1048576,
|
|
252
|
+
"minimaxai/minimax-m2.7": 204800,
|
|
248
253
|
// Meta Llama
|
|
249
254
|
"meta/llama-3.1-405b-instruct": 131072,
|
|
250
255
|
"meta/llama-3.1-70b-instruct": 131072,
|
|
@@ -320,6 +325,7 @@ const CONTEXT_WINDOWS: Record<string, number> = {
|
|
|
320
325
|
"openai/gpt-oss-20b": 131072,
|
|
321
326
|
// Z-AI / GLM
|
|
322
327
|
"z-ai/glm4.7": 131072,
|
|
328
|
+
"z-ai/glm5": 131072,
|
|
323
329
|
// StepFun
|
|
324
330
|
"stepfun-ai/step-3.5-flash": 131072,
|
|
325
331
|
// ByteDance
|
|
@@ -368,9 +374,11 @@ const MAX_TOKENS: Record<string, number> = {
|
|
|
368
374
|
"moonshotai/kimi-k2-thinking": 16384,
|
|
369
375
|
"minimaxai/minimax-m2": 8192,
|
|
370
376
|
"minimaxai/minimax-m2.1": 8192,
|
|
377
|
+
"minimaxai/minimax-m2.7": 8192,
|
|
371
378
|
"meta/llama-4-maverick-17b-128e-instruct": 16384,
|
|
372
379
|
"meta/llama-4-scout-17b-16e-instruct": 16384,
|
|
373
380
|
"z-ai/glm4.7": 16384,
|
|
381
|
+
"z-ai/glm5": 16384,
|
|
374
382
|
"qwen/qwen3-coder-480b-a35b-instruct": 65536,
|
|
375
383
|
"nvidia/llama-3.1-nemotron-ultra-253b-v1": 32768,
|
|
376
384
|
"openai/gpt-oss-120b": 16384,
|
|
@@ -394,6 +402,8 @@ const FEATURED_MODELS = [
|
|
|
394
402
|
"moonshotai/kimi-k2-instruct-0905",
|
|
395
403
|
"minimaxai/minimax-m2.1",
|
|
396
404
|
"minimaxai/minimax-m2",
|
|
405
|
+
"minimaxai/minimax-m2.7",
|
|
406
|
+
"z-ai/glm5",
|
|
397
407
|
"z-ai/glm4.7",
|
|
398
408
|
"openai/gpt-oss-120b",
|
|
399
409
|
"openai/gpt-oss-20b",
|
|
@@ -494,7 +504,7 @@ function nimStreamSimple(
|
|
|
494
504
|
// Inject chat_template_kwargs to enable thinking
|
|
495
505
|
p.chat_template_kwargs = thinkingConfig.enableKwargs;
|
|
496
506
|
} else if (thinkingConfig.disableKwargs) {
|
|
497
|
-
// Explicitly disable thinking (some models think by default, e.g. GLM-4.7)
|
|
507
|
+
// Explicitly disable thinking (some models think by default, e.g. GLM-5/4.7)
|
|
498
508
|
p.chat_template_kwargs = thinkingConfig.disableKwargs;
|
|
499
509
|
}
|
|
500
510
|
}
|