npm - @estebanforge/pi-glm-tweaks - Versions diffs - 1.1.0 → 1.1.1 - Mend

@estebanforge/pi-glm-tweaks 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -49,7 +49,7 @@ GLM-5.2 overthinks on long agent loops — it can spend an entire turn on `reaso
 | --- | --- | --- |
 | `glm-budget-nudge` | `true` | (a) Appends a soft thinking-budget fragment to the system prompt on every zai/glm-5.2 turn. (b) Per LLM call, sums `reasoning_content` across prior assistant messages in the current agent loop (the one started by the most recent user prompt); if the cumulative total exceeds ~2000 characters (roughly 500 English tokens), injects a one-shot hint to push the model back toward tool calls. Fires at most once per loop. The hint appears in the conversation panel as a user message prefixed with `[system reminder: ...]` — that is intentional, so you can see when the ratchet fired. |
 | `glm-clear-thinking` | `true` | Forces `clear_thinking: true` on every request. The coding endpoint (`api.z.ai/api/coding/paas/v4`) defaults to preserved thinking, which silently compounds `reasoning_content` across turns. At $4.4/MTok output, this is real money. |
-| `glm-quick-disable` | `true` | For user prompts under 80 chars, forces `thinking.type: "disabled"` for that turn. Trivial questions ("what time is it") don't need deep thinking. |
+| `glm-skip-short-thinking` | `true` | For user prompts under 80 chars, forces `thinking.type: "disabled"` for that turn. Trivial questions ("what time is it") don't need deep thinking. |
 All three flags surface in `pi config` and Pi's flag editor — `pi config set glm-budget-nudge false` to disable.

package/extensions/index.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * supports (off, high, max), wires the native `thinkingFormat: "zai"` wire
  * translation, auto-clamps hidden levels, and applies token-efficiency
  * hygiene (per-turn system-prompt nudge, intra-loop ratchet, wire-level
- * clear_thinking and short-prompt quick-disable).
+ * clear_thinking and skip-short-thinking).
  *
  * Wire map (see https://docs.z.ai/guides/capabilities/thinking and
  * providers/openai-completions.js in pi-ai):
@@ -36,7 +36,7 @@
  *     endpoint defaults to preserved thinking, which silently compounds
  *     `reasoning_content` across turns). `glm-clear-thinking`, default on.
  *   - On short user prompts (<80 chars), force `thinking.type: "disabled"`
- *     to save tokens on trivial turns. `glm-quick-disable`, default on.
+ *     to save tokens on trivial turns. `glm-skip-short-thinking`, default on.
  *
  * Auth is untouched. The provider's existing key (ZAI_API_KEY env, /login,
  * or models.json apiKey) continues to resolve against the new baseUrl.
@@ -77,8 +77,8 @@ const FLAGS = [
 			"Force clear_thinking=true on zai/glm-5.2 requests to prevent cross-turn reasoning_content carryover on the coding endpoint.",
 	},
 	{
-		name: "glm-quick-disable",
-		label: "Quick disable",
+		name: "glm-skip-short-thinking",
+		label: "Skip short thinking",
 		description: "Disable thinking on short user prompts (<80 chars) to save tokens on trivial turns.",
 	},
 ] as const;
@@ -240,6 +240,7 @@ export default function (pi: ExtensionAPI) {
 			const items: SettingItem[] = FLAGS.map((f) => ({
 				id: f.name,
 				label: f.label,
+				description: f.description,
 				currentValue: pi.getFlag(f.name) === true ? "on" : "off",
 				values: ["on", "off"],
 			}));
@@ -437,7 +438,7 @@ export default function (pi: ExtensionAPI) {
 			mutated = true;
 		}
-		// Short-prompt quick-disable: trivial turns ("what time is it")
+		// Short-prompt thinking-skip: trivial turns ("what time is it")
 		// don't need deep thinking. Force the kill switch and let Pi's
 		// zai branch pass thinking.type="disabled" through.
 		//
@@ -445,7 +446,7 @@ export default function (pi: ExtensionAPI) {
 		// first: loop.shortPrompt is computed once from the initial prompt
 		// and held constant (see before_agent_start). A short prompt that
 		// spawns tool calls stays thinking-free for the whole turn.
-		if (pi.getFlag("glm-quick-disable") === true && loop.shortPrompt) {
+		if (pi.getFlag("glm-skip-short-thinking") === true && loop.shortPrompt) {
 			thinking.type = "disabled";
 			mutated = true;
 		}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@estebanforge/pi-glm-tweaks",
-  "version": "1.1.0",
+  "version": "1.1.1",
   "description": "Pi-native tweaks for Z.AI's GLM-5.2. Restricts the Pi thinking-level UI to the three modes GLM-5.2 actually supports (off, high, max), wires the native thinkingFormat:\"zai\" wire translation, and auto-clamps hidden levels when the model is selected.",
   "keywords": [
     "pi-package",