@estebanforge/pi-glm-tweaks 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -49,7 +49,7 @@ GLM-5.2 overthinks on long agent loops — it can spend an entire turn on `reaso
49
49
  | --- | --- | --- |
50
50
  | `glm-budget-nudge` | `true` | (a) Appends a soft thinking-budget fragment to the system prompt on every zai/glm-5.2 turn. (b) Per LLM call, sums `reasoning_content` across prior assistant messages in the current agent loop (the one started by the most recent user prompt); if the cumulative total exceeds ~2000 characters (roughly 500 English tokens), injects a one-shot hint to push the model back toward tool calls. Fires at most once per loop. The hint appears in the conversation panel as a user message prefixed `[system reminder: ...]` — that is intentional, so you can see when the ratchet fired. |
51
51
  | `glm-clear-thinking` | `true` | Forces `clear_thinking: true` on every request. The coding endpoint (`api.z.ai/api/coding/paas/v4`) defaults to preserved thinking, which silently compounds `reasoning_content` across turns. At $4.4/MTok output, this is real money. |
52
- | `glm-quick-disable` | `true` | For user prompts under 80 chars, forces `thinking.type: "disabled"` for that turn. Trivial questions ("what time is it") don't need deep thinking. |
52
+ | `glm-skip-short-thinking` | `true` | For user prompts under 80 chars, forces `thinking.type: "disabled"` for that turn. Trivial questions ("what time is it") don't need deep thinking. |
53
53
 
54
54
  All three flags surface in `pi config` and Pi's flag editor — for example, run `pi config set glm-budget-nudge false` to disable the budget nudge.
55
55
 
@@ -5,7 +5,7 @@
5
5
  * supports (off, high, max), wires the native `thinkingFormat: "zai"` wire
6
6
  * translation, auto-clamps hidden levels, and applies token-efficiency
7
7
  * hygiene (per-turn system-prompt nudge, intra-loop ratchet, wire-level
8
- * clear_thinking and short-prompt quick-disable).
8
+ * clear_thinking and skip-short-thinking).
9
9
  *
10
10
  * Wire map (see https://docs.z.ai/guides/capabilities/thinking and
11
11
  * providers/openai-completions.js in pi-ai):
@@ -36,7 +36,7 @@
36
36
  * endpoint defaults to preserved thinking, which silently compounds
37
37
  * `reasoning_content` across turns). `glm-clear-thinking`, default on.
38
38
  * - On short user prompts (<80 chars), force `thinking.type: "disabled"`
39
- * to save tokens on trivial turns. `glm-quick-disable`, default on.
39
+ * to save tokens on trivial turns. `glm-skip-short-thinking`, default on.
40
40
  *
41
41
  * Auth is untouched. The provider's existing key (ZAI_API_KEY env, /login,
42
42
  * or models.json apiKey) continues to resolve against the new baseUrl.
@@ -77,8 +77,8 @@ const FLAGS = [
77
77
  "Force clear_thinking=true on zai/glm-5.2 requests to prevent cross-turn reasoning_content carryover on the coding endpoint.",
78
78
  },
79
79
  {
80
- name: "glm-quick-disable",
81
- label: "Quick disable",
80
+ name: "glm-skip-short-thinking",
81
+ label: "Skip short thinking",
82
82
  description: "Disable thinking on short user prompts (<80 chars) to save tokens on trivial turns.",
83
83
  },
84
84
  ] as const;
@@ -240,6 +240,7 @@ export default function (pi: ExtensionAPI) {
240
240
  const items: SettingItem[] = FLAGS.map((f) => ({
241
241
  id: f.name,
242
242
  label: f.label,
243
+ description: f.description,
243
244
  currentValue: pi.getFlag(f.name) === true ? "on" : "off",
244
245
  values: ["on", "off"],
245
246
  }));
@@ -437,7 +438,7 @@ export default function (pi: ExtensionAPI) {
437
438
  mutated = true;
438
439
  }
439
440
 
440
- // Short-prompt quick-disable: trivial turns ("what time is it")
441
+ // Short-prompt thinking-skip: trivial turns ("what time is it")
441
442
  // don't need deep thinking. Force the kill switch and let Pi's
442
443
  // zai branch pass thinking.type="disabled" through.
443
444
  //
@@ -445,7 +446,7 @@ export default function (pi: ExtensionAPI) {
445
446
  // first: loop.shortPrompt is computed once from the initial prompt
446
447
  // and held constant (see before_agent_start). A short prompt that
447
448
  // spawns tool calls stays thinking-free for the whole turn.
448
- if (pi.getFlag("glm-quick-disable") === true && loop.shortPrompt) {
449
+ if (pi.getFlag("glm-skip-short-thinking") === true && loop.shortPrompt) {
449
450
  thinking.type = "disabled";
450
451
  mutated = true;
451
452
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@estebanforge/pi-glm-tweaks",
3
- "version": "1.1.0",
3
+ "version": "1.1.1",
4
4
  "description": "Pi-native tweaks for Z.AI's GLM-5.2. Restricts the Pi thinking-level UI to the three modes GLM-5.2 actually supports (off, high, max), wires the native thinkingFormat:\"zai\" wire translation, and auto-clamps hidden levels when the model is selected.",
5
5
  "keywords": [
6
6
  "pi-package",