@estebanforge/pi-glm-tweaks 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/extensions/index.ts +7 -6
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -49,7 +49,7 @@ GLM-5.2 overthinks on long agent loops — it can spend an entire turn on `reaso
|
|
|
49
49
|
| --- | --- | --- |
|
|
50
50
|
| `glm-budget-nudge` | `true` | (a) Appends a soft thinking-budget fragment to the system prompt on every zai/glm-5.2 turn. (b) Per LLM call, sums `reasoning_content` across prior assistant messages in the current agent loop (the one started by the most recent user prompt); if the cumulative length exceeds ~2000 characters (roughly 500 English tokens), injects a one-shot hint to push the model back toward tool calls. Fires at most once per loop. The hint appears in the conversation panel as a user message prefixed with `[system reminder: ...]` — that is intentional, so you can see when the ratchet fired. |
|
|
51
51
|
| `glm-clear-thinking` | `true` | Forces `clear_thinking: true` on every request. The coding endpoint (`api.z.ai/api/coding/paas/v4`) defaults to preserved thinking, which silently compounds `reasoning_content` across turns. At $4.4/MTok output, this is real money. |
|
|
52
|
-
| `glm-
|
|
52
|
+
| `glm-skip-short-thinking` | `true` | For user prompts under 80 chars, forces `thinking.type: "disabled"` for that turn. Trivial questions ("what time is it") don't need deep thinking. |
|
|
53
53
|
|
|
54
54
|
All three flags surface in `pi config` and Pi's flag editor — `pi config set glm-budget-nudge false` to disable.
|
|
55
55
|
|
package/extensions/index.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* supports (off, high, max), wires the native `thinkingFormat: "zai"` wire
|
|
6
6
|
* translation, auto-clamps hidden levels, and applies token-efficiency
|
|
7
7
|
* hygiene (per-turn system-prompt nudge, intra-loop ratchet, wire-level
|
|
8
|
-
* clear_thinking and short-
|
|
8
|
+
* clear_thinking and skip-short-thinking).
|
|
9
9
|
*
|
|
10
10
|
* Wire map (see https://docs.z.ai/guides/capabilities/thinking and
|
|
11
11
|
* providers/openai-completions.js in pi-ai):
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
* endpoint defaults to preserved thinking, which silently compounds
|
|
37
37
|
* `reasoning_content` across turns). `glm-clear-thinking`, default on.
|
|
38
38
|
* - On short user prompts (<80 chars), force `thinking.type: "disabled"`
|
|
39
|
-
* to save tokens on trivial turns. `glm-
|
|
39
|
+
* to save tokens on trivial turns. `glm-skip-short-thinking`, default on.
|
|
40
40
|
*
|
|
41
41
|
* Auth is untouched. The provider's existing key (ZAI_API_KEY env, /login,
|
|
42
42
|
* or models.json apiKey) continues to resolve against the new baseUrl.
|
|
@@ -77,8 +77,8 @@ const FLAGS = [
|
|
|
77
77
|
"Force clear_thinking=true on zai/glm-5.2 requests to prevent cross-turn reasoning_content carryover on the coding endpoint.",
|
|
78
78
|
},
|
|
79
79
|
{
|
|
80
|
-
name: "glm-
|
|
81
|
-
label: "
|
|
80
|
+
name: "glm-skip-short-thinking",
|
|
81
|
+
label: "Skip short thinking",
|
|
82
82
|
description: "Disable thinking on short user prompts (<80 chars) to save tokens on trivial turns.",
|
|
83
83
|
},
|
|
84
84
|
] as const;
|
|
@@ -240,6 +240,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
240
240
|
const items: SettingItem[] = FLAGS.map((f) => ({
|
|
241
241
|
id: f.name,
|
|
242
242
|
label: f.label,
|
|
243
|
+
description: f.description,
|
|
243
244
|
currentValue: pi.getFlag(f.name) === true ? "on" : "off",
|
|
244
245
|
values: ["on", "off"],
|
|
245
246
|
}));
|
|
@@ -437,7 +438,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
437
438
|
mutated = true;
|
|
438
439
|
}
|
|
439
440
|
|
|
440
|
-
// Short-prompt
|
|
441
|
+
// Short-prompt thinking-skip: trivial turns ("what time is it")
|
|
441
442
|
// don't need deep thinking. Force the kill switch and let Pi's
|
|
442
443
|
// zai branch pass the thinking.type="disabled" through.
|
|
443
444
|
//
|
|
@@ -445,7 +446,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
445
446
|
// first: loop.shortPrompt is computed once from the initial prompt
|
|
446
447
|
// and held constant (see before_agent_start). A short prompt that
|
|
447
448
|
// spawns tool calls stays thinking-free for the whole turn.
|
|
448
|
-
if (pi.getFlag("glm-
|
|
449
|
+
if (pi.getFlag("glm-skip-short-thinking") === true && loop.shortPrompt) {
|
|
449
450
|
thinking.type = "disabled";
|
|
450
451
|
mutated = true;
|
|
451
452
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@estebanforge/pi-glm-tweaks",
|
|
3
|
-
"version": "1.1.0",
|
|
3
|
+
"version": "1.1.1",
|
|
4
4
|
"description": "Pi-native tweaks for Z.AI's GLM-5.2. Restricts the Pi thinking-level UI to the three modes GLM-5.2 actually supports (off, high, max), wires the native thinkingFormat:\"zai\" wire translation, and auto-clamps hidden levels when the model is selected.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|