@estebanforge/pi-glm-tweaks 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -37,6 +37,7 @@ This extension collapses that mismatch:
37
37
  ```
38
38
  2. **Auto-clamps on `model_select`** — if the current level is one we hid (e.g. you switched from a model that allowed `medium`), quietly bump to `high` and notify.
39
39
  3. **Footer hint** — sets `ctx.ui.setStatus("glm-thinking", "thinking: off | high | max")` while GLM-5.2 is the active model.
40
+ 4. **`/glm-tweaks` command** — status panel + flag toggle from inside Pi (see [`/glm-tweaks` command](#glm-tweaks-command)).
40
41
 
41
42
  `Shift+Tab`, `/thinking`, and the level picker all see only the three GLM-5.2 modes.
42
43
 
@@ -52,6 +53,21 @@ GLM-5.2 overthinks on long agent loops — it can spend an entire turn on `reaso
52
53
 
53
54
  All three flags surface in `pi config` and Pi's flag editor — `pi config set glm-budget-nudge false` to disable.
54
55
 
56
+ ## `/glm-tweaks` command
57
+
58
+ An in-session command for inspecting and flipping the flags above without leaving Pi.
59
+
60
+ | Invocation | Effect |
61
+ | --- | --- |
62
+ | `/glm-tweaks` (TUI) | Opens an interactive settings menu (the same `SettingsList` component `/settings` uses). Flip any combination of flags; if anything actually changed, a single reload fires on close to apply them all. Closing the menu with no net changes does nothing. |
63
+ | `/glm-tweaks` (non-TUI / RPC) | Falls back to a read-only status panel (active model, thinking level vs the `off \| high \| max` map, and each flag's on/off state). |
64
+ | `/glm-tweaks toggle <flag>` | One-shot flip: persists, then reloads. |
65
+ | `/glm-tweaks <flag>` | Shorthand one-shot toggle (flag name without the `toggle` keyword). |
66
+
67
+ The command offers tab-completion for `toggle` and the three flag names.
68
+
69
+ **Why a reload per apply.** Pi's extension API exposes `getFlag` but no live `setFlag`, and flag values are read into memory at load time. Changes are therefore persisted via `pi config set`, and a `/reload` picks them up. The interactive menu stages all your flips and reloads once on close; the one-shot toggle reloads immediately. In both cases the command notifies before reloading: the menu reports a count (e.g. `Applied 2 change(s). Reloading...`), and the one-shot toggle reports the flag's old and new value (e.g. `glm-quick-disable: true → false. Reloading...`). If you'd rather avoid reload churn entirely, set flags directly in `pi config` / the flag editor and reload once at your convenience.
70
+
55
71
  ### What the tweaks cannot do
56
72
 
57
73
  - Cap thinking tokens at a wire level. Z.AI does not expose a thinking budget param.
@@ -41,7 +41,8 @@
41
41
  * Auth is untouched. The provider's existing key (ZAI_API_KEY env, /login,
42
42
  * or models.json apiKey) continues to resolve against the new baseUrl.
43
43
  */
44
- import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
44
+ import { getSettingsListTheme, type ExtensionAPI } from "@earendil-works/pi-coding-agent";
45
+ import { Container, SettingsList, Text, type SettingItem } from "@earendil-works/pi-tui";
45
46
 
46
47
  const PROVIDER = "zai";
47
48
  const MODEL_ID = "glm-5.2";
@@ -59,6 +60,29 @@ const HIDDEN_LEVELS = new Set(["minimal", "low", "medium"]);
59
60
  const SHORT_PROMPT_THRESHOLD = 80;
60
61
  const RATCHET_THRESHOLD_CHARS = 2_000;
61
62
 
63
+ // Token-efficiency flags. Single source of truth — drives registerFlag,
64
+ // the /glm-tweaks status display, autocomplete, and the toggle subcommand.
65
+ // All default on. See README for what each does at the wire level.
66
+ const FLAGS = [
67
+ {
68
+ name: "glm-budget-nudge",
69
+ label: "Budget nudge",
70
+ description:
71
+ "Inject a soft thinking-budget system-prompt fragment and intra-loop ratchet for zai/glm-5.2.",
72
+ },
73
+ {
74
+ name: "glm-clear-thinking",
75
+ label: "Clear thinking",
76
+ description:
77
+ "Force clear_thinking=true on zai/glm-5.2 requests to prevent cross-turn reasoning_content carryover on the coding endpoint.",
78
+ },
79
+ {
80
+ name: "glm-quick-disable",
81
+ label: "Quick disable",
82
+ description: "Disable thinking on short user prompts (<80 chars) to save tokens on trivial turns.",
83
+ },
84
+ ] as const;
85
+
62
86
  // Soft system-prompt fragment appended to every zai/glm-5.2 turn when
63
87
  // the budget-nudge flag is on. No "I'm overthinking" ack string — that's
64
88
  // unenforceable (model may or may not emit it, may emit it in Chinese,
@@ -108,24 +132,172 @@ function isZaiGlm52(model: { provider: string; id: string } | undefined | null):
108
132
  return !!model && model.provider === PROVIDER && model.id === MODEL_ID;
109
133
  }
110
134
 
135
+ // Build the /glm-tweaks status panel. Read-only snapshot of the active
136
+ // model, current thinking level, and the on/off state of every flag.
137
+ function renderStatus(
138
+ pi: ExtensionAPI,
139
+ model: { provider: string; id: string } | undefined,
140
+ ): string {
141
+ const active = isZaiGlm52(model);
142
+ const level = pi.getThinkingLevel();
143
+ const flagLines = FLAGS.map((f) => ` ${pi.getFlag(f.name) === true ? "[x]" : "[ ]"} ${f.name}`);
144
+ return [
145
+ `GLM-5.2 tweaks — ${active ? "ACTIVE (zai/glm-5.2 selected)" : "inactive (select zai/glm-5.2 to engage)"}`,
146
+ `thinking: ${active ? `current=${level}, wire=off|high|max` : "n/a"}`,
147
+ "",
148
+ "flags:",
149
+ ...flagLines,
150
+ "",
151
+ "toggle: /glm-tweaks toggle <flag> (shorthand: /glm-tweaks <flag>)",
152
+ "also: pi config set <flag> false",
153
+ ].join("\n");
154
+ }
155
+
111
156
  export default function (pi: ExtensionAPI) {
112
157
  // Register Pi-idiomatic flags at factory load time, NOT inside
113
158
  // session_start. registerFlag is static setup; calling it per session
114
159
  // would clobber user preferences on every /new or /reload.
115
- pi.registerFlag("glm-budget-nudge", {
116
- description: "Inject a soft thinking-budget system-prompt fragment and intra-loop ratchet for zai/glm-5.2.",
117
- type: "boolean",
118
- default: true,
119
- });
120
- pi.registerFlag("glm-clear-thinking", {
121
- description: "Force clear_thinking=true on zai/glm-5.2 requests to prevent cross-turn reasoning_content carryover on the coding endpoint.",
122
- type: "boolean",
123
- default: true,
124
- });
125
- pi.registerFlag("glm-quick-disable", {
126
- description: "Disable thinking on short user prompts (<80 chars) to save tokens on trivial turns.",
127
- type: "boolean",
128
- default: true,
160
+ for (const f of FLAGS) {
161
+ pi.registerFlag(f.name, { description: f.description, type: "boolean", default: true });
162
+ }
163
+
164
+ // /glm-tweaks — status display by default; `toggle <flag>` (or bare
165
+ // `<flag>`) flips a boolean. ExtensionAPI exposes no live setFlag, so a
166
+ // toggle persists via `pi config set` and then reloads the session so
167
+ // the in-memory flag value picks up the change. ctx is stale after
168
+ // reload() — we notify first, reload last, and return immediately.
169
+ pi.registerCommand("glm-tweaks", {
170
+ description: "GLM-5.2 tweaks: show status, or toggle a flag. Usage: /glm-tweaks [toggle <flag>]",
171
+ getArgumentCompletions: (prefix: string) => {
172
+ // Preserve trailing space: `/glm-tweaks toggle ` (with space) means
173
+ // the `toggle` token is complete and we should now suggest flags.
174
+ // Trimming would collapse it to "toggle" and re-suggest the word.
175
+ const trailingSpace = /\s$/.test(prefix);
176
+ const tokens = prefix.trim().split(/\s+/).filter(Boolean);
177
+ const flagNames = FLAGS.map((f) => f.name);
178
+ const root = ["toggle", ...flagNames];
179
+ // Suggest flag names once `toggle` is complete (either as the only
180
+ // token with a trailing space, or with a partial flag typed).
181
+ const toggleComplete =
182
+ (tokens.length === 1 && tokens[0] === "toggle") ||
183
+ (tokens.length >= 2 && tokens[0] === "toggle");
184
+ if (toggleComplete) {
185
+ const partial = tokens.length >= 2 ? tokens[tokens.length - 1] : "";
186
+ const hits = flagNames.filter((n) => n.startsWith(partial));
187
+ return hits.length ? hits.map((v) => ({ value: v, label: v })) : null;
188
+ }
189
+ if (tokens.length <= 1 && !trailingSpace) {
190
+ const hits = root.filter((o) => o.startsWith(tokens[0] ?? ""));
191
+ return hits.length ? hits.map((v) => ({ value: v, label: v })) : null;
192
+ }
193
+ return null;
194
+ },
195
+ handler: async (args, ctx) => {
196
+ const trimmed = args.trim();
197
+
198
+ // Toggle mode: `/glm-tweaks toggle <flag>` or `/glm-tweaks <flag>`.
199
+ // Direct one-shot flip — persists via `pi config set` then reloads.
200
+ // Bare `/glm-tweaks toggle` (no flag) falls through to the menu.
201
+ if (trimmed !== "" && trimmed !== "status" && trimmed !== "toggle") {
202
+ const tokens = trimmed.split(/\s+/).filter(Boolean);
203
+ const flagName = tokens[0] === "toggle" ? tokens[1] : tokens[0];
204
+ const meta = FLAGS.find((f) => f.name === flagName);
205
+ if (!meta) {
206
+ ctx.ui.notify(
207
+ `Unknown flag "${flagName}". Valid: ${FLAGS.map((f) => f.name).join(", ")}`,
208
+ "warning",
209
+ );
210
+ return;
211
+ }
212
+ const current = pi.getFlag(meta.name) === true;
213
+ const next = !current;
214
+ const result = await pi.exec("pi", ["config", "set", meta.name, String(next)]);
215
+ if (result.code !== 0) {
216
+ ctx.ui.notify(
217
+ `Failed to set ${meta.name}: ${result.stderr.trim() || `exit ${result.code}`}`,
218
+ "error",
219
+ );
220
+ return;
221
+ }
222
+ ctx.ui.notify(`${meta.name}: ${current} → ${next}. Reloading...`, "info");
223
+ await ctx.reload();
224
+ return;
225
+ }
226
+
227
+ // Status/menu mode. In TUI, open an interactive SettingsList
228
+ // (same component /settings uses) so the user can flip several
229
+ // flags in one visit; changes persist via `pi config set` and a
230
+ // single reload fires on close. Outside TUI (RPC/headless), fall
231
+ // back to the read-only status panel — custom components are
232
+ // terminal-only.
233
+ if (ctx.mode !== "tui") {
234
+ ctx.ui.notify(renderStatus(pi, ctx.model), "info");
235
+ return;
236
+ }
237
+
238
+ const active = isZaiGlm52(ctx.model);
239
+ const pending = new Map<string, boolean>();
240
+ const items: SettingItem[] = FLAGS.map((f) => ({
241
+ id: f.name,
242
+ label: f.label,
243
+ currentValue: pi.getFlag(f.name) === true ? "on" : "off",
244
+ values: ["on", "off"],
245
+ }));
246
+
247
+ await ctx.ui.custom((tui, theme, _kb, done) => {
248
+ const container = new Container();
249
+ const header = active
250
+ ? "GLM-5.2 tweaks — zai/glm-5.2 active"
251
+ : "GLM-5.2 tweaks — inactive (select zai/glm-5.2 to engage)";
252
+ container.addChild(new Text(theme.fg("accent", theme.bold(header)), 1, 1));
253
+
254
+ const settingsList = new SettingsList(
255
+ items,
256
+ Math.min(items.length + 2, 15),
257
+ getSettingsListTheme(),
258
+ (id, newValue) => {
259
+ // Stage the change; persist + reload on close, not here,
260
+ // so the user can flip several flags per visit.
261
+ pending.set(id, newValue === "on");
262
+ },
263
+ () => done(undefined),
264
+ );
265
+ container.addChild(settingsList);
266
+
267
+ return {
268
+ render: (w: number) => container.render(w),
269
+ invalidate: () => container.invalidate(),
270
+ handleInput: (data: string) => {
271
+ settingsList.handleInput?.(data);
272
+ tui.requestRender();
273
+ },
274
+ };
275
+ });
276
+
277
+ // Dialog closed. ctx is still valid here (reload is the only
278
+ // staleness trigger, and we haven't called it yet). Drop net-zero
279
+ // flips (a flag toggled on then off stages but changes nothing),
280
+ // then persist genuine deltas and reload once if any moved.
281
+ const deltas: Array<[string, boolean]> = [];
282
+ for (const [name, val] of pending) {
283
+ const currentlyOn = pi.getFlag(name) === true;
284
+ if (currentlyOn === val) continue; // net-zero: toggled back to current
285
+ deltas.push([name, val]);
286
+ }
287
+ if (deltas.length === 0) return;
288
+
289
+ const failures: string[] = [];
290
+ for (const [name, val] of deltas) {
291
+ const r = await pi.exec("pi", ["config", "set", name, String(val)]);
292
+ if (r.code !== 0) failures.push(`${name} (${r.stderr.trim() || `exit ${r.code}`})`);
293
+ }
294
+ if (failures.length > 0) {
295
+ ctx.ui.notify(`Failed to apply: ${failures.join("; ")}`, "error");
296
+ return;
297
+ }
298
+ ctx.ui.notify(`Applied ${deltas.length} change(s). Reloading...`, "info");
299
+ await ctx.reload();
300
+ },
129
301
  });
130
302
 
131
303
  // Per-loop mutable state. Node.js runs the extension hooks single-
@@ -196,13 +368,17 @@ export default function (pi: ExtensionAPI) {
196
368
  if (pi.getFlag("glm-budget-nudge") !== true) return {};
197
369
  if (loop.ratchetFired) return {};
198
370
 
199
- // Sum reasoning_content from assistant messages in the CURRENT
200
- // agent loop only. Find the boundary by walking back to the last
201
- // `role: "user"` message (the prompt that started this loop).
202
- // toolResult / assistant / custom / etc. are not user role, so
203
- // they don't reset the boundary. Without this scoping, a long
204
- // session would fire the ratchet on the first LLM call of every
205
- // new turn regardless of current-loop thinking.
371
+ // Sum reasoning from assistant messages in the CURRENT agent loop
372
+ // only. Find the boundary by walking back to the last `role: "user"`
373
+ // message (the prompt that started this loop). toolResult / assistant
374
+ // / custom / etc. are not user role, so they don't reset the boundary.
375
+ // Without this scoping, a long session would fire the ratchet on the
376
+ // first LLM call of every new turn regardless of current-loop thinking.
377
+ //
378
+ // Pi stores assistant thinking in content[] as ThinkingContent blocks
379
+ // ({type:"thinking", thinking:string}) — NOT a top-level
380
+ // `reasoning_content` field (that's the Z.AI wire name). Reading the
381
+ // wrong field was a 1.0.0 bug that left the ratchet permanently dead.
206
382
  let loopStart = event.messages.length - 1;
207
383
  while (loopStart > 0) {
208
384
  const m = event.messages[loopStart] as { role?: string } | undefined;
@@ -214,10 +390,17 @@ export default function (pi: ExtensionAPI) {
214
390
  for (let i = loopStart + 1; i < event.messages.length; i++) {
215
391
  const m = event.messages[i];
216
392
  if (typeof m !== "object" || m === null) continue;
217
- const msg = m as { role?: string; reasoning_content?: unknown };
218
- if (msg.role !== "assistant") continue;
219
- if (typeof msg.reasoning_content === "string") {
220
- totalReasoning += msg.reasoning_content.length;
393
+ const msg = m as { role?: string; content?: unknown };
394
+ if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue;
395
+ for (const block of msg.content) {
396
+ if (
397
+ block &&
398
+ typeof block === "object" &&
399
+ (block as { type?: string }).type === "thinking" &&
400
+ typeof (block as { thinking?: unknown }).thinking === "string"
401
+ ) {
402
+ totalReasoning += (block as { thinking: string }).thinking.length;
403
+ }
221
404
  }
222
405
  }
223
406
  if (totalReasoning < RATCHET_THRESHOLD_CHARS) return {};
@@ -227,6 +410,7 @@ export default function (pi: ExtensionAPI) {
227
410
  role: "user",
228
411
  content:
229
412
  "[system reminder: you've been thinking extensively without taking a tool call. Take a tool call now or wrap up your response.]",
413
+ timestamp: Date.now(),
230
414
  };
231
415
  return { messages: [...event.messages, hint as never] };
232
416
  });
@@ -256,6 +440,11 @@ export default function (pi: ExtensionAPI) {
256
440
  // Short-prompt quick-disable: trivial turns ("what time is it")
257
441
  // don't need deep thinking. Force the kill switch and let Pi's
258
442
  // zai branch drop the thinking.type="disabled" through.
443
+ //
444
+ // Intentionally applies to every LLM call in the loop, not just the
445
+ // first: loop.shortPrompt is computed once from the initial prompt
446
+ // and held constant (see before_agent_start). A short prompt that
447
+ // spawns tool calls stays thinking-free for the whole turn.
259
448
  if (pi.getFlag("glm-quick-disable") === true && loop.shortPrompt) {
260
449
  thinking.type = "disabled";
261
450
  mutated = true;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@estebanforge/pi-glm-tweaks",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "Pi-native tweaks for Z.AI's GLM-5.2. Restricts the Pi thinking-level UI to the three modes GLM-5.2 actually supports (off, high, max), wires the native thinkingFormat:\"zai\" wire translation, and auto-clamps hidden levels when the model is selected.",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -43,6 +43,7 @@
43
43
  },
44
44
  "devDependencies": {
45
45
  "@earendil-works/pi-coding-agent": "^0.80.2",
46
+ "@earendil-works/pi-tui": "^0.80.2",
46
47
  "@types/node": "^22.0.0",
47
48
  "typescript": "^5.8.0"
48
49
  }