@dungle-scrubs/tallow 0.8.28 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/dist/config.d.ts +1 -1
  2. package/dist/config.d.ts.map +1 -1
  3. package/dist/config.js +1 -1
  4. package/dist/config.js.map +1 -1
  5. package/dist/install.d.ts.map +1 -1
  6. package/dist/install.js +2 -9
  7. package/dist/install.js.map +1 -1
  8. package/dist/interactive-mode-patch.d.ts.map +1 -1
  9. package/dist/interactive-mode-patch.js +20 -9
  10. package/dist/interactive-mode-patch.js.map +1 -1
  11. package/extensions/_icons/__tests__/icons.test.ts +0 -1
  12. package/extensions/_icons/index.ts +0 -2
  13. package/extensions/context-fork/__tests__/context-fork.test.ts +9 -0
  14. package/extensions/health/index.ts +1 -1
  15. package/extensions/render-stabilizer/__tests__/render-stabilizer.test.ts +42 -0
  16. package/extensions/render-stabilizer/extension.json +5 -0
  17. package/extensions/render-stabilizer/index.ts +66 -0
  18. package/extensions/subagent-tool/__tests__/auto-cheap-model.test.ts +66 -6
  19. package/extensions/subagent-tool/__tests__/model-router-explicit-resolution.test.ts +79 -5
  20. package/node_modules/@mariozechner/pi-tui/dist/tui.d.ts +47 -0
  21. package/node_modules/@mariozechner/pi-tui/dist/tui.d.ts.map +1 -1
  22. package/node_modules/@mariozechner/pi-tui/dist/tui.js +139 -5
  23. package/node_modules/@mariozechner/pi-tui/dist/tui.js.map +1 -1
  24. package/node_modules/@mariozechner/pi-tui/src/tui.ts +142 -5
  25. package/package.json +1 -1
  26. package/schemas/settings.schema.json +0 -5
  27. package/extensions/plan-mode-tool/__tests__/e2e.mjs +0 -350
  28. package/extensions/plan-mode-tool/__tests__/index.test.ts +0 -213
  29. package/extensions/plan-mode-tool/__tests__/utils.test.ts +0 -381
  30. package/extensions/plan-mode-tool/extension.json +0 -22
  31. package/extensions/plan-mode-tool/index.ts +0 -583
  32. package/extensions/plan-mode-tool/utils.ts +0 -257
package/node_modules/@mariozechner/pi-tui/src/tui.ts CHANGED
@@ -283,6 +283,19 @@ export class TUI extends Container {
283
283
  this.clearOnShrink = enabled;
284
284
  }
285
285
 
286
+ /**
287
+ * Reset the startup grace period timer, suppressing screen-clearing full
288
+ * redraws for another {@link STARTUP_GRACE_MS} milliseconds.
289
+ *
290
+ * Call this at the start of a session switch so the chatContainer.clear()
291
+ * → renderInitialMessages() transition doesn't cause visible flicker.
292
+ *
293
+ * @returns {void}
294
+ */
295
+ resetRenderGrace(): void {
296
+ this.startedAtMs = Date.now();
297
+ }
298
+
286
299
  /**
287
300
  * Request that the next full render clears the terminal scrollback buffer.
288
301
  *
@@ -403,8 +416,25 @@ export class TUI extends Container {
403
416
  for (const overlay of this.overlayStack) overlay.component.invalidate?.();
404
417
  }
405
418
 
419
+ /**
420
+ * Timestamp when `start()` was called.
421
+ * Used by startup grace period to suppress screen-clearing full redraws.
422
+ */
423
+ private startedAtMs = 0;
424
+
425
+ /**
426
+ * Duration (ms) after `start()` during which shrink-triggered full redraws
427
+ * use a gentler line-by-line overwrite instead of screen clear.
428
+ *
429
+ * This prevents the visual flicker that occurs when session resume causes
430
+ * rapid content height changes (extension hooks, widget adds/removes) before
431
+ * the full message history is rendered.
432
+ */
433
+ private static readonly STARTUP_GRACE_MS = 3000;
434
+
406
435
  start(): void {
407
436
  this.stopped = false;
437
+ this.startedAtMs = Date.now();
408
438
  this.terminal.start(
409
439
  (data) => this.handleInput(data),
410
440
  () => this.requestRender()
@@ -454,6 +484,45 @@ export class TUI extends Container {
454
484
  this.terminal.stop();
455
485
  }
456
486
 
487
+ /** When >0, scheduled renders are deferred until the batch completes. */
488
+ private renderBatchDepth = 0;
489
+
490
+ /** Whether a render was requested while batching was active. */
491
+ private renderDeferredDuringBatch = false;
492
+
493
+ /** Whether a forced render was requested while batching was active. */
494
+ private renderForceDeferredDuringBatch = false;
495
+
496
+ /**
497
+ * Begin a render batch — all `requestRender()` calls are coalesced and
498
+ * deferred until the matching `endRenderBatch()`. Nestable.
499
+ *
500
+ * Use to prevent intermediate renders (and the screen clears they cause)
501
+ * during multi-step UI mutations such as session resume.
502
+ *
503
+ * @returns {void}
504
+ */
505
+ beginRenderBatch(): void {
506
+ this.renderBatchDepth++;
507
+ }
508
+
509
+ /**
510
+ * End a render batch. When the outermost batch ends, a single render is
511
+ * scheduled if any were deferred.
512
+ *
513
+ * @returns {void}
514
+ */
515
+ endRenderBatch(): void {
516
+ if (this.renderBatchDepth <= 0) return;
517
+ this.renderBatchDepth--;
518
+ if (this.renderBatchDepth === 0 && this.renderDeferredDuringBatch) {
519
+ const wasForce = this.renderForceDeferredDuringBatch;
520
+ this.renderDeferredDuringBatch = false;
521
+ this.renderForceDeferredDuringBatch = false;
522
+ this.requestRender(wasForce);
523
+ }
524
+ }
525
+
457
526
  requestRender(force = false): void {
458
527
  if (force) {
459
528
  this.previousLines = [];
@@ -464,6 +533,11 @@ export class TUI extends Container {
464
533
  this.previousViewportTop = 0;
465
534
  this.rollingShrinkPeak = 0;
466
535
  }
536
+ if (this.renderBatchDepth > 0) {
537
+ this.renderDeferredDuringBatch = true;
538
+ if (force) this.renderForceDeferredDuringBatch = true;
539
+ return;
540
+ }
467
541
  if (this.renderRequested) return;
468
542
  this.scheduleRender();
469
543
  }
@@ -1004,6 +1078,11 @@ export class TUI extends Container {
1004
1078
  // Width changed - need full re-render (line wrapping changes)
1005
1079
  const widthChanged = this.previousWidth !== 0 && this.previousWidth !== width;
1006
1080
 
1081
+ // Whether we are within the startup grace period where screen-clearing
1082
+ // full redraws are softened to prevent flicker during session resume.
1083
+ const inStartupGrace =
1084
+ this.startedAtMs > 0 && Date.now() - this.startedAtMs < TUI.STARTUP_GRACE_MS;
1085
+
1007
1086
  // Helper to clear viewport (and optionally scrollback) and render all new lines
1008
1087
  const fullRender = (clear: boolean): void => {
1009
1088
  this.fullRedrawCount += 1;
@@ -1035,6 +1114,44 @@ export class TUI extends Container {
1035
1114
  this.previousWidth = width;
1036
1115
  };
1037
1116
 
1117
+ /**
1118
+ * Gentle full redraw: home cursor + overwrite each line + clear below.
1119
+ *
1120
+ * Used during the startup grace period instead of fullRender(true) for
1121
+ * shrink-triggered redraws. Avoids the visible blank frame caused by
1122
+ * `\x1b[2J` (clear screen), which makes messages appear to flash in and
1123
+ * out when session resume triggers rapid content height changes.
1124
+ *
1125
+ * Unlike fullRender(true), this never clears the screen — it writes each
1126
+ * line with a preceding `\x1b[2K` (clear line) so stale content is
1127
+ * overwritten without a blank frame. Lines below the new content are
1128
+ * individually erased.
1129
+ */
1130
+ const gentleFullRender = (): void => {
1131
+ this.fullRedrawCount += 1;
1132
+ let buffer = "\x1b[?2026h\x1b[H"; // Begin synchronized output + home cursor
1133
+ for (let i = 0; i < newLines.length; i++) {
1134
+ buffer += "\x1b[2K"; // Clear current line
1135
+ buffer += newLines[i];
1136
+ if (i < newLines.length - 1) buffer += "\r\n";
1137
+ }
1138
+ // Erase lines that were previously rendered but are no longer needed
1139
+ const staleLines = Math.max(0, this.maxLinesRendered - newLines.length);
1140
+ for (let i = 0; i < staleLines; i++) {
1141
+ buffer += "\r\n\x1b[2K";
1142
+ }
1143
+ buffer += "\x1b[?2026l"; // End synchronized output
1144
+ this.terminal.write(buffer);
1145
+ this.cursorRow = Math.max(0, newLines.length + staleLines - 1);
1146
+ this.hardwareCursorRow = this.cursorRow;
1147
+ this.maxLinesRendered = newLines.length;
1148
+ this.previousViewportTop = Math.max(0, this.maxLinesRendered - height);
1149
+ this.rollingShrinkPeak = newLines.length;
1150
+ this.positionHardwareCursor(cursorPos, newLines.length);
1151
+ this.previousLines = newLines;
1152
+ this.previousWidth = width;
1153
+ };
1154
+
1038
1155
  const debugRedraw = process.env.PI_DEBUG_REDRAW === "1";
1039
1156
  const logRedraw = (reason: string): void => {
1040
1157
  if (!debugRedraw) return;
@@ -1066,7 +1183,11 @@ export class TUI extends Container {
1066
1183
  this.overlayStack.length === 0
1067
1184
  ) {
1068
1185
  logRedraw(`clearOnShrink (maxLinesRendered=${this.maxLinesRendered})`);
1069
- fullRender(true);
1186
+ if (inStartupGrace) {
1187
+ gentleFullRender();
1188
+ } else {
1189
+ fullRender(true);
1190
+ }
1070
1191
  return;
1071
1192
  }
1072
1193
 
@@ -1077,7 +1198,11 @@ export class TUI extends Container {
1077
1198
  const shrinkDelta = this.previousLines.length - newLines.length;
1078
1199
  if (shrinkDelta > 5 && this.overlayStack.length === 0) {
1079
1200
  logRedraw(`large shrink (${shrinkDelta} lines)`);
1080
- fullRender(true);
1201
+ if (inStartupGrace) {
1202
+ gentleFullRender();
1203
+ } else {
1204
+ fullRender(true);
1205
+ }
1081
1206
  return;
1082
1207
  }
1083
1208
 
@@ -1092,7 +1217,11 @@ export class TUI extends Container {
1092
1217
  logRedraw(
1093
1218
  `rolling shrink (peak=${this.rollingShrinkPeak}, now=${newLines.length}, delta=${this.rollingShrinkPeak - newLines.length})`
1094
1219
  );
1095
- fullRender(true);
1220
+ if (inStartupGrace) {
1221
+ gentleFullRender();
1222
+ } else {
1223
+ fullRender(true);
1224
+ }
1096
1225
  return;
1097
1226
  }
1098
1227
 
@@ -1167,7 +1296,11 @@ export class TUI extends Container {
1167
1296
  const extraLines = this.previousLines.length - newLines.length;
1168
1297
  if (extraLines > height) {
1169
1298
  logRedraw(`extraLines > height (${extraLines} > ${height})`);
1170
- fullRender(true);
1299
+ if (inStartupGrace) {
1300
+ gentleFullRender();
1301
+ } else {
1302
+ fullRender(true);
1303
+ }
1171
1304
  return;
1172
1305
  }
1173
1306
  if (extraLines > 0) {
@@ -1195,7 +1328,11 @@ export class TUI extends Container {
1195
1328
  // If first changed line is above the current viewport basis, partial redraw is unsafe.
1196
1329
  if (firstChanged < prevViewportTop) {
1197
1330
  logRedraw(`firstChanged < viewportTop (${firstChanged} < ${prevViewportTop})`);
1198
- fullRender(true);
1331
+ if (inStartupGrace) {
1332
+ gentleFullRender();
1333
+ } else {
1334
+ fullRender(true);
1335
+ }
1199
1336
  return;
1200
1337
  }
1201
1338
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dungle-scrubs/tallow",
3
- "version": "0.8.28",
3
+ "version": "0.9.0",
4
4
  "description": "An opinionated coding agent. Built on pi.",
5
5
  "piConfig": {
6
6
  "name": "tallow",
package/schemas/settings.schema.json CHANGED
@@ -429,11 +429,6 @@
429
429
  "default": ["◐", "◓", "◑", "◒"],
430
430
  "minItems": 1
431
431
  },
432
- "plan_mode": {
433
- "type": "string",
434
- "description": "Plan mode indicator (default: '⏸').",
435
- "default": "⏸"
436
- },
437
432
  "task_list": {
438
433
  "type": "string",
439
434
  "description": "Task list indicator (default: '📋').",
package/extensions/plan-mode-tool/__tests__/e2e.mjs DELETED
@@ -1,350 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- /**
4
- * E2E test for the plan-mode extension.
5
- *
6
- * Proves:
7
- * 1. plan_mode tool remains available after toggling modes
8
- * 2. Plan mode enforces a strict read-only allowlist
9
- * 3. Non-allowlisted extension tools are blocked in plan mode
10
- * 4. Disabling plan mode restores normal access
11
- *
12
- * Uses the SDK to load ONLY the plan-mode extension (isolated).
13
- * Costs ~$0.01 per run.
14
- *
15
- * Usage:
16
- * node extensions/plan-mode-tool/__tests__/e2e.mjs
17
- */
18
-
19
- import fs from "node:fs";
20
- import os from "node:os";
21
- import path from "node:path";
22
- import { fileURLToPath } from "node:url";
23
- import { getModel } from "@mariozechner/pi-ai";
24
- import {
25
- AuthStorage,
26
- createAgentSession,
27
- DefaultResourceLoader,
28
- ModelRegistry,
29
- SessionManager,
30
- SettingsManager,
31
- } from "@mariozechner/pi-coding-agent";
32
- import { Type } from "@sinclair/typebox";
33
-
34
- // ── Helpers ──────────────────────────────────────────────────
35
-
36
- const results = [];
37
-
38
- /**
39
- * Record a test result.
40
- * @param {string} name - Test name
41
- * @param {boolean} passed - Pass/fail
42
- * @param {string} [detail] - Extra detail on failure
43
- */
44
- function check(name, passed, detail) {
45
- results.push({ name, passed, detail });
46
- const icon = passed ? "\x1b[32m✓\x1b[0m" : "\x1b[31m✗\x1b[0m";
47
- let line = ` ${icon} ${name}`;
48
- if (!passed && detail) line += `\n ${detail.slice(0, 300)}`;
49
- console.log(line);
50
- }
51
-
52
- /**
53
- * Get the text content of the most recent tool result for a given tool name.
54
- * @param {import("@mariozechner/pi-coding-agent").AgentSession} session
55
- * @param {string} toolName
56
- * @returns {string}
57
- */
58
- function lastToolResultText(session, toolName) {
59
- const msgs = session.messages;
60
- for (let i = msgs.length - 1; i >= 0; i--) {
61
- const m = msgs[i];
62
- if (m.role === "toolResult" && m.toolName === toolName) {
63
- for (const part of m.content) {
64
- if (part.type === "text") return part.text;
65
- }
66
- }
67
- }
68
- return "";
69
- }
70
-
71
- /**
72
- * Check if any tool result in the session contains "not found" error.
73
- * @param {import("@mariozechner/pi-coding-agent").AgentSession} session
74
- * @param {string} toolName
75
- * @returns {boolean}
76
- */
77
- function hasToolNotFoundError(session, toolName) {
78
- const msgs = session.messages;
79
- for (let i = msgs.length - 1; i >= 0; i--) {
80
- const m = msgs[i];
81
- if (m.role === "toolResult") {
82
- for (const part of m.content) {
83
- if (part.type === "text" && part.text.includes(`Tool ${toolName} not found`)) {
84
- return true;
85
- }
86
- }
87
- }
88
- }
89
- return false;
90
- }
91
-
92
- /**
93
- * Check if a tool call was blocked by plan-mode policy.
94
- * @param {import("@mariozechner/pi-coding-agent").AgentSession} session
95
- * @param {string} toolName
96
- * @returns {boolean}
97
- */
98
- function hasPlanModeToolBlockedError(session, toolName) {
99
- const msgs = session.messages;
100
- for (let i = msgs.length - 1; i >= 0; i--) {
101
- const m = msgs[i];
102
- if (m.role !== "toolResult") continue;
103
- for (const part of m.content) {
104
- if (part.type === "text" && part.text.includes(`Plan mode: tool "${toolName}" blocked`)) {
105
- return true;
106
- }
107
- }
108
- }
109
- return false;
110
- }
111
-
112
- // ── Isolated extension loading ───────────────────────────────
113
-
114
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
115
- const extensionSrcDir = path.resolve(__dirname, "..");
116
-
117
- const testAgentDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-e2e-plan-"));
118
- const extDst = path.join(testAgentDir, "extensions/plan-mode-tool");
119
- fs.mkdirSync(extDst, { recursive: true });
120
- for (const file of ["index.ts", "utils.ts"]) {
121
- fs.copyFileSync(path.join(extensionSrcDir, file), path.join(extDst, file));
122
- }
123
-
124
- // ── Setup ────────────────────────────────────────────────────
125
-
126
- console.log("\n\x1b[1m══ Plan Mode Extension E2E Test ══\x1b[0m\n");
127
-
128
- const authStorage = new AuthStorage();
129
- const modelRegistry = new ModelRegistry(authStorage);
130
- const model = getModel("anthropic", "claude-haiku-4-5");
131
- if (!model) {
132
- console.error("✗ Model claude-haiku-4-5 not found");
133
- process.exit(1);
134
- }
135
-
136
- const settingsManager = SettingsManager.inMemory({ compaction: { enabled: false } });
137
-
138
- /**
139
- * Register mock tools used to validate strict plan-mode allowlisting.
140
- * @param {import("@mariozechner/pi-coding-agent").ExtensionAPI} pi
141
- */
142
- function registerMockTools(pi) {
143
- pi.registerTool({
144
- name: "bg_bash",
145
- label: "bg_bash",
146
- description: "Mock background bash tool",
147
- parameters: Type.Object({ command: Type.String() }),
148
- async execute(_toolCallId, params) {
149
- return {
150
- content: [{ type: "text", text: `mock-bg-bash-ok:${params.command}` }],
151
- details: {},
152
- };
153
- },
154
- });
155
-
156
- pi.registerTool({
157
- name: "subagent",
158
- label: "subagent",
159
- description: "Mock subagent tool",
160
- parameters: Type.Object({ task: Type.String() }),
161
- async execute(_toolCallId, params) {
162
- return {
163
- content: [{ type: "text", text: `mock-subagent-ok:${params.task}` }],
164
- details: {},
165
- };
166
- },
167
- });
168
-
169
- pi.registerTool({
170
- name: "mcp__mock__ping",
171
- label: "mcp__mock__ping",
172
- description: "Mock MCP-style tool",
173
- parameters: Type.Object({}),
174
- async execute() {
175
- return {
176
- content: [{ type: "text", text: "mock-mcp-ok" }],
177
- details: {},
178
- };
179
- },
180
- });
181
-
182
- pi.registerTool({
183
- name: "questionnaire",
184
- label: "questionnaire",
185
- description: "Mock read-only questionnaire tool",
186
- parameters: Type.Object({}),
187
- async execute() {
188
- return {
189
- content: [{ type: "text", text: "mock-questionnaire-ok" }],
190
- details: {},
191
- };
192
- },
193
- });
194
- }
195
-
196
- console.log("Loading extension (isolated)...");
197
- const loader = new DefaultResourceLoader({
198
- cwd: os.tmpdir(),
199
- agentDir: testAgentDir,
200
- settingsManager,
201
- extensionFactories: [registerMockTools],
202
- skillsOverride: () => ({ skills: [], diagnostics: [] }),
203
- promptsOverride: () => ({ prompts: [], diagnostics: [] }),
204
- agentsFilesOverride: () => ({ agentsFiles: [] }),
205
- });
206
- await loader.reload();
207
-
208
- const exts = loader.getExtensions();
209
- console.log(` Extensions loaded: ${exts.extensions.length}, errors: ${exts.errors.length}`);
210
- if (exts.errors.length > 0) {
211
- console.error(" Extension errors:", exts.errors);
212
- }
213
-
214
- console.log("Creating session (haiku)...\n");
215
- const { session } = await createAgentSession({
216
- model,
217
- thinkingLevel: "off",
218
- authStorage,
219
- modelRegistry,
220
- resourceLoader: loader,
221
- sessionManager: SessionManager.inMemory(),
222
- settingsManager,
223
- });
224
-
225
- // Log tool calls
226
- session.subscribe((event) => {
227
- if (event.type === "tool_execution_start") {
228
- process.stdout.write(` \x1b[2m→ ${event.toolName}\x1b[0m\n`);
229
- }
230
- });
231
-
232
- // ── Test 1: plan_mode tool exists at startup ─────────────────
233
-
234
- console.log("\x1b[1mTest 1: plan_mode tool available at startup\x1b[0m");
235
- await session.prompt(
236
- 'Call the plan_mode tool with action "status". Only call this one tool, nothing else.'
237
- );
238
- const statusText = lastToolResultText(session, "plan_mode");
239
- const noStartupError = !hasToolNotFoundError(session, "plan_mode");
240
- check("plan_mode tool callable at startup", noStartupError, statusText);
241
- check("reports normal mode", statusText.includes("normal"), statusText);
242
-
243
- // ── Test 2: Enable plan mode, verify plan_mode survives ──────
244
-
245
- console.log("\n\x1b[1mTest 2: Enable plan mode → plan_mode tool still available\x1b[0m");
246
- await session.prompt(
247
- 'Call the plan_mode tool with action "enable". Only call this one tool, nothing else.'
248
- );
249
- const enableText = lastToolResultText(session, "plan_mode");
250
- const noEnableError = !hasToolNotFoundError(session, "plan_mode");
251
- check("plan_mode callable during enable", noEnableError, enableText);
252
- check("reports plan mode enabled", enableText.includes("enabled"), enableText);
253
-
254
- // Now check status — plan_mode should still work IN plan mode
255
- await session.prompt(
256
- 'Call the plan_mode tool with action "status". Only call this one tool, nothing else.'
257
- );
258
- const planStatusText = lastToolResultText(session, "plan_mode");
259
- const noPlanStatusError = !hasToolNotFoundError(session, "plan_mode");
260
- check("plan_mode callable while in plan mode", noPlanStatusError, planStatusText);
261
- check("reports planning mode", planStatusText.includes("planning"), planStatusText);
262
-
263
- // ── Test 3: Disable plan mode, verify plan_mode survives ─────
264
-
265
- console.log("\n\x1b[1mTest 3: Disable plan mode → plan_mode tool still available\x1b[0m");
266
- await session.prompt(
267
- 'Call the plan_mode tool with action "disable". Only call this one tool, nothing else.'
268
- );
269
- const disableText = lastToolResultText(session, "plan_mode");
270
- const noDisableError = !hasToolNotFoundError(session, "plan_mode");
271
- check("plan_mode callable during disable", noDisableError, disableText);
272
- check("reports disabled", disableText.includes("disabled"), disableText);
273
-
274
- // Final status check — should be back to normal
275
- await session.prompt(
276
- 'Call the plan_mode tool with action "status". Only call this one tool, nothing else.'
277
- );
278
- const finalStatusText = lastToolResultText(session, "plan_mode");
279
- const noFinalError = !hasToolNotFoundError(session, "plan_mode");
280
- check("plan_mode callable after round-trip", noFinalError, finalStatusText);
281
- check("back to normal mode", finalStatusText.includes("normal"), finalStatusText);
282
-
283
- // ── Test 4: Strict allowlist enforcement in plan mode ────────
284
-
285
- console.log("\n\x1b[1mTest 4: Strict allowlist blocks non-read-only tools\x1b[0m");
286
- await session.prompt(
287
- 'Call the plan_mode tool with action "enable". Only call this one tool, nothing else.'
288
- );
289
-
290
- await session.prompt(
291
- 'Call the edit tool to edit file "/tmp/test.txt" replacing "a" with "b". Only call edit, nothing else.'
292
- );
293
- const editBlocked =
294
- hasToolNotFoundError(session, "edit") || hasPlanModeToolBlockedError(session, "edit");
295
- check("edit tool blocked in plan mode", editBlocked, "edit should not be available in plan mode");
296
-
297
- await session.prompt(
298
- 'Call the bg_bash tool with command "echo blocked". Only call bg_bash, nothing else.'
299
- );
300
- const bgBashBlocked =
301
- hasToolNotFoundError(session, "bg_bash") || hasPlanModeToolBlockedError(session, "bg_bash");
302
- check("bg_bash blocked in plan mode", bgBashBlocked, "bg_bash should be blocked in plan mode");
303
-
304
- await session.prompt('Call the subagent tool with task "ping". Only call subagent, nothing else.');
305
- const subagentBlocked =
306
- hasToolNotFoundError(session, "subagent") || hasPlanModeToolBlockedError(session, "subagent");
307
- check("subagent blocked in plan mode", subagentBlocked, "subagent should be blocked in plan mode");
308
-
309
- await session.prompt("Call the mcp__mock__ping tool. Only call this one tool, nothing else.");
310
- const mcpBlocked =
311
- hasToolNotFoundError(session, "mcp__mock__ping") ||
312
- hasPlanModeToolBlockedError(session, "mcp__mock__ping");
313
- check("mcp__* tools blocked in plan mode", mcpBlocked, "MCP tools should be blocked in plan mode");
314
-
315
- await session.prompt("Call the questionnaire tool. Only call this one tool, nothing else.");
316
- const questionnaireText = lastToolResultText(session, "questionnaire");
317
- const questionnaireAllowed = questionnaireText.includes("mock-questionnaire-ok");
318
- check("allowlisted questionnaire tool still works", questionnaireAllowed, questionnaireText);
319
-
320
- // ── Test 5: Disabling plan mode restores normal access ───────
321
-
322
- console.log("\n\x1b[1mTest 5: Disable restores normal tool access\x1b[0m");
323
- await session.prompt(
324
- 'Call the plan_mode tool with action "disable". Only call this one tool, nothing else.'
325
- );
326
- await session.prompt(
327
- 'Call the subagent tool with task "after-disable". Only call subagent, nothing else.'
328
- );
329
- const subagentAfterDisableText = lastToolResultText(session, "subagent");
330
- const subagentRestored = subagentAfterDisableText.includes("mock-subagent-ok:after-disable");
331
- check("subagent restored after disabling plan mode", subagentRestored, subagentAfterDisableText);
332
-
333
- // ── Cleanup & Summary ────────────────────────────────────────
334
-
335
- session.dispose();
336
- fs.rmSync(testAgentDir, { recursive: true, force: true });
337
-
338
- const passed = results.filter((r) => r.passed).length;
339
- const total = results.length;
340
-
341
- console.log(`\n\x1b[1m══ Results: ${passed}/${total} passed ══\x1b[0m`);
342
- if (passed < total) {
343
- console.log("\n\x1b[31mFailed:\x1b[0m");
344
- for (const r of results.filter((r) => !r.passed)) {
345
- console.log(` ✗ ${r.name}`);
346
- if (r.detail) console.log(` ${r.detail.slice(0, 300)}`);
347
- }
348
- }
349
- console.log();
350
- process.exit(passed === total ? 0 : 1);