@dungle-scrubs/tallow 0.8.28 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.d.ts +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +1 -1
- package/dist/config.js.map +1 -1
- package/dist/install.d.ts.map +1 -1
- package/dist/install.js +2 -9
- package/dist/install.js.map +1 -1
- package/dist/interactive-mode-patch.d.ts.map +1 -1
- package/dist/interactive-mode-patch.js +20 -9
- package/dist/interactive-mode-patch.js.map +1 -1
- package/extensions/_icons/__tests__/icons.test.ts +0 -1
- package/extensions/_icons/index.ts +0 -2
- package/extensions/context-fork/__tests__/context-fork.test.ts +9 -0
- package/extensions/health/index.ts +1 -1
- package/extensions/render-stabilizer/__tests__/render-stabilizer.test.ts +42 -0
- package/extensions/render-stabilizer/extension.json +5 -0
- package/extensions/render-stabilizer/index.ts +66 -0
- package/extensions/subagent-tool/__tests__/auto-cheap-model.test.ts +66 -6
- package/extensions/subagent-tool/__tests__/model-router-explicit-resolution.test.ts +79 -5
- package/node_modules/@mariozechner/pi-tui/dist/tui.d.ts +47 -0
- package/node_modules/@mariozechner/pi-tui/dist/tui.d.ts.map +1 -1
- package/node_modules/@mariozechner/pi-tui/dist/tui.js +139 -5
- package/node_modules/@mariozechner/pi-tui/dist/tui.js.map +1 -1
- package/node_modules/@mariozechner/pi-tui/src/tui.ts +142 -5
- package/package.json +1 -1
- package/schemas/settings.schema.json +0 -5
- package/extensions/plan-mode-tool/__tests__/e2e.mjs +0 -350
- package/extensions/plan-mode-tool/__tests__/index.test.ts +0 -213
- package/extensions/plan-mode-tool/__tests__/utils.test.ts +0 -381
- package/extensions/plan-mode-tool/extension.json +0 -22
- package/extensions/plan-mode-tool/index.ts +0 -583
- package/extensions/plan-mode-tool/utils.ts +0 -257
package/node_modules/@mariozechner/pi-tui/src/tui.ts
CHANGED
|
@@ -283,6 +283,19 @@ export class TUI extends Container {
|
|
|
283
283
|
this.clearOnShrink = enabled;
|
|
284
284
|
}
|
|
285
285
|
|
|
286
|
+
/**
|
|
287
|
+
* Reset the startup grace period timer, suppressing screen-clearing full
|
|
288
|
+
* redraws for another {@link STARTUP_GRACE_MS} milliseconds.
|
|
289
|
+
*
|
|
290
|
+
* Call this at the start of a session switch so the chatContainer.clear()
|
|
291
|
+
* → renderInitialMessages() transition doesn't cause visible flicker.
|
|
292
|
+
*
|
|
293
|
+
* @returns {void}
|
|
294
|
+
*/
|
|
295
|
+
resetRenderGrace(): void {
|
|
296
|
+
this.startedAtMs = Date.now();
|
|
297
|
+
}
|
|
298
|
+
|
|
286
299
|
/**
|
|
287
300
|
* Request that the next full render clears the terminal scrollback buffer.
|
|
288
301
|
*
|
|
@@ -403,8 +416,25 @@ export class TUI extends Container {
|
|
|
403
416
|
for (const overlay of this.overlayStack) overlay.component.invalidate?.();
|
|
404
417
|
}
|
|
405
418
|
|
|
419
|
+
/**
|
|
420
|
+
* Timestamp when `start()` was called.
|
|
421
|
+
* Used by startup grace period to suppress screen-clearing full redraws.
|
|
422
|
+
*/
|
|
423
|
+
private startedAtMs = 0;
|
|
424
|
+
|
|
425
|
+
/**
|
|
426
|
+
* Duration (ms) after `start()` during which shrink-triggered full redraws
|
|
427
|
+
* use a gentler line-by-line overwrite instead of screen clear.
|
|
428
|
+
*
|
|
429
|
+
* This prevents the visual flicker that occurs when session resume causes
|
|
430
|
+
* rapid content height changes (extension hooks, widget adds/removes) before
|
|
431
|
+
* the full message history is rendered.
|
|
432
|
+
*/
|
|
433
|
+
private static readonly STARTUP_GRACE_MS = 3000;
|
|
434
|
+
|
|
406
435
|
start(): void {
|
|
407
436
|
this.stopped = false;
|
|
437
|
+
this.startedAtMs = Date.now();
|
|
408
438
|
this.terminal.start(
|
|
409
439
|
(data) => this.handleInput(data),
|
|
410
440
|
() => this.requestRender()
|
|
@@ -454,6 +484,45 @@ export class TUI extends Container {
|
|
|
454
484
|
this.terminal.stop();
|
|
455
485
|
}
|
|
456
486
|
|
|
487
|
+
/** When >0, scheduled renders are deferred until the batch completes. */
|
|
488
|
+
private renderBatchDepth = 0;
|
|
489
|
+
|
|
490
|
+
/** Whether a render was requested while batching was active. */
|
|
491
|
+
private renderDeferredDuringBatch = false;
|
|
492
|
+
|
|
493
|
+
/** Whether a forced render was requested while batching was active. */
|
|
494
|
+
private renderForceDeferredDuringBatch = false;
|
|
495
|
+
|
|
496
|
+
/**
|
|
497
|
+
* Begin a render batch — all `requestRender()` calls are coalesced and
|
|
498
|
+
* deferred until the matching `endRenderBatch()`. Nestable.
|
|
499
|
+
*
|
|
500
|
+
* Use to prevent intermediate renders (and the screen clears they cause)
|
|
501
|
+
* during multi-step UI mutations such as session resume.
|
|
502
|
+
*
|
|
503
|
+
* @returns {void}
|
|
504
|
+
*/
|
|
505
|
+
beginRenderBatch(): void {
|
|
506
|
+
this.renderBatchDepth++;
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
/**
|
|
510
|
+
* End a render batch. When the outermost batch ends, a single render is
|
|
511
|
+
* scheduled if any were deferred.
|
|
512
|
+
*
|
|
513
|
+
* @returns {void}
|
|
514
|
+
*/
|
|
515
|
+
endRenderBatch(): void {
|
|
516
|
+
if (this.renderBatchDepth <= 0) return;
|
|
517
|
+
this.renderBatchDepth--;
|
|
518
|
+
if (this.renderBatchDepth === 0 && this.renderDeferredDuringBatch) {
|
|
519
|
+
const wasForce = this.renderForceDeferredDuringBatch;
|
|
520
|
+
this.renderDeferredDuringBatch = false;
|
|
521
|
+
this.renderForceDeferredDuringBatch = false;
|
|
522
|
+
this.requestRender(wasForce);
|
|
523
|
+
}
|
|
524
|
+
}
|
|
525
|
+
|
|
457
526
|
requestRender(force = false): void {
|
|
458
527
|
if (force) {
|
|
459
528
|
this.previousLines = [];
|
|
@@ -464,6 +533,11 @@ export class TUI extends Container {
|
|
|
464
533
|
this.previousViewportTop = 0;
|
|
465
534
|
this.rollingShrinkPeak = 0;
|
|
466
535
|
}
|
|
536
|
+
if (this.renderBatchDepth > 0) {
|
|
537
|
+
this.renderDeferredDuringBatch = true;
|
|
538
|
+
if (force) this.renderForceDeferredDuringBatch = true;
|
|
539
|
+
return;
|
|
540
|
+
}
|
|
467
541
|
if (this.renderRequested) return;
|
|
468
542
|
this.scheduleRender();
|
|
469
543
|
}
|
|
@@ -1004,6 +1078,11 @@ export class TUI extends Container {
|
|
|
1004
1078
|
// Width changed - need full re-render (line wrapping changes)
|
|
1005
1079
|
const widthChanged = this.previousWidth !== 0 && this.previousWidth !== width;
|
|
1006
1080
|
|
|
1081
|
+
// Whether we are within the startup grace period where screen-clearing
|
|
1082
|
+
// full redraws are softened to prevent flicker during session resume.
|
|
1083
|
+
const inStartupGrace =
|
|
1084
|
+
this.startedAtMs > 0 && Date.now() - this.startedAtMs < TUI.STARTUP_GRACE_MS;
|
|
1085
|
+
|
|
1007
1086
|
// Helper to clear viewport (and optionally scrollback) and render all new lines
|
|
1008
1087
|
const fullRender = (clear: boolean): void => {
|
|
1009
1088
|
this.fullRedrawCount += 1;
|
|
@@ -1035,6 +1114,44 @@ export class TUI extends Container {
|
|
|
1035
1114
|
this.previousWidth = width;
|
|
1036
1115
|
};
|
|
1037
1116
|
|
|
1117
|
+
/**
|
|
1118
|
+
* Gentle full redraw: home cursor + overwrite each line + clear below.
|
|
1119
|
+
*
|
|
1120
|
+
* Used during the startup grace period instead of fullRender(true) for
|
|
1121
|
+
* shrink-triggered redraws. Avoids the visible blank frame caused by
|
|
1122
|
+
* `\x1b[2J` (clear screen), which makes messages appear to flash in and
|
|
1123
|
+
* out when session resume triggers rapid content height changes.
|
|
1124
|
+
*
|
|
1125
|
+
* Unlike fullRender(true), this never clears the screen — it writes each
|
|
1126
|
+
* line with a preceding `\x1b[2K` (clear line) so stale content is
|
|
1127
|
+
* overwritten without a blank frame. Lines below the new content are
|
|
1128
|
+
* individually erased.
|
|
1129
|
+
*/
|
|
1130
|
+
const gentleFullRender = (): void => {
|
|
1131
|
+
this.fullRedrawCount += 1;
|
|
1132
|
+
let buffer = "\x1b[?2026h\x1b[H"; // Begin synchronized output + home cursor
|
|
1133
|
+
for (let i = 0; i < newLines.length; i++) {
|
|
1134
|
+
buffer += "\x1b[2K"; // Clear current line
|
|
1135
|
+
buffer += newLines[i];
|
|
1136
|
+
if (i < newLines.length - 1) buffer += "\r\n";
|
|
1137
|
+
}
|
|
1138
|
+
// Erase lines that were previously rendered but are no longer needed
|
|
1139
|
+
const staleLines = Math.max(0, this.maxLinesRendered - newLines.length);
|
|
1140
|
+
for (let i = 0; i < staleLines; i++) {
|
|
1141
|
+
buffer += "\r\n\x1b[2K";
|
|
1142
|
+
}
|
|
1143
|
+
buffer += "\x1b[?2026l"; // End synchronized output
|
|
1144
|
+
this.terminal.write(buffer);
|
|
1145
|
+
this.cursorRow = Math.max(0, newLines.length + staleLines - 1);
|
|
1146
|
+
this.hardwareCursorRow = this.cursorRow;
|
|
1147
|
+
this.maxLinesRendered = newLines.length;
|
|
1148
|
+
this.previousViewportTop = Math.max(0, this.maxLinesRendered - height);
|
|
1149
|
+
this.rollingShrinkPeak = newLines.length;
|
|
1150
|
+
this.positionHardwareCursor(cursorPos, newLines.length);
|
|
1151
|
+
this.previousLines = newLines;
|
|
1152
|
+
this.previousWidth = width;
|
|
1153
|
+
};
|
|
1154
|
+
|
|
1038
1155
|
const debugRedraw = process.env.PI_DEBUG_REDRAW === "1";
|
|
1039
1156
|
const logRedraw = (reason: string): void => {
|
|
1040
1157
|
if (!debugRedraw) return;
|
|
@@ -1066,7 +1183,11 @@ export class TUI extends Container {
|
|
|
1066
1183
|
this.overlayStack.length === 0
|
|
1067
1184
|
) {
|
|
1068
1185
|
logRedraw(`clearOnShrink (maxLinesRendered=${this.maxLinesRendered})`);
|
|
1069
|
-
|
|
1186
|
+
if (inStartupGrace) {
|
|
1187
|
+
gentleFullRender();
|
|
1188
|
+
} else {
|
|
1189
|
+
fullRender(true);
|
|
1190
|
+
}
|
|
1070
1191
|
return;
|
|
1071
1192
|
}
|
|
1072
1193
|
|
|
@@ -1077,7 +1198,11 @@ export class TUI extends Container {
|
|
|
1077
1198
|
const shrinkDelta = this.previousLines.length - newLines.length;
|
|
1078
1199
|
if (shrinkDelta > 5 && this.overlayStack.length === 0) {
|
|
1079
1200
|
logRedraw(`large shrink (${shrinkDelta} lines)`);
|
|
1080
|
-
|
|
1201
|
+
if (inStartupGrace) {
|
|
1202
|
+
gentleFullRender();
|
|
1203
|
+
} else {
|
|
1204
|
+
fullRender(true);
|
|
1205
|
+
}
|
|
1081
1206
|
return;
|
|
1082
1207
|
}
|
|
1083
1208
|
|
|
@@ -1092,7 +1217,11 @@ export class TUI extends Container {
|
|
|
1092
1217
|
logRedraw(
|
|
1093
1218
|
`rolling shrink (peak=${this.rollingShrinkPeak}, now=${newLines.length}, delta=${this.rollingShrinkPeak - newLines.length})`
|
|
1094
1219
|
);
|
|
1095
|
-
|
|
1220
|
+
if (inStartupGrace) {
|
|
1221
|
+
gentleFullRender();
|
|
1222
|
+
} else {
|
|
1223
|
+
fullRender(true);
|
|
1224
|
+
}
|
|
1096
1225
|
return;
|
|
1097
1226
|
}
|
|
1098
1227
|
|
|
@@ -1167,7 +1296,11 @@ export class TUI extends Container {
|
|
|
1167
1296
|
const extraLines = this.previousLines.length - newLines.length;
|
|
1168
1297
|
if (extraLines > height) {
|
|
1169
1298
|
logRedraw(`extraLines > height (${extraLines} > ${height})`);
|
|
1170
|
-
|
|
1299
|
+
if (inStartupGrace) {
|
|
1300
|
+
gentleFullRender();
|
|
1301
|
+
} else {
|
|
1302
|
+
fullRender(true);
|
|
1303
|
+
}
|
|
1171
1304
|
return;
|
|
1172
1305
|
}
|
|
1173
1306
|
if (extraLines > 0) {
|
|
@@ -1195,7 +1328,11 @@ export class TUI extends Container {
|
|
|
1195
1328
|
// If first changed line is above the current viewport basis, partial redraw is unsafe.
|
|
1196
1329
|
if (firstChanged < prevViewportTop) {
|
|
1197
1330
|
logRedraw(`firstChanged < viewportTop (${firstChanged} < ${prevViewportTop})`);
|
|
1198
|
-
|
|
1331
|
+
if (inStartupGrace) {
|
|
1332
|
+
gentleFullRender();
|
|
1333
|
+
} else {
|
|
1334
|
+
fullRender(true);
|
|
1335
|
+
}
|
|
1199
1336
|
return;
|
|
1200
1337
|
}
|
|
1201
1338
|
|
package/package.json
CHANGED
package/schemas/settings.schema.json
CHANGED
|
@@ -429,11 +429,6 @@
|
|
|
429
429
|
"default": ["◐", "◓", "◑", "◒"],
|
|
430
430
|
"minItems": 1
|
|
431
431
|
},
|
|
432
|
-
"plan_mode": {
|
|
433
|
-
"type": "string",
|
|
434
|
-
"description": "Plan mode indicator (default: '⏸').",
|
|
435
|
-
"default": "⏸"
|
|
436
|
-
},
|
|
437
432
|
"task_list": {
|
|
438
433
|
"type": "string",
|
|
439
434
|
"description": "Task list indicator (default: '📋').",
|
package/extensions/plan-mode-tool/__tests__/e2e.mjs
DELETED
|
@@ -1,350 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* E2E test for the plan-mode extension.
|
|
5
|
-
*
|
|
6
|
-
* Proves:
|
|
7
|
-
* 1. plan_mode tool remains available after toggling modes
|
|
8
|
-
* 2. Plan mode enforces a strict read-only allowlist
|
|
9
|
-
* 3. Non-allowlisted extension tools are blocked in plan mode
|
|
10
|
-
* 4. Disabling plan mode restores normal access
|
|
11
|
-
*
|
|
12
|
-
* Uses the SDK to load ONLY the plan-mode extension (isolated).
|
|
13
|
-
* Costs ~$0.01 per run.
|
|
14
|
-
*
|
|
15
|
-
* Usage:
|
|
16
|
-
* node extensions/plan-mode-tool/__tests__/e2e.mjs
|
|
17
|
-
*/
|
|
18
|
-
|
|
19
|
-
import fs from "node:fs";
|
|
20
|
-
import os from "node:os";
|
|
21
|
-
import path from "node:path";
|
|
22
|
-
import { fileURLToPath } from "node:url";
|
|
23
|
-
import { getModel } from "@mariozechner/pi-ai";
|
|
24
|
-
import {
|
|
25
|
-
AuthStorage,
|
|
26
|
-
createAgentSession,
|
|
27
|
-
DefaultResourceLoader,
|
|
28
|
-
ModelRegistry,
|
|
29
|
-
SessionManager,
|
|
30
|
-
SettingsManager,
|
|
31
|
-
} from "@mariozechner/pi-coding-agent";
|
|
32
|
-
import { Type } from "@sinclair/typebox";
|
|
33
|
-
|
|
34
|
-
// ── Helpers ──────────────────────────────────────────────────
|
|
35
|
-
|
|
36
|
-
const results = [];
|
|
37
|
-
|
|
38
|
-
/**
|
|
39
|
-
* Record a test result.
|
|
40
|
-
* @param {string} name - Test name
|
|
41
|
-
* @param {boolean} passed - Pass/fail
|
|
42
|
-
* @param {string} [detail] - Extra detail on failure
|
|
43
|
-
*/
|
|
44
|
-
function check(name, passed, detail) {
|
|
45
|
-
results.push({ name, passed, detail });
|
|
46
|
-
const icon = passed ? "\x1b[32m✓\x1b[0m" : "\x1b[31m✗\x1b[0m";
|
|
47
|
-
let line = ` ${icon} ${name}`;
|
|
48
|
-
if (!passed && detail) line += `\n ${detail.slice(0, 300)}`;
|
|
49
|
-
console.log(line);
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
/**
|
|
53
|
-
* Get the text content of the most recent tool result for a given tool name.
|
|
54
|
-
* @param {import("@mariozechner/pi-coding-agent").AgentSession} session
|
|
55
|
-
* @param {string} toolName
|
|
56
|
-
* @returns {string}
|
|
57
|
-
*/
|
|
58
|
-
function lastToolResultText(session, toolName) {
|
|
59
|
-
const msgs = session.messages;
|
|
60
|
-
for (let i = msgs.length - 1; i >= 0; i--) {
|
|
61
|
-
const m = msgs[i];
|
|
62
|
-
if (m.role === "toolResult" && m.toolName === toolName) {
|
|
63
|
-
for (const part of m.content) {
|
|
64
|
-
if (part.type === "text") return part.text;
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
return "";
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
/**
|
|
72
|
-
* Check if any tool result in the session contains "not found" error.
|
|
73
|
-
* @param {import("@mariozechner/pi-coding-agent").AgentSession} session
|
|
74
|
-
* @param {string} toolName
|
|
75
|
-
* @returns {boolean}
|
|
76
|
-
*/
|
|
77
|
-
function hasToolNotFoundError(session, toolName) {
|
|
78
|
-
const msgs = session.messages;
|
|
79
|
-
for (let i = msgs.length - 1; i >= 0; i--) {
|
|
80
|
-
const m = msgs[i];
|
|
81
|
-
if (m.role === "toolResult") {
|
|
82
|
-
for (const part of m.content) {
|
|
83
|
-
if (part.type === "text" && part.text.includes(`Tool ${toolName} not found`)) {
|
|
84
|
-
return true;
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
}
|
|
89
|
-
return false;
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
/**
|
|
93
|
-
* Check if a tool call was blocked by plan-mode policy.
|
|
94
|
-
* @param {import("@mariozechner/pi-coding-agent").AgentSession} session
|
|
95
|
-
* @param {string} toolName
|
|
96
|
-
* @returns {boolean}
|
|
97
|
-
*/
|
|
98
|
-
function hasPlanModeToolBlockedError(session, toolName) {
|
|
99
|
-
const msgs = session.messages;
|
|
100
|
-
for (let i = msgs.length - 1; i >= 0; i--) {
|
|
101
|
-
const m = msgs[i];
|
|
102
|
-
if (m.role !== "toolResult") continue;
|
|
103
|
-
for (const part of m.content) {
|
|
104
|
-
if (part.type === "text" && part.text.includes(`Plan mode: tool "${toolName}" blocked`)) {
|
|
105
|
-
return true;
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
return false;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
// ── Isolated extension loading ───────────────────────────────
|
|
113
|
-
|
|
114
|
-
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
115
|
-
const extensionSrcDir = path.resolve(__dirname, "..");
|
|
116
|
-
|
|
117
|
-
const testAgentDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-e2e-plan-"));
|
|
118
|
-
const extDst = path.join(testAgentDir, "extensions/plan-mode-tool");
|
|
119
|
-
fs.mkdirSync(extDst, { recursive: true });
|
|
120
|
-
for (const file of ["index.ts", "utils.ts"]) {
|
|
121
|
-
fs.copyFileSync(path.join(extensionSrcDir, file), path.join(extDst, file));
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
// ── Setup ────────────────────────────────────────────────────
|
|
125
|
-
|
|
126
|
-
console.log("\n\x1b[1m══ Plan Mode Extension E2E Test ══\x1b[0m\n");
|
|
127
|
-
|
|
128
|
-
const authStorage = new AuthStorage();
|
|
129
|
-
const modelRegistry = new ModelRegistry(authStorage);
|
|
130
|
-
const model = getModel("anthropic", "claude-haiku-4-5");
|
|
131
|
-
if (!model) {
|
|
132
|
-
console.error("✗ Model claude-haiku-4-5 not found");
|
|
133
|
-
process.exit(1);
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
const settingsManager = SettingsManager.inMemory({ compaction: { enabled: false } });
|
|
137
|
-
|
|
138
|
-
/**
|
|
139
|
-
* Register mock tools used to validate strict plan-mode allowlisting.
|
|
140
|
-
* @param {import("@mariozechner/pi-coding-agent").ExtensionAPI} pi
|
|
141
|
-
*/
|
|
142
|
-
function registerMockTools(pi) {
|
|
143
|
-
pi.registerTool({
|
|
144
|
-
name: "bg_bash",
|
|
145
|
-
label: "bg_bash",
|
|
146
|
-
description: "Mock background bash tool",
|
|
147
|
-
parameters: Type.Object({ command: Type.String() }),
|
|
148
|
-
async execute(_toolCallId, params) {
|
|
149
|
-
return {
|
|
150
|
-
content: [{ type: "text", text: `mock-bg-bash-ok:${params.command}` }],
|
|
151
|
-
details: {},
|
|
152
|
-
};
|
|
153
|
-
},
|
|
154
|
-
});
|
|
155
|
-
|
|
156
|
-
pi.registerTool({
|
|
157
|
-
name: "subagent",
|
|
158
|
-
label: "subagent",
|
|
159
|
-
description: "Mock subagent tool",
|
|
160
|
-
parameters: Type.Object({ task: Type.String() }),
|
|
161
|
-
async execute(_toolCallId, params) {
|
|
162
|
-
return {
|
|
163
|
-
content: [{ type: "text", text: `mock-subagent-ok:${params.task}` }],
|
|
164
|
-
details: {},
|
|
165
|
-
};
|
|
166
|
-
},
|
|
167
|
-
});
|
|
168
|
-
|
|
169
|
-
pi.registerTool({
|
|
170
|
-
name: "mcp__mock__ping",
|
|
171
|
-
label: "mcp__mock__ping",
|
|
172
|
-
description: "Mock MCP-style tool",
|
|
173
|
-
parameters: Type.Object({}),
|
|
174
|
-
async execute() {
|
|
175
|
-
return {
|
|
176
|
-
content: [{ type: "text", text: "mock-mcp-ok" }],
|
|
177
|
-
details: {},
|
|
178
|
-
};
|
|
179
|
-
},
|
|
180
|
-
});
|
|
181
|
-
|
|
182
|
-
pi.registerTool({
|
|
183
|
-
name: "questionnaire",
|
|
184
|
-
label: "questionnaire",
|
|
185
|
-
description: "Mock read-only questionnaire tool",
|
|
186
|
-
parameters: Type.Object({}),
|
|
187
|
-
async execute() {
|
|
188
|
-
return {
|
|
189
|
-
content: [{ type: "text", text: "mock-questionnaire-ok" }],
|
|
190
|
-
details: {},
|
|
191
|
-
};
|
|
192
|
-
},
|
|
193
|
-
});
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
console.log("Loading extension (isolated)...");
|
|
197
|
-
const loader = new DefaultResourceLoader({
|
|
198
|
-
cwd: os.tmpdir(),
|
|
199
|
-
agentDir: testAgentDir,
|
|
200
|
-
settingsManager,
|
|
201
|
-
extensionFactories: [registerMockTools],
|
|
202
|
-
skillsOverride: () => ({ skills: [], diagnostics: [] }),
|
|
203
|
-
promptsOverride: () => ({ prompts: [], diagnostics: [] }),
|
|
204
|
-
agentsFilesOverride: () => ({ agentsFiles: [] }),
|
|
205
|
-
});
|
|
206
|
-
await loader.reload();
|
|
207
|
-
|
|
208
|
-
const exts = loader.getExtensions();
|
|
209
|
-
console.log(` Extensions loaded: ${exts.extensions.length}, errors: ${exts.errors.length}`);
|
|
210
|
-
if (exts.errors.length > 0) {
|
|
211
|
-
console.error(" Extension errors:", exts.errors);
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
console.log("Creating session (haiku)...\n");
|
|
215
|
-
const { session } = await createAgentSession({
|
|
216
|
-
model,
|
|
217
|
-
thinkingLevel: "off",
|
|
218
|
-
authStorage,
|
|
219
|
-
modelRegistry,
|
|
220
|
-
resourceLoader: loader,
|
|
221
|
-
sessionManager: SessionManager.inMemory(),
|
|
222
|
-
settingsManager,
|
|
223
|
-
});
|
|
224
|
-
|
|
225
|
-
// Log tool calls
|
|
226
|
-
session.subscribe((event) => {
|
|
227
|
-
if (event.type === "tool_execution_start") {
|
|
228
|
-
process.stdout.write(` \x1b[2m→ ${event.toolName}\x1b[0m\n`);
|
|
229
|
-
}
|
|
230
|
-
});
|
|
231
|
-
|
|
232
|
-
// ── Test 1: plan_mode tool exists at startup ─────────────────
|
|
233
|
-
|
|
234
|
-
console.log("\x1b[1mTest 1: plan_mode tool available at startup\x1b[0m");
|
|
235
|
-
await session.prompt(
|
|
236
|
-
'Call the plan_mode tool with action "status". Only call this one tool, nothing else.'
|
|
237
|
-
);
|
|
238
|
-
const statusText = lastToolResultText(session, "plan_mode");
|
|
239
|
-
const noStartupError = !hasToolNotFoundError(session, "plan_mode");
|
|
240
|
-
check("plan_mode tool callable at startup", noStartupError, statusText);
|
|
241
|
-
check("reports normal mode", statusText.includes("normal"), statusText);
|
|
242
|
-
|
|
243
|
-
// ── Test 2: Enable plan mode, verify plan_mode survives ──────
|
|
244
|
-
|
|
245
|
-
console.log("\n\x1b[1mTest 2: Enable plan mode → plan_mode tool still available\x1b[0m");
|
|
246
|
-
await session.prompt(
|
|
247
|
-
'Call the plan_mode tool with action "enable". Only call this one tool, nothing else.'
|
|
248
|
-
);
|
|
249
|
-
const enableText = lastToolResultText(session, "plan_mode");
|
|
250
|
-
const noEnableError = !hasToolNotFoundError(session, "plan_mode");
|
|
251
|
-
check("plan_mode callable during enable", noEnableError, enableText);
|
|
252
|
-
check("reports plan mode enabled", enableText.includes("enabled"), enableText);
|
|
253
|
-
|
|
254
|
-
// Now check status — plan_mode should still work IN plan mode
|
|
255
|
-
await session.prompt(
|
|
256
|
-
'Call the plan_mode tool with action "status". Only call this one tool, nothing else.'
|
|
257
|
-
);
|
|
258
|
-
const planStatusText = lastToolResultText(session, "plan_mode");
|
|
259
|
-
const noPlanStatusError = !hasToolNotFoundError(session, "plan_mode");
|
|
260
|
-
check("plan_mode callable while in plan mode", noPlanStatusError, planStatusText);
|
|
261
|
-
check("reports planning mode", planStatusText.includes("planning"), planStatusText);
|
|
262
|
-
|
|
263
|
-
// ── Test 3: Disable plan mode, verify plan_mode survives ─────
|
|
264
|
-
|
|
265
|
-
console.log("\n\x1b[1mTest 3: Disable plan mode → plan_mode tool still available\x1b[0m");
|
|
266
|
-
await session.prompt(
|
|
267
|
-
'Call the plan_mode tool with action "disable". Only call this one tool, nothing else.'
|
|
268
|
-
);
|
|
269
|
-
const disableText = lastToolResultText(session, "plan_mode");
|
|
270
|
-
const noDisableError = !hasToolNotFoundError(session, "plan_mode");
|
|
271
|
-
check("plan_mode callable during disable", noDisableError, disableText);
|
|
272
|
-
check("reports disabled", disableText.includes("disabled"), disableText);
|
|
273
|
-
|
|
274
|
-
// Final status check — should be back to normal
|
|
275
|
-
await session.prompt(
|
|
276
|
-
'Call the plan_mode tool with action "status". Only call this one tool, nothing else.'
|
|
277
|
-
);
|
|
278
|
-
const finalStatusText = lastToolResultText(session, "plan_mode");
|
|
279
|
-
const noFinalError = !hasToolNotFoundError(session, "plan_mode");
|
|
280
|
-
check("plan_mode callable after round-trip", noFinalError, finalStatusText);
|
|
281
|
-
check("back to normal mode", finalStatusText.includes("normal"), finalStatusText);
|
|
282
|
-
|
|
283
|
-
// ── Test 4: Strict allowlist enforcement in plan mode ────────
|
|
284
|
-
|
|
285
|
-
console.log("\n\x1b[1mTest 4: Strict allowlist blocks non-read-only tools\x1b[0m");
|
|
286
|
-
await session.prompt(
|
|
287
|
-
'Call the plan_mode tool with action "enable". Only call this one tool, nothing else.'
|
|
288
|
-
);
|
|
289
|
-
|
|
290
|
-
await session.prompt(
|
|
291
|
-
'Call the edit tool to edit file "/tmp/test.txt" replacing "a" with "b". Only call edit, nothing else.'
|
|
292
|
-
);
|
|
293
|
-
const editBlocked =
|
|
294
|
-
hasToolNotFoundError(session, "edit") || hasPlanModeToolBlockedError(session, "edit");
|
|
295
|
-
check("edit tool blocked in plan mode", editBlocked, "edit should not be available in plan mode");
|
|
296
|
-
|
|
297
|
-
await session.prompt(
|
|
298
|
-
'Call the bg_bash tool with command "echo blocked". Only call bg_bash, nothing else.'
|
|
299
|
-
);
|
|
300
|
-
const bgBashBlocked =
|
|
301
|
-
hasToolNotFoundError(session, "bg_bash") || hasPlanModeToolBlockedError(session, "bg_bash");
|
|
302
|
-
check("bg_bash blocked in plan mode", bgBashBlocked, "bg_bash should be blocked in plan mode");
|
|
303
|
-
|
|
304
|
-
await session.prompt('Call the subagent tool with task "ping". Only call subagent, nothing else.');
|
|
305
|
-
const subagentBlocked =
|
|
306
|
-
hasToolNotFoundError(session, "subagent") || hasPlanModeToolBlockedError(session, "subagent");
|
|
307
|
-
check("subagent blocked in plan mode", subagentBlocked, "subagent should be blocked in plan mode");
|
|
308
|
-
|
|
309
|
-
await session.prompt("Call the mcp__mock__ping tool. Only call this one tool, nothing else.");
|
|
310
|
-
const mcpBlocked =
|
|
311
|
-
hasToolNotFoundError(session, "mcp__mock__ping") ||
|
|
312
|
-
hasPlanModeToolBlockedError(session, "mcp__mock__ping");
|
|
313
|
-
check("mcp__* tools blocked in plan mode", mcpBlocked, "MCP tools should be blocked in plan mode");
|
|
314
|
-
|
|
315
|
-
await session.prompt("Call the questionnaire tool. Only call this one tool, nothing else.");
|
|
316
|
-
const questionnaireText = lastToolResultText(session, "questionnaire");
|
|
317
|
-
const questionnaireAllowed = questionnaireText.includes("mock-questionnaire-ok");
|
|
318
|
-
check("allowlisted questionnaire tool still works", questionnaireAllowed, questionnaireText);
|
|
319
|
-
|
|
320
|
-
// ── Test 5: Disabling plan mode restores normal access ───────
|
|
321
|
-
|
|
322
|
-
console.log("\n\x1b[1mTest 5: Disable restores normal tool access\x1b[0m");
|
|
323
|
-
await session.prompt(
|
|
324
|
-
'Call the plan_mode tool with action "disable". Only call this one tool, nothing else.'
|
|
325
|
-
);
|
|
326
|
-
await session.prompt(
|
|
327
|
-
'Call the subagent tool with task "after-disable". Only call subagent, nothing else.'
|
|
328
|
-
);
|
|
329
|
-
const subagentAfterDisableText = lastToolResultText(session, "subagent");
|
|
330
|
-
const subagentRestored = subagentAfterDisableText.includes("mock-subagent-ok:after-disable");
|
|
331
|
-
check("subagent restored after disabling plan mode", subagentRestored, subagentAfterDisableText);
|
|
332
|
-
|
|
333
|
-
// ── Cleanup & Summary ────────────────────────────────────────
|
|
334
|
-
|
|
335
|
-
session.dispose();
|
|
336
|
-
fs.rmSync(testAgentDir, { recursive: true, force: true });
|
|
337
|
-
|
|
338
|
-
const passed = results.filter((r) => r.passed).length;
|
|
339
|
-
const total = results.length;
|
|
340
|
-
|
|
341
|
-
console.log(`\n\x1b[1m══ Results: ${passed}/${total} passed ══\x1b[0m`);
|
|
342
|
-
if (passed < total) {
|
|
343
|
-
console.log("\n\x1b[31mFailed:\x1b[0m");
|
|
344
|
-
for (const r of results.filter((r) => !r.passed)) {
|
|
345
|
-
console.log(` ✗ ${r.name}`);
|
|
346
|
-
if (r.detail) console.log(` ${r.detail.slice(0, 300)}`);
|
|
347
|
-
}
|
|
348
|
-
}
|
|
349
|
-
console.log();
|
|
350
|
-
process.exit(passed === total ? 0 : 1);
|