@dungle-scrubs/tallow 0.8.28 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.d.ts +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +1 -1
- package/dist/config.js.map +1 -1
- package/dist/install.d.ts.map +1 -1
- package/dist/install.js +2 -9
- package/dist/install.js.map +1 -1
- package/dist/interactive-mode-patch.d.ts +1 -0
- package/dist/interactive-mode-patch.d.ts.map +1 -1
- package/dist/interactive-mode-patch.js +44 -9
- package/dist/interactive-mode-patch.js.map +1 -1
- package/extensions/_icons/__tests__/icons.test.ts +0 -1
- package/extensions/_icons/index.ts +0 -2
- package/extensions/context-fork/__tests__/context-fork.test.ts +9 -0
- package/extensions/health/index.ts +1 -1
- package/extensions/render-stabilizer/__tests__/render-stabilizer.test.ts +42 -0
- package/extensions/render-stabilizer/extension.json +5 -0
- package/extensions/render-stabilizer/index.ts +66 -0
- package/extensions/subagent-tool/__tests__/auto-cheap-model.test.ts +66 -6
- package/extensions/subagent-tool/__tests__/model-router-explicit-resolution.test.ts +79 -5
- package/node_modules/@mariozechner/pi-tui/dist/components/settings-list.d.ts +2 -0
- package/node_modules/@mariozechner/pi-tui/dist/components/settings-list.d.ts.map +1 -1
- package/node_modules/@mariozechner/pi-tui/dist/components/settings-list.js +17 -6
- package/node_modules/@mariozechner/pi-tui/dist/components/settings-list.js.map +1 -1
- package/node_modules/@mariozechner/pi-tui/dist/tui.d.ts +47 -0
- package/node_modules/@mariozechner/pi-tui/dist/tui.d.ts.map +1 -1
- package/node_modules/@mariozechner/pi-tui/dist/tui.js +139 -5
- package/node_modules/@mariozechner/pi-tui/dist/tui.js.map +1 -1
- package/node_modules/@mariozechner/pi-tui/src/__tests__/settings-list.test.ts +49 -0
- package/node_modules/@mariozechner/pi-tui/src/components/settings-list.ts +17 -5
- package/node_modules/@mariozechner/pi-tui/src/tui.ts +142 -5
- package/package.json +1 -1
- package/schemas/settings.schema.json +0 -5
- package/extensions/plan-mode-tool/__tests__/e2e.mjs +0 -350
- package/extensions/plan-mode-tool/__tests__/index.test.ts +0 -213
- package/extensions/plan-mode-tool/__tests__/utils.test.ts +0 -381
- package/extensions/plan-mode-tool/extension.json +0 -22
- package/extensions/plan-mode-tool/index.ts +0 -583
- package/extensions/plan-mode-tool/utils.ts +0 -257
|
@@ -1,350 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* E2E test for the plan-mode extension.
|
|
5
|
-
*
|
|
6
|
-
* Proves:
|
|
7
|
-
* 1. plan_mode tool remains available after toggling modes
|
|
8
|
-
* 2. Plan mode enforces a strict read-only allowlist
|
|
9
|
-
* 3. Non-allowlisted extension tools are blocked in plan mode
|
|
10
|
-
* 4. Disabling plan mode restores normal access
|
|
11
|
-
*
|
|
12
|
-
* Uses the SDK to load ONLY the plan-mode extension (isolated).
|
|
13
|
-
* Costs ~$0.01 per run.
|
|
14
|
-
*
|
|
15
|
-
* Usage:
|
|
16
|
-
* node extensions/plan-mode-tool/__tests__/e2e.mjs
|
|
17
|
-
*/
|
|
18
|
-
|
|
19
|
-
import fs from "node:fs";
|
|
20
|
-
import os from "node:os";
|
|
21
|
-
import path from "node:path";
|
|
22
|
-
import { fileURLToPath } from "node:url";
|
|
23
|
-
import { getModel } from "@mariozechner/pi-ai";
|
|
24
|
-
import {
|
|
25
|
-
AuthStorage,
|
|
26
|
-
createAgentSession,
|
|
27
|
-
DefaultResourceLoader,
|
|
28
|
-
ModelRegistry,
|
|
29
|
-
SessionManager,
|
|
30
|
-
SettingsManager,
|
|
31
|
-
} from "@mariozechner/pi-coding-agent";
|
|
32
|
-
import { Type } from "@sinclair/typebox";
|
|
33
|
-
|
|
34
|
-
// ── Helpers ──────────────────────────────────────────────────
|
|
35
|
-
|
|
36
|
-
const results = [];
|
|
37
|
-
|
|
38
|
-
/**
|
|
39
|
-
* Record a test result.
|
|
40
|
-
* @param {string} name - Test name
|
|
41
|
-
* @param {boolean} passed - Pass/fail
|
|
42
|
-
* @param {string} [detail] - Extra detail on failure
|
|
43
|
-
*/
|
|
44
|
-
function check(name, passed, detail) {
|
|
45
|
-
results.push({ name, passed, detail });
|
|
46
|
-
const icon = passed ? "\x1b[32m✓\x1b[0m" : "\x1b[31m✗\x1b[0m";
|
|
47
|
-
let line = ` ${icon} ${name}`;
|
|
48
|
-
if (!passed && detail) line += `\n ${detail.slice(0, 300)}`;
|
|
49
|
-
console.log(line);
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
/**
|
|
53
|
-
* Get the text content of the most recent tool result for a given tool name.
|
|
54
|
-
* @param {import("@mariozechner/pi-coding-agent").AgentSession} session
|
|
55
|
-
* @param {string} toolName
|
|
56
|
-
* @returns {string}
|
|
57
|
-
*/
|
|
58
|
-
function lastToolResultText(session, toolName) {
|
|
59
|
-
const msgs = session.messages;
|
|
60
|
-
for (let i = msgs.length - 1; i >= 0; i--) {
|
|
61
|
-
const m = msgs[i];
|
|
62
|
-
if (m.role === "toolResult" && m.toolName === toolName) {
|
|
63
|
-
for (const part of m.content) {
|
|
64
|
-
if (part.type === "text") return part.text;
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
return "";
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
/**
|
|
72
|
-
* Check if any tool result in the session contains "not found" error.
|
|
73
|
-
* @param {import("@mariozechner/pi-coding-agent").AgentSession} session
|
|
74
|
-
* @param {string} toolName
|
|
75
|
-
* @returns {boolean}
|
|
76
|
-
*/
|
|
77
|
-
function hasToolNotFoundError(session, toolName) {
|
|
78
|
-
const msgs = session.messages;
|
|
79
|
-
for (let i = msgs.length - 1; i >= 0; i--) {
|
|
80
|
-
const m = msgs[i];
|
|
81
|
-
if (m.role === "toolResult") {
|
|
82
|
-
for (const part of m.content) {
|
|
83
|
-
if (part.type === "text" && part.text.includes(`Tool ${toolName} not found`)) {
|
|
84
|
-
return true;
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
}
|
|
89
|
-
return false;
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
/**
|
|
93
|
-
* Check if a tool call was blocked by plan-mode policy.
|
|
94
|
-
* @param {import("@mariozechner/pi-coding-agent").AgentSession} session
|
|
95
|
-
* @param {string} toolName
|
|
96
|
-
* @returns {boolean}
|
|
97
|
-
*/
|
|
98
|
-
function hasPlanModeToolBlockedError(session, toolName) {
|
|
99
|
-
const msgs = session.messages;
|
|
100
|
-
for (let i = msgs.length - 1; i >= 0; i--) {
|
|
101
|
-
const m = msgs[i];
|
|
102
|
-
if (m.role !== "toolResult") continue;
|
|
103
|
-
for (const part of m.content) {
|
|
104
|
-
if (part.type === "text" && part.text.includes(`Plan mode: tool "${toolName}" blocked`)) {
|
|
105
|
-
return true;
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
return false;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
// ── Isolated extension loading ───────────────────────────────
|
|
113
|
-
|
|
114
|
-
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
115
|
-
const extensionSrcDir = path.resolve(__dirname, "..");
|
|
116
|
-
|
|
117
|
-
const testAgentDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-e2e-plan-"));
|
|
118
|
-
const extDst = path.join(testAgentDir, "extensions/plan-mode-tool");
|
|
119
|
-
fs.mkdirSync(extDst, { recursive: true });
|
|
120
|
-
for (const file of ["index.ts", "utils.ts"]) {
|
|
121
|
-
fs.copyFileSync(path.join(extensionSrcDir, file), path.join(extDst, file));
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
// ── Setup ────────────────────────────────────────────────────
|
|
125
|
-
|
|
126
|
-
console.log("\n\x1b[1m══ Plan Mode Extension E2E Test ══\x1b[0m\n");
|
|
127
|
-
|
|
128
|
-
const authStorage = new AuthStorage();
|
|
129
|
-
const modelRegistry = new ModelRegistry(authStorage);
|
|
130
|
-
const model = getModel("anthropic", "claude-haiku-4-5");
|
|
131
|
-
if (!model) {
|
|
132
|
-
console.error("✗ Model claude-haiku-4-5 not found");
|
|
133
|
-
process.exit(1);
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
const settingsManager = SettingsManager.inMemory({ compaction: { enabled: false } });
|
|
137
|
-
|
|
138
|
-
/**
|
|
139
|
-
* Register mock tools used to validate strict plan-mode allowlisting.
|
|
140
|
-
* @param {import("@mariozechner/pi-coding-agent").ExtensionAPI} pi
|
|
141
|
-
*/
|
|
142
|
-
function registerMockTools(pi) {
|
|
143
|
-
pi.registerTool({
|
|
144
|
-
name: "bg_bash",
|
|
145
|
-
label: "bg_bash",
|
|
146
|
-
description: "Mock background bash tool",
|
|
147
|
-
parameters: Type.Object({ command: Type.String() }),
|
|
148
|
-
async execute(_toolCallId, params) {
|
|
149
|
-
return {
|
|
150
|
-
content: [{ type: "text", text: `mock-bg-bash-ok:${params.command}` }],
|
|
151
|
-
details: {},
|
|
152
|
-
};
|
|
153
|
-
},
|
|
154
|
-
});
|
|
155
|
-
|
|
156
|
-
pi.registerTool({
|
|
157
|
-
name: "subagent",
|
|
158
|
-
label: "subagent",
|
|
159
|
-
description: "Mock subagent tool",
|
|
160
|
-
parameters: Type.Object({ task: Type.String() }),
|
|
161
|
-
async execute(_toolCallId, params) {
|
|
162
|
-
return {
|
|
163
|
-
content: [{ type: "text", text: `mock-subagent-ok:${params.task}` }],
|
|
164
|
-
details: {},
|
|
165
|
-
};
|
|
166
|
-
},
|
|
167
|
-
});
|
|
168
|
-
|
|
169
|
-
pi.registerTool({
|
|
170
|
-
name: "mcp__mock__ping",
|
|
171
|
-
label: "mcp__mock__ping",
|
|
172
|
-
description: "Mock MCP-style tool",
|
|
173
|
-
parameters: Type.Object({}),
|
|
174
|
-
async execute() {
|
|
175
|
-
return {
|
|
176
|
-
content: [{ type: "text", text: "mock-mcp-ok" }],
|
|
177
|
-
details: {},
|
|
178
|
-
};
|
|
179
|
-
},
|
|
180
|
-
});
|
|
181
|
-
|
|
182
|
-
pi.registerTool({
|
|
183
|
-
name: "questionnaire",
|
|
184
|
-
label: "questionnaire",
|
|
185
|
-
description: "Mock read-only questionnaire tool",
|
|
186
|
-
parameters: Type.Object({}),
|
|
187
|
-
async execute() {
|
|
188
|
-
return {
|
|
189
|
-
content: [{ type: "text", text: "mock-questionnaire-ok" }],
|
|
190
|
-
details: {},
|
|
191
|
-
};
|
|
192
|
-
},
|
|
193
|
-
});
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
console.log("Loading extension (isolated)...");
|
|
197
|
-
const loader = new DefaultResourceLoader({
|
|
198
|
-
cwd: os.tmpdir(),
|
|
199
|
-
agentDir: testAgentDir,
|
|
200
|
-
settingsManager,
|
|
201
|
-
extensionFactories: [registerMockTools],
|
|
202
|
-
skillsOverride: () => ({ skills: [], diagnostics: [] }),
|
|
203
|
-
promptsOverride: () => ({ prompts: [], diagnostics: [] }),
|
|
204
|
-
agentsFilesOverride: () => ({ agentsFiles: [] }),
|
|
205
|
-
});
|
|
206
|
-
await loader.reload();
|
|
207
|
-
|
|
208
|
-
const exts = loader.getExtensions();
|
|
209
|
-
console.log(` Extensions loaded: ${exts.extensions.length}, errors: ${exts.errors.length}`);
|
|
210
|
-
if (exts.errors.length > 0) {
|
|
211
|
-
console.error(" Extension errors:", exts.errors);
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
console.log("Creating session (haiku)...\n");
|
|
215
|
-
const { session } = await createAgentSession({
|
|
216
|
-
model,
|
|
217
|
-
thinkingLevel: "off",
|
|
218
|
-
authStorage,
|
|
219
|
-
modelRegistry,
|
|
220
|
-
resourceLoader: loader,
|
|
221
|
-
sessionManager: SessionManager.inMemory(),
|
|
222
|
-
settingsManager,
|
|
223
|
-
});
|
|
224
|
-
|
|
225
|
-
// Log tool calls
|
|
226
|
-
session.subscribe((event) => {
|
|
227
|
-
if (event.type === "tool_execution_start") {
|
|
228
|
-
process.stdout.write(` \x1b[2m→ ${event.toolName}\x1b[0m\n`);
|
|
229
|
-
}
|
|
230
|
-
});
|
|
231
|
-
|
|
232
|
-
// ── Test 1: plan_mode tool exists at startup ─────────────────
|
|
233
|
-
|
|
234
|
-
console.log("\x1b[1mTest 1: plan_mode tool available at startup\x1b[0m");
|
|
235
|
-
await session.prompt(
|
|
236
|
-
'Call the plan_mode tool with action "status". Only call this one tool, nothing else.'
|
|
237
|
-
);
|
|
238
|
-
const statusText = lastToolResultText(session, "plan_mode");
|
|
239
|
-
const noStartupError = !hasToolNotFoundError(session, "plan_mode");
|
|
240
|
-
check("plan_mode tool callable at startup", noStartupError, statusText);
|
|
241
|
-
check("reports normal mode", statusText.includes("normal"), statusText);
|
|
242
|
-
|
|
243
|
-
// ── Test 2: Enable plan mode, verify plan_mode survives ──────
|
|
244
|
-
|
|
245
|
-
console.log("\n\x1b[1mTest 2: Enable plan mode → plan_mode tool still available\x1b[0m");
|
|
246
|
-
await session.prompt(
|
|
247
|
-
'Call the plan_mode tool with action "enable". Only call this one tool, nothing else.'
|
|
248
|
-
);
|
|
249
|
-
const enableText = lastToolResultText(session, "plan_mode");
|
|
250
|
-
const noEnableError = !hasToolNotFoundError(session, "plan_mode");
|
|
251
|
-
check("plan_mode callable during enable", noEnableError, enableText);
|
|
252
|
-
check("reports plan mode enabled", enableText.includes("enabled"), enableText);
|
|
253
|
-
|
|
254
|
-
// Now check status — plan_mode should still work IN plan mode
|
|
255
|
-
await session.prompt(
|
|
256
|
-
'Call the plan_mode tool with action "status". Only call this one tool, nothing else.'
|
|
257
|
-
);
|
|
258
|
-
const planStatusText = lastToolResultText(session, "plan_mode");
|
|
259
|
-
const noPlanStatusError = !hasToolNotFoundError(session, "plan_mode");
|
|
260
|
-
check("plan_mode callable while in plan mode", noPlanStatusError, planStatusText);
|
|
261
|
-
check("reports planning mode", planStatusText.includes("planning"), planStatusText);
|
|
262
|
-
|
|
263
|
-
// ── Test 3: Disable plan mode, verify plan_mode survives ─────
|
|
264
|
-
|
|
265
|
-
console.log("\n\x1b[1mTest 3: Disable plan mode → plan_mode tool still available\x1b[0m");
|
|
266
|
-
await session.prompt(
|
|
267
|
-
'Call the plan_mode tool with action "disable". Only call this one tool, nothing else.'
|
|
268
|
-
);
|
|
269
|
-
const disableText = lastToolResultText(session, "plan_mode");
|
|
270
|
-
const noDisableError = !hasToolNotFoundError(session, "plan_mode");
|
|
271
|
-
check("plan_mode callable during disable", noDisableError, disableText);
|
|
272
|
-
check("reports disabled", disableText.includes("disabled"), disableText);
|
|
273
|
-
|
|
274
|
-
// Final status check — should be back to normal
|
|
275
|
-
await session.prompt(
|
|
276
|
-
'Call the plan_mode tool with action "status". Only call this one tool, nothing else.'
|
|
277
|
-
);
|
|
278
|
-
const finalStatusText = lastToolResultText(session, "plan_mode");
|
|
279
|
-
const noFinalError = !hasToolNotFoundError(session, "plan_mode");
|
|
280
|
-
check("plan_mode callable after round-trip", noFinalError, finalStatusText);
|
|
281
|
-
check("back to normal mode", finalStatusText.includes("normal"), finalStatusText);
|
|
282
|
-
|
|
283
|
-
// ── Test 4: Strict allowlist enforcement in plan mode ────────
|
|
284
|
-
|
|
285
|
-
console.log("\n\x1b[1mTest 4: Strict allowlist blocks non-read-only tools\x1b[0m");
|
|
286
|
-
await session.prompt(
|
|
287
|
-
'Call the plan_mode tool with action "enable". Only call this one tool, nothing else.'
|
|
288
|
-
);
|
|
289
|
-
|
|
290
|
-
await session.prompt(
|
|
291
|
-
'Call the edit tool to edit file "/tmp/test.txt" replacing "a" with "b". Only call edit, nothing else.'
|
|
292
|
-
);
|
|
293
|
-
const editBlocked =
|
|
294
|
-
hasToolNotFoundError(session, "edit") || hasPlanModeToolBlockedError(session, "edit");
|
|
295
|
-
check("edit tool blocked in plan mode", editBlocked, "edit should not be available in plan mode");
|
|
296
|
-
|
|
297
|
-
await session.prompt(
|
|
298
|
-
'Call the bg_bash tool with command "echo blocked". Only call bg_bash, nothing else.'
|
|
299
|
-
);
|
|
300
|
-
const bgBashBlocked =
|
|
301
|
-
hasToolNotFoundError(session, "bg_bash") || hasPlanModeToolBlockedError(session, "bg_bash");
|
|
302
|
-
check("bg_bash blocked in plan mode", bgBashBlocked, "bg_bash should be blocked in plan mode");
|
|
303
|
-
|
|
304
|
-
await session.prompt('Call the subagent tool with task "ping". Only call subagent, nothing else.');
|
|
305
|
-
const subagentBlocked =
|
|
306
|
-
hasToolNotFoundError(session, "subagent") || hasPlanModeToolBlockedError(session, "subagent");
|
|
307
|
-
check("subagent blocked in plan mode", subagentBlocked, "subagent should be blocked in plan mode");
|
|
308
|
-
|
|
309
|
-
await session.prompt("Call the mcp__mock__ping tool. Only call this one tool, nothing else.");
|
|
310
|
-
const mcpBlocked =
|
|
311
|
-
hasToolNotFoundError(session, "mcp__mock__ping") ||
|
|
312
|
-
hasPlanModeToolBlockedError(session, "mcp__mock__ping");
|
|
313
|
-
check("mcp__* tools blocked in plan mode", mcpBlocked, "MCP tools should be blocked in plan mode");
|
|
314
|
-
|
|
315
|
-
await session.prompt("Call the questionnaire tool. Only call this one tool, nothing else.");
|
|
316
|
-
const questionnaireText = lastToolResultText(session, "questionnaire");
|
|
317
|
-
const questionnaireAllowed = questionnaireText.includes("mock-questionnaire-ok");
|
|
318
|
-
check("allowlisted questionnaire tool still works", questionnaireAllowed, questionnaireText);
|
|
319
|
-
|
|
320
|
-
// ── Test 5: Disabling plan mode restores normal access ───────
|
|
321
|
-
|
|
322
|
-
console.log("\n\x1b[1mTest 5: Disable restores normal tool access\x1b[0m");
|
|
323
|
-
await session.prompt(
|
|
324
|
-
'Call the plan_mode tool with action "disable". Only call this one tool, nothing else.'
|
|
325
|
-
);
|
|
326
|
-
await session.prompt(
|
|
327
|
-
'Call the subagent tool with task "after-disable". Only call subagent, nothing else.'
|
|
328
|
-
);
|
|
329
|
-
const subagentAfterDisableText = lastToolResultText(session, "subagent");
|
|
330
|
-
const subagentRestored = subagentAfterDisableText.includes("mock-subagent-ok:after-disable");
|
|
331
|
-
check("subagent restored after disabling plan mode", subagentRestored, subagentAfterDisableText);
|
|
332
|
-
|
|
333
|
-
// ── Cleanup & Summary ────────────────────────────────────────
|
|
334
|
-
|
|
335
|
-
session.dispose();
|
|
336
|
-
fs.rmSync(testAgentDir, { recursive: true, force: true });
|
|
337
|
-
|
|
338
|
-
const passed = results.filter((r) => r.passed).length;
|
|
339
|
-
const total = results.length;
|
|
340
|
-
|
|
341
|
-
console.log(`\n\x1b[1m══ Results: ${passed}/${total} passed ══\x1b[0m`);
|
|
342
|
-
if (passed < total) {
|
|
343
|
-
console.log("\n\x1b[31mFailed:\x1b[0m");
|
|
344
|
-
for (const r of results.filter((r) => !r.passed)) {
|
|
345
|
-
console.log(` ✗ ${r.name}`);
|
|
346
|
-
if (r.detail) console.log(` ${r.detail.slice(0, 300)}`);
|
|
347
|
-
}
|
|
348
|
-
}
|
|
349
|
-
console.log();
|
|
350
|
-
process.exit(passed === total ? 0 : 1);
|
|
@@ -1,213 +0,0 @@
|
|
|
1
|
-
import { beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
-
import type { ExtensionAPI, ExtensionContext, ToolDefinition } from "@mariozechner/pi-coding-agent";
|
|
3
|
-
import { Type } from "@sinclair/typebox";
|
|
4
|
-
import { ExtensionHarness } from "../../../test-utils/extension-harness.js";
|
|
5
|
-
import planModeExtension from "../index.js";
|
|
6
|
-
import { PLAN_MODE_ALLOWED_TOOLS } from "../utils.js";
|
|
7
|
-
|
|
8
|
-
const BASELINE_TOOLS = [
|
|
9
|
-
"read",
|
|
10
|
-
"bash",
|
|
11
|
-
"grep",
|
|
12
|
-
"find",
|
|
13
|
-
"ls",
|
|
14
|
-
"edit",
|
|
15
|
-
"write",
|
|
16
|
-
"subagent",
|
|
17
|
-
"bg_bash",
|
|
18
|
-
"mcp__mock__ping",
|
|
19
|
-
"questionnaire",
|
|
20
|
-
"plan_mode",
|
|
21
|
-
] as const;
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
* Register mock tools used to test plan-mode gating and restoration.
|
|
25
|
-
*
|
|
26
|
-
* @param pi - Extension API test double
|
|
27
|
-
* @returns void
|
|
28
|
-
*/
|
|
29
|
-
function registerMockTools(pi: ExtensionAPI): void {
|
|
30
|
-
const names = [
|
|
31
|
-
"read",
|
|
32
|
-
"bash",
|
|
33
|
-
"grep",
|
|
34
|
-
"find",
|
|
35
|
-
"ls",
|
|
36
|
-
"edit",
|
|
37
|
-
"write",
|
|
38
|
-
"subagent",
|
|
39
|
-
"bg_bash",
|
|
40
|
-
"mcp__mock__ping",
|
|
41
|
-
"questionnaire",
|
|
42
|
-
] as const;
|
|
43
|
-
|
|
44
|
-
for (const name of names) {
|
|
45
|
-
pi.registerTool({
|
|
46
|
-
name,
|
|
47
|
-
label: name,
|
|
48
|
-
description: `Mock ${name}`,
|
|
49
|
-
parameters: Type.Object({}),
|
|
50
|
-
async execute() {
|
|
51
|
-
return {
|
|
52
|
-
content: [{ type: "text", text: `${name}-ok` }],
|
|
53
|
-
details: {},
|
|
54
|
-
};
|
|
55
|
-
},
|
|
56
|
-
});
|
|
57
|
-
}
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
/**
|
|
61
|
-
* Create an extension context with optional persisted session entries.
|
|
62
|
-
*
|
|
63
|
-
* @param entries - Session entries returned by sessionManager.getEntries
|
|
64
|
-
* @returns Context object compatible with extension handlers
|
|
65
|
-
*/
|
|
66
|
-
function createContext(entries: unknown[] = [], hasUI = true): ExtensionContext {
|
|
67
|
-
return {
|
|
68
|
-
cwd: process.cwd(),
|
|
69
|
-
hasUI,
|
|
70
|
-
ui: {
|
|
71
|
-
notify() {},
|
|
72
|
-
setStatus() {},
|
|
73
|
-
setEditorComponent() {},
|
|
74
|
-
setWidget() {},
|
|
75
|
-
theme: {
|
|
76
|
-
fg(_token: string, value: string) {
|
|
77
|
-
return value;
|
|
78
|
-
},
|
|
79
|
-
strikethrough(value: string) {
|
|
80
|
-
return value;
|
|
81
|
-
},
|
|
82
|
-
},
|
|
83
|
-
} as never,
|
|
84
|
-
sessionManager: {
|
|
85
|
-
getEntries() {
|
|
86
|
-
return entries;
|
|
87
|
-
},
|
|
88
|
-
} as never,
|
|
89
|
-
} as unknown as ExtensionContext;
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
/**
|
|
93
|
-
* Resolve a registered tool from the test harness.
|
|
94
|
-
*
|
|
95
|
-
* @param harness - Extension harness
|
|
96
|
-
* @param name - Tool name
|
|
97
|
-
* @returns Tool definition
|
|
98
|
-
*/
|
|
99
|
-
function getTool(harness: ExtensionHarness, name: string): ToolDefinition {
|
|
100
|
-
const tool = harness.tools.get(name);
|
|
101
|
-
if (!tool) throw new Error(`Tool not registered: ${name}`);
|
|
102
|
-
return tool;
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
describe("plan-mode strict readonly enforcement", () => {
|
|
106
|
-
let harness: ExtensionHarness;
|
|
107
|
-
|
|
108
|
-
beforeEach(async () => {
|
|
109
|
-
harness = ExtensionHarness.create();
|
|
110
|
-
await harness.loadExtension(registerMockTools);
|
|
111
|
-
await harness.loadExtension(planModeExtension);
|
|
112
|
-
harness.api.setActiveTools([...BASELINE_TOOLS]);
|
|
113
|
-
});
|
|
114
|
-
|
|
115
|
-
test("enable applies strict allowlist and disable restores previous tools", async () => {
|
|
116
|
-
const tool = getTool(harness, "plan_mode");
|
|
117
|
-
const ctx = createContext();
|
|
118
|
-
|
|
119
|
-
await tool.execute("tc-enable", { action: "enable" }, undefined, () => {}, ctx);
|
|
120
|
-
expect(harness.api.getActiveTools()).toEqual(
|
|
121
|
-
PLAN_MODE_ALLOWED_TOOLS.filter((name) => BASELINE_TOOLS.includes(name))
|
|
122
|
-
);
|
|
123
|
-
|
|
124
|
-
await tool.execute("tc-disable", { action: "disable" }, undefined, () => {}, ctx);
|
|
125
|
-
expect(harness.api.getActiveTools()).toEqual([...BASELINE_TOOLS]);
|
|
126
|
-
});
|
|
127
|
-
|
|
128
|
-
test("tool_call blocks non-allowlisted tools and unsafe bash", async () => {
|
|
129
|
-
const tool = getTool(harness, "plan_mode");
|
|
130
|
-
const ctx = createContext();
|
|
131
|
-
await tool.execute("tc-enable", { action: "enable" }, undefined, () => {}, ctx);
|
|
132
|
-
|
|
133
|
-
const [blockedToolResult] = await harness.fireEvent(
|
|
134
|
-
"tool_call",
|
|
135
|
-
{ toolName: "subagent", input: { task: "x" } },
|
|
136
|
-
ctx
|
|
137
|
-
);
|
|
138
|
-
expect(blockedToolResult).toMatchObject({ block: true });
|
|
139
|
-
expect((blockedToolResult as { reason: string }).reason).toContain('tool "subagent" blocked');
|
|
140
|
-
|
|
141
|
-
const [safeBashResult] = await harness.fireEvent(
|
|
142
|
-
"tool_call",
|
|
143
|
-
{ toolName: "bash", input: { command: "ls -la" } },
|
|
144
|
-
ctx
|
|
145
|
-
);
|
|
146
|
-
expect(safeBashResult).toBeUndefined();
|
|
147
|
-
|
|
148
|
-
const [unsafeBashResult] = await harness.fireEvent(
|
|
149
|
-
"tool_call",
|
|
150
|
-
{ toolName: "bash", input: { command: "rm -rf /tmp/nope" } },
|
|
151
|
-
ctx
|
|
152
|
-
);
|
|
153
|
-
expect(unsafeBashResult).toMatchObject({ block: true });
|
|
154
|
-
});
|
|
155
|
-
|
|
156
|
-
test("resumed plan mode re-applies strict policy", async () => {
|
|
157
|
-
const persistedEntries = [
|
|
158
|
-
{
|
|
159
|
-
type: "custom",
|
|
160
|
-
customType: "plan-mode",
|
|
161
|
-
data: {
|
|
162
|
-
enabled: true,
|
|
163
|
-
normalTools: [...BASELINE_TOOLS],
|
|
164
|
-
todos: [],
|
|
165
|
-
},
|
|
166
|
-
},
|
|
167
|
-
];
|
|
168
|
-
const ctx = createContext(persistedEntries);
|
|
169
|
-
|
|
170
|
-
await harness.fireEvent("session_start", { type: "session_start" }, ctx);
|
|
171
|
-
|
|
172
|
-
expect(harness.api.getActiveTools()).toEqual(
|
|
173
|
-
PLAN_MODE_ALLOWED_TOOLS.filter((name) => BASELINE_TOOLS.includes(name))
|
|
174
|
-
);
|
|
175
|
-
|
|
176
|
-
const [blockedResult] = await harness.fireEvent(
|
|
177
|
-
"tool_call",
|
|
178
|
-
{ toolName: "bg_bash", input: { command: "echo hi" } },
|
|
179
|
-
ctx
|
|
180
|
-
);
|
|
181
|
-
expect(blockedResult).toMatchObject({ block: true });
|
|
182
|
-
});
|
|
183
|
-
|
|
184
|
-
test("auto-enable only triggers for interactive UI input", async () => {
|
|
185
|
-
const [result] = await harness.fireEvent(
|
|
186
|
-
"input",
|
|
187
|
-
{ source: "interactive", text: "plan only fix auth" },
|
|
188
|
-
createContext([], true)
|
|
189
|
-
);
|
|
190
|
-
|
|
191
|
-
expect(result).toEqual({ action: "transform", text: "fix auth" });
|
|
192
|
-
expect(harness.api.getActiveTools()).toEqual(
|
|
193
|
-
PLAN_MODE_ALLOWED_TOOLS.filter((name) => BASELINE_TOOLS.includes(name))
|
|
194
|
-
);
|
|
195
|
-
});
|
|
196
|
-
|
|
197
|
-
test("auto-enable ignores headless or non-interactive input", async () => {
|
|
198
|
-
const [headlessResult] = await harness.fireEvent(
|
|
199
|
-
"input",
|
|
200
|
-
{ source: "interactive", text: "plan only fix auth" },
|
|
201
|
-
createContext([], false)
|
|
202
|
-
);
|
|
203
|
-
const [rpcResult] = await harness.fireEvent(
|
|
204
|
-
"input",
|
|
205
|
-
{ source: "rpc", text: "plan only fix auth" },
|
|
206
|
-
createContext([], true)
|
|
207
|
-
);
|
|
208
|
-
|
|
209
|
-
expect(headlessResult).toEqual({ action: "continue" });
|
|
210
|
-
expect(rpcResult).toEqual({ action: "continue" });
|
|
211
|
-
expect(harness.api.getActiveTools()).toEqual([...BASELINE_TOOLS]);
|
|
212
|
-
});
|
|
213
|
-
});
|