@dungle-scrubs/tallow 0.8.27 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/README.md +42 -1
  2. package/dist/cli.js +7 -1
  3. package/dist/cli.js.map +1 -1
  4. package/dist/config.d.ts +1 -1
  5. package/dist/config.d.ts.map +1 -1
  6. package/dist/config.js +1 -1
  7. package/dist/config.js.map +1 -1
  8. package/dist/install.d.ts.map +1 -1
  9. package/dist/install.js +2 -9
  10. package/dist/install.js.map +1 -1
  11. package/dist/interactive-mode-patch.d.ts.map +1 -1
  12. package/dist/interactive-mode-patch.js +20 -9
  13. package/dist/interactive-mode-patch.js.map +1 -1
  14. package/dist/model-metadata-overrides.d.ts +2 -5
  15. package/dist/model-metadata-overrides.d.ts.map +1 -1
  16. package/dist/model-metadata-overrides.js +23 -12
  17. package/dist/model-metadata-overrides.js.map +1 -1
  18. package/dist/sdk.d.ts.map +1 -1
  19. package/dist/sdk.js +20 -9
  20. package/dist/sdk.js.map +1 -1
  21. package/dist/workspace-transition-interactive.d.ts.map +1 -1
  22. package/dist/workspace-transition-interactive.js +53 -3
  23. package/dist/workspace-transition-interactive.js.map +1 -1
  24. package/dist/workspace-transition.d.ts +2 -1
  25. package/dist/workspace-transition.d.ts.map +1 -1
  26. package/dist/workspace-transition.js +16 -4
  27. package/dist/workspace-transition.js.map +1 -1
  28. package/extensions/__integration__/cd-tool-guidelines.test.ts +46 -0
  29. package/extensions/__integration__/welcome-screen.test.ts +240 -0
  30. package/extensions/_icons/__tests__/icons.test.ts +0 -1
  31. package/extensions/_icons/index.ts +0 -2
  32. package/extensions/_shared/pid-registry.ts +5 -5
  33. package/extensions/background-task-tool/index.ts +1 -1
  34. package/extensions/cd-tool/index.ts +4 -1
  35. package/extensions/context-fork/__tests__/context-fork.test.ts +9 -0
  36. package/extensions/edit-tool-enhanced/index.ts +3 -1
  37. package/extensions/health/__tests__/diagnostics.test.ts +25 -0
  38. package/extensions/health/index.ts +62 -1
  39. package/extensions/loop/__tests__/loop.test.ts +365 -1
  40. package/extensions/loop/index.ts +213 -3
  41. package/extensions/prompt-suggestions/__tests__/autocomplete.test.ts +111 -3
  42. package/extensions/prompt-suggestions/autocomplete.ts +23 -5
  43. package/extensions/prompt-suggestions/index.ts +62 -3
  44. package/extensions/read-tool-enhanced/index.ts +5 -1
  45. package/extensions/render-stabilizer/__tests__/render-stabilizer.test.ts +42 -0
  46. package/extensions/render-stabilizer/extension.json +5 -0
  47. package/extensions/render-stabilizer/index.ts +66 -0
  48. package/extensions/session-memory/index.ts +1 -1
  49. package/extensions/session-namer/index.ts +1 -1
  50. package/extensions/subagent-tool/__tests__/auto-cheap-model.test.ts +66 -6
  51. package/extensions/subagent-tool/__tests__/model-router-explicit-resolution.test.ts +79 -5
  52. package/extensions/subagent-tool/__tests__/presentation-rendering.test.ts +4 -4
  53. package/extensions/subagent-tool/index.ts +4 -2
  54. package/extensions/subagent-tool/process.ts +26 -8
  55. package/extensions/teams-tool/sessions/spawn.ts +2 -2
  56. package/extensions/welcome-screen/__tests__/welcome-screen.test.ts +35 -0
  57. package/extensions/welcome-screen/extension.json +20 -0
  58. package/extensions/welcome-screen/index.ts +189 -0
  59. package/node_modules/@mariozechner/pi-tui/dist/index.d.ts +2 -2
  60. package/node_modules/@mariozechner/pi-tui/dist/index.d.ts.map +1 -1
  61. package/node_modules/@mariozechner/pi-tui/dist/index.js +2 -2
  62. package/node_modules/@mariozechner/pi-tui/dist/index.js.map +1 -1
  63. package/node_modules/@mariozechner/pi-tui/dist/keybindings.d.ts +309 -25
  64. package/node_modules/@mariozechner/pi-tui/dist/keybindings.d.ts.map +1 -1
  65. package/node_modules/@mariozechner/pi-tui/dist/keybindings.js +392 -72
  66. package/node_modules/@mariozechner/pi-tui/dist/keybindings.js.map +1 -1
  67. package/node_modules/@mariozechner/pi-tui/dist/keys.d.ts +30 -0
  68. package/node_modules/@mariozechner/pi-tui/dist/keys.d.ts.map +1 -1
  69. package/node_modules/@mariozechner/pi-tui/dist/keys.js +50 -6
  70. package/node_modules/@mariozechner/pi-tui/dist/keys.js.map +1 -1
  71. package/node_modules/@mariozechner/pi-tui/dist/terminal.d.ts +27 -0
  72. package/node_modules/@mariozechner/pi-tui/dist/terminal.d.ts.map +1 -1
  73. package/node_modules/@mariozechner/pi-tui/dist/terminal.js +59 -4
  74. package/node_modules/@mariozechner/pi-tui/dist/terminal.js.map +1 -1
  75. package/node_modules/@mariozechner/pi-tui/dist/tui.d.ts +56 -0
  76. package/node_modules/@mariozechner/pi-tui/dist/tui.d.ts.map +1 -1
  77. package/node_modules/@mariozechner/pi-tui/dist/tui.js +188 -5
  78. package/node_modules/@mariozechner/pi-tui/dist/tui.js.map +1 -1
  79. package/node_modules/@mariozechner/pi-tui/package.json +1 -1
  80. package/node_modules/@mariozechner/pi-tui/src/__tests__/mouse-events.test.ts +134 -0
  81. package/node_modules/@mariozechner/pi-tui/src/__tests__/tmux-compat.test.ts +204 -0
  82. package/node_modules/@mariozechner/pi-tui/src/__tests__/tui-diff-regression.test.ts +49 -0
  83. package/node_modules/@mariozechner/pi-tui/src/__tests__/tui-render-scheduling.test.ts +2 -0
  84. package/node_modules/@mariozechner/pi-tui/src/index.ts +11 -0
  85. package/node_modules/@mariozechner/pi-tui/src/keybindings.ts +478 -140
  86. package/node_modules/@mariozechner/pi-tui/src/keys.ts +84 -6
  87. package/node_modules/@mariozechner/pi-tui/src/terminal.ts +69 -4
  88. package/node_modules/@mariozechner/pi-tui/src/tui.ts +205 -5
  89. package/package.json +9 -9
  90. package/runtime/config.ts +7 -0
  91. package/runtime/model-metadata-overrides.ts +7 -0
  92. package/schemas/settings.schema.json +0 -5
  93. package/skills/tallow-expert/SKILL.md +6 -4
  94. package/extensions/plan-mode-tool/__tests__/e2e.mjs +0 -350
  95. package/extensions/plan-mode-tool/__tests__/index.test.ts +0 -213
  96. package/extensions/plan-mode-tool/__tests__/utils.test.ts +0 -381
  97. package/extensions/plan-mode-tool/extension.json +0 -22
  98. package/extensions/plan-mode-tool/index.ts +0 -583
  99. package/extensions/plan-mode-tool/utils.ts +0 -257
@@ -34,7 +34,7 @@ Relay that answer to the user.
34
34
  | Component | Location |
35
35
  |-----------|----------|
36
36
  | Core source | `src/` (agent-runner.ts, atomic-write.ts, auth-hardening.ts, cli-auto-rebuild.ts, cli.ts, compaction-cancel-patch.ts, config.ts, extensions-global.d.ts, fatal-errors.ts, index.ts, install.ts, interactive-mode-patch.ts, model-metadata-overrides.ts, otel.ts, pid-manager.ts, pid-schema.ts, plugins.ts, process-cleanup.ts, project-trust-banner.ts, project-trust-interop.ts, project-trust.ts, runtime-path-provider.ts, runtime-provenance.ts, sdk.ts, session-migration.ts, session-utils.ts, startup-profile.ts, startup-timing.ts, streaming-yield-patch.ts, workspace-transition-interactive.ts, workspace-transition-relay.ts, workspace-transition.ts, yield-to-io.ts) |
37
- | Extensions | `extensions/` — extension.json + index.ts each (52 bundled) |
37
+ | Extensions | `extensions/` — extension.json + index.ts each (53 bundled) |
38
38
  | Skills | `skills/` — subdirs with SKILL.md |
39
39
  | Agents | `agents/` — markdown with YAML frontmatter |
40
40
  | Themes | `themes/` — JSON files (34 dark-only themes) |
@@ -59,8 +59,8 @@ Extensions export a default function receiving `ExtensionAPI` (conventionally na
59
59
 
60
60
  #### Registration
61
61
 
62
- - `registerTool(tool: ToolDefinition<TParams, TDetails>)` — Register a tool that the LLM can call.
63
- - `registerCommand(name: string, options: Omit<RegisteredCommand, "name">)` — Register a custom command.
62
+ - `registerTool(tool: ToolDefinition<TParams, TDetails, TState>)` — Register a tool that the LLM can call.
63
+ - `registerCommand(name: string, options: Omit<RegisteredCommand, "name" | "sourceInfo">)` — Register a custom command.
64
64
  - `registerFlag(name: string, options: object)` — Register a CLI flag.
65
65
  - `registerMessageRenderer(customType: string, renderer: MessageRenderer<T>)` — Register a custom renderer for CustomMessageEntry.
66
66
  - `registerProvider(name: string, config: ProviderConfig)` — Register or override a model provider.
@@ -81,7 +81,7 @@ Extensions export a default function receiving `ExtensionAPI` (conventionally na
81
81
  - `getFlag(name: string)` — Get the value of a registered CLI flag.
82
82
  - `exec(command: string, args: string[], options?: ExecOptions)` — Execute a shell command.
83
83
  - `getActiveTools()` — Get the list of currently active tool names.
84
- - `getAllTools()` — Get all configured tools with name and description.
84
+ - `getAllTools()` — Get all configured tools with parameter schema and source metadata.
85
85
  - `setActiveTools(toolNames: string[])` — Set the active tools by name.
86
86
  - `getCommands()` — Get available slash commands in the current session.
87
87
  - `setModel(model: Model<any>)` — Set the current model.
@@ -154,6 +154,7 @@ Extensions export a default function receiving `ExtensionAPI` (conventionally na
154
154
  - `modelRegistry` — Model registry for API key resolution
155
155
  - `model` — Current model (may be undefined)
156
156
  - `isIdle()` — Whether the agent is idle (not streaming)
157
+ - `signal` — The current abort signal, or undefined when the agent is not streaming.
157
158
  - `abort()` — Abort the current agent operation
158
159
  - `hasPendingMessages()` — Whether there are queued messages waiting
159
160
  - `shutdown()` — Gracefully shutdown pi and exit.
@@ -178,6 +179,7 @@ Extensions export a default function receiving `ExtensionAPI` (conventionally na
178
179
  - `onTerminalInput(handler: TerminalInputHandler)` — Listen to raw terminal input (interactive mode only).
179
180
  - `setStatus(key: string, text: string)` — Set status text in the footer/status bar.
180
181
  - `setWorkingMessage(message?: string)` — Set the working/loading message shown during streaming.
182
+ - `setHiddenThinkingLabel(label?: string)` — Set the label shown for hidden thinking blocks.
181
183
  - `setWidget(key: string, content: string[], options?: ExtensionWidgetOptions)` — Set a widget to display above or below the editor.
182
184
  - `setTitle(title: string)` — Set the terminal window/tab title.
183
185
  - `pasteToEditor(text: string)` — Paste text into the editor, triggering paste handling (collapse for large content).
@@ -1,350 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- /**
4
- * E2E test for the plan-mode extension.
5
- *
6
- * Proves:
7
- * 1. plan_mode tool remains available after toggling modes
8
- * 2. Plan mode enforces a strict read-only allowlist
9
- * 3. Non-allowlisted extension tools are blocked in plan mode
10
- * 4. Disabling plan mode restores normal access
11
- *
12
- * Uses the SDK to load ONLY the plan-mode extension (isolated).
13
- * Costs ~$0.01 per run.
14
- *
15
- * Usage:
16
- * node extensions/plan-mode-tool/__tests__/e2e.mjs
17
- */
18
-
19
- import fs from "node:fs";
20
- import os from "node:os";
21
- import path from "node:path";
22
- import { fileURLToPath } from "node:url";
23
- import { getModel } from "@mariozechner/pi-ai";
24
- import {
25
- AuthStorage,
26
- createAgentSession,
27
- DefaultResourceLoader,
28
- ModelRegistry,
29
- SessionManager,
30
- SettingsManager,
31
- } from "@mariozechner/pi-coding-agent";
32
- import { Type } from "@sinclair/typebox";
33
-
34
- // ── Helpers ──────────────────────────────────────────────────
35
-
36
- const results = [];
37
-
38
- /**
39
- * Record a test result.
40
- * @param {string} name - Test name
41
- * @param {boolean} passed - Pass/fail
42
- * @param {string} [detail] - Extra detail on failure
43
- */
44
- function check(name, passed, detail) {
45
- results.push({ name, passed, detail });
46
- const icon = passed ? "\x1b[32m✓\x1b[0m" : "\x1b[31m✗\x1b[0m";
47
- let line = ` ${icon} ${name}`;
48
- if (!passed && detail) line += `\n ${detail.slice(0, 300)}`;
49
- console.log(line);
50
- }
51
-
52
- /**
53
- * Get the text content of the most recent tool result for a given tool name.
54
- * @param {import("@mariozechner/pi-coding-agent").AgentSession} session
55
- * @param {string} toolName
56
- * @returns {string}
57
- */
58
- function lastToolResultText(session, toolName) {
59
- const msgs = session.messages;
60
- for (let i = msgs.length - 1; i >= 0; i--) {
61
- const m = msgs[i];
62
- if (m.role === "toolResult" && m.toolName === toolName) {
63
- for (const part of m.content) {
64
- if (part.type === "text") return part.text;
65
- }
66
- }
67
- }
68
- return "";
69
- }
70
-
71
- /**
72
- * Check if any tool result in the session contains "not found" error.
73
- * @param {import("@mariozechner/pi-coding-agent").AgentSession} session
74
- * @param {string} toolName
75
- * @returns {boolean}
76
- */
77
- function hasToolNotFoundError(session, toolName) {
78
- const msgs = session.messages;
79
- for (let i = msgs.length - 1; i >= 0; i--) {
80
- const m = msgs[i];
81
- if (m.role === "toolResult") {
82
- for (const part of m.content) {
83
- if (part.type === "text" && part.text.includes(`Tool ${toolName} not found`)) {
84
- return true;
85
- }
86
- }
87
- }
88
- }
89
- return false;
90
- }
91
-
92
- /**
93
- * Check if a tool call was blocked by plan-mode policy.
94
- * @param {import("@mariozechner/pi-coding-agent").AgentSession} session
95
- * @param {string} toolName
96
- * @returns {boolean}
97
- */
98
- function hasPlanModeToolBlockedError(session, toolName) {
99
- const msgs = session.messages;
100
- for (let i = msgs.length - 1; i >= 0; i--) {
101
- const m = msgs[i];
102
- if (m.role !== "toolResult") continue;
103
- for (const part of m.content) {
104
- if (part.type === "text" && part.text.includes(`Plan mode: tool "${toolName}" blocked`)) {
105
- return true;
106
- }
107
- }
108
- }
109
- return false;
110
- }
111
-
112
- // ── Isolated extension loading ───────────────────────────────
113
-
114
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
115
- const extensionSrcDir = path.resolve(__dirname, "..");
116
-
117
- const testAgentDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-e2e-plan-"));
118
- const extDst = path.join(testAgentDir, "extensions/plan-mode-tool");
119
- fs.mkdirSync(extDst, { recursive: true });
120
- for (const file of ["index.ts", "utils.ts"]) {
121
- fs.copyFileSync(path.join(extensionSrcDir, file), path.join(extDst, file));
122
- }
123
-
124
- // ── Setup ────────────────────────────────────────────────────
125
-
126
- console.log("\n\x1b[1m══ Plan Mode Extension E2E Test ══\x1b[0m\n");
127
-
128
- const authStorage = new AuthStorage();
129
- const modelRegistry = new ModelRegistry(authStorage);
130
- const model = getModel("anthropic", "claude-haiku-4-5");
131
- if (!model) {
132
- console.error("✗ Model claude-haiku-4-5 not found");
133
- process.exit(1);
134
- }
135
-
136
- const settingsManager = SettingsManager.inMemory({ compaction: { enabled: false } });
137
-
138
- /**
139
- * Register mock tools used to validate strict plan-mode allowlisting.
140
- * @param {import("@mariozechner/pi-coding-agent").ExtensionAPI} pi
141
- */
142
- function registerMockTools(pi) {
143
- pi.registerTool({
144
- name: "bg_bash",
145
- label: "bg_bash",
146
- description: "Mock background bash tool",
147
- parameters: Type.Object({ command: Type.String() }),
148
- async execute(_toolCallId, params) {
149
- return {
150
- content: [{ type: "text", text: `mock-bg-bash-ok:${params.command}` }],
151
- details: {},
152
- };
153
- },
154
- });
155
-
156
- pi.registerTool({
157
- name: "subagent",
158
- label: "subagent",
159
- description: "Mock subagent tool",
160
- parameters: Type.Object({ task: Type.String() }),
161
- async execute(_toolCallId, params) {
162
- return {
163
- content: [{ type: "text", text: `mock-subagent-ok:${params.task}` }],
164
- details: {},
165
- };
166
- },
167
- });
168
-
169
- pi.registerTool({
170
- name: "mcp__mock__ping",
171
- label: "mcp__mock__ping",
172
- description: "Mock MCP-style tool",
173
- parameters: Type.Object({}),
174
- async execute() {
175
- return {
176
- content: [{ type: "text", text: "mock-mcp-ok" }],
177
- details: {},
178
- };
179
- },
180
- });
181
-
182
- pi.registerTool({
183
- name: "questionnaire",
184
- label: "questionnaire",
185
- description: "Mock read-only questionnaire tool",
186
- parameters: Type.Object({}),
187
- async execute() {
188
- return {
189
- content: [{ type: "text", text: "mock-questionnaire-ok" }],
190
- details: {},
191
- };
192
- },
193
- });
194
- }
195
-
196
- console.log("Loading extension (isolated)...");
197
- const loader = new DefaultResourceLoader({
198
- cwd: os.tmpdir(),
199
- agentDir: testAgentDir,
200
- settingsManager,
201
- extensionFactories: [registerMockTools],
202
- skillsOverride: () => ({ skills: [], diagnostics: [] }),
203
- promptsOverride: () => ({ prompts: [], diagnostics: [] }),
204
- agentsFilesOverride: () => ({ agentsFiles: [] }),
205
- });
206
- await loader.reload();
207
-
208
- const exts = loader.getExtensions();
209
- console.log(` Extensions loaded: ${exts.extensions.length}, errors: ${exts.errors.length}`);
210
- if (exts.errors.length > 0) {
211
- console.error(" Extension errors:", exts.errors);
212
- }
213
-
214
- console.log("Creating session (haiku)...\n");
215
- const { session } = await createAgentSession({
216
- model,
217
- thinkingLevel: "off",
218
- authStorage,
219
- modelRegistry,
220
- resourceLoader: loader,
221
- sessionManager: SessionManager.inMemory(),
222
- settingsManager,
223
- });
224
-
225
- // Log tool calls
226
- session.subscribe((event) => {
227
- if (event.type === "tool_execution_start") {
228
- process.stdout.write(` \x1b[2m→ ${event.toolName}\x1b[0m\n`);
229
- }
230
- });
231
-
232
- // ── Test 1: plan_mode tool exists at startup ─────────────────
233
-
234
- console.log("\x1b[1mTest 1: plan_mode tool available at startup\x1b[0m");
235
- await session.prompt(
236
- 'Call the plan_mode tool with action "status". Only call this one tool, nothing else.'
237
- );
238
- const statusText = lastToolResultText(session, "plan_mode");
239
- const noStartupError = !hasToolNotFoundError(session, "plan_mode");
240
- check("plan_mode tool callable at startup", noStartupError, statusText);
241
- check("reports normal mode", statusText.includes("normal"), statusText);
242
-
243
- // ── Test 2: Enable plan mode, verify plan_mode survives ──────
244
-
245
- console.log("\n\x1b[1mTest 2: Enable plan mode → plan_mode tool still available\x1b[0m");
246
- await session.prompt(
247
- 'Call the plan_mode tool with action "enable". Only call this one tool, nothing else.'
248
- );
249
- const enableText = lastToolResultText(session, "plan_mode");
250
- const noEnableError = !hasToolNotFoundError(session, "plan_mode");
251
- check("plan_mode callable during enable", noEnableError, enableText);
252
- check("reports plan mode enabled", enableText.includes("enabled"), enableText);
253
-
254
- // Now check status — plan_mode should still work IN plan mode
255
- await session.prompt(
256
- 'Call the plan_mode tool with action "status". Only call this one tool, nothing else.'
257
- );
258
- const planStatusText = lastToolResultText(session, "plan_mode");
259
- const noPlanStatusError = !hasToolNotFoundError(session, "plan_mode");
260
- check("plan_mode callable while in plan mode", noPlanStatusError, planStatusText);
261
- check("reports planning mode", planStatusText.includes("planning"), planStatusText);
262
-
263
- // ── Test 3: Disable plan mode, verify plan_mode survives ─────
264
-
265
- console.log("\n\x1b[1mTest 3: Disable plan mode → plan_mode tool still available\x1b[0m");
266
- await session.prompt(
267
- 'Call the plan_mode tool with action "disable". Only call this one tool, nothing else.'
268
- );
269
- const disableText = lastToolResultText(session, "plan_mode");
270
- const noDisableError = !hasToolNotFoundError(session, "plan_mode");
271
- check("plan_mode callable during disable", noDisableError, disableText);
272
- check("reports disabled", disableText.includes("disabled"), disableText);
273
-
274
- // Final status check — should be back to normal
275
- await session.prompt(
276
- 'Call the plan_mode tool with action "status". Only call this one tool, nothing else.'
277
- );
278
- const finalStatusText = lastToolResultText(session, "plan_mode");
279
- const noFinalError = !hasToolNotFoundError(session, "plan_mode");
280
- check("plan_mode callable after round-trip", noFinalError, finalStatusText);
281
- check("back to normal mode", finalStatusText.includes("normal"), finalStatusText);
282
-
283
- // ── Test 4: Strict allowlist enforcement in plan mode ────────
284
-
285
- console.log("\n\x1b[1mTest 4: Strict allowlist blocks non-read-only tools\x1b[0m");
286
- await session.prompt(
287
- 'Call the plan_mode tool with action "enable". Only call this one tool, nothing else.'
288
- );
289
-
290
- await session.prompt(
291
- 'Call the edit tool to edit file "/tmp/test.txt" replacing "a" with "b". Only call edit, nothing else.'
292
- );
293
- const editBlocked =
294
- hasToolNotFoundError(session, "edit") || hasPlanModeToolBlockedError(session, "edit");
295
- check("edit tool blocked in plan mode", editBlocked, "edit should not be available in plan mode");
296
-
297
- await session.prompt(
298
- 'Call the bg_bash tool with command "echo blocked". Only call bg_bash, nothing else.'
299
- );
300
- const bgBashBlocked =
301
- hasToolNotFoundError(session, "bg_bash") || hasPlanModeToolBlockedError(session, "bg_bash");
302
- check("bg_bash blocked in plan mode", bgBashBlocked, "bg_bash should be blocked in plan mode");
303
-
304
- await session.prompt('Call the subagent tool with task "ping". Only call subagent, nothing else.');
305
- const subagentBlocked =
306
- hasToolNotFoundError(session, "subagent") || hasPlanModeToolBlockedError(session, "subagent");
307
- check("subagent blocked in plan mode", subagentBlocked, "subagent should be blocked in plan mode");
308
-
309
- await session.prompt("Call the mcp__mock__ping tool. Only call this one tool, nothing else.");
310
- const mcpBlocked =
311
- hasToolNotFoundError(session, "mcp__mock__ping") ||
312
- hasPlanModeToolBlockedError(session, "mcp__mock__ping");
313
- check("mcp__* tools blocked in plan mode", mcpBlocked, "MCP tools should be blocked in plan mode");
314
-
315
- await session.prompt("Call the questionnaire tool. Only call this one tool, nothing else.");
316
- const questionnaireText = lastToolResultText(session, "questionnaire");
317
- const questionnaireAllowed = questionnaireText.includes("mock-questionnaire-ok");
318
- check("allowlisted questionnaire tool still works", questionnaireAllowed, questionnaireText);
319
-
320
- // ── Test 5: Disabling plan mode restores normal access ───────
321
-
322
- console.log("\n\x1b[1mTest 5: Disable restores normal tool access\x1b[0m");
323
- await session.prompt(
324
- 'Call the plan_mode tool with action "disable". Only call this one tool, nothing else.'
325
- );
326
- await session.prompt(
327
- 'Call the subagent tool with task "after-disable". Only call subagent, nothing else.'
328
- );
329
- const subagentAfterDisableText = lastToolResultText(session, "subagent");
330
- const subagentRestored = subagentAfterDisableText.includes("mock-subagent-ok:after-disable");
331
- check("subagent restored after disabling plan mode", subagentRestored, subagentAfterDisableText);
332
-
333
- // ── Cleanup & Summary ────────────────────────────────────────
334
-
335
- session.dispose();
336
- fs.rmSync(testAgentDir, { recursive: true, force: true });
337
-
338
- const passed = results.filter((r) => r.passed).length;
339
- const total = results.length;
340
-
341
- console.log(`\n\x1b[1m══ Results: ${passed}/${total} passed ══\x1b[0m`);
342
- if (passed < total) {
343
- console.log("\n\x1b[31mFailed:\x1b[0m");
344
- for (const r of results.filter((r) => !r.passed)) {
345
- console.log(` ✗ ${r.name}`);
346
- if (r.detail) console.log(` ${r.detail.slice(0, 300)}`);
347
- }
348
- }
349
- console.log();
350
- process.exit(passed === total ? 0 : 1);
@@ -1,213 +0,0 @@
1
- import { beforeEach, describe, expect, test } from "bun:test";
2
- import type { ExtensionAPI, ExtensionContext, ToolDefinition } from "@mariozechner/pi-coding-agent";
3
- import { Type } from "@sinclair/typebox";
4
- import { ExtensionHarness } from "../../../test-utils/extension-harness.js";
5
- import planModeExtension from "../index.js";
6
- import { PLAN_MODE_ALLOWED_TOOLS } from "../utils.js";
7
-
8
- const BASELINE_TOOLS = [
9
- "read",
10
- "bash",
11
- "grep",
12
- "find",
13
- "ls",
14
- "edit",
15
- "write",
16
- "subagent",
17
- "bg_bash",
18
- "mcp__mock__ping",
19
- "questionnaire",
20
- "plan_mode",
21
- ] as const;
22
-
23
- /**
24
- * Register mock tools used to test plan-mode gating and restoration.
25
- *
26
- * @param pi - Extension API test double
27
- * @returns void
28
- */
29
- function registerMockTools(pi: ExtensionAPI): void {
30
- const names = [
31
- "read",
32
- "bash",
33
- "grep",
34
- "find",
35
- "ls",
36
- "edit",
37
- "write",
38
- "subagent",
39
- "bg_bash",
40
- "mcp__mock__ping",
41
- "questionnaire",
42
- ] as const;
43
-
44
- for (const name of names) {
45
- pi.registerTool({
46
- name,
47
- label: name,
48
- description: `Mock ${name}`,
49
- parameters: Type.Object({}),
50
- async execute() {
51
- return {
52
- content: [{ type: "text", text: `${name}-ok` }],
53
- details: {},
54
- };
55
- },
56
- });
57
- }
58
- }
59
-
60
- /**
61
- * Create an extension context with optional persisted session entries.
62
- *
63
- * @param entries - Session entries returned by sessionManager.getEntries
64
- * @returns Context object compatible with extension handlers
65
- */
66
- function createContext(entries: unknown[] = [], hasUI = true): ExtensionContext {
67
- return {
68
- cwd: process.cwd(),
69
- hasUI,
70
- ui: {
71
- notify() {},
72
- setStatus() {},
73
- setEditorComponent() {},
74
- setWidget() {},
75
- theme: {
76
- fg(_token: string, value: string) {
77
- return value;
78
- },
79
- strikethrough(value: string) {
80
- return value;
81
- },
82
- },
83
- } as never,
84
- sessionManager: {
85
- getEntries() {
86
- return entries;
87
- },
88
- } as never,
89
- } as unknown as ExtensionContext;
90
- }
91
-
92
- /**
93
- * Resolve a registered tool from the test harness.
94
- *
95
- * @param harness - Extension harness
96
- * @param name - Tool name
97
- * @returns Tool definition
98
- */
99
- function getTool(harness: ExtensionHarness, name: string): ToolDefinition {
100
- const tool = harness.tools.get(name);
101
- if (!tool) throw new Error(`Tool not registered: ${name}`);
102
- return tool;
103
- }
104
-
105
- describe("plan-mode strict readonly enforcement", () => {
106
- let harness: ExtensionHarness;
107
-
108
- beforeEach(async () => {
109
- harness = ExtensionHarness.create();
110
- await harness.loadExtension(registerMockTools);
111
- await harness.loadExtension(planModeExtension);
112
- harness.api.setActiveTools([...BASELINE_TOOLS]);
113
- });
114
-
115
- test("enable applies strict allowlist and disable restores previous tools", async () => {
116
- const tool = getTool(harness, "plan_mode");
117
- const ctx = createContext();
118
-
119
- await tool.execute("tc-enable", { action: "enable" }, undefined, () => {}, ctx);
120
- expect(harness.api.getActiveTools()).toEqual(
121
- PLAN_MODE_ALLOWED_TOOLS.filter((name) => BASELINE_TOOLS.includes(name))
122
- );
123
-
124
- await tool.execute("tc-disable", { action: "disable" }, undefined, () => {}, ctx);
125
- expect(harness.api.getActiveTools()).toEqual([...BASELINE_TOOLS]);
126
- });
127
-
128
- test("tool_call blocks non-allowlisted tools and unsafe bash", async () => {
129
- const tool = getTool(harness, "plan_mode");
130
- const ctx = createContext();
131
- await tool.execute("tc-enable", { action: "enable" }, undefined, () => {}, ctx);
132
-
133
- const [blockedToolResult] = await harness.fireEvent(
134
- "tool_call",
135
- { toolName: "subagent", input: { task: "x" } },
136
- ctx
137
- );
138
- expect(blockedToolResult).toMatchObject({ block: true });
139
- expect((blockedToolResult as { reason: string }).reason).toContain('tool "subagent" blocked');
140
-
141
- const [safeBashResult] = await harness.fireEvent(
142
- "tool_call",
143
- { toolName: "bash", input: { command: "ls -la" } },
144
- ctx
145
- );
146
- expect(safeBashResult).toBeUndefined();
147
-
148
- const [unsafeBashResult] = await harness.fireEvent(
149
- "tool_call",
150
- { toolName: "bash", input: { command: "rm -rf /tmp/nope" } },
151
- ctx
152
- );
153
- expect(unsafeBashResult).toMatchObject({ block: true });
154
- });
155
-
156
- test("resumed plan mode re-applies strict policy", async () => {
157
- const persistedEntries = [
158
- {
159
- type: "custom",
160
- customType: "plan-mode",
161
- data: {
162
- enabled: true,
163
- normalTools: [...BASELINE_TOOLS],
164
- todos: [],
165
- },
166
- },
167
- ];
168
- const ctx = createContext(persistedEntries);
169
-
170
- await harness.fireEvent("session_start", { type: "session_start" }, ctx);
171
-
172
- expect(harness.api.getActiveTools()).toEqual(
173
- PLAN_MODE_ALLOWED_TOOLS.filter((name) => BASELINE_TOOLS.includes(name))
174
- );
175
-
176
- const [blockedResult] = await harness.fireEvent(
177
- "tool_call",
178
- { toolName: "bg_bash", input: { command: "echo hi" } },
179
- ctx
180
- );
181
- expect(blockedResult).toMatchObject({ block: true });
182
- });
183
-
184
- test("auto-enable only triggers for interactive UI input", async () => {
185
- const [result] = await harness.fireEvent(
186
- "input",
187
- { source: "interactive", text: "plan only fix auth" },
188
- createContext([], true)
189
- );
190
-
191
- expect(result).toEqual({ action: "transform", text: "fix auth" });
192
- expect(harness.api.getActiveTools()).toEqual(
193
- PLAN_MODE_ALLOWED_TOOLS.filter((name) => BASELINE_TOOLS.includes(name))
194
- );
195
- });
196
-
197
- test("auto-enable ignores headless or non-interactive input", async () => {
198
- const [headlessResult] = await harness.fireEvent(
199
- "input",
200
- { source: "interactive", text: "plan only fix auth" },
201
- createContext([], false)
202
- );
203
- const [rpcResult] = await harness.fireEvent(
204
- "input",
205
- { source: "rpc", text: "plan only fix auth" },
206
- createContext([], true)
207
- );
208
-
209
- expect(headlessResult).toEqual({ action: "continue" });
210
- expect(rpcResult).toEqual({ action: "continue" });
211
- expect(harness.api.getActiveTools()).toEqual([...BASELINE_TOOLS]);
212
- });
213
- });