@pugi/cli 0.1.0-beta.2 → 0.1.0-beta.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. package/THIRD_PARTY_NOTICES.md +40 -0
  2. package/assets/pugi-mascot.ansi +15 -40
  3. package/bin/run.js +33 -1
  4. package/dist/commands/jobs-watch.js +201 -0
  5. package/dist/commands/jobs.js +15 -0
  6. package/dist/core/agent-progress/cleanup.js +134 -0
  7. package/dist/core/agent-progress/schema.js +144 -0
  8. package/dist/core/agent-progress/writer.js +101 -0
  9. package/dist/core/compact/auto-trigger.js +96 -0
  10. package/dist/core/compact/buffer-rewriter.js +115 -0
  11. package/dist/core/compact/summarizer.js +196 -0
  12. package/dist/core/compact/token-counter.js +108 -0
  13. package/dist/core/consensus/diff-capture.js +73 -0
  14. package/dist/core/context/index.js +7 -0
  15. package/dist/core/context/markdown-traverse.js +255 -0
  16. package/dist/core/cost/rate-card.js +129 -0
  17. package/dist/core/cost/tracker.js +221 -0
  18. package/dist/core/denial-tracking/index.js +8 -0
  19. package/dist/core/denial-tracking/state.js +264 -0
  20. package/dist/core/diagnostics/probe-runner.js +93 -0
  21. package/dist/core/diagnostics/probes/api.js +46 -0
  22. package/dist/core/diagnostics/probes/auth.js +86 -0
  23. package/dist/core/diagnostics/probes/cli-version.js +127 -0
  24. package/dist/core/diagnostics/probes/config.js +72 -0
  25. package/dist/core/diagnostics/probes/denial-tracking.js +57 -0
  26. package/dist/core/diagnostics/probes/disk.js +81 -0
  27. package/dist/core/diagnostics/probes/git.js +65 -0
  28. package/dist/core/diagnostics/probes/mcp.js +75 -0
  29. package/dist/core/diagnostics/probes/node.js +59 -0
  30. package/dist/core/diagnostics/probes/pnpm.js +36 -0
  31. package/dist/core/diagnostics/probes/session.js +74 -0
  32. package/dist/core/diagnostics/probes/status-snapshot.js +442 -0
  33. package/dist/core/diagnostics/probes/workspace.js +63 -0
  34. package/dist/core/diagnostics/types.js +70 -0
  35. package/dist/core/edits/dispatch.js +218 -2
  36. package/dist/core/edits/journal.js +199 -0
  37. package/dist/core/edits/layer-d-ast.js +557 -14
  38. package/dist/core/edits/verify-hook.js +273 -0
  39. package/dist/core/edits/worktree.js +111 -18
  40. package/dist/core/engine/anvil-client.js +115 -5
  41. package/dist/core/engine/budgets.js +89 -0
  42. package/dist/core/engine/context-prefix.js +155 -0
  43. package/dist/core/engine/intent.js +260 -0
  44. package/dist/core/engine/native-pugi.js +744 -210
  45. package/dist/core/engine/prompts.js +61 -6
  46. package/dist/core/engine/strip-internal-fields.js +124 -0
  47. package/dist/core/engine/tool-bridge.js +818 -31
  48. package/dist/core/file-cache.js +113 -1
  49. package/dist/core/init/scaffold.js +195 -0
  50. package/dist/core/lsp/client.js +174 -29
  51. package/dist/core/mcp/client.js +75 -6
  52. package/dist/core/mcp/http-server.js +553 -0
  53. package/dist/core/mcp/permission.js +190 -0
  54. package/dist/core/mcp/registry.js +24 -2
  55. package/dist/core/mcp/server-tools.js +219 -0
  56. package/dist/core/mcp/server.js +397 -0
  57. package/dist/core/permissions/gate.js +187 -0
  58. package/dist/core/permissions/index.js +18 -0
  59. package/dist/core/permissions/mode.js +102 -0
  60. package/dist/core/permissions/state.js +160 -0
  61. package/dist/core/permissions/tool-class.js +93 -0
  62. package/dist/core/repl/codebase-survey.js +308 -0
  63. package/dist/core/repl/history.js +11 -1
  64. package/dist/core/repl/init-interview.js +457 -0
  65. package/dist/core/repl/model-pricing.js +135 -0
  66. package/dist/core/repl/onboarding-state.js +297 -0
  67. package/dist/core/repl/session.js +719 -29
  68. package/dist/core/repl/slash-commands.js +133 -9
  69. package/dist/core/retry-budget/budget.js +284 -0
  70. package/dist/core/retry-budget/index.js +5 -0
  71. package/dist/core/settings.js +71 -0
  72. package/dist/core/skills/defaults.js +457 -0
  73. package/dist/core/subagents/dispatcher-real.js +600 -0
  74. package/dist/core/subagents/dispatcher.js +113 -24
  75. package/dist/core/subagents/index.js +18 -5
  76. package/dist/core/subagents/isolation-matrix.js +213 -0
  77. package/dist/core/subagents/spawn.js +19 -4
  78. package/dist/core/transport/version-interceptor.js +166 -0
  79. package/dist/index.js +28 -0
  80. package/dist/runtime/bootstrap.js +190 -0
  81. package/dist/runtime/cli.js +1588 -266
  82. package/dist/runtime/commands/compact.js +296 -0
  83. package/dist/runtime/commands/cost.js +199 -0
  84. package/dist/runtime/commands/delegate.js +289 -0
  85. package/dist/runtime/commands/doctor.js +369 -0
  86. package/dist/runtime/commands/lsp.js +187 -5
  87. package/dist/runtime/commands/mcp.js +824 -0
  88. package/dist/runtime/commands/patch.js +17 -0
  89. package/dist/runtime/commands/permissions.js +87 -0
  90. package/dist/runtime/commands/report.js +299 -0
  91. package/dist/runtime/commands/review-consensus.js +17 -2
  92. package/dist/runtime/commands/roster.js +117 -0
  93. package/dist/runtime/commands/status.js +178 -0
  94. package/dist/runtime/commands/worktree.js +50 -6
  95. package/dist/runtime/headless.js +543 -0
  96. package/dist/runtime/load-hooks-or-exit.js +71 -0
  97. package/dist/runtime/plan-decompose.js +531 -0
  98. package/dist/runtime/version.js +65 -0
  99. package/dist/tools/agent-tool.js +206 -0
  100. package/dist/tools/apply-patch.js +281 -39
  101. package/dist/tools/ask-user-question.js +213 -0
  102. package/dist/tools/ask-user.js +115 -0
  103. package/dist/tools/file-tools.js +85 -14
  104. package/dist/tools/mcp-tool.js +260 -0
  105. package/dist/tools/multi-edit.js +361 -0
  106. package/dist/tools/registry.js +22 -2
  107. package/dist/tools/skill-tool.js +96 -0
  108. package/dist/tools/tasks.js +208 -0
  109. package/dist/tools/web-fetch.js +147 -2
  110. package/dist/tools/web-search.js +458 -0
  111. package/dist/tui/agent-progress-card.js +111 -0
  112. package/dist/tui/agent-tree.js +10 -0
  113. package/dist/tui/ask-modal.js +2 -2
  114. package/dist/tui/ask-user-question-prompt.js +192 -0
  115. package/dist/tui/compact-banner.js +54 -0
  116. package/dist/tui/conversation-pane.js +69 -8
  117. package/dist/tui/cost-table.js +111 -0
  118. package/dist/tui/doctor-table.js +31 -0
  119. package/dist/tui/input-box.js +1 -1
  120. package/dist/tui/markdown-render.js +4 -4
  121. package/dist/tui/repl-render.js +276 -37
  122. package/dist/tui/repl-splash.js +2 -2
  123. package/dist/tui/repl.js +25 -6
  124. package/dist/tui/splash.js +1 -1
  125. package/dist/tui/status-bar.js +94 -16
  126. package/dist/tui/status-table.js +7 -0
  127. package/dist/tui/tool-stream-pane.js +7 -0
  128. package/dist/tui/update-banner.js +20 -2
  129. package/docs/examples/codegraph.mcp.json +10 -0
  130. package/package.json +9 -6
@@ -1,5 +1,18 @@
1
- import { editTool, globTool, grepTool, OperatorAbortedError, readTool, writeTool, } from '../../tools/file-tools.js';
1
+ import { editTool, globTool, grepTool, OperatorAbortedError, readTool, StaleReadError, writeTool, } from '../../tools/file-tools.js';
2
2
  import { bashToolSync } from '../../tools/bash.js';
3
+ import { askUser } from '../../tools/ask-user.js';
4
+ import { askUserQuestionJsonSchema, dispatchAskUserQuestion, } from '../../tools/ask-user-question.js';
5
+ import { skillInvoke, skillList } from '../../tools/skill-tool.js';
6
+ import { taskCreate, taskGet, taskList, taskUpdate, } from '../../tools/tasks.js';
7
+ import { webFetchTool } from '../../tools/web-fetch.js';
8
+ import { webSearchTool } from '../../tools/web-search.js';
9
+ import { agentTool } from '../../tools/agent-tool.js';
10
+ import { multiEdit } from '../../tools/multi-edit.js';
11
+ import { buildMcpToolDefs, defaultNonInteractiveMcpPrompt, dispatchMcpTool, MCP_TOOL_PREFIX, } from '../../tools/mcp-tool.js';
12
+ import { buildDenialContext, DENIAL_REMINDER_THRESHOLD, } from '../denial-tracking/state.js';
13
+ import { stripInternalFields } from './strip-internal-fields.js';
14
+ import { applyAskAnswer, gate as permissionGate, getToolClass, PermissionDenied, } from '../permissions/index.js';
15
+ import { RetryBudget, RetryBudgetExhausted, hashArgs } from '../retry-budget/index.js';
3
16
  /**
4
17
  * Tool-bridge: turns the abstract tool registry into:
5
18
  * 1. An OpenAI-shaped tools schema for `EngineLoopClient.send`.
@@ -23,17 +36,72 @@ import { bashToolSync } from '../../tools/bash.js';
23
36
  /**
24
37
  * Read-only subset surfaced to plan-mode. Mutating tools (write, edit,
25
38
  * bash) are intentionally absent so the model rarely tries them.
39
+ *
40
+ * β1: task_* + skill + ask_user_question + web_fetch are all read-only
41
+ * from the workspace's perspective (no file writes), so they stay
42
+ * available in plan mode. The ledger writes for `task_*` land in
43
+ * `.pugi/sessions/<id>/tasks.jsonl` which is metadata, not source.
26
44
  */
27
- const READ_ONLY_TOOLS = new Set(['read', 'grep', 'glob']);
45
+ const READ_ONLY_TOOLS = new Set([
46
+ 'read',
47
+ 'grep',
48
+ 'glob',
49
+ 'ask_user_question',
50
+ 'skill',
51
+ 'skills_list',
52
+ 'task_create',
53
+ 'task_get',
54
+ 'task_list',
55
+ 'task_update',
56
+ 'web_fetch',
57
+ // β1b T4 (2026-05-26): web_search is read-only from the workspace's
58
+ // perspective (no file writes, no shell). Egress goes through the
59
+ // Anvil-proxied Brave Search API, gated by the same opt-in posture as
60
+ // web_fetch. Plan mode keeps the tool available because reading the
61
+ // web is part of how a plan is researched.
62
+ 'web_search',
63
+ ]);
28
64
  /**
29
- * Tools we actually wire today. The registry has more entries
30
- * (task_*, skill, question) those route through the runtime layer, not
31
- * the local filesystem, so they ship in a follow-up PR. M1 cornerstone is
32
- * the six core tools.
65
+ * Tools the engine loop dispatches. β1 expands the M1 cornerstone six
66
+ * (read/write/edit/grep/glob/bash) with task_* + ask_user_question +
67
+ * skill + skill list + web_fetch. The registry advertises these slots
68
+ * to the runtime; without dispatcher entries the model's calls would hit
69
+ * "unknown tool" errors.
33
70
  */
34
- const WIRED_TOOLS = new Set(['read', 'write', 'edit', 'grep', 'glob', 'bash']);
35
- export function buildToolsSchema(kind) {
71
+ const WIRED_TOOLS = new Set([
72
+ 'read',
73
+ 'write',
74
+ 'edit',
75
+ 'grep',
76
+ 'glob',
77
+ 'bash',
78
+ 'ask_user_question',
79
+ 'skill',
80
+ 'skills_list',
81
+ 'task_create',
82
+ 'task_get',
83
+ 'task_list',
84
+ 'task_update',
85
+ 'web_fetch',
86
+ // β1b T4: see READ_ONLY_TOOLS above.
87
+ 'web_search',
88
+ // β2 S3 (2026-05-26): real subagent spawn primitive. Only advertised
89
+ // when buildToolsSchema is called with allowAgent=true (orchestrator
90
+ // / root Mira context); plan-mode also excludes it because spawning
91
+ // a write-capable child violates plan-mode's read-only contract.
92
+ 'agent',
93
+ // β7 L5+T11 (2026-05-26): transactional multi-file edit. Routes
94
+ // through the same security gate as Layer A/B/C; not advertised in
95
+ // plan mode (mutation surface).
96
+ 'multi_edit',
97
+ ]);
98
+ export function buildToolsSchema(kind, options = { allowFetch: false, allowSearch: false }) {
36
99
  const planMode = kind === 'plan';
100
+ // β4 M1/M3: build the MCP tool defs BEFORE the native list assembly so the
101
+ // engine-loop sees them in stable alphabetical order alongside native
102
+ // tools. We keep the entries appended after the native push so plan-
103
+ // mode can be filtered by namespace prefix in one place at the end.
104
+ const mcpDefs = buildMcpToolDefs(options.mcpRegistry);
37
105
  const toolDefs = [
38
106
  {
39
107
  name: 'read',
@@ -72,10 +140,199 @@ export function buildToolsSchema(kind) {
72
140
  },
73
141
  },
74
142
  ];
143
+ // β1 T1/T6: TodoWrite (Pugi grammar = `task_*`). Append-only ledger
144
+ // at `.pugi/sessions/<id>/tasks.jsonl`.
145
+ toolDefs.push({
146
+ name: 'task_create',
147
+ description: 'Append a new task to the session todo ledger. Returns the assigned task id and full record. Mirrors Claude Code TodoWrite/create.',
148
+ parameters: {
149
+ type: 'object',
150
+ additionalProperties: false,
151
+ required: ['title'],
152
+ properties: {
153
+ title: { type: 'string', description: 'Short imperative summary, max 2000 chars.' },
154
+ status: {
155
+ type: 'string',
156
+ enum: ['pending', 'in_progress', 'completed', 'cancelled'],
157
+ description: 'Initial status. Default pending.',
158
+ },
159
+ notes: { type: 'string', description: 'Optional free-form context.' },
160
+ },
161
+ },
162
+ }, {
163
+ name: 'task_get',
164
+ description: 'Fetch a single task record by id. Returns null when absent.',
165
+ parameters: {
166
+ type: 'object',
167
+ additionalProperties: false,
168
+ required: ['id'],
169
+ properties: { id: { type: 'string' } },
170
+ },
171
+ }, {
172
+ name: 'task_list',
173
+ description: 'List all tasks for the current session ordered by createdAt ascending.',
174
+ parameters: { type: 'object', additionalProperties: false, properties: {} },
175
+ }, {
176
+ name: 'task_update',
177
+ description: 'Mutate status/title/notes on an existing task. Throws on unknown id. Append-only journal.',
178
+ parameters: {
179
+ type: 'object',
180
+ additionalProperties: false,
181
+ required: ['id'],
182
+ properties: {
183
+ id: { type: 'string' },
184
+ title: { type: 'string' },
185
+ status: {
186
+ type: 'string',
187
+ enum: ['pending', 'in_progress', 'completed', 'cancelled'],
188
+ },
189
+ notes: { type: 'string' },
190
+ },
191
+ },
192
+ });
193
+ // β1 T2 → leak L5 (2026-05-27): structured AskUserQuestion bridge.
194
+ // Schema upgraded to openclaude's multi-choice form: header chip +
195
+ // {label, description} per option. Dispatcher accepts the structured
196
+ // form (preferred) AND the legacy string-array form so existing
197
+ // callers / tests keep working until the next major bump.
198
+ //
199
+ // Interactive TTY → returns the picked label(s).
200
+ // Non-TTY / no bridge → `[user_input_required]` envelope.
201
+ toolDefs.push({
202
+ name: 'ask_user_question',
203
+ description: 'Clarifying multi-choice question to the operator. Use INSTEAD of asking in prose when one parameter is missing. Required: question (?-ended), header (≤12 chars), 2-4 options each with {label, description}. NEVER include "Other" — UI auto-adds. Budget: max 1 per turn.',
204
+ parameters: askUserQuestionJsonSchema,
205
+ });
206
+ // β1 T3: Skill tool — discover + invoke locally-installed skills.
207
+ toolDefs.push({
208
+ name: 'skills_list',
209
+ description: 'List installed skills (global + workspace). Returns name+description+scope.',
210
+ parameters: {
211
+ type: 'object',
212
+ additionalProperties: false,
213
+ properties: {
214
+ scope: { type: 'string', enum: ['all', 'global', 'workspace'] },
215
+ },
216
+ },
217
+ }, {
218
+ name: 'skill',
219
+ description: 'Load a skill body by name. Workspace scope wins over global. Body capped at 32KB.',
220
+ parameters: {
221
+ type: 'object',
222
+ additionalProperties: false,
223
+ required: ['name'],
224
+ properties: { name: { type: 'string' } },
225
+ },
226
+ });
227
+ // β1 T5 → β1a r1 (gating fix, 2026-05-26): WebFetch wire-in. Schema
228
+ // mirrors the existing tool surface in
229
+ // `apps/pugi-cli/src/tools/web-fetch.ts`. SSRF guard runs inside the
230
+ // tool itself, but advertising the tool to the model when the tenant
231
+ // has not opted in is itself a privacy leak — the model could infer
232
+ // URL patterns and try to exfiltrate via the refused call's argument
233
+ // bytes. Only push the schema entry when the operator has explicitly
234
+ // enabled fetch (either via `.pugi/settings.json::web.fetch.enabled`
235
+ // or via `--allow-fetch`).
236
+ if (options.allowFetch) {
237
+ toolDefs.push({
238
+ name: 'web_fetch',
239
+ description: 'One-shot HTTP GET against an operator-supplied URL. Response is parsed to Markdown and wrapped in <untrusted-content> sentinel. Gated off by default.',
240
+ parameters: {
241
+ type: 'object',
242
+ additionalProperties: false,
243
+ required: ['url'],
244
+ properties: {
245
+ url: { type: 'string', description: 'Fully-qualified http(s) URL.' },
246
+ },
247
+ },
248
+ });
249
+ }
250
+ // β1b T4 (2026-05-26): web_search advertisement. Same off-by-default
251
+ // privacy posture as web_fetch — the query string itself is an egress
252
+ // event that can leak operator intent to the upstream Brave Search
253
+ // backend. The tool dispatcher applies SSRF guards (no localhost via
254
+ // the Anvil proxy URL), rate-limits (5 req/min per session), and caps
255
+ // the result payload at 1 MiB. Sentinel-wrapped results so the model
256
+ // treats every snippet as data, not instructions.
257
+ if (options.allowSearch) {
258
+ toolDefs.push({
259
+ name: 'web_search',
260
+ description: 'Search the web via Brave Search (Anvil-proxied). Returns up to 10 sentinel-wrapped {title, url, snippet} results. Rate-limited to 5 calls/min per session. Gated off by default.',
261
+ parameters: {
262
+ type: 'object',
263
+ additionalProperties: false,
264
+ required: ['query'],
265
+ properties: {
266
+ query: {
267
+ type: 'string',
268
+ description: 'Search query, max 256 chars. Plain text — no operators.',
269
+ },
270
+ count: {
271
+ type: 'integer',
272
+ description: 'Optional result count (1..10, default 10).',
273
+ },
274
+ },
275
+ },
276
+ });
277
+ }
278
+ // β2 S3 (2026-05-26): `agent` tool — subagent spawn primitive.
279
+ // Off by default; surfaced only when the caller explicitly opts in
280
+ // (orchestrator parents pass allowAgent=true via the engine adapter).
281
+ // Plan mode FORCES the tool off regardless because a write-capable
282
+ // child would violate plan-mode's read-only contract.
283
+ if (options.allowAgent && !planMode) {
284
+ toolDefs.push({
285
+ name: 'agent',
286
+ description: 'Spawn a specialist subagent under a Cyber-Zoo brand persona. '
287
+ + 'Role selects the persona + isolation tier: '
288
+ + 'researcher/reviewer/architect are read-only, verifier reads + runs tests, '
289
+ + 'coder/release/devops/design_qa get write + bash. '
290
+ + 'The child runs a fresh Anvil engine loop with its own transcript and '
291
+ + 'returns a JSON envelope (filesChanged, toolCallCount, status, summary). '
292
+ + 'Use this when the work needs a specialist persona OR write isolation via a scratch worktree.',
293
+ parameters: {
294
+ type: 'object',
295
+ additionalProperties: false,
296
+ required: ['role', 'brief'],
297
+ properties: {
298
+ role: {
299
+ type: 'string',
300
+ enum: [
301
+ 'orchestrator',
302
+ 'architect',
303
+ 'coder',
304
+ 'verifier',
305
+ 'reviewer',
306
+ 'researcher',
307
+ 'release',
308
+ 'devops',
309
+ 'design_qa',
310
+ ],
311
+ description: 'SubagentRole — selects persona + isolation tier.',
312
+ },
313
+ brief: {
314
+ type: 'string',
315
+ maxLength: 8000,
316
+ description: 'One-paragraph task description forwarded to the child as the user prompt. '
317
+ + 'Be concrete: include filenames, expected behavior, and acceptance criteria.',
318
+ },
319
+ isolation: {
320
+ type: 'string',
321
+ enum: ['worktree', 'shared_fs', 'auto'],
322
+ description: 'Optional override. `worktree` forces a scratch git worktree for write isolation; '
323
+ + '`shared_fs` forces same-tree execution; `auto` (default) defers to the role tier.',
324
+ },
325
+ },
326
+ },
327
+ });
328
+ }
75
329
  if (!planMode) {
76
330
  toolDefs.push({
77
331
  name: 'write',
78
- description: 'Create or overwrite a workspace file. Use for new files only — prefer edit for existing files. Workspace-scoped.',
332
+ description: 'Create or overwrite a workspace file. Prefer edit for existing files. ' +
333
+ 'For OVERWRITE of an existing file, you MUST read the file first in this session — ' +
334
+ 'write refuses with STALE_READ if the file changed since your last read, or if you ' +
335
+ 'never read it. New-file creation (path does not exist) skips that gate. Workspace-scoped.',
79
336
  parameters: {
80
337
  type: 'object',
81
338
  additionalProperties: false,
@@ -87,7 +344,10 @@ export function buildToolsSchema(kind) {
87
344
  },
88
345
  }, {
89
346
  name: 'edit',
90
- description: 'Replace exactly one occurrence of oldString with newString inside an already-read file. Fails if the file changed since you read it or if oldString is missing/duplicate.',
347
+ description: 'Replace exactly one occurrence of oldString with newString inside an already-read file. ' +
348
+ 'Refuses with STALE_READ if the file was never read this session or the on-disk contents ' +
349
+ 'drifted since the read (mtime+sha gate). Recovery: re-read with the `read` tool, then ' +
350
+ 'retry the edit. Also fails if oldString is missing or duplicate.',
91
351
  parameters: {
92
352
  type: 'object',
93
353
  additionalProperties: false,
@@ -109,9 +369,92 @@ export function buildToolsSchema(kind) {
109
369
  command: { type: 'string', description: 'Single shell command to execute.' },
110
370
  },
111
371
  },
372
+ },
373
+ // β7 L5+T11 (2026-05-26): transactional multi-file edit. Either
374
+ // all entries land or none do — failures roll the workspace back
375
+ // via the same journal + snapshot machinery the dispatcher uses.
376
+ // Cap is 50 entries; beyond that the operator (or model) should
377
+ // split the refactor or use Layer C rewrites.
378
+ {
379
+ name: 'multi_edit',
380
+ description: 'Apply an ordered batch of single-occurrence file edits as one transaction. ' +
381
+ 'Each entry is {file, oldString, newString} like the `edit` tool. Either every ' +
382
+ 'edit lands or none do — a failure rolls the workspace back to the pre-dispatch ' +
383
+ 'state via journal + snapshot. Cap 50 edits per call. Use this for coordinated ' +
384
+ 'refactors (rename across files, add an import to many modules).',
385
+ parameters: {
386
+ type: 'object',
387
+ additionalProperties: false,
388
+ required: ['edits'],
389
+ properties: {
390
+ edits: {
391
+ type: 'array',
392
+ minItems: 1,
393
+ maxItems: 50,
394
+ items: {
395
+ type: 'object',
396
+ additionalProperties: false,
397
+ required: ['file', 'oldString', 'newString'],
398
+ properties: {
399
+ file: { type: 'string', description: 'Workspace-relative file path.' },
400
+ oldString: { type: 'string', description: 'Verbatim substring; must be unique in the pre-edit file.' },
401
+ newString: { type: 'string', description: 'Replacement string. Empty string means delete.' },
402
+ },
403
+ },
404
+ },
405
+ },
406
+ },
112
407
  });
113
408
  }
114
- return toolDefs;
409
+ // β4 M1/M3: append MCP tools last. Plan mode skips them because every
410
+ // MCP tool is treated as medium-risk until per-tool annotations land
411
+ // in the MCP spec; treating MCP read-as-read would require server-
412
+ // side metadata we cannot trust today (a misconfigured server could
413
+ // claim `read` while running a destructive op).
414
+ if (!planMode) {
415
+ for (const def of mcpDefs) {
416
+ toolDefs.push({
417
+ name: def.name,
418
+ description: def.description,
419
+ parameters: def.parameters,
420
+ });
421
+ }
422
+ }
423
+ // α7 L3 (2026-05-27): leak-parity underscore-prefix filter. Every
424
+ // tool's parameter schema is scrubbed of `_`-prefixed fields before
425
+ // the model ever sees it. Native tool schemas above currently declare
426
+ // no `_*` fields, but MCP tools surfaced through buildMcpToolDefs
427
+ // come from third-party servers whose authors may follow the same
428
+ // convention (an MCP tool can declare `_sessionId` knowing the CLI
429
+ // dispatcher will inject it before forwarding). The dispatcher
430
+ // (buildExecutor below) does NOT strip these from the args record at
431
+ // call time — `_internal*` keys still flow through to tool handlers
432
+ // when an upstream layer populates them.
433
+ return toolDefs.map((tool) => ({
434
+ name: tool.name,
435
+ description: tool.description,
436
+ parameters: stripInternalFields(tool.parameters),
437
+ }));
438
+ }
439
+ /**
440
+ * α7 L11: tolerant args-parse for the denial fingerprint. Unlike
441
+ * `parseArgs` (which throws on malformed JSON so the model sees a
442
+ * parse error), this swallows failures and returns `{}` for empty input or `{ _rawArgs }` for unparseable input — the denial
443
+ * tracker needs SOME key even when the raw payload is unparseable,
444
+ * because malformed-args spam is itself a pattern operators want to
445
+ * see in `/permissions denials`.
446
+ */
447
+ function safeParseForTracking(raw) {
448
+ if (!raw || raw.trim() === '')
449
+ return {};
450
+ try {
451
+ return JSON.parse(raw);
452
+ }
453
+ catch {
454
+ // Use the raw string as the fingerprint payload so repeated
455
+ // identical malformed dispatches still cluster.
456
+ return { _rawArgs: raw.slice(0, 512) };
457
+ }
115
458
  }
116
459
  function parseArgs(raw) {
117
460
  if (!raw || raw.trim() === '')
@@ -127,25 +470,139 @@ function parseArgs(raw) {
127
470
  throw new Error(`invalid JSON in tool arguments: ${error.message}`);
128
471
  }
129
472
  }
473
+ /**
474
+ * Strict canonical-only argument coercion (leak P0 L2, 2026-05-27).
475
+ *
476
+ * Reverts the beta.17 alias acceptance (`file` / `filename` / `filepath`
477
+ * / `file_path` → `path`). The alias shim was the wrong direction: it
478
+ * paved over a model-side prompt-drift bug at the runtime layer, weakened
479
+ * the strict JSON-Schema contract one layer up (`additionalProperties:
480
+ * false`), and drifted away from the openclaude reference (research memo
481
+ * §1.1 — `z.strictObject` rejects aliased fields).
482
+ *
483
+ * The compensating change ships in the persona prompts: Mira's system
484
+ * prompt and Hiroshi's persona body now declare canonical parameter
485
+ * names with few-shot wrong/right contrasts so the model learns the
486
+ * grammar upstream of the bridge.
487
+ */
130
488
  function requireString(obj, key) {
131
489
  const v = obj[key];
132
- if (typeof v !== 'string') {
133
- throw new Error(`tool argument "${key}" must be a string`);
134
- }
135
- return v;
490
+ if (typeof v === 'string')
491
+ return v;
492
+ throw new Error(`tool argument "${key}" must be a string`);
136
493
  }
137
494
  export function buildExecutor(input) {
138
- const { kind, ctx, hooks, sessionId } = input;
495
+ const { kind, ctx, hooks, sessionId, askUserBridge, interactive, allowFetch, allowSearch, agentDispatch, mcpRegistry, permissionMode, permissionAlwaysCache, permissionAsk, } = input;
496
+ // Leak L31: per-cycle budget. Default to a fresh instance scoped to
497
+ // this executor's closure lifetime; tests pass their own.
498
+ const retryBudget = input.retryBudget ?? new RetryBudget();
499
+ const mcpPrompt = input.mcpPrompt ?? defaultNonInteractiveMcpPrompt;
500
+ const workspaceRoot = input.workspaceRoot ?? ctx.root;
139
501
  const planMode = kind === 'plan';
502
+ const denialTracking = input.denialTracking;
503
+ // α7 L11: helper that records a denial (when tracking is wired) and
504
+ // ALWAYS returns an Error whose message includes a compact
505
+ // `<denial-context>` reminder when the same (tool, args) pair has
506
+ // already been refused at least once before in this session.
507
+ //
508
+ // The reminder is appended to the THROWN message — the engine loop
509
+ // appends thrown messages to the transcript as tool-result strings,
510
+ // so the model sees the aggregate the next time it considers a
511
+ // dispatch. Without this every retry would only see the latest
512
+ // single-turn reason and could loop indefinitely.
513
+ //
514
+ // Best-effort: a hash/clone failure inside the tracker MUST NOT
515
+ // mask the original refusal. The catch path falls back to a bare
516
+ // Error with the reason text.
517
+ const recordDenial = (toolName, args, reason) => {
518
+ if (!denialTracking)
519
+ return new Error(reason);
520
+ try {
521
+ const record = denialTracking.recordDenial(toolName, args, reason);
522
+ // Only inject the reminder once the threshold is hit — the very
523
+ // first denial is the model's first chance to learn, no need to
524
+ // shout. From the 2nd repeat onwards the model has demonstrated
525
+ // it is not learning from the single-turn sentinel, so we splice
526
+ // the aggregate context.
527
+ if (record.count >= DENIAL_REMINDER_THRESHOLD) {
528
+ const reminder = buildDenialContext(denialTracking);
529
+ if (reminder.length > 0) {
530
+ return new Error(`${reason}\n\n${reminder}`);
531
+ }
532
+ }
533
+ }
534
+ catch {
535
+ // Tracking is best-effort. Fall through to the bare Error so
536
+ // the refusal still propagates.
537
+ }
538
+ return new Error(reason);
539
+ };
140
540
  return async ({ name, arguments: argsRaw }) => {
141
- if (!WIRED_TOOLS.has(name)) {
142
- throw new Error(`unknown tool: ${name}`);
541
+ // β4 M1/M3: MCP tool names live outside WIRED_TOOLS. They are
542
+ // validated lazily by the dispatcher (the registry knows which
543
+ // names are actually exposed). The namespace check happens FIRST
544
+ // so a bad `mcp__bogus__foo` does not collide with the native
545
+ // unknown-tool branch.
546
+ const isMcpName = name.startsWith(MCP_TOOL_PREFIX);
547
+ // α7 L11: parse-or-empty args once up-front so every deny path
548
+ // below can fingerprint the call against the denial tracker. We
549
+ // tolerate parse failure — the fallback object still produces a stable hash
550
+ // (the model may have sent malformed JSON, but the refusal is
551
+ // semantic, not parse-driven).
552
+ const argsForTracking = safeParseForTracking(argsRaw);
553
+ if (!isMcpName && !WIRED_TOOLS.has(name)) {
554
+ throw recordDenial(name, argsForTracking, `unknown tool: ${name}`);
555
+ }
556
+ // Leak L6 — canonical 4-mode permission gate. Routes the dispatch
557
+ // decision BEFORE the legacy plan-mode-only enforcement so the new
558
+ // surface is the source of truth when the caller opted in. Absent
559
+ // `permissionMode` falls through to the legacy plan-mode branch
560
+ // (existing semantics preserved for callsites that have not
561
+ // migrated yet).
562
+ let hooksBypassed = false;
563
+ if (permissionMode) {
564
+ const decision = permissionGate(name, argsRaw, {
565
+ permissionMode,
566
+ ...(permissionAlwaysCache ? { alwaysCache: permissionAlwaysCache } : {}),
567
+ });
568
+ if (decision.decision === 'deny') {
569
+ throw new PermissionDenied(name, getToolClass(name), permissionMode, decision.reason);
570
+ }
571
+ if (decision.decision === 'ask') {
572
+ if (!permissionAsk) {
573
+ // Non-interactive caller (CI / pipes / agent-as-tool) cannot
574
+ // surface a prompt. Collapse to deny so the loop receives a
575
+ // deterministic refusal instead of hanging.
576
+ throw new PermissionDenied(name, decision.toolClass, permissionMode, `Ask mode: no operator prompt available for ${name} (non-interactive caller)`);
577
+ }
578
+ const answer = await permissionAsk({
579
+ toolName: name,
580
+ toolClass: decision.toolClass,
581
+ question: decision.question,
582
+ options: decision.options,
583
+ });
584
+ const verdict = permissionAlwaysCache
585
+ ? applyAskAnswer(permissionAlwaysCache, name, answer)
586
+ : applyAskAnswer({ alwaysAllowed: new Set(), alwaysDenied: new Set() }, name, answer);
587
+ if (verdict.decision === 'deny') {
588
+ throw new PermissionDenied(name, decision.toolClass, permissionMode, verdict.reason);
589
+ }
590
+ // verdict.decision === 'allow' falls through to dispatch.
591
+ }
592
+ else {
593
+ // allow — honour the bypass flag for the hook layer below.
594
+ hooksBypassed = decision.hooksBypassed === true;
595
+ }
143
596
  }
144
- if (planMode && !READ_ONLY_TOOLS.has(name)) {
145
- // Sentinel recognised by `runEngineLoop` terminates the loop
146
- // with status `tool_refused`. The CLI surfaces this as a blocked
147
- // outcome, not a failure, because plan mode is doing its job.
148
- throw new Error(`PLAN_MODE_REFUSED: ${name} is not allowed in plan mode`);
597
+ else if (planMode) {
598
+ // Legacy plan-mode enforcement (kind === 'plan') stays in place
599
+ // for callers that have not opted into the canonical gate.
600
+ // MCP tools are uniformly refused in plan mode (see schema-side
601
+ // rationale in buildToolsSchema). Native tools split via
602
+ // READ_ONLY_TOOLS as before.
603
+ if (isMcpName || !READ_ONLY_TOOLS.has(name)) {
604
+ throw recordDenial(name, argsForTracking, `PLAN_MODE_REFUSED: ${name} is not allowed in plan mode`);
605
+ }
149
606
  }
150
607
  // α6.9: refuse cancelled-token tool dispatch BEFORE PreToolUse
151
608
  // hooks fire so a cancelled brief never reaches user-defined
@@ -153,13 +610,32 @@ export function buildExecutor(input) {
153
610
  // by `runEngineLoop` as a terminal-cancel signal so the loop
154
611
  // returns control to the caller rather than retrying the model.
155
612
  if (ctx.cancellation && ctx.cancellation.isAborted) {
156
- throw new Error(`OPERATOR_ABORTED: ${name} refused — operator cancelled the dispatch.`);
613
+ throw recordDenial(name, argsForTracking, `OPERATOR_ABORTED: ${name} refused — operator cancelled the dispatch.`);
614
+ }
615
+ // Leak L31 — per-cycle tool retry budget. Same tool + same canonical
616
+ // args = same bucket. Once the cap is hit we throw a typed sentinel
617
+ // so the model is forced out of a repair loop. We gate AFTER
618
+ // permission (denied calls do not burn budget) and BEFORE PreToolUse
619
+ // hooks (hook-blocked retries DO count — the model still issued the
620
+ // same call). The `recordAttempt` fires unconditionally so warn-only
621
+ // mode (PUGI_RETRY_BUDGET_DISABLED=1) still tracks the pattern for
622
+ // diagnostics.
623
+ const argHash = hashArgs(argsRaw);
624
+ const budgetDecision = retryBudget.shouldAllow(name, argHash);
625
+ retryBudget.recordAttempt(name, argHash);
626
+ if (!budgetDecision.allowed) {
627
+ throw new RetryBudgetExhausted(name, budgetDecision.cap, argHash);
157
628
  }
158
629
  // Fire PreToolUse hooks. The match grammar takes the tool name and
159
630
  // (when extractable) the target path. Each new tool dispatch starts a
160
631
  // fresh dedup batch so a hook fires once per dispatch, not once per
161
632
  // session.
162
- if (hooks && sessionId) {
633
+ //
634
+ // Leak L6 — bypass mode skips the entire hook layer (PreToolUse +
635
+ // PostToolUse + PostToolUseFailure). The gate's allow decision
636
+ // carries the `hooksBypassed` flag; we honour it here so the
637
+ // executor stays single-pass.
638
+ if (hooks && sessionId && !hooksBypassed) {
163
639
  hooks.resetBatch();
164
640
  const path = extractToolPath(name, argsRaw);
165
641
  const preCtx = {
@@ -179,17 +655,76 @@ export function buildExecutor(input) {
179
655
  const hook = matchingPreHooks[i];
180
656
  const result = preResults[i];
181
657
  if (hook && result && hook.onFailure === 'block' && !result.ok) {
182
- throw new Error(`HOOK_BLOCKED: PreToolUse hook (${hook.run.slice(0, 80)}) refused ${name} (exit=${result.exitCode})`);
658
+ // α7 L11: record the PreToolUse hook denial so the model
659
+ // sees the pattern reminder on subsequent turns. Without
660
+ // this the model would re-issue the same refused call and
661
+ // burn a turn each time before noticing the loop.
662
+ throw recordDenial(name, argsForTracking, `HOOK_BLOCKED: PreToolUse hook (${hook.run.slice(0, 80)}) refused ${name} (exit=${result.exitCode})`);
183
663
  }
184
664
  }
185
665
  }
186
- const args = parseArgs(argsRaw);
666
+ // β4 M1/M3: MCP dispatch deferred to the `dispatch` closure below so
667
+ // PostToolUse / PostToolUseFailure hooks observe MCP calls just like
668
+ // native calls. The dispatcher does its own argument parsing — MCP
669
+ // arg errors surface as model-visible `[MCP dispatch error] ...`
670
+ // strings, not throws.
671
+ const args = isMcpName ? {} : parseArgs(argsRaw);
187
672
  const dispatch = async () => {
673
+ if (isMcpName) {
674
+ return dispatchMcpTool({
675
+ name,
676
+ argumentsRaw: argsRaw,
677
+ registry: mcpRegistry,
678
+ prompt: mcpPrompt,
679
+ });
680
+ }
681
+ // β1 T1/T2/T3/T5/T6: async-dispatch the new tool surface.
682
+ // task_*, skill, ask_user_question, web_fetch all live behind
683
+ // an async or async-compatible boundary.
684
+ if (name === 'task_create' || name === 'task_get' || name === 'task_list' || name === 'task_update') {
685
+ return dispatchTaskTool(name, args, { workspaceRoot, sessionId });
686
+ }
687
+ if (name === 'ask_user_question') {
688
+ return dispatchAskUser(args, { interactive: Boolean(interactive), bridge: askUserBridge });
689
+ }
690
+ if (name === 'skill' || name === 'skills_list') {
691
+ return dispatchSkillTool(name, args, { workspaceRoot });
692
+ }
693
+ if (name === 'web_fetch') {
694
+ return dispatchWebFetch(args, { ctx, allowFetch: Boolean(allowFetch) });
695
+ }
696
+ if (name === 'web_search') {
697
+ return dispatchWebSearch(args, {
698
+ ctx,
699
+ allowSearch: Boolean(allowSearch),
700
+ sessionId,
701
+ });
702
+ }
703
+ if (name === 'multi_edit') {
704
+ return dispatchMultiEdit(args, ctx);
705
+ }
706
+ if (name === 'agent') {
707
+ // β2a r1 (Backend Architect P1, 2026-05-26): defense in depth.
708
+ // `WIRED_TOOLS` includes `agent`, so a plan-mode model that
709
+ // fabricates an `agent` tool call would otherwise be routed
710
+ // here. The plan-mode refusal at the top of the executor only
711
+ // fires for tools NOT in READ_ONLY_TOOLS; `agent` is
712
+ // intentionally absent from both sets, so we explicitly refuse
713
+ // it here. This pairs with `native-pugi.ts` hard-gating
714
+ // `agentDispatch` itself off in plan mode — without this
715
+ // defensive throw a future schema bug could let a plan-mode
716
+ // model spawn a write-capable child and break the read-only
717
+ // contract.
718
+ if (planMode) {
719
+ throw recordDenial(name, argsForTracking, 'PLAN_MODE_REFUSED: agent is not allowed in plan mode');
720
+ }
721
+ return dispatchAgent(args, agentDispatch);
722
+ }
188
723
  return dispatchTool(name, args, ctx);
189
724
  };
190
725
  try {
191
726
  const result = await dispatch();
192
- if (hooks && sessionId) {
727
+ if (hooks && sessionId && !hooksBypassed) {
193
728
  const path = extractToolPath(name, argsRaw);
194
729
  await hooks.fire({
195
730
  sessionId,
@@ -202,6 +737,27 @@ export function buildExecutor(input) {
202
737
  return result;
203
738
  }
204
739
  catch (error) {
740
+ // Leak L6 — surface the PermissionDenied sentinel as a model-
741
+ // readable message instead of leaking the raw Error type. The
742
+ // string format is stable so the engine adapter / spec layer
743
+ // can pattern-match against it.
744
+ if (error instanceof PermissionDenied) {
745
+ // PostToolUseFailure fires for visibility unless bypass is on.
746
+ if (hooks && sessionId && !hooksBypassed) {
747
+ await hooks.fire({
748
+ sessionId,
749
+ event: 'PostToolUseFailure',
750
+ tool: name,
751
+ payload: {
752
+ tool: name,
753
+ arguments: argsRaw,
754
+ ok: false,
755
+ error: error.toModelMessage(),
756
+ },
757
+ });
758
+ }
759
+ throw new Error(error.toModelMessage());
760
+ }
205
761
  // α6.9: re-shape OperatorAbortedError throws from the
206
762
  // file-tools layer into the same `OPERATOR_ABORTED:` sentinel
207
763
  // the upstream cancellation gate uses so `runEngineLoop` sees
@@ -209,7 +765,7 @@ export function buildExecutor(input) {
209
765
  // the abort landed pre-dispatch or mid-tool (e.g. inside the
210
766
  // grep file-loop).
211
767
  if (error instanceof OperatorAbortedError) {
212
- if (hooks && sessionId) {
768
+ if (hooks && sessionId && !hooksBypassed) {
213
769
  const path = extractToolPath(name, argsRaw);
214
770
  await hooks.fire({
215
771
  sessionId,
@@ -224,9 +780,35 @@ export function buildExecutor(input) {
224
780
  },
225
781
  });
226
782
  }
227
- throw new Error(`OPERATOR_ABORTED: ${name} aborted mid-execution.`);
783
+ throw recordDenial(name, argsForTracking, `OPERATOR_ABORTED: ${name} aborted mid-execution.`);
784
+ }
785
+ // Leak L1 (2026-05-27): re-shape StaleReadError into a
786
+ // deterministic STALE_READ:<reason> sentinel so the model's
787
+ // retry policy can pattern-match on a stable prefix instead of
788
+ // free-form prose. The model is expected to re-read the file and
789
+ // retry the edit — the message points it at exactly that recovery
790
+ // path. PostToolUseFailure hooks observe the typed error so an
791
+ // operator can build a "warn me when stale edits keep happening"
792
+ // hook (likely a concurrency / multi-agent indicator).
793
+ if (error instanceof StaleReadError) {
794
+ if (hooks && sessionId && !hooksBypassed) {
795
+ const path = extractToolPath(name, argsRaw);
796
+ await hooks.fire({
797
+ sessionId,
798
+ event: 'PostToolUseFailure',
799
+ tool: name,
800
+ path,
801
+ payload: {
802
+ tool: name,
803
+ arguments: argsRaw,
804
+ ok: false,
805
+ error: `STALE_READ: ${error.reason} on ${error.path}`,
806
+ },
807
+ });
808
+ }
809
+ throw recordDenial(name, argsForTracking, `STALE_READ: ${name} on ${error.path} refused (${error.reason}). Re-read the file with the \`read\` tool, then retry the ${name}.`);
228
810
  }
229
- if (hooks && sessionId) {
811
+ if (hooks && sessionId && !hooksBypassed) {
230
812
  const path = extractToolPath(name, argsRaw);
231
813
  await hooks.fire({
232
814
  sessionId,
@@ -342,4 +924,209 @@ function dispatchTool(name, args, ctx) {
342
924
  throw new Error(`unhandled tool: ${name}`);
343
925
  }
344
926
  }
927
+ /* ----------------------------- β1 dispatchers ----------------------------- */
928
/**
 * Route a task-ledger tool call to the matching ledger primitive and
 * return the outcome serialized as JSON text.
 *
 * Handles `task_create`, `task_get`, `task_list` and `task_update`.
 * Optional fields (`status`, `notes`, and `title` on update) are only
 * forwarded when the model actually supplied them, so the ledger never
 * receives explicit `undefined` values.
 *
 * @param {string} name - Tool name emitted by the model.
 * @param {object} args - Parsed tool arguments.
 * @param {{ workspaceRoot: *, sessionId: * }} opts - Ledger scope; a falsy
 *   `sessionId` is rejected up front.
 * @returns {string} JSON text of the record or list; the literal `'null'`
 *   when `task_get` yields a falsy record.
 * @throws {Error} When no session is in scope, when an argument helper
 *   rejects a value, or when `name` is not one of the four task tools.
 */
function dispatchTaskTool(name, args, opts) {
    if (!opts.sessionId) {
        throw new Error(`${name}: no sessionId in scope — task ledger requires a session`);
    }
    const tctx = { workspaceRoot: opts.workspaceRoot, sessionId: opts.sessionId };
    switch (name) {
        case 'task_create': {
            const title = requireString(args, 'title');
            const status = optionalString(args, 'status');
            const notes = optionalString(args, 'notes');
            const record = taskCreate(tctx, {
                title,
                ...(status !== undefined ? { status } : {}),
                ...(notes !== undefined ? { notes } : {}),
            });
            return JSON.stringify(record);
        }
        case 'task_get': {
            const id = requireString(args, 'id');
            const record = taskGet(tctx, id);
            return record ? JSON.stringify(record) : 'null';
        }
        case 'task_list': {
            const list = taskList(tctx);
            return JSON.stringify(list);
        }
        case 'task_update': {
            const id = requireString(args, 'id');
            const title = optionalString(args, 'title');
            const status = optionalString(args, 'status');
            const notes = optionalString(args, 'notes');
            const record = taskUpdate(tctx, {
                id,
                ...(title !== undefined ? { title } : {}),
                ...(status !== undefined ? { status } : {}),
                ...(notes !== undefined ? { notes } : {}),
            });
            return JSON.stringify(record);
        }
        default:
            // Fail loudly instead of handing `undefined` back to the engine
            // loop if a caller ever routes a non-task tool here.
            throw new Error(`unhandled task tool: ${name}`);
    }
}
969
/**
 * Dispatch the `ask_user_question` tool and return the resulting envelope.
 *
 * Two option shapes are accepted. When the first entry of `options` is a
 * plain (non-null, non-array) object the call is forwarded untouched to
 * `dispatchAskUserQuestion`. Otherwise the legacy form applies: `question`
 * must be a string and every option must be a string.
 *
 * @param {object} args - Tool arguments (`options`, plus `question` and
 *   `multiSelect` in the legacy form).
 * @param {{ interactive: *, bridge?: * }} opts - Prompt context; `bridge`
 *   is only forwarded when truthy.
 * @returns {Promise<*>} The `envelope` field of the underlying ask result.
 * @throws {Error} When `options` is not an array or a legacy option is not
 *   a string.
 */
async function dispatchAskUser(args, opts) {
    const rawOptions = args['options'];
    if (!Array.isArray(rawOptions)) {
        throw new Error('ask_user_question: options must be an array');
    }
    // Same context object for both paths: `bridge` key present only when set.
    const askContext = opts.bridge
        ? { interactive: opts.interactive, bridge: opts.bridge }
        : { interactive: opts.interactive };
    // Only the first entry decides which form the caller used.
    const [firstOption] = rawOptions;
    const isStructuredForm = rawOptions.length > 0
        && firstOption !== null
        && typeof firstOption === 'object'
        && !Array.isArray(firstOption);
    if (isStructuredForm) {
        const structured = await dispatchAskUserQuestion(askContext, args);
        return structured.envelope;
    }
    // Legacy string-array form: the question is checked before the options.
    const question = requireString(args, 'question');
    const ensureString = (option, index) => {
        if (typeof option === 'string') {
            return option;
        }
        throw new Error(`ask_user_question: options[${index}] must be a string`);
    };
    const options = rawOptions.map(ensureString);
    const multiSelect = args['multiSelect'] === true;
    const legacy = await askUser(askContext, { question, options, multiSelect });
    return legacy.envelope;
}
1000
/**
 * Dispatch the skill tools: `skills_list` enumerates skills, any other
 * name is treated as `skill` (invoke).
 *
 * @param {string} name - `'skills_list'` or `'skill'`.
 * @param {object} args - Tool arguments (`scope` for listing, `name` for
 *   invocation).
 * @param {{ workspaceRoot: * }} opts - Workspace the skills are resolved in.
 * @returns {Promise<string>} JSON text of the list or the invoke result.
 */
async function dispatchSkillTool(name, args, opts) {
    const skillContext = { workspaceRoot: opts.workspaceRoot };
    if (name !== 'skills_list') {
        // Invocation is awaited so a rejection from `skillInvoke` surfaces
        // as a tool error instead of a serialized pending promise.
        const skName = requireString(args, 'name');
        const invoked = await skillInvoke(skillContext, { name: skName });
        return JSON.stringify(invoked);
    }
    // Any scope other than the two recognised values falls back to 'all'.
    const requestedScope = optionalString(args, 'scope');
    let scope = 'all';
    if (requestedScope === 'global' || requestedScope === 'workspace') {
        scope = requestedScope;
    }
    return JSON.stringify(skillList(skillContext, { scope }));
}
1016
/**
 * Dispatch the `web_fetch` tool and serialize its result.
 *
 * @param {object} args - Tool arguments; `url` is required.
 * @param {{ ctx: { settings: * }, allowFetch: * }} opts - Settings and the
 *   fetch permission flag, forwarded to `webFetchTool` as-is.
 * @returns {Promise<string>} JSON text of the fetch result.
 */
async function dispatchWebFetch(args, opts) {
    // The URL is validated before `opts` is touched.
    const request = { url: requireString(args, 'url') };
    const toolOptions = {
        settings: opts.ctx.settings,
        allowFetch: opts.allowFetch,
    };
    const outcome = await webFetchTool(request, toolOptions);
    return JSON.stringify(outcome);
}
1024
/**
 * Dispatch the `web_search` tool and serialize its result.
 *
 * `count` is an optional integer documented as 1..10. Non-integers are
 * rejected; integers outside the range are clamped here so the tool layer
 * always receives an in-range value (the tool is said to clamp internally
 * as well, so this is defense in depth rather than a new restriction).
 *
 * @param {object} args - Tool arguments: `query` (required) and `count`
 *   (optional).
 * @param {{ ctx: { settings: * }, allowSearch: *, sessionId: * }} opts -
 *   Settings, search permission flag and session id, forwarded as-is.
 * @returns {Promise<string>} JSON text of the search result.
 * @throws {Error} When `count` is present but not an integer.
 */
async function dispatchWebSearch(args, opts) {
    const minCount = 1;
    const maxCount = 10;
    const query = requireString(args, 'query');
    const rawCount = args['count'];
    let count;
    if (rawCount !== undefined && rawCount !== null) {
        // Number.isInteger is false for every non-number, so it also covers
        // the type check.
        if (!Number.isInteger(rawCount)) {
            throw new Error('web_search: count must be an integer');
        }
        count = Math.min(maxCount, Math.max(minCount, rawCount));
    }
    const result = await webSearchTool({ query, ...(count !== undefined ? { count } : {}) }, {
        settings: opts.ctx.settings,
        allowSearch: opts.allowSearch,
        sessionId: opts.sessionId,
    });
    return JSON.stringify(result);
}
1044
/**
 * β2 S3 dispatch — wire the model-emitted `agent` tool call to the
 * real subagent spawn primitive. When the executor was built without
 * `agentDispatch` (e.g. a child loop, or a parent that explicitly
 * disabled subagent spawn), the call is refused with a structured
 * envelope so the model can adapt instead of crashing the parent loop.
 *
 * @param {object} args - Raw tool arguments; validated by `parseAgentArgs`.
 * @param {{ parentSession: *, engineClient: *, parentBudgetRemaining?: * } | undefined} opts -
 *   Dispatch context; a falsy value means subagent spawn is not wired.
 * @returns {Promise<string>} JSON text of the agent result, or of the
 *   refusal envelope `{ ok: false, status: 'failed', summary }`.
 */
async function dispatchAgent(args, opts) {
    if (!opts) {
        // No dispatch context — return a structured refusal envelope.
        // This matches the agent-tool.ts no-engine-client path and lets
        // the model decide whether to retry inline or abandon the
        // delegation. Throwing here would terminate the parent on a tool
        // error frame which is the wrong UX when the issue is config.
        return JSON.stringify({
            ok: false,
            status: 'failed',
            summary: 'agent tool refused: dispatch not wired in this engine adapter. '
                + 'Re-run from a parent loop with agentDispatch configured.',
        });
    }
    const parsed = parseAgentArgs(args);
    // Forward the budget whenever it was provided. A truthiness test would
    // silently drop an exhausted (zero) budget and spawn the child without
    // any cap. NOTE(review): the budget's exact type is not visible from
    // this block — confirm a zero value is meaningful to agentTool.
    const hasBudget = opts.parentBudgetRemaining !== undefined
        && opts.parentBudgetRemaining !== null;
    const result = await agentTool(parsed, {
        session: opts.parentSession,
        engineClient: opts.engineClient,
        ...(hasBudget ? { parentBudgetRemaining: opts.parentBudgetRemaining } : {}),
    });
    return JSON.stringify(result);
}
1075
/**
 * Pre-validate the `agent` tool arguments so a malformed call produces a
 * short, model-readable error before the agent tool's own validation runs.
 *
 * @param {object} args - Raw tool arguments.
 * @returns {{ role: string, brief: string, isolation?: string }} The
 *   extracted fields; `isolation` is present only when a non-empty string
 *   was supplied.
 */
function parseAgentArgs(args) {
    const parsed = {
        role: requireString(args, 'role'),
        brief: requireString(args, 'brief'),
    };
    const isolation = optionalString(args, 'isolation');
    // Absent and empty-string isolation are both left off the result.
    if (isolation) {
        parsed.isolation = isolation;
    }
    return parsed;
}
1089
/**
 * Read an optional string-valued tool argument.
 *
 * @param {object} obj - Argument bag to read from.
 * @param {string} key - Property name to look up.
 * @returns {string|undefined} The string value, or `undefined` when the
 *   property is missing or `null`.
 * @throws {Error} When the property is present but not a string.
 */
function optionalString(obj, key) {
    const value = obj[key];
    if (typeof value === 'string') {
        return value;
    }
    // `null` is treated the same as an absent property.
    if (value === undefined || value === null) {
        return undefined;
    }
    throw new Error(`tool argument "${key}" must be a string when present`);
}
1098
/**
 * β7 L5+T11: dispatch the model-emitted `multi_edit` tool call. The
 * tool returns a structured result envelope; we serialize it to JSON
 * for the engine loop. A refused dispatch (security, no_match,
 * ambiguous_match, etc.) surfaces as `ok: false` in the envelope —
 * the model can re-strategise rather than crashing the loop.
 *
 * Each entry of `edits` is validated and narrowed to exactly
 * `{ file, oldString, newString }` before it reaches the tool, so extra
 * properties supplied by the model are dropped.
 *
 * @param {object} args - Tool arguments; `edits` must be an array.
 * @param {*} ctx - Tool context, passed through to `multiEdit` untouched.
 * @returns {string} JSON text of the `multiEdit` result.
 * @throws {Error} When `edits` is not an array, an entry is not a plain
 *   object, or one of its three fields is not a string.
 */
function dispatchMultiEdit(args, ctx) {
    const raw = args['edits'];
    if (!Array.isArray(raw)) {
        throw new Error('multi_edit: edits must be an array');
    }
    const edits = raw.map((item, i) => {
        // Arrays are `typeof 'object'` too; reject them here so the model
        // gets the accurate "must be an object" message rather than a
        // misleading complaint about a missing `.file`.
        if (!item || typeof item !== 'object' || Array.isArray(item)) {
            throw new Error(`multi_edit: edits[${i}] must be an object`);
        }
        const { file, oldString, newString } = item;
        if (typeof file !== 'string') {
            throw new Error(`multi_edit: edits[${i}].file must be a string`);
        }
        if (typeof oldString !== 'string') {
            throw new Error(`multi_edit: edits[${i}].oldString must be a string`);
        }
        if (typeof newString !== 'string') {
            throw new Error(`multi_edit: edits[${i}].newString must be a string`);
        }
        return { file, oldString, newString };
    });
    const result = multiEdit(ctx, edits);
    return JSON.stringify(result);
}
345
1132
  //# sourceMappingURL=tool-bridge.js.map