@pugi/cli 0.1.0-beta.2 → 0.1.0-beta.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. package/THIRD_PARTY_NOTICES.md +40 -0
  2. package/assets/pugi-mascot.ansi +15 -40
  3. package/bin/run.js +33 -1
  4. package/dist/commands/jobs-watch.js +201 -0
  5. package/dist/commands/jobs.js +15 -0
  6. package/dist/core/agent-progress/cleanup.js +134 -0
  7. package/dist/core/agent-progress/schema.js +144 -0
  8. package/dist/core/agent-progress/writer.js +101 -0
  9. package/dist/core/compact/auto-trigger.js +96 -0
  10. package/dist/core/compact/buffer-rewriter.js +115 -0
  11. package/dist/core/compact/summarizer.js +196 -0
  12. package/dist/core/compact/token-counter.js +108 -0
  13. package/dist/core/consensus/diff-capture.js +73 -0
  14. package/dist/core/context/index.js +7 -0
  15. package/dist/core/context/markdown-traverse.js +255 -0
  16. package/dist/core/cost/rate-card.js +129 -0
  17. package/dist/core/cost/tracker.js +221 -0
  18. package/dist/core/denial-tracking/index.js +8 -0
  19. package/dist/core/denial-tracking/state.js +264 -0
  20. package/dist/core/diagnostics/probe-runner.js +93 -0
  21. package/dist/core/diagnostics/probes/api.js +46 -0
  22. package/dist/core/diagnostics/probes/auth.js +86 -0
  23. package/dist/core/diagnostics/probes/cli-version.js +127 -0
  24. package/dist/core/diagnostics/probes/config.js +72 -0
  25. package/dist/core/diagnostics/probes/denial-tracking.js +57 -0
  26. package/dist/core/diagnostics/probes/disk.js +81 -0
  27. package/dist/core/diagnostics/probes/git.js +65 -0
  28. package/dist/core/diagnostics/probes/mcp.js +75 -0
  29. package/dist/core/diagnostics/probes/node.js +59 -0
  30. package/dist/core/diagnostics/probes/pnpm.js +36 -0
  31. package/dist/core/diagnostics/probes/session.js +74 -0
  32. package/dist/core/diagnostics/probes/status-snapshot.js +442 -0
  33. package/dist/core/diagnostics/probes/workspace.js +63 -0
  34. package/dist/core/diagnostics/types.js +70 -0
  35. package/dist/core/edits/dispatch.js +218 -2
  36. package/dist/core/edits/journal.js +199 -0
  37. package/dist/core/edits/layer-d-ast.js +557 -14
  38. package/dist/core/edits/verify-hook.js +273 -0
  39. package/dist/core/edits/worktree.js +111 -18
  40. package/dist/core/engine/anvil-client.js +115 -5
  41. package/dist/core/engine/budgets.js +89 -0
  42. package/dist/core/engine/context-prefix.js +155 -0
  43. package/dist/core/engine/intent.js +260 -0
  44. package/dist/core/engine/native-pugi.js +744 -210
  45. package/dist/core/engine/prompts.js +61 -6
  46. package/dist/core/engine/strip-internal-fields.js +124 -0
  47. package/dist/core/engine/tool-bridge.js +818 -31
  48. package/dist/core/file-cache.js +113 -1
  49. package/dist/core/init/scaffold.js +195 -0
  50. package/dist/core/lsp/client.js +174 -29
  51. package/dist/core/mcp/client.js +75 -6
  52. package/dist/core/mcp/http-server.js +553 -0
  53. package/dist/core/mcp/permission.js +190 -0
  54. package/dist/core/mcp/registry.js +24 -2
  55. package/dist/core/mcp/server-tools.js +219 -0
  56. package/dist/core/mcp/server.js +397 -0
  57. package/dist/core/permissions/gate.js +187 -0
  58. package/dist/core/permissions/index.js +18 -0
  59. package/dist/core/permissions/mode.js +102 -0
  60. package/dist/core/permissions/state.js +160 -0
  61. package/dist/core/permissions/tool-class.js +93 -0
  62. package/dist/core/repl/codebase-survey.js +308 -0
  63. package/dist/core/repl/history.js +11 -1
  64. package/dist/core/repl/init-interview.js +457 -0
  65. package/dist/core/repl/model-pricing.js +135 -0
  66. package/dist/core/repl/onboarding-state.js +297 -0
  67. package/dist/core/repl/session.js +719 -29
  68. package/dist/core/repl/slash-commands.js +133 -9
  69. package/dist/core/retry-budget/budget.js +284 -0
  70. package/dist/core/retry-budget/index.js +5 -0
  71. package/dist/core/settings.js +71 -0
  72. package/dist/core/skills/defaults.js +457 -0
  73. package/dist/core/subagents/dispatcher-real.js +600 -0
  74. package/dist/core/subagents/dispatcher.js +113 -24
  75. package/dist/core/subagents/index.js +18 -5
  76. package/dist/core/subagents/isolation-matrix.js +213 -0
  77. package/dist/core/subagents/spawn.js +19 -4
  78. package/dist/core/transport/version-interceptor.js +166 -0
  79. package/dist/index.js +28 -0
  80. package/dist/runtime/bootstrap.js +190 -0
  81. package/dist/runtime/cli.js +1588 -266
  82. package/dist/runtime/commands/compact.js +296 -0
  83. package/dist/runtime/commands/cost.js +199 -0
  84. package/dist/runtime/commands/delegate.js +289 -0
  85. package/dist/runtime/commands/doctor.js +369 -0
  86. package/dist/runtime/commands/lsp.js +187 -5
  87. package/dist/runtime/commands/mcp.js +824 -0
  88. package/dist/runtime/commands/patch.js +17 -0
  89. package/dist/runtime/commands/permissions.js +87 -0
  90. package/dist/runtime/commands/report.js +299 -0
  91. package/dist/runtime/commands/review-consensus.js +17 -2
  92. package/dist/runtime/commands/roster.js +117 -0
  93. package/dist/runtime/commands/status.js +178 -0
  94. package/dist/runtime/commands/worktree.js +50 -6
  95. package/dist/runtime/headless.js +543 -0
  96. package/dist/runtime/load-hooks-or-exit.js +71 -0
  97. package/dist/runtime/plan-decompose.js +531 -0
  98. package/dist/runtime/version.js +65 -0
  99. package/dist/tools/agent-tool.js +206 -0
  100. package/dist/tools/apply-patch.js +281 -39
  101. package/dist/tools/ask-user-question.js +213 -0
  102. package/dist/tools/ask-user.js +115 -0
  103. package/dist/tools/file-tools.js +85 -14
  104. package/dist/tools/mcp-tool.js +260 -0
  105. package/dist/tools/multi-edit.js +361 -0
  106. package/dist/tools/registry.js +22 -2
  107. package/dist/tools/skill-tool.js +96 -0
  108. package/dist/tools/tasks.js +208 -0
  109. package/dist/tools/web-fetch.js +147 -2
  110. package/dist/tools/web-search.js +458 -0
  111. package/dist/tui/agent-progress-card.js +111 -0
  112. package/dist/tui/agent-tree.js +10 -0
  113. package/dist/tui/ask-modal.js +2 -2
  114. package/dist/tui/ask-user-question-prompt.js +192 -0
  115. package/dist/tui/compact-banner.js +54 -0
  116. package/dist/tui/conversation-pane.js +69 -8
  117. package/dist/tui/cost-table.js +111 -0
  118. package/dist/tui/doctor-table.js +31 -0
  119. package/dist/tui/input-box.js +1 -1
  120. package/dist/tui/markdown-render.js +4 -4
  121. package/dist/tui/repl-render.js +276 -37
  122. package/dist/tui/repl-splash.js +2 -2
  123. package/dist/tui/repl.js +25 -6
  124. package/dist/tui/splash.js +1 -1
  125. package/dist/tui/status-bar.js +94 -16
  126. package/dist/tui/status-table.js +7 -0
  127. package/dist/tui/tool-stream-pane.js +7 -0
  128. package/dist/tui/update-banner.js +20 -2
  129. package/docs/examples/codegraph.mcp.json +10 -0
  130. package/package.json +9 -6
@@ -1,11 +1,26 @@
1
1
  import { appendFileSync, existsSync, mkdirSync } from 'node:fs';
2
+ import { randomUUID } from 'node:crypto';
2
3
  import { resolve } from 'node:path';
3
- import { defaultEngineBudgets, runEngineLoop, } from '@pugi/sdk';
4
+ import { AsyncEventQueue, EngineEventEmitter, modelSupportsThinking, runEngineLoop, splitThinkingBlocks, } from '@pugi/sdk';
4
5
  import { FileReadCache } from '../file-cache.js';
5
6
  import { loadSettings } from '../settings.js';
6
7
  import { openSession, recordToolCall, recordToolResult } from '../session.js';
8
+ import { prewarmRealDispatch } from '../subagents/dispatcher.js';
9
+ import { resolveBudget } from './budgets.js';
7
10
  import { buildExecutor, buildToolsSchema } from './tool-bridge.js';
8
11
  import { personaSlugFor, systemPromptFor } from './prompts.js';
12
+ import { CancellationToken } from '../repl/cancellation.js';
13
+ // β5a R5+R6 + P1 (2026-05-26): per-turn `<context>` prefix + intent
14
+ // classifier marker. Both pure functions, no fs cost at adapter init.
15
+ // Per-dir markdown traverse fires once per `run()`; budget capped so
16
+ // it never dominates the prompt budget.
17
+ import { buildContextPrefix, spliceContextPrefix } from './context-prefix.js';
18
+ import { applyIntentMarker, classifyIntent } from './intent.js';
19
+ import { loadTraversedMarkdown } from '../context/markdown-traverse.js';
20
+ // α7 L11 (2026-05-27): per-session DenialTrackingState. One instance
21
+ // per `run()` so denials cluster by (tool, args) within the same
22
+ // command but do NOT leak across CLI invocations.
23
+ import { DenialTrackingState } from '../denial-tracking/state.js';
9
24
  /**
10
25
  * Real `NativePugiEngineAdapter`. Drives the Pugi CLI's tool-use loop:
11
26
  *
@@ -50,8 +65,30 @@ export class NativePugiEngineAdapter {
50
65
  * to a single `run()` invocation.
51
66
  */
52
67
  engineToolCallIds = new Map();
68
+ /**
69
+ * β3 streaming additive: optional typed event emitter that mirrors
70
+ * every async-queue event so external consumers (admin-api SSE
71
+ * controller, future cabinet WebSocket relay) can attach without
72
+ * holding the async iterator. The CLI itself only consumes the
73
+ * `AsyncIterable<EngineEvent>` returned by `run()`; the emitter is
74
+ * a fan-out point for additional subscribers.
75
+ */
76
+ streamEmitter = new EngineEventEmitter();
53
77
  constructor(options) {
54
78
  this.options = options;
79
+ // β2a r1 (Backend Architect P1, 2026-05-26): kick off the real
80
+ // dispatcher's module import at adapter init so the first
81
+ // `agent` tool call does not pay 50-200ms cold-start. We fire
82
+ // the promise without awaiting — by the time the engine loop
83
+ // runs and the model issues an `agent` call, the import has
84
+ // resolved. The promise is swallowed because a failed prewarm
85
+ // would surface again at dispatch time with the real error.
86
+ void prewarmRealDispatch().catch(() => {
87
+ // Intentional no-op: the actual dispatch call will surface
88
+ // the import failure (if any) with the right call stack. A
89
+ // prewarm-time failure is just a missed optimization, not a
90
+ // correctness issue.
91
+ });
55
92
  }
56
93
  async capabilities() {
57
94
  return {
@@ -59,7 +96,13 @@ export class NativePugiEngineAdapter {
59
96
  supportsFileEdits: true,
60
97
  supportsShell: true,
61
98
  supportsLsp: false,
62
- supportsSubagents: false,
99
+ // β2 S2 (2026-05-26): real subagent dispatch shipped via the
100
+ // `agent` tool (apps/pugi-cli/src/tools/agent-tool.ts) plus the
101
+ // genuine `runEngineLoop`-backed dispatcher
102
+ // (apps/pugi-cli/src/core/subagents/dispatcher-real.ts). The
103
+ // capability flag flips after S1 + S3 + S4 land so cabinet UI +
104
+ // remote orchestrators can rely on the advertised contract.
105
+ supportsSubagents: true,
63
106
  };
64
107
  }
65
108
  async *run(task, ctx) {
@@ -67,235 +110,650 @@ export class NativePugiEngineAdapter {
67
110
  const root = task.workspaceRoot;
68
111
  const session = this.options.session ?? openSession(root);
69
112
  const settings = loadSettings(root);
70
- const toolCtx = {
71
- root,
72
- settings,
73
- session,
74
- readCache: new FileReadCache(),
75
- };
76
- const budget = task.budget?.tokens
77
- ? {
78
- maxTokens: task.budget.tokens,
79
- // The task-level budget only carries tokens; tool calls keep
80
- // the per-command default so a careless caller cannot disable
81
- // the call-count guard by overriding usd/tokens.
82
- maxToolCalls: defaultEngineBudgets[kind].maxToolCalls,
113
+ // P1 fix (deep audit 2026-05-26): wire ctx.signal (AbortSignal) into
114
+ // a CancellationToken so the tool-bridge cancellation gate
115
+ // (`ctx.cancellation?.isAborted` check at tool-bridge.ts:656 +
116
+ // file-tools `gateOnCancellation` calls) fires when the operator
117
+ // aborts mid-tool. Before this fix `toolCtx` carried no cancellation
118
+ // field — only the next runEngineLoop iteration via `ctx.signal`
119
+ // aborted at the turn boundary, so a long-running tool (a sleeping
120
+ // bash command, a slow grep across the repo) could not be cancelled
121
+ // mid-call.
122
+ //
123
+ // The token is wired one-way: ctx.signal -> token. Aborting the
124
+ // token directly does NOT propagate back to the AbortSignal; the
125
+ // engine's own cancellation already lives upstream via the signal
126
+ // so the back-edge is unnecessary.
127
+ //
128
+ // r2 fix (triple-review 2026-05-26 P1): the abort listener was
129
+ // registered with `{ once: true }` — on actual abort it auto-detaches
130
+ // and disappears, but on the (common) NON-abort path where `run()`
131
+ // completes cleanly the listener stays attached to `ctx.signal`
132
+ // forever. Over a long REPL session (one shared AbortController per
133
+ // session, many run() invocations) listeners accumulate one per
134
+ // run, leaking memory and CPU on `dispatchEvent`. We now track the
135
+ // detach handle and call it unconditionally in the run()'s finally
136
+ // block so cleanup happens on both the success and abort paths.
137
+ const cancellation = new CancellationToken();
138
+ let detachAbortListener;
139
+ if (ctx.signal) {
140
+ if (ctx.signal.aborted) {
141
+ cancellation.abort();
83
142
  }
84
- : defaultEngineBudgets[kind];
85
- yield {
86
- type: 'status',
87
- message: `Pugi engine starting: kind=${kind} budget=${budget.maxToolCalls} calls / ${budget.maxTokens} tokens`,
88
- };
89
- // Buffer status events emitted from inside the loop hooks. Async
90
- // generators cannot yield from synchronous callbacks, so we collect
91
- // them in a queue and drain after the loop call completes. The loop
92
- // is short enough (~30 turns) that latency-to-stdout is acceptable;
93
- // a follow-up PR can switch to an event emitter for true streaming.
94
- const buffer = [];
95
- // Track files mutated by the loop. We extract the path from the JSON
96
- // arguments of every successful write/edit tool call; `bash` is left
97
- // out because its filesystem footprint is opaque (a single command
98
- // can touch dozens of paths via `make`, `pnpm build`, etc). The
99
- // per-session events.jsonl already carries every file_mutation event
100
- // for replay; this set is only the headline summary the CLI prints.
101
- const filesChanged = new Set();
102
- // Pending lookup: call.id → path extracted from arguments. We only
103
- // commit to `filesChanged` when the corresponding onToolResult fires
104
- // with `ok: true`, so a refused or failed edit does not surface as
105
- // a phantom change in the operator summary.
106
- const pendingMutations = new Map();
107
- // Per-session events mirror — `.pugi/sessions/<id>/events.jsonl`.
108
- // The existing global log at `.pugi/events.jsonl` is preserved as
109
- // the audit-replay source of truth; this mirror is the easy-to-find
110
- // per-run log for operators and the cabinet UI (Sprint 2B).
111
- const sessionEventsPath = openSessionMirror(root, session.id);
112
- const hooks = {
113
- onTurnStart: (turnIndex, messageCount) => {
114
- const msg = `turn ${turnIndex + 1}: requesting model (transcript=${messageCount} messages)`;
115
- buffer.push({ type: 'status', message: msg });
116
- appendSessionMirror(sessionEventsPath, { type: 'turn_start', turn: turnIndex + 1, transcript: messageCount });
117
- },
118
- onTurnComplete: (turnIndex, response) => {
119
- if (response.stop === 'tool_use') {
120
- const calls = response.assistantMessage.toolCalls ?? [];
121
- buffer.push({
143
+ else {
144
+ const handler = () => cancellation.abort();
145
+ ctx.signal.addEventListener('abort', handler, { once: true });
146
+ detachAbortListener = () => {
147
+ ctx.signal.removeEventListener('abort', handler);
148
+ };
149
+ }
150
+ }
151
+ // r2 (triple-review 2026-05-26 P1): everything below runs inside a
152
+ // try/finally so the AbortSignal listener detaches on BOTH the
153
+ // success and abort paths. Without this wrap a long REPL session
154
+ // (one persistent AbortController, many run() invocations) leaked
155
+ // one abort listener per non-aborted run.
156
+ try {
157
+ const toolCtx = {
158
+ root,
159
+ settings,
160
+ session,
161
+ readCache: new FileReadCache(),
162
+ cancellation,
163
+ };
164
+ // α7 L11 (2026-05-27): instantiate per-`run()` denial tracker. The
165
+ // executor records every refusal (PLAN_MODE_REFUSED, HOOK_BLOCKED,
166
+ // OPERATOR_ABORTED, STALE_READ, unknown-tool, plan-mode agent) and
167
+ // the user-prompt assembler below splices a compact reminder when
168
+ // the same (tool, args) pair has been denied twice or more. The
169
+ // tracker is in-memory only; the audit ledger at
170
+ // `.pugi/events.jsonl` already captures the full per-event log for
171
+ // forensic replay; this surface is the model-facing aggregate.
172
+ const denialTracking = new DenialTrackingState();
173
+ // β1a r1 (budget wiring, 2026-05-26): swap the legacy SDK per-
174
+ // command budget lookup for the Pl9 `resolveBudget()` pipeline so
175
+ // `.pugi/settings.json::budgets.<command>` overrides actually take
176
+ // effect at runtime + the HARD_MAX_* caps guard misconfigured
177
+ // envelopes pre-flight. Before this fix the β1 Pl9 module
178
+ // (`core/engine/budgets.ts`) was dead code — the adapter still
179
+ // read the per-command defaults from the SDK, so operators who
180
+ // set `budgets.code.maxTokens = 50000` in settings.json got the
181
+ // legacy 30k anyway and `assertBudgetWithinTier` never ran.
182
+ //
183
+ // Task-level token override (e.g. CLI `--max-tokens`) keeps
184
+ // precedence; tool-call ceiling falls through to the resolved
185
+ // budget so a careless caller cannot disable the call-count
186
+ // guard by setting only token count.
187
+ const budget = resolveBudget(kind, settings, task.budget?.tokens ? { maxTokens: task.budget.tokens } : undefined);
188
+ // β3 streaming: pre-build the typed stream event queue so the hook
189
+ // callbacks below can push live events that this async generator
190
+ // yields IMMEDIATELY (instead of buffering until `runEngineLoop`
191
+ // completes). Operator now sees the first `tool.start` within
192
+ // ~tens of ms of the model emitting it, not 30+ s after the loop
193
+ // settles.
194
+ const streamQueue = new AsyncEventQueue();
195
+ const emitter = this.streamEmitter;
196
+ const supportsThinking = modelSupportsThinking(this.options.model);
197
+ /**
198
+ * Push one typed stream event into BOTH the per-run async queue
199
+ * (the CLI's iterator) and the long-lived emitter (the multiplex
200
+ * fan-out for admin-api SSE / cabinet WebSocket subscribers).
201
+ * The function stamps `timestamp` once so both consumers see the
202
+ * same wall clock.
203
+ */
204
+ const emitStream = (event) => {
205
+ const stamped = {
206
+ ...event,
207
+ timestamp: new Date().toISOString(),
208
+ };
209
+ streamQueue.push(stamped);
210
+ emitter.emit('event', stamped);
211
+ };
212
+ // r1 fix per triple-review Backend Architect P1: unify yield path via
213
+ // emitStream + streamQueue drain so the iterator consumer does NOT
214
+ // see this status frame twice. Pre-fix did both bare yield + emitStream
215
+ // → iterator got 2 copies, emitter got 1.
216
+ emitStream({
217
+ type: 'status',
218
+ message: `Pugi engine starting: kind=${kind} budget=${budget.maxToolCalls} calls / ${budget.maxTokens} tokens`,
219
+ });
220
+ // β5a R1+R4+R5+R6+P1 (2026-05-26): build the per-turn `<context>`
221
+ // prefix and apply the intent marker so the model sees:
222
+ // 1. cwd + open-files + per-dir-conventions block (R5+R6)
223
+ // 2. a `<intent kind="definitional">` wrapper when the operator
224
+ // asked a knowledge question (P1) — fixes the "What is grep?
225
+ // → bash man grep" loss mode flagged by the α7.X eval.
226
+ //
227
+ // All caps enforced inside the builders (5 KB block + 50 entries
228
+ // + top-3 markdown). Worst-case prompt growth is ~5 KB, well
229
+ // inside any per-command token budget.
230
+ //
231
+ // cwd is sourced from `process.cwd()` — the operator's shell pwd
232
+ // when they invoked `pugi`. For non-REPL CLI paths this is
233
+ // accurate; the REPL session retains the launch cwd for the
234
+ // lifetime of the session which is what the operator expects.
235
+ const cwdForTraverse = process.cwd();
236
+ let traverseResult;
237
+ try {
238
+ traverseResult = await loadTraversedMarkdown({
239
+ cwd: cwdForTraverse,
240
+ workspaceRoot: root,
241
+ });
242
+ }
243
+ catch {
244
+ // Per-dir markdown is a NICE-TO-HAVE; a fs error here must
245
+ // never break the engine loop. Fall back to an empty result
246
+ // so the prefix block still surfaces cwd + working set.
247
+ traverseResult = { loaded: [], warnings: [], totalBytes: 0 };
248
+ }
249
+ const intentClassification = classifyIntent(task.prompt);
250
+ const intentHint = intentClassification.intent !== 'ambiguous' ? intentClassification.intent : undefined;
251
+ const cwdRelative = relativeOrAbsolute(root, cwdForTraverse);
252
+ const prefix = buildContextPrefix({
253
+ cwdRelative,
254
+ // β5a defers wiring the live WorkingSet snapshot to the REPL
255
+ // session integration (R5+R6 here only covers the engine-side
256
+ // builder). When the REPL passes its working set down, the
257
+ // engine surface fills in. For now the prefix carries cwd +
258
+ // per-dir conventions + intent which are the two biggest
259
+ // win-rate moves per the α7.X eval.
260
+ traversedMarkdown: traverseResult.loaded,
261
+ intentHint,
262
+ });
263
+ if (prefix.bytes > 0 || intentClassification.intent === 'definitional') {
264
+ emitStream({
265
+ type: 'status',
266
+ message: `context: cwd=${cwdRelative} per-dir-md=${prefix.counts.markdownIncluded}/${prefix.counts.markdownTotal} intent=${intentClassification.intent}`,
267
+ });
268
+ }
269
+ const decoratedPrompt = applyIntentMarker(task.prompt, intentClassification.intent);
270
+ const finalUserPrompt = spliceContextPrefix(prefix.block, decoratedPrompt);
271
+ // Track files mutated by the loop. We extract the path from the JSON
272
+ // arguments of every successful write/edit tool call; `bash` is left
273
+ // out because its filesystem footprint is opaque (a single command
274
+ // can touch dozens of paths via `make`, `pnpm build`, etc). The
275
+ // per-session events.jsonl already carries every file_mutation event
276
+ // for replay; this set is only the headline summary the CLI prints.
277
+ const filesChanged = new Set();
278
+ // Pending lookup: call.id → path extracted from arguments. We only
279
+ // commit to `filesChanged` when the corresponding onToolResult fires
280
+ // with `ok: true`, so a refused or failed edit does not surface as
281
+ // a phantom change in the operator summary.
282
+ const pendingMutations = new Map();
283
+ // Per-session events mirror — `.pugi/sessions/<id>/events.jsonl`.
284
+ // The existing global log at `.pugi/events.jsonl` is preserved as
285
+ // the audit-replay source of truth; this mirror is the easy-to-find
286
+ // per-run log for operators and the cabinet UI (Sprint 2B).
287
+ const sessionEventsPath = openSessionMirror(root, session.id);
288
+ const hooks = {
289
+ onTurnStart: (turnIndex, messageCount) => {
290
+ const msg = `turn ${turnIndex + 1}: requesting model (transcript=${messageCount} messages)`;
291
+ emitStream({ type: 'status', message: msg });
292
+ appendSessionMirror(sessionEventsPath, { type: 'turn_start', turn: turnIndex + 1, transcript: messageCount });
293
+ },
294
+ onTurnComplete: (turnIndex, response) => {
295
+ if (response.stop === 'tool_use') {
296
+ const calls = response.assistantMessage.toolCalls ?? [];
297
+ emitStream({
298
+ type: 'status',
299
+ message: `turn ${turnIndex + 1}: model requested ${calls.length} tool call(s)`,
300
+ });
301
+ appendSessionMirror(sessionEventsPath, {
302
+ type: 'turn_complete',
303
+ turn: turnIndex + 1,
304
+ stop: 'tool_use',
305
+ toolCalls: calls.length,
306
+ tokensUsed: response.tokensUsed,
307
+ });
308
+ }
309
+ else if (response.stop === 'text') {
310
+ emitStream({
311
+ type: 'status',
312
+ message: `turn ${turnIndex + 1}: model returned final text (${response.content.length} chars)`,
313
+ });
314
+ appendSessionMirror(sessionEventsPath, {
315
+ type: 'turn_complete',
316
+ turn: turnIndex + 1,
317
+ stop: 'text',
318
+ contentLength: response.content.length,
319
+ tokensUsed: response.tokensUsed,
320
+ });
321
+ // β3 E4 thinking-block surface: only Claude / Gemini families
322
+ // advertise structured thinking today. The model resolver may
323
+ // return a slug we don't recognise; in that case we skip the
324
+ // split silently. When we DO recognise it, every `<thinking>`
325
+ // / `<thought>` block becomes a separate `thinking.start`/
326
+ // `thinking.delta`/`thinking.end` triplet so the TUI can
327
+ // render one collapsed pane row per block. The visible text
328
+ // (post-strip) flows to the regular `text.delta` channel so
329
+ // the conversation pane never shows raw <thinking> markup.
330
+ if (supportsThinking && response.content.length > 0) {
331
+ const split = splitThinkingBlocks(response.content);
332
+ for (const block of split.thinkingBlocks) {
333
+ const blockId = `think-${randomUUID().slice(0, 8)}`;
334
+ emitStream({ type: 'thinking.start', blockId });
335
+ emitStream({ type: 'thinking.delta', blockId, chunk: block });
336
+ emitStream({ type: 'thinking.end', blockId });
337
+ }
338
+ if (split.visibleText.length > 0) {
339
+ emitStream({ type: 'text.delta', chunk: split.visibleText });
340
+ }
341
+ }
342
+ else if (response.content.length > 0) {
343
+ emitStream({ type: 'text.delta', chunk: response.content });
344
+ }
345
+ }
346
+ },
347
+ onToolCall: (call) => {
348
+ // Record under an `engine:` name prefix so the audit log can
349
+ // distinguish loop-driven calls from direct CLI tool calls.
350
+ const id = recordToolCall(session, `engine:${call.name}`, call.arguments.slice(0, 200));
351
+ // Stash the audit id on the call for `onToolResult` to close.
352
+ this.engineToolCallIds.set(call.id, id);
353
+ // Extract a candidate path for write/edit so we can build the
354
+ // filesChanged summary if (and only if) the call succeeds. Bad
355
+ // JSON is harmless here — we ignore it and the executor surfaces
356
+ // the actual parse error to the model.
357
+ if (call.name === 'write' || call.name === 'edit') {
358
+ const path = extractPathArg(call.arguments);
359
+ if (path)
360
+ pendingMutations.set(call.id, path);
361
+ }
362
+ emitStream({
363
+ type: 'tool.start',
364
+ callId: call.id,
365
+ name: call.name,
366
+ arguments: call.arguments,
367
+ });
368
+ emitStream({
122
369
  type: 'status',
123
- message: `turn ${turnIndex + 1}: model requested ${calls.length} tool call(s)`,
370
+ message: `tool_call: ${call.name}(${call.arguments.slice(0, 80)}${call.arguments.length > 80 ? '...' : ''})`,
124
371
  });
125
372
  appendSessionMirror(sessionEventsPath, {
126
- type: 'turn_complete',
127
- turn: turnIndex + 1,
128
- stop: 'tool_use',
129
- toolCalls: calls.length,
130
- tokensUsed: response.tokensUsed,
373
+ type: 'tool_call',
374
+ tool: call.name,
375
+ callId: call.id,
376
+ argsPreview: call.arguments.slice(0, 200),
131
377
  });
132
- }
133
- else if (response.stop === 'text') {
134
- buffer.push({
378
+ },
379
+ onToolResult: (call, result) => {
380
+ const auditId = this.engineToolCallIds.get(call.id);
381
+ if (auditId) {
382
+ if (result.ok) {
383
+ recordToolResult(session, auditId, 'success', result.content.slice(0, 200));
384
+ }
385
+ else {
386
+ recordToolResult(session, auditId, 'error', result.error.slice(0, 200));
387
+ }
388
+ this.engineToolCallIds.delete(call.id);
389
+ }
390
+ const pendingPath = pendingMutations.get(call.id);
391
+ if (pendingPath) {
392
+ if (result.ok)
393
+ filesChanged.add(pendingPath);
394
+ pendingMutations.delete(call.id);
395
+ }
396
+ emitStream({
397
+ type: 'tool.end',
398
+ callId: call.id,
399
+ ok: result.ok,
400
+ summary: result.ok
401
+ ? result.content.slice(0, 200)
402
+ : result.error.slice(0, 200),
403
+ });
404
+ emitStream({
135
405
  type: 'status',
136
- message: `turn ${turnIndex + 1}: model returned final text (${response.content.length} chars)`,
406
+ message: result.ok
407
+ ? `tool_result: ${call.name} ok`
408
+ : `tool_result: ${call.name} error: ${result.error.slice(0, 120)}`,
137
409
  });
138
410
  appendSessionMirror(sessionEventsPath, {
139
- type: 'turn_complete',
140
- turn: turnIndex + 1,
141
- stop: 'text',
142
- contentLength: response.content.length,
143
- tokensUsed: response.tokensUsed,
411
+ type: 'tool_result',
412
+ tool: call.name,
413
+ callId: call.id,
414
+ ok: result.ok,
415
+ summary: result.ok ? result.content.slice(0, 200) : result.error.slice(0, 200),
416
+ });
417
+ },
418
+ };
419
+ // β1b r1 (--allow-fetch / --allow-search wiring, 2026-05-26):
420
+ // compute the effective gate as OR of (a) the persisted
421
+ // settings.json opt-in and (b) the runtime CLI flag passed via
422
+ // the constructor. Before this fix the adapter only honored (a),
423
+ // so `pugi code --allow-fetch` against a default-privacy workspace
424
+ // silently fell back to "tool not advertised" even though the
425
+ // operator opted in for one invocation. The CLI flag was wired
426
+ // through to the legacy `pugi web` sub-command but not to the
427
+ // engine adapter — Backend Architect review (PR #425 r1) caught
428
+ // the gap.
429
+ const allowFetchEffective = this.options.allowFetch === true || settings.web?.fetch?.enabled === true;
430
+ const allowSearchEffective = this.options.allowSearch === true || settings.web?.search?.enabled === true;
431
+ // β2 S3 (2026-05-26) → β2a r1 (Backend Architect P1, 2026-05-26):
432
+ // expose the `agent` tool to the parent loop ONLY for non-plan
433
+ // commands. `buildToolsSchema` also strips the agent tool from
434
+ // plan-mode schemas, but a model that fabricates an `agent` call
435
+ // would still hit the executor with `agentDispatch` wired and
436
+ // could spawn a coder that mutates the workspace — breaking the
437
+ // plan-mode read-only contract. Hard-gate `allowAgent` on the
438
+ // command kind so plan mode never wires the dispatch block in
439
+ // the first place; tool-bridge.ts also throws ToolRefused on a
440
+ // fabricated `agent` call in plan mode as defense in depth.
441
+ //
442
+ // Why only the top-level parent and not children: the dispatcher-
443
+ // real.ts module builds the CHILD's executor without an
444
+ // `agentDispatch` block so children cannot recursively spawn
445
+ // grandchildren. The isolation-matrix capability set then refuses
446
+ // the `agent` tool for every non-orchestrator role anyway, but
447
+ // the executor-level gate is the load-bearing chokepoint.
448
+ const allowAgent = kind !== 'plan';
449
+ // β3 streaming: kick off `runEngineLoop` IN PARALLEL with the queue
450
+ // drain. The loop's hook callbacks push events onto `streamQueue`
451
+ // synchronously; this generator yields them live by awaiting the
452
+ // queue's iterator. When the loop settles (success or crash) we
453
+ // close the queue, which lets the iterator return cleanly and the
454
+ // generator falls through to the terminal `result` frame.
455
+ //
456
+ // Why concurrent instead of serial:
457
+ //
458
+ // The β1 adapter awaited `runEngineLoop` to completion, then
459
+ // drained an in-memory `EngineEvent[]` buffer. Operator saw
460
+ // nothing for 30+ seconds (the full LLM round-trip + tool exec
461
+ // wall time), then the entire log dumped at once. The TUI tool-
462
+ // stream pane was a no-op because no event ever reached it
463
+ // before the loop completed.
464
+ //
465
+ // `Promise.race`-based interleaving lets us yield the next queue
466
+ // event OR detect loop settlement on each tick. The settlement
467
+ // flag (`loopSettled`) gates the final drain so we never miss
468
+ // tail events that the hooks pushed in the same microtask as
469
+ // the loop's terminal `return`.
470
+ // Boxed via single-element tuple so TypeScript does not narrow the
471
+ // outer `outcome` binding to `null` after the closure mutation.
472
+ // Async-closure mutations are invisible to TS control-flow analysis;
473
+ // wrapping in a tuple defeats the narrowing without an unsafe cast.
474
+ const outcomeBox = [null];
475
+ let loopError = null;
476
+ const loopPromise = (async () => {
477
+ try {
478
+ outcomeBox[0] = await runEngineLoop({
479
+ client: this.options.client,
480
+ executor: buildExecutor({
481
+ kind,
482
+ ctx: toolCtx,
483
+ sessionId: session.id,
484
+ workspaceRoot: root,
485
+ // P1 fix (deep audit 2026-05-26): forward optional REPL
486
+ // ask-modal bridge. Default `interactive: false` preserves
487
+ // backward compat — non-TTY callers (CI, pipes, scripted
488
+ // CLI runs) keep the `[user_input_required]` envelope path.
489
+ // The REPL layer passes `interactive: true` + a real
490
+ // `askUserBridge` so model-initiated `ask_user_question`
491
+ // calls round-trip to the ink modal and return the
492
+ // operator's choice as a tool result.
493
+ interactive: this.options.interactive === true,
494
+ ...(this.options.askUserBridge
495
+ ? { askUserBridge: this.options.askUserBridge }
496
+ : {}),
497
+ // P1 fix (deep audit 2026-05-26): forward the workspace
498
+ // HookRegistry so `.pugi/hooks/` lifecycle hooks fire for
499
+ // model-initiated tool calls. SECURITY: a `PreToolUse
500
+ // onFailure: 'block'` hook that refuses bash containing
501
+ // `rm` now applies to model dispatch — before this fix
502
+ // such a hook only applied to direct CLI tool calls.
503
+ ...(this.options.hooks ? { hooks: this.options.hooks } : {}),
504
+ // β1a r1 (web_fetch gating) + β1b r1 (--allow-fetch wiring):
505
+ // executor allowFetch matches the schema-advertise gate so a
506
+ // settings.json opt-in OR a --allow-fetch flag enables the
507
+ // call. Without this the model would not even see the
508
+ // `web_fetch` tool. `allowSearch` covers the new T4
509
+ // `web_search` tool with the same OR semantics.
510
+ allowFetch: allowFetchEffective,
511
+ allowSearch: allowSearchEffective,
512
+ // β2 S3 → β2a r1 (2026-05-26): parent-level agentDispatch
513
+ // wiring. When the model emits a `tool_call: agent(role,
514
+ // brief)`, the executor forwards it to dispatcher-real.ts
515
+ // which spawns a child engine loop against the same Anvil
516
+ // client. Gated by `allowAgent` so plan mode does not even
517
+ // wire the dispatch block — defense in depth on top of the
518
+ // schema-filter and the tool-bridge plan-mode refusal.
519
+ ...(allowAgent
520
+ ? {
521
+ agentDispatch: {
522
+ parentSession: session,
523
+ engineClient: this.options.client,
524
+ },
525
+ }
526
+ : {}),
527
+ // β4 M1/M3/M5: pass the loaded MCP registry through so the
528
+ // executor can route `mcp__server__tool` calls + run the
529
+ // first-call permission prompt before dispatching upstream.
530
+ ...(this.options.mcpRegistry ? { mcpRegistry: this.options.mcpRegistry } : {}),
531
+ ...(this.options.mcpPrompt ? { mcpPrompt: this.options.mcpPrompt } : {}),
532
+ // α7 L11 (2026-05-27): per-`run()` denial tracker. Every
533
+ // refusal sentinel (PLAN_MODE_REFUSED, HOOK_BLOCKED,
534
+ // OPERATOR_ABORTED, STALE_READ, unknown-tool, plan-mode
535
+ // agent) is fingerprinted by (toolName, sha256(canonical
536
+ // args)) so the model's next-turn reminder surfaces the
537
+ // pattern instead of re-issuing the same refused call.
538
+ denialTracking,
539
+ }),
540
+ systemPrompt: systemPromptFor(kind),
541
+ // β5a R5+R6+P1: per-turn `<context>` prefix + intent marker
542
+ // applied above. Falls back to verbatim `task.prompt` when
543
+ // both the prefix block is empty AND the intent classifier
544
+ // returned ambiguous (the splice + apply functions handle
545
+ // that case as identity).
546
+ userPrompt: finalUserPrompt,
547
+ // β1a r1 (web_fetch gating) + β1b r1 (--allow-fetch wiring):
548
+ // pass the OR of `.pugi/settings.json::web.fetch.enabled` and
549
+ // the runtime `--allow-fetch` flag. When neither is true the
550
+ // `web_fetch` tool is not advertised to the model at all.
551
+ // `allowSearch` does the same for the new `web_search` tool.
552
+ // β2 S3: allowAgent surfaces the `agent` tool in the schema
553
+ // so the model sees it as a valid tool call option; the
554
+ // capability-matrix layer (S4) still gates which roles can
555
+ // actually USE it. Plan mode strips it via β2a r1 gate.
556
+ tools: buildToolsSchema(kind, {
557
+ allowFetch: allowFetchEffective,
558
+ allowSearch: allowSearchEffective,
559
+ allowAgent,
560
+ // β4 M1/M3: same registry the executor saw. Schema +
561
+ // dispatcher must agree on which MCP names are advertised
562
+ // and which are dispatchable; passing identical references
563
+ // makes that invariant impossible to break.
564
+ ...(this.options.mcpRegistry ? { mcpRegistry: this.options.mcpRegistry } : {}),
565
+ }),
566
+ budget,
567
+ personaSlug: personaSlugFor(kind),
568
+ hooks,
569
+ temperature: this.options.temperature ?? 0.2,
570
+ signal: ctx.signal,
571
+ // β1 (audit E2): forward CLI sub-command + α6.10 routing tag +
572
+ // operator-pinned model so the runtime controller's DTO sees
573
+ // all three. `tag` is derived from `command` by `dispatchTagFor`
574
+ // (`code → codegen`, `plan → reason`, etc.); future routing
575
+ // changes flip the mapping table without touching the call
576
+ // site. `model` forwards `this.options.model` and stays undefined
577
+ // when the operator has not pinned a model.
578
+ command: kind,
579
+ tag: dispatchTagFor(kind),
580
+ model: this.options.model,
144
581
  });
145
582
  }
146
- },
147
- onToolCall: (call) => {
148
- // Record under an `engine_tool` prefix so the audit log can
149
- // distinguish loop-driven calls from direct CLI tool calls.
150
- const id = recordToolCall(session, `engine:${call.name}`, call.arguments.slice(0, 200));
151
- // Stash the audit id on the call for `onToolResult` to close.
152
- this.engineToolCallIds.set(call.id, id);
153
- // Extract a candidate path for write/edit so we can build the
154
- // filesChanged summary if (and only if) the call succeeds. Bad
155
- // JSON is harmless here — we ignore it and the executor surfaces
156
- // the actual parse error to the model.
157
- if (call.name === 'write' || call.name === 'edit') {
158
- const path = extractPathArg(call.arguments);
159
- if (path)
160
- pendingMutations.set(call.id, path);
161
- }
162
- buffer.push({
163
- type: 'status',
164
- message: `tool_call: ${call.name}(${call.arguments.slice(0, 80)}${call.arguments.length > 80 ? '...' : ''})`,
165
- });
166
- appendSessionMirror(sessionEventsPath, {
167
- type: 'tool_call',
168
- tool: call.name,
169
- callId: call.id,
170
- argsPreview: call.arguments.slice(0, 200),
171
- });
172
- },
173
- onToolResult: (call, result) => {
174
- const auditId = this.engineToolCallIds.get(call.id);
175
- if (auditId) {
176
- if (result.ok) {
177
- recordToolResult(session, auditId, 'success', result.content.slice(0, 200));
178
- }
179
- else {
180
- recordToolResult(session, auditId, 'error', result.error.slice(0, 200));
181
- }
182
- this.engineToolCallIds.delete(call.id);
583
+ catch (err) {
584
+ loopError = err;
183
585
  }
184
- const pendingPath = pendingMutations.get(call.id);
185
- if (pendingPath) {
186
- if (result.ok)
187
- filesChanged.add(pendingPath);
188
- pendingMutations.delete(call.id);
586
+ finally {
587
+ // Close the queue so the iterator below returns `done: true`.
588
+ // Any tail events the hooks pushed in the same microtask still
589
+ // drain because `AsyncEventQueue.close()` only resolves
590
+ // PENDING awaiters — buffered items stay readable.
591
+ streamQueue.close();
189
592
  }
190
- buffer.push({
191
- type: 'status',
192
- message: result.ok
193
- ? `tool_result: ${call.name} ok`
194
- : `tool_result: ${call.name} error: ${result.error.slice(0, 120)}`,
195
- });
196
- appendSessionMirror(sessionEventsPath, {
197
- type: 'tool_result',
198
- tool: call.name,
199
- callId: call.id,
200
- ok: result.ok,
201
- summary: result.ok ? result.content.slice(0, 200) : result.error.slice(0, 200),
202
- });
203
- },
204
- };
205
- let outcome;
206
- try {
207
- outcome = await runEngineLoop({
208
- client: this.options.client,
209
- executor: buildExecutor({ kind, ctx: toolCtx }),
210
- systemPrompt: systemPromptFor(kind),
211
- userPrompt: task.prompt,
212
- tools: buildToolsSchema(kind),
213
- budget,
214
- personaSlug: personaSlugFor(kind),
215
- hooks,
216
- temperature: this.options.temperature ?? 0.2,
217
- signal: ctx.signal,
593
+ })();
594
+ // Drain the queue live. Each iteration yields one EngineEvent the
595
+ // moment its hook fired. Operator sees `tool.start` within tens of
596
+ // ms of the model emitting it.
597
+ for await (const event of streamQueue) {
598
+ yield streamEventToEngineEvent(event);
599
+ }
600
+ // Loop has settled (queue closed). Surface its outcome — either an
601
+ // unhandled crash from the (rare) executor exception path or the
602
+ // structured EngineLoopOutcome.
603
+ await loopPromise;
604
+ if (loopError !== null) {
605
+ const message = loopError instanceof Error ? loopError.message : String(loopError);
606
+ yield {
607
+ type: 'result',
608
+ result: {
609
+ status: 'failed',
610
+ summary: `engine loop crashed: ${message}`,
611
+ filesChanged: [],
612
+ patchRefs: [],
613
+ testsRun: [],
614
+ risks: [`unhandled error in engine adapter: ${message}`],
615
+ eventRefs: [],
616
+ },
617
+ };
618
+ return;
619
+ }
620
+ const finalOutcome = outcomeBox[0];
621
+ if (finalOutcome === null) {
622
+ // Defensive — should never hit. `runEngineLoop` always either
623
+ // resolves with an outcome or throws (and we catch that above).
624
+ yield {
625
+ type: 'result',
626
+ result: {
627
+ status: 'failed',
628
+ summary: 'engine loop returned no outcome',
629
+ filesChanged: [],
630
+ patchRefs: [],
631
+ testsRun: [],
632
+ risks: ['runEngineLoop resolved without an outcome value'],
633
+ eventRefs: [],
634
+ },
635
+ };
636
+ return;
637
+ }
638
+ // Translate the loop outcome into an EngineResult.
639
+ // `aborted` (α6.9: operator cancelled mid-tool) maps to `blocked`
640
+ // because the operator chose the outcome, same shape as
641
+ // budget_exhausted / tool_refused.
642
+ const status = finalOutcome.status === 'completed'
643
+ ? 'done'
644
+ : finalOutcome.status === 'failed'
645
+ ? 'failed'
646
+ : 'blocked';
647
+ const summaryPrefix = finalOutcome.status === 'completed'
648
+ ? ''
649
+ : finalOutcome.status === 'budget_exhausted'
650
+ ? '[budget_exhausted] '
651
+ : finalOutcome.status === 'tool_refused'
652
+ ? '[plan_mode_refused] '
653
+ : finalOutcome.status === 'aborted'
654
+ ? '[operator_aborted] '
655
+ : '[failed] ';
656
+ const filesChangedList = Array.from(filesChanged).sort();
657
+ appendSessionMirror(sessionEventsPath, {
658
+ type: 'outcome',
659
+ status: finalOutcome.status,
660
+ toolCallCount: finalOutcome.toolCallCount,
661
+ turnsUsed: finalOutcome.turnsUsed,
662
+ tokensUsed: finalOutcome.tokensUsed,
663
+ filesChanged: filesChangedList,
664
+ reason: finalOutcome.reason,
218
665
  });
219
- }
220
- catch (error) {
221
- // Defensive — runEngineLoop wraps errors into status: failed, so
222
- // this branch is only hit if the executor or hooks themselves
223
- // throw uncaught. Surface as a failed result so the CLI exits
224
- // non-zero rather than hanging.
225
- const message = error instanceof Error ? error.message : String(error);
226
666
  yield {
227
667
  type: 'result',
228
668
  result: {
229
- status: 'failed',
230
- summary: `engine loop crashed: ${message}`,
231
- filesChanged: [],
669
+ status,
670
+ summary: `${summaryPrefix}${finalOutcome.finalText || finalOutcome.reason || 'no answer returned'}`,
671
+ filesChanged: filesChangedList,
232
672
  patchRefs: [],
233
673
  testsRun: [],
234
- risks: [`unhandled error in engine adapter: ${message}`],
235
- eventRefs: [],
674
+ risks: finalOutcome.status === 'completed'
675
+ ? []
676
+ : [finalOutcome.reason ?? `outcome=${finalOutcome.status}`],
677
+ eventRefs: [
678
+ `tool_calls=${finalOutcome.toolCallCount}`,
679
+ `turns=${finalOutcome.turnsUsed}`,
680
+ `tokens=${finalOutcome.tokensUsed}`,
681
+ // `outcome=<status>` is a machine-readable echo so callers
682
+ // (cli.ts plan exit code, cabinet UI) can distinguish
683
+ // `budget_exhausted` from `tool_refused` without parsing
684
+ // the human-readable summary prefix. Code Reviewer P2
685
+ // retro 2026-05-23: plan exit code previously collapsed
686
+ // both blocked reasons into 0, which masked budget hits.
687
+ `outcome=${finalOutcome.status}`,
688
+ `session=${session.id}`,
689
+ `ctx=${ctx.sessionId}`,
690
+ `mirror=${sessionEventsPath}`,
691
+ ],
236
692
  },
237
693
  };
238
- return;
239
694
  }
240
- // Drain status buffer first so consumers see the chronological order.
241
- for (const event of buffer)
242
- yield event;
243
- // Translate the loop outcome into an EngineResult.
244
- // `aborted` (α6.9: operator cancelled mid-tool) maps to `blocked`
245
- // because the operator chose the outcome, same shape as
246
- // budget_exhausted / tool_refused.
247
- const status = outcome.status === 'completed'
248
- ? 'done'
249
- : outcome.status === 'failed'
250
- ? 'failed'
251
- : 'blocked';
252
- const summaryPrefix = outcome.status === 'completed'
253
- ? ''
254
- : outcome.status === 'budget_exhausted'
255
- ? '[budget_exhausted] '
256
- : outcome.status === 'tool_refused'
257
- ? '[plan_mode_refused] '
258
- : outcome.status === 'aborted'
259
- ? '[operator_aborted] '
260
- : '[failed] ';
261
- const filesChangedList = Array.from(filesChanged).sort();
262
- appendSessionMirror(sessionEventsPath, {
263
- type: 'outcome',
264
- status: outcome.status,
265
- toolCallCount: outcome.toolCallCount,
266
- turnsUsed: outcome.turnsUsed,
267
- tokensUsed: outcome.tokensUsed,
268
- filesChanged: filesChangedList,
269
- reason: outcome.reason,
270
- });
271
- yield {
272
- type: 'result',
273
- result: {
274
- status,
275
- summary: `${summaryPrefix}${outcome.finalText || outcome.reason || 'no answer returned'}`,
276
- filesChanged: filesChangedList,
277
- patchRefs: [],
278
- testsRun: [],
279
- risks: outcome.status === 'completed'
280
- ? []
281
- : [outcome.reason ?? `outcome=${outcome.status}`],
282
- eventRefs: [
283
- `tool_calls=${outcome.toolCallCount}`,
284
- `turns=${outcome.turnsUsed}`,
285
- `tokens=${outcome.tokensUsed}`,
286
- // `outcome=<status>` is a machine-readable echo so callers
287
- // (cli.ts plan exit code, cabinet UI) can distinguish
288
- // `budget_exhausted` from `tool_refused` without parsing
289
- // the human-readable summary prefix. Code Reviewer P2
290
- // retro 2026-05-23: plan exit code previously collapsed
291
- // both blocked reasons into 0, which masked budget hits.
292
- `outcome=${outcome.status}`,
293
- `session=${session.id}`,
294
- `ctx=${ctx.sessionId}`,
295
- `mirror=${sessionEventsPath}`,
296
- ],
297
- },
298
- };
695
+ finally {
696
+ // r2 (triple-review 2026-05-26 P1): detach the abort listener so
697
+ // long REPL sessions sharing one AbortController across many
698
+ // run() invocations do not accumulate one listener per run on
699
+ // `ctx.signal`. Called on success, abort, and uncaught throw.
700
+ detachAbortListener?.();
701
+ }
702
+ }
703
+ }
704
/**
 * β3 streaming bridge: convert one typed stream event taken from the
 * adapter's internal queue into the narrower `EngineEvent` shape that
 * the public adapter contract exposes.
 *
 * Every recognised variant is flattened into a `{ type: 'status' }`
 * frame whose `message` is a single line of `key=value` text. Payload
 * previews are clipped: tool arguments to 80 characters (with a
 * trailing `...` when clipped), tool/thinking chunks and tool summaries
 * to 120 characters, text chunks to 200 characters.
 *
 * Consumers that need the full typed payload are expected to listen on
 * `adapter.streamEmitter.on('event', ...)` instead; this function only
 * serves callers that understand `status | result`.
 *
 * @param {object} stream - Stream event discriminated by `stream.type`.
 * @returns {{ type: 'status', message: string }} The flattened frame.
 */
function streamEventToEngineEvent(stream) {
  // Single place that builds the outgoing frame shape.
  const asStatus = (message) => ({ type: 'status', message });
  const { type: variant } = stream;

  if (variant === 'status') {
    return asStatus(stream.message);
  }
  if (variant === 'tool.start') {
    const rawArgs = stream.arguments;
    // Only the arguments preview advertises truncation with an ellipsis.
    const ellipsis = rawArgs.length > 80 ? '...' : '';
    return asStatus(
      `tool.start ${stream.name} call=${stream.callId} args=${rawArgs.slice(0, 80)}${ellipsis}`,
    );
  }
  if (variant === 'tool.delta') {
    return asStatus(`tool.delta call=${stream.callId} chunk=${stream.chunk.slice(0, 120)}`);
  }
  if (variant === 'tool.end') {
    return asStatus(
      `tool.end call=${stream.callId} ok=${stream.ok} summary=${stream.summary.slice(0, 120)}`,
    );
  }
  if (variant === 'thinking.start') {
    return asStatus(`thinking.start block=${stream.blockId}`);
  }
  if (variant === 'thinking.delta') {
    return asStatus(
      `thinking.delta block=${stream.blockId} chunk=${stream.chunk.slice(0, 120)}`,
    );
  }
  if (variant === 'thinking.end') {
    return asStatus(`thinking.end block=${stream.blockId}`);
  }
  if (variant === 'text.delta') {
    return asStatus(`text.delta chunk=${stream.chunk.slice(0, 200)}`);
  }
  // Any variant not listed above degrades to a generic frame.
  return asStatus('unknown stream event');
}
301
759
  /**
@@ -367,8 +825,84 @@ function toCommandKind(kind) {
367
825
  return 'build';
368
826
  return kind;
369
827
  }
828
/**
 * β1 (audit E2) → β1a r1 (engine tag contract fix, 2026-05-26): resolve
 * the α6.10 dispatch descriptor for a CLI command kind.
 *
 * CLI command names (`code`, `fix`, `plan`, `build`, `explain`) are not
 * members of the closed `EngineChatTag` vocabulary
 * (`classify | reason | codegen | summarize | vision`). Sending them
 * as-is made the runtime DTO reject the payload with HTTP 400 before it
 * reached the routing layer, so each kind is translated here:
 *
 *   - `code`, `fix` → `codegen`, `budget_hint: 'std'` (edits / patches)
 *   - `build`       → `codegen`, `budget_hint: 'max'` (heavy scaffolding
 *                     lane; the only kind that opts up to `'max'`)
 *   - `plan`        → `reason`, `budget_hint: 'std'` (no mutations)
 *   - `explain`     → `summarize`, `budget_hint: 'std'` (read-only)
 *   - anything else → `reason`, `budget_hint: 'std'` — the conservative
 *                     fallback, so a newly added kind cannot
 *                     accidentally unlock a write-heavy model lane.
 *
 * `priority` is always `'realtime'`: Pugi is an interactive CLI.
 *
 * @param {string} kind - CLI command kind.
 * @returns {{ tag: string, priority: string, budget_hint: string }}
 *   A freshly allocated descriptor on every call.
 */
export function dispatchTagFor(kind) {
  // Start from the conservative fallback and override per known kind.
  let tag = 'reason';
  let budgetHint = 'std';

  if (kind === 'code' || kind === 'fix') {
    tag = 'codegen';
  } else if (kind === 'build') {
    tag = 'codegen';
    budgetHint = 'max';
  } else if (kind === 'explain') {
    tag = 'summarize';
  }
  // `plan` intentionally shares the fallback values (`reason` / `std`).

  return { tag, priority: 'realtime', budget_hint: budgetHint };
}
370
888
  // The per-adapter `engineToolCallIds` Map lives on the
371
889
  // `NativePugiEngineAdapter` instance above — Code Reviewer P2 retro
372
890
  // 2026-05-23 lifted it off the module scope to prevent collisions
373
891
  // under parallel adapter runs (cabinet UI + CLI sharing one process).
892
/**
 * β5a R5+R6: render a cwd path as a breadcrumb relative to the
 * workspace root.
 *
 * Both inputs are normalised with `resolve` first, so relative inputs
 * and trailing separators are handled before comparison.
 *
 * @param {string} workspaceRoot - Workspace root directory.
 * @param {string} cwd - Directory to render.
 * @returns {string} `'.'` when `cwd` equals the workspace root, the
 *   root-relative remainder when `cwd` lies inside the root, and the
 *   absolute `cwd` otherwise.
 */
function relativeOrAbsolute(workspaceRoot, cwd) {
  const absRoot = resolve(workspaceRoot);
  const absCwd = resolve(cwd);
  if (absCwd === absRoot) {
    return '.';
  }
  // `resolve` returns paths that use the platform separator, so a
  // hard-coded '/' would never match on Windows.
  const separator = process.platform === 'win32' ? '\\' : '/';
  // A filesystem root (`/`, `C:\`) already ends with the separator;
  // appending a second one would make every child fail the prefix test.
  const rootPrefix = absRoot.endsWith(separator) ? absRoot : absRoot + separator;
  // Comparing against root + separator (not the bare root) keeps a
  // sibling such as `/repo-other` from being treated as inside `/repo`.
  return absCwd.startsWith(rootPrefix) ? absCwd.slice(rootPrefix.length) : absCwd;
}
374
908
  //# sourceMappingURL=native-pugi.js.map