@pugi/cli 0.1.0-beta.10 → 0.1.0-beta.100

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (445)
  1. package/CHANGELOG.md +132 -0
  2. package/LICENSE +1 -1
  3. package/README.md +53 -11
  4. package/assets/pugi-prozr2-mascot.ansi +9 -0
  5. package/bin/run.js +33 -1
  6. package/dist/commands/deploy.js +40 -40
  7. package/dist/commands/flatten.js +191 -0
  8. package/dist/commands/jobs-watch.js +201 -0
  9. package/dist/commands/jobs.js +42 -27
  10. package/dist/commands/retro.js +210 -0
  11. package/dist/commands/smoke.js +133 -0
  12. package/dist/core/agent-progress/cleanup.js +134 -0
  13. package/dist/core/agent-progress/schema.js +144 -0
  14. package/dist/core/agent-progress/writer.js +101 -0
  15. package/dist/core/agents/adaptive-router.js +330 -0
  16. package/dist/core/agents/query-decomposer.js +297 -0
  17. package/dist/core/agents/registry.js +3 -3
  18. package/dist/core/approvals/shortcut-resolver.js +98 -0
  19. package/dist/core/artifact-chain/dispatcher.js +148 -0
  20. package/dist/core/artifact-chain/exporter.js +164 -0
  21. package/dist/core/artifact-chain/state.js +243 -0
  22. package/dist/core/artifact-chain/steps.js +169 -0
  23. package/dist/core/ask-user/question.js +92 -0
  24. package/dist/core/audit/audit-trail.js +275 -0
  25. package/dist/core/auth/ensure-authenticated.js +129 -0
  26. package/dist/core/auth/env-provider.js +238 -0
  27. package/dist/core/auto-open-browser.js +4 -4
  28. package/dist/core/auto-update/channels.js +122 -0
  29. package/dist/core/auto-update/checker.js +241 -0
  30. package/dist/core/auto-update/state.js +235 -0
  31. package/dist/core/bare-mode/index.js +107 -0
  32. package/dist/core/bash/redirect.js +281 -0
  33. package/dist/core/bash-classifier.js +436 -40
  34. package/dist/core/checkpoint/resumer.js +149 -0
  35. package/dist/core/checkpoint/rewinder.js +291 -0
  36. package/dist/core/checkpoints/shadow-git.js +670 -0
  37. package/dist/core/citations/parser.js +109 -0
  38. package/dist/core/classifier/yolo-classifier.js +88 -0
  39. package/dist/core/codegraph/db.js +506 -0
  40. package/dist/core/codegraph/decision-store.js +248 -0
  41. package/dist/core/codegraph/detect-repo.js +459 -0
  42. package/dist/core/codegraph/install.js +134 -0
  43. package/dist/core/codegraph/offer-hook.js +220 -0
  44. package/dist/core/codegraph/parser.js +71 -0
  45. package/dist/core/codegraph/types.js +34 -0
  46. package/dist/core/compact/auto-trigger.js +96 -0
  47. package/dist/core/compact/buffer-rewriter.js +115 -0
  48. package/dist/core/compact/summarizer.js +208 -0
  49. package/dist/core/compact/token-counter.js +108 -0
  50. package/dist/core/consensus/anvil-fanout.js +25 -25
  51. package/dist/core/consensus/diff-capture.js +121 -12
  52. package/dist/core/consensus/rubric.js +21 -21
  53. package/dist/core/context/builder.js +6 -6
  54. package/dist/core/context/compaction-events.js +8 -8
  55. package/dist/core/context/compaction.js +31 -31
  56. package/dist/core/context/index.js +15 -8
  57. package/dist/core/context/invariants.js +51 -51
  58. package/dist/core/context/markdown-loader.js +28 -10
  59. package/dist/core/context/markdown-traverse.js +255 -0
  60. package/dist/core/context/pugiignore.js +41 -41
  61. package/dist/core/context/repo-skeleton.js +37 -37
  62. package/dist/core/context/tool-eviction.js +55 -0
  63. package/dist/core/context/watcher.js +32 -32
  64. package/dist/core/context/working-set.js +23 -23
  65. package/dist/core/coordinator/agent-tools.js +77 -0
  66. package/dist/core/coordinator/agent-toolset.js +65 -0
  67. package/dist/core/coordinator/fsm.js +73 -0
  68. package/dist/core/coordinator/mode-fsm.js +70 -0
  69. package/dist/core/cost/rate-card.js +129 -0
  70. package/dist/core/cost/tracker.js +221 -0
  71. package/dist/core/credentials.js +13 -13
  72. package/dist/core/cron/scheduler.js +138 -0
  73. package/dist/core/denial-tracking/index.js +8 -0
  74. package/dist/core/denial-tracking/state.js +264 -0
  75. package/dist/core/diagnostics/probe-runner.js +93 -0
  76. package/dist/core/diagnostics/probes/api.js +46 -0
  77. package/dist/core/diagnostics/probes/auth.js +93 -0
  78. package/dist/core/diagnostics/probes/bare-mode.js +42 -0
  79. package/dist/core/diagnostics/probes/cli-version.js +127 -0
  80. package/dist/core/diagnostics/probes/config.js +72 -0
  81. package/dist/core/diagnostics/probes/denial-tracking.js +57 -0
  82. package/dist/core/diagnostics/probes/disk.js +81 -0
  83. package/dist/core/diagnostics/probes/engine-live.js +46 -0
  84. package/dist/core/diagnostics/probes/git.js +65 -0
  85. package/dist/core/diagnostics/probes/hooks.js +118 -0
  86. package/dist/core/diagnostics/probes/mcp.js +75 -0
  87. package/dist/core/diagnostics/probes/node.js +59 -0
  88. package/dist/core/diagnostics/probes/pnpm.js +36 -0
  89. package/dist/core/diagnostics/probes/pugi-md.js +89 -0
  90. package/dist/core/diagnostics/probes/sandbox.js +72 -0
  91. package/dist/core/diagnostics/probes/session.js +74 -0
  92. package/dist/core/diagnostics/probes/status-snapshot.js +488 -0
  93. package/dist/core/diagnostics/probes/workspace.js +63 -0
  94. package/dist/core/diagnostics/types.js +70 -0
  95. package/dist/core/dispatch/cache-cleanup.js +197 -0
  96. package/dist/core/dispatch/cache-handoff.js +295 -0
  97. package/dist/core/edits/apply-patch-layer-e.js +189 -0
  98. package/dist/core/edits/dispatch.js +333 -7
  99. package/dist/core/edits/format-detector.js +260 -0
  100. package/dist/core/edits/format-matrix.js +26 -0
  101. package/dist/core/edits/fuzzy-ladder.js +650 -0
  102. package/dist/core/edits/index.js +5 -1
  103. package/dist/core/edits/journal.js +199 -0
  104. package/dist/core/edits/layer-a-apply.js +15 -15
  105. package/dist/core/edits/layer-a-fuzzy-apply.js +198 -0
  106. package/dist/core/edits/layer-b-apply.js +9 -9
  107. package/dist/core/edits/layer-c-apply.js +6 -6
  108. package/dist/core/edits/layer-d-ast.js +557 -14
  109. package/dist/core/edits/marker-parser.js +12 -12
  110. package/dist/core/edits/security-gate.js +27 -27
  111. package/dist/core/edits/verify-hook.js +273 -0
  112. package/dist/core/edits/worktree.js +29 -29
  113. package/dist/core/engine/anvil-client.js +214 -26
  114. package/dist/core/engine/auto-compact.js +247 -0
  115. package/dist/core/engine/budgets.js +220 -0
  116. package/dist/core/engine/compact-llm-summarizer.js +124 -0
  117. package/dist/core/engine/context-prefix.js +155 -0
  118. package/dist/core/engine/index.js +1 -1
  119. package/dist/core/engine/intensity.js +163 -0
  120. package/dist/core/engine/intent.js +260 -0
  121. package/dist/core/engine/native-pugi.js +1559 -227
  122. package/dist/core/engine/prompts.js +187 -19
  123. package/dist/core/engine/strip-internal-fields.js +124 -0
  124. package/dist/core/engine/tool-bridge.js +1887 -59
  125. package/dist/core/engine/verification-patterns.js +195 -0
  126. package/dist/core/evaluation/golden-dataset.js +293 -0
  127. package/dist/core/feedback/queue.js +177 -0
  128. package/dist/core/feedback/submitter.js +145 -0
  129. package/dist/core/file-cache.js +113 -1
  130. package/dist/core/flatten/flatten-repo.js +439 -0
  131. package/dist/core/format/osc8-link.js +28 -0
  132. package/dist/core/hook-chains.js +392 -0
  133. package/dist/core/hooks/citation-verify-hook.js +138 -0
  134. package/dist/core/hooks/citation-verify.js +112 -0
  135. package/dist/core/hooks/events.js +46 -0
  136. package/dist/core/hooks/index.js +15 -0
  137. package/dist/core/hooks/registry.js +216 -0
  138. package/dist/core/hooks/runner.js +236 -0
  139. package/dist/core/hooks/v2/event-emitter.js +115 -0
  140. package/dist/core/hooks/v2/executor.js +282 -0
  141. package/dist/core/hooks/v2/index.js +25 -0
  142. package/dist/core/hooks/v2/lifecycle.js +104 -0
  143. package/dist/core/hooks/v2/loader.js +216 -0
  144. package/dist/core/hooks/v2/matcher.js +125 -0
  145. package/dist/core/hooks/v2/trust.js +143 -0
  146. package/dist/core/hooks/v2/types.js +86 -0
  147. package/dist/core/hooks/worktree-events.js +158 -0
  148. package/dist/core/image/renderer.js +71 -0
  149. package/dist/core/init/detector.js +582 -0
  150. package/dist/core/init/template-renderer.js +242 -0
  151. package/dist/core/jobs/registry.js +18 -18
  152. package/dist/core/ledger/results-tsv.js +142 -0
  153. package/dist/core/log-discipline/stdout-redirect.js +51 -0
  154. package/dist/core/lsp/cache.js +105 -0
  155. package/dist/core/lsp/client.js +551 -41
  156. package/dist/core/lsp/language-detect.js +66 -0
  157. package/dist/core/lsp/post-edit-diagnostics.js +171 -0
  158. package/dist/core/lsp/server-detect.js +173 -0
  159. package/dist/core/lsp/symbol-cache.js +162 -0
  160. package/dist/core/lsp/symbol-tools.js +664 -0
  161. package/dist/core/mcp/client.js +97 -28
  162. package/dist/core/mcp/http-server.js +553 -0
  163. package/dist/core/mcp/orchestrator-config.js +192 -0
  164. package/dist/core/mcp/orchestrator-tools.js +806 -0
  165. package/dist/core/mcp/permission.js +190 -0
  166. package/dist/core/mcp/registry.js +39 -17
  167. package/dist/core/mcp/server-tools.js +219 -0
  168. package/dist/core/mcp/server.js +397 -0
  169. package/dist/core/mcp/trust.js +10 -10
  170. package/dist/core/memory/dual-write.js +416 -0
  171. package/dist/core/memory/passive-extract.js +130 -0
  172. package/dist/core/memory/phase1-kinds.js +20 -0
  173. package/dist/core/memory/secret-scanner.js +304 -0
  174. package/dist/core/memory-sync/queue.js +170 -0
  175. package/dist/core/metrics/extract.js +113 -0
  176. package/dist/core/modes/roo-modes.js +68 -0
  177. package/dist/core/notes/notes-paths.js +113 -0
  178. package/dist/core/notes/notes-recorder.js +140 -0
  179. package/dist/core/notes/notes-writer.js +53 -0
  180. package/dist/core/notes/renderers.js +0 -0
  181. package/dist/core/notes/slug.js +105 -0
  182. package/dist/core/onboarding/ensure-initialized.js +133 -0
  183. package/dist/core/onboarding/marker.js +111 -0
  184. package/dist/core/onboarding/telemetry-state.js +108 -0
  185. package/dist/core/output-style/presets.js +176 -0
  186. package/dist/core/output-style/state.js +185 -0
  187. package/dist/core/path-security.js +287 -5
  188. package/dist/core/permission.js +82 -22
  189. package/dist/core/permissions/auto-classifier.js +124 -0
  190. package/dist/core/permissions/bash-parser.js +371 -0
  191. package/dist/core/permissions/circuit-breaker.js +83 -0
  192. package/dist/core/permissions/constrained-edit.js +91 -0
  193. package/dist/core/permissions/gate.js +278 -0
  194. package/dist/core/permissions/index.js +20 -0
  195. package/dist/core/permissions/mode.js +174 -0
  196. package/dist/core/permissions/network-egress.js +137 -0
  197. package/dist/core/permissions/state.js +241 -0
  198. package/dist/core/permissions/tool-class.js +107 -0
  199. package/dist/core/plan-mode/ui-state.js +51 -0
  200. package/dist/core/plans/plan-artifact.js +721 -0
  201. package/dist/core/policy-limits/etag-store.js +122 -0
  202. package/dist/core/prd-check/parser.js +215 -0
  203. package/dist/core/prd-check/reporter.js +127 -0
  204. package/dist/core/prd-check/session-review.js +557 -0
  205. package/dist/core/prd-check/verifiers.js +223 -0
  206. package/dist/core/prompt-cache/client-cache.js +99 -0
  207. package/dist/core/prompts/assembly.js +29 -0
  208. package/dist/core/prompts/registry.js +364 -0
  209. package/dist/core/pugi-gitignore.js +52 -0
  210. package/dist/core/pugi-md/cc-compat-rules.js +735 -0
  211. package/dist/core/pugi-md/context-injector.js +76 -0
  212. package/dist/core/pugi-md/walk-up.js +207 -0
  213. package/dist/core/python/uv-installer.js +270 -0
  214. package/dist/core/python/uv-resolver.js +83 -0
  215. package/dist/core/rate-limit/narrator.js +146 -0
  216. package/dist/core/recipes/cli-types.js +20 -0
  217. package/dist/core/recipes/loader.js +103 -0
  218. package/dist/core/recipes/runner.js +345 -0
  219. package/dist/core/recipes/schema.js +587 -0
  220. package/dist/core/release-notes/parser.js +241 -0
  221. package/dist/core/release-notes/state.js +116 -0
  222. package/dist/core/repl/ask.js +37 -37
  223. package/dist/core/repl/cancellation.js +26 -26
  224. package/dist/core/repl/cap-warning.js +4 -4
  225. package/dist/core/repl/clipboard-read.js +11 -11
  226. package/dist/core/repl/dispatch-fsm.js +12 -12
  227. package/dist/core/repl/engine-bridge.js +303 -0
  228. package/dist/core/repl/history-search.js +15 -15
  229. package/dist/core/repl/history.js +28 -18
  230. package/dist/core/repl/kill-ring.js +5 -5
  231. package/dist/core/repl/model-pricing.js +135 -0
  232. package/dist/core/repl/privacy-banner.js +22 -22
  233. package/dist/core/repl/session.js +2690 -229
  234. package/dist/core/repl/slash-commands.js +540 -41
  235. package/dist/core/repl/store/index.js +1 -1
  236. package/dist/core/repl/store/jsonl-log.js +22 -22
  237. package/dist/core/repl/store/lockfile.js +10 -10
  238. package/dist/core/repl/store/session-store.js +136 -107
  239. package/dist/core/repl/store/types.js +15 -15
  240. package/dist/core/repl/store/uuid-v7.js +12 -12
  241. package/dist/core/repl/tool-route.js +382 -0
  242. package/dist/core/repl/workspace-context.js +43 -21
  243. package/dist/core/repo-map/build.js +125 -0
  244. package/dist/core/repo-map/cache.js +185 -0
  245. package/dist/core/repo-map/extractor.js +254 -0
  246. package/dist/core/repo-map/formatter.js +145 -0
  247. package/dist/core/repo-map/page-rank.js +105 -0
  248. package/dist/core/repo-map/scanner.js +211 -0
  249. package/dist/core/retro/git-collector.js +251 -0
  250. package/dist/core/retro/health-card.js +25 -0
  251. package/dist/core/retro/metrics.js +342 -0
  252. package/dist/core/retro/narrative.js +249 -0
  253. package/dist/core/retro/plane-collector.js +274 -0
  254. package/dist/core/retro/pr-issue-link.js +65 -0
  255. package/dist/core/retro/types.js +16 -0
  256. package/dist/core/retry-budget/budget.js +284 -0
  257. package/dist/core/retry-budget/index.js +5 -0
  258. package/dist/core/retry-budget/retry-cap.js +74 -0
  259. package/dist/core/routing/lead-worker.js +43 -0
  260. package/dist/core/routing/pre-flight-estimator.js +108 -0
  261. package/dist/core/runs/run-tree.js +103 -0
  262. package/dist/core/sandboxing/adapter.js +29 -0
  263. package/dist/core/sandboxing/index.js +49 -0
  264. package/dist/core/sandboxing/none.js +19 -0
  265. package/dist/core/sandboxing/seatbelt.js +183 -0
  266. package/dist/core/security/injection-scanner.js +367 -0
  267. package/dist/core/security/output-filter.js +418 -0
  268. package/dist/core/session/env-file.js +105 -0
  269. package/dist/core/session/section-budgets.js +140 -0
  270. package/dist/core/session.js +119 -0
  271. package/dist/core/settings.js +378 -5
  272. package/dist/core/share/formatter.js +271 -0
  273. package/dist/core/share/redactor.js +221 -0
  274. package/dist/core/share/uploader.js +267 -0
  275. package/dist/core/skills/defaults.js +30 -30
  276. package/dist/core/skills/loader.js +22 -22
  277. package/dist/core/skills/sources.js +27 -27
  278. package/dist/core/smoke/headless-driver.js +174 -0
  279. package/dist/core/smoke/orchestrator.js +194 -0
  280. package/dist/core/smoke/runner.js +238 -0
  281. package/dist/core/smoke/scenario-parser.js +316 -0
  282. package/dist/core/statusline.js +99 -0
  283. package/dist/core/subagents/dispatcher-real.js +600 -0
  284. package/dist/core/subagents/dispatcher.js +146 -52
  285. package/dist/core/subagents/index.js +19 -6
  286. package/dist/core/subagents/isolation-matrix.js +213 -0
  287. package/dist/core/subagents/spawn.js +19 -4
  288. package/dist/core/telemetry/emitter.js +229 -0
  289. package/dist/core/telemetry/queue.js +251 -0
  290. package/dist/core/theme/context.js +91 -0
  291. package/dist/core/theme/presets.js +228 -0
  292. package/dist/core/theme/state.js +181 -0
  293. package/dist/core/todos/invariant.js +10 -0
  294. package/dist/core/todos/state.js +177 -0
  295. package/dist/core/tool-schema/compressor.js +89 -0
  296. package/dist/core/transport/version-interceptor.js +166 -0
  297. package/dist/core/trust.js +2 -2
  298. package/dist/core/tui/thinking-block.js +64 -0
  299. package/dist/core/vim/keymap.js +288 -0
  300. package/dist/core/vim/state.js +92 -0
  301. package/dist/core/watch-markers/marker-watcher.js +133 -0
  302. package/dist/core/worktree/include-parser.js +249 -0
  303. package/dist/core/worktree-manager/cleanup.js +123 -0
  304. package/dist/core/worktree-manager/manager.js +303 -0
  305. package/dist/index.js +36 -0
  306. package/dist/runtime/bootstrap.js +190 -0
  307. package/dist/runtime/cli.js +4345 -561
  308. package/dist/runtime/commands/agents.js +31 -31
  309. package/dist/runtime/commands/budget.js +5 -5
  310. package/dist/runtime/commands/cancel.js +231 -0
  311. package/dist/runtime/commands/chain.js +489 -0
  312. package/dist/runtime/commands/codegraph-status.js +227 -0
  313. package/dist/runtime/commands/compact.js +297 -0
  314. package/dist/runtime/commands/config.js +74 -40
  315. package/dist/runtime/commands/cost.js +199 -0
  316. package/dist/runtime/commands/delegate.js +27 -4
  317. package/dist/runtime/commands/dispatch.js +126 -0
  318. package/dist/runtime/commands/doctor.js +579 -0
  319. package/dist/runtime/commands/feedback.js +184 -0
  320. package/dist/runtime/commands/hooks.js +187 -0
  321. package/dist/runtime/commands/index-cmd.js +353 -0
  322. package/dist/runtime/commands/init.js +254 -0
  323. package/dist/runtime/commands/lsp.js +200 -38
  324. package/dist/runtime/commands/mcp.js +935 -0
  325. package/dist/runtime/commands/memory.js +582 -0
  326. package/dist/runtime/commands/model.js +237 -0
  327. package/dist/runtime/commands/onboarding.js +275 -0
  328. package/dist/runtime/commands/patch.js +12 -12
  329. package/dist/runtime/commands/permissions.js +112 -0
  330. package/dist/runtime/commands/plan.js +143 -0
  331. package/dist/runtime/commands/prd-check.js +285 -0
  332. package/dist/runtime/commands/privacy.js +17 -17
  333. package/dist/runtime/commands/recipe.js +325 -0
  334. package/dist/runtime/commands/redo-blob-store.js +92 -0
  335. package/dist/runtime/commands/redo.js +361 -0
  336. package/dist/runtime/commands/release-notes.js +229 -0
  337. package/dist/runtime/commands/repo-map.js +95 -0
  338. package/dist/runtime/commands/report.js +299 -0
  339. package/dist/runtime/commands/resume.js +118 -0
  340. package/dist/runtime/commands/review-consensus.js +68 -53
  341. package/dist/runtime/commands/rewind.js +333 -0
  342. package/dist/runtime/commands/roster.js +14 -14
  343. package/dist/runtime/commands/servers.js +236 -0
  344. package/dist/runtime/commands/sessions.js +163 -0
  345. package/dist/runtime/commands/share.js +316 -0
  346. package/dist/runtime/commands/skills.js +31 -31
  347. package/dist/runtime/commands/status.js +186 -0
  348. package/dist/runtime/commands/stickers.js +82 -0
  349. package/dist/runtime/commands/style.js +194 -0
  350. package/dist/runtime/commands/theme.js +196 -0
  351. package/dist/runtime/commands/undo.js +54 -22
  352. package/dist/runtime/commands/update.js +289 -0
  353. package/dist/runtime/commands/vim.js +140 -0
  354. package/dist/runtime/commands/worktree.js +8 -8
  355. package/dist/runtime/commands/worktrees.js +155 -0
  356. package/dist/runtime/deprecation-warning.js +69 -0
  357. package/dist/runtime/engine-exit-code.js +50 -0
  358. package/dist/runtime/headless-repl.js +195 -0
  359. package/dist/runtime/headless.js +548 -0
  360. package/dist/runtime/load-hooks-or-exit.js +71 -0
  361. package/dist/runtime/plan-decompose.js +22 -22
  362. package/dist/runtime/sigint-guard.js +272 -0
  363. package/dist/runtime/stream-renderer.js +195 -0
  364. package/dist/runtime/update-check.js +28 -28
  365. package/dist/runtime/version.js +65 -0
  366. package/dist/runtime/worktree-bootstrap.js +579 -0
  367. package/dist/skills/bundled/batch.js +617 -0
  368. package/dist/skills/bundled/index.js +45 -0
  369. package/dist/skills/bundled/loop.js +358 -0
  370. package/dist/skills/bundled/remember.js +383 -0
  371. package/dist/skills/bundled/simplify.js +289 -0
  372. package/dist/skills/bundled/skillify.js +373 -0
  373. package/dist/skills/bundled/stuck.js +558 -0
  374. package/dist/skills/bundled/verify.js +439 -0
  375. package/dist/testing/vcr.js +486 -0
  376. package/dist/tools/agent-tool.js +229 -0
  377. package/dist/tools/apply-patch.js +89 -28
  378. package/dist/tools/ask-user-question.js +337 -0
  379. package/dist/tools/ask-user.js +115 -0
  380. package/dist/tools/bash.js +624 -46
  381. package/dist/tools/brief.js +224 -0
  382. package/dist/tools/cron.js +433 -0
  383. package/dist/tools/enter-worktree.js +250 -0
  384. package/dist/tools/exit-worktree.js +147 -0
  385. package/dist/tools/file-tools.js +161 -44
  386. package/dist/tools/http-request.js +336 -0
  387. package/dist/tools/lsp-tools.js +377 -1
  388. package/dist/tools/mcp-tool.js +260 -0
  389. package/dist/tools/multi-edit.js +361 -0
  390. package/dist/tools/powershell.js +268 -0
  391. package/dist/tools/registry.js +120 -5
  392. package/dist/tools/server-tools.js +892 -0
  393. package/dist/tools/skill-tool.js +96 -0
  394. package/dist/tools/sleep.js +99 -0
  395. package/dist/tools/synthetic-output.js +133 -0
  396. package/dist/tools/tasks.js +208 -0
  397. package/dist/tools/todo-write.js +184 -0
  398. package/dist/tools/verify-plan-execution.js +295 -0
  399. package/dist/tools/web-fetch-injection-scanner.js +207 -0
  400. package/dist/tools/web-fetch.js +195 -10
  401. package/dist/tools/web-search.js +458 -0
  402. package/dist/tui/agent-progress-card.js +111 -0
  403. package/dist/tui/agent-tree.js +22 -1
  404. package/dist/tui/ask-modal.js +14 -14
  405. package/dist/tui/ask-user-question-chips.js +315 -0
  406. package/dist/tui/ask-user-question-prompt.js +203 -0
  407. package/dist/tui/compact-banner.js +81 -0
  408. package/dist/tui/conversation-pane.js +85 -11
  409. package/dist/tui/cost-table.js +111 -0
  410. package/dist/tui/device-flow.js +2 -2
  411. package/dist/tui/doctor-table.js +46 -0
  412. package/dist/tui/feedback-prompt.js +156 -0
  413. package/dist/tui/input-box.js +247 -32
  414. package/dist/tui/login-picker.js +3 -3
  415. package/dist/tui/markdown-render.js +6 -6
  416. package/dist/tui/multi-file-diff-approval.js +375 -0
  417. package/dist/tui/onboarding-wizard.js +240 -0
  418. package/dist/tui/permissions-picker.js +86 -0
  419. package/dist/tui/render.js +36 -1
  420. package/dist/tui/repl-render.js +239 -25
  421. package/dist/tui/repl-splash-art.js +16 -16
  422. package/dist/tui/repl-splash-mascot.js +48 -24
  423. package/dist/tui/repl-splash.js +22 -22
  424. package/dist/tui/repl.js +125 -45
  425. package/dist/tui/slash-palette.js +6 -6
  426. package/dist/tui/splash.js +2 -2
  427. package/dist/tui/status-bar.js +109 -31
  428. package/dist/tui/status-table.js +7 -0
  429. package/dist/tui/stickers-art.js +136 -0
  430. package/dist/tui/style-table.js +28 -0
  431. package/dist/tui/theme-table.js +29 -0
  432. package/dist/tui/thinking-spinner.js +123 -0
  433. package/dist/tui/tool-stream-pane.js +53 -4
  434. package/dist/tui/update-banner.js +27 -2
  435. package/dist/tui/vim-input.js +267 -0
  436. package/dist/tui/welcome-banner.js +107 -0
  437. package/dist/tui/welcome-data.js +293 -0
  438. package/dist/tui/workspace-context.js +2 -2
  439. package/package.json +21 -5
  440. package/test/scenarios/codegen-create-file.scenario.txt +13 -0
  441. package/test/scenarios/compact-force.scenario.txt +12 -0
  442. package/test/scenarios/identity.scenario.txt +11 -0
  443. package/test/scenarios/persona-handoff.scenario.txt +12 -0
  444. package/test/scenarios/walkback.scenario.txt +12 -0
  445. package/dist/core/engine/compaction-hook.js +0 -154
@@ -1,25 +1,56 @@
1
1
  import { appendFileSync, existsSync, mkdirSync } from 'node:fs';
2
+ import { randomUUID } from 'node:crypto';
2
3
  import { resolve } from 'node:path';
3
- import { defaultEngineBudgets, runEngineLoop, } from '@pugi/sdk';
4
+ import { AsyncEventQueue, EngineEventEmitter, modelSupportsThinking, runEngineLoop, splitThinkingBlocks, } from '@pugi/sdk';
4
5
  import { FileReadCache } from '../file-cache.js';
5
6
  import { loadSettings } from '../settings.js';
6
7
  import { openSession, recordToolCall, recordToolResult } from '../session.js';
8
+ import { REGRESSION_DISPUTE_PHRASES } from './verification-patterns.js';
9
+ import { prewarmRealDispatch } from '../subagents/dispatcher.js';
10
+ import { resolveAutoCompactConfig, resolveBudget } from './budgets.js';
11
+ import { maybeCompactAsync } from './auto-compact.js';
12
+ import { writeAuditEvent } from '../audit/audit-trail.js';
13
+ import { recordSessionNotes } from '../notes/notes-recorder.js';
7
14
  import { buildExecutor, buildToolsSchema } from './tool-bridge.js';
8
15
  import { personaSlugFor, systemPromptFor } from './prompts.js';
16
+ import { CancellationToken } from '../repl/cancellation.js';
17
+ import { fireTaskCompletedChain } from '../hook-chains.js';
18
+ // β5a R5+R6 + P1 : per-turn `<context>` prefix + intent
19
+ // classifier marker. Both pure functions, no fs cost at adapter init.
20
+ // Per-dir markdown traverse fires once per `run()`; budget capped so
21
+ // it never dominates the prompt budget.
22
+ import { buildContextPrefix, spliceContextPrefix } from './context-prefix.js';
23
+ import { applyIntentMarker, classifyIntent } from './intent.js';
24
+ import { loadTraversedMarkdown } from '../context/markdown-traverse.js';
25
+ import { isBareMode } from '../bare-mode/index.js';
26
+ import { walkUpPugiMd } from '../pugi-md/walk-up.js';
27
+ import { renderAmbientContext } from '../pugi-md/context-injector.js';
28
+ // Backlog : `@import` + `paths:` glob loader.
29
+ // Runs over each `HierarchyFile` the walker returns to expand imports
30
+ // (capped + cycle-safe) and capture per-rule `paths:` frontmatter. The
31
+ // loader is pure-fs so it cannot break the engine loop — any failure
32
+ // degrades to "no expansion for this file" and the un-expanded walker
33
+ // body is used as the rule body.
34
+ import { loadRulesFile } from '../pugi-md/cc-compat-rules.js';
35
+ import { homedir as osHomedir } from 'node:os';
36
+ // L11 : per-session DenialTrackingState. One instance
37
+ // per `run()` so denials cluster by (tool, args) within the same
38
+ // command but do NOT leak across CLI invocations.
39
+ import { DenialTrackingState } from '../denial-tracking/state.js';
9
40
  /**
10
41
  * Real `NativePugiEngineAdapter`. Drives the Pugi CLI's tool-use loop:
11
42
  *
12
- * 1. Pick a system prompt + persona based on the task kind
13
- * (code/explain/fix/plan/build).
14
- * 2. Build an OpenAI-shaped tools schema from the local tool registry,
15
- * gated by plan-mode (read-only).
16
- * 3. Open a workspace tool context (settings, session, read cache).
17
- * 4. Drive `runEngineLoop` against an `EngineLoopClient` until the
18
- * model returns a final text answer or the per-command budget is
19
- * exhausted.
20
- * 5. Surface every turn / tool call into both the engine event stream
21
- * (consumer-visible status events) and the existing session log
22
- * (`.pugi/events.jsonl`) so audit replay sees every step.
43
+ * 1. Pick a system prompt + persona based on the task kind
44
+ * (code/explain/fix/plan/build).
45
+ * 2. Build an OpenAI-shaped tools schema from the local tool registry,
46
+ * gated by plan-mode (read-only).
47
+ * 3. Open a workspace tool context (settings, session, read cache).
48
+ * 4. Drive `runEngineLoop` against an `EngineLoopClient` until the
49
+ * model returns a final text answer or the per-command budget is
50
+ * exhausted.
51
+ * 5. Surface every turn / tool call into both the engine event stream
52
+ * (consumer-visible status events) and the existing session log
53
+ * (`.pugi/events.jsonl`) so audit replay sees every step.
23
54
  *
24
55
  * The adapter is intentionally transport-agnostic. `client` is required
25
56
  * at construction; the CLI builds an `AnvilEngineLoopClient` from the
@@ -28,12 +59,12 @@ import { personaSlugFor, systemPromptFor } from './prompts.js';
28
59
  * up so unit tests can construct the adapter with an in-memory client.
29
60
  *
30
61
  * The engine task → loop mapping:
31
- * - `task.kind === 'build_task'` is mapped to the `build` command.
32
- * - `task.prompt` is the user message.
33
- * - `task.workspaceRoot` pins the workspace root for tool execution.
34
- * - `task.permissionMode` is read by the existing permission module;
35
- * the adapter itself only enforces the plan-mode tool gate which is
36
- * keyed on `kind`, not on permissionMode.
62
+ * - `task.kind === 'build_task'` is mapped to the `build` command.
63
+ * - `task.prompt` is the user message.
64
+ * - `task.workspaceRoot` pins the workspace root for tool execution.
65
+ * - `task.permissionMode` is read by the existing permission module;
66
+ * the adapter itself only enforces the plan-mode tool gate which is
67
+ * keyed on `kind`, not on permissionMode.
37
68
  */
38
69
  export class NativePugiEngineAdapter {
39
70
  options;
@@ -41,7 +72,7 @@ export class NativePugiEngineAdapter {
41
72
  /**
42
73
  * Per-adapter scratch map: links the loop's tool_call id to the
43
74
  * audit record id returned by `recordToolCall`. Code Reviewer P2
44
- * retro 2026-05-23 moved this off the module scope — two adapters
75
+ * retro moved this off the module scope — two adapters
45
76
  * driven concurrently (cabinet UI + CLI on the same process) would
46
77
  * otherwise share the same Map and a fast turn from adapter A
47
78
  * could `.delete()` an entry that belonged to adapter B before its
@@ -50,8 +81,30 @@ export class NativePugiEngineAdapter {
50
81
  * to a single `run()` invocation.
51
82
  */
52
83
  engineToolCallIds = new Map();
84
+ /**
85
+ * β3 streaming additive: optional typed event emitter that mirrors
86
+ * every async-queue event so external consumers (admin-api SSE
87
+ * controller, future cabinet WebSocket relay) can attach without
88
+ * holding the async iterator. The CLI itself only consumes the
89
+ * `AsyncIterable<EngineEvent>` returned by `run()`; the emitter is
90
+ * a fan-out point for additional subscribers.
91
+ */
92
+ streamEmitter = new EngineEventEmitter();
53
93
  constructor(options) {
54
94
  this.options = options;
95
+ // β2a r1 (Backend Architect P1): kick off the real
96
+ // dispatcher's module import at adapter init so the first
97
+ // `agent` tool call does not pay 50-200ms cold-start. We fire
98
+ // the promise without awaiting — by the time the engine loop
99
+ // runs and the model issues an `agent` call, the import has
100
+ // resolved. The promise is swallowed because a failed prewarm
101
+ // would surface again at dispatch time with the real error.
102
+ void prewarmRealDispatch().catch(() => {
103
+ // Intentional no-op: the actual dispatch call will surface
104
+ // the import failure (if any) with the right call stack. A
105
+ // prewarm-time failure is just a missed optimization, not a
106
+ // correctness issue.
107
+ });
55
108
  }
56
109
  async capabilities() {
57
110
  return {
@@ -59,7 +112,13 @@ export class NativePugiEngineAdapter {
59
112
  supportsFileEdits: true,
60
113
  supportsShell: true,
61
114
  supportsLsp: false,
62
- supportsSubagents: false,
115
+ // β2 S2 : real subagent dispatch shipped via the
116
+ // `agent` tool (apps/pugi-cli/src/tools/agent-tool.ts) plus the
117
+ // genuine `runEngineLoop`-backed dispatcher
118
+ // (apps/pugi-cli/src/core/subagents/dispatcher-real.ts). The
119
+ // capability flag flips after S1 + S3 + S4 land so cabinet UI +
120
+ // remote orchestrators can rely on the advertised contract.
121
+ supportsSubagents: true,
63
122
  };
64
123
  }
65
124
  async *run(task, ctx) {
@@ -67,235 +126,1077 @@ export class NativePugiEngineAdapter {
67
126
  const root = task.workspaceRoot;
68
127
  const session = this.options.session ?? openSession(root);
69
128
  const settings = loadSettings(root);
70
- const toolCtx = {
71
- root,
72
- settings,
73
- session,
74
- readCache: new FileReadCache(),
75
- };
76
- const budget = task.budget?.tokens
77
- ? {
78
- maxTokens: task.budget.tokens,
79
- // The task-level budget only carries tokens; tool calls keep
80
- // the per-command default so a careless caller cannot disable
81
- // the call-count guard by overriding usd/tokens.
82
- maxToolCalls: defaultEngineBudgets[kind].maxToolCalls,
129
+ // P1 fix (deep audit): wire ctx.signal (AbortSignal) into
130
+ // a CancellationToken so the tool-bridge cancellation gate
131
+ // (`ctx.cancellation?.isAborted` check at tool-bridge.ts:656 +
132
+ // file-tools `gateOnCancellation` calls) fires when the operator
133
+ // aborts mid-tool. Before this fix `toolCtx` carried no cancellation
134
+ // field — only the next runEngineLoop iteration via `ctx.signal`
135
+ // aborted at the turn boundary, so a long-running tool (a sleeping
136
+ // bash command, a slow grep across the repo) could not be cancelled
137
+ // mid-call.
138
+ //
139
+ // The token is wired one-way: ctx.signal -> token. Aborting the
140
+ // token directly does NOT propagate back to the AbortSignal; the
141
+ // engine's own cancellation already lives upstream via the signal
142
+ // so the back-edge is unnecessary.
143
+ //
144
+ // r2 fix (triple-review P1): the abort listener was
145
+ // registered with `{ once: true }` — on actual abort it auto-detaches
146
+ // and disappears, but on the (common) NON-abort path where `run()`
147
+ // completes cleanly the listener stays attached to `ctx.signal`
148
+ // forever. Over a long REPL session (one shared AbortController per
149
+ // session, many run() invocations) listeners accumulate one per
150
+ // run, leaking memory and CPU on `dispatchEvent`. We now track the
151
+ // detach handle and call it unconditionally in the run()'s finally
152
+ // block so cleanup happens on both the success and abort paths.
153
+ const cancellation = new CancellationToken();
154
+ let detachAbortListener;
155
+ if (ctx.signal) {
156
+ if (ctx.signal.aborted) {
157
+ cancellation.abort();
158
+ }
159
+ else {
160
+ const handler = () => cancellation.abort();
161
+ ctx.signal.addEventListener('abort', handler, { once: true });
162
+ detachAbortListener = () => {
163
+ ctx.signal.removeEventListener('abort', handler);
164
+ };
165
+ }
166
+ }
167
+ // r2 (triple-review P1): everything below runs inside a
168
+ // try/finally so the AbortSignal listener detaches on BOTH the
169
+ // success and abort paths. Without this wrap a long REPL session
170
+ // (one persistent AbortController, many run() invocations) leaked
171
+ // one abort listener per non-aborted run.
172
+ //
173
+ // #24 (CEO P1) — TaskCompleted chain. We
174
+ // capture `taskStartedAt` BEFORE the try block so the duration
175
+ // measured by the chain payload covers the full dispatch wall
176
+ // time (including the abort-listener wiring above). The
177
+ // `fireTaskCompletedOnce` guard ensures the chain fires at most
178
+ // once per `run()` invocation even when multiple `yield result`
179
+ // sites are reached (defensive — the existing flow yields exactly
180
+ // one result, but a future code path that yields twice would
181
+ // double-fire otherwise).
182
+ const taskStartedAt = Date.now();
183
+ let taskCompletedFired = false;
184
+ const fireTaskCompletedOnce = async (exitCode, toolCalls, filesChangedList) => {
185
+ if (taskCompletedFired)
186
+ return;
187
+ taskCompletedFired = true;
188
+ try {
189
+ await fireTaskCompletedChain(root, {
190
+ command: kind,
191
+ exitCode,
192
+ durationMs: Date.now() - taskStartedAt,
193
+ toolCalls,
194
+ filesChanged: [...filesChangedList],
195
+ });
196
+ }
197
+ catch (chainError) {
198
+ process.stderr.write(`[pugi hook-chains] TaskCompleted chain crashed: ${chainError.message}\n`);
83
199
  }
84
- : defaultEngineBudgets[kind];
85
- yield {
86
- type: 'status',
87
- message: `Pugi engine starting: kind=${kind} budget=${budget.maxToolCalls} calls / ${budget.maxTokens} tokens`,
88
200
  };
89
- // Buffer status events emitted from inside the loop hooks. Async
90
- // generators cannot yield from synchronous callbacks, so we collect
91
- // them in a queue and drain after the loop call completes. The loop
92
- // is short enough (≤ ~30 turns) that latency-to-stdout is acceptable
93
- // — a follow-up PR can switch to an event emitter for true streaming.
94
- const buffer = [];
95
- // Track files mutated by the loop. We extract the path from the JSON
96
- // arguments of every successful write/edit tool call; `bash` is left
97
- // out because its filesystem footprint is opaque (a single command
98
- // can touch dozens of paths via `make`, `pnpm build`, etc). The
99
- // per-session events.jsonl already carries every file_mutation event
100
- // for replay; this set is only the headline summary the CLI prints.
101
- const filesChanged = new Set();
102
- // Pending lookup: call.id → path extracted from arguments. We only
103
- // commit to `filesChanged` when the corresponding onToolResult fires
104
- // with `ok: true`, so a refused or failed edit does not surface as
105
- // a phantom change in the operator summary.
106
- const pendingMutations = new Map();
107
- // Per-session events mirror — `.pugi/sessions/<id>/events.jsonl`.
108
- // The existing global log at `.pugi/events.jsonl` is preserved as
109
- // the audit-replay source of truth; this mirror is the easy-to-find
110
- // per-run log for operators and the cabinet UI (Sprint 2B).
111
- const sessionEventsPath = openSessionMirror(root, session.id);
112
- const hooks = {
113
- onTurnStart: (turnIndex, messageCount) => {
114
- const msg = `turn ${turnIndex + 1}: requesting model (transcript=${messageCount} messages)`;
115
- buffer.push({ type: 'status', message: msg });
116
- appendSessionMirror(sessionEventsPath, { type: 'turn_start', turn: turnIndex + 1, transcript: messageCount });
117
- },
118
- onTurnComplete: (turnIndex, response) => {
119
- if (response.stop === 'tool_use') {
120
- const calls = response.assistantMessage.toolCalls ?? [];
121
- buffer.push({
122
- type: 'status',
123
- message: `turn ${turnIndex + 1}: model requested ${calls.length} tool call(s)`,
124
- });
125
- appendSessionMirror(sessionEventsPath, {
126
- type: 'turn_complete',
127
- turn: turnIndex + 1,
128
- stop: 'tool_use',
129
- toolCalls: calls.length,
130
- tokensUsed: response.tokensUsed,
131
- });
201
+ try {
202
+ const toolCtx = {
203
+ root,
204
+ settings,
205
+ session,
206
+ readCache: new FileReadCache(),
207
+ cancellation,
208
+ };
209
+ // L11 : instantiate per-`run()` denial tracker. The
210
+ // executor records every refusal (PLAN_MODE_REFUSED, HOOK_BLOCKED,
211
+ // OPERATOR_ABORTED, STALE_READ, unknown-tool, plan-mode agent) and
212
+ // the user-prompt assembler below splices a compact reminder when
213
+ // the same (tool, args) pair has been denied twice or more. The
214
+ // tracker is in-memory only — the audit ledger at
215
+ // `.pugi/events.jsonl` already captures the full per-event log for
216
+ // forensic replay; this surface is the model-facing aggregate.
217
+ const denialTracking = new DenialTrackingState();
218
+ // β1a r1 (budget wiring): swap the legacy SDK per-
219
+ // command budget lookup for the Pl9 `resolveBudget()` pipeline so
220
+ // `.pugi/settings.json::budgets.<command>` overrides actually take
221
+ // effect at runtime + the HARD_MAX_* caps guard misconfigured
222
+ // envelopes pre-flight. Before this fix the β1 Pl9 module
223
+ // (`core/engine/budgets.ts`) was dead code — the adapter still
224
+ // read the per-command defaults from the SDK, so operators who
225
+ // set `budgets.code.maxTokens = 50000` in settings.json got the
226
+ // legacy 30k anyway and `assertBudgetWithinTier` never ran.
227
+ //
228
+ // Task-level token override (e.g. CLI `--max-tokens`) keeps
229
+ // precedence; tool-call ceiling falls through to the resolved
230
+ // budget so a careless caller cannot disable the call-count
231
+ // guard by setting only token count.
232
+ //
233
+ // Triple-review P1 follow-up : forward `task.budget.turns`
234
+ // through the resolver so `EngineBudget.maxTurns` actually lands on
235
+ // the SDK's `runEngineLoop`. The CLI seam packs both `--max-turns`
236
+ // (explicit operator override) and the intensity profile's per-tier
237
+ // cap into this field with explicit-flag-wins precedence.
238
+ const taskBudgetOverride = {};
239
+ if (task.budget?.tokens)
240
+ taskBudgetOverride.maxTokens = task.budget.tokens;
241
+ if (task.budget?.turns !== undefined)
242
+ taskBudgetOverride.maxTurns = task.budget.turns;
243
+ const budget = resolveBudget(kind, settings, Object.keys(taskBudgetOverride).length > 0 ? taskBudgetOverride : undefined);
244
+ // CEO P1 #14 (auto-compact): resolve the per-workspace
245
+ // override of the 75% threshold gate. Default is `{ enabled: true,
246
+ // thresholdRatio: 0.75 }`; operators kill it via
247
+ // `.pugi/settings.json::autoCompact.enabled = false` or retune the
248
+ // ratio. The resolved config is captured by the closure that
249
+ // `runEngineLoop` invokes pre-send on every turn.
250
+ //
251
+ // PR E (2026-06-05): `summaryMode` defaults to `'llm'` so the
252
+ // sentinel is a narrative produced by Gemini Flash via Anvil rather
253
+ // than a deterministic counts line. The summarizer below is bound
254
+ // to the same engine client the loop uses for tool turns, so the
255
+ // summary call shares auth + transport with the rest of the loop.
256
+ const autoCompactConfig = resolveAutoCompactConfig(settings);
257
+ const summaryLlmCall = autoCompactConfig.summaryMode === 'llm'
258
+ ? buildSummarizerCall(this.options.client, personaSlugFor(kind))
259
+ : null;
260
+ // β3 streaming: pre-build the typed stream event queue so the hook
261
+ // callbacks below can push live events that this async generator
262
+ // yields IMMEDIATELY (instead of buffering until `runEngineLoop`
263
+ // completes). Operator now sees the first `tool.start` within
264
+ // ~tens of ms of the model emitting it, not 30+ s after the loop
265
+ // settles.
266
+ const streamQueue = new AsyncEventQueue();
267
+ const emitter = this.streamEmitter;
268
+ const supportsThinking = modelSupportsThinking(this.options.model);
269
+ /**
270
+ * Push one typed stream event into BOTH the per-run async queue
271
+ * (the CLI's iterator) and the long-lived emitter (the multiplex
272
+ * fan-out for admin-api SSE / cabinet WebSocket subscribers).
273
+ * The function stamps `timestamp` once so both consumers see the
274
+ * same wall clock.
275
+ */
276
+ const emitStream = (event) => {
277
+ const stamped = {
278
+ ...event,
279
+ timestamp: new Date().toISOString(),
280
+ };
281
+ streamQueue.push(stamped);
282
+ emitter.emit('event', stamped);
283
+ };
284
+ // r1 fix per triple-review Backend Architect P1: unify yield path via
285
+ // emitStream + streamQueue drain so the iterator consumer does NOT
286
+ // see this status frame twice. Pre-fix did both bare yield + emitStream
287
+ // → iterator got 2 copies, emitter got 1.
288
+ emitStream({
289
+ type: 'status',
290
+ message: `Pugi engine starting: kind=${kind} budget=${budget.maxToolCalls} calls / ${budget.maxTokens} tokens`,
291
+ });
292
+ // #21 : emit `dispatch_start` to the
293
+ // tenant-wide audit trail at `~/.pugi/audit/<tenant>/<slug>-<hash>
294
+ // .jsonl`. Append-only, never throws — a misconfigured audit
295
+ // surface must not block a dispatch. The per-session mirror under
296
+ // `.pugi/sessions/<id>/events.jsonl` remains as a redundant copy.
297
+ // PR F (2026-06-05): capture dispatch start wall-clock so the
298
+ // notes recorder can compute durationSeconds at dispatch end.
299
+ const dispatchStartedAt = new Date().toISOString();
300
+ writeAuditEvent({
301
+ event: 'dispatch_start',
302
+ sessionId: session.id,
303
+ workspaceRoot: root,
304
+ data: {
305
+ kind,
306
+ promptLength: task.prompt.length,
307
+ maxToolCalls: budget.maxToolCalls,
308
+ maxTokens: budget.maxTokens,
309
+ model: this.options.model ?? null,
310
+ },
311
+ });
312
+ // β5a R1+R4+R5+R6+P1 : build the per-turn `<context>`
313
+ // prefix and apply the intent marker so the model sees:
314
+ // 1. cwd + open-files + per-dir-conventions block (R5+R6)
315
+ // 2. a `<intent kind="definitional">` wrapper when the operator
316
+ // asked a knowledge question (P1) — fixes the "What is grep?
317
+ // → bash man grep" loss mode flagged by the .X eval.
318
+ //
319
+ // All caps enforced inside the builders (5 KB block + 50 entries
320
+ // + top-3 markdown). Worst-case prompt growth is ~5 KB, well
321
+ // inside any per-command token budget.
322
+ //
323
+ // cwd is sourced from `process.cwd()` — the operator's shell pwd
324
+ // when they invoked `pugi`. For non-REPL CLI paths this is
325
+ // accurate; the REPL session retains the launch cwd for the
326
+ // lifetime of the session which is what the operator expects.
327
+ const cwdForTraverse = process.cwd();
328
+ // cwd → homedir walk-up that picks up every
329
+ // ambient `PUGI.md` (or `CLAUDE.md` as a fallback) the operator
330
+ // has placed above their workspace. This is the cross-project
331
+ // hierarchy walk — distinct from the workspace-bounded
332
+ // `loadTraversedMarkdown` below which only sees files INSIDE the
333
+ // workspace root. Render the concatenation once at session boot
334
+ // and prepend to the system prompt so the model treats the
335
+ // operator's personal guidance as ambient context for the whole
336
+ // session. `--bare` skips this walk entirely.
337
+ let ambientContextBlock = '';
338
+ if (!isBareMode()) {
339
+ try {
340
+ const hierarchy = walkUpPugiMd(cwdForTraverse);
341
+ // Backlog : expand `@import` directives and
342
+ // capture `paths:` frontmatter for each ambient file. The
343
+ // walker already returned the raw bodies; the loader replaces
344
+ // each body with its `@import`-expanded variant + appends any
345
+ // imported children at the same hierarchy level. Failures are
346
+ // localised per-file so one malformed `~/CLAUDE.md` cannot
347
+ // break the rest of the chain.
348
+ const expanded = await expandHierarchyWithImports(hierarchy, cwdForTraverse);
349
+ ambientContextBlock = renderAmbientContext(expanded);
132
350
  }
133
- else if (response.stop === 'text') {
134
- buffer.push({
135
- type: 'status',
136
- message: `turn ${turnIndex + 1}: model returned final text (${response.content.length} chars)`,
351
+ catch {
352
+ // Pure FS surface — if it throws (programmer error in the
353
+ // walker, not a per-file fs error which is already swallowed
354
+ // inside) we drop ambient context for this session rather
355
+ // than crashing the engine loop. Doctor probe still surfaces
356
+ // the hierarchy state for operator triage.
357
+ ambientContextBlock = '';
358
+ }
359
+ }
360
+ // AST-light repo-map injection. We build a
361
+ // compact `## Repo map` block (capped at the formatter's default
362
+ // 8 KB ≈ 2K tokens) from the workspace source tree + splice it
363
+ // onto the system prompt alongside the ambient PUGI.md block.
364
+ // `--bare` skips this exactly like the PUGI.md walk — the engine
365
+ // sees nothing the operator did not explicitly hand it. The build
366
+ // is deferred to `setImmediate` semantics by being a sync call
367
+ // AFTER the boot probes; the cost is one stat per source file
368
+ // (the cache catches mtime-unchanged files and skips re-extraction).
369
+ // Failures are swallowed: repo-map is enrichment, never a gate.
370
+ let repoMapBlock = '';
371
+ if (!isBareMode()) {
372
+ try {
373
+ const { buildAndFormatRepoMap } = await import('../repo-map/build.js');
374
+ const verdict = buildAndFormatRepoMap({
375
+ root,
376
+ // Boot path is best-effort: never refresh during engine boot
377
+ // (the operator can `pugi repo-map --refresh` manually). The
378
+ // cache freshness check catches every realistic edit pattern
379
+ // and avoids walking the tree on every engine invocation.
380
+ refresh: false,
381
+ // Persist the cache so the next boot reuses extracts. Engine
382
+ // boot runs on every command, so missing the persist would
383
+ // hot-loop the extractor on each invocation.
384
+ writeCache: true,
385
+ // Omit the formatter's section header — the system prompt
386
+ // already structures the ambient blocks, and a second `##`
387
+ // would fragment the prompt cache on a model-by-model basis. NOTE(review): `omitHeader: false` below keeps the header — confirm intent.
388
+ omitHeader: false,
137
389
  });
138
- appendSessionMirror(sessionEventsPath, {
139
- type: 'turn_complete',
140
- turn: turnIndex + 1,
141
- stop: 'text',
142
- contentLength: response.content.length,
143
- tokensUsed: response.tokensUsed,
390
+ if (verdict.build.ok && verdict.format && verdict.format.bytes > 0) {
391
+ repoMapBlock = verdict.format.text;
392
+ }
393
+ }
394
+ catch {
395
+ // Any failure in the repo-map pipeline drops the block. The
396
+ // engine continues without enrichment — the failure mode is
397
+ // identical to the cold-boot path before L28 landed.
398
+ repoMapBlock = '';
399
+ }
400
+ }
401
+ let traverseResult;
402
+ // `--bare` skips the parent-dir PUGI.md /
403
+ // AGENTS.md / CLAUDE.md / GEMINI.md walk-up. The engine sees only
404
+ // the operator's prompt + working-set + intent marker, with no
405
+ // ambient project context injection. Mirrors the standard tool's
406
+ // --bare semantics.
407
+ if (isBareMode()) {
408
+ traverseResult = { loaded: [], warnings: [], totalBytes: 0 };
409
+ }
410
+ else {
411
+ try {
412
+ traverseResult = await loadTraversedMarkdown({
413
+ cwd: cwdForTraverse,
414
+ workspaceRoot: root,
144
415
  });
145
416
  }
146
- },
147
- onToolCall: (call) => {
148
- // Record under an `engine_tool` prefix so the audit log can
149
- // distinguish loop-driven calls from direct CLI tool calls.
150
- const id = recordToolCall(session, `engine:${call.name}`, call.arguments.slice(0, 200));
151
- // Stash the audit id on the call for `onToolResult` to close.
152
- this.engineToolCallIds.set(call.id, id);
153
- // Extract a candidate path for write/edit so we can build the
154
- // filesChanged summary if (and only if) the call succeeds. Bad
155
- // JSON is harmless here — we ignore it and the executor surfaces
156
- // the actual parse error to the model.
157
- if (call.name === 'write' || call.name === 'edit') {
158
- const path = extractPathArg(call.arguments);
159
- if (path)
160
- pendingMutations.set(call.id, path);
417
+ catch {
418
+ // Per-dir markdown is a NICE-TO-HAVE; a fs error here must
419
+ // never break the engine loop. Fall back to an empty result
420
+ // so the prefix block still surfaces cwd + working set.
421
+ traverseResult = { loaded: [], warnings: [], totalBytes: 0 };
161
422
  }
162
- buffer.push({
423
+ }
424
+ const intentClassification = classifyIntent(task.prompt);
425
+ const intentHint = intentClassification.intent !== 'ambiguous' ? intentClassification.intent : undefined;
426
+ const cwdRelative = relativeOrAbsolute(root, cwdForTraverse);
427
+ const prefix = buildContextPrefix({
428
+ cwdRelative,
429
+ // β5a defers wiring the live WorkingSet snapshot to the REPL
430
+ // session integration (R5+R6 here only covers the engine-side
431
+ // builder). When the REPL passes its working set down, the
432
+ // engine surface fills in. For now the prefix carries cwd +
433
+ // per-dir conventions + intent which are the two biggest
434
+ // win-rate moves per the .X eval.
435
+ traversedMarkdown: traverseResult.loaded,
436
+ intentHint,
437
+ });
438
+ if (prefix.bytes > 0 || intentClassification.intent === 'definitional') {
439
+ emitStream({
163
440
  type: 'status',
164
- message: `tool_call: ${call.name}(${call.arguments.slice(0, 80)}${call.arguments.length > 80 ? '...' : ''})`,
165
- });
166
- appendSessionMirror(sessionEventsPath, {
167
- type: 'tool_call',
168
- tool: call.name,
169
- callId: call.id,
170
- argsPreview: call.arguments.slice(0, 200),
441
+ message: `context: cwd=${cwdRelative} per-dir-md=${prefix.counts.markdownIncluded}/${prefix.counts.markdownTotal} intent=${intentClassification.intent}`,
171
442
  });
172
- },
173
- onToolResult: (call, result) => {
174
- const auditId = this.engineToolCallIds.get(call.id);
175
- if (auditId) {
176
- if (result.ok) {
177
- recordToolResult(session, auditId, 'success', result.content.slice(0, 200));
443
+ }
444
+ const decoratedPrompt = applyIntentMarker(task.prompt, intentClassification.intent);
445
+ const finalUserPrompt = spliceContextPrefix(prefix.block, decoratedPrompt);
446
+ // Track files mutated by the loop. We extract the path from the JSON
447
+ // arguments of every successful write/edit tool call; `bash` is left
448
+ // out because its filesystem footprint is opaque (a single command
449
+ // can touch dozens of paths via `make`, `pnpm build`, etc). The
450
+ // per-session events.jsonl already carries every file_mutation event
451
+ // for replay; this set is only the headline summary the CLI prints.
452
+ const filesChanged = new Set();
453
+ // Pending lookup: call.id → path extracted from arguments. We only
454
+ // commit to `filesChanged` when the corresponding onToolResult fires
455
+ // with `ok: true`, so a refused or failed edit does not surface as
456
+ // a phantom change in the operator summary.
457
+ const pendingMutations = new Map();
458
+ // Per-session events mirror — `.pugi/sessions/<id>/events.jsonl`.
459
+ // The existing global log at `.pugi/events.jsonl` is preserved as
460
+ // the audit-replay source of truth; this mirror is the easy-to-find
461
+ // per-run log for operators and the cabinet UI (Sprint 2B).
462
+ const sessionEventsPath = openSessionMirror(root, session.id);
463
+ const hooks = {
464
+ // CEO P1 #14 (auto-compact): single operator-visible
465
+ // line on stderr — keep parity with the upstream tool's
466
+ // `Compacted N turns into Y tokens; continuing.` message. We mirror
467
+ // the event into the session log + stream emitter as a `status`
468
+ // frame so that admin-api SSE consumers + the cabinet UI render
469
+ // it without a schema change.
470
+ onAutoCompact: (event) => {
471
+ const pct = Math.round((event.preUsedTokens / Math.max(1, event.maxTokens)) * 100);
472
+ const line = `engine: auto-compacted ${event.droppedCount} turns at ${event.preUsedTokens}/${event.maxTokens} (${pct}%)`;
473
+ // Single-line stderr write — operator-visible per spec.
474
+ process.stderr.write(`${line}\n`);
475
+ emitStream({ type: 'status', message: line });
476
+ appendSessionMirror(sessionEventsPath, {
477
+ type: 'auto_compact',
478
+ droppedCount: event.droppedCount,
479
+ preUsedTokens: event.preUsedTokens,
480
+ postUsedTokens: event.postUsedTokens,
481
+ maxTokens: event.maxTokens,
482
+ gist: event.gist,
483
+ });
484
+ // #21: tenant-wide audit trail mirror.
485
+ writeAuditEvent({
486
+ event: 'auto_compact',
487
+ sessionId: session.id,
488
+ workspaceRoot: root,
489
+ data: {
490
+ droppedCount: event.droppedCount,
491
+ preUsedTokens: event.preUsedTokens,
492
+ postUsedTokens: event.postUsedTokens,
493
+ maxTokens: event.maxTokens,
494
+ },
495
+ });
496
+ },
497
+ onTurnStart: (turnIndex, messageCount) => {
498
+ const msg = `turn ${turnIndex + 1}: requesting model (transcript=${messageCount} messages)`;
499
+ emitStream({ type: 'status', message: msg });
500
+ appendSessionMirror(sessionEventsPath, { type: 'turn_start', turn: turnIndex + 1, transcript: messageCount });
501
+ },
502
+ onTurnComplete: (turnIndex, response) => {
503
+ if (response.stop === 'tool_use') {
504
+ const calls = response.assistantMessage.toolCalls ?? [];
505
+ emitStream({
506
+ type: 'status',
507
+ message: `turn ${turnIndex + 1}: model requested ${calls.length} tool call(s)`,
508
+ });
509
+ appendSessionMirror(sessionEventsPath, {
510
+ type: 'turn_complete',
511
+ turn: turnIndex + 1,
512
+ stop: 'tool_use',
513
+ toolCalls: calls.length,
514
+ tokensUsed: response.tokensUsed,
515
+ });
516
+ }
517
+ else if (response.stop === 'text') {
518
+ emitStream({
519
+ type: 'status',
520
+ message: `turn ${turnIndex + 1}: model returned final text (${response.content.length} chars)`,
521
+ });
522
+ appendSessionMirror(sessionEventsPath, {
523
+ type: 'turn_complete',
524
+ turn: turnIndex + 1,
525
+ stop: 'text',
526
+ contentLength: response.content.length,
527
+ tokensUsed: response.tokensUsed,
528
+ });
529
+ // β3 E4 thinking-block surface: only Claude / Gemini families
530
+ // advertise structured thinking today. The model resolver may
531
+ // return a slug we don't recognise; in that case we skip the
532
+ // split silently. When we DO recognise it, every `<thinking>`
533
+ // / `<thought>` block becomes a separate `thinking.start`/
534
+ // `thinking.delta`/`thinking.end` triplet so the TUI can
535
+ // render one collapsed pane row per block. The visible text
536
+ // (post-strip) flows to the regular `text.delta` channel so
537
+ // the conversation pane never shows raw <thinking> markup.
538
+ if (supportsThinking && response.content.length > 0) {
539
+ const split = splitThinkingBlocks(response.content);
540
+ for (const block of split.thinkingBlocks) {
541
+ const blockId = `think-${randomUUID().slice(0, 8)}`;
542
+ emitStream({ type: 'thinking.start', blockId });
543
+ emitStream({ type: 'thinking.delta', blockId, chunk: block });
544
+ emitStream({ type: 'thinking.end', blockId });
545
+ }
546
+ if (split.visibleText.length > 0) {
547
+ emitStream({ type: 'text.delta', chunk: split.visibleText });
548
+ }
549
+ }
550
+ else if (response.content.length > 0) {
551
+ emitStream({ type: 'text.delta', chunk: response.content });
552
+ }
553
+ }
554
+ },
555
+ onToolCall: (call) => {
556
+ // Record under an `engine:` prefix so the audit log can
557
+ // distinguish loop-driven calls from direct CLI tool calls.
558
+ const id = recordToolCall(session, `engine:${call.name}`, call.arguments.slice(0, 200));
559
+ // Stash the audit id on the call for `onToolResult` to close.
560
+ this.engineToolCallIds.set(call.id, id);
561
+ // Extract a candidate path for write/edit so we can build the
562
+ // filesChanged summary if (and only if) the call succeeds. Bad
563
+ // JSON is harmless here — we ignore it and the executor surfaces
564
+ // the actual parse error to the model.
565
+ if (call.name === 'write' || call.name === 'edit') {
566
+ const path = extractPathArg(call.arguments);
567
+ if (path)
568
+ pendingMutations.set(call.id, path);
569
+ }
570
+ emitStream({
571
+ type: 'tool.start',
572
+ callId: call.id,
573
+ name: call.name,
574
+ arguments: call.arguments,
575
+ });
576
+ emitStream({
577
+ type: 'status',
578
+ message: `tool_call: ${call.name}(${call.arguments.slice(0, 80)}${call.arguments.length > 80 ? '...' : ''})`,
579
+ });
580
+ appendSessionMirror(sessionEventsPath, {
581
+ type: 'tool_call',
582
+ tool: call.name,
583
+ callId: call.id,
584
+ argsPreview: call.arguments.slice(0, 200),
585
+ });
586
+ // #21: tenant-wide audit trail mirror. Same payload
587
+ // shape as the session mirror but flattened so a `jq` query
588
+ // across all sessions for one (tenant, workspace) reads
589
+ // cleanly.
590
+ writeAuditEvent({
591
+ event: 'tool_call',
592
+ sessionId: session.id,
593
+ workspaceRoot: root,
594
+ data: {
595
+ tool: call.name,
596
+ callId: call.id,
597
+ argsPreview: call.arguments.slice(0, 200),
598
+ },
599
+ });
600
+ },
601
+ onToolResult: (call, result) => {
602
+ const auditId = this.engineToolCallIds.get(call.id);
603
+ if (auditId) {
604
+ if (result.ok) {
605
+ recordToolResult(session, auditId, 'success', result.content.slice(0, 200));
606
+ }
607
+ else {
608
+ recordToolResult(session, auditId, 'error', result.error.slice(0, 200));
609
+ }
610
+ this.engineToolCallIds.delete(call.id);
178
611
  }
179
- else {
180
- recordToolResult(session, auditId, 'error', result.error.slice(0, 200));
612
+ const pendingPath = pendingMutations.get(call.id);
613
+ if (pendingPath) {
614
+ if (result.ok)
615
+ filesChanged.add(pendingPath);
616
+ pendingMutations.delete(call.id);
181
617
  }
182
- this.engineToolCallIds.delete(call.id);
618
+ emitStream({
619
+ type: 'tool.end',
620
+ callId: call.id,
621
+ ok: result.ok,
622
+ summary: result.ok
623
+ ? result.content.slice(0, 200)
624
+ : result.error.slice(0, 200),
625
+ });
626
+ emitStream({
627
+ type: 'status',
628
+ message: result.ok
629
+ ? `tool_result: ${call.name} ok`
630
+ : `tool_result: ${call.name} error: ${result.error.slice(0, 120)}`,
631
+ });
632
+ appendSessionMirror(sessionEventsPath, {
633
+ type: 'tool_result',
634
+ tool: call.name,
635
+ callId: call.id,
636
+ ok: result.ok,
637
+ summary: result.ok ? result.content.slice(0, 200) : result.error.slice(0, 200),
638
+ });
639
+ // #21: tenant-wide audit trail mirror.
640
+ writeAuditEvent({
641
+ event: 'tool_result',
642
+ sessionId: session.id,
643
+ workspaceRoot: root,
644
+ data: {
645
+ tool: call.name,
646
+ callId: call.id,
647
+ ok: result.ok,
648
+ summary: result.ok ? result.content.slice(0, 200) : result.error.slice(0, 200),
649
+ },
650
+ });
651
+ },
652
+ };
653
+ // β1b r1 (--allow-fetch / --allow-search wiring):
654
+ // compute the effective gate as OR of (a) the persisted
655
+ // settings.json opt-in and (b) the runtime CLI flag passed via
656
+ // the constructor. Before this fix the adapter only honored (a),
657
+ // so `pugi code --allow-fetch` against a default-privacy workspace
658
+ // silently fell back to "tool not advertised" even though the
659
+ // operator opted in for one invocation. The CLI flag was wired
660
+ // through to the legacy `pugi web` sub-command but not to the
661
+ // engine adapter — Backend Architect review (PR r1) caught
662
+ // the gap.
663
+ const allowFetchEffective = this.options.allowFetch === true || settings.web?.fetch?.enabled === true;
664
+ const allowSearchEffective = this.options.allowSearch === true || settings.web?.search?.enabled === true;
665
+ // β2 S3 → β2a r1 (Backend Architect P1):
666
+ // expose the `agent` tool to the parent loop ONLY for non-plan
667
+ // commands. `buildToolsSchema` also strips the agent tool from
668
+ // plan-mode schemas, but a model that fabricates an `agent` call
669
+ // would still hit the executor with `agentDispatch` wired and
670
+ // could spawn a coder that mutates the workspace — breaking the
671
+ // plan-mode read-only contract. Hard-gate `allowAgent` on the
672
+ // command kind so plan mode never wires the dispatch block in
673
+ // the first place; tool-bridge.ts also throws ToolRefused on a
674
+ // fabricated `agent` call in plan mode as defense in depth.
675
+ //
676
+ // Why only the top-level parent and not children: the dispatcher-
677
+ // real.ts module builds the CHILD's executor without an
678
+ // `agentDispatch` block so children cannot recursively spawn
679
+ // grandchildren. The isolation-matrix capability set then refuses
680
+ // the `agent` tool for every non-orchestrator role anyway, but
681
+ // the executor-level gate is the load-bearing chokepoint.
682
+ // Pugi backlog — intensity dial gates the `agent` tool surface.
683
+ // Plan-mode hard gate keeps its precedence (read-only contract);
684
+ // the intensity layer OR-s on top so `--intensity quick|standard`
685
+ // suppresses the dispatch block even on non-plan kinds.
686
+ const intensityAllowsAgent = this.options.intensityProfile?.allowParallelAgents ?? true;
687
+ const allowAgent = kind !== 'plan' && intensityAllowsAgent;
688
+ // Pugi backlog — resolve the effective model hint. Operator-
689
+ // pinned `model` option wins outright. Otherwise the intensity
690
+ // profile's `modelTag` resolves to a concrete slug via the
691
+ // `PUGI_INTENSITY_MODEL_<TAG>` env (LIGHT / STANDARD / HEAVY) so
692
+ // ops can pin "what does 'standard' mean on this machine" without
693
+ // a code change. Absent profile + absent env => undefined (legacy
694
+ // per-persona resolution path).
695
+ const effectiveModel = resolveIntensityModel(this.options.model, this.options.intensityProfile);
696
+ // β3 streaming: kick off `runEngineLoop` IN PARALLEL with the queue
697
+ // drain. The loop's hook callbacks push events onto `streamQueue`
698
+ // synchronously; this generator yields them live by awaiting the
699
+ // queue's iterator. When the loop settles (success or crash) we
700
+ // close the queue, which lets the iterator return cleanly and the
701
+ // generator falls through to the terminal `result` frame.
702
+ //
703
+ // Why concurrent instead of serial:
704
+ //
705
+ // The β1 adapter awaited `runEngineLoop` to completion, then
706
+ // drained an in-memory `EngineEvent[]` buffer. Operator saw
707
+ // nothing for 30+ seconds (the full LLM round-trip + tool exec
708
+ // wall time), then the entire log dumped at once. The TUI tool-
709
+ // stream pane was a no-op because no event ever reached it
710
+ // before the loop completed.
711
+ //
712
+ // The generator drains `streamQueue` with `for await`, yielding each
713
+ // event as it arrives. The loop's `finally` closes the queue once
714
+ // `runEngineLoop` settles, which ends the iteration; per the note at
715
+ // the close site, buffered tail events stay readable after close, so
716
+ // events pushed just before the loop's terminal `return` still drain.
717
+ // Boxed via single-element tuple so TypeScript does not narrow the
718
+ // outer `outcome` binding to `null` after the closure mutation.
719
+ // Async-closure mutations are invisible to TS control-flow analysis;
720
+ // wrapping in a tuple defeats the narrowing without an unsafe cast.
721
+ const outcomeBox = [null];
722
+ let loopError = null;
723
+ const loopPromise = (async () => {
724
+ try {
725
+ outcomeBox[0] = await runEngineLoop({
726
+ client: this.options.client,
727
+ executor: buildExecutor({
728
+ kind,
729
+ ctx: toolCtx,
730
+ sessionId: session.id,
731
+ workspaceRoot: root,
732
+ // P1 fix (deep audit): forward optional REPL
733
+ // ask-modal bridge. Default `interactive: false` preserves
734
+ // backward compat — non-TTY callers (CI, pipes, scripted
735
+ // CLI runs) keep the `[user_input_required]` envelope path.
736
+ // The REPL layer passes `interactive: true` + a real
737
+ // `askUserBridge` so model-initiated `ask_user_question`
738
+ // calls round-trip to the ink modal and return the
739
+ // operator's choice as a tool result.
740
+ interactive: this.options.interactive === true,
741
+ ...(this.options.askUserBridge
742
+ ? { askUserBridge: this.options.askUserBridge }
743
+ : {}),
744
+ // P1 fix (deep audit): forward the workspace
745
+ // HookRegistry so `.pugi/hooks/` lifecycle hooks fire for
746
+ // model-initiated tool calls. SECURITY: a `PreToolUse
747
+ // onFailure: 'block'` hook that refuses bash containing
748
+ // `rm` now applies to model dispatch — before this fix
749
+ // such a hook only applied to direct CLI tool calls.
750
+ ...(this.options.hooks ? { hooks: this.options.hooks } : {}),
751
+ // β1a r1 (web_fetch gating) + β1b r1 (--allow-fetch wiring):
752
+ // executor allowFetch matches the schema-advertise gate so a
753
+ // settings.json opt-in OR a --allow-fetch flag enables the
754
+ // call. Without this the model would not even see the
755
+ // `web_fetch` tool. `allowSearch` covers the new T4
756
+ // `web_search` tool with the same OR semantics.
757
+ allowFetch: allowFetchEffective,
758
+ allowSearch: allowSearchEffective,
759
+ // β2 S3 → β2a r1 : parent-level agentDispatch
760
+ // wiring. When the model emits a `tool_call: agent(role,
761
+ // brief)`, the executor forwards it to dispatcher-real.ts
762
+ // which spawns a child engine loop against the same Anvil
763
+ // client. Gated by `allowAgent` so plan mode does not even
764
+ // wire the dispatch block — defense in depth on top of the
765
+ // schema-filter and the tool-bridge plan-mode refusal.
766
+ ...(allowAgent
767
+ ? {
768
+ agentDispatch: {
769
+ parentSession: session,
770
+ engineClient: this.options.client,
771
+ },
772
+ }
773
+ : {}),
774
+ // β4 M1/M3/M5: pass the loaded MCP registry through so the
775
+ // executor can route `mcp__server__tool` calls + run the
776
+ // first-call permission prompt before dispatching upstream.
777
+ ...(this.options.mcpRegistry ? { mcpRegistry: this.options.mcpRegistry } : {}),
778
+ ...(this.options.mcpPrompt ? { mcpPrompt: this.options.mcpPrompt } : {}),
779
+ // L11 : per-`run()` denial tracker. Every
780
+ // refusal sentinel (PLAN_MODE_REFUSED, HOOK_BLOCKED,
781
+ // OPERATOR_ABORTED, STALE_READ, unknown-tool, plan-mode
782
+ // agent) is fingerprinted by (toolName, sha256(canonical
783
+ // args)) so the model's next-turn reminder surfaces the
784
+ // pattern instead of re-issuing the same refused call.
785
+ denialTracking,
786
+ }),
787
+ // ambient `PUGI.md` hierarchy block
788
+ // prepended once at session boot. When the walk found
789
+ // nothing OR bare mode is on, `ambientContextBlock === ''`
790
+ // and the system prompt is unchanged — no leading blank
791
+ // line, no empty wrapper tag.
792
+ //
793
+ // task #19 : static / dynamic
794
+ // split via `__PUGI_DYNAMIC_BOUNDARY__` sentinel. The persona
795
+ // prompt (`systemPromptFor(kind)`) is byte-stable across
796
+ // sessions of the same command kind — it goes BEFORE the
797
+ // boundary so Anvil's prefix cache hits on the common
798
+ // prefix. Per-workspace blocks (PUGI.md hierarchy, repo
799
+ // map) live AFTER the boundary because they change with
800
+ // the user's checkout state.
801
+ //
802
+ // ORDERING CHANGE — pre-#19 the model saw
803
+ // ambient → repoMap → persona
804
+ // post-#19 the model sees
805
+ // persona → ambient → repoMap
806
+ // This is INTENTIONAL — the cache prefix MUST be byte-stable
807
+ // and the persona is the only byte-stable block. Operators
808
+ // who relied on ambient guidance "fronting" the persona prompt
809
+ // should now place that guidance inside
810
+ // the persona via `systemPromptFor(kind)` instead of PUGI.md.
811
+ // The empirical impact on model behaviour is bounded: persona
812
+ // prompts are tight directives; ambient PUGI.md is operator
813
+ // context. Either order is interpretable; the cache hit
814
+ // outweighs the front-loading.
815
+ systemPrompt: composeSystemPromptWithBoundary([systemPromptFor(kind)], [ambientContextBlock, repoMapBlock]),
816
+ // β5a R5+R6+P1: per-turn `<context>` prefix + intent marker
817
+ // applied above. Falls back to verbatim `task.prompt` when
818
+ // both the prefix block is empty AND the intent classifier
819
+ // returned ambiguous (the splice + apply functions handle
820
+ // that case as identity).
821
+ userPrompt: finalUserPrompt,
822
+ // β1a r1 (web_fetch gating) + β1b r1 (--allow-fetch wiring):
823
+ // pass the OR of `.pugi/settings.json::web.fetch.enabled` and
824
+ // the runtime `--allow-fetch` flag. When neither is true the
825
+ // `web_fetch` tool is not advertised to the model at all.
826
+ // `allowSearch` does the same for the new `web_search` tool.
827
+ // β2 S3: allowAgent surfaces the `agent` tool in the schema
828
+ // so the model sees it as a valid tool call option; the
829
+ // capability-matrix layer (S4) still gates which roles can
830
+ // actually USE it. Plan mode strips it via β2a r1 gate.
831
+ tools: buildToolsSchema(kind, {
832
+ allowFetch: allowFetchEffective,
833
+ allowSearch: allowSearchEffective,
834
+ allowAgent,
835
+ // β4 M1/M3: same registry the executor saw. Schema +
836
+ // dispatcher must agree on which MCP names are advertised
837
+ // and which are dispatchable; passing identical references
838
+ // makes that invariant impossible to break.
839
+ ...(this.options.mcpRegistry ? { mcpRegistry: this.options.mcpRegistry } : {}),
840
+ }),
841
+ budget,
842
+ personaSlug: personaSlugFor(kind),
843
+ hooks,
844
+ temperature: this.options.temperature ?? 0.2,
845
+ signal: ctx.signal,
846
+ // β1 (audit E2): forward CLI sub-command + routing tag +
847
+ // operator-pinned model so the runtime controller's DTO sees
848
+ // all three. `tag` derives 1:1 from `command` for now
849
+ // (`code → code`, `build → build_task`, etc.); future routing
850
+ // changes flip the mapping table without touching the call
851
+ // site. `model` carries the effective model hint resolved above
852
+ // (operator-pinned option, else the intensity profile); may be undefined.
853
+ command: kind,
854
+ tag: dispatchTagFor(kind),
855
+ model: effectiveModel,
856
+ // Task — 1M context tier opt-in. Forwarded to the SDK
857
+ // driver which threads it through every `client.send` call to
858
+ // the runtime gate. `undefined` (the default) preserves
859
+ // legacy routing.
860
+ contextTier: this.options.contextTier,
861
+ // CEO P1 #14 (auto-compact): pluggable compactor
862
+ // hook. The SDK driver invokes this pre-`client.send` on every
863
+ // turn. `maybeCompactAsync` returns `null` below the 75%
864
+ // threshold or when the transcript is too short to drop
865
+ // history — the loop continues unchanged on the cold path.
866
+ // When it returns a result, the driver swaps the transcript +
867
+ // fires the `onAutoCompact` hook above which emits the stderr
868
+ // line.
869
+ //
870
+ // PR E (2026-06-05): the async variant dispatches between
871
+ // the legacy stats sentinel and an LLM-produced narrative.
872
+ // The SDK driver awaits the Promise — see engine-loop.ts.
873
+ autoCompact: ({ transcript, maxTokens }) => maybeCompactAsync(transcript, maxTokens, autoCompactConfig, summaryLlmCall, { signal: ctx.signal }),
874
+ });
183
875
  }
184
- const pendingPath = pendingMutations.get(call.id);
185
- if (pendingPath) {
186
- if (result.ok)
187
- filesChanged.add(pendingPath);
188
- pendingMutations.delete(call.id);
876
+ catch (err) {
877
+ loopError = err;
189
878
  }
190
- buffer.push({
191
- type: 'status',
192
- message: result.ok
193
- ? `tool_result: ${call.name} ok`
194
- : `tool_result: ${call.name} error: ${result.error.slice(0, 120)}`,
879
+ finally {
880
+ // Close the queue so the iterator below returns `done: true`.
881
+ // Any tail events the hooks pushed in the same microtask still
882
+ // drain because `AsyncEventQueue.close()` only resolves
883
+ // PENDING awaiters; buffered items stay readable.
884
+ streamQueue.close();
885
+ }
886
+ })();
887
+ // Drain the queue live. Each iteration yields one EngineEvent the
888
+ // moment its hook fired. Operator sees `tool.start` within tens of
889
+ // ms of the model emitting it.
890
+ for await (const event of streamQueue) {
891
+ yield streamEventToEngineEvent(event);
892
+ }
893
+ // Loop has settled (queue closed). Surface its outcome — either an
894
+ // unhandled crash from the (rare) executor exception path or the
895
+ // structured EngineLoopOutcome.
896
+ await loopPromise;
897
+ if (loopError !== null) {
898
+ const message = loopError instanceof Error ? loopError.message : String(loopError);
899
+ // #21: surface the crash to the audit trail before
900
+ // returning. Mirrors the `failed` arm of the structured path
901
+ // below so a SOC pipeline sees one `dispatch_end` per dispatch
902
+ // regardless of which code path produced it.
903
+ writeAuditEvent({
904
+ event: 'dispatch_end',
905
+ sessionId: session.id,
906
+ workspaceRoot: root,
907
+ data: {
908
+ status: 'crashed',
909
+ error: message,
910
+ },
195
911
  });
196
- appendSessionMirror(sessionEventsPath, {
197
- type: 'tool_result',
198
- tool: call.name,
199
- callId: call.id,
200
- ok: result.ok,
201
- summary: result.ok ? result.content.slice(0, 200) : result.error.slice(0, 200),
912
+ // #24 (CEO P1): TaskCompleted chain fires
913
+ // even on engine-loop crash so an operator hook can surface the
914
+ // failure to Slack / a dashboard. Best-effort — chain crashes
915
+ // never propagate.
916
+ await fireTaskCompletedOnce(1, 0, []);
917
+ yield {
918
+ type: 'result',
919
+ result: {
920
+ status: 'failed',
921
+ summary: `engine loop crashed: ${message}`,
922
+ filesChanged: [],
923
+ patchRefs: [],
924
+ testsRun: [],
925
+ risks: [`unhandled error in engine adapter: ${message}`],
926
+ eventRefs: [],
927
+ },
928
+ };
929
+ return;
930
+ }
931
+ const finalOutcome = outcomeBox[0];
932
+ if (finalOutcome === null) {
933
+ // Defensive — should never hit. `runEngineLoop` always either
934
+ // resolves with an outcome or throws (and we catch that above).
935
+ writeAuditEvent({
936
+ event: 'dispatch_end',
937
+ sessionId: session.id,
938
+ workspaceRoot: root,
939
+ data: { status: 'no_outcome' },
202
940
  });
203
- },
204
- };
205
- let outcome;
206
- try {
207
- outcome = await runEngineLoop({
208
- client: this.options.client,
209
- executor: buildExecutor({ kind, ctx: toolCtx }),
210
- systemPrompt: systemPromptFor(kind),
211
- userPrompt: task.prompt,
212
- tools: buildToolsSchema(kind),
213
- budget,
214
- personaSlug: personaSlugFor(kind),
215
- hooks,
216
- temperature: this.options.temperature ?? 0.2,
217
- signal: ctx.signal,
941
+ // #24: fire TaskCompleted chain on the defensive path too.
942
+ await fireTaskCompletedOnce(1, 0, []);
943
+ yield {
944
+ type: 'result',
945
+ result: {
946
+ status: 'failed',
947
+ summary: 'engine loop returned no outcome',
948
+ filesChanged: [],
949
+ patchRefs: [],
950
+ testsRun: [],
951
+ risks: ['runEngineLoop resolved without an outcome value'],
952
+ eventRefs: [],
953
+ },
954
+ };
955
+ return;
956
+ }
957
+ // Translate the loop outcome into an EngineResult.
958
+ // `aborted` maps to `blocked` because the operator chose the
959
+ // outcome, same shape as budget_exhausted / tool_refused.
960
+ //
961
+ // PUGI-VERIFY-GATE: the verification gate runs AFTER this
962
+ // base mapping. When the agent ran verification commands and
963
+ // any exited non-zero, the loop's `completed` collapses to
964
+ // `failed` (the agent's claim of "done" is unverified). When
965
+ // the loop `completed` but no verification command ever ran,
966
+ // we surface `needs_verification` (CLI exit 2) so the operator
967
+ // sees the missing signal instead of false confidence. The
968
+ // gate is non-negotiable per the contract: `done` is reserved
969
+ // for `verified: true` outcomes.
970
+ const baseStatus = finalOutcome.status === 'completed'
971
+ ? 'done'
972
+ : finalOutcome.status === 'failed'
973
+ ? 'failed'
974
+ : 'blocked';
975
+ const filesChangedList = Array.from(filesChanged).sort();
976
+ const verification = computeVerificationOutcome({
977
+ ledger: session.verificationLedger,
978
+ baseStatus,
979
+ finalText: finalOutcome.finalText,
980
+ filesChanged: filesChangedList,
218
981
  });
219
- }
220
- catch (error) {
221
- // Defensive — runEngineLoop wraps errors into status: failed, so
222
- // this branch is only hit if the executor or hooks themselves
223
- // throw uncaught. Surface as a failed result so the CLI exits
224
- // non-zero rather than hanging.
225
- const message = error instanceof Error ? error.message : String(error);
982
+ const status = verification.status;
983
+ const summaryPrefix = status === 'done'
984
+ ? ''
985
+ : finalOutcome.status === 'budget_exhausted'
986
+ ? '[budget_exhausted] '
987
+ : finalOutcome.status === 'tool_refused'
988
+ ? '[plan_mode_refused] '
989
+ : finalOutcome.status === 'aborted'
990
+ ? '[operator_aborted] '
991
+ : status === 'needs_verification'
992
+ ? '[needs_verification] '
993
+ : verification.unverifiedReason === 'verification_command_failed'
994
+ ? '[verification_failed] '
995
+ : '[failed] ';
996
+ appendSessionMirror(sessionEventsPath, {
997
+ type: 'outcome',
998
+ status: finalOutcome.status,
999
+ toolCallCount: finalOutcome.toolCallCount,
1000
+ turnsUsed: finalOutcome.turnsUsed,
1001
+ tokensUsed: finalOutcome.tokensUsed,
1002
+ filesChanged: filesChangedList,
1003
+ reason: finalOutcome.reason,
1004
+ });
1005
+ // #21: emit `dispatch_end` to the tenant-wide audit trail.
1006
+ // When the loop tripped the per-command budget we ALSO emit a
1007
+ // dedicated `budget_exhausted` row so a SOC query can filter on
1008
+ // event type alone without parsing the `data.status` payload.
1009
+ if (finalOutcome.status === 'budget_exhausted') {
1010
+ writeAuditEvent({
1011
+ event: 'budget_exhausted',
1012
+ sessionId: session.id,
1013
+ workspaceRoot: root,
1014
+ data: {
1015
+ toolCallCount: finalOutcome.toolCallCount,
1016
+ turnsUsed: finalOutcome.turnsUsed,
1017
+ tokensUsed: finalOutcome.tokensUsed,
1018
+ reason: finalOutcome.reason ?? null,
1019
+ },
1020
+ });
1021
+ }
1022
+ writeAuditEvent({
1023
+ event: 'dispatch_end',
1024
+ sessionId: session.id,
1025
+ workspaceRoot: root,
1026
+ data: {
1027
+ status: finalOutcome.status,
1028
+ toolCallCount: finalOutcome.toolCallCount,
1029
+ turnsUsed: finalOutcome.turnsUsed,
1030
+ tokensUsed: finalOutcome.tokensUsed,
1031
+ filesChangedCount: filesChangedList.length,
1032
+ reason: finalOutcome.reason ?? null,
1033
+ },
1034
+ });
1035
+ // PR F (2026-06-05): Obsidian-style notes recorder. Writes three
1036
+ // markdown surfaces (session journal, file concept notes, daily
1037
+ // journal). Wrapped in its own try/catch internally — never
1038
+ // throws, never blocks dispatch. Operator opt-out via
1039
+ // `.pugi/settings.json::notes.enabled = false` or
1040
+ // `PUGI_NOTES_DISABLE=1`.
1041
+ recordSessionNotes({
1042
+ workspaceRoot: root,
1043
+ sessionId: session.id,
1044
+ command: kind,
1045
+ persona: personaSlugFor(kind),
1046
+ brief: task.prompt,
1047
+ status: finalOutcome.status,
1048
+ toolCallCount: finalOutcome.toolCallCount,
1049
+ turnsUsed: finalOutcome.turnsUsed,
1050
+ tokensUsed: finalOutcome.tokensUsed,
1051
+ filesChanged: filesChangedList,
1052
+ startedAt: dispatchStartedAt,
1053
+ endedAt: new Date().toISOString(),
1054
+ reason: finalOutcome.reason ?? null,
1055
+ config: { enabled: settings?.notes?.enabled !== false },
1056
+ });
1057
+ // #24 (CEO P1): TaskCompleted chain on the
1058
+ // primary success path. `exitCode` maps to 0 for `completed`,
1059
+ // 1 otherwise so chain hooks can branch on success vs blocked /
1060
+ // failed / aborted via a single integer test.
1061
+ await fireTaskCompletedOnce(finalOutcome.status === 'completed' ? 0 : 1, finalOutcome.toolCallCount, filesChangedList);
1062
+ // PUGI-467: when the model finishes with tool_use-only turns (common
1063
+ // on OSS coder models that emit no final assistant text after the
1064
+ // last edit), `finalText` is empty even though work landed. Fall
1065
+ // back to a synthesised summary derived from `filesChangedList` so
1066
+ // the CLI never reports "no answer returned" when files were
1067
+ // demonstrably modified.
1068
+ //
1069
+ // Order: finalText → reason → file-list synthesis → literal placeholder.
1070
+ // Reason precedes synthesis so failure modes (budget_exhausted,
1071
+ // tool_refused, aborted) preserve their explanation when files were
1072
+ // also touched — operator must see WHY the loop terminated before
1073
+ // the "what landed" hint. Synthesis only kicks in when there is no
1074
+ // reason at all (pure tool_use-only completed turn).
1075
+ const synthesisedFromFiles = finalOutcome.finalText.trim() === '' && filesChangedList.length > 0
1076
+ ? `Updated ${filesChangedList.length} file(s): ${filesChangedList.slice(0, 5).join(', ')}${filesChangedList.length > 5 ? ` (+${filesChangedList.length - 5} more)` : ''}`
1077
+ : '';
1078
+ // PUGI-VERIFY-GATE: thread verification state into the risks
1079
+ // array so a consumer reading only the legacy fields still
1080
+ // gets a human-readable summary of what was not verified.
1081
+ const baseRisks = finalOutcome.status === 'completed' && status === 'done'
1082
+ ? []
1083
+ : [finalOutcome.reason ?? `outcome=${finalOutcome.status}`];
1084
+ if (verification.unverifiedReason && status !== 'done') {
1085
+ baseRisks.push(`unverified: ${verification.unverifiedReason}`);
1086
+ }
1087
+ if (verification.regressionOwnershipDispute) {
1088
+ baseRisks.push('regression_ownership_dispute: agent disclaimed ownership of failing verification');
1089
+ }
226
1090
  yield {
227
1091
  type: 'result',
228
1092
  result: {
229
- status: 'failed',
230
- summary: `engine loop crashed: ${message}`,
231
- filesChanged: [],
1093
+ status,
1094
+ summary: `${summaryPrefix}${finalOutcome.finalText || finalOutcome.reason || synthesisedFromFiles || 'no answer returned'}`,
1095
+ filesChanged: filesChangedList,
232
1096
  patchRefs: [],
233
1097
  testsRun: [],
234
- risks: [`unhandled error in engine adapter: ${message}`],
235
- eventRefs: [],
1098
+ risks: baseRisks,
1099
+ eventRefs: [
1100
+ `tool_calls=${finalOutcome.toolCallCount}`,
1101
+ `turns=${finalOutcome.turnsUsed}`,
1102
+ `tokens=${finalOutcome.tokensUsed}`,
1103
+ // `outcome=<status>` is a machine-readable echo so callers
1104
+ // (cli.ts plan exit code, cabinet UI) can distinguish
1105
+ // `budget_exhausted` from `tool_refused` without parsing
1106
+ // the human-readable summary prefix. Code Reviewer P2
1107
+ // retro: plan exit code previously collapsed
1108
+ // both blocked reasons into 0, which masked budget hits.
1109
+ `outcome=${finalOutcome.status}`,
1110
+ `session=${session.id}`,
1111
+ `ctx=${ctx.sessionId}`,
1112
+ `mirror=${sessionEventsPath}`,
1113
+ // PUGI-VERIFY-GATE: machine-readable verification echo so
1114
+ // downstream consumers (MCP wrapper, cabinet UI, audit
1115
+ // pipeline) can branch on the gate state without parsing
1116
+ // the new structured fields.
1117
+ `verified=${verification.verified}`,
1118
+ `verification_count=${verification.verificationCommands.length}`,
1119
+ ],
1120
+ verified: verification.verified,
1121
+ verificationCommands: verification.verificationCommands,
1122
+ verificationFailures: verification.verificationFailures,
1123
+ ...(verification.unverifiedReason !== undefined
1124
+ ? { unverifiedReason: verification.unverifiedReason }
1125
+ : {}),
1126
+ ...(verification.regressionOwnershipDispute
1127
+ ? { regressionOwnershipDispute: true }
1128
+ : {}),
236
1129
  },
237
1130
  };
238
- return;
239
1131
  }
240
- // Drain status buffer first so consumers see the chronological order.
241
- for (const event of buffer)
242
- yield event;
243
- // Translate the loop outcome into an EngineResult.
244
- // `aborted` (α6.9: operator cancelled mid-tool) maps to `blocked`
245
- // because the operator chose the outcome, same shape as
246
- // budget_exhausted / tool_refused.
247
- const status = outcome.status === 'completed'
248
- ? 'done'
249
- : outcome.status === 'failed'
250
- ? 'failed'
251
- : 'blocked';
252
- const summaryPrefix = outcome.status === 'completed'
253
- ? ''
254
- : outcome.status === 'budget_exhausted'
255
- ? '[budget_exhausted] '
256
- : outcome.status === 'tool_refused'
257
- ? '[plan_mode_refused] '
258
- : outcome.status === 'aborted'
259
- ? '[operator_aborted] '
260
- : '[failed] ';
261
- const filesChangedList = Array.from(filesChanged).sort();
262
- appendSessionMirror(sessionEventsPath, {
263
- type: 'outcome',
264
- status: outcome.status,
265
- toolCallCount: outcome.toolCallCount,
266
- turnsUsed: outcome.turnsUsed,
267
- tokensUsed: outcome.tokensUsed,
268
- filesChanged: filesChangedList,
269
- reason: outcome.reason,
270
- });
271
- yield {
272
- type: 'result',
273
- result: {
274
- status,
275
- summary: `${summaryPrefix}${outcome.finalText || outcome.reason || 'no answer returned'}`,
276
- filesChanged: filesChangedList,
277
- patchRefs: [],
278
- testsRun: [],
279
- risks: outcome.status === 'completed'
280
- ? []
281
- : [outcome.reason ?? `outcome=${outcome.status}`],
282
- eventRefs: [
283
- `tool_calls=${outcome.toolCallCount}`,
284
- `turns=${outcome.turnsUsed}`,
285
- `tokens=${outcome.tokensUsed}`,
286
- // `outcome=<status>` is a machine-readable echo so callers
287
- // (cli.ts plan exit code, cabinet UI) can distinguish
288
- // `budget_exhausted` from `tool_refused` without parsing
289
- // the human-readable summary prefix. Code Reviewer P2
290
- // retro 2026-05-23: plan exit code previously collapsed
291
- // both blocked reasons into 0, which masked budget hits.
292
- `outcome=${outcome.status}`,
293
- `session=${session.id}`,
294
- `ctx=${ctx.sessionId}`,
295
- `mirror=${sessionEventsPath}`,
296
- ],
297
- },
298
- };
1132
+ finally {
1133
+ // r2 (triple-review P1): detach the abort listener so
1134
+ // long REPL sessions sharing one AbortController across many
1135
+ // run() invocations do not accumulate one listener per run on
1136
+ // `ctx.signal`. Called on success, abort, and uncaught throw.
1137
+ detachAbortListener?.();
1138
+ // #24 (CEO P1): safety net — if `run()` threw
1139
+ // BEFORE reaching any yield-result site, the chain still fires.
1140
+ // `fireTaskCompletedOnce` is idempotent so the happy-path fire
1141
+ // above wins. Exit code 1 because the throw path is by
1142
+ // definition non-clean.
1143
+ await fireTaskCompletedOnce(1, 0, []);
1144
+ }
1145
+ }
1146
+ }
1147
+ /**
1148
+ * β3 streaming: translate one typed `EngineStreamEvent` from the
1149
+ * adapter's internal queue into the SDK's lossier `EngineEvent` shape
1150
+ * the public adapter contract exposes. The SDK contract only declares
1151
+ * `status | result` today; richer events (`tool.start`, `thinking.delta`,
1152
+ * etc.) collapse to a structured `status` message until the SDK widens
1153
+ * the discriminated union (β3b — paired with an admin-api SSE schema
1154
+ * bump so the wire format stays stable).
1155
+ *
1156
+ * The full typed payload is still available to richer consumers via
1157
+ * `adapter.streamEmitter.on('event', ...)`. The CLI's TUI tool-stream
1158
+ * pane consumes that emitter directly; this function is the safe
1159
+ * bridge for legacy SDK consumers that only know `EngineEvent`.
1160
+ */
1161
+ function streamEventToEngineEvent(stream) {
1162
+ switch (stream.type) {
1163
+ case 'status':
1164
+ return { type: 'status', message: stream.message };
1165
+ case 'tool.start':
1166
+ return {
1167
+ type: 'status',
1168
+ message: `tool.start ${stream.name} call=${stream.callId} args=${stream.arguments.slice(0, 80)}${stream.arguments.length > 80 ? '...' : ''}`,
1169
+ };
1170
+ case 'tool.delta':
1171
+ return {
1172
+ type: 'status',
1173
+ message: `tool.delta call=${stream.callId} chunk=${stream.chunk.slice(0, 120)}`,
1174
+ };
1175
+ case 'tool.end':
1176
+ return {
1177
+ type: 'status',
1178
+ message: `tool.end call=${stream.callId} ok=${stream.ok} summary=${stream.summary.slice(0, 120)}`,
1179
+ };
1180
+ case 'thinking.start':
1181
+ return { type: 'status', message: `thinking.start block=${stream.blockId}` };
1182
+ case 'thinking.delta':
1183
+ return {
1184
+ type: 'status',
1185
+ message: `thinking.delta block=${stream.blockId} chunk=${stream.chunk.slice(0, 120)}`,
1186
+ };
1187
+ case 'thinking.end':
1188
+ return { type: 'status', message: `thinking.end block=${stream.blockId}` };
1189
+ case 'text.delta':
1190
+ return {
1191
+ type: 'status',
1192
+ message: `text.delta chunk=${stream.chunk.slice(0, 200)}`,
1193
+ };
1194
+ default: {
1195
+ // Exhaustiveness — TS catches a missing variant at compile time.
1196
+ const exhaustive = stream;
1197
+ void exhaustive;
1198
+ return { type: 'status', message: 'unknown stream event' };
1199
+ }
299
1200
  }
300
1201
  }
301
1202
  /**
@@ -311,7 +1212,14 @@ function extractPathArg(raw) {
311
1212
  try {
312
1213
  const parsed = JSON.parse(raw);
313
1214
  if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
314
- const path = parsed.path;
1215
+ const obj = parsed;
1216
+ // Accept canonical `path` OR the peer-CLI-trained `filePath`
1217
+ // alias so the filesChanged summary captures writes regardless of
1218
+ // which key the model emitted. Without the alias the operator
1219
+ // sees "Files modified: none" even when a write actually landed,
1220
+ // because the dispatcher accepted the alias but the tracker did
1221
+ // not (CEO live smoke).
1222
+ const path = obj['path'] ?? obj['filePath'];
315
1223
  if (typeof path === 'string' && path.length > 0)
316
1224
  return path;
317
1225
  }
@@ -367,8 +1275,432 @@ function toCommandKind(kind) {
367
1275
  return 'build';
368
1276
  return kind;
369
1277
  }
1278
/**
 * Map a CLI command kind onto the dispatch descriptor sent to the engine.
 *
 * Per the original notes, the engine accepts only the closed tag vocabulary
 * `classify | reason | codegen | summarize | vision`; forwarding the raw CLI
 * command names (`code`, `fix`, `plan`, `build`, `explain`) was rejected
 * with HTTP 400, so every kind is translated here:
 *
 *   - `code`, `fix` -> `codegen`   (edits / diffs / patches)
 *   - `build`       -> `codegen` with `budget_hint: 'max'` (heavy
 *                      scaffolding lane, largest model permitted)
 *   - `plan`        -> `reason`    (no mutations, long-form thought)
 *   - `explain`     -> `summarize` (read-only walkthrough)
 *
 * Every descriptor uses `priority: 'realtime'` because the CLI is
 * interactive, and `budget_hint: 'std'` unless stated otherwise.
 *
 * @param {string} kind CLI command kind.
 * @returns {{tag: string, priority: string, budget_hint: string}} A fresh
 *   descriptor object on every call.
 */
export function dispatchTagFor(kind) {
    // Single place that fixes the property order and the shared priority.
    const describe = (tag, budget_hint) => ({ tag, priority: 'realtime', budget_hint });
    if (kind === 'code' || kind === 'fix') {
        return describe('codegen', 'std');
    }
    if (kind === 'build') {
        return describe('codegen', 'max');
    }
    if (kind === 'plan') {
        return describe('reason', 'std');
    }
    if (kind === 'explain') {
        return describe('summarize', 'std');
    }
    // Unknown kinds take the most conservative lane so a newly added
    // command kind cannot accidentally unlock a write-heavy model lane.
    return describe('reason', 'std');
}
1338
/**
 * Bind an engine client to a summarizer-call adapter used by the
 * auto-compact summarizer.
 *
 * The returned async function:
 *   - packs `input.systemPrompt` / `input.userMessage` into a
 *     two-message `[system, user]` conversation;
 *   - calls `client.send(messages, [], options)` with no tools, forwarding
 *     `personaSlug`, `input.model`, `input.maxTokens`, `input.temperature`
 *     and, only when provided, `input.signal`;
 *   - resolves to the trimmed reply text, or `null` when the reply is not
 *     a `stop: 'text'` frame, is blank, or the send failed;
 *   - re-throws abort errors so operator cancellation is still honoured.
 *
 * @param {{send: Function}} client Engine client exposing `send`.
 * @param {string} personaSlug Persona forwarded with every request.
 * @returns {(input: object) => Promise<string|null>} Summarizer adapter.
 */
function buildSummarizerCall(client, personaSlug) {
    // An error counts as an abort when it is named `AbortError` or its
    // message mentions "aborted"; everything else is treated as transient.
    const looksLikeAbort = (err) => err instanceof Error &&
        (err.name === 'AbortError' || err.message.includes('aborted'));
    return async (input) => {
        const conversation = [
            { role: 'system', content: input.systemPrompt },
            { role: 'user', content: input.userMessage },
        ];
        try {
            const options = {
                personaSlug,
                model: input.model,
                maxTokens: input.maxTokens,
                temperature: input.temperature,
            };
            if (input.signal) {
                options.signal = input.signal;
            }
            const reply = await client.send(conversation, [], options);
            // With no tools offered the model should answer with plain
            // text; any other frame maps to null so the caller falls back.
            if (reply.stop !== 'text') {
                return null;
            }
            const summary = reply.content.trim();
            return summary.length > 0 ? summary : null;
        }
        catch (err) {
            if (looksLikeAbort(err)) {
                throw err;
            }
            return null;
        }
    };
}
370
1388
  // The per-adapter `engineToolCallIds` Map lives on the
371
1389
  // `NativePugiEngineAdapter` instance above — Code Reviewer P2 retro
372
- // 2026-05-23 lifted it off the module scope to prevent collisions
1390
+ // lifted it off the module scope to prevent collisions
373
1391
  // under parallel adapter runs (cabinet UI + CLI sharing one process).
1392
/**
 * Render a cwd path as a breadcrumb relative to the workspace root.
 *
 *   - cwd equals the workspace root  -> `'.'`
 *   - cwd lives inside the workspace -> the root-relative remainder
 *   - cwd lives outside the workspace -> the absolute cwd, unchanged
 *
 * Both inputs are normalised with `resolve()` first, so trailing
 * separators and relative segments do not affect the comparison.
 *
 * @param {string} workspaceRoot Workspace root directory.
 * @param {string} cwd Directory to render.
 * @returns {string} `'.'`, a root-relative path, or the absolute cwd.
 */
function relativeOrAbsolute(workspaceRoot, cwd) {
    const absRoot = resolve(workspaceRoot);
    const absCwd = resolve(cwd);
    if (absCwd === absRoot) {
        return '.';
    }
    // `resolve()` emits the platform separator, so a hard-coded '/' would
    // never match on Windows and every in-workspace cwd would fall through
    // to the absolute form.
    const separator = process.platform === 'win32' ? '\\' : '/';
    // A filesystem root (`/`, `C:\`) already ends with the separator;
    // appending another one would yield a prefix nothing can start with.
    const rootPrefix = absRoot.endsWith(separator) ? absRoot : absRoot + separator;
    // Comparing against the separator-terminated prefix keeps siblings
    // such as `/ws-other` from being mistaken for children of `/ws`.
    return absCwd.startsWith(rootPrefix) ? absCwd.slice(rootPrefix.length) : absCwd;
}
1408
/**
 * Splice several ambient blocks into one system prompt.
 *
 * Each block is trimmed; blocks that are empty after trimming are dropped,
 * and the survivors are joined with a blank line (`\n\n`) so every block
 * renders as a discrete paragraph the model can attend to without bleeding
 * into its neighbour. The result therefore never carries leading
 * separators or trailing whitespace, and a single non-empty block comes
 * back as just its trimmed text.
 *
 * @param {string[]} blocks Prompt fragments in render order.
 * @returns {string} The joined prompt, or `''` when nothing survives.
 */
export function composeSystemPrompt(blocks) {
    const paragraphs = blocks.reduce((kept, block) => {
        const trimmed = block.trim();
        if (trimmed.length > 0) {
            kept.push(trimmed);
        }
        return kept;
    }, []);
    return paragraphs.join('\n\n');
}
1423
/**
 * Literal sentinel separating the cache-friendly static half of a system
 * prompt (persona, capability matrix, tool schema) from the dynamic,
 * per-session half (ambient PUGI.md, repo map, recent turns).
 *
 * Rationale, per the original notes: prefix caches hash the leading bytes
 * of a prompt, so placing static content before dynamic content lets the
 * common prefix hit the cache even when the per-session tail varies. A
 * fixed sentinel string lets the cache layer find the split point without
 * parsing prompt semantics.
 *
 * Shape of a composed prompt:
 *
 *   <staticBlock1>
 *   <staticBlock2>
 *   __PUGI_DYNAMIC_BOUNDARY__
 *   <dynamicBlock1>
 *   <dynamicBlock2>
 *
 * When either half is empty after filtering, the marker is omitted so the
 * prompt never carries an orphan sentinel.
 */
export const PUGI_DYNAMIC_BOUNDARY = '__PUGI_DYNAMIC_BOUNDARY__';
1449
/**
 * Raised when one half of a system prompt already contains the literal
 * boundary sentinel.
 *
 * The cache layer locates the split by searching for the literal
 * `__PUGI_DYNAMIC_BOUNDARY__`. If either half already contains it (for
 * example a PUGI.md fragment documenting the boundary mechanism itself, or
 * content pulled in via an operator `@import`), the search would mis-split
 * and corrupt the cache key. Failing loudly is preferred over silently
 * emitting a poisoned prompt. Operators who need the literal text in
 * prompt context can rename their copy (e.g.
 * `PUGI_DYNAMIC_BOUNDARY_LITERAL`) or use the exported constant from code.
 *
 * The `side` property records which half (`'static'` or `'dynamic'`)
 * carried the sentinel.
 */
export class SentinelInjectionError extends Error {
    side;
    constructor(side) {
        // Joined with single spaces; the resulting message text is part of
        // the runtime contract and is kept exactly as originally shipped.
        const message = [
            `Refusing to compose system prompt: ${side} side contains the`,
            `literal sentinel "${PUGI_DYNAMIC_BOUNDARY}". This would corrupt`,
            `the Anvil prefix-cache split. Rename the offending occurrence`,
            `или strip it before composing.`,
        ].join(' ');
        super(message);
        this.name = 'SentinelInjectionError';
        this.side = side;
    }
}
1471
/**
 * Compose a system prompt from a static half and a dynamic half, with the
 * boundary sentinel between them.
 *
 * Each half is first flattened with `composeSystemPrompt`. If either
 * flattened half already contains the sentinel, composition is refused
 * (static is checked before dynamic). When one half is empty the other is
 * returned on its own, without a sentinel.
 *
 * @param {string[]} staticBlocks Cache-friendly blocks.
 * @param {string[]} dynamicBlocks Per-session blocks.
 * @returns {string} The composed prompt.
 * @throws {SentinelInjectionError} When a half contains the sentinel.
 */
export function composeSystemPromptWithBoundary(staticBlocks, dynamicBlocks) {
    const halves = [
        ['static', composeSystemPrompt(staticBlocks)],
        ['dynamic', composeSystemPrompt(dynamicBlocks)],
    ];
    // Refuse loudly rather than let the cache layer mis-split the prompt.
    for (const [side, text] of halves) {
        if (text.includes(PUGI_DYNAMIC_BOUNDARY)) {
            throw new SentinelInjectionError(side);
        }
    }
    const [[, head], [, tail]] = halves;
    if (head.length === 0) {
        return tail;
    }
    if (tail.length === 0) {
        return head;
    }
    return [head, PUGI_DYNAMIC_BOUNDARY, tail].join('\n\n');
}
1487
/**
 * Resolve the model hint forwarded to the runtime.
 *
 * Precedence:
 *   1. A non-empty operator pin (e.g. `pugi code --model foo`) always wins.
 *   2. Otherwise the intensity profile's `modelTag` is upper-cased and
 *      looked up as `PUGI_INTENSITY_MODEL_<TAG>` in the environment, so
 *      operators decide per machine what each tag means.
 *   3. Otherwise `undefined`, leaving the model choice to the receiver.
 *
 * `undefined` (never an empty string) is returned when no hint exists, so
 * the request can simply omit the field.
 *
 * @param {string|undefined} operatorPin Explicit model chosen by the operator.
 * @param {{modelTag: string}|undefined} profile Active intensity profile.
 * @returns {string|undefined} The model hint, if any.
 */
export function resolveIntensityModel(operatorPin, profile) {
    // "Present" means neither undefined nor the empty string.
    const isPresent = (value) => value !== undefined && value !== '';
    if (isPresent(operatorPin)) {
        return operatorPin;
    }
    if (!profile) {
        return undefined;
    }
    const tag = profile.modelTag.toUpperCase();
    const configured = process.env[`PUGI_INTENSITY_MODEL_${tag}`];
    return isPresent(configured) ? configured : undefined;
}
1515
/**
 * Expand `@import` directives for every file in a discovered rules
 * hierarchy.
 *
 * For each entry, `loadRulesFile` is asked for the entry plus everything it
 * imports. The first returned rule stands for the entry itself, so its
 * post-expansion `body` replaces the entry's `content`. Every further rule
 * is an imported child and is appended right after its parent, inheriting
 * the parent's `level` and `source`, so the existing render order is kept.
 *
 * Failures are localised: if anything throws while processing an entry,
 * the untouched original entry is emitted and the loop moves on. Ambient
 * context is enrichment, not a gate, so one malformed rules file must not
 * break engine boot.
 *
 * @param {Array<object>} hierarchy Entries with at least `path`, `level`
 *   and `source`.
 * @param {string} cwd Working directory handed to the rules loader.
 * @returns {Promise<Array<object>>} The expanded hierarchy.
 */
async function expandHierarchyWithImports(hierarchy, cwd) {
    const homedir = osHomedir();
    const expanded = [];
    for (const entry of hierarchy) {
        try {
            const rules = await loadRulesFile(entry.path, { cwd, homedir });
            // Index 0 is the entry file itself, carrying the body with the
            // `@import` directives already removed.
            const self = rules[0];
            expanded.push(self ? { ...entry, content: self.body } : entry);
            // Remaining rules are imported children; surface them at the
            // parent's specificity.
            let index = 1;
            while (index < rules.length) {
                const imported = rules[index];
                index += 1;
                if (!imported) {
                    continue;
                }
                expanded.push({
                    path: imported.path,
                    content: imported.body,
                    level: entry.level,
                    source: entry.source,
                    truncated: false,
                    rawBytes: Buffer.byteLength(imported.body, 'utf8'),
                });
            }
        }
        catch {
            // Keep the original body for this entry and skip its imports;
            // the next entry is tried independently.
            expanded.push(entry);
        }
    }
    return expanded;
}
1578
/**
 * Fold the verification ledger into the final run outcome.
 *
 * `verified` is true only when at least one verification command ran, the
 * chronologically last one exited zero, and no entry failed.
 *
 * Status precedence:
 *   1. Any failed ledger entry -> `failed` / `verification_command_failed`.
 *   2. A non-`done` base status is passed through, tagged
 *      `verification_inconclusive` when the run is not verified.
 *   3. A `done` base status with an empty ledger -> `needs_verification` /
 *      `no_verification_command_run`.
 *   4. Otherwise `done`.
 *
 * NOTE(review): a single failed entry fails the run even if a later retry
 * of the same command passed. Confirm this is the intended policy.
 *
 * `regressionOwnershipDispute` is raised only when a verification failed,
 * the agent changed files, its final text contains one of the dispute
 * phrases, and a changed file maps onto a module named in the failure
 * output.
 *
 * @param {{ledger: Array<object>, baseStatus: string, finalText: string,
 *   filesChanged: string[]}} input
 * @returns {object} `status`, `verified`, `verificationCommands`,
 *   `verificationFailures`, optional `unverifiedReason`, and
 *   `regressionOwnershipDispute`.
 */
export function computeVerificationOutcome(input) {
    const { ledger, baseStatus, finalText, filesChanged } = input;
    // One pass collects both the command list and the failure records.
    const verificationCommands = [];
    const verificationFailures = [];
    for (const { command, exitCode, tailStderr } of ledger) {
        verificationCommands.push(command);
        if (exitCode !== 0) {
            verificationFailures.push({ command, exitCode, tailStderr });
        }
    }
    const ranAny = ledger.length > 0;
    const anyFailed = verificationFailures.length > 0;
    const finalEntry = ranAny ? ledger[ledger.length - 1] : undefined;
    const finalEntryPassed = finalEntry !== undefined && finalEntry.exitCode === 0;
    const verified = ranAny && finalEntryPassed && !anyFailed;
    // Start from the engine's own status and override only where the
    // ledger contradicts it.
    let status = baseStatus;
    let unverifiedReason;
    if (anyFailed) {
        status = 'failed';
        unverifiedReason = 'verification_command_failed';
    }
    else if (baseStatus !== 'done') {
        if (!verified) {
            unverifiedReason = 'verification_inconclusive';
        }
    }
    else if (!ranAny) {
        status = 'needs_verification';
        unverifiedReason = 'no_verification_command_run';
    }
    // Dispute heuristic: deliberately simple so its false-positive surface
    // stays easy to audit.
    let regressionOwnershipDispute = false;
    const disputeEligible = anyFailed && filesChanged.length > 0 && finalText !== '';
    if (disputeEligible) {
        const loweredText = finalText.toLowerCase();
        const phraseFound = REGRESSION_DISPUTE_PHRASES.some((phrase) => loweredText.includes(phrase));
        if (phraseFound && agentTouchedFailingModule(filesChanged, verificationFailures)) {
            regressionOwnershipDispute = true;
        }
    }
    // Keys are inserted in the original order; `unverifiedReason` is
    // omitted entirely when no reason applies.
    const outcome = { status, verified, verificationCommands, verificationFailures };
    if (unverifiedReason !== undefined) {
        outcome.unverifiedReason = unverifiedReason;
    }
    outcome.regressionOwnershipDispute = regressionOwnershipDispute;
    return outcome;
}
1642
/**
 * Decide whether any changed file belongs to a module mentioned in the
 * stderr tails of the failed verification commands.
 *
 * Two paths belong to the same module when they share a "module key": the
 * full directory path plus the file's bare basename, with the extension and
 * any `.test.` / `.spec.` infix removed. That pairs `src/foo.ts` with
 * `src/foo.test.ts` without tying the check to one test runner's
 * stack-trace format.
 *
 * Only relative mentions rooted at `src`, `app`, `apps`, `test`, `tests`,
 * `lib` or `packages`, and preceded by the start of the text, whitespace or
 * `(`, are recognised. False negatives are acceptable because the result
 * only flags a dispute, it does not enforce one.
 *
 * @param {string[]} filesChanged Paths the agent modified.
 * @param {Array<{tailStderr: string}>} failures Failed verification records.
 * @returns {boolean} True when a changed file shares a module key with a
 *   path mentioned in the failure output.
 */
function agentTouchedFailingModule(filesChanged, failures) {
    const stderrBlob = failures.map((failure) => failure.tailStderr).join('\n');
    if (stderrBlob === '') {
        return false;
    }
    // Directory path + basename stripped of `.test.`/`.spec.` and extension.
    const toModuleKey = (filePath) => {
        const parts = filePath.split('/').filter(Boolean);
        if (parts.length === 0) {
            return '';
        }
        const fileName = parts.pop();
        const stem = fileName
            .replace(/\.(spec|test)\./, '.')
            .replace(/\.[a-zA-Z][a-zA-Z0-9]*$/, '');
        return parts.length === 0 ? stem : `${parts.join('/')}/${stem}`;
    };
    const mentionPattern = /(?:^|[\s(])((?:src|app|apps|test|tests|lib|packages)\/[\w./-]+\.[a-zA-Z]+)/g;
    const failingKeys = new Set();
    for (const [, mention] of stderrBlob.matchAll(mentionPattern)) {
        if (typeof mention !== 'string' || mention.length === 0) {
            continue;
        }
        const key = toModuleKey(mention);
        if (key !== '') {
            failingKeys.add(key);
        }
    }
    if (failingKeys.size === 0) {
        return false;
    }
    return filesChanged.some((changed) => failingKeys.has(toModuleKey(changed)));
}
374
1706
  //# sourceMappingURL=native-pugi.js.map