@pugi/cli 0.1.0-beta.12 → 0.1.0-beta.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/dist/core/consensus/diff-capture.js +73 -0
  2. package/dist/core/context/index.js +7 -0
  3. package/dist/core/context/markdown-traverse.js +255 -0
  4. package/dist/core/edits/dispatch.js +218 -2
  5. package/dist/core/edits/journal.js +199 -0
  6. package/dist/core/edits/layer-d-ast.js +557 -14
  7. package/dist/core/edits/verify-hook.js +273 -0
  8. package/dist/core/engine/anvil-client.js +99 -5
  9. package/dist/core/engine/context-prefix.js +155 -0
  10. package/dist/core/engine/intent.js +260 -0
  11. package/dist/core/engine/native-pugi.js +663 -249
  12. package/dist/core/engine/prompts.js +52 -2
  13. package/dist/core/engine/tool-bridge.js +311 -9
  14. package/dist/core/lsp/client.js +57 -0
  15. package/dist/core/mcp/client.js +9 -0
  16. package/dist/core/mcp/http-server.js +553 -0
  17. package/dist/core/mcp/permission.js +190 -0
  18. package/dist/core/mcp/server-tools.js +219 -0
  19. package/dist/core/mcp/server.js +397 -0
  20. package/dist/core/repl/history.js +11 -1
  21. package/dist/core/repl/model-pricing.js +135 -0
  22. package/dist/core/repl/session.js +328 -12
  23. package/dist/core/repl/slash-commands.js +18 -4
  24. package/dist/core/settings.js +43 -0
  25. package/dist/core/subagents/dispatcher-real.js +600 -0
  26. package/dist/core/subagents/dispatcher.js +113 -24
  27. package/dist/core/subagents/index.js +18 -5
  28. package/dist/core/subagents/isolation-matrix.js +213 -0
  29. package/dist/core/subagents/spawn.js +19 -4
  30. package/dist/core/transport/version-interceptor.js +166 -0
  31. package/dist/index.js +28 -0
  32. package/dist/runtime/bootstrap.js +190 -0
  33. package/dist/runtime/cli.js +859 -269
  34. package/dist/runtime/commands/lsp.js +165 -5
  35. package/dist/runtime/commands/mcp.js +537 -0
  36. package/dist/runtime/commands/review-consensus.js +17 -2
  37. package/dist/runtime/headless.js +543 -0
  38. package/dist/runtime/load-hooks-or-exit.js +71 -0
  39. package/dist/runtime/version.js +65 -0
  40. package/dist/tools/agent-tool.js +192 -0
  41. package/dist/tools/apply-patch.js +62 -1
  42. package/dist/tools/mcp-tool.js +260 -0
  43. package/dist/tools/multi-edit.js +361 -0
  44. package/dist/tools/registry.js +5 -0
  45. package/dist/tools/web-fetch.js +147 -2
  46. package/dist/tools/web-search.js +458 -0
  47. package/dist/tui/agent-tree.js +10 -0
  48. package/dist/tui/ask-modal.js +2 -2
  49. package/dist/tui/conversation-pane.js +1 -1
  50. package/dist/tui/input-box.js +1 -1
  51. package/dist/tui/markdown-render.js +4 -4
  52. package/dist/tui/repl-render.js +105 -15
  53. package/dist/tui/repl-splash.js +2 -2
  54. package/dist/tui/repl.js +10 -4
  55. package/dist/tui/splash.js +1 -1
  56. package/dist/tui/status-bar.js +94 -16
  57. package/dist/tui/update-banner.js +20 -2
  58. package/package.json +5 -4
@@ -8,6 +8,9 @@ import { AnvilEngineLoopClient } from '../core/engine/anvil-client.js';
8
8
  import { NoopEngineAdapter } from '../core/engine/noop.js';
9
9
  import { NativePugiEngineAdapter } from '../core/engine/native-pugi.js';
10
10
  import { decidePermission } from '../core/permission.js';
11
+ import { loadMcpRegistry } from '../core/mcp/registry.js';
12
+ import { loadHookRegistryOrExit } from './load-hooks-or-exit.js';
13
+ import { defaultNonInteractiveMcpPrompt } from '../tools/mcp-tool.js';
11
14
  import { openSession, recordCommandCompleted, recordCommandStarted, recordToolCall, recordToolResult, } from '../core/session.js';
12
15
  import { loadSettings } from '../core/settings.js';
13
16
  import { FileReadCache } from '../core/file-cache.js';
@@ -36,6 +39,7 @@ import { runPatchCommand } from './commands/patch.js';
36
39
  import { runWorktreeCommand } from './commands/worktree.js';
37
40
  import { resolveWorkspaceLabel } from '../core/repl/workspace-context.js';
38
41
  import { runReviewConsensus } from './commands/review-consensus.js';
42
+ import { runMcpCommand } from './commands/mcp.js';
39
43
  import { DECOMPOSE_PROMPT_SUFFIX, parseDecompositionFromText, writeDecomposition, } from './plan-decompose.js';
40
44
  import { FtsSyntaxError, SqliteSessionStore, resolveProjectStoreDir } from '../core/repl/store/index.js';
41
45
  import { slugForCwd } from '../core/repl/history.js';
@@ -51,37 +55,15 @@ import { dispatchEdit, } from '../core/edits/index.js';
51
55
  * packages/pugi-sdk/package.json); the publish workflow validates the
52
56
  * three are in lockstep.
53
57
  */
54
- /**
55
- * β1 housekeeping (#51): defensive semver sanitizer. If a future
56
- * refactor moves PUGI_CLI_VERSION reading to a JSON import (resolveJson)
57
- * the npm publish pipeline can leak `workspace:*` from a partially-bumped
58
- * package.json — `npm publish` rewrites these but a local `pnpm pack`
59
- * does not, and the failure mode is silently shipping an unsemver
60
- * version that breaks `pugi --version` JSON consumers. Sanitize at the
61
- * read site so even a leaked literal lands as a deterministic
62
- * "0.0.0-unknown" rather than `workspace:*`.
63
- */
64
- function sanitizeSemver(raw) {
65
- if (typeof raw !== 'string')
66
- return '0.0.0-unknown';
67
- const trimmed = raw.trim();
68
- if (!trimmed)
69
- return '0.0.0-unknown';
70
- // Strip a `workspace:` / `npm:` / `file:` protocol prefix that pnpm
71
- // can emit when a partial publish runs.
72
- const stripped = trimmed.replace(/^(workspace:|npm:|file:)/, '');
73
- // Accept anything that begins with major.minor.patch + optional
74
- // prerelease/build per semver 2.0. Reject `*`, `^x`, `~x`, ranges, etc.
75
- if (/^\d+\.\d+\.\d+(?:[-+][0-9A-Za-z.-]+)?$/.test(stripped)) {
76
- return stripped;
77
- }
78
- return '0.0.0-unknown';
79
- }
80
- // Main bumped to 0.1.0-beta.9 (PR #430 REPL-hang fix). β1a r1 rebase
81
- // preserves the main bump and runs it through the β1 sanitizer added
82
- // here so a future workspace:* leak from a partial publish lands as
83
- // "0.0.0-unknown" instead of corrupting `pugi --version` JSON output.
84
- const PUGI_CLI_VERSION = sanitizeSemver("0.1.0-beta.12");
58
+ // PR-CLI-SERVER-VERSION-HANDSHAKE (#225). PUGI_CLI_VERSION lives in
59
+ // `runtime/version.ts` now so the engine transport interceptor can
60
+ // import it without dragging in the cli.ts module graph. Re-exported
61
+ // here under the original name so every existing reader (`pugi version`,
62
+ // `pugi doctor --json`, splash render, telemetry) keeps working with
63
+ // zero churn. Bumping the CLI version is still a single-file edit —
64
+ // just on `runtime/version.ts` instead of here. The β1 sanitizer that
65
+ // guarded against `workspace:*` leaks moved with the constant.
66
+ import { PUGI_CLI_VERSION, sanitizeSemver } from './version.js';
85
67
  const handlers = {
86
68
  accounts,
87
69
  agents: dispatchAgents,
@@ -103,6 +85,7 @@ const handlers = {
103
85
  login,
104
86
  logout,
105
87
  lsp: dispatchLsp,
88
+ mcp: dispatchMcp,
106
89
  patch: dispatchPatch,
107
90
  plan: runEngineTask('plan'),
108
91
  'plan-review': dispatchPlanReview,
@@ -421,6 +404,25 @@ async function dispatchLsp(args, flags, _session) {
421
404
  if (result.exitCode !== 0)
422
405
  process.exitCode = result.exitCode;
423
406
  }
407
+ /**
408
+ * β4 M6 + M7 + Sl7 (2026-05-26): `pugi mcp <sub>` — MCP execution +
409
+ * server. `list / trust / deny / install` manage the client-side
410
+ * registry (the same surface `pugi config mcp ...` exposes); `serve`
411
+ * boots Pugi-as-MCP-server over stdio (default) or HTTP+SSE; `perms`
412
+ * inspects + resets the per-(server, tool) permission cache that
413
+ * gates engine-loop dispatch.
414
+ *
415
+ * The serve sub-command never returns under normal conditions — the
416
+ * stdio path runs until stdin closes (parent agent disconnect) and the
417
+ * HTTP path runs until SIGINT/SIGTERM. Both honour the optional
418
+ * AbortSignal we pass through from the REPL slash bridge in β4b.
419
+ */
420
+ async function dispatchMcp(args, flags, _session) {
421
+ await runMcpCommand(args, {
422
+ workspaceRoot: process.cwd(),
423
+ writeOutput: (payload, text) => writeOutput(flags, payload, text),
424
+ });
425
+ }
424
426
  /**
425
427
  * α7.7: `pugi patch` — apply a unified-diff patch from stdin or a file.
426
428
  * Routes through the same security gate as the Layer A/B/C applicators
@@ -464,6 +466,37 @@ async function dispatchWorktree(args, flags, _session) {
464
466
  }
465
467
  export async function runCli(argv) {
466
468
  const { command, args, flags, isBareInvocation } = parseArgs(argv);
469
+ // β-headless dispatch (CEO directive 2026-05-27 "нужно тестирование по
470
+ // кругу"): when `--print <brief>` is set we route to the headless
471
+ // runner BEFORE the REPL / splash / command branches. The runner
472
+ // never mounts Ink, never opens raw stdin, never prints the splash
473
+ // — only the structured event stream lands on stdout. Same engine
474
+ // adapter path the REPL uses (no fork), only the output sink
475
+ // differs.
476
+ if (typeof flags.print === 'string') {
477
+ const { runHeadlessPrint } = await import('./headless.js');
478
+ // Default to NDJSON when stdout is not a TTY OR when --json is set
479
+ // explicitly. A human running `pugi --print "..."` in their
480
+ // terminal without flags gets the readable text sink; a pipe gets
481
+ // the machine-readable stream.
482
+ const wantJson = flags.json || !process.stdout.isTTY;
483
+ const headlessFactory = getEngineClientFactory();
484
+ const exitCode = await runHeadlessPrint({
485
+ prompt: flags.print,
486
+ json: wantJson,
487
+ cwd: flags.cwd ?? process.cwd(),
488
+ ...(flags.workspace ? { workspace: flags.workspace } : {}),
489
+ ...(flags.sessionId ? { sessionIdOverride: flags.sessionId } : {}),
490
+ ...(flags.timeoutSeconds ? { timeoutSeconds: flags.timeoutSeconds } : {}),
491
+ noTools: flags.noTools,
492
+ ...(flags.maxTurns ? { maxTurns: flags.maxTurns } : {}),
493
+ ...(headlessFactory ? { engineClientFactory: headlessFactory } : {}),
494
+ ...(headlessStdoutWriter ? { stdoutWrite: headlessStdoutWriter } : {}),
495
+ ...(headlessStderrWriter ? { stderrWrite: headlessStderrWriter } : {}),
496
+ });
497
+ process.exitCode = exitCode;
498
+ return;
499
+ }
467
500
  // Bare `pugi` on a TTY enters the REPL-by-default agentic session
468
501
  // (Sprint α5.7, ADR-0056). The REPL is the customer-facing surface
469
502
  // that brings Pugi to parity with Claude Code / Codex CLI. When the
@@ -538,6 +571,7 @@ function parseArgs(argv) {
538
571
  offline: false,
539
572
  noTty: false,
540
573
  allowFetch: false,
574
+ allowSearch: false,
541
575
  noUpdateCheck: false,
542
576
  noSplash: process.env.PUGI_SKIP_SPLASH === '1',
543
577
  // Claude triple-review P1 PR #369: default tool-stream pane HIDDEN
@@ -554,6 +588,9 @@ function parseArgs(argv) {
554
588
  : true,
555
589
  noDefaults: process.env.PUGI_INIT_NO_DEFAULTS === '1',
556
590
  decompose: false,
591
+ // β-headless: --no-tools default OFF so existing flag-free invocations
592
+ // keep tool advertisement. Flipped only by explicit operator opt-in.
593
+ noTools: false,
557
594
  };
558
595
  const args = [];
559
596
  // Sprint 2E: `pugi --version` / `-v` are universal install-test conventions
@@ -599,6 +636,12 @@ function parseArgs(argv) {
599
636
  else if (arg === '--allow-fetch') {
600
637
  flags.allowFetch = true;
601
638
  }
639
+ else if (arg === '--allow-search') {
640
+ // β1b T4 (2026-05-26): unlock the `web_search` tool for one
641
+ // invocation, mirroring the `--allow-fetch` gate. Distinct flag
642
+ // because an operator may want to query without fetching pages.
643
+ flags.allowSearch = true;
644
+ }
602
645
  else if (arg === '--no-update-check') {
603
646
  flags.noUpdateCheck = true;
604
647
  }
@@ -634,6 +677,114 @@ function parseArgs(argv) {
634
677
  flags.privacy = parsePrivacyMode(next);
635
678
  index += 1;
636
679
  }
680
+ else if (arg === '--print') {
681
+ // β-headless: top-level `--print <brief>` runs a single
682
+ // non-interactive engine turn. Consumes the next argv token as
683
+ // the brief — refusing if it looks like another flag so a
684
+ // dangling `--print --json` does not silently swallow `--json`.
685
+ const next = argv[index + 1];
686
+ if (!next || next.startsWith('--')) {
687
+ throw new Error('--print requires a brief (e.g. --print "create word_counter.py")');
688
+ }
689
+ flags.print = next;
690
+ index += 1;
691
+ }
692
+ else if (arg.startsWith('--print=')) {
693
+ flags.print = arg.slice('--print='.length);
694
+ }
695
+ else if (arg === '--cwd') {
696
+ const next = argv[index + 1];
697
+ if (!next || next.startsWith('--'))
698
+ throw new Error('--cwd requires a path');
699
+ flags.cwd = next;
700
+ index += 1;
701
+ }
702
+ else if (arg.startsWith('--cwd=')) {
703
+ flags.cwd = arg.slice('--cwd='.length);
704
+ }
705
+ else if (arg === '--workspace') {
706
+ const next = argv[index + 1];
707
+ if (!next || next.startsWith('--'))
708
+ throw new Error('--workspace requires a slug');
709
+ flags.workspace = next;
710
+ index += 1;
711
+ }
712
+ else if (arg.startsWith('--workspace=')) {
713
+ flags.workspace = arg.slice('--workspace='.length);
714
+ }
715
+ else if (arg === '--session') {
716
+ const next = argv[index + 1];
717
+ if (!next || next.startsWith('--'))
718
+ throw new Error('--session requires an id');
719
+ flags.sessionId = next;
720
+ index += 1;
721
+ }
722
+ else if (arg.startsWith('--session=')) {
723
+ flags.sessionId = arg.slice('--session='.length);
724
+ }
725
+ else if (arg === '--timeout') {
726
+ const next = argv[index + 1];
727
+ if (!next || next.startsWith('--'))
728
+ throw new Error('--timeout requires seconds');
729
+ const parsed = Number(next);
730
+ if (!Number.isFinite(parsed) || parsed <= 0) {
731
+ throw new Error(`--timeout requires positive seconds, got "${next}"`);
732
+ }
733
+ flags.timeoutSeconds = parsed;
734
+ index += 1;
735
+ }
736
+ else if (arg.startsWith('--timeout=')) {
737
+ const raw = arg.slice('--timeout='.length);
738
+ const parsed = Number(raw);
739
+ if (!Number.isFinite(parsed) || parsed <= 0) {
740
+ throw new Error(`--timeout requires positive seconds, got "${raw}"`);
741
+ }
742
+ flags.timeoutSeconds = parsed;
743
+ }
744
+ else if (arg === '--no-tools') {
745
+ flags.noTools = true;
746
+ }
747
+ else if (arg === '--max-turns') {
748
+ const next = argv[index + 1];
749
+ if (!next || next.startsWith('--'))
750
+ throw new Error('--max-turns requires an integer');
751
+ const parsed = Number(next);
752
+ if (!Number.isInteger(parsed) || parsed <= 0) {
753
+ throw new Error(`--max-turns requires positive integer, got "${next}"`);
754
+ }
755
+ flags.maxTurns = parsed;
756
+ index += 1;
757
+ }
758
+ else if (arg.startsWith('--max-turns=')) {
759
+ const raw = arg.slice('--max-turns='.length);
760
+ const parsed = Number(raw);
761
+ if (!Number.isInteger(parsed) || parsed <= 0) {
762
+ throw new Error(`--max-turns requires positive integer, got "${raw}"`);
763
+ }
764
+ flags.maxTurns = parsed;
765
+ }
766
+ else if (arg.startsWith('--commit=')) {
767
+ // `pugi review --triple --commit <SHA>` activates the multi-
768
+ // provider routing path against a specific revision.
769
+ flags.commit = arg.slice('--commit='.length);
770
+ }
771
+ else if (arg === '--commit') {
772
+ const next = argv[index + 1];
773
+ if (!next)
774
+ throw new Error('--commit requires a SHA or ref');
775
+ flags.commit = next;
776
+ index += 1;
777
+ }
778
+ else if (arg.startsWith('--base=')) {
779
+ flags.base = arg.slice('--base='.length);
780
+ }
781
+ else if (arg === '--base') {
782
+ const next = argv[index + 1];
783
+ if (!next)
784
+ throw new Error('--base requires a ref');
785
+ flags.base = next;
786
+ index += 1;
787
+ }
637
788
  else {
638
789
  args.push(arg);
639
790
  }
@@ -701,6 +852,9 @@ async function help(_args, flags, _session) {
701
852
  '',
702
853
  'Review gate:',
703
854
  ' pugi review --triple Prepare the Anvil-backed triple-review gate.',
855
+ ' pugi review --triple --commit <SHA>',
856
+ ' 3-model consensus via Anvil (Anthropic · OpenAI · Google).',
857
+ ' Optional: --base <ref> | "<prompt>". Quota: 1 slot per call.',
704
858
  ' pugi review --consensus 3-model consensus review (codex · claude · deepseek).',
705
859
  ' Optional: --commit <sha> | --pr <num> | --branch <name>.',
706
860
  ' Exits 0 PASS · 1 WARN · 2 BLOCK.',
@@ -853,6 +1007,9 @@ export async function scaffoldPugiWorkspace(input) {
853
1007
  mode: 'balanced',
854
1008
  telemetry: 'off',
855
1009
  },
1010
+ ui: {
1011
+ cyberZoo: 'on',
1012
+ },
856
1013
  artifacts: {
857
1014
  defaultPath: '.pugi/artifacts',
858
1015
  promoteExplicitly: true,
@@ -860,7 +1017,19 @@ export async function scaffoldPugiWorkspace(input) {
860
1017
  }, created, skipped);
861
1018
  writeJsonIfMissing(resolve(pugiDir, 'mcp.json'), {
862
1019
  schema: 1,
863
- servers: [],
1020
+ // 2026-05-27 dogfood: `servers` MUST be an object keyed by server
1021
+ // name (z.record(mcpServerConfigSchema) in
1022
+ // apps/pugi-cli/src/core/mcp/registry.ts:51). A bare `[]` array
1023
+ // here passed schema validation на pugi init exit но crashed
1024
+ // the next dispatch with
1025
+ // "MCP config at .pugi/mcp.json failed validation:
1026
+ // servers: Expected object, received array"
1027
+ // and the operator's first command after `pugi init` printed an
1028
+ // error banner before the actual reply. Empty object matches the
1029
+ // schema default and keeps the file forwards-compatible with
1030
+ // `pugi mcp install <name> ...` which merges into the same
1031
+ // record shape.
1032
+ servers: {},
864
1033
  }, created, skipped);
865
1034
  writeJsonIfMissing(resolve(pugiDir, 'index.json'), emptyIndex(), created, skipped);
866
1035
  writeTextIfMissing(resolve(pugiDir, 'PUGI.md'), [
@@ -1282,10 +1451,20 @@ async function review(args, flags, session) {
1282
1451
  // streaming UX and rubric-driven exit codes don't disturb the existing
1283
1452
  // pugi-cli surfaces that depend on the old shape.
1284
1453
  if (flags.consensus) {
1454
+ // 2026-05-27 (Codex r0 P1 on PR #489): pass the globally-parsed
1455
+ // --commit / --base flags to consensus so `pugi review --consensus
1456
+ // --commit X` reviews the requested SHA instead of silently falling
1457
+ // back to the working-tree diff. parseConsensusArgs gives the inline
1458
+ // args (`--commit Y` after the command name) precedence; the
1459
+ // fallback only fires when `args` does not carry the token.
1285
1460
  const exitCode = await runReviewConsensus(args, {
1286
1461
  cwd: root,
1287
1462
  config: resolveRuntimeConfig(),
1288
1463
  json: flags.json,
1464
+ flagsFallback: {
1465
+ ...(flags.commit ? { commit: flags.commit } : {}),
1466
+ ...(flags.base ? { base: flags.base } : {}),
1467
+ },
1289
1468
  emit: (line) => {
1290
1469
  if (!flags.json)
1291
1470
  process.stdout.write(line);
@@ -1297,6 +1476,15 @@ async function review(args, flags, session) {
1297
1476
  process.exitCode = exitCode;
1298
1477
  return;
1299
1478
  }
1479
+ if (flags.triple && flags.commit) {
1480
+ // CEO directive 2026-05-27: `pugi review --triple --commit <SHA>`
1481
+ // dispatches to the customer-facing 3-model consensus path through
1482
+ // Anvil's already-paid Anthropic / OpenAI / Google routes. Replaces
1483
+ // the dev-only Codex/Claude/Gemini OAuth CLIs the `/triple-review`
1484
+ // skill uses.
1485
+ await performTripleProviderReview(root, session, flags, prompt);
1486
+ return;
1487
+ }
1300
1488
  if (flags.triple && flags.remote) {
1301
1489
  await performRemoteTripleReview(root, session, flags, prompt);
1302
1490
  return;
@@ -1734,6 +1922,274 @@ async function performRemoteTripleReview(root, session, flags, prompt) {
1734
1922
  .join('\n'));
1735
1923
  process.exitCode = outcome.exitCode;
1736
1924
  }
1925
+ /**
1926
+ * `pugi review --triple --commit <SHA>` — customer-facing 3-model
1927
+ * consensus review via Anvil multi-provider routing.
1928
+ *
1929
+ * Dispatches the same diff to Anthropic / OpenAI / Google models
1930
+ * (routed through Anvil's already-paid fleet, NOT OAuth-bound dev
1931
+ * CLIs) and renders the per-reviewer verdict + cross-model
1932
+ * disagreement summary at the end. Quota: one `reviewPerMonth` slot
1933
+ * per call regardless of provider count — the controller-level
1934
+ * `@QuotaGated('reviewPerMonth')` decorator enforces single-slot
1935
+ * debit (see apps/admin-api/src/pugi/pugi.controller.ts).
1936
+ *
1937
+ * CEO directive 2026-05-27: replaces the dev-only `/triple-review`
1938
+ * skill's Codex/Claude/Gemini OAuth dependency with a customer-
1939
+ * runnable Pugi product surface. Dogfood loop: Pugi reviews Pugi PRs.
1940
+ */
1941
+ async function performTripleProviderReview(root, session, flags, prompt) {
1942
+ const config = resolveRuntimeConfig();
1943
+ const artifactDir = createArtifactDir(root, prompt || 'triple-providers');
1944
+ const requestPath = resolve(artifactDir, 'triple-review-request.json');
1945
+ const resultPath = resolve(artifactDir, 'triple-review-result.json');
1946
+ const summaryPath = resolve(artifactDir, 'triple-review.md');
1947
+ const toolCallId = recordToolCall(session, 'review:triple-providers', prompt || `review ${flags.commit ?? 'HEAD'} via providers`);
1948
+ // Resolve base ref. CLI flag wins over settings → so an operator
1949
+ // can target a specific integration branch without editing settings.
1950
+ const settings = loadSettings(root);
1951
+ const baseRef = flags.base ?? resolveBaseRef(root, settings) ?? 'origin/main';
1952
+ // Normalise both the commit and the base to short SHAs so the audit
1953
+ // log stores a stable reference even if branches move.
1954
+ const commitRef = flags.commit ?? 'HEAD';
1955
+ // 2026-05-27 (Codex r0 P2 on PR #489): safeGit returns '' on a bad ref
1956
+ // (it swallows the git exit code so callers don't have to wrap every
1957
+ // probe). Without an explicit refusal, a misspelled --commit or --base
1958
+ // produced an EMPTY diff that the gate then PASSED — operators saw a
1959
+ // green review for changes that were never reviewed. Resolve both refs
1960
+ // through `rev-parse --verify` first; an empty result is a hard error.
1961
+ const verifiedCommit = safeGit(root, ['rev-parse', '--verify', commitRef]).trim();
1962
+ if (!verifiedCommit) {
1963
+ throw new Error(`pugi review --triple: cannot resolve --commit '${commitRef}' — ` +
1964
+ `check the SHA or branch name. ` +
1965
+ `Refusing to submit an empty diff for review.`);
1966
+ }
1967
+ const verifiedBase = safeGit(root, ['rev-parse', '--verify', baseRef]).trim();
1968
+ if (!verifiedBase) {
1969
+ throw new Error(`pugi review --triple: cannot resolve --base '${baseRef}' — ` +
1970
+ `check the ref or set base via 'pugi config set review.base=<ref>'. ` +
1971
+ `Refusing to submit an empty diff for review.`);
1972
+ }
1973
+ const resolvedCommit = safeGit(root, ['rev-parse', '--short', commitRef]).trim() || commitRef;
1974
+ const mergeBase = safeGit(root, ['merge-base', baseRef, commitRef]).trim() || '';
1975
+ const diffRange = mergeBase || `${baseRef}..${commitRef}`;
1976
+ const diffArgs = ['diff', diffRange, commitRef, '--', '.', ...PROTECTED_DIFF_EXCLUDES];
1977
+ const diffStatArgs = ['diff', '--shortstat', diffRange, commitRef, '--', '.', ...PROTECTED_DIFF_EXCLUDES];
1978
+ const diffPatch = safeGit(root, diffArgs);
1979
+ const diffStats = parseDiffStats(safeGit(root, diffStatArgs));
1980
+ const requestBody = pugiTripleReviewRequestSchema.parse({
1981
+ schema: 1,
1982
+ workspace: {
1983
+ rootName: root.split('/').at(-1) ?? 'workspace',
1984
+ gitBranch: safeGit(root, ['branch', '--show-current']).trim() || null,
1985
+ gitHead: resolvedCommit || null,
1986
+ baseRef,
1987
+ dirty: Boolean(safeGit(root, ['status', '--short']).trim()),
1988
+ },
1989
+ diffPatch,
1990
+ diffStats,
1991
+ prompt: prompt || undefined,
1992
+ locale: 'en-US',
1993
+ reviewerPersona: 'oes-dev',
1994
+ commit: resolvedCommit,
1995
+ modelProviders: ['claude', 'gpt', 'gemini'],
1996
+ });
1997
+ writeFileSync(requestPath, `${JSON.stringify(requestBody, null, 2)}\n`, {
1998
+ encoding: 'utf8',
1999
+ mode: 0o600,
2000
+ });
2001
+ registerArtifact(root, {
2002
+ id: artifactIdFromDir(artifactDir),
2003
+ kind: 'triple-review',
2004
+ path: relative(root, artifactDir),
2005
+ sessionId: session.id,
2006
+ createdAt: new Date().toISOString(),
2007
+ files: ['triple-review-request.json'],
2008
+ });
2009
+ if (!config) {
2010
+ const reason = 'No active Pugi credentials. Run `pugi login --token <PAT>` or set PUGI_API_KEY for CI use.';
2011
+ recordToolResult(session, toolCallId, 'error', reason);
2012
+ writeFileSync(summaryPath, buildTripleReviewMarkdown({
2013
+ prompt,
2014
+ requestPath: relative(root, requestPath),
2015
+ verdict: null,
2016
+ reason,
2017
+ response: null,
2018
+ }), { encoding: 'utf8', mode: 0o600 });
2019
+ writeOutput(flags, {
2020
+ status: 'auth_missing',
2021
+ request: relative(root, requestPath),
2022
+ summary: relative(root, summaryPath),
2023
+ }, [
2024
+ 'Pugi triple-provider review request prepared but not sent — no active credentials.',
2025
+ `Request: ${relative(root, requestPath)}`,
2026
+ `Run \`pugi login --token <PAT>\` (or export PUGI_API_KEY for CI) then retry \`pugi review --triple --commit ${resolvedCommit}\`.`,
2027
+ ].join('\n'));
2028
+ process.exitCode = 5;
2029
+ return;
2030
+ }
2031
+ const submitResult = await submitTripleReview(config, requestBody);
2032
+ if (submitResult.status !== 'ok') {
2033
+ const outcome = describeSubmitFailure(submitResult);
2034
+ writeFileSync(summaryPath, buildTripleReviewMarkdown({
2035
+ prompt,
2036
+ requestPath: relative(root, requestPath),
2037
+ verdict: null,
2038
+ reason: outcome.message,
2039
+ response: null,
2040
+ }), { encoding: 'utf8', mode: 0o600 });
2041
+ recordToolResult(session, toolCallId, 'error', outcome.message);
2042
+ writeOutput(flags, {
2043
+ status: submitResult.status,
2044
+ code: submitResult.code,
2045
+ message: outcome.message,
2046
+ request: relative(root, requestPath),
2047
+ summary: relative(root, summaryPath),
2048
+ }, [
2049
+ outcome.headline,
2050
+ `Request: ${relative(root, requestPath)}`,
2051
+ `Summary: ${relative(root, summaryPath)}`,
2052
+ outcome.next ? `Next: ${outcome.next}` : '',
2053
+ ]
2054
+ .filter(Boolean)
2055
+ .join('\n'));
2056
+ process.exitCode = outcome.exitCode;
2057
+ return;
2058
+ }
2059
+ const response = submitResult.response;
2060
+ persistTripleReviewResult(resultPath, response);
2061
+ writeFileSync(summaryPath, buildTripleReviewMarkdown({
2062
+ prompt,
2063
+ requestPath: relative(root, requestPath),
2064
+ verdict: response.verdict,
2065
+ reason: response.reason,
2066
+ response,
2067
+ }), { encoding: 'utf8', mode: 0o600 });
2068
+ recordToolResult(session, toolCallId, response.verdict === 'BLOCK' ? 'error' : 'success', `Verdict: ${response.verdict} (${response.reason})`);
2069
+ const verdictReport = renderTripleProviderVerdict({
2070
+ response,
2071
+ commit: resolvedCommit,
2072
+ baseRef,
2073
+ });
2074
+ writeOutput(flags, {
2075
+ status: 'completed',
2076
+ verdict: response.verdict,
2077
+ reason: response.reason,
2078
+ counts: response.counts,
2079
+ reviewerCount: response.reviewerCount,
2080
+ effectiveTier: response.effectiveTier,
2081
+ commit: resolvedCommit,
2082
+ baseRef,
2083
+ reviewers: response.reviewers.map((r) => ({
2084
+ provider: r.provider ?? null,
2085
+ model: r.model,
2086
+ declaredVerdict: r.declaredVerdict,
2087
+ findings: r.findings,
2088
+ latencyMs: r.latencyMs,
2089
+ tokensUsed: r.tokensUsed,
2090
+ error: r.error,
2091
+ })),
2092
+ result: relative(root, resultPath),
2093
+ summary: relative(root, summaryPath),
2094
+ }, verdictReport);
2095
+ if (response.verdict === 'BLOCK') {
2096
+ process.exitCode = 9;
2097
+ }
2098
+ else if (response.verdict === 'WARN') {
2099
+ process.exitCode = 1;
2100
+ }
2101
+ }
2102
+ /**
2103
+ * Pretty-printer for the `pugi review --triple --commit <SHA>` verdict.
2104
+ * Mirrors the `/triple-review` skill's verdict block (per-reviewer
2105
+ * counts table → final GATE line → per-reviewer verbatim → cross-
2106
+ * model disagreement summary → tokens/cost note) so the output is
2107
+ * familiar to operators who already use the dev-only skill.
2108
+ */
2109
+ export function renderTripleProviderVerdict(input) {
2110
+ const { response, commit, baseRef } = input;
2111
+ const divider = '═'.repeat(68);
2112
+ const subDivider = '─'.repeat(68);
2113
+ // Per-reviewer counts table.
2114
+ const reviewerRows = response.reviewers.map((reviewer) => {
2115
+ const c = { P0: 0, P1: 0, P2: 0, P3: 0 };
2116
+ for (const f of reviewer.findings)
2117
+ c[f.severity] += 1;
2118
+ const status = reviewer.error
2119
+ ? 'ERROR'
2120
+ : reviewer.declaredVerdict ?? 'UNKNOWN';
2121
+ const label = reviewer.provider
2122
+ ? reviewer.provider.toUpperCase().padEnd(8)
2123
+ : reviewer.model.slice(0, 8).padEnd(8);
2124
+ return ` ${label} ${pad(c.P0)} ${pad(c.P1)} ${pad(c.P2)} ${pad(c.P3)} ${status}`;
2125
+ });
2126
+ // Cross-model disagreement: list severities flagged by 1 of N but not
2127
+ // the others. Surfaces the "highest-signal moment" per the skill.
2128
+ const disagreements = [];
2129
+ const allFindings = response.reviewers.flatMap((r) => r.findings.map((f) => ({
2130
+ provider: r.provider ?? r.model,
2131
+ severity: f.severity,
2132
+ line: f.line,
2133
+ issue: f.issue,
2134
+ })));
2135
+ const p1Flaggers = new Set(response.reviewers
2136
+ .filter((r) => r.findings.some((f) => f.severity === 'P1'))
2137
+ .map((r) => r.provider ?? r.model));
2138
+ if (p1Flaggers.size === 1) {
2139
+ const sole = [...p1Flaggers][0];
2140
+ disagreements.push(`Only ${sole} flagged a P1 — examine the disagreement, often the highest-signal moment.`);
2141
+ }
2142
+ const p0Flaggers = new Set(response.reviewers
2143
+ .filter((r) => r.findings.some((f) => f.severity === 'P0'))
2144
+ .map((r) => r.provider ?? r.model));
2145
+ if (p0Flaggers.size > 0 && p0Flaggers.size < response.reviewers.length) {
2146
+ disagreements.push(`P0 flagged by ${[...p0Flaggers].join(', ')} but not ${response.reviewers
2147
+ .filter((r) => !p0Flaggers.has(r.provider ?? r.model))
2148
+ .map((r) => r.provider ?? r.model)
2149
+ .join(', ')} — verify the finding before merging.`);
2150
+ }
2151
+ // Tokens / cost summary. Tokens are best-effort (some providers
2152
+ // return null). Cost is a placeholder pending billing wire-up; we
2153
+ // surface the quota note inline so the operator knows it counts as
2154
+ // one slot, not three.
2155
+ const totalTokens = response.reviewers.reduce((sum, r) => sum + (r.tokensUsed ?? 0), 0);
2156
+ // Verbatim reviewer outputs. Each section gets a header so operators
2157
+ // can scroll quickly and copy any individual reviewer's text into
2158
+ // their own notes / triage doc.
2159
+ const reviewerSections = response.reviewers.map((reviewer) => {
2160
+ const label = reviewer.provider
2161
+ ? reviewer.provider.toUpperCase()
2162
+ : reviewer.model;
2163
+ const body = reviewer.error
2164
+ ? `(reviewer errored: ${reviewer.error})`
2165
+ : reviewer.rawContent.trim() || '(empty response)';
2166
+ return [subDivider, `${label} SAYS (${reviewer.model}):`, '', body].join('\n');
2167
+ });
2168
+ return [
2169
+ `PUGI TRIPLE-PROVIDER REVIEW — commit ${commit} vs ${baseRef}`,
2170
+ divider,
2171
+ '',
2172
+ ` P0 P1 P2 P3 Status`,
2173
+ ...reviewerRows,
2174
+ '',
2175
+ `GATE: ${response.verdict}`,
2176
+ `Reason: ${response.reason}`,
2177
+ '',
2178
+ ...reviewerSections,
2179
+ '',
2180
+ subDivider,
2181
+ 'CROSS-MODEL DISAGREEMENT:',
2182
+ disagreements.length === 0
2183
+ ? ' (none — all reviewers agreed within rubric tolerance)'
2184
+ : disagreements.map((d) => ` - ${d}`).join('\n'),
2185
+ '',
2186
+ `Tokens: ~${totalTokens} total across ${response.reviewers.length} reviewers`,
2187
+ 'Quota: charged as 1 review slot (multi-provider counts as a single call).',
2188
+ ].join('\n');
2189
+ }
2190
+ function pad(n) {
2191
+ return String(n).padStart(2, ' ');
2192
+ }
1737
2193
  function describeSubmitFailure(result) {
1738
2194
  switch (result.status) {
1739
2195
  case 'endpoint_missing':
@@ -2293,6 +2749,33 @@ let engineClientFactory = null;
2293
2749
  export function setEngineClientFactory(factory) {
2294
2750
  engineClientFactory = factory;
2295
2751
  }
2752
+ /**
2753
+ * β-headless test seam: surface the module-scoped engine client factory
2754
+ * to sibling runtime modules (`headless.ts`) so the same fixture
2755
+ * injection that `setEngineClientFactory` provides for the
2756
+ * `runEngineTask` path applies to `pugi --print` runs. Production
2757
+ * callers never read this — the factory is `null` and falls through
2758
+ * to the real `AnvilEngineLoopClient`.
2759
+ */
2760
+ export function getEngineClientFactory() {
2761
+ return engineClientFactory;
2762
+ }
2763
+ /**
2764
+ * β-headless test seam: optional stdout/stderr writers injected for
2765
+ * `pugi --print` runs. When set, the headless runner forwards every
2766
+ * NDJSON line / human-readable chunk to these closures instead of the
2767
+ * real `process.stdout.write` / `process.stderr.write`. Needed because
2768
+ * `node:test`'s worker pool hijacks `process.stdout` for a binary IPC
2769
+ * channel — a captureStdio override would race the runner's frames
2770
+ * and surface as `Unexpected token '\x0F'` JSON parse failures in spec
2771
+ * assertions. Production never sets these.
2772
+ */
2773
+ let headlessStdoutWriter = null;
2774
+ let headlessStderrWriter = null;
2775
+ export function setHeadlessWriters(writers) {
2776
+ headlessStdoutWriter = writers.stdout ?? null;
2777
+ headlessStderrWriter = writers.stderr ?? null;
2778
+ }
2296
2779
  function runEngineTask(kind) {
2297
2780
  return async (args, flags, session) => {
2298
2781
  const label = commandLabel(kind);
@@ -2395,264 +2878,371 @@ function runEngineTask(kind) {
2395
2878
  throw new Error('internal: engine config missing after offline gate');
2396
2879
  }
2397
2880
  const client = engineClientFactory ? engineClientFactory(config) : new AnvilEngineLoopClient(config);
2398
- const adapter = new NativePugiEngineAdapter({ client, session });
2881
+ // β1b r1 (--allow-fetch / --allow-search wiring, 2026-05-26):
2882
+ // forward operator flags to the adapter so the schema-advertise +
2883
+ // executor-dispatch gates see the OR of (settings.json flag, CLI
2884
+ // flag). PR #425 r1 Backend Architect: the comment at
2885
+ // `tool-bridge.ts:740` documented `--allow-fetch` but the flag was
2886
+ // never wired into the adapter constructor — fix lands here.
2887
+ //
2888
+ // β4 r2 P1 #3 — load the MCP registry pre-run so the engine's
2889
+ // tool-bridge advertises every trusted server's tools under
2890
+ // `mcp__<server>__<tool>`. Before this fix the registry was never
2891
+ // loaded in the CLI engine path: `pugi mcp install` + `pugi mcp
2892
+ // trust` ran successfully but `pugi code/explain/fix/build` still
2893
+ // saw zero `mcp__*` tools in the schema (so the feature was
2894
+ // non-functional at the customer-facing surface). The adapter does
2895
+ // NOT own the registry lifecycle — we tear it down in the `finally`
2896
+ // below regardless of outcome so live MCP child processes are
2897
+ // reaped before the CLI exits.
2898
+ //
2899
+ // Failure mode: a bad `.pugi/mcp.json` (corrupted JSON, schema
2900
+ // violation) bubbles as an exception from `loadMcpRegistry`. We
2901
+ // surface it as a warning on stderr and continue WITHOUT MCP — the
2902
+ // operator's `pugi code "..."` invocation should not fail just
2903
+ // because a stale MCP entry refuses to parse. They get the engine
2904
+ // run without `mcp__*` tools and a clear hint to fix the file.
2905
+ let mcpRegistry;
2906
+ try {
2907
+ mcpRegistry = await loadMcpRegistry(root);
2908
+ }
2909
+ catch (error) {
2910
+ process.stderr.write(`pugi ${label}: MCP registry load failed — ${error.message}. ` +
2911
+ `Continuing without MCP tools. Fix .pugi/mcp.json to enable.\n`);
2912
+ mcpRegistry = undefined;
2913
+ }
2914
+ // P1 fix (deep audit 2026-05-26): load the workspace HookRegistry so
2915
+ // `.pugi/hooks/` lifecycle hooks fire for model-initiated tool calls
2916
+ // from the engine loop, not just for direct CLI tool invocations.
2917
+ // SECURITY: a `PreToolUse onFailure: 'block'` hook that refuses bash
2918
+ // containing `rm` now applies to model dispatch. Before this fix the
2919
+ // hooks were INVISIBLE to the engine adapter — a workspace operator
2920
+ // who set up a block hook for destructive bash would still see the
2921
+ // model freely dispatch those calls.
2922
+ //
2923
+ // r2 fix (triple-review 2026-05-26 P2): the fail-open path is a
2924
+ // security hole. If `.pugi/hooks.json` exists but is malformed
2925
+ // (truncated write, typo, partial edit) and the operator has block
2926
+ // hooks configured, the previous `continue without hooks` silently
2927
+ // disabled the BLOCK rules — a hostile or careless mutation of the
2928
+ // file would turn off all SECURITY-CRITICAL refusals without any
2929
+ // visible signal. We now distinguish three cases:
2930
+ //
2931
+ // (a) Neither user nor project hooks file exists → no hooks. Safe.
2932
+ // (b) File(s) exist and load() succeeds → hooks live. Normal.
2933
+ // (c) File(s) exist and load() fails → REFUSE THE RUN with a
2934
+ // fatal stderr message and `process.exit(1)`. Operator must
2935
+ // fix the file OR set `PUGI_HOOKS_BYPASS=1` to override (the
2936
+ // escape hatch is logged loudly so it cannot be silent).
2937
+ //
2938
+ // The bypass env var exists for the mid-edit recovery case (the
2939
+ // operator is in the middle of fixing the file and needs to run
2940
+ // pugi to see the world state). It is NEVER a default — the
2941
+ // operator types it explicitly.
2942
+ const hookOutcome = await loadHookRegistryOrExit({
2943
+ workspaceRoot: root,
2944
+ session,
2945
+ label,
2946
+ });
2947
+ if (hookOutcome.kind === 'parse-failure-refused') {
2948
+ // The helper already emitted the fatal message on stderr. Exit
2949
+ // directly so dispatchEngineCommand's caller observes a non-zero
2950
+ // exit code without a stack trace.
2951
+ process.exit(1);
2952
+ }
2953
+ const hooks = hookOutcome.hooks;
2954
+ const adapter = new NativePugiEngineAdapter({
2955
+ client,
2956
+ session,
2957
+ allowFetch: flags.allowFetch,
2958
+ allowSearch: flags.allowSearch,
2959
+ ...(mcpRegistry ? { mcpRegistry } : {}),
2960
+ ...(hooks ? { hooks } : {}),
2961
+ // Non-interactive CLI path: the FSM prompt callback always denies
2962
+ // until the operator explicitly grants permission via
2963
+ // `pugi mcp perms` (out-of-band). A future Ink-backed REPL path
2964
+ // overrides this with a modal prompt; pipes / CI never auto-allow.
2965
+ mcpPrompt: defaultNonInteractiveMcpPrompt,
2966
+ // P1 fix (deep audit 2026-05-26): CLI dispatcher is non-interactive
2967
+ // by default — pipes, CI, and scripted `pugi code "..."` runs do
2968
+ // not have an ink modal to surface ask_user_question into. The
2969
+ // REPL layer (β2b ink modal wiring, future) overrides this with
2970
+ // `interactive: true` + a live askUserBridge.
2971
+ interactive: false,
2972
+ });
2399
2973
  const toolCallId = recordToolCall(session, `engine:${adapter.name}`, `${label}: ${prompt}`);
2400
2974
  const taskId = `${kind}-${Date.now()}`;
2401
- const events = adapter.run({
2402
- id: taskId,
2403
- kind,
2404
- prompt,
2405
- workspaceRoot: root,
2406
- allowedPaths: [root],
2407
- deniedPaths: [],
2408
- artifacts: [],
2409
- // plan mode is enforced inside the tool-bridge (read-only schema +
2410
- // executor refusal sentinel). The permission mode here is the
2411
- // workspace-level toggle and is unchanged from interactive default.
2412
- permissionMode: 'auto',
2413
- }, { sessionId: session.id });
2414
- const statusEvents = [];
2415
- let result = null;
2416
- for await (const event of events) {
2417
- if (event.type === 'status') {
2418
- statusEvents.push(event.message);
2419
- // For `explain` the spec wants status events on stderr so the
2420
- // final summary on stdout is grep-able. Other commands keep the
2421
- // events on stdout-via-final-text so the operator sees the
2422
- // chronological trace.
2423
- if (kind === 'explain' && !flags.json) {
2424
- process.stderr.write(`${event.message}\n`);
2975
+ // β4 r2 P1 #3 — try/finally so loaded MCP child processes are
2976
+ // reaped regardless of run outcome (success, blocked, failed,
2977
+ // thrown). The shutdown is best-effort; we never want a stuck
2978
+ // MCP server to mask a successful Pugi run.
2979
+ try {
2980
+ const events = adapter.run({
2981
+ id: taskId,
2982
+ kind,
2983
+ prompt,
2984
+ workspaceRoot: root,
2985
+ allowedPaths: [root],
2986
+ deniedPaths: [],
2987
+ artifacts: [],
2988
+ // plan mode is enforced inside the tool-bridge (read-only schema +
2989
+ // executor refusal sentinel). The permission mode here is the
2990
+ // workspace-level toggle and is unchanged from interactive default.
2991
+ permissionMode: 'auto',
2992
+ }, { sessionId: session.id });
2993
+ const statusEvents = [];
2994
+ let result = null;
2995
+ for await (const event of events) {
2996
+ if (event.type === 'status') {
2997
+ statusEvents.push(event.message);
2998
+ // For `explain` the spec wants status events on stderr so the
2999
+ // final summary on stdout is grep-able. Other commands keep the
3000
+ // events on stdout-via-final-text so the operator sees the
3001
+ // chronological trace.
3002
+ if (kind === 'explain' && !flags.json) {
3003
+ process.stderr.write(`${event.message}\n`);
3004
+ }
3005
+ }
3006
+ else {
3007
+ result = {
3008
+ status: event.result.status,
3009
+ summary: event.result.summary,
3010
+ filesChanged: event.result.filesChanged,
3011
+ eventRefs: event.result.eventRefs,
3012
+ risks: event.result.risks,
3013
+ };
2425
3014
  }
2426
3015
  }
2427
- else {
3016
+ if (!result) {
3017
+ // Adapter MUST emit a terminal result event. Treat the empty
3018
+ // outcome as a failure so the CLI surfaces a clear error rather
3019
+ // than exiting 0 with no output.
2428
3020
  result = {
2429
- status: event.result.status,
2430
- summary: event.result.summary,
2431
- filesChanged: event.result.filesChanged,
2432
- eventRefs: event.result.eventRefs,
2433
- risks: event.result.risks,
3021
+ status: 'failed',
3022
+ summary: 'engine adapter returned no result',
3023
+ filesChanged: [],
3024
+ eventRefs: [],
3025
+ risks: ['adapter terminated without emitting a result event'],
2434
3026
  };
2435
3027
  }
2436
- }
2437
- if (!result) {
2438
- // Adapter MUST emit a terminal result event. Treat the empty
2439
- // outcome as a failure so the CLI surfaces a clear error rather
2440
- // than exiting 0 with no output.
2441
- result = {
2442
- status: 'failed',
2443
- summary: 'engine adapter returned no result',
2444
- filesChanged: [],
2445
- eventRefs: [],
2446
- risks: ['adapter terminated without emitting a result event'],
2447
- };
2448
- }
2449
- // α6.6 diff escalation — Layer A/B/C dispatcher.
2450
- //
2451
- // Some models emit file edits as inline SEARCH/REPLACE markers in
2452
- // the final response rather than through tool calls (especially
2453
- // Gemini and o1 family, which under-use tool schemas in long
2454
- // reasoning chains). We run the dispatcher against the model's
2455
- // final text so those markers still land on disk. Tool-call edits
2456
- // (Layer-A equivalent already handled by `edit`/`write` tools) are
2457
- // unaffected — the dispatcher only fires on prose blocks that
2458
- // happen to contain markers.
2459
- //
2460
- // Scope: code / fix / build / explain only. `plan` is read-only
2461
- // (the engine refuses write tools), so even a stray marker in plan
2462
- // output gets ignored to honour the plan-mode contract.
2463
- //
2464
- // Dry-run + read-only short-circuits: when the flags forbid writes
2465
- // we dispatch with `dryRun: true` so the operator still sees what
2466
- // WOULD have been written, but nothing touches disk.
2467
- let dispatchResults = [];
2468
- if (kind === 'code' || kind === 'fix' || kind === 'build_task') {
2469
- dispatchResults = await runMarkerDispatch({
2470
- root,
2471
- result: {
2472
- status: result.status,
2473
- summary: result.summary,
2474
- eventRefs: result.eventRefs,
2475
- },
2476
- dryRun: flags.dryRun,
2477
- });
2478
- // Merge dispatcher-touched files into `result.filesChanged` so the
2479
- // operator-facing summary lists them alongside tool-driven edits.
2480
- for (const dr of dispatchResults) {
2481
- if (dr.ok && dr.absPath) {
2482
- const rel = relative(root, dr.absPath);
2483
- if (!result.filesChanged.includes(rel))
2484
- result.filesChanged.push(rel);
3028
+ // α6.6 diff escalation — Layer A/B/C dispatcher.
3029
+ //
3030
+ // Some models emit file edits as inline SEARCH/REPLACE markers in
3031
+ // the final response rather than through tool calls (especially
3032
+ // Gemini and o1 family, which under-use tool schemas in long
3033
+ // reasoning chains). We run the dispatcher against the model's
3034
+ // final text so those markers still land on disk. Tool-call edits
3035
+ // (Layer-A equivalent already handled by `edit`/`write` tools) are
3036
+ // unaffected — the dispatcher only fires on prose blocks that
3037
+ // happen to contain markers.
3038
+ //
3039
+ // Scope: code / fix / build / explain only. `plan` is read-only
3040
+ // (the engine refuses write tools), so even a stray marker in plan
3041
+ // output gets ignored to honour the plan-mode contract.
3042
+ //
3043
+ // Dry-run + read-only short-circuits: when the flags forbid writes
3044
+ // we dispatch with `dryRun: true` so the operator still sees what
3045
+ // WOULD have been written, but nothing touches disk.
3046
+ let dispatchResults = [];
3047
+ if (kind === 'code' || kind === 'fix' || kind === 'build_task') {
3048
+ dispatchResults = await runMarkerDispatch({
3049
+ root,
3050
+ result: {
3051
+ status: result.status,
3052
+ summary: result.summary,
3053
+ eventRefs: result.eventRefs,
3054
+ },
3055
+ dryRun: flags.dryRun,
3056
+ });
3057
+ // Merge dispatcher-touched files into `result.filesChanged` so the
3058
+ // operator-facing summary lists them alongside tool-driven edits.
3059
+ for (const dr of dispatchResults) {
3060
+ if (dr.ok && dr.absPath) {
3061
+ const rel = relative(root, dr.absPath);
3062
+ if (!result.filesChanged.includes(rel))
3063
+ result.filesChanged.push(rel);
3064
+ }
2485
3065
  }
2486
3066
  }
2487
- }
2488
- // For `plan` we always write a plan.md artifact, regardless of
2489
- // outcome. A blocked plan (budget exhausted, tool refusal) still
2490
- // produces a reviewable artifact — the reason is recorded inline.
2491
- let planArtifact = null;
2492
- if (kind === 'plan') {
2493
- planArtifact = writePlanArtifact({
2494
- root,
2495
- session,
2496
- prompt,
2497
- result,
2498
- statusEvents,
2499
- });
2500
- }
2501
- // α6.8 EXTEND PR1: `--decompose` post-processing. We only attempt
2502
- // the parse on a `done` plan (a blocked/failed plan is already
2503
- // captured in plan.md with its reason; no JSON to extract). The
2504
- // model's final answer arrives via `result.summary` — on success
2505
- // the adapter prefix is empty so it is the raw final text. We
2506
- // strip any leading/trailing whitespace then run the parser
2507
- // against the contents. On parse failure we surface a non-fatal
2508
- // structured error in the payload — the operator still gets the
2509
- // plan.md artifact and can re-run.
2510
- //
2511
- // TODO(α7.x): `result.summary` is currently a string contract that
2512
- // doubles as both "human-readable headline" and "raw final model
2513
- // text". Split into `{ summary, finalText }` on the adapter so the
2514
- // parser does not have to assume the prefix is empty. Tracked in
2515
- // PR #423 v2 retro (P2.6, Claude review).
2516
- let decomposeArtifact = null;
2517
- let decomposeError = null;
2518
- if (flags.decompose && kind === 'plan' && result.status === 'done') {
2519
- const parsed = parseDecompositionFromText(result.summary);
2520
- if (parsed.ok) {
2521
- decomposeArtifact = writeDecomposition({
3067
+ // For `plan` we always write a plan.md artifact, regardless of
3068
+ // outcome. A blocked plan (budget exhausted, tool refusal) still
3069
+ // produces a reviewable artifact — the reason is recorded inline.
3070
+ let planArtifact = null;
3071
+ if (kind === 'plan') {
3072
+ planArtifact = writePlanArtifact({
2522
3073
  root,
2523
- sessionId: session.id,
2524
- // Persist the OPERATOR's original prompt, not the prompt+suffix
2525
- // we sent to the engine. The suffix is plumbing; the manifest
2526
- // header reads naturally only with the operator text.
2527
- prompt: args.join(' ').trim() || prompt,
2528
- decomposition: parsed.decomposition,
2529
- rationale: parsed.rationale,
3074
+ session,
3075
+ prompt,
3076
+ result,
3077
+ statusEvents,
2530
3078
  });
2531
3079
  }
3080
+ // α6.8 EXTEND PR1: `--decompose` post-processing. We only attempt
3081
+ // the parse on a `done` plan (a blocked/failed plan is already
3082
+ // captured in plan.md with its reason; no JSON to extract). The
3083
+ // model's final answer arrives via `result.summary` — on success
3084
+ // the adapter prefix is empty so it is the raw final text. We
3085
+ // strip any leading/trailing whitespace then run the parser
3086
+ // against the contents. On parse failure we surface a non-fatal
3087
+ // structured error in the payload — the operator still gets the
3088
+ // plan.md artifact and can re-run.
3089
+ //
3090
+ // TODO(α7.x): `result.summary` is currently a string contract that
3091
+ // doubles as both "human-readable headline" and "raw final model
3092
+ // text". Split into `{ summary, finalText }` on the adapter so the
3093
+ // parser does not have to assume the prefix is empty. Tracked in
3094
+ // PR #423 v2 retro (P2.6, Claude review).
3095
+ let decomposeArtifact = null;
3096
+ let decomposeError = null;
3097
+ if (flags.decompose && kind === 'plan' && result.status === 'done') {
3098
+ const parsed = parseDecompositionFromText(result.summary);
3099
+ if (parsed.ok) {
3100
+ decomposeArtifact = writeDecomposition({
3101
+ root,
3102
+ sessionId: session.id,
3103
+ // Persist the OPERATOR's original prompt, not the prompt+suffix
3104
+ // we sent to the engine. The suffix is plumbing; the manifest
3105
+ // header reads naturally only with the operator text.
3106
+ prompt: args.join(' ').trim() || prompt,
3107
+ decomposition: parsed.decomposition,
3108
+ rationale: parsed.rationale,
3109
+ });
3110
+ }
3111
+ else {
3112
+ decomposeError = { reason: parsed.reason, detail: parsed.detail };
3113
+ }
3114
+ }
3115
+ // Pull the headline metrics out of `eventRefs` so the summary and
3116
+ // JSON envelope match without re-parsing strings in two places.
3117
+ const metrics = parseEventRefs(result.eventRefs);
3118
+ const finalStatus = result.status === 'failed' ? 'error' : 'success';
3119
+ recordToolResult(session, toolCallId, finalStatus, result.summary);
3120
+ // Exit code policy (spec §1-§5):
3121
+ // code/fix/build → 0 done, 8 failed, 9 blocked
3122
+ // explain → same triple; read-only blocked = budget exhaustion
3123
+ // plan → 0 on done OR plan-mode refusal (refusal is a
3124
+ // SUCCESS for plan: the gate worked); 8 on failed
3125
+ // transport; 9 on budget exhaustion.
3126
+ //
3127
+ // Code Reviewer P2 retro 2026-05-23: previously `plan` masked
3128
+ // `budget_exhausted` as exit 0, so a CI loop with a token budget
3129
+ // hit looked identical to a successful plan. We now distinguish
3130
+ // via the adapter's `outcome=<status>` echo on `eventRefs` so
3131
+ // shell wrappers can branch on the real cause.
3132
+ if (kind === 'plan') {
3133
+ if (result.status === 'failed') {
3134
+ process.exitCode = ENGINE_EXIT_CODES.failed;
3135
+ }
3136
+ else if (result.status === 'blocked' &&
3137
+ metrics.outcome === 'budget_exhausted') {
3138
+ process.exitCode = ENGINE_EXIT_CODES.blocked;
3139
+ }
3140
+ else {
3141
+ // `done`, or `blocked` with outcome=tool_refused (= the plan-mode
3142
+ // gate fired, which is the contract working as designed), or
3143
+ // `blocked` with no outcome echo (legacy adapter — preserve the
3144
+ // pre-retro 0 behaviour to avoid breaking external scripts).
3145
+ process.exitCode = 0;
3146
+ }
3147
+ }
2532
3148
  else {
2533
- decomposeError = { reason: parsed.reason, detail: parsed.detail };
3149
+ process.exitCode = ENGINE_EXIT_CODES[result.status];
2534
3150
  }
2535
- }
2536
- // Pull the headline metrics out of `eventRefs` so the summary and
2537
- // JSON envelope match without re-parsing strings in two places.
2538
- const metrics = parseEventRefs(result.eventRefs);
2539
- const finalStatus = result.status === 'failed' ? 'error' : 'success';
2540
- recordToolResult(session, toolCallId, finalStatus, result.summary);
2541
- // Exit code policy (spec §1-§5):
2542
- // code/fix/build → 0 done, 8 failed, 9 blocked
2543
- // explain → same triple; read-only blocked = budget exhaustion
2544
- // plan → 0 on done OR plan-mode refusal (refusal is a
2545
- // SUCCESS for plan: the gate worked); 8 on failed
2546
- // transport; 9 on budget exhaustion.
2547
- //
2548
- // Code Reviewer P2 retro 2026-05-23: previously `plan` masked
2549
- // `budget_exhausted` as exit 0, so a CI loop with a token budget
2550
- // hit looked identical to a successful plan. We now distinguish
2551
- // via the adapter's `outcome=<status>` echo on `eventRefs` so
2552
- // shell wrappers can branch on the real cause.
2553
- if (kind === 'plan') {
2554
- if (result.status === 'failed') {
2555
- process.exitCode = ENGINE_EXIT_CODES.failed;
3151
+ const payload = {
3152
+ command: label,
3153
+ taskId,
3154
+ status: result.status,
3155
+ summary: result.summary,
3156
+ filesChanged: result.filesChanged,
3157
+ toolCalls: metrics.toolCalls,
3158
+ turns: metrics.turns,
3159
+ tokens: metrics.tokens,
3160
+ sessionId: session.id,
3161
+ sessionEventsMirror: metrics.mirror,
3162
+ risks: result.risks,
3163
+ plan: planArtifact ? { path: planArtifact.relPath } : undefined,
3164
+ // α6.6 per-edit dispatcher trace. Empty array when no inline
3165
+ // markers were detected in the model's final response.
3166
+ diffEdits: dispatchResults.map((dr) => ({
3167
+ layer: dr.layer,
3168
+ file: dr.file,
3169
+ ok: dr.ok,
3170
+ bytesWritten: dr.bytesWritten,
3171
+ reason: dr.reason,
3172
+ detail: dr.detail,
3173
+ })),
3174
+ // α6.8 EXTEND PR1: decompose artifacts (only present when
3175
+ // `--decompose` was passed AND the model emitted a parseable
3176
+ // JSON block). The `error` shape lands when the model returned
3177
+ // unparseable output; the operator can re-run with a tighter
3178
+ // prompt without losing the plain plan.md artifact.
3179
+ decompose: decomposeArtifact !== null
3180
+ ? {
3181
+ manifest: relative(root, decomposeArtifact.manifestPath),
3182
+ planDir: relative(root, decomposeArtifact.planDir),
3183
+ splits: decomposeArtifact.splitPaths,
3184
+ }
3185
+ : decomposeError !== null
3186
+ ? { error: decomposeError }
3187
+ : undefined,
3188
+ // The full event stream is useful for cabinet UI replay. We surface
3189
+ // it in JSON mode only — text mode operators want the summary, not
3190
+ // 30 turn-level lines.
3191
+ events: flags.json ? statusEvents : undefined,
3192
+ };
3193
+ const textLines = [];
3194
+ if (kind === 'plan' && planArtifact) {
3195
+ textLines.push(`Pugi plan written to ${planArtifact.relPath}`);
2556
3196
  }
2557
- else if (result.status === 'blocked' &&
2558
- metrics.outcome === 'budget_exhausted') {
2559
- process.exitCode = ENGINE_EXIT_CODES.blocked;
3197
+ if (decomposeArtifact !== null) {
3198
+ textLines.push(`Decomposition: ${decomposeArtifact.splitPaths.length} component spec${decomposeArtifact.splitPaths.length === 1 ? '' : 's'} under ${relative(root, decomposeArtifact.planDir)}`);
3199
+ textLines.push(`Manifest: ${relative(root, decomposeArtifact.manifestPath)}`);
2560
3200
  }
2561
- else {
2562
- // `done`, or `blocked` with outcome=tool_refused (= the plan-mode
2563
- // gate fired, which is the contract working as designed), or
2564
- // `blocked` with no outcome echo (legacy adapter — preserve the
2565
- // pre-retro 0 behaviour to avoid breaking external scripts).
2566
- process.exitCode = 0;
3201
+ else if (decomposeError !== null) {
3202
+ textLines.push(`Decomposition: skipped (${decomposeError.reason}) — plan.md still written`);
2567
3203
  }
2568
- }
2569
- else {
2570
- process.exitCode = ENGINE_EXIT_CODES[result.status];
2571
- }
2572
- const payload = {
2573
- command: label,
2574
- taskId,
2575
- status: result.status,
2576
- summary: result.summary,
2577
- filesChanged: result.filesChanged,
2578
- toolCalls: metrics.toolCalls,
2579
- turns: metrics.turns,
2580
- tokens: metrics.tokens,
2581
- sessionId: session.id,
2582
- sessionEventsMirror: metrics.mirror,
2583
- risks: result.risks,
2584
- plan: planArtifact ? { path: planArtifact.relPath } : undefined,
2585
- // α6.6 per-edit dispatcher trace. Empty array when no inline
2586
- // markers were detected in the model's final response.
2587
- diffEdits: dispatchResults.map((dr) => ({
2588
- layer: dr.layer,
2589
- file: dr.file,
2590
- ok: dr.ok,
2591
- bytesWritten: dr.bytesWritten,
2592
- reason: dr.reason,
2593
- detail: dr.detail,
2594
- })),
2595
- // α6.8 EXTEND PR1: decompose artifacts (only present when
2596
- // `--decompose` was passed AND the model emitted a parseable
2597
- // JSON block). The `error` shape lands when the model returned
2598
- // unparseable output; the operator can re-run with a tighter
2599
- // prompt without losing the plain plan.md artifact.
2600
- decompose: decomposeArtifact !== null
2601
- ? {
2602
- manifest: relative(root, decomposeArtifact.manifestPath),
2603
- planDir: relative(root, decomposeArtifact.planDir),
2604
- splits: decomposeArtifact.splitPaths,
2605
- }
2606
- : decomposeError !== null
2607
- ? { error: decomposeError }
2608
- : undefined,
2609
- // The full event stream is useful for cabinet UI replay. We surface
2610
- // it in JSON mode only — text mode operators want the summary, not
2611
- // 30 turn-level lines.
2612
- events: flags.json ? statusEvents : undefined,
2613
- };
2614
- const textLines = [];
2615
- if (kind === 'plan' && planArtifact) {
2616
- textLines.push(`Pugi plan written to ${planArtifact.relPath}`);
2617
- }
2618
- if (decomposeArtifact !== null) {
2619
- textLines.push(`Decomposition: ${decomposeArtifact.splitPaths.length} component spec${decomposeArtifact.splitPaths.length === 1 ? '' : 's'} under ${relative(root, decomposeArtifact.planDir)}`);
2620
- textLines.push(`Manifest: ${relative(root, decomposeArtifact.manifestPath)}`);
2621
- }
2622
- else if (decomposeError !== null) {
2623
- textLines.push(`Decomposition: skipped (${decomposeError.reason}) — plan.md still written`);
2624
- }
2625
- textLines.push(`Pugi ${label}: ${result.status}`);
2626
- textLines.push(`Summary: ${result.summary}`);
2627
- if (result.filesChanged.length > 0) {
2628
- textLines.push(`Files modified (${result.filesChanged.length}):`);
2629
- for (const file of result.filesChanged)
2630
- textLines.push(` - ${file}`);
2631
- }
2632
- else if (kind !== 'explain' && kind !== 'plan') {
2633
- textLines.push('Files modified: none');
2634
- }
2635
- textLines.push(`Tool calls: ${metrics.toolCalls} · Turns: ${metrics.turns} · Tokens: ${metrics.tokens}`);
2636
- if (dispatchResults.length > 0) {
2637
- const okCount = dispatchResults.filter((d) => d.ok).length;
2638
- const failCount = dispatchResults.length - okCount;
2639
- textLines.push(`Diff dispatch: ${okCount} applied, ${failCount} rejected (${dispatchResults.length} marker block${dispatchResults.length === 1 ? '' : 's'})`);
2640
- for (const dr of dispatchResults) {
2641
- if (dr.ok) {
2642
- textLines.push(` + ${dr.layer} ${dr.file} (${dr.bytesWritten} bytes)`);
2643
- }
2644
- else {
2645
- textLines.push(` ! ${dr.layer} ${dr.file}: ${dr.reason ?? 'failure'} — ${dr.detail ?? ''}`);
3204
+ textLines.push(`Pugi ${label}: ${result.status}`);
3205
+ textLines.push(`Summary: ${result.summary}`);
3206
+ if (result.filesChanged.length > 0) {
3207
+ textLines.push(`Files modified (${result.filesChanged.length}):`);
3208
+ for (const file of result.filesChanged)
3209
+ textLines.push(` - ${file}`);
3210
+ }
3211
+ else if (kind !== 'explain' && kind !== 'plan') {
3212
+ textLines.push('Files modified: none');
3213
+ }
3214
+ textLines.push(`Tool calls: ${metrics.toolCalls} · Turns: ${metrics.turns} · Tokens: ${metrics.tokens}`);
3215
+ if (dispatchResults.length > 0) {
3216
+ const okCount = dispatchResults.filter((d) => d.ok).length;
3217
+ const failCount = dispatchResults.length - okCount;
3218
+ textLines.push(`Diff dispatch: ${okCount} applied, ${failCount} rejected (${dispatchResults.length} marker block${dispatchResults.length === 1 ? '' : 's'})`);
3219
+ for (const dr of dispatchResults) {
3220
+ if (dr.ok) {
3221
+ textLines.push(` + ${dr.layer} ${dr.file} (${dr.bytesWritten} bytes)`);
3222
+ }
3223
+ else {
3224
+ textLines.push(` ! ${dr.layer} ${dr.file}: ${dr.reason ?? 'failure'} — ${dr.detail ?? ''}`);
3225
+ }
2646
3226
  }
2647
3227
  }
3228
+ if (result.risks.length > 0) {
3229
+ textLines.push(`Risks: ${result.risks.join('; ')}`);
3230
+ }
3231
+ textLines.push(`Session: ${session.id}`);
3232
+ if (metrics.mirror)
3233
+ textLines.push(`Events mirror: ${metrics.mirror}`);
3234
+ writeOutput(flags, payload, textLines.join('\n'));
2648
3235
  }
2649
- if (result.risks.length > 0) {
2650
- textLines.push(`Risks: ${result.risks.join('; ')}`);
3236
+ finally {
3237
+ // β4 r2 P1 #3 — tear down live MCP child processes BEFORE the
3238
+ // CLI exits. shutdown() is idempotent and swallows per-server
3239
+ // disconnect errors, so it is safe even if no servers connected.
3240
+ if (mcpRegistry) {
3241
+ await mcpRegistry.shutdown().catch((error) => {
3242
+ process.stderr.write(`pugi ${label}: MCP registry shutdown reported error — ${error.message}\n`);
3243
+ });
3244
+ }
2651
3245
  }
2652
- textLines.push(`Session: ${session.id}`);
2653
- if (metrics.mirror)
2654
- textLines.push(`Events mirror: ${metrics.mirror}`);
2655
- writeOutput(flags, payload, textLines.join('\n'));
2656
3246
  };
2657
3247
  }
2658
3248
  // Exported for the α6.6.1 triple-review remediation spec