@smartmemory/compose 0.1.1-beta → 0.1.3-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/.claude/skills/bug-fix/SKILL.md +143 -0
  2. package/.claude/skills/compose/SKILL.md +604 -0
  3. package/.compose-deps.json +89 -0
  4. package/README.md +47 -983
  5. package/bin/compose.js +473 -0
  6. package/contracts/comp-obs-contract.schema.json +362 -0
  7. package/contracts/cross-model-review-result.json +78 -0
  8. package/contracts/review-result.json +126 -0
  9. package/dist/assets/{_baseUniq-CQwX6VLz.js → _baseUniq-D-avYfn5.js} +1 -1
  10. package/dist/assets/{arc-SxJ2J1sh.js → arc-BC4dfQ-X.js} +1 -1
  11. package/dist/assets/{architectureDiagram-Q4EWVU46-BykunY1F.js → architectureDiagram-Q4EWVU46-BZmFXnGI.js} +1 -1
  12. package/dist/assets/{blockDiagram-DXYQGD6D-ohAKBOUw.js → blockDiagram-DXYQGD6D-DlfWSuux.js} +1 -1
  13. package/dist/assets/{c4Diagram-AHTNJAMY-DBDC3ENB.js → c4Diagram-AHTNJAMY-Y__uJrRx.js} +1 -1
  14. package/dist/assets/channel-LRG9kHqJ.js +1 -0
  15. package/dist/assets/{chunk-4BX2VUAB-Cv93Z7uM.js → chunk-4BX2VUAB-BfMePfTp.js} +1 -1
  16. package/dist/assets/{chunk-4TB4RGXK-DE0WBDkj.js → chunk-4TB4RGXK-BdlMSdEA.js} +1 -1
  17. package/dist/assets/{chunk-55IACEB6-CE1EXenG.js → chunk-55IACEB6-vrQHZTdv.js} +1 -1
  18. package/dist/assets/{chunk-EDXVE4YY-DA7Ana6H.js → chunk-EDXVE4YY-B8wioVlW.js} +1 -1
  19. package/dist/assets/{chunk-FMBD7UC4-CTDIPA3p.js → chunk-FMBD7UC4-Cd6Hrux2.js} +1 -1
  20. package/dist/assets/{chunk-OYMX7WX6-uGBaPaTX.js → chunk-OYMX7WX6-CfrhdQXY.js} +1 -1
  21. package/dist/assets/{chunk-QZHKN3VN-CYlnXuUO.js → chunk-QZHKN3VN-B9JQerOU.js} +1 -1
  22. package/dist/assets/{chunk-YZCP3GAM-ojGkzcZK.js → chunk-YZCP3GAM-DFN9X99H.js} +1 -1
  23. package/dist/assets/classDiagram-6PBFFD2Q-BC9a6pDE.js +1 -0
  24. package/dist/assets/classDiagram-v2-HSJHXN6E-BC9a6pDE.js +1 -0
  25. package/dist/assets/clone-dRxgFrBv.js +1 -0
  26. package/dist/assets/{cose-bilkent-S5V4N54A-Bktn9hL-.js → cose-bilkent-S5V4N54A-BAn0ap_E.js} +1 -1
  27. package/dist/assets/{dagre-KV5264BT-DFaSzuRF.js → dagre-KV5264BT-DyxnVq1g.js} +1 -1
  28. package/dist/assets/{diagram-5BDNPKRD-DnfmDzEm.js → diagram-5BDNPKRD-XCrzqski.js} +1 -1
  29. package/dist/assets/{diagram-G4DWMVQ6-Bm8W9YnG.js → diagram-G4DWMVQ6-MBCAXft_.js} +1 -1
  30. package/dist/assets/{diagram-MMDJMWI5-B5-TSKvp.js → diagram-MMDJMWI5-DbtB2yS6.js} +1 -1
  31. package/dist/assets/{diagram-TYMM5635-ls4rqlky.js → diagram-TYMM5635-Bb5NzX61.js} +1 -1
  32. package/dist/assets/{erDiagram-SMLLAGMA-giG6WO-r.js → erDiagram-SMLLAGMA-CpIeCOh2.js} +1 -1
  33. package/dist/assets/{flowDiagram-DWJPFMVM-XvlUuz-7.js → flowDiagram-DWJPFMVM-CHyoKnhW.js} +1 -1
  34. package/dist/assets/{ganttDiagram-T4ZO3ILL-hLBV57oV.js → ganttDiagram-T4ZO3ILL-DErKteO_.js} +1 -1
  35. package/dist/assets/{gitGraphDiagram-UUTBAWPF-BHu3s_Gn.js → gitGraphDiagram-UUTBAWPF-KFVAtj2F.js} +1 -1
  36. package/dist/assets/{graph-D0Cfv00Y.js → graph-CRnO_ifT.js} +1 -1
  37. package/dist/assets/index-DKBsEUJ-.css +1 -0
  38. package/dist/assets/index-DkRKLuNr.js +1144 -0
  39. package/dist/assets/{infoDiagram-42DDH7IO-DbqRsOo3.js → infoDiagram-42DDH7IO-BZFnuSp5.js} +1 -1
  40. package/dist/assets/{ishikawaDiagram-UXIWVN3A-DnCdx7zb.js → ishikawaDiagram-UXIWVN3A-4Xe2Szde.js} +1 -1
  41. package/dist/assets/{journeyDiagram-VCZTEJTY-CfD7eNcP.js → journeyDiagram-VCZTEJTY-CZRByfS-.js} +1 -1
  42. package/dist/assets/{kanban-definition-6JOO6SKY-BYaO9-mK.js → kanban-definition-6JOO6SKY-B95sk6Fk.js} +1 -1
  43. package/dist/assets/{layout-Bj72wOEB.js → layout-BqNQzxWT.js} +1 -1
  44. package/dist/assets/{linear-BRFo114D.js → linear-CUh7qb64.js} +1 -1
  45. package/dist/assets/{min-GCHnKlJS.js → min-wXgOS3ig.js} +1 -1
  46. package/dist/assets/{mindmap-definition-QFDTVHPH-n0PMebY4.js → mindmap-definition-QFDTVHPH-DB6iaAbO.js} +1 -1
  47. package/dist/assets/{pieDiagram-DEJITSTG-pN4CljHF.js → pieDiagram-DEJITSTG-CHkZHrTW.js} +1 -1
  48. package/dist/assets/{quadrantDiagram-34T5L4WZ-DNoAy8-D.js → quadrantDiagram-34T5L4WZ-DoTEO8e3.js} +1 -1
  49. package/dist/assets/{requirementDiagram-MS252O5E-BhtY05PT.js → requirementDiagram-MS252O5E-Dn8peXYp.js} +1 -1
  50. package/dist/assets/{sankeyDiagram-XADWPNL6-B6AD-16A.js → sankeyDiagram-XADWPNL6-DRXs6Ipb.js} +1 -1
  51. package/dist/assets/{sequenceDiagram-FGHM5R23-DShHM-uk.js → sequenceDiagram-FGHM5R23-wBBYZ0aq.js} +1 -1
  52. package/dist/assets/{stateDiagram-FHFEXIEX-DMxn7HTo.js → stateDiagram-FHFEXIEX-DPlBNGmf.js} +1 -1
  53. package/dist/assets/stateDiagram-v2-QKLJ7IA2-BW0ezXb4.js +1 -0
  54. package/dist/assets/{timeline-definition-GMOUNBTQ-Cdu6uq52.js → timeline-definition-GMOUNBTQ-CbbyTlHk.js} +1 -1
  55. package/dist/assets/{vennDiagram-DHZGUBPP-CpK29iRe.js → vennDiagram-DHZGUBPP-Bj4GaFfj.js} +1 -1
  56. package/dist/assets/{wardley-RL74JXVD-BQgSkdcO.js → wardley-RL74JXVD-RtNzq8KU.js} +55 -55
  57. package/dist/assets/{wardleyDiagram-NUSXRM2D-DJHYev6O.js → wardleyDiagram-NUSXRM2D-CDfE3zSj.js} +1 -1
  58. package/dist/assets/{xychartDiagram-5P7HB3ND-1d75pbaO.js → xychartDiagram-5P7HB3ND-CZXHHYD5.js} +1 -1
  59. package/dist/index.html +2 -2
  60. package/lib/budget-ledger.js +45 -0
  61. package/lib/bug-bisect.js +292 -0
  62. package/lib/bug-checkpoint.js +191 -0
  63. package/lib/bug-escalation.js +306 -0
  64. package/lib/bug-index-gen.js +136 -0
  65. package/lib/bug-ledger.js +126 -0
  66. package/lib/build-stream-schema.js +176 -0
  67. package/lib/build-stream-writer.js +3 -1
  68. package/lib/build.js +854 -284
  69. package/lib/connector-factory-shim.js +167 -0
  70. package/lib/constants.js +18 -0
  71. package/lib/debug-discipline.js +176 -27
  72. package/lib/deps.js +205 -0
  73. package/lib/health-score.js +4 -4
  74. package/lib/import.js +26 -13
  75. package/lib/inject-schema.js +21 -0
  76. package/lib/new.js +27 -53
  77. package/lib/result-normalizer.js +160 -144
  78. package/lib/review-lenses.js +5 -5
  79. package/lib/review-normalize.js +413 -0
  80. package/lib/review-prompt.js +163 -0
  81. package/lib/sections.js +325 -0
  82. package/lib/step-prompt.js +21 -1
  83. package/lib/step-validator.js +5 -3
  84. package/lib/stratum-mcp-client.js +172 -7
  85. package/package.json +14 -3
  86. package/pipelines/bug-fix.stratum.yaml +39 -1
  87. package/pipelines/build.stratum.yaml +28 -45
  88. package/pipelines/review-fix.stratum.yaml +1 -1
  89. package/presets/team-review.stratum.yaml +21 -14
  90. package/server/build-stream-bridge.js +28 -0
  91. package/server/cc-session-feature-resolver.js +111 -0
  92. package/server/cc-session-reader.js +327 -0
  93. package/server/cc-session-watcher.js +318 -0
  94. package/server/compose-mcp-tools.js +0 -125
  95. package/server/compose-mcp.js +2 -4
  96. package/server/contract-diff.js +192 -0
  97. package/server/decision-event-emit.js +175 -0
  98. package/server/decision-event-id.js +64 -0
  99. package/server/decision-events-snapshot.js +166 -0
  100. package/server/design-routes.js +92 -49
  101. package/server/drift-axes.js +365 -0
  102. package/server/drift-emit.js +121 -0
  103. package/server/gate-log-store.js +102 -0
  104. package/server/lifecycle-phase-history.js +44 -0
  105. package/server/open-loops-store.js +102 -0
  106. package/server/schema-validator.js +49 -0
  107. package/server/status-emit.js +27 -0
  108. package/server/status-snapshot.js +218 -0
  109. package/server/vision-routes.js +332 -4
  110. package/server/vision-server.js +104 -12
  111. package/server/vision-store.js +21 -0
  112. package/dist/assets/channel-DGElom1e.js +0 -1
  113. package/dist/assets/classDiagram-6PBFFD2Q-KqWP9wWZ.js +0 -1
  114. package/dist/assets/classDiagram-v2-HSJHXN6E-KqWP9wWZ.js +0 -1
  115. package/dist/assets/clone-DUJKJXd7.js +0 -1
  116. package/dist/assets/index-CUd6pFGF.css +0 -1
  117. package/dist/assets/index-DReRlzZI.js +0 -1144
  118. package/dist/assets/stateDiagram-v2-QKLJ7IA2-o6PnCs4e.js +0 -1
  119. package/server/connectors/agent-connector.js +0 -78
  120. package/server/connectors/claude-sdk-connector.js +0 -198
  121. package/server/connectors/codex-connector.js +0 -240
  122. package/server/connectors/connector-discovery.js +0 -18
  123. package/server/connectors/connector-runtime.js +0 -13
  124. package/server/connectors/opencode-connector.js +0 -200
package/lib/build.js CHANGED
@@ -12,11 +12,11 @@ import { readFileSync, writeFileSync, existsSync, mkdirSync, unlinkSync, renameS
12
12
  import { join, resolve, dirname } from 'node:path';
13
13
  import { fileURLToPath } from 'node:url';
14
14
  import { homedir, tmpdir } from 'node:os';
15
- import { execSync } from 'node:child_process';
15
+ import { execSync, execFileSync } from 'node:child_process';
16
16
  import { createHash } from 'node:crypto';
17
17
 
18
18
  import { StratumMcpClient, StratumError } from './stratum-mcp-client.js';
19
- import { runAndNormalize, AgentTimeoutError, UserInterruptError } from './result-normalizer.js';
19
+ import { runAndNormalize, AgentTimeoutError, UserInterruptError, AgentError } from './result-normalizer.js';
20
20
  import { checkCapabilityViolation } from './capability-checker.js';
21
21
  import { buildStepPrompt, buildRetryPrompt, buildGateContext, clearAmbientContextCache } from './step-prompt.js';
22
22
  import { promptGate } from './gate-prompt.js';
@@ -26,15 +26,17 @@ import { probeServer } from './server-probe.js';
26
26
  import { CliProgress } from './cli-progress.js';
27
27
  import { BuildStreamWriter } from './build-stream-writer.js';
28
28
  import { resolveAgentConfig } from './agent-string.js';
29
+ import { installFactoryShim } from './connector-factory-shim.js';
30
+ import { emitSections as emitPlanSections, appendTrailers as appendSectionTrailers } from './sections.js';
29
31
 
30
32
  import YAML from 'yaml';
31
- import { ClaudeSDKConnector } from '../server/connectors/claude-sdk-connector.js';
32
- import { CodexConnector } from '../server/connectors/codex-connector.js';
33
33
  import { updateFeature, readFeature, writeFeature } from './feature-json.js';
34
34
  import { evaluatePolicy } from '../server/policy-evaluator.js';
35
35
  import { runTriage, isTriageStale } from './triage.js';
36
36
  import { shouldRunCrossModel, LENS_DEFINITIONS } from './review-lenses.js';
37
37
  import { injectCertInstructions } from './cert-inject.js';
38
+ import { buildReviewPrompt } from './review-prompt.js';
39
+ import { normalizeCrossModelResult } from './review-normalize.js';
38
40
  import { detectTestFramework, scaffoldTestFramework } from './test-bootstrap.js';
39
41
  import { classifyStepAsTier, evaluateTiers } from './gate-tiers.js';
40
42
  import { mapFilesToRoutes, classifyRoutes, isDocsOnlyDiff } from './qa-scoping.js';
@@ -42,6 +44,201 @@ import { computeCompositeScore } from './health-score.js';
42
44
  import { recordScore } from './health-history.js';
43
45
  import { FixChainDetector, AttemptCounter, DebugLedger, TraceValidator } from './debug-discipline.js';
44
46
  import { CrossLayerAudit, loadDebugConfig } from './cross-layer-audit.js';
47
+ import { emitCheckpoint } from './bug-checkpoint.js';
48
+ import { appendHypothesisEntry, readHypotheses } from './bug-ledger.js';
49
+ import { tier1CodexReview, tier2FreshAgent } from './bug-escalation.js';
50
+
51
+ // ---------------------------------------------------------------------------
52
+ // COMP-FIX-HARD T5: per-step retries cap parser
53
+ // ---------------------------------------------------------------------------
54
+
55
/**
 * Collect the client-side retry ceiling for every flow step of a Stratum spec.
 *
 * Resolution order for a step:
 *   1. a numeric `retries` declared on the flow step itself;
 *   2. otherwise the numeric `retries` of the function the step points at
 *      (`step.function`, defaulting to `step.id`) under top-level `functions:`.
 * Steps that yield no numeric value from either place are left out of the map.
 *
 * Stratum's executor does not enforce `retries` (Phase 5 finding), so Compose
 * enforces it client-side: when a step's iteration count exceeds its cap,
 * runBuild force-terminates the flow.
 *
 * @param {string} specYaml - The Stratum spec (post triage mutation).
 * @returns {Map<string, number>} step.id -> max retries before force terminate.
 *   Empty when the YAML cannot be parsed.
 */
export function parseRetriesCap(specYaml) {
  let spec;
  try {
    spec = YAML.parse(specYaml);
  } catch {
    // An unparseable spec simply means "no caps known".
    return new Map();
  }

  const declaredFunctions = spec?.functions ?? {};
  const isNumber = (value) => typeof value === 'number';

  // Step-level retries take precedence over the linked function's retries.
  const resolveCap = (step) => {
    if (isNumber(step.retries)) return step.retries;
    const linked = declaredFunctions[step.function ?? step.id];
    return linked && isNumber(linked.retries) ? linked.retries : undefined;
  };

  const caps = new Map();
  for (const flow of Object.values(spec?.flows ?? {})) {
    const flowSteps = flow?.steps;
    if (!Array.isArray(flowSteps)) continue;
    for (const step of flowSteps) {
      if (!step?.id) continue;
      const limit = resolveCap(step);
      if (isNumber(limit)) caps.set(step.id, limit);
    }
  }
  return caps;
}
98
+
99
+ // ---------------------------------------------------------------------------
100
+ // COMP-FIX-HARD T6: hypothesis ledger append on diagnose success.
101
+ // ---------------------------------------------------------------------------
102
+
103
/**
 * Record an `accepted` hypothesis in the bug ledger when the `diagnose` step
 * finishes successfully during a bug-mode run.
 *
 * Returns without doing anything when the context is missing, is not in bug
 * mode, lacks `bug_code` or `cwd`, or when the completed step is not
 * `diagnose`. Ledger failures are reported with `console.warn` and swallowed.
 *
 * Intended to be shared by the top-level execute_step success branch and the
 * child-flow execute_step success branch in build.js (Correction C of the
 * COMP-FIX-HARD blueprint).
 *
 * @param {object} context — execution context (must carry mode + bug_code + cwd)
 * @param {object} response — Stratum response (uses response.step_id)
 * @param {object} result — agent result envelope (root_cause, trace_evidence)
 */
export function recordDiagnoseSuccessIfBugMode(context, response, result) {
  const inBugMode = Boolean(context) && context.mode === 'bug';
  if (!inBugMode) return;

  const { bug_code: bugCode, cwd } = context;
  if (!bugCode || !cwd) return;
  if (response?.step_id !== 'diagnose') return;

  try {
    const ledger = readHypotheses(cwd, bugCode);
    // Number from the highest attempt already recorded rather than the entry
    // count, so an accepted entry cannot reuse an attempt number taken by an
    // escalation_tier_1 entry (bug-escalation.js numbers those by length).
    // Duplicates would still append; this only keeps the rendered attempt
    // sequence sane.
    const highestAttempt = ledger.reduce(
      (highest, item) => Math.max(highest, Number(item.attempt) || 0),
      0,
    );
    const evidence = result?.trace_evidence;
    appendHypothesisEntry(cwd, bugCode, {
      attempt: highestAttempt + 1,
      ts: new Date().toISOString(),
      hypothesis: result?.root_cause ?? '',
      verdict: 'accepted',
      evidence_for: Array.isArray(evidence) ? evidence : [],
    });
  } catch (err) {
    // Best-effort: ledger I/O must never abort a successful step.
    // eslint-disable-next-line no-console
    console.warn(`[bug-ledger] recordDiagnoseSuccessIfBugMode failed: ${err?.message || err}`);
  }
}
144
+
145
+ // ---------------------------------------------------------------------------
146
+ // COMP-FIX-HARD T10: post-retro_check escalation gate (Tier 1 + Tier 2)
147
+ // ---------------------------------------------------------------------------
148
+
149
/**
 * Prompt the user for a yes/no decision via readline.
 *
 * Resolves to true only for `a`, `approve`, `y` or `yes` (case-insensitive,
 * surrounding whitespace ignored). Every other answer — including an empty
 * line — resolves to false, as does the readline interface closing before an
 * answer arrives (EOF / Ctrl-D, Ctrl-C, input stream end). When stdin or
 * stdout is not a TTY the prompt is skipped and false is returned, so
 * headless runs don't hang.
 *
 * @param {string} message - Prompt text; a single trailing space is appended.
 * @returns {Promise<boolean>} true when the user approved, false otherwise.
 */
async function _confirm(message) {
  if (!process.stdin.isTTY || !process.stdout.isTTY) return false;
  const { createInterface } = await import('node:readline');
  const rl = createInterface({ input: process.stdin, output: process.stdout });
  try {
    const ans = await new Promise((resolve) => {
      // rl.question never invokes its callback once the interface has closed,
      // so without this listener an EOF would leave the promise pending
      // forever. Treat "closed without an answer" as an empty answer (skip).
      // After a real answer the later rl.close() fires this too; resolving an
      // already-settled promise is a no-op.
      rl.once('close', () => resolve(''));
      rl.question(`${message} `, resolve);
    });
    const v = String(ans ?? '').trim().toLowerCase();
    return v === 'a' || v === 'approve' || v === 'y' || v === 'yes';
  } finally {
    rl.close();
  }
}
166
+
167
/**
 * Post-retro_check escalation gate for bug-mode builds (Tier 1 + Tier 2).
 *
 * Does nothing unless `context.mode === 'bug'`, `context.bug_code` is set and
 * `attemptCounter.getInterventionForBug(bug_code)` returns `'escalate'`.
 * When escalation is due:
 *   1. Asks the user (via `_confirm`) whether to run a Codex second opinion.
 *   2. Gathers Tier 1 inputs from `docs/bugs/<bug_code>/`: `description.md`,
 *      `repro.test.js` (falling back to `repro.md`), the first 8000 characters
 *      of `git diff --no-color HEAD`, and the hypothesis ledger. The file and
 *      diff inputs are optional and default to ''.
 *   3. Calls `tier1CodexReview` and reports `review.summary` through
 *      `progress.warn` and `build_step_start` / `build_step_done` stream events.
 *   4. If the review carries at least one `must-fix` or `should-fix` finding,
 *      asks again and, on approval, calls `tier2FreshAgent` (passing the
 *      path of `checkpoint.md` when that file exists, otherwise null).
 *
 * Best-effort: any failure inside this helper — including a missing or broken
 * `attemptCounter` — is logged with `console.warn` and swallowed. Escalation
 * is advisory and must never abort an otherwise-successful build.
 *
 * @param {object} stratum - Forwarded unchanged to tier1CodexReview / tier2FreshAgent.
 * @param {object} context - Execution context; reads `mode`, `bug_code` and `cwd`.
 * @param {object} [progress] - Optional reporter; only `warn(message)` is used.
 * @param {object} [streamWriter] - Optional event sink; only `write(event)` is used.
 * @param {object} attemptCounter - Provides `getInterventionForBug(bugCode)`.
 * @param {string} [dataDir] - Accepted for the caller's signature; not read here.
 * @returns {Promise<void>}
 */
export async function maybeRunEscalation(stratum, context, progress, streamWriter, attemptCounter, dataDir) {
  if (!context || context.mode !== 'bug' || !context.bug_code) return;

  // Only the head of the diff is sent to the reviewer.
  const DIFF_CHAR_LIMIT = 8000;
  // execSync throws ENOBUFS once output exceeds maxBuffer (1 MiB by default),
  // which would turn a large diff into an empty one rather than a truncated
  // one. Allow plenty of room and truncate ourselves.
  const DIFF_MAX_BUFFER = 64 * 1024 * 1024;

  const bugCode = context.bug_code;
  try {
    // Looked up inside the try so that a faulty counter is logged like any
    // other escalation failure instead of rejecting into the build.
    const intervention = attemptCounter.getInterventionForBug(bugCode);
    if (intervention !== 'escalate') return;

    const approveTier1 = await _confirm(
      `Bug ${bugCode} has escalated. Run Codex second opinion (~30s, read-only)? approve / skip:`,
    );
    if (!approveTier1) {
      if (progress) progress.warn(`Escalation skipped for ${bugCode}.`);
      return;
    }

    // Gather inputs for Tier 1.
    const bugDir = join(context.cwd, 'docs', 'bugs', bugCode);
    let bugDescription = '';
    try { bugDescription = readFileSync(join(bugDir, 'description.md'), 'utf-8'); } catch { /* optional */ }
    let reproTest = '';
    try { reproTest = readFileSync(join(bugDir, 'repro.test.js'), 'utf-8'); } catch {
      try { reproTest = readFileSync(join(bugDir, 'repro.md'), 'utf-8'); } catch { /* optional */ }
    }
    let currentDiff = '';
    try {
      currentDiff = execSync('git diff --no-color HEAD', {
        cwd: context.cwd, encoding: 'utf-8', timeout: 10_000, maxBuffer: DIFF_MAX_BUFFER,
      }).slice(0, DIFF_CHAR_LIMIT);
    } catch { /* not a git repo or no diff */ }

    const hypotheses = readHypotheses(context.cwd, bugCode);

    if (streamWriter) streamWriter.write({ type: 'build_step_start', stepId: 'escalation_tier_1', stepNum: '?', totalSteps: '?', agent: 'codex', intent: 'Codex second-opinion review', flowId: null });
    const review = await tier1CodexReview(stratum, context, bugDescription, reproTest, currentDiff, hypotheses);
    if (progress) progress.warn(`Tier 1 (Codex) — ${review.summary}`);
    if (streamWriter) streamWriter.write({ type: 'build_step_done', stepId: 'escalation_tier_1', summary: review.summary, retries: 0, violations: [], flowId: null });

    // Tier 2 gate — only if Codex surfaced a must-fix or should-fix finding.
    const blocking = (review.findings ?? []).filter(f => f.severity === 'must-fix' || f.severity === 'should-fix');
    if (blocking.length === 0) {
      if (progress) progress.warn('Codex returned no actionable findings — Tier 2 skipped.');
      return;
    }

    const approveTier2 = await _confirm(
      `Codex found a new angle. Dispatch fresh agent in worktree to draft a patch (no commits)? approve / skip:`,
    );
    if (!approveTier2) {
      if (progress) progress.warn(`Tier 2 skipped for ${bugCode}.`);
      return;
    }

    const checkpointPath = join(bugDir, 'checkpoint.md');
    const tier2 = await tier2FreshAgent(stratum, context, review, hypotheses, existsSync(checkpointPath) ? checkpointPath : null);
    if (tier2.skipped) {
      if (progress) progress.warn(`Tier 2 skipped: ${tier2.reason}`);
    } else {
      if (progress) progress.warn(`Tier 2 patch artifact ready at ${tier2.patch_path}`);
      if (streamWriter) streamWriter.write({ type: 'build_step_done', stepId: 'escalation_tier_2', summary: `Patch artifact at ${tier2.patch_path}`, retries: 0, violations: [], flowId: null });
    }
  } catch (err) {
    // eslint-disable-next-line no-console
    console.warn(`[bug-escalation] failed: ${err?.message || err}`);
  }
}
45
242
 
46
243
  // ---------------------------------------------------------------------------
47
244
  // STRAT-IMMUTABLE: pipeline and policy integrity helpers
@@ -146,14 +343,9 @@ function extractFilesChanged(response) {
146
343
  }
147
344
 
148
345
  // ---------------------------------------------------------------------------
149
- // Agent registry
346
+ // Per-step timeouts
150
347
  // ---------------------------------------------------------------------------
151
348
 
152
- const DEFAULT_AGENTS = new Map([
153
- ['claude', (opts) => new ClaudeSDKConnector(opts)],
154
- ['codex', (opts) => new CodexConnector(opts)],
155
- ]);
156
-
157
349
  // Per-step timeout in ms. Steps not listed get the default.
158
350
  // These are circuit breakers — generous enough for real work, tight enough to stop spiraling.
159
351
  const STEP_TIMEOUT_MS = {
@@ -176,41 +368,9 @@ const STEP_TIMEOUT_MS = {
176
368
  };
177
369
  const DEFAULT_TIMEOUT_MS = 30 * 60_000; // 30 min fallback
178
370
 
179
- /**
180
- * Default connector factory.
181
- * Accepts either a bare provider name ("claude") or a full agent string
182
- * ("claude:read-only-reviewer"). Resolves capability restrictions from the
183
- * template and passes them to the connector constructor.
184
- *
185
- * @param {string} agentString Full agent string, e.g. "claude:read-only-reviewer" or "claude"
186
- * @param {object} opts Additional connector options (cwd, model, etc.)
187
- */
188
- function defaultConnectorFactory(agentString, opts) {
189
- const { provider, allowedTools, disallowedTools, modelID, thinking, effort } = resolveAgentConfig(agentString);
190
- const factory = DEFAULT_AGENTS.get(provider);
191
- if (!factory) {
192
- throw new Error(
193
- `compose build: step requires agent "${provider}" but no connector is registered.\n` +
194
- `Known agents: ${[...DEFAULT_AGENTS.keys()].join(', ')}\n` +
195
- `Check your .stratum.yaml spec or install the agent.`
196
- );
197
- }
198
- // Pass tool restrictions only when they are defined (avoids overriding connector defaults)
199
- const connectorOpts = { ...opts };
200
- if (allowedTools !== null) connectorOpts.allowedTools = allowedTools;
201
- if (disallowedTools !== null) connectorOpts.disallowedTools = disallowedTools;
202
- // Pass resolved model ID when a tier was specified — connector uses its own default otherwise
203
- // Both keys for cross-connector compatibility: ClaudeSDKConnector uses `model`,
204
- // CodexConnector/AgentConnector base class uses `modelID`
205
- if (modelID !== null) {
206
- connectorOpts.model = modelID;
207
- connectorOpts.modelID = modelID;
208
- }
209
- // Tier-default thinking/effort; caller (opts) can override.
210
- if (thinking !== null && connectorOpts.thinking === undefined) connectorOpts.thinking = thinking;
211
- if (effort !== null && connectorOpts.effort === undefined) connectorOpts.effort = effort;
212
- return factory(connectorOpts);
213
- }
371
+ // STRAT-DEDUP-AGENTRUN-V3: connectors now live in stratum-mcp (Python). The
372
+ // `runAndNormalize` helper resolves the agent tier internally and dispatches
373
+ // via `stratum.agentRun(...)`, so there is no JS connector factory.
214
374
 
215
375
  // ---------------------------------------------------------------------------
216
376
  // Active build state (resume/abort)
@@ -281,16 +441,15 @@ function isProcessAlive(pid) {
281
441
  /**
282
442
  * Build an askAgent helper that answers gate questions with full workflow context.
283
443
  *
284
- * @param {Function} getConnector - Connector factory
285
- * @param {object} context - Execution context (cwd, featureCode, featureDir, stepHistory, filesChanged)
286
- * @param {object} gateDispatch - Stratum gate dispatch (step_id, on_approve, on_revise, on_kill)
287
- * @param {object} [gateExtras] - Optional enrichment (fromPhase, toPhase, summary)
444
+ * @param {object} stratum - StratumMcpClient (provides runAgentText)
445
+ * @param {object} context - Execution context (cwd, featureCode, featureDir, stepHistory, filesChanged)
446
+ * @param {object} gateDispatch - Stratum gate dispatch (step_id, on_approve, on_revise, on_kill)
447
+ * @param {object} [gateExtras] - Optional enrichment (fromPhase, toPhase, summary)
288
448
  */
289
- function makeAskAgent(getConnector, context, gateDispatch, gateExtras) {
449
+ function makeAskAgent(stratum, context, gateDispatch, gateExtras) {
290
450
  const preamble = buildGateContext(gateDispatch, context, gateExtras);
291
451
 
292
452
  return async function askAgent(question, artifactPath) {
293
- const connector = getConnector('claude', { cwd: context.cwd });
294
453
  const fileRef = artifactPath && !artifactPath.endsWith('/')
295
454
  ? `Read the file "${artifactPath}" and answer`
296
455
  : `Look at the project files in the working directory and answer`;
@@ -299,12 +458,8 @@ function makeAskAgent(getConnector, context, gateDispatch, gateExtras) {
299
458
  `${fileRef} this question concisely:\n\n` +
300
459
  `${question}\n\n` +
301
460
  `Keep your answer brief — 2-3 sentences max.`;
302
- const parts = [];
303
- for await (const event of connector.run(qaPrompt, {})) {
304
- if (event.type === 'assistant' && event.content) parts.push(event.content);
305
- if (event.type === 'result' && event.content && parts.length === 0) parts.push(event.content);
306
- }
307
- return parts.join('') || '(no answer)';
461
+ const text = await stratum.runAgentText('claude', qaPrompt, { cwd: context.cwd });
462
+ return text || '(no answer)';
308
463
  };
309
464
  }
310
465
 
@@ -366,7 +521,6 @@ export function resolveTemplatePath(name, cwd) {
366
521
  * the project root (e.g. parent dir for cross-repo features).
367
522
  * @param {boolean} [opts.abort] - Abort active build instead of running
368
523
  * @param {string} [opts.description] - Feature description override
369
- * @param {Function} [opts.connectorFactory] - Override agent connector creation (for testing)
370
524
  * @param {object} [opts.gateOpts] - Options for gate prompt (input/output streams)
371
525
  * @param {string} [opts.template] - Pipeline template name (default: 'build').
372
526
  * Resolves to pipelines/${template}.stratum.yaml.
@@ -376,7 +530,14 @@ export function resolveTemplatePath(name, cwd) {
376
530
  export async function runBuild(featureCode, opts = {}) {
377
531
  const cwd = opts.cwd ?? process.cwd();
378
532
  const agentCwd = opts.workingDirectory ?? cwd;
379
- const getConnector = opts.connectorFactory ?? defaultConnectorFactory;
533
+
534
+ // COMP-FIX-HARD T4: bug-mode branch.
535
+ // mode === 'feature' (default): legacy behavior — docs/features/<code>/,
536
+ // feature-json updates, plan with {featureCode, description}.
537
+ // mode === 'bug': docs/bugs/<code>/, no feature-json updates,
538
+ // plan with {task: description}.
539
+ const mode = opts.mode === 'bug' ? 'bug' : 'feature';
540
+ const isBugMode = mode === 'bug';
380
541
 
381
542
  // Resolve project paths
382
543
  const composeDir = join(cwd, '.compose');
@@ -388,7 +549,14 @@ export async function runBuild(featureCode, opts = {}) {
388
549
  return;
389
550
  }
390
551
 
391
- const featureDir = join(cwd, 'docs', 'features', featureCode);
552
+ // Single resolver used at every site that previously hardcoded
553
+ // `docs/features/<featureCode>/`. Callers must use this (not inline
554
+ // string concatenation) so the bug-mode path stays in sync.
555
+ const resolveItemDir = (code) => isBugMode
556
+ ? join(cwd, 'docs', 'bugs', code)
557
+ : join(cwd, 'docs', 'features', code);
558
+
559
+ const featureDir = resolveItemDir(featureCode);
392
560
 
393
561
  // Debug discipline (COMP-DEBUG-1)
394
562
  const debugStatePath = join(composeDir, 'debug-state.json');
@@ -427,7 +595,9 @@ export async function runBuild(featureCode, opts = {}) {
427
595
  // - opts.template is explicitly set (user chose a specific template)
428
596
  // ---------------------------------------------------------------------------
429
597
  let buildProfile = null;
430
- if (!opts.skipTriage && !opts.template) {
598
+ // Bug mode skips pre-build triage entirely — triage is feature-shaped
599
+ // (writes feature.json, profile selection per feature complexity tiers).
600
+ if (!isBugMode && !opts.skipTriage && !opts.template) {
431
601
  let cachedFeature = readFeature(cwd, featureCode);
432
602
  if (cachedFeature?.profile && !isTriageStale(cwd, featureCode)) {
433
603
  // Reuse cached profile
@@ -504,8 +674,14 @@ export async function runBuild(featureCode, opts = {}) {
504
674
  }
505
675
  }
506
676
 
507
- // Build description from feature folder
508
- const description = opts.description ?? loadFeatureDescription(featureDir, featureCode);
677
+ // COMP-FIX-HARD T5: build per-step retries cap from finalized spec.
678
+ // Stratum doesn't enforce `retries`; Compose force-terminates when iterN exceeds the cap.
679
+ const retriesCap = parseRetriesCap(specYaml);
680
+
681
+ // Build description from feature/bug folder
682
+ const description = opts.description ?? (isBugMode
683
+ ? loadBugDescription(featureDir, featureCode)
684
+ : loadFeatureDescription(featureDir, featureCode));
509
685
 
510
686
  // Vision writer
511
687
  const visionWriter = new VisionWriter(dataDir);
@@ -534,12 +710,23 @@ export async function runBuild(featureCode, opts = {}) {
534
710
  // CLI progress renderer
535
711
  const progress = new CliProgress();
536
712
 
537
- // Stratum MCP client
538
- const stratum = new StratumMcpClient();
539
- await stratum.connect({ cwd });
713
+ // Stratum MCP client (test override permitted via opts.stratum)
714
+ const stratum = opts.stratum ?? new StratumMcpClient();
715
+ if (!opts.stratum) await stratum.connect({ cwd });
540
716
 
541
- // Update feature.json status to IN_PROGRESS
542
- updateFeature(cwd, featureCode, { status: 'IN_PROGRESS' });
717
+ // Backward-compat shim: tests pass `connectorFactory` (legacy) to inject
718
+ // mock connectors. Adapt the factory's `connector.run(prompt)` event stream
719
+ // into BuildStreamEvent envelopes dispatched through the same onEvent
720
+ // pathway as the live producer, so runAndNormalize sees identical wire shape.
721
+ if (opts.connectorFactory && !opts.stratum) {
722
+ installFactoryShim(stratum, opts.connectorFactory, agentCwd);
723
+ }
724
+
725
+ // Update feature.json status to IN_PROGRESS (feature mode only;
726
+ // bug mode does not use feature.json).
727
+ if (!isBugMode) {
728
+ updateFeature(cwd, featureCode, { status: 'IN_PROGRESS' });
729
+ }
543
730
 
544
731
  // Hoisted for finally-block visibility
545
732
  let streamWriter = null;
@@ -575,11 +762,62 @@ export async function runBuild(featureCode, opts = {}) {
575
762
  let response;
576
763
  let isFreshStart = true;
577
764
 
578
- if (active && active.featureCode === featureCode && active.flowId) {
765
+ // COMP-FIX-HARD T8: explicit `--resume` flag (compose fix <code> --resume).
766
+ // When opts.resumeFlowId is set, skip stratum.plan entirely and resume the
767
+ // given flow. CLI validates the flowId belongs to this code before calling.
768
+ if (opts.resumeFlowId) {
769
+ // Re-read active state to verify ownership before clobbering — prevents
770
+ // two concurrent `compose fix --resume` invocations from racing on
771
+ // active-build.json. If another live process owns it, refuse to resume.
772
+ const activeNow = readActiveBuild(dataDir);
773
+ if (activeNow && activeNow.pid && activeNow.pid !== process.pid && isProcessAlive(activeNow.pid)) {
774
+ throw new Error(
775
+ `Cannot --resume: another live process (pid ${activeNow.pid}) owns the build for ${featureCode}.`
776
+ );
777
+ }
778
+ // Verify the active build matches the mode the caller asserts. Without
779
+ // this check, `compose fix CODE --resume` against a feature build with
780
+ // the same code would silently resume a feature flow as a bug flow.
781
+ if (activeNow && activeNow.mode && activeNow.mode !== mode) {
782
+ throw new Error(
783
+ `Cannot --resume: active build is in ${activeNow.mode} mode, but caller invoked ${mode} mode.`
784
+ );
785
+ }
786
+ console.log(`Resuming flow ${opts.resumeFlowId} for ${featureCode}...`);
787
+ response = await stratum.resume(opts.resumeFlowId);
788
+ isFreshStart = false;
789
+ // Refresh active-build.json so streaming/UI sees this as the live build.
790
+ const flowName = extractFlowName(specYaml, templateName);
791
+ writeActiveBuild(dataDir, {
792
+ featureCode,
793
+ flowId: response.flow_id ?? opts.resumeFlowId,
794
+ pipeline: flowName,
795
+ mode,
796
+ pid: process.pid,
797
+ currentStepId: response.step_id,
798
+ specPath: `pipelines/${templateName}.stratum.yaml`,
799
+ stepNum: response.step_number ?? 1,
800
+ totalSteps: response.total_steps ?? null,
801
+ retries: 0,
802
+ violations: [],
803
+ status: 'running',
804
+ resumedAt: new Date().toISOString(),
805
+ });
806
+ } else if (active && active.featureCode === featureCode && active.flowId) {
579
807
  // Same feature — try to resume or start fresh
580
- if (active.status && active.status !== 'running') {
808
+ // Refuse implicit resume across modes: a stale bug-mode active-build
809
+ // with the same code as a feature build (or vice versa) would otherwise
810
+ // resume the wrong flow shape. Only blocks when active.mode is set
811
+ // (legacy active-build.json files predate the field).
812
+ if (active.mode && active.mode !== mode) {
813
+ console.log(
814
+ `Previous build for ${featureCode} was in ${active.mode} mode, ` +
815
+ `current invocation is ${mode} mode. Starting fresh.`
816
+ );
817
+ response = await startFresh(stratum, specYaml, featureCode, description, dataDir, templateName, mode);
818
+ } else if (active.status && active.status !== 'running') {
581
819
  console.log(`Previous build ${active.status}. Starting fresh.`);
582
- response = await startFresh(stratum, specYaml, featureCode, description, dataDir, templateName);
820
+ response = await startFresh(stratum, specYaml, featureCode, description, dataDir, templateName, mode);
583
821
  } else if (active.pid && active.pid !== process.pid && isProcessAlive(active.pid)) {
584
822
  // Same feature, different live process — block
585
823
  throw new Error(
@@ -592,7 +830,7 @@ export async function runBuild(featureCode, opts = {}) {
592
830
  response = await stratum.resume(active.flowId);
593
831
  if (isTerminalFlow(response.status)) {
594
832
  console.log(`Previous build already ${response.status}. Starting fresh.`);
595
- response = await startFresh(stratum, specYaml, featureCode, description, dataDir, templateName);
833
+ response = await startFresh(stratum, specYaml, featureCode, description, dataDir, templateName, mode);
596
834
  } else {
597
835
  console.log(`Resuming from step: ${response.step_id}`);
598
836
  isFreshStart = false;
@@ -603,7 +841,7 @@ export async function runBuild(featureCode, opts = {}) {
603
841
  || err?.message?.includes('No active flow');
604
842
  if (recoverable) {
605
843
  console.log('Previous flow not found. Starting fresh.');
606
- response = await startFresh(stratum, specYaml, featureCode, description, dataDir, templateName);
844
+ response = await startFresh(stratum, specYaml, featureCode, description, dataDir, templateName, mode);
607
845
  } else {
608
846
  throw err;
609
847
  }
@@ -613,7 +851,7 @@ export async function runBuild(featureCode, opts = {}) {
613
851
  // Different feature or no active build — start fresh.
614
852
  // active-build.json is last-writer-wins: concurrent builds for
615
853
  // different features are allowed; the UI shows the most recent.
616
- response = await startFresh(stratum, specYaml, featureCode, description, dataDir, templateName);
854
+ response = await startFresh(stratum, specYaml, featureCode, description, dataDir, templateName, mode);
617
855
  }
618
856
 
619
857
  // Update vision state
@@ -644,9 +882,11 @@ export async function runBuild(featureCode, opts = {}) {
644
882
  const context = {
645
883
  cwd: agentCwd,
646
884
  featureCode,
647
- featureDir: join(cwd, 'docs', 'features', featureCode),
885
+ featureDir: resolveItemDir(featureCode),
648
886
  contextDir: contextDirPath,
649
887
  stepHistory,
888
+ mode,
889
+ ...(isBugMode ? { bug_code: featureCode } : {}),
650
890
  };
651
891
 
652
892
 
@@ -694,6 +934,29 @@ export async function runBuild(featureCode, opts = {}) {
694
934
  break;
695
935
  }
696
936
  progress.stepDone(stepId);
937
+ // COMP-PLAN-SECTIONS T7: append "What Was Built" trailers to all
938
+ // section files after a successful ship. No-op if sections/ doesn't
939
+ // exist. Wrapped so trailer-append failure never fails the ship.
940
+ try {
941
+ if (shipResult.commit) {
942
+ const trailerResult = appendSectionTrailers({
943
+ featureDir,
944
+ commit: shipResult.commit,
945
+ filesChanged: shipResult.filesChanged ?? [],
946
+ cwd: agentCwd,
947
+ });
948
+ if (trailerResult.trailed?.length > 0) {
949
+ streamWriter.write({
950
+ type: 'build_sections_trailed',
951
+ featureCode,
952
+ count: trailerResult.trailed.length,
953
+ sections: trailerResult.trailed,
954
+ });
955
+ }
956
+ }
957
+ } catch (err) {
958
+ try { streamWriter.write({ type: 'build_error', message: `sections trailer append failed: ${err.message}`, stepId: 'ship' }); } catch { /* ignore */ }
959
+ }
697
960
  // COMP-HEALTH: collect plan_completion signal from ship result (if present)
698
961
  if (shipResult.planCompletionPct != null || shipResult.plan_completion_pct != null) {
699
962
  buildSignals.plan_completion = {
@@ -712,10 +975,33 @@ export async function runBuild(featureCode, opts = {}) {
712
975
  // Build prompt and dispatch to agent
713
976
  const stepStartMs = Date.now();
714
977
  const agentType = response.agent ?? 'claude';
715
- const prompt = buildStepPrompt(response, context);
716
- const connector = getConnector(agentType, { cwd: agentCwd });
978
+ const basePrompt = buildStepPrompt(response, context);
717
979
  const maxDurationMs = STEP_TIMEOUT_MS[stepId] ?? DEFAULT_TIMEOUT_MS;
718
980
 
981
+ // MF-1/SF-4: Prepend shared review scaffold when this is a review step.
982
+ // Also covers the merge step (output_contract=ReviewResult) so its output
983
+ // is normalized via normalizeReviewResult.
984
+ // SF-NEW-1: reduce_mode steps (merge) get normalization but NOT scaffold framing.
985
+ // They are reducers, not reviewers — prepending "Review the {lens} for..." is wrong.
986
+ const isReviewMain = response.review_mode === true
987
+ || response.inputs?.review_mode === 'true'
988
+ || response.output_contract === 'ReviewResult';
989
+ const isReduceMain = response.inputs?.reduce_mode === 'true';
990
+ const isReviewScaffoldMain = isReviewMain && !isReduceMain;
991
+ const confGateMain = Number(response.inputs?.confidence_gate ?? response.confidence_gate ?? 7);
992
+ let prompt = basePrompt;
993
+ if (isReviewScaffoldMain) {
994
+ prompt = buildReviewPrompt({
995
+ agentType,
996
+ lens: 'general',
997
+ lensFocus: '',
998
+ exclusions: '',
999
+ confidenceGate: confGateMain,
1000
+ taskDescription: response.inputs?.task ?? '',
1001
+ blueprint: response.inputs?.blueprint ?? '',
1002
+ }) + '\n\n' + basePrompt;
1003
+ }
1004
+
719
1005
  // Collect tool_use events for post-step capability audit (Item 193/195)
720
1006
  const observedTools = [];
721
1007
  const onToolUse = ({ tool, input, timestamp }) => {
@@ -724,7 +1010,12 @@ export async function runBuild(featureCode, opts = {}) {
724
1010
 
725
1011
  let mainResult;
726
1012
  try {
727
- mainResult = await runAndNormalize(connector, prompt, response, { progress, streamWriter, maxDurationMs, onToolUse });
1013
+ mainResult = await runAndNormalize(null, prompt, response, {
1014
+ progress, streamWriter, maxDurationMs, onToolUse, stratum, cwd: agentCwd,
1015
+ reviewMode: isReviewMain,
1016
+ confidenceGate: confGateMain,
1017
+ lens: response.inputs?.lens_name ?? response.lens_name ?? 'general',
1018
+ });
728
1019
  } catch (err) {
729
1020
  if (err instanceof UserInterruptError) {
730
1021
  if (err.action === 'skip') {
@@ -787,7 +1078,7 @@ export async function runBuild(featureCode, opts = {}) {
787
1078
  capViolations.push({ tool, severity: check.severity, reason: check.reason });
788
1079
  // Emit capability_violation event to build stream
789
1080
  const { template: tpl } = resolveAgentConfig(agentType);
790
- streamWriter.writeViolation(stepId, agentType, tpl ?? 'unknown', check.reason);
1081
+ streamWriter.writeViolation(stepId, agentType, tpl ?? 'unknown', check.reason, check.severity);
791
1082
  // Console log (always, even in block mode — for visibility)
792
1083
  console.log(` [caps] ${tool} used by ${agentType} — violates ${tpl ?? 'unknown'} profile`);
793
1084
  }
@@ -855,11 +1146,20 @@ export async function runBuild(featureCode, opts = {}) {
855
1146
  response = await stratum.stepDone(flowId, stepId, result ?? { summary: 'Step complete' });
856
1147
  syncStepHistory(dataDir, stepHistory);
857
1148
 
1149
+ // COMP-FIX-HARD T6: record accepted hypothesis on diagnose success (bug mode only).
1150
+ recordDiagnoseSuccessIfBugMode(context, { step_id: stepId }, result);
1151
+
858
1152
  // Debug discipline enforcement (COMP-DEBUG-1)
859
1153
  if (stepId === 'fix' || stepId === 'diagnose') {
860
1154
  const filesChanged = extractFilesChanged({ result });
861
- fixChainDetector.recordIteration(filesChanged);
862
- attemptCounter.record({ filesChanged });
1155
+ // COMP-FIX-HARD T9: per-bug keying when running in bug mode.
1156
+ if (context.mode === 'bug' && context.bug_code) {
1157
+ fixChainDetector.recordIterationForBug(context.bug_code, filesChanged);
1158
+ attemptCounter.recordForBug(context.bug_code, { filesChanged });
1159
+ } else {
1160
+ fixChainDetector.recordIteration(filesChanged);
1161
+ attemptCounter.record({ filesChanged });
1162
+ }
863
1163
 
864
1164
  // Validate trace evidence on diagnose results
865
1165
  if (stepId === 'diagnose' && result) {
@@ -877,21 +1177,33 @@ export async function runBuild(featureCode, opts = {}) {
877
1177
  }
878
1178
  }
879
1179
 
880
- const chains = fixChainDetector.detect();
881
- const intervention = attemptCounter.getIntervention();
1180
+ const isBugMode = context.mode === 'bug' && !!context.bug_code;
1181
+ const chains = isBugMode
1182
+ ? fixChainDetector.detectForBug(context.bug_code)
1183
+ : fixChainDetector.detect();
1184
+ const intervention = isBugMode
1185
+ ? attemptCounter.getInterventionForBug(context.bug_code)
1186
+ : attemptCounter.getIntervention();
1187
+ // COMP-FIX-HARD T10: read attempt counters via the per-bug API in bug mode.
1188
+ const attemptCount = isBugMode
1189
+ ? attemptCounter.getCountForBug(context.bug_code)
1190
+ : attemptCounter.count;
1191
+ const attemptIsVisual = isBugMode
1192
+ ? (attemptCounter.byBug.get(context.bug_code)?.isVisual ?? false)
1193
+ : attemptCounter.isVisual;
882
1194
 
883
1195
  if (chains.length > 0) {
884
1196
  debugLedger.record({ type: 'fix_chain_detected', chains });
885
1197
  }
886
1198
 
887
1199
  if (intervention === 'escalate') {
888
- debugLedger.record({ type: 'escalation', attempt: attemptCounter.count, isVisual: attemptCounter.isVisual });
889
- if (streamWriter) streamWriter.write({ type: 'build_error', message: `Debug discipline: escalating after ${attemptCounter.count} attempts. Dispatching to cross-agent review.` });
1200
+ debugLedger.record({ type: 'escalation', attempt: attemptCount, isVisual: attemptIsVisual });
1201
+ if (streamWriter) streamWriter.write({ type: 'build_error', message: `Debug discipline: escalating after ${attemptCount} attempts. Dispatching to cross-agent review.` });
890
1202
  } else if (intervention === 'trace_refresh') {
891
- debugLedger.record({ type: 'trace_refresh_required', attempt: attemptCounter.count });
892
- if (progress) progress.warn(`Debug discipline: ${attemptCounter.count} attempts — fresh trace evidence required before next fix`);
1203
+ debugLedger.record({ type: 'trace_refresh_required', attempt: attemptCount });
1204
+ if (progress) progress.warn(`Debug discipline: ${attemptCount} attempts — fresh trace evidence required before next fix`);
893
1205
  } else if (intervention === 'trace_reminder') {
894
- if (progress) progress.warn(`Debug discipline: ${attemptCounter.count} attempts on same target — verify trace evidence is current`);
1206
+ if (progress) progress.warn(`Debug discipline: ${attemptCount} attempts on same target — verify trace evidence is current`);
895
1207
  }
896
1208
 
897
1209
  // Persist debug state
@@ -903,6 +1215,11 @@ export async function runBuild(featureCode, opts = {}) {
903
1215
  } catch { /* best-effort */ }
904
1216
  }
905
1217
 
1218
+ // COMP-FIX-HARD T10: post-retro_check escalation gate (bug mode only).
1219
+ if (stepId === 'retro_check' && context.mode === 'bug' && context.bug_code) {
1220
+ await maybeRunEscalation(stratum, context, progress, streamWriter, attemptCounter, dataDir);
1221
+ }
1222
+
906
1223
  // Stream: step done — read retries/violations from active-build state
907
1224
  // (syncStepHistory has already written them above)
908
1225
  {
@@ -990,6 +1307,8 @@ export async function runBuild(featureCode, opts = {}) {
990
1307
  type: 'build_gate_resolved',
991
1308
  stepId, outcome: 'approve', rationale: policy.reason, flowId, policyMode: 'skip',
992
1309
  });
1310
+ // COMP-PLAN-SECTIONS T6: emit sections after plan_gate auto-approve
1311
+ maybeEmitSectionsAfterPlanGate(stepId, featureDir, { streamWriter, featureCode });
993
1312
  stepHistory.push({ stepId, artifact: null, summary: `Gate skip: ${policy.reason}`, outcome: 'approve' });
994
1313
  syncStepHistory(dataDir, stepHistory);
995
1314
 
@@ -1001,6 +1320,8 @@ export async function runBuild(featureCode, opts = {}) {
1001
1320
  type: 'build_gate_resolved',
1002
1321
  stepId, outcome: 'approve', rationale: policy.reason, flowId, policyMode: 'flag',
1003
1322
  });
1323
+ // COMP-PLAN-SECTIONS T6: emit sections after plan_gate auto-approve
1324
+ maybeEmitSectionsAfterPlanGate(stepId, featureDir, { streamWriter, featureCode });
1004
1325
  stepHistory.push({ stepId, artifact: null, summary: `Gate flag: ${policy.reason}`, outcome: 'approve' });
1005
1326
  syncStepHistory(dataDir, stepHistory);
1006
1327
 
@@ -1016,7 +1337,7 @@ export async function runBuild(featureCode, opts = {}) {
1016
1337
  progress.pause();
1017
1338
  console.log(`\nGate: ${stepId}`);
1018
1339
 
1019
- const askAgent = makeAskAgent(getConnector, context, response, gateExtras);
1340
+ const askAgent = makeAskAgent(stratum, context, response, gateExtras);
1020
1341
  const serverUp = await probeServer();
1021
1342
  let outcome, rationale;
1022
1343
 
@@ -1066,6 +1387,10 @@ export async function runBuild(featureCode, opts = {}) {
1066
1387
  clearAmbientContextCache(contextDirPath);
1067
1388
 
1068
1389
  response = await stratum.gateResolve(flowId, stepId, outcome, rationale, 'human');
1390
+ // COMP-PLAN-SECTIONS T6: emit sections after plan_gate human approve
1391
+ if (outcome === 'approve') {
1392
+ maybeEmitSectionsAfterPlanGate(stepId, featureDir, { streamWriter, featureCode });
1393
+ }
1069
1394
  progress.resume();
1070
1395
 
1071
1396
  // COMP-UX-3c: concise gate resolution narration
@@ -1170,7 +1495,7 @@ export async function runBuild(featureCode, opts = {}) {
1170
1495
  }
1171
1496
 
1172
1497
  let childResult = await executeChildFlow(
1173
- response, stratum, getConnector, context,
1498
+ response, stratum, context,
1174
1499
  visionWriter, itemId, dataDir, opts.gateOpts ?? {}, progress,
1175
1500
  streamWriter
1176
1501
  );
@@ -1183,7 +1508,7 @@ export async function runBuild(featureCode, opts = {}) {
1183
1508
  mergedResult,
1184
1509
  context.filesChanged ?? [],
1185
1510
  agentCwd,
1186
- getConnector,
1511
+ stratum,
1187
1512
  streamWriter,
1188
1513
  opts
1189
1514
  );
@@ -1228,12 +1553,31 @@ export async function runBuild(featureCode, opts = {}) {
1228
1553
 
1229
1554
  // COMP-UX-3c: 1-line iteration summary
1230
1555
  const iterN = ((currentState?.retries) || 0) + 1;
1231
- const maxIter = 3; // stratum default max retries
1556
+ // COMP-FIX-HARD T5: read cap from YAML; default to 3 when unspecified.
1557
+ const capStepId = response.step_id ?? stepId;
1558
+ const maxIter = retriesCap.get(capStepId) ?? 3;
1232
1559
  const topViolation = violationList[0] ?? 'postcondition failed';
1233
1560
  const iterSummary = typeof topViolation === 'string'
1234
1561
  ? topViolation
1235
1562
  : (topViolation.message ?? topViolation.text ?? JSON.stringify(topViolation));
1236
- console.log(` Iteration ${iterN}/${maxIter} (${response.step_id ?? stepId}): ${iterSummary.slice(0, 80)}`);
1563
+ console.log(` Iteration ${iterN}/${maxIter} (${capStepId}): ${iterSummary.slice(0, 80)}`);
1564
+
1565
+ // COMP-FIX-HARD T5: force-terminate when cap exceeded.
1566
+ // Stratum's `retries` field is declarative-only; Compose enforces it here.
1567
+ if (iterN > maxIter) {
1568
+ console.log(` Retry cap exceeded for ${capStepId} (${iterN} > ${maxIter}). Terminating.`);
1569
+ // In bug mode for diagnostic/fix/test steps, emit a checkpoint
1570
+ // so the user can resume with full context.
1571
+ if (context.mode === 'bug' && (capStepId === 'test' || capStepId === 'fix' || capStepId === 'diagnose')) {
1572
+ try {
1573
+ await emitCheckpoint(context, capStepId, response);
1574
+ } catch (err) {
1575
+ console.warn(`[retry-cap] emitCheckpoint failed: ${err?.message || err}`);
1576
+ }
1577
+ }
1578
+ buildStatus = 'failed';
1579
+ break;
1580
+ }
1237
1581
  }
1238
1582
  progress.retry('build', stepId, response.agent);
1239
1583
  const violations = response.violations ?? [];
@@ -1248,12 +1592,38 @@ export async function runBuild(featureCode, opts = {}) {
1248
1592
  }
1249
1593
  const retryStepId = response.step_id ?? stepId;
1250
1594
  const agentType = response.agent ?? 'claude';
1251
- const prompt = buildRetryPrompt(response, violations, context, response.conflicts);
1252
- const connector = getConnector(agentType, { cwd: agentCwd });
1595
+ const baseRetryPrompt = buildRetryPrompt(response, violations, context, response.conflicts);
1253
1596
  const retryTimeout = STEP_TIMEOUT_MS[retryStepId] ?? DEFAULT_TIMEOUT_MS;
1597
+
1598
+ // MF-1/SF-4: Prepend shared review scaffold on retries when this is a review step.
1599
+ // SF-1 (iter 3): mirror main-path reduce_mode gating so merge-step retries don't get reviewer framing.
1600
+ const isReviewRetry = response.review_mode === true
1601
+ || response.inputs?.review_mode === 'true'
1602
+ || response.output_contract === 'ReviewResult';
1603
+ const isReduceRetry = response.inputs?.reduce_mode === 'true';
1604
+ const isReviewScaffoldRetry = isReviewRetry && !isReduceRetry;
1605
+ const confGateRetry = Number(response.inputs?.confidence_gate ?? response.confidence_gate ?? 7);
1606
+ let prompt = baseRetryPrompt;
1607
+ if (isReviewScaffoldRetry) {
1608
+ prompt = buildReviewPrompt({
1609
+ agentType,
1610
+ lens: 'general',
1611
+ lensFocus: '',
1612
+ exclusions: '',
1613
+ confidenceGate: confGateRetry,
1614
+ taskDescription: response.inputs?.task ?? '',
1615
+ blueprint: response.inputs?.blueprint ?? '',
1616
+ }) + '\n\n' + baseRetryPrompt;
1617
+ }
1618
+
1254
1619
  let retryResult;
1255
1620
  try {
1256
- retryResult = await runAndNormalize(connector, prompt, response, { progress, streamWriter, maxDurationMs: retryTimeout });
1621
+ retryResult = await runAndNormalize(null, prompt, response, {
1622
+ progress, streamWriter, maxDurationMs: retryTimeout, stratum, cwd: agentCwd,
1623
+ reviewMode: isReviewRetry,
1624
+ confidenceGate: confGateRetry,
1625
+ lens: response.inputs?.lens_name ?? response.lens_name ?? 'general',
1626
+ });
1257
1627
  } catch (err) {
1258
1628
  if (err instanceof AgentTimeoutError) {
1259
1629
  console.warn(`\n⚠ Agent timed out on retry "${retryStepId}" after ${Math.round(err.durationMs / 1000)}s`);
@@ -1291,8 +1661,14 @@ export async function runBuild(featureCode, opts = {}) {
1291
1661
  // Debug discipline enforcement on retry (COMP-DEBUG-1)
1292
1662
  if (retryStepId === 'fix' || retryStepId === 'diagnose') {
1293
1663
  const filesChanged = extractFilesChanged({ result });
1294
- fixChainDetector.recordIteration(filesChanged);
1295
- attemptCounter.record({ filesChanged });
1664
+ // COMP-FIX-HARD T9: per-bug keying when running in bug mode.
1665
+ if (context.mode === 'bug' && context.bug_code) {
1666
+ fixChainDetector.recordIterationForBug(context.bug_code, filesChanged);
1667
+ attemptCounter.recordForBug(context.bug_code, { filesChanged });
1668
+ } else {
1669
+ fixChainDetector.recordIteration(filesChanged);
1670
+ attemptCounter.record({ filesChanged });
1671
+ }
1296
1672
 
1297
1673
  // Validate trace evidence on diagnose retries
1298
1674
  if (retryStepId === 'diagnose' && result) {
@@ -1308,21 +1684,32 @@ export async function runBuild(featureCode, opts = {}) {
1308
1684
  }
1309
1685
  }
1310
1686
 
1311
- const chains = fixChainDetector.detect();
1312
- const intervention = attemptCounter.getIntervention();
1687
+ const isBugMode = context.mode === 'bug' && !!context.bug_code;
1688
+ const chains = isBugMode
1689
+ ? fixChainDetector.detectForBug(context.bug_code)
1690
+ : fixChainDetector.detect();
1691
+ const intervention = isBugMode
1692
+ ? attemptCounter.getInterventionForBug(context.bug_code)
1693
+ : attemptCounter.getIntervention();
1694
+ const attemptCount = isBugMode
1695
+ ? attemptCounter.getCountForBug(context.bug_code)
1696
+ : attemptCounter.count;
1697
+ const attemptIsVisual = isBugMode
1698
+ ? (attemptCounter.byBug.get(context.bug_code)?.isVisual ?? false)
1699
+ : attemptCounter.isVisual;
1313
1700
 
1314
1701
  if (chains.length > 0) {
1315
1702
  debugLedger.record({ type: 'fix_chain_detected', chains });
1316
1703
  }
1317
1704
 
1318
1705
  if (intervention === 'escalate') {
1319
- debugLedger.record({ type: 'escalation', attempt: attemptCounter.count, isVisual: attemptCounter.isVisual });
1320
- if (streamWriter) streamWriter.write({ type: 'build_error', message: `Debug discipline: escalating after ${attemptCounter.count} attempts. Dispatching to cross-agent review.` });
1706
+ debugLedger.record({ type: 'escalation', attempt: attemptCount, isVisual: attemptIsVisual });
1707
+ if (streamWriter) streamWriter.write({ type: 'build_error', message: `Debug discipline: escalating after ${attemptCount} attempts. Dispatching to cross-agent review.` });
1321
1708
  } else if (intervention === 'trace_refresh') {
1322
- debugLedger.record({ type: 'trace_refresh_required', attempt: attemptCounter.count });
1323
- if (progress) progress.warn(`Debug discipline: ${attemptCounter.count} attempts — fresh trace evidence required before next fix`);
1709
+ debugLedger.record({ type: 'trace_refresh_required', attempt: attemptCount });
1710
+ if (progress) progress.warn(`Debug discipline: ${attemptCount} attempts — fresh trace evidence required before next fix`);
1324
1711
  } else if (intervention === 'trace_reminder') {
1325
- if (progress) progress.warn(`Debug discipline: ${attemptCounter.count} attempts on same target — verify trace evidence is current`);
1712
+ if (progress) progress.warn(`Debug discipline: ${attemptCount} attempts on same target — verify trace evidence is current`);
1326
1713
  }
1327
1714
 
1328
1715
  // Persist debug state
@@ -1344,7 +1731,6 @@ export async function runBuild(featureCode, opts = {}) {
1344
1731
  response = await executeParallelDispatch(
1345
1732
  response,
1346
1733
  stratum,
1347
- getConnector,
1348
1734
  context,
1349
1735
  progress,
1350
1736
  streamWriter,
@@ -1370,8 +1756,11 @@ export async function runBuild(featureCode, opts = {}) {
1370
1756
  if (response.status === 'complete' && buildStatus === 'complete') {
1371
1757
  console.log('\nBuild complete.');
1372
1758
  await visionWriter.updateItemStatus(itemId, 'complete');
1373
- // COMP-QA: persist filesChanged so `compose qa-scope` can read them post-build
1374
- updateFeature(cwd, featureCode, { status: 'COMPLETE', filesChanged: context.filesChanged ?? [] });
1759
+ // COMP-QA: persist filesChanged so `compose qa-scope` can read them post-build.
1760
+ // Bug mode skips feature-json — bugs don't have feature.json (COMP-FIX-HARD T4).
1761
+ if (!isBugMode) {
1762
+ updateFeature(cwd, featureCode, { status: 'COMPLETE', filesChanged: context.filesChanged ?? [] });
1763
+ }
1375
1764
  const termState = readActiveBuild(dataDir);
1376
1765
  if (termState) {
1377
1766
  writeActiveBuild(dataDir, { ...termState, status: 'complete', completedAt: new Date().toISOString() });
@@ -1381,7 +1770,7 @@ export async function runBuild(featureCode, opts = {}) {
1381
1770
  buildStatus = 'killed';
1382
1771
  console.log('\nBuild killed.');
1383
1772
  await visionWriter.updateItemStatus(itemId, 'killed');
1384
- updateFeature(cwd, featureCode, { status: 'PLANNED' });
1773
+ if (!isBugMode) updateFeature(cwd, featureCode, { status: 'PLANNED' });
1385
1774
  const termState = readActiveBuild(dataDir);
1386
1775
  if (termState) {
1387
1776
  writeActiveBuild(dataDir, { ...termState, status: 'aborted', completedAt: new Date().toISOString() });
@@ -1390,7 +1779,7 @@ export async function runBuild(featureCode, opts = {}) {
1390
1779
  // Ship failure or other explicit failure — write terminal state
1391
1780
  console.log('\nBuild failed.');
1392
1781
  await visionWriter.updateItemStatus(itemId, 'failed');
1393
- updateFeature(cwd, featureCode, { status: 'PLANNED' });
1782
+ if (!isBugMode) updateFeature(cwd, featureCode, { status: 'PLANNED' });
1394
1783
  const termState = readActiveBuild(dataDir);
1395
1784
  if (termState) {
1396
1785
  writeActiveBuild(dataDir, { ...termState, status: 'failed', completedAt: new Date().toISOString() });
@@ -1515,7 +1904,7 @@ export async function runBuild(featureCode, opts = {}) {
1515
1904
  join(featureDir, 'audit.json'),
1516
1905
  JSON.stringify(response, null, 2)
1517
1906
  );
1518
- console.log(`Audit trace written to docs/features/${featureCode}/audit.json`);
1907
+ console.log(`Audit trace written to ${isBugMode ? 'docs/bugs' : 'docs/features'}/${featureCode}/audit.json`);
1519
1908
  } catch (err) {
1520
1909
  console.warn(`Warning: could not write audit trace: ${err.message}`);
1521
1910
  }
@@ -1528,7 +1917,7 @@ export async function runBuild(featureCode, opts = {}) {
1528
1917
  join(featureDir, 'audit.json'),
1529
1918
  JSON.stringify(audit, null, 2)
1530
1919
  );
1531
- console.log(`Audit trace written to docs/features/${featureCode}/audit.json`);
1920
+ console.log(`Audit trace written to ${isBugMode ? 'docs/bugs' : 'docs/features'}/${featureCode}/audit.json`);
1532
1921
  } catch (err) {
1533
1922
  console.warn(`Warning: could not write audit trace: ${err.message}`);
1534
1923
  }
@@ -1554,6 +1943,47 @@ export async function runBuild(featureCode, opts = {}) {
1554
1943
  // Helpers
1555
1944
  // ---------------------------------------------------------------------------
1556
1945
 
1946
/**
 * COMP-PLAN-SECTIONS T6 — emit per-task section files after a plan_gate approve.
 *
 * Called from each of the three plan_gate approve branches (skip / flag / human).
 * Returns an empty result without doing any work when `stepId` is not
 * 'plan_gate' or when `featureDir` is falsy. Otherwise delegates to
 * `emitPlanSections(featureDir)` and, when at least one section was created and
 * a stream writer is available, writes a `build_sections_emitted` event with the
 * created/skipped lists.
 *
 * Section emission must never break the build, so every failure mode is
 * contained here: a throwing emitter is reported as a `build_error` stream event
 * (best-effort), a throwing stream writer is ignored, and a malformed emitter
 * return value (undefined, null, or missing list fields) is normalized to empty
 * lists instead of raising a TypeError.
 *
 * @param {string} stepId — the gate stepId (must be 'plan_gate' to fire)
 * @param {string} featureDir — feature directory handed to emitPlanSections
 * @param {object} [opts]
 * @param {object} [opts.streamWriter] — build stream writer (needs `write(event)`)
 * @param {string} [opts.featureCode] — feature code, included in the event
 * @returns {{ created: string[], skipped: string[] }}
 */
export function maybeEmitSectionsAfterPlanGate(stepId, featureDir, opts = {}) {
  const empty = { created: [], skipped: [] };
  if (stepId !== 'plan_gate' || !featureDir) return empty;

  // `opts = {}` only covers `undefined`; optional chaining also tolerates null.
  const streamWriter = opts?.streamWriter;

  let raw;
  try {
    raw = emitPlanSections(featureDir);
  } catch (err) {
    // Section emission must never break the build.
    if (streamWriter) {
      try {
        streamWriter.write({
          type: 'build_error',
          message: `sections emit failed: ${err?.message ?? err}`,
          stepId,
        });
      } catch { /* ignore */ }
    }
    return empty;
  }

  // Well-formed results are passed through untouched (same object identity as
  // before); anything else is coerced so the length check below cannot throw.
  const wellFormed = Array.isArray(raw?.created) && Array.isArray(raw?.skipped);
  const result = wellFormed
    ? raw
    : {
        created: Array.isArray(raw?.created) ? raw.created : [],
        skipped: Array.isArray(raw?.skipped) ? raw.skipped : [],
      };

  if (result.created.length > 0 && streamWriter) {
    try {
      streamWriter.write({
        type: 'build_sections_emitted',
        featureCode: opts?.featureCode ?? null,
        created: result.created,
        skipped: result.skipped,
      });
    } catch { /* ignore */ }
  }
  return result;
}
1986
+
1557
1987
  // ---------------------------------------------------------------------------
1558
1988
  // Ship step — runs git commit in-process (not via agent)
1559
1989
  // ---------------------------------------------------------------------------
@@ -1562,8 +1992,11 @@ export async function runBuild(featureCode, opts = {}) {
1562
1992
  * Execute the ship step: run tests, stage feature files, commit.
1563
1993
  * Returns a PhaseResult-shaped object.
1564
1994
  */
1565
- async function executeShipStep(featureCode, agentCwd, cwd, context, description, progress) {
1566
- const featureDir = `docs/features/${featureCode}`;
1995
+ export async function executeShipStep(featureCode, agentCwd, cwd, context, description, progress) {
1996
+ // COMP-FIX-HARD T4: bug mode stages docs/bugs/<code>/ instead of docs/features/<code>/
1997
+ const featureDir = context?.mode === 'bug'
1998
+ ? `docs/bugs/${featureCode}`
1999
+ : `docs/features/${featureCode}`;
1567
2000
 
1568
2001
  try {
1569
2002
  // 0. Check if we're in a git repository — if not, skip git operations
@@ -1638,7 +2071,7 @@ async function executeShipStep(featureCode, agentCwd, cwd, context, description,
1638
2071
  if (progress) progress.toolUse('ship', `Staging ${featureFiles.length} files...`);
1639
2072
  for (const f of featureFiles) {
1640
2073
  try {
1641
- execSync(`git add "${f}"`, { cwd: agentCwd, encoding: 'utf-8', timeout: 5000 });
2074
+ execFileSync('git', ['add', '--', f], { cwd: agentCwd, encoding: 'utf-8', timeout: 5000 });
1642
2075
  } catch { /* file might not exist or already staged */ }
1643
2076
  }
1644
2077
 
@@ -1662,23 +2095,49 @@ async function executeShipStep(featureCode, agentCwd, cwd, context, description,
1662
2095
 
1663
2096
  // 6. Commit
1664
2097
  if (progress) progress.toolUse('ship', 'Committing...');
1665
- execSync(`git commit -m "${commitMsg.replace(/"/g, '\\"')}"`, {
2098
+ execFileSync('git', ['commit', '-m', commitMsg], {
1666
2099
  cwd: agentCwd, encoding: 'utf-8', timeout: 30_000,
1667
2100
  });
1668
2101
 
1669
- // 7. Get the commit SHA
1670
- const sha = execSync('git rev-parse HEAD', {
1671
- cwd: agentCwd, encoding: 'utf-8', timeout: 5000,
1672
- }).trim();
1673
-
2102
+ // 7. Best-effort post-commit metadata collection.
2103
+ // Each call is wrapped in its own try/catch — metadata failures must NEVER
2104
+ // downgrade the ship outcome from 'complete' to 'failed'. Empty fields
2105
+ // (commit:null, filesChanged:[]) are acceptable.
1674
2106
  const stagedFiles = staged.split('\n').filter(Boolean);
1675
- if (progress) progress.toolUse('ship', `Committed ${sha.slice(0, 8)} (${stagedFiles.length} files)`);
2107
+
2108
+ let sha = null;
2109
+ try {
2110
+ sha = execSync('git rev-parse HEAD', {
2111
+ cwd: agentCwd, encoding: 'utf-8', timeout: 5000, stdio: ['ignore', 'pipe', 'pipe'],
2112
+ }).trim() || null;
2113
+ } catch { /* metadata best-effort */ }
2114
+
2115
+ if (progress) {
2116
+ progress.toolUse('ship', sha
2117
+ ? `Committed ${sha.slice(0, 8)} (${stagedFiles.length} files)`
2118
+ : `Committed (${stagedFiles.length} files)`);
2119
+ }
2120
+
2121
+ // COMP-PLAN-SECTIONS T5: filesChanged from `git show --name-only`. Best-effort.
2122
+ let filesChanged = [];
2123
+ try {
2124
+ const namesOnly = execSync('git show --name-only --pretty=format: HEAD', {
2125
+ cwd: agentCwd, encoding: 'utf-8', timeout: 5000, stdio: ['ignore', 'pipe', 'pipe'],
2126
+ }).trim();
2127
+ filesChanged = namesOnly.split('\n').map(s => s.trim()).filter(Boolean);
2128
+ } catch { /* metadata best-effort — leave [] */ }
2129
+ // If we got nothing from show, fall back to the staged list (still best-effort).
2130
+ if (filesChanged.length === 0 && sha) filesChanged = stagedFiles;
1676
2131
 
1677
2132
  return {
1678
2133
  phase: 'ship',
1679
- artifact: sha,
2134
+ artifact: sha ?? '',
1680
2135
  outcome: 'complete',
1681
- summary: `Committed ${sha.slice(0, 8)}: ${commitMsg} (${stagedFiles.length} files)`,
2136
+ summary: sha
2137
+ ? `Committed ${sha.slice(0, 8)}: ${commitMsg} (${stagedFiles.length} files)`
2138
+ : `Committed: ${commitMsg} (${stagedFiles.length} files)`,
2139
+ commit: sha,
2140
+ filesChanged,
1682
2141
  };
1683
2142
 
1684
2143
  } catch (err) {
@@ -1696,22 +2155,24 @@ async function executeShipStep(featureCode, agentCwd, cwd, context, description,
1696
2155
  // ---------------------------------------------------------------------------
1697
2156
 
1698
2157
  /**
1699
- * Run Codex review of the diff and synthesize findings with Claude's MergedReviewResult.
2158
+ * Run Codex review of the diff and synthesize findings with Claude's ReviewResult.
1700
2159
  *
1701
2160
  * Opt-out: pass opts.skipCrossModel=true or set COMPOSE_CROSS_MODEL=0 env var.
1702
- * Graceful skip: if CodexConnector construction fails (opencode not installed).
1703
2161
  *
1704
- * @param {object} mergedResult - MergedReviewResult from the parallel_review child flow
2162
+ * Synthesis output is a CrossModelReviewResult — a canonical ReviewResult extended with
2163
+ * consensus/claude_only/codex_only arrays of canonical finding items (STRAT-XMODEL-PARITY).
2164
+ *
2165
+ * @param {object} mergedResult - ReviewResult from the parallel_review child flow
1705
2166
  * @param {string[]} filesChanged - list of changed file paths
1706
2167
  * @param {string} cwd - working directory
1707
- * @param {object} getConnector - connector factory
2168
+ * @param {object} stratum - StratumMcpClient
1708
2169
  * @param {BuildStreamWriter|null} streamWriter
1709
2170
  * @param {object} opts
1710
2171
  * @param {boolean} [opts.skipCrossModel] - explicit opt-out
1711
- * @returns {Promise<object>} updated MergedReviewResult with crossModelSynthesis field,
2172
+ * @returns {Promise<object>} updated ReviewResult with crossModelSynthesis field,
1712
2173
  * or original mergedResult if skipped
1713
2174
  */
1714
- async function runCrossModelReview(mergedResult, filesChanged, cwd, getConnector, streamWriter, opts = {}) {
2175
+ async function runCrossModelReview(mergedResult, filesChanged, cwd, stratum, streamWriter, opts = {}) {
1715
2176
  // --- Opt-out checks ---
1716
2177
  if (opts.skipCrossModel) {
1717
2178
  if (streamWriter) streamWriter.write({ type: 'cross_model_review', status: 'skipped', reason: 'skipCrossModel flag set' });
@@ -1725,17 +2186,6 @@ async function runCrossModelReview(mergedResult, filesChanged, cwd, getConnector
1725
2186
  return mergedResult; // small/medium diff — skip silently
1726
2187
  }
1727
2188
 
1728
- // --- Codex availability check ---
1729
- let codexConnector;
1730
- try {
1731
- codexConnector = new CodexConnector({ cwd });
1732
- } catch (err) {
1733
- const msg = `cross-model review skipped: Codex unavailable (${err.message})`;
1734
- console.warn(` [cross-model] ${msg}`);
1735
- if (streamWriter) streamWriter.write({ type: 'cross_model_review', status: 'skipped', reason: msg });
1736
- return mergedResult;
1737
- }
1738
-
1739
2189
  if (streamWriter) {
1740
2190
  streamWriter.write({ type: 'cross_model_review', status: 'started', filesChanged: filesChanged.length });
1741
2191
  }
@@ -1756,10 +2206,12 @@ async function runCrossModelReview(mergedResult, filesChanged, cwd, getConnector
1756
2206
  let codexFindings = [];
1757
2207
  try {
1758
2208
  const codexTimeout = STEP_TIMEOUT_MS.codex_review ?? 10 * 60_000;
1759
- const syntheticStep = { step_id: 'codex_review', ensure: [], output_fields: {} };
1760
- const { text: codexText } = await runAndNormalize(codexConnector, codexPrompt, syntheticStep, {
2209
+ const syntheticStep = { step_id: 'codex_review', agent: 'codex', ensure: [], output_fields: {} };
2210
+ const { text: codexText } = await runAndNormalize(null, codexPrompt, syntheticStep, {
1761
2211
  streamWriter,
1762
2212
  maxDurationMs: codexTimeout,
2213
+ stratum,
2214
+ cwd,
1763
2215
  });
1764
2216
 
1765
2217
  // Parse findings: look for a JSON array in the response text
@@ -1791,7 +2243,7 @@ async function runCrossModelReview(mergedResult, filesChanged, cwd, getConnector
1791
2243
  const claudeFindings = mergedResult.findings ?? [];
1792
2244
  const synthesisPrompt =
1793
2245
  `You are synthesizing code review findings from two models.\n\n` +
1794
- `## Claude findings (structured LensFinding objects)\n` +
2246
+ `## Claude findings (ReviewResult finding items — {lens, file, line, severity, finding, confidence, applied_gate})\n` +
1795
2247
  JSON.stringify(claudeFindings, null, 2) +
1796
2248
  `\n\n## Codex findings (plain strings)\n` +
1797
2249
  JSON.stringify(codexFindings, null, 2) +
@@ -1800,49 +2252,60 @@ async function runCrossModelReview(mergedResult, filesChanged, cwd, getConnector
1800
2252
  `- CONSENSUS: both models flagged the same issue (same file, similar concern)\n` +
1801
2253
  `- CLAUDE_ONLY: only Claude found it\n` +
1802
2254
  `- CODEX_ONLY: only Codex found it\n\n` +
1803
- `Return a JSON object with this exact shape:\n` +
2255
+ `Return a JSON object matching the CrossModelReviewResult schema with this exact shape:\n` +
1804
2256
  `{\n` +
1805
- ` "consensus": [<LensFinding objects from Claude, with codexNote field added>],\n` +
1806
- ` "claude_only": [<LensFinding objects>],\n` +
1807
- ` "codex_only": [{"file":"?","line":0,"severity":"medium","finding":"<codex text>","confidence":70,"source":"codex"}]\n` +
2257
+ ` "summary": "<1-3 sentence narrative>",\n` +
2258
+ ` "consensus": [<canonical finding items from Claude, severity in {must-fix,should-fix,nit}, confidence 1-10, applied_gate 1-10>],\n` +
2259
+ ` "claude_only": [<canonical finding items, same shape>],\n` +
2260
+ ` "codex_only": [{"lens":"general","file":null,"line":null,"severity":"should-fix","finding":"<codex text>","confidence":7,"applied_gate":7}]\n` +
1808
2261
  `}\n\n` +
1809
- `For CODEX_ONLY findings, create LensFinding-shaped objects with file="" if the file is not clear.\n` +
2262
+ `Each finding item MUST have: lens (string), file (string|null), line (integer|null), ` +
2263
+ `severity ("must-fix"|"should-fix"|"nit"), finding (string), confidence (1-10 integer), applied_gate (1-10 integer).\n` +
2264
+ `For CODEX_ONLY findings, create canonical finding-shaped objects with file=null if the file is not clear.\n` +
1810
2265
  `Output ONLY the JSON object, no prose.`;
1811
2266
 
1812
- // Fallback preserves Codex findings as codex_only so they're never silently dropped
1813
- const codexAsFallback = codexFindings.map(f => ({ file: '', line: 0, severity: 'medium', finding: f, confidence: 60, source: 'codex' }));
1814
- let synthesis = { consensus: [], claude_only: claudeFindings, codex_only: codexAsFallback };
2267
+ // Fallback preserves Codex findings as codex_only so they're never silently dropped.
2268
+ // Emits canonical ReviewResult finding shape (severity {must-fix,should-fix,nit}, confidence 1-10, applied_gate).
2269
+ const codexAsFallback = codexFindings.map(f => ({
2270
+ lens: 'general',
2271
+ file: null,
2272
+ line: null,
2273
+ severity: 'should-fix',
2274
+ finding: typeof f === 'string' ? f : (f?.finding ?? String(f)),
2275
+ confidence: 7, // at gate — guarantees fallback findings survive confidence filtering
2276
+ applied_gate: 7,
2277
+ source: 'codex',
2278
+ }));
2279
+
2280
+ let synthText = '';
1815
2281
  try {
1816
- const claudeConnector = getConnector('claude', { cwd });
1817
- const syntheticStep = { step_id: 'synthesis', ensure: [], output_fields: {} };
1818
- const { text: synthText } = await runAndNormalize(claudeConnector, synthesisPrompt, syntheticStep, {
2282
+ const syntheticStep = { step_id: 'synthesis', agent: 'claude', ensure: [], output_fields: {} };
2283
+ const result = await runAndNormalize(null, synthesisPrompt, syntheticStep, {
1819
2284
  streamWriter,
1820
2285
  maxDurationMs: 3 * 60_000,
2286
+ stratum,
2287
+ cwd,
1821
2288
  });
1822
-
1823
- const synthMatch = synthText.match(/\{[\s\S]*\}/);
1824
- if (synthMatch) {
1825
- try {
1826
- const parsed = JSON.parse(synthMatch[0]);
1827
- if (parsed && typeof parsed === 'object') {
1828
- synthesis = {
1829
- consensus: Array.isArray(parsed.consensus) ? parsed.consensus : [],
1830
- claude_only: Array.isArray(parsed.claude_only) ? parsed.claude_only : claudeFindings,
1831
- codex_only: Array.isArray(parsed.codex_only) ? parsed.codex_only : codexAsFallback,
1832
- };
1833
- }
1834
- } catch { /* keep fallback */ }
1835
- }
2289
+ synthText = result.text;
1836
2290
  } catch (err) {
1837
2291
  console.warn(` [cross-model] synthesis error: ${err.message}`);
1838
- // Fall through with default synthesis
2292
+ // Fall through — normalizeCrossModelResult will use the fallback arrays
1839
2293
  }
1840
2294
 
1841
- const allFindings = [
1842
- ...synthesis.consensus,
1843
- ...synthesis.claude_only,
1844
- ...synthesis.codex_only,
1845
- ];
2295
+ // Route synthesis output through canonical normalizer (STRAT-XMODEL-PARITY).
2296
+ // Wire repairFn so malformed synthesis JSON gets one repair-retry before degrading to fallback.
2297
+ const synthesis = await normalizeCrossModelResult(synthText, {
2298
+ confidenceGate: 7,
2299
+ claudeFindingsFallback: claudeFindings,
2300
+ codexFindingsFallback: codexAsFallback,
2301
+ repairFn: async (repairPrompt) => {
2302
+ const { text } = await runAndNormalize(null, repairPrompt, {
2303
+ step_id: 'synthesis_repair', agent: 'claude', ensure: [], output_fields: {},
2304
+ }, { stratum, cwd, maxDurationMs: 90_000 });
2305
+ return text;
2306
+ },
2307
+ });
2308
+
1846
2309
  const consensusCount = synthesis.consensus.length;
1847
2310
  const claudeOnlyCount = synthesis.claude_only.length;
1848
2311
  const codexOnlyCount = synthesis.codex_only.length;
@@ -1859,10 +2322,14 @@ async function runCrossModelReview(mergedResult, filesChanged, cwd, getConnector
1859
2322
 
1860
2323
  return {
1861
2324
  ...mergedResult,
1862
- clean: allFindings.length === 0,
1863
- summary: `Cross-model synthesis: ${consensusCount} consensus, ${claudeOnlyCount} Claude-only, ${codexOnlyCount} Codex-only`,
1864
- findings: allFindings,
1865
- crossModelSynthesis: synthesis,
2325
+ clean: synthesis.clean,
2326
+ summary: synthesis.summary,
2327
+ findings: synthesis.findings,
2328
+ crossModelSynthesis: {
2329
+ consensus: synthesis.consensus,
2330
+ claude_only: synthesis.claude_only,
2331
+ codex_only: synthesis.codex_only,
2332
+ },
1866
2333
  };
1867
2334
  }
1868
2335
 
@@ -1872,7 +2339,7 @@ async function runCrossModelReview(mergedResult, filesChanged, cwd, getConnector
1872
2339
  * including nested execute_flow (recursive).
1873
2340
  */
1874
2341
  async function executeChildFlow(
1875
- flowDispatch, stratum, getConnector, context,
2342
+ flowDispatch, stratum, context,
1876
2343
  visionWriter, itemId, dataDir, gateOpts, progress,
1877
2344
  streamWriter
1878
2345
  ) {
@@ -1906,14 +2373,13 @@ async function executeChildFlow(
1906
2373
 
1907
2374
  const agentType = resp.agent ?? 'claude';
1908
2375
  const prompt = buildStepPrompt(resp, context);
1909
- const connector = getConnector(agentType, { cwd: context.cwd });
1910
2376
  const childStepTimeout = STEP_TIMEOUT_MS[resp.step_id] ?? DEFAULT_TIMEOUT_MS;
1911
2377
  // COMP-CAPS-ENFORCE: tap tool_use events in child flow steps too
1912
2378
  const childObservedTools = [];
1913
2379
  const childOnToolUse = (ev) => childObservedTools.push(ev);
1914
2380
  let childMainResult;
1915
2381
  try {
1916
- childMainResult = await runAndNormalize(connector, prompt, resp, { progress, streamWriter, maxDurationMs: childStepTimeout, onToolUse: childOnToolUse });
2382
+ childMainResult = await runAndNormalize(null, prompt, resp, { progress, streamWriter, maxDurationMs: childStepTimeout, onToolUse: childOnToolUse, stratum, cwd: context.cwd });
1917
2383
  } catch (err) {
1918
2384
  if (err instanceof UserInterruptError) {
1919
2385
  if (err.action === 'skip') {
@@ -1936,19 +2402,42 @@ async function executeChildFlow(
1936
2402
  const completedStepId = resp.step_id;
1937
2403
 
1938
2404
  // Emit capability_profile event for child step (informational, never blocking)
1939
- if (streamWriter) {
2405
+ {
1940
2406
  const { template: childTemplate, allowedTools: childAllowed, disallowedTools: childDisallowed } = resolveAgentConfig(agentType);
1941
- if (childTemplate) {
2407
+ if (streamWriter && childTemplate) {
1942
2408
  streamWriter.writeCapabilityProfile(completedStepId, agentType, childTemplate, childAllowed, childDisallowed);
1943
2409
  }
1944
- // COMP-CAPS-ENFORCE: check child step tool_use events against template
2410
+
2411
+ // COMP-CAPS-ENFORCE + COMP-AGENT-CAPS-6: check child step tool_use events against
2412
+ // template and enforce block mode — mirrors the main-path enforcement block (lines 763-794).
2413
+ const childCapViolations = [];
1945
2414
  for (const ev of childObservedTools) {
1946
2415
  const check = checkCapabilityViolation(ev.tool, agentType);
1947
2416
  if (check.violation) {
1948
- streamWriter.writeViolation(completedStepId, agentType, childTemplate, `${ev.tool}: ${check.reason}`);
2417
+ childCapViolations.push({ tool: ev.tool, severity: check.severity, reason: check.reason });
2418
+ if (streamWriter) {
2419
+ streamWriter.writeViolation(completedStepId, agentType, childTemplate, `${ev.tool}: ${check.reason}`, check.severity);
2420
+ }
1949
2421
  console.log(` [caps] ${ev.tool} used by ${agentType} — violates ${childTemplate} profile`);
1950
2422
  }
1951
2423
  }
2424
+
2425
+ // COMP-AGENT-CAPS-6: enforce block mode for child-flow steps (was log-only before).
2426
+ const childEnforcement = (() => {
2427
+ try {
2428
+ const childSettingsPath = join(dataDir, 'settings.json');
2429
+ if (existsSync(childSettingsPath)) {
2430
+ const s = JSON.parse(readFileSync(childSettingsPath, 'utf-8'));
2431
+ return s?.capabilities?.enforcement ?? 'log';
2432
+ }
2433
+ } catch { /* degraded — default to log */ }
2434
+ return 'log';
2435
+ })();
2436
+ if (childEnforcement === 'block' && childCapViolations.length > 0) {
2437
+ const tools = childCapViolations.map(v => v.tool).join(', ');
2438
+ throw new StratumError('CAPABILITY_VIOLATION',
2439
+ `Child step "${completedStepId}" used disallowed tools: ${tools}`, completedStepId);
2440
+ }
1952
2441
  }
1953
2442
 
1954
2443
  // Accumulate child step results into shared stepHistory
@@ -1966,6 +2455,10 @@ async function executeChildFlow(
1966
2455
  result ?? { summary: 'Step complete' }
1967
2456
  );
1968
2457
 
2458
+ // COMP-FIX-HARD T6: record accepted hypothesis on diagnose success (bug mode only).
2459
+ // Child-flow call site, paired with the top-level call site near `:989`.
2460
+ recordDiagnoseSuccessIfBugMode(context, { step_id: completedStepId }, result);
2461
+
1969
2462
  // Stream: child step done
1970
2463
  if (streamWriter) {
1971
2464
  streamWriter.write({
@@ -1996,7 +2489,7 @@ async function executeChildFlow(
1996
2489
  if (progress) progress.pause();
1997
2490
  console.log(` [${childFlowName}] Gate: ${resp.step_id}`);
1998
2491
  const gateId = await visionWriter.createGate(childFlowId, resp.step_id, itemId);
1999
- const childAskAgent = makeAskAgent(getConnector, context, resp, null);
2492
+ const childAskAgent = makeAskAgent(stratum, context, resp, null);
2000
2493
 
2001
2494
  const childGateExtras = {
2002
2495
  fromPhase: resp.from_phase ?? null,
@@ -2068,10 +2561,10 @@ async function executeChildFlow(
2068
2561
  violations.map(v => `- ${v}`).join('\n') + '\n\n' +
2069
2562
  `Fix every issue. Do not skip any.\n\n` +
2070
2563
  `## Context\nWorking directory: ${context.cwd}\nFeature: ${context.featureCode}`;
2071
- const fixConnector = getConnector(fixAgent, { cwd: context.cwd });
2072
2564
  const fixTimeout = STEP_TIMEOUT_MS[resp.step_id] ?? DEFAULT_TIMEOUT_MS;
2565
+ const fixDispatch = { ...resp, agent: fixAgent };
2073
2566
  try {
2074
- await runAndNormalize(fixConnector, fixPrompt, resp, { progress, streamWriter, maxDurationMs: fixTimeout });
2567
+ await runAndNormalize(null, fixPrompt, fixDispatch, { progress, streamWriter, maxDurationMs: fixTimeout, stratum, cwd: context.cwd });
2075
2568
  } catch (err) {
2076
2569
  if (!(err instanceof AgentTimeoutError)) throw err;
2077
2570
  console.warn(`\n⚠ Fix agent timed out on "${resp.step_id}"`);
@@ -2083,10 +2576,9 @@ async function executeChildFlow(
2083
2576
  console.log(` [${childFlowName}] ↻ Retrying ${resp.step_id} (${stepAgent})`);
2084
2577
  }
2085
2578
  const prompt = buildRetryPrompt(resp, violations, context, resp.conflicts);
2086
- const connector = getConnector(stepAgent, { cwd: context.cwd });
2087
2579
  let childRetryResult;
2088
2580
  try {
2089
- childRetryResult = await runAndNormalize(connector, prompt, resp, { progress, streamWriter, maxDurationMs: fixTimeout });
2581
+ childRetryResult = await runAndNormalize(null, prompt, resp, { progress, streamWriter, maxDurationMs: fixTimeout, stratum, cwd: context.cwd });
2090
2582
  } catch (err) {
2091
2583
  if (err instanceof AgentTimeoutError) {
2092
2584
  console.warn(`\n⚠ Retry agent timed out on "${resp.step_id}"`);
@@ -2105,7 +2597,7 @@ async function executeChildFlow(
2105
2597
  const nestedParentFlowId = resp.parent_flow_id;
2106
2598
  const nestedParentStepId = resp.parent_step_id;
2107
2599
  const nestedResult = await executeChildFlow(
2108
- resp, stratum, getConnector, context,
2600
+ resp, stratum, context,
2109
2601
  visionWriter, itemId, dataDir, gateOpts, progress,
2110
2602
  streamWriter
2111
2603
  );
@@ -2118,7 +2610,6 @@ async function executeChildFlow(
2118
2610
  resp = await executeParallelDispatch(
2119
2611
  resp,
2120
2612
  stratum,
2121
- getConnector,
2122
2613
  context,
2123
2614
  progress,
2124
2615
  streamWriter,
@@ -2235,87 +2726,76 @@ export async function executeParallelDispatchServer(
2235
2726
  );
2236
2727
  }
2237
2728
 
2238
- // Poll until outcome is present (NOT can_advance — see design §3)
2239
- let pollResult;
2240
- const intervalMs = SERVER_DISPATCH_POLL_MS();
2241
- while (true) {
2242
- pollResult = await stratum.parallelPoll(flowId, stepId);
2243
- if (pollResult?.error) {
2244
- throw new Error(
2245
- `stratum_parallel_poll failed: ${pollResult.error}: ${pollResult.message || ''}`,
2246
- );
2247
- }
2248
- emitPerTaskProgress(streamWriter, pollResult, emittedStates);
2249
- if (pollResult.outcome != null) break;
2250
- await new Promise((resolve) => setTimeout(resolve, intervalMs));
2251
- }
2252
-
2253
- if (pollResult.outcome.status === 'already_advanced') {
2254
- throw new Error(
2255
- `stratum_parallel_poll returned already_advanced for step ${stepId} — ` +
2256
- `flow state desync. Aggregate: ${JSON.stringify(pollResult.outcome.aggregate)}`,
2257
- );
2729
+ // STRAT-PAR-STREAM: subscribe to push events for this scope before polling.
2730
+ // Push events are best-effort transient narration; poll remains the canonical
2731
+ // state-machine driver. Forward valid events through streamWriter so the
2732
+ // bridge rebroadcasts them via SSE under the buildStreamEvent wrapper.
2733
+ let unsubscribePush = null;
2734
+ if (typeof stratum.onEvent === 'function' && streamWriter) {
2735
+ unsubscribePush = stratum.onEvent(flowId, stepId, (event) => {
2736
+ if (!event || event.schema_version !== '0.2.5') return;
2737
+ try {
2738
+ streamWriter.write({ type: 'build_stream_event', event });
2739
+ } catch (err) {
2740
+ console.error('[build] failed to forward stream event:', err);
2741
+ }
2742
+ });
2258
2743
  }
2259
2744
 
2260
- // T2-F5-CONSUMER-MERGE-STATUS-COMPOSE: branch on defer-advance sentinel.
2261
- // hasServerMerge is true only when the spec declared both isolation:worktree AND capture_diff:true.
2262
- const isolation = dispatchResponse.isolation ?? 'worktree';
2263
- const hasServerMerge = isolation === 'worktree' && dispatchResponse.capture_diff === true;
2264
-
2265
- // Defensive: spec declared defer_advance:true but misses the companions
2266
- // (isolation:worktree + capture_diff:true). The poll still returns the sentinel
2267
- // but we have nothing to merge. Call advance with 'clean' to unblock the flow
2268
- // before any worktree-merge block runs.
2269
- if (pollResult.outcome?.status === 'awaiting_consumer_advance' && !hasServerMerge) {
2270
- if (streamWriter) {
2271
- streamWriter.write({
2272
- type: 'build_error', stepId,
2273
- message:
2274
- `Spec declared defer_advance:true without (isolation:worktree + capture_diff:true); ` +
2275
- `no diffs to merge. Calling parallelAdvance with merge_status='clean' to unblock the flow.`,
2276
- });
2745
+ try {
2746
+ // Poll until outcome is present (NOT can_advance — see design §3)
2747
+ let pollResult;
2748
+ const intervalMs = SERVER_DISPATCH_POLL_MS();
2749
+ while (true) {
2750
+ pollResult = await stratum.parallelPoll(flowId, stepId);
2751
+ if (pollResult?.error) {
2752
+ throw new Error(
2753
+ `stratum_parallel_poll failed: ${pollResult.error}: ${pollResult.message || ''}`,
2754
+ );
2755
+ }
2756
+ emitPerTaskProgress(streamWriter, pollResult, emittedStates);
2757
+ if (pollResult.outcome != null) break;
2758
+ await new Promise((resolve) => setTimeout(resolve, intervalMs));
2277
2759
  }
2278
- const advanceResult = await stratum.parallelAdvance(flowId, stepId, 'clean');
2279
- if (advanceResult?.error) {
2760
+
2761
+ if (pollResult.outcome.status === 'already_advanced') {
2280
2762
  throw new Error(
2281
- `stratum_parallel_advance failed: ${advanceResult.error}: ${advanceResult.message || ''}`,
2763
+ `stratum_parallel_poll returned already_advanced for step ${stepId} ` +
2764
+ `flow state desync. Aggregate: ${JSON.stringify(pollResult.outcome.aggregate)}`,
2282
2765
  );
2283
2766
  }
2284
- pollResult.outcome = advanceResult;
2285
- }
2286
2767
 
2287
- if (hasServerMerge) {
2288
- if (pollResult.outcome?.status === 'awaiting_consumer_advance') {
2289
- // DEFER PATH: merge locally, report merge_status, let flow advance with truth.
2290
- const { mergeStatus, conflictedTaskId, conflictError } = applyServerDispatchDiffsCore(
2291
- dispatchResponse.tasks ?? [],
2292
- pollResult.tasks,
2293
- baseCwd,
2294
- streamWriter,
2295
- stepId,
2296
- context,
2297
- );
2768
+ // T2-F5-CONSUMER-MERGE-STATUS-COMPOSE: branch on defer-advance sentinel.
2769
+ // hasServerMerge is true only when the spec declared both isolation:worktree AND capture_diff:true.
2770
+ const isolation = dispatchResponse.isolation ?? 'worktree';
2771
+ const hasServerMerge = isolation === 'worktree' && dispatchResponse.capture_diff === true;
2298
2772
 
2299
- if (mergeStatus === 'conflict' && streamWriter) {
2773
+ // Defensive: spec declared defer_advance:true but misses the companions
2774
+ // (isolation:worktree + capture_diff:true). The poll still returns the sentinel
2775
+ // but we have nothing to merge. Call advance with 'clean' to unblock the flow
2776
+ // before any worktree-merge block runs.
2777
+ if (pollResult.outcome?.status === 'awaiting_consumer_advance' && !hasServerMerge) {
2778
+ if (streamWriter) {
2300
2779
  streamWriter.write({
2301
2780
  type: 'build_error', stepId,
2302
2781
  message:
2303
- `Client-side merge conflict on task ${conflictedTaskId}: ${conflictError}. ` +
2304
- `Reporting merge_status='conflict' to Stratum; flow will route through its failure handler.`,
2782
+ `Spec declared defer_advance:true without (isolation:worktree + capture_diff:true); ` +
2783
+ `no diffs to merge. Calling parallelAdvance with merge_status='clean' to unblock the flow.`,
2305
2784
  });
2306
2785
  }
2307
-
2308
- const advanceResult = await stratum.parallelAdvance(flowId, stepId, mergeStatus);
2786
+ const advanceResult = await stratum.parallelAdvance(flowId, stepId, 'clean');
2309
2787
  if (advanceResult?.error) {
2310
2788
  throw new Error(
2311
2789
  `stratum_parallel_advance failed: ${advanceResult.error}: ${advanceResult.message || ''}`,
2312
2790
  );
2313
2791
  }
2314
2792
  pollResult.outcome = advanceResult;
2315
- } else {
2316
- // LEGACY PATH: non-deferred spec. Throwing wrapper preserves pre-defer behavior.
2317
- try {
2318
- applyServerDispatchDiffs(
2793
+ }
2794
+
2795
+ if (hasServerMerge) {
2796
+ if (pollResult.outcome?.status === 'awaiting_consumer_advance') {
2797
+ // DEFER PATH: merge locally, report merge_status, let flow advance with truth.
2798
+ const { mergeStatus, conflictedTaskId, conflictError } = applyServerDispatchDiffsCore(
2319
2799
  dispatchResponse.tasks ?? [],
2320
2800
  pollResult.tasks,
2321
2801
  baseCwd,
@@ -2323,29 +2803,60 @@ export async function executeParallelDispatchServer(
2323
2803
  stepId,
2324
2804
  context,
2325
2805
  );
2326
- } catch (err) {
2327
- if (streamWriter) {
2806
+
2807
+ if (mergeStatus === 'conflict' && streamWriter) {
2328
2808
  streamWriter.write({
2329
- type: 'build_step_done', stepId,
2330
- parallel: true,
2331
- summary: { ...pollResult.summary, merge_status: 'conflict' },
2332
- flowId,
2809
+ type: 'build_error', stepId,
2810
+ message:
2811
+ `Client-side merge conflict on task ${conflictedTaskId}: ${conflictError}. ` +
2812
+ `Reporting merge_status='conflict' to Stratum; flow will route through its failure handler.`,
2333
2813
  });
2334
2814
  }
2335
- throw err;
2815
+
2816
+ const advanceResult = await stratum.parallelAdvance(flowId, stepId, mergeStatus);
2817
+ if (advanceResult?.error) {
2818
+ throw new Error(
2819
+ `stratum_parallel_advance failed: ${advanceResult.error}: ${advanceResult.message || ''}`,
2820
+ );
2821
+ }
2822
+ pollResult.outcome = advanceResult;
2823
+ } else {
2824
+ // LEGACY PATH: non-deferred spec. Throwing wrapper preserves pre-defer behavior.
2825
+ try {
2826
+ applyServerDispatchDiffs(
2827
+ dispatchResponse.tasks ?? [],
2828
+ pollResult.tasks,
2829
+ baseCwd,
2830
+ streamWriter,
2831
+ stepId,
2832
+ context,
2833
+ );
2834
+ } catch (err) {
2835
+ if (streamWriter) {
2836
+ streamWriter.write({
2837
+ type: 'build_step_done', stepId,
2838
+ parallel: true,
2839
+ summary: { ...pollResult.summary, merge_status: 'conflict' },
2840
+ flowId,
2841
+ });
2842
+ }
2843
+ throw err;
2844
+ }
2336
2845
  }
2337
2846
  }
2338
- }
2339
2847
 
2340
- if (streamWriter) {
2341
- streamWriter.write({
2342
- type: 'build_step_done', stepId,
2343
- parallel: true,
2344
- summary: pollResult.summary, flowId,
2345
- });
2346
- }
2848
+ if (streamWriter) {
2849
+ streamWriter.write({
2850
+ type: 'build_step_done', stepId,
2851
+ parallel: true,
2852
+ summary: pollResult.summary, flowId,
2853
+ });
2854
+ }
2347
2855
 
2348
- return pollResult.outcome;
2856
+ return pollResult.outcome;
2857
+ } finally {
2858
+ if (unsubscribePush) { try { unsubscribePush(); } catch { /* ignore */ } }
2859
+ }
2349
2860
  }
2350
2861
 
2351
2862
  /**
@@ -2543,7 +3054,6 @@ function applyServerDispatchDiffs(taskList, pollTasks, baseCwd, streamWriter, st
2543
3054
  async function executeParallelDispatch(
2544
3055
  dispatchResponse,
2545
3056
  stratum,
2546
- getConnector,
2547
3057
  context,
2548
3058
  progress,
2549
3059
  streamWriter,
@@ -2638,13 +3148,10 @@ async function executeParallelDispatch(
2638
3148
  .replace(/\{confidence_gate\}/g, String(task.confidence_gate ?? ''))
2639
3149
  .replace(/\{exclusions\}/g, task.exclusions ?? '');
2640
3150
 
2641
- // STRAT-CERT: inject reasoning template for Claude-family agents (CERT-WIRE-1/7)
2642
- if (agentType.startsWith('claude') && task.lens_name) {
2643
- const lensDef = LENS_DEFINITIONS[task.lens_name];
2644
- if (lensDef?.reasoning_template) {
2645
- taskIntent = injectCertInstructions(taskIntent, lensDef.reasoning_template);
2646
- }
2647
- }
3151
+ // STRAT-CERT / SF-NEW-3: cert (reasoning_template) injection for Claude lens tasks
3152
+ // is done on the review scaffold below (where buildReviewPrompt is called), NOT here. Injecting here AND
3153
+ // on the scaffold produced two "## Premises / ## Trace / ## Conclusion" blocks.
3154
+ // The scaffold-side injection is the single source of truth for lens tasks.
2648
3155
 
2649
3156
  const syntheticDispatch = {
2650
3157
  step_id: taskId,
@@ -2670,10 +3177,46 @@ async function executeParallelDispatch(
2670
3177
  }
2671
3178
 
2672
3179
  try {
2673
- const prompt = buildStepPrompt(syntheticDispatch, context);
2674
- const connector = getConnector(agentType, { cwd: taskCwd });
3180
+ const baseTaskPrompt = buildStepPrompt(syntheticDispatch, context);
2675
3181
  const taskTimeout = STEP_TIMEOUT_MS[dispStepId] ?? DEFAULT_TIMEOUT_MS;
2676
- const taskResult = await runAndNormalize(connector, prompt, syntheticDispatch, { progress, streamWriter, maxDurationMs: taskTimeout });
3182
+ // review_mode is passed via inputs (as string "true") since top-level step props are Stratum-validated.
3183
+ // Fallback: parallel_dispatch steps with output_contract=ReviewResult are review by definition.
3184
+ const isReview = dispatchResponse.review_mode === true
3185
+ || dispatchResponse.inputs?.review_mode === 'true'
3186
+ || dispatchResponse.output_contract === 'ReviewResult'
3187
+ || task.lens_name != null;
3188
+ const confGateTask = task.confidence_gate ?? 7;
3189
+
3190
+ // MF-1: Prepend shared review scaffold when this is a review (lens) task.
3191
+ // SF-6: cert (reasoning template) injection is done here at the call site,
3192
+ // not inside buildReviewPrompt. The former taskIntent-side injection was removed, so this is the only place it happens.
3193
+ let prompt = baseTaskPrompt;
3194
+ if (isReview) {
3195
+ let reviewScaffold = buildReviewPrompt({
3196
+ agentType,
3197
+ lens: task.lens_name ?? 'general',
3198
+ lensFocus: task.lens_focus ?? '',
3199
+ exclusions: task.exclusions ?? '',
3200
+ confidenceGate: confGateTask,
3201
+ taskDescription: response.inputs?.task ?? '',
3202
+ blueprint: response.inputs?.blueprint ?? '',
3203
+ });
3204
+ // SF-6: append cert reasoning template for Claude-family agents (replaces the removed taskIntent-side injection)
3205
+ if (agentType.startsWith('claude') && task.lens_name) {
3206
+ const lensDef = LENS_DEFINITIONS[task.lens_name];
3207
+ if (lensDef?.reasoning_template) {
3208
+ reviewScaffold = injectCertInstructions(reviewScaffold, lensDef.reasoning_template);
3209
+ }
3210
+ }
3211
+ prompt = reviewScaffold + '\n\n' + baseTaskPrompt;
3212
+ }
3213
+
3214
+ const taskResult = await runAndNormalize(null, prompt, syntheticDispatch, {
3215
+ progress, streamWriter, maxDurationMs: taskTimeout, stratum, cwd: taskCwd,
3216
+ reviewMode: isReview,
3217
+ confidenceGate: confGateTask,
3218
+ lens: task.lens_name ?? 'general',
3219
+ });
2677
3220
 
2678
3221
  if (worktreeIsolation && worktreePaths.has(taskId)) {
2679
3222
  const diskQuotaMB = dispatchResponse.diskQuotaMB ?? 500;
@@ -2782,15 +3325,23 @@ async function executeParallelDispatch(
2782
3325
  return stratum.parallelDone(dispFlowId, dispStepId, taskResults, mergeStatus);
2783
3326
  }
2784
3327
 
2785
- async function startFresh(stratum, specYaml, featureCode, description, dataDir, templateName) {
3328
+ async function startFresh(stratum, specYaml, featureCode, description, dataDir, templateName, mode = 'feature') {
2786
3329
  const flowName = extractFlowName(specYaml, templateName);
2787
3330
  console.log(`Starting ${flowName} for ${featureCode}...`);
2788
- const response = await stratum.plan(specYaml, flowName, { featureCode, description });
3331
+ // COMP-FIX-HARD T4: bug-mode flows take input as { task: <description> }
3332
+ // because pipelines/bug-fix.stratum.yaml's flow input contract uses `task`,
3333
+ // not the feature flow's `{ featureCode, description }`.
3334
+ const planInputs = mode === 'bug'
3335
+ ? { task: description }
3336
+ : { featureCode, description };
3337
+ const response = await stratum.plan(specYaml, flowName, planInputs);
2789
3338
 
2790
3339
  writeActiveBuild(dataDir, {
2791
3340
  featureCode,
2792
3341
  flowId: response.flow_id,
2793
3342
  pipeline: flowName,
3343
+ mode,
3344
+ pid: process.pid,
2794
3345
  currentStepId: response.step_id,
2795
3346
  specPath: `pipelines/${templateName}.stratum.yaml`,
2796
3347
  stepNum: response.step_number ?? 1,
@@ -2929,6 +3480,25 @@ function appendDecisionEntry(contextDir, featureCode, stepId, outcome, rationale
2929
3480
  }
2930
3481
  }
2931
3482
 
3483
/**
 * Load bug description from docs/bugs/<bugCode>/description.md (bug mode).
 *
 * Bug mode has no JSON file (feature.json equivalent); description.md is
 * the sole source. If absent, fall back to the bug code so callers don't
 * crash — `bin/compose.js` is responsible for prompting the user to write
 * description.md before invoking runBuild.
 *
 * @param {string} bugDir - Directory expected to contain `description.md`.
 * @param {string} bugCode - Fallback value when no description text is found.
 * @returns {string} The first non-blank, non-heading line (trimmed); if there
 *   is none, the whole trimmed file; if that is empty or the file does not
 *   exist, `bugCode`.
 */
function loadBugDescription(bugDir, bugCode) {
  const descriptionPath = join(bugDir, 'description.md');
  if (!existsSync(descriptionPath)) {
    return bugCode;
  }

  const content = readFileSync(descriptionPath, 'utf-8');
  // Classify lines on their trimmed text: a heading that is indented, or that
  // follows a UTF-8 BOM (which trim() strips), must still be recognised as a
  // heading and skipped instead of being returned as the description.
  const firstLine = content
    .split('\n')
    .map((line) => line.trim())
    .find((line) => line !== '' && !line.startsWith('#'));

  // Fall back to the whole file (e.g. headings only), then to the bug code.
  return firstLine || content.trim() || bugCode;
}
3501
+
2932
3502
  function loadFeatureDescription(featureDir, featureCode) {
2933
3503
  // Try design.md, then spec.md, then fall back to feature code
2934
3504
  for (const name of ['design.md', 'spec.md']) {