@tekyzinc/gsd-t 4.0.29 → 4.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,7 +15,23 @@
15
15
  // milestone?: "M61",
16
16
  // projectDir?: ".",
17
17
  // userInput?: string, // arbitrary input to the phase (e.g. "$ARGUMENTS")
18
+ // competition?: number, // M82: N>1 enables Competition Mode (generate-and-judge)
19
+ // // on eligible upstream phases. N parallel Self-MoA
20
+ // // producers -> judge stage -> winner. Default 1 (off).
18
21
  // }
22
+ //
23
+ // M82 Competition Mode (generate-and-judge — the GENERATIVE dual of the
24
+ // orthogonal validation triad). Contract: competition-mode-contract.md v1.0.0.
25
+ // - Eligible phases: partition, milestone, discuss, design-decompose (pre-contract,
26
+ // wide-solution-space). INELIGIBLE: plan/impact/prd/doc-ripple (narrow / one
27
+ // right answer) — competition there is wasted, so a competition arg is ignored.
28
+ // - Producers: N samples of ONE strong model (Self-MoA beats a model zoo), varied
29
+ // by an explicit per-candidate "angle" so they explore different regions.
30
+ // - Judge: partition uses the OBJECTIVE oracle (gsd-t competition-judge --kind
31
+ // partition, scoring via the disjointness prover — a calculator, not a critic,
32
+ // immune to LLM-judge bias). Other phases use a blind+shuffled+rubric judge whose
33
+ // numeric selection is finalized deterministically by competition-judge --kind
34
+ // generic.
19
35
 
20
36
  export const meta = {
21
37
  name: "gsd-t-phase",
@@ -34,6 +50,8 @@ const _CLI_ENVELOPE_SCHEMA = {
34
50
  type: "object", required: ["ok", "exitCode"], additionalProperties: true,
35
51
  properties: { ok: { type: "boolean" }, exitCode: { type: "integer" }, envelope: {}, stdout: { type: "string" }, stderr: { type: "string" }, via: { type: "string" } },
36
52
  };
53
+ // Single-quote a value for safe shell interpolation (Red Team MED-5).
54
/**
 * Single-quote a value for safe shell interpolation (Red Team MED-5).
 * The value is stringified, every embedded single quote is rewritten as
 * '\'' (close quote, escaped quote, reopen quote), and the whole thing is
 * wrapped in single quotes.
 *
 * @param {*} s - value to quote; coerced with String().
 * @returns {string} the single-quoted shell word.
 */
function _shq(s) {
  const escaped = String(s).split("'").join("'\\''");
  return `'${escaped}'`;
}
37
55
  async function runCli(projectDir, subcmd, argv, localBin, label, parseJson = true, phaseNameOpt) {
38
56
  const argStr = (argv || []).map((a) => `'${String(a).replace(/'/g, "'\\''")}'`).join(" ");
39
57
  const prompt = [
@@ -49,6 +67,14 @@ async function runCli(projectDir, subcmd, argv, localBin, label, parseJson = tru
49
67
  return r || { ok: false, exitCode: -1, envelope: null, via: "error" };
50
68
  }
51
69
  async function runPreflight(projectDir, label = "preflight", phaseNameOpt) { return runCli(projectDir, "preflight", ["--json"], "cli-preflight.cjs", label, true, phaseNameOpt); }
70
+ // M83: the deterministic plan-hardening gate. Returns the parsed envelope
71
+ // ({ ok, exitCode, violations, ... }); ok:false means ≥1 untraceable AC.
72
/**
 * M83: run the deterministic plan-hardening (acceptance-traceability) gate
 * through runCli and return its envelope.
 *
 * FAIL-CLOSED: the envelope is only trusted when it is a plain object with a
 * boolean `ok`. Anything else (missing, null, a string, an array, or an
 * object with no boolean `ok`) cannot be evaluated as a pass or a fail, so
 * the function returns a synthetic
 * `{ ok: false, exitCode, violations: [], reason: "gate-unparsed" }` instead.
 * Callers that block on `ok === false` or on `reason === "gate-unparsed"`
 * therefore never treat an unevaluable gate as a pass.
 *
 * @param {string} projectDir - project directory handed to runCli.
 * @param {string} [milestone] - when truthy, forwarded as `--milestone <value>`.
 * @param {string} [label="traceability-gate"] - label forwarded to runCli.
 * @param {string} [phaseNameOpt] - optional phase name forwarded to runCli.
 * @returns {Promise<object>} the gate envelope ({ ok, exitCode, violations, ... })
 *   or the gate-unparsed fallback described above.
 */
async function runTraceabilityGate(projectDir, milestone, label = "traceability-gate", phaseNameOpt) {
  const argv = ["--json"];
  if (milestone) argv.push("--milestone", milestone);
  const r = await runCli(projectDir, "traceability-gate", argv, "gsd-t-traceability-gate.cjs", label, true, phaseNameOpt);
  const envelope = r ? r.envelope : undefined;
  const evaluable =
    envelope !== null &&
    typeof envelope === "object" &&
    !Array.isArray(envelope) &&
    typeof envelope.ok === "boolean";
  if (evaluable) return envelope;
  // Unevaluable output is never a pass: report ok:false with the marker reason.
  return { ok: false, exitCode: r ? r.exitCode : -1, violations: [], reason: "gate-unparsed" };
}
52
78
  async function generateBrief(projectDir, { kind = "execute", milestone, domain, id, label = "brief", phaseNameOpt } = {}) {
53
79
  const argv = ["--kind", kind, "--spawn-id", id, "--out", `${projectDir}/.gsd-t/briefs/${id}.json`];
54
80
  if (milestone) argv.push("--milestone", milestone);
@@ -57,6 +83,71 @@ async function generateBrief(projectDir, { kind = "execute", milestone, domain,
57
83
  return { ok: r.ok, briefPath: `${projectDir}/.gsd-t/briefs/${id}.json`, via: r.via };
58
84
  }
59
85
 
86
+ // M82: run the deterministic selection oracle over a candidate-set spec. The spec
87
+ // is written to a file via the agent's Bash (no fs in this sandbox), then judged by
88
+ // `gsd-t competition-judge --in <file>`. The agent MUST copy the judge's rich output
89
+ // (winner/ranked) up to the TOP LEVEL of its reply — a permissive free-form
90
+ // `envelope:{}` schema let a haiku agent silently drop winner/ranked (caught in the
91
+ // M82 real-sandbox proof: via=local ok=true but winner=undefined). Explicit required
92
+ // fields fix that. Returns { ok, winner, ranked }.
93
// Reply schema for the competition-judge agent. `ok` and `winner` are
// required at the top level; `winner` may be a string id or null. Extra
// properties are allowed.
const _JUDGE_ENVELOPE_SCHEMA = {
  type: "object",
  required: ["ok", "winner"],
  additionalProperties: true,
  properties: {
    ok: { type: "boolean" },
    exitCode: { type: "integer" },
    // string id of the selected candidate, or null when none was selected
    winner: { type: ["string", "null"] },
    // free-form ranking entries
    ranked: {
      type: "array",
      items: { type: "object", additionalProperties: true },
    },
    via: { type: "string" },
  },
};
103
/**
 * M82: ask a haiku agent to write the candidate-set spec to
 * `<projectDir>/.gsd-t/briefs/_competition-spec.json`, run the
 * competition-judge CLI on it, and report the judge's output.
 *
 * @param {string} projectDir - project directory; used for the spec path, cwd and CLI args.
 * @param {object} spec - candidate-set spec; serialized with JSON.stringify into the prompt.
 * @param {string} [label="judge"] - agent label.
 * @param {string} [phaseNameOpt] - when truthy, set as `opts.phase` on the agent call.
 * @returns {Promise<{ok: boolean, winner: (string|null), ranked: Array}>}
 *   `ok` coerced to boolean, `winner` normalized to null when absent, and
 *   `ranked` always an array (any non-array value is replaced by []).
 *   An agent rejection yields `{ ok: false, winner: null, ranked: [] }`.
 */
async function runCompetitionJudge(projectDir, spec, label = "judge", phaseNameOpt) {
  // De-fang backticks in the serialized spec so producer-supplied text (a domain
  // name / path) cannot open or close markdown code spans in the prompt
  // (Red Team MED-5). The spec sits on one line inside a ~~~ fence. Per the
  // original note, the judge only reads structural fields (id, domains.name,
  // touches[]), so a sanitized name is acceptable.
  const specJson = JSON.stringify(spec).replace(/`/g, "'");
  const qDir = _shq(projectDir);
  const specPath = `${projectDir}/.gsd-t/briefs/_competition-spec.json`;
  const qSpec = _shq(specPath);
  const prompt = [
    `Run the GSD-T Competition Mode judge for the project at \`${projectDir}\` and report its FULL output. Steps:`,
    `1. Write this EXACT JSON (one line) to \`${specPath}\` (overwrite; create .gsd-t/briefs/ if needed):`,
    "~~~json",
    specJson,
    "~~~",
    `2. If \`${projectDir}/bin/gsd-t-competition-judge.cjs\` exists, run: \`node ${qDir}/bin/gsd-t-competition-judge.cjs --in ${qSpec} --project-dir ${qDir}\` (set via="local"). Otherwise run: \`gsd-t competition-judge --in ${qSpec} --project-dir ${qDir}\` (set via="global"). cwd \`${projectDir}\`.`,
    `3. The command prints a JSON object to stdout with fields: ok, exitCode, winner, ranked, n.`,
    `4. COPY those fields (ok, exitCode, winner, ranked) up to the TOP LEVEL of your reply, plus via. Do NOT nest them under "envelope". If the command failed, set winner=null.`,
    `Do NOT do any other work.`,
  ].join("\n");
  const opts = { label, schema: _JUDGE_ENVELOPE_SCHEMA, model: "haiku" };
  if (phaseNameOpt) opts.phase = phaseNameOpt;
  const r = await agent(prompt, opts).catch((e) => ({ ok: false, winner: null, ranked: [], via: "error", err: String(e && e.message) }));
  // Prefer top-level fields; fall back to a nested envelope if the agent nested anyway.
  const env = (r && r.winner !== undefined) ? r : (r && r.envelope) || {};
  return {
    ok: !!env.ok,
    winner: env.winner != null ? env.winner : null,
    // A nested envelope is free-form, so `ranked` may be any type; callers
    // index it as an array, so anything that is not an array becomes [].
    ranked: Array.isArray(env.ranked) ? env.ranked : [],
  };
}
129
+
130
+ // Phases where competition pays off (wide solution space, pre-contract, high blast
131
+ // radius). A competition arg on any other phase is ignored (single producer runs).
132
// Phase names for which a competition arg > 1 is honored.
const COMPETITION_ELIGIBLE = new Set([
  "partition",
  "milestone",
  "discuss",
  "design-decompose",
]);
133
+
134
+ // Rubric axes for the SUBJECTIVE judge (non-partition eligible phases). Partition
135
+ // uses the objective oracle instead and ignores these.
136
// Weighted rubric axes keyed by phase name. Each entry is { key, weight };
// the first axis of every phase carries weight 2, the rest weight 1.
const RUBRIC_AXES_BY_PHASE = (() => {
  const axis = (key, weight) => ({ key, weight });
  return {
    milestone: [
      axis("coherence", 2),
      axis("completeness", 1),
      axis("riskCoverage", 1),
      axis("simplicity", 1),
    ],
    discuss: [
      axis("soundness", 2),
      axis("completeness", 1),
      axis("tradeoffClarity", 1),
      axis("simplicity", 1),
    ],
    "design-decompose": [
      axis("fidelity", 2),
      axis("completeness", 1),
      axis("reuse", 1),
      axis("simplicity", 1),
    ],
  };
})();
150
+
60
151
  const VALID_PHASES = [
61
152
  "partition", "plan", "discuss", "impact",
62
153
  "milestone", "prd", "design-decompose", "doc-ripple",
@@ -79,6 +170,15 @@ const milestone = _args.milestone || null;
79
170
  const userInput = _args.userInput || "";
80
171
  const phaseName = _args.phase;
81
172
 
173
+ // M82: clamp competition N to [1,5]. Evidence (Self-MoA, Large Language Monkeys):
174
+ // gains plateau fast; N=3 captures the elbow, >5 is wasteful. N<=1 = off (single producer).
175
+ const _rawN = Number(_args.competition) || 1;
176
+ const competitionN = Math.max(1, Math.min(5, Math.floor(_rawN)));
177
+ const competitionOn = competitionN > 1 && COMPETITION_ELIGIBLE.has(phaseName);
178
+ if (competitionN > 1 && !competitionOn) {
179
+ log(`competition: N=${competitionN} ignored — phase "${phaseName}" is not competition-eligible (single producer runs). Eligible: ${[...COMPETITION_ELIGIBLE].join(", ")}.`);
180
+ }
181
+
82
182
  if (!phaseName || !VALID_PHASES.includes(phaseName)) {
83
183
  log(`phase: args.phase must be one of: ${VALID_PHASES.join(", ")}`);
84
184
  return { status: "failed", reason: "invalid-phase" };
@@ -92,7 +192,9 @@ const brief = await generateBrief(projectDir, { kind: phaseName, milestone, id:
92
192
  phase("Phase");
93
193
  const promptByPhase = {
94
194
  partition: `Decompose the milestone into 2-5 independent domains. Write .gsd-t/domains/{domain}/{scope,constraints,tasks}.md. Cross-domain contracts in .gsd-t/contracts/.`,
95
- plan: `For each domain, write atomic tasks.md entries with files, contract refs, dependencies, acceptance criteria. Update .gsd-t/contracts/integration-points.md with wave groupings.`,
195
+ plan: `For each domain, write atomic tasks.md entries with files, contract refs, dependencies, acceptance criteria. Update .gsd-t/contracts/integration-points.md with wave groupings.
196
+
197
+ M83 PLAN HARDENING (mandatory — the plan is BLOCKED from execute otherwise): every task that declares acceptance criteria MUST also declare (1) **Files** = the concrete code path that implements it, and (2) a TEST that fails if that path is dead — name it in a **Test** field, a test-file path (\`*.test.*\` / \`*.spec.*\` / \`e2e/\`), or a runner (vitest/cargo test/playwright). The ONE task that delivers the milestone's HEADLINE capability MUST be tagged **Headline:** true and carry BOTH a real implementation path AND a test that exercises that capability end-to-end (e.g. for a "100MB+ file" milestone, a test that actually opens a >100MB fixture). NEVER defer a milestone's own headline capability or a core AC to a later milestone. This exists because NiceNote M5 shipped its headline (100MB+ chunked read) as DEAD CODE with no test and burned 4 verify cycles.`,
96
198
  discuss: `Multi-perspective exploration of design questions. Settle locked decisions into .gsd-t/CONTEXT.md. Do NOT implement.`,
97
199
  impact: `Analyze downstream effects of proposed changes. Identify breaking changes, affected consumers, migration paths.`,
98
200
  milestone: `Define a new milestone — origin, goal, success criteria, falsifiable acceptance. Append to .gsd-t/progress.md. Defer partition/plan.`,
@@ -101,23 +203,316 @@ const promptByPhase = {
101
203
  "doc-ripple": `Identify and update all docs affected by recent code changes per the Document Ripple Completion Gate. No code edits.`,
102
204
  };
103
205
 
104
- const result = await agent(
105
- [
106
- `You are the ${phaseName} phase agent.`,
107
- milestone ? `Milestone: ${milestone}` : "",
108
- `**Brief (REQUIRED):** ${brief.briefPath || "(no brief — re-walk repo)"}`,
109
- userInput ? `\nUser input:\n${userInput}` : "",
110
- ``,
111
- `Objective: ${promptByPhase[phaseName]}`,
112
- ``,
113
- `Follow the CLAUDE.md Pre-Commit Gate. Commit artifacts with prefix "m61(${phaseName})" or similar.`,
114
- `Return JSON per the schema.`,
115
- ].filter(Boolean).join("\n"),
116
- { label: phaseName, phase: "Phase", schema: PHASE_RESULT_SCHEMA, model: "opus" }
117
- ).catch((e) => ({
118
- status: "failed",
119
- artifacts: [],
120
- summary: `agent error: ${e && e.message}`,
121
- }));
206
+ const baseObjective = promptByPhase[phaseName];
207
+ const briefLine = `**Brief (REQUIRED):** ${brief.briefPath || "(no brief — re-walk repo)"}`;
208
+
209
+ let result;
210
+ if (!competitionOn) {
211
+ // ── Single-producer path (default, unchanged behavior) ──
212
+ result = await agent(
213
+ [
214
+ `You are the ${phaseName} phase agent.`,
215
+ milestone ? `Milestone: ${milestone}` : "",
216
+ briefLine,
217
+ userInput ? `\nUser input:\n${userInput}` : "",
218
+ ``,
219
+ `Objective: ${baseObjective}`,
220
+ ``,
221
+ `Follow the CLAUDE.md Pre-Commit Gate. Commit artifacts with prefix "${(milestone || "m").toLowerCase()}(${phaseName})".`,
222
+ `Return JSON per the schema.`,
223
+ ].filter(Boolean).join("\n"),
224
+ { label: phaseName, phase: "Phase", schema: PHASE_RESULT_SCHEMA, model: "opus" }
225
+ ).catch((e) => ({ status: "failed", artifacts: [], summary: `agent error: ${e && e.message}` }));
226
+ } else {
227
+ // ── M82 Competition Mode: generate -> judge -> finalize ──
228
+ // Distinct "angles" so the N Self-MoA producers explore different regions of
229
+ // the solution space (diversity by prompt, not by model — Self-MoA > Mixed-MoA).
230
+ const ANGLES = [
231
+ "Optimize for MAXIMUM parallelism: carve the most file-disjoint domains that can run concurrently.",
232
+ "Optimize for SIMPLICITY: the fewest domains with the cleanest, most obvious boundaries.",
233
+ "Optimize for RISK ISOLATION: isolate the riskiest/most-coupled work into its own domain so the rest stays safe.",
234
+ "Optimize for DEPENDENCY DEPTH: minimize serial gates (waves) between domains.",
235
+ "Optimize for BALANCE: roughly equal-sized domains with minimal cross-talk.",
236
+ ];
237
+
238
+ const PRODUCER_SCHEMA = phaseName === "partition"
239
+ ? {
240
+ type: "object", required: ["id", "domains"], additionalProperties: true,
241
+ properties: {
242
+ id: { type: "string" },
243
+ rationale: { type: "string" },
244
+ domains: {
245
+ type: "array", items: {
246
+ type: "object", required: ["name", "touches"], additionalProperties: true,
247
+ properties: {
248
+ name: { type: "string" },
249
+ touches: { type: "array", items: { type: "string" } },
250
+ summary: { type: "string" },
251
+ },
252
+ },
253
+ },
254
+ },
255
+ }
256
+ : {
257
+ type: "object", required: ["id", "proposal"], additionalProperties: true,
258
+ properties: { id: { type: "string" }, proposal: { type: "string" }, rationale: { type: "string" } },
259
+ };
260
+
261
+ phase("Compete");
262
+ log(`competition: ${competitionN} producers (Self-MoA, model=opus) for ${phaseName}`);
263
+ const ids = ["A", "B", "C", "D", "E"];
264
+ const candidates = (await parallel(
265
+ Array.from({ length: competitionN }, (_, i) => () =>
266
+ agent(
267
+ [
268
+ `You are candidate ${ids[i]} — one of ${competitionN} INDEPENDENT ${phaseName} proposals competing on quality.`,
269
+ milestone ? `Milestone: ${milestone}` : "",
270
+ briefLine,
271
+ userInput ? `\nUser input:\n${userInput}` : "",
272
+ ``,
273
+ `Objective: ${baseObjective}`,
274
+ `Your distinct angle: ${ANGLES[i % ANGLES.length]}`,
275
+ ``,
276
+ `DO NOT write or commit any files. PROPOSE ONLY — return your proposal as JSON per the schema.`,
277
+ phaseName === "partition"
278
+ ? `For "touches", list the concrete repo file paths each domain will WRITE (its owned files). Be specific and realistic — the judge scores file-disjointness from these.`
279
+ : `Put the full proposal text in "proposal".`,
280
+ `Set "id" to "${ids[i]}".`,
281
+ ].filter(Boolean).join("\n"),
282
+ { label: `candidate:${ids[i]}`, phase: "Compete", schema: PRODUCER_SCHEMA, model: "opus" }
283
+ ).then((c) => ({ ...c, id: c.id || ids[i] })).catch(() => null)
284
+ )
285
+ )).filter(Boolean);
286
+
287
+ if (candidates.length === 0) {
288
+ return { status: "failed", artifacts: [], summary: "competition: all producers failed" };
289
+ }
290
+
291
+ phase("Judge");
292
+ let winnerId = null;
293
+ let ranked = [];
294
+ if (phaseName === "partition") {
295
+ // OBJECTIVE oracle judge — calculator, not critic.
296
+ const env = await runCompetitionJudge(projectDir, { kind: "partition", candidates }, "judge:oracle", "Judge");
297
+ winnerId = env.winner; ranked = env.ranked || [];
298
+ } else {
299
+ // SUBJECTIVE judge: a different-model (sonnet) rubric scorer. Candidates are
300
+ // blind (author identity stripped) AND shuffled (deterministic permutation) so
301
+ // judge position no longer correlates with producer index/angle — Red Team
302
+ // HIGH-3: the shuffle was claimed in a comment but never implemented.
303
+ const axes = RUBRIC_AXES_BY_PHASE[phaseName] || [{ key: "quality", weight: 1 }];
304
+ // Deterministic permutation (Math.random is sandbox-banned): rotate by a seed
305
+ // derived from the milestone+phase string so order is stable per run but
306
+ // decoupled from producer index. The CLI tiebreak keys off the candidate's own
307
+ // id (carried through), so final selection stays reproducible regardless.
308
+ const seedStr = `${milestone || "m"}:${phaseName}`;
309
+ let seed = 0;
310
+ for (let k = 0; k < seedStr.length; k++) seed = (seed * 31 + seedStr.charCodeAt(k)) >>> 0;
311
+ const rot = candidates.length ? (seed % candidates.length) : 0;
312
+ const shuffled = candidates.map((_, i) => candidates[(i + rot) % candidates.length]);
313
+ const labeled = shuffled.map((c, i) => ({ id: c.id, label: ids[i], text: c.proposal || c.rationale || "" }));
314
+ const rubric = await agent(
315
+ [
316
+ `You are a BLIND, IMPARTIAL judge scoring ${labeled.length} competing ${phaseName} proposals.`,
317
+ `Score each on a 1-5 scale per axis: ${axes.map((a) => a.key).join(", ")}. Higher = better.`,
318
+ `Judge ONLY the content. The labels are arbitrary and the order is randomized — do NOT prefer earlier ones. Be calibrated and critical.`,
319
+ ``,
320
+ ...labeled.map((c) => `### Candidate ${c.label}\n${c.text}`),
321
+ ``,
322
+ `Return JSON: { "scores": [ { "id": "<candidate label A/B/C...>", "<axis>": <1-5>, ... }, ... ] }`,
323
+ `IMPORTANT: use the CANDIDATE LABEL (A, B, C…) shown above as the "id" in your scores.`,
324
+ ].join("\n"),
325
+ {
326
+ label: "judge:rubric", phase: "Judge", model: "sonnet",
327
+ schema: {
328
+ type: "object", required: ["scores"], additionalProperties: true,
329
+ properties: { scores: { type: "array", items: { type: "object", additionalProperties: true } } },
330
+ },
331
+ }
332
+ ).catch(() => ({ scores: [] }));
333
+ // Map the judge's label-keyed scores back to the REAL candidate ids before
334
+ // deterministic selection (so the winner id matches an actual candidate).
335
+ const labelToId = new Map(labeled.map((c) => [c.label, c.id]));
336
+ const judgeCandidates = (rubric.scores || []).map((s) => {
337
+ const { id, ...rest } = s; return { id: labelToId.get(id) || id, scores: rest };
338
+ });
339
+ const env = await runCompetitionJudge(projectDir, { kind: "generic", axes, candidates: judgeCandidates }, "judge:select", "Judge");
340
+ winnerId = env.winner; ranked = env.ranked || [];
341
+ }
342
+
343
+ // Red Team HIGH-1: NEVER fall back to an arbitrary candidate. For partition the
344
+ // judge returns winner=null only when EVERY candidate is file-overlapping
345
+ // (invalid) — committing candidates[0] would ship an invalid partition the
346
+ // dispatcher then mis-fans-out (contract Invariant 2). Hard-fail instead.
347
+ let winner = candidates.find((c) => c.id === winnerId);
348
+ if (!winner) {
349
+ if (phaseName === "partition") {
350
+ log(`competition: no VALID partition among ${candidates.length} candidates — failing the phase (Invariant 2: invalid never selected).`);
351
+ return {
352
+ status: "failed", artifacts: [],
353
+ summary: `competition: no valid (file-disjoint) partition among ${candidates.length} candidates`,
354
+ competition: { n: candidates.length, winner: null, ranked },
355
+ };
356
+ }
357
+ // Subjective phases: fall back to the judge's rank-1, else the first candidate.
358
+ const rank1 = (ranked[0] && candidates.find((c) => c.id === ranked[0].id)) || candidates[0];
359
+ winner = rank1;
360
+ log(`competition: judge returned no winner; falling back to rank-1 (${winner.id}).`);
361
+ }
362
+ log(`competition: winner = ${winner.id} (of ${candidates.map((c) => c.id).join(", ")})`);
363
+
364
+ // FINALIZE: one agent commits the WINNING approach (pick-one at the thesis level),
365
+ // then enriches it with non-overlapping good line-items from the losers (safe union
366
+ // at the separable layer — "winner + salvage orphaned good ideas"; never grafts a
367
+ // coupled thesis). Per the two-gate rule in competition-mode-contract.md.
368
+ phase("Finalize");
369
+ const winnerBlob = phaseName === "partition" ? JSON.stringify(winner.domains) : (winner.proposal || winner.rationale || "");
370
+ const losersBlob = candidates.filter((c) => c.id !== winner.id)
371
+ .map((c) => phaseName === "partition" ? JSON.stringify(c.domains) : (c.proposal || c.rationale || ""))
372
+ .join("\n---\n");
373
+ // For partition, the finalizer must report the EXACT domains+touches it committed
374
+ // so we can RE-VALIDATE the graft (Red Team HIGH-2 / contract Invariant 4: a
375
+ // salvaged "missed file" could silently reintroduce a write-target overlap).
376
+ const FINALIZE_SCHEMA = phaseName === "partition"
377
+ ? {
378
+ // finalizedDomains REQUIRED for partition (Red Team recheck LOW-1): if it's
379
+ // optional, a finalizer that omits it silently bypasses re-validation.
380
+ type: "object", required: ["status", "artifacts", "finalizedDomains"], additionalProperties: false,
381
+ properties: {
382
+ status: { type: "string", enum: ["complete", "partial", "blocked", "failed"] },
383
+ artifacts: { type: "array", items: { type: "string" } },
384
+ summary: { type: "string" },
385
+ decisions: { type: "array", items: { type: "string" } },
386
+ finalizedDomains: {
387
+ type: "array", items: {
388
+ type: "object", required: ["name", "touches"], additionalProperties: true,
389
+ properties: { name: { type: "string" }, touches: { type: "array", items: { type: "string" } } },
390
+ },
391
+ },
392
+ },
393
+ }
394
+ : PHASE_RESULT_SCHEMA;
395
+
396
+ result = await agent(
397
+ [
398
+ `You are the ${phaseName} finalizer. A competition selected a WINNING proposal; implement it for real.`,
399
+ milestone ? `Milestone: ${milestone}` : "",
400
+ briefLine,
401
+ ``,
402
+ `Objective: ${baseObjective}`,
403
+ ``,
404
+ `WINNING proposal (implement this whole — it is a coherent thesis, do NOT Frankenstein it):`,
405
+ winnerBlob,
406
+ ``,
407
+ `Other proposals (for SALVAGE ONLY — fold in any non-overlapping, clearly-good line-items, e.g. an extra risk, a missed file, a better domain name — that do NOT conflict with the winning structure. NEVER assign a file to a domain that another domain already owns. If in doubt, leave them out):`,
408
+ losersBlob || "(none)",
409
+ ``,
410
+ `Now WRITE the real artifacts and follow the CLAUDE.md Pre-Commit Gate. Commit with prefix "${(milestone || "m").toLowerCase()}(${phaseName})".`,
411
+ phaseName === "partition"
412
+ ? `Return JSON per the schema, INCLUDING "finalizedDomains" — the exact {name, touches[]} of every domain you committed (touches = the repo files each domain OWNS/WRITES). This is re-validated for file-disjointness.`
413
+ : `Return JSON per the schema.`,
414
+ `Include the competition outcome in "decisions" (e.g. "competition: winner ${winner.id} of ${candidates.length}").`,
415
+ ].filter(Boolean).join("\n"),
416
+ { label: `${phaseName}:finalize`, phase: "Finalize", schema: FINALIZE_SCHEMA, model: "opus" }
417
+ ).catch((e) => ({ status: "failed", artifacts: [], summary: `finalizer error: ${e && e.message}` }));
418
+
419
+ // Re-validate the FINALIZED partition (Invariant 4). If salvage reintroduced an
420
+ // overlap, the finalized graft is invalid → block completion with a clear reason.
421
+ if (phaseName === "partition" && result && result.status !== "failed") {
422
+ const finalized = Array.isArray(result.finalizedDomains) ? result.finalizedDomains : null;
423
+ if (!finalized || !finalized.length) {
424
+ // No finalizedDomains to re-check → can't prove disjointness → block rather
425
+ // than silently accept (Red Team recheck LOW-1: never fail-open on the gate).
426
+ log(`competition: finalizer returned no finalizedDomains — cannot re-validate disjointness, blocking.`);
427
+ result.status = "blocked";
428
+ result.summary = `finalizer did not report finalizedDomains; partition disjointness unverifiable. ${result.summary || ""}`.trim();
429
+ } else {
430
+ const reval = await runCompetitionJudge(
431
+ projectDir,
432
+ { kind: "partition", candidates: [{ id: "finalized", domains: finalized }] },
433
+ "judge:revalidate", "Finalize"
434
+ );
435
+ if (reval.winner !== "finalized") {
436
+ log(`competition: FINALIZED partition failed re-validation (salvage reintroduced a file overlap) — blocking (Invariant 4).`);
437
+ result.status = "blocked";
438
+ result.summary = `finalized partition is NOT file-disjoint (salvage overlap); re-run finalize dropping the conflicting file. ${result.summary || ""}`.trim();
439
+ }
440
+ }
441
+ }
442
+
443
+ // Thread the competition telemetry up so the caller can report measured SC#1.
444
+ result.competition = { n: candidates.length, winner: winner.id, ranked };
445
+ }
446
+
447
+ // ── M83 Left-Shifted Plan Hardening (plan phase only) ──
448
+ // Two blocking gates run AFTER the plan agent writes tasks.md and BEFORE the plan
449
+ // is declared complete — so execute can never start on a plan that would produce a
450
+ // dead deliverable or an unguarded edge case. Contract: plan-hardening-contract.md.
451
+ // (1) Deterministic acceptance-traceability gate — every behavioral task's ACs
452
+ // must bind to a code path + a killing test; the headline must be impl+test.
453
+ // (2) Adversarial pre-mortem agent (opus, fresh-context, assume-the-plan-is-flawed)
454
+ // — predicts edge-case / dead-deliverable / NFR failures; each blocking
455
+ // finding must become a required test before execute.
456
+ if (phaseName === "plan" && result && result.status !== "failed") {
457
+ phase("Plan Hardening");
458
+
459
+ // (1) Deterministic gate. FAIL-CLOSED (Red Team MEDIUM-2): a deterministic gate
460
+ // that can't be evaluated (CLI error / unparsed envelope) is NOT a pass — block.
461
+ const trace = await runTraceabilityGate(projectDir, milestone, "traceability-gate", "Plan Hardening");
462
+ const traceUnparsed = trace && trace.reason === "gate-unparsed";
463
+ if (trace && (trace.ok === false || traceUnparsed)) {
464
+ const vcount = (trace.violations || []).length;
465
+ const why = traceUnparsed
466
+ ? `traceability gate could not be evaluated (CLI error / unparsed output) — failing closed; re-run plan.`
467
+ : `${vcount} acceptance criteria not bound to a code path + killing test (M83 traceability gate). Fix tasks.md, then re-run plan.`;
468
+ log(`plan-hardening: traceability gate BLOCKED — ${traceUnparsed ? "unevaluable (fail-closed)" : vcount + " untraceable AC"}.`);
469
+ result.status = "blocked";
470
+ result.summary = `plan blocked: ${why} ${result.summary || ""}`.trim();
471
+ result.traceability = trace;
472
+ return result;
473
+ }
474
+ result.traceability = trace;
475
+
476
+ // (2) Adversarial pre-mortem. The agent reads its own protocol at spawn time
477
+ // (the orchestrator has no fs); blocking findings convert to required tests.
478
+ const PRE_MORTEM_SCHEMA = {
479
+ type: "object", required: ["verdict", "findings"], additionalProperties: true,
480
+ properties: {
481
+ verdict: { type: "string", enum: ["BLOCK", "CLEARED"] },
482
+ findings: {
483
+ type: "array", items: {
484
+ type: "object", required: ["severity", "condition", "requiredTest"], additionalProperties: true,
485
+ properties: {
486
+ severity: { type: "string", enum: ["CRITICAL", "HIGH", "MEDIUM", "LOW"] },
487
+ category: { type: "string" }, condition: { type: "string" },
488
+ whyItFails: { type: "string" }, requiredTest: { type: "string" }, affectedAC: { type: "string" },
489
+ },
490
+ },
491
+ },
492
+ headlineAssessment: { type: "object", additionalProperties: true },
493
+ notes: { type: "string" },
494
+ },
495
+ };
496
+ const preMortem = await agent(
497
+ [
498
+ `You are the adversarial Pre-Mortem reviewer for milestone ${milestone || "(current)"}.`,
499
+ `FIRST read your protocol via the Read tool: templates/prompts/pre-mortem-subagent.md (in the installed @tekyzinc/gsd-t package, or this project's copy). Follow it exactly.`,
500
+ `**Brief (REQUIRED):** ${brief.briefPath || "(no brief — read plan artifacts directly)"}`,
501
+ `Attack the PLAN at .gsd-t/domains/*/{scope,constraints,tasks}.md + .gsd-t/contracts/ + docs/requirements.md.`,
502
+ `Predict, before any code is executed, how this milestone will FAIL: edge cases, dead deliverables, unguarded NFRs, shallow-test traps. Scrutinize the HEADLINE capability hardest — is it bound to a real path, reachable, and covered by a killing test?`,
503
+ `Every blocking finding MUST convert to a concrete requiredTest the plan must adopt. Advisory notes are forbidden.`,
504
+ `Verdict BLOCK if any concrete, falsifiable failure condition lacks a named required test; else CLEARED. Return JSON per the schema.`,
505
+ ].join("\n"),
506
+ { label: "pre-mortem", phase: "Plan Hardening", schema: PRE_MORTEM_SCHEMA, model: "opus" }
507
+ ).catch((e) => ({ verdict: "BLOCK", findings: [{ severity: "HIGH", condition: `pre-mortem agent error: ${e && e.message}`, requiredTest: "re-run pre-mortem" }], notes: "agent-error" }));
508
+
509
+ result.preMortem = preMortem;
510
+ if (preMortem && preMortem.verdict === "BLOCK") {
511
+ const n = (preMortem.findings || []).length;
512
+ log(`plan-hardening: pre-mortem BLOCKED — ${n} predicted failure condition(s) need required tests in the plan.`);
513
+ result.status = "blocked";
514
+ result.summary = `plan blocked: pre-mortem found ${n} falsifiable failure condition(s) not covered by a planned test (M83). Add the required tests to tasks.md, then re-run plan. ${result.summary || ""}`.trim();
515
+ }
516
+ }
122
517
 
123
518
  return result;