slice-tournament-zoo 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/README.md +357 -0
- package/bin/stz.mjs +15 -0
- package/package.json +35 -0
- package/src/README.md +19 -0
- package/src/bridge.ts +950 -0
- package/src/budget.ts +78 -0
- package/src/cli.ts +126 -0
- package/src/cost-tracker.ts +59 -0
- package/src/escalation.ts +89 -0
- package/src/eval-runner.ts +220 -0
- package/src/grpo.ts +54 -0
- package/src/hack-detector.ts +124 -0
- package/src/index.ts +17 -0
- package/src/merge.ts +245 -0
- package/src/mock/README.md +40 -0
- package/src/mock/interfaces.ts +114 -0
- package/src/mock/mock.ts +223 -0
- package/src/mock/orchestrator.ts +457 -0
- package/src/pressure.ts +81 -0
- package/src/project.ts +335 -0
- package/src/seal.ts +182 -0
- package/src/selection.ts +128 -0
- package/src/specdiff.ts +141 -0
- package/src/state.ts +95 -0
- package/src/taxonomy.ts +161 -0
- package/src/types.ts +305 -0
package/src/bridge.ts
ADDED
|
@@ -0,0 +1,950 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The in-session orchestration bridge.
|
|
3
|
+
*
|
|
4
|
+
* STZ runs *inside* Claude Code: the orchestrator is the command-driven main
|
|
5
|
+
* agent, which spawns specimen/judge/test-author/documenter work as Task
|
|
6
|
+
* subagents. A Node process cannot call the Task tool, so the model layer lives
|
|
7
|
+
* in the agent loop — but every *deterministic* decision (eval gate, hack
|
|
8
|
+
* detection, GRPO, selection, state, audit) must stay exact and replayable.
|
|
9
|
+
*
|
|
10
|
+
* This module is that deterministic half, exposed as JSON-in / JSON-out
|
|
11
|
+
* subcommands the `/stz:run` command calls between agent spawns. The command
|
|
12
|
+
* owns spawn-and-collect; the bridge owns all compute. If a tally or comparison
|
|
13
|
+
* is ever tempting to write in the command markdown, it belongs here instead.
|
|
14
|
+
*
|
|
15
|
+
* stz bridge begin --root D --manifest M.json
|
|
16
|
+
* stz bridge record-eval --root D --slice S --specimen X --metrics J.json
|
|
17
|
+
* stz bridge gate --root D --slice S
|
|
18
|
+
* stz bridge record-votes --root D --slice S --votes V.json
|
|
19
|
+
* stz bridge select --root D --slice S
|
|
20
|
+
* stz bridge finalize --root D --slice S --intent I.json --asbuilt A.json
|
|
21
|
+
*
|
|
22
|
+
* Every subcommand prints a single JSON object on stdout (the command parses
|
|
23
|
+
* it) and writes its durable artifacts into the `.stz/` tree.
|
|
24
|
+
*/
|
|
25
|
+
import { readFileSync, writeFileSync, mkdirSync, readdirSync, existsSync } from "node:fs";
|
|
26
|
+
import { writeFile } from "node:fs/promises";
|
|
27
|
+
import { join } from "node:path";
|
|
28
|
+
import type {
|
|
29
|
+
EvalResult,
|
|
30
|
+
PairwiseVote,
|
|
31
|
+
SliceManifest,
|
|
32
|
+
ProjectManifest,
|
|
33
|
+
ProjectPhase,
|
|
34
|
+
ProjectSliceEntry,
|
|
35
|
+
RunConfig,
|
|
36
|
+
} from "./types.js";
|
|
37
|
+
import { PROJECT_PHASES } from "./types.js";
|
|
38
|
+
import { scaffold, writeDoc, readDoc, stzPath } from "./taxonomy.js";
|
|
39
|
+
import { freshState, saveState, loadState, stateExists, setPhaseStatus, appendEvent } from "./state.js";
|
|
40
|
+
import {
|
|
41
|
+
freshProjectState,
|
|
42
|
+
saveProjectState,
|
|
43
|
+
loadProjectState,
|
|
44
|
+
projectStateExists,
|
|
45
|
+
appendProjectEvent,
|
|
46
|
+
projectManifestPath,
|
|
47
|
+
PROJECT_PHASE_TIER,
|
|
48
|
+
topoOrder,
|
|
49
|
+
deriveSliceStatus,
|
|
50
|
+
nextRunnable,
|
|
51
|
+
normalizeRunConfig,
|
|
52
|
+
saveRunConfig,
|
|
53
|
+
loadRunConfig,
|
|
54
|
+
setDarkFactory,
|
|
55
|
+
runConfigExists,
|
|
56
|
+
defaultRunConfig,
|
|
57
|
+
} from "./project.js";
|
|
58
|
+
import { detectHacks } from "./hack-detector.js";
|
|
59
|
+
import { evalGate, select, pairings } from "./selection.js";
|
|
60
|
+
import { diffSpecs, renderSpecDiff, isFaithful, unmatchedIntentIds, mismatchedAsBuiltIds, type Spec } from "./specdiff.js";
|
|
61
|
+
import { seal, verifySeal, amendSeal, heldOutFiles } from "./seal.js";
|
|
62
|
+
import { renderPressureLog, refinementContext, type CulledSpecimen } from "./pressure.js";
|
|
63
|
+
import { fullEval, crossReference } from "./eval-runner.js";
|
|
64
|
+
import {
|
|
65
|
+
loadCompat,
|
|
66
|
+
saveCompat,
|
|
67
|
+
proposeCompat,
|
|
68
|
+
approveCompat,
|
|
69
|
+
retireCompat,
|
|
70
|
+
validateMerge,
|
|
71
|
+
type MergeCompatEntry,
|
|
72
|
+
type SealedSuiteResult,
|
|
73
|
+
} from "./merge.js";
|
|
74
|
+
|
|
75
|
+
// ── small arg parser ──────────────────────────────────────────────────────
|
|
76
|
+
|
|
77
|
+
/**
 * Minimal `--key value` argument parser.
 *
 * Every token that starts with `--` becomes a key (dashes stripped). When the
 * following token exists and is not itself a `--` flag it is consumed as the
 * value; otherwise the key is a bare flag and maps to the string `"true"`.
 * Tokens that are neither a flag nor a consumed value are ignored, and a
 * repeated key keeps its last value.
 *
 * An explicitly empty value (`--resolution ""`) is preserved as `""`. The
 * earlier truthiness test mistook it for "no value" and stored `"true"`.
 *
 * @param argv raw argument tokens (without the node/script prefix)
 * @returns map from flag name to its string value
 */
function parseArgs(argv: string[]): Record<string, string> {
  const out: Record<string, string> = {};
  for (let i = 0; i < argv.length; i++) {
    const token = argv[i];
    if (token === undefined || !token.startsWith("--")) continue;
    const key = token.slice(2);
    const next = argv[i + 1];
    // Compare against undefined rather than truthiness so "" still counts as a value.
    if (next !== undefined && !next.startsWith("--")) {
      out[key] = next;
      i++; // the value token has been consumed
    } else {
      out[key] = "true";
    }
  }
  return out;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Synchronously read a UTF-8 file and parse it as JSON.
 *
 * The parsed value is cast to `T` without any runtime validation, so the
 * caller is trusting the file's shape. Read and parse errors propagate.
 */
function readJSON<T>(path: string): T {
  const text = readFileSync(path, "utf8");
  const parsed: unknown = JSON.parse(text);
  return parsed as T;
}
|
|
93
|
+
|
|
94
|
+
/** Emit `obj` on stdout as 2-space-indented JSON followed by a single newline. */
function print(obj: unknown): void {
  const rendered = JSON.stringify(obj, null, 2);
  process.stdout.write(`${rendered}\n`);
}
|
|
97
|
+
|
|
98
|
+
// ── paths within a slice ────────────────────────────────────────────────────
|
|
99
|
+
|
|
100
|
+
/** Relative directory of slice `id`: `40-slices/<id>`. */
function sliceRel(id: string): string {
  return join("40-slices", id);
}

/** Relative directory of one specimen's prototype: `<slice dir>/prototypes/specimen-<specimen>`. */
function protoRel(id: string, specimen: string): string {
  const specimenDir = `specimen-${specimen}`;
  return join(sliceRel(id), "prototypes", specimenDir);
}
|
|
103
|
+
/** Location of a specimen's recorded eval result (`<prototype dir>/eval/result.json`), resolved through `stzPath`. */
function evalResultPath(root: string, id: string, specimen: string) {
  const rel = join(protoRel(id, specimen), "eval", "result.json");
  return stzPath(root, rel);
}

/** Location of a slice's recorded pairwise votes (`<slice dir>/tournament/votes.json`), resolved through `stzPath`. */
function votesPath(root: string, id: string) {
  const rel = join(sliceRel(id), "tournament", "votes.json");
  return stzPath(root, rel);
}

/** Location of a slice's persisted judgment (`<slice dir>/tournament/judgment.json`), resolved through `stzPath`. */
function judgmentPath(root: string, id: string) {
  const rel = join(sliceRel(id), "tournament", "judgment.json");
  return stzPath(root, rel);
}
|
|
109
|
+
|
|
110
|
+
/**
 * Read every file under a specimen's prototype directory into memory.
 *
 * Returns a map from path (relative to the specimen directory) to the file's
 * contents decoded as UTF-8. A missing directory yields an empty map.
 *
 * Only the specimen's top-level `eval/` directory is skipped: that is where
 * this module writes `eval/result.json`. Previously any entry named `eval` at
 * any depth was skipped, so specimen code placed under e.g. `src/eval/` was
 * never handed to the hack detector or included in the pressure-log diff.
 *
 * @param root project root passed to `stzPath`
 * @param id slice id
 * @param specimen specimen id (the part after `specimen-`)
 */
function readSpecimenFiles(root: string, id: string, specimen: string): Record<string, string> {
  const dir = stzPath(root, protoRel(id, specimen));
  const files: Record<string, string> = {};
  const walk = (rel: string): void => {
    const abs = join(dir, rel);
    if (!existsSync(abs)) return;
    for (const ent of readdirSync(abs, { withFileTypes: true })) {
      // Skip only our own eval output dir (<specimen>/eval); `rel` is "." solely
      // at the specimen root because join(".", name) normalizes to `name`.
      if (rel === "." && ent.isDirectory() && ent.name === "eval") continue;
      const childRel = join(rel, ent.name);
      if (ent.isDirectory()) walk(childRel);
      else files[childRel] = readFileSync(join(dir, childRel), "utf8");
    }
  };
  walk(".");
  return files;
}
|
|
126
|
+
|
|
127
|
+
/**
 * List the specimen ids that have a `specimen-<id>` directory under the
 * slice's `prototypes/` folder, in default string sort order. Returns an
 * empty list when the folder does not exist.
 */
function listSpecimens(root: string, id: string): string[] {
  const prefix = "specimen-";
  const protoDir = stzPath(root, join(sliceRel(id), "prototypes"));
  if (!existsSync(protoDir)) return [];
  const ids: string[] = [];
  for (const entry of readdirSync(protoDir, { withFileTypes: true })) {
    if (entry.isDirectory() && entry.name.startsWith(prefix)) {
      ids.push(entry.name.slice(prefix.length));
    }
  }
  return ids.sort();
}
|
|
135
|
+
|
|
136
|
+
// ── subcommands ─────────────────────────────────────────────────────────────
|
|
137
|
+
|
|
138
|
+
/**
 * begin: scaffold the tree, write the slice's `manifest.md`, mark the
 * `planning` phase done, and print what the command needs to start spawning
 * specimens.
 *
 * Reads `--root` and `--manifest` (path to a SliceManifest JSON file).
 */
async function begin(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const manifest = readJSON<SliceManifest>(args.manifest!);
  const sliceDir = sliceRel(manifest.id);
  await scaffold(root);

  const frontmatter = {
    summary: manifest.summary,
    contract: manifest.contract,
    complexity: manifest.complexity,
    traceTier: manifest.traceTier,
    votesPerPair: manifest.judge.votesPerPair,
  };
  const predicateLines = manifest.donePredicates.map((d) => `- \`${d.expr}\` (${d.kind})`).join("\n");
  const body =
    `# ${manifest.id} — ${manifest.name}\n\n## Contract\n\n\`${manifest.contract}\`\n\n` +
    `## Done predicates\n${predicateLines}\n`;
  await writeDoc(root, join(sliceDir, "manifest.md"), { frontmatter, body });

  // Keep a project-seeded state when there is one: `project-seed-slices` has
  // already marked the four early phases done, and starting from `freshState`
  // would reset them to pending so the slice could never read complete. A
  // fresh state is only seeded for a standalone /stz:run.
  const prior = stateExists(root, manifest.id)
    ? await loadState(root, manifest.id)
    : freshState(manifest.id, manifest.complexity);
  await saveState(root, setPhaseStatus(prior, "planning", "done"));

  print({
    sliceId: manifest.id,
    votesPerPair: manifest.judge.votesPerPair,
    protoDirRoot: stzPath(root, join(sliceDir, "prototypes")),
    note: "spawn specimens to write into prototypes/specimen-<id>/; they return a path+summary, not file contents (N2).",
  });
}
|
|
171
|
+
|
|
172
|
+
/**
 * Build, persist, and print an EvalResult from already-measured metrics.
 *
 * The hack detector runs here, for real, over the specimen's files as read
 * from disk (F10/L3 is never mocked). The metrics themselves
 * (testPassRate/coverage/mutationScore) are supplied by the caller, so the
 * gate decision is deterministic given those inputs: a specimen passes only
 * when `testPassRate >= 1` and there are no hack findings.
 *
 * The result is written to the specimen's `eval/result.json` (parent directory
 * created if needed) and then printed merged with `extra`. `extra` is printed
 * only, never persisted, and its keys win on collision.
 */
function commitEval(
  root: string,
  slice: string,
  specimen: string,
  metrics: { testPassRate: number; coverage: number; mutationScore: number },
  fixtureNames: string[],
  extra: Record<string, unknown> = {},
): void {
  const specimenFiles = readSpecimenFiles(root, slice, specimen);
  const hackFindings = detectHacks(specimen, specimenFiles, { fixtureNames });
  const { testPassRate, coverage, mutationScore } = metrics;
  const isClean = hackFindings.length === 0;
  const result: EvalResult = {
    specimen,
    passedGate: testPassRate >= 1 && isClean,
    testPassRate,
    coverage,
    mutationScore,
    hackFindings,
  };
  const target = evalResultPath(root, slice, specimen);
  mkdirSync(join(target, ".."), { recursive: true });
  writeFileSync(target, `${JSON.stringify(result, null, 2)}\n`, "utf8");
  print({ ...result, ...extra });
}
|
|
202
|
+
|
|
203
|
+
/**
 * record-eval: commit an eval whose metrics were measured by the caller (an
 * external eval runner) and handed over as a JSON file via `--metrics`.
 * `--fixtures` is an optional comma-separated list of fixture names.
 */
function recordEval(args: Record<string, string>): void {
  const root = args.root as string;
  const slice = args.slice as string;
  const specimen = args.specimen as string;
  const metrics = readJSON<{ testPassRate: number; coverage: number; mutationScore: number }>(args.metrics!);
  const fixtureNames = args.fixtures ? args.fixtures.split(",") : [];
  commitEval(root, slice, specimen, metrics, fixtureNames);
}
|
|
209
|
+
|
|
210
|
+
/**
 * eval: run the REAL eval runner (sealed suite + V8 coverage + mutation) over
 * a specimen via `fullEval(--sealed, --impl)` and commit the result. This is
 * the un-stubbed path: testPassRate, coverage, and mutationScore come from the
 * run itself rather than from the caller. The raw counts are echoed under
 * `measured` in the printed output.
 */
function evalCmd(args: Record<string, string>): void {
  const root = args.root as string;
  const slice = args.slice as string;
  const specimen = args.specimen as string;
  const run = fullEval(args.sealed!, args.impl!);
  const metrics = {
    testPassRate: run.testPassRate,
    coverage: run.coverage,
    mutationScore: run.mutationScore,
  };
  const fixtureNames = args.fixtures ? args.fixtures.split(",") : [];
  const measured = { passed: run.passed, total: run.total, mutants: run.mutants, survivors: run.survivors };
  commitEval(root, slice, specimen, metrics, fixtureNames, { measured });
}
|
|
227
|
+
|
|
228
|
+
/**
 * Load the recorded eval result of every specimen in a slice. Specimens with
 * no `eval/result.json` yet are silently left out.
 */
function loadEvals(root: string, slice: string): EvalResult[] {
  const results: EvalResult[] = [];
  for (const specimen of listSpecimens(root, slice)) {
    const resultFile = evalResultPath(root, slice, specimen);
    if (existsSync(resultFile)) {
      results.push(readJSON<EvalResult>(resultFile));
    }
  }
  return results;
}
|
|
234
|
+
|
|
235
|
+
/**
 * gate: run the eval gate over the slice's recorded evals and print the
 * passers, the eliminated specimens, and the pairing schedule the command
 * must drive with judge agents.
 */
function gate(args: Record<string, string>): void {
  const root = args.root as string;
  const slice = args.slice as string;
  const outcome = evalGate(loadEvals(root, slice));
  print({
    passers: outcome.passers,
    eliminated: outcome.eliminated,
    pairings: pairings(outcome.passers),
  });
}
|
|
242
|
+
|
|
243
|
+
/**
 * record-votes: copy the judges' pairwise votes from the `--votes` JSON file
 * into the slice's `tournament/votes.json` (creating the directory if needed)
 * and print how many were recorded.
 */
function recordVotes(args: Record<string, string>): void {
  const root = args.root as string;
  const slice = args.slice as string;
  const votes = readJSON<PairwiseVote[]>(args.votes!);
  const target = votesPath(root, slice);
  mkdirSync(join(target, ".."), { recursive: true });
  writeFileSync(target, `${JSON.stringify(votes, null, 2)}\n`, "utf8");
  print({ recorded: votes.length });
}
|
|
251
|
+
|
|
252
|
+
/**
 * select: compute the judgment (winner, ranking, GRPO advantages) from the
 * recorded evals and votes, persist it as `tournament/judgment.json`, write a
 * human-readable `tournament.md`, append a `winner` event to the slice state,
 * and print the outcome.
 *
 * A missing votes file is treated as "no votes". The `tournament/` directory
 * is created here before writing the judgment: in this module only
 * `record-votes` creates it, so a selection with no recorded votes would
 * otherwise fail with ENOENT.
 */
async function selectCmd(args: Record<string, string>): Promise<void> {
  const { root, slice } = args as { root: string; slice: string };
  const evals = loadEvals(root, slice);
  const votesFile = votesPath(root, slice);
  const votes = existsSync(votesFile) ? readJSON<PairwiseVote[]>(votesFile) : [];
  const { judgment } = select(evals, votes);

  const judgmentFile = judgmentPath(root, slice);
  // Do not rely on record-votes having run: ensure the parent directory exists.
  mkdirSync(join(judgmentFile, ".."), { recursive: true });
  writeFileSync(judgmentFile, JSON.stringify(judgment, null, 2) + "\n", "utf8");

  await writeDoc(root, join(sliceRel(slice), "tournament.md"), {
    frontmatter: {
      summary: `Tournament ${slice}: winner specimen-${judgment.winner ?? "none"}, ${judgment.ranking.length} passer(s).`,
    },
    body:
      `# Tournament — ${slice}\n\n- **winner:** ${judgment.winner ? "specimen-" + judgment.winner : "none"}\n` +
      `- **ranking:** ${judgment.ranking.join(" > ") || "—none—"}\n- **votes:** ${votes.length}\n\n` +
      `## GRPO advantages (whole group)\n` +
      judgment.advantages
        .map((a) => `- specimen-${a.specimen}: reward=${a.reward.toFixed(3)} advantage=${a.advantage.toFixed(3)}`)
        .join("\n") +
      "\n",
  });

  let state = await loadState(root, slice);
  state = appendEvent(state, "judgment", "winner", `winner=${judgment.winner}, ranking=[${judgment.ranking.join(",")}]`);
  await saveState(root, state);
  print({ winner: judgment.winner, ranking: judgment.ranking, advantages: judgment.advantages });
}
|
|
276
|
+
|
|
277
|
+
/**
 * Why a non-winning specimen was culled, for the pressure log.
 *
 * Hack findings take precedence. Otherwise a specimen that failed the gate is
 * reported with its pass rate, and one that passed the gate but did not win is
 * reported as outranked. Previously both cases were labelled
 * `gate testPassRate=…`, which mislabelled tournament losers as gate failures.
 */
function culledReason(e: EvalResult): string {
  if (e.hackFindings.length) {
    return `hack: ${e.hackFindings.map((f) => f.pattern).join(",")}`;
  }
  const rate = e.testPassRate.toFixed(2);
  return e.passedGate ? `outranked in tournament (testPassRate=${rate})` : `gate testPassRate=${rate}`;
}

/**
 * finalize: close out the tournament half of a slice.
 *
 * In order: writes the pressure log (and, when there are advantages, the
 * refinement context) under `50-pressure/<slice>/`; diffs the intent spec
 * (`--intent`) against the as-built spec (`--asbuilt`) into `spec-diff.md`;
 * marks the tournament-half phases done; writes the event journal; and prints
 * a summary. A missing judgment file is treated as "no winner".
 */
async function finalize(args: Record<string, string>): Promise<void> {
  const { root, slice } = args as { root: string; slice: string };
  const evals = loadEvals(root, slice);
  const judgment = existsSync(judgmentPath(root, slice))
    ? readJSON<ReturnType<typeof select>["judgment"]>(judgmentPath(root, slice))
    : { ranking: [], winner: null, advantages: [], votes: [] };

  // Pressure log: every non-winning specimen is a negative exemplar (F9).
  const culled: CulledSpecimen[] = evals
    .filter((e) => e.specimen !== judgment.winner)
    .map((e) => ({
      specimen: e.specimen,
      reason: culledReason(e),
      diff: Object.entries(readSpecimenFiles(root, slice, e.specimen))
        .map(([p, c]) => `+++ ${p}\n${c}`)
        .join("\n"),
      critique: "",
      hackFindings: e.hackFindings,
    }));
  await writeDoc(root, join("50-pressure", slice, "pressure.md"), {
    frontmatter: { summary: `Pressure log ${slice}: ${culled.length} culled.` },
    body: renderPressureLog({ sliceId: slice, culled }),
  });
  if (judgment.advantages.length > 0) {
    await writeDoc(root, join("50-pressure", slice, "refinement.md"), {
      frontmatter: { summary: `PDR top-K refinement for ${slice}.` },
      body: refinementContext({ sliceId: slice, culled }, judgment.advantages),
    });
  }

  // Spec-diff (F13). Claims are matched by id (or normalized text); the
  // documenter adjudicates each intent claim, so wording differences no longer
  // read as drift. A mis-keyed verdict would, though — surface it rather than
  // let it silently miscount.
  const intent = readJSON<Spec>(args.intent!);
  const asBuilt = readJSON<Spec>(args.asbuilt!);
  const sdiff = diffSpecs(intent, asBuilt);
  const unmatched = unmatchedIntentIds(intent, asBuilt);
  const mismatched = mismatchedAsBuiltIds(intent, asBuilt);
  if (mismatched.length) {
    process.stderr.write(
      `warning: as-built claim id(s) [${mismatched.join(", ")}] assert satisfied but match no intent claim — likely a documenter mis-key, counted as 'added'.\n`,
    );
  }
  await writeDoc(root, join(sliceRel(slice), "spec-diff.md"), {
    frontmatter: {
      summary: `Spec diff ${slice}: ${sdiff.missing.length} missing, ${sdiff.added.length} added, ${sdiff.kept.length} kept.`,
    },
    body: renderSpecDiff(slice, sdiff),
  });

  // finalize is the tournament-half completion barrier: by the time it runs the
  // sealed suite was authored, the plan written, the tournament run, and the
  // winner judged. Mark every tournament-half phase done (idempotent — `begin`
  // already set planning; phases already done are skipped so events aren't
  // duplicated) so the slice is `isComplete` and `project-status` derives it as
  // "done". Without this, test-authoring/tournament stay "pending" forever, the
  // slice reads "running", and `/stz:pipeline` never advances past it (or
  // re-runs it on resume).
  let state = await loadState(root, slice);
  for (const p of ["test-authoring", "planning", "tournament", "judgment"] as const) {
    if (state.phaseStatus[p] !== "done") state = setPhaseStatus(state, p, "done");
  }
  state.currentPhase = "judgment";
  await saveState(root, state);
  await writeDoc(root, join("90-audit", "journal.md"), {
    frontmatter: { summary: `Event journal for ${slice}: ${state.events.length} events.` },
    body:
      `# Journal — ${slice}\n\n` +
      state.events.map((e) => `${e.seq}. [${e.phase}] ${e.kind}: ${e.detail}`).join("\n") +
      "\n",
  });
  print({
    winner: judgment.winner,
    faithful: isFaithful(sdiff),
    specDiff: { missing: sdiff.missing.length, added: sdiff.added.length, kept: sdiff.kept.length },
    culled: culled.length,
    // undefined keys are dropped by JSON.stringify, so these appear only when non-empty.
    unmatchedIntentIds: unmatched.length ? unmatched : undefined,
    mismatchedAsBuiltIds: mismatched.length ? mismatched : undefined,
  });
}
|
|
360
|
+
|
|
361
|
+
// ── project-level subcommands (the multi-slice driver) ──────────────────────
|
|
362
|
+
|
|
363
|
+
/**
 * project-init: scaffold the tree, persist the project manifest (stamped with
 * `schemaVersion` 1 and a defaulted `slices` list), create a fresh project
 * state carrying a `project-init` event, write `00-intent/project.md`, and
 * print the project id, slice ids, and the list of project phases.
 */
async function projectInit(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const manifest = readJSON<ProjectManifest>(args.manifest!);
  manifest.schemaVersion = 1;
  manifest.slices ??= [];

  await scaffold(root);
  await writeFile(projectManifestPath(root), `${JSON.stringify(manifest, null, 2)}\n`, "utf8");

  const state = freshProjectState(manifest.projectId);
  appendProjectEvent(state, "lifecycle", "project-init", `project ${manifest.projectId} created`);
  await saveProjectState(root, state);

  const sliceSection =
    manifest.slices.length > 0
      ? manifest.slices.map((s) => `- ${s.id} (${s.name}) deps: [${s.dependsOn.join(", ")}]`).join("\n")
      : "_none yet — added during slice-disaggregation_";
  await writeDoc(root, join("00-intent", "project.md"), {
    frontmatter: { summary: manifest.summary || `Project ${manifest.name}.` },
    body: `# ${manifest.name}\n\n${manifest.summary}\n\n## Slices (DAG)\n${sliceSection}\n`,
  });

  const sliceIds = manifest.slices.map((s) => s.id);
  print({ projectId: manifest.projectId, slices: sliceIds, phases: PROJECT_PHASES });
}
|
|
385
|
+
|
|
386
|
+
/** Type guard: true when `p` is one of the names listed in `PROJECT_PHASES`. */
function isProjectPhase(p: string): p is ProjectPhase {
  const known: readonly string[] = PROJECT_PHASES;
  return known.includes(p);
}
|
|
389
|
+
|
|
390
|
+
/**
 * project-phase: mark the project-level phase named by `--phase` as done,
 * append a `phase-done` event, and write a marker doc `<tier>/<phase>.md`
 * where the tier comes from `PROJECT_PHASE_TIER`. An unknown phase is reported
 * on stderr with exit code 1 and nothing is written.
 */
async function projectPhase(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const phase = args.phase!;
  if (!isProjectPhase(phase)) {
    process.stderr.write(`unknown project phase: ${phase}\n`);
    process.exitCode = 1;
    return;
  }

  const projectState = await loadProjectState(root);
  projectState.phaseStatus[phase] = "done";
  appendProjectEvent(projectState, phase, "phase-done", `${phase} → done`);
  await saveProjectState(root, projectState);

  const tier = PROJECT_PHASE_TIER[phase];
  const markerDoc = {
    frontmatter: { summary: `Project phase ${phase} marked done.` },
    body: `# ${phase}\n\nCompleted at the project level. Artifacts live under \`${tier}/\`.\n`,
  };
  await writeDoc(root, join(tier, `${phase}.md`), markerDoc);
  print({ phase, status: "done", tier });
}
|
|
410
|
+
|
|
411
|
+
/**
 * project-write-intent: persist the elicited intent read from `--intent`.
 * Writes the JSON verbatim to `00-intent/intent.json` and a rendered
 * `00-intent/intent.md` (problem, users, constraints, done predicates), then
 * prints the predicate and area counts. Every intent field is optional.
 */
async function projectWriteIntent(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const intent = readJSON<{
    problem?: string;
    users?: string;
    constraints?: string[];
    donePredicates?: { id: string; expr: string; kind: string }[];
    areas?: string[];
  }>(args.intent!);
  const preds = intent.donePredicates ?? [];

  const jsonTarget = stzPath(root, join("00-intent", "intent.json"));
  await writeFile(jsonTarget, `${JSON.stringify(intent, null, 2)}\n`, "utf8");

  const areaCount = (intent.areas ?? []).length;
  const constraintLines = (intent.constraints ?? []).map((c) => `- ${c}`).join("\n");
  const predicateLines = preds.map((p) => `- \`${p.expr}\` (${p.kind})`).join("\n");
  await writeDoc(root, join("00-intent", "intent.md"), {
    frontmatter: { summary: `Intent: ${preds.length} done-predicate(s); ${areaCount} area(s).` },
    body:
      `# Intent\n\n## Problem\n${intent.problem ?? ""}\n\n## Users\n${intent.users ?? ""}\n\n` +
      `## Constraints\n${constraintLines}\n\n` +
      `## Done predicates (machine-checkable)\n${predicateLines}\n`,
  });
  print({ predicates: preds.length, areas: areaCount });
}
|
|
432
|
+
|
|
433
|
+
/**
 * project-record-area: durable per-area checkpoint during elicitation.
 *
 * Appends an `area-resolved` event for `--area` (with optional `--resolution`)
 * under the project phase `--phase`, saves the project state, and prints the
 * areas resolved so far for that phase.
 *
 * An unknown phase or a missing `--area` is reported on stderr with exit code
 * 1 and nothing is recorded. Previously a missing `--area` was interpolated
 * as the literal text "undefined" into the event journal.
 */
async function projectRecordArea(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const phase = args.phase!;
  if (!isProjectPhase(phase)) {
    process.stderr.write(`unknown project phase: ${phase}\n`);
    process.exitCode = 1;
    return;
  }
  const area = args.area;
  if (area === undefined || area === "") {
    process.stderr.write("missing required --area\n");
    process.exitCode = 1;
    return;
  }
  const state = await loadProjectState(root);
  appendProjectEvent(state, phase, "area-resolved", `${area}: ${args.resolution ?? ""}`);
  await saveProjectState(root, state);
  // The area name is recovered as the text before the first ":" of the event
  // detail, so an area whose name itself contains ":" is reported truncated.
  const resolved = state.events
    .filter((e) => e.phase === phase && e.kind === "area-resolved")
    .map((e) => e.detail.split(":")[0]);
  print({ phase, area, recorded: true, resolved });
}
|
|
448
|
+
|
|
449
|
+
/**
 * slice-add: add (or replace, when the id already exists) a slice entry in the
 * project manifest's DAG. Permissive by design: dependency validation happens
 * in project-status. `--depends` is a comma-separated id list; `--name`
 * defaults to the id. Also registers the slice as "pending" in the project
 * state when it is not tracked yet, and appends a `slice-added` event.
 */
async function sliceAdd(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const id = args.id!;
  const dependsOn = (args.depends ? args.depends.split(",") : []).map((d) => d.trim()).filter(Boolean);
  const entry: ProjectSliceEntry = { id, name: args.name ?? id, dependsOn };

  const manifestFile = projectManifestPath(root);
  const manifest = readJSON<ProjectManifest>(manifestFile);
  // Drop any earlier entry with this id so the new one replaces it at the end.
  manifest.slices = [...(manifest.slices ?? []).filter((s) => s.id !== id), entry];
  await writeFile(projectManifestPath(root), `${JSON.stringify(manifest, null, 2)}\n`, "utf8");

  const projectState = await loadProjectState(root);
  if (!(id in projectState.sliceStatus)) {
    projectState.sliceStatus[id] = "pending";
  }
  appendProjectEvent(projectState, "slice", "slice-added", `${id} deps=[${entry.dependsOn.join(",")}]`);
  await saveProjectState(root, projectState);
  print({ id, dependsOn: entry.dependsOn, totalSlices: manifest.slices.length });
}
|
|
468
|
+
|
|
469
|
+
/**
 * project-seed-slices: for each SliceManifest in the `--dag` JSON array, write
 * `manifest.json` and `manifest.md` into the slice directory, save a slice
 * state whose four early phases are already `done`, and register the slice in
 * the project DAG. Missing `judge`, `dependsOn`, and `donePredicates` are
 * defaulted before anything is written.
 */
async function projectSeedSlices(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const dag = readJSON<SliceManifest[]>(args.dag!);
  const created: string[] = [];
  for (const m of dag) {
    m.judge ??= { votesPerPair: 8 };
    m.dependsOn ??= [];
    m.donePredicates ??= [];

    const sliceDir = sliceRel(m.id);
    mkdirSync(stzPath(root, sliceDir), { recursive: true });
    await writeFile(stzPath(root, join(sliceDir, "manifest.json")), `${JSON.stringify(m, null, 2)}\n`, "utf8");
    const dependsText = m.dependsOn.join(", ") || "—";
    await writeDoc(root, join(sliceDir, "manifest.md"), {
      frontmatter: { summary: m.summary, contract: m.contract, complexity: m.complexity },
      body: `# ${m.id} — ${m.name}\n\n## Contract\n\n\`${m.contract}\`\n\n## Depends on\n${dependsText}\n`,
    });

    // Seed per-slice state: the four early phases were settled at the project
    // level, so they start `done`; the tournament half remains for /stz:run.
    let seeded = freshState(m.id, m.complexity ?? 1);
    for (const early of ["elicitation", "research", "ground-truth-validation", "standards"] as const) {
      seeded = setPhaseStatus(seeded, early, "done");
    }
    await saveState(root, seeded);
    created.push(m.id);

    // Also register in the project DAG.
    await sliceAddInternal(root, { id: m.id, name: m.name, dependsOn: m.dependsOn });
  }
  print({ created, seeded: true });
}
|
|
497
|
+
|
|
498
|
+
/**
 * Register `entry` in the project manifest (replacing any existing entry with
 * the same id) and make sure the project state tracks the slice, defaulting
 * it to "pending". Unlike the `slice-add` subcommand, this neither appends an
 * event nor prints anything.
 */
async function sliceAddInternal(root: string, entry: ProjectSliceEntry): Promise<void> {
  const manifest = readJSON<ProjectManifest>(projectManifestPath(root));
  const others = (manifest.slices ?? []).filter((s) => s.id !== entry.id);
  manifest.slices = [...others, entry];
  await writeFile(projectManifestPath(root), `${JSON.stringify(manifest, null, 2)}\n`, "utf8");

  const projectState = await loadProjectState(root);
  if (!(entry.id in projectState.sliceStatus)) {
    projectState.sliceStatus[entry.id] = "pending";
  }
  await saveProjectState(root, projectState);
}
|
|
507
|
+
|
|
508
|
+
/**
 * project-set-config: persist the run configuration captured during
 * `/stz:new`. Reads a (possibly partial) config JSON from `--config`, passes
 * it through `normalizeRunConfig`, then saves the run config, renders the
 * human-readable run-config.md, appends a `run-config-set` event, and prints
 * the resolved config. When normalization throws, the message goes to stderr,
 * the exit code is set to 1, and nothing is written.
 */
async function projectSetConfig(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const requested = readJSON<Partial<RunConfig>>(args.config!);
  let resolved: RunConfig;
  try {
    resolved = normalizeRunConfig(requested);
  } catch (err) {
    process.stderr.write(`${(err as Error).message}\n`);
    process.exitCode = 1;
    return;
  }

  await saveRunConfig(root, resolved);
  await writeRunConfigDoc(root, resolved);

  const projectState = await loadProjectState(root);
  const detail = `N=${resolved.fanout}, ${resolved.granularity}, cov≥${resolved.strictness.coverageTarget}, dark-factory=${resolved.darkFactory}`;
  appendProjectEvent(projectState, "elicitation", "run-config-set", detail);
  await saveProjectState(root, projectState);
  print(resolved);
}
|
|
532
|
+
|
|
533
|
+
/**
 * Render the human-readable `00-intent/run-config.md` for `config`: a summary
 * line in the frontmatter plus a body listing granularity, fan-out,
 * strictness, dark-factory mode, and the model chosen per role. Shared by
 * set-config and the dark-factory toggle.
 */
async function writeRunConfigDoc(root: string, config: RunConfig): Promise<void> {
  const { strictness, models } = config;
  const darkFactoryText = config.darkFactory
    ? "**on** — autonomous end-to-end, human gates skipped (except the F2 predicate gate)"
    : "off — human-in-the-loop";
  const roles = ["planning", "research", "execution", "testing", "validation", "judging"] as const;
  const modelRows = roles.map((role) => `| ${role} | ${models[role]} |`);

  // The trailing "" makes the joined body end with a newline.
  const lines = [
    "# Run configuration",
    "",
    `- **Slicing granularity:** ${config.granularity}`,
    `- **Specimen fan-out (N):** ${config.fanout}`,
    `- **Strictness:** coverage ≥ ${strictness.coverageTarget}, mutation ${strictness.mutationPolicy}, conventions ${strictness.conventions}`,
    `- **Dark-factory mode:** ${darkFactoryText}`,
    "",
    "## Models per role",
    "",
    "| role | model |",
    "|---|---|",
    ...modelRows,
    "",
  ];

  await writeDoc(root, join("00-intent", "run-config.md"), {
    frontmatter: {
      summary: `Run config: ${config.granularity} slicing, N=${config.fanout}, coverage≥${strictness.coverageTarget}, mutation ${strictness.mutationPolicy}, conventions ${strictness.conventions}, dark-factory ${config.darkFactory ? "on" : "off"}.`,
    },
    body: lines.join("\n"),
  });
}
|
|
551
|
+
|
|
552
|
+
/**
 * project-dark-factory: flip dark-factory mode at ANY point in the run (0.4.0).
 *
 * `--off` disables; `--on` or no flag enables; `--enabled true|false` is also
 * accepted (`--off` wins when both are given). Implemented as a
 * load-modify-save via `setDarkFactory` — it must NOT round-trip through
 * `project-set-config`, whose normalize-over-defaults merge would silently
 * reset every other field. The lifecycle event is appended only when a
 * project state already exists.
 */
async function projectDarkFactory(args: Record<string, string>): Promise<void> {
  const root = args.root!;

  let enabled = true;
  if (args.off) {
    enabled = false;
  } else if (args.enabled !== undefined) {
    enabled = String(args.enabled).trim().toLowerCase() === "true";
  }

  const config = await setDarkFactory(root, enabled);
  await writeRunConfigDoc(root, config);

  if (projectStateExists(root)) {
    const projectState = await loadProjectState(root);
    const detail = enabled ? "engaged — autonomous run" : "disengaged — human-in-the-loop";
    appendProjectEvent(projectState, "lifecycle", "dark-factory", detail);
    await saveProjectState(root, projectState);
  }
  print({ darkFactory: config.darkFactory, runConfig: config });
}
|
|
571
|
+
|
|
572
|
+
/**
 * project-config: READ-ONLY — print the run config (defaults if unset).
 *
 * The printed object is the loaded config plus an `isDefault` flag, which is
 * true when no run-config file exists for this root.
 *
 * @param args Parsed subcommand flags; `root` is the project root.
 */
async function projectConfig(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const loaded = await loadRunConfig(root);
  const isDefault = !runConfigExists(root);
  print({ ...loaded, isDefault });
}
|
|
578
|
+
|
|
579
|
+
/**
 * project-status: READ-ONLY DAG + phase status + next runnable slice.
 *
 * Prints a single JSON object with the phase statuses, the topological slice
 * order, a per-slice status map, dashboard-ready slice rows, computed progress
 * totals, the runnable frontier, and the effective run config. If the slice
 * graph cannot be ordered (cycle or dangling dependency) the error is printed
 * instead and the process exit code is set to 1.
 *
 * @param args Parsed subcommand flags; `root` is the project root.
 */
async function projectStatus(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const manifest = readJSON<ProjectManifest>(projectManifestPath(root));
  const slices = manifest.slices ?? [];
  const state = await loadProjectState(root);
  const topo = topoOrder(slices);
  if (!topo.ok) {
    print(topo.error === "cycle" ? { error: "cycle", cycle: topo.cycle } : { error: "dangling", from: topo.from, missing: topo.missing });
    process.exitCode = 1;
    return;
  }
  const sliceStatus: Record<string, string> = {};
  for (const id of topo.order) sliceStatus[id] = await deriveSliceStatus(root, id);

  // Enriched, dashboard-ready rows + computed progress totals — so the pipeline
  // dashboard renders a fixed table from data rather than the agent eyeballing
  // counts (which drift run to run). winner/faithful are read from the slice's
  // judgment file and its spec-diff summary.
  const byId = new Map(slices.map((s) => [s.id, s]));
  const tally = { done: 0, running: 0, halted: 0, pending: 0 };
  const sliceRows: { id: string; dependsOn: string[]; status: string; winner: string | null; faithful: boolean | null }[] = [];
  // The zero is anchored on a word boundary: an unanchored /0 missing/ also
  // matches "10 missing", "20 missing", "100 missing", … and would report a
  // slice with missing spec items as faithful.
  const zeroMissing = /\b0 missing/;
  for (const id of topo.order) {
    const status = sliceStatus[id]!;
    // Statuses outside the four known buckets are listed in the rows but not tallied.
    if (status === "done" || status === "running" || status === "halted" || status === "pending") tally[status]++;
    let winner: string | null = null;
    const jp = judgmentPath(root, id);
    if (existsSync(jp)) winner = readJSON<{ winner: string | null }>(jp).winner;
    // null = no spec-diff document exists yet for this slice.
    let faithful: boolean | null = null;
    const sdRel = join(sliceRel(id), "spec-diff.md");
    if (existsSync(stzPath(root, sdRel))) {
      const sd = await readDoc(root, sdRel);
      faithful = zeroMissing.test(String(sd.frontmatter.summary ?? ""));
    }
    sliceRows.push({ id, dependsOn: byId.get(id)?.dependsOn ?? [], status, winner, faithful });
  }
  const phasesDone = Object.values(state.phaseStatus).filter((s) => s === "done").length;
  const progress = {
    phases: { done: phasesDone, total: PROJECT_PHASES.length },
    slices: { total: slices.length, ...tally },
  };

  const runnable = await nextRunnable(slices, (id) => deriveSliceStatus(root, id));
  const slicingDone = state.phaseStatus["slice-disaggregation"] === "done";
  // A corrupt/hand-edited run-config.json must not brick status (and thus every
  // command's first call). Fall back to defaults rather than throwing.
  let runConfig;
  let runConfigBroken = false;
  try {
    runConfig = await loadRunConfig(root);
  } catch {
    runConfig = defaultRunConfig();
    runConfigBroken = true;
  }
  print({
    projectPhases: state.phaseStatus,
    progress,
    order: topo.order,
    sliceStatus,
    slices: sliceRows,
    // Nothing is runnable until the slice-disaggregation phase is done.
    frontier: slicingDone ? runnable.frontier : [],
    next: slicingDone ? runnable.next : null,
    blocked: !slicingDone,
    runConfig,
    // Hoisted convenience: a command driving the autonomous loop reads this one
    // field rather than reaching into runConfig.darkFactory each phase.
    darkFactory: runConfig.darkFactory,
    runConfigSet: runConfigExists(root) && !runConfigBroken,
    runConfigBroken: runConfigBroken || undefined,
    note: slicingDone ? undefined : "slice execution gated until /stz:slice completes slice-disaggregation",
  });
}
|
|
651
|
+
|
|
652
|
+
/**
 * summary: aggregate every slice's outcome into a completion report.
 *
 * For each slice in the manifest this collects its derived status, the winner
 * from its judgment file, a faithfulness flag from its spec-diff summary, and
 * the culled count from its pressure summary. The table is written to
 * `90-audit/completion-report.md` and the same rows plus totals are printed.
 *
 * @param args Parsed subcommand flags; `root` is the project root.
 */
async function summaryCmd(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const manifest = readJSON<ProjectManifest>(projectManifestPath(root));
  const slices = manifest.slices ?? [];
  const rows: { id: string; winner: string | null; faithful: boolean | null; culled: number | null; status: string }[] = [];
  let done = 0, halted = 0, pending = 0;
  // The zero is anchored on a word boundary: an unanchored /0 missing/ also
  // matches "10 missing", "20 missing", "100 missing", … and would report a
  // slice with missing spec items as faithful.
  const zeroMissing = /\b0 missing/;
  for (const s of slices) {
    const status = await deriveSliceStatus(root, s.id);
    // Every status other than "done" / "halted" is counted as pending.
    if (status === "done") done++; else if (status === "halted") halted++; else pending++;
    let winner: string | null = null;
    const jPath = judgmentPath(root, s.id);
    if (existsSync(jPath)) winner = (readJSON<{ winner: string | null }>(jPath)).winner;
    // null = no spec-diff document exists yet for this slice.
    let faithful: boolean | null = null;
    const sdRel = join(sliceRel(s.id), "spec-diff.md");
    if (existsSync(stzPath(root, sdRel))) {
      const sd = await readDoc(root, sdRel);
      faithful = zeroMissing.test(String(sd.frontmatter.summary ?? ""));
    }
    // null = no pressure document, or its summary has no "<n> culled" figure.
    let culled: number | null = null;
    const pRel = join("50-pressure", s.id, "pressure.md");
    if (existsSync(stzPath(root, pRel))) {
      const pd = await readDoc(root, pRel);
      const m = String(pd.frontmatter.summary ?? "").match(/(\d+) culled/);
      culled = m ? Number(m[1]) : null;
    }
    rows.push({ id: s.id, winner, faithful, culled, status });
  }
  await writeDoc(root, join("90-audit", "completion-report.md"), {
    frontmatter: { summary: `Completion: ${done} done, ${halted} halted, ${pending} pending of ${slices.length} slice(s).` },
    body:
      `# Completion report — ${manifest.name}\n\n` +
      `| slice | status | winner | faithful | culled |\n|---|---|---|---|---|\n` +
      rows.map((r) => `| ${r.id} | ${r.status} | ${r.winner ?? "—"} | ${r.faithful ?? "—"} | ${r.culled ?? "—"} |`).join("\n") +
      "\n",
  });
  print({ slices: rows, done, halted, pending });
}
|
|
690
|
+
|
|
691
|
+
// ── sealed held-out suite integrity (L1/F10) ────────────────────────────────
|
|
692
|
+
|
|
693
|
+
/**
 * seal: freeze the held-out suite into SEAL.json (run after the smoke gate is green).
 *
 * When `seal` reports that nothing was sealed, the drifted and removed files
 * are listed on stderr and the exit code is set to 1. The raw result is
 * printed in either case.
 *
 * @param args Parsed subcommand flags; `root` is the project root.
 */
async function sealCmd(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const outcome = await seal(root);

  if (!outcome.sealed) {
    const changed = [...outcome.drifted, ...outcome.removed].join(", ");
    process.stderr.write(
      `refusing to re-seal: already-sealed file(s) changed [${changed}]. Use seal-amend --reason to record a sanctioned change.\n`,
    );
    process.exitCode = 1;
  }

  print(outcome);
}
|
|
705
|
+
|
|
706
|
+
/**
 * seal-verify: re-hash held-out vs SEAL.json; exit 1 on drift (gates the tournament).
 *
 * Exit code 1 is also set when the suite was never sealed. The verification
 * result is always printed, extended with the current held-out file count.
 *
 * @param args Parsed subcommand flags; `root` is the project root.
 */
function sealVerify(args: Record<string, string>): void {
  const root = args.root!;
  const res = verifySeal(root);

  // At most one complaint applies: "never sealed" takes precedence over drift.
  let complaint: string | undefined;
  if (!res.sealed) {
    complaint = "no SEAL.json — the held-out suite was never sealed; run `seal` first.\n";
  } else if (!res.ok) {
    const drifted = res.drift.map((d) => `${d.file} (${d.status})`).join(", ");
    complaint = `SEAL DRIFT — the frozen held-out suite changed since sealing: ${drifted}. This breaks the anti-hacking seal; investigate before judging. Use seal-amend --reason for a sanctioned fix.\n`;
  }

  if (complaint !== undefined) {
    process.stderr.write(complaint);
    process.exitCode = 1;
  }

  print({ ...res, files: heldOutFiles(root).length });
}
|
|
721
|
+
|
|
722
|
+
/**
 * seal-crosscheck: run the sealed suite against TWO independent references
 * (the test-author's primary one and an independently-authored cross-family
 * one) and report whether they agree.
 *
 * Like `seal-verify` gates the tournament, this gates the seal: any status
 * other than both-pass sets a non-zero exit code so the pipeline PAUSES for
 * human adjudication. Divergence is a GUIDE-class signal (the suite may encode
 * a reference-specific assumption a second author didn't share), NOT an
 * automatic rewrite trigger — see docs/development/sealed-suite.md.
 *
 * A durable audit doc is written to 30-tests/cross-reference.md (outside
 * held-out/, so it is not sealed).
 *
 * @param args Parsed subcommand flags: `root`, `sealed`, `reference-a`, `reference-b`.
 */
async function sealCrosscheck(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const sealed = args.sealed!;
  const refA = args["reference-a"]!;
  const refB = args["reference-b"]!;
  if (!sealed || !refA || !refB) {
    process.stderr.write("seal-crosscheck requires --sealed, --reference-a, and --reference-b.\n");
    process.exitCode = 1;
    return;
  }

  const res = crossReference(sealed, refA, refB);

  // Human-readable verdict; every status other than both-pass / divergent is
  // reported as a both-fail.
  let verdict: string;
  if (res.status === "both-pass") {
    verdict = "✅ both independent references satisfy the sealed suite — no shared-blind-spot signal.";
  } else if (res.status === "divergent") {
    verdict =
      "⚠️ DIVERGENT — exactly one reference satisfies the suite. The suite may encode a reference-specific assumption the other author did not share (a candidate fragile invariant), OR the cross-family reference is simply wrong. This is a GUIDE-class signal: adjudicate by hand — strengthen the stz-test-author guidance + seal-amend, or discard a buggy cross reference. Do NOT auto-rewrite.";
  } else {
    verdict =
      "⛔ both references FAIL the suite — it is unsatisfiable as written (a gate/sensor failure, not a cross-family signal). Send the stderr back to stz-test-author.";
  }

  const bodyLines = [
    "# Cross-family reference check",
    "",
    "A second, independently-authored reference is run against the same sealed",
    "suite to catch blind spots the single test-author reference shares with the",
    "suite (R2 cross-family quorum, applied to the reference).",
    "",
    `- **Primary reference (A):** ${res.a.passed}/${res.a.total} passed (passRate ${res.a.passRate})`,
    `- **Cross-family reference (B):** ${res.b.passed}/${res.b.total} passed (passRate ${res.b.passRate})`,
    `- **Status:** \`${res.status}\``,
    "",
    "## Verdict",
    "",
    verdict,
    "",
  ];
  await writeDoc(root, join("30-tests", "cross-reference.md"), {
    frontmatter: {
      summary: `Cross-family reference check: ${res.status} (A ${res.a.passed}/${res.a.total}, B ${res.b.passed}/${res.b.total}).`,
    },
    body: bodyLines.join("\n"),
  });

  // The audit doc is written first, so it exists even when the gate fails.
  if (!res.bothPass) {
    process.stderr.write(`${verdict}\n`);
    process.exitCode = 1;
  }

  const { status, bothPass, divergent, bothFail, a, b } = res;
  print({ status, bothPass, divergent, bothFail, a, b });
}
|
|
768
|
+
|
|
769
|
+
/**
 * seal-amend: the only sanctioned way to change a sealed file — records
 * from→to + reason.
 *
 * A `--reason` is mandatory; a missing value or the literal "true" is
 * rejected. If `amendSeal` reports nothing was amended, exit code 1 is set,
 * but the result (with the reason) is still printed.
 *
 * @param args Parsed subcommand flags: `root`, `reason`.
 */
async function sealAmend(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const { reason } = args;

  if (!reason || reason === "true") {
    process.stderr.write("seal-amend requires --reason \"<why this sealed-suite change is legitimate>\".\n");
    process.exitCode = 1;
    return;
  }

  const outcome = await amendSeal(root, reason);
  if (!outcome.amended) {
    process.stderr.write("nothing to amend: held-out suite matches SEAL.json (or it was never sealed).\n");
    process.exitCode = 1;
  }

  print({ ...outcome, reason });
}
|
|
785
|
+
|
|
786
|
+
// ── cross-slice merge integrity (sealed-invariant supersession) ─────────────
|
|
787
|
+
|
|
788
|
+
/**
 * Render the human-readable merge-compat.md mirror of the manifest.
 *
 * Loads the compat manifest for `root` and writes `90-audit/merge-compat.md`
 * with one table row per entry (or a single `_none_` row) followed by the
 * manifest's history list.
 *
 * @param root Project root whose compat manifest is rendered.
 */
async function writeCompatDoc(root: string): Promise<void> {
  const manifest = loadCompat(root);
  const { entries, history } = manifest;

  // One markdown table row per manifest entry.
  const renderRow = (e: (typeof entries)[number]): string => {
    const approval = e.approved ? "✅ " + (e.approvedBy ?? "") : "⏳ pending";
    return `| ${e.id} | ${e.supersededSlice} | ${e.supersededBy} | ${e.replacement.slice} | \`${e.panicSubstring}\` | ${approval} | ${e.pendingAmendment} |`;
  };

  let rows = "| _none_ | | | | | | |";
  if (entries.length) {
    rows = entries.map((e) => renderRow(e)).join("\n");
  }

  let historyText = "_none_";
  if (history.length) {
    historyText = history.map((h) => `${h.seq}. ${h.action} ${h.id}: ${h.detail}`).join("\n");
  }

  const approvedCount = entries.filter((e) => e.approved).length;

  const bodyLines = [
    "# Merge compatibility — superseded sealed invariants",
    "",
    "Each entry sanctions an EARLIER slice's sealed-suite failure that a LATER",
    'slice legitimately supersedes (e.g. slice-03 "no respawn" vs slice-05',
    "wave-clear). A failure is sanctioned only when the signature matches, the",
    "replacement invariant also passes, and the entry is approved. Entries are",
    "transitional debt — retired once the superseded suite is `seal-amend`ed.",
    "",
    "| id | superseded | superseded by | replacement proof | signature | approved | pending amendment |",
    "|---|---|---|---|---|---|---|",
    rows,
    "",
    "## History (append-only)",
    "",
    historyText,
    "",
  ];

  await writeDoc(root, join("90-audit", "merge-compat.md"), {
    frontmatter: { summary: `Merge compat: ${entries.length} entry(ies), ${approvedCount} approved.` },
    body: bodyLines.join("\n"),
  });
}
|
|
815
|
+
|
|
816
|
+
/**
 * merge-compat-propose: the merge agent proposes an entry (always unapproved).
 *
 * Reads the entry JSON from the path in `--entry`, adds it to the compat
 * manifest via `proposeCompat`, then saves the manifest and refreshes its
 * markdown mirror. A rejected proposal is reported on stderr with exit code 1
 * and nothing is saved.
 *
 * @param args Parsed subcommand flags: `root`, `entry` (path to the entry JSON).
 */
async function mergeCompatPropose(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const proposal = readJSON<Omit<MergeCompatEntry, "approved" | "approvedBy">>(args.entry!);
  const manifest = loadCompat(root);
  const outcome = proposeCompat(manifest, proposal);

  if (outcome.ok) {
    saveCompat(root, manifest);
    await writeCompatDoc(root);
    print({
      proposed: proposal.id,
      approved: false,
      note: "unapproved — an approver must run merge-compat-approve before this can sanction a merge failure",
    });
    return;
  }

  process.stderr.write(`${outcome.error}\n`);
  process.exitCode = 1;
}
|
|
831
|
+
|
|
832
|
+
/**
 * merge-compat-approve: flip a proposed entry to approved, recording who/why.
 *
 * `--by` is mandatory (a missing value or the literal "true" is rejected) so
 * that an approval is always attributable. On success the manifest is saved
 * and its markdown mirror is refreshed.
 *
 * @param args Parsed subcommand flags: `root`, `id`, `by`.
 */
async function mergeCompatApprove(args: Record<string, string>): Promise<void> {
  // Report a failure on stderr and mark the process as failed.
  const fail = (message: string): void => {
    process.stderr.write(message);
    process.exitCode = 1;
  };

  const root = args.root!;
  const by = args.by;
  if (!by || by === "true") {
    fail('merge-compat-approve requires --by "<who/why>" so a self-approval is auditable.\n');
    return;
  }

  const manifest = loadCompat(root);
  const outcome = approveCompat(manifest, args.id!, by);
  if (!outcome.ok) {
    fail(`${outcome.error}\n`);
    return;
  }

  saveCompat(root, manifest);
  await writeCompatDoc(root);
  print({ approved: args.id, by });
}
|
|
852
|
+
|
|
853
|
+
/**
 * merge-compat-retire: retire an entry once its superseded suite is amended.
 *
 * `--amendment` is mandatory (a missing value or the literal "true" is
 * rejected); it links the retirement to the seal-amend that fixed the suite.
 * On success the manifest is saved and its markdown mirror is refreshed.
 *
 * @param args Parsed subcommand flags: `root`, `id`, `amendment`.
 */
async function mergeCompatRetire(args: Record<string, string>): Promise<void> {
  // Report a failure on stderr and mark the process as failed.
  const fail = (message: string): void => {
    process.stderr.write(message);
    process.exitCode = 1;
  };

  const root = args.root!;
  const amendmentRef = args.amendment;
  if (!amendmentRef || amendmentRef === "true") {
    fail('merge-compat-retire requires --amendment "<seal-amend reason/ref>" linking the wave-aware fix.\n');
    return;
  }

  const manifest = loadCompat(root);
  const outcome = retireCompat(manifest, args.id!, amendmentRef);
  if (!outcome.ok) {
    fail(`${outcome.error}\n`);
    return;
  }

  saveCompat(root, manifest);
  await writeCompatDoc(root);
  print({ retired: args.id, amendment: amendmentRef });
}
|
|
873
|
+
|
|
874
|
+
/**
 * merge-compat-list: READ-ONLY dump of the manifest.
 *
 * @param args Parsed subcommand flags; `root` is the project root.
 */
function mergeCompatList(args: Record<string, string>): void {
  const manifest = loadCompat(args.root!);
  print(manifest);
}
|
|
878
|
+
|
|
879
|
+
/**
 * merge-validate: adjudicate REPORTED sealed-suite results against the compat
 * manifest.
 *
 * The suites are not run here (the assembled crate may be Rust); each reported
 * failure is classified deterministically by `validateMerge`. The verdict is
 * written to `90-audit/merge-validation.md` and printed. The exit code is
 * non-zero unless the verdict is OK — pendingApproval / invalid / unsanctioned
 * failures all block.
 *
 * @param args Parsed subcommand flags: `root`, `results` (path to the results JSON).
 */
async function mergeValidate(args: Record<string, string>): Promise<void> {
  const root = args.root!;
  const results = readJSON<SealedSuiteResult[]>(args.results!);
  const manifest = loadCompat(root);
  const verdict = validateMerge(results, manifest);
  const { sanctioned, pendingApproval, invalid, unsanctioned, unused } = verdict;

  // An empty list renders as an em dash.
  const orDash = (text: string): string => text || "—";

  const sanctionedText = sanctioned.map((s) => `${s.slice}←${s.supersededBy} (${s.entryId})`).join(", ");
  const pendingText = pendingApproval.map((p) => `${p.slice} (${p.entryId})`).join(", ");
  const invalidText = invalid.map((i) => `${i.slice}: ${i.reason}`).join("; ");
  const unsanctionedText = unsanctioned.map((u) => `${u.slice}: ${u.reason}`).join("; ");
  const unusedText = unused.join(", ");

  const bodyLines = [
    "# Merge validation",
    "",
    "Reported sealed-suite results adjudicated against the merge-compat manifest.",
    "(Adjudication is deterministic; the suite *execution* is the caller's — run",
    "it in an ephemeral scratch copy of the assembled crate, never the canonical one.)",
    "",
    `- **Verdict:** ${verdict.ok ? "✅ OK — merge may proceed" : "⛔ BLOCKED"}`,
    `- **Sanctioned supersessions:** ${orDash(sanctionedText)}`,
    `- **Pending approval (blocks):** ${orDash(pendingText)}`,
    `- **Invalid — replacement unproven (blocks):** ${orDash(invalidText)}`,
    `- **Unsanctioned — suspect real defect (blocks):** ${orDash(unsanctionedText)}`,
    `- **Unused approved entries (retire candidates):** ${orDash(unusedText)}`,
    "",
  ];

  await writeDoc(root, join("90-audit", "merge-validation.md"), {
    frontmatter: {
      summary: `Merge validation: ${verdict.ok ? "OK" : "BLOCKED"} — ${sanctioned.length} sanctioned, ${pendingApproval.length} pending, ${invalid.length} invalid, ${unsanctioned.length} unsanctioned.`,
    },
    body: bodyLines.join("\n"),
  });

  if (!verdict.ok) {
    process.stderr.write(
      `MERGE BLOCKED — ${unsanctioned.length} unsanctioned, ${invalid.length} invalid, ${pendingApproval.length} pending-approval failure(s). See 90-audit/merge-validation.md.\n`,
    );
    process.exitCode = 1;
  }

  print(verdict);
}
|
|
914
|
+
|
|
915
|
+
/**
 * Entry point for the bridge CLI: dispatch one subcommand.
 *
 * The first element of `argv` names the subcommand; the remaining elements are
 * parsed into flags by `parseArgs` and handed to the matching handler. An
 * unrecognised (or missing) subcommand is reported on stderr and sets exit
 * code 1.
 *
 * @param argv Subcommand name followed by its flags.
 */
export async function runBridge(argv: string[]): Promise<void> {
  const [sub, ...rest] = argv;
  const args = parseArgs(rest);

  // Subcommand → thunk. Asynchronous handlers return their promise so it is
  // awaited below; synchronous handlers are wrapped in a block body so their
  // return value is discarded.
  const handlers = new Map<string, () => unknown>([
    ["begin", () => begin(args)],
    ["record-eval", () => { recordEval(args); }],
    ["eval", () => { evalCmd(args); }],
    ["gate", () => { gate(args); }],
    ["record-votes", () => { recordVotes(args); }],
    ["select", () => selectCmd(args)],
    ["finalize", () => finalize(args)],
    ["project-init", () => projectInit(args)],
    ["project-phase", () => projectPhase(args)],
    ["project-write-intent", () => projectWriteIntent(args)],
    ["project-record-area", () => projectRecordArea(args)],
    ["project-set-config", () => projectSetConfig(args)],
    ["project-dark-factory", () => projectDarkFactory(args)],
    ["project-config", () => projectConfig(args)],
    ["slice-add", () => sliceAdd(args)],
    ["project-seed-slices", () => projectSeedSlices(args)],
    ["project-status", () => projectStatus(args)],
    ["summary", () => summaryCmd(args)],
    ["seal", () => sealCmd(args)],
    ["seal-verify", () => { sealVerify(args); }],
    ["seal-crosscheck", () => sealCrosscheck(args)],
    ["seal-amend", () => sealAmend(args)],
    ["merge-validate", () => mergeValidate(args)],
    ["merge-compat-propose", () => mergeCompatPropose(args)],
    ["merge-compat-approve", () => mergeCompatApprove(args)],
    ["merge-compat-retire", () => mergeCompatRetire(args)],
    ["merge-compat-list", () => { mergeCompatList(args); }],
  ]);

  const handler = sub === undefined ? undefined : handlers.get(sub);
  if (handler === undefined) {
    process.stderr.write(`unknown bridge subcommand: ${sub}\n`);
    process.exitCode = 1;
    return;
  }

  await handler();
}
|