@tekyzinc/gsd-t 4.0.28 → 4.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/README.md +3 -0
- package/bin/gsd-t-competition-judge.cjs +344 -0
- package/bin/gsd-t.js +16 -0
- package/commands/gsd-t-design-decompose.md +9 -2
- package/commands/gsd-t-help.md +8 -0
- package/commands/gsd-t-milestone.md +9 -2
- package/commands/gsd-t-partition.md +9 -2
- package/package.json +1 -1
- package/templates/CLAUDE-global.md +2 -2
- package/templates/workflows/gsd-t-debug.workflow.js +34 -5
- package/templates/workflows/gsd-t-execute.workflow.js +54 -29
- package/templates/workflows/gsd-t-integrate.workflow.js +37 -7
- package/templates/workflows/gsd-t-phase.workflow.js +368 -25
- package/templates/workflows/gsd-t-quick.workflow.js +59 -7
- package/templates/workflows/gsd-t-verify.workflow.js +67 -47
- package/templates/workflows/gsd-t-wave.workflow.js +7 -4
|
@@ -65,12 +65,47 @@ const INTEGRATE_RESULT_SCHEMA = {
|
|
|
65
65
|
|
|
66
66
|
// ───── Script body ──────────────────────────────────────────────────────────
|
|
67
67
|
|
|
68
|
-
|
|
69
|
-
|
|
68
|
+
// M81: runtime-native helpers (sandbox bans require/fs/path/child_process/process — the
|
|
69
|
+
// old require("./_lib.js")+require("path") crashed this on first eval, TD-113). CLI calls
|
|
70
|
+
// delegate to an agent's Bash; file reads (scope.md/tasks.md) move INTO the worker agent
|
|
71
|
+
// (it has Read). args arrives as a JSON STRING in this runtime. See gsd-t-scan.workflow.js.
|
|
72
|
+
/**
 * Normalizes the raw workflow `args` into a plain object.
 *
 * `args` may arrive as a JSON string. Anything that is not (or does not parse to)
 * a non-null object collapses to `{}`, so later `_args.x` reads can never throw —
 * previously the string "null" parsed to `null` and crashed the first property read.
 *
 * @param {*} raw - The value of the runtime-provided `args`.
 * @returns {object} The parsed/passed-through object, or `{}`.
 */
function _parseWorkflowArgs(raw) {
  let value = raw;
  if (typeof raw === "string") {
    try { value = JSON.parse(raw); } catch { value = null; }
  }
  return (value !== null && typeof value === "object") ? value : {};
}
// `typeof` guard: tolerate the `args` global being absent altogether.
const _args = _parseWorkflowArgs(typeof args === "undefined" ? undefined : args);
|
|
73
|
+
// Reply shape requested from every CLI-runner agent: `ok` and `exitCode` are
// mandatory, the other fields are optional, and extra keys are tolerated.
const _CLI_ENVELOPE_SCHEMA = {
  type: "object",
  required: ["ok", "exitCode"],
  additionalProperties: true,
  properties: {
    ok: { type: "boolean" },
    exitCode: { type: "integer" },
    envelope: {}, // unconstrained
    stdout: { type: "string" },
    stderr: { type: "string" },
    via: { type: "string" },
  },
};
|
|
77
|
+
/**
 * Delegates one GSD-T CLI invocation to a "haiku" agent and returns the agent's report.
 *
 * The agent is told to run `<projectDir>/bin/<localBin>` with node when that file
 * exists, otherwise the global `gsd-t <subcmd>`, and to answer per _CLI_ENVELOPE_SCHEMA.
 *
 * @param {string} projectDir - Project root, also used as the command's cwd.
 * @param {string} subcmd - Subcommand for the global `gsd-t` fallback.
 * @param {Array} argv - Arguments; each one is single-quoted for the shell.
 * @param {string} localBin - Script name looked up under `<projectDir>/bin/`.
 * @param {string} label - Label passed to the agent options.
 * @param {boolean} [parseJson=true] - Ask for stdout parsed into `envelope` (true) or raw `stdout` (false).
 * @param {string} [phaseName] - When truthy, set as the agent's `phase` option.
 * @returns {Promise<object>} The agent's reply; agent failures or empty replies become
 *   `{ ok: false, exitCode: -1, envelope: null, stderr, via: "error" }` instead of being thrown.
 */
async function runCli(projectDir, subcmd, argv, localBin, label, parseJson = true, phaseName) {
  // POSIX single-quote escaping: close the quote, emit an escaped quote, reopen.
  const quote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;
  const argStr = (argv || []).map(quote).join(" ");
  const localPath = `${projectDir}/bin/${localBin}`;
  const prompt = [
    `Run a GSD-T CLI command for the project at \`${projectDir}\` and report the result. Steps:`,
    // The script path is quoted like the arguments so a projectDir containing spaces
    // or shell metacharacters cannot split or extend the command.
    `1. If \`${localPath}\` exists, run: \`node ${quote(localPath)} ${argStr}\` (set via="local"). Otherwise run: \`gsd-t ${subcmd} ${argStr}\` (set via="global"). Use cwd \`${projectDir}\`.`,
    `2. Capture exit code (ok = exitCode 0) and stdout/stderr.`,
    parseJson ? `3. Parse stdout as JSON into \`envelope\` (null if not JSON). Return JSON per the schema.` : `3. Put stdout (trimmed, ≤4000 chars) in \`stdout\`. Return JSON per the schema.`,
    `Do NOT do any other work. ONLY run this one command and report.`,
  ].join("\n");
  const opts = { label, schema: _CLI_ENVELOPE_SCHEMA, model: "haiku" };
  if (phaseName) opts.phase = phaseName;
  const failure = (detail) => ({ ok: false, exitCode: -1, envelope: null, stderr: detail, via: "error" });
  let reply;
  try {
    reply = await agent(prompt, opts);
  } catch (err) {
    // Rejections without a `.message` (strings, undefined) are stringified whole.
    return failure((err && err.message) ? String(err.message) : String(err));
  }
  return reply || failure("agent returned no result");
}
|
|
91
|
+
/**
 * Runs the `preflight` subcommand with `--json` through runCli (local script name
 * `cli-preflight.cjs`, JSON parsing on) and resolves to runCli's result.
 */
async function runPreflight(projectDir, label = "preflight", phaseName) {
  const preflightArgs = ["--json"];
  const outcome = await runCli(projectDir, "preflight", preflightArgs, "cli-preflight.cjs", label, true, phaseName);
  return outcome;
}
|
|
92
|
+
/**
 * Runs the `verify-gate` subcommand with `--json` through runCli (local script name
 * `gsd-t-verify-gate.cjs`, JSON parsing on) and resolves to runCli's result.
 */
async function runVerifyGate(projectDir, label = "verify-gate", phaseName) {
  const gateArgs = ["--json"];
  const outcome = await runCli(projectDir, "verify-gate", gateArgs, "gsd-t-verify-gate.cjs", label, true, phaseName);
  return outcome;
}
|
|
93
|
+
/**
 * Invokes the `parallel` subcommand in `--dry-run` mode through runCli, adding one
 * `--domain <name>` pair per entry of `domains`; raw stdout is requested (no JSON parse).
 */
async function proveFileDisjointness(projectDir, domains, label = "disjointness", phaseName) {
  const domainFlags = Array.from(domains || []).flatMap((name) => ["--domain", name]);
  const cliArgs = ["--dry-run", ...domainFlags];
  return runCli(projectDir, "parallel", cliArgs, "gsd-t-parallel.cjs", label, false, phaseName);
}
|
|
98
|
+
/**
 * Asks the `brief` CLI (through runCli) to write a context brief for one spawn.
 *
 * @param {string} projectDir - Project root; the brief is written to `<projectDir>/.gsd-t/briefs/<id>.json`.
 * @param {object} [options]
 * @param {string} [options.kind="execute"] - Passed as `--kind`.
 * @param {string} [options.milestone] - Passed as `--milestone` when truthy.
 * @param {string} [options.domain] - Passed as `--domain` when truthy.
 * @param {string} options.id - Spawn id, also the brief's file name. Required.
 * @param {string} [options.label="brief"] - Label forwarded to runCli.
 * @param {string} [options.phaseName] - Phase forwarded to runCli.
 * @returns {Promise<{ok: boolean, briefPath: (string|null), via: string}>}
 *   `briefPath` is null only when `id` is missing.
 */
async function generateBrief(projectDir, { kind = "execute", milestone, domain, id, label = "brief", phaseName } = {}) {
  // Without an id the CLI would receive the literal "undefined" as spawn id and
  // output file name; fail fast instead of spending an agent call on a junk brief.
  if (id == null || id === "") {
    return { ok: false, briefPath: null, via: "error" };
  }
  const briefPath = `${projectDir}/.gsd-t/briefs/${id}.json`;
  const argv = ["--kind", kind, "--spawn-id", id, "--out", briefPath];
  if (milestone) argv.push("--milestone", milestone);
  if (domain) argv.push("--domain", domain);
  const r = await runCli(projectDir, "brief", argv, "gsd-t-context-brief.cjs", label, false, phaseName);
  return { ok: r.ok, briefPath, via: r.via };
}
|
|
70
105
|
|
|
71
|
-
const projectDir =
|
|
72
|
-
const milestone =
|
|
73
|
-
const domains = (
|
|
106
|
+
const projectDir = _args.projectDir || ".";
|
|
107
|
+
const milestone = _args.milestone || null;
|
|
108
|
+
const domains = (Array.isArray(_args.domains) && _args.domains) || [];
|
|
74
109
|
|
|
75
110
|
if (!milestone) {
|
|
76
111
|
log("execute: no milestone provided — args.milestone is required");
|
|
@@ -83,7 +118,7 @@ if (!domains.length) {
|
|
|
83
118
|
|
|
84
119
|
phase("Preflight");
|
|
85
120
|
log(`execute: milestone=${milestone}, domains=${domains.length}`);
|
|
86
|
-
const pre =
|
|
121
|
+
const pre = await runPreflight(projectDir);
|
|
87
122
|
if (!pre.ok) {
|
|
88
123
|
log(`preflight FAIL — exitCode=${pre.exitCode}: ${pre.stderr || "(no stderr)"}`);
|
|
89
124
|
return { status: "failed", reason: "preflight-failed", preflight: pre.envelope };
|
|
@@ -93,7 +128,7 @@ log(`preflight OK`);
|
|
|
93
128
|
phase("Disjointness");
|
|
94
129
|
// 4.8-audit fix: scope disjointness to the requested domain set, not the whole project.
|
|
95
130
|
// Without this, an unrelated DRAFT domain elsewhere in the project could flip the result.
|
|
96
|
-
const disj =
|
|
131
|
+
const disj = await proveFileDisjointness(projectDir, domains);
|
|
97
132
|
if (!disj.ok) {
|
|
98
133
|
log(`disjointness FAIL — exitCode=${disj.exitCode}: ${disj.stderr || disj.stdout}`);
|
|
99
134
|
return { status: "failed", reason: "non-disjoint" };
|
|
@@ -105,32 +140,22 @@ const domainResults = await parallel(
|
|
|
105
140
|
domains.map((domain) => async () => {
|
|
106
141
|
// 4.8-audit fix: per-domain brief (M55-D2 brief-per-spawn semantic) — each worker
|
|
107
142
|
// gets a brief scoped to its own domain so grep-the-brief is most effective.
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
// every task" — silently dropping tail content is a correctness regression. Briefs
|
|
115
|
-
// are the compression layer; raw scope/tasks must pass whole.
|
|
116
|
-
const scope = lib.readScope({ projectDir, domain }) || "(scope.md missing)";
|
|
117
|
-
const tasks = lib.readDomainTasks({ projectDir, domain }) || "(tasks.md missing)";
|
|
143
|
+
// M81: generated via an awaited agent (sandbox-safe); the worker reads its own
|
|
144
|
+
// scope.md/tasks.md (it has Read) instead of the orchestrator pre-reading via fs.
|
|
145
|
+
const domBrief = await generateBrief(projectDir, { kind: "execute", milestone, domain, id: `execute-${(milestone || "m").toLowerCase()}-${domain}`, phaseName: "Domains", label: `brief:${domain}` });
|
|
146
|
+
const briefRef = domBrief.ok ? domBrief.briefPath : "(brief generation failed — re-walk repo)";
|
|
147
|
+
const scopePath = `${projectDir}/.gsd-t/domains/${domain}/scope.md`;
|
|
148
|
+
const tasksPath = `${projectDir}/.gsd-t/domains/${domain}/tasks.md`;
|
|
118
149
|
const prompt = [
|
|
119
150
|
`You are the worker agent for the GSD-T domain \`${domain}\` in milestone \`${milestone}\`.`,
|
|
120
151
|
``,
|
|
121
|
-
`
|
|
152
|
+
`FIRST, read these two files in full (do NOT skip or truncate them):`,
|
|
153
|
+
`- Scope (your owned files): \`${scopePath}\``,
|
|
154
|
+
`- Tasks: \`${tasksPath}\``,
|
|
122
155
|
``,
|
|
123
|
-
|
|
124
|
-
``,
|
|
125
|
-
`**Scope (your owned files):**`,
|
|
126
|
-
"```",
|
|
127
|
-
scope,
|
|
128
|
-
"```",
|
|
156
|
+
`Your job: execute every task listed under "## Tasks" in tasks.md, respecting the file ownership in scope.md.`,
|
|
129
157
|
``,
|
|
130
|
-
`**
|
|
131
|
-
"```",
|
|
132
|
-
tasks,
|
|
133
|
-
"```",
|
|
158
|
+
`**Brief (REQUIRED READ):** ${briefRef} — if present, grep this JSON first instead of re-reading CLAUDE.md and contracts.`,
|
|
134
159
|
``,
|
|
135
160
|
`Constraints:`,
|
|
136
161
|
`- Touch only files in your scope's "Owned Files" list.`,
|
|
@@ -194,7 +219,7 @@ if (integrate.status === "failed") {
|
|
|
194
219
|
}
|
|
195
220
|
|
|
196
221
|
phase("Verify-Gate");
|
|
197
|
-
const vg =
|
|
222
|
+
const vg = await runVerifyGate(projectDir);
|
|
198
223
|
log(`verify-gate exitCode=${vg.exitCode} ok=${vg.ok}`);
|
|
199
224
|
|
|
200
225
|
return {
|
|
@@ -16,11 +16,41 @@ export const meta = {
|
|
|
16
16
|
],
|
|
17
17
|
};
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
// M81: runtime-native helpers (sandbox bans require/fs/child_process/process — the old
|
|
20
|
+
// require("./_lib.js") crashed this workflow on first eval, TD-113). Delegate CLI calls
|
|
21
|
+
// to an agent's Bash; args arrives as a JSON STRING in this runtime. See gsd-t-scan.workflow.js.
|
|
22
|
+
/**
 * Normalizes the raw workflow `args` into a plain object.
 *
 * `args` may arrive as a JSON string. Anything that is not (or does not parse to)
 * a non-null object collapses to `{}`, so later `_args.x` reads can never throw —
 * previously the string "null" parsed to `null` and crashed the first property read.
 *
 * @param {*} raw - The value of the runtime-provided `args`.
 * @returns {object} The parsed/passed-through object, or `{}`.
 */
function _parseWorkflowArgs(raw) {
  let value = raw;
  if (typeof raw === "string") {
    try { value = JSON.parse(raw); } catch { value = null; }
  }
  return (value !== null && typeof value === "object") ? value : {};
}
// `typeof` guard: tolerate the `args` global being absent altogether.
const _args = _parseWorkflowArgs(typeof args === "undefined" ? undefined : args);
|
|
23
|
+
// Reply shape requested from every CLI-runner agent: `ok` and `exitCode` are
// mandatory, the other fields are optional, and extra keys are tolerated.
const _CLI_ENVELOPE_SCHEMA = {
  type: "object",
  required: ["ok", "exitCode"],
  additionalProperties: true,
  properties: {
    ok: { type: "boolean" },
    exitCode: { type: "integer" },
    envelope: {}, // unconstrained
    stdout: { type: "string" },
    stderr: { type: "string" },
    via: { type: "string" },
  },
};
|
|
27
|
+
/**
 * Delegates one GSD-T CLI invocation to a "haiku" agent and returns the agent's report.
 *
 * The agent is told to run `<projectDir>/bin/<localBin>` with node when that file
 * exists, otherwise the global `gsd-t <subcmd>`, and to answer per _CLI_ENVELOPE_SCHEMA.
 *
 * @param {string} projectDir - Project root, also used as the command's cwd.
 * @param {string} subcmd - Subcommand for the global `gsd-t` fallback.
 * @param {Array} argv - Arguments; each one is single-quoted for the shell.
 * @param {string} localBin - Script name looked up under `<projectDir>/bin/`.
 * @param {string} label - Label passed to the agent options.
 * @param {boolean} [parseJson=true] - Ask for stdout parsed into `envelope` (true) or raw `stdout` (false).
 * @param {string} [phaseName] - When truthy, set as the agent's `phase` option.
 * @returns {Promise<object>} The agent's reply; agent failures or empty replies become
 *   `{ ok: false, exitCode: -1, envelope: null, stderr, via: "error" }` instead of being thrown.
 */
async function runCli(projectDir, subcmd, argv, localBin, label, parseJson = true, phaseName) {
  // POSIX single-quote escaping: close the quote, emit an escaped quote, reopen.
  const quote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;
  const argStr = (argv || []).map(quote).join(" ");
  const localPath = `${projectDir}/bin/${localBin}`;
  const prompt = [
    `Run a GSD-T CLI command for the project at \`${projectDir}\` and report the result. Steps:`,
    // The script path is quoted like the arguments so a projectDir containing spaces
    // or shell metacharacters cannot split or extend the command.
    `1. If \`${localPath}\` exists, run: \`node ${quote(localPath)} ${argStr}\` (set via="local"). Otherwise run: \`gsd-t ${subcmd} ${argStr}\` (set via="global"). Use cwd \`${projectDir}\`.`,
    `2. Capture exit code (ok = exitCode 0) and stdout/stderr.`,
    parseJson ? `3. Parse stdout as JSON into \`envelope\` (null if not JSON). Return JSON per the schema.` : `3. Put stdout (trimmed, ≤4000 chars) in \`stdout\`. Return JSON per the schema.`,
    `Do NOT do any other work. ONLY run this one command and report.`,
  ].join("\n");
  const opts = { label, schema: _CLI_ENVELOPE_SCHEMA, model: "haiku" };
  if (phaseName) opts.phase = phaseName;
  const failure = (detail) => ({ ok: false, exitCode: -1, envelope: null, stderr: detail, via: "error" });
  let reply;
  try {
    reply = await agent(prompt, opts);
  } catch (err) {
    // Rejections without a `.message` (strings, undefined) are stringified whole.
    return failure((err && err.message) ? String(err.message) : String(err));
  }
  return reply || failure("agent returned no result");
}
|
|
41
|
+
/**
 * Runs the `preflight` subcommand with `--json` through runCli (local script name
 * `cli-preflight.cjs`, JSON parsing on) and resolves to runCli's result.
 */
async function runPreflight(projectDir, label = "preflight", phaseName) {
  const preflightArgs = ["--json"];
  const outcome = await runCli(projectDir, "preflight", preflightArgs, "cli-preflight.cjs", label, true, phaseName);
  return outcome;
}
|
|
42
|
+
/**
 * Runs the `verify-gate` subcommand with `--json` through runCli (local script name
 * `gsd-t-verify-gate.cjs`, JSON parsing on) and resolves to runCli's result.
 */
async function runVerifyGate(projectDir, label = "verify-gate", phaseName) {
  const gateArgs = ["--json"];
  const outcome = await runCli(projectDir, "verify-gate", gateArgs, "gsd-t-verify-gate.cjs", label, true, phaseName);
  return outcome;
}
|
|
43
|
+
/**
 * Asks the `brief` CLI (through runCli) to write a context brief for one spawn.
 *
 * @param {string} projectDir - Project root; the brief is written to `<projectDir>/.gsd-t/briefs/<id>.json`.
 * @param {object} [options]
 * @param {string} [options.kind="execute"] - Passed as `--kind`.
 * @param {string} [options.milestone] - Passed as `--milestone` when truthy.
 * @param {string} [options.domain] - Passed as `--domain` when truthy.
 * @param {string} options.id - Spawn id, also the brief's file name. Required.
 * @param {string} [options.label="brief"] - Label forwarded to runCli.
 * @param {string} [options.phaseName] - Phase forwarded to runCli.
 * @returns {Promise<{ok: boolean, briefPath: (string|null), via: string}>}
 *   `briefPath` is null only when `id` is missing.
 */
async function generateBrief(projectDir, { kind = "execute", milestone, domain, id, label = "brief", phaseName } = {}) {
  // Without an id the CLI would receive the literal "undefined" as spawn id and
  // output file name; fail fast instead of spending an agent call on a junk brief.
  if (id == null || id === "") {
    return { ok: false, briefPath: null, via: "error" };
  }
  const briefPath = `${projectDir}/.gsd-t/briefs/${id}.json`;
  const argv = ["--kind", kind, "--spawn-id", id, "--out", briefPath];
  if (milestone) argv.push("--milestone", milestone);
  if (domain) argv.push("--domain", domain);
  const r = await runCli(projectDir, "brief", argv, "gsd-t-context-brief.cjs", label, false, phaseName);
  return { ok: r.ok, briefPath, via: r.via };
}
|
|
20
50
|
|
|
21
|
-
const projectDir =
|
|
22
|
-
const milestone =
|
|
23
|
-
const domains =
|
|
51
|
+
const projectDir = _args.projectDir || ".";
|
|
52
|
+
const milestone = _args.milestone || null;
|
|
53
|
+
// Accept only a real array: a string (or any other truthy non-array with a length)
// would otherwise slip past the `!domains.length` guard and be treated as a domain list.
const domains = Array.isArray(_args.domains) ? _args.domains : [];
|
|
24
54
|
|
|
25
55
|
const INTEGRATE_SCHEMA = {
|
|
26
56
|
type: "object",
|
|
@@ -38,9 +68,9 @@ if (!milestone || !domains.length) {
|
|
|
38
68
|
}
|
|
39
69
|
|
|
40
70
|
phase("Preflight");
|
|
41
|
-
const pre =
|
|
71
|
+
const pre = await runPreflight(projectDir);
|
|
42
72
|
if (!pre.ok) return { status: "failed", reason: "preflight-failed", preflight: pre.envelope };
|
|
43
|
-
const brief =
|
|
73
|
+
const brief = await generateBrief(projectDir, { kind: "execute", milestone, id: `integrate-${(milestone || "m").toLowerCase()}` });
|
|
44
74
|
|
|
45
75
|
phase("Integrate");
|
|
46
76
|
const integrate = await agent(
|
|
@@ -63,7 +93,7 @@ if (integrate.status === "failed") {
|
|
|
63
93
|
}
|
|
64
94
|
|
|
65
95
|
phase("Verify-Gate");
|
|
66
|
-
const vg =
|
|
96
|
+
const vg = await runVerifyGate(projectDir);
|
|
67
97
|
return {
|
|
68
98
|
status: vg.ok ? "complete" : "verify-failed",
|
|
69
99
|
integrate,
|
|
@@ -15,7 +15,23 @@
|
|
|
15
15
|
// milestone?: "M61",
|
|
16
16
|
// projectDir?: ".",
|
|
17
17
|
// userInput?: string, // arbitrary input to the phase (e.g. "$ARGUMENTS")
|
|
18
|
+
// competition?: number, // M82: N>1 enables Competition Mode (generate-and-judge)
|
|
19
|
+
// // on eligible upstream phases. N parallel Self-MoA
|
|
20
|
+
// // producers -> judge stage -> winner. Default 1 (off).
|
|
18
21
|
// }
|
|
22
|
+
//
|
|
23
|
+
// M82 Competition Mode (generate-and-judge — the GENERATIVE dual of the
|
|
24
|
+
// orthogonal validation triad). Contract: competition-mode-contract.md v1.0.0.
|
|
25
|
+
// - Eligible phases: partition, milestone, discuss, design-decompose (pre-contract,
|
|
26
|
+
// wide-solution-space). INELIGIBLE: plan/impact/prd/doc-ripple (narrow / one
|
|
27
|
+
// right answer) — competition there is wasted, so a competition arg is ignored.
|
|
28
|
+
// - Producers: N samples of ONE strong model (Self-MoA beats a model zoo), varied
|
|
29
|
+
// by an explicit per-candidate "angle" so they explore different regions.
|
|
30
|
+
// - Judge: partition uses the OBJECTIVE oracle (gsd-t competition-judge --kind
|
|
31
|
+
// partition, scoring via the disjointness prover — a calculator, not a critic,
|
|
32
|
+
// immune to LLM-judge bias). Other phases use a blind+shuffled+rubric judge whose
|
|
33
|
+
// numeric selection is finalized deterministically by competition-judge --kind
|
|
34
|
+
// generic.
|
|
19
35
|
|
|
20
36
|
export const meta = {
|
|
21
37
|
name: "gsd-t-phase",
|
|
@@ -26,7 +42,103 @@ export const meta = {
|
|
|
26
42
|
],
|
|
27
43
|
};
|
|
28
44
|
|
|
29
|
-
|
|
45
|
+
// M81: runtime-native helpers (sandbox bans require/fs/child_process/process — the old
|
|
46
|
+
// require("./_lib.js") crashed this workflow on first eval, TD-113). Delegate CLI calls
|
|
47
|
+
// to an agent's Bash; args arrives as a JSON STRING in this runtime. See gsd-t-scan.workflow.js.
|
|
48
|
+
/**
 * Normalizes the raw workflow `args` into a plain object.
 *
 * `args` may arrive as a JSON string. Anything that is not (or does not parse to)
 * a non-null object collapses to `{}`, so later `_args.x` reads can never throw —
 * previously the string "null" parsed to `null` and crashed the first property read.
 *
 * @param {*} raw - The value of the runtime-provided `args`.
 * @returns {object} The parsed/passed-through object, or `{}`.
 */
function _parseWorkflowArgs(raw) {
  let value = raw;
  if (typeof raw === "string") {
    try { value = JSON.parse(raw); } catch { value = null; }
  }
  return (value !== null && typeof value === "object") ? value : {};
}
// `typeof` guard: tolerate the `args` global being absent altogether.
const _args = _parseWorkflowArgs(typeof args === "undefined" ? undefined : args);
|
|
49
|
+
// Reply shape requested from every CLI-runner agent: `ok` and `exitCode` are
// mandatory, the other fields are optional, and extra keys are tolerated.
const _CLI_ENVELOPE_SCHEMA = {
  type: "object",
  required: ["ok", "exitCode"],
  additionalProperties: true,
  properties: {
    ok: { type: "boolean" },
    exitCode: { type: "integer" },
    envelope: {}, // unconstrained
    stdout: { type: "string" },
    stderr: { type: "string" },
    via: { type: "string" },
  },
};
|
|
53
|
+
// Single-quote a value for safe shell interpolation (Red Team MED-5).
|
|
54
|
+
/**
 * Wraps a value in single quotes for a POSIX shell. Each embedded single quote is
 * rewritten as `'\''` (close quote, escaped quote, reopen quote).
 */
function _shq(s) {
  const escaped = String(s).split("'").join("'\\''");
  return "'" + escaped + "'";
}
|
|
55
|
+
/**
 * Delegates one GSD-T CLI invocation to a "haiku" agent and returns the agent's report.
 *
 * The agent is told to run `<projectDir>/bin/<localBin>` with node when that file
 * exists, otherwise the global `gsd-t <subcmd>`, and to answer per _CLI_ENVELOPE_SCHEMA.
 *
 * @param {string} projectDir - Project root, also used as the command's cwd.
 * @param {string} subcmd - Subcommand for the global `gsd-t` fallback.
 * @param {Array} argv - Arguments; each one is shell-quoted with _shq.
 * @param {string} localBin - Script name looked up under `<projectDir>/bin/`.
 * @param {string} label - Label passed to the agent options.
 * @param {boolean} [parseJson=true] - Ask for stdout parsed into `envelope` (true) or raw `stdout` (false).
 * @param {string} [phaseNameOpt] - When truthy, set as the agent's `phase` option.
 * @returns {Promise<object>} The agent's reply; agent failures or empty replies become
 *   `{ ok: false, exitCode: -1, envelope: null, stderr, via: "error" }` instead of being thrown.
 */
async function runCli(projectDir, subcmd, argv, localBin, label, parseJson = true, phaseNameOpt) {
  // Reuse the file's shared quoting helper rather than a private copy of its regex.
  const argStr = (argv || []).map((a) => _shq(a)).join(" ");
  const localPath = `${projectDir}/bin/${localBin}`;
  const prompt = [
    `Run a GSD-T CLI command for the project at \`${projectDir}\` and report the result. Steps:`,
    // The script path is quoted like the arguments so a projectDir containing spaces
    // or shell metacharacters cannot split or extend the command.
    `1. If \`${localPath}\` exists, run: \`node ${_shq(localPath)} ${argStr}\` (set via="local"). Otherwise run: \`gsd-t ${subcmd} ${argStr}\` (set via="global"). Use cwd \`${projectDir}\`.`,
    `2. Capture exit code (ok = exitCode 0) and stdout/stderr.`,
    parseJson ? `3. Parse stdout as JSON into \`envelope\` (null if not JSON). Return JSON per the schema.` : `3. Put stdout (trimmed, ≤4000 chars) in \`stdout\`. Return JSON per the schema.`,
    `Do NOT do any other work. ONLY run this one command and report.`,
  ].join("\n");
  const opts = { label, schema: _CLI_ENVELOPE_SCHEMA, model: "haiku" };
  if (phaseNameOpt) opts.phase = phaseNameOpt;
  const failure = (detail) => ({ ok: false, exitCode: -1, envelope: null, stderr: detail, via: "error" });
  let reply;
  try {
    reply = await agent(prompt, opts);
  } catch (err) {
    // Rejections without a `.message` (strings, undefined) are stringified whole.
    return failure((err && err.message) ? String(err.message) : String(err));
  }
  return reply || failure("agent returned no result");
}
|
|
69
|
+
/**
 * Runs the `preflight` subcommand with `--json` through runCli (local script name
 * `cli-preflight.cjs`, JSON parsing on) and resolves to runCli's result.
 */
async function runPreflight(projectDir, label = "preflight", phaseNameOpt) {
  const preflightArgs = ["--json"];
  const outcome = await runCli(projectDir, "preflight", preflightArgs, "cli-preflight.cjs", label, true, phaseNameOpt);
  return outcome;
}
|
|
70
|
+
/**
 * Asks the `brief` CLI (through runCli) to write a context brief for one spawn.
 *
 * @param {string} projectDir - Project root; the brief is written to `<projectDir>/.gsd-t/briefs/<id>.json`.
 * @param {object} [options]
 * @param {string} [options.kind="execute"] - Passed as `--kind`.
 * @param {string} [options.milestone] - Passed as `--milestone` when truthy.
 * @param {string} [options.domain] - Passed as `--domain` when truthy.
 * @param {string} options.id - Spawn id, also the brief's file name. Required.
 * @param {string} [options.label="brief"] - Label forwarded to runCli.
 * @param {string} [options.phaseNameOpt] - Phase forwarded to runCli.
 * @returns {Promise<{ok: boolean, briefPath: (string|null), via: string}>}
 *   `briefPath` is the output path on success and null when generation failed.
 */
async function generateBrief(projectDir, { kind = "execute", milestone, domain, id, label = "brief", phaseNameOpt } = {}) {
  // Without an id the CLI would receive the literal "undefined" as spawn id and
  // output file name; fail fast instead of spending an agent call on a junk brief.
  if (id == null || id === "") {
    return { ok: false, briefPath: null, via: "error" };
  }
  const briefPath = `${projectDir}/.gsd-t/briefs/${id}.json`;
  const argv = ["--kind", kind, "--spawn-id", id, "--out", briefPath];
  if (milestone) argv.push("--milestone", milestone);
  if (domain) argv.push("--domain", domain);
  const r = await runCli(projectDir, "brief", argv, "gsd-t-context-brief.cjs", label, false, phaseNameOpt);
  // This file's caller builds its prompt from `brief.briefPath || "(no brief …)"`
  // without checking `ok`, so a failed run must not hand back a path: otherwise
  // agents are pointed at a brief that was never written.
  return { ok: r.ok, briefPath: r.ok ? briefPath : null, via: r.via };
}
|
|
77
|
+
|
|
78
|
+
// M82: run the deterministic selection oracle over a candidate-set spec. The spec
|
|
79
|
+
// is written to a file via the agent's Bash (no fs in this sandbox), then judged by
|
|
80
|
+
// `gsd-t competition-judge --in <file>`. The agent MUST copy the judge's rich output
|
|
81
|
+
// (winner/ranked) up to the TOP LEVEL of its reply — a permissive free-form
|
|
82
|
+
// `envelope:{}` schema let a haiku agent silently drop winner/ranked (caught in the
|
|
83
|
+
// M82 real-sandbox proof: via=local ok=true but winner=undefined). Explicit required
|
|
84
|
+
// fields fix that. Returns { ok, winner, ranked }.
|
|
85
|
+
// Reply shape requested from the competition-judge agent. `ok` and `winner` are
// mandatory at the top level; `winner` may be a string or null.
const _JUDGE_ENVELOPE_SCHEMA = {
  type: "object",
  required: ["ok", "winner"],
  additionalProperties: true,
  properties: {
    ok: { type: "boolean" }, exitCode: { type: "integer" },
    winner: { type: ["string", "null"] },
    // Entries of `ranked` are free-form objects.
    ranked: {
      type: "array",
      items: { type: "object", additionalProperties: true },
    },
    via: { type: "string" },
  },
};
|
|
95
|
+
/**
 * Has a "haiku" agent write a candidate-set spec to disk and run the competition
 * judge CLI over it, then normalizes the agent's reply.
 *
 * @param {string} projectDir - Project root; the spec is written to
 *   `<projectDir>/.gsd-t/briefs/_competition-spec.json`.
 * @param {object} spec - JSON-serializable judge input. Backticks in its serialized
 *   form are replaced with single quotes before it is embedded in the prompt.
 * @param {string} [label="judge"] - Label passed to the agent options.
 * @param {string} [phaseNameOpt] - When truthy, set as the agent's `phase` option.
 * @returns {Promise<{ok: boolean, winner: *, ranked: Array, error?: string}>}
 *   `winner` is null when absent; `ranked` is always an array; `error` is present
 *   only when the agent call itself rejected.
 */
async function runCompetitionJudge(projectDir, spec, label = "judge", phaseNameOpt) {
  // De-fang backticks so producer-supplied text cannot disturb the prompt's markup.
  const specJson = JSON.stringify(spec).replace(/`/g, "'");
  const qDir = _shq(projectDir);
  const specPath = `${projectDir}/.gsd-t/briefs/_competition-spec.json`;
  const qSpec = _shq(specPath);
  const prompt = [
    `Run the GSD-T Competition Mode judge for the project at \`${projectDir}\` and report its FULL output. Steps:`,
    `1. Write this EXACT JSON (one line) to \`${specPath}\` (overwrite; create .gsd-t/briefs/ if needed):`,
    "~~~json",
    specJson,
    "~~~",
    `2. If \`${projectDir}/bin/gsd-t-competition-judge.cjs\` exists, run: \`node ${qDir}/bin/gsd-t-competition-judge.cjs --in ${qSpec} --project-dir ${qDir}\` (set via="local"). Otherwise run: \`gsd-t competition-judge --in ${qSpec} --project-dir ${qDir}\` (set via="global"). cwd \`${projectDir}\`.`,
    `3. The command prints a JSON object to stdout with fields: ok, exitCode, winner, ranked, n.`,
    `4. COPY those fields (ok, exitCode, winner, ranked) up to the TOP LEVEL of your reply, plus via. Do NOT nest them under "envelope". If the command failed, set winner=null.`,
    `Do NOT do any other work.`,
  ].join("\n");
  const opts = { label, schema: _JUDGE_ENVELOPE_SCHEMA, model: "haiku" };
  if (phaseNameOpt) opts.phase = phaseNameOpt;

  let reply = null;
  let failure = null;
  try {
    reply = await agent(prompt, opts);
  } catch (err) {
    failure = (err && err.message) ? String(err.message) : String(err);
  }
  // Prefer top-level fields; fall back to a nested envelope if the agent nested anyway.
  const env = (reply && reply.winner !== undefined) ? reply : ((reply && reply.envelope) || {});
  const outcome = {
    ok: !!env.ok,
    winner: env.winner != null ? env.winner : null,
    // A nested free-form envelope is not schema-checked, so refuse anything that is
    // not an array: callers index into `ranked` directly.
    ranked: Array.isArray(env.ranked) ? env.ranked : [],
  };
  // Surface why the agent call failed instead of discarding the reason.
  if (failure !== null) outcome.error = failure;
  return outcome;
}
|
|
121
|
+
|
|
122
|
+
// Phases where competition pays off (wide solution space, pre-contract, high blast
|
|
123
|
+
// radius). A competition arg on any other phase is ignored (single producer runs).
|
|
124
|
+
// Phase names for which a `competition` argument greater than 1 takes effect.
const COMPETITION_ELIGIBLE = new Set([
  "partition",
  "milestone",
  "discuss",
  "design-decompose",
]);
|
|
125
|
+
|
|
126
|
+
// Rubric axes for the SUBJECTIVE judge (non-partition eligible phases). Partition
|
|
127
|
+
// uses the objective oracle instead and ignores these.
|
|
128
|
+
// Weighted scoring axes per phase. Every phase has four axes: a phase-specific lead
// axis (weight 2), "completeness", a phase-specific third axis, and "simplicity"
// (each weight 1).
const RUBRIC_AXES_BY_PHASE = (() => {
  const buildAxes = (lead, third) => [
    { key: lead, weight: 2 },
    { key: "completeness", weight: 1 },
    { key: third, weight: 1 },
    { key: "simplicity", weight: 1 },
  ];
  return {
    milestone: buildAxes("coherence", "riskCoverage"),
    discuss: buildAxes("soundness", "tradeoffClarity"),
    "design-decompose": buildAxes("fidelity", "reuse"),
  };
})();
|
|
30
142
|
|
|
31
143
|
const VALID_PHASES = [
|
|
32
144
|
"partition", "plan", "discuss", "impact",
|
|
@@ -45,10 +157,19 @@ const PHASE_RESULT_SCHEMA = {
|
|
|
45
157
|
},
|
|
46
158
|
};
|
|
47
159
|
|
|
48
|
-
const projectDir =
|
|
49
|
-
const milestone =
|
|
50
|
-
const userInput =
|
|
51
|
-
const phaseName =
|
|
160
|
+
const projectDir = _args.projectDir || ".";
|
|
161
|
+
const milestone = _args.milestone || null;
|
|
162
|
+
const userInput = _args.userInput || "";
|
|
163
|
+
const phaseName = _args.phase;
|
|
164
|
+
|
|
165
|
+
// M82: clamp competition N to [1,5]. Evidence (Self-MoA, Large Language Monkeys):
|
|
166
|
+
// gains plateau fast; N=3 captures the elbow, >5 is wasteful. N<=1 = off (single producer).
|
|
167
|
+
const _rawN = Number(_args.competition) || 1;
|
|
168
|
+
const competitionN = Math.max(1, Math.min(5, Math.floor(_rawN)));
|
|
169
|
+
const competitionOn = competitionN > 1 && COMPETITION_ELIGIBLE.has(phaseName);
|
|
170
|
+
if (competitionN > 1 && !competitionOn) {
|
|
171
|
+
log(`competition: N=${competitionN} ignored — phase "${phaseName}" is not competition-eligible (single producer runs). Eligible: ${[...COMPETITION_ELIGIBLE].join(", ")}.`);
|
|
172
|
+
}
|
|
52
173
|
|
|
53
174
|
if (!phaseName || !VALID_PHASES.includes(phaseName)) {
|
|
54
175
|
log(`phase: args.phase must be one of: ${VALID_PHASES.join(", ")}`);
|
|
@@ -56,9 +177,9 @@ if (!phaseName || !VALID_PHASES.includes(phaseName)) {
|
|
|
56
177
|
}
|
|
57
178
|
|
|
58
179
|
phase("Preflight");
|
|
59
|
-
const pre =
|
|
180
|
+
const pre = await runPreflight(projectDir);
|
|
60
181
|
if (!pre.ok) return { status: "failed", reason: "preflight-failed", preflight: pre.envelope };
|
|
61
|
-
const brief =
|
|
182
|
+
const brief = await generateBrief(projectDir, { kind: phaseName, milestone, id: `${phaseName}-${(milestone || "m").toLowerCase()}` });
|
|
62
183
|
|
|
63
184
|
phase("Phase");
|
|
64
185
|
const promptByPhase = {
|
|
@@ -72,23 +193,245 @@ const promptByPhase = {
|
|
|
72
193
|
"doc-ripple": `Identify and update all docs affected by recent code changes per the Document Ripple Completion Gate. No code edits.`,
|
|
73
194
|
};
|
|
74
195
|
|
|
75
|
-
const
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
196
|
+
const baseObjective = promptByPhase[phaseName];
|
|
197
|
+
const briefLine = `**Brief (REQUIRED):** ${brief.briefPath || "(no brief — re-walk repo)"}`;
|
|
198
|
+
|
|
199
|
+
let result;
|
|
200
|
+
if (!competitionOn) {
|
|
201
|
+
// ── Single-producer path (default, unchanged behavior) ──
|
|
202
|
+
result = await agent(
|
|
203
|
+
[
|
|
204
|
+
`You are the ${phaseName} phase agent.`,
|
|
205
|
+
milestone ? `Milestone: ${milestone}` : "",
|
|
206
|
+
briefLine,
|
|
207
|
+
userInput ? `\nUser input:\n${userInput}` : "",
|
|
208
|
+
``,
|
|
209
|
+
`Objective: ${baseObjective}`,
|
|
210
|
+
``,
|
|
211
|
+
`Follow the CLAUDE.md Pre-Commit Gate. Commit artifacts with prefix "${(milestone || "m").toLowerCase()}(${phaseName})".`,
|
|
212
|
+
`Return JSON per the schema.`,
|
|
213
|
+
].filter(Boolean).join("\n"),
|
|
214
|
+
{ label: phaseName, phase: "Phase", schema: PHASE_RESULT_SCHEMA, model: "opus" }
|
|
215
|
+
).catch((e) => ({ status: "failed", artifacts: [], summary: `agent error: ${e && e.message}` }));
|
|
216
|
+
} else {
|
|
217
|
+
// ── M82 Competition Mode: generate -> judge -> finalize ──
|
|
218
|
+
// Distinct "angles" so the N Self-MoA producers explore different regions of
|
|
219
|
+
// the solution space (diversity by prompt, not by model — Self-MoA > Mixed-MoA).
|
|
220
|
+
const ANGLES = [
|
|
221
|
+
"Optimize for MAXIMUM parallelism: carve the most file-disjoint domains that can run concurrently.",
|
|
222
|
+
"Optimize for SIMPLICITY: the fewest domains with the cleanest, most obvious boundaries.",
|
|
223
|
+
"Optimize for RISK ISOLATION: isolate the riskiest/most-coupled work into its own domain so the rest stays safe.",
|
|
224
|
+
"Optimize for DEPENDENCY DEPTH: minimize serial gates (waves) between domains.",
|
|
225
|
+
"Optimize for BALANCE: roughly equal-sized domains with minimal cross-talk.",
|
|
226
|
+
];
|
|
227
|
+
|
|
228
|
+
const PRODUCER_SCHEMA = phaseName === "partition"
|
|
229
|
+
? {
|
|
230
|
+
type: "object", required: ["id", "domains"], additionalProperties: true,
|
|
231
|
+
properties: {
|
|
232
|
+
id: { type: "string" },
|
|
233
|
+
rationale: { type: "string" },
|
|
234
|
+
domains: {
|
|
235
|
+
type: "array", items: {
|
|
236
|
+
type: "object", required: ["name", "touches"], additionalProperties: true,
|
|
237
|
+
properties: {
|
|
238
|
+
name: { type: "string" },
|
|
239
|
+
touches: { type: "array", items: { type: "string" } },
|
|
240
|
+
summary: { type: "string" },
|
|
241
|
+
},
|
|
242
|
+
},
|
|
243
|
+
},
|
|
244
|
+
},
|
|
245
|
+
}
|
|
246
|
+
: {
|
|
247
|
+
type: "object", required: ["id", "proposal"], additionalProperties: true,
|
|
248
|
+
properties: { id: { type: "string" }, proposal: { type: "string" }, rationale: { type: "string" } },
|
|
249
|
+
};
|
|
250
|
+
|
|
251
|
+
phase("Compete");
|
|
252
|
+
log(`competition: ${competitionN} producers (Self-MoA, model=opus) for ${phaseName}`);
|
|
253
|
+
const ids = ["A", "B", "C", "D", "E"];
|
|
254
|
+
const candidates = (await parallel(
|
|
255
|
+
Array.from({ length: competitionN }, (_, i) => () =>
|
|
256
|
+
agent(
|
|
257
|
+
[
|
|
258
|
+
`You are candidate ${ids[i]} — one of ${competitionN} INDEPENDENT ${phaseName} proposals competing on quality.`,
|
|
259
|
+
milestone ? `Milestone: ${milestone}` : "",
|
|
260
|
+
briefLine,
|
|
261
|
+
userInput ? `\nUser input:\n${userInput}` : "",
|
|
262
|
+
``,
|
|
263
|
+
`Objective: ${baseObjective}`,
|
|
264
|
+
`Your distinct angle: ${ANGLES[i % ANGLES.length]}`,
|
|
265
|
+
``,
|
|
266
|
+
`DO NOT write or commit any files. PROPOSE ONLY — return your proposal as JSON per the schema.`,
|
|
267
|
+
phaseName === "partition"
|
|
268
|
+
? `For "touches", list the concrete repo file paths each domain will WRITE (its owned files). Be specific and realistic — the judge scores file-disjointness from these.`
|
|
269
|
+
: `Put the full proposal text in "proposal".`,
|
|
270
|
+
`Set "id" to "${ids[i]}".`,
|
|
271
|
+
].filter(Boolean).join("\n"),
|
|
272
|
+
{ label: `candidate:${ids[i]}`, phase: "Compete", schema: PRODUCER_SCHEMA, model: "opus" }
|
|
273
|
+
).then((c) => ({ ...c, id: c.id || ids[i] })).catch(() => null)
|
|
274
|
+
)
|
|
275
|
+
)).filter(Boolean);
|
|
276
|
+
|
|
277
|
+
if (candidates.length === 0) {
|
|
278
|
+
return { status: "failed", artifacts: [], summary: "competition: all producers failed" };
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
phase("Judge");
|
|
282
|
+
let winnerId = null;
|
|
283
|
+
let ranked = [];
|
|
284
|
+
if (phaseName === "partition") {
|
|
285
|
+
// OBJECTIVE oracle judge — calculator, not critic.
|
|
286
|
+
const env = await runCompetitionJudge(projectDir, { kind: "partition", candidates }, "judge:oracle", "Judge");
|
|
287
|
+
winnerId = env.winner; ranked = env.ranked || [];
|
|
288
|
+
} else {
|
|
289
|
+
// SUBJECTIVE judge: a different-model (sonnet) rubric scorer. Candidates are
|
|
290
|
+
// blind (author identity stripped) AND shuffled (deterministic permutation) so
|
|
291
|
+
// judge position no longer correlates with producer index/angle — Red Team
|
|
292
|
+
// HIGH-3: the shuffle was claimed in a comment but never implemented.
|
|
293
|
+
const axes = RUBRIC_AXES_BY_PHASE[phaseName] || [{ key: "quality", weight: 1 }];
|
|
294
|
+
// Deterministic permutation (Math.random is sandbox-banned): rotate by a seed
|
|
295
|
+
// derived from the milestone+phase string so order is stable per run but
|
|
296
|
+
// decoupled from producer index. The CLI tiebreak keys off the candidate's own
|
|
297
|
+
// id (carried through), so final selection stays reproducible regardless.
|
|
298
|
+
const seedStr = `${milestone || "m"}:${phaseName}`;
|
|
299
|
+
let seed = 0;
|
|
300
|
+
for (let k = 0; k < seedStr.length; k++) seed = (seed * 31 + seedStr.charCodeAt(k)) >>> 0;
|
|
301
|
+
const rot = candidates.length ? (seed % candidates.length) : 0;
|
|
302
|
+
const shuffled = candidates.map((_, i) => candidates[(i + rot) % candidates.length]);
|
|
303
|
+
const labeled = shuffled.map((c, i) => ({ id: c.id, label: ids[i], text: c.proposal || c.rationale || "" }));
|
|
304
|
+
const rubric = await agent(
|
|
305
|
+
[
|
|
306
|
+
`You are a BLIND, IMPARTIAL judge scoring ${labeled.length} competing ${phaseName} proposals.`,
|
|
307
|
+
`Score each on a 1-5 scale per axis: ${axes.map((a) => a.key).join(", ")}. Higher = better.`,
|
|
308
|
+
`Judge ONLY the content. The labels are arbitrary and the order is randomized — do NOT prefer earlier ones. Be calibrated and critical.`,
|
|
309
|
+
``,
|
|
310
|
+
...labeled.map((c) => `### Candidate ${c.label}\n${c.text}`),
|
|
311
|
+
``,
|
|
312
|
+
`Return JSON: { "scores": [ { "id": "<candidate label A/B/C...>", "<axis>": <1-5>, ... }, ... ] }`,
|
|
313
|
+
`IMPORTANT: use the CANDIDATE LABEL (A, B, C…) shown above as the "id" in your scores.`,
|
|
314
|
+
].join("\n"),
|
|
315
|
+
{
|
|
316
|
+
label: "judge:rubric", phase: "Judge", model: "sonnet",
|
|
317
|
+
schema: {
|
|
318
|
+
type: "object", required: ["scores"], additionalProperties: true,
|
|
319
|
+
properties: { scores: { type: "array", items: { type: "object", additionalProperties: true } } },
|
|
320
|
+
},
|
|
321
|
+
}
|
|
322
|
+
).catch(() => ({ scores: [] }));
|
|
323
|
+
// Map the judge's label-keyed scores back to the REAL candidate ids before
|
|
324
|
+
// deterministic selection (so the winner id matches an actual candidate).
|
|
325
|
+
const labelToId = new Map(labeled.map((c) => [c.label, c.id]));
|
|
326
|
+
const judgeCandidates = (rubric.scores || []).map((s) => {
|
|
327
|
+
const { id, ...rest } = s; return { id: labelToId.get(id) || id, scores: rest };
|
|
328
|
+
});
|
|
329
|
+
const env = await runCompetitionJudge(projectDir, { kind: "generic", axes, candidates: judgeCandidates }, "judge:select", "Judge");
|
|
330
|
+
winnerId = env.winner; ranked = env.ranked || [];
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
// Red Team HIGH-1 (partition only): NEVER fall back to an arbitrary candidate. For partition the
|
|
334
|
+
// judge returns winner=null only when EVERY candidate is file-overlapping
|
|
335
|
+
// (invalid) — committing candidates[0] would ship an invalid partition the
|
|
336
|
+
// dispatcher then mis-fans-out (contract Invariant 2). Hard-fail instead.
|
|
337
|
+
let winner = candidates.find((c) => c.id === winnerId);
|
|
338
|
+
if (!winner) {
|
|
339
|
+
if (phaseName === "partition") {
|
|
340
|
+
log(`competition: no VALID partition among ${candidates.length} candidates — failing the phase (Invariant 2: invalid never selected).`);
|
|
341
|
+
return {
|
|
342
|
+
status: "failed", artifacts: [],
|
|
343
|
+
summary: `competition: no valid (file-disjoint) partition among ${candidates.length} candidates`,
|
|
344
|
+
competition: { n: candidates.length, winner: null, ranked },
|
|
345
|
+
};
|
|
346
|
+
}
|
|
347
|
+
// Subjective phases: fall back to the judge's rank-1, else the first candidate.
|
|
348
|
+
const rank1 = (ranked[0] && candidates.find((c) => c.id === ranked[0].id)) || candidates[0];
|
|
349
|
+
winner = rank1;
|
|
350
|
+
log(`competition: judge returned no winner; falling back to rank-1 (${winner.id}).`);
|
|
351
|
+
}
|
|
352
|
+
log(`competition: winner = ${winner.id} (of ${candidates.map((c) => c.id).join(", ")})`);
|
|
353
|
+
|
|
354
|
+
// FINALIZE: one agent commits the WINNING approach (pick-one at the thesis level),
|
|
355
|
+
// then enriches it with non-overlapping good line-items from the losers (safe union
|
|
356
|
+
// at the separable layer — "winner + salvage orphaned good ideas"; never grafts a
|
|
357
|
+
// coupled thesis). Per the two-gate rule in competition-mode-contract.md.
|
|
358
|
+
phase("Finalize");
|
|
359
|
+
const winnerBlob = phaseName === "partition" ? JSON.stringify(winner.domains) : (winner.proposal || winner.rationale || "");
|
|
360
|
+
const losersBlob = candidates.filter((c) => c.id !== winner.id)
|
|
361
|
+
.map((c) => phaseName === "partition" ? JSON.stringify(c.domains) : (c.proposal || c.rationale || ""))
|
|
362
|
+
.join("\n---\n");
|
|
363
|
+
// For partition, the finalizer must report the EXACT domains+touches it committed
|
|
364
|
+
// so we can RE-VALIDATE the graft (Red Team HIGH-2 / contract Invariant 4: a
|
|
365
|
+
// salvaged "missed file" could silently reintroduce a write-target overlap).
|
|
366
|
+
const FINALIZE_SCHEMA = phaseName === "partition"
|
|
367
|
+
? {
|
|
368
|
+
// finalizedDomains REQUIRED for partition (Red Team recheck LOW-1): if it's
|
|
369
|
+
// optional, a finalizer that omits it silently bypasses re-validation.
|
|
370
|
+
type: "object", required: ["status", "artifacts", "finalizedDomains"], additionalProperties: false,
|
|
371
|
+
properties: {
|
|
372
|
+
status: { type: "string", enum: ["complete", "partial", "blocked", "failed"] },
|
|
373
|
+
artifacts: { type: "array", items: { type: "string" } },
|
|
374
|
+
summary: { type: "string" },
|
|
375
|
+
decisions: { type: "array", items: { type: "string" } },
|
|
376
|
+
finalizedDomains: {
|
|
377
|
+
type: "array", items: {
|
|
378
|
+
type: "object", required: ["name", "touches"], additionalProperties: true,
|
|
379
|
+
properties: { name: { type: "string" }, touches: { type: "array", items: { type: "string" } } },
|
|
380
|
+
},
|
|
381
|
+
},
|
|
382
|
+
},
|
|
383
|
+
}
|
|
384
|
+
: PHASE_RESULT_SCHEMA;
|
|
385
|
+
|
|
386
|
+
result = await agent(
|
|
387
|
+
[
|
|
388
|
+
`You are the ${phaseName} finalizer. A competition selected a WINNING proposal; implement it for real.`,
|
|
389
|
+
milestone ? `Milestone: ${milestone}` : "",
|
|
390
|
+
briefLine,
|
|
391
|
+
``,
|
|
392
|
+
`Objective: ${baseObjective}`,
|
|
393
|
+
``,
|
|
394
|
+
`WINNING proposal (implement this whole — it is a coherent thesis, do NOT Frankenstein it):`,
|
|
395
|
+
winnerBlob,
|
|
396
|
+
``,
|
|
397
|
+
`Other proposals (for SALVAGE ONLY — fold in any non-overlapping, clearly-good line-items, e.g. an extra risk, a missed file, a better domain name — that do NOT conflict with the winning structure. NEVER assign a file to a domain that another domain already owns. If in doubt, leave them out):`,
|
|
398
|
+
losersBlob || "(none)",
|
|
399
|
+
``,
|
|
400
|
+
`Now WRITE the real artifacts and follow the CLAUDE.md Pre-Commit Gate. Commit with prefix "${(milestone || "m").toLowerCase()}(${phaseName})".`,
|
|
401
|
+
phaseName === "partition"
|
|
402
|
+
? `Return JSON per the schema, INCLUDING "finalizedDomains" — the exact {name, touches[]} of every domain you committed (touches = the repo files each domain OWNS/WRITES). This is re-validated for file-disjointness.`
|
|
403
|
+
: `Return JSON per the schema.`,
|
|
404
|
+
`Include the competition outcome in "decisions" (e.g. "competition: winner ${winner.id} of ${candidates.length}").`,
|
|
405
|
+
].filter(Boolean).join("\n"),
|
|
406
|
+
{ label: `${phaseName}:finalize`, phase: "Finalize", schema: FINALIZE_SCHEMA, model: "opus" }
|
|
407
|
+
).catch((e) => ({ status: "failed", artifacts: [], summary: `finalizer error: ${e && e.message}` }));
|
|
408
|
+
|
|
409
|
+
// Re-validate the FINALIZED partition (Invariant 4). If salvage reintroduced an
|
|
410
|
+
// overlap, the finalized graft is invalid → block completion with a clear reason.
|
|
411
|
+
if (phaseName === "partition" && result && result.status !== "failed") {
|
|
412
|
+
const finalized = Array.isArray(result.finalizedDomains) ? result.finalizedDomains : null;
|
|
413
|
+
if (!finalized || !finalized.length) {
|
|
414
|
+
// No finalizedDomains to re-check → can't prove disjointness → block rather
|
|
415
|
+
// than silently accept (Red Team recheck LOW-1: never fail-open on the gate).
|
|
416
|
+
log(`competition: finalizer returned no finalizedDomains — cannot re-validate disjointness, blocking.`);
|
|
417
|
+
result.status = "blocked";
|
|
418
|
+
result.summary = `finalizer did not report finalizedDomains; partition disjointness unverifiable. ${result.summary || ""}`.trim();
|
|
419
|
+
} else {
|
|
420
|
+
const reval = await runCompetitionJudge(
|
|
421
|
+
projectDir,
|
|
422
|
+
{ kind: "partition", candidates: [{ id: "finalized", domains: finalized }] },
|
|
423
|
+
"judge:revalidate", "Finalize"
|
|
424
|
+
);
|
|
425
|
+
if (reval.winner !== "finalized") {
|
|
426
|
+
log(`competition: FINALIZED partition failed re-validation (salvage reintroduced a file overlap) — blocking (Invariant 4).`);
|
|
427
|
+
result.status = "blocked";
|
|
428
|
+
result.summary = `finalized partition is NOT file-disjoint (salvage overlap); re-run finalize dropping the conflicting file. ${result.summary || ""}`.trim();
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
// Thread the competition telemetry up so the caller can report measured SC#1.
|
|
434
|
+
result.competition = { n: candidates.length, winner: winner.id, ranked };
|
|
435
|
+
}
|
|
93
436
|
|
|
94
437
|
return result;
|