ultimate-pi 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +41 -61
- package/.agents/skills/harness-orchestration/SKILL.md +2 -2
- package/.agents/skills/harness-plan/SKILL.md +10 -8
- package/.pi/agents/harness/planning/decompose.md +4 -2
- package/.pi/agents/harness/planning/execution-plan-author.md +25 -14
- package/.pi/agents/harness/planning/hypothesis-validator.md +21 -5
- package/.pi/agents/harness/planning/implementation-researcher.md +42 -0
- package/.pi/agents/harness/planning/plan-adversary.md +19 -3
- package/.pi/agents/harness/planning/plan-evaluator.md +26 -5
- package/.pi/agents/harness/planning/review-integrator.md +23 -9
- package/.pi/agents/harness/planning/scout-graphify.md +1 -1
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +19 -4
- package/.pi/agents/harness/planning/stack-researcher.md +19 -10
- package/.pi/extensions/harness-debate-tools.ts +238 -16
- package/.pi/extensions/harness-live-widget.ts +39 -159
- package/.pi/extensions/harness-plan-approval.ts +47 -5
- package/.pi/extensions/lib/debate-bus-core.ts +69 -15
- package/.pi/extensions/lib/debate-bus-state.ts +6 -0
- package/.pi/extensions/lib/plan-approval/plan-review.ts +56 -0
- package/.pi/extensions/lib/plan-approval/types.ts +1 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
- package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
- package/.pi/extensions/lib/plan-debate-gate.ts +77 -34
- package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
- package/.pi/extensions/lib/plan-debate-round-status.ts +63 -20
- package/.pi/extensions/lib/plan-messenger.ts +93 -17
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/harness/README.md +1 -1
- package/.pi/harness/agents.manifest.json +15 -11
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
- package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
- package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/round-result.schema.json +15 -2
- package/.pi/lib/harness-ui-state.ts +92 -0
- package/.pi/prompts/harness-plan.md +87 -37
- package/.pi/prompts/planning-rubrics.md +31 -0
- package/CHANGELOG.md +11 -0
- package/package.json +2 -2
|
@@ -1,16 +1,54 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
3
|
* smoke-harness-plan — fixture validation for plan-phase pipeline (CI).
|
|
4
|
-
* Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture
|
|
4
|
+
* Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light]
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
import { access,
|
|
7
|
+
import { access, readFile } from "node:fs/promises";
|
|
8
8
|
import { constants } from "node:fs";
|
|
9
|
-
import { dirname, join
|
|
9
|
+
import { dirname, join } from "node:path";
|
|
10
10
|
import { fileURLToPath } from "node:url";
|
|
11
11
|
import { parse as parseYaml } from "yaml";
|
|
12
12
|
import { validateExecutionPlan } from "../../../scripts/validate-plan-dag.mjs";
|
|
13
13
|
|
|
14
|
+
/**
 * Decide whether a fixture's debate artifacts describe a finished outcome.
 *
 * The outcome is complete only when no required focus is missing, the
 * highest-numbered round reported `review_gate_ready: true`, and at least
 * `minRounds` rounds were recorded.
 *
 * @param {{ missing: string[], last_review_gate_ready: boolean, last_round_index: number }} coverage
 *   Coverage summary in the shape returned by `scanFocusCoverage`.
 * @param {string[]} requiredFocus Not read by this function; the missing
 *   focuses are taken from `coverage.missing`.
 * @param {number} minRounds Lowest acceptable value of `last_round_index`.
 * @returns {boolean} `true` when all three conditions hold.
 */
function planOutcomeComplete(coverage, requiredFocus, minRounds) {
  if (coverage.missing.length !== 0) return false;
  if (coverage.last_review_gate_ready !== true) return false;
  return coverage.last_round_index >= minRounds;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Scan `<fixtureRoot>/artifacts` for `review-round-rN.yaml` drafts and
 * summarise which required debate focuses they cover.
 *
 * @param {string} fixtureRoot Directory of the fixture being checked.
 * @param {string[]} requiredFocus Focus names that must each appear as the
 *   `debate_round_focus` of at least one round draft.
 * @returns {Promise<{ covered: string[], missing: string[], last_review_gate_ready: boolean, last_round_index: number }>}
 *   `covered` / `missing` partition `requiredFocus` (in its original order);
 *   `last_round_index` is the highest N seen (0 when there are no drafts);
 *   `last_review_gate_ready` reflects `review_gate_ready === true` on that round.
 */
async function scanFocusCoverage(fixtureRoot, requiredFocus) {
  const { readdir } = await import("node:fs/promises");
  const artifactsDir = join(fixtureRoot, "artifacts");
  const roundFilePattern = /^review-round-r(\d+)\.yaml$/i;

  const covered = new Set();
  let last_review_gate_ready = false;
  let last_round_index = 0;

  const names = (await readdir(artifactsDir)).sort();
  for (const name of names) {
    const match = roundFilePattern.exec(name);
    if (!match) continue;

    // Track the numeric maximum: the lexicographic sort puts r10 before r2,
    // so iteration order alone cannot identify the last round.
    const roundIndex = Number(match[1]);
    if (roundIndex > last_round_index) last_round_index = roundIndex;

    // An empty YAML document parses to null; treat it as a draft with no
    // fields so it counts as "no focus, gate not ready" instead of throwing.
    const draft = parseYaml(await readFile(join(artifactsDir, name), "utf-8")) ?? {};

    const focus = String(draft.debate_round_focus ?? "").trim();
    if (requiredFocus.includes(focus)) covered.add(focus);

    if (roundIndex === last_round_index) {
      last_review_gate_ready = draft.review_gate_ready === true;
    }
  }

  return {
    covered: requiredFocus.filter((f) => covered.has(f)),
    missing: requiredFocus.filter((f) => !covered.has(f)),
    last_review_gate_ready,
    last_round_index,
  };
}
|
|
51
|
+
|
|
14
52
|
// Directory containing this smoke script; both paths below are derived from it.
const SMOKE_DIR = dirname(fileURLToPath(import.meta.url));
// Four directory levels above the script.
const ROOT = join(SMOKE_DIR, "..", "..", "..", "..");
// Plan-phase fixtures live next to the script under fixtures/plan-phase.
const FIXTURE_DIR = join(SMOKE_DIR, "fixtures", "plan-phase");
|
|
16
54
|
|
|
@@ -23,8 +61,16 @@ function ok(msg) {
|
|
|
23
61
|
console.log(` ✓ ${msg}`);
|
|
24
62
|
}
|
|
25
63
|
|
|
26
|
-
|
|
27
|
-
const
|
|
64
|
+
/**
 * Resolve the fixture name that follows `--fixture` on the command line.
 *
 * @param {string[]} args CLI arguments (without node / script path).
 * @returns {string} The token after `--fixture`, or `"minimal-med"` when the
 *   flag is absent, has no following token, or is followed by another option
 *   (a token starting with `-`).
 */
function fixtureNameFromArgs(args) {
  const flagAt = args.indexOf("--fixture");
  const candidate = flagAt === -1 ? undefined : args[flagAt + 1];
  const usable = Boolean(candidate) && !candidate.startsWith("-");
  return usable ? candidate : "minimal-med";
}
|
|
71
|
+
|
|
72
|
+
async function runFixture(name) {
|
|
73
|
+
const fixtureRoot = join(FIXTURE_DIR, name);
|
|
28
74
|
try {
|
|
29
75
|
await access(fixtureRoot, constants.R_OK);
|
|
30
76
|
} catch {
|
|
@@ -49,29 +95,64 @@ async function runFixture() {
|
|
|
49
95
|
await access(reviewPath, constants.R_OK);
|
|
50
96
|
ok("plan-review.md present");
|
|
51
97
|
|
|
52
|
-
const
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
await access(p, constants.R_OK);
|
|
56
|
-
const draft = parseYaml(await readFile(p, "utf-8"));
|
|
57
|
-
if (!draft.schema_version) fail(`${name} missing schema_version`);
|
|
58
|
-
}
|
|
59
|
-
ok("debate round YAML artifacts present");
|
|
98
|
+
const implPath = join(fixtureRoot, "artifacts", "implementation-research.yaml");
|
|
99
|
+
await access(implPath, constants.R_OK);
|
|
100
|
+
ok("implementation-research.yaml present");
|
|
60
101
|
|
|
61
102
|
const researchPath = join(fixtureRoot, "research-brief.yaml");
|
|
62
103
|
const research = parseYaml(await readFile(researchPath, "utf-8"));
|
|
63
104
|
if (!research.decomposition || !research.hypothesis) {
|
|
64
105
|
fail("research-brief.yaml missing decomposition/hypothesis");
|
|
65
106
|
}
|
|
107
|
+
if (!research.implementation) {
|
|
108
|
+
fail("research-brief.yaml missing implementation section");
|
|
109
|
+
}
|
|
66
110
|
ok("research-brief.yaml structure");
|
|
67
111
|
|
|
68
|
-
|
|
112
|
+
const isLight = name === "minimal-low-light";
|
|
113
|
+
const requiredFocus = isLight ? ["spec", "quality"] : ["spec", "wbs", "schedule", "quality"];
|
|
114
|
+
const debateRounds = isLight
|
|
115
|
+
? ["review-round-r1.yaml", "review-round-r2.yaml"]
|
|
116
|
+
: [
|
|
117
|
+
"review-round-r1.yaml",
|
|
118
|
+
"review-round-r2.yaml",
|
|
119
|
+
"review-round-r3.yaml",
|
|
120
|
+
"review-round-r4.yaml",
|
|
121
|
+
];
|
|
122
|
+
const seenFocus = new Set();
|
|
123
|
+
for (const fileName of debateRounds) {
|
|
124
|
+
const p = join(fixtureRoot, "artifacts", fileName);
|
|
125
|
+
await access(p, constants.R_OK);
|
|
126
|
+
const draft = parseYaml(await readFile(p, "utf-8"));
|
|
127
|
+
if (!draft.schema_version) fail(`${fileName} missing schema_version`);
|
|
128
|
+
if (draft.debate_round_focus) seenFocus.add(draft.debate_round_focus);
|
|
129
|
+
}
|
|
130
|
+
for (const focus of requiredFocus) {
|
|
131
|
+
if (!seenFocus.has(focus)) {
|
|
132
|
+
fail(`fixture missing debate_round_focus: ${focus}`);
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
ok(`debate round YAML artifacts (${requiredFocus.length} focuses)`);
|
|
136
|
+
|
|
137
|
+
const coverage = await scanFocusCoverage(fixtureRoot, requiredFocus);
|
|
138
|
+
const minRounds = isLight ? 2 : 4;
|
|
139
|
+
if (!planOutcomeComplete(coverage, requiredFocus, minRounds)) {
|
|
140
|
+
fail("debate outcome incomplete for fixture coverage");
|
|
141
|
+
}
|
|
142
|
+
ok("debate outcome complete for fixture profile");
|
|
143
|
+
|
|
144
|
+
if (isLight && packet.risk_level !== "low") {
|
|
145
|
+
fail("minimal-low-light fixture must use risk_level low");
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
console.log(`smoke-harness-plan: all ${name} fixture checks passed`);
|
|
69
149
|
}
|
|
70
150
|
|
|
71
151
|
async function main() {
|
|
72
152
|
const args = process.argv.slice(2);
|
|
73
153
|
if (args.includes("--fixture")) {
|
|
74
|
-
|
|
154
|
+
const name = fixtureNameFromArgs(args);
|
|
155
|
+
await runFixture(name);
|
|
75
156
|
return;
|
|
76
157
|
}
|
|
77
158
|
if (args.includes("--live")) {
|
|
@@ -80,7 +161,7 @@ async function main() {
|
|
|
80
161
|
);
|
|
81
162
|
return;
|
|
82
163
|
}
|
|
83
|
-
fail("Usage: smoke-harness-plan.mjs --fixture | --live");
|
|
164
|
+
fail("Usage: smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light] | --live");
|
|
84
165
|
}
|
|
85
166
|
|
|
86
167
|
main().catch((err) => {
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://ultimate-pi.local/.pi/harness/specs/plan-implementation-research-brief.schema.json",
|
|
4
|
+
"title": "PlanImplementationResearchBrief",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"additionalProperties": false,
|
|
7
|
+
"required": [
|
|
8
|
+
"schema_version",
|
|
9
|
+
"problem_framing",
|
|
10
|
+
"sub_problems",
|
|
11
|
+
"internal_references",
|
|
12
|
+
"external_references",
|
|
13
|
+
"solution_patterns",
|
|
14
|
+
"similar_implementations",
|
|
15
|
+
"recommended_approach",
|
|
16
|
+
"anti_patterns",
|
|
17
|
+
"open_questions"
|
|
18
|
+
],
|
|
19
|
+
"properties": {
|
|
20
|
+
"schema_version": { "type": "string", "const": "1.0.0" },
|
|
21
|
+
"problem_framing": { "type": "string", "minLength": 1 },
|
|
22
|
+
"sub_problems": {
|
|
23
|
+
"type": "array",
|
|
24
|
+
"items": { "type": "string", "minLength": 1 }
|
|
25
|
+
},
|
|
26
|
+
"internal_references": {
|
|
27
|
+
"type": "array",
|
|
28
|
+
"items": { "$ref": "#/$defs/internal_reference" }
|
|
29
|
+
},
|
|
30
|
+
"external_references": {
|
|
31
|
+
"type": "array",
|
|
32
|
+
"items": { "$ref": "#/$defs/external_reference" }
|
|
33
|
+
},
|
|
34
|
+
"solution_patterns": {
|
|
35
|
+
"type": "array",
|
|
36
|
+
"items": { "$ref": "#/$defs/solution_pattern" }
|
|
37
|
+
},
|
|
38
|
+
"similar_implementations": {
|
|
39
|
+
"type": "array",
|
|
40
|
+
"items": { "$ref": "#/$defs/similar_implementation" }
|
|
41
|
+
},
|
|
42
|
+
"recommended_approach": { "$ref": "#/$defs/recommended_approach" },
|
|
43
|
+
"anti_patterns": {
|
|
44
|
+
"type": "array",
|
|
45
|
+
"items": { "type": "string", "minLength": 1 }
|
|
46
|
+
},
|
|
47
|
+
"open_questions": {
|
|
48
|
+
"type": "array",
|
|
49
|
+
"items": { "type": "string", "minLength": 1 }
|
|
50
|
+
},
|
|
51
|
+
"deep_research_recommended": { "type": "boolean" }
|
|
52
|
+
},
|
|
53
|
+
"$defs": {
|
|
54
|
+
"internal_reference": {
|
|
55
|
+
"type": "object",
|
|
56
|
+
"additionalProperties": false,
|
|
57
|
+
"required": ["path", "relevance", "reuse_signal"],
|
|
58
|
+
"properties": {
|
|
59
|
+
"path": { "type": "string" },
|
|
60
|
+
"relevance": { "type": "string" },
|
|
61
|
+
"reuse_signal": {
|
|
62
|
+
"type": "string",
|
|
63
|
+
"enum": ["high", "med", "low", "none"]
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
},
|
|
67
|
+
"external_reference": {
|
|
68
|
+
"type": "object",
|
|
69
|
+
"additionalProperties": false,
|
|
70
|
+
"required": ["url", "source_type", "summary", "evidence_grade"],
|
|
71
|
+
"properties": {
|
|
72
|
+
"url": { "type": "string" },
|
|
73
|
+
"source_type": { "type": "string" },
|
|
74
|
+
"summary": { "type": "string" },
|
|
75
|
+
"evidence_grade": {
|
|
76
|
+
"type": "string",
|
|
77
|
+
"enum": ["primary", "secondary", "anecdotal"]
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
},
|
|
81
|
+
"solution_pattern": {
|
|
82
|
+
"type": "object",
|
|
83
|
+
"additionalProperties": false,
|
|
84
|
+
"required": ["name", "provenance", "fit", "tradeoffs", "risks"],
|
|
85
|
+
"properties": {
|
|
86
|
+
"name": { "type": "string" },
|
|
87
|
+
"provenance": { "type": "string" },
|
|
88
|
+
"fit": { "type": "string" },
|
|
89
|
+
"tradeoffs": {
|
|
90
|
+
"type": "object",
|
|
91
|
+
"required": ["pros", "cons"],
|
|
92
|
+
"properties": {
|
|
93
|
+
"pros": { "type": "array", "items": { "type": "string" } },
|
|
94
|
+
"cons": { "type": "array", "items": { "type": "string" } }
|
|
95
|
+
}
|
|
96
|
+
},
|
|
97
|
+
"risks": { "type": "array", "items": { "type": "string" } }
|
|
98
|
+
}
|
|
99
|
+
},
|
|
100
|
+
"similar_implementation": {
|
|
101
|
+
"type": "object",
|
|
102
|
+
"additionalProperties": false,
|
|
103
|
+
"required": ["name", "what_it_solves", "gap_vs_us"],
|
|
104
|
+
"properties": {
|
|
105
|
+
"name": { "type": "string" },
|
|
106
|
+
"what_it_solves": { "type": "string" },
|
|
107
|
+
"gap_vs_us": { "type": "string" }
|
|
108
|
+
}
|
|
109
|
+
},
|
|
110
|
+
"recommended_approach": {
|
|
111
|
+
"type": "object",
|
|
112
|
+
"additionalProperties": false,
|
|
113
|
+
"required": ["summary", "recommended_approach_confidence"],
|
|
114
|
+
"properties": {
|
|
115
|
+
"summary": { "type": "string", "minLength": 1 },
|
|
116
|
+
"recommended_approach_confidence": {
|
|
117
|
+
"type": "string",
|
|
118
|
+
"enum": ["low", "med", "high"]
|
|
119
|
+
},
|
|
120
|
+
"confidence_rationale": { "type": "string" },
|
|
121
|
+
"evidence_refs": {
|
|
122
|
+
"type": "array",
|
|
123
|
+
"items": { "type": "string" }
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
}
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
],
|
|
14
14
|
"properties": {
|
|
15
15
|
"schema_version": { "type": "string", "const": "1.0.0" },
|
|
16
|
-
"round_index": { "type": "integer", "minimum": 1, "maximum":
|
|
16
|
+
"round_index": { "type": "integer", "minimum": 1, "maximum": 12 },
|
|
17
17
|
"debate_round_focus": {
|
|
18
18
|
"type": "string",
|
|
19
19
|
"enum": ["spec", "wbs", "schedule", "quality"]
|
|
@@ -39,7 +39,7 @@
|
|
|
39
39
|
"round_index": {
|
|
40
40
|
"type": "integer",
|
|
41
41
|
"minimum": 1,
|
|
42
|
-
"maximum":
|
|
42
|
+
"maximum": 12
|
|
43
43
|
},
|
|
44
44
|
"participants": {
|
|
45
45
|
"type": "array",
|
|
@@ -104,7 +104,9 @@
|
|
|
104
104
|
"additionalProperties": false,
|
|
105
105
|
"required": [
|
|
106
106
|
"name",
|
|
107
|
+
"min_focus_rounds",
|
|
107
108
|
"max_rounds",
|
|
109
|
+
"max_exchanges_per_round",
|
|
108
110
|
"round_token_cap",
|
|
109
111
|
"debate_global_cap"
|
|
110
112
|
],
|
|
@@ -113,8 +115,19 @@
|
|
|
113
115
|
"type": "string",
|
|
114
116
|
"enum": ["aggressive", "plan"]
|
|
115
117
|
},
|
|
118
|
+
"min_focus_rounds": {
|
|
119
|
+
"type": "integer",
|
|
120
|
+
"minimum": 1
|
|
121
|
+
},
|
|
116
122
|
"max_rounds": {
|
|
117
|
-
"type": "integer"
|
|
123
|
+
"type": "integer",
|
|
124
|
+
"minimum": 1,
|
|
125
|
+
"maximum": 12
|
|
126
|
+
},
|
|
127
|
+
"max_exchanges_per_round": {
|
|
128
|
+
"type": "integer",
|
|
129
|
+
"minimum": 1,
|
|
130
|
+
"maximum": 6
|
|
118
131
|
},
|
|
119
132
|
"round_token_cap": {
|
|
120
133
|
"type": "integer"
|
|
@@ -299,6 +299,98 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
|
|
|
299
299
|
return state;
|
|
300
300
|
}
|
|
301
301
|
|
|
302
|
+
/** Severity tag attached to the status hint returned by `deriveHarnessStatusHint`. */
export type HarnessStatusSeverity = "accent" | "warning" | "error" | "success" | "muted";
|
|
308
|
+
|
|
309
|
+
/**
 * Harness phases in sequence. `nextHarnessPhase` uses the position in this
 * list to find the phase that follows a given one.
 */
export const HARNESS_PHASE_ORDER: readonly HarnessPhase[] = ["plan", "execute", "evaluate", "adversary", "merge"] as const;
|
|
316
|
+
|
|
317
|
+
/**
 * Map a harness phase to its short label.
 *
 * @param phase Phase to label.
 * @returns "plan" and "merge" keep their own name; "execute" becomes "build",
 *   "evaluate" becomes "eval", and "adversary" becomes "review".
 */
export function formatHarnessPhaseLabel(phase: HarnessPhase): string {
  switch (phase) {
    case "plan":
    case "merge":
      // These two phases are labelled with their own identifier.
      return phase;
    case "execute":
      return "build";
    case "evaluate":
      return "eval";
    case "adversary":
      return "review";
  }
}
|
|
331
|
+
|
|
332
|
+
/**
 * Return the phase that follows `phase` in `HARNESS_PHASE_ORDER`.
 *
 * @param phase Current phase.
 * @returns The successor phase, or `null` when `phase` is the last entry or
 *   is not present in the order list.
 */
export function nextHarnessPhase(phase: HarnessPhase): HarnessPhase | null {
  const position = HARNESS_PHASE_ORDER.indexOf(phase);
  const hasSuccessor = position >= 0 && position < HARNESS_PHASE_ORDER.length - 1;
  return hasSuccessor ? (HARNESS_PHASE_ORDER[position + 1] ?? null) : null;
}
|
|
337
|
+
|
|
338
|
+
/**
 * Shorten a command string for display.
 *
 * @param command Text to shorten.
 * @param maxLen Maximum length of the result (default 40).
 * @returns `command` unchanged when it already fits; otherwise the leading
 *   characters followed by "..." so the result is exactly `maxLen` long.
 *   When `maxLen` is too small to hold the ellipsis (below 3) the text is
 *   hard-truncated to `maxLen` characters without an ellipsis.
 */
function truncateStatusCommand(command: string, maxLen = 40): string {
  if (command.length <= maxLen) return command;
  const ellipsis = "...";
  // A negative slice end would trim from the tail and yield a result longer
  // than maxLen, so budgets smaller than the ellipsis are cut plainly.
  if (maxLen < ellipsis.length) return command.slice(0, Math.max(0, maxLen));
  return `${command.slice(0, maxLen - ellipsis.length)}${ellipsis}`;
}
|
|
342
|
+
|
|
343
|
+
/**
 * Derive the one-line status hint (text plus severity) for a UI state.
 *
 * Checks run in priority order and the first match wins:
 * budget exhausted, high test-integrity severity, policy block, waiting on a
 * human, a recommended next command, plan-phase approval, policy pass /
 * conditional pass, and finally a per-phase activity label.
 *
 * @param state Current harness UI state.
 * @returns The hint text and the severity to render it with.
 */
export function deriveHarnessStatusHint(state: HarnessUiState): {
  text: string;
  severity: HarnessStatusSeverity;
} {
  const hint = (text: string, severity: HarnessStatusSeverity) => ({ text, severity });

  // Hard failures take precedence over everything else.
  if (state.budgetExhausted) return hint("Budget limit reached", "error");
  if (state.testIntegritySeverity === "high") return hint("Test integrity issue", "error");

  const decision = state.policyDecision;
  if (decision === "block") return hint("Blocked — fix issues first", "error");

  const awaitingHuman = decision === "human_required" || state.flowSubstate === "human-required";
  if (awaitingHuman) return hint("Waiting for your input", "warning");

  // An explicit recommendation outranks the generic phase messages below.
  const nextCommand = state.nextRecommendedCommand;
  if (nextCommand) return hint(`Next: ${truncateStatusCommand(nextCommand)}`, "accent");

  if (state.phase === "plan") {
    return state.planApproved
      ? hint("Plan approved", "success")
      : hint("Approve plan to continue", "warning");
  }

  if (decision === "pass") return hint("Checks passed", "success");
  if (decision === "conditional_pass") return hint("Passed with notes", "warning");

  // Fallback: describe what the current phase is doing.
  const activityByPhase = new Map<string, string>([
    ["execute", "Implementing changes"],
    ["evaluate", "Running checks"],
    ["adversary", "Review gate"],
    ["merge", "Ready to finish"],
  ]);
  const activity = activityByPhase.get(state.phase);
  return activity === undefined ? hint("Planning", "muted") : hint(activity, "accent");
}
|
|
393
|
+
|
|
302
394
|
export class HarnessUiStateStore {
|
|
303
395
|
private lastEntriesLen = -1;
|
|
304
396
|
private cachedState: HarnessUiState = {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
description: PM-grade harness plan — scouts, ExecutionPlan, DAG validation, Review Gate debate, approval.
|
|
2
|
+
description: PM-grade harness plan — scouts, implementation research, ExecutionPlan, DAG validation, selective Review Gate debate, approval.
|
|
3
3
|
argument-hint: "\"<task>\" [--risk low|med|high] [--budget <amount>] [--quick]"
|
|
4
4
|
---
|
|
5
5
|
|
|
@@ -16,6 +16,7 @@ Never `write`/`edit` the final canonical packet except via **`write_harness_yaml
|
|
|
16
16
|
- `harness/planning/scout-semantic` (skip when `--quick`)
|
|
17
17
|
- `harness/planning/decompose`
|
|
18
18
|
- `harness/planning/hypothesis`
|
|
19
|
+
- `harness/planning/implementation-researcher`
|
|
19
20
|
- `harness/planning/stack-researcher`
|
|
20
21
|
- `harness/planning/execution-plan-author`
|
|
21
22
|
- `harness/planning/hypothesis-validator` (debate R1 only)
|
|
@@ -31,7 +32,7 @@ Read **harness-debate-plan** skill before Review Gate rounds.
|
|
|
31
32
|
1. Use `subagent` with `agentScope: "both"` and parallel `tasks` where lanes are independent.
|
|
32
33
|
2. Each `subagent` call blocks until subprocesses finish — batch parallel scouts in one `tasks` array.
|
|
33
34
|
3. Do **not** set `timeoutMs` unless the user explicitly requests a cap — subagents run until natural completion (optional backstop: `PI_SUBAGENT_TIMEOUT_MS`).
|
|
34
|
-
4. No harness subagent spawn cap — run the full scout + debate pipeline without skipping lanes for budget.
|
|
35
|
+
4. No harness subagent spawn cap — run the full scout + research + debate pipeline without skipping lanes for budget.
|
|
35
36
|
5. Compact task text: embed `HarnessSpawnContext` JSON + lane-specific instructions only.
|
|
36
37
|
|
|
37
38
|
## Step 0 — Parse `$ARGUMENTS`
|
|
@@ -39,7 +40,7 @@ Read **harness-debate-plan** skill before Review Gate rounds.
|
|
|
39
40
|
- task (required)
|
|
40
41
|
- `--risk low|med|high`, `--budget`, `--quick`
|
|
41
42
|
|
|
42
|
-
`--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, stack research, execution plan, DAG validation, or **
|
|
43
|
+
`--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, **Phase 3.5 implementation research**, stack research, execution plan, DAG validation, or **Review Gate debate**.
|
|
43
44
|
|
|
44
45
|
## Active plan context
|
|
45
46
|
|
|
@@ -67,29 +68,44 @@ Add `harness/planning/scout-semantic` to `tasks` unless `--quick`. Require graph
|
|
|
67
68
|
|
|
68
69
|
One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis`. Parse `PlanDecompositionBrief` and `PlanHypothesisBrief` from outputs. Persist with `write_harness_yaml` → `artifacts/decomposition.yaml` and `artifacts/hypothesis.yaml`.
|
|
69
70
|
|
|
70
|
-
|
|
71
|
+
Decompose **prior_art** is **internal only** (from scouts). External prior art arrives in Phase 3.5.
|
|
71
72
|
|
|
72
|
-
|
|
73
|
+
## Phase 3.5 — External solution research (required)
|
|
73
74
|
|
|
74
|
-
|
|
75
|
-
- `execution_plan` placeholder until Phase 4b
|
|
75
|
+
**MUST** run unless you document a `human_required` waiver in the run trace. Parallel batch:
|
|
76
76
|
|
|
77
|
-
|
|
77
|
+
```json
|
|
78
|
+
{
|
|
79
|
+
"agentScope": "both",
|
|
80
|
+
"tasks": [
|
|
81
|
+
{ "agent": "harness/planning/implementation-researcher", "task": "<HarnessSpawnContext + paths to decomposition/hypothesis/scout summaries — patterns/repos/workflows only; no stack version SERPs>" },
|
|
82
|
+
{ "agent": "harness/planning/stack-researcher", "task": "<HarnessSpawnContext + stack research brief — libraries/APIs only>" }
|
|
83
|
+
]
|
|
84
|
+
}
|
|
85
|
+
```
|
|
78
86
|
|
|
79
|
-
|
|
87
|
+
- `write_harness_yaml` → `artifacts/implementation-research.yaml` and `artifacts/stack.yaml`.
|
|
88
|
+
- Merge both into `research-brief.yaml` (`implementation:` + `stack:`).
|
|
89
|
+
- **Partial failure:** if one lane fails, re-spawn that lane once; if it still fails, set `plan_status: partial` and raise `human_required` via `ask_user`. Do not proceed to Phase 4b without both artifacts or an explicit human waiver.
|
|
90
|
+
- **Web dedup:** implementation owns patterns/repos; stack owns libraries/versions — no overlapping queries.
|
|
80
91
|
|
|
81
|
-
|
|
92
|
+
On `mode: revise`: re-run implementation-researcher when task scope, acceptance_checks, or >30% work_items change; skip when delta is schedule-only and prior artifact is fresh.
|
|
82
93
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
94
|
+
## Phase 4 — Draft shell
|
|
95
|
+
|
|
96
|
+
Build draft `PlanPacket` (`contract_version: "1.1.0"`):
|
|
97
|
+
|
|
98
|
+
- `scope`, `assumptions`, `acceptance_checks`, `risk_level`, `rollback_plan`
|
|
99
|
+
- `execution_plan` placeholder until Phase 4b
|
|
86
100
|
|
|
87
|
-
|
|
101
|
+
Initialize `research-brief.yaml` with decomposition + hypothesis + Phase 3.5 merges (`write_harness_yaml`).
|
|
102
|
+
|
|
103
|
+
**`ask_user` on material `dialectical_fork`** after Phase 3.5 merge (evidence-backed — conflicting external patterns may trigger `human_required` from eligibility).
|
|
88
104
|
|
|
89
105
|
## Phase 4b — Execution plan author
|
|
90
106
|
|
|
91
107
|
```
|
|
92
|
-
subagent({ agentScope: "both", agent: "harness/planning/execution-plan-author", task: "<HarnessSpawnContext +
|
|
108
|
+
subagent({ agentScope: "both", agent: "harness/planning/execution-plan-author", task: "<HarnessSpawnContext + PlanImplementationResearchBrief + PlanStackBrief + decomposition/hypothesis>" })
|
|
93
109
|
```
|
|
94
110
|
|
|
95
111
|
Merge `execution_plan` into draft `plan-packet.yaml` (`write_harness_yaml`). Save `artifacts/execution-plan-draft.yaml` the same way.
|
|
@@ -102,37 +118,71 @@ node .pi/scripts/validate-plan-dag.mjs --packet .pi/harness/runs/<run_id>/plan-p
|
|
|
102
118
|
|
|
103
119
|
Must **pass** before debate. On fail: fix via author or parent patches, re-run.
|
|
104
120
|
|
|
105
|
-
## Phase
|
|
121
|
+
## Phase 4d — Debate eligibility (before Review Gate)
|
|
122
|
+
|
|
123
|
+
```
|
|
124
|
+
harness_plan_debate_eligibility({ risk_level, material_fork, dag_pass: true, ... })
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Use pre-debate signals only (no R1 hypothesis output). Default to the **standard** profile when the signals are ambiguous.
|
|
106
128
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
129
|
+
If `human_required: true` → `ask_user` before `harness_debate_open`.
|
|
130
|
+
|
|
131
|
+
Then:
|
|
132
|
+
|
|
133
|
+
```
|
|
134
|
+
harness_debate_open({ debate_profile, required_focuses })
|
|
135
|
+
```
|
|
110
136
|
|
|
111
|
-
|
|
112
|
-
|-------|--------------------------|-----------|
|
|
113
|
-
| 1 | `hypothesis-validator` (blind) → `plan-evaluator` → `plan-adversary` | evaluator `claim` → adversary `rebuttal` (`in_reply_to` claim ids) |
|
|
114
|
-
| 2 | `plan-evaluator` → `plan-adversary` | same |
|
|
115
|
-
| 3 | `plan-evaluator` → `plan-adversary` | same |
|
|
116
|
-
| 4 | `plan-evaluator` → `plan-adversary` → **`sprint-contract-auditor`** | same + audit message optional |
|
|
137
|
+
Profiles:
|
|
117
138
|
|
|
118
|
-
|
|
139
|
+
| Profile | Focuses required | min_focus_rounds |
|
|
140
|
+
|---------|------------------|------------------|
|
|
141
|
+
| full | spec, wbs, schedule, quality | 4 |
|
|
142
|
+
| standard | spec, wbs, schedule, quality | 4 |
|
|
143
|
+
| light | spec, quality only | 2 |
|
|
119
144
|
|
|
120
|
-
|
|
145
|
+
## Phase 5 — Review Gate debate (profile-aware, pi-messenger, even with `--quick`)
|
|
121
146
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
147
|
+
**Forbidden:** parallel `subagent` calls for any debate lane agent in one batch. One lane agent per tool batch, in order.
|
|
148
|
+
|
|
149
|
+
1. Optional: `harness_plan_scope_check` — if `material_drift`, `ask_user` before debate.
|
|
150
|
+
2. Drive debate with **`harness_debate_focus_coverage`** and **`harness_debate_round_status({ round_index, debate_round_focus })`** — cover **required_focuses** from eligibility, not always all four.
|
|
151
|
+
|
|
152
|
+
### Focus coverage (required before consensus)
|
|
153
|
+
|
|
154
|
+
Each required focus must appear in a submitted `review-round-rN.yaml` (`debate_round_focus`). Monotonic `round_index` (cap from profile). Consensus only when:
|
|
155
|
+
|
|
156
|
+
- all **required** focuses covered, **and**
|
|
157
|
+
- last round `review_gate_ready: true`, **and**
|
|
158
|
+
- `validate-plan-dag.mjs` still passes (re-run after patches).
|
|
159
|
+
|
|
160
|
+
### Per-round state machine
|
|
161
|
+
|
|
162
|
+
```
|
|
163
|
+
round_index := next round number (monotonic; one round per uncovered required focus)
|
|
164
|
+
debate_round_focus := spec | wbs | schedule | quality for this round
|
|
165
|
+
|
|
166
|
+
IF round_index == 1:
|
|
167
|
+
spawn hypothesis-validator (blind — no decomposition/PlanPacket/scouts/prior debate)
|
|
168
|
+
WHILE NOT ready_for_integrator (harness_debate_round_status with debate_round_focus):
|
|
169
|
+
follow next_tool exactly (one subagent per batch)
|
|
170
|
+
IF debate_round_focus == quality OR round_index >= 4:
|
|
171
|
+
spawn sprint-contract-auditor
|
|
172
|
+
spawn review-integrator → harness_debate_submit_round({ round_index, integrator_draft })
|
|
173
|
+
harness_debate_focus_coverage // repeat until missing required focuses empty
|
|
174
|
+
harness_debate_consensus
|
|
175
|
+
```
|
|
126
176
|
|
|
127
|
-
|
|
177
|
+
Debate agents **must not** call `web_search` / `web_fetch` — cite `artifacts/implementation-research.yaml` instead.
|
|
128
178
|
|
|
129
|
-
**Never**
|
|
179
|
+
**Never** end a Phase 5 turn with prose only — next action must be a harness tool or single sequential `subagent`.
|
|
130
180
|
|
|
131
|
-
**R1 blind rule:** hypothesis-validator
|
|
181
|
+
**R1 blind rule:** hypothesis-validator sees only task + `PlanHypothesisBrief`.
|
|
132
182
|
|
|
133
183
|
If R1 `revision_recommended` or `relevance.passes === false`: one `hypothesis` re-spawn, update brief, continue.
|
|
134
184
|
|
|
135
|
-
**Blockers:** `policy_decision: block` →
|
|
185
|
+
**Blockers:** `policy_decision: block` → no `approve_plan`. `human_required` → `ask_user` first.
|
|
136
186
|
|
|
137
187
|
## Phase 5b — Revise packet
|
|
138
188
|
|
|
@@ -142,7 +192,7 @@ Set `research_brief.eval` from R1 `hypothesis-validator` output.
|
|
|
142
192
|
|
|
143
193
|
## Phase 6 — Approval + persistence
|
|
144
194
|
|
|
145
|
-
1. `approve_plan` with `plan_packet`, `human_summary`, `research_brief` (
|
|
195
|
+
1. `approve_plan` with `plan_packet`, `human_summary`, `research_brief` (include `implementation` section). Missing `artifacts/implementation-research.yaml` → **error** on `--risk high`, **warn** otherwise.
|
|
146
196
|
2. On Approve: `create_plan` with same packet (`contract_version: "1.1.0"` + `execution_plan`).
|
|
147
197
|
3. Confirm `plan_ready: true` → `next_command: /harness-run`.
|
|
148
198
|
|
|
@@ -152,4 +202,4 @@ Post-execute adversary: `/harness-critic` only (not plan-phase agents).
|
|
|
152
202
|
|
|
153
203
|
- `plan_status`: ready | partial | needs_clarification
|
|
154
204
|
- `plan_review_path` for human review
|
|
155
|
-
- DAG `pass` +
|
|
205
|
+
- DAG `pass` + required focus areas covered + consensus not `block` before ready
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Planning Review Gate rubrics (spawn fragment)
|
|
2
|
+
|
|
3
|
+
Parent includes this file in debate agent spawn text. Stable check ids by `debate_round_focus`.
|
|
4
|
+
|
|
5
|
+
## spec
|
|
6
|
+
|
|
7
|
+
- SC-01: Every acceptance_check maps to scope or execution_plan work_item
|
|
8
|
+
- SC-02: Out-of-scope work is listed in decomposition `excluded`
|
|
9
|
+
- SC-03: Hypothesis brief falsifiability and success metrics are testable
|
|
10
|
+
- SC-04: Risk register covers top technical unknowns
|
|
11
|
+
|
|
12
|
+
## wbs
|
|
13
|
+
|
|
14
|
+
- WB-01: Each work_item has typed `done_criteria` (not vague “implement X”)
|
|
15
|
+
- WB-02: No orphan work_items (every item on critical path or sprint_contract)
|
|
16
|
+
- WB-03: `depends_on` is acyclic; parallel_safe only when files disjoint
|
|
17
|
+
- WB-04: wbs_dictionary entry per non-trivial work_item
|
|
18
|
+
|
|
19
|
+
## schedule
|
|
20
|
+
|
|
21
|
+
- SH-01: `schedule_metadata.critical_path_work_item_ids` is non-empty for med/high risk
|
|
22
|
+
- SH-02: Phase entry/exit criteria are observable
|
|
23
|
+
- SH-03: Milestones align with acceptance_checks dates where stated
|
|
24
|
+
- SH-04: No impossible parallelism (same file, conflicting owners)
|
|
25
|
+
|
|
26
|
+
## quality
|
|
27
|
+
|
|
28
|
+
- QL-01: sprint_contract.done_criteria_types complete (ADR-020)
|
|
29
|
+
- QL-02: Verify/lint/test work_items in early phases when risk ≥ med
|
|
30
|
+
- QL-03: Checkpoint gaps between phases documented
|
|
31
|
+
- QL-04: Keep Quality Left — no “test at end only” without justification
|