ultimate-pi 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/.agents/skills/harness-debate-plan/SKILL.md +41 -61
  2. package/.agents/skills/harness-orchestration/SKILL.md +2 -2
  3. package/.agents/skills/harness-plan/SKILL.md +10 -8
  4. package/.pi/agents/harness/planning/decompose.md +4 -2
  5. package/.pi/agents/harness/planning/execution-plan-author.md +25 -14
  6. package/.pi/agents/harness/planning/hypothesis-validator.md +21 -5
  7. package/.pi/agents/harness/planning/implementation-researcher.md +42 -0
  8. package/.pi/agents/harness/planning/plan-adversary.md +19 -3
  9. package/.pi/agents/harness/planning/plan-evaluator.md +26 -5
  10. package/.pi/agents/harness/planning/review-integrator.md +23 -9
  11. package/.pi/agents/harness/planning/scout-graphify.md +1 -1
  12. package/.pi/agents/harness/planning/sprint-contract-auditor.md +19 -4
  13. package/.pi/agents/harness/planning/stack-researcher.md +19 -10
  14. package/.pi/extensions/harness-debate-tools.ts +238 -16
  15. package/.pi/extensions/harness-live-widget.ts +39 -159
  16. package/.pi/extensions/harness-plan-approval.ts +47 -5
  17. package/.pi/extensions/lib/debate-bus-core.ts +69 -15
  18. package/.pi/extensions/lib/debate-bus-state.ts +6 -0
  19. package/.pi/extensions/lib/plan-approval/plan-review.ts +56 -0
  20. package/.pi/extensions/lib/plan-approval/types.ts +1 -0
  21. package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
  22. package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
  23. package/.pi/extensions/lib/plan-debate-gate.ts +77 -34
  24. package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
  25. package/.pi/extensions/lib/plan-debate-round-status.ts +63 -20
  26. package/.pi/extensions/lib/plan-messenger.ts +93 -17
  27. package/.pi/extensions/policy-gate.ts +1 -1
  28. package/.pi/harness/README.md +1 -1
  29. package/.pi/harness/agents.manifest.json +15 -11
  30. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
  31. package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
  32. package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
  33. package/.pi/harness/docs/adrs/README.md +2 -0
  34. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
  35. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
  36. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
  37. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
  38. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
  39. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
  40. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
  41. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
  42. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
  43. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
  44. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
  45. package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
  46. package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
  47. package/.pi/harness/specs/round-result.schema.json +15 -2
  48. package/.pi/lib/harness-ui-state.ts +92 -0
  49. package/.pi/prompts/harness-plan.md +87 -37
  50. package/.pi/prompts/planning-rubrics.md +31 -0
  51. package/CHANGELOG.md +11 -0
  52. package/package.json +2 -2
@@ -1,16 +1,54 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
3
  * smoke-harness-plan — fixture validation for plan-phase pipeline (CI).
4
- * Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture
4
+ * Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light]
5
5
  */
6
6
 
7
- import { access, cp, mkdir, readFile, rm } from "node:fs/promises";
7
+ import { access, readFile } from "node:fs/promises";
8
8
  import { constants } from "node:fs";
9
- import { dirname, join, resolve } from "node:path";
9
+ import { dirname, join } from "node:path";
10
10
  import { fileURLToPath } from "node:url";
11
11
  import { parse as parseYaml } from "yaml";
12
12
  import { validateExecutionPlan } from "../../../scripts/validate-plan-dag.mjs";
13
13
 
14
+ function planOutcomeComplete(coverage, requiredFocus, minRounds) {
15
+ return (
16
+ coverage.missing.length === 0 &&
17
+ coverage.last_review_gate_ready === true &&
18
+ coverage.last_round_index >= minRounds
19
+ );
20
+ }
21
+
22
+ async function scanFocusCoverage(fixtureRoot, requiredFocus) {
23
+ const art = join(fixtureRoot, "artifacts");
24
+ const covered = new Set();
25
+ let last_review_gate_ready = false;
26
+ let last_round_index = 0;
27
+ const { readdir } = await import("node:fs/promises");
28
+ const files = (await readdir(art)).filter((f) =>
29
+ /^review-round-r\d+\.yaml$/i.test(f),
30
+ );
31
+ for (const name of files.sort()) {
32
+ const m = /^review-round-r(\d+)\.yaml$/i.exec(name);
33
+ if (!m) continue;
34
+ const roundIndex = Number(m[1]);
35
+ if (roundIndex > last_round_index) last_round_index = roundIndex;
36
+ const draft = parseYaml(await readFile(join(art, name), "utf-8"));
37
+ const focus = String(draft.debate_round_focus ?? "").trim();
38
+ if (requiredFocus.includes(focus)) covered.add(focus);
39
+ if (roundIndex === last_round_index) {
40
+ last_review_gate_ready = draft.review_gate_ready === true;
41
+ }
42
+ }
43
+ const missing = requiredFocus.filter((f) => !covered.has(f));
44
+ return {
45
+ covered: requiredFocus.filter((f) => covered.has(f)),
46
+ missing,
47
+ last_review_gate_ready,
48
+ last_round_index,
49
+ };
50
+ }
51
+
14
52
  const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..", "..", "..");
15
53
  const FIXTURE_DIR = join(dirname(fileURLToPath(import.meta.url)), "fixtures", "plan-phase");
16
54
 
@@ -23,8 +61,16 @@ function ok(msg) {
23
61
  console.log(` ✓ ${msg}`);
24
62
  }
25
63
 
26
- async function runFixture() {
27
- const fixtureRoot = join(FIXTURE_DIR, "minimal-med");
64
+ function fixtureNameFromArgs(args) {
65
+ const idx = args.indexOf("--fixture");
66
+ if (idx === -1 || !args[idx + 1] || args[idx + 1].startsWith("-")) {
67
+ return "minimal-med";
68
+ }
69
+ return args[idx + 1];
70
+ }
71
+
72
+ async function runFixture(name) {
73
+ const fixtureRoot = join(FIXTURE_DIR, name);
28
74
  try {
29
75
  await access(fixtureRoot, constants.R_OK);
30
76
  } catch {
@@ -49,29 +95,64 @@ async function runFixture() {
49
95
  await access(reviewPath, constants.R_OK);
50
96
  ok("plan-review.md present");
51
97
 
52
- const debateRounds = ["review-round-r1.yaml", "review-round-r4.yaml"];
53
- for (const name of debateRounds) {
54
- const p = join(fixtureRoot, "artifacts", name);
55
- await access(p, constants.R_OK);
56
- const draft = parseYaml(await readFile(p, "utf-8"));
57
- if (!draft.schema_version) fail(`${name} missing schema_version`);
58
- }
59
- ok("debate round YAML artifacts present");
98
+ const implPath = join(fixtureRoot, "artifacts", "implementation-research.yaml");
99
+ await access(implPath, constants.R_OK);
100
+ ok("implementation-research.yaml present");
60
101
 
61
102
  const researchPath = join(fixtureRoot, "research-brief.yaml");
62
103
  const research = parseYaml(await readFile(researchPath, "utf-8"));
63
104
  if (!research.decomposition || !research.hypothesis) {
64
105
  fail("research-brief.yaml missing decomposition/hypothesis");
65
106
  }
107
+ if (!research.implementation) {
108
+ fail("research-brief.yaml missing implementation section");
109
+ }
66
110
  ok("research-brief.yaml structure");
67
111
 
68
- console.log("smoke-harness-plan: all fixture checks passed");
112
+ const isLight = name === "minimal-low-light";
113
+ const requiredFocus = isLight ? ["spec", "quality"] : ["spec", "wbs", "schedule", "quality"];
114
+ const debateRounds = isLight
115
+ ? ["review-round-r1.yaml", "review-round-r2.yaml"]
116
+ : [
117
+ "review-round-r1.yaml",
118
+ "review-round-r2.yaml",
119
+ "review-round-r3.yaml",
120
+ "review-round-r4.yaml",
121
+ ];
122
+ const seenFocus = new Set();
123
+ for (const fileName of debateRounds) {
124
+ const p = join(fixtureRoot, "artifacts", fileName);
125
+ await access(p, constants.R_OK);
126
+ const draft = parseYaml(await readFile(p, "utf-8"));
127
+ if (!draft.schema_version) fail(`${fileName} missing schema_version`);
128
+ if (draft.debate_round_focus) seenFocus.add(draft.debate_round_focus);
129
+ }
130
+ for (const focus of requiredFocus) {
131
+ if (!seenFocus.has(focus)) {
132
+ fail(`fixture missing debate_round_focus: ${focus}`);
133
+ }
134
+ }
135
+ ok(`debate round YAML artifacts (${requiredFocus.length} focuses)`);
136
+
137
+ const coverage = await scanFocusCoverage(fixtureRoot, requiredFocus);
138
+ const minRounds = isLight ? 2 : 4;
139
+ if (!planOutcomeComplete(coverage, requiredFocus, minRounds)) {
140
+ fail("debate outcome incomplete for fixture coverage");
141
+ }
142
+ ok("debate outcome complete for fixture profile");
143
+
144
+ if (isLight && packet.risk_level !== "low") {
145
+ fail("minimal-low-light fixture must use risk_level low");
146
+ }
147
+
148
+ console.log(`smoke-harness-plan: all ${name} fixture checks passed`);
69
149
  }
70
150
 
71
151
  async function main() {
72
152
  const args = process.argv.slice(2);
73
153
  if (args.includes("--fixture")) {
74
- await runFixture();
154
+ const name = fixtureNameFromArgs(args);
155
+ await runFixture(name);
75
156
  return;
76
157
  }
77
158
  if (args.includes("--live")) {
@@ -80,7 +161,7 @@ async function main() {
80
161
  );
81
162
  return;
82
163
  }
83
- fail("Usage: smoke-harness-plan.mjs --fixture | --live");
164
+ fail("Usage: smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light] | --live");
84
165
  }
85
166
 
86
167
  main().catch((err) => {
@@ -0,0 +1,128 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://ultimate-pi.local/.pi/harness/specs/plan-implementation-research-brief.schema.json",
4
+ "title": "PlanImplementationResearchBrief",
5
+ "type": "object",
6
+ "additionalProperties": false,
7
+ "required": [
8
+ "schema_version",
9
+ "problem_framing",
10
+ "sub_problems",
11
+ "internal_references",
12
+ "external_references",
13
+ "solution_patterns",
14
+ "similar_implementations",
15
+ "recommended_approach",
16
+ "anti_patterns",
17
+ "open_questions"
18
+ ],
19
+ "properties": {
20
+ "schema_version": { "type": "string", "const": "1.0.0" },
21
+ "problem_framing": { "type": "string", "minLength": 1 },
22
+ "sub_problems": {
23
+ "type": "array",
24
+ "items": { "type": "string", "minLength": 1 }
25
+ },
26
+ "internal_references": {
27
+ "type": "array",
28
+ "items": { "$ref": "#/$defs/internal_reference" }
29
+ },
30
+ "external_references": {
31
+ "type": "array",
32
+ "items": { "$ref": "#/$defs/external_reference" }
33
+ },
34
+ "solution_patterns": {
35
+ "type": "array",
36
+ "items": { "$ref": "#/$defs/solution_pattern" }
37
+ },
38
+ "similar_implementations": {
39
+ "type": "array",
40
+ "items": { "$ref": "#/$defs/similar_implementation" }
41
+ },
42
+ "recommended_approach": { "$ref": "#/$defs/recommended_approach" },
43
+ "anti_patterns": {
44
+ "type": "array",
45
+ "items": { "type": "string", "minLength": 1 }
46
+ },
47
+ "open_questions": {
48
+ "type": "array",
49
+ "items": { "type": "string", "minLength": 1 }
50
+ },
51
+ "deep_research_recommended": { "type": "boolean" }
52
+ },
53
+ "$defs": {
54
+ "internal_reference": {
55
+ "type": "object",
56
+ "additionalProperties": false,
57
+ "required": ["path", "relevance", "reuse_signal"],
58
+ "properties": {
59
+ "path": { "type": "string" },
60
+ "relevance": { "type": "string" },
61
+ "reuse_signal": {
62
+ "type": "string",
63
+ "enum": ["high", "med", "low", "none"]
64
+ }
65
+ }
66
+ },
67
+ "external_reference": {
68
+ "type": "object",
69
+ "additionalProperties": false,
70
+ "required": ["url", "source_type", "summary", "evidence_grade"],
71
+ "properties": {
72
+ "url": { "type": "string" },
73
+ "source_type": { "type": "string" },
74
+ "summary": { "type": "string" },
75
+ "evidence_grade": {
76
+ "type": "string",
77
+ "enum": ["primary", "secondary", "anecdotal"]
78
+ }
79
+ }
80
+ },
81
+ "solution_pattern": {
82
+ "type": "object",
83
+ "additionalProperties": false,
84
+ "required": ["name", "provenance", "fit", "tradeoffs", "risks"],
85
+ "properties": {
86
+ "name": { "type": "string" },
87
+ "provenance": { "type": "string" },
88
+ "fit": { "type": "string" },
89
+ "tradeoffs": {
90
+ "type": "object",
91
+ "required": ["pros", "cons"],
92
+ "properties": {
93
+ "pros": { "type": "array", "items": { "type": "string" } },
94
+ "cons": { "type": "array", "items": { "type": "string" } }
95
+ }
96
+ },
97
+ "risks": { "type": "array", "items": { "type": "string" } }
98
+ }
99
+ },
100
+ "similar_implementation": {
101
+ "type": "object",
102
+ "additionalProperties": false,
103
+ "required": ["name", "what_it_solves", "gap_vs_us"],
104
+ "properties": {
105
+ "name": { "type": "string" },
106
+ "what_it_solves": { "type": "string" },
107
+ "gap_vs_us": { "type": "string" }
108
+ }
109
+ },
110
+ "recommended_approach": {
111
+ "type": "object",
112
+ "additionalProperties": false,
113
+ "required": ["summary", "recommended_approach_confidence"],
114
+ "properties": {
115
+ "summary": { "type": "string", "minLength": 1 },
116
+ "recommended_approach_confidence": {
117
+ "type": "string",
118
+ "enum": ["low", "med", "high"]
119
+ },
120
+ "confidence_rationale": { "type": "string" },
121
+ "evidence_refs": {
122
+ "type": "array",
123
+ "items": { "type": "string" }
124
+ }
125
+ }
126
+ }
127
+ }
128
+ }
@@ -13,7 +13,7 @@
13
13
  ],
14
14
  "properties": {
15
15
  "schema_version": { "type": "string", "const": "1.0.0" },
16
- "round_index": { "type": "integer", "minimum": 1, "maximum": 4 },
16
+ "round_index": { "type": "integer", "minimum": 1, "maximum": 12 },
17
17
  "debate_round_focus": {
18
18
  "type": "string",
19
19
  "enum": ["spec", "wbs", "schedule", "quality"]
@@ -39,7 +39,7 @@
39
39
  "round_index": {
40
40
  "type": "integer",
41
41
  "minimum": 1,
42
- "maximum": 6
42
+ "maximum": 12
43
43
  },
44
44
  "participants": {
45
45
  "type": "array",
@@ -104,7 +104,9 @@
104
104
  "additionalProperties": false,
105
105
  "required": [
106
106
  "name",
107
+ "min_focus_rounds",
107
108
  "max_rounds",
109
+ "max_exchanges_per_round",
108
110
  "round_token_cap",
109
111
  "debate_global_cap"
110
112
  ],
@@ -113,8 +115,19 @@
113
115
  "type": "string",
114
116
  "enum": ["aggressive", "plan"]
115
117
  },
118
+ "min_focus_rounds": {
119
+ "type": "integer",
120
+ "minimum": 1
121
+ },
116
122
  "max_rounds": {
117
- "type": "integer"
123
+ "type": "integer",
124
+ "minimum": 1,
125
+ "maximum": 12
126
+ },
127
+ "max_exchanges_per_round": {
128
+ "type": "integer",
129
+ "minimum": 1,
130
+ "maximum": 6
118
131
  },
119
132
  "round_token_cap": {
120
133
  "type": "integer"
@@ -299,6 +299,98 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
299
299
  return state;
300
300
  }
301
301
 
302
+ export type HarnessStatusSeverity =
303
+ | "accent"
304
+ | "warning"
305
+ | "error"
306
+ | "success"
307
+ | "muted";
308
+
309
+ export const HARNESS_PHASE_ORDER: readonly HarnessPhase[] = [
310
+ "plan",
311
+ "execute",
312
+ "evaluate",
313
+ "adversary",
314
+ "merge",
315
+ ] as const;
316
+
317
+ export function formatHarnessPhaseLabel(phase: HarnessPhase): string {
318
+ switch (phase) {
319
+ case "plan":
320
+ return "plan";
321
+ case "execute":
322
+ return "build";
323
+ case "evaluate":
324
+ return "eval";
325
+ case "adversary":
326
+ return "review";
327
+ case "merge":
328
+ return "merge";
329
+ }
330
+ }
331
+
332
+ export function nextHarnessPhase(phase: HarnessPhase): HarnessPhase | null {
333
+ const index = HARNESS_PHASE_ORDER.indexOf(phase);
334
+ if (index < 0 || index >= HARNESS_PHASE_ORDER.length - 1) return null;
335
+ return HARNESS_PHASE_ORDER[index + 1] ?? null;
336
+ }
337
+
338
+ function truncateStatusCommand(command: string, maxLen = 40): string {
339
+ if (command.length <= maxLen) return command;
340
+ return `${command.slice(0, maxLen - 3)}...`;
341
+ }
342
+
343
+ export function deriveHarnessStatusHint(state: HarnessUiState): {
344
+ text: string;
345
+ severity: HarnessStatusSeverity;
346
+ } {
347
+ if (state.budgetExhausted) {
348
+ return { text: "Budget limit reached", severity: "error" };
349
+ }
350
+ if (state.testIntegritySeverity === "high") {
351
+ return { text: "Test integrity issue", severity: "error" };
352
+ }
353
+ if (state.policyDecision === "block") {
354
+ return { text: "Blocked — fix issues first", severity: "error" };
355
+ }
356
+ if (
357
+ state.policyDecision === "human_required" ||
358
+ state.flowSubstate === "human-required"
359
+ ) {
360
+ return { text: "Waiting for your input", severity: "warning" };
361
+ }
362
+ if (state.nextRecommendedCommand) {
363
+ return {
364
+ text: `Next: ${truncateStatusCommand(state.nextRecommendedCommand)}`,
365
+ severity: "accent",
366
+ };
367
+ }
368
+ if (state.phase === "plan") {
369
+ if (!state.planApproved) {
370
+ return { text: "Approve plan to continue", severity: "warning" };
371
+ }
372
+ return { text: "Plan approved", severity: "success" };
373
+ }
374
+ if (state.policyDecision === "pass") {
375
+ return { text: "Checks passed", severity: "success" };
376
+ }
377
+ if (state.policyDecision === "conditional_pass") {
378
+ return { text: "Passed with notes", severity: "warning" };
379
+ }
380
+ switch (state.phase) {
381
+ case "execute":
382
+ return { text: "Implementing changes", severity: "accent" };
383
+ case "evaluate":
384
+ return { text: "Running checks", severity: "accent" };
385
+ case "adversary":
386
+ return { text: "Review gate", severity: "accent" };
387
+ case "merge":
388
+ return { text: "Ready to finish", severity: "accent" };
389
+ default:
390
+ return { text: "Planning", severity: "muted" };
391
+ }
392
+ }
393
+
302
394
  export class HarnessUiStateStore {
303
395
  private lastEntriesLen = -1;
304
396
  private cachedState: HarnessUiState = {
@@ -1,5 +1,5 @@
1
1
  ---
2
- description: PM-grade harness plan — scouts, ExecutionPlan, DAG validation, Review Gate debate, approval.
2
+ description: PM-grade harness plan — scouts, implementation research, ExecutionPlan, DAG validation, selective Review Gate debate, approval.
3
3
  argument-hint: "\"<task>\" [--risk low|med|high] [--budget <amount>] [--quick]"
4
4
  ---
5
5
 
@@ -16,6 +16,7 @@ Never `write`/`edit` the final canonical packet except via **`write_harness_yaml
16
16
  - `harness/planning/scout-semantic` (skip when `--quick`)
17
17
  - `harness/planning/decompose`
18
18
  - `harness/planning/hypothesis`
19
+ - `harness/planning/implementation-researcher`
19
20
  - `harness/planning/stack-researcher`
20
21
  - `harness/planning/execution-plan-author`
21
22
  - `harness/planning/hypothesis-validator` (debate R1 only)
@@ -31,7 +32,7 @@ Read **harness-debate-plan** skill before Review Gate rounds.
31
32
  1. Use `subagent` with `agentScope: "both"` and parallel `tasks` where lanes are independent.
32
33
  2. Each `subagent` call blocks until subprocesses finish — batch parallel scouts in one `tasks` array.
33
34
  3. Do **not** set `timeoutMs` unless the user explicitly requests a cap — subagents run until natural completion (optional backstop: `PI_SUBAGENT_TIMEOUT_MS`).
34
- 4. No harness subagent spawn cap — run the full scout + debate pipeline without skipping lanes for budget.
35
+ 4. No harness subagent spawn cap — run the full scout + research + debate pipeline without skipping lanes for budget.
35
36
  5. Compact task text: embed `HarnessSpawnContext` JSON + lane-specific instructions only.
36
37
 
37
38
  ## Step 0 — Parse `$ARGUMENTS`
@@ -39,7 +40,7 @@ Read **harness-debate-plan** skill before Review Gate rounds.
39
40
  - task (required)
40
41
  - `--risk low|med|high`, `--budget`, `--quick`
41
42
 
42
- `--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, stack research, execution plan, DAG validation, or **4-round plan debate**.
43
+ `--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, **Phase 3.5 implementation research**, stack research, execution plan, DAG validation, or **Review Gate debate**.
43
44
 
44
45
  ## Active plan context
45
46
 
@@ -67,29 +68,44 @@ Add `harness/planning/scout-semantic` to `tasks` unless `--quick`. Require graph
67
68
 
68
69
  One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis`. Parse `PlanDecompositionBrief` and `PlanHypothesisBrief` from outputs. Persist with `write_harness_yaml` → `artifacts/decomposition.yaml` and `artifacts/hypothesis.yaml`.
69
70
 
70
- ## Phase 4 — Draft shell + fork
71
+ Decompose **prior_art** is **internal only** (from scouts). External prior art arrives in Phase 3.5.
71
72
 
72
- Build draft `PlanPacket` (`contract_version: "1.1.0"`):
73
+ ## Phase 3.5 — External solution research (required)
73
74
 
74
- - `scope`, `assumptions`, `acceptance_checks`, `risk_level`, `rollback_plan`
75
- - `execution_plan` placeholder until Phase 4b
75
+ **MUST** run unless you document a `human_required` waiver in the run trace. Parallel batch:
76
76
 
77
- `ask_user` when `dialectical_fork` is material.
77
+ ```json
78
+ {
79
+ "agentScope": "both",
80
+ "tasks": [
81
+ { "agent": "harness/planning/implementation-researcher", "task": "<HarnessSpawnContext + paths to decomposition/hypothesis/scout summaries — patterns/repos/workflows only; no stack version SERPs>" },
82
+ { "agent": "harness/planning/stack-researcher", "task": "<HarnessSpawnContext + stack research brief — libraries/APIs only>" }
83
+ ]
84
+ }
85
+ ```
78
86
 
79
- Initialize `research-brief.yaml` with decomposition + hypothesis (`write_harness_yaml`).
87
+ - `write_harness_yaml` → `artifacts/implementation-research.yaml` and `artifacts/stack.yaml`.
88
+ - Merge both into `research-brief.yaml` (`implementation:` + `stack:`).
89
+ - **Partial failure:** if one lane fails, re-spawn that lane once; if still failing set `plan_status: partial` and `human_required` via `ask_user`. Do not proceed to Phase 4b without both artifacts or explicit human waiver.
90
+ - **Web dedup:** implementation owns patterns/repos; stack owns libraries/versions — no overlapping queries.
80
91
 
81
- ## Phase 4a — Stack research
92
+ On `mode: revise`: re-run implementation-researcher when task scope, acceptance_checks, or >30% work_items change; skip when delta is schedule-only and prior artifact is fresh.
82
93
 
83
- ```
84
- subagent({ agentScope: "both", agent: "harness/planning/stack-researcher", task: "<HarnessSpawnContext + stack research brief>" })
85
- ```
94
+ ## Phase 4 — Draft shell
95
+
96
+ Build draft `PlanPacket` (`contract_version: "1.1.0"`):
97
+
98
+ - `scope`, `assumptions`, `acceptance_checks`, `risk_level`, `rollback_plan`
99
+ - `execution_plan` placeholder until Phase 4b
86
100
 
87
- `write_harness_yaml` → `artifacts/stack.yaml`; merge into `research-brief.yaml` `stack`.
101
+ Initialize `research-brief.yaml` with decomposition + hypothesis + Phase 3.5 merges (`write_harness_yaml`).
102
+
103
+ **`ask_user` on material `dialectical_fork`** after Phase 3.5 merge (evidence-backed — conflicting external patterns may trigger `human_required` from eligibility).
88
104
 
89
105
  ## Phase 4b — Execution plan author
90
106
 
91
107
  ```
92
- subagent({ agentScope: "both", agent: "harness/planning/execution-plan-author", task: "<HarnessSpawnContext + execution plan brief>" })
108
+ subagent({ agentScope: "both", agent: "harness/planning/execution-plan-author", task: "<HarnessSpawnContext + PlanImplementationResearchBrief + PlanStackBrief + decomposition/hypothesis>" })
93
109
  ```
94
110
 
95
111
  Merge `execution_plan` into draft `plan-packet.yaml` (`write_harness_yaml`). Save `artifacts/execution-plan-draft.yaml` the same way.
@@ -102,37 +118,71 @@ node .pi/scripts/validate-plan-dag.mjs --packet .pi/harness/runs/<run_id>/plan-p
102
118
 
103
119
  Must **pass** before debate. On fail: fix via author or parent patches, re-run.
104
120
 
105
- ## Phase 5 — Review Gate debate (4 rounds, pi-messenger, even with `--quick`)
121
+ ## Phase 4d — Debate eligibility (before Review Gate)
122
+
123
+ ```
124
+ harness_plan_debate_eligibility({ risk_level, material_fork, dag_pass: true, ... })
125
+ ```
126
+
127
+ Pre-debate signals only (no R1 hypothesis output). Default profile **standard** when ambiguous.
106
128
 
107
- 1. `harness_debate_open` (debate id normalized to `plan-<run_id>`; creates `debate-messenger/` inboxes + threads).
108
- 2. Optional: `harness_plan_scope_check` after decomposition — if `material_drift`, `ask_user` before continuing.
109
- 3. For rounds 1–4 (`debate_round_focus`: spec, wbs, schedule, quality):
129
+ If `human_required: true` → `ask_user` before `harness_debate_open`.
130
+
131
+ Then:
132
+
133
+ ```
134
+ harness_debate_open({ debate_profile, required_focuses })
135
+ ```
110
136
 
111
- | Round | Lane spawns (sequential) | Messenger |
112
- |-------|--------------------------|-----------|
113
- | 1 | `hypothesis-validator` (blind) → `plan-evaluator` → `plan-adversary` | evaluator `claim` → adversary `rebuttal` (`in_reply_to` claim ids) |
114
- | 2 | `plan-evaluator` → `plan-adversary` | same |
115
- | 3 | `plan-evaluator` → `plan-adversary` | same |
116
- | 4 | `plan-evaluator` → `plan-adversary` → **`sprint-contract-auditor`** | same + audit message optional |
137
+ Profiles:
117
138
 
118
- Lane YAML + messenger claims/rebuttals are **auto-applied** when each debate subagent completes (`harness-debate-lane-applied` entry). You may also call `harness_debate_apply_lane` if fenced YAML was truncated.
139
+ | Profile | Focuses required | min_focus_rounds |
140
+ |---------|------------------|------------------|
141
+ | full | spec, wbs, schedule, quality | 4 |
142
+ | standard | all four | 4 |
143
+ | light | spec, quality only | 2 |
119
144
 
120
- Per round (no prose-only turns — **always call a tool**):
145
+ ## Phase 5 — Review Gate debate (profile-aware, pi-messenger, even with `--quick`)
121
146
 
122
- 1. Spawn lane agents (evaluator → adversary → integrator; R1/R4 extras per table).
123
- 2. After each subagent: verify `harness-debate-next-step` message or run `harness_debate_round_status({ round_index: N })`.
124
- 3. Before adversary: `harness_messenger_read_round` — include transcript in adversary task.
125
- 4. After integrator: `harness_debate_submit_round({ round_index, integrator_draft })` (writes review-round + bus round + integrate message — **do not** `write_harness_yaml` review-round paths).
147
+ **Forbidden:** parallel `subagent` calls for any debate lane agent in one batch. One lane agent per tool batch, in order.
148
+
149
+ 1. Optional: `harness_plan_scope_check` — if `material_drift`, `ask_user` before debate.
150
+ 2. Drive debate with **`harness_debate_focus_coverage`** and **`harness_debate_round_status({ round_index, debate_round_focus })`** — cover **required_focuses** from eligibility, not always all four.
151
+
152
+ ### Focus coverage (required before consensus)
153
+
154
+ Each required focus must appear in a submitted `review-round-rN.yaml` (`debate_round_focus`). Monotonic `round_index` (cap from profile). Consensus only when:
155
+
156
+ - all **required** focuses covered, **and**
157
+ - last round `review_gate_ready: true`, **and**
158
+ - `validate-plan-dag.mjs` still passes (re-run after patches).
159
+
160
+ ### Per-round state machine
161
+
162
+ ```
163
+ round_index := next uncovered required focus
164
+ debate_round_focus := spec | wbs | schedule | quality for this round
165
+
166
+ IF round_index == 1:
167
+ spawn hypothesis-validator (blind — no decomposition/PlanPacket/scouts/prior debate)
168
+ WHILE NOT ready_for_integrator (harness_debate_round_status with debate_round_focus):
169
+ follow next_tool exactly (one subagent per batch)
170
+ IF debate_round_focus == quality OR round_index >= 4:
171
+ spawn sprint-contract-auditor
172
+ spawn review-integrator → harness_debate_submit_round({ round_index, integrator_draft })
173
+ harness_debate_focus_coverage // repeat until missing required focuses empty
174
+ harness_debate_consensus
175
+ ```
126
176
 
127
- 5. `harness_debate_consensus` after round 4.
177
+ Debate agents **must not** call `web_search` / `web_fetch` — cite `artifacts/implementation-research.yaml` instead.
128
178
 
129
- **Never** echo `/harness-debate-*` in bash. **Never** end a turn during Phase 5 with only narration (e.g. "Let me post claims") — the next tool call must be in the **same** assistant message or immediately after `harness-debate-next-step`.
179
+ **Never** end a Phase 5 turn with prose only — next action must be a harness tool or single sequential `subagent`.
130
180
 
131
- **R1 blind rule:** hypothesis-validator prompt must exclude decomposition, scouts, PlanPacket, prior debate.
181
+ **R1 blind rule:** hypothesis-validator sees only task + `PlanHypothesisBrief`.
132
182
 
133
183
  If R1 `revision_recommended` or `relevance.passes === false`: one `hypothesis` re-spawn, update brief, continue.
134
184
 
135
- **Blockers:** `policy_decision: block` → do not `approve_plan`. `human_required` → `ask_user` before approval.
185
+ **Blockers:** `policy_decision: block` → no `approve_plan`. `human_required` → `ask_user` first.
136
186
 
137
187
  ## Phase 5b — Revise packet
138
188
 
@@ -142,7 +192,7 @@ Set `research_brief.eval` from R1 `hypothesis-validator` output.
142
192
 
143
193
  ## Phase 6 — Approval + persistence
144
194
 
145
- 1. `approve_plan` with `plan_packet`, `human_summary`, `research_brief` (paths/summaries OK).
195
+ 1. `approve_plan` with `plan_packet`, `human_summary`, `research_brief` (include `implementation` section). Missing `artifacts/implementation-research.yaml` → **error** on `--risk high`, **warn** otherwise.
146
196
  2. On Approve: `create_plan` with same packet (`contract_version: "1.1.0"` + `execution_plan`).
147
197
  3. Confirm `plan_ready: true` → `next_command: /harness-run`.
148
198
 
@@ -152,4 +202,4 @@ Post-execute adversary: `/harness-critic` only (not plan-phase agents).
152
202
 
153
203
  - `plan_status`: ready | partial | needs_clarification
154
204
  - `plan_review_path` for human review
155
- - DAG `pass` + 4 debate rounds + consensus not `block` before ready
205
+ - DAG `pass` + required focus areas covered + consensus not `block` before ready
@@ -0,0 +1,31 @@
1
+ # Planning Review Gate rubrics (spawn fragment)
2
+
3
+ Parent includes this file in debate agent spawn text. Stable check ids by `debate_round_focus`.
4
+
5
+ ## spec
6
+
7
+ - SC-01: Every acceptance_check maps to scope or execution_plan work_item
8
+ - SC-02: Out-of-scope work is listed in decomposition `excluded`
9
+ - SC-03: Hypothesis brief falsifiability and success metrics are testable
10
+ - SC-04: Risk register covers top technical unknowns
11
+
12
+ ## wbs
13
+
14
+ - WB-01: Each work_item has typed `done_criteria` (not vague “implement X”)
15
+ - WB-02: No orphan work_items (every item on critical path or sprint_contract)
16
+ - WB-03: `depends_on` is acyclic; parallel_safe only when files disjoint
17
+ - WB-04: wbs_dictionary entry per non-trivial work_item
18
+
19
+ ## schedule
20
+
21
+ - SH-01: `schedule_metadata.critical_path_work_item_ids` is non-empty for med/high risk
22
+ - SH-02: Phase entry/exit criteria are observable
23
+ - SH-03: Milestones align with acceptance_checks dates where stated
24
+ - SH-04: No impossible parallelism (same file, conflicting owners)
25
+
26
+ ## quality
27
+
28
+ - QL-01: sprint_contract.done_criteria_types complete (ADR-020)
29
+ - QL-02: Verify/lint/test work_items in early phases when risk ≥ med
30
+ - QL-03: Checkpoint gaps between phases documented
31
+ - QL-04: Keep Quality Left — no “test at end only” without justification