npm - ultimate-pi - Versions diffs - 0.14.0 → 0.15.0 - Mend

ultimate-pi 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/.pi/harness/evals/smoke/smoke-harness-plan.mjs CHANGED Viewed

@@ -1,16 +1,54 @@
 #!/usr/bin/env node
 /**
  * smoke-harness-plan — fixture validation for plan-phase pipeline (CI).
- * Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture
+ * Usage: node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light]
  */
-import { access, cp, mkdir, readFile, rm } from "node:fs/promises";
+import { access, readFile } from "node:fs/promises";
 import { constants } from "node:fs";
-import { dirname, join, resolve } from "node:path";
+import { dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
 import { parse as parseYaml } from "yaml";
 import { validateExecutionPlan } from "../../../scripts/validate-plan-dag.mjs";
+function planOutcomeComplete(coverage, requiredFocus, minRounds) { // Returns true only when all three conditions below hold; requiredFocus is accepted but never read in this body.
+	return (
+		coverage.missing.length === 0 && // no required focus is listed as missing
+		coverage.last_review_gate_ready === true && // strict check: only the boolean true passes
+		coverage.last_round_index >= minRounds // the highest round index seen reaches the minimum
+	);
+}
+async function scanFocusCoverage(fixtureRoot, requiredFocus) { // Scans <fixtureRoot>/artifacts for review-round-rN.yaml files and summarizes which entries of requiredFocus they cover.
+	const art = join(fixtureRoot, "artifacts");
+	const covered = new Set(); // entries of requiredFocus seen in at least one round file
+	let last_review_gate_ready = false;
+	let last_round_index = 0; // stays 0 when no round file matches
+	const { readdir } = await import("node:fs/promises");
+	const files = (await readdir(art)).filter((f) =>
+		/^review-round-r\d+\.yaml$/i.test(f),
+	);
+	for (const name of files.sort()) { // default sort is lexicographic, so r10 is visited before r2
+		const m = /^review-round-r(\d+)\.yaml$/i.exec(name);
+		if (!m) continue;
+		const roundIndex = Number(m[1]);
+		if (roundIndex > last_round_index) last_round_index = roundIndex; // running numeric maximum, independent of visit order
+		const draft = parseYaml(await readFile(join(art, name), "utf-8"));
+		const focus = String(draft.debate_round_focus ?? "").trim(); // NOTE(review): a null/undefined draft would throw here — confirm parseYaml never yields one for fixture files
+		if (requiredFocus.includes(focus)) covered.add(focus); // focus values outside requiredFocus are ignored
+		if (roundIndex === last_round_index) { // holds for every new maximum, so the flag ends up taken from a file with the highest round index
+			last_review_gate_ready = draft.review_gate_ready === true;
+		}
+	}
+	const missing = requiredFocus.filter((f) => !covered.has(f));
+	return { // covered and missing both keep the order of requiredFocus
+		covered: requiredFocus.filter((f) => covered.has(f)),
+		missing,
+		last_review_gate_ready,
+		last_round_index,
+	};
+}
 const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..", "..", "..");
 const FIXTURE_DIR = join(dirname(fileURLToPath(import.meta.url)), "fixtures", "plan-phase");
@@ -23,8 +61,16 @@ function ok(msg) {
 	console.log(`  ✓ ${msg}`);
 }
-async function runFixture() {
-	const fixtureRoot = join(FIXTURE_DIR, "minimal-med");
+function fixtureNameFromArgs(args) { // Returns the token that follows "--fixture", or "minimal-med" when that token is absent or looks like another flag.
+	const idx = args.indexOf("--fixture");
+	if (idx === -1 || !args[idx + 1] || args[idx + 1].startsWith("-")) { // flag missing, next token missing or empty, or next token starts with "-"
+		return "minimal-med";
+	}
+	return args[idx + 1]; // returned as-is; this function does not check it against a list of known fixtures
+}
+async function runFixture(name) {
+	const fixtureRoot = join(FIXTURE_DIR, name);
 	try {
 		await access(fixtureRoot, constants.R_OK);
 	} catch {
@@ -49,29 +95,64 @@ async function runFixture() {
 	await access(reviewPath, constants.R_OK);
 	ok("plan-review.md present");
-	const debateRounds = ["review-round-r1.yaml", "review-round-r4.yaml"];
-	for (const name of debateRounds) {
-		const p = join(fixtureRoot, "artifacts", name);
-		await access(p, constants.R_OK);
-		const draft = parseYaml(await readFile(p, "utf-8"));
-		if (!draft.schema_version) fail(`${name} missing schema_version`);
-	}
-	ok("debate round YAML artifacts present");
+	const implPath = join(fixtureRoot, "artifacts", "implementation-research.yaml");
+	await access(implPath, constants.R_OK);
+	ok("implementation-research.yaml present");
 	const researchPath = join(fixtureRoot, "research-brief.yaml");
 	const research = parseYaml(await readFile(researchPath, "utf-8"));
 	if (!research.decomposition || !research.hypothesis) {
 		fail("research-brief.yaml missing decomposition/hypothesis");
 	}
+	if (!research.implementation) {
+		fail("research-brief.yaml missing implementation section");
+	}
 	ok("research-brief.yaml structure");
-	console.log("smoke-harness-plan: all fixture checks passed");
+	const isLight = name === "minimal-low-light";
+	const requiredFocus = isLight ? ["spec", "quality"] : ["spec", "wbs", "schedule", "quality"];
+	const debateRounds = isLight
+		? ["review-round-r1.yaml", "review-round-r2.yaml"]
+		: [
+				"review-round-r1.yaml",
+				"review-round-r2.yaml",
+				"review-round-r3.yaml",
+				"review-round-r4.yaml",
+			];
+	const seenFocus = new Set();
+	for (const fileName of debateRounds) {
+		const p = join(fixtureRoot, "artifacts", fileName);
+		await access(p, constants.R_OK);
+		const draft = parseYaml(await readFile(p, "utf-8"));
+		if (!draft.schema_version) fail(`${fileName} missing schema_version`);
+		if (draft.debate_round_focus) seenFocus.add(draft.debate_round_focus);
+	}
+	for (const focus of requiredFocus) {
+		if (!seenFocus.has(focus)) {
+			fail(`fixture missing debate_round_focus: ${focus}`);
+		}
+	}
+	ok(`debate round YAML artifacts (${requiredFocus.length} focuses)`);
+	const coverage = await scanFocusCoverage(fixtureRoot, requiredFocus);
+	const minRounds = isLight ? 2 : 4;
+	if (!planOutcomeComplete(coverage, requiredFocus, minRounds)) {
+		fail("debate outcome incomplete for fixture coverage");
+	}
+	ok("debate outcome complete for fixture profile");
+	if (isLight && packet.risk_level !== "low") {
+		fail("minimal-low-light fixture must use risk_level low");
+	}
+	console.log(`smoke-harness-plan: all ${name} fixture checks passed`);
 }
 async function main() {
 	const args = process.argv.slice(2);
 	if (args.includes("--fixture")) {
-		await runFixture();
+		const name = fixtureNameFromArgs(args);
+		await runFixture(name);
 		return;
 	}
 	if (args.includes("--live")) {
@@ -80,7 +161,7 @@ async function main() {
 		);
 		return;
 	}
-	fail("Usage: smoke-harness-plan.mjs --fixture | --live");
+	fail("Usage: smoke-harness-plan.mjs --fixture [minimal-med|minimal-low-light] | --live");
 }
 main().catch((err) => {

package/.pi/harness/specs/plan-implementation-research-brief.schema.json ADDED Viewed

@@ -0,0 +1,128 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://ultimate-pi.local/.pi/harness/specs/plan-implementation-research-brief.schema.json",
+	"title": "PlanImplementationResearchBrief",
+	"type": "object",
+	"additionalProperties": false,
+	"required": [
+		"schema_version",
+		"problem_framing",
+		"sub_problems",
+		"internal_references",
+		"external_references",
+		"solution_patterns",
+		"similar_implementations",
+		"recommended_approach",
+		"anti_patterns",
+		"open_questions"
+	],
+	"properties": {
+		"schema_version": { "type": "string", "const": "1.0.0" },
+		"problem_framing": { "type": "string", "minLength": 1 },
+		"sub_problems": {
+			"type": "array",
+			"items": { "type": "string", "minLength": 1 }
+		},
+		"internal_references": {
+			"type": "array",
+			"items": { "$ref": "#/$defs/internal_reference" }
+		},
+		"external_references": {
+			"type": "array",
+			"items": { "$ref": "#/$defs/external_reference" }
+		},
+		"solution_patterns": {
+			"type": "array",
+			"items": { "$ref": "#/$defs/solution_pattern" }
+		},
+		"similar_implementations": {
+			"type": "array",
+			"items": { "$ref": "#/$defs/similar_implementation" }
+		},
+		"recommended_approach": { "$ref": "#/$defs/recommended_approach" },
+		"anti_patterns": {
+			"type": "array",
+			"items": { "type": "string", "minLength": 1 }
+		},
+		"open_questions": {
+			"type": "array",
+			"items": { "type": "string", "minLength": 1 }
+		},
+		"deep_research_recommended": { "type": "boolean" }
+	},
+	"$defs": {
+		"internal_reference": {
+			"type": "object",
+			"additionalProperties": false,
+			"required": ["path", "relevance", "reuse_signal"],
+			"properties": {
+				"path": { "type": "string" },
+				"relevance": { "type": "string" },
+				"reuse_signal": {
+					"type": "string",
+					"enum": ["high", "med", "low", "none"]
+				}
+			}
+		},
+		"external_reference": {
+			"type": "object",
+			"additionalProperties": false,
+			"required": ["url", "source_type", "summary", "evidence_grade"],
+			"properties": {
+				"url": { "type": "string" },
+				"source_type": { "type": "string" },
+				"summary": { "type": "string" },
+				"evidence_grade": {
+					"type": "string",
+					"enum": ["primary", "secondary", "anecdotal"]
+				}
+			}
+		},
+		"solution_pattern": {
+			"type": "object",
+			"additionalProperties": false,
+			"required": ["name", "provenance", "fit", "tradeoffs", "risks"],
+			"properties": {
+				"name": { "type": "string" },
+				"provenance": { "type": "string" },
+				"fit": { "type": "string" },
+				"tradeoffs": {
+					"type": "object",
+					"required": ["pros", "cons"],
+					"properties": {
+						"pros": { "type": "array", "items": { "type": "string" } },
+						"cons": { "type": "array", "items": { "type": "string" } }
+					}
+				},
+				"risks": { "type": "array", "items": { "type": "string" } }
+			}
+		},
+		"similar_implementation": {
+			"type": "object",
+			"additionalProperties": false,
+			"required": ["name", "what_it_solves", "gap_vs_us"],
+			"properties": {
+				"name": { "type": "string" },
+				"what_it_solves": { "type": "string" },
+				"gap_vs_us": { "type": "string" }
+			}
+		},
+		"recommended_approach": {
+			"type": "object",
+			"additionalProperties": false,
+			"required": ["summary", "recommended_approach_confidence"],
+			"properties": {
+				"summary": { "type": "string", "minLength": 1 },
+				"recommended_approach_confidence": {
+					"type": "string",
+					"enum": ["low", "med", "high"]
+				},
+				"confidence_rationale": { "type": "string" },
+				"evidence_refs": {
+					"type": "array",
+					"items": { "type": "string" }
+				}
+			}
+		}
+	}
+}

package/.pi/harness/specs/plan-review-round-draft.schema.json CHANGED Viewed

@@ -13,7 +13,7 @@
 	],
 	"properties": {
 		"schema_version": { "type": "string", "const": "1.0.0" },
-		"round_index": { "type": "integer", "minimum": 1, "maximum": 4 },
+		"round_index": { "type": "integer", "minimum": 1, "maximum": 12 },
 		"debate_round_focus": {
 			"type": "string",
 			"enum": ["spec", "wbs", "schedule", "quality"]

package/.pi/harness/specs/round-result.schema.json CHANGED Viewed

@@ -39,7 +39,7 @@
 		"round_index": {
 			"type": "integer",
 			"minimum": 1,
-			"maximum": 6
+			"maximum": 12
 		},
 		"participants": {
 			"type": "array",
@@ -104,7 +104,9 @@
 			"additionalProperties": false,
 			"required": [
 				"name",
+				"min_focus_rounds",
 				"max_rounds",
+				"max_exchanges_per_round",
 				"round_token_cap",
 				"debate_global_cap"
 			],
@@ -113,8 +115,19 @@
 					"type": "string",
 					"enum": ["aggressive", "plan"]
 				},
+				"min_focus_rounds": {
+					"type": "integer",
+					"minimum": 1
+				},
 				"max_rounds": {
-					"type": "integer"
+					"type": "integer",
+					"minimum": 1,
+					"maximum": 12
+				},
+				"max_exchanges_per_round": {
+					"type": "integer",
+					"minimum": 1,
+					"maximum": 6
 				},
 				"round_token_cap": {
 					"type": "integer"

package/.pi/lib/harness-ui-state.ts CHANGED Viewed

@@ -299,6 +299,98 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
 	return state;
 }
+export type HarnessStatusSeverity = // severity tag returned alongside the hint text by deriveHarnessStatusHint
+	| "accent"
+	| "warning"
+	| "error"
+	| "success"
+	| "muted";
+export const HARNESS_PHASE_ORDER: readonly HarnessPhase[] = [ // ordered phase list; nextHarnessPhase steps through it by index
+	"plan",
+	"execute",
+	"evaluate",
+	"adversary",
+	"merge",
+] as const;
+export function formatHarnessPhaseLabel(phase: HarnessPhase): string { // Maps a phase to its short display label.
+	switch (phase) { // no default branch: only the five cases below return a value
+		case "plan":
+			return "plan";
+		case "execute":
+			return "build"; // label differs from the phase name
+		case "evaluate":
+			return "eval"; // label differs from the phase name
+		case "adversary":
+			return "review"; // label differs from the phase name
+		case "merge":
+			return "merge";
+	}
+}
+export function nextHarnessPhase(phase: HarnessPhase): HarnessPhase | null { // Returns the phase after the given one in HARNESS_PHASE_ORDER, or null when there is none.
+	const index = HARNESS_PHASE_ORDER.indexOf(phase);
+	if (index < 0 || index >= HARNESS_PHASE_ORDER.length - 1) return null; // phase not in the list, or already the last entry
+	return HARNESS_PHASE_ORDER[index + 1] ?? null;
+}
+function truncateStatusCommand(command: string, maxLen = 40): string { // Shortens command for display; strings of length <= maxLen are returned unchanged.
+	if (command.length <= maxLen) return command;
+	return `${command.slice(0, maxLen - 3)}...`; // keeps maxLen - 3 characters plus "..."; assumes maxLen >= 3 — TODO confirm callers never pass less
+}
+export function deriveHarnessStatusHint(state: HarnessUiState): { // Picks one status hint (text + severity) for the given state; the checks below run in order and the first match wins.
+	text: string;
+	severity: HarnessStatusSeverity;
+} {
+	if (state.budgetExhausted) { // error conditions are checked first
+		return { text: "Budget limit reached", severity: "error" };
+	}
+	if (state.testIntegritySeverity === "high") {
+		return { text: "Test integrity issue", severity: "error" };
+	}
+	if (state.policyDecision === "block") {
+		return { text: "Blocked — fix issues first", severity: "error" };
+	}
+	if (
+		state.policyDecision === "human_required" ||
+		state.flowSubstate === "human-required" // note: hyphenated here, underscored in the policyDecision value above
+	) {
+		return { text: "Waiting for your input", severity: "warning" };
+	}
+	if (state.nextRecommendedCommand) { // a recommended command outranks every phase- and decision-based hint below
+		return {
+			text: `Next: ${truncateStatusCommand(state.nextRecommendedCommand)}`,
+			severity: "accent",
+		};
+	}
+	if (state.phase === "plan") { // the plan phase always returns here, so it never reaches the switch at the end
+		if (!state.planApproved) {
+			return { text: "Approve plan to continue", severity: "warning" };
+		}
+		return { text: "Plan approved", severity: "success" };
+	}
+	if (state.policyDecision === "pass") {
+		return { text: "Checks passed", severity: "success" };
+	}
+	if (state.policyDecision === "conditional_pass") {
+		return { text: "Passed with notes", severity: "warning" };
+	}
+	switch (state.phase) { // fallback: generic per-phase text when nothing above matched
+		case "execute":
+			return { text: "Implementing changes", severity: "accent" };
+		case "evaluate":
+			return { text: "Running checks", severity: "accent" };
+		case "adversary":
+			return { text: "Review gate", severity: "accent" };
+		case "merge":
+			return { text: "Ready to finish", severity: "accent" };
+		default: // any phase value not listed above
+			return { text: "Planning", severity: "muted" };
+	}
+}
 export class HarnessUiStateStore {
 	private lastEntriesLen = -1;
 	private cachedState: HarnessUiState = {

package/.pi/prompts/harness-plan.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-description: PM-grade harness plan — scouts, ExecutionPlan, DAG validation, Review Gate debate, approval.
+description: PM-grade harness plan — scouts, implementation research, ExecutionPlan, DAG validation, selective Review Gate debate, approval.
 argument-hint: "\"<task>\" [--risk low|med|high] [--budget <amount>] [--quick]"
 ---
@@ -16,6 +16,7 @@ Never `write`/`edit` the final canonical packet except via **`write_harness_yaml
 - `harness/planning/scout-semantic` (skip when `--quick`)
 - `harness/planning/decompose`
 - `harness/planning/hypothesis`
+- `harness/planning/implementation-researcher`
 - `harness/planning/stack-researcher`
 - `harness/planning/execution-plan-author`
 - `harness/planning/hypothesis-validator` (debate R1 only)
@@ -31,7 +32,7 @@ Read **harness-debate-plan** skill before Review Gate rounds.
 1. Use `subagent` with `agentScope: "both"` and parallel `tasks` where lanes are independent.
 2. Each `subagent` call blocks until subprocesses finish — batch parallel scouts in one `tasks` array.
 3. Do **not** set `timeoutMs` unless the user explicitly requests a cap — subagents run until natural completion (optional backstop: `PI_SUBAGENT_TIMEOUT_MS`).
-4. No harness subagent spawn cap — run the full scout + debate pipeline without skipping lanes for budget.
+4. No harness subagent spawn cap — run the full scout + research + debate pipeline without skipping lanes for budget.
 5. Compact task text: embed `HarnessSpawnContext` JSON + lane-specific instructions only.
 ## Step 0 — Parse `$ARGUMENTS`
@@ -39,7 +40,7 @@ Read **harness-debate-plan** skill before Review Gate rounds.
 - task (required)
 - `--risk low|med|high`, `--budget`, `--quick`
-`--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, stack research, execution plan, DAG validation, or **4-round plan debate**.
+`--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, **Phase 3.5 implementation research**, stack research, execution plan, DAG validation, or **Review Gate debate**.
 ## Active plan context
@@ -67,29 +68,44 @@ Add `harness/planning/scout-semantic` to `tasks` unless `--quick`. Require graph
 One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis`. Parse `PlanDecompositionBrief` and `PlanHypothesisBrief` from outputs. Persist with `write_harness_yaml` → `artifacts/decomposition.yaml` and `artifacts/hypothesis.yaml`.
-## Phase 4 — Draft shell + fork
+Decompose **prior_art** is **internal only** (from scouts). External prior art arrives in Phase 3.5.
-Build draft `PlanPacket` (`contract_version: "1.1.0"`):
+## Phase 3.5 — External solution research (required)
-- `scope`, `assumptions`, `acceptance_checks`, `risk_level`, `rollback_plan`
-- `execution_plan` placeholder until Phase 4b
+**MUST** run unless you document a `human_required` waiver in the run trace. Parallel batch:
-`ask_user` when `dialectical_fork` is material.
+```json
+{
+  "agentScope": "both",
+  "tasks": [
+    { "agent": "harness/planning/implementation-researcher", "task": "<HarnessSpawnContext + paths to decomposition/hypothesis/scout summaries — patterns/repos/workflows only; no stack version SERPs>" },
+    { "agent": "harness/planning/stack-researcher", "task": "<HarnessSpawnContext + stack research brief — libraries/APIs only>" }
+  ]
+}
+```
-Initialize `research-brief.yaml` with decomposition + hypothesis (`write_harness_yaml`).
+- `write_harness_yaml` → `artifacts/implementation-research.yaml` and `artifacts/stack.yaml`.
+- Merge both into `research-brief.yaml` (`implementation:` + `stack:`).
+- **Partial failure:** if one lane fails, re-spawn that lane once; if still failing set `plan_status: partial` and `human_required` via `ask_user`. Do not proceed to Phase 4b without both artifacts or explicit human waiver.
+- **Web dedup:** implementation owns patterns/repos; stack owns libraries/versions — no overlapping queries.
-## Phase 4a — Stack research
+On `mode: revise`: re-run implementation-researcher when task scope, acceptance_checks, or >30% work_items change; skip when delta is schedule-only and prior artifact is fresh.
-```
-subagent({ agentScope: "both", agent: "harness/planning/stack-researcher", task: "<HarnessSpawnContext + stack research brief>" })
-```
+## Phase 4 — Draft shell
+Build draft `PlanPacket` (`contract_version: "1.1.0"`):
+- `scope`, `assumptions`, `acceptance_checks`, `risk_level`, `rollback_plan`
+- `execution_plan` placeholder until Phase 4b
-`write_harness_yaml` → `artifacts/stack.yaml`; merge into `research-brief.yaml` → `stack`.
+Initialize `research-brief.yaml` with decomposition + hypothesis + Phase 3.5 merges (`write_harness_yaml`).
+**`ask_user` on material `dialectical_fork`** after Phase 3.5 merge (evidence-backed — conflicting external patterns may trigger `human_required` from eligibility).
 ## Phase 4b — Execution plan author
 ```
-subagent({ agentScope: "both", agent: "harness/planning/execution-plan-author", task: "<HarnessSpawnContext + execution plan brief>" })
+subagent({ agentScope: "both", agent: "harness/planning/execution-plan-author", task: "<HarnessSpawnContext + PlanImplementationResearchBrief + PlanStackBrief + decomposition/hypothesis>" })
 ```
 Merge `execution_plan` into draft `plan-packet.yaml` (`write_harness_yaml`). Save `artifacts/execution-plan-draft.yaml` the same way.
@@ -102,37 +118,71 @@ node .pi/scripts/validate-plan-dag.mjs --packet .pi/harness/runs/<run_id>/plan-p
 Must **pass** before debate. On fail: fix via author or parent patches, re-run.
-## Phase 5 — Review Gate debate (4 rounds, pi-messenger, even with `--quick`)
+## Phase 4d — Debate eligibility (before Review Gate)
+```
+harness_plan_debate_eligibility({ risk_level, material_fork, dag_pass: true, ... })
+```
+Pre-debate signals only (no R1 hypothesis output). Default profile **standard** when ambiguous.
-1. `harness_debate_open` (debate id normalized to `plan-<run_id>`; creates `debate-messenger/` inboxes + threads).
-2. Optional: `harness_plan_scope_check` after decomposition — if `material_drift`, `ask_user` before continuing.
-3. For rounds 1–4 (`debate_round_focus`: spec, wbs, schedule, quality):
+If `human_required: true` → `ask_user` before `harness_debate_open`.
+Then:
+```
+harness_debate_open({ debate_profile, required_focuses })
+```
-| Round | Lane spawns (sequential) | Messenger |
-|-------|--------------------------|-----------|
-| 1 | `hypothesis-validator` (blind) → `plan-evaluator` → `plan-adversary` | evaluator `claim` → adversary `rebuttal` (`in_reply_to` claim ids) |
-| 2 | `plan-evaluator` → `plan-adversary` | same |
-| 3 | `plan-evaluator` → `plan-adversary` | same |
-| 4 | `plan-evaluator` → `plan-adversary` → **`sprint-contract-auditor`** | same + audit message optional |
+Profiles:
-Lane YAML + messenger claims/rebuttals are **auto-applied** when each debate subagent completes (`harness-debate-lane-applied` entry). You may also call `harness_debate_apply_lane` if fenced YAML was truncated.
+| Profile | Focuses required | min_focus_rounds |
+|---------|------------------|------------------|
+| full | spec, wbs, schedule, quality | 4 |
+| standard | all four | 4 |
+| light | spec, quality only | 2 |
-Per round (no prose-only turns — **always call a tool**):
+## Phase 5 — Review Gate debate (profile-aware, pi-messenger, even with `--quick`)
-1. Spawn lane agents (evaluator → adversary → integrator; R1/R4 extras per table).
-2. After each subagent: verify `harness-debate-next-step` message or run `harness_debate_round_status({ round_index: N })`.
-3. Before adversary: `harness_messenger_read_round` → include transcript in adversary task.
-4. After integrator: `harness_debate_submit_round({ round_index, integrator_draft })` (writes review-round + bus round + integrate message — **do not** `write_harness_yaml` review-round paths).
+**Forbidden:** parallel `subagent` calls for any debate lane agent in one batch. One lane agent per tool batch, in order.
+1. Optional: `harness_plan_scope_check` — if `material_drift`, `ask_user` before debate.
+2. Drive debate with **`harness_debate_focus_coverage`** and **`harness_debate_round_status({ round_index, debate_round_focus })`** — cover **required_focuses** from eligibility, not always all four.
+### Focus coverage (required before consensus)
+Each required focus must appear in a submitted `review-round-rN.yaml` (`debate_round_focus`). `round_index` must be monotonic (cap from profile). Consensus only when:
+- all **required** focuses covered, **and**
+- last round `review_gate_ready: true`, **and**
+- `validate-plan-dag.mjs` still passes (re-run after patches).
+### Per-round state machine
+```
+round_index := next uncovered required focus
+debate_round_focus := spec | wbs | schedule | quality for this round
+IF round_index == 1:
+  spawn hypothesis-validator (blind — no decomposition/PlanPacket/scouts/prior debate)
+WHILE NOT ready_for_integrator (harness_debate_round_status with debate_round_focus):
+  follow next_tool exactly (one subagent per batch)
+  IF debate_round_focus == quality OR round_index >= 4:
+    spawn sprint-contract-auditor
+spawn review-integrator → harness_debate_submit_round({ round_index, integrator_draft })
+harness_debate_focus_coverage  // repeat until missing required focuses empty
+harness_debate_consensus
+```
-5. `harness_debate_consensus` after round 4.
+Debate agents **must not** call `web_search` / `web_fetch` — cite `artifacts/implementation-research.yaml` instead.
-**Never** echo `/harness-debate-*` in bash. **Never** end a turn during Phase 5 with only narration (e.g. "Let me post claims") — the next tool call must be in the **same** assistant message or immediately after `harness-debate-next-step`.
+**Never** end a Phase 5 turn with prose only — next action must be a harness tool or single sequential `subagent`.
-**R1 blind rule:** hypothesis-validator prompt must exclude decomposition, scouts, PlanPacket, prior debate.
+**R1 blind rule:** hypothesis-validator sees only task + `PlanHypothesisBrief`.
 If R1 `revision_recommended` or `relevance.passes === false`: one `hypothesis` re-spawn, update brief, continue.
-**Blockers:** `policy_decision: block` → do not `approve_plan`. `human_required` → `ask_user` before approval.
+**Blockers:** `policy_decision: block` → no `approve_plan`. `human_required` → `ask_user` first.
 ## Phase 5b — Revise packet
@@ -142,7 +192,7 @@ Set `research_brief.eval` from R1 `hypothesis-validator` output.
 ## Phase 6 — Approval + persistence
-1. `approve_plan` with `plan_packet`, `human_summary`, `research_brief` (paths/summaries OK).
+1. `approve_plan` with `plan_packet`, `human_summary`, `research_brief` (include `implementation` section). Missing `artifacts/implementation-research.yaml` → **error** on `--risk high`, **warn** otherwise.
 2. On Approve: `create_plan` with same packet (`contract_version: "1.1.0"` + `execution_plan`).
 3. Confirm `plan_ready: true` → `next_command: /harness-run`.
@@ -152,4 +202,4 @@ Post-execute adversary: `/harness-critic` only (not plan-phase agents).
 - `plan_status`: ready | partial | needs_clarification
 - `plan_review_path` for human review
-- DAG `pass` + 4 debate rounds + consensus not `block` before ready
+- DAG `pass` + required focus areas covered + consensus not `block` before ready

package/.pi/prompts/planning-rubrics.md ADDED Viewed

@@ -0,0 +1,31 @@
+# Planning Review Gate rubrics (spawn fragment)
+Parent includes this file in debate agent spawn text. Stable check ids by `debate_round_focus`.
+## spec
+- SC-01: Every acceptance_check maps to scope or execution_plan work_item
+- SC-02: Out-of-scope work is listed in decomposition `excluded`
+- SC-03: Hypothesis brief falsifiability and success metrics are testable
+- SC-04: Risk register covers top technical unknowns
+## wbs
+- WB-01: Each work_item has typed `done_criteria` (not vague “implement X”)
+- WB-02: No orphan work_items (every item on critical path or sprint_contract)
+- WB-03: `depends_on` is acyclic; parallel_safe only when files disjoint
+- WB-04: wbs_dictionary entry per non-trivial work_item
+## schedule
+- SH-01: `schedule_metadata.critical_path_work_item_ids` is non-empty for med/high risk
+- SH-02: Phase entry/exit criteria are observable
+- SH-03: Milestones align with acceptance_checks dates where stated
+- SH-04: No impossible parallelism (same file, conflicting owners)
+## quality
+- QL-01: sprint_contract.done_criteria_types complete (ADR-020)
+- QL-02: Verify/lint/test work_items in early phases when risk ≥ med
+- QL-03: Checkpoint gaps between phases documented
+- QL-04: Keep Quality Left — no “test at end only” without justification