npm - ultimate-pi - Versions diffs - 0.14.0 → 0.16.0 - Mend

ultimate-pi 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (92)

package/.pi/extensions/lib/plan-debate-round-status.ts CHANGED Viewed

@@ -5,10 +5,18 @@
 import { constants } from "node:fs";
 import { access } from "node:fs/promises";
 import { join } from "node:path";
-import { type DebateLaneKind, laneArtifactPath } from "./plan-debate-lane.js";
+import { capsForDebate } from "./debate-bus-core.js";
+import {
+	type PlanDebateFocus,
+	readDebateRoundFocus,
+} from "./plan-debate-focus.js";
+import { planDebateIdForRun } from "./plan-debate-id.js";
+import { laneArtifactPath } from "./plan-debate-lane.js";
+import { lanesForRound } from "./plan-debate-lanes.js";
 import {
 	getMessengerRoundState,
-	messengerRoundDebateReady,
+	loadMessengerState,
+	messengerRoundDialogueReady,
 } from "./plan-messenger.js";
 async function exists(path: string): Promise<boolean> {
@@ -20,39 +28,50 @@ async function exists(path: string): Promise<boolean> {
 	}
 }
-function lanesForRound(roundIndex: number): DebateLaneKind[] {
-	const lanes: DebateLaneKind[] = ["validation-turn", "adversary-brief"];
-	if (roundIndex === 1) lanes.unshift("hypothesis-validation");
-	if (roundIndex === 4) lanes.push("sprint-audit");
-	return lanes;
-}
 export interface RoundStatusResult {
 	round_index: number;
-	/** Lane YAML + messenger thread complete; spawn integrator next. */
+	/** Lane YAML + messenger dialogue complete; spawn integrator next. */
 	ready_for_integrator: boolean;
 	/** review-round-rN.yaml on disk (call harness_debate_submit_round if bus not updated). */
 	review_round_on_disk: boolean;
 	missing: string[];
 	next_tool?: string;
 	messenger: { ok: boolean; errors: string[] };
+	dialogue: { ok: boolean; errors: string[] };
+	unresolved_claim_ids: string[];
+	exchange_count: number;
+	debate_round_focus?: PlanDebateFocus | null;
 }
 export async function getPlanDebateRoundStatus(
 	runDir: string,
 	roundIndex: number,
+	runId?: string,
+	opts?: { debate_round_focus?: PlanDebateFocus },
 ): Promise<RoundStatusResult> {
+	const focus =
+		opts?.debate_round_focus ??
+		(await readDebateRoundFocus(runDir, roundIndex));
 	const missing: string[] = [];
-	for (const lane of lanesForRound(roundIndex)) {
+	for (const lane of lanesForRound(roundIndex, focus)) {
 		const rel = laneArtifactPath(lane, roundIndex);
 		if (!(await exists(join(runDir, rel)))) {
 			missing.push(rel);
 		}
 	}
+	const messengerState = await loadMessengerState(runDir);
+	const profile = messengerState?.debate_profile;
+	const caps = capsForDebate(
+		runId ? planDebateIdForRun(runId) : `plan-${runId ?? "unknown"}`,
+		profile,
+	);
 	const roundState = await getMessengerRoundState(runDir, roundIndex);
-	const messenger = messengerRoundDebateReady(roundState, roundIndex === 4);
-	if (!messenger.ok) {
-		missing.push(...messenger.errors.map((e) => `messenger: ${e}`));
+	const dialogueOpts = {
+		max_exchanges_per_round: caps.max_exchanges_per_round,
+	};
+	const dialogue = messengerRoundDialogueReady(roundState, dialogueOpts);
+	if (!dialogue.ok) {
+		missing.push(...dialogue.errors.map((e) => `messenger: ${e}`));
 	}
 	const reviewRound = `artifacts/review-round-r${roundIndex}.yaml`;
 	const reviewRoundOnDisk = await exists(join(runDir, reviewRound));
@@ -62,14 +81,35 @@ export async function getPlanDebateRoundStatus(
 		next_tool = "subagent harness/planning/hypothesis-validator";
 	} else if (missing.some((m) => m.includes("validation-turn"))) {
 		next_tool = "subagent harness/planning/plan-evaluator";
+	} else if (
+		missing.some((m) => m.includes("adversary-brief")) &&
+		!roundState?.evaluator_posted
+	) {
+		next_tool = "subagent harness/planning/plan-evaluator";
 	} else if (missing.some((m) => m.includes("adversary-brief"))) {
 		next_tool =
 			"harness_messenger_read_round then subagent harness/planning/plan-adversary";
 	} else if (missing.some((m) => m.includes("sprint-audit"))) {
 		next_tool = "subagent harness/planning/sprint-contract-auditor";
-	} else if (!messenger.ok) {
+	} else if (
+		roundState &&
+		roundState.evaluator_posted &&
+		!roundState.adversary_posted
+	) {
+		next_tool =
+			"harness_messenger_read_round then subagent harness/planning/plan-adversary";
+	} else if (
+		roundState &&
+		roundState.unresolved_claim_ids.length > 0 &&
+		roundState.exchange_count < caps.max_exchanges_per_round
+	) {
+		const spawnEvaluator = roundState.exchange_count % 2 === 1;
+		next_tool = spawnEvaluator
+			? "harness_debate_advance_thread → harness_messenger_read_round → subagent harness/planning/plan-evaluator (clarification; address unresolved claim_ids)"
+			: "harness_debate_advance_thread → harness_messenger_read_round → subagent harness/planning/plan-adversary (counter or concede)";
+	} else if (!dialogue.ok) {
 		next_tool =
-			"harness_debate_apply_lane (evaluator/adversary) or re-spawn lane agent";
+			"harness_debate_advance_thread or harness_debate_apply_lane (evaluator/adversary)";
 	} else if (!reviewRoundOnDisk) {
 		next_tool =
 			"subagent harness/planning/review-integrator then harness_debate_submit_round";
@@ -78,10 +118,9 @@ export async function getPlanDebateRoundStatus(
 			"harness_debate_submit_round with integrator draft from review-round file";
 	}
+	const laneMissing = missing.filter((m) => !m.startsWith("messenger"));
 	const readyForIntegrator =
-		messenger.ok &&
-		missing.filter((m) => !m.startsWith("messenger")).length === 0 &&
-		!reviewRoundOnDisk;
+		dialogue.ok && laneMissing.length === 0 && !reviewRoundOnDisk;
 	return {
 		round_index: roundIndex,
@@ -89,6 +128,10 @@ export async function getPlanDebateRoundStatus(
 		review_round_on_disk: reviewRoundOnDisk,
 		missing,
 		next_tool,
-		messenger,
+		messenger: dialogue,
+		dialogue,
+		unresolved_claim_ids: roundState?.unresolved_claim_ids ?? [],
+		exchange_count: roundState?.exchange_count ?? 0,
+		debate_round_focus: focus,
 	};
 }

package/.pi/extensions/lib/plan-messenger.ts CHANGED Viewed

@@ -17,11 +17,15 @@ import {
 } from "node:fs/promises";
 import { join } from "node:path";
 import type { DebateParticipant } from "../../lib/debate-orchestrator-types.js";
+import type { DebateProfile } from "./plan-debate-eligibility.js";
+import type { PlanDebateFocus } from "./plan-debate-focus.js";
 export type MessengerMessageKind =
 	| "system"
 	| "claim"
 	| "rebuttal"
+	| "clarification"
+	| "counter"
 	| "integrate"
 	| "audit";
@@ -47,6 +51,8 @@ export interface MessengerRoundState {
 	integrator_posted: boolean;
 	claim_count: number;
 	rebuttal_count: number;
+	exchange_count: number;
+	unresolved_claim_ids: string[];
 }
 export interface MessengerState {
@@ -55,6 +61,8 @@ export interface MessengerState {
 	debate_id: string;
 	opened_at: string;
 	rounds: Record<string, MessengerRoundState>;
+	debate_profile?: DebateProfile;
+	required_focuses?: PlanDebateFocus[];
 }
 function messengerRoot(runDir: string): string {
@@ -71,7 +79,12 @@ function roundKey(roundIndex: number): string {
 export async function initPlanMessenger(
 	runDir: string,
-	opts: { runId: string; debateId: string },
+	opts: {
+		runId: string;
+		debateId: string;
+		debate_profile?: DebateProfile;
+		required_focuses?: PlanDebateFocus[];
+	},
 ): Promise<string> {
 	const root = messengerRoot(runDir);
 	await mkdir(join(root, "inbox"), { recursive: true });
@@ -82,6 +95,8 @@ export async function initPlanMessenger(
 		debate_id: opts.debateId,
 		opened_at: nowIso(),
 		rounds: {},
+		debate_profile: opts.debate_profile,
+		required_focuses: opts.required_focuses,
 	};
 	await writeFile(
 		join(root, "state.json"),
@@ -122,9 +137,51 @@ function defaultRoundState(roundIndex: number): MessengerRoundState {
 		integrator_posted: false,
 		claim_count: 0,
 		rebuttal_count: 0,
+		exchange_count: 0,
+		unresolved_claim_ids: [],
 	};
 }
+/** Recompute exchange + unresolved claim ids from a round transcript. */
+export function syncRoundStateFromTranscript(
+	round: MessengerRoundState,
+	messages: MessengerMessage[],
+): MessengerRoundState {
+	const claimed = new Set<string>();
+	const resolved = new Set<string>();
+	let exchange_count = 0;
+	for (const m of messages) {
+		if (m.from === "PlanEvaluatorAgent" && m.kind === "claim") {
+			round.evaluator_posted = true;
+			round.claim_count += m.claim_ids.length || 1;
+			for (const id of m.claim_ids) claimed.add(id);
+		}
+		if (m.from === "PlanAdversaryAgent" && m.kind === "rebuttal") {
+			round.adversary_posted = true;
+			round.rebuttal_count += m.in_reply_to.length || 1;
+			exchange_count += 1;
+		}
+		if (m.from === "PlanEvaluatorAgent" && m.kind === "clarification") {
+			exchange_count += 1;
+			for (const id of m.claim_ids) resolved.add(id);
+			for (const id of m.in_reply_to) resolved.add(id);
+		}
+		if (m.from === "PlanAdversaryAgent" && m.kind === "counter") {
+			exchange_count += 1;
+			for (const id of m.claim_ids) resolved.add(id);
+			for (const id of m.in_reply_to) resolved.add(id);
+		}
+		if (m.from === "ReviewIntegratorAgent" && m.kind === "integrate") {
+			round.integrator_posted = true;
+		}
+	}
+	round.exchange_count = exchange_count;
+	round.unresolved_claim_ids = [...claimed].filter((id) => !resolved.has(id));
+	return round;
+}
 export async function postMessengerMessage(
 	runDir: string,
 	msg: Omit<MessengerMessage, "schema_version" | "id" | "ts"> & {
@@ -172,19 +229,10 @@ export async function postMessengerMessage(
 		rounds: {},
 	};
 	const key = roundKey(full.round_index);
+	const messages = await readRoundTranscript(runDir, full.round_index);
+	messages.push(full);
 	const round = state.rounds[key] ?? defaultRoundState(full.round_index);
-	if (full.from === "PlanEvaluatorAgent" && full.kind === "claim") {
-		round.evaluator_posted = true;
-		round.claim_count += full.claim_ids.length || 1;
-	}
-	if (full.from === "PlanAdversaryAgent" && full.kind === "rebuttal") {
-		round.adversary_posted = true;
-		round.rebuttal_count += full.in_reply_to.length || 1;
-	}
-	if (full.from === "ReviewIntegratorAgent" && full.kind === "integrate") {
-		round.integrator_posted = true;
-	}
-	state.rounds[key] = round;
+	state.rounds[key] = syncRoundStateFromTranscript(round, messages);
 	await saveMessengerState(runDir, state);
 	return full;
 }
@@ -233,13 +281,22 @@ export async function getMessengerRoundState(
 ): Promise<MessengerRoundState | null> {
 	const state = await loadMessengerState(runDir);
 	if (!state) return null;
-	return state.rounds[roundKey(roundIndex)] ?? null;
+	const round = state.rounds[roundKey(roundIndex)];
+	if (!round) return null;
+	const transcript = await readRoundTranscript(runDir, roundIndex);
+	return syncRoundStateFromTranscript({ ...round }, transcript);
 }
-export function messengerRoundDebateReady(
+export interface MessengerDialogueOptions {
+	max_exchanges_per_round?: number;
+}
+/** Evaluator + adversary dialogue settled; safe to spawn integrator. */
+export function messengerRoundDialogueReady(
 	round: MessengerRoundState | null,
-	_requireSprintAudit: boolean,
+	opts: MessengerDialogueOptions = {},
 ): { ok: boolean; errors: string[] } {
+	const maxExchanges = opts.max_exchanges_per_round ?? 3;
 	const errors: string[] = [];
 	if (!round) {
 		errors.push("no messenger activity for this round");
@@ -257,7 +314,26 @@ export function messengerRoundDebateReady(
 	if (round.rebuttal_count < 1) {
 		errors.push("adversary must rebut at least one claim (in_reply_to)");
 	}
-	if (!round.integrator_posted) {
+	const dialogueSettled =
+		round.unresolved_claim_ids.length === 0 ||
+		round.exchange_count >= maxExchanges;
+	if (!dialogueSettled) {
+		errors.push(
+			`unresolved claims remain (${round.unresolved_claim_ids.join(", ")}) and exchange_count ${round.exchange_count} < ${maxExchanges}`,
+		);
+	}
+	return { ok: errors.length === 0, errors };
+}
+/** Full round ready for harness_debate_submit_round (includes integrator). */
+export function messengerRoundDebateReady(
+	round: MessengerRoundState | null,
+	_requireSprintAudit: boolean,
+	opts: MessengerDialogueOptions = {},
+): { ok: boolean; errors: string[] } {
+	const dialogue = messengerRoundDialogueReady(round, opts);
+	const errors = [...dialogue.errors];
+	if (!round?.integrator_posted) {
 		errors.push(
 			"ReviewIntegratorAgent must post integrate message before bus submit",
 		);

package/.pi/extensions/policy-gate.ts CHANGED Viewed

@@ -243,7 +243,7 @@ export default function policyGate(pi: ExtensionAPI) {
 		const planPhaseHint =
 			state.phase === "plan"
-				? "\nPlan phase: scouts → decompose → hypothesis → stack-researcher → execution-plan-author → validate-plan-dag → 4-round plan debate → approve_plan → create_plan (YAML plan-packet.yaml). Post-execute: /harness-critic."
+				? "\nPlan phase: scouts → decompose → hypothesis → implementation-researcher + stack-researcher → execution-plan-author → validate-plan-dag → debate eligibility + Review Gate → approve_plan → create_plan (YAML plan-packet.yaml). Post-execute: /harness-critic."
 				: "";
 		return {

package/.pi/harness/README.md CHANGED Viewed

@@ -30,7 +30,7 @@ under `.pi/extensions/` and auto-loaded through the package `pi.extensions`
 manifest (`package.json`).
 - `harness-run-context.ts` - active run + plan injection; short commands without run/plan args
-- `harness-live-widget.ts` - footer status (phase, plan ready, next command; no run id in UI)
+- `harness-live-widget.ts` - footer status (current/next phase + plain-language status hint; no run id in UI)
 - `policy-gate.ts` - phase state machine + plan-before-mutate enforcement
 - `budget-guard.ts` - hard-stop token budget checks + budget exhausted artifacts
 - `trace-recorder.ts` - append-only run traces + HarnessRunRecord + compact index

package/.pi/harness/agents.manifest.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
 	"schema_version": "1.0.0",
 	"package": "ultimate-pi",
-	"package_version": "0.13.1",
-	"generated_at": "2026-05-18T17:22:10.311Z",
+	"package_version": "0.15.0",
+	"generated_at": "2026-05-19T12:56:13.369Z",
 	"agents": {
 		"pi-pi/agent-expert": {
 			"path": ".pi/agents/pi-pi/agent-expert.md",
@@ -46,23 +46,23 @@
 		},
 		"harness/adversary": {
 			"path": ".pi/agents/harness/adversary.md",
-			"sha256": "dd2ef87139cb175d795f4d7bde78dca1a181d2e42c3c3bd0d48832cf5069aa29"
+			"sha256": "560c7571ab91478bde1271e9ae6c3a112c3e1d28e1a261c5450fd1d00f9f89af"
 		},
 		"harness/evaluator": {
 			"path": ".pi/agents/harness/evaluator.md",
-			"sha256": "2b8039fd79f9177fdafd5319a53a96812719d4f1f68e2de70632030142649cfe"
+			"sha256": "a4667d3efb305ba2fe79118e3d7d2b0de5e0369637af040d1238161d75cd28ac"
 		},
 		"harness/executor": {
 			"path": ".pi/agents/harness/executor.md",
-			"sha256": "b549e9fc802ba23857a1bc6b2ff36f3c169e708fe5ec13857b3bcfe841384f1f"
+			"sha256": "6baffcc3d89954494ce3ae439175686a39928b6a543a0a451da27475094b1712"
 		},
 		"harness/incident-recorder": {
 			"path": ".pi/agents/harness/incident-recorder.md",
-			"sha256": "d7577c911a9e6c9607eb64f76337aab85c4eb9a92e7cd917eb8d989ef3cd1de5"
+			"sha256": "d42fa45de1a2fe3842d075c6f319315266588942e314f1b650caabac39bdc29a"
 		},
 		"harness/meta-optimizer": {
 			"path": ".pi/agents/harness/meta-optimizer.md",
-			"sha256": "a4eed88084c7cfb5ace3edc72b72d7ead4134b3eae0d444b391decfe2640a632"
+			"sha256": "cbaab35367126796b7136389a02ab41b4fd1fe7098cf83be562d7b7493ccc297"
 		},
 		"harness/sentrux-bootstrap": {
 			"path": ".pi/agents/harness/sentrux-bootstrap.md",
@@ -70,59 +70,63 @@
 		},
 		"harness/tie-breaker": {
 			"path": ".pi/agents/harness/tie-breaker.md",
-			"sha256": "68f02b86e95927f06d7f963e1f61f193159bbef1ba4558d90c84d5457d62b3f7"
+			"sha256": "1c54c1c3274291dea1ea8826563a7ad4fe1d9c4302984e907bfcd22cfc4f5eba"
 		},
 		"harness/trace-librarian": {
 			"path": ".pi/agents/harness/trace-librarian.md",
-			"sha256": "03b499a948b8467f1cfe2b4e63190feb7b8b9d96461055638e774253b9b6b2d4"
+			"sha256": "336b3f3f6141cef8750ab18d29bbe454caf26973830a86afe099d9e4ad8b0abe"
 		},
 		"harness/planning/decompose": {
 			"path": ".pi/agents/harness/planning/decompose.md",
-			"sha256": "1b3f85d956d2e203ec87045a731c47f8b40f75b63fce8916fda91cefc39244a8"
+			"sha256": "0919dafa1d1cd008d513c28524c1e7218867586a138982dccf01db5270c42c73"
 		},
 		"harness/planning/execution-plan-author": {
 			"path": ".pi/agents/harness/planning/execution-plan-author.md",
-			"sha256": "a69fb2e8bda9336e71ce9536071f9c8a2f4abd9d9d88930c6a8be29bdc9c5f62"
+			"sha256": "55ece0f1ee14abd17fe7b3e478b548240f637eacbfc2a34758e98d3878dc82fd"
 		},
 		"harness/planning/hypothesis-validator": {
 			"path": ".pi/agents/harness/planning/hypothesis-validator.md",
-			"sha256": "f75312439c441ccee72692d41f44b6e733df08e06c89e930740fc256bed3ba02"
+			"sha256": "36f0baa7796229f21bd02faf5e70402c7bf054289eab557a25bfbe3cb7781de7"
 		},
 		"harness/planning/hypothesis": {
 			"path": ".pi/agents/harness/planning/hypothesis.md",
-			"sha256": "b20c527d15c2243cd5d3a8f16cea6d44bdfd16e01915d42f3b830bf9938e5f8b"
+			"sha256": "e83d5c4faaee8d32af4a5f22c9917b70a173f3e22d7c0f182b361706f2309171"
+		},
+		"harness/planning/implementation-researcher": {
+			"path": ".pi/agents/harness/planning/implementation-researcher.md",
+			"sha256": "653f320b5d51bb331774246687f24a75347b406bba4e6dfd2968d6e5d4cc8bb3"
 		},
 		"harness/planning/plan-adversary": {
 			"path": ".pi/agents/harness/planning/plan-adversary.md",
-			"sha256": "84c7fa63d38c39e32000c90093688a45bc2b96a2c6209037342222eae0c854f9"
+			"sha256": "3241d7ec939dc29e0af64690b99e9f74b209f40b0daa4a2a1f9ff86f99f94a8d"
 		},
 		"harness/planning/plan-evaluator": {
 			"path": ".pi/agents/harness/planning/plan-evaluator.md",
-			"sha256": "580d8c7a31f7a6ecd9e627460459d600650580b5df63d129278beefd3f3e347c"
+			"sha256": "71660ab58bfcfdfae56c873140d4ea5946ae30cd5719c96afeabfd02b1d1f81d"
 		},
 		"harness/planning/review-integrator": {
 			"path": ".pi/agents/harness/planning/review-integrator.md",
-			"sha256": "cd1e5d10f0cb8b7a4197d2e92489023c285e90e250f1badc371470165aeb8cfd"
+			"sha256": "cf3f0dbe81274ec9ef0ff2e0c170e8dc929b20be65492d0ee9a80d985acf6d71"
 		},
 		"harness/planning/scout-graphify": {
 			"path": ".pi/agents/harness/planning/scout-graphify.md",
-			"sha256": "8a5ff68306a5eedf1a62067ac8812eac4ac1fe2016cba63337ef4e90b5136e00"
+			"sha256": "6e2bda8ad38311810c9916d9dab311873bc776e4b8832bb0e574136e45e1255e"
 		},
 		"harness/planning/scout-semantic": {
 			"path": ".pi/agents/harness/planning/scout-semantic.md",
-			"sha256": "36bd424ebd422bda82bd447b22f591f99f32ec897ea43f385586119da5c26caa"
+			"sha256": "416e518d8204a55b26dc53da1f750865c6f09ee2c7f343b41e7c08da3230c089"
 		},
 		"harness/planning/scout-structure": {
 			"path": ".pi/agents/harness/planning/scout-structure.md",
-			"sha256": "e67b7cd75519e5ae36e1bb5f49ca158888c28d365465863aee50a9b2e8e5b7d7"
+			"sha256": "76c42a15cc74cf1de2cf861cb0146c865c205f69cce7b9605d41893b19600029"
 		},
 		"harness/planning/sprint-contract-auditor": {
 			"path": ".pi/agents/harness/planning/sprint-contract-auditor.md",
-			"sha256": "f613a4fa937d76936fa01155d4e7956a81878f300100f99f6a78915b0af6f7c7"
+			"sha256": "12cb5e6b53dcc19ace62e8e4c152d96440717df53a182e76216dd2327410df4d"
 		},
 		"harness/planning/stack-researcher": {
 			"path": ".pi/agents/harness/planning/stack-researcher.md",
-			"sha256": "90e2ff1348f54bebc8c0392407bf1bb4d794c942fd8d6f342d80b191c945b34e"
+			"sha256": "ce546ef3aca19da7f334f07cef8f510b79068bffeb7f276c428f3e6236bbe96b"
 		}
 	}
 }

package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md CHANGED Viewed

@@ -13,9 +13,7 @@
    - `harness/planning/decompose` — DeepMind-style problem decomposition (`PlanDecompositionBrief`)
    - `harness/planning/hypothesis` — DARWIN hypothesis generation (`PlanHypothesisBrief`)
 2. **Parent maps hypothesis → PlanPacket** — `plan-packet.schema.json` unchanged; execution gating stable.
-3. **Parallel pre-approval reviews:**
-   - `harness/planning/plan-adversary` — execution risk on PlanPacket
-   - `harness/planning/hypothesis-eval` — blind self-eval (task + hypothesis only)
+3. **Review Gate (ADR 0035):** outcome-based debate with `hypothesis-validator` on R1 (blind — task + hypothesis only). Retired `hypothesis-eval` as a separate pre-approval agent.
 4. **`approve_plan` optional `research_brief`** — rendered in `plan-review.md`; not written to `plan-packet.json`.
 5. **`--quick`** still skips semantic scout only; never skips decompose/hypothesis.

package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md CHANGED Viewed

@@ -2,26 +2,34 @@
 ## Status
-Accepted (2026-05-18)
+Accepted (2026-05-18); amended 2026-05-19 (outcome-based debate + ping-pong dialogue)
 ## Context
 `/harness-plan` produced thin PlanPackets (scope + bullets). Post-execute adversarial review (`/harness-critic`) ran too late. Graphify corpus (Structured Planning, ADR-020, Generator–Evaluator) defines WBS, validation, and review gate before baseline.
+Early implementation treated debate as a fixed four-round checklist with a single evaluator→adversary exchange per round, which ended debate on round count rather than on focus coverage and quality.
 ## Decision
 1. **PlanPacket 1.1.0** — required `execution_plan` (phases, work_items, sprint_contract, dag_validation).
 2. **YAML on disk** — `plan-packet.yaml`, `research-brief.yaml`, `run-context.yaml`, `artifacts/*.yaml`. JSON Schema unchanged; instances validated after YAML parse.
 3. **Review Gate agents** — `stack-researcher`, `execution-plan-author`, debate: `hypothesis-validator`, `plan-evaluator`, `plan-adversary`, `sprint-contract-auditor`, `review-integrator`.
-4. **Debate bus** — `debate_id=plan-<run_id>`, plan budget profile (4 rounds, 12k cap), plan-phase consensus prerequisites.
-5. **No legacy JSON** plan paths; no pre-debate standalone `hypothesis-eval`.
+4. **Debate bus** — `debate_id=plan-<run_id>`, plan budget profile:
+   - `min_focus_rounds=4`, `max_rounds=12`, `max_exchanges_per_round=3`
+   - `round_token_cap=8000`, `debate_global_cap=80000`
+5. **Outcome-based completion** — consensus `adversarial_debate_completed` when all focuses `spec|wbs|schedule|quality` are covered in submitted review rounds, last `review_gate_ready: true`, and parent DAG validation passes (not `round_count >= 4` alone).
+6. **Within-round dialogue** — pi-messenger kinds: `claim`, `rebuttal`, `clarification`, `counter`; parent orchestrates ping-pong via `harness_debate_round_status` / `harness_debate_advance_thread` before integrator.
+7. **Sequential debate spawns** — parent must not parallelize debate lane subagents in one batch.
+8. **No legacy JSON** plan paths; no pre-debate standalone `hypothesis-eval`.
 ## Consequences
-- Positive: PM-grade plans, deterministic DAG gate, blind hypothesis eval in debate R1.
-- Negative: Higher spawn/token cost; `harness-verify` and smoke fixtures must use `.yaml`.
+- Positive: PM-grade plans, deterministic DAG gate, blind hypothesis eval in debate R1, richer evaluator↔adversary threads, extendable round index for partial re-debate.
+- Negative: Higher token cost (80k debate cap vs 12k); parent orchestration more stateful; smoke fixtures must include four `debate_round_focus` values.
 ## References
 - [ADR-0033](0033-parent-orchestrated-planning.md), [ADR-0034](0034-darwin-plan-research-pipeline.md)
 - `raw/decisions/adr-020.md`, `raw/modules/structured-planning.md`
+- `.pi/prompts/planning-rubrics.md`, `.pi/prompts/harness-plan.md` Phase 5

package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md ADDED Viewed

@@ -0,0 +1,51 @@
+# ADR 0036: Implementation research and selective debate
+- **Status:** Accepted
+- **Date:** 2026-05-19
+## Context
+ADR 0034–0035 established Darwin research and outcome-based Review Gate debate. Gaps remained:
+- No dedicated pass for external solution patterns vs in-repo stack selection.
+- Debate always required all four focuses with `min_focus_rounds=4`, even for low-risk tasks.
+- The sprint-contract-auditor spawn logic in code did not match the prompt (quality focus).
+## Decision
+1. **Phase 3.5** — After decompose/hypothesis, parent spawns in parallel:
+   - `harness/planning/implementation-researcher` → `PlanImplementationResearchBrief` → `artifacts/implementation-research.yaml`
+   - `harness/planning/stack-researcher` → `PlanStackBrief` → `artifacts/stack.yaml`
+2. Research stays **outside** debate; debate agents cite artifacts, no web tools.
+3. **Phase 4d** — `harness_plan_debate_eligibility` (pre-debate only) selects `full | standard | light` and `required_focuses`; persisted on messenger + bus at `harness_debate_open`.
+4. **Light profile** — `spec` + `quality` only, `min_focus_rounds=2`, reduced global cap; gate uses stored `required_focuses` (not hardcoded four).
+5. **Sprint auditor** — shared `lanesForRound(roundIndex, focus)` spawns sprint lane when `focus === quality` OR `roundIndex >= 4`.
+6. **`--quick`** still skips semantic scout only; never skips Phase 3.5 or debate.
+## Profiles
+| Profile | When | Focuses | min_focus_rounds |
+|---------|------|---------|-------------------|
+| full | high risk, material fork, open implementation questions, DAG manual patch, many tensions | all four | 4 |
+| standard | default (ambiguous → standard) | all four | 4 |
+| light | low risk, no fork, high-confidence implementation + clear stack primary | spec, quality | 2 |
+## Consequences
+### Positive
+- Better plans on hard tasks (external patterns before WBS).
+- Cheaper low-risk plans (light debate).
+- Deterministic eligibility and gate alignment.
+### Negative
+- Extra subagent per plan (implementation-researcher).
+- Parents must run eligibility before `harness_debate_open`.
+## References
+- `.pi/prompts/harness-plan.md`
+- `.pi/harness/specs/plan-implementation-research-brief.schema.json`
+- `.pi/extensions/lib/plan-debate-eligibility.ts`
+- ADR 0034, ADR 0035

package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md ADDED Viewed

@@ -0,0 +1,31 @@
+# ADR 0037: Subagent submit tools (replace JSON prose contracts)
+**Status:** Accepted
+**Date:** 2026-05-19
+## Context
+Harness plan/execute agents used fenced JSON in `finalOutput`, requiring the parent orchestrator to parse prose and call `write_harness_yaml`. This was fragile (truncated parallel summaries, invalid JSON, double-hop writes).
+Planning agents set `extensions: false` and subprocess spawn used `--no-extensions`, so harness tools were unavailable in children.
+## Decision
+1. **Option A — subprocess-only extension bundle:** vendored spawn passes `--no-extensions -e .pi/extensions/harness-subagent-submit.ts` for `harness/*` agents with `extensions: false`.
+2. **Scoped `submit_*` tools** per agent, validated against `.pi/harness/specs/*.schema.json` (Ajv) and written deterministically under `HARNESS_RUN_DIR`.
+3. **Parent gates** via `harness_artifact_ready` (file existence) instead of parsing subprocess JSON.
+4. **Debate lanes:** `tool_result` hook prefers last `submit_*` in `details.results[].messages`; skips `finalOutput` auto-apply when submit present (`HARNESS_SUBMIT_TOOLS` default on).
+5. **Parent** blocks all `submit_*`; keeps `write_harness_yaml` for merges and debate round submission only.
+## Consequences
+- Agent frontmatter lists one terminal `submit_*` tool per role.
+- `HarnessSpawnContext` must include `run_id` / `run_dir`; bridge sets `HARNESS_RUN_ID`, `HARNESS_RUN_DIR`, `HARNESS_AGENT_ID` on spawn.
+- `parseHarnessAgentJson` retained for migration/tests; hot path is tool args.
+- See ADR 0038 for budget telemetry-only default.
+## References
+- `.pi/extensions/harness-subagent-submit.ts`
+- `.pi/extensions/lib/harness-subagent-submit-registry.ts`
+- `.pi/harness/specs/plan-scout-findings.schema.json`

package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md ADDED Viewed

@@ -0,0 +1,23 @@
+# ADR 0038: Budget enforcement telemetry-only (default)
+**Status:** Accepted
+**Date:** 2026-05-19
+## Context
+Token and debate caps emitted `harness-budget-exhausted`, which set `budgetExhausted` in the live widget and blocked flows even when `HARNESS_BUDGET_HARD_STOP` was false. `max_rounds` and messenger exchange limits in `validatePlanDebateGate` also hard-failed approval.
+## Decision
+- **`HARNESS_BUDGET_ENFORCE` default `off`:** phase/debate caps log `harness-budget-soft-limit` and `harness-budget-telemetry` only; `harness-budget-exhausted` is emitted only when enforce is on **and** hard-stop flags are set.
+- **UI:** `budgetExhausted` / blocked substate only when blocking exhaustion events qualify.
+- **Debate:** `capsForDebate` uses sentinel caps when enforce is off; `max_rounds` gate errors become warnings.
+- **CLI:** `--budget` on harness prompts is reserved/no-op until a real budget story ships.
+Re-enable: `HARNESS_BUDGET_ENFORCE=1` plus `HARNESS_BUDGET_HARD_STOP` / `HARNESS_DEBATE_HARD_STOP` as needed.
+## Consequences
+- Long debates and large plans are not blocked by soft token telemetry.
+- Quality gates (`min_focus_rounds`, required focuses, `review_gate_ready`) remain enforced.
+- PostHog dashboards should prefer `harness_budget_telemetry` over the exhausted event until enforcement is re-enabled.

package/.pi/harness/docs/adrs/README.md CHANGED Viewed

@@ -20,6 +20,10 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
 | [0032](0032-harness-command-orchestration.md) | Harness commands as agent orchestrators | Accepted |
 | [0033](0033-parent-orchestrated-planning.md) | Parent-orchestrated harness planning | Accepted |
 | [0034](0034-darwin-plan-research-pipeline.md) | Darwin plan research pipeline | Accepted |
+| [0035](0035-plan-phase-review-gate.md) | Plan-phase Review Gate | Accepted |
+| [0036](0036-implementation-research-and-selective-debate.md) | Implementation research and selective debate | Accepted |
+| [0037](0037-subagent-submit-tools.md) | Subagent submit tools (subprocess extension) | Accepted |
+| [0038](0038-budget-telemetry-only.md) | Budget caps telemetry-only by default | Accepted |
 ## Template