npm - ultimate-pi - Versions diffs - 0.15.0 → 0.17.0 - Mend

ultimate-pi 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

package/.pi/extensions/lib/plan-debate-round-status.ts CHANGED Viewed

@@ -7,12 +7,15 @@ import { access } from "node:fs/promises";
 import { join } from "node:path";
 import { capsForDebate } from "./debate-bus-core.js";
 import {
-	type PlanDebateFocus,
+	type PlanDebateRoundFocus,
 	readDebateRoundFocus,
 } from "./plan-debate-focus.js";
 import { planDebateIdForRun } from "./plan-debate-id.js";
 import { laneArtifactPath } from "./plan-debate-lane.js";
-import { lanesForRound } from "./plan-debate-lanes.js";
+import {
+	lanesForConsolidatedRound,
+	lanesForRound,
+} from "./plan-debate-lanes.js";
 import {
 	getMessengerRoundState,
 	loadMessengerState,
@@ -40,26 +43,32 @@ export interface RoundStatusResult {
 	dialogue: { ok: boolean; errors: string[] };
 	unresolved_claim_ids: string[];
 	exchange_count: number;
-	debate_round_focus?: PlanDebateFocus | null;
+	debate_round_focus?: PlanDebateRoundFocus | null;
 }
 export async function getPlanDebateRoundStatus(
 	runDir: string,
 	roundIndex: number,
 	runId?: string,
-	opts?: { debate_round_focus?: PlanDebateFocus },
+	opts?: { debate_round_focus?: PlanDebateRoundFocus },
 ): Promise<RoundStatusResult> {
+	const messengerState = await loadMessengerState(runDir);
+	const consolidated =
+		messengerState?.review_gate_mode === "consolidated" && roundIndex === 1;
 	const focus =
 		opts?.debate_round_focus ??
+		(consolidated ? ("all" as PlanDebateRoundFocus) : null) ??
 		(await readDebateRoundFocus(runDir, roundIndex));
 	const missing: string[] = [];
-	for (const lane of lanesForRound(roundIndex, focus)) {
+	const laneList = consolidated
+		? lanesForConsolidatedRound()
+		: lanesForRound(roundIndex, focus);
+	for (const lane of laneList) {
 		const rel = laneArtifactPath(lane, roundIndex);
 		if (!(await exists(join(runDir, rel)))) {
 			missing.push(rel);
 		}
 	}
-	const messengerState = await loadMessengerState(runDir);
 	const profile = messengerState?.debate_profile;
 	const caps = capsForDebate(
 		runId ? planDebateIdForRun(runId) : `plan-${runId ?? "unknown"}`,
@@ -73,7 +82,9 @@ export async function getPlanDebateRoundStatus(
 	if (!dialogue.ok) {
 		missing.push(...dialogue.errors.map((e) => `messenger: ${e}`));
 	}
-	const reviewRound = `artifacts/review-round-r${roundIndex}.yaml`;
+	const reviewRound = consolidated
+		? "artifacts/review-round-consolidated.yaml"
+		: `artifacts/review-round-r${roundIndex}.yaml`;
 	const reviewRoundOnDisk = await exists(join(runDir, reviewRound));
 	let next_tool: string | undefined;

package/.pi/extensions/lib/plan-messenger.ts CHANGED Viewed

@@ -63,6 +63,8 @@ export interface MessengerState {
 	rounds: Record<string, MessengerRoundState>;
 	debate_profile?: DebateProfile;
 	required_focuses?: PlanDebateFocus[];
+	/** consolidated = single Review Gate round; threaded = per-focus rounds */
+	review_gate_mode?: "consolidated" | "threaded";
 }
 function messengerRoot(runDir: string): string {
@@ -84,6 +86,7 @@ export async function initPlanMessenger(
 		debateId: string;
 		debate_profile?: DebateProfile;
 		required_focuses?: PlanDebateFocus[];
+		review_gate_mode?: "consolidated" | "threaded";
 	},
 ): Promise<string> {
 	const root = messengerRoot(runDir);
@@ -97,6 +100,7 @@ export async function initPlanMessenger(
 		rounds: {},
 		debate_profile: opts.debate_profile,
 		required_focuses: opts.required_focuses,
+		review_gate_mode: opts.review_gate_mode,
 	};
 	await writeFile(
 		join(root, "state.json"),

package/.pi/extensions/lib/plan-review-gate.ts ADDED Viewed

@@ -0,0 +1,51 @@
+/**
+ * Consolidated vs threaded Review Gate strategy for plan-phase debate.
+ */
+import type {
+	DebateEligibilityResult,
+	PlanReviewGateStrategy,
+} from "./plan-debate-eligibility.js";
+import type { PlanDebateFocus } from "./plan-debate-focus.js";
+export type { PlanReviewGateStrategy };
+export const CONSOLIDATED_REVIEW_ROUND = 1;
+export const CONSOLIDATED_REVIEW_ARTIFACT =
+	"artifacts/review-round-consolidated.yaml";
+export function planReviewGateStrategyFromEligibility(
+	eligibility: DebateEligibilityResult,
+): PlanReviewGateStrategy {
+	return (
+		eligibility.review_gate_strategy ?? {
+			mode: eligibility.profile === "fast" ? "consolidated" : "threaded",
+			profile: eligibility.profile,
+			required_focuses: [...eligibility.required_focuses],
+			min_focus_rounds: eligibility.min_focus_rounds,
+			max_rounds: eligibility.max_rounds,
+			max_exchanges_per_round: eligibility.max_exchanges_per_round,
+			round_token_cap: eligibility.round_token_cap,
+			debate_global_cap: eligibility.debate_global_cap,
+			rationale: [...eligibility.rationale],
+		}
+	);
+}
+export function isConsolidatedReviewStrategy(
+	strategy: PlanReviewGateStrategy,
+): boolean {
+	return strategy.mode === "consolidated";
+}
+/** Focus areas covered in a single consolidated review round (spec + quality gate). */
+export const CONSOLIDATED_REVIEW_FOCUS_AREAS: readonly PlanDebateFocus[] = [
+	"spec",
+	"quality",
+];
+export function consolidatedReviewFocusesSatisfied(
+	covered: readonly string[],
+): boolean {
+	return CONSOLIDATED_REVIEW_FOCUS_AREAS.every((f) => covered.includes(f));
+}

package/.pi/extensions/trace-recorder.ts CHANGED Viewed

@@ -235,6 +235,7 @@ export default function traceRecorder(pi: ExtensionAPI) {
 		if (shouldEmitStarted) {
 			captureHarnessEvent(sessionId, "harness_run_started", {
 				harness_run_id: runId,
+				run_id: runId,
 				harness_plan_id: activeRun.planId,
 				harness_phase: activeRun.phase,
 				pi_session_id: sessionId,

package/.pi/harness/agents.manifest.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
 	"schema_version": "1.0.0",
 	"package": "ultimate-pi",
-	"package_version": "0.14.0",
-	"generated_at": "2026-05-19T10:53:28.359Z",
+	"package_version": "0.15.0",
+	"generated_at": "2026-05-19T12:56:13.369Z",
 	"agents": {
 		"pi-pi/agent-expert": {
 			"path": ".pi/agents/pi-pi/agent-expert.md",
@@ -46,23 +46,23 @@
 		},
 		"harness/adversary": {
 			"path": ".pi/agents/harness/adversary.md",
-			"sha256": "dd2ef87139cb175d795f4d7bde78dca1a181d2e42c3c3bd0d48832cf5069aa29"
+			"sha256": "560c7571ab91478bde1271e9ae6c3a112c3e1d28e1a261c5450fd1d00f9f89af"
 		},
 		"harness/evaluator": {
 			"path": ".pi/agents/harness/evaluator.md",
-			"sha256": "2b8039fd79f9177fdafd5319a53a96812719d4f1f68e2de70632030142649cfe"
+			"sha256": "a4667d3efb305ba2fe79118e3d7d2b0de5e0369637af040d1238161d75cd28ac"
 		},
 		"harness/executor": {
 			"path": ".pi/agents/harness/executor.md",
-			"sha256": "b549e9fc802ba23857a1bc6b2ff36f3c169e708fe5ec13857b3bcfe841384f1f"
+			"sha256": "6baffcc3d89954494ce3ae439175686a39928b6a543a0a451da27475094b1712"
 		},
 		"harness/incident-recorder": {
 			"path": ".pi/agents/harness/incident-recorder.md",
-			"sha256": "d7577c911a9e6c9607eb64f76337aab85c4eb9a92e7cd917eb8d989ef3cd1de5"
+			"sha256": "d42fa45de1a2fe3842d075c6f319315266588942e314f1b650caabac39bdc29a"
 		},
 		"harness/meta-optimizer": {
 			"path": ".pi/agents/harness/meta-optimizer.md",
-			"sha256": "a4eed88084c7cfb5ace3edc72b72d7ead4134b3eae0d444b391decfe2640a632"
+			"sha256": "cbaab35367126796b7136389a02ab41b4fd1fe7098cf83be562d7b7493ccc297"
 		},
 		"harness/sentrux-bootstrap": {
 			"path": ".pi/agents/harness/sentrux-bootstrap.md",
@@ -70,63 +70,63 @@
 		},
 		"harness/tie-breaker": {
 			"path": ".pi/agents/harness/tie-breaker.md",
-			"sha256": "68f02b86e95927f06d7f963e1f61f193159bbef1ba4558d90c84d5457d62b3f7"
+			"sha256": "1c54c1c3274291dea1ea8826563a7ad4fe1d9c4302984e907bfcd22cfc4f5eba"
 		},
 		"harness/trace-librarian": {
 			"path": ".pi/agents/harness/trace-librarian.md",
-			"sha256": "03b499a948b8467f1cfe2b4e63190feb7b8b9d96461055638e774253b9b6b2d4"
+			"sha256": "336b3f3f6141cef8750ab18d29bbe454caf26973830a86afe099d9e4ad8b0abe"
 		},
 		"harness/planning/decompose": {
 			"path": ".pi/agents/harness/planning/decompose.md",
-			"sha256": "5c3b983772d013741d50f39945bc77f178aa338aecab56b93c09216d72192c69"
+			"sha256": "0919dafa1d1cd008d513c28524c1e7218867586a138982dccf01db5270c42c73"
 		},
 		"harness/planning/execution-plan-author": {
 			"path": ".pi/agents/harness/planning/execution-plan-author.md",
-			"sha256": "16f8800c50bcaf1b82ed9138889c8a0e538ee6a139aeae129ccd20cec2ec25f7"
+			"sha256": "55ece0f1ee14abd17fe7b3e478b548240f637eacbfc2a34758e98d3878dc82fd"
 		},
 		"harness/planning/hypothesis-validator": {
 			"path": ".pi/agents/harness/planning/hypothesis-validator.md",
-			"sha256": "9e68ec5d6aef96a3666c30227c3cbddf1aaed1182fdc94dbbd21ad3d48315ff2"
+			"sha256": "36f0baa7796229f21bd02faf5e70402c7bf054289eab557a25bfbe3cb7781de7"
 		},
 		"harness/planning/hypothesis": {
 			"path": ".pi/agents/harness/planning/hypothesis.md",
-			"sha256": "b20c527d15c2243cd5d3a8f16cea6d44bdfd16e01915d42f3b830bf9938e5f8b"
+			"sha256": "e83d5c4faaee8d32af4a5f22c9917b70a173f3e22d7c0f182b361706f2309171"
 		},
 		"harness/planning/implementation-researcher": {
 			"path": ".pi/agents/harness/planning/implementation-researcher.md",
-			"sha256": "dbd1c4fc74d538b110d406febfd4603eebea77d82e8b367df4596ac7ff6e54cc"
+			"sha256": "653f320b5d51bb331774246687f24a75347b406bba4e6dfd2968d6e5d4cc8bb3"
 		},
 		"harness/planning/plan-adversary": {
 			"path": ".pi/agents/harness/planning/plan-adversary.md",
-			"sha256": "7c14eaab65f356003ee2ff380f5d4e620170b5126daa67c3d226b12342f47bd2"
+			"sha256": "3241d7ec939dc29e0af64690b99e9f74b209f40b0daa4a2a1f9ff86f99f94a8d"
 		},
 		"harness/planning/plan-evaluator": {
 			"path": ".pi/agents/harness/planning/plan-evaluator.md",
-			"sha256": "846575abe9df3e7e5be812c0c474989c1a9de8074a7884d77b9d3dd423643480"
+			"sha256": "71660ab58bfcfdfae56c873140d4ea5946ae30cd5719c96afeabfd02b1d1f81d"
 		},
 		"harness/planning/review-integrator": {
 			"path": ".pi/agents/harness/planning/review-integrator.md",
-			"sha256": "bed43f3f049c279ac50a24bcffac1bbe46a8605d89c9cc6d0c3c6a87d488b1b8"
+			"sha256": "cf3f0dbe81274ec9ef0ff2e0c170e8dc929b20be65492d0ee9a80d985acf6d71"
 		},
 		"harness/planning/scout-graphify": {
 			"path": ".pi/agents/harness/planning/scout-graphify.md",
-			"sha256": "7f385d5bda2fe04b9da52cb4cb9247324efd345579b483d3ad55a6abefad50d5"
+			"sha256": "6e2bda8ad38311810c9916d9dab311873bc776e4b8832bb0e574136e45e1255e"
 		},
 		"harness/planning/scout-semantic": {
 			"path": ".pi/agents/harness/planning/scout-semantic.md",
-			"sha256": "36bd424ebd422bda82bd447b22f591f99f32ec897ea43f385586119da5c26caa"
+			"sha256": "416e518d8204a55b26dc53da1f750865c6f09ee2c7f343b41e7c08da3230c089"
 		},
 		"harness/planning/scout-structure": {
 			"path": ".pi/agents/harness/planning/scout-structure.md",
-			"sha256": "e67b7cd75519e5ae36e1bb5f49ca158888c28d365465863aee50a9b2e8e5b7d7"
+			"sha256": "76c42a15cc74cf1de2cf861cb0146c865c205f69cce7b9605d41893b19600029"
 		},
 		"harness/planning/sprint-contract-auditor": {
 			"path": ".pi/agents/harness/planning/sprint-contract-auditor.md",
-			"sha256": "d915274dc9b5addae5499bc2390b348eddeb8f133b526a816e23d0d19a2618bf"
+			"sha256": "12cb5e6b53dcc19ace62e8e4c152d96440717df53a182e76216dd2327410df4d"
 		},
 		"harness/planning/stack-researcher": {
 			"path": ".pi/agents/harness/planning/stack-researcher.md",
-			"sha256": "fa228920abe2b66d4d8921c4a5d85593e3019a24bbe9ae512ed9149f235e3536"
+			"sha256": "ce546ef3aca19da7f334f07cef8f510b79068bffeb7f276c428f3e6236bbe96b"
 		}
 	}
 }

package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md ADDED Viewed

@@ -0,0 +1,31 @@
+# ADR 0037: Subagent submit tools (replace JSON prose contracts)
+**Status:** Accepted
+**Date:** 2026-05-19
+## Context
+Harness plan/execute agents used fenced JSON in `finalOutput`, requiring the parent orchestrator to parse prose and call `write_harness_yaml`. This was fragile (truncated parallel summaries, invalid JSON, double-hop writes).
+Planning agents set `extensions: false` and subprocess spawn used `--no-extensions`, so harness tools were unavailable in children.
+## Decision
+1. **Option A — subprocess-only extension bundle:** vendored spawn passes `--no-extensions -e .pi/extensions/harness-subagent-submit.ts` for `harness/*` agents with `extensions: false`.
+2. **Scoped `submit_*` tools** per agent, validated against `.pi/harness/specs/*.schema.json` (Ajv) and written deterministically under `HARNESS_RUN_DIR`.
+3. **Parent gates** via `harness_artifact_ready` (file existence) instead of parsing subprocess JSON.
+4. **Debate lanes:** the `tool_result` hook prefers the last `submit_*` call in `details.results[].messages` and skips the `finalOutput` auto-apply when a submit call is present (`HARNESS_SUBMIT_TOOLS`, on by default).
+5. **Parent** blocks all `submit_*`; keeps `write_harness_yaml` for merges and debate round submission only.
+## Consequences
+- Agent frontmatter lists one terminal `submit_*` tool per role.
+- `HarnessSpawnContext` must include `run_id` / `run_dir`; bridge sets `HARNESS_RUN_ID`, `HARNESS_RUN_DIR`, `HARNESS_AGENT_ID` on spawn.
+- `parseHarnessAgentJson` retained for migration/tests; hot path is tool args.
+- See ADR 0038 for budget telemetry-only default.
+## References
+- `.pi/extensions/harness-subagent-submit.ts`
+- `.pi/extensions/lib/harness-subagent-submit-registry.ts`
+- `.pi/harness/specs/plan-scout-findings.schema.json`

package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md ADDED Viewed

@@ -0,0 +1,23 @@
+# ADR 0038: Budget enforcement telemetry-only (default)
+**Status:** Accepted
+**Date:** 2026-05-19
+## Context
+Token and debate caps emitted `harness-budget-exhausted`, which set `budgetExhausted` in the live widget and blocked flows even when `HARNESS_BUDGET_HARD_STOP` was false. `max_rounds` and messenger exchange limits in `validatePlanDebateGate` also hard-failed approval.
+## Decision
+- **`HARNESS_BUDGET_ENFORCE` default `off`:** phase/debate caps log `harness-budget-soft-limit` and `harness-budget-telemetry` only; `harness-budget-exhausted` is emitted only when enforce is on **and** hard-stop flags are set.
+- **UI:** the `budgetExhausted` / blocked substate is set only by exhaustion events that qualify as blocking.
+- **Debate:** `capsForDebate` uses sentinel caps when enforce is off; `max_rounds` gate errors become warnings.
+- **CLI:** `--budget` on harness prompts is reserved/no-op until a real budget story ships.
+To re-enable enforcement, set `HARNESS_BUDGET_ENFORCE=1` plus `HARNESS_BUDGET_HARD_STOP` / `HARNESS_DEBATE_HARD_STOP` as needed.
+## Consequences
+- Long debates and large plans are not blocked by soft token telemetry.
+- Quality gates (`min_focus_rounds`, required focuses, `review_gate_ready`) remain enforced.
+- PostHog dashboards should prefer `harness_budget_telemetry` over the exhausted event until enforcement is re-enabled.

package/.pi/harness/docs/adrs/README.md CHANGED Viewed

@@ -22,6 +22,8 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
 | [0034](0034-darwin-plan-research-pipeline.md) | Darwin plan research pipeline | Accepted |
 | [0035](0035-plan-phase-review-gate.md) | Plan-phase Review Gate | Accepted |
 | [0036](0036-implementation-research-and-selective-debate.md) | Implementation research and selective debate | Accepted |
+| [0037](0037-subagent-submit-tools.md) | Subagent submit tools (subprocess extension) | Accepted |
+| [0038](0038-budget-telemetry-only.md) | Budget caps telemetry-only by default | Accepted |
 ## Template

package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml ADDED Viewed

@@ -0,0 +1,28 @@
+schema_version: "1.0.0"
+problem_framing: Validate harness plan-phase with fixture-driven smoke
+sub_problems:
+  - DAG validation
+  - Debate gate coverage
+internal_references:
+  - path: .pi/harness/evals/smoke/smoke-harness-plan.mjs
+    relevance: Existing smoke pattern
+    reuse_signal: high
+external_references: []
+solution_patterns:
+  - name: fixture-driven gate
+    provenance: in-repo smoke
+    fit: Validates plan pipeline without live agents
+    tradeoffs:
+      pros: [Deterministic CI]
+      cons: []
+    risks: []
+similar_implementations: []
+recommended_approach:
+  summary: Extend minimal-med fixture with implementation artifact
+  recommended_approach_confidence: high
+  confidence_rationale: Reuses established smoke-harness-plan pattern
+  evidence_refs:
+    - .pi/harness/evals/smoke/smoke-harness-plan.mjs
+    - .pi/scripts/validate-plan-dag.mjs
+anti_patterns: []
+open_questions: []

package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml ADDED Viewed

@@ -0,0 +1,25 @@
+schema_version: "1.0.0"
+round_index: 1
+debate_round_focus: all
+round_summary: Consolidated review gate for fast profile fixture
+validation_summary: Spec and quality checks pass in one round
+adversary_summary: No blockers
+disputes: []
+recommended_packet_patches: []
+review_gate_ready: true
+participants:
+  - PlanEvaluatorAgent
+  - PlanAdversaryAgent
+  - SprintContractAuditorAgent
+  - ReviewIntegratorAgent
+claims:
+  - consolidated review gate ready
+rebuttals: []
+evidence_refs: []
+token_usage:
+  per_agent:
+    PlanEvaluatorAgent: 120
+    PlanAdversaryAgent: 100
+    SprintContractAuditorAgent: 80
+  round_total: 300
+consensus_delta: 0.1

package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml ADDED Viewed

@@ -0,0 +1,196 @@
+schema_version: "1.0.0"
+contract_version: "1.1.0"
+plan_id: plan-smoke-fixture-001
+task_id: task-smoke-001
+scope: Smoke fixture for plan-phase harness validation with execution_plan and debate artifacts.
+assumptions:
+  - Fixture only; no live agent run
+risk_level: med
+acceptance_checks:
+  - id: AC-1
+    description: DAG validation passes
+  - id: AC-2
+    description: Consolidated debate round recorded (fast profile)
+  - id: AC-3
+    description: Stack brief present in research-brief
+  - id: AC-4
+    description: Sprint contract complete
+  - id: AC-5
+    description: plan-review.md renders
+rollback_plan:
+  revert_commit_ready: true
+  rollback_artifacts:
+    revert_command: git revert HEAD
+    revert_branch: main
+    patch_bundle: .pi/harness/runs/smoke-fixture/patch.bundle
+execution_plan:
+  schema_version: "1.0.0"
+  phases:
+    - phase_id: P1
+      name: Foundation
+      objective: Establish baseline and verify harness wiring
+      entry_criteria:
+        - Fixture loaded
+      exit_criteria:
+        - AC-1 satisfied
+      milestone: M1-baseline
+      work_item_ids: [WI-1, WI-2, WI-3]
+    - phase_id: P2
+      name: Build
+      objective: Implement core changes
+      entry_criteria:
+        - M1-baseline complete
+      exit_criteria:
+        - AC-2 satisfied
+      milestone: M2-build
+      work_item_ids: [WI-4, WI-5, WI-6]
+    - phase_id: P3
+      name: Verify
+      objective: Quality gate and documentation
+      entry_criteria:
+        - M2-build complete
+      exit_criteria:
+        - AC-5 satisfied
+      milestone: M3-ship
+      work_item_ids: [WI-7, WI-8]
+  work_items:
+    - work_item_id: WI-1
+      phase_id: P1
+      title: Load fixture packet
+      description: Read plan-packet.yaml from fixture directory
+      depends_on: []
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml
+      parallel_safe: true
+      done_criteria:
+        type: manual
+        spec: Fixture packet readable
+      acceptance_check_ids: [AC-1]
+    - work_item_id: WI-2
+      phase_id: P1
+      title: Run DAG validator
+      description: Execute validate-plan-dag.mjs
+      depends_on: [WI-1]
+      files:
+        - .pi/scripts/validate-plan-dag.mjs
+      parallel_safe: false
+      done_criteria:
+        type: command
+        spec: node .pi/scripts/validate-plan-dag.mjs --packet plan-packet.yaml
+      acceptance_check_ids: [AC-1]
+    - work_item_id: WI-3
+      phase_id: P1
+      title: Lint harness-yaml
+      description: Ensure YAML helpers parse fixture
+      depends_on: [WI-1]
+      files:
+        - .pi/lib/harness-yaml.ts
+      parallel_safe: true
+      done_criteria:
+        type: lint
+        spec: npm test
+      acceptance_check_ids: [AC-1]
+    - work_item_id: WI-4
+      phase_id: P2
+      title: Debate round 1-2 artifacts
+      description: Validate review-round YAML
+      depends_on: [WI-2]
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml
+      parallel_safe: false
+      done_criteria:
+        type: artifact
+        spec: artifacts/review-round-r1.yaml exists
+      acceptance_check_ids: [AC-2]
+    - work_item_id: WI-5
+      phase_id: P2
+      title: Debate round 3-4 artifacts
+      description: Validate final review round
+      depends_on: [WI-4]
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
+      parallel_safe: false
+      done_criteria:
+        type: artifact
+        spec: artifacts/review-round-r4.yaml exists
+      acceptance_check_ids: [AC-2]
+    - work_item_id: WI-6
+      phase_id: P2
+      title: Stack research merge
+      description: research-brief includes stack section
+      depends_on: [WI-2]
+      files: []
+      non_code: true
+      parallel_safe: true
+      done_criteria:
+        type: manual
+        spec: research-brief.yaml contains stack key
+      acceptance_check_ids: [AC-3]
+    - work_item_id: WI-7
+      phase_id: P3
+      title: Sprint contract audit
+      description: R4 sprint audit artifact
+      depends_on: [WI-5]
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml
+      parallel_safe: false
+      done_criteria:
+        type: artifact
+        spec: sprint-audit-r4.yaml present
+      acceptance_check_ids: [AC-4]
+    - work_item_id: WI-8
+      phase_id: P3
+      title: Render plan-review
+      description: Human-readable plan review markdown
+      depends_on: [WI-7]
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md
+      parallel_safe: false
+      done_criteria:
+        type: manual
+        spec: plan-review.md non-empty
+      acceptance_check_ids: [AC-5]
+  sprint_contract:
+    in_scope:
+      - Fixture validation only
+    out_of_scope:
+      - Production deploy
+    definition_of_done: All smoke checks green
+    assumptions:
+      - CI environment has node
+    external_dependencies: []
+  wbs_dictionary:
+    - work_item_id: WI-1
+      deliverable: Fixture packet loaded
+      owner_role: executor
+      inputs: []
+      outputs: [parsed packet]
+  risk_register:
+    - risk_id: R1
+      description: DAG validator false negative
+      likelihood: low
+      impact: high
+      mitigation: Unit tests on validate-plan-dag.mjs
+      linked_work_item_ids: [WI-2]
+    - risk_id: R2
+      description: Debate cap misconfiguration
+      likelihood: med
+      impact: med
+      mitigation: debate-orchestrator plan profile tests
+      linked_work_item_ids: [WI-4]
+    - risk_id: R3
+      description: YAML parse drift
+      likelihood: low
+      impact: med
+      mitigation: harness-yaml strict parse
+      linked_work_item_ids: [WI-3]
+  schedule_metadata:
+    critical_path_work_item_ids: [WI-1, WI-2, WI-4, WI-5, WI-7, WI-8]
+    parallel_groups:
+      - [WI-1, WI-3]
+    schedule_baseline_note: Fixture topological order; no calendar dates
+  dag_validation:
+    status: pass
+    topological_order: [WI-1, WI-2, WI-3, WI-4, WI-5, WI-6, WI-7, WI-8]
+    cycles: []
+    conflicts: []

package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md ADDED Viewed

@@ -0,0 +1,14 @@
+# Plan review (fixture)
+plan_id: plan-smoke-fixture-001
+## Execution plan
+Phases: P1 Foundation → P2 Build → P3 Verify
+Critical path: WI-1 → WI-2 → WI-4 → WI-5 → WI-7 → WI-8
+## Debate
+- Round 1 (spec): review_gate_ready
+- Round 4 (quality): review_gate_ready

package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml ADDED Viewed

@@ -0,0 +1,62 @@
+decomposition:
+  schema_version: "1.0.0"
+  problem_restatement: Light-profile smoke for two-focus debate
+hypothesis:
+  schema_version: "1.0.0"
+  primary:
+    claim: Light debate covers spec and quality only
+    mechanism: Eligibility profile light with min_focus_rounds 2
+    prediction: planDebateOutcomeComplete passes with two rounds
+    experiment: Run smoke-harness-plan.mjs --fixture minimal-low-light
+implementation:
+  schema_version: "1.0.0"
+  problem_framing: Low-risk fixture for selective debate
+  sub_problems: [spec coverage, quality coverage]
+  internal_references:
+    - path: test/plan-debate-eligibility.test.mjs
+      relevance: Eligibility unit tests
+      reuse_signal: high
+  external_references: []
+  solution_patterns:
+    - name: light profile gate
+      provenance: ADR-0036
+      fit: Reduces debate cost on trivial tasks
+      tradeoffs:
+        pros: [Fewer rounds]
+        cons: []
+      risks: []
+  similar_implementations:
+    - name: minimal-med four-focus fixture
+      what_it_solves: Full debate coverage
+      gap_vs_us: Light uses two focuses only
+  recommended_approach:
+    summary: Two review rounds with spec then quality
+    recommended_approach_confidence: high
+    confidence_rationale: Deterministic fixture aligned with eligibility rules
+    evidence_refs:
+      - .pi/extensions/lib/plan-debate-eligibility.ts
+      - test/plan-debate-eligibility.test.mjs
+  anti_patterns: []
+  open_questions: []
+stack:
+  schema_version: "1.0.0"
+  problem_framing: Node harness tooling
+  constraints: []
+  options:
+    - name: extend current stack
+      category: brownfield
+      fit_summary: Use existing ultimate-pi harness
+      tradeoffs:
+        pros: [No new deps]
+        cons: []
+      risks: []
+      evidence_refs: []
+      recommendation_rank: 1
+  recommended_primary: extend current stack
+  rationale: Fixture validates in-repo harness
+eval:
+  schema_version: "1.0.0"
+  revision_recommended: false
+  relevance:
+    passes: true
+    rationale: Hypothesis matches light smoke task