npm - ultimate-pi - Versions diffs - 0.11.0 → 0.12.0 - Mend

ultimate-pi 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122)

package/.pi/extensions/review-integrity.ts CHANGED Viewed

@@ -1,9 +1,8 @@
 /**
  * review-integrity — enforce evaluator/adversary isolation from executor session.
  *
- * Parent orchestrators spawn review agents in isolated subagent sessions.
- * Direct review tools in the executor session are blocked; Agent/get_subagent_result
- * for harness review agents remain allowed.
+ * Parent orchestrators spawn review agents in isolated subprocesses via `subagent`.
+ * Direct review tools in the executor session are blocked.
  */
 import { appendFile, mkdir } from "node:fs/promises";
@@ -15,12 +14,6 @@ type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
 const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
 const INCIDENT_FILE = join(INCIDENTS_DIR, "review-integrity.jsonl");
-const ORCHESTRATION_TOOLS = new Set([
-	"Agent",
-	"get_subagent_result",
-	"steer_subagent",
-]);
 const REVIEW_SUBAGENT_TYPES = new Set([
 	"harness/evaluator",
 	"harness/adversary",
@@ -104,15 +97,45 @@ function restoreState(ctx: {
 	};
 }
-function subagentTypeFromInput(
+function agentsFromSubagentInput(
 	input: Record<string, unknown> | undefined,
-): string {
-	if (!input) return "";
-	const direct = input.subagent_type;
-	if (typeof direct === "string") return direct;
-	const nested = input as { subagentType?: string };
-	if (typeof nested.subagentType === "string") return nested.subagentType;
-	return "";
+): string[] {
+	if (!input) return [];
+	const names: string[] = [];
+	if (typeof input.agent === "string") names.push(input.agent);
+	const tasks = input.tasks;
+	if (Array.isArray(tasks)) {
+		for (const t of tasks) {
+			if (
+				t &&
+				typeof t === "object" &&
+				typeof (t as { agent?: string }).agent === "string"
+			) {
+				names.push((t as { agent: string }).agent);
+			}
+		}
+	}
+	const chain = input.chain;
+	if (Array.isArray(chain)) {
+		for (const c of chain) {
+			if (
+				c &&
+				typeof c === "object" &&
+				typeof (c as { agent?: string }).agent === "string"
+			) {
+				names.push((c as { agent: string }).agent);
+			}
+		}
+	}
+	const agg = input.aggregator;
+	if (
+		agg &&
+		typeof agg === "object" &&
+		typeof (agg as { agent?: string }).agent === "string"
+	) {
+		names.push((agg as { agent: string }).agent);
+	}
+	return names;
 }
 async function appendIncident(payload: Record<string, unknown>): Promise<void> {
@@ -178,26 +201,26 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
 				customType: "harness-review-integrity-hint",
 				display: true,
 				content: [
-					"Review phase in executor session: spawn harness/evaluator or harness/adversary via Agent (isolated subagent context).",
-					"Do not run review checks directly in this session — use get_subagent_result after spawn.",
+					"Review phase in executor session: spawn harness/evaluator or harness/adversary via subagent (isolated subprocess).",
+					"Do not run review checks directly in this session.",
 				].join("\n"),
 			},
 		};
 	});
 	pi.on("tool_call", async (event, ctx) => {
-		if (event.toolName === "Agent") {
-			const subagentType = subagentTypeFromInput(
+		if (event.toolName === "subagent") {
+			const agents = agentsFromSubagentInput(
 				event.input as Record<string, unknown> | undefined,
 			);
-			if (subagentType === EXECUTOR_SUBAGENT_TYPE) {
+			if (agents.includes(EXECUTOR_SUBAGENT_TYPE)) {
 				state.executorSessionId = ctx.sessionManager.getSessionId();
 				state.violationActive = false;
 				state.updatedAt = nowIso();
 				persist();
 				return undefined;
 			}
-			if (REVIEW_SUBAGENT_TYPES.has(subagentType)) {
+			if (agents.some((a) => REVIEW_SUBAGENT_TYPES.has(a))) {
 				state.violationActive = false;
 				state.updatedAt = nowIso();
 				persist();
@@ -207,10 +230,6 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
 		if (!state.violationActive) return undefined;
-		if (ORCHESTRATION_TOOLS.has(event.toolName)) {
-			return undefined;
-		}
 		await appendIncident({
 			type: "review_integrity_violation",
 			session_id: ctx.sessionManager.getSessionId(),
@@ -218,13 +237,13 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
 			reason:
 				"direct tool use in review phase while sharing executor session context",
 			mitigation:
-				"spawn harness/evaluator or harness/adversary via Agent instead",
+				"spawn harness/evaluator or harness/adversary via subagent instead",
 		});
 		return {
 			block: true,
 			reason:
-				"review-integrity: tool blocked in review phase — spawn an isolated review subagent via Agent.",
+				"review-integrity: tool blocked in review phase — spawn an isolated review subagent via subagent.",
 		};
 	});

package/.pi/harness/agents.manifest.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
 	"schema_version": "1.0.0",
 	"package": "ultimate-pi",
-	"package_version": "0.10.1",
-	"generated_at": "2026-05-17T17:48:22.826Z",
+	"package_version": "0.11.0",
+	"generated_at": "2026-05-18T09:43:44.563Z",
 	"agents": {
 		"pi-pi/agent-expert": {
 			"path": ".pi/agents/pi-pi/agent-expert.md",
@@ -46,27 +46,23 @@
 		},
 		"harness/adversary": {
 			"path": ".pi/agents/harness/adversary.md",
-			"sha256": "b965f90610ca942d08b656f1aee839266d08a92beb174b8761dd5e840694a899"
+			"sha256": "dd2ef87139cb175d795f4d7bde78dca1a181d2e42c3c3bd0d48832cf5069aa29"
 		},
 		"harness/evaluator": {
 			"path": ".pi/agents/harness/evaluator.md",
-			"sha256": "6c0de777a10de26dba4a6feb5641495fa5c2d31072a8b0e597a5ecc9921f129f"
+			"sha256": "2b8039fd79f9177fdafd5319a53a96812719d4f1f68e2de70632030142649cfe"
 		},
 		"harness/executor": {
 			"path": ".pi/agents/harness/executor.md",
-			"sha256": "5af3ec2be4d64a738834e36d480a36c2bee4359e8cd5a2e1aac49be4cff79589"
+			"sha256": "b549e9fc802ba23857a1bc6b2ff36f3c169e708fe5ec13857b3bcfe841384f1f"
 		},
 		"harness/incident-recorder": {
 			"path": ".pi/agents/harness/incident-recorder.md",
-			"sha256": "2de405f77b62dde38f331665bff220a3ef131c3c1cd42eebee364000fc83352b"
+			"sha256": "d7577c911a9e6c9607eb64f76337aab85c4eb9a92e7cd917eb8d989ef3cd1de5"
 		},
 		"harness/meta-optimizer": {
 			"path": ".pi/agents/harness/meta-optimizer.md",
-			"sha256": "ef2fb950e18e3a6439e91a68f764fc7ec922cd2d6b35de8f656f376854974d04"
-		},
-		"harness/planner": {
-			"path": ".pi/agents/harness/planner.md",
-			"sha256": "648b9e4c56a6c0f983ae990238952579bb4745af81eb6c79add5325ea4929c91"
+			"sha256": "a4eed88084c7cfb5ace3edc72b72d7ead4134b3eae0d444b391decfe2640a632"
 		},
 		"harness/sentrux-bootstrap": {
 			"path": ".pi/agents/harness/sentrux-bootstrap.md",
@@ -74,43 +70,59 @@
 		},
 		"harness/tie-breaker": {
 			"path": ".pi/agents/harness/tie-breaker.md",
-			"sha256": "651f50b9e2c7903c542700e94908b1fcd026ebed12aa1f1d6ec481df3567e34f"
+			"sha256": "68f02b86e95927f06d7f963e1f61f193159bbef1ba4558d90c84d5457d62b3f7"
 		},
 		"harness/trace-librarian": {
 			"path": ".pi/agents/harness/trace-librarian.md",
-			"sha256": "d63fe08a2ea0466c0fd89fff4da03ac1d9d3580c306381cee251c89d4e8fdb97"
+			"sha256": "03b499a948b8467f1cfe2b4e63190feb7b8b9d96461055638e774253b9b6b2d4"
 		},
 		"harness/planning/decompose": {
 			"path": ".pi/agents/harness/planning/decompose.md",
-			"sha256": "a4f33869759ebdc049e77b344ae050be5ede08ea9a92216b8599cc7d2f14c052"
+			"sha256": "1b3f85d956d2e203ec87045a731c47f8b40f75b63fce8916fda91cefc39244a8"
+		},
+		"harness/planning/execution-plan-author": {
+			"path": ".pi/agents/harness/planning/execution-plan-author.md",
+			"sha256": "a69fb2e8bda9336e71ce9536071f9c8a2f4abd9d9d88930c6a8be29bdc9c5f62"
 		},
-		"harness/planning/hypothesis-eval": {
-			"path": ".pi/agents/harness/planning/hypothesis-eval.md",
-			"sha256": "7a05e2f746bf79f20096cbfb12aaee31a0717e660680b44cb285ea967b3141e5"
+		"harness/planning/hypothesis-validator": {
+			"path": ".pi/agents/harness/planning/hypothesis-validator.md",
+			"sha256": "f75312439c441ccee72692d41f44b6e733df08e06c89e930740fc256bed3ba02"
 		},
 		"harness/planning/hypothesis": {
 			"path": ".pi/agents/harness/planning/hypothesis.md",
-			"sha256": "7f2af6dda328d6cc1279dbff20a46b1d93aacfda9d57857cc6117685f8a585dd"
+			"sha256": "b20c527d15c2243cd5d3a8f16cea6d44bdfd16e01915d42f3b830bf9938e5f8b"
 		},
 		"harness/planning/plan-adversary": {
 			"path": ".pi/agents/harness/planning/plan-adversary.md",
-			"sha256": "4beceb8c4181f82b7eb006d87392c0adb4c7ce41992193790888e7298c1b7594"
+			"sha256": "685926c638ae1377361d7cafda5e400be19cb3880510d8f6d389a5876647575f"
+		},
+		"harness/planning/plan-evaluator": {
+			"path": ".pi/agents/harness/planning/plan-evaluator.md",
+			"sha256": "44fd52389d7e43dd5093653cba9694900561318ee5f00e3bc05c3ecef5d43621"
 		},
-		"harness/planning/planner": {
-			"path": ".pi/agents/harness/planning/planner.md",
-			"sha256": "570c501c976e26d79a36814787eb03fab6aa97f79cc895af319dc717648a2a65"
+		"harness/planning/review-integrator": {
+			"path": ".pi/agents/harness/planning/review-integrator.md",
+			"sha256": "d0e8214539d0a78b9e5add70e61dd4e4de36def64172cda18d9b70727e7600ca"
 		},
 		"harness/planning/scout-graphify": {
 			"path": ".pi/agents/harness/planning/scout-graphify.md",
-			"sha256": "76a66a3dc8bce60a91ed30ffdc683fb1eab0692006b0ee80fbdc67b11b374b61"
+			"sha256": "b59916a26afccfe105e29c0bd8637ac54275e8afef1c6cc88a58bd05b0325473"
 		},
 		"harness/planning/scout-semantic": {
 			"path": ".pi/agents/harness/planning/scout-semantic.md",
-			"sha256": "99aedca25fd81000d3bb532e0191ce9e1a87b84ab4039f089734f3a0d24ba44b"
+			"sha256": "47b7ea3e65b20a65e6d0ff11b6d5daff59b47a9ed618b8a3b6282f2eb0460572"
 		},
 		"harness/planning/scout-structure": {
 			"path": ".pi/agents/harness/planning/scout-structure.md",
-			"sha256": "83fd09e5eccd77b27d9de464d7e32536d9a762469e021b86b0ca665942bb40af"
+			"sha256": "e67b7cd75519e5ae36e1bb5f49ca158888c28d365465863aee50a9b2e8e5b7d7"
+		},
+		"harness/planning/sprint-contract-auditor": {
+			"path": ".pi/agents/harness/planning/sprint-contract-auditor.md",
+			"sha256": "f613a4fa937d76936fa01155d4e7956a81878f300100f99f6a78915b0af6f7c7"
+		},
+		"harness/planning/stack-researcher": {
+			"path": ".pi/agents/harness/planning/stack-researcher.md",
+			"sha256": "90e2ff1348f54bebc8c0392407bf1bb4d794c942fd8d6f342d80b191c945b34e"
 		}
 	}
 }

package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md CHANGED Viewed

@@ -5,14 +5,14 @@
 ## Context
-Harness slash prompts duplicated logic already defined in `harness/*` agents. Commands did not invoke the `Agent` tool. Review docs told users to fork a new Pi session even though subagents already provide isolated context.
+Harness slash prompts duplicated logic already defined in `harness/*` agents. The in-process `Agent` / `createAgentSession` stack was heavy and unstable. Review docs told users to fork a new Pi session even though subprocess subagents already provide isolation.
 ## Decision
 1. **Slash commands** (prompt templates) are orchestrators: spawn `harness/*` agents once, perform policy-gated writes, emit handoff blocks. Command identity is captured on Pi **`input`** as `harness-turn` (raw `/harness-*`), not from expanded prompt markdown.
 2. **Agents** perform multi-turn reads and emit structured JSON drafts. **Planning** (`harness/planning/*`) scouts and plan-adversary are read-only; parent orchestrator runs `ask_user`, `approve_plan`, and `create_plan` (see ADR 0033).
 3. **HarnessSpawnContext** is injected in `[HarnessRunContext]`; orchestrator copies it into spawn prompts. Subagents do not receive `[HarnessActivePlan]` injection.
-4. **Review isolation** uses `Agent` spawn with `inherit_context: false`. `review-integrity` allows `Agent` / `get_subagent_result` for evaluator/adversary/tie-breaker.
+4. **Review isolation** uses native `subagent` (vendored pi-subagents: isolated `pi --mode json` subprocess). `review-integrity` allows `subagent` when `agent` is evaluator/adversary/tie-breaker; bridge blocks plan-phase mutating spawns and nested `subagent` in children.
 5. **Subagent policy** blocks mutating tools for read-only phase agents; `ask_user` bridged for evaluator/adversary/tie-breaker only (not planning scouts).
 6. **Parent** owns plan-phase `ask_user`, `approve_plan`, and `create_plan` per ADR 0033.
@@ -32,6 +32,7 @@ Harness slash prompts duplicated logic already defined in `harness/*` agents. Co
 - `.pi/prompts/harness-*.md`
 - `.pi/agents/harness/*.md`
-- `.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts`
+- `vendor/pi-subagents/src/subagents.ts`, `.pi/extensions/lib/harness-subagents-bridge.ts`
+- `.pi/extensions/lib/harness-subagent-policy.ts`
 - `.pi/extensions/review-integrity.ts`
 - `.pi/lib/harness-agent-output.ts`

package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md CHANGED Viewed

@@ -5,7 +5,7 @@
 ## Context
-`/harness-plan` delegated the full plan lifecycle to a single `harness/planner` subagent. Plans and approval UI were largely invisible in the parent transcript until `get_subagent_result`, and the orchestrator could not call `ask_user` / `approve_plan` / `create_plan` directly.
+`/harness-plan` previously delegated the full plan lifecycle to a single `harness/planner` subagent. Plans and approval UI were largely invisible in the parent transcript until subprocess completion, and the orchestrator could not call `ask_user` / `approve_plan` / `create_plan` directly.
 ## Decision

package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md ADDED Viewed

@@ -0,0 +1,27 @@
+# ADR-0035: Plan-phase Review Gate and YAML artifacts
+## Status
+Accepted (2026-05-18)
+## Context
+`/harness-plan` produced thin PlanPackets (scope + bullets). Post-execute adversarial review (`/harness-critic`) ran too late. Graphify corpus (Structured Planning, ADR-020, Generator–Evaluator) defines WBS, validation, and review gate before baseline.
+## Decision
+1. **PlanPacket 1.1.0** — required `execution_plan` (phases, work_items, sprint_contract, dag_validation).
+2. **YAML on disk** — `plan-packet.yaml`, `research-brief.yaml`, `run-context.yaml`, `artifacts/*.yaml`. JSON Schema unchanged; instances validated after YAML parse.
+3. **Review Gate agents** — `stack-researcher`, `execution-plan-author`, debate: `hypothesis-validator`, `plan-evaluator`, `plan-adversary`, `sprint-contract-auditor`, `review-integrator`.
+4. **Debate bus** — `debate_id=plan-<run_id>`, plan budget profile (4 rounds, 12k cap), plan-phase consensus prerequisites.
+5. **No legacy JSON** plan paths; no pre-debate standalone `hypothesis-eval`.
+## Consequences
+- Positive: PM-grade plans, deterministic DAG gate, blind hypothesis eval in debate R1.
+- Negative: Higher spawn/token cost; `harness-verify` and smoke fixtures must use `.yaml`.
+## References
+- [ADR-0033](0033-parent-orchestrated-planning.md), [ADR-0034](0034-darwin-plan-research-pipeline.md)
+- `raw/decisions/adr-020.md`, `raw/modules/structured-planning.md`

package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml ADDED Viewed

@@ -0,0 +1,25 @@
+schema_version: "1.0.0"
+round_index: 1
+debate_round_focus: spec
+round_summary: Spec round passed for fixture
+validation_summary: All spec checks pass
+adversary_summary: No blocking adversarial findings
+disputes: []
+recommended_packet_patches: []
+review_gate_ready: true
+participants:
+  - PlanEvaluatorAgent
+  - PlanAdversaryAgent
+  - HypothesisValidatorAgent
+  - ReviewIntegratorAgent
+claims:
+  - spec validation complete
+rebuttals: []
+evidence_refs: []
+token_usage:
+  per_agent:
+    PlanEvaluatorAgent: 100
+    PlanAdversaryAgent: 100
+    ReviewIntegratorAgent: 50
+  round_total: 250
+consensus_delta: 0.1

package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml ADDED Viewed

@@ -0,0 +1,26 @@
+schema_version: "1.0.0"
+round_index: 4
+debate_round_focus: quality
+round_summary: Quality and sprint contract round passed
+validation_summary: Sprint contract complete
+adversary_summary: No gaps
+disputes: []
+recommended_packet_patches: []
+review_gate_ready: true
+participants:
+  - PlanEvaluatorAgent
+  - PlanAdversaryAgent
+  - SprintContractAuditorAgent
+  - ReviewIntegratorAgent
+claims:
+  - review gate ready
+rebuttals: []
+evidence_refs: []
+token_usage:
+  per_agent:
+    PlanEvaluatorAgent: 120
+    PlanAdversaryAgent: 110
+    SprintContractAuditorAgent: 90
+    ReviewIntegratorAgent: 60
+  round_total: 380
+consensus_delta: 0.15

package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml ADDED Viewed

@@ -0,0 +1,5 @@
+schema_version: "1.0.0"
+round_index: 4
+gaps: []
+recommendation: proceed
+human_summary: Sprint contract satisfies ADR-020 for fixture

package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml ADDED Viewed

@@ -0,0 +1,196 @@
+schema_version: "1.0.0"
+contract_version: "1.1.0"
+plan_id: plan-smoke-fixture-001
+task_id: task-smoke-001
+scope: Smoke fixture for plan-phase harness validation with execution_plan and debate artifacts.
+assumptions:
+  - Fixture only; no live agent run
+risk_level: med
+acceptance_checks:
+  - id: AC-1
+    description: DAG validation passes
+  - id: AC-2
+    description: Four debate rounds recorded
+  - id: AC-3
+    description: Stack brief present in research-brief
+  - id: AC-4
+    description: Sprint contract complete
+  - id: AC-5
+    description: plan-review.md renders
+rollback_plan:
+  revert_commit_ready: true
+  rollback_artifacts:
+    revert_command: git revert HEAD
+    revert_branch: main
+    patch_bundle: .pi/harness/runs/smoke-fixture/patch.bundle
+execution_plan:
+  schema_version: "1.0.0"
+  phases:
+    - phase_id: P1
+      name: Foundation
+      objective: Establish baseline and verify harness wiring
+      entry_criteria:
+        - Fixture loaded
+      exit_criteria:
+        - AC-1 satisfied
+      milestone: M1-baseline
+      work_item_ids: [WI-1, WI-2, WI-3]
+    - phase_id: P2
+      name: Build
+      objective: Implement core changes
+      entry_criteria:
+        - M1-baseline complete
+      exit_criteria:
+        - AC-2 satisfied
+      milestone: M2-build
+      work_item_ids: [WI-4, WI-5, WI-6]
+    - phase_id: P3
+      name: Verify
+      objective: Quality gate and documentation
+      entry_criteria:
+        - M2-build complete
+      exit_criteria:
+        - AC-5 satisfied
+      milestone: M3-ship
+      work_item_ids: [WI-7, WI-8]
+  work_items:
+    - work_item_id: WI-1
+      phase_id: P1
+      title: Load fixture packet
+      description: Read plan-packet.yaml from fixture directory
+      depends_on: []
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml
+      parallel_safe: true
+      done_criteria:
+        type: manual
+        spec: Fixture packet readable
+      acceptance_check_ids: [AC-1]
+    - work_item_id: WI-2
+      phase_id: P1
+      title: Run DAG validator
+      description: Execute validate-plan-dag.mjs
+      depends_on: [WI-1]
+      files:
+        - .pi/scripts/validate-plan-dag.mjs
+      parallel_safe: false
+      done_criteria:
+        type: command
+        spec: node .pi/scripts/validate-plan-dag.mjs --packet plan-packet.yaml
+      acceptance_check_ids: [AC-1]
+    - work_item_id: WI-3
+      phase_id: P1
+      title: Lint harness-yaml
+      description: Ensure YAML helpers parse fixture
+      depends_on: [WI-1]
+      files:
+        - .pi/lib/harness-yaml.ts
+      parallel_safe: true
+      done_criteria:
+        type: lint
+        spec: npm test
+      acceptance_check_ids: [AC-1]
+    - work_item_id: WI-4
+      phase_id: P2
+      title: Debate round 1-2 artifacts
+      description: Validate review-round YAML
+      depends_on: [WI-2]
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml
+      parallel_safe: false
+      done_criteria:
+        type: artifact
+        spec: artifacts/review-round-r1.yaml exists
+      acceptance_check_ids: [AC-2]
+    - work_item_id: WI-5
+      phase_id: P2
+      title: Debate round 3-4 artifacts
+      description: Validate final review round
+      depends_on: [WI-4]
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml
+      parallel_safe: false
+      done_criteria:
+        type: artifact
+        spec: artifacts/review-round-r4.yaml exists
+      acceptance_check_ids: [AC-2]
+    - work_item_id: WI-6
+      phase_id: P2
+      title: Stack research merge
+      description: research-brief includes stack section
+      depends_on: [WI-2]
+      files: []
+      non_code: true
+      parallel_safe: true
+      done_criteria:
+        type: manual
+        spec: research-brief.yaml contains stack key
+      acceptance_check_ids: [AC-3]
+    - work_item_id: WI-7
+      phase_id: P3
+      title: Sprint contract audit
+      description: R4 sprint audit artifact
+      depends_on: [WI-5]
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml
+      parallel_safe: false
+      done_criteria:
+        type: artifact
+        spec: sprint-audit-r4.yaml present
+      acceptance_check_ids: [AC-4]
+    - work_item_id: WI-8
+      phase_id: P3
+      title: Render plan-review
+      description: Human-readable plan review markdown
+      depends_on: [WI-7]
+      files:
+        - .pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md
+      parallel_safe: false
+      done_criteria:
+        type: manual
+        spec: plan-review.md non-empty
+      acceptance_check_ids: [AC-5]
+  sprint_contract:
+    in_scope:
+      - Fixture validation only
+    out_of_scope:
+      - Production deploy
+    definition_of_done: All smoke checks green
+    assumptions:
+      - CI environment has node
+    external_dependencies: []
+  wbs_dictionary:
+    - work_item_id: WI-1
+      deliverable: Fixture packet loaded
+      owner_role: executor
+      inputs: []
+      outputs: [parsed packet]
+  risk_register:
+    - risk_id: R1
+      description: DAG validator false negative
+      likelihood: low
+      impact: high
+      mitigation: Unit tests on validate-plan-dag.mjs
+      linked_work_item_ids: [WI-2]
+    - risk_id: R2
+      description: Debate cap misconfiguration
+      likelihood: med
+      impact: med
+      mitigation: debate-orchestrator plan profile tests
+      linked_work_item_ids: [WI-4]
+    - risk_id: R3
+      description: YAML parse drift
+      likelihood: low
+      impact: med
+      mitigation: harness-yaml strict parse
+      linked_work_item_ids: [WI-3]
+  schedule_metadata:
+    critical_path_work_item_ids: [WI-1, WI-2, WI-4, WI-5, WI-7, WI-8]
+    parallel_groups:
+      - [WI-1, WI-3]
+    schedule_baseline_note: Fixture topological order; no calendar dates
+  dag_validation:
+    status: pass
+    topological_order: [WI-1, WI-2, WI-3, WI-4, WI-5, WI-6, WI-7, WI-8]
+    cycles: []
+    conflicts: []

package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md ADDED Viewed

@@ -0,0 +1,14 @@
+# Plan review (fixture)
+plan_id: plan-smoke-fixture-001
+## Execution plan
+Phases: P1 Foundation → P2 Build → P3 Verify
+Critical path: WI-1 → WI-2 → WI-4 → WI-5 → WI-7 → WI-8
+## Debate
+- Round 1 (spec): review_gate_ready
+- Round 4 (quality): review_gate_ready

package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml ADDED Viewed

@@ -0,0 +1,32 @@
+decomposition:
+  schema_version: "1.0.0"
+  problem_restatement: Validate plan-phase YAML and debate pipeline
+hypothesis:
+  schema_version: "1.0.0"
+  primary:
+    claim: Fixture-driven smoke covers DAG and debate
+    mechanism: Static artifacts plus validate-plan-dag.mjs
+    prediction: CI passes without live agents
+    experiment: Run smoke-harness-plan.mjs --fixture
+stack:
+  schema_version: "1.0.0"
+  problem_framing: Node harness tooling
+  constraints: []
+  options:
+    - name: extend current stack
+      category: brownfield
+      fit_summary: Use existing ultimate-pi harness
+      tradeoffs:
+        pros: [No new deps]
+        cons: []
+      risks: []
+      evidence_refs: []
+      recommendation_rank: 1
+  recommended_primary: extend current stack
+  rationale: Fixture validates in-repo harness
+eval:
+  schema_version: "1.0.0"
+  revision_recommended: false
+  relevance:
+    passes: true
+    rationale: Hypothesis matches smoke task

package/.pi/harness/evals/smoke/run-context.fixture.json CHANGED Viewed

@@ -5,7 +5,7 @@
 	"project_root": "/tmp/ultimate-pi-smoke",
 	"phase": "plan",
 	"plan_id": null,
-	"plan_packet_path": "/tmp/ultimate-pi-smoke/.pi/harness/runs/smoke-session-1/plan-packet.json",
+	"plan_packet_path": "/tmp/ultimate-pi-smoke/.pi/harness/runs/smoke-session-1/plan-packet.yaml",
 	"plan_ready": false,
 	"task_summary": "smoke task",
 	"status": "active",