npm - ultimate-pi - Versions diffs - 0.10.1 → 0.11.0 - Mend

ultimate-pi 0.10.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/.agents/skills/harness-decisions/SKILL.md +3 -3
package/.agents/skills/harness-orchestration/SKILL.md +19 -11
package/.agents/skills/harness-plan/SKILL.md +15 -9
package/.pi/agents/harness/planner.md +6 -47
package/.pi/agents/harness/planning/decompose.md +84 -0
package/.pi/agents/harness/planning/hypothesis-eval.md +59 -0
package/.pi/agents/harness/planning/hypothesis.md +90 -0
package/.pi/agents/harness/planning/plan-adversary.md +50 -0
package/.pi/agents/harness/planning/planner.md +20 -0
package/.pi/agents/harness/planning/scout-graphify.md +48 -0
package/.pi/agents/harness/planning/scout-semantic.md +42 -0
package/.pi/agents/harness/planning/scout-structure.md +44 -0
package/.pi/extensions/harness-ask-user.ts +5 -0
package/.pi/extensions/harness-plan-approval.ts +137 -3
package/.pi/extensions/harness-run-context.ts +1 -1
package/.pi/extensions/harness-subagents.ts +8 -3
package/.pi/extensions/harness-web-tools.ts +2 -0
package/.pi/extensions/lib/extension-load-guard.ts +39 -0
package/.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts +33 -5
package/.pi/extensions/lib/harness-subagents/parent-harness-ui-bridge.ts +2 -175
package/.pi/extensions/lib/harness-subagents/parent-harness-ui-hooks.ts +18 -0
package/.pi/extensions/lib/harness-subagents/spawn-policy.ts +1 -5
package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +0 -18
package/.pi/extensions/lib/harness-subagents/vendored/index.ts +1 -35
package/.pi/extensions/lib/plan-approval/create-plan.ts +5 -0
package/.pi/extensions/lib/plan-approval/plan-review.ts +393 -0
package/.pi/extensions/lib/plan-approval/schema.ts +16 -1
package/.pi/extensions/lib/plan-approval/types.ts +10 -0
package/.pi/extensions/lib/plan-approval/validate.ts +2 -0
package/.pi/extensions/policy-gate.ts +1 -1
package/.pi/extensions/ultimate-pi-vcc.ts +5 -0
package/.pi/harness/agents.manifest.json +114 -82
package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +3 -3
package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md +34 -0
package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +41 -0
package/.pi/harness/docs/adrs/README.md +2 -0
package/.pi/harness/specs/README.md +1 -1
package/.pi/harness/specs/harness-spawn-context.schema.json +2 -1
package/.pi/harness/specs/plan-adversary-brief.schema.json +45 -0
package/.pi/harness/specs/plan-decomposition-brief.schema.json +108 -0
package/.pi/harness/specs/plan-hypothesis-brief.schema.json +96 -0
package/.pi/harness/specs/plan-hypothesis-eval.schema.json +61 -0
package/.pi/lib/harness-run-context.ts +12 -0
package/.pi/prompts/harness-auto.md +1 -1
package/.pi/prompts/harness-plan.md +111 -28
package/.pi/prompts/harness-setup.md +1 -1
package/.pi/scripts/harness-resolve-up-pkg.mjs +13 -0
package/CHANGELOG.md +12 -0
package/biome.json +4 -1
package/package.json +2 -2

package/.pi/lib/harness-run-context.ts CHANGED Viewed

@@ -114,6 +114,15 @@ export function canonicalPlanPath(runId: string, projectRoot: string): string {
 	return join(harnessRunsRoot(projectRoot), runId, "plan-packet.json");
 }
+export const PLAN_REVIEW_BASENAME = "plan-review.md";
+export function canonicalPlanReviewPath(
+	runId: string,
+	projectRoot: string,
+): string {
+	return join(harnessRunsRoot(projectRoot), runId, PLAN_REVIEW_BASENAME);
+}
 const PLAN_PACKET_BASENAME = "plan-packet.json";
 const MUTATING_FILE_TOOLS = new Set(["write", "edit"]);
@@ -910,6 +919,9 @@ export function formatPlanContextBlock(
 	];
 	if (ctx.plan_packet_path) {
 		lines.push(`plan_packet_path=${ctx.plan_packet_path}`);
+		lines.push(
+			`plan_review_path=${canonicalPlanReviewPath(ctx.run_id, ctx.project_root)}`,
+		);
 	}
 	if (ctx.task_summary) {
 		lines.push(`task_summary=${ctx.task_summary}`);

package/.pi/prompts/harness-auto.md CHANGED Viewed

@@ -18,7 +18,7 @@ If task missing:
 ## Orchestration (required) — same session
-1. **Plan** — spawn `harness/planner` → parse JSON → present full plan → `ask_user` Approve/Changes/Cancel → write `plan-packet.json` only on Approve (advances phase via policy-gate).
+1. **Plan** — follow `/harness-plan` parent orchestration (parallel `harness/planning/scout-*`, `decompose`, `hypothesis`, draft PlanPacket, `ask_user` on fork, parallel `plan-adversary` + `hypothesis-eval`, parent `approve_plan` + `create_plan`). Do not spawn `harness/planner`. No second approval pass.
 2. **Execute** — spawn `harness/executor` with `HarnessSpawnContext` (`mode: execute`). Summarize handoff bullets for next spawn (do not paste full subagent log).
 3. **Eval** — spawn `harness/evaluator` (`mode: benchmark`) after parent scripts if needed.
 4. **Review** — spawn `harness/evaluator` (`mode: verdict`) OR rely on eval verdict if policy allows — prefer both when strict gates require.

package/.pi/prompts/harness-plan.md CHANGED Viewed

@@ -1,71 +1,154 @@
 ---
-description: Build a strict read-only PlanPacket before any mutating work.
+description: Transform a vague task into a rigorous hypothesis via decomposition + DARWIN synthesis, then a strict PlanPacket.
 argument-hint: "\"<task>\" [--risk low|med|high] [--budget <amount>] [--quick]"
 ---
 # harness-plan
-Orchestrator only — spawn `harness/planner` once. The planner runs clarification (`ask_user`), approval (`approve_plan`), and persists the plan (`create_plan`). Do **not** write `plan-packet.json` in this parent session.
+Parent orchestrator — run planning in **this session**. Subagents explore, decompose, hypothesize, and review; you own `ask_user`, `approve_plan`, and `create_plan`. Never `write` or `edit` `plan-packet.json` — use **`create_plan`** only.
+Allowed `subagent_type` values (copy exactly):
+- `harness/planning/scout-graphify`
+- `harness/planning/scout-structure`
+- `harness/planning/scout-semantic`
+- `harness/planning/decompose`
+- `harness/planning/hypothesis`
+- `harness/planning/plan-adversary`
+- `harness/planning/hypothesis-eval`
+Do **not** spawn `harness/planner` or `harness/planning/planner`.
 ## Step 0 — Parse arguments
 Read `$ARGUMENTS`:
-- task statement (required)
+- task statement (required) — **THE QUESTION**
 - optional: `--risk low|med|high`, `--budget <amount>`, `--quick`
 If task is missing:
 `Usage: /harness-plan "<task>" [--risk low|med|high] [--budget <amount>] [--quick]`
-`--quick` narrows planning breadth only — it does **not** skip user approval.
+`--quick` skips `harness/planning/scout-semantic` only — never skip graphify, structure, decompose, hypothesis, or approval.
 ## Active plan context
-Use injected context only — **do not** read `.pi/harness/specs/*.schema.json` or explore specs with bash.
+Use injected context only — **do not** read `.pi/harness/specs/*.schema.json` from disk.
 If `[HarnessActivePlan]` is present:
-- Treat task as **revise/amend** unless `/harness-new-run` was used.
-- Pass `mode: revise` using the `HarnessSpawnContext` JSON in `[HarnessRunContext]`.
+- Treat as **revise/amend** unless `/harness-new-run` was used.
+- Set `mode: revise` in `HarnessSpawnContext` from `[HarnessRunContext]`.
+- **Preserve `plan_id` and `task_id`** from the existing packet when amending.
+- Scouts focus on delta vs existing `plan_packet_path`; full re-scout only if scope changed materially.
+Otherwise use `HarnessSpawnContext` from `[HarnessRunContext]` with `mode: create`.
+## Phase 1 — Parallel scouts (required)
+1. Copy `HarnessSpawnContext` from `[HarnessRunContext]` (adjust `risk_level`, `quick`, `mode` from `$ARGUMENTS`).
+2. Spawn scouts with **`inherit_context: false`**. Prefer parallel: `run_in_background: true` on each `Agent` call, then `get_subagent_result` for all.
+```
+Agent({ subagent_type: "harness/planning/scout-graphify", prompt: "<task + HarnessSpawnContext + scout JSON schema>", run_in_background: true })
+Agent({ subagent_type: "harness/planning/scout-structure", prompt: "…", run_in_background: true })
+```
+Skip `harness/planning/scout-semantic` when `--quick` or `quick: true`.
+3. Parse each scout’s fenced `json` (`lane`, `status`, `findings`, `key_paths`, `open_questions`).
+4. **Partial failure:** the **graphify + structure** lanes are required; semantic is optional. If a required lane fails, continue anyway with `plan_status: partial` and document the gaps in `assumptions`.
+5. If JSON parse fails for a lane, summarize free-text output and add an assumption that the lane was unstructured.
+## Phase 2 — Decompose (DeepMind-style)
+1. Spawn once with merged scout JSON:
+```
+Agent({ subagent_type: "harness/planning/decompose", prompt: "<HarnessSpawnContext + task + all scout lane JSON>", inherit_context: false })
+```
+2. Parse `PlanDecompositionBrief` JSON (`problem_restatement`, `core_tension`, `tensions`, `prior_art`, etc.).
+3. On parse failure: one retry with “output valid JSON only”; if still failing, abort with `plan_status: needs_clarification`.
+## Phase 3 — Hypothesis (DARWIN)
+1. Spawn once:
+```
+Agent({ subagent_type: "harness/planning/hypothesis", prompt: "<HarnessSpawnContext + task + PlanDecompositionBrief + scout summaries>", inherit_context: false })
+```
+2. Parse `PlanHypothesisBrief` JSON (`primary`, `dialectical_fork`, `alternatives`, `recommended_next_steps`).
+3. **Revision cap:** at most **one** re-spawn of `hypothesis` if the Phase 5 eval requests revision (see below).
+## Phase 4 — Draft PlanPacket + fork clarification (parent)
-Otherwise use `HarnessSpawnContext` from `[HarnessRunContext]` for greenfield `mode: create`.
+Map hypothesis → [`PlanPacket`](.pi/harness/specs/plan-packet.schema.json):
-## Orchestration (required)
+| Field | Source |
+|-------|--------|
+| `scope` | `problem_restatement` (narrowed) + `primary.claim` + `primary.mechanism` (implementation-ready) |
+| `assumptions` | `core_tension`, `prior_art.dead_ends`, scout `open_questions`, chosen fork path (if any) |
+| `acceptance_checks` | Each `primary.prediction` and `primary.experiment` as verifiable checklist items (min 1) |
+| `risk_level` | From `$ARGUMENTS` or infer from fork uncertainty / blast radius |
-1. Copy the `HarnessSpawnContext=…` JSON from `[HarnessRunContext]` into the spawn prompt (adjust `risk_level`, `quick`, `mode` from `$ARGUMENTS` if needed). Do **not** add “call ask_user for approval” in the `Agent` prompt — the planner agent instructions already define `approve_plan` / `create_plan`.
-2. Spawn **once** with **`inherit_context: false`**:
+Build complete draft: `plan_id`, `task_id`, `scope`, `assumptions`, `risk_level`, `acceptance_checks`, `rollback_plan` (`revert_commit_ready: true`, artifacts filled).
+Call **`ask_user`** when `dialectical_fork` is material (Path A vs B materially different) **before** Phase 5 reviews.
+Assemble `research_brief` for approval:
+```json
+{
+  "decomposition": { /* PlanDecompositionBrief */ },
+  "hypothesis": { /* PlanHypothesisBrief */ },
+  "eval": null
+}
+```
+## Phase 5 — Parallel reviews
+Spawn in parallel (`run_in_background: true`):
 ```
-Agent({ subagent_type: "harness/planner", prompt: "<task + HarnessSpawnContext JSON + output schema>" })
+Agent({ subagent_type: "harness/planning/plan-adversary", prompt: "<HarnessSpawnContext + draft PlanPacket + scout summaries + decomposition human_summary>", inherit_context: false })
+Agent({ subagent_type: "harness/planning/hypothesis-eval", prompt: "<original task ONLY + PlanHypothesisBrief JSON — no decomposition, no PlanPacket>", inherit_context: false })
 ```
-3. `get_subagent_result` — parse final JSON (`status`, `plan_packet`, `human_summary`, `clarification`) via fenced `json` block. Treat `plan_packet` in that JSON as **read-only summary context** — not input for another approval tool call.
-4. If `status === "ready"` and `[HarnessRunContext]` shows `plan_ready: true` (planner called `create_plan`), confirm `plan_packet_path` exists — do **not** write the file yourself.
-5. If `needs_clarification`, tell the user the planner is waiting — do **not** re-spawn; user should answer in the subagent or re-run `/harness-plan`.
-6. Do **not** call `ask_user`, `approve_plan`, or `create_plan` in this parent session.
+1. Parse `PlanAdversaryBrief` — merge `mitigations` into scope, assumptions, or `acceptance_checks`.
+2. Parse `PlanHypothesisEval` — set `research_brief.eval`.
+3. If `revision_recommended` or testability < 70 or `relevance.passes` is false: re-spawn `hypothesis` once with eval rationale, update PlanPacket + `research_brief.hypothesis`, then re-run **hypothesis-eval** only (not adversary unless PlanPacket changed materially).
+Cap: **at most 2** plan-adversary spawns and **at most 2** `approve_plan` rounds per invocation.
+## Phase 6 — Approval + persistence (parent)
-## After subagent returns (no second approval)
+1. Call **`approve_plan`** with `plan_packet`, `human_summary` (primary claim + fork if any), and `research_brief`.
+2. On **Approve** only, call **`create_plan`** with the **same** `plan_packet`.
+3. If `create_plan` fails, tell the user to fix validation errors or run `/harness-plan-commit` after approval is recorded.
+4. Confirm `[HarnessRunContext]` `plan_ready: true` before handoff.
-User approval happens **once**, inside the planner subagent: `approve_plan` uses the parent TUI bridge. You are the orchestrator, **not** an approver.
+On **Cancel** or Esc: `plan_status: needs_clarification`; do **not** call `create_plan`.
-After `get_subagent_result`:
+On **Request changes**: revise draft and re-run phases 4–6 only (re-scout/decompose/hypothesis only if scope changed).
-- If `[HarnessRunContext]` shows `plan_ready: true`, or the transcript already has `harness-plan-approval` / bridged `approve_plan` with **Approve** → planning is complete. **Stop.** Summarize the plan and set `next_command: /harness-run`.
-- Do **not** call `approve_plan` to “confirm” using `plan_packet` from subagent JSON.
-- Do **not** call `ask_user` with Approve / Request changes / Cancel for the same plan.
-- Do **not** re-spawn the planner to “get approval again”.
+## Recovery and ownership
-If `status === "ready"` but `plan_ready` is false → planner approved but `create_plan` may have failed; tell the user to run `/harness-plan-commit` — **not** a second `approve_plan`.
+- Plan only in the **owner** session (`owner_pi_session_id` on run context); otherwise `/harness-use-run`.
+- `/harness-plan-commit` only after parent `approve_plan` (Approve) is in the transcript.
+- If `plan_ready: true` already, stop — summarize and set `next_command: /harness-run`.
 ## Parent rules
-- Do not mutate project source files in the plan phase.
-- Do not embed `plan_id=` in prompts for policy sync.
-- Optional recovery: `/harness-plan-commit` only if the planner approved but `create_plan` failed.
+- Do not mutate project source in plan phase.
+- Subagents never call `ask_user`, `approve_plan`, or `create_plan`.
+- Do not embed `plan_id=` in spawn prompts for policy sync.
 ## Completion
-- `plan_status`: `ready` or `needs_clarification`
+- `plan_status`: `ready`, `partial`, or `needs_clarification`
 - `risk_level` used
+- `plan_review_path` shown for editor review
 - `next_command`: `/harness-run` when `ready` (never `/harness-run --plan …`)

package/.pi/prompts/harness-setup.md CHANGED Viewed

@@ -387,7 +387,7 @@ Manual override: **`/router profile auto`** anytime after reload if they changed
 **Slash commands are orchestrators:** `/harness-plan`, `/harness-run`, etc. spawn `harness/*` agents via the `Agent` tool — bootstrap stays **script-first**; only optionally spawn `harness/sentrux-bootstrap` for Sentrux (see Step 4.2).
-Optional per-repo overrides: place `.md` files at the **same relative path** (e.g. `.pi/agents/harness/planner.md` overrides the package planner).
+Optional per-repo overrides: place `.md` files at the **same relative path** (e.g. `.pi/agents/harness/planning/scout-graphify.md` overrides the package scout). Deprecated: `harness/planner.md` — use `harness/planning/` agents instead.
 Verify manifest drift after `pi update ultimate-pi`:

package/.pi/scripts/harness-resolve-up-pkg.mjs CHANGED Viewed

@@ -30,7 +30,20 @@ function hasHarnessScripts(root) {
 	return existsSync(join(root, ".pi", "scripts", "harness-cli-verify.sh"));
 }
+function isSourceCheckout(root) {
+	try {
+		const pkg = requireFromCwd.resolve("./package.json");
+		return dirname(pkg) === root;
+	} catch {
+		return false;
+	}
+}
 function tryResolveUltimatePi() {
+	if (hasHarnessScripts(process.cwd()) && isSourceCheckout(process.cwd())) {
+		return process.cwd();
+	}
 	if (process.env.ULTIMATE_PI_PKG) {
 		const envRoot = process.env.ULTIMATE_PI_PKG;
 		if (hasHarnessScripts(envRoot)) return envRoot;

package/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,18 @@ All notable changes to this project are documented in this file.
 ## [Unreleased]
+## [v0.11.0] — 2026-05-17
+### ✨ Features
+- **Harness Darwin plan pipeline:** decomposition and hypothesis agents with plan-adversary, scouts, and structured plan brief schemas (ADR 0034).
+- **Harness plan review:** `plan-review.md` for editor review; extension load guard.
+- **Cursor Pi experts:** cursor-pi domain expert agents.
+### 🔄 CI/CD
+- **Biome:** ignore harness runtime JSON; format committed harness plan pipeline sources.
 ## [v0.10.1] — 2026-05-17
 ### 🐛 Fixes

package/biome.json CHANGED Viewed

@@ -12,7 +12,10 @@
 			"**/*.{ts,tsx,js,jsx,json,jsonc,css}",
 			"!graphify-out/**/*",
 			"!graphify-books-out/**/*",
-			"!vendor/**/*"
+			"!vendor/**/*",
+			"!.pi/harness/active-run.json",
+			"!.pi/harness/runs/**/run-context.json",
+			"!.pi/harness/runs/**/plan-packet.json"
 		]
 	},
 	"formatter": {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "ultimate-pi",
-	"version": "0.10.1",
+	"version": "0.11.0",
 	"description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
 	"keywords": [
 		"pi-package",
@@ -82,7 +82,7 @@
 		"format": "biome format --write",
 		"format:check": "biome format",
 		"prepare": "lefthook install",
-		"test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagents-import-path.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-turn-routing.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs",
+		"test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagents-import-path.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-turn-routing.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs",
 		"test:vcc": "npx -y tsx --test vendor/pi-vcc/tests/*.test.ts",
 		"harness:sentrux-bootstrap": "node .pi/scripts/harness-sentrux-bootstrap.mjs",
 		"harness:sentrux-sync": "node .pi/scripts/sentrux-rules-sync.mjs --force",