ultimate-pi 0.10.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/.agents/skills/harness-decisions/SKILL.md +3 -3
  2. package/.agents/skills/harness-orchestration/SKILL.md +19 -11
  3. package/.agents/skills/harness-plan/SKILL.md +15 -9
  4. package/.pi/agents/harness/planner.md +6 -47
  5. package/.pi/agents/harness/planning/decompose.md +84 -0
  6. package/.pi/agents/harness/planning/hypothesis-eval.md +59 -0
  7. package/.pi/agents/harness/planning/hypothesis.md +90 -0
  8. package/.pi/agents/harness/planning/plan-adversary.md +50 -0
  9. package/.pi/agents/harness/planning/planner.md +20 -0
  10. package/.pi/agents/harness/planning/scout-graphify.md +48 -0
  11. package/.pi/agents/harness/planning/scout-semantic.md +42 -0
  12. package/.pi/agents/harness/planning/scout-structure.md +44 -0
  13. package/.pi/extensions/harness-ask-user.ts +5 -0
  14. package/.pi/extensions/harness-plan-approval.ts +137 -3
  15. package/.pi/extensions/harness-run-context.ts +1 -1
  16. package/.pi/extensions/harness-subagents.ts +8 -3
  17. package/.pi/extensions/harness-web-tools.ts +2 -0
  18. package/.pi/extensions/lib/extension-load-guard.ts +39 -0
  19. package/.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts +33 -5
  20. package/.pi/extensions/lib/harness-subagents/parent-harness-ui-bridge.ts +2 -175
  21. package/.pi/extensions/lib/harness-subagents/parent-harness-ui-hooks.ts +18 -0
  22. package/.pi/extensions/lib/harness-subagents/spawn-policy.ts +1 -5
  23. package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +0 -18
  24. package/.pi/extensions/lib/harness-subagents/vendored/index.ts +1 -35
  25. package/.pi/extensions/lib/plan-approval/create-plan.ts +5 -0
  26. package/.pi/extensions/lib/plan-approval/plan-review.ts +393 -0
  27. package/.pi/extensions/lib/plan-approval/schema.ts +16 -1
  28. package/.pi/extensions/lib/plan-approval/types.ts +10 -0
  29. package/.pi/extensions/lib/plan-approval/validate.ts +2 -0
  30. package/.pi/extensions/policy-gate.ts +1 -1
  31. package/.pi/extensions/ultimate-pi-vcc.ts +5 -0
  32. package/.pi/harness/agents.manifest.json +114 -82
  33. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +3 -3
  34. package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md +34 -0
  35. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +41 -0
  36. package/.pi/harness/docs/adrs/README.md +2 -0
  37. package/.pi/harness/specs/README.md +1 -1
  38. package/.pi/harness/specs/harness-spawn-context.schema.json +2 -1
  39. package/.pi/harness/specs/plan-adversary-brief.schema.json +45 -0
  40. package/.pi/harness/specs/plan-decomposition-brief.schema.json +108 -0
  41. package/.pi/harness/specs/plan-hypothesis-brief.schema.json +96 -0
  42. package/.pi/harness/specs/plan-hypothesis-eval.schema.json +61 -0
  43. package/.pi/lib/harness-run-context.ts +12 -0
  44. package/.pi/prompts/harness-auto.md +1 -1
  45. package/.pi/prompts/harness-plan.md +111 -28
  46. package/.pi/prompts/harness-setup.md +1 -1
  47. package/.pi/scripts/harness-resolve-up-pkg.mjs +13 -0
  48. package/CHANGELOG.md +12 -0
  49. package/biome.json +4 -1
  50. package/package.json +2 -2
@@ -114,6 +114,15 @@ export function canonicalPlanPath(runId: string, projectRoot: string): string {
114
114
  return join(harnessRunsRoot(projectRoot), runId, "plan-packet.json");
115
115
  }
116
116
 
117
+ export const PLAN_REVIEW_BASENAME = "plan-review.md";
118
+
119
+ export function canonicalPlanReviewPath(
120
+ runId: string,
121
+ projectRoot: string,
122
+ ): string {
123
+ return join(harnessRunsRoot(projectRoot), runId, PLAN_REVIEW_BASENAME);
124
+ }
125
+
117
126
  const PLAN_PACKET_BASENAME = "plan-packet.json";
118
127
 
119
128
  const MUTATING_FILE_TOOLS = new Set(["write", "edit"]);
@@ -910,6 +919,9 @@ export function formatPlanContextBlock(
910
919
  ];
911
920
  if (ctx.plan_packet_path) {
912
921
  lines.push(`plan_packet_path=${ctx.plan_packet_path}`);
922
+ lines.push(
923
+ `plan_review_path=${canonicalPlanReviewPath(ctx.run_id, ctx.project_root)}`,
924
+ );
913
925
  }
914
926
  if (ctx.task_summary) {
915
927
  lines.push(`task_summary=${ctx.task_summary}`);
@@ -18,7 +18,7 @@ If task missing:
18
18
 
19
19
  ## Orchestration (required) — same session
20
20
 
21
- 1. **Plan** — spawn `harness/planner` → parse JSON → present full plan → `ask_user` Approve/Changes/Cancel → write `plan-packet.json` only on Approve (advances phase via policy-gate).
21
+ 1. **Plan** — follow `/harness-plan` parent orchestration (parallel `harness/planning/scout-*`, `decompose`, `hypothesis`, draft PlanPacket, `ask_user` on fork, parallel `plan-adversary` + `hypothesis-eval`, parent `approve_plan` + `create_plan`). Do not spawn `harness/planner`. No second approval pass.
22
22
  2. **Execute** — spawn `harness/executor` with `HarnessSpawnContext` (`mode: execute`). Summarize handoff bullets for next spawn (do not paste full subagent log).
23
23
  3. **Eval** — spawn `harness/evaluator` (`mode: benchmark`) after parent scripts if needed.
24
24
  4. **Review** — spawn `harness/evaluator` (`mode: verdict`) OR rely on eval verdict if policy allows — prefer both when strict gates require.
@@ -1,71 +1,154 @@
1
1
  ---
2
- description: Build a strict read-only PlanPacket before any mutating work.
2
+ description: Transform a vague task into a rigorous hypothesis via decomposition + DARWIN synthesis, then a strict PlanPacket.
3
3
  argument-hint: "\"<task>\" [--risk low|med|high] [--budget <amount>] [--quick]"
4
4
  ---
5
5
 
6
6
  # harness-plan
7
7
 
8
- Orchestrator only — spawn `harness/planner` once. The planner runs clarification (`ask_user`), approval (`approve_plan`), and persists the plan (`create_plan`). Do **not** write `plan-packet.json` in this parent session.
8
+ Parent orchestrator — run planning in **this session**. Subagents explore, decompose, hypothesize, and review; you own `ask_user`, `approve_plan`, and `create_plan`. Never `write` or `edit` `plan-packet.json` — use **`create_plan`** only.
9
+
10
+ Allowed `subagent_type` values (copy exactly):
11
+
12
+ - `harness/planning/scout-graphify`
13
+ - `harness/planning/scout-structure`
14
+ - `harness/planning/scout-semantic`
15
+ - `harness/planning/decompose`
16
+ - `harness/planning/hypothesis`
17
+ - `harness/planning/plan-adversary`
18
+ - `harness/planning/hypothesis-eval`
19
+
20
+ Do **not** spawn `harness/planner` or `harness/planning/planner`.
9
21
 
10
22
  ## Step 0 — Parse arguments
11
23
 
12
24
  Read `$ARGUMENTS`:
13
25
 
14
- - task statement (required)
26
+ - task statement (required) — **THE QUESTION**
15
27
  - optional: `--risk low|med|high`, `--budget <amount>`, `--quick`
16
28
 
17
29
  If task is missing:
18
30
 
19
31
  `Usage: /harness-plan "<task>" [--risk low|med|high] [--budget <amount>] [--quick]`
20
32
 
21
- `--quick` narrows planning breadth only — it does **not** skip user approval.
33
+ `--quick` skips `harness/planning/scout-semantic` only — never skip graphify, structure, decompose, hypothesis, or approval.
22
34
 
23
35
  ## Active plan context
24
36
 
25
- Use injected context only — **do not** read `.pi/harness/specs/*.schema.json` or explore specs with bash.
37
+ Use injected context only — **do not** read `.pi/harness/specs/*.schema.json` from disk.
26
38
 
27
39
  If `[HarnessActivePlan]` is present:
28
40
 
29
- - Treat task as **revise/amend** unless `/harness-new-run` was used.
30
- - Pass `mode: revise` using the `HarnessSpawnContext` JSON in `[HarnessRunContext]`.
41
+ - Treat as **revise/amend** unless `/harness-new-run` was used.
42
+ - Set `mode: revise` in `HarnessSpawnContext` from `[HarnessRunContext]`.
43
+ - **Preserve `plan_id` and `task_id`** from the existing packet when amending.
44
+ - Scouts focus on delta vs existing `plan_packet_path`; full re-scout only if scope changed materially.
45
+
46
+ Otherwise use `HarnessSpawnContext` from `[HarnessRunContext]` with `mode: create`.
47
+
48
+ ## Phase 1 — Parallel scouts (required)
49
+
50
+ 1. Copy `HarnessSpawnContext` from `[HarnessRunContext]` (adjust `risk_level`, `quick`, `mode` from `$ARGUMENTS`).
51
+ 2. Spawn scouts with **`inherit_context: false`**. Prefer parallel: `run_in_background: true` on each `Agent` call, then `get_subagent_result` for all.
52
+
53
+ ```
54
+ Agent({ subagent_type: "harness/planning/scout-graphify", prompt: "<task + HarnessSpawnContext + scout JSON schema>", run_in_background: true })
55
+ Agent({ subagent_type: "harness/planning/scout-structure", prompt: "…", run_in_background: true })
56
+ ```
57
+
58
+ Skip `harness/planning/scout-semantic` when `--quick` or `quick: true`.
59
+
60
+ 3. Parse each scout’s fenced `json` (`lane`, `status`, `findings`, `key_paths`, `open_questions`).
61
+ 4. **Partial failure:** require successful **graphify + structure** lanes. Semantic is optional. If a required lane fails, continue with `plan_status: partial` and document gaps in `assumptions`.
62
+ 5. If JSON parse fails for a lane, summarize free-text output and add an assumption that the lane was unstructured.
63
+
64
+ ## Phase 2 — Decompose (DeepMind-style)
65
+
66
+ 1. Spawn once with merged scout JSON:
67
+
68
+ ```
69
+ Agent({ subagent_type: "harness/planning/decompose", prompt: "<HarnessSpawnContext + task + all scout lane JSON>", inherit_context: false })
70
+ ```
71
+
72
+ 2. Parse `PlanDecompositionBrief` JSON (`problem_restatement`, `core_tension`, `tensions`, `prior_art`, etc.).
73
+ 3. On parse failure: one retry with “output valid JSON only”; if still failing, abort with `plan_status: needs_clarification`.
74
+
75
+ ## Phase 3 — Hypothesis (DARWIN)
76
+
77
+ 1. Spawn once:
78
+
79
+ ```
80
+ Agent({ subagent_type: "harness/planning/hypothesis", prompt: "<HarnessSpawnContext + task + PlanDecompositionBrief + scout summaries>", inherit_context: false })
81
+ ```
82
+
83
+ 2. Parse `PlanHypothesisBrief` JSON (`primary`, `dialectical_fork`, `alternatives`, `recommended_next_steps`).
84
+ 3. **Revision cap:** at most **one** re-spawn of `hypothesis` if the Phase 5 `hypothesis-eval` review requests revision (see below).
85
+
86
+ ## Phase 4 — Draft PlanPacket + fork clarification (parent)
31
87
 
32
- Otherwise use `HarnessSpawnContext` from `[HarnessRunContext]` for greenfield `mode: create`.
88
+ Map hypothesis → [`PlanPacket`](.pi/harness/specs/plan-packet.schema.json):
33
89
 
34
- ## Orchestration (required)
90
+ | Field | Source |
91
+ |-------|--------|
92
+ | `scope` | `problem_restatement` (narrowed) + `primary.claim` + `primary.mechanism` (implementation-ready) |
93
+ | `assumptions` | `core_tension`, `prior_art.dead_ends`, scout `open_questions`, chosen fork path (if any) |
94
+ | `acceptance_checks` | Each `primary.prediction` and `primary.experiment` as verifiable checklist items (min 1) |
95
+ | `risk_level` | From `$ARGUMENTS` or infer from fork uncertainty / blast radius |
35
96
 
36
- 1. Copy the `HarnessSpawnContext=…` JSON from `[HarnessRunContext]` into the spawn prompt (adjust `risk_level`, `quick`, `mode` from `$ARGUMENTS` if needed). Do **not** add “call ask_user for approval” in the `Agent` prompt — the planner agent instructions already define `approve_plan` / `create_plan`.
37
- 2. Spawn **once** with **`inherit_context: false`**:
97
+ Build complete draft: `plan_id`, `task_id`, `scope`, `assumptions`, `risk_level`, `acceptance_checks`, `rollback_plan` (`revert_commit_ready: true`, artifacts filled).
98
+
99
+ Call **`ask_user`** when `dialectical_fork` is material (Path A vs B materially different) **before** Phase 5 reviews.
100
+
101
+ Assemble `research_brief` for approval:
102
+
103
+ ```json
104
+ {
105
+ "decomposition": { /* PlanDecompositionBrief */ },
106
+ "hypothesis": { /* PlanHypothesisBrief */ },
107
+ "eval": null
108
+ }
109
+ ```
110
+
111
+ ## Phase 5 — Parallel reviews
112
+
113
+ Spawn in parallel (`run_in_background: true`):
38
114
 
39
115
  ```
40
- Agent({ subagent_type: "harness/planner", prompt: "<task + HarnessSpawnContext JSON + output schema>" })
116
+ Agent({ subagent_type: "harness/planning/plan-adversary", prompt: "<HarnessSpawnContext + draft PlanPacket + scout summaries + decomposition human_summary>", inherit_context: false })
117
+ Agent({ subagent_type: "harness/planning/hypothesis-eval", prompt: "<original task ONLY + PlanHypothesisBrief JSON — no decomposition, no PlanPacket>", inherit_context: false })
41
118
  ```
42
119
 
43
- 3. `get_subagent_result` — parse final JSON (`status`, `plan_packet`, `human_summary`, `clarification`) via fenced `json` block. Treat `plan_packet` in that JSON as **read-only summary context** — not input for another approval tool call.
44
- 4. If `status === "ready"` and `[HarnessRunContext]` shows `plan_ready: true` (planner called `create_plan`), confirm `plan_packet_path` exists — do **not** write the file yourself.
45
- 5. If `needs_clarification`, tell the user the planner is waiting — do **not** re-spawn; user should answer in the subagent or re-run `/harness-plan`.
46
- 6. Do **not** call `ask_user`, `approve_plan`, or `create_plan` in this parent session.
120
+ 1. Parse `PlanAdversaryBrief` — merge `mitigations` into scope, assumptions, or `acceptance_checks`.
121
+ 2. Parse `PlanHypothesisEval` → set `research_brief.eval`.
122
+ 3. If `revision_recommended` is true, testability < 70, or `relevance.passes` is false: re-spawn `hypothesis` once with eval rationale, update PlanPacket + `research_brief.hypothesis`, then re-run **hypothesis-eval** only (not adversary unless PlanPacket changed materially).
123
+
124
+ Cap: **at most 2** plan-adversary spawns and **at most 2** `approve_plan` rounds per invocation.
125
+
126
+ ## Phase 6 — Approval + persistence (parent)
47
127
 
48
- ## After subagent returns (no second approval)
128
+ 1. Call **`approve_plan`** with `plan_packet`, `human_summary` (primary claim + fork if any), and `research_brief`.
129
+ 2. On **Approve** only, call **`create_plan`** with the **same** `plan_packet`.
130
+ 3. If `create_plan` fails, tell the user to fix validation errors or run `/harness-plan-commit` after approval is recorded.
131
+ 4. Confirm `[HarnessRunContext]` `plan_ready: true` before handoff.
49
132
 
50
- User approval happens **once**, inside the planner subagent: `approve_plan` uses the parent TUI bridge. You are the orchestrator, **not** an approver.
133
+ On **Cancel** or Esc: `plan_status: needs_clarification`; do **not** call `create_plan`.
51
134
 
52
- After `get_subagent_result`:
135
+ On **Request changes**: revise draft and re-run phases 4–6 only (re-scout/decompose/hypothesis only if scope changed).
53
136
 
54
- - If `[HarnessRunContext]` shows `plan_ready: true`, or the transcript already has `harness-plan-approval` / bridged `approve_plan` with **Approve** → planning is complete. **Stop.** Summarize the plan and set `next_command: /harness-run`.
55
- - Do **not** call `approve_plan` to “confirm” using `plan_packet` from subagent JSON.
56
- - Do **not** call `ask_user` with Approve / Request changes / Cancel for the same plan.
57
- - Do **not** re-spawn the planner to “get approval again”.
137
+ ## Recovery and ownership
58
138
 
59
- If `status === "ready"` but `plan_ready` is false → planner approved but `create_plan` may have failed; tell the user to run `/harness-plan-commit` — **not** a second `approve_plan`.
139
+ - Plan only in the **owner** session (`owner_pi_session_id` on run context); otherwise `/harness-use-run`.
140
+ - `/harness-plan-commit` only after parent `approve_plan` (Approve) is in the transcript.
141
+ - If `plan_ready: true` already, stop — summarize and set `next_command: /harness-run`.
60
142
 
61
143
  ## Parent rules
62
144
 
63
- - Do not mutate project source files in the plan phase.
64
- - Do not embed `plan_id=` in prompts for policy sync.
65
- - Optional recovery: `/harness-plan-commit` only if the planner approved but `create_plan` failed.
145
+ - Do not mutate project source in plan phase.
146
+ - Subagents never call `ask_user`, `approve_plan`, or `create_plan`.
147
+ - Do not embed `plan_id=` in spawn prompts for policy sync.
66
148
 
67
149
  ## Completion
68
150
 
69
- - `plan_status`: `ready` or `needs_clarification`
151
+ - `plan_status`: `ready`, `partial`, or `needs_clarification`
70
152
  - `risk_level` used
153
+ - `plan_review_path` shown for editor review
71
154
  - `next_command`: `/harness-run` when `ready` (never `/harness-run --plan …`)
@@ -387,7 +387,7 @@ Manual override: **`/router profile auto`** anytime after reload if they changed
387
387
 
388
388
  **Slash commands are orchestrators:** `/harness-plan`, `/harness-run`, etc. spawn `harness/*` agents via the `Agent` tool — bootstrap stays **script-first**; only optionally spawn `harness/sentrux-bootstrap` for Sentrux (see Step 4.2).
389
389
 
390
- Optional per-repo overrides: place `.md` files at the **same relative path** (e.g. `.pi/agents/harness/planner.md` overrides the package planner).
390
+ Optional per-repo overrides: place `.md` files at the **same relative path** (e.g. `.pi/agents/harness/planning/scout-graphify.md` overrides the package scout). Deprecated: `harness/planner.md` — use `harness/planning/` agents instead.
391
391
 
392
392
  Verify manifest drift after `pi update ultimate-pi`:
393
393
 
@@ -30,7 +30,20 @@ function hasHarnessScripts(root) {
30
30
  return existsSync(join(root, ".pi", "scripts", "harness-cli-verify.sh"));
31
31
  }
32
32
 
33
+ function isSourceCheckout(root) {
34
+ try {
35
+ const pkg = requireFromCwd.resolve("./package.json");
36
+ return dirname(pkg) === root;
37
+ } catch {
38
+ return false;
39
+ }
40
+ }
41
+
33
42
  function tryResolveUltimatePi() {
43
+ if (hasHarnessScripts(process.cwd()) && isSourceCheckout(process.cwd())) {
44
+ return process.cwd();
45
+ }
46
+
34
47
  if (process.env.ULTIMATE_PI_PKG) {
35
48
  const envRoot = process.env.ULTIMATE_PI_PKG;
36
49
  if (hasHarnessScripts(envRoot)) return envRoot;
package/CHANGELOG.md CHANGED
@@ -4,6 +4,18 @@ All notable changes to this project are documented in this file.
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ ## [v0.11.0] — 2026-05-17
8
+
9
+ ### ✨ Features
10
+
11
+ - **Harness Darwin plan pipeline:** decomposition and hypothesis agents with plan-adversary, scouts, and structured plan brief schemas (ADR 0034).
12
+ - **Harness plan review:** `plan-review.md` for editor review; extension load guard.
13
+ - **Cursor Pi experts:** cursor-pi domain expert agents.
14
+
15
+ ### 🔄 CI/CD
16
+
17
+ - **Biome:** ignore harness runtime JSON; format committed harness plan pipeline sources.
18
+
7
19
  ## [v0.10.1] — 2026-05-17
8
20
 
9
21
  ### 🐛 Fixes
package/biome.json CHANGED
@@ -12,7 +12,10 @@
12
12
  "**/*.{ts,tsx,js,jsx,json,jsonc,css}",
13
13
  "!graphify-out/**/*",
14
14
  "!graphify-books-out/**/*",
15
- "!vendor/**/*"
15
+ "!vendor/**/*",
16
+ "!.pi/harness/active-run.json",
17
+ "!.pi/harness/runs/**/run-context.json",
18
+ "!.pi/harness/runs/**/plan-packet.json"
16
19
  ]
17
20
  },
18
21
  "formatter": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ultimate-pi",
3
- "version": "0.10.1",
3
+ "version": "0.11.0",
4
4
  "description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -82,7 +82,7 @@
82
82
  "format": "biome format --write",
83
83
  "format:check": "biome format",
84
84
  "prepare": "lefthook install",
85
- "test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagents-import-path.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-turn-routing.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs",
85
+ "test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagents-import-path.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-turn-routing.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs",
86
86
  "test:vcc": "npx -y tsx --test vendor/pi-vcc/tests/*.test.ts",
87
87
  "harness:sentrux-bootstrap": "node .pi/scripts/harness-sentrux-bootstrap.mjs",
88
88
  "harness:sentrux-sync": "node .pi/scripts/sentrux-rules-sync.mjs --force",