ultimate-pi 0.10.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/.agents/skills/harness-debate-plan/SKILL.md +44 -0
  2. package/.agents/skills/harness-decisions/SKILL.md +3 -3
  3. package/.agents/skills/harness-orchestration/SKILL.md +59 -25
  4. package/.agents/skills/harness-plan/SKILL.md +16 -15
  5. package/.pi/agents/harness/adversary.md +0 -1
  6. package/.pi/agents/harness/evaluator.md +0 -1
  7. package/.pi/agents/harness/executor.md +1 -2
  8. package/.pi/agents/harness/incident-recorder.md +0 -1
  9. package/.pi/agents/harness/meta-optimizer.md +0 -1
  10. package/.pi/agents/harness/planning/decompose.md +83 -0
  11. package/.pi/agents/harness/planning/execution-plan-author.md +30 -0
  12. package/.pi/agents/harness/planning/hypothesis-validator.md +23 -0
  13. package/.pi/agents/harness/planning/hypothesis.md +89 -0
  14. package/.pi/agents/harness/planning/plan-adversary.md +18 -0
  15. package/.pi/agents/harness/planning/plan-evaluator.md +18 -0
  16. package/.pi/agents/harness/planning/review-integrator.md +23 -0
  17. package/.pi/agents/harness/planning/scout-graphify.md +54 -0
  18. package/.pi/agents/harness/planning/scout-semantic.md +47 -0
  19. package/.pi/agents/harness/planning/scout-structure.md +50 -0
  20. package/.pi/agents/harness/planning/sprint-contract-auditor.md +18 -0
  21. package/.pi/agents/harness/planning/stack-researcher.md +24 -0
  22. package/.pi/agents/harness/tie-breaker.md +0 -1
  23. package/.pi/agents/harness/trace-librarian.md +0 -1
  24. package/.pi/extensions/debate-orchestrator.ts +90 -53
  25. package/.pi/extensions/harness-ask-user.ts +5 -0
  26. package/.pi/extensions/harness-plan-approval.ts +137 -3
  27. package/.pi/extensions/harness-run-context.ts +146 -6
  28. package/.pi/extensions/harness-subagents.ts +10 -5
  29. package/.pi/extensions/harness-web-tools.ts +2 -0
  30. package/.pi/extensions/lib/extension-load-guard.ts +39 -0
  31. package/.pi/extensions/lib/harness-posthog.ts +6 -1
  32. package/.pi/extensions/lib/harness-spawn-budget.ts +75 -0
  33. package/.pi/extensions/lib/harness-subagent-auth.ts +123 -0
  34. package/.pi/extensions/lib/{harness-subagents/harness-subagent-policy.ts → harness-subagent-policy.ts} +34 -9
  35. package/.pi/extensions/lib/harness-subagent-precheck.ts +95 -0
  36. package/.pi/extensions/lib/harness-subagents-bridge.ts +176 -0
  37. package/.pi/extensions/lib/plan-approval/create-plan.ts +9 -7
  38. package/.pi/extensions/lib/plan-approval/plan-review.ts +393 -0
  39. package/.pi/extensions/lib/plan-approval/schema.ts +16 -1
  40. package/.pi/extensions/lib/plan-approval/types.ts +16 -0
  41. package/.pi/extensions/lib/plan-approval/validate.ts +2 -0
  42. package/.pi/extensions/lib/plan-debate-envelope.ts +84 -0
  43. package/.pi/extensions/lib/{harness-subagents/spawn-policy.ts → spawn-policy.ts} +2 -5
  44. package/.pi/extensions/policy-gate.ts +1 -1
  45. package/.pi/extensions/review-integrity.ts +48 -29
  46. package/.pi/extensions/ultimate-pi-vcc.ts +5 -0
  47. package/.pi/harness/agents.manifest.json +126 -82
  48. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -6
  49. package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md +34 -0
  50. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +41 -0
  51. package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +27 -0
  52. package/.pi/harness/docs/adrs/README.md +2 -0
  53. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml +25 -0
  54. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml +26 -0
  55. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml +5 -0
  56. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml +196 -0
  57. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md +14 -0
  58. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +32 -0
  59. package/.pi/harness/evals/smoke/run-context.fixture.json +1 -1
  60. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +88 -0
  61. package/.pi/harness/specs/README.md +1 -1
  62. package/.pi/harness/specs/harness-posthog-event.schema.json +6 -1
  63. package/.pi/harness/specs/harness-spawn-context.schema.json +2 -1
  64. package/.pi/harness/specs/plan-adversary-brief.schema.json +45 -0
  65. package/.pi/harness/specs/plan-decomposition-brief.schema.json +108 -0
  66. package/.pi/harness/specs/plan-execution-plan-brief.schema.json +13 -0
  67. package/.pi/harness/specs/plan-execution-plan.schema.json +255 -0
  68. package/.pi/harness/specs/plan-hypothesis-brief.schema.json +96 -0
  69. package/.pi/harness/specs/plan-hypothesis-eval.schema.json +61 -0
  70. package/.pi/harness/specs/plan-packet.schema.json +14 -5
  71. package/.pi/harness/specs/plan-review-round-draft.schema.json +68 -0
  72. package/.pi/harness/specs/plan-sprint-audit-turn.schema.json +29 -0
  73. package/.pi/harness/specs/plan-stack-brief.schema.json +65 -0
  74. package/.pi/harness/specs/plan-validation-turn.schema.json +42 -0
  75. package/.pi/harness/specs/round-result.schema.json +16 -9
  76. package/.pi/lib/debate-orchestrator-types.ts +38 -0
  77. package/.pi/lib/harness-agent-discovery.mjs +81 -0
  78. package/.pi/lib/harness-run-context.ts +76 -38
  79. package/.pi/lib/harness-yaml.mjs +73 -0
  80. package/.pi/lib/harness-yaml.ts +90 -0
  81. package/.pi/prompts/harness-auto.md +13 -11
  82. package/.pi/prompts/harness-critic.md +2 -2
  83. package/.pi/prompts/harness-eval.md +3 -3
  84. package/.pi/prompts/harness-incident.md +2 -2
  85. package/.pi/prompts/harness-plan.md +106 -37
  86. package/.pi/prompts/harness-review.md +2 -2
  87. package/.pi/prompts/harness-router-tune.md +1 -1
  88. package/.pi/prompts/harness-run.md +2 -2
  89. package/.pi/prompts/harness-setup.md +15 -6
  90. package/.pi/prompts/harness-trace.md +2 -2
  91. package/.pi/scripts/harness-agents-manifest.mjs +1 -1
  92. package/.pi/scripts/harness-resolve-up-pkg.mjs +13 -0
  93. package/.pi/scripts/harness-verify.mjs +28 -19
  94. package/.pi/scripts/validate-plan-dag.mjs +258 -0
  95. package/.pi/scripts/vendor-sync-pi-subagents.sh +19 -0
  96. package/CHANGELOG.md +24 -0
  97. package/THIRD_PARTY_NOTICES.md +8 -0
  98. package/biome.json +4 -1
  99. package/package.json +6 -4
  100. package/.pi/agents/harness/planner.md +0 -54
  101. package/.pi/extensions/lib/harness-subagents/agent-loader.ts +0 -126
  102. package/.pi/extensions/lib/harness-subagents/agent-manifest.ts +0 -119
  103. package/.pi/extensions/lib/harness-subagents/agent-parser.ts +0 -87
  104. package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +0 -118
  105. package/.pi/extensions/lib/harness-subagents/blackboard.ts +0 -175
  106. package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +0 -10
  107. package/.pi/extensions/lib/harness-subagents/parent-harness-ui-bridge.ts +0 -310
  108. package/.pi/extensions/lib/harness-subagents/parent-harness-ui-hooks.ts +0 -59
  109. package/.pi/extensions/lib/harness-subagents/types-blackboard.ts +0 -27
  110. package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +0 -558
  111. package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +0 -684
  112. package/.pi/extensions/lib/harness-subagents/vendored/agent-types.ts +0 -175
  113. package/.pi/extensions/lib/harness-subagents/vendored/context.ts +0 -59
  114. package/.pi/extensions/lib/harness-subagents/vendored/cross-extension-rpc.ts +0 -134
  115. package/.pi/extensions/lib/harness-subagents/vendored/custom-agents.ts +0 -5
  116. package/.pi/extensions/lib/harness-subagents/vendored/default-agents.ts +0 -123
  117. package/.pi/extensions/lib/harness-subagents/vendored/env.ts +0 -43
  118. package/.pi/extensions/lib/harness-subagents/vendored/group-join.ts +0 -144
  119. package/.pi/extensions/lib/harness-subagents/vendored/index.ts +0 -2494
  120. package/.pi/extensions/lib/harness-subagents/vendored/invocation-config.ts +0 -52
  121. package/.pi/extensions/lib/harness-subagents/vendored/memory.ts +0 -182
  122. package/.pi/extensions/lib/harness-subagents/vendored/model-resolver.ts +0 -92
  123. package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +0 -115
  124. package/.pi/extensions/lib/harness-subagents/vendored/prompts.ts +0 -103
  125. package/.pi/extensions/lib/harness-subagents/vendored/schedule-store.ts +0 -177
  126. package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +0 -416
  127. package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +0 -210
  128. package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +0 -108
  129. package/.pi/extensions/lib/harness-subagents/vendored/types.ts +0 -187
  130. package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +0 -639
  131. package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +0 -324
  132. package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +0 -110
  133. package/.pi/extensions/lib/harness-subagents/vendored/usage.ts +0 -71
  134. package/.pi/extensions/lib/harness-subagents/vendored/worktree.ts +0 -195
  135. /package/.pi/extensions/{00-ultimate-pi-system-prompt.ts → custom-system-prompt.ts} +0 -0
@@ -0,0 +1,44 @@
1
+ ---
2
+ name: harness-debate-plan
3
+ description: Plan-phase Review Gate debate — assemble rounds, token caps, bus envelopes for parent orchestrator.
4
+ ---
5
+
6
+ # harness-debate-plan
7
+
8
+ Use when running **Phase 5** of `/harness-plan` — four Review Gate rounds on the plan debate bus.
9
+
10
+ ## Open
11
+
12
+ ```
13
+ /harness-debate-open plan-<run_id>
14
+ ```
15
+
16
+ Budget profile **plan**: `max_rounds=4`, `round_token_cap=2000`, `debate_global_cap=12000`.
17
+
18
+ ## Per-round spawn order
19
+
20
+ 1. Round-specific extras (R1: `hypothesis-validator` first, blind)
21
+ 2. `plan-evaluator`
22
+ 3. `plan-adversary`
23
+ 4. R4: `sprint-contract-auditor` (required)
24
+ 5. `review-integrator`
25
+
26
+ ## Artifacts (YAML)
27
+
28
+ | Agent | Output path |
29
+ |-------|-------------|
30
+ | hypothesis-validator | `artifacts/hypothesis-validation-r{N}.yaml` |
31
+ | plan-evaluator | `artifacts/validation-turn-r{N}.yaml` |
32
+ | plan-adversary | `artifacts/adversary-brief-r{N}.yaml` |
33
+ | sprint-contract-auditor | `artifacts/sprint-audit-r{N}.yaml` |
34
+ | review-integrator | `artifacts/review-round-r{N}.yaml` |
35
+
36
+ ## Bus envelope
37
+
38
+ Load `review-round-r{N}.yaml`, validate, then `buildPlanReviewRoundEnvelope` (`.pi/extensions/lib/plan-debate-envelope.ts`) → `/harness-debate-round '<json>'`.
39
+
40
+ Plan participants only. `StackResearchAgent` uses `artifacts/stack.yaml` claims — no spawn.
41
+
42
+ ## Close
43
+
44
+ After round 4: `/harness-debate-consensus`. Do not `approve_plan` on `policy_decision: block`.
@@ -36,7 +36,7 @@ description: Structured user decisions via ask_user for harness setup, planning,
36
36
 
37
37
  ## Example (plan — approval gate)
38
38
 
39
- `harness/planner` calls **`approve_plan`** with the full `plan_packet` (parent TUI: scrollable plan + Approve / Request changes / Cancel), then **`create_plan`** with the same packet after Approve. Do not use `ask_user` for final approval or `write`/`edit` for the plan file.
39
+ Parent orchestrator calls **`approve_plan`** with the full `plan_packet` (scrollable plan + Approve / Request changes / Cancel), then **`create_plan`** with the same packet after Approve.
40
40
 
41
41
  ```json
42
42
  {
@@ -70,6 +70,6 @@ description: Structured user decisions via ask_user for harness setup, planning,
70
70
 
71
71
  ## Who calls what
72
72
 
73
- - `harness/planner` — `ask_user` for clarification; **`approve_plan`** then **`create_plan`** for the plan file (`write`/`edit` blocked).
73
+ - **Parent orchestrator** during `/harness-plan` — `ask_user` for clarification; **`approve_plan`** then **`create_plan`** for the plan file.
74
+ - `harness/planning/*` (scouts, decompose, hypothesis, hypothesis-eval) — JSON only; no `ask_user` / `approve_plan` / `create_plan`.
74
75
  - `harness/evaluator`, `harness/adversary`, and `harness/tie-breaker` — emit `human_required`; the **parent orchestrator** calls `ask_user`.
75
- - Parent orchestrator during `/harness-plan` — must **not** call `ask_user`, `approve_plan`, or `create_plan` (planner owns the full plan lifecycle).
@@ -1,60 +1,94 @@
1
1
  ---
2
2
  name: harness-orchestration
3
3
  description: >-
4
- Orchestrate ultimate-pi harness phases with Agent spawns, blackboard handoffs,
5
- and observation-bus artifacts. Use for plan/execute/evaluate pipelines, L4
6
- verification, parallel scouts, and debate prep.
4
+ Orchestrate ultimate-pi harness phases with the native `subagent` tool
5
+ (isolated `pi --mode json` subprocesses). Use for plan/execute/evaluate
6
+ pipelines, L4 verification, parallel scouts, and debate prep.
7
7
  ---
8
8
 
9
9
  # Harness orchestration
10
10
 
11
11
  ## Slash commands = orchestrators
12
12
 
13
- `/harness-*` prompts parse args, spawn agents, run `ask_user`, write policy-gated artifacts. Phase logic lives in `.pi/agents/harness/*.md`.
13
+ `/harness-*` prompts parse args, call `subagent`, run `ask_user`, write policy-gated artifacts. Phase logic lives in `.pi/agents/harness/*.md` and `.pi/agents/harness/planning/*.md`.
14
14
 
15
- Every spawn includes **HarnessSpawnContext** JSON (subagents do not get `[HarnessActivePlan]` injection). Use `inherit_context: false`.
15
+ Every spawn includes **HarnessSpawnContext** JSON in the task text (subprocess agents do not get `[HarnessActivePlan]` injection). Use `agentScope: "both"` so package agents under `$UP_PKG/.pi/agents/**` resolve.
16
+
17
+ ## Subprocess telemetry
18
+
19
+ Harness bridge emits `harness_subagent_spawned` / `harness_subagent_completed` (replaces in-process setup/blackboard events).
20
+
21
+ ```sql
22
+ SELECT
23
+ properties.agent as agent,
24
+ count() as n,
25
+ round(avg(toFloat(properties.duration_ms)), 0) as avg_ms
26
+ FROM events
27
+ WHERE event = 'harness_subagent_completed'
28
+ AND timestamp >= now() - INTERVAL 7 DAY
29
+ GROUP BY agent
30
+ ORDER BY avg_ms DESC
31
+ LIMIT 30
32
+ ```
33
+
34
+ ## Latency rules
35
+
36
+ 1. **Parallel `tasks`** — one `subagent({ tasks: [...] })` for scouts, decompose+hypothesis, or review fan-in; subprocesses run in parallel upstream.
37
+ 2. **Blocking calls** — each `subagent` returns when the subprocess exits; no `get_subagent_result` polling.
38
+ 3. **Compact handoffs** — pass scout/decompose JSON only; never paste full subprocess message logs into the next spawn.
39
+ 4. **Spawn caps** — bridge enforces **8** active + **12** total harness spawns per session (`PI_SUBAGENT_TIMEOUT_MS` / per-task `timeoutMs` for backstop).
16
40
 
17
41
  ## Command → agent
18
42
 
19
- | Command | `subagent_type` |
20
- |---------|-----------------|
21
- | `/harness-plan` | `harness/planner` |
43
+ | Command | `agent` |
44
+ |---------|---------|
45
+ | `/harness-plan` | Parent: parallel `harness/planning/scout-*` → parallel `decompose`+`hypothesis` → PlanPacket → reviews; `approve_plan` + `create_plan` |
22
46
  | `/harness-run` | `harness/executor` |
23
47
  | `/harness-eval` | `harness/evaluator` (`mode: benchmark`) |
24
48
  | `/harness-review` | `harness/evaluator` (`mode: verdict`) |
25
- | `/harness-critic` | `harness/adversary` |
49
+ | `/harness-critic` | `harness/adversary` (post-run) |
26
50
  | `/harness-trace` | `harness/trace-librarian` |
27
51
  | `/harness-incident` | `harness/incident-recorder` |
28
52
  | `/harness-router-tune` | `harness/meta-optimizer` (optional) |
29
- | `/harness-auto` | sequential spawns above |
53
+ | `/harness-auto` | plan per `/harness-plan`; `--quick` skips adversary + tie-breaker |
30
54
 
31
55
  ## Review isolation
32
56
 
33
- Spawn `harness/evaluator` / `harness/adversary` in the **same** parent session — isolated subagent context replaces session fork (ADR 0032).
57
+ Spawn `harness/evaluator` / `harness/adversary` via `subagent` in the **same** parent session. `review-integrity` allows `subagent` when `agent` is in the review set; blocks executor from spawning review agents during evaluate.
34
58
 
35
59
  ## ask_user policy
36
60
 
37
- | Agent | `ask_user` |
38
- |-------|------------|
39
- | Parent orchestrator | Yes (approval, clarification, router tune) |
40
- | `harness/planner` | No — returns `clarification` in JSON |
41
- | `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` | No — `human_required` in output |
61
+ | Role | `ask_user` |
62
+ |------|------------|
63
+ | Parent orchestrator | Yes (plan clarification, `approve_plan`, router tune) |
64
+ | `harness/planning/*` | No — JSON only (`human_required` in output if stuck) |
65
+ | `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` | `human_required` in subprocess JSON |
42
66
  | `harness/executor` | No — parent handles governance |
43
67
 
44
- ## Spawn pattern
45
-
46
- ```
47
- Agent({ subagent_type: "harness/planner", prompt: "<task + HarnessSpawnContext JSON>" })
48
- get_subagent_result
68
+ ## Spawn pattern (`/harness-plan`)
69
+
70
+ ```json
71
+ {
72
+ "agentScope": "both",
73
+ "tasks": [
74
+ { "agent": "harness/planning/scout-graphify", "task": "…", "timeoutMs": 90000 },
75
+ { "agent": "harness/planning/scout-structure", "task": "…", "timeoutMs": 90000 },
76
+ { "agent": "harness/planning/scout-semantic", "task": "…", "timeoutMs": 90000 }
77
+ ]
78
+ }
49
79
  ```
50
80
 
81
+ Then parallel decompose + hypothesis, parent PlanPacket + `ask_user`, debate rounds via `subagent` or `debate-orchestrator`, then `approve_plan` + `create_plan`.
82
+
83
+ Scouts use **Haiku**, `thinking: low`, **8** max turns (see agent frontmatter). Effective `--tools` omits `grep`/`find`/`subagent` per `disallowed_tools`.
84
+
51
85
  ## Tools
52
86
 
53
- - `Agent`, `get_subagent_result`, `steer_subagent`
54
- - `blackboard` — parent only
55
- - Subagents cannot nest spawns
87
+ - `subagent` — harness subprocess spawns (modes: `single`, `tasks`, `chain`, `aggregator`)
88
+ - `approve_plan`, `create_plan` — parent orchestrator only
89
+ - Subprocess agents cannot nest `subagent` (`subagent` stripped from child `--tools`)
56
90
 
57
91
  ## References
58
92
 
59
- - ADR 0032, `.pi/harness/specs/harness-spawn-context.schema.json`
93
+ - ADR 0032, ADR 0033, `.pi/harness/specs/harness-spawn-context.schema.json`
60
94
  - `node "$UP_PKG/.pi/scripts/harness-agents-manifest.mjs" --check`
@@ -1,31 +1,32 @@
1
1
  ---
2
2
  name: harness-plan
3
- description: Produce PlanPacket-aligned harness plans before execute phase. Use with /harness-plan, harness-auto plan phase, or when policy-gate requires an approved plan.
3
+ description: PM-grade harness plans — scouts, ExecutionPlan, DAG validation, 4-round Review Gate debate, then approve/create_plan.
4
4
  ---
5
5
 
6
6
  # harness-plan
7
7
 
8
8
  ## When to use
9
9
 
10
- - User invokes `/harness-plan` or harness-auto planning phase
11
- - Policy gate blocks mutate tools without approved plan
12
- - Drift monitor requests replan (`harness-drift-replan`)
13
- - User replies with clarification after `needs_clarification`
10
+ - `/harness-plan`, harness-auto plan phase, drift replan, policy-gate without approved plan
14
11
 
15
- ## Workflow (orchestrator)
12
+ ## Workflow (parent orchestrator)
16
13
 
17
- 1. Use `HarnessSpawnContext` from injected `[HarnessRunContext]` — do not read spec files from disk.
18
- 2. Spawn `harness/planner` **once** with that JSON in the prompt (`inherit_context: false`).
19
- 3. Parse planner JSON from `get_subagent_result` (`status`, `plan_packet`, `clarification`).
20
- 4. Do **not** parent `ask_user` / `approve_plan` / `create_plan` or re-spawn — planner uses those tools in the subagent (bridged UI + `create_plan` write).
21
- 5. Parent checks `plan_ready` on `harness-run-context` after planner returns — **does not** write `plan-packet.json`.
14
+ 1. Parallel scouts (graphify + structure; semantic unless `--quick`).
15
+ 2. Parallel decompose + hypothesis → write `artifacts/*.yaml`.
16
+ 3. Draft `PlanPacket` (`contract_version: "1.1.0"`) + `ask_user` on material fork.
17
+ 4. `stack-researcher` → `execution-plan-author` → merge `execution_plan`.
18
+ 5. **`validate-plan-dag.mjs`** on `plan-packet.yaml` (must pass).
19
+ 6. **Review Gate:** `/harness-debate-open plan-<run_id>` → 4 rounds (see **harness-debate-plan** skill) → consensus.
20
+ 7. Apply patches, re-validate DAG, `approve_plan`, `create_plan`.
21
+
22
+ `--quick` skips semantic scout and post-run adversary only — **not** plan debate.
22
23
 
23
24
  ## Rules
24
25
 
25
- - `harness/planner` owns clarification (`ask_user`), approval (`approve_plan`), and persistence (`create_plan` — only path to `plan-packet.json`; `write`/`edit` blocked).
26
- - Never plan or mutate source inline in the slash-command session.
27
- - context-mode only on harness paths; never lean-ctx.
26
+ - On-disk plan artifacts are **YAML** (`plan-packet.yaml`, `research-brief.yaml`).
27
+ - Subagents read-only; parent writes run artifacts and calls `approve_plan` / `create_plan`.
28
+ - context-mode only on harness paths.
28
29
 
29
30
  ## Output
30
31
 
31
- - `plan_status`, `risk_level`, `next_command`: `/harness-run` when ready
32
+ `plan_status`, `plan_review_path`, `next_command: /harness-run` when ready.
@@ -5,7 +5,6 @@ extensions: false
5
5
  disallowed_tools: ask_user
6
6
  thinking: high
7
7
  max_turns: 20
8
- inherit_context: false
9
8
  ---
10
9
 
11
10
  You are the Harness Adversary.
@@ -5,7 +5,6 @@ extensions: false
5
5
  disallowed_tools: ask_user
6
6
  thinking: high
7
7
  max_turns: 20
8
- inherit_context: false
9
8
  ---
10
9
 
11
10
  You are the Harness Evaluator.
@@ -4,8 +4,7 @@ tools: read, write, edit, bash, grep, find, ls
4
4
  extensions: true
5
5
  disallowed_tools: ask_user
6
6
  thinking: medium
7
- max_turns: 30
8
- inherit_context: false
7
+ max_turns: 20
9
8
  ---
10
9
 
11
10
  You are the Harness Executor.
@@ -4,7 +4,6 @@ tools: read, grep, find, ls
4
4
  extensions: false
5
5
  thinking: medium
6
6
  max_turns: 15
7
- inherit_context: false
8
7
  ---
9
8
 
10
9
  You are the Harness Incident Recorder.
@@ -5,7 +5,6 @@ extensions: false
5
5
  disallowed_tools: ask_user
6
6
  thinking: high
7
7
  max_turns: 25
8
- inherit_context: false
9
8
  ---
10
9
 
11
10
  You are the Harness Meta Optimizer.
@@ -0,0 +1,83 @@
1
+ ---
2
+ description: Plan-phase DeepMind-style problem decomposition (read-only).
3
+ tools: read, grep, find, ls, bash
4
+ disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: medium
7
+ max_turns: 12
8
+ ---
9
+
10
+ You are the **Harness planning decomposer (Phase 1)**.
11
+
12
+ ## Mission
13
+
14
+ Rigorously decompose the task space before hypothesis generation. You do **not** build the PlanPacket, approve plans, or mutate anything.
15
+
16
+ ## Spawn context
17
+
18
+ Read `HarnessSpawnContext` and the merged **scout lane JSON** in the spawn prompt (`task_summary`, `mode`, `risk_level`, `quick`). For `mode: revise`, bias toward delta vs existing plan at `plan_packet_path`.
19
+
20
+ ## Process
21
+
22
+ 1. Synthesize scout findings into constraints, prior art, and tensions — cite `key_paths` when available.
23
+ 2. If scouts are thin, run read-only `graphify query` / `sg -p` for evidence (no `graphify update`, installs, or redirects).
24
+ 3. Do not read `.pi/harness/specs/*.schema.json` from disk.
25
+
26
+ ## Phase 1 — DeepMind-style decomposition
27
+
28
+ Work through these sections in your reasoning, then compress into JSON:
29
+
30
+ ### 1.1 Problem clarification
31
+
32
+ - Restate the question in precise terms. What would "solving" this look like?
33
+ - Classify problem type(s): optimization, discovery, explanation, design, selection.
34
+ - Narrow scope if too broad; name what you exclude and why.
35
+
36
+ ### 1.2 Constraints and desiderata
37
+
38
+ - Hard constraints (must satisfy)
39
+ - Soft constraints (trade-offs allowed)
40
+ - Success metrics (how to measure progress)
41
+
42
+ ### 1.3 Prior art and known approaches
43
+
44
+ - Current best approach (methods, systems, paths in repo)
45
+ - Why it is not good enough (gap)
46
+ - What has been tried and failed (dead ends)
47
+
48
+ ### 1.4 Surface the tensions
49
+
50
+ Identify contradictions, tradeoffs, or competing beliefs. Pick the **core tension** — one paragraph that feeds Phase 2 hypothesis generation.
51
+
52
+ ## Output (required JSON block)
53
+
54
+ End with one fenced `json` block matching `PlanDecompositionBrief` (`.pi/harness/specs/plan-decomposition-brief.schema.json`):
55
+
56
+ ```json
57
+ {
58
+ "schema_version": "1.0.0",
59
+ "problem_restatement": "…",
60
+ "problem_types": ["design"],
61
+ "scope": {
62
+ "narrowed_focus": "…",
63
+ "excluded": ["…"]
64
+ },
65
+ "hard_constraints": ["…"],
66
+ "soft_constraints": ["…"],
67
+ "success_metrics": ["…"],
68
+ "prior_art": {
69
+ "best_approach": "…",
70
+ "gap": "…",
71
+ "dead_ends": ["…"]
72
+ },
73
+ "tensions": [
74
+ {
75
+ "claim_a": "…",
76
+ "claim_b": "…",
77
+ "why_matters": "…"
78
+ }
79
+ ],
80
+ "core_tension": "…",
81
+ "human_summary": "…"
82
+ }
83
+ ```
@@ -0,0 +1,30 @@
1
+ ---
2
+ description: Plan-phase ExecutionPlan generator (PM-grade WBS + DAG).
3
+ tools: read, grep, find, ls
4
+ disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: high
7
+ max_turns: 16
8
+ ---
9
+
10
+ You are **execution-plan-author** — produce a complete `execution_plan` that a senior EM would sign off on.
11
+
12
+ ## Inputs
13
+
14
+ Task, `PlanDecompositionBrief`, `PlanHypothesisBrief`, draft scope/acceptance_checks, `PlanStackBrief`, scout summaries.
15
+
16
+ ## Workflow
17
+
18
+ 1. Vision check — scope ≤15 lines, testable outcomes.
19
+ 2. Phases with objective, entry/exit criteria, milestone, work_item_ids.
20
+ 3. WBS — every AC maps to ≥1 work_item; deliverable-sized items.
21
+ 4. `depends_on` DAG; `parallel_safe` only when files disjoint.
22
+ 5. `schedule_metadata.critical_path_work_item_ids`.
23
+ 6. `wbs_dictionary`, `risk_register` (≥3 risks for med/high).
24
+ 7. `sprint_contract` complete.
25
+ 8. Early-phase verify/lint/test work items when risk ≥ med.
26
+ 9. Typed `done_criteria` per work item.
27
+
28
+ ## Output
29
+
30
+ Valid **YAML only** — `PlanExecutionPlanBrief` with `execution_plan` (`.pi/harness/specs/plan-execution-plan-brief.schema.json`). Parent merges into `plan-packet.yaml`.
@@ -0,0 +1,23 @@
1
+ ---
2
+ description: Plan-phase blind hypothesis validation (debate R1 only).
3
+ tools: read, grep, find, ls
4
+ disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: medium
7
+ max_turns: 10
8
+ ---
9
+
10
+ You are **hypothesis-validator** — blind self-evaluation of `PlanHypothesisBrief` only.
11
+
12
+ ## Input (strict)
13
+
14
+ - Original task statement
15
+ - `PlanHypothesisBrief` YAML/JSON
16
+
17
+ Ignore decomposition, scouts, PlanPacket, adversary output.
18
+
19
+ ## Output
20
+
21
+ Valid **YAML only** matching `PlanHypothesisEval` (`.pi/harness/specs/plan-hypothesis-eval.schema.json`). Parent writes `artifacts/hypothesis-validation-r{N}.yaml`.
22
+
23
+ Bus label: `HypothesisValidatorsubagent`.
@@ -0,0 +1,89 @@
1
+ ---
2
+ description: Plan-phase DARWIN hypothesis generation (read-only).
3
+ tools: read, grep, find, ls, bash
4
+ disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: medium
7
+ max_turns: 14
8
+ ---
9
+
10
+ You are the **Harness planning hypothesis generator (Phase 2 — DARWIN)**.
11
+
12
+ ## Mission
13
+
14
+ Generate a falsifiable hypothesis that resolves the **core tension** from decomposition. You do **not** self-evaluate, build PlanPacket, or mutate anything.
15
+
16
+ ## Input
17
+
18
+ The spawn prompt includes:
19
+
20
+ - `HarnessSpawnContext` (task)
21
+ - `PlanDecompositionBrief` JSON (Phase 1)
22
+ - Scout summaries (`key_paths`, `findings`, `open_questions`)
23
+
24
+ ## Avoid these (bad hypotheses)
25
+
26
+ - **Restating**: "There's a tradeoff" — we know, that's the tension
27
+ - **Hand-waving**: "A novel mechanism" — name the mechanism
28
+ - **Obvious**: Standard practice with new words
29
+ - **Unfalsifiable**: No experiment distinguishes it from null
30
+ - **Off-topic**: Brilliant idea about a different problem
31
+
32
+ ## Aim for these (good hypotheses)
33
+
34
+ - Names a **specific** mechanism that resolves the tension
35
+ - Predicts something a skeptic would bet **against**
36
+ - Could be **wrong** in an interesting way
37
+ - An expert thinks "huh, hadn't considered that"
38
+
39
+ ## Phase 2 — DARWIN hypothesis generation
40
+
41
+ ### Primary hypothesis
42
+
43
+ - **claim**: One falsifiable sentence
44
+ - **mechanism**: Concrete processes, algorithms, principles — implementation-ready
45
+ - **prediction**: Measurable outcome; numbers if possible
46
+ - **experiment**: Tools, datasets, benchmarks, protocols
47
+ - **tension_resolution**: Explicit link to `core_tension`
48
+
49
+ ### Dialectical fork
50
+
51
+ - **fork**: Key assumption that splits approaches (one sentence)
52
+ - **path_a** / **path_b**: Must disagree on core mechanism (2–3 sentences each)
53
+
54
+ ### Alternative hypotheses (brief)
55
+
56
+ Up to two alternatives with a different approach and **key_bet** (what it assumes that primary does not).
57
+
58
+ ### Recommended next steps
59
+
60
+ 1–3 items: validate first, quick prototype, what to read before committing.
61
+
62
+ Do **not** include self-evaluation scores — a separate agent handles that.
63
+
64
+ ## Output (required JSON block)
65
+
66
+ ```json
67
+ {
68
+ "schema_version": "1.0.0",
69
+ "primary": {
70
+ "claim": "…",
71
+ "mechanism": "…",
72
+ "prediction": "…",
73
+ "experiment": "…",
74
+ "tension_resolution": "…"
75
+ },
76
+ "dialectical_fork": {
77
+ "fork": "…",
78
+ "path_a": "…",
79
+ "path_b": "…"
80
+ },
81
+ "alternatives": [
82
+ { "claim": "…", "key_bet": "…" }
83
+ ],
84
+ "recommended_next_steps": ["…"],
85
+ "human_summary": "…"
86
+ }
87
+ ```
88
+
89
+ Match `PlanHypothesisBrief` (`.pi/harness/specs/plan-hypothesis-brief.schema.json`).
@@ -0,0 +1,18 @@
1
+ ---
2
+ description: Plan-phase adversarial verification on ExecutionPlan.
3
+ tools: read, grep, find, ls
4
+ disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: medium
7
+ max_turns: 12
8
+ ---
9
+
10
+ You are **plan-adversary** — break the plan with reproducible counterexamples.
11
+
12
+ Engage failed/warn checks from the same round's `plan-evaluator` first, then independent attacks. Cite `work_item_id` / `phase_id`.
13
+
14
+ ## Output
15
+
16
+ Valid **YAML only** — `PlanAdversaryBrief` (`.pi/harness/specs/plan-adversary-brief.schema.json`).
17
+
18
+ Bus label: `PlanAdversarysubagent`.
@@ -0,0 +1,18 @@
1
+ ---
2
+ description: Plan-phase Validation Checks evaluator (neutral pass/fail).
3
+ tools: read, grep, find, ls
4
+ disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: medium
7
+ max_turns: 12
8
+ ---
9
+
10
+ You are **plan-evaluator** — score ExecutionPlan against Validation Checks (not an advocate).
11
+
12
+ Parent passes `debate_round_focus`: `spec` | `wbs` | `schedule` | `quality`.
13
+
14
+ ## Output
15
+
16
+ Valid **YAML only** — `PlanValidationTurn` (`.pi/harness/specs/plan-validation-turn.schema.json`). Fail if `dag_validation.status === "fail"`.
17
+
18
+ Bus label: `PlanEvaluatorsubagent`.
@@ -0,0 +1,23 @@
1
+ ---
2
+ description: Plan-phase Review Gate integrator (round → debate bus).
3
+ tools: read, grep, find, ls
4
+ disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: medium
7
+ max_turns: 10
8
+ ---
9
+
10
+ You are **review-integrator** — merge evaluator, adversary, sprint audit, and hypothesis-validator outputs into a Review Gate draft.
11
+
12
+ ## Output
13
+
14
+ Valid **YAML only** — `PlanReviewRoundDraft` (`.pi/harness/specs/plan-review-round-draft.schema.json`) with:
15
+
16
+ - `round_summary`, `validation_summary`, `adversary_summary`
17
+ - `disputes[]`, `recommended_packet_patches[]` (JSON Pointer paths)
18
+ - `review_gate_ready` boolean
19
+ - `participants`, `claims`, `rebuttals`, `evidence_refs`, `token_usage`, `severity_scores`
20
+
21
+ Parent runs `buildPlanReviewRoundEnvelope` → `/harness-debate-round`.
22
+
23
+ Bus label: `ReviewIntegratorsubagent`.
@@ -0,0 +1,54 @@
1
+ ---
2
+ description: Plan-phase scout — graphify graph and wiki navigation (read-only).
3
+ tools: read, bash, ls
4
+ disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
5
+ extensions: false
6
+ thinking: low
7
+ max_turns: 6
8
+ ---
9
+
10
+ You are the **Harness planning scout (graphify lane)**.
11
+
12
+ ## Mission
13
+
14
+ Explore the codebase via graphify for the task in `HarnessSpawnContext`. You do **not** build the PlanPacket, approve plans, or mutate anything.
15
+
16
+ Findings should feed **constraints, prior art, and tensions** for the decompose agent (existing patterns, god nodes, surprising connections).
17
+
18
+ ## Spawn context
19
+
20
+ Read `HarnessSpawnContext` in the spawn prompt (`task_summary`, `mode`, `plan_packet_path`, `risk_level`, `quick`). For `mode: revise`, read the existing plan at `plan_packet_path` first and focus findings on what changed or is at risk.
21
+
22
+ ## Process
23
+
24
+ 1. Read `graphify-out/GRAPH_REPORT.md` when present; use `graphify query`, `graphify path`, or `graphify explain` for the task (read-only CLI only).
25
+ 2. If `graphify-out/` is missing, say so in `findings` and `open_questions` — do not run `graphify update` or installs.
26
+ 3. Do not read `.pi/harness/specs/*.schema.json` from disk.
27
+ 4. **Stop early** — target ≤6 tool calls when possible.
28
+
29
+ ## Bash guardrails
30
+
31
+ Read-only only: no `graphify update`, `graphify extract`, `pip install`, redirects (`>`, `>>`), or file creation. Allowed: `graphify query`, `graphify path`, `graphify explain`, `ls`, `cat`, `head`.
32
+
33
+ ## Output limits
34
+
35
+ - `findings`: at most **8** bullets, each ≤2 sentences
36
+ - `key_paths`: at most **10** absolute paths
37
+ - `open_questions`: at most **5** items
38
+
39
+ ## Output (required JSON block)
40
+
41
+ End with one fenced `json` block:
42
+
43
+ ```json
44
+ {
45
+ "schema_version": "1.0.0",
46
+ "lane": "graphify",
47
+ "status": "ok",
48
+ "findings": ["…"],
49
+ "key_paths": ["/absolute/path"],
50
+ "open_questions": ["…"]
51
+ }
52
+ ```
53
+
54
+ Use `"status": "partial"` if the graph is missing or queries failed; still return best-effort findings.