ultimate-pi 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/.agents/skills/harness-debate-plan/SKILL.md +44 -0
  2. package/.agents/skills/harness-decisions/SKILL.md +1 -1
  3. package/.agents/skills/harness-orchestration/SKILL.md +54 -28
  4. package/.agents/skills/harness-plan/SKILL.md +15 -20
  5. package/.pi/agents/harness/adversary.md +0 -1
  6. package/.pi/agents/harness/evaluator.md +0 -1
  7. package/.pi/agents/harness/executor.md +1 -2
  8. package/.pi/agents/harness/incident-recorder.md +0 -1
  9. package/.pi/agents/harness/meta-optimizer.md +0 -1
  10. package/.pi/agents/harness/planning/decompose.md +3 -4
  11. package/.pi/agents/harness/planning/execution-plan-author.md +30 -0
  12. package/.pi/agents/harness/planning/hypothesis-validator.md +23 -0
  13. package/.pi/agents/harness/planning/hypothesis.md +3 -4
  14. package/.pi/agents/harness/planning/plan-adversary.md +10 -42
  15. package/.pi/agents/harness/planning/plan-evaluator.md +18 -0
  16. package/.pi/agents/harness/planning/review-integrator.md +23 -0
  17. package/.pi/agents/harness/planning/scout-graphify.md +11 -5
  18. package/.pi/agents/harness/planning/scout-semantic.md +11 -6
  19. package/.pi/agents/harness/planning/scout-structure.md +12 -6
  20. package/.pi/agents/harness/planning/sprint-contract-auditor.md +18 -0
  21. package/.pi/agents/harness/planning/stack-researcher.md +24 -0
  22. package/.pi/agents/harness/tie-breaker.md +0 -1
  23. package/.pi/agents/harness/trace-librarian.md +0 -1
  24. package/.pi/extensions/debate-orchestrator.ts +90 -53
  25. package/.pi/extensions/harness-plan-approval.ts +2 -2
  26. package/.pi/extensions/harness-run-context.ts +145 -5
  27. package/.pi/extensions/harness-subagents.ts +2 -2
  28. package/.pi/extensions/lib/harness-posthog.ts +6 -1
  29. package/.pi/extensions/lib/harness-spawn-budget.ts +75 -0
  30. package/.pi/extensions/lib/harness-subagent-auth.ts +123 -0
  31. package/.pi/extensions/lib/{harness-subagents/harness-subagent-policy.ts → harness-subagent-policy.ts} +3 -6
  32. package/.pi/extensions/lib/harness-subagent-precheck.ts +95 -0
  33. package/.pi/extensions/lib/harness-subagents-bridge.ts +176 -0
  34. package/.pi/extensions/lib/plan-approval/create-plan.ts +4 -7
  35. package/.pi/extensions/lib/plan-approval/plan-review.ts +1 -1
  36. package/.pi/extensions/lib/plan-approval/types.ts +7 -1
  37. package/.pi/extensions/lib/plan-debate-envelope.ts +84 -0
  38. package/.pi/extensions/lib/{harness-subagents/spawn-policy.ts → spawn-policy.ts} +1 -0
  39. package/.pi/extensions/policy-gate.ts +1 -1
  40. package/.pi/extensions/review-integrity.ts +48 -29
  41. package/.pi/harness/agents.manifest.json +37 -25
  42. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +4 -3
  43. package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md +1 -1
  44. package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +27 -0
  45. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml +25 -0
  46. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml +26 -0
  47. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml +5 -0
  48. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml +196 -0
  49. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md +14 -0
  50. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +32 -0
  51. package/.pi/harness/evals/smoke/run-context.fixture.json +1 -1
  52. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +88 -0
  53. package/.pi/harness/specs/harness-posthog-event.schema.json +6 -1
  54. package/.pi/harness/specs/plan-execution-plan-brief.schema.json +13 -0
  55. package/.pi/harness/specs/plan-execution-plan.schema.json +255 -0
  56. package/.pi/harness/specs/plan-packet.schema.json +14 -5
  57. package/.pi/harness/specs/plan-review-round-draft.schema.json +68 -0
  58. package/.pi/harness/specs/plan-sprint-audit-turn.schema.json +29 -0
  59. package/.pi/harness/specs/plan-stack-brief.schema.json +65 -0
  60. package/.pi/harness/specs/plan-validation-turn.schema.json +42 -0
  61. package/.pi/harness/specs/round-result.schema.json +16 -9
  62. package/.pi/lib/debate-orchestrator-types.ts +38 -0
  63. package/.pi/lib/harness-agent-discovery.mjs +81 -0
  64. package/.pi/lib/harness-run-context.ts +64 -38
  65. package/.pi/lib/harness-yaml.mjs +73 -0
  66. package/.pi/lib/harness-yaml.ts +90 -0
  67. package/.pi/prompts/harness-auto.md +13 -11
  68. package/.pi/prompts/harness-critic.md +2 -2
  69. package/.pi/prompts/harness-eval.md +3 -3
  70. package/.pi/prompts/harness-incident.md +2 -2
  71. package/.pi/prompts/harness-plan.md +79 -93
  72. package/.pi/prompts/harness-review.md +2 -2
  73. package/.pi/prompts/harness-router-tune.md +1 -1
  74. package/.pi/prompts/harness-run.md +2 -2
  75. package/.pi/prompts/harness-setup.md +15 -6
  76. package/.pi/prompts/harness-trace.md +2 -2
  77. package/.pi/scripts/harness-agents-manifest.mjs +1 -1
  78. package/.pi/scripts/harness-verify.mjs +28 -19
  79. package/.pi/scripts/validate-plan-dag.mjs +258 -0
  80. package/.pi/scripts/vendor-sync-pi-subagents.sh +19 -0
  81. package/CHANGELOG.md +12 -0
  82. package/THIRD_PARTY_NOTICES.md +8 -0
  83. package/biome.json +2 -2
  84. package/package.json +6 -4
  85. package/.pi/agents/harness/planner.md +0 -13
  86. package/.pi/agents/harness/planning/hypothesis-eval.md +0 -59
  87. package/.pi/agents/harness/planning/planner.md +0 -20
  88. package/.pi/extensions/lib/harness-subagents/agent-loader.ts +0 -126
  89. package/.pi/extensions/lib/harness-subagents/agent-manifest.ts +0 -119
  90. package/.pi/extensions/lib/harness-subagents/agent-parser.ts +0 -87
  91. package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +0 -118
  92. package/.pi/extensions/lib/harness-subagents/blackboard.ts +0 -175
  93. package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +0 -10
  94. package/.pi/extensions/lib/harness-subagents/parent-harness-ui-bridge.ts +0 -137
  95. package/.pi/extensions/lib/harness-subagents/parent-harness-ui-hooks.ts +0 -77
  96. package/.pi/extensions/lib/harness-subagents/types-blackboard.ts +0 -27
  97. package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +0 -558
  98. package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +0 -666
  99. package/.pi/extensions/lib/harness-subagents/vendored/agent-types.ts +0 -175
  100. package/.pi/extensions/lib/harness-subagents/vendored/context.ts +0 -59
  101. package/.pi/extensions/lib/harness-subagents/vendored/cross-extension-rpc.ts +0 -134
  102. package/.pi/extensions/lib/harness-subagents/vendored/custom-agents.ts +0 -5
  103. package/.pi/extensions/lib/harness-subagents/vendored/default-agents.ts +0 -123
  104. package/.pi/extensions/lib/harness-subagents/vendored/env.ts +0 -43
  105. package/.pi/extensions/lib/harness-subagents/vendored/group-join.ts +0 -144
  106. package/.pi/extensions/lib/harness-subagents/vendored/index.ts +0 -2460
  107. package/.pi/extensions/lib/harness-subagents/vendored/invocation-config.ts +0 -52
  108. package/.pi/extensions/lib/harness-subagents/vendored/memory.ts +0 -182
  109. package/.pi/extensions/lib/harness-subagents/vendored/model-resolver.ts +0 -92
  110. package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +0 -115
  111. package/.pi/extensions/lib/harness-subagents/vendored/prompts.ts +0 -103
  112. package/.pi/extensions/lib/harness-subagents/vendored/schedule-store.ts +0 -177
  113. package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +0 -416
  114. package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +0 -210
  115. package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +0 -108
  116. package/.pi/extensions/lib/harness-subagents/vendored/types.ts +0 -187
  117. package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +0 -639
  118. package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +0 -324
  119. package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +0 -110
  120. package/.pi/extensions/lib/harness-subagents/vendored/usage.ts +0 -71
  121. package/.pi/extensions/lib/harness-subagents/vendored/worktree.ts +0 -195
  122. /package/.pi/extensions/{00-ultimate-pi-system-prompt.ts → custom-system-prompt.ts} +0 -0
@@ -0,0 +1,44 @@
1
+ ---
2
+ name: harness-debate-plan
3
+ description: Plan-phase Review Gate debate — assemble rounds, token caps, bus envelopes for parent orchestrator.
4
+ ---
5
+
6
+ # harness-debate-plan
7
+
8
+ Use when running **Phase 5** of `/harness-plan` — four Review Gate rounds on the plan debate bus.
9
+
10
+ ## Open
11
+
12
+ ```
13
+ /harness-debate-open plan-<run_id>
14
+ ```
15
+
16
+ Budget profile **plan**: `max_rounds=4`, `round_token_cap=2000`, `debate_global_cap=12000`.
17
+
18
+ ## Per-round spawn order
19
+
20
+ 1. Round-specific extras (R1: `hypothesis-validator` first, blind)
21
+ 2. `plan-evaluator`
22
+ 3. `plan-adversary`
23
+ 4. R4: `sprint-contract-auditor` (required)
24
+ 5. `review-integrator`
25
+
26
+ ## Artifacts (YAML)
27
+
28
+ | Agent | Output path |
29
+ |-------|-------------|
30
+ | hypothesis-validator | `artifacts/hypothesis-validation-r{N}.yaml` |
31
+ | plan-evaluator | `artifacts/validation-turn-r{N}.yaml` |
32
+ | plan-adversary | `artifacts/adversary-brief-r{N}.yaml` |
33
+ | sprint-contract-auditor | `artifacts/sprint-audit-r{N}.yaml` |
34
+ | review-integrator | `artifacts/review-round-r{N}.yaml` |
35
+
36
+ ## Bus envelope
37
+
38
+ Load `review-round-r{N}.yaml`, validate, then `buildPlanReviewRoundEnvelope` (`.pi/extensions/lib/plan-debate-envelope.ts`) → `/harness-debate-round '<json>'`.
39
+
40
+ Plan participants only. `StackResearchAgent` uses `artifacts/stack.yaml` claims — no spawn.
41
+
42
+ ## Close
43
+
44
+ After round 4: `/harness-debate-consensus`. Do not `approve_plan` on `policy_decision: block`.
@@ -71,5 +71,5 @@ Parent orchestrator calls **`approve_plan`** with the full `plan_packet` (scroll
71
71
  ## Who calls what
72
72
 
73
73
  - **Parent orchestrator** during `/harness-plan` — `ask_user` for clarification; **`approve_plan`** then **`create_plan`** for the plan file.
74
- - `harness/planning/*` (scouts, decompose, hypothesis, plan-adversary, hypothesis-eval) — JSON only; no `ask_user` / `approve_plan` / `create_plan`.
74
+ - `harness/planning/*` (scouts, decompose, hypothesis, hypothesis-eval) — JSON only; no `ask_user` / `approve_plan` / `create_plan`.
75
75
  - `harness/evaluator`, `harness/adversary`, and `harness/tie-breaker` — emit `human_required`; the **parent orchestrator** calls `ask_user`.
@@ -1,24 +1,48 @@
1
1
  ---
2
2
  name: harness-orchestration
3
3
  description: >-
4
- Orchestrate ultimate-pi harness phases with Agent spawns, blackboard handoffs,
5
- and observation-bus artifacts. Use for plan/execute/evaluate pipelines, L4
6
- verification, parallel scouts, and debate prep.
4
+ Orchestrate ultimate-pi harness phases with the native `subagent` tool
5
+ (isolated `pi --mode json` subprocesses). Use for plan/execute/evaluate
6
+ pipelines, L4 verification, parallel scouts, and debate prep.
7
7
  ---
8
8
 
9
9
  # Harness orchestration
10
10
 
11
11
  ## Slash commands = orchestrators
12
12
 
13
- `/harness-*` prompts parse args, spawn agents, run `ask_user`, write policy-gated artifacts. Phase logic lives in `.pi/agents/harness/*.md` and `.pi/agents/harness/planning/*.md`.
13
+ `/harness-*` prompts parse args, call `subagent`, run `ask_user`, write policy-gated artifacts. Phase logic lives in `.pi/agents/harness/*.md` and `.pi/agents/harness/planning/*.md`.
14
14
 
15
- Every spawn includes **HarnessSpawnContext** JSON (subagents do not get `[HarnessActivePlan]` injection). Use `inherit_context: false`.
15
+ Every spawn includes **HarnessSpawnContext** JSON in the task text (subprocess agents do not get `[HarnessActivePlan]` injection). Use `agentScope: "both"` so package agents under `$UP_PKG/.pi/agents/**` resolve.
16
+
17
+ ## Subprocess telemetry
18
+
19
+ Harness bridge emits `harness_subagent_spawned` / `harness_subagent_completed` (replaces in-process setup/blackboard events).
20
+
21
+ ```sql
22
+ SELECT
23
+ properties.agent as agent,
24
+ count() as n,
25
+ round(avg(toFloat(properties.duration_ms)), 0) as avg_ms
26
+ FROM events
27
+ WHERE event = 'harness_subagent_completed'
28
+ AND timestamp >= now() - INTERVAL 7 DAY
29
+ GROUP BY agent
30
+ ORDER BY avg_ms DESC
31
+ LIMIT 30
32
+ ```
33
+
34
+ ## Latency rules
35
+
36
+ 1. **Parallel `tasks`** — one `subagent({ tasks: [...] })` for scouts, decompose+hypothesis, or review fan-in; subprocesses run in parallel upstream.
37
+ 2. **Blocking calls** — each `subagent` returns when the subprocess exits; no `get_subagent_result` polling.
38
+ 3. **Compact handoffs** — pass scout/decompose JSON only; never paste full subprocess message logs into the next spawn.
39
+ 4. **Spawn caps** — bridge enforces **8** active + **12** total harness spawns per session (`PI_SUBAGENT_TIMEOUT_MS` / per-task `timeoutMs` for backstop).
16
40
 
17
41
  ## Command → agent
18
42
 
19
- | Command | `subagent_type` |
20
- |---------|-----------------|
21
- | `/harness-plan` | Parent: parallel `scout-*` → `decompose` → `hypothesis` → PlanPacket → parallel `plan-adversary` + `hypothesis-eval`; `approve_plan` + `create_plan` |
43
+ | Command | `agent` |
44
+ |---------|---------|
45
+ | `/harness-plan` | Parent: parallel `harness/planning/scout-*` → parallel `decompose`+`hypothesis` → PlanPacket → reviews; `approve_plan` + `create_plan` |
22
46
  | `/harness-run` | `harness/executor` |
23
47
  | `/harness-eval` | `harness/evaluator` (`mode: benchmark`) |
24
48
  | `/harness-review` | `harness/evaluator` (`mode: verdict`) |
@@ -26,41 +50,43 @@ Every spawn includes **HarnessSpawnContext** JSON (subagents do not get `[Harnes
26
50
  | `/harness-trace` | `harness/trace-librarian` |
27
51
  | `/harness-incident` | `harness/incident-recorder` |
28
52
  | `/harness-router-tune` | `harness/meta-optimizer` (optional) |
29
- | `/harness-auto` | plan phases per `/harness-plan`, then sequential spawns above |
53
+ | `/harness-auto` | plan per `/harness-plan`; `--quick` skips adversary + tie-breaker |
30
54
 
31
55
  ## Review isolation
32
56
 
33
- Spawn `harness/evaluator` / `harness/adversary` in the **same** parent session — isolated subagent context replaces session fork (ADR 0032).
57
+ Spawn `harness/evaluator` / `harness/adversary` via `subagent` in the **same** parent session. `review-integrity` allows `subagent` when `agent` is in the review set; blocks executor from spawning review agents during evaluate.
34
58
 
35
59
  ## ask_user policy
36
60
 
37
- | Agent | `ask_user` |
38
- |-------|------------|
39
- | Parent orchestrator | Yes (plan clarification, approval via `approve_plan`, router tune) |
40
- | `harness/planning/*` | No — JSON only |
41
- | `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` | Bridged or `human_required` in output |
61
+ | Role | `ask_user` |
62
+ |------|------------|
63
+ | Parent orchestrator | Yes (plan clarification, `approve_plan`, router tune) |
64
+ | `harness/planning/*` | No — JSON only (`human_required` in output if stuck) |
65
+ | `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` | `human_required` in subprocess JSON |
42
66
  | `harness/executor` | No — parent handles governance |
43
67
 
44
68
  ## Spawn pattern (`/harness-plan`)
45
69
 
70
+ ```json
71
+ {
72
+ "agentScope": "both",
73
+ "tasks": [
74
+ { "agent": "harness/planning/scout-graphify", "task": "…", "timeoutMs": 90000 },
75
+ { "agent": "harness/planning/scout-structure", "task": "…", "timeoutMs": 90000 },
76
+ { "agent": "harness/planning/scout-semantic", "task": "…", "timeoutMs": 90000 }
77
+ ]
78
+ }
46
79
  ```
47
- Agent({ subagent_type: "harness/planning/scout-graphify", prompt: "…", run_in_background: true })
48
- Agent({ subagent_type: "harness/planning/scout-structure", prompt: "…", run_in_background: true })
49
- get_subagent_result # scouts
50
- Agent({ subagent_type: "harness/planning/decompose", prompt: "…" })
51
- Agent({ subagent_type: "harness/planning/hypothesis", prompt: "…" })
52
- # parent: PlanPacket, ask_user on fork
53
- Agent({ subagent_type: "harness/planning/plan-adversary", run_in_background: true })
54
- Agent({ subagent_type: "harness/planning/hypothesis-eval", run_in_background: true })
55
- approve_plan({ plan_packet, research_brief }); create_plan
56
- ```
80
+
81
+ Then parallel decompose + hypothesis, parent PlanPacket + `ask_user`, debate rounds via `subagent` or `debate-orchestrator`, then `approve_plan` + `create_plan`.
82
+
83
+ Scouts use **Haiku**, `thinking: low`, **6** max turns (see agent frontmatter). Effective `--tools` omits `grep`/`find`/`subagent` per `disallowed_tools`.
57
84
 
58
85
  ## Tools
59
86
 
60
- - `Agent`, `get_subagent_result`, `steer_subagent`
87
+ - `subagent` — harness subprocess spawns (modes: `single`, `tasks`, `chain`, `aggregator`)
61
88
  - `approve_plan`, `create_plan` — parent orchestrator only
62
- - `blackboard` — parent only
63
- - Subagents cannot nest spawns
89
+ - Subprocess agents cannot nest `subagent` (`subagent` stripped from child `--tools`)
64
90
 
65
91
  ## References
66
92
 
@@ -1,37 +1,32 @@
1
1
  ---
2
2
  name: harness-plan
3
- description: Produce PlanPacket-aligned harness plans via decomposition + DARWIN hypothesis before execute phase. Use with /harness-plan, harness-auto plan phase, or when policy-gate requires an approved plan.
3
+ description: PM-grade harness plans — scouts, ExecutionPlan, DAG validation, 4-round Review Gate debate, then approve/create_plan.
4
4
  ---
5
5
 
6
6
  # harness-plan
7
7
 
8
8
  ## When to use
9
9
 
10
- - User invokes `/harness-plan` or harness-auto planning phase
11
- - Policy gate blocks mutate tools without approved plan
12
- - Drift monitor requests replan (`harness-drift-replan`)
13
- - User replies with clarification after `needs_clarification`
10
+ - `/harness-plan`, harness-auto plan phase, drift replan, policy-gate without approved plan
14
11
 
15
12
  ## Workflow (parent orchestrator)
16
13
 
17
- 1. Use `HarnessSpawnContext` from injected `[HarnessRunContext]` — do not read spec files from disk.
18
- 2. Spawn planning scouts in parallel (`run_in_background: true`, `inherit_context: false`):
19
- - `harness/planning/scout-graphify` (required)
20
- - `harness/planning/scout-structure` (required)
21
- - `harness/planning/scout-semantic` (skip when `--quick`)
22
- 3. `get_subagent_result` for each; parse scout JSON.
23
- 4. Spawn `harness/planning/decompose` with merged scout JSON → `PlanDecompositionBrief`.
24
- 5. Spawn `harness/planning/hypothesis` with decomposition + scouts → `PlanHypothesisBrief`.
25
- 6. Parent synthesizes draft `PlanPacket` from hypothesis; `ask_user` when dialectical fork is material.
26
- 7. Parallel: `harness/planning/plan-adversary` + `harness/planning/hypothesis-eval` (eval gets task + hypothesis only).
27
- 8. Parent calls `approve_plan({ plan_packet, human_summary, research_brief })` then `create_plan`.
14
+ 1. Parallel scouts (graphify + structure; semantic unless `--quick`).
15
+ 2. Parallel decompose + hypothesis → write `artifacts/*.yaml`.
16
+ 3. Draft `PlanPacket` (`contract_version: "1.1.0"`) + `ask_user` on material fork.
17
+ 4. `stack-researcher` → `execution-plan-author` → merge `execution_plan`.
18
+ 5. **`validate-plan-dag.mjs`** on `plan-packet.yaml` (must pass).
19
+ 6. **Review Gate:** `/harness-debate-open plan-<run_id>` → 4 rounds (see **harness-debate-plan** skill) → consensus.
20
+ 7. Apply patches, re-validate DAG, `approve_plan`, `create_plan`.
21
+
22
+ `--quick` skips semantic scout and post-run adversary only — **not** plan debate.
28
23
 
29
24
  ## Rules
30
25
 
31
- - Planning subagents are read-only; they never call `ask_user`, `approve_plan`, or `create_plan`.
32
- - Do not spawn `harness/planner` or `harness/planning/planner` (deprecated).
33
- - context-mode only on harness paths; never lean-ctx.
26
+ - On-disk plan artifacts are **YAML** (`plan-packet.yaml`, `research-brief.yaml`).
27
+ - Subagents read-only; parent writes run artifacts and calls `approve_plan` / `create_plan`.
28
+ - context-mode only on harness paths.
34
29
 
35
30
  ## Output
36
31
 
37
- - `plan_status`, `risk_level`, `plan_review_path`, `next_command`: `/harness-run` when ready
32
+ `plan_status`, `plan_review_path`, `next_command: /harness-run` when ready.
@@ -5,7 +5,6 @@ extensions: false
5
5
  disallowed_tools: ask_user
6
6
  thinking: high
7
7
  max_turns: 20
8
- inherit_context: false
9
8
  ---
10
9
 
11
10
  You are the Harness Adversary.
@@ -5,7 +5,6 @@ extensions: false
5
5
  disallowed_tools: ask_user
6
6
  thinking: high
7
7
  max_turns: 20
8
- inherit_context: false
9
8
  ---
10
9
 
11
10
  You are the Harness Evaluator.
@@ -4,8 +4,7 @@ tools: read, write, edit, bash, grep, find, ls
4
4
  extensions: true
5
5
  disallowed_tools: ask_user
6
6
  thinking: medium
7
- max_turns: 30
8
- inherit_context: false
7
+ max_turns: 20
9
8
  ---
10
9
 
11
10
  You are the Harness Executor.
@@ -4,7 +4,6 @@ tools: read, grep, find, ls
4
4
  extensions: false
5
5
  thinking: medium
6
6
  max_turns: 15
7
- inherit_context: false
8
7
  ---
9
8
 
10
9
  You are the Harness Incident Recorder.
@@ -5,7 +5,6 @@ extensions: false
5
5
  disallowed_tools: ask_user
6
6
  thinking: high
7
7
  max_turns: 25
8
- inherit_context: false
9
8
  ---
10
9
 
11
10
  You are the Harness Meta Optimizer.
@@ -1,11 +1,10 @@
1
1
  ---
2
2
  description: Plan-phase DeepMind-style problem decomposition (read-only).
3
3
  tools: read, grep, find, ls, bash
4
- disallowed_tools: write, edit, ask_user, approve_plan, create_plan, Agent
4
+ disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
- thinking: high
7
- max_turns: 18
8
- inherit_context: false
6
+ thinking: medium
7
+ max_turns: 12
9
8
  ---
10
9
 
11
10
  You are the **Harness planning decomposer (Phase 1)**.
@@ -0,0 +1,30 @@
1
+ ---
2
+ description: Plan-phase ExecutionPlan generator (PM-grade WBS + DAG).
3
+ tools: read, grep, find, ls
4
+ disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: high
7
+ max_turns: 16
8
+ ---
9
+
10
+ You are **execution-plan-author** — produce a complete `execution_plan` a senior EM would sign off.
11
+
12
+ ## Inputs
13
+
14
+ Task, `PlanDecompositionBrief`, `PlanHypothesisBrief`, draft scope/acceptance_checks, `PlanStackBrief`, scout summaries.
15
+
16
+ ## Workflow
17
+
18
+ 1. Vision check — scope ≤15 lines, testable outcomes.
19
+ 2. Phases with objective, entry/exit criteria, milestone, work_item_ids.
20
+ 3. WBS — every AC maps to ≥1 work_item; deliverable-sized items.
21
+ 4. `depends_on` DAG; `parallel_safe` only when files disjoint.
22
+ 5. `schedule_metadata.critical_path_work_item_ids`.
23
+ 6. `wbs_dictionary`, `risk_register` (≥3 risks for med/high).
24
+ 7. `sprint_contract` complete.
25
+ 8. Early-phase verify/lint/test work items when risk ≥ med.
26
+ 9. Typed `done_criteria` per work item.
27
+
28
+ ## Output
29
+
30
+ Valid **YAML only** — `PlanExecutionPlanBrief` with `execution_plan` (`.pi/harness/specs/plan-execution-plan-brief.schema.json`). Parent merges into `plan-packet.yaml`.
@@ -0,0 +1,23 @@
1
+ ---
2
+ description: Plan-phase blind hypothesis validation (debate R1 only).
3
+ tools: read, grep, find, ls
4
+ disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: medium
7
+ max_turns: 10
8
+ ---
9
+
10
+ You are **hypothesis-validator** — blind self-evaluation of `PlanHypothesisBrief` only.
11
+
12
+ ## Input (strict)
13
+
14
+ - Original task statement
15
+ - `PlanHypothesisBrief` YAML/JSON
16
+
17
+ Ignore decomposition, scouts, PlanPacket, adversary output.
18
+
19
+ ## Output
20
+
21
+ Valid **YAML only** matching `PlanHypothesisEval` (`.pi/harness/specs/plan-hypothesis-eval.schema.json`). Parent writes `artifacts/hypothesis-validation-r{N}.yaml`.
22
+
23
+ Bus label: `HypothesisValidatorsubagent`.
@@ -1,11 +1,10 @@
1
1
  ---
2
2
  description: Plan-phase DARWIN hypothesis generation (read-only).
3
3
  tools: read, grep, find, ls, bash
4
- disallowed_tools: write, edit, ask_user, approve_plan, create_plan, Agent
4
+ disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
- thinking: high
7
- max_turns: 20
8
- inherit_context: false
6
+ thinking: medium
7
+ max_turns: 14
9
8
  ---
10
9
 
11
10
  You are the **Harness planning hypothesis generator (Phase 2 — DARWIN)**.
@@ -1,50 +1,18 @@
1
1
  ---
2
- description: Plan adversary (pre-approval) — edge cases and acceptance gaps on a draft PlanPacket.
3
- tools: read, grep, find, ls, bash
4
- disallowed_tools: write, edit, ask_user, approve_plan, create_plan, Agent
2
+ description: Plan-phase adversarial verification on ExecutionPlan.
3
+ tools: read, grep, find, ls
4
+ disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
- thinking: high
7
- max_turns: 15
8
- inherit_context: false
6
+ thinking: medium
7
+ max_turns: 12
9
8
  ---
10
9
 
11
- You are the **Harness plan adversary (pre-approval)**. Not the post-run `harness/adversary`.
10
+ You are **plan-adversary** — break the plan with reproducible counterexamples.
12
11
 
13
- ## Mission
12
+ Engage failed/warn checks from the same round's `plan-evaluator` first, then independent attacks. Cite `work_item_id` / `phase_id`.
14
13
 
15
- Pressure-test a **draft** `PlanPacket` for **execution risk** before the user approves. Surface edge cases, failure modes, and missing acceptance checks tied to hypothesis-derived `acceptance_checks`. Read-only — no mutations.
14
+ ## Output
16
15
 
17
- Do **not** re-score DARWIN novelty or duplicate hypothesis-eval work.
16
+ Valid **YAML only** — `PlanAdversaryBrief` (`.pi/harness/specs/plan-adversary-brief.schema.json`).
18
17
 
19
- ## Input
20
-
21
- The spawn prompt includes:
22
-
23
- - `HarnessSpawnContext`
24
- - Draft `PlanPacket` JSON
25
- - Scout lane summaries (graphify, structure, semantic)
26
-
27
- ## Process
28
-
29
- 1. Assume the plan has hidden gaps until you justify `recommendation: proceed`.
30
- 2. Tie every finding to evidence (paths, APIs, or scout findings) — no speculation without a probe path.
31
- 3. Propose concrete `mitigations` the parent can merge into scope, assumptions, or `acceptance_checks`.
32
- 4. Empty arrays are allowed when no material gaps exist; say so in `human_summary`.
33
-
34
- ## Output (required JSON block)
35
-
36
- Match `PlanAdversaryBrief` (`.pi/harness/specs/plan-adversary-brief.schema.json`):
37
-
38
- ```json
39
- {
40
- "schema_version": "1.0.0",
41
- "edge_cases": ["…"],
42
- "failure_modes": ["…"],
43
- "acceptance_gaps": ["…"],
44
- "mitigations": ["…"],
45
- "recommendation": "proceed",
46
- "human_summary": "…"
47
- }
48
- ```
49
-
50
- Use `"recommendation": "revise"` when scope or acceptance must change before execution.
18
+ Bus label: `PlanAdversarysubagent`.
@@ -0,0 +1,18 @@
1
+ ---
2
+ description: Plan-phase Validation Checks evaluator (neutral pass/fail).
3
+ tools: read, grep, find, ls
4
+ disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: medium
7
+ max_turns: 12
8
+ ---
9
+
10
+ You are **plan-evaluator** — score ExecutionPlan against Validation Checks (not an advocate).
11
+
12
+ Parent passes `debate_round_focus`: `spec` | `wbs` | `schedule` | `quality`.
13
+
14
+ ## Output
15
+
16
+ Valid **YAML only** — `PlanValidationTurn` (`.pi/harness/specs/plan-validation-turn.schema.json`). Fail if `dag_validation.status === "fail"`.
17
+
18
+ Bus label: `PlanEvaluatorsubagent`.
@@ -0,0 +1,23 @@
1
+ ---
2
+ description: Plan-phase Review Gate integrator (round → debate bus).
3
+ tools: read, grep, find, ls
4
+ disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: medium
7
+ max_turns: 10
8
+ ---
9
+
10
+ You are **review-integrator** — merge evaluator, adversary, sprint audit, and hypothesis-validator outputs into a Review Gate draft.
11
+
12
+ ## Output
13
+
14
+ Valid **YAML only** — `PlanReviewRoundDraft` (`.pi/harness/specs/plan-review-round-draft.schema.json`) with:
15
+
16
+ - `round_summary`, `validation_summary`, `adversary_summary`
17
+ - `disputes[]`, `recommended_packet_patches[]` (JSON Pointer paths)
18
+ - `review_gate_ready` boolean
19
+ - `participants`, `claims`, `rebuttals`, `evidence_refs`, `token_usage`, `severity_scores`
20
+
21
+ Parent runs `buildPlanReviewRoundEnvelope` → `/harness-debate-round`.
22
+
23
+ Bus label: `ReviewIntegratorsubagent`.
@@ -1,11 +1,10 @@
1
1
  ---
2
2
  description: Plan-phase scout — graphify graph and wiki navigation (read-only).
3
- tools: read, grep, find, ls, bash
4
- disallowed_tools: write, edit, ask_user, approve_plan, create_plan, Agent
3
+ tools: read, bash, ls
4
+ disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
5
5
  extensions: false
6
- thinking: medium
7
- max_turns: 12
8
- inherit_context: false
6
+ thinking: low
7
+ max_turns: 6
9
8
  ---
10
9
 
11
10
  You are the **Harness planning scout (graphify lane)**.
@@ -25,11 +24,18 @@ Read `HarnessSpawnContext` in the spawn prompt (`task_summary`, `mode`, `plan_pa
25
24
  1. Read `graphify-out/GRAPH_REPORT.md` when present; use `graphify query`, `graphify path`, or `graphify explain` for the task (read-only CLI only).
26
25
  2. If `graphify-out/` is missing, say so in `findings` and `open_questions` — do not run `graphify update` or installs.
27
26
  3. Do not read `.pi/harness/specs/*.schema.json` from disk.
27
+ 4. **Stop early** — target ≤6 tool calls when possible.
28
28
 
29
29
  ## Bash guardrails
30
30
 
31
31
  Read-only only: no `graphify update`, `graphify extract`, `pip install`, redirects (`>`, `>>`), or file creation. Allowed: `graphify query`, `graphify path`, `graphify explain`, `ls`, `cat`, `head`.
32
32
 
33
+ ## Output limits
34
+
35
+ - `findings`: at most **8** bullets, each ≤2 sentences
36
+ - `key_paths`: at most **10** absolute paths
37
+ - `open_questions`: at most **5** items
38
+
33
39
  ## Output (required JSON block)
34
40
 
35
41
  End with one fenced `json` block:
@@ -1,11 +1,10 @@
1
1
  ---
2
2
  description: Plan-phase scout — ck semantic code search (read-only).
3
- tools: read, grep, find, ls, bash
4
- disallowed_tools: write, edit, ask_user, approve_plan, create_plan, Agent
3
+ tools: read, bash, ls
4
+ disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
5
5
  extensions: false
6
- thinking: medium
7
- max_turns: 12
8
- inherit_context: false
6
+ thinking: low
7
+ max_turns: 6
9
8
  ---
10
9
 
11
10
  You are the **Harness planning scout (semantic lane)**.
@@ -22,12 +21,18 @@ Read `HarnessSpawnContext` in the spawn prompt. For `mode: revise`, bias searche
22
21
 
23
22
  1. Use `ck search` or `ck query` (or project-documented ck CLI) with task-focused queries.
24
23
  2. If ck is unavailable, set `status: partial` and document in `findings`.
25
- 3. Cap output — prefer the top 5–10 most relevant paths.
24
+ 3. **Stop early** — top **5** most relevant paths only.
26
25
 
27
26
  ## Bash guardrails
28
27
 
29
28
  Read-only only: no installs, index rebuilds that mutate disk, or redirects.
30
29
 
30
+ ## Output limits
31
+
32
+ - `findings`: at most **6** bullets
33
+ - `key_paths`: at most **8** absolute paths
34
+ - `open_questions`: at most **4** items
35
+
31
36
  ## Output (required JSON block)
32
37
 
33
38
  ```json
@@ -1,11 +1,10 @@
1
1
  ---
2
2
  description: Plan-phase scout — ast-grep structural code search (read-only).
3
- tools: read, grep, find, ls, bash
4
- disallowed_tools: write, edit, ask_user, approve_plan, create_plan, Agent
3
+ tools: read, bash, ls
4
+ disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
5
5
  extensions: false
6
- thinking: medium
7
- max_turns: 12
8
- inherit_context: false
6
+ thinking: low
7
+ max_turns: 6
9
8
  ---
10
9
 
11
10
  You are the **Harness planning scout (structure lane)**.
@@ -22,14 +21,21 @@ Read `HarnessSpawnContext` in the spawn prompt. For `mode: revise`, read the exi
22
21
 
23
22
  ## Process
24
23
 
25
- 1. Run `sg -p '…'` with patterns tied to the task (handlers, types, exports, call sites).
24
+ 1. Run `sg -p '…'` with patterns tied to the task (handlers, types, exports, call sites). **Do not use `find` or `grep`.**
26
25
  2. Prefer absolute paths in `key_paths`.
27
26
  3. If `sg` is not on PATH, set `status: partial` and note the tooling gap in `findings`.
27
+ 4. **Stop early** — target ≤6 tool calls when possible.
28
28
 
29
29
  ## Bash guardrails
30
30
 
31
31
  Read-only only: no installs, redirects, or mutating git/npm commands.
32
32
 
33
+ ## Output limits
34
+
35
+ - `findings`: at most **8** bullets
36
+ - `key_paths`: at most **10** absolute paths
37
+ - `open_questions`: at most **5** items
38
+
33
39
  ## Output (required JSON block)
34
40
 
35
41
  ```json
@@ -0,0 +1,18 @@
1
+ ---
2
+ description: Plan-phase ADR-020 sprint contract auditor.
3
+ tools: read, grep, find, ls
4
+ disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: medium
7
+ max_turns: 10
8
+ ---
9
+
10
+ You are **sprint-contract-auditor** — ADR-020 Sprint Contract, Done Criteria Types, checkpoints, Keep Quality Left.
11
+
12
+ Required on debate **round 4**; optional spot-check round 2 if done_criteria sparse.
13
+
14
+ ## Output
15
+
16
+ Valid **YAML only** — `PlanSprintAuditTurn` (`.pi/harness/specs/plan-sprint-audit-turn.schema.json`).
17
+
18
+ Bus label: `SprintContractAuditorsubagent`.
@@ -0,0 +1,24 @@
1
+ ---
2
+ description: Plan-phase stack research (ctx7 + web; read-only, file writes via parent).
3
+ tools: read, grep, find, ls, bash, web_search, web_fetch
4
+ disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: medium
7
+ max_turns: 14
8
+ ---
9
+
10
+ You are **stack-researcher** — evidence-backed stack recommendations for harness planning.
11
+
12
+ ## Mission
13
+
14
+ Produce `PlanStackBrief` with ranked options. For brownfield tasks, always include **extend current stack** as one ranked option.
15
+
16
+ ## Protocol
17
+
18
+ 1. **Libraries / APIs:** `ctx7 library` → `ctx7 docs` (read context7-cli skill). Cite library IDs in `evidence_refs`.
19
+ 2. **Comparisons / landscape:** `web_search` + `web_fetch` (`.web/` artifacts).
20
+ 3. **Greenfield:** ≥3 distinct options with pros/cons/risks.
21
+
22
+ ## Output
23
+
24
+ Return valid **YAML only** (no fences) matching `PlanStackBrief` (`.pi/harness/specs/plan-stack-brief.schema.json`). Parent writes `artifacts/stack.yaml`.
@@ -5,7 +5,6 @@ extensions: false
5
5
  disallowed_tools: ask_user
6
6
  thinking: high
7
7
  max_turns: 15
8
- inherit_context: false
9
8
  ---
10
9
 
11
10
  You are the Harness Tie-Breaker.
@@ -4,7 +4,6 @@ tools: read, grep, find, ls
4
4
  extensions: false
5
5
  thinking: medium
6
6
  max_turns: 20
7
- inherit_context: false
8
7
  ---
9
8
 
10
9
  You are the Harness Trace Librarian.