ultimate-pi 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +44 -0
- package/.agents/skills/harness-decisions/SKILL.md +1 -1
- package/.agents/skills/harness-orchestration/SKILL.md +54 -28
- package/.agents/skills/harness-plan/SKILL.md +15 -20
- package/.pi/agents/harness/adversary.md +0 -1
- package/.pi/agents/harness/evaluator.md +0 -1
- package/.pi/agents/harness/executor.md +1 -2
- package/.pi/agents/harness/incident-recorder.md +0 -1
- package/.pi/agents/harness/meta-optimizer.md +0 -1
- package/.pi/agents/harness/planning/decompose.md +3 -4
- package/.pi/agents/harness/planning/execution-plan-author.md +30 -0
- package/.pi/agents/harness/planning/hypothesis-validator.md +23 -0
- package/.pi/agents/harness/planning/hypothesis.md +3 -4
- package/.pi/agents/harness/planning/plan-adversary.md +10 -42
- package/.pi/agents/harness/planning/plan-evaluator.md +18 -0
- package/.pi/agents/harness/planning/review-integrator.md +23 -0
- package/.pi/agents/harness/planning/scout-graphify.md +11 -5
- package/.pi/agents/harness/planning/scout-semantic.md +11 -6
- package/.pi/agents/harness/planning/scout-structure.md +12 -6
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +18 -0
- package/.pi/agents/harness/planning/stack-researcher.md +24 -0
- package/.pi/agents/harness/tie-breaker.md +0 -1
- package/.pi/agents/harness/trace-librarian.md +0 -1
- package/.pi/extensions/debate-orchestrator.ts +90 -53
- package/.pi/extensions/harness-plan-approval.ts +2 -2
- package/.pi/extensions/harness-run-context.ts +145 -5
- package/.pi/extensions/harness-subagents.ts +2 -2
- package/.pi/extensions/lib/harness-posthog.ts +6 -1
- package/.pi/extensions/lib/harness-spawn-budget.ts +75 -0
- package/.pi/extensions/lib/harness-subagent-auth.ts +123 -0
- package/.pi/extensions/lib/{harness-subagents/harness-subagent-policy.ts → harness-subagent-policy.ts} +3 -6
- package/.pi/extensions/lib/harness-subagent-precheck.ts +95 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +176 -0
- package/.pi/extensions/lib/plan-approval/create-plan.ts +4 -7
- package/.pi/extensions/lib/plan-approval/plan-review.ts +1 -1
- package/.pi/extensions/lib/plan-approval/types.ts +7 -1
- package/.pi/extensions/lib/plan-debate-envelope.ts +84 -0
- package/.pi/extensions/lib/{harness-subagents/spawn-policy.ts → spawn-policy.ts} +1 -0
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/extensions/review-integrity.ts +48 -29
- package/.pi/harness/agents.manifest.json +37 -25
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +4 -3
- package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md +1 -1
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +27 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml +26 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml +5 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +32 -0
- package/.pi/harness/evals/smoke/run-context.fixture.json +1 -1
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +88 -0
- package/.pi/harness/specs/harness-posthog-event.schema.json +6 -1
- package/.pi/harness/specs/plan-execution-plan-brief.schema.json +13 -0
- package/.pi/harness/specs/plan-execution-plan.schema.json +255 -0
- package/.pi/harness/specs/plan-packet.schema.json +14 -5
- package/.pi/harness/specs/plan-review-round-draft.schema.json +68 -0
- package/.pi/harness/specs/plan-sprint-audit-turn.schema.json +29 -0
- package/.pi/harness/specs/plan-stack-brief.schema.json +65 -0
- package/.pi/harness/specs/plan-validation-turn.schema.json +42 -0
- package/.pi/harness/specs/round-result.schema.json +16 -9
- package/.pi/lib/debate-orchestrator-types.ts +38 -0
- package/.pi/lib/harness-agent-discovery.mjs +81 -0
- package/.pi/lib/harness-run-context.ts +64 -38
- package/.pi/lib/harness-yaml.mjs +73 -0
- package/.pi/lib/harness-yaml.ts +90 -0
- package/.pi/prompts/harness-auto.md +13 -11
- package/.pi/prompts/harness-critic.md +2 -2
- package/.pi/prompts/harness-eval.md +3 -3
- package/.pi/prompts/harness-incident.md +2 -2
- package/.pi/prompts/harness-plan.md +79 -93
- package/.pi/prompts/harness-review.md +2 -2
- package/.pi/prompts/harness-router-tune.md +1 -1
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/harness-setup.md +15 -6
- package/.pi/prompts/harness-trace.md +2 -2
- package/.pi/scripts/harness-agents-manifest.mjs +1 -1
- package/.pi/scripts/harness-verify.mjs +28 -19
- package/.pi/scripts/validate-plan-dag.mjs +258 -0
- package/.pi/scripts/vendor-sync-pi-subagents.sh +19 -0
- package/CHANGELOG.md +12 -0
- package/THIRD_PARTY_NOTICES.md +8 -0
- package/biome.json +2 -2
- package/package.json +6 -4
- package/.pi/agents/harness/planner.md +0 -13
- package/.pi/agents/harness/planning/hypothesis-eval.md +0 -59
- package/.pi/agents/harness/planning/planner.md +0 -20
- package/.pi/extensions/lib/harness-subagents/agent-loader.ts +0 -126
- package/.pi/extensions/lib/harness-subagents/agent-manifest.ts +0 -119
- package/.pi/extensions/lib/harness-subagents/agent-parser.ts +0 -87
- package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +0 -118
- package/.pi/extensions/lib/harness-subagents/blackboard.ts +0 -175
- package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +0 -10
- package/.pi/extensions/lib/harness-subagents/parent-harness-ui-bridge.ts +0 -137
- package/.pi/extensions/lib/harness-subagents/parent-harness-ui-hooks.ts +0 -77
- package/.pi/extensions/lib/harness-subagents/types-blackboard.ts +0 -27
- package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +0 -558
- package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +0 -666
- package/.pi/extensions/lib/harness-subagents/vendored/agent-types.ts +0 -175
- package/.pi/extensions/lib/harness-subagents/vendored/context.ts +0 -59
- package/.pi/extensions/lib/harness-subagents/vendored/cross-extension-rpc.ts +0 -134
- package/.pi/extensions/lib/harness-subagents/vendored/custom-agents.ts +0 -5
- package/.pi/extensions/lib/harness-subagents/vendored/default-agents.ts +0 -123
- package/.pi/extensions/lib/harness-subagents/vendored/env.ts +0 -43
- package/.pi/extensions/lib/harness-subagents/vendored/group-join.ts +0 -144
- package/.pi/extensions/lib/harness-subagents/vendored/index.ts +0 -2460
- package/.pi/extensions/lib/harness-subagents/vendored/invocation-config.ts +0 -52
- package/.pi/extensions/lib/harness-subagents/vendored/memory.ts +0 -182
- package/.pi/extensions/lib/harness-subagents/vendored/model-resolver.ts +0 -92
- package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +0 -115
- package/.pi/extensions/lib/harness-subagents/vendored/prompts.ts +0 -103
- package/.pi/extensions/lib/harness-subagents/vendored/schedule-store.ts +0 -177
- package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +0 -416
- package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +0 -210
- package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +0 -108
- package/.pi/extensions/lib/harness-subagents/vendored/types.ts +0 -187
- package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +0 -639
- package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +0 -324
- package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +0 -110
- package/.pi/extensions/lib/harness-subagents/vendored/usage.ts +0 -71
- package/.pi/extensions/lib/harness-subagents/vendored/worktree.ts +0 -195
- /package/.pi/extensions/{00-ultimate-pi-system-prompt.ts → custom-system-prompt.ts} +0 -0
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: harness-debate-plan
|
|
3
|
+
description: Plan-phase Review Gate debate — assemble rounds, token caps, bus envelopes for parent orchestrator.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# harness-debate-plan
|
|
7
|
+
|
|
8
|
+
Use when running **Phase 5** of `/harness-plan` — four Review Gate rounds on the plan debate bus.
|
|
9
|
+
|
|
10
|
+
## Open
|
|
11
|
+
|
|
12
|
+
```
|
|
13
|
+
/harness-debate-open plan-<run_id>
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
Budget profile **plan**: `max_rounds=4`, `round_token_cap=2000`, `debate_global_cap=12000`.
|
|
17
|
+
|
|
18
|
+
## Per-round spawn order
|
|
19
|
+
|
|
20
|
+
1. Round-specific extras (R1: `hypothesis-validator` first, blind)
|
|
21
|
+
2. `plan-evaluator`
|
|
22
|
+
3. `plan-adversary`
|
|
23
|
+
4. R4: `sprint-contract-auditor` (required)
|
|
24
|
+
5. `review-integrator`
|
|
25
|
+
|
|
26
|
+
## Artifacts (YAML)
|
|
27
|
+
|
|
28
|
+
| Agent | Output path |
|
|
29
|
+
|-------|-------------|
|
|
30
|
+
| hypothesis-validator | `artifacts/hypothesis-validation-r{N}.yaml` |
|
|
31
|
+
| plan-evaluator | `artifacts/validation-turn-r{N}.yaml` |
|
|
32
|
+
| plan-adversary | `artifacts/adversary-brief-r{N}.yaml` |
|
|
33
|
+
| sprint-contract-auditor | `artifacts/sprint-audit-r{N}.yaml` |
|
|
34
|
+
| review-integrator | `artifacts/review-round-r{N}.yaml` |
|
|
35
|
+
|
|
36
|
+
## Bus envelope
|
|
37
|
+
|
|
38
|
+
Load `review-round-r{N}.yaml`, validate, then `buildPlanReviewRoundEnvelope` (`.pi/extensions/lib/plan-debate-envelope.ts`) → `/harness-debate-round '<json>'`.
|
|
39
|
+
|
|
40
|
+
Plan participants only. `StackResearchAgent` uses `artifacts/stack.yaml` claims — no spawn.
|
|
41
|
+
|
|
42
|
+
## Close
|
|
43
|
+
|
|
44
|
+
After round 4: `/harness-debate-consensus`. Do not `approve_plan` on `policy_decision: block`.
|
|
@@ -71,5 +71,5 @@ Parent orchestrator calls **`approve_plan`** with the full `plan_packet` (scroll
|
|
|
71
71
|
## Who calls what
|
|
72
72
|
|
|
73
73
|
- **Parent orchestrator** during `/harness-plan` — `ask_user` for clarification; **`approve_plan`** then **`create_plan`** for the plan file.
|
|
74
|
-
- `harness/planning/*` (scouts, decompose, hypothesis,
|
|
74
|
+
- `harness/planning/*` (scouts, decompose, hypothesis, hypothesis-validator) — JSON only; no `ask_user` / `approve_plan` / `create_plan`.
|
|
75
75
|
- `harness/evaluator`, `harness/adversary`, and `harness/tie-breaker` — emit `human_required`; the **parent orchestrator** calls `ask_user`.
|
|
@@ -1,24 +1,48 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: harness-orchestration
|
|
3
3
|
description: >-
|
|
4
|
-
Orchestrate ultimate-pi harness phases with
|
|
5
|
-
|
|
6
|
-
verification, parallel scouts, and debate prep.
|
|
4
|
+
Orchestrate ultimate-pi harness phases with the native `subagent` tool
|
|
5
|
+
(isolated `pi --mode json` subprocesses). Use for plan/execute/evaluate
|
|
6
|
+
pipelines, L4 verification, parallel scouts, and debate prep.
|
|
7
7
|
---
|
|
8
8
|
|
|
9
9
|
# Harness orchestration
|
|
10
10
|
|
|
11
11
|
## Slash commands = orchestrators
|
|
12
12
|
|
|
13
|
-
`/harness-*` prompts parse args,
|
|
13
|
+
`/harness-*` prompts parse args, call `subagent`, run `ask_user`, write policy-gated artifacts. Phase logic lives in `.pi/agents/harness/*.md` and `.pi/agents/harness/planning/*.md`.
|
|
14
14
|
|
|
15
|
-
Every spawn includes **HarnessSpawnContext** JSON (
|
|
15
|
+
Every spawn includes **HarnessSpawnContext** JSON in the task text (subprocess agents do not get `[HarnessActivePlan]` injection). Use `agentScope: "both"` so package agents under `$UP_PKG/.pi/agents/**` resolve.
|
|
16
|
+
|
|
17
|
+
## Subprocess telemetry
|
|
18
|
+
|
|
19
|
+
Harness bridge emits `harness_subagent_spawned` / `harness_subagent_completed` (replaces in-process setup/blackboard events).
|
|
20
|
+
|
|
21
|
+
```sql
|
|
22
|
+
SELECT
|
|
23
|
+
properties.agent as agent,
|
|
24
|
+
count() as n,
|
|
25
|
+
round(avg(toFloat(properties.duration_ms)), 0) as avg_ms
|
|
26
|
+
FROM events
|
|
27
|
+
WHERE event = 'harness_subagent_completed'
|
|
28
|
+
AND timestamp >= now() - INTERVAL 7 DAY
|
|
29
|
+
GROUP BY agent
|
|
30
|
+
ORDER BY avg_ms DESC
|
|
31
|
+
LIMIT 30
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Latency rules
|
|
35
|
+
|
|
36
|
+
1. **Parallel `tasks`** — one `subagent({ tasks: [...] })` for scouts, decompose+hypothesis, or review fan-in; subprocesses run in parallel upstream.
|
|
37
|
+
2. **Blocking calls** — each `subagent` returns when the subprocess exits; no `get_subagent_result` polling.
|
|
38
|
+
3. **Compact handoffs** — pass scout/decompose JSON only; never paste full subprocess message logs into the next spawn.
|
|
39
|
+
4. **Spawn caps** — bridge enforces **8** active + **12** total harness spawns per session; use `PI_SUBAGENT_TIMEOUT_MS` or per-task `timeoutMs` as the timeout backstop.
|
|
16
40
|
|
|
17
41
|
## Command → agent
|
|
18
42
|
|
|
19
|
-
| Command | `
|
|
20
|
-
|
|
21
|
-
| `/harness-plan` | Parent: parallel `scout-*` → `decompose
|
|
43
|
+
| Command | `agent` |
|
|
44
|
+
|---------|---------|
|
|
45
|
+
| `/harness-plan` | Parent: parallel `harness/planning/scout-*` → parallel `decompose`+`hypothesis` → PlanPacket → reviews; `approve_plan` + `create_plan` |
|
|
22
46
|
| `/harness-run` | `harness/executor` |
|
|
23
47
|
| `/harness-eval` | `harness/evaluator` (`mode: benchmark`) |
|
|
24
48
|
| `/harness-review` | `harness/evaluator` (`mode: verdict`) |
|
|
@@ -26,41 +50,43 @@ Every spawn includes **HarnessSpawnContext** JSON (subagents do not get `[Harnes
|
|
|
26
50
|
| `/harness-trace` | `harness/trace-librarian` |
|
|
27
51
|
| `/harness-incident` | `harness/incident-recorder` |
|
|
28
52
|
| `/harness-router-tune` | `harness/meta-optimizer` (optional) |
|
|
29
|
-
| `/harness-auto` | plan
|
|
53
|
+
| `/harness-auto` | plan per `/harness-plan`; `--quick` skips adversary + tie-breaker |
|
|
30
54
|
|
|
31
55
|
## Review isolation
|
|
32
56
|
|
|
33
|
-
Spawn `harness/evaluator` / `harness/adversary` in the **same** parent session
|
|
57
|
+
Spawn `harness/evaluator` / `harness/adversary` via `subagent` in the **same** parent session. `review-integrity` allows `subagent` when `agent` is in the review set; blocks executor from spawning review agents during evaluate.
|
|
34
58
|
|
|
35
59
|
## ask_user policy
|
|
36
60
|
|
|
37
|
-
|
|
|
38
|
-
|
|
39
|
-
| Parent orchestrator | Yes (plan clarification,
|
|
40
|
-
| `harness/planning/*` | No — JSON only |
|
|
41
|
-
| `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` |
|
|
61
|
+
| Role | `ask_user` |
|
|
62
|
+
|------|------------|
|
|
63
|
+
| Parent orchestrator | Yes (plan clarification, `approve_plan`, router tune) |
|
|
64
|
+
| `harness/planning/*` | No — JSON only (`human_required` in output if stuck) |
|
|
65
|
+
| `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` | `human_required` in subprocess JSON |
|
|
42
66
|
| `harness/executor` | No — parent handles governance |
|
|
43
67
|
|
|
44
68
|
## Spawn pattern (`/harness-plan`)
|
|
45
69
|
|
|
70
|
+
```json
|
|
71
|
+
{
|
|
72
|
+
"agentScope": "both",
|
|
73
|
+
"tasks": [
|
|
74
|
+
{ "agent": "harness/planning/scout-graphify", "task": "…", "timeoutMs": 90000 },
|
|
75
|
+
{ "agent": "harness/planning/scout-structure", "task": "…", "timeoutMs": 90000 },
|
|
76
|
+
{ "agent": "harness/planning/scout-semantic", "task": "…", "timeoutMs": 90000 }
|
|
77
|
+
]
|
|
78
|
+
}
|
|
46
79
|
```
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
Agent({ subagent_type: "harness/planning/hypothesis", prompt: "…" })
|
|
52
|
-
# parent: PlanPacket, ask_user on fork
|
|
53
|
-
Agent({ subagent_type: "harness/planning/plan-adversary", run_in_background: true })
|
|
54
|
-
Agent({ subagent_type: "harness/planning/hypothesis-eval", run_in_background: true })
|
|
55
|
-
approve_plan({ plan_packet, research_brief }); create_plan
|
|
56
|
-
```
|
|
80
|
+
|
|
81
|
+
Then parallel decompose + hypothesis, parent PlanPacket + `ask_user`, debate rounds via `subagent` or `debate-orchestrator`, then `approve_plan` + `create_plan`.
|
|
82
|
+
|
|
83
|
+
Scouts use **Haiku**, `thinking: low`, **6** max turns (see agent frontmatter). Effective `--tools` omits `grep`/`find`/`subagent` per `disallowed_tools`.
|
|
57
84
|
|
|
58
85
|
## Tools
|
|
59
86
|
|
|
60
|
-
- `
|
|
87
|
+
- `subagent` — harness subprocess spawns (modes: `single`, `tasks`, `chain`, `aggregator`)
|
|
61
88
|
- `approve_plan`, `create_plan` — parent orchestrator only
|
|
62
|
-
- `
|
|
63
|
-
- Subagents cannot nest spawns
|
|
89
|
+
- Subprocess agents cannot nest `subagent` (`subagent` stripped from child `--tools`)
|
|
64
90
|
|
|
65
91
|
## References
|
|
66
92
|
|
|
@@ -1,37 +1,32 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: harness-plan
|
|
3
|
-
description:
|
|
3
|
+
description: PM-grade harness plans — scouts, ExecutionPlan, DAG validation, 4-round Review Gate debate, then approve/create_plan.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-plan
|
|
7
7
|
|
|
8
8
|
## When to use
|
|
9
9
|
|
|
10
|
-
-
|
|
11
|
-
- Policy gate blocks mutate tools without approved plan
|
|
12
|
-
- Drift monitor requests replan (`harness-drift-replan`)
|
|
13
|
-
- User replies with clarification after `needs_clarification`
|
|
10
|
+
- `/harness-plan`, harness-auto plan phase, drift replan, policy-gate without approved plan
|
|
14
11
|
|
|
15
12
|
## Workflow (parent orchestrator)
|
|
16
13
|
|
|
17
|
-
1.
|
|
18
|
-
2.
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
7. Parallel: `harness/planning/plan-adversary` + `harness/planning/hypothesis-eval` (eval gets task + hypothesis only).
|
|
27
|
-
8. Parent calls `approve_plan({ plan_packet, human_summary, research_brief })` then `create_plan`.
|
|
14
|
+
1. Parallel scouts (graphify + structure; semantic unless `--quick`).
|
|
15
|
+
2. Parallel decompose + hypothesis → write `artifacts/*.yaml`.
|
|
16
|
+
3. Draft `PlanPacket` (`contract_version: "1.1.0"`) + `ask_user` on material fork.
|
|
17
|
+
4. `stack-researcher` → `execution-plan-author` → merge `execution_plan`.
|
|
18
|
+
5. **`validate-plan-dag.mjs`** on `plan-packet.yaml` (must pass).
|
|
19
|
+
6. **Review Gate:** `/harness-debate-open plan-<run_id>` → 4 rounds (see **harness-debate-plan** skill) → consensus.
|
|
20
|
+
7. Apply patches, re-validate DAG, `approve_plan`, `create_plan`.
|
|
21
|
+
|
|
22
|
+
`--quick` skips semantic scout and post-run adversary only — **not** plan debate.
|
|
28
23
|
|
|
29
24
|
## Rules
|
|
30
25
|
|
|
31
|
-
-
|
|
32
|
-
-
|
|
33
|
-
- context-mode only on harness paths
|
|
26
|
+
- On-disk plan artifacts are **YAML** (`plan-packet.yaml`, `research-brief.yaml`).
|
|
27
|
+
- Subagents read-only; parent writes run artifacts and calls `approve_plan` / `create_plan`.
|
|
28
|
+
- context-mode only on harness paths.
|
|
34
29
|
|
|
35
30
|
## Output
|
|
36
31
|
|
|
37
|
-
|
|
32
|
+
`plan_status`, `plan_review_path`, `next_command: /harness-run` when ready.
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase DeepMind-style problem decomposition (read-only).
|
|
3
3
|
tools: read, grep, find, ls, bash
|
|
4
|
-
disallowed_tools: write, edit, ask_user, approve_plan, create_plan,
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
|
|
5
5
|
extensions: false
|
|
6
|
-
thinking:
|
|
7
|
-
max_turns:
|
|
8
|
-
inherit_context: false
|
|
6
|
+
thinking: medium
|
|
7
|
+
max_turns: 12
|
|
9
8
|
---
|
|
10
9
|
|
|
11
10
|
You are the **Harness planning decomposer (Phase 1)**.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase ExecutionPlan generator (PM-grade WBS + DAG).
|
|
3
|
+
tools: read, grep, find, ls
|
|
4
|
+
disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: high
|
|
7
|
+
max_turns: 16
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
You are **execution-plan-author** — produce a complete `execution_plan` a senior EM would sign off.
|
|
11
|
+
|
|
12
|
+
## Inputs
|
|
13
|
+
|
|
14
|
+
Task, `PlanDecompositionBrief`, `PlanHypothesisBrief`, draft scope/acceptance_checks, `PlanStackBrief`, scout summaries.
|
|
15
|
+
|
|
16
|
+
## Workflow
|
|
17
|
+
|
|
18
|
+
1. Vision check — scope ≤15 lines, testable outcomes.
|
|
19
|
+
2. Phases with objective, entry/exit criteria, milestone, work_item_ids.
|
|
20
|
+
3. WBS — every AC maps to ≥1 work_item; deliverable-sized items.
|
|
21
|
+
4. `depends_on` DAG; `parallel_safe` only when files disjoint.
|
|
22
|
+
5. `schedule_metadata.critical_path_work_item_ids`.
|
|
23
|
+
6. `wbs_dictionary`, `risk_register` (≥3 risks for med/high).
|
|
24
|
+
7. `sprint_contract` complete.
|
|
25
|
+
8. Early-phase verify/lint/test work items when risk ≥ med.
|
|
26
|
+
9. Typed `done_criteria` per work item.
|
|
27
|
+
|
|
28
|
+
## Output
|
|
29
|
+
|
|
30
|
+
Valid **YAML only** — `PlanExecutionPlanBrief` with `execution_plan` (`.pi/harness/specs/plan-execution-plan-brief.schema.json`). Parent merges into `plan-packet.yaml`.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase blind hypothesis validation (debate R1 only).
|
|
3
|
+
tools: read, grep, find, ls
|
|
4
|
+
disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: medium
|
|
7
|
+
max_turns: 10
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
You are **hypothesis-validator** — blind self-evaluation of `PlanHypothesisBrief` only.
|
|
11
|
+
|
|
12
|
+
## Input (strict)
|
|
13
|
+
|
|
14
|
+
- Original task statement
|
|
15
|
+
- `PlanHypothesisBrief` YAML/JSON
|
|
16
|
+
|
|
17
|
+
Ignore decomposition, scouts, PlanPacket, adversary output.
|
|
18
|
+
|
|
19
|
+
## Output
|
|
20
|
+
|
|
21
|
+
Valid **YAML only** matching `PlanHypothesisEval` (`.pi/harness/specs/plan-hypothesis-eval.schema.json`). Parent writes `artifacts/hypothesis-validation-r{N}.yaml`.
|
|
22
|
+
|
|
23
|
+
Bus label: `HypothesisValidatorsubagent`.
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase DARWIN hypothesis generation (read-only).
|
|
3
3
|
tools: read, grep, find, ls, bash
|
|
4
|
-
disallowed_tools: write, edit, ask_user, approve_plan, create_plan,
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
|
|
5
5
|
extensions: false
|
|
6
|
-
thinking:
|
|
7
|
-
max_turns:
|
|
8
|
-
inherit_context: false
|
|
6
|
+
thinking: medium
|
|
7
|
+
max_turns: 14
|
|
9
8
|
---
|
|
10
9
|
|
|
11
10
|
You are the **Harness planning hypothesis generator (Phase 2 — DARWIN)**.
|
|
@@ -1,50 +1,18 @@
|
|
|
1
1
|
---
|
|
2
|
-
description: Plan
|
|
3
|
-
tools: read, grep, find, ls
|
|
4
|
-
disallowed_tools: write, edit, ask_user, approve_plan, create_plan,
|
|
2
|
+
description: Plan-phase adversarial verification on ExecutionPlan.
|
|
3
|
+
tools: read, grep, find, ls
|
|
4
|
+
disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
|
|
5
5
|
extensions: false
|
|
6
|
-
thinking:
|
|
7
|
-
max_turns:
|
|
8
|
-
inherit_context: false
|
|
6
|
+
thinking: medium
|
|
7
|
+
max_turns: 12
|
|
9
8
|
---
|
|
10
9
|
|
|
11
|
-
You are
|
|
10
|
+
You are **plan-adversary** — break the plan with reproducible counterexamples.
|
|
12
11
|
|
|
13
|
-
|
|
12
|
+
Engage failed/warn checks from the same round's `plan-evaluator` first, then independent attacks. Cite `work_item_id` / `phase_id`.
|
|
14
13
|
|
|
15
|
-
|
|
14
|
+
## Output
|
|
16
15
|
|
|
17
|
-
|
|
16
|
+
Valid **YAML only** — `PlanAdversaryBrief` (`.pi/harness/specs/plan-adversary-brief.schema.json`).
|
|
18
17
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
The spawn prompt includes:
|
|
22
|
-
|
|
23
|
-
- `HarnessSpawnContext`
|
|
24
|
-
- Draft `PlanPacket` JSON
|
|
25
|
-
- Scout lane summaries (graphify, structure, semantic)
|
|
26
|
-
|
|
27
|
-
## Process
|
|
28
|
-
|
|
29
|
-
1. Assume the plan has hidden gaps until you justify `recommendation: proceed`.
|
|
30
|
-
2. Tie every finding to evidence (paths, APIs, or scout findings) — no speculation without a probe path.
|
|
31
|
-
3. Propose concrete `mitigations` the parent can merge into scope, assumptions, or `acceptance_checks`.
|
|
32
|
-
4. Empty arrays are allowed when no material gaps exist; say so in `human_summary`.
|
|
33
|
-
|
|
34
|
-
## Output (required JSON block)
|
|
35
|
-
|
|
36
|
-
Match `PlanAdversaryBrief` (`.pi/harness/specs/plan-adversary-brief.schema.json`):
|
|
37
|
-
|
|
38
|
-
```json
|
|
39
|
-
{
|
|
40
|
-
"schema_version": "1.0.0",
|
|
41
|
-
"edge_cases": ["…"],
|
|
42
|
-
"failure_modes": ["…"],
|
|
43
|
-
"acceptance_gaps": ["…"],
|
|
44
|
-
"mitigations": ["…"],
|
|
45
|
-
"recommendation": "proceed",
|
|
46
|
-
"human_summary": "…"
|
|
47
|
-
}
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
Use `"recommendation": "revise"` when scope or acceptance must change before execution.
|
|
18
|
+
Bus label: `PlanAdversarysubagent`.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase Validation Checks evaluator (neutral pass/fail).
|
|
3
|
+
tools: read, grep, find, ls
|
|
4
|
+
disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: medium
|
|
7
|
+
max_turns: 12
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
You are **plan-evaluator** — score ExecutionPlan against Validation Checks (not an advocate).
|
|
11
|
+
|
|
12
|
+
Parent passes `debate_round_focus`: `spec` | `wbs` | `schedule` | `quality`.
|
|
13
|
+
|
|
14
|
+
## Output
|
|
15
|
+
|
|
16
|
+
Valid **YAML only** — `PlanValidationTurn` (`.pi/harness/specs/plan-validation-turn.schema.json`). Fail if `dag_validation.status === "fail"`.
|
|
17
|
+
|
|
18
|
+
Bus label: `PlanEvaluatorsubagent`.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase Review Gate integrator (round → debate bus).
|
|
3
|
+
tools: read, grep, find, ls
|
|
4
|
+
disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: medium
|
|
7
|
+
max_turns: 10
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
You are **review-integrator** — merge evaluator, adversary, sprint audit, and hypothesis-validator outputs into a Review Gate draft.
|
|
11
|
+
|
|
12
|
+
## Output
|
|
13
|
+
|
|
14
|
+
Valid **YAML only** — `PlanReviewRoundDraft` (`.pi/harness/specs/plan-review-round-draft.schema.json`) with:
|
|
15
|
+
|
|
16
|
+
- `round_summary`, `validation_summary`, `adversary_summary`
|
|
17
|
+
- `disputes[]`, `recommended_packet_patches[]` (JSON Pointer paths)
|
|
18
|
+
- `review_gate_ready` boolean
|
|
19
|
+
- `participants`, `claims`, `rebuttals`, `evidence_refs`, `token_usage`, `severity_scores`
|
|
20
|
+
|
|
21
|
+
Parent runs `buildPlanReviewRoundEnvelope` → `/harness-debate-round`.
|
|
22
|
+
|
|
23
|
+
Bus label: `ReviewIntegratorsubagent`.
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase scout — graphify graph and wiki navigation (read-only).
|
|
3
|
-
tools: read,
|
|
4
|
-
disallowed_tools: write, edit, ask_user, approve_plan, create_plan,
|
|
3
|
+
tools: read, bash, ls
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
|
|
5
5
|
extensions: false
|
|
6
|
-
thinking:
|
|
7
|
-
max_turns:
|
|
8
|
-
inherit_context: false
|
|
6
|
+
thinking: low
|
|
7
|
+
max_turns: 6
|
|
9
8
|
---
|
|
10
9
|
|
|
11
10
|
You are the **Harness planning scout (graphify lane)**.
|
|
@@ -25,11 +24,18 @@ Read `HarnessSpawnContext` in the spawn prompt (`task_summary`, `mode`, `plan_pa
|
|
|
25
24
|
1. Read `graphify-out/GRAPH_REPORT.md` when present; use `graphify query`, `graphify path`, or `graphify explain` for the task (read-only CLI only).
|
|
26
25
|
2. If `graphify-out/` is missing, say so in `findings` and `open_questions` — do not run `graphify update` or installs.
|
|
27
26
|
3. Do not read `.pi/harness/specs/*.schema.json` from disk.
|
|
27
|
+
4. **Stop early** — target ≤6 tool calls when possible.
|
|
28
28
|
|
|
29
29
|
## Bash guardrails
|
|
30
30
|
|
|
31
31
|
Read-only only: no `graphify update`, `graphify extract`, `pip install`, redirects (`>`, `>>`), or file creation. Allowed: `graphify query`, `graphify path`, `graphify explain`, `ls`, `cat`, `head`.
|
|
32
32
|
|
|
33
|
+
## Output limits
|
|
34
|
+
|
|
35
|
+
- `findings`: at most **8** bullets, each ≤2 sentences
|
|
36
|
+
- `key_paths`: at most **10** absolute paths
|
|
37
|
+
- `open_questions`: at most **5** items
|
|
38
|
+
|
|
33
39
|
## Output (required JSON block)
|
|
34
40
|
|
|
35
41
|
End with one fenced `json` block:
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase scout — ck semantic code search (read-only).
|
|
3
|
-
tools: read,
|
|
4
|
-
disallowed_tools: write, edit, ask_user, approve_plan, create_plan,
|
|
3
|
+
tools: read, bash, ls
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
|
|
5
5
|
extensions: false
|
|
6
|
-
thinking:
|
|
7
|
-
max_turns:
|
|
8
|
-
inherit_context: false
|
|
6
|
+
thinking: low
|
|
7
|
+
max_turns: 6
|
|
9
8
|
---
|
|
10
9
|
|
|
11
10
|
You are the **Harness planning scout (semantic lane)**.
|
|
@@ -22,12 +21,18 @@ Read `HarnessSpawnContext` in the spawn prompt. For `mode: revise`, bias searche
|
|
|
22
21
|
|
|
23
22
|
1. Use `ck search` or `ck query` (or project-documented ck CLI) with task-focused queries.
|
|
24
23
|
2. If ck is unavailable, set `status: partial` and document in `findings`.
|
|
25
|
-
3.
|
|
24
|
+
3. **Stop early** — top **5** most relevant paths only.
|
|
26
25
|
|
|
27
26
|
## Bash guardrails
|
|
28
27
|
|
|
29
28
|
Read-only only: no installs, index rebuilds that mutate disk, or redirects.
|
|
30
29
|
|
|
30
|
+
## Output limits
|
|
31
|
+
|
|
32
|
+
- `findings`: at most **6** bullets
|
|
33
|
+
- `key_paths`: at most **8** absolute paths
|
|
34
|
+
- `open_questions`: at most **4** items
|
|
35
|
+
|
|
31
36
|
## Output (required JSON block)
|
|
32
37
|
|
|
33
38
|
```json
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase scout — ast-grep structural code search (read-only).
|
|
3
|
-
tools: read,
|
|
4
|
-
disallowed_tools: write, edit, ask_user, approve_plan, create_plan,
|
|
3
|
+
tools: read, bash, ls
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
|
|
5
5
|
extensions: false
|
|
6
|
-
thinking:
|
|
7
|
-
max_turns:
|
|
8
|
-
inherit_context: false
|
|
6
|
+
thinking: low
|
|
7
|
+
max_turns: 6
|
|
9
8
|
---
|
|
10
9
|
|
|
11
10
|
You are the **Harness planning scout (structure lane)**.
|
|
@@ -22,14 +21,21 @@ Read `HarnessSpawnContext` in the spawn prompt. For `mode: revise`, read the exi
|
|
|
22
21
|
|
|
23
22
|
## Process
|
|
24
23
|
|
|
25
|
-
1. Run `sg -p '…'` with patterns tied to the task (handlers, types, exports, call sites).
|
|
24
|
+
1. Run `sg -p '…'` with patterns tied to the task (handlers, types, exports, call sites). **Do not use `find` or `grep`.**
|
|
26
25
|
2. Prefer absolute paths in `key_paths`.
|
|
27
26
|
3. If `sg` is not on PATH, set `status: partial` and note the tooling gap in `findings`.
|
|
27
|
+
4. **Stop early** — target ≤6 tool calls when possible.
|
|
28
28
|
|
|
29
29
|
## Bash guardrails
|
|
30
30
|
|
|
31
31
|
Read-only only: no installs, redirects, or mutating git/npm commands.
|
|
32
32
|
|
|
33
|
+
## Output limits
|
|
34
|
+
|
|
35
|
+
- `findings`: at most **8** bullets
|
|
36
|
+
- `key_paths`: at most **10** absolute paths
|
|
37
|
+
- `open_questions`: at most **5** items
|
|
38
|
+
|
|
33
39
|
## Output (required JSON block)
|
|
34
40
|
|
|
35
41
|
```json
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase ADR-020 sprint contract auditor.
|
|
3
|
+
tools: read, grep, find, ls
|
|
4
|
+
disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: medium
|
|
7
|
+
max_turns: 10
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
You are **sprint-contract-auditor** — ADR-020 Sprint Contract, Done Criteria Types, checkpoints, Keep Quality Left.
|
|
11
|
+
|
|
12
|
+
Required on debate **round 4**; optional spot-check in round 2 if `done_criteria` are sparse.
|
|
13
|
+
|
|
14
|
+
## Output
|
|
15
|
+
|
|
16
|
+
Valid **YAML only** — `PlanSprintAuditTurn` (`.pi/harness/specs/plan-sprint-audit-turn.schema.json`).
|
|
17
|
+
|
|
18
|
+
Bus label: `SprintContractAuditorsubagent`.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase stack research (ctx7 + web; read-only, file writes via parent).
|
|
3
|
+
tools: read, grep, find, ls, bash, web_search, web_fetch
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: medium
|
|
7
|
+
max_turns: 14
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
You are **stack-researcher** — evidence-backed stack recommendations for harness planning.
|
|
11
|
+
|
|
12
|
+
## Mission
|
|
13
|
+
|
|
14
|
+
Produce `PlanStackBrief` with ranked options. For brownfield tasks, always include **extend current stack** as one ranked option.
|
|
15
|
+
|
|
16
|
+
## Protocol
|
|
17
|
+
|
|
18
|
+
1. **Libraries / APIs:** `ctx7 library` → `ctx7 docs` (read context7-cli skill). Cite library IDs in `evidence_refs`.
|
|
19
|
+
2. **Comparisons / landscape:** `web_search` + `web_fetch` (`.web/` artifacts).
|
|
20
|
+
3. **Greenfield:** ≥3 distinct options with pros/cons/risks.
|
|
21
|
+
|
|
22
|
+
## Output
|
|
23
|
+
|
|
24
|
+
Return valid **YAML only** (no fences) matching `PlanStackBrief` (`.pi/harness/specs/plan-stack-brief.schema.json`). Parent writes `artifacts/stack.yaml`.
|