ultimate-pi 0.10.1 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-decisions/SKILL.md +3 -3
- package/.agents/skills/harness-orchestration/SKILL.md +19 -11
- package/.agents/skills/harness-plan/SKILL.md +15 -9
- package/.pi/agents/harness/planner.md +6 -47
- package/.pi/agents/harness/planning/decompose.md +84 -0
- package/.pi/agents/harness/planning/hypothesis-eval.md +59 -0
- package/.pi/agents/harness/planning/hypothesis.md +90 -0
- package/.pi/agents/harness/planning/plan-adversary.md +50 -0
- package/.pi/agents/harness/planning/planner.md +20 -0
- package/.pi/agents/harness/planning/scout-graphify.md +48 -0
- package/.pi/agents/harness/planning/scout-semantic.md +42 -0
- package/.pi/agents/harness/planning/scout-structure.md +44 -0
- package/.pi/extensions/harness-ask-user.ts +5 -0
- package/.pi/extensions/harness-plan-approval.ts +137 -3
- package/.pi/extensions/harness-run-context.ts +1 -1
- package/.pi/extensions/harness-subagents.ts +8 -3
- package/.pi/extensions/harness-web-tools.ts +2 -0
- package/.pi/extensions/lib/extension-load-guard.ts +39 -0
- package/.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts +33 -5
- package/.pi/extensions/lib/harness-subagents/parent-harness-ui-bridge.ts +2 -175
- package/.pi/extensions/lib/harness-subagents/parent-harness-ui-hooks.ts +18 -0
- package/.pi/extensions/lib/harness-subagents/spawn-policy.ts +1 -5
- package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +0 -18
- package/.pi/extensions/lib/harness-subagents/vendored/index.ts +1 -35
- package/.pi/extensions/lib/plan-approval/create-plan.ts +5 -0
- package/.pi/extensions/lib/plan-approval/plan-review.ts +393 -0
- package/.pi/extensions/lib/plan-approval/schema.ts +16 -1
- package/.pi/extensions/lib/plan-approval/types.ts +10 -0
- package/.pi/extensions/lib/plan-approval/validate.ts +2 -0
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/extensions/ultimate-pi-vcc.ts +5 -0
- package/.pi/harness/agents.manifest.json +114 -82
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +3 -3
- package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md +34 -0
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +41 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/specs/README.md +1 -1
- package/.pi/harness/specs/harness-spawn-context.schema.json +2 -1
- package/.pi/harness/specs/plan-adversary-brief.schema.json +45 -0
- package/.pi/harness/specs/plan-decomposition-brief.schema.json +108 -0
- package/.pi/harness/specs/plan-hypothesis-brief.schema.json +96 -0
- package/.pi/harness/specs/plan-hypothesis-eval.schema.json +61 -0
- package/.pi/lib/harness-run-context.ts +12 -0
- package/.pi/prompts/harness-auto.md +1 -1
- package/.pi/prompts/harness-plan.md +111 -28
- package/.pi/prompts/harness-setup.md +1 -1
- package/.pi/scripts/harness-resolve-up-pkg.mjs +13 -0
- package/CHANGELOG.md +12 -0
- package/biome.json +4 -1
- package/package.json +2 -2
|
@@ -36,7 +36,7 @@ description: Structured user decisions via ask_user for harness setup, planning,
|
|
|
36
36
|
|
|
37
37
|
## Example (plan — approval gate)
|
|
38
38
|
|
|
39
|
-
|
|
39
|
+
Parent orchestrator calls **`approve_plan`** with the full `plan_packet` (scrollable plan + Approve / Request changes / Cancel), then **`create_plan`** with the same packet after Approve.
|
|
40
40
|
|
|
41
41
|
```json
|
|
42
42
|
{
|
|
@@ -70,6 +70,6 @@ description: Structured user decisions via ask_user for harness setup, planning,
|
|
|
70
70
|
|
|
71
71
|
## Who calls what
|
|
72
72
|
|
|
73
|
-
-
|
|
73
|
+
- **Parent orchestrator** during `/harness-plan` — `ask_user` for clarification; **`approve_plan`** then **`create_plan`** for the plan file.
|
|
74
|
+
- `harness/planning/*` (scouts, decompose, hypothesis, plan-adversary, hypothesis-eval) — JSON only; no `ask_user` / `approve_plan` / `create_plan`.
|
|
74
75
|
- `harness/evaluator`, `harness/adversary`, and `harness/tie-breaker` — emit `human_required`; the **parent orchestrator** calls `ask_user`.
|
|
75
|
-
- Parent orchestrator during `/harness-plan` — must **not** call `ask_user`, `approve_plan`, or `create_plan` (planner owns the full plan lifecycle).
|
|
@@ -10,7 +10,7 @@ description: >-
|
|
|
10
10
|
|
|
11
11
|
## Slash commands = orchestrators
|
|
12
12
|
|
|
13
|
-
`/harness-*` prompts parse args, spawn agents, run `ask_user`, write policy-gated artifacts. Phase logic lives in `.pi/agents/harness/*.md`.
|
|
13
|
+
`/harness-*` prompts parse args, spawn agents, run `ask_user`, write policy-gated artifacts. Phase logic lives in `.pi/agents/harness/*.md` and `.pi/agents/harness/planning/*.md`.
|
|
14
14
|
|
|
15
15
|
Every spawn includes **HarnessSpawnContext** JSON (subagents do not get `[HarnessActivePlan]` injection). Use `inherit_context: false`.
|
|
16
16
|
|
|
@@ -18,15 +18,15 @@ Every spawn includes **HarnessSpawnContext** JSON (subagents do not get `[Harnes
|
|
|
18
18
|
|
|
19
19
|
| Command | `subagent_type` |
|
|
20
20
|
|---------|-----------------|
|
|
21
|
-
| `/harness-plan` | `
|
|
21
|
+
| `/harness-plan` | Parent: parallel `scout-*` → `decompose` → `hypothesis` → PlanPacket → parallel `plan-adversary` + `hypothesis-eval`; `approve_plan` + `create_plan` |
|
|
22
22
|
| `/harness-run` | `harness/executor` |
|
|
23
23
|
| `/harness-eval` | `harness/evaluator` (`mode: benchmark`) |
|
|
24
24
|
| `/harness-review` | `harness/evaluator` (`mode: verdict`) |
|
|
25
|
-
| `/harness-critic` | `harness/adversary` |
|
|
25
|
+
| `/harness-critic` | `harness/adversary` (post-run) |
|
|
26
26
|
| `/harness-trace` | `harness/trace-librarian` |
|
|
27
27
|
| `/harness-incident` | `harness/incident-recorder` |
|
|
28
28
|
| `/harness-router-tune` | `harness/meta-optimizer` (optional) |
|
|
29
|
-
| `/harness-auto` | sequential spawns above |
|
|
29
|
+
| `/harness-auto` | plan phases per `/harness-plan`, then sequential spawns above |
|
|
30
30
|
|
|
31
31
|
## Review isolation
|
|
32
32
|
|
|
@@ -36,25 +36,33 @@ Spawn `harness/evaluator` / `harness/adversary` in the **same** parent session
|
|
|
36
36
|
|
|
37
37
|
| Agent | `ask_user` |
|
|
38
38
|
|-------|------------|
|
|
39
|
-
| Parent orchestrator | Yes (
|
|
40
|
-
| `harness/
|
|
41
|
-
| `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` |
|
|
39
|
+
| Parent orchestrator | Yes (plan clarification, approval via `approve_plan`, router tune) |
|
|
40
|
+
| `harness/planning/*` | No — JSON only |
|
|
41
|
+
| `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` | Bridged or `human_required` in output |
|
|
42
42
|
| `harness/executor` | No — parent handles governance |
|
|
43
43
|
|
|
44
|
-
## Spawn pattern
|
|
44
|
+
## Spawn pattern (`/harness-plan`)
|
|
45
45
|
|
|
46
46
|
```
|
|
47
|
-
Agent({ subagent_type: "harness/
|
|
48
|
-
|
|
47
|
+
Agent({ subagent_type: "harness/planning/scout-graphify", prompt: "…", run_in_background: true })
|
|
48
|
+
Agent({ subagent_type: "harness/planning/scout-structure", prompt: "…", run_in_background: true })
|
|
49
|
+
get_subagent_result # scouts
|
|
50
|
+
Agent({ subagent_type: "harness/planning/decompose", prompt: "…" })
|
|
51
|
+
Agent({ subagent_type: "harness/planning/hypothesis", prompt: "…" })
|
|
52
|
+
# parent: PlanPacket, ask_user on fork
|
|
53
|
+
Agent({ subagent_type: "harness/planning/plan-adversary", run_in_background: true })
|
|
54
|
+
Agent({ subagent_type: "harness/planning/hypothesis-eval", run_in_background: true })
|
|
55
|
+
approve_plan({ plan_packet, research_brief }); create_plan
|
|
49
56
|
```
|
|
50
57
|
|
|
51
58
|
## Tools
|
|
52
59
|
|
|
53
60
|
- `Agent`, `get_subagent_result`, `steer_subagent`
|
|
61
|
+
- `approve_plan`, `create_plan` — parent orchestrator only
|
|
54
62
|
- `blackboard` — parent only
|
|
55
63
|
- Subagents cannot nest spawns
|
|
56
64
|
|
|
57
65
|
## References
|
|
58
66
|
|
|
59
|
-
- ADR 0032, `.pi/harness/specs/harness-spawn-context.schema.json`
|
|
67
|
+
- ADR 0032, ADR 0033, `.pi/harness/specs/harness-spawn-context.schema.json`
|
|
60
68
|
- `node "$UP_PKG/.pi/scripts/harness-agents-manifest.mjs" --check`
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: harness-plan
|
|
3
|
-
description: Produce PlanPacket-aligned harness plans before execute phase. Use with /harness-plan, harness-auto plan phase, or when policy-gate requires an approved plan.
|
|
3
|
+
description: Produce PlanPacket-aligned harness plans via decomposition + DARWIN hypothesis before execute phase. Use with /harness-plan, harness-auto plan phase, or when policy-gate requires an approved plan.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-plan
|
|
@@ -12,20 +12,26 @@ description: Produce PlanPacket-aligned harness plans before execute phase. Use
|
|
|
12
12
|
- Drift monitor requests replan (`harness-drift-replan`)
|
|
13
13
|
- User replies with clarification after `needs_clarification`
|
|
14
14
|
|
|
15
|
-
## Workflow (orchestrator)
|
|
15
|
+
## Workflow (parent orchestrator)
|
|
16
16
|
|
|
17
17
|
1. Use `HarnessSpawnContext` from injected `[HarnessRunContext]` — do not read spec files from disk.
|
|
18
|
-
2. Spawn
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
18
|
+
2. Spawn planning scouts in parallel (`run_in_background: true`, `inherit_context: false`):
|
|
19
|
+
- `harness/planning/scout-graphify` (required)
|
|
20
|
+
- `harness/planning/scout-structure` (required)
|
|
21
|
+
- `harness/planning/scout-semantic` (skip when `--quick`)
|
|
22
|
+
3. `get_subagent_result` for each; parse scout JSON.
|
|
23
|
+
4. Spawn `harness/planning/decompose` with merged scout JSON → `PlanDecompositionBrief`.
|
|
24
|
+
5. Spawn `harness/planning/hypothesis` with decomposition + scouts → `PlanHypothesisBrief`.
|
|
25
|
+
6. Parent synthesizes draft `PlanPacket` from hypothesis; `ask_user` when dialectical fork is material.
|
|
26
|
+
7. Parallel: `harness/planning/plan-adversary` + `harness/planning/hypothesis-eval` (eval gets task + hypothesis only).
|
|
27
|
+
8. Parent calls `approve_plan({ plan_packet, human_summary, research_brief })` then `create_plan`.
|
|
22
28
|
|
|
23
29
|
## Rules
|
|
24
30
|
|
|
25
|
-
-
|
|
26
|
-
-
|
|
31
|
+
- Planning subagents are read-only; they never call `ask_user`, `approve_plan`, or `create_plan`.
|
|
32
|
+
- Do not spawn `harness/planner` or `harness/planning/planner` (deprecated).
|
|
27
33
|
- context-mode only on harness paths; never lean-ctx.
|
|
28
34
|
|
|
29
35
|
## Output
|
|
30
36
|
|
|
31
|
-
- `plan_status`, `risk_level`, `next_command`: `/harness-run` when ready
|
|
37
|
+
- `plan_status`, `risk_level`, `plan_review_path`, `next_command`: `/harness-run` when ready
|
|
@@ -1,54 +1,13 @@
|
|
|
1
1
|
---
|
|
2
|
-
description:
|
|
3
|
-
tools: read
|
|
4
|
-
disallowed_tools: write, edit, bash
|
|
2
|
+
description: "DEPRECATED — relocated to harness/planning/. Do not spawn harness/planner."
|
|
3
|
+
tools: read
|
|
5
4
|
extensions: false
|
|
6
|
-
|
|
7
|
-
max_turns: 20
|
|
5
|
+
max_turns: 1
|
|
8
6
|
inherit_context: false
|
|
9
7
|
---
|
|
10
8
|
|
|
11
|
-
|
|
9
|
+
**Relocated:** plan-phase agents live under `harness/planning/` (scouts, decompose, hypothesis, plan-adversary, hypothesis-eval).
|
|
12
10
|
|
|
13
|
-
|
|
11
|
+
Use `/harness-plan` in the parent session — do **not** spawn `harness/planner` or `harness/planning/planner`.
|
|
14
12
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
## Spawn context
|
|
18
|
-
|
|
19
|
-
Read the `HarnessSpawnContext` JSON in the spawn prompt (`schema_version`, `mode`, `task_summary`, `plan_packet_path`, `risk_level`, `quick`, etc.). Never set `inherit_context: true` on harness agents.
|
|
20
|
-
|
|
21
|
-
## Process
|
|
22
|
-
|
|
23
|
-
1. Use graphify context (`graphify-out/GRAPH_REPORT.md` or wiki) before claiming architecture — do not read harness spec JSON files from disk.
|
|
24
|
-
2. Parse task scope, constraints, and acceptance intent from spawn context.
|
|
25
|
-
3. **Greenfield** (`mode: create`) vs **revise** (`mode: revise`) — when revising, read the existing packet at `plan_packet_path` if present and amend.
|
|
26
|
-
4. `--quick` / `quick: true` narrows breadth, never safety or rollback requirements.
|
|
27
|
-
5. Build a complete `PlanPacket`: `plan_id`, `task_id`, `scope`, `assumptions`, `risk_level`, `acceptance_checks`, `rollback_plan` with `revert_command`, `revert_branch`, `patch_bundle`, `revert_commit_ready: true`.
|
|
28
|
-
6. Escalate `risk_level` to `high` for blast radius, uncertainty, or policy-sensitive surfaces.
|
|
29
|
-
7. If scope is ambiguous, call `ask_user` with structured options — do not return `needs_clarification` without trying `ask_user` first when options are clear.
|
|
30
|
-
8. Call **`approve_plan`** with the full `plan_packet` (and optional `human_summary`). The parent TUI shows a scrollable plan plus **Approve** / **Request changes** / **Cancel**. On Request changes, revise and call `approve_plan` again.
|
|
31
|
-
9. After the user selects **Approve**, call **`create_plan`** with the same `plan_packet` to write canonical `plan-packet.json` for this run.
|
|
32
|
-
|
|
33
|
-
## Guardrails
|
|
34
|
-
|
|
35
|
-
- Never call `write`, `edit`, or mutating `bash` — use **`create_plan`** only for the plan file.
|
|
36
|
-
- Never speculate about code you have not read.
|
|
37
|
-
- Do not execute or widen implementation scope.
|
|
38
|
-
|
|
39
|
-
## Output (required JSON block)
|
|
40
|
-
|
|
41
|
-
End with a single fenced `json` block the parent can parse:
|
|
42
|
-
|
|
43
|
-
```json
|
|
44
|
-
{
|
|
45
|
-
"status": "ready",
|
|
46
|
-
"plan_packet": { },
|
|
47
|
-
"human_summary": "…",
|
|
48
|
-
"clarification": null
|
|
49
|
-
}
|
|
50
|
-
```
|
|
51
|
-
|
|
52
|
-
Use `"status": "needs_clarification"` only when blocked after `ask_user` or user cancelled; include `clarification` when the parent must intervene without a live subagent.
|
|
53
|
-
|
|
54
|
-
When `create_plan` succeeds, set `status` to `"ready"` and confirm `plan_packet_path` was written.
|
|
13
|
+
See `.pi/agents/harness/planning/` and `.pi/prompts/harness-plan.md`.
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase DeepMind-style problem decomposition (read-only).
|
|
3
|
+
tools: read, grep, find, ls, bash
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, Agent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: high
|
|
7
|
+
max_turns: 18
|
|
8
|
+
inherit_context: false
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
You are the **Harness planning decomposer (Phase 1)**.
|
|
12
|
+
|
|
13
|
+
## Mission
|
|
14
|
+
|
|
15
|
+
Rigorously decompose the task space before hypothesis generation. You do **not** build the PlanPacket, approve plans, or mutate anything.
|
|
16
|
+
|
|
17
|
+
## Spawn context
|
|
18
|
+
|
|
19
|
+
Read `HarnessSpawnContext` (`task_summary`, `mode`, `plan_packet_path`, `risk_level`, `quick`) and the merged **scout lane JSON** in the spawn prompt. For `mode: revise`, bias toward the delta versus the existing plan at `plan_packet_path`.
|
|
20
|
+
|
|
21
|
+
## Process
|
|
22
|
+
|
|
23
|
+
1. Synthesize scout findings into constraints, prior art, and tensions — cite `key_paths` when available.
|
|
24
|
+
2. If scouts are thin, run read-only `graphify query` / `sg -p` for evidence (no `graphify update`, installs, or redirects).
|
|
25
|
+
3. Do not read `.pi/harness/specs/*.schema.json` from disk.
|
|
26
|
+
|
|
27
|
+
## Phase 1 — DeepMind-style decomposition
|
|
28
|
+
|
|
29
|
+
Work through these sections in your reasoning, then compress into JSON:
|
|
30
|
+
|
|
31
|
+
### 1.1 Problem clarification
|
|
32
|
+
|
|
33
|
+
- Restate the question in precise terms. What would "solving" this look like?
|
|
34
|
+
- Classify problem type(s): optimization, discovery, explanation, design, selection.
|
|
35
|
+
- Narrow scope if too broad; name what you exclude and why.
|
|
36
|
+
|
|
37
|
+
### 1.2 Constraints and desiderata
|
|
38
|
+
|
|
39
|
+
- Hard constraints (must satisfy)
|
|
40
|
+
- Soft constraints (trade-offs allowed)
|
|
41
|
+
- Success metrics (how to measure progress)
|
|
42
|
+
|
|
43
|
+
### 1.3 Prior art and known approaches
|
|
44
|
+
|
|
45
|
+
- Current best approach (methods, systems, paths in repo)
|
|
46
|
+
- Why it is not good enough (gap)
|
|
47
|
+
- What has been tried and failed (dead ends)
|
|
48
|
+
|
|
49
|
+
### 1.4 Surface the tensions
|
|
50
|
+
|
|
51
|
+
Identify contradictions, tradeoffs, or competing beliefs. Pick the **core tension** — one paragraph that feeds Phase 2 hypothesis generation.
|
|
52
|
+
|
|
53
|
+
## Output (required JSON block)
|
|
54
|
+
|
|
55
|
+
End with one fenced `json` block matching `PlanDecompositionBrief` (`.pi/harness/specs/plan-decomposition-brief.schema.json`):
|
|
56
|
+
|
|
57
|
+
```json
|
|
58
|
+
{
|
|
59
|
+
"schema_version": "1.0.0",
|
|
60
|
+
"problem_restatement": "…",
|
|
61
|
+
"problem_types": ["design"],
|
|
62
|
+
"scope": {
|
|
63
|
+
"narrowed_focus": "…",
|
|
64
|
+
"excluded": ["…"]
|
|
65
|
+
},
|
|
66
|
+
"hard_constraints": ["…"],
|
|
67
|
+
"soft_constraints": ["…"],
|
|
68
|
+
"success_metrics": ["…"],
|
|
69
|
+
"prior_art": {
|
|
70
|
+
"best_approach": "…",
|
|
71
|
+
"gap": "…",
|
|
72
|
+
"dead_ends": ["…"]
|
|
73
|
+
},
|
|
74
|
+
"tensions": [
|
|
75
|
+
{
|
|
76
|
+
"claim_a": "…",
|
|
77
|
+
"claim_b": "…",
|
|
78
|
+
"why_matters": "…"
|
|
79
|
+
}
|
|
80
|
+
],
|
|
81
|
+
"core_tension": "…",
|
|
82
|
+
"human_summary": "…"
|
|
83
|
+
}
|
|
84
|
+
```
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase blind hypothesis self-evaluation (read-only).
|
|
3
|
+
tools: read, grep, find, ls
|
|
4
|
+
disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, Agent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: medium
|
|
7
|
+
max_turns: 12
|
|
8
|
+
inherit_context: false
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
You are the **Harness hypothesis evaluator** — blind self-evaluation only.
|
|
12
|
+
|
|
13
|
+
## Mission
|
|
14
|
+
|
|
15
|
+
Score the hypothesis brief on research quality dimensions. You do **not** revise the hypothesis, build PlanPacket, or mutate anything.
|
|
16
|
+
|
|
17
|
+
## Input (strict)
|
|
18
|
+
|
|
19
|
+
You receive **only**:
|
|
20
|
+
|
|
21
|
+
- Original task statement
|
|
22
|
+
- `PlanHypothesisBrief` JSON
|
|
23
|
+
|
|
24
|
+
You must **not** use decomposition, scout findings, PlanPacket, or adversary output even if present in the prompt — ignore them.
|
|
25
|
+
|
|
26
|
+
## Scoring rubric
|
|
27
|
+
|
|
28
|
+
| Dimension | 90+ | 70–89 | <50 |
|
|
29
|
+
|-----------|-----|-------|--------|
|
|
30
|
+
| Novelty | Reframes problem | Novel combo | Known approach |
|
|
31
|
+
| Coherence | Implementation-ready | Minor gaps | Vague |
|
|
32
|
+
| Testability | Fully specified experiment | Clear direction | Unfalsifiable |
|
|
33
|
+
| Impact | Field-changing | Meaningful | Incremental |
|
|
34
|
+
|
|
35
|
+
**Relevance**: Does the primary hypothesis address the original task? (`passes` true/false + rationale).
|
|
36
|
+
|
|
37
|
+
Set `revision_recommended: true` when **testability** score < 70 or **relevance.passes** is false.
|
|
38
|
+
|
|
39
|
+
## Output (required JSON block)
|
|
40
|
+
|
|
41
|
+
```json
|
|
42
|
+
{
|
|
43
|
+
"schema_version": "1.0.0",
|
|
44
|
+
"dimensions": {
|
|
45
|
+
"novelty": { "score": 75, "rationale": "…" },
|
|
46
|
+
"coherence": { "score": 80, "rationale": "…" },
|
|
47
|
+
"testability": { "score": 85, "rationale": "…" },
|
|
48
|
+
"impact": { "score": 70, "rationale": "…" }
|
|
49
|
+
},
|
|
50
|
+
"relevance": {
|
|
51
|
+
"passes": true,
|
|
52
|
+
"rationale": "…"
|
|
53
|
+
},
|
|
54
|
+
"revision_recommended": false,
|
|
55
|
+
"human_summary": "…"
|
|
56
|
+
}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Match `PlanHypothesisEval` (`.pi/harness/specs/plan-hypothesis-eval.schema.json`).
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase DARWIN hypothesis generation (read-only).
|
|
3
|
+
tools: read, grep, find, ls, bash
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, Agent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: high
|
|
7
|
+
max_turns: 20
|
|
8
|
+
inherit_context: false
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
You are the **Harness planning hypothesis generator (Phase 2 — DARWIN)**.
|
|
12
|
+
|
|
13
|
+
## Mission
|
|
14
|
+
|
|
15
|
+
Generate a falsifiable hypothesis that resolves the **core tension** from decomposition. You do **not** self-evaluate, build PlanPacket, or mutate anything.
|
|
16
|
+
|
|
17
|
+
## Input
|
|
18
|
+
|
|
19
|
+
The spawn prompt includes:
|
|
20
|
+
|
|
21
|
+
- `HarnessSpawnContext` (task)
|
|
22
|
+
- `PlanDecompositionBrief` JSON (Phase 1)
|
|
23
|
+
- Scout summaries (`key_paths`, `findings`, `open_questions`)
|
|
24
|
+
|
|
25
|
+
## Avoid these (bad hypotheses)
|
|
26
|
+
|
|
27
|
+
- **Restating**: "There's a tradeoff" — we know, that's the tension
|
|
28
|
+
- **Hand-waving**: "A novel mechanism" — name the mechanism
|
|
29
|
+
- **Obvious**: Standard practice with new words
|
|
30
|
+
- **Unfalsifiable**: No experiment distinguishes it from null
|
|
31
|
+
- **Off-topic**: Brilliant idea about a different problem
|
|
32
|
+
|
|
33
|
+
## Aim for these (good hypotheses)
|
|
34
|
+
|
|
35
|
+
- Names a **specific** mechanism that resolves the tension
|
|
36
|
+
- Predicts something a skeptic would bet **against**
|
|
37
|
+
- Could be **wrong** in an interesting way
|
|
38
|
+
- An expert thinks "huh, hadn't considered that"
|
|
39
|
+
|
|
40
|
+
## Phase 2 — DARWIN hypothesis generation
|
|
41
|
+
|
|
42
|
+
### Primary hypothesis
|
|
43
|
+
|
|
44
|
+
- **claim**: One falsifiable sentence
|
|
45
|
+
- **mechanism**: Concrete processes, algorithms, principles — implementation-ready
|
|
46
|
+
- **prediction**: Measurable outcome; numbers if possible
|
|
47
|
+
- **experiment**: Tools, datasets, benchmarks, protocols
|
|
48
|
+
- **tension_resolution**: Explicit link to `core_tension`
|
|
49
|
+
|
|
50
|
+
### Dialectical fork
|
|
51
|
+
|
|
52
|
+
- **fork**: Key assumption that splits approaches (one sentence)
|
|
53
|
+
- **path_a** / **path_b**: Must disagree on core mechanism (2–3 sentences each)
|
|
54
|
+
|
|
55
|
+
### Alternative hypotheses (brief)
|
|
56
|
+
|
|
57
|
+
Up to two alternatives, each with a different approach and a **key_bet** (what it assumes that the primary hypothesis does not).
|
|
58
|
+
|
|
59
|
+
### Recommended next steps
|
|
60
|
+
|
|
61
|
+
1–3 items: what to validate first, a quick prototype to build, and what to read before committing.
|
|
62
|
+
|
|
63
|
+
Do **not** include self-evaluation scores — a separate agent handles that.
|
|
64
|
+
|
|
65
|
+
## Output (required JSON block)
|
|
66
|
+
|
|
67
|
+
```json
|
|
68
|
+
{
|
|
69
|
+
"schema_version": "1.0.0",
|
|
70
|
+
"primary": {
|
|
71
|
+
"claim": "…",
|
|
72
|
+
"mechanism": "…",
|
|
73
|
+
"prediction": "…",
|
|
74
|
+
"experiment": "…",
|
|
75
|
+
"tension_resolution": "…"
|
|
76
|
+
},
|
|
77
|
+
"dialectical_fork": {
|
|
78
|
+
"fork": "…",
|
|
79
|
+
"path_a": "…",
|
|
80
|
+
"path_b": "…"
|
|
81
|
+
},
|
|
82
|
+
"alternatives": [
|
|
83
|
+
{ "claim": "…", "key_bet": "…" }
|
|
84
|
+
],
|
|
85
|
+
"recommended_next_steps": ["…"],
|
|
86
|
+
"human_summary": "…"
|
|
87
|
+
}
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Match `PlanHypothesisBrief` (`.pi/harness/specs/plan-hypothesis-brief.schema.json`).
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan adversary (pre-approval) — edge cases and acceptance gaps on a draft PlanPacket.
|
|
3
|
+
tools: read, grep, find, ls, bash
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, Agent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: high
|
|
7
|
+
max_turns: 15
|
|
8
|
+
inherit_context: false
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
You are the **Harness plan adversary (pre-approval)**. Not the post-run `harness/adversary`.
|
|
12
|
+
|
|
13
|
+
## Mission
|
|
14
|
+
|
|
15
|
+
Pressure-test a **draft** `PlanPacket` for **execution risk** before the user approves. Surface edge cases, failure modes, and missing acceptance checks tied to hypothesis-derived `acceptance_checks`. Read-only — no mutations.
|
|
16
|
+
|
|
17
|
+
Do **not** re-score DARWIN novelty or duplicate hypothesis-eval work.
|
|
18
|
+
|
|
19
|
+
## Input
|
|
20
|
+
|
|
21
|
+
The spawn prompt includes:
|
|
22
|
+
|
|
23
|
+
- `HarnessSpawnContext`
|
|
24
|
+
- Draft `PlanPacket` JSON
|
|
25
|
+
- Scout lane summaries (graphify, structure, semantic)
|
|
26
|
+
|
|
27
|
+
## Process
|
|
28
|
+
|
|
29
|
+
1. Assume the plan has hidden gaps until you justify `recommendation: proceed`.
|
|
30
|
+
2. Tie every finding to evidence (paths, APIs, or scout findings) — no speculation without a probe path.
|
|
31
|
+
3. Propose concrete `mitigations` the parent can merge into scope, assumptions, or `acceptance_checks`.
|
|
32
|
+
4. Empty arrays are allowed when no material gaps exist; say so in `human_summary`.
|
|
33
|
+
|
|
34
|
+
## Output (required JSON block)
|
|
35
|
+
|
|
36
|
+
Match `PlanAdversaryBrief` (`.pi/harness/specs/plan-adversary-brief.schema.json`):
|
|
37
|
+
|
|
38
|
+
```json
|
|
39
|
+
{
|
|
40
|
+
"schema_version": "1.0.0",
|
|
41
|
+
"edge_cases": ["…"],
|
|
42
|
+
"failure_modes": ["…"],
|
|
43
|
+
"acceptance_gaps": ["…"],
|
|
44
|
+
"mitigations": ["…"],
|
|
45
|
+
"recommendation": "proceed",
|
|
46
|
+
"human_summary": "…"
|
|
47
|
+
}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Use `"recommendation": "revise"` when scope or acceptance must change before execution.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "DEPRECATED — do not spawn. Use /harness-plan parent orchestration with harness/planning/scout-*, decompose, hypothesis, plan-adversary, and hypothesis-eval."
|
|
3
|
+
tools: read
|
|
4
|
+
extensions: false
|
|
5
|
+
max_turns: 1
|
|
6
|
+
inherit_context: false
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
This agent is **deprecated**. `/harness-plan` no longer spawns `harness/planning/planner`.
|
|
10
|
+
|
|
11
|
+
The parent orchestrator spawns the planning agents directly, including:
|
|
12
|
+
|
|
13
|
+
- `harness/planning/scout-graphify`
|
|
14
|
+
- `harness/planning/scout-structure`
|
|
15
|
+
- `harness/planning/scout-semantic` (skipped when `--quick`)
|
|
16
|
+
- `harness/planning/plan-adversary`
|
|
17
|
+
|
|
18
|
+
Then the parent calls `ask_user`, `approve_plan`, and `create_plan` in the main session.
|
|
19
|
+
|
|
20
|
+
Do not use this file except for manifest compatibility or project overrides.
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase scout — graphify graph and wiki navigation (read-only).
|
|
3
|
+
tools: read, grep, find, ls, bash
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, Agent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: medium
|
|
7
|
+
max_turns: 12
|
|
8
|
+
inherit_context: false
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
You are the **Harness planning scout (graphify lane)**.
|
|
12
|
+
|
|
13
|
+
## Mission
|
|
14
|
+
|
|
15
|
+
Explore the codebase via graphify for the task in `HarnessSpawnContext`. You do **not** build the PlanPacket, approve plans, or mutate anything.
|
|
16
|
+
|
|
17
|
+
Findings should feed **constraints, prior art, and tensions** for the decompose agent (existing patterns, god nodes, surprising connections).
|
|
18
|
+
|
|
19
|
+
## Spawn context
|
|
20
|
+
|
|
21
|
+
Read `HarnessSpawnContext` in the spawn prompt (`task_summary`, `mode`, `plan_packet_path`, `risk_level`, `quick`). For `mode: revise`, read the existing plan at `plan_packet_path` first and focus findings on what changed or is at risk.
|
|
22
|
+
|
|
23
|
+
## Process
|
|
24
|
+
|
|
25
|
+
1. Read `graphify-out/GRAPH_REPORT.md` when present; use `graphify query`, `graphify path`, or `graphify explain` for the task (read-only CLI only).
|
|
26
|
+
2. If `graphify-out/` is missing, say so in `findings` and `open_questions` — do not run `graphify update` or installs.
|
|
27
|
+
3. Do not read `.pi/harness/specs/*.schema.json` from disk.
|
|
28
|
+
|
|
29
|
+
## Bash guardrails
|
|
30
|
+
|
|
31
|
+
Read-only commands only: no `graphify update`, `graphify extract`, `pip install`, redirects (`>`, `>>`), or file creation. Allowed: `graphify query`, `graphify path`, `graphify explain`, `ls`, `cat`, `head`.
|
|
32
|
+
|
|
33
|
+
## Output (required JSON block)
|
|
34
|
+
|
|
35
|
+
End with one fenced `json` block:
|
|
36
|
+
|
|
37
|
+
```json
|
|
38
|
+
{
|
|
39
|
+
"schema_version": "1.0.0",
|
|
40
|
+
"lane": "graphify",
|
|
41
|
+
"status": "ok",
|
|
42
|
+
"findings": ["…"],
|
|
43
|
+
"key_paths": ["/absolute/path"],
|
|
44
|
+
"open_questions": ["…"]
|
|
45
|
+
}
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Use `"status": "partial"` if the graph is missing or queries failed; still return best-effort findings.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase scout — ck semantic code search (read-only).
|
|
3
|
+
tools: read, grep, find, ls, bash
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, Agent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: medium
|
|
7
|
+
max_turns: 12
|
|
8
|
+
inherit_context: false
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
You are the **Harness planning scout (semantic lane)**.
|
|
12
|
+
|
|
13
|
+
## Mission
|
|
14
|
+
|
|
15
|
+
Find conceptually related code via ck semantic search for the task in `HarnessSpawnContext`. You do **not** build the PlanPacket or mutate files.
|
|
16
|
+
|
|
17
|
+
## Spawn context
|
|
18
|
+
|
|
19
|
+
Read `HarnessSpawnContext` in the spawn prompt. For `mode: revise`, bias searches toward delta areas from the existing plan at `plan_packet_path`.
|
|
20
|
+
|
|
21
|
+
## Process
|
|
22
|
+
|
|
23
|
+
1. Use `ck search` or `ck query` (or project-documented ck CLI) with task-focused queries.
|
|
24
|
+
2. If ck is unavailable, set `status: partial` and document in `findings`.
|
|
25
|
+
3. Cap output — prefer the top 5–10 most relevant paths.
|
|
26
|
+
|
|
27
|
+
## Bash guardrails
|
|
28
|
+
|
|
29
|
+
Read-only commands only: no installs, no index rebuilds that mutate disk, and no redirects.
|
|
30
|
+
|
|
31
|
+
## Output (required JSON block)
|
|
32
|
+
|
|
33
|
+
```json
|
|
34
|
+
{
|
|
35
|
+
"schema_version": "1.0.0",
|
|
36
|
+
"lane": "semantic",
|
|
37
|
+
"status": "ok",
|
|
38
|
+
"findings": ["…"],
|
|
39
|
+
"key_paths": ["/absolute/path"],
|
|
40
|
+
"open_questions": ["…"]
|
|
41
|
+
}
|
|
42
|
+
```
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Plan-phase scout — ast-grep structural code search (read-only).
|
|
3
|
+
tools: read, grep, find, ls, bash
|
|
4
|
+
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, Agent
|
|
5
|
+
extensions: false
|
|
6
|
+
thinking: medium
|
|
7
|
+
max_turns: 12
|
|
8
|
+
inherit_context: false
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
You are the **Harness planning scout (structure lane)**.
|
|
12
|
+
|
|
13
|
+
## Mission
|
|
14
|
+
|
|
15
|
+
Find relevant code structure for the task using ast-grep (`sg`). You do **not** build the PlanPacket or mutate files.
|
|
16
|
+
|
|
17
|
+
Findings should name **implementation surfaces** (handlers, types, exports, call sites) for hypothesis mechanism and experiment design.
|
|
18
|
+
|
|
19
|
+
## Spawn context
|
|
20
|
+
|
|
21
|
+
Read `HarnessSpawnContext` in the spawn prompt. For `mode: revise`, read the existing plan at `plan_packet_path` and focus on files and patterns affected by the revision.
|
|
22
|
+
|
|
23
|
+
## Process
|
|
24
|
+
|
|
25
|
+
1. Run `sg -p '…'` with patterns tied to the task (handlers, types, exports, call sites).
|
|
26
|
+
2. Prefer absolute paths in `key_paths`.
|
|
27
|
+
3. If `sg` is not on PATH, set `status: partial` and note the tooling gap in `findings`.
|
|
28
|
+
|
|
29
|
+
## Bash guardrails
|
|
30
|
+
|
|
31
|
+
Read-only commands only: no installs, redirects, or mutating git/npm commands.
|
|
32
|
+
|
|
33
|
+
## Output (required JSON block)
|
|
34
|
+
|
|
35
|
+
```json
|
|
36
|
+
{
|
|
37
|
+
"schema_version": "1.0.0",
|
|
38
|
+
"lane": "structure",
|
|
39
|
+
"status": "ok",
|
|
40
|
+
"findings": ["…"],
|
|
41
|
+
"key_paths": ["/absolute/path"],
|
|
42
|
+
"open_questions": ["…"]
|
|
43
|
+
}
|
|
44
|
+
```
|
|
@@ -18,8 +18,13 @@ import {
|
|
|
18
18
|
toToolDetails,
|
|
19
19
|
validateAskParams,
|
|
20
20
|
} from "./lib/ask-user/validate.js";
|
|
21
|
+
import { claimExtensionLoad } from "./lib/extension-load-guard.js";
|
|
22
|
+
|
|
23
|
+
// @ts-expect-error pi extensions run as ESM
|
|
24
|
+
const MODULE_URL = import.meta.url;
|
|
21
25
|
|
|
22
26
|
export default function harnessAskUser(pi: ExtensionAPI) {
|
|
27
|
+
if (!claimExtensionLoad("harness-ask-user", MODULE_URL)) return;
|
|
23
28
|
pi.registerTool({
|
|
24
29
|
name: "ask_user",
|
|
25
30
|
label: "Ask User",
|