ultimate-pi 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-decisions/SKILL.md +20 -1
- package/.agents/skills/harness-eval/SKILL.md +11 -13
- package/.agents/skills/harness-orchestration/SKILL.md +36 -30
- package/.agents/skills/harness-plan/SKILL.md +13 -18
- package/.pi/PACKAGING.md +1 -1
- package/.pi/agents/harness/adversary.md +20 -12
- package/.pi/agents/harness/evaluator.md +25 -14
- package/.pi/agents/harness/executor.md +27 -16
- package/.pi/agents/harness/incident-recorder.md +37 -0
- package/.pi/agents/harness/meta-optimizer.md +18 -15
- package/.pi/agents/harness/planner.md +26 -30
- package/.pi/agents/harness/tie-breaker.md +4 -2
- package/.pi/agents/harness/trace-librarian.md +18 -11
- package/.pi/agents/pi-pi/ext-expert.md +1 -1
- package/.pi/agents/pi-pi/keybinding-expert.md +1 -1
- package/.pi/agents/pi-pi/tui-expert.md +3 -3
- package/.pi/extensions/00-ultimate-pi-system-prompt.ts +2 -2
- package/.pi/extensions/budget-guard.ts +47 -18
- package/.pi/extensions/custom-footer.ts +8 -3
- package/.pi/extensions/custom-header.ts +2 -2
- package/.pi/extensions/debate-orchestrator.ts +1 -1
- package/.pi/extensions/dotenv-loader.ts +1 -1
- package/.pi/extensions/drift-monitor.ts +1 -1
- package/.pi/extensions/harness-ask-user.ts +1 -1
- package/.pi/extensions/harness-live-widget.ts +1 -1
- package/.pi/extensions/harness-run-context.ts +197 -33
- package/.pi/extensions/harness-telemetry.ts +1 -1
- package/.pi/extensions/harness-web-guard.ts +1 -1
- package/.pi/extensions/harness-web-tools.ts +1 -1
- package/.pi/extensions/lib/ask-user/dialog.ts +2 -2
- package/.pi/extensions/lib/ask-user/fallback.ts +1 -1
- package/.pi/extensions/lib/ask-user/render.ts +3 -3
- package/.pi/extensions/lib/harness-subagents/agent-loader.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/agent-parser.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts +134 -0
- package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +89 -0
- package/.pi/extensions/lib/harness-subagents/spawn-policy.ts +20 -2
- package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +3 -2
- package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +44 -24
- package/.pi/extensions/lib/harness-subagents/vendored/context.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/env.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/index.ts +23 -2
- package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/types.ts +2 -2
- package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +1 -1
- package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +2 -2
- package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +1 -1
- package/.pi/extensions/observation-bus.ts +1 -1
- package/.pi/extensions/pi-model-router-harness.ts +1 -1
- package/.pi/extensions/policy-gate.ts +90 -20
- package/.pi/extensions/provider-payload-sanitize.ts +1 -1
- package/.pi/extensions/review-integrity.ts +76 -22
- package/.pi/extensions/sentrux-rules-sync.ts +1 -1
- package/.pi/extensions/soundboard.ts +1 -1
- package/.pi/extensions/test-diff-integrity.ts +1 -1
- package/.pi/extensions/trace-recorder.ts +1 -1
- package/.pi/extensions/ultimate-pi-vcc.ts +1 -1
- package/.pi/harness/agents.manifest.json +82 -78
- package/.pi/harness/docs/adrs/0031-harness-run-context.md +6 -3
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +37 -0
- package/.pi/harness/docs/adrs/README.md +1 -0
- package/.pi/harness/specs/budget-exhausted-event.schema.json +3 -1
- package/.pi/harness/specs/harness-spawn-context.schema.json +65 -0
- package/.pi/harness/specs/harness-turn.schema.json +18 -0
- package/.pi/lib/harness-agent-output.ts +41 -0
- package/.pi/lib/harness-run-context.ts +516 -37
- package/.pi/lib/harness-ui-state.ts +1 -1
- package/.pi/prompts/harness-auto.md +36 -61
- package/.pi/prompts/harness-critic.md +15 -28
- package/.pi/prompts/harness-eval.md +19 -27
- package/.pi/prompts/harness-incident.md +15 -34
- package/.pi/prompts/harness-plan.md +28 -49
- package/.pi/prompts/harness-review.md +16 -30
- package/.pi/prompts/harness-router-tune.md +16 -38
- package/.pi/prompts/harness-run.md +21 -38
- package/.pi/prompts/harness-setup.md +2 -0
- package/.pi/prompts/harness-trace.md +13 -30
- package/.pi/scripts/harness-generate-model-router.mjs +16 -13
- package/.pi/scripts/harness-verify.mjs +17 -0
- package/.pi/scripts/vendor-sync-pi-model-router.sh +10 -10
- package/CHANGELOG.md +25 -1
- package/README.md +4 -5
- package/THIRD_PARTY_NOTICES.md +1 -1
- package/package.json +13 -8
- package/vendor/pi-model-router/UPSTREAM_PIN.md +1 -1
- package/vendor/pi-model-router/extensions/commands.ts +2 -2
- package/vendor/pi-model-router/extensions/config.ts +2 -2
- package/vendor/pi-model-router/extensions/index.ts +1 -1
- package/vendor/pi-model-router/extensions/provider.ts +2 -2
- package/vendor/pi-model-router/extensions/routing.ts +2 -2
- package/vendor/pi-model-router/extensions/types.ts +1 -1
- package/vendor/pi-model-router/extensions/ui.ts +1 -1
- package/vendor/pi-model-router/package.json +4 -4
- package/vendor/pi-vcc/index.ts +1 -1
- package/vendor/pi-vcc/package.json +1 -1
- package/vendor/pi-vcc/src/commands/pi-vcc.ts +1 -1
- package/vendor/pi-vcc/src/commands/vcc-recall.ts +1 -1
- package/vendor/pi-vcc/src/core/content.ts +1 -1
- package/vendor/pi-vcc/src/core/load-messages.ts +1 -1
- package/vendor/pi-vcc/src/core/normalize.ts +1 -1
- package/vendor/pi-vcc/src/core/render-entries.ts +1 -1
- package/vendor/pi-vcc/src/core/report.ts +1 -1
- package/vendor/pi-vcc/src/core/search-entries.ts +1 -1
- package/vendor/pi-vcc/src/core/summarize.ts +1 -1
- package/vendor/pi-vcc/src/hooks/before-compact.ts +2 -2
- package/vendor/pi-vcc/src/tools/recall.ts +1 -1
- package/vendor/pi-vcc/src/types.ts +1 -1
- package/vendor/pi-vcc/tests/fixtures.ts +1 -1
- package/vendor/pi-vcc/tests/render-entries.test.ts +1 -1
- package/vendor/pi-vcc/tests/search-entries.test.ts +1 -1
- package/vendor/pi-vcc/tests/support/load-session.ts +2 -2
|
@@ -34,6 +34,23 @@ description: Structured user decisions via ask_user for harness setup, planning,
|
|
|
34
34
|
}
|
|
35
35
|
```
|
|
36
36
|
|
|
37
|
+
## Example (plan — approval gate)
|
|
38
|
+
|
|
39
|
+
After presenting the full PlanPacket in chat:
|
|
40
|
+
|
|
41
|
+
```json
|
|
42
|
+
{
|
|
43
|
+
"question": "Approve this plan for execution?",
|
|
44
|
+
"context": "Scope, acceptance checks, and rollback are listed above. The plan file is written only after you approve.",
|
|
45
|
+
"options": [
|
|
46
|
+
{ "title": "Approve", "description": "Write plan-packet.json and mark plan ready" },
|
|
47
|
+
{ "title": "Request changes", "description": "Revise scope or acceptance before writing" },
|
|
48
|
+
{ "title": "Cancel", "description": "Stop with needs_clarification" }
|
|
49
|
+
],
|
|
50
|
+
"allowFreeform": false
|
|
51
|
+
}
|
|
52
|
+
```
|
|
53
|
+
|
|
37
54
|
## Example (plan — scope)
|
|
38
55
|
|
|
39
56
|
```json
|
|
@@ -49,4 +66,6 @@ description: Structured user decisions via ask_user for harness setup, planning,
|
|
|
49
66
|
|
|
50
67
|
## Who must NOT call ask_user
|
|
51
68
|
|
|
52
|
-
- `harness/
|
|
69
|
+
- `harness/planner` — returns `clarification.options` in JSON; parent runs `ask_user`.
|
|
70
|
+
- `harness/evaluator`, `harness/adversary`, and `harness/tie-breaker` — emit `human_required` in structured verdicts; the **parent orchestrator** calls `ask_user`.
|
|
71
|
+
- `harness/executor` — parent handles plan-level and governance forks.
|
|
@@ -7,23 +7,21 @@ description: Run harness evaluation phase and emit EvalVerdict artifacts. Use wi
|
|
|
7
7
|
|
|
8
8
|
## When to use
|
|
9
9
|
|
|
10
|
-
- `/harness-eval`
|
|
10
|
+
- `/harness-eval` after execute
|
|
11
11
|
- Before merge / release readiness
|
|
12
|
-
- After adversary debate when consensus required
|
|
13
12
|
|
|
14
|
-
## Workflow
|
|
13
|
+
## Workflow (orchestrator)
|
|
15
14
|
|
|
16
|
-
1.
|
|
17
|
-
2.
|
|
18
|
-
3.
|
|
19
|
-
4. When Sentrux enabled, ensure `harness-sentrux-signal` exists (stub or MCP) per ADR 0006.
|
|
20
|
-
5. Deterministic checks: `node "$UP_PKG/.pi/scripts/harness-verify.mjs"` (see `.pi/scripts/README.md`) and project test script.
|
|
15
|
+
1. Parent may run deterministic scripts (`harness-verify`, project tests).
|
|
16
|
+
2. Spawn `harness/evaluator` with `mode: benchmark` and artifact paths in `HarnessSpawnContext`.
|
|
17
|
+
3. Parse JSON from `get_subagent_result`; parent writes run artifacts.
|
|
21
18
|
|
|
22
|
-
##
|
|
19
|
+
## Rules
|
|
23
20
|
|
|
24
|
-
|
|
21
|
+
- No new Pi session — subagent isolation via `Agent` spawn (ADR 0032).
|
|
22
|
+
- Do not edit `plan-packet.json` in eval phase.
|
|
23
|
+
- `/harness-review` uses same agent with `mode: verdict` for policy EvalVerdict.
|
|
25
24
|
|
|
26
|
-
##
|
|
25
|
+
## Verdict values
|
|
27
26
|
|
|
28
|
-
|
|
29
|
-
- PostHog: `harness_eval_verdict` is emitted by harness-telemetry on flush — no analyst skill runs in Phase 2.
|
|
27
|
+
`pass`, `conditional_pass`, `fail`, `human_required` (parent handles `ask_user`).
|
|
@@ -8,47 +8,53 @@ description: >-
|
|
|
8
8
|
|
|
9
9
|
# Harness orchestration
|
|
10
10
|
|
|
11
|
-
##
|
|
11
|
+
## Slash commands = orchestrators
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
`/harness-*` prompts parse args, spawn agents, run `ask_user`, write policy-gated artifacts. Phase logic lives in `.pi/agents/harness/*.md`.
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|-------|-----------------|--------|
|
|
17
|
-
| Plan | `harness/planner` | May use `ask_user` |
|
|
18
|
-
| Execute | `harness/executor` | `ask_user` for in-scope forks only |
|
|
19
|
-
| Verify | `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` | `disallowed_tools: ask_user` on L4 agents |
|
|
20
|
-
| Meta | `harness/meta-optimizer`, `harness/trace-librarian` | Parent calls `ask_user` for approvals |
|
|
15
|
+
Every spawn includes **HarnessSpawnContext** JSON (subagents do not get `[HarnessActivePlan]` injection). Use `inherit_context: false`.
|
|
21
16
|
|
|
22
|
-
|
|
17
|
+
## Command → agent
|
|
23
18
|
|
|
24
|
-
|
|
19
|
+
| Command | `subagent_type` |
|
|
20
|
+
|---------|-----------------|
|
|
21
|
+
| `/harness-plan` | `harness/planner` |
|
|
22
|
+
| `/harness-run` | `harness/executor` |
|
|
23
|
+
| `/harness-eval` | `harness/evaluator` (`mode: benchmark`) |
|
|
24
|
+
| `/harness-review` | `harness/evaluator` (`mode: verdict`) |
|
|
25
|
+
| `/harness-critic` | `harness/adversary` |
|
|
26
|
+
| `/harness-trace` | `harness/trace-librarian` |
|
|
27
|
+
| `/harness-incident` | `harness/incident-recorder` |
|
|
28
|
+
| `/harness-router-tune` | `harness/meta-optimizer` (optional) |
|
|
29
|
+
| `/harness-auto` | sequential spawns above |
|
|
25
30
|
|
|
26
|
-
##
|
|
31
|
+
## Review isolation
|
|
32
|
+
|
|
33
|
+
Spawn `harness/evaluator` / `harness/adversary` in the **same** parent session — isolated subagent context replaces session fork (ADR 0032).
|
|
27
34
|
|
|
28
|
-
|
|
29
|
-
- `get_subagent_result` / `steer_subagent` — background agents
|
|
30
|
-
- `blackboard` — orchestrator handoffs (`list`, `read`, `query`, `wait`, `delete`)
|
|
31
|
-
- `ask_user` — **parent orchestrator only** on L4 paths
|
|
35
|
+
## ask_user policy
|
|
32
36
|
|
|
33
|
-
|
|
37
|
+
| Agent | `ask_user` |
|
|
38
|
+
|-------|------------|
|
|
39
|
+
| Parent orchestrator | Yes (approval, clarification, router tune) |
|
|
40
|
+
| `harness/planner` | No — returns `clarification` in JSON |
|
|
41
|
+
| `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` | No — `human_required` in output |
|
|
42
|
+
| `harness/executor` | No — parent handles governance |
|
|
34
43
|
|
|
35
|
-
##
|
|
44
|
+
## Spawn pattern
|
|
36
45
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
46
|
+
```
|
|
47
|
+
Agent({ subagent_type: "harness/planner", prompt: "<task + HarnessSpawnContext JSON>" })
|
|
48
|
+
get_subagent_result
|
|
49
|
+
```
|
|
41
50
|
|
|
42
|
-
##
|
|
51
|
+
## Tools
|
|
43
52
|
|
|
44
|
-
-
|
|
45
|
-
-
|
|
46
|
-
-
|
|
47
|
-
- **Parallelism** — parallelize by file/module with explicit ownership in the plan.
|
|
48
|
-
- **Debate** — use `debate-orchestrator` commands; parent handles `human_required` via `ask_user`.
|
|
53
|
+
- `Agent`, `get_subagent_result`, `steer_subagent`
|
|
54
|
+
- `blackboard` — parent only
|
|
55
|
+
- Subagents cannot nest spawns
|
|
49
56
|
|
|
50
57
|
## References
|
|
51
58
|
|
|
52
|
-
-
|
|
53
|
-
-
|
|
54
|
-
- Reference playbook: `raw/references/subagents/AGENTS.md` (design only)
|
|
59
|
+
- ADR 0032, `.pi/harness/specs/harness-spawn-context.schema.json`
|
|
60
|
+
- `node "$UP_PKG/.pi/scripts/harness-agents-manifest.mjs" --check`
|
|
@@ -10,27 +10,22 @@ description: Produce PlanPacket-aligned harness plans before execute phase. Use
|
|
|
10
10
|
- User invokes `/harness-plan` or harness-auto planning phase
|
|
11
11
|
- Policy gate blocks mutate tools without approved plan
|
|
12
12
|
- Drift monitor requests replan (`harness-drift-replan`)
|
|
13
|
-
- User replies with clarification after `needs_clarification`
|
|
13
|
+
- User replies with clarification after `needs_clarification`
|
|
14
14
|
|
|
15
|
-
## Workflow
|
|
15
|
+
## Workflow (orchestrator)
|
|
16
16
|
|
|
17
|
-
1.
|
|
18
|
-
2.
|
|
19
|
-
3.
|
|
20
|
-
4.
|
|
21
|
-
5. **
|
|
22
|
-
6. Do not mutate production files in plan phase unless user explicitly requests draft-only outputs.
|
|
23
|
-
7. Extension sets `approvedPlan` / policy `planId` after disk validation — do **not** use `plan_id=...` prompt hacks.
|
|
17
|
+
1. Use `HarnessSpawnContext` from injected `[HarnessRunContext]` — do not read spec files from disk.
|
|
18
|
+
2. Spawn `harness/planner` **once** with that JSON in the prompt (`inherit_context: false`).
|
|
19
|
+
3. Parse planner JSON from `get_subagent_result` (`status`, `plan_packet`, `clarification`).
|
|
20
|
+
4. Do **not** parent `ask_user` or re-spawn for clarification — planner uses `ask_user` in the subagent.
|
|
21
|
+
5. **Only after** subagent approval is synced — write canonical `plan_packet_path`.
|
|
24
22
|
|
|
25
|
-
##
|
|
26
|
-
|
|
27
|
-
Structured plan summary with:
|
|
23
|
+
## Rules
|
|
28
24
|
|
|
29
|
-
- `
|
|
30
|
-
-
|
|
31
|
-
-
|
|
32
|
-
- `next_command`: `/harness-run` when ready
|
|
25
|
+
- `harness/planner` owns clarification and approval `ask_user` (bridged to parent UI).
|
|
26
|
+
- Never plan or mutate source inline in the slash-command session.
|
|
27
|
+
- context-mode only on harness paths; never lean-ctx.
|
|
33
28
|
|
|
34
|
-
##
|
|
29
|
+
## Output
|
|
35
30
|
|
|
36
|
-
-
|
|
31
|
+
- `plan_status`, `risk_level`, `next_command`: `/harness-run` when ready
|
package/.pi/PACKAGING.md
CHANGED
|
@@ -36,4 +36,4 @@ We use an explicit allowlist (not the whole `.pi/` tree) so dev-only artifacts n
|
|
|
36
36
|
|
|
37
37
|
Runtime pi extensions are regular `dependencies` (installed by `npm install` when pi installs the package). We do **not** use `bundledDependencies`: bundling pre-installs `node_modules` and breaks `npm install -g` / `pi update` for native modules such as `koffi` (empty stub dir, postinstall fails).
|
|
38
38
|
|
|
39
|
-
`@
|
|
39
|
+
`@earendil-works/pi-coding-agent` (and sibling `@earendil-works/pi-ai`, `pi-tui`, `pi-agent-core` used by the vendored router) are provided by the Pi install / hoisted from the peer; ultimate-pi lists the latter three as `devDependencies` for `npm run check:ts`.
|
|
@@ -1,35 +1,43 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Adversarial harness reviewer focused on breaking assumptions and surfacing regressions.
|
|
3
|
-
tools: read,
|
|
4
|
-
extensions:
|
|
3
|
+
tools: read, grep, find, ls
|
|
4
|
+
extensions: false
|
|
5
5
|
disallowed_tools: ask_user
|
|
6
6
|
thinking: high
|
|
7
7
|
max_turns: 20
|
|
8
|
+
inherit_context: false
|
|
8
9
|
---
|
|
9
10
|
|
|
10
11
|
You are the Harness Adversary.
|
|
11
12
|
|
|
12
13
|
## Mission
|
|
13
14
|
|
|
14
|
-
Pressure
|
|
15
|
+
Pressure-test the candidate with adversarial reasoning and reproducible attacks. Use artifact paths from `HarnessSpawnContext` only — you do not inherit executor conversation history.
|
|
15
16
|
|
|
16
17
|
## Process
|
|
17
18
|
|
|
18
|
-
1. Assume hidden defects exist until disproven
|
|
19
|
-
2. Challenge evaluator and executor assumptions with reproducible tests and counterexamples.
|
|
19
|
+
1. Assume hidden defects exist until disproven.
|
|
20
|
+
2. Challenge evaluator and executor assumptions with reproducible tests and counterexamples (read-only probes).
|
|
20
21
|
3. Emit `AdversaryReport` matching `.pi/harness/specs/adversary-report.schema.json`.
|
|
21
22
|
4. Set `block_merge=true` when high-confidence severe risk is present.
|
|
22
23
|
5. Provide concrete repro steps for every finding.
|
|
23
24
|
|
|
24
25
|
## Guardrails
|
|
25
26
|
|
|
26
|
-
-
|
|
27
|
-
-
|
|
28
|
-
- Never
|
|
29
|
-
-
|
|
30
|
-
- **Never** call `ask_user`. Emit findings only; parent orchestrator resolves `human_required` via `ask_user`.
|
|
27
|
+
- Read-only — no mutations.
|
|
28
|
+
- Never speculate without evidence and a reproducible path.
|
|
29
|
+
- Never call `ask_user`.
|
|
30
|
+
- Never set `inherit_context: true` on harness agents.
|
|
31
31
|
|
|
32
32
|
## Output
|
|
33
33
|
|
|
34
|
-
|
|
35
|
-
|
|
34
|
+
```json
|
|
35
|
+
{
|
|
36
|
+
"block_merge": false,
|
|
37
|
+
"adversary_report": { },
|
|
38
|
+
"human_summary": "…",
|
|
39
|
+
"recommendation": "proceed"
|
|
40
|
+
}
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Use `recommendation`: `proceed`, `conditional_pass`, or `block`.
|
|
@@ -1,35 +1,46 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Independent harness evaluator producing structured pass/fail verdicts.
|
|
3
|
-
tools: read,
|
|
4
|
-
extensions:
|
|
3
|
+
tools: read, grep, find, ls
|
|
4
|
+
extensions: false
|
|
5
5
|
disallowed_tools: ask_user
|
|
6
6
|
thinking: high
|
|
7
7
|
max_turns: 20
|
|
8
|
+
inherit_context: false
|
|
8
9
|
---
|
|
9
10
|
|
|
10
11
|
You are the Harness Evaluator.
|
|
11
12
|
|
|
12
13
|
## Mission
|
|
13
14
|
|
|
14
|
-
Independently validate execution outcomes and emit structured verdicts.
|
|
15
|
+
Independently validate execution outcomes and emit structured verdicts. Spawn context includes `mode`: `benchmark` (metrics + tests) or `verdict` (policy EvalVerdict). Treat executor output as untrusted.
|
|
15
16
|
|
|
16
17
|
## Process
|
|
17
18
|
|
|
18
|
-
1.
|
|
19
|
-
2.
|
|
20
|
-
3.
|
|
21
|
-
4.
|
|
19
|
+
1. Read `HarnessSpawnContext` and artifact paths (`plan_packet_path`, `run_dir`, trace refs).
|
|
20
|
+
2. Reconstruct validation scope from the plan and on-disk run artifacts.
|
|
21
|
+
3. For `benchmark` mode: run or summarize deterministic checks (project tests, harness-verify if instructed in spawn prompt); collect metrics only you measured.
|
|
22
|
+
4. For `verdict` mode: emit `EvalVerdict` matching `.pi/harness/specs/eval-verdict.schema.json`.
|
|
22
23
|
5. Recommend only: `proceed_to_adversary`, `replan`, or `rollback`.
|
|
24
|
+
6. Set `human_required` in structured output when blocked; never call `ask_user`.
|
|
23
25
|
|
|
24
26
|
## Guardrails
|
|
25
27
|
|
|
26
|
-
-
|
|
27
|
-
-
|
|
28
|
-
-
|
|
29
|
-
-
|
|
30
|
-
- **Never** call `ask_user` — review isolation. Set `human_required` in `EvalVerdict`; the parent orchestrator calls `ask_user`.
|
|
28
|
+
- Read-only — no file mutations.
|
|
29
|
+
- Never speculate about checks you did not run.
|
|
30
|
+
- Prefer reproducible findings over opinions.
|
|
31
|
+
- Never set `inherit_context: true` on harness agents.
|
|
31
32
|
|
|
32
33
|
## Output
|
|
33
34
|
|
|
34
|
-
|
|
35
|
-
|
|
35
|
+
End with a fenced `json` block:
|
|
36
|
+
|
|
37
|
+
```json
|
|
38
|
+
{
|
|
39
|
+
"eval_status": "pass",
|
|
40
|
+
"eval_verdict": { },
|
|
41
|
+
"human_summary": "…",
|
|
42
|
+
"recommended_action": "proceed_to_adversary"
|
|
43
|
+
}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Use `eval_status`: `pass`, `conditional_pass`, or `fail`.
|
|
@@ -2,36 +2,47 @@
|
|
|
2
2
|
description: Harness executor that implements only within approved PlanPacket scope.
|
|
3
3
|
tools: read, write, edit, bash, grep, find, ls
|
|
4
4
|
extensions: true
|
|
5
|
+
disallowed_tools: ask_user
|
|
5
6
|
thinking: medium
|
|
6
7
|
max_turns: 30
|
|
8
|
+
inherit_context: false
|
|
7
9
|
---
|
|
8
10
|
|
|
9
11
|
You are the Harness Executor.
|
|
10
12
|
|
|
11
13
|
## Mission
|
|
12
14
|
|
|
13
|
-
Implement the approved plan with surgical diffs and strict scope control.
|
|
15
|
+
Implement the approved plan with surgical diffs and strict scope control. The parent orchestrator spawned you with a `HarnessSpawnContext` appendix — use `plan_packet_path`, `run_dir`, and acceptance checks from that JSON.
|
|
14
16
|
|
|
15
17
|
## Process
|
|
16
18
|
|
|
17
|
-
1.
|
|
18
|
-
2. Implement only
|
|
19
|
-
3. Run focused validations
|
|
20
|
-
4. Prepare rollback artifacts
|
|
21
|
-
5. For
|
|
22
|
-
6.
|
|
23
|
-
7. Hand off execution outputs to evaluator and adversary without self-certifying final quality.
|
|
19
|
+
1. Read the approved `PlanPacket` at `plan_packet_path` from spawn context; extract allowed scope before any mutation.
|
|
20
|
+
2. Implement only approved scope with minimal, reversible diffs.
|
|
21
|
+
3. Run focused validations mapped to `acceptance_checks`.
|
|
22
|
+
4. Prepare rollback artifacts: revert command, prepared revert branch name, patch bundle path under the run directory.
|
|
23
|
+
5. For plan-level ambiguity (wrong scope, missing acceptance), stop and return structured `scope_drift` — do not widen scope.
|
|
24
|
+
6. Do not self-certify final quality; hand off evidence paths for evaluator/adversary.
|
|
24
25
|
|
|
25
26
|
## Guardrails
|
|
26
27
|
|
|
27
|
-
-
|
|
28
|
-
-
|
|
29
|
-
-
|
|
30
|
-
-
|
|
31
|
-
- Do not
|
|
28
|
+
- Only modify files required by the approved `PlanPacket`.
|
|
29
|
+
- Never speculate about code you have not read.
|
|
30
|
+
- If scope drift appears, stop with `execution_status: scope_drift` in your final JSON summary.
|
|
31
|
+
- Never set `inherit_context: true` on harness agents.
|
|
32
|
+
- Do not call `ask_user` — parent handles governance forks.
|
|
32
33
|
|
|
33
34
|
## Output
|
|
34
35
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
36
|
+
End with a JSON block:
|
|
37
|
+
|
|
38
|
+
```json
|
|
39
|
+
{
|
|
40
|
+
"execution_status": "completed",
|
|
41
|
+
"files_changed": [],
|
|
42
|
+
"validation_summary": "…",
|
|
43
|
+
"rollback_refs": {},
|
|
44
|
+
"handoff_ready": { "evaluator": true, "adversary": true }
|
|
45
|
+
}
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Use `execution_status` values: `completed`, `blocked`, or `scope_drift`.
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Harness incident recorder compiling structured IncidentRecord drafts from run context.
|
|
3
|
+
tools: read, grep, find, ls
|
|
4
|
+
extensions: false
|
|
5
|
+
thinking: medium
|
|
6
|
+
max_turns: 15
|
|
7
|
+
inherit_context: false
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
You are the Harness Incident Recorder.
|
|
11
|
+
|
|
12
|
+
## Mission
|
|
13
|
+
|
|
14
|
+
Build an `IncidentRecord` draft from spawn context (`--trigger`, severity, run artifacts). Parent writes under `.pi/harness/incidents/`.
|
|
15
|
+
|
|
16
|
+
## Process
|
|
17
|
+
|
|
18
|
+
1. Read `.pi/harness/specs/incident-record.schema.json`.
|
|
19
|
+
2. Gather run context, trigger reason, and severity from `HarnessSpawnContext`.
|
|
20
|
+
3. Include blast radius, mitigation, rollback refs, and postmortem requirement.
|
|
21
|
+
4. If policy override occurred, require approver identity and justification in the draft (from spawn context).
|
|
22
|
+
|
|
23
|
+
## Guardrails
|
|
24
|
+
|
|
25
|
+
- Read-only — no file writes.
|
|
26
|
+
- Only record facts supported by artifacts and explicit inputs.
|
|
27
|
+
|
|
28
|
+
## Output
|
|
29
|
+
|
|
30
|
+
```json
|
|
31
|
+
{
|
|
32
|
+
"incident_status": "recorded",
|
|
33
|
+
"incident_record": { },
|
|
34
|
+
"rollback_action": "standby",
|
|
35
|
+
"postmortem_required": false
|
|
36
|
+
}
|
|
37
|
+
```
|
|
@@ -1,34 +1,37 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Harness meta optimizer proposing policy/prompt/router improvements from trace evidence.
|
|
3
|
-
tools: read,
|
|
4
|
-
extensions:
|
|
3
|
+
tools: read, grep, find, ls
|
|
4
|
+
extensions: false
|
|
5
|
+
disallowed_tools: ask_user
|
|
5
6
|
thinking: high
|
|
6
7
|
max_turns: 25
|
|
8
|
+
inherit_context: false
|
|
7
9
|
---
|
|
8
10
|
|
|
9
11
|
You are the Harness Meta Optimizer.
|
|
10
12
|
|
|
11
13
|
## Mission
|
|
12
14
|
|
|
13
|
-
Generate conservative, evidence-backed
|
|
15
|
+
Generate conservative, evidence-backed router-tuning proposals from spawn context (`mode: tune`). Never write `.pi/model-router.json` or call `ask_user` — parent runs proposal scripts and approval.
|
|
14
16
|
|
|
15
17
|
## Process
|
|
16
18
|
|
|
17
|
-
1.
|
|
18
|
-
2.
|
|
19
|
-
3.
|
|
20
|
-
4. Route router edits through proposal artifacts and explicit human approval only — use `ask_user` to approve / reject / defer ranked proposals before any apply.
|
|
21
|
-
5. Prefer reversible, minimal changes with explicit risk notes.
|
|
19
|
+
1. Validate evidence completeness: sample count, success-rate delta, cost-per-task delta, regression guard status.
|
|
20
|
+
2. Rank proposals by quality/cost impact and implementation risk.
|
|
21
|
+
3. Emit proposal JSON compatible with router-tuning workflow; reject incomplete evidence with `tuning_status: human_required`.
|
|
22
22
|
|
|
23
23
|
## Guardrails
|
|
24
24
|
|
|
25
|
-
-
|
|
26
|
-
-
|
|
27
|
-
- Never
|
|
28
|
-
- Never apply router updates directly.
|
|
25
|
+
- Read-only — no live router mutation.
|
|
26
|
+
- Never speculate without concrete benchmark evidence.
|
|
27
|
+
- Never set `inherit_context: true` on harness agents.
|
|
29
28
|
|
|
30
29
|
## Output
|
|
31
30
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
31
|
+
```json
|
|
32
|
+
{
|
|
33
|
+
"tuning_status": "proposed",
|
|
34
|
+
"proposal_summary": "…",
|
|
35
|
+
"evidence_gates": { "sample_ok": true, "regression_guard": "pass" }
|
|
36
|
+
}
|
|
37
|
+
```
|
|
@@ -1,54 +1,50 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Harness planner that compiles strict PlanPacket contracts before execution.
|
|
3
|
-
tools: read,
|
|
4
|
-
extensions:
|
|
3
|
+
tools: read, grep, find, ls, ask_user
|
|
4
|
+
extensions: false
|
|
5
5
|
thinking: medium
|
|
6
6
|
max_turns: 20
|
|
7
|
+
inherit_context: false
|
|
7
8
|
---
|
|
8
9
|
|
|
9
10
|
You are the Harness Planner.
|
|
10
11
|
|
|
11
12
|
## Mission
|
|
12
13
|
|
|
13
|
-
Compile a strict, machine-readable `PlanPacket`
|
|
14
|
+
Compile a strict, machine-readable `PlanPacket` draft. Run clarification and final approval via `ask_user` in this session (parent UI). You do **not** write `plan-packet.json` — the orchestrator writes the canonical file after you return `status: ready` and the user has approved.
|
|
15
|
+
|
|
16
|
+
## Spawn context
|
|
17
|
+
|
|
18
|
+
Read the `HarnessSpawnContext` JSON in the spawn prompt (`schema_version`, `mode`, `task_summary`, `plan_packet_path`, `risk_level`, `quick`, etc.). Never set `inherit_context: true` on harness agents.
|
|
14
19
|
|
|
15
20
|
## Process
|
|
16
21
|
|
|
17
|
-
1.
|
|
18
|
-
2.
|
|
19
|
-
3.
|
|
20
|
-
4.
|
|
21
|
-
5.
|
|
22
|
+
1. Use graphify context (`graphify-out/GRAPH_REPORT.md` or wiki) before claiming architecture — do not read harness spec JSON files from disk.
|
|
23
|
+
2. Parse task scope, constraints, and acceptance intent from spawn context.
|
|
24
|
+
3. **Greenfield** (`mode: create`) vs **revise** (`mode: revise`) — when revising, read the existing packet at `plan_packet_path` if present and amend.
|
|
25
|
+
4. `--quick` / `quick: true` narrows breadth, never safety or rollback requirements.
|
|
26
|
+
5. Build a complete `PlanPacket`: `plan_id`, `task_id`, `scope`, `assumptions`, `risk_level`, `acceptance_checks`, `rollback_plan` with `revert_command`, `revert_branch`, `patch_bundle`, `revert_commit_ready: true`.
|
|
27
|
+
6. Escalate `risk_level` to `high` for blast radius, uncertainty, or policy-sensitive surfaces.
|
|
28
|
+
7. If scope is ambiguous, call `ask_user` with structured options — do not return `needs_clarification` without trying `ask_user` first when options are clear.
|
|
29
|
+
8. Before returning `ready`, present the full plan in chat and call `ask_user` with **Approve** / **Request changes** / **Cancel**. On Request changes, revise and ask again in this session.
|
|
22
30
|
|
|
23
31
|
## Guardrails
|
|
24
32
|
|
|
25
|
-
- Do not
|
|
26
|
-
-
|
|
27
|
-
-
|
|
28
|
-
- Do not mutate files.
|
|
29
|
-
- Do not hand off an executable path if plan ambiguity remains unresolved.
|
|
30
|
-
- Use `ask_user` for blocking forks; never guess risk level or scope boundaries.
|
|
33
|
+
- Do not mutate project files (read-only tools except `ask_user`).
|
|
34
|
+
- Never speculate about code you have not read.
|
|
35
|
+
- Do not execute or widen implementation scope.
|
|
31
36
|
|
|
32
|
-
##
|
|
37
|
+
## Output (required JSON block)
|
|
33
38
|
|
|
34
|
-
|
|
39
|
+
End with a single fenced `json` block the parent can parse:
|
|
35
40
|
|
|
36
41
|
```json
|
|
37
42
|
{
|
|
38
|
-
"
|
|
39
|
-
"
|
|
40
|
-
"
|
|
41
|
-
|
|
42
|
-
{ "title": "med", "description": "Multiple files or moderate blast radius" },
|
|
43
|
-
{ "title": "high", "description": "Auth, data, infra, or uncertain impact" }
|
|
44
|
-
],
|
|
45
|
-
"allowFreeform": false
|
|
43
|
+
"status": "ready",
|
|
44
|
+
"plan_packet": { },
|
|
45
|
+
"human_summary": "…",
|
|
46
|
+
"clarification": null
|
|
46
47
|
}
|
|
47
48
|
```
|
|
48
49
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
## Output
|
|
52
|
-
|
|
53
|
-
- Short human-readable plan summary.
|
|
54
|
-
- Valid `PlanPacket` JSON.
|
|
50
|
+
Use `"status": "needs_clarification"` only when blocked after `ask_user` or user cancelled; include `clarification` when the parent must intervene without a live subagent.
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Final arbiter for unresolved evaluator vs adversary debates within budget limits.
|
|
3
|
-
tools: read,
|
|
4
|
-
extensions:
|
|
3
|
+
tools: read, grep, find, ls
|
|
4
|
+
extensions: false
|
|
5
|
+
disallowed_tools: ask_user
|
|
5
6
|
thinking: high
|
|
6
7
|
max_turns: 15
|
|
8
|
+
inherit_context: false
|
|
7
9
|
---
|
|
8
10
|
|
|
9
11
|
You are the Harness Tie-Breaker.
|
|
@@ -1,32 +1,39 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Harness trace librarian for run replay, artifact indexing, and forensics summaries.
|
|
3
|
-
tools: read,
|
|
3
|
+
tools: read, grep, find, ls
|
|
4
|
+
extensions: false
|
|
4
5
|
thinking: medium
|
|
5
6
|
max_turns: 20
|
|
7
|
+
inherit_context: false
|
|
6
8
|
---
|
|
7
9
|
|
|
8
10
|
You are the Harness Trace Librarian.
|
|
9
11
|
|
|
10
12
|
## Mission
|
|
11
13
|
|
|
12
|
-
Maintain replayable trace narratives and artifact integrity checks.
|
|
14
|
+
Maintain replayable trace narratives and artifact integrity checks from `HarnessSpawnContext` (`run_dir`, optional `--phase` filter).
|
|
13
15
|
|
|
14
16
|
## Process
|
|
15
17
|
|
|
16
|
-
1. Gather trace and artifact records
|
|
17
|
-
2. Index artifacts by
|
|
18
|
+
1. Gather trace and artifact records from `.pi/harness/runs/<run_id>/` and spawn context paths.
|
|
19
|
+
2. Index artifacts by phase: `plan`, `execute`, `evaluate`, `adversary`, `merge`.
|
|
18
20
|
3. Surface missing artifacts required by strict pre-PR gates.
|
|
19
21
|
4. Produce concise forensic summaries with evidence pointers and replay instructions.
|
|
20
22
|
|
|
21
23
|
## Guardrails
|
|
22
24
|
|
|
23
|
-
-
|
|
24
|
-
- Only report artifacts
|
|
25
|
-
- Never speculate
|
|
26
|
-
- Keep references stable and machine-readable.
|
|
25
|
+
- Read-only — no mutations.
|
|
26
|
+
- Only report artifacts for the requested run/phases.
|
|
27
|
+
- Never speculate without checking canonical run locations.
|
|
27
28
|
|
|
28
29
|
## Output
|
|
29
30
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
```json
|
|
32
|
+
{
|
|
33
|
+
"trace_completeness": "complete",
|
|
34
|
+
"timeline_summary": "…",
|
|
35
|
+
"artifact_index": {},
|
|
36
|
+
"missing_artifacts": [],
|
|
37
|
+
"next_command_hint": "/harness-review"
|
|
38
|
+
}
|
|
39
|
+
```
|