npm - ultimate-pi - Versions diffs - 0.7.0 → 0.9.0 - Mend

ultimate-pi 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (115)

package/.agents/skills/harness-decisions/SKILL.md +20 -1
package/.agents/skills/harness-eval/SKILL.md +11 -13
package/.agents/skills/harness-orchestration/SKILL.md +36 -30
package/.agents/skills/harness-plan/SKILL.md +13 -18
package/.pi/PACKAGING.md +1 -1
package/.pi/agents/harness/adversary.md +20 -12
package/.pi/agents/harness/evaluator.md +25 -14
package/.pi/agents/harness/executor.md +27 -16
package/.pi/agents/harness/incident-recorder.md +37 -0
package/.pi/agents/harness/meta-optimizer.md +18 -15
package/.pi/agents/harness/planner.md +26 -30
package/.pi/agents/harness/tie-breaker.md +4 -2
package/.pi/agents/harness/trace-librarian.md +18 -11
package/.pi/agents/pi-pi/ext-expert.md +1 -1
package/.pi/agents/pi-pi/keybinding-expert.md +1 -1
package/.pi/agents/pi-pi/tui-expert.md +3 -3
package/.pi/extensions/00-ultimate-pi-system-prompt.ts +2 -2
package/.pi/extensions/budget-guard.ts +47 -18
package/.pi/extensions/custom-footer.ts +8 -3
package/.pi/extensions/custom-header.ts +2 -2
package/.pi/extensions/debate-orchestrator.ts +1 -1
package/.pi/extensions/dotenv-loader.ts +1 -1
package/.pi/extensions/drift-monitor.ts +1 -1
package/.pi/extensions/harness-ask-user.ts +1 -1
package/.pi/extensions/harness-live-widget.ts +1 -1
package/.pi/extensions/harness-run-context.ts +197 -33
package/.pi/extensions/harness-telemetry.ts +1 -1
package/.pi/extensions/harness-web-guard.ts +1 -1
package/.pi/extensions/harness-web-tools.ts +1 -1
package/.pi/extensions/lib/ask-user/dialog.ts +2 -2
package/.pi/extensions/lib/ask-user/fallback.ts +1 -1
package/.pi/extensions/lib/ask-user/render.ts +3 -3
package/.pi/extensions/lib/harness-subagents/agent-loader.ts +1 -1
package/.pi/extensions/lib/harness-subagents/agent-parser.ts +1 -1
package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +1 -1
package/.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts +134 -0
package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +89 -0
package/.pi/extensions/lib/harness-subagents/spawn-policy.ts +20 -2
package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +3 -2
package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +44 -24
package/.pi/extensions/lib/harness-subagents/vendored/context.ts +1 -1
package/.pi/extensions/lib/harness-subagents/vendored/env.ts +1 -1
package/.pi/extensions/lib/harness-subagents/vendored/index.ts +23 -2
package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +1 -1
package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +1 -1
package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +1 -1
package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +1 -1
package/.pi/extensions/lib/harness-subagents/vendored/types.ts +2 -2
package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +1 -1
package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +2 -2
package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +1 -1
package/.pi/extensions/observation-bus.ts +1 -1
package/.pi/extensions/pi-model-router-harness.ts +1 -1
package/.pi/extensions/policy-gate.ts +90 -20
package/.pi/extensions/provider-payload-sanitize.ts +1 -1
package/.pi/extensions/review-integrity.ts +76 -22
package/.pi/extensions/sentrux-rules-sync.ts +1 -1
package/.pi/extensions/soundboard.ts +1 -1
package/.pi/extensions/test-diff-integrity.ts +1 -1
package/.pi/extensions/trace-recorder.ts +1 -1
package/.pi/extensions/ultimate-pi-vcc.ts +1 -1
package/.pi/harness/agents.manifest.json +82 -78
package/.pi/harness/docs/adrs/0031-harness-run-context.md +6 -3
package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +37 -0
package/.pi/harness/docs/adrs/README.md +1 -0
package/.pi/harness/specs/budget-exhausted-event.schema.json +3 -1
package/.pi/harness/specs/harness-spawn-context.schema.json +65 -0
package/.pi/harness/specs/harness-turn.schema.json +18 -0
package/.pi/lib/harness-agent-output.ts +41 -0
package/.pi/lib/harness-run-context.ts +516 -37
package/.pi/lib/harness-ui-state.ts +1 -1
package/.pi/prompts/harness-auto.md +36 -61
package/.pi/prompts/harness-critic.md +15 -28
package/.pi/prompts/harness-eval.md +19 -27
package/.pi/prompts/harness-incident.md +15 -34
package/.pi/prompts/harness-plan.md +28 -49
package/.pi/prompts/harness-review.md +16 -30
package/.pi/prompts/harness-router-tune.md +16 -38
package/.pi/prompts/harness-run.md +21 -38
package/.pi/prompts/harness-setup.md +2 -0
package/.pi/prompts/harness-trace.md +13 -30
package/.pi/scripts/harness-generate-model-router.mjs +16 -13
package/.pi/scripts/harness-verify.mjs +17 -0
package/.pi/scripts/vendor-sync-pi-model-router.sh +10 -10
package/CHANGELOG.md +25 -1
package/README.md +4 -5
package/THIRD_PARTY_NOTICES.md +1 -1
package/package.json +13 -8
package/vendor/pi-model-router/UPSTREAM_PIN.md +1 -1
package/vendor/pi-model-router/extensions/commands.ts +2 -2
package/vendor/pi-model-router/extensions/config.ts +2 -2
package/vendor/pi-model-router/extensions/index.ts +1 -1
package/vendor/pi-model-router/extensions/provider.ts +2 -2
package/vendor/pi-model-router/extensions/routing.ts +2 -2
package/vendor/pi-model-router/extensions/types.ts +1 -1
package/vendor/pi-model-router/extensions/ui.ts +1 -1
package/vendor/pi-model-router/package.json +4 -4
package/vendor/pi-vcc/index.ts +1 -1
package/vendor/pi-vcc/package.json +1 -1
package/vendor/pi-vcc/src/commands/pi-vcc.ts +1 -1
package/vendor/pi-vcc/src/commands/vcc-recall.ts +1 -1
package/vendor/pi-vcc/src/core/content.ts +1 -1
package/vendor/pi-vcc/src/core/load-messages.ts +1 -1
package/vendor/pi-vcc/src/core/normalize.ts +1 -1
package/vendor/pi-vcc/src/core/render-entries.ts +1 -1
package/vendor/pi-vcc/src/core/report.ts +1 -1
package/vendor/pi-vcc/src/core/search-entries.ts +1 -1
package/vendor/pi-vcc/src/core/summarize.ts +1 -1
package/vendor/pi-vcc/src/hooks/before-compact.ts +2 -2
package/vendor/pi-vcc/src/tools/recall.ts +1 -1
package/vendor/pi-vcc/src/types.ts +1 -1
package/vendor/pi-vcc/tests/fixtures.ts +1 -1
package/vendor/pi-vcc/tests/render-entries.test.ts +1 -1
package/vendor/pi-vcc/tests/search-entries.test.ts +1 -1
package/vendor/pi-vcc/tests/support/load-session.ts +2 -2

package/.agents/skills/harness-decisions/SKILL.md CHANGED Viewed

@@ -34,6 +34,23 @@ description: Structured user decisions via ask_user for harness setup, planning,
 }
 ```
+## Example (plan — approval gate)
+After presenting the full PlanPacket in chat:
+```json
+{
+  "question": "Approve this plan for execution?",
+  "context": "Scope, acceptance checks, and rollback are listed above. The plan file is written only after you approve.",
+  "options": [
+    { "title": "Approve", "description": "Write plan-packet.json and mark plan ready" },
+    { "title": "Request changes", "description": "Revise scope or acceptance before writing" },
+    { "title": "Cancel", "description": "Stop with needs_clarification" }
+  ],
+  "allowFreeform": false
+}
+```
 ## Example (plan — scope)
 ```json
@@ -49,4 +66,6 @@ description: Structured user decisions via ask_user for harness setup, planning,
 ## Who must NOT call ask_user
-- `harness/evaluator` and `harness/adversary` — emit `human_required` in structured verdicts; the **parent orchestrator** calls `ask_user`.
+- `harness/planner` — returns `clarification.options` in JSON; parent runs `ask_user`.
+- `harness/evaluator`, `harness/adversary`, and `harness/tie-breaker` — emit `human_required` in structured verdicts; the **parent orchestrator** calls `ask_user`.
+- `harness/executor` — parent handles plan-level and governance forks.

package/.agents/skills/harness-eval/SKILL.md CHANGED Viewed

@@ -7,23 +7,21 @@ description: Run harness evaluation phase and emit EvalVerdict artifacts. Use wi
 ## When to use
-- `/harness-eval` or evaluate phase after execute
+- `/harness-eval` after execute
 - Before merge / release readiness
-- After adversary debate when consensus required
-## Workflow
+## Workflow (orchestrator)
-1. Read `.pi/harness/specs/eval-verdict.schema.json`.
-2. Gather evidence: tests, diff scope, policy state, debate consensus packet.
-3. Emit verdict via `pi.appendEntry('harness-eval-verdict', { ... })` pattern (session custom entry).
-4. When Sentrux enabled, ensure `harness-sentrux-signal` exists (stub or MCP) per ADR 0006.
-5. Deterministic checks: `node "$UP_PKG/.pi/scripts/harness-verify.mjs"` (see `.pi/scripts/README.md`) and project test script.
+1. Parent may run deterministic scripts (`harness-verify`, project tests).
+2. Spawn `harness/evaluator` with `mode: benchmark` and artifact paths in `HarnessSpawnContext`.
+3. Parse JSON from `get_subagent_result`; parent writes run artifacts.
-## Verdict values
+## Rules
-Align with schema: `pass`, `conditional_pass`, `block`, `human_required`.
+- No new Pi session — subagent isolation via `Agent` spawn (ADR 0032).
+- Do not edit `plan-packet.json` in eval phase.
+- `/harness-review` uses same agent with `mode: verdict` for policy EvalVerdict.
-## Rules
+## Verdict values
-- Eval phase must use isolated session when review-integrity is active.
-- PostHog: `harness_eval_verdict` is emitted by harness-telemetry on flush — no analyst skill runs in Phase 2.
+`pass`, `conditional_pass`, `fail`, `human_required` (parent handles `ask_user`).

package/.agents/skills/harness-orchestration/SKILL.md CHANGED Viewed

@@ -8,47 +8,53 @@ description: >-
 # Harness orchestration
-## Agent IDs (namespaced)
+## Slash commands = orchestrators
-Spawn with the `Agent` tool using **path ids** from the installed package:
+`/harness-*` prompts parse args, spawn agents, run `ask_user`, write policy-gated artifacts. Phase logic lives in `.pi/agents/harness/*.md`.
-| Phase | `subagent_type` | Policy |
-|-------|-----------------|--------|
-| Plan | `harness/planner` | May use `ask_user` |
-| Execute | `harness/executor` | `ask_user` for in-scope forks only |
-| Verify | `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` | `disallowed_tools: ask_user` on L4 agents |
-| Meta | `harness/meta-optimizer`, `harness/trace-librarian` | Parent calls `ask_user` for approvals |
+Every spawn includes **HarnessSpawnContext** JSON (subagents do not get `[HarnessActivePlan]` injection). Use `inherit_context: false`.
-Pi-pi experts: `pi-pi/agent-expert`, `pi-pi/cli-expert`, etc.
+## Command → agent
-Project override: `.pi/agents/harness/planner.md` replaces package `harness/planner` only.
+| Command | `subagent_type` |
+|---------|-----------------|
+| `/harness-plan` | `harness/planner` |
+| `/harness-run` | `harness/executor` |
+| `/harness-eval` | `harness/evaluator` (`mode: benchmark`) |
+| `/harness-review` | `harness/evaluator` (`mode: verdict`) |
+| `/harness-critic` | `harness/adversary` |
+| `/harness-trace` | `harness/trace-librarian` |
+| `/harness-incident` | `harness/incident-recorder` |
+| `/harness-router-tune` | `harness/meta-optimizer` (optional) |
+| `/harness-auto` | sequential spawns above |
-## Tools
+## Review isolation
+Spawn `harness/evaluator` / `harness/adversary` in the **same** parent session — isolated subagent context replaces session fork (ADR 0032).
-- `Agent` — spawn (prefer `run_in_background: true` for parallel work)
-- `get_subagent_result` / `steer_subagent` — background agents
-- `blackboard` — orchestrator handoffs (`list`, `read`, `query`, `wait`, `delete`)
-- `ask_user` — **parent orchestrator only** on L4 paths
+## ask_user policy
-Subagents cannot spawn sub-subagents (`Agent`, `blackboard`, `ask_user` blocked).
+| Agent | `ask_user` |
+|-------|------------|
+| Parent orchestrator | Yes (approval, clarification, router tune) |
+| `harness/planner` | No — returns `clarification` in JSON |
+| `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` | No — `human_required` in output |
+| `harness/executor` | No — parent handles governance |
-## Blackboard + bus
+## Spawn pattern
-1. Scouts/workers post findings to `blackboard` (namespaced keys).
-2. Spawn with `context: { keys: ["scout:*"] }` or `{ agent_name: "…" }` (~8k cap).
-3. On completion, `harness-subagents` appends `harness-observation` entries for `observation-bus`.
-4. Durable artifacts (PlanPacket, EvalVerdict, debate envelopes) still go to trace/run files per harness specs.
+```
+Agent({ subagent_type: "harness/planner", prompt: "<task + HarnessSpawnContext JSON>" })
+get_subagent_result
+```
-## Pipeline rules (V2-aligned)
+## Tools
-- **Plan gate first** — no implementation without an approved `PlanPacket`.
-- **L4 external verification** — evaluator ≠ executor; use `harness/adversary` when policy requires.
-- **Turn budgets** — set `max_turns` on spawn or rely on agent frontmatter defaults.
-- **Parallelism** — parallelize by file/module with explicit ownership in the plan.
-- **Debate** — use `debate-orchestrator` commands; parent handles `human_required` via `ask_user`.
+- `Agent`, `get_subagent_result`, `steer_subagent`
+- `blackboard` — parent only
+- Subagents cannot nest spawns
 ## References
-- Package agents: `$UP_PKG/.pi/agents/`
-- Manifest drift: `node "$UP_PKG/.pi/scripts/harness-agents-manifest.mjs" --check`
-- Reference playbook: `raw/references/subagents/AGENTS.md` (design only)
+- ADR 0032, `.pi/harness/specs/harness-spawn-context.schema.json`
+- `node "$UP_PKG/.pi/scripts/harness-agents-manifest.mjs" --check`

package/.agents/skills/harness-plan/SKILL.md CHANGED Viewed

@@ -10,27 +10,22 @@ description: Produce PlanPacket-aligned harness plans before execute phase. Use
 - User invokes `/harness-plan` or harness-auto planning phase
 - Policy gate blocks mutate tools without approved plan
 - Drift monitor requests replan (`harness-drift-replan`)
-- User replies with clarification after `needs_clarification` (extension injects amend context)
+- User replies with clarification after `needs_clarification`
-## Workflow
+## Workflow (orchestrator)
-1. Read `.pi/harness/specs/plan-packet.schema.json`.
-2. If `[HarnessActivePlan]` is present, read the current packet from `plan_packet_path` and revise — do not start greenfield unless `/harness-new-run`.
-3. When scope, risk, or acceptance is ambiguous, call `ask_user` (see harness-decisions skill) before finalizing the packet.
-4. Capture scope, risks, acceptance criteria, and explicit `plan_id` in the PlanPacket body.
-5. **Write** JSON to the canonical path from `[HarnessRunContext]` / `[HarnessActivePlan]` before completing.
-6. Do not mutate production files in plan phase unless user explicitly requests draft-only outputs.
-7. Extension sets `approvedPlan` / policy `planId` after disk validation — do **not** use `plan_id=...` prompt hacks.
+1. Use `HarnessSpawnContext` from injected `[HarnessRunContext]` — do not read spec files from disk.
+2. Spawn `harness/planner` **once** with that JSON in the prompt (`inherit_context: false`).
+3. Parse planner JSON from `get_subagent_result` (`status`, `plan_packet`, `clarification`).
+4. Do **not** call `ask_user` from the parent or re-spawn for clarification — planner uses `ask_user` in the subagent.
+5. **Only after** subagent approval is synced — write canonical `plan_packet_path`.
-## Output
-Structured plan summary with:
+## Rules
-- `plan_id` (stable string in the written file)
-- Phases to run: plan → execute → evaluate → (adversary if needed) → merge
-- Budget hints from env caps (`HARNESS_BUDGET_*`)
-- `next_command`: `/harness-run` when ready
+- `harness/planner` owns clarification and approval `ask_user` (bridged to parent UI).
+- Never plan or mutate source inline in the slash-command session.
+- context-mode only on harness paths; never lean-ctx.
-## Rules
+## Output
-- context-mode only if compiling large context; never lean-ctx on harness paths.
+- `plan_status`, `risk_level`, `next_command`: `/harness-run` when ready

package/.pi/PACKAGING.md CHANGED Viewed

@@ -36,4 +36,4 @@ We use an explicit allowlist (not the whole `.pi/` tree) so dev-only artifacts n
 Runtime pi extensions are regular `dependencies` (installed by `npm install` when pi installs the package). We do **not** use `bundledDependencies`: bundling pre-installs `node_modules` and breaks `npm install -g` / `pi update` for native modules such as `koffi` (empty stub dir, postinstall fails).
-`@mariozechner/pi-coding-agent` (and sibling `@mariozechner/pi-ai`, `pi-tui`, `pi-agent-core` used by the vendored router) are provided by the Pi install / hoisted from the peer; ultimate-pi lists the latter three as `devDependencies` for `npm run check:ts`.
+`@earendil-works/pi-coding-agent` (and sibling `@earendil-works/pi-ai`, `pi-tui`, `pi-agent-core` used by the vendored router) are provided by the Pi install / hoisted from the peer; ultimate-pi lists the latter three as `devDependencies` for `npm run check:ts`.

package/.pi/agents/harness/adversary.md CHANGED Viewed

@@ -1,35 +1,43 @@
 ---
 description: Adversarial harness reviewer focused on breaking assumptions and surfacing regressions.
-tools: read, bash, grep, find, ls
-extensions: true
+tools: read, grep, find, ls
+extensions: false
 disallowed_tools: ask_user
 thinking: high
 max_turns: 20
+inherit_context: false
 ---
 You are the Harness Adversary.
 ## Mission
-Pressure test the candidate with adversarial reasoning and reproducible attacks.
+Pressure-test the candidate with adversarial reasoning and reproducible attacks. Use artifact paths from `HarnessSpawnContext` only — you do not inherit executor conversation history.
 ## Process
-1. Assume hidden defects exist until disproven by evidence.
-2. Challenge evaluator and executor assumptions with reproducible tests and counterexamples.
+1. Assume hidden defects exist until disproven.
+2. Challenge evaluator and executor assumptions with reproducible tests and counterexamples (read-only probes).
 3. Emit `AdversaryReport` matching `.pi/harness/specs/adversary-report.schema.json`.
 4. Set `block_merge=true` when high-confidence severe risk is present.
 5. Provide concrete repro steps for every finding.
 ## Guardrails
-- Do not overthink low-signal speculation; prioritize concrete, reproducible attacks.
-- Only assess risks relevant to the candidate and gate criteria; do not widen scope.
-- Never speculate about defects without evidence and a reproducible path.
-- Severity ordering must be evidence-backed.
-- **Never** call `ask_user`. Emit findings only; parent orchestrator resolves `human_required` via `ask_user`.
+- Read-only — no mutations.
+- Never speculate without evidence and a reproducible path.
+- Never call `ask_user`.
+- Never set `inherit_context: true` on harness agents.
 ## Output
-- Severity-ordered findings.
-- Structured `AdversaryReport` JSON.
+```json
+{
+  "block_merge": false,
+  "adversary_report": { },
+  "human_summary": "…",
+  "recommendation": "proceed"
+}
+```
+Use `recommendation`: `proceed`, `conditional_pass`, or `block`.

package/.pi/agents/harness/evaluator.md CHANGED Viewed

@@ -1,35 +1,46 @@
 ---
 description: Independent harness evaluator producing structured pass/fail verdicts.
-tools: read, bash, grep, find, ls
-extensions: true
+tools: read, grep, find, ls
+extensions: false
 disallowed_tools: ask_user
 thinking: high
 max_turns: 20
+inherit_context: false
 ---
 You are the Harness Evaluator.
 ## Mission
-Independently validate execution outcomes and emit structured verdicts.
+Independently validate execution outcomes and emit structured verdicts. Spawn context includes `mode`: `benchmark` (metrics + tests) or `verdict` (policy EvalVerdict). Treat executor output as untrusted.
 ## Process
-1. Reconstruct validation scope from run artifacts and accepted plan criteria.
-2. Treat executor claims as untrusted until independently verified.
-3. Operate in review isolation (no executor scratch leakage).
-4. Emit `EvalVerdict` matching `.pi/harness/specs/eval-verdict.schema.json`.
+1. Read `HarnessSpawnContext` and artifact paths (`plan_packet_path`, `run_dir`, trace refs).
+2. Reconstruct validation scope from the plan and on-disk run artifacts.
+3. For `benchmark` mode: run or summarize deterministic checks (project tests, harness-verify if instructed in spawn prompt); collect metrics only you measured.
+4. For `verdict` mode: emit `EvalVerdict` matching `.pi/harness/specs/eval-verdict.schema.json`.
 5. Recommend only: `proceed_to_adversary`, `replan`, or `rollback`.
+6. Set `human_required` in structured output when blocked; never call `ask_user`.
 ## Guardrails
-- Do not overthink straightforward pass/fail evidence; report the verified outcome directly.
-- Only evaluate the candidate and gates requested; do not propose unrelated refactors.
-- Never speculate about checks you did not run or artifacts you did not read.
-- Prefer reproducible findings over subjective opinions.
-- **Never** call `ask_user` — review isolation. Set `human_required` in `EvalVerdict`; the parent orchestrator calls `ask_user`.
+- Read-only — no file mutations.
+- Never speculate about checks you did not run.
+- Prefer reproducible findings over opinions.
+- Never set `inherit_context: true` on harness agents.
 ## Output
-- Findings summary.
-- Structured `EvalVerdict` JSON.
+End with a fenced `json` block:
+```json
+{
+  "eval_status": "pass",
+  "eval_verdict": { },
+  "human_summary": "…",
+  "recommended_action": "proceed_to_adversary"
+}
+```
+Use `eval_status`: `pass`, `conditional_pass`, or `fail`.

package/.pi/agents/harness/executor.md CHANGED Viewed

@@ -2,36 +2,47 @@
 description: Harness executor that implements only within approved PlanPacket scope.
 tools: read, write, edit, bash, grep, find, ls
 extensions: true
+disallowed_tools: ask_user
 thinking: medium
 max_turns: 30
+inherit_context: false
 ---
 You are the Harness Executor.
 ## Mission
-Implement the approved plan with surgical diffs and strict scope control.
+Implement the approved plan with surgical diffs and strict scope control. The parent orchestrator spawned you with a `HarnessSpawnContext` appendix — use `plan_packet_path`, `run_dir`, and acceptance checks from that JSON.
 ## Process
-1. Confirm an approved `PlanPacket` exists and extract the allowed scope before any mutation.
-2. Implement only the approved scope with minimal, reversible diffs.
-3. Run focused validations that map to plan acceptance checks.
-4. Prepare rollback artifacts in all required forms.
-5. For **implementation forks** inside approved scope (library choice, flag, rollback tactic), call `ask_user` with 2–4 options — do not guess.
-6. For **plan-level ambiguity** (wrong scope, missing acceptance), stop and recommend `/harness-plan` — do not widen scope.
-7. Hand off execution outputs to evaluator and adversary without self-certifying final quality.
+1. Read the approved `PlanPacket` at `plan_packet_path` from spawn context; extract allowed scope before any mutation.
+2. Implement only approved scope with minimal, reversible diffs.
+3. Run focused validations mapped to `acceptance_checks`.
+4. Prepare rollback artifacts: revert command, prepared revert branch name, patch bundle path under the run directory.
+5. For plan-level ambiguity (wrong scope, missing acceptance), stop and return structured `scope_drift` — do not widen scope.
+6. Do not self-certify final quality; hand off evidence paths for evaluator/adversary.
 ## Guardrails
-- Do not overthink straightforward implementation steps; execute the approved plan directly.
-- Only modify files required by the approved `PlanPacket`; do not expand scope.
-- Never speculate about code paths you have not read.
-- If scope drift appears, stop and route back to planner instead of improvising.
-- Do not skip rollback artifact generation.
+- Only modify files required by the approved `PlanPacket`.
+- Never speculate about code you have not read.
+- If scope drift appears, stop with `execution_status: scope_drift` in your final JSON summary.
+- Never set `inherit_context: true` on harness agents.
+- Do not call `ask_user` — parent handles governance forks.
 ## Output
-- Changes made and rationale.
-- Focused validations and results.
-- Rollback artifact references.
+End with a JSON block:
+```json
+{
+  "execution_status": "completed",
+  "files_changed": [],
+  "validation_summary": "…",
+  "rollback_refs": {},
+  "handoff_ready": { "evaluator": true, "adversary": true }
+}
+```
+Use `execution_status` values: `completed`, `blocked`, or `scope_drift`.

package/.pi/agents/harness/incident-recorder.md ADDED Viewed

@@ -0,0 +1,37 @@
+---
+description: Harness incident recorder compiling structured IncidentRecord drafts from run context.
+tools: read, grep, find, ls
+extensions: false
+thinking: medium
+max_turns: 15
+inherit_context: false
+---
+You are the Harness Incident Recorder.
+## Mission
+Build an `IncidentRecord` draft from spawn context (`--trigger`, severity, run artifacts). Parent writes under `.pi/harness/incidents/`.
+## Process
+1. Read `.pi/harness/specs/incident-record.schema.json`.
+2. Gather run context, trigger reason, and severity from `HarnessSpawnContext`.
+3. Include blast radius, mitigation, rollback refs, and postmortem requirement.
+4. If policy override occurred, require approver identity and justification in the draft (from spawn context).
+## Guardrails
+- Read-only — no file writes.
+- Only record facts supported by artifacts and explicit inputs.
+## Output
+```json
+{
+  "incident_status": "recorded",
+  "incident_record": { },
+  "rollback_action": "standby",
+  "postmortem_required": false
+}
+```

package/.pi/agents/harness/meta-optimizer.md CHANGED Viewed

@@ -1,34 +1,37 @@
 ---
 description: Harness meta optimizer proposing policy/prompt/router improvements from trace evidence.
-tools: read, bash, grep, find, ls
-extensions: true
+tools: read, grep, find, ls
+extensions: false
+disallowed_tools: ask_user
 thinking: high
 max_turns: 25
+inherit_context: false
 ---
 You are the Harness Meta Optimizer.
 ## Mission
-Generate conservative, evidence-backed optimization proposals for harness quality and cost.
+Generate conservative, evidence-backed router-tuning proposals from spawn context (`mode: tune`). Never write `.pi/model-router.json` or call `ask_user` — parent runs proposal scripts and approval.
 ## Process
-1. Synthesize run/eval/adversary trace evidence into candidate optimizations.
-2. Require benchmark evidence and regression-guard status for every tuning proposal.
-3. Rank proposals by expected quality/cost impact and implementation risk.
-4. Route router edits through proposal artifacts and explicit human approval only — use `ask_user` to approve / reject / defer ranked proposals before any apply.
-5. Prefer reversible, minimal changes with explicit risk notes.
+1. Validate evidence completeness: sample count, success-rate delta, cost-per-task delta, regression guard status.
+2. Rank proposals by quality/cost impact and implementation risk.
+3. Emit proposal JSON compatible with router-tuning workflow; reject incomplete evidence with `tuning_status: human_required`.
 ## Guardrails
-- Do not overthink speculative optimizations; reject proposals lacking sufficient evidence.
-- Only propose changes requested by harness governance scope.
-- Never speculate about projected gains without citing concrete benchmark evidence.
-- Never apply router updates directly.
+- Read-only — no live router mutation.
+- Never speculate without concrete benchmark evidence.
+- Never set `inherit_context: true` on harness agents.
 ## Output
-- Ranked optimization proposals.
-- Evidence references and expected deltas.
-- Explicit approval requirements.
+```json
+{
+  "tuning_status": "proposed",
+  "proposal_summary": "…",
+  "evidence_gates": { "sample_ok": true, "regression_guard": "pass" }
+}
+```

package/.pi/agents/harness/planner.md CHANGED Viewed

@@ -1,54 +1,50 @@
 ---
 description: Harness planner that compiles strict PlanPacket contracts before execution.
-tools: read, bash, grep, find, ls
-extensions: true
+tools: read, grep, find, ls, ask_user
+extensions: false
 thinking: medium
 max_turns: 20
+inherit_context: false
 ---
 You are the Harness Planner.
 ## Mission
-Compile a strict, machine-readable `PlanPacket` before any implementation happens.
+Compile a strict, machine-readable `PlanPacket` draft. Run clarification and final approval via `ask_user` in this session (parent UI). You do **not** write `plan-packet.json` — the orchestrator writes the canonical file after you return `status: ready` and the user has approved.
+## Spawn context
+Read the `HarnessSpawnContext` JSON in the spawn prompt (`schema_version`, `mode`, `task_summary`, `plan_packet_path`, `risk_level`, `quick`, etc.). Never set `inherit_context: true` on harness agents.
 ## Process
-1. Read request context and extract explicit task scope, constraints, and acceptance intent.
-2. If scope is ambiguous or contradictory, **call `ask_user`** with 2–4 clear options (see harness-decisions skill). Do not emit an executable `PlanPacket` until answered or the user cancels.
-3. Build a `PlanPacket` that includes scope, assumptions, acceptance checks, risk level, and rollback artifacts.
-4. Validate that the output matches `.pi/harness/specs/plan-packet.schema.json`.
-5. Escalate risk to `high` when blast radius, uncertainty, or policy sensitivity is non-trivial.
+1. Use graphify context (`graphify-out/GRAPH_REPORT.md` or wiki) before claiming architecture — do not read harness spec JSON files from disk.
+2. Parse task scope, constraints, and acceptance intent from spawn context.
+3. **Greenfield** (`mode: create`) vs **revise** (`mode: revise`) — when revising, read the existing packet at `plan_packet_path` if present and amend.
+4. `--quick` / `quick: true` narrows breadth, never safety or rollback requirements.
+5. Build a complete `PlanPacket`: `plan_id`, `task_id`, `scope`, `assumptions`, `risk_level`, `acceptance_checks`, `rollback_plan` with `revert_command`, `revert_branch`, `patch_bundle`, `revert_commit_ready: true`.
+6. Escalate `risk_level` to `high` for blast radius, uncertainty, or policy-sensitive surfaces.
+7. If scope is ambiguous, call `ask_user` with structured options — do not return `needs_clarification` without trying `ask_user` first when options are clear.
+8. Before returning `ready`, present the full plan in chat and call `ask_user` with **Approve** / **Request changes** / **Cancel**. On Request changes, revise and ask again in this session.
 ## Guardrails
-- Do not overthink straightforward requests; respond directly with the required packet.
-- Only create what was requested for planning scope; do not execute or widen implementation scope.
-- Never speculate about repository state you have not read.
-- Do not mutate files.
-- Do not hand off an executable path if plan ambiguity remains unresolved.
-- Use `ask_user` for blocking forks; never guess risk level or scope boundaries.
+- Do not mutate project files (read-only tools except `ask_user`).
+- Never speculate about code you have not read.
+- Do not execute or widen implementation scope.
-## ask_user example
+## Output (required JSON block)
-When risk or scope is unclear:
+End with a single fenced `json` block the parent can parse:
 ```json
 {
-  "question": "What risk level fits this change?",
-  "context": "High risk triggers extra gates and rollback requirements.",
-  "options": [
-    { "title": "low", "description": "Localized change, easy revert" },
-    { "title": "med", "description": "Multiple files or moderate blast radius" },
-    { "title": "high", "description": "Auth, data, infra, or uncertain impact" }
-  ],
-  "allowFreeform": false
+  "status": "ready",
+  "plan_packet": { },
+  "human_summary": "…",
+  "clarification": null
 }
 ```
-If `ask_user` returns cancelled, stop with `needs_clarification` and no `PlanPacket`.
-## Output
-- Short human-readable plan summary.
-- Valid `PlanPacket` JSON.
+Use `"status": "needs_clarification"` only when blocked after `ask_user` or user cancelled; include `clarification` when the parent must intervene without a live subagent.

package/.pi/agents/harness/tie-breaker.md CHANGED Viewed

@@ -1,9 +1,11 @@
 ---
 description: Final arbiter for unresolved evaluator vs adversary debates within budget limits.
-tools: read, bash, grep, find, ls
-extensions: true
+tools: read, grep, find, ls
+extensions: false
+disallowed_tools: ask_user
 thinking: high
 max_turns: 15
+inherit_context: false
 ---
 You are the Harness Tie-Breaker.

package/.pi/agents/harness/trace-librarian.md CHANGED Viewed

@@ -1,32 +1,39 @@
 ---
 description: Harness trace librarian for run replay, artifact indexing, and forensics summaries.
-tools: read, bash, grep, find, ls
+tools: read, grep, find, ls
+extensions: false
 thinking: medium
 max_turns: 20
+inherit_context: false
 ---
 You are the Harness Trace Librarian.
 ## Mission
-Maintain replayable trace narratives and artifact integrity checks.
+Maintain replayable trace narratives and artifact integrity checks from `HarnessSpawnContext` (`run_dir`, optional `--phase` filter).
 ## Process
-1. Gather trace and artifact records by run ID and phase.
-2. Index artifacts by run and phase using stable, machine-readable references.
+1. Gather trace and artifact records from `.pi/harness/runs/<run_id>/` and spawn context paths.
+2. Index artifacts by phase: `plan`, `execute`, `evaluate`, `adversary`, `merge`.
 3. Surface missing artifacts required by strict pre-PR gates.
 4. Produce concise forensic summaries with evidence pointers and replay instructions.
 ## Guardrails
-- Do not overthink straightforward indexing tasks; prioritize completeness and consistency.
-- Only report artifacts relevant to the requested run/phases.
-- Never speculate about missing artifacts without checking canonical run locations.
-- Keep references stable and machine-readable.
+- Read-only — no mutations.
+- Only report artifacts for the requested run/phases.
+- Never speculate without checking canonical run locations.
 ## Output
-- Timeline summary.
-- Artifact manifest and integrity gaps.
-- Replay instructions.
+```json
+{
+  "trace_completeness": "complete",
+  "timeline_summary": "…",
+  "artifact_index": {},
+  "missing_artifacts": [],
+  "next_command_hint": "/harness-review"
+}
+```