ultimate-pi 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. package/.agents/skills/harness-decisions/SKILL.md +20 -1
  2. package/.agents/skills/harness-eval/SKILL.md +11 -13
  3. package/.agents/skills/harness-orchestration/SKILL.md +36 -30
  4. package/.agents/skills/harness-plan/SKILL.md +13 -18
  5. package/.pi/PACKAGING.md +1 -1
  6. package/.pi/agents/harness/adversary.md +20 -12
  7. package/.pi/agents/harness/evaluator.md +25 -14
  8. package/.pi/agents/harness/executor.md +27 -16
  9. package/.pi/agents/harness/incident-recorder.md +37 -0
  10. package/.pi/agents/harness/meta-optimizer.md +18 -15
  11. package/.pi/agents/harness/planner.md +27 -30
  12. package/.pi/agents/harness/tie-breaker.md +4 -2
  13. package/.pi/agents/harness/trace-librarian.md +18 -11
  14. package/.pi/agents/pi-pi/ext-expert.md +1 -1
  15. package/.pi/agents/pi-pi/keybinding-expert.md +1 -1
  16. package/.pi/agents/pi-pi/tui-expert.md +3 -3
  17. package/.pi/extensions/00-ultimate-pi-system-prompt.ts +2 -2
  18. package/.pi/extensions/budget-guard.ts +1 -1
  19. package/.pi/extensions/custom-footer.ts +8 -3
  20. package/.pi/extensions/custom-header.ts +2 -2
  21. package/.pi/extensions/debate-orchestrator.ts +1 -1
  22. package/.pi/extensions/dotenv-loader.ts +1 -1
  23. package/.pi/extensions/drift-monitor.ts +1 -1
  24. package/.pi/extensions/harness-ask-user.ts +1 -1
  25. package/.pi/extensions/harness-live-widget.ts +1 -1
  26. package/.pi/extensions/harness-run-context.ts +52 -10
  27. package/.pi/extensions/harness-telemetry.ts +1 -1
  28. package/.pi/extensions/harness-web-guard.ts +1 -1
  29. package/.pi/extensions/harness-web-tools.ts +1 -1
  30. package/.pi/extensions/lib/ask-user/dialog.ts +2 -2
  31. package/.pi/extensions/lib/ask-user/fallback.ts +1 -1
  32. package/.pi/extensions/lib/ask-user/render.ts +3 -3
  33. package/.pi/extensions/lib/harness-subagents/agent-loader.ts +1 -1
  34. package/.pi/extensions/lib/harness-subagents/agent-parser.ts +1 -1
  35. package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +1 -1
  36. package/.pi/extensions/lib/harness-subagents/harness-subagent-policy.ts +134 -0
  37. package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +2 -2
  38. package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +9 -5
  39. package/.pi/extensions/lib/harness-subagents/vendored/context.ts +1 -1
  40. package/.pi/extensions/lib/harness-subagents/vendored/env.ts +1 -1
  41. package/.pi/extensions/lib/harness-subagents/vendored/index.ts +2 -2
  42. package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +1 -1
  43. package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +1 -1
  44. package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +1 -1
  45. package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +1 -1
  46. package/.pi/extensions/lib/harness-subagents/vendored/types.ts +2 -2
  47. package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +1 -1
  48. package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +2 -2
  49. package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +1 -1
  50. package/.pi/extensions/observation-bus.ts +1 -1
  51. package/.pi/extensions/pi-model-router-harness.ts +1 -1
  52. package/.pi/extensions/policy-gate.ts +86 -16
  53. package/.pi/extensions/provider-payload-sanitize.ts +1 -1
  54. package/.pi/extensions/review-integrity.ts +76 -22
  55. package/.pi/extensions/sentrux-rules-sync.ts +1 -1
  56. package/.pi/extensions/soundboard.ts +1 -1
  57. package/.pi/extensions/test-diff-integrity.ts +1 -1
  58. package/.pi/extensions/trace-recorder.ts +1 -1
  59. package/.pi/extensions/ultimate-pi-vcc.ts +1 -1
  60. package/.pi/harness/agents.manifest.json +16 -12
  61. package/.pi/harness/docs/adrs/0031-harness-run-context.md +5 -2
  62. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +37 -0
  63. package/.pi/harness/docs/adrs/README.md +1 -0
  64. package/.pi/harness/specs/harness-spawn-context.schema.json +65 -0
  65. package/.pi/lib/harness-agent-output.ts +41 -0
  66. package/.pi/lib/harness-run-context.ts +352 -7
  67. package/.pi/lib/harness-ui-state.ts +1 -1
  68. package/.pi/prompts/harness-auto.md +36 -61
  69. package/.pi/prompts/harness-critic.md +15 -28
  70. package/.pi/prompts/harness-eval.md +19 -27
  71. package/.pi/prompts/harness-incident.md +15 -34
  72. package/.pi/prompts/harness-plan.md +31 -50
  73. package/.pi/prompts/harness-review.md +16 -30
  74. package/.pi/prompts/harness-router-tune.md +16 -38
  75. package/.pi/prompts/harness-run.md +21 -38
  76. package/.pi/prompts/harness-setup.md +2 -0
  77. package/.pi/prompts/harness-trace.md +13 -30
  78. package/.pi/scripts/harness-generate-model-router.mjs +16 -13
  79. package/.pi/scripts/harness-verify.mjs +16 -0
  80. package/.pi/scripts/vendor-sync-pi-model-router.sh +10 -10
  81. package/CHANGELOG.md +19 -1
  82. package/README.md +4 -5
  83. package/THIRD_PARTY_NOTICES.md +1 -1
  84. package/package.json +13 -8
  85. package/vendor/pi-model-router/UPSTREAM_PIN.md +1 -1
  86. package/vendor/pi-model-router/extensions/commands.ts +2 -2
  87. package/vendor/pi-model-router/extensions/config.ts +2 -2
  88. package/vendor/pi-model-router/extensions/index.ts +1 -1
  89. package/vendor/pi-model-router/extensions/provider.ts +2 -2
  90. package/vendor/pi-model-router/extensions/routing.ts +2 -2
  91. package/vendor/pi-model-router/extensions/types.ts +1 -1
  92. package/vendor/pi-model-router/extensions/ui.ts +1 -1
  93. package/vendor/pi-model-router/package.json +4 -4
  94. package/vendor/pi-vcc/index.ts +1 -1
  95. package/vendor/pi-vcc/package.json +1 -1
  96. package/vendor/pi-vcc/src/commands/pi-vcc.ts +1 -1
  97. package/vendor/pi-vcc/src/commands/vcc-recall.ts +1 -1
  98. package/vendor/pi-vcc/src/core/content.ts +1 -1
  99. package/vendor/pi-vcc/src/core/load-messages.ts +1 -1
  100. package/vendor/pi-vcc/src/core/normalize.ts +1 -1
  101. package/vendor/pi-vcc/src/core/render-entries.ts +1 -1
  102. package/vendor/pi-vcc/src/core/report.ts +1 -1
  103. package/vendor/pi-vcc/src/core/search-entries.ts +1 -1
  104. package/vendor/pi-vcc/src/core/summarize.ts +1 -1
  105. package/vendor/pi-vcc/src/hooks/before-compact.ts +2 -2
  106. package/vendor/pi-vcc/src/tools/recall.ts +1 -1
  107. package/vendor/pi-vcc/src/types.ts +1 -1
  108. package/vendor/pi-vcc/tests/fixtures.ts +1 -1
  109. package/vendor/pi-vcc/tests/render-entries.test.ts +1 -1
  110. package/vendor/pi-vcc/tests/search-entries.test.ts +1 -1
  111. package/vendor/pi-vcc/tests/support/load-session.ts +2 -2
@@ -5,49 +5,30 @@ argument-hint: "--trigger <reason> [--run <run-id>] [--severity low|med|high|cri
5
5
 
6
6
  # harness-incident
7
7
 
8
- Create a structured incident record for blocked or failed harness runs.
8
+ Orchestrator spawn `harness/incident-recorder`; parent writes incident file.
9
9
 
10
10
  ## Step 0 — Parse arguments
11
11
 
12
- Read `$ARGUMENTS` and parse:
13
-
14
12
  - required: `--trigger <reason>`
15
- - optional: `--run <run-id>` (recovery only), `--severity low|med|high|critical`
16
-
17
- If `--trigger` is missing, stop and return:
18
-
19
- `Usage: /harness-incident --trigger <reason> [--run <run-id>] [--severity low|med|high|critical]`
20
-
21
- Use active run when `--run` is omitted.
22
-
23
- ## Process
24
-
25
- 1. Gather run context, trigger reason, and severity context.
26
- 2. Build `IncidentRecord` with blast radius, mitigation, rollback, and override metadata.
27
- 3. Validate incident output contract before finalizing.
28
-
29
- ## Requirements
13
+ - optional: `--run <run-id>`, `--severity low|med|high|critical`
30
14
 
31
- - Emit `IncidentRecord` matching `.pi/harness/specs/incident-record.schema.json`.
32
- - Capture blast radius, mitigation, rollback refs, and postmortem requirement.
33
- - If a policy block is overridden, record single-human approver and explicit justification.
15
+ If `--trigger` missing:
34
16
 
35
- ## Guardrails
17
+ `Usage: /harness-incident --trigger <reason> [--run <run-id>] [--severity …]`
36
18
 
37
- - Do not overthink incident narrative; prioritize factual, auditable records.
38
- - Only record details supported by available run artifacts and explicit inputs.
39
- - Never omit override approver identity or justification when override occurred.
19
+ ## Orchestration (required)
40
20
 
41
- ## Output
21
+ 1. Build `HarnessSpawnContext` with `mode: incident`, trigger, severity, run paths.
22
+ 2. Spawn:
42
23
 
43
- - Incident summary.
44
- - Structured `IncidentRecord` JSON.
45
- - Immediate rollback decision trail.
24
+ ```
25
+ Agent({ subagent_type: "harness/incident-recorder", prompt: "…" })
26
+ ```
46
27
 
47
- ## Completion behavior
28
+ 3. `get_subagent_result` — validate `IncidentRecord` draft; parent writes under `.pi/harness/incidents/`.
48
29
 
49
- Finish with:
30
+ ## Completion
50
31
 
51
- - `incident_status` (`recorded` or `needs_input`)
52
- - rollback action (`execute_now` or `standby`)
53
- - postmortem requirement (`true`/`false`)
32
+ - `incident_status`: `recorded` or `needs_input`
33
+ - `rollback_action`: `execute_now` or `standby`
34
+ - `postmortem_required`: true/false
@@ -5,75 +5,56 @@ argument-hint: "\"<task>\" [--risk low|med|high] [--budget <amount>] [--quick]"
5
5
 
6
6
  # harness-plan
7
7
 
8
- Create a machine-readable plan packet before execution.
8
+ Orchestrator only — spawn `harness/planner`, present draft, run `ask_user`, write plan after Approve. Do **not** plan inline in this session.
9
9
 
10
10
  ## Step 0 — Parse arguments
11
11
 
12
- Read `$ARGUMENTS` and parse:
12
+ Read `$ARGUMENTS`:
13
13
 
14
14
  - task statement (required)
15
- - optional flags: `--risk low|med|high`, `--budget <amount>`, `--quick`
15
+ - optional: `--risk low|med|high`, `--budget <amount>`, `--quick`
16
16
 
17
- If task is missing, stop and return:
17
+ If task is missing:
18
18
 
19
19
  `Usage: /harness-plan "<task>" [--risk low|med|high] [--budget <amount>] [--quick]`
20
20
 
21
- Do **not** require or accept `--plan` on this command.
21
+ `--quick` narrows planning breadth only — it does **not** skip user approval.
22
22
 
23
23
  ## Active plan context
24
24
 
25
- If `[HarnessActivePlan]` is present in context:
25
+ If `[HarnessActivePlan]` is present:
26
26
 
27
- - Read the current PlanPacket from the injected `plan_packet_path` first.
28
- - Treat the user task as **revise/amend** of that packet (not a greenfield plan), unless `/harness-new-run` was used.
29
- - After drift replan or post-abort, update the same canonical file.
27
+ - Read current packet from `plan_packet_path` first.
28
+ - Treat task as **revise/amend** unless `/harness-new-run` was used.
29
+ - Pass `mode: revise` in spawn context.
30
30
 
31
- If no prior plan file exists, create PlanPacket at the canonical path from `[HarnessRunContext]`.
31
+ Otherwise use canonical path from `[HarnessRunContext]` for greenfield `mode: create`.
32
32
 
33
- ## Process
33
+ ## Orchestration (required)
34
34
 
35
- 1. Parse the requested task and extract concrete scope and constraints.
36
- 2. If ambiguity blocks safe execution planning, call `ask_user` (harness-decisions skill). Stop with `needs_clarification` if the user cancels.
37
- 3. Build a `PlanPacket` that is valid against `.pi/harness/specs/plan-packet.schema.json`.
38
- 4. **Write** the PlanPacket JSON to the canonical `plan_packet_path` before completing.
39
- 5. Include rollback artifacts in all required forms.
35
+ 1. Build `HarnessSpawnContext` JSON (`.pi/harness/specs/harness-spawn-context.schema.json`) from injected run/plan context: `run_id`, `plan_packet_path`, `task_summary`, `risk_level`, `quick`, `mode`.
36
+ 2. Spawn with **`inherit_context: false`**:
40
37
 
41
- ## Hard requirements
38
+ ```
39
+ Agent({ subagent_type: "harness/planner", prompt: "<task + HarnessSpawnContext JSON + output schema>" })
40
+ ```
42
41
 
43
- - Do not run mutating tools in this command.
44
- - If task scope is ambiguous, call `ask_user` do not guess or use prose-only clarification.
45
- - Produce a `PlanPacket` matching `.pi/harness/specs/plan-packet.schema.json`.
46
- - Include rollback artifacts in all three forms:
47
- - revert command
48
- - prepared revert branch name
49
- - patch bundle path
50
- - Set risk level to `high` if uncertainty, broad blast radius, or policy-sensitive surfaces are involved.
51
- - Do **not** embed `plan_id=` in the user prompt for policy sync — the extension sets `approvedPlan` from the written file.
42
+ 3. `get_subagent_result` — parse final JSON (`status`, `plan_packet`, `human_summary`, `clarification`) via fenced `json` block.
43
+ 4. If `needs_clarification`, call `ask_user` (harness-decisions) with planner `clarification.options`, then re-spawn with answers.
44
+ 5. Present **full** human-readable plan in chat (scope, assumptions, acceptance_checks, rollback_plan, risk_level).
45
+ 6. Call `ask_user`: **Approve** / **Request changes** / **Cancel** (harness-decisions). **Do not write** until Approve.
46
+ 7. On **Request changes**, re-spawn planner with `mode: revise` and user feedback — do not write file.
47
+ 8. **Only after Approve** — write `PlanPacket` JSON to canonical `plan_packet_path`.
52
48
 
53
- ## Guardrails
49
+ ## Parent rules
54
50
 
55
- - Do not overthink straightforward planning requests.
56
- - Only plan the requested scope; do not execute or widen implementation.
57
- - Never speculate about code or configuration that was not read.
51
+ - Do not mutate project source files — only `plan-packet.json` after approval.
52
+ - Validate draft against `.pi/harness/specs/plan-packet.schema.json` before `ask_user` Approve.
53
+ - Do not embed `plan_id=` in prompts for policy sync.
58
54
 
59
- ## Output contract
55
+ ## Completion
60
56
 
61
- Return:
62
-
63
- 1. Human-readable plan summary:
64
- - scope
65
- - assumptions
66
- - acceptance checks
67
- - rollback plan
68
- 2. Confirmation that PlanPacket was written to the canonical path.
69
-
70
- Do not proceed to execution from this command.
71
-
72
- ## Completion behavior
73
-
74
- Always end with:
75
-
76
- - one-line `plan_status` (`ready` or `needs_clarification`)
77
- - the final `risk_level` used
78
- - explicit `next_command` recommendation: `/harness-run` when `ready` (never `/harness-run --plan …`)
79
- - if `needs_clarification`, tell the user they may reply in plain language or run `/harness-plan` again with updates
57
+ - `plan_status`: `ready` or `needs_clarification`
58
+ - `risk_level` used
59
+ - `next_command`: `/harness-run` when `ready` (never `/harness-run --plan …`)
60
+ - If `needs_clarification`, user may reply in chat or re-run `/harness-plan`
@@ -5,47 +5,33 @@ argument-hint: "[--run <run-id>] [--trace <trace-ref>]"
5
5
 
6
6
  # harness-review
7
7
 
8
- Produce an independent evaluator verdict.
8
+ Orchestrator spawn `harness/evaluator` with `mode: verdict`.
9
9
 
10
10
  ## Step 0 — Parse arguments
11
11
 
12
- Read `$ARGUMENTS` and parse:
13
-
14
12
  - optional: `--run <run-id>` (recovery only)
15
13
  - optional: `--trace <trace-ref>`
16
14
 
17
- On the happy path, **omit `--run`**. Use active run context from `[HarnessRunContext]`.
18
- Run in a **new Pi session** after execute when possible.
19
-
20
- ## Process
21
-
22
- 1. Reconstruct expected outcomes from plan and run artifacts.
23
- 2. Independently verify checks and regression guards.
24
- 3. Emit `EvalVerdict` output for policy gate consumption.
25
-
26
- ## Requirements
15
+ Happy path: omit `--run`; use `[HarnessRunContext]`.
27
16
 
28
- - Treat executor output as untrusted.
29
- - Do not self-review with executor-private scratch context.
30
- - Emit `EvalVerdict` contract matching `.pi/harness/specs/eval-verdict.schema.json`.
31
- - Provide reproducible failed checks and regression flags.
17
+ ## Orchestration (required)
32
18
 
33
- ## Guardrails
19
+ 1. Build `HarnessSpawnContext` with `mode: verdict`, `plan_packet_path`, `run_dir`, trace refs.
20
+ 2. Spawn:
34
21
 
35
- - Do not overthink straightforward pass/fail evidence.
36
- - Only evaluate requested run artifacts and gates.
37
- - Never speculate about checks that were not executed.
22
+ ```
23
+ Agent({ subagent_type: "harness/evaluator", prompt: "Treat executor output as untrusted. …" })
24
+ ```
38
25
 
39
- ## Output
26
+ 3. `get_subagent_result` — parse `EvalVerdict` JSON; parent writes under run dir for policy gate.
40
27
 
41
- - Human-readable findings.
42
- - Structured `EvalVerdict` JSON.
43
- - Recommended action: `proceed_to_adversary`, `replan`, or `rollback`.
28
+ ## Parent rules
44
29
 
45
- ## Completion behavior
30
+ - Do not run review checks inline in this session.
31
+ - No new Pi session required.
46
32
 
47
- Always finish with:
33
+ ## Completion
48
34
 
49
- - `eval_status` (`pass`, `conditional_pass`, `fail`)
50
- - `recommended_action`
51
- - short evidence list that maps each failed check to a reproducible reference
35
+ - `eval_status`: `pass`, `conditional_pass`, or `fail`
36
+ - `recommended_action`: `proceed_to_adversary`, `replan`, or `rollback`
37
+ - Evidence list for each failed check
@@ -5,32 +5,27 @@ argument-hint: "--evidence <evidence.json> --candidate <candidate-router.json> [
5
5
 
6
6
  # harness-router-tune
7
7
 
8
- Router tuning is **propose-and-approve only**.
8
+ Orchestrator scripts + `harness/meta-optimizer` spawn. **Never** write `.pi/model-router.json` directly.
9
9
 
10
10
  ## Step 0 — Parse arguments
11
11
 
12
- Read `$ARGUMENTS` and parse:
13
-
14
12
  - required: `--evidence <evidence.json>`, `--candidate <candidate-router.json>`
15
13
  - optional: `--proposal <out.json>`
16
14
 
17
- If required args are missing, stop and return:
18
-
19
- `Usage: /harness-router-tune --evidence <evidence.json> --candidate <candidate-router.json> [--proposal <out.json>]`
20
-
21
- ## Process
15
+ If missing required args:
22
16
 
23
- 1. Validate evidence completeness and guard status. Evidence may live under `.pi/harness/runs/<run_id>/` for the active harness run when produced by `/harness-eval` (resolve via active run context or explicit paths — no run id required on the happy path).
24
- 2. Generate a proposal artifact only (no live router mutation).
25
- 3. Require explicit human approval metadata before any apply step.
17
+ `Usage: /harness-router-tune --evidence <path> --candidate <path> [--proposal <out.json>]`
26
18
 
27
- ## Never-do rule
19
+ ## Orchestration (required)
28
20
 
29
- - Never write `.pi/model-router.json` directly from this command.
21
+ 1. Parent validates evidence paths exist.
22
+ 2. Optionally spawn:
30
23
 
31
- ## Proposal flow
24
+ ```
25
+ Agent({ subagent_type: "harness/meta-optimizer", prompt: "mode: tune, evidence paths…" })
26
+ ```
32
27
 
33
- 1. Build proposal:
28
+ 3. Parent runs proposal script:
34
29
 
35
30
  ```bash
36
31
  node .pi/harness/router/propose-router-tuning.mjs \
@@ -39,8 +34,8 @@ node .pi/harness/router/propose-router-tuning.mjs \
39
34
  --proposal-out .pi/harness/router/proposals/<id>.json
40
35
  ```
41
36
 
42
- 2. Call `ask_user` to approve / reject / request edits before apply (harness-decisions skill).
43
- 3. Apply only after approval, with explicit approver + justification:
37
+ 4. `ask_user` approve / reject / edit (harness-decisions).
38
+ 5. Apply only after approval:
44
39
 
45
40
  ```bash
46
41
  node .pi/harness/router/apply-router-proposal.mjs \
@@ -50,25 +45,8 @@ node .pi/harness/router/apply-router-proposal.mjs \
50
45
  --write
51
46
  ```
52
47
 
53
- ## Evidence requirements
54
-
55
- - Minimum sample count threshold met.
56
- - Pre/post success-rate delta included.
57
- - Cost-per-task delta included.
58
- - Regression guard status present and passing.
59
-
60
- If any requirement is missing, stop with `human_required`.
61
-
62
- ## Guardrails
63
-
64
- - Do not overthink weak evidence; reject incomplete proposals quickly.
65
- - Only produce proposal/apply instructions within this contract.
66
- - Never apply tuning without explicit human approver identity and justification.
67
-
68
- ## Completion behavior
69
-
70
- End with:
48
+ ## Completion
71
49
 
72
- - `tuning_status` (`proposed`, `human_required`, or `rejected`)
73
- - evidence gate summary (sample count, success delta, cost delta, regression guard)
74
- - explicit non-mutation confirmation for `.pi/model-router.json`
50
+ - `tuning_status`: `proposed`, `human_required`, or `rejected`
51
+ - Evidence gate summary
52
+ - Confirm `.pi/model-router.json` was not mutated without apply script
@@ -5,56 +5,39 @@ argument-hint: "[--budget <amount>]"
5
5
 
6
6
  # harness-run
7
7
 
8
- Execute implementation only after an approved plan exists in active run context.
8
+ Orchestrator only — spawn `harness/executor`. Do **not** implement inline.
9
9
 
10
10
  ## Step 0 — Parse arguments
11
11
 
12
- Read `$ARGUMENTS` and parse:
13
-
14
12
  - optional: `--budget <amount>`
13
+ - Do **not** use `--plan` on happy path — load from `[HarnessActivePlan]` / `plan_packet_path`.
15
14
 
16
- Do **not** parse `--plan` on the happy path. Load the PlanPacket from `[HarnessActivePlan]` / injected `plan_packet_path` only.
17
-
18
- If the extension reports plan not ready, stop and return:
15
+ If plan not ready:
19
16
 
20
17
  `Run /harness-plan first — no approved plan in active run context.`
21
18
 
22
- Advanced recovery only: `--plan <path>` must live under the active run directory (extension validates).
23
-
24
- ## Process
25
-
26
- 1. Load PlanPacket from the injected canonical path and confirm it is valid.
27
- 2. Execute only within approved scope.
28
- 3. Run focused validations mapped to approved acceptance checks.
29
- 4. Produce rollback artifacts and handoff references for downstream gates.
30
-
31
- ## Gate behavior
32
-
33
- - Refuse execution if active plan is not ready (extension blocks before the agent runs).
34
- - Keep edits strictly within approved scope.
35
- - If scope drift appears, stop and return to `harness-plan`.
36
- - For **implementation forks** inside approved scope, call `ask_user` with 2–4 options. For plan-level ambiguity, stop and return to `harness-plan`.
37
- - Record evaluator/adversary prerequisites for downstream gates.
38
- - Always prepare rollback artifacts as part of execution output.
19
+ ## Orchestration (required)
39
20
 
40
- ## Guardrails
21
+ 1. Confirm `[HarnessActivePlan]` / extension reports plan ready.
22
+ 2. Build `HarnessSpawnContext` with `mode: execute`, `plan_packet_path`, `run_dir`, `acceptance_checks` from plan file.
23
+ 3. Spawn:
41
24
 
42
- - Do not overthink straightforward approved changes; execute the approved scope directly.
43
- - Only modify files and behaviors covered by the approved `PlanPacket`.
44
- - Never speculate about successful validation without runnable evidence.
25
+ ```
26
+ Agent({ subagent_type: "harness/executor", prompt: "<HarnessSpawnContext + handoff>" })
27
+ ```
45
28
 
46
- ## Output
29
+ 4. `get_subagent_result` — parse executor JSON (`execution_status`, validations, rollback refs).
30
+ 5. Parent persists trace/handoff artifacts under run dir if needed; do not self-review.
47
31
 
48
- - Implementation summary scoped to approved plan.
49
- - Files changed and why.
50
- - Targeted validations run.
51
- - Trace pointers and rollback references.
32
+ ## Parent rules
52
33
 
53
- ## Completion behavior
34
+ - Refuse if plan not approved.
35
+ - On `scope_drift`, stop and recommend `/harness-plan`.
36
+ - Do not call `ask_user` for plan-level ambiguity — return to plan command.
54
37
 
55
- End with:
38
+ ## Completion
56
39
 
57
- 1. `execution_status` (`completed`, `blocked`, or `scope_drift`).
58
- 2. `validation_summary` (pass/fail with command evidence).
59
- 3. `handoff_ready` booleans for evaluator/adversary prerequisites.
60
- 4. `next_command`: **New Pi session `/harness-eval`** when execution completed successfully.
40
+ - `execution_status`: `completed`, `blocked`, or `scope_drift`
41
+ - `validation_summary` with command evidence
42
+ - `handoff_ready` for evaluator/adversary
43
+ - `next_command`: `/harness-eval` (same session — spawn isolated review agents; no new Pi session)
@@ -385,6 +385,8 @@ Manual override: **`/router profile auto`** anytime after reload if they changed
385
385
 
386
386
  `harness-subagents` loads agents from the installed **`ultimate-pi`** package (`$UP_PKG/.pi/agents/**`) with namespaced ids (`harness/planner`, `pi-pi/agent-expert`). **Do not copy** agents into the project unless you want a deliberate override.
387
387
 
388
+ **Slash commands are orchestrators:** `/harness-plan`, `/harness-run`, etc. spawn `harness/*` agents via the `Agent` tool — bootstrap stays **script-first**; only optionally spawn `harness/sentrux-bootstrap` for Sentrux (see Step 4.2).
389
+
388
390
  Optional per-repo overrides: place `.md` files at the **same relative path** (e.g. `.pi/agents/harness/planner.md` overrides the package planner).
389
391
 
390
392
  Verify manifest drift after `pi update ultimate-pi`:
@@ -5,45 +5,28 @@ argument-hint: "[--run <run-id>] [--phase plan|execute|evaluate|adversary|merge]
5
5
 
6
6
  # harness-trace
7
7
 
8
- Retrieve and summarize trace artifacts for a run.
8
+ Orchestrator spawn `harness/trace-librarian`.
9
9
 
10
10
  ## Step 0 — Parse arguments
11
11
 
12
- Read `$ARGUMENTS` and parse:
13
-
14
12
  - optional: `--run <run-id>` (recovery only)
15
13
  - optional: `--phase plan|execute|evaluate|adversary|merge`
16
14
 
17
- On the happy path, **omit `--run`**. Phase traces live at `trace-<phase>.json` under the active run directory.
18
-
19
- ## Process
20
-
21
- 1. Collect run artifacts from canonical harness locations and provided trace refs.
22
- 2. Build phase timeline with policy and gate decision points.
23
- 3. Report completeness gaps against strict gate artifact requirements.
24
-
25
- ## Requirements
26
-
27
- - Use `.pi/harness/runs/` and external trace references as source of truth pointers.
28
- - Include phase timeline, artifact refs, and policy decisions.
29
- - Highlight missing artifacts that violate strict gate requirements.
30
-
31
- ## Guardrails
15
+ Happy path: omit `--run`.
32
16
 
33
- - Do not overthink simple trace lookups; prioritize completeness and stable references.
34
- - Only report artifacts for the requested run and optional phase filter.
35
- - Never infer artifact existence without verifying source pointers.
17
+ ## Orchestration (required)
36
18
 
37
- ## Output
19
+ 1. Build `HarnessSpawnContext` with `mode: trace`, `run_dir`, optional phase filter.
20
+ 2. Spawn:
38
21
 
39
- - Replay-ready timeline summary.
40
- - Artifact index (`plan`, `run`, `eval`, `adversary`, `consensus`, `incident`, `rollback`).
41
- - Any integrity or completeness gaps.
22
+ ```
23
+ Agent({ subagent_type: "harness/trace-librarian", prompt: "…" })
24
+ ```
42
25
 
43
- ## Completion behavior
26
+ 3. `get_subagent_result` — present timeline and artifact index to user.
44
27
 
45
- Always end with:
28
+ ## Completion
46
29
 
47
- - `trace_completeness` (`complete` or `incomplete`)
48
- - missing artifact checklist (if any)
49
- - most likely next command (`/harness-incident`, `/harness-review`, or `/harness-critic`)
30
+ - `trace_completeness`: `complete` or `incomplete`
31
+ - Missing artifact checklist
32
+ - `next_command` hint (`/harness-incident`, `/harness-review`, or `/harness-critic`)
@@ -3,13 +3,13 @@
3
3
  * Generate `.pi/model-router.json` from Pi's authenticated providers (auth.json + env),
4
4
  * not from raw env-var heuristics alone.
5
5
  *
6
- * Uses @mariozechner/pi-coding-agent ModelRegistry.getAvailable() — same source as /login.
6
+ * Uses @earendil-works/pi-coding-agent ModelRegistry.getAvailable() — same source as /login.
7
7
  *
8
8
  * Usage: node harness-generate-model-router.mjs [--force] [--dry-run]
9
9
  * --force overwrite existing .pi/model-router.json
10
10
  * --dry-run print JSON to stdout, do not write
11
11
  *
12
- * Requires @mariozechner/pi-coding-agent (peer of ultimate-pi; bundled with pi).
12
+ * Requires @earendil-works/pi-coding-agent (peer of ultimate-pi; bundled with pi).
13
13
  */
14
14
 
15
15
  import { existsSync, mkdirSync, writeFileSync } from "node:fs";
@@ -66,26 +66,29 @@ function fail(msg) {
66
66
  }
67
67
 
68
68
  async function loadPiCodingAgent() {
69
- const agentRoots = [
70
- join(UP_PKG, "node_modules", "@mariozechner", "pi-coding-agent"),
71
- join(UP_PKG, ".pi", "npm", "node_modules", "@mariozechner", "pi-coding-agent"),
72
- ];
69
+ const scopes = ["@earendil-works", "@mariozechner"];
70
+ const agentRoots = scopes.flatMap((scope) => [
71
+ join(UP_PKG, "node_modules", scope, "pi-coding-agent"),
72
+ join(UP_PKG, ".pi", "npm", "node_modules", scope, "pi-coding-agent"),
73
+ ]);
73
74
  for (const root of agentRoots) {
74
75
  const entry = join(root, "dist", "index.js");
75
76
  if (existsSync(entry)) {
76
77
  return import(pathToFileURL(entry).href);
77
78
  }
78
79
  }
79
- for (const base of [UP_PKG, process.cwd()]) {
80
- try {
81
- const req = createRequire(join(base, "package.json"));
82
- return req("@mariozechner/pi-coding-agent");
83
- } catch {
84
- /* try next */
80
+ for (const spec of ["@earendil-works/pi-coding-agent", "@mariozechner/pi-coding-agent"]) {
81
+ for (const base of [UP_PKG, process.cwd()]) {
82
+ try {
83
+ const req = createRequire(join(base, "package.json"));
84
+ return req(spec);
85
+ } catch {
86
+ /* try next */
87
+ }
85
88
  }
86
89
  }
87
90
  fail(
88
- "@mariozechner/pi-coding-agent not found (install pi or npm i in ultimate-pi). Peer: @mariozechner/pi-coding-agent",
91
+ "@earendil-works/pi-coding-agent not found (install pi or npm i in ultimate-pi). Peer: @earendil-works/pi-coding-agent",
89
92
  );
90
93
  }
91
94
 
@@ -21,6 +21,7 @@ const REQUIRED_SCHEMAS = [
21
21
  "observation.schema.json",
22
22
  "run-trace.schema.json",
23
23
  "eval-verdict.schema.json",
24
+ "harness-spawn-context.schema.json",
24
25
  ];
25
26
 
26
27
  const REQUIRED_ADRS = [
@@ -34,6 +35,7 @@ const REQUIRED_ADRS = [
34
35
  "0008-harness-posthog-telemetry.md",
35
36
  "0009-sentrux-rules-lifecycle.md",
36
37
  "0031-harness-run-context.md",
38
+ "0032-harness-command-orchestration.md",
37
39
  ];
38
40
 
39
41
  const REQUIRED_EXTENSIONS = [
@@ -199,6 +201,20 @@ async function main() {
199
201
  if (!(await fileExists(runCtxLib))) fail("missing lib/harness-run-context.ts");
200
202
  ok("lib/harness-run-context.ts");
201
203
 
204
+ const policyGateSrc = await readFile(
205
+ join(ROOT, ".pi", "extensions", "policy-gate.ts"),
206
+ "utf-8",
207
+ );
208
+ if (!policyGateSrc.includes("isPlanPhaseAllowedMutation")) {
209
+ fail(
210
+ "policy-gate.ts must use isPlanPhaseAllowedMutation (plan-phase scoped writes)",
211
+ );
212
+ }
213
+ if (!policyGateSrc.includes('pi.on("tool_call", async (event, ctx)')) {
214
+ fail("policy-gate tool_call must receive ctx for run context");
215
+ }
216
+ ok("policy-gate plan-phase writes");
217
+
202
218
  const runCtxFixture = join(SMOKE, "run-context.fixture.json");
203
219
  if (!(await fileExists(runCtxFixture))) {
204
220
  fail("missing run-context.fixture.json");
@@ -11,19 +11,19 @@ rm -rf "$VEND/.git"
11
11
 
12
12
  for f in "$VEND"/extensions/*.ts; do
13
13
  sed -i \
14
- -e "s|'@earendil-works/pi-agent-core'|'@mariozechner/pi-agent-core'|g" \
15
- -e "s|'@earendil-works/pi-ai'|'@mariozechner/pi-ai'|g" \
16
- -e "s|'@earendil-works/pi-coding-agent'|'@mariozechner/pi-coding-agent'|g" \
17
- -e "s|'@earendil-works/pi-tui'|'@mariozechner/pi-tui'|g" \
14
+ -e "s|'@earendil-works/pi-agent-core'|'@earendil-works/pi-agent-core'|g" \
15
+ -e "s|'@earendil-works/pi-ai'|'@earendil-works/pi-ai'|g" \
16
+ -e "s|'@earendil-works/pi-coding-agent'|'@earendil-works/pi-coding-agent'|g" \
17
+ -e "s|'@earendil-works/pi-tui'|'@earendil-works/pi-tui'|g" \
18
18
  "$f"
19
19
  done
20
20
 
21
- # Align package.json peers with @mariozechner (upstream lists @earendil-works)
21
+ # package.json peers: upstream already lists @earendil-works, so the substitutions below are no-ops (left over from the former @mariozechner remap)
22
22
  sed -i \
23
- -e 's|"@earendil-works/pi-agent-core"|"@mariozechner/pi-agent-core"|g' \
24
- -e 's|"@earendil-works/pi-ai"|"@mariozechner/pi-ai"|g' \
25
- -e 's|"@earendil-works/pi-coding-agent"|"@mariozechner/pi-coding-agent"|g' \
26
- -e 's|"@earendil-works/pi-tui"|"@mariozechner/pi-tui"|g' \
23
+ -e 's|"@earendil-works/pi-agent-core"|"@earendil-works/pi-agent-core"|g' \
24
+ -e 's|"@earendil-works/pi-ai"|"@earendil-works/pi-ai"|g' \
25
+ -e 's|"@earendil-works/pi-coding-agent"|"@earendil-works/pi-coding-agent"|g' \
26
+ -e 's|"@earendil-works/pi-tui"|"@earendil-works/pi-tui"|g' \
27
27
  "$VEND/package.json"
28
28
 
29
29
  python3 -c "
@@ -40,7 +40,7 @@ cat >"$VEND/UPSTREAM_PIN.md" <<EOF
40
40
  - **Repository:** https://github.com/yeliu84/pi-model-router
41
41
  - **License:** MIT (\`LICENSE\` in this tree)
42
42
  - **Pinned upstream commit:** \`$COMMIT\`
43
- - **Local changes:** \`extensions/*.ts\` imports use \`@mariozechner/*\` and relative paths end in \`.js\` for TypeScript nodenext.
43
+ - **Local changes:** \`extensions/*.ts\` imports use \`@earendil-works/*\` and relative paths end in \`.js\` for TypeScript nodenext.
44
44
  EOF
45
45
 
46
46
  rm -f "$VEND/package-lock.json"
package/CHANGELOG.md CHANGED
@@ -4,6 +4,24 @@ All notable changes to this project are documented in this file.
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ ## [v0.8.0] — 2026-05-17
8
+
9
+ ### ✨ Features
10
+
11
+ - **Harness command orchestration:** slash prompts spawn `harness/*` agents with required `HarnessSpawnContext`; subagent tool policy; L4 review via isolated subagents (not session fork); ADR 0032.
12
+
13
+ ### 🐛 Fixes
14
+
15
+ - **Policy gate / harness-plan:** allow scoped writes to `plan-packet.json` in plan phase after `ask_user` approval; block project source edits until execute; promote `/harness-auto` to execute phase mid-turn after approved plan write.
16
+
17
+ ### 📖 Documentation
18
+
19
+ - **Harness plan workflow:** present full plan → Approve via `ask_user` → persist packet; `--quick` does not skip approval (ADR 0031).
20
+
21
+ ### 🔧 Chores
22
+
23
+ - **pi-coding-agent:** migrate peer/dev deps to `@earendil-works/pi-coding-agent` 0.74.1.
24
+
7
25
  ## [v0.7.0] — 2026-05-17
8
26
 
9
27
  ### ✨ Features
@@ -110,7 +128,7 @@ All notable changes to this project are documented in this file.
110
128
 
111
129
  - Remove `npm:@yeliu84/pi-model-router` from package dependencies; add `THIRD_PARTY_NOTICES.md`
112
130
  - `harness-sync-model-router.mjs` adjusts Pi defaults only (no package toggling)
113
- - `check:ts` uses ES2023; devDependency on `@mariozechner/pi-ai`, `pi-tui`, `pi-agent-core` for vendored typecheck
131
+ - `check:ts` uses ES2023; devDependency on `@earendil-works/pi-ai`, `pi-tui`, `pi-agent-core` for vendored typecheck
114
132
 
115
133
  ### 🐛 Fixes
116
134