ultimate-pi 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/.agents/skills/harness-debate-plan/SKILL.md +41 -61
  2. package/.agents/skills/harness-governor/SKILL.md +11 -0
  3. package/.agents/skills/harness-orchestration/SKILL.md +5 -3
  4. package/.agents/skills/harness-plan/SKILL.md +11 -9
  5. package/.pi/agents/harness/adversary.md +1 -1
  6. package/.pi/agents/harness/evaluator.md +1 -1
  7. package/.pi/agents/harness/executor.md +1 -1
  8. package/.pi/agents/harness/incident-recorder.md +1 -1
  9. package/.pi/agents/harness/meta-optimizer.md +1 -1
  10. package/.pi/agents/harness/planning/decompose.md +8 -35
  11. package/.pi/agents/harness/planning/execution-plan-author.md +27 -15
  12. package/.pi/agents/harness/planning/hypothesis-validator.md +23 -6
  13. package/.pi/agents/harness/planning/hypothesis.md +4 -27
  14. package/.pi/agents/harness/planning/implementation-researcher.md +43 -0
  15. package/.pi/agents/harness/planning/plan-adversary.md +20 -5
  16. package/.pi/agents/harness/planning/plan-evaluator.md +28 -6
  17. package/.pi/agents/harness/planning/review-integrator.md +23 -10
  18. package/.pi/agents/harness/planning/scout-graphify.md +4 -23
  19. package/.pi/agents/harness/planning/scout-semantic.md +3 -18
  20. package/.pi/agents/harness/planning/scout-structure.md +3 -18
  21. package/.pi/agents/harness/planning/sprint-contract-auditor.md +22 -6
  22. package/.pi/agents/harness/planning/stack-researcher.md +21 -11
  23. package/.pi/agents/harness/tie-breaker.md +1 -1
  24. package/.pi/agents/harness/trace-librarian.md +1 -1
  25. package/.pi/extensions/budget-guard.ts +33 -19
  26. package/.pi/extensions/harness-debate-tools.ts +280 -19
  27. package/.pi/extensions/harness-live-widget.ts +39 -159
  28. package/.pi/extensions/harness-plan-approval.ts +47 -5
  29. package/.pi/extensions/harness-run-context.ts +96 -2
  30. package/.pi/extensions/harness-subagent-submit.ts +195 -0
  31. package/.pi/extensions/lib/debate-bus-core.ts +108 -17
  32. package/.pi/extensions/lib/debate-bus-state.ts +6 -0
  33. package/.pi/extensions/lib/harness-subagent-policy.ts +45 -0
  34. package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
  35. package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
  36. package/.pi/extensions/lib/harness-subagents-bridge.ts +42 -0
  37. package/.pi/extensions/lib/plan-approval/plan-review.ts +56 -0
  38. package/.pi/extensions/lib/plan-approval/types.ts +1 -0
  39. package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
  40. package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
  41. package/.pi/extensions/lib/plan-debate-gate.ts +88 -34
  42. package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
  43. package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
  44. package/.pi/extensions/lib/plan-debate-round-status.ts +63 -20
  45. package/.pi/extensions/lib/plan-messenger.ts +93 -17
  46. package/.pi/extensions/policy-gate.ts +1 -1
  47. package/.pi/harness/README.md +1 -1
  48. package/.pi/harness/agents.manifest.json +25 -21
  49. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
  50. package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
  51. package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
  52. package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
  53. package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
  54. package/.pi/harness/docs/adrs/README.md +4 -0
  55. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
  56. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
  57. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
  58. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
  59. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
  60. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
  61. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
  62. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
  63. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
  64. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
  65. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
  66. package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
  67. package/.pi/harness/specs/harness-human-required.schema.json +16 -0
  68. package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
  69. package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
  70. package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
  71. package/.pi/harness/specs/round-result.schema.json +15 -2
  72. package/.pi/lib/harness-agent-output.ts +45 -0
  73. package/.pi/lib/harness-budget-enforce.ts +18 -0
  74. package/.pi/lib/harness-schema-validate.ts +89 -0
  75. package/.pi/lib/harness-spawn-parse.ts +86 -0
  76. package/.pi/lib/harness-subagent-submit-path.ts +41 -0
  77. package/.pi/lib/harness-ui-state.ts +107 -2
  78. package/.pi/prompts/harness-auto.md +2 -2
  79. package/.pi/prompts/harness-plan.md +94 -42
  80. package/.pi/prompts/harness-run.md +2 -2
  81. package/.pi/prompts/planning-rubrics.md +31 -0
  82. package/.pi/scripts/harness-verify.mjs +2 -0
  83. package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
  84. package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
  85. package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
  86. package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
  87. package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
  88. package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
  89. package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
  90. package/CHANGELOG.md +21 -0
  91. package/package.json +4 -2
  92. package/vendor/pi-subagents/src/subagents.ts +29 -3
@@ -1,13 +1,13 @@
1
1
  ---
2
- description: PM-grade harness plan — scouts, ExecutionPlan, DAG validation, Review Gate debate, approval.
3
- argument-hint: "\"<task>\" [--risk low|med|high] [--budget <amount>] [--quick]"
2
+ description: PM-grade harness plan — scouts, implementation research, ExecutionPlan, DAG validation, selective Review Gate debate, approval.
3
+ argument-hint: "\"<task>\" [--risk low|med|high] [--quick]"
4
4
  ---
5
5
 
6
6
  # harness-plan
7
7
 
8
8
  You are the **planning PM** for this harness run. Produce an execution baseline (`plan-packet.yaml` + `plan-review.md`), not strategy theater. Parent owns `ask_user`, `approve_plan`, `create_plan`, debate bus commands, and YAML writes under `.pi/harness/runs/<run_id>/`.
9
9
 
10
- Never `write`/`edit` the final canonical packet except via **`write_harness_yaml`** for run artifacts and **`create_plan`** after approval. Do not paste JSON into `.yaml` files — subagents emit JSON; you convert via `write_harness_yaml`.
10
+ Subagents persist artifacts via scoped **`submit_*`** tools (deterministic YAML under the run dir). Parent uses **`harness_artifact_ready`** to gate phases (no JSON parsing). Parent merges still use **`write_harness_yaml`** for `research-brief.yaml`, `plan-packet.yaml` shell, and integrator patches only.
11
11
 
12
12
  ## Allowed subagents
13
13
 
@@ -16,6 +16,7 @@ Never `write`/`edit` the final canonical packet except via **`write_harness_yaml
16
16
  - `harness/planning/scout-semantic` (skip when `--quick`)
17
17
  - `harness/planning/decompose`
18
18
  - `harness/planning/hypothesis`
19
+ - `harness/planning/implementation-researcher`
19
20
  - `harness/planning/stack-researcher`
20
21
  - `harness/planning/execution-plan-author`
21
22
  - `harness/planning/hypothesis-validator` (debate R1 only)
@@ -31,15 +32,15 @@ Read **harness-debate-plan** skill before Review Gate rounds.
31
32
  1. Use `subagent` with `agentScope: "both"` and parallel `tasks` where lanes are independent.
32
33
  2. Each `subagent` call blocks until subprocesses finish — batch parallel scouts in one `tasks` array.
33
34
  3. Do **not** set `timeoutMs` unless the user explicitly requests a cap — subagents run until natural completion (optional backstop: `PI_SUBAGENT_TIMEOUT_MS`).
34
- 4. No harness subagent spawn cap — run the full scout + debate pipeline without skipping lanes for budget.
35
- 5. Compact task text: embed `HarnessSpawnContext` JSON + lane-specific instructions only.
35
+ 4. No harness subagent spawn cap — run the full scout + research + debate pipeline without skipping lanes for budget.
36
+ 5. Compact task text: embed spawn context + lane instructions. Prefer `HarnessSpawnContext={"run_id":"…","plan_packet_path":"…",…}` or a JSON object with `"HarnessSpawnContext":{…}` — both parse; `run_id` is required so subprocess submit tools get `HARNESS_RUN_ID`.
36
37
 
37
38
  ## Step 0 — Parse `$ARGUMENTS`
38
39
 
39
40
  - task (required)
40
- - `--risk low|med|high`, `--budget`, `--quick`
41
+ - `--risk low|med|high`, `--quick` (`--budget` is reserved/no-op; token budgets are telemetry-only unless `HARNESS_BUDGET_ENFORCE=1`)
41
42
 
42
- `--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, stack research, execution plan, DAG validation, or **4-round plan debate**.
43
+ `--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, **Phase 3.5 implementation research**, stack research, execution plan, DAG validation, or **Review Gate debate**.
43
44
 
44
45
  ## Active plan context
45
46
 
@@ -63,33 +64,50 @@ Do **not** run `ccc index` or `ccc search --refresh`. The harness runs increment
63
64
 
64
65
  Add `harness/planning/scout-semantic` to `tasks` unless `--quick`. Require graphify + structure success. Semantic lane uses `ccc search` only (see `scout-semantic` agent).
65
66
 
67
+ After scouts: `harness_artifact_ready({ paths: ["artifacts/scout-graphify.yaml", "artifacts/scout-structure.yaml", ...] })`.
68
+
66
69
  ## Phase 2 & 3 — Decompose + hypothesis (parallel)
67
70
 
68
- One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis`. Parse `PlanDecompositionBrief` and `PlanHypothesisBrief` from outputs. Persist with `write_harness_yaml` `artifacts/decomposition.yaml` and `artifacts/hypothesis.yaml`.
71
+ One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis` (include scout YAML paths in task text). Gate with `harness_artifact_ready` on `artifacts/decomposition.yaml` and `artifacts/hypothesis.yaml`.
69
72
 
70
- ## Phase 4 — Draft shell + fork
73
+ Decompose **prior_art** is **internal only** (from scouts). External prior art arrives in Phase 3.5.
71
74
 
72
- Build draft `PlanPacket` (`contract_version: "1.1.0"`):
75
+ ## Phase 3.5 — External solution research (required)
73
76
 
74
- - `scope`, `assumptions`, `acceptance_checks`, `risk_level`, `rollback_plan`
75
- - `execution_plan` placeholder until Phase 4b
77
+ **MUST** run unless you document a `human_required` waiver in the run trace. Parallel batch:
78
+
79
+ ```json
80
+ {
81
+ "agentScope": "both",
82
+ "tasks": [
83
+ { "agent": "harness/planning/implementation-researcher", "task": "<HarnessSpawnContext + paths to decomposition/hypothesis/scout summaries — patterns/repos/workflows only; no stack version SERPs>" },
84
+ { "agent": "harness/planning/stack-researcher", "task": "<HarnessSpawnContext + stack research brief — libraries/APIs only>" }
85
+ ]
86
+ }
87
+ ```
76
88
 
77
- `ask_user` when `dialectical_fork` is material.
89
+ - Subagents write via `submit_implementation_research` / `submit_stack_brief`; gate with `harness_artifact_ready` on both paths.
90
+ - Merge both into `research-brief.yaml` (`implementation:` + `stack:`) via parent `write_harness_yaml`.
91
+ - **Partial failure:** if one lane fails, re-spawn that lane once; if still failing set `plan_status: partial` and `human_required` via `ask_user`. Do not proceed to Phase 4b without both artifacts or explicit human waiver.
92
+ - **Web dedup:** implementation owns patterns/repos; stack owns libraries/versions — no overlapping queries.
78
93
 
79
- Initialize `research-brief.yaml` with decomposition + hypothesis (`write_harness_yaml`).
94
+ On `mode: revise`: re-run implementation-researcher when task scope, acceptance_checks, or >30% work_items change; skip when delta is schedule-only and prior artifact is fresh.
80
95
 
81
- ## Phase 4a — Stack research
96
+ ## Phase 4 — Draft shell
82
97
 
83
- ```
84
- subagent({ agentScope: "both", agent: "harness/planning/stack-researcher", task: "<HarnessSpawnContext + stack research brief>" })
85
- ```
98
+ Build draft `PlanPacket` (`contract_version: "1.1.0"`):
99
+
100
+ - `scope`, `assumptions`, `acceptance_checks`, `risk_level`, `rollback_plan`
101
+ - `execution_plan` placeholder until Phase 4b
86
102
 
87
- `write_harness_yaml` `artifacts/stack.yaml`; merge into `research-brief.yaml` `stack`.
103
+ Initialize `research-brief.yaml` with decomposition + hypothesis + Phase 3.5 merges (`write_harness_yaml`).
104
+
105
+ **`ask_user` on material `dialectical_fork`** after Phase 3.5 merge (evidence-backed — conflicting external patterns may trigger `human_required` from eligibility).
88
106
 
89
107
  ## Phase 4b — Execution plan author
90
108
 
91
109
  ```
92
- subagent({ agentScope: "both", agent: "harness/planning/execution-plan-author", task: "<HarnessSpawnContext + execution plan brief>" })
110
+ subagent({ agentScope: "both", agent: "harness/planning/execution-plan-author", task: "<HarnessSpawnContext + PlanImplementationResearchBrief + PlanStackBrief + decomposition/hypothesis>" })
93
111
  ```
94
112
 
95
113
  Merge `execution_plan` into draft `plan-packet.yaml` (`write_harness_yaml`). Save `artifacts/execution-plan-draft.yaml` the same way.
@@ -102,37 +120,71 @@ node .pi/scripts/validate-plan-dag.mjs --packet .pi/harness/runs/<run_id>/plan-p
102
120
 
103
121
  Must **pass** before debate. On fail: fix via author or parent patches, re-run.
104
122
 
105
- ## Phase 5 — Review Gate debate (4 rounds, pi-messenger, even with `--quick`)
123
+ ## Phase 4d — Debate eligibility (before Review Gate)
124
+
125
+ ```
126
+ harness_plan_debate_eligibility({ risk_level, material_fork, dag_pass: true, ... })
127
+ ```
128
+
129
+ Pre-debate signals only (no R1 hypothesis output). Default profile **standard** when ambiguous.
106
130
 
107
- 1. `harness_debate_open` (debate id normalized to `plan-<run_id>`; creates `debate-messenger/` inboxes + threads).
108
- 2. Optional: `harness_plan_scope_check` after decomposition — if `material_drift`, `ask_user` before continuing.
109
- 3. For rounds 1–4 (`debate_round_focus`: spec, wbs, schedule, quality):
131
+ If `human_required: true` → `ask_user` before `harness_debate_open`.
132
+
133
+ Then:
134
+
135
+ ```
136
+ harness_debate_open({ debate_profile, required_focuses })
137
+ ```
110
138
 
111
- | Round | Lane spawns (sequential) | Messenger |
112
- |-------|--------------------------|-----------|
113
- | 1 | `hypothesis-validator` (blind) → `plan-evaluator` → `plan-adversary` | evaluator `claim` → adversary `rebuttal` (`in_reply_to` claim ids) |
114
- | 2 | `plan-evaluator` → `plan-adversary` | same |
115
- | 3 | `plan-evaluator` → `plan-adversary` | same |
116
- | 4 | `plan-evaluator` → `plan-adversary` → **`sprint-contract-auditor`** | same + audit message optional |
139
+ Profiles:
117
140
 
118
- Lane YAML + messenger claims/rebuttals are **auto-applied** when each debate subagent completes (`harness-debate-lane-applied` entry). You may also call `harness_debate_apply_lane` if fenced YAML was truncated.
141
+ | Profile | Focuses required | min_focus_rounds |
142
+ |---------|------------------|------------------|
143
+ | full | spec, wbs, schedule, quality | 4 |
144
+ | standard | all four | 4 |
145
+ | light | spec, quality only | 2 |
119
146
 
120
- Per round (no prose-only turns — **always call a tool**):
147
+ ## Phase 5 — Review Gate debate (profile-aware, pi-messenger, even with `--quick`)
121
148
 
122
- 1. Spawn lane agents (evaluator → adversary → integrator; R1/R4 extras per table).
123
- 2. After each subagent: verify `harness-debate-next-step` message or run `harness_debate_round_status({ round_index: N })`.
124
- 3. Before adversary: `harness_messenger_read_round` — include transcript in adversary task.
125
- 4. After integrator: `harness_debate_submit_round({ round_index, integrator_draft })` (writes review-round + bus round + integrate message — **do not** `write_harness_yaml` review-round paths).
149
+ **Forbidden:** parallel `subagent` calls for any debate lane agent in one batch. One lane agent per tool batch, in order.
150
+
151
+ 1. Optional: `harness_plan_scope_check` — if `material_drift`, `ask_user` before debate.
152
+ 2. Drive debate with **`harness_debate_focus_coverage`** and **`harness_debate_round_status({ round_index, debate_round_focus })`** — cover **required_focuses** from eligibility, not always all four.
153
+
154
+ ### Focus coverage (required before consensus)
155
+
156
+ Each required focus must appear in a submitted `review-round-rN.yaml` (`debate_round_focus`). Monotonic `round_index` (cap from profile). Consensus only when:
157
+
158
+ - all **required** focuses covered, **and**
159
+ - last round `review_gate_ready: true`, **and**
160
+ - `validate-plan-dag.mjs` still passes (re-run after patches).
161
+
162
+ ### Per-round state machine
163
+
164
+ ```
165
+ round_index := next uncovered required focus
166
+ debate_round_focus := spec | wbs | schedule | quality for this round
167
+
168
+ IF round_index == 1:
169
+ spawn hypothesis-validator (blind — no decomposition/PlanPacket/scouts/prior debate)
170
+ WHILE NOT ready_for_integrator (harness_debate_round_status with debate_round_focus):
171
+ follow next_tool exactly (one subagent per batch)
172
+ IF debate_round_focus == quality OR round_index >= 4:
173
+ spawn sprint-contract-auditor
174
+ spawn review-integrator → harness_debate_submit_round({ round_index, integrator_draft })
175
+ harness_debate_focus_coverage // repeat until missing required focuses empty
176
+ harness_debate_consensus
177
+ ```
126
178
 
127
- 5. `harness_debate_consensus` after round 4.
179
+ Debate agents **must not** call `web_search` / `web_fetch` — cite `artifacts/implementation-research.yaml` instead.
128
180
 
129
- **Never** echo `/harness-debate-*` in bash. **Never** end a turn during Phase 5 with only narration (e.g. "Let me post claims") — the next tool call must be in the **same** assistant message or immediately after `harness-debate-next-step`.
181
+ **Never** end a Phase 5 turn with prose only — next action must be a harness tool or single sequential `subagent`.
130
182
 
131
- **R1 blind rule:** hypothesis-validator prompt must exclude decomposition, scouts, PlanPacket, prior debate.
183
+ **R1 blind rule:** hypothesis-validator sees only task + `PlanHypothesisBrief`.
132
184
 
133
185
  If R1 `revision_recommended` or `relevance.passes === false`: one `hypothesis` re-spawn, update brief, continue.
134
186
 
135
- **Blockers:** `policy_decision: block` → do not `approve_plan`. `human_required` → `ask_user` before approval.
187
+ **Blockers:** `policy_decision: block` → no `approve_plan`. `human_required` → `ask_user` first.
136
188
 
137
189
  ## Phase 5b — Revise packet
138
190
 
@@ -142,7 +194,7 @@ Set `research_brief.eval` from R1 `hypothesis-validator` output.
142
194
 
143
195
  ## Phase 6 — Approval + persistence
144
196
 
145
- 1. `approve_plan` with `plan_packet`, `human_summary`, `research_brief` (paths/summaries OK).
197
+ 1. `approve_plan` with `plan_packet`, `human_summary`, `research_brief` (include `implementation` section). Missing `artifacts/implementation-research.yaml` → **error** on `--risk high`, **warn** otherwise.
146
198
  2. On Approve: `create_plan` with same packet (`contract_version: "1.1.0"` + `execution_plan`).
147
199
  3. Confirm `plan_ready: true` → `next_command: /harness-run`.
148
200
 
@@ -152,4 +204,4 @@ Post-execute adversary: `/harness-critic` only (not plan-phase agents).
152
204
 
153
205
  - `plan_status`: ready | partial | needs_clarification
154
206
  - `plan_review_path` for human review
155
- - DAG `pass` + 4 debate rounds + consensus not `block` before ready
207
+ - DAG `pass` + required focus areas covered + consensus not `block` before ready
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Execute only against an approved PlanPacket with strict phase gates.
3
- argument-hint: "[--budget <amount>]"
3
+ argument-hint: ""
4
4
  ---
5
5
 
6
6
  # harness-run
@@ -9,7 +9,7 @@ Orchestrator only — spawn `harness/executor`. Do **not** implement inline.
9
9
 
10
10
  ## Step 0 — Parse arguments
11
11
 
12
- - optional: `--budget <amount>`
12
+ - `--budget` is reserved/no-op (telemetry-only budgets by default)
13
13
  - Do **not** use `--plan` on happy path — load from `[HarnessActivePlan]` / `plan_packet_path`.
14
14
 
15
15
  If plan not ready:
@@ -0,0 +1,31 @@
1
+ # Planning Review Gate rubrics (spawn fragment)
2
+
3
+ Parent includes this file in debate agent spawn text. Stable check ids by `debate_round_focus`.
4
+
5
+ ## spec
6
+
7
+ - SC-01: Every acceptance_check maps to scope or execution_plan work_item
8
+ - SC-02: Out-of-scope work is listed in decomposition `excluded`
9
+ - SC-03: Hypothesis brief falsifiability and success metrics are testable
10
+ - SC-04: Risk register covers top technical unknowns
11
+
12
+ ## wbs
13
+
14
+ - WB-01: Each work_item has typed `done_criteria` (not vague “implement X”)
15
+ - WB-02: No orphan work_items (every item on critical path or sprint_contract)
16
+ - WB-03: `depends_on` is acyclic; parallel_safe only when files disjoint
17
+ - WB-04: wbs_dictionary entry per non-trivial work_item
18
+
19
+ ## schedule
20
+
21
+ - SH-01: `schedule_metadata.critical_path_work_item_ids` is non-empty for med/high risk
22
+ - SH-02: Phase entry/exit criteria are observable
23
+ - SH-03: Milestones align with acceptance_checks dates where stated
24
+ - SH-04: No impossible parallelism (same file, conflicting owners)
25
+
26
+ ## quality
27
+
28
+ - QL-01: sprint_contract.done_criteria_types complete (ADR-020)
29
+ - QL-02: Verify/lint/test work_items in early phases when risk ≥ med
30
+ - QL-03: Checkpoint gaps between phases documented
31
+ - QL-04: Keep Quality Left — no “test at end only” without justification
@@ -37,6 +37,8 @@ const REQUIRED_ADRS = [
37
37
  "0009-sentrux-rules-lifecycle.md",
38
38
  "0031-harness-run-context.md",
39
39
  "0032-harness-command-orchestration.md",
40
+ "0037-subagent-submit-tools.md",
41
+ "0038-budget-telemetry-only.md",
40
42
  ];
41
43
 
42
44
  const REQUIRED_EXTENSIONS = [
package/CHANGELOG.md CHANGED
@@ -4,6 +4,27 @@ All notable changes to this project are documented in this file.
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ ## [v0.16.0] — 2026-05-19
8
+
9
+ ### ✨ Features
10
+
11
+ - add submit pipeline and planning/debate updates
12
+
13
+ ### 🔧 Chores
14
+
15
+ - refresh graph artifacts after harness updates
16
+
17
+ ## [v0.15.0] — 2026-05-19
18
+
19
+ ### ✨ Features
20
+
21
+ - **Live widget:** Single-row footer with current/next pipeline phase and plain-language status hints; removes inFlight, policy jargon, and flag rows.
22
+ - **Plan phase:** Implementation researcher, selective debate lanes/eligibility, planning rubrics, ADR 0036, and smoke fixture updates.
23
+
24
+ ### ✅ Tests
25
+
26
+ - Add `harness-live-widget-status` and `plan-debate-eligibility` tests.
27
+
7
28
  ## [v0.14.0] — 2026-05-18
8
29
 
9
30
  ### ✨ Features
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ultimate-pi",
3
- "version": "0.14.0",
3
+ "version": "0.16.0",
4
4
  "description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -84,7 +84,7 @@
84
84
  "format": "biome format --write",
85
85
  "format:check": "biome format",
86
86
  "prepare": "lefthook install",
87
- "test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-spawn-budget.test.mjs test/harness-turn-routing.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs",
87
+ "test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-live-widget-status.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-spawn-budget.test.mjs test/harness-spawn-parse.test.mjs test/harness-schema-validate.test.mjs test/harness-turn-routing.test.mjs test/harness-budget-enforce.test.mjs test/harness-submit-policy.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs",
88
88
  "test:vcc": "npx -y tsx --test vendor/pi-vcc/tests/*.test.ts",
89
89
  "harness:sentrux-bootstrap": "node .pi/scripts/harness-sentrux-bootstrap.mjs",
90
90
  "harness:sentrux-sync": "node .pi/scripts/sentrux-rules-sync.mjs --force",
@@ -103,6 +103,8 @@
103
103
  },
104
104
  "dependencies": {
105
105
  "@posthog/pi": "latest",
106
+ "ajv": "^8.17.1",
107
+ "ajv-formats": "^3.0.1",
106
108
  "croner": "^9.0.0",
107
109
  "jimp": "^1.6.1",
108
110
  "nanoid": "^5.1.5",
@@ -42,6 +42,13 @@ export interface SpawnAuthForward {
42
42
 
43
43
  export interface HarnessSubagentsOptions {
44
44
  packageRoot?: string;
45
+ /** Absolute path to harness-subagent-submit.ts for subprocess-only extension loading (Option A). */
46
+ harnessSubprocessExtensionPath?: string;
47
+ /** Extra env vars per subprocess (e.g. HARNESS_RUN_ID, HARNESS_RUN_DIR). */
48
+ resolveSubprocessEnv?: (
49
+ task: string,
50
+ agent: AgentConfig,
51
+ ) => Record<string, string> | undefined;
45
52
  defaultAgentScope?: AgentScope;
46
53
  defaultConfirmProjectAgents?: boolean;
47
54
  beforeExecute?: (
@@ -388,8 +395,11 @@ function terminateProcess(proc: ReturnType<typeof spawn>) {
388
395
 
389
396
  type OnUpdateCallback = (partial: AgentToolResult<SubagentDetails>) => void;
390
397
 
391
- function buildSpawnEnv(packageRoot?: string): NodeJS.ProcessEnv {
392
- const env = { ...process.env };
398
+ function buildSpawnEnv(
399
+ packageRoot?: string,
400
+ extra?: Record<string, string>,
401
+ ): NodeJS.ProcessEnv {
402
+ const env = { ...process.env, ...extra };
393
403
  env.PI_HARNESS_SUBPROCESS = "1";
394
404
  if (packageRoot) {
395
405
  env.UP_PKG = packageRoot;
@@ -411,6 +421,7 @@ async function runSingleAgent(
411
421
  makeDetails: (results: SingleResult[]) => SubagentDetails,
412
422
  packageRoot?: string,
413
423
  spawnAuth?: SpawnAuthForward,
424
+ subagentsOptions?: HarnessSubagentsOptions,
414
425
  ): Promise<SingleResult> {
415
426
  const agent = agents.find((a) => a.name === agentName);
416
427
 
@@ -434,8 +445,15 @@ async function runSingleAgent(
434
445
  else if (spawnAuth) args.push("--model", spawnAuth.modelRef);
435
446
  if (spawnAuth?.apiKey) args.push("--api-key", spawnAuth.apiKey);
436
447
  if (agent.thinking) args.push("--thinking", agent.thinking);
448
+ const harnessExt =
449
+ agent.extensionsOff &&
450
+ agent.name.startsWith("harness/") &&
451
+ subagentsOptions?.harnessSubprocessExtensionPath;
437
452
  if (agent.extensionsOff) {
438
453
  args.push("--no-extensions");
454
+ if (harnessExt) {
455
+ args.push("-e", harnessExt);
456
+ }
439
457
  if (agent.skillsOff) args.push("--no-skills");
440
458
  }
441
459
  if (agent.tools && agent.tools.length > 0) {
@@ -443,7 +461,11 @@ async function runSingleAgent(
443
461
  } else if (agent.extensionsOff) {
444
462
  args.push("--no-tools");
445
463
  }
446
- const spawnEnv = buildSpawnEnv(packageRoot);
464
+ const extraEnv = subagentsOptions?.resolveSubprocessEnv?.(task, agent);
465
+ const spawnEnv = buildSpawnEnv(packageRoot, {
466
+ ...extraEnv,
467
+ HARNESS_AGENT_ID: agent.name,
468
+ });
447
469
 
448
470
  let tmpPromptDir: string | null = null;
449
471
  let tmpPromptPath: string | null = null;
@@ -856,6 +878,7 @@ export function createSubagentsExtension(
856
878
  makeDetails("chain"),
857
879
  packageRoot,
858
880
  await resolveSpawnAuth(step.agent),
881
+ options,
859
882
  );
860
883
  results.push(result);
861
884
 
@@ -950,6 +973,7 @@ export function createSubagentsExtension(
950
973
  makeDetails("parallel"),
951
974
  packageRoot,
952
975
  await resolveSpawnAuth(t.agent),
976
+ options,
953
977
  );
954
978
  allResults[index] = result;
955
979
  doneCount += 1;
@@ -987,6 +1011,7 @@ export function createSubagentsExtension(
987
1011
  makeDetails("parallel"),
988
1012
  packageRoot,
989
1013
  await resolveSpawnAuth(aggregator.agent),
1014
+ options,
990
1015
  );
991
1016
  }
992
1017
 
@@ -1038,6 +1063,7 @@ export function createSubagentsExtension(
1038
1063
  makeDetails("single"),
1039
1064
  packageRoot,
1040
1065
  await resolveSpawnAuth(params.agent),
1066
+ options,
1041
1067
  );
1042
1068
  const isError = result.exitCode !== 0 || result.stopReason === "error" || result.stopReason === "aborted";
1043
1069
  if (isError) {