ultimate-pi 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +41 -61
- package/.agents/skills/harness-governor/SKILL.md +11 -0
- package/.agents/skills/harness-orchestration/SKILL.md +5 -3
- package/.agents/skills/harness-plan/SKILL.md +11 -9
- package/.pi/agents/harness/adversary.md +1 -1
- package/.pi/agents/harness/evaluator.md +1 -1
- package/.pi/agents/harness/executor.md +1 -1
- package/.pi/agents/harness/incident-recorder.md +1 -1
- package/.pi/agents/harness/meta-optimizer.md +1 -1
- package/.pi/agents/harness/planning/decompose.md +8 -35
- package/.pi/agents/harness/planning/execution-plan-author.md +27 -15
- package/.pi/agents/harness/planning/hypothesis-validator.md +23 -6
- package/.pi/agents/harness/planning/hypothesis.md +4 -27
- package/.pi/agents/harness/planning/implementation-researcher.md +43 -0
- package/.pi/agents/harness/planning/plan-adversary.md +20 -5
- package/.pi/agents/harness/planning/plan-evaluator.md +28 -6
- package/.pi/agents/harness/planning/review-integrator.md +23 -10
- package/.pi/agents/harness/planning/scout-graphify.md +4 -23
- package/.pi/agents/harness/planning/scout-semantic.md +3 -18
- package/.pi/agents/harness/planning/scout-structure.md +3 -18
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +22 -6
- package/.pi/agents/harness/planning/stack-researcher.md +21 -11
- package/.pi/agents/harness/tie-breaker.md +1 -1
- package/.pi/agents/harness/trace-librarian.md +1 -1
- package/.pi/extensions/budget-guard.ts +33 -19
- package/.pi/extensions/harness-debate-tools.ts +280 -19
- package/.pi/extensions/harness-live-widget.ts +39 -159
- package/.pi/extensions/harness-plan-approval.ts +47 -5
- package/.pi/extensions/harness-run-context.ts +96 -2
- package/.pi/extensions/harness-subagent-submit.ts +195 -0
- package/.pi/extensions/lib/debate-bus-core.ts +108 -17
- package/.pi/extensions/lib/debate-bus-state.ts +6 -0
- package/.pi/extensions/lib/harness-subagent-policy.ts +45 -0
- package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +42 -0
- package/.pi/extensions/lib/plan-approval/plan-review.ts +56 -0
- package/.pi/extensions/lib/plan-approval/types.ts +1 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +214 -0
- package/.pi/extensions/lib/plan-debate-focus.ts +151 -0
- package/.pi/extensions/lib/plan-debate-gate.ts +88 -34
- package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
- package/.pi/extensions/lib/plan-debate-lanes.ts +44 -0
- package/.pi/extensions/lib/plan-debate-round-status.ts +63 -20
- package/.pi/extensions/lib/plan-messenger.ts +93 -17
- package/.pi/extensions/policy-gate.ts +1 -1
- package/.pi/harness/README.md +1 -1
- package/.pi/harness/agents.manifest.json +25 -21
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +1 -3
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +13 -5
- package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +51 -0
- package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
- package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
- package/.pi/harness/docs/adrs/README.md +4 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r1.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/artifacts/review-round-r2.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-low-light/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r2.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r3.yaml +24 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +29 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +97 -16
- package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
- package/.pi/harness/specs/harness-human-required.schema.json +16 -0
- package/.pi/harness/specs/plan-implementation-research-brief.schema.json +128 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
- package/.pi/harness/specs/round-result.schema.json +15 -2
- package/.pi/lib/harness-agent-output.ts +45 -0
- package/.pi/lib/harness-budget-enforce.ts +18 -0
- package/.pi/lib/harness-schema-validate.ts +89 -0
- package/.pi/lib/harness-spawn-parse.ts +86 -0
- package/.pi/lib/harness-subagent-submit-path.ts +41 -0
- package/.pi/lib/harness-ui-state.ts +107 -2
- package/.pi/prompts/harness-auto.md +2 -2
- package/.pi/prompts/harness-plan.md +94 -42
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/planning-rubrics.md +31 -0
- package/.pi/scripts/harness-verify.mjs +2 -0
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
- package/CHANGELOG.md +21 -0
- package/package.json +4 -2
- package/vendor/pi-subagents/src/subagents.ts +29 -3
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
---
|
|
2
|
-
description: PM-grade harness plan — scouts, ExecutionPlan, DAG validation, Review Gate debate, approval.
|
|
3
|
-
argument-hint: "\"<task>\" [--risk low|med|high] [--
|
|
2
|
+
description: PM-grade harness plan — scouts, implementation research, ExecutionPlan, DAG validation, selective Review Gate debate, approval.
|
|
3
|
+
argument-hint: "\"<task>\" [--risk low|med|high] [--quick]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-plan
|
|
7
7
|
|
|
8
8
|
You are the **planning PM** for this harness run. Produce an execution baseline (`plan-packet.yaml` + `plan-review.md`), not strategy theater. Parent owns `ask_user`, `approve_plan`, `create_plan`, debate bus commands, and YAML writes under `.pi/harness/runs/<run_id>/`.
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
Subagents persist artifacts via scoped **`submit_*`** tools (deterministic YAML under the run dir). The parent uses **`harness_artifact_ready`** to gate phases (no JSON parsing). The parent still uses **`write_harness_yaml`**, but only for merges into `research-brief.yaml`, the `plan-packet.yaml` shell, and integrator patches.
|
|
11
11
|
|
|
12
12
|
## Allowed subagents
|
|
13
13
|
|
|
@@ -16,6 +16,7 @@ Never `write`/`edit` the final canonical packet except via **`write_harness_yaml
|
|
|
16
16
|
- `harness/planning/scout-semantic` (skip when `--quick`)
|
|
17
17
|
- `harness/planning/decompose`
|
|
18
18
|
- `harness/planning/hypothesis`
|
|
19
|
+
- `harness/planning/implementation-researcher`
|
|
19
20
|
- `harness/planning/stack-researcher`
|
|
20
21
|
- `harness/planning/execution-plan-author`
|
|
21
22
|
- `harness/planning/hypothesis-validator` (debate R1 only)
|
|
@@ -31,15 +32,15 @@ Read **harness-debate-plan** skill before Review Gate rounds.
|
|
|
31
32
|
1. Use `subagent` with `agentScope: "both"` and parallel `tasks` where lanes are independent.
|
|
32
33
|
2. Each `subagent` call blocks until subprocesses finish — batch parallel scouts in one `tasks` array.
|
|
33
34
|
3. Do **not** set `timeoutMs` unless the user explicitly requests a cap — subagents run until natural completion (optional backstop: `PI_SUBAGENT_TIMEOUT_MS`).
|
|
34
|
-
4. No harness subagent spawn cap — run the full scout + debate pipeline without skipping lanes for budget.
|
|
35
|
-
5. Compact task text: embed `HarnessSpawnContext` JSON
|
|
35
|
+
4. No harness subagent spawn cap — run the full scout + research + debate pipeline without skipping lanes for budget.
|
|
36
|
+
5. Compact task text: embed spawn context + lane instructions. Prefer `HarnessSpawnContext={"run_id":"…","plan_packet_path":"…",…}` or a JSON object with `"HarnessSpawnContext":{…}` — both parse; `run_id` is required so subprocess submit tools get `HARNESS_RUN_ID`.
|
|
36
37
|
|
|
37
38
|
## Step 0 — Parse `$ARGUMENTS`
|
|
38
39
|
|
|
39
40
|
- task (required)
|
|
40
|
-
- `--risk low|med|high`, `--
|
|
41
|
+
- `--risk low|med|high`, `--quick` (`--budget` is reserved/no-op; token budgets are telemetry-only unless `HARNESS_BUDGET_ENFORCE=1`)
|
|
41
42
|
|
|
42
|
-
`--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, stack research, execution plan, DAG validation, or **
|
|
43
|
+
`--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, **Phase 3.5 implementation research**, stack research, execution plan, DAG validation, or **Review Gate debate**.
|
|
43
44
|
|
|
44
45
|
## Active plan context
|
|
45
46
|
|
|
@@ -63,33 +64,50 @@ Do **not** run `ccc index` or `ccc search --refresh`. The harness runs increment
|
|
|
63
64
|
|
|
64
65
|
Add `harness/planning/scout-semantic` to `tasks` unless `--quick`. Require graphify + structure success. Semantic lane uses `ccc search` only (see `scout-semantic` agent).
|
|
65
66
|
|
|
67
|
+
After scouts: `harness_artifact_ready({ paths: ["artifacts/scout-graphify.yaml", "artifacts/scout-structure.yaml", ...] })`.
|
|
68
|
+
|
|
66
69
|
## Phase 2 & 3 — Decompose + hypothesis (parallel)
|
|
67
70
|
|
|
68
|
-
One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis
|
|
71
|
+
One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis` (include scout YAML paths in task text). Gate with `harness_artifact_ready` on `artifacts/decomposition.yaml` and `artifacts/hypothesis.yaml`.
|
|
69
72
|
|
|
70
|
-
|
|
73
|
+
Decompose **prior_art** is **internal only** (from scouts). External prior art arrives in Phase 3.5.
|
|
71
74
|
|
|
72
|
-
|
|
75
|
+
## Phase 3.5 — External solution research (required)
|
|
73
76
|
|
|
74
|
-
|
|
75
|
-
|
|
77
|
+
**MUST** run unless you document a `human_required` waiver in the run trace. Parallel batch:
|
|
78
|
+
|
|
79
|
+
```json
|
|
80
|
+
{
|
|
81
|
+
"agentScope": "both",
|
|
82
|
+
"tasks": [
|
|
83
|
+
{ "agent": "harness/planning/implementation-researcher", "task": "<HarnessSpawnContext + paths to decomposition/hypothesis/scout summaries — patterns/repos/workflows only; no stack version SERPs>" },
|
|
84
|
+
{ "agent": "harness/planning/stack-researcher", "task": "<HarnessSpawnContext + stack research brief — libraries/APIs only>" }
|
|
85
|
+
]
|
|
86
|
+
}
|
|
87
|
+
```
|
|
76
88
|
|
|
77
|
-
`
|
|
89
|
+
- Subagents write via `submit_implementation_research` / `submit_stack_brief`; gate with `harness_artifact_ready` on both paths.
|
|
90
|
+
- Merge both into `research-brief.yaml` (`implementation:` + `stack:`) via parent `write_harness_yaml`.
|
|
91
|
+
- **Partial failure:** if one lane fails, re-spawn that lane once; if it still fails, set `plan_status: partial` and flag `human_required` via `ask_user`. Do not proceed to Phase 4b without both artifacts or an explicit human waiver.
|
|
92
|
+
- **Web dedup:** implementation owns patterns/repos; stack owns libraries/versions — no overlapping queries.
|
|
78
93
|
|
|
79
|
-
|
|
94
|
+
On `mode: revise`: re-run implementation-researcher when task scope, acceptance_checks, or more than 30% of work_items change; skip when the delta is schedule-only and the prior artifact is fresh.
|
|
80
95
|
|
|
81
|
-
## Phase
|
|
96
|
+
## Phase 4 — Draft shell
|
|
82
97
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
98
|
+
Build draft `PlanPacket` (`contract_version: "1.1.0"`):
|
|
99
|
+
|
|
100
|
+
- `scope`, `assumptions`, `acceptance_checks`, `risk_level`, `rollback_plan`
|
|
101
|
+
- `execution_plan` placeholder until Phase 4b
|
|
86
102
|
|
|
87
|
-
|
|
103
|
+
Initialize `research-brief.yaml` with decomposition + hypothesis + Phase 3.5 merges (`write_harness_yaml`).
|
|
104
|
+
|
|
105
|
+
**`ask_user` on material `dialectical_fork`** after Phase 3.5 merge (evidence-backed — conflicting external patterns may trigger `human_required` from eligibility).
|
|
88
106
|
|
|
89
107
|
## Phase 4b — Execution plan author
|
|
90
108
|
|
|
91
109
|
```
|
|
92
|
-
subagent({ agentScope: "both", agent: "harness/planning/execution-plan-author", task: "<HarnessSpawnContext +
|
|
110
|
+
subagent({ agentScope: "both", agent: "harness/planning/execution-plan-author", task: "<HarnessSpawnContext + PlanImplementationResearchBrief + PlanStackBrief + decomposition/hypothesis>" })
|
|
93
111
|
```
|
|
94
112
|
|
|
95
113
|
Merge `execution_plan` into draft `plan-packet.yaml` (`write_harness_yaml`). Save `artifacts/execution-plan-draft.yaml` the same way.
|
|
@@ -102,37 +120,71 @@ node .pi/scripts/validate-plan-dag.mjs --packet .pi/harness/runs/<run_id>/plan-p
|
|
|
102
120
|
|
|
103
121
|
Must **pass** before debate. On fail: fix via author or parent patches, re-run.
|
|
104
122
|
|
|
105
|
-
## Phase
|
|
123
|
+
## Phase 4d — Debate eligibility (before Review Gate)
|
|
124
|
+
|
|
125
|
+
```
|
|
126
|
+
harness_plan_debate_eligibility({ risk_level, material_fork, dag_pass: true, ... })
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Inputs are pre-debate signals only (no R1 hypothesis output). Default to the **standard** profile when ambiguous.
|
|
106
130
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
131
|
+
If `human_required: true` → `ask_user` before `harness_debate_open`.
|
|
132
|
+
|
|
133
|
+
Then:
|
|
134
|
+
|
|
135
|
+
```
|
|
136
|
+
harness_debate_open({ debate_profile, required_focuses })
|
|
137
|
+
```
|
|
110
138
|
|
|
111
|
-
|
|
112
|
-
|-------|--------------------------|-----------|
|
|
113
|
-
| 1 | `hypothesis-validator` (blind) → `plan-evaluator` → `plan-adversary` | evaluator `claim` → adversary `rebuttal` (`in_reply_to` claim ids) |
|
|
114
|
-
| 2 | `plan-evaluator` → `plan-adversary` | same |
|
|
115
|
-
| 3 | `plan-evaluator` → `plan-adversary` | same |
|
|
116
|
-
| 4 | `plan-evaluator` → `plan-adversary` → **`sprint-contract-auditor`** | same + audit message optional |
|
|
139
|
+
Profiles:
|
|
117
140
|
|
|
118
|
-
|
|
141
|
+
| Profile | Focuses required | min_focus_rounds |
|
|
142
|
+
|---------|------------------|------------------|
|
|
143
|
+
| full | spec, wbs, schedule, quality | 4 |
|
|
144
|
+
| standard | spec, wbs, schedule, quality | 4 |
|
|
145
|
+
| light | spec, quality only | 2 |
|
|
119
146
|
|
|
120
|
-
|
|
147
|
+
## Phase 5 — Review Gate debate (profile-aware, pi-messenger, even with `--quick`)
|
|
121
148
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
149
|
+
**Forbidden:** parallel `subagent` calls for any debate lane agent in one batch. One lane agent per tool batch, in order.
|
|
150
|
+
|
|
151
|
+
1. Optional: `harness_plan_scope_check` — if `material_drift`, `ask_user` before debate.
|
|
152
|
+
2. Drive debate with **`harness_debate_focus_coverage`** and **`harness_debate_round_status({ round_index, debate_round_focus })`** — cover **required_focuses** from eligibility, not always all four.
|
|
153
|
+
|
|
154
|
+
### Focus coverage (required before consensus)
|
|
155
|
+
|
|
156
|
+
Each required focus must appear in a submitted `review-round-rN.yaml` (`debate_round_focus`). Monotonic `round_index` (cap from profile). Consensus only when:
|
|
157
|
+
|
|
158
|
+
- all **required** focuses covered, **and**
|
|
159
|
+
- last round `review_gate_ready: true`, **and**
|
|
160
|
+
- `validate-plan-dag.mjs` still passes (re-run after patches).
|
|
161
|
+
|
|
162
|
+
### Per-round state machine
|
|
163
|
+
|
|
164
|
+
```
|
|
165
|
+
round_index := next uncovered required focus
|
|
166
|
+
debate_round_focus := spec | wbs | schedule | quality for this round
|
|
167
|
+
|
|
168
|
+
IF round_index == 1:
|
|
169
|
+
spawn hypothesis-validator (blind — no decomposition/PlanPacket/scouts/prior debate)
|
|
170
|
+
WHILE NOT ready_for_integrator (harness_debate_round_status with debate_round_focus):
|
|
171
|
+
follow next_tool exactly (one subagent per batch)
|
|
172
|
+
IF debate_round_focus == quality OR round_index >= 4:
|
|
173
|
+
spawn sprint-contract-auditor
|
|
174
|
+
spawn review-integrator → harness_debate_submit_round({ round_index, integrator_draft })
|
|
175
|
+
harness_debate_focus_coverage // repeat until missing required focuses empty
|
|
176
|
+
harness_debate_consensus
|
|
177
|
+
```
|
|
126
178
|
|
|
127
|
-
|
|
179
|
+
Debate agents **must not** call `web_search` / `web_fetch` — cite `artifacts/implementation-research.yaml` instead.
|
|
128
180
|
|
|
129
|
-
**Never**
|
|
181
|
+
**Never** end a Phase 5 turn with prose only — next action must be a harness tool or single sequential `subagent`.
|
|
130
182
|
|
|
131
|
-
**R1 blind rule:** hypothesis-validator
|
|
183
|
+
**R1 blind rule:** hypothesis-validator sees only task + `PlanHypothesisBrief`.
|
|
132
184
|
|
|
133
185
|
If R1 `revision_recommended` or `relevance.passes === false`: one `hypothesis` re-spawn, update brief, continue.
|
|
134
186
|
|
|
135
|
-
**Blockers:** `policy_decision: block` →
|
|
187
|
+
**Blockers:** `policy_decision: block` → no `approve_plan`. `human_required` → `ask_user` first.
|
|
136
188
|
|
|
137
189
|
## Phase 5b — Revise packet
|
|
138
190
|
|
|
@@ -142,7 +194,7 @@ Set `research_brief.eval` from R1 `hypothesis-validator` output.
|
|
|
142
194
|
|
|
143
195
|
## Phase 6 — Approval + persistence
|
|
144
196
|
|
|
145
|
-
1. `approve_plan` with `plan_packet`, `human_summary`, `research_brief` (
|
|
197
|
+
1. `approve_plan` with `plan_packet`, `human_summary`, `research_brief` (include `implementation` section). Missing `artifacts/implementation-research.yaml` → **error** on `--risk high`, **warn** otherwise.
|
|
146
198
|
2. On Approve: `create_plan` with same packet (`contract_version: "1.1.0"` + `execution_plan`).
|
|
147
199
|
3. Confirm `plan_ready: true` → `next_command: /harness-run`.
|
|
148
200
|
|
|
@@ -152,4 +204,4 @@ Post-execute adversary: `/harness-critic` only (not plan-phase agents).
|
|
|
152
204
|
|
|
153
205
|
- `plan_status`: ready | partial | needs_clarification
|
|
154
206
|
- `plan_review_path` for human review
|
|
155
|
-
- DAG `pass` +
|
|
207
|
+
- DAG `pass` + required focus areas covered + consensus not `block` before ready
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Execute only against an approved PlanPacket with strict phase gates.
|
|
3
|
-
argument-hint: "
|
|
3
|
+
argument-hint: ""
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-run
|
|
@@ -9,7 +9,7 @@ Orchestrator only — spawn `harness/executor`. Do **not** implement inline.
|
|
|
9
9
|
|
|
10
10
|
## Step 0 — Parse arguments
|
|
11
11
|
|
|
12
|
-
-
|
|
12
|
+
- `--budget` is reserved/no-op (telemetry-only budgets by default)
|
|
13
13
|
- Do **not** use `--plan` on happy path — load from `[HarnessActivePlan]` / `plan_packet_path`.
|
|
14
14
|
|
|
15
15
|
If plan not ready:
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Planning Review Gate rubrics (spawn fragment)
|
|
2
|
+
|
|
3
|
+
Parent includes this file in debate agent spawn text. Stable check ids by `debate_round_focus`.
|
|
4
|
+
|
|
5
|
+
## spec
|
|
6
|
+
|
|
7
|
+
- SC-01: Every acceptance_check maps to scope or execution_plan work_item
|
|
8
|
+
- SC-02: Out-of-scope work is listed in decomposition `excluded`
|
|
9
|
+
- SC-03: Hypothesis brief falsifiability and success metrics are testable
|
|
10
|
+
- SC-04: Risk register covers top technical unknowns
|
|
11
|
+
|
|
12
|
+
## wbs
|
|
13
|
+
|
|
14
|
+
- WB-01: Each work_item has typed `done_criteria` (not vague “implement X”)
|
|
15
|
+
- WB-02: No orphan work_items (every item on critical path or sprint_contract)
|
|
16
|
+
- WB-03: `depends_on` is acyclic; parallel_safe only when files disjoint
|
|
17
|
+
- WB-04: wbs_dictionary entry per non-trivial work_item
|
|
18
|
+
|
|
19
|
+
## schedule
|
|
20
|
+
|
|
21
|
+
- SH-01: `schedule_metadata.critical_path_work_item_ids` is non-empty for med/high risk
|
|
22
|
+
- SH-02: Phase entry/exit criteria are observable
|
|
23
|
+
- SH-03: Milestones align with acceptance_checks dates where stated
|
|
24
|
+
- SH-04: No impossible parallelism (same file, conflicting owners)
|
|
25
|
+
|
|
26
|
+
## quality
|
|
27
|
+
|
|
28
|
+
- QL-01: sprint_contract.done_criteria_types complete (ADR-020)
|
|
29
|
+
- QL-02: Verify/lint/test work_items in early phases when risk ≥ med
|
|
30
|
+
- QL-03: Checkpoint gaps between phases documented
|
|
31
|
+
- QL-04: Keep Quality Left — no “test at end only” without justification
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,27 @@ All notable changes to this project are documented in this file.
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
## [v0.16.0] — 2026-05-19
|
|
8
|
+
|
|
9
|
+
### ✨ Features
|
|
10
|
+
|
|
11
|
+
- add submit pipeline and planning/debate updates
|
|
12
|
+
|
|
13
|
+
### 🔧 Chores
|
|
14
|
+
|
|
15
|
+
- refresh graph artifacts after harness updates
|
|
16
|
+
|
|
17
|
+
## [v0.15.0] — 2026-05-19
|
|
18
|
+
|
|
19
|
+
### ✨ Features
|
|
20
|
+
|
|
21
|
+
- **Live widget:** Single-row footer with current/next pipeline phase and plain-language status hints; removes inFlight, policy jargon, and flag rows.
|
|
22
|
+
- **Plan phase:** Implementation researcher, selective debate lanes/eligibility, planning rubrics, ADR 0036, and smoke fixture updates.
|
|
23
|
+
|
|
24
|
+
### ✅ Tests
|
|
25
|
+
|
|
26
|
+
- Add `harness-live-widget-status` and `plan-debate-eligibility` tests.
|
|
27
|
+
|
|
7
28
|
## [v0.14.0] — 2026-05-18
|
|
8
29
|
|
|
9
30
|
### ✨ Features
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ultimate-pi",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.16.0",
|
|
4
4
|
"description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
|
@@ -84,7 +84,7 @@
|
|
|
84
84
|
"format": "biome format --write",
|
|
85
85
|
"format:check": "biome format",
|
|
86
86
|
"prepare": "lefthook install",
|
|
87
|
-
"test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-spawn-budget.test.mjs test/harness-turn-routing.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs",
|
|
87
|
+
"test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-live-widget-status.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-spawn-budget.test.mjs test/harness-spawn-parse.test.mjs test/harness-schema-validate.test.mjs test/harness-turn-routing.test.mjs test/harness-budget-enforce.test.mjs test/harness-submit-policy.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs",
|
|
88
88
|
"test:vcc": "npx -y tsx --test vendor/pi-vcc/tests/*.test.ts",
|
|
89
89
|
"harness:sentrux-bootstrap": "node .pi/scripts/harness-sentrux-bootstrap.mjs",
|
|
90
90
|
"harness:sentrux-sync": "node .pi/scripts/sentrux-rules-sync.mjs --force",
|
|
@@ -103,6 +103,8 @@
|
|
|
103
103
|
},
|
|
104
104
|
"dependencies": {
|
|
105
105
|
"@posthog/pi": "latest",
|
|
106
|
+
"ajv": "^8.17.1",
|
|
107
|
+
"ajv-formats": "^3.0.1",
|
|
106
108
|
"croner": "^9.0.0",
|
|
107
109
|
"jimp": "^1.6.1",
|
|
108
110
|
"nanoid": "^5.1.5",
|
|
@@ -42,6 +42,13 @@ export interface SpawnAuthForward {
|
|
|
42
42
|
|
|
43
43
|
export interface HarnessSubagentsOptions {
|
|
44
44
|
packageRoot?: string;
|
|
45
|
+
/** Absolute path to harness-subagent-submit.ts for subprocess-only extension loading (Option A). */
|
|
46
|
+
harnessSubprocessExtensionPath?: string;
|
|
47
|
+
/** Extra env vars per subprocess (e.g. HARNESS_RUN_ID, HARNESS_RUN_DIR). */
|
|
48
|
+
resolveSubprocessEnv?: (
|
|
49
|
+
task: string,
|
|
50
|
+
agent: AgentConfig,
|
|
51
|
+
) => Record<string, string> | undefined;
|
|
45
52
|
defaultAgentScope?: AgentScope;
|
|
46
53
|
defaultConfirmProjectAgents?: boolean;
|
|
47
54
|
beforeExecute?: (
|
|
@@ -388,8 +395,11 @@ function terminateProcess(proc: ReturnType<typeof spawn>) {
|
|
|
388
395
|
|
|
389
396
|
type OnUpdateCallback = (partial: AgentToolResult<SubagentDetails>) => void;
|
|
390
397
|
|
|
391
|
-
function buildSpawnEnv(
|
|
392
|
-
|
|
398
|
+
function buildSpawnEnv(
|
|
399
|
+
packageRoot?: string,
|
|
400
|
+
extra?: Record<string, string>,
|
|
401
|
+
): NodeJS.ProcessEnv {
|
|
402
|
+
const env = { ...process.env, ...extra };
|
|
393
403
|
env.PI_HARNESS_SUBPROCESS = "1";
|
|
394
404
|
if (packageRoot) {
|
|
395
405
|
env.UP_PKG = packageRoot;
|
|
@@ -411,6 +421,7 @@ async function runSingleAgent(
|
|
|
411
421
|
makeDetails: (results: SingleResult[]) => SubagentDetails,
|
|
412
422
|
packageRoot?: string,
|
|
413
423
|
spawnAuth?: SpawnAuthForward,
|
|
424
|
+
subagentsOptions?: HarnessSubagentsOptions,
|
|
414
425
|
): Promise<SingleResult> {
|
|
415
426
|
const agent = agents.find((a) => a.name === agentName);
|
|
416
427
|
|
|
@@ -434,8 +445,15 @@ async function runSingleAgent(
|
|
|
434
445
|
else if (spawnAuth) args.push("--model", spawnAuth.modelRef);
|
|
435
446
|
if (spawnAuth?.apiKey) args.push("--api-key", spawnAuth.apiKey);
|
|
436
447
|
if (agent.thinking) args.push("--thinking", agent.thinking);
|
|
448
|
+
const harnessExt =
|
|
449
|
+
agent.extensionsOff &&
|
|
450
|
+
agent.name.startsWith("harness/") &&
|
|
451
|
+
subagentsOptions?.harnessSubprocessExtensionPath;
|
|
437
452
|
if (agent.extensionsOff) {
|
|
438
453
|
args.push("--no-extensions");
|
|
454
|
+
if (harnessExt) {
|
|
455
|
+
args.push("-e", harnessExt);
|
|
456
|
+
}
|
|
439
457
|
if (agent.skillsOff) args.push("--no-skills");
|
|
440
458
|
}
|
|
441
459
|
if (agent.tools && agent.tools.length > 0) {
|
|
@@ -443,7 +461,11 @@ async function runSingleAgent(
|
|
|
443
461
|
} else if (agent.extensionsOff) {
|
|
444
462
|
args.push("--no-tools");
|
|
445
463
|
}
|
|
446
|
-
const
|
|
464
|
+
const extraEnv = subagentsOptions?.resolveSubprocessEnv?.(task, agent);
|
|
465
|
+
const spawnEnv = buildSpawnEnv(packageRoot, {
|
|
466
|
+
...extraEnv,
|
|
467
|
+
HARNESS_AGENT_ID: agent.name,
|
|
468
|
+
});
|
|
447
469
|
|
|
448
470
|
let tmpPromptDir: string | null = null;
|
|
449
471
|
let tmpPromptPath: string | null = null;
|
|
@@ -856,6 +878,7 @@ export function createSubagentsExtension(
|
|
|
856
878
|
makeDetails("chain"),
|
|
857
879
|
packageRoot,
|
|
858
880
|
await resolveSpawnAuth(step.agent),
|
|
881
|
+
options,
|
|
859
882
|
);
|
|
860
883
|
results.push(result);
|
|
861
884
|
|
|
@@ -950,6 +973,7 @@ export function createSubagentsExtension(
|
|
|
950
973
|
makeDetails("parallel"),
|
|
951
974
|
packageRoot,
|
|
952
975
|
await resolveSpawnAuth(t.agent),
|
|
976
|
+
options,
|
|
953
977
|
);
|
|
954
978
|
allResults[index] = result;
|
|
955
979
|
doneCount += 1;
|
|
@@ -987,6 +1011,7 @@ export function createSubagentsExtension(
|
|
|
987
1011
|
makeDetails("parallel"),
|
|
988
1012
|
packageRoot,
|
|
989
1013
|
await resolveSpawnAuth(aggregator.agent),
|
|
1014
|
+
options,
|
|
990
1015
|
);
|
|
991
1016
|
}
|
|
992
1017
|
|
|
@@ -1038,6 +1063,7 @@ export function createSubagentsExtension(
|
|
|
1038
1063
|
makeDetails("single"),
|
|
1039
1064
|
packageRoot,
|
|
1040
1065
|
await resolveSpawnAuth(params.agent),
|
|
1066
|
+
options,
|
|
1041
1067
|
);
|
|
1042
1068
|
const isError = result.exitCode !== 0 || result.stopReason === "error" || result.stopReason === "aborted";
|
|
1043
1069
|
if (isError) {
|