ultimate-pi 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-governor/SKILL.md +11 -0
- package/.agents/skills/harness-orchestration/SKILL.md +3 -1
- package/.agents/skills/harness-plan/SKILL.md +5 -5
- package/.pi/agents/harness/adversary.md +1 -1
- package/.pi/agents/harness/evaluator.md +1 -1
- package/.pi/agents/harness/executor.md +1 -1
- package/.pi/agents/harness/incident-recorder.md +1 -1
- package/.pi/agents/harness/meta-optimizer.md +1 -1
- package/.pi/agents/harness/planning/decompose.md +4 -33
- package/.pi/agents/harness/planning/execution-plan-author.md +3 -2
- package/.pi/agents/harness/planning/hypothesis-validator.md +3 -2
- package/.pi/agents/harness/planning/hypothesis.md +4 -27
- package/.pi/agents/harness/planning/implementation-researcher.md +3 -2
- package/.pi/agents/harness/planning/plan-adversary.md +2 -3
- package/.pi/agents/harness/planning/plan-evaluator.md +3 -2
- package/.pi/agents/harness/planning/review-integrator.md +2 -3
- package/.pi/agents/harness/planning/scout-graphify.md +3 -22
- package/.pi/agents/harness/planning/scout-semantic.md +3 -18
- package/.pi/agents/harness/planning/scout-structure.md +3 -18
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +3 -2
- package/.pi/agents/harness/planning/stack-researcher.md +3 -2
- package/.pi/agents/harness/tie-breaker.md +1 -1
- package/.pi/agents/harness/trace-librarian.md +1 -1
- package/.pi/extensions/budget-guard.ts +33 -19
- package/.pi/extensions/harness-debate-tools.ts +54 -6
- package/.pi/extensions/harness-run-context.ts +108 -2
- package/.pi/extensions/harness-subagent-submit.ts +172 -0
- package/.pi/extensions/harness-telemetry.ts +29 -4
- package/.pi/extensions/lib/debate-bus-core.ts +49 -6
- package/.pi/extensions/lib/harness-subagent-auth.ts +104 -19
- package/.pi/extensions/lib/harness-subagent-policy.ts +59 -0
- package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +127 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +61 -8
- package/.pi/extensions/lib/plan-debate-focus.ts +21 -9
- package/.pi/extensions/lib/plan-debate-gate.ts +92 -18
- package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
- package/.pi/extensions/lib/plan-debate-lanes.ts +27 -3
- package/.pi/extensions/lib/plan-debate-round-status.ts +18 -7
- package/.pi/extensions/lib/plan-messenger.ts +4 -0
- package/.pi/extensions/lib/plan-review-gate.ts +51 -0
- package/.pi/extensions/trace-recorder.ts +1 -0
- package/.pi/harness/agents.manifest.json +22 -22
- package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
- package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +40 -17
- package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
- package/.pi/harness/specs/harness-human-required.schema.json +16 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
- package/.pi/lib/harness-agent-output.ts +45 -0
- package/.pi/lib/harness-budget-enforce.ts +18 -0
- package/.pi/lib/harness-schema-validate.ts +89 -0
- package/.pi/lib/harness-spawn-parse.ts +86 -0
- package/.pi/lib/harness-subagent-submit-path.ts +41 -0
- package/.pi/lib/harness-ui-state.ts +15 -2
- package/.pi/model-router.example.json +13 -4
- package/.pi/prompts/harness-auto.md +2 -2
- package/.pi/prompts/harness-plan.md +34 -14
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/harness-setup.md +4 -4
- package/.pi/scripts/harness-generate-model-router.mjs +118 -36
- package/.pi/scripts/harness-model-router-routing.test.mjs +97 -0
- package/.pi/scripts/harness-sync-model-router.mjs +15 -2
- package/.pi/scripts/harness-verify.mjs +31 -0
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
- package/CHANGELOG.md +21 -0
- package/package.json +4 -2
- package/vendor/pi-model-router/UPSTREAM_PIN.md +3 -1
- package/vendor/pi-model-router/extensions/commands.ts +4 -4
- package/vendor/pi-model-router/extensions/index.ts +21 -0
- package/vendor/pi-model-router/extensions/provider.ts +130 -79
- package/vendor/pi-model-router/extensions/routing.ts +148 -0
- package/vendor/pi-model-router/extensions/state.ts +3 -0
- package/vendor/pi-model-router/extensions/types.ts +9 -0
- package/vendor/pi-model-router/extensions/ui.ts +16 -2
- package/vendor/pi-subagents/src/subagents.ts +29 -3
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: PM-grade harness plan — scouts, implementation research, ExecutionPlan, DAG validation, selective Review Gate debate, approval.
|
|
3
|
-
argument-hint: "\"<task>\" [--risk low|med|high] [--
|
|
3
|
+
argument-hint: "\"<task>\" [--risk low|med|high] [--quick]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-plan
|
|
7
7
|
|
|
8
8
|
You are the **planning PM** for this harness run. Produce an execution baseline (`plan-packet.yaml` + `plan-review.md`), not strategy theater. Parent owns `ask_user`, `approve_plan`, `create_plan`, debate bus commands, and YAML writes under `.pi/harness/runs/<run_id>/`.
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
Subagents persist artifacts via scoped **`submit_*`** tools (deterministic YAML under the run dir). Parent uses **`harness_artifact_ready`** to gate phases (no JSON parsing). Parent merges still use **`write_harness_yaml`** for `research-brief.yaml`, `plan-packet.yaml` shell, and integrator patches only.
|
|
11
11
|
|
|
12
12
|
## Allowed subagents
|
|
13
13
|
|
|
@@ -33,12 +33,12 @@ Read **harness-debate-plan** skill before Review Gate rounds.
|
|
|
33
33
|
2. Each `subagent` call blocks until subprocesses finish — batch parallel scouts in one `tasks` array.
|
|
34
34
|
3. Do **not** set `timeoutMs` unless the user explicitly requests a cap — subagents run until natural completion (optional backstop: `PI_SUBAGENT_TIMEOUT_MS`).
|
|
35
35
|
4. No harness subagent spawn cap — run the full scout + research + debate pipeline without skipping lanes for budget.
|
|
36
|
-
5. Compact task text: embed `HarnessSpawnContext` JSON
|
|
36
|
+
5. Compact task text: embed spawn context + lane instructions. Prefer `HarnessSpawnContext={"run_id":"…","plan_packet_path":"…",…}` or a JSON object with `"HarnessSpawnContext":{…}` — both parse; `run_id` is required so subprocess submit tools get `HARNESS_RUN_ID`.
|
|
37
37
|
|
|
38
38
|
## Step 0 — Parse `$ARGUMENTS`
|
|
39
39
|
|
|
40
40
|
- task (required)
|
|
41
|
-
- `--risk low|med|high`, `--
|
|
41
|
+
- `--risk low|med|high`, `--quick` (`--budget` is reserved/no-op; token budgets are telemetry-only unless `HARNESS_BUDGET_ENFORCE=1`)
|
|
42
42
|
|
|
43
43
|
`--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, **Phase 3.5 implementation research**, stack research, execution plan, DAG validation, or **Review Gate debate**.
|
|
44
44
|
|
|
@@ -64,9 +64,11 @@ Do **not** run `ccc index` or `ccc search --refresh`. The harness runs increment
|
|
|
64
64
|
|
|
65
65
|
Add `harness/planning/scout-semantic` to `tasks` unless `--quick`. Require graphify + structure success. Semantic lane uses `ccc search` only (see `scout-semantic` agent).
|
|
66
66
|
|
|
67
|
+
After scouts: `harness_artifact_ready({ paths: ["artifacts/scout-graphify.yaml", "artifacts/scout-structure.yaml", ...] })`.
|
|
68
|
+
|
|
67
69
|
## Phase 2 & 3 — Decompose + hypothesis (parallel)
|
|
68
70
|
|
|
69
|
-
One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis
|
|
71
|
+
One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis` (include scout YAML paths in task text). Gate with `harness_artifact_ready` on `artifacts/decomposition.yaml` and `artifacts/hypothesis.yaml`.
|
|
70
72
|
|
|
71
73
|
Decompose **prior_art** is **internal only** (from scouts). External prior art arrives in Phase 3.5.
|
|
72
74
|
|
|
@@ -84,8 +86,8 @@ Decompose **prior_art** is **internal only** (from scouts). External prior art a
|
|
|
84
86
|
}
|
|
85
87
|
```
|
|
86
88
|
|
|
87
|
-
- `
|
|
88
|
-
- Merge both into `research-brief.yaml` (`implementation:` + `stack:`)
|
|
89
|
+
- Subagents write via `submit_implementation_research` / `submit_stack_brief`; gate with `harness_artifact_ready` on both paths.
|
|
90
|
+
- Merge both into `research-brief.yaml` (`implementation:` + `stack:`) via parent `write_harness_yaml`.
|
|
89
91
|
- **Partial failure:** if one lane fails, re-spawn that lane once; if still failing set `plan_status: partial` and `human_required` via `ask_user`. Do not proceed to Phase 4b without both artifacts or explicit human waiver.
|
|
90
92
|
- **Web dedup:** implementation owns patterns/repos; stack owns libraries/versions — no overlapping queries.
|
|
91
93
|
|
|
@@ -136,11 +138,16 @@ harness_debate_open({ debate_profile, required_focuses })
|
|
|
136
138
|
|
|
137
139
|
Profiles:
|
|
138
140
|
|
|
139
|
-
| Profile | Focuses required | min_focus_rounds |
|
|
140
|
-
|
|
141
|
-
| full | spec, wbs, schedule, quality | 4 |
|
|
142
|
-
| standard | all four | 4 |
|
|
143
|
-
| light | spec, quality only | 2 |
|
|
141
|
+
| Profile | Review gate | Focuses required | min_focus_rounds |
|
|
142
|
+
|---------|-------------|------------------|------------------|
|
|
143
|
+
| full | threaded (4 rounds) | spec, wbs, schedule, quality | 4 |
|
|
144
|
+
| standard | threaded (4 rounds) | all four | 4 |
|
|
145
|
+
| light | threaded (2 rounds) | spec, quality only | 2 |
|
|
146
|
+
| fast | **consolidated** (1 round) | spec, quality | 1 |
|
|
147
|
+
|
|
148
|
+
Med/low non-fork plans with clear stack and no implementation `open_questions` default to **fast** (consolidated). Escalate to threaded rounds only when integrator sets `review_gate_ready: false` or records blockers.
|
|
149
|
+
|
|
150
|
+
`--quick`: skip scout-semantic; cap web research (≤2 searches, ≤3 fetches); prefer **fast** eligibility when DAG passes; use consolidated Review Gate when profile is fast.
|
|
144
151
|
|
|
145
152
|
## Phase 5 — Review Gate debate (profile-aware, pi-messenger, even with `--quick`)
|
|
146
153
|
|
|
@@ -151,13 +158,26 @@ Profiles:
|
|
|
151
158
|
|
|
152
159
|
### Focus coverage (required before consensus)
|
|
153
160
|
|
|
154
|
-
Each required focus must appear in
|
|
161
|
+
Each required focus must appear in submitted review artifacts (`review-round-rN.yaml` or `review-round-consolidated.yaml` with `debate_round_focus: all`). Monotonic `round_index` (cap from profile). Consensus only when:
|
|
155
162
|
|
|
156
163
|
- all **required** focuses covered, **and**
|
|
157
164
|
- last round `review_gate_ready: true`, **and**
|
|
158
165
|
- `validate-plan-dag.mjs` still passes (re-run after patches).
|
|
159
166
|
|
|
160
|
-
###
|
|
167
|
+
### Consolidated state machine (`review_gate_mode: consolidated`, profile fast)
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
round_index := 1
|
|
171
|
+
debate_round_focus := all
|
|
172
|
+
spawn hypothesis-validator (blind)
|
|
173
|
+
WHILE NOT ready_for_integrator (harness_debate_round_status round_index=1):
|
|
174
|
+
follow next_tool (validation-turn, adversary-brief, sprint-audit in parallel-friendly order; one subagent per batch)
|
|
175
|
+
spawn review-integrator → write artifacts/review-round-consolidated.yaml → harness_debate_submit_round
|
|
176
|
+
IF review_gate_ready false OR blockers: escalate — threaded round per missing focus (spec/wbs/schedule/quality)
|
|
177
|
+
harness_debate_focus_coverage → harness_debate_consensus
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### Threaded state machine (standard/full/light)
|
|
161
181
|
|
|
162
182
|
```
|
|
163
183
|
round_index := next uncovered required focus
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Execute only against an approved PlanPacket with strict phase gates.
|
|
3
|
-
argument-hint: "
|
|
3
|
+
argument-hint: ""
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-run
|
|
@@ -9,7 +9,7 @@ Orchestrator only — spawn `harness/executor`. Do **not** implement inline.
|
|
|
9
9
|
|
|
10
10
|
## Step 0 — Parse arguments
|
|
11
11
|
|
|
12
|
-
-
|
|
12
|
+
- `--budget` is reserved/no-op (telemetry-only budgets by default)
|
|
13
13
|
- Do **not** use `--plan` on happy path — load from `[HarnessActivePlan]` / `plan_packet_path`.
|
|
14
14
|
|
|
15
15
|
If plan not ready:
|
|
@@ -327,7 +327,7 @@ sentrux plugin add-standard 2>/dev/null || echo "Plugins already installed or fa
|
|
|
327
327
|
|
|
328
328
|
## Step 3 — Pi Extension Packages
|
|
329
329
|
|
|
330
|
-
Bundled extensions load from the installed `ultimate-pi` package. **
|
|
330
|
+
Bundled extensions load from the installed `ultimate-pi` package. **Session-locked model routing** comes from a **vendored** fork of [`yeliu84/pi-model-router`](https://github.com/yeliu84/pi-model-router) in `vendor/pi-model-router/`, wired through [`.pi/extensions/pi-model-router-harness.ts`](.pi/extensions/pi-model-router-harness.ts). The router picks **one concrete model** when the session starts (from the first user prompt + system prompt complexity), then changes **thinking level only** each turn. The harness **gates** activation on `.pi/model-router.json` (Step **3.5** below) so `router/auto` cannot load prematurely. Attribution: see [THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md) and `vendor/pi-model-router/UPSTREAM_PIN.md`. Maintainer refresh: `npm run vendor:sync-router`.
|
|
331
331
|
|
|
332
332
|
Optionally install the companion lockfile used in development:
|
|
333
333
|
|
|
@@ -381,9 +381,9 @@ If generation prints "No authenticated Pi providers": warn in report — user sh
|
|
|
381
381
|
|
|
382
382
|
Do NOT block setup. If no config is written, `harness-sync-model-router.mjs` clears a premature `defaultProvider: "router"` in `.pi/settings.json`.
|
|
383
383
|
|
|
384
|
-
**Router onboarding** — The vendored extension starts only after `.pi/model-router.json` appears. Running the script above prepares that file plus optional Pi defaults (**`router` / `auto
|
|
384
|
+
**Router onboarding** — The vendored extension starts only after `.pi/model-router.json` appears. Running the script above prepares that file plus optional Pi defaults (**`router` / `auto`**, or whatever `defaultProfile` is) via `harness-sync-model-router.mjs` when `defaultProvider` was unset—then **`/reload`**. Generated profiles use **one model SKU per profile**; high/medium/low tiers differ in **thinking** only. Subagents resolve their subprocess model from the **agent system prompt** complexity (same lock rules).
|
|
385
385
|
|
|
386
|
-
Manual override: **`/router profile auto`** anytime after reload if they changed defaults.
|
|
386
|
+
Manual override: **`/router profile auto`** or **`/router profile opencode-go`** anytime after reload if they changed defaults.
|
|
387
387
|
|
|
388
388
|
## Step 3.6 — Harness agents (package-resolved)
|
|
389
389
|
|
|
@@ -677,7 +677,7 @@ Output summary table:
|
|
|
677
677
|
| sentrux | ✓/✗ | CLI + plugins; rules via Step 4.2 bootstrap |
|
|
678
678
|
| Sentrux rules.toml | ✓/✗ | `.sentrux/rules.toml` synced from manifest |
|
|
679
679
|
| pi extensions | ✓/✗ | 4 packages |
|
|
680
|
-
| model router | ✓/✗ | Package + config verified, activation via `/router profile auto` |
|
|
680
|
+
| model router | ✓/✗ | Package + config verified, activation via `/router profile auto` (or `opencode-go`) |
|
|
681
681
|
| `.env` | ✓/✗/ask | Created / keys appended / user declined |
|
|
682
682
|
|
|
683
683
|
| .gitignore | ✓/✗ | entries added (incl. `.env`) |
|
|
@@ -22,9 +22,9 @@ const UP_PKG = join(SCRIPT_DIR, "..", "..");
|
|
|
22
22
|
const OUT_PATH = join(process.cwd(), ".pi", "model-router.json");
|
|
23
23
|
|
|
24
24
|
const PROVIDER_PRIORITY = [
|
|
25
|
+
"openai",
|
|
25
26
|
"opencode-go",
|
|
26
27
|
"anthropic",
|
|
27
|
-
"openai",
|
|
28
28
|
"google",
|
|
29
29
|
"openrouter",
|
|
30
30
|
"groq",
|
|
@@ -35,6 +35,7 @@ const PROVIDER_PRIORITY = [
|
|
|
35
35
|
/** Substring hints per tier (first match in available ids wins). */
|
|
36
36
|
const TIER_HINTS = {
|
|
37
37
|
high: [
|
|
38
|
+
"gpt-5.5-pro",
|
|
38
39
|
"deepseek-v4-pro",
|
|
39
40
|
"gpt-5.4-pro",
|
|
40
41
|
"claude-opus",
|
|
@@ -43,6 +44,7 @@ const TIER_HINTS = {
|
|
|
43
44
|
"pro",
|
|
44
45
|
],
|
|
45
46
|
medium: [
|
|
47
|
+
"gpt-5.5",
|
|
46
48
|
"qwen3.6-plus",
|
|
47
49
|
"kimi-k2.6",
|
|
48
50
|
"gpt-5.4",
|
|
@@ -98,7 +100,10 @@ function canonicalRef(provider, modelId) {
|
|
|
98
100
|
|
|
99
101
|
function pickTierModel(models, tier) {
|
|
100
102
|
const hints = TIER_HINTS[tier];
|
|
101
|
-
const
|
|
103
|
+
for (const hint of hints) {
|
|
104
|
+
const exact = models.find((m) => m.id === hint);
|
|
105
|
+
if (exact) return canonicalRef(exact.provider, exact.id);
|
|
106
|
+
}
|
|
102
107
|
for (const hint of hints) {
|
|
103
108
|
const match = models.find((m) => m.id.includes(hint));
|
|
104
109
|
if (match) return canonicalRef(match.provider, match.id);
|
|
@@ -114,6 +119,10 @@ function pickTierModel(models, tier) {
|
|
|
114
119
|
return canonicalRef(models[0].provider, models[0].id);
|
|
115
120
|
}
|
|
116
121
|
|
|
122
|
+
function modelsForProvider(available, provider) {
|
|
123
|
+
return available.filter((m) => m.provider === provider);
|
|
124
|
+
}
|
|
125
|
+
|
|
117
126
|
function choosePrimaryProvider(available) {
|
|
118
127
|
const byProvider = new Map();
|
|
119
128
|
for (const m of available) {
|
|
@@ -129,7 +138,7 @@ function choosePrimaryProvider(available) {
|
|
|
129
138
|
|
|
130
139
|
function buildFallbacks(available, primaryProvider, highModel) {
|
|
131
140
|
const fallbacks = [];
|
|
132
|
-
for (const p of ["anthropic", "google", "openai"]) {
|
|
141
|
+
for (const p of ["anthropic", "google", "openai", "opencode-go"]) {
|
|
133
142
|
if (p === primaryProvider) continue;
|
|
134
143
|
const alt = available.filter((m) => m.provider === p);
|
|
135
144
|
if (alt.length === 0) continue;
|
|
@@ -139,6 +148,76 @@ function buildFallbacks(available, primaryProvider, highModel) {
|
|
|
139
148
|
return fallbacks.slice(0, 3);
|
|
140
149
|
}
|
|
141
150
|
|
|
151
|
+
/** Session-locked router: one model SKU per profile; tiers vary thinking only. */
|
|
152
|
+
function buildRoutedProfile(available, provider) {
|
|
153
|
+
const models = modelsForProvider(available, provider);
|
|
154
|
+
if (models.length === 0) return null;
|
|
155
|
+
const sku =
|
|
156
|
+
pickTierModel(models, "medium") ??
|
|
157
|
+
pickTierModel(models, "high") ??
|
|
158
|
+
pickTierModel(models, "low");
|
|
159
|
+
if (!sku) return null;
|
|
160
|
+
const fallbacks = buildFallbacks(available, provider, sku);
|
|
161
|
+
const high = { model: sku, thinking: "high" };
|
|
162
|
+
if (fallbacks.length) high.fallbacks = fallbacks;
|
|
163
|
+
return {
|
|
164
|
+
high,
|
|
165
|
+
medium: { model: sku, thinking: "medium" },
|
|
166
|
+
low: { model: sku, thinking: "low" },
|
|
167
|
+
};
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
function addCheapDeepProfiles(profiles, available, provider) {
|
|
171
|
+
const models = modelsForProvider(available, provider);
|
|
172
|
+
if (models.length === 0) return;
|
|
173
|
+
const sku =
|
|
174
|
+
pickTierModel(models, "medium") ??
|
|
175
|
+
pickTierModel(models, "high") ??
|
|
176
|
+
pickTierModel(models, "low");
|
|
177
|
+
if (!sku) return;
|
|
178
|
+
const fallbacks = buildFallbacks(available, provider, sku);
|
|
179
|
+
const deepHigh = { model: sku, thinking: "xhigh" };
|
|
180
|
+
if (fallbacks.length) deepHigh.fallbacks = fallbacks;
|
|
181
|
+
profiles.cheap = {
|
|
182
|
+
high: { model: sku, thinking: "low" },
|
|
183
|
+
medium: { model: sku, thinking: "off" },
|
|
184
|
+
low: { model: sku, thinking: "off" },
|
|
185
|
+
};
|
|
186
|
+
profiles.deep = {
|
|
187
|
+
high: deepHigh,
|
|
188
|
+
medium: { model: sku, thinking: "medium" },
|
|
189
|
+
low: { model: sku, thinking: "low" },
|
|
190
|
+
};
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
function resolveClassifierModel(available) {
|
|
194
|
+
const openaiModels = modelsForProvider(available, "openai");
|
|
195
|
+
if (openaiModels.length > 0) {
|
|
196
|
+
return (
|
|
197
|
+
pickTierModel(openaiModels, "low") ??
|
|
198
|
+
canonicalRef(openaiModels[openaiModels.length - 1].provider, openaiModels[openaiModels.length - 1].id)
|
|
199
|
+
);
|
|
200
|
+
}
|
|
201
|
+
const { models } = choosePrimaryProvider(available);
|
|
202
|
+
return pickTierModel(models, "medium");
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
/** OpenAI-backed default profile name exposed as `router/auto`. */
|
|
206
|
+
const OPENAI_PROFILE_NAME = "auto";
|
|
207
|
+
|
|
208
|
+
function routerProfileName(provider) {
|
|
209
|
+
return provider === "openai" ? OPENAI_PROFILE_NAME : provider;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
function resolveDefaultProfile(profiles) {
|
|
213
|
+
if (profiles[OPENAI_PROFILE_NAME]) return OPENAI_PROFILE_NAME;
|
|
214
|
+
if (profiles["opencode-go"]) return "opencode-go";
|
|
215
|
+
return (
|
|
216
|
+
Object.keys(profiles).find((name) => name !== "cheap" && name !== "deep") ??
|
|
217
|
+
OPENAI_PROFILE_NAME
|
|
218
|
+
);
|
|
219
|
+
}
|
|
220
|
+
|
|
142
221
|
async function main() {
|
|
143
222
|
const force = process.argv.includes("--force");
|
|
144
223
|
const dryRun = process.argv.includes("--dry-run");
|
|
@@ -171,23 +250,37 @@ async function main() {
|
|
|
171
250
|
process.exit(0);
|
|
172
251
|
}
|
|
173
252
|
|
|
174
|
-
const
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
const lowModel = pickTierModel(primaryModels, "low");
|
|
253
|
+
const profiles = {};
|
|
254
|
+
for (const provider of ["openai", "opencode-go"]) {
|
|
255
|
+
const profile = buildRoutedProfile(available, provider);
|
|
256
|
+
if (profile) profiles[routerProfileName(provider)] = profile;
|
|
257
|
+
}
|
|
180
258
|
|
|
181
|
-
if (
|
|
182
|
-
|
|
259
|
+
if (Object.keys(profiles).length === 0) {
|
|
260
|
+
const { provider: primaryProvider, models: primaryModels } =
|
|
261
|
+
choosePrimaryProvider(available);
|
|
262
|
+
const profile = buildRoutedProfile(available, primaryProvider);
|
|
263
|
+
if (!profile) {
|
|
264
|
+
fail("could not assign tier models from available registry");
|
|
265
|
+
}
|
|
266
|
+
profiles[primaryProvider] = profile;
|
|
183
267
|
}
|
|
184
268
|
|
|
185
|
-
const
|
|
269
|
+
const cheapDeepSource = profiles["opencode-go"]
|
|
270
|
+
? "opencode-go"
|
|
271
|
+
: resolveDefaultProfile(profiles);
|
|
272
|
+
addCheapDeepProfiles(profiles, available, cheapDeepSource);
|
|
273
|
+
|
|
274
|
+
const defaultProfile = resolveDefaultProfile(profiles);
|
|
275
|
+
const classifierModel = resolveClassifierModel(available);
|
|
276
|
+
if (!classifierModel) {
|
|
277
|
+
fail("could not assign classifier model from available registry");
|
|
278
|
+
}
|
|
186
279
|
|
|
187
280
|
const config = {
|
|
188
|
-
defaultProfile
|
|
281
|
+
defaultProfile,
|
|
189
282
|
debug: false,
|
|
190
|
-
classifierModel
|
|
283
|
+
classifierModel,
|
|
191
284
|
phaseBias: 0.5,
|
|
192
285
|
maxSessionBudget: 1.0,
|
|
193
286
|
largeContextThreshold: 100000,
|
|
@@ -199,27 +292,13 @@ async function main() {
|
|
|
199
292
|
},
|
|
200
293
|
{ matches: "changelog", tier: "low" },
|
|
201
294
|
],
|
|
202
|
-
profiles
|
|
203
|
-
auto: {
|
|
204
|
-
high: { model: highModel, thinking: "high", fallbacks },
|
|
205
|
-
medium: { model: mediumModel, thinking: "medium" },
|
|
206
|
-
low: { model: lowModel, thinking: "low" },
|
|
207
|
-
},
|
|
208
|
-
cheap: {
|
|
209
|
-
high: { model: mediumModel, thinking: "low" },
|
|
210
|
-
medium: { model: lowModel, thinking: "off" },
|
|
211
|
-
low: { model: lowModel, thinking: "off" },
|
|
212
|
-
},
|
|
213
|
-
deep: {
|
|
214
|
-
high: { model: highModel, thinking: "xhigh", fallbacks },
|
|
215
|
-
medium: { model: mediumModel, thinking: "medium" },
|
|
216
|
-
low: { model: lowModel, thinking: "low" },
|
|
217
|
-
},
|
|
218
|
-
},
|
|
295
|
+
profiles,
|
|
219
296
|
};
|
|
220
297
|
|
|
221
298
|
const json = `${JSON.stringify(config, null, 2)}\n`;
|
|
222
299
|
const providerSet = [...new Set(available.map((m) => m.provider))].sort();
|
|
300
|
+
const autoProfile = profiles[OPENAI_PROFILE_NAME];
|
|
301
|
+
const opencodeProfile = profiles["opencode-go"];
|
|
223
302
|
|
|
224
303
|
if (dryRun) {
|
|
225
304
|
process.stdout.write(json);
|
|
@@ -230,13 +309,16 @@ async function main() {
|
|
|
230
309
|
writeFileSync(OUT_PATH, json, "utf8");
|
|
231
310
|
|
|
232
311
|
console.log("✓ Generated .pi/model-router.json from Pi authenticated providers:");
|
|
233
|
-
console.log(`
|
|
312
|
+
console.log(` Default profile: ${defaultProfile}`);
|
|
313
|
+
console.log(` Classifier: ${classifierModel}`);
|
|
234
314
|
console.log(` Authenticated providers: ${providerSet.join(", ")}`);
|
|
235
315
|
console.log(` Available models: ${available.length}`);
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
if (
|
|
316
|
+
if (autoProfile) {
|
|
317
|
+
console.log(` auto (openai) high: ${autoProfile.high.model}`);
|
|
318
|
+
}
|
|
319
|
+
if (opencodeProfile) {
|
|
320
|
+
console.log(` opencode-go high: ${opencodeProfile.high.model}`);
|
|
321
|
+
}
|
|
240
322
|
}
|
|
241
323
|
|
|
242
324
|
main().catch((err) => {
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Unit tests for session-locked pi-model-router routing (no LLM).
|
|
4
|
+
* Run: npx tsx .pi/scripts/harness-model-router-routing.test.mjs
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import assert from "node:assert/strict";
|
|
8
|
+
import { readFileSync } from "node:fs";
|
|
9
|
+
import { join, dirname } from "node:path";
|
|
10
|
+
import { fileURLToPath } from "node:url";
|
|
11
|
+
import {
|
|
12
|
+
decideSessionLock,
|
|
13
|
+
applyThinkingToDecision,
|
|
14
|
+
buildRoutingDecision,
|
|
15
|
+
decideRouting,
|
|
16
|
+
} from "../../vendor/pi-model-router/extensions/routing.js";
|
|
17
|
+
|
|
18
|
+
const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..");
|
|
19
|
+
|
|
20
|
+
const sampleProfile = {
|
|
21
|
+
high: { model: "openai/gpt-5.5", thinking: "high" },
|
|
22
|
+
medium: { model: "openai/gpt-5.5", thinking: "medium" },
|
|
23
|
+
low: { model: "openai/gpt-5.5", thinking: "low" },
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
const planningContext = {
|
|
27
|
+
systemPrompt: "You are a harness architect. Design tradeoffs and migration strategy.",
|
|
28
|
+
messages: [
|
|
29
|
+
{
|
|
30
|
+
role: "user",
|
|
31
|
+
content:
|
|
32
|
+
"Plan a multi-phase refactor across modules with architecture review.",
|
|
33
|
+
timestamp: 1,
|
|
34
|
+
},
|
|
35
|
+
],
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
const shortContext = {
|
|
39
|
+
systemPrompt: "Summarize briefly.",
|
|
40
|
+
messages: [{ role: "user", content: "changelog", timestamp: 1 }],
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
const lockHigh = decideSessionLock(
|
|
44
|
+
planningContext,
|
|
45
|
+
"auto",
|
|
46
|
+
sampleProfile,
|
|
47
|
+
undefined,
|
|
48
|
+
undefined,
|
|
49
|
+
0.5,
|
|
50
|
+
[{ matches: "changelog", tier: "low" }],
|
|
51
|
+
);
|
|
52
|
+
assert.equal(lockHigh.tier, "high", "planning prompt locks high tier");
|
|
53
|
+
|
|
54
|
+
const lockLow = decideSessionLock(shortContext, "auto", sampleProfile);
|
|
55
|
+
assert.equal(lockLow.tier, "low", "short summary locks low tier");
|
|
56
|
+
|
|
57
|
+
const locked = buildRoutingDecision(
|
|
58
|
+
"auto",
|
|
59
|
+
sampleProfile,
|
|
60
|
+
lockHigh.tier,
|
|
61
|
+
"planning",
|
|
62
|
+
lockHigh.reasoning,
|
|
63
|
+
);
|
|
64
|
+
const thinkingTurn = decideRouting(
|
|
65
|
+
{
|
|
66
|
+
...planningContext,
|
|
67
|
+
messages: [
|
|
68
|
+
...planningContext.messages,
|
|
69
|
+
{ role: "user", content: "changelog only", timestamp: 2 },
|
|
70
|
+
],
|
|
71
|
+
},
|
|
72
|
+
"auto",
|
|
73
|
+
sampleProfile,
|
|
74
|
+
locked,
|
|
75
|
+
);
|
|
76
|
+
const merged = applyThinkingToDecision(locked, thinkingTurn, sampleProfile);
|
|
77
|
+
assert.equal(merged.targetLabel, locked.targetLabel, "model stays locked");
|
|
78
|
+
assert.equal(merged.tier, thinkingTurn.tier, "thinking tier follows turn");
|
|
79
|
+
assert.equal(merged.thinking, "low", "low thinking from turn tier config");
|
|
80
|
+
|
|
81
|
+
const examplePath = join(ROOT, ".pi", "model-router.example.json");
|
|
82
|
+
const example = JSON.parse(readFileSync(examplePath, "utf8"));
|
|
83
|
+
for (const [name, profile] of Object.entries(example.profiles ?? {})) {
|
|
84
|
+
const { high, medium, low } = profile;
|
|
85
|
+
assert.equal(
|
|
86
|
+
high.model,
|
|
87
|
+
medium.model,
|
|
88
|
+
`example profile ${name}: medium/high same model`,
|
|
89
|
+
);
|
|
90
|
+
assert.equal(
|
|
91
|
+
medium.model,
|
|
92
|
+
low.model,
|
|
93
|
+
`example profile ${name}: low/medium same model`,
|
|
94
|
+
);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
console.log("harness-model-router-routing.test: PASS");
|
|
@@ -29,11 +29,24 @@ function saveSettings(settingsPath, data) {
|
|
|
29
29
|
);
|
|
30
30
|
}
|
|
31
31
|
|
|
32
|
+
function readDefaultRouterProfile(configPath) {
|
|
33
|
+
if (!existsSync(configPath)) return "auto";
|
|
34
|
+
try {
|
|
35
|
+
const data = JSON.parse(readFileSync(configPath, "utf8"));
|
|
36
|
+
const profile =
|
|
37
|
+
typeof data.defaultProfile === "string" ? data.defaultProfile.trim() : "";
|
|
38
|
+
return profile || "auto";
|
|
39
|
+
} catch {
|
|
40
|
+
return "auto";
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
32
44
|
function main() {
|
|
33
45
|
const root = process.cwd();
|
|
34
46
|
const configPath = join(root, ".pi", "model-router.json");
|
|
35
47
|
const settingsPath = join(root, ".pi", "settings.json");
|
|
36
48
|
const hasConfig = existsSync(configPath);
|
|
49
|
+
const defaultRouterProfile = readDefaultRouterProfile(configPath);
|
|
37
50
|
|
|
38
51
|
const settings = loadSettings(settingsPath);
|
|
39
52
|
if (!settings) {
|
|
@@ -67,14 +80,14 @@ function main() {
|
|
|
67
80
|
|
|
68
81
|
if (noProjectDefault) {
|
|
69
82
|
settings.defaultProvider = "router";
|
|
70
|
-
settings.defaultModel =
|
|
83
|
+
settings.defaultModel = defaultRouterProfile;
|
|
71
84
|
changed = true;
|
|
72
85
|
}
|
|
73
86
|
|
|
74
87
|
if (changed) {
|
|
75
88
|
saveSettings(settingsPath, settings);
|
|
76
89
|
console.log(
|
|
77
|
-
|
|
90
|
+
`✓ Router defaults set (\`router\` / \`${defaultRouterProfile}\`) — run /reload in pi when ready`,
|
|
78
91
|
);
|
|
79
92
|
} else {
|
|
80
93
|
console.log("[harness-model-router] Defaults unchanged (user set defaultProvider)");
|
|
@@ -37,6 +37,8 @@ const REQUIRED_ADRS = [
|
|
|
37
37
|
"0009-sentrux-rules-lifecycle.md",
|
|
38
38
|
"0031-harness-run-context.md",
|
|
39
39
|
"0032-harness-command-orchestration.md",
|
|
40
|
+
"0037-subagent-submit-tools.md",
|
|
41
|
+
"0038-budget-telemetry-only.md",
|
|
40
42
|
];
|
|
41
43
|
|
|
42
44
|
const REQUIRED_EXTENSIONS = [
|
|
@@ -143,6 +145,34 @@ async function checkSentruxRules() {
|
|
|
143
145
|
ok(".sentrux/rules.toml present");
|
|
144
146
|
}
|
|
145
147
|
|
|
148
|
+
/**
 * Verify that every profile in `.pi/model-router.json` is "thinking-only":
 * its `high`, `medium` and `low` tiers must all name the same, non-empty
 * `model`.
 *
 * Outcomes are reported through the file's `ok` / `fail` helpers:
 * - config file absent: `ok` (the check is skipped);
 * - file unreadable, not valid JSON, or not a JSON object: `fail`;
 * - any profile whose three tiers differ or are missing: `fail` per profile;
 * - otherwise a single `ok`.
 *
 * The function is written so it behaves sensibly whether or not `fail`
 * terminates the process: it never dereferences an unparsed config and never
 * reports success after a failure.
 *
 * @returns {Promise<void>}
 */
async function checkModelRouterThinkingOnly() {
	const path = join(ROOT, ".pi", "model-router.json");
	if (!(await fileExists(path))) {
		ok("model-router.json absent (skip thinking-only tier check)");
		return;
	}

	let raw;
	try {
		raw = JSON.parse(await readFile(path, "utf-8"));
	} catch {
		raw = undefined;
	}
	// Valid JSON such as `null` or `42` is still not a usable config; report it
	// with the same diagnostic instead of crashing on `raw.profiles`.
	if (raw === null || typeof raw !== "object") {
		fail("invalid .pi/model-router.json");
		return;
	}

	const profiles = raw.profiles ?? {};
	let allThinkingOnly = true;
	for (const [name, profile] of Object.entries(profiles)) {
		const high = profile?.high?.model;
		const medium = profile?.medium?.model;
		const low = profile?.low?.model;
		if (!(high && medium && low && high === medium && medium === low)) {
			allThinkingOnly = false;
			fail(
				`model-router profile "${name}" must use the same model on high/medium/low (thinking-only tiers)`,
			);
		}
	}

	// Only claim success when no profile was rejected above.
	if (allThinkingOnly) {
		ok("model-router.json thinking-only (same model per profile)");
	}
}
|
|
175
|
+
|
|
146
176
|
async function checkSentruxGate() {
|
|
147
177
|
await checkSentruxRules();
|
|
148
178
|
|
|
@@ -286,6 +316,7 @@ async function main() {
|
|
|
286
316
|
ok("test-diff-golden.json");
|
|
287
317
|
|
|
288
318
|
await checkSentruxGate();
|
|
319
|
+
await checkModelRouterThinkingOnly();
|
|
289
320
|
|
|
290
321
|
if (!(await fileExists(AGENTS_MANIFEST))) {
|
|
291
322
|
fail(
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,27 @@ All notable changes to this project are documented in this file.
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
## [v0.17.0] — 2026-05-22
|
|
8
|
+
|
|
9
|
+
### ✨ Features
|
|
10
|
+
|
|
11
|
+
- **Model router:** Session-locked model SKU at start (initial prompt + system prompt); per-turn routing adjusts thinking tier only; subagents lock from agent `systemPrompt` complexity.
|
|
12
|
+
- **Harness:** Thinking-only profile shape in generator/verify; plan review gate, debate eligibility, and smoke fixture updates.
|
|
13
|
+
|
|
14
|
+
### ✅ Tests
|
|
15
|
+
|
|
16
|
+
- Add `harness-model-router-routing` and plan-debate eligibility coverage.
|
|
17
|
+
|
|
18
|
+
## [v0.16.0] — 2026-05-19
|
|
19
|
+
|
|
20
|
+
### ✨ Features
|
|
21
|
+
|
|
22
|
+
- add submit pipeline and planning/debate updates
|
|
23
|
+
|
|
24
|
+
### 🔧 Chores
|
|
25
|
+
|
|
26
|
+
- refresh graph artifacts after harness updates
|
|
27
|
+
|
|
7
28
|
## [v0.15.0] — 2026-05-19
|
|
8
29
|
|
|
9
30
|
### ✨ Features
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ultimate-pi",
|
|
3
|
-
"version": "0.15.0",
|
|
3
|
+
"version": "0.17.0",
|
|
4
4
|
"description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
|
@@ -84,7 +84,7 @@
|
|
|
84
84
|
"format": "biome format --write",
|
|
85
85
|
"format:check": "biome format",
|
|
86
86
|
"prepare": "lefthook install",
|
|
87
|
-
"test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-live-widget-status.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-spawn-budget.test.mjs test/harness-turn-routing.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs",
|
|
87
|
+
"test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-live-widget-status.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-spawn-budget.test.mjs test/harness-spawn-parse.test.mjs test/harness-schema-validate.test.mjs test/harness-turn-routing.test.mjs test/harness-budget-enforce.test.mjs test/harness-submit-policy.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs",
|
|
88
88
|
"test:vcc": "npx -y tsx --test vendor/pi-vcc/tests/*.test.ts",
|
|
89
89
|
"harness:sentrux-bootstrap": "node .pi/scripts/harness-sentrux-bootstrap.mjs",
|
|
90
90
|
"harness:sentrux-sync": "node .pi/scripts/sentrux-rules-sync.mjs --force",
|
|
@@ -103,6 +103,8 @@
|
|
|
103
103
|
},
|
|
104
104
|
"dependencies": {
|
|
105
105
|
"@posthog/pi": "latest",
|
|
106
|
+
"ajv": "^8.17.1",
|
|
107
|
+
"ajv-formats": "^3.0.1",
|
|
106
108
|
"croner": "^9.0.0",
|
|
107
109
|
"jimp": "^1.6.1",
|
|
108
110
|
"nanoid": "^5.1.5",
|