ultimate-pi 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/.agents/skills/harness-governor/SKILL.md +11 -0
  2. package/.agents/skills/harness-orchestration/SKILL.md +3 -1
  3. package/.agents/skills/harness-plan/SKILL.md +5 -5
  4. package/.pi/agents/harness/adversary.md +1 -1
  5. package/.pi/agents/harness/evaluator.md +1 -1
  6. package/.pi/agents/harness/executor.md +1 -1
  7. package/.pi/agents/harness/incident-recorder.md +1 -1
  8. package/.pi/agents/harness/meta-optimizer.md +1 -1
  9. package/.pi/agents/harness/planning/decompose.md +4 -33
  10. package/.pi/agents/harness/planning/execution-plan-author.md +3 -2
  11. package/.pi/agents/harness/planning/hypothesis-validator.md +3 -2
  12. package/.pi/agents/harness/planning/hypothesis.md +4 -27
  13. package/.pi/agents/harness/planning/implementation-researcher.md +3 -2
  14. package/.pi/agents/harness/planning/plan-adversary.md +2 -3
  15. package/.pi/agents/harness/planning/plan-evaluator.md +3 -2
  16. package/.pi/agents/harness/planning/review-integrator.md +2 -3
  17. package/.pi/agents/harness/planning/scout-graphify.md +3 -22
  18. package/.pi/agents/harness/planning/scout-semantic.md +3 -18
  19. package/.pi/agents/harness/planning/scout-structure.md +3 -18
  20. package/.pi/agents/harness/planning/sprint-contract-auditor.md +3 -2
  21. package/.pi/agents/harness/planning/stack-researcher.md +3 -2
  22. package/.pi/agents/harness/tie-breaker.md +1 -1
  23. package/.pi/agents/harness/trace-librarian.md +1 -1
  24. package/.pi/extensions/budget-guard.ts +33 -19
  25. package/.pi/extensions/harness-debate-tools.ts +54 -6
  26. package/.pi/extensions/harness-run-context.ts +108 -2
  27. package/.pi/extensions/harness-subagent-submit.ts +172 -0
  28. package/.pi/extensions/harness-telemetry.ts +29 -4
  29. package/.pi/extensions/lib/debate-bus-core.ts +49 -6
  30. package/.pi/extensions/lib/harness-subagent-auth.ts +104 -19
  31. package/.pi/extensions/lib/harness-subagent-policy.ts +59 -0
  32. package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
  33. package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
  34. package/.pi/extensions/lib/harness-subagents-bridge.ts +127 -0
  35. package/.pi/extensions/lib/plan-debate-eligibility.ts +61 -8
  36. package/.pi/extensions/lib/plan-debate-focus.ts +21 -9
  37. package/.pi/extensions/lib/plan-debate-gate.ts +92 -18
  38. package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
  39. package/.pi/extensions/lib/plan-debate-lanes.ts +27 -3
  40. package/.pi/extensions/lib/plan-debate-round-status.ts +18 -7
  41. package/.pi/extensions/lib/plan-messenger.ts +4 -0
  42. package/.pi/extensions/lib/plan-review-gate.ts +51 -0
  43. package/.pi/extensions/trace-recorder.ts +1 -0
  44. package/.pi/harness/agents.manifest.json +22 -22
  45. package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
  46. package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
  47. package/.pi/harness/docs/adrs/README.md +2 -0
  48. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml +28 -0
  49. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml +25 -0
  50. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml +196 -0
  51. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md +14 -0
  52. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml +62 -0
  53. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +40 -17
  54. package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
  55. package/.pi/harness/specs/harness-human-required.schema.json +16 -0
  56. package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
  57. package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
  58. package/.pi/lib/harness-agent-output.ts +45 -0
  59. package/.pi/lib/harness-budget-enforce.ts +18 -0
  60. package/.pi/lib/harness-schema-validate.ts +89 -0
  61. package/.pi/lib/harness-spawn-parse.ts +86 -0
  62. package/.pi/lib/harness-subagent-submit-path.ts +41 -0
  63. package/.pi/lib/harness-ui-state.ts +15 -2
  64. package/.pi/model-router.example.json +13 -4
  65. package/.pi/prompts/harness-auto.md +2 -2
  66. package/.pi/prompts/harness-plan.md +34 -14
  67. package/.pi/prompts/harness-run.md +2 -2
  68. package/.pi/prompts/harness-setup.md +4 -4
  69. package/.pi/scripts/harness-generate-model-router.mjs +118 -36
  70. package/.pi/scripts/harness-model-router-routing.test.mjs +97 -0
  71. package/.pi/scripts/harness-sync-model-router.mjs +15 -2
  72. package/.pi/scripts/harness-verify.mjs +31 -0
  73. package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
  74. package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
  75. package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
  76. package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
  77. package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
  78. package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
  79. package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
  80. package/CHANGELOG.md +21 -0
  81. package/package.json +4 -2
  82. package/vendor/pi-model-router/UPSTREAM_PIN.md +3 -1
  83. package/vendor/pi-model-router/extensions/commands.ts +4 -4
  84. package/vendor/pi-model-router/extensions/index.ts +21 -0
  85. package/vendor/pi-model-router/extensions/provider.ts +130 -79
  86. package/vendor/pi-model-router/extensions/routing.ts +148 -0
  87. package/vendor/pi-model-router/extensions/state.ts +3 -0
  88. package/vendor/pi-model-router/extensions/types.ts +9 -0
  89. package/vendor/pi-model-router/extensions/ui.ts +16 -2
  90. package/vendor/pi-subagents/src/subagents.ts +29 -3
@@ -1,13 +1,13 @@
1
1
  ---
2
2
  description: PM-grade harness plan — scouts, implementation research, ExecutionPlan, DAG validation, selective Review Gate debate, approval.
3
- argument-hint: "\"<task>\" [--risk low|med|high] [--budget <amount>] [--quick]"
3
+ argument-hint: "\"<task>\" [--risk low|med|high] [--quick]"
4
4
  ---
5
5
 
6
6
  # harness-plan
7
7
 
8
8
  You are the **planning PM** for this harness run. Produce an execution baseline (`plan-packet.yaml` + `plan-review.md`), not strategy theater. Parent owns `ask_user`, `approve_plan`, `create_plan`, debate bus commands, and YAML writes under `.pi/harness/runs/<run_id>/`.
9
9
 
10
- Never `write`/`edit` the final canonical packet except via **`write_harness_yaml`** for run artifacts and **`create_plan`** after approval. Do not paste JSON into `.yaml` files — subagents emit JSON; you convert via `write_harness_yaml`.
10
+ Subagents persist artifacts via scoped **`submit_*`** tools (deterministic YAML under the run dir). Parent uses **`harness_artifact_ready`** to gate phases (no JSON parsing). Parent merges still use **`write_harness_yaml`** for `research-brief.yaml`, `plan-packet.yaml` shell, and integrator patches only.
11
11
 
12
12
  ## Allowed subagents
13
13
 
@@ -33,12 +33,12 @@ Read **harness-debate-plan** skill before Review Gate rounds.
33
33
  2. Each `subagent` call blocks until subprocesses finish — batch parallel scouts in one `tasks` array.
34
34
  3. Do **not** set `timeoutMs` unless the user explicitly requests a cap — subagents run until natural completion (optional backstop: `PI_SUBAGENT_TIMEOUT_MS`).
35
35
  4. No harness subagent spawn cap — run the full scout + research + debate pipeline without skipping lanes for budget.
36
- 5. Compact task text: embed `HarnessSpawnContext` JSON + lane-specific instructions only.
36
+ 5. Compact task text: embed spawn context + lane instructions. Prefer `HarnessSpawnContext={"run_id":"…","plan_packet_path":"…",…}` or a JSON object with `"HarnessSpawnContext":{…}` — both parse; `run_id` is required so subprocess submit tools get `HARNESS_RUN_ID`.
37
37
 
38
38
  ## Step 0 — Parse `$ARGUMENTS`
39
39
 
40
40
  - task (required)
41
- - `--risk low|med|high`, `--budget`, `--quick`
41
+ - `--risk low|med|high`, `--quick` (`--budget` is reserved/no-op; token budgets are telemetry-only unless `HARNESS_BUDGET_ENFORCE=1`)
42
42
 
43
43
  `--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, **Phase 3.5 implementation research**, stack research, execution plan, DAG validation, or **Review Gate debate**.
44
44
 
@@ -64,9 +64,11 @@ Do **not** run `ccc index` or `ccc search --refresh`. The harness runs increment
64
64
 
65
65
  Add `harness/planning/scout-semantic` to `tasks` unless `--quick`. Require graphify + structure success. Semantic lane uses `ccc search` only (see `scout-semantic` agent).
66
66
 
67
+ After scouts: `harness_artifact_ready({ paths: ["artifacts/scout-graphify.yaml", "artifacts/scout-structure.yaml", ...] })`.
68
+
67
69
  ## Phase 2 & 3 — Decompose + hypothesis (parallel)
68
70
 
69
- One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis`. Parse `PlanDecompositionBrief` and `PlanHypothesisBrief` from outputs. Persist with `write_harness_yaml` `artifacts/decomposition.yaml` and `artifacts/hypothesis.yaml`.
71
+ One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis` (include scout YAML paths in task text). Gate with `harness_artifact_ready` on `artifacts/decomposition.yaml` and `artifacts/hypothesis.yaml`.
70
72
 
71
73
  Decompose **prior_art** is **internal only** (from scouts). External prior art arrives in Phase 3.5.
72
74
 
@@ -84,8 +86,8 @@ Decompose **prior_art** is **internal only** (from scouts). External prior art a
84
86
  }
85
87
  ```
86
88
 
87
- - `write_harness_yaml` `artifacts/implementation-research.yaml` and `artifacts/stack.yaml`.
88
- - Merge both into `research-brief.yaml` (`implementation:` + `stack:`).
89
+ - Subagents write via `submit_implementation_research` / `submit_stack_brief`; gate with `harness_artifact_ready` on both paths.
90
+ - Merge both into `research-brief.yaml` (`implementation:` + `stack:`) via parent `write_harness_yaml`.
89
91
  - **Partial failure:** if one lane fails, re-spawn that lane once; if still failing set `plan_status: partial` and `human_required` via `ask_user`. Do not proceed to Phase 4b without both artifacts or explicit human waiver.
90
92
  - **Web dedup:** implementation owns patterns/repos; stack owns libraries/versions — no overlapping queries.
91
93
 
@@ -136,11 +138,16 @@ harness_debate_open({ debate_profile, required_focuses })
136
138
 
137
139
  Profiles:
138
140
 
139
- | Profile | Focuses required | min_focus_rounds |
140
- |---------|------------------|------------------|
141
- | full | spec, wbs, schedule, quality | 4 |
142
- | standard | all four | 4 |
143
- | light | spec, quality only | 2 |
141
+ | Profile | Review gate | Focuses required | min_focus_rounds |
142
+ |---------|-------------|------------------|------------------|
143
+ | full | threaded (4 rounds) | spec, wbs, schedule, quality | 4 |
144
+ | standard | threaded (4 rounds) | all four | 4 |
145
+ | light | threaded (2 rounds) | spec, quality only | 2 |
146
+ | fast | **consolidated** (1 round) | spec, quality | 1 |
147
+
148
+ Med/low non-fork plans with clear stack and no implementation `open_questions` default to **fast** (consolidated). Escalate to threaded rounds only when integrator sets `review_gate_ready: false` or records blockers.
149
+
150
+ `--quick`: skip scout-semantic; cap web research (≤2 searches, ≤3 fetches); prefer **fast** eligibility when DAG passes; use consolidated Review Gate when profile is fast.
144
151
 
145
152
  ## Phase 5 — Review Gate debate (profile-aware, pi-messenger, even with `--quick`)
146
153
 
@@ -151,13 +158,26 @@ Profiles:
151
158
 
152
159
  ### Focus coverage (required before consensus)
153
160
 
154
- Each required focus must appear in a submitted `review-round-rN.yaml` (`debate_round_focus`). Monotonic `round_index` (cap from profile). Consensus only when:
161
+ Each required focus must appear in submitted review artifacts (`review-round-rN.yaml` or `review-round-consolidated.yaml` with `debate_round_focus: all`). Monotonic `round_index` (cap from profile). Consensus only when:
155
162
 
156
163
  - all **required** focuses covered, **and**
157
164
  - last round `review_gate_ready: true`, **and**
158
165
  - `validate-plan-dag.mjs` still passes (re-run after patches).
159
166
 
160
- ### Per-round state machine
167
+ ### Consolidated state machine (`review_gate_mode: consolidated`, profile fast)
168
+
169
+ ```
170
+ round_index := 1
171
+ debate_round_focus := all
172
+ spawn hypothesis-validator (blind)
173
+ WHILE NOT ready_for_integrator (harness_debate_round_status round_index=1):
174
+ follow next_tool (validation-turn, adversary-brief, sprint-audit in parallel-friendly order; one subagent per batch)
175
+ spawn review-integrator → write artifacts/review-round-consolidated.yaml → harness_debate_submit_round
176
+ IF review_gate_ready false OR blockers: escalate — threaded round per missing focus (spec/wbs/schedule/quality)
177
+ harness_debate_focus_coverage → harness_debate_consensus
178
+ ```
179
+
180
+ ### Threaded state machine (standard/full/light)
161
181
 
162
182
  ```
163
183
  round_index := next uncovered required focus
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Execute only against an approved PlanPacket with strict phase gates.
3
- argument-hint: "[--budget <amount>]"
3
+ argument-hint: ""
4
4
  ---
5
5
 
6
6
  # harness-run
@@ -9,7 +9,7 @@ Orchestrator only — spawn `harness/executor`. Do **not** implement inline.
9
9
 
10
10
  ## Step 0 — Parse arguments
11
11
 
12
- - optional: `--budget <amount>`
12
+ - `--budget` is reserved/no-op (telemetry-only budgets by default)
13
13
  - Do **not** use `--plan` on happy path — load from `[HarnessActivePlan]` / `plan_packet_path`.
14
14
 
15
15
  If plan not ready:
@@ -327,7 +327,7 @@ sentrux plugin add-standard 2>/dev/null || echo "Plugins already installed or fa
327
327
 
328
328
  ## Step 3 — Pi Extension Packages
329
329
 
330
- Bundled extensions load from the installed `ultimate-pi` package. **Per-turn model routing** comes from a **vendored** fork of [`yeliu84/pi-model-router`](https://github.com/yeliu84/pi-model-router) in `vendor/pi-model-router/`, wired through [`.pi/extensions/pi-model-router-harness.ts`](.pi/extensions/pi-model-router-harness.ts). The harness **gates** activation on `.pi/model-router.json` (Step **3.5** below) so `router/auto` and built-in tiers such as `openai/gpt-5.4-pro` cannot load prematurely. Attribution: see [THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md) and `vendor/pi-model-router/UPSTREAM_PIN.md`. Maintainer refresh: `npm run vendor:sync-router`.
330
+ Bundled extensions load from the installed `ultimate-pi` package. **Session-locked model routing** comes from a **vendored** fork of [`yeliu84/pi-model-router`](https://github.com/yeliu84/pi-model-router) in `vendor/pi-model-router/`, wired through [`.pi/extensions/pi-model-router-harness.ts`](.pi/extensions/pi-model-router-harness.ts). The router picks **one concrete model** when the session starts (from the first user prompt + system prompt complexity), then changes **thinking level only** each turn. The harness **gates** activation on `.pi/model-router.json` (Step **3.5** below) so `router/auto` cannot load prematurely. Attribution: see [THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md) and `vendor/pi-model-router/UPSTREAM_PIN.md`. Maintainer refresh: `npm run vendor:sync-router`.
331
331
 
332
332
  Optionally install the companion lockfile used in development:
333
333
 
@@ -381,9 +381,9 @@ If generation prints "No authenticated Pi providers": warn in report — user sh
381
381
 
382
382
  Do NOT block setup. If no config is written, `harness-sync-model-router.mjs` clears a premature `defaultProvider: "router"` in `.pi/settings.json`.
383
383
 
384
- **Router onboarding** — The vendored extension starts only after `.pi/model-router.json` appears. Running the script above prepares that file plus optional Pi defaults (**`router` / `auto`**) via `harness-sync-model-router.mjs` when `defaultProvider` was unset—then **`/reload`**.
384
+ **Router onboarding** — The vendored extension starts only after `.pi/model-router.json` appears. Running the script above prepares that file plus optional Pi defaults (**`router` / `auto`**, or whatever `defaultProfile` is) via `harness-sync-model-router.mjs` when `defaultProvider` was unset—then **`/reload`**. Generated profiles use **one model SKU per profile**; high/medium/low tiers differ in **thinking** only. Subagents resolve their subprocess model from the **agent system prompt** complexity (same lock rules).
385
385
 
386
- Manual override: **`/router profile auto`** anytime after reload if they changed defaults.
386
+ Manual override: **`/router profile auto`** or **`/router profile opencode-go`** anytime after reload if they changed defaults.
387
387
 
388
388
  ## Step 3.6 — Harness agents (package-resolved)
389
389
 
@@ -677,7 +677,7 @@ Output summary table:
677
677
  | sentrux | ✓/✗ | CLI + plugins; rules via Step 4.2 bootstrap |
678
678
  | Sentrux rules.toml | ✓/✗ | `.sentrux/rules.toml` synced from manifest |
679
679
  | pi extensions | ✓/✗ | 4 packages |
680
- | model router | ✓/✗ | Package + config verified, activation via `/router profile auto` |
680
+ | model router | ✓/✗ | Package + config verified, activation via `/router profile auto` (or `opencode-go`) |
681
681
  | `.env` | ✓/✗/ask | Created / keys appended / user declined |
682
682
 
683
683
  | .gitignore | ✓/✗ | entries added (incl. `.env`) |
@@ -22,9 +22,9 @@ const UP_PKG = join(SCRIPT_DIR, "..", "..");
22
22
  const OUT_PATH = join(process.cwd(), ".pi", "model-router.json");
23
23
 
24
24
  const PROVIDER_PRIORITY = [
25
+ "openai",
25
26
  "opencode-go",
26
27
  "anthropic",
27
- "openai",
28
28
  "google",
29
29
  "openrouter",
30
30
  "groq",
@@ -35,6 +35,7 @@ const PROVIDER_PRIORITY = [
35
35
  /** Substring hints per tier (first match in available ids wins). */
36
36
  const TIER_HINTS = {
37
37
  high: [
38
+ "gpt-5.5-pro",
38
39
  "deepseek-v4-pro",
39
40
  "gpt-5.4-pro",
40
41
  "claude-opus",
@@ -43,6 +44,7 @@ const TIER_HINTS = {
43
44
  "pro",
44
45
  ],
45
46
  medium: [
47
+ "gpt-5.5",
46
48
  "qwen3.6-plus",
47
49
  "kimi-k2.6",
48
50
  "gpt-5.4",
@@ -98,7 +100,10 @@ function canonicalRef(provider, modelId) {
98
100
 
99
101
  function pickTierModel(models, tier) {
100
102
  const hints = TIER_HINTS[tier];
101
- const ids = models.map((m) => m.id);
103
+ for (const hint of hints) {
104
+ const exact = models.find((m) => m.id === hint);
105
+ if (exact) return canonicalRef(exact.provider, exact.id);
106
+ }
102
107
  for (const hint of hints) {
103
108
  const match = models.find((m) => m.id.includes(hint));
104
109
  if (match) return canonicalRef(match.provider, match.id);
@@ -114,6 +119,10 @@ function pickTierModel(models, tier) {
114
119
  return canonicalRef(models[0].provider, models[0].id);
115
120
  }
116
121
 
122
+ function modelsForProvider(available, provider) {
123
+ return available.filter((m) => m.provider === provider);
124
+ }
125
+
117
126
  function choosePrimaryProvider(available) {
118
127
  const byProvider = new Map();
119
128
  for (const m of available) {
@@ -129,7 +138,7 @@ function choosePrimaryProvider(available) {
129
138
 
130
139
  function buildFallbacks(available, primaryProvider, highModel) {
131
140
  const fallbacks = [];
132
- for (const p of ["anthropic", "google", "openai"]) {
141
+ for (const p of ["anthropic", "google", "openai", "opencode-go"]) {
133
142
  if (p === primaryProvider) continue;
134
143
  const alt = available.filter((m) => m.provider === p);
135
144
  if (alt.length === 0) continue;
@@ -139,6 +148,76 @@ function buildFallbacks(available, primaryProvider, highModel) {
139
148
  return fallbacks.slice(0, 3);
140
149
  }
141
150
 
151
+ /** Session-locked router: one model SKU per profile; tiers vary thinking only. */
152
+ function buildRoutedProfile(available, provider) {
153
+ const models = modelsForProvider(available, provider);
154
+ if (models.length === 0) return null;
155
+ const sku =
156
+ pickTierModel(models, "medium") ??
157
+ pickTierModel(models, "high") ??
158
+ pickTierModel(models, "low");
159
+ if (!sku) return null;
160
+ const fallbacks = buildFallbacks(available, provider, sku);
161
+ const high = { model: sku, thinking: "high" };
162
+ if (fallbacks.length) high.fallbacks = fallbacks;
163
+ return {
164
+ high,
165
+ medium: { model: sku, thinking: "medium" },
166
+ low: { model: sku, thinking: "low" },
167
+ };
168
+ }
169
+
170
+ function addCheapDeepProfiles(profiles, available, provider) {
171
+ const models = modelsForProvider(available, provider);
172
+ if (models.length === 0) return;
173
+ const sku =
174
+ pickTierModel(models, "medium") ??
175
+ pickTierModel(models, "high") ??
176
+ pickTierModel(models, "low");
177
+ if (!sku) return;
178
+ const fallbacks = buildFallbacks(available, provider, sku);
179
+ const deepHigh = { model: sku, thinking: "xhigh" };
180
+ if (fallbacks.length) deepHigh.fallbacks = fallbacks;
181
+ profiles.cheap = {
182
+ high: { model: sku, thinking: "low" },
183
+ medium: { model: sku, thinking: "off" },
184
+ low: { model: sku, thinking: "off" },
185
+ };
186
+ profiles.deep = {
187
+ high: deepHigh,
188
+ medium: { model: sku, thinking: "medium" },
189
+ low: { model: sku, thinking: "low" },
190
+ };
191
+ }
192
+
193
+ function resolveClassifierModel(available) {
194
+ const openaiModels = modelsForProvider(available, "openai");
195
+ if (openaiModels.length > 0) {
196
+ return (
197
+ pickTierModel(openaiModels, "low") ??
198
+ canonicalRef(openaiModels[openaiModels.length - 1].provider, openaiModels[openaiModels.length - 1].id)
199
+ );
200
+ }
201
+ const { models } = choosePrimaryProvider(available);
202
+ return pickTierModel(models, "medium");
203
+ }
204
+
205
+ /** OpenAI-backed default profile name exposed as `router/auto`. */
206
+ const OPENAI_PROFILE_NAME = "auto";
207
+
208
+ function routerProfileName(provider) {
209
+ return provider === "openai" ? OPENAI_PROFILE_NAME : provider;
210
+ }
211
+
212
+ function resolveDefaultProfile(profiles) {
213
+ if (profiles[OPENAI_PROFILE_NAME]) return OPENAI_PROFILE_NAME;
214
+ if (profiles["opencode-go"]) return "opencode-go";
215
+ return (
216
+ Object.keys(profiles).find((name) => name !== "cheap" && name !== "deep") ??
217
+ OPENAI_PROFILE_NAME
218
+ );
219
+ }
220
+
142
221
  async function main() {
143
222
  const force = process.argv.includes("--force");
144
223
  const dryRun = process.argv.includes("--dry-run");
@@ -171,23 +250,37 @@ async function main() {
171
250
  process.exit(0);
172
251
  }
173
252
 
174
- const { provider: primaryProvider, models: primaryModels } =
175
- choosePrimaryProvider(available);
176
-
177
- const highModel = pickTierModel(primaryModels, "high");
178
- const mediumModel = pickTierModel(primaryModels, "medium");
179
- const lowModel = pickTierModel(primaryModels, "low");
253
+ const profiles = {};
254
+ for (const provider of ["openai", "opencode-go"]) {
255
+ const profile = buildRoutedProfile(available, provider);
256
+ if (profile) profiles[routerProfileName(provider)] = profile;
257
+ }
180
258
 
181
- if (!highModel || !mediumModel || !lowModel) {
182
- fail("could not assign tier models from available registry");
259
+ if (Object.keys(profiles).length === 0) {
260
+ const { provider: primaryProvider, models: primaryModels } =
261
+ choosePrimaryProvider(available);
262
+ const profile = buildRoutedProfile(available, primaryProvider);
263
+ if (!profile) {
264
+ fail("could not assign tier models from available registry");
265
+ }
266
+ profiles[primaryProvider] = profile;
183
267
  }
184
268
 
185
- const fallbacks = buildFallbacks(available, primaryProvider, highModel);
269
+ const cheapDeepSource = profiles["opencode-go"]
270
+ ? "opencode-go"
271
+ : resolveDefaultProfile(profiles);
272
+ addCheapDeepProfiles(profiles, available, cheapDeepSource);
273
+
274
+ const defaultProfile = resolveDefaultProfile(profiles);
275
+ const classifierModel = resolveClassifierModel(available);
276
+ if (!classifierModel) {
277
+ fail("could not assign classifier model from available registry");
278
+ }
186
279
 
187
280
  const config = {
188
- defaultProfile: "auto",
281
+ defaultProfile,
189
282
  debug: false,
190
- classifierModel: mediumModel,
283
+ classifierModel,
191
284
  phaseBias: 0.5,
192
285
  maxSessionBudget: 1.0,
193
286
  largeContextThreshold: 100000,
@@ -199,27 +292,13 @@ async function main() {
199
292
  },
200
293
  { matches: "changelog", tier: "low" },
201
294
  ],
202
- profiles: {
203
- auto: {
204
- high: { model: highModel, thinking: "high", fallbacks },
205
- medium: { model: mediumModel, thinking: "medium" },
206
- low: { model: lowModel, thinking: "low" },
207
- },
208
- cheap: {
209
- high: { model: mediumModel, thinking: "low" },
210
- medium: { model: lowModel, thinking: "off" },
211
- low: { model: lowModel, thinking: "off" },
212
- },
213
- deep: {
214
- high: { model: highModel, thinking: "xhigh", fallbacks },
215
- medium: { model: mediumModel, thinking: "medium" },
216
- low: { model: lowModel, thinking: "low" },
217
- },
218
- },
295
+ profiles,
219
296
  };
220
297
 
221
298
  const json = `${JSON.stringify(config, null, 2)}\n`;
222
299
  const providerSet = [...new Set(available.map((m) => m.provider))].sort();
300
+ const autoProfile = profiles[OPENAI_PROFILE_NAME];
301
+ const opencodeProfile = profiles["opencode-go"];
223
302
 
224
303
  if (dryRun) {
225
304
  process.stdout.write(json);
@@ -230,13 +309,16 @@ async function main() {
230
309
  writeFileSync(OUT_PATH, json, "utf8");
231
310
 
232
311
  console.log("✓ Generated .pi/model-router.json from Pi authenticated providers:");
233
- console.log(` Primary provider: ${primaryProvider}`);
312
+ console.log(` Default profile: ${defaultProfile}`);
313
+ console.log(` Classifier: ${classifierModel}`);
234
314
  console.log(` Authenticated providers: ${providerSet.join(", ")}`);
235
315
  console.log(` Available models: ${available.length}`);
236
- console.log(` High tier: ${highModel}`);
237
- console.log(` Medium tier: ${mediumModel}`);
238
- console.log(` Low tier: ${lowModel}`);
239
- if (fallbacks.length) console.log(` Fallbacks: ${fallbacks.join(", ")}`);
316
+ if (autoProfile) {
317
+ console.log(` auto (openai) high: ${autoProfile.high.model}`);
318
+ }
319
+ if (opencodeProfile) {
320
+ console.log(` opencode-go high: ${opencodeProfile.high.model}`);
321
+ }
240
322
  }
241
323
 
242
324
  main().catch((err) => {
@@ -0,0 +1,97 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Unit tests for session-locked pi-model-router routing (no LLM).
4
+ * Run: npx tsx .pi/scripts/harness-model-router-routing.test.mjs
5
+ */
6
+
7
+ import assert from "node:assert/strict";
8
+ import { readFileSync } from "node:fs";
9
+ import { join, dirname } from "node:path";
10
+ import { fileURLToPath } from "node:url";
11
+ import {
12
+ decideSessionLock,
13
+ applyThinkingToDecision,
14
+ buildRoutingDecision,
15
+ decideRouting,
16
+ } from "../../vendor/pi-model-router/extensions/routing.js";
17
+
18
+ const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..");
19
+
20
+ const sampleProfile = {
21
+ high: { model: "openai/gpt-5.5", thinking: "high" },
22
+ medium: { model: "openai/gpt-5.5", thinking: "medium" },
23
+ low: { model: "openai/gpt-5.5", thinking: "low" },
24
+ };
25
+
26
+ const planningContext = {
27
+ systemPrompt: "You are a harness architect. Design tradeoffs and migration strategy.",
28
+ messages: [
29
+ {
30
+ role: "user",
31
+ content:
32
+ "Plan a multi-phase refactor across modules with architecture review.",
33
+ timestamp: 1,
34
+ },
35
+ ],
36
+ };
37
+
38
+ const shortContext = {
39
+ systemPrompt: "Summarize briefly.",
40
+ messages: [{ role: "user", content: "changelog", timestamp: 1 }],
41
+ };
42
+
43
+ const lockHigh = decideSessionLock(
44
+ planningContext,
45
+ "auto",
46
+ sampleProfile,
47
+ undefined,
48
+ undefined,
49
+ 0.5,
50
+ [{ matches: "changelog", tier: "low" }],
51
+ );
52
+ assert.equal(lockHigh.tier, "high", "planning prompt locks high tier");
53
+
54
+ const lockLow = decideSessionLock(shortContext, "auto", sampleProfile);
55
+ assert.equal(lockLow.tier, "low", "short summary locks low tier");
56
+
57
+ const locked = buildRoutingDecision(
58
+ "auto",
59
+ sampleProfile,
60
+ lockHigh.tier,
61
+ "planning",
62
+ lockHigh.reasoning,
63
+ );
64
+ const thinkingTurn = decideRouting(
65
+ {
66
+ ...planningContext,
67
+ messages: [
68
+ ...planningContext.messages,
69
+ { role: "user", content: "changelog only", timestamp: 2 },
70
+ ],
71
+ },
72
+ "auto",
73
+ sampleProfile,
74
+ locked,
75
+ );
76
+ const merged = applyThinkingToDecision(locked, thinkingTurn, sampleProfile);
77
+ assert.equal(merged.targetLabel, locked.targetLabel, "model stays locked");
78
+ assert.equal(merged.tier, thinkingTurn.tier, "thinking tier follows turn");
79
+ assert.equal(merged.thinking, "low", "low thinking from turn tier config");
80
+
81
+ const examplePath = join(ROOT, ".pi", "model-router.example.json");
82
+ const example = JSON.parse(readFileSync(examplePath, "utf8"));
83
+ for (const [name, profile] of Object.entries(example.profiles ?? {})) {
84
+ const { high, medium, low } = profile;
85
+ assert.equal(
86
+ high.model,
87
+ medium.model,
88
+ `example profile ${name}: medium/high same model`,
89
+ );
90
+ assert.equal(
91
+ medium.model,
92
+ low.model,
93
+ `example profile ${name}: low/medium same model`,
94
+ );
95
+ }
96
+
97
+ console.log("harness-model-router-routing.test: PASS");
@@ -29,11 +29,24 @@ function saveSettings(settingsPath, data) {
29
29
  );
30
30
  }
31
31
 
32
+ function readDefaultRouterProfile(configPath) {
33
+ if (!existsSync(configPath)) return "auto";
34
+ try {
35
+ const data = JSON.parse(readFileSync(configPath, "utf8"));
36
+ const profile =
37
+ typeof data.defaultProfile === "string" ? data.defaultProfile.trim() : "";
38
+ return profile || "auto";
39
+ } catch {
40
+ return "auto";
41
+ }
42
+ }
43
+
32
44
  function main() {
33
45
  const root = process.cwd();
34
46
  const configPath = join(root, ".pi", "model-router.json");
35
47
  const settingsPath = join(root, ".pi", "settings.json");
36
48
  const hasConfig = existsSync(configPath);
49
+ const defaultRouterProfile = readDefaultRouterProfile(configPath);
37
50
 
38
51
  const settings = loadSettings(settingsPath);
39
52
  if (!settings) {
@@ -67,14 +80,14 @@ function main() {
67
80
 
68
81
  if (noProjectDefault) {
69
82
  settings.defaultProvider = "router";
70
- settings.defaultModel = "auto";
83
+ settings.defaultModel = defaultRouterProfile;
71
84
  changed = true;
72
85
  }
73
86
 
74
87
  if (changed) {
75
88
  saveSettings(settingsPath, settings);
76
89
  console.log(
77
- "✓ Router defaults set (`router` / `auto`) — run /reload in pi when ready",
90
+ `✓ Router defaults set (\`router\` / \`${defaultRouterProfile}\`) — run /reload in pi when ready`,
78
91
  );
79
92
  } else {
80
93
  console.log("[harness-model-router] Defaults unchanged (user set defaultProvider)");
@@ -37,6 +37,8 @@ const REQUIRED_ADRS = [
37
37
  "0009-sentrux-rules-lifecycle.md",
38
38
  "0031-harness-run-context.md",
39
39
  "0032-harness-command-orchestration.md",
40
+ "0037-subagent-submit-tools.md",
41
+ "0038-budget-telemetry-only.md",
40
42
  ];
41
43
 
42
44
  const REQUIRED_EXTENSIONS = [
@@ -143,6 +145,34 @@ async function checkSentruxRules() {
143
145
  ok(".sentrux/rules.toml present");
144
146
  }
145
147
 
148
+ async function checkModelRouterThinkingOnly() {
149
+ const path = join(ROOT, ".pi", "model-router.json");
150
+ if (!(await fileExists(path))) {
151
+ ok("model-router.json absent (skip thinking-only tier check)");
152
+ return;
153
+ }
154
+ let raw;
155
+ try {
156
+ raw = JSON.parse(await readFile(path, "utf-8"));
157
+ } catch {
158
+ fail("invalid .pi/model-router.json");
159
+ }
160
+ const profiles = raw.profiles ?? {};
161
+ for (const [name, profile] of Object.entries(profiles)) {
162
+ const high = profile?.high?.model;
163
+ const medium = profile?.medium?.model;
164
+ const low = profile?.low?.model;
165
+ if (
166
+ !(high && medium && low && high === medium && medium === low)
167
+ ) {
168
+ fail(
169
+ `model-router profile "${name}" must use the same model on high/medium/low (thinking-only tiers)`,
170
+ );
171
+ }
172
+ }
173
+ ok("model-router.json thinking-only (same model per profile)");
174
+ }
175
+
146
176
  async function checkSentruxGate() {
147
177
  await checkSentruxRules();
148
178
 
@@ -286,6 +316,7 @@ async function main() {
286
316
  ok("test-diff-golden.json");
287
317
 
288
318
  await checkSentruxGate();
319
+ await checkModelRouterThinkingOnly();
289
320
 
290
321
  if (!(await fileExists(AGENTS_MANIFEST))) {
291
322
  fail(
package/CHANGELOG.md CHANGED
@@ -4,6 +4,27 @@ All notable changes to this project are documented in this file.
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ ## [v0.17.0] — 2026-05-22
8
+
9
+ ### ✨ Features
10
+
11
+ - **Model router:** Session-locked model SKU at start (initial prompt + system prompt); per-turn routing adjusts thinking tier only; subagents lock from agent `systemPrompt` complexity.
12
+ - **Harness:** Thinking-only profile shape in generator/verify; plan review gate, debate eligibility, and smoke fixture updates.
13
+
14
+ ### ✅ Tests
15
+
16
+ - Add `harness-model-router-routing` and plan-debate eligibility coverage.
17
+
18
+ ## [v0.16.0] — 2026-05-19
19
+
20
+ ### ✨ Features
21
+
22
+ - add submit pipeline and planning/debate updates
23
+
24
+ ### 🔧 Chores
25
+
26
+ - refresh graph artifacts after harness updates
27
+
7
28
  ## [v0.15.0] — 2026-05-19
8
29
 
9
30
  ### ✨ Features
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ultimate-pi",
3
- "version": "0.15.0",
3
+ "version": "0.17.0",
4
4
  "description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -84,7 +84,7 @@
84
84
  "format": "biome format --write",
85
85
  "format:check": "biome format",
86
86
  "prepare": "lefthook install",
87
- "test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-live-widget-status.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-spawn-budget.test.mjs test/harness-turn-routing.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs",
87
+ "test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-live-widget-status.test.ts test/harness-plan-phase-policy.test.mjs test/harness-subagent-policy.test.mjs test/harness-spawn-budget.test.mjs test/harness-spawn-parse.test.mjs test/harness-schema-validate.test.mjs test/harness-turn-routing.test.mjs test/harness-budget-enforce.test.mjs test/harness-submit-policy.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs",
88
88
  "test:vcc": "npx -y tsx --test vendor/pi-vcc/tests/*.test.ts",
89
89
  "harness:sentrux-bootstrap": "node .pi/scripts/harness-sentrux-bootstrap.mjs",
90
90
  "harness:sentrux-sync": "node .pi/scripts/sentrux-rules-sync.mjs --force",
@@ -103,6 +103,8 @@
103
103
  },
104
104
  "dependencies": {
105
105
  "@posthog/pi": "latest",
106
+ "ajv": "^8.17.1",
107
+ "ajv-formats": "^3.0.1",
106
108
  "croner": "^9.0.0",
107
109
  "jimp": "^1.6.1",
108
110
  "nanoid": "^5.1.5",