ultimate-pi 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/PACKAGING.md +35 -0
- package/.pi/prompts/harness-setup.md +48 -10
- package/.pi/{settings.json → settings.example.json} +1 -1
- package/CHANGELOG.md +9 -0
- package/package.json +35 -6
- package/.pi/harness/browser.json +0 -5
- package/.pi/harness/debates/README.md +0 -9
- package/.pi/harness/incidents/README.md +0 -6
- package/.pi/harness/release-readiness-report.md +0 -128
- package/.pi/harness/router/README.md +0 -35
- package/.pi/harness/router/apply-router-proposal.mjs +0 -153
- package/.pi/harness/router/proposals/canary-proposal.json +0 -96
- package/.pi/harness/router/propose-router-tuning.mjs +0 -149
- package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773891854/events.jsonl +0 -2
- package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773891854/trace.json +0 -17
- package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773912057/events.jsonl +0 -2
- package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773912057/trace.json +0 -17
- package/.pi/harness/runs/019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096/events.jsonl +0 -6
- package/.pi/harness/runs/019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096/trace.json +0 -42
- package/.pi/harness/runs/019e2732-8651-74e5-9f5d-4d06c3105f25-1778774136101/events.jsonl +0 -1
- package/.pi/harness/runs/019e2758-b332-771b-ad6f-54d0d8478768-1778776600591/events.jsonl +0 -2
- package/.pi/harness/runs/019e2758-b332-771b-ad6f-54d0d8478768-1778776600591/trace.json +0 -17
- package/.pi/harness/runs/README.md +0 -6
- package/.pi/harness/runs/budget-events.jsonl +0 -4
- package/.pi/harness/runs/canary-candidate-router.json +0 -72
- package/.pi/harness/runs/canary-evidence.json +0 -9
- package/.pi/harness/runs/index.jsonl +0 -4
- package/.pi/model-router.json +0 -95
- package/.pi/npm/.gitignore +0 -2
- package/.pi/prompts/release.md +0 -225
- package/firecrawl/.env +0 -53
package/.pi/PACKAGING.md
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Pi package packaging (ultimate-pi)
|
|
2
|
+
|
|
3
|
+
Aligned with [pi packages](https://github.com/badlogic/pi-mono/blob/main/packages/coding-agent/docs/packages.md).
|
|
4
|
+
|
|
5
|
+
## Pi manifest (`package.json` → `pi`)
|
|
6
|
+
|
|
7
|
+
| Key | Paths | Notes |
|
|
8
|
+
|-----|-------|--------|
|
|
9
|
+
| `extensions` | `.pi/extensions` | TypeScript extensions (loaded by pi) |
|
|
10
|
+
| `skills` | `.agents/skills`, `.pi/skills` | Agent Skills + pi-local skills |
|
|
11
|
+
| `prompts` | `.pi/prompts` | Slash-command prompt templates |
|
|
12
|
+
|
|
13
|
+
Pi does **not** define `scripts`, `agents`, or `providers` in the manifest.
|
|
14
|
+
|
|
15
|
+
- **Harness scripts** → `.pi/scripts/` (npm `harness:*` scripts; see `.pi/scripts/README.md`)
|
|
16
|
+
- **Subagent agents** → `.pi/agents/**/*.md` (loaded by `@tintinweb/pi-subagents` from the **project** `.pi/agents/`; `/harness-setup` seeds them from the installed package)
|
|
17
|
+
- **Providers** → install via `bundledDependencies` + user settings, not a separate manifest directory
|
|
18
|
+
|
|
19
|
+
## npm `files` allowlist
|
|
20
|
+
|
|
21
|
+
We use an explicit allowlist (not the whole `.pi/` tree) so dev-only artifacts never ship:
|
|
22
|
+
|
|
23
|
+
- No `.pi/harness/runs/`, local `model-router.json`, or `firecrawl/.env`
|
|
24
|
+
- Ship `.pi/settings.example.json`, not `.pi/settings.json` (dev checkout uses `".."` local package)
|
|
25
|
+
|
|
26
|
+
## Settings
|
|
27
|
+
|
|
28
|
+
| File | Shipped | Purpose |
|
|
29
|
+
|------|---------|---------|
|
|
30
|
+
| `.pi/settings.json` | No | Repo dev only (`"packages": ["..", …]`) |
|
|
31
|
+
| `.pi/settings.example.json` | Yes | Merge into project `.pi/settings.json` during setup |
|
|
32
|
+
|
|
33
|
+
## Dependencies
|
|
34
|
+
|
|
35
|
+
Runtime pi extensions are in `dependencies` + `bundledDependencies`. `@mariozechner/pi-coding-agent` is a `peerDependency` (provided by the pi CLI).
|
|
@@ -30,7 +30,16 @@ which git && git --version
|
|
|
30
30
|
|
|
31
31
|
Block if node < 18, npm < 9, or git missing. Report versions and continue.
|
|
32
32
|
|
|
33
|
-
Read `.pi/auto-commit.json` for co-author + branch config.
|
|
33
|
+
Read `.pi/auto-commit.json` for co-author + branch config.
|
|
34
|
+
|
|
35
|
+
Resolve the installed **ultimate-pi** package root (works in this repo and after `pi install npm:ultimate-pi`):
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
UP_PKG="$(node -p "require('path').dirname(require.resolve('ultimate-pi/package.json'))")"
|
|
39
|
+
echo "ultimate-pi package: $UP_PKG"
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
For extension package names, read **`$UP_PKG/.pi/settings.example.json`** (shipped template). Merge its `packages` array into the **project** `.pi/settings.json` if missing — do not copy the repo-dev `.pi/settings.json` from the package (it may contain `".."` and is not published).
|
|
34
43
|
|
|
35
44
|
## Step 0.5 — Graphify (skip if `--skip-graphify`)
|
|
36
45
|
|
|
@@ -432,13 +441,20 @@ sentrux gate --save . 2>/dev/null || echo "Baseline will be saved on first gate
|
|
|
432
441
|
|
|
433
442
|
## Step 3 — Pi Extension Packages
|
|
434
443
|
|
|
435
|
-
|
|
444
|
+
Bundled extensions load from the installed `ultimate-pi` package. Optionally install the companion `.pi/npm` dependencies used in development:
|
|
436
445
|
|
|
437
446
|
```bash
|
|
438
|
-
|
|
439
|
-
npm
|
|
447
|
+
UP_PKG="$(node -p "require('path').dirname(require.resolve('ultimate-pi/package.json'))")"
|
|
448
|
+
if [ -f "$UP_PKG/.pi/npm/package.json" ]; then
|
|
449
|
+
(cd "$UP_PKG/.pi/npm" && npm install)
|
|
450
|
+
echo "✓ ultimate-pi .pi/npm dependencies"
|
|
451
|
+
else
|
|
452
|
+
echo "✓ skip .pi/npm (not in package)"
|
|
453
|
+
fi
|
|
440
454
|
```
|
|
441
455
|
|
|
456
|
+
Merge extension entries from `$UP_PKG/.pi/settings.example.json` into this project's `.pi/settings.json` `packages` array (add any missing `npm:…` entries; keep existing user packages).
|
|
457
|
+
|
|
442
458
|
Verify each package:
|
|
443
459
|
|
|
444
460
|
| Package | Purpose | Phase |
|
|
@@ -460,9 +476,10 @@ The script below:
|
|
|
460
476
|
|
|
461
477
|
```bash
|
|
462
478
|
# Verify package installed first
|
|
463
|
-
ls
|
|
479
|
+
ls "$UP_PKG/node_modules/@yeliu84/pi-model-router/package.json" 2>/dev/null \
|
|
480
|
+
|| ls "$UP_PKG/.pi/npm/node_modules/@yeliu84/pi-model-router/package.json" 2>/dev/null \
|
|
464
481
|
&& echo "✓ model-router package" \
|
|
465
|
-
|| echo "✗ model-router package —
|
|
482
|
+
|| echo "✗ model-router package — reinstall ultimate-pi or run npm install in $UP_PKG/.pi/npm"
|
|
466
483
|
|
|
467
484
|
# Generate config from detected providers (only if missing)
|
|
468
485
|
if [ -f .pi/model-router.json ]; then
|
|
@@ -578,7 +595,25 @@ Do NOT block. If generation fails, warn in report and continue.
|
|
|
578
595
|
|
|
579
596
|
> `/router profile auto`
|
|
580
597
|
|
|
581
|
-
The pi TUI will intercept this and activate the `auto` profile. Then continue to Step
|
|
598
|
+
The pi TUI will intercept this and activate the `auto` profile. Then continue to Step 3.6.
|
|
599
|
+
|
|
600
|
+
## Step 3.6 — Seed `.pi/agents` (pi-subagents)
|
|
601
|
+
|
|
602
|
+
`@tintinweb/pi-subagents` reads agent definitions from **this project's** `.pi/agents/`, not from the installed npm tree. Copy packaged agents when missing (preserves user edits):
|
|
603
|
+
|
|
604
|
+
```bash
|
|
605
|
+
UP_PKG="$(node -p "require('path').dirname(require.resolve('ultimate-pi/package.json'))")"
|
|
606
|
+
mkdir -p .pi/agents/harness .pi/agents/pi-pi
|
|
607
|
+
for dir in harness pi-pi; do
|
|
608
|
+
[ -d "$UP_PKG/.pi/agents/$dir" ] || continue
|
|
609
|
+
for f in "$UP_PKG/.pi/agents/$dir"/*.md; do
|
|
610
|
+
[ -f "$f" ] || continue
|
|
611
|
+
base="$(basename "$f")"
|
|
612
|
+
[ -f ".pi/agents/$dir/$base" ] || cp "$f" ".pi/agents/$dir/$base"
|
|
613
|
+
done
|
|
614
|
+
done
|
|
615
|
+
echo "✓ .pi/agents (harness + pi-pi) seeded from package"
|
|
616
|
+
```
|
|
582
617
|
|
|
583
618
|
## Step 4 — Configuration Files
|
|
584
619
|
|
|
@@ -643,7 +678,7 @@ Created: $(date +%Y-%m-%d)
|
|
|
643
678
|
- .pi/harness/specs/ → Harness contracts and schema docs
|
|
644
679
|
- .pi/harness/incidents/ → Incident and override records
|
|
645
680
|
- `.agents/skills/` (npm package) → Harness skills (no copy into `.pi/skills/` needed)
|
|
646
|
-
-
|
|
681
|
+
- `.pi/agents/` → Specialized agents (seed from package — see Step 3.6)
|
|
647
682
|
|
|
648
683
|
## Graphify-First Workflow
|
|
649
684
|
|
|
@@ -674,7 +709,8 @@ Then run the remaining checks:
|
|
|
674
709
|
|
|
675
710
|
```bash
|
|
676
711
|
# pi extensions
|
|
677
|
-
|
|
712
|
+
UP_PKG="$(node -p "require('path').dirname(require.resolve('ultimate-pi/package.json'))")"
|
|
713
|
+
npm ls --prefix "$UP_PKG" 2>/dev/null | head -5 && echo "✓ ultimate-pi bundled extensions" || echo "✗ check ultimate-pi install"
|
|
678
714
|
|
|
679
715
|
# graphify knowledge graph (pip/pip3, uv, apt, or PATH)
|
|
680
716
|
PIP_CMD=""
|
|
@@ -706,7 +742,9 @@ print(f'✓ knowledge graph built ({n} nodes)' if n else '✗ graph.json has 0 n
|
|
|
706
742
|
graphify hook status 2>/dev/null && echo "✓ graphify git hooks installed" || echo "✗ graphify git hooks not installed"
|
|
707
743
|
|
|
708
744
|
# model router
|
|
709
|
-
ls
|
|
745
|
+
ls "$UP_PKG/node_modules/@yeliu84/pi-model-router/package.json" 2>/dev/null \
|
|
746
|
+
|| ls "$UP_PKG/.pi/npm/node_modules/@yeliu84/pi-model-router/package.json" 2>/dev/null \
|
|
747
|
+
&& echo "✓ model-router package" || echo "✗ model-router package"
|
|
710
748
|
ls .pi/model-router.json 2>/dev/null && echo "✓ model-router config" || echo "✗ model-router config"
|
|
711
749
|
|
|
712
750
|
# raw folder for graphify sources
|
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project are documented in this file.
|
|
4
4
|
|
|
5
|
+
## [v0.2.6] — 2026-05-15
|
|
6
|
+
|
|
7
|
+
### 🔧 Chores
|
|
8
|
+
|
|
9
|
+
- Align npm publish with pi package docs: explicit `files` allowlist (no dev runs, secrets, or local router config)
|
|
10
|
+
- Fix `pi` manifest: drop missing `.pi/providers`, add `.pi/skills`
|
|
11
|
+
- Ship `.pi/settings.example.json` instead of dev `.pi/settings.json` (removes `".."` local package from installs)
|
|
12
|
+
- Document layout in `.pi/PACKAGING.md`; harness-setup seeds `.pi/agents` and resolves package root for npm installs
|
|
13
|
+
|
|
5
14
|
## [v0.2.5] — 2026-05-15
|
|
6
15
|
|
|
7
16
|
### 🔧 Chores
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ultimate-pi",
|
|
3
|
-
"version": "0.2.5",
|
|
3
|
+
"version": "0.2.6",
|
|
4
4
|
"description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
|
@@ -24,11 +24,11 @@
|
|
|
24
24
|
},
|
|
25
25
|
"pi": {
|
|
26
26
|
"extensions": [
|
|
27
|
-
"./.pi/extensions"
|
|
28
|
-
"./.pi/providers"
|
|
27
|
+
"./.pi/extensions"
|
|
29
28
|
],
|
|
30
29
|
"skills": [
|
|
31
|
-
"./.agents/skills"
|
|
30
|
+
"./.agents/skills",
|
|
31
|
+
"./.pi/skills"
|
|
32
32
|
],
|
|
33
33
|
"prompts": [
|
|
34
34
|
"./.pi/prompts"
|
|
@@ -36,14 +36,43 @@
|
|
|
36
36
|
},
|
|
37
37
|
"files": [
|
|
38
38
|
".agents",
|
|
39
|
-
".pi",
|
|
40
39
|
".sentrux",
|
|
41
|
-
"
|
|
40
|
+
".pi/extensions",
|
|
41
|
+
".pi/prompts",
|
|
42
|
+
"!.pi/prompts/release.md",
|
|
43
|
+
".pi/skills",
|
|
44
|
+
".pi/agents",
|
|
45
|
+
".pi/scripts",
|
|
46
|
+
".pi/lib",
|
|
47
|
+
".pi/sounds",
|
|
48
|
+
".pi/harness/specs",
|
|
49
|
+
".pi/harness/docs",
|
|
50
|
+
".pi/harness/sentrux",
|
|
51
|
+
".pi/harness/evals",
|
|
52
|
+
".pi/harness/evolution",
|
|
53
|
+
".pi/harness/corpus",
|
|
54
|
+
".pi/harness/README.md",
|
|
55
|
+
".pi/npm/package.json",
|
|
56
|
+
".pi/npm/.gitignore",
|
|
57
|
+
".pi/model-router.example.json",
|
|
58
|
+
".pi/settings.example.json",
|
|
59
|
+
".pi/auto-commit.json",
|
|
60
|
+
".pi/mcp.json",
|
|
61
|
+
".pi/pi-vcc-config.json",
|
|
62
|
+
".pi/SYSTEM.md",
|
|
63
|
+
".pi/PACKAGING.md",
|
|
64
|
+
"firecrawl/docker-compose.yaml",
|
|
65
|
+
"firecrawl/.env.template",
|
|
66
|
+
"firecrawl/README.md",
|
|
67
|
+
"firecrawl/searxng",
|
|
42
68
|
"AGENTS.md",
|
|
43
69
|
"biome.json",
|
|
44
70
|
"CHANGELOG.md",
|
|
45
71
|
"README.md"
|
|
46
72
|
],
|
|
73
|
+
"peerDependencies": {
|
|
74
|
+
"@mariozechner/pi-coding-agent": "*"
|
|
75
|
+
},
|
|
47
76
|
"scripts": {
|
|
48
77
|
"check:ts": "tsc --noEmit --target ES2022 --moduleResolution nodenext --module nodenext --skipLibCheck .pi/extensions/dotenv-loader.ts .pi/extensions/lib/posthog-node.d.ts .pi/extensions/lib/harness-posthog.ts .pi/extensions/lib/harness-paths.ts .pi/extensions/model-router-bootstrap.ts .pi/extensions/harness-telemetry.ts .pi/extensions/trace-recorder.ts .pi/extensions/observation-bus.ts .pi/extensions/drift-monitor.ts .pi/extensions/sentrux-rules-sync.ts .pi/extensions/custom-header.ts",
|
|
49
78
|
"harness:graphify-bootstrap": "bash .pi/scripts/harness-graphify-bootstrap.sh",
|
package/.pi/harness/release-readiness-report.md
DELETED
|
@@ -1,128 +0,0 @@
|
|
|
1
|
-
# Release Readiness Report
|
|
2
|
-
|
|
3
|
-
Date: 2026-05-14
|
|
4
|
-
Repo root used: `/home/aryaniyaps/ai-projects/ultimate-pi` (active workspace root, treated as canonical)
|
|
5
|
-
|
|
6
|
-
## Requested remaining work
|
|
7
|
-
|
|
8
|
-
- `run-adversarial-canary-and-release`
|
|
9
|
-
- `final-prompt-expert-feature-sweep`
|
|
10
|
-
|
|
11
|
-
Plan file was not modified.
|
|
12
|
-
|
|
13
|
-
## Final integration checks
|
|
14
|
-
|
|
15
|
-
### 1) TypeScript compile check
|
|
16
|
-
|
|
17
|
-
- Command: `npm run check:ts`
|
|
18
|
-
- Result: PASS
|
|
19
|
-
|
|
20
|
-
### 2) Full lint/format/test gate
|
|
21
|
-
|
|
22
|
-
- Command: `npm run check:ts && npm run lint && npm run format:check && npm test`
|
|
23
|
-
- Result: FAIL (expected in current tree state)
|
|
24
|
-
- Notes:
|
|
25
|
-
- `biome check` reports existing lint/format issues (including `.pi/extensions/custom-footer.ts` and multiple `.pi/harness/specs/*.json` files).
|
|
26
|
-
- `npm test` fails before test execution due to a Node runtime flag incompatibility:
|
|
27
|
-
- `node: bad option: --experimental-strip-types`
|
|
28
|
-
|
|
29
|
-
### 3) Release preflight checks
|
|
30
|
-
|
|
31
|
-
- Command: `git rev-parse --is-inside-work-tree && git remote -v && git symbolic-ref -q HEAD && (git diff --quiet && git diff --cached --quiet && echo CLEAN || echo DIRTY)`
|
|
32
|
-
- Result:
|
|
33
|
-
- inside git repo: yes
|
|
34
|
-
- branch: `refs/heads/main`
|
|
35
|
-
- remote `origin`: configured
|
|
36
|
-
- tree cleanliness: `DIRTY` (release/tag push should stay blocked until clean)
|
|
37
|
-
|
|
38
|
-
## Targeted canary validations
|
|
39
|
-
|
|
40
|
-
### 1) Prompt and policy canary assertions
|
|
41
|
-
|
|
42
|
-
- Static canary suite executed against:
|
|
43
|
-
- harness prompt templates
|
|
44
|
-
- `policy-gate`
|
|
45
|
-
- `test-diff-integrity`
|
|
46
|
-
- `debate-orchestrator`
|
|
47
|
-
- Result: PASS after prompt sweep updates
|
|
48
|
-
- locked clauses in `harness-auto` preserved
|
|
49
|
-
- prompt argument parsing + usage surfaces present across harness prompts
|
|
50
|
-
- completion behavior sections present for operator-facing harness prompts
|
|
51
|
-
- policy/test/debate lock signals present in extension code
|
|
52
|
-
|
|
53
|
-
### 2) Router tuning canary (proposal-only)
|
|
54
|
-
|
|
55
|
-
- Created synthetic canary evidence:
|
|
56
|
-
- `.pi/harness/runs/canary-evidence.json`
|
|
57
|
-
- Candidate router for dry proposal:
|
|
58
|
-
- `.pi/harness/runs/canary-candidate-router.json`
|
|
59
|
-
- Command:
|
|
60
|
-
- `node .pi/harness/router/propose-router-tuning.mjs --evidence ... --candidate ... --proposal-out .pi/harness/router/proposals/canary-proposal.json`
|
|
61
|
-
- Result: PASS (proposal created, no live router write)
|
|
62
|
-
|
|
63
|
-
### 3) Harness schema parse check
|
|
64
|
-
|
|
65
|
-
- Command: Node JSON parse validation across `.pi/harness/specs/*.json`
|
|
66
|
-
- Result: PASS (all 9 schema files parse successfully)
|
|
67
|
-
|
|
68
|
-
## Lightweight adversarial drills
|
|
69
|
-
|
|
70
|
-
### 1) Negative apply drill (guardrail validation)
|
|
71
|
-
|
|
72
|
-
- Command:
|
|
73
|
-
- `node .pi/harness/router/apply-router-proposal.mjs --proposal ... --approve-by ... --justification ...`
|
|
74
|
-
- intentionally omitted `--write`
|
|
75
|
-
- Result: PASS (guard correctly blocked apply)
|
|
76
|
-
- Expected error:
|
|
77
|
-
- `missing --write (blind writes and implicit applies are disallowed)`
|
|
78
|
-
|
|
79
|
-
### 2) Adversarial lock retention
|
|
80
|
-
|
|
81
|
-
- Verified locked governance semantics remain stated in `harness-auto`:
|
|
82
|
-
- adversarial review always required
|
|
83
|
-
- severity-policy-engine remains merge-block authority
|
|
84
|
-
- strict pre-PR gates mandatory
|
|
85
|
-
- never auto-merge
|
|
86
|
-
|
|
87
|
-
## Prompt expert feature sweep
|
|
88
|
-
|
|
89
|
-
Using guidance from `.pi/agents/pi-pi/prompt-expert.md`, harness prompt templates were refined for:
|
|
90
|
-
|
|
91
|
-
1. Argument handling:
|
|
92
|
-
- explicit `$ARGUMENTS` parse sections
|
|
93
|
-
- required/optional argument normalization
|
|
94
|
-
- deterministic usage fallback lines
|
|
95
|
-
2. Completion behavior:
|
|
96
|
-
- explicit terminal output contracts for predictable downstream handoff
|
|
97
|
-
3. UX consistency:
|
|
98
|
-
- harmonized command usage patterns and closure blocks across harness prompts
|
|
99
|
-
4. Policy integrity:
|
|
100
|
-
- locked policy constraints intentionally kept intact
|
|
101
|
-
|
|
102
|
-
## Files updated in this sweep
|
|
103
|
-
|
|
104
|
-
- `.pi/prompts/harness-auto.md`
|
|
105
|
-
- `.pi/prompts/harness-plan.md`
|
|
106
|
-
- `.pi/prompts/harness-run.md`
|
|
107
|
-
- `.pi/prompts/harness-review.md`
|
|
108
|
-
- `.pi/prompts/harness-critic.md`
|
|
109
|
-
- `.pi/prompts/harness-eval.md`
|
|
110
|
-
- `.pi/prompts/harness-trace.md`
|
|
111
|
-
- `.pi/prompts/harness-incident.md`
|
|
112
|
-
- `.pi/prompts/harness-router-tune.md`
|
|
113
|
-
- `.pi/prompts/harness-setup.md`
|
|
114
|
-
- `.pi/harness/release-readiness-report.md` (this report)
|
|
115
|
-
|
|
116
|
-
## New canary artifacts
|
|
117
|
-
|
|
118
|
-
- `.pi/harness/runs/canary-evidence.json`
|
|
119
|
-
- `.pi/harness/runs/canary-candidate-router.json`
|
|
120
|
-
- `.pi/harness/router/proposals/canary-proposal.json`
|
|
121
|
-
|
|
122
|
-
## Residual risks
|
|
123
|
-
|
|
124
|
-
1. Full repo lint/format gate currently fails due to pre-existing issues unrelated to this sweep.
|
|
125
|
-
2. `npm test` is currently not runnable in this environment because the configured Node flag is unsupported.
|
|
126
|
-
3. Release flow should remain blocked until working tree is clean and CI-equivalent checks pass.
|
|
127
|
-
4. Router apply path was intentionally not executed with `--write` during this run (safety-preserving drill).
|
|
128
|
-
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
# Router Tuning Flow
|
|
2
|
-
|
|
3
|
-
Router tuning is intentionally split into two steps:
|
|
4
|
-
|
|
5
|
-
1. **Propose** (`propose-router-tuning.mjs`)
|
|
6
|
-
2. **Approve + apply** (`apply-router-proposal.mjs`)
|
|
7
|
-
|
|
8
|
-
Blind writes to `.pi/model-router.json` are prohibited by design.
|
|
9
|
-
|
|
10
|
-
## Proposal
|
|
11
|
-
|
|
12
|
-
```bash
|
|
13
|
-
node .pi/harness/router/propose-router-tuning.mjs \
|
|
14
|
-
--evidence /path/to/evidence.json \
|
|
15
|
-
--candidate /path/to/candidate-router.json \
|
|
16
|
-
--proposal-out .pi/harness/router/proposals/proposal-001.json
|
|
17
|
-
```
|
|
18
|
-
|
|
19
|
-
## Apply (requires explicit human approval + justification)
|
|
20
|
-
|
|
21
|
-
```bash
|
|
22
|
-
node .pi/harness/router/apply-router-proposal.mjs \
|
|
23
|
-
--proposal .pi/harness/router/proposals/proposal-001.json \
|
|
24
|
-
--approve-by "human.name" \
|
|
25
|
-
--justification "why this is safe" \
|
|
26
|
-
--write
|
|
27
|
-
```
|
|
28
|
-
|
|
29
|
-
## Safety checks
|
|
30
|
-
|
|
31
|
-
- Evidence threshold must pass (`sample_count >= min_sample_count`)
|
|
32
|
-
- Regression guard must pass
|
|
33
|
-
- Base router hash in proposal must match current `.pi/model-router.json`
|
|
34
|
-
- Apply requires explicit approver and justification
|
|
35
|
-
- Current router file is backed up before write
|
|
@@ -1,153 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
import crypto from "node:crypto";
|
|
4
|
-
import fs from "node:fs";
|
|
5
|
-
import path from "node:path";
|
|
6
|
-
|
|
7
|
-
const ROUTER_PATH = ".pi/model-router.json";
|
|
8
|
-
const BACKUP_DIR = ".pi/harness/router/backups";
|
|
9
|
-
|
|
10
|
-
function fail(message) {
|
|
11
|
-
process.stderr.write(`Error: ${message}\n`);
|
|
12
|
-
process.exit(1);
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
function parseArgs(argv) {
|
|
16
|
-
const args = {};
|
|
17
|
-
for (let i = 0; i < argv.length; i++) {
|
|
18
|
-
const token = argv[i];
|
|
19
|
-
if (!token.startsWith("--")) continue;
|
|
20
|
-
const key = token.slice(2);
|
|
21
|
-
const value = argv[i + 1];
|
|
22
|
-
if (!value || value.startsWith("--")) {
|
|
23
|
-
args[key] = true;
|
|
24
|
-
continue;
|
|
25
|
-
}
|
|
26
|
-
args[key] = value;
|
|
27
|
-
i++;
|
|
28
|
-
}
|
|
29
|
-
return args;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
function readJson(filePath, label) {
|
|
33
|
-
if (!fs.existsSync(filePath)) fail(`${label} not found: ${filePath}`);
|
|
34
|
-
try {
|
|
35
|
-
return JSON.parse(fs.readFileSync(filePath, "utf8"));
|
|
36
|
-
} catch (error) {
|
|
37
|
-
fail(`${label} is not valid JSON (${filePath}): ${error.message}`);
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
function sha256FromJson(value) {
|
|
42
|
-
const canonical = `${JSON.stringify(value, null, 2)}\n`;
|
|
43
|
-
return crypto.createHash("sha256").update(canonical).digest("hex");
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
function validateProposal(proposal) {
|
|
47
|
-
if (proposal.status !== "proposed") {
|
|
48
|
-
fail(`proposal status must be 'proposed', got '${proposal.status}'`);
|
|
49
|
-
}
|
|
50
|
-
if (proposal.router_path !== ROUTER_PATH) {
|
|
51
|
-
fail(`proposal router_path must be '${ROUTER_PATH}'`);
|
|
52
|
-
}
|
|
53
|
-
const evidence = proposal.evidence ?? {};
|
|
54
|
-
if (
|
|
55
|
-
!Number.isInteger(evidence.sample_count) ||
|
|
56
|
-
!Number.isInteger(evidence.min_sample_count)
|
|
57
|
-
) {
|
|
58
|
-
fail("proposal evidence sample counts are invalid");
|
|
59
|
-
}
|
|
60
|
-
if (evidence.sample_count < evidence.min_sample_count) {
|
|
61
|
-
fail("proposal evidence does not meet minimum sample threshold");
|
|
62
|
-
}
|
|
63
|
-
if (evidence.regression_guard_passed !== true) {
|
|
64
|
-
fail("proposal regression guard is not passing");
|
|
65
|
-
}
|
|
66
|
-
if (!proposal.candidate_router || typeof proposal.candidate_router !== "object") {
|
|
67
|
-
fail("proposal missing candidate_router object");
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
const args = parseArgs(process.argv.slice(2));
|
|
72
|
-
|
|
73
|
-
if (args.help || args.h) {
|
|
74
|
-
process.stdout.write(
|
|
75
|
-
[
|
|
76
|
-
"Usage:",
|
|
77
|
-
" node .pi/harness/router/apply-router-proposal.mjs \\",
|
|
78
|
-
" --proposal <proposal.json> \\",
|
|
79
|
-
" --approve-by <human> \\",
|
|
80
|
-
" --justification <reason> \\",
|
|
81
|
-
" --write",
|
|
82
|
-
"",
|
|
83
|
-
"Behavior:",
|
|
84
|
-
" - validates proposal status and evidence",
|
|
85
|
-
" - verifies base router hash matches current router file",
|
|
86
|
-
" - creates backup before atomic write",
|
|
87
|
-
" - refuses write unless explicit --write is provided",
|
|
88
|
-
].join("\n"),
|
|
89
|
-
);
|
|
90
|
-
process.exit(0);
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
if (!args.proposal) fail("missing --proposal");
|
|
94
|
-
if (!args["approve-by"]) fail("missing --approve-by");
|
|
95
|
-
if (!args.justification) fail("missing --justification");
|
|
96
|
-
if (!args.write) {
|
|
97
|
-
fail("missing --write (blind writes and implicit applies are disallowed)");
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
const proposalPath = path.resolve(args.proposal);
|
|
101
|
-
const proposal = readJson(proposalPath, "proposal");
|
|
102
|
-
const currentRouter = readJson(ROUTER_PATH, "current router");
|
|
103
|
-
|
|
104
|
-
validateProposal(proposal);
|
|
105
|
-
|
|
106
|
-
const currentHash = sha256FromJson(currentRouter);
|
|
107
|
-
if (currentHash !== proposal.base_router_sha256) {
|
|
108
|
-
fail(
|
|
109
|
-
[
|
|
110
|
-
"base router hash mismatch; refusing apply.",
|
|
111
|
-
`current: ${currentHash}`,
|
|
112
|
-
`proposal: ${proposal.base_router_sha256}`,
|
|
113
|
-
].join("\n"),
|
|
114
|
-
);
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
const candidateHash = sha256FromJson(proposal.candidate_router);
|
|
118
|
-
if (candidateHash !== proposal.candidate_router_sha256) {
|
|
119
|
-
fail("proposal candidate_router hash mismatch; artifact may be tampered");
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
const now = new Date().toISOString();
|
|
123
|
-
fs.mkdirSync(BACKUP_DIR, { recursive: true });
|
|
124
|
-
const backupPath = path.join(
|
|
125
|
-
BACKUP_DIR,
|
|
126
|
-
`model-router.${now.replace(/[:.]/g, "-")}.json`,
|
|
127
|
-
);
|
|
128
|
-
fs.copyFileSync(ROUTER_PATH, backupPath);
|
|
129
|
-
|
|
130
|
-
const routerTemp = `${ROUTER_PATH}.tmp`;
|
|
131
|
-
fs.writeFileSync(routerTemp, `${JSON.stringify(proposal.candidate_router, null, 2)}\n`);
|
|
132
|
-
fs.renameSync(routerTemp, ROUTER_PATH);
|
|
133
|
-
|
|
134
|
-
proposal.status = "approved_applied";
|
|
135
|
-
proposal.approval = {
|
|
136
|
-
required: true,
|
|
137
|
-
approved_by: args["approve-by"],
|
|
138
|
-
approved_at: now,
|
|
139
|
-
justification: args.justification,
|
|
140
|
-
};
|
|
141
|
-
proposal.applied_router_sha256 = candidateHash;
|
|
142
|
-
proposal.backup_router_path = backupPath;
|
|
143
|
-
proposal.applied_at = now;
|
|
144
|
-
fs.writeFileSync(proposalPath, `${JSON.stringify(proposal, null, 2)}\n`);
|
|
145
|
-
|
|
146
|
-
process.stdout.write(
|
|
147
|
-
[
|
|
148
|
-
"Router proposal applied safely.",
|
|
149
|
-
`proposal: ${proposalPath}`,
|
|
150
|
-
`backup: ${backupPath}`,
|
|
151
|
-
`router: ${ROUTER_PATH}`,
|
|
152
|
-
].join("\n") + "\n",
|
|
153
|
-
);
|
|
@@ -1,96 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"schema_version": "1.0.0",
|
|
3
|
-
"proposal_id": "router-tune-2026-05-14T15-44-44-399Z",
|
|
4
|
-
"created_at": "2026-05-14T15:44:44.399Z",
|
|
5
|
-
"router_path": ".pi/model-router.json",
|
|
6
|
-
"base_router_sha256": "2a96fba517cc5b5147f37428d7ed62961b1968c0e83c0e69f02524265449856b",
|
|
7
|
-
"candidate_router_sha256": "2a96fba517cc5b5147f37428d7ed62961b1968c0e83c0e69f02524265449856b",
|
|
8
|
-
"evidence": {
|
|
9
|
-
"sample_count": 24,
|
|
10
|
-
"min_sample_count": 12,
|
|
11
|
-
"success_rate_delta": 0.08,
|
|
12
|
-
"cost_per_task_delta": -0.04,
|
|
13
|
-
"regression_guard_passed": true,
|
|
14
|
-
"trace_refs": ["run-canary-001", "run-canary-002"],
|
|
15
|
-
"notes": "canary validation synthetic evidence"
|
|
16
|
-
},
|
|
17
|
-
"status": "proposed",
|
|
18
|
-
"approval": {
|
|
19
|
-
"required": true,
|
|
20
|
-
"approved_by": null,
|
|
21
|
-
"approved_at": null,
|
|
22
|
-
"justification": null
|
|
23
|
-
},
|
|
24
|
-
"candidate_router": {
|
|
25
|
-
"defaultProfile": "auto",
|
|
26
|
-
"debug": false,
|
|
27
|
-
"classifierModel": "opencode-go/qwen3.6-plus",
|
|
28
|
-
"phaseBias": 0.5,
|
|
29
|
-
"maxSessionBudget": 1,
|
|
30
|
-
"largeContextThreshold": 100000,
|
|
31
|
-
"rules": [
|
|
32
|
-
{
|
|
33
|
-
"matches": ["deploy", "production", "release"],
|
|
34
|
-
"tier": "high",
|
|
35
|
-
"reason": "Safety check for production tasks"
|
|
36
|
-
},
|
|
37
|
-
{
|
|
38
|
-
"matches": "changelog",
|
|
39
|
-
"tier": "low"
|
|
40
|
-
}
|
|
41
|
-
],
|
|
42
|
-
"profiles": {
|
|
43
|
-
"auto": {
|
|
44
|
-
"high": {
|
|
45
|
-
"model": "opencode-go/deepseek-v4-pro",
|
|
46
|
-
"thinking": "high",
|
|
47
|
-
"fallbacks": ["opencode-go/qwen3.6-plus", "opencode-go/kimi-k2.6"]
|
|
48
|
-
},
|
|
49
|
-
"medium": {
|
|
50
|
-
"model": "opencode-go/qwen3.6-plus",
|
|
51
|
-
"thinking": "medium",
|
|
52
|
-
"fallbacks": ["opencode-go/deepseek-v4-pro"]
|
|
53
|
-
},
|
|
54
|
-
"low": {
|
|
55
|
-
"model": "opencode-go/deepseek-v4-flash",
|
|
56
|
-
"thinking": "low",
|
|
57
|
-
"fallbacks": ["opencode-go/qwen3.5-plus"]
|
|
58
|
-
}
|
|
59
|
-
},
|
|
60
|
-
"cheap": {
|
|
61
|
-
"high": {
|
|
62
|
-
"model": "opencode-go/qwen3.6-plus",
|
|
63
|
-
"thinking": "low",
|
|
64
|
-
"fallbacks": ["opencode-go/qwen3.5-plus"]
|
|
65
|
-
},
|
|
66
|
-
"medium": {
|
|
67
|
-
"model": "opencode-go/qwen3.5-plus",
|
|
68
|
-
"thinking": "off",
|
|
69
|
-
"fallbacks": ["opencode-go/deepseek-v4-flash"]
|
|
70
|
-
},
|
|
71
|
-
"low": {
|
|
72
|
-
"model": "opencode-go/deepseek-v4-flash",
|
|
73
|
-
"thinking": "off",
|
|
74
|
-
"fallbacks": ["opencode-go/qwen3.5-plus"]
|
|
75
|
-
}
|
|
76
|
-
},
|
|
77
|
-
"deep": {
|
|
78
|
-
"high": {
|
|
79
|
-
"model": "opencode-go/deepseek-v4-pro",
|
|
80
|
-
"thinking": "xhigh",
|
|
81
|
-
"fallbacks": ["opencode-go/kimi-k2.6"]
|
|
82
|
-
},
|
|
83
|
-
"medium": {
|
|
84
|
-
"model": "opencode-go/kimi-k2.6",
|
|
85
|
-
"thinking": "medium",
|
|
86
|
-
"fallbacks": ["opencode-go/deepseek-v4-pro"]
|
|
87
|
-
},
|
|
88
|
-
"low": {
|
|
89
|
-
"model": "opencode-go/qwen3.6-plus",
|
|
90
|
-
"thinking": "low",
|
|
91
|
-
"fallbacks": ["opencode-go/deepseek-v4-flash"]
|
|
92
|
-
}
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
}
|