ultimate-pi 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/.pi/extensions/custom-header.ts +26 -2
  2. package/.pi/extensions/lib/harness-paths.ts +55 -0
  3. package/.pi/extensions/model-router-bootstrap.ts +174 -0
  4. package/.pi/extensions/sentrux-rules-sync.ts +28 -3
  5. package/.pi/harness/browser.json +5 -0
  6. package/.pi/harness/debates/README.md +9 -0
  7. package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +1 -1
  8. package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +2 -2
  9. package/.pi/harness/incidents/README.md +6 -0
  10. package/.pi/harness/release-readiness-report.md +128 -0
  11. package/.pi/harness/router/proposals/canary-proposal.json +96 -0
  12. package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773891854/events.jsonl +2 -0
  13. package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773891854/trace.json +17 -0
  14. package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773912057/events.jsonl +2 -0
  15. package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773912057/trace.json +17 -0
  16. package/.pi/harness/runs/019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096/events.jsonl +6 -0
  17. package/.pi/harness/runs/019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096/trace.json +42 -0
  18. package/.pi/harness/runs/019e2732-8651-74e5-9f5d-4d06c3105f25-1778774136101/events.jsonl +1 -0
  19. package/.pi/harness/runs/019e2758-b332-771b-ad6f-54d0d8478768-1778776600591/events.jsonl +2 -0
  20. package/.pi/harness/runs/019e2758-b332-771b-ad6f-54d0d8478768-1778776600591/trace.json +17 -0
  21. package/.pi/harness/runs/README.md +6 -0
  22. package/.pi/harness/runs/budget-events.jsonl +4 -0
  23. package/.pi/harness/runs/canary-candidate-router.json +72 -0
  24. package/.pi/harness/runs/canary-evidence.json +9 -0
  25. package/.pi/harness/runs/index.jsonl +4 -0
  26. package/.pi/harness/sentrux/architecture.manifest.json +3 -3
  27. package/.pi/model-router.example.json +27 -0
  28. package/.pi/prompts/graphify.md +4 -8
  29. package/.pi/prompts/harness-setup.md +142 -92
  30. package/.pi/prompts/release.md +225 -0
  31. package/.pi/scripts/README.md +17 -0
  32. package/.pi/scripts/harness-cli-verify.sh +294 -0
  33. package/.pi/scripts/harness-graphify-bootstrap.sh +151 -0
  34. package/{scripts → .pi/scripts}/harness-verify.mjs +3 -3
  35. package/{scripts → .pi/scripts}/sentrux-rules-sync.mjs +2 -2
  36. package/.pi/settings.json +0 -2
  37. package/.sentrux/.harness-rules-meta.json +2 -2
  38. package/.sentrux/rules.toml +3 -3
  39. package/AGENTS.md +12 -0
  40. package/CHANGELOG.md +21 -0
  41. package/README.md +39 -350
  42. package/firecrawl/.env +53 -0
  43. package/package.json +16 -4
  44. package/.ckignore +0 -41
  45. package/.env.example +0 -21
  46. package/.gitattributes +0 -1
  47. package/.github/banner-v2.png +0 -0
  48. package/.github/workflows/lint.yml +0 -33
  49. package/.github/workflows/publish-github-packages.yml +0 -35
  50. package/.github/workflows/publish-npm.yml +0 -32
  51. package/CONTRIBUTING.md +0 -166
  52. package/lefthook.yml +0 -9
  53. package/scripts/__pycache__/merge_graphify_corpora.cpython-314.pyc +0 -0
  54. package/scripts/index_youtube_urls.py +0 -376
  55. package/scripts/merge_graphify_corpora.py +0 -398
  56. package/scripts/regen_graphify_html.py +0 -46
  57. package/test/harness-verify.test.mjs +0 -33
@@ -6,12 +6,19 @@
6
6
  * doubling vertical resolution in the same terminal footprint.
7
7
  */
8
8
 
9
- import { join } from "node:path";
10
9
  import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
11
10
  import { truncateToWidth } from "@mariozechner/pi-tui";
12
11
  import * as JimpModule from "jimp";
12
+ import { resolveHarnessAsset } from "./lib/harness-paths.js";
13
13
 
14
- const imagePath = join(process.cwd(), ".pi", "extensions", "banner.png");
14
+ /** Shipped next to this extension in the npm package — not the host project's .pi dir. */
15
+ const imagePath = resolveHarnessAsset(
16
+ // @ts-expect-error pi extensions run as ESM
17
+ import.meta.url,
18
+ ".pi",
19
+ "extensions",
20
+ "banner.png",
21
+ );
15
22
 
16
23
  // Terminal footprint — keep a safety margin so we never crash on narrow terminals
17
24
  const SAFETY_MARGIN = 2;
@@ -83,6 +90,23 @@ function ansiCell(
83
90
  }
84
91
 
85
92
  async function loadBanner(): Promise<string[]> {
93
+ // #region agent log
94
+ fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
95
+ method: "POST",
96
+ headers: {
97
+ "Content-Type": "application/json",
98
+ "X-Debug-Session-Id": "7737a8",
99
+ },
100
+ body: JSON.stringify({
101
+ sessionId: "7737a8",
102
+ hypothesisId: "B",
103
+ location: "custom-header.ts:loadBanner",
104
+ message: "banner path",
105
+ data: { imagePath, cwd: process.cwd() },
106
+ timestamp: Date.now(),
107
+ }),
108
+ }).catch(() => {});
109
+ // #endregion
86
110
  const Jimp = getJimpRuntime();
87
111
  const image = await Jimp.read(imagePath);
88
112
  resizeImageCompat(image, PIXEL_WIDTH, PIXEL_HEIGHT);
@@ -0,0 +1,55 @@
1
+ import { existsSync, readFileSync } from "node:fs";
2
+ import { dirname, join } from "node:path";
3
+ import { fileURLToPath } from "node:url";
4
+
5
+ const rootByModuleUrl = new Map<string, string>();
6
+
7
+ /** Resolve ultimate-pi package root from the calling extension module URL. */
8
+ export function getHarnessPackageRoot(moduleUrl: string): string {
9
+ const cached = rootByModuleUrl.get(moduleUrl);
10
+ if (cached) {
11
+ return cached;
12
+ }
13
+
14
+ let dir = dirname(fileURLToPath(moduleUrl));
15
+ for (let depth = 0; depth < 8; depth++) {
16
+ const pkgPath = join(dir, "package.json");
17
+ if (existsSync(pkgPath)) {
18
+ try {
19
+ const pkg = JSON.parse(readFileSync(pkgPath, "utf-8")) as {
20
+ name?: string;
21
+ };
22
+ if (pkg.name === "ultimate-pi") {
23
+ rootByModuleUrl.set(moduleUrl, dir);
24
+ return dir;
25
+ }
26
+ } catch {
27
+ /* try parent */
28
+ }
29
+ }
30
+ const parent = dirname(dir);
31
+ if (parent === dir) {
32
+ break;
33
+ }
34
+ dir = parent;
35
+ }
36
+
37
+ const fallback = join(dirname(fileURLToPath(moduleUrl)), "..", "..");
38
+ rootByModuleUrl.set(moduleUrl, fallback);
39
+ return fallback;
40
+ }
41
+
42
+ export function resolveHarnessAsset(
43
+ moduleUrl: string,
44
+ ...segments: string[]
45
+ ): string {
46
+ return join(getHarnessPackageRoot(moduleUrl), ...segments);
47
+ }
48
+
49
+ /** Harness CLI scripts shipped under `.pi/scripts/` in the npm package. */
50
+ export function resolveHarnessScript(
51
+ moduleUrl: string,
52
+ scriptName: string,
53
+ ): string {
54
+ return resolveHarnessAsset(moduleUrl, ".pi", "scripts", scriptName);
55
+ }
@@ -0,0 +1,174 @@
1
+ /**
2
+ * Ensures .pi/model-router.json exists before pi-model-router reads config at
3
+ * extension init (which otherwise falls back to openai/gpt-5.4-pro).
4
+ *
5
+ * Runs synchronously in the extension factory so dotenv-loader can run first
6
+ * (alphabetically: dotenv-loader < model-router-bootstrap < sentrux / router pkg).
7
+ */
8
+
9
+ import { existsSync, mkdirSync, writeFileSync } from "node:fs";
10
+ import { join } from "node:path";
11
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
12
+
13
+ const ROUTER_PATH = ".pi/model-router.json";
14
+
15
+ function model(prefix: string, name: string): string {
16
+ return `${prefix}/${name}`;
17
+ }
18
+
19
+ function buildRouterConfig(): Record<string, unknown> | null {
20
+ const hasOpenCode = process.env.OPENAI_API_BASE?.includes("opencode.ai");
21
+ const hasOpenAI = !!process.env.OPENAI_API_KEY;
22
+ const hasAnthropic = !!process.env.ANTHROPIC_API_KEY;
23
+ const hasGoogle = !!process.env.GOOGLE_API_KEY;
24
+
25
+ if (!hasOpenCode && !hasOpenAI && !hasAnthropic && !hasGoogle) {
26
+ return null;
27
+ }
28
+
29
+ const highModel = hasOpenCode
30
+ ? model("opencode-go", "deepseek-v4-pro")
31
+ : hasAnthropic
32
+ ? "anthropic/claude-sonnet-4-20250514"
33
+ : hasGoogle
34
+ ? "google/gemini-2.5-flash-001"
35
+ : hasOpenAI
36
+ ? model("openai", "gpt-4o")
37
+ : null;
38
+
39
+ const mediumModel = hasOpenCode
40
+ ? model("opencode-go", "qwen3.6-plus")
41
+ : hasAnthropic
42
+ ? "anthropic/claude-sonnet-4-20250514"
43
+ : hasGoogle
44
+ ? "google/gemini-flash-latest"
45
+ : hasOpenAI
46
+ ? model("openai", "gpt-4o-mini")
47
+ : null;
48
+
49
+ const lowModel = hasOpenCode
50
+ ? model("opencode-go", "deepseek-v4-flash")
51
+ : hasAnthropic
52
+ ? "anthropic/claude-3-5-haiku-20241022"
53
+ : hasGoogle
54
+ ? "google/gemini-flash-lite-latest"
55
+ : hasOpenAI
56
+ ? model("openai", "gpt-4o-mini")
57
+ : null;
58
+
59
+ if (!highModel || !mediumModel || !lowModel) {
60
+ return null;
61
+ }
62
+
63
+ const fallbacks: string[] = [];
64
+ if (hasAnthropic && !highModel.startsWith("anthropic/")) {
65
+ fallbacks.push("anthropic/claude-sonnet-4-20250514");
66
+ }
67
+ if (hasGoogle && !highModel.startsWith("google/")) {
68
+ fallbacks.push("google/gemini-flash-latest");
69
+ }
70
+
71
+ return {
72
+ defaultProfile: "auto",
73
+ debug: false,
74
+ classifierModel: mediumModel,
75
+ phaseBias: 0.5,
76
+ maxSessionBudget: 1.0,
77
+ largeContextThreshold: 100000,
78
+ rules: [
79
+ {
80
+ matches: ["deploy", "production", "release"],
81
+ tier: "high",
82
+ reason: "Safety check for production tasks",
83
+ },
84
+ { matches: "changelog", tier: "low" },
85
+ ],
86
+ profiles: {
87
+ auto: {
88
+ high: { model: highModel, thinking: "high", fallbacks },
89
+ medium: { model: mediumModel, thinking: "medium" },
90
+ low: { model: lowModel, thinking: "low" },
91
+ },
92
+ cheap: {
93
+ high: { model: mediumModel, thinking: "low" },
94
+ medium: { model: lowModel, thinking: "off" },
95
+ low: { model: lowModel, thinking: "off" },
96
+ },
97
+ deep: {
98
+ high: { model: highModel, thinking: "xhigh", fallbacks },
99
+ medium: { model: mediumModel, thinking: "medium" },
100
+ low: { model: lowModel, thinking: "low" },
101
+ },
102
+ },
103
+ };
104
+ }
105
+
106
+ function ensureModelRouterConfig(cwd: string): boolean {
107
+ const projectPath = join(cwd, ROUTER_PATH);
108
+ // #region agent log
109
+ fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
110
+ method: "POST",
111
+ headers: {
112
+ "Content-Type": "application/json",
113
+ "X-Debug-Session-Id": "7737a8",
114
+ },
115
+ body: JSON.stringify({
116
+ sessionId: "7737a8",
117
+ hypothesisId: "A",
118
+ location: "model-router-bootstrap.ts:ensure",
119
+ message: "router bootstrap check",
120
+ data: {
121
+ projectPath,
122
+ exists: existsSync(projectPath),
123
+ hasOpenCode: !!process.env.OPENAI_API_BASE?.includes("opencode.ai"),
124
+ hasOpenAI: !!process.env.OPENAI_API_KEY,
125
+ },
126
+ timestamp: Date.now(),
127
+ }),
128
+ }).catch(() => {});
129
+ // #endregion
130
+
131
+ if (existsSync(projectPath)) {
132
+ return false;
133
+ }
134
+
135
+ const config = buildRouterConfig();
136
+ if (!config) {
137
+ return false;
138
+ }
139
+
140
+ mkdirSync(join(cwd, ".pi"), { recursive: true });
141
+ writeFileSync(projectPath, `${JSON.stringify(config, null, 2)}\n`);
142
+
143
+ // #region agent log
144
+ fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
145
+ method: "POST",
146
+ headers: {
147
+ "Content-Type": "application/json",
148
+ "X-Debug-Session-Id": "7737a8",
149
+ },
150
+ body: JSON.stringify({
151
+ sessionId: "7737a8",
152
+ hypothesisId: "A",
153
+ location: "model-router-bootstrap.ts:write",
154
+ message: "wrote model-router.json",
155
+ data: {
156
+ high: (config.profiles as { auto: { high: { model: string } } }).auto
157
+ .high.model,
158
+ },
159
+ timestamp: Date.now(),
160
+ }),
161
+ }).catch(() => {});
162
+ // #endregion
163
+
164
+ return true;
165
+ }
166
+
167
+ export default function modelRouterBootstrap(_pi: ExtensionAPI) {
168
+ const wrote = ensureModelRouterConfig(process.cwd());
169
+ if (wrote) {
170
+ console.warn(
171
+ "[ultimate-pi] Created .pi/model-router.json from detected providers (avoids gpt-5.4-pro fallback). Run /reload if router was already loaded.",
172
+ );
173
+ }
174
+ }
@@ -3,14 +3,39 @@
3
3
  */
4
4
 
5
5
  import { spawn } from "node:child_process";
6
- import { join } from "node:path";
6
+ import { existsSync } from "node:fs";
7
7
  import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
8
+ import { resolveHarnessScript } from "./lib/harness-paths.js";
8
9
 
9
- const SYNC_SCRIPT = join(process.cwd(), "scripts", "sentrux-rules-sync.mjs");
10
+ function resolveSyncScript(): string {
11
+ return resolveHarnessScript(
12
+ // @ts-expect-error pi extensions run as ESM
13
+ import.meta.url,
14
+ "sentrux-rules-sync.mjs",
15
+ );
16
+ }
10
17
 
11
18
  function runSync(args: string[]): Promise<{ code: number; output: string }> {
19
+ const syncScript = resolveSyncScript();
20
+ // #region agent log
21
+ fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
22
+ method: "POST",
23
+ headers: {
24
+ "Content-Type": "application/json",
25
+ "X-Debug-Session-Id": "7737a8",
26
+ },
27
+ body: JSON.stringify({
28
+ sessionId: "7737a8",
29
+ hypothesisId: "C",
30
+ location: "sentrux-rules-sync.ts:runSync",
31
+ message: "sync script path",
32
+ data: { syncScript, cwd: process.cwd(), exists: existsSync(syncScript) },
33
+ timestamp: Date.now(),
34
+ }),
35
+ }).catch(() => {});
36
+ // #endregion
12
37
  return new Promise((resolve) => {
13
- const child = spawn(process.execPath, [SYNC_SCRIPT, ...args], {
38
+ const child = spawn(process.execPath, [syncScript, ...args], {
14
39
  cwd: process.cwd(),
15
40
  stdio: ["ignore", "pipe", "pipe"],
16
41
  });
@@ -0,0 +1,5 @@
1
+ {
2
+ "headless": true,
3
+ "timeout": 30000,
4
+ "viewport": { "width": 1280, "height": 720 }
5
+ }
@@ -0,0 +1,9 @@
1
+ # Harness Debates
2
+
3
+ Store debate artifacts (`RoundResult`, `ConsensusPacket`, budget events) here.
4
+
5
+ Locked defaults for aggressive budget profile:
6
+
7
+ - `max_rounds=6`
8
+ - `round_token_cap=2500`
9
+ - `debate_global_cap=35000`
@@ -28,4 +28,4 @@ Evaluator trust requires both programmatic gates (policy, budget, integrity) and
28
28
  ## References
29
29
 
30
30
  - `.pi/harness/specs/observation.schema.json`
31
- - `scripts/harness-verify.mjs`
31
+ - `.pi/scripts/harness-verify.mjs`
@@ -11,7 +11,7 @@ Sentrux enforces architecture via [`.sentrux/rules.toml`](https://sentrux.dev/do
11
11
 
12
12
  1. **Canonical source:** [`.pi/harness/sentrux/architecture.manifest.json`](../../sentrux/architecture.manifest.json) — layers, boundaries, global constraints.
13
13
  2. **Generated artifact:** `.sentrux/rules.toml` — committed to git; managed block between `harness:managed:start/end` markers.
14
- 3. **Sync command:** `npm run harness:sentrux-sync` (`scripts/sentrux-rules-sync.mjs`).
14
+ 3. **Sync command:** `npm run harness:sentrux-sync` (`.pi/scripts/sentrux-rules-sync.mjs`).
15
15
  4. **Pi command:** `/harness-sentrux-sync` via `sentrux-rules-sync.ts` extension.
16
16
  5. **When to sync:**
17
17
  - `/harness-setup` Step 2.8 (after sentrux install)
@@ -34,5 +34,5 @@ Sentrux enforces architecture via [`.sentrux/rules.toml`](https://sentrux.dev/do
34
34
  ## References
35
35
 
36
36
  - ADR 0006 (Sentrux dual layer)
37
- - `scripts/sentrux-rules-sync.mjs`
37
+ - `.pi/scripts/sentrux-rules-sync.mjs`
38
38
  - `.pi/extensions/sentrux-rules-sync.ts`
@@ -0,0 +1,6 @@
1
+ # Harness Incidents
2
+
3
+ Store `IncidentRecord` artifacts and any policy override justifications here.
4
+
5
+ - Override policy: one human approver only.
6
+ - Justification is mandatory for every override record.
@@ -0,0 +1,128 @@
1
+ # Release Readiness Report
2
+
3
+ Date: 2026-05-14
4
+ Repo root used: `/home/aryaniyaps/ai-projects/ultimate-pi` (active workspace root, treated as canonical)
5
+
6
+ ## Requested remaining work
7
+
8
+ - `run-adversarial-canary-and-release`
9
+ - `final-prompt-expert-feature-sweep`
10
+
11
+ Plan file was not modified.
12
+
13
+ ## Final integration checks
14
+
15
+ ### 1) TypeScript compile check
16
+
17
+ - Command: `npm run check:ts`
18
+ - Result: PASS
19
+
20
+ ### 2) Full lint/format/test gate
21
+
22
+ - Command: `npm run check:ts && npm run lint && npm run format:check && npm test`
23
+ - Result: FAIL (expected in current tree state)
24
+ - Notes:
25
+ - `biome check` reports existing lint/format issues (including `.pi/extensions/custom-footer.ts` and multiple `.pi/harness/specs/*.json` files).
26
+ - `npm test` fails before test execution due to a Node runtime flag incompatibility:
27
+ - `node: bad option: --experimental-strip-types`
28
+
29
+ ### 3) Release preflight checks
30
+
31
+ - Command: `git rev-parse --is-inside-work-tree && git remote -v && git symbolic-ref -q HEAD && (git diff --quiet && git diff --cached --quiet && echo CLEAN || echo DIRTY)`
32
+ - Result:
33
+ - inside git repo: yes
34
+ - branch: `refs/heads/main`
35
+ - remote `origin`: configured
36
+ - tree cleanliness: `DIRTY` (release/tag push should stay blocked until clean)
37
+
38
+ ## Targeted canary validations
39
+
40
+ ### 1) Prompt and policy canary assertions
41
+
42
+ - Static canary suite executed against:
43
+ - harness prompt templates
44
+ - `policy-gate`
45
+ - `test-diff-integrity`
46
+ - `debate-orchestrator`
47
+ - Result: PASS after prompt sweep updates
48
+ - locked clauses in `harness-auto` preserved
49
+ - prompt argument parsing + usage surfaces present across harness prompts
50
+ - completion behavior sections present for operator-facing harness prompts
51
+ - policy/test/debate lock signals present in extension code
52
+
53
+ ### 2) Router tuning canary (proposal-only)
54
+
55
+ - Created synthetic canary evidence:
56
+ - `.pi/harness/runs/canary-evidence.json`
57
+ - Candidate router for dry proposal:
58
+ - `.pi/harness/runs/canary-candidate-router.json`
59
+ - Command:
60
+ - `node .pi/harness/router/propose-router-tuning.mjs --evidence ... --candidate ... --proposal-out .pi/harness/router/proposals/canary-proposal.json`
61
+ - Result: PASS (proposal created, no live router write)
62
+
63
+ ### 3) Harness schema parse check
64
+
65
+ - Command: Node JSON parse validation across `.pi/harness/specs/*.json`
66
+ - Result: PASS (all 9 schema files parse successfully)
67
+
68
+ ## Lightweight adversarial drills
69
+
70
+ ### 1) Negative apply drill (guardrail validation)
71
+
72
+ - Command:
73
+ - `node .pi/harness/router/apply-router-proposal.mjs --proposal ... --approve-by ... --justification ...`
74
+ - intentionally omitted `--write`
75
+ - Result: PASS (guard correctly blocked apply)
76
+ - Expected error:
77
+ - `missing --write (blind writes and implicit applies are disallowed)`
78
+
79
+ ### 2) Adversarial lock retention
80
+
81
+ - Verified locked governance semantics remain stated in `harness-auto`:
82
+ - adversarial review always required
83
+ - severity-policy-engine remains merge-block authority
84
+ - strict pre-PR gates mandatory
85
+ - never auto-merge
86
+
87
+ ## Prompt expert feature sweep
88
+
89
+ Using guidance from `.pi/agents/pi-pi/prompt-expert.md`, harness prompt templates were refined for:
90
+
91
+ 1. Argument handling:
92
+ - explicit `$ARGUMENTS` parse sections
93
+ - required/optional argument normalization
94
+ - deterministic usage fallback lines
95
+ 2. Completion behavior:
96
+ - explicit terminal output contracts for predictable downstream handoff
97
+ 3. UX consistency:
98
+ - harmonized command usage patterns and closure blocks across harness prompts
99
+ 4. Policy integrity:
100
+ - locked policy constraints intentionally kept intact
101
+
102
+ ## Files updated in this sweep
103
+
104
+ - `.pi/prompts/harness-auto.md`
105
+ - `.pi/prompts/harness-plan.md`
106
+ - `.pi/prompts/harness-run.md`
107
+ - `.pi/prompts/harness-review.md`
108
+ - `.pi/prompts/harness-critic.md`
109
+ - `.pi/prompts/harness-eval.md`
110
+ - `.pi/prompts/harness-trace.md`
111
+ - `.pi/prompts/harness-incident.md`
112
+ - `.pi/prompts/harness-router-tune.md`
113
+ - `.pi/prompts/harness-setup.md`
114
+ - `.pi/harness/release-readiness-report.md` (this report)
115
+
116
+ ## New canary artifacts
117
+
118
+ - `.pi/harness/runs/canary-evidence.json`
119
+ - `.pi/harness/runs/canary-candidate-router.json`
120
+ - `.pi/harness/router/proposals/canary-proposal.json`
121
+
122
+ ## Residual risks
123
+
124
+ 1. Full repo lint/format gate currently fails due to pre-existing issues unrelated to this sweep.
125
+ 2. `npm test` is currently not runnable in this environment because the configured Node flag is unsupported.
126
+ 3. Release flow should remain blocked until working tree is clean and CI-equivalent checks pass.
127
+ 4. Router apply path was intentionally not executed with `--write` during this run (safety-preserving drill).
128
+
@@ -0,0 +1,96 @@
1
+ {
2
+ "schema_version": "1.0.0",
3
+ "proposal_id": "router-tune-2026-05-14T15-44-44-399Z",
4
+ "created_at": "2026-05-14T15:44:44.399Z",
5
+ "router_path": ".pi/model-router.json",
6
+ "base_router_sha256": "2a96fba517cc5b5147f37428d7ed62961b1968c0e83c0e69f02524265449856b",
7
+ "candidate_router_sha256": "2a96fba517cc5b5147f37428d7ed62961b1968c0e83c0e69f02524265449856b",
8
+ "evidence": {
9
+ "sample_count": 24,
10
+ "min_sample_count": 12,
11
+ "success_rate_delta": 0.08,
12
+ "cost_per_task_delta": -0.04,
13
+ "regression_guard_passed": true,
14
+ "trace_refs": ["run-canary-001", "run-canary-002"],
15
+ "notes": "canary validation synthetic evidence"
16
+ },
17
+ "status": "proposed",
18
+ "approval": {
19
+ "required": true,
20
+ "approved_by": null,
21
+ "approved_at": null,
22
+ "justification": null
23
+ },
24
+ "candidate_router": {
25
+ "defaultProfile": "auto",
26
+ "debug": false,
27
+ "classifierModel": "opencode-go/qwen3.6-plus",
28
+ "phaseBias": 0.5,
29
+ "maxSessionBudget": 1,
30
+ "largeContextThreshold": 100000,
31
+ "rules": [
32
+ {
33
+ "matches": ["deploy", "production", "release"],
34
+ "tier": "high",
35
+ "reason": "Safety check for production tasks"
36
+ },
37
+ {
38
+ "matches": "changelog",
39
+ "tier": "low"
40
+ }
41
+ ],
42
+ "profiles": {
43
+ "auto": {
44
+ "high": {
45
+ "model": "opencode-go/deepseek-v4-pro",
46
+ "thinking": "high",
47
+ "fallbacks": ["opencode-go/qwen3.6-plus", "opencode-go/kimi-k2.6"]
48
+ },
49
+ "medium": {
50
+ "model": "opencode-go/qwen3.6-plus",
51
+ "thinking": "medium",
52
+ "fallbacks": ["opencode-go/deepseek-v4-pro"]
53
+ },
54
+ "low": {
55
+ "model": "opencode-go/deepseek-v4-flash",
56
+ "thinking": "low",
57
+ "fallbacks": ["opencode-go/qwen3.5-plus"]
58
+ }
59
+ },
60
+ "cheap": {
61
+ "high": {
62
+ "model": "opencode-go/qwen3.6-plus",
63
+ "thinking": "low",
64
+ "fallbacks": ["opencode-go/qwen3.5-plus"]
65
+ },
66
+ "medium": {
67
+ "model": "opencode-go/qwen3.5-plus",
68
+ "thinking": "off",
69
+ "fallbacks": ["opencode-go/deepseek-v4-flash"]
70
+ },
71
+ "low": {
72
+ "model": "opencode-go/deepseek-v4-flash",
73
+ "thinking": "off",
74
+ "fallbacks": ["opencode-go/qwen3.5-plus"]
75
+ }
76
+ },
77
+ "deep": {
78
+ "high": {
79
+ "model": "opencode-go/deepseek-v4-pro",
80
+ "thinking": "xhigh",
81
+ "fallbacks": ["opencode-go/kimi-k2.6"]
82
+ },
83
+ "medium": {
84
+ "model": "opencode-go/kimi-k2.6",
85
+ "thinking": "medium",
86
+ "fallbacks": ["opencode-go/deepseek-v4-pro"]
87
+ },
88
+ "low": {
89
+ "model": "opencode-go/qwen3.6-plus",
90
+ "thinking": "low",
91
+ "fallbacks": ["opencode-go/deepseek-v4-flash"]
92
+ }
93
+ }
94
+ }
95
+ }
96
+ }
@@ -0,0 +1,2 @@
1
+ {"timestamp":"2026-05-14T15:51:31.965Z","type":"run_start","run_id":"019e272f-3eef-7107-9712-ce281de55707-1778773891854","plan_id":"plan-unknown","phase":"plan"}
2
+ {"timestamp":"2026-05-14T15:51:38.346Z","type":"run_end","run_id":"019e272f-3eef-7107-9712-ce281de55707-1778773891854","phase":"plan","tool_span_count":0,"artifact_ref_count":0}
@@ -0,0 +1,17 @@
1
+ {
2
+ "schema_version": "1.0.0",
3
+ "contract_version": "1.0.0",
4
+ "run_id": "019e272f-3eef-7107-9712-ce281de55707-1778773891854",
5
+ "plan_id": "plan-unknown",
6
+ "agent_id": "019e272f-3eef-7107-9712-ce281de55707",
7
+ "phase": "plan",
8
+ "model": "auto",
9
+ "thinking_level": "off",
10
+ "tool_spans": [],
11
+ "artifact_refs": [],
12
+ "cost": {
13
+ "input_tokens": 15381,
14
+ "output_tokens": 33,
15
+ "total_tokens": 15414
16
+ }
17
+ }
@@ -0,0 +1,2 @@
1
+ {"timestamp":"2026-05-14T15:51:52.062Z","type":"run_start","run_id":"019e272f-3eef-7107-9712-ce281de55707-1778773912057","plan_id":"plan-unknown","phase":"plan"}
2
+ {"timestamp":"2026-05-14T15:52:14.313Z","type":"run_end","run_id":"019e272f-3eef-7107-9712-ce281de55707-1778773912057","phase":"plan","tool_span_count":0,"artifact_ref_count":0}
@@ -0,0 +1,17 @@
1
+ {
2
+ "schema_version": "1.0.0",
3
+ "contract_version": "1.0.0",
4
+ "run_id": "019e272f-3eef-7107-9712-ce281de55707-1778773912057",
5
+ "plan_id": "plan-unknown",
6
+ "agent_id": "019e272f-3eef-7107-9712-ce281de55707",
7
+ "phase": "plan",
8
+ "model": "auto",
9
+ "thinking_level": "off",
10
+ "tool_spans": [],
11
+ "artifact_refs": [],
12
+ "cost": {
13
+ "input_tokens": 31337,
14
+ "output_tokens": 528,
15
+ "total_tokens": 31865
16
+ }
17
+ }
@@ -0,0 +1,6 @@
1
+ {"timestamp":"2026-05-14T15:54:46.136Z","type":"run_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","plan_id":"plan-unknown","phase":"plan"}
2
+ {"timestamp":"2026-05-14T15:54:59.110Z","type":"tool_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","tool_call_id":"call_00_7UHDcydTHJHVR2dT5xpb0903","tool_name":"bash"}
3
+ {"timestamp":"2026-05-14T15:54:59.137Z","type":"tool_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","tool_call_id":"call_01_aNsry1whTl5hRf5Ew91t3142","tool_name":"bash"}
4
+ {"timestamp":"2026-05-14T15:54:59.139Z","type":"tool_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","tool_call_id":"call_02_N2e56Q6vKr6cAYzd4Z9q7953","tool_name":"bash"}
5
+ {"timestamp":"2026-05-14T15:55:11.546Z","type":"tool_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","tool_call_id":"call_00_wG71Rv3SKrf6R9K03EeS0264","tool_name":"ctx_batch_execute"}
6
+ {"timestamp":"2026-05-14T15:55:25.167Z","type":"run_end","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","phase":"plan","tool_span_count":4,"artifact_ref_count":0}