ultimate-pi 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/extensions/custom-header.ts +26 -2
- package/.pi/extensions/lib/harness-paths.ts +55 -0
- package/.pi/extensions/model-router-bootstrap.ts +174 -0
- package/.pi/extensions/sentrux-rules-sync.ts +28 -3
- package/.pi/harness/browser.json +5 -0
- package/.pi/harness/debates/README.md +9 -0
- package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +1 -1
- package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +2 -2
- package/.pi/harness/incidents/README.md +6 -0
- package/.pi/harness/release-readiness-report.md +128 -0
- package/.pi/harness/router/proposals/canary-proposal.json +96 -0
- package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773891854/events.jsonl +2 -0
- package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773891854/trace.json +17 -0
- package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773912057/events.jsonl +2 -0
- package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773912057/trace.json +17 -0
- package/.pi/harness/runs/019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096/events.jsonl +6 -0
- package/.pi/harness/runs/019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096/trace.json +42 -0
- package/.pi/harness/runs/019e2732-8651-74e5-9f5d-4d06c3105f25-1778774136101/events.jsonl +1 -0
- package/.pi/harness/runs/019e2758-b332-771b-ad6f-54d0d8478768-1778776600591/events.jsonl +2 -0
- package/.pi/harness/runs/019e2758-b332-771b-ad6f-54d0d8478768-1778776600591/trace.json +17 -0
- package/.pi/harness/runs/README.md +6 -0
- package/.pi/harness/runs/budget-events.jsonl +4 -0
- package/.pi/harness/runs/canary-candidate-router.json +72 -0
- package/.pi/harness/runs/canary-evidence.json +9 -0
- package/.pi/harness/runs/index.jsonl +4 -0
- package/.pi/harness/sentrux/architecture.manifest.json +3 -3
- package/.pi/model-router.example.json +27 -0
- package/.pi/prompts/graphify.md +4 -8
- package/.pi/prompts/harness-setup.md +142 -92
- package/.pi/prompts/release.md +225 -0
- package/.pi/scripts/README.md +17 -0
- package/.pi/scripts/harness-cli-verify.sh +294 -0
- package/.pi/scripts/harness-graphify-bootstrap.sh +151 -0
- package/{scripts → .pi/scripts}/harness-verify.mjs +3 -3
- package/{scripts → .pi/scripts}/sentrux-rules-sync.mjs +2 -2
- package/.pi/settings.json +0 -2
- package/.sentrux/.harness-rules-meta.json +2 -2
- package/.sentrux/rules.toml +3 -3
- package/AGENTS.md +12 -0
- package/CHANGELOG.md +21 -0
- package/README.md +39 -350
- package/firecrawl/.env +53 -0
- package/package.json +16 -4
- package/.ckignore +0 -41
- package/.env.example +0 -21
- package/.gitattributes +0 -1
- package/.github/banner-v2.png +0 -0
- package/.github/workflows/lint.yml +0 -33
- package/.github/workflows/publish-github-packages.yml +0 -35
- package/.github/workflows/publish-npm.yml +0 -32
- package/CONTRIBUTING.md +0 -166
- package/lefthook.yml +0 -9
- package/scripts/__pycache__/merge_graphify_corpora.cpython-314.pyc +0 -0
- package/scripts/index_youtube_urls.py +0 -376
- package/scripts/merge_graphify_corpora.py +0 -398
- package/scripts/regen_graphify_html.py +0 -46
- package/test/harness-verify.test.mjs +0 -33
|
@@ -6,12 +6,19 @@
|
|
|
6
6
|
* doubling vertical resolution in the same terminal footprint.
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
-
import { join } from "node:path";
|
|
10
9
|
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
11
10
|
import { truncateToWidth } from "@mariozechner/pi-tui";
|
|
12
11
|
import * as JimpModule from "jimp";
|
|
12
|
+
import { resolveHarnessAsset } from "./lib/harness-paths.js";
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
/** Shipped next to this extension in the npm package — not the host project's .pi dir. */
|
|
15
|
+
const imagePath = resolveHarnessAsset(
|
|
16
|
+
// @ts-expect-error pi extensions run as ESM
|
|
17
|
+
import.meta.url,
|
|
18
|
+
".pi",
|
|
19
|
+
"extensions",
|
|
20
|
+
"banner.png",
|
|
21
|
+
);
|
|
15
22
|
|
|
16
23
|
// Terminal footprint — keep a safety margin so we never crash on narrow terminals
|
|
17
24
|
const SAFETY_MARGIN = 2;
|
|
@@ -83,6 +90,23 @@ function ansiCell(
|
|
|
83
90
|
}
|
|
84
91
|
|
|
85
92
|
async function loadBanner(): Promise<string[]> {
|
|
93
|
+
// #region agent log
|
|
94
|
+
fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
|
|
95
|
+
method: "POST",
|
|
96
|
+
headers: {
|
|
97
|
+
"Content-Type": "application/json",
|
|
98
|
+
"X-Debug-Session-Id": "7737a8",
|
|
99
|
+
},
|
|
100
|
+
body: JSON.stringify({
|
|
101
|
+
sessionId: "7737a8",
|
|
102
|
+
hypothesisId: "B",
|
|
103
|
+
location: "custom-header.ts:loadBanner",
|
|
104
|
+
message: "banner path",
|
|
105
|
+
data: { imagePath, cwd: process.cwd() },
|
|
106
|
+
timestamp: Date.now(),
|
|
107
|
+
}),
|
|
108
|
+
}).catch(() => {});
|
|
109
|
+
// #endregion
|
|
86
110
|
const Jimp = getJimpRuntime();
|
|
87
111
|
const image = await Jimp.read(imagePath);
|
|
88
112
|
resizeImageCompat(image, PIXEL_WIDTH, PIXEL_HEIGHT);
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import { dirname, join } from "node:path";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
4
|
+
|
|
5
|
+
const rootByModuleUrl = new Map<string, string>();
|
|
6
|
+
|
|
7
|
+
/** Resolve ultimate-pi package root from the calling extension module URL. */
|
|
8
|
+
export function getHarnessPackageRoot(moduleUrl: string): string {
|
|
9
|
+
const cached = rootByModuleUrl.get(moduleUrl);
|
|
10
|
+
if (cached) {
|
|
11
|
+
return cached;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
let dir = dirname(fileURLToPath(moduleUrl));
|
|
15
|
+
for (let depth = 0; depth < 8; depth++) {
|
|
16
|
+
const pkgPath = join(dir, "package.json");
|
|
17
|
+
if (existsSync(pkgPath)) {
|
|
18
|
+
try {
|
|
19
|
+
const pkg = JSON.parse(readFileSync(pkgPath, "utf-8")) as {
|
|
20
|
+
name?: string;
|
|
21
|
+
};
|
|
22
|
+
if (pkg.name === "ultimate-pi") {
|
|
23
|
+
rootByModuleUrl.set(moduleUrl, dir);
|
|
24
|
+
return dir;
|
|
25
|
+
}
|
|
26
|
+
} catch {
|
|
27
|
+
/* try parent */
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
const parent = dirname(dir);
|
|
31
|
+
if (parent === dir) {
|
|
32
|
+
break;
|
|
33
|
+
}
|
|
34
|
+
dir = parent;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
const fallback = join(dirname(fileURLToPath(moduleUrl)), "..", "..");
|
|
38
|
+
rootByModuleUrl.set(moduleUrl, fallback);
|
|
39
|
+
return fallback;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export function resolveHarnessAsset(
|
|
43
|
+
moduleUrl: string,
|
|
44
|
+
...segments: string[]
|
|
45
|
+
): string {
|
|
46
|
+
return join(getHarnessPackageRoot(moduleUrl), ...segments);
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/** Harness CLI scripts shipped under `.pi/scripts/` in the npm package. */
|
|
50
|
+
export function resolveHarnessScript(
|
|
51
|
+
moduleUrl: string,
|
|
52
|
+
scriptName: string,
|
|
53
|
+
): string {
|
|
54
|
+
return resolveHarnessAsset(moduleUrl, ".pi", "scripts", scriptName);
|
|
55
|
+
}
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ensures .pi/model-router.json exists before pi-model-router reads config at
|
|
3
|
+
* extension init (which otherwise falls back to openai/gpt-5.4-pro).
|
|
4
|
+
*
|
|
5
|
+
* Runs synchronously in the extension factory so dotenv-loader can run first
|
|
6
|
+
* (alphabetically: dotenv-loader < model-router-bootstrap < sentrux / router pkg).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
10
|
+
import { join } from "node:path";
|
|
11
|
+
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
12
|
+
|
|
13
|
+
const ROUTER_PATH = ".pi/model-router.json";
|
|
14
|
+
|
|
15
|
+
function model(prefix: string, name: string): string {
|
|
16
|
+
return `${prefix}/${name}`;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
function buildRouterConfig(): Record<string, unknown> | null {
|
|
20
|
+
const hasOpenCode = process.env.OPENAI_API_BASE?.includes("opencode.ai");
|
|
21
|
+
const hasOpenAI = !!process.env.OPENAI_API_KEY;
|
|
22
|
+
const hasAnthropic = !!process.env.ANTHROPIC_API_KEY;
|
|
23
|
+
const hasGoogle = !!process.env.GOOGLE_API_KEY;
|
|
24
|
+
|
|
25
|
+
if (!hasOpenCode && !hasOpenAI && !hasAnthropic && !hasGoogle) {
|
|
26
|
+
return null;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
const highModel = hasOpenCode
|
|
30
|
+
? model("opencode-go", "deepseek-v4-pro")
|
|
31
|
+
: hasAnthropic
|
|
32
|
+
? "anthropic/claude-sonnet-4-20250514"
|
|
33
|
+
: hasGoogle
|
|
34
|
+
? "google/gemini-2.5-flash-001"
|
|
35
|
+
: hasOpenAI
|
|
36
|
+
? model("openai", "gpt-4o")
|
|
37
|
+
: null;
|
|
38
|
+
|
|
39
|
+
const mediumModel = hasOpenCode
|
|
40
|
+
? model("opencode-go", "qwen3.6-plus")
|
|
41
|
+
: hasAnthropic
|
|
42
|
+
? "anthropic/claude-sonnet-4-20250514"
|
|
43
|
+
: hasGoogle
|
|
44
|
+
? "google/gemini-flash-latest"
|
|
45
|
+
: hasOpenAI
|
|
46
|
+
? model("openai", "gpt-4o-mini")
|
|
47
|
+
: null;
|
|
48
|
+
|
|
49
|
+
const lowModel = hasOpenCode
|
|
50
|
+
? model("opencode-go", "deepseek-v4-flash")
|
|
51
|
+
: hasAnthropic
|
|
52
|
+
? "anthropic/claude-3-5-haiku-20241022"
|
|
53
|
+
: hasGoogle
|
|
54
|
+
? "google/gemini-flash-lite-latest"
|
|
55
|
+
: hasOpenAI
|
|
56
|
+
? model("openai", "gpt-4o-mini")
|
|
57
|
+
: null;
|
|
58
|
+
|
|
59
|
+
if (!highModel || !mediumModel || !lowModel) {
|
|
60
|
+
return null;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
const fallbacks: string[] = [];
|
|
64
|
+
if (hasAnthropic && !highModel.startsWith("anthropic/")) {
|
|
65
|
+
fallbacks.push("anthropic/claude-sonnet-4-20250514");
|
|
66
|
+
}
|
|
67
|
+
if (hasGoogle && !highModel.startsWith("google/")) {
|
|
68
|
+
fallbacks.push("google/gemini-flash-latest");
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
return {
|
|
72
|
+
defaultProfile: "auto",
|
|
73
|
+
debug: false,
|
|
74
|
+
classifierModel: mediumModel,
|
|
75
|
+
phaseBias: 0.5,
|
|
76
|
+
maxSessionBudget: 1.0,
|
|
77
|
+
largeContextThreshold: 100000,
|
|
78
|
+
rules: [
|
|
79
|
+
{
|
|
80
|
+
matches: ["deploy", "production", "release"],
|
|
81
|
+
tier: "high",
|
|
82
|
+
reason: "Safety check for production tasks",
|
|
83
|
+
},
|
|
84
|
+
{ matches: "changelog", tier: "low" },
|
|
85
|
+
],
|
|
86
|
+
profiles: {
|
|
87
|
+
auto: {
|
|
88
|
+
high: { model: highModel, thinking: "high", fallbacks },
|
|
89
|
+
medium: { model: mediumModel, thinking: "medium" },
|
|
90
|
+
low: { model: lowModel, thinking: "low" },
|
|
91
|
+
},
|
|
92
|
+
cheap: {
|
|
93
|
+
high: { model: mediumModel, thinking: "low" },
|
|
94
|
+
medium: { model: lowModel, thinking: "off" },
|
|
95
|
+
low: { model: lowModel, thinking: "off" },
|
|
96
|
+
},
|
|
97
|
+
deep: {
|
|
98
|
+
high: { model: highModel, thinking: "xhigh", fallbacks },
|
|
99
|
+
medium: { model: mediumModel, thinking: "medium" },
|
|
100
|
+
low: { model: lowModel, thinking: "low" },
|
|
101
|
+
},
|
|
102
|
+
},
|
|
103
|
+
};
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
function ensureModelRouterConfig(cwd: string): boolean {
|
|
107
|
+
const projectPath = join(cwd, ROUTER_PATH);
|
|
108
|
+
// #region agent log
|
|
109
|
+
fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
|
|
110
|
+
method: "POST",
|
|
111
|
+
headers: {
|
|
112
|
+
"Content-Type": "application/json",
|
|
113
|
+
"X-Debug-Session-Id": "7737a8",
|
|
114
|
+
},
|
|
115
|
+
body: JSON.stringify({
|
|
116
|
+
sessionId: "7737a8",
|
|
117
|
+
hypothesisId: "A",
|
|
118
|
+
location: "model-router-bootstrap.ts:ensure",
|
|
119
|
+
message: "router bootstrap check",
|
|
120
|
+
data: {
|
|
121
|
+
projectPath,
|
|
122
|
+
exists: existsSync(projectPath),
|
|
123
|
+
hasOpenCode: !!process.env.OPENAI_API_BASE?.includes("opencode.ai"),
|
|
124
|
+
hasOpenAI: !!process.env.OPENAI_API_KEY,
|
|
125
|
+
},
|
|
126
|
+
timestamp: Date.now(),
|
|
127
|
+
}),
|
|
128
|
+
}).catch(() => {});
|
|
129
|
+
// #endregion
|
|
130
|
+
|
|
131
|
+
if (existsSync(projectPath)) {
|
|
132
|
+
return false;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
const config = buildRouterConfig();
|
|
136
|
+
if (!config) {
|
|
137
|
+
return false;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
mkdirSync(join(cwd, ".pi"), { recursive: true });
|
|
141
|
+
writeFileSync(projectPath, `${JSON.stringify(config, null, 2)}\n`);
|
|
142
|
+
|
|
143
|
+
// #region agent log
|
|
144
|
+
fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
|
|
145
|
+
method: "POST",
|
|
146
|
+
headers: {
|
|
147
|
+
"Content-Type": "application/json",
|
|
148
|
+
"X-Debug-Session-Id": "7737a8",
|
|
149
|
+
},
|
|
150
|
+
body: JSON.stringify({
|
|
151
|
+
sessionId: "7737a8",
|
|
152
|
+
hypothesisId: "A",
|
|
153
|
+
location: "model-router-bootstrap.ts:write",
|
|
154
|
+
message: "wrote model-router.json",
|
|
155
|
+
data: {
|
|
156
|
+
high: (config.profiles as { auto: { high: { model: string } } }).auto
|
|
157
|
+
.high.model,
|
|
158
|
+
},
|
|
159
|
+
timestamp: Date.now(),
|
|
160
|
+
}),
|
|
161
|
+
}).catch(() => {});
|
|
162
|
+
// #endregion
|
|
163
|
+
|
|
164
|
+
return true;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
export default function modelRouterBootstrap(_pi: ExtensionAPI) {
|
|
168
|
+
const wrote = ensureModelRouterConfig(process.cwd());
|
|
169
|
+
if (wrote) {
|
|
170
|
+
console.warn(
|
|
171
|
+
"[ultimate-pi] Created .pi/model-router.json from detected providers (avoids gpt-5.4-pro fallback). Run /reload if router was already loaded.",
|
|
172
|
+
);
|
|
173
|
+
}
|
|
174
|
+
}
|
|
@@ -3,14 +3,39 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import { spawn } from "node:child_process";
|
|
6
|
-
import {
|
|
6
|
+
import { existsSync } from "node:fs";
|
|
7
7
|
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
8
|
+
import { resolveHarnessScript } from "./lib/harness-paths.js";
|
|
8
9
|
|
|
9
|
-
|
|
10
|
+
function resolveSyncScript(): string {
|
|
11
|
+
return resolveHarnessScript(
|
|
12
|
+
// @ts-expect-error pi extensions run as ESM
|
|
13
|
+
import.meta.url,
|
|
14
|
+
"sentrux-rules-sync.mjs",
|
|
15
|
+
);
|
|
16
|
+
}
|
|
10
17
|
|
|
11
18
|
function runSync(args: string[]): Promise<{ code: number; output: string }> {
|
|
19
|
+
const syncScript = resolveSyncScript();
|
|
20
|
+
// #region agent log
|
|
21
|
+
fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
|
|
22
|
+
method: "POST",
|
|
23
|
+
headers: {
|
|
24
|
+
"Content-Type": "application/json",
|
|
25
|
+
"X-Debug-Session-Id": "7737a8",
|
|
26
|
+
},
|
|
27
|
+
body: JSON.stringify({
|
|
28
|
+
sessionId: "7737a8",
|
|
29
|
+
hypothesisId: "C",
|
|
30
|
+
location: "sentrux-rules-sync.ts:runSync",
|
|
31
|
+
message: "sync script path",
|
|
32
|
+
data: { syncScript, cwd: process.cwd(), exists: existsSync(syncScript) },
|
|
33
|
+
timestamp: Date.now(),
|
|
34
|
+
}),
|
|
35
|
+
}).catch(() => {});
|
|
36
|
+
// #endregion
|
|
12
37
|
return new Promise((resolve) => {
|
|
13
|
-
const child = spawn(process.execPath, [
|
|
38
|
+
const child = spawn(process.execPath, [syncScript, ...args], {
|
|
14
39
|
cwd: process.cwd(),
|
|
15
40
|
stdio: ["ignore", "pipe", "pipe"],
|
|
16
41
|
});
|
|
@@ -11,7 +11,7 @@ Sentrux enforces architecture via [`.sentrux/rules.toml`](https://sentrux.dev/do
|
|
|
11
11
|
|
|
12
12
|
1. **Canonical source:** [`.pi/harness/sentrux/architecture.manifest.json`](../../sentrux/architecture.manifest.json) — layers, boundaries, global constraints.
|
|
13
13
|
2. **Generated artifact:** `.sentrux/rules.toml` — committed to git; managed block between `harness:managed:start/end` markers.
|
|
14
|
-
3. **Sync command:** `npm run harness:sentrux-sync` (
|
|
14
|
+
3. **Sync command:** `npm run harness:sentrux-sync` (`.pi/scripts/sentrux-rules-sync.mjs`).
|
|
15
15
|
4. **Pi command:** `/harness-sentrux-sync` via `sentrux-rules-sync.ts` extension.
|
|
16
16
|
5. **When to sync:**
|
|
17
17
|
- `/harness-setup` Step 2.8 (after sentrux install)
|
|
@@ -34,5 +34,5 @@ Sentrux enforces architecture via [`.sentrux/rules.toml`](https://sentrux.dev/do
|
|
|
34
34
|
## References
|
|
35
35
|
|
|
36
36
|
- ADR 0006 (Sentrux dual layer)
|
|
37
|
-
-
|
|
37
|
+
- `.pi/scripts/sentrux-rules-sync.mjs`
|
|
38
38
|
- `.pi/extensions/sentrux-rules-sync.ts`
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# Release Readiness Report
|
|
2
|
+
|
|
3
|
+
Date: 2026-05-14
|
|
4
|
+
Repo root used: `/home/aryaniyaps/ai-projects/ultimate-pi` (active workspace root, treated as canonical)
|
|
5
|
+
|
|
6
|
+
## Requested remaining work
|
|
7
|
+
|
|
8
|
+
- `run-adversarial-canary-and-release`
|
|
9
|
+
- `final-prompt-expert-feature-sweep`
|
|
10
|
+
|
|
11
|
+
Plan file was not modified.
|
|
12
|
+
|
|
13
|
+
## Final integration checks
|
|
14
|
+
|
|
15
|
+
### 1) TypeScript compile check
|
|
16
|
+
|
|
17
|
+
- Command: `npm run check:ts`
|
|
18
|
+
- Result: PASS
|
|
19
|
+
|
|
20
|
+
### 2) Full lint/format/test gate
|
|
21
|
+
|
|
22
|
+
- Command: `npm run check:ts && npm run lint && npm run format:check && npm test`
|
|
23
|
+
- Result: FAIL (expected in current tree state)
|
|
24
|
+
- Notes:
|
|
25
|
+
- `biome check` reports existing lint/format issues (including `.pi/extensions/custom-footer.ts` and multiple `.pi/harness/specs/*.json` files).
|
|
26
|
+
- `npm test` fails before test execution due to a Node runtime flag incompatibility:
|
|
27
|
+
- `node: bad option: --experimental-strip-types`
|
|
28
|
+
|
|
29
|
+
### 3) Release preflight checks
|
|
30
|
+
|
|
31
|
+
- Command: `git rev-parse --is-inside-work-tree && git remote -v && git symbolic-ref -q HEAD && (git diff --quiet && git diff --cached --quiet && echo CLEAN || echo DIRTY)`
|
|
32
|
+
- Result:
|
|
33
|
+
- inside git repo: yes
|
|
34
|
+
- branch: `refs/heads/main`
|
|
35
|
+
- remote `origin`: configured
|
|
36
|
+
- tree cleanliness: `DIRTY` (release/tag push should stay blocked until clean)
|
|
37
|
+
|
|
38
|
+
## Targeted canary validations
|
|
39
|
+
|
|
40
|
+
### 1) Prompt and policy canary assertions
|
|
41
|
+
|
|
42
|
+
- Static canary suite executed against:
|
|
43
|
+
- harness prompt templates
|
|
44
|
+
- `policy-gate`
|
|
45
|
+
- `test-diff-integrity`
|
|
46
|
+
- `debate-orchestrator`
|
|
47
|
+
- Result: PASS after prompt sweep updates
|
|
48
|
+
- locked clauses in `harness-auto` preserved
|
|
49
|
+
- prompt argument parsing + usage surfaces present across harness prompts
|
|
50
|
+
- completion behavior sections present for operator-facing harness prompts
|
|
51
|
+
- policy/test/debate lock signals present in extension code
|
|
52
|
+
|
|
53
|
+
### 2) Router tuning canary (proposal-only)
|
|
54
|
+
|
|
55
|
+
- Created synthetic canary evidence:
|
|
56
|
+
- `.pi/harness/runs/canary-evidence.json`
|
|
57
|
+
- Candidate router for dry proposal:
|
|
58
|
+
- `.pi/harness/runs/canary-candidate-router.json`
|
|
59
|
+
- Command:
|
|
60
|
+
- `node .pi/harness/router/propose-router-tuning.mjs --evidence ... --candidate ... --proposal-out .pi/harness/router/proposals/canary-proposal.json`
|
|
61
|
+
- Result: PASS (proposal created, no live router write)
|
|
62
|
+
|
|
63
|
+
### 3) Harness schema parse check
|
|
64
|
+
|
|
65
|
+
- Command: Node JSON parse validation across `.pi/harness/specs/*.json`
|
|
66
|
+
- Result: PASS (all 9 schema files parse successfully)
|
|
67
|
+
|
|
68
|
+
## Lightweight adversarial drills
|
|
69
|
+
|
|
70
|
+
### 1) Negative apply drill (guardrail validation)
|
|
71
|
+
|
|
72
|
+
- Command:
|
|
73
|
+
- `node .pi/harness/router/apply-router-proposal.mjs --proposal ... --approve-by ... --justification ...`
|
|
74
|
+
- intentionally omitted `--write`
|
|
75
|
+
- Result: PASS (guard correctly blocked apply)
|
|
76
|
+
- Expected error:
|
|
77
|
+
- `missing --write (blind writes and implicit applies are disallowed)`
|
|
78
|
+
|
|
79
|
+
### 2) Adversarial lock retention
|
|
80
|
+
|
|
81
|
+
- Verified locked governance semantics remain stated in `harness-auto`:
|
|
82
|
+
- adversarial review always required
|
|
83
|
+
- severity-policy-engine remains merge-block authority
|
|
84
|
+
- strict pre-PR gates mandatory
|
|
85
|
+
- never auto-merge
|
|
86
|
+
|
|
87
|
+
## Prompt expert feature sweep
|
|
88
|
+
|
|
89
|
+
Using guidance from `.pi/agents/pi-pi/prompt-expert.md`, harness prompt templates were refined for:
|
|
90
|
+
|
|
91
|
+
1. Argument handling:
|
|
92
|
+
- explicit `$ARGUMENTS` parse sections
|
|
93
|
+
- required/optional argument normalization
|
|
94
|
+
- deterministic usage fallback lines
|
|
95
|
+
2. Completion behavior:
|
|
96
|
+
- explicit terminal output contracts for predictable downstream handoff
|
|
97
|
+
3. UX consistency:
|
|
98
|
+
- harmonized command usage patterns and closure blocks across harness prompts
|
|
99
|
+
4. Policy integrity:
|
|
100
|
+
- locked policy constraints intentionally kept intact
|
|
101
|
+
|
|
102
|
+
## Files updated in this sweep
|
|
103
|
+
|
|
104
|
+
- `.pi/prompts/harness-auto.md`
|
|
105
|
+
- `.pi/prompts/harness-plan.md`
|
|
106
|
+
- `.pi/prompts/harness-run.md`
|
|
107
|
+
- `.pi/prompts/harness-review.md`
|
|
108
|
+
- `.pi/prompts/harness-critic.md`
|
|
109
|
+
- `.pi/prompts/harness-eval.md`
|
|
110
|
+
- `.pi/prompts/harness-trace.md`
|
|
111
|
+
- `.pi/prompts/harness-incident.md`
|
|
112
|
+
- `.pi/prompts/harness-router-tune.md`
|
|
113
|
+
- `.pi/prompts/harness-setup.md`
|
|
114
|
+
- `.pi/harness/release-readiness-report.md` (this report)
|
|
115
|
+
|
|
116
|
+
## New canary artifacts
|
|
117
|
+
|
|
118
|
+
- `.pi/harness/runs/canary-evidence.json`
|
|
119
|
+
- `.pi/harness/runs/canary-candidate-router.json`
|
|
120
|
+
- `.pi/harness/router/proposals/canary-proposal.json`
|
|
121
|
+
|
|
122
|
+
## Residual risks
|
|
123
|
+
|
|
124
|
+
1. Full repo lint/format gate currently fails due to pre-existing issues unrelated to this sweep.
|
|
125
|
+
2. `npm test` is currently not runnable in this environment because the configured Node flag is unsupported.
|
|
126
|
+
3. Release flow should remain blocked until working tree is clean and CI-equivalent checks pass.
|
|
127
|
+
4. Router apply path was intentionally not executed with `--write` during this run (safety-preserving drill).
|
|
128
|
+
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "1.0.0",
|
|
3
|
+
"proposal_id": "router-tune-2026-05-14T15-44-44-399Z",
|
|
4
|
+
"created_at": "2026-05-14T15:44:44.399Z",
|
|
5
|
+
"router_path": ".pi/model-router.json",
|
|
6
|
+
"base_router_sha256": "2a96fba517cc5b5147f37428d7ed62961b1968c0e83c0e69f02524265449856b",
|
|
7
|
+
"candidate_router_sha256": "2a96fba517cc5b5147f37428d7ed62961b1968c0e83c0e69f02524265449856b",
|
|
8
|
+
"evidence": {
|
|
9
|
+
"sample_count": 24,
|
|
10
|
+
"min_sample_count": 12,
|
|
11
|
+
"success_rate_delta": 0.08,
|
|
12
|
+
"cost_per_task_delta": -0.04,
|
|
13
|
+
"regression_guard_passed": true,
|
|
14
|
+
"trace_refs": ["run-canary-001", "run-canary-002"],
|
|
15
|
+
"notes": "canary validation synthetic evidence"
|
|
16
|
+
},
|
|
17
|
+
"status": "proposed",
|
|
18
|
+
"approval": {
|
|
19
|
+
"required": true,
|
|
20
|
+
"approved_by": null,
|
|
21
|
+
"approved_at": null,
|
|
22
|
+
"justification": null
|
|
23
|
+
},
|
|
24
|
+
"candidate_router": {
|
|
25
|
+
"defaultProfile": "auto",
|
|
26
|
+
"debug": false,
|
|
27
|
+
"classifierModel": "opencode-go/qwen3.6-plus",
|
|
28
|
+
"phaseBias": 0.5,
|
|
29
|
+
"maxSessionBudget": 1,
|
|
30
|
+
"largeContextThreshold": 100000,
|
|
31
|
+
"rules": [
|
|
32
|
+
{
|
|
33
|
+
"matches": ["deploy", "production", "release"],
|
|
34
|
+
"tier": "high",
|
|
35
|
+
"reason": "Safety check for production tasks"
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"matches": "changelog",
|
|
39
|
+
"tier": "low"
|
|
40
|
+
}
|
|
41
|
+
],
|
|
42
|
+
"profiles": {
|
|
43
|
+
"auto": {
|
|
44
|
+
"high": {
|
|
45
|
+
"model": "opencode-go/deepseek-v4-pro",
|
|
46
|
+
"thinking": "high",
|
|
47
|
+
"fallbacks": ["opencode-go/qwen3.6-plus", "opencode-go/kimi-k2.6"]
|
|
48
|
+
},
|
|
49
|
+
"medium": {
|
|
50
|
+
"model": "opencode-go/qwen3.6-plus",
|
|
51
|
+
"thinking": "medium",
|
|
52
|
+
"fallbacks": ["opencode-go/deepseek-v4-pro"]
|
|
53
|
+
},
|
|
54
|
+
"low": {
|
|
55
|
+
"model": "opencode-go/deepseek-v4-flash",
|
|
56
|
+
"thinking": "low",
|
|
57
|
+
"fallbacks": ["opencode-go/qwen3.5-plus"]
|
|
58
|
+
}
|
|
59
|
+
},
|
|
60
|
+
"cheap": {
|
|
61
|
+
"high": {
|
|
62
|
+
"model": "opencode-go/qwen3.6-plus",
|
|
63
|
+
"thinking": "low",
|
|
64
|
+
"fallbacks": ["opencode-go/qwen3.5-plus"]
|
|
65
|
+
},
|
|
66
|
+
"medium": {
|
|
67
|
+
"model": "opencode-go/qwen3.5-plus",
|
|
68
|
+
"thinking": "off",
|
|
69
|
+
"fallbacks": ["opencode-go/deepseek-v4-flash"]
|
|
70
|
+
},
|
|
71
|
+
"low": {
|
|
72
|
+
"model": "opencode-go/deepseek-v4-flash",
|
|
73
|
+
"thinking": "off",
|
|
74
|
+
"fallbacks": ["opencode-go/qwen3.5-plus"]
|
|
75
|
+
}
|
|
76
|
+
},
|
|
77
|
+
"deep": {
|
|
78
|
+
"high": {
|
|
79
|
+
"model": "opencode-go/deepseek-v4-pro",
|
|
80
|
+
"thinking": "xhigh",
|
|
81
|
+
"fallbacks": ["opencode-go/kimi-k2.6"]
|
|
82
|
+
},
|
|
83
|
+
"medium": {
|
|
84
|
+
"model": "opencode-go/kimi-k2.6",
|
|
85
|
+
"thinking": "medium",
|
|
86
|
+
"fallbacks": ["opencode-go/deepseek-v4-pro"]
|
|
87
|
+
},
|
|
88
|
+
"low": {
|
|
89
|
+
"model": "opencode-go/qwen3.6-plus",
|
|
90
|
+
"thinking": "low",
|
|
91
|
+
"fallbacks": ["opencode-go/deepseek-v4-flash"]
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
{"timestamp":"2026-05-14T15:51:31.965Z","type":"run_start","run_id":"019e272f-3eef-7107-9712-ce281de55707-1778773891854","plan_id":"plan-unknown","phase":"plan"}
|
|
2
|
+
{"timestamp":"2026-05-14T15:51:38.346Z","type":"run_end","run_id":"019e272f-3eef-7107-9712-ce281de55707-1778773891854","phase":"plan","tool_span_count":0,"artifact_ref_count":0}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "1.0.0",
|
|
3
|
+
"contract_version": "1.0.0",
|
|
4
|
+
"run_id": "019e272f-3eef-7107-9712-ce281de55707-1778773891854",
|
|
5
|
+
"plan_id": "plan-unknown",
|
|
6
|
+
"agent_id": "019e272f-3eef-7107-9712-ce281de55707",
|
|
7
|
+
"phase": "plan",
|
|
8
|
+
"model": "auto",
|
|
9
|
+
"thinking_level": "off",
|
|
10
|
+
"tool_spans": [],
|
|
11
|
+
"artifact_refs": [],
|
|
12
|
+
"cost": {
|
|
13
|
+
"input_tokens": 15381,
|
|
14
|
+
"output_tokens": 33,
|
|
15
|
+
"total_tokens": 15414
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
{"timestamp":"2026-05-14T15:51:52.062Z","type":"run_start","run_id":"019e272f-3eef-7107-9712-ce281de55707-1778773912057","plan_id":"plan-unknown","phase":"plan"}
|
|
2
|
+
{"timestamp":"2026-05-14T15:52:14.313Z","type":"run_end","run_id":"019e272f-3eef-7107-9712-ce281de55707-1778773912057","phase":"plan","tool_span_count":0,"artifact_ref_count":0}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "1.0.0",
|
|
3
|
+
"contract_version": "1.0.0",
|
|
4
|
+
"run_id": "019e272f-3eef-7107-9712-ce281de55707-1778773912057",
|
|
5
|
+
"plan_id": "plan-unknown",
|
|
6
|
+
"agent_id": "019e272f-3eef-7107-9712-ce281de55707",
|
|
7
|
+
"phase": "plan",
|
|
8
|
+
"model": "auto",
|
|
9
|
+
"thinking_level": "off",
|
|
10
|
+
"tool_spans": [],
|
|
11
|
+
"artifact_refs": [],
|
|
12
|
+
"cost": {
|
|
13
|
+
"input_tokens": 31337,
|
|
14
|
+
"output_tokens": 528,
|
|
15
|
+
"total_tokens": 31865
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
{"timestamp":"2026-05-14T15:54:46.136Z","type":"run_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","plan_id":"plan-unknown","phase":"plan"}
|
|
2
|
+
{"timestamp":"2026-05-14T15:54:59.110Z","type":"tool_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","tool_call_id":"call_00_7UHDcydTHJHVR2dT5xpb0903","tool_name":"bash"}
|
|
3
|
+
{"timestamp":"2026-05-14T15:54:59.137Z","type":"tool_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","tool_call_id":"call_01_aNsry1whTl5hRf5Ew91t3142","tool_name":"bash"}
|
|
4
|
+
{"timestamp":"2026-05-14T15:54:59.139Z","type":"tool_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","tool_call_id":"call_02_N2e56Q6vKr6cAYzd4Z9q7953","tool_name":"bash"}
|
|
5
|
+
{"timestamp":"2026-05-14T15:55:11.546Z","type":"tool_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","tool_call_id":"call_00_wG71Rv3SKrf6R9K03EeS0264","tool_name":"ctx_batch_execute"}
|
|
6
|
+
{"timestamp":"2026-05-14T15:55:25.167Z","type":"run_end","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","phase":"plan","tool_span_count":4,"artifact_ref_count":0}
|