@fiale-plus/pi-rogue 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/node_modules/@fiale-plus/pi-core/src/context-broker.ts +4 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/README.md +1 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/binary-gate-features.test.ts +8 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/binary-gate-features.ts +7 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/router.test.ts +26 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/router.ts +10 -1
- package/node_modules/@fiale-plus/pi-rogue-context-broker/README.md +20 -2
- package/node_modules/@fiale-plus/pi-rogue-context-broker/src/extension.test.ts +81 -3
- package/node_modules/@fiale-plus/pi-rogue-context-broker/src/extension.ts +72 -10
- package/node_modules/@fiale-plus/pi-rogue-context-broker/src/index.test.ts +32 -0
- package/node_modules/@fiale-plus/pi-rogue-context-broker/src/index.ts +32 -1
- package/node_modules/@fiale-plus/pi-rogue-context-broker/src/sqlite.test.ts +37 -0
- package/node_modules/@fiale-plus/pi-rogue-context-broker/src/sqlite.ts +39 -2
- package/node_modules/@fiale-plus/pi-rogue-orchestration/README.md +3 -3
- package/node_modules/@fiale-plus/pi-rogue-orchestration/package.json +3 -0
- package/node_modules/@fiale-plus/pi-rogue-orchestration/skills/orchestration/SKILL.md +3 -2
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/goal.test.ts +65 -2
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/goal.ts +84 -4
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/loop.ts +3 -0
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/novelty-guard.test.ts +43 -0
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/novelty-guard.ts +96 -11
- package/node_modules/@fiale-plus/pi-rogue-router/README.md +46 -5
- package/node_modules/@fiale-plus/pi-rogue-router/src/binary-gate.test.ts +88 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/binary-gate.ts +232 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/checkpoints.ts +9 -1
- package/node_modules/@fiale-plus/pi-rogue-router/src/cli.ts +123 -9
- package/node_modules/@fiale-plus/pi-rogue-router/src/completions.ts +39 -16
- package/node_modules/@fiale-plus/pi-rogue-router/src/config-extension.test.ts +145 -6
- package/node_modules/@fiale-plus/pi-rogue-router/src/config.ts +51 -11
- package/node_modules/@fiale-plus/pi-rogue-router/src/extension.ts +67 -7
- package/node_modules/@fiale-plus/pi-rogue-router/src/git-features.ts +27 -12
- package/node_modules/@fiale-plus/pi-rogue-router/src/index.ts +4 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/observe.ts +87 -9
- package/node_modules/@fiale-plus/pi-rogue-router/src/outcomes.ts +130 -6
- package/node_modules/@fiale-plus/pi-rogue-router/src/reports.test.ts +92 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/reports.ts +116 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/sharpening.test.ts +223 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/sharpening.ts +344 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/teacher-runner.test.ts +126 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/teacher-runner.ts +238 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/v1-telemetry.test.ts +59 -2
- package/package.json +1 -1
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { mkdtempSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { describe, expect, it } from "vitest";
|
|
5
|
+
import { writeRouterReport } from "./reports.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Returns a path for `name` inside a freshly created temporary directory.
 *
 * Every call creates its own directory (prefixed `pi-router-report-`) under the
 * OS temp dir; the file itself is not created.
 */
function tempFile(name: string): string {
  const prefix = join(tmpdir(), "pi-router-report-");
  const directory = mkdtempSync(prefix);
  return join(directory, name);
}
|
|
10
|
+
|
|
11
|
+
// Shared raw-session reference reused by every fixture below.
const rawRef = {
  schema: "pi-router.raw-session-ref.v1",
  path: "/tmp/session.jsonl",
  fromEvent: 0,
  toEvent: 1,
  fromByte: 0,
  toByte: 1,
  contentHash: "hash",
};
|
|
12
|
+
|
|
13
|
+
/**
 * Route-event fixture whose observation records an override
 * (`followed: false`, `overriddenBy: "continue_current"`).
 *
 * @param action - Action placed on the embedded decision; defaults to "run_verifier".
 */
function event(action = "run_verifier") {
  const decision = {
    schema: "pi-router.route-decision.v1",
    decisionId: "decision-1",
    checkpointId: "checkpoint-1",
    action,
    reason: "test",
    confidence: 0.5,
    policyVersion: "test",
    alternatives: [],
  };
  const runtime = { activeModel: "qwen", provider: "local", contextTokensApprox: 1000, gitDirty: true };
  const observed = { followed: false, overriddenBy: "continue_current" };
  const metrics = {
    loopScore: 0.2,
    progressScore: 0.8,
    sameCommandRepeatedCount: 1,
    sameErrorRepeatedCount: 0,
    verifierUsed: true,
    diffLines: 10,
    diffFilesChanged: 1,
  };
  return {
    schema: "pi-router.route-event.v1",
    eventId: "event-1",
    recordedAt: "2026-06-14T00:00:00.000Z",
    checkpointId: "checkpoint-1",
    sessionId: "session-1",
    rawSessionRef: rawRef,
    sourceEvent: { index: 0, timestamp: null },
    decision,
    runtime,
    observed,
    metrics,
  };
}
|
|
28
|
+
|
|
29
|
+
/**
 * Outcome fixture linked to `event-1` / `checkpoint-1`, carrying both a raw
 * session reference and a notes hash as evidence.
 */
function outcome() {
  const routeEventId = "event-1";
  const evidence = { source: "manual", rawSessionRef: rawRef, routeEventId, notesHash: "notes" };
  return {
    schema: "pi-router.outcome.v1",
    outcomeId: "outcome-1",
    recordedAt: "2026-06-14T00:00:00.000Z",
    sessionId: "session-1",
    checkpointId: "checkpoint-1",
    routeEventId,
    taskType: "implementation",
    taskStatus: "success",
    // Verification signals.
    testsPassedAfter: true,
    verifierImproved: true,
    acceptedDiff: true,
    // User interaction flags.
    userInterrupted: false,
    userOverrodeDecision: true,
    // Size, cost and turn counters.
    finalFilesTouched: 1,
    finalDiffLines: 10,
    wallTimeMs: null,
    cloudCostUsd: null,
    frontierCalls: 0,
    localTurns: 2,
    reworkTurns: 0,
    evidence,
  };
}
|
|
54
|
+
|
|
55
|
+
/**
 * Training-row fixture for the given binary-gate label.
 *
 * An "unknown" label yields a row with no route action, an "unknown" source,
 * a null confidence, and `excludedLocalRuleAsTruth: true`; any other label
 * yields a teacher-sourced row with confidence 0.8.
 */
function trainingRow(label: "continue" | "intervene" | "unknown") {
  const isUnknown = label === "unknown";
  const features = {
    phase: "implementation",
    activeModel: "qwen",
    provider: "local",
    contextTokensApprox: 1000,
    sameCommandRepeatedCount: 1,
    sameErrorRepeatedCount: 0,
    loopScore: 0.1,
    progressScore: 0.9,
    verifierUsed: true,
    noVerifierUsed: false,
    diffLines: 10,
    diffFilesChanged: 1,
    diffChurnScore: 0.01,
    filesTouched: 1,
  };
  const labels = {
    routeAction: isUnknown ? null : "continue_current",
    binaryGate: label,
    source: isUnknown ? "unknown" : "teacher",
    confidence: isUnknown ? null : 0.8,
  };
  return {
    schema: "pi-router.training-row.v1",
    checkpointId: `checkpoint-${label}`,
    sessionId: "session-1",
    rawSessionRef: rawRef,
    features,
    labels,
    outcome: { taskStatus: "unknown", testsPassedAfter: null, acceptedDiff: null, userOverrodeDecision: null, reworkTurns: null },
    provenance: { localRuleAction: "continue_current", excludedLocalRuleAsTruth: isUnknown },
  };
}
|
|
67
|
+
|
|
68
|
+
describe("router report", () => {
  // Serializes each row as one JSON line, with a trailing newline.
  const jsonl = (...rows: unknown[]): string => `${rows.map((row) => JSON.stringify(row)).join("\n")}\n`;

  it("writes JSON and Markdown summaries", () => {
    const eventsPath = tempFile("events.jsonl");
    const outcomesPath = tempFile("outcomes.jsonl");
    const rowsPath = tempFile("training.jsonl");
    const gatePath = tempFile("gate-report.json");
    const outputPath = tempFile("report.json");
    const markdownPath = tempFile("report.md");

    // Seed every supported input with minimal fixtures.
    writeFileSync(eventsPath, jsonl(event()));
    writeFileSync(outcomesPath, jsonl(outcome()));
    writeFileSync(rowsPath, jsonl(trainingRow("continue"), trainingRow("unknown")));
    const gateEval = {
      schema: "pi-router.binary-gate-eval.v1",
      candidate: { accuracy: 0.8, f1: 0.7 },
      ruleBaseline: { accuracy: 0.6, f1: 0.5 },
    };
    writeFileSync(gatePath, JSON.stringify(gateEval));

    const report = writeRouterReport({
      eventsPath,
      outcomesPath,
      trainingRowsPath: rowsPath,
      gateReportPath: gatePath,
      outputPath,
      markdownPath,
    });

    expect(report).toMatchObject({
      schema: "pi-router.report.v1",
      routeEvents: { total: 1, mismatches: 1 },
      outcomes: { total: 1, linked: 1 },
      trainingRows: { total: 2, labeled: 1, localRuleExcluded: 1 },
    });
    const writtenJson = JSON.parse(readFileSync(outputPath, "utf8"));
    expect(writtenJson.schema).toBe("pi-router.report.v1");
    const writtenMarkdown = readFileSync(markdownPath, "utf8");
    expect(writtenMarkdown).toContain("# Pi router report");
  });

  it("requires at least one report input and rejects missing provided inputs", () => {
    const withoutInputs = () => writeRouterReport({ outputPath: tempFile("report.json") });
    expect(withoutInputs).toThrow(/requires at least one input/);

    const withMissingEvents = () =>
      writeRouterReport({ eventsPath: tempFile("missing-events.jsonl"), outputPath: tempFile("report.json") });
    expect(withMissingEvents).toThrow(/report input file not found/);
  });
});
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { dirname, resolve } from "node:path";
|
|
3
|
+
import { readTrainingRows } from "./binary-gate.js";
|
|
4
|
+
import { readRouteEvents, type RouteEvent } from "./ledger.js";
|
|
5
|
+
import { readOutcomes, type RouterOutcome } from "./outcomes.js";
|
|
6
|
+
|
|
7
|
+
/** Schema tag stamped on every router report. */
export const ROUTER_REPORT_SCHEMA = "pi-router.report.v1" as const;

/** Aggregate summary built from route events, outcomes, training rows and an optional gate report. */
export interface RouterReport {
  schema: typeof ROUTER_REPORT_SCHEMA;
  /** Timestamp string recorded when the report was built. */
  generatedAt: string;
  /** Input paths exactly as supplied by the caller; absent inputs stay undefined. */
  inputs: {
    events?: string;
    outcomes?: string;
    gateReport?: string;
    trainingRows?: string;
  };
  /** Route-event tallies; `mismatches` counts events not followed or overridden. */
  routeEvents: {
    total: number;
    byAction: Record<string, number>;
    byModel: Record<string, number>;
    mismatches: number;
  };
  /** Outcome tallies keyed by task status, plus linkage and evidence counts. */
  outcomes: {
    total: number;
    byStatus: Record<string, number>;
    linked: number;
    missingEvidence: number;
  };
  /** Training-row tallies keyed by binary-gate label. */
  trainingRows: {
    total: number;
    labeled: number;
    unlabeled: number;
    localRuleExcluded: number;
    byGate: Record<string, number>;
  };
  /** Parsed gate report JSON, passed through unvalidated. */
  gate?: unknown;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Adds one to the counter stored under `key`, creating it at 1 when absent.
 *
 * Only own properties count as existing counters, so keys that collide with
 * `Object.prototype` members (e.g. "toString", "constructor") start from 0
 * instead of concatenating onto an inherited function. The write goes through
 * `Object.defineProperty` so that even "__proto__" becomes a plain own,
 * enumerable counter rather than a silently ignored prototype assignment.
 *
 * @param map - Mutable tally object; updated in place.
 * @param key - Bucket name to increment.
 */
function increment(map: Record<string, number>, key: string): void {
  const current = Object.prototype.hasOwnProperty.call(map, key) ? (map[key] ?? 0) : 0;
  Object.defineProperty(map, key, {
    value: current + 1,
    writable: true,
    enumerable: true,
    configurable: true,
  });
}
|
|
22
|
+
|
|
23
|
+
/**
 * Tallies route events by decision action and by active model.
 *
 * An event counts as a mismatch when `observed.followed` is exactly `false`
 * or `observed.overriddenBy` is truthy. Events without an active model are
 * bucketed under "unknown".
 */
function routeSummary(events: RouteEvent[]): RouterReport["routeEvents"] {
  const byAction: Record<string, number> = {};
  const byModel: Record<string, number> = {};
  let mismatches = 0;
  events.forEach(({ decision, runtime, observed }) => {
    increment(byAction, decision.action);
    increment(byModel, runtime.activeModel ?? "unknown");
    const diverged = observed.followed === false || Boolean(observed.overriddenBy);
    if (diverged) mismatches += 1;
  });
  return { total: events.length, byAction, byModel, mismatches };
}
|
|
34
|
+
|
|
35
|
+
/**
 * Tallies outcomes by task status.
 *
 * `linked` counts outcomes that carry a route event id or a checkpoint id;
 * `missingEvidence` counts outcomes whose evidence has neither a raw session
 * reference nor a notes hash.
 */
function outcomeSummary(outcomes: RouterOutcome[]): RouterReport["outcomes"] {
  const byStatus: Record<string, number> = {};
  let linked = 0;
  let missingEvidence = 0;
  for (const { taskStatus, routeEventId, checkpointId, evidence } of outcomes) {
    increment(byStatus, taskStatus);
    if (routeEventId || checkpointId) linked += 1;
    const hasEvidence = Boolean(evidence.rawSessionRef) || Boolean(evidence.notesHash);
    if (!hasEvidence) missingEvidence += 1;
  }
  return { total: outcomes.length, byStatus, linked, missingEvidence };
}
|
|
46
|
+
|
|
47
|
+
/**
 * Summarizes the training rows stored at `rowsPath`.
 *
 * Rows whose binary-gate label is "unknown" are unlabeled; among those, rows
 * flagged `excludedLocalRuleAsTruth` are counted in `localRuleExcluded`.
 * Without a path, an all-zero summary is returned and nothing is read.
 */
function trainingSummary(rowsPath?: string): RouterReport["trainingRows"] {
  if (!rowsPath) {
    return { total: 0, labeled: 0, unlabeled: 0, localRuleExcluded: 0, byGate: {} };
  }
  const rows = readTrainingRows(rowsPath);
  const byGate: Record<string, number> = {};
  let labeled = 0;
  let localRuleExcluded = 0;
  for (const { labels, provenance } of rows) {
    increment(byGate, labels.binaryGate);
    if (labels.binaryGate !== "unknown") {
      labeled += 1;
      continue;
    }
    if (provenance.excludedLocalRuleAsTruth) localRuleExcluded += 1;
  }
  const total = rows.length;
  return { total, labeled, unlabeled: total - labeled, localRuleExcluded, byGate };
}
|
|
60
|
+
|
|
61
|
+
/**
 * Reads and parses a JSON file.
 *
 * @param path - File to read; when omitted or empty, `undefined` is returned.
 * @returns The parsed value, unvalidated.
 * @throws Error when `path` is given but no file exists there, or when the
 *   contents are not valid JSON.
 */
function readJson(path?: string): unknown {
  if (!path) return undefined;
  const absolute = resolve(path);
  if (!existsSync(absolute)) {
    throw new Error(`report input file not found: ${path}`);
  }
  const text = readFileSync(absolute, "utf8");
  return JSON.parse(text);
}
|
|
66
|
+
|
|
67
|
+
/**
 * Renders a report as a Markdown document ending in a newline.
 *
 * The document has a headline list of totals, a "Route actions" section, an
 * "Outcome status" section and, when `report.gate` is an object, a
 * "Gate eval" section. Missing gate metrics render as "n/a".
 *
 * Tally sections are ordered by key. The comparison is made on the key alone:
 * a bare `.sort()` on `Object.entries` compares the stringified "key,value"
 * pair, which misorders keys containing characters that sort below "," (for
 * example "ab+" would land before "ab").
 */
function markdown(report: RouterReport): string {
  const gate = report.gate && typeof report.gate === "object" ? report.gate as { candidate?: { accuracy?: number; f1?: number }; ruleBaseline?: { accuracy?: number; f1?: number } } : undefined;

  // One "- key: count" bullet per tally entry, ordered by key (code-unit order).
  const bullets = (counts: Record<string, number>): string[] =>
    Object.entries(counts)
      .sort(([left], [right]) => (left < right ? -1 : left > right ? 1 : 0))
      .map(([key, value]) => `- ${key}: ${value}`);

  const lines = [
    "# Pi router report",
    "",
    `- generatedAt: ${report.generatedAt}`,
    `- route events: ${report.routeEvents.total}`,
    `- route mismatches/overrides: ${report.routeEvents.mismatches}`,
    `- outcomes: ${report.outcomes.total}`,
    `- training rows: ${report.trainingRows.total} (${report.trainingRows.labeled} labeled, ${report.trainingRows.unlabeled} unlabeled)`,
    `- local-rule labels excluded: ${report.trainingRows.localRuleExcluded}`,
    "",
    "## Route actions",
    ...bullets(report.routeEvents.byAction),
    "",
    "## Outcome status",
    ...bullets(report.outcomes.byStatus),
  ];
  if (gate) {
    lines.push(
      "",
      "## Gate eval",
      `- candidate accuracy/f1: ${gate.candidate?.accuracy ?? "n/a"}/${gate.candidate?.f1 ?? "n/a"}`,
      `- rule baseline accuracy/f1: ${gate.ruleBaseline?.accuracy ?? "n/a"}/${gate.ruleBaseline?.f1 ?? "n/a"}`,
    );
  }
  return `${lines.join("\n")}\n`;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Builds a router report from whichever inputs are supplied, without writing
 * anything to disk.
 *
 * @param options - Optional input paths plus an optional `generatedAt`
 *   override; when omitted, the current time in ISO format is used.
 * @throws Error when `eventsPath` is given but no file exists there, or when
 *   the gate report path is given but missing or not valid JSON.
 *
 * NOTE(review): only the events path (here) and the gate report (in
 * `readJson`) are checked for existence in this file; how missing outcome or
 * training-row files behave depends on their readers — confirm.
 */
export function buildRouterReport(options: { eventsPath?: string; outcomesPath?: string; trainingRowsPath?: string; gateReportPath?: string; generatedAt?: string }): RouterReport {
  const { eventsPath, outcomesPath, trainingRowsPath, gateReportPath } = options;
  if (eventsPath && !existsSync(resolve(eventsPath))) {
    throw new Error(`report input file not found: ${eventsPath}`);
  }
  const events = eventsPath ? readRouteEvents(eventsPath) : [];
  const outcomes = outcomesPath ? readOutcomes(outcomesPath) : [];
  const generatedAt = options.generatedAt ?? new Date().toISOString();
  return {
    schema: ROUTER_REPORT_SCHEMA,
    generatedAt,
    inputs: { events: eventsPath, outcomes: outcomesPath, trainingRows: trainingRowsPath, gateReport: gateReportPath },
    routeEvents: routeSummary(events),
    outcomes: outcomeSummary(outcomes),
    trainingRows: trainingSummary(trainingRowsPath),
    gate: readJson(gateReportPath),
  };
}
|
|
105
|
+
|
|
106
|
+
/**
 * Builds a router report and writes it as pretty-printed JSON to `outputPath`
 * and, when `markdownPath` is given, as Markdown too. Parent directories are
 * created as needed.
 *
 * @returns The report that was written.
 * @throws Error when none of the four input paths is provided.
 */
export function writeRouterReport(options: { outputPath: string; markdownPath?: string; eventsPath?: string; outcomesPath?: string; trainingRowsPath?: string; gateReportPath?: string }): RouterReport {
  const suppliedInputs = [options.eventsPath, options.outcomesPath, options.trainingRowsPath, options.gateReportPath];
  if (!suppliedInputs.some(Boolean)) {
    throw new Error("router report requires at least one input file");
  }

  const report = buildRouterReport(options);

  const jsonTarget = resolve(options.outputPath);
  mkdirSync(dirname(jsonTarget), { recursive: true });
  writeFileSync(jsonTarget, `${JSON.stringify(report, null, 2)}\n`);

  if (options.markdownPath) {
    const markdownTarget = resolve(options.markdownPath);
    mkdirSync(dirname(markdownTarget), { recursive: true });
    writeFileSync(markdownTarget, markdown(report));
  }
  return report;
}
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import { mkdtempSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { describe, expect, it } from "vitest";
|
|
5
|
+
import { buildRouteEvent, type RouteEvent } from "./ledger.js";
|
|
6
|
+
import { generateSharpeningHints, writeSharpeningHints } from "./sharpening.js";
|
|
7
|
+
import type { RouterOutcome } from "./outcomes.js";
|
|
8
|
+
import type { RouteAction, RouteDecision, RouterCheckpoint, TaskStatus } from "./types.js";
|
|
9
|
+
|
|
10
|
+
/**
 * Returns a path for `name` inside a freshly created temporary directory.
 *
 * Every call creates its own directory (prefixed `pi-router-sharpening-`)
 * under the OS temp dir; the file itself is not created.
 */
function tempFile(name: string): string {
  const prefix = join(tmpdir(), "pi-router-sharpening-");
  const directory = mkdtempSync(prefix);
  return join(directory, name);
}
|
|
13
|
+
|
|
14
|
+
/**
 * Checkpoint fixture.
 *
 * The session id is derived from the first character of `id`, so ids that
 * share a leading character land in the same session. The raw session path
 * deliberately embeds "SECRET_TOKEN" so tests can assert it never leaks into
 * generated artifacts.
 */
function checkpoint(id: string, model: string, provider: string, progressScore: number, loopScore: number): RouterCheckpoint {
  const sessionId = `session-${id.slice(0, 1)}`;
  const eventKey = `event-${id}`;
  return {
    schema: "pi-router.checkpoint.v1",
    sessionId,
    checkpointId: `${sessionId}:${eventKey}`,
    createdAt: "2026-06-14T00:00:00.000Z",
    rawSessionRef: {
      schema: "pi-router.raw-session-ref.v1",
      path: "/tmp/raw-session-with-SECRET_TOKEN.jsonl",
      fromEvent: 1,
      toEvent: 2,
      fromByte: 10,
      toByte: 20,
      contentHash: `hash-${id}`,
    },
    harness: "pi",
    phase: "implementation",
    activeModel: model,
    provider,
    features: {
      turnIndex: 2,
      // Repetition signals.
      sameCommandRepeatedCount: 1,
      sameErrorRepeatedCount: 0,
      errorChanged: true,
      testsImproved: null,
      // Diff footprint.
      filesTouched: 1,
      diffLines: 12,
      diffFilesChanged: 1,
      diffLinesAdded: 8,
      diffLinesDeleted: 4,
      diffChurnScore: 0,
      // Behavioral scores; loop and progress are caller-controlled.
      toolThrashScore: 0,
      goalDriftScore: 0,
      loopScore,
      progressScore,
      verifierUsed: true,
      noVerifierUsed: false,
      toolCallsLast10Turns: 3,
      contextTokensApprox: 1000,
      gitDirty: null,
    },
    recent: { touchedFileHashes: ["file-hash"] },
    sourceEvent: {
      index: 2,
      byteStart: 10,
      byteEnd: 20,
      id: eventKey,
      timestamp: "2026-06-14T00:00:01.000Z",
      type: "message",
      role: "toolResult",
    },
  };
}
|
|
59
|
+
|
|
60
|
+
/** Decision fixture for `checkpointId` recommending `action`, with fixed confidence and policy. */
function decision(checkpointId: string, action: RouteAction): RouteDecision {
  const fixed = {
    adviceShape: "none",
    contextPolicy: "none",
    confidence: 0.75,
    reason: "test decision",
    policyVersion: "test-policy",
  } as const;
  return { schema: "pi-router.decision.v1", checkpointId, action, ...fixed };
}
|
|
72
|
+
|
|
73
|
+
/**
 * Route-event fixture: builds a checkpoint and a matching decision, then
 * passes both to `buildRouteEvent` with a timestamp whose seconds field is
 * `id` left-padded to two characters.
 *
 * NOTE(review): ids above "59" or non-numeric ids (both used in this file)
 * give a seconds field that is not a valid clock value — confirm that
 * `buildRouteEvent` treats the timestamp as an opaque string.
 */
function event(id: string, action: RouteAction, model: string, provider: string, progressScore: number, loopScore: number): RouteEvent {
  const source = checkpoint(id, model, provider, progressScore, loopScore);
  const routed = decision(source.checkpointId, action);
  const seconds = id.padStart(2, "0");
  return buildRouteEvent(source, routed, `2026-06-14T00:00:${seconds}.000Z`);
}
|
|
77
|
+
|
|
78
|
+
/**
 * Outcome fixture linked to `routeEvent` with the given task status.
 *
 * The event counts as one frontier call when its active model name matches
 * `providerIsFrontier`, otherwise as one local turn. Evidence carries only
 * the route event id.
 */
function outcomeFor(routeEvent: RouteEvent, status: TaskStatus): RouterOutcome {
  const { eventId, sessionId, checkpointId } = routeEvent;
  const usedFrontier = providerIsFrontier(routeEvent.runtime.activeModel);
  return {
    schema: "pi-router.outcome.v1",
    outcomeId: `outcome-${eventId}`,
    recordedAt: "2026-06-14T00:01:00.000Z",
    sessionId,
    checkpointId,
    routeEventId: eventId,
    taskType: "implementation",
    taskStatus: status,
    // Verification signals are left unknown.
    testsPassedAfter: null,
    verifierImproved: null,
    acceptedDiff: null,
    userInterrupted: false,
    userOverrodeDecision: false,
    finalFilesTouched: 1,
    finalDiffLines: 12,
    wallTimeMs: null,
    cloudCostUsd: null,
    frontierCalls: usedFrontier ? 1 : 0,
    localTurns: usedFrontier ? 0 : 1,
    reworkTurns: 0,
    evidence: { source: "manual", routeEventId: eventId },
  };
}
|
|
103
|
+
|
|
104
|
+
/**
 * Reports whether a model name contains "gpt", "claude" or "gemini"
 * (case-insensitive). Missing or empty names are never frontier.
 */
function providerIsFrontier(model?: string): boolean {
  if (!model) return false;
  return /gpt|claude|gemini/i.test(model);
}
|
|
107
|
+
|
|
108
|
+
describe("router sharpening hints", () => {
  const QWEN = "qwen3.6-35b-a3b-128k";
  const GPT = "gpt-5.5";

  // Ten "run_verifier" events with ids `${prefix}0` through `${prefix}9`.
  const verifierBatch = (prefix: string, model: string, provider: string, progress: number, loop: number): RouteEvent[] =>
    Array.from({ length: 10 }, (_, index) => event(`${prefix}${index}`, "run_verifier", model, provider, progress, loop));

  it("generates deterministic provenance-backed model preference hints without transcript content", () => {
    // [id, progressScore, loopScore]
    const qwenRuns: Array<[string, number, number]> = [
      ["11", 0.9, 0.05],
      ["12", 0.85, 0.1],
      ["13", 0.88, 0.08],
      ["14", 0.86, 0.12],
      ["15", 0.82, 0.1],
    ];
    const gptRuns: Array<[string, number, number]> = [
      ["21", 0.62, 0.35],
      ["22", 0.58, 0.4],
      ["23", 0.65, 0.32],
      ["24", 0.6, 0.38],
      ["25", 0.61, 0.37],
    ];
    const qwen = qwenRuns.map(([id, progress, loop]) => event(id, "run_verifier", QWEN, "local", progress, loop));
    const gpt = gptRuns.map(([id, progress, loop]) => event(id, "run_verifier", GPT, "openai-codex", progress, loop));

    const qwenOutcomes = qwen.map((item) => outcomeFor(item, "success"));
    const gptOutcomes = gpt.map((item) => outcomeFor(item, "partial"));
    // Events are handed over in reversed order.
    const artifact = generateSharpeningHints({
      events: [...gpt, ...qwen].reverse(),
      outcomes: [...qwenOutcomes, ...gptOutcomes],
      generatedAt: "2026-06-14T00:02:00.000Z",
      inputs: { events: "events.jsonl", outcomes: "outcomes.jsonl" },
    });

    expect(artifact).toMatchObject({
      schema: "pi-router.sharpening-hints.v1",
      generatedAt: "2026-06-14T00:02:00.000Z",
      totals: { events: 10, outcomes: 10, sessions: 2, models: 2 },
      learningPolicy: { scope: "repo-local", ignoresRawTranscript: true, fallback: "baseline-router" },
      manualPromotionRequired: true,
    });

    const prefer = artifact.hints.find((hint) => hint.kind === "prefer_model_for_action");
    expect(prefer).toMatchObject({
      action: "run_verifier",
      modelId: "qwen3.6-35b-a3b-128k",
      provider: "local",
      confidence: "medium",
      guardrails: { manualPromotionOnly: true, sparse: true, autoUse: { eligible: false } },
    });
    expect(prefer?.provenance.comparedWith?.[0]).toMatchObject({ modelId: "gpt-5.5", provider: "openai-codex", events: 5 });

    // Nothing from the raw session path may leak into the artifact.
    const serialized = JSON.stringify(artifact);
    expect(serialized).not.toContain("SECRET_TOKEN");
    expect(JSON.stringify(artifact)).not.toContain("raw-session");
  });

  it("marks sparse hints low-confidence and sample-size capped", () => {
    const local = [event("31", "continue_current", "qwen-local", "local", 0.9, 0.05)];
    const cloud = [event("41", "continue_current", GPT, "openai-codex", 0.4, 0.6)];

    const artifact = generateSharpeningHints({
      events: [...local, ...cloud],
      generatedAt: "2026-06-14T00:03:00.000Z",
    });
    const hint = artifact.hints.find((item) => item.kind === "prefer_model_for_action");

    expect(hint?.confidence).toBe("low");
    expect(hint?.guardrails).toMatchObject({
      sparse: true,
      sampleSizeCapped: true,
      manualPromotionOnly: true,
      autoUse: { eligible: false },
    });
  });

  it("requires cross-session outcome-backed evidence before future auto bias eligibility", () => {
    // Two id prefixes per model give two sessions each.
    const strongLocal = [...verifierBatch("8", QWEN, "local", 0.9, 0.04), ...verifierBatch("9", QWEN, "local", 0.88, 0.06)];
    const weakerCloud = [...verifierBatch("a", GPT, "openai-codex", 0.58, 0.4), ...verifierBatch("b", GPT, "openai-codex", 0.6, 0.38)];

    const localOutcomes = strongLocal.map((item) => outcomeFor(item, "success"));
    const cloudOutcomes = weakerCloud.map((item) => outcomeFor(item, "partial"));
    const artifact = generateSharpeningHints({
      events: [...strongLocal, ...weakerCloud],
      outcomes: [...localOutcomes, ...cloudOutcomes],
      generatedAt: "2026-06-14T00:03:30.000Z",
    });
    const hint = artifact.hints.find((item) => item.kind === "prefer_model_for_action" && item.modelId === "qwen3.6-35b-a3b-128k");

    expect(hint?.confidence).toBe("high");
    expect(hint?.guardrails).toMatchObject({ sparse: false, sampleSizeCapped: false, autoUse: { eligible: true } });
  });

  it("emits local savings candidates only as manual hints", () => {
    const events = [
      event("51", "continue_current", QWEN, "local", 0.9, 0.05),
      event("52", "run_verifier", QWEN, "local", 0.88, 0.08),
      event("53", "summarize_context", QWEN, "local", 0.85, 0.1),
    ];

    const artifact = generateSharpeningHints({ events, generatedAt: "2026-06-14T00:04:00.000Z" });
    const savings = artifact.hints.find((hint) => hint.kind === "savings_candidate");

    expect(savings).toMatchObject({ modelId: "qwen3.6-35b-a3b-128k", provider: "local", confidence: "low" });
    expect(savings?.rationale).toContain("manual hint, not an automatic promotion");
  });

  it("suppresses savings candidates when linked outcomes are all poor", () => {
    const events = [
      event("71", "continue_current", QWEN, "local", 0.95, 0.02),
      event("72", "run_verifier", QWEN, "local", 0.93, 0.03),
      event("73", "summarize_context", QWEN, "local", 0.91, 0.04),
      event("74", "continue_current", QWEN, "local", 0.92, 0.03),
      event("75", "run_verifier", QWEN, "local", 0.94, 0.02),
    ];
    const failures = events.map((item) => outcomeFor(item, "failed"));

    const artifact = generateSharpeningHints({ events, outcomes: failures, generatedAt: "2026-06-14T00:04:30.000Z" });

    const savings = artifact.hints.find((hint) => hint.kind === "savings_candidate");
    expect(savings).toBeUndefined();
  });

  it("writes sharpening hints and fails clearly for missing event inputs", () => {
    const eventPath = tempFile("events.jsonl");
    const outputPath = tempFile("hints.json");
    const item = event("61", "run_verifier", QWEN, "local", 0.9, 0.05);
    writeFileSync(eventPath, `${JSON.stringify(item)}\n`);

    const artifact = writeSharpeningHints({ eventsPath: eventPath, outputPath, generatedAt: "2026-06-14T00:05:00.000Z" });

    expect(artifact.schema).toBe("pi-router.sharpening-hints.v1");
    const written = JSON.parse(readFileSync(outputPath, "utf8"));
    expect(written.schema).toBe("pi-router.sharpening-hints.v1");

    const missingEvents = () => writeSharpeningHints({ eventsPath: "/tmp/pi-router-missing-events.jsonl", outputPath });
    expect(missingEvents).toThrow(/required route events file not found/);
  });
});
|