@fiale-plus/pi-rogue 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/node_modules/@fiale-plus/pi-rogue-advisor/README.md +1 -0
  2. package/node_modules/@fiale-plus/pi-rogue-advisor/src/binary-gate-features.test.ts +8 -0
  3. package/node_modules/@fiale-plus/pi-rogue-advisor/src/binary-gate-features.ts +7 -0
  4. package/node_modules/@fiale-plus/pi-rogue-advisor/src/router.test.ts +26 -0
  5. package/node_modules/@fiale-plus/pi-rogue-advisor/src/router.ts +10 -1
  6. package/node_modules/@fiale-plus/pi-rogue-orchestration/README.md +3 -3
  7. package/node_modules/@fiale-plus/pi-rogue-orchestration/package.json +3 -0
  8. package/node_modules/@fiale-plus/pi-rogue-orchestration/skills/orchestration/SKILL.md +3 -2
  9. package/node_modules/@fiale-plus/pi-rogue-orchestration/src/goal.test.ts +65 -2
  10. package/node_modules/@fiale-plus/pi-rogue-orchestration/src/goal.ts +84 -4
  11. package/node_modules/@fiale-plus/pi-rogue-orchestration/src/loop.ts +3 -0
  12. package/node_modules/@fiale-plus/pi-rogue-orchestration/src/novelty-guard.test.ts +43 -0
  13. package/node_modules/@fiale-plus/pi-rogue-orchestration/src/novelty-guard.ts +96 -11
  14. package/node_modules/@fiale-plus/pi-rogue-router/README.md +45 -6
  15. package/node_modules/@fiale-plus/pi-rogue-router/src/binary-gate.test.ts +88 -0
  16. package/node_modules/@fiale-plus/pi-rogue-router/src/binary-gate.ts +232 -0
  17. package/node_modules/@fiale-plus/pi-rogue-router/src/cli.ts +123 -9
  18. package/node_modules/@fiale-plus/pi-rogue-router/src/completions.ts +39 -16
  19. package/node_modules/@fiale-plus/pi-rogue-router/src/config-extension.test.ts +111 -4
  20. package/node_modules/@fiale-plus/pi-rogue-router/src/config.ts +17 -2
  21. package/node_modules/@fiale-plus/pi-rogue-router/src/extension.ts +67 -7
  22. package/node_modules/@fiale-plus/pi-rogue-router/src/index.ts +4 -0
  23. package/node_modules/@fiale-plus/pi-rogue-router/src/observe.ts +76 -5
  24. package/node_modules/@fiale-plus/pi-rogue-router/src/outcomes.ts +130 -6
  25. package/node_modules/@fiale-plus/pi-rogue-router/src/reports.test.ts +92 -0
  26. package/node_modules/@fiale-plus/pi-rogue-router/src/reports.ts +116 -0
  27. package/node_modules/@fiale-plus/pi-rogue-router/src/sharpening.test.ts +223 -0
  28. package/node_modules/@fiale-plus/pi-rogue-router/src/sharpening.ts +344 -0
  29. package/node_modules/@fiale-plus/pi-rogue-router/src/teacher-runner.test.ts +126 -0
  30. package/node_modules/@fiale-plus/pi-rogue-router/src/teacher-runner.ts +238 -0
  31. package/node_modules/@fiale-plus/pi-rogue-router/src/v1-telemetry.test.ts +54 -1
  32. package/package.json +1 -1
@@ -6,6 +6,7 @@ import { readRouteEvents, type RouteEvent } from "./ledger.js";
6
6
  import type { RouterCheckpoint, TaskStatus, TaskType } from "./types.js";
7
7
 
8
8
  export const ROUTER_OUTCOME_SCHEMA = "pi-router.outcome.v1" as const;
9
+ export const ROUTER_OUTCOME_ENRICH_SUMMARY_SCHEMA = "pi-router.outcome-enrich-summary.v1" as const;
9
10
 
10
11
  export interface RouterOutcome {
11
12
  schema: typeof ROUTER_OUTCOME_SCHEMA;
@@ -43,9 +44,15 @@ export interface OutcomeWriteSummary {
43
44
  inferred: number;
44
45
  }
45
46
 
46
- function roundStatus(event: RouteEvent, checkpoint?: RouterCheckpoint): TaskStatus {
47
- if (event.decision.action === "stop_and_ask_user") return "unknown";
48
- if (checkpoint?.features.verifierUsed && checkpoint.features.progressScore >= 0.75) return "partial";
47
/**
 * Summary object describing one outcome-enrichment run.
 */
export interface OutcomeEnrichSummary {
  /** Schema tag identifying this summary shape. */
  schema: typeof ROUTER_OUTCOME_ENRICH_SUMMARY_SCHEMA;
  /** Location of the written outcomes file. */
  output: string;
  /** How many outcomes were read as input. */
  inputOutcomes: number;
  /** How many outcomes were written as output. */
  outputOutcomes: number;
  /** How many outcomes were changed by enrichment. */
  enriched: number;
}
54
+
55
/**
 * Status assigned to a freshly built outcome.
 *
 * Both arguments are ignored: the status is always reported as "unknown".
 */
function roundStatus(_event: RouteEvent, _checkpoint?: RouterCheckpoint): TaskStatus {
  const status: TaskStatus = "unknown";
  return status;
}
51
58
 
@@ -78,7 +85,7 @@ export function buildUnknownOutcome(event: RouteEvent, checkpoint?: RouterCheckp
78
85
  testsPassedAfter: null,
79
86
  verifierImproved: null,
80
87
  acceptedDiff: null,
81
- userInterrupted: event.decision.action === "stop_and_ask_user",
88
+ userInterrupted: false,
82
89
  userOverrodeDecision: Boolean(event.observed.overriddenBy),
83
90
  finalFilesTouched: checkpoint ? ((checkpoint.features.diffFilesChanged ?? 0) > 0 ? (checkpoint.features.diffFilesChanged ?? 0) : checkpoint.features.filesTouched) : 0,
84
91
  finalDiffLines: checkpoint?.features.diffLines ?? 0,
@@ -107,8 +114,14 @@ export function readOutcomes(path?: string): RouterOutcome[] {
107
114
  return readFileSync(resolved, "utf8")
108
115
  .split("\n")
109
116
  .filter((line) => line.trim())
110
- .flatMap((line) => {
111
- try { return [JSON.parse(line) as RouterOutcome]; } catch { return []; }
117
+ .map((line, index) => {
118
+ try {
119
+ const outcome = JSON.parse(line) as RouterOutcome;
120
+ if (outcome.schema !== ROUTER_OUTCOME_SCHEMA) throw new Error("invalid schema");
121
+ return outcome;
122
+ } catch (error) {
123
+ throw new Error(`invalid outcome JSONL at ${path}:${index + 1}: ${error instanceof Error ? error.message : String(error)}`);
124
+ }
112
125
  });
113
126
  }
114
127
 
@@ -118,6 +131,94 @@ export function writeOutcomesJsonl(outcomes: RouterOutcome[], path: string): voi
118
131
  writeFileSync(resolved, outcomes.map((outcome) => JSON.stringify(outcome)).join("\n") + (outcomes.length ? "\n" : ""));
119
132
  }
120
133
 
134
/**
 * Finds the route event an outcome refers to.
 *
 * The outcome's `routeEventId` is tried first; when that yields nothing the
 * lookup falls back to the outcome's `checkpointId`.
 *
 * @param outcome Outcome whose links are followed.
 * @param byId Route events indexed by event id.
 * @param byCheckpoint Route events indexed by checkpoint id.
 * @returns The matching event, or `undefined` when neither link resolves.
 */
function routeEventForOutcome(outcome: RouterOutcome, byId: Map<string, RouteEvent>, byCheckpoint: Map<string, RouteEvent>): RouteEvent | undefined {
  if (outcome.routeEventId) {
    const direct = byId.get(outcome.routeEventId);
    if (direct !== undefined && direct !== null) return direct;
  }
  if (!outcome.checkpointId) return undefined;
  return byCheckpoint.get(outcome.checkpointId);
}
137
+
138
/**
 * Finds the checkpoint an outcome refers to.
 *
 * The outcome's own `checkpointId` is tried first; when that yields nothing the
 * checkpoint id carried by the linked route event (if any) is used instead.
 *
 * @param outcome Outcome whose checkpoint is wanted.
 * @param event Route event already resolved for the outcome, if any.
 * @param byCheckpoint Checkpoints indexed by checkpoint id.
 * @returns The matching checkpoint, or `undefined` when none resolves.
 */
function checkpointForOutcome(outcome: RouterOutcome, event: RouteEvent | undefined, byCheckpoint: Map<string, RouterCheckpoint>): RouterCheckpoint | undefined {
  if (outcome.checkpointId) {
    const own = byCheckpoint.get(outcome.checkpointId);
    if (own !== undefined && own !== null) return own;
  }
  if (!event) return undefined;
  return byCheckpoint.get(event.checkpointId);
}
141
+
142
/**
 * Derives a task status for an outcome from the evidence at hand.
 *
 * Rules, applied in order:
 * 1. A followed, non-overridden `stop_and_ask_user` decision or a user
 *    interruption turns an "unknown" status into "abandoned"; any other
 *    recorded status is kept.
 * 2. Passing tests together with a non-empty diff (the largest of the outcome,
 *    checkpoint and event diff line counts) yield "success".
 * 3. Passing tests without a diff upgrade "unknown" to "partial".
 * 4. Failing tests downgrade "unknown" to "failed".
 * 5. Otherwise the recorded status is returned unchanged.
 *
 * @param outcome Outcome being classified.
 * @param checkpoint Optional checkpoint supplying a diff line count.
 * @param event Optional route event supplying the decision and metrics.
 * @param testsPassed Test result to use; defaults to `outcome.testsPassedAfter`.
 * @returns The inferred task status.
 */
function inferredStatus(outcome: RouterOutcome, checkpoint?: RouterCheckpoint, event?: RouteEvent, testsPassed: boolean | null = outcome.testsPassedAfter): TaskStatus {
  const stopWasFollowed = event?.decision.action === "stop_and_ask_user" && event.observed.followed === true && !event.observed.overriddenBy;
  if (stopWasFollowed || outcome.userInterrupted) return outcome.taskStatus === "unknown" ? "abandoned" : outcome.taskStatus;

  if (testsPassed === true) {
    // Computed once: any source reporting changed lines counts as a diff.
    const diffLines = Math.max(outcome.finalDiffLines, checkpoint?.features.diffLines ?? 0, event?.metrics.diffLines ?? 0);
    // Covers every recorded status, so no separate "partial" upgrade is needed.
    if (diffLines > 0) return "success";
    if (outcome.taskStatus === "unknown") return "partial";
  }
  if (testsPassed === false && outcome.taskStatus === "unknown") return "failed";
  return outcome.taskStatus;
}
151
+
152
/**
 * Returns a copy of `outcome` with gaps filled in from a checkpoint and/or a
 * route event.
 *
 * Values already recorded on the outcome win over evidence: nullable fields are
 * only filled when nullish, numeric totals are raised to the evidence value
 * when that is larger, and boolean flags are OR-ed with the evidence.
 *
 * @param outcome Outcome to enrich; it is not mutated.
 * @param options Optional `checkpoint` and `event` evidence, plus a
 *   `recordedAt` override for the result.
 * @returns The enriched outcome.
 */
export function enrichOutcome(outcome: RouterOutcome, options: { checkpoint?: RouterCheckpoint; event?: RouteEvent; recordedAt?: string } = {}): RouterOutcome {
  const { checkpoint, event } = options;
  const testsPassedAfter = outcome.testsPassedAfter;

  // A followed, non-overridden stop request is treated as a user interruption.
  const stopFollowed = Boolean(event?.decision.action === "stop_and_ask_user" && event.observed.followed === true && !event.observed.overriddenBy);

  const checkpointTestsImproved = checkpoint?.features.testsImproved;
  const verifierImproved = outcome.verifierImproved
    ?? (checkpointTestsImproved !== null && checkpointTestsImproved !== undefined ? checkpointTestsImproved : null);

  const taskStatus = inferredStatus(outcome, checkpoint, event, testsPassedAfter);

  // Evidence values: the checkpoint is preferred, the event metrics are the fallback.
  const evidenceDiffLines = checkpoint?.features.diffLines ?? event?.metrics.diffLines ?? 0;
  const evidenceFilesTouched = checkpoint
    ? ((checkpoint.features.diffFilesChanged ?? 0) > 0 ? checkpoint.features.diffFilesChanged : checkpoint.features.filesTouched)
    : event?.metrics.diffFilesChanged ?? 0;
  const evidenceErrorRepeats = checkpoint?.features.sameErrorRepeatedCount ?? event?.metrics.sameErrorRepeatedCount ?? 0;

  const finalDiffLines = Math.max(outcome.finalDiffLines, evidenceDiffLines);
  const finalFilesTouched = Math.max(outcome.finalFilesTouched, evidenceFilesTouched);

  // The first occurrence of an error is not rework; only repeats count.
  let repeatedErrorTurns = 0;
  if (evidenceErrorRepeats > 1) repeatedErrorTurns = evidenceErrorRepeats - 1;
  const reworkTurns = Math.max(outcome.reworkTurns, repeatedErrorTurns);

  let acceptedDiff = outcome.acceptedDiff;
  if (acceptedDiff === null || acceptedDiff === undefined) {
    if (finalDiffLines > 0 && testsPassedAfter === true) acceptedDiff = true;
    else if (testsPassedAfter === false || taskStatus === "abandoned") acceptedDiff = false;
    else acceptedDiff = null;
  }

  const notes = JSON.stringify({ enrichedFromCheckpoint: checkpoint?.checkpointId, routeEventId: event?.eventId, taskStatus, testsPassedAfter, verifierImproved, acceptedDiff });

  const evidence = {
    ...outcome.evidence,
    rawSessionRef: outcome.evidence.rawSessionRef ?? checkpoint?.rawSessionRef ?? event?.rawSessionRef,
    routeEventId: outcome.evidence.routeEventId ?? event?.eventId,
    notesHash: outcome.evidence.notesHash ?? hashText(notes),
  };

  // Property order is kept stable so serialized outcomes stay comparable.
  return {
    ...outcome,
    recordedAt: options.recordedAt ?? outcome.recordedAt,
    checkpointId: outcome.checkpointId ?? event?.checkpointId,
    routeEventId: outcome.routeEventId ?? event?.eventId,
    taskType: outcome.taskType === "unknown" ? taskTypeFromCheckpoint(checkpoint) : outcome.taskType,
    taskStatus,
    testsPassedAfter,
    verifierImproved,
    acceptedDiff,
    userInterrupted: outcome.userInterrupted || stopFollowed,
    userOverrodeDecision: outcome.userOverrodeDecision || Boolean(event?.observed.overriddenBy),
    finalFilesTouched,
    finalDiffLines,
    reworkTurns,
    evidence,
  };
}
194
+
195
/**
 * Verifies that the ids stored on each outcome point at known evidence.
 *
 * Per outcome, in this order:
 * - with at least one event loaded, a `routeEventId` must name a known event;
 * - when both ids are set and the event is known, the event's checkpoint id
 *   must equal the outcome's `checkpointId`;
 * - with any evidence loaded, a `checkpointId` must be known either from the
 *   checkpoints or from the events.
 *
 * @throws Error on the first broken link found.
 */
function validateOutcomeLinks(outcomes: RouterOutcome[], checkpoints: RouterCheckpoint[], events: RouteEvent[]): void {
  const knownCheckpointIds = new Set(checkpoints.map((item) => item.checkpointId));
  const eventsById = new Map(events.map((item) => [item.eventId, item]));
  const checkpointIdsFromEvents = new Set(events.map((item) => item.checkpointId));
  const haveEvents = events.length > 0;
  const haveEvidence = checkpoints.length > 0 || haveEvents;

  outcomes.forEach((outcome) => {
    const { routeEventId, checkpointId } = outcome;

    if (haveEvents && routeEventId && !eventsById.has(routeEventId)) {
      throw new Error(`outcome routeEventId not found: ${routeEventId}`);
    }

    if (routeEventId && checkpointId) {
      const linked = eventsById.get(routeEventId);
      if (linked && linked.checkpointId !== checkpointId) {
        throw new Error(`outcome routeEventId/checkpointId mismatch: ${routeEventId}`);
      }
    }

    if (haveEvidence && checkpointId) {
      const known = knownCheckpointIds.has(checkpointId) || checkpointIdsFromEvents.has(checkpointId);
      if (!known) throw new Error(`outcome checkpointId not found: ${checkpointId}`);
    }
  });
}
209
+
210
/**
 * Enriches a list of outcomes from the supplied checkpoints and route events.
 *
 * Links are validated first. Each outcome is then paired with its route event
 * (by event id, then by checkpoint id) and its checkpoint, and enriched.
 *
 * @param outcomes Outcomes to enrich.
 * @param checkpoints Checkpoint evidence; defaults to none.
 * @param events Route event evidence; defaults to none.
 * @param recordedAt Optional timestamp passed through to every enriched outcome.
 * @returns The enriched outcomes, in input order.
 * @throws Error when an outcome links to unknown or mismatched evidence.
 */
export function enrichOutcomes(outcomes: RouterOutcome[], checkpoints: RouterCheckpoint[] = [], events: RouteEvent[] = [], recordedAt?: string): RouterOutcome[] {
  validateOutcomeLinks(outcomes, checkpoints, events);

  const checkpointsById = new Map<string, RouterCheckpoint>();
  for (const item of checkpoints) checkpointsById.set(item.checkpointId, item);

  const eventsById = new Map<string, RouteEvent>();
  const eventsByCheckpoint = new Map<string, RouteEvent>();
  for (const item of events) {
    eventsById.set(item.eventId, item);
    eventsByCheckpoint.set(item.checkpointId, item);
  }

  return outcomes.map((outcome) => {
    const linkedEvent = routeEventForOutcome(outcome, eventsById, eventsByCheckpoint);
    const linkedCheckpoint = checkpointForOutcome(outcome, linkedEvent, checkpointsById);
    return enrichOutcome(outcome, { checkpoint: linkedCheckpoint, event: linkedEvent, recordedAt });
  });
}
221
+
121
222
  export function writeInferredOutcomes(options: { checkpointPath: string; eventsPath: string; outputPath: string }): OutcomeWriteSummary {
122
223
  if (!existsSync(resolve(options.eventsPath))) throw new Error(`required route events file not found: ${options.eventsPath}`);
123
224
  const checkpoints = readCheckpointJsonl(options.checkpointPath);
@@ -126,3 +227,26 @@ export function writeInferredOutcomes(options: { checkpointPath: string; eventsP
126
227
  writeOutcomesJsonl(outcomes, options.outputPath);
127
228
  return { schema: "pi-router.outcomes-summary.v1", output: resolve(options.outputPath), outcomes: outcomes.length, inferred: outcomes.length };
128
229
  }
230
+
231
/**
 * Reads an outcomes file, enriches it from checkpoint and/or route-event
 * evidence, and writes the result as JSONL.
 *
 * @param options `outcomesPath` and `outputPath` are required; at least one of
 *   `checkpointPath` / `eventsPath` must be supplied.
 * @returns A summary with the input/output counts and the number of outcomes
 *   whose serialized form changed.
 * @throws Error when no evidence path is given, an evidence file is missing, or
 *   the evidence files yield neither checkpoints nor events.
 */
export function writeEnrichedOutcomes(options: { outcomesPath: string; outputPath: string; checkpointPath?: string; eventsPath?: string }): OutcomeEnrichSummary {
  const { outcomesPath, outputPath, checkpointPath, eventsPath } = options;

  if (!checkpointPath && !eventsPath) {
    throw new Error("outcome enrichment requires --checkpoint-file or --events evidence");
  }
  if (eventsPath && !existsSync(resolve(eventsPath))) {
    throw new Error(`route events file not found: ${eventsPath}`);
  }
  if (checkpointPath && !existsSync(resolve(checkpointPath))) {
    throw new Error(`checkpoint file not found: ${checkpointPath}`);
  }

  const input = readOutcomes(outcomesPath);
  const checkpoints = checkpointPath ? readCheckpointJsonl(checkpointPath) : [];
  const events = eventsPath ? readRouteEvents(eventsPath) : [];

  const noEvidence = checkpoints.length === 0 && events.length === 0;
  if (noEvidence) {
    // Name the offending file when only one kind of evidence was requested.
    if (checkpointPath && !eventsPath) throw new Error(`checkpoint file contains no checkpoints: ${checkpointPath}`);
    if (eventsPath && !checkpointPath) throw new Error(`route events file contains no events: ${eventsPath}`);
    throw new Error("outcome enrichment evidence files contain no usable checkpoint or route events");
  }

  const enriched = enrichOutcomes(input, checkpoints, events);
  writeOutcomesJsonl(enriched, outputPath);

  // An outcome counts as enriched when its serialized form differs from the input.
  let changed = 0;
  enriched.forEach((outcome, index) => {
    if (JSON.stringify(outcome) !== JSON.stringify(input[index])) changed += 1;
  });

  return {
    schema: ROUTER_OUTCOME_ENRICH_SUMMARY_SCHEMA,
    output: resolve(outputPath),
    inputOutcomes: input.length,
    outputOutcomes: enriched.length,
    enriched: changed,
  };
}
@@ -0,0 +1,92 @@
1
+ import { mkdtempSync, readFileSync, writeFileSync } from "node:fs";
2
+ import { tmpdir } from "node:os";
3
+ import { join } from "node:path";
4
+ import { describe, expect, it } from "vitest";
5
+ import { writeRouterReport } from "./reports.js";
6
+
7
/** Returns a path for `name` inside a newly created temporary directory. */
function tempFile(name: string): string {
  const prefix = join(tmpdir(), "pi-router-report-");
  const scratchDir = mkdtempSync(prefix);
  return join(scratchDir, name);
}
10
+
11
+ const rawRef = { schema: "pi-router.raw-session-ref.v1", path: "/tmp/session.jsonl", fromEvent: 0, toEvent: 1, fromByte: 0, toByte: 1, contentHash: "hash" };
12
+
13
/**
 * Route-event fixture: one overridden, not-followed decision for
 * "checkpoint-1".
 *
 * @param action Decision action stored on the event.
 */
function event(action = "run_verifier") {
  const checkpointId = "checkpoint-1";
  const decision = {
    schema: "pi-router.route-decision.v1",
    decisionId: "decision-1",
    checkpointId,
    action,
    reason: "test",
    confidence: 0.5,
    policyVersion: "test",
    alternatives: [],
  };
  const runtime = { activeModel: "qwen", provider: "local", contextTokensApprox: 1000, gitDirty: true };
  const observed = { followed: false, overriddenBy: "continue_current" };
  const metrics = {
    loopScore: 0.2,
    progressScore: 0.8,
    sameCommandRepeatedCount: 1,
    sameErrorRepeatedCount: 0,
    verifierUsed: true,
    diffLines: 10,
    diffFilesChanged: 1,
  };
  return {
    schema: "pi-router.route-event.v1",
    eventId: "event-1",
    recordedAt: "2026-06-14T00:00:00.000Z",
    checkpointId,
    sessionId: "session-1",
    rawSessionRef: rawRef,
    sourceEvent: { index: 0, timestamp: null },
    decision,
    runtime,
    observed,
    metrics,
  };
}
28
+
29
/**
 * Outcome fixture: a successful, manually recorded outcome linked to
 * "event-1" and "checkpoint-1".
 */
function outcome() {
  const routeEventId = "event-1";
  const links = {
    sessionId: "session-1",
    checkpointId: "checkpoint-1",
    routeEventId,
  };
  const verdict = {
    taskType: "implementation",
    taskStatus: "success",
    testsPassedAfter: true,
    verifierImproved: true,
    acceptedDiff: true,
    userInterrupted: false,
    userOverrodeDecision: true,
  };
  const totals = {
    finalFilesTouched: 1,
    finalDiffLines: 10,
    wallTimeMs: null,
    cloudCostUsd: null,
    frontierCalls: 0,
    localTurns: 2,
    reworkTurns: 0,
  };
  return {
    schema: "pi-router.outcome.v1",
    outcomeId: "outcome-1",
    recordedAt: "2026-06-14T00:00:00.000Z",
    ...links,
    ...verdict,
    ...totals,
    evidence: { source: "manual", rawSessionRef: rawRef, routeEventId, notesHash: "notes" },
  };
}
54
+
55
/**
 * Training-row fixture for the given gate label.
 *
 * An "unknown" row has no route action, an "unknown" label source, no
 * confidence, and is flagged as excluded local-rule truth. Any other label is a
 * teacher-sourced "continue_current" row with confidence 0.8.
 */
function trainingRow(label: "continue" | "intervene" | "unknown") {
  const isUnknown = label === "unknown";
  const features = {
    phase: "implementation",
    activeModel: "qwen",
    provider: "local",
    contextTokensApprox: 1000,
    sameCommandRepeatedCount: 1,
    sameErrorRepeatedCount: 0,
    loopScore: 0.1,
    progressScore: 0.9,
    verifierUsed: true,
    noVerifierUsed: false,
    diffLines: 10,
    diffFilesChanged: 1,
    diffChurnScore: 0.01,
    filesTouched: 1,
  };
  const labels = {
    routeAction: isUnknown ? null : "continue_current",
    binaryGate: label,
    source: isUnknown ? "unknown" : "teacher",
    confidence: isUnknown ? null : 0.8,
  };
  return {
    schema: "pi-router.training-row.v1",
    checkpointId: `checkpoint-${label}`,
    sessionId: "session-1",
    rawSessionRef: rawRef,
    features,
    labels,
    outcome: { taskStatus: "unknown", testsPassedAfter: null, acceptedDiff: null, userOverrodeDecision: null, reworkTurns: null },
    provenance: { localRuleAction: "continue_current", excludedLocalRuleAsTruth: isUnknown },
  };
}
67
+
68
describe("router report", () => {
  it("writes JSON and Markdown summaries", () => {
    // Each input lives in its own temporary directory.
    const eventsPath = tempFile("events.jsonl");
    const outcomesPath = tempFile("outcomes.jsonl");
    const rowsPath = tempFile("training.jsonl");
    const gatePath = tempFile("gate-report.json");
    const outputPath = tempFile("report.json");
    const markdownPath = tempFile("report.md");

    const rows = [trainingRow("continue"), trainingRow("unknown")];
    const gateEval = { schema: "pi-router.binary-gate-eval.v1", candidate: { accuracy: 0.8, f1: 0.7 }, ruleBaseline: { accuracy: 0.6, f1: 0.5 } };
    writeFileSync(eventsPath, `${JSON.stringify(event())}\n`);
    writeFileSync(outcomesPath, `${JSON.stringify(outcome())}\n`);
    writeFileSync(rowsPath, rows.map((row) => JSON.stringify(row)).join("\n") + "\n");
    writeFileSync(gatePath, JSON.stringify(gateEval));

    const report = writeRouterReport({ eventsPath, outcomesPath, trainingRowsPath: rowsPath, gateReportPath: gatePath, outputPath, markdownPath });

    expect(report).toMatchObject({
      schema: "pi-router.report.v1",
      routeEvents: { total: 1, mismatches: 1 },
      outcomes: { total: 1, linked: 1 },
      trainingRows: { total: 2, labeled: 1, localRuleExcluded: 1 },
    });
    const writtenJson = JSON.parse(readFileSync(outputPath, "utf8"));
    expect(writtenJson.schema).toBe("pi-router.report.v1");
    const writtenMarkdown = readFileSync(markdownPath, "utf8");
    expect(writtenMarkdown).toContain("# Pi router report");
  });

  it("requires at least one report input and rejects missing provided inputs", () => {
    const withoutInputs = () => writeRouterReport({ outputPath: tempFile("report.json") });
    const withMissingEvents = () => writeRouterReport({ eventsPath: tempFile("missing-events.jsonl"), outputPath: tempFile("report.json") });

    expect(withoutInputs).toThrow(/requires at least one input/);
    expect(withMissingEvents).toThrow(/report input file not found/);
  });
});
@@ -0,0 +1,116 @@
1
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
2
+ import { dirname, resolve } from "node:path";
3
+ import { readTrainingRows } from "./binary-gate.js";
4
+ import { readRouteEvents, type RouteEvent } from "./ledger.js";
5
+ import { readOutcomes, type RouterOutcome } from "./outcomes.js";
6
+
7
/** Schema tag stamped on every router report. */
export const ROUTER_REPORT_SCHEMA = "pi-router.report.v1" as const;

/** Aggregated view over route events, outcomes, training rows and a gate evaluation. */
export interface RouterReport {
  schema: typeof ROUTER_REPORT_SCHEMA;
  /** Timestamp recorded when the report was built. */
  generatedAt: string;
  /** Input paths exactly as supplied by the caller; absent inputs are undefined. */
  inputs: { events?: string; outcomes?: string; gateReport?: string; trainingRows?: string };
  /** Route-event counts; `mismatches` counts events that were not followed or were overridden. */
  routeEvents: { total: number; byAction: Record<string, number>; byModel: Record<string, number>; mismatches: number };
  /**
   * Outcome counts; `linked` outcomes carry a route-event or checkpoint id, and
   * `missingEvidence` outcomes have neither a raw session reference nor a notes hash.
   */
  outcomes: { total: number; byStatus: Record<string, number>; linked: number; missingEvidence: number };
  /** Training-row counts keyed by binary gate label. */
  trainingRows: { total: number; labeled: number; unlabeled: number; localRuleExcluded: number; byGate: Record<string, number> };
  /** Parsed gate report JSON, passed through without validation. */
  gate?: unknown;
}
18
+
19
/** Adds one to the counter stored under `key`, starting from zero when absent. */
function increment(map: Record<string, number>, key: string): void {
  const current = map[key];
  map[key] = (current === undefined || current === null ? 0 : current) + 1;
}
22
+
23
/**
 * Tallies route events by decision action and by active model.
 *
 * Events without an active model are counted under "unknown". An event is a
 * mismatch when it was explicitly not followed or when it was overridden.
 */
function routeSummary(events: RouteEvent[]): RouterReport["routeEvents"] {
  const byAction: Record<string, number> = {};
  const byModel: Record<string, number> = {};
  events.forEach((item) => {
    increment(byAction, item.decision.action);
    increment(byModel, item.runtime.activeModel ?? "unknown");
  });
  const mismatches = events.filter((item) => item.observed.followed === false || Boolean(item.observed.overriddenBy)).length;
  return { total: events.length, byAction, byModel, mismatches };
}
34
+
35
/**
 * Tallies outcomes by task status.
 *
 * `linked` counts outcomes carrying a route-event id or a checkpoint id;
 * `missingEvidence` counts outcomes whose evidence has neither a raw session
 * reference nor a notes hash.
 */
function outcomeSummary(outcomes: RouterOutcome[]): RouterReport["outcomes"] {
  const byStatus: Record<string, number> = {};
  outcomes.forEach((item) => increment(byStatus, item.taskStatus));
  const linked = outcomes.filter((item) => Boolean(item.routeEventId || item.checkpointId)).length;
  const missingEvidence = outcomes.filter((item) => !item.evidence.rawSessionRef && !item.evidence.notesHash).length;
  return { total: outcomes.length, byStatus, linked, missingEvidence };
}
46
+
47
/**
 * Summarizes a training-rows file.
 *
 * Rows labelled "unknown" are unlabeled. Among those, rows flagged
 * `excludedLocalRuleAsTruth` are counted as local-rule exclusions. Without a
 * path an all-zero summary is returned.
 */
function trainingSummary(rowsPath?: string): RouterReport["trainingRows"] {
  const byGate: Record<string, number> = {};
  if (!rowsPath) return { total: 0, labeled: 0, unlabeled: 0, localRuleExcluded: 0, byGate };

  const rows = readTrainingRows(rowsPath);
  let labeled = 0;
  let localRuleExcluded = 0;
  rows.forEach((row) => {
    const gate = row.labels.binaryGate;
    increment(byGate, gate);
    if (gate !== "unknown") {
      labeled += 1;
      return;
    }
    if (row.provenance.excludedLocalRuleAsTruth) localRuleExcluded += 1;
  });
  const unlabeled = rows.length - labeled;
  return { total: rows.length, labeled, unlabeled, localRuleExcluded, byGate };
}
60
+
61
/**
 * Reads and parses an optional JSON input file.
 *
 * @param path File to read; when omitted nothing is read.
 * @returns The parsed JSON value, or `undefined` when no path was given.
 * @throws Error when the file does not exist or does not contain valid JSON.
 *   Both messages name the offending path.
 */
function readJson(path?: string): unknown {
  if (!path) return undefined;
  const resolved = resolve(path);
  if (!existsSync(resolved)) throw new Error(`report input file not found: ${path}`);
  const text = readFileSync(resolved, "utf8");
  try {
    return JSON.parse(text);
  } catch (error) {
    // A bare SyntaxError would not say which input was malformed.
    throw new Error(`report input file is not valid JSON: ${path}: ${error instanceof Error ? error.message : String(error)}`);
  }
}
66
+
67
/**
 * Renders a report as a short Markdown document.
 *
 * Action and status tallies are listed in ascending key order. A "Gate eval"
 * section is appended only when the report carries a gate object; metrics
 * missing from it are printed as "n/a".
 *
 * @param report Report to render.
 * @returns Markdown text ending with a newline.
 */
function markdown(report: RouterReport): string {
  const gate = report.gate && typeof report.gate === "object" ? report.gate as { candidate?: { accuracy?: number; f1?: number }; ruleBaseline?: { accuracy?: number; f1?: number } } : undefined;
  // Sort on the key alone. A comparator-less sort would compare the stringified
  // "key,value" pair and let the count influence the order.
  const bullets = (counts: Record<string, number>): string[] =>
    Object.entries(counts)
      .sort(([left], [right]) => (left < right ? -1 : left > right ? 1 : 0))
      .map(([key, value]) => `- ${key}: ${value}`);
  const lines = [
    "# Pi router report",
    "",
    `- generatedAt: ${report.generatedAt}`,
    `- route events: ${report.routeEvents.total}`,
    `- route mismatches/overrides: ${report.routeEvents.mismatches}`,
    `- outcomes: ${report.outcomes.total}`,
    `- training rows: ${report.trainingRows.total} (${report.trainingRows.labeled} labeled, ${report.trainingRows.unlabeled} unlabeled)`,
    `- local-rule labels excluded: ${report.trainingRows.localRuleExcluded}`,
    "",
    "## Route actions",
    ...bullets(report.routeEvents.byAction),
    "",
    "## Outcome status",
    ...bullets(report.outcomes.byStatus),
  ];
  if (gate) {
    lines.push("", "## Gate eval", `- candidate accuracy/f1: ${gate.candidate?.accuracy ?? "n/a"}/${gate.candidate?.f1 ?? "n/a"}`, `- rule baseline accuracy/f1: ${gate.ruleBaseline?.accuracy ?? "n/a"}/${gate.ruleBaseline?.f1 ?? "n/a"}`);
  }
  return `${lines.join("\n")}\n`;
}
90
+
91
/**
 * Builds a router report from whichever inputs were supplied.
 *
 * Every explicitly provided path must exist; a missing file is rejected up
 * front, before anything is read. Inputs that were not provided contribute
 * empty summaries.
 *
 * @param options Optional input paths, plus a `generatedAt` override (the
 *   current time is used otherwise).
 * @returns The assembled report.
 * @throws Error when a provided input file does not exist.
 */
export function buildRouterReport(options: { eventsPath?: string; outcomesPath?: string; trainingRowsPath?: string; gateReportPath?: string; generatedAt?: string }): RouterReport {
  // Treat all provided inputs alike: a path given but missing is an error.
  for (const provided of [options.eventsPath, options.outcomesPath, options.trainingRowsPath, options.gateReportPath]) {
    if (provided && !existsSync(resolve(provided))) throw new Error(`report input file not found: ${provided}`);
  }
  const events = options.eventsPath ? readRouteEvents(options.eventsPath) : [];
  const outcomes = options.outcomesPath ? readOutcomes(options.outcomesPath) : [];
  return {
    schema: ROUTER_REPORT_SCHEMA,
    generatedAt: options.generatedAt ?? new Date().toISOString(),
    inputs: { events: options.eventsPath, outcomes: options.outcomesPath, trainingRows: options.trainingRowsPath, gateReport: options.gateReportPath },
    routeEvents: routeSummary(events),
    outcomes: outcomeSummary(outcomes),
    trainingRows: trainingSummary(options.trainingRowsPath),
    gate: readJson(options.gateReportPath),
  };
}
105
+
106
/**
 * Builds a router report and writes it as pretty-printed JSON, plus an optional
 * Markdown rendering.
 *
 * Parent directories of both targets are created as needed.
 *
 * @param options `outputPath` is required, along with at least one input path.
 * @returns The report that was written.
 * @throws Error when no input path is supplied.
 */
export function writeRouterReport(options: { outputPath: string; markdownPath?: string; eventsPath?: string; outcomesPath?: string; trainingRowsPath?: string; gateReportPath?: string }): RouterReport {
  const hasInput = Boolean(options.eventsPath || options.outcomesPath || options.trainingRowsPath || options.gateReportPath);
  if (!hasInput) throw new Error("router report requires at least one input file");

  const report = buildRouterReport(options);

  const jsonTarget = resolve(options.outputPath);
  mkdirSync(dirname(jsonTarget), { recursive: true });
  writeFileSync(jsonTarget, `${JSON.stringify(report, null, 2)}\n`);

  if (options.markdownPath) {
    const markdownTarget = resolve(options.markdownPath);
    mkdirSync(dirname(markdownTarget), { recursive: true });
    writeFileSync(markdownTarget, markdown(report));
  }
  return report;
}
@@ -0,0 +1,223 @@
1
+ import { mkdtempSync, readFileSync, writeFileSync } from "node:fs";
2
+ import { tmpdir } from "node:os";
3
+ import { join } from "node:path";
4
+ import { describe, expect, it } from "vitest";
5
+ import { buildRouteEvent, type RouteEvent } from "./ledger.js";
6
+ import { generateSharpeningHints, writeSharpeningHints } from "./sharpening.js";
7
+ import type { RouterOutcome } from "./outcomes.js";
8
+ import type { RouteAction, RouteDecision, RouterCheckpoint, TaskStatus } from "./types.js";
9
+
10
/** Returns a path for `name` inside a newly created temporary directory. */
function tempFile(name: string): string {
  const prefix = join(tmpdir(), "pi-router-sharpening-");
  const scratchDir = mkdtempSync(prefix);
  return join(scratchDir, name);
}
13
+
14
/**
 * Checkpoint fixture.
 *
 * The session id is derived from the first character of `id`, so ids sharing
 * that character land in the same session. The raw session path deliberately
 * embeds "SECRET_TOKEN".
 *
 * @param id Suffix used for the checkpoint id, content hash and source event id.
 * @param model Active model recorded on the checkpoint.
 * @param provider Provider recorded on the checkpoint.
 * @param progressScore Progress score feature.
 * @param loopScore Loop score feature.
 */
function checkpoint(id: string, model: string, provider: string, progressScore: number, loopScore: number): RouterCheckpoint {
  const sessionId = `session-${id.slice(0, 1)}`;
  const eventName = `event-${id}`;
  const rawSessionRef = {
    schema: "pi-router.raw-session-ref.v1",
    path: "/tmp/raw-session-with-SECRET_TOKEN.jsonl",
    fromEvent: 1,
    toEvent: 2,
    fromByte: 10,
    toByte: 20,
    contentHash: `hash-${id}`,
  };
  const features = {
    turnIndex: 2,
    sameCommandRepeatedCount: 1,
    sameErrorRepeatedCount: 0,
    errorChanged: true,
    testsImproved: null,
    filesTouched: 1,
    diffLines: 12,
    diffFilesChanged: 1,
    diffLinesAdded: 8,
    diffLinesDeleted: 4,
    diffChurnScore: 0,
    toolThrashScore: 0,
    goalDriftScore: 0,
    loopScore,
    progressScore,
    verifierUsed: true,
    noVerifierUsed: false,
    toolCallsLast10Turns: 3,
    contextTokensApprox: 1000,
    gitDirty: null,
  };
  return {
    schema: "pi-router.checkpoint.v1",
    sessionId,
    checkpointId: `${sessionId}:${eventName}`,
    createdAt: "2026-06-14T00:00:00.000Z",
    rawSessionRef,
    harness: "pi",
    phase: "implementation",
    activeModel: model,
    provider,
    features,
    recent: { touchedFileHashes: ["file-hash"] },
    sourceEvent: { index: 2, byteStart: 10, byteEnd: 20, id: eventName, timestamp: "2026-06-14T00:00:01.000Z", type: "message", role: "toolResult" },
  };
}
59
+
60
/** Decision fixture for a checkpoint, with fixed confidence, reason and policy version. */
function decision(checkpointId: string, action: RouteAction): RouteDecision {
  const fixture: RouteDecision = {
    schema: "pi-router.decision.v1",
    checkpointId,
    action,
    adviceShape: "none",
    contextPolicy: "none",
    confidence: 0.75,
    reason: "test decision",
    policyVersion: "test-policy",
  };
  return fixture;
}
72
+
73
/**
 * Route-event fixture built from a fresh checkpoint and a decision for it.
 *
 * The recorded-at timestamp uses `id`, left-padded with "0" to two characters,
 * as its seconds field.
 */
function event(id: string, action: RouteAction, model: string, provider: string, progressScore: number, loopScore: number): RouteEvent {
  const source = checkpoint(id, model, provider, progressScore, loopScore);
  const routed = decision(source.checkpointId, action);
  const recordedAt = `2026-06-14T00:00:${id.padStart(2, "0")}.000Z`;
  return buildRouteEvent(source, routed, recordedAt);
}
77
+
78
/**
 * Outcome fixture linked to `routeEvent` with the given task status.
 *
 * The turn is counted as one frontier call when the event's active model looks
 * like a frontier model, and as one local turn otherwise.
 */
function outcomeFor(routeEvent: RouteEvent, status: TaskStatus): RouterOutcome {
  const { eventId, sessionId, checkpointId } = routeEvent;
  const frontier = providerIsFrontier(routeEvent.runtime.activeModel);
  return {
    schema: "pi-router.outcome.v1",
    outcomeId: `outcome-${eventId}`,
    recordedAt: "2026-06-14T00:01:00.000Z",
    sessionId,
    checkpointId,
    routeEventId: eventId,
    taskType: "implementation",
    taskStatus: status,
    testsPassedAfter: null,
    verifierImproved: null,
    acceptedDiff: null,
    userInterrupted: false,
    userOverrodeDecision: false,
    finalFilesTouched: 1,
    finalDiffLines: 12,
    wallTimeMs: null,
    cloudCostUsd: null,
    frontierCalls: frontier ? 1 : 0,
    localTurns: frontier ? 0 : 1,
    reworkTurns: 0,
    evidence: { source: "manual", routeEventId: eventId },
  };
}
103
+
104
/** True when the model name contains "gpt", "claude" or "gemini" (case-insensitive). */
function providerIsFrontier(model?: string): boolean {
  if (!model) return false;
  return /gpt|claude|gemini/i.test(model);
}
107
+
108
+ describe("router sharpening hints", () => {
109
+ it("generates deterministic provenance-backed model preference hints without transcript content", () => {
110
+ const qwen = [
111
+ event("11", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.9, 0.05),
112
+ event("12", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.85, 0.1),
113
+ event("13", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.88, 0.08),
114
+ event("14", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.86, 0.12),
115
+ event("15", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.82, 0.1),
116
+ ];
117
+ const gpt = [
118
+ event("21", "run_verifier", "gpt-5.5", "openai-codex", 0.62, 0.35),
119
+ event("22", "run_verifier", "gpt-5.5", "openai-codex", 0.58, 0.4),
120
+ event("23", "run_verifier", "gpt-5.5", "openai-codex", 0.65, 0.32),
121
+ event("24", "run_verifier", "gpt-5.5", "openai-codex", 0.6, 0.38),
122
+ event("25", "run_verifier", "gpt-5.5", "openai-codex", 0.61, 0.37),
123
+ ];
124
+ const artifact = generateSharpeningHints({
125
+ events: [...gpt, ...qwen].reverse(),
126
+ outcomes: [...qwen.map((item) => outcomeFor(item, "success")), ...gpt.map((item) => outcomeFor(item, "partial"))],
127
+ generatedAt: "2026-06-14T00:02:00.000Z",
128
+ inputs: { events: "events.jsonl", outcomes: "outcomes.jsonl" },
129
+ });
130
+
131
+ expect(artifact).toMatchObject({
132
+ schema: "pi-router.sharpening-hints.v1",
133
+ generatedAt: "2026-06-14T00:02:00.000Z",
134
+ totals: { events: 10, outcomes: 10, sessions: 2, models: 2 },
135
+ learningPolicy: { scope: "repo-local", ignoresRawTranscript: true, fallback: "baseline-router" },
136
+ manualPromotionRequired: true,
137
+ });
138
+ const prefer = artifact.hints.find((hint) => hint.kind === "prefer_model_for_action");
139
+ expect(prefer).toMatchObject({
140
+ action: "run_verifier",
141
+ modelId: "qwen3.6-35b-a3b-128k",
142
+ provider: "local",
143
+ confidence: "medium",
144
+ guardrails: { manualPromotionOnly: true, sparse: true, autoUse: { eligible: false } },
145
+ });
146
+ expect(prefer?.provenance.comparedWith?.[0]).toMatchObject({ modelId: "gpt-5.5", provider: "openai-codex", events: 5 });
147
+ expect(JSON.stringify(artifact)).not.toContain("SECRET_TOKEN");
148
+ expect(JSON.stringify(artifact)).not.toContain("raw-session");
149
+ });
150
+
151
+ it("marks sparse hints low-confidence and sample-size capped", () => { // Scenario: one event per model and no outcomes; the preference hint must carry low confidence and the sparse/capped guardrails.
152
+ const local = [event("31", "continue_current", "qwen-local", "local", 0.9, 0.05)]; // single event for the "local" provider; the meaning of the two trailing numbers is defined by the event() helper outside this view -- TODO confirm
153
+ const cloud = [event("41", "continue_current", "gpt-5.5", "openai-codex", 0.4, 0.6)]; // single event for the "openai-codex" provider, same action string as above
154
+
155
+ const artifact = generateSharpeningHints({ events: [...local, ...cloud], generatedAt: "2026-06-14T00:03:00.000Z" }); // no `outcomes` are passed in this scenario
156
+ const hint = artifact.hints.find((item) => item.kind === "prefer_model_for_action"); // first hint of this kind, or undefined when none was emitted
157
+
158
+ expect(hint?.confidence).toBe("low"); // also fails when no hint was found, because undefined is not "low"
159
+ expect(hint?.guardrails).toMatchObject({ sparse: true, sampleSizeCapped: true, manualPromotionOnly: true, autoUse: { eligible: false } }); // toMatchObject is a subset match: only the listed guardrail fields are checked
160
+ });
161
+
162
+ it("requires cross-session outcome-backed evidence before future auto bias eligibility", () => { // Scenario: 20 events per model, every event paired with an outcome; the local-model hint is expected to become auto-use eligible.
163
+ const strongLocal = [ // 2 x 10 events for the local model, built with id prefixes "8" and "9"
164
+ ...Array.from({ length: 10 }, (_, index) => event(`8${index}`, "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.9, 0.04)), // ids "80".."89"
165
+ ...Array.from({ length: 10 }, (_, index) => event(`9${index}`, "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.88, 0.06)), // ids "90".."99"; presumably the differing prefix places these in a second session -- verify against the event() helper
166
+ ];
167
+ const weakerCloud = [ // 2 x 10 events for the cloud model, built with id prefixes "a" and "b"
168
+ ...Array.from({ length: 10 }, (_, index) => event(`a${index}`, "run_verifier", "gpt-5.5", "openai-codex", 0.58, 0.4)), // ids "a0".."a9"
169
+ ...Array.from({ length: 10 }, (_, index) => event(`b${index}`, "run_verifier", "gpt-5.5", "openai-codex", 0.6, 0.38)), // ids "b0".."b9"
170
+ ];
171
+
172
+ const artifact = generateSharpeningHints({
173
+ events: [...strongLocal, ...weakerCloud],
174
+ outcomes: [...strongLocal.map((item) => outcomeFor(item, "success")), ...weakerCloud.map((item) => outcomeFor(item, "partial"))], // one outcome per event: "success" for local events, "partial" for cloud events
175
+ generatedAt: "2026-06-14T00:03:30.000Z",
176
+ });
177
+ const hint = artifact.hints.find((item) => item.kind === "prefer_model_for_action" && item.modelId === "qwen3.6-35b-a3b-128k"); // first preference hint that names the local model, or undefined
178
+
179
+ expect(hint?.confidence).toBe("high");
180
+ expect(hint?.guardrails).toMatchObject({ sparse: false, sampleSizeCapped: false, autoUse: { eligible: true } }); // NOTE(review): only the eligible case is asserted in this test; it does not itself show that eligibility is withheld when outcomes or sessions are missing
181
+ });
182
+
183
+ it("emits local savings candidates only as manual hints", () => { // Scenario: three local-model events and no outcomes; a savings_candidate hint is expected with low confidence and a manual-only rationale.
184
+ const events = [ // same model and provider on every event, three different action strings
185
+ event("51", "continue_current", "qwen3.6-35b-a3b-128k", "local", 0.9, 0.05),
186
+ event("52", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.88, 0.08),
187
+ event("53", "summarize_context", "qwen3.6-35b-a3b-128k", "local", 0.85, 0.1),
188
+ ];
189
+
190
+ const artifact = generateSharpeningHints({ events, generatedAt: "2026-06-14T00:04:00.000Z" }); // no `outcomes` are passed in this scenario
191
+ const savings = artifact.hints.find((hint) => hint.kind === "savings_candidate"); // first savings_candidate hint, or undefined when none was emitted
192
+
193
+ expect(savings).toMatchObject({ modelId: "qwen3.6-35b-a3b-128k", provider: "local", confidence: "low" }); // subset match on the three listed fields
194
+ expect(savings?.rationale).toContain("manual hint, not an automatic promotion"); // the rationale must include this exact wording
195
+ });
196
+
197
+ it("suppresses savings candidates when linked outcomes are all poor", () => { // Scenario: five local-model events whose outcomes are all "failed"; no savings_candidate hint may be emitted.
198
+ const events = [ // same model and provider on every event, spread over three action strings
199
+ event("71", "continue_current", "qwen3.6-35b-a3b-128k", "local", 0.95, 0.02),
200
+ event("72", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.93, 0.03),
201
+ event("73", "summarize_context", "qwen3.6-35b-a3b-128k", "local", 0.91, 0.04),
202
+ event("74", "continue_current", "qwen3.6-35b-a3b-128k", "local", 0.92, 0.03),
203
+ event("75", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.94, 0.02),
204
+ ];
205
+
206
+ const artifact = generateSharpeningHints({ events, outcomes: events.map((item) => outcomeFor(item, "failed")), generatedAt: "2026-06-14T00:04:30.000Z" }); // every event is paired with an outcome built via outcomeFor(item, "failed")
207
+
208
+ expect(artifact.hints.find((hint) => hint.kind === "savings_candidate")).toBeUndefined(); // find() returns undefined only when no hint of that kind exists
209
+ });
210
+
211
+ it("writes sharpening hints and fails clearly for missing event inputs", () => { // Scenario: file-based entry point; checks the returned artifact, the written file, and the error raised for a missing events file.
212
+ const eventPath = tempFile("events.jsonl"); // tempFile() is defined outside this view; presumably it returns a path inside a per-test temp directory -- TODO confirm
213
+ const outputPath = tempFile("hints.json");
214
+ const item = event("61", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.9, 0.05);
215
+ writeFileSync(eventPath, `${JSON.stringify(item)}\n`); // a single JSON document followed by a newline, i.e. a one-line JSONL file
216
+
217
+ const artifact = writeSharpeningHints({ eventsPath: eventPath, outputPath, generatedAt: "2026-06-14T00:05:00.000Z" }); // no outcomes path is supplied
218
+
219
+ expect(artifact.schema).toBe("pi-router.sharpening-hints.v1"); // schema tag on the returned artifact
220
+ expect(JSON.parse(readFileSync(outputPath, "utf8")).schema).toBe("pi-router.sharpening-hints.v1"); // same schema tag in the file read back from outputPath
221
+ expect(() => writeSharpeningHints({ eventsPath: "/tmp/pi-router-missing-events.jsonl", outputPath })).toThrow(/required route events file not found/); // NOTE(review): relies on this hard-coded /tmp path not existing on the machine running the test -- confirm this is acceptable on all CI platforms
222
+ });
223
+ });