@fiale-plus/pi-rogue 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -1
- package/node_modules/@fiale-plus/pi-rogue-advisor/README.md +1 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/binary-gate-features.test.ts +8 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/binary-gate-features.ts +7 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/router.test.ts +26 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/router.ts +10 -1
- package/node_modules/@fiale-plus/pi-rogue-orchestration/README.md +3 -3
- package/node_modules/@fiale-plus/pi-rogue-orchestration/package.json +3 -0
- package/node_modules/@fiale-plus/pi-rogue-orchestration/skills/orchestration/SKILL.md +3 -2
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/goal.test.ts +65 -2
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/goal.ts +84 -4
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/loop.ts +3 -0
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/novelty-guard.test.ts +43 -0
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/novelty-guard.ts +96 -11
- package/node_modules/@fiale-plus/pi-rogue-router/README.md +45 -6
- package/node_modules/@fiale-plus/pi-rogue-router/src/binary-gate.test.ts +88 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/binary-gate.ts +232 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/cli.ts +123 -9
- package/node_modules/@fiale-plus/pi-rogue-router/src/completions.ts +39 -16
- package/node_modules/@fiale-plus/pi-rogue-router/src/config-extension.test.ts +111 -4
- package/node_modules/@fiale-plus/pi-rogue-router/src/config.ts +17 -2
- package/node_modules/@fiale-plus/pi-rogue-router/src/extension.ts +67 -7
- package/node_modules/@fiale-plus/pi-rogue-router/src/index.ts +4 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/observe.ts +76 -5
- package/node_modules/@fiale-plus/pi-rogue-router/src/outcomes.ts +130 -6
- package/node_modules/@fiale-plus/pi-rogue-router/src/reports.test.ts +92 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/reports.ts +116 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/sharpening.test.ts +223 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/sharpening.ts +344 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/teacher-runner.test.ts +126 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/teacher-runner.ts +238 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/v1-telemetry.test.ts +54 -1
- package/package.json +1 -1
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
1
2
|
import { appendRouteEvent, buildRouteEvent } from "./ledger.js";
|
|
2
3
|
import { decideRoute } from "./decision.js";
|
|
3
4
|
import { checkpointWithDiffStats, streamCheckpointsFromSessionPath } from "./checkpoints.js";
|
|
@@ -21,12 +22,20 @@ export interface RouterObserveSummary {
|
|
|
21
22
|
role: keyof RouterProfile | "none" | "current";
|
|
22
23
|
targetModel?: string;
|
|
23
24
|
currentModel?: string;
|
|
25
|
+
currentProvider?: string;
|
|
24
26
|
match: boolean | null;
|
|
25
27
|
confidence: number;
|
|
26
28
|
reason: string;
|
|
27
29
|
text: string;
|
|
28
30
|
}
|
|
29
31
|
|
|
32
|
+
/**
 * Result of one attempt to switch the active model to the router's target.
 */
export interface RouterModelApplySummary {
  /** True only when the model switch was actually carried out. */
  applied: boolean;
  /** Human-readable explanation of why the switch was applied or skipped. */
  reason: string;
  /** Model that was active when the attempt was made, when known. */
  fromModel?: string;
  /** Model the router wanted to switch to, when one was selected. */
  toModel?: string;
}
|
|
38
|
+
|
|
30
39
|
function squish(text: unknown, max = 140): string {
|
|
31
40
|
const value = String(text ?? "").replace(/\s+/g, " ").trim();
|
|
32
41
|
return value.length <= max ? value : `${value.slice(0, max - 1).trimEnd()}…`;
|
|
@@ -53,10 +62,22 @@ function modelLeaf(model: string): string {
|
|
|
53
62
|
return model.split("/").at(-1)?.toLowerCase() ?? model.toLowerCase();
|
|
54
63
|
}
|
|
55
64
|
|
|
56
|
-
/**
 * Decide whether the currently active model is the routing target.
 *
 * All comparisons are case-insensitive.
 *
 * @param current - Identifier of the active model, possibly `provider/model`.
 * @param target - Identifier of the model the router wants.
 * @param currentProvider - Provider of the active model, when known.
 * @returns `null` when either model is missing, otherwise whether they match.
 */
export function modelsMatch(current: string | undefined, target: string | undefined, currentProvider?: string): boolean | null {
  if (!current || !target) return null;

  const active = current.toLowerCase();
  const wanted = target.toLowerCase();
  const slash = wanted.indexOf("/");

  // Unqualified target: fall back to the lenient leaf-based comparison.
  if (slash === -1) {
    const activeLeaf = modelLeaf(active);
    const wantedLeaf = modelLeaf(wanted);
    return active === wanted || activeLeaf === wantedLeaf || active.endsWith(`/${wantedLeaf}`) || wanted.endsWith(`/${activeLeaf}`);
  }

  const provider = currentProvider?.toLowerCase();

  // Qualified target without a known provider: only an exact qualified id
  // matches (a bare active id can never equal a target containing "/").
  if (!provider) return active === wanted;

  // Strip the active provider prefix from the active id when it is present.
  const prefix = `${provider}/`;
  const activeModel = active.startsWith(prefix) ? active.slice(prefix.length) : active;

  // Same provider: compare the model part only. Different provider: the
  // active model id itself must equal the whole qualified target.
  const wantedProvider = wanted.slice(0, slash);
  const expected = provider === wantedProvider ? wanted.slice(slash + 1) : wanted;
  return activeModel === expected;
}
|
|
62
83
|
|
|
@@ -70,7 +91,7 @@ export function summarizeRouterDecision(checkpoint: RouterCheckpoint, decision:
|
|
|
70
91
|
const profile = activeProfile(config);
|
|
71
92
|
const role = actionRole(decision.action);
|
|
72
93
|
const targetModel = targetForRole(role, profile, checkpoint.activeModel);
|
|
73
|
-
const match = role === "none" ? null : modelsMatch(checkpoint.activeModel, targetModel);
|
|
94
|
+
const match = role === "none" ? null : modelsMatch(checkpoint.activeModel, targetModel, checkpoint.provider);
|
|
74
95
|
const verdict = match === null ? "INFO" : match ? "MATCH" : "MISMATCH";
|
|
75
96
|
const roleText = role === "none" ? "no-model" : role;
|
|
76
97
|
const targetText = targetModel ? `${roleText}(${targetModel})` : roleText;
|
|
@@ -81,6 +102,7 @@ export function summarizeRouterDecision(checkpoint: RouterCheckpoint, decision:
|
|
|
81
102
|
role,
|
|
82
103
|
targetModel,
|
|
83
104
|
currentModel: checkpoint.activeModel,
|
|
105
|
+
currentProvider: checkpoint.provider,
|
|
84
106
|
match,
|
|
85
107
|
confidence: decision.confidence,
|
|
86
108
|
reason: decision.reason,
|
|
@@ -94,9 +116,51 @@ export async function latestCheckpointFromSession(sessionPath: string): Promise<
|
|
|
94
116
|
return latest;
|
|
95
117
|
}
|
|
96
118
|
|
|
97
|
-
|
|
119
|
+
/**
 * Look up the registry entry for a routing target.
 *
 * Resolution order:
 *  1. an entry whose id equals `target` on the currently observed provider;
 *  2. for `provider/model` targets, the registry's own `find`, then a scan
 *     for an entry whose `provider/id` equals `target`;
 *  3. an entry whose id equals `target`, but only when exactly one exists.
 *
 * @param ctx - Extension context; only `ctx.modelRegistry` is consulted.
 * @param target - Target model, either a bare id or `provider/model`.
 * @param currentProvider - Provider of the active model, when known.
 * @returns The entry and how it was matched, or `undefined` when nothing
 *          (or nothing unambiguous) was found.
 */
function findConfiguredModel(ctx: any, target: string, currentProvider?: string): { model: any; matchedBy: "qualified" | "id" } | undefined {
  const registry = ctx?.modelRegistry;
  const candidates = registry?.getAll?.() ?? [];
  const hasTargetId = (model: any): boolean => model.id === target;

  // 1. Prefer the provider that is already in use.
  const activeProvider = currentProvider?.toLowerCase();
  if (activeProvider) {
    const onActiveProvider = candidates.find((model: any) => hasTargetId(model) && String(model.provider).toLowerCase() === activeProvider);
    if (onActiveProvider) return { model: onActiveProvider, matchedBy: "id" };
  }

  // 2. Provider-qualified target: split on the first "/" only.
  const slash = target.indexOf("/");
  if (slash !== -1) {
    const viaRegistry = registry?.find?.(target.slice(0, slash), target.slice(slash + 1));
    if (viaRegistry) return { model: viaRegistry, matchedBy: "qualified" };

    const viaScan = candidates.find((model: any) => `${model.provider}/${model.id}` === target);
    if (viaScan) return { model: viaScan, matchedBy: "qualified" };
  }

  // 3. Bare id, accepted only when it identifies a single entry.
  const sameId = candidates.filter((model: any) => hasTargetId(model));
  if (sameId.length !== 1) return undefined;
  return { model: sameId[0], matchedBy: "id" };
}
|
|
134
|
+
|
|
135
|
+
/**
 * Check whether the active model is the resolved registry entry.
 *
 * Comparisons are case-insensitive.
 *
 * @param current - Identifier of the active model.
 * @param currentProvider - Provider of the active model, when known.
 * @param resolved - Registry entry plus how it was matched.
 * @returns `false` when the active model or the entry's provider/id is
 *          missing; otherwise whether the two identify the same model.
 */
function configuredModelMatches(current: string | undefined, currentProvider: string | undefined, resolved: { model: any; matchedBy: "qualified" | "id" }): boolean {
  const { model, matchedBy } = resolved;
  if (!current || !model?.provider || !model?.id) return false;

  const active = current.toLowerCase();
  const entryProvider = String(model.provider).toLowerCase();
  const entryId = String(model.id).toLowerCase();
  const activeProvider = currentProvider?.toLowerCase();

  if (!activeProvider) {
    // No provider information: require the fully qualified id, or accept
    // the bare id only when the entry itself was resolved by bare id.
    if (active === `${entryProvider}/${entryId}`) return true;
    return matchedBy === "id" && active === entryId;
  }

  // Provider known: both the provider and the (prefix-stripped) id must agree.
  if (activeProvider !== entryProvider) return false;
  const prefix = `${activeProvider}/`;
  const activeId = active.startsWith(prefix) ? active.slice(prefix.length) : active;
  return activeId === entryId;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Switch the active model to the router's target when that is needed.
 *
 * Nothing is switched when the route action has no target model, when the
 * active model already matches the target, or when the target cannot be
 * resolved in the model registry.
 *
 * @param pi - Extension API used for the switch; `setModel` is called only
 *             when present.
 * @param ctx - Extension context used to resolve the target model.
 * @param summary - Router decision summary for the current turn.
 * @returns Whether the switch was applied, with the reason and both models.
 */
export async function applyModelRouting(pi: Pick<ExtensionAPI, "setModel"> | undefined, ctx: any, summary: RouterObserveSummary): Promise<RouterModelApplySummary> {
  const { targetModel, currentModel, currentProvider, role } = summary;
  if (!targetModel || role === "none" || role === "current") {
    return { applied: false, reason: "no model switch for route action" };
  }

  // Every skipped outcome past this point reports the same model pair.
  const skipped = (reason: string): RouterModelApplySummary => ({ applied: false, reason, fromModel: currentModel, toModel: targetModel });
  const alreadyMatches = "current model already matches target";

  const resolved = findConfiguredModel(ctx, targetModel, currentProvider);
  if (!resolved) {
    // Without a registry entry only the textual comparison is available.
    const textualMatch = modelsMatch(currentModel, targetModel, currentProvider);
    return skipped(textualMatch ? alreadyMatches : `target model not configured: ${targetModel}`);
  }

  const alreadyActive =
    (resolved.matchedBy === "id" && modelsMatch(currentModel, targetModel, currentProvider)) ||
    configuredModelMatches(currentModel, currentProvider, resolved);
  if (alreadyActive) return skipped(alreadyMatches);

  const switched = await pi?.setModel?.(resolved.model);
  if (!switched) return skipped(`target model unavailable or missing auth: ${targetModel}`);

  return { applied: true, reason: summary.reason, fromModel: currentModel, toModel: targetModel };
}
|
|
160
|
+
|
|
161
|
+
export async function observeRouterTurn(ctx: any, pi?: Pick<ExtensionAPI, "setModel">): Promise<RouterObserveSummary | null> {
|
|
98
162
|
const config = loadRouterConfig(ctx);
|
|
99
|
-
if (!config.enabled || config.print === "off") return null;
|
|
163
|
+
if (!config.enabled || (config.print === "off" && config.mode === "observe")) return null;
|
|
100
164
|
const sessionPath = ctx?.sessionManager?.getSessionFile?.();
|
|
101
165
|
if (!sessionPath) return null;
|
|
102
166
|
const checkpoint = await latestCheckpointFromSession(String(sessionPath));
|
|
@@ -120,7 +184,14 @@ export async function observeRouterTurn(ctx: any): Promise<RouterObserveSummary
|
|
|
120
184
|
lastSummary: summary.text,
|
|
121
185
|
}, String(sessionPath));
|
|
122
186
|
|
|
187
|
+
if (config.mode === "auto_model") {
|
|
188
|
+
const applied = await applyModelRouting(pi, ctx, summary);
|
|
189
|
+
if (applied.applied || summary.match === false) {
|
|
190
|
+
ctx.ui?.notify?.(`router auto-model: ${applied.applied ? "APPLIED" : "SKIPPED"} ${applied.fromModel ?? "unknown"} → ${applied.toModel ?? "none"} · ${applied.reason}`, applied.applied ? "info" : "warning");
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
123
194
|
if (config.print === "mismatch_only" && summary.match !== false) return summary;
|
|
124
|
-
ctx.ui?.notify?.(summary.text, summary.match === false ? "warning" : "info");
|
|
195
|
+
if (config.print !== "off") ctx.ui?.notify?.(summary.text, summary.match === false ? "warning" : "info");
|
|
125
196
|
return summary;
|
|
126
197
|
}
|
|
@@ -6,6 +6,7 @@ import { readRouteEvents, type RouteEvent } from "./ledger.js";
|
|
|
6
6
|
import type { RouterCheckpoint, TaskStatus, TaskType } from "./types.js";
|
|
7
7
|
|
|
8
8
|
export const ROUTER_OUTCOME_SCHEMA = "pi-router.outcome.v1" as const;
|
|
9
|
+
export const ROUTER_OUTCOME_ENRICH_SUMMARY_SCHEMA = "pi-router.outcome-enrich-summary.v1" as const;
|
|
9
10
|
|
|
10
11
|
export interface RouterOutcome {
|
|
11
12
|
schema: typeof ROUTER_OUTCOME_SCHEMA;
|
|
@@ -43,9 +44,15 @@ export interface OutcomeWriteSummary {
|
|
|
43
44
|
inferred: number;
|
|
44
45
|
}
|
|
45
46
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
47
|
+
/**
 * Summary returned after enriched outcomes have been written to disk.
 */
export interface OutcomeEnrichSummary {
  /** Schema tag identifying this summary shape. */
  schema: typeof ROUTER_OUTCOME_ENRICH_SUMMARY_SCHEMA;
  /** Resolved path of the file the enriched outcomes were written to. */
  output: string;
  /** Number of outcomes read from the input file. */
  inputOutcomes: number;
  /** Number of outcomes written to the output file. */
  outputOutcomes: number;
  /** Number of outcomes whose serialized form changed during enrichment. */
  enriched: number;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Task status for an inferred outcome.
 *
 * Neither argument is inspected: the status is always reported as
 * `"unknown"`.
 */
function roundStatus(_event: RouteEvent, _checkpoint?: RouterCheckpoint): TaskStatus {
  const status: TaskStatus = "unknown";
  return status;
}
|
|
51
58
|
|
|
@@ -78,7 +85,7 @@ export function buildUnknownOutcome(event: RouteEvent, checkpoint?: RouterCheckp
|
|
|
78
85
|
testsPassedAfter: null,
|
|
79
86
|
verifierImproved: null,
|
|
80
87
|
acceptedDiff: null,
|
|
81
|
-
userInterrupted:
|
|
88
|
+
userInterrupted: false,
|
|
82
89
|
userOverrodeDecision: Boolean(event.observed.overriddenBy),
|
|
83
90
|
finalFilesTouched: checkpoint ? ((checkpoint.features.diffFilesChanged ?? 0) > 0 ? (checkpoint.features.diffFilesChanged ?? 0) : checkpoint.features.filesTouched) : 0,
|
|
84
91
|
finalDiffLines: checkpoint?.features.diffLines ?? 0,
|
|
@@ -107,8 +114,14 @@ export function readOutcomes(path?: string): RouterOutcome[] {
|
|
|
107
114
|
return readFileSync(resolved, "utf8")
|
|
108
115
|
.split("\n")
|
|
109
116
|
.filter((line) => line.trim())
|
|
110
|
-
.
|
|
111
|
-
try {
|
|
117
|
+
.map((line, index) => {
|
|
118
|
+
try {
|
|
119
|
+
const outcome = JSON.parse(line) as RouterOutcome;
|
|
120
|
+
if (outcome.schema !== ROUTER_OUTCOME_SCHEMA) throw new Error("invalid schema");
|
|
121
|
+
return outcome;
|
|
122
|
+
} catch (error) {
|
|
123
|
+
throw new Error(`invalid outcome JSONL at ${path}:${index + 1}: ${error instanceof Error ? error.message : String(error)}`);
|
|
124
|
+
}
|
|
112
125
|
});
|
|
113
126
|
}
|
|
114
127
|
|
|
@@ -118,6 +131,94 @@ export function writeOutcomesJsonl(outcomes: RouterOutcome[], path: string): voi
|
|
|
118
131
|
writeFileSync(resolved, outcomes.map((outcome) => JSON.stringify(outcome)).join("\n") + (outcomes.length ? "\n" : ""));
|
|
119
132
|
}
|
|
120
133
|
|
|
134
|
+
/**
 * Find the route event an outcome refers to.
 *
 * The outcome's `routeEventId` is tried first; when that yields nothing, the
 * event indexed under the outcome's `checkpointId` is used instead.
 *
 * @returns The linked route event, or `undefined` when neither id resolves.
 */
function routeEventForOutcome(outcome: RouterOutcome, byId: Map<string, RouteEvent>, byCheckpoint: Map<string, RouteEvent>): RouteEvent | undefined {
  const { routeEventId, checkpointId } = outcome;

  if (routeEventId) {
    const direct = byId.get(routeEventId);
    if (direct != null) return direct;
  }

  return checkpointId ? byCheckpoint.get(checkpointId) : undefined;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Find the checkpoint an outcome refers to.
 *
 * The outcome's own `checkpointId` is tried first; when that yields nothing,
 * the checkpoint referenced by the linked route event is used instead.
 *
 * @returns The matching checkpoint, or `undefined` when neither id resolves.
 */
function checkpointForOutcome(outcome: RouterOutcome, event: RouteEvent | undefined, byCheckpoint: Map<string, RouterCheckpoint>): RouterCheckpoint | undefined {
  if (outcome.checkpointId) {
    const own = byCheckpoint.get(outcome.checkpointId);
    if (own != null) return own;
  }

  return event ? byCheckpoint.get(event.checkpointId) : undefined;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Infer a task status for an outcome from the available evidence.
 *
 * Rules, in priority order:
 *  1. A followed, non-overridden `stop_and_ask_user` decision or a user
 *     interruption keeps the recorded status, with `"unknown"` becoming
 *     `"abandoned"`.
 *  2. Passing tests together with a non-empty diff mean `"success"`.
 *  3. Passing tests without a diff turn `"unknown"` into `"partial"`.
 *  4. Failing tests turn `"unknown"` into `"failed"`.
 *  5. Anything else keeps the recorded status.
 *
 * @param outcome - Outcome whose status is being inferred.
 * @param checkpoint - Checkpoint evidence, when available.
 * @param event - Route event evidence, when available.
 * @param testsPassed - Test result; defaults to `outcome.testsPassedAfter`.
 * @returns The inferred status.
 */
function inferredStatus(outcome: RouterOutcome, checkpoint?: RouterCheckpoint, event?: RouteEvent, testsPassed: boolean | null = outcome.testsPassedAfter): TaskStatus {
  const recorded = outcome.taskStatus;

  const stopWasFollowed = event?.decision.action === "stop_and_ask_user" && event.observed.followed === true && !event.observed.overriddenBy;
  if (stopWasFollowed || outcome.userInterrupted) return recorded === "unknown" ? "abandoned" : recorded;

  if (testsPassed === true) {
    // Largest diff size reported by any of the evidence sources.
    const diffLines = Math.max(outcome.finalDiffLines, checkpoint?.features.diffLines ?? 0, event?.metrics.diffLines ?? 0);
    // NOTE(review): this upgrades every recorded status, including "failed"
    // and "abandoned", not only "unknown"/"partial" — confirm that is intended.
    // It also made the former trailing "partial → success" rule unreachable,
    // which is why that rule is no longer spelled out separately.
    if (diffLines > 0) return "success";
    if (recorded === "unknown") return "partial";
    return recorded;
  }

  if (testsPassed === false && recorded === "unknown") return "failed";
  return recorded;
}
|
|
151
|
+
|
|
152
|
+
/**
 * Fill in an outcome from checkpoint and route-event evidence.
 *
 * Values already recorded on the outcome take precedence for nullable fields
 * (`verifierImproved`, `acceptedDiff`, ids, evidence references). Numeric
 * counters are raised to the largest value seen in the evidence, and the two
 * user flags can only change from false to true.
 *
 * @param outcome - Outcome to enrich; it is not mutated.
 * @param options - Optional checkpoint, route event and replacement
 *                  `recordedAt` timestamp.
 * @returns A new outcome object carrying the enriched fields.
 */
export function enrichOutcome(outcome: RouterOutcome, options: { checkpoint?: RouterCheckpoint; event?: RouteEvent; recordedAt?: string } = {}): RouterOutcome {
  const { checkpoint, event, recordedAt } = options;

  const testsPassedAfter = outcome.testsPassedAfter;
  const verifierImproved = outcome.verifierImproved ?? checkpoint?.features.testsImproved ?? null;
  const taskStatus = inferredStatus(outcome, checkpoint, event, testsPassedAfter);

  // Evidence values: the checkpoint wins over the route event when present.
  const filesFromEvidence = () => {
    if (!checkpoint) return event?.metrics.diffFilesChanged ?? 0;
    return (checkpoint.features.diffFilesChanged ?? 0) > 0 ? checkpoint.features.diffFilesChanged : checkpoint.features.filesTouched;
  };
  const observedDiffLines = checkpoint?.features.diffLines ?? event?.metrics.diffLines ?? 0;
  const observedFilesTouched = filesFromEvidence();
  const repeatedErrors = checkpoint?.features.sameErrorRepeatedCount ?? event?.metrics.sameErrorRepeatedCount ?? 0;
  // The first occurrence of an error is not counted as rework.
  const reworkFromRepeats = repeatedErrors > 1 ? repeatedErrors - 1 : 0;

  const finalDiffLines = Math.max(outcome.finalDiffLines, observedDiffLines);
  const finalFilesTouched = Math.max(outcome.finalFilesTouched, observedFilesTouched);
  const reworkTurns = Math.max(outcome.reworkTurns, reworkFromRepeats);

  const inferAcceptedDiff = (): boolean | null => {
    if (finalDiffLines > 0 && testsPassedAfter === true) return true;
    if (testsPassedAfter === false || taskStatus === "abandoned") return false;
    return null;
  };
  const acceptedDiff = outcome.acceptedDiff ?? inferAcceptedDiff();

  const stopWasFollowed = Boolean(event?.decision.action === "stop_and_ask_user" && event.observed.followed === true && !event.observed.overriddenBy);

  // Serialized enrichment notes; hashed only when the outcome has no notes hash.
  const notes = JSON.stringify({ enrichedFromCheckpoint: checkpoint?.checkpointId, routeEventId: event?.eventId, taskStatus, testsPassedAfter, verifierImproved, acceptedDiff });

  // Property order is kept stable because outcomes are serialized and
  // compared as JSON text elsewhere in this module.
  return {
    ...outcome,
    recordedAt: recordedAt ?? outcome.recordedAt,
    checkpointId: outcome.checkpointId ?? event?.checkpointId,
    routeEventId: outcome.routeEventId ?? event?.eventId,
    taskType: outcome.taskType === "unknown" ? taskTypeFromCheckpoint(checkpoint) : outcome.taskType,
    taskStatus,
    testsPassedAfter,
    verifierImproved,
    acceptedDiff,
    userInterrupted: outcome.userInterrupted || stopWasFollowed,
    userOverrodeDecision: outcome.userOverrodeDecision || Boolean(event?.observed.overriddenBy),
    finalFilesTouched,
    finalDiffLines,
    reworkTurns,
    evidence: {
      ...outcome.evidence,
      rawSessionRef: outcome.evidence.rawSessionRef ?? checkpoint?.rawSessionRef ?? event?.rawSessionRef,
      routeEventId: outcome.evidence.routeEventId ?? event?.eventId,
      notesHash: outcome.evidence.notesHash ?? hashText(notes),
    },
  };
}
|
|
194
|
+
|
|
195
|
+
/**
 * Verify that every outcome's ids point at known evidence.
 *
 * A check is skipped when the evidence it would need is empty, so outcomes
 * can be validated against checkpoints only, events only, or both.
 *
 * @throws Error when a `routeEventId` is unknown, when an outcome's
 *         `checkpointId` disagrees with its route event, or when a
 *         `checkpointId` appears in neither the checkpoints nor the events.
 */
function validateOutcomeLinks(outcomes: RouterOutcome[], checkpoints: RouterCheckpoint[], events: RouteEvent[]): void {
  const eventsById = new Map<string, RouteEvent>();
  // Checkpoint ids known from either the checkpoints or the route events.
  const knownCheckpointIds = new Set<string>();
  for (const checkpoint of checkpoints) knownCheckpointIds.add(checkpoint.checkpointId);
  for (const event of events) {
    eventsById.set(event.eventId, event);
    knownCheckpointIds.add(event.checkpointId);
  }

  const haveEvents = events.length > 0;
  const haveEvidence = haveEvents || checkpoints.length > 0;

  for (const outcome of outcomes) {
    const { routeEventId, checkpointId } = outcome;

    if (routeEventId) {
      if (haveEvents && !eventsById.has(routeEventId)) {
        throw new Error(`outcome routeEventId not found: ${routeEventId}`);
      }
      const linked = checkpointId ? eventsById.get(routeEventId) : undefined;
      if (linked && linked.checkpointId !== checkpointId) {
        throw new Error(`outcome routeEventId/checkpointId mismatch: ${routeEventId}`);
      }
    }

    if (haveEvidence && checkpointId && !knownCheckpointIds.has(checkpointId)) {
      throw new Error(`outcome checkpointId not found: ${checkpointId}`);
    }
  }
}
|
|
209
|
+
|
|
210
|
+
/**
 * Enrich a list of outcomes from checkpoint and route-event evidence.
 *
 * Links are validated first, so an inconsistent input throws before any
 * outcome is enriched.
 *
 * @param outcomes - Outcomes to enrich.
 * @param checkpoints - Checkpoint evidence; defaults to none.
 * @param events - Route-event evidence; defaults to none.
 * @param recordedAt - Optional timestamp applied to every enriched outcome.
 * @returns One enriched outcome per input outcome, in the same order.
 */
export function enrichOutcomes(outcomes: RouterOutcome[], checkpoints: RouterCheckpoint[] = [], events: RouteEvent[] = [], recordedAt?: string): RouterOutcome[] {
  validateOutcomeLinks(outcomes, checkpoints, events);

  const checkpointsById = new Map<string, RouterCheckpoint>();
  for (const checkpoint of checkpoints) checkpointsById.set(checkpoint.checkpointId, checkpoint);

  // When several events share an id or a checkpoint, the last one wins.
  const eventsById = new Map<string, RouteEvent>();
  const eventsByCheckpoint = new Map<string, RouteEvent>();
  for (const event of events) {
    eventsById.set(event.eventId, event);
    eventsByCheckpoint.set(event.checkpointId, event);
  }

  const enrichOne = (outcome: RouterOutcome): RouterOutcome => {
    const event = routeEventForOutcome(outcome, eventsById, eventsByCheckpoint);
    const checkpoint = checkpointForOutcome(outcome, event, checkpointsById);
    return enrichOutcome(outcome, { checkpoint, event, recordedAt });
  };
  return outcomes.map((outcome) => enrichOne(outcome));
}
|
|
221
|
+
|
|
121
222
|
export function writeInferredOutcomes(options: { checkpointPath: string; eventsPath: string; outputPath: string }): OutcomeWriteSummary {
|
|
122
223
|
if (!existsSync(resolve(options.eventsPath))) throw new Error(`required route events file not found: ${options.eventsPath}`);
|
|
123
224
|
const checkpoints = readCheckpointJsonl(options.checkpointPath);
|
|
@@ -126,3 +227,26 @@ export function writeInferredOutcomes(options: { checkpointPath: string; eventsP
|
|
|
126
227
|
writeOutcomesJsonl(outcomes, options.outputPath);
|
|
127
228
|
return { schema: "pi-router.outcomes-summary.v1", output: resolve(options.outputPath), outcomes: outcomes.length, inferred: outcomes.length };
|
|
128
229
|
}
|
|
230
|
+
|
|
231
|
+
/**
 * Read outcomes, enrich them from evidence files and write the result.
 *
 * @param options - Input outcomes path, output path, and at least one of the
 *                  checkpoint and route-events evidence paths.
 * @returns Counts describing what was read, written and changed.
 * @throws Error when no evidence path is given, when a given evidence file
 *         does not exist, or when the evidence contains no records.
 */
export function writeEnrichedOutcomes(options: { outcomesPath: string; outputPath: string; checkpointPath?: string; eventsPath?: string }): OutcomeEnrichSummary {
  const { outcomesPath, outputPath, checkpointPath, eventsPath } = options;

  if (!checkpointPath && !eventsPath) throw new Error("outcome enrichment requires --checkpoint-file or --events evidence");
  if (eventsPath && !existsSync(resolve(eventsPath))) throw new Error(`route events file not found: ${eventsPath}`);
  if (checkpointPath && !existsSync(resolve(checkpointPath))) throw new Error(`checkpoint file not found: ${checkpointPath}`);

  const input = readOutcomes(outcomesPath);
  const checkpoints = checkpointPath ? readCheckpointJsonl(checkpointPath) : [];
  const events = eventsPath ? readRouteEvents(eventsPath) : [];

  if (checkpoints.length === 0 && events.length === 0) {
    // At least one path is set here; name the empty file when only one was given.
    let message = "outcome enrichment evidence files contain no usable checkpoint or route events";
    if (!eventsPath) message = `checkpoint file contains no checkpoints: ${checkpointPath}`;
    else if (!checkpointPath) message = `route events file contains no events: ${eventsPath}`;
    throw new Error(message);
  }

  const enriched = enrichOutcomes(input, checkpoints, events);
  writeOutcomesJsonl(enriched, outputPath);

  // An outcome counts as enriched when its serialized form differs from the input.
  let changed = 0;
  enriched.forEach((outcome, index) => {
    if (JSON.stringify(outcome) !== JSON.stringify(input[index])) changed += 1;
  });

  return {
    schema: ROUTER_OUTCOME_ENRICH_SUMMARY_SCHEMA,
    output: resolve(outputPath),
    inputOutcomes: input.length,
    outputOutcomes: enriched.length,
    enriched: changed,
  };
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { mkdtempSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { describe, expect, it } from "vitest";
|
|
5
|
+
import { writeRouterReport } from "./reports.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Build a path for `name` inside a freshly created temporary directory.
 *
 * Only the directory is created; the file itself is not.
 */
function tempFile(name: string): string {
  const scratchDir = mkdtempSync(join(tmpdir(), "pi-router-report-"));
  return join(scratchDir, name);
}
|
|
10
|
+
|
|
11
|
+
// Raw-session reference shared by every fixture in this file.
const rawRef = {
  schema: "pi-router.raw-session-ref.v1",
  path: "/tmp/session.jsonl",
  fromEvent: 0,
  toEvent: 1,
  fromByte: 0,
  toByte: 1,
  contentHash: "hash",
};

/**
 * Route-event fixture: a decision that was not followed and was overridden
 * by `continue_current`.
 *
 * @param action - Action recorded on the decision.
 */
function event(action = "run_verifier") {
  const checkpointId = "checkpoint-1";

  const decision = {
    schema: "pi-router.route-decision.v1",
    decisionId: "decision-1",
    checkpointId,
    action,
    reason: "test",
    confidence: 0.5,
    policyVersion: "test",
    alternatives: [],
  };
  const runtime = { activeModel: "qwen", provider: "local", contextTokensApprox: 1000, gitDirty: true };
  const observed = { followed: false, overriddenBy: "continue_current" };
  const metrics = {
    loopScore: 0.2,
    progressScore: 0.8,
    sameCommandRepeatedCount: 1,
    sameErrorRepeatedCount: 0,
    verifierUsed: true,
    diffLines: 10,
    diffFilesChanged: 1,
  };

  return {
    schema: "pi-router.route-event.v1",
    eventId: "event-1",
    recordedAt: "2026-06-14T00:00:00.000Z",
    checkpointId,
    sessionId: "session-1",
    rawSessionRef: rawRef,
    sourceEvent: { index: 0, timestamp: null },
    decision,
    runtime,
    observed,
    metrics,
  };
}
|
|
28
|
+
|
|
29
|
+
/**
 * Outcome fixture: a successful, manually recorded implementation task
 * linked to `checkpoint-1` and `event-1`.
 */
function outcome() {
  const routeEventId = "event-1";
  const evidence = { source: "manual", rawSessionRef: rawRef, routeEventId, notesHash: "notes" };

  return {
    schema: "pi-router.outcome.v1",
    outcomeId: "outcome-1",
    recordedAt: "2026-06-14T00:00:00.000Z",
    sessionId: "session-1",
    checkpointId: "checkpoint-1",
    routeEventId,
    taskType: "implementation",
    taskStatus: "success",
    // Verification results.
    testsPassedAfter: true,
    verifierImproved: true,
    acceptedDiff: true,
    // User interaction flags.
    userInterrupted: false,
    userOverrodeDecision: true,
    // Size and cost counters.
    finalFilesTouched: 1,
    finalDiffLines: 10,
    wallTimeMs: null,
    cloudCostUsd: null,
    frontierCalls: 0,
    localTurns: 2,
    reworkTurns: 0,
    evidence,
  };
}
|
|
54
|
+
|
|
55
|
+
/**
 * Training-row fixture for the given binary-gate label.
 *
 * An `"unknown"` label yields an unlabeled row (null route action and
 * confidence, `"unknown"` source) that is marked as having excluded the local
 * rule as truth; any other label yields a teacher-labeled row.
 */
function trainingRow(label: "continue" | "intervene" | "unknown") {
  const unlabeled = label === "unknown";

  const features = {
    phase: "implementation",
    activeModel: "qwen",
    provider: "local",
    contextTokensApprox: 1000,
    sameCommandRepeatedCount: 1,
    sameErrorRepeatedCount: 0,
    loopScore: 0.1,
    progressScore: 0.9,
    verifierUsed: true,
    noVerifierUsed: false,
    diffLines: 10,
    diffFilesChanged: 1,
    diffChurnScore: 0.01,
    filesTouched: 1,
  };
  const labels = unlabeled
    ? { routeAction: null, binaryGate: label, source: "unknown", confidence: null }
    : { routeAction: "continue_current", binaryGate: label, source: "teacher", confidence: 0.8 };

  return {
    schema: "pi-router.training-row.v1",
    checkpointId: `checkpoint-${label}`,
    sessionId: "session-1",
    rawSessionRef: rawRef,
    features,
    labels,
    outcome: { taskStatus: "unknown", testsPassedAfter: null, acceptedDiff: null, userOverrodeDecision: null, reworkTurns: null },
    provenance: { localRuleAction: "continue_current", excludedLocalRuleAsTruth: unlabeled },
  };
}
|
|
67
|
+
|
|
68
|
+
describe("router report", () => {
  it("writes JSON and Markdown summaries", () => {
    // Each input and output lives in its own temporary directory.
    const eventsPath = tempFile("events.jsonl");
    const outcomesPath = tempFile("outcomes.jsonl");
    const rowsPath = tempFile("training.jsonl");
    const gatePath = tempFile("gate-report.json");
    const outputPath = tempFile("report.json");
    const markdownPath = tempFile("report.md");

    const trainingLines = [trainingRow("continue"), trainingRow("unknown")].map((row) => JSON.stringify(row));
    const gateEval = {
      schema: "pi-router.binary-gate-eval.v1",
      candidate: { accuracy: 0.8, f1: 0.7 },
      ruleBaseline: { accuracy: 0.6, f1: 0.5 },
    };

    writeFileSync(eventsPath, `${JSON.stringify(event())}\n`);
    writeFileSync(outcomesPath, `${JSON.stringify(outcome())}\n`);
    writeFileSync(rowsPath, trainingLines.join("\n") + "\n");
    writeFileSync(gatePath, JSON.stringify(gateEval));

    const report = writeRouterReport({
      eventsPath,
      outcomesPath,
      trainingRowsPath: rowsPath,
      gateReportPath: gatePath,
      outputPath,
      markdownPath,
    });

    expect(report).toMatchObject({
      schema: "pi-router.report.v1",
      routeEvents: { total: 1, mismatches: 1 },
      outcomes: { total: 1, linked: 1 },
      trainingRows: { total: 2, labeled: 1, localRuleExcluded: 1 },
    });

    // Both output files must have been written.
    const writtenJson = JSON.parse(readFileSync(outputPath, "utf8"));
    expect(writtenJson.schema).toBe("pi-router.report.v1");
    const writtenMarkdown = readFileSync(markdownPath, "utf8");
    expect(writtenMarkdown).toContain("# Pi router report");
  });

  it("requires at least one report input and rejects missing provided inputs", () => {
    const withoutInputs = () => writeRouterReport({ outputPath: tempFile("report.json") });
    expect(withoutInputs).toThrow(/requires at least one input/);

    const withMissingEvents = () => writeRouterReport({ eventsPath: tempFile("missing-events.jsonl"), outputPath: tempFile("report.json") });
    expect(withMissingEvents).toThrow(/report input file not found/);
  });
});
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { dirname, resolve } from "node:path";
|
|
3
|
+
import { readTrainingRows } from "./binary-gate.js";
|
|
4
|
+
import { readRouteEvents, type RouteEvent } from "./ledger.js";
|
|
5
|
+
import { readOutcomes, type RouterOutcome } from "./outcomes.js";
|
|
6
|
+
|
|
7
|
+
export const ROUTER_REPORT_SCHEMA = "pi-router.report.v1" as const;
|
|
8
|
+
|
|
9
|
+
/**
 * Aggregated router report built from route events, outcomes, training rows
 * and an optional gate evaluation.
 */
export interface RouterReport {
  /** Schema tag identifying this report shape. */
  schema: typeof ROUTER_REPORT_SCHEMA;
  /** Timestamp string recording when the report was generated. */
  generatedAt: string;
  /** Input files the report was built from; absent entries were not given. */
  inputs: { events?: string; outcomes?: string; gateReport?: string; trainingRows?: string };
  /** Route-event totals, broken down by action and by active model. */
  routeEvents: { total: number; byAction: Record<string, number>; byModel: Record<string, number>; mismatches: number };
  /** Outcome totals, broken down by task status. */
  outcomes: { total: number; byStatus: Record<string, number>; linked: number; missingEvidence: number };
  /** Training-row totals, broken down by binary-gate label. */
  trainingRows: { total: number; labeled: number; unlabeled: number; localRuleExcluded: number; byGate: Record<string, number> };
  /** Gate evaluation as parsed from the gate report file, unvalidated. */
  gate?: unknown;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Add one to the counter stored under `key`, creating it at 1 when absent.
 *
 * Only own properties count as existing counters. Keys come from file
 * contents, so a name that collides with an inherited `Object.prototype`
 * member ("constructor", "toString", "__proto__", …) must still be counted
 * as a plain number.
 *
 * @param map - Counter table, mutated in place.
 * @param key - Counter name.
 */
function increment(map: Record<string, number>, key: string): void {
  const previous = Object.prototype.hasOwnProperty.call(map, key) ? map[key] ?? 0 : 0;
  // defineProperty creates an own data property even for "__proto__", where a
  // plain assignment would hit the inherited setter and drop the value.
  Object.defineProperty(map, key, { value: previous + 1, writable: true, enumerable: true, configurable: true });
}
|
|
22
|
+
|
|
23
|
+
/**
 * Summarize route events: the total, counts per decision action and per
 * active model, and the number of events that were not followed or were
 * overridden.
 */
function routeSummary(events: RouteEvent[]): RouterReport["routeEvents"] {
  const summary: RouterReport["routeEvents"] = { total: events.length, byAction: {}, byModel: {}, mismatches: 0 };

  for (const { decision, runtime, observed } of events) {
    increment(summary.byAction, decision.action);
    // Events without an active model are grouped under "unknown".
    increment(summary.byModel, runtime.activeModel ?? "unknown");

    const diverged = observed.followed === false || Boolean(observed.overriddenBy);
    if (diverged) summary.mismatches += 1;
  }

  return summary;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Summarize outcomes: the total, counts per task status, how many are linked
 * to a route event or checkpoint, and how many carry neither a raw-session
 * reference nor a notes hash.
 */
function outcomeSummary(outcomes: RouterOutcome[]): RouterReport["outcomes"] {
  const summary: RouterReport["outcomes"] = { total: outcomes.length, byStatus: {}, linked: 0, missingEvidence: 0 };

  for (const outcome of outcomes) {
    increment(summary.byStatus, outcome.taskStatus);

    const isLinked = Boolean(outcome.routeEventId || outcome.checkpointId);
    if (isLinked) summary.linked += 1;

    const { rawSessionRef, notesHash } = outcome.evidence;
    const hasEvidence = Boolean(rawSessionRef || notesHash);
    if (!hasEvidence) summary.missingEvidence += 1;
  }

  return summary;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Summarises binary-gate training rows read from `rowsPath`.
 *
 * A row counts as labeled when its `binaryGate` label is anything other than
 * "unknown"; among the "unknown" rows, those flagged
 * `provenance.excludedLocalRuleAsTruth` are additionally counted as
 * `localRuleExcluded`.
 *
 * @param rowsPath - Training rows file; when omitted an all-zero summary is returned.
 * @returns Totals for the `trainingRows` section of the report.
 */
function trainingSummary(rowsPath?: string): RouterReport["trainingRows"] {
  const summary: RouterReport["trainingRows"] = {
    total: 0,
    labeled: 0,
    unlabeled: 0,
    localRuleExcluded: 0,
    byGate: {},
  };
  if (!rowsPath) return summary;

  const rows = readTrainingRows(rowsPath);
  summary.total = rows.length;
  for (const row of rows) {
    increment(summary.byGate, row.labels.binaryGate);
    if (row.labels.binaryGate !== "unknown") {
      summary.labeled += 1;
      continue;
    }
    if (row.provenance.excludedLocalRuleAsTruth) summary.localRuleExcluded += 1;
  }
  // Everything that did not receive a concrete gate label is unlabeled.
  summary.unlabeled = summary.total - summary.labeled;
  return summary;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Loads and parses an optional JSON input file.
 *
 * @param path - File to read, resolved with `path.resolve`; when omitted or empty nothing is read.
 * @returns The parsed value, or `undefined` when no path was supplied.
 * @throws Error when the file does not exist.
 * @throws SyntaxError when the file content is not valid JSON; the message
 *   names the offending file so it can be told apart from the other inputs.
 */
function readJson(path?: string): unknown {
  if (!path) return undefined;
  const absolute = resolve(path);
  if (!existsSync(absolute)) throw new Error(`report input file not found: ${path}`);
  const text = readFileSync(absolute, "utf8");
  try {
    return JSON.parse(text);
  } catch (error: unknown) {
    // Keep the SyntaxError type, but add the file name the parser cannot know about.
    const detail = error instanceof Error ? error.message : String(error);
    throw new SyntaxError(`report input file is not valid JSON: ${path}: ${detail}`);
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Renders a report as a short Markdown document: headline counts, route
 * actions, outcome statuses and, when a gate report object is present, the
 * candidate and rule-baseline accuracy/f1.
 *
 * Count lists are ordered by key (code-unit order) so the output is stable.
 *
 * @param report - Report to render.
 * @returns Markdown text terminated by a newline.
 */
function markdown(report: RouterReport): string {
  type GateMetrics = { accuracy?: number; f1?: number };
  // The gate payload is unvalidated JSON; only the fields read below are assumed.
  const gate = report.gate && typeof report.gate === "object" ? (report.gate as { candidate?: GateMetrics; ruleBaseline?: GateMetrics }) : undefined;

  // Sort on the key alone. A comparator-less sort() would compare the stringified
  // "key,value" tuples and misplace keys containing characters below ",".
  const countLines = (counts: Record<string, number>): string[] =>
    Object.entries(counts)
      .sort(([left], [right]) => (left < right ? -1 : left > right ? 1 : 0))
      .map(([key, value]) => `- ${key}: ${value}`);

  const lines = [
    "# Pi router report",
    "",
    `- generatedAt: ${report.generatedAt}`,
    `- route events: ${report.routeEvents.total}`,
    `- route mismatches/overrides: ${report.routeEvents.mismatches}`,
    `- outcomes: ${report.outcomes.total}`,
    `- training rows: ${report.trainingRows.total} (${report.trainingRows.labeled} labeled, ${report.trainingRows.unlabeled} unlabeled)`,
    `- local-rule labels excluded: ${report.trainingRows.localRuleExcluded}`,
    "",
    "## Route actions",
    ...countLines(report.routeEvents.byAction),
    "",
    "## Outcome status",
    ...countLines(report.outcomes.byStatus),
  ];
  if (gate) {
    lines.push(
      "",
      "## Gate eval",
      `- candidate accuracy/f1: ${gate.candidate?.accuracy ?? "n/a"}/${gate.candidate?.f1 ?? "n/a"}`,
      `- rule baseline accuracy/f1: ${gate.ruleBaseline?.accuracy ?? "n/a"}/${gate.ruleBaseline?.f1 ?? "n/a"}`,
    );
  }
  return `${lines.join("\n")}\n`;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Builds an in-memory router report from whichever input files are supplied.
 *
 * @param options - Optional input paths plus an optional `generatedAt` override
 *   (defaults to the current time as an ISO string).
 * @returns The assembled report; sections without an input are empty tallies.
 * @throws Error when `eventsPath` or `gateReportPath` points at a missing file.
 */
export function buildRouterReport(options: { eventsPath?: string; outcomesPath?: string; trainingRowsPath?: string; gateReportPath?: string; generatedAt?: string }): RouterReport {
  const { eventsPath, outcomesPath, trainingRowsPath, gateReportPath } = options;

  // NOTE(review): only the events path is existence-checked here; whether
  // readOutcomes / readTrainingRows reject a missing file themselves is not
  // visible from this module — confirm.
  if (eventsPath && !existsSync(resolve(eventsPath))) {
    throw new Error(`report input file not found: ${eventsPath}`);
  }

  const routeEvents = eventsPath ? readRouteEvents(eventsPath) : [];
  const recordedOutcomes = outcomesPath ? readOutcomes(outcomesPath) : [];

  return {
    schema: ROUTER_REPORT_SCHEMA,
    generatedAt: options.generatedAt ?? new Date().toISOString(),
    inputs: {
      events: eventsPath,
      outcomes: outcomesPath,
      trainingRows: trainingRowsPath,
      gateReport: gateReportPath,
    },
    routeEvents: routeSummary(routeEvents),
    outcomes: outcomeSummary(recordedOutcomes),
    trainingRows: trainingSummary(trainingRowsPath),
    gate: readJson(gateReportPath),
  };
}
|
|
105
|
+
|
|
106
|
+
/**
 * Builds a router report and writes it to disk as pretty-printed JSON and,
 * when `markdownPath` is given, as Markdown too. Parent directories of both
 * targets are created as needed.
 *
 * @param options - Output locations plus the optional input paths.
 * @returns The report that was written.
 * @throws Error when none of the four input paths is provided.
 */
export function writeRouterReport(options: { outputPath: string; markdownPath?: string; eventsPath?: string; outcomesPath?: string; trainingRowsPath?: string; gateReportPath?: string }): RouterReport {
  const inputPaths = [options.eventsPath, options.outcomesPath, options.trainingRowsPath, options.gateReportPath];
  if (!inputPaths.some(Boolean)) {
    throw new Error("router report requires at least one input file");
  }

  const report = buildRouterReport(options);

  const jsonTarget = resolve(options.outputPath);
  mkdirSync(dirname(jsonTarget), { recursive: true });
  writeFileSync(jsonTarget, `${JSON.stringify(report, null, 2)}\n`);

  if (!options.markdownPath) return report;

  const markdownTarget = resolve(options.markdownPath);
  mkdirSync(dirname(markdownTarget), { recursive: true });
  writeFileSync(markdownTarget, markdown(report));
  return report;
}
|