@fiale-plus/pi-rogue 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/node_modules/@fiale-plus/pi-core/src/context-broker.ts +4 -0
- package/node_modules/@fiale-plus/pi-rogue-context-broker/README.md +24 -5
- package/node_modules/@fiale-plus/pi-rogue-context-broker/src/extension.test.ts +119 -7
- package/node_modules/@fiale-plus/pi-rogue-context-broker/src/extension.ts +124 -16
- package/node_modules/@fiale-plus/pi-rogue-context-broker/src/index.test.ts +32 -0
- package/node_modules/@fiale-plus/pi-rogue-context-broker/src/index.ts +32 -1
- package/node_modules/@fiale-plus/pi-rogue-context-broker/src/sqlite.test.ts +37 -0
- package/node_modules/@fiale-plus/pi-rogue-context-broker/src/sqlite.ts +39 -2
- package/node_modules/@fiale-plus/pi-rogue-router/README.md +34 -0
- package/node_modules/@fiale-plus/pi-rogue-router/package.json +30 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/checkpoints.test.ts +84 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/checkpoints.ts +363 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/cli.ts +277 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/completions.ts +34 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/config-extension.test.ts +165 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/config.ts +193 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/dataset.ts +154 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/decision-ledger.test.ts +148 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/decision.ts +138 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/extension.ts +139 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/git-features.ts +134 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/hash.ts +19 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/index.ts +15 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/learning.test.ts +241 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/learning.ts +382 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/ledger.ts +94 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/observe.ts +126 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/outcomes.ts +128 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/progress.ts +93 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/session-reader.ts +217 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/subagents.ts +178 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/types.ts +150 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/v1-telemetry.test.ts +297 -0
- package/package.json +5 -3
- package/src/extension.test.ts +1 -0
- package/src/extension.ts +2 -0
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { dirname, resolve } from "node:path";
|
|
3
|
+
import { decideRoute, readCheckpointJsonl } from "./decision.js";
|
|
4
|
+
import { hashText } from "./hash.js";
|
|
5
|
+
import { readRouteEvents, type RouteEvent } from "./ledger.js";
|
|
6
|
+
import { readOutcomes, type RouterOutcome } from "./outcomes.js";
|
|
7
|
+
import type { RouteAction, RouteDecision, RouterCheckpoint } from "./types.js";
|
|
8
|
+
|
|
9
|
+
/** Schema tag written into every model capability card record. */
export const MODEL_CAPABILITY_CARD_SCHEMA = "pi-router.model-capability-card.v1" as const;

/** Schema tag written into every teacher label record. */
export const TEACHER_LABEL_SCHEMA = "pi-router.teacher-label.v1" as const;

/** Schema tag written into every shadow-evaluation report. */
export const SHADOW_EVAL_SCHEMA = "pi-router.shadow-eval.v1" as const;
|
|
12
|
+
|
|
13
|
+
/**
 * Aggregated view of the route events recorded for one provider/model pair.
 * Produced by `generateCapabilityCards`.
 */
export interface ModelCapabilityCard {
  schema: typeof MODEL_CAPABILITY_CARD_SCHEMA;
  modelId: string;
  provider?: string;
  /** Timestamp string passed to the generator (an ISO-8601 string by default). */
  generatedAt: string;
  /** Where any cold-start prior for this model came from, and why. */
  seed: {
    source: "none" | "manual" | "public" | "default";
    purpose: string;
  };
  /** Statistics computed from the grouped route events. */
  observed: {
    source: "local Pi telemetry";
    /** Number of route events in the group. */
    events: number;
    /** Number of distinct session ids among those events. */
    sessions: number;
    /** Occurrence count per decision action. */
    actions: Record<string, number>;
    averageLoopScore: number;
    averageProgressScore: number;
    /** `null` when no event carried a finite context-token estimate. */
    averageContextTokensApprox: number | null;
    /** Tallies of the outcomes that could be linked to events of this group. */
    outcomes: {
      linked: number;
      success: number;
      partial: number;
      failed: number;
      abandoned: number;
      unknown: number;
      /** `null` when no linked outcome reported a finite rework-turn count. */
      averageReworkTurns: number | null;
    };
  };
  /** Cards never promote themselves; both flags are fixed literals. */
  promotion: {
    manualOnly: true;
    promoted: false;
  };
}
|
|
45
|
+
|
|
46
|
+
/** A suggested route action for a single checkpoint, attributed to a named teacher. */
export interface TeacherLabel {
  schema: typeof TEACHER_LABEL_SCHEMA;
  /** Hash of teacher, checkpoint id, suggested action and the raw-session content hash. */
  labelId: string;
  generatedAt: string;
  teacher: string;
  checkpointId: string;
  sessionId: string;
  /** Pointer to the raw session span, copied from the checkpoint. */
  rawSessionRef: RouterCheckpoint["rawSessionRef"];
  suggestedAction: RouteAction;
  confidence: number;
  /** The decision's `reason` text. */
  rationale: string;
  /** "teacher-output" for imported decisions, "local-rule" for locally computed ones. */
  source: "local-rule" | "teacher-output";
}
|
|
59
|
+
|
|
60
|
+
/**
 * One request record asking a teacher to judge a checkpoint.
 * Carries a pointer to the raw session plus a compact feature summary.
 */
export interface TeacherPromptRequest {
  schema: "pi-router.teacher-prompt.v1";
  /** Hash of the teacher name, checkpoint id and raw-session content hash. */
  requestId: string;
  teacher: string;
  checkpointId: string;
  sessionId: string;
  rawSessionRef: RouterCheckpoint["rawSessionRef"];
  /** Actions the teacher may choose from. */
  allowedActions: RouteAction[];
  /** Free-text instruction describing the expected reply. */
  instruction: string;
  /** Checkpoint facts copied into the request. */
  features: Pick<RouterCheckpoint, "phase" | "activeModel" | "provider"> & {
    loopScore: number;
    progressScore: number;
    sameCommandRepeatedCount: number;
    sameErrorRepeatedCount: number;
    verifierUsed: boolean;
    noVerifierUsed: boolean;
    diffLines: number;
    diffFilesChanged: number;
  };
}
|
|
80
|
+
|
|
81
|
+
/** Output of a teacher reflection run: the labels plus a markdown summary of them. */
export interface ReflectionResult {
  /** One label per input checkpoint, in input order. */
  labels: TeacherLabel[];
  /** Human-readable report (header, metadata bullets, per-action counts). */
  markdown: string;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Result of re-running the route policy over checkpoints and comparing the
 * outcome with previously recorded ledger events.
 */
export interface ShadowEvalReport {
  schema: typeof SHADOW_EVAL_SCHEMA;
  generatedAt: string;
  policyVersion: string;
  /** Number of checkpoints evaluated. */
  checkpoints: number;
  /** Number of checkpoints that had a ledger event to compare against. */
  comparedEvents: number;
  /** Freshly computed decisions, counted per action. */
  actionCounts: Record<string, number>;
  /** Recorded ledger decisions (compared checkpoints only), counted per action. */
  ledgerActionCounts: Record<string, number>;
  /** Compared checkpoints whose recorded action differs from the fresh one. */
  divergences: number;
  /** `divergences / comparedEvents`, rounded; 0 when nothing was compared. */
  divergenceRate: number;
  /** Counts of fresh decisions for three specific actions. */
  likelySavingsSignals: {
    summarizeContext: number;
    runVerifier: number;
    continueCurrent: number;
  };
  manualPromotionRequired: true;
}
|
|
103
|
+
|
|
104
|
+
/** Rounds `value` to at most three decimal places. */
function round(value: number): number {
  const fixed = value.toFixed(3);
  return Number(fixed);
}
|
|
107
|
+
|
|
108
|
+
/**
 * Adds one to the counter stored under `key`, creating it at 1 when absent.
 *
 * Only the map's own properties count as existing counters. A plain `map[key]`
 * read would pick up inherited members such as "constructor" or "toString"
 * and turn the count into a string. The write goes through
 * `Object.defineProperty` so that a key named "__proto__" is stored as an
 * ordinary entry instead of being swallowed by the prototype setter.
 *
 * @param map Counter table, mutated in place.
 * @param key Counter name; may be arbitrary external text.
 */
function increment(map: Record<string, number>, key: string): void {
  const hasOwn = Object.prototype.hasOwnProperty.call(map, key);
  const current = hasOwn ? (map[key] ?? 0) : 0;
  Object.defineProperty(map, key, {
    value: current + 1,
    writable: true,
    enumerable: true,
    configurable: true,
  });
}
|
|
111
|
+
|
|
112
|
+
/**
 * Writes `rows` to `path` as JSON Lines, creating parent directories first.
 * Each row becomes one line; an empty list produces an empty file.
 */
function writeJsonl(path: string, rows: unknown[]): void {
  const target = resolve(path);
  mkdirSync(dirname(target), { recursive: true });
  const lines = rows.map((row) => JSON.stringify(row));
  const body = lines.length > 0 ? `${lines.join("\n")}\n` : "";
  writeFileSync(target, body);
}
|
|
117
|
+
|
|
118
|
+
/** Returns a fresh outcome tally with every counter at zero and no rework average. */
function emptyOutcomeCounts(): ModelCapabilityCard["observed"]["outcomes"] {
  const zeroed: ModelCapabilityCard["observed"]["outcomes"] = {
    linked: 0,
    success: 0,
    partial: 0,
    failed: 0,
    abandoned: 0,
    unknown: 0,
    averageReworkTurns: null,
  };
  return zeroed;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Tallies the outcomes that can be linked to the given route events.
 *
 * An outcome is linked by its route-event id when it has one; outcomes without
 * a route-event id are linked by checkpoint id instead. When several outcomes
 * share an id, the last one in `outcomes` wins.
 *
 * @param group Route events to look up outcomes for.
 * @param outcomes All known outcomes.
 * @returns Per-status counts and the rounded mean of the finite rework-turn
 *   values (`null` when there are none).
 */
function summarizeOutcomes(group: RouteEvent[], outcomes: RouterOutcome[]): ModelCapabilityCard["observed"]["outcomes"] {
  const byRouteEvent = new Map<string, RouterOutcome>();
  const byCheckpoint = new Map<string, RouterOutcome>();
  for (const outcome of outcomes) {
    if (outcome.routeEventId) {
      byRouteEvent.set(outcome.routeEventId, outcome);
    } else if (outcome.checkpointId) {
      byCheckpoint.set(outcome.checkpointId, outcome);
    }
  }

  const counts = emptyOutcomeCounts();
  let reworkTotal = 0;
  let reworkSamples = 0;
  for (const event of group) {
    const outcome = byRouteEvent.get(event.eventId) ?? byCheckpoint.get(event.checkpointId);
    if (!outcome) continue;
    counts.linked++;
    counts[outcome.taskStatus]++;
    const turns = outcome.reworkTurns;
    if (typeof turns === "number" && Number.isFinite(turns)) {
      reworkTotal += turns;
      reworkSamples++;
    }
  }

  if (reworkSamples > 0) counts.averageReworkTurns = round(reworkTotal / reworkSamples);
  return counts;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Builds one capability card per provider/model pair found in `events`.
 *
 * Events lacking a model or provider are grouped under "unknown". Cards are
 * returned sorted by "provider/modelId".
 *
 * @param events Route events to aggregate.
 * @param generatedAt Timestamp stamped on every card; defaults to now (ISO-8601).
 * @param outcomes Outcomes to link to the events of each group.
 * @returns The cards, sorted with `localeCompare` on "provider/modelId".
 */
export function generateCapabilityCards(events: RouteEvent[], generatedAt = new Date().toISOString(), outcomes: RouterOutcome[] = []): ModelCapabilityCard[] {
  // Each bucket keeps its provider/model alongside the events, so the grouping
  // key never has to be parsed back and appending is O(1) per event.
  const groups = new Map<string, { provider: string; modelId: string; events: RouteEvent[] }>();
  for (const event of events) {
    const modelId = event.runtime.activeModel ?? "unknown";
    const provider = event.runtime.provider ?? "unknown";
    // JSON encoding keeps the pair unambiguous whatever characters the ids contain.
    const key = JSON.stringify([provider, modelId]);
    const bucket = groups.get(key);
    if (bucket) bucket.events.push(event);
    else groups.set(key, { provider, modelId, events: [event] });
  }

  return [...groups.values()].map(({ provider, modelId, events: group }) => {
    const actions: Record<string, number> = {};
    const sessions = new Set(group.map((event) => event.sessionId));
    const contextValues = group
      .map((event) => event.runtime.contextTokensApprox)
      .filter((value): value is number => typeof value === "number" && Number.isFinite(value));
    for (const event of group) increment(actions, event.decision.action);
    return {
      schema: MODEL_CAPABILITY_CARD_SCHEMA,
      modelId,
      provider,
      generatedAt,
      seed: {
        source: "none",
        purpose: "cold-start priors are intentionally absent in v0; local observations dominate",
      },
      observed: {
        source: "local Pi telemetry",
        events: group.length,
        sessions: sessions.size,
        actions,
        // A bucket always holds at least one event, so these divisions are safe.
        averageLoopScore: round(group.reduce((sum, event) => sum + event.metrics.loopScore, 0) / group.length),
        averageProgressScore: round(group.reduce((sum, event) => sum + event.metrics.progressScore, 0) / group.length),
        averageContextTokensApprox: contextValues.length
          ? round(contextValues.reduce((sum, value) => sum + value, 0) / contextValues.length)
          : null,
        outcomes: summarizeOutcomes(group, outcomes),
      },
      promotion: {
        manualOnly: true,
        promoted: false,
      },
    } satisfies ModelCapabilityCard;
  }).sort((a, b) => `${a.provider}/${a.modelId}`.localeCompare(`${b.provider}/${b.modelId}`));
}
|
|
183
|
+
|
|
184
|
+
/**
 * Reads route events from `path`, insisting that the file exists.
 *
 * @throws Error when nothing exists at the resolved path.
 */
function readRequiredRouteEvents(path: string): RouteEvent[] {
  const present = existsSync(resolve(path));
  if (present) return readRouteEvents(path);
  throw new Error(`required route events file not found: ${path}`);
}
|
|
188
|
+
|
|
189
|
+
/**
 * Generates capability cards from a route-event ledger and writes them as JSONL.
 *
 * @param eventsPath Ledger file; must exist.
 * @param outputPath Destination JSONL file.
 * @param outcomesPath Optional outcomes file, handed to `readOutcomes` as-is.
 * @returns The cards that were written.
 */
export function writeCapabilityCards(eventsPath: string, outputPath: string, outcomesPath?: string): ModelCapabilityCard[] {
  const events = readRequiredRouteEvents(eventsPath);
  const generatedAt = new Date().toISOString();
  const outcomes = readOutcomes(outcomesPath);
  const cards = generateCapabilityCards(events, generatedAt, outcomes);
  writeJsonl(outputPath, cards);
  return cards;
}
|
|
194
|
+
|
|
195
|
+
/**
 * Converts a route decision for `checkpoint` into a teacher label.
 * The label id is a hash of the teacher name, checkpoint id, chosen action and
 * the raw-session content hash.
 */
function labelFromDecision(
  checkpoint: RouterCheckpoint,
  decision: RouteDecision,
  teacher: string,
  source: TeacherLabel["source"],
  generatedAt: string,
): TeacherLabel {
  const { checkpointId, sessionId, rawSessionRef } = checkpoint;
  const { action, confidence, reason } = decision;
  const labelId = hashText(teacher, checkpointId, action, rawSessionRef.contentHash);
  return {
    schema: TEACHER_LABEL_SCHEMA,
    labelId,
    generatedAt,
    teacher,
    checkpointId,
    sessionId,
    rawSessionRef,
    suggestedAction: action,
    confidence,
    rationale: reason,
    source,
  };
}
|
|
216
|
+
|
|
217
|
+
/**
 * Loads teacher labels from a JSONL file, skipping blank lines.
 * Parsed records are not validated against the label shape.
 *
 * @throws When the file cannot be read or a non-blank line is not valid JSON.
 */
export function readTeacherLabels(path: string): TeacherLabel[] {
  const labels: TeacherLabel[] = [];
  for (const line of readFileSync(path, "utf8").split("\n")) {
    if (line.trim() === "") continue;
    labels.push(JSON.parse(line) as TeacherLabel);
  }
  return labels;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Reads teacher decisions from a JSONL file and indexes them by checkpoint id.
 * An empty `path` yields an empty map; when several lines share a checkpoint
 * id the last one wins. Blank lines are ignored.
 *
 * @throws When the file cannot be read or a non-blank line is not valid JSON.
 */
function importedTeacherDecisions(path: string): Map<string, RouteDecision> {
  if (!path) return new Map<string, RouteDecision>();
  const decisions = readFileSync(path, "utf8")
    .split("\n")
    .filter((line) => line.trim() !== "")
    .map((line) => JSON.parse(line) as RouteDecision);
  return new Map<string, RouteDecision>(
    decisions.map((decision): [string, RouteDecision] => [decision.checkpointId, decision]),
  );
}
|
|
234
|
+
|
|
235
|
+
/**
 * Produces one teacher label per checkpoint plus a markdown summary.
 *
 * With the "local-rule" teacher, labels come from `decideRoute` unless an
 * imported decision exists for the checkpoint. Any other teacher must supply
 * a decisions JSONL file containing a decision for every checkpoint.
 *
 * @param checkpoints Checkpoints to label, in output order.
 * @param options.teacher Teacher name; "local-rule" selects the local policy.
 * @param options.teacherOutputPath JSONL file of imported teacher decisions.
 * @param options.generatedAt Timestamp for labels and report; defaults to now.
 * @returns The labels and a markdown report with per-action counts.
 * @throws Error when a non-local teacher has no output file, or the file lacks
 *   a decision for one of the checkpoints.
 */
export function generateTeacherReflection(
  checkpoints: RouterCheckpoint[],
  options: { teacher: string; teacherOutputPath?: string; generatedAt?: string } = { teacher: "local-rule" },
): ReflectionResult {
  const generatedAt = options.generatedAt ?? new Date().toISOString();
  if (options.teacher !== "local-rule" && !options.teacherOutputPath) {
    throw new Error("non-local teacher reflection requires --teacher-output decisions JSONL in local-only v0");
  }
  const imported = options.teacherOutputPath ? importedTeacherDecisions(options.teacherOutputPath) : new Map<string, RouteDecision>();
  const labels = checkpoints.map((checkpoint) => {
    const importedDecision = imported.get(checkpoint.checkpointId);
    if (options.teacher !== "local-rule" && !importedDecision) {
      throw new Error(`teacher output missing decision for checkpoint: ${checkpoint.checkpointId}`);
    }
    const decision = importedDecision ?? decideRoute(checkpoint, { policyVersion: options.teacher });
    const source: TeacherLabel["source"] = importedDecision ? "teacher-output" : "local-rule";
    return labelFromDecision(checkpoint, decision, options.teacher, source, generatedAt);
  });
  const actionCounts: Record<string, number> = {};
  for (const label of labels) increment(actionCounts, label.suggestedAction);
  // Order by action name with an explicit code-unit comparison. A bare sort()
  // would compare the stringified "action,count" tuples instead.
  const actionLines = Object.entries(actionCounts)
    .sort(([left], [right]) => (left < right ? -1 : left > right ? 1 : 0))
    .map(([action, count]) => `- ${action}: ${count}`);
  const markdown = [
    `# Pi router teacher reflection`,
    ``,
    `- generatedAt: ${generatedAt}`,
    `- teacher: ${options.teacher}`,
    `- labels: ${labels.length}`,
    `- source: ${options.teacherOutputPath ? "imported teacher output" : "local rule teacher"}`,
    ``,
    `## Suggested action counts`,
    ``,
    ...actionLines,
    ``,
    `Manual promotion only: these labels do not mutate router policy.`,
  ].join("\n");
  return { labels, markdown };
}
|
|
271
|
+
|
|
272
|
+
/**
 * Builds one teacher prompt request per checkpoint.
 *
 * Every request carries the same instruction text and the same list of allowed
 * actions (a single array instance shared by all requests), plus a feature
 * summary copied from the checkpoint.
 *
 * @param checkpoints Checkpoints to build requests for.
 * @param teacher Teacher name recorded on, and hashed into, each request.
 */
export function generateTeacherPromptRequests(checkpoints: RouterCheckpoint[], teacher: string): TeacherPromptRequest[] {
  const instruction = "Inspect the raw session span by pointer if needed. Return one pi-router.decision.v1 JSON object with checkpointId, action, adviceShape, contextPolicy, confidence, reason, and policyVersion. Prefer intervention only when it likely improves trajectory outcome; do not mutate policy.";
  // NOTE(review): "merge_subagent_result" is handled as a route action by
  // actionRole in observe.ts but is not offered here — confirm intentional.
  const allowedActions: RouteAction[] = [
    "continue_current",
    "continue_local",
    "summarize_context",
    "run_verifier",
    "ask_micro_hint",
    "escalate_plan_critique",
    "escalate_debug_diagnosis",
    "escalate_diff_review",
    "delegate_full_step",
    "spawn_subagent",
    "stop_and_ask_user",
  ];

  const requests: TeacherPromptRequest[] = [];
  for (const checkpoint of checkpoints) {
    const { checkpointId, sessionId, rawSessionRef, features } = checkpoint;
    requests.push({
      schema: "pi-router.teacher-prompt.v1",
      requestId: hashText("teacher-request", teacher, checkpointId, rawSessionRef.contentHash),
      teacher,
      checkpointId,
      sessionId,
      rawSessionRef,
      allowedActions,
      instruction,
      features: {
        phase: checkpoint.phase,
        activeModel: checkpoint.activeModel,
        provider: checkpoint.provider,
        loopScore: features.loopScore,
        progressScore: features.progressScore,
        sameCommandRepeatedCount: features.sameCommandRepeatedCount,
        sameErrorRepeatedCount: features.sameErrorRepeatedCount,
        verifierUsed: features.verifierUsed,
        noVerifierUsed: features.noVerifierUsed,
        diffLines: features.diffLines,
        diffFilesChanged: features.diffFilesChanged,
      },
    });
  }
  return requests;
}
|
|
310
|
+
|
|
311
|
+
/**
 * Reads checkpoints from a JSONL file, builds teacher prompt requests for
 * them and writes the requests to `outputPath` as JSONL.
 *
 * @returns The requests that were written.
 */
export function writeTeacherPromptRequests(checkpointPath: string, outputPath: string, teacher: string): TeacherPromptRequest[] {
  const checkpoints = readCheckpointJsonl(checkpointPath);
  const requests = generateTeacherPromptRequests(checkpoints, teacher);
  writeJsonl(outputPath, requests);
  return requests;
}
|
|
316
|
+
|
|
317
|
+
/**
 * Runs a teacher reflection over a checkpoint file and persists the results.
 *
 * Steps, in order: read the checkpoints; optionally write prompt requests to
 * `teacherPromptPath`; generate labels; write the labels as JSONL; write the
 * markdown report (with a trailing newline), creating its directory if needed.
 *
 * @returns The generated labels and markdown.
 */
export function writeTeacherReflection(options: {
  checkpointPath: string;
  labelsPath: string;
  reflectionPath: string;
  teacher: string;
  teacherOutputPath?: string;
  teacherPromptPath?: string;
}): ReflectionResult {
  const { checkpointPath, labelsPath, reflectionPath, teacher, teacherOutputPath, teacherPromptPath } = options;
  const checkpoints = readCheckpointJsonl(checkpointPath);
  if (teacherPromptPath) {
    const requests = generateTeacherPromptRequests(checkpoints, teacher);
    writeJsonl(teacherPromptPath, requests);
  }
  const reflection = generateTeacherReflection(checkpoints, { teacher, teacherOutputPath });
  writeJsonl(labelsPath, reflection.labels);
  const reportFile = resolve(reflectionPath);
  mkdirSync(dirname(reportFile), { recursive: true });
  writeFileSync(reportFile, `${reflection.markdown}\n`);
  return reflection;
}
|
|
337
|
+
|
|
338
|
+
/**
 * Re-runs the route policy on every checkpoint and compares the fresh decision
 * with the ledger event recorded for the same checkpoint, when there is one.
 *
 * When several ledger events share a checkpoint id, the last one is used.
 *
 * @param checkpoints Checkpoints to evaluate.
 * @param ledgerEvents Previously recorded route events; may be empty.
 * @param generatedAt Timestamp stamped on the report; defaults to now.
 * @returns Action counts, divergence statistics and savings signals.
 */
export function shadowEvaluate(checkpoints: RouterCheckpoint[], ledgerEvents: RouteEvent[] = [], generatedAt = new Date().toISOString()): ShadowEvalReport {
  const recorded = new Map<string, RouteEvent>();
  for (const event of ledgerEvents) recorded.set(event.checkpointId, event);

  const actionCounts: Record<string, number> = {};
  const ledgerActionCounts: Record<string, number> = {};
  let comparedEvents = 0;
  let divergences = 0;

  for (const checkpoint of checkpoints) {
    const fresh = decideRoute(checkpoint);
    increment(actionCounts, fresh.action);
    const actual = recorded.get(checkpoint.checkpointId);
    if (actual) {
      comparedEvents += 1;
      increment(ledgerActionCounts, actual.decision.action);
      if (actual.decision.action !== fresh.action) divergences += 1;
    }
  }

  // The policy version is taken from a decision on the first checkpoint.
  const first = checkpoints[0];
  const policyVersion = first ? decideRoute(first).policyVersion : "pi-router.rule-policy.v0";

  return {
    schema: SHADOW_EVAL_SCHEMA,
    generatedAt,
    policyVersion,
    checkpoints: checkpoints.length,
    comparedEvents,
    actionCounts,
    ledgerActionCounts,
    divergences,
    divergenceRate: comparedEvents ? round(divergences / comparedEvents) : 0,
    likelySavingsSignals: {
      summarizeContext: actionCounts["summarize_context"] ?? 0,
      runVerifier: actionCounts["run_verifier"] ?? 0,
      continueCurrent: actionCounts["continue_current"] ?? 0,
    },
    manualPromotionRequired: true,
  };
}
|
|
375
|
+
|
|
376
|
+
/**
 * Runs a shadow evaluation over a checkpoint file and writes the report as
 * pretty-printed JSON (two-space indent, trailing newline).
 *
 * @param checkpointPath Checkpoint JSONL file.
 * @param outputPath Destination file; parent directories are created.
 * @param ledgerPath Optional route-event ledger; must exist when given.
 * @returns The report that was written.
 */
export function writeShadowEval(checkpointPath: string, outputPath: string, ledgerPath?: string): ShadowEvalReport {
  const checkpoints = readCheckpointJsonl(checkpointPath);
  const ledgerEvents = ledgerPath ? readRequiredRouteEvents(ledgerPath) : [];
  const report = shadowEvaluate(checkpoints, ledgerEvents);
  const reportFile = resolve(outputPath);
  mkdirSync(dirname(reportFile), { recursive: true });
  writeFileSync(reportFile, `${JSON.stringify(report, null, 2)}\n`);
  return report;
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { dirname, resolve } from "node:path";
|
|
3
|
+
import { decisionId } from "./decision.js";
|
|
4
|
+
import type { RouteDecision, RouterCheckpoint } from "./types.js";
|
|
5
|
+
|
|
6
|
+
/** Schema tag written into every route event record. */
export const ROUTE_EVENT_SCHEMA = "pi-router.route-event.v1" as const;
|
|
7
|
+
|
|
8
|
+
/** Runtime facts copied from a checkpoint when a route event is recorded. */
export interface RouteRuntimeFacts {
  activeModel?: string;
  provider?: string;
  /** Approximate context size in tokens; `null` when not available. */
  contextTokensApprox: number | null;
  /** Git working-tree dirtiness; `null` when not available. */
  gitDirty: boolean | null;
}
|
|
14
|
+
|
|
15
|
+
/** One ledger record: a route decision together with the checkpoint facts it was based on. */
export interface RouteEvent {
  schema: typeof ROUTE_EVENT_SCHEMA;
  /** Identifier derived from the decision and its checkpoint. */
  eventId: string;
  recordedAt: string;
  checkpointId: string;
  sessionId: string;
  rawSessionRef: RouterCheckpoint["rawSessionRef"];
  sourceEvent: RouterCheckpoint["sourceEvent"];
  decision: RouteDecision;
  runtime: RouteRuntimeFacts;
  /** What happened after the decision; `followed` is `null` on freshly built events. */
  observed: {
    followed: boolean | null;
    overriddenBy?: string;
  };
  /** Feature values copied from the checkpoint at recording time. */
  metrics: {
    loopScore: number;
    progressScore: number;
    sameCommandRepeatedCount: number;
    sameErrorRepeatedCount: number;
    verifierUsed: boolean;
    diffLines: number;
    diffFilesChanged: number;
  };
}
|
|
39
|
+
|
|
40
|
+
/**
 * Assembles the ledger record for `decision` taken at `checkpoint`.
 *
 * Runtime facts and metrics are copied from the checkpoint; the `observed`
 * section starts with `followed: null`.
 *
 * @param recordedAt Timestamp for the record; defaults to now (ISO-8601).
 */
export function buildRouteEvent(checkpoint: RouterCheckpoint, decision: RouteDecision, recordedAt = new Date().toISOString()): RouteEvent {
  const { checkpointId, sessionId, rawSessionRef, sourceEvent, features } = checkpoint;
  const eventId = decisionId(decision, checkpoint);
  const runtime: RouteRuntimeFacts = {
    activeModel: checkpoint.activeModel,
    provider: checkpoint.provider,
    contextTokensApprox: features.contextTokensApprox,
    gitDirty: features.gitDirty,
  };
  const metrics: RouteEvent["metrics"] = {
    loopScore: features.loopScore,
    progressScore: features.progressScore,
    sameCommandRepeatedCount: features.sameCommandRepeatedCount,
    sameErrorRepeatedCount: features.sameErrorRepeatedCount,
    verifierUsed: features.verifierUsed,
    diffLines: features.diffLines,
    diffFilesChanged: features.diffFilesChanged,
  };
  return {
    schema: ROUTE_EVENT_SCHEMA,
    eventId,
    recordedAt,
    checkpointId,
    sessionId,
    rawSessionRef,
    sourceEvent,
    decision,
    runtime,
    observed: { followed: null },
    metrics,
  };
}
|
|
70
|
+
|
|
71
|
+
/**
 * Appends `event` as one JSON line to the ledger at `path`, creating the
 * parent directory (and the file) when missing.
 */
export function appendRouteEvent(path: string, event: RouteEvent): void {
  const ledgerFile = resolve(path);
  mkdirSync(dirname(ledgerFile), { recursive: true });
  const line = `${JSON.stringify(event)}\n`;
  writeFileSync(ledgerFile, line, { flag: "a" });
}
|
|
76
|
+
|
|
77
|
+
/**
 * Reads route events from a JSONL ledger.
 *
 * A missing file yields an empty list; other read errors propagate. Blank
 * lines and lines that are not valid JSON are skipped silently. Parsed
 * records are not validated against the event shape.
 */
export function readRouteEvents(path: string): RouteEvent[] {
  const ledgerFile = resolve(path);
  let text: string;
  try {
    text = readFileSync(ledgerFile, "utf8");
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === "ENOENT") return [];
    throw error;
  }

  const events: RouteEvent[] = [];
  for (const line of text.split("\n")) {
    if (!line.trim()) continue;
    try {
      events.push(JSON.parse(line) as RouteEvent);
    } catch {
      // Best effort: a malformed line is dropped rather than failing the read.
    }
  }
  return events;
}
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import { appendRouteEvent, buildRouteEvent } from "./ledger.js";
|
|
2
|
+
import { decideRoute } from "./decision.js";
|
|
3
|
+
import { checkpointWithDiffStats, streamCheckpointsFromSessionPath } from "./checkpoints.js";
|
|
4
|
+
import {
|
|
5
|
+
activeProfile,
|
|
6
|
+
loadRouterConfig,
|
|
7
|
+
loadRouterState,
|
|
8
|
+
routerConfigPath,
|
|
9
|
+
routerDir,
|
|
10
|
+
routerEventsPath,
|
|
11
|
+
routerStatePath,
|
|
12
|
+
saveRouterState,
|
|
13
|
+
type RouterConfig,
|
|
14
|
+
type RouterProfile,
|
|
15
|
+
} from "./config.js";
|
|
16
|
+
import type { RouteAction, RouteDecision, RouterCheckpoint } from "./types.js";
|
|
17
|
+
|
|
18
|
+
/** Compact description of one router decision, ready for display or logging. */
export interface RouterObserveSummary {
  checkpointId: string;
  action: RouteAction;
  /** Profile role the action maps to; "current" and "none" are not profile entries. */
  role: keyof RouterProfile | "none" | "current";
  /** Model the role resolves to, when there is one. */
  targetModel?: string;
  currentModel?: string;
  /** Whether current and target models agree; `null` when no comparison was possible. */
  match: boolean | null;
  confidence: number;
  reason: string;
  /** Single-line human-readable rendering of the fields above. */
  text: string;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Collapses all whitespace runs in `text` to single spaces, trims it, and
 * shortens it to at most `max` UTF-16 code units, ending in "…" when cut.
 *
 * Truncation never leaves half of a surrogate pair before the ellipsis, and a
 * non-positive `max` yields just "…" for non-empty input rather than a
 * negative-index slice.
 *
 * @param text Value to render; `null`/`undefined` become the empty string.
 * @param max Maximum length of the result, ellipsis included.
 */
function squish(text: unknown, max = 140): string {
  const value = String(text ?? "").replace(/\s+/g, " ").trim();
  if (value.length <= max) return value;

  // Reserve one code unit for the ellipsis; clamp so slice() never gets a negative end.
  let keep = Math.max(0, max - 1);
  if (keep > 0) {
    const last = value.charCodeAt(keep - 1);
    // A high surrogate at the cut point means its partner would be dropped.
    if (last >= 0xd800 && last <= 0xdbff) keep -= 1;
  }
  return `${value.slice(0, keep).trimEnd()}…`;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Maps a route action to the profile role expected to carry it out.
 * "current" means the model already in use; "none" means no model is involved.
 */
export function actionRole(action: RouteAction): RouterObserveSummary["role"] {
  switch (action) {
    case "continue_current":
    case "merge_subagent_result":
      return "current";
    case "continue_local":
    case "summarize_context":
    case "run_verifier":
      return "worker";
    case "ask_micro_hint":
    case "escalate_plan_critique":
    case "escalate_debug_diagnosis":
    case "delegate_full_step":
    case "spawn_subagent":
      return "smart";
    case "escalate_diff_review":
      return "reviewer";
    case "stop_and_ask_user":
      return "none";
  }
}
|
|
51
|
+
|
|
52
|
+
/** Returns the lower-cased part of `model` after its last "/", or all of it when there is none. */
function modelLeaf(model: string): string {
  const lastSlash = model.lastIndexOf("/");
  return model.slice(lastSlash + 1).toLowerCase();
}
|
|
55
|
+
|
|
56
|
+
/**
 * Decides whether two model identifiers refer to the same model, ignoring
 * case and any "provider/" style prefix.
 *
 * @returns `null` when either identifier is missing or empty, otherwise
 *   whether the identifiers are equal, share the same leaf name, or one ends
 *   with "/" followed by the other's leaf name.
 */
export function modelsMatch(current: string | undefined, target: string | undefined): boolean | null {
  if (!current || !target) return null;

  const currentId = current.toLowerCase();
  const targetId = target.toLowerCase();
  if (currentId === targetId) return true;

  const currentLeaf = modelLeaf(currentId);
  const targetLeaf = modelLeaf(targetId);
  if (currentLeaf === targetLeaf) return true;

  return currentId.endsWith(`/${targetLeaf}`) || targetId.endsWith(`/${currentLeaf}`);
}
|
|
62
|
+
|
|
63
|
+
/**
 * Resolves a role to a model name: "current" yields `currentModel`, "none"
 * yields `undefined`, and any other role is looked up in `profile`.
 */
function targetForRole(role: RouterObserveSummary["role"], profile: RouterProfile, currentModel?: string): string | undefined {
  switch (role) {
    case "current":
      return currentModel;
    case "none":
      return undefined;
    default:
      return profile[role];
  }
}
|
|
68
|
+
|
|
69
|
+
/**
 * Summarises a route decision against the active router profile.
 *
 * The action is mapped to a role, the role to a target model, and that target
 * is compared with the checkpoint's active model. The verdict in `text` is
 * "MATCH", "MISMATCH", or "INFO" when no comparison was possible.
 *
 * @param checkpoint Checkpoint the decision was made for.
 * @param decision The route decision to describe.
 * @param config Router configuration used to pick the active profile.
 */
export function summarizeRouterDecision(checkpoint: RouterCheckpoint, decision: RouteDecision, config: RouterConfig): RouterObserveSummary {
  const profile = activeProfile(config);
  const { action, confidence, reason } = decision;
  const currentModel = checkpoint.activeModel;

  const role = actionRole(action);
  const targetModel = targetForRole(role, profile, currentModel);
  const match = role === "none" ? null : modelsMatch(currentModel, targetModel);

  let verdict = "INFO";
  if (match === true) verdict = "MATCH";
  else if (match === false) verdict = "MISMATCH";

  const roleText = role === "none" ? "no-model" : role;
  const targetText = targetModel ? `${roleText}(${targetModel})` : roleText;
  const currentText = currentModel ? `current=${currentModel}` : "current=unknown";

  return {
    checkpointId: checkpoint.checkpointId,
    action,
    role,
    targetModel,
    currentModel,
    match,
    confidence,
    reason,
    text: `router: ${verdict} ${action} → ${targetText} · ${currentText} · ${confidence.toFixed(2)} · ${squish(reason)}`,
  };
}
|
|
90
|
+
|
|
91
|
+
/**
 * Streams every checkpoint from the session file and resolves to the last one
 * yielded, or `null` when the stream produces none.
 */
export async function latestCheckpointFromSession(sessionPath: string): Promise<RouterCheckpoint | null> {
  let newest: RouterCheckpoint | null = null;
  for await (const candidate of streamCheckpointsFromSessionPath(sessionPath)) {
    newest = candidate;
  }
  return newest;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Observes the latest checkpoint of the current session, records a route
 * decision for it, and optionally notifies the UI.
 *
 * Resolves to `null` without recording anything when the router is disabled,
 * printing is "off", no session file is known, the session has no checkpoint,
 * or the latest checkpoint was already observed. Otherwise the decision is
 * appended to the events ledger, router state is saved, and the summary is
 * returned. With print mode "mismatch_only" the UI is notified only on a
 * model mismatch.
 *
 * @param ctx Host context. NOTE(review): only `sessionManager.getSessionFile`,
 *   `cwd` and `ui.notify` are read here, all optionally — confirm the full
 *   contract against the config helpers that also receive it.
 */
export async function observeRouterTurn(ctx: any): Promise<RouterObserveSummary | null> {
  const config = loadRouterConfig(ctx);
  if (!config.enabled || config.print === "off") return null;

  const sessionPath = ctx?.sessionManager?.getSessionFile?.();
  if (!sessionPath) return null;
  const sessionFile = String(sessionPath);

  const checkpoint = await latestCheckpointFromSession(sessionFile);
  if (!checkpoint) return null;

  const state = loadRouterState(ctx, sessionFile);
  if (state.lastObservedCheckpointId === checkpoint.checkpointId) return null;

  // Paths passed alongside the checkpoint: the session file and the router's own files.
  const routerOwnedPaths = [
    sessionFile,
    routerConfigPath(ctx),
    routerDir(ctx),
    routerStatePath(ctx, sessionFile),
    routerEventsPath(ctx, sessionFile),
  ];
  const liveCheckpoint = checkpointWithDiffStats(checkpoint, ctx?.cwd, routerOwnedPaths);
  const decision = decideRoute(liveCheckpoint);
  const summary = summarizeRouterDecision(liveCheckpoint, decision, config);

  appendRouteEvent(routerEventsPath(ctx, sessionFile), buildRouteEvent(liveCheckpoint, decision));
  saveRouterState(
    ctx,
    {
      lastObservedCheckpointId: checkpoint.checkpointId,
      lastDecisionAction: decision.action,
      lastSummary: summary.text,
    },
    sessionFile,
  );

  const isMismatch = summary.match === false;
  const suppressed = config.print === "mismatch_only" && !isMismatch;
  if (!suppressed) {
    ctx.ui?.notify?.(summary.text, isMismatch ? "warning" : "info");
  }
  return summary;
}
|