@fiale-plus/pi-rogue 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/extension.ts +75 -31
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/loop-convergence.test.ts +2 -2
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/state-versioning.test.ts +25 -4
- package/node_modules/@fiale-plus/pi-rogue-context-broker/README.md +4 -3
- package/node_modules/@fiale-plus/pi-rogue-context-broker/src/extension.test.ts +38 -4
- package/node_modules/@fiale-plus/pi-rogue-context-broker/src/extension.ts +52 -6
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/advisor-checkins.test.ts +10 -0
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/advisor-checkins.ts +17 -2
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/goal.ts +2 -2
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/internal.ts +11 -2
- package/node_modules/@fiale-plus/pi-rogue-router/README.md +32 -0
- package/node_modules/@fiale-plus/pi-rogue-router/package.json +30 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/checkpoints.test.ts +84 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/checkpoints.ts +355 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/cli.ts +277 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/completions.ts +34 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/config-extension.test.ts +133 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/config.ts +168 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/dataset.ts +154 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/decision-ledger.test.ts +148 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/decision.ts +138 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/extension.ts +139 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/git-features.ts +119 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/hash.ts +19 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/index.ts +15 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/learning.test.ts +241 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/learning.ts +382 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/ledger.ts +94 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/observe.ts +119 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/outcomes.ts +128 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/progress.ts +93 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/session-reader.ts +217 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/subagents.ts +178 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/types.ts +150 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/v1-telemetry.test.ts +293 -0
- package/package.json +5 -3
- package/src/extension.test.ts +1 -0
- package/src/extension.ts +2 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import { execFileSync } from "node:child_process";
|
|
2
|
+
import { readFileSync, realpathSync, statSync } from "node:fs";
|
|
3
|
+
import { isAbsolute, relative, resolve } from "node:path";
|
|
4
|
+
import { hashText } from "./hash.js";
|
|
5
|
+
import type { DiffStats } from "./types.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Zeroed diff statistics: no changed files, no added or deleted lines, no file hashes.
 * `readGitDiffStats` returns this same object (not a copy) when no working directory is
 * given, when git fails, or when nothing changed.
 */
export const EMPTY_DIFF_STATS: DiffStats = {
  filesChanged: 0,
  linesAdded: 0,
  linesDeleted: 0,
  totalLines: 0,
  fileHashes: [],
};
|
|
14
|
+
|
|
15
|
+
/**
 * Runs `git` with `args` inside `cwd` (resolved to an absolute path) and returns stdout as UTF-8 text.
 * stdin and stderr are discarded; any error raised by `execFileSync` propagates to the caller.
 */
function git(cwd: string, args: string[]): string {
  const workingDirectory = resolve(cwd);
  return execFileSync("git", args, {
    cwd: workingDirectory,
    encoding: "utf8",
    stdio: ["ignore", "pipe", "ignore"],
  });
}
|
|
18
|
+
|
|
19
|
+
/**
 * Aggregates `git diff --numstat` output (`added<TAB>deleted<TAB>path` per row).
 *
 * @param output Raw numstat text; blank rows are skipped.
 * @param excludeFiles Paths whose rows are ignored entirely.
 * @returns Line totals, the sorted set of hashed paths, and a file count that falls back to the
 *          number of counted rows when no row carried a path.
 */
function parseNumstat(output: string, excludeFiles = new Set<string>()): Pick<DiffStats, "filesChanged" | "linesAdded" | "linesDeleted" | "totalLines" | "fileHashes"> {
  const hashedPaths = new Set<string>();
  const totals = { added: 0, deleted: 0 };
  let countedRows = 0;

  output.split("\n").forEach((row) => {
    if (row.trim() === "") return;

    const columns = row.split("\t");
    // Everything after the second tab is the path; re-join in case the path itself holds tabs.
    const path = columns.slice(2).join("\t").trim();
    if (path !== "" && excludeFiles.has(path)) return;

    countedRows += 1;
    if (path !== "") hashedPaths.add(hashText(path));

    // Non-numeric counts (e.g. "-") convert to NaN and are left out of the totals.
    const addedCount = Number(columns[0]);
    const deletedCount = Number(columns[1]);
    if (Number.isFinite(addedCount)) totals.added += addedCount;
    if (Number.isFinite(deletedCount)) totals.deleted += deletedCount;
  });

  return {
    filesChanged: hashedPaths.size > 0 ? hashedPaths.size : countedRows,
    linesAdded: totals.added,
    linesDeleted: totals.deleted,
    totalLines: totals.added + totals.deleted,
    fileHashes: Array.from(hashedPaths).sort(),
  };
}
|
|
40
|
+
|
|
41
|
+
/**
 * Lists untracked, non-ignored files under `cwd` and estimates how many lines they add.
 *
 * @param cwd Directory in which `git ls-files` runs and against which listed paths are resolved.
 * @param excludeFiles Paths (as printed by git) that are skipped entirely.
 * @returns One hash per untracked file plus the summed line count of the readable text files.
 *          Returns empty results if the git listing itself fails.
 */
function untrackedFiles(cwd: string, excludeFiles = new Set<string>()): { hashes: string[]; linesAdded: number } {
  try {
    let linesAdded = 0;
    const hashes: string[] = [];
    // `-z` makes git print paths verbatim, NUL-terminated. Without it git C-quotes paths containing
    // unusual bytes, which breaks both the exclusion lookup and the stat/read below. Paths are not
    // trimmed for the same reason: leading or trailing whitespace can be part of a real file name.
    const listing = git(cwd, ["ls-files", "--others", "--exclude-standard", "-z"]);
    for (const file of listing.split("\0")) {
      if (!file || excludeFiles.has(file)) continue;
      hashes.push(hashText(file));
      try {
        const path = resolve(cwd, file);
        if (statSync(path).size > 1_000_000) continue;
        const text = readFileSync(path, "utf8");
        // A NUL character marks the content as binary; counting "lines" in it would be meaningless.
        if (text.includes("\0")) continue;
        const lines = text.split(/\r?\n/);
        // A trailing newline (or empty file) leaves one empty final segment that is not a line.
        if (lines[lines.length - 1] === "") lines.pop();
        linesAdded += lines.length;
      } catch {
        // Large, binary, or unreadable untracked files still count as changed files; line count remains unknown/zero.
      }
    }
    return { hashes, linesAdded };
  } catch {
    return { hashes: [], linesAdded: 0 };
  }
}
|
|
65
|
+
|
|
66
|
+
/**
 * Converts caller-supplied paths into repo-relative, `/`-separated paths that can be compared
 * with the paths git prints.
 *
 * @param root Repository root; relative inputs are resolved against it.
 * @param paths Absolute or root-relative paths to exclude; `undefined` yields an empty set.
 * @returns The repo-relative paths that lie inside `root`. The root itself and anything outside it are dropped.
 * @throws Whatever `realpathSync` throws when `root` itself cannot be resolved.
 */
function excludeFilesFromPaths(root: string, paths: string[] | undefined): Set<string> {
  const files = new Set<string>();
  const realRoot = realpathSync(root);
  for (const path of paths ?? []) {
    const absolute = isAbsolute(path) ? path : resolve(root, path);
    let rel = relative(root, absolute);
    try {
      rel = relative(realRoot, realpathSync(absolute));
    } catch {
      // Output paths may not exist yet; fall back to lexical repo-relative path.
    }
    if (!rel) continue;
    // Only a leading ".." *segment* means the path escapes the root; a name such as "..cache" is
    // inside it. An absolute result (different drive on Windows) is outside as well.
    const escapesRoot = rel === ".." || rel.startsWith("../") || rel.startsWith("..\\") || isAbsolute(rel);
    if (escapesRoot) continue;
    // git always prints "/"-separated paths, while path.relative uses "\" on Windows.
    files.add(process.platform === "win32" ? rel.replace(/\\/g, "/") : rel);
  }
  return files;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Summarizes uncommitted changes (tracked diff plus untracked files) of the repository containing `cwd`.
 *
 * @param cwd Directory inside the repository; when omitted, `EMPTY_DIFF_STATS` is returned.
 * @param options.excludePaths Paths left out of the file and line counts.
 * @returns Combined statistics, or `EMPTY_DIFF_STATS` when nothing changed or git cannot be queried.
 */
export function readGitDiffStats(cwd?: string, options: { excludePaths?: string[] } = {}): DiffStats {
  if (!cwd) return EMPTY_DIFF_STATS;

  try {
    const topLevel = git(cwd, ["rev-parse", "--show-toplevel"]).trim();
    const repoRoot = topLevel || cwd;
    const excluded = excludeFilesFromPaths(repoRoot, options.excludePaths);
    const untracked = untrackedFiles(repoRoot, excluded);

    let tracked: Pick<DiffStats, "filesChanged" | "linesAdded" | "linesDeleted" | "totalLines" | "fileHashes"> = EMPTY_DIFF_STATS;
    let summary = "";
    try {
      tracked = parseNumstat(git(repoRoot, ["diff", "--numstat", "HEAD"]), excluded);
      summary = git(repoRoot, ["diff", "--shortstat", "HEAD"]).trim();
    } catch {
      // Repositories without an initial commit have no HEAD; include staged files plus untracked counts.
      try {
        const staged = git(repoRoot, ["diff", "--cached", "--numstat"]);
        const unstaged = git(repoRoot, ["diff", "--numstat"]);
        tracked = parseNumstat([staged, unstaged].join("\n"), excluded);
        const stagedSummary = git(repoRoot, ["diff", "--cached", "--shortstat"]).trim();
        const unstagedSummary = git(repoRoot, ["diff", "--shortstat"]).trim();
        summary = `${stagedSummary} ${unstagedSummary}`.trim();
      } catch {
        // Still report untracked-file counts/hashes below.
      }
    }

    const fileHashes = Array.from(new Set(tracked.fileHashes.concat(untracked.hashes))).sort();
    const filesChanged = Math.max(fileHashes.length, tracked.filesChanged + untracked.hashes.length);
    if (filesChanged === 0) return EMPTY_DIFF_STATS;

    const linesAdded = tracked.linesAdded + untracked.linesAdded;
    const totalLines = linesAdded + tracked.linesDeleted;

    // The fingerprint is only produced when there is a shortstat summary or at least one untracked file.
    const hasFingerprintInput = summary !== "" || untracked.hashes.length > 0;
    const shortStatHash = hasFingerprintInput
      ? hashText(summary, `untracked:${untracked.hashes.length}:${untracked.linesAdded}`)
      : undefined;

    return { ...tracked, filesChanged, linesAdded, totalLines, fileHashes, shortStatHash };
  } catch {
    return EMPTY_DIFF_STATS;
  }
}
|
|
115
|
+
|
|
116
|
+
/**
 * Maps a diff's total changed lines onto a churn score in [0, 1].
 *
 * @param stats Diff statistics; only `totalLines` is read.
 * @returns 0 when `totalLines` is not a positive number, otherwise `totalLines / 1200` capped at 1.
 */
export function diffChurnScore(stats: DiffStats): number {
  const { totalLines } = stats;
  // `!(x > 0)` also rejects NaN and missing values, which `x <= 0` would let through and turn into a NaN score.
  if (!(totalLines > 0)) return 0;
  return Math.min(1, totalLines / 1200);
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
|
|
3
|
+
/**
 * Produces a short fingerprint of the given parts: the first 16 hex characters of the SHA-256
 * digest of the parts joined with "||".
 *
 * Because the separator is not escaped, `hashText("a||b")` and `hashText("a", "b")` yield the same value.
 */
export function hashText(...parts: string[]): string {
  const hasher = createHash("sha256");
  hasher.update(parts.join("||"));
  const hex = hasher.digest("hex");
  return hex.substring(0, 16);
}
|
|
6
|
+
|
|
7
|
+
/**
 * Canonicalizes free-form text so that superficially different strings compare (and hash) equal.
 *
 * Steps, in order: stringify (`null`/`undefined` become ""), lower-case, replace http(s) URLs with
 * "url", replace standalone numbers with "n", collapse whitespace runs to one space, trim.
 *
 * @param text Any value; non-strings are converted with `String`.
 * @returns The normalized text, possibly empty.
 */
export function normalizeText(text: unknown): string {
  return String(text ?? "")
    .toLowerCase()
    .replace(/https?:\/\/\S+/g, " url ")
    // `\d+` after the dot: a single `\d` made "3.14" normalize to "n . n" while "3.1" became "n".
    .replace(/\b\d+(?:\.\d+)?\b/g, " n ")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
15
|
+
|
|
16
|
+
/**
 * Hashes the normalized form of `text`.
 *
 * @returns The `hashText` fingerprint of the normalized text, or `undefined` when it normalizes to an empty string.
 */
export function hashMaybe(text: unknown): string | undefined {
  const canonical = normalizeText(text);
  if (canonical === "") return undefined;
  return hashText(canonical);
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
export * from "./checkpoints.js";
|
|
2
|
+
export * from "./completions.js";
|
|
3
|
+
export * from "./config.js";
|
|
4
|
+
export * from "./decision.js";
|
|
5
|
+
export * from "./dataset.js";
|
|
6
|
+
export * from "./git-features.js";
|
|
7
|
+
export * from "./hash.js";
|
|
8
|
+
export * from "./learning.js";
|
|
9
|
+
export * from "./ledger.js";
|
|
10
|
+
export * from "./observe.js";
|
|
11
|
+
export * from "./outcomes.js";
|
|
12
|
+
export * from "./progress.js";
|
|
13
|
+
export * from "./session-reader.js";
|
|
14
|
+
export * from "./subagents.js";
|
|
15
|
+
export * from "./types.js";
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import { mkdtempSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { describe, expect, it } from "vitest";
|
|
5
|
+
import { decideRoute } from "./decision.js";
|
|
6
|
+
import { buildRouteEvent } from "./ledger.js";
|
|
7
|
+
import {
|
|
8
|
+
generateCapabilityCards,
|
|
9
|
+
generateTeacherReflection,
|
|
10
|
+
shadowEvaluate,
|
|
11
|
+
writeCapabilityCards,
|
|
12
|
+
writeShadowEval,
|
|
13
|
+
writeTeacherReflection,
|
|
14
|
+
} from "./learning.js";
|
|
15
|
+
import type { RouterCheckpoint } from "./types.js";
|
|
16
|
+
|
|
17
|
+
/**
 * Overrides accepted by the `checkpoint` fixture builder: any top-level checkpoint field may be
 * replaced, while `features` and `recent` may be given partially.
 */
type CheckpointOverrides = Partial<Omit<RouterCheckpoint, "features" | "recent">> & {
  features?: Partial<RouterCheckpoint["features"]>;
  recent?: Partial<RouterCheckpoint["recent"]>;
};
|
|
21
|
+
|
|
22
|
+
/**
 * Builds a complete checkpoint fixture.
 *
 * Top-level fields in `overrides` replace the defaults; `features` and `recent` are merged
 * key by key with their defaults, so a test only has to spell out the values it cares about.
 */
function checkpoint(overrides: CheckpointOverrides = {}): RouterCheckpoint {
  const defaultFeatures: RouterCheckpoint["features"] = {
    turnIndex: 10,
    sameCommandRepeatedCount: 2,
    sameErrorRepeatedCount: 2,
    errorChanged: false,
    testsImproved: null,
    filesTouched: 1,
    diffLines: 12,
    diffFilesChanged: 1,
    diffLinesAdded: 8,
    diffLinesDeleted: 4,
    diffChurnScore: 0,
    toolThrashScore: 0.25,
    goalDriftScore: 0,
    loopScore: 0.55,
    progressScore: 0.45,
    verifierUsed: true,
    noVerifierUsed: false,
    toolCallsLast10Turns: 4,
    contextTokensApprox: 1000,
    gitDirty: null,
  };
  const defaultRecent: RouterCheckpoint["recent"] = {
    lastUserGoalHash: "goal-hash",
    lastCommandHash: "command-hash",
    lastErrorHash: "error-hash",
    touchedFileHashes: ["file-hash"],
  };
  const defaults: RouterCheckpoint = {
    schema: "pi-router.checkpoint.v1",
    sessionId: "session-1",
    checkpointId: "session-1:event-10",
    createdAt: "2026-06-12T00:00:00.000Z",
    rawSessionRef: {
      schema: "pi-router.raw-session-ref.v1",
      path: "/tmp/raw-session.jsonl",
      fromEvent: 1,
      toEvent: 10,
      fromByte: 100,
      toByte: 200,
      contentHash: "hash-only",
    },
    harness: "pi",
    repoHash: "repo-hash",
    goalHash: "goal-hash",
    phase: "debug",
    activeModel: "local/qwen",
    provider: "local",
    features: defaultFeatures,
    recent: defaultRecent,
    sourceEvent: {
      index: 10,
      byteStart: 100,
      byteEnd: 200,
      id: "event-id",
      timestamp: "2026-06-12T00:00:01.000Z",
      type: "message",
      role: "toolResult",
    },
  };

  // The nested groups are merged separately so a partial override never drops the remaining defaults.
  const features = { ...defaultFeatures, ...overrides.features };
  const recent = { ...defaultRecent, ...overrides.recent };
  return { ...defaults, ...overrides, features, recent };
}
|
|
83
|
+
|
|
84
|
+
/**
 * Returns the path `name` inside a newly created, uniquely named temp directory.
 * Only the directory is created; the file itself is not.
 */
function tempFile(name: string): string {
  const directoryPrefix = join(tmpdir(), "pi-router-learning-");
  const directory = mkdtempSync(directoryPrefix);
  return join(directory, name);
}
|
|
87
|
+
|
|
88
|
+
describe("trajectory router local learning and eval", () => {
|
|
89
|
+
it("generates local observed capability cards from route events", () => {
  // Two checkpoints that share session, model and provider, so they are expected to fold into one card.
  const defaultCheckpoint = checkpoint();
  const implementationCheckpoint = checkpoint({
    checkpointId: "session-1:event-11",
    phase: "implementation",
    features: { sameCommandRepeatedCount: 1, sameErrorRepeatedCount: 0, loopScore: 0.1, progressScore: 0.9, contextTokensApprox: 2000 },
  });
  const routeEvents = [
    buildRouteEvent(defaultCheckpoint, decideRoute(defaultCheckpoint), "2026-06-12T00:00:02.000Z"),
    buildRouteEvent(implementationCheckpoint, decideRoute(implementationCheckpoint), "2026-06-12T00:00:03.000Z"),
  ];

  // One manually recorded outcome, linked to the first checkpoint.
  const cards = generateCapabilityCards(routeEvents, "2026-06-12T00:00:04.000Z", [{
    schema: "pi-router.outcome.v1",
    outcomeId: "outcome-1",
    recordedAt: "2026-06-12T00:00:04.000Z",
    sessionId: defaultCheckpoint.sessionId,
    checkpointId: defaultCheckpoint.checkpointId,
    taskType: "debug",
    taskStatus: "partial",
    testsPassedAfter: null,
    verifierImproved: null,
    acceptedDiff: null,
    userInterrupted: false,
    userOverrodeDecision: false,
    finalFilesTouched: 1,
    finalDiffLines: 12,
    wallTimeMs: null,
    cloudCostUsd: null,
    frontierCalls: 0,
    localTurns: 1,
    reworkTurns: 1,
    evidence: { source: "manual" },
  }]);

  expect(cards).toHaveLength(1);
  // Averages are the means over both events: (0.55 + 0.1) / 2, (0.45 + 0.9) / 2, (1000 + 2000) / 2.
  expect(cards[0]).toMatchObject({
    schema: "pi-router.model-capability-card.v1",
    modelId: "local/qwen",
    provider: "local",
    seed: { source: "none" },
    observed: {
      source: "local Pi telemetry",
      events: 2,
      sessions: 1,
      averageLoopScore: 0.325,
      averageProgressScore: 0.675,
      averageContextTokensApprox: 1500,
    },
    promotion: { manualOnly: true, promoted: false },
  });

  const observed = cards[0].observed;
  expect(observed.actions.escalate_debug_diagnosis).toBe(1);
  expect(observed.actions.continue_current).toBe(1);
  expect(observed.outcomes).toMatchObject({ linked: 1, partial: 1, averageReworkTurns: 1 });
});
|
|
143
|
+
|
|
144
|
+
it("fails capability-card generation when required events input is missing", () => {
  // A file name inside a freshly created temp directory cannot already exist, unlike a fixed
  // "/tmp/..." path, which is not portable and could be present on the host.
  const missingEventsPath = tempFile("missing-events.jsonl");
  expect(() => writeCapabilityCards(missingEventsPath, tempFile("cards.jsonl"))).toThrow(/required route events file not found/);
});
|
|
147
|
+
|
|
148
|
+
it("writes capability cards as JSONL", () => {
  const eventsFile = tempFile("events.jsonl");
  const cardsFile = tempFile("cards.jsonl");
  const source = checkpoint();
  // Seed the events file with a single JSONL record.
  const routeEvent = buildRouteEvent(source, decideRoute(source));
  writeFileSync(eventsFile, `${JSON.stringify(routeEvent)}\n`);

  const written = writeCapabilityCards(eventsFile, cardsFile);

  expect(written).toHaveLength(1);
  const cardsText = readFileSync(cardsFile, "utf8");
  expect(cardsText).toContain("pi-router.model-capability-card.v1");
});
|
|
159
|
+
|
|
160
|
+
it("generates teacher labels and reflection artifacts without transcript content", () => {
  // NOTE(review): sensitiveText is never placed into the checkpoint or the options passed below, so the
  // negative assertions at the end look like they cannot fail — confirm whether the fixture should carry it.
  const sensitiveText = "raw command npm test with SECRET_TOKEN=abc";
  const source = checkpoint();
  const reflection = generateTeacherReflection([source], { teacher: "local-rule", generatedAt: "2026-06-12T00:00:05.000Z" });
  const reflectionJson = JSON.stringify(reflection);

  expect(reflection.labels).toHaveLength(1);
  expect(reflection.labels[0]).toMatchObject({
    schema: "pi-router.teacher-label.v1",
    checkpointId: source.checkpointId,
    sessionId: source.sessionId,
    source: "local-rule",
    suggestedAction: "escalate_debug_diagnosis",
  });
  expect(reflection.markdown).toContain("Manual promotion only");

  // None of the raw-transcript fragments may appear anywhere in the serialized reflection.
  for (const forbidden of [sensitiveText, "npm test", "SECRET_TOKEN"]) {
    expect(reflectionJson).not.toContain(forbidden);
  }
});
|
|
179
|
+
|
|
180
|
+
it("marks imported teacher decisions with teacher-output provenance", () => {
  const teacherOutputPath = tempFile("teacher-decisions.jsonl");
  const source = checkpoint();
  // The imported decision is the rule-based one with its action swapped for "run_verifier".
  const importedDecision = { ...decideRoute(source), action: "run_verifier" };
  writeFileSync(teacherOutputPath, `${JSON.stringify(importedDecision)}\n`);

  const reflection = generateTeacherReflection([source], { teacher: "configured-teacher", teacherOutputPath });

  const [firstLabel] = reflection.labels;
  expect(firstLabel).toMatchObject({ source: "teacher-output", suggestedAction: "run_verifier", teacher: "configured-teacher" });
});
|
|
189
|
+
|
|
190
|
+
it("requires explicit teacher output for non-local teachers", () => {
  // No teacherOutputPath is supplied for the non-local teacher.
  const reflectWithoutTeacherOutput = () => generateTeacherReflection([checkpoint()], { teacher: "configured-teacher" });
  expect(reflectWithoutTeacherOutput).toThrow(/requires --teacher-output/);
});
|
|
193
|
+
|
|
194
|
+
it("writes reflection labels and markdown", () => {
  const paths = {
    checkpointPath: tempFile("checkpoints.jsonl"),
    labelsPath: tempFile("teacher-labels.jsonl"),
    reflectionPath: tempFile("reflection.md"),
  };
  // Input: a single checkpoint as one JSONL record.
  writeFileSync(paths.checkpointPath, `${JSON.stringify(checkpoint())}\n`);

  const result = writeTeacherReflection({ ...paths, teacher: "local-rule" });

  expect(result.labels).toHaveLength(1);
  const labelsText = readFileSync(paths.labelsPath, "utf8");
  const reflectionText = readFileSync(paths.reflectionPath, "utf8");
  expect(labelsText).toContain("pi-router.teacher-label.v1");
  expect(reflectionText).toContain("Pi router teacher reflection");
});
|
|
206
|
+
|
|
207
|
+
it("shadow-evaluates policy decisions against historical ledger events", () => {
  const source = checkpoint();
  // The ledger records "continue_current"; the expectations below have the evaluated policy
  // choosing "escalate_debug_diagnosis", i.e. exactly one divergence.
  const recordedDecision = { ...decideRoute(source), action: "continue_current" as const };
  const ledgerEvents = [buildRouteEvent(source, recordedDecision)];

  const report = shadowEvaluate([source], ledgerEvents, "2026-06-12T00:00:06.000Z");

  expect(report).toMatchObject({
    schema: "pi-router.shadow-eval.v1",
    checkpoints: 1,
    comparedEvents: 1,
    divergences: 1,
    divergenceRate: 1,
    manualPromotionRequired: true,
  });
  const { actionCounts, ledgerActionCounts } = report;
  expect(actionCounts.escalate_debug_diagnosis).toBe(1);
  expect(ledgerActionCounts.continue_current).toBe(1);
});
|
|
223
|
+
|
|
224
|
+
it("fails shadow eval when an explicit ledger path is missing", () => {
  const checkpointPath = tempFile("checkpoints.jsonl");
  writeFileSync(checkpointPath, `${JSON.stringify(checkpoint())}\n`);
  // A file name inside a freshly created temp directory cannot already exist, unlike a fixed
  // "/tmp/..." path, which is not portable and could be present on the host.
  const missingLedgerPath = tempFile("missing-ledger.jsonl");

  expect(() => writeShadowEval(checkpointPath, tempFile("shadow.json"), missingLedgerPath)).toThrow(/required route events file not found/);
});
|
|
230
|
+
|
|
231
|
+
it("writes shadow eval reports", () => {
  const checkpointsFile = tempFile("checkpoints.jsonl");
  const reportFile = tempFile("shadow.json");
  // Input: a single checkpoint as one JSONL record; no ledger path is passed.
  writeFileSync(checkpointsFile, `${JSON.stringify(checkpoint())}\n`);

  const report = writeShadowEval(checkpointsFile, reportFile);

  expect(report.checkpoints).toBe(1);
  const persisted = JSON.parse(readFileSync(reportFile, "utf8"));
  expect(persisted.schema).toBe("pi-router.shadow-eval.v1");
});
|
|
241
|
+
});
|