@fiale-plus/pi-rogue 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -1
- package/node_modules/@fiale-plus/pi-rogue-advisor/README.md +1 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/binary-gate-features.test.ts +8 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/binary-gate-features.ts +7 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/router.test.ts +26 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/router.ts +10 -1
- package/node_modules/@fiale-plus/pi-rogue-orchestration/README.md +3 -3
- package/node_modules/@fiale-plus/pi-rogue-orchestration/package.json +3 -0
- package/node_modules/@fiale-plus/pi-rogue-orchestration/skills/orchestration/SKILL.md +3 -2
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/goal.test.ts +65 -2
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/goal.ts +84 -4
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/loop.ts +3 -0
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/novelty-guard.test.ts +43 -0
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/novelty-guard.ts +96 -11
- package/node_modules/@fiale-plus/pi-rogue-router/README.md +45 -6
- package/node_modules/@fiale-plus/pi-rogue-router/src/binary-gate.test.ts +88 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/binary-gate.ts +232 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/cli.ts +123 -9
- package/node_modules/@fiale-plus/pi-rogue-router/src/completions.ts +39 -16
- package/node_modules/@fiale-plus/pi-rogue-router/src/config-extension.test.ts +111 -4
- package/node_modules/@fiale-plus/pi-rogue-router/src/config.ts +17 -2
- package/node_modules/@fiale-plus/pi-rogue-router/src/extension.ts +67 -7
- package/node_modules/@fiale-plus/pi-rogue-router/src/index.ts +4 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/observe.ts +76 -5
- package/node_modules/@fiale-plus/pi-rogue-router/src/outcomes.ts +130 -6
- package/node_modules/@fiale-plus/pi-rogue-router/src/reports.test.ts +92 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/reports.ts +116 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/sharpening.test.ts +223 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/sharpening.ts +344 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/teacher-runner.test.ts +126 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/teacher-runner.ts +238 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/v1-telemetry.test.ts +54 -1
- package/package.json +1 -1
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import { mkdtempSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { describe, expect, it } from "vitest";
|
|
5
|
+
import { parseTeacherDecision, runTeacherLabeling, teacherPromptText, type TeacherModelExecutor } from "./teacher-runner.js";
|
|
6
|
+
import type { TeacherPromptRequest } from "./learning.js";
|
|
7
|
+
|
|
8
|
+
/**
 * Creates a fresh, uniquely named temporary directory and returns the path of
 * `name` inside it. The file itself is not created.
 */
function tempFile(name: string): string {
  const prefix = join(tmpdir(), "pi-router-teacher-");
  const scratchDir = mkdtempSync(prefix);
  return join(scratchDir, name);
}
|
|
11
|
+
|
|
12
|
+
/**
 * Builds a teacher prompt request fixture. Top-level fields supplied in
 * `overrides` replace the defaults wholesale (nested objects are not merged).
 */
function request(overrides: Partial<TeacherPromptRequest> = {}): TeacherPromptRequest {
  const defaults: TeacherPromptRequest = {
    schema: "pi-router.teacher-prompt.v1",
    requestId: "request-1",
    teacher: "openai-codex/gpt-5.5",
    checkpointId: "session-1:event-1",
    sessionId: "session-1",
    rawSessionRef: {
      schema: "pi-router.raw-session-ref.v1",
      path: "/tmp/session.jsonl",
      fromEvent: 1,
      toEvent: 2,
      fromByte: 10,
      toByte: 20,
      contentHash: "hash-only",
    },
    allowedActions: ["continue_current", "run_verifier", "escalate_debug_diagnosis"],
    instruction: "Return one decision.",
    features: {
      phase: "debug",
      activeModel: "qwen3.6-35b-a3b-128k",
      provider: "local",
      loopScore: 0.7,
      progressScore: 0.3,
      sameCommandRepeatedCount: 1,
      sameErrorRepeatedCount: 2,
      verifierUsed: false,
      noVerifierUsed: true,
      diffLines: 12,
      diffFilesChanged: 2,
    },
  };
  return { ...defaults, ...overrides };
}
|
|
38
|
+
|
|
39
|
+
/**
 * Serializes a complete `pi-router.decision.v1` payload for the fixture
 * checkpoint, with the given action (defaults to `run_verifier`).
 */
function decisionJson(action = "run_verifier") {
  const decision = {
    schema: "pi-router.decision.v1",
    checkpointId: "session-1:event-1",
    action,
    adviceShape: "none",
    contextPolicy: "minimal",
    confidence: 0.82,
    reason: "teacher says verifier should run before more edits",
    policyVersion: "teacher/openai-codex/gpt-5.5",
  };
  return JSON.stringify(decision);
}
|
|
51
|
+
|
|
52
|
+
describe("router teacher label runner", () => {
|
|
53
|
+
// The fixture file is 34 bytes long; bytes 10-22 spell "bounded-span". The prompt
// must include that slice and must not leak the bytes after `toByte` ("secret-tail").
it("builds explicit teacher prompts with only the bounded raw session span", () => {
  const sessionPath = tempFile("session.jsonl");
  writeFileSync(sessionPath, "0123456789bounded-span-secret-tail");
  const prompt = teacherPromptText(request({ rawSessionRef: { schema: "pi-router.raw-session-ref.v1", path: sessionPath, fromEvent: 1, toEvent: 2, fromByte: 10, toByte: 22, contentHash: "hash-only" } }));

  expect(prompt).toContain("Return exactly one JSON object");
  expect(prompt).toContain("run_verifier");
  expect(prompt).toContain("rawSessionRef");
  expect(prompt).toContain("bounded-span");
  expect(prompt).not.toContain("secret-tail");
});
|
|
64
|
+
|
|
65
|
+
// Covers the parser contract: fenced JSON is accepted, the policy version is derived
// from the request rather than the model output, disallowed actions and missing fields
// are rejected, and neither extra fields nor the free-form reason text survive.
it("parses and validates teacher decisions", () => {
  // Response wrapped in a markdown ```json fence with leading blank lines.
  const parsed = parseTeacherDecision(request(), `\n\n\`\`\`json\n${decisionJson()}\n\`\`\``);

  expect(parsed).toMatchObject({ schema: "pi-router.decision.v1", checkpointId: "session-1:event-1", action: "run_verifier", policyVersion: "teacher/openai-codex/gpt-5.5/request/request-1" });
  // "stop_and_ask_user" is not in the fixture request's allowedActions.
  expect(() => parseTeacherDecision(request(), decisionJson("stop_and_ask_user"))).toThrow(/not allowed/);
  // adviceShape (and contextPolicy) are omitted; adviceShape is the first missing field checked.
  expect(() => parseTeacherDecision(request(), JSON.stringify({
    schema: "pi-router.decision.v1",
    checkpointId: "session-1:event-1",
    action: "run_verifier",
    confidence: 0.8,
    reason: "missing fields",
  }))).toThrow(/adviceShape invalid/);

  // Model output carrying a transcript quote, an unknown field and its own policyVersion.
  const withExtras = parseTeacherDecision(request(), JSON.stringify({
    ...JSON.parse(decisionJson()),
    reason: "The transcript says \"this is a very long raw transcript quote that should not be stored in labels\" and token=secret",
    transcriptExcerpt: "do not persist me",
    policyVersion: "model-supplied",
  }));
  expect(withExtras.policyVersion).toBe("teacher/openai-codex/gpt-5.5/request/request-1");
  expect(JSON.stringify(withExtras)).not.toContain("transcriptExcerpt");
  expect(withExtras.reason).not.toContain("very long raw transcript quote");
  expect(withExtras.reason).not.toContain("token=secret");
});
|
|
89
|
+
|
|
90
|
+
// End-to-end run over a one-line requests file using a stub executor (passed via
// `executor`), checking the returned summary and both JSONL output files.
it("runs an injected teacher executor and writes decisions plus labels", async () => {
  const requestsPath = tempFile("requests.jsonl");
  const decisionsPath = tempFile("teacher-decisions.jsonl");
  const labelsPath = tempFile("teacher-labels.jsonl");
  writeFileSync(requestsPath, `${JSON.stringify(request())}\n`);
  // Stub model: asserts the prompt mentions the checkpoint id, then answers with a valid decision.
  const executor: TeacherModelExecutor = ({ prompt }) => {
    expect(prompt).toContain("session-1:event-1");
    return decisionJson();
  };

  const summary = await runTeacherLabeling({
    requestsPath,
    decisionsOutputPath: decisionsPath,
    labelsOutputPath: labelsPath,
    executor,
    generatedAt: "2026-06-14T00:00:00.000Z",
  });

  expect(summary).toMatchObject({ schema: "pi-router.teacher-run-summary.v1", teacher: "openai-codex/gpt-5.5", teachers: ["openai-codex/gpt-5.5"], requests: 1, decisions: 1, labels: 1, dryRun: false });
  expect(readFileSync(decisionsPath, "utf8")).toContain("pi-router.decision.v1");
  // Exactly one label row is expected, so the trimmed file parses as a single JSON object.
  const label = JSON.parse(readFileSync(labelsPath, "utf8").trim());
  expect(label).toMatchObject({ schema: "pi-router.teacher-label.v1", source: "teacher-output", suggestedAction: "run_verifier" });
});
|
|
113
|
+
|
|
114
|
+
// Dry run: requests are counted, no executor is supplied, and both output files
// are written empty.
it("supports dry-run without model calls", async () => {
  const requestsPath = tempFile("requests.jsonl");
  const decisionsPath = tempFile("teacher-decisions.jsonl");
  const labelsPath = tempFile("teacher-labels.jsonl");
  writeFileSync(requestsPath, `${JSON.stringify(request())}\n`);

  const summary = await runTeacherLabeling({ requestsPath, decisionsOutputPath: decisionsPath, labelsOutputPath: labelsPath, dryRun: true });

  expect(summary).toMatchObject({ requests: 1, decisions: 0, labels: 0, dryRun: true });
  expect(readFileSync(decisionsPath, "utf8")).toBe("");
  expect(readFileSync(labelsPath, "utf8")).toBe("");
});
|
|
126
|
+
});
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
import { execFileSync } from "node:child_process";
|
|
2
|
+
import { closeSync, existsSync, mkdirSync, mkdtempSync, openSync, readFileSync, readSync, rmSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { dirname, join, resolve } from "node:path";
|
|
5
|
+
import { hashText } from "./hash.js";
|
|
6
|
+
import { TEACHER_LABEL_SCHEMA, type TeacherLabel, type TeacherPromptRequest } from "./learning.js";
|
|
7
|
+
import type { AdviceShape, ContextPolicy, RouteAction, RouteDecision } from "./types.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Route action names recognized by this module. `parseTeacherDecision` uses the
 * set both to filter a request's `allowedActions` and to reject unknown actions
 * in a teacher response.
 */
const ROUTE_ACTIONS = new Set<RouteAction>([
  "continue_current",
  "continue_local",
  "summarize_context",
  "run_verifier",
  "ask_micro_hint",
  "escalate_plan_critique",
  "escalate_debug_diagnosis",
  "escalate_diff_review",
  "delegate_full_step",
  "spawn_subagent",
  "merge_subagent_result",
  "stop_and_ask_user",
]);
|
|
23
|
+
/** Accepted `adviceShape` values; a teacher decision with any other value is rejected. */
const ADVICE_SHAPES = new Set<AdviceShape>(["none", "micro_hint", "plan_critique", "debug_diagnosis", "diff_review", "full_delegation"]);
/** Accepted `contextPolicy` values; a teacher decision with any other value is rejected. */
const CONTEXT_POLICIES = new Set<ContextPolicy>(["none", "minimal", "recent_events", "focused_error_and_diff", "diff_only", "session_summary", "full_context"]);
|
|
25
|
+
|
|
26
|
+
/** Schema tag stamped on every summary returned by `runTeacherLabeling`. */
export const TEACHER_RUN_SUMMARY_SCHEMA = "pi-router.teacher-run-summary.v1" as const;

/** Result of one `runTeacherLabeling` invocation. */
export interface TeacherRunSummary {
  schema: typeof TEACHER_RUN_SUMMARY_SCHEMA;
  /** The single teacher used, `"mixed"` when requests name several, or a fallback id when there are no requests. */
  teacher: string;
  /** Sorted, de-duplicated teacher ids across all requests. */
  teachers: string[];
  /** Number of requests read from the requests file. */
  requests: number;
  /** Number of decisions written (0 for a dry run). */
  decisions: number;
  /** Number of labels written (0 for a dry run). */
  labels: number;
  /** Resolved path of the decisions JSONL output. */
  decisionsOutput: string;
  /** Resolved path of the labels JSONL output. */
  labelsOutput: string;
  /** True when no model was called and the outputs were written empty. */
  dryRun: boolean;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Callback that sends a rendered prompt to a teacher model and returns the raw
 * response text, either synchronously or as a promise. `runTeacherLabeling`
 * awaits the result and hands it to `parseTeacherDecision`.
 */
export interface TeacherModelExecutor {
  (input: { request: TeacherPromptRequest; prompt: string; teacher: string }): string | Promise<string>;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Loads teacher prompt requests from a JSONL file (one JSON value per non-blank line).
 *
 * Rows are cast to `TeacherPromptRequest` without schema validation.
 *
 * @param path - Requests file; resolved against the current working directory.
 * @returns The parsed rows in file order.
 * @throws Error when the file does not exist.
 * @throws SyntaxError when a line is not valid JSON; the message names the file and the 1-based line number.
 */
export function readTeacherPromptRequests(path: string): TeacherPromptRequest[] {
  const resolved = resolve(path);
  if (!existsSync(resolved)) throw new Error(`teacher request file not found: ${path}`);
  const requests: TeacherPromptRequest[] = [];
  const lines = readFileSync(resolved, "utf8").split("\n");
  for (const [index, line] of lines.entries()) {
    // trim() also strips a leading UTF-8 BOM and a trailing "\r", neither of which
    // should make an otherwise valid row unparseable.
    const row = line.trim();
    if (!row) continue;
    try {
      requests.push(JSON.parse(row) as TeacherPromptRequest);
    } catch (error: unknown) {
      // Re-throw with location context so a bad row in a large batch file can be found.
      const detail = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(`teacher request file ${path} has invalid JSON on line ${index + 1}: ${detail}`);
    }
  }
  return requests;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Writes `rows` to `path` as JSON Lines, creating parent directories as needed.
 * Each row is followed by a newline; an empty `rows` array produces an empty file.
 */
function writeJsonl(path: string, rows: unknown[]): void {
  const target = resolve(path);
  mkdirSync(dirname(target), { recursive: true });
  const serialized = rows.map((row) => JSON.stringify(row));
  const payload = serialized.length === 0 ? "" : `${serialized.join("\n")}\n`;
  writeFileSync(target, payload);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Reads the byte range `[fromByte, toByte)` named by `request.rawSessionRef`,
 * capped at `maxBytes`. When the range exceeds the cap, the LAST `maxBytes`
 * bytes of the range are returned and `truncated` is true.
 *
 * @returns The decoded UTF-8 text and truncation flag, or `null` when the
 * reference has no path, the range is empty, or the file cannot be read.
 */
function readRawSessionSpan(request: TeacherPromptRequest, maxBytes = 20_000): { text: string; truncated: boolean } | null {
  const ref = request.rawSessionRef;
  const requested = Math.max(0, ref.toByte - ref.fromByte);
  const wanted = Math.min(maxBytes, requested);
  if (!ref.path || wanted <= 0) return null;

  const truncated = requested > maxBytes;
  // When truncating, keep the tail of the span rather than its head.
  const start = truncated ? Math.max(ref.fromByte, ref.toByte - wanted) : ref.fromByte;

  let handle: number | undefined;
  try {
    handle = openSync(resolve(ref.path), "r");
    const chunk = Buffer.alloc(wanted);
    const got = readSync(handle, chunk, 0, wanted, start);
    // Fewer bytes than requested may come back (e.g. the range runs past EOF).
    return { text: chunk.toString("utf8", 0, got), truncated };
  } catch {
    // Best effort: an unreadable session file simply yields no span.
    return null;
  } finally {
    if (handle !== undefined) closeSync(handle);
  }
}
|
|
78
|
+
|
|
79
|
+
/**
 * Renders the full prompt sent to a teacher model: fixed instructions, the
 * allowed actions, the request's own instruction, the request as pretty-printed
 * JSON, and the bounded raw session span (or `[unavailable]` if it cannot be read).
 * Sections are separated by blank lines.
 */
export function teacherPromptText(request: TeacherPromptRequest): string {
  const span = readRawSessionSpan(request);

  let evidence = "[unavailable]";
  if (span) {
    evidence = span.truncated ? `${span.text}\n[truncated]` : span.text;
  }

  const sections = [
    "You are labeling a Pi router checkpoint for model routing.",
    "Return exactly one JSON object matching pi-router.decision.v1 and no markdown.",
    "Use the bounded raw session span as evidence, but do not quote transcript text in the reason; summarize evidence only.",
    `Allowed actions: ${request.allowedActions.join(", ")}`,
    request.instruction,
    "Request:",
    JSON.stringify(request, null, 2),
    "Bounded raw session span (not persisted by the router; do not quote it in output):",
    evidence,
  ];
  return sections.join("\n\n");
}
|
|
93
|
+
|
|
94
|
+
/**
 * Returns the index of the `}` that closes the `{` at `start`, or -1 when the
 * brace is never closed. Braces inside double-quoted JSON strings are ignored.
 */
function findClosingBrace(text: string, start: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i += 1) {
    const ch = text[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') inString = true;
    else if (ch === "{") depth += 1;
    else if (ch === "}") {
      depth -= 1;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/**
 * Extracts the first JSON object from a model response.
 *
 * Strategies are tried in order, and a failed strategy falls through to the next
 * one instead of aborting:
 *   1. the whole (trimmed) response;
 *   2. the body of the first markdown code fence;
 *   3. each brace-balanced `{...}` span, scanning left to right.
 *
 * Only plain objects are accepted; `null`, arrays and primitives are skipped.
 *
 * @param text - Raw teacher model output.
 * @returns The parsed object.
 * @throws Error when no strategy yields a JSON object.
 */
function extractJsonObject(text: string): unknown {
  const trimmed = text.trim();

  // Parses a candidate, accepting it only when it is a plain JSON object.
  const parseObject = (candidate: string): Record<string, unknown> | undefined => {
    try {
      const parsed: unknown = JSON.parse(candidate);
      const isPlainObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
      return isPlainObject ? (parsed as Record<string, unknown>) : undefined;
    } catch {
      return undefined;
    }
  };

  if (trimmed.startsWith("{")) {
    const whole = parseObject(trimmed);
    if (whole) return whole;
    // Fall through; models often append prose after a valid JSON object.
  }

  const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/i);
  const fencedBody = fenced?.[1];
  if (fencedBody !== undefined) {
    const fromFence = parseObject(fencedBody);
    if (fromFence) return fromFence;
  }

  // Balanced scan: unlike slicing from the first "{" to the last "}", this still
  // works when the surrounding prose itself contains braces.
  let start = trimmed.indexOf("{");
  while (start >= 0) {
    const end = findClosingBrace(trimmed, start);
    if (end < 0) break;
    const candidate = parseObject(trimmed.slice(start, end + 1));
    if (candidate) return candidate;
    start = trimmed.indexOf("{", end + 1);
  }

  throw new Error("teacher response did not contain a JSON object");
}
|
|
110
|
+
|
|
111
|
+
/**
 * Derives the policy version recorded on a teacher decision from the request's
 * teacher id and request id; each run of whitespace becomes a single `-`.
 */
function teacherPolicyVersion(request: TeacherPromptRequest): string {
  const { teacher, requestId } = request;
  const version = `teacher/${teacher}/request/${requestId}`;
  return version.replace(/\s+/g, "-");
}
|
|
114
|
+
|
|
115
|
+
/**
 * Replaces a teacher's free-form rationale with a fixed notice plus a hash of the
 * original text, so the rationale itself is never persisted.
 */
function sanitizeRationale(text: string): string {
  // The prompt includes a raw session span, so even unquoted excerpts in the
  // rationale would violate the router's derived-artifact privacy rule.
  const rationaleHash = hashText(text);
  return `teacher rationale redacted; rationaleHash=${rationaleHash}`;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Validates a teacher model response against its request and converts it into a
 * normalized route decision.
 *
 * Only the validated fields are copied to the result: extra properties in the
 * response are dropped, `reason` is replaced by a redacted hash notice, and
 * `policyVersion` is derived from the request rather than trusted from the model.
 *
 * @param request - The prompt request the response answers.
 * @param text - Raw teacher model output (bare JSON, fenced JSON, or JSON with prose around it).
 * @returns The normalized decision, with confidence rounded to three decimals.
 * @throws Error when no JSON object is found or any field fails validation.
 */
export function parseTeacherDecision(request: TeacherPromptRequest, text: string): RouteDecision {
  const extracted = extractJsonObject(text);
  // The extracted value is typed `unknown`; reject anything that is not an object
  // with a descriptive error instead of letting the property reads below throw a TypeError.
  if (typeof extracted !== "object" || extracted === null || Array.isArray(extracted)) {
    throw new Error(`teacher decision is not a JSON object for ${request.checkpointId}`);
  }
  const value = extracted as Partial<RouteDecision>;
  if (value.schema !== "pi-router.decision.v1") throw new Error(`teacher decision has invalid schema for ${request.checkpointId}`);
  if (value.checkpointId !== request.checkpointId) throw new Error(`teacher decision checkpoint mismatch for ${request.checkpointId}`);
  // Only actions that are both requested and known to this module can be chosen.
  const allowedActions = request.allowedActions.filter((action): action is RouteAction => ROUTE_ACTIONS.has(action));
  if (!value.action || !ROUTE_ACTIONS.has(value.action) || !allowedActions.includes(value.action)) throw new Error(`teacher decision action not allowed for ${request.checkpointId}: ${String(value.action)}`);
  if (!value.adviceShape || !ADVICE_SHAPES.has(value.adviceShape)) throw new Error(`teacher decision adviceShape invalid for ${request.checkpointId}`);
  if (!value.contextPolicy || !CONTEXT_POLICIES.has(value.contextPolicy)) throw new Error(`teacher decision contextPolicy invalid for ${request.checkpointId}`);
  if (typeof value.confidence !== "number" || value.confidence < 0 || value.confidence > 1) throw new Error(`teacher decision confidence invalid for ${request.checkpointId}`);
  if (typeof value.reason !== "string" || !value.reason.trim()) throw new Error(`teacher decision missing reason for ${request.checkpointId}`);
  return {
    schema: "pi-router.decision.v1",
    checkpointId: request.checkpointId,
    action: value.action,
    adviceShape: value.adviceShape,
    contextPolicy: value.contextPolicy,
    confidence: Number(value.confidence.toFixed(3)),
    reason: sanitizeRationale(value.reason),
    policyVersion: teacherPolicyVersion(request),
  };
}
|
|
142
|
+
|
|
143
|
+
/**
 * Builds the teacher label row for a validated decision. The label id is a hash
 * of the teacher, request id, chosen action and the session span's content hash.
 *
 * @param generatedAt - Timestamp string stored verbatim on the label.
 */
export function labelFromTeacherDecision(request: TeacherPromptRequest, decision: RouteDecision, generatedAt: string): TeacherLabel {
  const { teacher, requestId, checkpointId, sessionId, rawSessionRef } = request;
  const labelId = hashText("teacher-label", teacher, requestId, decision.action, rawSessionRef.contentHash);
  return {
    schema: TEACHER_LABEL_SCHEMA,
    labelId,
    generatedAt,
    teacher,
    checkpointId,
    sessionId,
    rawSessionRef,
    suggestedAction: decision.action,
    confidence: decision.confidence,
    rationale: decision.reason,
    source: "teacher-output",
  };
}
|
|
158
|
+
|
|
159
|
+
/**
 * Default teacher executor: writes the prompt to a private temporary file and
 * runs the `pi` CLI in print mode against it with the requested model, returning
 * the CLI's stdout. The temporary directory is removed whether or not the call succeeds.
 */
export function defaultPiTeacherExecutor(input: { request: TeacherPromptRequest; prompt: string; teacher: string }): string {
  const scratchDir = mkdtempSync(join(tmpdir(), "pi-router-teacher-"));
  const promptFile = join(scratchDir, "prompt.md");
  // All optional pi features are switched off so only the prompt file is sent.
  const cliArgs = [
    "-p",
    "--no-session",
    "--no-tools",
    "--no-context-files",
    "--no-extensions",
    "--no-skills",
    "--no-prompt-templates",
    "--model",
    input.teacher,
    `@${promptFile}`,
  ];
  try {
    // Owner-only permissions: the prompt embeds raw session text.
    writeFileSync(promptFile, input.prompt, { mode: 0o600 });
    return execFileSync("pi", cliArgs, {
      encoding: "utf8",
      maxBuffer: 1024 * 1024,
      stdio: ["ignore", "pipe", "pipe"],
    });
  } finally {
    rmSync(scratchDir, { recursive: true, force: true });
  }
}
|
|
184
|
+
|
|
185
|
+
/**
 * Labels every request in a JSONL requests file with a teacher model and writes
 * the resulting decisions and labels as JSONL.
 *
 * - `teacher` overrides the teacher named on every request.
 * - `dryRun` writes both outputs empty and calls no model.
 * - `executor` replaces the default `pi` CLI executor.
 * - `generatedAt` is stamped on every label (defaults to the current time).
 *
 * Requests are processed sequentially; any failure rejects before either output is written.
 */
export async function runTeacherLabeling(options: {
  requestsPath: string;
  decisionsOutputPath: string;
  labelsOutputPath: string;
  teacher?: string;
  dryRun?: boolean;
  generatedAt?: string;
  executor?: TeacherModelExecutor;
}): Promise<TeacherRunSummary> {
  const requests = readTeacherPromptRequests(options.requestsPath).map((request) => {
    if (!options.teacher) return request;
    return { ...request, teacher: options.teacher };
  });
  const teachers = Array.from(new Set(requests.map((request) => request.teacher))).sort();

  let teacher: string;
  if (teachers.length === 1) {
    teacher = teachers[0];
  } else if (teachers.length > 1) {
    teacher = "mixed";
  } else {
    teacher = options.teacher ?? "openai-codex/gpt-5.5";
  }

  // Shared shape of the dry-run and real-run summaries.
  const summarize = (decisionCount: number, labelCount: number, dryRun: boolean): TeacherRunSummary => ({
    schema: TEACHER_RUN_SUMMARY_SCHEMA,
    teacher,
    teachers,
    requests: requests.length,
    decisions: decisionCount,
    labels: labelCount,
    decisionsOutput: resolve(options.decisionsOutputPath),
    labelsOutput: resolve(options.labelsOutputPath),
    dryRun,
  });

  if (options.dryRun) {
    writeJsonl(options.decisionsOutputPath, []);
    writeJsonl(options.labelsOutputPath, []);
    return summarize(0, 0, true);
  }

  const executor = options.executor ?? defaultPiTeacherExecutor;
  const generatedAt = options.generatedAt ?? new Date().toISOString();
  const decisions: RouteDecision[] = [];
  const labels: TeacherLabel[] = [];
  for (const request of requests) {
    const response = await executor({ request, prompt: teacherPromptText(request), teacher: request.teacher });
    const decision = parseTeacherDecision(request, response);
    decisions.push(decision);
    labels.push(labelFromTeacherDecision(request, decision, generatedAt));
  }

  writeJsonl(options.decisionsOutputPath, decisions);
  writeJsonl(options.labelsOutputPath, labels);
  return summarize(decisions.length, labels.length, false);
}
|
|
@@ -9,7 +9,7 @@ import { decideRoute } from "./decision.js";
|
|
|
9
9
|
import { buildRouteEvent } from "./ledger.js";
|
|
10
10
|
import { readGitDiffStats } from "./git-features.js";
|
|
11
11
|
import { generateTeacherPromptRequests } from "./learning.js";
|
|
12
|
-
import { buildUnknownOutcome, inferOutcomes, writeInferredOutcomes } from "./outcomes.js";
|
|
12
|
+
import { buildUnknownOutcome, enrichOutcome, inferOutcomes, writeEnrichedOutcomes, writeInferredOutcomes } from "./outcomes.js";
|
|
13
13
|
import { buildSubagentLedgerEvent, recommendSubagentDecision } from "./subagents.js";
|
|
14
14
|
import type { RouterCheckpoint } from "./types.js";
|
|
15
15
|
|
|
@@ -152,6 +152,59 @@ describe("router v1 outcome and feature telemetry", () => {
|
|
|
152
152
|
expect(JSON.stringify(outcome)).not.toContain("Error: boom");
|
|
153
153
|
});
|
|
154
154
|
|
|
155
|
+
// Starts from an outcome skeleton whose status, verifier and diff fields are
// unknown/null and checks that enrichOutcome fills them in from the checkpoint
// features and route event supplied as evidence.
it("enriches outcome skeletons from checkpoint and route-event evidence", () => {
  const item = checkpoint({ features: { verifierUsed: true, testsImproved: true, progressScore: 0.9, loopScore: 0.1, diffLines: 44, diffFilesChanged: 3, sameErrorRepeatedCount: 1 } });
  const event = buildRouteEvent(item, decideRoute(item));
  const outcome = buildUnknownOutcome(event, item);

  const enriched = enrichOutcome({ ...outcome, taskStatus: "unknown", testsPassedAfter: true, verifierImproved: null, acceptedDiff: null }, { checkpoint: item, event, recordedAt: "2026-06-14T00:00:00.000Z" });

  expect(enriched).toMatchObject({
    taskStatus: "success",
    testsPassedAfter: true,
    verifierImproved: true,
    acceptedDiff: true,
    finalFilesTouched: 3,
    finalDiffLines: 44,
  });
  expect(enriched.evidence.notesHash).toBeTruthy();
  // The serialized outcome must not contain the literal command text "npm test".
  expect(JSON.stringify(enriched)).not.toContain("npm test");
});
|
|
173
|
+
|
|
174
|
+
// Exercises writeEnrichedOutcomes end to end on temp files: the normal path,
// events-only evidence, re-enriching already enriched output, preservation of
// manual evidence, and each rejected input combination.
it("writes enriched outcomes from explicit inputs", () => {
  const item = checkpoint({ features: { verifierUsed: true, testsImproved: false, progressScore: 0.2, loopScore: 0.8, sameErrorRepeatedCount: 3 } });
  const event = buildRouteEvent(item, decideRoute(item));
  const checkpointPath = tempFile("checkpoints.jsonl");
  const eventsPath = tempFile("events.jsonl");
  const outcomesPath = tempFile("outcomes.jsonl");
  const outputPath = tempFile("outcomes.enriched.jsonl");
  writeFileSync(checkpointPath, `${JSON.stringify(item)}\n`);
  writeFileSync(eventsPath, `${JSON.stringify(event)}\n`);
  writeFileSync(outcomesPath, `${JSON.stringify({ ...buildUnknownOutcome(event, item), testsPassedAfter: false })}\n`);

  // Normal path: checkpoint and events are both available.
  const summary = writeEnrichedOutcomes({ outcomesPath, checkpointPath, eventsPath, outputPath });
  const enriched = JSON.parse(readFileSync(outputPath, "utf8").trim());

  expect(summary).toMatchObject({ schema: "pi-router.outcome-enrich-summary.v1", inputOutcomes: 1, outputOutcomes: 1, enriched: 1 });
  expect(enriched).toMatchObject({ testsPassedAfter: false, verifierImproved: false, taskStatus: "failed" });
  // Events only: diff figures are expected to match the route event's metrics.
  const eventOnlyOutput = tempFile("outcomes.event-only.jsonl");
  writeEnrichedOutcomes({ outcomesPath, eventsPath, outputPath: eventOnlyOutput });
  expect(JSON.parse(readFileSync(eventOnlyOutput, "utf8").trim())).toMatchObject({ finalDiffLines: event.metrics.diffLines, finalFilesTouched: event.metrics.diffFilesChanged, reworkTurns: 2 });
  // Feeding the enriched output back in reports nothing newly enriched.
  expect(writeEnrichedOutcomes({ outcomesPath: outputPath, checkpointPath, eventsPath, outputPath: tempFile("outcomes.enriched.again.jsonl") }).enriched).toBe(0);
  // An outcome with manual evidence keeps its existing notesHash.
  const manualOutput = tempFile("outcomes.manual.enriched.jsonl");
  writeFileSync(outcomesPath, `${JSON.stringify({ ...buildUnknownOutcome(event, item), evidence: { source: "manual", notesHash: "manual-notes-hash" } })}\n`);
  writeEnrichedOutcomes({ outcomesPath, checkpointPath, eventsPath, outputPath: manualOutput });
  expect(JSON.parse(readFileSync(manualOutput, "utf8").trim()).evidence.notesHash).toBe("manual-notes-hash");
  // Rejected inputs: event with a different checkpoint id, empty events file, no evidence, missing events file.
  const wrongEventsPath = tempFile("wrong-events.jsonl");
  writeFileSync(wrongEventsPath, `${JSON.stringify({ ...event, checkpointId: "other-checkpoint" })}\n`);
  expect(() => writeEnrichedOutcomes({ outcomesPath, eventsPath: wrongEventsPath, outputPath: tempFile("bad-wrong-events.jsonl") })).toThrow(/outcome routeEventId\/checkpointId mismatch/);
  const emptyEvents = tempFile("empty-events.jsonl");
  writeFileSync(emptyEvents, "");
  expect(() => writeEnrichedOutcomes({ outcomesPath, eventsPath: emptyEvents, outputPath: tempFile("bad-empty-evidence.jsonl") })).toThrow(/contains no events/);
  expect(() => writeEnrichedOutcomes({ outcomesPath, outputPath: tempFile("bad-no-evidence.jsonl") })).toThrow(/requires --checkpoint-file or --events/);
  expect(() => writeEnrichedOutcomes({ outcomesPath, eventsPath: join(tmpdir(), "missing-events.jsonl"), outputPath: tempFile("bad-enriched.jsonl") })).toThrow(/route events file not found/);
});
|
|
207
|
+
|
|
155
208
|
it("writes inferred outcomes from checkpoint and route-event files", () => {
|
|
156
209
|
const item = checkpoint();
|
|
157
210
|
const checkpointPath = tempFile("checkpoints.jsonl");
|