@fiale-plus/pi-rogue-bundle 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/node_modules/@fiale-plus/pi-core/README.md +12 -0
- package/node_modules/@fiale-plus/pi-core/package.json +25 -0
- package/node_modules/@fiale-plus/pi-core/src/context-broker.test.ts +216 -0
- package/node_modules/@fiale-plus/pi-core/src/context-broker.ts +308 -0
- package/node_modules/@fiale-plus/pi-core/src/index.ts +5 -0
- package/node_modules/@fiale-plus/pi-core/src/paths.ts +36 -0
- package/node_modules/@fiale-plus/pi-core/src/risk.test.ts +129 -0
- package/node_modules/@fiale-plus/pi-core/src/risk.ts +97 -0
- package/node_modules/@fiale-plus/pi-core/src/storage.ts +39 -0
- package/node_modules/@fiale-plus/pi-core/src/text.test.ts +36 -0
- package/node_modules/@fiale-plus/pi-core/src/text.ts +14 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/assets/binary-gate-model.json +23399 -23399
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/binary-gate-features.test.ts +19 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/binary-gate-features.ts +248 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/binary-gate.test.ts +66 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/extension.ts +53 -12
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/internal.ts +16 -1
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/loop-convergence.test.ts +7 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/router.ts +4 -37
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/state-versioning.test.ts +227 -0
- package/package.json +4 -2
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { extractBinaryGateFeatureCounts } from "./binary-gate-features.js";
|
|
3
|
+
|
|
4
|
+
// Smoke test for the feature extractor: one prompt that should trip a
// representative feature from each family, each expected exactly once.
describe("binary gate feature extraction", () => {
  it("emits shared lexical and routing cue features", () => {
    const prompt = "review the auth migration diff before production deploy?";
    const extracted = extractBinaryGateFeatureCounts(prompt);

    const expectedOnce = [
      "cue:question_mark",
      "cue:question_punct",
      "cue:imperative",
      "len_bucket:medium",
      "complex:auth",
      "complex:migration",
      "review:review",
      "review:diff",
      "safety:production",
      "safety:deploy",
    ];

    // Compare as one object so a failure shows every missing or miscounted key at once.
    const observed = Object.fromEntries(expectedOnce.map((key) => [key, extracted.get(key)]));
    const wanted = Object.fromEntries(expectedOnce.map((key) => [key, 1]));
    expect(observed).toEqual(wanted);
  });
});
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
/**
 * Ordered rewrites applied to lower-cased input: URLs collapse to the literal
 * token "url", then every character that is not a-z, 0-9, whitespace or an
 * apostrophe becomes a space.
 */
const NORMALIZE_REPLACEMENT_PATTERNS: ReadonlyArray<readonly [RegExp, string]> = [
  [/https?:\/\/\S+/g, " url "],
  [/[^a-z0-9\s']/g, " "],
];

/** True for character n-grams made only of whitespace; those are not emitted. */
const WHITESPACE_ONLY_PATTERN = /^\s+$/;

/**
 * Verbs that mark a prompt as imperative when the trimmed raw text STARTS with
 * one of them (case-insensitive). This is a plain prefix match with no word
 * boundary, so e.g. "settings ..." also matches via "set"; that behaviour is
 * kept as-is because the emitted feature feeds a trained model.
 */
const IMPERATIVE_PREFIX_PATTERN =
  /^(?:create|add|make|change|write|fix|update|remove|delete|run|install|set|build|deploy|check|investigate|debug|review|test|refactor|merge|close|open|start|stop|continue|show|list|compact|setup|implement|rename|extract|migrate|patch)/i;

/** Lower-cases and strips `text` down to single-space-separated a-z/0-9/' words. */
function normalizeBinaryGateText(text: string): string {
  let normalized = String(text ?? "").toLowerCase();
  for (const [pattern, replacement] of NORMALIZE_REPLACEMENT_PATTERNS) {
    normalized = normalized.replace(pattern, replacement);
  }
  return normalized.replace(/\s+/g, " ").trim();
}

/** Splits already-normalized text into its words; empty text yields no tokens. */
function tokenizeNormalizedText(normalized: string): string[] {
  return normalized ? normalized.split(" ").filter(Boolean) : [];
}

/** Replaces each whitespace run with "_" so a phrase can be used inside a feature key. */
function replaceSpaces(value: string): string {
  return value.replace(/\s+/g, "_");
}

/** Adds `by` (default 1) to the counter stored under `key`, starting from 0. */
function inc(map: Map<string, number>, key: string, by = 1): void {
  map.set(key, (map.get(key) || 0) + by);
}

/** A substring feature: `key` is emitted when normalized text contains `needle`. */
interface PhraseFeature {
  readonly key: string;
  readonly needle: string;
}

/**
 * Builds the phrase features for one family.
 *
 * The feature key is derived from the phrase exactly as written
 * (`<prefix>:<phrase with spaces as "_">`), while the needle is the phrase run
 * through the same normalizer as the input. Without that, phrases containing
 * punctuation or upper-case ("rm -rf", "chmod -R", "check-in", ...) could never
 * match, because those characters are stripped from the text being searched.
 * Phrases made only of a-z/0-9/space/' normalize to themselves, so their
 * matching is unchanged.
 *
 * NOTE(review): keys are unchanged, but previously-dead features can now fire;
 * confirm this agrees with how the model's training features were extracted.
 */
function compilePhraseFeatures(prefix: string, phrases: readonly string[]): PhraseFeature[] {
  return phrases
    .map((phrase) => ({
      key: `${prefix}:${replaceSpaces(phrase)}`,
      needle: normalizeBinaryGateText(phrase),
    }))
    // An empty needle would match every input via "".includes("").
    .filter((feature) => feature.needle.length > 0);
}

/** Emits every feature whose needle occurs in `normalized`; returns how many matched. */
function addPhraseFeatures(
  counts: Map<string, number>,
  normalized: string,
  features: readonly PhraseFeature[],
): number {
  let matched = 0;
  for (const { key, needle } of features) {
    if (normalized.includes(needle)) {
      inc(counts, key);
      matched++;
    }
  }
  return matched;
}

const SAFETY_FEATURES = compilePhraseFeatures("safety", [
  "rm -rf",
  "sudo",
  "shutdown",
  "reboot",
  "mkfs",
  "chmod -R",
  "chown",
  "git push --force",
  "curl | sh",
  "wget | sh",
  "drop table",
  "delete database",
  "secret",
  "token",
  "credential",
  "password",
  "prod",
  "production",
  "deploy",
  "deploying",
]);

const COMPLEXITY_FEATURES = compilePhraseFeatures("complex", [
  "architecture",
  "refactor",
  "design",
  "tradeoff",
  "security",
  "auth",
  "migration",
  "performance",
  "scale",
  "scalability",
  "framework",
  "system design",
  "schema",
  "data model",
  "protocol",
  "advisor routing",
  "advisor flow",
  "router logic",
  "call vs skip",
  "skip vs call",
  "compare",
  "recommend",
  "benchmark",
  "evaluate",
  "experiment",
  "train",
  "strategy",
  "choose",
  "make sense",
  "worth",
  "kpi",
  "kpis",
  "how it works",
  "where it comes from",
  "what would you choose",
  "what do you think",
  "next step",
  "pick between",
  "buy",
  "usage",
  "sustained speed",
  "available models",
  "running model kpis",
]);

const DEBUG_FEATURES = compilePhraseFeatures("debug", [
  "debug",
  "bug",
  "error",
  "stack trace",
  "traceback",
  "fail",
  "broken",
  "investigate",
  "why is",
  "cannot",
  "can't",
  "crash",
  "regression",
]);

const CONTEXT_FEATURES = compilePhraseFeatures("context", [
  "need more context",
  "missing context",
  "clarify",
  "not enough info",
  "unspecified",
  "unknown",
  "ambiguous",
]);

const REVIEW_FEATURES = compilePhraseFeatures("review", [
  "review",
  "check",
  "verify",
  "validate",
  "diff",
  "pr",
  "pull request",
  "feedback",
]);

const DONE_FEATURES = compilePhraseFeatures("done", [
  "done",
  "complete",
  "fixed",
  "implemented",
  "works",
  "passing tests",
  "tests pass",
  "verified",
  "looks good",
  "merged",
]);

const CHECKIN_FEATURES = compilePhraseFeatures("checkin", [
  "check-in",
  "checkin",
  "mid-hour",
  "alignment",
  "progress",
  "status",
  "stats",
  "log",
  "logs",
]);

/** Single-word cues; matched against whole tokens, not substrings. */
const TOKEN_CUES: readonly string[] = [
  "check",
  "why",
  "what",
  "how",
  "should",
  "status",
  "stats",
  "log",
  "logs",
  "review",
  "diff",
  "pr",
  "build",
  "run",
  "test",
  "deploy",
  "fix",
  "debug",
  "install",
  "configure",
  "plan",
  "continue",
  "resume",
  "compact",
  "research",
  "update",
  "patch",
  "cleanup",
  "remove",
];

/** Multi-word cues; matched as substrings of the normalized text. */
const MULTI_WORD_CUE_FEATURES = compilePhraseFeatures("cue", [
  "what is",
  "what's",
  "safe to use",
  "pull request",
  "model family",
  "how does",
  "next step",
  "path forward",
  "should we",
  "what should",
]);

/**
 * Extracts sparse feature counts from a prompt.
 *
 * Features are emitted in this order (insertion order of the returned Map):
 * word 1-/2-grams (`w1:`, `w2:`), character 3-/4-grams over the space-padded
 * normalized text (`c3:`, `c4:`), leading-token prefixes (`pref1:`..`pref3:`),
 * punctuation / length / imperative cues, then the phrase families `safety:`,
 * `complex:` (plus `complex_count:<n>`), `debug:`, `context:`, `review:`,
 * `done:`, `checkin:`, and finally token and multi-word `cue:` features.
 * N-gram features count occurrences; every other feature is emitted at most once.
 *
 * @param text Raw prompt text. `null`/`undefined` are treated as the empty string.
 * @returns Map from feature key to count; empty when the input has no content.
 */
export function extractBinaryGateFeatureCounts(text: string): Map<string, number> {
  const counts = new Map<string, number>();
  // Coerce once so the raw-text checks below cannot throw on null/undefined.
  const raw = String(text ?? "");
  const lower = normalizeBinaryGateText(raw);
  const toks = tokenizeNormalizedText(lower);

  // Word unigrams and bigrams.
  for (const size of [1, 2]) {
    for (let start = 0; start + size <= toks.length; start++) {
      inc(counts, `w${size}:${toks.slice(start, start + size).join("_")}`);
    }
  }

  // Character 3- and 4-grams; padding lets word edges show up as " ab" / "ab ".
  const padded = ` ${lower} `;
  for (const size of [3, 4]) {
    for (let start = 0; start + size <= padded.length; start++) {
      const gram = padded.slice(start, start + size);
      if (!WHITESPACE_ONLY_PATTERN.test(gram)) {
        inc(counts, `c${size}:${gram}`);
      }
    }
  }

  if (toks.length > 0) inc(counts, `pref1:${toks[0]}`);
  if (toks.length > 1) inc(counts, `pref2:${toks.slice(0, 2).join("_")}`);
  if (toks.length > 2) inc(counts, `pref3:${toks.slice(0, 3).join("_")}`);

  // Punctuation cues read the raw text because normalization strips "?" and "!".
  if (raw.includes("?")) inc(counts, "cue:question_mark");

  if (toks.length > 0) {
    inc(counts, `len_bucket:${toks.length <= 3 ? "short" : toks.length <= 8 ? "medium" : "long"}`);
  }

  // Despite the name, this cue fires for "!" as well as "?".
  if (/[?!]/.test(raw)) {
    inc(counts, "cue:question_punct");
  }

  if (IMPERATIVE_PREFIX_PATTERN.test(raw.trim())) {
    inc(counts, "cue:imperative");
  }

  addPhraseFeatures(counts, lower, SAFETY_FEATURES);

  const complexityCount = addPhraseFeatures(counts, lower, COMPLEXITY_FEATURES);
  if (complexityCount > 0) {
    inc(counts, `complex_count:${complexityCount}`);
  }

  addPhraseFeatures(counts, lower, DEBUG_FEATURES);
  addPhraseFeatures(counts, lower, CONTEXT_FEATURES);
  addPhraseFeatures(counts, lower, REVIEW_FEATURES);
  addPhraseFeatures(counts, lower, DONE_FEATURES);
  addPhraseFeatures(counts, lower, CHECKIN_FEATURES);

  const tokenSet = new Set(toks);
  for (const cue of TOKEN_CUES) {
    if (tokenSet.has(cue)) {
      inc(counts, `cue:${cue}`);
    }
  }
  addPhraseFeatures(counts, lower, MULTI_WORD_CUE_FEATURES);

  return counts;
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { binaryGatePredict } from "./router.js";
|
|
3
|
+
|
|
4
|
+
// Robustness checks for binaryGatePredict across unusual inputs.
// Every case returns early when the prediction is falsy, so with no model
// available these tests pass without asserting anything.
describe("binary gate model", () => {
  type Prediction = NonNullable<ReturnType<typeof binaryGatePredict>>;

  const KNOWN_DECISIONS = ["continue", "escalate"];

  /** The decision must be one of the two gate outcomes. */
  const expectKnownDecision = (prediction: Prediction): void => {
    expect(KNOWN_DECISIONS).toContain(prediction.decision);
  };

  /** The confidence must lie in the closed interval [0, 1]. */
  const expectUnitIntervalConfidence = (prediction: Prediction): void => {
    expect(prediction.confidence).toBeGreaterThanOrEqual(0);
    expect(prediction.confidence).toBeLessThanOrEqual(1);
  };

  it("returns a decision when model is available", () => {
    const prediction = binaryGatePredict("test");
    if (!prediction) return;
    expectKnownDecision(prediction);
    expectUnitIntervalConfidence(prediction);
  });

  it("classifies short prompts and returns a valid decision", () => {
    const prediction = binaryGatePredict("fix typo");
    if (!prediction) return;
    expectKnownDecision(prediction);
    expect(prediction.confidence).toBeGreaterThan(0.5);
  });

  it("handles empty text gracefully", () => {
    const prediction = binaryGatePredict("");
    if (!prediction) return;
    expectKnownDecision(prediction);
    expectUnitIntervalConfidence(prediction);
  });

  it("handles very long text without crashing", () => {
    const longText = "a".repeat(10000);
    const prediction = binaryGatePredict(longText);
    if (!prediction) return;
    // Only the confidence range is checked for this input.
    expectUnitIntervalConfidence(prediction);
  });

  it("handles unicode text", () => {
    const prediction = binaryGatePredict("Привет мир 你好世界 مرحبا بالعالم");
    if (!prediction) return;
    expectKnownDecision(prediction);
    expectUnitIntervalConfidence(prediction);
  });

  it("handles special characters and potential injection", () => {
    const prediction = binaryGatePredict("fix <script>alert('xss')</script> && rm -rf /");
    if (!prediction) return;
    expectKnownDecision(prediction);
    expectUnitIntervalConfidence(prediction);
  });

  it("handles URLs", () => {
    const prediction = binaryGatePredict("check https://example.com/path?query=value&foo=bar");
    if (!prediction) return;
    expectKnownDecision(prediction);
    expectUnitIntervalConfidence(prediction);
  });
});
|
|
@@ -5,7 +5,7 @@ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
|
5
5
|
import { Box, Text } from "@earendil-works/pi-tui";
|
|
6
6
|
import { completeSimple, type ThinkingLevel } from "@earendil-works/pi-ai";
|
|
7
7
|
import { Type } from "typebox";
|
|
8
|
-
import { featureFile, readText, truncate, writeText } from "./internal.js";
|
|
8
|
+
import { featureFile, readText, truncate, writeText, atomicWriteText } from "./internal.js";
|
|
9
9
|
import { advisorArgumentCompletions, piRogueArgumentCompletions } from "./completions.js";
|
|
10
10
|
import {
|
|
11
11
|
appendRouteLog,
|
|
@@ -58,6 +58,7 @@ const MAX_FILES = 8;
|
|
|
58
58
|
const MAX_ERRORS = 5;
|
|
59
59
|
const MIN_CHECKIN_INTERVAL_MINUTES = 10;
|
|
60
60
|
const MAX_CHECKIN_INTERVAL_MINUTES = 240;
|
|
61
|
+
const STATE_VERSION = 1;
|
|
61
62
|
const checkinLocks = new Set<string>();
|
|
62
63
|
|
|
63
64
|
// ── SOTA models (ordered by preference) ───────────────────────────────────
|
|
@@ -70,6 +71,8 @@ const SOTA_CHAIN: Array<{ provider: string; model: string; label: string }> = [
|
|
|
70
71
|
|
|
71
72
|
// ── Internal state ────────────────────────────────────────────────────────
|
|
72
73
|
interface SessionState {
|
|
74
|
+
/** State schema version for migration support */
|
|
75
|
+
_v?: number;
|
|
73
76
|
turns: number;
|
|
74
77
|
lastTask: string;
|
|
75
78
|
notes: string[];
|
|
@@ -159,9 +162,18 @@ function saveConfig(c: AdvisorConfig) {
|
|
|
159
162
|
|
|
160
163
|
function loadState(): SessionState {
|
|
161
164
|
const raw = readJson<Partial<SessionState>>(STATE_PATH, {});
|
|
165
|
+
// Handle state versioning: migrate old versions to current
|
|
166
|
+
const version = raw._v ?? 0;
|
|
167
|
+
if (version < STATE_VERSION) {
|
|
168
|
+
// Migrate: ensure reviewControl has all fields
|
|
169
|
+
if (raw.reviewControl && !raw.reviewControl.lastAppliedAt) {
|
|
170
|
+
(raw.reviewControl as any).lastAppliedAt = new Date().toISOString();
|
|
171
|
+
}
|
|
172
|
+
}
|
|
162
173
|
const control = raw.reviewControl;
|
|
163
174
|
const pauseUntil = Number(raw.advisorPauseUntilTurn);
|
|
164
175
|
return {
|
|
176
|
+
_v: STATE_VERSION,
|
|
165
177
|
turns: raw.turns ?? 0,
|
|
166
178
|
lastTask: raw.lastTask ?? "",
|
|
167
179
|
notes: (raw.notes ?? []).map(noteText).filter(Boolean).slice(-MAX_NOTES),
|
|
@@ -197,7 +209,7 @@ function loadState(): SessionState {
|
|
|
197
209
|
}
|
|
198
210
|
|
|
199
211
|
function saveState(s: SessionState) {
|
|
200
|
-
|
|
212
|
+
atomicWriteText(STATE_PATH, JSON.stringify(s, null, 2) + "\n");
|
|
201
213
|
}
|
|
202
214
|
|
|
203
215
|
function loadCache(): Record<string, string> {
|
|
@@ -210,21 +222,50 @@ function saveCache(c: Record<string, string>) {
|
|
|
210
222
|
entries.sort((a, b) => a[0].localeCompare(b[0]));
|
|
211
223
|
for (const [k] of entries.slice(0, entries.length - MAX_CACHE)) delete c[k];
|
|
212
224
|
}
|
|
213
|
-
|
|
225
|
+
atomicWriteText(CACHE_PATH, JSON.stringify(c, null, 2) + "\n");
|
|
214
226
|
}
|
|
215
227
|
|
|
216
228
|
// ── Prompts ───────────────────────────────────────────────────────────────
|
|
217
229
|
|
|
218
|
-
const ADVISOR_SYSTEM = `You are a senior engineering advisor. Use the session brief only. Return terse, specific advice with concrete recommendations. 200 words max
|
|
230
|
+
const ADVISOR_SYSTEM = `You are a senior engineering advisor. Use the session brief only. Return terse, specific advice with concrete recommendations. 200 words max.
|
|
231
|
+
|
|
232
|
+
## Guidance
|
|
233
|
+
- Focus on actionable insights, not summaries of what was done.
|
|
234
|
+
- If no issues found, say so briefly — do not invent problems.
|
|
235
|
+
- Flag security concerns, architecture risks, and test gaps.
|
|
236
|
+
- Reference specific files or lines when possible.`;
|
|
237
|
+
|
|
238
|
+
const REVIEW_SYSTEM = `You are a senior reviewer. An AI agent just completed work. Assess it and return ONLY valid JSON.
|
|
239
|
+
|
|
240
|
+
## Verdicts
|
|
241
|
+
- **on_track**: Work is complete. Changes are correct, tests pass (if applicable), no outstanding issues. This is the default for clearly finished work.
|
|
242
|
+
- **course_correct**: Work is mostly done but needs specific changes. Minor fixes, adjustments, or refinements required. Be specific about what needs to change.
|
|
243
|
+
- **not_done**: Work is incomplete, failing, or has critical errors. The agent has not finished the task. Include what is missing or broken.
|
|
244
|
+
|
|
245
|
+
## Confidence Calibration
|
|
246
|
+
- 0.80+ = clear signal (e.g., explicit "done" with file changes, or explicit errors)
|
|
247
|
+
- 0.60-0.79 = moderate signal (e.g., partial completion, some issues noted)
|
|
248
|
+
- <0.60 = weak signal — defer rather than force a verdict
|
|
249
|
+
|
|
250
|
+
## Guidelines
|
|
251
|
+
- Focus on MATERIAL changes (logic, behavior, correctness). Ignore cosmetic changes (formatting, comments, whitespace).
|
|
252
|
+
- If the agent explicitly states "done"/"fixed"/"implemented" AND file changes are small/simple → on_track.
|
|
253
|
+
- If the agent states "done" BUT there are errors or incomplete logic → course_correct or not_done.
|
|
254
|
+
- If the agent states "incomplete"/"wip"/"todo" → not_done.
|
|
255
|
+
- Actions should be concrete next steps (2 max), not vague suggestions.
|
|
256
|
+
- Checklist items are optional — include only when there are specific verification steps.
|
|
257
|
+
- notify is always false for this system.
|
|
258
|
+
|
|
259
|
+
## Examples
|
|
260
|
+
|
|
261
|
+
Example 1 (on_track):
|
|
262
|
+
{ "verdict": "on_track", "summary": "Added new endpoint and tests pass", "actions": [], "checklist": ["Verify endpoint returns 200"], "notify": false }
|
|
263
|
+
|
|
264
|
+
Example 2 (course_correct):
|
|
265
|
+
{ "verdict": "course_correct", "summary": "Refactored module but error handling was removed", "actions": ["Restore error handling in handleRequest"], "checklist": [], "notify": false }
|
|
219
266
|
|
|
220
|
-
|
|
221
|
-
{
|
|
222
|
-
"verdict": "on_track"|"course_correct"|"not_done",
|
|
223
|
-
"summary": "1-2 sentence assessment",
|
|
224
|
-
"actions": ["action1"],
|
|
225
|
-
"checklist": ["item"],
|
|
226
|
-
"notify": false
|
|
227
|
-
}`;
|
|
267
|
+
Example 3 (not_done):
|
|
268
|
+
{ "verdict": "not_done", "summary": "Migration script has syntax errors and missing table reference", "actions": ["Fix syntax errors in migration.sql", "Add missing users table reference"], "checklist": ["Verify migration runs cleanly"], "notify": false }`;
|
|
228
269
|
|
|
229
270
|
// ── Helpers ───────────────────────────────────────────────────────────────
|
|
230
271
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { appendFileSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
1
|
+
import { appendFileSync, mkdirSync, readFileSync, renameSync, unlinkSync, writeFileSync } from "node:fs";
|
|
2
2
|
import { dirname, join } from "node:path";
|
|
3
3
|
import { homedir } from "node:os";
|
|
4
4
|
|
|
@@ -46,3 +46,18 @@ export function appendText(filePath: string, text: string): void {
|
|
|
46
46
|
ensureParent(filePath);
|
|
47
47
|
appendFileSync(filePath, text, "utf8");
|
|
48
48
|
}
|
|
49
|
+
|
|
50
|
+
/**
 * Write text atomically: write to a sibling temp file, then rename it over the
 * target. Falls back to a single direct write if either step fails.
 *
 * The temp name carries the process id plus a time/random suffix so that
 * concurrent writers do not share (and clobber) one `<file>.tmp`. When the
 * temp write or the rename fails, the temp file is removed on a best-effort
 * basis before falling back, so failed attempts do not leave stray files.
 *
 * @param filePath Destination file.
 * @param text Full contents to store.
 * @throws Whatever `writeText` throws if the direct-write fallback also fails.
 */
export function atomicWriteText(filePath: string, text: string): void {
  const uniqueSuffix = `${process.pid}.${Date.now().toString(36)}${Math.random().toString(36).slice(2, 8)}`;
  const tempPath = `${filePath}.${uniqueSuffix}.tmp`;
  try {
    writeText(tempPath, text);
    renameSync(tempPath, filePath);
    return;
  } catch {
    // Temp write or rename failed (e.g. cross-device rename): clean up, then fall back.
    try {
      unlinkSync(tempPath);
    } catch {
      // Temp file was never created or is already gone; nothing to clean up.
    }
  }
  // Non-atomic fallback; an error here propagates to the caller.
  writeText(filePath, text);
}
|
|
@@ -6,6 +6,13 @@ import { join } from "node:path";
|
|
|
6
6
|
import { completeSimple } from "@earendil-works/pi-ai";
|
|
7
7
|
import { registerAdvisor } from "./extension.js";
|
|
8
8
|
|
|
9
|
+
// Scratch "home" path for this test file. The name combines the process id,
// the current timestamp and a random base-36 suffix, so each run gets its own path.
// NOTE(review): created through vi.hoisted, presumably so the value already
// exists when the vi.mock factory below runs — confirm against the vitest docs.
// NOTE(review): the "/tmp" prefix is hard-coded; confirm this suite only runs on POSIX hosts.
const testHome = vi.hoisted(() => `/tmp/pi-rogue-advisor-loop-convergence-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`);

// Replace node:os for this file: every real export is kept, but homedir()
// returns the scratch path above.
// NOTE(review): presumably this keeps code that resolves files via homedir()
// away from the real home directory — verify against the modules under test.
vi.mock("node:os", async () => {
  const actual = await vi.importActual<typeof import("node:os")>("node:os");
  return { ...actual, homedir: () => testHome };
});
|
|
15
|
+
|
|
9
16
|
vi.mock("@earendil-works/pi-ai", async () => {
|
|
10
17
|
const actual = await vi.importActual<typeof import("@earendil-works/pi-ai")>("@earendil-works/pi-ai");
|
|
11
18
|
return {
|
|
@@ -3,6 +3,7 @@ import { copyFileSync, existsSync, mkdirSync, readFileSync, statSync } from "nod
|
|
|
3
3
|
import { dirname, resolve } from "node:path";
|
|
4
4
|
import { fileURLToPath } from "node:url";
|
|
5
5
|
import { appendText, featureFile, truncate } from "./internal.js";
|
|
6
|
+
import { extractBinaryGateFeatureCounts } from "./binary-gate-features.js";
|
|
6
7
|
|
|
7
8
|
export type AdvisorPhase = "preflight" | "review" | "closeout";
|
|
8
9
|
export type PreflightLabel = "continue" | "escalate_to_advisor" | "need_more_context" | "low_confidence";
|
|
@@ -85,43 +86,8 @@ function loadBinaryGate(): BinaryGateModel | null {
|
|
|
85
86
|
} catch { _binaryGateCache = null; return null; }
|
|
86
87
|
}
|
|
87
88
|
|
|
88
|
-
function binaryGateTokens(text: string): string[] {
|
|
89
|
-
const norm = String(text ?? "").toLowerCase()
|
|
90
|
-
.replace(/https?:\/\/\S+/g, " url ")
|
|
91
|
-
.replace(/[^a-z0-9\s']/g, " ")
|
|
92
|
-
.replace(/\s+/g, " ").trim();
|
|
93
|
-
return norm ? norm.split(" ").filter(Boolean) : [];
|
|
94
|
-
}
|
|
95
|
-
|
|
96
89
|
function binaryGateFeatures(text: string, model: BinaryGateModel) {
|
|
97
|
-
const
|
|
98
|
-
const lower = String(text ?? "").toLowerCase()
|
|
99
|
-
.replace(/https?:\/\/\S+/g, " url ")
|
|
100
|
-
.replace(/[^a-z0-9\s']/g, " ")
|
|
101
|
-
.replace(/\s+/g, " ").trim();
|
|
102
|
-
const counts = new Map<string, number>();
|
|
103
|
-
const inc = (k: string, b = 1) => counts.set(k, (counts.get(k) || 0) + b);
|
|
104
|
-
for (const n of [1, 2]) {
|
|
105
|
-
if (toks.length >= n) for (let i = 0; i <= toks.length - n; i++)
|
|
106
|
-
inc(`w${n}:${toks.slice(i, i + n).join("_")}`);
|
|
107
|
-
}
|
|
108
|
-
const norm = ` ${lower} `;
|
|
109
|
-
for (const n of [3, 4]) {
|
|
110
|
-
if (norm.length >= n) for (let i = 0; i <= norm.length - n; i++) {
|
|
111
|
-
const g = norm.slice(i, i + n);
|
|
112
|
-
if (!/^\s+$/.test(g)) inc(`c${n}:${g}`);
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
if (toks.length > 0) inc(`pref1:${toks[0]}`);
|
|
116
|
-
if (toks.length > 1) inc(`pref2:${toks.slice(0, 2).join("_")}`);
|
|
117
|
-
if (toks.length > 2) inc(`pref3:${toks.slice(0, 3).join("_")}`);
|
|
118
|
-
if (text.includes("?")) inc("cue:question_mark");
|
|
119
|
-
const cues = ["check","why","what","how","should","status","stats","log","logs","review","diff","pr","build","run","test","deploy","fix","debug","install","configure","plan","continue","resume","compact","research","update","patch","cleanup","remove"];
|
|
120
|
-
const multi = ["what is","what's","safe to use","pull request","model family","how does","next step","path forward","should we","what should"];
|
|
121
|
-
const ts = new Set(toks);
|
|
122
|
-
for (const c of cues) if (ts.has(c)) inc(`cue:${c}`);
|
|
123
|
-
for (const c of multi) if (lower.includes(c)) inc(`cue:${c.replace(/\s+/g,"_")}`);
|
|
124
|
-
|
|
90
|
+
const counts = extractBinaryGateFeatureCounts(text);
|
|
125
91
|
const index = new Map(model.features.map((f, i) => [f, i]));
|
|
126
92
|
const pairs: Array<[number, number]> = [];
|
|
127
93
|
let nrm = 0;
|
|
@@ -129,7 +95,8 @@ function binaryGateFeatures(text: string, model: BinaryGateModel) {
|
|
|
129
95
|
const idx = index.get(feature);
|
|
130
96
|
if (idx === undefined) continue;
|
|
131
97
|
const value = (1 + Math.log(tf)) * model.idf[idx];
|
|
132
|
-
pairs.push([idx, value]);
|
|
98
|
+
pairs.push([idx, value]);
|
|
99
|
+
nrm += value * value;
|
|
133
100
|
}
|
|
134
101
|
const scale = nrm > 0 ? 1 / Math.sqrt(nrm) : 1;
|
|
135
102
|
pairs.sort((a, b) => a[0] - b[0]);
|