@fiale-plus/pi-rogue 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/node_modules/@fiale-plus/pi-rogue-advisor/README.md +1 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/binary-gate-features.test.ts +8 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/binary-gate-features.ts +7 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/router.test.ts +26 -0
- package/node_modules/@fiale-plus/pi-rogue-advisor/src/router.ts +10 -1
- package/node_modules/@fiale-plus/pi-rogue-orchestration/README.md +3 -3
- package/node_modules/@fiale-plus/pi-rogue-orchestration/package.json +3 -0
- package/node_modules/@fiale-plus/pi-rogue-orchestration/skills/orchestration/SKILL.md +3 -2
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/goal.test.ts +65 -2
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/goal.ts +84 -4
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/loop.ts +3 -0
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/novelty-guard.test.ts +43 -0
- package/node_modules/@fiale-plus/pi-rogue-orchestration/src/novelty-guard.ts +96 -11
- package/node_modules/@fiale-plus/pi-rogue-router/README.md +45 -6
- package/node_modules/@fiale-plus/pi-rogue-router/src/binary-gate.test.ts +88 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/binary-gate.ts +232 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/cli.ts +123 -9
- package/node_modules/@fiale-plus/pi-rogue-router/src/completions.ts +39 -16
- package/node_modules/@fiale-plus/pi-rogue-router/src/config-extension.test.ts +111 -4
- package/node_modules/@fiale-plus/pi-rogue-router/src/config.ts +17 -2
- package/node_modules/@fiale-plus/pi-rogue-router/src/extension.ts +67 -7
- package/node_modules/@fiale-plus/pi-rogue-router/src/index.ts +4 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/observe.ts +76 -5
- package/node_modules/@fiale-plus/pi-rogue-router/src/outcomes.ts +130 -6
- package/node_modules/@fiale-plus/pi-rogue-router/src/reports.test.ts +92 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/reports.ts +116 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/sharpening.test.ts +223 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/sharpening.ts +344 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/teacher-runner.test.ts +126 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/teacher-runner.ts +238 -0
- package/node_modules/@fiale-plus/pi-rogue-router/src/v1-telemetry.test.ts +54 -1
- package/package.json +1 -1
|
@@ -8,6 +8,7 @@ Strategic advisor for Pi sessions with low-overhead preflight/post-review routin
|
|
|
8
8
|
|
|
9
9
|
- SOTA-first model fallback: `gpt-5.5`/`claude-opus-4-6`/`claude-sonnet-4-6` where available.
|
|
10
10
|
- Keeps command-level behavior simple and explicit.
|
|
11
|
+
- Router/binary-gate policy escalates architecture/refactor/tradeoff/security/high-uncertainty and material stuck/no-progress work, while tiny edits, direct answers, docs/formatting cleanup, and other low-risk reactive tasks continue without advisor noise.
|
|
11
12
|
|
|
12
13
|
## Install
|
|
13
14
|
|
|
@@ -16,4 +16,12 @@ describe("binary gate feature extraction", () => {
|
|
|
16
16
|
expect(features.get("safety:production")).toBe(1);
|
|
17
17
|
expect(features.get("safety:deploy")).toBe(1);
|
|
18
18
|
});
|
|
19
|
+
|
|
20
|
+
it("emits stuck/no-progress cues for the binary gate", () => {
|
|
21
|
+
const features = extractBinaryGateFeatureCounts("goal loop stuck with repeated planning and no concrete progress");
|
|
22
|
+
|
|
23
|
+
expect(features.get("stuck:stuck")).toBe(1);
|
|
24
|
+
expect(features.get("stuck:repeated_planning")).toBe(1);
|
|
25
|
+
expect(features.get("stuck:no_concrete_progress")).toBe(1);
|
|
26
|
+
});
|
|
19
27
|
});
|
|
@@ -160,6 +160,13 @@ export function extractBinaryGateFeatureCounts(text: string): Map<string, number
|
|
|
160
160
|
}
|
|
161
161
|
}
|
|
162
162
|
|
|
163
|
+
const stuckWords = ["stuck", "looping", "spinning", "no progress", "no concrete progress", "same failure", "repeated failure", "repeated planning", "self talk", "forever thinking", "alternative action", "blocked"];
|
|
164
|
+
for (const stuckWord of stuckWords) {
|
|
165
|
+
if (lower.includes(stuckWord)) {
|
|
166
|
+
inc(counts, `stuck:${replaceSpaces(stuckWord)}`);
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
163
170
|
const contextWords = ["need more context", "missing context", "clarify", "not enough info", "unspecified", "unknown", "ambiguous"];
|
|
164
171
|
for (const contextWord of contextWords) {
|
|
165
172
|
if (lower.includes(contextWord)) {
|
|
@@ -36,6 +36,32 @@ describe("advisor router heuristics", () => {
|
|
|
36
36
|
expect(routeNote(route)).toMatch(/^\[advisor:rules: review, reason: [a-z0-9 ,.'-]+\]$/);
|
|
37
37
|
});
|
|
38
38
|
|
|
39
|
+
it("escalates material stuck/no-progress prompts", () => {
|
|
40
|
+
const input: AdvisorRouteInput = { phase: "preflight", text: "the goal loop is stuck with repeated planning and no concrete progress after several turns" };
|
|
41
|
+
const route = heuristicRoute(input);
|
|
42
|
+
|
|
43
|
+
expect(route.label).toBe("escalate_to_advisor");
|
|
44
|
+
expect(route.escalate).toBe(true);
|
|
45
|
+
expect(route.reason).toContain("no-progress");
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
it("escalates stuck evidence even when low-risk edit cues are present", () => {
|
|
49
|
+
const input: AdvisorRouteInput = { phase: "preflight", text: "small README edit is stuck with no concrete progress after several turns" };
|
|
50
|
+
const route = heuristicRoute(input);
|
|
51
|
+
|
|
52
|
+
expect(route.label).toBe("escalate_to_advisor");
|
|
53
|
+
expect(route.reason).toContain("no-progress");
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
it("keeps routine docs cleanup out of advisor escalation", () => {
|
|
57
|
+
const input: AdvisorRouteInput = { phase: "preflight", text: "routine docs and formatting cleanup in README" };
|
|
58
|
+
const route = heuristicRoute(input);
|
|
59
|
+
|
|
60
|
+
expect(route.label).toBe("continue");
|
|
61
|
+
expect(route.escalate).toBe(false);
|
|
62
|
+
expect(route.review).toBe("off");
|
|
63
|
+
});
|
|
64
|
+
|
|
39
65
|
it("flags safety-sensitive prompts", () => {
|
|
40
66
|
const input: AdvisorRouteInput = { phase: "preflight", text: "run rm -rf on prod" };
|
|
41
67
|
const route = heuristicRoute(input);
|
|
@@ -125,6 +125,7 @@ const QUICK_EDIT_RE = /\b(quick edit|small edit|tiny edit|rename|format(?:ting)?
|
|
|
125
125
|
const ROUTINE_CLEANUP_RE = /\b(routine docs?|docs? and formatting|formatting cleanup|generated changes|large diff|docs?\/formatting)\b/i;
|
|
126
126
|
const COMPLEX_RE = /\b(architecture|architectural|refactor|design|trade[- ]?off|concurrency|security|auth|migration|performance|scale|scalability|framework|system design|schema|data model|protocol|advisor routing|advisor flow|router logic|call vs skip|skip vs call|compare|recommend|benchmark|evaluate|experiment|train|strategy|choose|make sense|worth(?: it)?|kpi|kpis|how it works|where it comes from|what would you choose|what do you think|next step|pick between|buy|usage|sustained speed|available models|running model kpis)\b/i;
|
|
127
127
|
const DEBUG_RE = /\b(debug|bug|error|stack trace|traceback|fail(?:ed|ure)?|broken|investigate|why is|cannot|can't|crash|regression)\b/i;
|
|
128
|
+
const STUCK_RE = /\b(stuck|looping|spinning|no[- ]?progress|no concrete progress|same failure|repeated failure|repeated planning|self[- ]?talk|forever thinking|strategy change|alternative action|blocked)\b/i;
|
|
128
129
|
const CONTEXT_RE = /\b(need more context|missing context|clarify|not enough info|unspecified|unknown|ambiguous)\b/i;
|
|
129
130
|
const SAFETY_RE = /\b(rm\s+-rf|sudo\b|shutdown\b|reboot\b|mkfs(?:\.[\w-]+)?\b|chmod\s+-R\b|chown\b|git\s+push\b[\s\S]*--force(?:-with-lease)?|curl\b[\s\S]*\|\s*(?:sh|bash)\b|wget\b[\s\S]*\|\s*(?:sh|bash)\b|drop\s+table\b|delete\s+database\b|credential\b|password\b|secret\b)\b/i;
|
|
130
131
|
const COMPACTION_RE = /\b(compact(?:ed|ion)?|missing history|history might flip|prior constraint|resume(?:d)? after compaction)\b/i;
|
|
@@ -293,6 +294,11 @@ function hasComplexSignal(text: string): boolean {
|
|
|
293
294
|
return COMPLEX_RE.test(text) || DEBUG_RE.test(text);
|
|
294
295
|
}
|
|
295
296
|
|
|
297
|
+
function hasMaterialStuckSignal(text: string): boolean {
|
|
298
|
+
if (!STUCK_RE.test(text)) return false;
|
|
299
|
+
return /\b(goal|loop|autoresearch|tool|test|command|failure|failed|turns?|again|same|repeated|concrete|progress|blocked|alternative|recovery)\b/i.test(text);
|
|
300
|
+
}
|
|
301
|
+
|
|
296
302
|
function hasCompactionLowRiskSignal(text: string): boolean {
|
|
297
303
|
return COMPACTION_RE.test(text) && /\blow[- ]?risk\b/i.test(text);
|
|
298
304
|
}
|
|
@@ -342,6 +348,9 @@ function preflightSignals(input: AdvisorRouteInput): { label: PreflightLabel; co
|
|
|
342
348
|
if (isSafetySensitive(text)) {
|
|
343
349
|
return { label: "escalate_to_advisor", confidence: 0.98, reason: "Safety-sensitive keywords detected.", safety: true };
|
|
344
350
|
}
|
|
351
|
+
if (hasMaterialStuckSignal(text)) {
|
|
352
|
+
return { label: "escalate_to_advisor", confidence: 0.86, reason: "Material stuck/no-progress signal detected.", safety: false };
|
|
353
|
+
}
|
|
345
354
|
if (hasRoutineCleanupSignal(text) || (hasQuickEditSignal(text) && !hasComplexSignal(text))) {
|
|
346
355
|
return { label: "continue", confidence: 0.9, reason: "Small-edit or routine-cleanup signal detected.", safety: false };
|
|
347
356
|
}
|
|
@@ -424,7 +433,7 @@ export function buildRouterPrompt(input: AdvisorRouteInput): string {
|
|
|
424
433
|
input.failed !== undefined ? `Failed: ${String(input.failed)}` : "",
|
|
425
434
|
phase === "preflight"
|
|
426
435
|
? [
|
|
427
|
-
"Guidance: continue for tiny edits and
|
|
436
|
+
"Guidance: continue for tiny edits, direct answers, docs/formatting cleanup, and other low-risk reactive tasks; escalate_to_advisor for architecture, refactors, design, tradeoffs, security, irreversible actions, high uncertainty, or material stuck/no-progress evidence; need_more_context when underspecified; low_confidence when mixed signals. If advisor guidance conflicts with local evidence, the working model must reconcile explicitly rather than blindly follow it.",
|
|
428
437
|
].join(" ")
|
|
429
438
|
: [
|
|
430
439
|
"Guidance: on_track for clearly complete work; course_correct for partial work that needs changes; not_done when incomplete or failing; abstain when there is not enough signal.",
|
|
@@ -47,10 +47,10 @@ npm install --workspace packages/orchestration
|
|
|
47
47
|
## Behavior notes
|
|
48
48
|
|
|
49
49
|
- `loop` supports minimum interval `1m`.
|
|
50
|
-
- `
|
|
50
|
+
- Active goals can be completed with the model-callable `goal_complete` tool, which requires a summary and verification evidence; `GOAL_DONE` / `GOAL_CONTINUE` sentinel loop checks are preserved for compatibility.
|
|
51
51
|
- `autoresearch` and `autoresearch-lab` are thin facades over `/goal + /loop`.
|
|
52
52
|
- A goal or loop activation enables scheduled advisor check-ins; stopping or clearing the active goal/loop disables them again.
|
|
53
53
|
- Check-ins are part of orchestration lifecycle, not a standalone advisor command. They use the advisor interval, higher/advanced advisor models first, and regular model fallback by default.
|
|
54
|
-
- A
|
|
55
|
-
- There are no hidden flow budgets. Long loops run until `/loop off`, `/goal clear`, or a `GOAL_DONE` response clears the active goal and loop.
|
|
54
|
+
- A bounded no-progress guard detects repeated assistant output or repeated planning-only turns during active orchestration, then nudges one concrete alternative action and eventually stops retry churn instead of stacking recovery prompts.
|
|
55
|
+
- There are no hidden flow budgets. Long loops run until `/loop off`, `/goal clear`, `goal_complete`, or a `GOAL_DONE` response clears the active goal and loop.
|
|
56
56
|
- Stale research state is cleared when `goal` or `loop` are cleared.
|
|
@@ -28,12 +28,13 @@ Use this skill to run measurable, bounded workflow loops inside a Pi session.
|
|
|
28
28
|
## Behavior rules
|
|
29
29
|
|
|
30
30
|
- `loop` is the primitive; `goal` is the execution intent.
|
|
31
|
-
- Goal completion
|
|
31
|
+
- Goal completion should use the `goal_complete` tool when available, with a summary and verification evidence; `GOAL_DONE` / `GOAL_CONTINUE` remain valid loop-check sentinels for compatibility.
|
|
32
32
|
- `autoresearch` / `autoresearch-lab` are facades over goal+loop.
|
|
33
33
|
- Goal or loop activation enables scheduled advisor check-ins; stopping or clearing either disables them.
|
|
34
34
|
- Check-ins belong to orchestration lifecycle, not the advisor command surface, and use higher/advanced advisor models first, with regular model fallback enabled by default.
|
|
35
35
|
- `autoresearch` enforces multi-cycle + evidence-aware completion.
|
|
36
|
-
- Clearing goal/loop clears stale autoresearch state.
|
|
36
|
+
- Clearing goal/loop or completing a goal clears stale autoresearch state.
|
|
37
|
+
- Bounded no-progress recovery may steer one concrete alternative action after repeated self-talk/repetition, then stops retry churn instead of stacking prompts.
|
|
37
38
|
|
|
38
39
|
## Safety and agentic flow
|
|
39
40
|
|
|
@@ -2,8 +2,8 @@ import { randomUUID } from "node:crypto";
|
|
|
2
2
|
import { beforeEach, describe, expect, it, vi } from "vitest";
|
|
3
3
|
import { resetAdvisorSessionContext, setAdvisorCheckinsEnabled } from "./advisor-checkins.js";
|
|
4
4
|
import { endGoalCheck } from "./goal-resolution.js";
|
|
5
|
-
import { activeGoal, clearGoal, registerGoal, setGoal, startGoalProcessing } from "./goal.js";
|
|
6
|
-
import { featureFile, readText } from "./internal.js";
|
|
5
|
+
import { activeGoal, clearGoal, completeActiveGoal, registerGoal, setGoal, startGoalProcessing } from "./goal.js";
|
|
6
|
+
import { featureFile, readText, sessionFile, writeText } from "./internal.js";
|
|
7
7
|
|
|
8
8
|
vi.mock("./advisor-checkins.js", () => ({
|
|
9
9
|
resetAdvisorSessionContext: vi.fn(),
|
|
@@ -111,6 +111,20 @@ describe("goal processing", () => {
|
|
|
111
111
|
endGoalCheck(ctx);
|
|
112
112
|
});
|
|
113
113
|
|
|
114
|
+
it("clears stale no-progress recovery state when goal lifecycle changes", () => {
|
|
115
|
+
const ctx = fakeCtx();
|
|
116
|
+
const guardFile = sessionFile("orchestration", ctx, "repetition-guard.json");
|
|
117
|
+
writeText(guardFile, `${JSON.stringify({
|
|
118
|
+
recentAssistantTurns: [],
|
|
119
|
+
noProgress: { at: new Date().toISOString(), count: 3, text: "I will plan next.", reason: "test" },
|
|
120
|
+
})}\n`);
|
|
121
|
+
|
|
122
|
+
setGoal(ctx, "fresh goal after stale recovery");
|
|
123
|
+
|
|
124
|
+
expect(JSON.parse(readText(guardFile)).noProgress).toBeUndefined();
|
|
125
|
+
clearGoal(ctx);
|
|
126
|
+
});
|
|
127
|
+
|
|
114
128
|
it("resets advisor context when a goal is cleared", () => {
|
|
115
129
|
const ctx = fakeCtx();
|
|
116
130
|
|
|
@@ -158,6 +172,55 @@ describe("goal processing", () => {
|
|
|
158
172
|
expect(setAdvisorCheckinsEnabledMock).toHaveBeenCalledWith(false);
|
|
159
173
|
});
|
|
160
174
|
|
|
175
|
+
it("completes an active goal through the explicit completion signal", () => {
|
|
176
|
+
const ctx = fakeCtx();
|
|
177
|
+
const goal = `complete with tool ${randomUUID()}`;
|
|
178
|
+
|
|
179
|
+
setGoal(ctx, goal);
|
|
180
|
+
const result = completeActiveGoal(ctx, {
|
|
181
|
+
summary: "Implemented the requested behavior.",
|
|
182
|
+
verification: "Ran focused unit tests.",
|
|
183
|
+
source: "tool",
|
|
184
|
+
});
|
|
185
|
+
|
|
186
|
+
expect(result.completed).toBe(true);
|
|
187
|
+
expect(activeGoal(ctx)).toBe("");
|
|
188
|
+
expect(readText(featureFile("orchestration", "goal-completions.jsonl"))).toContain(goal);
|
|
189
|
+
});
|
|
190
|
+
|
|
191
|
+
it("rejects explicit goal completion without verification", () => {
|
|
192
|
+
const ctx = fakeCtx();
|
|
193
|
+
setGoal(ctx, "needs verification");
|
|
194
|
+
|
|
195
|
+
const result = completeActiveGoal(ctx, { summary: "Done", verification: "" });
|
|
196
|
+
|
|
197
|
+
expect(result.completed).toBe(false);
|
|
198
|
+
expect(activeGoal(ctx)).toBe("needs verification");
|
|
199
|
+
clearGoal(ctx);
|
|
200
|
+
});
|
|
201
|
+
|
|
202
|
+
it("registers a goal completion tool", async () => {
|
|
203
|
+
let tool: any;
|
|
204
|
+
const pi = {
|
|
205
|
+
on: () => undefined,
|
|
206
|
+
registerCommand: () => undefined,
|
|
207
|
+
registerTool: (definition: any) => { tool = definition; },
|
|
208
|
+
sendUserMessage: () => undefined,
|
|
209
|
+
} as any;
|
|
210
|
+
const ctx = fakeCtx();
|
|
211
|
+
|
|
212
|
+
registerGoal(pi);
|
|
213
|
+
setGoal(ctx, "finish explicit tool path");
|
|
214
|
+
const response = await tool.execute("call", {
|
|
215
|
+
summary: "Finished explicit path.",
|
|
216
|
+
verification: "Verified with a fake focused check.",
|
|
217
|
+
}, undefined, undefined, ctx);
|
|
218
|
+
|
|
219
|
+
expect(tool.name).toBe("goal_complete");
|
|
220
|
+
expect(response.details.completed).toBe(true);
|
|
221
|
+
expect(activeGoal(ctx)).toBe("");
|
|
222
|
+
});
|
|
223
|
+
|
|
161
224
|
it("clears the active goal immediately when a pending check returns GOAL_DONE", async () => {
|
|
162
225
|
const handlers: Record<string, Array<(event: any, ctx: any) => Promise<void> | void>> = {};
|
|
163
226
|
const pi = {
|
|
@@ -1,20 +1,35 @@
|
|
|
1
1
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import { Type } from "typebox";
|
|
2
3
|
import { appendText, contentText, featureFile, readText, sessionFile, truncate, writeText } from "./internal.js";
|
|
3
4
|
import { clearResearchStateForGoal, readResearchState, writeResearchState, type ResearchState } from "./autoresearch-state.js";
|
|
4
5
|
import { beginGoalCheck, buildGoalCheckPrompt, endGoalCheck, goalCheckResult, hasGoalCheckPending } from "./goal-resolution.js";
|
|
5
6
|
import { clearLoop, triggerLoopTick } from "./loop.js";
|
|
7
|
+
import { clearNoProgressRecovery } from "./novelty-guard.js";
|
|
6
8
|
import { resetAdvisorSessionContext, setAdvisorCheckinsEnabled } from "./advisor-checkins.js";
|
|
7
9
|
import { goalArgumentCompletions } from "./completions.js";
|
|
8
10
|
|
|
9
11
|
const FEATURE = "orchestration";
|
|
10
12
|
const CURRENT_FILE = "goal.md";
|
|
11
13
|
const HISTORY_FILE = featureFile(FEATURE, "goal-history.jsonl");
|
|
14
|
+
const COMPLETION_HISTORY_FILE = featureFile(FEATURE, "goal-completions.jsonl");
|
|
12
15
|
|
|
13
16
|
type GoalHistoryEntry = {
|
|
14
17
|
at: string;
|
|
15
18
|
goal: string;
|
|
16
19
|
};
|
|
17
20
|
|
|
21
|
+
export type GoalCompletionInput = {
|
|
22
|
+
summary: string;
|
|
23
|
+
verification: string;
|
|
24
|
+
source?: "tool" | "sentinel";
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
export type GoalCompletionResult = {
|
|
28
|
+
completed: boolean;
|
|
29
|
+
goal?: string;
|
|
30
|
+
reason?: string;
|
|
31
|
+
};
|
|
32
|
+
|
|
18
33
|
export type GoalSetResult = "updated" | "duplicate";
|
|
19
34
|
export type GoalProcessingStartResult = "loop" | "standalone" | "pending";
|
|
20
35
|
|
|
@@ -53,6 +68,7 @@ export function setGoal(ctx: any, goal: string, options: { restartDuplicate?: bo
|
|
|
53
68
|
clearResearchStateForGoal(ctx, previous);
|
|
54
69
|
}
|
|
55
70
|
clearLoop(ctx, { clearResearch: true, preserveCheckins: true });
|
|
71
|
+
clearNoProgressRecovery(ctx);
|
|
56
72
|
writeText(sessionFile(FEATURE, ctx, CURRENT_FILE), note ? `${note}\n` : "");
|
|
57
73
|
resetAdvisorSessionContext(ctx);
|
|
58
74
|
if (note) {
|
|
@@ -68,14 +84,47 @@ export function setGoal(ctx: any, goal: string, options: { restartDuplicate?: bo
|
|
|
68
84
|
|
|
69
85
|
export function clearGoal(ctx: any): void {
|
|
70
86
|
writeText(sessionFile(FEATURE, ctx, CURRENT_FILE), "");
|
|
87
|
+
clearNoProgressRecovery(ctx);
|
|
71
88
|
resetAdvisorSessionContext(ctx);
|
|
72
89
|
}
|
|
73
90
|
|
|
91
|
+
function completionLine(goal: string, input: GoalCompletionInput): string {
|
|
92
|
+
return `${JSON.stringify({
|
|
93
|
+
at: new Date().toISOString(),
|
|
94
|
+
goal,
|
|
95
|
+
summary: input.summary.trim(),
|
|
96
|
+
verification: input.verification.trim(),
|
|
97
|
+
source: input.source ?? "tool",
|
|
98
|
+
})}\n`;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
export function completeActiveGoal(ctx: any, input: GoalCompletionInput): GoalCompletionResult {
|
|
102
|
+
const goal = activeGoal(ctx);
|
|
103
|
+
if (!goal) return { completed: false, reason: "No active goal." };
|
|
104
|
+
|
|
105
|
+
const summary = input.summary.trim();
|
|
106
|
+
const verification = input.verification.trim();
|
|
107
|
+
if (!summary) return { completed: false, goal, reason: "Goal completion requires a summary." };
|
|
108
|
+
if (!verification) return { completed: false, goal, reason: "Goal completion requires verification evidence or an explicit not-verified statement." };
|
|
109
|
+
|
|
110
|
+
appendText(COMPLETION_HISTORY_FILE, completionLine(goal, { ...input, summary, verification }));
|
|
111
|
+
|
|
112
|
+
const research = researchForGoal(ctx, goal);
|
|
113
|
+
if (research) recordResearchResult(ctx, research, "done");
|
|
114
|
+
|
|
115
|
+
endGoalCheck(ctx);
|
|
116
|
+
clearGoal(ctx);
|
|
117
|
+
setGoalStatus(ctx, null);
|
|
118
|
+
clearLoop(ctx, { clearResearch: true });
|
|
119
|
+
return { completed: true, goal };
|
|
120
|
+
}
|
|
121
|
+
|
|
74
122
|
function goalBlock(goal: string): string {
|
|
75
123
|
return [
|
|
76
124
|
"## Pi-Rogue Goal",
|
|
77
125
|
`Current goal: ${goal}`,
|
|
78
|
-
"When
|
|
126
|
+
"When the goal is complete, prefer the `goal_complete` tool with a summary and verification evidence. If that tool is unavailable during a loop tick, answer exactly with `GOAL_DONE: ...`.",
|
|
127
|
+
"When the goal is not complete during a loop tick, answer exactly with `GOAL_CONTINUE: ...` and then take one concrete next action.",
|
|
79
128
|
].join("\n");
|
|
80
129
|
}
|
|
81
130
|
|
|
@@ -154,9 +203,13 @@ export function registerGoal(pi: ExtensionAPI): void {
|
|
|
154
203
|
if (research) recordResearchResult(ctx, research, result);
|
|
155
204
|
|
|
156
205
|
if (result === "done") {
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
206
|
+
const text = assistantText(event);
|
|
207
|
+
const summary = text.replace(/^GOAL_DONE:\s*/i, "").trim() || "Goal marked done by sentinel response.";
|
|
208
|
+
completeActiveGoal(ctx, {
|
|
209
|
+
summary: truncate(summary, 1200),
|
|
210
|
+
verification: "GOAL_DONE sentinel response; see assistant message for final state and evidence.",
|
|
211
|
+
source: "sentinel",
|
|
212
|
+
});
|
|
160
213
|
ctx.ui.notify(`🎯 Goal completed: ${truncate(goal, 160)}`, "info");
|
|
161
214
|
}
|
|
162
215
|
});
|
|
@@ -169,6 +222,33 @@ export function registerGoal(pi: ExtensionAPI): void {
|
|
|
169
222
|
return { systemPrompt: `${event.systemPrompt}\n\n${goalBlock(goal)}` };
|
|
170
223
|
});
|
|
171
224
|
|
|
225
|
+
const registerTool = (pi as any).registerTool;
|
|
226
|
+
if (typeof registerTool === "function") registerTool.call(pi, {
|
|
227
|
+
name: "goal_complete",
|
|
228
|
+
label: "Goal Complete",
|
|
229
|
+
description: "Mark the active Pi-Rogue goal complete. Requires a completion summary and verification evidence.",
|
|
230
|
+
parameters: Type.Object({
|
|
231
|
+
summary: Type.String({ description: "What was completed for the active goal" }),
|
|
232
|
+
verification: Type.String({ description: "How completion was verified, or an explicit not-verified statement with reason" }),
|
|
233
|
+
}),
|
|
234
|
+
async execute(_id: unknown, params: { summary?: unknown; verification?: unknown }, _signal: unknown, onUpdate: ((update: unknown) => void) | undefined, ctx: any) {
|
|
235
|
+
const result = completeActiveGoal(ctx, {
|
|
236
|
+
summary: String(params.summary ?? ""),
|
|
237
|
+
verification: String(params.verification ?? ""),
|
|
238
|
+
source: "tool",
|
|
239
|
+
});
|
|
240
|
+
if (!result.completed) {
|
|
241
|
+
const message = result.reason || "Goal completion failed.";
|
|
242
|
+
onUpdate?.({ content: [{ type: "text", text: message }], details: { completed: false } });
|
|
243
|
+
return { content: [{ type: "text", text: message }], details: { completed: false } };
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
const message = `Goal completed: ${truncate(result.goal || "", 160)}`;
|
|
247
|
+
ctx.ui.notify(`🎯 ${message}`, "info");
|
|
248
|
+
return { content: [{ type: "text", text: message }], details: { completed: true, goal: result.goal } };
|
|
249
|
+
},
|
|
250
|
+
});
|
|
251
|
+
|
|
172
252
|
pi.registerCommand("goal", {
|
|
173
253
|
description: "Set, show, clear, or list the current session goal",
|
|
174
254
|
getArgumentCompletions: (prefix: string) => goalArgumentCompletions(prefix),
|
|
@@ -3,6 +3,7 @@ import { appendText, featureFile, readText, sessionFile, sessionKey, truncate }
|
|
|
3
3
|
import { clearResearchState, hasActiveResearch } from "./autoresearch-state.js";
|
|
4
4
|
import { setAdvisorCheckinsEnabled } from "./advisor-checkins.js";
|
|
5
5
|
import { buildGoalCheckPrompt, beginGoalCheck, hasGoalCheckPending } from "./goal-resolution.js";
|
|
6
|
+
import { clearNoProgressRecovery } from "./novelty-guard.js";
|
|
6
7
|
import { readSessionJson, writeSessionJson } from "./state.js";
|
|
7
8
|
import { loopArgumentCompletions } from "./completions.js";
|
|
8
9
|
|
|
@@ -60,6 +61,7 @@ export function clearLoop(ctx: any, options: { clearResearch?: boolean; preserve
|
|
|
60
61
|
const current = readLoopState(ctx);
|
|
61
62
|
archiveLoopState(ctx, current);
|
|
62
63
|
const next = clearLoopState(ctx);
|
|
64
|
+
clearNoProgressRecovery(ctx);
|
|
63
65
|
stopLoopTimer(sessionKey(ctx));
|
|
64
66
|
setLoopStatus(ctx, next);
|
|
65
67
|
if (!options.preserveCheckins) {
|
|
@@ -202,6 +204,7 @@ export function startLoop(pi: ExtensionAPI, ctx: any, interval: string, instruct
|
|
|
202
204
|
return null;
|
|
203
205
|
}
|
|
204
206
|
|
|
207
|
+
clearNoProgressRecovery(ctx);
|
|
205
208
|
const next = writeLoopState(ctx, {
|
|
206
209
|
enabled: true,
|
|
207
210
|
interval,
|
|
@@ -1,11 +1,15 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
1
2
|
import { describe, expect, it } from "vitest";
|
|
2
3
|
import {
|
|
3
4
|
detectAssistantRepetition,
|
|
5
|
+
looksLikeNoProgressTurn,
|
|
4
6
|
normalizeTurn,
|
|
5
7
|
recordAssistantTurn,
|
|
8
|
+
registerNoveltyGuard,
|
|
6
9
|
turnSimilarity,
|
|
7
10
|
type RepetitionGuardState,
|
|
8
11
|
} from "./novelty-guard.js";
|
|
12
|
+
import { readText, sessionFile, writeText } from "./internal.js";
|
|
9
13
|
|
|
10
14
|
describe("repetition guard", () => {
|
|
11
15
|
it("normalizes noisy assistant text", () => {
|
|
@@ -21,6 +25,45 @@ describe("repetition guard", () => {
|
|
|
21
25
|
expect(similarity).toBeGreaterThan(0.8);
|
|
22
26
|
});
|
|
23
27
|
|
|
28
|
+
it("identifies planning-only no-progress turns", () => {
|
|
29
|
+
expect(looksLikeNoProgressTurn("I will think through the approach and plan the next steps.")).toBe(true);
|
|
30
|
+
expect(looksLikeNoProgressTurn("I ran npm test and found one failing assertion.")).toBe(false);
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
it("tracks bounded no-progress only while orchestration is active", () => {
|
|
34
|
+
const base: RepetitionGuardState = { recentAssistantTurns: [] };
|
|
35
|
+
const first = recordAssistantTurn(base, "I will think through the approach and plan the next step.", { activeOrchestration: true });
|
|
36
|
+
const second = recordAssistantTurn(first, "I will think through the approach and plan the next step.", { activeOrchestration: true });
|
|
37
|
+
const third = recordAssistantTurn(second, "I will think through the approach and plan the next step.", { activeOrchestration: true });
|
|
38
|
+
|
|
39
|
+
expect(third.noProgress?.count).toBe(3);
|
|
40
|
+
|
|
41
|
+
const inactive = recordAssistantTurn(base, "I will think through the approach and plan the next step.", { activeOrchestration: false });
|
|
42
|
+
expect(inactive.noProgress).toBeUndefined();
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
it("does not inject stale no-progress recovery after orchestration is inactive", async () => {
|
|
46
|
+
const handlers: Record<string, (event: any, ctx: any) => Promise<any> | any> = {};
|
|
47
|
+
const pi = {
|
|
48
|
+
on: (name: string, handler: (event: any, ctx: any) => Promise<any> | any) => { handlers[name] = handler; },
|
|
49
|
+
} as any;
|
|
50
|
+
const sessionPath = `/tmp/pi-rogue-novelty-${randomUUID()}.jsonl`;
|
|
51
|
+
const ctx = {
|
|
52
|
+
sessionManager: { getSessionFile: () => sessionPath },
|
|
53
|
+
ui: { notify: () => undefined },
|
|
54
|
+
};
|
|
55
|
+
writeText(sessionFile("orchestration", ctx, "repetition-guard.json"), `${JSON.stringify({
|
|
56
|
+
recentAssistantTurns: [],
|
|
57
|
+
noProgress: { at: new Date().toISOString(), count: 3, text: "I will plan the next step.", reason: "test" },
|
|
58
|
+
})}\n`);
|
|
59
|
+
|
|
60
|
+
registerNoveltyGuard(pi);
|
|
61
|
+
const result = await handlers.before_agent_start?.({ systemPrompt: "base" }, ctx);
|
|
62
|
+
|
|
63
|
+
expect(result.systemPrompt).toBe("base");
|
|
64
|
+
expect(JSON.parse(readText(sessionFile("orchestration", ctx, "repetition-guard.json"))).noProgress).toBeUndefined();
|
|
65
|
+
});
|
|
66
|
+
|
|
24
67
|
it("detects repeated assistant output", () => {
|
|
25
68
|
const base: RepetitionGuardState = { recentAssistantTurns: [] };
|
|
26
69
|
const first = recordAssistantTurn(base, "Now let me build the session-flow analyzer and workflow clustering pipeline.");
|
|
@@ -6,6 +6,8 @@ const STATE_FILE = "repetition-guard.json";
|
|
|
6
6
|
const MAX_ASSISTANT_TURNS = 6;
|
|
7
7
|
const REPEAT_COUNT = 3;
|
|
8
8
|
const REPEAT_THRESHOLD = 0.8;
|
|
9
|
+
const NO_PROGRESS_COUNT = 3;
|
|
10
|
+
const BOUNDED_RECOVERY_COUNT = 5;
|
|
9
11
|
|
|
10
12
|
export interface RepetitionGuardTurn {
|
|
11
13
|
at: string;
|
|
@@ -18,9 +20,17 @@ export interface RepetitionGuardRepeat {
|
|
|
18
20
|
text: string;
|
|
19
21
|
}
|
|
20
22
|
|
|
23
|
+
export interface NoProgressSignal {
|
|
24
|
+
at: string;
|
|
25
|
+
count: number;
|
|
26
|
+
text: string;
|
|
27
|
+
reason: string;
|
|
28
|
+
}
|
|
29
|
+
|
|
21
30
|
export interface RepetitionGuardState {
|
|
22
31
|
recentAssistantTurns: RepetitionGuardTurn[];
|
|
23
32
|
assistantRepeat?: RepetitionGuardRepeat;
|
|
33
|
+
noProgress?: NoProgressSignal;
|
|
24
34
|
}
|
|
25
35
|
|
|
26
36
|
export function defaultRepetitionGuardState(): RepetitionGuardState {
|
|
@@ -70,7 +80,18 @@ export function detectAssistantRepetition(state: RepetitionGuardState, minCount
|
|
|
70
80
|
};
|
|
71
81
|
}
|
|
72
82
|
|
|
73
|
-
export function
|
|
83
|
+
export function looksLikeNoProgressTurn(text: string): boolean {
|
|
84
|
+
const normalized = normalizeTurn(text);
|
|
85
|
+
if (normalized.length < 24) return false;
|
|
86
|
+
|
|
87
|
+
const planning = /\b(i will|i'll|let me|going to|we need to|we should|next i|plan|planning|approach|think through|summarize|restate)\b/i.test(text);
|
|
88
|
+
if (!planning) return false;
|
|
89
|
+
|
|
90
|
+
const concreteProgress = /\b(changed|edited|created|wrote|updated|implemented|removed|ran|tested|passed|failed|verified|validated|found|inspected|read|opened|committed|pushed|fixed|completed|result|error|diff)\b/i.test(text);
|
|
91
|
+
return !concreteProgress;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
export function recordAssistantTurn(state: RepetitionGuardState, text: string, options: { activeOrchestration?: boolean } = {}): RepetitionGuardState {
|
|
74
95
|
const trimmed = String(text ?? "").trim();
|
|
75
96
|
if (!trimmed) return state;
|
|
76
97
|
const next: RepetitionGuardState = {
|
|
@@ -78,7 +99,22 @@ export function recordAssistantTurn(state: RepetitionGuardState, text: string):
|
|
|
78
99
|
recentAssistantTurns: [...state.recentAssistantTurns, { at: new Date().toISOString(), text: truncate(trimmed, 1200) }].slice(-MAX_ASSISTANT_TURNS),
|
|
79
100
|
};
|
|
80
101
|
const repeat = detectAssistantRepetition(next);
|
|
81
|
-
|
|
102
|
+
const noProgressCount = options.activeOrchestration && looksLikeNoProgressTurn(trimmed)
|
|
103
|
+
? (state.noProgress?.count ?? 0) + 1
|
|
104
|
+
: 0;
|
|
105
|
+
|
|
106
|
+
return {
|
|
107
|
+
...next,
|
|
108
|
+
assistantRepeat: repeat ?? undefined,
|
|
109
|
+
noProgress: noProgressCount > 0
|
|
110
|
+
? {
|
|
111
|
+
at: new Date().toISOString(),
|
|
112
|
+
count: noProgressCount,
|
|
113
|
+
text: truncate(trimmed, 240),
|
|
114
|
+
reason: "repeated planning/self-talk without concrete progress while orchestration is active",
|
|
115
|
+
}
|
|
116
|
+
: undefined,
|
|
117
|
+
};
|
|
82
118
|
}
|
|
83
119
|
|
|
84
120
|
function parseState(raw: string): RepetitionGuardState {
|
|
@@ -96,6 +132,14 @@ function parseState(raw: string): RepetitionGuardState {
|
|
|
96
132
|
text: parsed.assistantRepeat.text,
|
|
97
133
|
}
|
|
98
134
|
: undefined,
|
|
135
|
+
noProgress: parsed.noProgress && typeof parsed.noProgress.text === "string"
|
|
136
|
+
? {
|
|
137
|
+
at: String(parsed.noProgress.at ?? new Date().toISOString()),
|
|
138
|
+
count: Number(parsed.noProgress.count) || NO_PROGRESS_COUNT,
|
|
139
|
+
text: parsed.noProgress.text,
|
|
140
|
+
reason: String(parsed.noProgress.reason ?? "no concrete progress detected"),
|
|
141
|
+
}
|
|
142
|
+
: undefined,
|
|
99
143
|
};
|
|
100
144
|
} catch {
|
|
101
145
|
return defaultRepetitionGuardState();
|
|
@@ -110,6 +154,44 @@ function writeGuardState(ctx: any, state: RepetitionGuardState): void {
|
|
|
110
154
|
writeText(sessionFile(FEATURE, ctx, STATE_FILE), `${JSON.stringify(state, null, 2)}\n`);
|
|
111
155
|
}
|
|
112
156
|
|
|
157
|
+
export function clearNoProgressRecovery(ctx: any): void {
|
|
158
|
+
const state = readGuardState(ctx);
|
|
159
|
+
if (!state.noProgress) return;
|
|
160
|
+
writeGuardState(ctx, { ...state, noProgress: undefined });
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
function hasActiveOrchestration(ctx: any): boolean {
|
|
164
|
+
if (readText(sessionFile(FEATURE, ctx, "goal.md")).trim()) return true;
|
|
165
|
+
for (const file of ["loop.json", "autoresearch.json"]) {
|
|
166
|
+
try {
|
|
167
|
+
const parsed = JSON.parse(readText(sessionFile(FEATURE, ctx, file), "{}"));
|
|
168
|
+
if (parsed?.enabled || parsed?.instruction) return true;
|
|
169
|
+
} catch {
|
|
170
|
+
// ignore malformed state files; they should not trigger recovery
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
return false;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
function recoveryPrompt(repeat?: RepetitionGuardRepeat, noProgress?: NoProgressSignal): string | null {
|
|
177
|
+
if (noProgress && noProgress.count < NO_PROGRESS_COUNT) noProgress = undefined;
|
|
178
|
+
if (!repeat && !noProgress) return null;
|
|
179
|
+
const count = Math.max(repeat?.count ?? 0, noProgress?.count ?? 0);
|
|
180
|
+
const bounded = count >= BOUNDED_RECOVERY_COUNT;
|
|
181
|
+
const signal = repeat
|
|
182
|
+
? `Repeated assistant output (${repeat.count} turns): ${truncate(repeat.text, 180)}`
|
|
183
|
+
: `No-progress streak (${noProgress?.count} turns): ${truncate(noProgress?.text ?? "", 180)}`;
|
|
184
|
+
|
|
185
|
+
return [
|
|
186
|
+
bounded ? "Pi-Rogue bounded no-progress recovery:" : "Pi-Rogue no-progress recovery:",
|
|
187
|
+
signal,
|
|
188
|
+
noProgress?.reason ? `Reason: ${noProgress.reason}.` : "Reason: repeated output suggests the current approach is stuck.",
|
|
189
|
+
bounded
|
|
190
|
+
? "Recovery is bounded now: do not stack another retry. If one safe, concrete alternative action is available, take exactly that action; otherwise stop and ask the user for direction with the current blocker."
|
|
191
|
+
: "Summarize the current state in one sentence, choose one concrete alternative action, and take it now. Do not only restate the plan or repeat the same response.",
|
|
192
|
+
].join("\n");
|
|
193
|
+
}
|
|
194
|
+
|
|
113
195
|
export function registerNoveltyGuard(pi: ExtensionAPI): void {
|
|
114
196
|
const p = pi as any;
|
|
115
197
|
if (p.__piRogueNoveltyGuardRegistered) return;
|
|
@@ -117,16 +199,16 @@ export function registerNoveltyGuard(pi: ExtensionAPI): void {
|
|
|
117
199
|
|
|
118
200
|
pi.on("before_agent_start", async (event, ctx) => {
|
|
119
201
|
const state = readGuardState(ctx);
|
|
120
|
-
const
|
|
121
|
-
|
|
202
|
+
const active = hasActiveOrchestration(ctx);
|
|
203
|
+
const noProgress = active ? state.noProgress : undefined;
|
|
204
|
+
if (!active && state.noProgress) {
|
|
205
|
+
clearNoProgressRecovery(ctx);
|
|
206
|
+
}
|
|
207
|
+
const prompt = recoveryPrompt(detectAssistantRepetition(state) ?? state.assistantRepeat, noProgress);
|
|
208
|
+
if (!prompt) return { systemPrompt: event.systemPrompt };
|
|
122
209
|
|
|
123
210
|
return {
|
|
124
|
-
systemPrompt: [
|
|
125
|
-
event.systemPrompt,
|
|
126
|
-
"Pi-Rogue repetition guard:",
|
|
127
|
-
`The previous assistant output repeated ${repeat.count} times: ${truncate(repeat.text, 180)}`,
|
|
128
|
-
"Inspect current state before continuing, then apply only the smallest missing delta. Do not repeat the same response.",
|
|
129
|
-
].join("\n\n"),
|
|
211
|
+
systemPrompt: [event.systemPrompt, prompt].join("\n\n"),
|
|
130
212
|
};
|
|
131
213
|
});
|
|
132
214
|
|
|
@@ -136,10 +218,13 @@ export function registerNoveltyGuard(pi: ExtensionAPI): void {
|
|
|
136
218
|
if (!text) return;
|
|
137
219
|
|
|
138
220
|
const previous = readGuardState(ctx);
|
|
139
|
-
const next = recordAssistantTurn(previous, text);
|
|
221
|
+
const next = recordAssistantTurn(previous, text, { activeOrchestration: hasActiveOrchestration(ctx) });
|
|
140
222
|
writeGuardState(ctx, next);
|
|
141
223
|
if (next.assistantRepeat && (!previous.assistantRepeat || next.assistantRepeat.count > previous.assistantRepeat.count)) {
|
|
142
224
|
ctx.ui.notify("Repetition guard detected repeated assistant output; the next turn will inspect current state before retrying.", "warning");
|
|
143
225
|
}
|
|
226
|
+
if (next.noProgress && next.noProgress.count >= NO_PROGRESS_COUNT && (!previous.noProgress || next.noProgress.count > previous.noProgress.count)) {
|
|
227
|
+
ctx.ui.notify("No-progress recovery detected repeated planning without concrete progress; the next turn will take one alternative action or stop.", "warning");
|
|
228
|
+
}
|
|
144
229
|
});
|
|
145
230
|
}
|