little-coder 1.4.3 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/extensions/_shared/intervention.test.ts +13 -0
- package/.pi/extensions/_shared/intervention.ts +41 -0
- package/.pi/extensions/benchmark-profiles/index.ts +27 -9
- package/.pi/extensions/benchmark-profiles/profiles.test.ts +53 -44
- package/.pi/extensions/clear-command/index.test.ts +37 -0
- package/.pi/extensions/clear-command/index.ts +26 -0
- package/.pi/extensions/finalize-warn/index.ts +4 -3
- package/.pi/extensions/output-parser/index.ts +4 -3
- package/.pi/extensions/quality-monitor/index.ts +15 -8
- package/.pi/extensions/quality-monitor/quality.test.ts +68 -2
- package/.pi/extensions/quality-monitor/quality.ts +17 -0
- package/.pi/extensions/thinking-budget/budget.test.ts +170 -132
- package/.pi/extensions/thinking-budget/index.ts +118 -52
- package/.pi/extensions/turn-cap/index.ts +4 -3
- package/.pi/extensions/write-guard/index.ts +57 -67
- package/.pi/extensions/write-guard/write-guard.test.ts +102 -2
- package/.pi/settings.json +6 -6
- package/CHANGELOG.md +26 -0
- package/README.md +8 -2
- package/bin/little-coder.mjs +12 -0
- package/package.json +4 -2
- package/scripts/patch-pi.mjs +113 -0
- package/scripts/patch-pi.test.mjs +63 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { describe, it, expect, beforeEach } from "vitest";
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
2
2
|
import setupExtension from "./index.ts";
|
|
3
3
|
|
|
4
4
|
// Exercise the char→token conversion (matches local/context_manager.py)
|
|
@@ -13,170 +13,208 @@ describe("thinking budget token estimation", () => {
|
|
|
13
13
|
expect(charsToTokens(7)).toBe(2);
|
|
14
14
|
expect(charsToTokens(3500)).toBe(1000);
|
|
15
15
|
});
|
|
16
|
-
it("
|
|
17
|
-
|
|
18
|
-
expect(charsToTokens(
|
|
19
|
-
expect(charsToTokens(7169)).toBeGreaterThan(2048);
|
|
16
|
+
it("4096 tokens ~ 14336 chars (the v1.5.0 default budget)", () => {
|
|
17
|
+
expect(charsToTokens(14336)).toBe(4096);
|
|
18
|
+
expect(charsToTokens(14337)).toBeGreaterThan(4096);
|
|
20
19
|
});
|
|
21
20
|
});
|
|
22
21
|
|
|
23
|
-
// ── Issue #8 regression coverage
|
|
24
|
-
//
|
|
25
|
-
//
|
|
26
|
-
//
|
|
22
|
+
// ── Issue #8 regression coverage (second reproduction, 1.4.3) ───────────────
|
|
23
|
+
// The bug: recovery (setThinkingLevel("off") + sendUserMessage) was deferred to
|
|
24
|
+
// a `turn_end` handler that ran against the module-scope `pi` AFTER ctx.abort()
|
|
25
|
+
// triggered a session replacement → stale `pi` → throw → thinking never turned
|
|
26
|
+
// off + follow-up never sent.
|
|
27
|
+
//
|
|
28
|
+
// The fix: do the whole recovery synchronously in `message_update`, BEFORE
|
|
29
|
+
// ctx.abort(), while `pi` is still live. These tests pin that choreography:
|
|
30
|
+
// - no `turn_end` handler exists (nothing can run against a stale pi),
|
|
31
|
+
// - setThinkingLevel + sendUserMessage are ordered strictly before abort,
|
|
32
|
+
// - thinking is re-asserted off across the restart turn,
|
|
33
|
+
// - the prior level is restored on the next genuine user input.
|
|
27
34
|
|
|
28
35
|
interface Handler {
|
|
29
36
|
(event: any, ctx: any): Promise<unknown> | unknown;
|
|
30
37
|
}
|
|
31
38
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
setThinkingLevel: (lvl: string) => void;
|
|
38
|
-
sendUserMessage: (msg: string, opts?: any) => void;
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
function makePi(): MockPi {
|
|
39
|
+
function makeHarness(initialLevel = "high") {
|
|
40
|
+
const calls: string[] = []; // ordered log across pi + ctx
|
|
41
|
+
const followUps: string[] = [];
|
|
42
|
+
const notifies: string[] = [];
|
|
43
|
+
let level = initialLevel;
|
|
42
44
|
const handlers: Record<string, Handler[]> = {};
|
|
43
|
-
|
|
45
|
+
const pi = {
|
|
44
46
|
handlers,
|
|
45
|
-
|
|
46
|
-
thinkingLevels: [],
|
|
47
|
-
on(name, h) {
|
|
47
|
+
on(name: string, h: Handler) {
|
|
48
48
|
(handlers[name] ??= []).push(h);
|
|
49
49
|
},
|
|
50
|
-
|
|
51
|
-
|
|
50
|
+
getThinkingLevel() {
|
|
51
|
+
return level;
|
|
52
52
|
},
|
|
53
|
-
|
|
54
|
-
|
|
53
|
+
setThinkingLevel(l: string) {
|
|
54
|
+
level = l;
|
|
55
|
+
calls.push(`set:${l}`);
|
|
55
56
|
},
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
57
|
+
sendUserMessage(m: string) {
|
|
58
|
+
followUps.push(m);
|
|
59
|
+
calls.push("send");
|
|
60
|
+
},
|
|
61
|
+
};
|
|
62
|
+
const ctx = {
|
|
63
|
+
abort() {
|
|
64
|
+
calls.push("abort");
|
|
65
|
+
},
|
|
66
|
+
ui: {
|
|
67
|
+
notify(m: string) {
|
|
68
|
+
notifies.push(m);
|
|
69
|
+
calls.push("notify");
|
|
70
|
+
},
|
|
71
|
+
},
|
|
72
|
+
};
|
|
61
73
|
return {
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
74
|
+
pi,
|
|
75
|
+
ctx,
|
|
76
|
+
calls,
|
|
77
|
+
followUps,
|
|
78
|
+
notifies,
|
|
79
|
+
level: () => level,
|
|
80
|
+
setLevelExternally: (l: string) => {
|
|
81
|
+
level = l;
|
|
82
|
+
},
|
|
65
83
|
};
|
|
66
84
|
}
|
|
67
85
|
|
|
68
|
-
async function fire(pi:
|
|
69
|
-
for (const h of pi.handlers[name] ?? [])
|
|
70
|
-
await h(event, ctx);
|
|
71
|
-
}
|
|
86
|
+
async function fire(pi: any, name: string, event: any, ctx: any) {
|
|
87
|
+
for (const h of pi.handlers[name] ?? []) await h(event, ctx);
|
|
72
88
|
}
|
|
73
89
|
|
|
74
90
|
function thinkingDelta(s: string) {
|
|
75
91
|
return { assistantMessageEvent: { type: "thinking_delta", delta: s } };
|
|
76
92
|
}
|
|
77
93
|
|
|
78
|
-
|
|
94
|
+
// Always begin from a clean session — resets the extension's module-scoped
|
|
95
|
+
// state so cases don't leak `forcedOff` / `priorLevel` into one another (and
|
|
96
|
+
// mirrors real startup: session_start always precedes the first agent run).
|
|
97
|
+
async function startRun(h: ReturnType<typeof makeHarness>) {
|
|
98
|
+
await fire(h.pi, "session_start", {}, h.ctx);
|
|
99
|
+
await fire(h.pi, "agent_start", {}, h.ctx);
|
|
100
|
+
await fire(h.pi, "before_agent_start", { systemPromptOptions: {} }, h.ctx);
|
|
101
|
+
await fire(h.pi, "turn_start", {}, h.ctx);
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
describe("thinking-budget recovery (issue #8)", () => {
|
|
79
105
|
beforeEach(() => {
|
|
80
|
-
|
|
81
|
-
|
|
106
|
+
process.env.LITTLE_CODER_THINKING_BUDGET = "10"; // tiny budget for short strings
|
|
107
|
+
});
|
|
108
|
+
afterEach(() => {
|
|
109
|
+
delete process.env.LITTLE_CODER_THINKING_BUDGET;
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
it("registers NO turn_end handler (recovery must not run against a stale pi)", () => {
|
|
113
|
+
const h = makeHarness();
|
|
114
|
+
setupExtension(h.pi as any);
|
|
115
|
+
expect(h.pi.handlers["turn_end"]).toBeUndefined();
|
|
82
116
|
});
|
|
83
117
|
|
|
84
|
-
it("
|
|
85
|
-
const
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
await fire(pi, "
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
await fire(pi, "message_update", thinkingDelta("z".repeat(1000)), ctx);
|
|
98
|
-
|
|
99
|
-
await fire(pi, "turn_end", {}, ctx);
|
|
100
|
-
|
|
101
|
-
expect(ctx.aborts.length).toBe(1);
|
|
102
|
-
expect(pi.followUps.length).toBe(1);
|
|
103
|
-
expect(pi.followUps[0]).toMatch(/thinking budget exceeded/i);
|
|
104
|
-
expect(pi.thinkingLevels).toEqual(["off"]);
|
|
118
|
+
it("on breach, runs the full recovery BEFORE abort and exactly once", async () => {
|
|
119
|
+
const h = makeHarness();
|
|
120
|
+
setupExtension(h.pi as any);
|
|
121
|
+
await startRun(h);
|
|
122
|
+
|
|
123
|
+
await fire(h.pi, "message_update", thinkingDelta("x".repeat(1000)), h.ctx);
|
|
124
|
+
|
|
125
|
+
// setThinkingLevel("off") and sendUserMessage both happen before abort.
|
|
126
|
+
expect(h.calls).toEqual(["set:off", "send", "notify", "abort"]);
|
|
127
|
+
expect(h.level()).toBe("off");
|
|
128
|
+
expect(h.followUps).toHaveLength(1);
|
|
129
|
+
expect(h.followUps[0]).toMatch(/thinking budget exceeded/i);
|
|
130
|
+
expect(h.notifies[0]).toMatch(/harness intervention:.*thought long enough/i);
|
|
105
131
|
});
|
|
106
132
|
|
|
107
|
-
it("
|
|
108
|
-
const
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
await fire(pi, "
|
|
112
|
-
await fire(pi, "
|
|
113
|
-
await fire(pi, "
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
await fire(pi, "turn_end", {}, ctx);
|
|
118
|
-
await fire(pi, "turn_end", {}, ctx);
|
|
119
|
-
|
|
120
|
-
expect(ctx.aborts.length).toBe(1);
|
|
121
|
-
expect(pi.followUps.length).toBe(1);
|
|
122
|
-
expect(pi.thinkingLevels.length).toBe(1);
|
|
133
|
+
it("does not double-abort across multiple bursts in the same turn", async () => {
|
|
134
|
+
const h = makeHarness();
|
|
135
|
+
setupExtension(h.pi as any);
|
|
136
|
+
await startRun(h);
|
|
137
|
+
await fire(h.pi, "message_update", thinkingDelta("x".repeat(1000)), h.ctx);
|
|
138
|
+
await fire(h.pi, "message_update", thinkingDelta("y".repeat(1000)), h.ctx);
|
|
139
|
+
await fire(h.pi, "message_update", thinkingDelta("z".repeat(1000)), h.ctx);
|
|
140
|
+
|
|
141
|
+
expect(h.calls.filter((c) => c === "abort")).toHaveLength(1);
|
|
142
|
+
expect(h.followUps).toHaveLength(1);
|
|
123
143
|
});
|
|
124
144
|
|
|
125
|
-
it("
|
|
126
|
-
const
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
await fire(pi, "before_agent_start", { systemPromptOptions: {} }, ctx1);
|
|
133
|
-
await fire(pi, "turn_start", {}, ctx1);
|
|
134
|
-
await fire(pi, "message_update", thinkingDelta("x".repeat(1000)), ctx1);
|
|
135
|
-
await fire(pi, "turn_end", {}, ctx1);
|
|
136
|
-
|
|
137
|
-
// Run 2: fresh agent_start — no abort should fire on the first turn
|
|
138
|
-
// even though run 1 left state behind.
|
|
139
|
-
const ctx2 = makeCtx();
|
|
140
|
-
await fire(pi, "agent_start", {}, ctx2);
|
|
141
|
-
await fire(pi, "before_agent_start", { systemPromptOptions: {} }, ctx2);
|
|
142
|
-
await fire(pi, "turn_start", {}, ctx2);
|
|
143
|
-
// A small thinking delta well under budget.
|
|
144
|
-
await fire(pi, "message_update", thinkingDelta("ok"), ctx2);
|
|
145
|
-
await fire(pi, "turn_end", {}, ctx2);
|
|
146
|
-
|
|
147
|
-
expect(ctx2.aborts.length).toBe(0);
|
|
148
|
-
// Total follow-ups: only the one from run 1.
|
|
149
|
-
expect(pi.followUps.length).toBe(1);
|
|
145
|
+
it("does not fire under budget", async () => {
|
|
146
|
+
const h = makeHarness();
|
|
147
|
+
setupExtension(h.pi as any);
|
|
148
|
+
await startRun(h);
|
|
149
|
+
await fire(h.pi, "message_update", thinkingDelta("ok"), h.ctx); // 2 chars ≈ 1 token, well under the 10-token budget
|
|
150
|
+
expect(h.calls).toEqual([]);
|
|
151
|
+
expect(h.level()).toBe("high");
|
|
150
152
|
});
|
|
151
153
|
|
|
152
|
-
it("
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
await fire(pi, "
|
|
162
|
-
await fire(pi, "
|
|
163
|
-
await fire(pi, "
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
154
|
+
it("re-asserts thinking off on the restart turn even if pi re-enables it", async () => {
|
|
155
|
+
const h = makeHarness();
|
|
156
|
+
setupExtension(h.pi as any);
|
|
157
|
+
await startRun(h);
|
|
158
|
+
await fire(h.pi, "message_update", thinkingDelta("x".repeat(1000)), h.ctx); // breach → off
|
|
159
|
+
|
|
160
|
+
// Simulate the post-abort session replacement re-resolving thinking to the
|
|
161
|
+
// profile default. The bug was that this stuck; the fix re-asserts off.
|
|
162
|
+
h.setLevelExternally("high");
|
|
163
|
+
await fire(h.pi, "agent_start", {}, h.ctx); // restart run after the followUp
|
|
164
|
+
await fire(h.pi, "before_agent_start", { systemPromptOptions: {} }, h.ctx);
|
|
165
|
+
await fire(h.pi, "turn_start", {}, h.ctx);
|
|
166
|
+
|
|
167
|
+
expect(h.level()).toBe("off");
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
it("restores the prior thinking level on the next genuine user input", async () => {
|
|
171
|
+
const h = makeHarness("medium");
|
|
172
|
+
setupExtension(h.pi as any);
|
|
173
|
+
await startRun(h);
|
|
174
|
+
await fire(h.pi, "message_update", thinkingDelta("x".repeat(1000)), h.ctx); // breach
|
|
175
|
+
expect(h.level()).toBe("off");
|
|
176
|
+
|
|
177
|
+
// A new user prompt ends the forced-off window and restores the level.
|
|
178
|
+
await fire(h.pi, "input", { text: "next task" }, h.ctx);
|
|
179
|
+
expect(h.level()).toBe("medium");
|
|
180
|
+
|
|
181
|
+
// And the force is cleared: a subsequent turn does NOT re-disable thinking.
|
|
182
|
+
await fire(h.pi, "turn_start", {}, h.ctx);
|
|
183
|
+
expect(h.level()).toBe("medium");
|
|
184
|
+
});
|
|
185
|
+
|
|
186
|
+
it("a fresh task (no prior breach) is never forced off", async () => {
|
|
187
|
+
const h = makeHarness("low");
|
|
188
|
+
setupExtension(h.pi as any);
|
|
189
|
+
await fire(h.pi, "input", { text: "task" }, h.ctx);
|
|
190
|
+
await startRun(h);
|
|
191
|
+
await fire(h.pi, "message_update", thinkingDelta("ok"), h.ctx);
|
|
192
|
+
expect(h.level()).toBe("low");
|
|
193
|
+
expect(h.calls).toEqual([]);
|
|
194
|
+
});
|
|
195
|
+
});
|
|
196
|
+
|
|
197
|
+
describe("thinking-budget resolution", () => {
|
|
198
|
+
afterEach(() => {
|
|
199
|
+
delete process.env.LITTLE_CODER_THINKING_BUDGET;
|
|
200
|
+
});
|
|
201
|
+
|
|
202
|
+
it("a profile budget wins over the env budget", async () => {
|
|
203
|
+
process.env.LITTLE_CODER_THINKING_BUDGET = "10";
|
|
204
|
+
const h = makeHarness();
|
|
205
|
+
setupExtension(h.pi as any);
|
|
206
|
+
await fire(h.pi, "session_start", {}, h.ctx);
|
|
207
|
+
await fire(h.pi, "agent_start", {}, h.ctx);
|
|
208
|
+
// profile budget 100 tokens (~350 chars) overrides env's 10.
|
|
209
|
+
await fire(
|
|
210
|
+
h.pi,
|
|
211
|
+
"before_agent_start",
|
|
212
|
+
{ systemPromptOptions: { littleCoder: { thinkingBudget: 100 } } },
|
|
213
|
+
h.ctx,
|
|
170
214
|
);
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
// After-call comes first (sync), then the setImmediate marker fires
|
|
176
|
-
// (because turn_end yielded), then we resume after the await.
|
|
177
|
-
expect(order[0]).toBe("after-call");
|
|
178
|
-
// marker must appear before resolve completes
|
|
179
|
-
expect(order).toContain("setImmediate-marker");
|
|
180
|
-
expect(pi.followUps.length).toBe(1);
|
|
215
|
+
await fire(h.pi, "turn_start", {}, h.ctx);
|
|
216
|
+
// 200 chars ≈ 58 tokens — under the 100-token profile budget, over env's 10.
|
|
217
|
+
await fire(h.pi, "message_update", thinkingDelta("x".repeat(200)), h.ctx);
|
|
218
|
+
expect(h.calls).toEqual([]);
|
|
181
219
|
});
|
|
182
220
|
});
|
|
@@ -1,53 +1,118 @@
|
|
|
1
1
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import { harnessIntervention } from "../_shared/intervention.ts";
|
|
3
|
+
|
|
4
|
+
// pi's thinking-level union (not re-exported from the package root). Mirrors
|
|
5
|
+
// settings-manager's ThinkingLevel; structurally assignable to pi's own type.
|
|
6
|
+
type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
2
7
|
|
|
3
8
|
// Port of the thinking-budget cap + partial-trace reuse logic from
|
|
4
9
|
// providers.py. little-coder's Python implementation aborts the stream
|
|
5
10
|
// mid-flight when thinking tokens cross the budget, re-injects the partial
|
|
6
11
|
// trace as assistant context, and retries with thinking disabled. Pi's
|
|
7
|
-
// AgentSession doesn't expose mid-stream abort-and-replace, so we
|
|
8
|
-
//
|
|
12
|
+
// AgentSession doesn't expose mid-stream abort-and-replace, so we approximate
|
|
13
|
+
// it: count thinking tokens, and on breach disable thinking + queue a
|
|
14
|
+
// commit-to-an-implementation nudge, then abort the over-long turn.
|
|
9
15
|
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
13
|
-
//
|
|
16
|
+
// ── Issue #8, second reproduction (1.4.3) ───────────────────────────────────
|
|
17
|
+
// The v1.0.0 fix deferred the recovery (`setThinkingLevel("off")` +
|
|
18
|
+
// `sendUserMessage`) to a `turn_end` handler, after a `setImmediate` yield, and
|
|
19
|
+
// ran it against the module-scope `pi`. But `ctx.abort()` makes pi's `agent_end`
|
|
20
|
+
// run auto-retry / auto-compaction (both enabled in .pi/settings.json), which
|
|
21
|
+
// REPLACES the session (dispose() → ExtensionRunner.invalidate()). The
|
|
22
|
+
// setImmediate yield is exactly what let that replacement land *before* the
|
|
23
|
+
// deferred recovery, so the recovery touched a now-stale `pi` and threw
|
|
24
|
+
// ("This extension ctx is stale after session replacement or reload"). Net
|
|
25
|
+
// effect: thinking was never turned off (the next step kept thinking) and the
|
|
26
|
+
// follow-up never reached the model (the agent appeared to stop) — the #8
|
|
27
|
+
// symptom, on a different mechanism than the original.
|
|
14
28
|
//
|
|
15
|
-
//
|
|
16
|
-
//
|
|
17
|
-
//
|
|
18
|
-
// trace are lost, but the commit-to-action pressure is the same.
|
|
29
|
+
// Fix: do the whole recovery SYNCHRONOUSLY inside `message_update`, BEFORE
|
|
30
|
+
// `ctx.abort()`, while `pi` is still live and the session hasn't been replaced.
|
|
31
|
+
// No `turn_end` handler, no `setImmediate` — nothing runs against a stale ref.
|
|
19
32
|
//
|
|
20
|
-
//
|
|
21
|
-
//
|
|
22
|
-
//
|
|
23
|
-
//
|
|
24
|
-
//
|
|
25
|
-
//
|
|
26
|
-
//
|
|
27
|
-
//
|
|
28
|
-
//
|
|
33
|
+
// 1. Count thinking_delta tokens during message_update.
|
|
34
|
+
// 2. On breach: capture the current thinking level, flip thinking to "off",
|
|
35
|
+
// queue the commit nudge as a follow-up, surface one harness-intervention
|
|
36
|
+
// line, THEN ctx.abort().
|
|
37
|
+
// 3. Keep thinking off across the restart turn(s): `forcedOff` re-asserts
|
|
38
|
+
// "off" on every turn_start until the user submits a genuinely new prompt
|
|
39
|
+
// (`input` event), at which point the prior level is restored so the next
|
|
40
|
+
// task can think again. (A new task should not inherit "off" just because
|
|
41
|
+
// a previous one over-thought.)
|
|
29
42
|
|
|
30
|
-
const DEFAULT_BUDGET =
|
|
43
|
+
const DEFAULT_BUDGET = 4096;
|
|
31
44
|
|
|
32
|
-
// Per-run rolling state
|
|
45
|
+
// Per-run rolling state.
|
|
33
46
|
let thinkingChars = 0;
|
|
34
47
|
let budgetForTurn = DEFAULT_BUDGET;
|
|
35
48
|
let aborted = false;
|
|
36
|
-
|
|
49
|
+
// True from a budget breach until the next genuine user input. While set, we
|
|
50
|
+
// re-assert thinking "off" at the start of every turn so the restart turn (and
|
|
51
|
+
// any follow-on turns of the same task) can't silently come back with thinking
|
|
52
|
+
// re-enabled by the post-replacement profile resolution.
|
|
53
|
+
let forcedOff = false;
|
|
54
|
+
// The thinking level in effect when we first forced it off, restored on the
|
|
55
|
+
// next user input so a new task is unaffected.
|
|
56
|
+
let priorLevel: ThinkingLevel | undefined;
|
|
37
57
|
|
|
38
58
|
function charsToTokens(chars: number): number {
|
|
39
59
|
// Matches local/context_manager.estimate_tokens (len/3.5)
|
|
40
60
|
return Math.ceil(chars / 3.5);
|
|
41
61
|
}
|
|
42
62
|
|
|
63
|
+
// setThinkingLevel / getThinkingLevel are guarded: a stale-ctx throw must never
|
|
64
|
+
// escape (pi reports an uncaught extension throw as a hard "Extension error"),
|
|
65
|
+
// and older SDK builds may lack the getter.
|
|
66
|
+
function safeGetThinkingLevel(pi: ExtensionAPI): ThinkingLevel | undefined {
|
|
67
|
+
try {
|
|
68
|
+
return typeof pi.getThinkingLevel === "function" ? pi.getThinkingLevel() : undefined;
|
|
69
|
+
} catch {
|
|
70
|
+
return undefined;
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
function safeSetThinkingLevel(pi: ExtensionAPI, level: ThinkingLevel): void {
|
|
75
|
+
try {
|
|
76
|
+
pi.setThinkingLevel(level);
|
|
77
|
+
} catch {
|
|
78
|
+
// Stale ctx / unsupported — leave the level alone rather than crash the run.
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
43
82
|
export default function (pi: ExtensionAPI) {
|
|
44
|
-
//
|
|
45
|
-
//
|
|
46
|
-
//
|
|
83
|
+
// A new session (startup, /clear, resume, reload) is a clean slate — clear
|
|
84
|
+
// everything, including the forced-off window. The recovery restart does NOT
|
|
85
|
+
// fire session_start (it's a follow-up within the same session), so this
|
|
86
|
+
// never clobbers the re-assertion. Also stops module-scoped state leaking
|
|
87
|
+
// across sessions in-process.
|
|
88
|
+
pi.on("session_start", async () => {
|
|
89
|
+
thinkingChars = 0;
|
|
90
|
+
aborted = false;
|
|
91
|
+
forcedOff = false;
|
|
92
|
+
priorLevel = undefined;
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
// Hard reset of per-turn counters between agent runs. `forcedOff` /
|
|
96
|
+
// `priorLevel` are deliberately NOT reset here: agent_start ALSO fires for
|
|
97
|
+
// the recovery restart turn, and clearing the force there would let thinking
|
|
98
|
+
// come straight back on — exactly the bug. They are cleared on `input`
|
|
99
|
+
// (a genuinely new user task) or `session_start`.
|
|
47
100
|
pi.on("agent_start", async () => {
|
|
48
101
|
thinkingChars = 0;
|
|
49
102
|
aborted = false;
|
|
50
|
-
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
// A genuinely new user prompt ends the "forced off" window: restore the
|
|
106
|
+
// level the user actually had before the breach. Programmatic follow-ups
|
|
107
|
+
// (our nudge) do not emit an `input` event, so the restart turn stays off.
|
|
108
|
+
pi.on("input", async () => {
|
|
109
|
+
if (forcedOff) {
|
|
110
|
+
if (priorLevel !== undefined) safeSetThinkingLevel(pi, priorLevel);
|
|
111
|
+
forcedOff = false;
|
|
112
|
+
priorLevel = undefined;
|
|
113
|
+
}
|
|
114
|
+
thinkingChars = 0;
|
|
115
|
+
aborted = false;
|
|
51
116
|
});
|
|
52
117
|
|
|
53
118
|
pi.on("before_agent_start", async (event) => {
|
|
@@ -63,9 +128,11 @@ export default function (pi: ExtensionAPI) {
|
|
|
63
128
|
|
|
64
129
|
pi.on("turn_start", async () => {
|
|
65
130
|
thinkingChars = 0;
|
|
66
|
-
|
|
67
|
-
//
|
|
68
|
-
|
|
131
|
+
aborted = false;
|
|
132
|
+
// Re-assert "off" for the restart turn (and any follow-on turns of the same
|
|
133
|
+
// task). After the session replacement triggered by the abort, the new
|
|
134
|
+
// run can otherwise resolve thinking back to the profile default.
|
|
135
|
+
if (forcedOff) safeSetThinkingLevel(pi, "off");
|
|
69
136
|
});
|
|
70
137
|
|
|
71
138
|
pi.on("message_update", async (event, ctx) => {
|
|
@@ -74,32 +141,31 @@ export default function (pi: ExtensionAPI) {
|
|
|
74
141
|
if (ev.type !== "thinking_delta") return;
|
|
75
142
|
const delta = typeof ev.delta === "string" ? ev.delta : "";
|
|
76
143
|
thinkingChars += delta.length;
|
|
77
|
-
if (aborted
|
|
144
|
+
if (aborted) return;
|
|
78
145
|
const tokens = charsToTokens(thinkingChars);
|
|
79
|
-
if (tokens
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
146
|
+
if (tokens <= budgetForTurn) return;
|
|
147
|
+
|
|
148
|
+
// Breach. Do the entire recovery now, while `pi` is still live — BEFORE
|
|
149
|
+
// ctx.abort() triggers the session replacement that would make `pi` stale.
|
|
150
|
+
aborted = true;
|
|
151
|
+
if (!forcedOff) {
|
|
152
|
+
priorLevel = safeGetThinkingLevel(pi);
|
|
153
|
+
forcedOff = true;
|
|
154
|
+
}
|
|
155
|
+
safeSetThinkingLevel(pi, "off");
|
|
156
|
+
try {
|
|
157
|
+
pi.sendUserMessage(
|
|
158
|
+
"[thinking budget exceeded] Please commit to an implementation now. " +
|
|
159
|
+
"Stop deliberating and use your tools to make progress.",
|
|
160
|
+
{ deliverAs: "followUp" },
|
|
85
161
|
);
|
|
86
|
-
|
|
162
|
+
} catch {
|
|
163
|
+
// SDK without sendUserMessage — abort still forces the turn to end.
|
|
87
164
|
}
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
if (!recoveryPending) return;
|
|
92
|
-
// Yield one tick so pi's abort barrier settles before we queue the
|
|
93
|
-
// follow-up. On fast-streaming local backends (qwen3.6 / llama.cpp)
|
|
94
|
-
// queuing immediately after ctx.abort() drops the follow-up silently
|
|
95
|
-
// and the agent appears to stop with no message — issue #8.
|
|
96
|
-
await new Promise<void>((r) => setImmediate(r));
|
|
97
|
-
pi.setThinkingLevel("off");
|
|
98
|
-
pi.sendUserMessage(
|
|
99
|
-
"[thinking budget exceeded] Please commit to an implementation now. Stop deliberating and use your tools to make progress.",
|
|
100
|
-
{ deliverAs: "followUp" },
|
|
165
|
+
harnessIntervention(
|
|
166
|
+
ctx,
|
|
167
|
+
"the model has thought long enough — forcing it to start implementing.",
|
|
101
168
|
);
|
|
102
|
-
|
|
103
|
-
aborted = false;
|
|
169
|
+
ctx.abort();
|
|
104
170
|
});
|
|
105
171
|
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import { harnessIntervention } from "../_shared/intervention.ts";
|
|
2
3
|
|
|
3
4
|
// Port of agent.py's max_turns early-break. Counts turn_start events per
|
|
4
5
|
// agent_start span; when the count exceeds LITTLE_CODER_MAX_TURNS (or the
|
|
@@ -27,9 +28,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
27
28
|
if (capForRun <= 0) return;
|
|
28
29
|
turnsThisRun++;
|
|
29
30
|
if (turnsThisRun > capForRun) {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
harnessIntervention(
|
|
32
|
+
ctx,
|
|
33
|
+
`the model hit the turn limit (${capForRun}) — stopping the run.`,
|
|
33
34
|
);
|
|
34
35
|
ctx.abort();
|
|
35
36
|
}
|