little-coder 1.4.2 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { describe, it, expect, beforeEach } from "vitest";
1
+ import { describe, it, expect, beforeEach, afterEach } from "vitest";
2
2
  import setupExtension from "./index.ts";
3
3
 
4
4
  // Exercise the char→token conversion (matches local/context_manager.py)
@@ -13,170 +13,208 @@ describe("thinking budget token estimation", () => {
13
13
  expect(charsToTokens(7)).toBe(2);
14
14
  expect(charsToTokens(3500)).toBe(1000);
15
15
  });
16
- it("2048 tokens ~ 7168 chars", () => {
17
- // Budget trigger boundary: ceil(7169/3.5) = 2049 > 2048
18
- expect(charsToTokens(7168)).toBe(2048);
19
- expect(charsToTokens(7169)).toBeGreaterThan(2048);
16
+ it("4096 tokens ~ 14336 chars (the v1.5.0 default budget)", () => {
17
+ expect(charsToTokens(14336)).toBe(4096);
18
+ expect(charsToTokens(14337)).toBeGreaterThan(4096);
20
19
  });
21
20
  });
22
21
 
23
- // ── Issue #8 regression coverage ────────────────────────────────────────
24
- // Mock just enough of pi's ExtensionAPI for the handler choreography.
25
- // We capture every registered handler keyed by event name and drive them
26
- // directly to assert idempotency / sequencing.
22
+ // ── Issue #8 regression coverage (second reproduction, 1.4.3) ───────────────
23
+ // The bug: recovery (setThinkingLevel("off") + sendUserMessage) was deferred to
24
+ // a `turn_end` handler that ran against the module-scope `pi` AFTER ctx.abort()
25
+ // triggered a session replacement — stale `pi` → throw → thinking never turned
26
+ // off + follow-up never sent.
27
+ //
28
+ // The fix: do the whole recovery synchronously in `message_update`, BEFORE
29
+ // ctx.abort(), while `pi` is still live. These tests pin that choreography:
30
+ // - no `turn_end` handler exists (nothing can run against a stale pi),
31
+ // - setThinkingLevel + sendUserMessage are ordered strictly before abort,
32
+ // - thinking is re-asserted off across the restart turn,
33
+ // - the prior level is restored on the next genuine user input.
27
34
 
28
35
  interface Handler {
29
36
  (event: any, ctx: any): Promise<unknown> | unknown;
30
37
  }
31
38
 
32
- interface MockPi {
33
- on: (name: string, h: Handler) => void;
34
- handlers: Record<string, Handler[]>;
35
- followUps: string[];
36
- thinkingLevels: string[];
37
- setThinkingLevel: (lvl: string) => void;
38
- sendUserMessage: (msg: string, opts?: any) => void;
39
- }
40
-
41
- function makePi(): MockPi {
39
+ function makeHarness(initialLevel = "high") {
40
+ const calls: string[] = []; // ordered log across pi + ctx
41
+ const followUps: string[] = [];
42
+ const notifies: string[] = [];
43
+ let level = initialLevel;
42
44
  const handlers: Record<string, Handler[]> = {};
43
- return {
45
+ const pi = {
44
46
  handlers,
45
- followUps: [],
46
- thinkingLevels: [],
47
- on(name, h) {
47
+ on(name: string, h: Handler) {
48
48
  (handlers[name] ??= []).push(h);
49
49
  },
50
- setThinkingLevel(lvl) {
51
- this.thinkingLevels.push(lvl);
50
+ getThinkingLevel() {
51
+ return level;
52
52
  },
53
- sendUserMessage(msg, _opts) {
54
- this.followUps.push(msg);
53
+ setThinkingLevel(l: string) {
54
+ level = l;
55
+ calls.push(`set:${l}`);
55
56
  },
56
- } as MockPi;
57
- }
58
-
59
- function makeCtx() {
60
- const aborts: number[] = [];
57
+ sendUserMessage(m: string) {
58
+ followUps.push(m);
59
+ calls.push("send");
60
+ },
61
+ };
62
+ const ctx = {
63
+ abort() {
64
+ calls.push("abort");
65
+ },
66
+ ui: {
67
+ notify(m: string) {
68
+ notifies.push(m);
69
+ calls.push("notify");
70
+ },
71
+ },
72
+ };
61
73
  return {
62
- aborts,
63
- abort: () => { aborts.push(1); },
64
- ui: { notify: (_m: string, _l?: string) => {} },
74
+ pi,
75
+ ctx,
76
+ calls,
77
+ followUps,
78
+ notifies,
79
+ level: () => level,
80
+ setLevelExternally: (l: string) => {
81
+ level = l;
82
+ },
65
83
  };
66
84
  }
67
85
 
68
- async function fire(pi: MockPi, name: string, event: any, ctx: any) {
69
- for (const h of pi.handlers[name] ?? []) {
70
- await h(event, ctx);
71
- }
86
+ async function fire(pi: any, name: string, event: any, ctx: any) {
87
+ for (const h of pi.handlers[name] ?? []) await h(event, ctx);
72
88
  }
73
89
 
74
90
  function thinkingDelta(s: string) {
75
91
  return { assistantMessageEvent: { type: "thinking_delta", delta: s } };
76
92
  }
77
93
 
78
- describe("thinking-budget idempotency (issue #8)", () => {
94
+ // Always begin from a clean session — resets the extension's module-scoped
95
+ // state so cases don't leak `forcedOff` / `priorLevel` into one another (and
96
+ // mirrors real startup: session_start always precedes the first agent run).
97
+ async function startRun(h: ReturnType<typeof makeHarness>) {
98
+ await fire(h.pi, "session_start", {}, h.ctx);
99
+ await fire(h.pi, "agent_start", {}, h.ctx);
100
+ await fire(h.pi, "before_agent_start", { systemPromptOptions: {} }, h.ctx);
101
+ await fire(h.pi, "turn_start", {}, h.ctx);
102
+ }
103
+
104
+ describe("thinking-budget recovery (issue #8)", () => {
79
105
  beforeEach(() => {
80
- // Force a small budget so we can trigger with short strings.
81
- process.env.LITTLE_CODER_THINKING_BUDGET = "10";
106
+ process.env.LITTLE_CODER_THINKING_BUDGET = "10"; // tiny budget for short strings
107
+ });
108
+ afterEach(() => {
109
+ delete process.env.LITTLE_CODER_THINKING_BUDGET;
110
+ });
111
+
112
+ it("registers NO turn_end handler (recovery must not run against a stale pi)", () => {
113
+ const h = makeHarness();
114
+ setupExtension(h.pi as any);
115
+ expect(h.pi.handlers["turn_end"]).toBeUndefined();
82
116
  });
83
117
 
84
- it("fires exactly one abort + one follow-up for a single budget breach across many bursts", async () => {
85
- const pi = makePi();
86
- const ctx = makeCtx();
87
- setupExtension(pi as any);
88
- await fire(pi, "agent_start", {}, ctx);
89
- await fire(pi, "before_agent_start", { systemPromptOptions: {} }, ctx);
90
- await fire(pi, "turn_start", {}, ctx);
91
-
92
- // Burst: 1000 chars of thinking, way over 10-token budget.
93
- await fire(pi, "message_update", thinkingDelta("x".repeat(1000)), ctx);
94
- // Second burst on the same turn — must not double-abort.
95
- await fire(pi, "message_update", thinkingDelta("y".repeat(1000)), ctx);
96
- // Third burst.
97
- await fire(pi, "message_update", thinkingDelta("z".repeat(1000)), ctx);
98
-
99
- await fire(pi, "turn_end", {}, ctx);
100
-
101
- expect(ctx.aborts.length).toBe(1);
102
- expect(pi.followUps.length).toBe(1);
103
- expect(pi.followUps[0]).toMatch(/thinking budget exceeded/i);
104
- expect(pi.thinkingLevels).toEqual(["off"]);
118
+ it("on breach, runs the full recovery BEFORE abort and exactly once", async () => {
119
+ const h = makeHarness();
120
+ setupExtension(h.pi as any);
121
+ await startRun(h);
122
+
123
+ await fire(h.pi, "message_update", thinkingDelta("x".repeat(1000)), h.ctx);
124
+
125
+ // setThinkingLevel("off") and sendUserMessage both happen before abort.
126
+ expect(h.calls).toEqual(["set:off", "send", "notify", "abort"]);
127
+ expect(h.level()).toBe("off");
128
+ expect(h.followUps).toHaveLength(1);
129
+ expect(h.followUps[0]).toMatch(/thinking budget exceeded/i);
130
+ expect(h.notifies[0]).toMatch(/harness intervention:.*thought long enough/i);
105
131
  });
106
132
 
107
- it("fires the recovery follow-up only once even if turn_end is re-emitted", async () => {
108
- const pi = makePi();
109
- const ctx = makeCtx();
110
- setupExtension(pi as any);
111
- await fire(pi, "agent_start", {}, ctx);
112
- await fire(pi, "before_agent_start", { systemPromptOptions: {} }, ctx);
113
- await fire(pi, "turn_start", {}, ctx);
114
- await fire(pi, "message_update", thinkingDelta("x".repeat(1000)), ctx);
115
- await fire(pi, "turn_end", {}, ctx);
116
- // Pi can re-emit turn_end during retry / compaction paths — must be a no-op.
117
- await fire(pi, "turn_end", {}, ctx);
118
- await fire(pi, "turn_end", {}, ctx);
119
-
120
- expect(ctx.aborts.length).toBe(1);
121
- expect(pi.followUps.length).toBe(1);
122
- expect(pi.thinkingLevels.length).toBe(1);
133
+ it("does not double-abort across multiple bursts in the same turn", async () => {
134
+ const h = makeHarness();
135
+ setupExtension(h.pi as any);
136
+ await startRun(h);
137
+ await fire(h.pi, "message_update", thinkingDelta("x".repeat(1000)), h.ctx);
138
+ await fire(h.pi, "message_update", thinkingDelta("y".repeat(1000)), h.ctx);
139
+ await fire(h.pi, "message_update", thinkingDelta("z".repeat(1000)), h.ctx);
140
+
141
+ expect(h.calls.filter((c) => c === "abort")).toHaveLength(1);
142
+ expect(h.followUps).toHaveLength(1);
123
143
  });
124
144
 
125
- it("resets state on agent_start so a prior aborted run does not leak", async () => {
126
- const pi = makePi();
127
- const ctx1 = makeCtx();
128
- setupExtension(pi as any);
129
-
130
- // Run 1: trigger an abort.
131
- await fire(pi, "agent_start", {}, ctx1);
132
- await fire(pi, "before_agent_start", { systemPromptOptions: {} }, ctx1);
133
- await fire(pi, "turn_start", {}, ctx1);
134
- await fire(pi, "message_update", thinkingDelta("x".repeat(1000)), ctx1);
135
- await fire(pi, "turn_end", {}, ctx1);
136
-
137
- // Run 2: fresh agent_start — no abort should fire on the first turn
138
- // even though run 1 left state behind.
139
- const ctx2 = makeCtx();
140
- await fire(pi, "agent_start", {}, ctx2);
141
- await fire(pi, "before_agent_start", { systemPromptOptions: {} }, ctx2);
142
- await fire(pi, "turn_start", {}, ctx2);
143
- // A small thinking delta well under budget.
144
- await fire(pi, "message_update", thinkingDelta("ok"), ctx2);
145
- await fire(pi, "turn_end", {}, ctx2);
146
-
147
- expect(ctx2.aborts.length).toBe(0);
148
- // Total follow-ups: only the one from run 1.
149
- expect(pi.followUps.length).toBe(1);
145
+ it("does not fire under budget", async () => {
146
+ const h = makeHarness();
147
+ setupExtension(h.pi as any);
148
+ await startRun(h);
149
+ await fire(h.pi, "message_update", thinkingDelta("ok"), h.ctx); // 2 chars < 10 tokens
150
+ expect(h.calls).toEqual([]);
151
+ expect(h.level()).toBe("high");
150
152
  });
151
153
 
152
- it("yields one tick before sendUserMessage so pi's abort barrier can settle", async () => {
153
- // We can only assert this indirectly: turn_end must complete the await
154
- // chain (it returns a Promise) AFTER setImmediate fires. If it didn't
155
- // yield, sendUserMessage would land synchronously inside the same
156
- // microtask as ctx.abort(). Verify ordering by interleaving a marker.
157
- const pi = makePi();
158
- const ctx = makeCtx();
159
- setupExtension(pi as any);
160
- await fire(pi, "agent_start", {}, ctx);
161
- await fire(pi, "before_agent_start", { systemPromptOptions: {} }, ctx);
162
- await fire(pi, "turn_start", {}, ctx);
163
- await fire(pi, "message_update", thinkingDelta("x".repeat(1000)), ctx);
164
-
165
- const order: string[] = [];
166
- setImmediate(() => order.push("setImmediate-marker"));
167
- const turnEndPromise = (pi.handlers["turn_end"] ?? []).reduce<Promise<unknown>>(
168
- (p, h) => p.then(() => h({}, ctx)),
169
- Promise.resolve(),
154
+ it("re-asserts thinking off on the restart turn even if pi re-enables it", async () => {
155
+ const h = makeHarness();
156
+ setupExtension(h.pi as any);
157
+ await startRun(h);
158
+ await fire(h.pi, "message_update", thinkingDelta("x".repeat(1000)), h.ctx); // breach → thinking forced off
159
+
160
+ // Simulate the post-abort session replacement re-resolving thinking to the
161
+ // profile default. The bug was that this stuck; the fix re-asserts off.
162
+ h.setLevelExternally("high");
163
+ await fire(h.pi, "agent_start", {}, h.ctx); // restart run after the followUp
164
+ await fire(h.pi, "before_agent_start", { systemPromptOptions: {} }, h.ctx);
165
+ await fire(h.pi, "turn_start", {}, h.ctx);
166
+
167
+ expect(h.level()).toBe("off");
168
+ });
169
+
170
+ it("restores the prior thinking level on the next genuine user input", async () => {
171
+ const h = makeHarness("medium");
172
+ setupExtension(h.pi as any);
173
+ await startRun(h);
174
+ await fire(h.pi, "message_update", thinkingDelta("x".repeat(1000)), h.ctx); // breach
175
+ expect(h.level()).toBe("off");
176
+
177
+ // A new user prompt ends the forced-off window and restores the level.
178
+ await fire(h.pi, "input", { text: "next task" }, h.ctx);
179
+ expect(h.level()).toBe("medium");
180
+
181
+ // And the force is cleared: a subsequent turn does NOT re-disable thinking.
182
+ await fire(h.pi, "turn_start", {}, h.ctx);
183
+ expect(h.level()).toBe("medium");
184
+ });
185
+
186
+ it("a fresh task (no prior breach) is never forced off", async () => {
187
+ const h = makeHarness("low");
188
+ setupExtension(h.pi as any);
189
+ await fire(h.pi, "input", { text: "task" }, h.ctx);
190
+ await startRun(h);
191
+ await fire(h.pi, "message_update", thinkingDelta("ok"), h.ctx);
192
+ expect(h.level()).toBe("low");
193
+ expect(h.calls).toEqual([]);
194
+ });
195
+ });
196
+
197
+ describe("thinking-budget resolution", () => {
198
+ afterEach(() => {
199
+ delete process.env.LITTLE_CODER_THINKING_BUDGET;
200
+ });
201
+
202
+ it("a profile budget wins over the env budget", async () => {
203
+ process.env.LITTLE_CODER_THINKING_BUDGET = "10";
204
+ const h = makeHarness();
205
+ setupExtension(h.pi as any);
206
+ await fire(h.pi, "session_start", {}, h.ctx);
207
+ await fire(h.pi, "agent_start", {}, h.ctx);
208
+ // profile budget 100 tokens (~350 chars) overrides env's 10.
209
+ await fire(
210
+ h.pi,
211
+ "before_agent_start",
212
+ { systemPromptOptions: { littleCoder: { thinkingBudget: 100 } } },
213
+ h.ctx,
170
214
  );
171
- order.push("after-call");
172
- await turnEndPromise;
173
- order.push("after-await");
174
-
175
- // After-call comes first (sync), then the setImmediate marker fires
176
- // (because turn_end yielded), then we resume after the await.
177
- expect(order[0]).toBe("after-call");
178
- // marker must appear before resolve completes
179
- expect(order).toContain("setImmediate-marker");
180
- expect(pi.followUps.length).toBe(1);
215
+ await fire(h.pi, "turn_start", {}, h.ctx);
216
+ // 200 chars ≈ 58 tokens — under the 100-token profile budget, over env's 10.
217
+ await fire(h.pi, "message_update", thinkingDelta("x".repeat(200)), h.ctx);
218
+ expect(h.calls).toEqual([]);
181
219
  });
182
220
  });
@@ -1,53 +1,118 @@
1
1
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
+ import { harnessIntervention } from "../_shared/intervention.ts";
3
+
4
+ // pi's thinking-level union (not re-exported from the package root). Mirrors
5
+ // settings-manager's ThinkingLevel; structurally assignable to pi's own type.
6
+ type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh";
2
7
 
3
8
  // Port of the thinking-budget cap + partial-trace reuse logic from
4
9
  // providers.py. little-coder's Python implementation aborts the stream
5
10
  // mid-flight when thinking tokens cross the budget, re-injects the partial
6
11
  // trace as assistant context, and retries with thinking disabled. Pi's
7
- // AgentSession doesn't expose mid-stream abort-and-replace, so we implement
8
- // the between-turn fallback documented in the plan:
12
+ // AgentSession doesn't expose mid-stream abort-and-replace, so we approximate
13
+ // it: count thinking tokens, and on breach disable thinking + queue a
14
+ // commit-to-an-implementation nudge, then abort the over-long turn.
9
15
  //
10
- // 1. Count thinking_delta tokens during message_update
11
- // 2. On budget exceed, call ctx.abort() to end the turn
12
- // 3. On turn_end after abort, flip thinking level to "off" and queue a
13
- // correction follow-up nudging the model to commit to an implementation
16
+ // ── Issue #8, second reproduction (1.4.3) ───────────────────────────────────
17
+ // The v1.0.0 fix deferred the recovery (`setThinkingLevel("off")` +
18
+ // `sendUserMessage`) to a `turn_end` handler, after a `setImmediate` yield, and
19
+ // ran it against the module-scope `pi`. But `ctx.abort()` makes pi's `agent_end`
20
+ // run auto-retry / auto-compaction (both enabled in .pi/settings.json), which
21
+ // REPLACES the session (dispose() → ExtensionRunner.invalidate()). The
22
+ // setImmediate yield is exactly what let that replacement land *before* the
23
+ // deferred recovery, so the recovery touched a now-stale `pi` and threw
24
+ // ("This extension ctx is stale after session replacement or reload"). Net
25
+ // effect: thinking was never turned off (the next step kept thinking) and the
26
+ // follow-up never reached the model (the agent appeared to stop) — the #8
27
+ // symptom, on a different mechanism than the original.
14
28
  //
15
- // The behavioral effect matches the whitepaper's claim that the budget cap
16
- // "forces the model out of open-ended deliberation and back into committing
17
- // to an implementation" — the concrete savings of preserving the partial
18
- // trace are lost, but the commit-to-action pressure is the same.
29
+ // Fix: do the whole recovery SYNCHRONOUSLY inside `message_update`, BEFORE
30
+ // `ctx.abort()`, while `pi` is still live and the session hasn't been replaced.
31
+ // No `turn_end` handler, no `setImmediate` — nothing runs against a stale ref.
19
32
  //
20
- // Idempotency notes (issue #8 fix):
21
- // - State is reset on `agent_start` AND `turn_start` so a previous run
22
- // leaving `aborted=true` cannot leak into the next conversation.
23
- // - `recoveryPending` gates re-entry: while a recovery is mid-flight,
24
- // message_update / turn_start cannot re-arm the abort.
25
- // - The recovery sequence yields one tick (setImmediate) so pi's async
26
- // abort barrier settles before we queue the follow-up message; without
27
- // this, fast-streaming local backends drop the follow-up silently and
28
- // the agent appears to stop.
33
+ // 1. Count thinking_delta tokens during message_update.
34
+ // 2. On breach: capture the current thinking level, flip thinking to "off",
35
+ // queue the commit nudge as a follow-up, surface one harness-intervention
36
+ // line, THEN ctx.abort().
37
+ // 3. Keep thinking off across the restart turn(s): `forcedOff` re-asserts
38
+ // "off" on every turn_start until the user submits a genuinely new prompt
39
+ // (`input` event), at which point the prior level is restored so the next
40
+ // task can think again. (A new task should not inherit "off" just because
41
+ // a previous one over-thought.)
29
42
 
30
- const DEFAULT_BUDGET = 2048;
43
+ const DEFAULT_BUDGET = 4096;
31
44
 
32
- // Per-run rolling state (reset on agent_start)
45
+ // Per-run rolling state.
33
46
  let thinkingChars = 0;
34
47
  let budgetForTurn = DEFAULT_BUDGET;
35
48
  let aborted = false;
36
- let recoveryPending = false;
49
+ // True from a budget breach until the next genuine user input. While set, we
50
+ // re-assert thinking "off" at the start of every turn so the restart turn (and
51
+ // any follow-on turns of the same task) can't silently come back with thinking
52
+ // re-enabled by the post-replacement profile resolution.
53
+ let forcedOff = false;
54
+ // The thinking level in effect when we first forced it off, restored on the
55
+ // next user input so a new task is unaffected.
56
+ let priorLevel: ThinkingLevel | undefined;
37
57
 
38
58
  function charsToTokens(chars: number): number {
39
59
  // Matches local/context_manager.estimate_tokens (len/3.5)
40
60
  return Math.ceil(chars / 3.5);
41
61
  }
42
62
 
63
+ // setThinkingLevel / getThinkingLevel are guarded: a stale-ctx throw must never
64
+ // escape (pi reports an uncaught extension throw as a hard "Extension error"),
65
+ // and older SDK builds may lack the getter.
66
+ function safeGetThinkingLevel(pi: ExtensionAPI): ThinkingLevel | undefined {
67
+ try {
68
+ return typeof pi.getThinkingLevel === "function" ? pi.getThinkingLevel() : undefined;
69
+ } catch {
70
+ return undefined;
71
+ }
72
+ }
73
+
74
+ function safeSetThinkingLevel(pi: ExtensionAPI, level: ThinkingLevel): void {
75
+ try {
76
+ pi.setThinkingLevel(level);
77
+ } catch {
78
+ // Stale ctx / unsupported — leave the level alone rather than crash the run.
79
+ }
80
+ }
81
+
43
82
  export default function (pi: ExtensionAPI) {
44
- // Hard reset between conversations. agent_start fires once per /run; if a
45
- // previous run aborted, `aborted` and `recoveryPending` would otherwise
46
- // leak into the next conversation.
83
+ // A new session (startup, /clear, resume, reload) is a clean slate — clear
84
+ // everything, including the forced-off window. The recovery restart does NOT
85
+ // fire session_start (it's a follow-up within the same session), so this
86
+ // never clobbers the re-assertion. Also stops module-scoped state leaking
87
+ // across sessions in-process.
88
+ pi.on("session_start", async () => {
89
+ thinkingChars = 0;
90
+ aborted = false;
91
+ forcedOff = false;
92
+ priorLevel = undefined;
93
+ });
94
+
95
+ // Hard reset of per-turn counters between agent runs. `forcedOff` /
96
+ // `priorLevel` are deliberately NOT reset here: agent_start ALSO fires for
97
+ // the recovery restart turn, and clearing the force there would let thinking
98
+ // come straight back on — exactly the bug. They are cleared on `input`
99
+ // (a genuinely new user task) or `session_start`.
47
100
  pi.on("agent_start", async () => {
48
101
  thinkingChars = 0;
49
102
  aborted = false;
50
- recoveryPending = false;
103
+ });
104
+
105
+ // A genuinely new user prompt ends the "forced off" window: restore the
106
+ // level the user actually had before the breach. Programmatic follow-ups
107
+ // (our nudge) do not emit an `input` event, so the restart turn stays off.
108
+ pi.on("input", async () => {
109
+ if (forcedOff) {
110
+ if (priorLevel !== undefined) safeSetThinkingLevel(pi, priorLevel);
111
+ forcedOff = false;
112
+ priorLevel = undefined;
113
+ }
114
+ thinkingChars = 0;
115
+ aborted = false;
51
116
  });
52
117
 
53
118
  pi.on("before_agent_start", async (event) => {
@@ -63,9 +128,11 @@ export default function (pi: ExtensionAPI) {
63
128
 
64
129
  pi.on("turn_start", async () => {
65
130
  thinkingChars = 0;
66
- // Don't clear `aborted` if a recovery is mid-flight — the recovery
67
- // turn_end handler clears it once the follow-up has been queued.
68
- if (!recoveryPending) aborted = false;
131
+ aborted = false;
132
+ // Re-assert "off" for the restart turn (and any follow-on turns of the same
133
+ // task). After the session replacement triggered by the abort, the new
134
+ // run can otherwise resolve thinking back to the profile default.
135
+ if (forcedOff) safeSetThinkingLevel(pi, "off");
69
136
  });
70
137
 
71
138
  pi.on("message_update", async (event, ctx) => {
@@ -74,32 +141,31 @@ export default function (pi: ExtensionAPI) {
74
141
  if (ev.type !== "thinking_delta") return;
75
142
  const delta = typeof ev.delta === "string" ? ev.delta : "";
76
143
  thinkingChars += delta.length;
77
- if (aborted || recoveryPending) return;
144
+ if (aborted) return;
78
145
  const tokens = charsToTokens(thinkingChars);
79
- if (tokens > budgetForTurn) {
80
- aborted = true;
81
- recoveryPending = true;
82
- ctx.ui.notify(
83
- `thinking-budget: ${tokens} > ${budgetForTurn} — aborting turn, will retry with thinking off`,
84
- "warning",
146
+ if (tokens <= budgetForTurn) return;
147
+
148
+ // Breach. Do the entire recovery now, while `pi` is still live — BEFORE
149
+ // ctx.abort() triggers the session replacement that would make `pi` stale.
150
+ aborted = true;
151
+ if (!forcedOff) {
152
+ priorLevel = safeGetThinkingLevel(pi);
153
+ forcedOff = true;
154
+ }
155
+ safeSetThinkingLevel(pi, "off");
156
+ try {
157
+ pi.sendUserMessage(
158
+ "[thinking budget exceeded] Please commit to an implementation now. " +
159
+ "Stop deliberating and use your tools to make progress.",
160
+ { deliverAs: "followUp" },
85
161
  );
86
- ctx.abort();
162
+ } catch {
163
+ // SDK without sendUserMessage — abort still forces the turn to end.
87
164
  }
88
- });
89
-
90
- pi.on("turn_end", async (_event, _ctx) => {
91
- if (!recoveryPending) return;
92
- // Yield one tick so pi's abort barrier settles before we queue the
93
- // follow-up. On fast-streaming local backends (qwen3.6 / llama.cpp)
94
- // queuing immediately after ctx.abort() drops the follow-up silently
95
- // and the agent appears to stop with no message — issue #8.
96
- await new Promise<void>((r) => setImmediate(r));
97
- pi.setThinkingLevel("off");
98
- pi.sendUserMessage(
99
- "[thinking budget exceeded] Please commit to an implementation now. Stop deliberating and use your tools to make progress.",
100
- { deliverAs: "followUp" },
165
+ harnessIntervention(
166
+ ctx,
167
+ "the model has thought long enough — forcing it to start implementing.",
101
168
  );
102
- recoveryPending = false;
103
- aborted = false;
169
+ ctx.abort();
104
170
  });
105
171
  }
@@ -1,4 +1,5 @@
1
1
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
+ import { harnessIntervention } from "../_shared/intervention.ts";
2
3
 
3
4
  // Port of agent.py's max_turns early-break. Counts turn_start events per
4
5
  // agent_start span; when the count exceeds LITTLE_CODER_MAX_TURNS (or the
@@ -27,9 +28,9 @@ export default function (pi: ExtensionAPI) {
27
28
  if (capForRun <= 0) return;
28
29
  turnsThisRun++;
29
30
  if (turnsThisRun > capForRun) {
30
- ctx.ui.notify(
31
- `turn-cap: reached max_turns=${capForRun}, aborting`,
32
- "warning",
31
+ harnessIntervention(
32
+ ctx,
33
+ `the model hit the turn limit (${capForRun}) — stopping the run.`,
33
34
  );
34
35
  ctx.abort();
35
36
  }