@chances-ai/engine 25.0.0 → 27.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/dist/ai/adapters/ai-sdk-stream.d.ts.map +1 -1
  2. package/dist/ai/adapters/ai-sdk-stream.js +6 -1
  3. package/dist/ai/adapters/ai-sdk-stream.js.map +1 -1
  4. package/dist/ai/index.d.ts +1 -0
  5. package/dist/ai/index.d.ts.map +1 -1
  6. package/dist/ai/index.js +1 -0
  7. package/dist/ai/index.js.map +1 -1
  8. package/dist/ai/overflow.d.ts +40 -0
  9. package/dist/ai/overflow.d.ts.map +1 -0
  10. package/dist/ai/overflow.js +84 -0
  11. package/dist/ai/overflow.js.map +1 -0
  12. package/dist/ai/types.d.ts +8 -1
  13. package/dist/ai/types.d.ts.map +1 -1
  14. package/dist/core/engine.d.ts +205 -10
  15. package/dist/core/engine.d.ts.map +1 -1
  16. package/dist/core/engine.js +539 -272
  17. package/dist/core/engine.js.map +1 -1
  18. package/dist/core/index.d.ts +1 -1
  19. package/dist/core/index.d.ts.map +1 -1
  20. package/dist/core/index.js.map +1 -1
  21. package/dist/core/task-tool.d.ts.map +1 -1
  22. package/dist/core/task-tool.js +6 -0
  23. package/dist/core/task-tool.js.map +1 -1
  24. package/dist/session/index.d.ts +11 -0
  25. package/dist/session/index.d.ts.map +1 -1
  26. package/dist/session/index.js +22 -1
  27. package/dist/session/index.js.map +1 -1
  28. package/dist/tools/bash-readonly.d.ts +26 -0
  29. package/dist/tools/bash-readonly.d.ts.map +1 -0
  30. package/dist/tools/bash-readonly.js +130 -0
  31. package/dist/tools/bash-readonly.js.map +1 -0
  32. package/dist/tools/builtins/bash.d.ts.map +1 -1
  33. package/dist/tools/builtins/bash.js +12 -0
  34. package/dist/tools/builtins/bash.js.map +1 -1
  35. package/dist/tools/builtins/edit.d.ts.map +1 -1
  36. package/dist/tools/builtins/edit.js +18 -12
  37. package/dist/tools/builtins/edit.js.map +1 -1
  38. package/dist/tools/builtins/todo.d.ts +33 -0
  39. package/dist/tools/builtins/todo.d.ts.map +1 -0
  40. package/dist/tools/builtins/todo.js +245 -0
  41. package/dist/tools/builtins/todo.js.map +1 -0
  42. package/dist/tools/builtins/write.d.ts.map +1 -1
  43. package/dist/tools/builtins/write.js +10 -5
  44. package/dist/tools/builtins/write.js.map +1 -1
  45. package/dist/tools/concurrency.d.ts +37 -0
  46. package/dist/tools/concurrency.d.ts.map +1 -0
  47. package/dist/tools/concurrency.js +50 -0
  48. package/dist/tools/concurrency.js.map +1 -0
  49. package/dist/tools/file-lock.d.ts +22 -0
  50. package/dist/tools/file-lock.d.ts.map +1 -0
  51. package/dist/tools/file-lock.js +85 -0
  52. package/dist/tools/file-lock.js.map +1 -0
  53. package/dist/tools/index.d.ts +4 -0
  54. package/dist/tools/index.d.ts.map +1 -1
  55. package/dist/tools/index.js +4 -0
  56. package/dist/tools/index.js.map +1 -1
  57. package/dist/tools/types.d.ts +31 -0
  58. package/dist/tools/types.d.ts.map +1 -1
  59. package/dist/tools/types.js.map +1 -1
  60. package/package.json +3 -3
@@ -1,24 +1,57 @@
1
- import { AppError, ErrorCode, ModelSelection, createId, runWithCwd, } from "@chances-ai/runtime";
1
+ import { AppError, ErrorCode, ModelSelection, createId, runConcurrent, runWithCwd, } from "@chances-ai/runtime";
2
2
  import { refreshActiveMarker } from "./worktree/index.js";
3
- import { classifyProviderError, defaultRetryConfig, estimateCost, } from "../ai/index.js";
4
- import { ASK_USER_QUESTION_TOOL_NAME, READONLY_CATEGORIES } from "../tools/index.js";
5
- /**
6
- * (3.5 codex Round-1 SHOULD-FIX #4) Anthropic-only overflow
7
- * detection. Three patterns from pi's overflow catalogue
8
- * (`pi/packages/ai/src/utils/overflow.ts:11`). The other 10
9
- * providers we ship stay deferred until real-world telemetry shows
10
- * a hit; their error shapes are less stable and a wrong regex would
11
- * surface as silent overflow loops.
12
- */
13
- function isAnthropicOverflowError(adapterId, message) {
14
- if (adapterId !== "anthropic")
15
- return false;
16
- return (/prompt is too long/i.test(message) ||
17
- /request_too_large/i.test(message) ||
18
- /maximum.*context.*length/i.test(message));
3
+ import { classifyProviderError, defaultRetryConfig, estimateCost, isContextOverflow, } from "../ai/index.js";
4
+ import { ASK_USER_QUESTION_TOOL_NAME, READONLY_CATEGORIES, TODO_TOOL_NAME, isCallConcurrencySafe, partitionToolCalls, } from "../tools/index.js";
5
+ /** (7.7 §6) Soft turn ceiling — raised from the old 12 (too low for a
6
+ * daily-driver agent). At this many tool iterations without a final answer the
7
+ * interactive engine PAUSES gracefully ("continue?") rather than throwing. The
8
+ * real runaway guard is the token/compaction machinery + the absolute hard cap
9
+ * below, not a low turn count. */
10
+ export const DEFAULT_MAX_TURNS = 50;
11
/**
 * (7.7 §6) Absolute runaway backstop for tool iterations within one turn.
 *
 * Reads `CHANCES_HARD_MAX_TURNS` from the environment. The value must be a
 * plain positive decimal integer (surrounding whitespace and a leading `+` are
 * tolerated). Anything else — unset, empty, zero, negative, fractional, or only
 * partially numeric such as `"1e3"` or `"50abc"` — falls back to 500.
 *
 * Validation is strict on purpose: `Number.parseInt` stops at the first
 * non-digit, so it would read `"1e3"` as `1` and silently install a hard cap of
 * a single turn.
 *
 * @returns {number} The hard cap; always a positive, finite number.
 */
function hardMaxTurns() {
  const fallback = 500;
  const text = (process.env.CHANCES_HARD_MAX_TURNS ?? "").trim();
  // Digits only: reject anything parseInt would merely half-parse.
  if (!/^\+?\d+$/.test(text)) {
    return fallback;
  }
  const value = Number(text);
  // An absurdly long digit string overflows to Infinity; treat it as invalid.
  return Number.isFinite(value) && value > 0 ? value : fallback;
}
18
/**
 * (7.7 §3.3) Maximum number of tool calls that may run concurrently within one
 * parallel batch.
 *
 * Reads `CHANCES_MAX_TOOL_CONCURRENCY` from the environment (the original note
 * describes it as the analog of claude-code's
 * `CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY`). The value must be a plain positive
 * decimal integer (surrounding whitespace and a leading `+` are tolerated).
 * Any non-numeric or non-positive value falls back to 10, including partially
 * numeric input such as `"10abc"` or `"1e2"` that `Number.parseInt` would have
 * accepted by reading only the leading digits.
 *
 * @returns {number} The concurrency limit; always a positive, finite number.
 */
function maxToolConcurrency() {
  const fallback = 10;
  const text = (process.env.CHANCES_MAX_TOOL_CONCURRENCY ?? "").trim();
  // Digits only: keeps the documented "non-numeric falls back" contract honest.
  if (!/^\+?\d+$/.test(text)) {
    return fallback;
  }
  const value = Number(text);
  // An absurdly long digit string overflows to Infinity; treat it as invalid.
  return Number.isFinite(value) && value > 0 ? value : fallback;
}
25
/**
 * (7.7 §3.5) Aggregate-size budget, in characters, for the tool results of one
 * batch.
 *
 * Reads `CHANCES_MAX_BATCH_RESULT_CHARS` from the environment. The value must
 * be a plain positive decimal integer (surrounding whitespace and a leading `+`
 * are tolerated); anything else falls back to 200_000 (per the original note,
 * claude-code's `MAX_TOOL_RESULTS_PER_MESSAGE_CHARS`).
 *
 * Validation is strict because `Number.parseInt` reads only the leading digits:
 * `"2e5"` would become a budget of 2 characters and truncate every batch.
 *
 * @returns {number} The budget in characters; always a positive, finite number.
 */
function batchResultBudget() {
  const fallback = 200_000;
  const text = (process.env.CHANCES_MAX_BATCH_RESULT_CHARS ?? "").trim();
  // Digits only: reject anything parseInt would merely half-parse.
  if (!/^\+?\d+$/.test(text)) {
    return fallback;
  }
  const value = Number(text);
  // An absurdly long digit string overflows to Infinity; treat it as invalid.
  return Number.isFinite(value) && value > 0 ? value : fallback;
}
31
/**
 * (7.7 §3.5) Enforce the aggregate-size budget on one batch of tool results.
 *
 * When the combined length of every result's `.output` exceeds
 * `batchResultBudget()`, the largest results are truncated first, each gaining
 * a trailing note, until the batch fits. This bounds a fan-out of parallel
 * reads from blowing the next request's context in one user message.
 *
 * Guarantees:
 * - A truncated result keeps at least about 2048 characters of its original
 *   text, so a batch of many small results can legitimately stay over budget.
 * - The note's own length is charged against the budget. A result is therefore
 *   only rewritten when that makes it strictly shorter, and the running total
 *   reflects the real post-truncation size.
 * - The cut never lands between the two halves of a UTF-16 surrogate pair, so
 *   the kept text stays well-formed (this may keep one character fewer).
 *
 * @param {Map<unknown, {output: string}>} resultByCall Results keyed by call;
 *   the values' `.output` fields are mutated in place.
 * @returns {void}
 */
function applyBatchResultBudget(resultByCall) {
  const MIN_KEEP = 2048;
  const budget = batchResultBudget();
  const results = [...resultByCall.values()];
  let total = results.reduce((sum, r) => sum + r.output.length, 0);
  if (total <= budget) {
    return;
  }
  const noteFor = (omitted) => `\n[…${omitted} chars truncated — batch result budget (${budget}) exceeded]`;
  // Largest first: trimming the biggest offenders touches the fewest results.
  results.sort((a, b) => b.output.length - a.output.length);
  for (const r of results) {
    if (total <= budget) {
      break;
    }
    const original = r.output;
    const over = total - budget;
    // Reserve room for the note, sized for the worst-case omitted count, so the
    // rewritten output (kept text + note) actually removes `over` characters.
    const reserve = noteFor(original.length).length;
    let keep = Math.max(MIN_KEEP, original.length - over - reserve);
    // Do not strand a high surrogate at the end of the kept text.
    if (keep > 0 && keep < original.length) {
      const last = original.charCodeAt(keep - 1);
      if (last >= 0xd800 && last <= 0xdbff) {
        keep -= 1;
      }
    }
    // Skip results where truncating would not make the output any shorter.
    if (keep + reserve >= original.length) {
      continue;
    }
    const truncated = original.slice(0, keep) + noteFor(original.length - keep);
    r.output = truncated;
    total -= original.length - truncated.length;
  }
}
20
- /** Engine default when no caller-supplied or config-supplied value applies. */
21
- export const DEFAULT_MAX_TURNS = 12;
22
55
  /** Default base prompt the engine uses when no `systemBaseOverride` is set.
23
56
  * Exported so tests can assert "is this the default or an agent override?" and
24
57
  * so the doc + plugin authors can read the exact text. */
@@ -56,6 +89,69 @@ export class AgentEngine {
56
89
  getSelection() {
57
90
  return this.selection;
58
91
  }
92
+ /**
93
+ * (7.7 §4) Queue a user steering message to be injected at the next turn
94
+ * boundary of the in-flight turn (or the top of the next turn if idle). A
95
+ * no-op when no `steering` queue was provided. The CLI / serve driver call
96
+ * this on a submit-while-busy instead of rejecting the input.
97
+ */
98
+ enqueueSteering(text) {
99
+ this.opts.steering?.enqueue(text);
100
+ }
101
+ /** (7.7 §4) Peek the steering queue for entries not yet injected this turn,
102
+ * render each as a user message, and append its id to `injectedSteerIds` for
103
+ * post-persist ack. Peek-not-drain: a cancelled turn re-delivers. */
104
+ drainSteering(injectedSteerIds) {
105
+ const seen = new Set(injectedSteerIds);
106
+ const out = [];
107
+ for (const e of this.opts.steering?.peek() ?? []) {
108
+ if (seen.has(e.id))
109
+ continue;
110
+ out.push({ role: "user", content: [{ type: "text", text: e.text }] });
111
+ injectedSteerIds.push(e.id);
112
+ }
113
+ return out;
114
+ }
115
+ /** (7.7 §4) Iteration-boundary drain: background-task notifications that
116
+ * arrived mid-turn (combined render, same as turn-top) PLUS steering. Both
117
+ * peek-not-drain; ids recorded for post-persist ack. */
118
+ drainBoundaryInjections(injectedNotifIds, injectedSteerIds) {
119
+ const out = [];
120
+ const fresh = (this.opts.backgroundTasks?.peekPendingNotifications() ?? []).filter((n) => !injectedNotifIds.has(n.taskId));
121
+ if (fresh.length > 0) {
122
+ out.push({
123
+ role: "user",
124
+ content: [{ type: "text", text: fresh.map(renderTaskNotificationXml).join("\n") }],
125
+ });
126
+ for (const n of fresh)
127
+ injectedNotifIds.add(n.taskId);
128
+ }
129
+ out.push(...this.drainSteering(injectedSteerIds));
130
+ return out;
131
+ }
132
+ /** (7.7 §5.3) Build an incomplete-todos reminder when the model stopped with
133
+ * open (pending/in_progress) todos and the per-turn cap isn't exhausted.
134
+ * Returns a `user`-role system-reminder message (no `developer` role exists),
135
+ * or null to let the turn resolve. */
136
+ maybeTodoReminder(count) {
137
+ const max = this.opts.todoReminderMax ?? 3;
138
+ if (max <= 0 || count >= max)
139
+ return null;
140
+ const open = [];
141
+ for (const p of this.opts.session.getTodoPhases()) {
142
+ for (const t of p.tasks) {
143
+ if (t.status === "pending" || t.status === "in_progress")
144
+ open.push(t.content);
145
+ }
146
+ }
147
+ if (open.length === 0)
148
+ return null;
149
+ const list = open.map((c) => `"${c}"`).join(", ");
150
+ const text = `<system-reminder>You stopped with ${open.length} incomplete todo item(s): ${list}. ` +
151
+ `Continue working through them, or mark each done/abandoned with the todo tool. ` +
152
+ `(Reminder ${count + 1}/${max})</system-reminder>`;
153
+ return { role: "user", content: [{ type: "text", text }] };
154
+ }
59
155
  /** Bus-emit wrapper. Three responsibilities (3.4):
60
156
  * 1. Suppress lifecycle frames (`turn:*`, `error`) when the engine is a
61
157
  * child (`suppressLifecycleEvents=true`). Codex Round-1 MUST-FIX #2.
@@ -115,7 +211,7 @@ export class AgentEngine {
115
211
  return this.runTurnImpl(prompt, token, opts.expandMentions !== false, opts.trustedContext);
116
212
  }
117
213
  async runTurnImpl(prompt, token, expandMentions, trustedContext) {
118
- const { router, tools, gate, session, plugins, backgroundTasks } = this.opts;
214
+ const { tools, session, plugins, backgroundTasks } = this.opts;
119
215
  const turnId = createId("turn");
120
216
  // (3.6) Carry the active session id on `turn:start` so the OTel
121
217
  // exporter can stamp `chances.gen_ai.session.id` correctly across
@@ -174,248 +270,41 @@ export class AgentEngine {
174
270
  turnMessages.push({ role: "user", content: [{ type: "text", text: trustedContext }] });
175
271
  }
176
272
  turnMessages.push({ role: "user", content: [{ type: "text", text: prompt }] });
273
+ // (7.7 §4) Drain any steering queued before this turn started, plus track
274
+ // which notifications/steering ids have been injected so the iteration-
275
+ // boundary drain (and the post-persist ack) don't double-count.
276
+ const injectedNotifIds = new Set(notificationIds);
277
+ const injectedSteerIds = [];
278
+ turnMessages.push(...this.drainSteering(injectedSteerIds));
177
279
  const result = { text: "", inputTokens: 0, outputTokens: 0, costUsd: 0 };
178
- const maxTurns = this.opts.maxTurns ?? this.opts.maxIterations ?? DEFAULT_MAX_TURNS;
179
- let resolved = false;
180
- // (3.5 codex Round-1 MUST-FIX #1) `result.inputTokens` aggregates
181
- // every `usage` event across the multi-step tool loop. The compactor's
182
- // threshold check needs the LAST stream's input only — that's what
183
- // the provider will count for the NEXT request, plus the new user
184
- // prompt. Tracked separately here; emitted via `usage:turn`.
185
- let lastRequestInputTokens = 0;
186
- // (3.5 codex Round-1 SHOULD-FIX #4) Per-turn flag. Anthropic
187
- // overflow recovery fires AT MOST ONCE per turn a second 413 after
188
- // we already compacted is an actual ceiling we can't paper over.
189
- let recoveredFromOverflow = false;
190
- // (3.5) Tracked at the outer scope so the post-turn compaction check
191
- // can read `route.model`. The for-loop reuses the variable across
192
- // iterations; we just need the most recent value to query the model
193
- // descriptor for `contextWindow`.
194
- let lastRoute;
195
- for (let i = 0; i < maxTurns; i++) {
196
- token.throwIfCancelled();
197
- // Re-read selection per turn so a `/model` switch between turns lands on
198
- // the next request without rebuilding the engine.
199
- const choice = this.selection.get();
200
- const route = router.pick({
201
- preferredModel: choice.model,
202
- preferredProvider: choice.provider,
203
- needsTools: toolDefs.length > 0,
204
- });
205
- lastRoute = route;
206
- const retry = this.opts.retry ?? defaultRetryConfig;
207
- let textBuffer = "";
208
- let calls = [];
209
- let attempt = 0;
210
- while (true) {
211
- token.throwIfCancelled();
212
- textBuffer = "";
213
- calls = [];
214
- // (3.5) Reset per attempt — only the LAST successful stream's
215
- // last `usage.inputTokens` carries forward into the post-turn
216
- // compactor check.
217
- let attemptLastInputTokens = 0;
218
- // (6.5b review) Stage usage in attempt-local accumulators instead of
219
- // folding it straight into the turn-level `result`. A retryable
220
- // mid-stream error (e.g. ECONNRESET after a partial stream) discards
221
- // the attempt and restreams; folding here would double-count tokens
222
- // and double-emit `usage`. We only merge into `result` + emit once
223
- // the stream completes (`streamError === null`, below).
224
- let attemptInputTokens = 0;
225
- let attemptOutputTokens = 0;
226
- let attemptCostUsd = 0;
227
- let streamError = null;
228
- const stream = route.adapter.stream({ model: route.model.id, system, messages: [...session.messages(), ...turnMessages], tools: toolDefs }, token.signal);
229
- for await (const event of stream) {
230
- // Enforce cancellation per-event so a provider that ignores or
231
- // queues past the AbortSignal can't keep dripping text/tool-calls
232
- // into a turn the user already abandoned. Particularly important
233
- // for subagents: the parent's abort must stop the child instantly,
234
- // not wait until the child stream naturally ends.
235
- token.throwIfCancelled();
236
- switch (event.type) {
237
- case "text-delta":
238
- textBuffer += event.text;
239
- this.emit({ type: "assistant:delta", turnId, text: event.text });
240
- break;
241
- case "tool-call":
242
- // Defer the `tool:call` bus emit until the execution loop
243
- // below — pairs each emit atomically with its matching
244
- // `tool:result`. Emitting here would leave orphan call frames
245
- // on the bus whenever the turn aborts between stream-end and
246
- // tool execution (Ctrl-C) or a retry attempt discards the
247
- // collected calls and tries again on attempt N+1.
248
- calls.push(event.call);
249
- break;
250
- case "usage": {
251
- const costUsd = estimateCost(route.model, event.usage);
252
- // (6.5b review) Accumulate into attempt-local totals; the merge
253
- // into `result` + the `usage` emit happen once the stream
254
- // succeeds, so a discarded retry attempt can't double-count.
255
- attemptInputTokens += event.usage.inputTokens;
256
- attemptOutputTokens += event.usage.outputTokens;
257
- attemptCostUsd += costUsd;
258
- // (3.5) Track most recent stream's last input count for the
259
- // post-turn compaction threshold check. NOT the aggregate.
260
- attemptLastInputTokens = event.usage.inputTokens;
261
- break;
262
- }
263
- case "error":
264
- // Defer the bus emit until after cancellation check — if the
265
- // user just hit Ctrl-C, the SDK's abort path surfaces as a
266
- // stream error and we shouldn't shout "PROVIDER" at them.
267
- streamError = event.message;
268
- break;
269
- case "done":
270
- break;
271
- }
272
- if (streamError !== null)
273
- break;
274
- }
275
- if (streamError === null) {
276
- // Stream completed successfully — NOW fold this attempt's usage
277
- // into the turn-level `result` and emit the (aggregated) `usage`
278
- // frame. Deferred to here so a discarded retry attempt's partial
279
- // usage never double-counts (6.5b review).
280
- result.inputTokens += attemptInputTokens;
281
- result.outputTokens += attemptOutputTokens;
282
- result.costUsd += attemptCostUsd;
283
- if (attemptInputTokens > 0 || attemptOutputTokens > 0 || attemptCostUsd > 0) {
284
- this.emit({
285
- type: "usage",
286
- model: route.model.id,
287
- inputTokens: attemptInputTokens,
288
- outputTokens: attemptOutputTokens,
289
- costUsd: attemptCostUsd,
290
- });
291
- }
292
- // Persist this attempt's last input-token count for the post-turn
293
- // compaction check.
294
- lastRequestInputTokens = attemptLastInputTokens;
295
- break;
296
- }
297
- // If the abort signal fired during the stream, the error we just
298
- // captured is the SDK reacting to the cancellation — treat it as
299
- // Cancelled rather than misclassifying as a provider error.
300
- token.throwIfCancelled();
301
- const decision = classifyProviderError(streamError);
302
- const terminal = !decision.retryable || attempt >= retry.delaysMs.length;
303
- if (terminal) {
304
- // (3.5 — codex Round-1 SHOULD-FIX #4) Anthropic-only reactive
305
- // overflow recovery. Catches the 413 BEFORE the terminal throw,
306
- // runs compaction with `reason: "overflow"` (bypasses circuit
307
- // breaker), and retries the stream once with the compacted
308
- // history. Wider 10-provider catalogue stays deferred until
309
- // telemetry shows a real-world miss.
310
- if (this.opts.compactor &&
311
- !recoveredFromOverflow &&
312
- isAnthropicOverflowError(route.adapter.id, streamError)) {
313
- recoveredFromOverflow = true;
314
- try {
315
- const recovery = await this.opts.compactor.compact("overflow", token.signal);
316
- if (recovery.ok) {
317
- // Reset the attempt counter so we get the full retry
318
- // budget against the now-smaller request, AND clear the
319
- // accumulated message buffer that the failed attempt
320
- // wrote into `turnMessages`. The retry rebuilds from
321
- // `session.messages()` (which now reflects compaction)
322
- // plus this turn's prepended user/notification messages.
323
- // (6.5b follow-up) Mirror the normal retry path's partial-undo:
324
- // if this attempt streamed partial text before the overflow, drop
325
- // it so the post-compaction restream doesn't append onto a stale
326
- // partial. A 413 usually precedes any delta, so this is typically
327
- // a no-op — emitted only when text was actually shown.
328
- if (textBuffer.length > 0) {
329
- this.emit({ type: "assistant:reset", turnId });
330
- }
331
- attempt = 0;
332
- continue;
333
- }
334
- }
335
- catch (e) {
336
- // Compactor.compact never throws by contract, but be defensive:
337
- // a malformed Compactor implementation shouldn't break the
338
- // original error path.
339
- this.emit({
340
- type: "log",
341
- level: "warn",
342
- message: `overflow compaction unexpectedly threw: ${e.message ?? e}`,
343
- });
344
- }
345
- }
346
- // Emit the bus `error` ONLY when we're about to throw. Emitting on
347
- // every retry attempt would cause `runPrompt`'s `lastError`
348
- // listener to record a transient failure as the turn's exit code
349
- // even after a later attempt succeeded (codex re-review finding).
350
- // Subagent engines suppress this emit — see `suppressTerminalErrors`.
351
- if (!this.opts.suppressTerminalErrors) {
352
- this.emit({ type: "error", code: "PROVIDER", message: streamError });
353
- }
354
- throw new AppError(ErrorCode.Provider, `Provider error (${decision.reason}): ${streamError}`);
355
- }
356
- const delayMs = retry.delaysMs[attempt] ?? 0;
357
- this.emit({
358
- type: "log",
359
- level: "warn",
360
- message: `provider stream errored (${decision.reason}); retry ${attempt + 1}/${retry.delaysMs.length} after ${delayMs}ms; original: ${streamError}`,
361
- });
362
- // (6.5b review) If this attempt already streamed partial assistant text
363
- // to the bus, the upcoming restream would APPEND a fresh copy on top
364
- // (consumers don't replace) — duplicating it on screen. Tell consumers
365
- // to drop the in-flight partial first. `usage`/`tool-call` are deferred
366
- // (not yet on the bus), so only `textBuffer` needs undoing.
367
- if (textBuffer.length > 0) {
368
- this.emit({ type: "assistant:reset", turnId });
369
- }
370
- attempt += 1;
371
- await sleepCancellable(delayMs, token);
372
- }
373
- if (calls.length === 0) {
374
- const content = [{ type: "text", text: textBuffer }];
375
- turnMessages.push({ role: "assistant", content });
376
- result.text = textBuffer;
377
- this.emit({ type: "assistant:message", turnId, text: textBuffer });
378
- await safeRunHook(plugins, "afterResponse", { text: textBuffer }, this.opts.bus);
379
- resolved = true;
380
- break;
381
- }
382
- // Record the assistant message that requested the tools.
383
- const assistantContent = [];
384
- if (textBuffer)
385
- assistantContent.push({ type: "text", text: textBuffer });
386
- for (const call of calls) {
387
- assistantContent.push({ type: "tool-call", callId: call.callId, name: call.name, args: call.args });
388
- }
389
- turnMessages.push({ role: "assistant", content: assistantContent });
390
- // Execute each tool through the permission gate, then feed results back.
391
- for (const call of calls) {
392
- // Check cancellation before each tool — a long batch of tool-calls
393
- // from one model turn shouldn't keep running after the user aborts.
394
- token.throwIfCancelled();
395
- // Emit `tool:call` here (not in the stream loop) so each call is
396
- // paired with its `tool:result` from runTool — keeps the bus
397
- // observably balanced for subscribers (TUI, NDJSON, telemetry).
398
- this.emit({ type: "tool:call", callId: call.callId, name: call.name, args: call.args });
399
- const outcome = await this.runTool(call, token);
400
- turnMessages.push({
401
- role: "tool",
402
- content: [{ type: "tool-result", callId: call.callId, name: call.name, output: outcome.output, ok: outcome.ok }],
403
- });
404
- // Round 3 codex SHOULD-FIX: check cancellation AFTER each tool
405
- // result too. A tool that catches cancellation internally and
406
- // returns `ok:false` (e.g. `bash` returning `(cancelled)`) does
407
- // NOT re-throw, so without this check the loop would continue
408
- // to the next turn and could exhaust `maxTurns`, surfacing as
409
- // a misleading `PROVIDER: Reached maximum number of turns`
410
- // instead of the user's actual `Cancelled` intent.
411
- token.throwIfCancelled();
412
- }
413
- }
280
+ // (7.7 §6) Soft ceiling (default 50), clamped to the absolute hard cap so a
281
+ // huge caller-supplied `maxTurns` can't loop forever. `hitHardCap` records
282
+ // whether the loop will stop at the hard backstop (always errors) vs the
283
+ // soft ceiling (interactive pauses).
284
+ const softMax = this.opts.maxTurns ?? this.opts.maxIterations ?? DEFAULT_MAX_TURNS;
285
+ const hardMax = hardMaxTurns();
286
+ const maxTurns = Math.min(softMax, hardMax);
287
+ const hitHardCap = softMax > hardMax;
288
+ // (7.7 §3.8) Per-turn mutable accumulators (formerly a fistful of inline
289
+ // `let`s). The loop units fold usage / track overflow recovery / record the
290
+ // last route through this. `result` aliases `state.result`, read back below
291
+ // to finalize the turn.
292
+ const state = { result, lastRequestInputTokens: 0, recoveredFromOverflow: false };
293
+ // (7.7 §3.8) Hand the assembled turn to the hook-driven loop. The default
294
+ // hooks bind this engine's own units (stream / tool-batch / steering / todo
295
+ // reminder); task 08's coordinator can pass ALTERNATIVE hooks WITHOUT
296
+ // forking the loop body — that injectable seam is the Axis 3.8 deliverable.
297
+ const { resolved } = await this.runAgentLoop({ turnId, system, toolDefs, turnMessages, injectedNotifIds, injectedSteerIds, maxTurns, state, token }, this.defaultLoopHooks());
414
298
  session.appendTurn(turnMessages);
415
- // Codex Round-2 MUST-FIX #3: ack notifications AFTER appendTurn so a
416
- // cancellation between peek and persist leaves the queue intact.
417
- if (notificationIds.length > 0) {
418
- backgroundTasks?.acknowledgeNotifications(notificationIds);
299
+ // Codex Round-2 MUST-FIX #3: ack notifications + steering AFTER appendTurn so
300
+ // a cancellation between peek and persist leaves both queues intact (the
301
+ // next turn re-delivers). `injectedNotifIds` covers turn-top AND mid-turn
302
+ // notifications; `injectedSteerIds` covers all injected steering.
303
+ if (injectedNotifIds.size > 0) {
304
+ backgroundTasks?.acknowledgeNotifications([...injectedNotifIds]);
305
+ }
306
+ if (injectedSteerIds.length > 0) {
307
+ this.opts.steering?.acknowledge(injectedSteerIds);
419
308
  }
420
309
  // (3.5) Per-turn aggregate event. Lifecycle suppression honored —
421
310
  // child engines (with `suppressLifecycleEvents`) skip this too.
@@ -428,7 +317,7 @@ export class AgentEngine {
428
317
  inputTokens: result.inputTokens,
429
318
  outputTokens: result.outputTokens,
430
319
  costUsd: result.costUsd,
431
- lastRequestInputTokens,
320
+ lastRequestInputTokens: state.lastRequestInputTokens,
432
321
  });
433
322
  }
434
323
  // (3.5 — codex Round-1 MUST-FIX #4) Threshold-triggered compaction.
@@ -437,10 +326,10 @@ export class AgentEngine {
437
326
  // `compaction:start` / `compaction:end` frames inside this await.
438
327
  // The compactor itself swallows all failures into ok:false (never
439
328
  // throws by contract); `Cancelled` propagates as `cancelled` reason.
440
- if (this.opts.compactor && resolved && lastRoute) {
329
+ if (this.opts.compactor && resolved && state.lastRoute) {
441
330
  const should = this.opts.compactor.shouldCompact({
442
- lastRequestInputTokens,
443
- model: lastRoute.model,
331
+ lastRequestInputTokens: state.lastRequestInputTokens,
332
+ model: state.lastRoute.model,
444
333
  });
445
334
  if (should) {
446
335
  await this.opts.compactor.compact("threshold", token.signal);
@@ -462,20 +351,398 @@ export class AgentEngine {
462
351
  }
463
352
  this.emit({ type: "turn:end", turnId });
464
353
  if (!resolved) {
465
- // Loop exhausted the turn budget without the model returning a final
466
- // answer. Match claude-code's terminal-error pattern (`QueryEngine.ts:914`)
467
- // emit a bus error and throw so the caller sees a concrete signal
468
- // instead of an empty result. The turn is still persisted above so
469
- // `/resume` can pick up the partial work.
470
- // Subagent engines suppress this emit see `suppressTerminalErrors`.
354
+ // (7.7 §6) The loop exhausted its budget without a final answer. The turn
355
+ // is already persisted above, so `/resume` (or the next message / steering)
356
+ // picks up the partial work. Two outcomes:
357
+ // - SOFT pause (interactive default): emit `turn:paused` and return
358
+ // cleanly "reached max actions, continue?". No scary throw.
359
+ // - THROW: when the caller opted out of pausing (`pauseOnMaxTurns:false`,
360
+ // i.e. `-p`/serve/SDK automation) OR the absolute hard cap was hit.
361
+ // A precise `MaxTurns` code (not the misleading PROVIDER) → non-zero
362
+ // exit for automation.
363
+ const pause = (this.opts.pauseOnMaxTurns ?? true) && !hitHardCap;
364
+ if (pause) {
365
+ // (codex R2 MUST-FIX) Emit `turn:paused` ONLY on the actual pause — the
366
+ // throw path (automation / hard-cap) must NOT signal a "continue?"
367
+ // affordance it then contradicts with an error. Soft pause always means
368
+ // the soft ceiling (pause requires !hitHardCap), hence reason max-turns.
369
+ if (!this.opts.suppressLifecycleEvents) {
370
+ this.emit({ type: "turn:paused", turnId, reason: "max-turns", turnsTaken: maxTurns });
371
+ }
372
+ return result;
373
+ }
374
+ // Throw path: `-p`/serve/SDK automation, or the absolute hard cap. A
375
+ // precise `MaxTurns` error (not the misleading PROVIDER) → non-zero exit.
471
376
  const message = `Reached maximum number of turns (${maxTurns})`;
472
377
  if (!this.opts.suppressTerminalErrors) {
473
- this.emit({ type: "error", code: "PROVIDER", message });
378
+ this.emit({ type: "error", code: ErrorCode.MaxTurns, message });
474
379
  }
475
- throw new AppError(ErrorCode.Provider, message);
380
+ throw new AppError(ErrorCode.MaxTurns, message);
476
381
  }
477
382
  return result;
478
383
  }
384
+ /**
385
+ * (7.7 §3.8) The hook-driven turn loop — a thin orchestrator over
386
+ * {@link AgentLoopHooks}, mirroring pi's `runLoop`
387
+ * (`packages/agent/src/agent-loop.ts`). Each iteration streams one assistant
388
+ * response, executes its tool batch, then drains boundary injections (steering
389
+ * + mid-turn notifications) before the next stream. Returns whether the turn
390
+ * reached a final answer (`resolved`) or exhausted `maxTurns`.
391
+ *
392
+ * Behaviour is IDENTICAL to the pre-refactor inline loop — `defaultLoopHooks`
393
+ * binds the same units; the seam exists so task 08's coordinator can inject
394
+ * alternatives without copying this body.
395
+ */
396
+ async runAgentLoop(run, hooks) {
397
+ const { token, turnId } = run;
398
+ // (7.7 §5.3) Per-turn incomplete-todos reminder counter. Resets every
399
+ // runTurn (= every new user prompt), matching oh-my-pi.
400
+ let todoReminderCount = 0;
401
+ for (let i = 0; i < run.maxTurns; i++) {
402
+ token.throwIfCancelled();
403
+ // Re-read selection per turn so a `/model` switch between turns lands on
404
+ // the next request without rebuilding the engine.
405
+ const choice = this.selection.get();
406
+ const route = this.opts.router.pick({
407
+ preferredModel: choice.model,
408
+ preferredProvider: choice.provider,
409
+ needsTools: run.toolDefs.length > 0,
410
+ });
411
+ run.state.lastRoute = route;
412
+ const { text, calls } = await hooks.streamAssistant(route, run);
413
+ if (calls.length === 0) {
414
+ run.turnMessages.push({ role: "assistant", content: [{ type: "text", text }] });
415
+ run.state.result.text = text;
416
+ this.emit({ type: "assistant:message", turnId, text });
417
+ // (7.7 §5.3) If the model stopped with incomplete todos, nudge it to
418
+ // finish (capped). Inject a user-role reminder and continue the loop
419
+ // instead of resolving — boosts autonomous completion (oh-my-pi
420
+ // `#checkTodoCompletion`). The reminder rides the maxTurns budget.
421
+ const reminder = hooks.incompleteTodoReminder(todoReminderCount);
422
+ if (reminder) {
423
+ todoReminderCount += 1;
424
+ run.turnMessages.push(reminder);
425
+ continue;
426
+ }
427
+ await safeRunHook(this.opts.plugins, "afterResponse", { text }, this.opts.bus);
428
+ return { resolved: true };
429
+ }
430
+ // Record the assistant message that requested the tools.
431
+ const assistantContent = [];
432
+ if (text)
433
+ assistantContent.push({ type: "text", text });
434
+ for (const call of calls) {
435
+ assistantContent.push({ type: "tool-call", callId: call.callId, name: call.name, args: call.args });
436
+ }
437
+ run.turnMessages.push({ role: "assistant", content: assistantContent });
438
+ // (7.7 §3) Execute the batch (parallel safe-batch / serial unsafe;
439
+ // submission order preserved; all-settled on cancellation).
440
+ const batch = await hooks.executeToolBatch(calls, token, turnId);
441
+ run.turnMessages.push(...batch.messages);
442
+ // (7.7 §6.2) Model-signalled terminate: EVERY result asked to stop → end
443
+ // gracefully (transcript intact) instead of streaming another turn.
444
+ if (batch.terminate) {
445
+ run.state.result.text = text;
446
+ return { resolved: true };
447
+ }
448
+ // (7.7 §4) Iteration boundary: inject steering the user typed mid-turn +
449
+ // bg-task notifications that completed mid-turn, so they reach the model on
450
+ // the NEXT stream of this turn (claude-code mid-turn drain).
451
+ run.turnMessages.push(...hooks.getBoundaryMessages(run.injectedNotifIds, run.injectedSteerIds));
452
+ }
453
+ return { resolved: false };
454
+ }
455
+ /**
456
+ * (7.7 §3.8) The default loop hooks — an object binding the engine's own
457
+ * units. An object (not the methods passed directly) so the shape is a
458
+ * documented, swappable seam: task 08 supplies alternatives; a test can wrap a
459
+ * single hook to assert the loop dispatches through it.
460
+ */
461
+ defaultLoopHooks() {
462
+ return {
463
+ streamAssistant: (route, run) => this.streamAssistantResponse(route, run),
464
+ executeToolBatch: (calls, token, turnId) => this.executeToolBatch(calls, token, turnId),
465
+ getBoundaryMessages: (notifIds, steerIds) => this.drainBoundaryInjections(notifIds, steerIds),
466
+ incompleteTodoReminder: (count) => this.maybeTodoReminder(count),
467
+ };
468
+ }
469
+ /**
470
+ * (7.7 §3.8) Stream ONE assistant response — the careful inner unit (pi
471
+ * `streamAssistantResponse`). Owns: per-attempt classified retry with backoff,
472
+ * attempt-local usage staging (a discarded retry never double-counts),
473
+ * `assistant:reset` partial-undo, and provider-agnostic overflow recovery on
474
+ * BOTH the terminal-error path (a 413 before the throw) and the success path
475
+ * (z.ai silent truncation / MiMo length-stop). Folds usage into `state.result`
476
+ * and records `state.lastRequestInputTokens` once the stream completes. Throws
477
+ * `AppError(Provider|Cancelled)` on a terminal failure; otherwise returns the
478
+ * streamed text + requested tool calls.
479
+ */
480
+ async streamAssistantResponse(route, run) {
481
+ const { token, turnId, system, toolDefs, turnMessages, state } = run;
482
+ const { session } = this.opts;
483
+ const retry = this.opts.retry ?? defaultRetryConfig;
484
+ let textBuffer = "";
485
+ let calls = [];
486
+ let attempt = 0;
487
+ while (true) {
488
+ token.throwIfCancelled();
489
+ textBuffer = "";
490
+ calls = [];
491
+ // (3.5) Reset per attempt — only the LAST successful stream's last
492
+ // `usage.inputTokens` carries forward into the post-turn compactor check.
493
+ let attemptLastInputTokens = 0;
494
+ // (6.5b review) Stage usage in attempt-local accumulators instead of
495
+ // folding straight into the turn-level `result`. A retryable mid-stream
496
+ // error discards the attempt and restreams; folding here would
497
+ // double-count. Merge into `result` + emit once the stream completes.
498
+ let attemptInputTokens = 0;
499
+ let attemptOutputTokens = 0;
500
+ let attemptCostUsd = 0;
501
+ let streamError = null;
502
+ // (7.7 §7) The provider's finish reason (when surfaced) — drives the
503
+ // length-stop overflow signal on the success path.
504
+ let finishReason;
505
+ const stream = route.adapter.stream({ model: route.model.id, system, messages: [...session.messages(), ...turnMessages], tools: toolDefs }, token.signal);
506
+ for await (const event of stream) {
507
+ // Enforce cancellation per-event so a provider that ignores or queues
508
+ // past the AbortSignal can't keep dripping into a turn the user already
509
+ // abandoned. Critical for subagents: the parent's abort stops the child
510
+ // instantly, not when the child stream naturally ends.
511
+ token.throwIfCancelled();
512
+ switch (event.type) {
513
+ case "text-delta":
514
+ textBuffer += event.text;
515
+ this.emit({ type: "assistant:delta", turnId, text: event.text });
516
+ break;
517
+ case "tool-call":
518
+ // Defer the `tool:call` bus emit until the execution loop — pairs
519
+ // each emit atomically with its `tool:result`. Emitting here would
520
+ // orphan call frames whenever the turn aborts between stream-end and
521
+ // tool execution (Ctrl-C) or a retry discards the collected calls.
522
+ calls.push(event.call);
523
+ break;
524
+ case "usage": {
525
+ const costUsd = estimateCost(route.model, event.usage);
526
+ attemptInputTokens += event.usage.inputTokens;
527
+ attemptOutputTokens += event.usage.outputTokens;
528
+ attemptCostUsd += costUsd;
529
+ // (3.5) Track most recent stream's last input count for the post-turn
530
+ // compaction threshold check. NOT the aggregate.
531
+ attemptLastInputTokens = event.usage.inputTokens;
532
+ break;
533
+ }
534
+ case "error":
535
+ // Defer the bus emit until after the cancellation check — a Ctrl-C
536
+ // surfaces as a stream error and we shouldn't shout "PROVIDER".
537
+ streamError = event.message;
538
+ break;
539
+ case "done":
540
+ finishReason = event.finishReason;
541
+ break;
542
+ }
543
+ if (streamError !== null)
544
+ break;
545
+ }
546
+ if (streamError === null) {
547
+ // Stream completed — NOW fold this attempt's usage into the turn-level
548
+ // `result` and emit the aggregated `usage` frame (deferred so a discarded
549
+ // retry attempt's partial usage never double-counts).
550
+ state.result.inputTokens += attemptInputTokens;
551
+ state.result.outputTokens += attemptOutputTokens;
552
+ state.result.costUsd += attemptCostUsd;
553
+ if (attemptInputTokens > 0 || attemptOutputTokens > 0 || attemptCostUsd > 0) {
554
+ this.emit({
555
+ type: "usage",
556
+ model: route.model.id,
557
+ inputTokens: attemptInputTokens,
558
+ outputTokens: attemptOutputTokens,
559
+ costUsd: attemptCostUsd,
560
+ });
561
+ }
562
+ state.lastRequestInputTokens = attemptLastInputTokens;
563
+ // (7.7 §7) Success-path overflow: a NORMAL stop whose reported input
564
+ // exceeds the window (z.ai silently truncated the prompt), or a `length`
565
+ // stop with zero output (MiMo truncated input, no room to generate). Both
566
+ // mean the model never saw the full context — recover (compact +
567
+ // restream) once, exactly like the error path. A no-op for every healthy
568
+ // stream: `isContextOverflow` needs a real `contextWindow` AND input over
569
+ // it (or `length` + zero output).
570
+ const recovered = await this.maybeRecoverFromOverflow({
571
+ stopReason: finishReason === "length" ? "length" : "stop",
572
+ usage: { input: attemptLastInputTokens, output: attemptOutputTokens },
573
+ contextWindow: route.model.contextWindow,
574
+ }, route, textBuffer, turnId, state, token);
575
+ if (recovered) {
576
+ attempt = 0;
577
+ continue;
578
+ }
579
+ break;
580
+ }
581
+ // If the abort signal fired during the stream, the captured error is the
582
+ // SDK reacting to cancellation — treat as Cancelled, not a provider error.
583
+ token.throwIfCancelled();
584
+ const decision = classifyProviderError(streamError);
585
+ const terminal = !decision.retryable || attempt >= retry.delaysMs.length;
586
+ if (terminal) {
587
+ // (7.7 §7) Error-path overflow recovery: catch a 413 BEFORE the terminal
588
+ // throw, compact (bypasses the circuit breaker), and restream once with
589
+ // the now-smaller history. pi's full catalogue replaces the old
590
+ // Anthropic-only regex so any provider's overflow recovers the same way.
591
+ const recovered = await this.maybeRecoverFromOverflow({ stopReason: "error", errorMessage: streamError }, route, textBuffer, turnId, state, token);
592
+ if (recovered) {
593
+ attempt = 0;
594
+ continue;
595
+ }
596
+ // Emit the bus `error` ONLY when about to throw — emitting on every retry
597
+ // would let `runPrompt`'s `lastError` listener record a transient failure
598
+ // as the exit code even after a later attempt succeeded. Subagent engines
599
+ // suppress this (see `suppressTerminalErrors`).
600
+ if (!this.opts.suppressTerminalErrors) {
601
+ this.emit({ type: "error", code: "PROVIDER", message: streamError });
602
+ }
603
+ throw new AppError(ErrorCode.Provider, `Provider error (${decision.reason}): ${streamError}`);
604
+ }
605
+ const delayMs = retry.delaysMs[attempt] ?? 0;
606
+ this.emit({
607
+ type: "log",
608
+ level: "warn",
609
+ message: `provider stream errored (${decision.reason}); retry ${attempt + 1}/${retry.delaysMs.length} after ${delayMs}ms; original: ${streamError}`,
610
+ });
611
+ // (6.5b review) If this attempt streamed partial text, the upcoming
612
+ // restream APPENDS a fresh copy (consumers don't replace) — duplicating it
613
+ // on screen. Tell consumers to drop the in-flight partial first.
614
+ if (textBuffer.length > 0) {
615
+ this.emit({ type: "assistant:reset", turnId });
616
+ }
617
+ attempt += 1;
618
+ await sleepCancellable(delayMs, token);
619
+ }
620
+ return { text: textBuffer, calls };
621
+ }
622
+ /**
623
+ * (7.7 §7) Provider-agnostic reactive overflow recovery, shared by the
624
+ * terminal-error path and the success path. When `signal` reads as a context
625
+ * overflow AND a compactor is wired AND we haven't already recovered this turn,
626
+ * compact with `reason: "overflow"` (bypasses the circuit breaker) and report
627
+ * whether the caller should restream. Telemetry-gated: a non-Anthropic recovery
628
+ * emits a `log info` so a wrong pattern surfaces as an observable event, not a
629
+ * silent loop. Fires AT MOST ONCE per turn.
630
+ */
631
+ async maybeRecoverFromOverflow(signal, route, textBuffer, turnId, state, token) {
632
+ if (!this.opts.compactor || state.recoveredFromOverflow)
633
+ return false;
634
+ if (!isContextOverflow(signal))
635
+ return false;
636
+ state.recoveredFromOverflow = true;
637
+ // (7.7 §7) Telemetry-gated: flag a non-Anthropic recovery so a wrong pattern
638
+ // surfaces as an observable event, not a silent overflow loop.
639
+ if (route.adapter.id !== "anthropic") {
640
+ this.emit({
641
+ type: "log",
642
+ level: "info",
643
+ message: `reactive overflow recovery fired for adapter '${route.adapter.id}'`,
644
+ });
645
+ }
646
+ try {
647
+ const recovery = await this.opts.compactor.compact("overflow", token.signal);
648
+ if (recovery.ok) {
649
+ // (6.5b follow-up) Mirror the retry path's partial-undo: if this attempt
650
+ // streamed partial text before the overflow, drop it so the
651
+ // post-compaction restream doesn't append onto a stale partial. Usually a
652
+ // no-op (a 413 precedes any delta) — emitted only when text was shown.
653
+ if (textBuffer.length > 0) {
654
+ this.emit({ type: "assistant:reset", turnId });
655
+ }
656
+ return true;
657
+ }
658
+ }
659
+ catch (e) {
660
+ // Compactor.compact never throws by contract, but be defensive: a malformed
661
+ // implementation shouldn't break the original error path.
662
+ this.emit({
663
+ type: "log",
664
+ level: "warn",
665
+ message: `overflow compaction unexpectedly threw: ${e.message ?? e}`,
666
+ });
667
+ }
668
+ return false;
669
+ }
670
+ /**
671
+ * (7.7 §3.3) Execute one assistant turn's tool calls. Partitions into
672
+ * batches (consecutive concurrency-safe → one parallel batch; any unsafe →
673
+ * its own serial batch), preserving submission order so a write is never
674
+ * reordered before a preceding read. Returns the `tool` messages in
675
+ * submission order plus whether the model signalled `terminate`.
676
+ *
677
+ * Cancellation is ALL-SETTLED: `runConcurrent` drains every thunk before it
678
+ * propagates, and `runTool` emits a `tool:result` on every path (success /
679
+ * denied / error / cancelled), so each emitted `tool:call` always has its
680
+ * paired result on the bus even when a sibling cancels (codex 7.7 R1
681
+ * MUST-FIX #3).
682
+ */
683
+ async executeToolBatch(calls, token, turnId) {
684
+ const { tools } = this.opts;
685
+ const cap = maxToolConcurrency();
686
+ const batches = partitionToolCalls(calls, (call) => {
687
+ const tool = tools.get(call.name);
688
+ return tool ? isCallConcurrencySafe(tool, call.args) : false;
689
+ });
690
+ const resultByCall = new Map();
691
+ const runOne = async (call) => {
692
+ // Emit `tool:call` here (not in the stream loop) so each call is paired
693
+ // with its `tool:result` from runTool — keeps the bus observably balanced
694
+ // for subscribers (TUI, NDJSON, telemetry, serve replay).
695
+ this.emit({ type: "tool:call", callId: call.callId, name: call.name, args: call.args });
696
+ resultByCall.set(call.callId, await this.runTool(call, token));
697
+ };
698
+ for (const b of batches) {
699
+ if (b.safe && b.calls.length > 1) {
700
+ // Parallel, bounded, drain-all-before-throw (no orphan tool:call).
701
+ await runConcurrent(b.calls.map((c) => () => runOne(c)), cap);
702
+ }
703
+ else {
704
+ // Serial: a single safe call, or an unsafe/write call. Preserve the
705
+ // pre-7.7 per-call cancellation checks (before AND after — a tool that
706
+ // catches cancellation internally and returns ok:false doesn't rethrow).
707
+ for (const c of b.calls) {
708
+ token.throwIfCancelled();
709
+ await runOne(c);
710
+ token.throwIfCancelled();
711
+ }
712
+ }
713
+ }
714
+ // (7.7 §3.5) Bound the aggregate size of one batch's results so a fan-out of
715
+ // parallel reads can't blow the next request's context in a single user
716
+ // message (claude-code MAX_TOOL_RESULTS_PER_MESSAGE_CHARS). Inline
717
+ // truncation of the largest results; disk-spill via the artifact mechanism
718
+ // is a future enhancement.
719
+ applyBatchResultBudget(resultByCall);
720
+ // Reassemble in submission order + aggregate terminate.
721
+ const messages = [];
722
+ let allTerminate = calls.length > 0;
723
+ for (const call of calls) {
724
+ const r = resultByCall.get(call.callId);
725
+ // A call whose runTool threw (cancellation) has no entry; the turn aborts
726
+ // and won't be persisted, so skipping it is correct.
727
+ if (!r) {
728
+ allTerminate = false;
729
+ continue;
730
+ }
731
+ messages.push({
732
+ role: "tool",
733
+ content: [{ type: "tool-result", callId: call.callId, name: call.name, output: r.output, ok: r.ok }],
734
+ });
735
+ if (!r.terminate)
736
+ allTerminate = false;
737
+ }
738
+ // (7.7 §5, OQ-5) Single todo emit point: after a successful `todo` tool call
739
+ // the session phases changed — project the full state to all four surfaces.
740
+ const todoChanged = calls.some((c) => c.name === TODO_TOOL_NAME && resultByCall.get(c.callId)?.ok);
741
+ if (todoChanged) {
742
+ this.emit({ type: "todo", turnId, phases: this.opts.session.getTodoPhases() });
743
+ }
744
+ return { messages, terminate: allTerminate };
745
+ }
479
746
  async runTool(call, token) {
480
747
  const { bus, tools, gate, plugins, workspaceRoot } = this.opts;
481
748
  const tool = tools.get(call.name);