@chances-ai/engine 26.0.0 → 28.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. package/dist/agents/bundled.d.ts +5 -0
  2. package/dist/agents/bundled.d.ts.map +1 -0
  3. package/dist/agents/bundled.js +66 -0
  4. package/dist/agents/bundled.js.map +1 -0
  5. package/dist/agents/index.d.ts +1 -0
  6. package/dist/agents/index.d.ts.map +1 -1
  7. package/dist/agents/index.js +1 -0
  8. package/dist/agents/index.js.map +1 -1
  9. package/dist/agents/parse.d.ts +3 -0
  10. package/dist/agents/parse.d.ts.map +1 -1
  11. package/dist/agents/parse.js +17 -0
  12. package/dist/agents/parse.js.map +1 -1
  13. package/dist/agents/types.d.ts +8 -0
  14. package/dist/agents/types.d.ts.map +1 -1
  15. package/dist/ai/adapters/ai-sdk-stream.d.ts.map +1 -1
  16. package/dist/ai/adapters/ai-sdk-stream.js +6 -1
  17. package/dist/ai/adapters/ai-sdk-stream.js.map +1 -1
  18. package/dist/ai/index.d.ts +1 -0
  19. package/dist/ai/index.d.ts.map +1 -1
  20. package/dist/ai/index.js +1 -0
  21. package/dist/ai/index.js.map +1 -1
  22. package/dist/ai/overflow.d.ts +40 -0
  23. package/dist/ai/overflow.d.ts.map +1 -0
  24. package/dist/ai/overflow.js +84 -0
  25. package/dist/ai/overflow.js.map +1 -0
  26. package/dist/ai/types.d.ts +8 -1
  27. package/dist/ai/types.d.ts.map +1 -1
  28. package/dist/core/coordinator-mode.d.ts +32 -0
  29. package/dist/core/coordinator-mode.d.ts.map +1 -0
  30. package/dist/core/coordinator-mode.js +98 -0
  31. package/dist/core/coordinator-mode.js.map +1 -0
  32. package/dist/core/coordinator-tools.d.ts +22 -0
  33. package/dist/core/coordinator-tools.d.ts.map +1 -0
  34. package/dist/core/coordinator-tools.js +262 -0
  35. package/dist/core/coordinator-tools.js.map +1 -0
  36. package/dist/core/engine.d.ts +235 -10
  37. package/dist/core/engine.d.ts.map +1 -1
  38. package/dist/core/engine.js +585 -274
  39. package/dist/core/engine.js.map +1 -1
  40. package/dist/core/index.d.ts +4 -2
  41. package/dist/core/index.d.ts.map +1 -1
  42. package/dist/core/index.js +3 -1
  43. package/dist/core/index.js.map +1 -1
  44. package/dist/core/task-tool.d.ts +85 -1
  45. package/dist/core/task-tool.d.ts.map +1 -1
  46. package/dist/core/task-tool.js +456 -494
  47. package/dist/core/task-tool.js.map +1 -1
  48. package/dist/session/index.d.ts +11 -0
  49. package/dist/session/index.d.ts.map +1 -1
  50. package/dist/session/index.js +22 -1
  51. package/dist/session/index.js.map +1 -1
  52. package/dist/tools/bash-readonly.d.ts +26 -0
  53. package/dist/tools/bash-readonly.d.ts.map +1 -0
  54. package/dist/tools/bash-readonly.js +130 -0
  55. package/dist/tools/bash-readonly.js.map +1 -0
  56. package/dist/tools/builtins/bash.d.ts.map +1 -1
  57. package/dist/tools/builtins/bash.js +12 -0
  58. package/dist/tools/builtins/bash.js.map +1 -1
  59. package/dist/tools/builtins/edit.d.ts.map +1 -1
  60. package/dist/tools/builtins/edit.js +18 -12
  61. package/dist/tools/builtins/edit.js.map +1 -1
  62. package/dist/tools/builtins/todo.d.ts +33 -0
  63. package/dist/tools/builtins/todo.d.ts.map +1 -0
  64. package/dist/tools/builtins/todo.js +245 -0
  65. package/dist/tools/builtins/todo.js.map +1 -0
  66. package/dist/tools/builtins/write.d.ts.map +1 -1
  67. package/dist/tools/builtins/write.js +10 -5
  68. package/dist/tools/builtins/write.js.map +1 -1
  69. package/dist/tools/concurrency.d.ts +37 -0
  70. package/dist/tools/concurrency.d.ts.map +1 -0
  71. package/dist/tools/concurrency.js +50 -0
  72. package/dist/tools/concurrency.js.map +1 -0
  73. package/dist/tools/file-lock.d.ts +22 -0
  74. package/dist/tools/file-lock.d.ts.map +1 -0
  75. package/dist/tools/file-lock.js +85 -0
  76. package/dist/tools/file-lock.js.map +1 -0
  77. package/dist/tools/index.d.ts +4 -0
  78. package/dist/tools/index.d.ts.map +1 -1
  79. package/dist/tools/index.js +4 -0
  80. package/dist/tools/index.js.map +1 -1
  81. package/dist/tools/types.d.ts +31 -0
  82. package/dist/tools/types.d.ts.map +1 -1
  83. package/dist/tools/types.js.map +1 -1
  84. package/package.json +3 -3
@@ -1,24 +1,57 @@
1
- import { AppError, ErrorCode, ModelSelection, createId, runWithCwd, } from "@chances-ai/runtime";
1
+ import { AppError, ErrorCode, ModelSelection, createId, runConcurrent, runWithCwd, } from "@chances-ai/runtime";
2
2
  import { refreshActiveMarker } from "./worktree/index.js";
3
- import { classifyProviderError, defaultRetryConfig, estimateCost, } from "../ai/index.js";
4
- import { ASK_USER_QUESTION_TOOL_NAME, READONLY_CATEGORIES } from "../tools/index.js";
5
- /**
6
- * (3.5 codex Round-1 SHOULD-FIX #4) Anthropic-only overflow
7
- * detection. Three patterns from pi's overflow catalogue
8
- * (`pi/packages/ai/src/utils/overflow.ts:11`). The other 10
9
- * providers we ship stay deferred until real-world telemetry shows
10
- * a hit; their error shapes are less stable and a wrong regex would
11
- * surface as silent overflow loops.
12
- */
13
- function isAnthropicOverflowError(adapterId, message) {
14
- if (adapterId !== "anthropic")
15
- return false;
16
- return (/prompt is too long/i.test(message) ||
17
- /request_too_large/i.test(message) ||
18
- /maximum.*context.*length/i.test(message));
3
+ import { classifyProviderError, defaultRetryConfig, estimateCost, isContextOverflow, } from "../ai/index.js";
4
+ import { ASK_USER_QUESTION_TOOL_NAME, READONLY_CATEGORIES, TODO_TOOL_NAME, isCallConcurrencySafe, partitionToolCalls, } from "../tools/index.js";
5
+ /** (7.7 §6) Soft turn ceiling — raised from the old 12 (too low for a
6
+ * daily-driver agent). At this many tool iterations without a final answer the
7
+ * interactive engine PAUSES gracefully ("continue?") rather than throwing. The
8
+ * real runaway guard is the token/compaction machinery + the absolute hard cap
9
+ * below, not a low turn count. */
10
+ export const DEFAULT_MAX_TURNS = 50;
11
+ /** (7.7 §6) Absolute runaway backstop. Even a caller that sets a huge `maxTurns`
12
+ * can't loop past this; hitting it always errors (`hard-cap`), never a silent
13
+ * pause. Env-tunable. */
14
+ function hardMaxTurns() {
15
+ const raw = Number.parseInt(process.env.CHANCES_HARD_MAX_TURNS ?? "", 10);
16
+ return Number.isFinite(raw) && raw > 0 ? raw : 500;
17
+ }
18
+ /** (7.7 §3.3) Max tool calls that run concurrently within one parallel batch.
19
+ * Env-tunable (claude-code's `CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY` analog);
20
+ * defaults to 10. A non-numeric / non-positive value falls back to 10. */
21
+ function maxToolConcurrency() {
22
+ const raw = Number.parseInt(process.env.CHANCES_MAX_TOOL_CONCURRENCY ?? "", 10);
23
+ return Number.isFinite(raw) && raw > 0 ? raw : 10;
24
+ }
25
+ /** (7.7 §3.5) Aggregate-size budget for one batch's tool results, in chars.
26
+ * Env-tunable; defaults to 200_000 (claude-code MAX_TOOL_RESULTS_PER_MESSAGE_CHARS). */
27
+ function batchResultBudget() {
28
+ const raw = Number.parseInt(process.env.CHANCES_MAX_BATCH_RESULT_CHARS ?? "", 10);
29
+ return Number.isFinite(raw) && raw > 0 ? raw : 200_000;
30
+ }
31
+ /** (7.7 §3.5) When the combined size of a batch's results exceeds the budget,
32
+ * truncate the LARGEST results (in place) until under budget, leaving a note.
33
+ * Bounds a fan-out of parallel reads from blowing the next request's context in
34
+ * one user message. Mutates the passed map's value `.output` fields. */
35
+ function applyBatchResultBudget(resultByCall) {
36
+ const budget = batchResultBudget();
37
+ let total = 0;
38
+ for (const r of resultByCall.values())
39
+ total += r.output.length;
40
+ if (total <= budget)
41
+ return;
42
+ const bySize = [...resultByCall.values()].sort((a, b) => b.output.length - a.output.length);
43
+ for (const r of bySize) {
44
+ if (total <= budget)
45
+ break;
46
+ const over = total - budget;
47
+ const keep = Math.max(2048, r.output.length - over);
48
+ if (keep >= r.output.length)
49
+ continue;
50
+ const omitted = r.output.length - keep;
51
+ r.output = `${r.output.slice(0, keep)}\n[…${omitted} chars truncated — batch result budget (${budget}) exceeded]`;
52
+ total -= omitted;
53
+ }
19
54
  }
20
- /** Engine default when no caller-supplied or config-supplied value applies. */
21
- export const DEFAULT_MAX_TURNS = 12;
22
55
  /** Default base prompt the engine uses when no `systemBaseOverride` is set.
23
56
  * Exported so tests can assert "is this the default or an agent override?" and
24
57
  * so the doc + plugin authors can read the exact text. */
@@ -56,6 +89,112 @@ export class AgentEngine {
56
89
  getSelection() {
57
90
  return this.selection;
58
91
  }
92
+ /**
93
+ * (7.7 §4) Queue a user steering message to be injected at the next turn
94
+ * boundary of the in-flight turn (or the top of the next turn if idle). A
95
+ * no-op when no `steering` queue was provided. The CLI / serve driver call
96
+ * this on a submit-while-busy instead of rejecting the input.
97
+ */
98
+ enqueueSteering(text) {
99
+ this.opts.steering?.enqueue(text);
100
+ }
101
+ /** (7.7 §4) Peek the steering queue for entries not yet injected this turn,
102
+ * render each as a user message, and append its id to `injectedSteerIds` for
103
+ * post-persist ack. Peek-not-drain: a cancelled turn re-delivers. */
104
+ drainSteering(injectedSteerIds) {
105
+ const seen = new Set(injectedSteerIds);
106
+ const out = [];
107
+ for (const e of this.opts.steering?.peek() ?? []) {
108
+ if (seen.has(e.id))
109
+ continue;
110
+ out.push({ role: "user", content: [{ type: "text", text: e.text }] });
111
+ injectedSteerIds.push(e.id);
112
+ }
113
+ return out;
114
+ }
115
+ /** (7.8 §3.4) The notification sources the engine drains at turn boundaries:
116
+ * background subagent tasks AND persistent coordinator workers. Both
117
+ * implement {@link NotificationSource}; the engine renders their pending
118
+ * `<task-notification>`s together (FIFO within each) and acks EACH source's
119
+ * own ids after persist — never broadcasting a global id list (codex R1-§5). */
120
+ notificationSources() {
121
+ const out = [];
122
+ if (this.opts.backgroundTasks)
123
+ out.push(this.opts.backgroundTasks);
124
+ if (this.opts.workerRegistry)
125
+ out.push(this.opts.workerRegistry);
126
+ return out;
127
+ }
128
+ /** (7.7 §4 / 7.8 §3.4 — codex R2 M1) Collect notifications not yet injected this
129
+ * turn, across ALL sources, recording each into its OWN per-source injected set
130
+ * as it's taken. The injected tracking is keyed by `NotificationSource` (NOT a
131
+ * flat id set) so a hypothetical cross-source id collision can never make one
132
+ * source's note shadow another's, nor ack a note that was never injected. */
133
+ collectFreshNotifications(injected) {
134
+ const out = [];
135
+ for (const src of this.notificationSources()) {
136
+ let set = injected.get(src);
137
+ if (!set) {
138
+ set = new Set();
139
+ injected.set(src, set);
140
+ }
141
+ for (const n of src.peekPendingNotifications()) {
142
+ if (set.has(n.taskId))
143
+ continue;
144
+ out.push(n);
145
+ set.add(n.taskId); // record immediately so the boundary drain won't re-inject
146
+ }
147
+ }
148
+ return out;
149
+ }
150
+ /** (7.7 §4 / 7.8 — codex R2 M1) Ack AFTER persist: each source acks EXACTLY the
151
+ * ids injected FROM it this turn (per-source set), never a merged global list.
152
+ * A cancelled turn (no ack) re-delivers; injected ids are turn-local. */
153
+ ackNotifications(injected) {
154
+ for (const src of this.notificationSources()) {
155
+ const set = injected.get(src);
156
+ if (set && set.size > 0)
157
+ src.acknowledgeNotifications([...set]);
158
+ }
159
+ }
160
+ /** (7.7 §4) Iteration-boundary drain: notifications that arrived mid-turn
161
+ * (combined render across all sources, same as turn-top) PLUS steering. Both
162
+ * peek-not-drain; ids recorded for post-persist ack. */
163
+ drainBoundaryInjections(injectedNotifIds, injectedSteerIds) {
164
+ const out = [];
165
+ const fresh = this.collectFreshNotifications(injectedNotifIds);
166
+ if (fresh.length > 0) {
167
+ out.push({
168
+ role: "user",
169
+ content: [{ type: "text", text: fresh.map(renderTaskNotificationXml).join("\n") }],
170
+ });
171
+ }
172
+ out.push(...this.drainSteering(injectedSteerIds));
173
+ return out;
174
+ }
175
+ /** (7.7 §5.3) Build an incomplete-todos reminder when the model stopped with
176
+ * open (pending/in_progress) todos and the per-turn cap isn't exhausted.
177
+ * Returns a `user`-role system-reminder message (no `developer` role exists),
178
+ * or null to let the turn resolve. */
179
+ maybeTodoReminder(count) {
180
+ const max = this.opts.todoReminderMax ?? 3;
181
+ if (max <= 0 || count >= max)
182
+ return null;
183
+ const open = [];
184
+ for (const p of this.opts.session.getTodoPhases()) {
185
+ for (const t of p.tasks) {
186
+ if (t.status === "pending" || t.status === "in_progress")
187
+ open.push(t.content);
188
+ }
189
+ }
190
+ if (open.length === 0)
191
+ return null;
192
+ const list = open.map((c) => `"${c}"`).join(", ");
193
+ const text = `<system-reminder>You stopped with ${open.length} incomplete todo item(s): ${list}. ` +
194
+ `Continue working through them, or mark each done/abandoned with the todo tool. ` +
195
+ `(Reminder ${count + 1}/${max})</system-reminder>`;
196
+ return { role: "user", content: [{ type: "text", text }] };
197
+ }
59
198
  /** Bus-emit wrapper. Three responsibilities (3.4):
60
199
  * 1. Suppress lifecycle frames (`turn:*`, `error`) when the engine is a
61
200
  * child (`suppressLifecycleEvents=true`). Codex Round-1 MUST-FIX #2.
@@ -115,7 +254,7 @@ export class AgentEngine {
115
254
  return this.runTurnImpl(prompt, token, opts.expandMentions !== false, opts.trustedContext);
116
255
  }
117
256
  async runTurnImpl(prompt, token, expandMentions, trustedContext) {
118
- const { router, tools, gate, session, plugins, backgroundTasks } = this.opts;
257
+ const { tools, session, plugins } = this.opts;
119
258
  const turnId = createId("turn");
120
259
  // (3.6) Carry the active session id on `turn:start` so the OTel
121
260
  // exporter can stamp `chances.gen_ai.session.id` correctly across
@@ -144,8 +283,10 @@ export class AgentEngine {
144
283
  // persisted to `session.messages()`. Acknowledgement (queue removal)
145
284
  // happens immediately after `session.appendTurn(turnMessages)`
146
285
  // succeeds.
147
- const notifications = backgroundTasks?.peekPendingNotifications() ?? [];
148
- const notificationIds = notifications.map((n) => n.taskId);
286
+ // (7.8 §3.4 — codex R2 M1) Collect across ALL notification sources (background
287
+ // tasks + persistent workers), recording each into its per-source injected set.
288
+ const injectedNotifIds = new Map();
289
+ const notifications = this.collectFreshNotifications(injectedNotifIds);
149
290
  const turnMessages = [];
150
291
  if (notifications.length > 0) {
151
292
  const xml = notifications.map(renderTaskNotificationXml).join("\n");
@@ -174,248 +315,40 @@ export class AgentEngine {
174
315
  turnMessages.push({ role: "user", content: [{ type: "text", text: trustedContext }] });
175
316
  }
176
317
  turnMessages.push({ role: "user", content: [{ type: "text", text: prompt }] });
318
+ // (7.7 §4) Drain any steering queued before this turn started. `injectedNotifIds`
319
+ // (above) tracks per-source notification ids; `injectedSteerIds` tracks steering
320
+ // — both so the iteration-boundary drain + post-persist ack don't double-count.
321
+ const injectedSteerIds = [];
322
+ turnMessages.push(...this.drainSteering(injectedSteerIds));
177
323
  const result = { text: "", inputTokens: 0, outputTokens: 0, costUsd: 0 };
178
- const maxTurns = this.opts.maxTurns ?? this.opts.maxIterations ?? DEFAULT_MAX_TURNS;
179
- let resolved = false;
180
- // (3.5 codex Round-1 MUST-FIX #1) `result.inputTokens` aggregates
181
- // every `usage` event across the multi-step tool loop. The compactor's
182
- // threshold check needs the LAST stream's input only — that's what
183
- // the provider will count for the NEXT request, plus the new user
184
- // prompt. Tracked separately here; emitted via `usage:turn`.
185
- let lastRequestInputTokens = 0;
186
- // (3.5 codex Round-1 SHOULD-FIX #4) Per-turn flag. Anthropic
187
- // overflow recovery fires AT MOST ONCE per turn — a second 413 after
188
- // we already compacted is an actual ceiling we can't paper over.
189
- let recoveredFromOverflow = false;
190
- // (3.5) Tracked at the outer scope so the post-turn compaction check
191
- // can read `route.model`. The for-loop reuses the variable across
192
- // iterations; we just need the most recent value to query the model
193
- // descriptor for `contextWindow`.
194
- let lastRoute;
195
- for (let i = 0; i < maxTurns; i++) {
196
- token.throwIfCancelled();
197
- // Re-read selection per turn so a `/model` switch between turns lands on
198
- // the next request without rebuilding the engine.
199
- const choice = this.selection.get();
200
- const route = router.pick({
201
- preferredModel: choice.model,
202
- preferredProvider: choice.provider,
203
- needsTools: toolDefs.length > 0,
204
- });
205
- lastRoute = route;
206
- const retry = this.opts.retry ?? defaultRetryConfig;
207
- let textBuffer = "";
208
- let calls = [];
209
- let attempt = 0;
210
- while (true) {
211
- token.throwIfCancelled();
212
- textBuffer = "";
213
- calls = [];
214
- // (3.5) Reset per attempt — only the LAST successful stream's
215
- // last `usage.inputTokens` carries forward into the post-turn
216
- // compactor check.
217
- let attemptLastInputTokens = 0;
218
- // (6.5b review) Stage usage in attempt-local accumulators instead of
219
- // folding it straight into the turn-level `result`. A retryable
220
- // mid-stream error (e.g. ECONNRESET after a partial stream) discards
221
- // the attempt and restreams; folding here would double-count tokens
222
- // and double-emit `usage`. We only merge into `result` + emit once
223
- // the stream completes (`streamError === null`, below).
224
- let attemptInputTokens = 0;
225
- let attemptOutputTokens = 0;
226
- let attemptCostUsd = 0;
227
- let streamError = null;
228
- const stream = route.adapter.stream({ model: route.model.id, system, messages: [...session.messages(), ...turnMessages], tools: toolDefs }, token.signal);
229
- for await (const event of stream) {
230
- // Enforce cancellation per-event so a provider that ignores or
231
- // queues past the AbortSignal can't keep dripping text/tool-calls
232
- // into a turn the user already abandoned. Particularly important
233
- // for subagents: the parent's abort must stop the child instantly,
234
- // not wait until the child stream naturally ends.
235
- token.throwIfCancelled();
236
- switch (event.type) {
237
- case "text-delta":
238
- textBuffer += event.text;
239
- this.emit({ type: "assistant:delta", turnId, text: event.text });
240
- break;
241
- case "tool-call":
242
- // Defer the `tool:call` bus emit until the execution loop
243
- // below — pairs each emit atomically with its matching
244
- // `tool:result`. Emitting here would leave orphan call frames
245
- // on the bus whenever the turn aborts between stream-end and
246
- // tool execution (Ctrl-C) or a retry attempt discards the
247
- // collected calls and tries again on attempt N+1.
248
- calls.push(event.call);
249
- break;
250
- case "usage": {
251
- const costUsd = estimateCost(route.model, event.usage);
252
- // (6.5b review) Accumulate into attempt-local totals; the merge
253
- // into `result` + the `usage` emit happen once the stream
254
- // succeeds, so a discarded retry attempt can't double-count.
255
- attemptInputTokens += event.usage.inputTokens;
256
- attemptOutputTokens += event.usage.outputTokens;
257
- attemptCostUsd += costUsd;
258
- // (3.5) Track most recent stream's last input count for the
259
- // post-turn compaction threshold check. NOT the aggregate.
260
- attemptLastInputTokens = event.usage.inputTokens;
261
- break;
262
- }
263
- case "error":
264
- // Defer the bus emit until after cancellation check — if the
265
- // user just hit Ctrl-C, the SDK's abort path surfaces as a
266
- // stream error and we shouldn't shout "PROVIDER" at them.
267
- streamError = event.message;
268
- break;
269
- case "done":
270
- break;
271
- }
272
- if (streamError !== null)
273
- break;
274
- }
275
- if (streamError === null) {
276
- // Stream completed successfully — NOW fold this attempt's usage
277
- // into the turn-level `result` and emit the (aggregated) `usage`
278
- // frame. Deferred to here so a discarded retry attempt's partial
279
- // usage never double-counts (6.5b review).
280
- result.inputTokens += attemptInputTokens;
281
- result.outputTokens += attemptOutputTokens;
282
- result.costUsd += attemptCostUsd;
283
- if (attemptInputTokens > 0 || attemptOutputTokens > 0 || attemptCostUsd > 0) {
284
- this.emit({
285
- type: "usage",
286
- model: route.model.id,
287
- inputTokens: attemptInputTokens,
288
- outputTokens: attemptOutputTokens,
289
- costUsd: attemptCostUsd,
290
- });
291
- }
292
- // Persist this attempt's last input-token count for the post-turn
293
- // compaction check.
294
- lastRequestInputTokens = attemptLastInputTokens;
295
- break;
296
- }
297
- // If the abort signal fired during the stream, the error we just
298
- // captured is the SDK reacting to the cancellation — treat it as
299
- // Cancelled rather than misclassifying as a provider error.
300
- token.throwIfCancelled();
301
- const decision = classifyProviderError(streamError);
302
- const terminal = !decision.retryable || attempt >= retry.delaysMs.length;
303
- if (terminal) {
304
- // (3.5 — codex Round-1 SHOULD-FIX #4) Anthropic-only reactive
305
- // overflow recovery. Catches the 413 BEFORE the terminal throw,
306
- // runs compaction with `reason: "overflow"` (bypasses circuit
307
- // breaker), and retries the stream once with the compacted
308
- // history. Wider 10-provider catalogue stays deferred until
309
- // telemetry shows a real-world miss.
310
- if (this.opts.compactor &&
311
- !recoveredFromOverflow &&
312
- isAnthropicOverflowError(route.adapter.id, streamError)) {
313
- recoveredFromOverflow = true;
314
- try {
315
- const recovery = await this.opts.compactor.compact("overflow", token.signal);
316
- if (recovery.ok) {
317
- // Reset the attempt counter so we get the full retry
318
- // budget against the now-smaller request, AND clear the
319
- // accumulated message buffer that the failed attempt
320
- // wrote into `turnMessages`. The retry rebuilds from
321
- // `session.messages()` (which now reflects compaction)
322
- // plus this turn's prepended user/notification messages.
323
- // (6.5b follow-up) Mirror the normal retry path's partial-undo:
324
- // if this attempt streamed partial text before the overflow, drop
325
- // it so the post-compaction restream doesn't append onto a stale
326
- // partial. A 413 usually precedes any delta, so this is typically
327
- // a no-op — emitted only when text was actually shown.
328
- if (textBuffer.length > 0) {
329
- this.emit({ type: "assistant:reset", turnId });
330
- }
331
- attempt = 0;
332
- continue;
333
- }
334
- }
335
- catch (e) {
336
- // Compactor.compact never throws by contract, but be defensive:
337
- // a malformed Compactor implementation shouldn't break the
338
- // original error path.
339
- this.emit({
340
- type: "log",
341
- level: "warn",
342
- message: `overflow compaction unexpectedly threw: ${e.message ?? e}`,
343
- });
344
- }
345
- }
346
- // Emit the bus `error` ONLY when we're about to throw. Emitting on
347
- // every retry attempt would cause `runPrompt`'s `lastError`
348
- // listener to record a transient failure as the turn's exit code
349
- // even after a later attempt succeeded (codex re-review finding).
350
- // Subagent engines suppress this emit — see `suppressTerminalErrors`.
351
- if (!this.opts.suppressTerminalErrors) {
352
- this.emit({ type: "error", code: "PROVIDER", message: streamError });
353
- }
354
- throw new AppError(ErrorCode.Provider, `Provider error (${decision.reason}): ${streamError}`);
355
- }
356
- const delayMs = retry.delaysMs[attempt] ?? 0;
357
- this.emit({
358
- type: "log",
359
- level: "warn",
360
- message: `provider stream errored (${decision.reason}); retry ${attempt + 1}/${retry.delaysMs.length} after ${delayMs}ms; original: ${streamError}`,
361
- });
362
- // (6.5b review) If this attempt already streamed partial assistant text
363
- // to the bus, the upcoming restream would APPEND a fresh copy on top
364
- // (consumers don't replace) — duplicating it on screen. Tell consumers
365
- // to drop the in-flight partial first. `usage`/`tool-call` are deferred
366
- // (not yet on the bus), so only `textBuffer` needs undoing.
367
- if (textBuffer.length > 0) {
368
- this.emit({ type: "assistant:reset", turnId });
369
- }
370
- attempt += 1;
371
- await sleepCancellable(delayMs, token);
372
- }
373
- if (calls.length === 0) {
374
- const content = [{ type: "text", text: textBuffer }];
375
- turnMessages.push({ role: "assistant", content });
376
- result.text = textBuffer;
377
- this.emit({ type: "assistant:message", turnId, text: textBuffer });
378
- await safeRunHook(plugins, "afterResponse", { text: textBuffer }, this.opts.bus);
379
- resolved = true;
380
- break;
381
- }
382
- // Record the assistant message that requested the tools.
383
- const assistantContent = [];
384
- if (textBuffer)
385
- assistantContent.push({ type: "text", text: textBuffer });
386
- for (const call of calls) {
387
- assistantContent.push({ type: "tool-call", callId: call.callId, name: call.name, args: call.args });
388
- }
389
- turnMessages.push({ role: "assistant", content: assistantContent });
390
- // Execute each tool through the permission gate, then feed results back.
391
- for (const call of calls) {
392
- // Check cancellation before each tool — a long batch of tool-calls
393
- // from one model turn shouldn't keep running after the user aborts.
394
- token.throwIfCancelled();
395
- // Emit `tool:call` here (not in the stream loop) so each call is
396
- // paired with its `tool:result` from runTool — keeps the bus
397
- // observably balanced for subscribers (TUI, NDJSON, telemetry).
398
- this.emit({ type: "tool:call", callId: call.callId, name: call.name, args: call.args });
399
- const outcome = await this.runTool(call, token);
400
- turnMessages.push({
401
- role: "tool",
402
- content: [{ type: "tool-result", callId: call.callId, name: call.name, output: outcome.output, ok: outcome.ok }],
403
- });
404
- // Round 3 codex SHOULD-FIX: check cancellation AFTER each tool
405
- // result too. A tool that catches cancellation internally and
406
- // returns `ok:false` (e.g. `bash` returning `(cancelled)`) does
407
- // NOT re-throw, so without this check the loop would continue
408
- // to the next turn and could exhaust `maxTurns`, surfacing as
409
- // a misleading `PROVIDER: Reached maximum number of turns`
410
- // instead of the user's actual `Cancelled` intent.
411
- token.throwIfCancelled();
412
- }
413
- }
324
+ // (7.7 §6) Soft ceiling (default 50), clamped to the absolute hard cap so a
325
+ // huge caller-supplied `maxTurns` can't loop forever. `hitHardCap` records
326
+ // whether the loop will stop at the hard backstop (always errors) vs the
327
+ // soft ceiling (interactive pauses).
328
+ const softMax = this.opts.maxTurns ?? this.opts.maxIterations ?? DEFAULT_MAX_TURNS;
329
+ const hardMax = hardMaxTurns();
330
+ const maxTurns = Math.min(softMax, hardMax);
331
+ const hitHardCap = softMax > hardMax;
332
+ // (7.7 §3.8) Per-turn mutable accumulators (formerly a fistful of inline
333
+ // `let`s). The loop units fold usage / track overflow recovery / record the
334
+ // last route through this. `result` aliases `state.result`, read back below
335
+ // to finalize the turn.
336
+ const state = { result, lastRequestInputTokens: 0, recoveredFromOverflow: false };
337
+ // (7.7 §3.8) Hand the assembled turn to the hook-driven loop. The default
338
+ // hooks bind this engine's own units (stream / tool-batch / steering / todo
339
+ // reminder); task 08's coordinator can pass ALTERNATIVE hooks WITHOUT
340
+ // forking the loop body — that injectable seam is the Axis 3.8 deliverable.
341
+ const { resolved } = await this.runAgentLoop({ turnId, system, toolDefs, turnMessages, injectedNotifIds, injectedSteerIds, maxTurns, state, token }, this.defaultLoopHooks());
414
342
  session.appendTurn(turnMessages);
415
- // Codex Round-2 MUST-FIX #3: ack notifications AFTER appendTurn so a
416
- // cancellation between peek and persist leaves the queue intact.
417
- if (notificationIds.length > 0) {
418
- backgroundTasks?.acknowledgeNotifications(notificationIds);
343
+ // Codex Round-2 MUST-FIX #3: ack notifications + steering AFTER appendTurn so
344
+ // a cancellation between peek and persist leaves both queues intact (the
345
+ // next turn re-delivers). `injectedNotifIds` covers turn-top AND mid-turn
346
+ // notifications; `injectedSteerIds` covers all injected steering.
347
+ // (7.8 §3.4) Ack per source, each only its own injected ids (no global
348
+ // broadcast) — drains across backgroundTasks + workerRegistry uniformly.
349
+ this.ackNotifications(injectedNotifIds);
350
+ if (injectedSteerIds.length > 0) {
351
+ this.opts.steering?.acknowledge(injectedSteerIds);
419
352
  }
420
353
  // (3.5) Per-turn aggregate event. Lifecycle suppression honored —
421
354
  // child engines (with `suppressLifecycleEvents`) skip this too.
@@ -428,7 +361,7 @@ export class AgentEngine {
428
361
  inputTokens: result.inputTokens,
429
362
  outputTokens: result.outputTokens,
430
363
  costUsd: result.costUsd,
431
- lastRequestInputTokens,
364
+ lastRequestInputTokens: state.lastRequestInputTokens,
432
365
  });
433
366
  }
434
367
  // (3.5 — codex Round-1 MUST-FIX #4) Threshold-triggered compaction.
@@ -437,10 +370,10 @@ export class AgentEngine {
437
370
  // `compaction:start` / `compaction:end` frames inside this await.
438
371
  // The compactor itself swallows all failures into ok:false (never
439
372
  // throws by contract); `Cancelled` propagates as `cancelled` reason.
440
- if (this.opts.compactor && resolved && lastRoute) {
373
+ if (this.opts.compactor && resolved && state.lastRoute) {
441
374
  const should = this.opts.compactor.shouldCompact({
442
- lastRequestInputTokens,
443
- model: lastRoute.model,
375
+ lastRequestInputTokens: state.lastRequestInputTokens,
376
+ model: state.lastRoute.model,
444
377
  });
445
378
  if (should) {
446
379
  await this.opts.compactor.compact("threshold", token.signal);
@@ -462,20 +395,398 @@ export class AgentEngine {
462
395
  }
463
396
  this.emit({ type: "turn:end", turnId });
464
397
  if (!resolved) {
465
- // Loop exhausted the turn budget without the model returning a final
466
- // answer. Match claude-code's terminal-error pattern (`QueryEngine.ts:914`)
467
- // emit a bus error and throw so the caller sees a concrete signal
468
- // instead of an empty result. The turn is still persisted above so
469
- // `/resume` can pick up the partial work.
470
- // Subagent engines suppress this emit see `suppressTerminalErrors`.
398
+ // (7.7 §6) The loop exhausted its budget without a final answer. The turn
399
+ // is already persisted above, so `/resume` (or the next message / steering)
400
+ // picks up the partial work. Two outcomes:
401
+ // - SOFT pause (interactive default): emit `turn:paused` and return
402
+ // cleanly "reached max actions, continue?". No scary throw.
403
+ // - THROW: when the caller opted out of pausing (`pauseOnMaxTurns:false`,
404
+ // i.e. `-p`/serve/SDK automation) OR the absolute hard cap was hit.
405
+ // A precise `MaxTurns` code (not the misleading PROVIDER) → non-zero
406
+ // exit for automation.
407
+ const pause = (this.opts.pauseOnMaxTurns ?? true) && !hitHardCap;
408
+ if (pause) {
409
+ // (codex R2 MUST-FIX) Emit `turn:paused` ONLY on the actual pause — the
410
+ // throw path (automation / hard-cap) must NOT signal a "continue?"
411
+ // affordance it then contradicts with an error. Soft pause always means
412
+ // the soft ceiling (pause requires !hitHardCap), hence reason max-turns.
413
+ if (!this.opts.suppressLifecycleEvents) {
414
+ this.emit({ type: "turn:paused", turnId, reason: "max-turns", turnsTaken: maxTurns });
415
+ }
416
+ return result;
417
+ }
418
+ // Throw path: `-p`/serve/SDK automation, or the absolute hard cap. A
419
+ // precise `MaxTurns` error (not the misleading PROVIDER) → non-zero exit.
471
420
  const message = `Reached maximum number of turns (${maxTurns})`;
472
421
  if (!this.opts.suppressTerminalErrors) {
473
- this.emit({ type: "error", code: "PROVIDER", message });
422
+ this.emit({ type: "error", code: ErrorCode.MaxTurns, message });
474
423
  }
475
- throw new AppError(ErrorCode.Provider, message);
424
+ throw new AppError(ErrorCode.MaxTurns, message);
476
425
  }
477
426
  return result;
478
427
  }
428
+ /**
429
+ * (7.7 §3.8) The hook-driven turn loop — a thin orchestrator over
430
+ * {@link AgentLoopHooks}, mirroring pi's `runLoop`
431
+ * (`packages/agent/src/agent-loop.ts`). Each iteration streams one assistant
432
+ * response, executes its tool batch, then drains boundary injections (steering
433
+ * + mid-turn notifications) before the next stream. Returns whether the turn
434
+ * reached a final answer (`resolved`) or exhausted `maxTurns`.
435
+ *
436
+ * Behaviour is IDENTICAL to the pre-refactor inline loop — `defaultLoopHooks`
437
+ * binds the same units; the seam exists so task 08's coordinator can inject
438
+ * alternatives without copying this body.
439
+ */
440
+ async runAgentLoop(run, hooks) {
441
+ const { token, turnId } = run;
442
+ // (7.7 §5.3) Per-turn incomplete-todos reminder counter. Resets every
443
+ // runTurn (= every new user prompt), matching oh-my-pi.
444
+ let todoReminderCount = 0;
445
+ for (let i = 0; i < run.maxTurns; i++) {
446
+ token.throwIfCancelled();
447
+ // Re-read selection per turn so a `/model` switch between turns lands on
448
+ // the next request without rebuilding the engine.
449
+ const choice = this.selection.get();
450
+ const route = this.opts.router.pick({
451
+ preferredModel: choice.model,
452
+ preferredProvider: choice.provider,
453
+ needsTools: run.toolDefs.length > 0,
454
+ });
455
+ run.state.lastRoute = route;
456
+ const { text, calls } = await hooks.streamAssistant(route, run);
457
+ if (calls.length === 0) {
458
+ run.turnMessages.push({ role: "assistant", content: [{ type: "text", text }] });
459
+ run.state.result.text = text;
460
+ this.emit({ type: "assistant:message", turnId, text });
461
+ // (7.7 §5.3) If the model stopped with incomplete todos, nudge it to
462
+ // finish (capped). Inject a user-role reminder and continue the loop
463
+ // instead of resolving — boosts autonomous completion (oh-my-pi
464
+ // `#checkTodoCompletion`). The reminder rides the maxTurns budget.
465
+ const reminder = hooks.incompleteTodoReminder(todoReminderCount);
466
+ if (reminder) {
467
+ todoReminderCount += 1;
468
+ run.turnMessages.push(reminder);
469
+ continue;
470
+ }
471
+ await safeRunHook(this.opts.plugins, "afterResponse", { text }, this.opts.bus);
472
+ return { resolved: true };
473
+ }
474
+ // Record the assistant message that requested the tools.
475
+ const assistantContent = [];
476
+ if (text)
477
+ assistantContent.push({ type: "text", text });
478
+ for (const call of calls) {
479
+ assistantContent.push({ type: "tool-call", callId: call.callId, name: call.name, args: call.args });
480
+ }
481
+ run.turnMessages.push({ role: "assistant", content: assistantContent });
482
+ // (7.7 §3) Execute the batch (parallel safe-batch / serial unsafe;
483
+ // submission order preserved; all-settled on cancellation).
484
+ const batch = await hooks.executeToolBatch(calls, token, turnId);
485
+ run.turnMessages.push(...batch.messages);
486
+ // (7.7 §6.2) Model-signalled terminate: EVERY result asked to stop → end
487
+ // gracefully (transcript intact) instead of streaming another turn.
488
+ if (batch.terminate) {
489
+ run.state.result.text = text;
490
+ return { resolved: true };
491
+ }
492
+ // (7.7 §4) Iteration boundary: inject steering the user typed mid-turn +
493
+ // bg-task notifications that completed mid-turn, so they reach the model on
494
+ // the NEXT stream of this turn (claude-code mid-turn drain).
495
+ run.turnMessages.push(...hooks.getBoundaryMessages(run.injectedNotifIds, run.injectedSteerIds));
496
+ }
497
+ return { resolved: false };
498
+ }
499
+ /**
500
+ * (7.7 §3.8) The default loop hooks — an object binding the engine's own
501
+ * units. An object (not the methods passed directly) so the shape is a
502
+ * documented, swappable seam: task 08 supplies alternatives; a test can wrap a
503
+ * single hook to assert the loop dispatches through it.
504
+ */
505
+ defaultLoopHooks() {
506
+ return {
507
+ streamAssistant: (route, run) => this.streamAssistantResponse(route, run),
508
+ executeToolBatch: (calls, token, turnId) => this.executeToolBatch(calls, token, turnId),
509
+ getBoundaryMessages: (notifIds, steerIds) => this.drainBoundaryInjections(notifIds, steerIds),
510
+ incompleteTodoReminder: (count) => this.maybeTodoReminder(count),
511
+ };
512
+ }
513
+ /**
514
+ * (7.7 §3.8) Stream ONE assistant response — the careful inner unit (pi
515
+ * `streamAssistantResponse`). Owns: per-attempt classified retry with backoff,
516
+ * attempt-local usage staging (a discarded retry never double-counts),
517
+ * `assistant:reset` partial-undo, and provider-agnostic overflow recovery on
518
+ * BOTH the terminal-error path (a 413 before the throw) and the success path
519
+ * (z.ai silent truncation / MiMo length-stop). Folds usage into `state.result`
520
+ * and records `state.lastRequestInputTokens` once the stream completes. Throws
521
+ * `AppError(Provider|Cancelled)` on a terminal failure; otherwise returns the
522
+ * streamed text + requested tool calls.
523
+ */
524
+ async streamAssistantResponse(route, run) {
525
+ const { token, turnId, system, toolDefs, turnMessages, state } = run;
526
+ const { session } = this.opts;
527
+ const retry = this.opts.retry ?? defaultRetryConfig;
528
+ let textBuffer = "";
529
+ let calls = [];
530
+ let attempt = 0;
531
+ while (true) {
532
+ token.throwIfCancelled();
533
+ textBuffer = "";
534
+ calls = [];
535
+ // (3.5) Reset per attempt — only the LAST successful stream's last
536
+ // `usage.inputTokens` carries forward into the post-turn compactor check.
537
+ let attemptLastInputTokens = 0;
538
+ // (6.5b review) Stage usage in attempt-local accumulators instead of
539
+ // folding straight into the turn-level `result`. A retryable mid-stream
540
+ // error discards the attempt and restreams; folding here would
541
+ // double-count. Merge into `result` + emit once the stream completes.
542
+ let attemptInputTokens = 0;
543
+ let attemptOutputTokens = 0;
544
+ let attemptCostUsd = 0;
545
+ let streamError = null;
546
+ // (7.7 §7) The provider's finish reason (when surfaced) — drives the
547
+ // length-stop overflow signal on the success path.
548
+ let finishReason;
549
+ const stream = route.adapter.stream({ model: route.model.id, system, messages: [...session.messages(), ...turnMessages], tools: toolDefs }, token.signal);
550
+ for await (const event of stream) {
551
+ // Enforce cancellation per-event so a provider that ignores or queues
552
+ // past the AbortSignal can't keep dripping into a turn the user already
553
+ // abandoned. Critical for subagents: the parent's abort stops the child
554
+ // instantly, not when the child stream naturally ends.
555
+ token.throwIfCancelled();
556
+ switch (event.type) {
557
+ case "text-delta":
558
+ textBuffer += event.text;
559
+ this.emit({ type: "assistant:delta", turnId, text: event.text });
560
+ break;
561
+ case "tool-call":
562
+ // Defer the `tool:call` bus emit until the execution loop — pairs
563
+ // each emit atomically with its `tool:result`. Emitting here would
564
+ // orphan call frames whenever the turn aborts between stream-end and
565
+ // tool execution (Ctrl-C) or a retry discards the collected calls.
566
+ calls.push(event.call);
567
+ break;
568
+ case "usage": {
569
+ const costUsd = estimateCost(route.model, event.usage);
570
+ attemptInputTokens += event.usage.inputTokens;
571
+ attemptOutputTokens += event.usage.outputTokens;
572
+ attemptCostUsd += costUsd;
573
+ // (3.5) Track most recent stream's last input count for the post-turn
574
+ // compaction threshold check. NOT the aggregate.
575
+ attemptLastInputTokens = event.usage.inputTokens;
576
+ break;
577
+ }
578
+ case "error":
579
+ // Defer the bus emit until after the cancellation check — a Ctrl-C
580
+ // surfaces as a stream error and we shouldn't shout "PROVIDER".
581
+ streamError = event.message;
582
+ break;
583
+ case "done":
584
+ finishReason = event.finishReason;
585
+ break;
586
+ }
587
+ if (streamError !== null)
588
+ break;
589
+ }
590
+ if (streamError === null) {
591
+ // Stream completed — NOW fold this attempt's usage into the turn-level
592
+ // `result` and emit the aggregated `usage` frame (deferred so a discarded
593
+ // retry attempt's partial usage never double-counts).
594
+ state.result.inputTokens += attemptInputTokens;
595
+ state.result.outputTokens += attemptOutputTokens;
596
+ state.result.costUsd += attemptCostUsd;
597
+ if (attemptInputTokens > 0 || attemptOutputTokens > 0 || attemptCostUsd > 0) {
598
+ this.emit({
599
+ type: "usage",
600
+ model: route.model.id,
601
+ inputTokens: attemptInputTokens,
602
+ outputTokens: attemptOutputTokens,
603
+ costUsd: attemptCostUsd,
604
+ });
605
+ }
606
+ state.lastRequestInputTokens = attemptLastInputTokens;
607
+ // (7.7 §7) Success-path overflow: a NORMAL stop whose reported input
608
+ // exceeds the window (z.ai silently truncated the prompt), or a `length`
609
+ // stop with zero output (MiMo truncated input, no room to generate). Both
610
+ // mean the model never saw the full context — recover (compact +
611
+ // restream) once, exactly like the error path. A no-op for every healthy
612
+ // stream: `isContextOverflow` needs a real `contextWindow` AND input over
613
+ // it (or `length` + zero output).
614
+ const recovered = await this.maybeRecoverFromOverflow({
615
+ stopReason: finishReason === "length" ? "length" : "stop",
616
+ usage: { input: attemptLastInputTokens, output: attemptOutputTokens },
617
+ contextWindow: route.model.contextWindow,
618
+ }, route, textBuffer, turnId, state, token);
619
+ if (recovered) {
620
+ attempt = 0;
621
+ continue;
622
+ }
623
+ break;
624
+ }
625
+ // If the abort signal fired during the stream, the captured error is the
626
+ // SDK reacting to cancellation — treat as Cancelled, not a provider error.
627
+ token.throwIfCancelled();
628
+ const decision = classifyProviderError(streamError);
629
+ const terminal = !decision.retryable || attempt >= retry.delaysMs.length;
630
+ if (terminal) {
631
+ // (7.7 §7) Error-path overflow recovery: catch a 413 BEFORE the terminal
632
+ // throw, compact (bypasses the circuit breaker), and restream once with
633
+ // the now-smaller history. pi's full catalogue replaces the old
634
+ // Anthropic-only regex so any provider's overflow recovers the same way.
635
+ const recovered = await this.maybeRecoverFromOverflow({ stopReason: "error", errorMessage: streamError }, route, textBuffer, turnId, state, token);
636
+ if (recovered) {
637
+ attempt = 0;
638
+ continue;
639
+ }
640
+ // Emit the bus `error` ONLY when about to throw — emitting on every retry
641
+ // would let `runPrompt`'s `lastError` listener record a transient failure
642
+ // as the exit code even after a later attempt succeeded. Subagent engines
643
+ // suppress this (see `suppressTerminalErrors`).
644
+ if (!this.opts.suppressTerminalErrors) {
645
+ this.emit({ type: "error", code: "PROVIDER", message: streamError });
646
+ }
647
+ throw new AppError(ErrorCode.Provider, `Provider error (${decision.reason}): ${streamError}`);
648
+ }
649
+ const delayMs = retry.delaysMs[attempt] ?? 0;
650
+ this.emit({
651
+ type: "log",
652
+ level: "warn",
653
+ message: `provider stream errored (${decision.reason}); retry ${attempt + 1}/${retry.delaysMs.length} after ${delayMs}ms; original: ${streamError}`,
654
+ });
655
+ // (6.5b review) If this attempt streamed partial text, the upcoming
656
+ // restream APPENDS a fresh copy (consumers don't replace) — duplicating it
657
+ // on screen. Tell consumers to drop the in-flight partial first.
658
+ if (textBuffer.length > 0) {
659
+ this.emit({ type: "assistant:reset", turnId });
660
+ }
661
+ attempt += 1;
662
+ await sleepCancellable(delayMs, token);
663
+ }
664
+ return { text: textBuffer, calls };
665
+ }
666
+ /**
667
+ * (7.7 §7) Provider-agnostic reactive overflow recovery, shared by the
668
+ * terminal-error path and the success path. When `signal` reads as a context
669
+ * overflow AND a compactor is wired AND we haven't already recovered this turn,
670
+ * compact with `reason: "overflow"` (bypasses the circuit breaker) and report
671
+ * whether the caller should restream. Telemetry-gated: a non-Anthropic recovery
672
+ * emits a `log info` so a wrong pattern surfaces as an observable event, not a
673
+ * silent loop. Fires AT MOST ONCE per turn.
674
+ */
675
+ async maybeRecoverFromOverflow(signal, route, textBuffer, turnId, state, token) {
676
+ if (!this.opts.compactor || state.recoveredFromOverflow)
677
+ return false;
678
+ if (!isContextOverflow(signal))
679
+ return false;
680
+ state.recoveredFromOverflow = true;
681
+ // (7.7 §7) Telemetry-gated: flag a non-Anthropic recovery so a wrong pattern
682
+ // surfaces as an observable event, not a silent overflow loop.
683
+ if (route.adapter.id !== "anthropic") {
684
+ this.emit({
685
+ type: "log",
686
+ level: "info",
687
+ message: `reactive overflow recovery fired for adapter '${route.adapter.id}'`,
688
+ });
689
+ }
690
+ try {
691
+ const recovery = await this.opts.compactor.compact("overflow", token.signal);
692
+ if (recovery.ok) {
693
+ // (6.5b follow-up) Mirror the retry path's partial-undo: if this attempt
694
+ // streamed partial text before the overflow, drop it so the
695
+ // post-compaction restream doesn't append onto a stale partial. Usually a
696
+ // no-op (a 413 precedes any delta) — emitted only when text was shown.
697
+ if (textBuffer.length > 0) {
698
+ this.emit({ type: "assistant:reset", turnId });
699
+ }
700
+ return true;
701
+ }
702
+ }
703
+ catch (e) {
704
+ // Compactor.compact never throws by contract, but be defensive: a malformed
705
+ // implementation shouldn't break the original error path.
706
+ this.emit({
707
+ type: "log",
708
+ level: "warn",
709
+ message: `overflow compaction unexpectedly threw: ${e.message ?? e}`,
710
+ });
711
+ }
712
+ return false;
713
+ }
714
+ /**
715
+ * (7.7 §3.3) Execute one assistant turn's tool calls. Partitions into
716
+ * batches (consecutive concurrency-safe → one parallel batch; any unsafe →
717
+ * its own serial batch), preserving submission order so a write is never
718
+ * reordered before a preceding read. Returns the `tool` messages in
719
+ * submission order plus whether the model signalled `terminate`.
720
+ *
721
+ * Cancellation is ALL-SETTLED: `runConcurrent` drains every thunk before it
722
+ * propagates, and `runTool` emits a `tool:result` on every path (success /
723
+ * denied / error / cancelled), so each emitted `tool:call` always has its
724
+ * paired result on the bus even when a sibling cancels (codex 7.7 R1
725
+ * MUST-FIX #3).
726
+ */
727
+ async executeToolBatch(calls, token, turnId) {
728
+ const { tools } = this.opts;
729
+ const cap = maxToolConcurrency();
730
+ const batches = partitionToolCalls(calls, (call) => {
731
+ const tool = tools.get(call.name);
732
+ return tool ? isCallConcurrencySafe(tool, call.args) : false;
733
+ });
734
+ const resultByCall = new Map();
735
+ const runOne = async (call) => {
736
+ // Emit `tool:call` here (not in the stream loop) so each call is paired
737
+ // with its `tool:result` from runTool — keeps the bus observably balanced
738
+ // for subscribers (TUI, NDJSON, telemetry, serve replay).
739
+ this.emit({ type: "tool:call", callId: call.callId, name: call.name, args: call.args });
740
+ resultByCall.set(call.callId, await this.runTool(call, token));
741
+ };
742
+ for (const b of batches) {
743
+ if (b.safe && b.calls.length > 1) {
744
+ // Parallel, bounded, drain-all-before-throw (no orphan tool:call).
745
+ await runConcurrent(b.calls.map((c) => () => runOne(c)), cap);
746
+ }
747
+ else {
748
+ // Serial: a single safe call, or an unsafe/write call. Preserve the
749
+ // pre-7.7 per-call cancellation checks (before AND after — a tool that
750
+ // catches cancellation internally and returns ok:false doesn't rethrow).
751
+ for (const c of b.calls) {
752
+ token.throwIfCancelled();
753
+ await runOne(c);
754
+ token.throwIfCancelled();
755
+ }
756
+ }
757
+ }
758
+ // (7.7 §3.5) Bound the aggregate size of one batch's results so a fan-out of
759
+ // parallel reads can't blow the next request's context in a single user
760
+ // message (claude-code MAX_TOOL_RESULTS_PER_MESSAGE_CHARS). Inline
761
+ // truncation of the largest results; disk-spill via the artifact mechanism
762
+ // is a future enhancement.
763
+ applyBatchResultBudget(resultByCall);
764
+ // Reassemble in submission order + aggregate terminate.
765
+ const messages = [];
766
+ let allTerminate = calls.length > 0;
767
+ for (const call of calls) {
768
+ const r = resultByCall.get(call.callId);
769
+ // A call whose runTool threw (cancellation) has no entry; the turn aborts
770
+ // and won't be persisted, so skipping it is correct.
771
+ if (!r) {
772
+ allTerminate = false;
773
+ continue;
774
+ }
775
+ messages.push({
776
+ role: "tool",
777
+ content: [{ type: "tool-result", callId: call.callId, name: call.name, output: r.output, ok: r.ok }],
778
+ });
779
+ if (!r.terminate)
780
+ allTerminate = false;
781
+ }
782
+ // (7.7 §5, OQ-5) Single todo emit point: after a successful `todo` tool call
783
+ // the session phases changed — project the full state to all four surfaces.
784
+ const todoChanged = calls.some((c) => c.name === TODO_TOOL_NAME && resultByCall.get(c.callId)?.ok);
785
+ if (todoChanged) {
786
+ this.emit({ type: "todo", turnId, phases: this.opts.session.getTodoPhases() });
787
+ }
788
+ return { messages, terminate: allTerminate };
789
+ }
479
790
  async runTool(call, token) {
480
791
  const { bus, tools, gate, plugins, workspaceRoot } = this.opts;
481
792
  const tool = tools.get(call.name);