protoagent 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,224 +17,18 @@
17
17
  * and UI-independent.
18
18
  */
19
19
  import { setMaxListeners } from 'node:events';
20
- import { getAllTools, handleToolCall } from './tools/index.js';
20
+ import { getAllTools } from './tools/index.js';
21
21
  import { generateSystemPrompt } from './system-prompt.js';
22
- import { subAgentTool, runSubAgent } from './sub-agent.js';
23
- import { estimateTokens, estimateConversationTokens, createUsageInfo, getContextInfo, } from './utils/cost-tracker.js';
22
+ import { subAgentTool } from './sub-agent.js';
23
+ import { getContextInfo, } from './utils/cost-tracker.js';
24
24
  import { compactIfNeeded } from './utils/compactor.js';
25
25
  import { logger } from './utils/logger.js';
26
+ import { processStream } from './agentic-loop/stream.js';
27
+ import { executeToolCalls } from './agentic-loop/executor.js';
28
+ import { handleApiError } from './agentic-loop/errors.js';
26
29
  function emitAbortAndFinish(onEvent) {
27
30
  onEvent({ type: 'done' });
28
31
  }
29
- async function sleepWithAbort(delayMs, abortSignal) {
30
- if (!abortSignal) {
31
- await new Promise((resolve) => setTimeout(resolve, delayMs));
32
- return;
33
- }
34
- if (abortSignal.aborted) {
35
- throw new Error('Operation aborted');
36
- }
37
- await new Promise((resolve, reject) => {
38
- const timer = setTimeout(() => {
39
- abortSignal.removeEventListener('abort', onAbort);
40
- resolve();
41
- }, delayMs);
42
- const onAbort = () => {
43
- clearTimeout(timer);
44
- abortSignal.removeEventListener('abort', onAbort);
45
- reject(new Error('Operation aborted'));
46
- };
47
- abortSignal.addEventListener('abort', onAbort, { once: true });
48
- });
49
- }
50
- /** @internal exported for unit testing only */
51
- export function appendStreamingFragment(current, fragment) {
52
- if (!fragment)
53
- return current;
54
- if (!current)
55
- return fragment;
56
- // Some providers resend the full accumulated value instead of a delta.
57
- // These two guards handle that case without corrupting normal incremental deltas.
58
- if (current === fragment)
59
- return current;
60
- if (fragment.startsWith(current))
61
- return fragment;
62
- // Normal case: incremental delta, just append.
63
- // The previous partial-overlap loop was removed because it caused false-positive
64
- // deduplication: short JSON tokens (e.g. `", "`) would coincidentally match the
65
- // tail of `current`, silently stripping characters from valid argument payloads.
66
- return current + fragment;
67
- }
68
- function collapseRepeatedString(value) {
69
- if (!value)
70
- return value;
71
- for (let size = 1; size <= Math.floor(value.length / 2); size++) {
72
- if (value.length % size !== 0)
73
- continue;
74
- const candidate = value.slice(0, size);
75
- if (candidate.repeat(value.length / size) === value) {
76
- return candidate;
77
- }
78
- }
79
- return value;
80
- }
81
- function normalizeToolName(name, validToolNames) {
82
- if (!name)
83
- return name;
84
- if (validToolNames.has(name))
85
- return name;
86
- const collapsed = collapseRepeatedString(name);
87
- if (validToolNames.has(collapsed)) {
88
- return collapsed;
89
- }
90
- return name;
91
- }
92
- function extractFirstCompleteJsonValue(value) {
93
- const trimmed = value.trim();
94
- if (!trimmed)
95
- return null;
96
- const opening = trimmed[0];
97
- const closing = opening === '{' ? '}' : opening === '[' ? ']' : null;
98
- if (!closing)
99
- return null;
100
- let depth = 0;
101
- let inString = false;
102
- let escaped = false;
103
- for (let i = 0; i < trimmed.length; i++) {
104
- const char = trimmed[i];
105
- if (inString) {
106
- if (escaped) {
107
- escaped = false;
108
- }
109
- else if (char === '\\') {
110
- escaped = true;
111
- }
112
- else if (char === '"') {
113
- inString = false;
114
- }
115
- continue;
116
- }
117
- if (char === '"') {
118
- inString = true;
119
- continue;
120
- }
121
- if (char === opening)
122
- depth++;
123
- if (char === closing)
124
- depth--;
125
- if (depth === 0) {
126
- return trimmed.slice(0, i + 1);
127
- }
128
- }
129
- return null;
130
- }
131
- /**
132
- * Repair invalid JSON escape sequences in a string value.
133
- *
134
- * JSON only allows: \" \\ \/ \b \f \n \r \t \uXXXX
135
- * Models sometimes emit \| \! \- etc. (e.g. grep regex args) which make
136
- * JSON.parse throw, and Anthropic strict-validates tool_call arguments on
137
- * every subsequent request, bricking the session permanently.
138
- *
139
- * We double the backslash for any \X where X is not a valid JSON escape char.
140
- */
141
- function repairInvalidEscapes(value) {
142
- // Match a backslash followed by any character that is NOT a valid JSON escape
143
- // Valid escapes: " \ / b f n r t u
144
- return value.replace(/\\([^"\\\/bfnrtu])/g, '\\\\$1');
145
- }
146
- function normalizeJsonArguments(argumentsText) {
147
- const trimmed = argumentsText.trim();
148
- if (!trimmed)
149
- return argumentsText;
150
- try {
151
- JSON.parse(trimmed);
152
- return trimmed;
153
- }
154
- catch {
155
- // Fall through to repair heuristics.
156
- }
157
- const collapsed = collapseRepeatedString(trimmed);
158
- if (collapsed !== trimmed) {
159
- try {
160
- JSON.parse(collapsed);
161
- return collapsed;
162
- }
163
- catch {
164
- // Fall through to next heuristic.
165
- }
166
- }
167
- const firstJsonValue = extractFirstCompleteJsonValue(trimmed);
168
- if (firstJsonValue) {
169
- try {
170
- JSON.parse(firstJsonValue);
171
- return firstJsonValue;
172
- }
173
- catch {
174
- // Give up and return the original text below.
175
- }
176
- }
177
- // Heuristic: repair invalid escape sequences (e.g. \| from grep regex args)
178
- const repaired = repairInvalidEscapes(trimmed);
179
- if (repaired !== trimmed) {
180
- try {
181
- JSON.parse(repaired);
182
- return repaired;
183
- }
184
- catch {
185
- // Try repair + first-value extraction together
186
- const repairedFirst = extractFirstCompleteJsonValue(repaired);
187
- if (repairedFirst) {
188
- try {
189
- JSON.parse(repairedFirst);
190
- return repairedFirst;
191
- }
192
- catch { /* give up */ }
193
- }
194
- }
195
- }
196
- return argumentsText;
197
- }
198
- function sanitizeToolCall(toolCall, validToolNames) {
199
- const originalName = toolCall.function?.name || '';
200
- const originalArgs = toolCall.function?.arguments || '';
201
- const normalizedName = normalizeToolName(originalName, validToolNames);
202
- const normalizedArgs = normalizeJsonArguments(originalArgs);
203
- const changed = normalizedName !== originalName || normalizedArgs !== originalArgs;
204
- if (!changed) {
205
- return { toolCall, changed: false };
206
- }
207
- return {
208
- changed: true,
209
- toolCall: {
210
- ...toolCall,
211
- function: {
212
- ...toolCall.function,
213
- name: normalizedName,
214
- arguments: normalizedArgs,
215
- },
216
- },
217
- };
218
- }
219
- function sanitizeMessagesForRetry(messages, validToolNames) {
220
- let changed = false;
221
- const sanitizedMessages = messages.map((message) => {
222
- const msgAny = message;
223
- if (message.role !== 'assistant' || !Array.isArray(msgAny.tool_calls) || msgAny.tool_calls.length === 0) {
224
- return message;
225
- }
226
- const nextToolCalls = msgAny.tool_calls.map((toolCall) => {
227
- const sanitized = sanitizeToolCall(toolCall, validToolNames);
228
- changed = changed || sanitized.changed;
229
- return sanitized.toolCall;
230
- });
231
- return {
232
- ...msgAny,
233
- tool_calls: nextToolCalls,
234
- };
235
- });
236
- return { messages: sanitizedMessages, changed };
237
- }
238
32
  function getValidToolNames() {
239
33
  return new Set([...getAllTools(), subAgentTool]
240
34
  .map((tool) => tool.function?.name)
@@ -244,8 +38,7 @@ function getValidToolNames() {
244
38
  * Process a single user input through the agentic loop.
245
39
  *
246
40
  * Takes the full conversation history (including system message),
247
- * appends the user message, runs the loop, and returns the updated
248
- * message history.
41
+ * runs the loop, and returns the updated message history.
249
42
  *
250
43
  * The `onEvent` callback is called for each event (text deltas,
251
44
  * tool calls, usage info, etc.) so the UI can render progress.
@@ -257,7 +50,7 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
257
50
  const sessionId = options.sessionId;
258
51
  const requestDefaults = options.requestDefaults || {};
259
52
  // The same AbortSignal is passed into every OpenAI SDK call and every
260
- // sleepWithAbort() across all loop iterations and sub-agent calls.
53
+ // sleep across all loop iterations and sub-agent calls.
261
54
  // The SDK attaches an 'abort' listener per request, so on a long run
262
55
  // the default limit of 10 listeners is quickly exceeded, producing the
263
56
  // MaxListenersExceededWarning. AbortSignal is a Web API EventTarget,
@@ -277,14 +70,14 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
277
70
  updatedMessages[systemMsgIndex] = { role: 'system', content: newSystemPrompt };
278
71
  }
279
72
  let iterationCount = 0;
280
- let repairRetryCount = 0;
281
- let contextRetryCount = 0;
282
- let retriggerCount = 0;
283
- let truncateRetryCount = 0;
284
- let continueRetryCount = 0;
73
+ const retryState = {
74
+ repairCount: 0,
75
+ contextCount: 0,
76
+ truncateCount: 0,
77
+ continueCount: 0,
78
+ retriggerCount: 0,
79
+ };
285
80
  const MAX_RETRIGGERS = 3;
286
- const MAX_TRUNCATE_RETRIES = 5;
287
- const MAX_CONTINUE_RETRIES = 1;
288
81
  const validToolNames = getValidToolNames();
289
82
  while (iterationCount < maxIterations) {
290
83
  // Check if abort was requested
@@ -294,12 +87,16 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
294
87
  return updatedMessages;
295
88
  }
296
89
  iterationCount++;
297
- // Check for compaction
90
+ // Check for compaction when we have pricing info (includes context window).
91
+ // Compaction preserves: (1) the system prompt at index 0, (2) any skill_content
92
+ // tool messages, and (3) the 5 most recent messages. Middle messages are
93
+ // summarized into a secondary system message. The length=0 + spread reassigns
94
+ // the array in place with the compacted structure.
298
95
  if (pricing) {
299
96
  const contextInfo = getContextInfo(updatedMessages, pricing);
300
97
  if (contextInfo.needsCompaction) {
301
- const compacted = await compactIfNeeded(client, model, updatedMessages, pricing.contextWindow, contextInfo.currentTokens, requestDefaults, sessionId);
302
- // Replace messages in-place
98
+ const compacted = await compactIfNeeded(client, model, updatedMessages, pricing.contextWindow, requestDefaults, sessionId);
99
+ // Replace messages in-place with compacted version
303
100
  updatedMessages.length = 0;
304
101
  updatedMessages.push(...compacted);
305
102
  }
@@ -314,34 +111,11 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
314
111
  toolsCount: allTools.length,
315
112
  messagesCount: updatedMessages.length,
316
113
  });
317
- // Log message structure for debugging provider compatibility
318
- for (const msg of updatedMessages) {
319
- const m = msg;
320
- if (m.role === 'tool') {
321
- logger.trace('Message payload', {
322
- role: m.role,
323
- tool_call_id: m.tool_call_id,
324
- contentLength: m.content?.length,
325
- contentPreview: m.content?.slice(0, 100),
326
- });
327
- }
328
- else if (m.role === 'assistant' && m.tool_calls?.length) {
329
- logger.trace('Message payload', {
330
- role: m.role,
331
- toolCalls: m.tool_calls.map((tc) => ({
332
- id: tc.id,
333
- name: tc.function?.name,
334
- argsLength: tc.function?.arguments?.length,
335
- })),
336
- });
337
- }
338
- else {
339
- logger.trace('Message payload', {
340
- role: m.role,
341
- contentLength: m.content?.length,
342
- });
343
- }
344
- }
114
+ // Debug: log message roles and sizes
115
+ logger.trace('Messages', { msgs: updatedMessages.map((m) => ({
116
+ role: m.role,
117
+ len: m.content?.length || m.tool_calls?.length || 0,
118
+ })) });
345
119
  const stream = await client.chat.completions.create({
346
120
  ...requestDefaults,
347
121
  model,
@@ -353,112 +127,15 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
353
127
  }, {
354
128
  signal: abortSignal,
355
129
  });
356
- // Accumulate the streamed response
357
- assistantMessage = {
358
- role: 'assistant',
359
- content: '',
360
- tool_calls: [],
361
- };
362
- let streamedContent = '';
363
- let hasToolCalls = false;
364
- let actualUsage;
365
- for await (const chunk of stream) {
366
- const delta = chunk.choices[0]?.delta;
367
- if (chunk.usage) {
368
- actualUsage = chunk.usage;
369
- }
370
- // Stream text content (and return to UI for immediate display via onEvent)
371
- if (delta?.content) {
372
- streamedContent += delta.content;
373
- assistantMessage.content = streamedContent;
374
- if (!hasToolCalls) {
375
- onEvent({ type: 'text_delta', content: delta.content });
376
- }
377
- }
378
- // Accumulate tool calls across stream chunks
379
- if (delta?.tool_calls) {
380
- hasToolCalls = true;
381
- for (const tc of delta.tool_calls) {
382
- const idx = tc.index || 0;
383
- if (!assistantMessage.tool_calls[idx]) {
384
- assistantMessage.tool_calls[idx] = {
385
- id: '',
386
- type: 'function',
387
- function: { name: '', arguments: '' },
388
- };
389
- }
390
- if (tc.id)
391
- assistantMessage.tool_calls[idx].id = tc.id;
392
- if (tc.function?.name) {
393
- assistantMessage.tool_calls[idx].function.name = appendStreamingFragment(assistantMessage.tool_calls[idx].function.name, tc.function.name);
394
- }
395
- if (tc.function?.arguments) {
396
- assistantMessage.tool_calls[idx].function.arguments = appendStreamingFragment(assistantMessage.tool_calls[idx].function.arguments, tc.function.arguments);
397
- }
398
- // Gemini 3+ models include an `extra_content` field on tool calls
399
- // containing a `thought_signature`. This MUST be preserved and sent
400
- // back in subsequent requests, otherwise Gemini returns a 400.
401
- // See: https://ai.google.dev/gemini-api/docs/openai
402
- // See also: https://gist.github.com/thomasgauvin/3cfe8e907c957fba4e132e6cf0f06292
403
- if (tc.extra_content) {
404
- assistantMessage.tool_calls[idx].extra_content = tc.extra_content;
405
- }
406
- }
407
- }
408
- }
409
- // Log API response with usage info at INFO level
410
- {
411
- const inputTokens = actualUsage?.prompt_tokens ?? estimateConversationTokens(updatedMessages);
412
- const outputTokens = actualUsage?.completion_tokens ?? estimateTokens(assistantMessage.content || '');
413
- const cachedTokens = actualUsage?.prompt_tokens_details?.cached_tokens;
414
- const cost = pricing
415
- ? createUsageInfo(inputTokens, outputTokens, pricing, cachedTokens).estimatedCost
416
- : 0;
417
- const contextPercent = pricing
418
- ? getContextInfo(updatedMessages, pricing).utilizationPercentage
419
- : 0;
420
- logger.info('Received API response', {
421
- model,
422
- inputTokens,
423
- outputTokens,
424
- cachedTokens,
425
- cost: cost > 0 ? `$${cost.toFixed(4)}` : 'N/A',
426
- contextPercent: contextPercent > 0 ? `${contextPercent.toFixed(1)}%` : 'N/A',
427
- hasToolCalls: assistantMessage.tool_calls.length > 0,
428
- contentLength: assistantMessage.content?.length || 0,
429
- });
430
- onEvent({
431
- type: 'usage',
432
- usage: { inputTokens, outputTokens, cost, contextPercent },
433
- });
434
- }
435
- // Log the full assistant message for debugging
436
- logger.debug('Assistant response details', {
437
- contentLength: assistantMessage.content?.length || 0,
438
- contentPreview: assistantMessage.content?.slice(0, 200) || '(empty)',
439
- toolCallsCount: assistantMessage.tool_calls?.length || 0,
440
- toolCalls: assistantMessage.tool_calls?.map((tc) => ({
441
- id: tc.id,
442
- name: tc.function?.name,
443
- argsPreview: tc.function?.arguments?.slice(0, 100),
444
- })),
445
- });
130
+ // Process the streaming response
131
+ const streamResult = await processStream(stream, updatedMessages, model, pricing, onEvent);
132
+ assistantMessage = streamResult.assistantMessage;
446
133
  // Handle tool calls
447
- if (assistantMessage.tool_calls.length > 0) {
134
+ if (streamResult.hasToolCalls) {
448
135
  // Reset retrigger count on valid tool call response
449
- retriggerCount = 0;
136
+ retryState.retriggerCount = 0;
450
137
  // Clean up empty tool_calls entries (from sparse array)
451
138
  assistantMessage.tool_calls = assistantMessage.tool_calls.filter(Boolean);
452
- assistantMessage.tool_calls = assistantMessage.tool_calls.map((toolCall) => {
453
- const sanitized = sanitizeToolCall(toolCall, validToolNames);
454
- if (sanitized.changed) {
455
- logger.warn('Sanitized streamed tool call', {
456
- originalName: toolCall.function?.name,
457
- sanitizedName: sanitized.toolCall.function?.name,
458
- });
459
- }
460
- return sanitized.toolCall;
461
- });
462
139
  // Validate that all tool calls have valid JSON arguments
463
140
  const invalidToolCalls = assistantMessage.tool_calls.filter((tc) => {
464
141
  const args = tc.function?.arguments;
@@ -488,91 +165,19 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
488
165
  tools: assistantMessage.tool_calls.map((tc) => tc.function?.name).join(', '),
489
166
  });
490
167
  updatedMessages.push(assistantMessage);
491
- // Track which tool_call_ids still need a tool result message.
492
- // This set is used to inject stub responses on abort, preventing
493
- // orphaned tool_call_ids from permanently bricking the session.
494
- const pendingToolCallIds = new Set(assistantMessage.tool_calls.map((tc) => tc.id));
495
- const injectStubsForPendingToolCalls = () => {
496
- for (const id of pendingToolCallIds) {
497
- updatedMessages.push({
498
- role: 'tool',
499
- tool_call_id: id,
500
- content: 'Aborted by user.',
501
- });
502
- }
168
+ // Execute tool calls
169
+ const toolContext = {
170
+ sessionId,
171
+ abortSignal,
172
+ requestDefaults,
173
+ client,
174
+ model,
175
+ pricing,
503
176
  };
504
- for (const toolCall of assistantMessage.tool_calls) {
505
- // Check abort between tool calls
506
- if (abortSignal?.aborted) {
507
- logger.debug('Agentic loop aborted between tool calls');
508
- injectStubsForPendingToolCalls();
509
- emitAbortAndFinish(onEvent);
510
- return updatedMessages;
511
- }
512
- const { name, arguments: argsStr } = toolCall.function;
513
- onEvent({
514
- type: 'tool_call',
515
- toolCall: { id: toolCall.id, name, args: argsStr, status: 'running' },
516
- });
517
- try {
518
- const args = JSON.parse(argsStr);
519
- let result;
520
- // Handle sub-agent tool specially
521
- if (name === 'sub_agent') {
522
- const subProgress = (evt) => {
523
- onEvent({
524
- type: 'sub_agent_iteration',
525
- subAgentTool: { tool: evt.tool, status: evt.status, iteration: evt.iteration, args: evt.args },
526
- });
527
- };
528
- const subResult = await runSubAgent(client, model, args.task, args.max_iterations, requestDefaults, subProgress, abortSignal, pricing);
529
- result = subResult.response;
530
- // Emit sub-agent usage for the UI to add to total cost
531
- if (subResult.usage.inputTokens > 0 || subResult.usage.outputTokens > 0) {
532
- onEvent({
533
- type: 'sub_agent_iteration',
534
- subAgentUsage: subResult.usage,
535
- });
536
- }
537
- }
538
- else {
539
- result = await handleToolCall(name, args, { sessionId, abortSignal });
540
- }
541
- logger.info('Tool completed', {
542
- tool: name,
543
- resultLength: result.length,
544
- });
545
- updatedMessages.push({
546
- role: 'tool',
547
- tool_call_id: toolCall.id,
548
- content: result,
549
- });
550
- pendingToolCallIds.delete(toolCall.id);
551
- onEvent({
552
- type: 'tool_result',
553
- toolCall: { id: toolCall.id, name, args: argsStr, status: 'done', result },
554
- });
555
- }
556
- catch (err) {
557
- const errMsg = err instanceof Error ? err.message : String(err);
558
- updatedMessages.push({
559
- role: 'tool',
560
- tool_call_id: toolCall.id,
561
- content: `Error: ${errMsg}`,
562
- });
563
- pendingToolCallIds.delete(toolCall.id);
564
- // If the tool was aborted, inject stubs for remaining pending calls and stop
565
- if (abortSignal?.aborted || (err instanceof Error && (err.name === 'AbortError' || err.message === 'Operation aborted'))) {
566
- logger.debug('Agentic loop aborted during tool execution');
567
- injectStubsForPendingToolCalls();
568
- emitAbortAndFinish(onEvent);
569
- return updatedMessages;
570
- }
571
- onEvent({
572
- type: 'tool_result',
573
- toolCall: { id: toolCall.id, name, args: argsStr, status: 'error', result: errMsg },
574
- });
575
- }
177
+ const executionResult = await executeToolCalls(assistantMessage.tool_calls, updatedMessages, onEvent, toolContext);
178
+ if (executionResult.shouldAbort) {
179
+ emitAbortAndFinish(onEvent);
180
+ return updatedMessages;
576
181
  }
577
182
  // Signal UI that this iteration's tool calls are all done,
578
183
  // so it can flush completed messages to static output.
@@ -587,20 +192,20 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
587
192
  content: assistantMessage.content,
588
193
  });
589
194
  // Reset retrigger count on valid content response
590
- retriggerCount = 0;
195
+ retryState.retriggerCount = 0;
591
196
  }
592
197
  // Check if we need to retrigger: if the last message is a tool result
593
198
  // but we got no assistant response (empty content, no tool_calls), the AI
594
199
  // may have stopped prematurely. Inject a 'continue' prompt and retry.
595
200
  const lastMessage = updatedMessages[updatedMessages.length - 1];
596
- if (lastMessage?.role === 'tool' && retriggerCount < MAX_RETRIGGERS) {
597
- retriggerCount++;
201
+ if (lastMessage?.role === 'tool' && retryState.retriggerCount < MAX_RETRIGGERS) {
202
+ retryState.retriggerCount++;
598
203
  logger.warn('AI stopped after tool call without responding; retriggering', {
599
- retriggerCount,
204
+ retriggerCount: retryState.retriggerCount,
600
205
  maxRetriggers: MAX_RETRIGGERS,
601
206
  lastMessageRole: lastMessage.role,
602
207
  assistantContent: assistantMessage.content || '(empty)',
603
- hasToolCalls: assistantMessage.tool_calls.length > 0,
208
+ hasToolCalls: assistantMessage.tool_calls?.length > 0,
604
209
  });
605
210
  // Inject a 'continue' prompt to help the AI continue
606
211
  updatedMessages.push({
@@ -609,8 +214,9 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
609
214
  });
610
215
  continue;
611
216
  }
612
- repairRetryCount = 0;
613
- retriggerCount = 0;
217
+ // Reset retry counts on successful completion
218
+ retryState.repairCount = 0;
219
+ retryState.retriggerCount = 0;
614
220
  onEvent({ type: 'done' });
615
221
  return updatedMessages;
616
222
  }
@@ -650,161 +256,29 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
650
256
  emitAbortAndFinish(onEvent);
651
257
  return updatedMessages;
652
258
  }
653
- const errMsg = apiError?.message || 'Unknown API error';
654
- // Try to extract response body for more details
655
- let responseBody;
656
- try {
657
- if (apiError?.response) {
658
- responseBody = JSON.stringify(apiError.response);
659
- }
660
- else if (apiError?.error) {
661
- responseBody = JSON.stringify(apiError.error);
662
- }
663
- }
664
- catch { /* ignore */ }
665
- logger.error(`API error: ${errMsg}`, {
666
- status: apiError?.status,
667
- code: apiError?.code,
668
- responseBody,
669
- headers: apiError?.headers ? Object.fromEntries(Object.entries(apiError.headers).filter(([k]) => ['content-type', 'x-error', 'retry-after'].includes(k.toLowerCase()))) : undefined,
670
- });
671
- // Log the last few messages to help debug format issues
672
- logger.debug('Messages at time of error', {
673
- lastMessages: updatedMessages.slice(-3).map((m) => ({
674
- role: m.role,
675
- hasToolCalls: !!(m.tool_calls?.length),
676
- tool_call_id: m.tool_call_id,
677
- contentPreview: m.content?.slice(0, 150),
678
- })),
679
- });
680
- const retryableStatus = apiError?.status === 408 || apiError?.status === 409 || apiError?.status === 425;
681
- const retryableCode = ['ECONNRESET', 'ECONNABORTED', 'ETIMEDOUT', 'ENETUNREACH', 'EAI_AGAIN'].includes(apiError?.code);
682
- // Handle 400 errors: try sanitization first, then truncate messages
683
- if (apiError?.status === 400) {
684
- // Try sanitization first
685
- if (repairRetryCount < 2) {
686
- const sanitized = sanitizeMessagesForRetry(updatedMessages, getValidToolNames());
687
- if (sanitized.changed) {
688
- repairRetryCount++;
689
- updatedMessages.length = 0;
690
- updatedMessages.push(...sanitized.messages);
691
- logger.warn('400 response after malformed tool payload; retrying with sanitized messages', {
692
- repairRetryCount,
693
- });
694
- // Silently retry without showing error to user
695
- continue;
696
- }
697
- }
698
- // If sanitization didn't help, try removing messages one at a time (up to 5)
699
- if (truncateRetryCount < MAX_TRUNCATE_RETRIES) {
700
- truncateRetryCount++;
701
- const removedCount = Math.min(1, Math.max(0, updatedMessages.length - 2)); // Remove 1 at a time, keep system + at least 1 user
702
- if (removedCount > 0) {
703
- const removed = updatedMessages.splice(-removedCount);
704
- logger.debug('400 error: removing message from history to attempt fix', {
705
- truncateRetryCount,
706
- maxRetries: MAX_TRUNCATE_RETRIES,
707
- removedCount,
708
- removedRoles: removed.map((m) => m.role),
709
- removedPreviews: removed.map((m) => ({
710
- role: m.role,
711
- content: m.content?.slice(0, 100),
712
- tool_calls: m.tool_calls?.map((tc) => tc.function?.name),
713
- })),
714
- });
715
- // Silently retry without showing error to user
716
- continue;
717
- }
718
- }
719
- // After truncation retries exhausted, try adding a "continue" message
720
- if (continueRetryCount < MAX_CONTINUE_RETRIES) {
721
- continueRetryCount++;
722
- updatedMessages.push({ role: 'user', content: 'continue' });
723
- logger.warn('400 error: adding "continue" message to retry', {
724
- continueRetryCount,
725
- messageCount: updatedMessages.length,
726
- });
727
- onEvent({
728
- type: 'error',
729
- error: 'Request failed. Retrying with "continue"...',
730
- transient: true,
731
- });
732
- continue;
733
- }
259
+ // Handle API errors with retry strategies
260
+ const errorResult = await handleApiError(apiError, updatedMessages, validToolNames, pricing, retryState, iterationCount, onEvent, client, model, requestDefaults, sessionId);
261
+ if (errorResult.shouldAbort) {
262
+ emitAbortAndFinish(onEvent);
263
+ return updatedMessages;
734
264
  }
735
- // Handle context-window-exceeded (prompt too long) — attempt forced compaction
736
- // This fires when our token estimate was too low (e.g. base64 images from MCP tools)
737
- // and the request actually hit the hard provider limit.
738
- const isContextTooLong = apiError?.status === 400 &&
739
- typeof errMsg === 'string' &&
740
- /prompt.{0,30}too long|context.{0,30}length|maximum.{0,30}token|tokens?.{0,10}exceed/i.test(errMsg);
741
- if (isContextTooLong && contextRetryCount < 2) {
742
- contextRetryCount++;
743
- logger.warn(`Prompt too long (attempt ${contextRetryCount}); forcing compaction`, { errMsg });
265
+ if (!errorResult.handled) {
266
+ // Non-retryable error
744
267
  onEvent({
745
268
  type: 'error',
746
- error: 'Prompt too long. Compacting conversation and retrying...',
747
- transient: true,
269
+ error: errorResult.errorMessage || 'Unknown error',
270
+ transient: errorResult.transient,
748
271
  });
749
- if (pricing) {
750
- // Use the normal LLM-based compaction path
751
- try {
752
- const compacted = await compactIfNeeded(client, model, updatedMessages, pricing.contextWindow,
753
- // Pass the context window itself as currentTokens to force compaction
754
- pricing.contextWindow, requestDefaults, sessionId);
755
- updatedMessages.length = 0;
756
- updatedMessages.push(...compacted);
757
- }
758
- catch (compactErr) {
759
- logger.error(`Forced compaction failed: ${compactErr}`);
760
- // Fall through to truncation fallback below
761
- }
762
- }
763
- // Fallback: truncate any tool result messages whose content looks like
764
- // base64 or is extremely large (e.g. MCP screenshot data)
765
- const MAX_TOOL_RESULT_CHARS = 20_000;
766
- for (let i = 0; i < updatedMessages.length; i++) {
767
- const m = updatedMessages[i];
768
- if (m.role === 'tool' && typeof m.content === 'string' && m.content.length > MAX_TOOL_RESULT_CHARS) {
769
- updatedMessages[i] = {
770
- ...m,
771
- content: m.content.slice(0, MAX_TOOL_RESULT_CHARS) + '\n... (truncated — content was too large)',
772
- };
773
- }
774
- }
775
- continue;
776
- }
777
- // Retry on 429 (rate limit) with backoff
778
- if (apiError?.status === 429) {
779
- const retryAfter = parseInt(apiError?.headers?.['retry-after'] || '5', 10);
780
- const backoff = Math.min(retryAfter * 1000, 60_000);
781
- logger.info(`Rate limited, retrying in ${backoff / 1000}s...`);
782
- onEvent({ type: 'error', error: `Rate limited. Retrying in ${backoff / 1000}s...`, transient: true });
783
- await sleepWithAbort(backoff, abortSignal);
784
- continue;
785
- }
786
- // Retry on transient request failures
787
- if (apiError?.status >= 500 || retryableStatus || retryableCode) {
788
- const backoff = Math.min(2 ** iterationCount * 1000, 30_000);
789
- logger.info(`Request failed, retrying in ${backoff / 1000}s...`);
790
- onEvent({ type: 'error', error: `Request failed. Retrying in ${backoff / 1000}s...`, transient: true });
791
- await sleepWithAbort(backoff, abortSignal);
792
- continue;
272
+ onEvent({ type: 'done' });
273
+ return updatedMessages;
793
274
  }
794
- // 400 error that couldn't be fixed by sanitization or truncation
795
- if (apiError?.status === 400) {
796
- onEvent({
797
- type: 'error',
798
- error: `Request failed: ${errMsg}\n\nThe conversation history could not be automatically repaired. Try /clear to start fresh.`,
799
- transient: false,
800
- });
275
+ // If handled but not silently, the error was already emitted
276
+ if (!errorResult.silentRetry) {
801
277
  onEvent({ type: 'done' });
802
278
  return updatedMessages;
803
279
  }
804
- // Non-retryable error
805
- onEvent({ type: 'error', error: errMsg });
806
- onEvent({ type: 'done' });
807
- return updatedMessages;
280
+ // Silent retry - continue the loop
281
+ continue;
808
282
  }
809
283
  }
810
284
  onEvent({ type: 'error', error: 'Maximum iteration limit reached.' });