protoagent 0.1.14 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,259 +17,18 @@
17
17
  * and UI-independent.
18
18
  */
19
19
  import { setMaxListeners } from 'node:events';
20
- import { getAllTools, handleToolCall } from './tools/index.js';
20
+ import { getAllTools } from './tools/index.js';
21
21
  import { generateSystemPrompt } from './system-prompt.js';
22
- import { subAgentTool, runSubAgent } from './sub-agent.js';
23
- import { estimateTokens, estimateConversationTokens, createUsageInfo, getContextInfo, } from './utils/cost-tracker.js';
22
+ import { subAgentTool } from './sub-agent.js';
23
+ import { getContextInfo, } from './utils/cost-tracker.js';
24
24
  import { compactIfNeeded } from './utils/compactor.js';
25
25
  import { logger } from './utils/logger.js';
26
+ import { processStream } from './agentic-loop/stream.js';
27
+ import { executeToolCalls } from './agentic-loop/executor.js';
28
+ import { handleApiError } from './agentic-loop/errors.js';
26
29
  function emitAbortAndFinish(onEvent) {
27
30
  onEvent({ type: 'done' });
28
31
  }
29
- async function sleepWithAbort(delayMs, abortSignal) {
30
- if (!abortSignal) {
31
- await new Promise((resolve) => setTimeout(resolve, delayMs));
32
- return;
33
- }
34
- if (abortSignal.aborted) {
35
- throw new Error('Operation aborted');
36
- }
37
- await new Promise((resolve, reject) => {
38
- const timer = setTimeout(() => {
39
- abortSignal.removeEventListener('abort', onAbort);
40
- resolve();
41
- }, delayMs);
42
- const onAbort = () => {
43
- clearTimeout(timer);
44
- abortSignal.removeEventListener('abort', onAbort);
45
- reject(new Error('Operation aborted'));
46
- };
47
- abortSignal.addEventListener('abort', onAbort, { once: true });
48
- });
49
- }
50
- /** @internal exported for unit testing only */
51
- export function appendStreamingFragment(current, fragment) {
52
- if (!fragment)
53
- return current;
54
- if (!current)
55
- return fragment;
56
- // Some providers resend the full accumulated value instead of a delta.
57
- // These two guards handle that case without corrupting normal incremental deltas.
58
- if (current === fragment)
59
- return current;
60
- if (fragment.startsWith(current))
61
- return fragment;
62
- // Normal case: incremental delta, just append.
63
- // The previous partial-overlap loop was removed because it caused false-positive
64
- // deduplication: short JSON tokens (e.g. `", "`) would coincidentally match the
65
- // tail of `current`, silently stripping characters from valid argument payloads.
66
- return current + fragment;
67
- }
68
- function collapseRepeatedString(value) {
69
- if (!value)
70
- return value;
71
- for (let size = 1; size <= Math.floor(value.length / 2); size++) {
72
- if (value.length % size !== 0)
73
- continue;
74
- const candidate = value.slice(0, size);
75
- if (candidate.repeat(value.length / size) === value) {
76
- return candidate;
77
- }
78
- }
79
- return value;
80
- }
81
- function normalizeToolName(name, validToolNames) {
82
- if (!name)
83
- return name;
84
- if (validToolNames.has(name))
85
- return name;
86
- const collapsed = collapseRepeatedString(name);
87
- if (validToolNames.has(collapsed)) {
88
- return collapsed;
89
- }
90
- return name;
91
- }
92
- function extractFirstCompleteJsonValue(value) {
93
- const trimmed = value.trim();
94
- if (!trimmed)
95
- return null;
96
- const opening = trimmed[0];
97
- const closing = opening === '{' ? '}' : opening === '[' ? ']' : null;
98
- if (!closing)
99
- return null;
100
- let depth = 0;
101
- let inString = false;
102
- let escaped = false;
103
- for (let i = 0; i < trimmed.length; i++) {
104
- const char = trimmed[i];
105
- if (inString) {
106
- if (escaped) {
107
- escaped = false;
108
- }
109
- else if (char === '\\') {
110
- escaped = true;
111
- }
112
- else if (char === '"') {
113
- inString = false;
114
- }
115
- continue;
116
- }
117
- if (char === '"') {
118
- inString = true;
119
- continue;
120
- }
121
- if (char === opening)
122
- depth++;
123
- if (char === closing)
124
- depth--;
125
- if (depth === 0) {
126
- return trimmed.slice(0, i + 1);
127
- }
128
- }
129
- return null;
130
- }
131
- /**
132
- * Repair invalid JSON escape sequences in a string value.
133
- *
134
- * JSON only allows: \" \\ \/ \b \f \n \r \t \uXXXX
135
- * Models sometimes emit \| \! \- etc. (e.g. grep regex args) which make
136
- * JSON.parse throw, and Anthropic strict-validates tool_call arguments on
137
- * every subsequent request, bricking the session permanently.
138
- *
139
- * We double the backslash for any \X where X is not a valid JSON escape char.
140
- */
141
- function repairInvalidEscapes(value) {
142
- // Match a backslash followed by any character that is NOT a valid JSON escape
143
- // Valid escapes: " \ / b f n r t u
144
- return value.replace(/\\([^"\\\/bfnrtu])/g, '\\\\$1');
145
- }
146
- function normalizeJsonArguments(argumentsText) {
147
- const trimmed = argumentsText.trim();
148
- if (!trimmed)
149
- return argumentsText;
150
- try {
151
- JSON.parse(trimmed);
152
- return trimmed;
153
- }
154
- catch {
155
- // Fall through to repair heuristics.
156
- }
157
- const collapsed = collapseRepeatedString(trimmed);
158
- if (collapsed !== trimmed) {
159
- try {
160
- JSON.parse(collapsed);
161
- return collapsed;
162
- }
163
- catch {
164
- // Fall through to next heuristic.
165
- }
166
- }
167
- const firstJsonValue = extractFirstCompleteJsonValue(trimmed);
168
- if (firstJsonValue) {
169
- try {
170
- JSON.parse(firstJsonValue);
171
- return firstJsonValue;
172
- }
173
- catch {
174
- // Give up and return the original text below.
175
- }
176
- }
177
- // Heuristic: repair invalid escape sequences (e.g. \| from grep regex args)
178
- const repaired = repairInvalidEscapes(trimmed);
179
- if (repaired !== trimmed) {
180
- try {
181
- JSON.parse(repaired);
182
- return repaired;
183
- }
184
- catch {
185
- // Try repair + first-value extraction together
186
- const repairedFirst = extractFirstCompleteJsonValue(repaired);
187
- if (repairedFirst) {
188
- try {
189
- JSON.parse(repairedFirst);
190
- return repairedFirst;
191
- }
192
- catch { /* give up */ }
193
- }
194
- }
195
- }
196
- return argumentsText;
197
- }
198
- function sanitizeToolCall(toolCall, validToolNames) {
199
- const originalName = toolCall.function?.name || '';
200
- const originalArgs = toolCall.function?.arguments || '';
201
- const normalizedName = normalizeToolName(originalName, validToolNames);
202
- const normalizedArgs = normalizeJsonArguments(originalArgs);
203
- const changed = normalizedName !== originalName || normalizedArgs !== originalArgs;
204
- if (!changed) {
205
- return { toolCall, changed: false };
206
- }
207
- return {
208
- changed: true,
209
- toolCall: {
210
- ...toolCall,
211
- function: {
212
- ...toolCall.function,
213
- name: normalizedName,
214
- arguments: normalizedArgs,
215
- },
216
- },
217
- };
218
- }
219
- function sanitizeMessagesForRetry(messages, validToolNames) {
220
- let changed = false;
221
- const sanitizedMessages = messages.map((message) => {
222
- const msgAny = message;
223
- if (message.role !== 'assistant' || !Array.isArray(msgAny.tool_calls) || msgAny.tool_calls.length === 0) {
224
- return message;
225
- }
226
- const nextToolCalls = msgAny.tool_calls.map((toolCall) => {
227
- const sanitized = sanitizeToolCall(toolCall, validToolNames);
228
- changed = changed || sanitized.changed;
229
- return sanitized.toolCall;
230
- });
231
- return {
232
- ...msgAny,
233
- tool_calls: nextToolCalls,
234
- };
235
- });
236
- return { messages: sanitizedMessages, changed };
237
- }
238
- /**
239
- * Remove orphaned tool result messages that don't have a matching tool_call_id
240
- * in any assistant message. This happens when messages are truncated and the
241
- * assistant's tool_calls are removed but the tool results remain.
242
- */
243
- function removeOrphanedToolResults(messages) {
244
- // Collect all valid tool_call_ids from assistant messages
245
- const validToolCallIds = new Set();
246
- for (const message of messages) {
247
- const msgAny = message;
248
- if (message.role === 'assistant' && Array.isArray(msgAny.tool_calls)) {
249
- for (const tc of msgAny.tool_calls) {
250
- if (tc.id) {
251
- validToolCallIds.add(tc.id);
252
- }
253
- }
254
- }
255
- }
256
- // Filter out tool messages with orphaned tool_call_ids
257
- const filteredMessages = messages.filter((message) => {
258
- const msgAny = message;
259
- if (message.role === 'tool' && msgAny.tool_call_id) {
260
- const isOrphaned = !validToolCallIds.has(msgAny.tool_call_id);
261
- if (isOrphaned) {
262
- logger.warn('Removing orphaned tool result', {
263
- tool_call_id: msgAny.tool_call_id,
264
- contentPreview: msgAny.content?.slice(0, 100),
265
- });
266
- }
267
- return !isOrphaned;
268
- }
269
- return true;
270
- });
271
- return { messages: filteredMessages, changed: filteredMessages.length !== messages.length };
272
- }
273
32
  function getValidToolNames() {
274
33
  return new Set([...getAllTools(), subAgentTool]
275
34
  .map((tool) => tool.function?.name)
@@ -279,8 +38,7 @@ function getValidToolNames() {
279
38
  * Process a single user input through the agentic loop.
280
39
  *
281
40
  * Takes the full conversation history (including system message),
282
- * appends the user message, runs the loop, and returns the updated
283
- * message history.
41
+ * runs the loop, and returns the updated message history.
284
42
  *
285
43
  * The `onEvent` callback is called for each event (text deltas,
286
44
  * tool calls, usage info, etc.) so the UI can render progress.
@@ -292,7 +50,7 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
292
50
  const sessionId = options.sessionId;
293
51
  const requestDefaults = options.requestDefaults || {};
294
52
  // The same AbortSignal is passed into every OpenAI SDK call and every
295
- // sleepWithAbort() across all loop iterations and sub-agent calls.
53
+ // sleep across all loop iterations and sub-agent calls.
296
54
  // The SDK attaches an 'abort' listener per request, so on a long run
297
55
  // the default limit of 10 listeners is quickly exceeded, producing the
298
56
  // MaxListenersExceededWarning. AbortSignal is a Web API EventTarget,
@@ -312,14 +70,14 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
312
70
  updatedMessages[systemMsgIndex] = { role: 'system', content: newSystemPrompt };
313
71
  }
314
72
  let iterationCount = 0;
315
- let repairRetryCount = 0;
316
- let contextRetryCount = 0;
317
- let retriggerCount = 0;
318
- let truncateRetryCount = 0;
319
- let continueRetryCount = 0;
73
+ const retryState = {
74
+ repairCount: 0,
75
+ contextCount: 0,
76
+ truncateCount: 0,
77
+ continueCount: 0,
78
+ retriggerCount: 0,
79
+ };
320
80
  const MAX_RETRIGGERS = 3;
321
- const MAX_TRUNCATE_RETRIES = 5;
322
- const MAX_CONTINUE_RETRIES = 1;
323
81
  const validToolNames = getValidToolNames();
324
82
  while (iterationCount < maxIterations) {
325
83
  // Check if abort was requested
@@ -329,12 +87,16 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
329
87
  return updatedMessages;
330
88
  }
331
89
  iterationCount++;
332
- // Check for compaction
90
+ // Check for compaction when we have pricing info (includes context window).
91
+ // Compaction preserves: (1) the system prompt at index 0, (2) any skill_content
92
+ // tool messages, and (3) the 5 most recent messages. Middle messages are
93
+ // summarized into a secondary system message. The length=0 + spread reassigns
94
+ // the array in place with the compacted structure.
333
95
  if (pricing) {
334
96
  const contextInfo = getContextInfo(updatedMessages, pricing);
335
97
  if (contextInfo.needsCompaction) {
336
- const compacted = await compactIfNeeded(client, model, updatedMessages, pricing.contextWindow, contextInfo.currentTokens, requestDefaults, sessionId);
337
- // Replace messages in-place
98
+ const compacted = await compactIfNeeded(client, model, updatedMessages, pricing.contextWindow, requestDefaults, sessionId);
99
+ // Replace messages in-place with compacted version
338
100
  updatedMessages.length = 0;
339
101
  updatedMessages.push(...compacted);
340
102
  }
@@ -349,34 +111,11 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
349
111
  toolsCount: allTools.length,
350
112
  messagesCount: updatedMessages.length,
351
113
  });
352
- // Log message structure for debugging provider compatibility
353
- for (const msg of updatedMessages) {
354
- const m = msg;
355
- if (m.role === 'tool') {
356
- logger.trace('Message payload', {
357
- role: m.role,
358
- tool_call_id: m.tool_call_id,
359
- contentLength: m.content?.length,
360
- contentPreview: m.content?.slice(0, 100),
361
- });
362
- }
363
- else if (m.role === 'assistant' && m.tool_calls?.length) {
364
- logger.trace('Message payload', {
365
- role: m.role,
366
- toolCalls: m.tool_calls.map((tc) => ({
367
- id: tc.id,
368
- name: tc.function?.name,
369
- argsLength: tc.function?.arguments?.length,
370
- })),
371
- });
372
- }
373
- else {
374
- logger.trace('Message payload', {
375
- role: m.role,
376
- contentLength: m.content?.length,
377
- });
378
- }
379
- }
114
+ // Debug: log message roles and sizes
115
+ logger.trace('Messages', { msgs: updatedMessages.map((m) => ({
116
+ role: m.role,
117
+ len: m.content?.length || m.tool_calls?.length || 0,
118
+ })) });
380
119
  const stream = await client.chat.completions.create({
381
120
  ...requestDefaults,
382
121
  model,
@@ -388,112 +127,15 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
388
127
  }, {
389
128
  signal: abortSignal,
390
129
  });
391
- // Accumulate the streamed response
392
- assistantMessage = {
393
- role: 'assistant',
394
- content: '',
395
- tool_calls: [],
396
- };
397
- let streamedContent = '';
398
- let hasToolCalls = false;
399
- let actualUsage;
400
- for await (const chunk of stream) {
401
- const delta = chunk.choices[0]?.delta;
402
- if (chunk.usage) {
403
- actualUsage = chunk.usage;
404
- }
405
- // Stream text content (and return to UI for immediate display via onEvent)
406
- if (delta?.content) {
407
- streamedContent += delta.content;
408
- assistantMessage.content = streamedContent;
409
- if (!hasToolCalls) {
410
- onEvent({ type: 'text_delta', content: delta.content });
411
- }
412
- }
413
- // Accumulate tool calls across stream chunks
414
- if (delta?.tool_calls) {
415
- hasToolCalls = true;
416
- for (const tc of delta.tool_calls) {
417
- const idx = tc.index || 0;
418
- if (!assistantMessage.tool_calls[idx]) {
419
- assistantMessage.tool_calls[idx] = {
420
- id: '',
421
- type: 'function',
422
- function: { name: '', arguments: '' },
423
- };
424
- }
425
- if (tc.id)
426
- assistantMessage.tool_calls[idx].id = tc.id;
427
- if (tc.function?.name) {
428
- assistantMessage.tool_calls[idx].function.name = appendStreamingFragment(assistantMessage.tool_calls[idx].function.name, tc.function.name);
429
- }
430
- if (tc.function?.arguments) {
431
- assistantMessage.tool_calls[idx].function.arguments = appendStreamingFragment(assistantMessage.tool_calls[idx].function.arguments, tc.function.arguments);
432
- }
433
- // Gemini 3+ models include an `extra_content` field on tool calls
434
- // containing a `thought_signature`. This MUST be preserved and sent
435
- // back in subsequent requests, otherwise Gemini returns a 400.
436
- // See: https://ai.google.dev/gemini-api/docs/openai
437
- // See also: https://gist.github.com/thomasgauvin/3cfe8e907c957fba4e132e6cf0f06292
438
- if (tc.extra_content) {
439
- assistantMessage.tool_calls[idx].extra_content = tc.extra_content;
440
- }
441
- }
442
- }
443
- }
444
- // Log API response with usage info at INFO level
445
- {
446
- const inputTokens = actualUsage?.prompt_tokens ?? estimateConversationTokens(updatedMessages);
447
- const outputTokens = actualUsage?.completion_tokens ?? estimateTokens(assistantMessage.content || '');
448
- const cachedTokens = actualUsage?.prompt_tokens_details?.cached_tokens;
449
- const cost = pricing
450
- ? createUsageInfo(inputTokens, outputTokens, pricing, cachedTokens).estimatedCost
451
- : 0;
452
- const contextPercent = pricing
453
- ? getContextInfo(updatedMessages, pricing).utilizationPercentage
454
- : 0;
455
- logger.info('Received API response', {
456
- model,
457
- inputTokens,
458
- outputTokens,
459
- cachedTokens,
460
- cost: cost > 0 ? `$${cost.toFixed(4)}` : 'N/A',
461
- contextPercent: contextPercent > 0 ? `${contextPercent.toFixed(1)}%` : 'N/A',
462
- hasToolCalls: assistantMessage.tool_calls.length > 0,
463
- contentLength: assistantMessage.content?.length || 0,
464
- });
465
- onEvent({
466
- type: 'usage',
467
- usage: { inputTokens, outputTokens, cost, contextPercent },
468
- });
469
- }
470
- // Log the full assistant message for debugging
471
- logger.debug('Assistant response details', {
472
- contentLength: assistantMessage.content?.length || 0,
473
- contentPreview: assistantMessage.content?.slice(0, 200) || '(empty)',
474
- toolCallsCount: assistantMessage.tool_calls?.length || 0,
475
- toolCalls: assistantMessage.tool_calls?.map((tc) => ({
476
- id: tc.id,
477
- name: tc.function?.name,
478
- argsPreview: tc.function?.arguments?.slice(0, 100),
479
- })),
480
- });
130
+ // Process the streaming response
131
+ const streamResult = await processStream(stream, updatedMessages, model, pricing, onEvent);
132
+ assistantMessage = streamResult.assistantMessage;
481
133
  // Handle tool calls
482
- if (assistantMessage.tool_calls.length > 0) {
134
+ if (streamResult.hasToolCalls) {
483
135
  // Reset retrigger count on valid tool call response
484
- retriggerCount = 0;
136
+ retryState.retriggerCount = 0;
485
137
  // Clean up empty tool_calls entries (from sparse array)
486
138
  assistantMessage.tool_calls = assistantMessage.tool_calls.filter(Boolean);
487
- assistantMessage.tool_calls = assistantMessage.tool_calls.map((toolCall) => {
488
- const sanitized = sanitizeToolCall(toolCall, validToolNames);
489
- if (sanitized.changed) {
490
- logger.warn('Sanitized streamed tool call', {
491
- originalName: toolCall.function?.name,
492
- sanitizedName: sanitized.toolCall.function?.name,
493
- });
494
- }
495
- return sanitized.toolCall;
496
- });
497
139
  // Validate that all tool calls have valid JSON arguments
498
140
  const invalidToolCalls = assistantMessage.tool_calls.filter((tc) => {
499
141
  const args = tc.function?.arguments;
@@ -523,91 +165,19 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
523
165
  tools: assistantMessage.tool_calls.map((tc) => tc.function?.name).join(', '),
524
166
  });
525
167
  updatedMessages.push(assistantMessage);
526
- // Track which tool_call_ids still need a tool result message.
527
- // This set is used to inject stub responses on abort, preventing
528
- // orphaned tool_call_ids from permanently bricking the session.
529
- const pendingToolCallIds = new Set(assistantMessage.tool_calls.map((tc) => tc.id));
530
- const injectStubsForPendingToolCalls = () => {
531
- for (const id of pendingToolCallIds) {
532
- updatedMessages.push({
533
- role: 'tool',
534
- tool_call_id: id,
535
- content: 'Aborted by user.',
536
- });
537
- }
168
+ // Execute tool calls
169
+ const toolContext = {
170
+ sessionId,
171
+ abortSignal,
172
+ requestDefaults,
173
+ client,
174
+ model,
175
+ pricing,
538
176
  };
539
- for (const toolCall of assistantMessage.tool_calls) {
540
- // Check abort between tool calls
541
- if (abortSignal?.aborted) {
542
- logger.debug('Agentic loop aborted between tool calls');
543
- injectStubsForPendingToolCalls();
544
- emitAbortAndFinish(onEvent);
545
- return updatedMessages;
546
- }
547
- const { name, arguments: argsStr } = toolCall.function;
548
- onEvent({
549
- type: 'tool_call',
550
- toolCall: { id: toolCall.id, name, args: argsStr, status: 'running' },
551
- });
552
- try {
553
- const args = JSON.parse(argsStr);
554
- let result;
555
- // Handle sub-agent tool specially
556
- if (name === 'sub_agent') {
557
- const subProgress = (evt) => {
558
- onEvent({
559
- type: 'sub_agent_iteration',
560
- subAgentTool: { tool: evt.tool, status: evt.status, iteration: evt.iteration, args: evt.args },
561
- });
562
- };
563
- const subResult = await runSubAgent(client, model, args.task, args.max_iterations, requestDefaults, subProgress, abortSignal, pricing);
564
- result = subResult.response;
565
- // Emit sub-agent usage for the UI to add to total cost
566
- if (subResult.usage.inputTokens > 0 || subResult.usage.outputTokens > 0) {
567
- onEvent({
568
- type: 'sub_agent_iteration',
569
- subAgentUsage: subResult.usage,
570
- });
571
- }
572
- }
573
- else {
574
- result = await handleToolCall(name, args, { sessionId, abortSignal });
575
- }
576
- logger.info('Tool completed', {
577
- tool: name,
578
- resultLength: result.length,
579
- });
580
- updatedMessages.push({
581
- role: 'tool',
582
- tool_call_id: toolCall.id,
583
- content: result,
584
- });
585
- pendingToolCallIds.delete(toolCall.id);
586
- onEvent({
587
- type: 'tool_result',
588
- toolCall: { id: toolCall.id, name, args: argsStr, status: 'done', result },
589
- });
590
- }
591
- catch (err) {
592
- const errMsg = err instanceof Error ? err.message : String(err);
593
- updatedMessages.push({
594
- role: 'tool',
595
- tool_call_id: toolCall.id,
596
- content: `Error: ${errMsg}`,
597
- });
598
- pendingToolCallIds.delete(toolCall.id);
599
- // If the tool was aborted, inject stubs for remaining pending calls and stop
600
- if (abortSignal?.aborted || (err instanceof Error && (err.name === 'AbortError' || err.message === 'Operation aborted'))) {
601
- logger.debug('Agentic loop aborted during tool execution');
602
- injectStubsForPendingToolCalls();
603
- emitAbortAndFinish(onEvent);
604
- return updatedMessages;
605
- }
606
- onEvent({
607
- type: 'tool_result',
608
- toolCall: { id: toolCall.id, name, args: argsStr, status: 'error', result: errMsg },
609
- });
610
- }
177
+ const executionResult = await executeToolCalls(assistantMessage.tool_calls, updatedMessages, onEvent, toolContext);
178
+ if (executionResult.shouldAbort) {
179
+ emitAbortAndFinish(onEvent);
180
+ return updatedMessages;
611
181
  }
612
182
  // Signal UI that this iteration's tool calls are all done,
613
183
  // so it can flush completed messages to static output.
@@ -622,20 +192,20 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
622
192
  content: assistantMessage.content,
623
193
  });
624
194
  // Reset retrigger count on valid content response
625
- retriggerCount = 0;
195
+ retryState.retriggerCount = 0;
626
196
  }
627
197
  // Check if we need to retrigger: if the last message is a tool result
628
198
  // but we got no assistant response (empty content, no tool_calls), the AI
629
199
  // may have stopped prematurely. Inject a 'continue' prompt and retry.
630
200
  const lastMessage = updatedMessages[updatedMessages.length - 1];
631
- if (lastMessage?.role === 'tool' && retriggerCount < MAX_RETRIGGERS) {
632
- retriggerCount++;
201
+ if (lastMessage?.role === 'tool' && retryState.retriggerCount < MAX_RETRIGGERS) {
202
+ retryState.retriggerCount++;
633
203
  logger.warn('AI stopped after tool call without responding; retriggering', {
634
- retriggerCount,
204
+ retriggerCount: retryState.retriggerCount,
635
205
  maxRetriggers: MAX_RETRIGGERS,
636
206
  lastMessageRole: lastMessage.role,
637
207
  assistantContent: assistantMessage.content || '(empty)',
638
- hasToolCalls: assistantMessage.tool_calls.length > 0,
208
+ hasToolCalls: assistantMessage.tool_calls?.length > 0,
639
209
  });
640
210
  // Inject a 'continue' prompt to help the AI continue
641
211
  updatedMessages.push({
@@ -644,8 +214,9 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
644
214
  });
645
215
  continue;
646
216
  }
647
- repairRetryCount = 0;
648
- retriggerCount = 0;
217
+ // Reset retry counts on successful completion
218
+ retryState.repairCount = 0;
219
+ retryState.retriggerCount = 0;
649
220
  onEvent({ type: 'done' });
650
221
  return updatedMessages;
651
222
  }
@@ -685,170 +256,29 @@ export async function runAgenticLoop(client, model, messages, userInput, onEvent
685
256
  emitAbortAndFinish(onEvent);
686
257
  return updatedMessages;
687
258
  }
688
- const errMsg = apiError?.message || 'Unknown API error';
689
- // Try to extract response body for more details
690
- let responseBody;
691
- try {
692
- if (apiError?.response) {
693
- responseBody = JSON.stringify(apiError.response);
694
- }
695
- else if (apiError?.error) {
696
- responseBody = JSON.stringify(apiError.error);
697
- }
698
- }
699
- catch { /* ignore */ }
700
- logger.error(`API error: ${errMsg}`, {
701
- status: apiError?.status,
702
- code: apiError?.code,
703
- responseBody,
704
- headers: apiError?.headers ? Object.fromEntries(Object.entries(apiError.headers).filter(([k]) => ['content-type', 'x-error', 'retry-after'].includes(k.toLowerCase()))) : undefined,
705
- });
706
- // Log the last few messages to help debug format issues
707
- logger.debug('Messages at time of error', {
708
- lastMessages: updatedMessages.slice(-3).map((m) => ({
709
- role: m.role,
710
- hasToolCalls: !!(m.tool_calls?.length),
711
- tool_call_id: m.tool_call_id,
712
- contentPreview: m.content?.slice(0, 150),
713
- })),
714
- });
715
- const retryableStatus = apiError?.status === 408 || apiError?.status === 409 || apiError?.status === 425;
716
- const retryableCode = ['ECONNRESET', 'ECONNABORTED', 'ETIMEDOUT', 'ENETUNREACH', 'EAI_AGAIN'].includes(apiError?.code);
717
- // Handle 400 errors: try sanitization first, then truncate messages
718
- if (apiError?.status === 400) {
719
- // Try sanitization first
720
- if (repairRetryCount < 2) {
721
- const sanitized = sanitizeMessagesForRetry(updatedMessages, getValidToolNames());
722
- if (sanitized.changed) {
723
- repairRetryCount++;
724
- updatedMessages.length = 0;
725
- updatedMessages.push(...sanitized.messages);
726
- logger.warn('400 response after malformed tool payload; retrying with sanitized messages', {
727
- repairRetryCount,
728
- });
729
- // Silently retry without showing error to user
730
- continue;
731
- }
732
- }
733
- // Try removing orphaned tool results
734
- const orphanedRemoved = removeOrphanedToolResults(updatedMessages);
735
- if (orphanedRemoved.changed) {
736
- updatedMessages.length = 0;
737
- updatedMessages.push(...orphanedRemoved.messages);
738
- logger.warn('400 response after orphaned tool results; retrying with cleaned messages');
739
- // Silently retry without showing error to user
740
- continue;
741
- }
742
- // If sanitization didn't help, try removing messages one at a time (up to 5)
743
- if (truncateRetryCount < MAX_TRUNCATE_RETRIES) {
744
- truncateRetryCount++;
745
- const removedCount = Math.min(1, Math.max(0, updatedMessages.length - 2)); // Remove 1 at a time, keep system + at least 1 user
746
- if (removedCount > 0) {
747
- const removed = updatedMessages.splice(-removedCount);
748
- logger.debug('400 error: removing message from history to attempt fix', {
749
- truncateRetryCount,
750
- maxRetries: MAX_TRUNCATE_RETRIES,
751
- removedCount,
752
- removedRoles: removed.map((m) => m.role),
753
- removedPreviews: removed.map((m) => ({
754
- role: m.role,
755
- content: m.content?.slice(0, 100),
756
- tool_calls: m.tool_calls?.map((tc) => tc.function?.name),
757
- })),
758
- });
759
- // Silently retry without showing error to user
760
- continue;
761
- }
762
- }
763
- // After truncation retries exhausted, try adding a "continue" message
764
- if (continueRetryCount < MAX_CONTINUE_RETRIES) {
765
- continueRetryCount++;
766
- updatedMessages.push({ role: 'user', content: 'continue' });
767
- logger.warn('400 error: adding "continue" message to retry', {
768
- continueRetryCount,
769
- messageCount: updatedMessages.length,
770
- });
771
- onEvent({
772
- type: 'error',
773
- error: 'Request failed. Retrying with "continue"...',
774
- transient: true,
775
- });
776
- continue;
777
- }
259
+ // Handle API errors with retry strategies
260
+ const errorResult = await handleApiError(apiError, updatedMessages, validToolNames, pricing, retryState, iterationCount, onEvent, client, model, requestDefaults, sessionId);
261
+ if (errorResult.shouldAbort) {
262
+ emitAbortAndFinish(onEvent);
263
+ return updatedMessages;
778
264
  }
779
- // Handle context-window-exceeded (prompt too long) — attempt forced compaction
780
- // This fires when our token estimate was too low (e.g. base64 images from MCP tools)
781
- // and the request actually hit the hard provider limit.
782
- const isContextTooLong = apiError?.status === 400 &&
783
- typeof errMsg === 'string' &&
784
- /prompt.{0,30}too long|context.{0,30}length|maximum.{0,30}token|tokens?.{0,10}exceed/i.test(errMsg);
785
- if (isContextTooLong && contextRetryCount < 2) {
786
- contextRetryCount++;
787
- logger.warn(`Prompt too long (attempt ${contextRetryCount}); forcing compaction`, { errMsg });
265
+ if (!errorResult.handled) {
266
+ // Non-retryable error
788
267
  onEvent({
789
268
  type: 'error',
790
- error: 'Prompt too long. Compacting conversation and retrying...',
791
- transient: true,
269
+ error: errorResult.errorMessage || 'Unknown error',
270
+ transient: errorResult.transient,
792
271
  });
793
- if (pricing) {
794
- // Use the normal LLM-based compaction path
795
- try {
796
- const compacted = await compactIfNeeded(client, model, updatedMessages, pricing.contextWindow,
797
- // Pass the context window itself as currentTokens to force compaction
798
- pricing.contextWindow, requestDefaults, sessionId);
799
- updatedMessages.length = 0;
800
- updatedMessages.push(...compacted);
801
- }
802
- catch (compactErr) {
803
- logger.error(`Forced compaction failed: ${compactErr}`);
804
- // Fall through to truncation fallback below
805
- }
806
- }
807
- // Fallback: truncate any tool result messages whose content looks like
808
- // base64 or is extremely large (e.g. MCP screenshot data)
809
- const MAX_TOOL_RESULT_CHARS = 20_000;
810
- for (let i = 0; i < updatedMessages.length; i++) {
811
- const m = updatedMessages[i];
812
- if (m.role === 'tool' && typeof m.content === 'string' && m.content.length > MAX_TOOL_RESULT_CHARS) {
813
- updatedMessages[i] = {
814
- ...m,
815
- content: m.content.slice(0, MAX_TOOL_RESULT_CHARS) + '\n... (truncated — content was too large)',
816
- };
817
- }
818
- }
819
- continue;
820
- }
821
- // Retry on 429 (rate limit) with backoff
822
- if (apiError?.status === 429) {
823
- const retryAfter = parseInt(apiError?.headers?.['retry-after'] || '5', 10);
824
- const backoff = Math.min(retryAfter * 1000, 60_000);
825
- logger.info(`Rate limited, retrying in ${backoff / 1000}s...`);
826
- onEvent({ type: 'error', error: `Rate limited. Retrying in ${backoff / 1000}s...`, transient: true });
827
- await sleepWithAbort(backoff, abortSignal);
828
- continue;
829
- }
830
- // Retry on transient request failures
831
- if (apiError?.status >= 500 || retryableStatus || retryableCode) {
832
- const backoff = Math.min(2 ** iterationCount * 1000, 30_000);
833
- logger.info(`Request failed, retrying in ${backoff / 1000}s...`);
834
- onEvent({ type: 'error', error: `Request failed. Retrying in ${backoff / 1000}s...`, transient: true });
835
- await sleepWithAbort(backoff, abortSignal);
836
- continue;
272
+ onEvent({ type: 'done' });
273
+ return updatedMessages;
837
274
  }
838
- // 400 error that couldn't be fixed by sanitization or truncation
839
- if (apiError?.status === 400) {
840
- onEvent({
841
- type: 'error',
842
- error: `Request failed: ${errMsg}\n\nThe conversation history could not be automatically repaired. Try /clear to start fresh.`,
843
- transient: false,
844
- });
275
+ // If handled but not silently, the error was already emitted
276
+ if (!errorResult.silentRetry) {
845
277
  onEvent({ type: 'done' });
846
278
  return updatedMessages;
847
279
  }
848
- // Non-retryable error
849
- onEvent({ type: 'error', error: errMsg });
850
- onEvent({ type: 'done' });
851
- return updatedMessages;
280
+ // Silent retry - continue the loop
281
+ continue;
852
282
  }
853
283
  }
854
284
  onEvent({ type: 'error', error: 'Maximum iteration limit reached.' });