@prestyj/agent 4.2.77 → 4.3.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -25,7 +25,8 @@ __export(index_exports, {
25
25
  agentLoop: () => agentLoop,
26
26
  isAbortError: () => isAbortError,
27
27
  isBillingError: () => isBillingError,
28
- isContextOverflow: () => isContextOverflow
28
+ isContextOverflow: () => isContextOverflow,
29
+ setStreamDiagnostic: () => setStreamDiagnostic
29
30
  });
30
31
  module.exports = __toCommonJS(index_exports);
31
32
 
@@ -34,7 +35,14 @@ var import_ai2 = require("@prestyj/ai");
34
35
 
35
36
  // src/agent-loop.ts
36
37
  var import_ai = require("@prestyj/ai");
37
- var DEFAULT_MAX_TURNS = 100;
38
+ var DEFAULT_MAX_TURNS = 200;
39
// Diagnostics sink installed through setStreamDiagnostic(); null means diagnostics are off.
var _diagFn = null;
/**
 * Installs or clears the callback that receives agent-loop diagnostics.
 *
 * @param {((phase: string, data?: object) => void) | null | undefined} fn
 *   Invoked as `fn(phase, data)` for every diagnostic event. Any value that is
 *   not a function (null, undefined, or anything else) clears the sink.
 */
function setStreamDiagnostic(fn) {
  // Storing only callables keeps diag() from raising a TypeError on every event
  // when a caller passes a non-function by mistake.
  _diagFn = typeof fn === "function" ? fn : null;
}
/**
 * Forwards one diagnostic event to the installed sink, if there is one.
 *
 * Diagnostics are best-effort: an exception thrown by the sink is discarded so
 * that instrumentation can never change the control flow of the code that
 * reports the event.
 *
 * @param {string} phase Short event name.
 * @param {object} [data] Optional event payload, passed through untouched.
 */
function diag(phase, data) {
  if (_diagFn === null) return;
  try {
    _diagFn(phase, data);
  } catch {
    // Intentionally ignored: a failing diagnostics sink must not break the caller.
  }
}
38
46
  function isAbortError(err) {
39
47
  if (!(err instanceof Error)) return false;
40
48
  if (err.name === "AbortError") return true;
@@ -43,13 +51,20 @@ function isAbortError(err) {
43
51
  }
44
52
  function isContextOverflow(err) {
45
53
  if (!(err instanceof Error)) return false;
54
+ if (isBillingError(err)) return false;
46
55
  const msg = err.message.toLowerCase();
47
56
  return msg.includes("prompt is too long") || msg.includes("context_length_exceeded") || msg.includes("maximum context length") || msg.includes("token") && msg.includes("exceed");
48
57
  }
49
58
  function isBillingError(err) {
50
59
  if (!(err instanceof Error)) return false;
51
60
  const msg = err.message.toLowerCase();
52
- return msg.includes("insufficient balance") || msg.includes("no resource package") || msg.includes("quota exceeded") || msg.includes("billing") || msg.includes("recharge");
61
+ return msg.includes("insufficient balance") || msg.includes("no resource package") || msg.includes("quota exceeded") || msg.includes("billing") || msg.includes("recharge") || msg.includes("subscription plan") || msg.includes("does not yet include access") || msg.includes("token quota") || msg.includes("exceeded_current_quota_error") || msg.includes("check your account balance");
62
+ }
63
/**
 * Heuristically reports whether `err` is a provider rejection caused by tool
 * calls and tool results not matching up, judged by case-insensitive substring
 * match on the error message.
 *
 * @param {unknown} err Value caught from a provider call.
 * @returns {boolean} true only for Error instances whose message matches one of
 *   the known phrasings.
 */
function isToolPairingError(err) {
  if (!(err instanceof Error)) return false;
  const msg = err.message.toLowerCase();
  // Message names both block kinds, e.g. a tool_use without its tool_result.
  const mentionsUseAndResult = msg.includes("tool_use") && msg.includes("tool_result");
  const hasUnexpectedToolUseId = msg.includes("unexpected `tool_use_id`");
  const hasToolUseIdsWithout = msg.includes("tool_use ids found without");
  // Moonshot/OpenAI-compatible: "tool call id <id> is not found"
  const hasToolCallIdNotFound = msg.includes("tool call id") && msg.includes("is not found");
  return mentionsUseAndResult || hasUnexpectedToolUseId || hasToolUseIdsWithout || hasToolCallIdNotFound;
}
54
69
  function isOverloaded(err) {
55
70
  if (!(err instanceof Error)) return false;
@@ -65,18 +80,46 @@ async function* agentLoop(messages, options) {
65
80
  let turn = 0;
66
81
  let firstTurn = true;
67
82
  let consecutivePauses = 0;
68
- let overflowRetries = 0;
83
+ let toolPairingRepaired = false;
69
84
  let overloadRetries = 0;
70
85
  let emptyResponseRetries = 0;
71
- const MAX_OVERFLOW_RETRIES = 3;
86
+ let stallRetries = 0;
87
+ let useNonStreamingFallback = false;
72
88
  const MAX_OVERLOAD_RETRIES = 10;
73
- const MAX_EMPTY_RESPONSE_RETRIES = 3;
89
+ const MAX_EMPTY_RESPONSE_RETRIES = 2;
90
+ const MAX_STALL_RETRIES = 5;
91
+ const STALL_RETRIES_BEFORE_NON_STREAMING = 2;
92
+ const STALL_DELAY_MS = 1e3;
74
93
  const OVERLOAD_BASE_DELAY_MS = 2e3;
75
94
  const OVERLOAD_MAX_DELAY_MS = 3e4;
95
+ const STREAM_FIRST_EVENT_TIMEOUT_MS = 45e3;
96
+ const STREAM_IDLE_TIMEOUT_MS = 3e4;
97
+ const STREAM_HARD_TIMEOUT_MS = 9e4;
98
+ const STREAM_OUTPUT_HARD_TIMEOUT_MS = 3e5;
99
+ const STREAM_THINKING_IDLE_TIMEOUT_MS = 3e5;
100
+ const STREAM_THINKING_HARD_TIMEOUT_MS = 6e5;
101
+ const NON_STREAMING_HARD_TIMEOUT_MS = 3e5;
76
102
  try {
77
103
  while (turn < maxTurns) {
78
104
  options.signal?.throwIfAborted();
79
105
  turn++;
106
+ let msgChars = 0;
107
+ for (const m of messages) {
108
+ if (typeof m.content === "string") msgChars += m.content.length;
109
+ else if (Array.isArray(m.content)) {
110
+ for (const p of m.content) {
111
+ if ("text" in p && typeof p.text === "string") msgChars += p.text.length;
112
+ if ("content" in p && typeof p.content === "string") msgChars += p.content.length;
113
+ }
114
+ }
115
+ }
116
+ diag("turn_start", {
117
+ turn,
118
+ messages: messages.length,
119
+ chars: msgChars,
120
+ provider: options.provider,
121
+ model: options.model
122
+ });
80
123
  if (firstTurn && options.getSteeringMessages) {
81
124
  const steering = await options.getSteeringMessages();
82
125
  if (steering && steering.length > 0) {
@@ -88,14 +131,64 @@ async function* agentLoop(messages, options) {
88
131
  }
89
132
  firstTurn = false;
90
133
  if (options.transformContext) {
134
+ diag("transform_start");
91
135
  const transformed = await options.transformContext(messages);
92
136
  if (transformed !== messages) {
137
+ diag("transform_compacted", {
138
+ before: messages.length,
139
+ after: transformed.length
140
+ });
93
141
  messages.length = 0;
94
142
  messages.push(...transformed);
95
143
  }
144
+ diag("transform_end");
96
145
  }
146
+ repairToolPairingAdjacent(messages);
97
147
  let response;
148
+ const streamController = new AbortController();
149
+ let idleTimer = null;
150
+ let hardTimer = null;
151
+ let idleTimedOut = false;
152
+ let streamEventCount = 0;
153
+ let lastEventTime = Date.now();
154
+ let streamCallStart = Date.now();
155
+ const eventTypeCounts = {};
156
+ let lastEventType = "";
157
+ let lastYieldEndTime = Date.now();
158
+ let maxConsumerLagMs = 0;
159
+ const forwardAbort = () => streamController.abort();
160
+ options.signal?.addEventListener("abort", forwardAbort, { once: true });
161
+ let hasReceivedEvent = false;
162
+ let hasReceivedThinking = false;
163
+ const resetIdleTimer = () => {
164
+ if (useNonStreamingFallback) return;
165
+ if (idleTimer) clearTimeout(idleTimer);
166
+ const timeoutMs = hasReceivedEvent ? STREAM_IDLE_TIMEOUT_MS : hasReceivedThinking ? STREAM_THINKING_IDLE_TIMEOUT_MS : STREAM_FIRST_EVENT_TIMEOUT_MS;
167
+ idleTimer = setTimeout(() => {
168
+ diag("idle_timeout_fired", {
169
+ events: streamEventCount,
170
+ sinceLastEventMs: Date.now() - lastEventTime,
171
+ lastEventType,
172
+ maxConsumerLagMs,
173
+ phase: hasReceivedEvent ? "mid_stream" : hasReceivedThinking ? "post_thinking" : "first_event",
174
+ eventTypes: eventTypeCounts
175
+ });
176
+ idleTimedOut = true;
177
+ streamController.abort();
178
+ }, timeoutMs);
179
+ };
180
+ let hardTimeoutMs = useNonStreamingFallback ? NON_STREAMING_HARD_TIMEOUT_MS : STREAM_HARD_TIMEOUT_MS;
181
+ hardTimer = setTimeout(() => {
182
+ diag("hard_timeout_fired", {
183
+ events: typeof streamEventCount !== "undefined" ? streamEventCount : 0,
184
+ nonStreaming: useNonStreamingFallback
185
+ });
186
+ idleTimedOut = true;
187
+ streamController.abort();
188
+ }, hardTimeoutMs);
98
189
  try {
190
+ diag("stream_call", { nonStreaming: useNonStreamingFallback });
191
+ streamCallStart = Date.now();
99
192
  const result = (0, import_ai.stream)({
100
193
  provider: options.provider,
101
194
  model: options.model,
@@ -108,15 +201,65 @@ async function* agentLoop(messages, options) {
108
201
  thinking: options.thinking,
109
202
  apiKey: options.apiKey,
110
203
  baseUrl: options.baseUrl,
111
- signal: options.signal,
204
+ signal: streamController.signal,
112
205
  accountId: options.accountId,
113
206
  cacheRetention: options.cacheRetention,
114
207
  compaction: options.compaction,
115
- clearToolUses: options.clearToolUses
208
+ clearToolUses: options.clearToolUses,
209
+ // Flip to non-streaming fallback after repeated stream stalls.
210
+ ...useNonStreamingFallback ? { streaming: false } : {}
116
211
  });
212
+ diag("stream_created", { setupMs: Date.now() - streamCallStart });
117
213
  result.response.catch(() => {
118
214
  });
215
+ streamEventCount = 0;
216
+ hasReceivedEvent = false;
217
+ lastEventTime = Date.now();
218
+ streamCallStart = Date.now();
219
+ resetIdleTimer();
119
220
  for await (const event of result) {
221
+ const pullTime = Date.now();
222
+ const consumerLag = pullTime - lastYieldEndTime;
223
+ if (consumerLag > maxConsumerLagMs) maxConsumerLagMs = consumerLag;
224
+ streamEventCount++;
225
+ eventTypeCounts[event.type] = (eventTypeCounts[event.type] ?? 0) + 1;
226
+ lastEventType = event.type;
227
+ if ((event.type === "text_delta" || event.type === "server_toolcall" || event.type === "toolcall_delta") && !hasReceivedEvent) {
228
+ hasReceivedEvent = true;
229
+ if (hardTimer && hardTimeoutMs < STREAM_OUTPUT_HARD_TIMEOUT_MS) {
230
+ clearTimeout(hardTimer);
231
+ hardTimeoutMs = STREAM_OUTPUT_HARD_TIMEOUT_MS;
232
+ hardTimer = setTimeout(() => {
233
+ diag("hard_timeout_fired", { events: streamEventCount });
234
+ idleTimedOut = true;
235
+ streamController.abort();
236
+ }, hardTimeoutMs);
237
+ }
238
+ }
239
+ if (event.type === "thinking_delta" && !hasReceivedThinking) {
240
+ hasReceivedThinking = true;
241
+ if (hardTimer) clearTimeout(hardTimer);
242
+ hardTimeoutMs = STREAM_THINKING_HARD_TIMEOUT_MS;
243
+ hardTimer = setTimeout(() => {
244
+ diag("hard_timeout_fired", { events: streamEventCount });
245
+ idleTimedOut = true;
246
+ streamController.abort();
247
+ }, hardTimeoutMs);
248
+ }
249
+ const now = Date.now();
250
+ const gap = now - lastEventTime;
251
+ if (streamEventCount === 1) {
252
+ diag("first_event", { type: event.type, ttfMs: now - streamCallStart });
253
+ } else if (gap > 3e3) {
254
+ diag("slow_gap", {
255
+ type: event.type,
256
+ gapMs: gap,
257
+ eventNum: streamEventCount,
258
+ sinceStartMs: now - streamCallStart
259
+ });
260
+ }
261
+ lastEventTime = now;
262
+ resetIdleTimer();
120
263
  if (event.type === "text_delta") {
121
264
  yield { type: "text_delta", text: event.text };
122
265
  } else if (event.type === "thinking_delta") {
@@ -135,26 +278,36 @@ async function* agentLoop(messages, options) {
135
278
  resultType: event.resultType,
136
279
  data: event.data
137
280
  };
281
+ } else if (event.type === "toolcall_delta") {
282
+ yield {
283
+ type: "toolcall_delta",
284
+ chars: event.argsJson?.length ?? 0
285
+ };
138
286
  }
287
+ lastYieldEndTime = Date.now();
139
288
  }
289
+ diag("stream_done", {
290
+ events: streamEventCount,
291
+ totalMs: Date.now() - streamCallStart,
292
+ maxConsumerLagMs,
293
+ eventTypes: eventTypeCounts
294
+ });
140
295
  response = await result.response;
141
296
  } catch (err) {
142
- if (overflowRetries < MAX_OVERFLOW_RETRIES && isContextOverflow(err) && options.transformContext) {
143
- overflowRetries++;
144
- yield {
145
- type: "retry",
146
- reason: "context_overflow",
147
- attempt: overflowRetries,
148
- maxAttempts: MAX_OVERFLOW_RETRIES,
149
- delayMs: 0
150
- };
151
- const transformed = await options.transformContext(messages, { force: true });
152
- if (transformed !== messages) {
153
- messages.length = 0;
154
- messages.push(...transformed);
155
- }
156
- turn--;
157
- continue;
297
+ const errMsg = err instanceof Error ? err.message : String(err);
298
+ diag("stream_error", {
299
+ error: errMsg.slice(0, 200),
300
+ events: streamEventCount,
301
+ totalMs: Date.now() - streamCallStart,
302
+ idleTimedOut,
303
+ aborted: !!options.signal?.aborted,
304
+ eventTypes: eventTypeCounts,
305
+ provider: options.provider,
306
+ model: options.model
307
+ });
308
+ if (isContextOverflow(err)) {
309
+ yield { type: "error", error: err instanceof Error ? err : new Error(errMsg) };
310
+ throw err;
158
311
  }
159
312
  if (overloadRetries < MAX_OVERLOAD_RETRIES && isOverloaded(err)) {
160
313
  overloadRetries++;
@@ -162,6 +315,12 @@ async function* agentLoop(messages, options) {
162
315
  OVERLOAD_BASE_DELAY_MS * 2 ** (overloadRetries - 1),
163
316
  OVERLOAD_MAX_DELAY_MS
164
317
  );
318
+ diag("retry", {
319
+ reason: "overloaded",
320
+ attempt: overloadRetries,
321
+ maxAttempts: MAX_OVERLOAD_RETRIES,
322
+ delayMs
323
+ });
165
324
  yield {
166
325
  type: "retry",
167
326
  reason: "overloaded",
@@ -173,16 +332,91 @@ async function* agentLoop(messages, options) {
173
332
  turn--;
174
333
  continue;
175
334
  }
335
+ if (idleTimedOut && !options.signal?.aborted && stallRetries < MAX_STALL_RETRIES) {
336
+ stallRetries++;
337
+ if (!useNonStreamingFallback && stallRetries >= STALL_RETRIES_BEFORE_NON_STREAMING) {
338
+ useNonStreamingFallback = true;
339
+ diag("non_streaming_fallback_enabled", {
340
+ stallRetries,
341
+ provider: options.provider,
342
+ model: options.model
343
+ });
344
+ }
345
+ const delayMs = Math.min(STALL_DELAY_MS * 2 ** (stallRetries - 1), 8e3);
346
+ diag("retry", {
347
+ reason: "stream_stall",
348
+ attempt: stallRetries,
349
+ maxAttempts: MAX_STALL_RETRIES,
350
+ delayMs,
351
+ events: streamEventCount,
352
+ nonStreaming: useNonStreamingFallback
353
+ });
354
+ yield {
355
+ type: "retry",
356
+ reason: "stream_stall",
357
+ attempt: stallRetries,
358
+ maxAttempts: MAX_STALL_RETRIES,
359
+ delayMs,
360
+ silent: stallRetries <= 2
361
+ };
362
+ await new Promise((r) => setTimeout(r, delayMs));
363
+ turn--;
364
+ continue;
365
+ }
366
+ if (idleTimedOut && !options.signal?.aborted) {
367
+ diag("stall_exhausted", {
368
+ stallRetries: MAX_STALL_RETRIES,
369
+ provider: options.provider,
370
+ model: options.model
371
+ });
372
+ yield {
373
+ type: "error",
374
+ error: new Error(
375
+ `The API provider's stream stalled ${MAX_STALL_RETRIES} times \u2014 the provider may be experiencing capacity issues. Your conversation is preserved. Send another message to retry.`
376
+ )
377
+ };
378
+ break;
379
+ }
380
+ if (isToolPairingError(err) && !toolPairingRepaired) {
381
+ toolPairingRepaired = true;
382
+ diag("tool_pairing_repair", { error: errMsg.slice(0, 200) });
383
+ repairToolPairingAdjacent(messages);
384
+ turn--;
385
+ continue;
386
+ }
176
387
  if (isAbortError(err) || options.signal?.aborted) {
388
+ diag("aborted", { turn, provider: options.provider, model: options.model });
177
389
  break;
178
390
  }
391
+ diag("unhandled_error", {
392
+ error: errMsg.slice(0, 500),
393
+ turn,
394
+ provider: options.provider,
395
+ model: options.model
396
+ });
179
397
  throw err;
398
+ } finally {
399
+ if (idleTimer) clearTimeout(idleTimer);
400
+ if (hardTimer) clearTimeout(hardTimer);
401
+ options.signal?.removeEventListener("abort", forwardAbort);
180
402
  }
181
- overflowRetries = 0;
182
403
  overloadRetries = 0;
183
- if (response.usage.outputTokens === 0 && (response.message.content === "" || Array.isArray(response.message.content) && response.message.content.length === 0)) {
404
+ stallRetries = 0;
405
+ const contentArr = Array.isArray(response.message.content) ? response.message.content : null;
406
+ const hasActionableContent = response.message.content !== "" && contentArr !== null && contentArr.some(
407
+ (p) => p.type === "text" || p.type === "tool_call" || p.type === "server_tool_call"
408
+ );
409
+ if (!hasActionableContent) {
184
410
  if (emptyResponseRetries < MAX_EMPTY_RESPONSE_RETRIES) {
185
411
  emptyResponseRetries++;
412
+ diag("retry", {
413
+ reason: "empty_response",
414
+ attempt: emptyResponseRetries,
415
+ maxAttempts: MAX_EMPTY_RESPONSE_RETRIES,
416
+ provider: options.provider,
417
+ model: options.model,
418
+ contentTypes: contentArr?.map((p) => p.type).join(",") ?? "empty"
419
+ });
186
420
  yield {
187
421
  type: "retry",
188
422
  reason: "empty_response",
@@ -195,6 +429,7 @@ async function* agentLoop(messages, options) {
195
429
  }
196
430
  }
197
431
  emptyResponseRetries = 0;
432
+ useNonStreamingFallback = false;
198
433
  totalUsage.inputTokens += response.usage.inputTokens;
199
434
  totalUsage.outputTokens += response.usage.outputTokens;
200
435
  if (response.usage.cacheRead) {
@@ -442,6 +677,59 @@ function sanitizeOrphanedServerTools(messages) {
442
677
  break;
443
678
  }
444
679
  }
680
/**
 * Repairs tool-call / tool-result pairing in a conversation, mutating
 * `messages` (and the tool messages inside it) in place.
 *
 * Pass 1: every assistant message that contains `tool_call` parts must be
 * directly followed by a `tool` message holding a result for each call id.
 * Missing results are filled in with an error placeholder; if the following
 * message is not a `tool` message with array content, a new `tool` message made
 * of placeholders is inserted.
 *
 * Pass 2: tool results whose `toolCallId` was not issued by any assistant
 * message earlier in the list are removed; a `tool` message left with no
 * results is removed entirely.
 *
 * @param {Array<{role: string, content: unknown}>} messages Conversation to repair.
 * @returns {void}
 */
function repairToolPairingAdjacent(messages) {
  // Placeholder recorded for a tool call that has no result.
  const interruptedResult = (toolCallId) => ({
    type: "tool_result",
    toolCallId,
    content: "Tool execution was interrupted.",
    isError: true
  });

  // Pass 1: add results that are missing right after each assistant message.
  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i];
    if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue;
    const callIds = msg.content.filter((p) => p.type === "tool_call").map((p) => p.id);
    if (callIds.length === 0) continue;
    const next = messages[i + 1];
    if (next?.role === "tool" && Array.isArray(next.content)) {
      // Snapshot the ids answered before we start appending placeholders.
      const answeredIds = new Set(next.content.map((r) => r.toolCallId));
      for (const id of callIds) {
        if (!answeredIds.has(id)) next.content.push(interruptedResult(id));
      }
    } else {
      // The inserted message sits at i + 1 and is skipped by the role check on
      // the next iteration.
      messages.splice(i + 1, 0, {
        role: "tool",
        content: callIds.map((id) => interruptedResult(id))
      });
    }
  }

  // Pass 2: drop results that no earlier assistant message asked for.
  const issuedCallIds = new Set();
  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i];
    if (!Array.isArray(msg.content)) continue;
    if (msg.role === "assistant") {
      for (const part of msg.content) {
        if (part.type === "tool_call") issuedCallIds.add(part.id);
      }
    } else if (msg.role === "tool") {
      const kept = msg.content.filter((r) => issuedCallIds.has(r.toolCallId));
      if (kept.length === 0) {
        messages.splice(i, 1);
        i--; // re-examine the element that shifted into this slot
      } else if (kept.length < msg.content.length) {
        msg.content = kept;
      }
    }
  }
}
445
733
 
446
734
  // src/agent.ts
447
735
  var AgentStream = class {
@@ -548,6 +836,7 @@ var Agent = class {
548
836
  agentLoop,
549
837
  isAbortError,
550
838
  isBillingError,
551
- isContextOverflow
839
+ isContextOverflow,
840
+ setStreamDiagnostic
552
841
  });
553
842
  //# sourceMappingURL=index.cjs.map