@prestyj/agent 4.2.77 → 4.3.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -6,7 +6,14 @@ import {
6
6
  stream,
7
7
  EventStream
8
8
  } from "@prestyj/ai";
9
- var DEFAULT_MAX_TURNS = 100;
9
+ var DEFAULT_MAX_TURNS = 200;
10
// Module-level diagnostics sink used by diag(); null means diagnostics are off.
// Kept as `var` to match the other top-level declarations in this bundle.
var _diagFn = null;
/**
 * Registers (or clears) the callback that receives stream diagnostics.
 *
 * @param {((phase: string, data?: unknown) => void) | null | undefined} fn
 *   Invoked as `fn(phase, data)` for every diagnostic event. Passing `null`,
 *   `undefined`, or any other non-function value disables diagnostics.
 */
function setStreamDiagnostic(fn) {
  // Storing a non-callable would make every later diag() call throw.
  _diagFn = typeof fn === "function" ? fn : null;
}
/**
 * Forwards one diagnostic event to the registered callback, if there is one.
 *
 * Diagnostics are best-effort. diag() is called from inside the agent loop and
 * from its idle/hard timeout callbacks right before they abort the stream, so
 * an exception thrown by the callback must not escape from here.
 *
 * @param {string} phase Short identifier of the diagnostic event.
 * @param {unknown} [data] Optional payload describing the event.
 */
function diag(phase, data) {
  if (_diagFn === null) return;
  try {
    _diagFn(phase, data);
  } catch {
    // Deliberately ignored: a faulty diagnostics hook must never change the
    // behaviour of the code that is being observed.
  }
}
10
17
  function isAbortError(err) {
11
18
  if (!(err instanceof Error)) return false;
12
19
  if (err.name === "AbortError") return true;
@@ -15,13 +22,20 @@ function isAbortError(err) {
15
22
  }
16
23
  function isContextOverflow(err) {
17
24
  if (!(err instanceof Error)) return false;
25
+ if (isBillingError(err)) return false;
18
26
  const msg = err.message.toLowerCase();
19
27
  return msg.includes("prompt is too long") || msg.includes("context_length_exceeded") || msg.includes("maximum context length") || msg.includes("token") && msg.includes("exceed");
20
28
  }
21
29
  function isBillingError(err) {
22
30
  if (!(err instanceof Error)) return false;
23
31
  const msg = err.message.toLowerCase();
24
- return msg.includes("insufficient balance") || msg.includes("no resource package") || msg.includes("quota exceeded") || msg.includes("billing") || msg.includes("recharge");
32
+ return msg.includes("insufficient balance") || msg.includes("no resource package") || msg.includes("quota exceeded") || msg.includes("billing") || msg.includes("recharge") || msg.includes("subscription plan") || msg.includes("does not yet include access") || msg.includes("token quota") || msg.includes("exceeded_current_quota_error") || msg.includes("check your account balance");
33
+ }
34
/**
 * Reports whether an error message indicates that tool calls and tool results
 * in the conversation history do not line up.
 *
 * @param {unknown} err Value caught from a provider call.
 * @returns {boolean} true when `err` is an Error whose lower-cased message
 *   matches one of the known pairing-error patterns.
 */
function isToolPairingError(err) {
  if (!(err instanceof Error)) {
    return false;
  }
  const text = err.message.toLowerCase();
  // Messages that name both block kinds are treated as a pairing complaint.
  if (text.includes("tool_use") && text.includes("tool_result")) {
    return true;
  }
  if (text.includes("unexpected `tool_use_id`")) {
    return true;
  }
  if (text.includes("tool_use ids found without")) {
    return true;
  }
  // Moonshot/OpenAI-compatible: "tool call id <id> is not found"
  return text.includes("tool call id") && text.includes("is not found");
}
26
40
  function isOverloaded(err) {
27
41
  if (!(err instanceof Error)) return false;
@@ -37,18 +51,46 @@ async function* agentLoop(messages, options) {
37
51
  let turn = 0;
38
52
  let firstTurn = true;
39
53
  let consecutivePauses = 0;
40
- let overflowRetries = 0;
54
+ let toolPairingRepaired = false;
41
55
  let overloadRetries = 0;
42
56
  let emptyResponseRetries = 0;
43
- const MAX_OVERFLOW_RETRIES = 3;
57
+ let stallRetries = 0;
58
+ let useNonStreamingFallback = false;
44
59
  const MAX_OVERLOAD_RETRIES = 10;
45
- const MAX_EMPTY_RESPONSE_RETRIES = 3;
60
+ const MAX_EMPTY_RESPONSE_RETRIES = 2;
61
+ const MAX_STALL_RETRIES = 5;
62
+ const STALL_RETRIES_BEFORE_NON_STREAMING = 2;
63
+ const STALL_DELAY_MS = 1e3;
46
64
  const OVERLOAD_BASE_DELAY_MS = 2e3;
47
65
  const OVERLOAD_MAX_DELAY_MS = 3e4;
66
+ const STREAM_FIRST_EVENT_TIMEOUT_MS = 45e3;
67
+ const STREAM_IDLE_TIMEOUT_MS = 3e4;
68
+ const STREAM_HARD_TIMEOUT_MS = 9e4;
69
+ const STREAM_OUTPUT_HARD_TIMEOUT_MS = 3e5;
70
+ const STREAM_THINKING_IDLE_TIMEOUT_MS = 3e5;
71
+ const STREAM_THINKING_HARD_TIMEOUT_MS = 6e5;
72
+ const NON_STREAMING_HARD_TIMEOUT_MS = 3e5;
48
73
  try {
49
74
  while (turn < maxTurns) {
50
75
  options.signal?.throwIfAborted();
51
76
  turn++;
77
+ let msgChars = 0;
78
+ for (const m of messages) {
79
+ if (typeof m.content === "string") msgChars += m.content.length;
80
+ else if (Array.isArray(m.content)) {
81
+ for (const p of m.content) {
82
+ if ("text" in p && typeof p.text === "string") msgChars += p.text.length;
83
+ if ("content" in p && typeof p.content === "string") msgChars += p.content.length;
84
+ }
85
+ }
86
+ }
87
+ diag("turn_start", {
88
+ turn,
89
+ messages: messages.length,
90
+ chars: msgChars,
91
+ provider: options.provider,
92
+ model: options.model
93
+ });
52
94
  if (firstTurn && options.getSteeringMessages) {
53
95
  const steering = await options.getSteeringMessages();
54
96
  if (steering && steering.length > 0) {
@@ -60,14 +102,64 @@ async function* agentLoop(messages, options) {
60
102
  }
61
103
  firstTurn = false;
62
104
  if (options.transformContext) {
105
+ diag("transform_start");
63
106
  const transformed = await options.transformContext(messages);
64
107
  if (transformed !== messages) {
108
+ diag("transform_compacted", {
109
+ before: messages.length,
110
+ after: transformed.length
111
+ });
65
112
  messages.length = 0;
66
113
  messages.push(...transformed);
67
114
  }
115
+ diag("transform_end");
68
116
  }
117
+ repairToolPairingAdjacent(messages);
69
118
  let response;
119
+ const streamController = new AbortController();
120
+ let idleTimer = null;
121
+ let hardTimer = null;
122
+ let idleTimedOut = false;
123
+ let streamEventCount = 0;
124
+ let lastEventTime = Date.now();
125
+ let streamCallStart = Date.now();
126
+ const eventTypeCounts = {};
127
+ let lastEventType = "";
128
+ let lastYieldEndTime = Date.now();
129
+ let maxConsumerLagMs = 0;
130
+ const forwardAbort = () => streamController.abort();
131
+ options.signal?.addEventListener("abort", forwardAbort, { once: true });
132
+ let hasReceivedEvent = false;
133
+ let hasReceivedThinking = false;
134
+ const resetIdleTimer = () => {
135
+ if (useNonStreamingFallback) return;
136
+ if (idleTimer) clearTimeout(idleTimer);
137
+ const timeoutMs = hasReceivedEvent ? STREAM_IDLE_TIMEOUT_MS : hasReceivedThinking ? STREAM_THINKING_IDLE_TIMEOUT_MS : STREAM_FIRST_EVENT_TIMEOUT_MS;
138
+ idleTimer = setTimeout(() => {
139
+ diag("idle_timeout_fired", {
140
+ events: streamEventCount,
141
+ sinceLastEventMs: Date.now() - lastEventTime,
142
+ lastEventType,
143
+ maxConsumerLagMs,
144
+ phase: hasReceivedEvent ? "mid_stream" : hasReceivedThinking ? "post_thinking" : "first_event",
145
+ eventTypes: eventTypeCounts
146
+ });
147
+ idleTimedOut = true;
148
+ streamController.abort();
149
+ }, timeoutMs);
150
+ };
151
+ let hardTimeoutMs = useNonStreamingFallback ? NON_STREAMING_HARD_TIMEOUT_MS : STREAM_HARD_TIMEOUT_MS;
152
+ hardTimer = setTimeout(() => {
153
+ diag("hard_timeout_fired", {
154
+ events: typeof streamEventCount !== "undefined" ? streamEventCount : 0,
155
+ nonStreaming: useNonStreamingFallback
156
+ });
157
+ idleTimedOut = true;
158
+ streamController.abort();
159
+ }, hardTimeoutMs);
70
160
  try {
161
+ diag("stream_call", { nonStreaming: useNonStreamingFallback });
162
+ streamCallStart = Date.now();
71
163
  const result = stream({
72
164
  provider: options.provider,
73
165
  model: options.model,
@@ -80,15 +172,65 @@ async function* agentLoop(messages, options) {
80
172
  thinking: options.thinking,
81
173
  apiKey: options.apiKey,
82
174
  baseUrl: options.baseUrl,
83
- signal: options.signal,
175
+ signal: streamController.signal,
84
176
  accountId: options.accountId,
85
177
  cacheRetention: options.cacheRetention,
86
178
  compaction: options.compaction,
87
- clearToolUses: options.clearToolUses
179
+ clearToolUses: options.clearToolUses,
180
+ // Flip to non-streaming fallback after repeated stream stalls.
181
+ ...useNonStreamingFallback ? { streaming: false } : {}
88
182
  });
183
+ diag("stream_created", { setupMs: Date.now() - streamCallStart });
89
184
  result.response.catch(() => {
90
185
  });
186
+ streamEventCount = 0;
187
+ hasReceivedEvent = false;
188
+ lastEventTime = Date.now();
189
+ streamCallStart = Date.now();
190
+ resetIdleTimer();
91
191
  for await (const event of result) {
192
+ const pullTime = Date.now();
193
+ const consumerLag = pullTime - lastYieldEndTime;
194
+ if (consumerLag > maxConsumerLagMs) maxConsumerLagMs = consumerLag;
195
+ streamEventCount++;
196
+ eventTypeCounts[event.type] = (eventTypeCounts[event.type] ?? 0) + 1;
197
+ lastEventType = event.type;
198
+ if ((event.type === "text_delta" || event.type === "server_toolcall" || event.type === "toolcall_delta") && !hasReceivedEvent) {
199
+ hasReceivedEvent = true;
200
+ if (hardTimer && hardTimeoutMs < STREAM_OUTPUT_HARD_TIMEOUT_MS) {
201
+ clearTimeout(hardTimer);
202
+ hardTimeoutMs = STREAM_OUTPUT_HARD_TIMEOUT_MS;
203
+ hardTimer = setTimeout(() => {
204
+ diag("hard_timeout_fired", { events: streamEventCount });
205
+ idleTimedOut = true;
206
+ streamController.abort();
207
+ }, hardTimeoutMs);
208
+ }
209
+ }
210
+ if (event.type === "thinking_delta" && !hasReceivedThinking) {
211
+ hasReceivedThinking = true;
212
+ if (hardTimer) clearTimeout(hardTimer);
213
+ hardTimeoutMs = STREAM_THINKING_HARD_TIMEOUT_MS;
214
+ hardTimer = setTimeout(() => {
215
+ diag("hard_timeout_fired", { events: streamEventCount });
216
+ idleTimedOut = true;
217
+ streamController.abort();
218
+ }, hardTimeoutMs);
219
+ }
220
+ const now = Date.now();
221
+ const gap = now - lastEventTime;
222
+ if (streamEventCount === 1) {
223
+ diag("first_event", { type: event.type, ttfMs: now - streamCallStart });
224
+ } else if (gap > 3e3) {
225
+ diag("slow_gap", {
226
+ type: event.type,
227
+ gapMs: gap,
228
+ eventNum: streamEventCount,
229
+ sinceStartMs: now - streamCallStart
230
+ });
231
+ }
232
+ lastEventTime = now;
233
+ resetIdleTimer();
92
234
  if (event.type === "text_delta") {
93
235
  yield { type: "text_delta", text: event.text };
94
236
  } else if (event.type === "thinking_delta") {
@@ -107,26 +249,36 @@ async function* agentLoop(messages, options) {
107
249
  resultType: event.resultType,
108
250
  data: event.data
109
251
  };
252
+ } else if (event.type === "toolcall_delta") {
253
+ yield {
254
+ type: "toolcall_delta",
255
+ chars: event.argsJson?.length ?? 0
256
+ };
110
257
  }
258
+ lastYieldEndTime = Date.now();
111
259
  }
260
+ diag("stream_done", {
261
+ events: streamEventCount,
262
+ totalMs: Date.now() - streamCallStart,
263
+ maxConsumerLagMs,
264
+ eventTypes: eventTypeCounts
265
+ });
112
266
  response = await result.response;
113
267
  } catch (err) {
114
- if (overflowRetries < MAX_OVERFLOW_RETRIES && isContextOverflow(err) && options.transformContext) {
115
- overflowRetries++;
116
- yield {
117
- type: "retry",
118
- reason: "context_overflow",
119
- attempt: overflowRetries,
120
- maxAttempts: MAX_OVERFLOW_RETRIES,
121
- delayMs: 0
122
- };
123
- const transformed = await options.transformContext(messages, { force: true });
124
- if (transformed !== messages) {
125
- messages.length = 0;
126
- messages.push(...transformed);
127
- }
128
- turn--;
129
- continue;
268
+ const errMsg = err instanceof Error ? err.message : String(err);
269
+ diag("stream_error", {
270
+ error: errMsg.slice(0, 200),
271
+ events: streamEventCount,
272
+ totalMs: Date.now() - streamCallStart,
273
+ idleTimedOut,
274
+ aborted: !!options.signal?.aborted,
275
+ eventTypes: eventTypeCounts,
276
+ provider: options.provider,
277
+ model: options.model
278
+ });
279
+ if (isContextOverflow(err)) {
280
+ yield { type: "error", error: err instanceof Error ? err : new Error(errMsg) };
281
+ throw err;
130
282
  }
131
283
  if (overloadRetries < MAX_OVERLOAD_RETRIES && isOverloaded(err)) {
132
284
  overloadRetries++;
@@ -134,6 +286,12 @@ async function* agentLoop(messages, options) {
134
286
  OVERLOAD_BASE_DELAY_MS * 2 ** (overloadRetries - 1),
135
287
  OVERLOAD_MAX_DELAY_MS
136
288
  );
289
+ diag("retry", {
290
+ reason: "overloaded",
291
+ attempt: overloadRetries,
292
+ maxAttempts: MAX_OVERLOAD_RETRIES,
293
+ delayMs
294
+ });
137
295
  yield {
138
296
  type: "retry",
139
297
  reason: "overloaded",
@@ -145,16 +303,91 @@ async function* agentLoop(messages, options) {
145
303
  turn--;
146
304
  continue;
147
305
  }
306
+ if (idleTimedOut && !options.signal?.aborted && stallRetries < MAX_STALL_RETRIES) {
307
+ stallRetries++;
308
+ if (!useNonStreamingFallback && stallRetries >= STALL_RETRIES_BEFORE_NON_STREAMING) {
309
+ useNonStreamingFallback = true;
310
+ diag("non_streaming_fallback_enabled", {
311
+ stallRetries,
312
+ provider: options.provider,
313
+ model: options.model
314
+ });
315
+ }
316
+ const delayMs = Math.min(STALL_DELAY_MS * 2 ** (stallRetries - 1), 8e3);
317
+ diag("retry", {
318
+ reason: "stream_stall",
319
+ attempt: stallRetries,
320
+ maxAttempts: MAX_STALL_RETRIES,
321
+ delayMs,
322
+ events: streamEventCount,
323
+ nonStreaming: useNonStreamingFallback
324
+ });
325
+ yield {
326
+ type: "retry",
327
+ reason: "stream_stall",
328
+ attempt: stallRetries,
329
+ maxAttempts: MAX_STALL_RETRIES,
330
+ delayMs,
331
+ silent: stallRetries <= 2
332
+ };
333
+ await new Promise((r) => setTimeout(r, delayMs));
334
+ turn--;
335
+ continue;
336
+ }
337
+ if (idleTimedOut && !options.signal?.aborted) {
338
+ diag("stall_exhausted", {
339
+ stallRetries: MAX_STALL_RETRIES,
340
+ provider: options.provider,
341
+ model: options.model
342
+ });
343
+ yield {
344
+ type: "error",
345
+ error: new Error(
346
+ `The API provider's stream stalled ${MAX_STALL_RETRIES} times \u2014 the provider may be experiencing capacity issues. Your conversation is preserved. Send another message to retry.`
347
+ )
348
+ };
349
+ break;
350
+ }
351
+ if (isToolPairingError(err) && !toolPairingRepaired) {
352
+ toolPairingRepaired = true;
353
+ diag("tool_pairing_repair", { error: errMsg.slice(0, 200) });
354
+ repairToolPairingAdjacent(messages);
355
+ turn--;
356
+ continue;
357
+ }
148
358
  if (isAbortError(err) || options.signal?.aborted) {
359
+ diag("aborted", { turn, provider: options.provider, model: options.model });
149
360
  break;
150
361
  }
362
+ diag("unhandled_error", {
363
+ error: errMsg.slice(0, 500),
364
+ turn,
365
+ provider: options.provider,
366
+ model: options.model
367
+ });
151
368
  throw err;
369
+ } finally {
370
+ if (idleTimer) clearTimeout(idleTimer);
371
+ if (hardTimer) clearTimeout(hardTimer);
372
+ options.signal?.removeEventListener("abort", forwardAbort);
152
373
  }
153
- overflowRetries = 0;
154
374
  overloadRetries = 0;
155
- if (response.usage.outputTokens === 0 && (response.message.content === "" || Array.isArray(response.message.content) && response.message.content.length === 0)) {
375
+ stallRetries = 0;
376
+ const contentArr = Array.isArray(response.message.content) ? response.message.content : null;
377
+ const hasActionableContent = response.message.content !== "" && contentArr !== null && contentArr.some(
378
+ (p) => p.type === "text" || p.type === "tool_call" || p.type === "server_tool_call"
379
+ );
380
+ if (!hasActionableContent) {
156
381
  if (emptyResponseRetries < MAX_EMPTY_RESPONSE_RETRIES) {
157
382
  emptyResponseRetries++;
383
+ diag("retry", {
384
+ reason: "empty_response",
385
+ attempt: emptyResponseRetries,
386
+ maxAttempts: MAX_EMPTY_RESPONSE_RETRIES,
387
+ provider: options.provider,
388
+ model: options.model,
389
+ contentTypes: contentArr?.map((p) => p.type).join(",") ?? "empty"
390
+ });
158
391
  yield {
159
392
  type: "retry",
160
393
  reason: "empty_response",
@@ -167,6 +400,7 @@ async function* agentLoop(messages, options) {
167
400
  }
168
401
  }
169
402
  emptyResponseRetries = 0;
403
+ useNonStreamingFallback = false;
170
404
  totalUsage.inputTokens += response.usage.inputTokens;
171
405
  totalUsage.outputTokens += response.usage.outputTokens;
172
406
  if (response.usage.cacheRead) {
@@ -414,6 +648,59 @@ function sanitizeOrphanedServerTools(messages) {
414
648
  break;
415
649
  }
416
650
  }
651
/**
 * Repairs tool-call / tool-result pairing in a message history, in place.
 *
 * Pass 1: every assistant message that contains `tool_call` parts must be
 * followed directly by a `tool` message holding a result for each call id.
 * Missing results are filled with an error placeholder; if the following
 * message is not a `tool` message with array content, a new `tool` message
 * made of placeholders is inserted right after the assistant message.
 *
 * Pass 2: tool results whose `toolCallId` was not issued by any earlier
 * assistant message are dropped; a `tool` message left without results is
 * removed entirely.
 *
 * @param {Array<{role: string, content: unknown}>} messages Conversation
 *   history; both the array and its `tool` messages are mutated.
 * @returns {void}
 */
function repairToolPairingAdjacent(messages) {
  // Placeholder result used wherever a tool call has no recorded outcome.
  const interruptedResult = (toolCallId) => ({
    type: "tool_result",
    toolCallId,
    content: "Tool execution was interrupted.",
    isError: true
  });

  // Pass 1: make sure each assistant tool call is answered by the next message.
  for (let idx = 0; idx < messages.length; idx += 1) {
    const current = messages[idx];
    if (current.role !== "assistant" || !Array.isArray(current.content)) {
      continue;
    }
    const callIds = [];
    for (const part of current.content) {
      if (part.type === "tool_call") callIds.push(part.id);
    }
    if (callIds.length === 0) {
      continue;
    }
    const follower = messages[idx + 1];
    const followerHoldsResults = follower?.role === "tool" && Array.isArray(follower.content);
    if (!followerHoldsResults) {
      // The inserted message has role "tool", so the loop skips it next round.
      messages.splice(idx + 1, 0, {
        role: "tool",
        content: callIds.map(interruptedResult)
      });
      continue;
    }
    // Decide what is missing before appending, so the appends do not
    // influence the membership check.
    const answeredIds = new Set(follower.content.map((result) => result.toolCallId));
    const unansweredIds = callIds.filter((id) => !answeredIds.has(id));
    follower.content.push(...unansweredIds.map(interruptedResult));
  }

  // Pass 2: drop results that refer to call ids no earlier assistant issued.
  const issuedCallIds = /* @__PURE__ */ new Set();
  let cursor = 0;
  while (cursor < messages.length) {
    const entry = messages[cursor];
    if (Array.isArray(entry.content)) {
      if (entry.role === "assistant") {
        for (const part of entry.content) {
          if (part.type === "tool_call") issuedCallIds.add(part.id);
        }
      } else if (entry.role === "tool") {
        const kept = entry.content.filter((result) => issuedCallIds.has(result.toolCallId));
        if (kept.length === 0) {
          // Removing the entry shifts the next one into this slot; do not advance.
          messages.splice(cursor, 1);
          continue;
        }
        if (kept.length < entry.content.length) {
          entry.content = kept;
        }
      }
    }
    cursor += 1;
  }
}
417
704
 
418
705
  // src/agent.ts
419
706
  var AgentStream = class {
@@ -519,6 +806,7 @@ export {
519
806
  agentLoop,
520
807
  isAbortError,
521
808
  isBillingError,
522
- isContextOverflow
809
+ isContextOverflow,
810
+ setStreamDiagnostic
523
811
  };
524
812
  //# sourceMappingURL=index.js.map