npm - @prestyj/agent - Versions diffs - 4.2.77 → 4.3.34 - Mend

@prestyj/agent 4.2.77 → 4.3.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.js (changed)

@@ -6,7 +6,14 @@ import {
   stream,
   EventStream
 } from "@prestyj/ai";
-var DEFAULT_MAX_TURNS = 100;
+var DEFAULT_MAX_TURNS = 200;
+var _diagFn = null;
+function setStreamDiagnostic(fn) {
+  _diagFn = fn;
+}
+function diag(phase, data) {
+  _diagFn?.(phase, data);
+}
 function isAbortError(err) {
   if (!(err instanceof Error)) return false;
   if (err.name === "AbortError") return true;
@@ -15,13 +22,20 @@ function isAbortError(err) {
 }
 function isContextOverflow(err) {
   if (!(err instanceof Error)) return false;
+  if (isBillingError(err)) return false;
   const msg = err.message.toLowerCase();
   return msg.includes("prompt is too long") || msg.includes("context_length_exceeded") || msg.includes("maximum context length") || msg.includes("token") && msg.includes("exceed");
 }
 function isBillingError(err) {
   if (!(err instanceof Error)) return false;
   const msg = err.message.toLowerCase();
-  return msg.includes("insufficient balance") || msg.includes("no resource package") || msg.includes("quota exceeded") || msg.includes("billing") || msg.includes("recharge");
+  return msg.includes("insufficient balance") || msg.includes("no resource package") || msg.includes("quota exceeded") || msg.includes("billing") || msg.includes("recharge") || msg.includes("subscription plan") || msg.includes("does not yet include access") || msg.includes("token quota") || msg.includes("exceeded_current_quota_error") || msg.includes("check your account balance");
+}
+function isToolPairingError(err) {
+  if (!(err instanceof Error)) return false;
+  const msg = err.message.toLowerCase();
+  return msg.includes("tool_use") && msg.includes("tool_result") || msg.includes("unexpected `tool_use_id`") || msg.includes("tool_use ids found without") || // Moonshot/OpenAI-compatible: "tool call id <id> is not found"
+  msg.includes("tool call id") && msg.includes("is not found");
 }
 function isOverloaded(err) {
   if (!(err instanceof Error)) return false;
@@ -37,18 +51,46 @@ async function* agentLoop(messages, options) {
   let turn = 0;
   let firstTurn = true;
   let consecutivePauses = 0;
-  let overflowRetries = 0;
+  let toolPairingRepaired = false;
   let overloadRetries = 0;
   let emptyResponseRetries = 0;
-  const MAX_OVERFLOW_RETRIES = 3;
+  let stallRetries = 0;
+  let useNonStreamingFallback = false;
   const MAX_OVERLOAD_RETRIES = 10;
-  const MAX_EMPTY_RESPONSE_RETRIES = 3;
+  const MAX_EMPTY_RESPONSE_RETRIES = 2;
+  const MAX_STALL_RETRIES = 5;
+  const STALL_RETRIES_BEFORE_NON_STREAMING = 2;
+  const STALL_DELAY_MS = 1e3;
   const OVERLOAD_BASE_DELAY_MS = 2e3;
   const OVERLOAD_MAX_DELAY_MS = 3e4;
+  const STREAM_FIRST_EVENT_TIMEOUT_MS = 45e3;
+  const STREAM_IDLE_TIMEOUT_MS = 3e4;
+  const STREAM_HARD_TIMEOUT_MS = 9e4;
+  const STREAM_OUTPUT_HARD_TIMEOUT_MS = 3e5;
+  const STREAM_THINKING_IDLE_TIMEOUT_MS = 3e5;
+  const STREAM_THINKING_HARD_TIMEOUT_MS = 6e5;
+  const NON_STREAMING_HARD_TIMEOUT_MS = 3e5;
   try {
     while (turn < maxTurns) {
       options.signal?.throwIfAborted();
       turn++;
+      let msgChars = 0;
+      for (const m of messages) {
+        if (typeof m.content === "string") msgChars += m.content.length;
+        else if (Array.isArray(m.content)) {
+          for (const p of m.content) {
+            if ("text" in p && typeof p.text === "string") msgChars += p.text.length;
+            if ("content" in p && typeof p.content === "string") msgChars += p.content.length;
+          }
+        }
+      }
+      diag("turn_start", {
+        turn,
+        messages: messages.length,
+        chars: msgChars,
+        provider: options.provider,
+        model: options.model
+      });
       if (firstTurn && options.getSteeringMessages) {
         const steering = await options.getSteeringMessages();
         if (steering && steering.length > 0) {
@@ -60,14 +102,64 @@ async function* agentLoop(messages, options) {
       }
       firstTurn = false;
       if (options.transformContext) {
+        diag("transform_start");
         const transformed = await options.transformContext(messages);
         if (transformed !== messages) {
+          diag("transform_compacted", {
+            before: messages.length,
+            after: transformed.length
+          });
           messages.length = 0;
           messages.push(...transformed);
         }
+        diag("transform_end");
       }
+      repairToolPairingAdjacent(messages);
       let response;
+      const streamController = new AbortController();
+      let idleTimer = null;
+      let hardTimer = null;
+      let idleTimedOut = false;
+      let streamEventCount = 0;
+      let lastEventTime = Date.now();
+      let streamCallStart = Date.now();
+      const eventTypeCounts = {};
+      let lastEventType = "";
+      let lastYieldEndTime = Date.now();
+      let maxConsumerLagMs = 0;
+      const forwardAbort = () => streamController.abort();
+      options.signal?.addEventListener("abort", forwardAbort, { once: true });
+      let hasReceivedEvent = false;
+      let hasReceivedThinking = false;
+      const resetIdleTimer = () => {
+        if (useNonStreamingFallback) return;
+        if (idleTimer) clearTimeout(idleTimer);
+        const timeoutMs = hasReceivedEvent ? STREAM_IDLE_TIMEOUT_MS : hasReceivedThinking ? STREAM_THINKING_IDLE_TIMEOUT_MS : STREAM_FIRST_EVENT_TIMEOUT_MS;
+        idleTimer = setTimeout(() => {
+          diag("idle_timeout_fired", {
+            events: streamEventCount,
+            sinceLastEventMs: Date.now() - lastEventTime,
+            lastEventType,
+            maxConsumerLagMs,
+            phase: hasReceivedEvent ? "mid_stream" : hasReceivedThinking ? "post_thinking" : "first_event",
+            eventTypes: eventTypeCounts
+          });
+          idleTimedOut = true;
+          streamController.abort();
+        }, timeoutMs);
+      };
+      let hardTimeoutMs = useNonStreamingFallback ? NON_STREAMING_HARD_TIMEOUT_MS : STREAM_HARD_TIMEOUT_MS;
+      hardTimer = setTimeout(() => {
+        diag("hard_timeout_fired", {
+          events: typeof streamEventCount !== "undefined" ? streamEventCount : 0,
+          nonStreaming: useNonStreamingFallback
+        });
+        idleTimedOut = true;
+        streamController.abort();
+      }, hardTimeoutMs);
       try {
+        diag("stream_call", { nonStreaming: useNonStreamingFallback });
+        streamCallStart = Date.now();
         const result = stream({
           provider: options.provider,
           model: options.model,
@@ -80,15 +172,65 @@ async function* agentLoop(messages, options) {
           thinking: options.thinking,
           apiKey: options.apiKey,
           baseUrl: options.baseUrl,
-          signal: options.signal,
+          signal: streamController.signal,
           accountId: options.accountId,
           cacheRetention: options.cacheRetention,
           compaction: options.compaction,
-          clearToolUses: options.clearToolUses
+          clearToolUses: options.clearToolUses,
+          // Flip to non-streaming fallback after repeated stream stalls.
+          ...useNonStreamingFallback ? { streaming: false } : {}
         });
+        diag("stream_created", { setupMs: Date.now() - streamCallStart });
         result.response.catch(() => {
         });
+        streamEventCount = 0;
+        hasReceivedEvent = false;
+        lastEventTime = Date.now();
+        streamCallStart = Date.now();
+        resetIdleTimer();
         for await (const event of result) {
+          const pullTime = Date.now();
+          const consumerLag = pullTime - lastYieldEndTime;
+          if (consumerLag > maxConsumerLagMs) maxConsumerLagMs = consumerLag;
+          streamEventCount++;
+          eventTypeCounts[event.type] = (eventTypeCounts[event.type] ?? 0) + 1;
+          lastEventType = event.type;
+          if ((event.type === "text_delta" || event.type === "server_toolcall" || event.type === "toolcall_delta") && !hasReceivedEvent) {
+            hasReceivedEvent = true;
+            if (hardTimer && hardTimeoutMs < STREAM_OUTPUT_HARD_TIMEOUT_MS) {
+              clearTimeout(hardTimer);
+              hardTimeoutMs = STREAM_OUTPUT_HARD_TIMEOUT_MS;
+              hardTimer = setTimeout(() => {
+                diag("hard_timeout_fired", { events: streamEventCount });
+                idleTimedOut = true;
+                streamController.abort();
+              }, hardTimeoutMs);
+            }
+          }
+          if (event.type === "thinking_delta" && !hasReceivedThinking) {
+            hasReceivedThinking = true;
+            if (hardTimer) clearTimeout(hardTimer);
+            hardTimeoutMs = STREAM_THINKING_HARD_TIMEOUT_MS;
+            hardTimer = setTimeout(() => {
+              diag("hard_timeout_fired", { events: streamEventCount });
+              idleTimedOut = true;
+              streamController.abort();
+            }, hardTimeoutMs);
+          }
+          const now = Date.now();
+          const gap = now - lastEventTime;
+          if (streamEventCount === 1) {
+            diag("first_event", { type: event.type, ttfMs: now - streamCallStart });
+          } else if (gap > 3e3) {
+            diag("slow_gap", {
+              type: event.type,
+              gapMs: gap,
+              eventNum: streamEventCount,
+              sinceStartMs: now - streamCallStart
+            });
+          }
+          lastEventTime = now;
+          resetIdleTimer();
           if (event.type === "text_delta") {
             yield { type: "text_delta", text: event.text };
           } else if (event.type === "thinking_delta") {
@@ -107,26 +249,36 @@ async function* agentLoop(messages, options) {
               resultType: event.resultType,
               data: event.data
             };
+          } else if (event.type === "toolcall_delta") {
+            yield {
+              type: "toolcall_delta",
+              chars: event.argsJson?.length ?? 0
+            };
           }
+          lastYieldEndTime = Date.now();
         }
+        diag("stream_done", {
+          events: streamEventCount,
+          totalMs: Date.now() - streamCallStart,
+          maxConsumerLagMs,
+          eventTypes: eventTypeCounts
+        });
         response = await result.response;
       } catch (err) {
-        if (overflowRetries < MAX_OVERFLOW_RETRIES && isContextOverflow(err) && options.transformContext) {
-          overflowRetries++;
-          yield {
-            type: "retry",
-            reason: "context_overflow",
-            attempt: overflowRetries,
-            maxAttempts: MAX_OVERFLOW_RETRIES,
-            delayMs: 0
-          };
-          const transformed = await options.transformContext(messages, { force: true });
-          if (transformed !== messages) {
-            messages.length = 0;
-            messages.push(...transformed);
-          }
-          turn--;
-          continue;
+        const errMsg = err instanceof Error ? err.message : String(err);
+        diag("stream_error", {
+          error: errMsg.slice(0, 200),
+          events: streamEventCount,
+          totalMs: Date.now() - streamCallStart,
+          idleTimedOut,
+          aborted: !!options.signal?.aborted,
+          eventTypes: eventTypeCounts,
+          provider: options.provider,
+          model: options.model
+        });
+        if (isContextOverflow(err)) {
+          yield { type: "error", error: err instanceof Error ? err : new Error(errMsg) };
+          throw err;
         }
         if (overloadRetries < MAX_OVERLOAD_RETRIES && isOverloaded(err)) {
           overloadRetries++;
@@ -134,6 +286,12 @@ async function* agentLoop(messages, options) {
             OVERLOAD_BASE_DELAY_MS * 2 ** (overloadRetries - 1),
             OVERLOAD_MAX_DELAY_MS
           );
+          diag("retry", {
+            reason: "overloaded",
+            attempt: overloadRetries,
+            maxAttempts: MAX_OVERLOAD_RETRIES,
+            delayMs
+          });
           yield {
             type: "retry",
             reason: "overloaded",
@@ -145,16 +303,91 @@ async function* agentLoop(messages, options) {
           turn--;
           continue;
         }
+        if (idleTimedOut && !options.signal?.aborted && stallRetries < MAX_STALL_RETRIES) {
+          stallRetries++;
+          if (!useNonStreamingFallback && stallRetries >= STALL_RETRIES_BEFORE_NON_STREAMING) {
+            useNonStreamingFallback = true;
+            diag("non_streaming_fallback_enabled", {
+              stallRetries,
+              provider: options.provider,
+              model: options.model
+            });
+          }
+          const delayMs = Math.min(STALL_DELAY_MS * 2 ** (stallRetries - 1), 8e3);
+          diag("retry", {
+            reason: "stream_stall",
+            attempt: stallRetries,
+            maxAttempts: MAX_STALL_RETRIES,
+            delayMs,
+            events: streamEventCount,
+            nonStreaming: useNonStreamingFallback
+          });
+          yield {
+            type: "retry",
+            reason: "stream_stall",
+            attempt: stallRetries,
+            maxAttempts: MAX_STALL_RETRIES,
+            delayMs,
+            silent: stallRetries <= 2
+          };
+          await new Promise((r) => setTimeout(r, delayMs));
+          turn--;
+          continue;
+        }
+        if (idleTimedOut && !options.signal?.aborted) {
+          diag("stall_exhausted", {
+            stallRetries: MAX_STALL_RETRIES,
+            provider: options.provider,
+            model: options.model
+          });
+          yield {
+            type: "error",
+            error: new Error(
+              `The API provider's stream stalled ${MAX_STALL_RETRIES} times \u2014 the provider may be experiencing capacity issues. Your conversation is preserved. Send another message to retry.`
+            )
+          };
+          break;
+        }
+        if (isToolPairingError(err) && !toolPairingRepaired) {
+          toolPairingRepaired = true;
+          diag("tool_pairing_repair", { error: errMsg.slice(0, 200) });
+          repairToolPairingAdjacent(messages);
+          turn--;
+          continue;
+        }
         if (isAbortError(err) || options.signal?.aborted) {
+          diag("aborted", { turn, provider: options.provider, model: options.model });
           break;
         }
+        diag("unhandled_error", {
+          error: errMsg.slice(0, 500),
+          turn,
+          provider: options.provider,
+          model: options.model
+        });
         throw err;
+      } finally {
+        if (idleTimer) clearTimeout(idleTimer);
+        if (hardTimer) clearTimeout(hardTimer);
+        options.signal?.removeEventListener("abort", forwardAbort);
       }
-      overflowRetries = 0;
       overloadRetries = 0;
-      if (response.usage.outputTokens === 0 && (response.message.content === "" || Array.isArray(response.message.content) && response.message.content.length === 0)) {
+      stallRetries = 0;
+      const contentArr = Array.isArray(response.message.content) ? response.message.content : null;
+      const hasActionableContent = response.message.content !== "" && contentArr !== null && contentArr.some(
+        (p) => p.type === "text" || p.type === "tool_call" || p.type === "server_tool_call"
+      );
+      if (!hasActionableContent) {
         if (emptyResponseRetries < MAX_EMPTY_RESPONSE_RETRIES) {
           emptyResponseRetries++;
+          diag("retry", {
+            reason: "empty_response",
+            attempt: emptyResponseRetries,
+            maxAttempts: MAX_EMPTY_RESPONSE_RETRIES,
+            provider: options.provider,
+            model: options.model,
+            contentTypes: contentArr?.map((p) => p.type).join(",") ?? "empty"
+          });
           yield {
             type: "retry",
             reason: "empty_response",
@@ -167,6 +400,7 @@ async function* agentLoop(messages, options) {
         }
       }
       emptyResponseRetries = 0;
+      useNonStreamingFallback = false;
       totalUsage.inputTokens += response.usage.inputTokens;
       totalUsage.outputTokens += response.usage.outputTokens;
       if (response.usage.cacheRead) {
@@ -414,6 +648,59 @@ function sanitizeOrphanedServerTools(messages) {
     break;
   }
 }
+function repairToolPairingAdjacent(messages) {
+  for (let i = 0; i < messages.length; i++) {
+    const msg = messages[i];
+    if (msg.role !== "assistant") continue;
+    if (typeof msg.content === "string" || !Array.isArray(msg.content)) continue;
+    const toolCallIds = msg.content.filter((p) => p.type === "tool_call").map((p) => p.id);
+    if (toolCallIds.length === 0) continue;
+    const next = messages[i + 1];
+    if (next?.role === "tool" && Array.isArray(next.content)) {
+      const existingIds = new Set(next.content.map((r) => r.toolCallId));
+      const missing = toolCallIds.filter((id) => !existingIds.has(id));
+      if (missing.length > 0) {
+        for (const id of missing) {
+          next.content.push({
+            type: "tool_result",
+            toolCallId: id,
+            content: "Tool execution was interrupted.",
+            isError: true
+          });
+        }
+      }
+    } else {
+      messages.splice(i + 1, 0, {
+        role: "tool",
+        content: toolCallIds.map((id) => ({
+          type: "tool_result",
+          toolCallId: id,
+          content: "Tool execution was interrupted.",
+          isError: true
+        }))
+      });
+    }
+  }
+  const toolCallIdSet = /* @__PURE__ */ new Set();
+  for (let i = 0; i < messages.length; i++) {
+    const msg = messages[i];
+    if (msg.role === "assistant" && Array.isArray(msg.content)) {
+      for (const p of msg.content) {
+        if (p.type === "tool_call") toolCallIdSet.add(p.id);
+      }
+    }
+    if (msg.role === "tool" && Array.isArray(msg.content)) {
+      const results = msg.content;
+      const filtered = results.filter((r) => toolCallIdSet.has(r.toolCallId));
+      if (filtered.length === 0) {
+        messages.splice(i, 1);
+        i--;
+      } else if (filtered.length < results.length) {
+        msg.content = filtered;
+      }
+    }
+  }
+}
 // src/agent.ts
 var AgentStream = class {
@@ -519,6 +806,7 @@ export {
   agentLoop,
   isAbortError,
   isBillingError,
-  isContextOverflow
+  isContextOverflow,
+  setStreamDiagnostic
 };
 //# sourceMappingURL=index.js.map