npm - @link-assistant/agent - Versions diffs - 0.13.1 → 0.13.4 - Mend

@link-assistant/agent 0.13.1 → 0.13.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/package.json +1 -1
package/src/provider/models.ts +64 -1
package/src/provider/retry-fetch.ts +136 -14

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@link-assistant/agent",
-  "version": "0.13.1",
+  "version": "0.13.4",
   "description": "A minimal, public domain AI CLI agent compatible with OpenCode's JSON interface. Bun-only runtime.",
   "main": "src/index.js",
   "type": "module",

package/src/provider/models.ts CHANGED Viewed

@@ -67,11 +67,74 @@ export namespace ModelsDev {
   export type Provider = z.infer<typeof Provider>;
+  /**
+   * Cache staleness threshold in milliseconds (1 hour).
+   * If the cache is older than this, we await the refresh before using the data.
+   */
+  const CACHE_STALE_THRESHOLD_MS = 60 * 60 * 1000;
+  /**
+   * Get the models database, refreshing from models.dev if needed.
+   *
+   * This function handles cache staleness properly:
+   * - If cache doesn't exist: await refresh to ensure fresh data
+   * - If cache is stale (> 1 hour old): await refresh to ensure up-to-date models
+   * - If cache is fresh: trigger background refresh but use cached data immediately
+   *
+   * This prevents ProviderModelNotFoundError when:
+   * - User runs agent for the first time (no cache)
+   * - User has outdated cache missing new models like kimi-k2.5-free
+   *
+   * @see https://github.com/link-assistant/agent/issues/175
+   */
   export async function get() {
     const file = Bun.file(filepath);

     // Runs refresh() to completion but never rejects: a failed refresh (for
     // example, no network) must not prevent serving the existing cache or the
     // bundled data further down.
     // NOTE(review): refresh() is defined outside this block; if it already
     // swallows its own errors this guard is simply a no-op.
     const refreshSafely = async (): Promise<void> => {
       try {
         await refresh();
       } catch (error) {
         log.warn(() => ({
           message: 'models refresh failed, continuing with existing data',
           path: filepath,
           error: String(error),
         }));
       }
     };

     if (!(await file.exists())) {
       // No cache - must wait for the refresh to get initial data
       log.info(() => ({
         message: 'no cache found, awaiting refresh',
         path: filepath,
       }));
       await refreshSafely();
     } else {
       // A failed stat or an unusable mtime (missing / Invalid Date) is treated
       // as "modified at epoch 0", i.e. stale, so we err towards refreshing.
       const stats = await file.stat().catch(() => null);
       const rawMtime = stats?.mtime?.getTime();
       const mtime =
         rawMtime !== undefined && Number.isFinite(rawMtime) ? rawMtime : 0;
       // Compute the age once so the staleness decision and the logged value agree
       const age = Date.now() - mtime;

       if (age > CACHE_STALE_THRESHOLD_MS) {
         // Stale cache - wait for the refresh to get the updated model list
         log.info(() => ({
           message: 'cache is stale, awaiting refresh',
           path: filepath,
           age,
           threshold: CACHE_STALE_THRESHOLD_MS,
         }));
         await refreshSafely();
       } else {
         // Fresh cache - refresh in the background and use cached data now.
         // `void` marks the promise as intentionally not awaited; errors are
         // handled inside refreshSafely so no unhandled rejection can escape.
         log.info(() => ({
           message: 'cache is fresh, triggering background refresh',
           path: filepath,
           age,
         }));
         void refreshSafely();
       }
     }

     // Read the cache file; a missing or unparsable file yields undefined
     const result = await file.json().catch(() => {});
     if (result) return result as Record<string, Provider>;

     // Fallback to bundled data if the cache read failed
     log.warn(() => ({
       message: 'cache read failed, using bundled data',
       path: filepath,
     }));
     const json = await data();
     return JSON.parse(json) as Record<string, Provider>;
   }

package/src/provider/retry-fetch.ts CHANGED Viewed

@@ -20,7 +20,12 @@ import { Flag } from '../flag/flag';
  * By wrapping fetch, we handle rate limits at the HTTP layer with time-based retries,
  * ensuring the agent's 7-week global timeout is respected.
  *
+ * Important: Rate limit waits use ISOLATED AbortControllers that are NOT subject to
+ * provider/stream timeouts. This prevents long rate limit waits (e.g., 15 hours) from
+ * being aborted by short provider timeouts (e.g., 5 minutes).
+ *
  * @see https://github.com/link-assistant/agent/issues/167
+ * @see https://github.com/link-assistant/agent/issues/183
  * @see https://github.com/vercel/ai/issues/12585
  */
@@ -150,23 +155,109 @@ export namespace RetryFetch {
   /**
    * Sleep for the specified duration, but respect abort signals.
+   * Properly cleans up event listeners to prevent memory leaks.
    */
   async function sleep(ms: number, signal?: AbortSignal): Promise<void> {
     return new Promise<void>((resolve, reject) => {
       // An already-aborted signal will not dispatch 'abort' again, so reject
       // up front instead of waiting out the full delay.
       if (signal?.aborted) {
         reject(new DOMException('Aborted', 'AbortError'));
         return;
       }

       // Abort path: cancel the pending timer and reject. Registered with
       // `once`, so the listener detaches itself after firing.
       const onAbort = () => {
         clearTimeout(timer);
         reject(new DOMException('Aborted', 'AbortError'));
       };

       // Normal path: the listener must be removed from INSIDE the timer
       // callback. Passing `resolve` straight to setTimeout and wrapping the
       // `resolve` parameter afterwards does not work, because setTimeout has
       // already captured the unwrapped function; the listener would then stay
       // attached to long-lived signals after every completed sleep.
       const timer = setTimeout(() => {
         signal?.removeEventListener('abort', onAbort);
         resolve();
       }, ms);

       signal?.addEventListener('abort', onAbort, { once: true });
     });
   }
+  /**
+   * Create an isolated AbortController for rate limit waits.
+   *
+   * This controller is NOT connected to the request's AbortSignal, so it won't be
+   * affected by provider timeouts (default 5 minutes) or stream timeouts.
+   * It only respects the global AGENT_RETRY_TIMEOUT.
+   *
+   * However, it DOES check the user's abort signal periodically (every 10 seconds)
+   * to allow user cancellation during long rate limit waits.
+   *
+   * This solves issue #183 where long rate limit waits (e.g., 15 hours) were being
+   * aborted by the provider timeout (5 minutes).
+   *
+   * @param remainingTimeout Maximum time allowed for this wait (ms)
+   * @param userSignal Optional user abort signal to check periodically
+   * @returns An object with the signal and a cleanup function
+   * @see https://github.com/link-assistant/agent/issues/183
+   */
+  function createIsolatedRateLimitSignal(
+    remainingTimeout: number,
+    userSignal?: AbortSignal
+  ): {
+    signal: AbortSignal;
+    cleanup: () => void;
+  } {
+    const controller = new AbortController();
+    const timers: NodeJS.Timeout[] = [];
+    // Set a timeout based on the global AGENT_RETRY_TIMEOUT (not provider timeout)
+    const globalTimeoutId = setTimeout(() => {
+      controller.abort(
+        new DOMException(
+          'Rate limit wait exceeded global timeout',
+          'TimeoutError'
+        )
+      );
+    }, remainingTimeout);
+    timers.push(globalTimeoutId);
+    // Periodically check if user canceled (every 10 seconds)
+    // This allows user cancellation during long rate limit waits
+    // without being affected by provider timeouts
+    if (userSignal) {
+      const checkUserCancellation = () => {
+        if (userSignal.aborted) {
+          controller.abort(
+            new DOMException(
+              'User canceled during rate limit wait',
+              'AbortError'
+            )
+          );
+        }
+      };
+      // Check immediately and then every 10 seconds
+      checkUserCancellation();
+      const intervalId = setInterval(checkUserCancellation, 10_000);
+      timers.push(intervalId as unknown as NodeJS.Timeout);
+    }
+    return {
+      signal: controller.signal,
+      cleanup: () => {
+        for (const timer of timers) {
+          clearTimeout(timer);
+          clearInterval(timer as unknown as NodeJS.Timeout);
+        }
+      },
+    };
+  }
   /**
    * Check if an error is retryable (network issues, temporary failures).
    */
@@ -317,26 +408,57 @@ export namespace RetryFetch {
           return response;
         }
+        const remainingTimeout = maxRetryTimeout - elapsed;
         log.info(() => ({
           message: 'rate limited, will retry',
           sessionID,
           attempt,
           delay,
           delayMinutes: (delay / 1000 / 60).toFixed(2),
+          delayHours: (delay / 1000 / 3600).toFixed(2),
           elapsed,
-          remainingTimeout: maxRetryTimeout - elapsed,
+          remainingTimeout,
+          remainingTimeoutHours: (remainingTimeout / 1000 / 3600).toFixed(2),
+          isolatedSignal: true, // Indicates we're using isolated signal for this wait
         }));
-        // Wait before retrying
+        // Wait before retrying using ISOLATED signal
+        // This is critical for issue #183: Rate limit waits can be hours long (e.g., 15 hours),
+        // but provider timeouts are typically 5 minutes. By using an isolated AbortController
+        // that only respects AGENT_RETRY_TIMEOUT, we prevent the provider timeout from
+        // aborting long rate limit waits.
+        //
+        // The isolated signal periodically checks the user's abort signal (every 10 seconds)
+        // to allow user cancellation during long waits.
+        const { signal: isolatedSignal, cleanup } =
+          createIsolatedRateLimitSignal(
+            remainingTimeout,
+            init?.signal ?? undefined
+          );
         try {
-          await sleep(delay, init?.signal ?? undefined);
-        } catch {
-          // Aborted - return the last response
+          await sleep(delay, isolatedSignal);
+        } catch (sleepError) {
+          // Check if the original request was aborted (user cancellation)
+          // In that case, we should stop retrying
+          if (init?.signal?.aborted) {
+            log.info(() => ({
+              message: 'rate limit wait aborted by user cancellation',
+              sessionID,
+            }));
+            return response;
+          }
+          // Otherwise, it was the isolated timeout - log and return
           log.info(() => ({
-            message: 'retry sleep aborted, returning last response',
+            message: 'rate limit wait exceeded global timeout',
             sessionID,
+            sleepError: String(sleepError),
           }));
           return response;
+        } finally {
+          cleanup();
         }
       }
     };