npm - @link-assistant/agent - Versions diffs - 0.12.0 → 0.12.3 - Mend

@link-assistant/agent 0.12.0 → 0.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/package.json +1 -1
package/src/flag/flag.ts +11 -0
package/src/index.js +61 -13
package/src/provider/provider.ts +220 -0
package/src/provider/retry-fetch.ts +363 -0
package/src/session/processor.ts +23 -2
package/src/session/prompt.ts +7 -7

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@link-assistant/agent",
-  "version": "0.12.0",
+  "version": "0.12.3",
   "description": "A minimal, public domain AI CLI agent compatible with OpenCode's JSON interface. Bun-only runtime.",
   "main": "src/index.js",
   "type": "module",

package/src/flag/flag.ts CHANGED Viewed

@@ -98,6 +98,17 @@ export namespace Flag {
     return val ? parseInt(val, 10) * 1000 : 1200000; // 20 minutes in ms
   }
+  // Minimum retry interval to prevent rapid retries (default: 30 seconds).
+  // The value is looked up via getEnv under LINK_ASSISTANT_AGENT_MIN_RETRY_INTERVAL /
+  // AGENT_MIN_RETRY_INTERVAL, interpreted as whole seconds, and returned in milliseconds.
+  // A missing, non-numeric or negative value falls back to the default: a NaN here
+  // would propagate through Math.max() into the retry delay and make the timer fire
+  // almost immediately, which is exactly the rapid-retry behavior this flag prevents.
+  // See: https://github.com/link-assistant/agent/issues/167
+  export function MIN_RETRY_INTERVAL(): number {
+    const defaultMs = 30000; // 30 seconds in ms
+    const val = getEnv(
+      'LINK_ASSISTANT_AGENT_MIN_RETRY_INTERVAL',
+      'AGENT_MIN_RETRY_INTERVAL'
+    );
+    if (!val) return defaultMs;
+    const seconds = parseInt(val, 10);
+    if (Number.isNaN(seconds) || seconds < 0) return defaultMs;
+    return seconds * 1000;
+  }
   // Stream timeout configuration
   // chunkMs: timeout between stream chunks - detects stalled streams (default: 2 minutes)
   // stepMs: timeout for each individual LLM step (default: 10 minutes)

package/src/index.js CHANGED Viewed

@@ -142,14 +142,56 @@ function readStdinWithTimeout(timeout = null) {
 /**
  * Parse model configuration from argv
+ * Supports both explicit provider/model format and short model names.
+ *
+ * Format examples:
+ * - "kilo/glm-5-free" -> uses kilo provider with glm-5-free model (explicit)
+ * - "opencode/kimi-k2.5-free" -> uses opencode provider (explicit)
+ * - "glm-5-free" -> resolved to kilo provider (unique free model)
+ * - "kimi-k2.5-free" -> resolved to opencode provider (shared model, opencode preferred)
+ *
  * @param {object} argv - Command line arguments
  * @returns {object} - { providerID, modelID }
  */
 async function parseModelConfig(argv) {
-  // Parse model argument (handle model IDs with slashes like groq/qwen/qwen3-32b)
-  const modelParts = argv.model.split('/');
-  let providerID = modelParts[0] || 'opencode';
-  let modelID = modelParts.slice(1).join('/') || 'kimi-k2.5-free';
+  const modelArg = argv.model;
+  let providerID;
+  let modelID;
+  // Check if model includes explicit provider prefix
+  if (modelArg.includes('/')) {
+    // Explicit provider/model format - respect user's choice
+    const modelParts = modelArg.split('/');
+    providerID = modelParts[0];
+    modelID = modelParts.slice(1).join('/');
+    // Validate that providerID and modelID are not empty
+    if (!providerID || !modelID) {
+      providerID = providerID || 'opencode';
+      modelID = modelID || 'kimi-k2.5-free';
+    }
+    Log.Default.info(() => ({
+      message: 'using explicit provider/model',
+      providerID,
+      modelID,
+    }));
+  } else {
+    // Short model name - resolve to appropriate provider
+    // Import Provider to use parseModelWithResolution
+    const { Provider } = await import('./provider/provider.ts');
+    const resolved = await Provider.parseModelWithResolution(modelArg);
+    providerID = resolved.providerID;
+    modelID = resolved.modelID;
+    Log.Default.info(() => ({
+      message: 'resolved short model name',
+      input: modelArg,
+      providerID,
+      modelID,
+    }));
+  }
   // Handle --use-existing-claude-oauth option
   // This reads OAuth credentials from ~/.claude/.credentials.json (Claude Code CLI)
@@ -175,22 +217,22 @@ async function parseModelConfig(argv) {
     // Set environment variable for the provider to use
     process.env.CLAUDE_CODE_OAUTH_TOKEN = creds.accessToken;
-    // If user specified a model, use it with claude-oauth provider
-    // If not, use claude-oauth/claude-sonnet-4-5 as default
+    // If user specified the default model (opencode/kimi-k2.5-free), switch to claude-oauth
+    // If user explicitly specified kilo or another provider, warn but respect their choice
     if (providerID === 'opencode' && modelID === 'kimi-k2.5-free') {
       providerID = 'claude-oauth';
       modelID = 'claude-sonnet-4-5';
     } else if (!['claude-oauth', 'anthropic'].includes(providerID)) {
-      // If user specified a different provider, warn them
+      // If user specified a different provider explicitly, warn them
       const compactJson = argv['compact-json'] === true;
       outputStatus(
         {
           type: 'warning',
-          message: `--use-existing-claude-oauth is set but model uses provider "${providerID}". Using OAuth credentials anyway.`,
+          message: `--use-existing-claude-oauth is set but model uses provider "${providerID}". Using specified provider.`,
         },
         compactJson
       );
-      providerID = 'claude-oauth';
+      // Don't override - respect user's explicit provider choice
     }
   }
@@ -257,8 +299,6 @@ async function runAgentMode(argv, request) {
     }));
   }
-  const { providerID, modelID } = await parseModelConfig(argv);
   // Validate and get JSON standard
   const jsonStandard = argv['json-standard'];
   if (!isValidJsonStandard(jsonStandard)) {
@@ -275,9 +315,14 @@ async function runAgentMode(argv, request) {
   // Logging is already initialized in middleware, no need to call Log.init() again
   // Wrap in Instance.provide for OpenCode infrastructure
+  // parseModelConfig must be called inside Instance.provide to access provider state
   await Instance.provide({
     directory: process.cwd(),
     fn: async () => {
+      // Parse model config inside Instance.provide context
+      // This allows parseModelWithResolution to access the provider state
+      const { providerID, modelID } = await parseModelConfig(argv);
       if (argv.server) {
         // SERVER MODE: Start server and communicate via HTTP
         await runServerMode(
@@ -330,8 +375,6 @@ async function runContinuousAgentMode(argv) {
     }));
   }
-  const { providerID, modelID } = await parseModelConfig(argv);
   // Validate and get JSON standard
   const jsonStandard = argv['json-standard'];
   if (!isValidJsonStandard(jsonStandard)) {
@@ -348,9 +391,14 @@ async function runContinuousAgentMode(argv) {
   const { systemMessage, appendSystemMessage } = await readSystemMessages(argv);
   // Wrap in Instance.provide for OpenCode infrastructure
+  // parseModelConfig must be called inside Instance.provide to access provider state
   await Instance.provide({
     directory: process.cwd(),
     fn: async () => {
+      // Parse model config inside Instance.provide context
+      // This allows parseModelWithResolution to access the provider state
+      const { providerID, modelID } = await parseModelConfig(argv);
       if (argv.server) {
         // SERVER MODE: Start server and communicate via HTTP
         await runContinuousServerMode(

package/src/provider/provider.ts CHANGED Viewed

@@ -16,6 +16,7 @@ import { Flag } from '../flag/flag';
 import { iife } from '../util/iife';
 import { createEchoModel } from './echo';
 import { createCacheModel } from './cache';
+import { RetryFetch } from './retry-fetch';
 export namespace Provider {
   const log = Log.create({ service: 'provider' });
@@ -1143,6 +1144,16 @@ export namespace Provider {
           });
         };
       }
+      // Wrap fetch with retry logic for rate limit handling (HTTP 429)
+      // This ensures the agent's time-based retry (7-week timeout) is respected
+      // instead of the AI SDK's fixed retry count (3 attempts)
+      // See: https://github.com/link-assistant/agent/issues/167
+      const existingFetch = options['fetch'] ?? fetch;
+      options['fetch'] = RetryFetch.wrap(existingFetch, {
+        sessionID: provider.id,
+      });
       const fn = mod[Object.keys(mod).find((key) => key.startsWith('create'))!];
       const loaded = fn({
         name: provider.id,
@@ -1373,6 +1384,215 @@ export namespace Provider {
     };
   }
+  /**
+   * Map a bare model name (no "provider/" prefix) onto the provider that should serve it.
+   *
+   * Resolution order:
+   * 1. Names on the hard-coded Kilo-only list resolve to `kilo`, provided the loaded
+   *    kilo provider actually lists the model; otherwise the general scan below runs.
+   * 2. Every loaded provider is scanned for the model:
+   *    - no provider lists it  -> undefined
+   *    - exactly one lists it  -> that provider
+   *    - several list it       -> `opencode` when it is among them, else the first match
+   *
+   * @param modelID - Short model name without provider prefix
+   * @returns The selected provider ID paired with the unchanged model ID, or
+   *          undefined when no loaded provider lists the model
+   */
+  export async function resolveShortModelName(
+    modelID: string
+  ): Promise<{ providerID: string; modelID: string } | undefined> {
+    const loaded = await state();
+    // Free models that are treated as Kilo-only
+    const kiloOnly = [
+      'glm-5-free',
+      'glm-4.7-free',
+      'giga-potato-free',
+      'trinity-large-preview',
+    ];
+    const kilo = loaded.providers['kilo'];
+    if (kiloOnly.includes(modelID) && kilo && kilo.info.models[modelID]) {
+      log.info(() => ({
+        message: 'resolved short model name to kilo (unique)',
+        modelID,
+      }));
+      return { providerID: 'kilo', modelID };
+    }
+    // Collect, in provider iteration order, every provider that lists the model
+    const candidates: string[] = Object.entries(loaded.providers)
+      .filter(([, provider]) => provider.info.models[modelID])
+      .map(([id]) => id);
+    const [first] = candidates;
+    if (first === undefined) {
+      return undefined;
+    }
+    if (candidates.length === 1) {
+      log.info(() => ({
+        message: 'resolved short model name (single match)',
+        modelID,
+        providerID: first,
+      }));
+      return { providerID: first, modelID };
+    }
+    // Shared model: opencode wins whenever it is one of the candidates
+    if (candidates.includes('opencode')) {
+      log.info(() => ({
+        message: 'resolved short model name to opencode (multiple providers)',
+        modelID,
+        availableProviders: candidates,
+      }));
+      return { providerID: 'opencode', modelID };
+    }
+    // No opencode among several candidates: take the first one found
+    log.info(() => ({
+      message: 'resolved short model name (fallback)',
+      modelID,
+      providerID: first,
+      availableProviders: candidates,
+    }));
+    return { providerID: first, modelID };
+  }
+  /**
+   * Turn a model string into a provider ID / model ID pair, whether or not the
+   * string carries a provider prefix.
+   *
+   * - With a "/" in the string the work is delegated to parseModel.
+   * - Without one, resolveShortModelName picks the provider; when it finds none,
+   *   a warning is logged and the `opencode` provider is assumed.
+   *
+   * Examples:
+   * - "kilo/glm-5-free" -> { providerID: "kilo", modelID: "glm-5-free" }
+   * - "glm-5-free" -> { providerID: "kilo", modelID: "glm-5-free" } (resolved)
+   * - "kimi-k2.5-free" -> { providerID: "opencode", modelID: "kimi-k2.5-free" } (resolved)
+   *
+   * @param model - Model string with or without provider prefix
+   * @returns Parsed provider ID and model ID
+   */
+  export async function parseModelWithResolution(
+    model: string
+  ): Promise<{ providerID: string; modelID: string }> {
+    const hasProviderPrefix = model.includes('/');
+    if (hasProviderPrefix) {
+      // Explicit provider: no resolution step
+      return parseModel(model);
+    }
+    const match = await resolveShortModelName(model);
+    if (!match) {
+      // Nothing lists this model - keep the historical default provider
+      log.warn(() => ({
+        message: 'unable to resolve short model name, using opencode as default',
+        modelID: model,
+      }));
+      return { providerID: 'opencode', modelID: model };
+    }
+    return match;
+  }
+  /**
+   * Models offered by more than one free provider, keyed by model ID.
+   * Each value lists the provider IDs that can serve the model; getAlternativeProviders
+   * filters this list to find a substitute when one provider fails (e.g. rate limit).
+   * Note: only consulted for models requested without an explicit provider.
+   * If user specifies "kilo/kimi-k2.5-free", no fallback will occur.
+   */
+  const SHARED_FREE_MODELS: Record<string, string[]> = {
+    // kimi-k2.5-free is available in both OpenCode and Kilo
+    'kimi-k2.5-free': ['opencode', 'kilo'],
+    // Note: minimax-m2.1-free is Kilo only, minimax-m2.5-free is OpenCode only.
+    // They are different model versions, not shared, so neither is listed here
+  };
+  /**
+   * List the other providers that can serve a model after its provider failed
+   * (e.g. was rate limited).
+   *
+   * Returns an empty array when the provider was chosen explicitly by the user
+   * (like "kilo/kimi-k2.5-free") or when the model is not in SHARED_FREE_MODELS.
+   * Otherwise returns the shared providers minus the failed one, restricted to
+   * providers that are currently loaded.
+   *
+   * @param modelID - The model ID to find alternatives for
+   * @param failedProviderID - The provider that failed
+   * @param wasExplicitProvider - Whether the user explicitly specified the provider
+   * @returns Array of alternative provider IDs that can serve this model
+   */
+  export async function getAlternativeProviders(
+    modelID: string,
+    failedProviderID: string,
+    wasExplicitProvider: boolean
+  ): Promise<string[]> {
+    // An explicitly chosen provider is never swapped out
+    if (wasExplicitProvider) {
+      log.info(() => ({
+        message: 'no alternative providers (explicit provider specified)',
+        modelID,
+        failedProviderID,
+      }));
+      return [];
+    }
+    const offeredBy = SHARED_FREE_MODELS[modelID];
+    if (!offeredBy) {
+      // Model is not shared between providers
+      return [];
+    }
+    const loaded = await state();
+    const alternatives: string[] = [];
+    for (const candidate of offeredBy) {
+      if (candidate === failedProviderID) continue;
+      if (!loaded.providers[candidate]) continue;
+      alternatives.push(candidate);
+    }
+    if (alternatives.length === 0) {
+      return alternatives;
+    }
+    log.info(() => ({
+      message: 'found alternative providers for rate-limited model',
+      modelID,
+      failedProviderID,
+      alternatives,
+    }));
+    return alternatives;
+  }
+  /**
+   * Checks if an error indicates a rate limit issue.
+   *
+   * Matching is case-insensitive on the error message and name. The status code
+   * 429 only counts when it is not embedded in a longer digit run, so messages
+   * such as "request 14290 failed" or "took 4291ms" are not misclassified.
+   *
+   * @param error - The error to check; anything that is not an Error yields false
+   * @returns true if the error indicates a rate limit
+   */
+  export function isRateLimitError(error: unknown): boolean {
+    if (!(error instanceof Error)) return false;
+    const message = error.message.toLowerCase();
+    const name = error.name.toLowerCase();
+    return (
+      message.includes('rate limit') ||
+      message.includes('ratelimit') ||
+      message.includes('too many requests') ||
+      // "429" as a standalone number, not as part of e.g. "14290"
+      /(?<!\d)429(?!\d)/.test(message) ||
+      name.includes('ratelimit')
+    );
+  }
   export const ModelNotFoundError = NamedError.create(
     'ProviderModelNotFoundError',
     z.object({

package/src/provider/retry-fetch.ts ADDED Viewed

@@ -0,0 +1,363 @@
+import { Log } from '../util/log';
+import { Flag } from '../flag/flag';
+/**
+ * Custom fetch wrapper that handles rate limits (HTTP 429) using time-based retry logic.
+ *
+ * This wrapper intercepts 429 responses at the HTTP level before the AI SDK's internal
+ * retry mechanism can interfere. It respects:
+ * - retry-after headers (both seconds and HTTP date formats)
+ * - retry-after-ms header for millisecond precision
+ * - AGENT_RETRY_TIMEOUT for global time-based retry limit
+ * - AGENT_MAX_RETRY_DELAY for maximum single retry wait time
+ *
+ * Problem solved:
+ * The AI SDK's internal retry uses a fixed count (default 3 attempts) and ignores
+ * retry-after headers. When providers return long retry-after values (e.g., 64 minutes),
+ * the SDK exhausts its retries before the agent can properly wait.
+ *
+ * Solution:
+ * By wrapping fetch, we handle rate limits at the HTTP layer with time-based retries,
+ * ensuring the agent's 7-week global timeout is respected.
+ *
+ * @see https://github.com/link-assistant/agent/issues/167
+ * @see https://github.com/vercel/ai/issues/12585
+ */
+export namespace RetryFetch {
+  const log = Log.create({ service: 'retry-fetch' });
+  // Retry configuration constants matching SessionRetry
+  const RETRY_INITIAL_DELAY = 2000; // ms; first backoff step when no retry-after header is present
+  const RETRY_BACKOFF_FACTOR = 2; // multiplier applied per additional attempt
+  const RETRY_MAX_DELAY_NO_HEADERS = 30_000; // NOTE(review): not referenced in the visible code - confirm it is still needed
+  // Minimum retry interval in ms, used by calculateRetryDelay as a lower bound (default: 30 seconds)
+  // Delegates to Flag.MIN_RETRY_INTERVAL(); can be configured via AGENT_MIN_RETRY_INTERVAL env var
+  function getMinRetryInterval(): number {
+    return Flag.MIN_RETRY_INTERVAL();
+  }
+  /**
+   * Spread retries out by lengthening a delay by a random 0-10% of itself,
+   * so that many clients do not retry at the same instant.
+   * The result is rounded to the nearest integer.
+   */
+  function addJitter(delay: number): number {
+    const spread = delay * (Math.random() * 0.1);
+    return Math.round(spread + delay);
+  }
+  /**
+   * Read the server's requested wait time from the response headers, in milliseconds.
+   *
+   * `retry-after-ms` is consulted first; `retry-after` is then tried as a number of
+   * seconds and finally as an HTTP date. Only strictly positive delays are accepted.
+   * Returns null if no header yields a usable value.
+   */
+  function parseRetryAfterHeader(headers: Headers): number | null {
+    // Millisecond-precision header takes precedence
+    const msHeader = headers.get('retry-after-ms');
+    if (msHeader) {
+      const ms = Number.parseFloat(msHeader);
+      // NaN fails this comparison, so no separate isNaN check is needed
+      if (ms > 0) {
+        log.info(() => ({
+          message: 'parsed retry-after-ms header',
+          headerValue: ms,
+        }));
+        return ms;
+      }
+    }
+    const secondsOrDate = headers.get('retry-after');
+    if (!secondsOrDate) {
+      return null;
+    }
+    // Numeric form: delay in seconds
+    const seconds = Number.parseFloat(secondsOrDate);
+    if (seconds > 0) {
+      const delayMs = Math.ceil(seconds * 1000);
+      log.info(() => ({
+        message: 'parsed retry-after header (seconds)',
+        headerValue: seconds,
+        delayMs,
+      }));
+      return delayMs;
+    }
+    // Date form: delay is the distance from now to the given instant
+    const untilDate = Date.parse(secondsOrDate) - Date.now();
+    if (untilDate > 0) {
+      log.info(() => ({
+        message: 'parsed retry-after header (date)',
+        headerValue: secondsOrDate,
+        delayMs: untilDate,
+      }));
+      return Math.ceil(untilDate);
+    }
+    return null;
+  }
+  /**
+   * Decide how long to wait before the next attempt.
+   *
+   * With a retry-after header the server's value is used, raised to the minimum
+   * retry interval; without one, exponential backoff (capped at maxBackoffDelay,
+   * then raised to the minimum interval) is used. Jitter is added in both cases.
+   *
+   * @param headers - Headers of the rate-limited response
+   * @param attempt - 1-based attempt counter driving the backoff exponent
+   * @param maxRetryTimeout - Upper bound in ms for a header-supplied delay
+   * @param maxBackoffDelay - Cap in ms for the exponential backoff
+   * @returns Delay in ms, or null if retry-after exceeds maxRetryTimeout
+   */
+  function calculateRetryDelay(
+    headers: Headers,
+    attempt: number,
+    maxRetryTimeout: number,
+    maxBackoffDelay: number
+  ): number | null {
+    const headerDelay = parseRetryAfterHeader(headers);
+    const floor = getMinRetryInterval();
+    if (headerDelay === null) {
+      // Server gave no hint: back off exponentially
+      const uncapped =
+        RETRY_INITIAL_DELAY * RETRY_BACKOFF_FACTOR ** (attempt - 1);
+      const backoffDelay = Math.min(uncapped, maxBackoffDelay);
+      const delay = Math.max(backoffDelay, floor);
+      log.info(() => ({
+        message: 'no retry-after header, using exponential backoff',
+        attempt,
+        backoffDelay,
+        delay,
+        minInterval: floor,
+        maxBackoffDelay,
+      }));
+      return addJitter(delay);
+    }
+    if (headerDelay > maxRetryTimeout) {
+      // Waiting this long would blow the time budget - let the caller give up
+      log.error(() => ({
+        message:
+          'retry-after exceeds maximum retry timeout, will not retry at fetch level',
+        retryAfterMs: headerDelay,
+        maxRetryTimeout,
+        retryAfterHours: (headerDelay / 1000 / 3600).toFixed(2),
+        maxRetryTimeoutHours: (maxRetryTimeout / 1000 / 3600).toFixed(2),
+      }));
+      return null;
+    }
+    // Honor the server's value, but never go below the minimum interval
+    const delay = Math.max(headerDelay, floor);
+    log.info(() => ({
+      message: 'using retry-after value',
+      retryAfterMs: headerDelay,
+      delay,
+      minInterval: floor,
+    }));
+    return addJitter(delay);
+  }
+  /**
+   * Sleep for the specified duration, but respect abort signals.
+   *
+   * Rejects with an AbortError DOMException as soon as the signal aborts, and
+   * immediately when the signal is already aborted on entry.
+   *
+   * @param ms - Duration to wait in milliseconds
+   * @param signal - Optional signal that cancels the wait
+   */
+  async function sleep(ms: number, signal?: AbortSignal): Promise<void> {
+    return new Promise((resolve, reject) => {
+      // An 'abort' listener added to an already-aborted signal never fires,
+      // so check up front instead of waiting out the full delay.
+      if (signal?.aborted) {
+        reject(new DOMException('Aborted', 'AbortError'));
+        return;
+      }
+      const onAbort = () => {
+        clearTimeout(timeout);
+        reject(new DOMException('Aborted', 'AbortError'));
+      };
+      const timeout = setTimeout(() => {
+        // Remove the listener so a long-lived signal does not accumulate one
+        // stale listener per completed wait.
+        signal?.removeEventListener('abort', onAbort);
+        resolve();
+      }, ms);
+      signal?.addEventListener('abort', onAbort, { once: true });
+    });
+  }
+  /**
+   * Tell whether a thrown value looks like a transient network failure.
+   * Only Error instances qualify; the message is matched case-sensitively
+   * against a fixed list of socket/connection markers.
+   */
+  function isRetryableError(error: unknown): boolean {
+    if (!(error instanceof Error)) return false;
+    const text = error.message;
+    // Socket/connection errors (Bun has known timeout issues)
+    // See: https://github.com/oven-sh/bun/issues/14439
+    const markers = [
+      'ConnectionClosed',
+      'ECONNRESET',
+      'ECONNREFUSED',
+      'socket',
+      'connection',
+    ];
+    return markers.some((marker) => text.includes(marker));
+  }
+  export type RetryFetchOptions = {
+    /**
+     * Fetch implementation to delegate to; `create` uses the global fetch when omitted.
+     */
+    baseFetch?: typeof fetch;
+    /**
+     * Identifier attached to log entries; `create` logs 'unknown' when omitted.
+     */
+    sessionID?: string;
+  };
+  /**
+   * Create a fetch function that handles rate limits with time-based retry logic.
+   *
+   * This wrapper:
+   * 1. Intercepts HTTP 429 responses
+   * 2. Parses retry-after headers
+   * 3. Waits for the specified duration (respecting global timeout)
+   * 4. Cancels the discarded 429 body and retries the request
+   *
+   * Retryable network errors thrown by the underlying fetch are retried with
+   * exponential backoff under the same global timeout; other errors propagate.
+   *
+   * If retry-after exceeds AGENT_RETRY_TIMEOUT, the original 429 response is returned
+   * to let higher-level error handling take over. The same happens when the wait
+   * after a 429 is aborted through `init.signal`.
+   *
+   * @param options Configuration options
+   * @returns A fetch function with rate limit retry handling
+   */
+  export function create(options: RetryFetchOptions = {}): typeof fetch {
+    const baseFetch = options.baseFetch ?? fetch;
+    const sessionID = options.sessionID ?? 'unknown';
+    return async function retryFetch(
+      input: RequestInfo | URL,
+      init?: RequestInit
+    ): Promise<Response> {
+      let attempt = 0;
+      const startTime = Date.now();
+      // NOTE(review): assumes Flag.RETRY_TIMEOUT() is in seconds and
+      // Flag.MAX_RETRY_DELAY() already in ms - confirm against flag.ts
+      const maxRetryTimeout = Flag.RETRY_TIMEOUT() * 1000;
+      const maxBackoffDelay = Flag.MAX_RETRY_DELAY();
+      while (true) {
+        attempt++;
+        let response: Response;
+        try {
+          response = await baseFetch(input, init);
+        } catch (error) {
+          // Check if it's a retryable network error
+          if (isRetryableError(error)) {
+            const elapsed = Date.now() - startTime;
+            if (elapsed >= maxRetryTimeout) {
+              log.warn(() => ({
+                message:
+                  'network error retry timeout exceeded, re-throwing error',
+                sessionID,
+                elapsed,
+                maxRetryTimeout,
+                error: (error as Error).message,
+              }));
+              throw error;
+            }
+            // Use exponential backoff for network errors (same constants as
+            // the header-less rate limit path)
+            const delay = Math.min(
+              RETRY_INITIAL_DELAY * Math.pow(RETRY_BACKOFF_FACTOR, attempt - 1),
+              maxBackoffDelay
+            );
+            log.info(() => ({
+              message: 'network error, retrying',
+              sessionID,
+              attempt,
+              delay,
+              error: (error as Error).message,
+            }));
+            // An abort during this wait rejects and propagates to the caller
+            await sleep(delay, init?.signal ?? undefined);
+            continue;
+          }
+          throw error;
+        }
+        // Only handle rate limit errors (429)
+        if (response.status !== 429) {
+          return response;
+        }
+        // Check if we're within the global retry timeout
+        const elapsed = Date.now() - startTime;
+        if (elapsed >= maxRetryTimeout) {
+          log.warn(() => ({
+            message: 'retry timeout exceeded in fetch wrapper, returning 429',
+            sessionID,
+            elapsed,
+            maxRetryTimeout,
+          }));
+          return response; // Let higher-level handling take over
+        }
+        // Calculate retry delay
+        const delay = calculateRetryDelay(
+          response.headers,
+          attempt,
+          maxRetryTimeout - elapsed, // Remaining time
+          maxBackoffDelay
+        );
+        // If delay is null, retry-after exceeds timeout - return response
+        if (delay === null) {
+          log.warn(() => ({
+            message:
+              'retry-after exceeds remaining timeout, returning 429 response',
+            sessionID,
+            elapsed,
+            remainingTimeout: maxRetryTimeout - elapsed,
+          }));
+          return response;
+        }
+        // Check if delay would exceed remaining timeout
+        if (elapsed + delay >= maxRetryTimeout) {
+          log.warn(() => ({
+            message: 'delay would exceed retry timeout, returning 429 response',
+            sessionID,
+            elapsed,
+            delay,
+            maxRetryTimeout,
+          }));
+          return response;
+        }
+        log.info(() => ({
+          message: 'rate limited, will retry',
+          sessionID,
+          attempt,
+          delay,
+          delayMinutes: (delay / 1000 / 60).toFixed(2),
+          elapsed,
+          remainingTimeout: maxRetryTimeout - elapsed,
+        }));
+        // Wait before retrying
+        try {
+          await sleep(delay, init?.signal ?? undefined);
+        } catch {
+          // Aborted - return the last response (body left intact for the caller)
+          log.info(() => ({
+            message: 'retry sleep aborted, returning last response',
+            sessionID,
+          }));
+          return response;
+        }
+        // This 429 response is now being discarded in favor of a retry. Cancel
+        // its unread body so the underlying connection is released rather than
+        // held open across a potentially very long retry loop. Cancellation
+        // failures (e.g. a locked stream) are irrelevant to the retry itself.
+        await response.body?.cancel().catch(() => {});
+      }
+    };
+  }
+  /**
+   * Add retry handling on top of an existing custom fetch (e.g., OAuth fetch).
+   *
+   * Equivalent to calling `create` with `customFetch` as its `baseFetch`, which
+   * lets several fetch wrappers be layered while keeping retry handling outermost.
+   *
+   * @param customFetch The custom fetch function to wrap
+   * @param options Configuration options (everything except `baseFetch`)
+   * @returns A fetch function with both custom logic and retry handling
+   */
+  export function wrap(
+    customFetch: typeof fetch,
+    options: Omit<RetryFetchOptions, 'baseFetch'> = {}
+  ): typeof fetch {
+    const merged: RetryFetchOptions = { ...options, baseFetch: customFetch };
+    return create(merged);
+  }
+}

package/src/session/processor.ts CHANGED Viewed

@@ -1,6 +1,11 @@
 import type { ModelsDev } from '../provider/models';
 import { MessageV2 } from './message-v2';
-import { type StreamTextResult, type Tool as AITool, APICallError } from 'ai';
+import {
+  type StreamTextResult,
+  type Tool as AITool,
+  APICallError,
+  JSONParseError,
+} from 'ai';
 import { Log } from '../util/log';
 import { Identifier } from '../id/id';
 import { Session } from '.';
@@ -205,6 +210,22 @@ export namespace SessionProcessor {
                   break;
                 }
                 case 'error':
+                  // Skip stream parse errors (malformed SSE from gateway/provider)
+                  // The AI SDK emits these as error events but continues the stream.
+                  // Following OpenAI Codex pattern: log and skip bad events.
+                  // See: https://github.com/link-assistant/agent/issues/169
+                  if (JSONParseError.isInstance(value.error)) {
+                    log.warn(() => ({
+                      message:
+                        'skipping malformed SSE event (stream parse error)',
+                      errorName: (value.error as Error)?.name,
+                      errorMessage: (value.error as Error)?.message?.substring(
+                        0,
+                        200
+                      ),
+                    }));
+                    continue;
+                  }
                   throw value.error;
                 case 'start-step':
@@ -364,7 +385,7 @@ export namespace SessionProcessor {
               providerID: input.providerID,
             });
-            // Check if error is retryable (APIError, SocketConnectionError, or TimeoutError)
+            // Check if error is retryable (APIError, SocketConnectionError, TimeoutError)
             const isRetryableAPIError =
               error?.name === 'APIError' && error.data.isRetryable;
             const isRetryableSocketError =

package/src/session/prompt.ts CHANGED Viewed

@@ -298,18 +298,18 @@ export namespace SessionPrompt {
           lastUser.model.modelID
         );
       } catch (error) {
-        log.warn(() => ({
+        // When an explicit provider is specified, do NOT silently fall back to default
+        // This ensures user's explicit choice is respected
+        // If the user wants a fallback, they should not specify a provider
+        log.error(() => ({
           message:
-            'Failed to initialize specified model, falling back to default model',
+            'Failed to initialize specified model - NOT falling back to default (explicit provider specified)',
           providerID: lastUser.model.providerID,
           modelID: lastUser.model.modelID,
           error: error instanceof Error ? error.message : String(error),
         }));
-        const defaultModel = await Provider.defaultModel();
-        model = await Provider.getModel(
-          defaultModel.providerID,
-          defaultModel.modelID
-        );
+        // Re-throw the error so it can be handled by the caller
+        throw error;
       }
       const task = tasks.pop();