llmist 15.3.0 → 15.4.1

This diff compares publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
package/dist/index.cjs CHANGED
@@ -1326,24 +1326,23 @@ function isKnownModelPattern(model) {
  return KNOWN_MODEL_PATTERNS.some((pattern) => pattern.test(model));
  }
  function resolveModel(model, options = {}) {
- if (model.includes(":")) {
- return model;
- }
  const normalized = model.toLowerCase();
  if (MODEL_ALIASES[normalized]) {
  return MODEL_ALIASES[normalized];
  }
- const modelLower = model.toLowerCase();
- if (modelLower.startsWith("gpt")) {
+ if (model.includes(":")) {
+ return model;
+ }
+ if (normalized.startsWith("gpt")) {
  return `openai:${model}`;
  }
- if (modelLower.startsWith("claude")) {
+ if (normalized.startsWith("claude")) {
  return `anthropic:${model}`;
  }
- if (modelLower.startsWith("gemini")) {
+ if (normalized.startsWith("gemini")) {
  return `gemini:${model}`;
  }
- if (modelLower.match(/^o\d/)) {
+ if (normalized.match(/^o\d/)) {
  return `openai:${model}`;
  }
  if (!isKnownModelPattern(model)) {
@@ -1408,7 +1407,16 @@ var init_model_shortcuts = __esm({
  "gemini-flash": "gemini:gemini-2.5-flash",
  "flash-lite": "gemini:gemini-2.5-flash-lite",
  "gemini-pro": "gemini:gemini-3-pro-preview",
- pro: "gemini:gemini-3-pro-preview"
+ pro: "gemini:gemini-3-pro-preview",
+ // OpenRouter aliases (or: prefix for short)
+ "or:sonnet": "openrouter:anthropic/claude-sonnet-4-5",
+ "or:opus": "openrouter:anthropic/claude-opus-4-5",
+ "or:haiku": "openrouter:anthropic/claude-haiku-4-5",
+ "or:gpt4o": "openrouter:openai/gpt-4o",
+ "or:gpt5": "openrouter:openai/gpt-5.2",
+ "or:flash": "openrouter:google/gemini-2.5-flash",
+ "or:llama": "openrouter:meta-llama/llama-3.3-70b-instruct",
+ "or:deepseek": "openrouter:deepseek/deepseek-r1"
  };
  KNOWN_MODEL_PATTERNS = [
  /^gpt-?\d/i,
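
Note on the two hunks above: the alias lookup in resolveModel now runs before the `:` early return, which is what lets the colon-containing `or:` shortcuts resolve at all. A rough sketch of the resulting behavior (hypothetical calls, assuming the alias table above):

// Hypothetical calls, not part of the diff:
resolveModel("or:sonnet"); // alias hit -> "openrouter:anthropic/claude-sonnet-4-5"
resolveModel("openrouter:foo/bar"); // no alias -> ":" passthrough, returned unchanged
resolveModel("gpt-4o"); // prefix heuristic -> "openai:gpt-4o"
resolveModel("o3-mini"); // /^o\d/ heuristic -> "openai:o3-mini"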
@@ -6262,84 +6270,111 @@ var init_huggingface_models = __esm({
  }
  });

- // src/providers/huggingface.ts
- function createHuggingFaceProviderFromEnv() {
- const token = readEnvVar("HF_TOKEN") || readEnvVar("HUGGING_FACE_API_KEY");
- if (!isNonEmpty(token)) {
- return null;
- }
- if (!token.startsWith("hf_")) {
- console.warn(
- "Warning: HF token should start with 'hf_'. Authentication may fail if token format is incorrect."
- );
- }
- const endpointUrl = readEnvVar("HF_ENDPOINT_URL");
- const baseURL = endpointUrl || "https://router.huggingface.co/v1";
- const endpointType = endpointUrl ? "dedicated" : "serverless";
- const client = new import_openai.default({
- apiKey: token.trim(),
- baseURL,
- timeout: 6e4,
- // 60s timeout - HF free tier can be slower than OpenAI
- maxRetries: 0
- // Disable SDK retries - llmist handles all retries at application level
- });
- return new HuggingFaceProvider(client, endpointType);
- }
- var import_openai, ROLE_MAP, HuggingFaceProvider;
- var init_huggingface = __esm({
- "src/providers/huggingface.ts"() {
+ // src/providers/openai-compatible-provider.ts
+ var import_openai, ROLE_MAP, OpenAICompatibleProvider;
+ var init_openai_compatible_provider = __esm({
+ "src/providers/openai-compatible-provider.ts"() {
  "use strict";
  import_openai = __toESM(require("openai"), 1);
  init_messages();
  init_base_provider();
  init_constants2();
- init_huggingface_models();
- init_utils();
  ROLE_MAP = {
  system: "system",
  user: "user",
  assistant: "assistant"
  };
- HuggingFaceProvider = class extends BaseProviderAdapter {
- providerId = "huggingface";
- endpointType;
- constructor(client, endpointType = "serverless") {
+ OpenAICompatibleProvider = class extends BaseProviderAdapter {
+ /**
+ * Short alias for the provider (e.g., "or" for openrouter, "hf" for huggingface).
+ * If not set, only the full providerId is accepted.
+ */
+ providerAlias;
+ config;
+ constructor(client, config) {
  super(client);
- this.endpointType = endpointType;
+ this.config = config;
  }
+ /**
+ * Check if this provider supports the given model descriptor.
+ * Accepts both the full providerId and the short alias.
+ */
  supports(descriptor) {
- return descriptor.provider === this.providerId || descriptor.provider === "hf";
+ return descriptor.provider === this.providerId || this.providerAlias !== void 0 && descriptor.provider === this.providerAlias;
  }
- getModelSpecs() {
- return HUGGINGFACE_MODELS;
+ /**
+ * Get custom headers to include in requests.
+ * Override in subclasses for provider-specific headers.
+ */
+ getCustomHeaders() {
+ return this.config.customHeaders ?? {};
+ }
+ /**
+ * Enhance error messages with provider-specific guidance.
+ * Override in subclasses for better error messages.
+ */
+ enhanceError(error) {
+ if (error instanceof Error) {
+ return error;
+ }
+ return new Error(String(error));
+ }
+ /**
+ * Build provider-specific request parameters.
+ * Override in subclasses to add custom parameters from `extra`.
+ *
+ * @param extra - The extra options from LLMGenerationOptions
+ * @returns Object with provider-specific params to merge into the request
+ */
+ buildProviderSpecificParams(_extra) {
+ return {};
  }
  buildApiRequest(options, descriptor, _spec, messages) {
  const { maxTokens, temperature, topP, stopSequences, extra } = options;
- return {
+ const request = {
  model: descriptor.name,
- messages: messages.map((message) => this.convertToHuggingFaceMessage(message)),
- // HF accepts max_tokens (like many providers), though OpenAI uses max_completion_tokens
- ...maxTokens !== void 0 ? { max_tokens: maxTokens } : {},
- temperature,
- top_p: topP,
- stop: stopSequences,
+ messages: messages.map((message) => this.convertMessage(message)),
  stream: true,
- stream_options: { include_usage: true },
- ...extra ?? {}
+ stream_options: { include_usage: true }
  };
+ if (maxTokens !== void 0) {
+ request.max_tokens = maxTokens;
+ }
+ if (temperature !== void 0) {
+ request.temperature = temperature;
+ }
+ if (topP !== void 0) {
+ request.top_p = topP;
+ }
+ if (stopSequences) {
+ request.stop = stopSequences;
+ }
+ const providerParams = this.buildProviderSpecificParams(extra);
+ Object.assign(request, providerParams);
+ if (extra) {
+ const handledKeys = Object.keys(providerParams);
+ for (const [key, value] of Object.entries(extra)) {
+ if (!handledKeys.includes(key) && !this.isProviderSpecificKey(key)) {
+ request[key] = value;
+ }
+ }
+ }
+ return request;
  }
  /**
- * Convert an LLMMessage to HuggingFace's ChatCompletionMessageParam.
- * HF uses OpenAI-compatible format.
- * Handles role-specific content type requirements:
- * - system/assistant: string content only
- * - user: string or multimodal array content (for vision models)
+ * Check if a key should be filtered from passthrough.
+ * Override in subclasses to filter provider-specific keys from extra.
  */
- convertToHuggingFaceMessage(message) {
+ isProviderSpecificKey(_key) {
+ return false;
+ }
+ /**
+ * Convert an LLMMessage to OpenAI's ChatCompletionMessageParam format.
+ */
+ convertMessage(message) {
  const role = ROLE_MAP[message.role];
  if (role === "user") {
- const content = this.convertToHuggingFaceContent(message.content);
+ const content = this.convertContent(message.content);
  return {
  role: "user",
  content,
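
An annotation on the buildApiRequest flow above: optional params are set only when defined, the buildProviderSpecificParams output is merged next, and remaining `extra` keys pass through unless a subclass claims them via isProviderSpecificKey. A hedged walk-through with hypothetical values:

// On an OpenRouterProvider (defined later in this diff), given
// extra = { seed: 42, routing: { route: "fallback" } }:
// - buildProviderSpecificParams consumes routing -> request.route = "fallback"
// - isProviderSpecificKey("routing") is true, so routing itself is not copied
// - seed is unclaimed and passes through -> request.seed = 42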
@@ -6361,11 +6396,9 @@ var init_huggingface = __esm({
  };
  }
  /**
- * Convert llmist content to HuggingFace's content format.
- * Optimizes by returning string for text-only content, array for multimodal.
- * Note: Multimodal support will be added in Phase 2.
+ * Convert llmist content to OpenAI's content format.
  */
- convertToHuggingFaceContent(content) {
+ convertContent(content) {
  if (typeof content === "string") {
  return content;
  }
@@ -6378,16 +6411,14 @@ var init_huggingface = __esm({
  }
  if (part.type === "audio") {
  throw new Error(
- "Hugging Face chat completions do not currently support audio input in llmist. Audio support will be added in Phase 2."
+ `${this.providerId} does not support audio input through llmist. Check provider docs for model-specific audio support.`
  );
  }
  throw new Error(`Unsupported content type: ${part.type}`);
  });
  }
  /**
- * Convert an image content part to HuggingFace's image_url format.
- * Supports both URLs and base64 data URLs (OpenAI-compatible format).
- * Note: Image support requires vision-capable models on HF.
+ * Convert an image content part to OpenAI's image_url format.
  */
  convertImagePart(part) {
  if (part.source.type === "url") {
@@ -6405,33 +6436,22 @@ var init_huggingface = __esm({
  }
  async executeStreamRequest(payload, signal) {
  const client = this.client;
+ const headers = this.getCustomHeaders();
+ const requestOptions = {};
+ if (signal) {
+ requestOptions.signal = signal;
+ }
+ if (Object.keys(headers).length > 0) {
+ requestOptions.headers = headers;
+ }
  try {
- const stream2 = await client.chat.completions.create(payload, signal ? { signal } : void 0);
+ const stream2 = await client.chat.completions.create(
+ payload,
+ Object.keys(requestOptions).length > 0 ? requestOptions : void 0
+ );
  return stream2;
  } catch (error) {
- if (error instanceof Error) {
- if (error.message.includes("rate limit") || error.message.includes("429")) {
- throw new Error(
- `HF rate limit exceeded. Free tier has limits. Consider upgrading or using a dedicated endpoint. Original error: ${error.message}`
- );
- }
- if (error.message.includes("model not found") || error.message.includes("404")) {
- throw new Error(
- `Model not available on HF ${this.endpointType} inference. Check model name or try a different endpoint type. Original error: ${error.message}`
- );
- }
- if (error.message.includes("401") || error.message.includes("unauthorized")) {
- throw new Error(
- `HF authentication failed. Check that HF_TOKEN or HUGGING_FACE_API_KEY is set correctly and starts with 'hf_'. Original error: ${error.message}`
- );
- }
- if (error.message.includes("400") || error.name === "BadRequestError") {
- throw new Error(
- `HF bad request (often transient on serverless). Original error: ${error.message}`
- );
- }
- }
- throw error;
+ throw this.enhanceError(error);
  }
  }
  async *normalizeProviderStream(iterable) {
@@ -6446,7 +6466,6 @@ var init_huggingface = __esm({
  inputTokens: chunk.usage.prompt_tokens,
  outputTokens: chunk.usage.completion_tokens,
  totalTokens: chunk.usage.total_tokens,
- // HF doesn't currently support prompt caching, but structure is ready
  cachedInputTokens: 0
  } : void 0;
  if (finishReason || usage) {
@@ -6455,21 +6474,8 @@ var init_huggingface = __esm({
  }
  }
  /**
- * Count tokens in messages using character-based fallback estimation.
- *
- * Hugging Face doesn't provide a native token counting API yet, so we use
- * a simple character-based heuristic (4 chars per token) which is reasonably
- * accurate for most models.
- *
- * Future enhancement: Could integrate tiktoken for common model families
- * (Llama, Mistral) that use known tokenizers.
- *
- * @param messages - The messages to count tokens for
- * @param descriptor - Model descriptor containing the model name
- * @param _spec - Optional model specification (currently unused)
- * @returns Promise resolving to the estimated input token count
- *
- * @throws Never throws - returns 0 on error with warning
+ * Count tokens using character-based fallback estimation.
+ * Most meta-providers don't have a native token counting API.
  */
  async countTokens(messages, descriptor, _spec) {
  try {
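
The removed doc comment above spelled out the fallback heuristic: roughly 4 characters per token. A minimal sketch of that estimate (hypothetical helper, not the bundled implementation):

// Hypothetical estimator illustrating the ~4-chars-per-token heuristic:
function estimateTokens(messages) {
  const chars = messages.reduce(
    (n, m) => n + (typeof m.content === "string" ? m.content.length : 0),
    0
  );
  return Math.ceil(chars / 4);
}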
@@ -6492,6 +6498,87 @@ var init_huggingface = __esm({
  }
  });

+ // src/providers/huggingface.ts
+ function createHuggingFaceProviderFromEnv() {
+ const token = readEnvVar("HF_TOKEN") || readEnvVar("HUGGING_FACE_API_KEY");
+ if (!isNonEmpty(token)) {
+ return null;
+ }
+ if (!token.startsWith("hf_")) {
+ console.warn(
+ "Warning: HF token should start with 'hf_'. Authentication may fail if token format is incorrect."
+ );
+ }
+ const endpointUrl = readEnvVar("HF_ENDPOINT_URL");
+ const baseURL = endpointUrl || "https://router.huggingface.co/v1";
+ const endpointType = endpointUrl ? "dedicated" : "serverless";
+ const config = {
+ endpointType
+ };
+ const client = new import_openai2.default({
+ apiKey: token.trim(),
+ baseURL,
+ timeout: 6e4,
+ // 60s timeout - HF free tier can be slower than OpenAI
+ maxRetries: 0
+ // Disable SDK retries - llmist handles all retries at application level
+ });
+ return new HuggingFaceProvider(client, config);
+ }
+ var import_openai2, HuggingFaceProvider;
+ var init_huggingface = __esm({
+ "src/providers/huggingface.ts"() {
+ "use strict";
+ import_openai2 = __toESM(require("openai"), 1);
+ init_huggingface_models();
+ init_openai_compatible_provider();
+ init_utils();
+ HuggingFaceProvider = class extends OpenAICompatibleProvider {
+ providerId = "huggingface";
+ providerAlias = "hf";
+ constructor(client, config = {}) {
+ super(client, { endpointType: "serverless", ...config });
+ }
+ getModelSpecs() {
+ return HUGGINGFACE_MODELS;
+ }
+ /**
+ * Enhance error messages with HuggingFace-specific guidance.
+ */
+ enhanceError(error) {
+ if (!(error instanceof Error)) {
+ return new Error(String(error));
+ }
+ const message = error.message.toLowerCase();
+ if (message.includes("rate limit") || message.includes("429")) {
+ return new Error(
+ `HF rate limit exceeded. Free tier has limits. Consider upgrading or using a dedicated endpoint.
+ Original error: ${error.message}`
+ );
+ }
+ if (message.includes("model not found") || message.includes("404")) {
+ return new Error(
+ `Model not available on HF ${this.config.endpointType} inference. Check model name or try a different endpoint type.
+ Original error: ${error.message}`
+ );
+ }
+ if (message.includes("401") || message.includes("unauthorized")) {
+ return new Error(
+ `HF authentication failed. Check that HF_TOKEN or HUGGING_FACE_API_KEY is set correctly and starts with 'hf_'.
+ Original error: ${error.message}`
+ );
+ }
+ if (message.includes("400") || message.includes("bad request")) {
+ return new Error(
+ `HF bad request (often transient on serverless). Original error: ${error.message}`
+ );
+ }
+ return error;
+ }
+ };
+ }
+ });
+
  // src/providers/openai-image-models.ts
  function getOpenAIImageModelSpec(modelId) {
  return openaiImageModels.find((m) => m.modelId === modelId);
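
With HuggingFaceProvider reduced to the thin subclass above, other OpenAI-compatible endpoints could follow the same shape. A hedged sketch (hypothetical MyProvider, assuming only the OpenAICompatibleProvider contract visible in this diff):

// Hypothetical subclass, not part of the package:
class MyProvider extends OpenAICompatibleProvider {
  providerId = "myprovider";
  providerAlias = "mp"; // supports() then accepts "myprovider" or "mp"
  getModelSpecs() {
    return []; // model specs for registry lookup
  }
  getCustomHeaders() {
    return { "X-Example": "1" }; // merged into each stream request
  }
}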
@@ -7338,13 +7425,13 @@ function sanitizeExtra(extra, allowTemperature) {
  return Object.fromEntries(Object.entries(extra).filter(([key]) => key !== "temperature"));
  }
  function createOpenAIProviderFromEnv() {
- return createProviderFromEnv("OPENAI_API_KEY", import_openai2.default, OpenAIChatProvider);
+ return createProviderFromEnv("OPENAI_API_KEY", import_openai3.default, OpenAIChatProvider);
  }
- var import_openai2, import_tiktoken, ROLE_MAP2, OpenAIChatProvider;
+ var import_openai3, import_tiktoken, ROLE_MAP2, OpenAIChatProvider;
  var init_openai = __esm({
  "src/providers/openai.ts"() {
  "use strict";
- import_openai2 = __toESM(require("openai"), 1);
+ import_openai3 = __toESM(require("openai"), 1);
  import_tiktoken = require("tiktoken");
  init_messages();
  init_base_provider();
@@ -7643,6 +7730,475 @@ var init_openai = __esm({
  }
  });

+ // src/providers/openrouter-models.ts
+ var OPENROUTER_MODELS;
+ var init_openrouter_models = __esm({
+ "src/providers/openrouter-models.ts"() {
+ "use strict";
+ OPENROUTER_MODELS = [
+ // ============================================================
+ // Anthropic Claude Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "anthropic/claude-sonnet-4-5",
+ displayName: "Claude Sonnet 4.5 (OpenRouter)",
+ contextWindow: 2e5,
+ maxOutputTokens: 64e3,
+ pricing: {
+ input: 3,
+ output: 15
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true
+ },
+ metadata: {
+ family: "Claude 4",
+ notes: "Anthropic Claude via OpenRouter. Pricing may vary."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "anthropic/claude-opus-4-5",
+ displayName: "Claude Opus 4.5 (OpenRouter)",
+ contextWindow: 2e5,
+ maxOutputTokens: 64e3,
+ pricing: {
+ input: 15,
+ output: 75
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true
+ },
+ metadata: {
+ family: "Claude 4",
+ notes: "Anthropic Claude Opus via OpenRouter. Most capable Claude model."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "anthropic/claude-haiku-4-5",
+ displayName: "Claude Haiku 4.5 (OpenRouter)",
+ contextWindow: 2e5,
+ maxOutputTokens: 64e3,
+ pricing: {
+ input: 0.8,
+ output: 4
+ },
+ knowledgeCutoff: "2025-02",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true
+ },
+ metadata: {
+ family: "Claude 4",
+ notes: "Anthropic Claude Haiku via OpenRouter. Fast and efficient."
+ }
+ },
+ // ============================================================
+ // OpenAI GPT Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "openai/gpt-4o",
+ displayName: "GPT-4o (OpenRouter)",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 2.5,
+ output: 10
+ },
+ knowledgeCutoff: "2024-10",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true
+ },
+ metadata: {
+ family: "GPT-4",
+ notes: "OpenAI GPT-4o via OpenRouter."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "openai/gpt-4o-mini",
+ displayName: "GPT-4o Mini (OpenRouter)",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 0.15,
+ output: 0.6
+ },
+ knowledgeCutoff: "2024-10",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true
+ },
+ metadata: {
+ family: "GPT-4",
+ notes: "OpenAI GPT-4o Mini via OpenRouter. Cost-effective option."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "openai/gpt-5.2",
+ displayName: "GPT-5.2 (OpenRouter)",
+ contextWindow: 1e6,
+ maxOutputTokens: 128e3,
+ pricing: {
+ input: 5,
+ output: 20
+ },
+ knowledgeCutoff: "2025-03",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true
+ },
+ metadata: {
+ family: "GPT-5",
+ notes: "OpenAI GPT-5.2 via OpenRouter. Latest flagship model."
+ }
+ },
+ // ============================================================
+ // Google Gemini Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "google/gemini-2.5-flash",
+ displayName: "Gemini 2.5 Flash (OpenRouter)",
+ contextWindow: 1e6,
+ maxOutputTokens: 65536,
+ pricing: {
+ input: 0.15,
+ output: 0.6
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true
+ },
+ metadata: {
+ family: "Gemini 2.5",
+ notes: "Google Gemini 2.5 Flash via OpenRouter. Fast and cost-effective."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "google/gemini-2.5-pro",
+ displayName: "Gemini 2.5 Pro (OpenRouter)",
+ contextWindow: 1e6,
+ maxOutputTokens: 65536,
+ pricing: {
+ input: 2.5,
+ output: 10
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true
+ },
+ metadata: {
+ family: "Gemini 2.5",
+ notes: "Google Gemini 2.5 Pro via OpenRouter."
+ }
+ },
+ // ============================================================
+ // Meta Llama Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "meta-llama/llama-3.3-70b-instruct",
+ displayName: "Llama 3.3 70B Instruct (OpenRouter)",
+ contextWindow: 128e3,
+ maxOutputTokens: 8192,
+ pricing: {
+ input: 0.4,
+ output: 0.4
+ },
+ knowledgeCutoff: "2024-12",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: false
+ },
+ metadata: {
+ family: "Llama 3.3",
+ notes: "Meta Llama 3.3 70B via OpenRouter. Excellent open-source model."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "meta-llama/llama-4-maverick",
+ displayName: "Llama 4 Maverick (OpenRouter)",
+ contextWindow: 1e6,
+ maxOutputTokens: 128e3,
+ pricing: {
+ input: 0.2,
+ output: 0.6
+ },
+ knowledgeCutoff: "2025-04",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true
+ },
+ metadata: {
+ family: "Llama 4",
+ notes: "Meta Llama 4 Maverick via OpenRouter. Latest Llama generation."
+ }
+ },
+ // ============================================================
+ // DeepSeek Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "deepseek/deepseek-r1",
+ displayName: "DeepSeek R1 (OpenRouter)",
+ contextWindow: 64e3,
+ maxOutputTokens: 8192,
+ pricing: {
+ input: 0.55,
+ output: 2.19
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: false,
+ reasoning: true
+ },
+ metadata: {
+ family: "DeepSeek R1",
+ notes: "DeepSeek R1 via OpenRouter. Strong reasoning capabilities."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "deepseek/deepseek-chat",
+ displayName: "DeepSeek Chat (OpenRouter)",
+ contextWindow: 64e3,
+ maxOutputTokens: 8192,
+ pricing: {
+ input: 0.14,
+ output: 0.28
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: false
+ },
+ metadata: {
+ family: "DeepSeek V3",
+ notes: "DeepSeek Chat via OpenRouter. Very cost-effective."
+ }
+ },
+ // ============================================================
+ // Mistral Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "mistralai/mistral-large",
+ displayName: "Mistral Large (OpenRouter)",
+ contextWindow: 128e3,
+ maxOutputTokens: 8192,
+ pricing: {
+ input: 2,
+ output: 6
+ },
+ knowledgeCutoff: "2024-11",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: false
+ },
+ metadata: {
+ family: "Mistral Large",
+ notes: "Mistral Large via OpenRouter. Strong multilingual capabilities."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "mistralai/mixtral-8x22b-instruct",
+ displayName: "Mixtral 8x22B Instruct (OpenRouter)",
+ contextWindow: 65536,
+ maxOutputTokens: 8192,
+ pricing: {
+ input: 0.9,
+ output: 0.9
+ },
+ knowledgeCutoff: "2024-04",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: false
+ },
+ metadata: {
+ family: "Mixtral",
+ notes: "Mixtral 8x22B via OpenRouter. Sparse MoE architecture."
+ }
+ },
+ // ============================================================
+ // Qwen Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "qwen/qwen-2.5-72b-instruct",
+ displayName: "Qwen 2.5 72B Instruct (OpenRouter)",
+ contextWindow: 131072,
+ maxOutputTokens: 8192,
+ pricing: {
+ input: 0.35,
+ output: 0.4
+ },
+ knowledgeCutoff: "2024-09",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: false
+ },
+ metadata: {
+ family: "Qwen 2.5",
+ notes: "Qwen 2.5 72B via OpenRouter. Strong coding and math."
+ }
+ }
+ ];
+ }
+ });
+
+ // src/providers/openrouter.ts
+ function createOpenRouterProviderFromEnv() {
+ const apiKey = readEnvVar("OPENROUTER_API_KEY");
+ if (!isNonEmpty(apiKey)) {
+ return null;
+ }
+ const config = {
+ siteUrl: readEnvVar("OPENROUTER_SITE_URL"),
+ appName: readEnvVar("OPENROUTER_APP_NAME") || "llmist"
+ };
+ const client = new import_openai4.default({
+ apiKey: apiKey.trim(),
+ baseURL: "https://openrouter.ai/api/v1",
+ timeout: 12e4,
+ // 2 minute timeout
+ maxRetries: 0
+ // Disable SDK retries - llmist handles all retries at application level
+ });
+ return new OpenRouterProvider(client, config);
+ }
+ var import_openai4, OpenRouterProvider;
+ var init_openrouter = __esm({
+ "src/providers/openrouter.ts"() {
+ "use strict";
+ import_openai4 = __toESM(require("openai"), 1);
+ init_openai_compatible_provider();
+ init_openrouter_models();
+ init_utils();
+ OpenRouterProvider = class extends OpenAICompatibleProvider {
+ providerId = "openrouter";
+ providerAlias = "or";
+ constructor(client, config = {}) {
+ super(client, config);
+ }
+ getModelSpecs() {
+ return OPENROUTER_MODELS;
+ }
+ /**
+ * Get custom headers for OpenRouter analytics.
+ */
+ getCustomHeaders() {
+ const headers = {};
+ if (this.config.siteUrl) {
+ headers["HTTP-Referer"] = this.config.siteUrl;
+ }
+ if (this.config.appName) {
+ headers["X-Title"] = this.config.appName;
+ }
+ return headers;
+ }
+ /**
+ * Build OpenRouter-specific request parameters from `extra.routing`.
+ */
+ buildProviderSpecificParams(extra) {
+ const routing = extra?.routing;
+ if (!routing) {
+ return {};
+ }
+ const params = {};
+ if (routing.models && routing.models.length > 0) {
+ params.models = routing.models;
+ }
+ if (routing.route) {
+ params.route = routing.route;
+ }
+ if (routing.provider) {
+ params.provider = { order: [routing.provider] };
+ } else if (routing.order && routing.order.length > 0) {
+ params.provider = { order: routing.order };
+ }
+ return params;
+ }
+ /**
+ * Filter out the 'routing' key from extra passthrough.
+ */
+ isProviderSpecificKey(key) {
+ return key === "routing";
+ }
+ /**
+ * Enhance error messages with OpenRouter-specific guidance.
+ */
+ enhanceError(error) {
+ if (!(error instanceof Error)) {
+ return new Error(String(error));
+ }
+ const message = error.message.toLowerCase();
+ if (message.includes("402") || message.includes("insufficient")) {
+ return new Error(
+ `OpenRouter: Insufficient credits. Add funds at https://openrouter.ai/credits
+ Original error: ${error.message}`
+ );
+ }
+ if (message.includes("429") || message.includes("rate limit")) {
+ return new Error(
+ `OpenRouter: Rate limit exceeded. Consider upgrading your plan or reducing request frequency.
+ Original error: ${error.message}`
+ );
+ }
+ if (message.includes("503") || message.includes("unavailable")) {
+ return new Error(
+ `OpenRouter: Model temporarily unavailable. Try a different model or use the 'models' fallback option for automatic retry.
+ Original error: ${error.message}`
+ );
+ }
+ if (message.includes("401") || message.includes("unauthorized") || message.includes("invalid")) {
+ return new Error(
+ `OpenRouter: Authentication failed. Check that OPENROUTER_API_KEY is set correctly.
+ Original error: ${error.message}`
+ );
+ }
+ return error;
+ }
+ };
+ }
+ });
+
  // src/providers/discovery.ts
  function discoverProviderAdapters() {
  const adapters = [];
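
The discovery hunk below wires OpenRouter in automatically when its key is present; a hedged sketch of standalone use, assuming only the exports visible in this diff:

// Hypothetical usage: requires OPENROUTER_API_KEY in the environment.
const provider = createOpenRouterProviderFromEnv(); // OpenRouterProvider, or null without the key
// OPENROUTER_SITE_URL and OPENROUTER_APP_NAME feed the HTTP-Referer / X-Title analytics headers.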
@@ -7662,11 +8218,13 @@ var init_discovery = __esm({
  init_gemini();
  init_huggingface();
  init_openai();
+ init_openrouter();
  DISCOVERERS = [
  createOpenAIProviderFromEnv,
  createAnthropicProviderFromEnv,
  createGeminiProviderFromEnv,
- createHuggingFaceProviderFromEnv
+ createHuggingFaceProviderFromEnv,
+ createOpenRouterProviderFromEnv
  ];
  }
  });
@@ -12037,11 +12595,10 @@ var init_stream_processor = __esm({
  });

  // src/agent/agent.ts
- var import_p_retry, Agent;
+ var Agent;
  var init_agent = __esm({
  "src/agent/agent.ts"() {
  "use strict";
- import_p_retry = __toESM(require("p-retry"), 1);
  init_constants();
  init_execution_tree();
  init_messages();
@@ -12433,62 +12990,120 @@ var init_agent = __esm({
  messageCount: llmOptions.messages.length,
  messages: llmOptions.messages
  });
- const stream2 = await this.createStreamWithRetry(
- llmOptions,
- currentIteration,
- currentLLMNodeId
- );
- const processor = new StreamProcessor({
- iteration: currentIteration,
- registry: this.registry,
- gadgetStartPrefix: this.gadgetStartPrefix,
- gadgetEndPrefix: this.gadgetEndPrefix,
- gadgetArgPrefix: this.gadgetArgPrefix,
- hooks: this.hooks,
- logger: this.logger.getSubLogger({ name: "stream-processor" }),
- requestHumanInput: this.requestHumanInput,
- defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
- client: this.client,
- mediaStore: this.mediaStore,
- agentConfig: this.agentContextConfig,
- subagentConfig: this.subagentConfig,
- // Tree context for execution tracking
- tree: this.tree,
- parentNodeId: currentLLMNodeId,
- // Gadgets are children of this LLM call
- baseDepth: this.baseDepth,
- // Cross-iteration dependency tracking
- priorCompletedInvocations: this.completedInvocationIds,
- priorFailedInvocations: this.failedInvocationIds,
- // Parent observer hooks for subagent visibility
- parentObservers: this.parentObservers
- });
+ const maxStreamAttempts = this.retryConfig.enabled ? this.retryConfig.retries + 1 : 1;
+ let streamAttempt = 0;
  let streamMetadata = null;
  let gadgetCallCount = 0;
  const textOutputs = [];
  const gadgetResults = [];
- for await (const event of processor.process(stream2)) {
- if (event.type === "stream_complete") {
- streamMetadata = event;
- continue;
- }
- if (event.type === "text") {
- textOutputs.push(event.content);
- } else if (event.type === "gadget_result") {
- gadgetCallCount++;
- gadgetResults.push(event);
+ while (streamAttempt < maxStreamAttempts) {
+ streamAttempt++;
+ try {
+ const stream2 = await this.createStream(
+ llmOptions,
+ currentIteration,
+ currentLLMNodeId
+ );
+ const processor = new StreamProcessor({
+ iteration: currentIteration,
+ registry: this.registry,
+ gadgetStartPrefix: this.gadgetStartPrefix,
+ gadgetEndPrefix: this.gadgetEndPrefix,
+ gadgetArgPrefix: this.gadgetArgPrefix,
+ hooks: this.hooks,
+ logger: this.logger.getSubLogger({ name: "stream-processor" }),
+ requestHumanInput: this.requestHumanInput,
+ defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
+ client: this.client,
+ mediaStore: this.mediaStore,
+ agentConfig: this.agentContextConfig,
+ subagentConfig: this.subagentConfig,
+ // Tree context for execution tracking
+ tree: this.tree,
+ parentNodeId: currentLLMNodeId,
+ // Gadgets are children of this LLM call
+ baseDepth: this.baseDepth,
+ // Cross-iteration dependency tracking
+ priorCompletedInvocations: this.completedInvocationIds,
+ priorFailedInvocations: this.failedInvocationIds,
+ // Parent observer hooks for subagent visibility
+ parentObservers: this.parentObservers
+ });
+ for await (const event of processor.process(stream2)) {
+ if (event.type === "stream_complete") {
+ streamMetadata = event;
+ continue;
+ }
+ if (event.type === "text") {
+ textOutputs.push(event.content);
+ } else if (event.type === "gadget_result") {
+ gadgetCallCount++;
+ gadgetResults.push(event);
+ }
+ yield event;
+ }
+ for (const id of processor.getCompletedInvocationIds()) {
+ this.completedInvocationIds.add(id);
+ }
+ for (const id of processor.getFailedInvocationIds()) {
+ this.failedInvocationIds.add(id);
+ }
+ break;
+ } catch (streamError) {
+ const error = streamError;
+ const canRetry = this.retryConfig.enabled && streamAttempt < maxStreamAttempts;
+ const shouldRetryError = this.retryConfig.shouldRetry ? this.retryConfig.shouldRetry(error) : isRetryableError(error);
+ if (canRetry && shouldRetryError) {
+ const retryAfterMs = this.retryConfig.respectRetryAfter ? extractRetryAfterMs(error) : null;
+ const baseDelay = this.retryConfig.minTimeout * this.retryConfig.factor ** (streamAttempt - 1);
+ const cappedBaseDelay = Math.min(baseDelay, this.retryConfig.maxTimeout);
+ const delay = retryAfterMs !== null ? Math.min(retryAfterMs, this.retryConfig.maxRetryAfterMs) : cappedBaseDelay;
+ const finalDelay = this.retryConfig.randomize ? delay * (0.5 + Math.random()) : delay;
+ this.logger.warn(
+ `Stream iteration failed (attempt ${streamAttempt}/${maxStreamAttempts}), retrying...`,
+ {
+ error: error.message,
+ retriesLeft: maxStreamAttempts - streamAttempt,
+ delayMs: Math.round(finalDelay),
+ retryAfterMs
+ }
+ );
+ this.retryConfig.onRetry?.(error, streamAttempt);
+ await this.safeObserve(async () => {
+ if (this.hooks.observers?.onRetryAttempt) {
+ const subagentContext = getSubagentContextForNode(this.tree, currentLLMNodeId);
+ const hookContext = {
+ iteration: currentIteration,
+ attemptNumber: streamAttempt,
+ retriesLeft: maxStreamAttempts - streamAttempt,
+ error,
+ retryAfterMs: retryAfterMs ?? void 0,
+ logger: this.logger,
+ subagentContext
+ };
+ await this.hooks.observers.onRetryAttempt(hookContext);
+ }
+ });
+ await this.sleep(finalDelay);
+ streamMetadata = null;
+ gadgetCallCount = 0;
+ textOutputs.length = 0;
+ gadgetResults.length = 0;
+ continue;
+ }
+ if (streamAttempt > 1) {
+ this.logger.error(`Stream iteration failed after ${streamAttempt} attempts`, {
+ error: error.message,
+ iteration: currentIteration
+ });
+ this.retryConfig.onRetriesExhausted?.(error, streamAttempt);
+ }
+ throw error;
  }
- yield event;
  }
  if (!streamMetadata) {
  throw new Error("Stream processing completed without metadata event");
  }
- for (const id of processor.getCompletedInvocationIds()) {
- this.completedInvocationIds.add(id);
- }
- for (const id of processor.getFailedInvocationIds()) {
- this.failedInvocationIds.add(id);
- }
  const result = streamMetadata;
  this.logger.info("LLM response completed", {
  finishReason: result.finishReason,
@@ -12602,12 +13217,12 @@ var init_agent = __esm({
  }
  }
  /**
- * Create LLM stream with two-layer rate limit protection:
+ * Create LLM stream with proactive rate limit protection.
  *
- * Layer 1 (Proactive): If rate limits are configured, delays requests to stay within limits.
- * Layer 2 (Reactive): Exponential backoff with Retry-After header support for transient failures.
+ * Note: Retry logic for errors during streaming is handled by the outer loop in run().
+ * This method only handles proactive rate limiting (delaying requests to stay within limits).
  */
- async createStreamWithRetry(llmOptions, iteration, llmNodeId) {
+ async createStream(llmOptions, iteration, llmNodeId) {
  if (this.rateLimitTracker) {
  const throttleDelay = this.rateLimitTracker.getRequiredDelayMs();
  if (throttleDelay > 0) {
@@ -12628,100 +13243,7 @@ var init_agent = __esm({
  await this.sleep(throttleDelay);
  }
  }
- if (!this.retryConfig.enabled) {
- return this.client.stream(llmOptions);
- }
- const {
- retries,
- minTimeout,
- maxTimeout,
- factor,
- randomize,
- onRetry,
- onRetriesExhausted,
- shouldRetry,
- respectRetryAfter,
- maxRetryAfterMs
- } = this.retryConfig;
- let retryAfterHintMs = null;
- try {
- return await (0, import_p_retry.default)(
- async (attemptNumber) => {
- if (retryAfterHintMs !== null && respectRetryAfter) {
- const cappedDelay = Math.min(retryAfterHintMs, maxRetryAfterMs);
- this.logger.debug("Using Retry-After delay", {
- retryAfterMs: retryAfterHintMs,
- cappedDelay
- });
- await this.sleep(cappedDelay);
- retryAfterHintMs = null;
- }
- this.logger.debug("Creating LLM stream", {
- attempt: attemptNumber,
- maxAttempts: retries + 1
- });
- return this.client.stream(llmOptions);
- },
- {
- retries,
- minTimeout,
- maxTimeout,
- factor,
- randomize,
- signal: this.signal,
- onFailedAttempt: (context) => {
- const { error, attemptNumber, retriesLeft } = context;
- if (respectRetryAfter) {
- retryAfterHintMs = extractRetryAfterMs(error);
- if (retryAfterHintMs !== null) {
- this.logger.debug("Retry-After header detected", {
- delayMs: retryAfterHintMs
- });
- }
- }
- this.logger.warn(
- `LLM call failed (attempt ${attemptNumber}/${attemptNumber + retriesLeft}), retrying...`,
- {
- error: error.message,
- retriesLeft,
- retryAfterMs: retryAfterHintMs
- }
- );
- onRetry?.(error, attemptNumber);
- this.safeObserve(async () => {
- if (this.hooks.observers?.onRetryAttempt) {
- const subagentContext = getSubagentContextForNode(this.tree, llmNodeId);
- const hookContext = {
- iteration,
- attemptNumber,
- retriesLeft,
- error,
- retryAfterMs: retryAfterHintMs ?? void 0,
- logger: this.logger,
- subagentContext
- };
- await this.hooks.observers.onRetryAttempt(hookContext);
- }
- }).catch((err) => {
- this.logger.error("Observer hook error", { hook: "onRetryAttempt", error: err });
- });
- },
- shouldRetry: (context) => {
- if (shouldRetry) {
- return shouldRetry(context.error);
- }
- return isRetryableError(context.error);
- }
- }
- );
- } catch (error) {
- this.logger.error(`LLM call failed after ${retries + 1} attempts`, {
- error: error.message,
- iteration
- });
- onRetriesExhausted?.(error, retries + 1);
- throw error;
- }
+ return this.client.stream(llmOptions);
  }
  /**
  * Simple sleep utility for rate limit delays.
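
The inline retry loop above replaces p-retry: per failed attempt the delay is minTimeout * factor ** (attempt - 1), capped at maxTimeout, unless a Retry-After hint (capped at maxRetryAfterMs) takes precedence, and randomize applies 50%-150% jitter. A worked sketch with hypothetical config values:

// Hypothetical config: minTimeout = 1000, factor = 2, maxTimeout = 30000
// attempt 1 fails -> base delay 1000 ms; attempt 2 -> 2000 ms; attempt 3 -> 4000 ms
// with randomize: finalDelay = delay * (0.5 + Math.random())
// with a Retry-After hint: delay = Math.min(retryAfterMs, maxRetryAfterMs) instead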
@@ -13126,6 +13648,8 @@ __export(index_exports, {
  ModelIdentifierParser: () => ModelIdentifierParser,
  ModelRegistry: () => ModelRegistry,
  OpenAIChatProvider: () => OpenAIChatProvider,
+ OpenAICompatibleProvider: () => OpenAICompatibleProvider,
+ OpenRouterProvider: () => OpenRouterProvider,
  RateLimitTracker: () => RateLimitTracker,
  SimpleSessionManager: () => SimpleSessionManager,
  SlidingWindowStrategy: () => SlidingWindowStrategy,
@@ -13147,6 +13671,7 @@ __export(index_exports, {
  createLogger: () => createLogger,
  createMediaOutput: () => createMediaOutput,
  createOpenAIProviderFromEnv: () => createOpenAIProviderFromEnv,
+ createOpenRouterProviderFromEnv: () => createOpenRouterProviderFromEnv,
  createSubagent: () => createSubagent,
  defaultLogger: () => defaultLogger,
  detectAudioMimeType: () => detectAudioMimeType,
@@ -14497,6 +15022,8 @@ init_discovery();
  init_gemini();
  init_huggingface();
  init_openai();
+ init_openai_compatible_provider();
+ init_openrouter();

  // src/session/manager.ts
  var BaseSessionManager = class {
@@ -14771,6 +15298,8 @@ function getHostExports2(ctx) {
  ModelIdentifierParser,
  ModelRegistry,
  OpenAIChatProvider,
+ OpenAICompatibleProvider,
+ OpenRouterProvider,
  RateLimitTracker,
  SimpleSessionManager,
  SlidingWindowStrategy,
@@ -14792,6 +15321,7 @@ function getHostExports2(ctx) {
  createLogger,
  createMediaOutput,
  createOpenAIProviderFromEnv,
+ createOpenRouterProviderFromEnv,
  createSubagent,
  defaultLogger,
  detectAudioMimeType,