llmist 15.3.0 → 15.4.1

This diff compares publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
package/dist/index.cjs CHANGED
@@ -1326,24 +1326,23 @@ function isKnownModelPattern(model) {
  return KNOWN_MODEL_PATTERNS.some((pattern) => pattern.test(model));
  }
  function resolveModel(model, options = {}) {
- if (model.includes(":")) {
- return model;
- }
  const normalized = model.toLowerCase();
  if (MODEL_ALIASES[normalized]) {
  return MODEL_ALIASES[normalized];
  }
- const modelLower = model.toLowerCase();
- if (modelLower.startsWith("gpt")) {
+ if (model.includes(":")) {
+ return model;
+ }
+ if (normalized.startsWith("gpt")) {
  return `openai:${model}`;
  }
- if (modelLower.startsWith("claude")) {
+ if (normalized.startsWith("claude")) {
  return `anthropic:${model}`;
  }
- if (modelLower.startsWith("gemini")) {
+ if (normalized.startsWith("gemini")) {
  return `gemini:${model}`;
  }
- if (modelLower.match(/^o\d/)) {
+ if (normalized.match(/^o\d/)) {
  return `openai:${model}`;
  }
  if (!isKnownModelPattern(model)) {
@@ -1408,7 +1407,16 @@ var init_model_shortcuts = __esm({
  "gemini-flash": "gemini:gemini-2.5-flash",
  "flash-lite": "gemini:gemini-2.5-flash-lite",
  "gemini-pro": "gemini:gemini-3-pro-preview",
- pro: "gemini:gemini-3-pro-preview"
+ pro: "gemini:gemini-3-pro-preview",
+ // OpenRouter aliases (or: prefix for short)
+ "or:sonnet": "openrouter:anthropic/claude-sonnet-4-5",
+ "or:opus": "openrouter:anthropic/claude-opus-4-5",
+ "or:haiku": "openrouter:anthropic/claude-haiku-4-5",
+ "or:gpt4o": "openrouter:openai/gpt-4o",
+ "or:gpt5": "openrouter:openai/gpt-5.2",
+ "or:flash": "openrouter:google/gemini-2.5-flash",
+ "or:llama": "openrouter:meta-llama/llama-3.3-70b-instruct",
+ "or:deepseek": "openrouter:deepseek/deepseek-r1"
  };
  KNOWN_MODEL_PATTERNS = [
  /^gpt-?\d/i,
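
Note on the two hunks above: the alias lookup in resolveModel now runs before the `:` early return, which is what lets the colon-containing `or:` shortcuts resolve at all. A rough sketch of the resulting behavior (hypothetical calls, assuming the alias table above):

// Hypothetical calls, not part of the diff:
resolveModel("or:sonnet"); // alias hit -> "openrouter:anthropic/claude-sonnet-4-5"
resolveModel("openrouter:foo/bar"); // no alias -> ":" passthrough, returned unchanged
resolveModel("gpt-4o"); // prefix heuristic -> "openai:gpt-4o"
resolveModel("o3-mini"); // /^o\d/ heuristic -> "openai:o3-mini"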
@@ -6262,84 +6270,111 @@ var init_huggingface_models = __esm({
  }
  });

- // src/providers/huggingface.ts
- function createHuggingFaceProviderFromEnv() {
- const token = readEnvVar("HF_TOKEN") || readEnvVar("HUGGING_FACE_API_KEY");
- if (!isNonEmpty(token)) {
- return null;
- }
- if (!token.startsWith("hf_")) {
- console.warn(
- "Warning: HF token should start with 'hf_'. Authentication may fail if token format is incorrect."
- );
- }
- const endpointUrl = readEnvVar("HF_ENDPOINT_URL");
- const baseURL = endpointUrl || "https://router.huggingface.co/v1";
- const endpointType = endpointUrl ? "dedicated" : "serverless";
- const client = new import_openai.default({
- apiKey: token.trim(),
- baseURL,
- timeout: 6e4,
- // 60s timeout - HF free tier can be slower than OpenAI
- maxRetries: 0
- // Disable SDK retries - llmist handles all retries at application level
- });
- return new HuggingFaceProvider(client, endpointType);
- }
- var import_openai, ROLE_MAP, HuggingFaceProvider;
- var init_huggingface = __esm({
- "src/providers/huggingface.ts"() {
+ // src/providers/openai-compatible-provider.ts
+ var import_openai, ROLE_MAP, OpenAICompatibleProvider;
+ var init_openai_compatible_provider = __esm({
+ "src/providers/openai-compatible-provider.ts"() {
  "use strict";
  import_openai = __toESM(require("openai"), 1);
  init_messages();
  init_base_provider();
  init_constants2();
- init_huggingface_models();
- init_utils();
  ROLE_MAP = {
  system: "system",
  user: "user",
  assistant: "assistant"
  };
- HuggingFaceProvider = class extends BaseProviderAdapter {
- providerId = "huggingface";
- endpointType;
- constructor(client, endpointType = "serverless") {
+ OpenAICompatibleProvider = class extends BaseProviderAdapter {
+ /**
+ * Short alias for the provider (e.g., "or" for openrouter, "hf" for huggingface).
+ * If not set, only the full providerId is accepted.
+ */
+ providerAlias;
+ config;
+ constructor(client, config) {
  super(client);
- this.endpointType = endpointType;
+ this.config = config;
  }
+ /**
+ * Check if this provider supports the given model descriptor.
+ * Accepts both the full providerId and the short alias.
+ */
  supports(descriptor) {
- return descriptor.provider === this.providerId || descriptor.provider === "hf";
+ return descriptor.provider === this.providerId || this.providerAlias !== void 0 && descriptor.provider === this.providerAlias;
  }
- getModelSpecs() {
- return HUGGINGFACE_MODELS;
+ /**
+ * Get custom headers to include in requests.
+ * Override in subclasses for provider-specific headers.
+ */
+ getCustomHeaders() {
+ return this.config.customHeaders ?? {};
+ }
+ /**
+ * Enhance error messages with provider-specific guidance.
+ * Override in subclasses for better error messages.
+ */
+ enhanceError(error) {
+ if (error instanceof Error) {
+ return error;
+ }
+ return new Error(String(error));
+ }
+ /**
+ * Build provider-specific request parameters.
+ * Override in subclasses to add custom parameters from `extra`.
+ *
+ * @param extra - The extra options from LLMGenerationOptions
+ * @returns Object with provider-specific params to merge into the request
+ */
+ buildProviderSpecificParams(_extra) {
+ return {};
  }
  buildApiRequest(options, descriptor, _spec, messages) {
  const { maxTokens, temperature, topP, stopSequences, extra } = options;
- return {
+ const request = {
  model: descriptor.name,
- messages: messages.map((message) => this.convertToHuggingFaceMessage(message)),
- // HF accepts max_tokens (like many providers), though OpenAI uses max_completion_tokens
- ...maxTokens !== void 0 ? { max_tokens: maxTokens } : {},
- temperature,
- top_p: topP,
- stop: stopSequences,
+ messages: messages.map((message) => this.convertMessage(message)),
  stream: true,
- stream_options: { include_usage: true },
- ...extra ?? {}
+ stream_options: { include_usage: true }
  };
+ if (maxTokens !== void 0) {
+ request.max_tokens = maxTokens;
+ }
+ if (temperature !== void 0) {
+ request.temperature = temperature;
+ }
+ if (topP !== void 0) {
+ request.top_p = topP;
+ }
+ if (stopSequences) {
+ request.stop = stopSequences;
+ }
+ const providerParams = this.buildProviderSpecificParams(extra);
+ Object.assign(request, providerParams);
+ if (extra) {
+ const handledKeys = Object.keys(providerParams);
+ for (const [key, value] of Object.entries(extra)) {
+ if (!handledKeys.includes(key) && !this.isProviderSpecificKey(key)) {
+ request[key] = value;
+ }
+ }
+ }
+ return request;
  }
  /**
- * Convert an LLMMessage to HuggingFace's ChatCompletionMessageParam.
- * HF uses OpenAI-compatible format.
- * Handles role-specific content type requirements:
- * - system/assistant: string content only
- * - user: string or multimodal array content (for vision models)
+ * Check if a key should be filtered from passthrough.
+ * Override in subclasses to filter provider-specific keys from extra.
  */
- convertToHuggingFaceMessage(message) {
+ isProviderSpecificKey(_key) {
+ return false;
+ }
+ /**
+ * Convert an LLMMessage to OpenAI's ChatCompletionMessageParam format.
+ */
+ convertMessage(message) {
  const role = ROLE_MAP[message.role];
  if (role === "user") {
- const content = this.convertToHuggingFaceContent(message.content);
+ const content = this.convertContent(message.content);
  return {
  role: "user",
  content,
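
An annotation on the buildApiRequest flow above: optional params are set only when defined, the buildProviderSpecificParams output is merged next, and remaining `extra` keys pass through unless a subclass claims them via isProviderSpecificKey. A hedged walk-through with hypothetical values:

// On an OpenRouterProvider (defined later in this diff), given
// extra = { seed: 42, routing: { route: "fallback" } }:
// - buildProviderSpecificParams consumes routing -> request.route = "fallback"
// - isProviderSpecificKey("routing") is true, so routing itself is not copied
// - seed is unclaimed and passes through -> request.seed = 42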
@@ -6361,11 +6396,9 @@ var init_huggingface = __esm({
  };
  }
  /**
- * Convert llmist content to HuggingFace's content format.
- * Optimizes by returning string for text-only content, array for multimodal.
- * Note: Multimodal support will be added in Phase 2.
+ * Convert llmist content to OpenAI's content format.
  */
- convertToHuggingFaceContent(content) {
+ convertContent(content) {
  if (typeof content === "string") {
  return content;
  }
@@ -6378,16 +6411,14 @@ var init_huggingface = __esm({
  }
  if (part.type === "audio") {
  throw new Error(
- "Hugging Face chat completions do not currently support audio input in llmist. Audio support will be added in Phase 2."
+ `${this.providerId} does not support audio input through llmist. Check provider docs for model-specific audio support.`
  );
  }
  throw new Error(`Unsupported content type: ${part.type}`);
  });
  }
  /**
- * Convert an image content part to HuggingFace's image_url format.
- * Supports both URLs and base64 data URLs (OpenAI-compatible format).
- * Note: Image support requires vision-capable models on HF.
+ * Convert an image content part to OpenAI's image_url format.
  */
  convertImagePart(part) {
  if (part.source.type === "url") {
@@ -6405,33 +6436,22 @@ var init_huggingface = __esm({
  }
  async executeStreamRequest(payload, signal) {
  const client = this.client;
+ const headers = this.getCustomHeaders();
+ const requestOptions = {};
+ if (signal) {
+ requestOptions.signal = signal;
+ }
+ if (Object.keys(headers).length > 0) {
+ requestOptions.headers = headers;
+ }
  try {
- const stream2 = await client.chat.completions.create(payload, signal ? { signal } : void 0);
+ const stream2 = await client.chat.completions.create(
+ payload,
+ Object.keys(requestOptions).length > 0 ? requestOptions : void 0
+ );
  return stream2;
  } catch (error) {
- if (error instanceof Error) {
- if (error.message.includes("rate limit") || error.message.includes("429")) {
- throw new Error(
- `HF rate limit exceeded. Free tier has limits. Consider upgrading or using a dedicated endpoint. Original error: ${error.message}`
- );
- }
- if (error.message.includes("model not found") || error.message.includes("404")) {
- throw new Error(
- `Model not available on HF ${this.endpointType} inference. Check model name or try a different endpoint type. Original error: ${error.message}`
- );
- }
- if (error.message.includes("401") || error.message.includes("unauthorized")) {
- throw new Error(
- `HF authentication failed. Check that HF_TOKEN or HUGGING_FACE_API_KEY is set correctly and starts with 'hf_'. Original error: ${error.message}`
- );
- }
- if (error.message.includes("400") || error.name === "BadRequestError") {
- throw new Error(
- `HF bad request (often transient on serverless). Original error: ${error.message}`
- );
- }
- }
- throw error;
+ throw this.enhanceError(error);
  }
  }
  async *normalizeProviderStream(iterable) {
@@ -6446,7 +6466,6 @@ var init_huggingface = __esm({
  inputTokens: chunk.usage.prompt_tokens,
  outputTokens: chunk.usage.completion_tokens,
  totalTokens: chunk.usage.total_tokens,
- // HF doesn't currently support prompt caching, but structure is ready
  cachedInputTokens: 0
  } : void 0;
  if (finishReason || usage) {
@@ -6455,21 +6474,8 @@ var init_huggingface = __esm({
  }
  }
  /**
- * Count tokens in messages using character-based fallback estimation.
- *
- * Hugging Face doesn't provide a native token counting API yet, so we use
- * a simple character-based heuristic (4 chars per token) which is reasonably
- * accurate for most models.
- *
- * Future enhancement: Could integrate tiktoken for common model families
- * (Llama, Mistral) that use known tokenizers.
- *
- * @param messages - The messages to count tokens for
- * @param descriptor - Model descriptor containing the model name
- * @param _spec - Optional model specification (currently unused)
- * @returns Promise resolving to the estimated input token count
- *
- * @throws Never throws - returns 0 on error with warning
+ * Count tokens using character-based fallback estimation.
+ * Most meta-providers don't have a native token counting API.
  */
  async countTokens(messages, descriptor, _spec) {
  try {
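
The removed doc comment above spelled out the fallback heuristic: roughly 4 characters per token. A minimal sketch of that estimate (hypothetical helper, not the bundled implementation):

// Hypothetical estimator illustrating the ~4-chars-per-token heuristic:
function estimateTokens(messages) {
  const chars = messages.reduce(
    (n, m) => n + (typeof m.content === "string" ? m.content.length : 0),
    0
  );
  return Math.ceil(chars / 4);
}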
@@ -6492,6 +6498,87 @@ var init_huggingface = __esm({
  }
  });

+ // src/providers/huggingface.ts
+ function createHuggingFaceProviderFromEnv() {
+ const token = readEnvVar("HF_TOKEN") || readEnvVar("HUGGING_FACE_API_KEY");
+ if (!isNonEmpty(token)) {
+ return null;
+ }
+ if (!token.startsWith("hf_")) {
+ console.warn(
+ "Warning: HF token should start with 'hf_'. Authentication may fail if token format is incorrect."
+ );
+ }
+ const endpointUrl = readEnvVar("HF_ENDPOINT_URL");
+ const baseURL = endpointUrl || "https://router.huggingface.co/v1";
+ const endpointType = endpointUrl ? "dedicated" : "serverless";
+ const config = {
+ endpointType
+ };
+ const client = new import_openai2.default({
+ apiKey: token.trim(),
+ baseURL,
+ timeout: 6e4,
+ // 60s timeout - HF free tier can be slower than OpenAI
+ maxRetries: 0
+ // Disable SDK retries - llmist handles all retries at application level
+ });
+ return new HuggingFaceProvider(client, config);
+ }
+ var import_openai2, HuggingFaceProvider;
+ var init_huggingface = __esm({
+ "src/providers/huggingface.ts"() {
+ "use strict";
+ import_openai2 = __toESM(require("openai"), 1);
+ init_huggingface_models();
+ init_openai_compatible_provider();
+ init_utils();
+ HuggingFaceProvider = class extends OpenAICompatibleProvider {
+ providerId = "huggingface";
+ providerAlias = "hf";
+ constructor(client, config = {}) {
+ super(client, { endpointType: "serverless", ...config });
+ }
+ getModelSpecs() {
+ return HUGGINGFACE_MODELS;
+ }
+ /**
+ * Enhance error messages with HuggingFace-specific guidance.
+ */
+ enhanceError(error) {
+ if (!(error instanceof Error)) {
+ return new Error(String(error));
+ }
+ const message = error.message.toLowerCase();
+ if (message.includes("rate limit") || message.includes("429")) {
+ return new Error(
+ `HF rate limit exceeded. Free tier has limits. Consider upgrading or using a dedicated endpoint.
+ Original error: ${error.message}`
+ );
+ }
+ if (message.includes("model not found") || message.includes("404")) {
+ return new Error(
+ `Model not available on HF ${this.config.endpointType} inference. Check model name or try a different endpoint type.
+ Original error: ${error.message}`
+ );
+ }
+ if (message.includes("401") || message.includes("unauthorized")) {
+ return new Error(
+ `HF authentication failed. Check that HF_TOKEN or HUGGING_FACE_API_KEY is set correctly and starts with 'hf_'.
+ Original error: ${error.message}`
+ );
+ }
+ if (message.includes("400") || message.includes("bad request")) {
+ return new Error(
+ `HF bad request (often transient on serverless). Original error: ${error.message}`
+ );
+ }
+ return error;
+ }
+ };
+ }
+ });
+
  // src/providers/openai-image-models.ts
  function getOpenAIImageModelSpec(modelId) {
  return openaiImageModels.find((m) => m.modelId === modelId);
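
With HuggingFaceProvider reduced to the thin subclass above, other OpenAI-compatible endpoints could follow the same shape. A hedged sketch (hypothetical MyProvider, assuming only the OpenAICompatibleProvider contract visible in this diff):

// Hypothetical subclass, not part of the package:
class MyProvider extends OpenAICompatibleProvider {
  providerId = "myprovider";
  providerAlias = "mp"; // supports() then accepts "myprovider" or "mp"
  getModelSpecs() {
    return []; // model specs for registry lookup
  }
  getCustomHeaders() {
    return { "X-Example": "1" }; // merged into each stream request
  }
}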
@@ -7338,13 +7425,13 @@ function sanitizeExtra(extra, allowTemperature) {
  return Object.fromEntries(Object.entries(extra).filter(([key]) => key !== "temperature"));
  }
  function createOpenAIProviderFromEnv() {
- return createProviderFromEnv("OPENAI_API_KEY", import_openai2.default, OpenAIChatProvider);
+ return createProviderFromEnv("OPENAI_API_KEY", import_openai3.default, OpenAIChatProvider);
  }
- var import_openai2, import_tiktoken, ROLE_MAP2, OpenAIChatProvider;
+ var import_openai3, import_tiktoken, ROLE_MAP2, OpenAIChatProvider;
  var init_openai = __esm({
  "src/providers/openai.ts"() {
  "use strict";
- import_openai2 = __toESM(require("openai"), 1);
+ import_openai3 = __toESM(require("openai"), 1);
  import_tiktoken = require("tiktoken");
  init_messages();
  init_base_provider();
@@ -7643,6 +7730,475 @@ var init_openai = __esm({
  }
  });

+ // src/providers/openrouter-models.ts
+ var OPENROUTER_MODELS;
+ var init_openrouter_models = __esm({
+ "src/providers/openrouter-models.ts"() {
+ "use strict";
+ OPENROUTER_MODELS = [
+ // ============================================================
+ // Anthropic Claude Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "anthropic/claude-sonnet-4-5",
+ displayName: "Claude Sonnet 4.5 (OpenRouter)",
+ contextWindow: 2e5,
+ maxOutputTokens: 64e3,
+ pricing: {
+ input: 3,
+ output: 15
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true
+ },
+ metadata: {
+ family: "Claude 4",
+ notes: "Anthropic Claude via OpenRouter. Pricing may vary."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "anthropic/claude-opus-4-5",
+ displayName: "Claude Opus 4.5 (OpenRouter)",
+ contextWindow: 2e5,
+ maxOutputTokens: 64e3,
+ pricing: {
+ input: 15,
+ output: 75
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true
+ },
+ metadata: {
+ family: "Claude 4",
+ notes: "Anthropic Claude Opus via OpenRouter. Most capable Claude model."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "anthropic/claude-haiku-4-5",
+ displayName: "Claude Haiku 4.5 (OpenRouter)",
+ contextWindow: 2e5,
+ maxOutputTokens: 64e3,
+ pricing: {
+ input: 0.8,
+ output: 4
+ },
+ knowledgeCutoff: "2025-02",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true
+ },
+ metadata: {
+ family: "Claude 4",
+ notes: "Anthropic Claude Haiku via OpenRouter. Fast and efficient."
+ }
+ },
+ // ============================================================
+ // OpenAI GPT Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "openai/gpt-4o",
+ displayName: "GPT-4o (OpenRouter)",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 2.5,
+ output: 10
+ },
+ knowledgeCutoff: "2024-10",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true
+ },
+ metadata: {
+ family: "GPT-4",
+ notes: "OpenAI GPT-4o via OpenRouter."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "openai/gpt-4o-mini",
+ displayName: "GPT-4o Mini (OpenRouter)",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 0.15,
+ output: 0.6
+ },
+ knowledgeCutoff: "2024-10",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true
+ },
+ metadata: {
+ family: "GPT-4",
+ notes: "OpenAI GPT-4o Mini via OpenRouter. Cost-effective option."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "openai/gpt-5.2",
+ displayName: "GPT-5.2 (OpenRouter)",
+ contextWindow: 1e6,
+ maxOutputTokens: 128e3,
+ pricing: {
+ input: 5,
+ output: 20
+ },
+ knowledgeCutoff: "2025-03",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true
+ },
+ metadata: {
+ family: "GPT-5",
+ notes: "OpenAI GPT-5.2 via OpenRouter. Latest flagship model."
+ }
+ },
+ // ============================================================
+ // Google Gemini Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "google/gemini-2.5-flash",
+ displayName: "Gemini 2.5 Flash (OpenRouter)",
+ contextWindow: 1e6,
+ maxOutputTokens: 65536,
+ pricing: {
+ input: 0.15,
+ output: 0.6
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true
+ },
+ metadata: {
+ family: "Gemini 2.5",
+ notes: "Google Gemini 2.5 Flash via OpenRouter. Fast and cost-effective."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "google/gemini-2.5-pro",
+ displayName: "Gemini 2.5 Pro (OpenRouter)",
+ contextWindow: 1e6,
+ maxOutputTokens: 65536,
+ pricing: {
+ input: 2.5,
+ output: 10
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true
+ },
+ metadata: {
+ family: "Gemini 2.5",
+ notes: "Google Gemini 2.5 Pro via OpenRouter."
+ }
+ },
+ // ============================================================
+ // Meta Llama Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "meta-llama/llama-3.3-70b-instruct",
+ displayName: "Llama 3.3 70B Instruct (OpenRouter)",
+ contextWindow: 128e3,
+ maxOutputTokens: 8192,
+ pricing: {
+ input: 0.4,
+ output: 0.4
+ },
+ knowledgeCutoff: "2024-12",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: false
+ },
+ metadata: {
+ family: "Llama 3.3",
+ notes: "Meta Llama 3.3 70B via OpenRouter. Excellent open-source model."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "meta-llama/llama-4-maverick",
+ displayName: "Llama 4 Maverick (OpenRouter)",
+ contextWindow: 1e6,
+ maxOutputTokens: 128e3,
+ pricing: {
+ input: 0.2,
+ output: 0.6
+ },
+ knowledgeCutoff: "2025-04",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true
+ },
+ metadata: {
+ family: "Llama 4",
+ notes: "Meta Llama 4 Maverick via OpenRouter. Latest Llama generation."
+ }
+ },
+ // ============================================================
+ // DeepSeek Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "deepseek/deepseek-r1",
+ displayName: "DeepSeek R1 (OpenRouter)",
+ contextWindow: 64e3,
+ maxOutputTokens: 8192,
+ pricing: {
+ input: 0.55,
+ output: 2.19
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: false,
+ reasoning: true
+ },
+ metadata: {
+ family: "DeepSeek R1",
+ notes: "DeepSeek R1 via OpenRouter. Strong reasoning capabilities."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "deepseek/deepseek-chat",
+ displayName: "DeepSeek Chat (OpenRouter)",
+ contextWindow: 64e3,
+ maxOutputTokens: 8192,
+ pricing: {
+ input: 0.14,
+ output: 0.28
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: false
+ },
+ metadata: {
+ family: "DeepSeek V3",
+ notes: "DeepSeek Chat via OpenRouter. Very cost-effective."
+ }
+ },
+ // ============================================================
+ // Mistral Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "mistralai/mistral-large",
+ displayName: "Mistral Large (OpenRouter)",
+ contextWindow: 128e3,
+ maxOutputTokens: 8192,
+ pricing: {
+ input: 2,
+ output: 6
+ },
+ knowledgeCutoff: "2024-11",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: false
+ },
+ metadata: {
+ family: "Mistral Large",
+ notes: "Mistral Large via OpenRouter. Strong multilingual capabilities."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "mistralai/mixtral-8x22b-instruct",
+ displayName: "Mixtral 8x22B Instruct (OpenRouter)",
+ contextWindow: 65536,
+ maxOutputTokens: 8192,
+ pricing: {
+ input: 0.9,
+ output: 0.9
+ },
+ knowledgeCutoff: "2024-04",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: false
+ },
+ metadata: {
+ family: "Mixtral",
+ notes: "Mixtral 8x22B via OpenRouter. Sparse MoE architecture."
+ }
+ },
+ // ============================================================
+ // Qwen Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "qwen/qwen-2.5-72b-instruct",
+ displayName: "Qwen 2.5 72B Instruct (OpenRouter)",
+ contextWindow: 131072,
+ maxOutputTokens: 8192,
+ pricing: {
+ input: 0.35,
+ output: 0.4
+ },
+ knowledgeCutoff: "2024-09",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: false
+ },
+ metadata: {
+ family: "Qwen 2.5",
+ notes: "Qwen 2.5 72B via OpenRouter. Strong coding and math."
+ }
+ }
+ ];
+ }
+ });
+
+ // src/providers/openrouter.ts
+ function createOpenRouterProviderFromEnv() {
+ const apiKey = readEnvVar("OPENROUTER_API_KEY");
+ if (!isNonEmpty(apiKey)) {
+ return null;
+ }
+ const config = {
+ siteUrl: readEnvVar("OPENROUTER_SITE_URL"),
+ appName: readEnvVar("OPENROUTER_APP_NAME") || "llmist"
+ };
+ const client = new import_openai4.default({
+ apiKey: apiKey.trim(),
+ baseURL: "https://openrouter.ai/api/v1",
+ timeout: 12e4,
+ // 2 minute timeout
+ maxRetries: 0
+ // Disable SDK retries - llmist handles all retries at application level
+ });
+ return new OpenRouterProvider(client, config);
+ }
+ var import_openai4, OpenRouterProvider;
+ var init_openrouter = __esm({
+ "src/providers/openrouter.ts"() {
+ "use strict";
+ import_openai4 = __toESM(require("openai"), 1);
+ init_openai_compatible_provider();
+ init_openrouter_models();
+ init_utils();
+ OpenRouterProvider = class extends OpenAICompatibleProvider {
+ providerId = "openrouter";
+ providerAlias = "or";
+ constructor(client, config = {}) {
+ super(client, config);
+ }
+ getModelSpecs() {
+ return OPENROUTER_MODELS;
+ }
+ /**
+ * Get custom headers for OpenRouter analytics.
+ */
+ getCustomHeaders() {
+ const headers = {};
+ if (this.config.siteUrl) {
+ headers["HTTP-Referer"] = this.config.siteUrl;
+ }
+ if (this.config.appName) {
+ headers["X-Title"] = this.config.appName;
+ }
+ return headers;
+ }
+ /**
+ * Build OpenRouter-specific request parameters from `extra.routing`.
+ */
+ buildProviderSpecificParams(extra) {
+ const routing = extra?.routing;
+ if (!routing) {
+ return {};
+ }
+ const params = {};
+ if (routing.models && routing.models.length > 0) {
+ params.models = routing.models;
+ }
+ if (routing.route) {
+ params.route = routing.route;
+ }
+ if (routing.provider) {
+ params.provider = { order: [routing.provider] };
+ } else if (routing.order && routing.order.length > 0) {
+ params.provider = { order: routing.order };
+ }
+ return params;
+ }
+ /**
+ * Filter out the 'routing' key from extra passthrough.
+ */
+ isProviderSpecificKey(key) {
+ return key === "routing";
+ }
+ /**
+ * Enhance error messages with OpenRouter-specific guidance.
+ */
+ enhanceError(error) {
+ if (!(error instanceof Error)) {
+ return new Error(String(error));
+ }
+ const message = error.message.toLowerCase();
+ if (message.includes("402") || message.includes("insufficient")) {
+ return new Error(
+ `OpenRouter: Insufficient credits. Add funds at https://openrouter.ai/credits
+ Original error: ${error.message}`
+ );
+ }
+ if (message.includes("429") || message.includes("rate limit")) {
+ return new Error(
+ `OpenRouter: Rate limit exceeded. Consider upgrading your plan or reducing request frequency.
+ Original error: ${error.message}`
+ );
+ }
+ if (message.includes("503") || message.includes("unavailable")) {
+ return new Error(
+ `OpenRouter: Model temporarily unavailable. Try a different model or use the 'models' fallback option for automatic retry.
+ Original error: ${error.message}`
+ );
+ }
+ if (message.includes("401") || message.includes("unauthorized") || message.includes("invalid")) {
+ return new Error(
+ `OpenRouter: Authentication failed. Check that OPENROUTER_API_KEY is set correctly.
+ Original error: ${error.message}`
+ );
+ }
+ return error;
+ }
+ };
+ }
+ });
+
  // src/providers/discovery.ts
  function discoverProviderAdapters() {
  const adapters = [];
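
The discovery hunk below wires OpenRouter in automatically when its key is present; a hedged sketch of standalone use, assuming only the exports visible in this diff:

// Hypothetical usage: requires OPENROUTER_API_KEY in the environment.
const provider = createOpenRouterProviderFromEnv(); // OpenRouterProvider, or null without the key
// OPENROUTER_SITE_URL and OPENROUTER_APP_NAME feed the HTTP-Referer / X-Title analytics headers.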
@@ -7662,11 +8218,13 @@ var init_discovery = __esm({
  init_gemini();
  init_huggingface();
  init_openai();
+ init_openrouter();
  DISCOVERERS = [
  createOpenAIProviderFromEnv,
  createAnthropicProviderFromEnv,
  createGeminiProviderFromEnv,
- createHuggingFaceProviderFromEnv
+ createHuggingFaceProviderFromEnv,
+ createOpenRouterProviderFromEnv
  ];
  }
  });
@@ -12037,11 +12595,10 @@ var init_stream_processor = __esm({
  });

  // src/agent/agent.ts
- var import_p_retry, Agent;
+ var Agent;
  var init_agent = __esm({
  "src/agent/agent.ts"() {
  "use strict";
- import_p_retry = __toESM(require("p-retry"), 1);
  init_constants();
  init_execution_tree();
  init_messages();
@@ -12433,62 +12990,120 @@ var init_agent = __esm({
  messageCount: llmOptions.messages.length,
  messages: llmOptions.messages
  });
- const stream2 = await this.createStreamWithRetry(
- llmOptions,
- currentIteration,
- currentLLMNodeId
- );
- const processor = new StreamProcessor({
- iteration: currentIteration,
- registry: this.registry,
- gadgetStartPrefix: this.gadgetStartPrefix,
- gadgetEndPrefix: this.gadgetEndPrefix,
- gadgetArgPrefix: this.gadgetArgPrefix,
- hooks: this.hooks,
- logger: this.logger.getSubLogger({ name: "stream-processor" }),
- requestHumanInput: this.requestHumanInput,
- defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
- client: this.client,
- mediaStore: this.mediaStore,
- agentConfig: this.agentContextConfig,
- subagentConfig: this.subagentConfig,
- // Tree context for execution tracking
- tree: this.tree,
- parentNodeId: currentLLMNodeId,
- // Gadgets are children of this LLM call
- baseDepth: this.baseDepth,
- // Cross-iteration dependency tracking
- priorCompletedInvocations: this.completedInvocationIds,
- priorFailedInvocations: this.failedInvocationIds,
- // Parent observer hooks for subagent visibility
- parentObservers: this.parentObservers
- });
+ const maxStreamAttempts = this.retryConfig.enabled ? this.retryConfig.retries + 1 : 1;
+ let streamAttempt = 0;
  let streamMetadata = null;
  let gadgetCallCount = 0;
  const textOutputs = [];
  const gadgetResults = [];
- for await (const event of processor.process(stream2)) {
- if (event.type === "stream_complete") {
- streamMetadata = event;
- continue;
- }
- if (event.type === "text") {
- textOutputs.push(event.content);
- } else if (event.type === "gadget_result") {
- gadgetCallCount++;
- gadgetResults.push(event);
+ while (streamAttempt < maxStreamAttempts) {
+ streamAttempt++;
+ try {
+ const stream2 = await this.createStream(
+ llmOptions,
+ currentIteration,
+ currentLLMNodeId
+ );
+ const processor = new StreamProcessor({
+ iteration: currentIteration,
+ registry: this.registry,
+ gadgetStartPrefix: this.gadgetStartPrefix,
+ gadgetEndPrefix: this.gadgetEndPrefix,
+ gadgetArgPrefix: this.gadgetArgPrefix,
+ hooks: this.hooks,
+ logger: this.logger.getSubLogger({ name: "stream-processor" }),
+ requestHumanInput: this.requestHumanInput,
+ defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
+ client: this.client,
+ mediaStore: this.mediaStore,
+ agentConfig: this.agentContextConfig,
+ subagentConfig: this.subagentConfig,
+ // Tree context for execution tracking
+ tree: this.tree,
+ parentNodeId: currentLLMNodeId,
+ // Gadgets are children of this LLM call
+ baseDepth: this.baseDepth,
+ // Cross-iteration dependency tracking
+ priorCompletedInvocations: this.completedInvocationIds,
+ priorFailedInvocations: this.failedInvocationIds,
+ // Parent observer hooks for subagent visibility
+ parentObservers: this.parentObservers
+ });
+ for await (const event of processor.process(stream2)) {
+ if (event.type === "stream_complete") {
+ streamMetadata = event;
+ continue;
+ }
+ if (event.type === "text") {
+ textOutputs.push(event.content);
+ } else if (event.type === "gadget_result") {
+ gadgetCallCount++;
+ gadgetResults.push(event);
+ }
+ yield event;
+ }
+ for (const id of processor.getCompletedInvocationIds()) {
+ this.completedInvocationIds.add(id);
+ }
+ for (const id of processor.getFailedInvocationIds()) {
+ this.failedInvocationIds.add(id);
+ }
+ break;
+ } catch (streamError) {
+ const error = streamError;
+ const canRetry = this.retryConfig.enabled && streamAttempt < maxStreamAttempts;
+ const shouldRetryError = this.retryConfig.shouldRetry ? this.retryConfig.shouldRetry(error) : isRetryableError(error);
+ if (canRetry && shouldRetryError) {
+ const retryAfterMs = this.retryConfig.respectRetryAfter ? extractRetryAfterMs(error) : null;
+ const baseDelay = this.retryConfig.minTimeout * this.retryConfig.factor ** (streamAttempt - 1);
+ const cappedBaseDelay = Math.min(baseDelay, this.retryConfig.maxTimeout);
+ const delay = retryAfterMs !== null ? Math.min(retryAfterMs, this.retryConfig.maxRetryAfterMs) : cappedBaseDelay;
+ const finalDelay = this.retryConfig.randomize ? delay * (0.5 + Math.random()) : delay;
+ this.logger.warn(
+ `Stream iteration failed (attempt ${streamAttempt}/${maxStreamAttempts}), retrying...`,
+ {
+ error: error.message,
+ retriesLeft: maxStreamAttempts - streamAttempt,
+ delayMs: Math.round(finalDelay),
+ retryAfterMs
+ }
+ );
+ this.retryConfig.onRetry?.(error, streamAttempt);
+ await this.safeObserve(async () => {
+ if (this.hooks.observers?.onRetryAttempt) {
+ const subagentContext = getSubagentContextForNode(this.tree, currentLLMNodeId);
+ const hookContext = {
+ iteration: currentIteration,
+ attemptNumber: streamAttempt,
+ retriesLeft: maxStreamAttempts - streamAttempt,
+ error,
+ retryAfterMs: retryAfterMs ?? void 0,
+ logger: this.logger,
+ subagentContext
+ };
+ await this.hooks.observers.onRetryAttempt(hookContext);
+ }
+ });
+ await this.sleep(finalDelay);
+ streamMetadata = null;
+ gadgetCallCount = 0;
+ textOutputs.length = 0;
+ gadgetResults.length = 0;
+ continue;
+ }
+ if (streamAttempt > 1) {
+ this.logger.error(`Stream iteration failed after ${streamAttempt} attempts`, {
+ error: error.message,
+ iteration: currentIteration
+ });
+ this.retryConfig.onRetriesExhausted?.(error, streamAttempt);
+ }
+ throw error;
  }
- yield event;
  }
  if (!streamMetadata) {
  throw new Error("Stream processing completed without metadata event");
  }
- for (const id of processor.getCompletedInvocationIds()) {
- this.completedInvocationIds.add(id);
- }
- for (const id of processor.getFailedInvocationIds()) {
- this.failedInvocationIds.add(id);
- }
  const result = streamMetadata;
  this.logger.info("LLM response completed", {
  finishReason: result.finishReason,
@@ -12602,12 +13217,12 @@ var init_agent = __esm({
  }
  }
  /**
- * Create LLM stream with two-layer rate limit protection:
+ * Create LLM stream with proactive rate limit protection.
  *
- * Layer 1 (Proactive): If rate limits are configured, delays requests to stay within limits.
- * Layer 2 (Reactive): Exponential backoff with Retry-After header support for transient failures.
+ * Note: Retry logic for errors during streaming is handled by the outer loop in run().
+ * This method only handles proactive rate limiting (delaying requests to stay within limits).
  */
- async createStreamWithRetry(llmOptions, iteration, llmNodeId) {
+ async createStream(llmOptions, iteration, llmNodeId) {
  if (this.rateLimitTracker) {
  const throttleDelay = this.rateLimitTracker.getRequiredDelayMs();
  if (throttleDelay > 0) {
@@ -12628,100 +13243,7 @@ var init_agent = __esm({
  await this.sleep(throttleDelay);
  }
  }
- if (!this.retryConfig.enabled) {
- return this.client.stream(llmOptions);
- }
- const {
- retries,
- minTimeout,
- maxTimeout,
- factor,
- randomize,
- onRetry,
- onRetriesExhausted,
- shouldRetry,
- respectRetryAfter,
- maxRetryAfterMs
- } = this.retryConfig;
- let retryAfterHintMs = null;
- try {
- return await (0, import_p_retry.default)(
- async (attemptNumber) => {
- if (retryAfterHintMs !== null && respectRetryAfter) {
- const cappedDelay = Math.min(retryAfterHintMs, maxRetryAfterMs);
- this.logger.debug("Using Retry-After delay", {
- retryAfterMs: retryAfterHintMs,
- cappedDelay
- });
- await this.sleep(cappedDelay);
- retryAfterHintMs = null;
- }
- this.logger.debug("Creating LLM stream", {
- attempt: attemptNumber,
- maxAttempts: retries + 1
- });
- return this.client.stream(llmOptions);
- },
- {
- retries,
- minTimeout,
- maxTimeout,
- factor,
- randomize,
- signal: this.signal,
- onFailedAttempt: (context) => {
- const { error, attemptNumber, retriesLeft } = context;
- if (respectRetryAfter) {
- retryAfterHintMs = extractRetryAfterMs(error);
- if (retryAfterHintMs !== null) {
- this.logger.debug("Retry-After header detected", {
- delayMs: retryAfterHintMs
- });
- }
- }
- this.logger.warn(
- `LLM call failed (attempt ${attemptNumber}/${attemptNumber + retriesLeft}), retrying...`,
- {
- error: error.message,
- retriesLeft,
- retryAfterMs: retryAfterHintMs
- }
- );
- onRetry?.(error, attemptNumber);
- this.safeObserve(async () => {
- if (this.hooks.observers?.onRetryAttempt) {
- const subagentContext = getSubagentContextForNode(this.tree, llmNodeId);
- const hookContext = {
- iteration,
- attemptNumber,
- retriesLeft,
- error,
- retryAfterMs: retryAfterHintMs ?? void 0,
- logger: this.logger,
- subagentContext
- };
- await this.hooks.observers.onRetryAttempt(hookContext);
- }
- }).catch((err) => {
- this.logger.error("Observer hook error", { hook: "onRetryAttempt", error: err });
- });
- },
- shouldRetry: (context) => {
- if (shouldRetry) {
- return shouldRetry(context.error);
- }
- return isRetryableError(context.error);
- }
- }
- );
- } catch (error) {
- this.logger.error(`LLM call failed after ${retries + 1} attempts`, {
- error: error.message,
- iteration
- });
- onRetriesExhausted?.(error, retries + 1);
- throw error;
- }
+ return this.client.stream(llmOptions);
  }
  /**
  * Simple sleep utility for rate limit delays.
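
The inline retry loop above replaces p-retry: per failed attempt the delay is minTimeout * factor ** (attempt - 1), capped at maxTimeout, unless a Retry-After hint (capped at maxRetryAfterMs) takes precedence, and randomize applies 50%-150% jitter. A worked sketch with hypothetical config values:

// Hypothetical config: minTimeout = 1000, factor = 2, maxTimeout = 30000
// attempt 1 fails -> base delay 1000 ms; attempt 2 -> 2000 ms; attempt 3 -> 4000 ms
// with randomize: finalDelay = delay * (0.5 + Math.random())
// with a Retry-After hint: delay = Math.min(retryAfterMs, maxRetryAfterMs) instead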
@@ -13126,6 +13648,8 @@ __export(index_exports, {
  ModelIdentifierParser: () => ModelIdentifierParser,
  ModelRegistry: () => ModelRegistry,
  OpenAIChatProvider: () => OpenAIChatProvider,
+ OpenAICompatibleProvider: () => OpenAICompatibleProvider,
+ OpenRouterProvider: () => OpenRouterProvider,
  RateLimitTracker: () => RateLimitTracker,
  SimpleSessionManager: () => SimpleSessionManager,
  SlidingWindowStrategy: () => SlidingWindowStrategy,
@@ -13147,6 +13671,7 @@ __export(index_exports, {
  createLogger: () => createLogger,
  createMediaOutput: () => createMediaOutput,
  createOpenAIProviderFromEnv: () => createOpenAIProviderFromEnv,
+ createOpenRouterProviderFromEnv: () => createOpenRouterProviderFromEnv,
  createSubagent: () => createSubagent,
  defaultLogger: () => defaultLogger,
  detectAudioMimeType: () => detectAudioMimeType,
@@ -14497,6 +15022,8 @@ init_discovery();
  init_gemini();
  init_huggingface();
  init_openai();
+ init_openai_compatible_provider();
+ init_openrouter();

  // src/session/manager.ts
  var BaseSessionManager = class {
@@ -14771,6 +15298,8 @@ function getHostExports2(ctx) {
  ModelIdentifierParser,
  ModelRegistry,
  OpenAIChatProvider,
+ OpenAICompatibleProvider,
+ OpenRouterProvider,
  RateLimitTracker,
  SimpleSessionManager,
  SlidingWindowStrategy,
@@ -14792,6 +15321,7 @@ function getHostExports2(ctx) {
  createLogger,
  createMediaOutput,
  createOpenAIProviderFromEnv,
+ createOpenRouterProviderFromEnv,
  createSubagent,
  defaultLogger,
  detectAudioMimeType,