npm - @tyvm/knowhow - Versions diffs - 0.0.105 → 0.0.106 - Mend

@tyvm/knowhow 0.0.105 → 0.0.106

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (209) hide show

package/CONFIG.md +8 -5
package/package.json +3 -2
package/scripts/check-model-pricing.ts +509 -0
package/scripts/compare-openrouter-coverage.ts +576 -0
package/src/agents/base/base.ts +127 -2
package/src/agents/tools/execCommand.ts +4 -0
package/src/agents/tools/executeScript/definition.ts +1 -1
package/src/agents/tools/index.ts +0 -1
package/src/agents/tools/list.ts +3 -43
package/src/agents/tools/writeFile.ts +1 -1
package/src/auth/browserLogin.ts +9 -4
package/src/chat/modules/RemoteSyncModule.ts +3 -0
package/src/cli.ts +31 -1
package/src/clients/cerebras.ts +10 -0
package/src/clients/contextLimits.ts +7 -2
package/src/clients/copilot.ts +23 -0
package/src/clients/deepseek.ts +16 -0
package/src/clients/fireworks.ts +15 -0
package/src/clients/gemini.ts +45 -2
package/src/clients/github.ts +16 -0
package/src/clients/groq.ts +15 -0
package/src/clients/http.ts +190 -6
package/src/clients/index.ts +116 -4
package/src/clients/llama.ts +16 -0
package/src/clients/mistral.ts +16 -0
package/src/clients/nvidia.ts +16 -0
package/src/clients/openai.ts +41 -11
package/src/clients/openrouter.ts +17 -0
package/src/clients/pricing/anthropic.ts +105 -78
package/src/clients/pricing/cerebras.ts +11 -0
package/src/clients/pricing/copilot.ts +60 -0
package/src/clients/pricing/deepseek.ts +15 -0
package/src/clients/pricing/fireworks.ts +32 -0
package/src/clients/pricing/github.ts +69 -0
package/src/clients/pricing/google.ts +245 -206
package/src/clients/pricing/groq.ts +56 -0
package/src/clients/pricing/index.ts +42 -5
package/src/clients/pricing/llama.ts +18 -0
package/src/clients/pricing/mistral.ts +34 -0
package/src/clients/pricing/models.ts +7 -236
package/src/clients/pricing/nvidia.ts +102 -0
package/src/clients/pricing/openai.ts +347 -171
package/src/clients/pricing/openrouter.ts +36 -0
package/src/clients/pricing/types.ts +83 -2
package/src/clients/pricing/xai.ts +121 -65
package/src/clients/types.ts +4 -0
package/src/clients/xai.ts +150 -0
package/src/fileSync.ts +8 -2
package/src/login.ts +11 -3
package/src/services/AgentSyncFs.ts +36 -12
package/src/services/KnowhowClient.ts +11 -0
package/src/services/LazyToolsService.ts +6 -0
package/src/services/S3.ts +0 -7
package/src/services/modules/index.ts +11 -2
package/src/types.ts +56 -279
package/src/worker.ts +174 -0
package/tests/clients/pricing.test.ts +37 -0
package/tests/manual/clients/completions.json +838 -226
package/tests/manual/clients/completions.test.ts +46 -31
package/ts_build/package.json +3 -2
package/ts_build/src/agents/base/base.d.ts +17 -1
package/ts_build/src/agents/base/base.js +82 -1
package/ts_build/src/agents/base/base.js.map +1 -1
package/ts_build/src/agents/tools/execCommand.js +3 -0
package/ts_build/src/agents/tools/execCommand.js.map +1 -1
package/ts_build/src/agents/tools/executeScript/definition.js +1 -1
package/ts_build/src/agents/tools/executeScript/definition.js.map +1 -1
package/ts_build/src/agents/tools/index.d.ts +0 -1
package/ts_build/src/agents/tools/index.js +0 -1
package/ts_build/src/agents/tools/index.js.map +1 -1
package/ts_build/src/agents/tools/list.js +3 -38
package/ts_build/src/agents/tools/list.js.map +1 -1
package/ts_build/src/agents/tools/visionTool.d.ts +1 -1
package/ts_build/src/agents/tools/writeFile.js +1 -1
package/ts_build/src/agents/tools/writeFile.js.map +1 -1
package/ts_build/src/ai.d.ts +1 -1
package/ts_build/src/auth/browserLogin.d.ts +2 -1
package/ts_build/src/auth/browserLogin.js +10 -3
package/ts_build/src/auth/browserLogin.js.map +1 -1
package/ts_build/src/chat/modules/RemoteSyncModule.js +1 -0
package/ts_build/src/chat/modules/RemoteSyncModule.js.map +1 -1
package/ts_build/src/cli.js +19 -0
package/ts_build/src/cli.js.map +1 -1
package/ts_build/src/clients/anthropic.d.ts +1 -82
package/ts_build/src/clients/cerebras.d.ts +4 -0
package/ts_build/src/clients/cerebras.js +14 -0
package/ts_build/src/clients/cerebras.js.map +1 -0
package/ts_build/src/clients/contextLimits.js +7 -2
package/ts_build/src/clients/contextLimits.js.map +1 -1
package/ts_build/src/clients/copilot.d.ts +4 -0
package/ts_build/src/clients/copilot.js +15 -0
package/ts_build/src/clients/copilot.js.map +1 -0
package/ts_build/src/clients/deepseek.d.ts +4 -0
package/ts_build/src/clients/deepseek.js +15 -0
package/ts_build/src/clients/deepseek.js.map +1 -0
package/ts_build/src/clients/fireworks.d.ts +4 -0
package/ts_build/src/clients/fireworks.js +15 -0
package/ts_build/src/clients/fireworks.js.map +1 -0
package/ts_build/src/clients/gemini.d.ts +1 -0
package/ts_build/src/clients/gemini.js +28 -1
package/ts_build/src/clients/gemini.js.map +1 -1
package/ts_build/src/clients/github.d.ts +4 -0
package/ts_build/src/clients/github.js +15 -0
package/ts_build/src/clients/github.js.map +1 -0
package/ts_build/src/clients/groq.d.ts +4 -0
package/ts_build/src/clients/groq.js +15 -0
package/ts_build/src/clients/groq.js.map +1 -0
package/ts_build/src/clients/http.d.ts +22 -1
package/ts_build/src/clients/http.js +132 -7
package/ts_build/src/clients/http.js.map +1 -1
package/ts_build/src/clients/index.d.ts +14 -0
package/ts_build/src/clients/index.js +94 -4
package/ts_build/src/clients/index.js.map +1 -1
package/ts_build/src/clients/llama.d.ts +4 -0
package/ts_build/src/clients/llama.js +15 -0
package/ts_build/src/clients/llama.js.map +1 -0
package/ts_build/src/clients/mistral.d.ts +4 -0
package/ts_build/src/clients/mistral.js +15 -0
package/ts_build/src/clients/mistral.js.map +1 -0
package/ts_build/src/clients/nvidia.d.ts +4 -0
package/ts_build/src/clients/nvidia.js +15 -0
package/ts_build/src/clients/nvidia.js.map +1 -0
package/ts_build/src/clients/openai.d.ts +4 -206
package/ts_build/src/clients/openai.js +27 -9
package/ts_build/src/clients/openai.js.map +1 -1
package/ts_build/src/clients/openrouter.d.ts +4 -0
package/ts_build/src/clients/openrouter.js +15 -0
package/ts_build/src/clients/openrouter.js.map +1 -0
package/ts_build/src/clients/pricing/anthropic.d.ts +26 -78
package/ts_build/src/clients/pricing/anthropic.js +75 -78
package/ts_build/src/clients/pricing/anthropic.js.map +1 -1
package/ts_build/src/clients/pricing/cerebras.d.ts +4 -0
package/ts_build/src/clients/pricing/cerebras.js +11 -0
package/ts_build/src/clients/pricing/cerebras.js.map +1 -0
package/ts_build/src/clients/pricing/copilot.d.ts +5 -0
package/ts_build/src/clients/pricing/copilot.js +35 -0
package/ts_build/src/clients/pricing/copilot.js.map +1 -0
package/ts_build/src/clients/pricing/deepseek.d.ts +5 -0
package/ts_build/src/clients/pricing/deepseek.js +10 -0
package/ts_build/src/clients/pricing/deepseek.js.map +1 -0
package/ts_build/src/clients/pricing/fireworks.d.ts +5 -0
package/ts_build/src/clients/pricing/fireworks.js +21 -0
package/ts_build/src/clients/pricing/fireworks.js.map +1 -0
package/ts_build/src/clients/pricing/github.d.ts +4 -0
package/ts_build/src/clients/pricing/github.js +58 -0
package/ts_build/src/clients/pricing/github.js.map +1 -0
package/ts_build/src/clients/pricing/google.d.ts +59 -6
package/ts_build/src/clients/pricing/google.js +214 -167
package/ts_build/src/clients/pricing/google.js.map +1 -1
package/ts_build/src/clients/pricing/groq.d.ts +5 -0
package/ts_build/src/clients/pricing/groq.js +41 -0
package/ts_build/src/clients/pricing/groq.js.map +1 -0
package/ts_build/src/clients/pricing/index.d.ts +16 -5
package/ts_build/src/clients/pricing/index.js +62 -7
package/ts_build/src/clients/pricing/index.js.map +1 -1
package/ts_build/src/clients/pricing/llama.d.ts +4 -0
package/ts_build/src/clients/pricing/llama.js +14 -0
package/ts_build/src/clients/pricing/llama.js.map +1 -0
package/ts_build/src/clients/pricing/mistral.d.ts +5 -0
package/ts_build/src/clients/pricing/mistral.js +23 -0
package/ts_build/src/clients/pricing/mistral.js.map +1 -0
package/ts_build/src/clients/pricing/models.d.ts +5 -4
package/ts_build/src/clients/pricing/models.js +8 -162
package/ts_build/src/clients/pricing/models.js.map +1 -1
package/ts_build/src/clients/pricing/nvidia.d.ts +8 -0
package/ts_build/src/clients/pricing/nvidia.js +96 -0
package/ts_build/src/clients/pricing/nvidia.js.map +1 -0
package/ts_build/src/clients/pricing/openai.d.ts +86 -197
package/ts_build/src/clients/pricing/openai.js +294 -168
package/ts_build/src/clients/pricing/openai.js.map +1 -1
package/ts_build/src/clients/pricing/openrouter.d.ts +4 -0
package/ts_build/src/clients/pricing/openrouter.js +29 -0
package/ts_build/src/clients/pricing/openrouter.js.map +1 -0
package/ts_build/src/clients/pricing/types.d.ts +27 -2
package/ts_build/src/clients/pricing/types.js +46 -0
package/ts_build/src/clients/pricing/types.js.map +1 -1
package/ts_build/src/clients/pricing/xai.d.ts +37 -57
package/ts_build/src/clients/pricing/xai.js +92 -59
package/ts_build/src/clients/pricing/xai.js.map +1 -1
package/ts_build/src/clients/types.d.ts +1 -0
package/ts_build/src/clients/xai.d.ts +2 -62
package/ts_build/src/clients/xai.js +121 -0
package/ts_build/src/clients/xai.js.map +1 -1
package/ts_build/src/fileSync.js +7 -2
package/ts_build/src/fileSync.js.map +1 -1
package/ts_build/src/login.js +8 -2
package/ts_build/src/login.js.map +1 -1
package/ts_build/src/services/AgentSyncFs.js +1 -0
package/ts_build/src/services/AgentSyncFs.js.map +1 -1
package/ts_build/src/services/KnowhowClient.d.ts +1 -0
package/ts_build/src/services/KnowhowClient.js +7 -0
package/ts_build/src/services/KnowhowClient.js.map +1 -1
package/ts_build/src/services/LazyToolsService.d.ts +1 -0
package/ts_build/src/services/LazyToolsService.js +3 -0
package/ts_build/src/services/LazyToolsService.js.map +1 -1
package/ts_build/src/services/S3.js +0 -7
package/ts_build/src/services/S3.js.map +1 -1
package/ts_build/src/services/modules/index.js +41 -1
package/ts_build/src/services/modules/index.js.map +1 -1
package/ts_build/src/types.d.ts +163 -124
package/ts_build/src/types.js +33 -213
package/ts_build/src/types.js.map +1 -1
package/ts_build/src/worker.d.ts +4 -0
package/ts_build/src/worker.js +140 -0
package/ts_build/src/worker.js.map +1 -1
package/ts_build/tests/clients/pricing.test.js +21 -0
package/ts_build/tests/clients/pricing.test.js.map +1 -1
package/ts_build/tests/manual/clients/completions.test.js +27 -24
package/ts_build/tests/manual/clients/completions.test.js.map +1 -1

package/src/clients/http.ts CHANGED Viewed

@@ -6,11 +6,29 @@ import {
   EmbeddingOptions,
   EmbeddingResponse,
 } from "./types";
+import { ModelPricing } from "./pricing/types";
 import fs from "fs";
 import path from "path";
+export interface HttpClientOptions {
+  headers?: Record<string, string>;
+  timeout?: number;
+  extra_body?: Record<string, any>;
+}
 export class HttpClient implements GenericClient {
-  constructor(private baseUrl: string, private headers = {}) {}
+  /** Timeout in milliseconds for HTTP requests. Default: 30000 (30s). Use 0 to disable. */
+  private timeout: number;
+  private headers: Record<string, string>;
+  private extra_body: Record<string, any>;
+  /** Optional pricing table: model id → per-million-token prices */
+  private pricingMap: Record<string, ModelPricing> = {};
+  constructor(private baseUrl: string, options: HttpClientOptions = {}) {
+    this.headers = options.headers ?? {};
+    this.timeout = options.timeout ?? 30000;
+    this.extra_body = options.extra_body ?? {};
+  }
   private async withRetry<T>(fn: () => Promise<T>, retries = 3): Promise<T> {
     let lastError: any;
@@ -64,6 +82,52 @@ export class HttpClient implements GenericClient {
     this.setJwt(key);
   }
+  /**
+   * Supply a pricing map so that createChatCompletion, createResponse and
+   * createEmbedding can calculate a local usd_cost from usage tokens when the
+   * provider does not return a cost field itself.
+   */
+  setPrices(pricingMap: Record<string, ModelPricing>) {
+    this.pricingMap = pricingMap;
+  }
+  /**
+   * Calculate USD cost for a completion/embedding call from token usage.
+   * Returns undefined if usage is missing or no pricing entry exists for the model.
+   */
+  calculateCost(
+    model: string,
+    usage: { prompt_tokens?: number; completion_tokens?: number; prompt_tokens_details?: { cached_tokens?: number } } | undefined
+  ): number | undefined {
+    if (!usage) return undefined;
+    const pricing = this.pricingMap[model];
+    if (!pricing) return undefined;
+    const cachedInputTokens =
+      usage.prompt_tokens_details?.cached_tokens ?? 0;
+    const inputTokens = usage.prompt_tokens ?? 0;
+    const outputTokens = usage.completion_tokens ?? 0;
+    const cachedInputCost = (cachedInputTokens * (pricing.cache_hit ?? pricing.cached_input ?? 0)) / 1e6;
+    const inputCost = ((inputTokens - cachedInputTokens) * (pricing.input ?? 0)) / 1e6;
+    const outputCost = (outputTokens * (pricing.output ?? 0)) / 1e6;
+    return cachedInputCost + inputCost + outputCost;
+  }
+  /**
+   * Apply extra options (timeout, headers, extra_body) after construction.
+   * Used by AIClient.resolveClient to honour per-provider config overrides
+   * even when the client is created via a known clientClass (e.g. nvidia, groq).
+   */
+  setOptions(options: Omit<HttpClientOptions, "headers"> & { headers?: Record<string, string> }) {
+    if (options.timeout !== undefined) this.timeout = options.timeout;
+    if (options.extra_body !== undefined) this.extra_body = options.extra_body;
+    if (options.headers) {
+      this.headers = { ...this.headers, ...options.headers };
+    }
+  }
   loadJwtFile(filePath: string) {
     try {
       const jwtFile = path.join(process.cwd(), filePath);
@@ -85,7 +149,8 @@ export class HttpClient implements GenericClient {
         ...options,
         model: options.model,
         messages: options.messages,
-        max_tokens: options.max_tokens || 3000,
+        max_tokens: options.max_tokens || 4000,
+        ...this.extra_body,
         ...(options.tools && {
           tools: options.tools,
@@ -96,7 +161,7 @@ export class HttpClient implements GenericClient {
       const response = await http.post(
         `${this.baseUrl}/v1/chat/completions`,
         body,
-        { headers: this.headers as Record<string, string> }
+        { headers: this.headers as Record<string, string>, timeout: this.timeout }
       );
       const data = response.data;
@@ -116,7 +181,125 @@ export class HttpClient implements GenericClient {
         })),
         model: data.model,
         usage: data.usage,
-        usd_cost: data.usd_cost,
+        usd_cost: data.usd_cost ?? this.calculateCost(options.model, data.usage),
+      };
+    });
+  }
+  /**
+   * Creates a completion using the Responses API (/v1/responses).
+   * Compatible with providers that implement the OpenAI Responses API spec
+   * (e.g. xAI at https://api.x.ai/v1/responses).
+   */
+  async createResponse(
+    options: CompletionOptions,
+    store = false
+  ): Promise<CompletionResponse> {
+    return this.withRetry(async () => {
+      // Extract system messages as instructions
+      const systemMessages = options.messages.filter((m) => m.role === "system");
+      const nonSystemMessages = options.messages.filter((m) => m.role !== "system");
+      const instructions = systemMessages
+        .map((m) => (typeof m.content === "string" ? m.content : ""))
+        .join("\n")
+        .trim() || undefined;
+      // Convert messages to Responses API input format
+      const input: any[] = nonSystemMessages.map((msg) => {
+        if (msg.role === "tool") {
+          return {
+            type: "function_call_output",
+            call_id: msg.tool_call_id,
+            output: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content),
+          };
+        }
+        if (msg.role === "assistant" && msg.tool_calls?.length) {
+          return (msg.tool_calls as any[]).map((tc: any) => ({
+            type: "function_call",
+            id: tc.id.startsWith("fc") ? tc.id : `fc_${tc.id}`,
+            call_id: tc.id,
+            name: tc.function.name,
+            arguments: tc.function.arguments,
+          }));
+        }
+        return {
+          role: msg.role,
+          content: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content),
+        };
+      }).flat();
+      const tools = options.tools?.map((tool) => ({
+        type: "function" as const,
+        name: tool.function.name,
+        description: tool.function.description,
+        parameters: tool.function.parameters as Record<string, unknown>,
+        strict: false,
+      }));
+      const body = {
+        model: options.model,
+        input,
+        ...(instructions && { instructions }),
+        ...(options.max_tokens && { max_output_tokens: options.max_tokens }),
+        ...(tools?.length && { tools, tool_choice: "auto" }),
+        store,
+        ...this.extra_body,
+      };
+      const response = await http.post(
+        `${this.baseUrl}/v1/responses`,
+        body,
+        { headers: this.headers as Record<string, string>, timeout: this.timeout }
+      );
+      const data = response.data;
+      if (data.error) {
+        throw new Error(JSON.stringify(data.error, null, 2));
+      }
+      // Map usage from Responses API format to Chat Completions format
+      const usage = data.usage
+        ? {
+            prompt_tokens: data.usage.input_tokens,
+            completion_tokens: data.usage.output_tokens,
+            total_tokens: data.usage.input_tokens + data.usage.output_tokens,
+          }
+        : undefined;
+      // Collect text content and tool calls from output items
+      let textContent: string | null = null;
+      const toolCalls: any[] = [];
+      for (const item of data.output ?? []) {
+        if (item.type === "message") {
+          for (const part of item.content ?? []) {
+            if (part.type === "output_text") {
+              textContent = (textContent ?? "") + part.text;
+            }
+          }
+        } else if (item.type === "function_call") {
+          toolCalls.push({
+            id: item.call_id,
+            type: "function",
+            function: { name: item.name, arguments: item.arguments },
+          });
+        }
+      }
+      return {
+        choices: [
+          {
+            message: {
+              role: "assistant",
+              content: textContent,
+              ...(toolCalls.length > 0 && { tool_calls: toolCalls }),
+            },
+          },
+        ],
+        model: data.model ?? options.model,
+        usage,
+        usd_cost: data.usd_cost ?? this.calculateCost(options.model, usage),
       };
     });
   }
@@ -129,7 +312,7 @@ export class HttpClient implements GenericClient {
           model: options.model,
           input: options.input,
         },
-        { headers: this.headers as Record<string, string> }
+        { headers: this.headers as Record<string, string>, timeout: this.timeout }
       );
       const data = response.data;
@@ -143,7 +326,7 @@ export class HttpClient implements GenericClient {
         data: data.data,
         model: options.model,
         usage: data.usage,
-        usd_cost: data.usd_cost,
+        usd_cost: data.usd_cost ?? this.calculateCost(options.model, data.usage),
       };
     });
   }
@@ -152,6 +335,7 @@ export class HttpClient implements GenericClient {
     return this.withRetry(async () => {
       const response = await http.get(`${this.baseUrl}/v1/models?type=${type}`, {
         headers: this.headers as Record<string, string>,
+        timeout: this.timeout,
       });
       const data = response.data?.data;

package/src/clients/index.ts CHANGED Viewed

@@ -43,6 +43,16 @@ import type {
   ModelType,
   ModelCatalogEntry,
 } from "./pricing/types";
+import { GenericCerebrasClient } from "./cerebras";
+import { GenericGroqClient } from "./groq";
+import { GenericGitHubModelsClient } from "./github";
+import { GenericNvidiaClient } from "./nvidia";
+import { GenericOpenRouterClient } from "./openrouter";
+import { GenericDeepSeekClient } from "./deepseek";
+import { GenericMistralClient } from "./mistral";
+import { GitHubCopilotClient } from "./copilot";
+import { GenericLlamaClient } from "./llama";
+import { GenericFireworksClient } from "./fireworks";
 export {
   OpenAiTextPricing,
   AnthropicTextPricing,
@@ -75,6 +85,18 @@ const BUILT_IN_PROVIDER_REGISTRY: Record<string, ProviderRegistryEntry> = {
   anthropic: { clientClass: GenericAnthropicClient },
   google: { clientClass: GenericGeminiClient },
   xai: { clientClass: GenericXAIClient },
+  cerebras: {
+    clientClass: GenericCerebrasClient,
+  },
+  groq: { clientClass: GenericGroqClient },
+  github: { clientClass: GenericGitHubModelsClient },
+  nvidia: { clientClass: GenericNvidiaClient },
+  openrouter: { clientClass: GenericOpenRouterClient },
+  deepseek: { clientClass: GenericDeepSeekClient },
+  mistral: { clientClass: GenericMistralClient },
+  "github-copilot": { clientClass: GitHubCopilotClient },
+  llama: { clientClass: GenericLlamaClient },
+  fireworks: { clientClass: GenericFireworksClient },
   knowhow: {
     createClient: (entry: ModelProvider) => {
       const jwt = loadKnowhowJwt();
@@ -94,7 +116,17 @@ const DEFAULT_PROVIDERS: ModelProvider[] = [
   { provider: "anthropic", envKey: "ANTHROPIC_API_KEY" },
   { provider: "google", envKey: "GEMINI_API_KEY" },
   { provider: "xai", envKey: "XAI_API_KEY" },
+  { provider: "cerebras", envKey: "CEREBRAS_API_KEY" },
   { provider: "knowhow" },
+  { provider: "groq", envKey: "GROQ_API_KEY" },
+  { provider: "github", envKey: "GITHUB_TOKEN" },
+  { provider: "nvidia", envKey: "NVIDIA_API_KEY" },
+  { provider: "openrouter", envKey: "OPENROUTER_API_KEY" },
+  { provider: "deepseek", envKey: "DEEPSEEK_API_KEY" },
+  { provider: "mistral", envKey: "MISTRAL_API_KEY" },
+  { provider: "github-copilot", envKey: "GITHUB_COPILOT_TOKEN" },
+  { provider: "llama", envKey: "LLAMA_API_KEY" },
+  { provider: "fireworks", envKey: "FIREWORKS_API_KEY" },
 ];
 export class AIClient {
@@ -153,19 +185,45 @@ export class AIClient {
         // envKey-based auth: env var must be present
         const envValue = process.env[effectiveEnvKey];
         if (!envValue) return null;
-        return new reg.clientClass(envValue);
+        const client = new reg.clientClass(envValue);
+        // Apply any extra options (timeout, headers, extra_body) from config
+        if (client instanceof HttpClient) {
+          client.setOptions({
+            timeout: entry.timeout,
+            headers: entry.headers,
+            extra_body: entry.extra_body,
+          });
+          if (entry.pricing) client.setPrices(entry.pricing);
+        }
+        return client;
       }
       // No envKey, no url — instantiate with no arg (client uses its own defaults)
-      return new reg.clientClass();
+      const client = new reg.clientClass();
+      // Apply any extra options (timeout, headers, extra_body) from config
+      if (client instanceof HttpClient) {
+        client.setOptions({
+          timeout: entry.timeout,
+          headers: entry.headers,
+          extra_body: entry.extra_body,
+        });
+        if (entry.pricing) client.setPrices(entry.pricing);
+      }
+      return client;
     }
     // 3. HTTP provider — requires url, no clientClass in registry
     if (entry.url) {
-      const client = new HttpClient(entry.url, entry.headers);
+      const client = new HttpClient(entry.url, {
+        headers: entry.headers,
+        timeout: entry.timeout,
+        extra_body: entry.extra_body,
+      });
       if (entry.jwtFile) {
         client.loadJwtFile(entry.jwtFile);
       }
+      // For custom HTTP providers, use entry.pricing if available
+      if (entry.pricing) client.setPrices(entry.pricing);
       return client;
     }
@@ -492,6 +550,52 @@ export class AIClient {
     return undefined;
   }
+  /**
+   * Normalize a model ID for fuzzy matching:
+   *   - lowercase
+   *   - replace dots with dashes (e.g. "claude-opus-4.7" → "claude-opus-4-7")
+   *   - strip variant suffixes like ":thinking", ":free"
+   *   - strip trailing date suffixes like "-20250514"
+   *   - strip one trailing "-beta", "-preview", "-latest", "-exp" or "-rc<N>" suffix
+   */
+  private static normalizeModelId(id: string): string {
+    return id
+      .toLowerCase()
+      .replace(/\./g, "-")
+      .replace(/:[^:]+$/, "")
+      .replace(/-\d{8}$/, "")
+      .replace(/-(beta|preview|latest|exp|rc\d*)$/i, "");
+  }
+  /**
+   * Fuzzy model lookup: given a model name (possibly without date suffix,
+   * with dots instead of dashes, etc.), find the best matching registered model.
+   *
+   * Example: "claude-3.7-sonnet" matches "claude-3-7-sonnet-20250219"
+   *          "gpt-4.1" matches "gpt-4.1" exactly
+   *
+   * @param modelQuery - the model name to search for (can be partial/normalized)
+   * @param provider   - optional provider to restrict search to
+   */
+  findModelFuzzy(modelQuery: string, provider?: string): { provider: string; model: string } | undefined {
+    const queryNorm = AIClient.normalizeModelId(modelQuery);
+    const providers = provider
+      ? [provider]
+      : Object.keys(this.clientModels);
+    for (const p of providers) {
+      const models = (this.clientModels[p] as string[]) ?? [];
+      for (const m of models) {
+        const mNorm = AIClient.normalizeModelId(m);
+        // Exact normalized match, OR the model's normalized id extends the query with a further "-" suffix (dates are already stripped)
+        if (mNorm === queryNorm || mNorm.startsWith(queryNorm + "-")) {
+          return { provider: p, model: m };
+        }
+      }
+    }
+    return undefined;
+  }
   // detects these formats:
   // "openai", "gpt-5"
   // "knowhow", "openai/gpt-5"
@@ -822,7 +926,6 @@ export class AIClient {
             id,
             provider,
             type,
-            displayName: id,
             pricing: p,
           });
         }
@@ -871,3 +974,12 @@ export * from "./gemini";
 export * from "./contextLimits";
 export * from "./xai";
 export * from "./knowhowMcp";
+export * from "./groq";
+export * from "./github";
+export * from "./nvidia";
+export * from "./openrouter";
+export * from "./deepseek";
+export * from "./mistral";
+export * from "./llama";
+export * from "./copilot";
+export * from "./fireworks";

package/src/clients/llama.ts ADDED Viewed

@@ -0,0 +1,16 @@
+import { HttpClient } from "./http";
+import { LlamaTextPricing } from "./pricing/llama";
+/**
+ * Meta Llama API client — OpenAI-compatible API
+ * https://llama.developer.meta.com/docs/
+ * Direct from Meta: free Llama 3.x, Llama 4, and Cerebras/Groq-hosted variants.
+ * Set env var LLAMA_API_KEY to enable.
+ */
+export class GenericLlamaClient extends HttpClient {
+  constructor(apiKey = process.env.LLAMA_API_KEY) {
+    super("https://api.llama.com/compat");
+    if (apiKey) this.setJwt(apiKey);
+    this.setPrices(LlamaTextPricing);
+  }
+}

package/src/clients/mistral.ts ADDED Viewed

@@ -0,0 +1,16 @@
+import { HttpClient } from "./http";
+import { MistralTextPricing } from "./pricing/mistral";
+/**
+ * Mistral AI client — OpenAI-compatible API
+ * https://docs.mistral.ai/api/
+ * Top European AI lab with Mistral Large, Codestral, and free Devstral coding model.
+ * Set env var MISTRAL_API_KEY to enable.
+ */
+export class GenericMistralClient extends HttpClient {
+  constructor(apiKey = process.env.MISTRAL_API_KEY) {
+    super("https://api.mistral.ai");
+    if (apiKey) this.setJwt(apiKey);
+    this.setPrices(MistralTextPricing);
+  }
+}

package/src/clients/nvidia.ts ADDED Viewed

@@ -0,0 +1,16 @@
+import { HttpClient } from "./http";
+import { NvidiaTextPricing } from "./pricing/nvidia";
+/**
+ * NVIDIA NIM client — OpenAI-compatible API
+ * https://build.nvidia.com/explore/discover
+ * 76+ free models including Llama, Mistral, Phi, Flux image generation.
+ * Set env var NVIDIA_API_KEY to enable.
+ */
+export class GenericNvidiaClient extends HttpClient {
+  constructor(apiKey = process.env.NVIDIA_API_KEY) {
+    super("https://integrate.api.nvidia.com");
+    if (apiKey) this.setJwt(apiKey);
+    this.setPrices(NvidiaTextPricing);
+  }
+}

package/src/clients/openai.ts CHANGED Viewed

@@ -34,12 +34,14 @@ import {
   EmbeddingModels,
   Models,
   OpenAiReasoningModels,
+  OpenAiChatModels,
   OpenAiResponsesOnlyModels,
   OpenAiImageModels,
   OpenAiVideoModels,
   OpenAiTTSModels,
   OpenAiTranscriptionModels,
-  OpenAiEmbeddingModels,
+  OpenAiEmbeddingModelsList,
+  OpenAiRealtimeModels,
 } from "../types";
 import { ModelModality } from "./types";
@@ -64,7 +66,11 @@ export class GenericOpenAiClient implements GenericClient {
   reasoningEffort(
     messages: CompletionOptions["messages"]
   ): "low" | "medium" | "high" {
-    const effortMap = {
+    return this.detectReasoningEffort(messages);
+  }
+  detectReasoningEffort(messages: CompletionOptions["messages"]): "low" | "medium" | "high" {
+    const effortMap: Record<string, "low" | "medium" | "high"> = {
       ultrathink: "high",
       "think hard": "high",
       "reason hard": "high",
@@ -96,6 +102,30 @@ export class GenericOpenAiClient implements GenericClient {
     return "medium"; // Default to medium if no specific effort is mentioned
   }
+  resolveReasoningEffort(options: CompletionOptions): "low" | "medium" | "high" {
+    return options.reasoning_effort ?? this.detectReasoningEffort(options.messages);
+  }
+  /**
+   * Resolves the reasoning effort for a specific model, clamping to the model's
+   * supported levels if `reasoningLevels` is set in its pricing entry.
+   * If the requested level is not supported, picks the first listed level (assumed to be the lowest).
+   */
+  resolveReasoningEffortForModel(options: CompletionOptions): string {
+    const requested = options.reasoning_effort ?? this.detectReasoningEffort(options.messages);
+    const pricing = OpenAiTextPricing[options.model];
+    const supportedLevels = pricing?.reasoningLevels;
+    if (!supportedLevels || supportedLevels.length === 0) {
+      return requested;
+    }
+    // If the requested level is supported, use it
+    if (supportedLevels.includes(requested)) {
+      return requested;
+    }
+    // Otherwise use the first (lowest) supported level
+    return supportedLevels[0];
+  }
   async createChatCompletion(
     options: CompletionOptions
   ): Promise<CompletionResponse> {
@@ -122,8 +152,8 @@ export class GenericOpenAiClient implements GenericClient {
       max_tokens: options.max_tokens,
       ...(OpenAiReasoningModels.includes(options.model) && {
         max_tokens: undefined,
-        max_completion_tokens: Math.max(options.max_tokens, 100),
-        reasoning_effort: this.reasoningEffort(options.messages),
+        max_completion_tokens: Math.max(options.max_tokens ?? 0, 16_000),
+        reasoning_effort: this.resolveReasoningEffort(options),
       }),
       ...(options.tools && {
@@ -254,7 +284,7 @@ export class GenericOpenAiClient implements GenericClient {
       // Don't limit max_output_tokens for Responses API - codex truncates tool call arguments when limited
       ...(OpenAiReasoningModels.includes(options.model) && {
         max_output_tokens: Math.max(options.max_tokens || 0, 16000),
-        reasoning: { effort: this.reasoningEffort(options.messages) },
+        reasoning: { effort: this.resolveReasoningEffortForModel(options) },
       }),
       ...(tools?.length && {
         tools,
@@ -349,14 +379,14 @@ export class GenericOpenAiClient implements GenericClient {
       ("prompt_tokens_details" in usage &&
         usage.prompt_tokens_details?.cached_tokens) ||
       0;
-    const cachedInputCost = (cachedInputTokens * pricing.cached_input) / 1e6;
+    const cachedInputCost = (cachedInputTokens * (pricing.cached_input ?? 0)) / 1e6;
     const inputTokens = usage.prompt_tokens;
-    const inputCost = ((inputTokens - cachedInputCost) * pricing.input) / 1e6;
+    const inputCost = ((inputTokens - cachedInputTokens) * (pricing.input ?? 0)) / 1e6;
     const outputTokens =
       ("completion_tokens" in usage && usage?.completion_tokens) || 0;
-    const outputCost = (outputTokens * pricing.output) / 1e6;
+    const outputCost = (outputTokens * (pricing.output ?? 0)) / 1e6;
     const total = cachedInputCost + inputCost + outputCost;
     return total;
@@ -365,8 +395,8 @@ export class GenericOpenAiClient implements GenericClient {
   async getModels(modality?: ModelModality): Promise<{ id: string }[]> {
     if (modality) {
       const map: Partial<Record<ModelModality, string[]>> = {
-        completion: Object.values(Models.openai),
-        embedding: OpenAiEmbeddingModels,
+        completion: [...new Set([...OpenAiChatModels, ...OpenAiResponsesOnlyModels])],
+        embedding: OpenAiEmbeddingModelsList,
         image: OpenAiImageModels,
         audio: [...OpenAiTTSModels, ...OpenAiTranscriptionModels],
         transcription: OpenAiTranscriptionModels,
@@ -406,7 +436,7 @@ export class GenericOpenAiClient implements GenericClient {
     }
     const response = await this.client.audio.transcriptions.create({
-      file: file,
+      file,
       model: options.model || "whisper-1",
       language: options.language,
       prompt: options.prompt,

package/src/clients/openrouter.ts ADDED Viewed

@@ -0,0 +1,17 @@
+import { HttpClient } from "./http";
+import { OpenRouterTextPricing } from "./pricing/openrouter";
+/**
+ * OpenRouter client — OpenAI-compatible API aggregator
+ * https://openrouter.ai/docs
+ * 39+ free models; append `:free` suffix to a model id for the free variant.
+ * One API key gives access to models from many providers.
+ * Set env var OPENROUTER_API_KEY to enable.
+ */
+export class GenericOpenRouterClient extends HttpClient {
+  constructor(apiKey = process.env.OPENROUTER_API_KEY) {
+    super("https://openrouter.ai/api");
+    if (apiKey) this.setJwt(apiKey);
+    this.setPrices(OpenRouterTextPricing);
+  }
+}