@tyvm/knowhow 0.0.105 → 0.0.107

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. package/CONFIG.md +8 -5
  2. package/package.json +3 -2
  3. package/scripts/check-model-pricing.ts +509 -0
  4. package/scripts/compare-openrouter-coverage.ts +576 -0
  5. package/src/agents/base/base.ts +169 -5
  6. package/src/agents/tools/execCommand.ts +4 -0
  7. package/src/agents/tools/executeScript/definition.ts +1 -1
  8. package/src/agents/tools/index.ts +0 -1
  9. package/src/agents/tools/list.ts +3 -43
  10. package/src/agents/tools/writeFile.ts +1 -1
  11. package/src/auth/browserLogin.ts +9 -4
  12. package/src/chat/modules/RemoteSyncModule.ts +3 -0
  13. package/src/cli.ts +31 -1
  14. package/src/clients/anthropic.ts +8 -2
  15. package/src/clients/cerebras.ts +10 -0
  16. package/src/clients/contextLimits.ts +7 -2
  17. package/src/clients/copilot.ts +23 -0
  18. package/src/clients/deepseek.ts +16 -0
  19. package/src/clients/fireworks.ts +15 -0
  20. package/src/clients/gemini.ts +59 -4
  21. package/src/clients/github.ts +16 -0
  22. package/src/clients/groq.ts +15 -0
  23. package/src/clients/http.ts +194 -6
  24. package/src/clients/index.ts +116 -4
  25. package/src/clients/llama.ts +16 -0
  26. package/src/clients/mistral.ts +16 -0
  27. package/src/clients/nvidia.ts +16 -0
  28. package/src/clients/openai.ts +53 -12
  29. package/src/clients/openrouter.ts +17 -0
  30. package/src/clients/pricing/anthropic.ts +105 -78
  31. package/src/clients/pricing/cerebras.ts +11 -0
  32. package/src/clients/pricing/copilot.ts +60 -0
  33. package/src/clients/pricing/deepseek.ts +15 -0
  34. package/src/clients/pricing/fireworks.ts +32 -0
  35. package/src/clients/pricing/github.ts +69 -0
  36. package/src/clients/pricing/google.ts +245 -206
  37. package/src/clients/pricing/groq.ts +56 -0
  38. package/src/clients/pricing/index.ts +42 -5
  39. package/src/clients/pricing/llama.ts +18 -0
  40. package/src/clients/pricing/mistral.ts +34 -0
  41. package/src/clients/pricing/models.ts +7 -236
  42. package/src/clients/pricing/nvidia.ts +102 -0
  43. package/src/clients/pricing/openai.ts +348 -171
  44. package/src/clients/pricing/openrouter.ts +36 -0
  45. package/src/clients/pricing/types.ts +83 -2
  46. package/src/clients/pricing/xai.ts +121 -65
  47. package/src/clients/types.ts +28 -1
  48. package/src/clients/xai.ts +161 -1
  49. package/src/fileSync.ts +8 -2
  50. package/src/login.ts +11 -3
  51. package/src/services/AgentSyncFs.ts +36 -12
  52. package/src/services/KnowhowClient.ts +11 -0
  53. package/src/services/LazyToolsService.ts +6 -0
  54. package/src/services/S3.ts +0 -7
  55. package/src/services/modules/index.ts +11 -2
  56. package/src/types.ts +56 -279
  57. package/src/worker.ts +174 -0
  58. package/tests/clients/AIClient.test.ts +1 -1
  59. package/tests/clients/anthropic.test.ts +202 -0
  60. package/tests/clients/pricing.test.ts +37 -0
  61. package/tests/manual/clients/completions.json +838 -226
  62. package/tests/manual/clients/completions.test.ts +46 -31
  63. package/ts_build/package.json +3 -2
  64. package/ts_build/src/agents/base/base.d.ts +18 -1
  65. package/ts_build/src/agents/base/base.js +111 -4
  66. package/ts_build/src/agents/base/base.js.map +1 -1
  67. package/ts_build/src/agents/tools/execCommand.js +3 -0
  68. package/ts_build/src/agents/tools/execCommand.js.map +1 -1
  69. package/ts_build/src/agents/tools/executeScript/definition.js +1 -1
  70. package/ts_build/src/agents/tools/executeScript/definition.js.map +1 -1
  71. package/ts_build/src/agents/tools/index.d.ts +0 -1
  72. package/ts_build/src/agents/tools/index.js +0 -1
  73. package/ts_build/src/agents/tools/index.js.map +1 -1
  74. package/ts_build/src/agents/tools/list.js +3 -38
  75. package/ts_build/src/agents/tools/list.js.map +1 -1
  76. package/ts_build/src/agents/tools/visionTool.d.ts +1 -1
  77. package/ts_build/src/agents/tools/writeFile.js +1 -1
  78. package/ts_build/src/agents/tools/writeFile.js.map +1 -1
  79. package/ts_build/src/ai.d.ts +1 -1
  80. package/ts_build/src/auth/browserLogin.d.ts +2 -1
  81. package/ts_build/src/auth/browserLogin.js +10 -3
  82. package/ts_build/src/auth/browserLogin.js.map +1 -1
  83. package/ts_build/src/chat/modules/RemoteSyncModule.js +1 -0
  84. package/ts_build/src/chat/modules/RemoteSyncModule.js.map +1 -1
  85. package/ts_build/src/cli.js +19 -0
  86. package/ts_build/src/cli.js.map +1 -1
  87. package/ts_build/src/clients/anthropic.d.ts +1 -82
  88. package/ts_build/src/clients/anthropic.js +8 -2
  89. package/ts_build/src/clients/anthropic.js.map +1 -1
  90. package/ts_build/src/clients/cerebras.d.ts +4 -0
  91. package/ts_build/src/clients/cerebras.js +14 -0
  92. package/ts_build/src/clients/cerebras.js.map +1 -0
  93. package/ts_build/src/clients/contextLimits.js +7 -2
  94. package/ts_build/src/clients/contextLimits.js.map +1 -1
  95. package/ts_build/src/clients/copilot.d.ts +4 -0
  96. package/ts_build/src/clients/copilot.js +15 -0
  97. package/ts_build/src/clients/copilot.js.map +1 -0
  98. package/ts_build/src/clients/deepseek.d.ts +4 -0
  99. package/ts_build/src/clients/deepseek.js +15 -0
  100. package/ts_build/src/clients/deepseek.js.map +1 -0
  101. package/ts_build/src/clients/fireworks.d.ts +4 -0
  102. package/ts_build/src/clients/fireworks.js +15 -0
  103. package/ts_build/src/clients/fireworks.js.map +1 -0
  104. package/ts_build/src/clients/gemini.d.ts +1 -0
  105. package/ts_build/src/clients/gemini.js +38 -2
  106. package/ts_build/src/clients/gemini.js.map +1 -1
  107. package/ts_build/src/clients/github.d.ts +4 -0
  108. package/ts_build/src/clients/github.js +15 -0
  109. package/ts_build/src/clients/github.js.map +1 -0
  110. package/ts_build/src/clients/groq.d.ts +4 -0
  111. package/ts_build/src/clients/groq.js +15 -0
  112. package/ts_build/src/clients/groq.js.map +1 -0
  113. package/ts_build/src/clients/http.d.ts +22 -1
  114. package/ts_build/src/clients/http.js +135 -7
  115. package/ts_build/src/clients/http.js.map +1 -1
  116. package/ts_build/src/clients/index.d.ts +14 -0
  117. package/ts_build/src/clients/index.js +94 -4
  118. package/ts_build/src/clients/index.js.map +1 -1
  119. package/ts_build/src/clients/llama.d.ts +4 -0
  120. package/ts_build/src/clients/llama.js +15 -0
  121. package/ts_build/src/clients/llama.js.map +1 -0
  122. package/ts_build/src/clients/mistral.d.ts +4 -0
  123. package/ts_build/src/clients/mistral.js +15 -0
  124. package/ts_build/src/clients/mistral.js.map +1 -0
  125. package/ts_build/src/clients/nvidia.d.ts +4 -0
  126. package/ts_build/src/clients/nvidia.js +15 -0
  127. package/ts_build/src/clients/nvidia.js.map +1 -0
  128. package/ts_build/src/clients/openai.d.ts +4 -206
  129. package/ts_build/src/clients/openai.js +38 -10
  130. package/ts_build/src/clients/openai.js.map +1 -1
  131. package/ts_build/src/clients/openrouter.d.ts +4 -0
  132. package/ts_build/src/clients/openrouter.js +15 -0
  133. package/ts_build/src/clients/openrouter.js.map +1 -0
  134. package/ts_build/src/clients/pricing/anthropic.d.ts +26 -78
  135. package/ts_build/src/clients/pricing/anthropic.js +75 -78
  136. package/ts_build/src/clients/pricing/anthropic.js.map +1 -1
  137. package/ts_build/src/clients/pricing/cerebras.d.ts +4 -0
  138. package/ts_build/src/clients/pricing/cerebras.js +11 -0
  139. package/ts_build/src/clients/pricing/cerebras.js.map +1 -0
  140. package/ts_build/src/clients/pricing/copilot.d.ts +5 -0
  141. package/ts_build/src/clients/pricing/copilot.js +35 -0
  142. package/ts_build/src/clients/pricing/copilot.js.map +1 -0
  143. package/ts_build/src/clients/pricing/deepseek.d.ts +5 -0
  144. package/ts_build/src/clients/pricing/deepseek.js +10 -0
  145. package/ts_build/src/clients/pricing/deepseek.js.map +1 -0
  146. package/ts_build/src/clients/pricing/fireworks.d.ts +5 -0
  147. package/ts_build/src/clients/pricing/fireworks.js +21 -0
  148. package/ts_build/src/clients/pricing/fireworks.js.map +1 -0
  149. package/ts_build/src/clients/pricing/github.d.ts +4 -0
  150. package/ts_build/src/clients/pricing/github.js +58 -0
  151. package/ts_build/src/clients/pricing/github.js.map +1 -0
  152. package/ts_build/src/clients/pricing/google.d.ts +59 -6
  153. package/ts_build/src/clients/pricing/google.js +214 -167
  154. package/ts_build/src/clients/pricing/google.js.map +1 -1
  155. package/ts_build/src/clients/pricing/groq.d.ts +5 -0
  156. package/ts_build/src/clients/pricing/groq.js +41 -0
  157. package/ts_build/src/clients/pricing/groq.js.map +1 -0
  158. package/ts_build/src/clients/pricing/index.d.ts +16 -5
  159. package/ts_build/src/clients/pricing/index.js +62 -7
  160. package/ts_build/src/clients/pricing/index.js.map +1 -1
  161. package/ts_build/src/clients/pricing/llama.d.ts +4 -0
  162. package/ts_build/src/clients/pricing/llama.js +14 -0
  163. package/ts_build/src/clients/pricing/llama.js.map +1 -0
  164. package/ts_build/src/clients/pricing/mistral.d.ts +5 -0
  165. package/ts_build/src/clients/pricing/mistral.js +23 -0
  166. package/ts_build/src/clients/pricing/mistral.js.map +1 -0
  167. package/ts_build/src/clients/pricing/models.d.ts +5 -4
  168. package/ts_build/src/clients/pricing/models.js +8 -162
  169. package/ts_build/src/clients/pricing/models.js.map +1 -1
  170. package/ts_build/src/clients/pricing/nvidia.d.ts +8 -0
  171. package/ts_build/src/clients/pricing/nvidia.js +96 -0
  172. package/ts_build/src/clients/pricing/nvidia.js.map +1 -0
  173. package/ts_build/src/clients/pricing/openai.d.ts +86 -197
  174. package/ts_build/src/clients/pricing/openai.js +295 -168
  175. package/ts_build/src/clients/pricing/openai.js.map +1 -1
  176. package/ts_build/src/clients/pricing/openrouter.d.ts +4 -0
  177. package/ts_build/src/clients/pricing/openrouter.js +29 -0
  178. package/ts_build/src/clients/pricing/openrouter.js.map +1 -0
  179. package/ts_build/src/clients/pricing/types.d.ts +27 -2
  180. package/ts_build/src/clients/pricing/types.js +46 -0
  181. package/ts_build/src/clients/pricing/types.js.map +1 -1
  182. package/ts_build/src/clients/pricing/xai.d.ts +37 -57
  183. package/ts_build/src/clients/pricing/xai.js +92 -59
  184. package/ts_build/src/clients/pricing/xai.js.map +1 -1
  185. package/ts_build/src/clients/types.d.ts +12 -1
  186. package/ts_build/src/clients/xai.d.ts +2 -62
  187. package/ts_build/src/clients/xai.js +132 -1
  188. package/ts_build/src/clients/xai.js.map +1 -1
  189. package/ts_build/src/fileSync.js +7 -2
  190. package/ts_build/src/fileSync.js.map +1 -1
  191. package/ts_build/src/login.js +8 -2
  192. package/ts_build/src/login.js.map +1 -1
  193. package/ts_build/src/services/AgentSyncFs.js +1 -0
  194. package/ts_build/src/services/AgentSyncFs.js.map +1 -1
  195. package/ts_build/src/services/KnowhowClient.d.ts +1 -0
  196. package/ts_build/src/services/KnowhowClient.js +7 -0
  197. package/ts_build/src/services/KnowhowClient.js.map +1 -1
  198. package/ts_build/src/services/LazyToolsService.d.ts +1 -0
  199. package/ts_build/src/services/LazyToolsService.js +3 -0
  200. package/ts_build/src/services/LazyToolsService.js.map +1 -1
  201. package/ts_build/src/services/S3.js +0 -7
  202. package/ts_build/src/services/S3.js.map +1 -1
  203. package/ts_build/src/services/modules/index.js +41 -1
  204. package/ts_build/src/services/modules/index.js.map +1 -1
  205. package/ts_build/src/types.d.ts +163 -124
  206. package/ts_build/src/types.js +33 -213
  207. package/ts_build/src/types.js.map +1 -1
  208. package/ts_build/src/worker.d.ts +4 -0
  209. package/ts_build/src/worker.js +140 -0
  210. package/ts_build/src/worker.js.map +1 -1
  211. package/ts_build/tests/clients/AIClient.test.js +1 -1
  212. package/ts_build/tests/clients/AIClient.test.js.map +1 -1
  213. package/ts_build/tests/clients/anthropic.test.d.ts +1 -0
  214. package/ts_build/tests/clients/anthropic.test.js +159 -0
  215. package/ts_build/tests/clients/anthropic.test.js.map +1 -0
  216. package/ts_build/tests/clients/pricing.test.js +21 -0
  217. package/ts_build/tests/clients/pricing.test.js.map +1 -1
  218. package/ts_build/tests/manual/clients/completions.test.js +27 -24
  219. package/ts_build/tests/manual/clients/completions.test.js.map +1 -1
@@ -16,15 +16,17 @@ import { wait } from "../utils";
16
16
  import {
17
17
  EmbeddingModels,
18
18
  Models,
19
+ GoogleThinkingLevelModels,
20
+ GoogleThinkingBudgetModels,
19
21
  GoogleImageModels,
20
22
  GoogleVideoModels,
21
23
  GoogleTTSModels,
22
- GoogleEmbeddingModels,
24
+ GoogleEmbeddingModelsList,
23
25
  GoogleReasoningModels,
24
26
  } from "../types";
25
27
  import { GeminiTextPricing } from "./pricing";
26
28
  import { ContextLimits } from "./contextLimits";
27
- import { ModelModality } from "./types";
29
+ import { ModelModality, TokenUsage } from "./types";
28
30
 
29
31
  import {
30
32
  GenericClient,
@@ -389,9 +391,49 @@ export class GenericGeminiClient implements GenericClient {
389
391
  return [{ functionDeclarations }];
390
392
  }
391
393
 
394
+ /**
395
+ * Builds the thinkingConfig for Gemini models that support it.
396
+ * - Gemini 3.x models use thinkingLevel: "minimal" | "low" | "medium" | "high"
397
+ * - Gemini 2.5 models use thinkingBudget: number (0 = off, -1 = dynamic)
398
+ *
399
+ * Maps CompletionOptions.reasoning_effort to provider-specific values.
400
+ */
401
+ buildThinkingConfig(options: CompletionOptions): Record<string, unknown> | undefined {
402
+ const model = options.model;
403
+ const effort = options.reasoning_effort ?? "low";
404
+
405
+ // Gemini 3.x — use thinkingLevel
406
+ if (GoogleThinkingLevelModels.includes(model)) {
407
+ const levelMap: Record<string, string> = {
408
+ low: "low",
409
+ medium: "medium",
410
+ high: "high",
411
+ };
412
+ return {
413
+ thinkingLevel: levelMap[effort] ?? "low",
414
+ };
415
+ }
416
+
417
+ // Gemini 2.5 — use thinkingBudget
418
+ if (GoogleThinkingBudgetModels.includes(model)) {
419
+ // Map effort to token budget
420
+ const budgetMap: Record<string, number> = {
421
+ low: 1024,
422
+ medium: 8192,
423
+ high: -1, // dynamic
424
+ };
425
+ return {
426
+ thinkingBudget: budgetMap[effort] ?? 1024,
427
+ };
428
+ }
429
+
430
+ return undefined;
431
+ }
432
+
392
433
  async createChatCompletion(
393
434
  options: CompletionOptions
394
435
  ): Promise<CompletionResponse> {
436
+ const thinkingConfig = this.buildThinkingConfig(options);
395
437
  const { systemInstruction, contents } = this.transformMessages(
396
438
  options.messages
397
439
  );
@@ -403,6 +445,7 @@ export class GenericGeminiClient implements GenericClient {
403
445
  contents,
404
446
  config: {
405
447
  systemInstruction,
448
+ thinkingConfig,
406
449
  tools: this.transformTools(options.tools),
407
450
  maxOutputTokens: options.max_tokens,
408
451
  },
@@ -481,10 +524,22 @@ export class GenericGeminiClient implements GenericClient {
481
524
  ? this.calculateCost(options.model, usage)
482
525
  : undefined;
483
526
 
527
+ // Map cachedContentTokenCount → prompt_tokens_details.cached_tokens so that
528
+ // base.ts can read cache hit tokens via usage.prompt_tokens_details?.cached_tokens
529
+ const cachedTokens = (usage as any)?.cachedContentTokenCount ?? 0;
530
+ const usageWithCache: TokenUsage | undefined = usage
531
+ ? ({
532
+ prompt_tokens: (usage as any).promptTokenCount ?? 0,
533
+ completion_tokens: (usage as any).candidatesTokenCount ?? 0,
534
+ total_tokens: (usage as any).totalTokenCount,
535
+ prompt_tokens_details: { cached_tokens: cachedTokens },
536
+ } as TokenUsage)
537
+ : undefined;
538
+
484
539
  return {
485
540
  choices,
486
541
  model: options.model,
487
- usage,
542
+ usage: usageWithCache,
488
543
  usd_cost: usdCost,
489
544
  };
490
545
  } catch (error) {
@@ -600,7 +655,7 @@ export class GenericGeminiClient implements GenericClient {
600
655
  if (modality) {
601
656
  const map: Partial<Record<ModelModality, string[]>> = {
602
657
  completion: GoogleReasoningModels,
603
- embedding: GoogleEmbeddingModels,
658
+ embedding: GoogleEmbeddingModelsList,
604
659
  image: GoogleImageModels,
605
660
  audio: GoogleTTSModels,
606
661
  video: GoogleVideoModels,
@@ -0,0 +1,16 @@
1
+ import { HttpClient } from "./http";
2
+ import { GitHubModelsTextPricing } from "./pricing/github";
3
+
4
+ /**
5
+ * GitHub Models client — OpenAI-compatible API
6
+ * https://docs.github.com/en/github-models
7
+ * Free access to premium models (GPT-4o, DeepSeek R1, Llama, Phi etc.) with a GitHub token.
8
+ * Set env var GITHUB_TOKEN to enable.
9
+ */
10
+ export class GenericGitHubModelsClient extends HttpClient {
11
+ constructor(apiKey = process.env.GITHUB_TOKEN) {
12
+ super("https://models.github.ai/inference");
13
+ if (apiKey) this.setJwt(apiKey);
14
+ this.setPrices(GitHubModelsTextPricing);
15
+ }
16
+ }
@@ -0,0 +1,15 @@
1
+ import { HttpClient } from "./http";
2
+ import { GroqTextPricing } from "./pricing/groq";
3
+
4
+ /**
5
+ * Groq client — OpenAI-compatible API (ultra-fast inference)
6
+ * https://console.groq.com/docs/openai
7
+ * Set env var GROQ_API_KEY to enable.
8
+ */
9
+ export class GenericGroqClient extends HttpClient {
10
+ constructor(apiKey = process.env.GROQ_API_KEY) {
11
+ super("https://api.groq.com/openai");
12
+ if (apiKey) this.setJwt(apiKey);
13
+ this.setPrices(GroqTextPricing);
14
+ }
15
+ }
@@ -6,11 +6,29 @@ import {
6
6
  EmbeddingOptions,
7
7
  EmbeddingResponse,
8
8
  } from "./types";
9
+ import { ModelPricing } from "./pricing/types";
9
10
  import fs from "fs";
10
11
  import path from "path";
11
12
 
13
+ export interface HttpClientOptions {
14
+ headers?: Record<string, string>;
15
+ timeout?: number;
16
+ extra_body?: Record<string, any>;
17
+ }
18
+
12
19
  export class HttpClient implements GenericClient {
13
- constructor(private baseUrl: string, private headers = {}) {}
20
+ /** Timeout in milliseconds for HTTP requests. Default: 30000 (30s). Use 0 to disable. */
21
+ private timeout: number;
22
+ private headers: Record<string, string>;
23
+ private extra_body: Record<string, any>;
24
+ /** Optional pricing table: model id → per-million-token prices */
25
+ private pricingMap: Record<string, ModelPricing> = {};
26
+
27
+ constructor(private baseUrl: string, options: HttpClientOptions = {}) {
28
+ this.headers = options.headers ?? {};
29
+ this.timeout = options.timeout ?? 30000;
30
+ this.extra_body = options.extra_body ?? {};
31
+ }
14
32
 
15
33
  private async withRetry<T>(fn: () => Promise<T>, retries = 3): Promise<T> {
16
34
  let lastError: any;
@@ -64,6 +82,52 @@ export class HttpClient implements GenericClient {
64
82
  this.setJwt(key);
65
83
  }
66
84
 
85
+ /**
86
+ * Supply a pricing map so that createChatCompletion / createEmbedding can
87
+ * calculate a local usd_cost from usage tokens when the provider does not
88
+ * return a cost field itself.
89
+ */
90
+ setPrices(pricingMap: Record<string, ModelPricing>) {
91
+ this.pricingMap = pricingMap;
92
+ }
93
+
94
+ /**
95
+ * Calculate USD cost for a completion/embedding call from token usage.
96
+ * Returns undefined if no pricing entry exists for the model.
97
+ */
98
+ calculateCost(
99
+ model: string,
100
+ usage: { prompt_tokens?: number; completion_tokens?: number; prompt_tokens_details?: { cached_tokens?: number } } | undefined
101
+ ): number | undefined {
102
+ if (!usage) return undefined;
103
+ const pricing = this.pricingMap[model];
104
+ if (!pricing) return undefined;
105
+
106
+ const cachedInputTokens =
107
+ usage.prompt_tokens_details?.cached_tokens ?? 0;
108
+ const inputTokens = usage.prompt_tokens ?? 0;
109
+ const outputTokens = usage.completion_tokens ?? 0;
110
+
111
+ const cachedInputCost = (cachedInputTokens * (pricing.cache_hit ?? pricing.cached_input ?? 0)) / 1e6;
112
+ const inputCost = ((inputTokens - cachedInputTokens) * (pricing.input ?? 0)) / 1e6;
113
+ const outputCost = (outputTokens * (pricing.output ?? 0)) / 1e6;
114
+
115
+ return cachedInputCost + inputCost + outputCost;
116
+ }
117
+
118
+ /**
119
+ * Apply extra options (timeout, headers, extra_body) after construction.
120
+ * Used by AIClient.resolveClient to honour per-provider config overrides
121
+ * even when the client is created via a known clientClass (e.g. nvidia, groq).
122
+ */
123
+ setOptions(options: Omit<HttpClientOptions, "headers"> & { headers?: Record<string, string> }) {
124
+ if (options.timeout !== undefined) this.timeout = options.timeout;
125
+ if (options.extra_body !== undefined) this.extra_body = options.extra_body;
126
+ if (options.headers) {
127
+ this.headers = { ...this.headers, ...options.headers };
128
+ }
129
+ }
130
+
67
131
  loadJwtFile(filePath: string) {
68
132
  try {
69
133
  const jwtFile = path.join(process.cwd(), filePath);
@@ -85,7 +149,8 @@ export class HttpClient implements GenericClient {
85
149
  ...options,
86
150
  model: options.model,
87
151
  messages: options.messages,
88
- max_tokens: options.max_tokens || 3000,
152
+ max_tokens: options.max_tokens || 4000,
153
+ ...this.extra_body,
89
154
 
90
155
  ...(options.tools && {
91
156
  tools: options.tools,
@@ -96,7 +161,7 @@ export class HttpClient implements GenericClient {
96
161
  const response = await http.post(
97
162
  `${this.baseUrl}/v1/chat/completions`,
98
163
  body,
99
- { headers: this.headers as Record<string, string> }
164
+ { headers: this.headers as Record<string, string>, timeout: this.timeout }
100
165
  );
101
166
 
102
167
  const data = response.data;
@@ -116,7 +181,129 @@ export class HttpClient implements GenericClient {
116
181
  })),
117
182
  model: data.model,
118
183
  usage: data.usage,
119
- usd_cost: data.usd_cost,
184
+ usd_cost: data.usd_cost ?? this.calculateCost(options.model, data.usage),
185
+ };
186
+ });
187
+ }
188
+
189
+ /**
190
+ * Creates a completion using the Responses API (/v1/responses).
191
+ * Compatible with providers that implement the OpenAI Responses API spec
192
+ * (e.g. xAI at https://api.x.ai/v1/responses).
193
+ */
194
+ async createResponse(
195
+ options: CompletionOptions,
196
+ store = false
197
+ ): Promise<CompletionResponse> {
198
+ return this.withRetry(async () => {
199
+ // Extract system messages as instructions
200
+ const systemMessages = options.messages.filter((m) => m.role === "system");
201
+ const nonSystemMessages = options.messages.filter((m) => m.role !== "system");
202
+ const instructions = systemMessages
203
+ .map((m) => (typeof m.content === "string" ? m.content : ""))
204
+ .join("\n")
205
+ .trim() || undefined;
206
+
207
+ // Convert messages to Responses API input format
208
+ const input: any[] = nonSystemMessages.map((msg) => {
209
+ if (msg.role === "tool") {
210
+ return {
211
+ type: "function_call_output",
212
+ call_id: msg.tool_call_id,
213
+ output: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content),
214
+ };
215
+ }
216
+ if (msg.role === "assistant" && msg.tool_calls?.length) {
217
+ return (msg.tool_calls as any[]).map((tc: any) => ({
218
+ type: "function_call",
219
+ id: tc.id.startsWith("fc") ? tc.id : `fc_${tc.id}`,
220
+ call_id: tc.id,
221
+ name: tc.function.name,
222
+ arguments: tc.function.arguments,
223
+ }));
224
+ }
225
+ return {
226
+ role: msg.role,
227
+ content: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content),
228
+ };
229
+ }).flat();
230
+
231
+ const tools = options.tools?.map((tool) => ({
232
+ type: "function" as const,
233
+ name: tool.function.name,
234
+ description: tool.function.description,
235
+ parameters: tool.function.parameters as Record<string, unknown>,
236
+ strict: false,
237
+ }));
238
+
239
+ const body = {
240
+ model: options.model,
241
+ input,
242
+ ...(instructions && { instructions }),
243
+ ...(options.max_tokens && { max_output_tokens: options.max_tokens }),
244
+ ...(tools?.length && { tools, tool_choice: "auto" }),
245
+ store,
246
+ ...this.extra_body,
247
+ };
248
+
249
+ const response = await http.post(
250
+ `${this.baseUrl}/v1/responses`,
251
+ body,
252
+ { headers: this.headers as Record<string, string>, timeout: this.timeout }
253
+ );
254
+
255
+ const data = response.data;
256
+
257
+ if (data.error) {
258
+ throw new Error(JSON.stringify(data.error, null, 2));
259
+ }
260
+
261
+ // Map usage from Responses API format to Chat Completions format
262
+ const usage = data.usage
263
+ ? {
264
+ prompt_tokens: data.usage.input_tokens,
265
+ completion_tokens: data.usage.output_tokens,
266
+ total_tokens: data.usage.input_tokens + data.usage.output_tokens,
267
+ prompt_tokens_details: {
268
+ cached_tokens:
269
+ data.usage.input_tokens_details?.cached_tokens ?? 0,
270
+ },
271
+ }
272
+ : undefined;
273
+
274
+ // Collect text content and tool calls from output items
275
+ let textContent: string | null = null;
276
+ const toolCalls: any[] = [];
277
+
278
+ for (const item of data.output ?? []) {
279
+ if (item.type === "message") {
280
+ for (const part of item.content ?? []) {
281
+ if (part.type === "output_text") {
282
+ textContent = (textContent ?? "") + part.text;
283
+ }
284
+ }
285
+ } else if (item.type === "function_call") {
286
+ toolCalls.push({
287
+ id: item.call_id,
288
+ type: "function",
289
+ function: { name: item.name, arguments: item.arguments },
290
+ });
291
+ }
292
+ }
293
+
294
+ return {
295
+ choices: [
296
+ {
297
+ message: {
298
+ role: "assistant",
299
+ content: textContent,
300
+ ...(toolCalls.length > 0 && { tool_calls: toolCalls }),
301
+ },
302
+ },
303
+ ],
304
+ model: data.model ?? options.model,
305
+ usage,
306
+ usd_cost: data.usd_cost ?? this.calculateCost(options.model, usage),
120
307
  };
121
308
  });
122
309
  }
@@ -129,7 +316,7 @@ export class HttpClient implements GenericClient {
129
316
  model: options.model,
130
317
  input: options.input,
131
318
  },
132
- { headers: this.headers as Record<string, string> }
319
+ { headers: this.headers as Record<string, string>, timeout: this.timeout }
133
320
  );
134
321
 
135
322
  const data = response.data;
@@ -143,7 +330,7 @@ export class HttpClient implements GenericClient {
143
330
  data: data.data,
144
331
  model: options.model,
145
332
  usage: data.usage,
146
- usd_cost: data.usd_cost,
333
+ usd_cost: data.usd_cost ?? this.calculateCost(options.model, data.usage),
147
334
  };
148
335
  });
149
336
  }
@@ -152,6 +339,7 @@ export class HttpClient implements GenericClient {
152
339
  return this.withRetry(async () => {
153
340
  const response = await http.get(`${this.baseUrl}/v1/models?type=${type}`, {
154
341
  headers: this.headers as Record<string, string>,
342
+ timeout: this.timeout,
155
343
  });
156
344
 
157
345
  const data = response.data?.data;
@@ -43,6 +43,16 @@ import type {
43
43
  ModelType,
44
44
  ModelCatalogEntry,
45
45
  } from "./pricing/types";
46
+ import { GenericCerebrasClient } from "./cerebras";
47
+ import { GenericGroqClient } from "./groq";
48
+ import { GenericGitHubModelsClient } from "./github";
49
+ import { GenericNvidiaClient } from "./nvidia";
50
+ import { GenericOpenRouterClient } from "./openrouter";
51
+ import { GenericDeepSeekClient } from "./deepseek";
52
+ import { GenericMistralClient } from "./mistral";
53
+ import { GitHubCopilotClient } from "./copilot";
54
+ import { GenericLlamaClient } from "./llama";
55
+ import { GenericFireworksClient } from "./fireworks";
46
56
  export {
47
57
  OpenAiTextPricing,
48
58
  AnthropicTextPricing,
@@ -75,6 +85,18 @@ const BUILT_IN_PROVIDER_REGISTRY: Record<string, ProviderRegistryEntry> = {
75
85
  anthropic: { clientClass: GenericAnthropicClient },
76
86
  google: { clientClass: GenericGeminiClient },
77
87
  xai: { clientClass: GenericXAIClient },
88
+ cerebras: {
89
+ clientClass: GenericCerebrasClient,
90
+ },
91
+ groq: { clientClass: GenericGroqClient },
92
+ github: { clientClass: GenericGitHubModelsClient },
93
+ nvidia: { clientClass: GenericNvidiaClient },
94
+ openrouter: { clientClass: GenericOpenRouterClient },
95
+ deepseek: { clientClass: GenericDeepSeekClient },
96
+ mistral: { clientClass: GenericMistralClient },
97
+ "github-copilot": { clientClass: GitHubCopilotClient },
98
+ llama: { clientClass: GenericLlamaClient },
99
+ fireworks: { clientClass: GenericFireworksClient },
78
100
  knowhow: {
79
101
  createClient: (entry: ModelProvider) => {
80
102
  const jwt = loadKnowhowJwt();
@@ -94,7 +116,17 @@ const DEFAULT_PROVIDERS: ModelProvider[] = [
94
116
  { provider: "anthropic", envKey: "ANTHROPIC_API_KEY" },
95
117
  { provider: "google", envKey: "GEMINI_API_KEY" },
96
118
  { provider: "xai", envKey: "XAI_API_KEY" },
119
+ { provider: "cerebras", envKey: "CEREBRAS_API_KEY" },
97
120
  { provider: "knowhow" },
121
+ { provider: "groq", envKey: "GROQ_API_KEY" },
122
+ { provider: "github", envKey: "GITHUB_TOKEN" },
123
+ { provider: "nvidia", envKey: "NVIDIA_API_KEY" },
124
+ { provider: "openrouter", envKey: "OPENROUTER_API_KEY" },
125
+ { provider: "deepseek", envKey: "DEEPSEEK_API_KEY" },
126
+ { provider: "mistral", envKey: "MISTRAL_API_KEY" },
127
+ { provider: "github-copilot", envKey: "GITHUB_COPILOT_TOKEN" },
128
+ { provider: "llama", envKey: "LLAMA_API_KEY" },
129
+ { provider: "fireworks", envKey: "FIREWORKS_API_KEY" },
98
130
  ];
99
131
 
100
132
  export class AIClient {
@@ -153,19 +185,45 @@ export class AIClient {
153
185
  // envKey-based auth: env var must be present
154
186
  const envValue = process.env[effectiveEnvKey];
155
187
  if (!envValue) return null;
156
- return new reg.clientClass(envValue);
188
+ const client = new reg.clientClass(envValue);
189
+ // Apply any extra options (timeout, headers, extra_body) from config
190
+ if (client instanceof HttpClient) {
191
+ client.setOptions({
192
+ timeout: entry.timeout,
193
+ headers: entry.headers,
194
+ extra_body: entry.extra_body,
195
+ });
196
+ if (entry.pricing) client.setPrices(entry.pricing);
197
+ }
198
+ return client;
157
199
  }
158
200
 
159
201
  // No envKey, no url — instantiate with no arg (client uses its own defaults)
160
- return new reg.clientClass();
202
+ const client = new reg.clientClass();
203
+ // Apply any extra options (timeout, headers, extra_body) from config
204
+ if (client instanceof HttpClient) {
205
+ client.setOptions({
206
+ timeout: entry.timeout,
207
+ headers: entry.headers,
208
+ extra_body: entry.extra_body,
209
+ });
210
+ if (entry.pricing) client.setPrices(entry.pricing);
211
+ }
212
+ return client;
161
213
  }
162
214
 
163
215
  // 3. HTTP provider — requires url, no clientClass in registry
164
216
  if (entry.url) {
165
- const client = new HttpClient(entry.url, entry.headers);
217
+ const client = new HttpClient(entry.url, {
218
+ headers: entry.headers,
219
+ timeout: entry.timeout,
220
+ extra_body: entry.extra_body,
221
+ });
166
222
  if (entry.jwtFile) {
167
223
  client.loadJwtFile(entry.jwtFile);
168
224
  }
225
+ // For custom HTTP providers, use entry.pricing if available
226
+ if (entry.pricing) client.setPrices(entry.pricing);
169
227
  return client;
170
228
  }
171
229
 
@@ -492,6 +550,52 @@ export class AIClient {
492
550
  return undefined;
493
551
  }
494
552
 
553
+ /**
554
+ * Normalize a model ID for fuzzy matching:
555
+ * - lowercase
556
+ * - replace dots with dashes (e.g. "claude-opus-4.7" → "claude-opus-4-7")
557
+ * - strip variant suffixes like ":thinking", ":free"
558
+ * - strip trailing date suffixes like "-20250514"
559
+ * - strip trailing "-beta", "-preview", "-latest"
560
+ */
561
+ private static normalizeModelId(id: string): string {
562
+ return id
563
+ .toLowerCase()
564
+ .replace(/\./g, "-")
565
+ .replace(/:[^:]+$/, "")
566
+ .replace(/-\d{8}$/, "")
567
+ .replace(/-(beta|preview|latest|exp|rc\d*)$/i, "");
568
+ }
569
+
570
+ /**
571
+ * Fuzzy model lookup: given a model name (possibly without date suffix,
572
+ * with dots instead of dashes, etc.), find the best matching registered model.
573
+ *
574
+ * Example: "claude-3.7-sonnet" matches "claude-3-7-sonnet-20250219"
575
+ * "gpt-4.1" matches "gpt-4.1" exactly
576
+ *
577
+ * @param modelQuery - the model name to search for (can be partial/normalized)
578
+ * @param provider - optional provider to restrict search to
579
+ */
580
+ findModelFuzzy(modelQuery: string, provider?: string): { provider: string; model: string } | undefined {
581
+ const queryNorm = AIClient.normalizeModelId(modelQuery);
582
+ const providers = provider
583
+ ? [provider]
584
+ : Object.keys(this.clientModels);
585
+
586
+ for (const p of providers) {
587
+ const models = (this.clientModels[p] as string[]) ?? [];
588
+ for (const m of models) {
589
+ const mNorm = AIClient.normalizeModelId(m);
590
+ // Exact normalized match, OR our model is a dated variant of the query
591
+ if (mNorm === queryNorm || mNorm.startsWith(queryNorm + "-")) {
592
+ return { provider: p, model: m };
593
+ }
594
+ }
595
+ }
596
+ return undefined;
597
+ }
598
+
495
599
  // detects these formats:
496
600
  // "openai", "gpt-5"
497
601
  // "knowhow", "openai/gpt-5"
@@ -822,7 +926,6 @@ export class AIClient {
822
926
  id,
823
927
  provider,
824
928
  type,
825
- displayName: id,
826
929
  pricing: p,
827
930
  });
828
931
  }
@@ -871,3 +974,12 @@ export * from "./gemini";
871
974
  export * from "./contextLimits";
872
975
  export * from "./xai";
873
976
  export * from "./knowhowMcp";
977
+ export * from "./groq";
978
+ export * from "./github";
979
+ export * from "./nvidia";
980
+ export * from "./openrouter";
981
+ export * from "./deepseek";
982
+ export * from "./mistral";
983
+ export * from "./llama";
984
+ export * from "./copilot";
985
+ export * from "./fireworks";
@@ -0,0 +1,16 @@
1
+ import { HttpClient } from "./http";
2
+ import { LlamaTextPricing } from "./pricing/llama";
3
+
4
+ /**
5
+ * Meta Llama API client — OpenAI-compatible API
6
+ * https://llama.developer.meta.com/docs/
7
+ * Direct from Meta: free Llama 3.x, Llama 4, and Cerebras/Groq-hosted variants.
8
+ * Set env var LLAMA_API_KEY to enable.
9
+ */
10
+ export class GenericLlamaClient extends HttpClient {
11
+ constructor(apiKey = process.env.LLAMA_API_KEY) {
12
+ super("https://api.llama.com/compat");
13
+ if (apiKey) this.setJwt(apiKey);
14
+ this.setPrices(LlamaTextPricing);
15
+ }
16
+ }
@@ -0,0 +1,16 @@
1
+ import { HttpClient } from "./http";
2
+ import { MistralTextPricing } from "./pricing/mistral";
3
+
4
+ /**
5
+ * Mistral AI client — OpenAI-compatible API
6
+ * https://docs.mistral.ai/api/
7
+ * Top European AI lab with Mistral Large, Codestral, and free Devstral coding model.
8
+ * Set env var MISTRAL_API_KEY to enable.
9
+ */
10
+ export class GenericMistralClient extends HttpClient {
11
+ constructor(apiKey = process.env.MISTRAL_API_KEY) {
12
+ super("https://api.mistral.ai");
13
+ if (apiKey) this.setJwt(apiKey);
14
+ this.setPrices(MistralTextPricing);
15
+ }
16
+ }
@@ -0,0 +1,16 @@
1
+ import { HttpClient } from "./http";
2
+ import { NvidiaTextPricing } from "./pricing/nvidia";
3
+
4
+ /**
5
+ * NVIDIA NIM client — OpenAI-compatible API
6
+ * https://build.nvidia.com/explore/discover
7
+ * 76+ free models including Llama, Mistral, Phi, Flux image generation.
8
+ * Set env var NVIDIA_API_KEY to enable.
9
+ */
10
+ export class GenericNvidiaClient extends HttpClient {
11
+ constructor(apiKey = process.env.NVIDIA_API_KEY) {
12
+ super("https://integrate.api.nvidia.com");
13
+ if (apiKey) this.setJwt(apiKey);
14
+ this.setPrices(NvidiaTextPricing);
15
+ }
16
+ }