@tyvm/knowhow 0.0.105 → 0.0.107
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONFIG.md +8 -5
- package/package.json +3 -2
- package/scripts/check-model-pricing.ts +509 -0
- package/scripts/compare-openrouter-coverage.ts +576 -0
- package/src/agents/base/base.ts +169 -5
- package/src/agents/tools/execCommand.ts +4 -0
- package/src/agents/tools/executeScript/definition.ts +1 -1
- package/src/agents/tools/index.ts +0 -1
- package/src/agents/tools/list.ts +3 -43
- package/src/agents/tools/writeFile.ts +1 -1
- package/src/auth/browserLogin.ts +9 -4
- package/src/chat/modules/RemoteSyncModule.ts +3 -0
- package/src/cli.ts +31 -1
- package/src/clients/anthropic.ts +8 -2
- package/src/clients/cerebras.ts +10 -0
- package/src/clients/contextLimits.ts +7 -2
- package/src/clients/copilot.ts +23 -0
- package/src/clients/deepseek.ts +16 -0
- package/src/clients/fireworks.ts +15 -0
- package/src/clients/gemini.ts +59 -4
- package/src/clients/github.ts +16 -0
- package/src/clients/groq.ts +15 -0
- package/src/clients/http.ts +194 -6
- package/src/clients/index.ts +116 -4
- package/src/clients/llama.ts +16 -0
- package/src/clients/mistral.ts +16 -0
- package/src/clients/nvidia.ts +16 -0
- package/src/clients/openai.ts +53 -12
- package/src/clients/openrouter.ts +17 -0
- package/src/clients/pricing/anthropic.ts +105 -78
- package/src/clients/pricing/cerebras.ts +11 -0
- package/src/clients/pricing/copilot.ts +60 -0
- package/src/clients/pricing/deepseek.ts +15 -0
- package/src/clients/pricing/fireworks.ts +32 -0
- package/src/clients/pricing/github.ts +69 -0
- package/src/clients/pricing/google.ts +245 -206
- package/src/clients/pricing/groq.ts +56 -0
- package/src/clients/pricing/index.ts +42 -5
- package/src/clients/pricing/llama.ts +18 -0
- package/src/clients/pricing/mistral.ts +34 -0
- package/src/clients/pricing/models.ts +7 -236
- package/src/clients/pricing/nvidia.ts +102 -0
- package/src/clients/pricing/openai.ts +348 -171
- package/src/clients/pricing/openrouter.ts +36 -0
- package/src/clients/pricing/types.ts +83 -2
- package/src/clients/pricing/xai.ts +121 -65
- package/src/clients/types.ts +28 -1
- package/src/clients/xai.ts +161 -1
- package/src/fileSync.ts +8 -2
- package/src/login.ts +11 -3
- package/src/services/AgentSyncFs.ts +36 -12
- package/src/services/KnowhowClient.ts +11 -0
- package/src/services/LazyToolsService.ts +6 -0
- package/src/services/S3.ts +0 -7
- package/src/services/modules/index.ts +11 -2
- package/src/types.ts +56 -279
- package/src/worker.ts +174 -0
- package/tests/clients/AIClient.test.ts +1 -1
- package/tests/clients/anthropic.test.ts +202 -0
- package/tests/clients/pricing.test.ts +37 -0
- package/tests/manual/clients/completions.json +838 -226
- package/tests/manual/clients/completions.test.ts +46 -31
- package/ts_build/package.json +3 -2
- package/ts_build/src/agents/base/base.d.ts +18 -1
- package/ts_build/src/agents/base/base.js +111 -4
- package/ts_build/src/agents/base/base.js.map +1 -1
- package/ts_build/src/agents/tools/execCommand.js +3 -0
- package/ts_build/src/agents/tools/execCommand.js.map +1 -1
- package/ts_build/src/agents/tools/executeScript/definition.js +1 -1
- package/ts_build/src/agents/tools/executeScript/definition.js.map +1 -1
- package/ts_build/src/agents/tools/index.d.ts +0 -1
- package/ts_build/src/agents/tools/index.js +0 -1
- package/ts_build/src/agents/tools/index.js.map +1 -1
- package/ts_build/src/agents/tools/list.js +3 -38
- package/ts_build/src/agents/tools/list.js.map +1 -1
- package/ts_build/src/agents/tools/visionTool.d.ts +1 -1
- package/ts_build/src/agents/tools/writeFile.js +1 -1
- package/ts_build/src/agents/tools/writeFile.js.map +1 -1
- package/ts_build/src/ai.d.ts +1 -1
- package/ts_build/src/auth/browserLogin.d.ts +2 -1
- package/ts_build/src/auth/browserLogin.js +10 -3
- package/ts_build/src/auth/browserLogin.js.map +1 -1
- package/ts_build/src/chat/modules/RemoteSyncModule.js +1 -0
- package/ts_build/src/chat/modules/RemoteSyncModule.js.map +1 -1
- package/ts_build/src/cli.js +19 -0
- package/ts_build/src/cli.js.map +1 -1
- package/ts_build/src/clients/anthropic.d.ts +1 -82
- package/ts_build/src/clients/anthropic.js +8 -2
- package/ts_build/src/clients/anthropic.js.map +1 -1
- package/ts_build/src/clients/cerebras.d.ts +4 -0
- package/ts_build/src/clients/cerebras.js +14 -0
- package/ts_build/src/clients/cerebras.js.map +1 -0
- package/ts_build/src/clients/contextLimits.js +7 -2
- package/ts_build/src/clients/contextLimits.js.map +1 -1
- package/ts_build/src/clients/copilot.d.ts +4 -0
- package/ts_build/src/clients/copilot.js +15 -0
- package/ts_build/src/clients/copilot.js.map +1 -0
- package/ts_build/src/clients/deepseek.d.ts +4 -0
- package/ts_build/src/clients/deepseek.js +15 -0
- package/ts_build/src/clients/deepseek.js.map +1 -0
- package/ts_build/src/clients/fireworks.d.ts +4 -0
- package/ts_build/src/clients/fireworks.js +15 -0
- package/ts_build/src/clients/fireworks.js.map +1 -0
- package/ts_build/src/clients/gemini.d.ts +1 -0
- package/ts_build/src/clients/gemini.js +38 -2
- package/ts_build/src/clients/gemini.js.map +1 -1
- package/ts_build/src/clients/github.d.ts +4 -0
- package/ts_build/src/clients/github.js +15 -0
- package/ts_build/src/clients/github.js.map +1 -0
- package/ts_build/src/clients/groq.d.ts +4 -0
- package/ts_build/src/clients/groq.js +15 -0
- package/ts_build/src/clients/groq.js.map +1 -0
- package/ts_build/src/clients/http.d.ts +22 -1
- package/ts_build/src/clients/http.js +135 -7
- package/ts_build/src/clients/http.js.map +1 -1
- package/ts_build/src/clients/index.d.ts +14 -0
- package/ts_build/src/clients/index.js +94 -4
- package/ts_build/src/clients/index.js.map +1 -1
- package/ts_build/src/clients/llama.d.ts +4 -0
- package/ts_build/src/clients/llama.js +15 -0
- package/ts_build/src/clients/llama.js.map +1 -0
- package/ts_build/src/clients/mistral.d.ts +4 -0
- package/ts_build/src/clients/mistral.js +15 -0
- package/ts_build/src/clients/mistral.js.map +1 -0
- package/ts_build/src/clients/nvidia.d.ts +4 -0
- package/ts_build/src/clients/nvidia.js +15 -0
- package/ts_build/src/clients/nvidia.js.map +1 -0
- package/ts_build/src/clients/openai.d.ts +4 -206
- package/ts_build/src/clients/openai.js +38 -10
- package/ts_build/src/clients/openai.js.map +1 -1
- package/ts_build/src/clients/openrouter.d.ts +4 -0
- package/ts_build/src/clients/openrouter.js +15 -0
- package/ts_build/src/clients/openrouter.js.map +1 -0
- package/ts_build/src/clients/pricing/anthropic.d.ts +26 -78
- package/ts_build/src/clients/pricing/anthropic.js +75 -78
- package/ts_build/src/clients/pricing/anthropic.js.map +1 -1
- package/ts_build/src/clients/pricing/cerebras.d.ts +4 -0
- package/ts_build/src/clients/pricing/cerebras.js +11 -0
- package/ts_build/src/clients/pricing/cerebras.js.map +1 -0
- package/ts_build/src/clients/pricing/copilot.d.ts +5 -0
- package/ts_build/src/clients/pricing/copilot.js +35 -0
- package/ts_build/src/clients/pricing/copilot.js.map +1 -0
- package/ts_build/src/clients/pricing/deepseek.d.ts +5 -0
- package/ts_build/src/clients/pricing/deepseek.js +10 -0
- package/ts_build/src/clients/pricing/deepseek.js.map +1 -0
- package/ts_build/src/clients/pricing/fireworks.d.ts +5 -0
- package/ts_build/src/clients/pricing/fireworks.js +21 -0
- package/ts_build/src/clients/pricing/fireworks.js.map +1 -0
- package/ts_build/src/clients/pricing/github.d.ts +4 -0
- package/ts_build/src/clients/pricing/github.js +58 -0
- package/ts_build/src/clients/pricing/github.js.map +1 -0
- package/ts_build/src/clients/pricing/google.d.ts +59 -6
- package/ts_build/src/clients/pricing/google.js +214 -167
- package/ts_build/src/clients/pricing/google.js.map +1 -1
- package/ts_build/src/clients/pricing/groq.d.ts +5 -0
- package/ts_build/src/clients/pricing/groq.js +41 -0
- package/ts_build/src/clients/pricing/groq.js.map +1 -0
- package/ts_build/src/clients/pricing/index.d.ts +16 -5
- package/ts_build/src/clients/pricing/index.js +62 -7
- package/ts_build/src/clients/pricing/index.js.map +1 -1
- package/ts_build/src/clients/pricing/llama.d.ts +4 -0
- package/ts_build/src/clients/pricing/llama.js +14 -0
- package/ts_build/src/clients/pricing/llama.js.map +1 -0
- package/ts_build/src/clients/pricing/mistral.d.ts +5 -0
- package/ts_build/src/clients/pricing/mistral.js +23 -0
- package/ts_build/src/clients/pricing/mistral.js.map +1 -0
- package/ts_build/src/clients/pricing/models.d.ts +5 -4
- package/ts_build/src/clients/pricing/models.js +8 -162
- package/ts_build/src/clients/pricing/models.js.map +1 -1
- package/ts_build/src/clients/pricing/nvidia.d.ts +8 -0
- package/ts_build/src/clients/pricing/nvidia.js +96 -0
- package/ts_build/src/clients/pricing/nvidia.js.map +1 -0
- package/ts_build/src/clients/pricing/openai.d.ts +86 -197
- package/ts_build/src/clients/pricing/openai.js +295 -168
- package/ts_build/src/clients/pricing/openai.js.map +1 -1
- package/ts_build/src/clients/pricing/openrouter.d.ts +4 -0
- package/ts_build/src/clients/pricing/openrouter.js +29 -0
- package/ts_build/src/clients/pricing/openrouter.js.map +1 -0
- package/ts_build/src/clients/pricing/types.d.ts +27 -2
- package/ts_build/src/clients/pricing/types.js +46 -0
- package/ts_build/src/clients/pricing/types.js.map +1 -1
- package/ts_build/src/clients/pricing/xai.d.ts +37 -57
- package/ts_build/src/clients/pricing/xai.js +92 -59
- package/ts_build/src/clients/pricing/xai.js.map +1 -1
- package/ts_build/src/clients/types.d.ts +12 -1
- package/ts_build/src/clients/xai.d.ts +2 -62
- package/ts_build/src/clients/xai.js +132 -1
- package/ts_build/src/clients/xai.js.map +1 -1
- package/ts_build/src/fileSync.js +7 -2
- package/ts_build/src/fileSync.js.map +1 -1
- package/ts_build/src/login.js +8 -2
- package/ts_build/src/login.js.map +1 -1
- package/ts_build/src/services/AgentSyncFs.js +1 -0
- package/ts_build/src/services/AgentSyncFs.js.map +1 -1
- package/ts_build/src/services/KnowhowClient.d.ts +1 -0
- package/ts_build/src/services/KnowhowClient.js +7 -0
- package/ts_build/src/services/KnowhowClient.js.map +1 -1
- package/ts_build/src/services/LazyToolsService.d.ts +1 -0
- package/ts_build/src/services/LazyToolsService.js +3 -0
- package/ts_build/src/services/LazyToolsService.js.map +1 -1
- package/ts_build/src/services/S3.js +0 -7
- package/ts_build/src/services/S3.js.map +1 -1
- package/ts_build/src/services/modules/index.js +41 -1
- package/ts_build/src/services/modules/index.js.map +1 -1
- package/ts_build/src/types.d.ts +163 -124
- package/ts_build/src/types.js +33 -213
- package/ts_build/src/types.js.map +1 -1
- package/ts_build/src/worker.d.ts +4 -0
- package/ts_build/src/worker.js +140 -0
- package/ts_build/src/worker.js.map +1 -1
- package/ts_build/tests/clients/AIClient.test.js +1 -1
- package/ts_build/tests/clients/AIClient.test.js.map +1 -1
- package/ts_build/tests/clients/anthropic.test.d.ts +1 -0
- package/ts_build/tests/clients/anthropic.test.js +159 -0
- package/ts_build/tests/clients/anthropic.test.js.map +1 -0
- package/ts_build/tests/clients/pricing.test.js +21 -0
- package/ts_build/tests/clients/pricing.test.js.map +1 -1
- package/ts_build/tests/manual/clients/completions.test.js +27 -24
- package/ts_build/tests/manual/clients/completions.test.js.map +1 -1
package/src/clients/gemini.ts
CHANGED
|
@@ -16,15 +16,17 @@ import { wait } from "../utils";
|
|
|
16
16
|
import {
|
|
17
17
|
EmbeddingModels,
|
|
18
18
|
Models,
|
|
19
|
+
GoogleThinkingLevelModels,
|
|
20
|
+
GoogleThinkingBudgetModels,
|
|
19
21
|
GoogleImageModels,
|
|
20
22
|
GoogleVideoModels,
|
|
21
23
|
GoogleTTSModels,
|
|
22
|
-
|
|
24
|
+
GoogleEmbeddingModelsList,
|
|
23
25
|
GoogleReasoningModels,
|
|
24
26
|
} from "../types";
|
|
25
27
|
import { GeminiTextPricing } from "./pricing";
|
|
26
28
|
import { ContextLimits } from "./contextLimits";
|
|
27
|
-
import { ModelModality } from "./types";
|
|
29
|
+
import { ModelModality, TokenUsage } from "./types";
|
|
28
30
|
|
|
29
31
|
import {
|
|
30
32
|
GenericClient,
|
|
@@ -389,9 +391,49 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
389
391
|
return [{ functionDeclarations }];
|
|
390
392
|
}
|
|
391
393
|
|
|
394
|
+
/**
 * Derives the Gemini `thinkingConfig` for a completion request.
 *
 * `CompletionOptions.reasoning_effort` (treated as "low" when absent) is
 * translated into whichever setting the target model family takes:
 * - models listed in GoogleThinkingLevelModels (Gemini 3.x) get a
 *   `thinkingLevel` string;
 * - models listed in GoogleThinkingBudgetModels (Gemini 2.5) get a numeric
 *   `thinkingBudget`, where -1 requests a dynamic budget.
 *
 * An effort value with no table entry falls back to the "low" setting.
 *
 * @param options Completion options; only `model` and `reasoning_effort` are read.
 * @returns The thinking config object, or undefined when the model is in
 *          neither list.
 */
buildThinkingConfig(options: CompletionOptions): Record<string, unknown> | undefined {
  const { model } = options;
  const effort = options.reasoning_effort ?? "low";

  // Gemini 3.x — discrete thinking levels
  if (GoogleThinkingLevelModels.includes(model)) {
    const thinkingLevelByEffort: Record<string, string> = {
      low: "low",
      medium: "medium",
      high: "high",
    };
    const thinkingLevel = thinkingLevelByEffort[effort] ?? "low";
    return { thinkingLevel };
  }

  // Anything that is not a Gemini 2.5 budget model has no thinking config.
  if (!GoogleThinkingBudgetModels.includes(model)) {
    return undefined;
  }

  // Gemini 2.5 — token budgets per effort level
  const thinkingBudgetByEffort: Record<string, number> = {
    low: 1024,
    medium: 8192,
    high: -1, // dynamic
  };
  const thinkingBudget = thinkingBudgetByEffort[effort] ?? 1024;
  return { thinkingBudget };
}
|
|
432
|
+
|
|
392
433
|
async createChatCompletion(
|
|
393
434
|
options: CompletionOptions
|
|
394
435
|
): Promise<CompletionResponse> {
|
|
436
|
+
const thinkingConfig = this.buildThinkingConfig(options);
|
|
395
437
|
const { systemInstruction, contents } = this.transformMessages(
|
|
396
438
|
options.messages
|
|
397
439
|
);
|
|
@@ -403,6 +445,7 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
403
445
|
contents,
|
|
404
446
|
config: {
|
|
405
447
|
systemInstruction,
|
|
448
|
+
thinkingConfig,
|
|
406
449
|
tools: this.transformTools(options.tools),
|
|
407
450
|
maxOutputTokens: options.max_tokens,
|
|
408
451
|
},
|
|
@@ -481,10 +524,22 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
481
524
|
? this.calculateCost(options.model, usage)
|
|
482
525
|
: undefined;
|
|
483
526
|
|
|
527
|
+
// Map cachedContentTokenCount → prompt_tokens_details.cached_tokens so that
|
|
528
|
+
// base.ts can read cache hit tokens via usage.prompt_tokens_details?.cached_tokens
|
|
529
|
+
const cachedTokens = (usage as any)?.cachedContentTokenCount ?? 0;
|
|
530
|
+
const usageWithCache: TokenUsage | undefined = usage
|
|
531
|
+
? ({
|
|
532
|
+
prompt_tokens: (usage as any).promptTokenCount ?? 0,
|
|
533
|
+
completion_tokens: (usage as any).candidatesTokenCount ?? 0,
|
|
534
|
+
total_tokens: (usage as any).totalTokenCount,
|
|
535
|
+
prompt_tokens_details: { cached_tokens: cachedTokens },
|
|
536
|
+
} as TokenUsage)
|
|
537
|
+
: undefined;
|
|
538
|
+
|
|
484
539
|
return {
|
|
485
540
|
choices,
|
|
486
541
|
model: options.model,
|
|
487
|
-
usage,
|
|
542
|
+
usage: usageWithCache,
|
|
488
543
|
usd_cost: usdCost,
|
|
489
544
|
};
|
|
490
545
|
} catch (error) {
|
|
@@ -600,7 +655,7 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
600
655
|
if (modality) {
|
|
601
656
|
const map: Partial<Record<ModelModality, string[]>> = {
|
|
602
657
|
completion: GoogleReasoningModels,
|
|
603
|
-
embedding:
|
|
658
|
+
embedding: GoogleEmbeddingModelsList,
|
|
604
659
|
image: GoogleImageModels,
|
|
605
660
|
audio: GoogleTTSModels,
|
|
606
661
|
video: GoogleVideoModels,
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { HttpClient } from "./http";
|
|
2
|
+
import { GitHubModelsTextPricing } from "./pricing/github";
|
|
3
|
+
|
|
4
|
+
/**
 * Client for GitHub Models, which speaks an OpenAI-compatible API.
 * Docs: https://docs.github.com/en/github-models
 *
 * The upstream docs describe free access to premium models (GPT-4o,
 * DeepSeek R1, Llama, Phi etc.) with a GitHub token.
 *
 * The token defaults to the GITHUB_TOKEN environment variable. When no token
 * is available the client is still constructed, but no JWT is set on it.
 */
export class GenericGitHubModelsClient extends HttpClient {
  constructor(apiKey = process.env.GITHUB_TOKEN) {
    super("https://models.github.ai/inference");

    if (apiKey) {
      this.setJwt(apiKey);
    }

    // Enables local usd_cost calculation from the GitHub Models price table.
    this.setPrices(GitHubModelsTextPricing);
  }
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { HttpClient } from "./http";
|
|
2
|
+
import { GroqTextPricing } from "./pricing/groq";
|
|
3
|
+
|
|
4
|
+
/**
 * Client for Groq, which speaks an OpenAI-compatible API (ultra-fast inference).
 * Docs: https://console.groq.com/docs/openai
 *
 * The key defaults to the GROQ_API_KEY environment variable. When no key is
 * available the client is still constructed, but no JWT is set on it.
 */
export class GenericGroqClient extends HttpClient {
  constructor(apiKey = process.env.GROQ_API_KEY) {
    super("https://api.groq.com/openai");

    if (apiKey) {
      this.setJwt(apiKey);
    }

    // Enables local usd_cost calculation from the Groq price table.
    this.setPrices(GroqTextPricing);
  }
}
|
package/src/clients/http.ts
CHANGED
|
@@ -6,11 +6,29 @@ import {
|
|
|
6
6
|
EmbeddingOptions,
|
|
7
7
|
EmbeddingResponse,
|
|
8
8
|
} from "./types";
|
|
9
|
+
import { ModelPricing } from "./pricing/types";
|
|
9
10
|
import fs from "fs";
|
|
10
11
|
import path from "path";
|
|
11
12
|
|
|
13
|
+
/** Optional settings accepted by the HttpClient constructor. */
export interface HttpClientOptions {
  /** Initial headers stored on the client and passed with its HTTP requests. */
  headers?: Record<string, string>;
  /** Request timeout in milliseconds; HttpClient falls back to 30000 when omitted. */
  timeout?: number;
  /** Extra fields spread into the JSON body of chat-completion and response requests. */
  extra_body?: Record<string, any>;
}
|
|
18
|
+
|
|
12
19
|
export class HttpClient implements GenericClient {
|
|
13
|
-
|
|
20
|
+
/** Timeout in milliseconds for HTTP requests. Default: 30000 (30s). Use 0 to disable. */
|
|
21
|
+
private timeout: number;
|
|
22
|
+
private headers: Record<string, string>;
|
|
23
|
+
private extra_body: Record<string, any>;
|
|
24
|
+
/** Optional pricing table: model id → per-million-token prices */
|
|
25
|
+
private pricingMap: Record<string, ModelPricing> = {};
|
|
26
|
+
|
|
27
|
+
/**
 * @param baseUrl Base URL that request paths such as `/v1/chat/completions`
 *                are appended to.
 * @param options Optional headers, timeout and extra body fields. Missing
 *                values default to `{}`, 30000 ms and `{}` respectively.
 */
constructor(private baseUrl: string, options: HttpClientOptions = {}) {
  this.headers = options.headers ?? {};
  this.timeout = options.timeout ?? 30000;
  this.extra_body = options.extra_body ?? {};
}
|
|
14
32
|
|
|
15
33
|
private async withRetry<T>(fn: () => Promise<T>, retries = 3): Promise<T> {
|
|
16
34
|
let lastError: any;
|
|
@@ -64,6 +82,52 @@ export class HttpClient implements GenericClient {
|
|
|
64
82
|
this.setJwt(key);
|
|
65
83
|
}
|
|
66
84
|
|
|
85
|
+
/**
 * Supply a pricing map so that createChatCompletion / createEmbedding can
 * calculate a local usd_cost from usage tokens when the provider does not
 * return a cost field itself.
 *
 * The given map replaces any previously configured map (it is not merged)
 * and is stored by reference.
 *
 * @param pricingMap Model id → per-million-token prices.
 */
setPrices(pricingMap: Record<string, ModelPricing>) {
  this.pricingMap = pricingMap;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Calculate USD cost for a completion/embedding call from token usage.
 *
 * Prices in the pricing map are per million tokens:
 * - cached prompt tokens are billed at `cache_hit`, falling back to
 *   `cached_input`, then 0;
 * - the remaining (uncached) prompt tokens are billed at `input`;
 * - completion tokens are billed at `output`.
 *
 * @param model Model id used to look up the pricing entry.
 * @param usage Token usage in Chat Completions shape. `prompt_tokens` is
 *              expected to already include any cached tokens.
 * @returns The cost in USD, or undefined when `usage` is missing or no
 *          pricing entry exists for the model.
 */
calculateCost(
  model: string,
  usage: { prompt_tokens?: number; completion_tokens?: number; prompt_tokens_details?: { cached_tokens?: number } } | undefined
): number | undefined {
  if (!usage) return undefined;
  const pricing = this.pricingMap[model];
  if (!pricing) return undefined;

  const cachedInputTokens =
    usage.prompt_tokens_details?.cached_tokens ?? 0;
  const inputTokens = usage.prompt_tokens ?? 0;
  const outputTokens = usage.completion_tokens ?? 0;

  // Some providers report cached tokens separately from (or in excess of)
  // prompt_tokens. Clamp so the uncached share can never go negative and
  // subtract money from the total.
  const uncachedInputTokens = Math.max(0, inputTokens - cachedInputTokens);

  const cachedInputCost = (cachedInputTokens * (pricing.cache_hit ?? pricing.cached_input ?? 0)) / 1e6;
  const inputCost = (uncachedInputTokens * (pricing.input ?? 0)) / 1e6;
  const outputCost = (outputTokens * (pricing.output ?? 0)) / 1e6;

  return cachedInputCost + inputCost + outputCost;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Overrides timeout, headers and extra_body on an already-constructed client.
 *
 * AIClient.resolveClient calls this so that per-provider config overrides
 * also apply to clients built from a known clientClass (e.g. nvidia, groq).
 *
 * - `timeout` and `extra_body` replace the current value when not undefined.
 * - `headers` are merged over the existing headers; new keys win.
 */
setOptions(options: Omit<HttpClientOptions, "headers"> & { headers?: Record<string, string> }) {
  const { timeout, extra_body: extraBody, headers } = options;

  if (timeout !== undefined) {
    this.timeout = timeout;
  }

  if (extraBody !== undefined) {
    this.extra_body = extraBody;
  }

  if (!headers) {
    return;
  }
  this.headers = { ...this.headers, ...headers };
}
|
|
130
|
+
|
|
67
131
|
loadJwtFile(filePath: string) {
|
|
68
132
|
try {
|
|
69
133
|
const jwtFile = path.join(process.cwd(), filePath);
|
|
@@ -85,7 +149,8 @@ export class HttpClient implements GenericClient {
|
|
|
85
149
|
...options,
|
|
86
150
|
model: options.model,
|
|
87
151
|
messages: options.messages,
|
|
88
|
-
max_tokens: options.max_tokens ||
|
|
152
|
+
max_tokens: options.max_tokens || 4000,
|
|
153
|
+
...this.extra_body,
|
|
89
154
|
|
|
90
155
|
...(options.tools && {
|
|
91
156
|
tools: options.tools,
|
|
@@ -96,7 +161,7 @@ export class HttpClient implements GenericClient {
|
|
|
96
161
|
const response = await http.post(
|
|
97
162
|
`${this.baseUrl}/v1/chat/completions`,
|
|
98
163
|
body,
|
|
99
|
-
{ headers: this.headers as Record<string, string
|
|
164
|
+
{ headers: this.headers as Record<string, string>, timeout: this.timeout }
|
|
100
165
|
);
|
|
101
166
|
|
|
102
167
|
const data = response.data;
|
|
@@ -116,7 +181,129 @@ export class HttpClient implements GenericClient {
|
|
|
116
181
|
})),
|
|
117
182
|
model: data.model,
|
|
118
183
|
usage: data.usage,
|
|
119
|
-
usd_cost: data.usd_cost,
|
|
184
|
+
usd_cost: data.usd_cost ?? this.calculateCost(options.model, data.usage),
|
|
185
|
+
};
|
|
186
|
+
});
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
/**
 * Creates a completion using the Responses API (/v1/responses).
 * Compatible with providers that implement the OpenAI Responses API spec
 * (e.g. xAI at https://api.x.ai/v1/responses).
 *
 * Chat-Completions-style options are translated into a Responses request,
 * and the reply is translated back into a CompletionResponse:
 * - system messages with string content are joined into `instructions`;
 * - tool results become `function_call_output` items;
 * - assistant tool calls become `function_call` items, preceded by the
 *   assistant's text when it has any;
 * - `output_text` parts are concatenated into the assistant message content
 *   and `function_call` output items become `tool_calls`.
 *
 * @param options Completion options (model, messages, tools, max_tokens).
 * @param store   Sent as the request's `store` field. Defaults to false.
 * @returns A CompletionResponse with a single assistant choice, usage mapped
 *          to Chat Completions field names, and `usd_cost` taken from the
 *          provider or computed locally from the pricing map.
 * @throws Error when the response body contains an `error` field.
 */
async createResponse(
  options: CompletionOptions,
  store = false
): Promise<CompletionResponse> {
  return this.withRetry(async () => {
    // Extract system messages as instructions
    const systemMessages = options.messages.filter((m) => m.role === "system");
    const nonSystemMessages = options.messages.filter((m) => m.role !== "system");
    const instructions = systemMessages
      .map((m) => (typeof m.content === "string" ? m.content : ""))
      .join("\n")
      .trim() || undefined;

    // Convert messages to Responses API input format. One chat message may
    // expand into several input items, hence flatMap.
    const input: any[] = nonSystemMessages.flatMap((msg): any[] => {
      if (msg.role === "tool") {
        return [
          {
            type: "function_call_output",
            call_id: msg.tool_call_id,
            output: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content),
          },
        ];
      }

      if (msg.role === "assistant" && msg.tool_calls?.length) {
        const functionCalls = (msg.tool_calls as any[]).map((tc: any) => ({
          type: "function_call",
          id: tc.id.startsWith("fc") ? tc.id : `fc_${tc.id}`,
          call_id: tc.id,
          name: tc.function.name,
          arguments: tc.function.arguments,
        }));

        // Keep any text the assistant produced alongside its tool calls;
        // otherwise that part of the conversation is lost on the next turn.
        const hasText =
          typeof msg.content === "string" && msg.content.trim() !== "";
        return hasText
          ? [{ role: msg.role, content: msg.content }, ...functionCalls]
          : functionCalls;
      }

      return [
        {
          role: msg.role,
          content: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content),
        },
      ];
    });

    const tools = options.tools?.map((tool) => ({
      type: "function" as const,
      name: tool.function.name,
      description: tool.function.description,
      parameters: tool.function.parameters as Record<string, unknown>,
      strict: false,
    }));

    // extra_body is spread last so configured overrides win over the
    // fields built above.
    const body = {
      model: options.model,
      input,
      ...(instructions && { instructions }),
      ...(options.max_tokens && { max_output_tokens: options.max_tokens }),
      ...(tools?.length && { tools, tool_choice: "auto" }),
      store,
      ...this.extra_body,
    };

    const response = await http.post(
      `${this.baseUrl}/v1/responses`,
      body,
      { headers: this.headers as Record<string, string>, timeout: this.timeout }
    );

    const data = response.data;

    if (data.error) {
      throw new Error(JSON.stringify(data.error, null, 2));
    }

    // Map usage from Responses API format to Chat Completions format
    const usage = data.usage
      ? {
          prompt_tokens: data.usage.input_tokens,
          completion_tokens: data.usage.output_tokens,
          total_tokens: data.usage.input_tokens + data.usage.output_tokens,
          prompt_tokens_details: {
            cached_tokens:
              data.usage.input_tokens_details?.cached_tokens ?? 0,
          },
        }
      : undefined;

    // Collect text content and tool calls from output items.
    // textContent stays null when the model produced no text at all.
    let textContent: string | null = null;
    const toolCalls: any[] = [];

    for (const item of data.output ?? []) {
      if (item.type === "message") {
        for (const part of item.content ?? []) {
          if (part.type === "output_text") {
            textContent = (textContent ?? "") + part.text;
          }
        }
      } else if (item.type === "function_call") {
        toolCalls.push({
          id: item.call_id,
          type: "function",
          function: { name: item.name, arguments: item.arguments },
        });
      }
    }

    return {
      choices: [
        {
          message: {
            role: "assistant",
            content: textContent,
            ...(toolCalls.length > 0 && { tool_calls: toolCalls }),
          },
        },
      ],
      model: data.model ?? options.model,
      usage,
      usd_cost: data.usd_cost ?? this.calculateCost(options.model, usage),
    };
  });
}
|
|
@@ -129,7 +316,7 @@ export class HttpClient implements GenericClient {
|
|
|
129
316
|
model: options.model,
|
|
130
317
|
input: options.input,
|
|
131
318
|
},
|
|
132
|
-
{ headers: this.headers as Record<string, string
|
|
319
|
+
{ headers: this.headers as Record<string, string>, timeout: this.timeout }
|
|
133
320
|
);
|
|
134
321
|
|
|
135
322
|
const data = response.data;
|
|
@@ -143,7 +330,7 @@ export class HttpClient implements GenericClient {
|
|
|
143
330
|
data: data.data,
|
|
144
331
|
model: options.model,
|
|
145
332
|
usage: data.usage,
|
|
146
|
-
usd_cost: data.usd_cost,
|
|
333
|
+
usd_cost: data.usd_cost ?? this.calculateCost(options.model, data.usage),
|
|
147
334
|
};
|
|
148
335
|
});
|
|
149
336
|
}
|
|
@@ -152,6 +339,7 @@ export class HttpClient implements GenericClient {
|
|
|
152
339
|
return this.withRetry(async () => {
|
|
153
340
|
const response = await http.get(`${this.baseUrl}/v1/models?type=${type}`, {
|
|
154
341
|
headers: this.headers as Record<string, string>,
|
|
342
|
+
timeout: this.timeout,
|
|
155
343
|
});
|
|
156
344
|
|
|
157
345
|
const data = response.data?.data;
|
package/src/clients/index.ts
CHANGED
|
@@ -43,6 +43,16 @@ import type {
|
|
|
43
43
|
ModelType,
|
|
44
44
|
ModelCatalogEntry,
|
|
45
45
|
} from "./pricing/types";
|
|
46
|
+
import { GenericCerebrasClient } from "./cerebras";
|
|
47
|
+
import { GenericGroqClient } from "./groq";
|
|
48
|
+
import { GenericGitHubModelsClient } from "./github";
|
|
49
|
+
import { GenericNvidiaClient } from "./nvidia";
|
|
50
|
+
import { GenericOpenRouterClient } from "./openrouter";
|
|
51
|
+
import { GenericDeepSeekClient } from "./deepseek";
|
|
52
|
+
import { GenericMistralClient } from "./mistral";
|
|
53
|
+
import { GitHubCopilotClient } from "./copilot";
|
|
54
|
+
import { GenericLlamaClient } from "./llama";
|
|
55
|
+
import { GenericFireworksClient } from "./fireworks";
|
|
46
56
|
export {
|
|
47
57
|
OpenAiTextPricing,
|
|
48
58
|
AnthropicTextPricing,
|
|
@@ -75,6 +85,18 @@ const BUILT_IN_PROVIDER_REGISTRY: Record<string, ProviderRegistryEntry> = {
|
|
|
75
85
|
anthropic: { clientClass: GenericAnthropicClient },
|
|
76
86
|
google: { clientClass: GenericGeminiClient },
|
|
77
87
|
xai: { clientClass: GenericXAIClient },
|
|
88
|
+
cerebras: {
|
|
89
|
+
clientClass: GenericCerebrasClient,
|
|
90
|
+
},
|
|
91
|
+
groq: { clientClass: GenericGroqClient },
|
|
92
|
+
github: { clientClass: GenericGitHubModelsClient },
|
|
93
|
+
nvidia: { clientClass: GenericNvidiaClient },
|
|
94
|
+
openrouter: { clientClass: GenericOpenRouterClient },
|
|
95
|
+
deepseek: { clientClass: GenericDeepSeekClient },
|
|
96
|
+
mistral: { clientClass: GenericMistralClient },
|
|
97
|
+
"github-copilot": { clientClass: GitHubCopilotClient },
|
|
98
|
+
llama: { clientClass: GenericLlamaClient },
|
|
99
|
+
fireworks: { clientClass: GenericFireworksClient },
|
|
78
100
|
knowhow: {
|
|
79
101
|
createClient: (entry: ModelProvider) => {
|
|
80
102
|
const jwt = loadKnowhowJwt();
|
|
@@ -94,7 +116,17 @@ const DEFAULT_PROVIDERS: ModelProvider[] = [
|
|
|
94
116
|
{ provider: "anthropic", envKey: "ANTHROPIC_API_KEY" },
|
|
95
117
|
{ provider: "google", envKey: "GEMINI_API_KEY" },
|
|
96
118
|
{ provider: "xai", envKey: "XAI_API_KEY" },
|
|
119
|
+
{ provider: "cerebras", envKey: "CEREBRAS_API_KEY" },
|
|
97
120
|
{ provider: "knowhow" },
|
|
121
|
+
{ provider: "groq", envKey: "GROQ_API_KEY" },
|
|
122
|
+
{ provider: "github", envKey: "GITHUB_TOKEN" },
|
|
123
|
+
{ provider: "nvidia", envKey: "NVIDIA_API_KEY" },
|
|
124
|
+
{ provider: "openrouter", envKey: "OPENROUTER_API_KEY" },
|
|
125
|
+
{ provider: "deepseek", envKey: "DEEPSEEK_API_KEY" },
|
|
126
|
+
{ provider: "mistral", envKey: "MISTRAL_API_KEY" },
|
|
127
|
+
{ provider: "github-copilot", envKey: "GITHUB_COPILOT_TOKEN" },
|
|
128
|
+
{ provider: "llama", envKey: "LLAMA_API_KEY" },
|
|
129
|
+
{ provider: "fireworks", envKey: "FIREWORKS_API_KEY" },
|
|
98
130
|
];
|
|
99
131
|
|
|
100
132
|
export class AIClient {
|
|
@@ -153,19 +185,45 @@ export class AIClient {
|
|
|
153
185
|
// envKey-based auth: env var must be present
|
|
154
186
|
const envValue = process.env[effectiveEnvKey];
|
|
155
187
|
if (!envValue) return null;
|
|
156
|
-
|
|
188
|
+
const client = new reg.clientClass(envValue);
|
|
189
|
+
// Apply any extra options (timeout, headers, extra_body) from config
|
|
190
|
+
if (client instanceof HttpClient) {
|
|
191
|
+
client.setOptions({
|
|
192
|
+
timeout: entry.timeout,
|
|
193
|
+
headers: entry.headers,
|
|
194
|
+
extra_body: entry.extra_body,
|
|
195
|
+
});
|
|
196
|
+
if (entry.pricing) client.setPrices(entry.pricing);
|
|
197
|
+
}
|
|
198
|
+
return client;
|
|
157
199
|
}
|
|
158
200
|
|
|
159
201
|
// No envKey, no url — instantiate with no arg (client uses its own defaults)
|
|
160
|
-
|
|
202
|
+
const client = new reg.clientClass();
|
|
203
|
+
// Apply any extra options (timeout, headers, extra_body) from config
|
|
204
|
+
if (client instanceof HttpClient) {
|
|
205
|
+
client.setOptions({
|
|
206
|
+
timeout: entry.timeout,
|
|
207
|
+
headers: entry.headers,
|
|
208
|
+
extra_body: entry.extra_body,
|
|
209
|
+
});
|
|
210
|
+
if (entry.pricing) client.setPrices(entry.pricing);
|
|
211
|
+
}
|
|
212
|
+
return client;
|
|
161
213
|
}
|
|
162
214
|
|
|
163
215
|
// 3. HTTP provider — requires url, no clientClass in registry
|
|
164
216
|
if (entry.url) {
|
|
165
|
-
const client = new HttpClient(entry.url,
|
|
217
|
+
const client = new HttpClient(entry.url, {
|
|
218
|
+
headers: entry.headers,
|
|
219
|
+
timeout: entry.timeout,
|
|
220
|
+
extra_body: entry.extra_body,
|
|
221
|
+
});
|
|
166
222
|
if (entry.jwtFile) {
|
|
167
223
|
client.loadJwtFile(entry.jwtFile);
|
|
168
224
|
}
|
|
225
|
+
// For custom HTTP providers, use entry.pricing if available
|
|
226
|
+
if (entry.pricing) client.setPrices(entry.pricing);
|
|
169
227
|
return client;
|
|
170
228
|
}
|
|
171
229
|
|
|
@@ -492,6 +550,52 @@ export class AIClient {
|
|
|
492
550
|
return undefined;
|
|
493
551
|
}
|
|
494
552
|
|
|
553
|
+
/**
 * Normalize a model ID for fuzzy matching:
 * - lowercase
 * - replace dots with dashes (e.g. "claude-opus-4.7" → "claude-opus-4-7")
 * - strip one trailing variant tag like ":thinking", ":free"
 * - strip trailing 8-digit date suffixes like "-20250514"
 * - strip trailing "-beta", "-preview", "-latest", "-exp", "-rc" / "-rcN"
 *
 * Date and release-stage suffixes are removed repeatedly until none is left,
 * so stacked suffixes normalize identically regardless of their order
 * ("x-preview-20250514" and "x-20250514-preview" both become "x").
 *
 * @param id - raw model identifier
 * @returns the normalized identifier used for comparisons
 */
private static normalizeModelId(id: string): string {
  // Only the final ":segment" is treated as a variant tag, and only once,
  // so IDs that contain several colons keep their leading segments.
  let normalized = id
    .toLowerCase()
    .replace(/\./g, "-")
    .replace(/:[^:]+$/, "");

  // No "g" flag: test() stays stateless across loop iterations.
  // No "i" flag: the input has already been lowercased.
  const trailingSuffix = /-(\d{8}|beta|preview|latest|exp|rc\d*)$/;
  while (trailingSuffix.test(normalized)) {
    normalized = normalized.replace(trailingSuffix, "");
  }

  return normalized;
}
|
|
569
|
+
|
|
570
|
+
/**
 * Fuzzy model lookup: given a model name (possibly without date suffix,
 * with dots instead of dashes, etc.), find the best matching registered model.
 *
 * Matching is done on IDs normalized by AIClient.normalizeModelId, in two tiers:
 *   1. exact normalized match — returned as soon as one is found;
 *   2. prefix match (the registered model is a longer variant of the query,
 *      i.e. its normalized ID starts with the normalized query plus "-") —
 *      the first one seen is used only when no exact match exists anywhere
 *      in the searched providers.
 *
 * Example: "claude-3.7-sonnet" matches "claude-3-7-sonnet-20250219"
 *          "gpt-4.1" matches "gpt-4.1" exactly
 *          "gpt-4" prefers "gpt-4" over "gpt-4-1-mini" even when the latter
 *          is registered first
 *
 * @param modelQuery - the model name to search for (can be partial/normalized)
 * @param provider - optional provider to restrict search to
 * @returns the matching provider/model pair, or undefined when nothing matches
 */
findModelFuzzy(modelQuery: string, provider?: string): { provider: string; model: string } | undefined {
  const queryNorm = AIClient.normalizeModelId(modelQuery);
  const variantPrefix = queryNorm + "-";
  const providers = provider
    ? [provider]
    : Object.keys(this.clientModels);

  // First prefix hit is remembered but only returned if no exact match turns up.
  let prefixMatch: { provider: string; model: string } | undefined;

  for (const p of providers) {
    // A provider with no registered models is treated as an empty list.
    const models = (this.clientModels[p] as string[]) ?? [];
    for (const m of models) {
      const mNorm = AIClient.normalizeModelId(m);
      if (mNorm === queryNorm) {
        return { provider: p, model: m };
      }
      if (!prefixMatch && mNorm.startsWith(variantPrefix)) {
        prefixMatch = { provider: p, model: m };
      }
    }
  }

  return prefixMatch;
}
|
|
598
|
+
|
|
495
599
|
// detects these formats:
|
|
496
600
|
// "openai", "gpt-5"
|
|
497
601
|
// "knowhow", "openai/gpt-5"
|
|
@@ -822,7 +926,6 @@ export class AIClient {
|
|
|
822
926
|
id,
|
|
823
927
|
provider,
|
|
824
928
|
type,
|
|
825
|
-
displayName: id,
|
|
826
929
|
pricing: p,
|
|
827
930
|
});
|
|
828
931
|
}
|
|
@@ -871,3 +974,12 @@ export * from "./gemini";
|
|
|
871
974
|
export * from "./contextLimits";
|
|
872
975
|
export * from "./xai";
|
|
873
976
|
export * from "./knowhowMcp";
|
|
977
|
+
export * from "./groq";
|
|
978
|
+
export * from "./github";
|
|
979
|
+
export * from "./nvidia";
|
|
980
|
+
export * from "./openrouter";
|
|
981
|
+
export * from "./deepseek";
|
|
982
|
+
export * from "./mistral";
|
|
983
|
+
export * from "./llama";
|
|
984
|
+
export * from "./copilot";
|
|
985
|
+
export * from "./fireworks";
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { HttpClient } from "./http";
|
|
2
|
+
import { LlamaTextPricing } from "./pricing/llama";
|
|
3
|
+
|
|
4
|
+
/**
 * Meta Llama API client, built on HttpClient (OpenAI-compatible API).
 * Docs: https://llama.developer.meta.com/docs/
 *
 * Direct from Meta: free Llama 3.x, Llama 4, and Cerebras/Groq-hosted variants.
 * Enable it by setting the LLAMA_API_KEY environment variable.
 */
export class GenericLlamaClient extends HttpClient {
  /**
   * @param apiKey - credential handed to setJwt; defaults to
   *   process.env.LLAMA_API_KEY. When it is empty or undefined, setJwt is
   *   not called.
   */
  constructor(apiKey = process.env.LLAMA_API_KEY) {
    super("https://api.llama.com/compat");

    if (apiKey) {
      this.setJwt(apiKey);
    }

    // Pricing table is registered whether or not a key was supplied.
    this.setPrices(LlamaTextPricing);
  }
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { HttpClient } from "./http";
|
|
2
|
+
import { MistralTextPricing } from "./pricing/mistral";
|
|
3
|
+
|
|
4
|
+
/**
 * Mistral AI client, built on HttpClient (OpenAI-compatible API).
 * Docs: https://docs.mistral.ai/api/
 *
 * Top European AI lab with Mistral Large, Codestral, and free Devstral coding model.
 * Enable it by setting the MISTRAL_API_KEY environment variable.
 */
export class GenericMistralClient extends HttpClient {
  /**
   * @param apiKey - credential handed to setJwt; defaults to
   *   process.env.MISTRAL_API_KEY. When it is empty or undefined, setJwt is
   *   not called.
   */
  constructor(apiKey = process.env.MISTRAL_API_KEY) {
    super("https://api.mistral.ai");

    if (apiKey) {
      this.setJwt(apiKey);
    }

    // Pricing table is registered whether or not a key was supplied.
    this.setPrices(MistralTextPricing);
  }
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { HttpClient } from "./http";
|
|
2
|
+
import { NvidiaTextPricing } from "./pricing/nvidia";
|
|
3
|
+
|
|
4
|
+
/**
 * NVIDIA NIM client, built on HttpClient (OpenAI-compatible API).
 * Docs: https://build.nvidia.com/explore/discover
 *
 * 76+ free models including Llama, Mistral, Phi, Flux image generation.
 * Enable it by setting the NVIDIA_API_KEY environment variable.
 */
export class GenericNvidiaClient extends HttpClient {
  /**
   * @param apiKey - credential handed to setJwt; defaults to
   *   process.env.NVIDIA_API_KEY. When it is empty or undefined, setJwt is
   *   not called.
   */
  constructor(apiKey = process.env.NVIDIA_API_KEY) {
    super("https://integrate.api.nvidia.com");

    if (apiKey) {
      this.setJwt(apiKey);
    }

    // Pricing table is registered whether or not a key was supplied.
    this.setPrices(NvidiaTextPricing);
  }
}
|