@tyvm/knowhow 0.0.84 → 0.0.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agents/base/base.ts +99 -37
- package/src/agents/researcher/researcher.ts +1 -2
- package/src/agents/tools/aiClient.ts +48 -0
- package/src/agents/tools/list.ts +57 -0
- package/src/chat/CliChatService.ts +3 -1
- package/src/chat/modules/AgentModule.ts +20 -31
- package/src/chat/types.ts +1 -0
- package/src/cli.ts +19 -10
- package/src/clients/anthropic.ts +11 -0
- package/src/clients/contextLimits.ts +106 -0
- package/src/clients/gemini.ts +11 -0
- package/src/clients/index.ts +112 -0
- package/src/clients/openai.ts +11 -0
- package/src/clients/pricing/anthropic.ts +0 -4
- package/src/clients/pricing/google.ts +81 -2
- package/src/clients/pricing/openai.ts +68 -0
- package/src/clients/types.ts +8 -0
- package/src/clients/xai.ts +11 -0
- package/src/types.ts +79 -7
- package/tests/clients/pricing.test.ts +144 -0
- package/ts_build/package.json +1 -1
- package/ts_build/src/agents/base/base.d.ts +4 -0
- package/ts_build/src/agents/base/base.js +53 -28
- package/ts_build/src/agents/base/base.js.map +1 -1
- package/ts_build/src/agents/researcher/researcher.js +1 -1
- package/ts_build/src/agents/researcher/researcher.js.map +1 -1
- package/ts_build/src/agents/tools/aiClient.d.ts +3 -0
- package/ts_build/src/agents/tools/aiClient.js +31 -1
- package/ts_build/src/agents/tools/aiClient.js.map +1 -1
- package/ts_build/src/agents/tools/list.js +48 -0
- package/ts_build/src/agents/tools/list.js.map +1 -1
- package/ts_build/src/chat/CliChatService.js.map +1 -1
- package/ts_build/src/chat/modules/AgentModule.d.ts +1 -4
- package/ts_build/src/chat/modules/AgentModule.js +12 -15
- package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
- package/ts_build/src/chat/types.d.ts +1 -0
- package/ts_build/src/cli.js +3 -2
- package/ts_build/src/cli.js.map +1 -1
- package/ts_build/src/clients/anthropic.d.ts +8 -4
- package/ts_build/src/clients/anthropic.js +9 -0
- package/ts_build/src/clients/anthropic.js.map +1 -1
- package/ts_build/src/clients/contextLimits.d.ts +3 -0
- package/ts_build/src/clients/contextLimits.js +88 -0
- package/ts_build/src/clients/contextLimits.js.map +1 -0
- package/ts_build/src/clients/gemini.d.ts +14 -10
- package/ts_build/src/clients/gemini.js +9 -0
- package/ts_build/src/clients/gemini.js.map +1 -1
- package/ts_build/src/clients/index.d.ts +15 -0
- package/ts_build/src/clients/index.js +70 -0
- package/ts_build/src/clients/index.js.map +1 -1
- package/ts_build/src/clients/openai.d.ts +4 -0
- package/ts_build/src/clients/openai.js +9 -0
- package/ts_build/src/clients/openai.js.map +1 -1
- package/ts_build/src/clients/pricing/anthropic.d.ts +4 -4
- package/ts_build/src/clients/pricing/anthropic.js +0 -4
- package/ts_build/src/clients/pricing/anthropic.js.map +1 -1
- package/ts_build/src/clients/pricing/google.d.ts +10 -10
- package/ts_build/src/clients/pricing/google.js +74 -2
- package/ts_build/src/clients/pricing/google.js.map +1 -1
- package/ts_build/src/clients/pricing/openai.js +65 -0
- package/ts_build/src/clients/pricing/openai.js.map +1 -1
- package/ts_build/src/clients/types.d.ts +4 -0
- package/ts_build/src/clients/xai.d.ts +4 -0
- package/ts_build/src/clients/xai.js +9 -0
- package/ts_build/src/clients/xai.js.map +1 -1
- package/ts_build/src/types.d.ts +33 -4
- package/ts_build/src/types.js +73 -5
- package/ts_build/src/types.js.map +1 -1
- package/ts_build/tests/clients/pricing.test.d.ts +1 -0
- package/ts_build/tests/clients/pricing.test.js +90 -0
- package/ts_build/tests/clients/pricing.test.js.map +1 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import { Models, EmbeddingModels } from "../types";
|
|
2
|
+
|
|
3
|
+
/**
 * Context window size, in tokens, for every supported model, keyed by the
 * model identifier taken from `Models`.
 *
 * Vendor references the numbers were taken from:
 *   OpenAI    - https://platform.openai.com/docs/models
 *   Anthropic - https://docs.anthropic.com/en/docs/about-claude/models
 *   Google    - https://ai.google.dev/gemini-api/docs/models
 *   xAI       - https://docs.x.ai/developers/models
 *
 * NOTE(review): a few entries (for example the GPT-5 family and Grok 4) look
 * larger than the windows the vendors publish - confirm against the pages
 * above before relying on them for compression decisions.
 */
export const ContextLimits: Record<string, number> = {
  /* ---------------------------------- OpenAI ---------------------------------- */
  [Models.openai.GPT_54]: 1_000_000,
  [Models.openai.GPT_54_Mini]: 400_000,
  [Models.openai.GPT_54_Nano]: 400_000,
  [Models.openai.GPT_54_Pro]: 1_000_000,
  [Models.openai.GPT_53_Chat]: 1_000_000,
  [Models.openai.GPT_53_Codex]: 1_000_000,
  [Models.openai.GPT_5]: 1_000_000,
  [Models.openai.GPT_5_Mini]: 1_000_000,
  [Models.openai.GPT_5_Nano]: 1_000_000,
  [Models.openai.GPT_5_1]: 1_000_000,
  [Models.openai.GPT_5_2]: 1_000_000,
  [Models.openai.GPT_41]: 1_047_576,
  [Models.openai.GPT_41_Mini]: 1_047_576,
  [Models.openai.GPT_41_Nano]: 1_047_576,
  [Models.openai.GPT_45]: 128_000,
  [Models.openai.GPT_4o]: 128_000,
  [Models.openai.GPT_4o_Mini]: 128_000,
  [Models.openai.GPT_4o_Audio]: 128_000,
  [Models.openai.GPT_4o_Realtime]: 128_000,
  [Models.openai.GPT_4o_Mini_Audio]: 128_000,
  [Models.openai.GPT_4o_Mini_Realtime]: 128_000,
  [Models.openai.GPT_4o_Mini_Search]: 128_000,
  [Models.openai.GPT_4o_Search]: 128_000,
  [Models.openai.o1]: 200_000,
  [Models.openai.o1_Mini]: 128_000,
  [Models.openai.o1_Pro]: 200_000,
  [Models.openai.o3]: 200_000,
  [Models.openai.o3_Pro]: 200_000,
  [Models.openai.o3_Mini]: 200_000,
  [Models.openai.o4_Mini]: 200_000,

  /* --------------------------------- Anthropic -------------------------------- */
  [Models.anthropic.Opus4_6]: 1_000_000,
  [Models.anthropic.Sonnet4_6]: 1_000_000,
  [Models.anthropic.Opus4_5]: 1_000_000,
  [Models.anthropic.Opus4]: 200_000,
  [Models.anthropic.Opus4_1]: 200_000,
  [Models.anthropic.Sonnet4]: 200_000,
  [Models.anthropic.Sonnet4_5]: 200_000,
  [Models.anthropic.Haiku4_5]: 200_000,
  [Models.anthropic.Sonnet3_7]: 200_000,
  [Models.anthropic.Sonnet3_5]: 200_000,
  [Models.anthropic.Haiku3_5]: 200_000,
  [Models.anthropic.Opus3]: 200_000,
  [Models.anthropic.Haiku3]: 200_000,

  /* ---------------------------------- Google ---------------------------------- */
  [Models.google.Gemini_31_Pro_Preview]: 1_000_000,
  [Models.google.Gemini_31_Flash_Image_Preview]: 1_000_000,
  [Models.google.Gemini_31_Flash_Lite_Preview]: 1_000_000,
  [Models.google.Gemini_3_Flash_Preview]: 1_000_000,
  [Models.google.Gemini_3_Pro_Image_Preview]: 1_000_000,
  [Models.google.Gemini_25_Pro]: 1_000_000,
  [Models.google.Gemini_25_Flash]: 1_000_000,
  [Models.google.Gemini_25_Flash_Lite]: 1_000_000,
  [Models.google.Gemini_25_Flash_Preview]: 1_000_000,
  [Models.google.Gemini_25_Pro_Preview]: 1_000_000,
  [Models.google.Gemini_25_Flash_Image]: 1_000_000,
  [Models.google.Gemini_25_Flash_Live]: 1_000_000,
  [Models.google.Gemini_25_Flash_Native_Audio]: 1_000_000,
  [Models.google.Gemini_25_Flash_TTS]: 1_000_000,
  [Models.google.Gemini_25_Pro_TTS]: 1_000_000,
  [Models.google.Gemini_20_Flash]: 1_000_000,
  [Models.google.Gemini_20_Flash_Preview_Image_Generation]: 1_000_000,
  [Models.google.Gemini_20_Flash_Lite]: 1_000_000,
  [Models.google.Gemini_20_Flash_Live]: 1_000_000,
  [Models.google.Gemini_20_Flash_TTS]: 1_000_000,
  [Models.google.Gemini_15_Flash]: 1_000_000,
  [Models.google.Gemini_15_Flash_8B]: 1_000_000,
  [Models.google.Gemini_15_Pro]: 2_000_000,

  /* ------------------------------------ xAI ----------------------------------- */
  [Models.xai.Grok4_1_Fast_Reasoning]: 2_000_000,
  [Models.xai.Grok4_1_Fast_NonReasoning]: 2_000_000,
  [Models.xai.GrokCodeFast]: 2_000_000,
  [Models.xai.Grok4]: 131_072,
  [Models.xai.Grok3Beta]: 131_072,
  [Models.xai.Grok3MiniBeta]: 131_072,
  [Models.xai.Grok3FastBeta]: 131_072,
  [Models.xai.Grok3MiniFastBeta]: 131_072,
  [Models.xai.Grok21212]: 131_072,
  [Models.xai.Grok2Vision1212]: 131_072,
};
|
|
96
|
+
|
|
97
|
+
/** Default fallback context window limit (tokens) used when a model is not found. */
|
|
98
|
+
export const DEFAULT_CONTEXT_LIMIT = 30_000;
|
|
99
|
+
|
|
100
|
+
/**
 * Looks up the context window size (in tokens) for a model.
 *
 * @param model - Model identifier used as the key into ContextLimits.
 * @returns The table entry for the model, or DEFAULT_CONTEXT_LIMIT when the
 *   table holds no numeric entry under that name.
 */
export function getModelContextLimit(model: string): number {
  const limit = ContextLimits[model];
  // ContextLimits is a plain object, so a name such as "constructor" or
  // "toString" resolves to an inherited Object.prototype member instead of
  // undefined. Checking the runtime type keeps those from being returned as
  // a "limit" and routes them to the default like any other unknown model.
  return typeof limit === "number" ? limit : DEFAULT_CONTEXT_LIMIT;
}
|
package/src/clients/gemini.ts
CHANGED
|
@@ -15,6 +15,7 @@ import * as pathSync from "path";
|
|
|
15
15
|
import { wait } from "../utils";
|
|
16
16
|
import { EmbeddingModels, Models } from "../types";
|
|
17
17
|
import { GeminiTextPricing } from "./pricing";
|
|
18
|
+
import { ContextLimits } from "./contextLimits";
|
|
18
19
|
|
|
19
20
|
import {
|
|
20
21
|
GenericClient,
|
|
@@ -916,4 +917,14 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
916
917
|
throw error;
|
|
917
918
|
}
|
|
918
919
|
}
|
|
920
|
+
|
|
921
|
+
/**
 * Reports the context window and compression threshold for a Gemini model.
 *
 * The window size comes from the shared ContextLimits table. When the
 * model's GeminiTextPricing entry has an `input_gt_200k` tier, the threshold
 * is 200_000 tokens, clamped so it can never exceed the window itself;
 * otherwise the threshold equals the window.
 *
 * @param model - Model identifier to look up.
 * @returns The limit/threshold pair, or undefined when ContextLimits holds
 *   no numeric entry for the model.
 */
getContextLimit(model: string): { contextLimit: number; threshold: number } | undefined {
  const contextLimit = ContextLimits[model];
  // A typeof guard (rather than `=== undefined`) also rejects inherited
  // Object.prototype members such as "constructor".
  if (typeof contextLimit !== "number") return undefined;

  const pricing = GeminiTextPricing[model];
  // Tiered pricing starts above 200k tokens; Math.min keeps the threshold
  // usable even if a tiered model ever has a window smaller than that.
  const threshold =
    pricing && "input_gt_200k" in pricing
      ? Math.min(200_000, contextLimit)
      : contextLimit;
  return { contextLimit, threshold };
}
|
|
919
930
|
}
|
package/src/clients/index.ts
CHANGED
|
@@ -25,9 +25,23 @@ import { GenericGeminiClient } from "./gemini";
|
|
|
25
25
|
import { HttpClient } from "./http";
|
|
26
26
|
import { EmbeddingModels, Models } from "../types";
|
|
27
27
|
import { getConfig } from "../config";
|
|
28
|
+
import {
|
|
29
|
+
GoogleImageModels,
|
|
30
|
+
GoogleVideoModels,
|
|
31
|
+
GoogleTTSModels,
|
|
32
|
+
OpenAiImageModels,
|
|
33
|
+
OpenAiVideoModels,
|
|
34
|
+
OpenAiTTSModels,
|
|
35
|
+
OpenAiTranscriptionModels,
|
|
36
|
+
XaiImageModels,
|
|
37
|
+
XaiVideoModels,
|
|
38
|
+
} from "../types";
|
|
28
39
|
import { GenericXAIClient } from "./xai";
|
|
29
40
|
import { KnowhowGenericClient } from "./knowhow";
|
|
30
41
|
import { loadKnowhowJwt } from "../services/KnowhowClient";
|
|
42
|
+
import { ContextLimits } from "./contextLimits";
|
|
43
|
+
|
|
44
|
+
export type ModelModality = "completion" | "embedding" | "image" | "audio" | "video";
|
|
31
45
|
|
|
32
46
|
function envCheck(key: string): boolean {
|
|
33
47
|
const value = process.env[key];
|
|
@@ -85,6 +99,39 @@ export class AIClient {
|
|
|
85
99
|
...(envCheck("XAI_API_KEY") && { xai: this.completionModels.xai }),
|
|
86
100
|
};
|
|
87
101
|
|
|
102
|
+
// Image-capable models grouped by provider. A provider key is present only
// when envCheck() returns true for that provider's API key variable.
imageModels: Record<string, string[]> = {
  ...(envCheck("OPENAI_KEY") ? { openai: OpenAiImageModels } : {}),
  ...(envCheck("GEMINI_API_KEY") ? { google: GoogleImageModels } : {}),
  ...(envCheck("XAI_API_KEY") ? { xai: XaiImageModels } : {}),
};
|
|
113
|
+
|
|
114
|
+
// Audio models grouped by provider. OpenAI contributes its TTS models
// followed by its transcription models; Google contributes its TTS models.
// A provider key is present only when envCheck() returns true for its API key.
audioModels: Record<string, string[]> = {
  ...(envCheck("OPENAI_KEY")
    ? { openai: [...OpenAiTTSModels, ...OpenAiTranscriptionModels] }
    : {}),
  ...(envCheck("GEMINI_API_KEY") ? { google: GoogleTTSModels } : {}),
};
|
|
122
|
+
|
|
123
|
+
// Video-generation models grouped by provider. A provider key is present only
// when envCheck() returns true for that provider's API key variable.
videoModels: Record<string, string[]> = {
  ...(envCheck("OPENAI_KEY") ? { openai: OpenAiVideoModels } : {}),
  ...(envCheck("GEMINI_API_KEY") ? { google: GoogleVideoModels } : {}),
  ...(envCheck("XAI_API_KEY") ? { xai: XaiVideoModels } : {}),
};
|
|
134
|
+
|
|
88
135
|
getClient(provider: string, model?: string) {
|
|
89
136
|
if (provider && !model) {
|
|
90
137
|
return { client: this.clients[provider], provider, model: undefined };
|
|
@@ -215,6 +262,39 @@ export class AIClient {
|
|
|
215
262
|
);
|
|
216
263
|
}
|
|
217
264
|
|
|
265
|
+
/**
 * Adds image models for a provider.
 *
 * The given ids are appended to both the provider's general model list
 * (clientModels) and its image model list (imageModels); duplicates are
 * dropped while first-seen order is kept.
 *
 * @param provider - Provider key the models belong to.
 * @param models - Model ids to register.
 */
registerImageModels(provider: string, models: string[]) {
  // Appends `models` to an existing (possibly missing) list and de-duplicates.
  const withNewModels = (known: string[] | undefined): string[] => {
    const merged = new Set<string>((known || []).concat(models));
    return Array.from<string>(merged);
  };

  this.clientModels[provider] = withNewModels(this.clientModels[provider]);
  this.imageModels[provider] = withNewModels(this.imageModels[provider]);
}
|
|
275
|
+
|
|
276
|
+
/**
 * Adds audio models for a provider.
 *
 * The given ids are appended to both the provider's general model list
 * (clientModels) and its audio model list (audioModels); duplicates are
 * dropped while first-seen order is kept.
 *
 * @param provider - Provider key the models belong to.
 * @param models - Model ids to register.
 */
registerAudioModels(provider: string, models: string[]) {
  // Appends `models` to an existing (possibly missing) list and de-duplicates.
  const withNewModels = (known: string[] | undefined): string[] => {
    const merged = new Set<string>((known || []).concat(models));
    return Array.from<string>(merged);
  };

  this.clientModels[provider] = withNewModels(this.clientModels[provider]);
  this.audioModels[provider] = withNewModels(this.audioModels[provider]);
}
|
|
286
|
+
|
|
287
|
+
/**
 * Adds video models for a provider.
 *
 * The given ids are appended to both the provider's general model list
 * (clientModels) and its video model list (videoModels); duplicates are
 * dropped while first-seen order is kept.
 *
 * @param provider - Provider key the models belong to.
 * @param models - Model ids to register.
 */
registerVideoModels(provider: string, models: string[]) {
  // Appends `models` to an existing (possibly missing) list and de-duplicates.
  const withNewModels = (known: string[] | undefined): string[] => {
    const merged = new Set<string>((known || []).concat(models));
    return Array.from<string>(merged);
  };

  this.clientModels[provider] = withNewModels(this.clientModels[provider]);
  this.videoModels[provider] = withNewModels(this.videoModels[provider]);
}
|
|
297
|
+
|
|
218
298
|
providerHasModel(provider: string, model: string): boolean {
|
|
219
299
|
const models = this.clientModels[provider];
|
|
220
300
|
if (!models) return false;
|
|
@@ -517,6 +597,37 @@ export class AIClient {
|
|
|
517
597
|
listAllProviders() {
|
|
518
598
|
return Object.keys(this.clientModels);
|
|
519
599
|
}
|
|
600
|
+
|
|
601
|
+
/** Returns the provider -> image model ids map held by this instance (the live object, not a copy). */
listAllImageModels(): Record<string, string[]> {
  const { imageModels } = this;
  return imageModels;
}
|
|
604
|
+
|
|
605
|
+
/** Returns the provider -> audio model ids map held by this instance (the live object, not a copy). */
listAllAudioModels(): Record<string, string[]> {
  const { audioModels } = this;
  return audioModels;
}
|
|
608
|
+
|
|
609
|
+
/** Returns the provider -> video model ids map held by this instance (the live object, not a copy). */
listAllVideoModels(): Record<string, string[]> {
  const { videoModels } = this;
  return videoModels;
}
|
|
612
|
+
|
|
613
|
+
/**
 * Resolves the context window limit and compression threshold for a model.
 *
 * The client registered for `provider` is asked first, so custom clients can
 * supply their own limits. If that client has no getContextLimit(), or it
 * does not know the model (returns undefined), the global ContextLimits
 * table is used instead, with the threshold equal to the limit.
 *
 * @param provider - Key of the registered client to consult first.
 * @param model - Model identifier to look up.
 * @returns The limit/threshold pair, or undefined if neither the client nor
 *   the global ContextLimits table knows the model.
 */
getContextLimit(provider: string, model: string): { contextLimit: number; threshold: number } | undefined {
  // Previously a client that implemented getContextLimit() but returned
  // undefined ended the lookup; now only a defined answer is returned early.
  const fromClient = this.clients[provider]?.getContextLimit?.(model);
  if (fromClient !== undefined) {
    return fromClient;
  }

  // Fall back to the global table. The typeof guard also rejects inherited
  // Object.prototype members such as "constructor".
  const contextLimit = ContextLimits[model];
  if (typeof contextLimit !== "number") return undefined;
  return { contextLimit, threshold: contextLimit };
}
|
|
520
631
|
}
|
|
521
632
|
|
|
522
633
|
export const Clients = new AIClient();
|
|
@@ -528,5 +639,6 @@ export * from "./openai";
|
|
|
528
639
|
export * from "./anthropic";
|
|
529
640
|
export * from "./knowhow";
|
|
530
641
|
export * from "./gemini";
|
|
642
|
+
export * from "./contextLimits";
|
|
531
643
|
export * from "./xai";
|
|
532
644
|
export * from "./knowhowMcp";
|
package/src/clients/openai.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import OpenAI from "openai";
|
|
2
2
|
import { getConfigSync } from "../config";
|
|
3
3
|
import { OpenAiTextPricing } from "./pricing";
|
|
4
|
+
import { ContextLimits } from "./contextLimits";
|
|
4
5
|
import {
|
|
5
6
|
GenericClient,
|
|
6
7
|
CompletionOptions,
|
|
@@ -429,4 +430,14 @@ export class GenericOpenAiClient implements GenericClient {
|
|
|
429
430
|
const data = Buffer.from(await response.arrayBuffer());
|
|
430
431
|
return { data, mimeType };
|
|
431
432
|
}
|
|
433
|
+
|
|
434
|
+
/**
 * Reports the context window and compression threshold for an OpenAI model.
 *
 * The window size comes from the shared ContextLimits table. When the
 * model's OpenAiTextPricing entry has an `input_gt_200k` tier, the threshold
 * is 200_000 tokens, clamped so it can never exceed the window itself;
 * otherwise the threshold equals the window.
 *
 * @param model - Model identifier to look up.
 * @returns The limit/threshold pair, or undefined when ContextLimits holds
 *   no numeric entry for the model.
 */
getContextLimit(model: string): { contextLimit: number; threshold: number } | undefined {
  const contextLimit = ContextLimits[model];
  // A typeof guard (rather than `=== undefined`) also rejects inherited
  // Object.prototype members such as "constructor".
  if (typeof contextLimit !== "number") return undefined;

  const pricing = OpenAiTextPricing[model];
  // Tiered pricing starts above 200k tokens; Math.min keeps the threshold
  // usable even if a tiered model ever has a window smaller than that.
  const threshold =
    pricing && "input_gt_200k" in pricing
      ? Math.min(200_000, contextLimit)
      : contextLimit;
  return { contextLimit, threshold };
}
|
|
432
443
|
}
|
|
@@ -3,19 +3,15 @@ import { Models } from "../../types";
|
|
|
3
3
|
export const AnthropicTextPricing = {
|
|
4
4
|
[Models.anthropic.Opus4_6]: {
|
|
5
5
|
input: 5.0,
|
|
6
|
-
input_gt_200k: 10.0,
|
|
7
6
|
cache_write: 6.25,
|
|
8
7
|
cache_hit: 0.5,
|
|
9
8
|
output: 25.0,
|
|
10
|
-
output_gt_200k: 37.5,
|
|
11
9
|
},
|
|
12
10
|
[Models.anthropic.Sonnet4_6]: {
|
|
13
11
|
input: 3.0,
|
|
14
|
-
input_gt_200k: 6.0,
|
|
15
12
|
cache_write: 3.75,
|
|
16
13
|
cache_hit: 0.3,
|
|
17
14
|
output: 15.0,
|
|
18
|
-
output_gt_200k: 22.5,
|
|
19
15
|
},
|
|
20
16
|
[Models.anthropic.Opus4_5]: {
|
|
21
17
|
input: 5.0,
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { Models, EmbeddingModels } from "../../types";
|
|
2
2
|
|
|
3
3
|
export const GeminiTextPricing = {
|
|
4
|
-
|
|
4
|
+
// Gemini 3.x
|
|
5
|
+
[Models.google.Gemini_31_Pro_Preview]: {
|
|
5
6
|
input: 2,
|
|
6
7
|
input_gt_200k: 4,
|
|
7
8
|
output: 12,
|
|
@@ -9,6 +10,45 @@ export const GeminiTextPricing = {
|
|
|
9
10
|
context_caching: 0.2,
|
|
10
11
|
context_caching_gt_200k: 0.4,
|
|
11
12
|
},
|
|
13
|
+
[Models.google.Gemini_31_Flash_Image_Preview]: {
|
|
14
|
+
input: 0.5,
|
|
15
|
+
output: 3,
|
|
16
|
+
image_generation: 0.045, // per 0.5K image
|
|
17
|
+
},
|
|
18
|
+
[Models.google.Gemini_31_Flash_Lite_Preview]: {
|
|
19
|
+
input: 0.25,
|
|
20
|
+
output: 1.5,
|
|
21
|
+
context_caching: 0.025,
|
|
22
|
+
},
|
|
23
|
+
[Models.google.Gemini_3_Flash_Preview]: {
|
|
24
|
+
input: 0.5,
|
|
25
|
+
output: 3.0,
|
|
26
|
+
context_caching: 0.05,
|
|
27
|
+
},
|
|
28
|
+
[Models.google.Gemini_3_Pro_Image_Preview]: {
|
|
29
|
+
input: 2,
|
|
30
|
+
output: 12,
|
|
31
|
+
image_generation: 0.134, // per 1K/2K image
|
|
32
|
+
},
|
|
33
|
+
// Gemini 2.5
|
|
34
|
+
[Models.google.Gemini_25_Pro]: {
|
|
35
|
+
input: 1.25,
|
|
36
|
+
input_gt_200k: 2.5,
|
|
37
|
+
output: 10.0,
|
|
38
|
+
output_gt_200k: 15.0,
|
|
39
|
+
context_caching: 0.125,
|
|
40
|
+
context_caching_gt_200k: 0.25,
|
|
41
|
+
},
|
|
42
|
+
[Models.google.Gemini_25_Flash]: {
|
|
43
|
+
input: 0.3,
|
|
44
|
+
output: 2.5,
|
|
45
|
+
context_caching: 0.03,
|
|
46
|
+
},
|
|
47
|
+
[Models.google.Gemini_25_Flash_Lite]: {
|
|
48
|
+
input: 0.1,
|
|
49
|
+
output: 0.4,
|
|
50
|
+
context_caching: 0.01,
|
|
51
|
+
},
|
|
12
52
|
[Models.google.Gemini_25_Flash_Preview]: {
|
|
13
53
|
input: 0.3,
|
|
14
54
|
output: 2.5,
|
|
@@ -23,6 +63,19 @@ export const GeminiTextPricing = {
|
|
|
23
63
|
context_caching: 0.125,
|
|
24
64
|
context_caching_gt_200k: 0.25,
|
|
25
65
|
},
|
|
66
|
+
[Models.google.Gemini_25_Flash_Image]: {
|
|
67
|
+
input: 0.3,
|
|
68
|
+
output: 0.039, // per image ($30/1M tokens, 1290 tokens per image)
|
|
69
|
+
},
|
|
70
|
+
[Models.google.Gemini_25_Flash_TTS]: {
|
|
71
|
+
input: 0.5,
|
|
72
|
+
output: 10.0,
|
|
73
|
+
},
|
|
74
|
+
[Models.google.Gemini_25_Pro_TTS]: {
|
|
75
|
+
input: 1.0,
|
|
76
|
+
output: 20.0,
|
|
77
|
+
},
|
|
78
|
+
// Gemini 2.0 (deprecated)
|
|
26
79
|
[Models.google.Gemini_20_Flash]: {
|
|
27
80
|
input: 0.1,
|
|
28
81
|
output: 0.4,
|
|
@@ -37,6 +90,7 @@ export const GeminiTextPricing = {
|
|
|
37
90
|
input: 0.075,
|
|
38
91
|
output: 0.3,
|
|
39
92
|
},
|
|
93
|
+
// Gemini 1.5 (legacy)
|
|
40
94
|
[Models.google.Gemini_15_Flash]: {
|
|
41
95
|
input: 0.075,
|
|
42
96
|
output: 0.3,
|
|
@@ -52,14 +106,39 @@ export const GeminiTextPricing = {
|
|
|
52
106
|
output: 5.0,
|
|
53
107
|
context_caching: 0.3125,
|
|
54
108
|
},
|
|
109
|
+
// Image generation
|
|
55
110
|
[Models.google.Imagen_3]: {
|
|
56
|
-
image_generation: 0.
|
|
111
|
+
image_generation: 0.04, // Imagen 4 Standard: $0.04/image
|
|
112
|
+
},
|
|
113
|
+
[Models.google.Imagen_4_Fast]: {
|
|
114
|
+
image_generation: 0.02, // $0.02/image
|
|
57
115
|
},
|
|
116
|
+
[Models.google.Imagen_4_Ultra]: {
|
|
117
|
+
image_generation: 0.06, // $0.06/image
|
|
118
|
+
},
|
|
119
|
+
// Video generation
|
|
58
120
|
[Models.google.Veo_2]: {
|
|
59
121
|
video_generation: 0.35,
|
|
60
122
|
},
|
|
123
|
+
[Models.google.Veo_3]: {
|
|
124
|
+
video_generation: 0.4, // $0.40/second
|
|
125
|
+
},
|
|
126
|
+
[Models.google.Veo_3_Fast]: {
|
|
127
|
+
video_generation: 0.15, // $0.15/second
|
|
128
|
+
},
|
|
129
|
+
[Models.google.Veo_3_1]: {
|
|
130
|
+
video_generation: 0.4, // $0.40/second (720p/1080p)
|
|
131
|
+
},
|
|
132
|
+
[Models.google.Veo_3_1_Fast]: {
|
|
133
|
+
video_generation: 0.15, // $0.15/second
|
|
134
|
+
},
|
|
135
|
+
// Embeddings
|
|
61
136
|
[EmbeddingModels.google.Gemini_Embedding]: {
|
|
62
137
|
input: 0, // Free of charge
|
|
63
138
|
output: 0, // Free of charge
|
|
64
139
|
},
|
|
140
|
+
[EmbeddingModels.google.Gemini_Embedding_001]: {
|
|
141
|
+
input: 0.15,
|
|
142
|
+
output: 0,
|
|
143
|
+
},
|
|
65
144
|
};
|
|
@@ -1,6 +1,36 @@
|
|
|
1
1
|
import { Models, EmbeddingModels } from "../../types";
|
|
2
2
|
|
|
3
3
|
export const OpenAiTextPricing = {
|
|
4
|
+
[Models.openai.GPT_54]: {
|
|
5
|
+
input: 2.5,
|
|
6
|
+
cached_input: 0.25,
|
|
7
|
+
output: 15.0,
|
|
8
|
+
},
|
|
9
|
+
[Models.openai.GPT_54_Mini]: {
|
|
10
|
+
input: 0.75,
|
|
11
|
+
cached_input: 0.075,
|
|
12
|
+
output: 4.5,
|
|
13
|
+
},
|
|
14
|
+
[Models.openai.GPT_54_Nano]: {
|
|
15
|
+
input: 0.2,
|
|
16
|
+
cached_input: 0.02,
|
|
17
|
+
output: 1.25,
|
|
18
|
+
},
|
|
19
|
+
[Models.openai.GPT_54_Pro]: {
|
|
20
|
+
input: 30.0,
|
|
21
|
+
cached_input: 0,
|
|
22
|
+
output: 180.0,
|
|
23
|
+
},
|
|
24
|
+
[Models.openai.GPT_53_Chat]: {
|
|
25
|
+
input: 1.75,
|
|
26
|
+
cached_input: 0.175,
|
|
27
|
+
output: 14.0,
|
|
28
|
+
},
|
|
29
|
+
[Models.openai.GPT_53_Codex]: {
|
|
30
|
+
input: 1.75,
|
|
31
|
+
cached_input: 0.175,
|
|
32
|
+
output: 14.0,
|
|
33
|
+
},
|
|
4
34
|
[Models.openai.GPT_4o]: {
|
|
5
35
|
input: 2.5,
|
|
6
36
|
cached_input: 1.25,
|
|
@@ -76,6 +106,11 @@ export const OpenAiTextPricing = {
|
|
|
76
106
|
cached_input: 0.5,
|
|
77
107
|
output: 8.0,
|
|
78
108
|
},
|
|
109
|
+
[Models.openai.o3_Pro]: {
|
|
110
|
+
input: 20.0,
|
|
111
|
+
cached_input: 0,
|
|
112
|
+
output: 80.0,
|
|
113
|
+
},
|
|
79
114
|
[Models.openai.o4_Mini]: {
|
|
80
115
|
input: 1.1,
|
|
81
116
|
cached_input: 0.275,
|
|
@@ -131,4 +166,37 @@ export const OpenAiTextPricing = {
|
|
|
131
166
|
cached_input: 0,
|
|
132
167
|
output: 0,
|
|
133
168
|
},
|
|
169
|
+
// New realtime models
|
|
170
|
+
[Models.openai.GPT_Realtime_15]: {
|
|
171
|
+
input: 4.0,
|
|
172
|
+
cached_input: 0.4,
|
|
173
|
+
output: 16.0,
|
|
174
|
+
},
|
|
175
|
+
[Models.openai.GPT_Realtime_Mini]: {
|
|
176
|
+
input: 0.6,
|
|
177
|
+
cached_input: 0.06,
|
|
178
|
+
output: 2.4,
|
|
179
|
+
},
|
|
180
|
+
// New image models (text token pricing)
|
|
181
|
+
[Models.openai.GPT_Image_15]: {
|
|
182
|
+
input: 5.0,
|
|
183
|
+
cached_input: 1.25,
|
|
184
|
+
output: 10.0,
|
|
185
|
+
},
|
|
186
|
+
[Models.openai.GPT_Image_1_Mini]: {
|
|
187
|
+
input: 2.0,
|
|
188
|
+
cached_input: 0.2,
|
|
189
|
+
output: 0,
|
|
190
|
+
},
|
|
191
|
+
// New transcription models
|
|
192
|
+
[Models.openai.GPT_4o_Transcribe]: {
|
|
193
|
+
input: 2.5,
|
|
194
|
+
cached_input: 0,
|
|
195
|
+
output: 10.0,
|
|
196
|
+
},
|
|
197
|
+
[Models.openai.GPT_4o_Mini_Transcribe]: {
|
|
198
|
+
input: 1.25,
|
|
199
|
+
cached_input: 0,
|
|
200
|
+
output: 5.0,
|
|
201
|
+
},
|
|
134
202
|
};
|
package/src/clients/types.ts
CHANGED
|
@@ -258,4 +258,12 @@ export interface GenericClient {
|
|
|
258
258
|
/** Download a file from the provider's file storage */
|
|
259
259
|
downloadFile?(options: FileDownloadOptions): Promise<FileDownloadResponse>;
|
|
260
260
|
getModels(): Promise<{ id: string }[]>;
|
|
261
|
+
/**
|
|
262
|
+
* Returns the context window limit and compression threshold for a given model,
|
|
263
|
+
* or undefined if the model is not known to this client.
|
|
264
|
+
* - contextLimit: the maximum number of tokens the model can handle
|
|
265
|
+
* - threshold: the point at which compression should kick in; equals contextLimit
|
|
266
|
+
* unless the model has tiered pricing (input_gt_200k), in which case it is 200_000
|
|
267
|
+
*/
|
|
268
|
+
getContextLimit?(model: string): { contextLimit: number; threshold: number } | undefined;
|
|
261
269
|
}
|
package/src/clients/xai.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import OpenAI from "openai";
|
|
2
2
|
import { XaiTextPricing, XaiImagePricing, XaiVideoPricing } from "./pricing";
|
|
3
|
+
import { ContextLimits } from "./contextLimits";
|
|
3
4
|
import {
|
|
4
5
|
GenericClient,
|
|
5
6
|
CompletionOptions,
|
|
@@ -392,4 +393,14 @@ export class GenericXAIClient implements GenericClient {
|
|
|
392
393
|
mimeType,
|
|
393
394
|
};
|
|
394
395
|
}
|
|
396
|
+
|
|
397
|
+
/**
 * Reports the context window and compression threshold for an xAI model.
 *
 * The window size comes from the shared ContextLimits table. When the
 * model's XaiTextPricing entry has an `input_gt_200k` tier, the threshold
 * is 200_000 tokens, clamped so it can never exceed the window itself;
 * otherwise the threshold equals the window.
 *
 * @param model - Model identifier to look up.
 * @returns The limit/threshold pair, or undefined when ContextLimits holds
 *   no numeric entry for the model.
 */
getContextLimit(model: string): { contextLimit: number; threshold: number } | undefined {
  const contextLimit = ContextLimits[model];
  // A typeof guard (rather than `=== undefined`) also rejects inherited
  // Object.prototype members such as "constructor".
  if (typeof contextLimit !== "number") return undefined;

  const pricing = XaiTextPricing[model];
  // Tiered pricing starts above 200k tokens; Math.min keeps the threshold
  // usable even if a tiered model ever has a window smaller than that.
  const threshold =
    pricing && "input_gt_200k" in pricing
      ? Math.min(200_000, contextLimit)
      : contextLimit;
  return { contextLimit, threshold };
}
|
|
395
406
|
}
|