@tyvm/knowhow 0.0.84 → 0.0.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/package.json +1 -1
  2. package/src/agents/base/base.ts +99 -37
  3. package/src/agents/researcher/researcher.ts +1 -2
  4. package/src/agents/tools/aiClient.ts +48 -0
  5. package/src/agents/tools/list.ts +57 -0
  6. package/src/chat/CliChatService.ts +3 -1
  7. package/src/chat/modules/AgentModule.ts +20 -31
  8. package/src/chat/types.ts +1 -0
  9. package/src/cli.ts +19 -10
  10. package/src/clients/anthropic.ts +11 -0
  11. package/src/clients/contextLimits.ts +106 -0
  12. package/src/clients/gemini.ts +11 -0
  13. package/src/clients/index.ts +112 -0
  14. package/src/clients/openai.ts +11 -0
  15. package/src/clients/pricing/anthropic.ts +0 -4
  16. package/src/clients/pricing/google.ts +81 -2
  17. package/src/clients/pricing/openai.ts +68 -0
  18. package/src/clients/types.ts +8 -0
  19. package/src/clients/xai.ts +11 -0
  20. package/src/types.ts +79 -7
  21. package/tests/clients/pricing.test.ts +144 -0
  22. package/ts_build/package.json +1 -1
  23. package/ts_build/src/agents/base/base.d.ts +4 -0
  24. package/ts_build/src/agents/base/base.js +53 -28
  25. package/ts_build/src/agents/base/base.js.map +1 -1
  26. package/ts_build/src/agents/researcher/researcher.js +1 -1
  27. package/ts_build/src/agents/researcher/researcher.js.map +1 -1
  28. package/ts_build/src/agents/tools/aiClient.d.ts +3 -0
  29. package/ts_build/src/agents/tools/aiClient.js +31 -1
  30. package/ts_build/src/agents/tools/aiClient.js.map +1 -1
  31. package/ts_build/src/agents/tools/list.js +48 -0
  32. package/ts_build/src/agents/tools/list.js.map +1 -1
  33. package/ts_build/src/chat/CliChatService.js.map +1 -1
  34. package/ts_build/src/chat/modules/AgentModule.d.ts +1 -4
  35. package/ts_build/src/chat/modules/AgentModule.js +12 -15
  36. package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
  37. package/ts_build/src/chat/types.d.ts +1 -0
  38. package/ts_build/src/cli.js +3 -2
  39. package/ts_build/src/cli.js.map +1 -1
  40. package/ts_build/src/clients/anthropic.d.ts +8 -4
  41. package/ts_build/src/clients/anthropic.js +9 -0
  42. package/ts_build/src/clients/anthropic.js.map +1 -1
  43. package/ts_build/src/clients/contextLimits.d.ts +3 -0
  44. package/ts_build/src/clients/contextLimits.js +88 -0
  45. package/ts_build/src/clients/contextLimits.js.map +1 -0
  46. package/ts_build/src/clients/gemini.d.ts +14 -10
  47. package/ts_build/src/clients/gemini.js +9 -0
  48. package/ts_build/src/clients/gemini.js.map +1 -1
  49. package/ts_build/src/clients/index.d.ts +15 -0
  50. package/ts_build/src/clients/index.js +70 -0
  51. package/ts_build/src/clients/index.js.map +1 -1
  52. package/ts_build/src/clients/openai.d.ts +4 -0
  53. package/ts_build/src/clients/openai.js +9 -0
  54. package/ts_build/src/clients/openai.js.map +1 -1
  55. package/ts_build/src/clients/pricing/anthropic.d.ts +4 -4
  56. package/ts_build/src/clients/pricing/anthropic.js +0 -4
  57. package/ts_build/src/clients/pricing/anthropic.js.map +1 -1
  58. package/ts_build/src/clients/pricing/google.d.ts +10 -10
  59. package/ts_build/src/clients/pricing/google.js +74 -2
  60. package/ts_build/src/clients/pricing/google.js.map +1 -1
  61. package/ts_build/src/clients/pricing/openai.js +65 -0
  62. package/ts_build/src/clients/pricing/openai.js.map +1 -1
  63. package/ts_build/src/clients/types.d.ts +4 -0
  64. package/ts_build/src/clients/xai.d.ts +4 -0
  65. package/ts_build/src/clients/xai.js +9 -0
  66. package/ts_build/src/clients/xai.js.map +1 -1
  67. package/ts_build/src/types.d.ts +33 -4
  68. package/ts_build/src/types.js +73 -5
  69. package/ts_build/src/types.js.map +1 -1
  70. package/ts_build/tests/clients/pricing.test.d.ts +1 -0
  71. package/ts_build/tests/clients/pricing.test.js +90 -0
  72. package/ts_build/tests/clients/pricing.test.js.map +1 -0
@@ -0,0 +1,106 @@
1
+ import { Models, EmbeddingModels } from "../types";
2
+
3
+ /**
4
+ * Context window limits (in tokens) for all supported models.
5
+ * Sources:
6
+ * - OpenAI: https://platform.openai.com/docs/models
7
+ * - Anthropic: https://docs.anthropic.com/en/docs/about-claude/models
8
+ * - Google: https://ai.google.dev/gemini-api/docs/models
9
+ * - xAI: https://docs.x.ai/developers/models
10
+ */
11
+ export const ContextLimits: Record<string, number> = {
12
+ // ─── OpenAI ───────────────────────────────────────────────────────────────
13
+ [Models.openai.GPT_54]: 1_000_000,
14
+ [Models.openai.GPT_54_Mini]: 400_000,
15
+ [Models.openai.GPT_54_Nano]: 400_000,
16
+ [Models.openai.GPT_54_Pro]: 1_000_000,
17
+ [Models.openai.GPT_53_Chat]: 1_000_000,
18
+ [Models.openai.GPT_53_Codex]: 1_000_000,
19
+ [Models.openai.GPT_5]: 1_000_000,
20
+ [Models.openai.GPT_5_Mini]: 1_000_000,
21
+ [Models.openai.GPT_5_Nano]: 1_000_000,
22
+ [Models.openai.GPT_5_1]: 1_000_000,
23
+ [Models.openai.GPT_5_2]: 1_000_000,
24
+ [Models.openai.GPT_41]: 1_047_576,
25
+ [Models.openai.GPT_41_Mini]: 1_047_576,
26
+ [Models.openai.GPT_41_Nano]: 1_047_576,
27
+ [Models.openai.GPT_45]: 128_000,
28
+ [Models.openai.GPT_4o]: 128_000,
29
+ [Models.openai.GPT_4o_Mini]: 128_000,
30
+ [Models.openai.GPT_4o_Audio]: 128_000,
31
+ [Models.openai.GPT_4o_Realtime]: 128_000,
32
+ [Models.openai.GPT_4o_Mini_Audio]: 128_000,
33
+ [Models.openai.GPT_4o_Mini_Realtime]: 128_000,
34
+ [Models.openai.GPT_4o_Mini_Search]: 128_000,
35
+ [Models.openai.GPT_4o_Search]: 128_000,
36
+ [Models.openai.o1]: 200_000,
37
+ [Models.openai.o1_Mini]: 128_000,
38
+ [Models.openai.o1_Pro]: 200_000,
39
+ [Models.openai.o3]: 200_000,
40
+ [Models.openai.o3_Pro]: 200_000,
41
+ [Models.openai.o3_Mini]: 200_000,
42
+ [Models.openai.o4_Mini]: 200_000,
43
+
44
+ // ─── Anthropic ────────────────────────────────────────────────────────────
45
+ [Models.anthropic.Opus4_6]: 1_000_000,
46
+ [Models.anthropic.Sonnet4_6]: 1_000_000,
47
+ [Models.anthropic.Opus4_5]: 1_000_000,
48
+ [Models.anthropic.Opus4]: 200_000,
49
+ [Models.anthropic.Opus4_1]: 200_000,
50
+ [Models.anthropic.Sonnet4]: 200_000,
51
+ [Models.anthropic.Sonnet4_5]: 200_000,
52
+ [Models.anthropic.Haiku4_5]: 200_000,
53
+ [Models.anthropic.Sonnet3_7]: 200_000,
54
+ [Models.anthropic.Sonnet3_5]: 200_000,
55
+ [Models.anthropic.Haiku3_5]: 200_000,
56
+ [Models.anthropic.Opus3]: 200_000,
57
+ [Models.anthropic.Haiku3]: 200_000,
58
+
59
+ // ─── Google ───────────────────────────────────────────────────────────────
60
+ [Models.google.Gemini_31_Pro_Preview]: 1_000_000,
61
+ [Models.google.Gemini_31_Flash_Image_Preview]: 1_000_000,
62
+ [Models.google.Gemini_31_Flash_Lite_Preview]: 1_000_000,
63
+ [Models.google.Gemini_3_Flash_Preview]: 1_000_000,
64
+ [Models.google.Gemini_3_Pro_Image_Preview]: 1_000_000,
65
+ [Models.google.Gemini_25_Pro]: 1_000_000,
66
+ [Models.google.Gemini_25_Flash]: 1_000_000,
67
+ [Models.google.Gemini_25_Flash_Lite]: 1_000_000,
68
+ [Models.google.Gemini_25_Flash_Preview]: 1_000_000,
69
+ [Models.google.Gemini_25_Pro_Preview]: 1_000_000,
70
+ [Models.google.Gemini_25_Flash_Image]: 1_000_000,
71
+ [Models.google.Gemini_25_Flash_Live]: 1_000_000,
72
+ [Models.google.Gemini_25_Flash_Native_Audio]: 1_000_000,
73
+ [Models.google.Gemini_25_Flash_TTS]: 1_000_000,
74
+ [Models.google.Gemini_25_Pro_TTS]: 1_000_000,
75
+ [Models.google.Gemini_20_Flash]: 1_000_000,
76
+ [Models.google.Gemini_20_Flash_Preview_Image_Generation]: 1_000_000,
77
+ [Models.google.Gemini_20_Flash_Lite]: 1_000_000,
78
+ [Models.google.Gemini_20_Flash_Live]: 1_000_000,
79
+ [Models.google.Gemini_20_Flash_TTS]: 1_000_000,
80
+ [Models.google.Gemini_15_Flash]: 1_000_000,
81
+ [Models.google.Gemini_15_Flash_8B]: 1_000_000,
82
+ [Models.google.Gemini_15_Pro]: 2_000_000,
83
+
84
+ // ─── xAI ──────────────────────────────────────────────────────────────────
85
+ [Models.xai.Grok4_1_Fast_Reasoning]: 2_000_000,
86
+ [Models.xai.Grok4_1_Fast_NonReasoning]: 2_000_000,
87
+ [Models.xai.GrokCodeFast]: 2_000_000,
88
+ [Models.xai.Grok4]: 131_072,
89
+ [Models.xai.Grok3Beta]: 131_072,
90
+ [Models.xai.Grok3MiniBeta]: 131_072,
91
+ [Models.xai.Grok3FastBeta]: 131_072,
92
+ [Models.xai.Grok3MiniFastBeta]: 131_072,
93
+ [Models.xai.Grok21212]: 131_072,
94
+ [Models.xai.Grok2Vision1212]: 131_072,
95
+ };
96
+
97
+ /** Default fallback context window limit (tokens) used when a model is not found. */
98
+ export const DEFAULT_CONTEXT_LIMIT = 30_000;
99
+
100
+ /**
101
+ * Returns the context window limit (in tokens) for a given model.
102
+ * Falls back to DEFAULT_CONTEXT_LIMIT if the model is not recognized.
103
+ */
104
+ export function getModelContextLimit(model: string): number {
105
+ return ContextLimits[model] ?? DEFAULT_CONTEXT_LIMIT;
106
+ }
@@ -15,6 +15,7 @@ import * as pathSync from "path";
15
15
  import { wait } from "../utils";
16
16
  import { EmbeddingModels, Models } from "../types";
17
17
  import { GeminiTextPricing } from "./pricing";
18
+ import { ContextLimits } from "./contextLimits";
18
19
 
19
20
  import {
20
21
  GenericClient,
@@ -916,4 +917,14 @@ export class GenericGeminiClient implements GenericClient {
916
917
  throw error;
917
918
  }
918
919
  }
920
+
921
+ getContextLimit(model: string): { contextLimit: number; threshold: number } | undefined {
922
+ const contextLimit = ContextLimits[model];
923
+ if (contextLimit === undefined) return undefined;
924
+ const pricing = GeminiTextPricing[model];
925
+ // If the model has tiered pricing above 200k tokens, use 200k as the threshold
926
+ const threshold =
927
+ pricing && "input_gt_200k" in pricing ? 200_000 : contextLimit;
928
+ return { contextLimit, threshold };
929
+ }
919
930
  }
@@ -25,9 +25,23 @@ import { GenericGeminiClient } from "./gemini";
25
25
  import { HttpClient } from "./http";
26
26
  import { EmbeddingModels, Models } from "../types";
27
27
  import { getConfig } from "../config";
28
+ import {
29
+ GoogleImageModels,
30
+ GoogleVideoModels,
31
+ GoogleTTSModels,
32
+ OpenAiImageModels,
33
+ OpenAiVideoModels,
34
+ OpenAiTTSModels,
35
+ OpenAiTranscriptionModels,
36
+ XaiImageModels,
37
+ XaiVideoModels,
38
+ } from "../types";
28
39
  import { GenericXAIClient } from "./xai";
29
40
  import { KnowhowGenericClient } from "./knowhow";
30
41
  import { loadKnowhowJwt } from "../services/KnowhowClient";
42
+ import { ContextLimits } from "./contextLimits";
43
+
44
+ export type ModelModality = "completion" | "embedding" | "image" | "audio" | "video";
31
45
 
32
46
  function envCheck(key: string): boolean {
33
47
  const value = process.env[key];
@@ -85,6 +99,39 @@ export class AIClient {
85
99
  ...(envCheck("XAI_API_KEY") && { xai: this.completionModels.xai }),
86
100
  };
87
101
 
102
+ imageModels: Record<string, string[]> = {
103
+ ...(envCheck("OPENAI_KEY") && {
104
+ openai: OpenAiImageModels,
105
+ }),
106
+ ...(envCheck("GEMINI_API_KEY") && {
107
+ google: GoogleImageModels,
108
+ }),
109
+ ...(envCheck("XAI_API_KEY") && {
110
+ xai: XaiImageModels,
111
+ }),
112
+ };
113
+
114
+ audioModels: Record<string, string[]> = {
115
+ ...(envCheck("OPENAI_KEY") && {
116
+ openai: [...OpenAiTTSModels, ...OpenAiTranscriptionModels],
117
+ }),
118
+ ...(envCheck("GEMINI_API_KEY") && {
119
+ google: GoogleTTSModels,
120
+ }),
121
+ };
122
+
123
+ videoModels: Record<string, string[]> = {
124
+ ...(envCheck("OPENAI_KEY") && {
125
+ openai: OpenAiVideoModels,
126
+ }),
127
+ ...(envCheck("GEMINI_API_KEY") && {
128
+ google: GoogleVideoModels,
129
+ }),
130
+ ...(envCheck("XAI_API_KEY") && {
131
+ xai: XaiVideoModels,
132
+ }),
133
+ };
134
+
88
135
  getClient(provider: string, model?: string) {
89
136
  if (provider && !model) {
90
137
  return { client: this.clients[provider], provider, model: undefined };
@@ -215,6 +262,39 @@ export class AIClient {
215
262
  );
216
263
  }
217
264
 
265
+ registerImageModels(provider: string, models: string[]) {
266
+ const currentModels = this.clientModels[provider] || [];
267
+ const currentImageModels = this.imageModels[provider] || [];
268
+ this.clientModels[provider] = Array.from<string>(
269
+ new Set(currentModels.concat(models))
270
+ );
271
+ this.imageModels[provider] = Array.from<string>(
272
+ new Set(currentImageModels.concat(models))
273
+ );
274
+ }
275
+
276
+ registerAudioModels(provider: string, models: string[]) {
277
+ const currentModels = this.clientModels[provider] || [];
278
+ const currentAudioModels = this.audioModels[provider] || [];
279
+ this.clientModels[provider] = Array.from<string>(
280
+ new Set(currentModels.concat(models))
281
+ );
282
+ this.audioModels[provider] = Array.from<string>(
283
+ new Set(currentAudioModels.concat(models))
284
+ );
285
+ }
286
+
287
+ registerVideoModels(provider: string, models: string[]) {
288
+ const currentModels = this.clientModels[provider] || [];
289
+ const currentVideoModels = this.videoModels[provider] || [];
290
+ this.clientModels[provider] = Array.from<string>(
291
+ new Set(currentModels.concat(models))
292
+ );
293
+ this.videoModels[provider] = Array.from<string>(
294
+ new Set(currentVideoModels.concat(models))
295
+ );
296
+ }
297
+
218
298
  providerHasModel(provider: string, model: string): boolean {
219
299
  const models = this.clientModels[provider];
220
300
  if (!models) return false;
@@ -517,6 +597,37 @@ export class AIClient {
517
597
  listAllProviders() {
518
598
  return Object.keys(this.clientModels);
519
599
  }
600
+
601
+ listAllImageModels() {
602
+ return this.imageModels;
603
+ }
604
+
605
+ listAllAudioModels() {
606
+ return this.audioModels;
607
+ }
608
+
609
+ listAllVideoModels() {
610
+ return this.videoModels;
611
+ }
612
+
613
+ /**
614
+ * Returns the context window limit (in tokens) for a given model.
615
+ * Optionally accepts a provider for future provider-specific overrides.
616
+ * Delegates to the registered client's getContextLimit() if available,
617
+ * so custom clients can provide their own context limits.
618
+ * Returns undefined if neither the client nor the global ContextLimits table knows the model.
619
+ */
620
+ getContextLimit(provider: string, model: string): { contextLimit: number; threshold: number } | undefined {
621
+ // Try the registered client first
622
+ const client = this.clients[provider];
623
+ if (client?.getContextLimit) {
624
+ return client.getContextLimit(model);
625
+ }
626
+ // Fall back to the global ContextLimits table
627
+ const contextLimit = ContextLimits[model];
628
+ if (contextLimit === undefined) return undefined;
629
+ return { contextLimit, threshold: contextLimit };
630
+ }
520
631
  }
521
632
 
522
633
  export const Clients = new AIClient();
@@ -528,5 +639,6 @@ export * from "./openai";
528
639
  export * from "./anthropic";
529
640
  export * from "./knowhow";
530
641
  export * from "./gemini";
642
+ export * from "./contextLimits";
531
643
  export * from "./xai";
532
644
  export * from "./knowhowMcp";
@@ -1,6 +1,7 @@
1
1
  import OpenAI from "openai";
2
2
  import { getConfigSync } from "../config";
3
3
  import { OpenAiTextPricing } from "./pricing";
4
+ import { ContextLimits } from "./contextLimits";
4
5
  import {
5
6
  GenericClient,
6
7
  CompletionOptions,
@@ -429,4 +430,14 @@ export class GenericOpenAiClient implements GenericClient {
429
430
  const data = Buffer.from(await response.arrayBuffer());
430
431
  return { data, mimeType };
431
432
  }
433
+
434
+ getContextLimit(model: string): { contextLimit: number; threshold: number } | undefined {
435
+ const contextLimit = ContextLimits[model];
436
+ if (contextLimit === undefined) return undefined;
437
+ const pricing = OpenAiTextPricing[model];
438
+ // If the model has tiered pricing above 200k tokens, use 200k as the threshold
439
+ const threshold =
440
+ pricing && "input_gt_200k" in pricing ? 200_000 : contextLimit;
441
+ return { contextLimit, threshold };
442
+ }
432
443
  }
@@ -3,19 +3,15 @@ import { Models } from "../../types";
3
3
  export const AnthropicTextPricing = {
4
4
  [Models.anthropic.Opus4_6]: {
5
5
  input: 5.0,
6
- input_gt_200k: 10.0,
7
6
  cache_write: 6.25,
8
7
  cache_hit: 0.5,
9
8
  output: 25.0,
10
- output_gt_200k: 37.5,
11
9
  },
12
10
  [Models.anthropic.Sonnet4_6]: {
13
11
  input: 3.0,
14
- input_gt_200k: 6.0,
15
12
  cache_write: 3.75,
16
13
  cache_hit: 0.3,
17
14
  output: 15.0,
18
- output_gt_200k: 22.5,
19
15
  },
20
16
  [Models.anthropic.Opus4_5]: {
21
17
  input: 5.0,
@@ -1,7 +1,8 @@
1
1
  import { Models, EmbeddingModels } from "../../types";
2
2
 
3
3
  export const GeminiTextPricing = {
4
- [Models.google.Gemini_3_Preview]: {
4
+ // Gemini 3.x
5
+ [Models.google.Gemini_31_Pro_Preview]: {
5
6
  input: 2,
6
7
  input_gt_200k: 4,
7
8
  output: 12,
@@ -9,6 +10,45 @@ export const GeminiTextPricing = {
9
10
  context_caching: 0.2,
10
11
  context_caching_gt_200k: 0.4,
11
12
  },
13
+ [Models.google.Gemini_31_Flash_Image_Preview]: {
14
+ input: 0.5,
15
+ output: 3,
16
+ image_generation: 0.045, // per 0.5K image
17
+ },
18
+ [Models.google.Gemini_31_Flash_Lite_Preview]: {
19
+ input: 0.25,
20
+ output: 1.5,
21
+ context_caching: 0.025,
22
+ },
23
+ [Models.google.Gemini_3_Flash_Preview]: {
24
+ input: 0.5,
25
+ output: 3.0,
26
+ context_caching: 0.05,
27
+ },
28
+ [Models.google.Gemini_3_Pro_Image_Preview]: {
29
+ input: 2,
30
+ output: 12,
31
+ image_generation: 0.134, // per 1K/2K image
32
+ },
33
+ // Gemini 2.5
34
+ [Models.google.Gemini_25_Pro]: {
35
+ input: 1.25,
36
+ input_gt_200k: 2.5,
37
+ output: 10.0,
38
+ output_gt_200k: 15.0,
39
+ context_caching: 0.125,
40
+ context_caching_gt_200k: 0.25,
41
+ },
42
+ [Models.google.Gemini_25_Flash]: {
43
+ input: 0.3,
44
+ output: 2.5,
45
+ context_caching: 0.03,
46
+ },
47
+ [Models.google.Gemini_25_Flash_Lite]: {
48
+ input: 0.1,
49
+ output: 0.4,
50
+ context_caching: 0.01,
51
+ },
12
52
  [Models.google.Gemini_25_Flash_Preview]: {
13
53
  input: 0.3,
14
54
  output: 2.5,
@@ -23,6 +63,19 @@ export const GeminiTextPricing = {
23
63
  context_caching: 0.125,
24
64
  context_caching_gt_200k: 0.25,
25
65
  },
66
+ [Models.google.Gemini_25_Flash_Image]: {
67
+ input: 0.3,
68
+ output: 0.039, // per image ($30/1M tokens, 1290 tokens per image)
69
+ },
70
+ [Models.google.Gemini_25_Flash_TTS]: {
71
+ input: 0.5,
72
+ output: 10.0,
73
+ },
74
+ [Models.google.Gemini_25_Pro_TTS]: {
75
+ input: 1.0,
76
+ output: 20.0,
77
+ },
78
+ // Gemini 2.0 (deprecated)
26
79
  [Models.google.Gemini_20_Flash]: {
27
80
  input: 0.1,
28
81
  output: 0.4,
@@ -37,6 +90,7 @@ export const GeminiTextPricing = {
37
90
  input: 0.075,
38
91
  output: 0.3,
39
92
  },
93
+ // Gemini 1.5 (legacy)
40
94
  [Models.google.Gemini_15_Flash]: {
41
95
  input: 0.075,
42
96
  output: 0.3,
@@ -52,14 +106,39 @@ export const GeminiTextPricing = {
52
106
  output: 5.0,
53
107
  context_caching: 0.3125,
54
108
  },
109
+ // Image generation
55
110
  [Models.google.Imagen_3]: {
56
- image_generation: 0.03,
111
+ image_generation: 0.04, // Imagen 4 Standard: $0.04/image
112
+ },
113
+ [Models.google.Imagen_4_Fast]: {
114
+ image_generation: 0.02, // $0.02/image
57
115
  },
116
+ [Models.google.Imagen_4_Ultra]: {
117
+ image_generation: 0.06, // $0.06/image
118
+ },
119
+ // Video generation
58
120
  [Models.google.Veo_2]: {
59
121
  video_generation: 0.35,
60
122
  },
123
+ [Models.google.Veo_3]: {
124
+ video_generation: 0.4, // $0.40/second
125
+ },
126
+ [Models.google.Veo_3_Fast]: {
127
+ video_generation: 0.15, // $0.15/second
128
+ },
129
+ [Models.google.Veo_3_1]: {
130
+ video_generation: 0.4, // $0.40/second (720p/1080p)
131
+ },
132
+ [Models.google.Veo_3_1_Fast]: {
133
+ video_generation: 0.15, // $0.15/second
134
+ },
135
+ // Embeddings
61
136
  [EmbeddingModels.google.Gemini_Embedding]: {
62
137
  input: 0, // Free of charge
63
138
  output: 0, // Free of charge
64
139
  },
140
+ [EmbeddingModels.google.Gemini_Embedding_001]: {
141
+ input: 0.15,
142
+ output: 0,
143
+ },
65
144
  };
@@ -1,6 +1,36 @@
1
1
  import { Models, EmbeddingModels } from "../../types";
2
2
 
3
3
  export const OpenAiTextPricing = {
4
+ [Models.openai.GPT_54]: {
5
+ input: 2.5,
6
+ cached_input: 0.25,
7
+ output: 15.0,
8
+ },
9
+ [Models.openai.GPT_54_Mini]: {
10
+ input: 0.75,
11
+ cached_input: 0.075,
12
+ output: 4.5,
13
+ },
14
+ [Models.openai.GPT_54_Nano]: {
15
+ input: 0.2,
16
+ cached_input: 0.02,
17
+ output: 1.25,
18
+ },
19
+ [Models.openai.GPT_54_Pro]: {
20
+ input: 30.0,
21
+ cached_input: 0,
22
+ output: 180.0,
23
+ },
24
+ [Models.openai.GPT_53_Chat]: {
25
+ input: 1.75,
26
+ cached_input: 0.175,
27
+ output: 14.0,
28
+ },
29
+ [Models.openai.GPT_53_Codex]: {
30
+ input: 1.75,
31
+ cached_input: 0.175,
32
+ output: 14.0,
33
+ },
4
34
  [Models.openai.GPT_4o]: {
5
35
  input: 2.5,
6
36
  cached_input: 1.25,
@@ -76,6 +106,11 @@ export const OpenAiTextPricing = {
76
106
  cached_input: 0.5,
77
107
  output: 8.0,
78
108
  },
109
+ [Models.openai.o3_Pro]: {
110
+ input: 20.0,
111
+ cached_input: 0,
112
+ output: 80.0,
113
+ },
79
114
  [Models.openai.o4_Mini]: {
80
115
  input: 1.1,
81
116
  cached_input: 0.275,
@@ -131,4 +166,37 @@ export const OpenAiTextPricing = {
131
166
  cached_input: 0,
132
167
  output: 0,
133
168
  },
169
+ // New realtime models
170
+ [Models.openai.GPT_Realtime_15]: {
171
+ input: 4.0,
172
+ cached_input: 0.4,
173
+ output: 16.0,
174
+ },
175
+ [Models.openai.GPT_Realtime_Mini]: {
176
+ input: 0.6,
177
+ cached_input: 0.06,
178
+ output: 2.4,
179
+ },
180
+ // New image models (text token pricing)
181
+ [Models.openai.GPT_Image_15]: {
182
+ input: 5.0,
183
+ cached_input: 1.25,
184
+ output: 10.0,
185
+ },
186
+ [Models.openai.GPT_Image_1_Mini]: {
187
+ input: 2.0,
188
+ cached_input: 0.2,
189
+ output: 0,
190
+ },
191
+ // New transcription models
192
+ [Models.openai.GPT_4o_Transcribe]: {
193
+ input: 2.5,
194
+ cached_input: 0,
195
+ output: 10.0,
196
+ },
197
+ [Models.openai.GPT_4o_Mini_Transcribe]: {
198
+ input: 1.25,
199
+ cached_input: 0,
200
+ output: 5.0,
201
+ },
134
202
  };
@@ -258,4 +258,12 @@ export interface GenericClient {
258
258
  /** Download a file from the provider's file storage */
259
259
  downloadFile?(options: FileDownloadOptions): Promise<FileDownloadResponse>;
260
260
  getModels(): Promise<{ id: string }[]>;
261
+ /**
262
+ * Returns the context window limit and compression threshold for a given model,
263
+ * or undefined if the model is not known to this client.
264
+ * - contextLimit: the maximum number of tokens the model can handle
265
+ * - threshold: the point at which compression should kick in; equals contextLimit
266
+ * unless the model has tiered pricing (input_gt_200k), in which case it is 200_000
267
+ */
268
+ getContextLimit?(model: string): { contextLimit: number; threshold: number } | undefined;
261
269
  }
@@ -1,5 +1,6 @@
1
1
  import OpenAI from "openai";
2
2
  import { XaiTextPricing, XaiImagePricing, XaiVideoPricing } from "./pricing";
3
+ import { ContextLimits } from "./contextLimits";
3
4
  import {
4
5
  GenericClient,
5
6
  CompletionOptions,
@@ -392,4 +393,14 @@ export class GenericXAIClient implements GenericClient {
392
393
  mimeType,
393
394
  };
394
395
  }
396
+
397
+ getContextLimit(model: string): { contextLimit: number; threshold: number } | undefined {
398
+ const contextLimit = ContextLimits[model];
399
+ if (contextLimit === undefined) return undefined;
400
+ const pricing = XaiTextPricing[model];
401
+ // If the model has tiered pricing above 200k tokens, use 200k as the threshold
402
+ const threshold =
403
+ pricing && "input_gt_200k" in pricing ? 200_000 : contextLimit;
404
+ return { contextLimit, threshold };
405
+ }
395
406
  }