@tyvm/knowhow 0.0.84 → 0.0.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/package.json +1 -1
  2. package/src/agents/base/base.ts +99 -37
  3. package/src/agents/researcher/researcher.ts +1 -2
  4. package/src/agents/tools/aiClient.ts +48 -0
  5. package/src/agents/tools/list.ts +57 -0
  6. package/src/chat/CliChatService.ts +3 -1
  7. package/src/chat/modules/AgentModule.ts +20 -31
  8. package/src/chat/types.ts +1 -0
  9. package/src/cli.ts +19 -10
  10. package/src/clients/anthropic.ts +11 -0
  11. package/src/clients/contextLimits.ts +106 -0
  12. package/src/clients/gemini.ts +11 -0
  13. package/src/clients/index.ts +112 -0
  14. package/src/clients/openai.ts +11 -0
  15. package/src/clients/pricing/anthropic.ts +0 -4
  16. package/src/clients/pricing/google.ts +81 -2
  17. package/src/clients/pricing/openai.ts +68 -0
  18. package/src/clients/types.ts +8 -0
  19. package/src/clients/xai.ts +11 -0
  20. package/src/types.ts +79 -7
  21. package/tests/clients/pricing.test.ts +144 -0
  22. package/ts_build/package.json +1 -1
  23. package/ts_build/src/agents/base/base.d.ts +4 -0
  24. package/ts_build/src/agents/base/base.js +53 -28
  25. package/ts_build/src/agents/base/base.js.map +1 -1
  26. package/ts_build/src/agents/researcher/researcher.js +1 -1
  27. package/ts_build/src/agents/researcher/researcher.js.map +1 -1
  28. package/ts_build/src/agents/tools/aiClient.d.ts +3 -0
  29. package/ts_build/src/agents/tools/aiClient.js +31 -1
  30. package/ts_build/src/agents/tools/aiClient.js.map +1 -1
  31. package/ts_build/src/agents/tools/list.js +48 -0
  32. package/ts_build/src/agents/tools/list.js.map +1 -1
  33. package/ts_build/src/chat/CliChatService.js.map +1 -1
  34. package/ts_build/src/chat/modules/AgentModule.d.ts +1 -4
  35. package/ts_build/src/chat/modules/AgentModule.js +12 -15
  36. package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
  37. package/ts_build/src/chat/types.d.ts +1 -0
  38. package/ts_build/src/cli.js +3 -2
  39. package/ts_build/src/cli.js.map +1 -1
  40. package/ts_build/src/clients/anthropic.d.ts +8 -4
  41. package/ts_build/src/clients/anthropic.js +9 -0
  42. package/ts_build/src/clients/anthropic.js.map +1 -1
  43. package/ts_build/src/clients/contextLimits.d.ts +3 -0
  44. package/ts_build/src/clients/contextLimits.js +88 -0
  45. package/ts_build/src/clients/contextLimits.js.map +1 -0
  46. package/ts_build/src/clients/gemini.d.ts +14 -10
  47. package/ts_build/src/clients/gemini.js +9 -0
  48. package/ts_build/src/clients/gemini.js.map +1 -1
  49. package/ts_build/src/clients/index.d.ts +15 -0
  50. package/ts_build/src/clients/index.js +70 -0
  51. package/ts_build/src/clients/index.js.map +1 -1
  52. package/ts_build/src/clients/openai.d.ts +4 -0
  53. package/ts_build/src/clients/openai.js +9 -0
  54. package/ts_build/src/clients/openai.js.map +1 -1
  55. package/ts_build/src/clients/pricing/anthropic.d.ts +4 -4
  56. package/ts_build/src/clients/pricing/anthropic.js +0 -4
  57. package/ts_build/src/clients/pricing/anthropic.js.map +1 -1
  58. package/ts_build/src/clients/pricing/google.d.ts +10 -10
  59. package/ts_build/src/clients/pricing/google.js +74 -2
  60. package/ts_build/src/clients/pricing/google.js.map +1 -1
  61. package/ts_build/src/clients/pricing/openai.js +65 -0
  62. package/ts_build/src/clients/pricing/openai.js.map +1 -1
  63. package/ts_build/src/clients/types.d.ts +4 -0
  64. package/ts_build/src/clients/xai.d.ts +4 -0
  65. package/ts_build/src/clients/xai.js +9 -0
  66. package/ts_build/src/clients/xai.js.map +1 -1
  67. package/ts_build/src/types.d.ts +33 -4
  68. package/ts_build/src/types.js +73 -5
  69. package/ts_build/src/types.js.map +1 -1
  70. package/ts_build/tests/clients/pricing.test.d.ts +1 -0
  71. package/ts_build/tests/clients/pricing.test.js +90 -0
  72. package/ts_build/tests/clients/pricing.test.js.map +1 -0
@@ -0,0 +1,106 @@
1
+ import { Models, EmbeddingModels } from "../types";
2
+
3
+ /**
4
+ * Context window limits (in tokens) for all supported models.
5
+ * Sources:
6
+ * - OpenAI: https://platform.openai.com/docs/models
7
+ * - Anthropic: https://docs.anthropic.com/en/docs/about-claude/models
8
+ * - Google: https://ai.google.dev/gemini-api/docs/models
9
+ * - xAI: https://docs.x.ai/developers/models
10
+ */
11
+ export const ContextLimits: Record<string, number> = {
12
+ // ─── OpenAI ───────────────────────────────────────────────────────────────
13
+ [Models.openai.GPT_54]: 1_000_000,
14
+ [Models.openai.GPT_54_Mini]: 400_000,
15
+ [Models.openai.GPT_54_Nano]: 400_000,
16
+ [Models.openai.GPT_54_Pro]: 1_000_000,
17
+ [Models.openai.GPT_53_Chat]: 1_000_000,
18
+ [Models.openai.GPT_53_Codex]: 1_000_000,
19
+ [Models.openai.GPT_5]: 1_000_000,
20
+ [Models.openai.GPT_5_Mini]: 1_000_000,
21
+ [Models.openai.GPT_5_Nano]: 1_000_000,
22
+ [Models.openai.GPT_5_1]: 1_000_000,
23
+ [Models.openai.GPT_5_2]: 1_000_000,
24
+ [Models.openai.GPT_41]: 1_047_576,
25
+ [Models.openai.GPT_41_Mini]: 1_047_576,
26
+ [Models.openai.GPT_41_Nano]: 1_047_576,
27
+ [Models.openai.GPT_45]: 128_000,
28
+ [Models.openai.GPT_4o]: 128_000,
29
+ [Models.openai.GPT_4o_Mini]: 128_000,
30
+ [Models.openai.GPT_4o_Audio]: 128_000,
31
+ [Models.openai.GPT_4o_Realtime]: 128_000,
32
+ [Models.openai.GPT_4o_Mini_Audio]: 128_000,
33
+ [Models.openai.GPT_4o_Mini_Realtime]: 128_000,
34
+ [Models.openai.GPT_4o_Mini_Search]: 128_000,
35
+ [Models.openai.GPT_4o_Search]: 128_000,
36
+ [Models.openai.o1]: 200_000,
37
+ [Models.openai.o1_Mini]: 128_000,
38
+ [Models.openai.o1_Pro]: 200_000,
39
+ [Models.openai.o3]: 200_000,
40
+ [Models.openai.o3_Pro]: 200_000,
41
+ [Models.openai.o3_Mini]: 200_000,
42
+ [Models.openai.o4_Mini]: 200_000,
43
+
44
+ // ─── Anthropic ────────────────────────────────────────────────────────────
45
+ [Models.anthropic.Opus4_6]: 1_000_000,
46
+ [Models.anthropic.Sonnet4_6]: 1_000_000,
47
+ [Models.anthropic.Opus4_5]: 1_000_000,
48
+ [Models.anthropic.Opus4]: 200_000,
49
+ [Models.anthropic.Opus4_1]: 200_000,
50
+ [Models.anthropic.Sonnet4]: 200_000,
51
+ [Models.anthropic.Sonnet4_5]: 200_000,
52
+ [Models.anthropic.Haiku4_5]: 200_000,
53
+ [Models.anthropic.Sonnet3_7]: 200_000,
54
+ [Models.anthropic.Sonnet3_5]: 200_000,
55
+ [Models.anthropic.Haiku3_5]: 200_000,
56
+ [Models.anthropic.Opus3]: 200_000,
57
+ [Models.anthropic.Haiku3]: 200_000,
58
+
59
+ // ─── Google ───────────────────────────────────────────────────────────────
60
+ [Models.google.Gemini_31_Pro_Preview]: 1_000_000,
61
+ [Models.google.Gemini_31_Flash_Image_Preview]: 1_000_000,
62
+ [Models.google.Gemini_31_Flash_Lite_Preview]: 1_000_000,
63
+ [Models.google.Gemini_3_Flash_Preview]: 1_000_000,
64
+ [Models.google.Gemini_3_Pro_Image_Preview]: 1_000_000,
65
+ [Models.google.Gemini_25_Pro]: 1_000_000,
66
+ [Models.google.Gemini_25_Flash]: 1_000_000,
67
+ [Models.google.Gemini_25_Flash_Lite]: 1_000_000,
68
+ [Models.google.Gemini_25_Flash_Preview]: 1_000_000,
69
+ [Models.google.Gemini_25_Pro_Preview]: 1_000_000,
70
+ [Models.google.Gemini_25_Flash_Image]: 1_000_000,
71
+ [Models.google.Gemini_25_Flash_Live]: 1_000_000,
72
+ [Models.google.Gemini_25_Flash_Native_Audio]: 1_000_000,
73
+ [Models.google.Gemini_25_Flash_TTS]: 1_000_000,
74
+ [Models.google.Gemini_25_Pro_TTS]: 1_000_000,
75
+ [Models.google.Gemini_20_Flash]: 1_000_000,
76
+ [Models.google.Gemini_20_Flash_Preview_Image_Generation]: 1_000_000,
77
+ [Models.google.Gemini_20_Flash_Lite]: 1_000_000,
78
+ [Models.google.Gemini_20_Flash_Live]: 1_000_000,
79
+ [Models.google.Gemini_20_Flash_TTS]: 1_000_000,
80
+ [Models.google.Gemini_15_Flash]: 1_000_000,
81
+ [Models.google.Gemini_15_Flash_8B]: 1_000_000,
82
+ [Models.google.Gemini_15_Pro]: 2_000_000,
83
+
84
+ // ─── xAI ──────────────────────────────────────────────────────────────────
85
+ [Models.xai.Grok4_1_Fast_Reasoning]: 2_000_000,
86
+ [Models.xai.Grok4_1_Fast_NonReasoning]: 2_000_000,
87
+ [Models.xai.GrokCodeFast]: 2_000_000,
88
+ [Models.xai.Grok4]: 131_072,
89
+ [Models.xai.Grok3Beta]: 131_072,
90
+ [Models.xai.Grok3MiniBeta]: 131_072,
91
+ [Models.xai.Grok3FastBeta]: 131_072,
92
+ [Models.xai.Grok3MiniFastBeta]: 131_072,
93
+ [Models.xai.Grok21212]: 131_072,
94
+ [Models.xai.Grok2Vision1212]: 131_072,
95
+ };
96
+
97
+ /** Default fallback context window limit (tokens) used when a model is not found. */
98
+ export const DEFAULT_CONTEXT_LIMIT = 30_000;
99
+
100
+ /**
101
+ * Returns the context window limit (in tokens) for a given model.
102
+ * Falls back to DEFAULT_CONTEXT_LIMIT if the model is not recognized.
103
+ */
104
+ export function getModelContextLimit(model: string): number {
105
+ return ContextLimits[model] ?? DEFAULT_CONTEXT_LIMIT;
106
+ }
@@ -15,6 +15,7 @@ import * as pathSync from "path";
15
15
  import { wait } from "../utils";
16
16
  import { EmbeddingModels, Models } from "../types";
17
17
  import { GeminiTextPricing } from "./pricing";
18
+ import { ContextLimits } from "./contextLimits";
18
19
 
19
20
  import {
20
21
  GenericClient,
@@ -916,4 +917,14 @@ export class GenericGeminiClient implements GenericClient {
916
917
  throw error;
917
918
  }
918
919
  }
920
+
921
+ getContextLimit(model: string): { contextLimit: number; threshold: number } | undefined {
922
+ const contextLimit = ContextLimits[model];
923
+ if (contextLimit === undefined) return undefined;
924
+ const pricing = GeminiTextPricing[model];
925
+ // If the model has tiered pricing above 200k tokens, use 200k as the threshold
926
+ const threshold =
927
+ pricing && "input_gt_200k" in pricing ? 200_000 : contextLimit;
928
+ return { contextLimit, threshold };
929
+ }
919
930
  }
@@ -25,9 +25,23 @@ import { GenericGeminiClient } from "./gemini";
25
25
  import { HttpClient } from "./http";
26
26
  import { EmbeddingModels, Models } from "../types";
27
27
  import { getConfig } from "../config";
28
+ import {
29
+ GoogleImageModels,
30
+ GoogleVideoModels,
31
+ GoogleTTSModels,
32
+ OpenAiImageModels,
33
+ OpenAiVideoModels,
34
+ OpenAiTTSModels,
35
+ OpenAiTranscriptionModels,
36
+ XaiImageModels,
37
+ XaiVideoModels,
38
+ } from "../types";
28
39
  import { GenericXAIClient } from "./xai";
29
40
  import { KnowhowGenericClient } from "./knowhow";
30
41
  import { loadKnowhowJwt } from "../services/KnowhowClient";
42
+ import { ContextLimits } from "./contextLimits";
43
+
44
+ export type ModelModality = "completion" | "embedding" | "image" | "audio" | "video";
31
45
 
32
46
  function envCheck(key: string): boolean {
33
47
  const value = process.env[key];
@@ -85,6 +99,39 @@ export class AIClient {
85
99
  ...(envCheck("XAI_API_KEY") && { xai: this.completionModels.xai }),
86
100
  };
87
101
 
102
+ imageModels: Record<string, string[]> = {
103
+ ...(envCheck("OPENAI_KEY") && {
104
+ openai: OpenAiImageModels,
105
+ }),
106
+ ...(envCheck("GEMINI_API_KEY") && {
107
+ google: GoogleImageModels,
108
+ }),
109
+ ...(envCheck("XAI_API_KEY") && {
110
+ xai: XaiImageModels,
111
+ }),
112
+ };
113
+
114
+ audioModels: Record<string, string[]> = {
115
+ ...(envCheck("OPENAI_KEY") && {
116
+ openai: [...OpenAiTTSModels, ...OpenAiTranscriptionModels],
117
+ }),
118
+ ...(envCheck("GEMINI_API_KEY") && {
119
+ google: GoogleTTSModels,
120
+ }),
121
+ };
122
+
123
+ videoModels: Record<string, string[]> = {
124
+ ...(envCheck("OPENAI_KEY") && {
125
+ openai: OpenAiVideoModels,
126
+ }),
127
+ ...(envCheck("GEMINI_API_KEY") && {
128
+ google: GoogleVideoModels,
129
+ }),
130
+ ...(envCheck("XAI_API_KEY") && {
131
+ xai: XaiVideoModels,
132
+ }),
133
+ };
134
+
88
135
  getClient(provider: string, model?: string) {
89
136
  if (provider && !model) {
90
137
  return { client: this.clients[provider], provider, model: undefined };
@@ -215,6 +262,39 @@ export class AIClient {
215
262
  );
216
263
  }
217
264
 
265
+ registerImageModels(provider: string, models: string[]) {
266
+ const currentModels = this.clientModels[provider] || [];
267
+ const currentImageModels = this.imageModels[provider] || [];
268
+ this.clientModels[provider] = Array.from<string>(
269
+ new Set(currentModels.concat(models))
270
+ );
271
+ this.imageModels[provider] = Array.from<string>(
272
+ new Set(currentImageModels.concat(models))
273
+ );
274
+ }
275
+
276
+ registerAudioModels(provider: string, models: string[]) {
277
+ const currentModels = this.clientModels[provider] || [];
278
+ const currentAudioModels = this.audioModels[provider] || [];
279
+ this.clientModels[provider] = Array.from<string>(
280
+ new Set(currentModels.concat(models))
281
+ );
282
+ this.audioModels[provider] = Array.from<string>(
283
+ new Set(currentAudioModels.concat(models))
284
+ );
285
+ }
286
+
287
+ registerVideoModels(provider: string, models: string[]) {
288
+ const currentModels = this.clientModels[provider] || [];
289
+ const currentVideoModels = this.videoModels[provider] || [];
290
+ this.clientModels[provider] = Array.from<string>(
291
+ new Set(currentModels.concat(models))
292
+ );
293
+ this.videoModels[provider] = Array.from<string>(
294
+ new Set(currentVideoModels.concat(models))
295
+ );
296
+ }
297
+
218
298
  providerHasModel(provider: string, model: string): boolean {
219
299
  const models = this.clientModels[provider];
220
300
  if (!models) return false;
@@ -517,6 +597,37 @@ export class AIClient {
517
597
  listAllProviders() {
518
598
  return Object.keys(this.clientModels);
519
599
  }
600
+
601
+ listAllImageModels() {
602
+ return this.imageModels;
603
+ }
604
+
605
+ listAllAudioModels() {
606
+ return this.audioModels;
607
+ }
608
+
609
+ listAllVideoModels() {
610
+ return this.videoModels;
611
+ }
612
+
613
+ /**
614
+ * Returns the context window limit (in tokens) for a given model.
615
+ * Optionally accepts a provider for future provider-specific overrides.
616
+ * Delegates to the registered client's getContextLimit() if available,
617
+ * so custom clients can provide their own context limits.
618
+ * Returns undefined if neither the client nor the global ContextLimits table knows the model.
619
+ */
620
+ getContextLimit(provider: string, model: string): { contextLimit: number; threshold: number } | undefined {
621
+ // Try the registered client first
622
+ const client = this.clients[provider];
623
+ if (client?.getContextLimit) {
624
+ return client.getContextLimit(model);
625
+ }
626
+ // Fall back to the global ContextLimits table
627
+ const contextLimit = ContextLimits[model];
628
+ if (contextLimit === undefined) return undefined;
629
+ return { contextLimit, threshold: contextLimit };
630
+ }
520
631
  }
521
632
 
522
633
  export const Clients = new AIClient();
@@ -528,5 +639,6 @@ export * from "./openai";
528
639
  export * from "./anthropic";
529
640
  export * from "./knowhow";
530
641
  export * from "./gemini";
642
+ export * from "./contextLimits";
531
643
  export * from "./xai";
532
644
  export * from "./knowhowMcp";
@@ -1,6 +1,7 @@
1
1
  import OpenAI from "openai";
2
2
  import { getConfigSync } from "../config";
3
3
  import { OpenAiTextPricing } from "./pricing";
4
+ import { ContextLimits } from "./contextLimits";
4
5
  import {
5
6
  GenericClient,
6
7
  CompletionOptions,
@@ -429,4 +430,14 @@ export class GenericOpenAiClient implements GenericClient {
429
430
  const data = Buffer.from(await response.arrayBuffer());
430
431
  return { data, mimeType };
431
432
  }
433
+
434
+ getContextLimit(model: string): { contextLimit: number; threshold: number } | undefined {
435
+ const contextLimit = ContextLimits[model];
436
+ if (contextLimit === undefined) return undefined;
437
+ const pricing = OpenAiTextPricing[model];
438
+ // If the model has tiered pricing above 200k tokens, use 200k as the threshold
439
+ const threshold =
440
+ pricing && "input_gt_200k" in pricing ? 200_000 : contextLimit;
441
+ return { contextLimit, threshold };
442
+ }
432
443
  }
@@ -3,19 +3,15 @@ import { Models } from "../../types";
3
3
  export const AnthropicTextPricing = {
4
4
  [Models.anthropic.Opus4_6]: {
5
5
  input: 5.0,
6
- input_gt_200k: 10.0,
7
6
  cache_write: 6.25,
8
7
  cache_hit: 0.5,
9
8
  output: 25.0,
10
- output_gt_200k: 37.5,
11
9
  },
12
10
  [Models.anthropic.Sonnet4_6]: {
13
11
  input: 3.0,
14
- input_gt_200k: 6.0,
15
12
  cache_write: 3.75,
16
13
  cache_hit: 0.3,
17
14
  output: 15.0,
18
- output_gt_200k: 22.5,
19
15
  },
20
16
  [Models.anthropic.Opus4_5]: {
21
17
  input: 5.0,
@@ -1,7 +1,8 @@
1
1
  import { Models, EmbeddingModels } from "../../types";
2
2
 
3
3
  export const GeminiTextPricing = {
4
- [Models.google.Gemini_3_Preview]: {
4
+ // Gemini 3.x
5
+ [Models.google.Gemini_31_Pro_Preview]: {
5
6
  input: 2,
6
7
  input_gt_200k: 4,
7
8
  output: 12,
@@ -9,6 +10,45 @@ export const GeminiTextPricing = {
9
10
  context_caching: 0.2,
10
11
  context_caching_gt_200k: 0.4,
11
12
  },
13
+ [Models.google.Gemini_31_Flash_Image_Preview]: {
14
+ input: 0.5,
15
+ output: 3,
16
+ image_generation: 0.045, // per 0.5K image
17
+ },
18
+ [Models.google.Gemini_31_Flash_Lite_Preview]: {
19
+ input: 0.25,
20
+ output: 1.5,
21
+ context_caching: 0.025,
22
+ },
23
+ [Models.google.Gemini_3_Flash_Preview]: {
24
+ input: 0.5,
25
+ output: 3.0,
26
+ context_caching: 0.05,
27
+ },
28
+ [Models.google.Gemini_3_Pro_Image_Preview]: {
29
+ input: 2,
30
+ output: 12,
31
+ image_generation: 0.134, // per 1K/2K image
32
+ },
33
+ // Gemini 2.5
34
+ [Models.google.Gemini_25_Pro]: {
35
+ input: 1.25,
36
+ input_gt_200k: 2.5,
37
+ output: 10.0,
38
+ output_gt_200k: 15.0,
39
+ context_caching: 0.125,
40
+ context_caching_gt_200k: 0.25,
41
+ },
42
+ [Models.google.Gemini_25_Flash]: {
43
+ input: 0.3,
44
+ output: 2.5,
45
+ context_caching: 0.03,
46
+ },
47
+ [Models.google.Gemini_25_Flash_Lite]: {
48
+ input: 0.1,
49
+ output: 0.4,
50
+ context_caching: 0.01,
51
+ },
12
52
  [Models.google.Gemini_25_Flash_Preview]: {
13
53
  input: 0.3,
14
54
  output: 2.5,
@@ -23,6 +63,19 @@ export const GeminiTextPricing = {
23
63
  context_caching: 0.125,
24
64
  context_caching_gt_200k: 0.25,
25
65
  },
66
+ [Models.google.Gemini_25_Flash_Image]: {
67
+ input: 0.3,
68
+ output: 0.039, // per image ($30/1M tokens, 1290 tokens per image)
69
+ },
70
+ [Models.google.Gemini_25_Flash_TTS]: {
71
+ input: 0.5,
72
+ output: 10.0,
73
+ },
74
+ [Models.google.Gemini_25_Pro_TTS]: {
75
+ input: 1.0,
76
+ output: 20.0,
77
+ },
78
+ // Gemini 2.0 (deprecated)
26
79
  [Models.google.Gemini_20_Flash]: {
27
80
  input: 0.1,
28
81
  output: 0.4,
@@ -37,6 +90,7 @@ export const GeminiTextPricing = {
37
90
  input: 0.075,
38
91
  output: 0.3,
39
92
  },
93
+ // Gemini 1.5 (legacy)
40
94
  [Models.google.Gemini_15_Flash]: {
41
95
  input: 0.075,
42
96
  output: 0.3,
@@ -52,14 +106,39 @@ export const GeminiTextPricing = {
52
106
  output: 5.0,
53
107
  context_caching: 0.3125,
54
108
  },
109
+ // Image generation
55
110
  [Models.google.Imagen_3]: {
56
- image_generation: 0.03,
111
+ image_generation: 0.04, // Imagen 4 Standard: $0.04/image
112
+ },
113
+ [Models.google.Imagen_4_Fast]: {
114
+ image_generation: 0.02, // $0.02/image
57
115
  },
116
+ [Models.google.Imagen_4_Ultra]: {
117
+ image_generation: 0.06, // $0.06/image
118
+ },
119
+ // Video generation
58
120
  [Models.google.Veo_2]: {
59
121
  video_generation: 0.35,
60
122
  },
123
+ [Models.google.Veo_3]: {
124
+ video_generation: 0.4, // $0.40/second
125
+ },
126
+ [Models.google.Veo_3_Fast]: {
127
+ video_generation: 0.15, // $0.15/second
128
+ },
129
+ [Models.google.Veo_3_1]: {
130
+ video_generation: 0.4, // $0.40/second (720p/1080p)
131
+ },
132
+ [Models.google.Veo_3_1_Fast]: {
133
+ video_generation: 0.15, // $0.15/second
134
+ },
135
+ // Embeddings
61
136
  [EmbeddingModels.google.Gemini_Embedding]: {
62
137
  input: 0, // Free of charge
63
138
  output: 0, // Free of charge
64
139
  },
140
+ [EmbeddingModels.google.Gemini_Embedding_001]: {
141
+ input: 0.15,
142
+ output: 0,
143
+ },
65
144
  };
@@ -1,6 +1,36 @@
1
1
  import { Models, EmbeddingModels } from "../../types";
2
2
 
3
3
  export const OpenAiTextPricing = {
4
+ [Models.openai.GPT_54]: {
5
+ input: 2.5,
6
+ cached_input: 0.25,
7
+ output: 15.0,
8
+ },
9
+ [Models.openai.GPT_54_Mini]: {
10
+ input: 0.75,
11
+ cached_input: 0.075,
12
+ output: 4.5,
13
+ },
14
+ [Models.openai.GPT_54_Nano]: {
15
+ input: 0.2,
16
+ cached_input: 0.02,
17
+ output: 1.25,
18
+ },
19
+ [Models.openai.GPT_54_Pro]: {
20
+ input: 30.0,
21
+ cached_input: 0,
22
+ output: 180.0,
23
+ },
24
+ [Models.openai.GPT_53_Chat]: {
25
+ input: 1.75,
26
+ cached_input: 0.175,
27
+ output: 14.0,
28
+ },
29
+ [Models.openai.GPT_53_Codex]: {
30
+ input: 1.75,
31
+ cached_input: 0.175,
32
+ output: 14.0,
33
+ },
4
34
  [Models.openai.GPT_4o]: {
5
35
  input: 2.5,
6
36
  cached_input: 1.25,
@@ -76,6 +106,11 @@ export const OpenAiTextPricing = {
76
106
  cached_input: 0.5,
77
107
  output: 8.0,
78
108
  },
109
+ [Models.openai.o3_Pro]: {
110
+ input: 20.0,
111
+ cached_input: 0,
112
+ output: 80.0,
113
+ },
79
114
  [Models.openai.o4_Mini]: {
80
115
  input: 1.1,
81
116
  cached_input: 0.275,
@@ -131,4 +166,37 @@ export const OpenAiTextPricing = {
131
166
  cached_input: 0,
132
167
  output: 0,
133
168
  },
169
+ // New realtime models
170
+ [Models.openai.GPT_Realtime_15]: {
171
+ input: 4.0,
172
+ cached_input: 0.4,
173
+ output: 16.0,
174
+ },
175
+ [Models.openai.GPT_Realtime_Mini]: {
176
+ input: 0.6,
177
+ cached_input: 0.06,
178
+ output: 2.4,
179
+ },
180
+ // New image models (text token pricing)
181
+ [Models.openai.GPT_Image_15]: {
182
+ input: 5.0,
183
+ cached_input: 1.25,
184
+ output: 10.0,
185
+ },
186
+ [Models.openai.GPT_Image_1_Mini]: {
187
+ input: 2.0,
188
+ cached_input: 0.2,
189
+ output: 0,
190
+ },
191
+ // New transcription models
192
+ [Models.openai.GPT_4o_Transcribe]: {
193
+ input: 2.5,
194
+ cached_input: 0,
195
+ output: 10.0,
196
+ },
197
+ [Models.openai.GPT_4o_Mini_Transcribe]: {
198
+ input: 1.25,
199
+ cached_input: 0,
200
+ output: 5.0,
201
+ },
134
202
  };
@@ -258,4 +258,12 @@ export interface GenericClient {
258
258
  /** Download a file from the provider's file storage */
259
259
  downloadFile?(options: FileDownloadOptions): Promise<FileDownloadResponse>;
260
260
  getModels(): Promise<{ id: string }[]>;
261
+ /**
262
+ * Returns the context window limit and compression threshold for a given model,
263
+ * or undefined if the model is not known to this client.
264
+ * - contextLimit: the maximum number of tokens the model can handle
265
+ * - threshold: the point at which compression should kick in; equals contextLimit
266
+ * unless the model has tiered pricing (input_gt_200k), in which case it is 200_000
267
+ */
268
+ getContextLimit?(model: string): { contextLimit: number; threshold: number } | undefined;
261
269
  }
@@ -1,5 +1,6 @@
1
1
  import OpenAI from "openai";
2
2
  import { XaiTextPricing, XaiImagePricing, XaiVideoPricing } from "./pricing";
3
+ import { ContextLimits } from "./contextLimits";
3
4
  import {
4
5
  GenericClient,
5
6
  CompletionOptions,
@@ -392,4 +393,14 @@ export class GenericXAIClient implements GenericClient {
392
393
  mimeType,
393
394
  };
394
395
  }
396
+
397
+ getContextLimit(model: string): { contextLimit: number; threshold: number } | undefined {
398
+ const contextLimit = ContextLimits[model];
399
+ if (contextLimit === undefined) return undefined;
400
+ const pricing = XaiTextPricing[model];
401
+ // If the model has tiered pricing above 200k tokens, use 200k as the threshold
402
+ const threshold =
403
+ pricing && "input_gt_200k" in pricing ? 200_000 : contextLimit;
404
+ return { contextLimit, threshold };
405
+ }
395
406
  }