npm - @tyvm/knowhow - Versions diffs - 0.0.85 → 0.0.86 - Mend

@tyvm/knowhow 0.0.85 → 0.0.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50)

package/package.json +1 -1
package/src/agents/base/base.ts +99 -37
package/src/chat/CliChatService.ts +3 -1
package/src/chat/modules/AgentModule.ts +20 -31
package/src/chat/types.ts +1 -0
package/src/cli.ts +19 -10
package/src/clients/anthropic.ts +11 -0
package/src/clients/contextLimits.ts +106 -0
package/src/clients/gemini.ts +11 -0
package/src/clients/index.ts +21 -0
package/src/clients/openai.ts +11 -0
package/src/clients/pricing/anthropic.ts +0 -4
package/src/clients/types.ts +8 -0
package/src/clients/xai.ts +11 -0
package/src/types.ts +1 -2
package/ts_build/package.json +1 -1
package/ts_build/src/agents/base/base.d.ts +4 -0
package/ts_build/src/agents/base/base.js +53 -28
package/ts_build/src/agents/base/base.js.map +1 -1
package/ts_build/src/chat/CliChatService.js.map +1 -1
package/ts_build/src/chat/modules/AgentModule.d.ts +1 -4
package/ts_build/src/chat/modules/AgentModule.js +12 -15
package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
package/ts_build/src/chat/types.d.ts +1 -0
package/ts_build/src/cli.js +3 -2
package/ts_build/src/cli.js.map +1 -1
package/ts_build/src/clients/anthropic.d.ts +8 -4
package/ts_build/src/clients/anthropic.js +9 -0
package/ts_build/src/clients/anthropic.js.map +1 -1
package/ts_build/src/clients/contextLimits.d.ts +3 -0
package/ts_build/src/clients/contextLimits.js +88 -0
package/ts_build/src/clients/contextLimits.js.map +1 -0
package/ts_build/src/clients/gemini.d.ts +4 -0
package/ts_build/src/clients/gemini.js +9 -0
package/ts_build/src/clients/gemini.js.map +1 -1
package/ts_build/src/clients/index.d.ts +5 -0
package/ts_build/src/clients/index.js +12 -0
package/ts_build/src/clients/index.js.map +1 -1
package/ts_build/src/clients/openai.d.ts +4 -0
package/ts_build/src/clients/openai.js +9 -0
package/ts_build/src/clients/openai.js.map +1 -1
package/ts_build/src/clients/pricing/anthropic.d.ts +4 -4
package/ts_build/src/clients/pricing/anthropic.js +0 -4
package/ts_build/src/clients/pricing/anthropic.js.map +1 -1
package/ts_build/src/clients/types.d.ts +4 -0
package/ts_build/src/clients/xai.d.ts +4 -0
package/ts_build/src/clients/xai.js +9 -0
package/ts_build/src/clients/xai.js.map +1 -1
package/ts_build/src/types.d.ts +1 -2
package/ts_build/src/types.js.map +1 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tyvm/knowhow",
-  "version": "0.0.85",
+  "version": "0.0.86",
   "description": "ai cli with plugins and agents",
   "main": "ts_build/src/index.js",
   "bin": {

package/src/agents/base/base.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { EventEmitter } from "events";  // kept for reference; agentEvents now uses EventService
+import { EventEmitter } from "events"; // kept for reference; agentEvents now uses EventService
 import {
   GenericClient,
   Message,
@@ -18,6 +18,7 @@ import { EventService } from "../../services/EventService";
 import { AIClient, Clients } from "../../clients";
 import { Models } from "../../ai";
 import { MessageProcessor } from "../../services/MessageProcessor";
+import { DEFAULT_CONTEXT_LIMIT } from "../../clients/contextLimits";
 import { Marked } from "../../utils";
 export { Message, Tool, ToolCall };
@@ -58,6 +59,10 @@ export abstract class BaseAgent implements IAgent {
   protected turnCount = 0;
   protected totalCostUsd = 0;
   protected currentThread = 0;
+  protected compressThreshold = 30000;
+  protected compressMinMessages = 30;
   protected threads = [] as Message[][];
   protected pendingUserMessages = [] as Message[];
   protected taskBreakdown = "";
@@ -109,21 +114,30 @@ export abstract class BaseAgent implements IAgent {
     // Subscribe to "agent:msg" events for dynamic context loading
     // Use setListener with a key so re-creating the agent doesn't double-subscribe
-    this.events.setListener({ key: `agent:msg:${this.constructor.name}`, event: this.eventTypes.agentMsg }, (eventData: any) => {
-      if (
-        this.status === this.eventTypes.inProgress ||
-        this.status === this.eventTypes.pause
-      ) {
-        const message = {
-          role: "user",
-          content: JSON.stringify(eventData),
-        } as Message;
-        this.addPendingMessage(message);
+    this.events.setListener(
+      {
+        key: `agent:msg:${this.constructor.name}`,
+        event: this.eventTypes.agentMsg,
+      },
+      (eventData: any) => {
+        if (
+          this.status === this.eventTypes.inProgress ||
+          this.status === this.eventTypes.pause
+        ) {
+          const message = {
+            role: "user",
+            content: JSON.stringify(eventData),
+          } as Message;
+          this.addPendingMessage(message);
+        }
       }
-    });
+    );
   }
-  protected log(message: string, level: "info" | "warn" | "error" = "info"): void {
+  protected log(
+    message: string,
+    level: "info" | "warn" | "error" = "info"
+  ): void {
     this.agentEvents.emit(this.eventTypes.agentLog, {
       agentName: this.name,
       message,
@@ -133,6 +147,29 @@ export abstract class BaseAgent implements IAgent {
     });
   }
+  setCompressThreshold(threshold: number) {
+    this.compressThreshold = threshold;
+  }
+  /**
+   * Returns the effective compress threshold for the current model.
+   * If the user has manually set a custom threshold (different from the default 30k),
+   * that value is used as-is. Otherwise, the threshold is dynamically computed as
+   * 85% of the model's context window limit, falling back to DEFAULT_CONTEXT_LIMIT.
+   */
+  getCompressThreshold(): number {
+    if (this.compressThreshold !== DEFAULT_CONTEXT_LIMIT) {
+      return this.compressThreshold;
+    }
+    const result = this.clientService.getContextLimit(
+      this.getProvider() as string,
+      this.getModel()
+    );
+    const contextLimit = result?.contextLimit ?? DEFAULT_CONTEXT_LIMIT;
+    const threshold = result?.threshold ?? contextLimit;
+    return Math.floor(threshold * 0.85);
+  }
   setMaxTurns(maxTurns: number | null) {
     this.maxTurns = maxTurns;
   }
@@ -259,7 +296,10 @@ export abstract class BaseAgent implements IAgent {
   private checkLimits(): boolean {
     // Check turn limit
     if (this.maxTurns !== null && this.turnCount >= this.maxTurns) {
-      this.log(`Turn limit reached: ${this.turnCount}/${this.maxTurns}`, "warn");
+      this.log(
+        `Turn limit reached: ${this.turnCount}/${this.maxTurns}`,
+        "warn"
+      );
       return true;
     }
@@ -268,8 +308,9 @@ export abstract class BaseAgent implements IAgent {
       this.log(
         `Spend limit reached: $${this.totalCostUsd.toFixed(
           4
-        )}/$${this.maxSpend.toFixed(4)}`
-      , "warn");
+        )}/$${this.maxSpend.toFixed(4)}`,
+        "warn"
+      );
       return true;
     }
@@ -333,7 +374,9 @@ export abstract class BaseAgent implements IAgent {
     return this.summaries;
   }
-  abstract getInitialMessages(userInput: string | MessageContent[]): Promise<Message[]>;
+  abstract getInitialMessages(
+    userInput: string | MessageContent[]
+  ): Promise<Message[]>;
   async processToolMessages(toolCall: ToolCall) {
     this.agentEvents.emit(this.eventTypes.toolCall, { toolCall });
@@ -431,7 +474,11 @@ export abstract class BaseAgent implements IAgent {
     }
     this.log(
-      `Required tool: [${this.requiredToolNames}] not available, checking for finalAnswer. Enabled: ${this.getEnabledToolNames().join(", ")}`
+      `Required tool: [${
+        this.requiredToolNames
+      }] not available, checking for finalAnswer. Enabled: ${this.getEnabledToolNames().join(
+        ", "
+      )}`
     );
     // Otherwise we're missing the required tool, lets use finalAnswer if we have it
@@ -442,8 +489,9 @@ export abstract class BaseAgent implements IAgent {
     // We have the final answer tool, but it wasn't required
     if (hasFinalAnswer && !requiredFinalAnswer) {
       this.log(
-        "Required tool not available, setting finalAnswer as required tool"
-      , "warn");
+        "Required tool not available, setting finalAnswer as required tool",
+        "warn"
+      );
       this.requiredToolNames.push("finalAnswer");
       return false;
     }
@@ -491,7 +539,11 @@ export abstract class BaseAgent implements IAgent {
     } as Message);
   }
-  async call(userInput: string | MessageContent[], _messages?: Message[], retryCount = 0) {
+  async call(
+    userInput: string | MessageContent[],
+    _messages?: Message[],
+    retryCount = 0
+  ) {
     if (this.status === this.eventTypes.notStarted) {
       this.status = this.eventTypes.inProgress;
     }
@@ -551,7 +603,6 @@ export abstract class BaseAgent implements IAgent {
         messages,
         "pre_call"
       );
-      const compressThreshold = 30000;
       const response = await this.getClient().createChatCompletion({
         model,
@@ -575,6 +626,7 @@ export abstract class BaseAgent implements IAgent {
       }
       this.adjustTotalCostUsd(response?.usd_cost);
+      this.log("agent response cost: " + response?.usd_cost);
       // Typically, there's only one choice in the array, but you could have many
       // If you set `n` to more than 1, you will get multiple choices
@@ -679,12 +731,14 @@ export abstract class BaseAgent implements IAgent {
       }
       if (
-        this.getMessagesLength(messages) > compressThreshold &&
-        messages.length > 30
+        this.getMessagesLength(messages) > this.getCompressThreshold() &&
+        messages.length > this.compressMinMessages
       ) {
         const taskBreakdown = await this.getTaskBreakdown(messages);
         this.log(
-          `Compressing messages: ${this.getMessagesLength(messages)} exceeds ${compressThreshold}`
+          `Compressing messages: ${this.getMessagesLength(
+            messages
+          )} exceeds ${this.getCompressThreshold()}`
         );
         messages = await this.compressMessages(messages, startIndex, endIndex);
         this.startNewThread(messages);
@@ -713,6 +767,7 @@ export abstract class BaseAgent implements IAgent {
       if (e.toString().includes("429")) {
         this.setNotHealthy();
         return this.call(userInput, _messages, retryCount);
+      }
       const errorStr = e.toString();
       const isNonRetriable =
         errorStr.includes("401") ||
@@ -731,15 +786,15 @@ export abstract class BaseAgent implements IAgent {
       if (isRetriable && retryCount < 3) {
         const delay = 1000 * Math.pow(2, retryCount);
         this.log(
-          `Agent request failed (attempt ${retryCount + 1}/3), retrying in ${delay}ms: ${e.message}`,
+          `Agent request failed (attempt ${
+            retryCount + 1
+          }/3), retrying in ${delay}ms: ${e.message}`,
           "warn"
         );
         await new Promise((resolve) => setTimeout(resolve, delay));
         return this.call(userInput, _messages, retryCount + 1);
       }
-      }
       this.log(`Agent failed: ${e}`, "error");
       if ("response" in e && "data" in e.response) {
@@ -798,11 +853,16 @@ export abstract class BaseAgent implements IAgent {
       details: {
         totalCostUsd: this.getTotalCostUsd(),
         elapsedMs: this.runTime(),
-        remainingTimeMs: this.maxRunTimeMs && this.startTimeMs
-          ? this.maxRunTimeMs - (Date.now() - this.startTimeMs)
+        remainingTimeMs:
+          this.maxRunTimeMs && this.startTimeMs
+            ? this.maxRunTimeMs - (Date.now() - this.startTimeMs)
+            : undefined,
+        remainingTurns: this.maxTurns
+          ? this.maxTurns - this.turnCount
+          : undefined,
+        remainingBudget: this.maxSpend
+          ? this.maxSpend - this.totalCostUsd
           : undefined,
-        remainingTurns: this.maxTurns ? this.maxTurns - this.turnCount : undefined,
-        remainingBudget: this.maxSpend ? this.maxSpend - this.totalCostUsd : undefined,
       },
       timestamp: Date.now(),
     });
@@ -836,19 +896,21 @@ export abstract class BaseAgent implements IAgent {
     }
     const taskPrompt = `
+    Analyze all previous messages.
     Generate a detailed task breakdown for this conversation, include a section for the following:
     1. Task List
     2. Completion Criteria - when the agent should stop
-    This output will be used to guide the work of the agent, and determine when we've accomplished the goal
-    \n\n<ToAnalyze>${JSON.stringify(messages)}</ToAnalyze>`;
+    Your output will be used to guide the work of the agent, and determine when we've accomplished the goal
+    `;
     const model = this.getModel();
     const response = await this.getClient().createChatCompletion({
       model,
       messages: [
+        ...messages,
         {
           role: "user",
           content: taskPrompt,
@@ -859,9 +921,8 @@ export abstract class BaseAgent implements IAgent {
     this.adjustTotalCostUsd(response.usd_cost);
-    this.log(String(response));
     this.taskBreakdown = response.choices[0].message.content;
+    this.log(`task breakdown cost: ${response.usd_cost}`);
     return this.taskBreakdown;
   }
@@ -936,6 +997,7 @@ export abstract class BaseAgent implements IAgent {
       100
     ).toFixed(2);
+    this.log(`compression cost: ${response.usd_cost}`);
     this.log(
       `Compressed messages from ${oldLength} to ${newLength}, ${compressionRatio}% reduction in size`
     );

package/src/chat/CliChatService.ts CHANGED Viewed

@@ -318,7 +318,9 @@ export class CliChatService implements ChatService {
     while (true) {
       // Recompute available commands each iteration so mode changes are reflected in autocomplete
-      const currentCommandNames = this.getCommandsForActiveModes().map((cmd) => `/${cmd.name}`);
+      const currentCommandNames = this.getCommandsForActiveModes().map(
+        (cmd) => `/${cmd.name}`
+      );
       // Check active modes for a promptText first, then fall back to context.promptText, then default
       const activeModeWithPrompt = this.modes

package/src/chat/modules/AgentModule.ts CHANGED Viewed

@@ -556,28 +556,21 @@ Please continue from where you left off and complete the original request.
         return true;
       }
-      // Otherwise start a new agent task
-      // Create initial interaction for the chatHistory
-      const initialInteraction: ChatInteraction = {
-        input,
-        output: "", // Will be filled after agent completion
-        summaries: [],
-        lastThread: [],
-      };
+      context.chatHistory = context.chatHistory || [];
-      const { result, finalOutput } = await this.startAgent(
+      const { taskId } = await this.startAgent(
         context.selectedAgent,
         input,
-        context.chatHistory || []
+        context.chatHistory
       );
-      // Update the chatHistory with the completed interaction
-      if (result && finalOutput) {
-        initialInteraction.output = finalOutput;
-        context.chatHistory.push(initialInteraction);
-      }
+      context.chatHistory.push({
+        input,
+        output: "", // Output will be filled in when the agent responds and the session is updated
+        taskId,
+      });
-      return result;
+      return true;
     }
     return false;
   }
@@ -1005,7 +998,7 @@ Please continue from where you left off and complete the original request.
     selectedAgent: BaseAgent,
     initialInput: string,
     chatHistory: ChatInteraction[] = []
-  ): Promise<{ result: boolean; finalOutput?: string }> {
+  ) {
     try {
       const { agent, taskId, formattedPrompt } = await this.setupAgent({
         agentName: selectedAgent.name,
@@ -1013,15 +1006,12 @@ Please continue from where you left off and complete the original request.
         chatHistory,
         run: false, // Don't run yet, we need to set up event listeners first
       });
-      const result = await this.attachedAgentChatLoop(
-        taskId,
-        agent,
-        formattedPrompt
-      );
-      return result;
+      await this.attachedAgentChatLoop(taskId, agent, formattedPrompt);
+      return { taskId };
     } catch (error) {
       console.error("Error starting agent:", error);
-      return { result: false, finalOutput: "Error starting agent" };
     }
   }
@@ -1029,7 +1019,7 @@ Please continue from where you left off and complete the original request.
     taskId: string,
     agent: AttachableAgent,
     initialInput?: string
-  ): Promise<{ result: boolean; finalOutput?: string }> {
+  ): Promise<void> {
     try {
       let agentFinalOutput: string | undefined;
@@ -1068,6 +1058,11 @@ Please continue from where you left off and complete the original request.
             }
           }
+          if (context.chatHistory) {
+            const found = context.chatHistory.find((h) => h.taskId === taskId);
+            found.output = agentFinalOutput;
+          }
           resolve("done");
           // Exit agent:attached mode so the prompt resets back to the default
           this.detachFromAgent();
@@ -1081,14 +1076,8 @@ Please continue from where you left off and complete the original request.
           taskInfo?.formattedPrompt || taskInfo?.initialInput || initialInput
         );
       }
-      // Return immediately — the main startChatLoop on CliChatService
-      // now handles all user input via the registered agent:attached commands.
-      // Any non-command input is forwarded to the agent via handleInput below.
-      return { result: true, finalOutput: agentFinalOutput };
     } catch (error) {
       console.error("Agent execution failed:", error);
-      return { result: false, finalOutput: "Error during agent execution" };
     }
   }
 }

package/src/chat/types.ts CHANGED Viewed

@@ -21,6 +21,7 @@ export interface ChatContext {
   plugins: string[];
   activeAgentTaskId?: string;
   renderer?: AgentRenderer;
+  chatHistory: ChatInteraction[];
   [key: string]: any;
 }

package/src/cli.ts CHANGED Viewed

@@ -242,14 +242,15 @@ async function main() {
             options.input || "Please continue from where you left off.";
           await agentModule.initialize(chatService);
-          const { taskCompleted } = await agentModule.resumeFromMessages({
-            agentName: options.agentName || "Patcher",
-            input: resumeInput,
-            threads,
-            messageId: options.messageId,
-            taskId: options.taskId,
-          });
-          await taskCompleted;
+          const { taskCompleted: resumed } =
+            await agentModule.resumeFromMessages({
+              agentName: options.agentName || "Patcher",
+              input: resumeInput,
+              threads,
+              messageId: options.messageId,
+              taskId: options.taskId,
+            });
+          await resumed;
           return;
         }
@@ -320,6 +321,7 @@ async function main() {
           plugins: config.plugins.enabled,
           currentModel: options.model,
           currentProvider: options.provider,
+          chatHistory: [],
         });
       } catch (error) {
         console.error("Error asking AI:", error);
@@ -380,7 +382,10 @@ async function main() {
   program
     .command("sessions")
     .description("Manage agent sessions from CLI")
-    .option("--all", "Show all historical sessions (default: current process only)")
+    .option(
+      "--all",
+      "Show all historical sessions (default: current process only)"
+    )
     .option("--csv", "Output sessions as CSV")
     .action(async (options) => {
       try {
@@ -388,7 +393,11 @@ async function main() {
         await agentModule.initialize(chatService);
         const sessionsModule = new SessionsModule(agentModule);
         await sessionsModule.initialize(chatService);
-        await sessionsModule.logSessionTable(options.all || false, options.csv || false, true);
+        await sessionsModule.logSessionTable(
+          options.all || false,
+          options.csv || false,
+          true
+        );
       } catch (error) {
         console.error("Error listing sessions:", error);
         process.exit(1);

package/src/clients/anthropic.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import Anthropic from "@anthropic-ai/sdk";
 import { wait } from "../utils";
 import { AnthropicTextPricing } from "./pricing";
+import { ContextLimits } from "./contextLimits";
 import { Models } from "../types";
 import {
   GenericClient,
@@ -483,6 +484,16 @@ export class GenericAnthropicClient implements GenericClient {
     throw new Error("Anthropic does not support audio transcription");
   }
+  getContextLimit(model: string): { contextLimit: number; threshold: number } | undefined {
+    const contextLimit = ContextLimits[model];
+    if (contextLimit === undefined) return undefined;
+    const pricing = AnthropicTextPricing[model];
+    // If the model has tiered pricing above 200k tokens, use 200k as the threshold
+    const threshold =
+      pricing && "input_gt_200k" in pricing ? 200_000 : contextLimit;
+    return { contextLimit, threshold };
+  }
   async createAudioGeneration(
     options: AudioGenerationOptions
   ): Promise<AudioGenerationResponse> {

package/src/clients/contextLimits.ts ADDED Viewed

@@ -0,0 +1,106 @@
+import { Models, EmbeddingModels } from "../types";
+/**
+ * Context window limits (in tokens) for all supported models.
+ * Sources:
+ * - OpenAI: https://platform.openai.com/docs/models
+ * - Anthropic: https://docs.anthropic.com/en/docs/about-claude/models
+ * - Google: https://ai.google.dev/gemini-api/docs/models
+ * - xAI: https://docs.x.ai/developers/models
+ */
+export const ContextLimits: Record<string, number> = {
+  // ─── OpenAI ───────────────────────────────────────────────────────────────
+  [Models.openai.GPT_54]: 1_000_000,
+  [Models.openai.GPT_54_Mini]: 400_000,
+  [Models.openai.GPT_54_Nano]: 400_000,
+  [Models.openai.GPT_54_Pro]: 1_000_000,
+  [Models.openai.GPT_53_Chat]: 1_000_000,
+  [Models.openai.GPT_53_Codex]: 1_000_000,
+  [Models.openai.GPT_5]: 1_000_000,
+  [Models.openai.GPT_5_Mini]: 1_000_000,
+  [Models.openai.GPT_5_Nano]: 1_000_000,
+  [Models.openai.GPT_5_1]: 1_000_000,
+  [Models.openai.GPT_5_2]: 1_000_000,
+  [Models.openai.GPT_41]: 1_047_576,
+  [Models.openai.GPT_41_Mini]: 1_047_576,
+  [Models.openai.GPT_41_Nano]: 1_047_576,
+  [Models.openai.GPT_45]: 128_000,
+  [Models.openai.GPT_4o]: 128_000,
+  [Models.openai.GPT_4o_Mini]: 128_000,
+  [Models.openai.GPT_4o_Audio]: 128_000,
+  [Models.openai.GPT_4o_Realtime]: 128_000,
+  [Models.openai.GPT_4o_Mini_Audio]: 128_000,
+  [Models.openai.GPT_4o_Mini_Realtime]: 128_000,
+  [Models.openai.GPT_4o_Mini_Search]: 128_000,
+  [Models.openai.GPT_4o_Search]: 128_000,
+  [Models.openai.o1]: 200_000,
+  [Models.openai.o1_Mini]: 128_000,
+  [Models.openai.o1_Pro]: 200_000,
+  [Models.openai.o3]: 200_000,
+  [Models.openai.o3_Pro]: 200_000,
+  [Models.openai.o3_Mini]: 200_000,
+  [Models.openai.o4_Mini]: 200_000,
+  // ─── Anthropic ────────────────────────────────────────────────────────────
+  [Models.anthropic.Opus4_6]: 1_000_000,
+  [Models.anthropic.Sonnet4_6]: 1_000_000,
+  [Models.anthropic.Opus4_5]: 1_000_000,
+  [Models.anthropic.Opus4]: 200_000,
+  [Models.anthropic.Opus4_1]: 200_000,
+  [Models.anthropic.Sonnet4]: 200_000,
+  [Models.anthropic.Sonnet4_5]: 200_000,
+  [Models.anthropic.Haiku4_5]: 200_000,
+  [Models.anthropic.Sonnet3_7]: 200_000,
+  [Models.anthropic.Sonnet3_5]: 200_000,
+  [Models.anthropic.Haiku3_5]: 200_000,
+  [Models.anthropic.Opus3]: 200_000,
+  [Models.anthropic.Haiku3]: 200_000,
+  // ─── Google ───────────────────────────────────────────────────────────────
+  [Models.google.Gemini_31_Pro_Preview]: 1_000_000,
+  [Models.google.Gemini_31_Flash_Image_Preview]: 1_000_000,
+  [Models.google.Gemini_31_Flash_Lite_Preview]: 1_000_000,
+  [Models.google.Gemini_3_Flash_Preview]: 1_000_000,
+  [Models.google.Gemini_3_Pro_Image_Preview]: 1_000_000,
+  [Models.google.Gemini_25_Pro]: 1_000_000,
+  [Models.google.Gemini_25_Flash]: 1_000_000,
+  [Models.google.Gemini_25_Flash_Lite]: 1_000_000,
+  [Models.google.Gemini_25_Flash_Preview]: 1_000_000,
+  [Models.google.Gemini_25_Pro_Preview]: 1_000_000,
+  [Models.google.Gemini_25_Flash_Image]: 1_000_000,
+  [Models.google.Gemini_25_Flash_Live]: 1_000_000,
+  [Models.google.Gemini_25_Flash_Native_Audio]: 1_000_000,
+  [Models.google.Gemini_25_Flash_TTS]: 1_000_000,
+  [Models.google.Gemini_25_Pro_TTS]: 1_000_000,
+  [Models.google.Gemini_20_Flash]: 1_000_000,
+  [Models.google.Gemini_20_Flash_Preview_Image_Generation]: 1_000_000,
+  [Models.google.Gemini_20_Flash_Lite]: 1_000_000,
+  [Models.google.Gemini_20_Flash_Live]: 1_000_000,
+  [Models.google.Gemini_20_Flash_TTS]: 1_000_000,
+  [Models.google.Gemini_15_Flash]: 1_000_000,
+  [Models.google.Gemini_15_Flash_8B]: 1_000_000,
+  [Models.google.Gemini_15_Pro]: 2_000_000,
+  // ─── xAI ──────────────────────────────────────────────────────────────────
+  [Models.xai.Grok4_1_Fast_Reasoning]: 2_000_000,
+  [Models.xai.Grok4_1_Fast_NonReasoning]: 2_000_000,
+  [Models.xai.GrokCodeFast]: 2_000_000,
+  [Models.xai.Grok4]: 131_072,
+  [Models.xai.Grok3Beta]: 131_072,
+  [Models.xai.Grok3MiniBeta]: 131_072,
+  [Models.xai.Grok3FastBeta]: 131_072,
+  [Models.xai.Grok3MiniFastBeta]: 131_072,
+  [Models.xai.Grok21212]: 131_072,
+  [Models.xai.Grok2Vision1212]: 131_072,
+};
+/** Default fallback context window limit (tokens) used when a model is not found. */
+export const DEFAULT_CONTEXT_LIMIT = 30_000;
+/**
+ * Returns the context window limit (in tokens) for a given model.
+ * Falls back to DEFAULT_CONTEXT_LIMIT if the model is not recognized.
+ */
+export function getModelContextLimit(model: string): number {
+  return ContextLimits[model] ?? DEFAULT_CONTEXT_LIMIT;
+}

package/src/clients/gemini.ts CHANGED Viewed

@@ -15,6 +15,7 @@ import * as pathSync from "path";
 import { wait } from "../utils";
 import { EmbeddingModels, Models } from "../types";
 import { GeminiTextPricing } from "./pricing";
+import { ContextLimits } from "./contextLimits";
 import {
   GenericClient,
@@ -916,4 +917,14 @@ export class GenericGeminiClient implements GenericClient {
       throw error;
     }
   }
+  getContextLimit(model: string): { contextLimit: number; threshold: number } | undefined {
+    const contextLimit = ContextLimits[model];
+    if (contextLimit === undefined) return undefined;
+    const pricing = GeminiTextPricing[model];
+    // If the model has tiered pricing above 200k tokens, use 200k as the threshold
+    const threshold =
+      pricing && "input_gt_200k" in pricing ? 200_000 : contextLimit;
+    return { contextLimit, threshold };
+  }
 }

package/src/clients/index.ts CHANGED Viewed

@@ -39,6 +39,7 @@ import {
 import { GenericXAIClient } from "./xai";
 import { KnowhowGenericClient } from "./knowhow";
 import { loadKnowhowJwt } from "../services/KnowhowClient";
+import { ContextLimits } from "./contextLimits";
 export type ModelModality = "completion" | "embedding" | "image" | "audio" | "video";
@@ -608,6 +609,25 @@ export class AIClient {
   listAllVideoModels() {
     return this.videoModels;
   }
+  /**
+   * Returns the context window limit (in tokens) for a given model.
+   * Optionally accepts a provider for future provider-specific overrides.
+   * Delegates to the registered client's getContextLimit() if available,
+   * so custom clients can provide their own context limits.
+   * Returns undefined if neither the client nor the global ContextLimits table knows the model.
+   */
+  getContextLimit(provider: string, model: string): { contextLimit: number; threshold: number } | undefined {
+    // Try the registered client first
+    const client = this.clients[provider];
+    if (client?.getContextLimit) {
+      return client.getContextLimit(model);
+    }
+    // Fall back to the global ContextLimits table
+    const contextLimit = ContextLimits[model];
+    if (contextLimit === undefined) return undefined;
+    return { contextLimit, threshold: contextLimit };
+  }
 }
 export const Clients = new AIClient();
@@ -619,5 +639,6 @@ export * from "./openai";
 export * from "./anthropic";
 export * from "./knowhow";
 export * from "./gemini";
+export * from "./contextLimits";
 export * from "./xai";
 export * from "./knowhowMcp";