@tyvm/knowhow 0.0.85 → 0.0.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/package.json +1 -1
  2. package/src/agents/base/base.ts +99 -37
  3. package/src/chat/CliChatService.ts +3 -1
  4. package/src/chat/modules/AgentModule.ts +20 -31
  5. package/src/chat/types.ts +1 -0
  6. package/src/cli.ts +19 -10
  7. package/src/clients/anthropic.ts +11 -0
  8. package/src/clients/contextLimits.ts +106 -0
  9. package/src/clients/gemini.ts +11 -0
  10. package/src/clients/index.ts +21 -0
  11. package/src/clients/openai.ts +11 -0
  12. package/src/clients/pricing/anthropic.ts +0 -4
  13. package/src/clients/types.ts +8 -0
  14. package/src/clients/xai.ts +11 -0
  15. package/src/types.ts +1 -2
  16. package/ts_build/package.json +1 -1
  17. package/ts_build/src/agents/base/base.d.ts +4 -0
  18. package/ts_build/src/agents/base/base.js +53 -28
  19. package/ts_build/src/agents/base/base.js.map +1 -1
  20. package/ts_build/src/chat/CliChatService.js.map +1 -1
  21. package/ts_build/src/chat/modules/AgentModule.d.ts +1 -4
  22. package/ts_build/src/chat/modules/AgentModule.js +12 -15
  23. package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
  24. package/ts_build/src/chat/types.d.ts +1 -0
  25. package/ts_build/src/cli.js +3 -2
  26. package/ts_build/src/cli.js.map +1 -1
  27. package/ts_build/src/clients/anthropic.d.ts +8 -4
  28. package/ts_build/src/clients/anthropic.js +9 -0
  29. package/ts_build/src/clients/anthropic.js.map +1 -1
  30. package/ts_build/src/clients/contextLimits.d.ts +3 -0
  31. package/ts_build/src/clients/contextLimits.js +88 -0
  32. package/ts_build/src/clients/contextLimits.js.map +1 -0
  33. package/ts_build/src/clients/gemini.d.ts +4 -0
  34. package/ts_build/src/clients/gemini.js +9 -0
  35. package/ts_build/src/clients/gemini.js.map +1 -1
  36. package/ts_build/src/clients/index.d.ts +5 -0
  37. package/ts_build/src/clients/index.js +12 -0
  38. package/ts_build/src/clients/index.js.map +1 -1
  39. package/ts_build/src/clients/openai.d.ts +4 -0
  40. package/ts_build/src/clients/openai.js +9 -0
  41. package/ts_build/src/clients/openai.js.map +1 -1
  42. package/ts_build/src/clients/pricing/anthropic.d.ts +4 -4
  43. package/ts_build/src/clients/pricing/anthropic.js +0 -4
  44. package/ts_build/src/clients/pricing/anthropic.js.map +1 -1
  45. package/ts_build/src/clients/types.d.ts +4 -0
  46. package/ts_build/src/clients/xai.d.ts +4 -0
  47. package/ts_build/src/clients/xai.js +9 -0
  48. package/ts_build/src/clients/xai.js.map +1 -1
  49. package/ts_build/src/types.d.ts +1 -2
  50. package/ts_build/src/types.js.map +1 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tyvm/knowhow",
3
- "version": "0.0.85",
3
+ "version": "0.0.86",
4
4
  "description": "ai cli with plugins and agents",
5
5
  "main": "ts_build/src/index.js",
6
6
  "bin": {
@@ -1,4 +1,4 @@
1
- import { EventEmitter } from "events"; // kept for reference; agentEvents now uses EventService
1
+ import { EventEmitter } from "events"; // kept for reference; agentEvents now uses EventService
2
2
  import {
3
3
  GenericClient,
4
4
  Message,
@@ -18,6 +18,7 @@ import { EventService } from "../../services/EventService";
18
18
  import { AIClient, Clients } from "../../clients";
19
19
  import { Models } from "../../ai";
20
20
  import { MessageProcessor } from "../../services/MessageProcessor";
21
+ import { DEFAULT_CONTEXT_LIMIT } from "../../clients/contextLimits";
21
22
  import { Marked } from "../../utils";
22
23
 
23
24
  export { Message, Tool, ToolCall };
@@ -58,6 +59,10 @@ export abstract class BaseAgent implements IAgent {
58
59
  protected turnCount = 0;
59
60
  protected totalCostUsd = 0;
60
61
  protected currentThread = 0;
62
+
63
+ protected compressThreshold = 30000;
64
+ protected compressMinMessages = 30;
65
+
61
66
  protected threads = [] as Message[][];
62
67
  protected pendingUserMessages = [] as Message[];
63
68
  protected taskBreakdown = "";
@@ -109,21 +114,30 @@ export abstract class BaseAgent implements IAgent {
109
114
 
110
115
  // Subscribe to "agent:msg" events for dynamic context loading
111
116
  // Use setListener with a key so re-creating the agent doesn't double-subscribe
112
- this.events.setListener({ key: `agent:msg:${this.constructor.name}`, event: this.eventTypes.agentMsg }, (eventData: any) => {
113
- if (
114
- this.status === this.eventTypes.inProgress ||
115
- this.status === this.eventTypes.pause
116
- ) {
117
- const message = {
118
- role: "user",
119
- content: JSON.stringify(eventData),
120
- } as Message;
121
- this.addPendingMessage(message);
117
+ this.events.setListener(
118
+ {
119
+ key: `agent:msg:${this.constructor.name}`,
120
+ event: this.eventTypes.agentMsg,
121
+ },
122
+ (eventData: any) => {
123
+ if (
124
+ this.status === this.eventTypes.inProgress ||
125
+ this.status === this.eventTypes.pause
126
+ ) {
127
+ const message = {
128
+ role: "user",
129
+ content: JSON.stringify(eventData),
130
+ } as Message;
131
+ this.addPendingMessage(message);
132
+ }
122
133
  }
123
- });
134
+ );
124
135
  }
125
136
 
126
- protected log(message: string, level: "info" | "warn" | "error" = "info"): void {
137
+ protected log(
138
+ message: string,
139
+ level: "info" | "warn" | "error" = "info"
140
+ ): void {
127
141
  this.agentEvents.emit(this.eventTypes.agentLog, {
128
142
  agentName: this.name,
129
143
  message,
@@ -133,6 +147,29 @@ export abstract class BaseAgent implements IAgent {
133
147
  });
134
148
  }
135
149
 
150
+ setCompressThreshold(threshold: number) {
151
+ this.compressThreshold = threshold;
152
+ }
153
+
154
+ /**
155
+ * Returns the effective compress threshold for the current model.
156
+ * If the user has manually set a custom threshold (different from the default 30k),
157
+ * that value is used as-is. Otherwise, the threshold is dynamically computed as
158
+ * 85% of the model's context window limit, falling back to DEFAULT_CONTEXT_LIMIT.
159
+ */
160
+ getCompressThreshold(): number {
161
+ if (this.compressThreshold !== DEFAULT_CONTEXT_LIMIT) {
162
+ return this.compressThreshold;
163
+ }
164
+ const result = this.clientService.getContextLimit(
165
+ this.getProvider() as string,
166
+ this.getModel()
167
+ );
168
+ const contextLimit = result?.contextLimit ?? DEFAULT_CONTEXT_LIMIT;
169
+ const threshold = result?.threshold ?? contextLimit;
170
+ return Math.floor(threshold * 0.85);
171
+ }
172
+
136
173
  setMaxTurns(maxTurns: number | null) {
137
174
  this.maxTurns = maxTurns;
138
175
  }
@@ -259,7 +296,10 @@ export abstract class BaseAgent implements IAgent {
259
296
  private checkLimits(): boolean {
260
297
  // Check turn limit
261
298
  if (this.maxTurns !== null && this.turnCount >= this.maxTurns) {
262
- this.log(`Turn limit reached: ${this.turnCount}/${this.maxTurns}`, "warn");
299
+ this.log(
300
+ `Turn limit reached: ${this.turnCount}/${this.maxTurns}`,
301
+ "warn"
302
+ );
263
303
  return true;
264
304
  }
265
305
 
@@ -268,8 +308,9 @@ export abstract class BaseAgent implements IAgent {
268
308
  this.log(
269
309
  `Spend limit reached: $${this.totalCostUsd.toFixed(
270
310
  4
271
- )}/$${this.maxSpend.toFixed(4)}`
272
- , "warn");
311
+ )}/$${this.maxSpend.toFixed(4)}`,
312
+ "warn"
313
+ );
273
314
  return true;
274
315
  }
275
316
 
@@ -333,7 +374,9 @@ export abstract class BaseAgent implements IAgent {
333
374
  return this.summaries;
334
375
  }
335
376
 
336
- abstract getInitialMessages(userInput: string | MessageContent[]): Promise<Message[]>;
377
+ abstract getInitialMessages(
378
+ userInput: string | MessageContent[]
379
+ ): Promise<Message[]>;
337
380
 
338
381
  async processToolMessages(toolCall: ToolCall) {
339
382
  this.agentEvents.emit(this.eventTypes.toolCall, { toolCall });
@@ -431,7 +474,11 @@ export abstract class BaseAgent implements IAgent {
431
474
  }
432
475
 
433
476
  this.log(
434
- `Required tool: [${this.requiredToolNames}] not available, checking for finalAnswer. Enabled: ${this.getEnabledToolNames().join(", ")}`
477
+ `Required tool: [${
478
+ this.requiredToolNames
479
+ }] not available, checking for finalAnswer. Enabled: ${this.getEnabledToolNames().join(
480
+ ", "
481
+ )}`
435
482
  );
436
483
 
437
484
  // Otherwise we're missing the required tool, lets use finalAnswer if we have it
@@ -442,8 +489,9 @@ export abstract class BaseAgent implements IAgent {
442
489
  // We have the final answer tool, but it wasn't required
443
490
  if (hasFinalAnswer && !requiredFinalAnswer) {
444
491
  this.log(
445
- "Required tool not available, setting finalAnswer as required tool"
446
- , "warn");
492
+ "Required tool not available, setting finalAnswer as required tool",
493
+ "warn"
494
+ );
447
495
  this.requiredToolNames.push("finalAnswer");
448
496
  return false;
449
497
  }
@@ -491,7 +539,11 @@ export abstract class BaseAgent implements IAgent {
491
539
  } as Message);
492
540
  }
493
541
 
494
- async call(userInput: string | MessageContent[], _messages?: Message[], retryCount = 0) {
542
+ async call(
543
+ userInput: string | MessageContent[],
544
+ _messages?: Message[],
545
+ retryCount = 0
546
+ ) {
495
547
  if (this.status === this.eventTypes.notStarted) {
496
548
  this.status = this.eventTypes.inProgress;
497
549
  }
@@ -551,7 +603,6 @@ export abstract class BaseAgent implements IAgent {
551
603
  messages,
552
604
  "pre_call"
553
605
  );
554
- const compressThreshold = 30000;
555
606
 
556
607
  const response = await this.getClient().createChatCompletion({
557
608
  model,
@@ -575,6 +626,7 @@ export abstract class BaseAgent implements IAgent {
575
626
  }
576
627
 
577
628
  this.adjustTotalCostUsd(response?.usd_cost);
629
+ this.log("agent response cost: " + response?.usd_cost);
578
630
 
579
631
  // Typically, there's only one choice in the array, but you could have many
580
632
  // If you set `n` to more than 1, you will get multiple choices
@@ -679,12 +731,14 @@ export abstract class BaseAgent implements IAgent {
679
731
  }
680
732
 
681
733
  if (
682
- this.getMessagesLength(messages) > compressThreshold &&
683
- messages.length > 30
734
+ this.getMessagesLength(messages) > this.getCompressThreshold() &&
735
+ messages.length > this.compressMinMessages
684
736
  ) {
685
737
  const taskBreakdown = await this.getTaskBreakdown(messages);
686
738
  this.log(
687
- `Compressing messages: ${this.getMessagesLength(messages)} exceeds ${compressThreshold}`
739
+ `Compressing messages: ${this.getMessagesLength(
740
+ messages
741
+ )} exceeds ${this.getCompressThreshold()}`
688
742
  );
689
743
  messages = await this.compressMessages(messages, startIndex, endIndex);
690
744
  this.startNewThread(messages);
@@ -713,6 +767,7 @@ export abstract class BaseAgent implements IAgent {
713
767
  if (e.toString().includes("429")) {
714
768
  this.setNotHealthy();
715
769
  return this.call(userInput, _messages, retryCount);
770
+ }
716
771
  const errorStr = e.toString();
717
772
  const isNonRetriable =
718
773
  errorStr.includes("401") ||
@@ -731,15 +786,15 @@ export abstract class BaseAgent implements IAgent {
731
786
  if (isRetriable && retryCount < 3) {
732
787
  const delay = 1000 * Math.pow(2, retryCount);
733
788
  this.log(
734
- `Agent request failed (attempt ${retryCount + 1}/3), retrying in ${delay}ms: ${e.message}`,
789
+ `Agent request failed (attempt ${
790
+ retryCount + 1
791
+ }/3), retrying in ${delay}ms: ${e.message}`,
735
792
  "warn"
736
793
  );
737
794
  await new Promise((resolve) => setTimeout(resolve, delay));
738
795
  return this.call(userInput, _messages, retryCount + 1);
739
796
  }
740
797
 
741
- }
742
-
743
798
  this.log(`Agent failed: ${e}`, "error");
744
799
 
745
800
  if ("response" in e && "data" in e.response) {
@@ -798,11 +853,16 @@ export abstract class BaseAgent implements IAgent {
798
853
  details: {
799
854
  totalCostUsd: this.getTotalCostUsd(),
800
855
  elapsedMs: this.runTime(),
801
- remainingTimeMs: this.maxRunTimeMs && this.startTimeMs
802
- ? this.maxRunTimeMs - (Date.now() - this.startTimeMs)
856
+ remainingTimeMs:
857
+ this.maxRunTimeMs && this.startTimeMs
858
+ ? this.maxRunTimeMs - (Date.now() - this.startTimeMs)
859
+ : undefined,
860
+ remainingTurns: this.maxTurns
861
+ ? this.maxTurns - this.turnCount
862
+ : undefined,
863
+ remainingBudget: this.maxSpend
864
+ ? this.maxSpend - this.totalCostUsd
803
865
  : undefined,
804
- remainingTurns: this.maxTurns ? this.maxTurns - this.turnCount : undefined,
805
- remainingBudget: this.maxSpend ? this.maxSpend - this.totalCostUsd : undefined,
806
866
  },
807
867
  timestamp: Date.now(),
808
868
  });
@@ -836,19 +896,21 @@ export abstract class BaseAgent implements IAgent {
836
896
  }
837
897
 
838
898
  const taskPrompt = `
899
+ Analyze all previous messages.
900
+
839
901
  Generate a detailed task breakdown for this conversation, include a section for the following:
840
902
  1. Task List
841
903
  2. Completion Criteria - when the agent should stop
842
904
 
843
- This output will be used to guide the work of the agent, and determine when we've accomplished the goal
844
-
845
- \n\n<ToAnalyze>${JSON.stringify(messages)}</ToAnalyze>`;
905
+ Your output will be used to guide the work of the agent, and determine when we've accomplished the goal
906
+ `;
846
907
 
847
908
  const model = this.getModel();
848
909
 
849
910
  const response = await this.getClient().createChatCompletion({
850
911
  model,
851
912
  messages: [
913
+ ...messages,
852
914
  {
853
915
  role: "user",
854
916
  content: taskPrompt,
@@ -859,9 +921,8 @@ export abstract class BaseAgent implements IAgent {
859
921
 
860
922
  this.adjustTotalCostUsd(response.usd_cost);
861
923
 
862
- this.log(String(response));
863
-
864
924
  this.taskBreakdown = response.choices[0].message.content;
925
+ this.log(`task breakdown cost: ${response.usd_cost}`);
865
926
  return this.taskBreakdown;
866
927
  }
867
928
 
@@ -936,6 +997,7 @@ export abstract class BaseAgent implements IAgent {
936
997
  100
937
998
  ).toFixed(2);
938
999
 
1000
+ this.log(`compression cost: ${response.usd_cost}`);
939
1001
  this.log(
940
1002
  `Compressed messages from ${oldLength} to ${newLength}, ${compressionRatio}% reduction in size`
941
1003
  );
@@ -318,7 +318,9 @@ export class CliChatService implements ChatService {
318
318
 
319
319
  while (true) {
320
320
  // Recompute available commands each iteration so mode changes are reflected in autocomplete
321
- const currentCommandNames = this.getCommandsForActiveModes().map((cmd) => `/${cmd.name}`);
321
+ const currentCommandNames = this.getCommandsForActiveModes().map(
322
+ (cmd) => `/${cmd.name}`
323
+ );
322
324
 
323
325
  // Check active modes for a promptText first, then fall back to context.promptText, then default
324
326
  const activeModeWithPrompt = this.modes
@@ -556,28 +556,21 @@ Please continue from where you left off and complete the original request.
556
556
  return true;
557
557
  }
558
558
 
559
- // Otherwise start a new agent task
560
- // Create initial interaction for the chatHistory
561
- const initialInteraction: ChatInteraction = {
562
- input,
563
- output: "", // Will be filled after agent completion
564
- summaries: [],
565
- lastThread: [],
566
- };
559
+ context.chatHistory = context.chatHistory || [];
567
560
 
568
- const { result, finalOutput } = await this.startAgent(
561
+ const { taskId } = await this.startAgent(
569
562
  context.selectedAgent,
570
563
  input,
571
- context.chatHistory || []
564
+ context.chatHistory
572
565
  );
573
566
 
574
- // Update the chatHistory with the completed interaction
575
- if (result && finalOutput) {
576
- initialInteraction.output = finalOutput;
577
- context.chatHistory.push(initialInteraction);
578
- }
567
+ context.chatHistory.push({
568
+ input,
569
+ output: "", // Output will be filled in when the agent responds and the session is updated
570
+ taskId,
571
+ });
579
572
 
580
- return result;
573
+ return true;
581
574
  }
582
575
  return false;
583
576
  }
@@ -1005,7 +998,7 @@ Please continue from where you left off and complete the original request.
1005
998
  selectedAgent: BaseAgent,
1006
999
  initialInput: string,
1007
1000
  chatHistory: ChatInteraction[] = []
1008
- ): Promise<{ result: boolean; finalOutput?: string }> {
1001
+ ) {
1009
1002
  try {
1010
1003
  const { agent, taskId, formattedPrompt } = await this.setupAgent({
1011
1004
  agentName: selectedAgent.name,
@@ -1013,15 +1006,12 @@ Please continue from where you left off and complete the original request.
1013
1006
  chatHistory,
1014
1007
  run: false, // Don't run yet, we need to set up event listeners first
1015
1008
  });
1016
- const result = await this.attachedAgentChatLoop(
1017
- taskId,
1018
- agent,
1019
- formattedPrompt
1020
- );
1021
- return result;
1009
+
1010
+ await this.attachedAgentChatLoop(taskId, agent, formattedPrompt);
1011
+
1012
+ return { taskId };
1022
1013
  } catch (error) {
1023
1014
  console.error("Error starting agent:", error);
1024
- return { result: false, finalOutput: "Error starting agent" };
1025
1015
  }
1026
1016
  }
1027
1017
 
@@ -1029,7 +1019,7 @@ Please continue from where you left off and complete the original request.
1029
1019
  taskId: string,
1030
1020
  agent: AttachableAgent,
1031
1021
  initialInput?: string
1032
- ): Promise<{ result: boolean; finalOutput?: string }> {
1022
+ ): Promise<void> {
1033
1023
  try {
1034
1024
  let agentFinalOutput: string | undefined;
1035
1025
 
@@ -1068,6 +1058,11 @@ Please continue from where you left off and complete the original request.
1068
1058
  }
1069
1059
  }
1070
1060
 
1061
+ if (context.chatHistory) {
1062
+ const found = context.chatHistory.find((h) => h.taskId === taskId);
1063
+ found.output = agentFinalOutput;
1064
+ }
1065
+
1071
1066
  resolve("done");
1072
1067
  // Exit agent:attached mode so the prompt resets back to the default
1073
1068
  this.detachFromAgent();
@@ -1081,14 +1076,8 @@ Please continue from where you left off and complete the original request.
1081
1076
  taskInfo?.formattedPrompt || taskInfo?.initialInput || initialInput
1082
1077
  );
1083
1078
  }
1084
-
1085
- // Return immediately — the main startChatLoop on CliChatService
1086
- // now handles all user input via the registered agent:attached commands.
1087
- // Any non-command input is forwarded to the agent via handleInput below.
1088
- return { result: true, finalOutput: agentFinalOutput };
1089
1079
  } catch (error) {
1090
1080
  console.error("Agent execution failed:", error);
1091
- return { result: false, finalOutput: "Error during agent execution" };
1092
1081
  }
1093
1082
  }
1094
1083
  }
package/src/chat/types.ts CHANGED
@@ -21,6 +21,7 @@ export interface ChatContext {
21
21
  plugins: string[];
22
22
  activeAgentTaskId?: string;
23
23
  renderer?: AgentRenderer;
24
+ chatHistory: ChatInteraction[];
24
25
 
25
26
  [key: string]: any;
26
27
  }
package/src/cli.ts CHANGED
@@ -242,14 +242,15 @@ async function main() {
242
242
  options.input || "Please continue from where you left off.";
243
243
 
244
244
  await agentModule.initialize(chatService);
245
- const { taskCompleted } = await agentModule.resumeFromMessages({
246
- agentName: options.agentName || "Patcher",
247
- input: resumeInput,
248
- threads,
249
- messageId: options.messageId,
250
- taskId: options.taskId,
251
- });
252
- await taskCompleted;
245
+ const { taskCompleted: resumed } =
246
+ await agentModule.resumeFromMessages({
247
+ agentName: options.agentName || "Patcher",
248
+ input: resumeInput,
249
+ threads,
250
+ messageId: options.messageId,
251
+ taskId: options.taskId,
252
+ });
253
+ await resumed;
253
254
  return;
254
255
  }
255
256
 
@@ -320,6 +321,7 @@ async function main() {
320
321
  plugins: config.plugins.enabled,
321
322
  currentModel: options.model,
322
323
  currentProvider: options.provider,
324
+ chatHistory: [],
323
325
  });
324
326
  } catch (error) {
325
327
  console.error("Error asking AI:", error);
@@ -380,7 +382,10 @@ async function main() {
380
382
  program
381
383
  .command("sessions")
382
384
  .description("Manage agent sessions from CLI")
383
- .option("--all", "Show all historical sessions (default: current process only)")
385
+ .option(
386
+ "--all",
387
+ "Show all historical sessions (default: current process only)"
388
+ )
384
389
  .option("--csv", "Output sessions as CSV")
385
390
  .action(async (options) => {
386
391
  try {
@@ -388,7 +393,11 @@ async function main() {
388
393
  await agentModule.initialize(chatService);
389
394
  const sessionsModule = new SessionsModule(agentModule);
390
395
  await sessionsModule.initialize(chatService);
391
- await sessionsModule.logSessionTable(options.all || false, options.csv || false, true);
396
+ await sessionsModule.logSessionTable(
397
+ options.all || false,
398
+ options.csv || false,
399
+ true
400
+ );
392
401
  } catch (error) {
393
402
  console.error("Error listing sessions:", error);
394
403
  process.exit(1);
@@ -1,6 +1,7 @@
1
1
  import Anthropic from "@anthropic-ai/sdk";
2
2
  import { wait } from "../utils";
3
3
  import { AnthropicTextPricing } from "./pricing";
4
+ import { ContextLimits } from "./contextLimits";
4
5
  import { Models } from "../types";
5
6
  import {
6
7
  GenericClient,
@@ -483,6 +484,16 @@ export class GenericAnthropicClient implements GenericClient {
483
484
  throw new Error("Anthropic does not support audio transcription");
484
485
  }
485
486
 
487
+ getContextLimit(model: string): { contextLimit: number; threshold: number } | undefined {
488
+ const contextLimit = ContextLimits[model];
489
+ if (contextLimit === undefined) return undefined;
490
+ const pricing = AnthropicTextPricing[model];
491
+ // If the model has tiered pricing above 200k tokens, use 200k as the threshold
492
+ const threshold =
493
+ pricing && "input_gt_200k" in pricing ? 200_000 : contextLimit;
494
+ return { contextLimit, threshold };
495
+ }
496
+
486
497
  async createAudioGeneration(
487
498
  options: AudioGenerationOptions
488
499
  ): Promise<AudioGenerationResponse> {
@@ -0,0 +1,106 @@
1
+ import { Models, EmbeddingModels } from "../types";
2
+
3
+ /**
4
+ * Context window limits (in tokens) for all supported models.
5
+ * Sources:
6
+ * - OpenAI: https://platform.openai.com/docs/models
7
+ * - Anthropic: https://docs.anthropic.com/en/docs/about-claude/models
8
+ * - Google: https://ai.google.dev/gemini-api/docs/models
9
+ * - xAI: https://docs.x.ai/developers/models
10
+ */
11
+ export const ContextLimits: Record<string, number> = {
12
+ // ─── OpenAI ───────────────────────────────────────────────────────────────
13
+ [Models.openai.GPT_54]: 1_000_000,
14
+ [Models.openai.GPT_54_Mini]: 400_000,
15
+ [Models.openai.GPT_54_Nano]: 400_000,
16
+ [Models.openai.GPT_54_Pro]: 1_000_000,
17
+ [Models.openai.GPT_53_Chat]: 1_000_000,
18
+ [Models.openai.GPT_53_Codex]: 1_000_000,
19
+ [Models.openai.GPT_5]: 1_000_000,
20
+ [Models.openai.GPT_5_Mini]: 1_000_000,
21
+ [Models.openai.GPT_5_Nano]: 1_000_000,
22
+ [Models.openai.GPT_5_1]: 1_000_000,
23
+ [Models.openai.GPT_5_2]: 1_000_000,
24
+ [Models.openai.GPT_41]: 1_047_576,
25
+ [Models.openai.GPT_41_Mini]: 1_047_576,
26
+ [Models.openai.GPT_41_Nano]: 1_047_576,
27
+ [Models.openai.GPT_45]: 128_000,
28
+ [Models.openai.GPT_4o]: 128_000,
29
+ [Models.openai.GPT_4o_Mini]: 128_000,
30
+ [Models.openai.GPT_4o_Audio]: 128_000,
31
+ [Models.openai.GPT_4o_Realtime]: 128_000,
32
+ [Models.openai.GPT_4o_Mini_Audio]: 128_000,
33
+ [Models.openai.GPT_4o_Mini_Realtime]: 128_000,
34
+ [Models.openai.GPT_4o_Mini_Search]: 128_000,
35
+ [Models.openai.GPT_4o_Search]: 128_000,
36
+ [Models.openai.o1]: 200_000,
37
+ [Models.openai.o1_Mini]: 128_000,
38
+ [Models.openai.o1_Pro]: 200_000,
39
+ [Models.openai.o3]: 200_000,
40
+ [Models.openai.o3_Pro]: 200_000,
41
+ [Models.openai.o3_Mini]: 200_000,
42
+ [Models.openai.o4_Mini]: 200_000,
43
+
44
+ // ─── Anthropic ────────────────────────────────────────────────────────────
45
+ [Models.anthropic.Opus4_6]: 1_000_000,
46
+ [Models.anthropic.Sonnet4_6]: 1_000_000,
47
+ [Models.anthropic.Opus4_5]: 1_000_000,
48
+ [Models.anthropic.Opus4]: 200_000,
49
+ [Models.anthropic.Opus4_1]: 200_000,
50
+ [Models.anthropic.Sonnet4]: 200_000,
51
+ [Models.anthropic.Sonnet4_5]: 200_000,
52
+ [Models.anthropic.Haiku4_5]: 200_000,
53
+ [Models.anthropic.Sonnet3_7]: 200_000,
54
+ [Models.anthropic.Sonnet3_5]: 200_000,
55
+ [Models.anthropic.Haiku3_5]: 200_000,
56
+ [Models.anthropic.Opus3]: 200_000,
57
+ [Models.anthropic.Haiku3]: 200_000,
58
+
59
+ // ─── Google ───────────────────────────────────────────────────────────────
60
+ [Models.google.Gemini_31_Pro_Preview]: 1_000_000,
61
+ [Models.google.Gemini_31_Flash_Image_Preview]: 1_000_000,
62
+ [Models.google.Gemini_31_Flash_Lite_Preview]: 1_000_000,
63
+ [Models.google.Gemini_3_Flash_Preview]: 1_000_000,
64
+ [Models.google.Gemini_3_Pro_Image_Preview]: 1_000_000,
65
+ [Models.google.Gemini_25_Pro]: 1_000_000,
66
+ [Models.google.Gemini_25_Flash]: 1_000_000,
67
+ [Models.google.Gemini_25_Flash_Lite]: 1_000_000,
68
+ [Models.google.Gemini_25_Flash_Preview]: 1_000_000,
69
+ [Models.google.Gemini_25_Pro_Preview]: 1_000_000,
70
+ [Models.google.Gemini_25_Flash_Image]: 1_000_000,
71
+ [Models.google.Gemini_25_Flash_Live]: 1_000_000,
72
+ [Models.google.Gemini_25_Flash_Native_Audio]: 1_000_000,
73
+ [Models.google.Gemini_25_Flash_TTS]: 1_000_000,
74
+ [Models.google.Gemini_25_Pro_TTS]: 1_000_000,
75
+ [Models.google.Gemini_20_Flash]: 1_000_000,
76
+ [Models.google.Gemini_20_Flash_Preview_Image_Generation]: 1_000_000,
77
+ [Models.google.Gemini_20_Flash_Lite]: 1_000_000,
78
+ [Models.google.Gemini_20_Flash_Live]: 1_000_000,
79
+ [Models.google.Gemini_20_Flash_TTS]: 1_000_000,
80
+ [Models.google.Gemini_15_Flash]: 1_000_000,
81
+ [Models.google.Gemini_15_Flash_8B]: 1_000_000,
82
+ [Models.google.Gemini_15_Pro]: 2_000_000,
83
+
84
+ // ─── xAI ──────────────────────────────────────────────────────────────────
85
+ [Models.xai.Grok4_1_Fast_Reasoning]: 2_000_000,
86
+ [Models.xai.Grok4_1_Fast_NonReasoning]: 2_000_000,
87
+ [Models.xai.GrokCodeFast]: 2_000_000,
88
+ [Models.xai.Grok4]: 131_072,
89
+ [Models.xai.Grok3Beta]: 131_072,
90
+ [Models.xai.Grok3MiniBeta]: 131_072,
91
+ [Models.xai.Grok3FastBeta]: 131_072,
92
+ [Models.xai.Grok3MiniFastBeta]: 131_072,
93
+ [Models.xai.Grok21212]: 131_072,
94
+ [Models.xai.Grok2Vision1212]: 131_072,
95
+ };
96
+
97
+ /** Default fallback context window limit (tokens) used when a model is not found. */
98
+ export const DEFAULT_CONTEXT_LIMIT = 30_000;
99
+
100
+ /**
101
+ * Returns the context window limit (in tokens) for a given model.
102
+ * Falls back to DEFAULT_CONTEXT_LIMIT if the model is not recognized.
103
+ */
104
+ export function getModelContextLimit(model: string): number {
105
+ return ContextLimits[model] ?? DEFAULT_CONTEXT_LIMIT;
106
+ }
@@ -15,6 +15,7 @@ import * as pathSync from "path";
15
15
  import { wait } from "../utils";
16
16
  import { EmbeddingModels, Models } from "../types";
17
17
  import { GeminiTextPricing } from "./pricing";
18
+ import { ContextLimits } from "./contextLimits";
18
19
 
19
20
  import {
20
21
  GenericClient,
@@ -916,4 +917,14 @@ export class GenericGeminiClient implements GenericClient {
916
917
  throw error;
917
918
  }
918
919
  }
920
+
921
+ getContextLimit(model: string): { contextLimit: number; threshold: number } | undefined {
922
+ const contextLimit = ContextLimits[model];
923
+ if (contextLimit === undefined) return undefined;
924
+ const pricing = GeminiTextPricing[model];
925
+ // If the model has tiered pricing above 200k tokens, use 200k as the threshold
926
+ const threshold =
927
+ pricing && "input_gt_200k" in pricing ? 200_000 : contextLimit;
928
+ return { contextLimit, threshold };
929
+ }
919
930
  }
@@ -39,6 +39,7 @@ import {
39
39
  import { GenericXAIClient } from "./xai";
40
40
  import { KnowhowGenericClient } from "./knowhow";
41
41
  import { loadKnowhowJwt } from "../services/KnowhowClient";
42
+ import { ContextLimits } from "./contextLimits";
42
43
 
43
44
  export type ModelModality = "completion" | "embedding" | "image" | "audio" | "video";
44
45
 
@@ -608,6 +609,25 @@ export class AIClient {
608
609
  listAllVideoModels() {
609
610
  return this.videoModels;
610
611
  }
612
+
613
+ /**
614
+ * Returns the context window limit (in tokens) for a given model.
615
+ * Optionally accepts a provider for future provider-specific overrides.
616
+ * Delegates to the registered client's getContextLimit() if available,
617
+ * so custom clients can provide their own context limits.
618
+ * Returns undefined if neither the client nor the global ContextLimits table knows the model.
619
+ */
620
+ getContextLimit(provider: string, model: string): { contextLimit: number; threshold: number } | undefined {
621
+ // Try the registered client first
622
+ const client = this.clients[provider];
623
+ if (client?.getContextLimit) {
624
+ return client.getContextLimit(model);
625
+ }
626
+ // Fall back to the global ContextLimits table
627
+ const contextLimit = ContextLimits[model];
628
+ if (contextLimit === undefined) return undefined;
629
+ return { contextLimit, threshold: contextLimit };
630
+ }
611
631
  }
612
632
 
613
633
  export const Clients = new AIClient();
@@ -619,5 +639,6 @@ export * from "./openai";
619
639
  export * from "./anthropic";
620
640
  export * from "./knowhow";
621
641
  export * from "./gemini";
642
+ export * from "./contextLimits";
622
643
  export * from "./xai";
623
644
  export * from "./knowhowMcp";