llmist 16.2.4 → 17.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -7328,11 +7328,14 @@ declare class CompactionManager {
  private readonly model;
  private readonly config;
  private readonly strategy;
+ private readonly logger;
  private modelLimits?;
+ private hasWarnedModelNotFound;
+ private hasWarnedNoTokenCounting;
  private totalCompactions;
  private totalTokensSaved;
  private lastTokenCount;
- constructor(client: LLMist, model: string, config?: CompactionConfig);
+ constructor(client: LLMist, model: string, config?: CompactionConfig, logger?: Logger<ILogObj>);
  /**
  * Check if compaction is needed and perform it if so.
  *
@@ -7350,6 +7353,22 @@ declare class CompactionManager {
  * @returns CompactionEvent with compaction details
  */
  compact(conversation: IConversationManager, iteration: number, precomputed?: PrecomputedTokens): Promise<CompactionEvent | null>;
+ /**
+ * Feed API-reported input token count for reactive threshold checking.
+ * Call this after each LLM response with the actual inputTokens from usage.
+ */
+ updateUsage(inputTokens: number): void;
+ /**
+ * Check if compaction should trigger based on API-reported usage.
+ * Unlike checkAndCompact() which uses estimated token counts,
+ * this uses the ground-truth token count from the last LLM response.
+ */
+ shouldCompactFromUsage(): boolean;
+ /**
+ * Resolve and cache model limits from registry. Warns once if not found.
+ * @returns true if limits are available, false otherwise
+ */
+ private resolveModelLimits;
  /**
  * Get compaction statistics.
  */
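For integrators calling these new hooks directly (rather than through the agent, which wires them up in dist/index.js further down), a minimal usage sketch; the callModel helper, the iteration counter, and the triggerThresholdPercent config key are assumptions for illustration, not part of the published surface shown in this diff:

// Sketch only: feed API-reported usage after each response, then compact reactively.
const manager = new CompactionManager(client, model, { triggerThresholdPercent: 80 }); // config key assumed
const result = await callModel(conversation);      // assumed helper returning { usage: { inputTokens } }
manager.updateUsage(result.usage.inputTokens);     // ground-truth input tokens from the provider
if (manager.shouldCompactFromUsage()) {
  const event = await manager.compact(conversation, iteration);
  if (event) {
    // a CompactionEvent was produced; log or forward it as needed
  }
}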
@@ -8613,20 +8632,16 @@ declare class GadgetCallParser {
  /**
  * Character-to-token ratio for fallback token estimation.
  *
- * Rationale: When native token counting APIs fail, we estimate tokens using
- * a rough heuristic of 4 characters per token. This is based on empirical
- * observations across multiple LLM providers:
- * - OpenAI's GPT models average ~4 chars/token for English text
- * - Anthropic's Claude models have similar characteristics
- * - Gemini models also approximate this ratio
- *
- * This is intentionally conservative to avoid underestimating token usage.
- * While not perfectly accurate, it provides a reasonable fallback when
- * precise tokenization is unavailable.
+ * Used only when tiktoken (the primary fallback) is unavailable. A value of 2
+ * errs on the side of overestimating token count, which is safer for
+ * compaction triggers and output limiting.
  *
- * Reference: https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
+ * Rationale: The previous value of 4 was based on English prose averages, but
+ * agentic sessions are dominated by JSON, code, and structured data where the
+ * real ratio is ~1.5-2.5 chars/token. A 4-char estimate underestimated tokens
+ * by up to 250%, causing compaction and output limiting to never trigger.
  */
- declare const FALLBACK_CHARS_PER_TOKEN = 4;
+ declare const FALLBACK_CHARS_PER_TOKEN = 2;

  /**
  * Subagent creation helper for gadget authors.
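A worked example of what the 4 → 2 change means in practice; the figures below are illustrative, not measured:

// Illustrative arithmetic only. A 10,000-character JSON tool result at a true ratio of
// ~2 chars/token is ~5,000 tokens. Estimating with 4 chars/token yields 2,500 (half the
// real count), so thresholds keyed to the estimate fire far too late; estimating with
// 2 chars/token yields 5,000 here and at worst overestimates plain prose.
const estimateTokens = (text: string, charsPerToken: number): number =>
  Math.ceil(text.length / charsPerToken);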
@@ -9554,8 +9569,14 @@ declare abstract class OpenAICompatibleProvider<TConfig extends OpenAICompatible
  protected executeStreamRequest(payload: Parameters<OpenAI["chat"]["completions"]["create"]>[0], signal?: AbortSignal): Promise<AsyncIterable<ChatCompletionChunk>>;
  protected normalizeProviderStream(iterable: AsyncIterable<unknown>): LLMStream;
  /**
- * Count tokens using character-based fallback estimation.
- * Most meta-providers don't have a native token counting API.
+ * Count tokens using tiktoken o200k_base encoding.
+ *
+ * While o200k_base isn't model-exact for non-OpenAI models routed through
+ * meta-providers like OpenRouter, BPE tokenizers with 200K vocab produce
+ * counts within 10-20% of true values — far better than the character-based
+ * fallback which can be off by 250% for JSON/code-heavy content.
+ *
+ * Falls back to character-based estimation if tiktoken fails.
  */
  countTokens(messages: LLMMessage[], descriptor: ModelDescriptor, _spec?: ModelSpec): Promise<number>;
  }
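A self-contained sketch of the o200k_base counting path this declaration documents, using only the tiktoken calls that appear in the dist/index.js hunks below; the message shape is simplified to plain strings:

import { get_encoding } from "tiktoken";

// Count tokens for plain-text messages with the o200k_base encoding, releasing the
// encoder's WASM memory afterwards (tiktoken encodings must be freed explicitly).
function countTextTokens(texts: string[]): number {
  const encoding = get_encoding("o200k_base");
  try {
    return texts.reduce((sum, text) => sum + encoding.encode(text).length, 0);
  } finally {
    encoding.free();
  }
}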
package/dist/index.d.ts CHANGED
@@ -7328,11 +7328,14 @@ declare class CompactionManager {
  private readonly model;
  private readonly config;
  private readonly strategy;
+ private readonly logger;
  private modelLimits?;
+ private hasWarnedModelNotFound;
+ private hasWarnedNoTokenCounting;
  private totalCompactions;
  private totalTokensSaved;
  private lastTokenCount;
- constructor(client: LLMist, model: string, config?: CompactionConfig);
+ constructor(client: LLMist, model: string, config?: CompactionConfig, logger?: Logger<ILogObj>);
  /**
  * Check if compaction is needed and perform it if so.
  *
@@ -7350,6 +7353,22 @@ declare class CompactionManager {
  * @returns CompactionEvent with compaction details
  */
  compact(conversation: IConversationManager, iteration: number, precomputed?: PrecomputedTokens): Promise<CompactionEvent | null>;
+ /**
+ * Feed API-reported input token count for reactive threshold checking.
+ * Call this after each LLM response with the actual inputTokens from usage.
+ */
+ updateUsage(inputTokens: number): void;
+ /**
+ * Check if compaction should trigger based on API-reported usage.
+ * Unlike checkAndCompact() which uses estimated token counts,
+ * this uses the ground-truth token count from the last LLM response.
+ */
+ shouldCompactFromUsage(): boolean;
+ /**
+ * Resolve and cache model limits from registry. Warns once if not found.
+ * @returns true if limits are available, false otherwise
+ */
+ private resolveModelLimits;
  /**
  * Get compaction statistics.
  */
@@ -8613,20 +8632,16 @@ declare class GadgetCallParser {
  /**
  * Character-to-token ratio for fallback token estimation.
  *
- * Rationale: When native token counting APIs fail, we estimate tokens using
- * a rough heuristic of 4 characters per token. This is based on empirical
- * observations across multiple LLM providers:
- * - OpenAI's GPT models average ~4 chars/token for English text
- * - Anthropic's Claude models have similar characteristics
- * - Gemini models also approximate this ratio
- *
- * This is intentionally conservative to avoid underestimating token usage.
- * While not perfectly accurate, it provides a reasonable fallback when
- * precise tokenization is unavailable.
+ * Used only when tiktoken (the primary fallback) is unavailable. A value of 2
+ * errs on the side of overestimating token count, which is safer for
+ * compaction triggers and output limiting.
  *
- * Reference: https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
+ * Rationale: The previous value of 4 was based on English prose averages, but
+ * agentic sessions are dominated by JSON, code, and structured data where the
+ * real ratio is ~1.5-2.5 chars/token. A 4-char estimate underestimated tokens
+ * by up to 250%, causing compaction and output limiting to never trigger.
  */
- declare const FALLBACK_CHARS_PER_TOKEN = 4;
+ declare const FALLBACK_CHARS_PER_TOKEN = 2;

  /**
  * Subagent creation helper for gadget authors.
@@ -9554,8 +9569,14 @@ declare abstract class OpenAICompatibleProvider<TConfig extends OpenAICompatible
  protected executeStreamRequest(payload: Parameters<OpenAI["chat"]["completions"]["create"]>[0], signal?: AbortSignal): Promise<AsyncIterable<ChatCompletionChunk>>;
  protected normalizeProviderStream(iterable: AsyncIterable<unknown>): LLMStream;
  /**
- * Count tokens using character-based fallback estimation.
- * Most meta-providers don't have a native token counting API.
+ * Count tokens using tiktoken o200k_base encoding.
+ *
+ * While o200k_base isn't model-exact for non-OpenAI models routed through
+ * meta-providers like OpenRouter, BPE tokenizers with 200K vocab produce
+ * counts within 10-20% of true values — far better than the character-based
+ * fallback which can be off by 250% for JSON/code-heavy content.
+ *
+ * Falls back to character-based estimation if tiktoken fails.
  */
  countTokens(messages: LLMMessage[], descriptor: ModelDescriptor, _spec?: ModelSpec): Promise<number>;
  }
package/dist/index.js CHANGED
@@ -813,7 +813,7 @@ var init_constants = __esm({
  GADGET_ARG_PREFIX = "!!!ARG:";
  DEFAULT_GADGET_OUTPUT_LIMIT = true;
  DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT = 15;
- CHARS_PER_TOKEN = 4;
+ CHARS_PER_TOKEN = 2;
  FALLBACK_CONTEXT_WINDOW = 128e3;
  }
  });
@@ -2834,6 +2834,7 @@ var CompactionManager;
  var init_manager = __esm({
  "src/agent/compaction/manager.ts"() {
  "use strict";
+ init_logger();
  init_config();
  init_strategies();
  CompactionManager = class {
@@ -2841,15 +2842,19 @@ var init_manager = __esm({
  model;
  config;
  strategy;
+ logger;
  modelLimits;
+ hasWarnedModelNotFound = false;
+ hasWarnedNoTokenCounting = false;
  // Statistics
  totalCompactions = 0;
  totalTokensSaved = 0;
  lastTokenCount = 0;
- constructor(client, model, config = {}) {
+ constructor(client, model, config = {}, logger2) {
  this.client = client;
  this.model = model;
  this.config = resolveCompactionConfig(config);
+ this.logger = logger2 ?? createLogger({ name: "llmist:compaction" });
  if (typeof config.strategy === "object" && "compact" in config.strategy) {
  this.strategy = config.strategy;
  } else {
@@ -2867,13 +2872,16 @@ var init_manager = __esm({
  if (!this.config.enabled) {
  return null;
  }
- if (!this.modelLimits) {
- this.modelLimits = this.client.modelRegistry.getModelLimits(this.model);
- if (!this.modelLimits) {
- return null;
- }
+ if (!this.resolveModelLimits()) {
+ return null;
  }
  if (!this.client.countTokens) {
+ if (!this.hasWarnedNoTokenCounting) {
+ this.hasWarnedNoTokenCounting = true;
+ this.logger.warn("Compaction skipped: client does not support token counting", {
+ model: this.model
+ });
+ }
  return null;
  }
  const messages = conversation.getMessages();
@@ -2904,11 +2912,8 @@ var init_manager = __esm({
  * @returns CompactionEvent with compaction details
  */
  async compact(conversation, iteration, precomputed) {
- if (!this.modelLimits) {
- this.modelLimits = this.client.modelRegistry.getModelLimits(this.model);
- if (!this.modelLimits) {
- return null;
- }
+ if (!this.resolveModelLimits()) {
+ return null;
  }
  const historyMessages = precomputed?.historyMessages ?? conversation.getHistoryMessages();
  const baseMessages = precomputed?.baseMessages ?? conversation.getBaseMessages();
@@ -2950,6 +2955,42 @@ var init_manager = __esm({
  }
  return event;
  }
+ /**
+ * Feed API-reported input token count for reactive threshold checking.
+ * Call this after each LLM response with the actual inputTokens from usage.
+ */
+ updateUsage(inputTokens) {
+ this.lastTokenCount = inputTokens;
+ }
+ /**
+ * Check if compaction should trigger based on API-reported usage.
+ * Unlike checkAndCompact() which uses estimated token counts,
+ * this uses the ground-truth token count from the last LLM response.
+ */
+ shouldCompactFromUsage() {
+ if (!this.config.enabled) return false;
+ if (!this.resolveModelLimits()) return false;
+ const usagePercent = this.lastTokenCount / this.modelLimits.contextWindow * 100;
+ return usagePercent >= this.config.triggerThresholdPercent;
+ }
+ /**
+ * Resolve and cache model limits from registry. Warns once if not found.
+ * @returns true if limits are available, false otherwise
+ */
+ resolveModelLimits() {
+ if (this.modelLimits) return true;
+ this.modelLimits = this.client.modelRegistry.getModelLimits(this.model);
+ if (!this.modelLimits) {
+ if (!this.hasWarnedModelNotFound) {
+ this.hasWarnedModelNotFound = true;
+ this.logger.warn("Compaction skipped: model not found in registry", {
+ model: this.model
+ });
+ }
+ return false;
+ }
+ return true;
+ }
  /**
  * Get compaction statistics.
  */
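To make the threshold check above concrete (example figures only, not taken from the package): with a 1,048,576-token context window and an 80% trigger threshold, an API-reported inputTokens of 850,000 gives 850000 / 1048576 * 100 ≈ 81.1%, so shouldCompactFromUsage() returns true; 800,000 tokens is ≈ 76.3% and does not trigger.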
@@ -7350,7 +7391,7 @@ var init_constants2 = __esm({
  "src/providers/constants.ts"() {
  "use strict";
  ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS = 4096;
- FALLBACK_CHARS_PER_TOKEN = 4;
+ FALLBACK_CHARS_PER_TOKEN = 2;
  OPENAI_MESSAGE_OVERHEAD_TOKENS = 4;
  OPENAI_REPLY_PRIMING_TOKENS = 2;
  OPENAI_NAME_FIELD_OVERHEAD_TOKENS = 1;
@@ -9705,6 +9746,7 @@ var init_huggingface_models = __esm({

  // src/providers/openai-compatible-provider.ts
  import OpenAI from "openai";
+ import { get_encoding } from "tiktoken";
  var ROLE_MAP, OpenAICompatibleProvider;
  var init_openai_compatible_provider = __esm({
  "src/providers/openai-compatible-provider.ts"() {
@@ -9909,11 +9951,38 @@ var init_openai_compatible_provider = __esm({
  }
  }
  /**
- * Count tokens using character-based fallback estimation.
- * Most meta-providers don't have a native token counting API.
+ * Count tokens using tiktoken o200k_base encoding.
+ *
+ * While o200k_base isn't model-exact for non-OpenAI models routed through
+ * meta-providers like OpenRouter, BPE tokenizers with 200K vocab produce
+ * counts within 10-20% of true values — far better than the character-based
+ * fallback which can be off by 250% for JSON/code-heavy content.
+ *
+ * Falls back to character-based estimation if tiktoken fails.
  */
  async countTokens(messages, descriptor, _spec) {
+ if (!messages || messages.length === 0) return 0;
  try {
+ const encoding = get_encoding("o200k_base");
+ try {
+ let tokenCount = 0;
+ for (const msg of messages) {
+ const parts = normalizeMessageContent(msg.content);
+ for (const part of parts) {
+ if (part.type === "text") {
+ tokenCount += encoding.encode(part.text).length;
+ }
+ }
+ }
+ return tokenCount;
+ } finally {
+ encoding.free();
+ }
+ } catch (error) {
+ console.warn(
+ `Token counting with tiktoken failed for ${descriptor.name}, using fallback estimation:`,
+ error
+ );
  let totalChars = 0;
  for (const msg of messages) {
  const parts = normalizeMessageContent(msg.content);
@@ -9924,9 +9993,6 @@ var init_openai_compatible_provider = __esm({
  }
  }
  return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
- } catch (error) {
- console.warn(`Token counting failed for ${descriptor.name}, using zero estimate:`, error);
- return 0;
  }
  }
  };
@@ -11435,6 +11501,103 @@ var init_openrouter_models = __esm({
  }
  },
  // ============================================================
+ // Google Gemini 3.1 Models (via OpenRouter)
+ // ============================================================
+ {
+ provider: "openrouter",
+ modelId: "google/gemini-3.1-pro-preview",
+ displayName: "Gemini 3.1 Pro Preview (OpenRouter)",
+ contextWindow: 1048576,
+ maxOutputTokens: 65536,
+ pricing: {
+ input: 2,
+ output: 12
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "Gemini 3.1",
+ releaseDate: "2026-03",
+ notes: "Gemini 3.1 Pro Preview via OpenRouter. Frontier reasoning with enhanced software engineering performance."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "google/gemini-3.1-pro-preview-customtools",
+ displayName: "Gemini 3.1 Pro Preview Custom Tools (OpenRouter)",
+ contextWindow: 1048576,
+ maxOutputTokens: 65536,
+ pricing: {
+ input: 2,
+ output: 12
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "Gemini 3.1",
+ releaseDate: "2026-03",
+ notes: "Gemini 3.1 Pro Preview Custom Tools via OpenRouter. Improved tool selection to prevent overuse of general tools in agent workflows."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "google/gemini-3.1-flash-lite-preview",
+ displayName: "Gemini 3.1 Flash Lite Preview (OpenRouter)",
+ contextWindow: 1048576,
+ maxOutputTokens: 65536,
+ pricing: {
+ input: 0.25,
+ output: 1.5
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "Gemini 3.1",
+ releaseDate: "2026-03",
+ notes: "Gemini 3.1 Flash Lite Preview via OpenRouter. High-efficiency model with full thinking levels for cost/performance trade-offs."
+ }
+ },
+ {
+ provider: "openrouter",
+ modelId: "google/gemini-3.1-flash-image-preview",
+ displayName: "Gemini 3.1 Flash Image Preview (OpenRouter)",
+ contextWindow: 65536,
+ maxOutputTokens: 65536,
+ pricing: {
+ input: 0.5,
+ output: 3
+ },
+ knowledgeCutoff: "2025-01",
+ features: {
+ streaming: true,
+ functionCalling: false,
+ vision: true
+ },
+ metadata: {
+ family: "Gemini 3.1",
+ releaseDate: "2026-03",
+ notes: "Gemini 3.1 Flash Image Preview via OpenRouter. Pro-level image generation and editing at Flash speed."
+ }
+ },
+ // ============================================================
  // Meta Llama Models (via OpenRouter)
  // ============================================================
  {
@@ -12588,6 +12751,7 @@ var init_client = __esm({
  "use strict";
  init_builder();
  init_discovery();
+ init_constants();
  init_model_registry();
  init_image();
  init_speech();
@@ -12706,8 +12870,43 @@ var init_client = __esm({
  if (adapter.countTokens) {
  return adapter.countTokens(messages, descriptor, spec);
  }
- const totalChars = messages.reduce((sum, msg) => sum + (msg.content?.length ?? 0), 0);
- return Math.ceil(totalChars / 4);
+ try {
+ const { get_encoding: get_encoding2 } = await import("tiktoken");
+ const encoding = get_encoding2("o200k_base");
+ try {
+ let tokenCount = 0;
+ for (const msg of messages) {
+ const content = msg.content;
+ if (typeof content === "string") {
+ tokenCount += encoding.encode(content).length;
+ } else if (Array.isArray(content)) {
+ for (const part of content) {
+ if (part.type === "text") {
+ tokenCount += encoding.encode(part.text).length;
+ }
+ }
+ }
+ }
+ return tokenCount;
+ } finally {
+ encoding.free();
+ }
+ } catch {
+ let totalChars = 0;
+ for (const msg of messages) {
+ const content = msg.content;
+ if (typeof content === "string") {
+ totalChars += content.length;
+ } else if (Array.isArray(content)) {
+ for (const part of content) {
+ if (part.type === "text") {
+ totalChars += part.text.length;
+ }
+ }
+ }
+ }
+ return Math.ceil(totalChars / CHARS_PER_TOKEN);
+ }
  }
  resolveAdapter(descriptor) {
  const adapter = this.adapters.find((item) => item.supports(descriptor));
@@ -16372,7 +16571,8 @@ var init_agent = __esm({
  this.compactionManager = new CompactionManager(
  this.client,
  this.model,
- options.compactionConfig
+ options.compactionConfig,
+ this.logger
  );
  }
  this.signal = options.signal;
@@ -16718,6 +16918,22 @@ var init_agent = __esm({
  this.logger.info("Loop terminated by gadget or processor");
  break;
  }
+ if (this.compactionManager && result.usage?.inputTokens) {
+ this.compactionManager.updateUsage(result.usage.inputTokens);
+ if (this.compactionManager.shouldCompactFromUsage()) {
+ this.logger.info("Reactive compaction triggered from API-reported usage", {
+ inputTokens: result.usage.inputTokens,
+ iteration: currentIteration
+ });
+ const reactiveCompaction = await this.compactionManager.compact(
+ this.conversation,
+ currentIteration
+ );
+ if (reactiveCompaction) {
+ yield await this.emitCompactionEvent(reactiveCompaction, currentIteration);
+ }
+ }
+ }
  if (this.budget !== void 0) {
  const totalCost = this.tree.getTotalCost();
  if (totalCost >= this.budget) {