llmist 16.2.4 → 16.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +145 -26
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +36 -15
- package/dist/index.d.ts +36 -15
- package/dist/index.js +140 -21
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
@@ -7328,11 +7328,14 @@ declare class CompactionManager {
 private readonly model;
 private readonly config;
 private readonly strategy;
+private readonly logger;
 private modelLimits?;
+private hasWarnedModelNotFound;
+private hasWarnedNoTokenCounting;
 private totalCompactions;
 private totalTokensSaved;
 private lastTokenCount;
-constructor(client: LLMist, model: string, config?: CompactionConfig);
+constructor(client: LLMist, model: string, config?: CompactionConfig, logger?: Logger<ILogObj>);
 /**
 * Check if compaction is needed and perform it if so.
 *
@@ -7350,6 +7353,22 @@ declare class CompactionManager {
 * @returns CompactionEvent with compaction details
 */
 compact(conversation: IConversationManager, iteration: number, precomputed?: PrecomputedTokens): Promise<CompactionEvent | null>;
+/**
+* Feed API-reported input token count for reactive threshold checking.
+* Call this after each LLM response with the actual inputTokens from usage.
+*/
+updateUsage(inputTokens: number): void;
+/**
+* Check if compaction should trigger based on API-reported usage.
+* Unlike checkAndCompact() which uses estimated token counts,
+* this uses the ground-truth token count from the last LLM response.
+*/
+shouldCompactFromUsage(): boolean;
+/**
+* Resolve and cache model limits from registry. Warns once if not found.
+* @returns true if limits are available, false otherwise
+*/
+private resolveModelLimits;
 /**
 * Get compaction statistics.
 */
@@ -8613,20 +8632,16 @@ declare class GadgetCallParser {
 /**
 * Character-to-token ratio for fallback token estimation.
 *
-*
-*
-*
-* - OpenAI's GPT models average ~4 chars/token for English text
-* - Anthropic's Claude models have similar characteristics
-* - Gemini models also approximate this ratio
-*
-* This is intentionally conservative to avoid underestimating token usage.
-* While not perfectly accurate, it provides a reasonable fallback when
-* precise tokenization is unavailable.
+* Used only when tiktoken (the primary fallback) is unavailable. A value of 2
+* errs on the side of overestimating token count, which is safer for
+* compaction triggers and output limiting.
 *
-*
+* Rationale: The previous value of 4 was based on English prose averages, but
+* agentic sessions are dominated by JSON, code, and structured data where the
+* real ratio is ~1.5-2.5 chars/token. A 4-char estimate underestimated tokens
+* by up to 250%, causing compaction and output limiting to never trigger.
 */
-declare const FALLBACK_CHARS_PER_TOKEN =
+declare const FALLBACK_CHARS_PER_TOKEN = 2;

 /**
 * Subagent creation helper for gadget authors.
@@ -9554,8 +9569,14 @@ declare abstract class OpenAICompatibleProvider<TConfig extends OpenAICompatible
 protected executeStreamRequest(payload: Parameters<OpenAI["chat"]["completions"]["create"]>[0], signal?: AbortSignal): Promise<AsyncIterable<ChatCompletionChunk>>;
 protected normalizeProviderStream(iterable: AsyncIterable<unknown>): LLMStream;
 /**
-* Count tokens using
-*
+* Count tokens using tiktoken o200k_base encoding.
+*
+* While o200k_base isn't model-exact for non-OpenAI models routed through
+* meta-providers like OpenRouter, BPE tokenizers with 200K vocab produce
+* counts within 10-20% of true values — far better than the character-based
+* fallback which can be off by 250% for JSON/code-heavy content.
+*
+* Falls back to character-based estimation if tiktoken fails.
 */
 countTokens(messages: LLMMessage[], descriptor: ModelDescriptor, _spec?: ModelSpec): Promise<number>;
 }
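
The declarations above add a reactive compaction path driven by API-reported usage instead of estimated token counts. A minimal TypeScript sketch of how a caller might drive it, mirroring the agent wiring later in this diff; names such as client, model, conversation, iteration, and usage are assumptions standing in for the caller's own state, not part of the package:

    // Sketch only (not from the package docs). Assumes an existing LLMist client,
    // a model id string, an IConversationManager, and per-iteration LLM results
    // exposing usage.inputTokens, as in the agent loop later in this diff.
    const manager = new CompactionManager(client, model, compactionConfig, logger);

    async function afterLlmResponse(
      conversation: IConversationManager,
      iteration: number,
      usage?: { inputTokens?: number },
    ): Promise<void> {
      if (!usage?.inputTokens) return;
      manager.updateUsage(usage.inputTokens);   // ground-truth count from the API
      if (manager.shouldCompactFromUsage()) {   // reactive threshold check
        const event = await manager.compact(conversation, iteration);
        if (event) {
          // event describes the compaction (e.g. tokens saved).
        }
      }
    }
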
package/dist/index.d.ts
CHANGED
@@ -7328,11 +7328,14 @@ declare class CompactionManager {
 private readonly model;
 private readonly config;
 private readonly strategy;
+private readonly logger;
 private modelLimits?;
+private hasWarnedModelNotFound;
+private hasWarnedNoTokenCounting;
 private totalCompactions;
 private totalTokensSaved;
 private lastTokenCount;
-constructor(client: LLMist, model: string, config?: CompactionConfig);
+constructor(client: LLMist, model: string, config?: CompactionConfig, logger?: Logger<ILogObj>);
 /**
 * Check if compaction is needed and perform it if so.
 *
@@ -7350,6 +7353,22 @@ declare class CompactionManager {
 * @returns CompactionEvent with compaction details
 */
 compact(conversation: IConversationManager, iteration: number, precomputed?: PrecomputedTokens): Promise<CompactionEvent | null>;
+/**
+* Feed API-reported input token count for reactive threshold checking.
+* Call this after each LLM response with the actual inputTokens from usage.
+*/
+updateUsage(inputTokens: number): void;
+/**
+* Check if compaction should trigger based on API-reported usage.
+* Unlike checkAndCompact() which uses estimated token counts,
+* this uses the ground-truth token count from the last LLM response.
+*/
+shouldCompactFromUsage(): boolean;
+/**
+* Resolve and cache model limits from registry. Warns once if not found.
+* @returns true if limits are available, false otherwise
+*/
+private resolveModelLimits;
 /**
 * Get compaction statistics.
 */
@@ -8613,20 +8632,16 @@ declare class GadgetCallParser {
 /**
 * Character-to-token ratio for fallback token estimation.
 *
-*
-*
-*
-* - OpenAI's GPT models average ~4 chars/token for English text
-* - Anthropic's Claude models have similar characteristics
-* - Gemini models also approximate this ratio
-*
-* This is intentionally conservative to avoid underestimating token usage.
-* While not perfectly accurate, it provides a reasonable fallback when
-* precise tokenization is unavailable.
+* Used only when tiktoken (the primary fallback) is unavailable. A value of 2
+* errs on the side of overestimating token count, which is safer for
+* compaction triggers and output limiting.
 *
-*
+* Rationale: The previous value of 4 was based on English prose averages, but
+* agentic sessions are dominated by JSON, code, and structured data where the
+* real ratio is ~1.5-2.5 chars/token. A 4-char estimate underestimated tokens
+* by up to 250%, causing compaction and output limiting to never trigger.
 */
-declare const FALLBACK_CHARS_PER_TOKEN =
+declare const FALLBACK_CHARS_PER_TOKEN = 2;

 /**
 * Subagent creation helper for gadget authors.
@@ -9554,8 +9569,14 @@ declare abstract class OpenAICompatibleProvider<TConfig extends OpenAICompatible
 protected executeStreamRequest(payload: Parameters<OpenAI["chat"]["completions"]["create"]>[0], signal?: AbortSignal): Promise<AsyncIterable<ChatCompletionChunk>>;
 protected normalizeProviderStream(iterable: AsyncIterable<unknown>): LLMStream;
 /**
-* Count tokens using
-*
+* Count tokens using tiktoken o200k_base encoding.
+*
+* While o200k_base isn't model-exact for non-OpenAI models routed through
+* meta-providers like OpenRouter, BPE tokenizers with 200K vocab produce
+* counts within 10-20% of true values — far better than the character-based
+* fallback which can be off by 250% for JSON/code-heavy content.
+*
+* Falls back to character-based estimation if tiktoken fails.
 */
 countTokens(messages: LLMMessage[], descriptor: ModelDescriptor, _spec?: ModelSpec): Promise<number>;
 }
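
package/dist/index.d.ts carries the same declaration changes as index.d.cts. The practical effect of dropping the fallback ratio from 4 to 2 characters per token is easiest to see with a rough worked example; the 1.5-2.5 chars/token range comes from the rationale comment above, the 10,000-character payload and the formula shape are illustrative:

    // Character-based fallback, used only when tiktoken is unavailable (sketch).
    const FALLBACK_CHARS_PER_TOKEN = 2;
    const estimateTokens = (text: string): number =>
      Math.ceil(text.length / FALLBACK_CHARS_PER_TOKEN);

    // A 10,000-character JSON tool result:
    //   old estimate (4 chars/token): 2,500 tokens
    //   new estimate (2 chars/token): 5,000 tokens
    //   typical BPE count for dense JSON/code (~1.5-2.5 chars/token): ~4,000-6,500 tokens
    // The new value overestimates prose slightly instead of badly underestimating
    // structured content, so compaction and output limiting still trigger.
    estimateTokens("x".repeat(10_000)); // 5000
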
package/dist/index.js
CHANGED
@@ -813,7 +813,7 @@ var init_constants = __esm({
 GADGET_ARG_PREFIX = "!!!ARG:";
 DEFAULT_GADGET_OUTPUT_LIMIT = true;
 DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT = 15;
-CHARS_PER_TOKEN =
+CHARS_PER_TOKEN = 2;
 FALLBACK_CONTEXT_WINDOW = 128e3;
 }
 });
@@ -2834,6 +2834,7 @@ var CompactionManager;
 var init_manager = __esm({
 "src/agent/compaction/manager.ts"() {
 "use strict";
+init_logger();
 init_config();
 init_strategies();
 CompactionManager = class {
@@ -2841,15 +2842,19 @@ var init_manager = __esm({
 model;
 config;
 strategy;
+logger;
 modelLimits;
+hasWarnedModelNotFound = false;
+hasWarnedNoTokenCounting = false;
 // Statistics
 totalCompactions = 0;
 totalTokensSaved = 0;
 lastTokenCount = 0;
-constructor(client, model, config = {}) {
+constructor(client, model, config = {}, logger2) {
 this.client = client;
 this.model = model;
 this.config = resolveCompactionConfig(config);
+this.logger = logger2 ?? createLogger({ name: "llmist:compaction" });
 if (typeof config.strategy === "object" && "compact" in config.strategy) {
 this.strategy = config.strategy;
 } else {
@@ -2867,13 +2872,16 @@ var init_manager = __esm({
 if (!this.config.enabled) {
 return null;
 }
-if (!this.
-
-if (!this.modelLimits) {
-return null;
-}
+if (!this.resolveModelLimits()) {
+return null;
 }
 if (!this.client.countTokens) {
+if (!this.hasWarnedNoTokenCounting) {
+this.hasWarnedNoTokenCounting = true;
+this.logger.warn("Compaction skipped: client does not support token counting", {
+model: this.model
+});
+}
 return null;
 }
 const messages = conversation.getMessages();
@@ -2904,11 +2912,8 @@ var init_manager = __esm({
 * @returns CompactionEvent with compaction details
 */
 async compact(conversation, iteration, precomputed) {
-if (!this.
-
-if (!this.modelLimits) {
-return null;
-}
+if (!this.resolveModelLimits()) {
+return null;
 }
 const historyMessages = precomputed?.historyMessages ?? conversation.getHistoryMessages();
 const baseMessages = precomputed?.baseMessages ?? conversation.getBaseMessages();
@@ -2950,6 +2955,42 @@ var init_manager = __esm({
 }
 return event;
 }
+/**
+* Feed API-reported input token count for reactive threshold checking.
+* Call this after each LLM response with the actual inputTokens from usage.
+*/
+updateUsage(inputTokens) {
+this.lastTokenCount = inputTokens;
+}
+/**
+* Check if compaction should trigger based on API-reported usage.
+* Unlike checkAndCompact() which uses estimated token counts,
+* this uses the ground-truth token count from the last LLM response.
+*/
+shouldCompactFromUsage() {
+if (!this.config.enabled) return false;
+if (!this.resolveModelLimits()) return false;
+const usagePercent = this.lastTokenCount / this.modelLimits.contextWindow * 100;
+return usagePercent >= this.config.triggerThresholdPercent;
+}
+/**
+* Resolve and cache model limits from registry. Warns once if not found.
+* @returns true if limits are available, false otherwise
+*/
+resolveModelLimits() {
+if (this.modelLimits) return true;
+this.modelLimits = this.client.modelRegistry.getModelLimits(this.model);
+if (!this.modelLimits) {
+if (!this.hasWarnedModelNotFound) {
+this.hasWarnedModelNotFound = true;
+this.logger.warn("Compaction skipped: model not found in registry", {
+model: this.model
+});
+}
+return false;
+}
+return true;
+}
 /**
 * Get compaction statistics.
 */
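
The new shouldCompactFromUsage() reduces to a single percentage comparison. A small worked example; the 128,000-token context window and 80 percent threshold are illustrative numbers, not values read from the package:

    // usagePercent = lastTokenCount / contextWindow * 100, compact when >= threshold.
    const contextWindow = 128_000;        // illustrative; normally resolved from the model registry
    const triggerThresholdPercent = 80;   // illustrative; comes from the resolved CompactionConfig

    const shouldCompact = (lastTokenCount: number): boolean =>
      (lastTokenCount / contextWindow) * 100 >= triggerThresholdPercent;

    shouldCompact(90_000);  // 70.3% of the window -> false
    shouldCompact(110_000); // 85.9% of the window -> true
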
@@ -7350,7 +7391,7 @@ var init_constants2 = __esm({
 "src/providers/constants.ts"() {
 "use strict";
 ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS = 4096;
-FALLBACK_CHARS_PER_TOKEN =
+FALLBACK_CHARS_PER_TOKEN = 2;
 OPENAI_MESSAGE_OVERHEAD_TOKENS = 4;
 OPENAI_REPLY_PRIMING_TOKENS = 2;
 OPENAI_NAME_FIELD_OVERHEAD_TOKENS = 1;
@@ -9705,6 +9746,7 @@ var init_huggingface_models = __esm({

 // src/providers/openai-compatible-provider.ts
 import OpenAI from "openai";
+import { get_encoding } from "tiktoken";
 var ROLE_MAP, OpenAICompatibleProvider;
 var init_openai_compatible_provider = __esm({
 "src/providers/openai-compatible-provider.ts"() {
@@ -9909,11 +9951,38 @@ var init_openai_compatible_provider = __esm({
 }
 }
 /**
-* Count tokens using
-*
+* Count tokens using tiktoken o200k_base encoding.
+*
+* While o200k_base isn't model-exact for non-OpenAI models routed through
+* meta-providers like OpenRouter, BPE tokenizers with 200K vocab produce
+* counts within 10-20% of true values — far better than the character-based
+* fallback which can be off by 250% for JSON/code-heavy content.
+*
+* Falls back to character-based estimation if tiktoken fails.
 */
 async countTokens(messages, descriptor, _spec) {
+if (!messages || messages.length === 0) return 0;
 try {
+const encoding = get_encoding("o200k_base");
+try {
+let tokenCount = 0;
+for (const msg of messages) {
+const parts = normalizeMessageContent(msg.content);
+for (const part of parts) {
+if (part.type === "text") {
+tokenCount += encoding.encode(part.text).length;
+}
+}
+}
+return tokenCount;
+} finally {
+encoding.free();
+}
+} catch (error) {
+console.warn(
+`Token counting with tiktoken failed for ${descriptor.name}, using fallback estimation:`,
+error
+);
 let totalChars = 0;
 for (const msg of messages) {
 const parts = normalizeMessageContent(msg.content);
@@ -9924,9 +9993,6 @@ var init_openai_compatible_provider = __esm({
 }
 }
 return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
-} catch (error) {
-console.warn(`Token counting failed for ${descriptor.name}, using zero estimate:`, error);
-return 0;
 }
 }
 };
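
The counting logic added here follows one pattern throughout the bundle: encode each text part with the o200k_base encoding, always free the encoding, and fall back to the character estimate if tiktoken fails. A standalone sketch of that pattern, assuming the tiktoken npm package and simplifying messages to plain strings:

    import { get_encoding } from "tiktoken";

    // Counts tokens for an array of text parts; mirrors the provider logic above.
    function countTextTokens(texts: string[]): number {
      try {
        const encoding = get_encoding("o200k_base");
        try {
          return texts.reduce((sum, t) => sum + encoding.encode(t).length, 0);
        } finally {
          encoding.free(); // the WASM-backed encoding must be freed explicitly
        }
      } catch {
        // Character-based fallback, as in the bundle (2 chars/token).
        const chars = texts.reduce((sum, t) => sum + t.length, 0);
        return Math.ceil(chars / 2);
      }
    }
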
@@ -12588,6 +12654,7 @@ var init_client = __esm({
 "use strict";
 init_builder();
 init_discovery();
+init_constants();
 init_model_registry();
 init_image();
 init_speech();
@@ -12706,8 +12773,43 @@ var init_client = __esm({
 if (adapter.countTokens) {
 return adapter.countTokens(messages, descriptor, spec);
 }
-
-
+try {
+const { get_encoding: get_encoding2 } = await import("tiktoken");
+const encoding = get_encoding2("o200k_base");
+try {
+let tokenCount = 0;
+for (const msg of messages) {
+const content = msg.content;
+if (typeof content === "string") {
+tokenCount += encoding.encode(content).length;
+} else if (Array.isArray(content)) {
+for (const part of content) {
+if (part.type === "text") {
+tokenCount += encoding.encode(part.text).length;
+}
+}
+}
+}
+return tokenCount;
+} finally {
+encoding.free();
+}
+} catch {
+let totalChars = 0;
+for (const msg of messages) {
+const content = msg.content;
+if (typeof content === "string") {
+totalChars += content.length;
+} else if (Array.isArray(content)) {
+for (const part of content) {
+if (part.type === "text") {
+totalChars += part.text.length;
+}
+}
+}
+}
+return Math.ceil(totalChars / CHARS_PER_TOKEN);
+}
 }
 resolveAdapter(descriptor) {
 const adapter = this.adapters.find((item) => item.supports(descriptor));
@@ -16372,7 +16474,8 @@ var init_agent = __esm({
 this.compactionManager = new CompactionManager(
 this.client,
 this.model,
-options.compactionConfig
+options.compactionConfig,
+this.logger
 );
 }
 this.signal = options.signal;
@@ -16718,6 +16821,22 @@ var init_agent = __esm({
 this.logger.info("Loop terminated by gadget or processor");
 break;
 }
+if (this.compactionManager && result.usage?.inputTokens) {
+this.compactionManager.updateUsage(result.usage.inputTokens);
+if (this.compactionManager.shouldCompactFromUsage()) {
+this.logger.info("Reactive compaction triggered from API-reported usage", {
+inputTokens: result.usage.inputTokens,
+iteration: currentIteration
+});
+const reactiveCompaction = await this.compactionManager.compact(
+this.conversation,
+currentIteration
+);
+if (reactiveCompaction) {
+yield await this.emitCompactionEvent(reactiveCompaction, currentIteration);
+}
+}
+}
 if (this.budget !== void 0) {
 const totalCost = this.tree.getTotalCost();
 if (totalCost >= this.budget) {