llmist 16.2.4 → 16.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +145 -26
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +36 -15
- package/dist/index.d.ts +36 -15
- package/dist/index.js +140 -21
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
@@ -818,7 +818,7 @@ var init_constants = __esm({
     GADGET_ARG_PREFIX = "!!!ARG:";
     DEFAULT_GADGET_OUTPUT_LIMIT = true;
     DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT = 15;
-    CHARS_PER_TOKEN =
+    CHARS_PER_TOKEN = 2;
     FALLBACK_CONTEXT_WINDOW = 128e3;
   }
 });
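
Note: the new CHARS_PER_TOKEN = 2 drives the character-based token estimate used when no real tokenizer is available (the previous value is truncated in this diff view). A minimal sketch of that heuristic, assuming a hypothetical helper name; at 2 chars/token the estimate errs high for most prose, which makes downstream context-window checks conservative:

    // Hypothetical helper; only CHARS_PER_TOKEN = 2 comes from this diff.
    const CHARS_PER_TOKEN = 2;

    function estimateTokens(text: string): number {
      // English prose averages ~4 chars/token, so this over-counts and
      // trips limit checks early rather than late.
      return Math.ceil(text.length / CHARS_PER_TOKEN);
    }
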
@@ -2839,6 +2839,7 @@ var CompactionManager;
 var init_manager = __esm({
   "src/agent/compaction/manager.ts"() {
     "use strict";
+    init_logger();
     init_config();
     init_strategies();
     CompactionManager = class {
@@ -2846,15 +2847,19 @@ var init_manager = __esm({
       model;
       config;
       strategy;
+      logger;
       modelLimits;
+      hasWarnedModelNotFound = false;
+      hasWarnedNoTokenCounting = false;
       // Statistics
       totalCompactions = 0;
       totalTokensSaved = 0;
       lastTokenCount = 0;
-      constructor(client, model, config = {}) {
+      constructor(client, model, config = {}, logger2) {
         this.client = client;
         this.model = model;
         this.config = resolveCompactionConfig(config);
+        this.logger = logger2 ?? createLogger({ name: "llmist:compaction" });
         if (typeof config.strategy === "object" && "compact" in config.strategy) {
           this.strategy = config.strategy;
         } else {
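
Note: the constructor now takes an optional fourth logger argument, defaulting to a "llmist:compaction" child logger. A hedged usage sketch; the import names and the client shape are assumptions, not confirmed exports of the package:

    // Hypothetical usage: inject a shared logger into CompactionManager.
    // Argument order (client, model, config, logger) comes from this diff.
    import { CompactionManager, createLogger } from "llmist"; // names assumed

    declare const client: any; // an LLM client with modelRegistry/countTokens
    const manager = new CompactionManager(
      client,
      "gpt-4o",
      { triggerThresholdPercent: 80 }, // config field per this diff
      createLogger({ name: "my-app:compaction" })
    );
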
@@ -2872,13 +2877,16 @@ var init_manager = __esm({
         if (!this.config.enabled) {
           return null;
         }
-        if (!this.
-
-        if (!this.modelLimits) {
-          return null;
-        }
+        if (!this.resolveModelLimits()) {
+          return null;
         }
         if (!this.client.countTokens) {
+          if (!this.hasWarnedNoTokenCounting) {
+            this.hasWarnedNoTokenCounting = true;
+            this.logger.warn("Compaction skipped: client does not support token counting", {
+              model: this.model
+            });
+          }
           return null;
         }
         const messages = conversation.getMessages();
@@ -2909,11 +2917,8 @@ var init_manager = __esm({
        * @returns CompactionEvent with compaction details
        */
       async compact(conversation, iteration, precomputed) {
-        if (!this.
-
-        if (!this.modelLimits) {
-          return null;
-        }
+        if (!this.resolveModelLimits()) {
+          return null;
         }
         const historyMessages = precomputed?.historyMessages ?? conversation.getHistoryMessages();
         const baseMessages = precomputed?.baseMessages ?? conversation.getBaseMessages();
@@ -2955,6 +2960,42 @@ var init_manager = __esm({
         }
         return event;
       }
+      /**
+       * Feed API-reported input token count for reactive threshold checking.
+       * Call this after each LLM response with the actual inputTokens from usage.
+       */
+      updateUsage(inputTokens) {
+        this.lastTokenCount = inputTokens;
+      }
+      /**
+       * Check if compaction should trigger based on API-reported usage.
+       * Unlike checkAndCompact() which uses estimated token counts,
+       * this uses the ground-truth token count from the last LLM response.
+       */
+      shouldCompactFromUsage() {
+        if (!this.config.enabled) return false;
+        if (!this.resolveModelLimits()) return false;
+        const usagePercent = this.lastTokenCount / this.modelLimits.contextWindow * 100;
+        return usagePercent >= this.config.triggerThresholdPercent;
+      }
+      /**
+       * Resolve and cache model limits from registry. Warns once if not found.
+       * @returns true if limits are available, false otherwise
+       */
+      resolveModelLimits() {
+        if (this.modelLimits) return true;
+        this.modelLimits = this.client.modelRegistry.getModelLimits(this.model);
+        if (!this.modelLimits) {
+          if (!this.hasWarnedModelNotFound) {
+            this.hasWarnedModelNotFound = true;
+            this.logger.warn("Compaction skipped: model not found in registry", {
+              model: this.model
+            });
+          }
+          return false;
+        }
+        return true;
+      }
       /**
        * Get compaction statistics.
        */
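
Note: together these three methods implement a reactive trigger: record the API-reported input token count after each response, then compact once usage crosses triggerThresholdPercent of the model's context window. A sketch of the call pattern, modeled on the agent-loop hunk further down in this diff; the result/usage shape comes from that hunk, the rest is illustrative:

    // Reactive-compaction call pattern (sketch).
    if (result.usage?.inputTokens) {
      manager.updateUsage(result.usage.inputTokens);   // ground-truth count
      if (manager.shouldCompactFromUsage()) {          // % of context window
        const event = await manager.compact(conversation, iteration);
        if (event) {
          // surface the CompactionEvent to listeners
        }
      }
    }
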
@@ -7358,7 +7399,7 @@ var init_constants2 = __esm({
   "src/providers/constants.ts"() {
     "use strict";
     ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS = 4096;
-    FALLBACK_CHARS_PER_TOKEN =
+    FALLBACK_CHARS_PER_TOKEN = 2;
     OPENAI_MESSAGE_OVERHEAD_TOKENS = 4;
     OPENAI_REPLY_PRIMING_TOKENS = 2;
     OPENAI_NAME_FIELD_OVERHEAD_TOKENS = 1;
@@ -9712,11 +9753,12 @@ var init_huggingface_models = __esm({
 });

 // src/providers/openai-compatible-provider.ts
-var import_openai, ROLE_MAP, OpenAICompatibleProvider;
+var import_openai, import_tiktoken, ROLE_MAP, OpenAICompatibleProvider;
 var init_openai_compatible_provider = __esm({
   "src/providers/openai-compatible-provider.ts"() {
     "use strict";
     import_openai = __toESM(require("openai"), 1);
+    import_tiktoken = require("tiktoken");
     init_messages();
     init_base_provider();
     init_constants2();
@@ -9917,11 +9959,38 @@ var init_openai_compatible_provider = __esm({
        }
      }
      /**
-     * Count tokens using
-     *
+     * Count tokens using tiktoken o200k_base encoding.
+     *
+     * While o200k_base isn't model-exact for non-OpenAI models routed through
+     * meta-providers like OpenRouter, BPE tokenizers with 200K vocab produce
+     * counts within 10-20% of true values — far better than the character-based
+     * fallback which can be off by 250% for JSON/code-heavy content.
+     *
+     * Falls back to character-based estimation if tiktoken fails.
      */
      async countTokens(messages, descriptor, _spec) {
+      if (!messages || messages.length === 0) return 0;
       try {
+        const encoding = (0, import_tiktoken.get_encoding)("o200k_base");
+        try {
+          let tokenCount = 0;
+          for (const msg of messages) {
+            const parts = normalizeMessageContent(msg.content);
+            for (const part of parts) {
+              if (part.type === "text") {
+                tokenCount += encoding.encode(part.text).length;
+              }
+            }
+          }
+          return tokenCount;
+        } finally {
+          encoding.free();
+        }
+      } catch (error) {
+        console.warn(
+          `Token counting with tiktoken failed for ${descriptor.name}, using fallback estimation:`,
+          error
+        );
         let totalChars = 0;
         for (const msg of messages) {
           const parts = normalizeMessageContent(msg.content);
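
Note: the counting loop pairs get_encoding("o200k_base") with a finally block that calls free(), releasing the WASM-backed encoder even when encoding throws. The same pattern in isolation, using tiktoken's public API with a simplified input shape:

    // Standalone sketch of the o200k_base counting pattern above.
    import { get_encoding } from "tiktoken";

    function countTextTokens(texts: string[]): number {
      const encoding = get_encoding("o200k_base");
      try {
        return texts.reduce((sum, t) => sum + encoding.encode(t).length, 0);
      } finally {
        encoding.free(); // encoders hold WASM memory; always release them
      }
    }
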
@@ -9932,9 +10001,6 @@ var init_openai_compatible_provider = __esm({
            }
          }
          return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
-      } catch (error) {
-        console.warn(`Token counting failed for ${descriptor.name}, using zero estimate:`, error);
-        return 0;
        }
      }
    };
@@ -10885,12 +10951,12 @@ function sanitizeExtra(extra, allowTemperature) {
 function createOpenAIProviderFromEnv() {
   return createProviderFromEnv("OPENAI_API_KEY", import_openai3.default, OpenAIChatProvider);
 }
-var import_openai3,
+var import_openai3, import_tiktoken2, ROLE_MAP2, OPENAI_EFFORT_MAP, OpenAIChatProvider;
 var init_openai = __esm({
   "src/providers/openai.ts"() {
     "use strict";
     import_openai3 = __toESM(require("openai"), 1);
-
+    import_tiktoken2 = require("tiktoken");
     init_messages();
     init_base_provider();
     init_constants2();
@@ -11149,9 +11215,9 @@ var init_openai = __esm({
       const modelName = descriptor.name;
       let encoding;
       try {
-        encoding = (0,
+        encoding = (0, import_tiktoken2.encoding_for_model)(modelName);
       } catch {
-        encoding = (0,
+        encoding = (0, import_tiktoken2.encoding_for_model)("gpt-4o");
       }
       try {
         let tokenCount = 0;
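
Note: the native OpenAI provider resolves a model-specific encoder and falls back to gpt-4o's encoding (o200k_base) when tiktoken does not recognize the model name, since encoding_for_model throws on unknown names. The same lookup in isolation:

    // Model-specific tokenizer lookup with a gpt-4o fallback (sketch).
    import { encoding_for_model, type TiktokenModel } from "tiktoken";

    function resolveEncoding(modelName: string) {
      try {
        return encoding_for_model(modelName as TiktokenModel);
      } catch {
        return encoding_for_model("gpt-4o"); // o200k_base under the hood
      }
    }
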
@@ -12596,6 +12662,7 @@ var init_client = __esm({
     "use strict";
     init_builder();
     init_discovery();
+    init_constants();
     init_model_registry();
     init_image();
     init_speech();
@@ -12714,8 +12781,43 @@ var init_client = __esm({
     if (adapter.countTokens) {
       return adapter.countTokens(messages, descriptor, spec);
     }
-
-
+    try {
+      const { get_encoding: get_encoding2 } = await import("tiktoken");
+      const encoding = get_encoding2("o200k_base");
+      try {
+        let tokenCount = 0;
+        for (const msg of messages) {
+          const content = msg.content;
+          if (typeof content === "string") {
+            tokenCount += encoding.encode(content).length;
+          } else if (Array.isArray(content)) {
+            for (const part of content) {
+              if (part.type === "text") {
+                tokenCount += encoding.encode(part.text).length;
+              }
+            }
+          }
+        }
+        return tokenCount;
+      } finally {
+        encoding.free();
+      }
+    } catch {
+      let totalChars = 0;
+      for (const msg of messages) {
+        const content = msg.content;
+        if (typeof content === "string") {
+          totalChars += content.length;
+        } else if (Array.isArray(content)) {
+          for (const part of content) {
+            if (part.type === "text") {
+              totalChars += part.text.length;
+            }
+          }
+        }
+      }
+      return Math.ceil(totalChars / CHARS_PER_TOKEN);
+    }
   }
   resolveAdapter(descriptor) {
     const adapter = this.adapters.find((item) => item.supports(descriptor));
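
Note: at the client level the fallback chain is adapter-provided countTokens, then a dynamically imported tiktoken count, then the character heuristic; the await import("tiktoken") plus catch means the client still returns an estimate in environments where tiktoken fails to load. A condensed sketch of steps two and three; the string-only message shape is a simplification:

    // Condensed sketch of the tiktoken-then-characters fallback above.
    async function countWithFallback(messages: { content: string }[]): Promise<number> {
      try {
        const { get_encoding } = await import("tiktoken");
        const enc = get_encoding("o200k_base");
        try {
          return messages.reduce((n, m) => n + enc.encode(m.content).length, 0);
        } finally {
          enc.free();
        }
      } catch {
        const chars = messages.reduce((n, m) => n + m.content.length, 0);
        return Math.ceil(chars / 2); // CHARS_PER_TOKEN = 2 in this release
      }
    }
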
@@ -16380,7 +16482,8 @@ var init_agent = __esm({
       this.compactionManager = new CompactionManager(
         this.client,
         this.model,
-        options.compactionConfig
+        options.compactionConfig,
+        this.logger
       );
     }
     this.signal = options.signal;
@@ -16726,6 +16829,22 @@ var init_agent = __esm({
           this.logger.info("Loop terminated by gadget or processor");
           break;
         }
+        if (this.compactionManager && result.usage?.inputTokens) {
+          this.compactionManager.updateUsage(result.usage.inputTokens);
+          if (this.compactionManager.shouldCompactFromUsage()) {
+            this.logger.info("Reactive compaction triggered from API-reported usage", {
+              inputTokens: result.usage.inputTokens,
+              iteration: currentIteration
+            });
+            const reactiveCompaction = await this.compactionManager.compact(
+              this.conversation,
+              currentIteration
+            );
+            if (reactiveCompaction) {
+              yield await this.emitCompactionEvent(reactiveCompaction, currentIteration);
+            }
+          }
+        }
         if (this.budget !== void 0) {
           const totalCost = this.tree.getTotalCost();
           if (totalCost >= this.budget) {