llmist 16.2.4 → 17.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +242 -26
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +36 -15
- package/dist/index.d.ts +36 -15
- package/dist/index.js +237 -21
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -818,7 +818,7 @@ var init_constants = __esm({
|
|
|
818
818
|
GADGET_ARG_PREFIX = "!!!ARG:";
|
|
819
819
|
DEFAULT_GADGET_OUTPUT_LIMIT = true;
|
|
820
820
|
DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT = 15;
|
|
821
|
-
CHARS_PER_TOKEN =
|
|
821
|
+
CHARS_PER_TOKEN = 2;
|
|
822
822
|
FALLBACK_CONTEXT_WINDOW = 128e3;
|
|
823
823
|
}
|
|
824
824
|
});
|
|
@@ -2839,6 +2839,7 @@ var CompactionManager;
|
|
|
2839
2839
|
var init_manager = __esm({
|
|
2840
2840
|
"src/agent/compaction/manager.ts"() {
|
|
2841
2841
|
"use strict";
|
|
2842
|
+
init_logger();
|
|
2842
2843
|
init_config();
|
|
2843
2844
|
init_strategies();
|
|
2844
2845
|
CompactionManager = class {
|
|
@@ -2846,15 +2847,19 @@ var init_manager = __esm({
|
|
|
2846
2847
|
model;
|
|
2847
2848
|
config;
|
|
2848
2849
|
strategy;
|
|
2850
|
+
logger;
|
|
2849
2851
|
modelLimits;
|
|
2852
|
+
hasWarnedModelNotFound = false;
|
|
2853
|
+
hasWarnedNoTokenCounting = false;
|
|
2850
2854
|
// Statistics
|
|
2851
2855
|
totalCompactions = 0;
|
|
2852
2856
|
totalTokensSaved = 0;
|
|
2853
2857
|
lastTokenCount = 0;
|
|
2854
|
-
constructor(client, model, config = {}) {
|
|
2858
|
+
constructor(client, model, config = {}, logger2) {
|
|
2855
2859
|
this.client = client;
|
|
2856
2860
|
this.model = model;
|
|
2857
2861
|
this.config = resolveCompactionConfig(config);
|
|
2862
|
+
this.logger = logger2 ?? createLogger({ name: "llmist:compaction" });
|
|
2858
2863
|
if (typeof config.strategy === "object" && "compact" in config.strategy) {
|
|
2859
2864
|
this.strategy = config.strategy;
|
|
2860
2865
|
} else {
|
|
@@ -2872,13 +2877,16 @@ var init_manager = __esm({
|
|
|
2872
2877
|
if (!this.config.enabled) {
|
|
2873
2878
|
return null;
|
|
2874
2879
|
}
|
|
2875
|
-
if (!this.
|
|
2876
|
-
|
|
2877
|
-
if (!this.modelLimits) {
|
|
2878
|
-
return null;
|
|
2879
|
-
}
|
|
2880
|
+
if (!this.resolveModelLimits()) {
|
|
2881
|
+
return null;
|
|
2880
2882
|
}
|
|
2881
2883
|
if (!this.client.countTokens) {
|
|
2884
|
+
if (!this.hasWarnedNoTokenCounting) {
|
|
2885
|
+
this.hasWarnedNoTokenCounting = true;
|
|
2886
|
+
this.logger.warn("Compaction skipped: client does not support token counting", {
|
|
2887
|
+
model: this.model
|
|
2888
|
+
});
|
|
2889
|
+
}
|
|
2882
2890
|
return null;
|
|
2883
2891
|
}
|
|
2884
2892
|
const messages = conversation.getMessages();
|
|
@@ -2909,11 +2917,8 @@ var init_manager = __esm({
|
|
|
2909
2917
|
* @returns CompactionEvent with compaction details
|
|
2910
2918
|
*/
|
|
2911
2919
|
async compact(conversation, iteration, precomputed) {
|
|
2912
|
-
if (!this.
|
|
2913
|
-
|
|
2914
|
-
if (!this.modelLimits) {
|
|
2915
|
-
return null;
|
|
2916
|
-
}
|
|
2920
|
+
if (!this.resolveModelLimits()) {
|
|
2921
|
+
return null;
|
|
2917
2922
|
}
|
|
2918
2923
|
const historyMessages = precomputed?.historyMessages ?? conversation.getHistoryMessages();
|
|
2919
2924
|
const baseMessages = precomputed?.baseMessages ?? conversation.getBaseMessages();
|
|
@@ -2955,6 +2960,42 @@ var init_manager = __esm({
|
|
|
2955
2960
|
}
|
|
2956
2961
|
return event;
|
|
2957
2962
|
}
|
|
2963
|
+
/**
|
|
2964
|
+
* Feed API-reported input token count for reactive threshold checking.
|
|
2965
|
+
* Call this after each LLM response with the actual inputTokens from usage.
|
|
2966
|
+
*/
|
|
2967
|
+
updateUsage(inputTokens) {
|
|
2968
|
+
this.lastTokenCount = inputTokens;
|
|
2969
|
+
}
|
|
2970
|
+
/**
|
|
2971
|
+
* Check if compaction should trigger based on API-reported usage.
|
|
2972
|
+
* Unlike checkAndCompact() which uses estimated token counts,
|
|
2973
|
+
* this uses the ground-truth token count from the last LLM response.
|
|
2974
|
+
*/
|
|
2975
|
+
shouldCompactFromUsage() {
|
|
2976
|
+
if (!this.config.enabled) return false;
|
|
2977
|
+
if (!this.resolveModelLimits()) return false;
|
|
2978
|
+
const usagePercent = this.lastTokenCount / this.modelLimits.contextWindow * 100;
|
|
2979
|
+
return usagePercent >= this.config.triggerThresholdPercent;
|
|
2980
|
+
}
|
|
2981
|
+
/**
|
|
2982
|
+
* Resolve and cache model limits from registry. Warns once if not found.
|
|
2983
|
+
* @returns true if limits are available, false otherwise
|
|
2984
|
+
*/
|
|
2985
|
+
resolveModelLimits() {
|
|
2986
|
+
if (this.modelLimits) return true;
|
|
2987
|
+
this.modelLimits = this.client.modelRegistry.getModelLimits(this.model);
|
|
2988
|
+
if (!this.modelLimits) {
|
|
2989
|
+
if (!this.hasWarnedModelNotFound) {
|
|
2990
|
+
this.hasWarnedModelNotFound = true;
|
|
2991
|
+
this.logger.warn("Compaction skipped: model not found in registry", {
|
|
2992
|
+
model: this.model
|
|
2993
|
+
});
|
|
2994
|
+
}
|
|
2995
|
+
return false;
|
|
2996
|
+
}
|
|
2997
|
+
return true;
|
|
2998
|
+
}
|
|
2958
2999
|
/**
|
|
2959
3000
|
* Get compaction statistics.
|
|
2960
3001
|
*/
|
|
@@ -7358,7 +7399,7 @@ var init_constants2 = __esm({
|
|
|
7358
7399
|
"src/providers/constants.ts"() {
|
|
7359
7400
|
"use strict";
|
|
7360
7401
|
ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS = 4096;
|
|
7361
|
-
FALLBACK_CHARS_PER_TOKEN =
|
|
7402
|
+
FALLBACK_CHARS_PER_TOKEN = 2;
|
|
7362
7403
|
OPENAI_MESSAGE_OVERHEAD_TOKENS = 4;
|
|
7363
7404
|
OPENAI_REPLY_PRIMING_TOKENS = 2;
|
|
7364
7405
|
OPENAI_NAME_FIELD_OVERHEAD_TOKENS = 1;
|
|
@@ -9712,11 +9753,12 @@ var init_huggingface_models = __esm({
|
|
|
9712
9753
|
});
|
|
9713
9754
|
|
|
9714
9755
|
// src/providers/openai-compatible-provider.ts
|
|
9715
|
-
var import_openai, ROLE_MAP, OpenAICompatibleProvider;
|
|
9756
|
+
var import_openai, import_tiktoken, ROLE_MAP, OpenAICompatibleProvider;
|
|
9716
9757
|
var init_openai_compatible_provider = __esm({
|
|
9717
9758
|
"src/providers/openai-compatible-provider.ts"() {
|
|
9718
9759
|
"use strict";
|
|
9719
9760
|
import_openai = __toESM(require("openai"), 1);
|
|
9761
|
+
import_tiktoken = require("tiktoken");
|
|
9720
9762
|
init_messages();
|
|
9721
9763
|
init_base_provider();
|
|
9722
9764
|
init_constants2();
|
|
@@ -9917,11 +9959,38 @@ var init_openai_compatible_provider = __esm({
|
|
|
9917
9959
|
}
|
|
9918
9960
|
}
|
|
9919
9961
|
/**
|
|
9920
|
-
* Count tokens using
|
|
9921
|
-
*
|
|
9962
|
+
* Count tokens using tiktoken o200k_base encoding.
|
|
9963
|
+
*
|
|
9964
|
+
* While o200k_base isn't model-exact for non-OpenAI models routed through
|
|
9965
|
+
* meta-providers like OpenRouter, BPE tokenizers with 200K vocab produce
|
|
9966
|
+
* counts within 10-20% of true values — far better than the character-based
|
|
9967
|
+
* fallback which can be off by 250% for JSON/code-heavy content.
|
|
9968
|
+
*
|
|
9969
|
+
* Falls back to character-based estimation if tiktoken fails.
|
|
9922
9970
|
*/
|
|
9923
9971
|
async countTokens(messages, descriptor, _spec) {
|
|
9972
|
+
if (!messages || messages.length === 0) return 0;
|
|
9924
9973
|
try {
|
|
9974
|
+
const encoding = (0, import_tiktoken.get_encoding)("o200k_base");
|
|
9975
|
+
try {
|
|
9976
|
+
let tokenCount = 0;
|
|
9977
|
+
for (const msg of messages) {
|
|
9978
|
+
const parts = normalizeMessageContent(msg.content);
|
|
9979
|
+
for (const part of parts) {
|
|
9980
|
+
if (part.type === "text") {
|
|
9981
|
+
tokenCount += encoding.encode(part.text).length;
|
|
9982
|
+
}
|
|
9983
|
+
}
|
|
9984
|
+
}
|
|
9985
|
+
return tokenCount;
|
|
9986
|
+
} finally {
|
|
9987
|
+
encoding.free();
|
|
9988
|
+
}
|
|
9989
|
+
} catch (error) {
|
|
9990
|
+
console.warn(
|
|
9991
|
+
`Token counting with tiktoken failed for ${descriptor.name}, using fallback estimation:`,
|
|
9992
|
+
error
|
|
9993
|
+
);
|
|
9925
9994
|
let totalChars = 0;
|
|
9926
9995
|
for (const msg of messages) {
|
|
9927
9996
|
const parts = normalizeMessageContent(msg.content);
|
|
@@ -9932,9 +10001,6 @@ var init_openai_compatible_provider = __esm({
|
|
|
9932
10001
|
}
|
|
9933
10002
|
}
|
|
9934
10003
|
return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
|
|
9935
|
-
} catch (error) {
|
|
9936
|
-
console.warn(`Token counting failed for ${descriptor.name}, using zero estimate:`, error);
|
|
9937
|
-
return 0;
|
|
9938
10004
|
}
|
|
9939
10005
|
}
|
|
9940
10006
|
};
|
|
@@ -10885,12 +10951,12 @@ function sanitizeExtra(extra, allowTemperature) {
|
|
|
10885
10951
|
function createOpenAIProviderFromEnv() {
|
|
10886
10952
|
return createProviderFromEnv("OPENAI_API_KEY", import_openai3.default, OpenAIChatProvider);
|
|
10887
10953
|
}
|
|
10888
|
-
var import_openai3,
|
|
10954
|
+
var import_openai3, import_tiktoken2, ROLE_MAP2, OPENAI_EFFORT_MAP, OpenAIChatProvider;
|
|
10889
10955
|
var init_openai = __esm({
|
|
10890
10956
|
"src/providers/openai.ts"() {
|
|
10891
10957
|
"use strict";
|
|
10892
10958
|
import_openai3 = __toESM(require("openai"), 1);
|
|
10893
|
-
|
|
10959
|
+
import_tiktoken2 = require("tiktoken");
|
|
10894
10960
|
init_messages();
|
|
10895
10961
|
init_base_provider();
|
|
10896
10962
|
init_constants2();
|
|
@@ -11149,9 +11215,9 @@ var init_openai = __esm({
|
|
|
11149
11215
|
const modelName = descriptor.name;
|
|
11150
11216
|
let encoding;
|
|
11151
11217
|
try {
|
|
11152
|
-
encoding = (0,
|
|
11218
|
+
encoding = (0, import_tiktoken2.encoding_for_model)(modelName);
|
|
11153
11219
|
} catch {
|
|
11154
|
-
encoding = (0,
|
|
11220
|
+
encoding = (0, import_tiktoken2.encoding_for_model)("gpt-4o");
|
|
11155
11221
|
}
|
|
11156
11222
|
try {
|
|
11157
11223
|
let tokenCount = 0;
|
|
@@ -11443,6 +11509,103 @@ var init_openrouter_models = __esm({
|
|
|
11443
11509
|
}
|
|
11444
11510
|
},
|
|
11445
11511
|
// ============================================================
|
|
11512
|
+
// Google Gemini 3.1 Models (via OpenRouter)
|
|
11513
|
+
// ============================================================
|
|
11514
|
+
{
|
|
11515
|
+
provider: "openrouter",
|
|
11516
|
+
modelId: "google/gemini-3.1-pro-preview",
|
|
11517
|
+
displayName: "Gemini 3.1 Pro Preview (OpenRouter)",
|
|
11518
|
+
contextWindow: 1048576,
|
|
11519
|
+
maxOutputTokens: 65536,
|
|
11520
|
+
pricing: {
|
|
11521
|
+
input: 2,
|
|
11522
|
+
output: 12
|
|
11523
|
+
},
|
|
11524
|
+
knowledgeCutoff: "2025-01",
|
|
11525
|
+
features: {
|
|
11526
|
+
streaming: true,
|
|
11527
|
+
functionCalling: true,
|
|
11528
|
+
vision: true,
|
|
11529
|
+
reasoning: true,
|
|
11530
|
+
structuredOutputs: true
|
|
11531
|
+
},
|
|
11532
|
+
metadata: {
|
|
11533
|
+
family: "Gemini 3.1",
|
|
11534
|
+
releaseDate: "2026-03",
|
|
11535
|
+
notes: "Gemini 3.1 Pro Preview via OpenRouter. Frontier reasoning with enhanced software engineering performance."
|
|
11536
|
+
}
|
|
11537
|
+
},
|
|
11538
|
+
{
|
|
11539
|
+
provider: "openrouter",
|
|
11540
|
+
modelId: "google/gemini-3.1-pro-preview-customtools",
|
|
11541
|
+
displayName: "Gemini 3.1 Pro Preview Custom Tools (OpenRouter)",
|
|
11542
|
+
contextWindow: 1048576,
|
|
11543
|
+
maxOutputTokens: 65536,
|
|
11544
|
+
pricing: {
|
|
11545
|
+
input: 2,
|
|
11546
|
+
output: 12
|
|
11547
|
+
},
|
|
11548
|
+
knowledgeCutoff: "2025-01",
|
|
11549
|
+
features: {
|
|
11550
|
+
streaming: true,
|
|
11551
|
+
functionCalling: true,
|
|
11552
|
+
vision: true,
|
|
11553
|
+
reasoning: true,
|
|
11554
|
+
structuredOutputs: true
|
|
11555
|
+
},
|
|
11556
|
+
metadata: {
|
|
11557
|
+
family: "Gemini 3.1",
|
|
11558
|
+
releaseDate: "2026-03",
|
|
11559
|
+
notes: "Gemini 3.1 Pro Preview Custom Tools via OpenRouter. Improved tool selection to prevent overuse of general tools in agent workflows."
|
|
11560
|
+
}
|
|
11561
|
+
},
|
|
11562
|
+
{
|
|
11563
|
+
provider: "openrouter",
|
|
11564
|
+
modelId: "google/gemini-3.1-flash-lite-preview",
|
|
11565
|
+
displayName: "Gemini 3.1 Flash Lite Preview (OpenRouter)",
|
|
11566
|
+
contextWindow: 1048576,
|
|
11567
|
+
maxOutputTokens: 65536,
|
|
11568
|
+
pricing: {
|
|
11569
|
+
input: 0.25,
|
|
11570
|
+
output: 1.5
|
|
11571
|
+
},
|
|
11572
|
+
knowledgeCutoff: "2025-01",
|
|
11573
|
+
features: {
|
|
11574
|
+
streaming: true,
|
|
11575
|
+
functionCalling: true,
|
|
11576
|
+
vision: true,
|
|
11577
|
+
reasoning: true,
|
|
11578
|
+
structuredOutputs: true
|
|
11579
|
+
},
|
|
11580
|
+
metadata: {
|
|
11581
|
+
family: "Gemini 3.1",
|
|
11582
|
+
releaseDate: "2026-03",
|
|
11583
|
+
notes: "Gemini 3.1 Flash Lite Preview via OpenRouter. High-efficiency model with full thinking levels for cost/performance trade-offs."
|
|
11584
|
+
}
|
|
11585
|
+
},
|
|
11586
|
+
{
|
|
11587
|
+
provider: "openrouter",
|
|
11588
|
+
modelId: "google/gemini-3.1-flash-image-preview",
|
|
11589
|
+
displayName: "Gemini 3.1 Flash Image Preview (OpenRouter)",
|
|
11590
|
+
contextWindow: 65536,
|
|
11591
|
+
maxOutputTokens: 65536,
|
|
11592
|
+
pricing: {
|
|
11593
|
+
input: 0.5,
|
|
11594
|
+
output: 3
|
|
11595
|
+
},
|
|
11596
|
+
knowledgeCutoff: "2025-01",
|
|
11597
|
+
features: {
|
|
11598
|
+
streaming: true,
|
|
11599
|
+
functionCalling: false,
|
|
11600
|
+
vision: true
|
|
11601
|
+
},
|
|
11602
|
+
metadata: {
|
|
11603
|
+
family: "Gemini 3.1",
|
|
11604
|
+
releaseDate: "2026-03",
|
|
11605
|
+
notes: "Gemini 3.1 Flash Image Preview via OpenRouter. Pro-level image generation and editing at Flash speed."
|
|
11606
|
+
}
|
|
11607
|
+
},
|
|
11608
|
+
// ============================================================
|
|
11446
11609
|
// Meta Llama Models (via OpenRouter)
|
|
11447
11610
|
// ============================================================
|
|
11448
11611
|
{
|
|
@@ -12596,6 +12759,7 @@ var init_client = __esm({
|
|
|
12596
12759
|
"use strict";
|
|
12597
12760
|
init_builder();
|
|
12598
12761
|
init_discovery();
|
|
12762
|
+
init_constants();
|
|
12599
12763
|
init_model_registry();
|
|
12600
12764
|
init_image();
|
|
12601
12765
|
init_speech();
|
|
@@ -12714,8 +12878,43 @@ var init_client = __esm({
|
|
|
12714
12878
|
if (adapter.countTokens) {
|
|
12715
12879
|
return adapter.countTokens(messages, descriptor, spec);
|
|
12716
12880
|
}
|
|
12717
|
-
|
|
12718
|
-
|
|
12881
|
+
try {
|
|
12882
|
+
const { get_encoding: get_encoding2 } = await import("tiktoken");
|
|
12883
|
+
const encoding = get_encoding2("o200k_base");
|
|
12884
|
+
try {
|
|
12885
|
+
let tokenCount = 0;
|
|
12886
|
+
for (const msg of messages) {
|
|
12887
|
+
const content = msg.content;
|
|
12888
|
+
if (typeof content === "string") {
|
|
12889
|
+
tokenCount += encoding.encode(content).length;
|
|
12890
|
+
} else if (Array.isArray(content)) {
|
|
12891
|
+
for (const part of content) {
|
|
12892
|
+
if (part.type === "text") {
|
|
12893
|
+
tokenCount += encoding.encode(part.text).length;
|
|
12894
|
+
}
|
|
12895
|
+
}
|
|
12896
|
+
}
|
|
12897
|
+
}
|
|
12898
|
+
return tokenCount;
|
|
12899
|
+
} finally {
|
|
12900
|
+
encoding.free();
|
|
12901
|
+
}
|
|
12902
|
+
} catch {
|
|
12903
|
+
let totalChars = 0;
|
|
12904
|
+
for (const msg of messages) {
|
|
12905
|
+
const content = msg.content;
|
|
12906
|
+
if (typeof content === "string") {
|
|
12907
|
+
totalChars += content.length;
|
|
12908
|
+
} else if (Array.isArray(content)) {
|
|
12909
|
+
for (const part of content) {
|
|
12910
|
+
if (part.type === "text") {
|
|
12911
|
+
totalChars += part.text.length;
|
|
12912
|
+
}
|
|
12913
|
+
}
|
|
12914
|
+
}
|
|
12915
|
+
}
|
|
12916
|
+
return Math.ceil(totalChars / CHARS_PER_TOKEN);
|
|
12917
|
+
}
|
|
12719
12918
|
}
|
|
12720
12919
|
resolveAdapter(descriptor) {
|
|
12721
12920
|
const adapter = this.adapters.find((item) => item.supports(descriptor));
|
|
@@ -16380,7 +16579,8 @@ var init_agent = __esm({
|
|
|
16380
16579
|
this.compactionManager = new CompactionManager(
|
|
16381
16580
|
this.client,
|
|
16382
16581
|
this.model,
|
|
16383
|
-
options.compactionConfig
|
|
16582
|
+
options.compactionConfig,
|
|
16583
|
+
this.logger
|
|
16384
16584
|
);
|
|
16385
16585
|
}
|
|
16386
16586
|
this.signal = options.signal;
|
|
@@ -16726,6 +16926,22 @@ var init_agent = __esm({
|
|
|
16726
16926
|
this.logger.info("Loop terminated by gadget or processor");
|
|
16727
16927
|
break;
|
|
16728
16928
|
}
|
|
16929
|
+
if (this.compactionManager && result.usage?.inputTokens) {
|
|
16930
|
+
this.compactionManager.updateUsage(result.usage.inputTokens);
|
|
16931
|
+
if (this.compactionManager.shouldCompactFromUsage()) {
|
|
16932
|
+
this.logger.info("Reactive compaction triggered from API-reported usage", {
|
|
16933
|
+
inputTokens: result.usage.inputTokens,
|
|
16934
|
+
iteration: currentIteration
|
|
16935
|
+
});
|
|
16936
|
+
const reactiveCompaction = await this.compactionManager.compact(
|
|
16937
|
+
this.conversation,
|
|
16938
|
+
currentIteration
|
|
16939
|
+
);
|
|
16940
|
+
if (reactiveCompaction) {
|
|
16941
|
+
yield await this.emitCompactionEvent(reactiveCompaction, currentIteration);
|
|
16942
|
+
}
|
|
16943
|
+
}
|
|
16944
|
+
}
|
|
16729
16945
|
if (this.budget !== void 0) {
|
|
16730
16946
|
const totalCost = this.tree.getTotalCost();
|
|
16731
16947
|
if (totalCost >= this.budget) {
|