llmist 16.2.3 → 16.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -818,7 +818,7 @@ var init_constants = __esm({
818
818
  GADGET_ARG_PREFIX = "!!!ARG:";
819
819
  DEFAULT_GADGET_OUTPUT_LIMIT = true;
820
820
  DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT = 15;
821
- CHARS_PER_TOKEN = 4;
821
+ CHARS_PER_TOKEN = 2;
822
822
  FALLBACK_CONTEXT_WINDOW = 128e3;
823
823
  }
824
824
  });
@@ -2839,6 +2839,7 @@ var CompactionManager;
2839
2839
  var init_manager = __esm({
2840
2840
  "src/agent/compaction/manager.ts"() {
2841
2841
  "use strict";
2842
+ init_logger();
2842
2843
  init_config();
2843
2844
  init_strategies();
2844
2845
  CompactionManager = class {
@@ -2846,15 +2847,19 @@ var init_manager = __esm({
2846
2847
  model;
2847
2848
  config;
2848
2849
  strategy;
2850
+ logger;
2849
2851
  modelLimits;
2852
+ hasWarnedModelNotFound = false;
2853
+ hasWarnedNoTokenCounting = false;
2850
2854
  // Statistics
2851
2855
  totalCompactions = 0;
2852
2856
  totalTokensSaved = 0;
2853
2857
  lastTokenCount = 0;
2854
- constructor(client, model, config = {}) {
2858
+ constructor(client, model, config = {}, logger2) {
2855
2859
  this.client = client;
2856
2860
  this.model = model;
2857
2861
  this.config = resolveCompactionConfig(config);
2862
+ this.logger = logger2 ?? createLogger({ name: "llmist:compaction" });
2858
2863
  if (typeof config.strategy === "object" && "compact" in config.strategy) {
2859
2864
  this.strategy = config.strategy;
2860
2865
  } else {
@@ -2872,13 +2877,16 @@ var init_manager = __esm({
2872
2877
  if (!this.config.enabled) {
2873
2878
  return null;
2874
2879
  }
2875
- if (!this.modelLimits) {
2876
- this.modelLimits = this.client.modelRegistry.getModelLimits(this.model);
2877
- if (!this.modelLimits) {
2878
- return null;
2879
- }
2880
+ if (!this.resolveModelLimits()) {
2881
+ return null;
2880
2882
  }
2881
2883
  if (!this.client.countTokens) {
2884
+ if (!this.hasWarnedNoTokenCounting) {
2885
+ this.hasWarnedNoTokenCounting = true;
2886
+ this.logger.warn("Compaction skipped: client does not support token counting", {
2887
+ model: this.model
2888
+ });
2889
+ }
2882
2890
  return null;
2883
2891
  }
2884
2892
  const messages = conversation.getMessages();
@@ -2909,11 +2917,8 @@ var init_manager = __esm({
2909
2917
  * @returns CompactionEvent with compaction details
2910
2918
  */
2911
2919
  async compact(conversation, iteration, precomputed) {
2912
- if (!this.modelLimits) {
2913
- this.modelLimits = this.client.modelRegistry.getModelLimits(this.model);
2914
- if (!this.modelLimits) {
2915
- return null;
2916
- }
2920
+ if (!this.resolveModelLimits()) {
2921
+ return null;
2917
2922
  }
2918
2923
  const historyMessages = precomputed?.historyMessages ?? conversation.getHistoryMessages();
2919
2924
  const baseMessages = precomputed?.baseMessages ?? conversation.getBaseMessages();
@@ -2955,6 +2960,42 @@ var init_manager = __esm({
2955
2960
  }
2956
2961
  return event;
2957
2962
  }
2963
+ /**
2964
+ * Feed API-reported input token count for reactive threshold checking.
2965
+ * Call this after each LLM response with the actual inputTokens from usage.
2966
+ */
2967
+ updateUsage(inputTokens) {
2968
+ this.lastTokenCount = inputTokens;
2969
+ }
2970
+ /**
2971
+ * Check if compaction should trigger based on API-reported usage.
2972
+ * Unlike checkAndCompact() which uses estimated token counts,
2973
+ * this uses the ground-truth token count from the last LLM response.
2974
+ */
2975
+ shouldCompactFromUsage() {
2976
+ if (!this.config.enabled) return false;
2977
+ if (!this.resolveModelLimits()) return false;
2978
+ const usagePercent = this.lastTokenCount / this.modelLimits.contextWindow * 100;
2979
+ return usagePercent >= this.config.triggerThresholdPercent;
2980
+ }
2981
+ /**
2982
+ * Resolve and cache model limits from registry. Warns once if not found.
2983
+ * @returns true if limits are available, false otherwise
2984
+ */
2985
+ resolveModelLimits() {
2986
+ if (this.modelLimits) return true;
2987
+ this.modelLimits = this.client.modelRegistry.getModelLimits(this.model);
2988
+ if (!this.modelLimits) {
2989
+ if (!this.hasWarnedModelNotFound) {
2990
+ this.hasWarnedModelNotFound = true;
2991
+ this.logger.warn("Compaction skipped: model not found in registry", {
2992
+ model: this.model
2993
+ });
2994
+ }
2995
+ return false;
2996
+ }
2997
+ return true;
2998
+ }
2958
2999
  /**
2959
3000
  * Get compaction statistics.
2960
3001
  */
@@ -5306,6 +5347,45 @@ var init_activation = __esm({
5306
5347
  }
5307
5348
  });
5308
5349
 
5350
+ // src/skills/load-skill-gadget.ts
5351
+ function createLoadSkillGadget(registry) {
5352
+ const summaries = registry.getMetadataSummaries();
5353
+ const skillNames = registry.getModelInvocable().map((s) => s.name);
5354
+ const description = [
5355
+ "Load a skill's specialized instructions into context for a task.",
5356
+ "Available skills:",
5357
+ summaries
5358
+ ].join("\n");
5359
+ return createGadget({
5360
+ name: LOAD_SKILL_GADGET_NAME,
5361
+ description,
5362
+ schema: import_zod2.z.object({
5363
+ skill: import_zod2.z.enum(skillNames).describe("Name of the skill to load"),
5364
+ arguments: import_zod2.z.string().optional().describe("Arguments for the skill (e.g., a filename, issue number, or search query)")
5365
+ }),
5366
+ execute: async ({ skill: skillName, arguments: args }) => {
5367
+ const skill = registry.get(skillName);
5368
+ if (!skill) {
5369
+ return `Unknown skill: "${skillName}". Available skills: ${skillNames.join(", ")}`;
5370
+ }
5371
+ const activation = await skill.activate({
5372
+ arguments: args,
5373
+ cwd: process.cwd()
5374
+ });
5375
+ return activation.resolvedInstructions;
5376
+ }
5377
+ });
5378
+ }
5379
+ var import_zod2, LOAD_SKILL_GADGET_NAME;
5380
+ var init_load_skill_gadget = __esm({
5381
+ "src/skills/load-skill-gadget.ts"() {
5382
+ "use strict";
5383
+ import_zod2 = require("zod");
5384
+ init_create_gadget();
5385
+ LOAD_SKILL_GADGET_NAME = "LoadSkill";
5386
+ }
5387
+ });
5388
+
5309
5389
  // src/skills/parser.ts
5310
5390
  function parseFrontmatter(content) {
5311
5391
  const trimmed = content.trimStart();
@@ -5799,45 +5879,6 @@ var init_loader = __esm({
5799
5879
  }
5800
5880
  });
5801
5881
 
5802
- // src/skills/use-skill-gadget.ts
5803
- function createUseSkillGadget(registry) {
5804
- const summaries = registry.getMetadataSummaries();
5805
- const skillNames = registry.getModelInvocable().map((s) => s.name);
5806
- const description = [
5807
- "Activate a skill to get specialized instructions for a task.",
5808
- "Available skills:",
5809
- summaries
5810
- ].join("\n");
5811
- return createGadget({
5812
- name: USE_SKILL_GADGET_NAME,
5813
- description,
5814
- schema: import_zod2.z.object({
5815
- skill: import_zod2.z.enum(skillNames).describe("Name of the skill to activate"),
5816
- arguments: import_zod2.z.string().optional().describe("Arguments for the skill (e.g., a filename, issue number, or search query)")
5817
- }),
5818
- execute: async ({ skill: skillName, arguments: args }) => {
5819
- const skill = registry.get(skillName);
5820
- if (!skill) {
5821
- return `Unknown skill: "${skillName}". Available skills: ${skillNames.join(", ")}`;
5822
- }
5823
- const activation = await skill.activate({
5824
- arguments: args,
5825
- cwd: process.cwd()
5826
- });
5827
- return activation.resolvedInstructions;
5828
- }
5829
- });
5830
- }
5831
- var import_zod2, USE_SKILL_GADGET_NAME;
5832
- var init_use_skill_gadget = __esm({
5833
- "src/skills/use-skill-gadget.ts"() {
5834
- "use strict";
5835
- import_zod2 = require("zod");
5836
- init_create_gadget();
5837
- USE_SKILL_GADGET_NAME = "UseSkill";
5838
- }
5839
- });
5840
-
5841
5882
  // src/agent/builder-utils.ts
5842
5883
  function formatGadgetCall(gadgetName, invocationId, parameters, prefixes) {
5843
5884
  const startPrefix = prefixes?.start ?? GADGET_START_PREFIX;
@@ -7358,7 +7399,7 @@ var init_constants2 = __esm({
7358
7399
  "src/providers/constants.ts"() {
7359
7400
  "use strict";
7360
7401
  ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS = 4096;
7361
- FALLBACK_CHARS_PER_TOKEN = 4;
7402
+ FALLBACK_CHARS_PER_TOKEN = 2;
7362
7403
  OPENAI_MESSAGE_OVERHEAD_TOKENS = 4;
7363
7404
  OPENAI_REPLY_PRIMING_TOKENS = 2;
7364
7405
  OPENAI_NAME_FIELD_OVERHEAD_TOKENS = 1;
@@ -9712,11 +9753,12 @@ var init_huggingface_models = __esm({
9712
9753
  });
9713
9754
 
9714
9755
  // src/providers/openai-compatible-provider.ts
9715
- var import_openai, ROLE_MAP, OpenAICompatibleProvider;
9756
+ var import_openai, import_tiktoken, ROLE_MAP, OpenAICompatibleProvider;
9716
9757
  var init_openai_compatible_provider = __esm({
9717
9758
  "src/providers/openai-compatible-provider.ts"() {
9718
9759
  "use strict";
9719
9760
  import_openai = __toESM(require("openai"), 1);
9761
+ import_tiktoken = require("tiktoken");
9720
9762
  init_messages();
9721
9763
  init_base_provider();
9722
9764
  init_constants2();
@@ -9917,11 +9959,38 @@ var init_openai_compatible_provider = __esm({
9917
9959
  }
9918
9960
  }
9919
9961
  /**
9920
- * Count tokens using character-based fallback estimation.
9921
- * Most meta-providers don't have a native token counting API.
9962
+ * Count tokens using tiktoken o200k_base encoding.
9963
+ *
9964
+ * While o200k_base isn't model-exact for non-OpenAI models routed through
9965
+ * meta-providers like OpenRouter, BPE tokenizers with 200K vocab produce
9966
+ * counts within 10-20% of true values — far better than the character-based
9967
+ * fallback which can be off by 250% for JSON/code-heavy content.
9968
+ *
9969
+ * Falls back to character-based estimation if tiktoken fails.
9922
9970
  */
9923
9971
  async countTokens(messages, descriptor, _spec) {
9972
+ if (!messages || messages.length === 0) return 0;
9924
9973
  try {
9974
+ const encoding = (0, import_tiktoken.get_encoding)("o200k_base");
9975
+ try {
9976
+ let tokenCount = 0;
9977
+ for (const msg of messages) {
9978
+ const parts = normalizeMessageContent(msg.content);
9979
+ for (const part of parts) {
9980
+ if (part.type === "text") {
9981
+ tokenCount += encoding.encode(part.text).length;
9982
+ }
9983
+ }
9984
+ }
9985
+ return tokenCount;
9986
+ } finally {
9987
+ encoding.free();
9988
+ }
9989
+ } catch (error) {
9990
+ console.warn(
9991
+ `Token counting with tiktoken failed for ${descriptor.name}, using fallback estimation:`,
9992
+ error
9993
+ );
9925
9994
  let totalChars = 0;
9926
9995
  for (const msg of messages) {
9927
9996
  const parts = normalizeMessageContent(msg.content);
@@ -9932,9 +10001,6 @@ var init_openai_compatible_provider = __esm({
9932
10001
  }
9933
10002
  }
9934
10003
  return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
9935
- } catch (error) {
9936
- console.warn(`Token counting failed for ${descriptor.name}, using zero estimate:`, error);
9937
- return 0;
9938
10004
  }
9939
10005
  }
9940
10006
  };
@@ -10885,12 +10951,12 @@ function sanitizeExtra(extra, allowTemperature) {
10885
10951
  function createOpenAIProviderFromEnv() {
10886
10952
  return createProviderFromEnv("OPENAI_API_KEY", import_openai3.default, OpenAIChatProvider);
10887
10953
  }
10888
- var import_openai3, import_tiktoken, ROLE_MAP2, OPENAI_EFFORT_MAP, OpenAIChatProvider;
10954
+ var import_openai3, import_tiktoken2, ROLE_MAP2, OPENAI_EFFORT_MAP, OpenAIChatProvider;
10889
10955
  var init_openai = __esm({
10890
10956
  "src/providers/openai.ts"() {
10891
10957
  "use strict";
10892
10958
  import_openai3 = __toESM(require("openai"), 1);
10893
- import_tiktoken = require("tiktoken");
10959
+ import_tiktoken2 = require("tiktoken");
10894
10960
  init_messages();
10895
10961
  init_base_provider();
10896
10962
  init_constants2();
@@ -11149,9 +11215,9 @@ var init_openai = __esm({
11149
11215
  const modelName = descriptor.name;
11150
11216
  let encoding;
11151
11217
  try {
11152
- encoding = (0, import_tiktoken.encoding_for_model)(modelName);
11218
+ encoding = (0, import_tiktoken2.encoding_for_model)(modelName);
11153
11219
  } catch {
11154
- encoding = (0, import_tiktoken.encoding_for_model)("gpt-4o");
11220
+ encoding = (0, import_tiktoken2.encoding_for_model)("gpt-4o");
11155
11221
  }
11156
11222
  try {
11157
11223
  let tokenCount = 0;
@@ -12596,6 +12662,7 @@ var init_client = __esm({
12596
12662
  "use strict";
12597
12663
  init_builder();
12598
12664
  init_discovery();
12665
+ init_constants();
12599
12666
  init_model_registry();
12600
12667
  init_image();
12601
12668
  init_speech();
@@ -12714,8 +12781,43 @@ var init_client = __esm({
12714
12781
  if (adapter.countTokens) {
12715
12782
  return adapter.countTokens(messages, descriptor, spec);
12716
12783
  }
12717
- const totalChars = messages.reduce((sum, msg) => sum + (msg.content?.length ?? 0), 0);
12718
- return Math.ceil(totalChars / 4);
12784
+ try {
12785
+ const { get_encoding: get_encoding2 } = await import("tiktoken");
12786
+ const encoding = get_encoding2("o200k_base");
12787
+ try {
12788
+ let tokenCount = 0;
12789
+ for (const msg of messages) {
12790
+ const content = msg.content;
12791
+ if (typeof content === "string") {
12792
+ tokenCount += encoding.encode(content).length;
12793
+ } else if (Array.isArray(content)) {
12794
+ for (const part of content) {
12795
+ if (part.type === "text") {
12796
+ tokenCount += encoding.encode(part.text).length;
12797
+ }
12798
+ }
12799
+ }
12800
+ }
12801
+ return tokenCount;
12802
+ } finally {
12803
+ encoding.free();
12804
+ }
12805
+ } catch {
12806
+ let totalChars = 0;
12807
+ for (const msg of messages) {
12808
+ const content = msg.content;
12809
+ if (typeof content === "string") {
12810
+ totalChars += content.length;
12811
+ } else if (Array.isArray(content)) {
12812
+ for (const part of content) {
12813
+ if (part.type === "text") {
12814
+ totalChars += part.text.length;
12815
+ }
12816
+ }
12817
+ }
12818
+ }
12819
+ return Math.ceil(totalChars / CHARS_PER_TOKEN);
12820
+ }
12719
12821
  }
12720
12822
  resolveAdapter(descriptor) {
12721
12823
  const adapter = this.adapters.find((item) => item.supports(descriptor));
@@ -12854,9 +12956,9 @@ var init_builder = __esm({
12854
12956
  init_model_shortcuts();
12855
12957
  init_registry();
12856
12958
  init_activation();
12959
+ init_load_skill_gadget();
12857
12960
  init_loader();
12858
12961
  init_parser();
12859
- init_use_skill_gadget();
12860
12962
  init_agent();
12861
12963
  init_agent_internal_key();
12862
12964
  init_builder_utils();
@@ -13191,7 +13293,7 @@ ${resolved}`);
13191
13293
  const skillRegistry = this.resolveSkillRegistry();
13192
13294
  if (skillRegistry && skillRegistry.size > 0) {
13193
13295
  if (skillRegistry.getModelInvocable().length > 0) {
13194
- registry.registerByClass(createUseSkillGadget(skillRegistry));
13296
+ registry.registerByClass(createLoadSkillGadget(skillRegistry));
13195
13297
  }
13196
13298
  const preActivatedBlock = this.resolvePreActivatedInstructions(skillRegistry);
13197
13299
  if (preActivatedBlock) {
@@ -16380,7 +16482,8 @@ var init_agent = __esm({
16380
16482
  this.compactionManager = new CompactionManager(
16381
16483
  this.client,
16382
16484
  this.model,
16383
- options.compactionConfig
16485
+ options.compactionConfig,
16486
+ this.logger
16384
16487
  );
16385
16488
  }
16386
16489
  this.signal = options.signal;
@@ -16726,6 +16829,22 @@ var init_agent = __esm({
16726
16829
  this.logger.info("Loop terminated by gadget or processor");
16727
16830
  break;
16728
16831
  }
16832
+ if (this.compactionManager && result.usage?.inputTokens) {
16833
+ this.compactionManager.updateUsage(result.usage.inputTokens);
16834
+ if (this.compactionManager.shouldCompactFromUsage()) {
16835
+ this.logger.info("Reactive compaction triggered from API-reported usage", {
16836
+ inputTokens: result.usage.inputTokens,
16837
+ iteration: currentIteration
16838
+ });
16839
+ const reactiveCompaction = await this.compactionManager.compact(
16840
+ this.conversation,
16841
+ currentIteration
16842
+ );
16843
+ if (reactiveCompaction) {
16844
+ yield await this.emitCompactionEvent(reactiveCompaction, currentIteration);
16845
+ }
16846
+ }
16847
+ }
16729
16848
  if (this.budget !== void 0) {
16730
16849
  const totalCost = this.tree.getTotalCost();
16731
16850
  if (totalCost >= this.budget) {
@@ -17046,6 +17165,7 @@ __export(index_exports, {
17046
17165
  HybridStrategy: () => HybridStrategy,
17047
17166
  LLMMessageBuilder: () => LLMMessageBuilder,
17048
17167
  LLMist: () => LLMist,
17168
+ LOAD_SKILL_GADGET_NAME: () => LOAD_SKILL_GADGET_NAME,
17049
17169
  MODEL_ALIASES: () => MODEL_ALIASES,
17050
17170
  MediaStore: () => MediaStore,
17051
17171
  ModelIdentifierParser: () => ModelIdentifierParser,
@@ -17062,7 +17182,6 @@ __export(index_exports, {
17062
17182
  SummarizationStrategy: () => SummarizationStrategy,
17063
17183
  TaskCompletionSignal: () => TaskCompletionSignal,
17064
17184
  TimeoutException: () => TimeoutException,
17065
- USE_SKILL_GADGET_NAME: () => USE_SKILL_GADGET_NAME,
17066
17185
  audioFromBase64: () => audioFromBase64,
17067
17186
  audioFromBuffer: () => audioFromBuffer,
17068
17187
  collectEvents: () => collectEvents,
@@ -17075,12 +17194,12 @@ __export(index_exports, {
17075
17194
  createGeminiProviderFromEnv: () => createGeminiProviderFromEnv,
17076
17195
  createHints: () => createHints,
17077
17196
  createHuggingFaceProviderFromEnv: () => createHuggingFaceProviderFromEnv,
17197
+ createLoadSkillGadget: () => createLoadSkillGadget,
17078
17198
  createLogger: () => createLogger,
17079
17199
  createMediaOutput: () => createMediaOutput,
17080
17200
  createOpenAIProviderFromEnv: () => createOpenAIProviderFromEnv,
17081
17201
  createOpenRouterProviderFromEnv: () => createOpenRouterProviderFromEnv,
17082
17202
  createSubagent: () => createSubagent,
17083
- createUseSkillGadget: () => createUseSkillGadget,
17084
17203
  defaultLogger: () => defaultLogger,
17085
17204
  detectAudioMimeType: () => detectAudioMimeType,
17086
17205
  detectImageMimeType: () => detectImageMimeType,
@@ -17761,11 +17880,11 @@ var SimpleSessionManager = class extends BaseSessionManager {
17761
17880
 
17762
17881
  // src/skills/index.ts
17763
17882
  init_activation();
17883
+ init_load_skill_gadget();
17764
17884
  init_loader();
17765
17885
  init_parser();
17766
17886
  init_registry2();
17767
17887
  init_skill();
17768
- init_use_skill_gadget();
17769
17888
 
17770
17889
  // src/utils/format.ts
17771
17890
  function truncate(text3, maxLength, suffix = "...") {
@@ -17954,6 +18073,7 @@ function getHostExports2(ctx) {
17954
18073
  HybridStrategy,
17955
18074
  LLMMessageBuilder,
17956
18075
  LLMist,
18076
+ LOAD_SKILL_GADGET_NAME,
17957
18077
  MODEL_ALIASES,
17958
18078
  MediaStore,
17959
18079
  ModelIdentifierParser,
@@ -17970,7 +18090,6 @@ function getHostExports2(ctx) {
17970
18090
  SummarizationStrategy,
17971
18091
  TaskCompletionSignal,
17972
18092
  TimeoutException,
17973
- USE_SKILL_GADGET_NAME,
17974
18093
  audioFromBase64,
17975
18094
  audioFromBuffer,
17976
18095
  collectEvents,
@@ -17983,12 +18102,12 @@ function getHostExports2(ctx) {
17983
18102
  createGeminiProviderFromEnv,
17984
18103
  createHints,
17985
18104
  createHuggingFaceProviderFromEnv,
18105
+ createLoadSkillGadget,
17986
18106
  createLogger,
17987
18107
  createMediaOutput,
17988
18108
  createOpenAIProviderFromEnv,
17989
18109
  createOpenRouterProviderFromEnv,
17990
18110
  createSubagent,
17991
- createUseSkillGadget,
17992
18111
  defaultLogger,
17993
18112
  detectAudioMimeType,
17994
18113
  detectImageMimeType,