llmist 16.2.3 → 16.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -813,7 +813,7 @@ var init_constants = __esm({
813
813
  GADGET_ARG_PREFIX = "!!!ARG:";
814
814
  DEFAULT_GADGET_OUTPUT_LIMIT = true;
815
815
  DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT = 15;
816
- CHARS_PER_TOKEN = 4;
816
+ CHARS_PER_TOKEN = 2;
817
817
  FALLBACK_CONTEXT_WINDOW = 128e3;
818
818
  }
819
819
  });
@@ -2834,6 +2834,7 @@ var CompactionManager;
2834
2834
  var init_manager = __esm({
2835
2835
  "src/agent/compaction/manager.ts"() {
2836
2836
  "use strict";
2837
+ init_logger();
2837
2838
  init_config();
2838
2839
  init_strategies();
2839
2840
  CompactionManager = class {
@@ -2841,15 +2842,19 @@ var init_manager = __esm({
2841
2842
  model;
2842
2843
  config;
2843
2844
  strategy;
2845
+ logger;
2844
2846
  modelLimits;
2847
+ hasWarnedModelNotFound = false;
2848
+ hasWarnedNoTokenCounting = false;
2845
2849
  // Statistics
2846
2850
  totalCompactions = 0;
2847
2851
  totalTokensSaved = 0;
2848
2852
  lastTokenCount = 0;
2849
- constructor(client, model, config = {}) {
2853
+ constructor(client, model, config = {}, logger2) {
2850
2854
  this.client = client;
2851
2855
  this.model = model;
2852
2856
  this.config = resolveCompactionConfig(config);
2857
+ this.logger = logger2 ?? createLogger({ name: "llmist:compaction" });
2853
2858
  if (typeof config.strategy === "object" && "compact" in config.strategy) {
2854
2859
  this.strategy = config.strategy;
2855
2860
  } else {
@@ -2867,13 +2872,16 @@ var init_manager = __esm({
2867
2872
  if (!this.config.enabled) {
2868
2873
  return null;
2869
2874
  }
2870
- if (!this.modelLimits) {
2871
- this.modelLimits = this.client.modelRegistry.getModelLimits(this.model);
2872
- if (!this.modelLimits) {
2873
- return null;
2874
- }
2875
+ if (!this.resolveModelLimits()) {
2876
+ return null;
2875
2877
  }
2876
2878
  if (!this.client.countTokens) {
2879
+ if (!this.hasWarnedNoTokenCounting) {
2880
+ this.hasWarnedNoTokenCounting = true;
2881
+ this.logger.warn("Compaction skipped: client does not support token counting", {
2882
+ model: this.model
2883
+ });
2884
+ }
2877
2885
  return null;
2878
2886
  }
2879
2887
  const messages = conversation.getMessages();
@@ -2904,11 +2912,8 @@ var init_manager = __esm({
2904
2912
  * @returns CompactionEvent with compaction details
2905
2913
  */
2906
2914
  async compact(conversation, iteration, precomputed) {
2907
- if (!this.modelLimits) {
2908
- this.modelLimits = this.client.modelRegistry.getModelLimits(this.model);
2909
- if (!this.modelLimits) {
2910
- return null;
2911
- }
2915
+ if (!this.resolveModelLimits()) {
2916
+ return null;
2912
2917
  }
2913
2918
  const historyMessages = precomputed?.historyMessages ?? conversation.getHistoryMessages();
2914
2919
  const baseMessages = precomputed?.baseMessages ?? conversation.getBaseMessages();
@@ -2950,6 +2955,42 @@ var init_manager = __esm({
2950
2955
  }
2951
2956
  return event;
2952
2957
  }
2958
+ /**
2959
+ * Feed API-reported input token count for reactive threshold checking.
2960
+ * Call this after each LLM response with the actual inputTokens from usage.
2961
+ */
2962
+ updateUsage(inputTokens) {
2963
+ this.lastTokenCount = inputTokens;
2964
+ }
2965
+ /**
2966
+ * Check if compaction should trigger based on API-reported usage.
2967
+ * Unlike checkAndCompact() which uses estimated token counts,
2968
+ * this uses the ground-truth token count from the last LLM response.
2969
+ */
2970
+ shouldCompactFromUsage() {
2971
+ if (!this.config.enabled) return false;
2972
+ if (!this.resolveModelLimits()) return false;
2973
+ const usagePercent = this.lastTokenCount / this.modelLimits.contextWindow * 100;
2974
+ return usagePercent >= this.config.triggerThresholdPercent;
2975
+ }
2976
+ /**
2977
+ * Resolve and cache model limits from registry. Warns once if not found.
2978
+ * @returns true if limits are available, false otherwise
2979
+ */
2980
+ resolveModelLimits() {
2981
+ if (this.modelLimits) return true;
2982
+ this.modelLimits = this.client.modelRegistry.getModelLimits(this.model);
2983
+ if (!this.modelLimits) {
2984
+ if (!this.hasWarnedModelNotFound) {
2985
+ this.hasWarnedModelNotFound = true;
2986
+ this.logger.warn("Compaction skipped: model not found in registry", {
2987
+ model: this.model
2988
+ });
2989
+ }
2990
+ return false;
2991
+ }
2992
+ return true;
2993
+ }
2953
2994
  /**
2954
2995
  * Get compaction statistics.
2955
2996
  */
@@ -5298,6 +5339,45 @@ var init_activation = __esm({
5298
5339
  }
5299
5340
  });
5300
5341
 
5342
+ // src/skills/load-skill-gadget.ts
5343
+ import { z as z4 } from "zod";
5344
+ function createLoadSkillGadget(registry) {
5345
+ const summaries = registry.getMetadataSummaries();
5346
+ const skillNames = registry.getModelInvocable().map((s) => s.name);
5347
+ const description = [
5348
+ "Load a skill's specialized instructions into context for a task.",
5349
+ "Available skills:",
5350
+ summaries
5351
+ ].join("\n");
5352
+ return createGadget({
5353
+ name: LOAD_SKILL_GADGET_NAME,
5354
+ description,
5355
+ schema: z4.object({
5356
+ skill: z4.enum(skillNames).describe("Name of the skill to load"),
5357
+ arguments: z4.string().optional().describe("Arguments for the skill (e.g., a filename, issue number, or search query)")
5358
+ }),
5359
+ execute: async ({ skill: skillName, arguments: args }) => {
5360
+ const skill = registry.get(skillName);
5361
+ if (!skill) {
5362
+ return `Unknown skill: "${skillName}". Available skills: ${skillNames.join(", ")}`;
5363
+ }
5364
+ const activation = await skill.activate({
5365
+ arguments: args,
5366
+ cwd: process.cwd()
5367
+ });
5368
+ return activation.resolvedInstructions;
5369
+ }
5370
+ });
5371
+ }
5372
+ var LOAD_SKILL_GADGET_NAME;
5373
+ var init_load_skill_gadget = __esm({
5374
+ "src/skills/load-skill-gadget.ts"() {
5375
+ "use strict";
5376
+ init_create_gadget();
5377
+ LOAD_SKILL_GADGET_NAME = "LoadSkill";
5378
+ }
5379
+ });
5380
+
5301
5381
  // src/skills/parser.ts
5302
5382
  import fs from "fs";
5303
5383
  import path from "path";
@@ -5791,45 +5871,6 @@ var init_loader = __esm({
5791
5871
  }
5792
5872
  });
5793
5873
 
5794
- // src/skills/use-skill-gadget.ts
5795
- import { z as z4 } from "zod";
5796
- function createUseSkillGadget(registry) {
5797
- const summaries = registry.getMetadataSummaries();
5798
- const skillNames = registry.getModelInvocable().map((s) => s.name);
5799
- const description = [
5800
- "Activate a skill to get specialized instructions for a task.",
5801
- "Available skills:",
5802
- summaries
5803
- ].join("\n");
5804
- return createGadget({
5805
- name: USE_SKILL_GADGET_NAME,
5806
- description,
5807
- schema: z4.object({
5808
- skill: z4.enum(skillNames).describe("Name of the skill to activate"),
5809
- arguments: z4.string().optional().describe("Arguments for the skill (e.g., a filename, issue number, or search query)")
5810
- }),
5811
- execute: async ({ skill: skillName, arguments: args }) => {
5812
- const skill = registry.get(skillName);
5813
- if (!skill) {
5814
- return `Unknown skill: "${skillName}". Available skills: ${skillNames.join(", ")}`;
5815
- }
5816
- const activation = await skill.activate({
5817
- arguments: args,
5818
- cwd: process.cwd()
5819
- });
5820
- return activation.resolvedInstructions;
5821
- }
5822
- });
5823
- }
5824
- var USE_SKILL_GADGET_NAME;
5825
- var init_use_skill_gadget = __esm({
5826
- "src/skills/use-skill-gadget.ts"() {
5827
- "use strict";
5828
- init_create_gadget();
5829
- USE_SKILL_GADGET_NAME = "UseSkill";
5830
- }
5831
- });
5832
-
5833
5874
  // src/agent/builder-utils.ts
5834
5875
  function formatGadgetCall(gadgetName, invocationId, parameters, prefixes) {
5835
5876
  const startPrefix = prefixes?.start ?? GADGET_START_PREFIX;
@@ -7350,7 +7391,7 @@ var init_constants2 = __esm({
7350
7391
  "src/providers/constants.ts"() {
7351
7392
  "use strict";
7352
7393
  ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS = 4096;
7353
- FALLBACK_CHARS_PER_TOKEN = 4;
7394
+ FALLBACK_CHARS_PER_TOKEN = 2;
7354
7395
  OPENAI_MESSAGE_OVERHEAD_TOKENS = 4;
7355
7396
  OPENAI_REPLY_PRIMING_TOKENS = 2;
7356
7397
  OPENAI_NAME_FIELD_OVERHEAD_TOKENS = 1;
@@ -9705,6 +9746,7 @@ var init_huggingface_models = __esm({
9705
9746
 
9706
9747
  // src/providers/openai-compatible-provider.ts
9707
9748
  import OpenAI from "openai";
9749
+ import { get_encoding } from "tiktoken";
9708
9750
  var ROLE_MAP, OpenAICompatibleProvider;
9709
9751
  var init_openai_compatible_provider = __esm({
9710
9752
  "src/providers/openai-compatible-provider.ts"() {
@@ -9909,11 +9951,38 @@ var init_openai_compatible_provider = __esm({
9909
9951
  }
9910
9952
  }
9911
9953
  /**
9912
- * Count tokens using character-based fallback estimation.
9913
- * Most meta-providers don't have a native token counting API.
9954
+ * Count tokens using tiktoken o200k_base encoding.
9955
+ *
9956
+ * While o200k_base isn't model-exact for non-OpenAI models routed through
9957
+ * meta-providers like OpenRouter, BPE tokenizers with 200K vocab produce
9958
+ * counts within 10-20% of true values — far better than the character-based
9959
+ * fallback which can be off by 250% for JSON/code-heavy content.
9960
+ *
9961
+ * Falls back to character-based estimation if tiktoken fails.
9914
9962
  */
9915
9963
  async countTokens(messages, descriptor, _spec) {
9964
+ if (!messages || messages.length === 0) return 0;
9916
9965
  try {
9966
+ const encoding = get_encoding("o200k_base");
9967
+ try {
9968
+ let tokenCount = 0;
9969
+ for (const msg of messages) {
9970
+ const parts = normalizeMessageContent(msg.content);
9971
+ for (const part of parts) {
9972
+ if (part.type === "text") {
9973
+ tokenCount += encoding.encode(part.text).length;
9974
+ }
9975
+ }
9976
+ }
9977
+ return tokenCount;
9978
+ } finally {
9979
+ encoding.free();
9980
+ }
9981
+ } catch (error) {
9982
+ console.warn(
9983
+ `Token counting with tiktoken failed for ${descriptor.name}, using fallback estimation:`,
9984
+ error
9985
+ );
9917
9986
  let totalChars = 0;
9918
9987
  for (const msg of messages) {
9919
9988
  const parts = normalizeMessageContent(msg.content);
@@ -9924,9 +9993,6 @@ var init_openai_compatible_provider = __esm({
9924
9993
  }
9925
9994
  }
9926
9995
  return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
9927
- } catch (error) {
9928
- console.warn(`Token counting failed for ${descriptor.name}, using zero estimate:`, error);
9929
- return 0;
9930
9996
  }
9931
9997
  }
9932
9998
  };
@@ -12588,6 +12654,7 @@ var init_client = __esm({
12588
12654
  "use strict";
12589
12655
  init_builder();
12590
12656
  init_discovery();
12657
+ init_constants();
12591
12658
  init_model_registry();
12592
12659
  init_image();
12593
12660
  init_speech();
@@ -12706,8 +12773,43 @@ var init_client = __esm({
12706
12773
  if (adapter.countTokens) {
12707
12774
  return adapter.countTokens(messages, descriptor, spec);
12708
12775
  }
12709
- const totalChars = messages.reduce((sum, msg) => sum + (msg.content?.length ?? 0), 0);
12710
- return Math.ceil(totalChars / 4);
12776
+ try {
12777
+ const { get_encoding: get_encoding2 } = await import("tiktoken");
12778
+ const encoding = get_encoding2("o200k_base");
12779
+ try {
12780
+ let tokenCount = 0;
12781
+ for (const msg of messages) {
12782
+ const content = msg.content;
12783
+ if (typeof content === "string") {
12784
+ tokenCount += encoding.encode(content).length;
12785
+ } else if (Array.isArray(content)) {
12786
+ for (const part of content) {
12787
+ if (part.type === "text") {
12788
+ tokenCount += encoding.encode(part.text).length;
12789
+ }
12790
+ }
12791
+ }
12792
+ }
12793
+ return tokenCount;
12794
+ } finally {
12795
+ encoding.free();
12796
+ }
12797
+ } catch {
12798
+ let totalChars = 0;
12799
+ for (const msg of messages) {
12800
+ const content = msg.content;
12801
+ if (typeof content === "string") {
12802
+ totalChars += content.length;
12803
+ } else if (Array.isArray(content)) {
12804
+ for (const part of content) {
12805
+ if (part.type === "text") {
12806
+ totalChars += part.text.length;
12807
+ }
12808
+ }
12809
+ }
12810
+ }
12811
+ return Math.ceil(totalChars / CHARS_PER_TOKEN);
12812
+ }
12711
12813
  }
12712
12814
  resolveAdapter(descriptor) {
12713
12815
  const adapter = this.adapters.find((item) => item.supports(descriptor));
@@ -12846,9 +12948,9 @@ var init_builder = __esm({
12846
12948
  init_model_shortcuts();
12847
12949
  init_registry();
12848
12950
  init_activation();
12951
+ init_load_skill_gadget();
12849
12952
  init_loader();
12850
12953
  init_parser();
12851
- init_use_skill_gadget();
12852
12954
  init_agent();
12853
12955
  init_agent_internal_key();
12854
12956
  init_builder_utils();
@@ -13183,7 +13285,7 @@ ${resolved}`);
13183
13285
  const skillRegistry = this.resolveSkillRegistry();
13184
13286
  if (skillRegistry && skillRegistry.size > 0) {
13185
13287
  if (skillRegistry.getModelInvocable().length > 0) {
13186
- registry.registerByClass(createUseSkillGadget(skillRegistry));
13288
+ registry.registerByClass(createLoadSkillGadget(skillRegistry));
13187
13289
  }
13188
13290
  const preActivatedBlock = this.resolvePreActivatedInstructions(skillRegistry);
13189
13291
  if (preActivatedBlock) {
@@ -16372,7 +16474,8 @@ var init_agent = __esm({
16372
16474
  this.compactionManager = new CompactionManager(
16373
16475
  this.client,
16374
16476
  this.model,
16375
- options.compactionConfig
16477
+ options.compactionConfig,
16478
+ this.logger
16376
16479
  );
16377
16480
  }
16378
16481
  this.signal = options.signal;
@@ -16718,6 +16821,22 @@ var init_agent = __esm({
16718
16821
  this.logger.info("Loop terminated by gadget or processor");
16719
16822
  break;
16720
16823
  }
16824
+ if (this.compactionManager && result.usage?.inputTokens) {
16825
+ this.compactionManager.updateUsage(result.usage.inputTokens);
16826
+ if (this.compactionManager.shouldCompactFromUsage()) {
16827
+ this.logger.info("Reactive compaction triggered from API-reported usage", {
16828
+ inputTokens: result.usage.inputTokens,
16829
+ iteration: currentIteration
16830
+ });
16831
+ const reactiveCompaction = await this.compactionManager.compact(
16832
+ this.conversation,
16833
+ currentIteration
16834
+ );
16835
+ if (reactiveCompaction) {
16836
+ yield await this.emitCompactionEvent(reactiveCompaction, currentIteration);
16837
+ }
16838
+ }
16839
+ }
16721
16840
  if (this.budget !== void 0) {
16722
16841
  const totalCost = this.tree.getTotalCost();
16723
16842
  if (totalCost >= this.budget) {
@@ -17587,11 +17706,11 @@ var SimpleSessionManager = class extends BaseSessionManager {
17587
17706
 
17588
17707
  // src/skills/index.ts
17589
17708
  init_activation();
17709
+ init_load_skill_gadget();
17590
17710
  init_loader();
17591
17711
  init_parser();
17592
17712
  init_registry2();
17593
17713
  init_skill();
17594
- init_use_skill_gadget();
17595
17714
 
17596
17715
  // src/utils/format.ts
17597
17716
  function truncate(text3, maxLength, suffix = "...") {
@@ -17779,6 +17898,7 @@ export {
17779
17898
  HybridStrategy,
17780
17899
  LLMMessageBuilder,
17781
17900
  LLMist,
17901
+ LOAD_SKILL_GADGET_NAME,
17782
17902
  MODEL_ALIASES,
17783
17903
  MediaStore,
17784
17904
  ModelIdentifierParser,
@@ -17795,7 +17915,6 @@ export {
17795
17915
  SummarizationStrategy,
17796
17916
  TaskCompletionSignal,
17797
17917
  TimeoutException,
17798
- USE_SKILL_GADGET_NAME,
17799
17918
  audioFromBase64,
17800
17919
  audioFromBuffer,
17801
17920
  collectEvents,
@@ -17808,12 +17927,12 @@ export {
17808
17927
  createGeminiProviderFromEnv,
17809
17928
  createHints,
17810
17929
  createHuggingFaceProviderFromEnv,
17930
+ createLoadSkillGadget,
17811
17931
  createLogger,
17812
17932
  createMediaOutput,
17813
17933
  createOpenAIProviderFromEnv,
17814
17934
  createOpenRouterProviderFromEnv,
17815
17935
  createSubagent,
17816
- createUseSkillGadget,
17817
17936
  defaultLogger,
17818
17937
  detectAudioMimeType,
17819
17938
  detectImageMimeType,