llmist 0.3.1 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  BaseGadget
3
- } from "./chunk-VYBRYR2S.js";
3
+ } from "./chunk-LQE7TKKW.js";
4
4
 
5
5
  // src/gadgets/create-gadget.ts
6
6
  function createGadget(config) {
@@ -19,4 +19,4 @@ function createGadget(config) {
19
19
  export {
20
20
  createGadget
21
21
  };
22
- //# sourceMappingURL=chunk-I55AV3WV.js.map
22
+ //# sourceMappingURL=chunk-QVDGTUQN.js.map
package/dist/cli.cjs CHANGED
@@ -2745,10 +2745,11 @@ var init_gemini = __esm({
2745
2745
  return GEMINI_MODELS;
2746
2746
  }
2747
2747
  buildRequestPayload(options, descriptor, _spec, messages) {
2748
- const { systemInstruction, contents } = this.extractSystemAndContents(messages);
2748
+ const contents = this.convertMessagesToContents(messages);
2749
2749
  const generationConfig = this.buildGenerationConfig(options);
2750
2750
  const config = {
2751
- ...systemInstruction ? { systemInstruction: systemInstruction.parts.map((p) => p.text).join("\n") } : {},
2751
+ // Note: systemInstruction removed - it doesn't work with countTokens()
2752
+ // System messages are now included in contents as user+model exchanges
2752
2753
  ...generationConfig ? { ...generationConfig } : {},
2753
2754
  // Explicitly disable function calling to prevent UNEXPECTED_TOOL_CALL errors
2754
2755
  toolConfig: {
@@ -2769,31 +2770,37 @@ var init_gemini = __esm({
2769
2770
  const streamResponse = await client.models.generateContentStream(payload);
2770
2771
  return streamResponse;
2771
2772
  }
2772
- extractSystemAndContents(messages) {
2773
- const firstSystemIndex = messages.findIndex((message) => message.role === "system");
2774
- if (firstSystemIndex === -1) {
2775
- return {
2776
- systemInstruction: null,
2777
- contents: this.mergeConsecutiveMessages(messages)
2778
- };
2779
- }
2780
- let systemBlockEnd = firstSystemIndex;
2781
- while (systemBlockEnd < messages.length && messages[systemBlockEnd].role === "system") {
2782
- systemBlockEnd++;
2773
+ /**
2774
+ * Convert LLM messages to Gemini contents format.
2775
+ *
2776
+ * For Gemini, we convert system messages to user+model exchanges instead of
2777
+ * using systemInstruction, because:
2778
+ * 1. systemInstruction doesn't work with countTokens() API
2779
+ * 2. This approach gives perfect token counting accuracy (0% error)
2780
+ * 3. The model receives and follows system instructions identically
2781
+ *
2782
+ * System message: "You are a helpful assistant"
2783
+ * Becomes:
2784
+ * - User: "You are a helpful assistant"
2785
+ * - Model: "Understood."
2786
+ */
2787
+ convertMessagesToContents(messages) {
2788
+ const expandedMessages = [];
2789
+ for (const message of messages) {
2790
+ if (message.role === "system") {
2791
+ expandedMessages.push({
2792
+ role: "user",
2793
+ content: message.content
2794
+ });
2795
+ expandedMessages.push({
2796
+ role: "assistant",
2797
+ content: "Understood."
2798
+ });
2799
+ } else {
2800
+ expandedMessages.push(message);
2801
+ }
2783
2802
  }
2784
- const systemMessages = messages.slice(firstSystemIndex, systemBlockEnd);
2785
- const nonSystemMessages = [
2786
- ...messages.slice(0, firstSystemIndex),
2787
- ...messages.slice(systemBlockEnd)
2788
- ];
2789
- const systemInstruction = {
2790
- role: "system",
2791
- parts: systemMessages.map((message) => ({ text: message.content }))
2792
- };
2793
- return {
2794
- systemInstruction,
2795
- contents: this.mergeConsecutiveMessages(nonSystemMessages)
2796
- };
2803
+ return this.mergeConsecutiveMessages(expandedMessages);
2797
2804
  }
2798
2805
  mergeConsecutiveMessages(messages) {
2799
2806
  if (messages.length === 0) {
@@ -2882,8 +2889,8 @@ var init_gemini = __esm({
2882
2889
  *
2883
2890
  * This method provides accurate token estimation for Gemini models by:
2884
2891
  * - Using the SDK's countTokens() method
2885
- * - Properly extracting and handling system instructions
2886
- * - Transforming messages to Gemini's expected format
2892
+ * - Converting system messages to user+model exchanges (same as in generation)
2893
+ * - This gives perfect token counting accuracy (0% error vs actual usage)
2887
2894
  *
2888
2895
  * @param messages - The messages to count tokens for
2889
2896
  * @param descriptor - Model descriptor containing the model name
@@ -2902,16 +2909,14 @@ var init_gemini = __esm({
2902
2909
  */
2903
2910
  async countTokens(messages, descriptor, _spec) {
2904
2911
  const client = this.client;
2905
- const { systemInstruction, contents } = this.extractSystemAndContents(messages);
2906
- const request = {
2907
- model: descriptor.name,
2908
- contents: this.convertContentsForNewSDK(contents)
2909
- };
2910
- if (systemInstruction) {
2911
- request.systemInstruction = systemInstruction.parts.map((p) => p.text).join("\n");
2912
- }
2912
+ const contents = this.convertMessagesToContents(messages);
2913
2913
  try {
2914
- const response = await client.models.countTokens(request);
2914
+ const response = await client.models.countTokens({
2915
+ model: descriptor.name,
2916
+ contents: this.convertContentsForNewSDK(contents)
2917
+ // Note: systemInstruction not used - it's not supported by countTokens()
2918
+ // and would cause a 2100% token counting error
2919
+ });
2915
2920
  return response.totalTokens ?? 0;
2916
2921
  } catch (error) {
2917
2922
  console.warn(
@@ -4281,7 +4286,8 @@ var OPTION_FLAGS = {
4281
4286
  parameterFormat: "--parameter-format <format>",
4282
4287
  logLevel: "--log-level <level>",
4283
4288
  logFile: "--log-file <path>",
4284
- noBuiltins: "--no-builtins"
4289
+ noBuiltins: "--no-builtins",
4290
+ noBuiltinInteraction: "--no-builtin-interaction"
4285
4291
  };
4286
4292
  var OPTION_DESCRIPTIONS = {
4287
4293
  model: "Model identifier, e.g. openai:gpt-5-nano or anthropic:claude-sonnet-4-5.",
@@ -4293,7 +4299,8 @@ var OPTION_DESCRIPTIONS = {
4293
4299
  parameterFormat: "Format for gadget parameter schemas: 'json', 'yaml', or 'auto'.",
4294
4300
  logLevel: "Log level: silly, trace, debug, info, warn, error, fatal.",
4295
4301
  logFile: "Path to log file. When set, logs are written to file instead of stderr.",
4296
- noBuiltins: "Disable built-in gadgets (AskUser, TellUser)."
4302
+ noBuiltins: "Disable built-in gadgets (AskUser, TellUser).",
4303
+ noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser."
4297
4304
  };
4298
4305
  var SUMMARY_PREFIX = "[llmist]";
4299
4306
 
@@ -4303,7 +4310,7 @@ var import_commander3 = require("commander");
4303
4310
  // package.json
4304
4311
  var package_default = {
4305
4312
  name: "llmist",
4306
- version: "0.3.0",
4313
+ version: "0.4.0",
4307
4314
  description: "Universal TypeScript LLM client with streaming-first agent framework. Works with any model - no structured outputs or native tool calling required. Implements its own flexible grammar for function calling.",
4308
4315
  type: "module",
4309
4316
  main: "dist/index.cjs",
@@ -4412,7 +4419,6 @@ var package_default = {
4412
4419
 
4413
4420
  // src/cli/agent-command.ts
4414
4421
  var import_promises = require("readline/promises");
4415
- var import_chalk2 = __toESM(require("chalk"), 1);
4416
4422
  var import_commander2 = require("commander");
4417
4423
  init_builder();
4418
4424
  init_registry();
@@ -4706,9 +4712,67 @@ async function loadGadgets(specifiers, cwd, importer = (specifier) => import(spe
4706
4712
  }
4707
4713
 
4708
4714
  // src/cli/utils.ts
4709
- var import_chalk = __toESM(require("chalk"), 1);
4715
+ var import_chalk2 = __toESM(require("chalk"), 1);
4710
4716
  var import_commander = require("commander");
4711
4717
  init_constants2();
4718
+
4719
+ // src/cli/ui/formatters.ts
4720
+ var import_chalk = __toESM(require("chalk"), 1);
4721
+ function formatTokens(tokens) {
4722
+ return tokens >= 1e3 ? `${(tokens / 1e3).toFixed(1)}k` : `${tokens}`;
4723
+ }
4724
+ function formatCost(cost) {
4725
+ if (cost < 1e-3) {
4726
+ return cost.toFixed(5);
4727
+ }
4728
+ if (cost < 0.01) {
4729
+ return cost.toFixed(4);
4730
+ }
4731
+ if (cost < 1) {
4732
+ return cost.toFixed(3);
4733
+ }
4734
+ return cost.toFixed(2);
4735
+ }
4736
+ function renderSummary(metadata) {
4737
+ const parts = [];
4738
+ if (metadata.iterations !== void 0) {
4739
+ parts.push(import_chalk.default.cyan(`#${metadata.iterations}`));
4740
+ }
4741
+ if (metadata.usage) {
4742
+ const { inputTokens, outputTokens } = metadata.usage;
4743
+ parts.push(import_chalk.default.dim("\u2191") + import_chalk.default.yellow(` ${formatTokens(inputTokens)}`));
4744
+ parts.push(import_chalk.default.dim("\u2193") + import_chalk.default.green(` ${formatTokens(outputTokens)}`));
4745
+ }
4746
+ if (metadata.elapsedSeconds !== void 0 && metadata.elapsedSeconds > 0) {
4747
+ parts.push(import_chalk.default.dim(`${metadata.elapsedSeconds}s`));
4748
+ }
4749
+ if (metadata.cost !== void 0 && metadata.cost > 0) {
4750
+ parts.push(import_chalk.default.cyan(`$${formatCost(metadata.cost)}`));
4751
+ }
4752
+ if (metadata.finishReason) {
4753
+ parts.push(import_chalk.default.dim(metadata.finishReason));
4754
+ }
4755
+ if (parts.length === 0) {
4756
+ return null;
4757
+ }
4758
+ return parts.join(import_chalk.default.dim(" | "));
4759
+ }
4760
+ function formatGadgetSummary(result) {
4761
+ const gadgetLabel = import_chalk.default.magenta.bold(result.gadgetName);
4762
+ const timeLabel = import_chalk.default.dim(`${Math.round(result.executionTimeMs)}ms`);
4763
+ if (result.error) {
4764
+ return `${import_chalk.default.red("\u2717")} ${gadgetLabel} ${import_chalk.default.red("error:")} ${result.error} ${timeLabel}`;
4765
+ }
4766
+ if (result.breaksLoop) {
4767
+ return `${import_chalk.default.yellow("\u23F9")} ${gadgetLabel} ${import_chalk.default.yellow("finished:")} ${result.result} ${timeLabel}`;
4768
+ }
4769
+ const maxLen = 80;
4770
+ const shouldTruncate = result.gadgetName !== "TellUser";
4771
+ const resultText = result.result ? shouldTruncate && result.result.length > maxLen ? `${result.result.slice(0, maxLen)}...` : result.result : "";
4772
+ return `${import_chalk.default.green("\u2713")} ${gadgetLabel} ${import_chalk.default.dim("\u2192")} ${resultText} ${timeLabel}`;
4773
+ }
4774
+
4775
+ // src/cli/utils.ts
4712
4776
  function createNumericParser({
4713
4777
  label,
4714
4778
  integer = false,
@@ -4791,15 +4855,20 @@ var StreamProgress = class {
4791
4855
  totalTokens = 0;
4792
4856
  totalCost = 0;
4793
4857
  iterations = 0;
4858
+ currentIteration = 0;
4794
4859
  /**
4795
4860
  * Starts a new LLM call. Switches to streaming mode.
4796
4861
  * @param model - Model name being used
4797
- * @param estimatedInputTokens - Estimated input tokens based on prompt length
4862
+ * @param estimatedInputTokens - Initial input token count. Should come from
4863
+ * client.countTokens() for accuracy (provider-specific counting), not
4864
+ * character-based estimation. Will be updated with provider-returned counts
4865
+ * via setInputTokens() during streaming if available.
4798
4866
  */
4799
4867
  startCall(model, estimatedInputTokens) {
4800
4868
  this.mode = "streaming";
4801
4869
  this.model = model;
4802
4870
  this.callStartTime = Date.now();
4871
+ this.currentIteration++;
4803
4872
  this.callInputTokens = estimatedInputTokens ?? 0;
4804
4873
  this.callInputTokensEstimated = true;
4805
4874
  this.callOutputTokens = 0;
@@ -4836,8 +4905,10 @@ var StreamProgress = class {
4836
4905
  }
4837
4906
  /**
4838
4907
  * Sets the input token count for current call (from stream metadata).
4839
- * @param tokens - Token count
4840
- * @param estimated - If true, shown with ~ prefix until actual count arrives
4908
+ * @param tokens - Token count from provider or client.countTokens()
4909
+ * @param estimated - If true, this is a fallback estimate (character-based).
4910
+ * If false, this is an accurate count from the provider API or client.countTokens().
4911
+ * Display shows ~ prefix only when estimated=true.
4841
4912
  */
4842
4913
  setInputTokens(tokens, estimated = false) {
4843
4914
  if (estimated && !this.callInputTokensEstimated) {
@@ -4848,8 +4919,10 @@ var StreamProgress = class {
4848
4919
  }
4849
4920
  /**
4850
4921
  * Sets the output token count for current call (from stream metadata).
4851
- * @param tokens - Token count
4852
- * @param estimated - If true, shown with ~ prefix until actual count arrives
4922
+ * @param tokens - Token count from provider streaming response
4923
+ * @param estimated - If true, this is a fallback estimate (character-based).
4924
+ * If false, this is an accurate count from the provider's streaming metadata.
4925
+ * Display shows ~ prefix only when estimated=true.
4853
4926
  */
4854
4927
  setOutputTokens(tokens, estimated = false) {
4855
4928
  if (estimated && !this.callOutputTokensEstimated) {
@@ -4858,6 +4931,14 @@ var StreamProgress = class {
4858
4931
  this.callOutputTokens = tokens;
4859
4932
  this.callOutputTokensEstimated = estimated;
4860
4933
  }
4934
+ /**
4935
+ * Get total elapsed time in seconds since the first call started.
4936
+ * @returns Elapsed time in seconds with 1 decimal place
4937
+ */
4938
+ getTotalElapsedSeconds() {
4939
+ if (this.totalStartTime === 0) return 0;
4940
+ return Number(((Date.now() - this.totalStartTime) / 1e3).toFixed(1));
4941
+ }
4861
4942
  /**
4862
4943
  * Starts the progress indicator animation after a brief delay.
4863
4944
  */
@@ -4892,40 +4973,38 @@ var StreamProgress = class {
4892
4973
  const elapsed = ((Date.now() - this.callStartTime) / 1e3).toFixed(1);
4893
4974
  const outTokens = this.callOutputTokensEstimated ? Math.round(this.callOutputChars / FALLBACK_CHARS_PER_TOKEN) : this.callOutputTokens;
4894
4975
  const parts = [];
4895
- if (this.model) {
4896
- parts.push(import_chalk.default.cyan(this.model));
4897
- }
4976
+ parts.push(import_chalk2.default.cyan(`#${this.currentIteration}`));
4898
4977
  if (this.callInputTokens > 0) {
4899
4978
  const prefix = this.callInputTokensEstimated ? "~" : "";
4900
- parts.push(import_chalk.default.dim("out:") + import_chalk.default.yellow(` ${prefix}${this.callInputTokens}`));
4979
+ parts.push(import_chalk2.default.dim("\u2191") + import_chalk2.default.yellow(` ${prefix}${formatTokens(this.callInputTokens)}`));
4901
4980
  }
4902
4981
  if (this.isStreaming || outTokens > 0) {
4903
4982
  const prefix = this.callOutputTokensEstimated ? "~" : "";
4904
- parts.push(import_chalk.default.dim("in:") + import_chalk.default.green(` ${prefix}${outTokens}`));
4983
+ parts.push(import_chalk2.default.dim("\u2193") + import_chalk2.default.green(` ${prefix}${formatTokens(outTokens)}`));
4905
4984
  }
4985
+ parts.push(import_chalk2.default.dim(`${elapsed}s`));
4906
4986
  if (this.totalCost > 0) {
4907
- parts.push(import_chalk.default.dim("cost:") + import_chalk.default.cyan(` $${this.formatCost(this.totalCost)}`));
4987
+ parts.push(import_chalk2.default.cyan(`$${formatCost(this.totalCost)}`));
4908
4988
  }
4909
- parts.push(import_chalk.default.dim(`${elapsed}s`));
4910
- this.target.write(`\r${import_chalk.default.cyan(spinner)} ${parts.join(import_chalk.default.dim(" | "))}`);
4989
+ this.target.write(`\r${import_chalk2.default.cyan(spinner)} ${parts.join(import_chalk2.default.dim(" | "))}`);
4911
4990
  }
4912
4991
  renderCumulativeMode(spinner) {
4913
4992
  const elapsed = ((Date.now() - this.totalStartTime) / 1e3).toFixed(1);
4914
4993
  const parts = [];
4915
4994
  if (this.model) {
4916
- parts.push(import_chalk.default.cyan(this.model));
4995
+ parts.push(import_chalk2.default.cyan(this.model));
4917
4996
  }
4918
4997
  if (this.totalTokens > 0) {
4919
- parts.push(import_chalk.default.dim("total:") + import_chalk.default.magenta(` ${this.totalTokens}`));
4998
+ parts.push(import_chalk2.default.dim("total:") + import_chalk2.default.magenta(` ${this.totalTokens}`));
4920
4999
  }
4921
5000
  if (this.iterations > 0) {
4922
- parts.push(import_chalk.default.dim("iter:") + import_chalk.default.blue(` ${this.iterations}`));
5001
+ parts.push(import_chalk2.default.dim("iter:") + import_chalk2.default.blue(` ${this.iterations}`));
4923
5002
  }
4924
5003
  if (this.totalCost > 0) {
4925
- parts.push(import_chalk.default.dim("cost:") + import_chalk.default.cyan(` $${this.formatCost(this.totalCost)}`));
5004
+ parts.push(import_chalk2.default.dim("cost:") + import_chalk2.default.cyan(` $${formatCost(this.totalCost)}`));
4926
5005
  }
4927
- parts.push(import_chalk.default.dim(`${elapsed}s`));
4928
- this.target.write(`\r${import_chalk.default.cyan(spinner)} ${parts.join(import_chalk.default.dim(" | "))}`);
5006
+ parts.push(import_chalk2.default.dim(`${elapsed}s`));
5007
+ this.target.write(`\r${import_chalk2.default.cyan(spinner)} ${parts.join(import_chalk2.default.dim(" | "))}`);
4929
5008
  }
4930
5009
  /**
4931
5010
  * Pauses the progress indicator and clears the line.
@@ -4973,49 +5052,28 @@ var StreamProgress = class {
4973
5052
  if (this.callInputTokens > 0) {
4974
5053
  const prefix = this.callInputTokensEstimated ? "~" : "";
4975
5054
  parts.push(
4976
- import_chalk.default.dim("out:") + import_chalk.default.yellow(` ${prefix}${this.formatTokens(this.callInputTokens)}`)
5055
+ import_chalk2.default.dim("\u2191") + import_chalk2.default.yellow(` ${prefix}${formatTokens(this.callInputTokens)}`)
4977
5056
  );
4978
5057
  }
4979
5058
  if (outTokens > 0) {
4980
5059
  const prefix = outEstimated ? "~" : "";
4981
- parts.push(import_chalk.default.dim("in:") + import_chalk.default.green(` ${prefix}${this.formatTokens(outTokens)}`));
5060
+ parts.push(import_chalk2.default.dim("\u2193") + import_chalk2.default.green(` ${prefix}${formatTokens(outTokens)}`));
4982
5061
  }
4983
- parts.push(import_chalk.default.dim(`${elapsed}s`));
5062
+ parts.push(import_chalk2.default.dim(`${elapsed}s`));
4984
5063
  } else {
4985
5064
  const elapsed = Math.round((Date.now() - this.totalStartTime) / 1e3);
4986
5065
  if (this.totalTokens > 0) {
4987
- parts.push(import_chalk.default.magenta(this.formatTokens(this.totalTokens)));
5066
+ parts.push(import_chalk2.default.magenta(formatTokens(this.totalTokens)));
4988
5067
  }
4989
5068
  if (this.iterations > 0) {
4990
- parts.push(import_chalk.default.blue(`i${this.iterations}`));
5069
+ parts.push(import_chalk2.default.blue(`i${this.iterations}`));
4991
5070
  }
4992
5071
  if (this.totalCost > 0) {
4993
- parts.push(import_chalk.default.cyan(`$${this.formatCost(this.totalCost)}`));
5072
+ parts.push(import_chalk2.default.cyan(`$${formatCost(this.totalCost)}`));
4994
5073
  }
4995
- parts.push(import_chalk.default.dim(`${elapsed}s`));
5074
+ parts.push(import_chalk2.default.dim(`${elapsed}s`));
4996
5075
  }
4997
- return `${parts.join(import_chalk.default.dim(" \u2502 "))} ${import_chalk.default.green(">")} `;
4998
- }
4999
- /**
5000
- * Formats token count compactly (3625 -> "3.6k").
5001
- */
5002
- formatTokens(tokens) {
5003
- return tokens >= 1e3 ? `${(tokens / 1e3).toFixed(1)}k` : `${tokens}`;
5004
- }
5005
- /**
5006
- * Formats cost compactly (0.0001234 -> "0.00012", 0.1234 -> "0.12", 1.234 -> "1.23").
5007
- */
5008
- formatCost(cost) {
5009
- if (cost < 1e-3) {
5010
- return cost.toFixed(5);
5011
- }
5012
- if (cost < 0.01) {
5013
- return cost.toFixed(4);
5014
- }
5015
- if (cost < 1) {
5016
- return cost.toFixed(3);
5017
- }
5018
- return cost.toFixed(2);
5076
+ return `${parts.join(import_chalk2.default.dim(" | "))} ${import_chalk2.default.green(">")} `;
5019
5077
  }
5020
5078
  };
5021
5079
  async function readStream(stream2) {
@@ -5045,44 +5103,12 @@ async function resolvePrompt(promptArg, env) {
5045
5103
  }
5046
5104
  return pipedInput;
5047
5105
  }
5048
- function renderSummary(metadata) {
5049
- const parts = [];
5050
- if (metadata.iterations !== void 0) {
5051
- parts.push(import_chalk.default.dim(`iterations: ${metadata.iterations}`));
5052
- }
5053
- if (metadata.finishReason) {
5054
- parts.push(import_chalk.default.dim(`finish: ${metadata.finishReason}`));
5055
- }
5056
- if (metadata.usage) {
5057
- const { inputTokens, outputTokens, totalTokens } = metadata.usage;
5058
- parts.push(
5059
- import_chalk.default.dim(`tokens: `) + import_chalk.default.cyan(`${totalTokens}`) + import_chalk.default.dim(` (in: ${inputTokens}, out: ${outputTokens})`)
5060
- );
5061
- }
5062
- if (metadata.cost !== void 0 && metadata.cost > 0) {
5063
- let formattedCost;
5064
- if (metadata.cost < 1e-3) {
5065
- formattedCost = metadata.cost.toFixed(5);
5066
- } else if (metadata.cost < 0.01) {
5067
- formattedCost = metadata.cost.toFixed(4);
5068
- } else if (metadata.cost < 1) {
5069
- formattedCost = metadata.cost.toFixed(3);
5070
- } else {
5071
- formattedCost = metadata.cost.toFixed(2);
5072
- }
5073
- parts.push(import_chalk.default.dim(`cost: `) + import_chalk.default.cyan(`$${formattedCost}`));
5074
- }
5075
- if (parts.length === 0) {
5076
- return null;
5077
- }
5078
- return parts.join(import_chalk.default.dim(" \u2502 "));
5079
- }
5080
5106
  async function executeAction(action, env) {
5081
5107
  try {
5082
5108
  await action();
5083
5109
  } catch (error) {
5084
5110
  const message = error instanceof Error ? error.message : String(error);
5085
- env.stderr.write(`${import_chalk.default.red.bold("Error:")} ${message}
5111
+ env.stderr.write(`${import_chalk2.default.red.bold("Error:")} ${message}
5086
5112
  `);
5087
5113
  env.setExitCode(1);
5088
5114
  }
@@ -5125,26 +5151,15 @@ ${statsPrompt}` : statsPrompt;
5125
5151
  }
5126
5152
  };
5127
5153
  }
5128
- function formatGadgetSummary(result) {
5129
- const gadgetLabel = import_chalk2.default.magenta.bold(result.gadgetName);
5130
- const timeLabel = import_chalk2.default.dim(`${Math.round(result.executionTimeMs)}ms`);
5131
- if (result.error) {
5132
- return `${import_chalk2.default.red("\u2717")} ${gadgetLabel} ${import_chalk2.default.red("error:")} ${result.error} ${timeLabel}`;
5133
- }
5134
- if (result.breaksLoop) {
5135
- return `${import_chalk2.default.yellow("\u23F9")} ${gadgetLabel} ${import_chalk2.default.yellow("finished:")} ${result.result} ${timeLabel}`;
5136
- }
5137
- const maxLen = 80;
5138
- const shouldTruncate = result.gadgetName !== "TellUser";
5139
- const resultText = result.result ? shouldTruncate && result.result.length > maxLen ? `${result.result.slice(0, maxLen)}...` : result.result : "";
5140
- return `${import_chalk2.default.green("\u2713")} ${gadgetLabel} ${import_chalk2.default.dim("\u2192")} ${resultText} ${timeLabel}`;
5141
- }
5142
5154
  async function handleAgentCommand(promptArg, options, env) {
5143
5155
  const prompt = await resolvePrompt(promptArg, env);
5144
5156
  const client = env.createClient();
5145
5157
  const registry = new GadgetRegistry();
5146
5158
  if (options.builtins !== false) {
5147
5159
  for (const gadget of builtinGadgets) {
5160
+ if (options.builtinInteraction === false && gadget.name === "AskUser") {
5161
+ continue;
5162
+ }
5148
5163
  registry.registerByClass(gadget);
5149
5164
  }
5150
5165
  }
@@ -5161,16 +5176,28 @@ async function handleAgentCommand(promptArg, options, env) {
5161
5176
  let finishReason;
5162
5177
  let usage;
5163
5178
  let iterations = 0;
5164
- const estimateMessagesTokens = (messages) => {
5165
- const totalChars = messages.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
5166
- return Math.round(totalChars / FALLBACK_CHARS_PER_TOKEN);
5179
+ const countMessagesTokens = async (model, messages) => {
5180
+ try {
5181
+ return await client.countTokens(model, messages);
5182
+ } catch {
5183
+ const totalChars = messages.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
5184
+ return Math.round(totalChars / FALLBACK_CHARS_PER_TOKEN);
5185
+ }
5167
5186
  };
5168
5187
  const builder = new AgentBuilder(client).withModel(options.model).withLogger(env.createLogger("llmist:cli:agent")).withHooks({
5169
5188
  observers: {
5189
+ // onLLMCallStart: Start progress indicator for each LLM call
5190
+ // This showcases how to react to agent lifecycle events
5170
5191
  onLLMCallStart: async (context) => {
5171
- const estimate = estimateMessagesTokens(context.options.messages);
5172
- progress.startCall(context.options.model, estimate);
5192
+ const inputTokens = await countMessagesTokens(
5193
+ context.options.model,
5194
+ context.options.messages
5195
+ );
5196
+ progress.startCall(context.options.model, inputTokens);
5197
+ progress.setInputTokens(inputTokens, false);
5173
5198
  },
5199
+ // onStreamChunk: Real-time updates as LLM generates tokens
5200
+ // This enables responsive UIs that show progress during generation
5174
5201
  onStreamChunk: async (context) => {
5175
5202
  progress.update(context.accumulatedText.length);
5176
5203
  if (context.usage) {
@@ -5182,10 +5209,20 @@ async function handleAgentCommand(promptArg, options, env) {
5182
5209
  }
5183
5210
  }
5184
5211
  },
5212
+ // onLLMCallComplete: Finalize metrics after each LLM call
5213
+ // This is where you'd typically log metrics or update dashboards
5185
5214
  onLLMCallComplete: async (context) => {
5186
5215
  finishReason = context.finishReason;
5187
5216
  usage = context.usage;
5188
5217
  iterations = Math.max(iterations, context.iteration + 1);
5218
+ if (context.usage) {
5219
+ if (context.usage.inputTokens) {
5220
+ progress.setInputTokens(context.usage.inputTokens, false);
5221
+ }
5222
+ if (context.usage.outputTokens) {
5223
+ progress.setOutputTokens(context.usage.outputTokens, false);
5224
+ }
5225
+ }
5189
5226
  progress.endCall(context.usage);
5190
5227
  }
5191
5228
  }
@@ -5227,7 +5264,8 @@ async function handleAgentCommand(promptArg, options, env) {
5227
5264
  finishReason,
5228
5265
  usage,
5229
5266
  iterations,
5230
- cost: progress.getTotalCost()
5267
+ cost: progress.getTotalCost(),
5268
+ elapsedSeconds: progress.getTotalElapsedSeconds()
5231
5269
  });
5232
5270
  if (summary) {
5233
5271
  env.stderr.write(`${summary}
@@ -5254,7 +5292,7 @@ function registerAgentCommand(program, env) {
5254
5292
  OPTION_DESCRIPTIONS.parameterFormat,
5255
5293
  parseParameterFormat,
5256
5294
  DEFAULT_PARAMETER_FORMAT
5257
- ).option(OPTION_FLAGS.noBuiltins, OPTION_DESCRIPTIONS.noBuiltins).action(
5295
+ ).option(OPTION_FLAGS.noBuiltins, OPTION_DESCRIPTIONS.noBuiltins).option(OPTION_FLAGS.noBuiltinInteraction, OPTION_DESCRIPTIONS.noBuiltinInteraction).action(
5258
5296
  (prompt, options) => executeAction(() => handleAgentCommand(prompt, options, env), env)
5259
5297
  );
5260
5298
  }
@@ -5392,7 +5430,7 @@ function renderCompactTable(models, stream2) {
5392
5430
  );
5393
5431
  stream2.write(import_chalk3.default.dim("\u2500".repeat(idWidth + nameWidth + contextWidth + inputWidth + outputWidth + 8)) + "\n");
5394
5432
  for (const model of models) {
5395
- const contextFormatted = formatTokens(model.contextWindow);
5433
+ const contextFormatted = formatTokens2(model.contextWindow);
5396
5434
  const inputPrice = `$${model.pricing.input.toFixed(2)}`;
5397
5435
  const outputPrice = `$${model.pricing.output.toFixed(2)}`;
5398
5436
  stream2.write(
@@ -5411,9 +5449,9 @@ function renderVerboseTable(models, stream2) {
5411
5449
  stream2.write(import_chalk3.default.dim(" " + "\u2500".repeat(60)) + "\n");
5412
5450
  stream2.write(` ${import_chalk3.default.dim("Name:")} ${import_chalk3.default.white(model.displayName)}
5413
5451
  `);
5414
- stream2.write(` ${import_chalk3.default.dim("Context:")} ${import_chalk3.default.yellow(formatTokens(model.contextWindow))}
5452
+ stream2.write(` ${import_chalk3.default.dim("Context:")} ${import_chalk3.default.yellow(formatTokens2(model.contextWindow))}
5415
5453
  `);
5416
- stream2.write(` ${import_chalk3.default.dim("Max Output:")} ${import_chalk3.default.yellow(formatTokens(model.maxOutputTokens))}
5454
+ stream2.write(` ${import_chalk3.default.dim("Max Output:")} ${import_chalk3.default.yellow(formatTokens2(model.maxOutputTokens))}
5417
5455
  `);
5418
5456
  stream2.write(` ${import_chalk3.default.dim("Pricing:")} ${import_chalk3.default.cyan(`$${model.pricing.input.toFixed(2)} input`)} ${import_chalk3.default.dim("/")} ${import_chalk3.default.cyan(`$${model.pricing.output.toFixed(2)} output`)} ${import_chalk3.default.dim("(per 1M tokens)")}
5419
5457
  `);
@@ -5476,7 +5514,7 @@ function renderJSON(models, stream2) {
5476
5514
  };
5477
5515
  stream2.write(JSON.stringify(output, null, 2) + "\n");
5478
5516
  }
5479
- function formatTokens(count) {
5517
+ function formatTokens2(count) {
5480
5518
  if (count >= 1e6) {
5481
5519
  return `${(count / 1e6).toFixed(1)}M tokens`;
5482
5520
  } else if (count >= 1e3) {