npm - llmist - Versions diffs - 0.3.1 → 0.4.1 - Mend

llmist 0.3.1 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/dist/{chunk-VRTKJK2X.js → chunk-A4GRCCXF.js} +2 -2
package/dist/{chunk-VYBRYR2S.js → chunk-LQE7TKKW.js} +43 -38
package/dist/chunk-LQE7TKKW.js.map +1 -0
package/dist/{chunk-I55AV3WV.js → chunk-QVDGTUQN.js} +2 -2
package/dist/cli.cjs +186 -148
package/dist/cli.cjs.map +1 -1
package/dist/cli.js +146 -113
package/dist/cli.js.map +1 -1
package/dist/index.cjs +225 -37
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +227 -6
package/dist/index.d.ts +227 -6
package/dist/index.js +186 -3
package/dist/index.js.map +1 -1
package/dist/{mock-stream-C2sBQlvc.d.cts → mock-stream-C0vOqI3L.d.cts} +1 -1
package/dist/{mock-stream-C2sBQlvc.d.ts → mock-stream-C0vOqI3L.d.ts} +1 -1
package/dist/testing/index.cjs +42 -37
package/dist/testing/index.cjs.map +1 -1
package/dist/testing/index.d.cts +2 -2
package/dist/testing/index.d.ts +2 -2
package/dist/testing/index.js +2 -2
package/package.json +1 -1
package/dist/chunk-VYBRYR2S.js.map +0 -1
/package/dist/{chunk-VRTKJK2X.js.map → chunk-A4GRCCXF.js.map} +0 -0
/package/dist/{chunk-I55AV3WV.js.map → chunk-QVDGTUQN.js.map} +0 -0

package/dist/{chunk-I55AV3WV.js → chunk-QVDGTUQN.js} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   BaseGadget
-} from "./chunk-VYBRYR2S.js";
+} from "./chunk-LQE7TKKW.js";
 // src/gadgets/create-gadget.ts
 function createGadget(config) {
@@ -19,4 +19,4 @@ function createGadget(config) {
 export {
   createGadget
 };
-//# sourceMappingURL=chunk-I55AV3WV.js.map
+//# sourceMappingURL=chunk-QVDGTUQN.js.map

package/dist/cli.cjs CHANGED Viewed

@@ -2745,10 +2745,11 @@ var init_gemini = __esm({
         return GEMINI_MODELS;
       }
       buildRequestPayload(options, descriptor, _spec, messages) {
-        const { systemInstruction, contents } = this.extractSystemAndContents(messages);
+        const contents = this.convertMessagesToContents(messages);
         const generationConfig = this.buildGenerationConfig(options);
         const config = {
-          ...systemInstruction ? { systemInstruction: systemInstruction.parts.map((p) => p.text).join("\n") } : {},
+          // Note: systemInstruction removed - it doesn't work with countTokens()
+          // System messages are now included in contents as user+model exchanges
           ...generationConfig ? { ...generationConfig } : {},
           // Explicitly disable function calling to prevent UNEXPECTED_TOOL_CALL errors
           toolConfig: {
@@ -2769,31 +2770,37 @@ var init_gemini = __esm({
         const streamResponse = await client.models.generateContentStream(payload);
         return streamResponse;
       }
-      extractSystemAndContents(messages) {
-        const firstSystemIndex = messages.findIndex((message) => message.role === "system");
-        if (firstSystemIndex === -1) {
-          return {
-            systemInstruction: null,
-            contents: this.mergeConsecutiveMessages(messages)
-          };
-        }
-        let systemBlockEnd = firstSystemIndex;
-        while (systemBlockEnd < messages.length && messages[systemBlockEnd].role === "system") {
-          systemBlockEnd++;
+      /**
+       * Convert LLM messages to Gemini contents format.
+       *
+       * For Gemini, we convert system messages to user+model exchanges instead of
+       * using systemInstruction, because:
+       * 1. systemInstruction doesn't work with countTokens() API
+       * 2. This approach gives perfect token counting accuracy (0% error)
+       * 3. The model receives and follows system instructions identically
+       *
+       * System message: "You are a helpful assistant"
+       * Becomes:
+       * - User: "You are a helpful assistant"
+       * - Model: "Understood."
+       */
+      convertMessagesToContents(messages) {
+        const expandedMessages = [];
+        for (const message of messages) {
+          if (message.role === "system") {
+            expandedMessages.push({
+              role: "user",
+              content: message.content
+            });
+            expandedMessages.push({
+              role: "assistant",
+              content: "Understood."
+            });
+          } else {
+            expandedMessages.push(message);
+          }
         }
-        const systemMessages = messages.slice(firstSystemIndex, systemBlockEnd);
-        const nonSystemMessages = [
-          ...messages.slice(0, firstSystemIndex),
-          ...messages.slice(systemBlockEnd)
-        ];
-        const systemInstruction = {
-          role: "system",
-          parts: systemMessages.map((message) => ({ text: message.content }))
-        };
-        return {
-          systemInstruction,
-          contents: this.mergeConsecutiveMessages(nonSystemMessages)
-        };
+        return this.mergeConsecutiveMessages(expandedMessages);
       }
       mergeConsecutiveMessages(messages) {
         if (messages.length === 0) {
@@ -2882,8 +2889,8 @@ var init_gemini = __esm({
        *
        * This method provides accurate token estimation for Gemini models by:
        * - Using the SDK's countTokens() method
-       * - Properly extracting and handling system instructions
-       * - Transforming messages to Gemini's expected format
+       * - Converting system messages to user+model exchanges (same as in generation)
+       * - This gives perfect token counting accuracy (0% error vs actual usage)
        *
        * @param messages - The messages to count tokens for
        * @param descriptor - Model descriptor containing the model name
@@ -2902,16 +2909,14 @@ var init_gemini = __esm({
        */
       async countTokens(messages, descriptor, _spec) {
         const client = this.client;
-        const { systemInstruction, contents } = this.extractSystemAndContents(messages);
-        const request = {
-          model: descriptor.name,
-          contents: this.convertContentsForNewSDK(contents)
-        };
-        if (systemInstruction) {
-          request.systemInstruction = systemInstruction.parts.map((p) => p.text).join("\n");
-        }
+        const contents = this.convertMessagesToContents(messages);
         try {
-          const response = await client.models.countTokens(request);
+          const response = await client.models.countTokens({
+            model: descriptor.name,
+            contents: this.convertContentsForNewSDK(contents)
+            // Note: systemInstruction not used - it's not supported by countTokens()
+            // and would cause a 2100% token counting error
+          });
           return response.totalTokens ?? 0;
         } catch (error) {
           console.warn(
@@ -4281,7 +4286,8 @@ var OPTION_FLAGS = {
   parameterFormat: "--parameter-format <format>",
   logLevel: "--log-level <level>",
   logFile: "--log-file <path>",
-  noBuiltins: "--no-builtins"
+  noBuiltins: "--no-builtins",
+  noBuiltinInteraction: "--no-builtin-interaction"
 };
 var OPTION_DESCRIPTIONS = {
   model: "Model identifier, e.g. openai:gpt-5-nano or anthropic:claude-sonnet-4-5.",
@@ -4293,7 +4299,8 @@ var OPTION_DESCRIPTIONS = {
   parameterFormat: "Format for gadget parameter schemas: 'json', 'yaml', or 'auto'.",
   logLevel: "Log level: silly, trace, debug, info, warn, error, fatal.",
   logFile: "Path to log file. When set, logs are written to file instead of stderr.",
-  noBuiltins: "Disable built-in gadgets (AskUser, TellUser)."
+  noBuiltins: "Disable built-in gadgets (AskUser, TellUser).",
+  noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser."
 };
 var SUMMARY_PREFIX = "[llmist]";
@@ -4303,7 +4310,7 @@ var import_commander3 = require("commander");
 // package.json
 var package_default = {
   name: "llmist",
-  version: "0.3.0",
+  version: "0.4.0",
   description: "Universal TypeScript LLM client with streaming-first agent framework. Works with any model - no structured outputs or native tool calling required. Implements its own flexible grammar for function calling.",
   type: "module",
   main: "dist/index.cjs",
@@ -4412,7 +4419,6 @@ var package_default = {
 // src/cli/agent-command.ts
 var import_promises = require("readline/promises");
-var import_chalk2 = __toESM(require("chalk"), 1);
 var import_commander2 = require("commander");
 init_builder();
 init_registry();
@@ -4706,9 +4712,67 @@ async function loadGadgets(specifiers, cwd, importer = (specifier) => import(spe
 }
 // src/cli/utils.ts
-var import_chalk = __toESM(require("chalk"), 1);
+var import_chalk2 = __toESM(require("chalk"), 1);
 var import_commander = require("commander");
 init_constants2();
+// src/cli/ui/formatters.ts
+var import_chalk = __toESM(require("chalk"), 1);
+function formatTokens(tokens) {
+  return tokens >= 1e3 ? `${(tokens / 1e3).toFixed(1)}k` : `${tokens}`;
+}
+function formatCost(cost) {
+  if (cost < 1e-3) {
+    return cost.toFixed(5);
+  }
+  if (cost < 0.01) {
+    return cost.toFixed(4);
+  }
+  if (cost < 1) {
+    return cost.toFixed(3);
+  }
+  return cost.toFixed(2);
+}
+function renderSummary(metadata) {
+  const parts = [];
+  if (metadata.iterations !== void 0) {
+    parts.push(import_chalk.default.cyan(`#${metadata.iterations}`));
+  }
+  if (metadata.usage) {
+    const { inputTokens, outputTokens } = metadata.usage;
+    parts.push(import_chalk.default.dim("\u2191") + import_chalk.default.yellow(` ${formatTokens(inputTokens)}`));
+    parts.push(import_chalk.default.dim("\u2193") + import_chalk.default.green(` ${formatTokens(outputTokens)}`));
+  }
+  if (metadata.elapsedSeconds !== void 0 && metadata.elapsedSeconds > 0) {
+    parts.push(import_chalk.default.dim(`${metadata.elapsedSeconds}s`));
+  }
+  if (metadata.cost !== void 0 && metadata.cost > 0) {
+    parts.push(import_chalk.default.cyan(`$${formatCost(metadata.cost)}`));
+  }
+  if (metadata.finishReason) {
+    parts.push(import_chalk.default.dim(metadata.finishReason));
+  }
+  if (parts.length === 0) {
+    return null;
+  }
+  return parts.join(import_chalk.default.dim(" | "));
+}
+function formatGadgetSummary(result) {
+  const gadgetLabel = import_chalk.default.magenta.bold(result.gadgetName);
+  const timeLabel = import_chalk.default.dim(`${Math.round(result.executionTimeMs)}ms`);
+  if (result.error) {
+    return `${import_chalk.default.red("\u2717")} ${gadgetLabel} ${import_chalk.default.red("error:")} ${result.error} ${timeLabel}`;
+  }
+  if (result.breaksLoop) {
+    return `${import_chalk.default.yellow("\u23F9")} ${gadgetLabel} ${import_chalk.default.yellow("finished:")} ${result.result} ${timeLabel}`;
+  }
+  const maxLen = 80;
+  const shouldTruncate = result.gadgetName !== "TellUser";
+  const resultText = result.result ? shouldTruncate && result.result.length > maxLen ? `${result.result.slice(0, maxLen)}...` : result.result : "";
+  return `${import_chalk.default.green("\u2713")} ${gadgetLabel} ${import_chalk.default.dim("\u2192")} ${resultText} ${timeLabel}`;
+}
+// src/cli/utils.ts
 function createNumericParser({
   label,
   integer = false,
@@ -4791,15 +4855,20 @@ var StreamProgress = class {
   totalTokens = 0;
   totalCost = 0;
   iterations = 0;
+  currentIteration = 0;
   /**
    * Starts a new LLM call. Switches to streaming mode.
    * @param model - Model name being used
-   * @param estimatedInputTokens - Estimated input tokens based on prompt length
+   * @param estimatedInputTokens - Initial input token count. Should come from
+   *   client.countTokens() for accuracy (provider-specific counting), not
+   *   character-based estimation. Will be updated with provider-returned counts
+   *   via setInputTokens() during streaming if available.
    */
   startCall(model, estimatedInputTokens) {
     this.mode = "streaming";
     this.model = model;
     this.callStartTime = Date.now();
+    this.currentIteration++;
     this.callInputTokens = estimatedInputTokens ?? 0;
     this.callInputTokensEstimated = true;
     this.callOutputTokens = 0;
@@ -4836,8 +4905,10 @@ var StreamProgress = class {
   }
   /**
    * Sets the input token count for current call (from stream metadata).
-   * @param tokens - Token count
-   * @param estimated - If true, shown with ~ prefix until actual count arrives
+   * @param tokens - Token count from provider or client.countTokens()
+   * @param estimated - If true, this is a fallback estimate (character-based).
+   *   If false, this is an accurate count from the provider API or client.countTokens().
+   *   Display shows ~ prefix only when estimated=true.
    */
   setInputTokens(tokens, estimated = false) {
     if (estimated && !this.callInputTokensEstimated) {
@@ -4848,8 +4919,10 @@ var StreamProgress = class {
   }
   /**
    * Sets the output token count for current call (from stream metadata).
-   * @param tokens - Token count
-   * @param estimated - If true, shown with ~ prefix until actual count arrives
+   * @param tokens - Token count from provider streaming response
+   * @param estimated - If true, this is a fallback estimate (character-based).
+   *   If false, this is an accurate count from the provider's streaming metadata.
+   *   Display shows ~ prefix only when estimated=true.
    */
   setOutputTokens(tokens, estimated = false) {
     if (estimated && !this.callOutputTokensEstimated) {
@@ -4858,6 +4931,14 @@ var StreamProgress = class {
     this.callOutputTokens = tokens;
     this.callOutputTokensEstimated = estimated;
   }
+  /**
+   * Get total elapsed time in seconds since the first call started.
+   * @returns Elapsed time in seconds with 1 decimal place
+   */
+  getTotalElapsedSeconds() {
+    if (this.totalStartTime === 0) return 0;
+    return Number(((Date.now() - this.totalStartTime) / 1e3).toFixed(1));
+  }
   /**
    * Starts the progress indicator animation after a brief delay.
    */
@@ -4892,40 +4973,38 @@ var StreamProgress = class {
     const elapsed = ((Date.now() - this.callStartTime) / 1e3).toFixed(1);
     const outTokens = this.callOutputTokensEstimated ? Math.round(this.callOutputChars / FALLBACK_CHARS_PER_TOKEN) : this.callOutputTokens;
     const parts = [];
-    if (this.model) {
-      parts.push(import_chalk.default.cyan(this.model));
-    }
+    parts.push(import_chalk2.default.cyan(`#${this.currentIteration}`));
     if (this.callInputTokens > 0) {
       const prefix = this.callInputTokensEstimated ? "~" : "";
-      parts.push(import_chalk.default.dim("out:") + import_chalk.default.yellow(` ${prefix}${this.callInputTokens}`));
+      parts.push(import_chalk2.default.dim("\u2191") + import_chalk2.default.yellow(` ${prefix}${formatTokens(this.callInputTokens)}`));
     }
     if (this.isStreaming || outTokens > 0) {
       const prefix = this.callOutputTokensEstimated ? "~" : "";
-      parts.push(import_chalk.default.dim("in:") + import_chalk.default.green(` ${prefix}${outTokens}`));
+      parts.push(import_chalk2.default.dim("\u2193") + import_chalk2.default.green(` ${prefix}${formatTokens(outTokens)}`));
     }
+    parts.push(import_chalk2.default.dim(`${elapsed}s`));
     if (this.totalCost > 0) {
-      parts.push(import_chalk.default.dim("cost:") + import_chalk.default.cyan(` $${this.formatCost(this.totalCost)}`));
+      parts.push(import_chalk2.default.cyan(`$${formatCost(this.totalCost)}`));
     }
-    parts.push(import_chalk.default.dim(`${elapsed}s`));
-    this.target.write(`\r${import_chalk.default.cyan(spinner)} ${parts.join(import_chalk.default.dim(" | "))}`);
+    this.target.write(`\r${import_chalk2.default.cyan(spinner)} ${parts.join(import_chalk2.default.dim(" | "))}`);
   }
   renderCumulativeMode(spinner) {
     const elapsed = ((Date.now() - this.totalStartTime) / 1e3).toFixed(1);
     const parts = [];
     if (this.model) {
-      parts.push(import_chalk.default.cyan(this.model));
+      parts.push(import_chalk2.default.cyan(this.model));
     }
     if (this.totalTokens > 0) {
-      parts.push(import_chalk.default.dim("total:") + import_chalk.default.magenta(` ${this.totalTokens}`));
+      parts.push(import_chalk2.default.dim("total:") + import_chalk2.default.magenta(` ${this.totalTokens}`));
     }
     if (this.iterations > 0) {
-      parts.push(import_chalk.default.dim("iter:") + import_chalk.default.blue(` ${this.iterations}`));
+      parts.push(import_chalk2.default.dim("iter:") + import_chalk2.default.blue(` ${this.iterations}`));
     }
     if (this.totalCost > 0) {
-      parts.push(import_chalk.default.dim("cost:") + import_chalk.default.cyan(` $${this.formatCost(this.totalCost)}`));
+      parts.push(import_chalk2.default.dim("cost:") + import_chalk2.default.cyan(` $${formatCost(this.totalCost)}`));
     }
-    parts.push(import_chalk.default.dim(`${elapsed}s`));
-    this.target.write(`\r${import_chalk.default.cyan(spinner)} ${parts.join(import_chalk.default.dim(" | "))}`);
+    parts.push(import_chalk2.default.dim(`${elapsed}s`));
+    this.target.write(`\r${import_chalk2.default.cyan(spinner)} ${parts.join(import_chalk2.default.dim(" | "))}`);
   }
   /**
    * Pauses the progress indicator and clears the line.
@@ -4973,49 +5052,28 @@ var StreamProgress = class {
       if (this.callInputTokens > 0) {
         const prefix = this.callInputTokensEstimated ? "~" : "";
         parts.push(
-          import_chalk.default.dim("out:") + import_chalk.default.yellow(` ${prefix}${this.formatTokens(this.callInputTokens)}`)
+          import_chalk2.default.dim("\u2191") + import_chalk2.default.yellow(` ${prefix}${formatTokens(this.callInputTokens)}`)
         );
       }
       if (outTokens > 0) {
         const prefix = outEstimated ? "~" : "";
-        parts.push(import_chalk.default.dim("in:") + import_chalk.default.green(` ${prefix}${this.formatTokens(outTokens)}`));
+        parts.push(import_chalk2.default.dim("\u2193") + import_chalk2.default.green(` ${prefix}${formatTokens(outTokens)}`));
       }
-      parts.push(import_chalk.default.dim(`${elapsed}s`));
+      parts.push(import_chalk2.default.dim(`${elapsed}s`));
     } else {
       const elapsed = Math.round((Date.now() - this.totalStartTime) / 1e3);
       if (this.totalTokens > 0) {
-        parts.push(import_chalk.default.magenta(this.formatTokens(this.totalTokens)));
+        parts.push(import_chalk2.default.magenta(formatTokens(this.totalTokens)));
       }
       if (this.iterations > 0) {
-        parts.push(import_chalk.default.blue(`i${this.iterations}`));
+        parts.push(import_chalk2.default.blue(`i${this.iterations}`));
       }
       if (this.totalCost > 0) {
-        parts.push(import_chalk.default.cyan(`$${this.formatCost(this.totalCost)}`));
+        parts.push(import_chalk2.default.cyan(`$${formatCost(this.totalCost)}`));
       }
-      parts.push(import_chalk.default.dim(`${elapsed}s`));
+      parts.push(import_chalk2.default.dim(`${elapsed}s`));
     }
-    return `${parts.join(import_chalk.default.dim(" \u2502 "))} ${import_chalk.default.green(">")} `;
-  }
-  /**
-   * Formats token count compactly (3625 -> "3.6k").
-   */
-  formatTokens(tokens) {
-    return tokens >= 1e3 ? `${(tokens / 1e3).toFixed(1)}k` : `${tokens}`;
-  }
-  /**
-   * Formats cost compactly (0.0001234 -> "0.00012", 0.1234 -> "0.12", 1.234 -> "1.23").
-   */
-  formatCost(cost) {
-    if (cost < 1e-3) {
-      return cost.toFixed(5);
-    }
-    if (cost < 0.01) {
-      return cost.toFixed(4);
-    }
-    if (cost < 1) {
-      return cost.toFixed(3);
-    }
-    return cost.toFixed(2);
+    return `${parts.join(import_chalk2.default.dim(" | "))} ${import_chalk2.default.green(">")} `;
   }
 };
 async function readStream(stream2) {
@@ -5045,44 +5103,12 @@ async function resolvePrompt(promptArg, env) {
   }
   return pipedInput;
 }
-function renderSummary(metadata) {
-  const parts = [];
-  if (metadata.iterations !== void 0) {
-    parts.push(import_chalk.default.dim(`iterations: ${metadata.iterations}`));
-  }
-  if (metadata.finishReason) {
-    parts.push(import_chalk.default.dim(`finish: ${metadata.finishReason}`));
-  }
-  if (metadata.usage) {
-    const { inputTokens, outputTokens, totalTokens } = metadata.usage;
-    parts.push(
-      import_chalk.default.dim(`tokens: `) + import_chalk.default.cyan(`${totalTokens}`) + import_chalk.default.dim(` (in: ${inputTokens}, out: ${outputTokens})`)
-    );
-  }
-  if (metadata.cost !== void 0 && metadata.cost > 0) {
-    let formattedCost;
-    if (metadata.cost < 1e-3) {
-      formattedCost = metadata.cost.toFixed(5);
-    } else if (metadata.cost < 0.01) {
-      formattedCost = metadata.cost.toFixed(4);
-    } else if (metadata.cost < 1) {
-      formattedCost = metadata.cost.toFixed(3);
-    } else {
-      formattedCost = metadata.cost.toFixed(2);
-    }
-    parts.push(import_chalk.default.dim(`cost: `) + import_chalk.default.cyan(`$${formattedCost}`));
-  }
-  if (parts.length === 0) {
-    return null;
-  }
-  return parts.join(import_chalk.default.dim(" \u2502 "));
-}
 async function executeAction(action, env) {
   try {
     await action();
   } catch (error) {
     const message = error instanceof Error ? error.message : String(error);
-    env.stderr.write(`${import_chalk.default.red.bold("Error:")} ${message}
+    env.stderr.write(`${import_chalk2.default.red.bold("Error:")} ${message}
 `);
     env.setExitCode(1);
   }
@@ -5125,26 +5151,15 @@ ${statsPrompt}` : statsPrompt;
     }
   };
 }
-function formatGadgetSummary(result) {
-  const gadgetLabel = import_chalk2.default.magenta.bold(result.gadgetName);
-  const timeLabel = import_chalk2.default.dim(`${Math.round(result.executionTimeMs)}ms`);
-  if (result.error) {
-    return `${import_chalk2.default.red("\u2717")} ${gadgetLabel} ${import_chalk2.default.red("error:")} ${result.error} ${timeLabel}`;
-  }
-  if (result.breaksLoop) {
-    return `${import_chalk2.default.yellow("\u23F9")} ${gadgetLabel} ${import_chalk2.default.yellow("finished:")} ${result.result} ${timeLabel}`;
-  }
-  const maxLen = 80;
-  const shouldTruncate = result.gadgetName !== "TellUser";
-  const resultText = result.result ? shouldTruncate && result.result.length > maxLen ? `${result.result.slice(0, maxLen)}...` : result.result : "";
-  return `${import_chalk2.default.green("\u2713")} ${gadgetLabel} ${import_chalk2.default.dim("\u2192")} ${resultText} ${timeLabel}`;
-}
 async function handleAgentCommand(promptArg, options, env) {
   const prompt = await resolvePrompt(promptArg, env);
   const client = env.createClient();
   const registry = new GadgetRegistry();
   if (options.builtins !== false) {
     for (const gadget of builtinGadgets) {
+      if (options.builtinInteraction === false && gadget.name === "AskUser") {
+        continue;
+      }
       registry.registerByClass(gadget);
     }
   }
@@ -5161,16 +5176,28 @@ async function handleAgentCommand(promptArg, options, env) {
   let finishReason;
   let usage;
   let iterations = 0;
-  const estimateMessagesTokens = (messages) => {
-    const totalChars = messages.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
-    return Math.round(totalChars / FALLBACK_CHARS_PER_TOKEN);
+  const countMessagesTokens = async (model, messages) => {
+    try {
+      return await client.countTokens(model, messages);
+    } catch {
+      const totalChars = messages.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
+      return Math.round(totalChars / FALLBACK_CHARS_PER_TOKEN);
+    }
   };
   const builder = new AgentBuilder(client).withModel(options.model).withLogger(env.createLogger("llmist:cli:agent")).withHooks({
     observers: {
+      // onLLMCallStart: Start progress indicator for each LLM call
+      // This showcases how to react to agent lifecycle events
       onLLMCallStart: async (context) => {
-        const estimate = estimateMessagesTokens(context.options.messages);
-        progress.startCall(context.options.model, estimate);
+        const inputTokens = await countMessagesTokens(
+          context.options.model,
+          context.options.messages
+        );
+        progress.startCall(context.options.model, inputTokens);
+        progress.setInputTokens(inputTokens, false);
       },
+      // onStreamChunk: Real-time updates as LLM generates tokens
+      // This enables responsive UIs that show progress during generation
       onStreamChunk: async (context) => {
         progress.update(context.accumulatedText.length);
         if (context.usage) {
@@ -5182,10 +5209,20 @@ async function handleAgentCommand(promptArg, options, env) {
           }
         }
       },
+      // onLLMCallComplete: Finalize metrics after each LLM call
+      // This is where you'd typically log metrics or update dashboards
       onLLMCallComplete: async (context) => {
         finishReason = context.finishReason;
         usage = context.usage;
         iterations = Math.max(iterations, context.iteration + 1);
+        if (context.usage) {
+          if (context.usage.inputTokens) {
+            progress.setInputTokens(context.usage.inputTokens, false);
+          }
+          if (context.usage.outputTokens) {
+            progress.setOutputTokens(context.usage.outputTokens, false);
+          }
+        }
         progress.endCall(context.usage);
       }
     }
@@ -5227,7 +5264,8 @@ async function handleAgentCommand(promptArg, options, env) {
       finishReason,
       usage,
       iterations,
-      cost: progress.getTotalCost()
+      cost: progress.getTotalCost(),
+      elapsedSeconds: progress.getTotalElapsedSeconds()
     });
     if (summary) {
       env.stderr.write(`${summary}
@@ -5254,7 +5292,7 @@ function registerAgentCommand(program, env) {
     OPTION_DESCRIPTIONS.parameterFormat,
     parseParameterFormat,
     DEFAULT_PARAMETER_FORMAT
-  ).option(OPTION_FLAGS.noBuiltins, OPTION_DESCRIPTIONS.noBuiltins).action(
+  ).option(OPTION_FLAGS.noBuiltins, OPTION_DESCRIPTIONS.noBuiltins).option(OPTION_FLAGS.noBuiltinInteraction, OPTION_DESCRIPTIONS.noBuiltinInteraction).action(
     (prompt, options) => executeAction(() => handleAgentCommand(prompt, options, env), env)
   );
 }
@@ -5392,7 +5430,7 @@ function renderCompactTable(models, stream2) {
   );
   stream2.write(import_chalk3.default.dim("\u2500".repeat(idWidth + nameWidth + contextWidth + inputWidth + outputWidth + 8)) + "\n");
   for (const model of models) {
-    const contextFormatted = formatTokens(model.contextWindow);
+    const contextFormatted = formatTokens2(model.contextWindow);
     const inputPrice = `$${model.pricing.input.toFixed(2)}`;
     const outputPrice = `$${model.pricing.output.toFixed(2)}`;
     stream2.write(
@@ -5411,9 +5449,9 @@ function renderVerboseTable(models, stream2) {
     stream2.write(import_chalk3.default.dim("  " + "\u2500".repeat(60)) + "\n");
     stream2.write(`  ${import_chalk3.default.dim("Name:")}         ${import_chalk3.default.white(model.displayName)}
 `);
-    stream2.write(`  ${import_chalk3.default.dim("Context:")}      ${import_chalk3.default.yellow(formatTokens(model.contextWindow))}
+    stream2.write(`  ${import_chalk3.default.dim("Context:")}      ${import_chalk3.default.yellow(formatTokens2(model.contextWindow))}
 `);
-    stream2.write(`  ${import_chalk3.default.dim("Max Output:")}   ${import_chalk3.default.yellow(formatTokens(model.maxOutputTokens))}
+    stream2.write(`  ${import_chalk3.default.dim("Max Output:")}   ${import_chalk3.default.yellow(formatTokens2(model.maxOutputTokens))}
 `);
     stream2.write(`  ${import_chalk3.default.dim("Pricing:")}      ${import_chalk3.default.cyan(`$${model.pricing.input.toFixed(2)} input`)} ${import_chalk3.default.dim("/")} ${import_chalk3.default.cyan(`$${model.pricing.output.toFixed(2)} output`)} ${import_chalk3.default.dim("(per 1M tokens)")}
 `);
@@ -5476,7 +5514,7 @@ function renderJSON(models, stream2) {
   };
   stream2.write(JSON.stringify(output, null, 2) + "\n");
 }
-function formatTokens(count) {
+function formatTokens2(count) {
   if (count >= 1e6) {
     return `${(count / 1e6).toFixed(1)}M tokens`;
   } else if (count >= 1e3) {