llmist 0.6.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -31,6 +31,20 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
  ));
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

+ // src/core/constants.ts
+ var GADGET_START_PREFIX, GADGET_END_PREFIX, DEFAULT_GADGET_OUTPUT_LIMIT, DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT, CHARS_PER_TOKEN, FALLBACK_CONTEXT_WINDOW;
+ var init_constants = __esm({
+ "src/core/constants.ts"() {
+ "use strict";
+ GADGET_START_PREFIX = "!!!GADGET_START:";
+ GADGET_END_PREFIX = "!!!GADGET_END";
+ DEFAULT_GADGET_OUTPUT_LIMIT = true;
+ DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT = 15;
+ CHARS_PER_TOKEN = 4;
+ FALLBACK_CONTEXT_WINDOW = 128e3;
+ }
+ });
+
  // src/core/model-shortcuts.ts
  function isKnownModelPattern(model) {
  const normalized = model.toLowerCase();
@@ -328,20 +342,6 @@ var init_registry = __esm({
  }
  });

- // src/core/constants.ts
- var GADGET_START_PREFIX, GADGET_END_PREFIX, DEFAULT_GADGET_OUTPUT_LIMIT, DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT, CHARS_PER_TOKEN, FALLBACK_CONTEXT_WINDOW;
- var init_constants = __esm({
- "src/core/constants.ts"() {
- "use strict";
- GADGET_START_PREFIX = "!!!GADGET_START:";
- GADGET_END_PREFIX = "!!!GADGET_END";
- DEFAULT_GADGET_OUTPUT_LIMIT = true;
- DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT = 15;
- CHARS_PER_TOKEN = 4;
- FALLBACK_CONTEXT_WINDOW = 128e3;
- }
- });
-
  // src/core/prompt-config.ts
  function resolvePromptTemplate(template, defaultValue, context) {
  const resolved = template ?? defaultValue;
@@ -865,7 +865,7 @@ function findSafeDelimiter(content) {
  }
  let counter = 1;
  while (counter < 1e3) {
- const delimiter = `HEREDOC_${counter}`;
+ const delimiter = `__GADGET_PARAM_${counter}__`;
  const regex = new RegExp(`^${delimiter}\\s*$`);
  const isUsed = lines.some((line) => regex.test(line));
  if (!isUsed) {
@@ -923,6 +923,10 @@ function formatParamsAsYaml(params) {
  }
  return lines.join("\n");
  }
+ function formatTomlInlineTable(obj) {
+ const entries = Object.entries(obj).map(([k, v]) => `${k} = ${formatTomlValue(v)}`);
+ return `{ ${entries.join(", ")} }`;
+ }
  function formatTomlValue(value) {
  if (typeof value === "string") {
  if (value.includes("\n")) {
@@ -940,10 +944,17 @@ ${delimiter}`;
  return '""';
  }
  if (Array.isArray(value)) {
- return JSON.stringify(value);
+ if (value.length === 0) return "[]";
+ const items = value.map((item) => {
+ if (typeof item === "object" && item !== null && !Array.isArray(item)) {
+ return formatTomlInlineTable(item);
+ }
+ return formatTomlValue(item);
+ });
+ return `[${items.join(", ")}]`;
  }
  if (typeof value === "object") {
- return JSON.stringify(value);
+ return formatTomlInlineTable(value);
  }
  return JSON.stringify(value);
  }
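
Arrays and objects in TOML-formatted gadget parameters are now emitted as TOML inline tables rather than raw `JSON.stringify` output. A minimal sketch of the new behavior (names mirror the bundled functions; string and heredoc handling is elided):

```typescript
// Sketch only: arrays of plain objects now render as TOML inline tables.
function formatTomlValue(value: unknown): string {
  if (Array.isArray(value)) {
    if (value.length === 0) return "[]";
    return `[${value.map(formatTomlValue).join(", ")}]`;
  }
  if (typeof value === "object" && value !== null) {
    const entries = Object.entries(value).map(([k, v]) => `${k} = ${formatTomlValue(v)}`);
    return `{ ${entries.join(", ")} }`;
  }
  return JSON.stringify(value);
}

console.log(formatTomlValue([{ path: "a.ts", mode: 644 }]));
// 0.6.2: [{"path":"a.ts","mode":644}]  (JSON, not parseable as TOML)
// 0.8.0: [{ path = "a.ts", mode = 644 }]
```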
@@ -961,7 +972,16 @@ var init_gadget = __esm({
  yaml = __toESM(require("js-yaml"), 1);
  init_schema_to_json();
  init_schema_validator();
- HEREDOC_DELIMITERS = ["EOF", "END", "DOC", "CONTENT", "TEXT", "HEREDOC", "DATA", "BLOCK"];
+ HEREDOC_DELIMITERS = [
+ "__GADGET_PARAM_EOF__",
+ "__GADGET_PARAM_END__",
+ "__GADGET_PARAM_DOC__",
+ "__GADGET_PARAM_CONTENT__",
+ "__GADGET_PARAM_TEXT__",
+ "__GADGET_PARAM_HEREDOC__",
+ "__GADGET_PARAM_DATA__",
+ "__GADGET_PARAM_BLOCK__"
+ ];
  BaseGadget = class {
  /**
  * The name of the gadget. Used for identification when LLM calls it.
@@ -1959,6 +1979,14 @@ function preprocessTomlHeredoc(tomlStr) {
  }
  return result.join("\n");
  }
+ function stripMarkdownFences(content) {
+ let cleaned = content.trim();
+ const openingFence = /^```(?:toml|yaml|json)?\s*\n/i;
+ const closingFence = /\n?```\s*$/;
+ cleaned = cleaned.replace(openingFence, "");
+ cleaned = cleaned.replace(closingFence, "");
+ return cleaned.trim();
+ }
  var yaml2, import_js_toml, globalInvocationCounter, StreamParser;
  var init_parser = __esm({
  "src/gadgets/parser.ts"() {
@@ -2014,35 +2042,36 @@ var init_parser = __esm({
  * Parse parameter string according to configured format
  */
  parseParameters(raw) {
+ const cleaned = stripMarkdownFences(raw);
  if (this.parameterFormat === "json") {
  try {
- return { parameters: JSON.parse(raw) };
+ return { parameters: JSON.parse(cleaned) };
  } catch (error) {
  return { parseError: this.truncateParseError(error, "JSON") };
  }
  }
  if (this.parameterFormat === "yaml") {
  try {
- return { parameters: yaml2.load(preprocessYaml(raw)) };
+ return { parameters: yaml2.load(preprocessYaml(cleaned)) };
  } catch (error) {
  return { parseError: this.truncateParseError(error, "YAML") };
  }
  }
  if (this.parameterFormat === "toml") {
  try {
- return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(raw)) };
+ return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(cleaned)) };
  } catch (error) {
  return { parseError: this.truncateParseError(error, "TOML") };
  }
  }
  try {
- return { parameters: JSON.parse(raw) };
+ return { parameters: JSON.parse(cleaned) };
  } catch {
  try {
- return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(raw)) };
+ return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(cleaned)) };
  } catch {
  try {
- return { parameters: yaml2.load(preprocessYaml(raw)) };
+ return { parameters: yaml2.load(preprocessYaml(cleaned)) };
  } catch (error) {
  return { parseError: this.truncateParseError(error, "auto") };
  }
  }
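
With `parameterFormat: "auto"`, the fence-stripped payload is tried as JSON first, then TOML, then YAML, per the code above. For example (assuming `parser` is a parser instance configured for auto format):

```typescript
// Hypothetical parser instance; illustrates the fallback chain.
parser.parseParameters('```yaml\nname: build\nretries: 2\n```');
// 1. fences stripped        -> 'name: build\nretries: 2'
// 2. JSON.parse fails       -> fall through
// 3. TOML load fails (no =) -> fall through
// 4. YAML succeeds          -> { parameters: { name: "build", retries: 2 } }
```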
@@ -2588,6 +2617,7 @@ var init_agent = __esm({
  gadgetEndPrefix;
  onHumanInputRequired;
  textOnlyHandler;
+ textWithGadgetsHandler;
  stopOnGadgetError;
  shouldContinueAfterError;
  defaultGadgetTimeoutMs;
@@ -2618,6 +2648,7 @@ var init_agent = __esm({
  this.gadgetEndPrefix = options.gadgetEndPrefix;
  this.onHumanInputRequired = options.onHumanInputRequired;
  this.textOnlyHandler = options.textOnlyHandler ?? "terminate";
+ this.textWithGadgetsHandler = options.textWithGadgetsHandler;
  this.stopOnGadgetError = options.stopOnGadgetError ?? true;
  this.shouldContinueAfterError = options.shouldContinueAfterError;
  this.defaultGadgetTimeoutMs = options.defaultGadgetTimeoutMs;
@@ -2805,6 +2836,17 @@ var init_agent = __esm({
  }
  }
  if (result.didExecuteGadgets) {
+ if (this.textWithGadgetsHandler) {
+ const textContent = result.outputs.filter((output) => output.type === "text").map((output) => output.content).join("");
+ if (textContent.trim()) {
+ const { gadgetName, parameterMapping, resultMapping } = this.textWithGadgetsHandler;
+ this.conversation.addGadgetCall(
+ gadgetName,
+ parameterMapping(textContent),
+ resultMapping ? resultMapping(textContent) : textContent
+ );
+ }
+ }
  for (const output of result.outputs) {
  if (output.type === "gadget_result") {
  const gadgetResult = output.result;
@@ -2816,7 +2858,13 @@ var init_agent = __esm({
  }
  }
  } else {
- this.conversation.addAssistantMessage(finalMessage);
+ if (finalMessage.trim()) {
+ this.conversation.addGadgetCall(
+ "TellUser",
+ { message: finalMessage, done: false, type: "info" },
+ `\u2139\uFE0F ${finalMessage}`
+ );
+ }
  const shouldBreak = await this.handleTextOnlyResponse(finalMessage);
  if (shouldBreak) {
  break;
@@ -3011,7 +3059,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3035,7 +3084,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 1,
  output: 5,
- cachedInput: 0.1
+ cachedInput: 0.1,
+ cacheWriteInput: 1.25
  },
  knowledgeCutoff: "2025-02",
  features: {
@@ -3059,7 +3109,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-03",
  features: {
@@ -3083,7 +3134,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2024-11",
  features: {
@@ -3107,7 +3159,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 15,
  output: 75,
- cachedInput: 1.5
+ cachedInput: 1.5,
+ cacheWriteInput: 18.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3131,7 +3184,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 15,
  output: 75,
- cachedInput: 1.5
+ cachedInput: 1.5,
+ cacheWriteInput: 18.75
  },
  knowledgeCutoff: "2025-03",
  features: {
@@ -3154,7 +3208,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 0.8,
  output: 4,
- cachedInput: 0.08
+ cachedInput: 0.08,
+ cacheWriteInput: 1
  },
  knowledgeCutoff: "2024-07",
  features: {
@@ -3177,7 +3232,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 0.25,
  output: 1.25,
- cachedInput: 0.025
+ cachedInput: 0.025,
+ cacheWriteInput: 0.3125
  },
  knowledgeCutoff: "2023-08",
  features: {
@@ -3201,7 +3257,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 1,
  output: 5,
- cachedInput: 0.1
+ cachedInput: 0.1,
+ cacheWriteInput: 1.25
  },
  knowledgeCutoff: "2025-02",
  features: {
@@ -3225,7 +3282,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3249,7 +3307,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 5,
  output: 25,
- cachedInput: 0.5
+ cachedInput: 0.5,
+ cacheWriteInput: 6.25
  },
  knowledgeCutoff: "2025-03",
  features: {
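
All of the `cacheWriteInput` rates added above follow a single rule: cache writes are billed at 1.25× the model's base input rate, while the existing `cachedInput` reads are 0.1×. A quick spot-check:

```typescript
// Spot-check of the new Anthropic pricing fields (USD per MTok):
const input = 3;                      // e.g. the $3-input models above
const cacheWriteInput = input * 1.25; // 3.75 — matches the diff
const cachedInput = input * 0.1;      // 0.3  — matches the diff
// Likewise: 15 -> 18.75, 5 -> 6.25, 1 -> 1.25, 0.8 -> 1, 0.25 -> 0.3125.
```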
@@ -3364,15 +3423,27 @@ var init_anthropic = __esm({
  }
  buildRequestPayload(options, descriptor, spec, messages) {
  const systemMessages = messages.filter((message) => message.role === "system");
- const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
- const conversation = messages.filter(
+ const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
+ type: "text",
+ text: m.content,
+ // Add cache_control to the LAST system message block
+ ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
+ })) : void 0;
+ const nonSystemMessages = messages.filter(
  (message) => message.role !== "system"
- ).map((message) => ({
+ );
+ const lastUserIndex = nonSystemMessages.reduce(
+ (lastIdx, msg, idx) => msg.role === "user" ? idx : lastIdx,
+ -1
+ );
+ const conversation = nonSystemMessages.map((message, index) => ({
  role: message.role,
  content: [
  {
  type: "text",
- text: message.content
+ text: message.content,
+ // Add cache_control to the LAST user message
+ ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
  }
  ]
  }));
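
The reshaped request sends system prompts as content blocks and marks the last system block plus the last user message with ephemeral `cache_control`, so the stable prefix of the conversation becomes cacheable. A sketch of the resulting payload (field values illustrative):

```typescript
const payload = {
  system: [
    { type: "text", text: "You are an agent..." },
    { type: "text", text: "House rules...", cache_control: { type: "ephemeral" } }, // last system block
  ],
  messages: [
    { role: "user", content: [{ type: "text", text: "Hi" }] },
    { role: "assistant", content: [{ type: "text", text: "Hello!" }] },
    { role: "user", content: [{ type: "text", text: "Continue.", cache_control: { type: "ephemeral" } }] }, // last user message
  ],
};
```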
@@ -3398,15 +3469,22 @@ var init_anthropic = __esm({
  async *wrapStream(iterable) {
  const stream2 = iterable;
  let inputTokens = 0;
+ let cachedInputTokens = 0;
+ let cacheCreationInputTokens = 0;
  for await (const event of stream2) {
  if (event.type === "message_start") {
- inputTokens = event.message.usage.input_tokens;
+ const usage = event.message.usage;
+ cachedInputTokens = usage.cache_read_input_tokens ?? 0;
+ cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
+ inputTokens = usage.input_tokens + cachedInputTokens + cacheCreationInputTokens;
  yield {
  text: "",
  usage: {
  inputTokens,
  outputTokens: 0,
- totalTokens: inputTokens
+ totalTokens: inputTokens,
+ cachedInputTokens,
+ cacheCreationInputTokens
  },
  rawEvent: event
  };
@@ -3420,7 +3498,9 @@ var init_anthropic = __esm({
  const usage = event.usage ? {
  inputTokens,
  outputTokens: event.usage.output_tokens,
- totalTokens: inputTokens + event.usage.output_tokens
+ totalTokens: inputTokens + event.usage.output_tokens,
+ cachedInputTokens,
+ cacheCreationInputTokens
  } : void 0;
  if (event.delta.stop_reason || usage) {
  yield {
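
Anthropic reports `input_tokens` exclusive of cache reads and writes, so the wrapper sums all three to get the full prompt size. With illustrative numbers:

```typescript
// From a message_start event:
const usage = {
  input_tokens: 500,                 // uncached portion
  cache_read_input_tokens: 8000,     // served from cache
  cache_creation_input_tokens: 1500, // written to cache this call
};
const inputTokens = 500 + 8000 + 1500; // 10000 — reported as inputTokens
// cachedInputTokens = 8000, cacheCreationInputTokens = 1500
```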
@@ -3501,6 +3581,7 @@ var init_gemini_models = __esm({
  "src/providers/gemini-models.ts"() {
  "use strict";
  GEMINI_MODELS = [
+ // Gemini 3 Pro (Preview)
  {
  provider: "gemini",
  modelId: "gemini-3-pro-preview",
@@ -3509,8 +3590,11 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 2,
+ // $2.00 for prompts <= 200k, $4.00 for > 200k (using lower tier)
  output: 12,
+ // $12.00 for prompts <= 200k, $18.00 for > 200k
  cachedInput: 0.2
+ // $0.20 for prompts <= 200k
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3523,9 +3607,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 3",
  releaseDate: "2025-11-18",
- notes: "Most advanced model. 1501 Elo LMArena, 91.9% GPQA Diamond, 76.2% SWE-bench. Deep Think mode available."
+ notes: "Best model for multimodal understanding, agentic and vibe-coding. Deep Think mode available."
  }
  },
+ // Gemini 2.5 Pro
  {
  provider: "gemini",
  modelId: "gemini-2.5-pro",
@@ -3534,8 +3619,11 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 1.25,
+ // $1.25 for prompts <= 200k, $2.50 for > 200k
  output: 10,
+ // $10.00 for prompts <= 200k, $15.00 for > 200k
  cachedInput: 0.125
+ // $0.125 for prompts <= 200k
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3548,9 +3636,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Balanced multimodal model with 1M context. Best for complex agents and reasoning."
+ notes: "State-of-the-art multipurpose model. Excels at coding and complex reasoning."
  }
  },
+ // Gemini 2.5 Flash
  {
  provider: "gemini",
  modelId: "gemini-2.5-flash",
@@ -3559,8 +3648,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 0.3,
+ // $0.30 for text/image/video, $1.00 for audio
  output: 2.5,
  cachedInput: 0.03
+ // $0.03 for text/image/video
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3573,9 +3664,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Best price-performance ratio with thinking enabled by default"
+ notes: "First hybrid reasoning model with 1M context and thinking budgets."
  }
  },
+ // Gemini 2.5 Flash-Lite
  {
  provider: "gemini",
  modelId: "gemini-2.5-flash-lite",
@@ -3584,8 +3676,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 0.1,
+ // $0.10 for text/image/video, $0.30 for audio
  output: 0.4,
  cachedInput: 0.01
+ // $0.01 for text/image/video
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3597,9 +3691,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Fastest and most cost-efficient model for high-volume, low-latency tasks"
+ notes: "Smallest and most cost effective model, built for at scale usage."
  }
  },
+ // Gemini 2.0 Flash
  {
  provider: "gemini",
  modelId: "gemini-2.0-flash",
@@ -3608,8 +3703,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 8192,
  pricing: {
  input: 0.1,
+ // $0.10 for text/image/video, $0.70 for audio
  output: 0.4,
- cachedInput: 0.01
+ cachedInput: 0.025
+ // $0.025 for text/image/video
  },
  knowledgeCutoff: "2024-08",
  features: {
@@ -3620,9 +3717,10 @@ var init_gemini_models = __esm({
  },
  metadata: {
  family: "Gemini 2.0",
- notes: "Previous generation with 1M context and multimodal capabilities"
+ notes: "Balanced multimodal model with 1M context, built for the era of Agents."
  }
  },
+ // Gemini 2.0 Flash-Lite
  {
  provider: "gemini",
  modelId: "gemini-2.0-flash-lite",
@@ -3631,8 +3729,8 @@ var init_gemini_models = __esm({
  maxOutputTokens: 8192,
  pricing: {
  input: 0.075,
- output: 0.3,
- cachedInput: 75e-4
+ output: 0.3
+ // No context caching available for 2.0-flash-lite
  },
  knowledgeCutoff: "2024-08",
  features: {
@@ -3643,7 +3741,7 @@ var init_gemini_models = __esm({
  },
  metadata: {
  family: "Gemini 2.0",
- notes: "Lightweight previous generation model for cost-sensitive applications"
+ notes: "Smallest and most cost effective 2.0 model for at scale usage."
  }
  }
  ];
@@ -3813,7 +3911,9 @@ var init_gemini = __esm({
  return {
  inputTokens: usageMetadata.promptTokenCount ?? 0,
  outputTokens: usageMetadata.candidatesTokenCount ?? 0,
- totalTokens: usageMetadata.totalTokenCount ?? 0
+ totalTokens: usageMetadata.totalTokenCount ?? 0,
+ // Gemini returns cached token count in cachedContentTokenCount
+ cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
  };
  }
  /**
@@ -3869,10 +3969,11 @@ var init_openai_models = __esm({
  "src/providers/openai-models.ts"() {
  "use strict";
  OPENAI_MODELS = [
+ // GPT-5 Family
  {
  provider: "openai",
  modelId: "gpt-5.1",
- displayName: "GPT-5.1 Instant",
+ displayName: "GPT-5.1",
  contextWindow: 128e3,
  maxOutputTokens: 32768,
  pricing: {
@@ -3892,34 +3993,7 @@ var init_openai_models = __esm({
  metadata: {
  family: "GPT-5",
  releaseDate: "2025-11-12",
- notes: "Warmer, more intelligent, better instruction following. 2-3x faster than GPT-5.",
- supportsTemperature: false
- }
- },
- {
- provider: "openai",
- modelId: "gpt-5.1-thinking",
- displayName: "GPT-5.1 Thinking",
- contextWindow: 196e3,
- maxOutputTokens: 32768,
- pricing: {
- input: 1.25,
- output: 10,
- cachedInput: 0.125
- },
- knowledgeCutoff: "2024-09-30",
- features: {
- streaming: true,
- functionCalling: true,
- vision: true,
- reasoning: true,
- structuredOutputs: true,
- fineTuning: true
- },
- metadata: {
- family: "GPT-5",
- releaseDate: "2025-11-12",
- notes: "Advanced reasoning with thinking levels: Light, Standard, Extended, Heavy. Best for complex tasks.",
+ notes: "Latest GPT-5 with improved instruction following. 2-3x faster than GPT-5.",
  supportsTemperature: false
  }
  },
@@ -3999,6 +4073,255 @@ var init_openai_models = __esm({
  notes: "Fastest, most cost-efficient version for well-defined tasks",
  supportsTemperature: false
  }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-5-pro",
+ displayName: "GPT-5 Pro",
+ contextWindow: 272e3,
+ maxOutputTokens: 128e3,
+ pricing: {
+ input: 15,
+ output: 120
+ // No cached input pricing for gpt-5-pro
+ },
+ knowledgeCutoff: "2024-09-30",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "GPT-5",
+ notes: "Premium tier with enhanced capabilities. Does not support prompt caching.",
+ supportsTemperature: false
+ }
+ },
+ // GPT-4.1 Family
+ {
+ provider: "openai",
+ modelId: "gpt-4.1",
+ displayName: "GPT-4.1",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 2,
+ output: 8,
+ cachedInput: 0.5
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Improved GPT-4 with better instruction following"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4.1-mini",
+ displayName: "GPT-4.1 Mini",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 0.4,
+ output: 1.6,
+ cachedInput: 0.1
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Cost-efficient GPT-4.1 variant"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4.1-nano",
+ displayName: "GPT-4.1 Nano",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 0.1,
+ output: 0.4,
+ cachedInput: 0.025
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Fastest GPT-4.1 variant for simple tasks"
+ }
+ },
+ // GPT-4o Family
+ {
+ provider: "openai",
+ modelId: "gpt-4o",
+ displayName: "GPT-4o",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 2.5,
+ output: 10,
+ cachedInput: 1.25
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4o",
+ notes: "Multimodal model optimized for speed"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4o-mini",
+ displayName: "GPT-4o Mini",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 0.15,
+ output: 0.6,
+ cachedInput: 0.075
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4o",
+ notes: "Fast and affordable multimodal model"
+ }
+ },
+ // o-series (Reasoning models)
+ {
+ provider: "openai",
+ modelId: "o1",
+ displayName: "o1",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 15,
+ output: 60,
+ cachedInput: 7.5
+ },
+ knowledgeCutoff: "2024-12-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Advanced reasoning model with chain-of-thought",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o3",
+ displayName: "o3",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 2,
+ output: 8,
+ cachedInput: 0.5
+ },
+ knowledgeCutoff: "2025-01-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Next-gen reasoning model, more efficient than o1",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o4-mini",
+ displayName: "o4 Mini",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 1.1,
+ output: 4.4,
+ cachedInput: 0.275
+ },
+ knowledgeCutoff: "2025-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Cost-efficient reasoning model",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o3-mini",
+ displayName: "o3 Mini",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 1.1,
+ output: 4.4,
+ cachedInput: 0.55
+ },
+ knowledgeCutoff: "2025-01-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Compact reasoning model for cost-sensitive applications",
+ supportsTemperature: false
+ }
  }
  ];
  }
@@ -4079,7 +4402,8 @@ var init_openai = __esm({
  const usage = chunk.usage ? {
  inputTokens: chunk.usage.prompt_tokens,
  outputTokens: chunk.usage.completion_tokens,
- totalTokens: chunk.usage.total_tokens
+ totalTokens: chunk.usage.total_tokens,
+ cachedInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0
  } : void 0;
  if (finishReason || usage) {
  yield { text: "", finishReason, usage, rawEvent: chunk };
@@ -4296,20 +4620,28 @@ var init_model_registry = __esm({
  /**
  * Estimate API cost for a given model and token usage
  * @param modelId - Full model identifier
- * @param inputTokens - Number of input tokens
+ * @param inputTokens - Number of input tokens (total, including cached and cache creation)
  * @param outputTokens - Number of output tokens
- * @param useCachedInput - Whether to use cached input pricing (if supported by provider)
+ * @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
+ * @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
  * @returns CostEstimate if model found, undefined otherwise
  */
- estimateCost(modelId, inputTokens, outputTokens, useCachedInput = false) {
+ estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
  const spec = this.getModelSpec(modelId);
  if (!spec) return void 0;
- const inputRate = useCachedInput && spec.pricing.cachedInput !== void 0 ? spec.pricing.cachedInput : spec.pricing.input;
- const inputCost = inputTokens / 1e6 * inputRate;
+ const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
+ const cacheWriteRate = spec.pricing.cacheWriteInput ?? spec.pricing.input;
+ const uncachedInputTokens = inputTokens - cachedInputTokens - cacheCreationInputTokens;
+ const uncachedInputCost = uncachedInputTokens / 1e6 * spec.pricing.input;
+ const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
+ const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
+ const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
  const outputCost = outputTokens / 1e6 * spec.pricing.output;
  const totalCost = inputCost + outputCost;
  return {
  inputCost,
+ cachedInputCost,
+ cacheCreationCost,
  outputCost,
  totalCost,
  currency: "USD"
@@ -4690,6 +5022,7 @@ var AgentBuilder;
  var init_builder = __esm({
  "src/agent/builder.ts"() {
  "use strict";
+ init_constants();
  init_model_shortcuts();
  init_registry();
  init_agent();
@@ -4711,6 +5044,7 @@ var init_builder = __esm({
  gadgetStartPrefix;
  gadgetEndPrefix;
  textOnlyHandler;
+ textWithGadgetsHandler;
  stopOnGadgetError;
  shouldContinueAfterError;
  defaultGadgetTimeoutMs;
@@ -4973,6 +5307,30 @@ var init_builder = __esm({
  this.textOnlyHandler = handler;
  return this;
  }
+ /**
+ * Set the handler for text content that appears alongside gadget calls.
+ *
+ * When set, text accompanying gadget responses will be wrapped as a
+ * synthetic gadget call before the actual gadget results in the
+ * conversation history.
+ *
+ * @param handler - Configuration for wrapping text
+ * @returns This builder for chaining
+ *
+ * @example
+ * ```typescript
+ * // Wrap text as TellUser gadget
+ * .withTextWithGadgetsHandler({
+ * gadgetName: "TellUser",
+ * parameterMapping: (text) => ({ message: text, done: false, type: "info" }),
+ * resultMapping: (text) => `ℹ️ ${text}`,
+ * })
+ * ```
+ */
+ withTextWithGadgetsHandler(handler) {
+ this.textWithGadgetsHandler = handler;
+ return this;
+ }
  /**
  * Set whether to stop gadget execution on first error.
  *
@@ -5087,6 +5445,69 @@ var init_builder = __esm({
  this.gadgetOutputLimitPercent = percent;
  return this;
  }
+ /**
+ * Add a synthetic gadget call to the conversation history.
+ *
+ * This is useful for in-context learning - showing the LLM what "past self"
+ * did correctly so it mimics the pattern. The call is formatted with proper
+ * markers and parameter format.
+ *
+ * @param gadgetName - Name of the gadget
+ * @param parameters - Parameters passed to the gadget
+ * @param result - Result returned by the gadget
+ * @returns This builder for chaining
+ *
+ * @example
+ * ```typescript
+ * .withSyntheticGadgetCall(
+ * 'TellUser',
+ * {
+ * message: '👋 Hello!\n\nHere\'s what I can do:\n- Analyze code\n- Run commands',
+ * done: false,
+ * type: 'info'
+ * },
+ * 'ℹ️ 👋 Hello!\n\nHere\'s what I can do:\n- Analyze code\n- Run commands'
+ * )
+ * ```
+ */
+ withSyntheticGadgetCall(gadgetName, parameters, result) {
+ const startPrefix = this.gadgetStartPrefix ?? GADGET_START_PREFIX;
+ const endPrefix = this.gadgetEndPrefix ?? GADGET_END_PREFIX;
+ const format = this.parameterFormat ?? "yaml";
+ const paramStr = this.formatSyntheticParameters(parameters, format);
+ this.initialMessages.push({
+ role: "assistant",
+ content: `${startPrefix}${gadgetName}
+ ${paramStr}
+ ${endPrefix}`
+ });
+ this.initialMessages.push({
+ role: "user",
+ content: `Result: ${result}`
+ });
+ return this;
+ }
+ /**
+ * Format parameters for synthetic gadget calls.
+ * Uses heredoc for multiline string values.
+ */
+ formatSyntheticParameters(parameters, format) {
+ if (format === "json" || format === "auto") {
+ return JSON.stringify(parameters);
+ }
+ return Object.entries(parameters).map(([key, value]) => {
+ if (typeof value === "string" && value.includes("\n")) {
+ const separator = format === "yaml" ? ":" : " =";
+ return `${key}${separator} <<<EOF
+ ${value}
+ EOF`;
+ }
+ if (format === "yaml") {
+ return typeof value === "string" ? `${key}: ${value}` : `${key}: ${JSON.stringify(value)}`;
+ }
+ return `${key} = ${JSON.stringify(value)}`;
+ }).join("\n");
+ }
  /**
  * Build and create the agent with the given user prompt.
  * Returns the Agent instance ready to run.
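
With the default markers and YAML parameter format above, a synthetic call seeds two messages into the history. For example, `withSyntheticGadgetCall("TellUser", { message: "Hi\nthere", done: false, type: "info" }, "ℹ️ Hi\nthere")` produces:

```typescript
// Assistant message (multiline strings take the heredoc form):
const assistant = [
  "!!!GADGET_START:TellUser",
  "message: <<<EOF",
  "Hi",
  "there",
  "EOF",
  "done: false",
  "type: info",
  "!!!GADGET_END",
].join("\n");
// Followed by a user message: "Result: ℹ️ Hi\nthere"
```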
@@ -5129,6 +5550,7 @@ var init_builder = __esm({
  gadgetStartPrefix: this.gadgetStartPrefix,
  gadgetEndPrefix: this.gadgetEndPrefix,
  textOnlyHandler: this.textOnlyHandler,
+ textWithGadgetsHandler: this.textWithGadgetsHandler,
  stopOnGadgetError: this.stopOnGadgetError,
  shouldContinueAfterError: this.shouldContinueAfterError,
  defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
@@ -5230,6 +5652,7 @@ var init_builder = __esm({
  gadgetStartPrefix: this.gadgetStartPrefix,
  gadgetEndPrefix: this.gadgetEndPrefix,
  textOnlyHandler: this.textOnlyHandler,
+ textWithGadgetsHandler: this.textWithGadgetsHandler,
  stopOnGadgetError: this.stopOnGadgetError,
  shouldContinueAfterError: this.shouldContinueAfterError,
  defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
@@ -5265,7 +5688,8 @@ var OPTION_FLAGS = {
  logFile: "--log-file <path>",
  logReset: "--log-reset",
  noBuiltins: "--no-builtins",
- noBuiltinInteraction: "--no-builtin-interaction"
+ noBuiltinInteraction: "--no-builtin-interaction",
+ quiet: "-q, --quiet"
  };
  var OPTION_DESCRIPTIONS = {
  model: "Model identifier, e.g. openai:gpt-5-nano or anthropic:claude-sonnet-4-5.",
@@ -5279,7 +5703,8 @@ var OPTION_DESCRIPTIONS = {
  logFile: "Path to log file. When set, logs are written to file instead of stderr.",
  logReset: "Reset (truncate) the log file at session start instead of appending.",
  noBuiltins: "Disable built-in gadgets (AskUser, TellUser).",
- noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser."
+ noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser.",
+ quiet: "Suppress all output except content (text and TellUser messages)."
  };
  var SUMMARY_PREFIX = "[llmist]";

@@ -5289,7 +5714,7 @@ var import_commander3 = require("commander");
  // package.json
  var package_default = {
  name: "llmist",
- version: "0.6.1",
+ version: "0.7.0",
  description: "Universal TypeScript LLM client with streaming-first agent framework. Works with any model - no structured outputs or native tool calling required. Implements its own flexible grammar for function calling.",
  type: "module",
  main: "dist/index.cjs",
@@ -5373,6 +5798,7 @@ var package_default = {
  "@google/genai": "^1.27.0",
  chalk: "^5.6.2",
  commander: "^12.1.0",
+ eta: "^4.4.1",
  "js-toml": "^1.0.2",
  "js-yaml": "^4.1.0",
  marked: "^15.0.12",
@@ -5437,7 +5863,7 @@ var tellUser = createGadget({
  name: "TellUser",
  description: "Tell the user something important. Set done=true when your work is complete and you want to end the conversation.",
  schema: import_zod2.z.object({
- message: import_zod2.z.string().describe("The message to display to the user in Markdown"),
+ message: import_zod2.z.string().optional().describe("The message to display to the user in Markdown"),
  done: import_zod2.z.boolean().default(false).describe("Set to true to end the conversation, false to continue"),
  type: import_zod2.z.enum(["info", "success", "warning", "error"]).default("info").describe("Message type: info, success, warning, or error")
  }),
@@ -5457,9 +5883,20 @@ var tellUser = createGadget({
  done: false,
  type: "warning"
  }
+ },
+ {
+ comment: "Share detailed analysis with bullet points (use heredoc for multiline)",
+ params: {
+ message: "Here's what I found in the codebase:\n\n1. **Main entry point**: `src/index.ts` exports all public APIs\n2. **Core logic**: Located in `src/core/` with 5 modules\n3. **Tests**: Good coverage in `src/__tests__/`\n\nI'll continue exploring the core modules.",
+ done: false,
+ type: "info"
+ }
  }
  ],
  execute: ({ message, done, type }) => {
+ if (!message || message.trim() === "") {
+ return "\u26A0\uFE0F TellUser was called without a message. Please provide content in the 'message' field.";
+ }
  const prefixes = {
  info: "\u2139\uFE0F ",
  success: "\u2705 ",
@@ -5481,12 +5918,19 @@ var import_node_path2 = __toESM(require("path"), 1);
  var import_node_url = require("url");
  init_gadget();
  var PATH_PREFIXES = [".", "/", "~"];
+ function isGadgetLike(value) {
+ if (typeof value !== "object" || value === null) {
+ return false;
+ }
+ const obj = value;
+ return typeof obj.execute === "function" && typeof obj.description === "string" && ("parameterSchema" in obj || "schema" in obj);
+ }
  function isGadgetConstructor(value) {
  if (typeof value !== "function") {
  return false;
  }
  const prototype = value.prototype;
- return Boolean(prototype) && prototype instanceof BaseGadget;
+ return Boolean(prototype) && (prototype instanceof BaseGadget || isGadgetLike(prototype));
  }
  function expandHomePath(input) {
  if (!input.startsWith("~")) {
@@ -5523,7 +5967,7 @@ function extractGadgetsFromModule(moduleExports) {
5523
5967
  return;
5524
5968
  }
5525
5969
  visited.add(value);
5526
- if (value instanceof BaseGadget) {
5970
+ if (value instanceof BaseGadget || isGadgetLike(value)) {
5527
5971
  results.push(value);
5528
5972
  return;
5529
5973
  }
@@ -5648,8 +6092,14 @@ function renderSummary(metadata) {
5648
6092
  parts.push(import_chalk.default.magenta(metadata.model));
5649
6093
  }
5650
6094
  if (metadata.usage) {
5651
- const { inputTokens, outputTokens } = metadata.usage;
6095
+ const { inputTokens, outputTokens, cachedInputTokens, cacheCreationInputTokens } = metadata.usage;
5652
6096
  parts.push(import_chalk.default.dim("\u2191") + import_chalk.default.yellow(` ${formatTokens(inputTokens)}`));
6097
+ if (cachedInputTokens && cachedInputTokens > 0) {
6098
+ parts.push(import_chalk.default.dim("\u27F3") + import_chalk.default.blue(` ${formatTokens(cachedInputTokens)}`));
6099
+ }
6100
+ if (cacheCreationInputTokens && cacheCreationInputTokens > 0) {
6101
+ parts.push(import_chalk.default.dim("\u270E") + import_chalk.default.magenta(` ${formatTokens(cacheCreationInputTokens)}`));
6102
+ }
5653
6103
  parts.push(import_chalk.default.dim("\u2193") + import_chalk.default.green(` ${formatTokens(outputTokens)}`));
5654
6104
  }
5655
6105
  if (metadata.elapsedSeconds !== void 0 && metadata.elapsedSeconds > 0) {
@@ -5741,53 +6191,6 @@ ${rendered}`;
5741
6191
  }
5742
6192
 
5743
6193
  // src/cli/utils.ts
5744
- var RARE_EMOJI = [
5745
- "\u{1F531}",
5746
- "\u2697\uFE0F",
5747
- "\u{1F9FF}",
5748
- "\u{1F530}",
5749
- "\u269B\uFE0F",
5750
- "\u{1F3FA}",
5751
- "\u{1F9EB}",
5752
- "\u{1F52C}",
5753
- "\u2695\uFE0F",
5754
- "\u{1F5DD}\uFE0F",
5755
- "\u2696\uFE0F",
5756
- "\u{1F52E}",
5757
- "\u{1FAAC}",
5758
- "\u{1F9EC}",
5759
- "\u2699\uFE0F",
5760
- "\u{1F529}",
5761
- "\u{1FA9B}",
5762
- "\u26CF\uFE0F",
5763
- "\u{1FA83}",
5764
- "\u{1F3F9}",
5765
- "\u{1F6E1}\uFE0F",
5766
- "\u2694\uFE0F",
5767
- "\u{1F5E1}\uFE0F",
5768
- "\u{1FA93}",
5769
- "\u{1F5C3}\uFE0F",
5770
- "\u{1F4DC}",
5771
- "\u{1F4EF}",
5772
- "\u{1F3B4}",
5773
- "\u{1F004}",
5774
- "\u{1F3B2}"
5775
- ];
5776
- function generateMarkers() {
5777
- const pick = (count) => {
5778
- const result = [];
5779
- const pool = [...RARE_EMOJI];
5780
- for (let i = 0; i < count && pool.length > 0; i++) {
5781
- const idx = Math.floor(Math.random() * pool.length);
5782
- result.push(pool.splice(idx, 1)[0]);
5783
- }
5784
- return result.join("");
5785
- };
5786
- return {
5787
- startPrefix: pick(5),
5788
- endPrefix: pick(5)
5789
- };
5790
- }
5791
6194
  function createNumericParser({
5792
6195
  label,
5793
6196
  integer = false,
@@ -5865,6 +6268,9 @@ var StreamProgress = class {
5865
6268
  callOutputTokensEstimated = true;
5866
6269
  callOutputChars = 0;
5867
6270
  isStreaming = false;
6271
+ // Cache token tracking for live cost estimation during streaming
6272
+ callCachedInputTokens = 0;
6273
+ callCacheCreationInputTokens = 0;
5868
6274
  // Cumulative stats (cumulative mode)
5869
6275
  totalStartTime = Date.now();
5870
6276
  totalTokens = 0;
@@ -5890,11 +6296,13 @@ var StreamProgress = class {
5890
6296
  this.callOutputTokensEstimated = true;
5891
6297
  this.callOutputChars = 0;
5892
6298
  this.isStreaming = false;
6299
+ this.callCachedInputTokens = 0;
6300
+ this.callCacheCreationInputTokens = 0;
5893
6301
  this.start();
5894
6302
  }
5895
6303
  /**
5896
6304
  * Ends the current LLM call. Updates cumulative stats and switches to cumulative mode.
5897
- * @param usage - Final token usage from the call
6305
+ * @param usage - Final token usage from the call (including cached tokens if available)
5898
6306
  */
5899
6307
  endCall(usage) {
5900
6308
  this.iterations++;
@@ -5906,7 +6314,9 @@ var StreamProgress = class {
5906
6314
  const cost = this.modelRegistry.estimateCost(
5907
6315
  modelName,
5908
6316
  usage.inputTokens,
5909
- usage.outputTokens
6317
+ usage.outputTokens,
6318
+ usage.cachedInputTokens ?? 0,
6319
+ usage.cacheCreationInputTokens ?? 0
5910
6320
  );
5911
6321
  if (cost) {
5912
6322
  this.totalCost += cost.totalCost;
@@ -5946,6 +6356,16 @@ var StreamProgress = class {
5946
6356
  this.callOutputTokens = tokens;
5947
6357
  this.callOutputTokensEstimated = estimated;
5948
6358
  }
6359
+ /**
6360
+ * Sets cached token counts for the current call (from stream metadata).
6361
+ * Used for live cost estimation during streaming.
6362
+ * @param cachedInputTokens - Number of tokens read from cache (cheaper)
6363
+ * @param cacheCreationInputTokens - Number of tokens written to cache (more expensive)
6364
+ */
6365
+ setCachedTokens(cachedInputTokens, cacheCreationInputTokens) {
6366
+ this.callCachedInputTokens = cachedInputTokens;
6367
+ this.callCacheCreationInputTokens = cacheCreationInputTokens;
6368
+ }
5949
6369
  /**
5950
6370
  * Get total elapsed time in seconds since the first call started.
5951
6371
  * @returns Elapsed time in seconds with 1 decimal place
@@ -6010,11 +6430,32 @@ var StreamProgress = class {
6010
6430
  parts.push(import_chalk2.default.dim("\u2193") + import_chalk2.default.green(` ${prefix}${formatTokens(outTokens)}`));
6011
6431
  }
6012
6432
  parts.push(import_chalk2.default.dim(`${elapsed}s`));
6013
- if (this.totalCost > 0) {
6014
- parts.push(import_chalk2.default.cyan(`$${formatCost(this.totalCost)}`));
6433
+ const callCost = this.calculateCurrentCallCost(outTokens);
6434
+ if (callCost > 0) {
6435
+ parts.push(import_chalk2.default.cyan(`$${formatCost(callCost)}`));
6015
6436
  }
6016
6437
  this.target.write(`\r${parts.join(import_chalk2.default.dim(" | "))} ${import_chalk2.default.cyan(spinner)}`);
6017
6438
  }
6439
+ /**
6440
+ * Calculates live cost estimate for the current streaming call.
6441
+ * Uses current input/output tokens and cached token counts.
6442
+ */
6443
+ calculateCurrentCallCost(outputTokens) {
6444
+ if (!this.modelRegistry || !this.model) return 0;
6445
+ try {
6446
+ const modelName = this.model.includes(":") ? this.model.split(":")[1] : this.model;
6447
+ const cost = this.modelRegistry.estimateCost(
6448
+ modelName,
6449
+ this.callInputTokens,
6450
+ outputTokens,
6451
+ this.callCachedInputTokens,
6452
+ this.callCacheCreationInputTokens
6453
+ );
6454
+ return cost?.totalCost ?? 0;
6455
+ } catch {
6456
+ return 0;
6457
+ }
6458
+ }
6018
6459
  renderCumulativeMode(spinner) {
6019
6460
  const elapsed = ((Date.now() - this.totalStartTime) / 1e3).toFixed(1);
6020
6461
  const parts = [];
@@ -6163,7 +6604,7 @@ function addCompleteOptions(cmd, defaults) {
6163
6604
  OPTION_DESCRIPTIONS.maxTokens,
6164
6605
  createNumericParser({ label: "Max tokens", integer: true, min: 1 }),
6165
6606
  defaults?.["max-tokens"]
6166
- );
6607
+ ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet);
6167
6608
  }
6168
6609
  function addAgentOptions(cmd, defaults) {
6169
6610
  const gadgetAccumulator = (value, previous = []) => [
@@ -6192,7 +6633,7 @@ function addAgentOptions(cmd, defaults) {
6192
6633
  OPTION_FLAGS.noBuiltinInteraction,
6193
6634
  OPTION_DESCRIPTIONS.noBuiltinInteraction,
6194
6635
  defaults?.["builtin-interaction"] !== false
6195
- );
6636
+ ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet);
6196
6637
  }
6197
6638
  function configToCompleteOptions(config) {
6198
6639
  const result = {};
@@ -6200,6 +6641,7 @@ function configToCompleteOptions(config) {
6200
6641
  if (config.system !== void 0) result.system = config.system;
6201
6642
  if (config.temperature !== void 0) result.temperature = config.temperature;
6202
6643
  if (config["max-tokens"] !== void 0) result.maxTokens = config["max-tokens"];
6644
+ if (config.quiet !== void 0) result.quiet = config.quiet;
6203
6645
  return result;
6204
6646
  }
6205
6647
  function configToAgentOptions(config) {
@@ -6213,6 +6655,11 @@ function configToAgentOptions(config) {
6213
6655
  if (config.builtins !== void 0) result.builtins = config.builtins;
6214
6656
  if (config["builtin-interaction"] !== void 0)
6215
6657
  result.builtinInteraction = config["builtin-interaction"];
6658
+ if (config["gadget-start-prefix"] !== void 0)
6659
+ result.gadgetStartPrefix = config["gadget-start-prefix"];
6660
+ if (config["gadget-end-prefix"] !== void 0)
6661
+ result.gadgetEndPrefix = config["gadget-end-prefix"];
6662
+ if (config.quiet !== void 0) result.quiet = config.quiet;
6216
6663
  return result;
6217
6664
  }
6218
6665
 
@@ -6258,9 +6705,10 @@ async function executeAgent(promptArg, options, env) {
6258
6705
  const prompt = await resolvePrompt(promptArg, env);
6259
6706
  const client = env.createClient();
6260
6707
  const registry = new GadgetRegistry();
6708
+ const stdinIsInteractive = isInteractive(env.stdin);
6261
6709
  if (options.builtins !== false) {
6262
6710
  for (const gadget of builtinGadgets) {
6263
- if (options.builtinInteraction === false && gadget.name === "AskUser") {
6711
+ if (gadget.name === "AskUser" && (options.builtinInteraction === false || !stdinIsInteractive)) {
6264
6712
  continue;
6265
6713
  }
6266
6714
  registry.registerByClass(gadget);
@@ -6318,6 +6766,10 @@ async function executeAgent(promptArg, options, env) {
6318
6766
  if (context.usage.outputTokens) {
6319
6767
  progress.setOutputTokens(context.usage.outputTokens, false);
6320
6768
  }
6769
+ progress.setCachedTokens(
6770
+ context.usage.cachedInputTokens ?? 0,
6771
+ context.usage.cacheCreationInputTokens ?? 0
6772
+ );
6321
6773
  }
6322
6774
  },
6323
6775
  // onLLMCallComplete: Finalize metrics after each LLM call
@@ -6336,11 +6788,13 @@ async function executeAgent(promptArg, options, env) {
6336
6788
  let callCost;
6337
6789
  if (context.usage && client.modelRegistry) {
6338
6790
  try {
6339
- const modelName = options.model.includes(":") ? options.model.split(":")[1] : options.model;
6791
+ const modelName = context.options.model.includes(":") ? context.options.model.split(":")[1] : context.options.model;
6340
6792
  const costResult = client.modelRegistry.estimateCost(
6341
6793
  modelName,
6342
6794
  context.usage.inputTokens,
6343
- context.usage.outputTokens
6795
+ context.usage.outputTokens,
6796
+ context.usage.cachedInputTokens ?? 0,
6797
+ context.usage.cacheCreationInputTokens ?? 0
6344
6798
  );
6345
6799
  if (costResult) callCost = costResult.totalCost;
6346
6800
  } catch {
@@ -6348,7 +6802,7 @@ async function executeAgent(promptArg, options, env) {
6348
6802
  }
6349
6803
  const callElapsed = progress.getCallElapsedSeconds();
6350
6804
  progress.endCall(context.usage);
6351
- if (stderrTTY) {
6805
+ if (!options.quiet) {
6352
6806
  const summary = renderSummary({
6353
6807
  iterations: context.iteration + 1,
6354
6808
  model: options.model,
@@ -6427,9 +6881,27 @@ Command rejected by user with message: "${response}"`
6427
6881
  builder.withGadgets(...gadgets);
6428
6882
  }
6429
6883
  builder.withParameterFormat(options.parameterFormat);
6430
- const markers = generateMarkers();
6431
- builder.withGadgetStartPrefix(markers.startPrefix);
6432
- builder.withGadgetEndPrefix(markers.endPrefix);
6884
+ if (options.gadgetStartPrefix) {
6885
+ builder.withGadgetStartPrefix(options.gadgetStartPrefix);
6886
+ }
6887
+ if (options.gadgetEndPrefix) {
6888
+ builder.withGadgetEndPrefix(options.gadgetEndPrefix);
6889
+ }
6890
+ builder.withSyntheticGadgetCall(
6891
+ "TellUser",
6892
+ {
6893
+ message: "\u{1F44B} Hello! I'm ready to help.\n\nHere's what I can do:\n- Analyze your codebase\n- Execute commands\n- Answer questions\n\nWhat would you like me to work on?",
6894
+ done: false,
6895
+ type: "info"
6896
+ },
6897
+ "\u2139\uFE0F \u{1F44B} Hello! I'm ready to help.\n\nHere's what I can do:\n- Analyze your codebase\n- Execute commands\n- Answer questions\n\nWhat would you like me to work on?"
6898
+ );
6899
+ builder.withTextOnlyHandler("acknowledge");
6900
+ builder.withTextWithGadgetsHandler({
6901
+ gadgetName: "TellUser",
6902
+ parameterMapping: (text) => ({ message: text, done: false, type: "info" }),
6903
+ resultMapping: (text) => `\u2139\uFE0F ${text}`
6904
+ });
6433
6905
  const agent = builder.ask(prompt);
6434
6906
  for await (const event of agent.run()) {
6435
6907
  if (event.type === "text") {
@@ -6437,7 +6909,14 @@ Command rejected by user with message: "${response}"`
6437
6909
  printer.write(event.content);
6438
6910
  } else if (event.type === "gadget_result") {
6439
6911
  progress.pause();
6440
- if (stderrTTY) {
6912
+ if (options.quiet) {
6913
+ if (event.result.gadgetName === "TellUser" && event.result.parameters?.message) {
6914
+ const message = String(event.result.parameters.message);
6915
+ const rendered = renderMarkdown(message);
6916
+ env.stdout.write(`${rendered}
6917
+ `);
6918
+ }
6919
+ } else {
6441
6920
  const tokenCount = await countGadgetOutputTokens(event.result.result);
6442
6921
  env.stderr.write(`${formatGadgetSummary({ ...event.result, tokenCount })}
6443
6922
  `);
@@ -6446,7 +6925,7 @@ Command rejected by user with message: "${response}"`
6446
6925
  }
6447
6926
  progress.complete();
6448
6927
  printer.ensureNewline();
6449
- if (stderrTTY && iterations > 1) {
6928
+ if (!options.quiet && iterations > 1) {
6450
6929
  env.stderr.write(`${import_chalk3.default.dim("\u2500".repeat(40))}
6451
6930
  `);
6452
6931
  const summary = renderOverallSummary({
@@ -6519,7 +6998,7 @@ async function executeComplete(promptArg, options, env) {
6519
6998
  progress.endCall(usage);
6520
6999
  progress.complete();
6521
7000
  printer.ensureNewline();
6522
- if (stderrTTY) {
7001
+ if (stderrTTY && !options.quiet) {
6523
7002
  const summary = renderSummary({ finishReason, usage, cost: progress.getTotalCost() });
6524
7003
  if (summary) {
6525
7004
  env.stderr.write(`${summary}
@@ -6540,9 +7019,102 @@ var import_node_fs3 = require("fs");
6540
7019
  var import_node_os = require("os");
6541
7020
  var import_node_path3 = require("path");
6542
7021
  var import_js_toml2 = require("js-toml");
7022
+
7023
+ // src/cli/templates.ts
7024
+ var import_eta = require("eta");
7025
+ var TemplateError = class extends Error {
7026
+ constructor(message, promptName, configPath) {
7027
+ super(promptName ? `[prompts.${promptName}]: ${message}` : message);
7028
+ this.promptName = promptName;
7029
+ this.configPath = configPath;
7030
+ this.name = "TemplateError";
7031
+ }
7032
+ };
7033
+ function createTemplateEngine(prompts, configPath) {
7034
+ const eta = new import_eta.Eta({
7035
+ views: "/",
7036
+ // Required but we use named templates
7037
+ autoEscape: false,
7038
+ // Don't escape - these are prompts, not HTML
7039
+ autoTrim: false
7040
+ // Preserve whitespace in prompts
7041
+ });
7042
+ for (const [name, template] of Object.entries(prompts)) {
7043
+ try {
7044
+ eta.loadTemplate(`@${name}`, template);
7045
+ } catch (error) {
7046
+ throw new TemplateError(
7047
+ error instanceof Error ? error.message : String(error),
7048
+ name,
7049
+ configPath
7050
+ );
7051
+ }
7052
+ }
7053
+ return eta;
7054
+ }
7055
+ function resolveTemplate(eta, template, context = {}, configPath) {
7056
+ try {
7057
+ const fullContext = {
7058
+ ...context,
7059
+ env: process.env
7060
+ };
7061
+ return eta.renderString(template, fullContext);
7062
+ } catch (error) {
7063
+ throw new TemplateError(
7064
+ error instanceof Error ? error.message : String(error),
7065
+ void 0,
7066
+ configPath
7067
+ );
7068
+ }
7069
+ }
7070
+ function validatePrompts(prompts, configPath) {
7071
+ const eta = createTemplateEngine(prompts, configPath);
7072
+ for (const [name, template] of Object.entries(prompts)) {
7073
+ try {
7074
+ eta.renderString(template, { env: {} });
7075
+ } catch (error) {
7076
+ throw new TemplateError(
7077
+ error instanceof Error ? error.message : String(error),
7078
+ name,
7079
+ configPath
7080
+ );
7081
+ }
7082
+ }
7083
+ }
7084
+ function validateEnvVars(template, promptName, configPath) {
7085
+ const envVarPattern = /<%=\s*it\.env\.(\w+)\s*%>/g;
7086
+ const matches = template.matchAll(envVarPattern);
7087
+ for (const match of matches) {
7088
+ const varName = match[1];
7089
+ if (process.env[varName] === void 0) {
7090
+ throw new TemplateError(
7091
+ `Environment variable '${varName}' is not set`,
7092
+ promptName,
7093
+ configPath
7094
+ );
7095
+ }
7096
+ }
7097
+ }
7098
+ function hasTemplateSyntax(str) {
7099
+ return str.includes("<%");
7100
+ }
7101
+
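Note: the new src/cli/templates.ts module wraps the Eta engine. Prompts from the config's [prompts] table are registered as named templates (prefixed with @), resolveTemplate merges process.env into every render context, and validateEnvVars pre-checks references of the exact form <%= it.env.NAME %> so a missing variable fails config loading instead of rendering as "undefined". A minimal usage sketch, assuming Eta v3's include helper for named partials; the template name and env var are hypothetical:

    const { Eta } = require("eta");
    // Same settings as createTemplateEngine above.
    const eta = new Eta({ views: "/", autoEscape: false, autoTrim: false });
    eta.loadTemplate("@role", "You are a <%= it.env.ROLE %>.");
    const out = eta.renderString("<%~ include('@role', it) %> Be brief.", {
      env: { ROLE: "code reviewer" },
    });
    // out === "You are a code reviewer. Be brief."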
7102
+ // src/cli/config.ts
6543
7103
  var GLOBAL_CONFIG_KEYS = /* @__PURE__ */ new Set(["log-level", "log-file", "log-reset"]);
6544
7104
  var VALID_LOG_LEVELS = ["silly", "trace", "debug", "info", "warn", "error", "fatal"];
6545
- var COMPLETE_CONFIG_KEYS = /* @__PURE__ */ new Set(["model", "system", "temperature", "max-tokens"]);
7105
+ var COMPLETE_CONFIG_KEYS = /* @__PURE__ */ new Set([
7106
+ "model",
7107
+ "system",
7108
+ "temperature",
7109
+ "max-tokens",
7110
+ "quiet",
7111
+ "inherits",
7112
+ "log-level",
7113
+ "log-file",
7114
+ "log-reset",
7115
+ "type"
7116
+ // Allowed for inheritance compatibility, ignored for built-in commands
7117
+ ]);
6546
7118
  var AGENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
6547
7119
  "model",
6548
7120
  "system",
@@ -6551,16 +7123,22 @@ var AGENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
6551
7123
  "gadget",
6552
7124
  "parameter-format",
6553
7125
  "builtins",
6554
- "builtin-interaction"
7126
+ "builtin-interaction",
7127
+ "gadget-start-prefix",
7128
+ "gadget-end-prefix",
7129
+ "quiet",
7130
+ "inherits",
7131
+ "log-level",
7132
+ "log-file",
7133
+ "log-reset",
7134
+ "type"
7135
+ // Allowed for inheritance compatibility, ignored for built-in commands
6555
7136
  ]);
6556
7137
  var CUSTOM_CONFIG_KEYS = /* @__PURE__ */ new Set([
6557
7138
  ...COMPLETE_CONFIG_KEYS,
6558
7139
  ...AGENT_CONFIG_KEYS,
6559
7140
  "type",
6560
- "description",
6561
- "log-level",
6562
- "log-file",
6563
- "log-reset"
7141
+ "description"
6564
7142
  ]);
6565
7143
  var VALID_PARAMETER_FORMATS = ["json", "yaml", "toml", "auto"];
6566
7144
  function getConfigPath() {
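Note: the option whitelists grow to match the new features. [complete] and [agent] sections now accept quiet, inherits, the per-section logging keys, and a tolerated type key; CUSTOM_CONFIG_KEYS is the union of those two sets plus type and description, which is why its explicit log-* entries were dropped as redundant. An illustrative parsed section that 0.8.0 accepts but 0.6.2 rejected as "is not a valid option" (values are hypothetical):

    const completeSection = {
      model: "sonnet",
      quiet: true,            // new: suppress run summaries
      inherits: "defaults",   // new: single parent or array of parents
      "log-level": "debug",   // logging keys now valid per section
    };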
@@ -6611,6 +7189,39 @@ function validateStringArray(value, key, section) {
6611
7189
  }
6612
7190
  return value;
6613
7191
  }
7192
+ function validateInherits(value, section) {
7193
+ if (typeof value === "string") {
7194
+ return value;
7195
+ }
7196
+ if (Array.isArray(value)) {
7197
+ for (let i = 0; i < value.length; i++) {
7198
+ if (typeof value[i] !== "string") {
7199
+ throw new ConfigError(`[${section}].inherits[${i}] must be a string`);
7200
+ }
7201
+ }
7202
+ return value;
7203
+ }
7204
+ throw new ConfigError(`[${section}].inherits must be a string or array of strings`);
7205
+ }
7206
+ function validateLoggingConfig(raw, section) {
7207
+ const result = {};
7208
+ if ("log-level" in raw) {
7209
+ const level = validateString(raw["log-level"], "log-level", section);
7210
+ if (!VALID_LOG_LEVELS.includes(level)) {
7211
+ throw new ConfigError(
7212
+ `[${section}].log-level must be one of: ${VALID_LOG_LEVELS.join(", ")}`
7213
+ );
7214
+ }
7215
+ result["log-level"] = level;
7216
+ }
7217
+ if ("log-file" in raw) {
7218
+ result["log-file"] = validateString(raw["log-file"], "log-file", section);
7219
+ }
7220
+ if ("log-reset" in raw) {
7221
+ result["log-reset"] = validateBoolean(raw["log-reset"], "log-reset", section);
7222
+ }
7223
+ return result;
7224
+ }
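Note: two shared helpers are introduced here. validateInherits accepts a bare string or an array of strings and rejects anything else; validateLoggingConfig centralizes the log-level/log-file/log-reset checks that 0.6.2 duplicated across validators. A hedged sketch of the accepted and rejected shapes:

    validateInherits("base", "agent");            // -> "base"
    validateInherits(["base", "extra"], "agent"); // -> ["base", "extra"]
    validateInherits(42, "agent");
    // throws ConfigError: [agent].inherits must be a string or array of strings

    validateLoggingConfig({ "log-level": "loud" }, "complete");
    // throws ConfigError: [complete].log-level must be one of: silly, trace,
    // debug, info, warn, error, fatal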
6614
7225
  function validateBaseConfig(raw, section) {
6615
7226
  const result = {};
6616
7227
  if ("model" in raw) {
@@ -6625,6 +7236,9 @@ function validateBaseConfig(raw, section) {
6625
7236
  max: 2
6626
7237
  });
6627
7238
  }
7239
+ if ("inherits" in raw) {
7240
+ result.inherits = validateInherits(raw.inherits, section);
7241
+ }
6628
7242
  return result;
6629
7243
  }
6630
7244
  function validateGlobalConfig(raw, section) {
@@ -6637,23 +7251,7 @@ function validateGlobalConfig(raw, section) {
6637
7251
  throw new ConfigError(`[${section}].${key} is not a valid option`);
6638
7252
  }
6639
7253
  }
6640
- const result = {};
6641
- if ("log-level" in rawObj) {
6642
- const level = validateString(rawObj["log-level"], "log-level", section);
6643
- if (!VALID_LOG_LEVELS.includes(level)) {
6644
- throw new ConfigError(
6645
- `[${section}].log-level must be one of: ${VALID_LOG_LEVELS.join(", ")}`
6646
- );
6647
- }
6648
- result["log-level"] = level;
6649
- }
6650
- if ("log-file" in rawObj) {
6651
- result["log-file"] = validateString(rawObj["log-file"], "log-file", section);
6652
- }
6653
- if ("log-reset" in rawObj) {
6654
- result["log-reset"] = validateBoolean(rawObj["log-reset"], "log-reset", section);
6655
- }
6656
- return result;
7254
+ return validateLoggingConfig(rawObj, section);
6657
7255
  }
6658
7256
  function validateCompleteConfig(raw, section) {
6659
7257
  if (typeof raw !== "object" || raw === null) {
@@ -6665,13 +7263,19 @@ function validateCompleteConfig(raw, section) {
6665
7263
  throw new ConfigError(`[${section}].${key} is not a valid option`);
6666
7264
  }
6667
7265
  }
6668
- const result = { ...validateBaseConfig(rawObj, section) };
7266
+ const result = {
7267
+ ...validateBaseConfig(rawObj, section),
7268
+ ...validateLoggingConfig(rawObj, section)
7269
+ };
6669
7270
  if ("max-tokens" in rawObj) {
6670
7271
  result["max-tokens"] = validateNumber(rawObj["max-tokens"], "max-tokens", section, {
6671
7272
  integer: true,
6672
7273
  min: 1
6673
7274
  });
6674
7275
  }
7276
+ if ("quiet" in rawObj) {
7277
+ result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
7278
+ }
6675
7279
  return result;
6676
7280
  }
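Note: validateGlobalConfig now simply returns validateLoggingConfig(rawObj, section), and validateCompleteConfig spreads the base and logging results together before handling max-tokens and the new boolean quiet key. Behavior sketch (the quiet error text comes from validateBoolean, defined elsewhere in the bundle, so its exact wording is assumed):

    validateCompleteConfig({ quiet: "yes" }, "complete");
    // throws ConfigError: quiet must be a boolean
    validateCompleteConfig({ color: true }, "complete");
    // throws ConfigError: [complete].color is not a valid option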
6677
7281
  function validateAgentConfig(raw, section) {
@@ -6684,7 +7288,10 @@ function validateAgentConfig(raw, section) {
6684
7288
  throw new ConfigError(`[${section}].${key} is not a valid option`);
6685
7289
  }
6686
7290
  }
6687
- const result = { ...validateBaseConfig(rawObj, section) };
7291
+ const result = {
7292
+ ...validateBaseConfig(rawObj, section),
7293
+ ...validateLoggingConfig(rawObj, section)
7294
+ };
6688
7295
  if ("max-iterations" in rawObj) {
6689
7296
  result["max-iterations"] = validateNumber(rawObj["max-iterations"], "max-iterations", section, {
6690
7297
  integer: true,
@@ -6713,6 +7320,23 @@ function validateAgentConfig(raw, section) {
6713
7320
  section
6714
7321
  );
6715
7322
  }
7323
+ if ("gadget-start-prefix" in rawObj) {
7324
+ result["gadget-start-prefix"] = validateString(
7325
+ rawObj["gadget-start-prefix"],
7326
+ "gadget-start-prefix",
7327
+ section
7328
+ );
7329
+ }
7330
+ if ("gadget-end-prefix" in rawObj) {
7331
+ result["gadget-end-prefix"] = validateString(
7332
+ rawObj["gadget-end-prefix"],
7333
+ "gadget-end-prefix",
7334
+ section
7335
+ );
7336
+ }
7337
+ if ("quiet" in rawObj) {
7338
+ result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
7339
+ }
6716
7340
  return result;
6717
7341
  }
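Note: [agent] sections (and, in the next hunk, custom sections) gain the same quiet key plus two string options, gadget-start-prefix and gadget-end-prefix, which override the sentinel strings the agent loop uses to delimit gadget invocations (the defaults are the bundle's gadget constants). An illustrative parsed [agent] section with hypothetical values:

    const agentSection = {
      "gadget-start-prefix": "<<TOOL:",
      "gadget-end-prefix": "<<END",
      quiet: false,
    };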
6718
7342
  function validateCustomConfig(raw, section) {
@@ -6768,26 +7392,42 @@ function validateCustomConfig(raw, section) {
6768
7392
  section
6769
7393
  );
6770
7394
  }
7395
+ if ("gadget-start-prefix" in rawObj) {
7396
+ result["gadget-start-prefix"] = validateString(
7397
+ rawObj["gadget-start-prefix"],
7398
+ "gadget-start-prefix",
7399
+ section
7400
+ );
7401
+ }
7402
+ if ("gadget-end-prefix" in rawObj) {
7403
+ result["gadget-end-prefix"] = validateString(
7404
+ rawObj["gadget-end-prefix"],
7405
+ "gadget-end-prefix",
7406
+ section
7407
+ );
7408
+ }
6771
7409
  if ("max-tokens" in rawObj) {
6772
7410
  result["max-tokens"] = validateNumber(rawObj["max-tokens"], "max-tokens", section, {
6773
7411
  integer: true,
6774
7412
  min: 1
6775
7413
  });
6776
7414
  }
6777
- if ("log-level" in rawObj) {
6778
- const level = validateString(rawObj["log-level"], "log-level", section);
6779
- if (!VALID_LOG_LEVELS.includes(level)) {
6780
- throw new ConfigError(
6781
- `[${section}].log-level must be one of: ${VALID_LOG_LEVELS.join(", ")}`
6782
- );
6783
- }
6784
- result["log-level"] = level;
7415
+ if ("quiet" in rawObj) {
7416
+ result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
6785
7417
  }
6786
- if ("log-file" in rawObj) {
6787
- result["log-file"] = validateString(rawObj["log-file"], "log-file", section);
7418
+ Object.assign(result, validateLoggingConfig(rawObj, section));
7419
+ return result;
7420
+ }
7421
+ function validatePromptsConfig(raw, section) {
7422
+ if (typeof raw !== "object" || raw === null) {
7423
+ throw new ConfigError(`[${section}] must be a table`);
6788
7424
  }
6789
- if ("log-reset" in rawObj) {
6790
- result["log-reset"] = validateBoolean(rawObj["log-reset"], "log-reset", section);
7425
+ const result = {};
7426
+ for (const [key, value] of Object.entries(raw)) {
7427
+ if (typeof value !== "string") {
7428
+ throw new ConfigError(`[${section}].${key} must be a string`);
7429
+ }
7430
+ result[key] = value;
6791
7431
  }
6792
7432
  return result;
6793
7433
  }
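Note: validateCustomConfig picks up the same prefix and quiet handling and folds its formerly inlined log-* checks into Object.assign(result, validateLoggingConfig(...)). The new validatePromptsConfig, routed from validateConfig in the next hunk, requires [prompts] to be a flat table of string templates:

    validatePromptsConfig({ greeting: "Hi <%= it.env.USER %>" }, "prompts");
    // -> { greeting: "Hi <%= it.env.USER %>" }
    validatePromptsConfig({ greeting: 7 }, "prompts");
    // throws ConfigError: [prompts].greeting must be a string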
@@ -6805,6 +7445,8 @@ function validateConfig(raw, configPath) {
6805
7445
  result.complete = validateCompleteConfig(value, key);
6806
7446
  } else if (key === "agent") {
6807
7447
  result.agent = validateAgentConfig(value, key);
7448
+ } else if (key === "prompts") {
7449
+ result.prompts = validatePromptsConfig(value, key);
6808
7450
  } else {
6809
7451
  result[key] = validateCustomConfig(value, key);
6810
7452
  }
@@ -6840,12 +7482,119 @@ function loadConfig() {
6840
7482
  configPath
6841
7483
  );
6842
7484
  }
6843
- return validateConfig(raw, configPath);
7485
+ const validated = validateConfig(raw, configPath);
7486
+ const inherited = resolveInheritance(validated, configPath);
7487
+ return resolveTemplatesInConfig(inherited, configPath);
6844
7488
  }
6845
7489
  function getCustomCommandNames(config) {
6846
- const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent"]);
7490
+ const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "prompts"]);
6847
7491
  return Object.keys(config).filter((key) => !reserved.has(key));
6848
7492
  }
7493
+ function resolveTemplatesInConfig(config, configPath) {
7494
+ const prompts = config.prompts ?? {};
7495
+ const hasPrompts = Object.keys(prompts).length > 0;
7496
+ let hasTemplates = false;
7497
+ for (const [sectionName, section] of Object.entries(config)) {
7498
+ if (sectionName === "global" || sectionName === "prompts") continue;
7499
+ if (!section || typeof section !== "object") continue;
7500
+ const sectionObj = section;
7501
+ if (typeof sectionObj.system === "string" && hasTemplateSyntax(sectionObj.system)) {
7502
+ hasTemplates = true;
7503
+ break;
7504
+ }
7505
+ }
7506
+ for (const template of Object.values(prompts)) {
7507
+ if (hasTemplateSyntax(template)) {
7508
+ hasTemplates = true;
7509
+ break;
7510
+ }
7511
+ }
7512
+ if (!hasPrompts && !hasTemplates) {
7513
+ return config;
7514
+ }
7515
+ try {
7516
+ validatePrompts(prompts, configPath);
7517
+ } catch (error) {
7518
+ if (error instanceof TemplateError) {
7519
+ throw new ConfigError(error.message, configPath);
7520
+ }
7521
+ throw error;
7522
+ }
7523
+ for (const [name, template] of Object.entries(prompts)) {
7524
+ try {
7525
+ validateEnvVars(template, name, configPath);
7526
+ } catch (error) {
7527
+ if (error instanceof TemplateError) {
7528
+ throw new ConfigError(error.message, configPath);
7529
+ }
7530
+ throw error;
7531
+ }
7532
+ }
7533
+ const eta = createTemplateEngine(prompts, configPath);
7534
+ const result = { ...config };
7535
+ for (const [sectionName, section] of Object.entries(config)) {
7536
+ if (sectionName === "global" || sectionName === "prompts") continue;
7537
+ if (!section || typeof section !== "object") continue;
7538
+ const sectionObj = section;
7539
+ if (typeof sectionObj.system === "string" && hasTemplateSyntax(sectionObj.system)) {
7540
+ try {
7541
+ validateEnvVars(sectionObj.system, void 0, configPath);
7542
+ } catch (error) {
7543
+ if (error instanceof TemplateError) {
7544
+ throw new ConfigError(`[${sectionName}].system: ${error.message}`, configPath);
7545
+ }
7546
+ throw error;
7547
+ }
7548
+ try {
7549
+ const resolved = resolveTemplate(eta, sectionObj.system, {}, configPath);
7550
+ result[sectionName] = {
7551
+ ...sectionObj,
7552
+ system: resolved
7553
+ };
7554
+ } catch (error) {
7555
+ if (error instanceof TemplateError) {
7556
+ throw new ConfigError(`[${sectionName}].system: ${error.message}`, configPath);
7557
+ }
7558
+ throw error;
7559
+ }
7560
+ }
7561
+ }
7562
+ return result;
7563
+ }
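Note: resolveTemplatesInConfig is a conditional pass: it scans every non-global, non-prompts section's system string and every [prompts] entry for "<%", and returns the config untouched when nothing is templated. Otherwise it validates all prompts and their env references up front, then renders each templated system through the shared engine, rewrapping TemplateError as ConfigError tagged with the section name. (getCustomCommandNames above also reserves "prompts" so the table is not exposed as a command.) Effect sketch with hypothetical section names:

    const config = {
      prompts: { tone: "Be terse." },
      review: { system: "<%~ include('@tone', it) %> Review the diff." },
    };
    // resolveTemplatesInConfig(config, configPath) returns a copy in which
    //   review.system === "Be terse. Review the diff."
    // and all other keys of review are preserved.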
7564
+ function resolveInheritance(config, configPath) {
7565
+ const resolved = {};
7566
+ const resolving = /* @__PURE__ */ new Set();
7567
+ function resolveSection(name) {
7568
+ if (name in resolved) {
7569
+ return resolved[name];
7570
+ }
7571
+ if (resolving.has(name)) {
7572
+ throw new ConfigError(`Circular inheritance detected: ${name}`, configPath);
7573
+ }
7574
+ const section = config[name];
7575
+ if (section === void 0 || typeof section !== "object") {
7576
+ throw new ConfigError(`Cannot inherit from unknown section: ${name}`, configPath);
7577
+ }
7578
+ resolving.add(name);
7579
+ const sectionObj = section;
7580
+ const inheritsRaw = sectionObj.inherits;
7581
+ const inheritsList = inheritsRaw ? Array.isArray(inheritsRaw) ? inheritsRaw : [inheritsRaw] : [];
7582
+ let merged = {};
7583
+ for (const parent of inheritsList) {
7584
+ const parentResolved = resolveSection(parent);
7585
+ merged = { ...merged, ...parentResolved };
7586
+ }
7587
+ const { inherits: _inherits, ...ownValues } = sectionObj;
7588
+ merged = { ...merged, ...ownValues };
7589
+ resolving.delete(name);
7590
+ resolved[name] = merged;
7591
+ return merged;
7592
+ }
7593
+ for (const name of Object.keys(config)) {
7594
+ resolveSection(name);
7595
+ }
7596
+ return resolved;
7597
+ }
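Note: resolveInheritance runs before template resolution in the new loadConfig pipeline. It memoizes each resolved section, tracks in-flight names to reject cycles, and throws for unknown parents; parents are merged left to right, so later parents override earlier ones, a section's own keys override everything inherited, and the inherits key itself is stripped from the result. A worked sketch with hypothetical sections and path:

    const config = {
      base:   { model: "m-small", temperature: 0.2 },
      strict: { temperature: 0 },
      review: { inherits: ["base", "strict"], system: "Review the diff." },
    };
    resolveInheritance(config, "/home/u/.llmist.toml").review;
    // -> { model: "m-small", temperature: 0, system: "Review the diff." }
    // review -> base -> review would instead throw:
    //   ConfigError: Circular inheritance detected: review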
6849
7598
 
6850
7599
  // src/cli/models-command.ts
6851
7600
  var import_chalk4 = __toESM(require("chalk"), 1);