llmist 0.7.0 → 0.8.0

This diff shows the contents of publicly available package versions as published to their respective registries. It is provided for informational purposes only and reflects the changes between those published versions.
package/dist/cli.cjs CHANGED
@@ -865,7 +865,7 @@ function findSafeDelimiter(content) {
  }
  let counter = 1;
  while (counter < 1e3) {
- const delimiter = `HEREDOC_${counter}`;
+ const delimiter = `__GADGET_PARAM_${counter}__`;
  const regex = new RegExp(`^${delimiter}\\s*$`);
  const isUsed = lines.some((line) => regex.test(line));
  if (!isUsed) {
@@ -972,7 +972,16 @@ var init_gadget = __esm({
  yaml = __toESM(require("js-yaml"), 1);
  init_schema_to_json();
  init_schema_validator();
- HEREDOC_DELIMITERS = ["EOF", "END", "DOC", "CONTENT", "TEXT", "HEREDOC", "DATA", "BLOCK"];
+ HEREDOC_DELIMITERS = [
+ "__GADGET_PARAM_EOF__",
+ "__GADGET_PARAM_END__",
+ "__GADGET_PARAM_DOC__",
+ "__GADGET_PARAM_CONTENT__",
+ "__GADGET_PARAM_TEXT__",
+ "__GADGET_PARAM_HEREDOC__",
+ "__GADGET_PARAM_DATA__",
+ "__GADGET_PARAM_BLOCK__"
+ ];
  BaseGadget = class {
  /**
  * The name of the gadget. Used for identification when LLM calls it.
@@ -3050,7 +3059,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3074,7 +3084,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 1,
  output: 5,
- cachedInput: 0.1
+ cachedInput: 0.1,
+ cacheWriteInput: 1.25
  },
  knowledgeCutoff: "2025-02",
  features: {
@@ -3098,7 +3109,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-03",
  features: {
@@ -3122,7 +3134,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2024-11",
  features: {
@@ -3146,7 +3159,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 15,
  output: 75,
- cachedInput: 1.5
+ cachedInput: 1.5,
+ cacheWriteInput: 18.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3170,7 +3184,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 15,
  output: 75,
- cachedInput: 1.5
+ cachedInput: 1.5,
+ cacheWriteInput: 18.75
  },
  knowledgeCutoff: "2025-03",
  features: {
@@ -3193,7 +3208,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 0.8,
  output: 4,
- cachedInput: 0.08
+ cachedInput: 0.08,
+ cacheWriteInput: 1
  },
  knowledgeCutoff: "2024-07",
  features: {
@@ -3216,7 +3232,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 0.25,
  output: 1.25,
- cachedInput: 0.025
+ cachedInput: 0.025,
+ cacheWriteInput: 0.3125
  },
  knowledgeCutoff: "2023-08",
  features: {
@@ -3240,7 +3257,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 1,
  output: 5,
- cachedInput: 0.1
+ cachedInput: 0.1,
+ cacheWriteInput: 1.25
  },
  knowledgeCutoff: "2025-02",
  features: {
@@ -3264,7 +3282,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3288,7 +3307,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 5,
  output: 25,
- cachedInput: 0.5
+ cachedInput: 0.5,
+ cacheWriteInput: 6.25
  },
  knowledgeCutoff: "2025-03",
  features: {
@@ -3403,15 +3423,27 @@ var init_anthropic = __esm({
  }
  buildRequestPayload(options, descriptor, spec, messages) {
  const systemMessages = messages.filter((message) => message.role === "system");
- const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
- const conversation = messages.filter(
+ const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
+ type: "text",
+ text: m.content,
+ // Add cache_control to the LAST system message block
+ ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
+ })) : void 0;
+ const nonSystemMessages = messages.filter(
  (message) => message.role !== "system"
- ).map((message) => ({
+ );
+ const lastUserIndex = nonSystemMessages.reduce(
+ (lastIdx, msg, idx) => msg.role === "user" ? idx : lastIdx,
+ -1
+ );
+ const conversation = nonSystemMessages.map((message, index) => ({
  role: message.role,
  content: [
  {
  type: "text",
- text: message.content
+ text: message.content,
+ // Add cache_control to the LAST user message
+ ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
  }
  ]
  }));
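With this hunk, llmist marks the last system block and the last user message with Anthropic's ephemeral cache_control. A minimal sketch of the resulting request shape (the prompt text is illustrative, not captured from the package):

// Sketch only: what buildRequestPayload now produces for one system message and three turns
{
  system: [
    { type: "text", text: "You are a helpful assistant.", cache_control: { type: "ephemeral" } }
  ],
  messages: [
    { role: "user", content: [{ type: "text", text: "Hello" }] },
    { role: "assistant", content: [{ type: "text", text: "Hi! How can I help?" }] },
    { role: "user", content: [{ type: "text", text: "Summarize the README.", cache_control: { type: "ephemeral" } }] }
  ]
}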
@@ -3437,15 +3469,22 @@ var init_anthropic = __esm({
  async *wrapStream(iterable) {
  const stream2 = iterable;
  let inputTokens = 0;
+ let cachedInputTokens = 0;
+ let cacheCreationInputTokens = 0;
  for await (const event of stream2) {
  if (event.type === "message_start") {
- inputTokens = event.message.usage.input_tokens;
+ const usage = event.message.usage;
+ cachedInputTokens = usage.cache_read_input_tokens ?? 0;
+ cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
+ inputTokens = usage.input_tokens + cachedInputTokens + cacheCreationInputTokens;
  yield {
  text: "",
  usage: {
  inputTokens,
  outputTokens: 0,
- totalTokens: inputTokens
+ totalTokens: inputTokens,
+ cachedInputTokens,
+ cacheCreationInputTokens
  },
  rawEvent: event
  };
@@ -3459,7 +3498,9 @@ var init_anthropic = __esm({
  const usage = event.usage ? {
  inputTokens,
  outputTokens: event.usage.output_tokens,
- totalTokens: inputTokens + event.usage.output_tokens
+ totalTokens: inputTokens + event.usage.output_tokens,
+ cachedInputTokens,
+ cacheCreationInputTokens
  } : void 0;
  if (event.delta.stop_reason || usage) {
  yield {
@@ -3540,6 +3581,7 @@ var init_gemini_models = __esm({
  "src/providers/gemini-models.ts"() {
  "use strict";
  GEMINI_MODELS = [
+ // Gemini 3 Pro (Preview)
  {
  provider: "gemini",
  modelId: "gemini-3-pro-preview",
@@ -3548,8 +3590,11 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 2,
+ // $2.00 for prompts <= 200k, $4.00 for > 200k (using lower tier)
  output: 12,
+ // $12.00 for prompts <= 200k, $18.00 for > 200k
  cachedInput: 0.2
+ // $0.20 for prompts <= 200k
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3562,9 +3607,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 3",
  releaseDate: "2025-11-18",
- notes: "Most advanced model. 1501 Elo LMArena, 91.9% GPQA Diamond, 76.2% SWE-bench. Deep Think mode available."
+ notes: "Best model for multimodal understanding, agentic and vibe-coding. Deep Think mode available."
  }
  },
+ // Gemini 2.5 Pro
  {
  provider: "gemini",
  modelId: "gemini-2.5-pro",
@@ -3573,8 +3619,11 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 1.25,
+ // $1.25 for prompts <= 200k, $2.50 for > 200k
  output: 10,
+ // $10.00 for prompts <= 200k, $15.00 for > 200k
  cachedInput: 0.125
+ // $0.125 for prompts <= 200k
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3587,9 +3636,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Balanced multimodal model with 1M context. Best for complex agents and reasoning."
+ notes: "State-of-the-art multipurpose model. Excels at coding and complex reasoning."
  }
  },
+ // Gemini 2.5 Flash
  {
  provider: "gemini",
  modelId: "gemini-2.5-flash",
@@ -3598,8 +3648,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 0.3,
+ // $0.30 for text/image/video, $1.00 for audio
  output: 2.5,
  cachedInput: 0.03
+ // $0.03 for text/image/video
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3612,9 +3664,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Best price-performance ratio with thinking enabled by default"
+ notes: "First hybrid reasoning model with 1M context and thinking budgets."
  }
  },
+ // Gemini 2.5 Flash-Lite
  {
  provider: "gemini",
  modelId: "gemini-2.5-flash-lite",
@@ -3623,8 +3676,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 0.1,
+ // $0.10 for text/image/video, $0.30 for audio
  output: 0.4,
  cachedInput: 0.01
+ // $0.01 for text/image/video
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3636,9 +3691,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Fastest and most cost-efficient model for high-volume, low-latency tasks"
+ notes: "Smallest and most cost effective model, built for at scale usage."
  }
  },
+ // Gemini 2.0 Flash
  {
  provider: "gemini",
  modelId: "gemini-2.0-flash",
@@ -3647,8 +3703,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 8192,
  pricing: {
  input: 0.1,
+ // $0.10 for text/image/video, $0.70 for audio
  output: 0.4,
- cachedInput: 0.01
+ cachedInput: 0.025
+ // $0.025 for text/image/video
  },
  knowledgeCutoff: "2024-08",
  features: {
@@ -3659,9 +3717,10 @@ var init_gemini_models = __esm({
  },
  metadata: {
  family: "Gemini 2.0",
- notes: "Previous generation with 1M context and multimodal capabilities"
+ notes: "Balanced multimodal model with 1M context, built for the era of Agents."
  }
  },
+ // Gemini 2.0 Flash-Lite
  {
  provider: "gemini",
  modelId: "gemini-2.0-flash-lite",
@@ -3670,8 +3729,8 @@ var init_gemini_models = __esm({
  maxOutputTokens: 8192,
  pricing: {
  input: 0.075,
- output: 0.3,
- cachedInput: 75e-4
+ output: 0.3
+ // No context caching available for 2.0-flash-lite
  },
  knowledgeCutoff: "2024-08",
  features: {
@@ -3682,7 +3741,7 @@ var init_gemini_models = __esm({
  },
  metadata: {
  family: "Gemini 2.0",
- notes: "Lightweight previous generation model for cost-sensitive applications"
+ notes: "Smallest and most cost effective 2.0 model for at scale usage."
  }
  }
  ];
@@ -3852,7 +3911,9 @@ var init_gemini = __esm({
  return {
  inputTokens: usageMetadata.promptTokenCount ?? 0,
  outputTokens: usageMetadata.candidatesTokenCount ?? 0,
- totalTokens: usageMetadata.totalTokenCount ?? 0
+ totalTokens: usageMetadata.totalTokenCount ?? 0,
+ // Gemini returns cached token count in cachedContentTokenCount
+ cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
  };
  }
  /**
@@ -3908,10 +3969,11 @@ var init_openai_models = __esm({
  "src/providers/openai-models.ts"() {
  "use strict";
  OPENAI_MODELS = [
+ // GPT-5 Family
  {
  provider: "openai",
  modelId: "gpt-5.1",
- displayName: "GPT-5.1 Instant",
+ displayName: "GPT-5.1",
  contextWindow: 128e3,
  maxOutputTokens: 32768,
  pricing: {
@@ -3931,34 +3993,7 @@ var init_openai_models = __esm({
  metadata: {
  family: "GPT-5",
  releaseDate: "2025-11-12",
- notes: "Warmer, more intelligent, better instruction following. 2-3x faster than GPT-5.",
- supportsTemperature: false
- }
- },
- {
- provider: "openai",
- modelId: "gpt-5.1-thinking",
- displayName: "GPT-5.1 Thinking",
- contextWindow: 196e3,
- maxOutputTokens: 32768,
- pricing: {
- input: 1.25,
- output: 10,
- cachedInput: 0.125
- },
- knowledgeCutoff: "2024-09-30",
- features: {
- streaming: true,
- functionCalling: true,
- vision: true,
- reasoning: true,
- structuredOutputs: true,
- fineTuning: true
- },
- metadata: {
- family: "GPT-5",
- releaseDate: "2025-11-12",
- notes: "Advanced reasoning with thinking levels: Light, Standard, Extended, Heavy. Best for complex tasks.",
+ notes: "Latest GPT-5 with improved instruction following. 2-3x faster than GPT-5.",
  supportsTemperature: false
  }
  },
@@ -4038,6 +4073,255 @@ var init_openai_models = __esm({
  notes: "Fastest, most cost-efficient version for well-defined tasks",
  supportsTemperature: false
  }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-5-pro",
+ displayName: "GPT-5 Pro",
+ contextWindow: 272e3,
+ maxOutputTokens: 128e3,
+ pricing: {
+ input: 15,
+ output: 120
+ // No cached input pricing for gpt-5-pro
+ },
+ knowledgeCutoff: "2024-09-30",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "GPT-5",
+ notes: "Premium tier with enhanced capabilities. Does not support prompt caching.",
+ supportsTemperature: false
+ }
+ },
+ // GPT-4.1 Family
+ {
+ provider: "openai",
+ modelId: "gpt-4.1",
+ displayName: "GPT-4.1",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 2,
+ output: 8,
+ cachedInput: 0.5
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Improved GPT-4 with better instruction following"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4.1-mini",
+ displayName: "GPT-4.1 Mini",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 0.4,
+ output: 1.6,
+ cachedInput: 0.1
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Cost-efficient GPT-4.1 variant"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4.1-nano",
+ displayName: "GPT-4.1 Nano",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 0.1,
+ output: 0.4,
+ cachedInput: 0.025
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Fastest GPT-4.1 variant for simple tasks"
+ }
+ },
+ // GPT-4o Family
+ {
+ provider: "openai",
+ modelId: "gpt-4o",
+ displayName: "GPT-4o",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 2.5,
+ output: 10,
+ cachedInput: 1.25
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4o",
+ notes: "Multimodal model optimized for speed"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4o-mini",
+ displayName: "GPT-4o Mini",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 0.15,
+ output: 0.6,
+ cachedInput: 0.075
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4o",
+ notes: "Fast and affordable multimodal model"
+ }
+ },
+ // o-series (Reasoning models)
+ {
+ provider: "openai",
+ modelId: "o1",
+ displayName: "o1",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 15,
+ output: 60,
+ cachedInput: 7.5
+ },
+ knowledgeCutoff: "2024-12-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Advanced reasoning model with chain-of-thought",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o3",
+ displayName: "o3",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 2,
+ output: 8,
+ cachedInput: 0.5
+ },
+ knowledgeCutoff: "2025-01-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Next-gen reasoning model, more efficient than o1",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o4-mini",
+ displayName: "o4 Mini",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 1.1,
+ output: 4.4,
+ cachedInput: 0.275
+ },
+ knowledgeCutoff: "2025-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Cost-efficient reasoning model",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o3-mini",
+ displayName: "o3 Mini",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 1.1,
+ output: 4.4,
+ cachedInput: 0.55
+ },
+ knowledgeCutoff: "2025-01-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Compact reasoning model for cost-sensitive applications",
+ supportsTemperature: false
+ }
  }
  ];
  }
@@ -4118,7 +4402,8 @@ var init_openai = __esm({
  const usage = chunk.usage ? {
  inputTokens: chunk.usage.prompt_tokens,
  outputTokens: chunk.usage.completion_tokens,
- totalTokens: chunk.usage.total_tokens
+ totalTokens: chunk.usage.total_tokens,
+ cachedInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0
  } : void 0;
  if (finishReason || usage) {
  yield { text: "", finishReason, usage, rawEvent: chunk };
@@ -4335,20 +4620,28 @@ var init_model_registry = __esm({
  /**
  * Estimate API cost for a given model and token usage
  * @param modelId - Full model identifier
- * @param inputTokens - Number of input tokens
+ * @param inputTokens - Number of input tokens (total, including cached and cache creation)
  * @param outputTokens - Number of output tokens
- * @param useCachedInput - Whether to use cached input pricing (if supported by provider)
+ * @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
+ * @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
  * @returns CostEstimate if model found, undefined otherwise
  */
- estimateCost(modelId, inputTokens, outputTokens, useCachedInput = false) {
+ estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
  const spec = this.getModelSpec(modelId);
  if (!spec) return void 0;
- const inputRate = useCachedInput && spec.pricing.cachedInput !== void 0 ? spec.pricing.cachedInput : spec.pricing.input;
- const inputCost = inputTokens / 1e6 * inputRate;
+ const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
+ const cacheWriteRate = spec.pricing.cacheWriteInput ?? spec.pricing.input;
+ const uncachedInputTokens = inputTokens - cachedInputTokens - cacheCreationInputTokens;
+ const uncachedInputCost = uncachedInputTokens / 1e6 * spec.pricing.input;
+ const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
+ const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
+ const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
  const outputCost = outputTokens / 1e6 * spec.pricing.output;
  const totalCost = inputCost + outputCost;
  return {
  inputCost,
+ cachedInputCost,
+ cacheCreationCost,
  outputCost,
  totalCost,
  currency: "USD"
@@ -5395,7 +5688,8 @@ var OPTION_FLAGS = {
  logFile: "--log-file <path>",
  logReset: "--log-reset",
  noBuiltins: "--no-builtins",
- noBuiltinInteraction: "--no-builtin-interaction"
+ noBuiltinInteraction: "--no-builtin-interaction",
+ quiet: "-q, --quiet"
  };
  var OPTION_DESCRIPTIONS = {
  model: "Model identifier, e.g. openai:gpt-5-nano or anthropic:claude-sonnet-4-5.",
@@ -5409,7 +5703,8 @@ var OPTION_DESCRIPTIONS = {
  logFile: "Path to log file. When set, logs are written to file instead of stderr.",
  logReset: "Reset (truncate) the log file at session start instead of appending.",
  noBuiltins: "Disable built-in gadgets (AskUser, TellUser).",
- noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser."
+ noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser.",
+ quiet: "Suppress all output except content (text and TellUser messages)."
  };
  var SUMMARY_PREFIX = "[llmist]";
 
@@ -5419,7 +5714,7 @@ var import_commander3 = require("commander");
  // package.json
  var package_default = {
  name: "llmist",
- version: "0.6.2",
+ version: "0.7.0",
  description: "Universal TypeScript LLM client with streaming-first agent framework. Works with any model - no structured outputs or native tool calling required. Implements its own flexible grammar for function calling.",
  type: "module",
  main: "dist/index.cjs",
@@ -5503,6 +5798,7 @@ var package_default = {
  "@google/genai": "^1.27.0",
  chalk: "^5.6.2",
  commander: "^12.1.0",
+ eta: "^4.4.1",
  "js-toml": "^1.0.2",
  "js-yaml": "^4.1.0",
  marked: "^15.0.12",
@@ -5622,12 +5918,19 @@ var import_node_path2 = __toESM(require("path"), 1);
  var import_node_url = require("url");
  init_gadget();
  var PATH_PREFIXES = [".", "/", "~"];
+ function isGadgetLike(value) {
+ if (typeof value !== "object" || value === null) {
+ return false;
+ }
+ const obj = value;
+ return typeof obj.execute === "function" && typeof obj.description === "string" && ("parameterSchema" in obj || "schema" in obj);
+ }
  function isGadgetConstructor(value) {
  if (typeof value !== "function") {
  return false;
  }
  const prototype = value.prototype;
- return Boolean(prototype) && prototype instanceof BaseGadget;
+ return Boolean(prototype) && (prototype instanceof BaseGadget || isGadgetLike(prototype));
  }
  function expandHomePath(input) {
  if (!input.startsWith("~")) {
@@ -5664,7 +5967,7 @@ function extractGadgetsFromModule(moduleExports) {
  return;
  }
  visited.add(value);
- if (value instanceof BaseGadget) {
+ if (value instanceof BaseGadget || isGadgetLike(value)) {
  results.push(value);
  return;
  }
@@ -5789,8 +6092,14 @@ function renderSummary(metadata) {
  parts.push(import_chalk.default.magenta(metadata.model));
  }
  if (metadata.usage) {
- const { inputTokens, outputTokens } = metadata.usage;
+ const { inputTokens, outputTokens, cachedInputTokens, cacheCreationInputTokens } = metadata.usage;
  parts.push(import_chalk.default.dim("\u2191") + import_chalk.default.yellow(` ${formatTokens(inputTokens)}`));
+ if (cachedInputTokens && cachedInputTokens > 0) {
+ parts.push(import_chalk.default.dim("\u27F3") + import_chalk.default.blue(` ${formatTokens(cachedInputTokens)}`));
+ }
+ if (cacheCreationInputTokens && cacheCreationInputTokens > 0) {
+ parts.push(import_chalk.default.dim("\u270E") + import_chalk.default.magenta(` ${formatTokens(cacheCreationInputTokens)}`));
+ }
  parts.push(import_chalk.default.dim("\u2193") + import_chalk.default.green(` ${formatTokens(outputTokens)}`));
  }
  if (metadata.elapsedSeconds !== void 0 && metadata.elapsedSeconds > 0) {
@@ -5959,6 +6268,9 @@ var StreamProgress = class {
  callOutputTokensEstimated = true;
  callOutputChars = 0;
  isStreaming = false;
+ // Cache token tracking for live cost estimation during streaming
+ callCachedInputTokens = 0;
+ callCacheCreationInputTokens = 0;
  // Cumulative stats (cumulative mode)
  totalStartTime = Date.now();
  totalTokens = 0;
@@ -5984,11 +6296,13 @@ var StreamProgress = class {
  this.callOutputTokensEstimated = true;
  this.callOutputChars = 0;
  this.isStreaming = false;
+ this.callCachedInputTokens = 0;
+ this.callCacheCreationInputTokens = 0;
  this.start();
  }
  /**
  * Ends the current LLM call. Updates cumulative stats and switches to cumulative mode.
- * @param usage - Final token usage from the call
+ * @param usage - Final token usage from the call (including cached tokens if available)
  */
  endCall(usage) {
  this.iterations++;
@@ -6000,7 +6314,9 @@ var StreamProgress = class {
  const cost = this.modelRegistry.estimateCost(
  modelName,
  usage.inputTokens,
- usage.outputTokens
+ usage.outputTokens,
+ usage.cachedInputTokens ?? 0,
+ usage.cacheCreationInputTokens ?? 0
  );
  if (cost) {
  this.totalCost += cost.totalCost;
@@ -6040,6 +6356,16 @@ var StreamProgress = class {
  this.callOutputTokens = tokens;
  this.callOutputTokensEstimated = estimated;
  }
+ /**
+ * Sets cached token counts for the current call (from stream metadata).
+ * Used for live cost estimation during streaming.
+ * @param cachedInputTokens - Number of tokens read from cache (cheaper)
+ * @param cacheCreationInputTokens - Number of tokens written to cache (more expensive)
+ */
+ setCachedTokens(cachedInputTokens, cacheCreationInputTokens) {
+ this.callCachedInputTokens = cachedInputTokens;
+ this.callCacheCreationInputTokens = cacheCreationInputTokens;
+ }
  /**
  * Get total elapsed time in seconds since the first call started.
  * @returns Elapsed time in seconds with 1 decimal place
@@ -6104,11 +6430,32 @@ var StreamProgress = class {
  parts.push(import_chalk2.default.dim("\u2193") + import_chalk2.default.green(` ${prefix}${formatTokens(outTokens)}`));
  }
  parts.push(import_chalk2.default.dim(`${elapsed}s`));
- if (this.totalCost > 0) {
- parts.push(import_chalk2.default.cyan(`$${formatCost(this.totalCost)}`));
+ const callCost = this.calculateCurrentCallCost(outTokens);
+ if (callCost > 0) {
+ parts.push(import_chalk2.default.cyan(`$${formatCost(callCost)}`));
  }
  this.target.write(`\r${parts.join(import_chalk2.default.dim(" | "))} ${import_chalk2.default.cyan(spinner)}`);
  }
+ /**
+ * Calculates live cost estimate for the current streaming call.
+ * Uses current input/output tokens and cached token counts.
+ */
+ calculateCurrentCallCost(outputTokens) {
+ if (!this.modelRegistry || !this.model) return 0;
+ try {
+ const modelName = this.model.includes(":") ? this.model.split(":")[1] : this.model;
+ const cost = this.modelRegistry.estimateCost(
+ modelName,
+ this.callInputTokens,
+ outputTokens,
+ this.callCachedInputTokens,
+ this.callCacheCreationInputTokens
+ );
+ return cost?.totalCost ?? 0;
+ } catch {
+ return 0;
+ }
+ }
  renderCumulativeMode(spinner) {
  const elapsed = ((Date.now() - this.totalStartTime) / 1e3).toFixed(1);
  const parts = [];
@@ -6257,7 +6604,7 @@ function addCompleteOptions(cmd, defaults) {
  OPTION_DESCRIPTIONS.maxTokens,
  createNumericParser({ label: "Max tokens", integer: true, min: 1 }),
  defaults?.["max-tokens"]
- );
+ ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet);
  }
  function addAgentOptions(cmd, defaults) {
  const gadgetAccumulator = (value, previous = []) => [
@@ -6286,7 +6633,7 @@ function addAgentOptions(cmd, defaults) {
  OPTION_FLAGS.noBuiltinInteraction,
  OPTION_DESCRIPTIONS.noBuiltinInteraction,
  defaults?.["builtin-interaction"] !== false
- );
+ ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet);
  }
  function configToCompleteOptions(config) {
  const result = {};
@@ -6294,6 +6641,7 @@ function configToCompleteOptions(config) {
  if (config.system !== void 0) result.system = config.system;
  if (config.temperature !== void 0) result.temperature = config.temperature;
  if (config["max-tokens"] !== void 0) result.maxTokens = config["max-tokens"];
+ if (config.quiet !== void 0) result.quiet = config.quiet;
  return result;
  }
  function configToAgentOptions(config) {
@@ -6311,6 +6659,7 @@ function configToAgentOptions(config) {
  result.gadgetStartPrefix = config["gadget-start-prefix"];
  if (config["gadget-end-prefix"] !== void 0)
  result.gadgetEndPrefix = config["gadget-end-prefix"];
+ if (config.quiet !== void 0) result.quiet = config.quiet;
  return result;
  }
 
@@ -6356,9 +6705,10 @@ async function executeAgent(promptArg, options, env) {
  const prompt = await resolvePrompt(promptArg, env);
  const client = env.createClient();
  const registry = new GadgetRegistry();
+ const stdinIsInteractive = isInteractive(env.stdin);
  if (options.builtins !== false) {
  for (const gadget of builtinGadgets) {
- if (options.builtinInteraction === false && gadget.name === "AskUser") {
+ if (gadget.name === "AskUser" && (options.builtinInteraction === false || !stdinIsInteractive)) {
  continue;
  }
  registry.registerByClass(gadget);
@@ -6416,6 +6766,10 @@ async function executeAgent(promptArg, options, env) {
  if (context.usage.outputTokens) {
  progress.setOutputTokens(context.usage.outputTokens, false);
  }
+ progress.setCachedTokens(
+ context.usage.cachedInputTokens ?? 0,
+ context.usage.cacheCreationInputTokens ?? 0
+ );
  }
  },
  // onLLMCallComplete: Finalize metrics after each LLM call
@@ -6434,11 +6788,13 @@ async function executeAgent(promptArg, options, env) {
  let callCost;
  if (context.usage && client.modelRegistry) {
  try {
- const modelName = options.model.includes(":") ? options.model.split(":")[1] : options.model;
+ const modelName = context.options.model.includes(":") ? context.options.model.split(":")[1] : context.options.model;
  const costResult = client.modelRegistry.estimateCost(
  modelName,
  context.usage.inputTokens,
- context.usage.outputTokens
+ context.usage.outputTokens,
+ context.usage.cachedInputTokens ?? 0,
+ context.usage.cacheCreationInputTokens ?? 0
  );
  if (costResult) callCost = costResult.totalCost;
  } catch {
@@ -6446,7 +6802,7 @@ async function executeAgent(promptArg, options, env) {
  }
  const callElapsed = progress.getCallElapsedSeconds();
  progress.endCall(context.usage);
- if (stderrTTY) {
+ if (!options.quiet) {
  const summary = renderSummary({
  iterations: context.iteration + 1,
  model: options.model,
@@ -6553,7 +6909,14 @@ Command rejected by user with message: "${response}"`
  printer.write(event.content);
  } else if (event.type === "gadget_result") {
  progress.pause();
- if (stderrTTY) {
+ if (options.quiet) {
+ if (event.result.gadgetName === "TellUser" && event.result.parameters?.message) {
+ const message = String(event.result.parameters.message);
+ const rendered = renderMarkdown(message);
+ env.stdout.write(`${rendered}
+ `);
+ }
+ } else {
  const tokenCount = await countGadgetOutputTokens(event.result.result);
  env.stderr.write(`${formatGadgetSummary({ ...event.result, tokenCount })}
  `);
@@ -6562,7 +6925,7 @@ Command rejected by user with message: "${response}"`
  }
  progress.complete();
  printer.ensureNewline();
- if (stderrTTY && iterations > 1) {
+ if (!options.quiet && iterations > 1) {
  env.stderr.write(`${import_chalk3.default.dim("\u2500".repeat(40))}
  `);
  const summary = renderOverallSummary({
@@ -6635,7 +6998,7 @@ async function executeComplete(promptArg, options, env) {
  progress.endCall(usage);
  progress.complete();
  printer.ensureNewline();
- if (stderrTTY) {
+ if (stderrTTY && !options.quiet) {
  const summary = renderSummary({ finishReason, usage, cost: progress.getTotalCost() });
  if (summary) {
  env.stderr.write(`${summary}
@@ -6656,9 +7019,102 @@ var import_node_fs3 = require("fs");
  var import_node_os = require("os");
  var import_node_path3 = require("path");
  var import_js_toml2 = require("js-toml");
+
+ // src/cli/templates.ts
+ var import_eta = require("eta");
+ var TemplateError = class extends Error {
+ constructor(message, promptName, configPath) {
+ super(promptName ? `[prompts.${promptName}]: ${message}` : message);
+ this.promptName = promptName;
+ this.configPath = configPath;
+ this.name = "TemplateError";
+ }
+ };
+ function createTemplateEngine(prompts, configPath) {
+ const eta = new import_eta.Eta({
+ views: "/",
+ // Required but we use named templates
+ autoEscape: false,
+ // Don't escape - these are prompts, not HTML
+ autoTrim: false
+ // Preserve whitespace in prompts
+ });
+ for (const [name, template] of Object.entries(prompts)) {
+ try {
+ eta.loadTemplate(`@${name}`, template);
+ } catch (error) {
+ throw new TemplateError(
+ error instanceof Error ? error.message : String(error),
+ name,
+ configPath
+ );
+ }
+ }
+ return eta;
+ }
+ function resolveTemplate(eta, template, context = {}, configPath) {
+ try {
+ const fullContext = {
+ ...context,
+ env: process.env
+ };
+ return eta.renderString(template, fullContext);
+ } catch (error) {
+ throw new TemplateError(
+ error instanceof Error ? error.message : String(error),
+ void 0,
+ configPath
+ );
+ }
+ }
+ function validatePrompts(prompts, configPath) {
+ const eta = createTemplateEngine(prompts, configPath);
+ for (const [name, template] of Object.entries(prompts)) {
+ try {
+ eta.renderString(template, { env: {} });
+ } catch (error) {
+ throw new TemplateError(
+ error instanceof Error ? error.message : String(error),
+ name,
+ configPath
+ );
+ }
+ }
+ }
+ function validateEnvVars(template, promptName, configPath) {
+ const envVarPattern = /<%=\s*it\.env\.(\w+)\s*%>/g;
+ const matches = template.matchAll(envVarPattern);
+ for (const match of matches) {
+ const varName = match[1];
+ if (process.env[varName] === void 0) {
+ throw new TemplateError(
+ `Environment variable '${varName}' is not set`,
+ promptName,
+ configPath
+ );
+ }
+ }
+ }
+ function hasTemplateSyntax(str) {
+ return str.includes("<%");
+ }
+
+ // src/cli/config.ts
  var GLOBAL_CONFIG_KEYS = /* @__PURE__ */ new Set(["log-level", "log-file", "log-reset"]);
  var VALID_LOG_LEVELS = ["silly", "trace", "debug", "info", "warn", "error", "fatal"];
- var COMPLETE_CONFIG_KEYS = /* @__PURE__ */ new Set(["model", "system", "temperature", "max-tokens"]);
+ var COMPLETE_CONFIG_KEYS = /* @__PURE__ */ new Set([
+ "model",
+ "system",
+ "temperature",
+ "max-tokens",
+ "quiet",
+ "inherits",
+ "log-level",
+ "log-file",
+ "log-reset",
+ "type"
+ // Allowed for inheritance compatibility, ignored for built-in commands
+ ]);
  var AGENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
  "model",
  "system",
@@ -6669,16 +7125,20 @@ var AGENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
  "builtins",
  "builtin-interaction",
  "gadget-start-prefix",
- "gadget-end-prefix"
+ "gadget-end-prefix",
+ "quiet",
+ "inherits",
+ "log-level",
+ "log-file",
+ "log-reset",
+ "type"
+ // Allowed for inheritance compatibility, ignored for built-in commands
  ]);
  var CUSTOM_CONFIG_KEYS = /* @__PURE__ */ new Set([
  ...COMPLETE_CONFIG_KEYS,
  ...AGENT_CONFIG_KEYS,
  "type",
- "description",
- "log-level",
- "log-file",
- "log-reset"
+ "description"
  ]);
  var VALID_PARAMETER_FORMATS = ["json", "yaml", "toml", "auto"];
  function getConfigPath() {
@@ -6729,6 +7189,39 @@ function validateStringArray(value, key, section) {
  }
  return value;
  }
+ function validateInherits(value, section) {
+ if (typeof value === "string") {
+ return value;
+ }
+ if (Array.isArray(value)) {
+ for (let i = 0; i < value.length; i++) {
+ if (typeof value[i] !== "string") {
+ throw new ConfigError(`[${section}].inherits[${i}] must be a string`);
+ }
+ }
+ return value;
+ }
+ throw new ConfigError(`[${section}].inherits must be a string or array of strings`);
+ }
+ function validateLoggingConfig(raw, section) {
+ const result = {};
+ if ("log-level" in raw) {
+ const level = validateString(raw["log-level"], "log-level", section);
+ if (!VALID_LOG_LEVELS.includes(level)) {
+ throw new ConfigError(
+ `[${section}].log-level must be one of: ${VALID_LOG_LEVELS.join(", ")}`
+ );
+ }
+ result["log-level"] = level;
+ }
+ if ("log-file" in raw) {
+ result["log-file"] = validateString(raw["log-file"], "log-file", section);
+ }
+ if ("log-reset" in raw) {
+ result["log-reset"] = validateBoolean(raw["log-reset"], "log-reset", section);
+ }
+ return result;
+ }
  function validateBaseConfig(raw, section) {
  const result = {};
  if ("model" in raw) {
@@ -6743,6 +7236,9 @@ function validateBaseConfig(raw, section) {
  max: 2
  });
  }
+ if ("inherits" in raw) {
+ result.inherits = validateInherits(raw.inherits, section);
+ }
  return result;
  }
  function validateGlobalConfig(raw, section) {
@@ -6755,23 +7251,7 @@ function validateGlobalConfig(raw, section) {
  throw new ConfigError(`[${section}].${key} is not a valid option`);
  }
  }
- const result = {};
- if ("log-level" in rawObj) {
- const level = validateString(rawObj["log-level"], "log-level", section);
- if (!VALID_LOG_LEVELS.includes(level)) {
- throw new ConfigError(
- `[${section}].log-level must be one of: ${VALID_LOG_LEVELS.join(", ")}`
- );
- }
- result["log-level"] = level;
- }
- if ("log-file" in rawObj) {
- result["log-file"] = validateString(rawObj["log-file"], "log-file", section);
- }
- if ("log-reset" in rawObj) {
- result["log-reset"] = validateBoolean(rawObj["log-reset"], "log-reset", section);
- }
- return result;
+ return validateLoggingConfig(rawObj, section);
  }
  function validateCompleteConfig(raw, section) {
  if (typeof raw !== "object" || raw === null) {
@@ -6783,13 +7263,19 @@ function validateCompleteConfig(raw, section) {
  throw new ConfigError(`[${section}].${key} is not a valid option`);
  }
  }
- const result = { ...validateBaseConfig(rawObj, section) };
+ const result = {
+ ...validateBaseConfig(rawObj, section),
+ ...validateLoggingConfig(rawObj, section)
+ };
  if ("max-tokens" in rawObj) {
  result["max-tokens"] = validateNumber(rawObj["max-tokens"], "max-tokens", section, {
  integer: true,
  min: 1
  });
  }
+ if ("quiet" in rawObj) {
+ result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+ }
  return result;
  }
  function validateAgentConfig(raw, section) {
@@ -6802,7 +7288,10 @@ function validateAgentConfig(raw, section) {
  throw new ConfigError(`[${section}].${key} is not a valid option`);
  }
  }
- const result = { ...validateBaseConfig(rawObj, section) };
+ const result = {
+ ...validateBaseConfig(rawObj, section),
+ ...validateLoggingConfig(rawObj, section)
+ };
  if ("max-iterations" in rawObj) {
  result["max-iterations"] = validateNumber(rawObj["max-iterations"], "max-iterations", section, {
  integer: true,
@@ -6845,6 +7334,9 @@ function validateAgentConfig(raw, section) {
  section
  );
  }
+ if ("quiet" in rawObj) {
+ result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+ }
  return result;
  }
  function validateCustomConfig(raw, section) {
@@ -6920,20 +7412,22 @@ function validateCustomConfig(raw, section) {
  min: 1
  });
  }
- if ("log-level" in rawObj) {
- const level = validateString(rawObj["log-level"], "log-level", section);
- if (!VALID_LOG_LEVELS.includes(level)) {
- throw new ConfigError(
- `[${section}].log-level must be one of: ${VALID_LOG_LEVELS.join(", ")}`
- );
- }
- result["log-level"] = level;
+ if ("quiet" in rawObj) {
+ result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
  }
- if ("log-file" in rawObj) {
- result["log-file"] = validateString(rawObj["log-file"], "log-file", section);
+ Object.assign(result, validateLoggingConfig(rawObj, section));
+ return result;
+ }
+ function validatePromptsConfig(raw, section) {
+ if (typeof raw !== "object" || raw === null) {
+ throw new ConfigError(`[${section}] must be a table`);
  }
- if ("log-reset" in rawObj) {
- result["log-reset"] = validateBoolean(rawObj["log-reset"], "log-reset", section);
+ const result = {};
+ for (const [key, value] of Object.entries(raw)) {
+ if (typeof value !== "string") {
+ throw new ConfigError(`[${section}].${key} must be a string`);
+ }
+ result[key] = value;
  }
  return result;
  }
@@ -6951,6 +7445,8 @@ function validateConfig(raw, configPath) {
  result.complete = validateCompleteConfig(value, key);
  } else if (key === "agent") {
  result.agent = validateAgentConfig(value, key);
+ } else if (key === "prompts") {
+ result.prompts = validatePromptsConfig(value, key);
  } else {
  result[key] = validateCustomConfig(value, key);
  }
@@ -6986,12 +7482,119 @@ function loadConfig() {
  configPath
  );
  }
- return validateConfig(raw, configPath);
+ const validated = validateConfig(raw, configPath);
+ const inherited = resolveInheritance(validated, configPath);
+ return resolveTemplatesInConfig(inherited, configPath);
  }
  function getCustomCommandNames(config) {
- const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent"]);
+ const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "prompts"]);
  return Object.keys(config).filter((key) => !reserved.has(key));
  }
+ function resolveTemplatesInConfig(config, configPath) {
+ const prompts = config.prompts ?? {};
+ const hasPrompts = Object.keys(prompts).length > 0;
+ let hasTemplates = false;
+ for (const [sectionName, section] of Object.entries(config)) {
+ if (sectionName === "global" || sectionName === "prompts") continue;
+ if (!section || typeof section !== "object") continue;
+ const sectionObj = section;
+ if (typeof sectionObj.system === "string" && hasTemplateSyntax(sectionObj.system)) {
+ hasTemplates = true;
+ break;
+ }
+ }
+ for (const template of Object.values(prompts)) {
+ if (hasTemplateSyntax(template)) {
+ hasTemplates = true;
+ break;
+ }
+ }
+ if (!hasPrompts && !hasTemplates) {
+ return config;
+ }
+ try {
+ validatePrompts(prompts, configPath);
+ } catch (error) {
+ if (error instanceof TemplateError) {
+ throw new ConfigError(error.message, configPath);
+ }
+ throw error;
+ }
+ for (const [name, template] of Object.entries(prompts)) {
+ try {
+ validateEnvVars(template, name, configPath);
+ } catch (error) {
+ if (error instanceof TemplateError) {
+ throw new ConfigError(error.message, configPath);
+ }
+ throw error;
+ }
+ }
+ const eta = createTemplateEngine(prompts, configPath);
+ const result = { ...config };
+ for (const [sectionName, section] of Object.entries(config)) {
+ if (sectionName === "global" || sectionName === "prompts") continue;
+ if (!section || typeof section !== "object") continue;
+ const sectionObj = section;
+ if (typeof sectionObj.system === "string" && hasTemplateSyntax(sectionObj.system)) {
+ try {
+ validateEnvVars(sectionObj.system, void 0, configPath);
+ } catch (error) {
+ if (error instanceof TemplateError) {
+ throw new ConfigError(`[${sectionName}].system: ${error.message}`, configPath);
+ }
+ throw error;
+ }
+ try {
+ const resolved = resolveTemplate(eta, sectionObj.system, {}, configPath);
+ result[sectionName] = {
+ ...sectionObj,
+ system: resolved
+ };
+ } catch (error) {
+ if (error instanceof TemplateError) {
+ throw new ConfigError(`[${sectionName}].system: ${error.message}`, configPath);
+ }
+ throw error;
+ }
+ }
+ }
+ return result;
+ }
+ function resolveInheritance(config, configPath) {
+ const resolved = {};
+ const resolving = /* @__PURE__ */ new Set();
+ function resolveSection(name) {
+ if (name in resolved) {
+ return resolved[name];
+ }
+ if (resolving.has(name)) {
+ throw new ConfigError(`Circular inheritance detected: ${name}`, configPath);
+ }
+ const section = config[name];
+ if (section === void 0 || typeof section !== "object") {
+ throw new ConfigError(`Cannot inherit from unknown section: ${name}`, configPath);
+ }
+ resolving.add(name);
+ const sectionObj = section;
+ const inheritsRaw = sectionObj.inherits;
+ const inheritsList = inheritsRaw ? Array.isArray(inheritsRaw) ? inheritsRaw : [inheritsRaw] : [];
+ let merged = {};
+ for (const parent of inheritsList) {
+ const parentResolved = resolveSection(parent);
+ merged = { ...merged, ...parentResolved };
+ }
+ const { inherits: _inherits, ...ownValues } = sectionObj;
+ merged = { ...merged, ...ownValues };
+ resolving.delete(name);
+ resolved[name] = merged;
+ return merged;
+ }
+ for (const name of Object.keys(config)) {
+ resolveSection(name);
+ }
+ return resolved;
+ }
 
  // src/cli/models-command.ts
  var import_chalk4 = __toESM(require("chalk"), 1);
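A minimal sketch of how resolveInheritance merges config sections (section names and values are hypothetical):

const merged = resolveInheritance({
  base: { model: "anthropic:claude-sonnet-4-5", quiet: true },
  review: { inherits: "base", system: "Review the diff." }
});
// merged.review -> { model: "anthropic:claude-sonnet-4-5", quiet: true, system: "Review the diff." }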