llmist 0.6.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -864,7 +864,7 @@ function findSafeDelimiter(content) {
  }
  let counter = 1;
  while (counter < 1e3) {
- const delimiter = `HEREDOC_${counter}`;
+ const delimiter = `__GADGET_PARAM_${counter}__`;
  const regex = new RegExp(`^${delimiter}\\s*$`);
  const isUsed = lines.some((line) => regex.test(line));
  if (!isUsed) {
@@ -922,6 +922,10 @@ function formatParamsAsYaml(params) {
  }
  return lines.join("\n");
  }
+ function formatTomlInlineTable(obj) {
+ const entries = Object.entries(obj).map(([k, v]) => `${k} = ${formatTomlValue(v)}`);
+ return `{ ${entries.join(", ")} }`;
+ }
  function formatTomlValue(value) {
  if (typeof value === "string") {
  if (value.includes("\n")) {
@@ -939,10 +943,17 @@ ${delimiter}`;
  return '""';
  }
  if (Array.isArray(value)) {
- return JSON.stringify(value);
+ if (value.length === 0) return "[]";
+ const items = value.map((item) => {
+ if (typeof item === "object" && item !== null && !Array.isArray(item)) {
+ return formatTomlInlineTable(item);
+ }
+ return formatTomlValue(item);
+ });
+ return `[${items.join(", ")}]`;
  }
  if (typeof value === "object") {
- return JSON.stringify(value);
+ return formatTomlInlineTable(value);
  }
  return JSON.stringify(value);
  }
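
The practical effect of these two hunks: objects, and objects inside arrays, now serialize as TOML inline tables rather than raw JSON (which is not valid TOML). A minimal before/after sketch with invented inputs, calling the functions from the hunk above:

```typescript
formatTomlValue({ name: "retry", max: 3 });
// 0.6.2: '{"name":"retry","max":3}'    <- JSON, not valid TOML
// 0.8.0: '{ name = "retry", max = 3 }' <- TOML inline table

formatTomlValue([{ a: 1 }, { a: 2 }]);
// 0.6.2: '[{"a":1},{"a":2}]'
// 0.8.0: '[{ a = 1 }, { a = 2 }]'

formatTomlValue([1, "two", true]);
// 0.8.0: '[1, "two", true]' (scalars still go through JSON.stringify)
```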
@@ -960,7 +971,16 @@ var init_gadget = __esm({
  yaml = __toESM(require("js-yaml"), 1);
  init_schema_to_json();
  init_schema_validator();
- HEREDOC_DELIMITERS = ["EOF", "END", "DOC", "CONTENT", "TEXT", "HEREDOC", "DATA", "BLOCK"];
+ HEREDOC_DELIMITERS = [
+ "__GADGET_PARAM_EOF__",
+ "__GADGET_PARAM_END__",
+ "__GADGET_PARAM_DOC__",
+ "__GADGET_PARAM_CONTENT__",
+ "__GADGET_PARAM_TEXT__",
+ "__GADGET_PARAM_HEREDOC__",
+ "__GADGET_PARAM_DATA__",
+ "__GADGET_PARAM_BLOCK__"
+ ];
  BaseGadget = class {
  /**
  * The name of the gadget. Used for identification when LLM calls it.
@@ -1958,6 +1978,14 @@ function preprocessTomlHeredoc(tomlStr) {
  }
  return result.join("\n");
  }
+ function stripMarkdownFences(content) {
+ let cleaned = content.trim();
+ const openingFence = /^```(?:toml|yaml|json)?\s*\n/i;
+ const closingFence = /\n?```\s*$/;
+ cleaned = cleaned.replace(openingFence, "");
+ cleaned = cleaned.replace(closingFence, "");
+ return cleaned.trim();
+ }
  var yaml2, import_js_toml, globalInvocationCounter, StreamParser;
  var init_parser = __esm({
  "src/gadgets/parser.ts"() {
@@ -2013,35 +2041,36 @@ var init_parser = __esm({
  * Parse parameter string according to configured format
  */
  parseParameters(raw) {
+ const cleaned = stripMarkdownFences(raw);
  if (this.parameterFormat === "json") {
  try {
- return { parameters: JSON.parse(raw) };
+ return { parameters: JSON.parse(cleaned) };
  } catch (error) {
  return { parseError: this.truncateParseError(error, "JSON") };
  }
  }
  if (this.parameterFormat === "yaml") {
  try {
- return { parameters: yaml2.load(preprocessYaml(raw)) };
+ return { parameters: yaml2.load(preprocessYaml(cleaned)) };
  } catch (error) {
  return { parseError: this.truncateParseError(error, "YAML") };
  }
  }
  if (this.parameterFormat === "toml") {
  try {
- return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(raw)) };
+ return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(cleaned)) };
  } catch (error) {
  return { parseError: this.truncateParseError(error, "TOML") };
  }
  }
  try {
- return { parameters: JSON.parse(raw) };
+ return { parameters: JSON.parse(cleaned) };
  } catch {
  try {
- return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(raw)) };
+ return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(cleaned)) };
  } catch {
  try {
- return { parameters: yaml2.load(preprocessYaml(raw)) };
+ return { parameters: yaml2.load(preprocessYaml(cleaned)) };
  } catch (error) {
  return { parseError: this.truncateParseError(error, "auto") };
  }
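
With fence stripping applied, the fallback chain (when `parameterFormat` is none of the explicit three) still tries JSON first, then TOML, then YAML, each on the cleaned text. A rough sketch of the resulting behavior (inputs invented; `parser` stands for a `StreamParser` left in auto-detection):

```typescript
parser.parseParameters('{"n": 1}'); // valid JSON -> parsed on the first attempt
parser.parseParameters("n = 1");    // JSON fails -> parsed as TOML
parser.parseParameters("n: 1");     // JSON and TOML fail -> parsed as YAML
// Only if all three fail does it return { parseError } tagged "auto".
```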
@@ -2587,6 +2616,7 @@ var init_agent = __esm({
  gadgetEndPrefix;
  onHumanInputRequired;
  textOnlyHandler;
+ textWithGadgetsHandler;
  stopOnGadgetError;
  shouldContinueAfterError;
  defaultGadgetTimeoutMs;
@@ -2617,6 +2647,7 @@ var init_agent = __esm({
  this.gadgetEndPrefix = options.gadgetEndPrefix;
  this.onHumanInputRequired = options.onHumanInputRequired;
  this.textOnlyHandler = options.textOnlyHandler ?? "terminate";
+ this.textWithGadgetsHandler = options.textWithGadgetsHandler;
  this.stopOnGadgetError = options.stopOnGadgetError ?? true;
  this.shouldContinueAfterError = options.shouldContinueAfterError;
  this.defaultGadgetTimeoutMs = options.defaultGadgetTimeoutMs;
@@ -2804,6 +2835,17 @@ var init_agent = __esm({
  }
  }
  if (result.didExecuteGadgets) {
+ if (this.textWithGadgetsHandler) {
+ const textContent = result.outputs.filter((output) => output.type === "text").map((output) => output.content).join("");
+ if (textContent.trim()) {
+ const { gadgetName, parameterMapping, resultMapping } = this.textWithGadgetsHandler;
+ this.conversation.addGadgetCall(
+ gadgetName,
+ parameterMapping(textContent),
+ resultMapping ? resultMapping(textContent) : textContent
+ );
+ }
+ }
  for (const output of result.outputs) {
  if (output.type === "gadget_result") {
  const gadgetResult = output.result;
@@ -2815,7 +2857,13 @@ var init_agent = __esm({
  }
  }
  } else {
- this.conversation.addAssistantMessage(finalMessage);
+ if (finalMessage.trim()) {
+ this.conversation.addGadgetCall(
+ "TellUser",
+ { message: finalMessage, done: false, type: "info" },
+ `\u2139\uFE0F ${finalMessage}`
+ );
+ }
  const shouldBreak = await this.handleTextOnlyResponse(finalMessage);
  if (shouldBreak) {
  break;
@@ -3000,6 +3048,7 @@ var AgentBuilder;
 var init_builder = __esm({
 "src/agent/builder.ts"() {
 "use strict";
+ init_constants();
 init_model_shortcuts();
 init_registry();
 init_agent();
@@ -3021,6 +3070,7 @@ var init_builder = __esm({
  gadgetStartPrefix;
  gadgetEndPrefix;
  textOnlyHandler;
+ textWithGadgetsHandler;
  stopOnGadgetError;
  shouldContinueAfterError;
  defaultGadgetTimeoutMs;
@@ -3283,6 +3333,30 @@ var init_builder = __esm({
  this.textOnlyHandler = handler;
  return this;
  }
+ /**
+ * Set the handler for text content that appears alongside gadget calls.
+ *
+ * When set, text accompanying gadget responses will be wrapped as a
+ * synthetic gadget call before the actual gadget results in the
+ * conversation history.
+ *
+ * @param handler - Configuration for wrapping text
+ * @returns This builder for chaining
+ *
+ * @example
+ * ```typescript
+ * // Wrap text as TellUser gadget
+ * .withTextWithGadgetsHandler({
+ *   gadgetName: "TellUser",
+ *   parameterMapping: (text) => ({ message: text, done: false, type: "info" }),
+ *   resultMapping: (text) => `ℹ️ ${text}`,
+ * })
+ * ```
+ */
+ withTextWithGadgetsHandler(handler) {
+ this.textWithGadgetsHandler = handler;
+ return this;
+ }
  /**
  * Set whether to stop gadget execution on first error.
  *
@@ -3397,6 +3471,69 @@ var init_builder = __esm({
  this.gadgetOutputLimitPercent = percent;
  return this;
  }
+ /**
+ * Add a synthetic gadget call to the conversation history.
+ *
+ * This is useful for in-context learning - showing the LLM what "past self"
+ * did correctly so it mimics the pattern. The call is formatted with proper
+ * markers and parameter format.
+ *
+ * @param gadgetName - Name of the gadget
+ * @param parameters - Parameters passed to the gadget
+ * @param result - Result returned by the gadget
+ * @returns This builder for chaining
+ *
+ * @example
+ * ```typescript
+ * .withSyntheticGadgetCall(
+ *   'TellUser',
+ *   {
+ *     message: '👋 Hello!\n\nHere\'s what I can do:\n- Analyze code\n- Run commands',
+ *     done: false,
+ *     type: 'info'
+ *   },
+ *   'ℹ️ 👋 Hello!\n\nHere\'s what I can do:\n- Analyze code\n- Run commands'
+ * )
+ * ```
+ */
+ withSyntheticGadgetCall(gadgetName, parameters, result) {
+ const startPrefix = this.gadgetStartPrefix ?? GADGET_START_PREFIX;
+ const endPrefix = this.gadgetEndPrefix ?? GADGET_END_PREFIX;
+ const format = this.parameterFormat ?? "yaml";
+ const paramStr = this.formatSyntheticParameters(parameters, format);
+ this.initialMessages.push({
+ role: "assistant",
+ content: `${startPrefix}${gadgetName}
+ ${paramStr}
+ ${endPrefix}`
+ });
+ this.initialMessages.push({
+ role: "user",
+ content: `Result: ${result}`
+ });
+ return this;
+ }
+ /**
+ * Format parameters for synthetic gadget calls.
+ * Uses heredoc for multiline string values.
+ */
+ formatSyntheticParameters(parameters, format) {
+ if (format === "json" || format === "auto") {
+ return JSON.stringify(parameters);
+ }
+ return Object.entries(parameters).map(([key, value]) => {
+ if (typeof value === "string" && value.includes("\n")) {
+ const separator = format === "yaml" ? ":" : " =";
+ return `${key}${separator} <<<EOF
+ ${value}
+ EOF`;
+ }
+ if (format === "yaml") {
+ return typeof value === "string" ? `${key}: ${value}` : `${key}: ${JSON.stringify(value)}`;
+ }
+ return `${key} = ${JSON.stringify(value)}`;
+ }).join("\n");
+ }
  /**
  * Build and create the agent with the given user prompt.
  * Returns the Agent instance ready to run.
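
For orientation, here is roughly what one synthetic call contributes to `initialMessages`, assuming the default yaml format; the literal marker strings come from `GADGET_START_PREFIX`/`GADGET_END_PREFIX` in the constants module pulled in by `init_constants()` above and are shown as placeholders:

```typescript
builder.withSyntheticGadgetCall("TellUser", { message: "Hi\nthere", done: false }, "ℹ️ Hi there");
// assistant message (markers abbreviated):
//   <start>TellUser
//   message: <<<EOF
//   Hi
//   there
//   EOF
//   done: false
//   <end>
// user message:
//   Result: ℹ️ Hi there
```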
@@ -3439,6 +3576,7 @@ var init_builder = __esm({
  gadgetStartPrefix: this.gadgetStartPrefix,
  gadgetEndPrefix: this.gadgetEndPrefix,
  textOnlyHandler: this.textOnlyHandler,
+ textWithGadgetsHandler: this.textWithGadgetsHandler,
  stopOnGadgetError: this.stopOnGadgetError,
  shouldContinueAfterError: this.shouldContinueAfterError,
  defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
@@ -3540,6 +3678,7 @@ var init_builder = __esm({
  gadgetStartPrefix: this.gadgetStartPrefix,
  gadgetEndPrefix: this.gadgetEndPrefix,
  textOnlyHandler: this.textOnlyHandler,
+ textWithGadgetsHandler: this.textWithGadgetsHandler,
  stopOnGadgetError: this.stopOnGadgetError,
  shouldContinueAfterError: this.shouldContinueAfterError,
  defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
@@ -3567,7 +3706,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3591,7 +3731,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 1,
  output: 5,
- cachedInput: 0.1
+ cachedInput: 0.1,
+ cacheWriteInput: 1.25
  },
  knowledgeCutoff: "2025-02",
  features: {
@@ -3615,7 +3756,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-03",
  features: {
@@ -3639,7 +3781,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2024-11",
  features: {
@@ -3663,7 +3806,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 15,
  output: 75,
- cachedInput: 1.5
+ cachedInput: 1.5,
+ cacheWriteInput: 18.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3687,7 +3831,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 15,
  output: 75,
- cachedInput: 1.5
+ cachedInput: 1.5,
+ cacheWriteInput: 18.75
  },
  knowledgeCutoff: "2025-03",
  features: {
@@ -3710,7 +3855,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 0.8,
  output: 4,
- cachedInput: 0.08
+ cachedInput: 0.08,
+ cacheWriteInput: 1
  },
  knowledgeCutoff: "2024-07",
  features: {
@@ -3733,7 +3879,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 0.25,
  output: 1.25,
- cachedInput: 0.025
+ cachedInput: 0.025,
+ cacheWriteInput: 0.3125
  },
  knowledgeCutoff: "2023-08",
  features: {
@@ -3757,7 +3904,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 1,
  output: 5,
- cachedInput: 0.1
+ cachedInput: 0.1,
+ cacheWriteInput: 1.25
  },
  knowledgeCutoff: "2025-02",
  features: {
@@ -3781,7 +3929,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3805,7 +3954,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 5,
  output: 25,
- cachedInput: 0.5
+ cachedInput: 0.5,
+ cacheWriteInput: 6.25
  },
  knowledgeCutoff: "2025-03",
  features: {
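
All eleven Anthropic entries follow the vendor's published ratios: cache reads bill at 0.1x the base input rate and cache writes (5-minute TTL) at 1.25x. A quick sanity check against the numbers in these hunks (USD per million tokens; epsilon comparison to sidestep floating-point rounding):

```typescript
// (input, cachedInput, cacheWriteInput) tuples taken from the hunks above.
const rates: Array<[number, number, number]> = [
  [3, 0.3, 3.75], [1, 0.1, 1.25], [15, 1.5, 18.75],
  [0.8, 0.08, 1], [0.25, 0.025, 0.3125], [5, 0.5, 6.25],
];
const ok = rates.every(([input, read, write]) =>
  Math.abs(read - input * 0.1) < 1e-9 && Math.abs(write - input * 1.25) < 1e-9
);
console.log(ok); // true
```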
@@ -3920,15 +4070,27 @@ var init_anthropic = __esm({
  }
  buildRequestPayload(options, descriptor, spec, messages) {
  const systemMessages = messages.filter((message) => message.role === "system");
- const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
- const conversation = messages.filter(
+ const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
+ type: "text",
+ text: m.content,
+ // Add cache_control to the LAST system message block
+ ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
+ })) : void 0;
+ const nonSystemMessages = messages.filter(
  (message) => message.role !== "system"
- ).map((message) => ({
+ );
+ const lastUserIndex = nonSystemMessages.reduce(
+ (lastIdx, msg, idx) => msg.role === "user" ? idx : lastIdx,
+ -1
+ );
+ const conversation = nonSystemMessages.map((message, index) => ({
  role: message.role,
  content: [
  {
  type: "text",
- text: message.content
+ text: message.content,
+ // Add cache_control to the LAST user message
+ ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
  }
  ]
  }));
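
Net effect: each request carries at most two ephemeral cache breakpoints, one on the last system block and one on the last user message. A sketch of the payload shape this produces (shapes follow Anthropic's Messages API; the text values are invented):

```typescript
const payload = {
  system: [
    { type: "text", text: "You are...", cache_control: { type: "ephemeral" } },
  ],
  messages: [
    { role: "user", content: [{ type: "text", text: "First question" }] },
    { role: "assistant", content: [{ type: "text", text: "An answer" }] },
    // Only the final user turn gets the second breakpoint:
    {
      role: "user",
      content: [{ type: "text", text: "Follow-up", cache_control: { type: "ephemeral" } }],
    },
  ],
};
```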
@@ -3954,15 +4116,22 @@ var init_anthropic = __esm({
  async *wrapStream(iterable) {
  const stream2 = iterable;
  let inputTokens = 0;
+ let cachedInputTokens = 0;
+ let cacheCreationInputTokens = 0;
  for await (const event of stream2) {
  if (event.type === "message_start") {
- inputTokens = event.message.usage.input_tokens;
+ const usage = event.message.usage;
+ cachedInputTokens = usage.cache_read_input_tokens ?? 0;
+ cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
+ inputTokens = usage.input_tokens + cachedInputTokens + cacheCreationInputTokens;
  yield {
  text: "",
  usage: {
  inputTokens,
  outputTokens: 0,
- totalTokens: inputTokens
+ totalTokens: inputTokens,
+ cachedInputTokens,
+ cacheCreationInputTokens
  },
  rawEvent: event
  };
@@ -3976,7 +4145,9 @@ var init_anthropic = __esm({
  const usage = event.usage ? {
  inputTokens,
  outputTokens: event.usage.output_tokens,
- totalTokens: inputTokens + event.usage.output_tokens
+ totalTokens: inputTokens + event.usage.output_tokens,
+ cachedInputTokens,
+ cacheCreationInputTokens
  } : void 0;
  if (event.delta.stop_reason || usage) {
  yield {
@@ -4057,6 +4228,7 @@ var init_gemini_models = __esm({
  "src/providers/gemini-models.ts"() {
  "use strict";
  GEMINI_MODELS = [
+ // Gemini 3 Pro (Preview)
  {
  provider: "gemini",
  modelId: "gemini-3-pro-preview",
@@ -4065,8 +4237,11 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 2,
+ // $2.00 for prompts <= 200k, $4.00 for > 200k (using lower tier)
  output: 12,
+ // $12.00 for prompts <= 200k, $18.00 for > 200k
  cachedInput: 0.2
+ // $0.20 for prompts <= 200k
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -4079,9 +4254,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 3",
  releaseDate: "2025-11-18",
- notes: "Most advanced model. 1501 Elo LMArena, 91.9% GPQA Diamond, 76.2% SWE-bench. Deep Think mode available."
+ notes: "Best model for multimodal understanding, agentic and vibe-coding. Deep Think mode available."
  }
  },
+ // Gemini 2.5 Pro
  {
  provider: "gemini",
  modelId: "gemini-2.5-pro",
@@ -4090,8 +4266,11 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 1.25,
+ // $1.25 for prompts <= 200k, $2.50 for > 200k
  output: 10,
+ // $10.00 for prompts <= 200k, $15.00 for > 200k
  cachedInput: 0.125
+ // $0.125 for prompts <= 200k
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -4104,9 +4283,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Balanced multimodal model with 1M context. Best for complex agents and reasoning."
+ notes: "State-of-the-art multipurpose model. Excels at coding and complex reasoning."
  }
  },
+ // Gemini 2.5 Flash
  {
  provider: "gemini",
  modelId: "gemini-2.5-flash",
@@ -4115,8 +4295,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 0.3,
+ // $0.30 for text/image/video, $1.00 for audio
  output: 2.5,
  cachedInput: 0.03
+ // $0.03 for text/image/video
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -4129,9 +4311,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Best price-performance ratio with thinking enabled by default"
+ notes: "First hybrid reasoning model with 1M context and thinking budgets."
  }
  },
+ // Gemini 2.5 Flash-Lite
  {
  provider: "gemini",
  modelId: "gemini-2.5-flash-lite",
@@ -4140,8 +4323,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 0.1,
+ // $0.10 for text/image/video, $0.30 for audio
  output: 0.4,
  cachedInput: 0.01
+ // $0.01 for text/image/video
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -4153,9 +4338,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Fastest and most cost-efficient model for high-volume, low-latency tasks"
+ notes: "Smallest and most cost effective model, built for at scale usage."
  }
  },
+ // Gemini 2.0 Flash
  {
  provider: "gemini",
  modelId: "gemini-2.0-flash",
@@ -4164,8 +4350,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 8192,
  pricing: {
  input: 0.1,
+ // $0.10 for text/image/video, $0.70 for audio
  output: 0.4,
- cachedInput: 0.01
+ cachedInput: 0.025
+ // $0.025 for text/image/video
  },
  knowledgeCutoff: "2024-08",
  features: {
@@ -4176,9 +4364,10 @@ var init_gemini_models = __esm({
  },
  metadata: {
  family: "Gemini 2.0",
- notes: "Previous generation with 1M context and multimodal capabilities"
+ notes: "Balanced multimodal model with 1M context, built for the era of Agents."
  }
  },
+ // Gemini 2.0 Flash-Lite
  {
  provider: "gemini",
  modelId: "gemini-2.0-flash-lite",
@@ -4187,8 +4376,8 @@ var init_gemini_models = __esm({
  maxOutputTokens: 8192,
  pricing: {
  input: 0.075,
- output: 0.3,
- cachedInput: 75e-4
+ output: 0.3
+ // No context caching available for 2.0-flash-lite
  },
  knowledgeCutoff: "2024-08",
  features: {
@@ -4199,7 +4388,7 @@ var init_gemini_models = __esm({
  },
  metadata: {
  family: "Gemini 2.0",
- notes: "Lightweight previous generation model for cost-sensitive applications"
+ notes: "Smallest and most cost effective 2.0 model for at scale usage."
  }
  }
  ];
@@ -4369,7 +4558,9 @@ var init_gemini = __esm({
  return {
  inputTokens: usageMetadata.promptTokenCount ?? 0,
  outputTokens: usageMetadata.candidatesTokenCount ?? 0,
- totalTokens: usageMetadata.totalTokenCount ?? 0
+ totalTokens: usageMetadata.totalTokenCount ?? 0,
+ // Gemini returns cached token count in cachedContentTokenCount
+ cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
  };
  }
  /**
@@ -4425,10 +4616,11 @@ var init_openai_models = __esm({
  "src/providers/openai-models.ts"() {
  "use strict";
  OPENAI_MODELS = [
+ // GPT-5 Family
  {
  provider: "openai",
  modelId: "gpt-5.1",
- displayName: "GPT-5.1 Instant",
+ displayName: "GPT-5.1",
  contextWindow: 128e3,
  maxOutputTokens: 32768,
  pricing: {
@@ -4448,34 +4640,7 @@ var init_openai_models = __esm({
  metadata: {
  family: "GPT-5",
  releaseDate: "2025-11-12",
- notes: "Warmer, more intelligent, better instruction following. 2-3x faster than GPT-5.",
- supportsTemperature: false
- }
- },
- {
- provider: "openai",
- modelId: "gpt-5.1-thinking",
- displayName: "GPT-5.1 Thinking",
- contextWindow: 196e3,
- maxOutputTokens: 32768,
- pricing: {
- input: 1.25,
- output: 10,
- cachedInput: 0.125
- },
- knowledgeCutoff: "2024-09-30",
- features: {
- streaming: true,
- functionCalling: true,
- vision: true,
- reasoning: true,
- structuredOutputs: true,
- fineTuning: true
- },
- metadata: {
- family: "GPT-5",
- releaseDate: "2025-11-12",
- notes: "Advanced reasoning with thinking levels: Light, Standard, Extended, Heavy. Best for complex tasks.",
+ notes: "Latest GPT-5 with improved instruction following. 2-3x faster than GPT-5.",
  supportsTemperature: false
  }
  },
@@ -4555,6 +4720,255 @@ var init_openai_models = __esm({
  notes: "Fastest, most cost-efficient version for well-defined tasks",
  supportsTemperature: false
  }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-5-pro",
+ displayName: "GPT-5 Pro",
+ contextWindow: 272e3,
+ maxOutputTokens: 128e3,
+ pricing: {
+ input: 15,
+ output: 120
+ // No cached input pricing for gpt-5-pro
+ },
+ knowledgeCutoff: "2024-09-30",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "GPT-5",
+ notes: "Premium tier with enhanced capabilities. Does not support prompt caching.",
+ supportsTemperature: false
+ }
+ },
+ // GPT-4.1 Family
+ {
+ provider: "openai",
+ modelId: "gpt-4.1",
+ displayName: "GPT-4.1",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 2,
+ output: 8,
+ cachedInput: 0.5
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Improved GPT-4 with better instruction following"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4.1-mini",
+ displayName: "GPT-4.1 Mini",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 0.4,
+ output: 1.6,
+ cachedInput: 0.1
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Cost-efficient GPT-4.1 variant"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4.1-nano",
+ displayName: "GPT-4.1 Nano",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 0.1,
+ output: 0.4,
+ cachedInput: 0.025
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Fastest GPT-4.1 variant for simple tasks"
+ }
+ },
+ // GPT-4o Family
+ {
+ provider: "openai",
+ modelId: "gpt-4o",
+ displayName: "GPT-4o",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 2.5,
+ output: 10,
+ cachedInput: 1.25
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4o",
+ notes: "Multimodal model optimized for speed"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4o-mini",
+ displayName: "GPT-4o Mini",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 0.15,
+ output: 0.6,
+ cachedInput: 0.075
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4o",
+ notes: "Fast and affordable multimodal model"
+ }
+ },
+ // o-series (Reasoning models)
+ {
+ provider: "openai",
+ modelId: "o1",
+ displayName: "o1",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 15,
+ output: 60,
+ cachedInput: 7.5
+ },
+ knowledgeCutoff: "2024-12-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Advanced reasoning model with chain-of-thought",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o3",
+ displayName: "o3",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 2,
+ output: 8,
+ cachedInput: 0.5
+ },
+ knowledgeCutoff: "2025-01-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Next-gen reasoning model, more efficient than o1",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o4-mini",
+ displayName: "o4 Mini",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 1.1,
+ output: 4.4,
+ cachedInput: 0.275
+ },
+ knowledgeCutoff: "2025-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Cost-efficient reasoning model",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o3-mini",
+ displayName: "o3 Mini",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 1.1,
+ output: 4.4,
+ cachedInput: 0.55
+ },
+ knowledgeCutoff: "2025-01-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Compact reasoning model for cost-sensitive applications",
+ supportsTemperature: false
+ }
  }
  ];
  }
@@ -4635,7 +5049,8 @@ var init_openai = __esm({
  const usage = chunk.usage ? {
  inputTokens: chunk.usage.prompt_tokens,
  outputTokens: chunk.usage.completion_tokens,
- totalTokens: chunk.usage.total_tokens
+ totalTokens: chunk.usage.total_tokens,
+ cachedInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0
  } : void 0;
  if (finishReason || usage) {
  yield { text: "", finishReason, usage, rawEvent: chunk };
@@ -4852,20 +5267,28 @@ var init_model_registry = __esm({
  /**
  * Estimate API cost for a given model and token usage
  * @param modelId - Full model identifier
- * @param inputTokens - Number of input tokens
+ * @param inputTokens - Number of input tokens (total, including cached and cache creation)
  * @param outputTokens - Number of output tokens
- * @param useCachedInput - Whether to use cached input pricing (if supported by provider)
+ * @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
+ * @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
  * @returns CostEstimate if model found, undefined otherwise
  */
- estimateCost(modelId, inputTokens, outputTokens, useCachedInput = false) {
+ estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
  const spec = this.getModelSpec(modelId);
  if (!spec) return void 0;
- const inputRate = useCachedInput && spec.pricing.cachedInput !== void 0 ? spec.pricing.cachedInput : spec.pricing.input;
- const inputCost = inputTokens / 1e6 * inputRate;
+ const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
+ const cacheWriteRate = spec.pricing.cacheWriteInput ?? spec.pricing.input;
+ const uncachedInputTokens = inputTokens - cachedInputTokens - cacheCreationInputTokens;
+ const uncachedInputCost = uncachedInputTokens / 1e6 * spec.pricing.input;
+ const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
+ const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
+ const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
  const outputCost = outputTokens / 1e6 * spec.pricing.output;
  const totalCost = inputCost + outputCost;
  return {
  inputCost,
+ cachedInputCost,
+ cacheCreationCost,
  outputCost,
  totalCost,
  currency: "USD"
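
A worked example of the new signature, using the Sonnet-class rates from this diff ($3 input / $15 output / $0.30 cache read / $3.75 cache write per million tokens); the model identifier string is illustrative:

```typescript
// 100k total input tokens: 80k cache reads + 10k cache writes + 10k uncached.
const est = registry.estimateCost("claude-sonnet-4-5", 100_000, 2_000, 80_000, 10_000);
// uncached: (10_000 / 1e6) * 3    = $0.0300
// cached:   (80_000 / 1e6) * 0.30 = $0.0240
// write:    (10_000 / 1e6) * 3.75 = $0.0375
// output:   ( 2_000 / 1e6) * 15   = $0.0300
// est.totalCost ≈ 0.1215, versus 0.33 if none of the prompt were cached.
```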