npm - @axlsdk/axl - Versions diffs - 0.5.0 → 0.6.0 - Mend

@axlsdk/axl 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/index.cjs CHANGED Viewed

@@ -140,6 +140,7 @@ __export(index_exports, {
   agent: () => agent,
   createSpanManager: () => createSpanManager,
   defineConfig: () => defineConfig,
+  resolveThinkingOptions: () => resolveThinkingOptions,
   tool: () => tool,
   workflow: () => workflow,
   zodToJsonSchema: () => zodToJsonSchema
@@ -185,7 +186,7 @@ function tool(config) {
     on: config.retry?.on
   };
   const maxStringLen = config.maxStringLength ?? DEFAULT_MAX_STRING_LENGTH;
-  const execute = async (input) => {
+  const execute = async (input, ctx) => {
     const parsed = config.input.parse(input);
     if (maxStringLen > 0) {
       validateStringLengths(parsed, maxStringLen);
@@ -194,7 +195,7 @@ function tool(config) {
     let lastError;
     for (let attempt = 1; attempt <= maxAttempts; attempt++) {
       try {
-        return await config.handler(parsed);
+        return await config.handler(parsed, ctx);
       } catch (err) {
         lastError = err instanceof Error ? err : new Error(String(err));
         if (attempt === maxAttempts) break;
@@ -224,7 +225,7 @@ function tool(config) {
         if (config.hooks?.before) {
           processedInput = await config.hooks.before(processedInput, ctx);
         }
-        let result = await execute(processedInput);
+        let result = await execute(processedInput, ctx);
         if (config.hooks?.after) {
           result = await config.hooks.after(result, ctx);
         }
@@ -246,6 +247,25 @@ function tool(config) {
   };
 }
+// src/providers/types.ts
+/**
+ * Normalize the thinking-related request options into one flat record that
+ * the provider request builders destructure.
+ *
+ * @param options - Object read for `effort`, `thinkingBudget` and `includeThoughts`.
+ * @returns An object with `effort` and `thinkingBudget` passed through,
+ *   `includeThoughts` defaulted to `false`, plus the derived flags
+ *   `thinkingDisabled`, `activeEffort` and `hasBudgetOverride`.
+ * @throws {Error} When `thinkingBudget` is defined and less than 0.
+ */
+function resolveThinkingOptions(options) {
+  // NOTE(review): a NaN budget is not rejected here (NaN < 0 is false) and is returned as-is.
+  if (options.thinkingBudget !== void 0 && options.thinkingBudget < 0) {
+    throw new Error(`thinkingBudget must be non-negative, got ${options.thinkingBudget}`);
+  }
+  const effort = options.effort;
+  const thinkingBudget = options.thinkingBudget;
+  // Only a strictly positive budget counts as an override; 0 and undefined do not.
+  const hasBudgetOverride = thinkingBudget !== void 0 && thinkingBudget > 0;
+  return {
+    effort,
+    thinkingBudget,
+    includeThoughts: options.includeThoughts ?? false,
+    // Budget override wins: effort: 'none' + thinkingBudget: 5000 → thinking enabled
+    thinkingDisabled: (effort === "none" || thinkingBudget === 0) && !hasBudgetOverride,
+    // The effort value when it is set and not "none"; otherwise undefined.
+    activeEffort: effort && effort !== "none" ? effort : void 0,
+    hasBudgetOverride
+  };
+}
 // src/providers/retry.ts
 var RETRYABLE_STATUS_CODES = /* @__PURE__ */ new Set([429, 503, 529]);
 var MAX_RETRIES = 2;
@@ -300,6 +320,9 @@ var OPENAI_PRICING = {
   "gpt-5-nano": [5e-8, 4e-7],
   "gpt-5.1": [125e-8, 1e-5],
   "gpt-5.2": [175e-8, 14e-6],
+  "gpt-5.3": [175e-8, 14e-6],
+  "gpt-5.4": [25e-7, 15e-6],
+  "gpt-5.4-pro": [3e-5, 18e-5],
   o1: [15e-6, 6e-5],
   "o1-mini": [3e-6, 12e-6],
   "o1-pro": [15e-5, 6e-4],
@@ -328,26 +351,31 @@ function estimateOpenAICost(model, promptTokens, completionTokens, cachedTokens)
   const inputCost = (promptTokens - cached) * inputRate + cached * inputRate * 0.5;
   return inputCost + completionTokens * outputRate;
 }
-function isReasoningModel(model) {
+function isOSeriesModel(model) {
   return /^(o1|o3|o4-mini)/.test(model);
 }
-function thinkingToReasoningEffort(thinking) {
-  if (typeof thinking === "object") {
-    const budget = thinking.budgetTokens;
-    if (budget <= 1024) return "low";
-    if (budget <= 8192) return "medium";
-    return "high";
-  }
-  switch (thinking) {
-    case "low":
-      return "low";
-    case "medium":
-      return "medium";
-    case "high":
-      return "high";
-    case "max":
-      return "xhigh";
-  }
+/**
+ * Whether a reasoning effort may be computed for `model`: true for names
+ * accepted by `isOSeriesModel` or names starting with `gpt-5`.
+ */
+function supportsReasoningEffort(model) {
+  return isOSeriesModel(model) || /^gpt-5/.test(model);
+}
+/**
+ * True when the model name starts with `gpt-5.` followed by a digit 1-9
+ * (for example `gpt-5.1`); `clampReasoningEffort` uses this to decide
+ * whether the "none" effort can be kept as-is.
+ */
+function supportsReasoningNone(model) {
+  return /^gpt-5\.[1-9]/.test(model);
+}
+/**
+ * True when the model name starts with `gpt-5.` followed by a single digit
+ * 2-9 or by two or more digits (so `gpt-5.2` matches, `gpt-5.1` does not);
+ * `clampReasoningEffort` uses this to decide whether "xhigh" can be kept.
+ */
+function supportsXhigh(model) {
+  return /^gpt-5\.([2-9]|\d{2,})/.test(model);
+}
+/**
+ * Clamp a requested reasoning effort to a value the target OpenAI model accepts.
+ *
+ * - `gpt-5-pro*` always resolves to "high".
+ * - o-series models (per `isOSeriesModel`) accept only low / medium / high, so
+ *   "none" and "minimal" fall back to "low" and "xhigh" falls back to "high".
+ * - Other models: "none" becomes "minimal" unless `supportsReasoningNone`,
+ *   and "xhigh" becomes "high" unless `supportsXhigh`.
+ *
+ * @param model - Model name as sent on the wire.
+ * @param effort - Requested wire-level effort.
+ * @returns The effort to send, possibly downgraded.
+ */
+function clampReasoningEffort(model, effort) {
+  if (model.startsWith("gpt-5-pro")) return "high";
+  if (isOSeriesModel(model)) {
+    // "minimal" was introduced with GPT-5; sending it to o1/o3/o4-mini is rejected.
+    if (effort === "none" || effort === "minimal") return "low";
+    if (effort === "xhigh") return "high";
+    return effort;
+  }
+  if (effort === "none" && !supportsReasoningNone(model)) return "minimal";
+  if (effort === "xhigh" && !supportsXhigh(model)) return "high";
+  return effort;
+}
+/**
+ * Translate an effort option into the OpenAI wire value: "max" becomes
+ * "xhigh"; every other value is returned unchanged.
+ */
+function effortToReasoningEffort(effort) {
+  return effort === "max" ? "xhigh" : effort;
+}
+/**
+ * Bucket a numeric thinking budget into an effort level:
+ * up to 1024 → "low", up to 8192 → "medium", anything larger → "high".
+ */
+function budgetToReasoningEffort(budget) {
+  if (budget <= 1024) return "low";
+  if (budget <= 8192) return "medium";
+  return "high";
 }
 var OpenAIProvider = class {
   name = "openai";
@@ -436,13 +464,26 @@ var OpenAIProvider = class {
   // Internal helpers
   // ---------------------------------------------------------------------------
   buildRequestBody(messages, options, stream) {
-    const reasoning = isReasoningModel(options.model);
+    const oSeries = isOSeriesModel(options.model);
+    const reasoningCapable = supportsReasoningEffort(options.model);
+    const { thinkingBudget, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
+    let wireEffort;
+    if (reasoningCapable) {
+      if (hasBudgetOverride) {
+        wireEffort = clampReasoningEffort(options.model, budgetToReasoningEffort(thinkingBudget));
+      } else if (!thinkingDisabled && activeEffort) {
+        wireEffort = clampReasoningEffort(options.model, effortToReasoningEffort(activeEffort));
+      } else if (thinkingDisabled) {
+        wireEffort = clampReasoningEffort(options.model, "none");
+      }
+    }
+    const stripTemp = oSeries || reasoningCapable && wireEffort !== void 0;
     const body = {
       model: options.model,
-      messages: messages.map((m) => this.formatMessage(m, reasoning)),
+      messages: messages.map((m) => this.formatMessage(m, oSeries)),
       stream
     };
-    if (options.temperature !== void 0 && !reasoning) {
+    if (options.temperature !== void 0 && !stripTemp) {
       body.temperature = options.temperature;
     }
     if (options.maxTokens !== void 0) {
@@ -451,7 +492,7 @@ var OpenAIProvider = class {
     if (options.stop) body.stop = options.stop;
     if (options.tools && options.tools.length > 0) {
       body.tools = options.tools;
-      if (!reasoning) {
+      if (!oSeries) {
         body.parallel_tool_calls = true;
       }
     }
@@ -461,15 +502,13 @@ var OpenAIProvider = class {
     if (options.responseFormat) {
       body.response_format = options.responseFormat;
     }
-    if (reasoning) {
-      const effort = options.thinking ? thinkingToReasoningEffort(options.thinking) : options.reasoningEffort;
-      if (effort) {
-        body.reasoning_effort = effort;
-      }
-    }
+    if (wireEffort) body.reasoning_effort = wireEffort;
     if (stream) {
       body.stream_options = { include_usage: true };
     }
+    if (options.providerOptions) {
+      Object.assign(body, options.providerOptions);
+    }
     return body;
   }
   /** Extract a human-readable message from an API error response body. */
@@ -483,9 +522,9 @@ var OpenAIProvider = class {
     }
     return `OpenAI API error (${status}): ${body}`;
   }
-  formatMessage(msg, reasoning) {
+  formatMessage(msg, oSeries) {
     const out = {
-      role: msg.role === "system" && reasoning ? "developer" : msg.role,
+      role: msg.role === "system" && oSeries ? "developer" : msg.role,
       content: msg.content
     };
     if (msg.name) out.name = msg.name;
@@ -622,7 +661,20 @@ var OpenAIResponsesProvider = class {
   // Internal: build request body
   // ---------------------------------------------------------------------------
   buildRequestBody(messages, options, stream) {
-    const reasoning = isReasoningModel(options.model);
+    const oSeries = isOSeriesModel(options.model);
+    const reasoningCapable = supportsReasoningEffort(options.model);
+    const { thinkingBudget, includeThoughts, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
+    let wireEffort;
+    if (reasoningCapable) {
+      if (hasBudgetOverride) {
+        wireEffort = clampReasoningEffort(options.model, budgetToReasoningEffort(thinkingBudget));
+      } else if (!thinkingDisabled && activeEffort) {
+        wireEffort = clampReasoningEffort(options.model, effortToReasoningEffort(activeEffort));
+      } else if (thinkingDisabled) {
+        wireEffort = clampReasoningEffort(options.model, "none");
+      }
+    }
+    const stripTemp = oSeries || reasoningCapable && wireEffort !== void 0;
     const systemMessages = messages.filter((m) => m.role === "system");
     const nonSystemMessages = messages.filter((m) => m.role !== "system");
     const body = {
@@ -637,7 +689,7 @@ var OpenAIResponsesProvider = class {
     if (options.maxTokens !== void 0) {
       body.max_output_tokens = options.maxTokens;
     }
-    if (options.temperature !== void 0 && !reasoning) {
+    if (options.temperature !== void 0 && !stripTemp) {
       body.temperature = options.temperature;
     }
     if (options.tools && options.tools.length > 0) {
@@ -656,15 +708,21 @@ var OpenAIResponsesProvider = class {
         body.tool_choice = options.toolChoice;
       }
     }
-    if (reasoning) {
-      const effort = options.thinking ? thinkingToReasoningEffort(options.thinking) : options.reasoningEffort;
-      if (effort) {
-        body.reasoning = { effort };
-      }
+    if (reasoningCapable && (wireEffort !== void 0 || includeThoughts)) {
+      const reasoning = {};
+      if (wireEffort !== void 0) reasoning.effort = wireEffort;
+      if (includeThoughts) reasoning.summary = "detailed";
+      if (Object.keys(reasoning).length > 0) body.reasoning = reasoning;
+    }
+    if (reasoningCapable) {
+      body.include = ["reasoning.encrypted_content"];
     }
     if (options.responseFormat) {
       body.text = { format: this.mapResponseFormat(options.responseFormat) };
     }
+    if (options.providerOptions) {
+      Object.assign(body, options.providerOptions);
+    }
     return body;
   }
   // ---------------------------------------------------------------------------
@@ -680,6 +738,12 @@ var OpenAIResponsesProvider = class {
           output: msg.content
         });
       } else if (msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0) {
+        const reasoningItems = msg.providerMetadata?.openaiReasoningItems;
+        if (reasoningItems) {
+          for (const item of reasoningItems) {
+            input.push(item);
+          }
+        }
         if (msg.content) {
           input.push({ type: "message", role: "assistant", content: msg.content });
         }
@@ -692,6 +756,12 @@ var OpenAIResponsesProvider = class {
           });
         }
       } else if (msg.role === "user" || msg.role === "assistant") {
+        if (msg.role === "assistant" && msg.providerMetadata?.openaiReasoningItems) {
+          const reasoningItems = msg.providerMetadata.openaiReasoningItems;
+          for (const item of reasoningItems) {
+            input.push(item);
+          }
+        }
         input.push({
           type: "message",
           role: msg.role,
@@ -724,7 +794,9 @@ var OpenAIResponsesProvider = class {
   // ---------------------------------------------------------------------------
   parseResponse(json, model) {
     let content = "";
+    let thinkingContent = "";
     const toolCalls = [];
+    const reasoningItems = [];
     for (const item of json.output) {
       if (item.type === "message") {
         for (const part of item.content ?? []) {
@@ -741,6 +813,15 @@ var OpenAIResponsesProvider = class {
             arguments: item.arguments
           }
         });
+      } else if (item.type === "reasoning") {
+        reasoningItems.push(item);
+        if (item.summary) {
+          for (const s of item.summary) {
+            if (s.type === "summary_text" && s.text) {
+              thinkingContent += s.text;
+            }
+          }
+        }
       }
     }
     const usage = json.usage ? {
@@ -751,11 +832,14 @@ var OpenAIResponsesProvider = class {
       cached_tokens: json.usage.input_tokens_details?.cached_tokens
     } : void 0;
     const cost = usage ? estimateOpenAICost(model, usage.prompt_tokens, usage.completion_tokens, usage.cached_tokens) : void 0;
+    const providerMetadata = reasoningItems.length > 0 ? { openaiReasoningItems: reasoningItems } : void 0;
     return {
       content,
+      thinking_content: thinkingContent || void 0,
       tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
       usage,
-      cost
+      cost,
+      providerMetadata
     };
   }
   // ---------------------------------------------------------------------------
@@ -807,6 +891,8 @@ var OpenAIResponsesProvider = class {
     switch (eventType) {
       case "response.output_text.delta":
         return { type: "text_delta", content: data.delta ?? "" };
+      case "response.reasoning_summary_text.delta":
+        return { type: "thinking_delta", content: data.delta ?? "" };
       case "response.output_item.added":
         if (data.item?.type === "function_call") {
           const callId = data.item.call_id ?? data.item.id ?? "";
@@ -837,7 +923,9 @@ var OpenAIResponsesProvider = class {
           reasoning_tokens: response.usage.output_tokens_details?.reasoning_tokens,
           cached_tokens: response.usage.input_tokens_details?.cached_tokens
         } : void 0;
-        return { type: "done", usage };
+        const reasoningItems = response?.output?.filter((item) => item.type === "reasoning") ?? [];
+        const providerMetadata = reasoningItems.length > 0 ? { openaiReasoningItems: reasoningItems } : void 0;
+        return { type: "done", usage, providerMetadata };
       }
       case "response.failed": {
         const errorMsg = data.response?.error?.message ?? data.response?.status_details?.error?.message ?? "Unknown error";
@@ -865,9 +953,12 @@ var OpenAIResponsesProvider = class {
 // src/providers/anthropic.ts
 var ANTHROPIC_API_VERSION = "2023-06-01";
 var ANTHROPIC_PRICING = {
-  "claude-opus-4-6": [15e-6, 75e-6],
+  "claude-opus-4-6": [5e-6, 25e-6],
+  "claude-sonnet-4-6": [3e-6, 15e-6],
+  "claude-opus-4-5": [5e-6, 25e-6],
+  "claude-opus-4-1": [15e-6, 75e-6],
   "claude-sonnet-4-5": [3e-6, 15e-6],
-  "claude-haiku-4-5": [8e-7, 4e-6],
+  "claude-haiku-4-5": [1e-6, 5e-6],
   "claude-sonnet-4": [3e-6, 15e-6],
   "claude-opus-4": [15e-6, 75e-6],
   "claude-3-7-sonnet": [3e-6, 15e-6],
@@ -877,12 +968,15 @@ var ANTHROPIC_PRICING = {
   "claude-3-sonnet": [3e-6, 15e-6],
   "claude-3-haiku": [25e-8, 125e-8]
 };
+// Pricing keys ordered longest-first, so the prefix fallback in
+// estimateAnthropicCost hits the most specific key before a shorter one
+// that is also a prefix of the model name.
+var ANTHROPIC_PRICING_KEYS_BY_LENGTH = Object.keys(ANTHROPIC_PRICING).sort(
+  (a, b) => b.length - a.length
+);
 function estimateAnthropicCost(model, inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens) {
   let pricing = ANTHROPIC_PRICING[model];
   if (!pricing) {
-    for (const [key, value] of Object.entries(ANTHROPIC_PRICING)) {
+    for (const key of ANTHROPIC_PRICING_KEYS_BY_LENGTH) {
       if (model.startsWith(key)) {
-        pricing = value;
+        pricing = ANTHROPIC_PRICING[key];
         break;
       }
     }
@@ -902,16 +996,15 @@ var THINKING_BUDGETS = {
   // With auto-bump (+1024), max_tokens becomes 31024 which fits all models.
   max: 3e4
 };
-function thinkingToBudgetTokens(thinking) {
-  if (typeof thinking === "string") return THINKING_BUDGETS[thinking] ?? 5e3;
-  return thinking.budgetTokens;
-}
 function supportsAdaptiveThinking(model) {
   return model.startsWith("claude-opus-4-6") || model.startsWith("claude-sonnet-4-6");
 }
 function supportsMaxEffort(model) {
   return model.startsWith("claude-opus-4-6");
 }
+/**
+ * True when the model name starts with `claude-opus-4-6`, `claude-sonnet-4-6`
+ * or `claude-opus-4-5`; the Anthropic request builder only sets
+ * `output_config.effort` without adaptive thinking when this returns true.
+ */
+function supportsEffort(model) {
+  return model.startsWith("claude-opus-4-6") || model.startsWith("claude-sonnet-4-6") || model.startsWith("claude-opus-4-5");
+}
 var AnthropicProvider = class {
   name = "anthropic";
   baseUrl;
@@ -1001,9 +1094,6 @@ var AnthropicProvider = class {
     if (systemText) {
       body.system = systemText;
     }
-    if (options.temperature !== void 0 && !options.thinking) {
-      body.temperature = options.temperature;
-    }
     if (options.stop) {
       body.stop_sequences = options.stop;
     }
@@ -1013,19 +1103,39 @@ var AnthropicProvider = class {
     if (options.toolChoice !== void 0) {
       body.tool_choice = this.mapToolChoice(options.toolChoice);
     }
-    if (options.thinking) {
-      if (typeof options.thinking === "string" && supportsAdaptiveThinking(options.model) && // 'max' effort is only supported on Opus 4.6; Sonnet 4.6 falls back to manual mode
-      (options.thinking !== "max" || supportsMaxEffort(options.model))) {
-        body.thinking = { type: "adaptive" };
-        body.output_config = { effort: options.thinking };
-      } else {
-        const budgetTokens = thinkingToBudgetTokens(options.thinking);
-        body.thinking = { type: "enabled", budget_tokens: budgetTokens };
-        const currentMax = body.max_tokens;
-        if (currentMax < budgetTokens + 1024) {
-          body.max_tokens = budgetTokens + 1024;
-        }
+    const { thinkingBudget, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
+    let resolvedEffort = activeEffort;
+    if (resolvedEffort === "max" && !supportsMaxEffort(options.model)) {
+      resolvedEffort = "high";
+    }
+    if (hasBudgetOverride) {
+      body.thinking = { type: "enabled", budget_tokens: thinkingBudget };
+      const currentMax = body.max_tokens;
+      if (currentMax < thinkingBudget + 1024) {
+        body.max_tokens = thinkingBudget + 1024;
       }
+      if (resolvedEffort && supportsEffort(options.model)) {
+        body.output_config = { effort: resolvedEffort };
+      }
+    } else if (thinkingDisabled) {
+      if (resolvedEffort && supportsEffort(options.model)) {
+        body.output_config = { effort: resolvedEffort };
+      }
+    } else if (resolvedEffort && supportsAdaptiveThinking(options.model)) {
+      body.thinking = { type: "adaptive" };
+      body.output_config = { effort: resolvedEffort };
+    } else if (resolvedEffort && supportsEffort(options.model)) {
+      body.output_config = { effort: resolvedEffort };
+    } else if (resolvedEffort) {
+      const budget = THINKING_BUDGETS[resolvedEffort] ?? 5e3;
+      body.thinking = { type: "enabled", budget_tokens: budget };
+      const currentMax = body.max_tokens;
+      if (currentMax < budget + 1024) {
+        body.max_tokens = budget + 1024;
+      }
+    }
+    if (options.temperature !== void 0 && !body.thinking) {
+      body.temperature = options.temperature;
     }
     if (options.responseFormat && options.responseFormat.type !== "text") {
       const jsonInstruction = "You must respond with valid JSON only. No markdown fences, no extra text.";
@@ -1033,6 +1143,9 @@ var AnthropicProvider = class {
 ${jsonInstruction}` : jsonInstruction;
     }
+    if (options.providerOptions) {
+      Object.assign(body, options.providerOptions);
+    }
     return body;
   }
   /**
@@ -1143,9 +1256,12 @@ ${jsonInstruction}` : jsonInstruction;
   // ---------------------------------------------------------------------------
   parseResponse(json) {
     let content = "";
+    let thinkingContent = "";
     const toolCalls = [];
     for (const block of json.content) {
-      if (block.type === "text") {
+      if (block.type === "thinking") {
+        thinkingContent += block.thinking;
+      } else if (block.type === "text") {
         content += block.text;
       } else if (block.type === "tool_use") {
         toolCalls.push({
@@ -1176,6 +1292,7 @@ ${jsonInstruction}` : jsonInstruction;
     ) : void 0;
     return {
       content,
+      thinking_content: thinkingContent || void 0,
       tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
       usage,
       cost
@@ -1226,7 +1343,9 @@ ${jsonInstruction}` : jsonInstruction;
             }
             case "content_block_delta": {
               const delta = event.delta;
-              if (delta?.type === "text_delta" && delta.text) {
+              if (delta?.type === "thinking_delta" && delta.thinking) {
+                yield { type: "thinking_delta", content: delta.thinking };
+              } else if (delta?.type === "text_delta" && delta.text) {
                 yield { type: "text_delta", content: delta.text };
               } else if (delta?.type === "input_json_delta" && delta.partial_json) {
                 yield {
@@ -1297,14 +1416,19 @@ var GEMINI_PRICING = {
   "gemini-2.0-flash": [1e-7, 4e-7],
   "gemini-2.0-flash-lite": [1e-7, 4e-7],
   "gemini-3-pro-preview": [2e-6, 12e-6],
-  "gemini-3-flash-preview": [5e-7, 3e-6]
+  "gemini-3-flash-preview": [5e-7, 3e-6],
+  "gemini-3.1-pro-preview": [2e-6, 12e-6],
+  "gemini-3.1-flash-lite-preview": [25e-8, 15e-7]
 };
+// Pricing keys ordered longest-first, so the prefix fallback in
+// estimateGeminiCost hits the most specific key before a shorter one
+// that is also a prefix of the model name.
+var GEMINI_PRICING_KEYS_BY_LENGTH = Object.keys(GEMINI_PRICING).sort(
+  (a, b) => b.length - a.length
+);
 function estimateGeminiCost(model, inputTokens, outputTokens, cachedTokens) {
   let pricing = GEMINI_PRICING[model];
   if (!pricing) {
-    for (const [key, value] of Object.entries(GEMINI_PRICING)) {
+    for (const key of GEMINI_PRICING_KEYS_BY_LENGTH) {
       if (model.startsWith(key)) {
-        pricing = value;
+        pricing = GEMINI_PRICING[key];
         break;
       }
     }
@@ -1321,9 +1445,32 @@ var THINKING_BUDGETS2 = {
   high: 1e4,
   max: 24576
 };
-function thinkingToBudgetTokens2(thinking) {
-  if (typeof thinking === "string") return THINKING_BUDGETS2[thinking] ?? 5e3;
-  return thinking.budgetTokens;
+// Effort name → `thinkingLevel` value used for Gemini 3.x models; efforts
+// missing from this table fall back to "medium" at the lookup site.
+var THINKING_LEVELS = {
+  low: "low",
+  medium: "medium",
+  high: "high",
+  max: "high"
+  // 3.x caps at 'high'
+};
+/**
+ * True when the model name starts with `gemini-3` immediately followed by
+ * `.` or `-` (for example `gemini-3-pro-preview` or `gemini-3.1-pro-preview`).
+ */
+function isGemini3x(model) {
+  return /^gemini-3[.-]/.test(model);
+}
+/**
+ * Bucket a numeric thinking budget into a thinking level:
+ * up to 1024 → "low", up to 5000 → "medium", anything larger → "high".
+ */
+function budgetToThinkingLevel(budgetTokens) {
+  if (budgetTokens <= 1024) return "low";
+  if (budgetTokens <= 5e3) return "medium";
+  return "high";
+}
+/**
+ * Lowest thinking level used when thinking is disabled on Gemini 3.x:
+ * "low" for names starting with `gemini-3.1-pro`, otherwise "minimal".
+ */
+function minThinkingLevel(model) {
+  if (model.startsWith("gemini-3.1-pro")) return "low";
+  return "minimal";
+}
+// Model names that have already triggered the warning below.
+var _warned3xEffortNone = /* @__PURE__ */ new Set();
+/**
+ * Emit a console warning, at most once per distinct model name, explaining
+ * that effort 'none' maps to the model's minimum thinking level.
+ */
+function warnGemini3xEffortNone(model) {
+  if (_warned3xEffortNone.has(model)) return;
+  _warned3xEffortNone.add(model);
+  console.warn(
+    `[axl] effort: 'none' on Gemini 3.x (${model}) maps to the model's minimum thinking level ('${minThinkingLevel(model)}'), not fully disabled. Gemini 3.x models cannot disable thinking entirely.`
+  );
 }
 var GeminiProvider = class {
   name = "google";
@@ -1438,17 +1585,58 @@ var GeminiProvider = class {
     if (Object.keys(generationConfig).length > 0) {
       body.generationConfig = generationConfig;
     }
-    if (options.thinking) {
-      generationConfig.thinkingConfig = {
-        thinkingBudget: thinkingToBudgetTokens2(options.thinking)
-      };
-      if (!body.generationConfig) {
-        body.generationConfig = generationConfig;
+    const {
+      effort,
+      thinkingBudget,
+      includeThoughts,
+      thinkingDisabled,
+      activeEffort,
+      hasBudgetOverride
+    } = resolveThinkingOptions(options);
+    if (thinkingDisabled) {
+      if (isGemini3x(options.model)) {
+        if (effort === "none") {
+          warnGemini3xEffortNone(options.model);
+        }
+        generationConfig.thinkingConfig = { thinkingLevel: minThinkingLevel(options.model) };
+      } else {
+        generationConfig.thinkingConfig = { thinkingBudget: 0 };
+      }
+      if (!body.generationConfig) body.generationConfig = generationConfig;
+    } else if (hasBudgetOverride) {
+      const config = {};
+      if (isGemini3x(options.model)) {
+        config.thinkingLevel = budgetToThinkingLevel(thinkingBudget);
+      } else {
+        config.thinkingBudget = thinkingBudget;
+      }
+      if (includeThoughts) config.includeThoughts = true;
+      generationConfig.thinkingConfig = config;
+      if (!body.generationConfig) body.generationConfig = generationConfig;
+    } else if (activeEffort) {
+      const config = {};
+      if (isGemini3x(options.model)) {
+        config.thinkingLevel = THINKING_LEVELS[activeEffort] ?? "medium";
+      } else {
+        if (activeEffort === "max" && options.model.startsWith("gemini-2.5-pro")) {
+          config.thinkingBudget = 32768;
+        } else {
+          config.thinkingBudget = THINKING_BUDGETS2[activeEffort] ?? 5e3;
+        }
       }
+      if (includeThoughts) config.includeThoughts = true;
+      generationConfig.thinkingConfig = config;
+      if (!body.generationConfig) body.generationConfig = generationConfig;
+    } else if (includeThoughts) {
+      generationConfig.thinkingConfig = { includeThoughts: true };
+      if (!body.generationConfig) body.generationConfig = generationConfig;
     }
     if (options.toolChoice !== void 0) {
       body.toolConfig = { functionCallingConfig: this.mapToolChoice(options.toolChoice) };
     }
+    if (options.providerOptions) {
+      Object.assign(body, options.providerOptions);
+    }
     return body;
   }
   /**
@@ -1474,28 +1662,33 @@ var GeminiProvider = class {
     const result = [];
     for (const msg of messages) {
       if (msg.role === "assistant") {
-        const parts = [];
-        if (msg.content) {
-          parts.push({ text: msg.content });
-        }
-        if (msg.tool_calls && msg.tool_calls.length > 0) {
-          for (const tc of msg.tool_calls) {
-            let parsedArgs;
-            try {
-              parsedArgs = JSON.parse(tc.function.arguments);
-            } catch {
-              parsedArgs = {};
-            }
-            parts.push({
-              functionCall: {
-                name: tc.function.name,
-                args: parsedArgs
+        const rawParts = msg.providerMetadata?.geminiParts;
+        if (rawParts && rawParts.length > 0) {
+          result.push({ role: "model", parts: rawParts });
+        } else {
+          const parts = [];
+          if (msg.content) {
+            parts.push({ text: msg.content });
+          }
+          if (msg.tool_calls && msg.tool_calls.length > 0) {
+            for (const tc of msg.tool_calls) {
+              let parsedArgs;
+              try {
+                parsedArgs = JSON.parse(tc.function.arguments);
+              } catch {
+                parsedArgs = {};
               }
-            });
+              parts.push({
+                functionCall: {
+                  name: tc.function.name,
+                  args: parsedArgs
+                }
+              });
+            }
+          }
+          if (parts.length > 0) {
+            result.push({ role: "model", parts });
           }
-        }
-        if (parts.length > 0) {
-          result.push({ role: "model", parts });
         }
       } else if (msg.role === "tool") {
         const functionName = toolCallIdToName.get(msg.tool_call_id) ?? "unknown";
@@ -1572,10 +1765,13 @@ var GeminiProvider = class {
   parseResponse(json, model) {
     const candidate = json.candidates?.[0];
     let content = "";
+    let thinkingContent = "";
     const toolCalls = [];
     if (candidate?.content?.parts) {
       for (const part of candidate.content.parts) {
-        if (part.text) {
+        if (part.thought && part.text) {
+          thinkingContent += part.text;
+        } else if (part.text) {
           content += part.text;
         } else if (part.functionCall) {
           toolCalls.push({
@@ -1590,18 +1786,24 @@ var GeminiProvider = class {
       }
     }
     const cachedTokens = json.usageMetadata?.cachedContentTokenCount;
+    const reasoningTokens = json.usageMetadata?.thoughtsTokenCount;
     const usage = json.usageMetadata ? {
       prompt_tokens: json.usageMetadata.promptTokenCount ?? 0,
       completion_tokens: json.usageMetadata.candidatesTokenCount ?? 0,
       total_tokens: json.usageMetadata.totalTokenCount ?? 0,
-      cached_tokens: cachedTokens && cachedTokens > 0 ? cachedTokens : void 0
+      cached_tokens: cachedTokens && cachedTokens > 0 ? cachedTokens : void 0,
+      reasoning_tokens: reasoningTokens && reasoningTokens > 0 ? reasoningTokens : void 0
     } : void 0;
     const cost = usage ? estimateGeminiCost(model, usage.prompt_tokens, usage.completion_tokens, usage.cached_tokens) : void 0;
+    const rawParts = candidate?.content?.parts;
+    const providerMetadata = rawParts ? { geminiParts: rawParts } : void 0;
     return {
       content,
+      thinking_content: thinkingContent || void 0,
       tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
       usage,
-      cost
+      cost,
+      providerMetadata
     };
   }
   // ---------------------------------------------------------------------------
@@ -1612,6 +1814,7 @@ var GeminiProvider = class {
     const decoder = new TextDecoder();
     let buffer = "";
     let usage;
+    const accumulatedParts = [];
     try {
       while (true) {
         const { done, value } = await reader.read();
@@ -1632,17 +1835,22 @@ var GeminiProvider = class {
           }
           if (chunk.usageMetadata) {
             const cached = chunk.usageMetadata.cachedContentTokenCount;
+            const reasoning = chunk.usageMetadata.thoughtsTokenCount;
             usage = {
               prompt_tokens: chunk.usageMetadata.promptTokenCount ?? 0,
               completion_tokens: chunk.usageMetadata.candidatesTokenCount ?? 0,
               total_tokens: chunk.usageMetadata.totalTokenCount ?? 0,
-              cached_tokens: cached && cached > 0 ? cached : void 0
+              cached_tokens: cached && cached > 0 ? cached : void 0,
+              reasoning_tokens: reasoning && reasoning > 0 ? reasoning : void 0
             };
           }
           const candidate = chunk.candidates?.[0];
           if (candidate?.content?.parts) {
             for (const part of candidate.content.parts) {
-              if (part.text) {
+              accumulatedParts.push(part);
+              if (part.thought && part.text) {
+                yield { type: "thinking_delta", content: part.text };
+              } else if (part.text) {
                 yield { type: "text_delta", content: part.text };
               } else if (part.functionCall) {
                 yield {
@@ -1656,7 +1864,8 @@ var GeminiProvider = class {
           }
         }
       }
-      yield { type: "done", usage };
+      const providerMetadata = accumulatedParts.length > 0 ? { geminiParts: accumulatedParts } : void 0;
+      yield { type: "done", usage, providerMetadata };
     } finally {
       reader.releaseLock();
     }
@@ -2029,7 +2238,7 @@ function estimateMessagesTokens(messages) {
   }
   return total;
 }
-var WorkflowContext = class {
+var WorkflowContext = class _WorkflowContext {
   input;
   executionId;
   metadata;
@@ -2082,6 +2291,37 @@ var WorkflowContext = class {
       this.summaryCache = init.metadata.summaryCache;
     }
   }
+  /**
+   * Create a child context for nested agent invocations (e.g., agent-as-tool).
+   * Shares: budget tracking, abort signals, trace emission, provider registry,
+   *         state store, span manager, memory manager, MCP manager, config,
+   *         awaitHuman handler, pending decisions, tool overrides.
+   * Isolates: session history, step counter, streaming callbacks (onToken, onAgentStart, onToolCall).
+   *
+   * @returns A new context built from this one's fields; it carries the same
+   *   `input`, `executionId` and `workflowName` as the parent.
+   */
+  createChildContext() {
+    return new _WorkflowContext({
+      input: this.input,
+      executionId: this.executionId,
+      config: this.config,
+      providerRegistry: this.providerRegistry,
+      // Shallow copy: the child gets its own top-level metadata object, but
+      // nested values are still the same references as the parent's.
+      metadata: { ...this.metadata },
+      // Shared infrastructure
+      budgetContext: this.budgetContext,
+      stateStore: this.stateStore,
+      mcpManager: this.mcpManager,
+      spanManager: this.spanManager,
+      memoryManager: this.memoryManager,
+      onTrace: this.onTrace,
+      onAgentCallComplete: this.onAgentCallComplete,
+      awaitHumanHandler: this.awaitHumanHandler,
+      pendingDecisions: this.pendingDecisions,
+      toolOverrides: this.toolOverrides,
+      signal: this.signal,
+      workflowName: this.workflowName
+      // Isolated: sessionHistory (empty), stepCounter (0),
+      // onToken (null), onAgentStart (null), onToolCall (null)
+    });
+  }
   /**
    * Resolve the current abort signal.
    * Branch-scoped signals (from race/spawn/map/budget) in AsyncLocalStorage
@@ -2144,10 +2384,12 @@ var WorkflowContext = class {
         promptVersion: agent2._config.version,
         temperature: options?.temperature ?? agent2._config.temperature,
         maxTokens: options?.maxTokens ?? agent2._config.maxTokens ?? 4096,
-        thinking: options?.thinking ?? agent2._config.thinking,
-        reasoningEffort: options?.reasoningEffort ?? agent2._config.reasoningEffort,
+        effort: options?.effort ?? agent2._config.effort,
+        thinkingBudget: options?.thinkingBudget ?? agent2._config.thinkingBudget,
+        includeThoughts: options?.includeThoughts ?? agent2._config.includeThoughts,
         toolChoice: options?.toolChoice ?? agent2._config.toolChoice,
-        stop: options?.stop ?? agent2._config.stop
+        stop: options?.stop ?? agent2._config.stop,
+        providerOptions: options?.providerOptions ?? agent2._config.providerOptions
       });
       return result;
     });
@@ -2170,7 +2412,21 @@ var WorkflowContext = class {
     const modelUri = agent2.resolveModel(resolveCtx);
     const systemPrompt = agent2.resolveSystem(resolveCtx);
     const { provider, model } = this.providerRegistry.resolve(modelUri, this.config);
-    const toolDefs = this.buildToolDefs(agent2);
+    let resolvedHandoffs;
+    if (typeof agent2._config.handoffs === "function") {
+      try {
+        resolvedHandoffs = agent2._config.handoffs(resolveCtx);
+      } catch (err) {
+        this.log("handoff_resolve_error", {
+          agent: agent2._name,
+          error: err instanceof Error ? err.message : String(err)
+        });
+        resolvedHandoffs = void 0;
+      }
+    } else {
+      resolvedHandoffs = agent2._config.handoffs;
+    }
+    const toolDefs = this.buildToolDefs(agent2, resolvedHandoffs);
     const messages = [];
     if (systemPrompt) {
       messages.push({ role: "system", content: systemPrompt });
@@ -2272,21 +2528,17 @@ Please fix and try again.`;
         throw new TimeoutError("ctx.ask()", timeoutMs);
       }
       turns++;
-      const thinking = options?.thinking ?? agent2._config.thinking;
-      if (thinking && typeof thinking === "object" && thinking.budgetTokens <= 0) {
-        throw new Error(
-          `thinking.budgetTokens must be a positive number, got ${thinking.budgetTokens}`
-        );
-      }
       const chatOptions = {
         model,
         temperature: options?.temperature ?? agent2._config.temperature,
         tools: toolDefs.length > 0 ? toolDefs : void 0,
         maxTokens: options?.maxTokens ?? agent2._config.maxTokens ?? 4096,
-        thinking,
-        reasoningEffort: options?.reasoningEffort ?? agent2._config.reasoningEffort,
+        effort: options?.effort ?? agent2._config.effort,
+        thinkingBudget: options?.thinkingBudget ?? agent2._config.thinkingBudget,
+        includeThoughts: options?.includeThoughts ?? agent2._config.includeThoughts,
         toolChoice: options?.toolChoice ?? agent2._config.toolChoice,
         stop: options?.stop ?? agent2._config.stop,
+        providerOptions: options?.providerOptions ?? agent2._config.providerOptions,
         signal: this.currentSignal
       };
       if (options?.schema && toolDefs.length === 0) {
@@ -2298,10 +2550,14 @@ Please fix and try again.`;
         let content2 = "";
         const toolCalls = [];
         const toolCallBuffers = /* @__PURE__ */ new Map();
+        let streamProviderMetadata;
+        let thinkingContent = "";
         for await (const chunk of provider.stream(currentMessages, chatOptions)) {
           if (chunk.type === "text_delta") {
             content2 += chunk.content;
             this.onToken(chunk.content);
+          } else if (chunk.type === "thinking_delta") {
+            thinkingContent += chunk.content;
           } else if (chunk.type === "tool_call_delta") {
             let buffer = toolCallBuffers.get(chunk.id);
             if (!buffer) {
@@ -2311,6 +2567,7 @@ Please fix and try again.`;
             if (chunk.name) buffer.name = chunk.name;
             if (chunk.arguments) buffer.arguments += chunk.arguments;
           } else if (chunk.type === "done") {
+            streamProviderMetadata = chunk.providerMetadata;
             if (chunk.usage) {
               response = {
                 content: content2,
@@ -2337,6 +2594,12 @@ Please fix and try again.`;
         if (toolCalls.length > 0) {
           response.tool_calls = toolCalls;
         }
+        if (streamProviderMetadata) {
+          response.providerMetadata = streamProviderMetadata;
+        }
+        if (thinkingContent) {
+          response.thinking_content = thinkingContent;
+        }
       } else {
         response = await provider.chat(currentMessages, chatOptions);
       }
@@ -2367,13 +2630,14 @@ Please fix and try again.`;
         currentMessages.push({
           role: "assistant",
           content: response.content || "",
-          tool_calls: response.tool_calls
+          tool_calls: response.tool_calls,
+          ...response.providerMetadata ? { providerMetadata: response.providerMetadata } : {}
         });
         for (const toolCall of response.tool_calls) {
           const toolName = toolCall.function.name;
           if (toolName.startsWith("handoff_to_")) {
             const targetName = toolName.replace("handoff_to_", "");
-            const descriptor = agent2._config.handoffs?.find((h) => h.agent._name === targetName);
+            const descriptor = resolvedHandoffs?.find((h) => h.agent._name === targetName);
             if (descriptor) {
               const mode = descriptor.mode ?? "oneway";
               let handoffPrompt = prompt;
@@ -2626,8 +2890,9 @@ Please fix and try again.`;
                 resultContent2 = JSON.stringify(toolResult2);
               }
             } else if (tool2) {
+              const childCtx = this.createChildContext();
               try {
-                toolResult2 = await tool2._execute(toolArgs);
+                toolResult2 = await tool2._execute(toolArgs, childCtx);
               } catch (err) {
                 toolResult2 = { error: err instanceof Error ? err.message : String(err) };
               }
@@ -2707,7 +2972,8 @@ Please fix and try again.`;
               guardrailOutputRetries++;
               currentMessages.push({
                 role: "assistant",
-                content
+                content,
+                ...response.providerMetadata ? { providerMetadata: response.providerMetadata } : {}
               });
               currentMessages.push({
                 role: "system",
@@ -2728,6 +2994,7 @@ Please fix and try again.`;
         try {
           const parsed = JSON.parse(stripMarkdownFences(content));
           const validated = options.schema.parse(parsed);
+          this.pushAssistantToSessionHistory(content, response.providerMetadata);
           return validated;
         } catch (err) {
           const maxRetries = options.retries ?? 3;
@@ -2754,11 +3021,23 @@ Please fix and try again.`;
           throw new VerifyError(content, zodErr, maxRetries);
         }
       }
+      this.pushAssistantToSessionHistory(content, response.providerMetadata);
       return content;
     }
     throw new MaxTurnsError("ctx.ask()", maxTurns);
   }
-  buildToolDefs(agent2) {
+  /**
+   * Append an assistant message with `content` to this.sessionHistory; `providerMetadata` is attached
+   * only when truthy (e.g., Gemini thought signatures needed for multi-turn reasoning context).
+   */
+  pushAssistantToSessionHistory(content, providerMetadata) {
+    this.sessionHistory.push({
+      role: "assistant",
+      content,
+      ...providerMetadata ? { providerMetadata } : {}
+    });
+  }
+  buildToolDefs(agent2, resolvedHandoffs) {
     const defs = [];
     if (agent2._config.tools) {
       for (const tool2 of agent2._config.tools) {
@@ -2772,8 +3051,8 @@ Please fix and try again.`;
         });
       }
     }
-    if (agent2._config.handoffs) {
-      for (const { agent: handoffAgent, description, mode } of agent2._config.handoffs) {
+    if (resolvedHandoffs) {
+      for (const { agent: handoffAgent, description, mode } of resolvedHandoffs) {
         const isRoundtrip = mode === "roundtrip";
         const defaultDesc = isRoundtrip ? `Delegate a task to ${handoffAgent._name} and receive the result back` : `Hand off the conversation to ${handoffAgent._name}`;
         defs.push({
@@ -3462,6 +3741,79 @@ ${summaryResponse.content}`
     const sessionId = this.metadata?.sessionId;
     await this.memoryManager.forget(key, this.stateStore, sessionId, options);
   }
+  // ── ctx.delegate() ──────────────────────────────────────────────────
+  /**
+   * Select the best agent from a list of candidates and invoke it via ctx.ask().
+   * A single candidate is asked directly; otherwise a temporary router agent (temperature 0,
+   * maxTurns 2) exposes each candidate as a handoff described by the first 200 chars of its system prompt.
+   * For full control over the router's behavior, create the router agent explicitly.
+   *
+   * @param agents - Candidate agents with unique names (at least 1)
+   * @param prompt - The prompt passed to ctx.ask()
+   * @param options - Optional: schema, retries, metadata, routerModel (default: first candidate's model)
+   * @throws {Error} If `agents` is empty or two candidates share a name
+   */
+  async delegate(agents, prompt, options) {
+    if (agents.length === 0) {
+      throw new Error("ctx.delegate() requires at least one candidate agent");
+    }
+    const names = /* @__PURE__ */ new Set();
+    for (const a of agents) {
+      if (names.has(a._name)) {
+        throw new Error(
+          `ctx.delegate() received duplicate agent name '${a._name}'. All candidate agents must have unique names.`
+        );
+      }
+      names.add(a._name);
+    }
+    if (agents.length === 1) {
+      return this.ask(agents[0], prompt, {
+        schema: options?.schema,
+        retries: options?.retries,
+        metadata: options?.metadata
+      });
+    }
+    const resolveCtx = options?.metadata ? { metadata: { ...this.metadata, ...options.metadata } } : { metadata: this.metadata };
+    const routerModelUri = options?.routerModel ?? agents[0].resolveModel(resolveCtx);
+    const handoffs = agents.map((a) => {
+      let description;
+      try {
+        description = a.resolveSystem(resolveCtx).slice(0, 200);
+      } catch {
+        description = `Agent: ${a._name}`;
+      }
+      return { agent: a, description };
+    });
+    const routerSystem = "Route to the best agent for this task. Always hand off; never answer directly.";
+    const routerAgent = {
+      _config: {
+        model: routerModelUri,
+        system: routerSystem,
+        temperature: 0,
+        handoffs,
+        maxTurns: 2
+      },
+      _name: "_delegate_router",
+      ask: async () => {
+        throw new Error("Direct invocation not supported on delegate router");
+      },
+      resolveModel: () => routerModelUri,
+      resolveSystem: () => routerSystem
+    };
+    this.emitTrace({
+      type: "delegate",
+      agent: "_delegate_router",
+      data: {
+        candidates: agents.map((a) => a._name),
+        routerModel: routerModelUri
+      }
+    });
+    return this.ask(routerAgent, prompt, {
+      schema: options?.schema,
+      retries: options?.retries,
+      metadata: options?.metadata
+    });
+  }
   // ── Private ───────────────────────────────────────────────────────────
   emitTrace(partial) {
     let data = partial.data;
@@ -4071,11 +4423,13 @@ var Session = class _Session {
         ...cachedSummary ? { summaryCache: cachedSummary } : {}
       }
     });
-    const assistantMessage = {
-      role: "assistant",
-      content: typeof result === "string" ? result : JSON.stringify(result)
-    };
-    history.push(assistantMessage);
+    const lastMsg = history[history.length - 1];
+    if (!(lastMsg && lastMsg.role === "assistant")) {
+      history.push({
+        role: "assistant",
+        content: typeof result === "string" ? result : JSON.stringify(result)
+      });
+    }
     if (this.options.persist !== false) {
       await this.store.saveSession(this.sessionId, history);
     }
@@ -4118,10 +4472,13 @@ var Session = class _Session {
       }
     });
     const updateHistory = async (result) => {
-      history.push({
-        role: "assistant",
-        content: typeof result === "string" ? result : JSON.stringify(result)
-      });
+      const lastMsg = history[history.length - 1];
+      if (!(lastMsg && lastMsg.role === "assistant")) {
+        history.push({
+          role: "assistant",
+          content: typeof result === "string" ? result : JSON.stringify(result)
+        });
+      }
       if (this.options.persist !== false) {
         await this.store.saveSession(this.sessionId, history);
       }
@@ -5011,6 +5368,24 @@ var AxlRuntime = class extends import_node_events2.EventEmitter {
   getExecutions() {
     return [...this.executions.values()]; // fresh array built from the values of this.executions
   }
+  /**
+   * Create a lightweight WorkflowContext for ad-hoc use (tool testing, prototyping).
+   * It gets a fresh random executionId, undefined input, `options.metadata` (if given), and the runtime's
+   * config, provider registry, state store and MCP/span/memory managers; no budget or streaming callbacks are passed.
+   */
+  createContext(options) {
+    return new WorkflowContext({
+      input: void 0,
+      executionId: (0, import_node_crypto2.randomUUID)(),
+      metadata: options?.metadata,
+      config: this.config,
+      providerRegistry: this.providerRegistry,
+      stateStore: this.stateStore,
+      mcpManager: this.mcpManager,
+      spanManager: this.spanManager,
+      memoryManager: this.memoryManager
+    });
+  }
   /** Register a custom provider instance. */
   registerProvider(name, provider) {
     this.providerRegistry.registerInstance(name, provider);
@@ -5709,6 +6084,7 @@ function cosineSimilarity2(a, b) {
   agent,
   createSpanManager,
   defineConfig,
+  resolveThinkingOptions,
   tool,
   workflow,
   zodToJsonSchema