@axlsdk/axl 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -16,33 +16,23 @@ type ToolDefinition = {
16
16
  };
17
17
  };
18
18
  /**
19
- * Unified thinking/reasoning level that works across all providers.
19
+ * Unified effort level controlling how thoroughly the model responds.
20
20
  *
21
- * Simple form (`'low' | 'medium' | 'high' | 'max'`) is portable across all providers:
22
- * - **OpenAI** (o1/o3/o4-mini): maps to `reasoning_effort` (`'max'` → `'xhigh'`)
23
- * - **OpenAI Responses**: maps to `reasoning.effort` (`'max'` → `'xhigh'`)
24
- * - **Anthropic** (4.6): maps to adaptive mode + `output_config.effort`
25
- * - **Anthropic** (older): maps to `thinking.budget_tokens` (`'max'` → `32000`)
26
- * - **Gemini** (2.5+): maps to `generationConfig.thinkingConfig.thinkingBudget` (`'max'` → `24576`)
21
+ * - `'none'` Disable thinking/reasoning. On Gemini 3.x, maps to the model's
22
+ * minimum thinking level (3.1 Pro: 'low', others: 'minimal'). On other providers,
23
+ * fully disables reasoning.
24
+ * - `'low'` through `'max'` Increasing levels of reasoning depth and token spend.
27
25
  *
28
- * Budget form (`{ budgetTokens: number }`) gives explicit control over thinking tokens.
29
- * For OpenAI, budget is mapped to the nearest effort level.
26
+ * Provider mapping:
27
+ * - Anthropic 4.6: adaptive thinking + output_config.effort
28
+ * - Anthropic Opus 4.5: output_config.effort (no adaptive)
29
+ * - Anthropic older: thinking.budget_tokens fallback
30
+ * - OpenAI o-series: reasoning_effort
31
+ * - OpenAI GPT-5.x: reasoning.effort / reasoning_effort
32
+ * - Gemini 3.x: thinkingLevel (`'none'` → model min: `'minimal'` or `'low'` for 3.1 Pro)
33
+ * - Gemini 2.x: thinkingBudget (`'none'` → 0; some models have minimums)
30
34
  */
31
- type Thinking = 'low' | 'medium' | 'high' | 'max' | {
32
- budgetTokens: number;
33
- };
34
- /**
35
- * Reasoning effort level for OpenAI reasoning models.
36
- *
37
- * This is a low-level, OpenAI-specific escape hatch. Prefer `thinking` for cross-provider use.
38
- *
39
- * Supported values:
40
- * - **OpenAI** (o1/o3/o4-mini): all values — `'none'`, `'minimal'`, `'low'`, `'medium'`, `'high'`, `'xhigh'`
41
- * - **OpenAI Responses**: all values (via `reasoning.effort`)
42
- * - **Anthropic**: not supported
43
- * - **Gemini**: not supported
44
- */
45
- type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
35
+ type Effort = 'none' | 'low' | 'medium' | 'high' | 'max';
46
36
  /** Tool choice strategy for LLM calls. */
47
37
  type ToolChoice = 'auto' | 'none' | 'required' | {
48
38
  type: 'function';
@@ -61,9 +51,24 @@ type ChatOptions = {
61
51
  responseFormat?: ResponseFormat;
62
52
  stop?: string[];
63
53
  signal?: AbortSignal;
64
- thinking?: Thinking;
65
- reasoningEffort?: ReasoningEffort;
54
+ /** How hard should the model try? Primary param for cost/quality tradeoff.
55
+ * 'none' disables thinking/reasoning (Gemini 3.x: maps to minimal).
56
+ * Omit to use provider defaults. */
57
+ effort?: Effort;
58
+ /** Precise thinking token budget (advanced). When set alongside `effort`, overrides the
59
+ * thinking/reasoning allocation. On Anthropic 4.6, `effort` still controls output quality
60
+ * independently. On all other providers, `thinkingBudget` fully overrides `effort` for
61
+ * reasoning behavior. Set to 0 to disable thinking while keeping effort for output control
62
+ * (Anthropic-specific optimization; on other providers, simply disables reasoning). */
63
+ thinkingBudget?: number;
64
+ /** Show reasoning summaries in responses (thinking_content / thinking_delta).
65
+ * Supported on OpenAI Responses API and Gemini. No-op on Anthropic. */
66
+ includeThoughts?: boolean;
66
67
  toolChoice?: ToolChoice;
68
+ /** Provider-specific options merged LAST into the raw API request body.
69
+ * Can override any computed field including model and messages — use with care.
70
+ * NOT portable across providers — use effort/thinkingBudget/includeThoughts for cross-provider behavior. */
71
+ providerOptions?: Record<string, unknown>;
67
72
  };
68
73
  /**
69
74
  * Response format for structured output (JSON mode).
@@ -86,6 +91,9 @@ type ResponseFormat = {
86
91
  type StreamChunk = {
87
92
  type: 'text_delta';
88
93
  content: string;
94
+ } | {
95
+ type: 'thinking_delta';
96
+ content: string;
89
97
  } | {
90
98
  type: 'tool_call_delta';
91
99
  id: string;
@@ -100,6 +108,8 @@ type StreamChunk = {
100
108
  reasoning_tokens?: number;
101
109
  cached_tokens?: number;
102
110
  };
111
+ /** Provider-specific opaque metadata (e.g. raw Gemini parts with thought signatures). */
112
+ providerMetadata?: Record<string, unknown>;
103
113
  };
104
114
  /**
105
115
  * Core provider interface. Every LLM adapter must implement this.
@@ -120,6 +130,24 @@ interface Provider {
120
130
  * Alias for Provider. Used for backward compatibility with index.ts exports.
121
131
  */
122
132
  type ProviderAdapter = Provider;
133
+ /** Normalized thinking options computed once, used by all providers. */
134
+ type ResolvedThinkingOptions = {
135
+ /** Raw effort value from user. */
136
+ effort: Effort | undefined;
137
+ /** Raw thinking budget from user. */
138
+ thinkingBudget: number | undefined;
139
+ /** Whether to include thought summaries in responses. */
140
+ includeThoughts: boolean;
141
+ /** True when thinking/reasoning should be disabled (effort: 'none' or thinkingBudget: 0). */
142
+ thinkingDisabled: boolean;
143
+ /** Effort level with 'none' stripped (undefined when effort is 'none' or unset). */
144
+ activeEffort: Exclude<Effort, 'none'> | undefined;
145
+ /** True when an explicit positive budget overrides effort-based allocation. */
146
+ hasBudgetOverride: boolean;
147
+ };
148
+ /** Resolve effort/thinkingBudget/includeThoughts into normalized form.
149
+ * Validates inputs and computes derived flags used by all provider adapters. */
150
+ declare function resolveThinkingOptions(options: Pick<ChatOptions, 'effort' | 'thinkingBudget' | 'includeThoughts'>): ResolvedThinkingOptions;
123
151
 
124
152
  /** Result type for concurrent operations (spawn, map) */
125
153
  type Result<T> = {
@@ -187,14 +215,29 @@ type AskOptions<T = unknown> = {
187
215
  temperature?: number;
188
216
  /** Override max tokens for this call (default: 4096). */
189
217
  maxTokens?: number;
190
- /** Thinking level portable across all providers. Prefer this over `reasoningEffort`. */
191
- thinking?: Thinking;
192
- /** OpenAI-specific reasoning effort (escape hatch). Overridden by `thinking` if both are set. */
193
- reasoningEffort?: ReasoningEffort;
218
+ /** How hard should the model try? Primary param for cost/quality tradeoff. */
219
+ effort?: Effort;
220
+ /** Precise thinking token budget (advanced). Overrides effort-based thinking allocation. */
221
+ thinkingBudget?: number;
222
+ /** Show reasoning summaries in responses. */
223
+ includeThoughts?: boolean;
194
224
  /** Tool choice strategy for this call. */
195
225
  toolChoice?: ToolChoice;
196
226
  /** Stop sequences for this call. */
197
227
  stop?: string[];
228
+ /** Provider-specific options merged into API requests. Not portable across providers. */
229
+ providerOptions?: Record<string, unknown>;
230
+ };
231
+ /** Delegate options */
232
+ type DelegateOptions<T = unknown> = {
233
+ /** Zod schema for structured output from the selected agent. */
234
+ schema?: z.ZodType<T>;
235
+ /** Model URI for the internal router agent (default: first candidate's model). */
236
+ routerModel?: string;
237
+ /** Additional metadata passed to the router and selected agent. */
238
+ metadata?: Record<string, unknown>;
239
+ /** Number of retries for structured output validation (passed to the final ask). */
240
+ retries?: number;
198
241
  };
199
242
  /** Race options */
200
243
  type RaceOptions<T = unknown> = {
@@ -207,7 +250,7 @@ type ExecutionStatus = 'running' | 'completed' | 'failed' | 'waiting';
207
250
  type TraceEvent = {
208
251
  executionId: string;
209
252
  step: number;
210
- type: 'agent_call' | 'tool_call' | 'verify' | 'handoff' | 'tool_denied' | 'log' | 'workflow_start' | 'workflow_end' | 'guardrail';
253
+ type: 'agent_call' | 'tool_call' | 'verify' | 'handoff' | 'delegate' | 'tool_denied' | 'log' | 'workflow_start' | 'workflow_end' | 'guardrail';
211
254
  workflow?: string;
212
255
  agent?: string;
213
256
  tool?: string;
@@ -315,10 +358,13 @@ type AgentCallInfo = {
315
358
  promptVersion?: string;
316
359
  temperature?: number;
317
360
  maxTokens?: number;
318
- thinking?: Thinking;
319
- reasoningEffort?: ReasoningEffort;
361
+ effort?: Effort;
362
+ thinkingBudget?: number;
363
+ includeThoughts?: boolean;
320
364
  toolChoice?: ToolChoice;
321
365
  stop?: string[];
366
+ /** Provider-specific options merged into API requests. Not portable across providers. */
367
+ providerOptions?: Record<string, unknown>;
322
368
  };
323
369
  /** Chat message types for provider communication */
324
370
  type ChatRole = 'system' | 'user' | 'assistant' | 'tool';
@@ -328,6 +374,8 @@ type ChatMessage = {
328
374
  name?: string;
329
375
  tool_calls?: ToolCallMessage[];
330
376
  tool_call_id?: string;
377
+ /** Provider-specific opaque metadata that must round-trip through conversation history. */
378
+ providerMetadata?: Record<string, unknown>;
331
379
  };
332
380
  type ToolCallMessage = {
333
381
  id: string;
@@ -340,6 +388,7 @@ type ToolCallMessage = {
340
388
  /** Provider response */
341
389
  type ProviderResponse = {
342
390
  content: string;
391
+ thinking_content?: string;
343
392
  tool_calls?: ToolCallMessage[];
344
393
  usage?: {
345
394
  prompt_tokens: number;
@@ -349,6 +398,8 @@ type ProviderResponse = {
349
398
  cached_tokens?: number;
350
399
  };
351
400
  cost?: number;
401
+ /** Provider-specific opaque metadata that needs to round-trip through conversation history. */
402
+ providerMetadata?: Record<string, unknown>;
352
403
  };
353
404
 
354
405
  /** Descriptor for a handoff target agent with optional description. */
@@ -368,15 +419,20 @@ type AgentConfig = {
368
419
  metadata?: Record<string, unknown>;
369
420
  }) => string);
370
421
  tools?: Tool<any, any>[];
371
- handoffs?: HandoffDescriptor[];
422
+ handoffs?: HandoffDescriptor[] | ((ctx: {
423
+ metadata?: Record<string, unknown>;
424
+ }) => HandoffDescriptor[]);
372
425
  mcp?: string[];
373
426
  mcpTools?: string[];
374
427
  temperature?: number;
375
428
  maxTokens?: number;
376
- thinking?: Thinking;
377
- reasoningEffort?: ReasoningEffort;
429
+ effort?: Effort;
430
+ thinkingBudget?: number;
431
+ includeThoughts?: boolean;
378
432
  toolChoice?: ToolChoice;
379
433
  stop?: string[];
434
+ /** Provider-specific options merged into API requests. Not portable across providers. */
435
+ providerOptions?: Record<string, unknown>;
380
436
  maxTurns?: number;
381
437
  timeout?: string;
382
438
  maxContext?: number;
@@ -896,6 +952,14 @@ declare class WorkflowContext<TInput = unknown> {
896
952
  private onAgentStart?;
897
953
  private onAgentCallComplete?;
898
954
  constructor(init: WorkflowContextInit);
955
+ /**
956
+ * Create a child context for nested agent invocations (e.g., agent-as-tool).
957
+ * Shares: budget tracking, abort signals, trace emission, provider registry,
958
+ * state store, span manager, memory manager, MCP manager, config,
959
+ * awaitHuman handler, pending decisions, tool overrides.
960
+ * Isolates: session history, step counter, streaming callbacks (onToken, onAgentStart, onToolCall).
961
+ */
962
+ createChildContext(): WorkflowContext;
899
963
  /**
900
964
  * Resolve the current abort signal.
901
965
  * Branch-scoped signals (from race/spawn/map/budget) in AsyncLocalStorage
@@ -904,6 +968,11 @@ declare class WorkflowContext<TInput = unknown> {
904
968
  private get currentSignal();
905
969
  ask<T = string>(agent: Agent, prompt: string, options?: AskOptions<T>): Promise<T>;
906
970
  private executeAgentCall;
971
+ /**
972
+ * Push the final assistant message into session history, preserving providerMetadata
973
+ * (e.g., Gemini thought signatures needed for multi-turn reasoning context).
974
+ */
975
+ private pushAssistantToSessionHistory;
907
976
  private buildToolDefs;
908
977
  /**
909
978
  * Summarize old messages to fit within context window.
@@ -964,6 +1033,18 @@ declare class WorkflowContext<TInput = unknown> {
964
1033
  forget(key: string, options?: {
965
1034
  scope?: 'session' | 'global';
966
1035
  }): Promise<void>;
1036
+ /**
1037
+ * Select the best agent from a list of candidates and invoke it.
1038
+ * Creates a temporary router agent that uses handoffs to pick the right specialist.
1039
+ *
1040
+ * This is convenience sugar over creating a router agent with dynamic handoffs.
1041
+ * For full control over the router's behavior, create the router agent explicitly.
1042
+ *
1043
+ * @param agents - Candidate agents to choose from (at least 1)
1044
+ * @param prompt - The prompt to send to the selected agent
1045
+ * @param options - Optional: schema, routerModel, metadata, retries
1046
+ */
1047
+ delegate<T = string>(agents: Agent[], prompt: string, options?: DelegateOptions<T>): Promise<T>;
967
1048
  private emitTrace;
968
1049
  }
969
1050
 
@@ -987,7 +1068,7 @@ type ToolConfig<TInput extends z.ZodTypeAny, TOutput = unknown> = {
987
1068
  name: string;
988
1069
  description: string;
989
1070
  input: TInput;
990
- handler: (input: z.infer<TInput>) => TOutput | Promise<TOutput>;
1071
+ handler: (input: z.infer<TInput>, ctx: WorkflowContext) => TOutput | Promise<TOutput>;
991
1072
  retry?: RetryPolicy;
992
1073
  sensitive?: boolean;
993
1074
  /** Maximum string length for any string argument. Default: 10000. Set to 0 to disable. */
@@ -1009,7 +1090,7 @@ type Tool<TInput extends z.ZodTypeAny = z.ZodTypeAny, TOutput = unknown> = {
1009
1090
  /** Run the tool directly from workflow code */
1010
1091
  run(ctx: WorkflowContext, input: z.infer<TInput>): Promise<TOutput>;
1011
1092
  /** Execute the handler (internal use — includes retry logic) */
1012
- _execute(input: z.infer<TInput>): Promise<TOutput>;
1093
+ _execute(input: z.infer<TInput>, ctx?: WorkflowContext): Promise<TOutput>;
1013
1094
  };
1014
1095
  /**
1015
1096
  * Define a tool with Zod-validated input, a handler function, and optional retry policy.
@@ -1198,6 +1279,14 @@ declare class AxlRuntime extends EventEmitter {
1198
1279
  runRegisteredEval(name: string): Promise<unknown>;
1199
1280
  /** Get all execution info (running + completed). */
1200
1281
  getExecutions(): ExecutionInfo[];
1282
+ /**
1283
+ * Create a lightweight WorkflowContext for ad-hoc use (tool testing, prototyping).
1284
+ * The context has access to the runtime's providers, state store, and MCP manager
1285
+ * but no session history, streaming callbacks, or budget tracking.
1286
+ */
1287
+ createContext(options?: {
1288
+ metadata?: Record<string, unknown>;
1289
+ }): WorkflowContext;
1201
1290
  /** Register a custom provider instance. */
1202
1291
  registerProvider(name: string, provider: Provider): void;
1203
1292
  /** Execute a workflow and return the result. */
@@ -1705,4 +1794,4 @@ declare class NoopSpanManager implements SpanManager {
1705
1794
  */
1706
1795
  declare function createSpanManager(config?: TelemetryConfig): Promise<SpanManager>;
1707
1796
 
1708
- export { type Agent, type AgentCallInfo, type AgentConfig, AnthropicProvider, type AskOptions, type AwaitHumanOptions, type AxlConfig, AxlError, AxlRuntime, AxlStream, BudgetExceededError, type BudgetOptions, type BudgetResult, type ChatMessage, type ChatOptions, type Embedder, type ExecutionInfo, type ExecutionState, GeminiProvider, type GuardrailBlockHandler, GuardrailError, type GuardrailResult, type GuardrailsConfig, type HandoffDescriptor, type HandoffRecord, type HumanDecision, InMemoryVectorStore, type InputGuardrail, type MapOptions, MaxTurnsError, McpManager, type McpServer, type McpServerConfig, type McpToolDefinition, type McpToolResult, type MemoryConfig, MemoryManager, MemoryStore, NoConsensus, NoopSpanManager, OpenAIEmbedder, OpenAIProvider, OpenAIResponsesProvider, type OutputGuardrail, type PendingDecision, type Provider, type ProviderAdapter, ProviderRegistry, type ProviderResponse, QuorumNotMet, type RaceOptions, type ReasoningEffort, type RecallOptions, RedisStore, type RememberOptions, type Result, type RetryPolicy, SQLiteStore, Session, type SessionOptions, type SpanHandle, type SpanManager, type SpawnOptions, SqliteVectorStore, type StateStore, type StreamChunk, type StreamEvent, type TelemetryConfig, type Thinking, TimeoutError, type Tool, type ToolCallMessage, type ToolChoice, type ToolConfig, ToolDenied, type ToolHooks, type TraceEvent, type VectorEntry, type VectorResult, type VectorStore, VerifyError, type VerifyOptions, type VoteOptions, type Workflow, type WorkflowConfig, WorkflowContext, type WorkflowContextInit, agent, createSpanManager, defineConfig, tool, workflow, zodToJsonSchema };
1797
+ export { type Agent, type AgentCallInfo, type AgentConfig, AnthropicProvider, type AskOptions, type AwaitHumanOptions, type AxlConfig, AxlError, AxlRuntime, AxlStream, BudgetExceededError, type BudgetOptions, type BudgetResult, type ChatMessage, type ChatOptions, type DelegateOptions, type Effort, type Embedder, type ExecutionInfo, type ExecutionState, GeminiProvider, type GuardrailBlockHandler, GuardrailError, type GuardrailResult, type GuardrailsConfig, type HandoffDescriptor, type HandoffRecord, type HumanDecision, InMemoryVectorStore, type InputGuardrail, type MapOptions, MaxTurnsError, McpManager, type McpServer, type McpServerConfig, type McpToolDefinition, type McpToolResult, type MemoryConfig, MemoryManager, MemoryStore, NoConsensus, NoopSpanManager, OpenAIEmbedder, OpenAIProvider, OpenAIResponsesProvider, type OutputGuardrail, type PendingDecision, type Provider, type ProviderAdapter, ProviderRegistry, type ProviderResponse, QuorumNotMet, type RaceOptions, type RecallOptions, RedisStore, type RememberOptions, type ResolvedThinkingOptions, type Result, type RetryPolicy, SQLiteStore, Session, type SessionOptions, type SpanHandle, type SpanManager, type SpawnOptions, SqliteVectorStore, type StateStore, type StreamChunk, type StreamEvent, type TelemetryConfig, TimeoutError, type Tool, type ToolCallMessage, type ToolChoice, type ToolConfig, ToolDenied, type ToolHooks, type TraceEvent, type VectorEntry, type VectorResult, type VectorStore, VerifyError, type VerifyOptions, type VoteOptions, type Workflow, type WorkflowConfig, WorkflowContext, type WorkflowContextInit, agent, createSpanManager, defineConfig, resolveThinkingOptions, tool, workflow, zodToJsonSchema };
package/dist/index.d.ts CHANGED
@@ -16,33 +16,23 @@ type ToolDefinition = {
16
16
  };
17
17
  };
18
18
  /**
19
- * Unified thinking/reasoning level that works across all providers.
19
+ * Unified effort level controlling how thoroughly the model responds.
20
20
  *
21
- * Simple form (`'low' | 'medium' | 'high' | 'max'`) is portable across all providers:
22
- * - **OpenAI** (o1/o3/o4-mini): maps to `reasoning_effort` (`'max'` → `'xhigh'`)
23
- * - **OpenAI Responses**: maps to `reasoning.effort` (`'max'` → `'xhigh'`)
24
- * - **Anthropic** (4.6): maps to adaptive mode + `output_config.effort`
25
- * - **Anthropic** (older): maps to `thinking.budget_tokens` (`'max'` → `32000`)
26
- * - **Gemini** (2.5+): maps to `generationConfig.thinkingConfig.thinkingBudget` (`'max'` → `24576`)
21
+ * - `'none'` Disable thinking/reasoning. On Gemini 3.x, maps to the model's
22
+ * minimum thinking level (3.1 Pro: 'low', others: 'minimal'). On other providers,
23
+ * fully disables reasoning.
24
+ * - `'low'` through `'max'` Increasing levels of reasoning depth and token spend.
27
25
  *
28
- * Budget form (`{ budgetTokens: number }`) gives explicit control over thinking tokens.
29
- * For OpenAI, budget is mapped to the nearest effort level.
26
+ * Provider mapping:
27
+ * - Anthropic 4.6: adaptive thinking + output_config.effort
28
+ * - Anthropic Opus 4.5: output_config.effort (no adaptive)
29
+ * - Anthropic older: thinking.budget_tokens fallback
30
+ * - OpenAI o-series: reasoning_effort
31
+ * - OpenAI GPT-5.x: reasoning.effort / reasoning_effort
32
+ * - Gemini 3.x: thinkingLevel (`'none'` → model min: `'minimal'` or `'low'` for 3.1 Pro)
33
+ * - Gemini 2.x: thinkingBudget (`'none'` → 0; some models have minimums)
30
34
  */
31
- type Thinking = 'low' | 'medium' | 'high' | 'max' | {
32
- budgetTokens: number;
33
- };
34
- /**
35
- * Reasoning effort level for OpenAI reasoning models.
36
- *
37
- * This is a low-level, OpenAI-specific escape hatch. Prefer `thinking` for cross-provider use.
38
- *
39
- * Supported values:
40
- * - **OpenAI** (o1/o3/o4-mini): all values — `'none'`, `'minimal'`, `'low'`, `'medium'`, `'high'`, `'xhigh'`
41
- * - **OpenAI Responses**: all values (via `reasoning.effort`)
42
- * - **Anthropic**: not supported
43
- * - **Gemini**: not supported
44
- */
45
- type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
35
+ type Effort = 'none' | 'low' | 'medium' | 'high' | 'max';
46
36
  /** Tool choice strategy for LLM calls. */
47
37
  type ToolChoice = 'auto' | 'none' | 'required' | {
48
38
  type: 'function';
@@ -61,9 +51,24 @@ type ChatOptions = {
61
51
  responseFormat?: ResponseFormat;
62
52
  stop?: string[];
63
53
  signal?: AbortSignal;
64
- thinking?: Thinking;
65
- reasoningEffort?: ReasoningEffort;
54
+ /** How hard should the model try? Primary param for cost/quality tradeoff.
55
+ * 'none' disables thinking/reasoning (Gemini 3.x: maps to minimal).
56
+ * Omit to use provider defaults. */
57
+ effort?: Effort;
58
+ /** Precise thinking token budget (advanced). When set alongside `effort`, overrides the
59
+ * thinking/reasoning allocation. On Anthropic 4.6, `effort` still controls output quality
60
+ * independently. On all other providers, `thinkingBudget` fully overrides `effort` for
61
+ * reasoning behavior. Set to 0 to disable thinking while keeping effort for output control
62
+ * (Anthropic-specific optimization; on other providers, simply disables reasoning). */
63
+ thinkingBudget?: number;
64
+ /** Show reasoning summaries in responses (thinking_content / thinking_delta).
65
+ * Supported on OpenAI Responses API and Gemini. No-op on Anthropic. */
66
+ includeThoughts?: boolean;
66
67
  toolChoice?: ToolChoice;
68
+ /** Provider-specific options merged LAST into the raw API request body.
69
+ * Can override any computed field including model and messages — use with care.
70
+ * NOT portable across providers — use effort/thinkingBudget/includeThoughts for cross-provider behavior. */
71
+ providerOptions?: Record<string, unknown>;
67
72
  };
68
73
  /**
69
74
  * Response format for structured output (JSON mode).
@@ -86,6 +91,9 @@ type ResponseFormat = {
86
91
  type StreamChunk = {
87
92
  type: 'text_delta';
88
93
  content: string;
94
+ } | {
95
+ type: 'thinking_delta';
96
+ content: string;
89
97
  } | {
90
98
  type: 'tool_call_delta';
91
99
  id: string;
@@ -100,6 +108,8 @@ type StreamChunk = {
100
108
  reasoning_tokens?: number;
101
109
  cached_tokens?: number;
102
110
  };
111
+ /** Provider-specific opaque metadata (e.g. raw Gemini parts with thought signatures). */
112
+ providerMetadata?: Record<string, unknown>;
103
113
  };
104
114
  /**
105
115
  * Core provider interface. Every LLM adapter must implement this.
@@ -120,6 +130,24 @@ interface Provider {
120
130
  * Alias for Provider. Used for backward compatibility with index.ts exports.
121
131
  */
122
132
  type ProviderAdapter = Provider;
133
+ /** Normalized thinking options computed once, used by all providers. */
134
+ type ResolvedThinkingOptions = {
135
+ /** Raw effort value from user. */
136
+ effort: Effort | undefined;
137
+ /** Raw thinking budget from user. */
138
+ thinkingBudget: number | undefined;
139
+ /** Whether to include thought summaries in responses. */
140
+ includeThoughts: boolean;
141
+ /** True when thinking/reasoning should be disabled (effort: 'none' or thinkingBudget: 0). */
142
+ thinkingDisabled: boolean;
143
+ /** Effort level with 'none' stripped (undefined when effort is 'none' or unset). */
144
+ activeEffort: Exclude<Effort, 'none'> | undefined;
145
+ /** True when an explicit positive budget overrides effort-based allocation. */
146
+ hasBudgetOverride: boolean;
147
+ };
148
+ /** Resolve effort/thinkingBudget/includeThoughts into normalized form.
149
+ * Validates inputs and computes derived flags used by all provider adapters. */
150
+ declare function resolveThinkingOptions(options: Pick<ChatOptions, 'effort' | 'thinkingBudget' | 'includeThoughts'>): ResolvedThinkingOptions;
123
151
 
124
152
  /** Result type for concurrent operations (spawn, map) */
125
153
  type Result<T> = {
@@ -187,14 +215,29 @@ type AskOptions<T = unknown> = {
187
215
  temperature?: number;
188
216
  /** Override max tokens for this call (default: 4096). */
189
217
  maxTokens?: number;
190
- /** Thinking level portable across all providers. Prefer this over `reasoningEffort`. */
191
- thinking?: Thinking;
192
- /** OpenAI-specific reasoning effort (escape hatch). Overridden by `thinking` if both are set. */
193
- reasoningEffort?: ReasoningEffort;
218
+ /** How hard should the model try? Primary param for cost/quality tradeoff. */
219
+ effort?: Effort;
220
+ /** Precise thinking token budget (advanced). Overrides effort-based thinking allocation. */
221
+ thinkingBudget?: number;
222
+ /** Show reasoning summaries in responses. */
223
+ includeThoughts?: boolean;
194
224
  /** Tool choice strategy for this call. */
195
225
  toolChoice?: ToolChoice;
196
226
  /** Stop sequences for this call. */
197
227
  stop?: string[];
228
+ /** Provider-specific options merged into API requests. Not portable across providers. */
229
+ providerOptions?: Record<string, unknown>;
230
+ };
231
+ /** Delegate options */
232
+ type DelegateOptions<T = unknown> = {
233
+ /** Zod schema for structured output from the selected agent. */
234
+ schema?: z.ZodType<T>;
235
+ /** Model URI for the internal router agent (default: first candidate's model). */
236
+ routerModel?: string;
237
+ /** Additional metadata passed to the router and selected agent. */
238
+ metadata?: Record<string, unknown>;
239
+ /** Number of retries for structured output validation (passed to the final ask). */
240
+ retries?: number;
198
241
  };
199
242
  /** Race options */
200
243
  type RaceOptions<T = unknown> = {
@@ -207,7 +250,7 @@ type ExecutionStatus = 'running' | 'completed' | 'failed' | 'waiting';
207
250
  type TraceEvent = {
208
251
  executionId: string;
209
252
  step: number;
210
- type: 'agent_call' | 'tool_call' | 'verify' | 'handoff' | 'tool_denied' | 'log' | 'workflow_start' | 'workflow_end' | 'guardrail';
253
+ type: 'agent_call' | 'tool_call' | 'verify' | 'handoff' | 'delegate' | 'tool_denied' | 'log' | 'workflow_start' | 'workflow_end' | 'guardrail';
211
254
  workflow?: string;
212
255
  agent?: string;
213
256
  tool?: string;
@@ -315,10 +358,13 @@ type AgentCallInfo = {
315
358
  promptVersion?: string;
316
359
  temperature?: number;
317
360
  maxTokens?: number;
318
- thinking?: Thinking;
319
- reasoningEffort?: ReasoningEffort;
361
+ effort?: Effort;
362
+ thinkingBudget?: number;
363
+ includeThoughts?: boolean;
320
364
  toolChoice?: ToolChoice;
321
365
  stop?: string[];
366
+ /** Provider-specific options merged into API requests. Not portable across providers. */
367
+ providerOptions?: Record<string, unknown>;
322
368
  };
323
369
  /** Chat message types for provider communication */
324
370
  type ChatRole = 'system' | 'user' | 'assistant' | 'tool';
@@ -328,6 +374,8 @@ type ChatMessage = {
328
374
  name?: string;
329
375
  tool_calls?: ToolCallMessage[];
330
376
  tool_call_id?: string;
377
+ /** Provider-specific opaque metadata that must round-trip through conversation history. */
378
+ providerMetadata?: Record<string, unknown>;
331
379
  };
332
380
  type ToolCallMessage = {
333
381
  id: string;
@@ -340,6 +388,7 @@ type ToolCallMessage = {
340
388
  /** Provider response */
341
389
  type ProviderResponse = {
342
390
  content: string;
391
+ thinking_content?: string;
343
392
  tool_calls?: ToolCallMessage[];
344
393
  usage?: {
345
394
  prompt_tokens: number;
@@ -349,6 +398,8 @@ type ProviderResponse = {
349
398
  cached_tokens?: number;
350
399
  };
351
400
  cost?: number;
401
+ /** Provider-specific opaque metadata that needs to round-trip through conversation history. */
402
+ providerMetadata?: Record<string, unknown>;
352
403
  };
353
404
 
354
405
  /** Descriptor for a handoff target agent with optional description. */
@@ -368,15 +419,20 @@ type AgentConfig = {
368
419
  metadata?: Record<string, unknown>;
369
420
  }) => string);
370
421
  tools?: Tool<any, any>[];
371
- handoffs?: HandoffDescriptor[];
422
+ handoffs?: HandoffDescriptor[] | ((ctx: {
423
+ metadata?: Record<string, unknown>;
424
+ }) => HandoffDescriptor[]);
372
425
  mcp?: string[];
373
426
  mcpTools?: string[];
374
427
  temperature?: number;
375
428
  maxTokens?: number;
376
- thinking?: Thinking;
377
- reasoningEffort?: ReasoningEffort;
429
+ effort?: Effort;
430
+ thinkingBudget?: number;
431
+ includeThoughts?: boolean;
378
432
  toolChoice?: ToolChoice;
379
433
  stop?: string[];
434
+ /** Provider-specific options merged into API requests. Not portable across providers. */
435
+ providerOptions?: Record<string, unknown>;
380
436
  maxTurns?: number;
381
437
  timeout?: string;
382
438
  maxContext?: number;
@@ -896,6 +952,14 @@ declare class WorkflowContext<TInput = unknown> {
896
952
  private onAgentStart?;
897
953
  private onAgentCallComplete?;
898
954
  constructor(init: WorkflowContextInit);
955
+ /**
956
+ * Create a child context for nested agent invocations (e.g., agent-as-tool).
957
+ * Shares: budget tracking, abort signals, trace emission, provider registry,
958
+ * state store, span manager, memory manager, MCP manager, config,
959
+ * awaitHuman handler, pending decisions, tool overrides.
960
+ * Isolates: session history, step counter, streaming callbacks (onToken, onAgentStart, onToolCall).
961
+ */
962
+ createChildContext(): WorkflowContext;
899
963
  /**
900
964
  * Resolve the current abort signal.
901
965
  * Branch-scoped signals (from race/spawn/map/budget) in AsyncLocalStorage
@@ -904,6 +968,11 @@ declare class WorkflowContext<TInput = unknown> {
904
968
  private get currentSignal();
905
969
  ask<T = string>(agent: Agent, prompt: string, options?: AskOptions<T>): Promise<T>;
906
970
  private executeAgentCall;
971
+ /**
972
+ * Push the final assistant message into session history, preserving providerMetadata
973
+ * (e.g., Gemini thought signatures needed for multi-turn reasoning context).
974
+ */
975
+ private pushAssistantToSessionHistory;
907
976
  private buildToolDefs;
908
977
  /**
909
978
  * Summarize old messages to fit within context window.
@@ -964,6 +1033,18 @@ declare class WorkflowContext<TInput = unknown> {
964
1033
  forget(key: string, options?: {
965
1034
  scope?: 'session' | 'global';
966
1035
  }): Promise<void>;
1036
+ /**
1037
+ * Select the best agent from a list of candidates and invoke it.
1038
+ * Creates a temporary router agent that uses handoffs to pick the right specialist.
1039
+ *
1040
+ * This is convenience sugar over creating a router agent with dynamic handoffs.
1041
+ * For full control over the router's behavior, create the router agent explicitly.
1042
+ *
1043
+ * @param agents - Candidate agents to choose from (at least 1)
1044
+ * @param prompt - The prompt to send to the selected agent
1045
+ * @param options - Optional: schema, routerModel, metadata, retries
1046
+ */
1047
+ delegate<T = string>(agents: Agent[], prompt: string, options?: DelegateOptions<T>): Promise<T>;
967
1048
  private emitTrace;
968
1049
  }
969
1050
 
@@ -987,7 +1068,7 @@ type ToolConfig<TInput extends z.ZodTypeAny, TOutput = unknown> = {
987
1068
  name: string;
988
1069
  description: string;
989
1070
  input: TInput;
990
- handler: (input: z.infer<TInput>) => TOutput | Promise<TOutput>;
1071
+ handler: (input: z.infer<TInput>, ctx: WorkflowContext) => TOutput | Promise<TOutput>;
991
1072
  retry?: RetryPolicy;
992
1073
  sensitive?: boolean;
993
1074
  /** Maximum string length for any string argument. Default: 10000. Set to 0 to disable. */
@@ -1009,7 +1090,7 @@ type Tool<TInput extends z.ZodTypeAny = z.ZodTypeAny, TOutput = unknown> = {
1009
1090
  /** Run the tool directly from workflow code */
1010
1091
  run(ctx: WorkflowContext, input: z.infer<TInput>): Promise<TOutput>;
1011
1092
  /** Execute the handler (internal use — includes retry logic) */
1012
- _execute(input: z.infer<TInput>): Promise<TOutput>;
1093
+ _execute(input: z.infer<TInput>, ctx?: WorkflowContext): Promise<TOutput>;
1013
1094
  };
1014
1095
  /**
1015
1096
  * Define a tool with Zod-validated input, a handler function, and optional retry policy.
@@ -1198,6 +1279,14 @@ declare class AxlRuntime extends EventEmitter {
1198
1279
  runRegisteredEval(name: string): Promise<unknown>;
1199
1280
  /** Get all execution info (running + completed). */
1200
1281
  getExecutions(): ExecutionInfo[];
1282
+ /**
1283
+ * Create a lightweight WorkflowContext for ad-hoc use (tool testing, prototyping).
1284
+ * The context has access to the runtime's providers, state store, and MCP manager
1285
+ * but no session history, streaming callbacks, or budget tracking.
1286
+ */
1287
+ createContext(options?: {
1288
+ metadata?: Record<string, unknown>;
1289
+ }): WorkflowContext;
1201
1290
  /** Register a custom provider instance. */
1202
1291
  registerProvider(name: string, provider: Provider): void;
1203
1292
  /** Execute a workflow and return the result. */
@@ -1705,4 +1794,4 @@ declare class NoopSpanManager implements SpanManager {
1705
1794
  */
1706
1795
  declare function createSpanManager(config?: TelemetryConfig): Promise<SpanManager>;
1707
1796
 
1708
- export { type Agent, type AgentCallInfo, type AgentConfig, AnthropicProvider, type AskOptions, type AwaitHumanOptions, type AxlConfig, AxlError, AxlRuntime, AxlStream, BudgetExceededError, type BudgetOptions, type BudgetResult, type ChatMessage, type ChatOptions, type Embedder, type ExecutionInfo, type ExecutionState, GeminiProvider, type GuardrailBlockHandler, GuardrailError, type GuardrailResult, type GuardrailsConfig, type HandoffDescriptor, type HandoffRecord, type HumanDecision, InMemoryVectorStore, type InputGuardrail, type MapOptions, MaxTurnsError, McpManager, type McpServer, type McpServerConfig, type McpToolDefinition, type McpToolResult, type MemoryConfig, MemoryManager, MemoryStore, NoConsensus, NoopSpanManager, OpenAIEmbedder, OpenAIProvider, OpenAIResponsesProvider, type OutputGuardrail, type PendingDecision, type Provider, type ProviderAdapter, ProviderRegistry, type ProviderResponse, QuorumNotMet, type RaceOptions, type ReasoningEffort, type RecallOptions, RedisStore, type RememberOptions, type Result, type RetryPolicy, SQLiteStore, Session, type SessionOptions, type SpanHandle, type SpanManager, type SpawnOptions, SqliteVectorStore, type StateStore, type StreamChunk, type StreamEvent, type TelemetryConfig, type Thinking, TimeoutError, type Tool, type ToolCallMessage, type ToolChoice, type ToolConfig, ToolDenied, type ToolHooks, type TraceEvent, type VectorEntry, type VectorResult, type VectorStore, VerifyError, type VerifyOptions, type VoteOptions, type Workflow, type WorkflowConfig, WorkflowContext, type WorkflowContextInit, agent, createSpanManager, defineConfig, tool, workflow, zodToJsonSchema };
1797
+ export { type Agent, type AgentCallInfo, type AgentConfig, AnthropicProvider, type AskOptions, type AwaitHumanOptions, type AxlConfig, AxlError, AxlRuntime, AxlStream, BudgetExceededError, type BudgetOptions, type BudgetResult, type ChatMessage, type ChatOptions, type DelegateOptions, type Effort, type Embedder, type ExecutionInfo, type ExecutionState, GeminiProvider, type GuardrailBlockHandler, GuardrailError, type GuardrailResult, type GuardrailsConfig, type HandoffDescriptor, type HandoffRecord, type HumanDecision, InMemoryVectorStore, type InputGuardrail, type MapOptions, MaxTurnsError, McpManager, type McpServer, type McpServerConfig, type McpToolDefinition, type McpToolResult, type MemoryConfig, MemoryManager, MemoryStore, NoConsensus, NoopSpanManager, OpenAIEmbedder, OpenAIProvider, OpenAIResponsesProvider, type OutputGuardrail, type PendingDecision, type Provider, type ProviderAdapter, ProviderRegistry, type ProviderResponse, QuorumNotMet, type RaceOptions, type RecallOptions, RedisStore, type RememberOptions, type ResolvedThinkingOptions, type Result, type RetryPolicy, SQLiteStore, Session, type SessionOptions, type SpanHandle, type SpanManager, type SpawnOptions, SqliteVectorStore, type StateStore, type StreamChunk, type StreamEvent, type TelemetryConfig, TimeoutError, type Tool, type ToolCallMessage, type ToolChoice, type ToolConfig, ToolDenied, type ToolHooks, type TraceEvent, type VectorEntry, type VectorResult, type VectorStore, VerifyError, type VerifyOptions, type VoteOptions, type Workflow, type WorkflowConfig, WorkflowContext, type WorkflowContextInit, agent, createSpanManager, defineConfig, resolveThinkingOptions, tool, workflow, zodToJsonSchema };