@axlsdk/axl 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -16,33 +16,23 @@ type ToolDefinition = {
16
16
  };
17
17
  };
18
18
  /**
19
- * Unified thinking/reasoning level that works across all providers.
19
+ * Unified effort level controlling how thoroughly the model responds.
20
20
  *
21
- * Simple form (`'low' | 'medium' | 'high' | 'max'`) is portable across all providers:
22
- * - **OpenAI** (o1/o3/o4-mini): maps to `reasoning_effort` (`'max'` → `'xhigh'`)
23
- * - **OpenAI Responses**: maps to `reasoning.effort` (`'max'` → `'xhigh'`)
24
- * - **Anthropic** (4.6): maps to adaptive mode + `output_config.effort`
25
- * - **Anthropic** (older): maps to `thinking.budget_tokens` (`'max'` → `32000`)
26
- * - **Gemini** (2.5+): maps to `generationConfig.thinkingConfig.thinkingBudget` (`'max'` → `24576`)
21
+ * - `'none'` Disable thinking/reasoning. On Gemini 3.x, maps to the model's
22
+ * minimum thinking level (3.1 Pro: 'low', others: 'minimal'). On other providers,
23
+ * fully disables reasoning.
24
+ * - `'low'` through `'max'` Increasing levels of reasoning depth and token spend.
27
25
  *
28
- * Budget form (`{ budgetTokens: number }`) gives explicit control over thinking tokens.
29
- * For OpenAI, budget is mapped to the nearest effort level.
26
+ * Provider mapping:
27
+ * - Anthropic 4.6: adaptive thinking + output_config.effort
28
+ * - Anthropic Opus 4.5: output_config.effort (no adaptive)
29
+ * - Anthropic older: thinking.budget_tokens fallback
30
+ * - OpenAI o-series: reasoning_effort
31
+ * - OpenAI GPT-5.x: reasoning.effort / reasoning_effort
32
+ * - Gemini 3.x: thinkingLevel (`'none'` → model min: `'minimal'` or `'low'` for 3.1 Pro)
33
+ * - Gemini 2.x: thinkingBudget (`'none'` → 0; some models have minimums)
30
34
  */
31
- type Thinking = 'low' | 'medium' | 'high' | 'max' | {
32
- budgetTokens: number;
33
- };
34
- /**
35
- * Reasoning effort level for OpenAI reasoning models.
36
- *
37
- * This is a low-level, OpenAI-specific escape hatch. Prefer `thinking` for cross-provider use.
38
- *
39
- * Supported values:
40
- * - **OpenAI** (o1/o3/o4-mini): all values — `'none'`, `'minimal'`, `'low'`, `'medium'`, `'high'`, `'xhigh'`
41
- * - **OpenAI Responses**: all values (via `reasoning.effort`)
42
- * - **Anthropic**: not supported
43
- * - **Gemini**: not supported
44
- */
45
- type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
35
+ type Effort = 'none' | 'low' | 'medium' | 'high' | 'max';
46
36
  /** Tool choice strategy for LLM calls. */
47
37
  type ToolChoice = 'auto' | 'none' | 'required' | {
48
38
  type: 'function';
@@ -61,9 +51,24 @@ type ChatOptions = {
61
51
  responseFormat?: ResponseFormat;
62
52
  stop?: string[];
63
53
  signal?: AbortSignal;
64
- thinking?: Thinking;
65
- reasoningEffort?: ReasoningEffort;
54
+ /** How hard should the model try? Primary param for cost/quality tradeoff.
55
+ * 'none' disables thinking/reasoning (Gemini 3.x: maps to minimal).
56
+ * Omit to use provider defaults. */
57
+ effort?: Effort;
58
+ /** Precise thinking token budget (advanced). When set alongside `effort`, overrides the
59
+ * thinking/reasoning allocation. On Anthropic 4.6, `effort` still controls output quality
60
+ * independently. On all other providers, `thinkingBudget` fully overrides `effort` for
61
+ * reasoning behavior. Set to 0 to disable thinking while keeping effort for output control
62
+ * (Anthropic-specific optimization; on other providers, simply disables reasoning). */
63
+ thinkingBudget?: number;
64
+ /** Show reasoning summaries in responses (thinking_content / thinking_delta).
65
+ * Supported on OpenAI Responses API and Gemini. No-op on Anthropic. */
66
+ includeThoughts?: boolean;
66
67
  toolChoice?: ToolChoice;
68
+ /** Provider-specific options merged LAST into the raw API request body.
69
+ * Can override any computed field including model and messages — use with care.
70
+ * NOT portable across providers — use effort/thinkingBudget/includeThoughts for cross-provider behavior. */
71
+ providerOptions?: Record<string, unknown>;
67
72
  };
68
73
  /**
69
74
  * Response format for structured output (JSON mode).
@@ -86,6 +91,9 @@ type ResponseFormat = {
86
91
  type StreamChunk = {
87
92
  type: 'text_delta';
88
93
  content: string;
94
+ } | {
95
+ type: 'thinking_delta';
96
+ content: string;
89
97
  } | {
90
98
  type: 'tool_call_delta';
91
99
  id: string;
@@ -100,6 +108,8 @@ type StreamChunk = {
100
108
  reasoning_tokens?: number;
101
109
  cached_tokens?: number;
102
110
  };
111
+ /** Provider-specific opaque metadata (e.g. raw Gemini parts with thought signatures). */
112
+ providerMetadata?: Record<string, unknown>;
103
113
  };
104
114
  /**
105
115
  * Core provider interface. Every LLM adapter must implement this.
@@ -120,6 +130,24 @@ interface Provider {
120
130
  * Alias for Provider. Used for backward compatibility with index.ts exports.
121
131
  */
122
132
  type ProviderAdapter = Provider;
133
+ /** Normalized thinking options computed once, used by all providers. */
134
+ type ResolvedThinkingOptions = {
135
+ /** Raw effort value from user. */
136
+ effort: Effort | undefined;
137
+ /** Raw thinking budget from user. */
138
+ thinkingBudget: number | undefined;
139
+ /** Whether to include thought summaries in responses. */
140
+ includeThoughts: boolean;
141
+ /** True when thinking/reasoning should be disabled (effort: 'none' or thinkingBudget: 0). */
142
+ thinkingDisabled: boolean;
143
+ /** Effort level with 'none' stripped (undefined when effort is 'none' or unset). */
144
+ activeEffort: Exclude<Effort, 'none'> | undefined;
145
+ /** True when an explicit positive budget overrides effort-based allocation. */
146
+ hasBudgetOverride: boolean;
147
+ };
148
+ /** Resolve effort/thinkingBudget/includeThoughts into normalized form.
149
+ * Validates inputs and computes derived flags used by all provider adapters. */
150
+ declare function resolveThinkingOptions(options: Pick<ChatOptions, 'effort' | 'thinkingBudget' | 'includeThoughts'>): ResolvedThinkingOptions;
123
151
 
124
152
  /** Result type for concurrent operations (spawn, map) */
125
153
  type Result<T> = {
@@ -187,14 +215,29 @@ type AskOptions<T = unknown> = {
187
215
  temperature?: number;
188
216
  /** Override max tokens for this call (default: 4096). */
189
217
  maxTokens?: number;
190
- /** Thinking level portable across all providers. Prefer this over `reasoningEffort`. */
191
- thinking?: Thinking;
192
- /** OpenAI-specific reasoning effort (escape hatch). Overridden by `thinking` if both are set. */
193
- reasoningEffort?: ReasoningEffort;
218
+ /** How hard should the model try? Primary param for cost/quality tradeoff. */
219
+ effort?: Effort;
220
+ /** Precise thinking token budget (advanced). Overrides effort-based thinking allocation. */
221
+ thinkingBudget?: number;
222
+ /** Show reasoning summaries in responses. */
223
+ includeThoughts?: boolean;
194
224
  /** Tool choice strategy for this call. */
195
225
  toolChoice?: ToolChoice;
196
226
  /** Stop sequences for this call. */
197
227
  stop?: string[];
228
+ /** Provider-specific options merged into API requests. Not portable across providers. */
229
+ providerOptions?: Record<string, unknown>;
230
+ };
231
+ /** Delegate options */
232
+ type DelegateOptions<T = unknown> = {
233
+ /** Zod schema for structured output from the selected agent. */
234
+ schema?: z.ZodType<T>;
235
+ /** Model URI for the internal router agent (default: first candidate's model). */
236
+ routerModel?: string;
237
+ /** Additional metadata passed to the router and selected agent. */
238
+ metadata?: Record<string, unknown>;
239
+ /** Number of retries for structured output validation (passed to the final ask). */
240
+ retries?: number;
198
241
  };
199
242
  /** Race options */
200
243
  type RaceOptions<T = unknown> = {
@@ -207,7 +250,7 @@ type ExecutionStatus = 'running' | 'completed' | 'failed' | 'waiting';
207
250
  type TraceEvent = {
208
251
  executionId: string;
209
252
  step: number;
210
- type: 'agent_call' | 'tool_call' | 'verify' | 'handoff' | 'tool_denied' | 'log' | 'workflow_start' | 'workflow_end' | 'guardrail';
253
+ type: 'agent_call' | 'tool_call' | 'verify' | 'handoff' | 'delegate' | 'tool_denied' | 'log' | 'workflow_start' | 'workflow_end' | 'guardrail';
211
254
  workflow?: string;
212
255
  agent?: string;
213
256
  tool?: string;
@@ -315,10 +358,13 @@ type AgentCallInfo = {
315
358
  promptVersion?: string;
316
359
  temperature?: number;
317
360
  maxTokens?: number;
318
- thinking?: Thinking;
319
- reasoningEffort?: ReasoningEffort;
361
+ effort?: Effort;
362
+ thinkingBudget?: number;
363
+ includeThoughts?: boolean;
320
364
  toolChoice?: ToolChoice;
321
365
  stop?: string[];
366
+ /** Provider-specific options merged into API requests. Not portable across providers. */
367
+ providerOptions?: Record<string, unknown>;
322
368
  };
323
369
  /** Chat message types for provider communication */
324
370
  type ChatRole = 'system' | 'user' | 'assistant' | 'tool';
@@ -328,6 +374,8 @@ type ChatMessage = {
328
374
  name?: string;
329
375
  tool_calls?: ToolCallMessage[];
330
376
  tool_call_id?: string;
377
+ /** Provider-specific opaque metadata that must round-trip through conversation history. */
378
+ providerMetadata?: Record<string, unknown>;
331
379
  };
332
380
  type ToolCallMessage = {
333
381
  id: string;
@@ -340,6 +388,7 @@ type ToolCallMessage = {
340
388
  /** Provider response */
341
389
  type ProviderResponse = {
342
390
  content: string;
391
+ thinking_content?: string;
343
392
  tool_calls?: ToolCallMessage[];
344
393
  usage?: {
345
394
  prompt_tokens: number;
@@ -349,6 +398,8 @@ type ProviderResponse = {
349
398
  cached_tokens?: number;
350
399
  };
351
400
  cost?: number;
401
+ /** Provider-specific opaque metadata that needs to round-trip through conversation history. */
402
+ providerMetadata?: Record<string, unknown>;
352
403
  };
353
404
 
354
405
  /** Descriptor for a handoff target agent with optional description. */
@@ -368,15 +419,20 @@ type AgentConfig = {
368
419
  metadata?: Record<string, unknown>;
369
420
  }) => string);
370
421
  tools?: Tool<any, any>[];
371
- handoffs?: HandoffDescriptor[];
422
+ handoffs?: HandoffDescriptor[] | ((ctx: {
423
+ metadata?: Record<string, unknown>;
424
+ }) => HandoffDescriptor[]);
372
425
  mcp?: string[];
373
426
  mcpTools?: string[];
374
427
  temperature?: number;
375
428
  maxTokens?: number;
376
- thinking?: Thinking;
377
- reasoningEffort?: ReasoningEffort;
429
+ effort?: Effort;
430
+ thinkingBudget?: number;
431
+ includeThoughts?: boolean;
378
432
  toolChoice?: ToolChoice;
379
433
  stop?: string[];
434
+ /** Provider-specific options merged into API requests. Not portable across providers. */
435
+ providerOptions?: Record<string, unknown>;
380
436
  maxTurns?: number;
381
437
  timeout?: string;
382
438
  maxContext?: number;
@@ -896,6 +952,14 @@ declare class WorkflowContext<TInput = unknown> {
896
952
  private onAgentStart?;
897
953
  private onAgentCallComplete?;
898
954
  constructor(init: WorkflowContextInit);
955
+ /**
956
+ * Create a child context for nested agent invocations (e.g., agent-as-tool).
957
+ * Shares: budget tracking, abort signals, trace emission, provider registry,
958
+ * state store, span manager, memory manager, MCP manager, config,
959
+ * awaitHuman handler, pending decisions, tool overrides.
960
+ * Isolates: session history, step counter, streaming callbacks (onToken, onAgentStart, onToolCall).
961
+ */
962
+ createChildContext(): WorkflowContext;
899
963
  /**
900
964
  * Resolve the current abort signal.
901
965
  * Branch-scoped signals (from race/spawn/map/budget) in AsyncLocalStorage
@@ -904,6 +968,11 @@ declare class WorkflowContext<TInput = unknown> {
904
968
  private get currentSignal();
905
969
  ask<T = string>(agent: Agent, prompt: string, options?: AskOptions<T>): Promise<T>;
906
970
  private executeAgentCall;
971
+ /**
972
+ * Push the final assistant message into session history, preserving providerMetadata
973
+ * (e.g., Gemini thought signatures needed for multi-turn reasoning context).
974
+ */
975
+ private pushAssistantToSessionHistory;
907
976
  private buildToolDefs;
908
977
  /**
909
978
  * Summarize old messages to fit within context window.
@@ -964,6 +1033,18 @@ declare class WorkflowContext<TInput = unknown> {
964
1033
  forget(key: string, options?: {
965
1034
  scope?: 'session' | 'global';
966
1035
  }): Promise<void>;
1036
+ /**
1037
+ * Select the best agent from a list of candidates and invoke it.
1038
+ * Creates a temporary router agent that uses handoffs to pick the right specialist.
1039
+ *
1040
+ * This is convenience sugar over creating a router agent with dynamic handoffs.
1041
+ * For full control over the router's behavior, create the router agent explicitly.
1042
+ *
1043
+ * @param agents - Candidate agents to choose from (at least 1)
1044
+ * @param prompt - The prompt to send to the selected agent
1045
+ * @param options - Optional: schema, routerModel, metadata, retries
1046
+ */
1047
+ delegate<T = string>(agents: Agent[], prompt: string, options?: DelegateOptions<T>): Promise<T>;
967
1048
  private emitTrace;
968
1049
  }
969
1050
 
@@ -987,7 +1068,7 @@ type ToolConfig<TInput extends z.ZodTypeAny, TOutput = unknown> = {
987
1068
  name: string;
988
1069
  description: string;
989
1070
  input: TInput;
990
- handler: (input: z.infer<TInput>) => TOutput | Promise<TOutput>;
1071
+ handler: (input: z.infer<TInput>, ctx: WorkflowContext) => TOutput | Promise<TOutput>;
991
1072
  retry?: RetryPolicy;
992
1073
  sensitive?: boolean;
993
1074
  /** Maximum string length for any string argument. Default: 10000. Set to 0 to disable. */
@@ -1009,7 +1090,7 @@ type Tool<TInput extends z.ZodTypeAny = z.ZodTypeAny, TOutput = unknown> = {
1009
1090
  /** Run the tool directly from workflow code */
1010
1091
  run(ctx: WorkflowContext, input: z.infer<TInput>): Promise<TOutput>;
1011
1092
  /** Execute the handler (internal use — includes retry logic) */
1012
- _execute(input: z.infer<TInput>): Promise<TOutput>;
1093
+ _execute(input: z.infer<TInput>, ctx?: WorkflowContext): Promise<TOutput>;
1013
1094
  };
1014
1095
  /**
1015
1096
  * Define a tool with Zod-validated input, a handler function, and optional retry policy.
@@ -1198,6 +1279,14 @@ declare class AxlRuntime extends EventEmitter {
1198
1279
  runRegisteredEval(name: string): Promise<unknown>;
1199
1280
  /** Get all execution info (running + completed). */
1200
1281
  getExecutions(): ExecutionInfo[];
1282
+ /**
1283
+ * Create a lightweight WorkflowContext for ad-hoc use (tool testing, prototyping).
1284
+ * The context has access to the runtime's providers, state store, and MCP manager
1285
+ * but no session history, streaming callbacks, or budget tracking.
1286
+ */
1287
+ createContext(options?: {
1288
+ metadata?: Record<string, unknown>;
1289
+ }): WorkflowContext;
1201
1290
  /** Register a custom provider instance. */
1202
1291
  registerProvider(name: string, provider: Provider): void;
1203
1292
  /** Execute a workflow and return the result. */
@@ -1705,4 +1794,4 @@ declare class NoopSpanManager implements SpanManager {
1705
1794
  */
1706
1795
  declare function createSpanManager(config?: TelemetryConfig): Promise<SpanManager>;
1707
1796
 
1708
- export { type Agent, type AgentCallInfo, type AgentConfig, AnthropicProvider, type AskOptions, type AwaitHumanOptions, type AxlConfig, AxlError, AxlRuntime, AxlStream, BudgetExceededError, type BudgetOptions, type BudgetResult, type ChatMessage, type ChatOptions, type Embedder, type ExecutionInfo, type ExecutionState, GeminiProvider, type GuardrailBlockHandler, GuardrailError, type GuardrailResult, type GuardrailsConfig, type HandoffDescriptor, type HandoffRecord, type HumanDecision, InMemoryVectorStore, type InputGuardrail, type MapOptions, MaxTurnsError, McpManager, type McpServer, type McpServerConfig, type McpToolDefinition, type McpToolResult, type MemoryConfig, MemoryManager, MemoryStore, NoConsensus, NoopSpanManager, OpenAIEmbedder, OpenAIProvider, OpenAIResponsesProvider, type OutputGuardrail, type PendingDecision, type Provider, type ProviderAdapter, ProviderRegistry, type ProviderResponse, QuorumNotMet, type RaceOptions, type ReasoningEffort, type RecallOptions, RedisStore, type RememberOptions, type Result, type RetryPolicy, SQLiteStore, Session, type SessionOptions, type SpanHandle, type SpanManager, type SpawnOptions, SqliteVectorStore, type StateStore, type StreamChunk, type StreamEvent, type TelemetryConfig, type Thinking, TimeoutError, type Tool, type ToolCallMessage, type ToolChoice, type ToolConfig, ToolDenied, type ToolHooks, type TraceEvent, type VectorEntry, type VectorResult, type VectorStore, VerifyError, type VerifyOptions, type VoteOptions, type Workflow, type WorkflowConfig, WorkflowContext, type WorkflowContextInit, agent, createSpanManager, defineConfig, tool, workflow, zodToJsonSchema };
1797
+ export { type Agent, type AgentCallInfo, type AgentConfig, AnthropicProvider, type AskOptions, type AwaitHumanOptions, type AxlConfig, AxlError, AxlRuntime, AxlStream, BudgetExceededError, type BudgetOptions, type BudgetResult, type ChatMessage, type ChatOptions, type DelegateOptions, type Effort, type Embedder, type ExecutionInfo, type ExecutionState, GeminiProvider, type GuardrailBlockHandler, GuardrailError, type GuardrailResult, type GuardrailsConfig, type HandoffDescriptor, type HandoffRecord, type HumanDecision, InMemoryVectorStore, type InputGuardrail, type MapOptions, MaxTurnsError, McpManager, type McpServer, type McpServerConfig, type McpToolDefinition, type McpToolResult, type MemoryConfig, MemoryManager, MemoryStore, NoConsensus, NoopSpanManager, OpenAIEmbedder, OpenAIProvider, OpenAIResponsesProvider, type OutputGuardrail, type PendingDecision, type Provider, type ProviderAdapter, ProviderRegistry, type ProviderResponse, QuorumNotMet, type RaceOptions, type RecallOptions, RedisStore, type RememberOptions, type ResolvedThinkingOptions, type Result, type RetryPolicy, SQLiteStore, Session, type SessionOptions, type SpanHandle, type SpanManager, type SpawnOptions, SqliteVectorStore, type StateStore, type StreamChunk, type StreamEvent, type TelemetryConfig, TimeoutError, type Tool, type ToolCallMessage, type ToolChoice, type ToolConfig, ToolDenied, type ToolHooks, type TraceEvent, type VectorEntry, type VectorResult, type VectorStore, VerifyError, type VerifyOptions, type VoteOptions, type Workflow, type WorkflowConfig, WorkflowContext, type WorkflowContextInit, agent, createSpanManager, defineConfig, resolveThinkingOptions, tool, workflow, zodToJsonSchema };
package/dist/index.d.ts CHANGED
@@ -16,33 +16,23 @@ type ToolDefinition = {
16
16
  };
17
17
  };
18
18
  /**
19
- * Unified thinking/reasoning level that works across all providers.
19
+ * Unified effort level controlling how thoroughly the model responds.
20
20
  *
21
- * Simple form (`'low' | 'medium' | 'high' | 'max'`) is portable across all providers:
22
- * - **OpenAI** (o1/o3/o4-mini): maps to `reasoning_effort` (`'max'` → `'xhigh'`)
23
- * - **OpenAI Responses**: maps to `reasoning.effort` (`'max'` → `'xhigh'`)
24
- * - **Anthropic** (4.6): maps to adaptive mode + `output_config.effort`
25
- * - **Anthropic** (older): maps to `thinking.budget_tokens` (`'max'` → `32000`)
26
- * - **Gemini** (2.5+): maps to `generationConfig.thinkingConfig.thinkingBudget` (`'max'` → `24576`)
21
+ * - `'none'` Disable thinking/reasoning. On Gemini 3.x, maps to the model's
22
+ * minimum thinking level (3.1 Pro: 'low', others: 'minimal'). On other providers,
23
+ * fully disables reasoning.
24
+ * - `'low'` through `'max'` Increasing levels of reasoning depth and token spend.
27
25
  *
28
- * Budget form (`{ budgetTokens: number }`) gives explicit control over thinking tokens.
29
- * For OpenAI, budget is mapped to the nearest effort level.
26
+ * Provider mapping:
27
+ * - Anthropic 4.6: adaptive thinking + output_config.effort
28
+ * - Anthropic Opus 4.5: output_config.effort (no adaptive)
29
+ * - Anthropic older: thinking.budget_tokens fallback
30
+ * - OpenAI o-series: reasoning_effort
31
+ * - OpenAI GPT-5.x: reasoning.effort / reasoning_effort
32
+ * - Gemini 3.x: thinkingLevel (`'none'` → model min: `'minimal'` or `'low'` for 3.1 Pro)
33
+ * - Gemini 2.x: thinkingBudget (`'none'` → 0; some models have minimums)
30
34
  */
31
- type Thinking = 'low' | 'medium' | 'high' | 'max' | {
32
- budgetTokens: number;
33
- };
34
- /**
35
- * Reasoning effort level for OpenAI reasoning models.
36
- *
37
- * This is a low-level, OpenAI-specific escape hatch. Prefer `thinking` for cross-provider use.
38
- *
39
- * Supported values:
40
- * - **OpenAI** (o1/o3/o4-mini): all values — `'none'`, `'minimal'`, `'low'`, `'medium'`, `'high'`, `'xhigh'`
41
- * - **OpenAI Responses**: all values (via `reasoning.effort`)
42
- * - **Anthropic**: not supported
43
- * - **Gemini**: not supported
44
- */
45
- type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
35
+ type Effort = 'none' | 'low' | 'medium' | 'high' | 'max';
46
36
  /** Tool choice strategy for LLM calls. */
47
37
  type ToolChoice = 'auto' | 'none' | 'required' | {
48
38
  type: 'function';
@@ -61,9 +51,24 @@ type ChatOptions = {
61
51
  responseFormat?: ResponseFormat;
62
52
  stop?: string[];
63
53
  signal?: AbortSignal;
64
- thinking?: Thinking;
65
- reasoningEffort?: ReasoningEffort;
54
+ /** How hard should the model try? Primary param for cost/quality tradeoff.
55
+ * 'none' disables thinking/reasoning (Gemini 3.x: maps to minimal).
56
+ * Omit to use provider defaults. */
57
+ effort?: Effort;
58
+ /** Precise thinking token budget (advanced). When set alongside `effort`, overrides the
59
+ * thinking/reasoning allocation. On Anthropic 4.6, `effort` still controls output quality
60
+ * independently. On all other providers, `thinkingBudget` fully overrides `effort` for
61
+ * reasoning behavior. Set to 0 to disable thinking while keeping effort for output control
62
+ * (Anthropic-specific optimization; on other providers, simply disables reasoning). */
63
+ thinkingBudget?: number;
64
+ /** Show reasoning summaries in responses (thinking_content / thinking_delta).
65
+ * Supported on OpenAI Responses API and Gemini. No-op on Anthropic. */
66
+ includeThoughts?: boolean;
66
67
  toolChoice?: ToolChoice;
68
+ /** Provider-specific options merged LAST into the raw API request body.
69
+ * Can override any computed field including model and messages — use with care.
70
+ * NOT portable across providers — use effort/thinkingBudget/includeThoughts for cross-provider behavior. */
71
+ providerOptions?: Record<string, unknown>;
67
72
  };
68
73
  /**
69
74
  * Response format for structured output (JSON mode).
@@ -86,6 +91,9 @@ type ResponseFormat = {
86
91
  type StreamChunk = {
87
92
  type: 'text_delta';
88
93
  content: string;
94
+ } | {
95
+ type: 'thinking_delta';
96
+ content: string;
89
97
  } | {
90
98
  type: 'tool_call_delta';
91
99
  id: string;
@@ -100,6 +108,8 @@ type StreamChunk = {
100
108
  reasoning_tokens?: number;
101
109
  cached_tokens?: number;
102
110
  };
111
+ /** Provider-specific opaque metadata (e.g. raw Gemini parts with thought signatures). */
112
+ providerMetadata?: Record<string, unknown>;
103
113
  };
104
114
  /**
105
115
  * Core provider interface. Every LLM adapter must implement this.
@@ -120,6 +130,24 @@ interface Provider {
120
130
  * Alias for Provider. Used for backward compatibility with index.ts exports.
121
131
  */
122
132
  type ProviderAdapter = Provider;
133
+ /** Normalized thinking options computed once, used by all providers. */
134
+ type ResolvedThinkingOptions = {
135
+ /** Raw effort value from user. */
136
+ effort: Effort | undefined;
137
+ /** Raw thinking budget from user. */
138
+ thinkingBudget: number | undefined;
139
+ /** Whether to include thought summaries in responses. */
140
+ includeThoughts: boolean;
141
+ /** True when thinking/reasoning should be disabled (effort: 'none' or thinkingBudget: 0). */
142
+ thinkingDisabled: boolean;
143
+ /** Effort level with 'none' stripped (undefined when effort is 'none' or unset). */
144
+ activeEffort: Exclude<Effort, 'none'> | undefined;
145
+ /** True when an explicit positive budget overrides effort-based allocation. */
146
+ hasBudgetOverride: boolean;
147
+ };
148
+ /** Resolve effort/thinkingBudget/includeThoughts into normalized form.
149
+ * Validates inputs and computes derived flags used by all provider adapters. */
150
+ declare function resolveThinkingOptions(options: Pick<ChatOptions, 'effort' | 'thinkingBudget' | 'includeThoughts'>): ResolvedThinkingOptions;
123
151
 
124
152
  /** Result type for concurrent operations (spawn, map) */
125
153
  type Result<T> = {
@@ -187,14 +215,29 @@ type AskOptions<T = unknown> = {
187
215
  temperature?: number;
188
216
  /** Override max tokens for this call (default: 4096). */
189
217
  maxTokens?: number;
190
- /** Thinking level portable across all providers. Prefer this over `reasoningEffort`. */
191
- thinking?: Thinking;
192
- /** OpenAI-specific reasoning effort (escape hatch). Overridden by `thinking` if both are set. */
193
- reasoningEffort?: ReasoningEffort;
218
+ /** How hard should the model try? Primary param for cost/quality tradeoff. */
219
+ effort?: Effort;
220
+ /** Precise thinking token budget (advanced). Overrides effort-based thinking allocation. */
221
+ thinkingBudget?: number;
222
+ /** Show reasoning summaries in responses. */
223
+ includeThoughts?: boolean;
194
224
  /** Tool choice strategy for this call. */
195
225
  toolChoice?: ToolChoice;
196
226
  /** Stop sequences for this call. */
197
227
  stop?: string[];
228
+ /** Provider-specific options merged into API requests. Not portable across providers. */
229
+ providerOptions?: Record<string, unknown>;
230
+ };
231
+ /** Delegate options */
232
+ type DelegateOptions<T = unknown> = {
233
+ /** Zod schema for structured output from the selected agent. */
234
+ schema?: z.ZodType<T>;
235
+ /** Model URI for the internal router agent (default: first candidate's model). */
236
+ routerModel?: string;
237
+ /** Additional metadata passed to the router and selected agent. */
238
+ metadata?: Record<string, unknown>;
239
+ /** Number of retries for structured output validation (passed to the final ask). */
240
+ retries?: number;
198
241
  };
199
242
  /** Race options */
200
243
  type RaceOptions<T = unknown> = {
@@ -207,7 +250,7 @@ type ExecutionStatus = 'running' | 'completed' | 'failed' | 'waiting';
207
250
  type TraceEvent = {
208
251
  executionId: string;
209
252
  step: number;
210
- type: 'agent_call' | 'tool_call' | 'verify' | 'handoff' | 'tool_denied' | 'log' | 'workflow_start' | 'workflow_end' | 'guardrail';
253
+ type: 'agent_call' | 'tool_call' | 'verify' | 'handoff' | 'delegate' | 'tool_denied' | 'log' | 'workflow_start' | 'workflow_end' | 'guardrail';
211
254
  workflow?: string;
212
255
  agent?: string;
213
256
  tool?: string;
@@ -315,10 +358,13 @@ type AgentCallInfo = {
315
358
  promptVersion?: string;
316
359
  temperature?: number;
317
360
  maxTokens?: number;
318
- thinking?: Thinking;
319
- reasoningEffort?: ReasoningEffort;
361
+ effort?: Effort;
362
+ thinkingBudget?: number;
363
+ includeThoughts?: boolean;
320
364
  toolChoice?: ToolChoice;
321
365
  stop?: string[];
366
+ /** Provider-specific options merged into API requests. Not portable across providers. */
367
+ providerOptions?: Record<string, unknown>;
322
368
  };
323
369
  /** Chat message types for provider communication */
324
370
  type ChatRole = 'system' | 'user' | 'assistant' | 'tool';
@@ -328,6 +374,8 @@ type ChatMessage = {
328
374
  name?: string;
329
375
  tool_calls?: ToolCallMessage[];
330
376
  tool_call_id?: string;
377
+ /** Provider-specific opaque metadata that must round-trip through conversation history. */
378
+ providerMetadata?: Record<string, unknown>;
331
379
  };
332
380
  type ToolCallMessage = {
333
381
  id: string;
@@ -340,6 +388,7 @@ type ToolCallMessage = {
340
388
  /** Provider response */
341
389
  type ProviderResponse = {
342
390
  content: string;
391
+ thinking_content?: string;
343
392
  tool_calls?: ToolCallMessage[];
344
393
  usage?: {
345
394
  prompt_tokens: number;
@@ -349,6 +398,8 @@ type ProviderResponse = {
349
398
  cached_tokens?: number;
350
399
  };
351
400
  cost?: number;
401
+ /** Provider-specific opaque metadata that needs to round-trip through conversation history. */
402
+ providerMetadata?: Record<string, unknown>;
352
403
  };
353
404
 
354
405
  /** Descriptor for a handoff target agent with optional description. */
@@ -368,15 +419,20 @@ type AgentConfig = {
368
419
  metadata?: Record<string, unknown>;
369
420
  }) => string);
370
421
  tools?: Tool<any, any>[];
371
- handoffs?: HandoffDescriptor[];
422
+ handoffs?: HandoffDescriptor[] | ((ctx: {
423
+ metadata?: Record<string, unknown>;
424
+ }) => HandoffDescriptor[]);
372
425
  mcp?: string[];
373
426
  mcpTools?: string[];
374
427
  temperature?: number;
375
428
  maxTokens?: number;
376
- thinking?: Thinking;
377
- reasoningEffort?: ReasoningEffort;
429
+ effort?: Effort;
430
+ thinkingBudget?: number;
431
+ includeThoughts?: boolean;
378
432
  toolChoice?: ToolChoice;
379
433
  stop?: string[];
434
+ /** Provider-specific options merged into API requests. Not portable across providers. */
435
+ providerOptions?: Record<string, unknown>;
380
436
  maxTurns?: number;
381
437
  timeout?: string;
382
438
  maxContext?: number;
@@ -896,6 +952,14 @@ declare class WorkflowContext<TInput = unknown> {
896
952
  private onAgentStart?;
897
953
  private onAgentCallComplete?;
898
954
  constructor(init: WorkflowContextInit);
955
+ /**
956
+ * Create a child context for nested agent invocations (e.g., agent-as-tool).
957
+ * Shares: budget tracking, abort signals, trace emission, provider registry,
958
+ * state store, span manager, memory manager, MCP manager, config,
959
+ * awaitHuman handler, pending decisions, tool overrides.
960
+ * Isolates: session history, step counter, streaming callbacks (onToken, onAgentStart, onToolCall).
961
+ */
962
+ createChildContext(): WorkflowContext;
899
963
  /**
900
964
  * Resolve the current abort signal.
901
965
  * Branch-scoped signals (from race/spawn/map/budget) in AsyncLocalStorage
@@ -904,6 +968,11 @@ declare class WorkflowContext<TInput = unknown> {
904
968
  private get currentSignal();
905
969
  ask<T = string>(agent: Agent, prompt: string, options?: AskOptions<T>): Promise<T>;
906
970
  private executeAgentCall;
971
+ /**
972
+ * Push the final assistant message into session history, preserving providerMetadata
973
+ * (e.g., Gemini thought signatures needed for multi-turn reasoning context).
974
+ */
975
+ private pushAssistantToSessionHistory;
907
976
  private buildToolDefs;
908
977
  /**
909
978
  * Summarize old messages to fit within context window.
@@ -964,6 +1033,18 @@ declare class WorkflowContext<TInput = unknown> {
964
1033
  forget(key: string, options?: {
965
1034
  scope?: 'session' | 'global';
966
1035
  }): Promise<void>;
1036
+ /**
1037
+ * Select the best agent from a list of candidates and invoke it.
1038
+ * Creates a temporary router agent that uses handoffs to pick the right specialist.
1039
+ *
1040
+ * This is convenience sugar over creating a router agent with dynamic handoffs.
1041
+ * For full control over the router's behavior, create the router agent explicitly.
1042
+ *
1043
+ * @param agents - Candidate agents to choose from (at least 1)
1044
+ * @param prompt - The prompt to send to the selected agent
1045
+ * @param options - Optional: schema, routerModel, metadata, retries
1046
+ */
1047
+ delegate<T = string>(agents: Agent[], prompt: string, options?: DelegateOptions<T>): Promise<T>;
967
1048
  private emitTrace;
968
1049
  }
969
1050
 
@@ -987,7 +1068,7 @@ type ToolConfig<TInput extends z.ZodTypeAny, TOutput = unknown> = {
987
1068
  name: string;
988
1069
  description: string;
989
1070
  input: TInput;
990
- handler: (input: z.infer<TInput>) => TOutput | Promise<TOutput>;
1071
+ handler: (input: z.infer<TInput>, ctx: WorkflowContext) => TOutput | Promise<TOutput>;
991
1072
  retry?: RetryPolicy;
992
1073
  sensitive?: boolean;
993
1074
  /** Maximum string length for any string argument. Default: 10000. Set to 0 to disable. */
@@ -1009,7 +1090,7 @@ type Tool<TInput extends z.ZodTypeAny = z.ZodTypeAny, TOutput = unknown> = {
1009
1090
  /** Run the tool directly from workflow code */
1010
1091
  run(ctx: WorkflowContext, input: z.infer<TInput>): Promise<TOutput>;
1011
1092
  /** Execute the handler (internal use — includes retry logic) */
1012
- _execute(input: z.infer<TInput>): Promise<TOutput>;
1093
+ _execute(input: z.infer<TInput>, ctx?: WorkflowContext): Promise<TOutput>;
1013
1094
  };
1014
1095
  /**
1015
1096
  * Define a tool with Zod-validated input, a handler function, and optional retry policy.
@@ -1198,6 +1279,14 @@ declare class AxlRuntime extends EventEmitter {
1198
1279
  runRegisteredEval(name: string): Promise<unknown>;
1199
1280
  /** Get all execution info (running + completed). */
1200
1281
  getExecutions(): ExecutionInfo[];
1282
+ /**
1283
+ * Create a lightweight WorkflowContext for ad-hoc use (tool testing, prototyping).
1284
+ * The context has access to the runtime's providers, state store, and MCP manager
1285
+ * but no session history, streaming callbacks, or budget tracking.
1286
+ */
1287
+ createContext(options?: {
1288
+ metadata?: Record<string, unknown>;
1289
+ }): WorkflowContext;
1201
1290
  /** Register a custom provider instance. */
1202
1291
  registerProvider(name: string, provider: Provider): void;
1203
1292
  /** Execute a workflow and return the result. */
@@ -1705,4 +1794,4 @@ declare class NoopSpanManager implements SpanManager {
1705
1794
  */
1706
1795
  declare function createSpanManager(config?: TelemetryConfig): Promise<SpanManager>;
1707
1796
 
1708
- export { type Agent, type AgentCallInfo, type AgentConfig, AnthropicProvider, type AskOptions, type AwaitHumanOptions, type AxlConfig, AxlError, AxlRuntime, AxlStream, BudgetExceededError, type BudgetOptions, type BudgetResult, type ChatMessage, type ChatOptions, type Embedder, type ExecutionInfo, type ExecutionState, GeminiProvider, type GuardrailBlockHandler, GuardrailError, type GuardrailResult, type GuardrailsConfig, type HandoffDescriptor, type HandoffRecord, type HumanDecision, InMemoryVectorStore, type InputGuardrail, type MapOptions, MaxTurnsError, McpManager, type McpServer, type McpServerConfig, type McpToolDefinition, type McpToolResult, type MemoryConfig, MemoryManager, MemoryStore, NoConsensus, NoopSpanManager, OpenAIEmbedder, OpenAIProvider, OpenAIResponsesProvider, type OutputGuardrail, type PendingDecision, type Provider, type ProviderAdapter, ProviderRegistry, type ProviderResponse, QuorumNotMet, type RaceOptions, type ReasoningEffort, type RecallOptions, RedisStore, type RememberOptions, type Result, type RetryPolicy, SQLiteStore, Session, type SessionOptions, type SpanHandle, type SpanManager, type SpawnOptions, SqliteVectorStore, type StateStore, type StreamChunk, type StreamEvent, type TelemetryConfig, type Thinking, TimeoutError, type Tool, type ToolCallMessage, type ToolChoice, type ToolConfig, ToolDenied, type ToolHooks, type TraceEvent, type VectorEntry, type VectorResult, type VectorStore, VerifyError, type VerifyOptions, type VoteOptions, type Workflow, type WorkflowConfig, WorkflowContext, type WorkflowContextInit, agent, createSpanManager, defineConfig, tool, workflow, zodToJsonSchema };
1797
+ export { type Agent, type AgentCallInfo, type AgentConfig, AnthropicProvider, type AskOptions, type AwaitHumanOptions, type AxlConfig, AxlError, AxlRuntime, AxlStream, BudgetExceededError, type BudgetOptions, type BudgetResult, type ChatMessage, type ChatOptions, type DelegateOptions, type Effort, type Embedder, type ExecutionInfo, type ExecutionState, GeminiProvider, type GuardrailBlockHandler, GuardrailError, type GuardrailResult, type GuardrailsConfig, type HandoffDescriptor, type HandoffRecord, type HumanDecision, InMemoryVectorStore, type InputGuardrail, type MapOptions, MaxTurnsError, McpManager, type McpServer, type McpServerConfig, type McpToolDefinition, type McpToolResult, type MemoryConfig, MemoryManager, MemoryStore, NoConsensus, NoopSpanManager, OpenAIEmbedder, OpenAIProvider, OpenAIResponsesProvider, type OutputGuardrail, type PendingDecision, type Provider, type ProviderAdapter, ProviderRegistry, type ProviderResponse, QuorumNotMet, type RaceOptions, type RecallOptions, RedisStore, type RememberOptions, type ResolvedThinkingOptions, type Result, type RetryPolicy, SQLiteStore, Session, type SessionOptions, type SpanHandle, type SpanManager, type SpawnOptions, SqliteVectorStore, type StateStore, type StreamChunk, type StreamEvent, type TelemetryConfig, TimeoutError, type Tool, type ToolCallMessage, type ToolChoice, type ToolConfig, ToolDenied, type ToolHooks, type TraceEvent, type VectorEntry, type VectorResult, type VectorStore, VerifyError, type VerifyOptions, type VoteOptions, type Workflow, type WorkflowConfig, WorkflowContext, type WorkflowContextInit, agent, createSpanManager, defineConfig, resolveThinkingOptions, tool, workflow, zodToJsonSchema };