@axlsdk/axl 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -41,7 +41,7 @@ function tool(config) {
41
41
  on: config.retry?.on
42
42
  };
43
43
  const maxStringLen = config.maxStringLength ?? DEFAULT_MAX_STRING_LENGTH;
44
- const execute = async (input) => {
44
+ const execute = async (input, ctx) => {
45
45
  const parsed = config.input.parse(input);
46
46
  if (maxStringLen > 0) {
47
47
  validateStringLengths(parsed, maxStringLen);
@@ -50,7 +50,7 @@ function tool(config) {
50
50
  let lastError;
51
51
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
52
52
  try {
53
- return await config.handler(parsed);
53
+ return await config.handler(parsed, ctx);
54
54
  } catch (err) {
55
55
  lastError = err instanceof Error ? err : new Error(String(err));
56
56
  if (attempt === maxAttempts) break;
@@ -80,7 +80,7 @@ function tool(config) {
80
80
  if (config.hooks?.before) {
81
81
  processedInput = await config.hooks.before(processedInput, ctx);
82
82
  }
83
- let result = await execute(processedInput);
83
+ let result = await execute(processedInput, ctx);
84
84
  if (config.hooks?.after) {
85
85
  result = await config.hooks.after(result, ctx);
86
86
  }
@@ -102,6 +102,25 @@ function tool(config) {
102
102
  };
103
103
  }
104
104
 
105
+ // src/providers/types.ts
106
/**
 * Normalize the provider-agnostic thinking options into the derived flags
 * that each provider's request builder consumes.
 *
 * @param {object} options - Call options; reads `effort`, `thinkingBudget`
 *   and `includeThoughts`.
 * @returns {object} `{ effort, thinkingBudget, includeThoughts,
 *   thinkingDisabled, activeEffort, hasBudgetOverride }`.
 * @throws {Error} If `thinkingBudget` is provided but is not a finite,
 *   non-negative number.
 */
function resolveThinkingOptions(options) {
  const { effort, thinkingBudget } = options;
  // null is treated like "not provided", matching the previous behaviour.
  if (thinkingBudget !== void 0 && thinkingBudget !== null) {
    // Reject NaN, Infinity and non-numeric values up front. Without this a
    // string such as "5000" passes the `< 0` check and later arithmetic like
    // `thinkingBudget + 1024` silently concatenates instead of adding.
    if (typeof thinkingBudget !== "number" || !Number.isFinite(thinkingBudget)) {
      throw new Error(`thinkingBudget must be a finite number, got ${String(thinkingBudget)}`);
    }
    if (thinkingBudget < 0) {
      throw new Error(`thinkingBudget must be non-negative, got ${thinkingBudget}`);
    }
  }
  const hasBudgetOverride = typeof thinkingBudget === "number" && thinkingBudget > 0;
  return {
    effort,
    thinkingBudget,
    includeThoughts: options.includeThoughts ?? false,
    // Budget override wins: effort: 'none' + thinkingBudget: 5000 → thinking enabled
    thinkingDisabled: (effort === "none" || thinkingBudget === 0) && !hasBudgetOverride,
    // 'none' is not an active effort level; callers see undefined instead.
    activeEffort: effort && effort !== "none" ? effort : void 0,
    hasBudgetOverride
  };
}
123
+
105
124
  // src/providers/retry.ts
106
125
  var RETRYABLE_STATUS_CODES = /* @__PURE__ */ new Set([429, 503, 529]);
107
126
  var MAX_RETRIES = 2;
@@ -156,6 +175,9 @@ var OPENAI_PRICING = {
156
175
  "gpt-5-nano": [5e-8, 4e-7],
157
176
  "gpt-5.1": [125e-8, 1e-5],
158
177
  "gpt-5.2": [175e-8, 14e-6],
178
+ "gpt-5.3": [175e-8, 14e-6],
179
+ "gpt-5.4": [25e-7, 15e-6],
180
+ "gpt-5.4-pro": [3e-5, 18e-5],
159
181
  o1: [15e-6, 6e-5],
160
182
  "o1-mini": [3e-6, 12e-6],
161
183
  "o1-pro": [15e-5, 6e-4],
@@ -184,9 +206,32 @@ function estimateOpenAICost(model, promptTokens, completionTokens, cachedTokens)
184
206
  const inputCost = (promptTokens - cached) * inputRate + cached * inputRate * 0.5;
185
207
  return inputCost + completionTokens * outputRate;
186
208
  }
187
- function isReasoningModel(model) {
209
/** True when the model id names an OpenAI o-series model (o1*, o3*, o4-mini*). */
function isOSeriesModel(model) {
  return /^(o1|o3|o4-mini)/.test(model);
}

/** True when the model accepts a reasoning effort: o-series or any gpt-5* model. */
function supportsReasoningEffort(model) {
  return isOSeriesModel(model) || /^gpt-5/.test(model);
}

/** True when the model accepts effort "none" (gpt-5.1 and later point releases). */
function supportsReasoningNone(model) {
  return /^gpt-5\.[1-9]/.test(model);
}

/** True when the model accepts effort "xhigh" (gpt-5.2 and later point releases). */
function supportsXhigh(model) {
  return /^gpt-5\.([2-9]|\d{2,})/.test(model);
}

/**
 * Clamp a requested reasoning effort to a value the given model accepts.
 *
 * @param {string} model - OpenAI model id.
 * @param {string} effort - Requested wire-level effort.
 * @returns {string} The effort to send on the wire.
 */
function clampReasoningEffort(model, effort) {
  // gpt-5-pro is pinned to "high" regardless of the request.
  if (model.startsWith("gpt-5-pro")) return "high";
  if (isOSeriesModel(model)) {
    // o-series models only accept low/medium/high. Flooring "none" at
    // "minimal" (a gpt-5 level) would be rejected by the API, so use "low".
    if (effort === "none" || effort === "minimal") return "low";
    if (effort === "xhigh") return "high";
    return effort;
  }
  if (effort === "none" && !supportsReasoningNone(model)) return "minimal";
  if (effort === "xhigh" && !supportsXhigh(model)) return "high";
  return effort;
}

/** Map Axl's effort vocabulary to OpenAI's: "max" becomes "xhigh", others pass through. */
function effortToReasoningEffort(effort) {
  return effort === "max" ? "xhigh" : effort;
}

/**
 * Approximate a thinking-token budget as an OpenAI effort level.
 * Up to 1024 tokens → "low", up to 8192 → "medium", anything larger → "high".
 */
function budgetToReasoningEffort(budget) {
  if (budget <= 1024) return "low";
  if (budget <= 8192) return "medium";
  return "high";
}
190
235
  var OpenAIProvider = class {
191
236
  name = "openai";
192
237
  baseUrl;
@@ -274,13 +319,26 @@ var OpenAIProvider = class {
274
319
  // Internal helpers
275
320
  // ---------------------------------------------------------------------------
276
321
  buildRequestBody(messages, options, stream) {
277
- const reasoning = isReasoningModel(options.model);
322
+ const oSeries = isOSeriesModel(options.model);
323
+ const reasoningCapable = supportsReasoningEffort(options.model);
324
+ const { thinkingBudget, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
325
+ let wireEffort;
326
+ if (reasoningCapable) {
327
+ if (hasBudgetOverride) {
328
+ wireEffort = clampReasoningEffort(options.model, budgetToReasoningEffort(thinkingBudget));
329
+ } else if (!thinkingDisabled && activeEffort) {
330
+ wireEffort = clampReasoningEffort(options.model, effortToReasoningEffort(activeEffort));
331
+ } else if (thinkingDisabled) {
332
+ wireEffort = clampReasoningEffort(options.model, "none");
333
+ }
334
+ }
335
+ const stripTemp = oSeries || reasoningCapable && wireEffort !== void 0;
278
336
  const body = {
279
337
  model: options.model,
280
- messages: messages.map((m) => this.formatMessage(m, reasoning)),
338
+ messages: messages.map((m) => this.formatMessage(m, oSeries)),
281
339
  stream
282
340
  };
283
- if (options.temperature !== void 0 && !reasoning) {
341
+ if (options.temperature !== void 0 && !stripTemp) {
284
342
  body.temperature = options.temperature;
285
343
  }
286
344
  if (options.maxTokens !== void 0) {
@@ -289,7 +347,9 @@ var OpenAIProvider = class {
289
347
  if (options.stop) body.stop = options.stop;
290
348
  if (options.tools && options.tools.length > 0) {
291
349
  body.tools = options.tools;
292
- body.parallel_tool_calls = true;
350
+ if (!oSeries) {
351
+ body.parallel_tool_calls = true;
352
+ }
293
353
  }
294
354
  if (options.toolChoice !== void 0) {
295
355
  body.tool_choice = options.toolChoice;
@@ -297,12 +357,13 @@ var OpenAIProvider = class {
297
357
  if (options.responseFormat) {
298
358
  body.response_format = options.responseFormat;
299
359
  }
300
- if (options.reasoningEffort) {
301
- body.reasoning_effort = options.reasoningEffort;
302
- }
360
+ if (wireEffort) body.reasoning_effort = wireEffort;
303
361
  if (stream) {
304
362
  body.stream_options = { include_usage: true };
305
363
  }
364
+ if (options.providerOptions) {
365
+ Object.assign(body, options.providerOptions);
366
+ }
306
367
  return body;
307
368
  }
308
369
  /** Extract a human-readable message from an API error response body. */
@@ -316,9 +377,9 @@ var OpenAIProvider = class {
316
377
  }
317
378
  return `OpenAI API error (${status}): ${body}`;
318
379
  }
319
- formatMessage(msg, reasoning) {
380
+ formatMessage(msg, oSeries) {
320
381
  const out = {
321
- role: msg.role === "system" && reasoning ? "developer" : msg.role,
382
+ role: msg.role === "system" && oSeries ? "developer" : msg.role,
322
383
  content: msg.content
323
384
  };
324
385
  if (msg.name) out.name = msg.name;
@@ -455,7 +516,20 @@ var OpenAIResponsesProvider = class {
455
516
  // Internal: build request body
456
517
  // ---------------------------------------------------------------------------
457
518
  buildRequestBody(messages, options, stream) {
458
- const reasoning = isReasoningModel(options.model);
519
+ const oSeries = isOSeriesModel(options.model);
520
+ const reasoningCapable = supportsReasoningEffort(options.model);
521
+ const { thinkingBudget, includeThoughts, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
522
+ let wireEffort;
523
+ if (reasoningCapable) {
524
+ if (hasBudgetOverride) {
525
+ wireEffort = clampReasoningEffort(options.model, budgetToReasoningEffort(thinkingBudget));
526
+ } else if (!thinkingDisabled && activeEffort) {
527
+ wireEffort = clampReasoningEffort(options.model, effortToReasoningEffort(activeEffort));
528
+ } else if (thinkingDisabled) {
529
+ wireEffort = clampReasoningEffort(options.model, "none");
530
+ }
531
+ }
532
+ const stripTemp = oSeries || reasoningCapable && wireEffort !== void 0;
459
533
  const systemMessages = messages.filter((m) => m.role === "system");
460
534
  const nonSystemMessages = messages.filter((m) => m.role !== "system");
461
535
  const body = {
@@ -470,7 +544,7 @@ var OpenAIResponsesProvider = class {
470
544
  if (options.maxTokens !== void 0) {
471
545
  body.max_output_tokens = options.maxTokens;
472
546
  }
473
- if (options.temperature !== void 0 && !reasoning) {
547
+ if (options.temperature !== void 0 && !stripTemp) {
474
548
  body.temperature = options.temperature;
475
549
  }
476
550
  if (options.tools && options.tools.length > 0) {
@@ -489,12 +563,21 @@ var OpenAIResponsesProvider = class {
489
563
  body.tool_choice = options.toolChoice;
490
564
  }
491
565
  }
492
- if (options.reasoningEffort) {
493
- body.reasoning = { effort: options.reasoningEffort };
566
+ if (reasoningCapable && (wireEffort !== void 0 || includeThoughts)) {
567
+ const reasoning = {};
568
+ if (wireEffort !== void 0) reasoning.effort = wireEffort;
569
+ if (includeThoughts) reasoning.summary = "detailed";
570
+ if (Object.keys(reasoning).length > 0) body.reasoning = reasoning;
571
+ }
572
+ if (reasoningCapable) {
573
+ body.include = ["reasoning.encrypted_content"];
494
574
  }
495
575
  if (options.responseFormat) {
496
576
  body.text = { format: this.mapResponseFormat(options.responseFormat) };
497
577
  }
578
+ if (options.providerOptions) {
579
+ Object.assign(body, options.providerOptions);
580
+ }
498
581
  return body;
499
582
  }
500
583
  // ---------------------------------------------------------------------------
@@ -510,6 +593,12 @@ var OpenAIResponsesProvider = class {
510
593
  output: msg.content
511
594
  });
512
595
  } else if (msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0) {
596
+ const reasoningItems = msg.providerMetadata?.openaiReasoningItems;
597
+ if (reasoningItems) {
598
+ for (const item of reasoningItems) {
599
+ input.push(item);
600
+ }
601
+ }
513
602
  if (msg.content) {
514
603
  input.push({ type: "message", role: "assistant", content: msg.content });
515
604
  }
@@ -522,6 +611,12 @@ var OpenAIResponsesProvider = class {
522
611
  });
523
612
  }
524
613
  } else if (msg.role === "user" || msg.role === "assistant") {
614
+ if (msg.role === "assistant" && msg.providerMetadata?.openaiReasoningItems) {
615
+ const reasoningItems = msg.providerMetadata.openaiReasoningItems;
616
+ for (const item of reasoningItems) {
617
+ input.push(item);
618
+ }
619
+ }
525
620
  input.push({
526
621
  type: "message",
527
622
  role: msg.role,
@@ -554,7 +649,9 @@ var OpenAIResponsesProvider = class {
554
649
  // ---------------------------------------------------------------------------
555
650
  parseResponse(json, model) {
556
651
  let content = "";
652
+ let thinkingContent = "";
557
653
  const toolCalls = [];
654
+ const reasoningItems = [];
558
655
  for (const item of json.output) {
559
656
  if (item.type === "message") {
560
657
  for (const part of item.content ?? []) {
@@ -571,6 +668,15 @@ var OpenAIResponsesProvider = class {
571
668
  arguments: item.arguments
572
669
  }
573
670
  });
671
+ } else if (item.type === "reasoning") {
672
+ reasoningItems.push(item);
673
+ if (item.summary) {
674
+ for (const s of item.summary) {
675
+ if (s.type === "summary_text" && s.text) {
676
+ thinkingContent += s.text;
677
+ }
678
+ }
679
+ }
574
680
  }
575
681
  }
576
682
  const usage = json.usage ? {
@@ -581,11 +687,14 @@ var OpenAIResponsesProvider = class {
581
687
  cached_tokens: json.usage.input_tokens_details?.cached_tokens
582
688
  } : void 0;
583
689
  const cost = usage ? estimateOpenAICost(model, usage.prompt_tokens, usage.completion_tokens, usage.cached_tokens) : void 0;
690
+ const providerMetadata = reasoningItems.length > 0 ? { openaiReasoningItems: reasoningItems } : void 0;
584
691
  return {
585
692
  content,
693
+ thinking_content: thinkingContent || void 0,
586
694
  tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
587
695
  usage,
588
- cost
696
+ cost,
697
+ providerMetadata
589
698
  };
590
699
  }
591
700
  // ---------------------------------------------------------------------------
@@ -637,6 +746,8 @@ var OpenAIResponsesProvider = class {
637
746
  switch (eventType) {
638
747
  case "response.output_text.delta":
639
748
  return { type: "text_delta", content: data.delta ?? "" };
749
+ case "response.reasoning_summary_text.delta":
750
+ return { type: "thinking_delta", content: data.delta ?? "" };
640
751
  case "response.output_item.added":
641
752
  if (data.item?.type === "function_call") {
642
753
  const callId = data.item.call_id ?? data.item.id ?? "";
@@ -667,7 +778,9 @@ var OpenAIResponsesProvider = class {
667
778
  reasoning_tokens: response.usage.output_tokens_details?.reasoning_tokens,
668
779
  cached_tokens: response.usage.input_tokens_details?.cached_tokens
669
780
  } : void 0;
670
- return { type: "done", usage };
781
+ const reasoningItems = response?.output?.filter((item) => item.type === "reasoning") ?? [];
782
+ const providerMetadata = reasoningItems.length > 0 ? { openaiReasoningItems: reasoningItems } : void 0;
783
+ return { type: "done", usage, providerMetadata };
671
784
  }
672
785
  case "response.failed": {
673
786
  const errorMsg = data.response?.error?.message ?? data.response?.status_details?.error?.message ?? "Unknown error";
@@ -695,9 +808,12 @@ var OpenAIResponsesProvider = class {
695
808
  // src/providers/anthropic.ts
696
809
  var ANTHROPIC_API_VERSION = "2023-06-01";
697
810
  var ANTHROPIC_PRICING = {
698
- "claude-opus-4-6": [15e-6, 75e-6],
811
+ "claude-opus-4-6": [5e-6, 25e-6],
812
+ "claude-sonnet-4-6": [3e-6, 15e-6],
813
+ "claude-opus-4-5": [5e-6, 25e-6],
814
+ "claude-opus-4-1": [15e-6, 75e-6],
699
815
  "claude-sonnet-4-5": [3e-6, 15e-6],
700
- "claude-haiku-4-5": [8e-7, 4e-6],
816
+ "claude-haiku-4-5": [1e-6, 5e-6],
701
817
  "claude-sonnet-4": [3e-6, 15e-6],
702
818
  "claude-opus-4": [15e-6, 75e-6],
703
819
  "claude-3-7-sonnet": [3e-6, 15e-6],
@@ -707,12 +823,15 @@ var ANTHROPIC_PRICING = {
707
823
  "claude-3-sonnet": [3e-6, 15e-6],
708
824
  "claude-3-haiku": [25e-8, 125e-8]
709
825
  };
826
// Pricing keys ordered longest-first so that prefix matching picks the most
// specific model family before a shorter key that is also a prefix.
var ANTHROPIC_PRICING_KEYS_BY_LENGTH = Object.keys(ANTHROPIC_PRICING);
ANTHROPIC_PRICING_KEYS_BY_LENGTH.sort((left, right) => right.length - left.length);
710
829
  function estimateAnthropicCost(model, inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens) {
711
830
  let pricing = ANTHROPIC_PRICING[model];
712
831
  if (!pricing) {
713
- for (const [key, value] of Object.entries(ANTHROPIC_PRICING)) {
832
+ for (const key of ANTHROPIC_PRICING_KEYS_BY_LENGTH) {
714
833
  if (model.startsWith(key)) {
715
- pricing = value;
834
+ pricing = ANTHROPIC_PRICING[key];
716
835
  break;
717
836
  }
718
837
  }
@@ -724,6 +843,23 @@ function estimateAnthropicCost(model, inputTokens, outputTokens, cacheReadTokens
724
843
  const inputCost = (inputTokens - cacheRead - cacheWrite) * inputRate + cacheRead * inputRate * 0.1 + cacheWrite * inputRate * 1.25;
725
844
  return inputCost + outputTokens * outputRate;
726
845
  }
846
// Token budgets used when an effort level has to be expressed as an explicit
// Anthropic thinking budget.
var THINKING_BUDGETS = {
  low: 1024,
  medium: 5000,
  high: 10000,
  // 30000 (not 32000) to stay under the 32K max_tokens limit on Opus 4/4.1.
  // With auto-bump (+1024), max_tokens becomes 31024 which fits all models.
  max: 30000
};
854
/** True for model ids starting with "claude-opus-4-6" or "claude-sonnet-4-6". */
function supportsAdaptiveThinking(model) {
  const adaptiveFamilies = ["claude-opus-4-6", "claude-sonnet-4-6"];
  return adaptiveFamilies.some((family) => model.startsWith(family));
}
857
/** True only for model ids starting with "claude-opus-4-6". */
function supportsMaxEffort(model) {
  const maxEffortFamily = "claude-opus-4-6";
  return model.startsWith(maxEffortFamily);
}
860
/**
 * True for model ids starting with "claude-opus-4-6", "claude-sonnet-4-6"
 * or "claude-opus-4-5".
 */
function supportsEffort(model) {
  const effortFamilies = ["claude-opus-4-6", "claude-sonnet-4-6", "claude-opus-4-5"];
  return effortFamilies.some((family) => model.startsWith(family));
}
727
863
  var AnthropicProvider = class {
728
864
  name = "anthropic";
729
865
  baseUrl;
@@ -813,21 +949,58 @@ var AnthropicProvider = class {
813
949
  if (systemText) {
814
950
  body.system = systemText;
815
951
  }
816
- if (options.temperature !== void 0) {
817
- body.temperature = options.temperature;
818
- }
819
952
  if (options.stop) {
820
953
  body.stop_sequences = options.stop;
821
954
  }
822
955
  if (options.tools && options.tools.length > 0) {
823
956
  body.tools = options.tools.map((t) => this.mapToolDefinition(t));
824
957
  }
958
+ if (options.toolChoice !== void 0) {
959
+ body.tool_choice = this.mapToolChoice(options.toolChoice);
960
+ }
961
+ const { thinkingBudget, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
962
+ let resolvedEffort = activeEffort;
963
+ if (resolvedEffort === "max" && !supportsMaxEffort(options.model)) {
964
+ resolvedEffort = "high";
965
+ }
966
+ if (hasBudgetOverride) {
967
+ body.thinking = { type: "enabled", budget_tokens: thinkingBudget };
968
+ const currentMax = body.max_tokens;
969
+ if (currentMax < thinkingBudget + 1024) {
970
+ body.max_tokens = thinkingBudget + 1024;
971
+ }
972
+ if (resolvedEffort && supportsEffort(options.model)) {
973
+ body.output_config = { effort: resolvedEffort };
974
+ }
975
+ } else if (thinkingDisabled) {
976
+ if (resolvedEffort && supportsEffort(options.model)) {
977
+ body.output_config = { effort: resolvedEffort };
978
+ }
979
+ } else if (resolvedEffort && supportsAdaptiveThinking(options.model)) {
980
+ body.thinking = { type: "adaptive" };
981
+ body.output_config = { effort: resolvedEffort };
982
+ } else if (resolvedEffort && supportsEffort(options.model)) {
983
+ body.output_config = { effort: resolvedEffort };
984
+ } else if (resolvedEffort) {
985
+ const budget = THINKING_BUDGETS[resolvedEffort] ?? 5e3;
986
+ body.thinking = { type: "enabled", budget_tokens: budget };
987
+ const currentMax = body.max_tokens;
988
+ if (currentMax < budget + 1024) {
989
+ body.max_tokens = budget + 1024;
990
+ }
991
+ }
992
+ if (options.temperature !== void 0 && !body.thinking) {
993
+ body.temperature = options.temperature;
994
+ }
825
995
  if (options.responseFormat && options.responseFormat.type !== "text") {
826
996
  const jsonInstruction = "You must respond with valid JSON only. No markdown fences, no extra text.";
827
997
  body.system = body.system ? `${body.system}
828
998
 
829
999
  ${jsonInstruction}` : jsonInstruction;
830
1000
  }
1001
+ if (options.providerOptions) {
1002
+ Object.assign(body, options.providerOptions);
1003
+ }
831
1004
  return body;
832
1005
  }
833
1006
  /**
@@ -917,14 +1090,33 @@ ${jsonInstruction}` : jsonInstruction;
917
1090
  input_schema: tool2.function.parameters
918
1091
  };
919
1092
  }
1093
+ /**
1094
+ * Map Axl's ToolChoice to Anthropic's tool_choice format.
1095
+ *
1096
+ * Axl (OpenAI format) → Anthropic format
1097
+ * 'auto' → { type: 'auto' }
1098
+ * 'none' → { type: 'none' }
1099
+ * 'required' → { type: 'any' }
1100
+ * { type:'function', function: { name } } → { type: 'tool', name }
1101
+ */
1102
+ mapToolChoice(choice) {
1103
+ if (typeof choice === "string") {
1104
+ if (choice === "required") return { type: "any" };
1105
+ return { type: choice };
1106
+ }
1107
+ return { type: "tool", name: choice.function.name };
1108
+ }
920
1109
  // ---------------------------------------------------------------------------
921
1110
  // Internal: response parsing
922
1111
  // ---------------------------------------------------------------------------
923
1112
  parseResponse(json) {
924
1113
  let content = "";
1114
+ let thinkingContent = "";
925
1115
  const toolCalls = [];
926
1116
  for (const block of json.content) {
927
- if (block.type === "text") {
1117
+ if (block.type === "thinking") {
1118
+ thinkingContent += block.thinking;
1119
+ } else if (block.type === "text") {
928
1120
  content += block.text;
929
1121
  } else if (block.type === "tool_use") {
930
1122
  toolCalls.push({
@@ -955,6 +1147,7 @@ ${jsonInstruction}` : jsonInstruction;
955
1147
  ) : void 0;
956
1148
  return {
957
1149
  content,
1150
+ thinking_content: thinkingContent || void 0,
958
1151
  tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
959
1152
  usage,
960
1153
  cost
@@ -1005,7 +1198,9 @@ ${jsonInstruction}` : jsonInstruction;
1005
1198
  }
1006
1199
  case "content_block_delta": {
1007
1200
  const delta = event.delta;
1008
- if (delta?.type === "text_delta" && delta.text) {
1201
+ if (delta?.type === "thinking_delta" && delta.thinking) {
1202
+ yield { type: "thinking_delta", content: delta.thinking };
1203
+ } else if (delta?.type === "text_delta" && delta.text) {
1009
1204
  yield { type: "text_delta", content: delta.text };
1010
1205
  } else if (delta?.type === "input_json_delta" && delta.partial_json) {
1011
1206
  yield {
@@ -1076,14 +1271,19 @@ var GEMINI_PRICING = {
1076
1271
  "gemini-2.0-flash": [1e-7, 4e-7],
1077
1272
  "gemini-2.0-flash-lite": [1e-7, 4e-7],
1078
1273
  "gemini-3-pro-preview": [2e-6, 12e-6],
1079
- "gemini-3-flash-preview": [5e-7, 3e-6]
1274
+ "gemini-3-flash-preview": [5e-7, 3e-6],
1275
+ "gemini-3.1-pro-preview": [2e-6, 12e-6],
1276
+ "gemini-3.1-flash-lite-preview": [25e-8, 15e-7]
1080
1277
  };
1278
// Pricing keys ordered longest-first so that prefix matching picks the most
// specific model family before a shorter key that is also a prefix.
var GEMINI_PRICING_KEYS_BY_LENGTH = Object.keys(GEMINI_PRICING);
GEMINI_PRICING_KEYS_BY_LENGTH.sort((left, right) => right.length - left.length);
1081
1281
  function estimateGeminiCost(model, inputTokens, outputTokens, cachedTokens) {
1082
1282
  let pricing = GEMINI_PRICING[model];
1083
1283
  if (!pricing) {
1084
- for (const [key, value] of Object.entries(GEMINI_PRICING)) {
1284
+ for (const key of GEMINI_PRICING_KEYS_BY_LENGTH) {
1085
1285
  if (model.startsWith(key)) {
1086
- pricing = value;
1286
+ pricing = GEMINI_PRICING[key];
1087
1287
  break;
1088
1288
  }
1089
1289
  }
@@ -1094,6 +1294,39 @@ function estimateGeminiCost(model, inputTokens, outputTokens, cachedTokens) {
1094
1294
  const inputCost = (inputTokens - cached) * inputRate + cached * inputRate * 0.1;
1095
1295
  return inputCost + outputTokens * outputRate;
1096
1296
  }
1297
// Token budgets used when an effort level has to be expressed as a Gemini
// thinkingBudget (models that are not Gemini 3.x).
var THINKING_BUDGETS2 = {
  low: 1024,
  medium: 5000,
  high: 10000,
  max: 24576
};
1303
// Effort → Gemini 3.x thinkingLevel. 'max' has no dedicated level because
// 3.x caps at 'high'.
var THINKING_LEVELS = {
  low: "low",
  medium: "medium",
  high: "high",
  max: "high"
};
1310
/** True when the model id begins with "gemini-3" followed by "." or "-". */
function isGemini3x(model) {
  const gemini3Prefix = /^gemini-3[.-]/;
  return gemini3Prefix.test(model);
}
1313
/**
 * Approximate a thinking-token budget as a thinking level:
 * up to 1024 → "low", up to 5000 → "medium", anything else → "high".
 */
function budgetToThinkingLevel(budgetTokens) {
  return budgetTokens <= 1024 ? "low" : budgetTokens <= 5e3 ? "medium" : "high";
}
1318
/**
 * Lowest thinking level to request for a Gemini 3.x model when the caller
 * asks for thinking to be disabled.
 *
 * @param {string} model - Gemini model id.
 * @returns {"low"|"minimal"} "low" for every Gemini 3.x Pro variant
 *   (gemini-3-pro*, gemini-3.1-pro*, ...), "minimal" otherwise.
 */
function minThinkingLevel(model) {
  // Previously only "gemini-3.1-pro" was special-cased, so gemini-3-pro-preview
  // was sent "minimal". NOTE(review): Pro variants are believed not to accept
  // "minimal" — confirm against the Gemini thinking-level documentation.
  if (/^gemini-3(\.\d+)?-pro/.test(model)) return "low";
  return "minimal";
}
1322
// Model ids that have already triggered the warning below.
var _warned3xEffortNone = /* @__PURE__ */ new Set();

/**
 * Emit a console warning, at most once per model id, explaining that
 * effort 'none' maps to the model's minimum thinking level on Gemini 3.x.
 */
function warnGemini3xEffortNone(model) {
  const alreadyWarned = _warned3xEffortNone.has(model);
  if (!alreadyWarned) {
    _warned3xEffortNone.add(model);
    const message = `[axl] effort: 'none' on Gemini 3.x (${model}) maps to the model's minimum thinking level ('${minThinkingLevel(model)}'), not fully disabled. Gemini 3.x models cannot disable thinking entirely.`;
    console.warn(message);
  }
}
1097
1330
  var GeminiProvider = class {
1098
1331
  name = "google";
1099
1332
  baseUrl;
@@ -1207,6 +1440,58 @@ var GeminiProvider = class {
1207
1440
  if (Object.keys(generationConfig).length > 0) {
1208
1441
  body.generationConfig = generationConfig;
1209
1442
  }
1443
+ const {
1444
+ effort,
1445
+ thinkingBudget,
1446
+ includeThoughts,
1447
+ thinkingDisabled,
1448
+ activeEffort,
1449
+ hasBudgetOverride
1450
+ } = resolveThinkingOptions(options);
1451
+ if (thinkingDisabled) {
1452
+ if (isGemini3x(options.model)) {
1453
+ if (effort === "none") {
1454
+ warnGemini3xEffortNone(options.model);
1455
+ }
1456
+ generationConfig.thinkingConfig = { thinkingLevel: minThinkingLevel(options.model) };
1457
+ } else {
1458
+ generationConfig.thinkingConfig = { thinkingBudget: 0 };
1459
+ }
1460
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1461
+ } else if (hasBudgetOverride) {
1462
+ const config = {};
1463
+ if (isGemini3x(options.model)) {
1464
+ config.thinkingLevel = budgetToThinkingLevel(thinkingBudget);
1465
+ } else {
1466
+ config.thinkingBudget = thinkingBudget;
1467
+ }
1468
+ if (includeThoughts) config.includeThoughts = true;
1469
+ generationConfig.thinkingConfig = config;
1470
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1471
+ } else if (activeEffort) {
1472
+ const config = {};
1473
+ if (isGemini3x(options.model)) {
1474
+ config.thinkingLevel = THINKING_LEVELS[activeEffort] ?? "medium";
1475
+ } else {
1476
+ if (activeEffort === "max" && options.model.startsWith("gemini-2.5-pro")) {
1477
+ config.thinkingBudget = 32768;
1478
+ } else {
1479
+ config.thinkingBudget = THINKING_BUDGETS2[activeEffort] ?? 5e3;
1480
+ }
1481
+ }
1482
+ if (includeThoughts) config.includeThoughts = true;
1483
+ generationConfig.thinkingConfig = config;
1484
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1485
+ } else if (includeThoughts) {
1486
+ generationConfig.thinkingConfig = { includeThoughts: true };
1487
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1488
+ }
1489
+ if (options.toolChoice !== void 0) {
1490
+ body.toolConfig = { functionCallingConfig: this.mapToolChoice(options.toolChoice) };
1491
+ }
1492
+ if (options.providerOptions) {
1493
+ Object.assign(body, options.providerOptions);
1494
+ }
1210
1495
  return body;
1211
1496
  }
1212
1497
  /**
@@ -1232,28 +1517,33 @@ var GeminiProvider = class {
1232
1517
  const result = [];
1233
1518
  for (const msg of messages) {
1234
1519
  if (msg.role === "assistant") {
1235
- const parts = [];
1236
- if (msg.content) {
1237
- parts.push({ text: msg.content });
1238
- }
1239
- if (msg.tool_calls && msg.tool_calls.length > 0) {
1240
- for (const tc of msg.tool_calls) {
1241
- let parsedArgs;
1242
- try {
1243
- parsedArgs = JSON.parse(tc.function.arguments);
1244
- } catch {
1245
- parsedArgs = {};
1246
- }
1247
- parts.push({
1248
- functionCall: {
1249
- name: tc.function.name,
1250
- args: parsedArgs
1520
+ const rawParts = msg.providerMetadata?.geminiParts;
1521
+ if (rawParts && rawParts.length > 0) {
1522
+ result.push({ role: "model", parts: rawParts });
1523
+ } else {
1524
+ const parts = [];
1525
+ if (msg.content) {
1526
+ parts.push({ text: msg.content });
1527
+ }
1528
+ if (msg.tool_calls && msg.tool_calls.length > 0) {
1529
+ for (const tc of msg.tool_calls) {
1530
+ let parsedArgs;
1531
+ try {
1532
+ parsedArgs = JSON.parse(tc.function.arguments);
1533
+ } catch {
1534
+ parsedArgs = {};
1251
1535
  }
1252
- });
1536
+ parts.push({
1537
+ functionCall: {
1538
+ name: tc.function.name,
1539
+ args: parsedArgs
1540
+ }
1541
+ });
1542
+ }
1543
+ }
1544
+ if (parts.length > 0) {
1545
+ result.push({ role: "model", parts });
1253
1546
  }
1254
- }
1255
- if (parts.length > 0) {
1256
- result.push({ role: "model", parts });
1257
1547
  }
1258
1548
  } else if (msg.role === "tool") {
1259
1549
  const functionName = toolCallIdToName.get(msg.tool_call_id) ?? "unknown";
@@ -1298,6 +1588,25 @@ var GeminiProvider = class {
1298
1588
  }
1299
1589
  return merged;
1300
1590
  }
1591
+ /**
1592
+ * Map Axl's ToolChoice to Gemini's functionCallingConfig format.
1593
+ *
1594
+ * - 'auto' → { mode: 'AUTO' }
1595
+ * - 'none' → { mode: 'NONE' }
1596
+ * - 'required' → { mode: 'ANY' }
1597
+ * - { type: 'function', function: { name } } → { mode: 'ANY', allowedFunctionNames: [name] }
1598
+ */
1599
+ mapToolChoice(choice) {
1600
+ if (typeof choice === "string") {
1601
+ const modeMap = {
1602
+ auto: "AUTO",
1603
+ none: "NONE",
1604
+ required: "ANY"
1605
+ };
1606
+ return { mode: modeMap[choice] ?? "AUTO" };
1607
+ }
1608
+ return { mode: "ANY", allowedFunctionNames: [choice.function.name] };
1609
+ }
1301
1610
  mapToolDefinition(tool2) {
1302
1611
  return {
1303
1612
  name: tool2.function.name,
@@ -1311,10 +1620,13 @@ var GeminiProvider = class {
1311
1620
  parseResponse(json, model) {
1312
1621
  const candidate = json.candidates?.[0];
1313
1622
  let content = "";
1623
+ let thinkingContent = "";
1314
1624
  const toolCalls = [];
1315
1625
  if (candidate?.content?.parts) {
1316
1626
  for (const part of candidate.content.parts) {
1317
- if (part.text) {
1627
+ if (part.thought && part.text) {
1628
+ thinkingContent += part.text;
1629
+ } else if (part.text) {
1318
1630
  content += part.text;
1319
1631
  } else if (part.functionCall) {
1320
1632
  toolCalls.push({
@@ -1329,18 +1641,24 @@ var GeminiProvider = class {
1329
1641
  }
1330
1642
  }
1331
1643
  const cachedTokens = json.usageMetadata?.cachedContentTokenCount;
1644
+ const reasoningTokens = json.usageMetadata?.thoughtsTokenCount;
1332
1645
  const usage = json.usageMetadata ? {
1333
1646
  prompt_tokens: json.usageMetadata.promptTokenCount ?? 0,
1334
1647
  completion_tokens: json.usageMetadata.candidatesTokenCount ?? 0,
1335
1648
  total_tokens: json.usageMetadata.totalTokenCount ?? 0,
1336
- cached_tokens: cachedTokens && cachedTokens > 0 ? cachedTokens : void 0
1649
+ cached_tokens: cachedTokens && cachedTokens > 0 ? cachedTokens : void 0,
1650
+ reasoning_tokens: reasoningTokens && reasoningTokens > 0 ? reasoningTokens : void 0
1337
1651
  } : void 0;
1338
1652
  const cost = usage ? estimateGeminiCost(model, usage.prompt_tokens, usage.completion_tokens, usage.cached_tokens) : void 0;
1653
+ const rawParts = candidate?.content?.parts;
1654
+ const providerMetadata = rawParts ? { geminiParts: rawParts } : void 0;
1339
1655
  return {
1340
1656
  content,
1657
+ thinking_content: thinkingContent || void 0,
1341
1658
  tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
1342
1659
  usage,
1343
- cost
1660
+ cost,
1661
+ providerMetadata
1344
1662
  };
1345
1663
  }
1346
1664
  // ---------------------------------------------------------------------------
@@ -1351,6 +1669,7 @@ var GeminiProvider = class {
1351
1669
  const decoder = new TextDecoder();
1352
1670
  let buffer = "";
1353
1671
  let usage;
1672
+ const accumulatedParts = [];
1354
1673
  try {
1355
1674
  while (true) {
1356
1675
  const { done, value } = await reader.read();
@@ -1371,17 +1690,22 @@ var GeminiProvider = class {
1371
1690
  }
1372
1691
  if (chunk.usageMetadata) {
1373
1692
  const cached = chunk.usageMetadata.cachedContentTokenCount;
1693
+ const reasoning = chunk.usageMetadata.thoughtsTokenCount;
1374
1694
  usage = {
1375
1695
  prompt_tokens: chunk.usageMetadata.promptTokenCount ?? 0,
1376
1696
  completion_tokens: chunk.usageMetadata.candidatesTokenCount ?? 0,
1377
1697
  total_tokens: chunk.usageMetadata.totalTokenCount ?? 0,
1378
- cached_tokens: cached && cached > 0 ? cached : void 0
1698
+ cached_tokens: cached && cached > 0 ? cached : void 0,
1699
+ reasoning_tokens: reasoning && reasoning > 0 ? reasoning : void 0
1379
1700
  };
1380
1701
  }
1381
1702
  const candidate = chunk.candidates?.[0];
1382
1703
  if (candidate?.content?.parts) {
1383
1704
  for (const part of candidate.content.parts) {
1384
- if (part.text) {
1705
+ accumulatedParts.push(part);
1706
+ if (part.thought && part.text) {
1707
+ yield { type: "thinking_delta", content: part.text };
1708
+ } else if (part.text) {
1385
1709
  yield { type: "text_delta", content: part.text };
1386
1710
  } else if (part.functionCall) {
1387
1711
  yield {
@@ -1395,7 +1719,8 @@ var GeminiProvider = class {
1395
1719
  }
1396
1720
  }
1397
1721
  }
1398
- yield { type: "done", usage };
1722
+ const providerMetadata = accumulatedParts.length > 0 ? { geminiParts: accumulatedParts } : void 0;
1723
+ yield { type: "done", usage, providerMetadata };
1399
1724
  } finally {
1400
1725
  reader.releaseLock();
1401
1726
  }
@@ -1768,7 +2093,7 @@ function estimateMessagesTokens(messages) {
1768
2093
  }
1769
2094
  return total;
1770
2095
  }
1771
- var WorkflowContext = class {
2096
+ var WorkflowContext = class _WorkflowContext {
1772
2097
  input;
1773
2098
  executionId;
1774
2099
  metadata;
@@ -1821,6 +2146,37 @@ var WorkflowContext = class {
1821
2146
  this.summaryCache = init.metadata.summaryCache;
1822
2147
  }
1823
2148
  }
2149
+ /**
2150
+ * Create a child context for nested agent invocations (e.g., agent-as-tool).
2151
+ * Shares: budget tracking, abort signals, trace emission, provider registry,
2152
+ * state store, span manager, memory manager, MCP manager, config,
2153
+ * awaitHuman handler, pending decisions, tool overrides.
2154
+ * Isolates: session history, step counter, streaming callbacks (onToken, onAgentStart, onToolCall).
2155
+ */
2156
+ createChildContext() {
2157
+ return new _WorkflowContext({
2158
+ input: this.input,
2159
+ executionId: this.executionId,
2160
+ config: this.config,
2161
+ providerRegistry: this.providerRegistry,
2162
+ metadata: { ...this.metadata },
2163
+ // Shared infrastructure
2164
+ budgetContext: this.budgetContext,
2165
+ stateStore: this.stateStore,
2166
+ mcpManager: this.mcpManager,
2167
+ spanManager: this.spanManager,
2168
+ memoryManager: this.memoryManager,
2169
+ onTrace: this.onTrace,
2170
+ onAgentCallComplete: this.onAgentCallComplete,
2171
+ awaitHumanHandler: this.awaitHumanHandler,
2172
+ pendingDecisions: this.pendingDecisions,
2173
+ toolOverrides: this.toolOverrides,
2174
+ signal: this.signal,
2175
+ workflowName: this.workflowName
2176
+ // Isolated: sessionHistory (empty), stepCounter (0),
2177
+ // onToken (null), onAgentStart (null), onToolCall (null)
2178
+ });
2179
+ }
1824
2180
  /**
1825
2181
  * Resolve the current abort signal.
1826
2182
  * Branch-scoped signals (from race/spawn/map/budget) in AsyncLocalStorage
@@ -1880,7 +2236,15 @@ var WorkflowContext = class {
1880
2236
  model: agent2.resolveModel(resolveCtx),
1881
2237
  cost: costAfter - costBefore,
1882
2238
  duration: Date.now() - startTime,
1883
- promptVersion: agent2._config.version
2239
+ promptVersion: agent2._config.version,
2240
+ temperature: options?.temperature ?? agent2._config.temperature,
2241
+ maxTokens: options?.maxTokens ?? agent2._config.maxTokens ?? 4096,
2242
+ effort: options?.effort ?? agent2._config.effort,
2243
+ thinkingBudget: options?.thinkingBudget ?? agent2._config.thinkingBudget,
2244
+ includeThoughts: options?.includeThoughts ?? agent2._config.includeThoughts,
2245
+ toolChoice: options?.toolChoice ?? agent2._config.toolChoice,
2246
+ stop: options?.stop ?? agent2._config.stop,
2247
+ providerOptions: options?.providerOptions ?? agent2._config.providerOptions
1884
2248
  });
1885
2249
  return result;
1886
2250
  });
@@ -1903,7 +2267,21 @@ var WorkflowContext = class {
1903
2267
  const modelUri = agent2.resolveModel(resolveCtx);
1904
2268
  const systemPrompt = agent2.resolveSystem(resolveCtx);
1905
2269
  const { provider, model } = this.providerRegistry.resolve(modelUri, this.config);
1906
- const toolDefs = this.buildToolDefs(agent2);
2270
+ let resolvedHandoffs;
2271
+ if (typeof agent2._config.handoffs === "function") {
2272
+ try {
2273
+ resolvedHandoffs = agent2._config.handoffs(resolveCtx);
2274
+ } catch (err) {
2275
+ this.log("handoff_resolve_error", {
2276
+ agent: agent2._name,
2277
+ error: err instanceof Error ? err.message : String(err)
2278
+ });
2279
+ resolvedHandoffs = void 0;
2280
+ }
2281
+ } else {
2282
+ resolvedHandoffs = agent2._config.handoffs;
2283
+ }
2284
+ const toolDefs = this.buildToolDefs(agent2, resolvedHandoffs);
1907
2285
  const messages = [];
1908
2286
  if (systemPrompt) {
1909
2287
  messages.push({ role: "system", content: systemPrompt });
@@ -2007,9 +2385,15 @@ Please fix and try again.`;
2007
2385
  turns++;
2008
2386
  const chatOptions = {
2009
2387
  model,
2010
- temperature: agent2._config.temperature,
2388
+ temperature: options?.temperature ?? agent2._config.temperature,
2011
2389
  tools: toolDefs.length > 0 ? toolDefs : void 0,
2012
- maxTokens: 4096,
2390
+ maxTokens: options?.maxTokens ?? agent2._config.maxTokens ?? 4096,
2391
+ effort: options?.effort ?? agent2._config.effort,
2392
+ thinkingBudget: options?.thinkingBudget ?? agent2._config.thinkingBudget,
2393
+ includeThoughts: options?.includeThoughts ?? agent2._config.includeThoughts,
2394
+ toolChoice: options?.toolChoice ?? agent2._config.toolChoice,
2395
+ stop: options?.stop ?? agent2._config.stop,
2396
+ providerOptions: options?.providerOptions ?? agent2._config.providerOptions,
2013
2397
  signal: this.currentSignal
2014
2398
  };
2015
2399
  if (options?.schema && toolDefs.length === 0) {
@@ -2021,10 +2405,14 @@ Please fix and try again.`;
2021
2405
  let content2 = "";
2022
2406
  const toolCalls = [];
2023
2407
  const toolCallBuffers = /* @__PURE__ */ new Map();
2408
+ let streamProviderMetadata;
2409
+ let thinkingContent = "";
2024
2410
  for await (const chunk of provider.stream(currentMessages, chatOptions)) {
2025
2411
  if (chunk.type === "text_delta") {
2026
2412
  content2 += chunk.content;
2027
2413
  this.onToken(chunk.content);
2414
+ } else if (chunk.type === "thinking_delta") {
2415
+ thinkingContent += chunk.content;
2028
2416
  } else if (chunk.type === "tool_call_delta") {
2029
2417
  let buffer = toolCallBuffers.get(chunk.id);
2030
2418
  if (!buffer) {
@@ -2034,6 +2422,7 @@ Please fix and try again.`;
2034
2422
  if (chunk.name) buffer.name = chunk.name;
2035
2423
  if (chunk.arguments) buffer.arguments += chunk.arguments;
2036
2424
  } else if (chunk.type === "done") {
2425
+ streamProviderMetadata = chunk.providerMetadata;
2037
2426
  if (chunk.usage) {
2038
2427
  response = {
2039
2428
  content: content2,
@@ -2060,6 +2449,12 @@ Please fix and try again.`;
2060
2449
  if (toolCalls.length > 0) {
2061
2450
  response.tool_calls = toolCalls;
2062
2451
  }
2452
+ if (streamProviderMetadata) {
2453
+ response.providerMetadata = streamProviderMetadata;
2454
+ }
2455
+ if (thinkingContent) {
2456
+ response.thinking_content = thinkingContent;
2457
+ }
2063
2458
  } else {
2064
2459
  response = await provider.chat(currentMessages, chatOptions);
2065
2460
  }
@@ -2090,13 +2485,14 @@ Please fix and try again.`;
2090
2485
  currentMessages.push({
2091
2486
  role: "assistant",
2092
2487
  content: response.content || "",
2093
- tool_calls: response.tool_calls
2488
+ tool_calls: response.tool_calls,
2489
+ ...response.providerMetadata ? { providerMetadata: response.providerMetadata } : {}
2094
2490
  });
2095
2491
  for (const toolCall of response.tool_calls) {
2096
2492
  const toolName = toolCall.function.name;
2097
2493
  if (toolName.startsWith("handoff_to_")) {
2098
2494
  const targetName = toolName.replace("handoff_to_", "");
2099
- const descriptor = agent2._config.handoffs?.find((h) => h.agent._name === targetName);
2495
+ const descriptor = resolvedHandoffs?.find((h) => h.agent._name === targetName);
2100
2496
  if (descriptor) {
2101
2497
  const mode = descriptor.mode ?? "oneway";
2102
2498
  let handoffPrompt = prompt;
@@ -2108,10 +2504,11 @@ Please fix and try again.`;
2108
2504
  }
2109
2505
  }
2110
2506
  const handoffStart = Date.now();
2507
+ const handoffOptions = options ? { schema: options.schema, retries: options.retries, metadata: options.metadata } : void 0;
2111
2508
  const handoffFn = () => this.executeAgentCall(
2112
2509
  descriptor.agent,
2113
2510
  handoffPrompt,
2114
- options,
2511
+ handoffOptions,
2115
2512
  0,
2116
2513
  void 0,
2117
2514
  void 0,
@@ -2348,8 +2745,9 @@ Please fix and try again.`;
2348
2745
  resultContent2 = JSON.stringify(toolResult2);
2349
2746
  }
2350
2747
  } else if (tool2) {
2748
+ const childCtx = this.createChildContext();
2351
2749
  try {
2352
- toolResult2 = await tool2._execute(toolArgs);
2750
+ toolResult2 = await tool2._execute(toolArgs, childCtx);
2353
2751
  } catch (err) {
2354
2752
  toolResult2 = { error: err instanceof Error ? err.message : String(err) };
2355
2753
  }
@@ -2429,7 +2827,8 @@ Please fix and try again.`;
2429
2827
  guardrailOutputRetries++;
2430
2828
  currentMessages.push({
2431
2829
  role: "assistant",
2432
- content
2830
+ content,
2831
+ ...response.providerMetadata ? { providerMetadata: response.providerMetadata } : {}
2433
2832
  });
2434
2833
  currentMessages.push({
2435
2834
  role: "system",
@@ -2450,6 +2849,7 @@ Please fix and try again.`;
2450
2849
  try {
2451
2850
  const parsed = JSON.parse(stripMarkdownFences(content));
2452
2851
  const validated = options.schema.parse(parsed);
2852
+ this.pushAssistantToSessionHistory(content, response.providerMetadata);
2453
2853
  return validated;
2454
2854
  } catch (err) {
2455
2855
  const maxRetries = options.retries ?? 3;
@@ -2476,11 +2876,23 @@ Please fix and try again.`;
2476
2876
  throw new VerifyError(content, zodErr, maxRetries);
2477
2877
  }
2478
2878
  }
2879
+ this.pushAssistantToSessionHistory(content, response.providerMetadata);
2479
2880
  return content;
2480
2881
  }
2481
2882
  throw new MaxTurnsError("ctx.ask()", maxTurns);
2482
2883
  }
2483
- buildToolDefs(agent2) {
2884
+ /**
2885
+ * Push the final assistant message into session history, preserving providerMetadata
2886
+ * (e.g., Gemini thought signatures needed for multi-turn reasoning context).
2887
+ */
2888
+ pushAssistantToSessionHistory(content, providerMetadata) {
2889
+ this.sessionHistory.push({
2890
+ role: "assistant",
2891
+ content,
2892
+ ...providerMetadata ? { providerMetadata } : {}
2893
+ });
2894
+ }
2895
+ buildToolDefs(agent2, resolvedHandoffs) {
2484
2896
  const defs = [];
2485
2897
  if (agent2._config.tools) {
2486
2898
  for (const tool2 of agent2._config.tools) {
@@ -2494,8 +2906,8 @@ Please fix and try again.`;
2494
2906
  });
2495
2907
  }
2496
2908
  }
2497
- if (agent2._config.handoffs) {
2498
- for (const { agent: handoffAgent, description, mode } of agent2._config.handoffs) {
2909
+ if (resolvedHandoffs) {
2910
+ for (const { agent: handoffAgent, description, mode } of resolvedHandoffs) {
2499
2911
  const isRoundtrip = mode === "roundtrip";
2500
2912
  const defaultDesc = isRoundtrip ? `Delegate a task to ${handoffAgent._name} and receive the result back` : `Hand off the conversation to ${handoffAgent._name}`;
2501
2913
  defs.push({
@@ -3184,6 +3596,79 @@ ${summaryResponse.content}`
3184
3596
  const sessionId = this.metadata?.sessionId;
3185
3597
  await this.memoryManager.forget(key, this.stateStore, sessionId, options);
3186
3598
  }
3599
+ // ── ctx.delegate() ──────────────────────────────────────────────────
3600
+ /**
3601
+ * Select the best agent from a list of candidates and invoke it.
3602
+ * Creates a temporary router agent that uses handoffs to pick the right specialist.
3603
+ *
3604
+ * This is convenience sugar over creating a router agent with dynamic handoffs.
3605
+ * For full control over the router's behavior, create the router agent explicitly.
3606
+ *
3607
+ * @param agents - Candidate agents to choose from (at least 1)
3608
+ * @param prompt - The prompt to send to the selected agent
3609
+ * @param options - Optional: schema, routerModel, metadata, retries
3610
+ */
3611
+ async delegate(agents, prompt, options) {
3612
+ if (agents.length === 0) {
3613
+ throw new Error("ctx.delegate() requires at least one candidate agent");
3614
+ }
3615
+ const names = /* @__PURE__ */ new Set();
3616
+ for (const a of agents) {
3617
+ if (names.has(a._name)) {
3618
+ throw new Error(
3619
+ `ctx.delegate() received duplicate agent name '${a._name}'. All candidate agents must have unique names.`
3620
+ );
3621
+ }
3622
+ names.add(a._name);
3623
+ }
3624
+ if (agents.length === 1) {
3625
+ return this.ask(agents[0], prompt, {
3626
+ schema: options?.schema,
3627
+ retries: options?.retries,
3628
+ metadata: options?.metadata
3629
+ });
3630
+ }
3631
+ const resolveCtx = options?.metadata ? { metadata: { ...this.metadata, ...options.metadata } } : { metadata: this.metadata };
3632
+ const routerModelUri = options?.routerModel ?? agents[0].resolveModel(resolveCtx);
3633
+ const handoffs = agents.map((a) => {
3634
+ let description;
3635
+ try {
3636
+ description = a.resolveSystem(resolveCtx).slice(0, 200);
3637
+ } catch {
3638
+ description = `Agent: ${a._name}`;
3639
+ }
3640
+ return { agent: a, description };
3641
+ });
3642
+ const routerSystem = "Route to the best agent for this task. Always hand off; never answer directly.";
3643
+ const routerAgent = {
3644
+ _config: {
3645
+ model: routerModelUri,
3646
+ system: routerSystem,
3647
+ temperature: 0,
3648
+ handoffs,
3649
+ maxTurns: 2
3650
+ },
3651
+ _name: "_delegate_router",
3652
+ ask: async () => {
3653
+ throw new Error("Direct invocation not supported on delegate router");
3654
+ },
3655
+ resolveModel: () => routerModelUri,
3656
+ resolveSystem: () => routerSystem
3657
+ };
3658
+ this.emitTrace({
3659
+ type: "delegate",
3660
+ agent: "_delegate_router",
3661
+ data: {
3662
+ candidates: agents.map((a) => a._name),
3663
+ routerModel: routerModelUri
3664
+ }
3665
+ });
3666
+ return this.ask(routerAgent, prompt, {
3667
+ schema: options?.schema,
3668
+ retries: options?.retries,
3669
+ metadata: options?.metadata
3670
+ });
3671
+ }
3187
3672
  // ── Private ───────────────────────────────────────────────────────────
3188
3673
  emitTrace(partial) {
3189
3674
  let data = partial.data;
@@ -3793,11 +4278,13 @@ var Session = class _Session {
3793
4278
  ...cachedSummary ? { summaryCache: cachedSummary } : {}
3794
4279
  }
3795
4280
  });
3796
- const assistantMessage = {
3797
- role: "assistant",
3798
- content: typeof result === "string" ? result : JSON.stringify(result)
3799
- };
3800
- history.push(assistantMessage);
4281
+ const lastMsg = history[history.length - 1];
4282
+ if (!(lastMsg && lastMsg.role === "assistant")) {
4283
+ history.push({
4284
+ role: "assistant",
4285
+ content: typeof result === "string" ? result : JSON.stringify(result)
4286
+ });
4287
+ }
3801
4288
  if (this.options.persist !== false) {
3802
4289
  await this.store.saveSession(this.sessionId, history);
3803
4290
  }
@@ -3840,10 +4327,13 @@ var Session = class _Session {
3840
4327
  }
3841
4328
  });
3842
4329
  const updateHistory = async (result) => {
3843
- history.push({
3844
- role: "assistant",
3845
- content: typeof result === "string" ? result : JSON.stringify(result)
3846
- });
4330
+ const lastMsg = history[history.length - 1];
4331
+ if (!(lastMsg && lastMsg.role === "assistant")) {
4332
+ history.push({
4333
+ role: "assistant",
4334
+ content: typeof result === "string" ? result : JSON.stringify(result)
4335
+ });
4336
+ }
3847
4337
  if (this.options.persist !== false) {
3848
4338
  await this.store.saveSession(this.sessionId, history);
3849
4339
  }
@@ -4732,6 +5222,24 @@ var AxlRuntime = class extends EventEmitter2 {
4732
5222
  getExecutions() {
4733
5223
  return [...this.executions.values()];
4734
5224
  }
5225
+ /**
5226
+ * Create a lightweight WorkflowContext for ad-hoc use (tool testing, prototyping).
5227
+ * The context has access to the runtime's providers, state store, and MCP manager
5228
+ * but no session history, streaming callbacks, or budget tracking.
5229
+ */
5230
+ createContext(options) {
5231
+ return new WorkflowContext({
5232
+ input: void 0,
5233
+ executionId: randomUUID2(),
5234
+ metadata: options?.metadata,
5235
+ config: this.config,
5236
+ providerRegistry: this.providerRegistry,
5237
+ stateStore: this.stateStore,
5238
+ mcpManager: this.mcpManager,
5239
+ spanManager: this.spanManager,
5240
+ memoryManager: this.memoryManager
5241
+ });
5242
+ }
4735
5243
  /** Register a custom provider instance. */
4736
5244
  registerProvider(name, provider) {
4737
5245
  this.providerRegistry.registerInstance(name, provider);
@@ -5429,6 +5937,7 @@ export {
5429
5937
  agent,
5430
5938
  createSpanManager,
5431
5939
  defineConfig,
5940
+ resolveThinkingOptions,
5432
5941
  tool,
5433
5942
  workflow,
5434
5943
  zodToJsonSchema