@axlsdk/axl 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -41,7 +41,7 @@ function tool(config) {
41
41
  on: config.retry?.on
42
42
  };
43
43
  const maxStringLen = config.maxStringLength ?? DEFAULT_MAX_STRING_LENGTH;
44
- const execute = async (input) => {
44
+ const execute = async (input, ctx) => {
45
45
  const parsed = config.input.parse(input);
46
46
  if (maxStringLen > 0) {
47
47
  validateStringLengths(parsed, maxStringLen);
@@ -50,7 +50,7 @@ function tool(config) {
50
50
  let lastError;
51
51
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
52
52
  try {
53
- return await config.handler(parsed);
53
+ return await config.handler(parsed, ctx);
54
54
  } catch (err) {
55
55
  lastError = err instanceof Error ? err : new Error(String(err));
56
56
  if (attempt === maxAttempts) break;
@@ -80,7 +80,7 @@ function tool(config) {
80
80
  if (config.hooks?.before) {
81
81
  processedInput = await config.hooks.before(processedInput, ctx);
82
82
  }
83
- let result = await execute(processedInput);
83
+ let result = await execute(processedInput, ctx);
84
84
  if (config.hooks?.after) {
85
85
  result = await config.hooks.after(result, ctx);
86
86
  }
@@ -102,6 +102,25 @@ function tool(config) {
102
102
  };
103
103
  }
104
104
 
105
+ // src/providers/types.ts
106
+ function resolveThinkingOptions(options) {
107
+ if (options.thinkingBudget !== void 0 && options.thinkingBudget < 0) {
108
+ throw new Error(`thinkingBudget must be non-negative, got ${options.thinkingBudget}`);
109
+ }
110
+ const effort = options.effort;
111
+ const thinkingBudget = options.thinkingBudget;
112
+ const hasBudgetOverride = thinkingBudget !== void 0 && thinkingBudget > 0;
113
+ return {
114
+ effort,
115
+ thinkingBudget,
116
+ includeThoughts: options.includeThoughts ?? false,
117
+ // Budget override wins: effort: 'none' + thinkingBudget: 5000 → thinking enabled
118
+ thinkingDisabled: (effort === "none" || thinkingBudget === 0) && !hasBudgetOverride,
119
+ activeEffort: effort && effort !== "none" ? effort : void 0,
120
+ hasBudgetOverride
121
+ };
122
+ }
123
+
105
124
  // src/providers/retry.ts
106
125
  var RETRYABLE_STATUS_CODES = /* @__PURE__ */ new Set([429, 503, 529]);
107
126
  var MAX_RETRIES = 2;
@@ -156,6 +175,9 @@ var OPENAI_PRICING = {
156
175
  "gpt-5-nano": [5e-8, 4e-7],
157
176
  "gpt-5.1": [125e-8, 1e-5],
158
177
  "gpt-5.2": [175e-8, 14e-6],
178
+ "gpt-5.3": [175e-8, 14e-6],
179
+ "gpt-5.4": [25e-7, 15e-6],
180
+ "gpt-5.4-pro": [3e-5, 18e-5],
159
181
  o1: [15e-6, 6e-5],
160
182
  "o1-mini": [3e-6, 12e-6],
161
183
  "o1-pro": [15e-5, 6e-4],
@@ -184,26 +206,31 @@ function estimateOpenAICost(model, promptTokens, completionTokens, cachedTokens)
184
206
  const inputCost = (promptTokens - cached) * inputRate + cached * inputRate * 0.5;
185
207
  return inputCost + completionTokens * outputRate;
186
208
  }
187
- function isReasoningModel(model) {
209
+ function isOSeriesModel(model) {
188
210
  return /^(o1|o3|o4-mini)/.test(model);
189
211
  }
190
- function thinkingToReasoningEffort(thinking) {
191
- if (typeof thinking === "object") {
192
- const budget = thinking.budgetTokens;
193
- if (budget <= 1024) return "low";
194
- if (budget <= 8192) return "medium";
195
- return "high";
196
- }
197
- switch (thinking) {
198
- case "low":
199
- return "low";
200
- case "medium":
201
- return "medium";
202
- case "high":
203
- return "high";
204
- case "max":
205
- return "xhigh";
206
- }
212
+ function supportsReasoningEffort(model) {
213
+ return isOSeriesModel(model) || /^gpt-5/.test(model);
214
+ }
215
+ function supportsReasoningNone(model) {
216
+ return /^gpt-5\.[1-9]/.test(model);
217
+ }
218
+ function supportsXhigh(model) {
219
+ return /^gpt-5\.([2-9]|\d{2,})/.test(model);
220
+ }
221
+ function clampReasoningEffort(model, effort) {
222
+ if (model.startsWith("gpt-5-pro")) return "high";
223
+ if (effort === "none" && !supportsReasoningNone(model)) return "minimal";
224
+ if (effort === "xhigh" && !supportsXhigh(model)) return "high";
225
+ return effort;
226
+ }
227
+ function effortToReasoningEffort(effort) {
228
+ return effort === "max" ? "xhigh" : effort;
229
+ }
230
+ function budgetToReasoningEffort(budget) {
231
+ if (budget <= 1024) return "low";
232
+ if (budget <= 8192) return "medium";
233
+ return "high";
207
234
  }
208
235
  var OpenAIProvider = class {
209
236
  name = "openai";
@@ -292,13 +319,26 @@ var OpenAIProvider = class {
292
319
  // Internal helpers
293
320
  // ---------------------------------------------------------------------------
294
321
  buildRequestBody(messages, options, stream) {
295
- const reasoning = isReasoningModel(options.model);
322
+ const oSeries = isOSeriesModel(options.model);
323
+ const reasoningCapable = supportsReasoningEffort(options.model);
324
+ const { thinkingBudget, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
325
+ let wireEffort;
326
+ if (reasoningCapable) {
327
+ if (hasBudgetOverride) {
328
+ wireEffort = clampReasoningEffort(options.model, budgetToReasoningEffort(thinkingBudget));
329
+ } else if (!thinkingDisabled && activeEffort) {
330
+ wireEffort = clampReasoningEffort(options.model, effortToReasoningEffort(activeEffort));
331
+ } else if (thinkingDisabled) {
332
+ wireEffort = clampReasoningEffort(options.model, "none");
333
+ }
334
+ }
335
+ const stripTemp = oSeries || reasoningCapable && wireEffort !== void 0;
296
336
  const body = {
297
337
  model: options.model,
298
- messages: messages.map((m) => this.formatMessage(m, reasoning)),
338
+ messages: messages.map((m) => this.formatMessage(m, oSeries)),
299
339
  stream
300
340
  };
301
- if (options.temperature !== void 0 && !reasoning) {
341
+ if (options.temperature !== void 0 && !stripTemp) {
302
342
  body.temperature = options.temperature;
303
343
  }
304
344
  if (options.maxTokens !== void 0) {
@@ -307,7 +347,7 @@ var OpenAIProvider = class {
307
347
  if (options.stop) body.stop = options.stop;
308
348
  if (options.tools && options.tools.length > 0) {
309
349
  body.tools = options.tools;
310
- if (!reasoning) {
350
+ if (!oSeries) {
311
351
  body.parallel_tool_calls = true;
312
352
  }
313
353
  }
@@ -317,15 +357,13 @@ var OpenAIProvider = class {
317
357
  if (options.responseFormat) {
318
358
  body.response_format = options.responseFormat;
319
359
  }
320
- if (reasoning) {
321
- const effort = options.thinking ? thinkingToReasoningEffort(options.thinking) : options.reasoningEffort;
322
- if (effort) {
323
- body.reasoning_effort = effort;
324
- }
325
- }
360
+ if (wireEffort) body.reasoning_effort = wireEffort;
326
361
  if (stream) {
327
362
  body.stream_options = { include_usage: true };
328
363
  }
364
+ if (options.providerOptions) {
365
+ Object.assign(body, options.providerOptions);
366
+ }
329
367
  return body;
330
368
  }
331
369
  /** Extract a human-readable message from an API error response body. */
@@ -339,9 +377,9 @@ var OpenAIProvider = class {
339
377
  }
340
378
  return `OpenAI API error (${status}): ${body}`;
341
379
  }
342
- formatMessage(msg, reasoning) {
380
+ formatMessage(msg, oSeries) {
343
381
  const out = {
344
- role: msg.role === "system" && reasoning ? "developer" : msg.role,
382
+ role: msg.role === "system" && oSeries ? "developer" : msg.role,
345
383
  content: msg.content
346
384
  };
347
385
  if (msg.name) out.name = msg.name;
@@ -478,7 +516,20 @@ var OpenAIResponsesProvider = class {
478
516
  // Internal: build request body
479
517
  // ---------------------------------------------------------------------------
480
518
  buildRequestBody(messages, options, stream) {
481
- const reasoning = isReasoningModel(options.model);
519
+ const oSeries = isOSeriesModel(options.model);
520
+ const reasoningCapable = supportsReasoningEffort(options.model);
521
+ const { thinkingBudget, includeThoughts, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
522
+ let wireEffort;
523
+ if (reasoningCapable) {
524
+ if (hasBudgetOverride) {
525
+ wireEffort = clampReasoningEffort(options.model, budgetToReasoningEffort(thinkingBudget));
526
+ } else if (!thinkingDisabled && activeEffort) {
527
+ wireEffort = clampReasoningEffort(options.model, effortToReasoningEffort(activeEffort));
528
+ } else if (thinkingDisabled) {
529
+ wireEffort = clampReasoningEffort(options.model, "none");
530
+ }
531
+ }
532
+ const stripTemp = oSeries || reasoningCapable && wireEffort !== void 0;
482
533
  const systemMessages = messages.filter((m) => m.role === "system");
483
534
  const nonSystemMessages = messages.filter((m) => m.role !== "system");
484
535
  const body = {
@@ -493,7 +544,7 @@ var OpenAIResponsesProvider = class {
493
544
  if (options.maxTokens !== void 0) {
494
545
  body.max_output_tokens = options.maxTokens;
495
546
  }
496
- if (options.temperature !== void 0 && !reasoning) {
547
+ if (options.temperature !== void 0 && !stripTemp) {
497
548
  body.temperature = options.temperature;
498
549
  }
499
550
  if (options.tools && options.tools.length > 0) {
@@ -512,15 +563,21 @@ var OpenAIResponsesProvider = class {
512
563
  body.tool_choice = options.toolChoice;
513
564
  }
514
565
  }
515
- if (reasoning) {
516
- const effort = options.thinking ? thinkingToReasoningEffort(options.thinking) : options.reasoningEffort;
517
- if (effort) {
518
- body.reasoning = { effort };
519
- }
566
+ if (reasoningCapable && (wireEffort !== void 0 || includeThoughts)) {
567
+ const reasoning = {};
568
+ if (wireEffort !== void 0) reasoning.effort = wireEffort;
569
+ if (includeThoughts) reasoning.summary = "detailed";
570
+ if (Object.keys(reasoning).length > 0) body.reasoning = reasoning;
571
+ }
572
+ if (reasoningCapable) {
573
+ body.include = ["reasoning.encrypted_content"];
520
574
  }
521
575
  if (options.responseFormat) {
522
576
  body.text = { format: this.mapResponseFormat(options.responseFormat) };
523
577
  }
578
+ if (options.providerOptions) {
579
+ Object.assign(body, options.providerOptions);
580
+ }
524
581
  return body;
525
582
  }
526
583
  // ---------------------------------------------------------------------------
@@ -536,6 +593,12 @@ var OpenAIResponsesProvider = class {
536
593
  output: msg.content
537
594
  });
538
595
  } else if (msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0) {
596
+ const reasoningItems = msg.providerMetadata?.openaiReasoningItems;
597
+ if (reasoningItems) {
598
+ for (const item of reasoningItems) {
599
+ input.push(item);
600
+ }
601
+ }
539
602
  if (msg.content) {
540
603
  input.push({ type: "message", role: "assistant", content: msg.content });
541
604
  }
@@ -548,6 +611,12 @@ var OpenAIResponsesProvider = class {
548
611
  });
549
612
  }
550
613
  } else if (msg.role === "user" || msg.role === "assistant") {
614
+ if (msg.role === "assistant" && msg.providerMetadata?.openaiReasoningItems) {
615
+ const reasoningItems = msg.providerMetadata.openaiReasoningItems;
616
+ for (const item of reasoningItems) {
617
+ input.push(item);
618
+ }
619
+ }
551
620
  input.push({
552
621
  type: "message",
553
622
  role: msg.role,
@@ -580,7 +649,9 @@ var OpenAIResponsesProvider = class {
580
649
  // ---------------------------------------------------------------------------
581
650
  parseResponse(json, model) {
582
651
  let content = "";
652
+ let thinkingContent = "";
583
653
  const toolCalls = [];
654
+ const reasoningItems = [];
584
655
  for (const item of json.output) {
585
656
  if (item.type === "message") {
586
657
  for (const part of item.content ?? []) {
@@ -597,6 +668,15 @@ var OpenAIResponsesProvider = class {
597
668
  arguments: item.arguments
598
669
  }
599
670
  });
671
+ } else if (item.type === "reasoning") {
672
+ reasoningItems.push(item);
673
+ if (item.summary) {
674
+ for (const s of item.summary) {
675
+ if (s.type === "summary_text" && s.text) {
676
+ thinkingContent += s.text;
677
+ }
678
+ }
679
+ }
600
680
  }
601
681
  }
602
682
  const usage = json.usage ? {
@@ -607,11 +687,14 @@ var OpenAIResponsesProvider = class {
607
687
  cached_tokens: json.usage.input_tokens_details?.cached_tokens
608
688
  } : void 0;
609
689
  const cost = usage ? estimateOpenAICost(model, usage.prompt_tokens, usage.completion_tokens, usage.cached_tokens) : void 0;
690
+ const providerMetadata = reasoningItems.length > 0 ? { openaiReasoningItems: reasoningItems } : void 0;
610
691
  return {
611
692
  content,
693
+ thinking_content: thinkingContent || void 0,
612
694
  tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
613
695
  usage,
614
- cost
696
+ cost,
697
+ providerMetadata
615
698
  };
616
699
  }
617
700
  // ---------------------------------------------------------------------------
@@ -663,6 +746,8 @@ var OpenAIResponsesProvider = class {
663
746
  switch (eventType) {
664
747
  case "response.output_text.delta":
665
748
  return { type: "text_delta", content: data.delta ?? "" };
749
+ case "response.reasoning_summary_text.delta":
750
+ return { type: "thinking_delta", content: data.delta ?? "" };
666
751
  case "response.output_item.added":
667
752
  if (data.item?.type === "function_call") {
668
753
  const callId = data.item.call_id ?? data.item.id ?? "";
@@ -693,7 +778,9 @@ var OpenAIResponsesProvider = class {
693
778
  reasoning_tokens: response.usage.output_tokens_details?.reasoning_tokens,
694
779
  cached_tokens: response.usage.input_tokens_details?.cached_tokens
695
780
  } : void 0;
696
- return { type: "done", usage };
781
+ const reasoningItems = response?.output?.filter((item) => item.type === "reasoning") ?? [];
782
+ const providerMetadata = reasoningItems.length > 0 ? { openaiReasoningItems: reasoningItems } : void 0;
783
+ return { type: "done", usage, providerMetadata };
697
784
  }
698
785
  case "response.failed": {
699
786
  const errorMsg = data.response?.error?.message ?? data.response?.status_details?.error?.message ?? "Unknown error";
@@ -721,9 +808,12 @@ var OpenAIResponsesProvider = class {
721
808
  // src/providers/anthropic.ts
722
809
  var ANTHROPIC_API_VERSION = "2023-06-01";
723
810
  var ANTHROPIC_PRICING = {
724
- "claude-opus-4-6": [15e-6, 75e-6],
811
+ "claude-opus-4-6": [5e-6, 25e-6],
812
+ "claude-sonnet-4-6": [3e-6, 15e-6],
813
+ "claude-opus-4-5": [5e-6, 25e-6],
814
+ "claude-opus-4-1": [15e-6, 75e-6],
725
815
  "claude-sonnet-4-5": [3e-6, 15e-6],
726
- "claude-haiku-4-5": [8e-7, 4e-6],
816
+ "claude-haiku-4-5": [1e-6, 5e-6],
727
817
  "claude-sonnet-4": [3e-6, 15e-6],
728
818
  "claude-opus-4": [15e-6, 75e-6],
729
819
  "claude-3-7-sonnet": [3e-6, 15e-6],
@@ -733,12 +823,15 @@ var ANTHROPIC_PRICING = {
733
823
  "claude-3-sonnet": [3e-6, 15e-6],
734
824
  "claude-3-haiku": [25e-8, 125e-8]
735
825
  };
826
+ var ANTHROPIC_PRICING_KEYS_BY_LENGTH = Object.keys(ANTHROPIC_PRICING).sort(
827
+ (a, b) => b.length - a.length
828
+ );
736
829
  function estimateAnthropicCost(model, inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens) {
737
830
  let pricing = ANTHROPIC_PRICING[model];
738
831
  if (!pricing) {
739
- for (const [key, value] of Object.entries(ANTHROPIC_PRICING)) {
832
+ for (const key of ANTHROPIC_PRICING_KEYS_BY_LENGTH) {
740
833
  if (model.startsWith(key)) {
741
- pricing = value;
834
+ pricing = ANTHROPIC_PRICING[key];
742
835
  break;
743
836
  }
744
837
  }
@@ -758,16 +851,15 @@ var THINKING_BUDGETS = {
758
851
  // With auto-bump (+1024), max_tokens becomes 31024 which fits all models.
759
852
  max: 3e4
760
853
  };
761
- function thinkingToBudgetTokens(thinking) {
762
- if (typeof thinking === "string") return THINKING_BUDGETS[thinking] ?? 5e3;
763
- return thinking.budgetTokens;
764
- }
765
854
  function supportsAdaptiveThinking(model) {
766
855
  return model.startsWith("claude-opus-4-6") || model.startsWith("claude-sonnet-4-6");
767
856
  }
768
857
  function supportsMaxEffort(model) {
769
858
  return model.startsWith("claude-opus-4-6");
770
859
  }
860
+ function supportsEffort(model) {
861
+ return model.startsWith("claude-opus-4-6") || model.startsWith("claude-sonnet-4-6") || model.startsWith("claude-opus-4-5");
862
+ }
771
863
  var AnthropicProvider = class {
772
864
  name = "anthropic";
773
865
  baseUrl;
@@ -857,9 +949,6 @@ var AnthropicProvider = class {
857
949
  if (systemText) {
858
950
  body.system = systemText;
859
951
  }
860
- if (options.temperature !== void 0 && !options.thinking) {
861
- body.temperature = options.temperature;
862
- }
863
952
  if (options.stop) {
864
953
  body.stop_sequences = options.stop;
865
954
  }
@@ -869,19 +958,39 @@ var AnthropicProvider = class {
869
958
  if (options.toolChoice !== void 0) {
870
959
  body.tool_choice = this.mapToolChoice(options.toolChoice);
871
960
  }
872
- if (options.thinking) {
873
- if (typeof options.thinking === "string" && supportsAdaptiveThinking(options.model) && // 'max' effort is only supported on Opus 4.6; Sonnet 4.6 falls back to manual mode
874
- (options.thinking !== "max" || supportsMaxEffort(options.model))) {
875
- body.thinking = { type: "adaptive" };
876
- body.output_config = { effort: options.thinking };
877
- } else {
878
- const budgetTokens = thinkingToBudgetTokens(options.thinking);
879
- body.thinking = { type: "enabled", budget_tokens: budgetTokens };
880
- const currentMax = body.max_tokens;
881
- if (currentMax < budgetTokens + 1024) {
882
- body.max_tokens = budgetTokens + 1024;
883
- }
961
+ const { thinkingBudget, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
962
+ let resolvedEffort = activeEffort;
963
+ if (resolvedEffort === "max" && !supportsMaxEffort(options.model)) {
964
+ resolvedEffort = "high";
965
+ }
966
+ if (hasBudgetOverride) {
967
+ body.thinking = { type: "enabled", budget_tokens: thinkingBudget };
968
+ const currentMax = body.max_tokens;
969
+ if (currentMax < thinkingBudget + 1024) {
970
+ body.max_tokens = thinkingBudget + 1024;
884
971
  }
972
+ if (resolvedEffort && supportsEffort(options.model)) {
973
+ body.output_config = { effort: resolvedEffort };
974
+ }
975
+ } else if (thinkingDisabled) {
976
+ if (resolvedEffort && supportsEffort(options.model)) {
977
+ body.output_config = { effort: resolvedEffort };
978
+ }
979
+ } else if (resolvedEffort && supportsAdaptiveThinking(options.model)) {
980
+ body.thinking = { type: "adaptive" };
981
+ body.output_config = { effort: resolvedEffort };
982
+ } else if (resolvedEffort && supportsEffort(options.model)) {
983
+ body.output_config = { effort: resolvedEffort };
984
+ } else if (resolvedEffort) {
985
+ const budget = THINKING_BUDGETS[resolvedEffort] ?? 5e3;
986
+ body.thinking = { type: "enabled", budget_tokens: budget };
987
+ const currentMax = body.max_tokens;
988
+ if (currentMax < budget + 1024) {
989
+ body.max_tokens = budget + 1024;
990
+ }
991
+ }
992
+ if (options.temperature !== void 0 && !body.thinking) {
993
+ body.temperature = options.temperature;
885
994
  }
886
995
  if (options.responseFormat && options.responseFormat.type !== "text") {
887
996
  const jsonInstruction = "You must respond with valid JSON only. No markdown fences, no extra text.";
@@ -889,6 +998,9 @@ var AnthropicProvider = class {
889
998
 
890
999
  ${jsonInstruction}` : jsonInstruction;
891
1000
  }
1001
+ if (options.providerOptions) {
1002
+ Object.assign(body, options.providerOptions);
1003
+ }
892
1004
  return body;
893
1005
  }
894
1006
  /**
@@ -999,9 +1111,12 @@ ${jsonInstruction}` : jsonInstruction;
999
1111
  // ---------------------------------------------------------------------------
1000
1112
  parseResponse(json) {
1001
1113
  let content = "";
1114
+ let thinkingContent = "";
1002
1115
  const toolCalls = [];
1003
1116
  for (const block of json.content) {
1004
- if (block.type === "text") {
1117
+ if (block.type === "thinking") {
1118
+ thinkingContent += block.thinking;
1119
+ } else if (block.type === "text") {
1005
1120
  content += block.text;
1006
1121
  } else if (block.type === "tool_use") {
1007
1122
  toolCalls.push({
@@ -1032,6 +1147,7 @@ ${jsonInstruction}` : jsonInstruction;
1032
1147
  ) : void 0;
1033
1148
  return {
1034
1149
  content,
1150
+ thinking_content: thinkingContent || void 0,
1035
1151
  tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
1036
1152
  usage,
1037
1153
  cost
@@ -1082,7 +1198,9 @@ ${jsonInstruction}` : jsonInstruction;
1082
1198
  }
1083
1199
  case "content_block_delta": {
1084
1200
  const delta = event.delta;
1085
- if (delta?.type === "text_delta" && delta.text) {
1201
+ if (delta?.type === "thinking_delta" && delta.thinking) {
1202
+ yield { type: "thinking_delta", content: delta.thinking };
1203
+ } else if (delta?.type === "text_delta" && delta.text) {
1086
1204
  yield { type: "text_delta", content: delta.text };
1087
1205
  } else if (delta?.type === "input_json_delta" && delta.partial_json) {
1088
1206
  yield {
@@ -1153,14 +1271,19 @@ var GEMINI_PRICING = {
1153
1271
  "gemini-2.0-flash": [1e-7, 4e-7],
1154
1272
  "gemini-2.0-flash-lite": [1e-7, 4e-7],
1155
1273
  "gemini-3-pro-preview": [2e-6, 12e-6],
1156
- "gemini-3-flash-preview": [5e-7, 3e-6]
1274
+ "gemini-3-flash-preview": [5e-7, 3e-6],
1275
+ "gemini-3.1-pro-preview": [2e-6, 12e-6],
1276
+ "gemini-3.1-flash-lite-preview": [25e-8, 15e-7]
1157
1277
  };
1278
+ var GEMINI_PRICING_KEYS_BY_LENGTH = Object.keys(GEMINI_PRICING).sort(
1279
+ (a, b) => b.length - a.length
1280
+ );
1158
1281
  function estimateGeminiCost(model, inputTokens, outputTokens, cachedTokens) {
1159
1282
  let pricing = GEMINI_PRICING[model];
1160
1283
  if (!pricing) {
1161
- for (const [key, value] of Object.entries(GEMINI_PRICING)) {
1284
+ for (const key of GEMINI_PRICING_KEYS_BY_LENGTH) {
1162
1285
  if (model.startsWith(key)) {
1163
- pricing = value;
1286
+ pricing = GEMINI_PRICING[key];
1164
1287
  break;
1165
1288
  }
1166
1289
  }
@@ -1177,9 +1300,32 @@ var THINKING_BUDGETS2 = {
1177
1300
  high: 1e4,
1178
1301
  max: 24576
1179
1302
  };
1180
- function thinkingToBudgetTokens2(thinking) {
1181
- if (typeof thinking === "string") return THINKING_BUDGETS2[thinking] ?? 5e3;
1182
- return thinking.budgetTokens;
1303
+ var THINKING_LEVELS = {
1304
+ low: "low",
1305
+ medium: "medium",
1306
+ high: "high",
1307
+ max: "high"
1308
+ // 3.x caps at 'high'
1309
+ };
1310
+ function isGemini3x(model) {
1311
+ return /^gemini-3[.-]/.test(model);
1312
+ }
1313
+ function budgetToThinkingLevel(budgetTokens) {
1314
+ if (budgetTokens <= 1024) return "low";
1315
+ if (budgetTokens <= 5e3) return "medium";
1316
+ return "high";
1317
+ }
1318
+ function minThinkingLevel(model) {
1319
+ if (model.startsWith("gemini-3.1-pro")) return "low";
1320
+ return "minimal";
1321
+ }
1322
+ var _warned3xEffortNone = /* @__PURE__ */ new Set();
1323
+ function warnGemini3xEffortNone(model) {
1324
+ if (_warned3xEffortNone.has(model)) return;
1325
+ _warned3xEffortNone.add(model);
1326
+ console.warn(
1327
+ `[axl] effort: 'none' on Gemini 3.x (${model}) maps to the model's minimum thinking level ('${minThinkingLevel(model)}'), not fully disabled. Gemini 3.x models cannot disable thinking entirely.`
1328
+ );
1183
1329
  }
1184
1330
  var GeminiProvider = class {
1185
1331
  name = "google";
@@ -1294,17 +1440,58 @@ var GeminiProvider = class {
1294
1440
  if (Object.keys(generationConfig).length > 0) {
1295
1441
  body.generationConfig = generationConfig;
1296
1442
  }
1297
- if (options.thinking) {
1298
- generationConfig.thinkingConfig = {
1299
- thinkingBudget: thinkingToBudgetTokens2(options.thinking)
1300
- };
1301
- if (!body.generationConfig) {
1302
- body.generationConfig = generationConfig;
1443
+ const {
1444
+ effort,
1445
+ thinkingBudget,
1446
+ includeThoughts,
1447
+ thinkingDisabled,
1448
+ activeEffort,
1449
+ hasBudgetOverride
1450
+ } = resolveThinkingOptions(options);
1451
+ if (thinkingDisabled) {
1452
+ if (isGemini3x(options.model)) {
1453
+ if (effort === "none") {
1454
+ warnGemini3xEffortNone(options.model);
1455
+ }
1456
+ generationConfig.thinkingConfig = { thinkingLevel: minThinkingLevel(options.model) };
1457
+ } else {
1458
+ generationConfig.thinkingConfig = { thinkingBudget: 0 };
1459
+ }
1460
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1461
+ } else if (hasBudgetOverride) {
1462
+ const config = {};
1463
+ if (isGemini3x(options.model)) {
1464
+ config.thinkingLevel = budgetToThinkingLevel(thinkingBudget);
1465
+ } else {
1466
+ config.thinkingBudget = thinkingBudget;
1467
+ }
1468
+ if (includeThoughts) config.includeThoughts = true;
1469
+ generationConfig.thinkingConfig = config;
1470
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1471
+ } else if (activeEffort) {
1472
+ const config = {};
1473
+ if (isGemini3x(options.model)) {
1474
+ config.thinkingLevel = THINKING_LEVELS[activeEffort] ?? "medium";
1475
+ } else {
1476
+ if (activeEffort === "max" && options.model.startsWith("gemini-2.5-pro")) {
1477
+ config.thinkingBudget = 32768;
1478
+ } else {
1479
+ config.thinkingBudget = THINKING_BUDGETS2[activeEffort] ?? 5e3;
1480
+ }
1303
1481
  }
1482
+ if (includeThoughts) config.includeThoughts = true;
1483
+ generationConfig.thinkingConfig = config;
1484
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1485
+ } else if (includeThoughts) {
1486
+ generationConfig.thinkingConfig = { includeThoughts: true };
1487
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1304
1488
  }
1305
1489
  if (options.toolChoice !== void 0) {
1306
1490
  body.toolConfig = { functionCallingConfig: this.mapToolChoice(options.toolChoice) };
1307
1491
  }
1492
+ if (options.providerOptions) {
1493
+ Object.assign(body, options.providerOptions);
1494
+ }
1308
1495
  return body;
1309
1496
  }
1310
1497
  /**
@@ -1330,28 +1517,33 @@ var GeminiProvider = class {
1330
1517
  const result = [];
1331
1518
  for (const msg of messages) {
1332
1519
  if (msg.role === "assistant") {
1333
- const parts = [];
1334
- if (msg.content) {
1335
- parts.push({ text: msg.content });
1336
- }
1337
- if (msg.tool_calls && msg.tool_calls.length > 0) {
1338
- for (const tc of msg.tool_calls) {
1339
- let parsedArgs;
1340
- try {
1341
- parsedArgs = JSON.parse(tc.function.arguments);
1342
- } catch {
1343
- parsedArgs = {};
1344
- }
1345
- parts.push({
1346
- functionCall: {
1347
- name: tc.function.name,
1348
- args: parsedArgs
1520
+ const rawParts = msg.providerMetadata?.geminiParts;
1521
+ if (rawParts && rawParts.length > 0) {
1522
+ result.push({ role: "model", parts: rawParts });
1523
+ } else {
1524
+ const parts = [];
1525
+ if (msg.content) {
1526
+ parts.push({ text: msg.content });
1527
+ }
1528
+ if (msg.tool_calls && msg.tool_calls.length > 0) {
1529
+ for (const tc of msg.tool_calls) {
1530
+ let parsedArgs;
1531
+ try {
1532
+ parsedArgs = JSON.parse(tc.function.arguments);
1533
+ } catch {
1534
+ parsedArgs = {};
1349
1535
  }
1350
- });
1536
+ parts.push({
1537
+ functionCall: {
1538
+ name: tc.function.name,
1539
+ args: parsedArgs
1540
+ }
1541
+ });
1542
+ }
1543
+ }
1544
+ if (parts.length > 0) {
1545
+ result.push({ role: "model", parts });
1351
1546
  }
1352
- }
1353
- if (parts.length > 0) {
1354
- result.push({ role: "model", parts });
1355
1547
  }
1356
1548
  } else if (msg.role === "tool") {
1357
1549
  const functionName = toolCallIdToName.get(msg.tool_call_id) ?? "unknown";
@@ -1428,10 +1620,13 @@ var GeminiProvider = class {
1428
1620
  parseResponse(json, model) {
1429
1621
  const candidate = json.candidates?.[0];
1430
1622
  let content = "";
1623
+ let thinkingContent = "";
1431
1624
  const toolCalls = [];
1432
1625
  if (candidate?.content?.parts) {
1433
1626
  for (const part of candidate.content.parts) {
1434
- if (part.text) {
1627
+ if (part.thought && part.text) {
1628
+ thinkingContent += part.text;
1629
+ } else if (part.text) {
1435
1630
  content += part.text;
1436
1631
  } else if (part.functionCall) {
1437
1632
  toolCalls.push({
@@ -1446,18 +1641,24 @@ var GeminiProvider = class {
1446
1641
  }
1447
1642
  }
1448
1643
  const cachedTokens = json.usageMetadata?.cachedContentTokenCount;
1644
+ const reasoningTokens = json.usageMetadata?.thoughtsTokenCount;
1449
1645
  const usage = json.usageMetadata ? {
1450
1646
  prompt_tokens: json.usageMetadata.promptTokenCount ?? 0,
1451
1647
  completion_tokens: json.usageMetadata.candidatesTokenCount ?? 0,
1452
1648
  total_tokens: json.usageMetadata.totalTokenCount ?? 0,
1453
- cached_tokens: cachedTokens && cachedTokens > 0 ? cachedTokens : void 0
1649
+ cached_tokens: cachedTokens && cachedTokens > 0 ? cachedTokens : void 0,
1650
+ reasoning_tokens: reasoningTokens && reasoningTokens > 0 ? reasoningTokens : void 0
1454
1651
  } : void 0;
1455
1652
  const cost = usage ? estimateGeminiCost(model, usage.prompt_tokens, usage.completion_tokens, usage.cached_tokens) : void 0;
1653
+ const rawParts = candidate?.content?.parts;
1654
+ const providerMetadata = rawParts ? { geminiParts: rawParts } : void 0;
1456
1655
  return {
1457
1656
  content,
1657
+ thinking_content: thinkingContent || void 0,
1458
1658
  tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
1459
1659
  usage,
1460
- cost
1660
+ cost,
1661
+ providerMetadata
1461
1662
  };
1462
1663
  }
1463
1664
  // ---------------------------------------------------------------------------
@@ -1468,6 +1669,7 @@ var GeminiProvider = class {
1468
1669
  const decoder = new TextDecoder();
1469
1670
  let buffer = "";
1470
1671
  let usage;
1672
+ const accumulatedParts = [];
1471
1673
  try {
1472
1674
  while (true) {
1473
1675
  const { done, value } = await reader.read();
@@ -1488,17 +1690,22 @@ var GeminiProvider = class {
1488
1690
  }
1489
1691
  if (chunk.usageMetadata) {
1490
1692
  const cached = chunk.usageMetadata.cachedContentTokenCount;
1693
+ const reasoning = chunk.usageMetadata.thoughtsTokenCount;
1491
1694
  usage = {
1492
1695
  prompt_tokens: chunk.usageMetadata.promptTokenCount ?? 0,
1493
1696
  completion_tokens: chunk.usageMetadata.candidatesTokenCount ?? 0,
1494
1697
  total_tokens: chunk.usageMetadata.totalTokenCount ?? 0,
1495
- cached_tokens: cached && cached > 0 ? cached : void 0
1698
+ cached_tokens: cached && cached > 0 ? cached : void 0,
1699
+ reasoning_tokens: reasoning && reasoning > 0 ? reasoning : void 0
1496
1700
  };
1497
1701
  }
1498
1702
  const candidate = chunk.candidates?.[0];
1499
1703
  if (candidate?.content?.parts) {
1500
1704
  for (const part of candidate.content.parts) {
1501
- if (part.text) {
1705
+ accumulatedParts.push(part);
1706
+ if (part.thought && part.text) {
1707
+ yield { type: "thinking_delta", content: part.text };
1708
+ } else if (part.text) {
1502
1709
  yield { type: "text_delta", content: part.text };
1503
1710
  } else if (part.functionCall) {
1504
1711
  yield {
@@ -1512,7 +1719,8 @@ var GeminiProvider = class {
1512
1719
  }
1513
1720
  }
1514
1721
  }
1515
- yield { type: "done", usage };
1722
+ const providerMetadata = accumulatedParts.length > 0 ? { geminiParts: accumulatedParts } : void 0;
1723
+ yield { type: "done", usage, providerMetadata };
1516
1724
  } finally {
1517
1725
  reader.releaseLock();
1518
1726
  }
@@ -1885,7 +2093,7 @@ function estimateMessagesTokens(messages) {
1885
2093
  }
1886
2094
  return total;
1887
2095
  }
1888
- var WorkflowContext = class {
2096
+ var WorkflowContext = class _WorkflowContext {
1889
2097
  input;
1890
2098
  executionId;
1891
2099
  metadata;
@@ -1938,6 +2146,37 @@ var WorkflowContext = class {
1938
2146
  this.summaryCache = init.metadata.summaryCache;
1939
2147
  }
1940
2148
  }
2149
+ /**
2150
+ * Create a child context for nested agent invocations (e.g., agent-as-tool).
2151
+ * Shares: budget tracking, abort signals, trace emission, provider registry,
2152
+ * state store, span manager, memory manager, MCP manager, config,
2153
+ * awaitHuman handler, pending decisions, tool overrides.
2154
+ * Isolates: session history, step counter, streaming callbacks (onToken, onAgentStart, onToolCall).
2155
+ */
2156
+ createChildContext() {
2157
+ return new _WorkflowContext({
2158
+ input: this.input,
2159
+ executionId: this.executionId,
2160
+ config: this.config,
2161
+ providerRegistry: this.providerRegistry,
2162
+ metadata: { ...this.metadata },
2163
+ // Shared infrastructure
2164
+ budgetContext: this.budgetContext,
2165
+ stateStore: this.stateStore,
2166
+ mcpManager: this.mcpManager,
2167
+ spanManager: this.spanManager,
2168
+ memoryManager: this.memoryManager,
2169
+ onTrace: this.onTrace,
2170
+ onAgentCallComplete: this.onAgentCallComplete,
2171
+ awaitHumanHandler: this.awaitHumanHandler,
2172
+ pendingDecisions: this.pendingDecisions,
2173
+ toolOverrides: this.toolOverrides,
2174
+ signal: this.signal,
2175
+ workflowName: this.workflowName
2176
+ // Isolated: sessionHistory (empty), stepCounter (0),
2177
+ // onToken (null), onAgentStart (null), onToolCall (null)
2178
+ });
2179
+ }
1941
2180
  /**
1942
2181
  * Resolve the current abort signal.
1943
2182
  * Branch-scoped signals (from race/spawn/map/budget) in AsyncLocalStorage
@@ -2000,10 +2239,12 @@ var WorkflowContext = class {
2000
2239
  promptVersion: agent2._config.version,
2001
2240
  temperature: options?.temperature ?? agent2._config.temperature,
2002
2241
  maxTokens: options?.maxTokens ?? agent2._config.maxTokens ?? 4096,
2003
- thinking: options?.thinking ?? agent2._config.thinking,
2004
- reasoningEffort: options?.reasoningEffort ?? agent2._config.reasoningEffort,
2242
+ effort: options?.effort ?? agent2._config.effort,
2243
+ thinkingBudget: options?.thinkingBudget ?? agent2._config.thinkingBudget,
2244
+ includeThoughts: options?.includeThoughts ?? agent2._config.includeThoughts,
2005
2245
  toolChoice: options?.toolChoice ?? agent2._config.toolChoice,
2006
- stop: options?.stop ?? agent2._config.stop
2246
+ stop: options?.stop ?? agent2._config.stop,
2247
+ providerOptions: options?.providerOptions ?? agent2._config.providerOptions
2007
2248
  });
2008
2249
  return result;
2009
2250
  });
@@ -2026,7 +2267,21 @@ var WorkflowContext = class {
2026
2267
  const modelUri = agent2.resolveModel(resolveCtx);
2027
2268
  const systemPrompt = agent2.resolveSystem(resolveCtx);
2028
2269
  const { provider, model } = this.providerRegistry.resolve(modelUri, this.config);
2029
- const toolDefs = this.buildToolDefs(agent2);
2270
+ let resolvedHandoffs;
2271
+ if (typeof agent2._config.handoffs === "function") {
2272
+ try {
2273
+ resolvedHandoffs = agent2._config.handoffs(resolveCtx);
2274
+ } catch (err) {
2275
+ this.log("handoff_resolve_error", {
2276
+ agent: agent2._name,
2277
+ error: err instanceof Error ? err.message : String(err)
2278
+ });
2279
+ resolvedHandoffs = void 0;
2280
+ }
2281
+ } else {
2282
+ resolvedHandoffs = agent2._config.handoffs;
2283
+ }
2284
+ const toolDefs = this.buildToolDefs(agent2, resolvedHandoffs);
2030
2285
  const messages = [];
2031
2286
  if (systemPrompt) {
2032
2287
  messages.push({ role: "system", content: systemPrompt });
@@ -2128,21 +2383,17 @@ Please fix and try again.`;
2128
2383
  throw new TimeoutError("ctx.ask()", timeoutMs);
2129
2384
  }
2130
2385
  turns++;
2131
- const thinking = options?.thinking ?? agent2._config.thinking;
2132
- if (thinking && typeof thinking === "object" && thinking.budgetTokens <= 0) {
2133
- throw new Error(
2134
- `thinking.budgetTokens must be a positive number, got ${thinking.budgetTokens}`
2135
- );
2136
- }
2137
2386
  const chatOptions = {
2138
2387
  model,
2139
2388
  temperature: options?.temperature ?? agent2._config.temperature,
2140
2389
  tools: toolDefs.length > 0 ? toolDefs : void 0,
2141
2390
  maxTokens: options?.maxTokens ?? agent2._config.maxTokens ?? 4096,
2142
- thinking,
2143
- reasoningEffort: options?.reasoningEffort ?? agent2._config.reasoningEffort,
2391
+ effort: options?.effort ?? agent2._config.effort,
2392
+ thinkingBudget: options?.thinkingBudget ?? agent2._config.thinkingBudget,
2393
+ includeThoughts: options?.includeThoughts ?? agent2._config.includeThoughts,
2144
2394
  toolChoice: options?.toolChoice ?? agent2._config.toolChoice,
2145
2395
  stop: options?.stop ?? agent2._config.stop,
2396
+ providerOptions: options?.providerOptions ?? agent2._config.providerOptions,
2146
2397
  signal: this.currentSignal
2147
2398
  };
2148
2399
  if (options?.schema && toolDefs.length === 0) {
@@ -2154,10 +2405,14 @@ Please fix and try again.`;
2154
2405
  let content2 = "";
2155
2406
  const toolCalls = [];
2156
2407
  const toolCallBuffers = /* @__PURE__ */ new Map();
2408
+ let streamProviderMetadata;
2409
+ let thinkingContent = "";
2157
2410
  for await (const chunk of provider.stream(currentMessages, chatOptions)) {
2158
2411
  if (chunk.type === "text_delta") {
2159
2412
  content2 += chunk.content;
2160
2413
  this.onToken(chunk.content);
2414
+ } else if (chunk.type === "thinking_delta") {
2415
+ thinkingContent += chunk.content;
2161
2416
  } else if (chunk.type === "tool_call_delta") {
2162
2417
  let buffer = toolCallBuffers.get(chunk.id);
2163
2418
  if (!buffer) {
@@ -2167,6 +2422,7 @@ Please fix and try again.`;
2167
2422
  if (chunk.name) buffer.name = chunk.name;
2168
2423
  if (chunk.arguments) buffer.arguments += chunk.arguments;
2169
2424
  } else if (chunk.type === "done") {
2425
+ streamProviderMetadata = chunk.providerMetadata;
2170
2426
  if (chunk.usage) {
2171
2427
  response = {
2172
2428
  content: content2,
@@ -2193,6 +2449,12 @@ Please fix and try again.`;
2193
2449
  if (toolCalls.length > 0) {
2194
2450
  response.tool_calls = toolCalls;
2195
2451
  }
2452
+ if (streamProviderMetadata) {
2453
+ response.providerMetadata = streamProviderMetadata;
2454
+ }
2455
+ if (thinkingContent) {
2456
+ response.thinking_content = thinkingContent;
2457
+ }
2196
2458
  } else {
2197
2459
  response = await provider.chat(currentMessages, chatOptions);
2198
2460
  }
@@ -2223,13 +2485,14 @@ Please fix and try again.`;
2223
2485
  currentMessages.push({
2224
2486
  role: "assistant",
2225
2487
  content: response.content || "",
2226
- tool_calls: response.tool_calls
2488
+ tool_calls: response.tool_calls,
2489
+ ...response.providerMetadata ? { providerMetadata: response.providerMetadata } : {}
2227
2490
  });
2228
2491
  for (const toolCall of response.tool_calls) {
2229
2492
  const toolName = toolCall.function.name;
2230
2493
  if (toolName.startsWith("handoff_to_")) {
2231
2494
  const targetName = toolName.replace("handoff_to_", "");
2232
- const descriptor = agent2._config.handoffs?.find((h) => h.agent._name === targetName);
2495
+ const descriptor = resolvedHandoffs?.find((h) => h.agent._name === targetName);
2233
2496
  if (descriptor) {
2234
2497
  const mode = descriptor.mode ?? "oneway";
2235
2498
  let handoffPrompt = prompt;
@@ -2482,8 +2745,9 @@ Please fix and try again.`;
2482
2745
  resultContent2 = JSON.stringify(toolResult2);
2483
2746
  }
2484
2747
  } else if (tool2) {
2748
+ const childCtx = this.createChildContext();
2485
2749
  try {
2486
- toolResult2 = await tool2._execute(toolArgs);
2750
+ toolResult2 = await tool2._execute(toolArgs, childCtx);
2487
2751
  } catch (err) {
2488
2752
  toolResult2 = { error: err instanceof Error ? err.message : String(err) };
2489
2753
  }
@@ -2563,7 +2827,8 @@ Please fix and try again.`;
2563
2827
  guardrailOutputRetries++;
2564
2828
  currentMessages.push({
2565
2829
  role: "assistant",
2566
- content
2830
+ content,
2831
+ ...response.providerMetadata ? { providerMetadata: response.providerMetadata } : {}
2567
2832
  });
2568
2833
  currentMessages.push({
2569
2834
  role: "system",
@@ -2584,6 +2849,7 @@ Please fix and try again.`;
2584
2849
  try {
2585
2850
  const parsed = JSON.parse(stripMarkdownFences(content));
2586
2851
  const validated = options.schema.parse(parsed);
2852
+ this.pushAssistantToSessionHistory(content, response.providerMetadata);
2587
2853
  return validated;
2588
2854
  } catch (err) {
2589
2855
  const maxRetries = options.retries ?? 3;
@@ -2610,11 +2876,23 @@ Please fix and try again.`;
2610
2876
  throw new VerifyError(content, zodErr, maxRetries);
2611
2877
  }
2612
2878
  }
2879
+ this.pushAssistantToSessionHistory(content, response.providerMetadata);
2613
2880
  return content;
2614
2881
  }
2615
2882
  throw new MaxTurnsError("ctx.ask()", maxTurns);
2616
2883
  }
2617
- buildToolDefs(agent2) {
2884
+ /**
2885
+ * Push the final assistant message into session history, preserving providerMetadata
2886
+ * (e.g., Gemini thought signatures needed for multi-turn reasoning context).
2887
+ */
2888
+ pushAssistantToSessionHistory(content, providerMetadata) {
2889
+ this.sessionHistory.push({
2890
+ role: "assistant",
2891
+ content,
2892
+ ...providerMetadata ? { providerMetadata } : {}
2893
+ });
2894
+ }
2895
+ buildToolDefs(agent2, resolvedHandoffs) {
2618
2896
  const defs = [];
2619
2897
  if (agent2._config.tools) {
2620
2898
  for (const tool2 of agent2._config.tools) {
@@ -2628,8 +2906,8 @@ Please fix and try again.`;
2628
2906
  });
2629
2907
  }
2630
2908
  }
2631
- if (agent2._config.handoffs) {
2632
- for (const { agent: handoffAgent, description, mode } of agent2._config.handoffs) {
2909
+ if (resolvedHandoffs) {
2910
+ for (const { agent: handoffAgent, description, mode } of resolvedHandoffs) {
2633
2911
  const isRoundtrip = mode === "roundtrip";
2634
2912
  const defaultDesc = isRoundtrip ? `Delegate a task to ${handoffAgent._name} and receive the result back` : `Hand off the conversation to ${handoffAgent._name}`;
2635
2913
  defs.push({
@@ -3318,6 +3596,79 @@ ${summaryResponse.content}`
3318
3596
  const sessionId = this.metadata?.sessionId;
3319
3597
  await this.memoryManager.forget(key, this.stateStore, sessionId, options);
3320
3598
  }
3599
+ // ── ctx.delegate() ──────────────────────────────────────────────────
3600
+ /**
3601
+ * Select the best agent from a list of candidates and invoke it.
3602
+ * Creates a temporary router agent that uses handoffs to pick the right specialist.
3603
+ *
3604
+ * This is convenience sugar over creating a router agent with dynamic handoffs.
3605
+ * For full control over the router's behavior, create the router agent explicitly.
3606
+ *
3607
+ * @param agents - Candidate agents to choose from (at least 1)
3608
+ * @param prompt - The prompt to send to the selected agent
3609
+ * @param options - Optional: schema, routerModel, metadata, retries
3610
+ */
3611
+ async delegate(agents, prompt, options) {
3612
+ if (agents.length === 0) {
3613
+ throw new Error("ctx.delegate() requires at least one candidate agent");
3614
+ }
3615
+ const names = /* @__PURE__ */ new Set();
3616
+ for (const a of agents) {
3617
+ if (names.has(a._name)) {
3618
+ throw new Error(
3619
+ `ctx.delegate() received duplicate agent name '${a._name}'. All candidate agents must have unique names.`
3620
+ );
3621
+ }
3622
+ names.add(a._name);
3623
+ }
3624
+ if (agents.length === 1) {
3625
+ return this.ask(agents[0], prompt, {
3626
+ schema: options?.schema,
3627
+ retries: options?.retries,
3628
+ metadata: options?.metadata
3629
+ });
3630
+ }
3631
+ const resolveCtx = options?.metadata ? { metadata: { ...this.metadata, ...options.metadata } } : { metadata: this.metadata };
3632
+ const routerModelUri = options?.routerModel ?? agents[0].resolveModel(resolveCtx);
3633
+ const handoffs = agents.map((a) => {
3634
+ let description;
3635
+ try {
3636
+ description = a.resolveSystem(resolveCtx).slice(0, 200);
3637
+ } catch {
3638
+ description = `Agent: ${a._name}`;
3639
+ }
3640
+ return { agent: a, description };
3641
+ });
3642
+ const routerSystem = "Route to the best agent for this task. Always hand off; never answer directly.";
3643
+ const routerAgent = {
3644
+ _config: {
3645
+ model: routerModelUri,
3646
+ system: routerSystem,
3647
+ temperature: 0,
3648
+ handoffs,
3649
+ maxTurns: 2
3650
+ },
3651
+ _name: "_delegate_router",
3652
+ ask: async () => {
3653
+ throw new Error("Direct invocation not supported on delegate router");
3654
+ },
3655
+ resolveModel: () => routerModelUri,
3656
+ resolveSystem: () => routerSystem
3657
+ };
3658
+ this.emitTrace({
3659
+ type: "delegate",
3660
+ agent: "_delegate_router",
3661
+ data: {
3662
+ candidates: agents.map((a) => a._name),
3663
+ routerModel: routerModelUri
3664
+ }
3665
+ });
3666
+ return this.ask(routerAgent, prompt, {
3667
+ schema: options?.schema,
3668
+ retries: options?.retries,
3669
+ metadata: options?.metadata
3670
+ });
3671
+ }
3321
3672
  // ── Private ───────────────────────────────────────────────────────────
3322
3673
  emitTrace(partial) {
3323
3674
  let data = partial.data;
@@ -3927,11 +4278,13 @@ var Session = class _Session {
3927
4278
  ...cachedSummary ? { summaryCache: cachedSummary } : {}
3928
4279
  }
3929
4280
  });
3930
- const assistantMessage = {
3931
- role: "assistant",
3932
- content: typeof result === "string" ? result : JSON.stringify(result)
3933
- };
3934
- history.push(assistantMessage);
4281
+ const lastMsg = history[history.length - 1];
4282
+ if (!(lastMsg && lastMsg.role === "assistant")) {
4283
+ history.push({
4284
+ role: "assistant",
4285
+ content: typeof result === "string" ? result : JSON.stringify(result)
4286
+ });
4287
+ }
3935
4288
  if (this.options.persist !== false) {
3936
4289
  await this.store.saveSession(this.sessionId, history);
3937
4290
  }
@@ -3974,10 +4327,13 @@ var Session = class _Session {
3974
4327
  }
3975
4328
  });
3976
4329
  const updateHistory = async (result) => {
3977
- history.push({
3978
- role: "assistant",
3979
- content: typeof result === "string" ? result : JSON.stringify(result)
3980
- });
4330
+ const lastMsg = history[history.length - 1];
4331
+ if (!(lastMsg && lastMsg.role === "assistant")) {
4332
+ history.push({
4333
+ role: "assistant",
4334
+ content: typeof result === "string" ? result : JSON.stringify(result)
4335
+ });
4336
+ }
3981
4337
  if (this.options.persist !== false) {
3982
4338
  await this.store.saveSession(this.sessionId, history);
3983
4339
  }
@@ -4866,6 +5222,24 @@ var AxlRuntime = class extends EventEmitter2 {
4866
5222
  getExecutions() {
4867
5223
  return [...this.executions.values()];
4868
5224
  }
5225
+ /**
5226
+ * Create a lightweight WorkflowContext for ad-hoc use (tool testing, prototyping).
5227
+ * The context has access to the runtime's providers, state store, and MCP manager
5228
+ * but no session history, streaming callbacks, or budget tracking.
5229
+ */
5230
+ createContext(options) {
5231
+ return new WorkflowContext({
5232
+ input: void 0,
5233
+ executionId: randomUUID2(),
5234
+ metadata: options?.metadata,
5235
+ config: this.config,
5236
+ providerRegistry: this.providerRegistry,
5237
+ stateStore: this.stateStore,
5238
+ mcpManager: this.mcpManager,
5239
+ spanManager: this.spanManager,
5240
+ memoryManager: this.memoryManager
5241
+ });
5242
+ }
4869
5243
  /** Register a custom provider instance. */
4870
5244
  registerProvider(name, provider) {
4871
5245
  this.providerRegistry.registerInstance(name, provider);
@@ -5563,6 +5937,7 @@ export {
5563
5937
  agent,
5564
5938
  createSpanManager,
5565
5939
  defineConfig,
5940
+ resolveThinkingOptions,
5566
5941
  tool,
5567
5942
  workflow,
5568
5943
  zodToJsonSchema