@axlsdk/axl 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -140,6 +140,7 @@ __export(index_exports, {
140
140
  agent: () => agent,
141
141
  createSpanManager: () => createSpanManager,
142
142
  defineConfig: () => defineConfig,
143
+ resolveThinkingOptions: () => resolveThinkingOptions,
143
144
  tool: () => tool,
144
145
  workflow: () => workflow,
145
146
  zodToJsonSchema: () => zodToJsonSchema
@@ -185,7 +186,7 @@ function tool(config) {
185
186
  on: config.retry?.on
186
187
  };
187
188
  const maxStringLen = config.maxStringLength ?? DEFAULT_MAX_STRING_LENGTH;
188
- const execute = async (input) => {
189
+ const execute = async (input, ctx) => {
189
190
  const parsed = config.input.parse(input);
190
191
  if (maxStringLen > 0) {
191
192
  validateStringLengths(parsed, maxStringLen);
@@ -194,7 +195,7 @@ function tool(config) {
194
195
  let lastError;
195
196
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
196
197
  try {
197
- return await config.handler(parsed);
198
+ return await config.handler(parsed, ctx);
198
199
  } catch (err) {
199
200
  lastError = err instanceof Error ? err : new Error(String(err));
200
201
  if (attempt === maxAttempts) break;
@@ -224,7 +225,7 @@ function tool(config) {
224
225
  if (config.hooks?.before) {
225
226
  processedInput = await config.hooks.before(processedInput, ctx);
226
227
  }
227
- let result = await execute(processedInput);
228
+ let result = await execute(processedInput, ctx);
228
229
  if (config.hooks?.after) {
229
230
  result = await config.hooks.after(result, ctx);
230
231
  }
@@ -246,6 +247,25 @@ function tool(config) {
246
247
  };
247
248
  }
248
249
 
250
+ // src/providers/types.ts
251
+ function resolveThinkingOptions(options) {
252
+ if (options.thinkingBudget !== void 0 && options.thinkingBudget < 0) {
253
+ throw new Error(`thinkingBudget must be non-negative, got ${options.thinkingBudget}`);
254
+ }
255
+ const effort = options.effort;
256
+ const thinkingBudget = options.thinkingBudget;
257
+ const hasBudgetOverride = thinkingBudget !== void 0 && thinkingBudget > 0;
258
+ return {
259
+ effort,
260
+ thinkingBudget,
261
+ includeThoughts: options.includeThoughts ?? false,
262
+ // Budget override wins: effort: 'none' + thinkingBudget: 5000 → thinking enabled
263
+ thinkingDisabled: (effort === "none" || thinkingBudget === 0) && !hasBudgetOverride,
264
+ activeEffort: effort && effort !== "none" ? effort : void 0,
265
+ hasBudgetOverride
266
+ };
267
+ }
268
+
249
269
  // src/providers/retry.ts
250
270
  var RETRYABLE_STATUS_CODES = /* @__PURE__ */ new Set([429, 503, 529]);
251
271
  var MAX_RETRIES = 2;
@@ -300,6 +320,9 @@ var OPENAI_PRICING = {
300
320
  "gpt-5-nano": [5e-8, 4e-7],
301
321
  "gpt-5.1": [125e-8, 1e-5],
302
322
  "gpt-5.2": [175e-8, 14e-6],
323
+ "gpt-5.3": [175e-8, 14e-6],
324
+ "gpt-5.4": [25e-7, 15e-6],
325
+ "gpt-5.4-pro": [3e-5, 18e-5],
303
326
  o1: [15e-6, 6e-5],
304
327
  "o1-mini": [3e-6, 12e-6],
305
328
  "o1-pro": [15e-5, 6e-4],
@@ -328,9 +351,32 @@ function estimateOpenAICost(model, promptTokens, completionTokens, cachedTokens)
328
351
  const inputCost = (promptTokens - cached) * inputRate + cached * inputRate * 0.5;
329
352
  return inputCost + completionTokens * outputRate;
330
353
  }
331
- function isReasoningModel(model) {
354
+ function isOSeriesModel(model) {
332
355
  return /^(o1|o3|o4-mini)/.test(model);
333
356
  }
357
+ function supportsReasoningEffort(model) {
358
+ return isOSeriesModel(model) || /^gpt-5/.test(model);
359
+ }
360
+ function supportsReasoningNone(model) {
361
+ return /^gpt-5\.[1-9]/.test(model);
362
+ }
363
+ function supportsXhigh(model) {
364
+ return /^gpt-5\.([2-9]|\d{2,})/.test(model);
365
+ }
366
+ function clampReasoningEffort(model, effort) {
367
+ if (model.startsWith("gpt-5-pro")) return "high";
368
+ if (effort === "none" && !supportsReasoningNone(model)) return "minimal";
369
+ if (effort === "xhigh" && !supportsXhigh(model)) return "high";
370
+ return effort;
371
+ }
372
+ function effortToReasoningEffort(effort) {
373
+ return effort === "max" ? "xhigh" : effort;
374
+ }
375
+ function budgetToReasoningEffort(budget) {
376
+ if (budget <= 1024) return "low";
377
+ if (budget <= 8192) return "medium";
378
+ return "high";
379
+ }
334
380
  var OpenAIProvider = class {
335
381
  name = "openai";
336
382
  baseUrl;
@@ -418,13 +464,26 @@ var OpenAIProvider = class {
418
464
  // Internal helpers
419
465
  // ---------------------------------------------------------------------------
420
466
  buildRequestBody(messages, options, stream) {
421
- const reasoning = isReasoningModel(options.model);
467
+ const oSeries = isOSeriesModel(options.model);
468
+ const reasoningCapable = supportsReasoningEffort(options.model);
469
+ const { thinkingBudget, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
470
+ let wireEffort;
471
+ if (reasoningCapable) {
472
+ if (hasBudgetOverride) {
473
+ wireEffort = clampReasoningEffort(options.model, budgetToReasoningEffort(thinkingBudget));
474
+ } else if (!thinkingDisabled && activeEffort) {
475
+ wireEffort = clampReasoningEffort(options.model, effortToReasoningEffort(activeEffort));
476
+ } else if (thinkingDisabled) {
477
+ wireEffort = clampReasoningEffort(options.model, "none");
478
+ }
479
+ }
480
+ const stripTemp = oSeries || reasoningCapable && wireEffort !== void 0;
422
481
  const body = {
423
482
  model: options.model,
424
- messages: messages.map((m) => this.formatMessage(m, reasoning)),
483
+ messages: messages.map((m) => this.formatMessage(m, oSeries)),
425
484
  stream
426
485
  };
427
- if (options.temperature !== void 0 && !reasoning) {
486
+ if (options.temperature !== void 0 && !stripTemp) {
428
487
  body.temperature = options.temperature;
429
488
  }
430
489
  if (options.maxTokens !== void 0) {
@@ -433,7 +492,9 @@ var OpenAIProvider = class {
433
492
  if (options.stop) body.stop = options.stop;
434
493
  if (options.tools && options.tools.length > 0) {
435
494
  body.tools = options.tools;
436
- body.parallel_tool_calls = true;
495
+ if (!oSeries) {
496
+ body.parallel_tool_calls = true;
497
+ }
437
498
  }
438
499
  if (options.toolChoice !== void 0) {
439
500
  body.tool_choice = options.toolChoice;
@@ -441,12 +502,13 @@ var OpenAIProvider = class {
441
502
  if (options.responseFormat) {
442
503
  body.response_format = options.responseFormat;
443
504
  }
444
- if (options.reasoningEffort) {
445
- body.reasoning_effort = options.reasoningEffort;
446
- }
505
+ if (wireEffort) body.reasoning_effort = wireEffort;
447
506
  if (stream) {
448
507
  body.stream_options = { include_usage: true };
449
508
  }
509
+ if (options.providerOptions) {
510
+ Object.assign(body, options.providerOptions);
511
+ }
450
512
  return body;
451
513
  }
452
514
  /** Extract a human-readable message from an API error response body. */
@@ -460,9 +522,9 @@ var OpenAIProvider = class {
460
522
  }
461
523
  return `OpenAI API error (${status}): ${body}`;
462
524
  }
463
- formatMessage(msg, reasoning) {
525
+ formatMessage(msg, oSeries) {
464
526
  const out = {
465
- role: msg.role === "system" && reasoning ? "developer" : msg.role,
527
+ role: msg.role === "system" && oSeries ? "developer" : msg.role,
466
528
  content: msg.content
467
529
  };
468
530
  if (msg.name) out.name = msg.name;
@@ -599,7 +661,20 @@ var OpenAIResponsesProvider = class {
599
661
  // Internal: build request body
600
662
  // ---------------------------------------------------------------------------
601
663
  buildRequestBody(messages, options, stream) {
602
- const reasoning = isReasoningModel(options.model);
664
+ const oSeries = isOSeriesModel(options.model);
665
+ const reasoningCapable = supportsReasoningEffort(options.model);
666
+ const { thinkingBudget, includeThoughts, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
667
+ let wireEffort;
668
+ if (reasoningCapable) {
669
+ if (hasBudgetOverride) {
670
+ wireEffort = clampReasoningEffort(options.model, budgetToReasoningEffort(thinkingBudget));
671
+ } else if (!thinkingDisabled && activeEffort) {
672
+ wireEffort = clampReasoningEffort(options.model, effortToReasoningEffort(activeEffort));
673
+ } else if (thinkingDisabled) {
674
+ wireEffort = clampReasoningEffort(options.model, "none");
675
+ }
676
+ }
677
+ const stripTemp = oSeries || reasoningCapable && wireEffort !== void 0;
603
678
  const systemMessages = messages.filter((m) => m.role === "system");
604
679
  const nonSystemMessages = messages.filter((m) => m.role !== "system");
605
680
  const body = {
@@ -614,7 +689,7 @@ var OpenAIResponsesProvider = class {
614
689
  if (options.maxTokens !== void 0) {
615
690
  body.max_output_tokens = options.maxTokens;
616
691
  }
617
- if (options.temperature !== void 0 && !reasoning) {
692
+ if (options.temperature !== void 0 && !stripTemp) {
618
693
  body.temperature = options.temperature;
619
694
  }
620
695
  if (options.tools && options.tools.length > 0) {
@@ -633,12 +708,21 @@ var OpenAIResponsesProvider = class {
633
708
  body.tool_choice = options.toolChoice;
634
709
  }
635
710
  }
636
- if (options.reasoningEffort) {
637
- body.reasoning = { effort: options.reasoningEffort };
711
+ if (reasoningCapable && (wireEffort !== void 0 || includeThoughts)) {
712
+ const reasoning = {};
713
+ if (wireEffort !== void 0) reasoning.effort = wireEffort;
714
+ if (includeThoughts) reasoning.summary = "detailed";
715
+ if (Object.keys(reasoning).length > 0) body.reasoning = reasoning;
716
+ }
717
+ if (reasoningCapable) {
718
+ body.include = ["reasoning.encrypted_content"];
638
719
  }
639
720
  if (options.responseFormat) {
640
721
  body.text = { format: this.mapResponseFormat(options.responseFormat) };
641
722
  }
723
+ if (options.providerOptions) {
724
+ Object.assign(body, options.providerOptions);
725
+ }
642
726
  return body;
643
727
  }
644
728
  // ---------------------------------------------------------------------------
@@ -654,6 +738,12 @@ var OpenAIResponsesProvider = class {
654
738
  output: msg.content
655
739
  });
656
740
  } else if (msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0) {
741
+ const reasoningItems = msg.providerMetadata?.openaiReasoningItems;
742
+ if (reasoningItems) {
743
+ for (const item of reasoningItems) {
744
+ input.push(item);
745
+ }
746
+ }
657
747
  if (msg.content) {
658
748
  input.push({ type: "message", role: "assistant", content: msg.content });
659
749
  }
@@ -666,6 +756,12 @@ var OpenAIResponsesProvider = class {
666
756
  });
667
757
  }
668
758
  } else if (msg.role === "user" || msg.role === "assistant") {
759
+ if (msg.role === "assistant" && msg.providerMetadata?.openaiReasoningItems) {
760
+ const reasoningItems = msg.providerMetadata.openaiReasoningItems;
761
+ for (const item of reasoningItems) {
762
+ input.push(item);
763
+ }
764
+ }
669
765
  input.push({
670
766
  type: "message",
671
767
  role: msg.role,
@@ -698,7 +794,9 @@ var OpenAIResponsesProvider = class {
698
794
  // ---------------------------------------------------------------------------
699
795
  parseResponse(json, model) {
700
796
  let content = "";
797
+ let thinkingContent = "";
701
798
  const toolCalls = [];
799
+ const reasoningItems = [];
702
800
  for (const item of json.output) {
703
801
  if (item.type === "message") {
704
802
  for (const part of item.content ?? []) {
@@ -715,6 +813,15 @@ var OpenAIResponsesProvider = class {
715
813
  arguments: item.arguments
716
814
  }
717
815
  });
816
+ } else if (item.type === "reasoning") {
817
+ reasoningItems.push(item);
818
+ if (item.summary) {
819
+ for (const s of item.summary) {
820
+ if (s.type === "summary_text" && s.text) {
821
+ thinkingContent += s.text;
822
+ }
823
+ }
824
+ }
718
825
  }
719
826
  }
720
827
  const usage = json.usage ? {
@@ -725,11 +832,14 @@ var OpenAIResponsesProvider = class {
725
832
  cached_tokens: json.usage.input_tokens_details?.cached_tokens
726
833
  } : void 0;
727
834
  const cost = usage ? estimateOpenAICost(model, usage.prompt_tokens, usage.completion_tokens, usage.cached_tokens) : void 0;
835
+ const providerMetadata = reasoningItems.length > 0 ? { openaiReasoningItems: reasoningItems } : void 0;
728
836
  return {
729
837
  content,
838
+ thinking_content: thinkingContent || void 0,
730
839
  tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
731
840
  usage,
732
- cost
841
+ cost,
842
+ providerMetadata
733
843
  };
734
844
  }
735
845
  // ---------------------------------------------------------------------------
@@ -781,6 +891,8 @@ var OpenAIResponsesProvider = class {
781
891
  switch (eventType) {
782
892
  case "response.output_text.delta":
783
893
  return { type: "text_delta", content: data.delta ?? "" };
894
+ case "response.reasoning_summary_text.delta":
895
+ return { type: "thinking_delta", content: data.delta ?? "" };
784
896
  case "response.output_item.added":
785
897
  if (data.item?.type === "function_call") {
786
898
  const callId = data.item.call_id ?? data.item.id ?? "";
@@ -811,7 +923,9 @@ var OpenAIResponsesProvider = class {
811
923
  reasoning_tokens: response.usage.output_tokens_details?.reasoning_tokens,
812
924
  cached_tokens: response.usage.input_tokens_details?.cached_tokens
813
925
  } : void 0;
814
- return { type: "done", usage };
926
+ const reasoningItems = response?.output?.filter((item) => item.type === "reasoning") ?? [];
927
+ const providerMetadata = reasoningItems.length > 0 ? { openaiReasoningItems: reasoningItems } : void 0;
928
+ return { type: "done", usage, providerMetadata };
815
929
  }
816
930
  case "response.failed": {
817
931
  const errorMsg = data.response?.error?.message ?? data.response?.status_details?.error?.message ?? "Unknown error";
@@ -839,9 +953,12 @@ var OpenAIResponsesProvider = class {
839
953
  // src/providers/anthropic.ts
840
954
  var ANTHROPIC_API_VERSION = "2023-06-01";
841
955
  var ANTHROPIC_PRICING = {
842
- "claude-opus-4-6": [15e-6, 75e-6],
956
+ "claude-opus-4-6": [5e-6, 25e-6],
957
+ "claude-sonnet-4-6": [3e-6, 15e-6],
958
+ "claude-opus-4-5": [5e-6, 25e-6],
959
+ "claude-opus-4-1": [15e-6, 75e-6],
843
960
  "claude-sonnet-4-5": [3e-6, 15e-6],
844
- "claude-haiku-4-5": [8e-7, 4e-6],
961
+ "claude-haiku-4-5": [1e-6, 5e-6],
845
962
  "claude-sonnet-4": [3e-6, 15e-6],
846
963
  "claude-opus-4": [15e-6, 75e-6],
847
964
  "claude-3-7-sonnet": [3e-6, 15e-6],
@@ -851,12 +968,15 @@ var ANTHROPIC_PRICING = {
851
968
  "claude-3-sonnet": [3e-6, 15e-6],
852
969
  "claude-3-haiku": [25e-8, 125e-8]
853
970
  };
971
+ var ANTHROPIC_PRICING_KEYS_BY_LENGTH = Object.keys(ANTHROPIC_PRICING).sort(
972
+ (a, b) => b.length - a.length
973
+ );
854
974
  function estimateAnthropicCost(model, inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens) {
855
975
  let pricing = ANTHROPIC_PRICING[model];
856
976
  if (!pricing) {
857
- for (const [key, value] of Object.entries(ANTHROPIC_PRICING)) {
977
+ for (const key of ANTHROPIC_PRICING_KEYS_BY_LENGTH) {
858
978
  if (model.startsWith(key)) {
859
- pricing = value;
979
+ pricing = ANTHROPIC_PRICING[key];
860
980
  break;
861
981
  }
862
982
  }
@@ -868,6 +988,23 @@ function estimateAnthropicCost(model, inputTokens, outputTokens, cacheReadTokens
868
988
  const inputCost = (inputTokens - cacheRead - cacheWrite) * inputRate + cacheRead * inputRate * 0.1 + cacheWrite * inputRate * 1.25;
869
989
  return inputCost + outputTokens * outputRate;
870
990
  }
991
+ var THINKING_BUDGETS = {
992
+ low: 1024,
993
+ medium: 5e3,
994
+ high: 1e4,
995
+ // 30000 (not 32000) to stay under the 32K max_tokens limit on Opus 4/4.1.
996
+ // With auto-bump (+1024), max_tokens becomes 31024 which fits all models.
997
+ max: 3e4
998
+ };
999
+ function supportsAdaptiveThinking(model) {
1000
+ return model.startsWith("claude-opus-4-6") || model.startsWith("claude-sonnet-4-6");
1001
+ }
1002
+ function supportsMaxEffort(model) {
1003
+ return model.startsWith("claude-opus-4-6");
1004
+ }
1005
+ function supportsEffort(model) {
1006
+ return model.startsWith("claude-opus-4-6") || model.startsWith("claude-sonnet-4-6") || model.startsWith("claude-opus-4-5");
1007
+ }
871
1008
  var AnthropicProvider = class {
872
1009
  name = "anthropic";
873
1010
  baseUrl;
@@ -957,21 +1094,58 @@ var AnthropicProvider = class {
957
1094
  if (systemText) {
958
1095
  body.system = systemText;
959
1096
  }
960
- if (options.temperature !== void 0) {
961
- body.temperature = options.temperature;
962
- }
963
1097
  if (options.stop) {
964
1098
  body.stop_sequences = options.stop;
965
1099
  }
966
1100
  if (options.tools && options.tools.length > 0) {
967
1101
  body.tools = options.tools.map((t) => this.mapToolDefinition(t));
968
1102
  }
1103
+ if (options.toolChoice !== void 0) {
1104
+ body.tool_choice = this.mapToolChoice(options.toolChoice);
1105
+ }
1106
+ const { thinkingBudget, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
1107
+ let resolvedEffort = activeEffort;
1108
+ if (resolvedEffort === "max" && !supportsMaxEffort(options.model)) {
1109
+ resolvedEffort = "high";
1110
+ }
1111
+ if (hasBudgetOverride) {
1112
+ body.thinking = { type: "enabled", budget_tokens: thinkingBudget };
1113
+ const currentMax = body.max_tokens;
1114
+ if (currentMax < thinkingBudget + 1024) {
1115
+ body.max_tokens = thinkingBudget + 1024;
1116
+ }
1117
+ if (resolvedEffort && supportsEffort(options.model)) {
1118
+ body.output_config = { effort: resolvedEffort };
1119
+ }
1120
+ } else if (thinkingDisabled) {
1121
+ if (resolvedEffort && supportsEffort(options.model)) {
1122
+ body.output_config = { effort: resolvedEffort };
1123
+ }
1124
+ } else if (resolvedEffort && supportsAdaptiveThinking(options.model)) {
1125
+ body.thinking = { type: "adaptive" };
1126
+ body.output_config = { effort: resolvedEffort };
1127
+ } else if (resolvedEffort && supportsEffort(options.model)) {
1128
+ body.output_config = { effort: resolvedEffort };
1129
+ } else if (resolvedEffort) {
1130
+ const budget = THINKING_BUDGETS[resolvedEffort] ?? 5e3;
1131
+ body.thinking = { type: "enabled", budget_tokens: budget };
1132
+ const currentMax = body.max_tokens;
1133
+ if (currentMax < budget + 1024) {
1134
+ body.max_tokens = budget + 1024;
1135
+ }
1136
+ }
1137
+ if (options.temperature !== void 0 && !body.thinking) {
1138
+ body.temperature = options.temperature;
1139
+ }
969
1140
  if (options.responseFormat && options.responseFormat.type !== "text") {
970
1141
  const jsonInstruction = "You must respond with valid JSON only. No markdown fences, no extra text.";
971
1142
  body.system = body.system ? `${body.system}
972
1143
 
973
1144
  ${jsonInstruction}` : jsonInstruction;
974
1145
  }
1146
+ if (options.providerOptions) {
1147
+ Object.assign(body, options.providerOptions);
1148
+ }
975
1149
  return body;
976
1150
  }
977
1151
  /**
@@ -1061,14 +1235,33 @@ ${jsonInstruction}` : jsonInstruction;
1061
1235
  input_schema: tool2.function.parameters
1062
1236
  };
1063
1237
  }
1238
+ /**
1239
+ * Map Axl's ToolChoice to Anthropic's tool_choice format.
1240
+ *
1241
+ * Axl (OpenAI format) → Anthropic format
1242
+ * 'auto' → { type: 'auto' }
1243
+ * 'none' → { type: 'none' }
1244
+ * 'required' → { type: 'any' }
1245
+ * { type:'function', function: { name } } → { type: 'tool', name }
1246
+ */
1247
+ mapToolChoice(choice) {
1248
+ if (typeof choice === "string") {
1249
+ if (choice === "required") return { type: "any" };
1250
+ return { type: choice };
1251
+ }
1252
+ return { type: "tool", name: choice.function.name };
1253
+ }
1064
1254
  // ---------------------------------------------------------------------------
1065
1255
  // Internal: response parsing
1066
1256
  // ---------------------------------------------------------------------------
1067
1257
  parseResponse(json) {
1068
1258
  let content = "";
1259
+ let thinkingContent = "";
1069
1260
  const toolCalls = [];
1070
1261
  for (const block of json.content) {
1071
- if (block.type === "text") {
1262
+ if (block.type === "thinking") {
1263
+ thinkingContent += block.thinking;
1264
+ } else if (block.type === "text") {
1072
1265
  content += block.text;
1073
1266
  } else if (block.type === "tool_use") {
1074
1267
  toolCalls.push({
@@ -1099,6 +1292,7 @@ ${jsonInstruction}` : jsonInstruction;
1099
1292
  ) : void 0;
1100
1293
  return {
1101
1294
  content,
1295
+ thinking_content: thinkingContent || void 0,
1102
1296
  tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
1103
1297
  usage,
1104
1298
  cost
@@ -1149,7 +1343,9 @@ ${jsonInstruction}` : jsonInstruction;
1149
1343
  }
1150
1344
  case "content_block_delta": {
1151
1345
  const delta = event.delta;
1152
- if (delta?.type === "text_delta" && delta.text) {
1346
+ if (delta?.type === "thinking_delta" && delta.thinking) {
1347
+ yield { type: "thinking_delta", content: delta.thinking };
1348
+ } else if (delta?.type === "text_delta" && delta.text) {
1153
1349
  yield { type: "text_delta", content: delta.text };
1154
1350
  } else if (delta?.type === "input_json_delta" && delta.partial_json) {
1155
1351
  yield {
@@ -1220,14 +1416,19 @@ var GEMINI_PRICING = {
1220
1416
  "gemini-2.0-flash": [1e-7, 4e-7],
1221
1417
  "gemini-2.0-flash-lite": [1e-7, 4e-7],
1222
1418
  "gemini-3-pro-preview": [2e-6, 12e-6],
1223
- "gemini-3-flash-preview": [5e-7, 3e-6]
1419
+ "gemini-3-flash-preview": [5e-7, 3e-6],
1420
+ "gemini-3.1-pro-preview": [2e-6, 12e-6],
1421
+ "gemini-3.1-flash-lite-preview": [25e-8, 15e-7]
1224
1422
  };
1423
+ var GEMINI_PRICING_KEYS_BY_LENGTH = Object.keys(GEMINI_PRICING).sort(
1424
+ (a, b) => b.length - a.length
1425
+ );
1225
1426
  function estimateGeminiCost(model, inputTokens, outputTokens, cachedTokens) {
1226
1427
  let pricing = GEMINI_PRICING[model];
1227
1428
  if (!pricing) {
1228
- for (const [key, value] of Object.entries(GEMINI_PRICING)) {
1429
+ for (const key of GEMINI_PRICING_KEYS_BY_LENGTH) {
1229
1430
  if (model.startsWith(key)) {
1230
- pricing = value;
1431
+ pricing = GEMINI_PRICING[key];
1231
1432
  break;
1232
1433
  }
1233
1434
  }
@@ -1238,6 +1439,39 @@ function estimateGeminiCost(model, inputTokens, outputTokens, cachedTokens) {
1238
1439
  const inputCost = (inputTokens - cached) * inputRate + cached * inputRate * 0.1;
1239
1440
  return inputCost + outputTokens * outputRate;
1240
1441
  }
1442
+ var THINKING_BUDGETS2 = {
1443
+ low: 1024,
1444
+ medium: 5e3,
1445
+ high: 1e4,
1446
+ max: 24576
1447
+ };
1448
+ var THINKING_LEVELS = {
1449
+ low: "low",
1450
+ medium: "medium",
1451
+ high: "high",
1452
+ max: "high"
1453
+ // 3.x caps at 'high'
1454
+ };
1455
+ function isGemini3x(model) {
1456
+ return /^gemini-3[.-]/.test(model);
1457
+ }
1458
+ function budgetToThinkingLevel(budgetTokens) {
1459
+ if (budgetTokens <= 1024) return "low";
1460
+ if (budgetTokens <= 5e3) return "medium";
1461
+ return "high";
1462
+ }
1463
+ function minThinkingLevel(model) {
1464
+ if (model.startsWith("gemini-3.1-pro")) return "low";
1465
+ return "minimal";
1466
+ }
1467
+ var _warned3xEffortNone = /* @__PURE__ */ new Set();
1468
+ function warnGemini3xEffortNone(model) {
1469
+ if (_warned3xEffortNone.has(model)) return;
1470
+ _warned3xEffortNone.add(model);
1471
+ console.warn(
1472
+ `[axl] effort: 'none' on Gemini 3.x (${model}) maps to the model's minimum thinking level ('${minThinkingLevel(model)}'), not fully disabled. Gemini 3.x models cannot disable thinking entirely.`
1473
+ );
1474
+ }
1241
1475
  var GeminiProvider = class {
1242
1476
  name = "google";
1243
1477
  baseUrl;
@@ -1351,6 +1585,58 @@ var GeminiProvider = class {
1351
1585
  if (Object.keys(generationConfig).length > 0) {
1352
1586
  body.generationConfig = generationConfig;
1353
1587
  }
1588
+ const {
1589
+ effort,
1590
+ thinkingBudget,
1591
+ includeThoughts,
1592
+ thinkingDisabled,
1593
+ activeEffort,
1594
+ hasBudgetOverride
1595
+ } = resolveThinkingOptions(options);
1596
+ if (thinkingDisabled) {
1597
+ if (isGemini3x(options.model)) {
1598
+ if (effort === "none") {
1599
+ warnGemini3xEffortNone(options.model);
1600
+ }
1601
+ generationConfig.thinkingConfig = { thinkingLevel: minThinkingLevel(options.model) };
1602
+ } else {
1603
+ generationConfig.thinkingConfig = { thinkingBudget: 0 };
1604
+ }
1605
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1606
+ } else if (hasBudgetOverride) {
1607
+ const config = {};
1608
+ if (isGemini3x(options.model)) {
1609
+ config.thinkingLevel = budgetToThinkingLevel(thinkingBudget);
1610
+ } else {
1611
+ config.thinkingBudget = thinkingBudget;
1612
+ }
1613
+ if (includeThoughts) config.includeThoughts = true;
1614
+ generationConfig.thinkingConfig = config;
1615
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1616
+ } else if (activeEffort) {
1617
+ const config = {};
1618
+ if (isGemini3x(options.model)) {
1619
+ config.thinkingLevel = THINKING_LEVELS[activeEffort] ?? "medium";
1620
+ } else {
1621
+ if (activeEffort === "max" && options.model.startsWith("gemini-2.5-pro")) {
1622
+ config.thinkingBudget = 32768;
1623
+ } else {
1624
+ config.thinkingBudget = THINKING_BUDGETS2[activeEffort] ?? 5e3;
1625
+ }
1626
+ }
1627
+ if (includeThoughts) config.includeThoughts = true;
1628
+ generationConfig.thinkingConfig = config;
1629
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1630
+ } else if (includeThoughts) {
1631
+ generationConfig.thinkingConfig = { includeThoughts: true };
1632
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1633
+ }
1634
+ if (options.toolChoice !== void 0) {
1635
+ body.toolConfig = { functionCallingConfig: this.mapToolChoice(options.toolChoice) };
1636
+ }
1637
+ if (options.providerOptions) {
1638
+ Object.assign(body, options.providerOptions);
1639
+ }
1354
1640
  return body;
1355
1641
  }
1356
1642
  /**
@@ -1376,28 +1662,33 @@ var GeminiProvider = class {
1376
1662
  const result = [];
1377
1663
  for (const msg of messages) {
1378
1664
  if (msg.role === "assistant") {
1379
- const parts = [];
1380
- if (msg.content) {
1381
- parts.push({ text: msg.content });
1382
- }
1383
- if (msg.tool_calls && msg.tool_calls.length > 0) {
1384
- for (const tc of msg.tool_calls) {
1385
- let parsedArgs;
1386
- try {
1387
- parsedArgs = JSON.parse(tc.function.arguments);
1388
- } catch {
1389
- parsedArgs = {};
1390
- }
1391
- parts.push({
1392
- functionCall: {
1393
- name: tc.function.name,
1394
- args: parsedArgs
1665
+ const rawParts = msg.providerMetadata?.geminiParts;
1666
+ if (rawParts && rawParts.length > 0) {
1667
+ result.push({ role: "model", parts: rawParts });
1668
+ } else {
1669
+ const parts = [];
1670
+ if (msg.content) {
1671
+ parts.push({ text: msg.content });
1672
+ }
1673
+ if (msg.tool_calls && msg.tool_calls.length > 0) {
1674
+ for (const tc of msg.tool_calls) {
1675
+ let parsedArgs;
1676
+ try {
1677
+ parsedArgs = JSON.parse(tc.function.arguments);
1678
+ } catch {
1679
+ parsedArgs = {};
1395
1680
  }
1396
- });
1681
+ parts.push({
1682
+ functionCall: {
1683
+ name: tc.function.name,
1684
+ args: parsedArgs
1685
+ }
1686
+ });
1687
+ }
1688
+ }
1689
+ if (parts.length > 0) {
1690
+ result.push({ role: "model", parts });
1397
1691
  }
1398
- }
1399
- if (parts.length > 0) {
1400
- result.push({ role: "model", parts });
1401
1692
  }
1402
1693
  } else if (msg.role === "tool") {
1403
1694
  const functionName = toolCallIdToName.get(msg.tool_call_id) ?? "unknown";
@@ -1442,6 +1733,25 @@ var GeminiProvider = class {
1442
1733
  }
1443
1734
  return merged;
1444
1735
  }
1736
+ /**
1737
+ * Map Axl's ToolChoice to Gemini's functionCallingConfig format.
1738
+ *
1739
+ * - 'auto' → { mode: 'AUTO' }
1740
+ * - 'none' → { mode: 'NONE' }
1741
+ * - 'required' → { mode: 'ANY' }
1742
+ * - { type: 'function', function: { name } } → { mode: 'ANY', allowedFunctionNames: [name] }
1743
+ */
1744
+ mapToolChoice(choice) {
1745
+ if (typeof choice === "string") {
1746
+ const modeMap = {
1747
+ auto: "AUTO",
1748
+ none: "NONE",
1749
+ required: "ANY"
1750
+ };
1751
+ return { mode: modeMap[choice] ?? "AUTO" };
1752
+ }
1753
+ return { mode: "ANY", allowedFunctionNames: [choice.function.name] };
1754
+ }
1445
1755
  mapToolDefinition(tool2) {
1446
1756
  return {
1447
1757
  name: tool2.function.name,
@@ -1455,10 +1765,13 @@ var GeminiProvider = class {
1455
1765
  parseResponse(json, model) {
1456
1766
  const candidate = json.candidates?.[0];
1457
1767
  let content = "";
1768
+ let thinkingContent = "";
1458
1769
  const toolCalls = [];
1459
1770
  if (candidate?.content?.parts) {
1460
1771
  for (const part of candidate.content.parts) {
1461
- if (part.text) {
1772
+ if (part.thought && part.text) {
1773
+ thinkingContent += part.text;
1774
+ } else if (part.text) {
1462
1775
  content += part.text;
1463
1776
  } else if (part.functionCall) {
1464
1777
  toolCalls.push({
@@ -1473,18 +1786,24 @@ var GeminiProvider = class {
1473
1786
  }
1474
1787
  }
1475
1788
  const cachedTokens = json.usageMetadata?.cachedContentTokenCount;
1789
+ const reasoningTokens = json.usageMetadata?.thoughtsTokenCount;
1476
1790
  const usage = json.usageMetadata ? {
1477
1791
  prompt_tokens: json.usageMetadata.promptTokenCount ?? 0,
1478
1792
  completion_tokens: json.usageMetadata.candidatesTokenCount ?? 0,
1479
1793
  total_tokens: json.usageMetadata.totalTokenCount ?? 0,
1480
- cached_tokens: cachedTokens && cachedTokens > 0 ? cachedTokens : void 0
1794
+ cached_tokens: cachedTokens && cachedTokens > 0 ? cachedTokens : void 0,
1795
+ reasoning_tokens: reasoningTokens && reasoningTokens > 0 ? reasoningTokens : void 0
1481
1796
  } : void 0;
1482
1797
  const cost = usage ? estimateGeminiCost(model, usage.prompt_tokens, usage.completion_tokens, usage.cached_tokens) : void 0;
1798
+ const rawParts = candidate?.content?.parts;
1799
+ const providerMetadata = rawParts ? { geminiParts: rawParts } : void 0;
1483
1800
  return {
1484
1801
  content,
1802
+ thinking_content: thinkingContent || void 0,
1485
1803
  tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
1486
1804
  usage,
1487
- cost
1805
+ cost,
1806
+ providerMetadata
1488
1807
  };
1489
1808
  }
1490
1809
  // ---------------------------------------------------------------------------
@@ -1495,6 +1814,7 @@ var GeminiProvider = class {
1495
1814
  const decoder = new TextDecoder();
1496
1815
  let buffer = "";
1497
1816
  let usage;
1817
+ const accumulatedParts = [];
1498
1818
  try {
1499
1819
  while (true) {
1500
1820
  const { done, value } = await reader.read();
@@ -1515,17 +1835,22 @@ var GeminiProvider = class {
1515
1835
  }
1516
1836
  if (chunk.usageMetadata) {
1517
1837
  const cached = chunk.usageMetadata.cachedContentTokenCount;
1838
+ const reasoning = chunk.usageMetadata.thoughtsTokenCount;
1518
1839
  usage = {
1519
1840
  prompt_tokens: chunk.usageMetadata.promptTokenCount ?? 0,
1520
1841
  completion_tokens: chunk.usageMetadata.candidatesTokenCount ?? 0,
1521
1842
  total_tokens: chunk.usageMetadata.totalTokenCount ?? 0,
1522
- cached_tokens: cached && cached > 0 ? cached : void 0
1843
+ cached_tokens: cached && cached > 0 ? cached : void 0,
1844
+ reasoning_tokens: reasoning && reasoning > 0 ? reasoning : void 0
1523
1845
  };
1524
1846
  }
1525
1847
  const candidate = chunk.candidates?.[0];
1526
1848
  if (candidate?.content?.parts) {
1527
1849
  for (const part of candidate.content.parts) {
1528
- if (part.text) {
1850
+ accumulatedParts.push(part);
1851
+ if (part.thought && part.text) {
1852
+ yield { type: "thinking_delta", content: part.text };
1853
+ } else if (part.text) {
1529
1854
  yield { type: "text_delta", content: part.text };
1530
1855
  } else if (part.functionCall) {
1531
1856
  yield {
@@ -1539,7 +1864,8 @@ var GeminiProvider = class {
1539
1864
  }
1540
1865
  }
1541
1866
  }
1542
- yield { type: "done", usage };
1867
+ const providerMetadata = accumulatedParts.length > 0 ? { geminiParts: accumulatedParts } : void 0;
1868
+ yield { type: "done", usage, providerMetadata };
1543
1869
  } finally {
1544
1870
  reader.releaseLock();
1545
1871
  }
@@ -1912,7 +2238,7 @@ function estimateMessagesTokens(messages) {
1912
2238
  }
1913
2239
  return total;
1914
2240
  }
1915
- var WorkflowContext = class {
2241
+ var WorkflowContext = class _WorkflowContext {
1916
2242
  input;
1917
2243
  executionId;
1918
2244
  metadata;
@@ -1965,6 +2291,37 @@ var WorkflowContext = class {
1965
2291
  this.summaryCache = init.metadata.summaryCache;
1966
2292
  }
1967
2293
  }
2294
+ /**
2295
+ * Create a child context for nested agent invocations (e.g., agent-as-tool).
2296
+ * Shares: budget tracking, abort signals, trace emission, provider registry,
2297
+ * state store, span manager, memory manager, MCP manager, config,
2298
+ * awaitHuman handler, pending decisions, tool overrides.
2299
+ * Isolates: session history, step counter, streaming callbacks (onToken, onAgentStart, onToolCall).
2300
+ */
2301
+ createChildContext() {
2302
+ return new _WorkflowContext({
2303
+ input: this.input,
2304
+ executionId: this.executionId,
2305
+ config: this.config,
2306
+ providerRegistry: this.providerRegistry,
2307
+ metadata: { ...this.metadata },
2308
+ // Shared infrastructure
2309
+ budgetContext: this.budgetContext,
2310
+ stateStore: this.stateStore,
2311
+ mcpManager: this.mcpManager,
2312
+ spanManager: this.spanManager,
2313
+ memoryManager: this.memoryManager,
2314
+ onTrace: this.onTrace,
2315
+ onAgentCallComplete: this.onAgentCallComplete,
2316
+ awaitHumanHandler: this.awaitHumanHandler,
2317
+ pendingDecisions: this.pendingDecisions,
2318
+ toolOverrides: this.toolOverrides,
2319
+ signal: this.signal,
2320
+ workflowName: this.workflowName
2321
+ // Isolated: sessionHistory (empty), stepCounter (0),
2322
+ // onToken (null), onAgentStart (null), onToolCall (null)
2323
+ });
2324
+ }
1968
2325
  /**
1969
2326
  * Resolve the current abort signal.
1970
2327
  * Branch-scoped signals (from race/spawn/map/budget) in AsyncLocalStorage
@@ -2024,7 +2381,15 @@ var WorkflowContext = class {
2024
2381
  model: agent2.resolveModel(resolveCtx),
2025
2382
  cost: costAfter - costBefore,
2026
2383
  duration: Date.now() - startTime,
2027
- promptVersion: agent2._config.version
2384
+ promptVersion: agent2._config.version,
2385
+ temperature: options?.temperature ?? agent2._config.temperature,
2386
+ maxTokens: options?.maxTokens ?? agent2._config.maxTokens ?? 4096,
2387
+ effort: options?.effort ?? agent2._config.effort,
2388
+ thinkingBudget: options?.thinkingBudget ?? agent2._config.thinkingBudget,
2389
+ includeThoughts: options?.includeThoughts ?? agent2._config.includeThoughts,
2390
+ toolChoice: options?.toolChoice ?? agent2._config.toolChoice,
2391
+ stop: options?.stop ?? agent2._config.stop,
2392
+ providerOptions: options?.providerOptions ?? agent2._config.providerOptions
2028
2393
  });
2029
2394
  return result;
2030
2395
  });
@@ -2047,7 +2412,21 @@ var WorkflowContext = class {
2047
2412
  const modelUri = agent2.resolveModel(resolveCtx);
2048
2413
  const systemPrompt = agent2.resolveSystem(resolveCtx);
2049
2414
  const { provider, model } = this.providerRegistry.resolve(modelUri, this.config);
2050
- const toolDefs = this.buildToolDefs(agent2);
2415
+ let resolvedHandoffs;
2416
+ if (typeof agent2._config.handoffs === "function") {
2417
+ try {
2418
+ resolvedHandoffs = agent2._config.handoffs(resolveCtx);
2419
+ } catch (err) {
2420
+ this.log("handoff_resolve_error", {
2421
+ agent: agent2._name,
2422
+ error: err instanceof Error ? err.message : String(err)
2423
+ });
2424
+ resolvedHandoffs = void 0;
2425
+ }
2426
+ } else {
2427
+ resolvedHandoffs = agent2._config.handoffs;
2428
+ }
2429
+ const toolDefs = this.buildToolDefs(agent2, resolvedHandoffs);
2051
2430
  const messages = [];
2052
2431
  if (systemPrompt) {
2053
2432
  messages.push({ role: "system", content: systemPrompt });
@@ -2151,9 +2530,15 @@ Please fix and try again.`;
2151
2530
  turns++;
2152
2531
  const chatOptions = {
2153
2532
  model,
2154
- temperature: agent2._config.temperature,
2533
+ temperature: options?.temperature ?? agent2._config.temperature,
2155
2534
  tools: toolDefs.length > 0 ? toolDefs : void 0,
2156
- maxTokens: 4096,
2535
+ maxTokens: options?.maxTokens ?? agent2._config.maxTokens ?? 4096,
2536
+ effort: options?.effort ?? agent2._config.effort,
2537
+ thinkingBudget: options?.thinkingBudget ?? agent2._config.thinkingBudget,
2538
+ includeThoughts: options?.includeThoughts ?? agent2._config.includeThoughts,
2539
+ toolChoice: options?.toolChoice ?? agent2._config.toolChoice,
2540
+ stop: options?.stop ?? agent2._config.stop,
2541
+ providerOptions: options?.providerOptions ?? agent2._config.providerOptions,
2157
2542
  signal: this.currentSignal
2158
2543
  };
2159
2544
  if (options?.schema && toolDefs.length === 0) {
@@ -2165,10 +2550,14 @@ Please fix and try again.`;
2165
2550
  let content2 = "";
2166
2551
  const toolCalls = [];
2167
2552
  const toolCallBuffers = /* @__PURE__ */ new Map();
2553
+ let streamProviderMetadata;
2554
+ let thinkingContent = "";
2168
2555
  for await (const chunk of provider.stream(currentMessages, chatOptions)) {
2169
2556
  if (chunk.type === "text_delta") {
2170
2557
  content2 += chunk.content;
2171
2558
  this.onToken(chunk.content);
2559
+ } else if (chunk.type === "thinking_delta") {
2560
+ thinkingContent += chunk.content;
2172
2561
  } else if (chunk.type === "tool_call_delta") {
2173
2562
  let buffer = toolCallBuffers.get(chunk.id);
2174
2563
  if (!buffer) {
@@ -2178,6 +2567,7 @@ Please fix and try again.`;
2178
2567
  if (chunk.name) buffer.name = chunk.name;
2179
2568
  if (chunk.arguments) buffer.arguments += chunk.arguments;
2180
2569
  } else if (chunk.type === "done") {
2570
+ streamProviderMetadata = chunk.providerMetadata;
2181
2571
  if (chunk.usage) {
2182
2572
  response = {
2183
2573
  content: content2,
@@ -2204,6 +2594,12 @@ Please fix and try again.`;
2204
2594
  if (toolCalls.length > 0) {
2205
2595
  response.tool_calls = toolCalls;
2206
2596
  }
2597
+ if (streamProviderMetadata) {
2598
+ response.providerMetadata = streamProviderMetadata;
2599
+ }
2600
+ if (thinkingContent) {
2601
+ response.thinking_content = thinkingContent;
2602
+ }
2207
2603
  } else {
2208
2604
  response = await provider.chat(currentMessages, chatOptions);
2209
2605
  }
@@ -2234,13 +2630,14 @@ Please fix and try again.`;
2234
2630
  currentMessages.push({
2235
2631
  role: "assistant",
2236
2632
  content: response.content || "",
2237
- tool_calls: response.tool_calls
2633
+ tool_calls: response.tool_calls,
2634
+ ...response.providerMetadata ? { providerMetadata: response.providerMetadata } : {}
2238
2635
  });
2239
2636
  for (const toolCall of response.tool_calls) {
2240
2637
  const toolName = toolCall.function.name;
2241
2638
  if (toolName.startsWith("handoff_to_")) {
2242
2639
  const targetName = toolName.replace("handoff_to_", "");
2243
- const descriptor = agent2._config.handoffs?.find((h) => h.agent._name === targetName);
2640
+ const descriptor = resolvedHandoffs?.find((h) => h.agent._name === targetName);
2244
2641
  if (descriptor) {
2245
2642
  const mode = descriptor.mode ?? "oneway";
2246
2643
  let handoffPrompt = prompt;
@@ -2252,10 +2649,11 @@ Please fix and try again.`;
2252
2649
  }
2253
2650
  }
2254
2651
  const handoffStart = Date.now();
2652
+ const handoffOptions = options ? { schema: options.schema, retries: options.retries, metadata: options.metadata } : void 0;
2255
2653
  const handoffFn = () => this.executeAgentCall(
2256
2654
  descriptor.agent,
2257
2655
  handoffPrompt,
2258
- options,
2656
+ handoffOptions,
2259
2657
  0,
2260
2658
  void 0,
2261
2659
  void 0,
@@ -2492,8 +2890,9 @@ Please fix and try again.`;
2492
2890
  resultContent2 = JSON.stringify(toolResult2);
2493
2891
  }
2494
2892
  } else if (tool2) {
2893
+ const childCtx = this.createChildContext();
2495
2894
  try {
2496
- toolResult2 = await tool2._execute(toolArgs);
2895
+ toolResult2 = await tool2._execute(toolArgs, childCtx);
2497
2896
  } catch (err) {
2498
2897
  toolResult2 = { error: err instanceof Error ? err.message : String(err) };
2499
2898
  }
@@ -2573,7 +2972,8 @@ Please fix and try again.`;
2573
2972
  guardrailOutputRetries++;
2574
2973
  currentMessages.push({
2575
2974
  role: "assistant",
2576
- content
2975
+ content,
2976
+ ...response.providerMetadata ? { providerMetadata: response.providerMetadata } : {}
2577
2977
  });
2578
2978
  currentMessages.push({
2579
2979
  role: "system",
@@ -2594,6 +2994,7 @@ Please fix and try again.`;
2594
2994
  try {
2595
2995
  const parsed = JSON.parse(stripMarkdownFences(content));
2596
2996
  const validated = options.schema.parse(parsed);
2997
+ this.pushAssistantToSessionHistory(content, response.providerMetadata);
2597
2998
  return validated;
2598
2999
  } catch (err) {
2599
3000
  const maxRetries = options.retries ?? 3;
@@ -2620,11 +3021,23 @@ Please fix and try again.`;
2620
3021
  throw new VerifyError(content, zodErr, maxRetries);
2621
3022
  }
2622
3023
  }
3024
+ this.pushAssistantToSessionHistory(content, response.providerMetadata);
2623
3025
  return content;
2624
3026
  }
2625
3027
  throw new MaxTurnsError("ctx.ask()", maxTurns);
2626
3028
  }
2627
- buildToolDefs(agent2) {
3029
+ /**
3030
+ * Push the final assistant message into session history, preserving providerMetadata
3031
+ * (e.g., Gemini thought signatures needed for multi-turn reasoning context).
3032
+ */
3033
+ pushAssistantToSessionHistory(content, providerMetadata) {
3034
+ this.sessionHistory.push({
3035
+ role: "assistant",
3036
+ content,
3037
+ ...providerMetadata ? { providerMetadata } : {}
3038
+ });
3039
+ }
3040
+ buildToolDefs(agent2, resolvedHandoffs) {
2628
3041
  const defs = [];
2629
3042
  if (agent2._config.tools) {
2630
3043
  for (const tool2 of agent2._config.tools) {
@@ -2638,8 +3051,8 @@ Please fix and try again.`;
2638
3051
  });
2639
3052
  }
2640
3053
  }
2641
- if (agent2._config.handoffs) {
2642
- for (const { agent: handoffAgent, description, mode } of agent2._config.handoffs) {
3054
+ if (resolvedHandoffs) {
3055
+ for (const { agent: handoffAgent, description, mode } of resolvedHandoffs) {
2643
3056
  const isRoundtrip = mode === "roundtrip";
2644
3057
  const defaultDesc = isRoundtrip ? `Delegate a task to ${handoffAgent._name} and receive the result back` : `Hand off the conversation to ${handoffAgent._name}`;
2645
3058
  defs.push({
@@ -3328,6 +3741,79 @@ ${summaryResponse.content}`
3328
3741
  const sessionId = this.metadata?.sessionId;
3329
3742
  await this.memoryManager.forget(key, this.stateStore, sessionId, options);
3330
3743
  }
3744
+ // ── ctx.delegate() ──────────────────────────────────────────────────
3745
+ /**
3746
+ * Select the best agent from a list of candidates and invoke it.
3747
+ * Creates a temporary router agent that uses handoffs to pick the right specialist.
3748
+ *
3749
+ * This is convenience sugar over creating a router agent with dynamic handoffs.
3750
+ * For full control over the router's behavior, create the router agent explicitly.
3751
+ *
3752
+ * @param agents - Candidate agents to choose from (at least 1)
3753
+ * @param prompt - The prompt to send to the selected agent
3754
+ * @param options - Optional: schema, routerModel, metadata, retries
3755
+ */
3756
+ async delegate(agents, prompt, options) {
3757
+ if (agents.length === 0) {
3758
+ throw new Error("ctx.delegate() requires at least one candidate agent");
3759
+ }
3760
+ const names = /* @__PURE__ */ new Set();
3761
+ for (const a of agents) {
3762
+ if (names.has(a._name)) {
3763
+ throw new Error(
3764
+ `ctx.delegate() received duplicate agent name '${a._name}'. All candidate agents must have unique names.`
3765
+ );
3766
+ }
3767
+ names.add(a._name);
3768
+ }
3769
+ if (agents.length === 1) {
3770
+ return this.ask(agents[0], prompt, {
3771
+ schema: options?.schema,
3772
+ retries: options?.retries,
3773
+ metadata: options?.metadata
3774
+ });
3775
+ }
3776
+ const resolveCtx = options?.metadata ? { metadata: { ...this.metadata, ...options.metadata } } : { metadata: this.metadata };
3777
+ const routerModelUri = options?.routerModel ?? agents[0].resolveModel(resolveCtx);
3778
+ const handoffs = agents.map((a) => {
3779
+ let description;
3780
+ try {
3781
+ description = a.resolveSystem(resolveCtx).slice(0, 200);
3782
+ } catch {
3783
+ description = `Agent: ${a._name}`;
3784
+ }
3785
+ return { agent: a, description };
3786
+ });
3787
+ const routerSystem = "Route to the best agent for this task. Always hand off; never answer directly.";
3788
+ const routerAgent = {
3789
+ _config: {
3790
+ model: routerModelUri,
3791
+ system: routerSystem,
3792
+ temperature: 0,
3793
+ handoffs,
3794
+ maxTurns: 2
3795
+ },
3796
+ _name: "_delegate_router",
3797
+ ask: async () => {
3798
+ throw new Error("Direct invocation not supported on delegate router");
3799
+ },
3800
+ resolveModel: () => routerModelUri,
3801
+ resolveSystem: () => routerSystem
3802
+ };
3803
+ this.emitTrace({
3804
+ type: "delegate",
3805
+ agent: "_delegate_router",
3806
+ data: {
3807
+ candidates: agents.map((a) => a._name),
3808
+ routerModel: routerModelUri
3809
+ }
3810
+ });
3811
+ return this.ask(routerAgent, prompt, {
3812
+ schema: options?.schema,
3813
+ retries: options?.retries,
3814
+ metadata: options?.metadata
3815
+ });
3816
+ }
3331
3817
  // ── Private ───────────────────────────────────────────────────────────
3332
3818
  emitTrace(partial) {
3333
3819
  let data = partial.data;
@@ -3937,11 +4423,13 @@ var Session = class _Session {
3937
4423
  ...cachedSummary ? { summaryCache: cachedSummary } : {}
3938
4424
  }
3939
4425
  });
3940
- const assistantMessage = {
3941
- role: "assistant",
3942
- content: typeof result === "string" ? result : JSON.stringify(result)
3943
- };
3944
- history.push(assistantMessage);
4426
+ const lastMsg = history[history.length - 1];
4427
+ if (!(lastMsg && lastMsg.role === "assistant")) {
4428
+ history.push({
4429
+ role: "assistant",
4430
+ content: typeof result === "string" ? result : JSON.stringify(result)
4431
+ });
4432
+ }
3945
4433
  if (this.options.persist !== false) {
3946
4434
  await this.store.saveSession(this.sessionId, history);
3947
4435
  }
@@ -3984,10 +4472,13 @@ var Session = class _Session {
3984
4472
  }
3985
4473
  });
3986
4474
  const updateHistory = async (result) => {
3987
- history.push({
3988
- role: "assistant",
3989
- content: typeof result === "string" ? result : JSON.stringify(result)
3990
- });
4475
+ const lastMsg = history[history.length - 1];
4476
+ if (!(lastMsg && lastMsg.role === "assistant")) {
4477
+ history.push({
4478
+ role: "assistant",
4479
+ content: typeof result === "string" ? result : JSON.stringify(result)
4480
+ });
4481
+ }
3991
4482
  if (this.options.persist !== false) {
3992
4483
  await this.store.saveSession(this.sessionId, history);
3993
4484
  }
@@ -4877,6 +5368,24 @@ var AxlRuntime = class extends import_node_events2.EventEmitter {
4877
5368
  getExecutions() {
4878
5369
  return [...this.executions.values()];
4879
5370
  }
5371
+ /**
5372
+ * Create a lightweight WorkflowContext for ad-hoc use (tool testing, prototyping).
5373
+ * The context has access to the runtime's providers, state store, and MCP manager
5374
+ * but no session history, streaming callbacks, or budget tracking.
5375
+ */
5376
+ createContext(options) {
5377
+ return new WorkflowContext({
5378
+ input: void 0,
5379
+ executionId: (0, import_node_crypto2.randomUUID)(),
5380
+ metadata: options?.metadata,
5381
+ config: this.config,
5382
+ providerRegistry: this.providerRegistry,
5383
+ stateStore: this.stateStore,
5384
+ mcpManager: this.mcpManager,
5385
+ spanManager: this.spanManager,
5386
+ memoryManager: this.memoryManager
5387
+ });
5388
+ }
4880
5389
  /** Register a custom provider instance. */
4881
5390
  registerProvider(name, provider) {
4882
5391
  this.providerRegistry.registerInstance(name, provider);
@@ -5575,6 +6084,7 @@ function cosineSimilarity2(a, b) {
5575
6084
  agent,
5576
6085
  createSpanManager,
5577
6086
  defineConfig,
6087
+ resolveThinkingOptions,
5578
6088
  tool,
5579
6089
  workflow,
5580
6090
  zodToJsonSchema