@axlsdk/axl 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -140,6 +140,7 @@ __export(index_exports, {
140
140
  agent: () => agent,
141
141
  createSpanManager: () => createSpanManager,
142
142
  defineConfig: () => defineConfig,
143
+ resolveThinkingOptions: () => resolveThinkingOptions,
143
144
  tool: () => tool,
144
145
  workflow: () => workflow,
145
146
  zodToJsonSchema: () => zodToJsonSchema
@@ -185,7 +186,7 @@ function tool(config) {
185
186
  on: config.retry?.on
186
187
  };
187
188
  const maxStringLen = config.maxStringLength ?? DEFAULT_MAX_STRING_LENGTH;
188
- const execute = async (input) => {
189
+ const execute = async (input, ctx) => {
189
190
  const parsed = config.input.parse(input);
190
191
  if (maxStringLen > 0) {
191
192
  validateStringLengths(parsed, maxStringLen);
@@ -194,7 +195,7 @@ function tool(config) {
194
195
  let lastError;
195
196
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
196
197
  try {
197
- return await config.handler(parsed);
198
+ return await config.handler(parsed, ctx);
198
199
  } catch (err) {
199
200
  lastError = err instanceof Error ? err : new Error(String(err));
200
201
  if (attempt === maxAttempts) break;
@@ -224,7 +225,7 @@ function tool(config) {
224
225
  if (config.hooks?.before) {
225
226
  processedInput = await config.hooks.before(processedInput, ctx);
226
227
  }
227
- let result = await execute(processedInput);
228
+ let result = await execute(processedInput, ctx);
228
229
  if (config.hooks?.after) {
229
230
  result = await config.hooks.after(result, ctx);
230
231
  }
@@ -246,6 +247,25 @@ function tool(config) {
246
247
  };
247
248
  }
248
249
 
250
+ // src/providers/types.ts
251
+ function resolveThinkingOptions(options) {
252
+ if (options.thinkingBudget !== void 0 && options.thinkingBudget < 0) {
253
+ throw new Error(`thinkingBudget must be non-negative, got ${options.thinkingBudget}`);
254
+ }
255
+ const effort = options.effort;
256
+ const thinkingBudget = options.thinkingBudget;
257
+ const hasBudgetOverride = thinkingBudget !== void 0 && thinkingBudget > 0;
258
+ return {
259
+ effort,
260
+ thinkingBudget,
261
+ includeThoughts: options.includeThoughts ?? false,
262
+ // Budget override wins: effort: 'none' + thinkingBudget: 5000 → thinking enabled
263
+ thinkingDisabled: (effort === "none" || thinkingBudget === 0) && !hasBudgetOverride,
264
+ activeEffort: effort && effort !== "none" ? effort : void 0,
265
+ hasBudgetOverride
266
+ };
267
+ }
268
+
249
269
  // src/providers/retry.ts
250
270
  var RETRYABLE_STATUS_CODES = /* @__PURE__ */ new Set([429, 503, 529]);
251
271
  var MAX_RETRIES = 2;
@@ -300,6 +320,9 @@ var OPENAI_PRICING = {
300
320
  "gpt-5-nano": [5e-8, 4e-7],
301
321
  "gpt-5.1": [125e-8, 1e-5],
302
322
  "gpt-5.2": [175e-8, 14e-6],
323
+ "gpt-5.3": [175e-8, 14e-6],
324
+ "gpt-5.4": [25e-7, 15e-6],
325
+ "gpt-5.4-pro": [3e-5, 18e-5],
303
326
  o1: [15e-6, 6e-5],
304
327
  "o1-mini": [3e-6, 12e-6],
305
328
  "o1-pro": [15e-5, 6e-4],
@@ -328,26 +351,31 @@ function estimateOpenAICost(model, promptTokens, completionTokens, cachedTokens)
328
351
  const inputCost = (promptTokens - cached) * inputRate + cached * inputRate * 0.5;
329
352
  return inputCost + completionTokens * outputRate;
330
353
  }
331
- function isReasoningModel(model) {
354
+ function isOSeriesModel(model) {
332
355
  return /^(o1|o3|o4-mini)/.test(model);
333
356
  }
334
- function thinkingToReasoningEffort(thinking) {
335
- if (typeof thinking === "object") {
336
- const budget = thinking.budgetTokens;
337
- if (budget <= 1024) return "low";
338
- if (budget <= 8192) return "medium";
339
- return "high";
340
- }
341
- switch (thinking) {
342
- case "low":
343
- return "low";
344
- case "medium":
345
- return "medium";
346
- case "high":
347
- return "high";
348
- case "max":
349
- return "xhigh";
350
- }
357
+ function supportsReasoningEffort(model) {
358
+ return isOSeriesModel(model) || /^gpt-5/.test(model);
359
+ }
360
+ function supportsReasoningNone(model) {
361
+ return /^gpt-5\.[1-9]/.test(model);
362
+ }
363
+ function supportsXhigh(model) {
364
+ return /^gpt-5\.([2-9]|\d{2,})/.test(model);
365
+ }
366
+ function clampReasoningEffort(model, effort) {
367
+ if (model.startsWith("gpt-5-pro")) return "high";
368
+ if (effort === "none" && !supportsReasoningNone(model)) return "minimal";
369
+ if (effort === "xhigh" && !supportsXhigh(model)) return "high";
370
+ return effort;
371
+ }
372
+ function effortToReasoningEffort(effort) {
373
+ return effort === "max" ? "xhigh" : effort;
374
+ }
375
+ function budgetToReasoningEffort(budget) {
376
+ if (budget <= 1024) return "low";
377
+ if (budget <= 8192) return "medium";
378
+ return "high";
351
379
  }
352
380
  var OpenAIProvider = class {
353
381
  name = "openai";
@@ -436,13 +464,26 @@ var OpenAIProvider = class {
436
464
  // Internal helpers
437
465
  // ---------------------------------------------------------------------------
438
466
  buildRequestBody(messages, options, stream) {
439
- const reasoning = isReasoningModel(options.model);
467
+ const oSeries = isOSeriesModel(options.model);
468
+ const reasoningCapable = supportsReasoningEffort(options.model);
469
+ const { thinkingBudget, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
470
+ let wireEffort;
471
+ if (reasoningCapable) {
472
+ if (hasBudgetOverride) {
473
+ wireEffort = clampReasoningEffort(options.model, budgetToReasoningEffort(thinkingBudget));
474
+ } else if (!thinkingDisabled && activeEffort) {
475
+ wireEffort = clampReasoningEffort(options.model, effortToReasoningEffort(activeEffort));
476
+ } else if (thinkingDisabled) {
477
+ wireEffort = clampReasoningEffort(options.model, "none");
478
+ }
479
+ }
480
+ const stripTemp = oSeries || reasoningCapable && wireEffort !== void 0;
440
481
  const body = {
441
482
  model: options.model,
442
- messages: messages.map((m) => this.formatMessage(m, reasoning)),
483
+ messages: messages.map((m) => this.formatMessage(m, oSeries)),
443
484
  stream
444
485
  };
445
- if (options.temperature !== void 0 && !reasoning) {
486
+ if (options.temperature !== void 0 && !stripTemp) {
446
487
  body.temperature = options.temperature;
447
488
  }
448
489
  if (options.maxTokens !== void 0) {
@@ -451,7 +492,7 @@ var OpenAIProvider = class {
451
492
  if (options.stop) body.stop = options.stop;
452
493
  if (options.tools && options.tools.length > 0) {
453
494
  body.tools = options.tools;
454
- if (!reasoning) {
495
+ if (!oSeries) {
455
496
  body.parallel_tool_calls = true;
456
497
  }
457
498
  }
@@ -461,15 +502,13 @@ var OpenAIProvider = class {
461
502
  if (options.responseFormat) {
462
503
  body.response_format = options.responseFormat;
463
504
  }
464
- if (reasoning) {
465
- const effort = options.thinking ? thinkingToReasoningEffort(options.thinking) : options.reasoningEffort;
466
- if (effort) {
467
- body.reasoning_effort = effort;
468
- }
469
- }
505
+ if (wireEffort) body.reasoning_effort = wireEffort;
470
506
  if (stream) {
471
507
  body.stream_options = { include_usage: true };
472
508
  }
509
+ if (options.providerOptions) {
510
+ Object.assign(body, options.providerOptions);
511
+ }
473
512
  return body;
474
513
  }
475
514
  /** Extract a human-readable message from an API error response body. */
@@ -483,9 +522,9 @@ var OpenAIProvider = class {
483
522
  }
484
523
  return `OpenAI API error (${status}): ${body}`;
485
524
  }
486
- formatMessage(msg, reasoning) {
525
+ formatMessage(msg, oSeries) {
487
526
  const out = {
488
- role: msg.role === "system" && reasoning ? "developer" : msg.role,
527
+ role: msg.role === "system" && oSeries ? "developer" : msg.role,
489
528
  content: msg.content
490
529
  };
491
530
  if (msg.name) out.name = msg.name;
@@ -622,7 +661,20 @@ var OpenAIResponsesProvider = class {
622
661
  // Internal: build request body
623
662
  // ---------------------------------------------------------------------------
624
663
  buildRequestBody(messages, options, stream) {
625
- const reasoning = isReasoningModel(options.model);
664
+ const oSeries = isOSeriesModel(options.model);
665
+ const reasoningCapable = supportsReasoningEffort(options.model);
666
+ const { thinkingBudget, includeThoughts, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
667
+ let wireEffort;
668
+ if (reasoningCapable) {
669
+ if (hasBudgetOverride) {
670
+ wireEffort = clampReasoningEffort(options.model, budgetToReasoningEffort(thinkingBudget));
671
+ } else if (!thinkingDisabled && activeEffort) {
672
+ wireEffort = clampReasoningEffort(options.model, effortToReasoningEffort(activeEffort));
673
+ } else if (thinkingDisabled) {
674
+ wireEffort = clampReasoningEffort(options.model, "none");
675
+ }
676
+ }
677
+ const stripTemp = oSeries || reasoningCapable && wireEffort !== void 0;
626
678
  const systemMessages = messages.filter((m) => m.role === "system");
627
679
  const nonSystemMessages = messages.filter((m) => m.role !== "system");
628
680
  const body = {
@@ -637,7 +689,7 @@ var OpenAIResponsesProvider = class {
637
689
  if (options.maxTokens !== void 0) {
638
690
  body.max_output_tokens = options.maxTokens;
639
691
  }
640
- if (options.temperature !== void 0 && !reasoning) {
692
+ if (options.temperature !== void 0 && !stripTemp) {
641
693
  body.temperature = options.temperature;
642
694
  }
643
695
  if (options.tools && options.tools.length > 0) {
@@ -656,15 +708,21 @@ var OpenAIResponsesProvider = class {
656
708
  body.tool_choice = options.toolChoice;
657
709
  }
658
710
  }
659
- if (reasoning) {
660
- const effort = options.thinking ? thinkingToReasoningEffort(options.thinking) : options.reasoningEffort;
661
- if (effort) {
662
- body.reasoning = { effort };
663
- }
711
+ if (reasoningCapable && (wireEffort !== void 0 || includeThoughts)) {
712
+ const reasoning = {};
713
+ if (wireEffort !== void 0) reasoning.effort = wireEffort;
714
+ if (includeThoughts) reasoning.summary = "detailed";
715
+ if (Object.keys(reasoning).length > 0) body.reasoning = reasoning;
716
+ }
717
+ if (reasoningCapable) {
718
+ body.include = ["reasoning.encrypted_content"];
664
719
  }
665
720
  if (options.responseFormat) {
666
721
  body.text = { format: this.mapResponseFormat(options.responseFormat) };
667
722
  }
723
+ if (options.providerOptions) {
724
+ Object.assign(body, options.providerOptions);
725
+ }
668
726
  return body;
669
727
  }
670
728
  // ---------------------------------------------------------------------------
@@ -680,6 +738,12 @@ var OpenAIResponsesProvider = class {
680
738
  output: msg.content
681
739
  });
682
740
  } else if (msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0) {
741
+ const reasoningItems = msg.providerMetadata?.openaiReasoningItems;
742
+ if (reasoningItems) {
743
+ for (const item of reasoningItems) {
744
+ input.push(item);
745
+ }
746
+ }
683
747
  if (msg.content) {
684
748
  input.push({ type: "message", role: "assistant", content: msg.content });
685
749
  }
@@ -692,6 +756,12 @@ var OpenAIResponsesProvider = class {
692
756
  });
693
757
  }
694
758
  } else if (msg.role === "user" || msg.role === "assistant") {
759
+ if (msg.role === "assistant" && msg.providerMetadata?.openaiReasoningItems) {
760
+ const reasoningItems = msg.providerMetadata.openaiReasoningItems;
761
+ for (const item of reasoningItems) {
762
+ input.push(item);
763
+ }
764
+ }
695
765
  input.push({
696
766
  type: "message",
697
767
  role: msg.role,
@@ -724,7 +794,9 @@ var OpenAIResponsesProvider = class {
724
794
  // ---------------------------------------------------------------------------
725
795
  parseResponse(json, model) {
726
796
  let content = "";
797
+ let thinkingContent = "";
727
798
  const toolCalls = [];
799
+ const reasoningItems = [];
728
800
  for (const item of json.output) {
729
801
  if (item.type === "message") {
730
802
  for (const part of item.content ?? []) {
@@ -741,6 +813,15 @@ var OpenAIResponsesProvider = class {
741
813
  arguments: item.arguments
742
814
  }
743
815
  });
816
+ } else if (item.type === "reasoning") {
817
+ reasoningItems.push(item);
818
+ if (item.summary) {
819
+ for (const s of item.summary) {
820
+ if (s.type === "summary_text" && s.text) {
821
+ thinkingContent += s.text;
822
+ }
823
+ }
824
+ }
744
825
  }
745
826
  }
746
827
  const usage = json.usage ? {
@@ -751,11 +832,14 @@ var OpenAIResponsesProvider = class {
751
832
  cached_tokens: json.usage.input_tokens_details?.cached_tokens
752
833
  } : void 0;
753
834
  const cost = usage ? estimateOpenAICost(model, usage.prompt_tokens, usage.completion_tokens, usage.cached_tokens) : void 0;
835
+ const providerMetadata = reasoningItems.length > 0 ? { openaiReasoningItems: reasoningItems } : void 0;
754
836
  return {
755
837
  content,
838
+ thinking_content: thinkingContent || void 0,
756
839
  tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
757
840
  usage,
758
- cost
841
+ cost,
842
+ providerMetadata
759
843
  };
760
844
  }
761
845
  // ---------------------------------------------------------------------------
@@ -807,6 +891,8 @@ var OpenAIResponsesProvider = class {
807
891
  switch (eventType) {
808
892
  case "response.output_text.delta":
809
893
  return { type: "text_delta", content: data.delta ?? "" };
894
+ case "response.reasoning_summary_text.delta":
895
+ return { type: "thinking_delta", content: data.delta ?? "" };
810
896
  case "response.output_item.added":
811
897
  if (data.item?.type === "function_call") {
812
898
  const callId = data.item.call_id ?? data.item.id ?? "";
@@ -837,7 +923,9 @@ var OpenAIResponsesProvider = class {
837
923
  reasoning_tokens: response.usage.output_tokens_details?.reasoning_tokens,
838
924
  cached_tokens: response.usage.input_tokens_details?.cached_tokens
839
925
  } : void 0;
840
- return { type: "done", usage };
926
+ const reasoningItems = response?.output?.filter((item) => item.type === "reasoning") ?? [];
927
+ const providerMetadata = reasoningItems.length > 0 ? { openaiReasoningItems: reasoningItems } : void 0;
928
+ return { type: "done", usage, providerMetadata };
841
929
  }
842
930
  case "response.failed": {
843
931
  const errorMsg = data.response?.error?.message ?? data.response?.status_details?.error?.message ?? "Unknown error";
@@ -865,9 +953,12 @@ var OpenAIResponsesProvider = class {
865
953
  // src/providers/anthropic.ts
866
954
  var ANTHROPIC_API_VERSION = "2023-06-01";
867
955
  var ANTHROPIC_PRICING = {
868
- "claude-opus-4-6": [15e-6, 75e-6],
956
+ "claude-opus-4-6": [5e-6, 25e-6],
957
+ "claude-sonnet-4-6": [3e-6, 15e-6],
958
+ "claude-opus-4-5": [5e-6, 25e-6],
959
+ "claude-opus-4-1": [15e-6, 75e-6],
869
960
  "claude-sonnet-4-5": [3e-6, 15e-6],
870
- "claude-haiku-4-5": [8e-7, 4e-6],
961
+ "claude-haiku-4-5": [1e-6, 5e-6],
871
962
  "claude-sonnet-4": [3e-6, 15e-6],
872
963
  "claude-opus-4": [15e-6, 75e-6],
873
964
  "claude-3-7-sonnet": [3e-6, 15e-6],
@@ -877,12 +968,15 @@ var ANTHROPIC_PRICING = {
877
968
  "claude-3-sonnet": [3e-6, 15e-6],
878
969
  "claude-3-haiku": [25e-8, 125e-8]
879
970
  };
971
+ var ANTHROPIC_PRICING_KEYS_BY_LENGTH = Object.keys(ANTHROPIC_PRICING).sort(
972
+ (a, b) => b.length - a.length
973
+ );
880
974
  function estimateAnthropicCost(model, inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens) {
881
975
  let pricing = ANTHROPIC_PRICING[model];
882
976
  if (!pricing) {
883
- for (const [key, value] of Object.entries(ANTHROPIC_PRICING)) {
977
+ for (const key of ANTHROPIC_PRICING_KEYS_BY_LENGTH) {
884
978
  if (model.startsWith(key)) {
885
- pricing = value;
979
+ pricing = ANTHROPIC_PRICING[key];
886
980
  break;
887
981
  }
888
982
  }
@@ -902,16 +996,15 @@ var THINKING_BUDGETS = {
902
996
  // With auto-bump (+1024), max_tokens becomes 31024 which fits all models.
903
997
  max: 3e4
904
998
  };
905
- function thinkingToBudgetTokens(thinking) {
906
- if (typeof thinking === "string") return THINKING_BUDGETS[thinking] ?? 5e3;
907
- return thinking.budgetTokens;
908
- }
909
999
  function supportsAdaptiveThinking(model) {
910
1000
  return model.startsWith("claude-opus-4-6") || model.startsWith("claude-sonnet-4-6");
911
1001
  }
912
1002
  function supportsMaxEffort(model) {
913
1003
  return model.startsWith("claude-opus-4-6");
914
1004
  }
1005
+ function supportsEffort(model) {
1006
+ return model.startsWith("claude-opus-4-6") || model.startsWith("claude-sonnet-4-6") || model.startsWith("claude-opus-4-5");
1007
+ }
915
1008
  var AnthropicProvider = class {
916
1009
  name = "anthropic";
917
1010
  baseUrl;
@@ -1001,9 +1094,6 @@ var AnthropicProvider = class {
1001
1094
  if (systemText) {
1002
1095
  body.system = systemText;
1003
1096
  }
1004
- if (options.temperature !== void 0 && !options.thinking) {
1005
- body.temperature = options.temperature;
1006
- }
1007
1097
  if (options.stop) {
1008
1098
  body.stop_sequences = options.stop;
1009
1099
  }
@@ -1013,19 +1103,39 @@ var AnthropicProvider = class {
1013
1103
  if (options.toolChoice !== void 0) {
1014
1104
  body.tool_choice = this.mapToolChoice(options.toolChoice);
1015
1105
  }
1016
- if (options.thinking) {
1017
- if (typeof options.thinking === "string" && supportsAdaptiveThinking(options.model) && // 'max' effort is only supported on Opus 4.6; Sonnet 4.6 falls back to manual mode
1018
- (options.thinking !== "max" || supportsMaxEffort(options.model))) {
1019
- body.thinking = { type: "adaptive" };
1020
- body.output_config = { effort: options.thinking };
1021
- } else {
1022
- const budgetTokens = thinkingToBudgetTokens(options.thinking);
1023
- body.thinking = { type: "enabled", budget_tokens: budgetTokens };
1024
- const currentMax = body.max_tokens;
1025
- if (currentMax < budgetTokens + 1024) {
1026
- body.max_tokens = budgetTokens + 1024;
1027
- }
1106
+ const { thinkingBudget, thinkingDisabled, activeEffort, hasBudgetOverride } = resolveThinkingOptions(options);
1107
+ let resolvedEffort = activeEffort;
1108
+ if (resolvedEffort === "max" && !supportsMaxEffort(options.model)) {
1109
+ resolvedEffort = "high";
1110
+ }
1111
+ if (hasBudgetOverride) {
1112
+ body.thinking = { type: "enabled", budget_tokens: thinkingBudget };
1113
+ const currentMax = body.max_tokens;
1114
+ if (currentMax < thinkingBudget + 1024) {
1115
+ body.max_tokens = thinkingBudget + 1024;
1028
1116
  }
1117
+ if (resolvedEffort && supportsEffort(options.model)) {
1118
+ body.output_config = { effort: resolvedEffort };
1119
+ }
1120
+ } else if (thinkingDisabled) {
1121
+ if (resolvedEffort && supportsEffort(options.model)) {
1122
+ body.output_config = { effort: resolvedEffort };
1123
+ }
1124
+ } else if (resolvedEffort && supportsAdaptiveThinking(options.model)) {
1125
+ body.thinking = { type: "adaptive" };
1126
+ body.output_config = { effort: resolvedEffort };
1127
+ } else if (resolvedEffort && supportsEffort(options.model)) {
1128
+ body.output_config = { effort: resolvedEffort };
1129
+ } else if (resolvedEffort) {
1130
+ const budget = THINKING_BUDGETS[resolvedEffort] ?? 5e3;
1131
+ body.thinking = { type: "enabled", budget_tokens: budget };
1132
+ const currentMax = body.max_tokens;
1133
+ if (currentMax < budget + 1024) {
1134
+ body.max_tokens = budget + 1024;
1135
+ }
1136
+ }
1137
+ if (options.temperature !== void 0 && !body.thinking) {
1138
+ body.temperature = options.temperature;
1029
1139
  }
1030
1140
  if (options.responseFormat && options.responseFormat.type !== "text") {
1031
1141
  const jsonInstruction = "You must respond with valid JSON only. No markdown fences, no extra text.";
@@ -1033,6 +1143,9 @@ var AnthropicProvider = class {
1033
1143
 
1034
1144
  ${jsonInstruction}` : jsonInstruction;
1035
1145
  }
1146
+ if (options.providerOptions) {
1147
+ Object.assign(body, options.providerOptions);
1148
+ }
1036
1149
  return body;
1037
1150
  }
1038
1151
  /**
@@ -1143,9 +1256,12 @@ ${jsonInstruction}` : jsonInstruction;
1143
1256
  // ---------------------------------------------------------------------------
1144
1257
  parseResponse(json) {
1145
1258
  let content = "";
1259
+ let thinkingContent = "";
1146
1260
  const toolCalls = [];
1147
1261
  for (const block of json.content) {
1148
- if (block.type === "text") {
1262
+ if (block.type === "thinking") {
1263
+ thinkingContent += block.thinking;
1264
+ } else if (block.type === "text") {
1149
1265
  content += block.text;
1150
1266
  } else if (block.type === "tool_use") {
1151
1267
  toolCalls.push({
@@ -1176,6 +1292,7 @@ ${jsonInstruction}` : jsonInstruction;
1176
1292
  ) : void 0;
1177
1293
  return {
1178
1294
  content,
1295
+ thinking_content: thinkingContent || void 0,
1179
1296
  tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
1180
1297
  usage,
1181
1298
  cost
@@ -1226,7 +1343,9 @@ ${jsonInstruction}` : jsonInstruction;
1226
1343
  }
1227
1344
  case "content_block_delta": {
1228
1345
  const delta = event.delta;
1229
- if (delta?.type === "text_delta" && delta.text) {
1346
+ if (delta?.type === "thinking_delta" && delta.thinking) {
1347
+ yield { type: "thinking_delta", content: delta.thinking };
1348
+ } else if (delta?.type === "text_delta" && delta.text) {
1230
1349
  yield { type: "text_delta", content: delta.text };
1231
1350
  } else if (delta?.type === "input_json_delta" && delta.partial_json) {
1232
1351
  yield {
@@ -1297,14 +1416,19 @@ var GEMINI_PRICING = {
1297
1416
  "gemini-2.0-flash": [1e-7, 4e-7],
1298
1417
  "gemini-2.0-flash-lite": [1e-7, 4e-7],
1299
1418
  "gemini-3-pro-preview": [2e-6, 12e-6],
1300
- "gemini-3-flash-preview": [5e-7, 3e-6]
1419
+ "gemini-3-flash-preview": [5e-7, 3e-6],
1420
+ "gemini-3.1-pro-preview": [2e-6, 12e-6],
1421
+ "gemini-3.1-flash-lite-preview": [25e-8, 15e-7]
1301
1422
  };
1423
+ var GEMINI_PRICING_KEYS_BY_LENGTH = Object.keys(GEMINI_PRICING).sort(
1424
+ (a, b) => b.length - a.length
1425
+ );
1302
1426
  function estimateGeminiCost(model, inputTokens, outputTokens, cachedTokens) {
1303
1427
  let pricing = GEMINI_PRICING[model];
1304
1428
  if (!pricing) {
1305
- for (const [key, value] of Object.entries(GEMINI_PRICING)) {
1429
+ for (const key of GEMINI_PRICING_KEYS_BY_LENGTH) {
1306
1430
  if (model.startsWith(key)) {
1307
- pricing = value;
1431
+ pricing = GEMINI_PRICING[key];
1308
1432
  break;
1309
1433
  }
1310
1434
  }
@@ -1321,9 +1445,32 @@ var THINKING_BUDGETS2 = {
1321
1445
  high: 1e4,
1322
1446
  max: 24576
1323
1447
  };
1324
- function thinkingToBudgetTokens2(thinking) {
1325
- if (typeof thinking === "string") return THINKING_BUDGETS2[thinking] ?? 5e3;
1326
- return thinking.budgetTokens;
1448
+ var THINKING_LEVELS = {
1449
+ low: "low",
1450
+ medium: "medium",
1451
+ high: "high",
1452
+ max: "high"
1453
+ // 3.x caps at 'high'
1454
+ };
1455
+ function isGemini3x(model) {
1456
+ return /^gemini-3[.-]/.test(model);
1457
+ }
1458
+ function budgetToThinkingLevel(budgetTokens) {
1459
+ if (budgetTokens <= 1024) return "low";
1460
+ if (budgetTokens <= 5e3) return "medium";
1461
+ return "high";
1462
+ }
1463
+ function minThinkingLevel(model) {
1464
+ if (model.startsWith("gemini-3.1-pro")) return "low";
1465
+ return "minimal";
1466
+ }
1467
+ var _warned3xEffortNone = /* @__PURE__ */ new Set();
1468
+ function warnGemini3xEffortNone(model) {
1469
+ if (_warned3xEffortNone.has(model)) return;
1470
+ _warned3xEffortNone.add(model);
1471
+ console.warn(
1472
+ `[axl] effort: 'none' on Gemini 3.x (${model}) maps to the model's minimum thinking level ('${minThinkingLevel(model)}'), not fully disabled. Gemini 3.x models cannot disable thinking entirely.`
1473
+ );
1327
1474
  }
1328
1475
  var GeminiProvider = class {
1329
1476
  name = "google";
@@ -1438,17 +1585,58 @@ var GeminiProvider = class {
1438
1585
  if (Object.keys(generationConfig).length > 0) {
1439
1586
  body.generationConfig = generationConfig;
1440
1587
  }
1441
- if (options.thinking) {
1442
- generationConfig.thinkingConfig = {
1443
- thinkingBudget: thinkingToBudgetTokens2(options.thinking)
1444
- };
1445
- if (!body.generationConfig) {
1446
- body.generationConfig = generationConfig;
1588
+ const {
1589
+ effort,
1590
+ thinkingBudget,
1591
+ includeThoughts,
1592
+ thinkingDisabled,
1593
+ activeEffort,
1594
+ hasBudgetOverride
1595
+ } = resolveThinkingOptions(options);
1596
+ if (thinkingDisabled) {
1597
+ if (isGemini3x(options.model)) {
1598
+ if (effort === "none") {
1599
+ warnGemini3xEffortNone(options.model);
1600
+ }
1601
+ generationConfig.thinkingConfig = { thinkingLevel: minThinkingLevel(options.model) };
1602
+ } else {
1603
+ generationConfig.thinkingConfig = { thinkingBudget: 0 };
1604
+ }
1605
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1606
+ } else if (hasBudgetOverride) {
1607
+ const config = {};
1608
+ if (isGemini3x(options.model)) {
1609
+ config.thinkingLevel = budgetToThinkingLevel(thinkingBudget);
1610
+ } else {
1611
+ config.thinkingBudget = thinkingBudget;
1612
+ }
1613
+ if (includeThoughts) config.includeThoughts = true;
1614
+ generationConfig.thinkingConfig = config;
1615
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1616
+ } else if (activeEffort) {
1617
+ const config = {};
1618
+ if (isGemini3x(options.model)) {
1619
+ config.thinkingLevel = THINKING_LEVELS[activeEffort] ?? "medium";
1620
+ } else {
1621
+ if (activeEffort === "max" && options.model.startsWith("gemini-2.5-pro")) {
1622
+ config.thinkingBudget = 32768;
1623
+ } else {
1624
+ config.thinkingBudget = THINKING_BUDGETS2[activeEffort] ?? 5e3;
1625
+ }
1447
1626
  }
1627
+ if (includeThoughts) config.includeThoughts = true;
1628
+ generationConfig.thinkingConfig = config;
1629
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1630
+ } else if (includeThoughts) {
1631
+ generationConfig.thinkingConfig = { includeThoughts: true };
1632
+ if (!body.generationConfig) body.generationConfig = generationConfig;
1448
1633
  }
1449
1634
  if (options.toolChoice !== void 0) {
1450
1635
  body.toolConfig = { functionCallingConfig: this.mapToolChoice(options.toolChoice) };
1451
1636
  }
1637
+ if (options.providerOptions) {
1638
+ Object.assign(body, options.providerOptions);
1639
+ }
1452
1640
  return body;
1453
1641
  }
1454
1642
  /**
@@ -1474,28 +1662,33 @@ var GeminiProvider = class {
1474
1662
  const result = [];
1475
1663
  for (const msg of messages) {
1476
1664
  if (msg.role === "assistant") {
1477
- const parts = [];
1478
- if (msg.content) {
1479
- parts.push({ text: msg.content });
1480
- }
1481
- if (msg.tool_calls && msg.tool_calls.length > 0) {
1482
- for (const tc of msg.tool_calls) {
1483
- let parsedArgs;
1484
- try {
1485
- parsedArgs = JSON.parse(tc.function.arguments);
1486
- } catch {
1487
- parsedArgs = {};
1488
- }
1489
- parts.push({
1490
- functionCall: {
1491
- name: tc.function.name,
1492
- args: parsedArgs
1665
+ const rawParts = msg.providerMetadata?.geminiParts;
1666
+ if (rawParts && rawParts.length > 0) {
1667
+ result.push({ role: "model", parts: rawParts });
1668
+ } else {
1669
+ const parts = [];
1670
+ if (msg.content) {
1671
+ parts.push({ text: msg.content });
1672
+ }
1673
+ if (msg.tool_calls && msg.tool_calls.length > 0) {
1674
+ for (const tc of msg.tool_calls) {
1675
+ let parsedArgs;
1676
+ try {
1677
+ parsedArgs = JSON.parse(tc.function.arguments);
1678
+ } catch {
1679
+ parsedArgs = {};
1493
1680
  }
1494
- });
1681
+ parts.push({
1682
+ functionCall: {
1683
+ name: tc.function.name,
1684
+ args: parsedArgs
1685
+ }
1686
+ });
1687
+ }
1688
+ }
1689
+ if (parts.length > 0) {
1690
+ result.push({ role: "model", parts });
1495
1691
  }
1496
- }
1497
- if (parts.length > 0) {
1498
- result.push({ role: "model", parts });
1499
1692
  }
1500
1693
  } else if (msg.role === "tool") {
1501
1694
  const functionName = toolCallIdToName.get(msg.tool_call_id) ?? "unknown";
@@ -1572,10 +1765,13 @@ var GeminiProvider = class {
1572
1765
  parseResponse(json, model) {
1573
1766
  const candidate = json.candidates?.[0];
1574
1767
  let content = "";
1768
+ let thinkingContent = "";
1575
1769
  const toolCalls = [];
1576
1770
  if (candidate?.content?.parts) {
1577
1771
  for (const part of candidate.content.parts) {
1578
- if (part.text) {
1772
+ if (part.thought && part.text) {
1773
+ thinkingContent += part.text;
1774
+ } else if (part.text) {
1579
1775
  content += part.text;
1580
1776
  } else if (part.functionCall) {
1581
1777
  toolCalls.push({
@@ -1590,18 +1786,24 @@ var GeminiProvider = class {
1590
1786
  }
1591
1787
  }
1592
1788
  const cachedTokens = json.usageMetadata?.cachedContentTokenCount;
1789
+ const reasoningTokens = json.usageMetadata?.thoughtsTokenCount;
1593
1790
  const usage = json.usageMetadata ? {
1594
1791
  prompt_tokens: json.usageMetadata.promptTokenCount ?? 0,
1595
1792
  completion_tokens: json.usageMetadata.candidatesTokenCount ?? 0,
1596
1793
  total_tokens: json.usageMetadata.totalTokenCount ?? 0,
1597
- cached_tokens: cachedTokens && cachedTokens > 0 ? cachedTokens : void 0
1794
+ cached_tokens: cachedTokens && cachedTokens > 0 ? cachedTokens : void 0,
1795
+ reasoning_tokens: reasoningTokens && reasoningTokens > 0 ? reasoningTokens : void 0
1598
1796
  } : void 0;
1599
1797
  const cost = usage ? estimateGeminiCost(model, usage.prompt_tokens, usage.completion_tokens, usage.cached_tokens) : void 0;
1798
+ const rawParts = candidate?.content?.parts;
1799
+ const providerMetadata = rawParts ? { geminiParts: rawParts } : void 0;
1600
1800
  return {
1601
1801
  content,
1802
+ thinking_content: thinkingContent || void 0,
1602
1803
  tool_calls: toolCalls.length > 0 ? toolCalls : void 0,
1603
1804
  usage,
1604
- cost
1805
+ cost,
1806
+ providerMetadata
1605
1807
  };
1606
1808
  }
1607
1809
  // ---------------------------------------------------------------------------
@@ -1612,6 +1814,7 @@ var GeminiProvider = class {
1612
1814
  const decoder = new TextDecoder();
1613
1815
  let buffer = "";
1614
1816
  let usage;
1817
+ const accumulatedParts = [];
1615
1818
  try {
1616
1819
  while (true) {
1617
1820
  const { done, value } = await reader.read();
@@ -1632,17 +1835,22 @@ var GeminiProvider = class {
1632
1835
  }
1633
1836
  if (chunk.usageMetadata) {
1634
1837
  const cached = chunk.usageMetadata.cachedContentTokenCount;
1838
+ const reasoning = chunk.usageMetadata.thoughtsTokenCount;
1635
1839
  usage = {
1636
1840
  prompt_tokens: chunk.usageMetadata.promptTokenCount ?? 0,
1637
1841
  completion_tokens: chunk.usageMetadata.candidatesTokenCount ?? 0,
1638
1842
  total_tokens: chunk.usageMetadata.totalTokenCount ?? 0,
1639
- cached_tokens: cached && cached > 0 ? cached : void 0
1843
+ cached_tokens: cached && cached > 0 ? cached : void 0,
1844
+ reasoning_tokens: reasoning && reasoning > 0 ? reasoning : void 0
1640
1845
  };
1641
1846
  }
1642
1847
  const candidate = chunk.candidates?.[0];
1643
1848
  if (candidate?.content?.parts) {
1644
1849
  for (const part of candidate.content.parts) {
1645
- if (part.text) {
1850
+ accumulatedParts.push(part);
1851
+ if (part.thought && part.text) {
1852
+ yield { type: "thinking_delta", content: part.text };
1853
+ } else if (part.text) {
1646
1854
  yield { type: "text_delta", content: part.text };
1647
1855
  } else if (part.functionCall) {
1648
1856
  yield {
@@ -1656,7 +1864,8 @@ var GeminiProvider = class {
1656
1864
  }
1657
1865
  }
1658
1866
  }
1659
- yield { type: "done", usage };
1867
+ const providerMetadata = accumulatedParts.length > 0 ? { geminiParts: accumulatedParts } : void 0;
1868
+ yield { type: "done", usage, providerMetadata };
1660
1869
  } finally {
1661
1870
  reader.releaseLock();
1662
1871
  }
@@ -2029,7 +2238,7 @@ function estimateMessagesTokens(messages) {
2029
2238
  }
2030
2239
  return total;
2031
2240
  }
2032
- var WorkflowContext = class {
2241
+ var WorkflowContext = class _WorkflowContext {
2033
2242
  input;
2034
2243
  executionId;
2035
2244
  metadata;
@@ -2082,6 +2291,37 @@ var WorkflowContext = class {
2082
2291
  this.summaryCache = init.metadata.summaryCache;
2083
2292
  }
2084
2293
  }
2294
+ /**
2295
+ * Build and return a new context for nested agent invocations (e.g., agent-as-tool); the parent context itself is not modified.
2296
+ * Shares (same references as the parent): budget tracking, abort signal, trace emission, agent-call-complete callback, provider registry,
2297
+ * state store, span manager, memory manager, MCP manager, config,
2298
+ * awaitHuman handler, pending decisions, tool overrides. input, executionId and workflowName are carried over; metadata is shallow-copied.
2299
+ * Isolates (not forwarded to the constructor): session history, step counter, streaming callbacks (onToken, onAgentStart, onToolCall).
2300
+ */
2301
+ createChildContext() {
2302
+ return new _WorkflowContext({
2303
+ input: this.input,
2304
+ executionId: this.executionId,
2305
+ config: this.config,
2306
+ providerRegistry: this.providerRegistry,
2307
+ metadata: { ...this.metadata }, // shallow copy: top-level keys are independent, nested objects are still shared with the parent
2308
+ // Shared infrastructure: handed over by reference, so parent and child use the same instances
2309
+ budgetContext: this.budgetContext,
2310
+ stateStore: this.stateStore,
2311
+ mcpManager: this.mcpManager,
2312
+ spanManager: this.spanManager,
2313
+ memoryManager: this.memoryManager,
2314
+ onTrace: this.onTrace,
2315
+ onAgentCallComplete: this.onAgentCallComplete,
2316
+ awaitHumanHandler: this.awaitHumanHandler,
2317
+ pendingDecisions: this.pendingDecisions,
2318
+ toolOverrides: this.toolOverrides,
2319
+ signal: this.signal,
2320
+ workflowName: this.workflowName
2321
+ // Isolated (deliberately not forwarded): sessionHistory (empty), stepCounter (0),
2322
+ // onToken (null), onAgentStart (null), onToolCall (null)
2323
+ });
2324
+ }
2085
2325
  /**
2086
2326
  * Resolve the current abort signal.
2087
2327
  * Branch-scoped signals (from race/spawn/map/budget) in AsyncLocalStorage
@@ -2144,10 +2384,12 @@ var WorkflowContext = class {
2144
2384
  promptVersion: agent2._config.version,
2145
2385
  temperature: options?.temperature ?? agent2._config.temperature,
2146
2386
  maxTokens: options?.maxTokens ?? agent2._config.maxTokens ?? 4096,
2147
- thinking: options?.thinking ?? agent2._config.thinking,
2148
- reasoningEffort: options?.reasoningEffort ?? agent2._config.reasoningEffort,
2387
+ effort: options?.effort ?? agent2._config.effort,
2388
+ thinkingBudget: options?.thinkingBudget ?? agent2._config.thinkingBudget,
2389
+ includeThoughts: options?.includeThoughts ?? agent2._config.includeThoughts,
2149
2390
  toolChoice: options?.toolChoice ?? agent2._config.toolChoice,
2150
- stop: options?.stop ?? agent2._config.stop
2391
+ stop: options?.stop ?? agent2._config.stop,
2392
+ providerOptions: options?.providerOptions ?? agent2._config.providerOptions
2151
2393
  });
2152
2394
  return result;
2153
2395
  });
@@ -2170,7 +2412,21 @@ var WorkflowContext = class {
2170
2412
  const modelUri = agent2.resolveModel(resolveCtx);
2171
2413
  const systemPrompt = agent2.resolveSystem(resolveCtx);
2172
2414
  const { provider, model } = this.providerRegistry.resolve(modelUri, this.config);
2173
- const toolDefs = this.buildToolDefs(agent2);
2415
+ let resolvedHandoffs;
2416
+ if (typeof agent2._config.handoffs === "function") {
2417
+ try {
2418
+ resolvedHandoffs = agent2._config.handoffs(resolveCtx);
2419
+ } catch (err) {
2420
+ this.log("handoff_resolve_error", {
2421
+ agent: agent2._name,
2422
+ error: err instanceof Error ? err.message : String(err)
2423
+ });
2424
+ resolvedHandoffs = void 0;
2425
+ }
2426
+ } else {
2427
+ resolvedHandoffs = agent2._config.handoffs;
2428
+ }
2429
+ const toolDefs = this.buildToolDefs(agent2, resolvedHandoffs);
2174
2430
  const messages = [];
2175
2431
  if (systemPrompt) {
2176
2432
  messages.push({ role: "system", content: systemPrompt });
@@ -2272,21 +2528,17 @@ Please fix and try again.`;
2272
2528
  throw new TimeoutError("ctx.ask()", timeoutMs);
2273
2529
  }
2274
2530
  turns++;
2275
- const thinking = options?.thinking ?? agent2._config.thinking;
2276
- if (thinking && typeof thinking === "object" && thinking.budgetTokens <= 0) {
2277
- throw new Error(
2278
- `thinking.budgetTokens must be a positive number, got ${thinking.budgetTokens}`
2279
- );
2280
- }
2281
2531
  const chatOptions = {
2282
2532
  model,
2283
2533
  temperature: options?.temperature ?? agent2._config.temperature,
2284
2534
  tools: toolDefs.length > 0 ? toolDefs : void 0,
2285
2535
  maxTokens: options?.maxTokens ?? agent2._config.maxTokens ?? 4096,
2286
- thinking,
2287
- reasoningEffort: options?.reasoningEffort ?? agent2._config.reasoningEffort,
2536
+ effort: options?.effort ?? agent2._config.effort,
2537
+ thinkingBudget: options?.thinkingBudget ?? agent2._config.thinkingBudget,
2538
+ includeThoughts: options?.includeThoughts ?? agent2._config.includeThoughts,
2288
2539
  toolChoice: options?.toolChoice ?? agent2._config.toolChoice,
2289
2540
  stop: options?.stop ?? agent2._config.stop,
2541
+ providerOptions: options?.providerOptions ?? agent2._config.providerOptions,
2290
2542
  signal: this.currentSignal
2291
2543
  };
2292
2544
  if (options?.schema && toolDefs.length === 0) {
@@ -2298,10 +2550,14 @@ Please fix and try again.`;
2298
2550
  let content2 = "";
2299
2551
  const toolCalls = [];
2300
2552
  const toolCallBuffers = /* @__PURE__ */ new Map();
2553
+ let streamProviderMetadata;
2554
+ let thinkingContent = "";
2301
2555
  for await (const chunk of provider.stream(currentMessages, chatOptions)) {
2302
2556
  if (chunk.type === "text_delta") {
2303
2557
  content2 += chunk.content;
2304
2558
  this.onToken(chunk.content);
2559
+ } else if (chunk.type === "thinking_delta") {
2560
+ thinkingContent += chunk.content;
2305
2561
  } else if (chunk.type === "tool_call_delta") {
2306
2562
  let buffer = toolCallBuffers.get(chunk.id);
2307
2563
  if (!buffer) {
@@ -2311,6 +2567,7 @@ Please fix and try again.`;
2311
2567
  if (chunk.name) buffer.name = chunk.name;
2312
2568
  if (chunk.arguments) buffer.arguments += chunk.arguments;
2313
2569
  } else if (chunk.type === "done") {
2570
+ streamProviderMetadata = chunk.providerMetadata;
2314
2571
  if (chunk.usage) {
2315
2572
  response = {
2316
2573
  content: content2,
@@ -2337,6 +2594,12 @@ Please fix and try again.`;
2337
2594
  if (toolCalls.length > 0) {
2338
2595
  response.tool_calls = toolCalls;
2339
2596
  }
2597
+ if (streamProviderMetadata) {
2598
+ response.providerMetadata = streamProviderMetadata;
2599
+ }
2600
+ if (thinkingContent) {
2601
+ response.thinking_content = thinkingContent;
2602
+ }
2340
2603
  } else {
2341
2604
  response = await provider.chat(currentMessages, chatOptions);
2342
2605
  }
@@ -2367,13 +2630,14 @@ Please fix and try again.`;
2367
2630
  currentMessages.push({
2368
2631
  role: "assistant",
2369
2632
  content: response.content || "",
2370
- tool_calls: response.tool_calls
2633
+ tool_calls: response.tool_calls,
2634
+ ...response.providerMetadata ? { providerMetadata: response.providerMetadata } : {}
2371
2635
  });
2372
2636
  for (const toolCall of response.tool_calls) {
2373
2637
  const toolName = toolCall.function.name;
2374
2638
  if (toolName.startsWith("handoff_to_")) {
2375
2639
  const targetName = toolName.replace("handoff_to_", "");
2376
- const descriptor = agent2._config.handoffs?.find((h) => h.agent._name === targetName);
2640
+ const descriptor = resolvedHandoffs?.find((h) => h.agent._name === targetName);
2377
2641
  if (descriptor) {
2378
2642
  const mode = descriptor.mode ?? "oneway";
2379
2643
  let handoffPrompt = prompt;
@@ -2626,8 +2890,9 @@ Please fix and try again.`;
2626
2890
  resultContent2 = JSON.stringify(toolResult2);
2627
2891
  }
2628
2892
  } else if (tool2) {
2893
+ const childCtx = this.createChildContext();
2629
2894
  try {
2630
- toolResult2 = await tool2._execute(toolArgs);
2895
+ toolResult2 = await tool2._execute(toolArgs, childCtx);
2631
2896
  } catch (err) {
2632
2897
  toolResult2 = { error: err instanceof Error ? err.message : String(err) };
2633
2898
  }
@@ -2707,7 +2972,8 @@ Please fix and try again.`;
2707
2972
  guardrailOutputRetries++;
2708
2973
  currentMessages.push({
2709
2974
  role: "assistant",
2710
- content
2975
+ content,
2976
+ ...response.providerMetadata ? { providerMetadata: response.providerMetadata } : {}
2711
2977
  });
2712
2978
  currentMessages.push({
2713
2979
  role: "system",
@@ -2728,6 +2994,7 @@ Please fix and try again.`;
2728
2994
  try {
2729
2995
  const parsed = JSON.parse(stripMarkdownFences(content));
2730
2996
  const validated = options.schema.parse(parsed);
2997
+ this.pushAssistantToSessionHistory(content, response.providerMetadata);
2731
2998
  return validated;
2732
2999
  } catch (err) {
2733
3000
  const maxRetries = options.retries ?? 3;
@@ -2754,11 +3021,23 @@ Please fix and try again.`;
2754
3021
  throw new VerifyError(content, zodErr, maxRetries);
2755
3022
  }
2756
3023
  }
3024
+ this.pushAssistantToSessionHistory(content, response.providerMetadata);
2757
3025
  return content;
2758
3026
  }
2759
3027
  throw new MaxTurnsError("ctx.ask()", maxTurns);
2760
3028
  }
2761
- buildToolDefs(agent2) {
3029
+ /**
3030
+ * Append the final assistant message ({ role: "assistant", content }) to this context's sessionHistory, preserving providerMetadata
3031
+ * (e.g., Gemini thought signatures needed for multi-turn reasoning context). The providerMetadata key is only added when the argument is truthy.
3032
+ */
3033
+ pushAssistantToSessionHistory(content, providerMetadata) {
3034
+ this.sessionHistory.push({
3035
+ role: "assistant",
3036
+ content,
3037
+ ...providerMetadata ? { providerMetadata } : {}
3038
+ });
3039
+ }
3040
+ buildToolDefs(agent2, resolvedHandoffs) {
2762
3041
  const defs = [];
2763
3042
  if (agent2._config.tools) {
2764
3043
  for (const tool2 of agent2._config.tools) {
@@ -2772,8 +3051,8 @@ Please fix and try again.`;
2772
3051
  });
2773
3052
  }
2774
3053
  }
2775
- if (agent2._config.handoffs) {
2776
- for (const { agent: handoffAgent, description, mode } of agent2._config.handoffs) {
3054
+ if (resolvedHandoffs) {
3055
+ for (const { agent: handoffAgent, description, mode } of resolvedHandoffs) {
2777
3056
  const isRoundtrip = mode === "roundtrip";
2778
3057
  const defaultDesc = isRoundtrip ? `Delegate a task to ${handoffAgent._name} and receive the result back` : `Hand off the conversation to ${handoffAgent._name}`;
2779
3058
  defs.push({
@@ -3462,6 +3741,79 @@ ${summaryResponse.content}`
3462
3741
  const sessionId = this.metadata?.sessionId;
3463
3742
  await this.memoryManager.forget(key, this.stateStore, sessionId, options);
3464
3743
  }
3744
+ // ── ctx.delegate() ──────────────────────────────────────────────────
3745
+ /**
3746
+ * Select the best agent from a list of candidates and invoke it.
3747
+ * Creates a temporary router agent (named "_delegate_router", temperature 0, maxTurns 2) that uses handoffs to pick the right specialist.
3748
+ *
3749
+ * This is convenience sugar over creating a router agent with dynamic handoffs.
3750
+ * For full control over the router's behavior, create the router agent explicitly.
3751
+ * Before routing, a "delegate" trace event is emitted with the candidate names and the router model. Returns whatever this.ask() resolves to.
3752
+ * @param agents - Candidate agents to choose from (at least 1, unique names). An empty list or a duplicate name throws an Error; a single candidate is asked directly, without a router.
3753
+ * @param prompt - The prompt to send to the selected agent
3754
+ * @param options - Optional: schema, routerModel, metadata, retries. routerModel defaults to the first candidate's resolved model; schema, retries and metadata are forwarded to this.ask().
3755
+ */
3756
+ async delegate(agents, prompt, options) {
3757
+ if (agents.length === 0) {
3758
+ throw new Error("ctx.delegate() requires at least one candidate agent");
3759
+ }
3760
+ const names = /* @__PURE__ */ new Set();
3761
+ for (const a of agents) {
3762
+ if (names.has(a._name)) {
3763
+ throw new Error(
3764
+ `ctx.delegate() received duplicate agent name '${a._name}'. All candidate agents must have unique names.`
3765
+ );
3766
+ }
3767
+ names.add(a._name);
3768
+ }
3769
+ if (agents.length === 1) { // single candidate: nothing to route, ask it directly
3770
+ return this.ask(agents[0], prompt, {
3771
+ schema: options?.schema,
3772
+ retries: options?.retries,
3773
+ metadata: options?.metadata
3774
+ });
3775
+ }
3776
+ const resolveCtx = options?.metadata ? { metadata: { ...this.metadata, ...options.metadata } } : { metadata: this.metadata }; // per-call metadata keys override the context's metadata keys
3777
+ const routerModelUri = options?.routerModel ?? agents[0].resolveModel(resolveCtx); // default router model: whatever the first candidate resolves to
3778
+ const handoffs = agents.map((a) => {
3779
+ let description;
3780
+ try {
3781
+ description = a.resolveSystem(resolveCtx).slice(0, 200); // first 200 characters of the candidate's system prompt
3782
+ } catch { // resolveSystem threw, or its result could not be sliced: fall back to the agent name
3783
+ description = `Agent: ${a._name}`;
3784
+ }
3785
+ return { agent: a, description };
3786
+ });
3787
+ const routerSystem = "Route to the best agent for this task. Always hand off; never answer directly.";
3788
+ const routerAgent = { // minimal agent-shaped object literal, consumed by this.ask() below
3789
+ _config: {
3790
+ model: routerModelUri,
3791
+ system: routerSystem,
3792
+ temperature: 0,
3793
+ handoffs,
3794
+ maxTurns: 2
3795
+ },
3796
+ _name: "_delegate_router",
3797
+ ask: async () => {
3798
+ throw new Error("Direct invocation not supported on delegate router");
3799
+ },
3800
+ resolveModel: () => routerModelUri,
3801
+ resolveSystem: () => routerSystem
3802
+ };
3803
+ this.emitTrace({
3804
+ type: "delegate",
3805
+ agent: "_delegate_router",
3806
+ data: {
3807
+ candidates: agents.map((a) => a._name),
3808
+ routerModel: routerModelUri
3809
+ }
3810
+ });
3811
+ return this.ask(routerAgent, prompt, {
3812
+ schema: options?.schema,
3813
+ retries: options?.retries,
3814
+ metadata: options?.metadata
3815
+ });
3816
+ }
3465
3817
  // ── Private ───────────────────────────────────────────────────────────
3466
3818
  emitTrace(partial) {
3467
3819
  let data = partial.data;
@@ -4071,11 +4423,13 @@ var Session = class _Session {
4071
4423
  ...cachedSummary ? { summaryCache: cachedSummary } : {}
4072
4424
  }
4073
4425
  });
4074
- const assistantMessage = {
4075
- role: "assistant",
4076
- content: typeof result === "string" ? result : JSON.stringify(result)
4077
- };
4078
- history.push(assistantMessage);
4426
+ const lastMsg = history[history.length - 1];
4427
+ if (!(lastMsg && lastMsg.role === "assistant")) {
4428
+ history.push({
4429
+ role: "assistant",
4430
+ content: typeof result === "string" ? result : JSON.stringify(result)
4431
+ });
4432
+ }
4079
4433
  if (this.options.persist !== false) {
4080
4434
  await this.store.saveSession(this.sessionId, history);
4081
4435
  }
@@ -4118,10 +4472,13 @@ var Session = class _Session {
4118
4472
  }
4119
4473
  });
4120
4474
  const updateHistory = async (result) => {
4121
- history.push({
4122
- role: "assistant",
4123
- content: typeof result === "string" ? result : JSON.stringify(result)
4124
- });
4475
+ const lastMsg = history[history.length - 1];
4476
+ if (!(lastMsg && lastMsg.role === "assistant")) {
4477
+ history.push({
4478
+ role: "assistant",
4479
+ content: typeof result === "string" ? result : JSON.stringify(result)
4480
+ });
4481
+ }
4125
4482
  if (this.options.persist !== false) {
4126
4483
  await this.store.saveSession(this.sessionId, history);
4127
4484
  }
@@ -5011,6 +5368,24 @@ var AxlRuntime = class extends import_node_events2.EventEmitter {
5011
5368
  getExecutions() {
5012
5369
  return [...this.executions.values()];
5013
5370
  }
5371
+ /**
5372
+ * Create a lightweight WorkflowContext for ad-hoc use (tool testing, prototyping). Every call gets a fresh random executionId and an undefined input.
5373
+ * The context has access to the runtime's config, providers, state store, MCP manager, span manager, and memory manager,
5374
+ * but no session history, streaming callbacks, budget tracking, trace callback, or abort signal is passed in. Metadata comes from options?.metadata (optional).
5375
+ */
5376
+ createContext(options) {
5377
+ return new WorkflowContext({
5378
+ input: void 0,
5379
+ executionId: (0, import_node_crypto2.randomUUID)(),
5380
+ metadata: options?.metadata,
5381
+ config: this.config,
5382
+ providerRegistry: this.providerRegistry,
5383
+ stateStore: this.stateStore,
5384
+ mcpManager: this.mcpManager,
5385
+ spanManager: this.spanManager,
5386
+ memoryManager: this.memoryManager
5387
+ });
5388
+ }
5014
5389
  /** Register a custom provider instance. */
5015
5390
  registerProvider(name, provider) {
5016
5391
  this.providerRegistry.registerInstance(name, provider);
@@ -5709,6 +6084,7 @@ function cosineSimilarity2(a, b) {
5709
6084
  agent,
5710
6085
  createSpanManager,
5711
6086
  defineConfig,
6087
+ resolveThinkingOptions,
5712
6088
  tool,
5713
6089
  workflow,
5714
6090
  zodToJsonSchema