@axlsdk/axl 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -43,6 +43,7 @@ const researcher = agent({
43
43
  model: 'openai:gpt-4o',
44
44
  system: 'You are a research assistant.',
45
45
  tools: [calculator],
46
+ thinking: 'high',
46
47
  maxTurns: 10,
47
48
  timeout: '30s',
48
49
  temperature: 0.7,
@@ -61,6 +62,31 @@ const dynamicAgent = agent({
61
62
  });
62
63
  ```
63
64
 
65
+ #### Thinking (cross-provider reasoning control)
66
+
67
+ The `thinking` parameter provides a unified way to control reasoning depth across all providers:
68
+
69
+ ```typescript
70
+ // Simple levels — works on any provider
71
+ const reasoner = agent({
72
+ model: 'anthropic:claude-sonnet-4-5',
73
+ system: 'You are a careful analyst.',
74
+ thinking: 'high', // 'low' | 'medium' | 'high' | 'max'
75
+ });
76
+
77
+ // Explicit budget (in tokens)
78
+ const budgetReasoner = agent({
79
+ model: 'google:gemini-2.5-flash',
80
+ system: 'Think step by step.',
81
+ thinking: { budgetTokens: 5000 },
82
+ });
83
+
84
+ // Per-call override
85
+ const result = await reasoner.ask('Analyze this data', { thinking: 'low' });
86
+ ```
87
+
88
+ Each provider maps `thinking` to its native API: `reasoning_effort` (OpenAI), `budget_tokens` (Anthropic; named levels use adaptive thinking with an `effort` setting on Claude Opus/Sonnet 4.6), `thinkingBudget` (Gemini). See [docs/providers.md](../../docs/providers.md) for the full mapping table.
89
+
64
90
  ### `workflow(config)`
65
91
 
66
92
  Define a named workflow with typed input/output:
@@ -108,16 +134,16 @@ const history = await session.history();
108
134
 
109
135
  ### Context Primitives
110
136
 
111
- All available on `ctx` inside workflow handlers:
137
+ All available on `ctx` inside workflow handlers. See the [API Reference](../../docs/api-reference.md) for complete option types, valid values, and defaults.
112
138
 
113
139
  ```typescript
114
140
  // Invoke an agent
115
141
  const answer = await ctx.ask(agent, 'prompt', { schema, retries });
116
142
 
117
- // Run N concurrent tasks
143
+ // Run 3 agent calls in parallel — call i sends its own prompt, prompts[i]
118
144
  const results = await ctx.spawn(3, async (i) => ctx.ask(agent, prompts[i]));
119
145
 
120
- // Consensus vote
146
+ // Pick the answer that appeared most often (pure aggregation, no LLM involved)
121
147
  const winner = ctx.vote(results, { strategy: 'majority', key: 'answer' });
122
148
 
123
149
  // Self-correcting validation
@@ -144,7 +170,7 @@ const [a, b] = await ctx.parallel([
144
170
  () => ctx.ask(agentB, promptB),
145
171
  ]);
146
172
 
147
- // Map with bounded concurrency
173
+ // Map with bounded concurrency — resolve when 3 of N succeed, cancel the rest
148
174
  const mapped = await ctx.map(items, async (item) => ctx.ask(agent, item), {
149
175
  concurrency: 5,
150
176
  quorum: 3,
@@ -166,21 +192,27 @@ Automatic span emission for every `ctx.*` primitive with cost-per-span attributi
166
192
 
167
193
  ```typescript
168
194
  import { defineConfig, AxlRuntime } from '@axlsdk/axl';
195
+ import { BasicTracerProvider, SimpleSpanProcessor } from '@opentelemetry/sdk-trace-base';
169
196
  import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
170
197
 
198
+ const tracerProvider = new BasicTracerProvider();
199
+ tracerProvider.addSpanProcessor(new SimpleSpanProcessor(
200
+ new OTLPTraceExporter({ url: 'http://localhost:4318/v1/traces' }),
201
+ ));
202
+
171
203
  const config = defineConfig({
172
204
  telemetry: {
173
205
  enabled: true,
174
206
  serviceName: 'my-app',
175
- exporter: new OTLPTraceExporter({ url: 'http://localhost:4318/v1/traces' }),
207
+ tracerProvider,
176
208
  },
177
209
  });
178
210
 
179
211
  const runtime = new AxlRuntime(config);
180
- runtime.initializeTelemetry();
212
+ await runtime.initializeTelemetry();
181
213
  ```
182
214
 
183
- **Span model:** `axl.workflow.execute` > `axl.agent.ask` > `axl.tool.call`. Also: `axl.ctx.spawn`, `axl.ctx.race`, `axl.ctx.vote`, `axl.ctx.budget`, `axl.ctx.checkpoint`, `axl.ctx.awaitHuman`. Each span includes relevant attributes (cost, duration, token counts, etc.).
215
+ **Span model:** `axl.workflow.execute` > `axl.agent.ask` > `axl.tool.call`. Also: `axl.ctx.spawn`, `axl.ctx.race`, `axl.ctx.vote`, `axl.ctx.budget`, `axl.ctx.awaitHuman`. Each span includes relevant attributes (cost, duration, token counts, etc.).
184
216
 
185
217
  When disabled (default), `NoopSpanManager` provides zero overhead.
186
218
 
@@ -210,7 +242,7 @@ import { AxlRuntime, InMemoryVectorStore, OpenAIEmbedder } from '@axlsdk/axl';
210
242
 
211
243
  const runtime = new AxlRuntime({
212
244
  memory: {
213
- vector: new InMemoryVectorStore(),
245
+ vectorStore: new InMemoryVectorStore(),
214
246
  embedder: new OpenAIEmbedder({ model: 'text-embedding-3-small' }),
215
247
  },
216
248
  });
@@ -226,9 +258,13 @@ Vector store implementations: `InMemoryVectorStore` (testing), `SqliteVectorStor
226
258
 
227
259
  ### Agent Guardrails
228
260
 
229
- Input and output validation at the agent boundary:
261
+ Input and output validation at the agent boundary. You define your own validation logic — Axl calls it before and after each LLM turn:
230
262
 
231
263
  ```typescript
264
+ // Your validation functions — Axl doesn't ship these, you bring your own
265
+ const containsPII = (text: string) => /\b\d{3}-\d{2}-\d{4}\b/.test(text);
266
+ const isOffTopic = (text: string) => !text.toLowerCase().includes('support');
267
+
232
268
  const safe = agent({
233
269
  model: 'openai:gpt-4o',
234
270
  system: 'You are a helpful assistant.',
@@ -254,10 +290,11 @@ When `onBlock` is `'retry'`, the LLM sees the block reason and self-corrects (sa
254
290
  ```typescript
255
291
  const session = runtime.session('user-123', {
256
292
  history: {
257
- maxMessages: 100, // Trim oldest messages when exceeded
258
- summarize: true, // Auto-summarize trimmed messages
293
+ maxMessages: 100, // Trim oldest messages when exceeded
294
+ summarize: true, // Auto-summarize trimmed messages
295
+ summaryModel: 'openai:gpt-4o-mini', // Model for summarization
259
296
  },
260
- persist: true, // Save to StateStore (default: true)
297
+ persist: true, // Save to StateStore (default: true)
261
298
  });
262
299
  ```
263
300
 
@@ -267,6 +304,7 @@ const session = runtime.session('user-123', {
267
304
  |--------|------|---------|-------------|
268
305
  | `history.maxMessages` | `number` | unlimited | Max messages to retain |
269
306
  | `history.summarize` | `boolean` | `false` | Summarize trimmed messages |
307
+ | `history.summaryModel` | `string` | — | Model URI for summarization (required when `summarize: true`) |
270
308
  | `persist` | `boolean` | `true` | Persist history to StateStore |
271
309
 
272
310
  ### Error Hierarchy
@@ -306,58 +344,17 @@ const runtime = new AxlRuntime({
306
344
 
307
345
  ### Provider URIs
308
346
 
309
- Four built-in providers are supported:
347
+ Four built-in providers are supported, each addressed with the `provider:model` URI scheme:
310
348
 
311
349
  ```
312
- # OpenAI Chat Completions API
313
- openai:gpt-4o # Flagship multimodal
314
- openai:gpt-4o-mini # Fast and affordable
315
- openai:gpt-4.1 # GPT-4.1
316
- openai:gpt-4.1-mini # GPT-4.1 small
317
- openai:gpt-4.1-nano # GPT-4.1 cheapest
318
- openai:gpt-5 # GPT-5
319
- openai:gpt-5-mini # GPT-5 small
320
- openai:gpt-5-nano # GPT-5 cheapest
321
- openai:gpt-5.1 # GPT-5.1
322
- openai:gpt-5.2 # GPT-5.2
323
- openai:o1 # Reasoning
324
- openai:o1-mini # Reasoning (small)
325
- openai:o1-pro # Reasoning (pro)
326
- openai:o3 # Reasoning
327
- openai:o3-mini # Reasoning (small)
328
- openai:o3-pro # Reasoning (pro)
329
- openai:o4-mini # Reasoning (small)
330
- openai:gpt-4-turbo # Legacy
331
- openai:gpt-4 # Legacy
332
- openai:gpt-3.5-turbo # Legacy
333
-
334
- # OpenAI — Responses API (same models, better caching, native reasoning)
335
- openai-responses:gpt-4o
336
- openai-responses:o3
337
-
338
- # Anthropic
339
- anthropic:claude-opus-4-6 # Most capable
340
- anthropic:claude-sonnet-4-5 # Balanced
341
- anthropic:claude-haiku-4-5 # Fast and affordable
342
- anthropic:claude-sonnet-4 # Previous gen
343
- anthropic:claude-opus-4 # Previous gen
344
- anthropic:claude-3-7-sonnet # Legacy
345
- anthropic:claude-3-5-sonnet # Legacy
346
- anthropic:claude-3-5-haiku # Legacy
347
- anthropic:claude-3-opus # Legacy
348
- anthropic:claude-3-sonnet # Legacy
349
- anthropic:claude-3-haiku # Legacy
350
-
351
- # Google Gemini
352
- google:gemini-2.5-pro # Most capable
353
- google:gemini-2.5-flash # Fast
354
- google:gemini-2.5-flash-lite # Cheapest 2.5
355
- google:gemini-2.0-flash # Previous gen
356
- google:gemini-2.0-flash-lite # Previous gen (lite)
357
- google:gemini-3-pro-preview # Next gen (preview)
358
- google:gemini-3-flash-preview # Next gen fast (preview)
350
+ openai:gpt-4o # OpenAI Chat Completions
351
+ openai-responses:gpt-4o # OpenAI Responses API
352
+ anthropic:claude-sonnet-4-5 # Anthropic
353
+ google:gemini-2.5-pro # Google Gemini
359
354
  ```
360
355
 
356
+ See [docs/providers.md](../../docs/providers.md) for the full model list including reasoning models.
357
+
361
358
  ## License
362
359
 
363
360
  [Apache 2.0](../../LICENSE)
package/dist/index.cjs CHANGED
@@ -331,6 +331,24 @@ function estimateOpenAICost(model, promptTokens, completionTokens, cachedTokens)
331
331
  function isReasoningModel(model) {
332
332
  return /^(o1|o3|o4-mini)/.test(model);
333
333
  }
334
/**
 * Translate the unified `thinking` setting into an OpenAI reasoning-effort label.
 *
 * @param thinking Either a level string ('low' | 'medium' | 'high' | 'max') or an
 *   object carrying an explicit `budgetTokens` number.
 * @returns 'low' | 'medium' | 'high' | 'xhigh', or `undefined` when the value is
 *   nullish or not a recognised level.
 */
function thinkingToReasoningEffort(thinking) {
  // typeof null === "object", so rule out nullish values before reading budgetTokens.
  if (thinking == null) return void 0;
  if (typeof thinking === "object") {
    // Explicit budgets are bucketed: <=1024 -> low, <=8192 -> medium, else high.
    const budget = thinking.budgetTokens;
    if (budget <= 1024) return "low";
    if (budget <= 8192) return "medium";
    return "high";
  }
  switch (thinking) {
    case "low":
    case "medium":
    case "high":
      return thinking;
    case "max":
      // NOTE(review): confirm every model that reaches this path accepts "xhigh".
      return "xhigh";
    default:
      // Unknown level: report "no effort" explicitly rather than by fall-through.
      return void 0;
  }
}
334
352
  var OpenAIProvider = class {
335
353
  name = "openai";
336
354
  baseUrl;
@@ -433,7 +451,9 @@ var OpenAIProvider = class {
433
451
  if (options.stop) body.stop = options.stop;
434
452
  if (options.tools && options.tools.length > 0) {
435
453
  body.tools = options.tools;
436
- body.parallel_tool_calls = true;
454
+ if (!reasoning) {
455
+ body.parallel_tool_calls = true;
456
+ }
437
457
  }
438
458
  if (options.toolChoice !== void 0) {
439
459
  body.tool_choice = options.toolChoice;
@@ -441,8 +461,11 @@ var OpenAIProvider = class {
441
461
  if (options.responseFormat) {
442
462
  body.response_format = options.responseFormat;
443
463
  }
444
- if (options.reasoningEffort) {
445
- body.reasoning_effort = options.reasoningEffort;
464
+ if (reasoning) {
465
+ const effort = options.thinking ? thinkingToReasoningEffort(options.thinking) : options.reasoningEffort;
466
+ if (effort) {
467
+ body.reasoning_effort = effort;
468
+ }
446
469
  }
447
470
  if (stream) {
448
471
  body.stream_options = { include_usage: true };
@@ -633,8 +656,11 @@ var OpenAIResponsesProvider = class {
633
656
  body.tool_choice = options.toolChoice;
634
657
  }
635
658
  }
636
- if (options.reasoningEffort) {
637
- body.reasoning = { effort: options.reasoningEffort };
659
+ if (reasoning) {
660
+ const effort = options.thinking ? thinkingToReasoningEffort(options.thinking) : options.reasoningEffort;
661
+ if (effort) {
662
+ body.reasoning = { effort };
663
+ }
638
664
  }
639
665
  if (options.responseFormat) {
640
666
  body.text = { format: this.mapResponseFormat(options.responseFormat) };
@@ -868,6 +894,24 @@ function estimateAnthropicCost(model, inputTokens, outputTokens, cacheReadTokens
868
894
  const inputCost = (inputTokens - cacheRead - cacheWrite) * inputRate + cacheRead * inputRate * 0.1 + cacheWrite * inputRate * 1.25;
869
895
  return inputCost + outputTokens * outputRate;
870
896
  }
897
/** Token budgets used when `thinking` is given as a named level. */
var THINKING_BUDGETS = {
  low: 1024,
  medium: 5e3,
  high: 1e4,
  // 30000 (not 32000) to stay under the 32K max_tokens limit on Opus 4/4.1.
  // With auto-bump (+1024), max_tokens becomes 31024 which fits all models.
  max: 3e4
};
/**
 * Resolve the unified `thinking` setting to a token budget.
 *
 * @param thinking A level string or an object with an explicit `budgetTokens`.
 * @returns The table budget for a known level, 5000 for any other string, or
 *   the object's `budgetTokens` value unchanged.
 */
function thinkingToBudgetTokens(thinking) {
  if (typeof thinking === "string") {
    // Own-property check: a bare index lookup would return inherited members
    // (e.g. "toString") instead of falling back to the default budget.
    const isKnownLevel = Object.prototype.hasOwnProperty.call(THINKING_BUDGETS, thinking);
    return isKnownLevel ? THINKING_BUDGETS[thinking] : 5e3;
  }
  return thinking.budgetTokens;
}
909
/**
 * Check whether a model id belongs to a family listed as supporting adaptive thinking.
 *
 * @param model Model id without the provider prefix.
 * @returns true when the id starts with "claude-opus-4-6" or "claude-sonnet-4-6".
 */
function supportsAdaptiveThinking(model) {
  const adaptiveFamilies = ["claude-opus-4-6", "claude-sonnet-4-6"];
  return adaptiveFamilies.some((prefix) => model.startsWith(prefix));
}
912
/**
 * Check whether a model id belongs to the family listed as accepting the 'max' effort level.
 *
 * @param model Model id without the provider prefix.
 * @returns true when the id starts with "claude-opus-4-6".
 */
function supportsMaxEffort(model) {
  const isOpus46 = model.startsWith("claude-opus-4-6");
  return isOpus46;
}
871
915
  var AnthropicProvider = class {
872
916
  name = "anthropic";
873
917
  baseUrl;
@@ -957,7 +1001,7 @@ var AnthropicProvider = class {
957
1001
  if (systemText) {
958
1002
  body.system = systemText;
959
1003
  }
960
- if (options.temperature !== void 0) {
1004
+ if (options.temperature !== void 0 && !options.thinking) {
961
1005
  body.temperature = options.temperature;
962
1006
  }
963
1007
  if (options.stop) {
@@ -966,6 +1010,23 @@ var AnthropicProvider = class {
966
1010
  if (options.tools && options.tools.length > 0) {
967
1011
  body.tools = options.tools.map((t) => this.mapToolDefinition(t));
968
1012
  }
1013
+ if (options.toolChoice !== void 0) {
1014
+ body.tool_choice = this.mapToolChoice(options.toolChoice);
1015
+ }
1016
+ if (options.thinking) {
1017
+ if (typeof options.thinking === "string" && supportsAdaptiveThinking(options.model) && // 'max' effort is only supported on Opus 4.6; Sonnet 4.6 falls back to manual mode
1018
+ (options.thinking !== "max" || supportsMaxEffort(options.model))) {
1019
+ body.thinking = { type: "adaptive" };
1020
+ body.output_config = { effort: options.thinking };
1021
+ } else {
1022
+ const budgetTokens = thinkingToBudgetTokens(options.thinking);
1023
+ body.thinking = { type: "enabled", budget_tokens: budgetTokens };
1024
+ const currentMax = body.max_tokens;
1025
+ if (currentMax < budgetTokens + 1024) {
1026
+ body.max_tokens = budgetTokens + 1024;
1027
+ }
1028
+ }
1029
+ }
969
1030
  if (options.responseFormat && options.responseFormat.type !== "text") {
970
1031
  const jsonInstruction = "You must respond with valid JSON only. No markdown fences, no extra text.";
971
1032
  body.system = body.system ? `${body.system}
@@ -1061,6 +1122,22 @@ ${jsonInstruction}` : jsonInstruction;
1061
1122
  input_schema: tool2.function.parameters
1062
1123
  };
1063
1124
  }
1125
+ /**
1126
+ * Map Axl's ToolChoice to Anthropic's tool_choice format.
1127
+ *
1128
+ * Axl (OpenAI format) → Anthropic format
1129
+ * 'auto' → { type: 'auto' }
1130
+ * 'none' → { type: 'none' }
1131
+ * 'required' → { type: 'any' }
1132
+ * { type:'function', function: { name } } → { type: 'tool', name }
1133
+ */
1134
+ mapToolChoice(choice) {
1135
+ if (typeof choice === "string") {
1136
+ if (choice === "required") return { type: "any" };
1137
+ return { type: choice };
1138
+ }
1139
+ return { type: "tool", name: choice.function.name };
1140
+ }
1064
1141
  // ---------------------------------------------------------------------------
1065
1142
  // Internal: response parsing
1066
1143
  // ---------------------------------------------------------------------------
@@ -1238,6 +1315,16 @@ function estimateGeminiCost(model, inputTokens, outputTokens, cachedTokens) {
1238
1315
  const inputCost = (inputTokens - cached) * inputRate + cached * inputRate * 0.1;
1239
1316
  return inputCost + outputTokens * outputRate;
1240
1317
  }
1318
/** Token budgets used when `thinking` is given as a named level. */
var THINKING_BUDGETS2 = {
  low: 1024,
  medium: 5e3,
  high: 1e4,
  max: 24576
};
/**
 * Resolve the unified `thinking` setting to a token budget.
 *
 * @param thinking A level string or an object with an explicit `budgetTokens`.
 * @returns The table budget for a known level, 5000 for any other string, or
 *   the object's `budgetTokens` value unchanged.
 */
function thinkingToBudgetTokens2(thinking) {
  if (typeof thinking === "string") {
    // Own-property check: a bare index lookup would return inherited members
    // (e.g. "toString") instead of falling back to the default budget.
    const isKnownLevel = Object.prototype.hasOwnProperty.call(THINKING_BUDGETS2, thinking);
    return isKnownLevel ? THINKING_BUDGETS2[thinking] : 5e3;
  }
  return thinking.budgetTokens;
}
1241
1328
  var GeminiProvider = class {
1242
1329
  name = "google";
1243
1330
  baseUrl;
@@ -1351,6 +1438,17 @@ var GeminiProvider = class {
1351
1438
  if (Object.keys(generationConfig).length > 0) {
1352
1439
  body.generationConfig = generationConfig;
1353
1440
  }
1441
+ if (options.thinking) {
1442
+ generationConfig.thinkingConfig = {
1443
+ thinkingBudget: thinkingToBudgetTokens2(options.thinking)
1444
+ };
1445
+ if (!body.generationConfig) {
1446
+ body.generationConfig = generationConfig;
1447
+ }
1448
+ }
1449
+ if (options.toolChoice !== void 0) {
1450
+ body.toolConfig = { functionCallingConfig: this.mapToolChoice(options.toolChoice) };
1451
+ }
1354
1452
  return body;
1355
1453
  }
1356
1454
  /**
@@ -1442,6 +1540,25 @@ var GeminiProvider = class {
1442
1540
  }
1443
1541
  return merged;
1444
1542
  }
1543
+ /**
1544
+ * Map Axl's ToolChoice to Gemini's functionCallingConfig format.
1545
+ *
1546
+ * - 'auto' → { mode: 'AUTO' }
1547
+ * - 'none' → { mode: 'NONE' }
1548
+ * - 'required' → { mode: 'ANY' }
1549
+ * - { type: 'function', function: { name } } → { mode: 'ANY', allowedFunctionNames: [name] }
1550
+ */
1551
+ mapToolChoice(choice) {
1552
+ if (typeof choice === "string") {
1553
+ const modeMap = {
1554
+ auto: "AUTO",
1555
+ none: "NONE",
1556
+ required: "ANY"
1557
+ };
1558
+ return { mode: modeMap[choice] ?? "AUTO" };
1559
+ }
1560
+ return { mode: "ANY", allowedFunctionNames: [choice.function.name] };
1561
+ }
1445
1562
  mapToolDefinition(tool2) {
1446
1563
  return {
1447
1564
  name: tool2.function.name,
@@ -1890,6 +2007,15 @@ function zodToJsonSchema(schema) {
1890
2007
  function estimateTokens(text) {
1891
2008
  return Math.ceil(text.length / 4);
1892
2009
  }
2010
/**
 * Remove a surrounding Markdown code fence from model output.
 *
 * Text that does not start with a fence is returned trimmed and otherwise
 * unchanged. When it does start with one, the opening fence (with optional
 * language tag) is dropped and the content is cut at the first closing fence
 * that sits on its own line, so trailing commentary after the fenced block is
 * discarded rather than left in the result.
 *
 * @param text Raw text, possibly wrapped in a ``` fence.
 * @returns The unfenced, trimmed content.
 */
function stripMarkdownFences(text) {
  const trimmed = text.trim();
  if (!trimmed.startsWith("```")) {
    return trimmed;
  }
  const afterOpening = trimmed.replace(/^```\w*\s*\n?/, "");
  // Prefer a closing fence on its own line; anything after it is not payload.
  const closingIndex = afterOpening.search(/\n```[ \t]*(?:\r?\n|$)/);
  if (closingIndex !== -1) {
    return afterOpening.slice(0, closingIndex).trim();
  }
  // No standalone closing line (e.g. single-line input): strip a trailing fence.
  return afterOpening.replace(/\n?```\s*$/, "").trim();
}
1893
2019
  function estimateMessagesTokens(messages) {
1894
2020
  let total = 0;
1895
2021
  for (const msg of messages) {
@@ -2015,7 +2141,13 @@ var WorkflowContext = class {
2015
2141
  model: agent2.resolveModel(resolveCtx),
2016
2142
  cost: costAfter - costBefore,
2017
2143
  duration: Date.now() - startTime,
2018
- promptVersion: agent2._config.version
2144
+ promptVersion: agent2._config.version,
2145
+ temperature: options?.temperature ?? agent2._config.temperature,
2146
+ maxTokens: options?.maxTokens ?? agent2._config.maxTokens ?? 4096,
2147
+ thinking: options?.thinking ?? agent2._config.thinking,
2148
+ reasoningEffort: options?.reasoningEffort ?? agent2._config.reasoningEffort,
2149
+ toolChoice: options?.toolChoice ?? agent2._config.toolChoice,
2150
+ stop: options?.stop ?? agent2._config.stop
2019
2151
  });
2020
2152
  return result;
2021
2153
  });
@@ -2140,11 +2272,21 @@ Please fix and try again.`;
2140
2272
  throw new TimeoutError("ctx.ask()", timeoutMs);
2141
2273
  }
2142
2274
  turns++;
2275
+ const thinking = options?.thinking ?? agent2._config.thinking;
2276
+ if (thinking && typeof thinking === "object" && thinking.budgetTokens <= 0) {
2277
+ throw new Error(
2278
+ `thinking.budgetTokens must be a positive number, got ${thinking.budgetTokens}`
2279
+ );
2280
+ }
2143
2281
  const chatOptions = {
2144
2282
  model,
2145
- temperature: agent2._config.temperature,
2283
+ temperature: options?.temperature ?? agent2._config.temperature,
2146
2284
  tools: toolDefs.length > 0 ? toolDefs : void 0,
2147
- maxTokens: 4096,
2285
+ maxTokens: options?.maxTokens ?? agent2._config.maxTokens ?? 4096,
2286
+ thinking,
2287
+ reasoningEffort: options?.reasoningEffort ?? agent2._config.reasoningEffort,
2288
+ toolChoice: options?.toolChoice ?? agent2._config.toolChoice,
2289
+ stop: options?.stop ?? agent2._config.stop,
2148
2290
  signal: this.currentSignal
2149
2291
  };
2150
2292
  if (options?.schema && toolDefs.length === 0) {
@@ -2243,10 +2385,11 @@ Please fix and try again.`;
2243
2385
  }
2244
2386
  }
2245
2387
  const handoffStart = Date.now();
2388
+ const handoffOptions = options ? { schema: options.schema, retries: options.retries, metadata: options.metadata } : void 0;
2246
2389
  const handoffFn = () => this.executeAgentCall(
2247
2390
  descriptor.agent,
2248
2391
  handoffPrompt,
2249
- options,
2392
+ handoffOptions,
2250
2393
  0,
2251
2394
  void 0,
2252
2395
  void 0,
@@ -2583,7 +2726,7 @@ Please fix and try again.`;
2583
2726
  }
2584
2727
  if (options?.schema) {
2585
2728
  try {
2586
- const parsed = JSON.parse(content);
2729
+ const parsed = JSON.parse(stripMarkdownFences(content));
2587
2730
  const validated = options.schema.parse(parsed);
2588
2731
  return validated;
2589
2732
  } catch (err) {