npm - @axlsdk/axl - Versions diffs - 0.3.0 → 0.5.0 - Mend

@axlsdk/axl 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/README.md — CHANGED

@@ -43,6 +43,7 @@ const researcher = agent({
   model: 'openai:gpt-4o',
   system: 'You are a research assistant.',
   tools: [calculator],
+  thinking: 'high',
   maxTurns: 10,
   timeout: '30s',
   temperature: 0.7,
@@ -61,6 +62,31 @@ const dynamicAgent = agent({
 });
 ```
+#### Thinking (cross-provider reasoning control)
+The `thinking` parameter provides a unified way to control reasoning depth across all providers:
+```typescript
+// Simple levels — works on any provider
+const reasoner = agent({
+  model: 'anthropic:claude-sonnet-4-5',
+  system: 'You are a careful analyst.',
+  thinking: 'high',  // 'low' | 'medium' | 'high' | 'max'
+});
+// Explicit budget (in tokens)
+const budgetReasoner = agent({
+  model: 'google:gemini-2.5-flash',
+  system: 'Think step by step.',
+  thinking: { budgetTokens: 5000 },
+});
+// Per-call override
+const result = await reasoner.ask('Analyze this data', { thinking: 'low' });
+```
+Each provider maps `thinking` to its native API: `reasoning_effort` (OpenAI), `budget_tokens` (Anthropic), `thinkingBudget` (Gemini). See [docs/providers.md](../../docs/providers.md) for the full mapping table.
 ### `workflow(config)`
 Define a named workflow with typed input/output:
@@ -108,16 +134,16 @@ const history = await session.history();
 ### Context Primitives
-All available on `ctx` inside workflow handlers:
+All available on `ctx` inside workflow handlers. See the [API Reference](../../docs/api-reference.md) for complete option types, valid values, and defaults.
 ```typescript
 // Invoke an agent
 const answer = await ctx.ask(agent, 'prompt', { schema, retries });
-// Run N concurrent tasks
+// Run 3 agents in parallel — each gets the same question independently
 const results = await ctx.spawn(3, async (i) => ctx.ask(agent, prompts[i]));
-// Consensus vote
+// Pick the answer that appeared most often (pure aggregation, no LLM involved)
 const winner = ctx.vote(results, { strategy: 'majority', key: 'answer' });
 // Self-correcting validation
@@ -144,7 +170,7 @@ const [a, b] = await ctx.parallel([
   () => ctx.ask(agentB, promptB),
 ]);
-// Map with bounded concurrency
+// Map with bounded concurrency — resolve when 3 of N succeed, cancel the rest
 const mapped = await ctx.map(items, async (item) => ctx.ask(agent, item), {
   concurrency: 5,
   quorum: 3,
@@ -166,21 +192,27 @@ Automatic span emission for every `ctx.*` primitive with cost-per-span attributi
 ```typescript
 import { defineConfig, AxlRuntime } from '@axlsdk/axl';
+import { BasicTracerProvider, SimpleSpanProcessor } from '@opentelemetry/sdk-trace-base';
 import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
+const tracerProvider = new BasicTracerProvider();
+tracerProvider.addSpanProcessor(new SimpleSpanProcessor(
+  new OTLPTraceExporter({ url: 'http://localhost:4318/v1/traces' }),
+));
 const config = defineConfig({
   telemetry: {
     enabled: true,
     serviceName: 'my-app',
-    exporter: new OTLPTraceExporter({ url: 'http://localhost:4318/v1/traces' }),
+    tracerProvider,
   },
 });
 const runtime = new AxlRuntime(config);
-runtime.initializeTelemetry();
+await runtime.initializeTelemetry();
 ```
-**Span model:** `axl.workflow.execute` > `axl.agent.ask` > `axl.tool.call`. Also: `axl.ctx.spawn`, `axl.ctx.race`, `axl.ctx.vote`, `axl.ctx.budget`, `axl.ctx.checkpoint`, `axl.ctx.awaitHuman`. Each span includes relevant attributes (cost, duration, token counts, etc.).
+**Span model:** `axl.workflow.execute` > `axl.agent.ask` > `axl.tool.call`. Also: `axl.ctx.spawn`, `axl.ctx.race`, `axl.ctx.vote`, `axl.ctx.budget`, `axl.ctx.awaitHuman`. Each span includes relevant attributes (cost, duration, token counts, etc.).
 When disabled (default), `NoopSpanManager` provides zero overhead.
@@ -210,7 +242,7 @@ import { AxlRuntime, InMemoryVectorStore, OpenAIEmbedder } from '@axlsdk/axl';
 const runtime = new AxlRuntime({
   memory: {
-    vector: new InMemoryVectorStore(),
+    vectorStore: new InMemoryVectorStore(),
     embedder: new OpenAIEmbedder({ model: 'text-embedding-3-small' }),
   },
 });
@@ -226,9 +258,13 @@ Vector store implementations: `InMemoryVectorStore` (testing), `SqliteVectorStor
 ### Agent Guardrails
-Input and output validation at the agent boundary:
+Input and output validation at the agent boundary. You define your own validation logic — Axl calls it before and after each LLM turn:
 ```typescript
+// Your validation functions — Axl doesn't ship these, you bring your own
+const containsPII = (text: string) => /\b\d{3}-\d{2}-\d{4}\b/.test(text);
+const isOffTopic = (text: string) => !text.toLowerCase().includes('support');
 const safe = agent({
   model: 'openai:gpt-4o',
   system: 'You are a helpful assistant.',
@@ -254,10 +290,11 @@ When `onBlock` is `'retry'`, the LLM sees the block reason and self-corrects (sa
 ```typescript
 const session = runtime.session('user-123', {
   history: {
-    maxMessages: 100,   // Trim oldest messages when exceeded
-    summarize: true,    // Auto-summarize trimmed messages
+    maxMessages: 100,          // Trim oldest messages when exceeded
+    summarize: true,           // Auto-summarize trimmed messages
+    summaryModel: 'openai:gpt-4o-mini',  // Model for summarization
   },
-  persist: true,        // Save to StateStore (default: true)
+  persist: true,               // Save to StateStore (default: true)
 });
 ```
@@ -267,6 +304,7 @@ const session = runtime.session('user-123', {
 |--------|------|---------|-------------|
 | `history.maxMessages` | `number` | unlimited | Max messages to retain |
 | `history.summarize` | `boolean` | `false` | Summarize trimmed messages |
+| `history.summaryModel` | `string` | — | Model URI for summarization (required when `summarize: true`) |
 | `persist` | `boolean` | `true` | Persist history to StateStore |
 ### Error Hierarchy
@@ -306,58 +344,17 @@ const runtime = new AxlRuntime({
 ### Provider URIs
-Four built-in providers are supported:
+Four built-in providers using the `provider:model` URI scheme:
 ```
-# OpenAI — Chat Completions API
-openai:gpt-4o                          # Flagship multimodal
-openai:gpt-4o-mini                     # Fast and affordable
-openai:gpt-4.1                         # GPT-4.1
-openai:gpt-4.1-mini                    # GPT-4.1 small
-openai:gpt-4.1-nano                    # GPT-4.1 cheapest
-openai:gpt-5                           # GPT-5
-openai:gpt-5-mini                      # GPT-5 small
-openai:gpt-5-nano                      # GPT-5 cheapest
-openai:gpt-5.1                         # GPT-5.1
-openai:gpt-5.2                         # GPT-5.2
-openai:o1                              # Reasoning
-openai:o1-mini                         # Reasoning (small)
-openai:o1-pro                          # Reasoning (pro)
-openai:o3                              # Reasoning
-openai:o3-mini                         # Reasoning (small)
-openai:o3-pro                          # Reasoning (pro)
-openai:o4-mini                         # Reasoning (small)
-openai:gpt-4-turbo                     # Legacy
-openai:gpt-4                           # Legacy
-openai:gpt-3.5-turbo                   # Legacy
-# OpenAI — Responses API (same models, better caching, native reasoning)
-openai-responses:gpt-4o
-openai-responses:o3
-# Anthropic
-anthropic:claude-opus-4-6              # Most capable
-anthropic:claude-sonnet-4-5            # Balanced
-anthropic:claude-haiku-4-5             # Fast and affordable
-anthropic:claude-sonnet-4              # Previous gen
-anthropic:claude-opus-4                # Previous gen
-anthropic:claude-3-7-sonnet            # Legacy
-anthropic:claude-3-5-sonnet            # Legacy
-anthropic:claude-3-5-haiku             # Legacy
-anthropic:claude-3-opus                # Legacy
-anthropic:claude-3-sonnet              # Legacy
-anthropic:claude-3-haiku               # Legacy
-# Google Gemini
-google:gemini-2.5-pro                  # Most capable
-google:gemini-2.5-flash                # Fast
-google:gemini-2.5-flash-lite           # Cheapest 2.5
-google:gemini-2.0-flash                # Previous gen
-google:gemini-2.0-flash-lite           # Previous gen (lite)
-google:gemini-3-pro-preview            # Next gen (preview)
-google:gemini-3-flash-preview          # Next gen fast (preview)
+openai:gpt-4o                          # OpenAI Chat Completions
+openai-responses:gpt-4o                # OpenAI Responses API
+anthropic:claude-sonnet-4-5            # Anthropic
+google:gemini-2.5-pro                  # Google Gemini
 ```
+See [docs/providers.md](../../docs/providers.md) for the full model list including reasoning models.
 ## License
 [Apache 2.0](../../LICENSE)

package/dist/index.cjs — CHANGED

@@ -331,6 +331,24 @@ function estimateOpenAICost(model, promptTokens, completionTokens, cachedTokens)
 function isReasoningModel(model) {
   return /^(o1|o3|o4-mini)/.test(model);
 }
+function thinkingToReasoningEffort(thinking) {
+  if (typeof thinking === "object") {
+    const budget = thinking.budgetTokens;
+    if (budget <= 1024) return "low";
+    if (budget <= 8192) return "medium";
+    return "high";
+  }
+  switch (thinking) {
+    case "low":
+      return "low";
+    case "medium":
+      return "medium";
+    case "high":
+      return "high";
+    case "max":
+      return "xhigh";
+  }
+}
 var OpenAIProvider = class {
   name = "openai";
   baseUrl;
@@ -433,7 +451,9 @@ var OpenAIProvider = class {
     if (options.stop) body.stop = options.stop;
     if (options.tools && options.tools.length > 0) {
       body.tools = options.tools;
-      body.parallel_tool_calls = true;
+      if (!reasoning) {
+        body.parallel_tool_calls = true;
+      }
     }
     if (options.toolChoice !== void 0) {
       body.tool_choice = options.toolChoice;
@@ -441,8 +461,11 @@ var OpenAIProvider = class {
     if (options.responseFormat) {
       body.response_format = options.responseFormat;
     }
-    if (options.reasoningEffort) {
-      body.reasoning_effort = options.reasoningEffort;
+    if (reasoning) {
+      const effort = options.thinking ? thinkingToReasoningEffort(options.thinking) : options.reasoningEffort;
+      if (effort) {
+        body.reasoning_effort = effort;
+      }
     }
     if (stream) {
       body.stream_options = { include_usage: true };
@@ -633,8 +656,11 @@ var OpenAIResponsesProvider = class {
         body.tool_choice = options.toolChoice;
       }
     }
-    if (options.reasoningEffort) {
-      body.reasoning = { effort: options.reasoningEffort };
+    if (reasoning) {
+      const effort = options.thinking ? thinkingToReasoningEffort(options.thinking) : options.reasoningEffort;
+      if (effort) {
+        body.reasoning = { effort };
+      }
     }
     if (options.responseFormat) {
       body.text = { format: this.mapResponseFormat(options.responseFormat) };
@@ -868,6 +894,24 @@ function estimateAnthropicCost(model, inputTokens, outputTokens, cacheReadTokens
   const inputCost = (inputTokens - cacheRead - cacheWrite) * inputRate + cacheRead * inputRate * 0.1 + cacheWrite * inputRate * 1.25;
   return inputCost + outputTokens * outputRate;
 }
+var THINKING_BUDGETS = {
+  low: 1024,
+  medium: 5e3,
+  high: 1e4,
+  // 30000 (not 32000) to stay under the 32K max_tokens limit on Opus 4/4.1.
+  // With auto-bump (+1024), max_tokens becomes 31024 which fits all models.
+  max: 3e4
+};
+function thinkingToBudgetTokens(thinking) {
+  if (typeof thinking === "string") return THINKING_BUDGETS[thinking] ?? 5e3;
+  return thinking.budgetTokens;
+}
+function supportsAdaptiveThinking(model) {
+  return model.startsWith("claude-opus-4-6") || model.startsWith("claude-sonnet-4-6");
+}
+function supportsMaxEffort(model) {
+  return model.startsWith("claude-opus-4-6");
+}
 var AnthropicProvider = class {
   name = "anthropic";
   baseUrl;
@@ -957,7 +1001,7 @@ var AnthropicProvider = class {
     if (systemText) {
       body.system = systemText;
     }
-    if (options.temperature !== void 0) {
+    if (options.temperature !== void 0 && !options.thinking) {
       body.temperature = options.temperature;
     }
     if (options.stop) {
@@ -966,6 +1010,23 @@ var AnthropicProvider = class {
     if (options.tools && options.tools.length > 0) {
       body.tools = options.tools.map((t) => this.mapToolDefinition(t));
     }
+    if (options.toolChoice !== void 0) {
+      body.tool_choice = this.mapToolChoice(options.toolChoice);
+    }
+    if (options.thinking) {
+      if (typeof options.thinking === "string" && supportsAdaptiveThinking(options.model) && // 'max' effort is only supported on Opus 4.6; Sonnet 4.6 falls back to manual mode
+      (options.thinking !== "max" || supportsMaxEffort(options.model))) {
+        body.thinking = { type: "adaptive" };
+        body.output_config = { effort: options.thinking };
+      } else {
+        const budgetTokens = thinkingToBudgetTokens(options.thinking);
+        body.thinking = { type: "enabled", budget_tokens: budgetTokens };
+        const currentMax = body.max_tokens;
+        if (currentMax < budgetTokens + 1024) {
+          body.max_tokens = budgetTokens + 1024;
+        }
+      }
+    }
     if (options.responseFormat && options.responseFormat.type !== "text") {
       const jsonInstruction = "You must respond with valid JSON only. No markdown fences, no extra text.";
       body.system = body.system ? `${body.system}
@@ -1061,6 +1122,22 @@ ${jsonInstruction}` : jsonInstruction;
       input_schema: tool2.function.parameters
     };
   }
+  /**
+   * Map Axl's ToolChoice to Anthropic's tool_choice format.
+   *
+   * Axl (OpenAI format)          → Anthropic format
+   * 'auto'                       → { type: 'auto' }
+   * 'none'                       → { type: 'none' }
+   * 'required'                   → { type: 'any' }
+   * { type:'function', function: { name } } → { type: 'tool', name }
+   */
+  mapToolChoice(choice) {
+    if (typeof choice === "string") {
+      if (choice === "required") return { type: "any" };
+      return { type: choice };
+    }
+    return { type: "tool", name: choice.function.name };
+  }
   // ---------------------------------------------------------------------------
   // Internal: response parsing
   // ---------------------------------------------------------------------------
@@ -1238,6 +1315,16 @@ function estimateGeminiCost(model, inputTokens, outputTokens, cachedTokens) {
   const inputCost = (inputTokens - cached) * inputRate + cached * inputRate * 0.1;
   return inputCost + outputTokens * outputRate;
 }
+var THINKING_BUDGETS2 = {
+  low: 1024,
+  medium: 5e3,
+  high: 1e4,
+  max: 24576
+};
+function thinkingToBudgetTokens2(thinking) {
+  if (typeof thinking === "string") return THINKING_BUDGETS2[thinking] ?? 5e3;
+  return thinking.budgetTokens;
+}
 var GeminiProvider = class {
   name = "google";
   baseUrl;
@@ -1351,6 +1438,17 @@ var GeminiProvider = class {
     if (Object.keys(generationConfig).length > 0) {
       body.generationConfig = generationConfig;
     }
+    if (options.thinking) {
+      generationConfig.thinkingConfig = {
+        thinkingBudget: thinkingToBudgetTokens2(options.thinking)
+      };
+      if (!body.generationConfig) {
+        body.generationConfig = generationConfig;
+      }
+    }
+    if (options.toolChoice !== void 0) {
+      body.toolConfig = { functionCallingConfig: this.mapToolChoice(options.toolChoice) };
+    }
     return body;
   }
   /**
@@ -1442,6 +1540,25 @@ var GeminiProvider = class {
     }
     return merged;
   }
+  /**
+   * Map Axl's ToolChoice to Gemini's functionCallingConfig format.
+   *
+   * - 'auto'     → { mode: 'AUTO' }
+   * - 'none'     → { mode: 'NONE' }
+   * - 'required' → { mode: 'ANY' }
+   * - { type: 'function', function: { name } } → { mode: 'ANY', allowedFunctionNames: [name] }
+   */
+  mapToolChoice(choice) {
+    if (typeof choice === "string") {
+      const modeMap = {
+        auto: "AUTO",
+        none: "NONE",
+        required: "ANY"
+      };
+      return { mode: modeMap[choice] ?? "AUTO" };
+    }
+    return { mode: "ANY", allowedFunctionNames: [choice.function.name] };
+  }
   mapToolDefinition(tool2) {
     return {
       name: tool2.function.name,
@@ -1890,6 +2007,15 @@ function zodToJsonSchema(schema) {
 function estimateTokens(text) {
   return Math.ceil(text.length / 4);
 }
+function stripMarkdownFences(text) {
+  const trimmed = text.trim();
+  if (trimmed.startsWith("```")) {
+    const withoutOpening = trimmed.replace(/^```\w*\s*\n?/, "");
+    const withoutClosing = withoutOpening.replace(/\n?```\s*$/, "");
+    return withoutClosing.trim();
+  }
+  return trimmed;
+}
 function estimateMessagesTokens(messages) {
   let total = 0;
   for (const msg of messages) {
@@ -2015,7 +2141,13 @@ var WorkflowContext = class {
         model: agent2.resolveModel(resolveCtx),
         cost: costAfter - costBefore,
         duration: Date.now() - startTime,
-        promptVersion: agent2._config.version
+        promptVersion: agent2._config.version,
+        temperature: options?.temperature ?? agent2._config.temperature,
+        maxTokens: options?.maxTokens ?? agent2._config.maxTokens ?? 4096,
+        thinking: options?.thinking ?? agent2._config.thinking,
+        reasoningEffort: options?.reasoningEffort ?? agent2._config.reasoningEffort,
+        toolChoice: options?.toolChoice ?? agent2._config.toolChoice,
+        stop: options?.stop ?? agent2._config.stop
       });
       return result;
     });
@@ -2140,11 +2272,21 @@ Please fix and try again.`;
         throw new TimeoutError("ctx.ask()", timeoutMs);
       }
       turns++;
+      const thinking = options?.thinking ?? agent2._config.thinking;
+      if (thinking && typeof thinking === "object" && thinking.budgetTokens <= 0) {
+        throw new Error(
+          `thinking.budgetTokens must be a positive number, got ${thinking.budgetTokens}`
+        );
+      }
       const chatOptions = {
         model,
-        temperature: agent2._config.temperature,
+        temperature: options?.temperature ?? agent2._config.temperature,
         tools: toolDefs.length > 0 ? toolDefs : void 0,
-        maxTokens: 4096,
+        maxTokens: options?.maxTokens ?? agent2._config.maxTokens ?? 4096,
+        thinking,
+        reasoningEffort: options?.reasoningEffort ?? agent2._config.reasoningEffort,
+        toolChoice: options?.toolChoice ?? agent2._config.toolChoice,
+        stop: options?.stop ?? agent2._config.stop,
         signal: this.currentSignal
       };
       if (options?.schema && toolDefs.length === 0) {
@@ -2243,10 +2385,11 @@ Please fix and try again.`;
                 }
               }
               const handoffStart = Date.now();
+              const handoffOptions = options ? { schema: options.schema, retries: options.retries, metadata: options.metadata } : void 0;
               const handoffFn = () => this.executeAgentCall(
                 descriptor.agent,
                 handoffPrompt,
-                options,
+                handoffOptions,
                 0,
                 void 0,
                 void 0,
@@ -2583,7 +2726,7 @@ Please fix and try again.`;
       }
       if (options?.schema) {
         try {
-          const parsed = JSON.parse(content);
+          const parsed = JSON.parse(stripMarkdownFences(content));
           const validated = options.schema.parse(parsed);
           return validated;
         } catch (err) {