npm - @agentforge-io/core - Versions diffs - 2.2.4 → 2.3.1 - Mend

@agentforge-io/core 2.2.4 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dist/ai/index.d.ts +3 -0
package/dist/ai/index.js +4 -1
package/dist/index.d.ts +1 -0
package/dist/index.js +4 -0
package/dist/providers/anthropic-provider.d.ts +27 -0
package/dist/providers/anthropic-provider.js +206 -0
package/dist/providers/index.d.ts +3 -0
package/dist/providers/index.js +6 -0
package/dist/providers/types.d.ts +135 -0
package/dist/providers/types.js +17 -0
package/dist/services/agent-runner.service.d.ts +44 -6
package/dist/services/agent-runner.service.js +216 -258
package/dist/services/agent.service.js +56 -0
package/package.json +2 -2

package/dist/ai/index.d.ts CHANGED Viewed

@@ -15,3 +15,6 @@ export { PREPARED_STREAM_STORE, type PreparedStreamStore, type PreparedStreamPay
 export { InMemoryPreparedStreamStore } from '../services/in-memory-prepared-stream.store';
 export { JOB_QUEUE, type JobQueue, type JobStatus, type JobState, type JobContext, type JobProcessor, type EnqueueOptions, type QueueMetrics, } from '../adapters/job-queue/job-queue.types';
 export { InMemoryJobQueue, type InMemoryJobQueueOptions, } from '../adapters/job-queue/in-memory';
+export type { LLMProvider, LLMProviderCapabilities, LLMStreamParams, LLMStreamEvent, LLMMessage, LLMContentBlock, LLMToolSchema, } from '../providers/types';
+export { AnthropicProvider, modelRejectsTemperature, } from '../providers/anthropic-provider';
+export type { AnthropicProviderOptions } from '../providers/anthropic-provider';

package/dist/ai/index.js CHANGED Viewed

@@ -8,7 +8,7 @@
 //
 // Files still co-located physically; this is a logical seam.
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.InMemoryJobQueue = exports.JOB_QUEUE = exports.InMemoryPreparedStreamStore = exports.PREPARED_STREAM_STORE = exports.AgentJobWorker = exports.AgentForbiddenError = exports.AgentService = exports.ConversationNotFoundError = exports.ConversationService = exports.PreparedStreamError = exports.PreparedStreamService = exports.OrchestratorService = exports.AgentRunnerService = exports.ToolRegistryService = exports.DEFAULT_SHORT_INPUT_TOKENS = exports.DEFAULT_LONG_CONTEXT_TOKENS = exports.selectModel = exports.CURRENT_USER = exports.AGENT_QUEUE_NAME = exports.AGENT_FORGE_CONFIG = void 0;
+exports.modelRejectsTemperature = exports.AnthropicProvider = exports.InMemoryJobQueue = exports.JOB_QUEUE = exports.InMemoryPreparedStreamStore = exports.PREPARED_STREAM_STORE = exports.AgentJobWorker = exports.AgentForbiddenError = exports.AgentService = exports.ConversationNotFoundError = exports.ConversationService = exports.PreparedStreamError = exports.PreparedStreamService = exports.OrchestratorService = exports.AgentRunnerService = exports.ToolRegistryService = exports.DEFAULT_SHORT_INPUT_TOKENS = exports.DEFAULT_LONG_CONTEXT_TOKENS = exports.selectModel = exports.CURRENT_USER = exports.AGENT_QUEUE_NAME = exports.AGENT_FORGE_CONFIG = void 0;
 // ─── Constants ─────────────────────────────────────────────────────────────
 var constants_1 = require("../constants");
 Object.defineProperty(exports, "AGENT_FORGE_CONFIG", { enumerable: true, get: function () { return constants_1.AGENT_FORGE_CONFIG; } });
@@ -45,3 +45,6 @@ var job_queue_types_1 = require("../adapters/job-queue/job-queue.types");
 Object.defineProperty(exports, "JOB_QUEUE", { enumerable: true, get: function () { return job_queue_types_1.JOB_QUEUE; } });
 var in_memory_1 = require("../adapters/job-queue/in-memory");
 Object.defineProperty(exports, "InMemoryJobQueue", { enumerable: true, get: function () { return in_memory_1.InMemoryJobQueue; } });
+var anthropic_provider_1 = require("../providers/anthropic-provider");
+Object.defineProperty(exports, "AnthropicProvider", { enumerable: true, get: function () { return anthropic_provider_1.AnthropicProvider; } });
+Object.defineProperty(exports, "modelRejectsTemperature", { enumerable: true, get: function () { return anthropic_provider_1.modelRejectsTemperature; } });

package/dist/index.d.ts CHANGED Viewed

@@ -9,6 +9,7 @@ export { InMemoryRateLimiter } from './adapters/rate-limiter/in-memory';
 export { RedisRateLimiter, type RedisLike } from './adapters/rate-limiter/redis';
 export { JOB_QUEUE, type JobQueue, type JobStatus, type JobState, type JobContext, type JobProcessor, type EnqueueOptions, type QueueMetrics, } from './adapters/job-queue/job-queue.types';
 export { InMemoryJobQueue, type InMemoryJobQueueOptions, } from './adapters/job-queue/in-memory';
+export * from './providers';
 export * from './services';
 export type { AgentResolver, AgentRecord, AgentResolveParams, } from './services/agent.service';
 export { toAgentDefinition } from './services/agent.service';

package/dist/index.js CHANGED Viewed

@@ -52,6 +52,10 @@ var job_queue_types_1 = require("./adapters/job-queue/job-queue.types");
 Object.defineProperty(exports, "JOB_QUEUE", { enumerable: true, get: function () { return job_queue_types_1.JOB_QUEUE; } });
 var in_memory_2 = require("./adapters/job-queue/in-memory");
 Object.defineProperty(exports, "InMemoryJobQueue", { enumerable: true, get: function () { return in_memory_2.InMemoryJobQueue; } });
+// ─── LLM providers (framework-free) ─────────────────────────────────────────
+// Provider abstraction lets hosts swap Anthropic for LangChain-backed
+// providers (OpenAI/Grok/Gemini) without modifying the runner.
+__exportStar(require("./providers"), exports);
 // ─── Services (framework-free) ──────────────────────────────────────────────
 __exportStar(require("./services"), exports);
 // `toAgentDefinition` is the adapter from the host's `AgentRecord` shape

package/dist/providers/anthropic-provider.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+import type { LLMProvider, LLMProviderCapabilities, LLMStreamEvent, LLMStreamParams } from './types';
+/**
+ * Anthropic's newer model families deprecated the `temperature` parameter
+ * entirely — they auto-tune sampling internally and return 400
+ * `invalid_request_error: \`temperature\` is deprecated for this model` if
+ * the caller still sends one. Older families (3.x, the original 4.0
+ * releases) accept it fine.
+ *
+ * Detection by string match on the model id rather than a hard-coded
+ * allowlist: new model ids land between SDK releases, and we don't want
+ * to break temperature on legacy agents the day a new family ships.
+ *
+ * @param model Model id to inspect. An empty string or `undefined` is
+ *              treated as "does not reject temperature".
+ * @returns `true` when the id matches one of the "newer family" patterns
+ *          implemented in `anthropic-provider.js`, `false` otherwise.
+ */
+export declare function modelRejectsTemperature(model: string | undefined): boolean;
+/** Constructor options for `AnthropicProvider`. Both fields are forwarded
+ *  as-is to the `@anthropic-ai/sdk` client constructor. */
+export interface AnthropicProviderOptions {
+    /** API key handed to the Anthropic SDK client. */
+    apiKey: string;
+    /** Optional override for the Anthropic API base URL (proxies, custom
+     *  gateways). Defaults to the SDK's built-in production endpoint. */
+    baseURL?: string;
+}
+/**
+ * `LLMProvider` implementation that wraps the `@anthropic-ai/sdk` client.
+ */
+export declare class AnthropicProvider implements LLMProvider {
+    /** Stable provider id. */
+    readonly id = "anthropic";
+    /** Human-readable provider label. */
+    readonly displayName = "Anthropic";
+    /** Static capability flags; the implementation sets all four to `true`. */
+    readonly capabilities: LLMProviderCapabilities;
+    /** Underlying Anthropic SDK client, built from the constructor options. */
+    private readonly client;
+    constructor(opts: AnthropicProviderOptions);
+    /** Streams one assistant turn as normalized `LLMStreamEvent`s. */
+    stream(params: LLMStreamParams): AsyncGenerator<LLMStreamEvent>;
+}

package/dist/providers/anthropic-provider.js ADDED Viewed

@@ -0,0 +1,206 @@
+"use strict";
+// ─── Anthropic provider ──────────────────────────────────────────────────────
+//
+// Wraps `@anthropic-ai/sdk` behind the framework-free `LLMProvider` contract.
+// The runner no longer talks to Anthropic directly — it goes through this
+// adapter, which keeps every Anthropic-specific quirk (stream event shape,
+// the 4.5+ temperature deprecation, the `ToolResultBlockParam` content
+// envelope) confined to one file.
+//
+// Behavioural parity with the pre-refactor runner is the goal: same streaming
+// granularity, same model-router triggers, same temperature-rejection
+// heuristic, same `(tool completed with no output)` sentinel. Anything else
+// would be a silent behaviour change for every existing AgentForge install.
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.AnthropicProvider = void 0;
+exports.modelRejectsTemperature = modelRejectsTemperature;
+const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
+/**
+ * Anthropic's newer model families deprecated the `temperature` parameter
+ * entirely — they auto-tune sampling internally and return 400
+ * `invalid_request_error: \`temperature\` is deprecated for this model` if
+ * the caller still sends one. Older families (3.x, the original 4.0
+ * releases) accept it fine.
+ *
+ * Detection by string match on the model id rather than a hard-coded
+ * allowlist: new model ids land between SDK releases, and we don't want
+ * to break temperature on legacy agents the day a new family ships.
+ *
+ * Two patterns are tested against the lower-cased id:
+ *   1. `claude-<letters>-4-<5..9>` followed by a word boundary, e.g.
+ *      "claude-opus-4-7" or "claude-sonnet-4-5-20250929".
+ *   2. `claude-<letters>-<5..9>-`, i.e. a major version of 5–9 followed by
+ *      a hyphen, e.g. "claude-opus-5-0".
+ * Ids in the older "claude-3-5-sonnet" layout match neither pattern because
+ * the segment right after "claude-" must be alphabetic.
+ *
+ * NOTE(review): pattern 2 requires a trailing hyphen, so a bare id such as
+ * "claude-opus-5" is not matched, and pattern 1 only covers single-digit
+ * minors — confirm both are intended.
+ *
+ * @param {string | undefined} model Model id; falsy values return `false`.
+ * @returns {boolean} `true` when `temperature` should be omitted.
+ */
+function modelRejectsTemperature(model) {
+    if (!model)
+        return false;
+    // Strip bracket suffixes like "claude-opus-4-7[1m]" (long-context variant). Vendor prefixes
+    // like "anthropic/claude-..." need no stripping: the patterns below are unanchored.
+    const m = model.toLowerCase().replace(/\[[^\]]*\]/g, '');
+    if (/claude-[a-z]+-4-([5-9])\b/.test(m))
+        return true;
+    if (/claude-[a-z]+-([5-9])-/.test(m))
+        return true;
+    return false;
+}
+class AnthropicProvider {
+    constructor(opts) {
+        this.id = 'anthropic';
+        this.displayName = 'Anthropic';
+        this.capabilities = {
+            supportsTools: true,
+            supportsStreaming: true,
+            // Per-call gating still lives in `stream()` because the heuristic is
+            // model-specific (Claude 4.0 vs 4.5+) — at the provider level we
+            // declare "we know how to handle temperature when supplied" and let
+            // the stream method decide on a per-turn basis.
+            supportsTemperature: true,
+            supportsParallelTools: true,
+        };
+        this.client = new sdk_1.default({
+            apiKey: opts.apiKey,
+            baseURL: opts.baseURL,
+        });
+    }
+    async *stream(params) {
+        const includeTemperature = typeof params.temperature === 'number' &&
+            !modelRejectsTemperature(params.model);
+        const stream = this.client.messages.stream({
+            model: params.model,
+            max_tokens: params.maxTokens,
+            ...(includeTemperature ? { temperature: params.temperature } : {}),
+            system: params.systemPrompt,
+            messages: toAnthropicMessages(params.messages),
+            tools: params.tools,
+        });
+        // Mid-stream events — text deltas land here; tool_use blocks are
+        // recognised at `content_block_start` so the runner can yield a
+        // `tool_use_start` chunk to its SSE consumer immediately. The full
+        // parsed input only lands once `finalMessage()` resolves; the runner
+        // doesn't need it mid-stream so we just forward the name+id.
+        let usageInput = 0;
+        let usageOutput = 0;
+        let usageCacheCreate = 0;
+        let usageCacheRead = 0;
+        for await (const event of stream) {
+            if (event.type === 'content_block_start') {
+                if (event.content_block.type === 'tool_use') {
+                    yield {
+                        type: 'tool_use_start',
+                        toolName: event.content_block.name,
+                        toolUseId: event.content_block.id,
+                        // Input arrives as JSON deltas; we don't have it yet at
+                        // `content_block_start`. Runner reads the parsed input
+                        // from `message_stop.content` below.
+                        input: {},
+                    };
+                }
+            }
+            else if (event.type === 'content_block_delta') {
+                if (event.delta.type === 'text_delta') {
+                    yield { type: 'text_delta', delta: event.delta.text };
+                }
+            }
+            else if (event.type === 'message_delta') {
+                const deltaUsage = event.usage;
+                if (deltaUsage) {
+                    usageInput += deltaUsage.input_tokens ?? 0;
+                    usageOutput += deltaUsage.output_tokens ?? 0;
+                    usageCacheCreate += deltaUsage.cache_creation_input_tokens ?? 0;
+                    usageCacheRead += deltaUsage.cache_read_input_tokens ?? 0;
+                }
+            }
+        }
+        const finalMessage = await stream.finalMessage();
+        // Emit a single usage_delta with the total — keeps the contract simple
+        // (no caller needs to know whether the wire format buffered usage).
+        yield {
+            type: 'usage_delta',
+            usage: {
+                inputTokens: usageInput || finalMessage.usage?.input_tokens || 0,
+                outputTokens: usageOutput || finalMessage.usage?.output_tokens || 0,
+                totalTokens: (usageInput || finalMessage.usage?.input_tokens || 0) +
+                    (usageOutput || finalMessage.usage?.output_tokens || 0),
+                cacheCreationInputTokens: usageCacheCreate || undefined,
+                cacheReadInputTokens: usageCacheRead || undefined,
+            },
+        };
+        yield {
+            type: 'message_stop',
+            stopReason: normalizeStopReason(finalMessage.stop_reason),
+            content: fromAnthropicContent(finalMessage.content),
+        };
+    }
+}
+exports.AnthropicProvider = AnthropicProvider;
+// ─── Translation helpers ────────────────────────────────────────────────────
+/**
+ * Translate the SDK's provider-agnostic `LLMMessage[]` into Anthropic's
+ * native `MessageParam[]`. The shapes are deliberately close — tool_use /
+ * tool_result blocks already mirror Anthropic's content blocks 1:1 — so
+ * the mapping is mechanical.
+ *
+ * String content is passed through untouched. Array content is mapped
+ * block by block: `text` and `tool_use` blocks are copied field for field,
+ * and every other block is emitted as a `tool_result` (there is no explicit
+ * type check on that last branch).
+ *
+ * @param messages Provider-agnostic messages (`role` + `content`).
+ * @returns A new array of `{ role, content }` objects; the input array is
+ *          not mutated.
+ */
+function toAnthropicMessages(messages) {
+    return messages.map((m) => {
+        if (typeof m.content === 'string') {
+            return { role: m.role, content: m.content };
+        }
+        return {
+            role: m.role,
+            content: m.content.map((block) => {
+                if (block.type === 'text') {
+                    return { type: 'text', text: block.text };
+                }
+                if (block.type === 'tool_use') {
+                    return {
+                        type: 'tool_use',
+                        id: block.id,
+                        name: block.name,
+                        input: block.input,
+                    };
+                }
+                return {
+                    type: 'tool_result',
+                    tool_use_id: block.tool_use_id,
+                    content: block.content,
+                    is_error: block.is_error,
+                };
+            }),
+        };
+    });
+}
+/**
+ * Inverse of `toAnthropicMessages` for the final-message envelope. The
+ * runner uses this to rebuild `currentMessages` for the next iteration of
+ * the tool loop without retaining Anthropic types in its own state.
+ *
+ * Only `text` and `tool_use` blocks are carried over; a block of any other
+ * type is skipped, so the result can be shorter than the input.
+ *
+ * @param content Content blocks of the final Anthropic message.
+ * @returns A new array of provider-agnostic `text` / `tool_use` blocks, in
+ *          the original order.
+ */
+function fromAnthropicContent(content) {
+    const blocks = [];
+    for (const b of content) {
+        if (b.type === 'text') {
+            blocks.push({ type: 'text', text: b.text });
+        }
+        else if (b.type === 'tool_use') {
+            blocks.push({
+                type: 'tool_use',
+                id: b.id,
+                name: b.name,
+                input: b.input,
+            });
+        }
+        // server_tool_use / web_search_tool_result are Anthropic-only
+        // built-ins we don't surface today — silently drop them. If we
+        // ever expose them, add explicit cases here.
+    }
+    return blocks;
+}
+/**
+ * Map the raw `stop_reason` of the final Anthropic message onto the four
+ * values `message_stop.stopReason` may carry.
+ *
+ * `tool_use`, `max_tokens` and `stop_sequence` pass through unchanged;
+ * every other value — including `null` / `undefined` — becomes `end_turn`.
+ *
+ * @param raw Stop reason as reported by the SDK.
+ * @returns One of 'end_turn' | 'tool_use' | 'max_tokens' | 'stop_sequence'.
+ */
+function normalizeStopReason(raw) {
+    switch (raw) {
+        case 'tool_use':
+            return 'tool_use';
+        case 'max_tokens':
+            return 'max_tokens';
+        case 'stop_sequence':
+            return 'stop_sequence';
+        default:
+            return 'end_turn';
+    }
+}

package/dist/providers/index.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+export type { LLMProvider, LLMProviderCapabilities, LLMStreamParams, LLMStreamEvent, LLMMessage, LLMContentBlock, LLMToolSchema, } from './types';
+export { AnthropicProvider, modelRejectsTemperature } from './anthropic-provider';
+export type { AnthropicProviderOptions } from './anthropic-provider';

package/dist/providers/index.js ADDED Viewed

@@ -0,0 +1,6 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.modelRejectsTemperature = exports.AnthropicProvider = void 0;
+var anthropic_provider_1 = require("./anthropic-provider");
+Object.defineProperty(exports, "AnthropicProvider", { enumerable: true, get: function () { return anthropic_provider_1.AnthropicProvider; } });
+Object.defineProperty(exports, "modelRejectsTemperature", { enumerable: true, get: function () { return anthropic_provider_1.modelRejectsTemperature; } });

package/dist/providers/types.d.ts ADDED Viewed

@@ -0,0 +1,135 @@
+import type { TokenUsage } from '../types/agent.types';
+/** Static feature flags a provider advertises to the runner and to UIs. */
+export interface LLMProviderCapabilities {
+    /** Tool calling support. Old Anthropic 2.x and most fine-tunes don't
+     *  expose tools; the runner falls back to text-only completions when
+     *  this is false (no tools attached, no tool_use_start chunks). */
+    supportsTools: boolean;
+    /** Streaming (token-by-token) support. Required for the chat surface
+     *  in AgentForge today; a `false` provider can still be used for
+     *  one-shot helpers (approval copywriter) but not for chat. */
+    supportsStreaming: boolean;
+    /** Some Anthropic 4.5+ families reject the `temperature` parameter
+     *  entirely (auto-tune sampling internally). Set false on those so
+     *  the runner skips the param instead of emitting a 400.
+     *
+     *  Note: the bundled `AnthropicProvider` always reports `true` here and
+     *  instead drops the parameter per model inside its `stream()` (via
+     *  `modelRejectsTemperature`), so this flag alone does not say whether
+     *  a given model accepts `temperature`. */
+    supportsTemperature: boolean;
+    /** Whether the provider lets the model emit multiple tool_use blocks
+     *  in a single assistant turn. Anthropic does. OpenAI does. Some
+     *  smaller models only do one. The runner doesn't *require* this —
+     *  it loops regardless — but downstream UI can adapt. */
+    supportsParallelTools: boolean;
+}
+/**
+ * Provider-agnostic chat message. The runner translates the Anthropic-shaped
+ * `AnthropicMessage[]` it currently carries into this shape on the way INTO
+ * a provider call; the provider translates back into its own native shape
+ * just before hitting the wire. This keeps the runner's loop one-size-fits-
+ * all without leaking Anthropic types out of the AnthropicProvider.
+ *
+ * Both variants carry the same `content` shape (a plain string or an array
+ * of `LLMContentBlock`); they differ only in `role`. There is no `system`
+ * role — the system prompt travels separately in
+ * `LLMStreamParams.systemPrompt`.
+ */
+export type LLMMessage = {
+    role: 'user';
+    content: string | LLMContentBlock[];
+} | {
+    role: 'assistant';
+    content: string | LLMContentBlock[];
+};
+/**
+ * Multi-part message content. Mirrors Anthropic's content blocks without
+ * inheriting from `@anthropic-ai/sdk`. Providers that only speak plain text
+ * (some smaller LangChain models) flatten the array down to a string on
+ * their side.
+ *
+ * Variants, discriminated by `type`:
+ *   - `text`        — plain text.
+ *   - `tool_use`    — a tool call: call `id`, tool `name`, and `input`.
+ *   - `tool_result` — the outcome of a tool call as a string, with an
+ *                     optional `is_error` flag; `tool_use_id` presumably
+ *                     references the `id` of the matching `tool_use` block
+ *                     (TODO confirm against the runner).
+ */
+export type LLMContentBlock = {
+    type: 'text';
+    text: string;
+} | {
+    type: 'tool_use';
+    id: string;
+    name: string;
+    input: Record<string, unknown>;
+} | {
+    type: 'tool_result';
+    tool_use_id: string;
+    content: string;
+    is_error?: boolean;
+};
+/**
+ * Tool description handed to the provider so the model knows what to call.
+ * Same shape as Anthropic's `Tool` type; OpenAI's `function` shape is
+ * derived inside the LangChain provider via `convertToOpenAITool`.
+ *
+ * `AnthropicProvider.stream()` forwards the array unchanged as the
+ * request's `tools` field.
+ */
+export interface LLMToolSchema {
+    name: string;
+    description: string;
+    input_schema: Record<string, unknown>;
+}
+/** Input for a single `LLMProvider.stream()` call (one assistant turn). */
+export interface LLMStreamParams {
+    /** Concrete model id ('claude-opus-4-7', 'gpt-4o', 'grok-2-latest'). */
+    model: string;
+    systemPrompt: string;
+    messages: LLMMessage[];
+    tools?: LLMToolSchema[];
+    maxTokens: number;
+    /** Caller-supplied. The provider should drop it silently if
+     *  `capabilities.supportsTemperature` is false. The bundled
+     *  `AnthropicProvider` additionally drops it when the value is not a
+     *  number or when `modelRejectsTemperature(model)` returns true. */
+    temperature?: number;
+}
+/**
+ * Normalized event the provider yields during `stream()`. The runner consumes
+ * these and translates them into `StreamChunk` (the platform-facing shape).
+ *
+ * `text_delta`         → token-by-token assistant text.
+ * `tool_use_start`     → model decided to call a tool; carries the input
+ *                        the provider parsed out of its native event stream.
+ *                        The runner dispatches the tool and feeds the result
+ *                        back via a follow-up message in the next iteration.
+ *                        NOTE(review): the bundled `AnthropicProvider` emits
+ *                        this event with `input: {}` (the input is not yet
+ *                        known when the block starts); the parsed input is
+ *                        only available on `message_stop.content`.
+ * `usage_delta`        → cumulative token-usage update. Some providers only
+ *                        emit usage at the end (one event); others emit
+ *                        running totals — the runner sums whatever arrives.
+ *                        NOTE(review): summing running totals would
+ *                        double-count — confirm what the runner expects. The
+ *                        bundled `AnthropicProvider` emits exactly one
+ *                        `usage_delta` per turn, carrying the totals.
+ * `message_stop`       → end of the assistant turn. Carries the stop reason
+ *                        so the runner knows whether to loop again for tool
+ *                        results or finalize.
+ *
+ * NOTE: we deliberately don't surface `content_block_start`/`stop` etc. —
+ * those are Anthropic-specific transport details. Providers absorb them.
+ */
+export type LLMStreamEvent = {
+    type: 'text_delta';
+    delta: string;
+} | {
+    type: 'tool_use_start';
+    toolUseId: string;
+    toolName: string;
+    input: Record<string, unknown>;
+} | {
+    type: 'usage_delta';
+    usage: Partial<TokenUsage>;
+} | {
+    type: 'message_stop';
+    stopReason: 'end_turn' | 'tool_use' | 'max_tokens' | 'stop_sequence';
+    /** Final assistant content as a single array — used by the runner to
+     *  rebuild the assistant message for the next loop iteration without
+     *  re-asking the provider. Mirrors Anthropic's `finalMessage.content`. */
+    content: LLMContentBlock[];
+};
+/** Contract every concrete LLM backend implements for the runner. */
+export interface LLMProvider {
+    /** Stable id used by the platform's resolver to pick a provider via
+     *  `af_settings['llm.active_provider']`. Lowercase, hyphenated. */
+    readonly id: string;
+    /** Human-readable label for admin UIs and telemetry. */
+    readonly displayName: string;
+    readonly capabilities: LLMProviderCapabilities;
+    /**
+     * Streaming chat completion. Yields normalized events for one assistant
+     * turn. The runner calls `stream()` once per loop iteration — when the
+     * stop reason is `tool_use`, it appends tool results to `params.messages`
+     * and calls `stream()` again on the next iteration.
+     *
+     * Implementations MUST:
+     *   - emit `text_delta` events for assistant text as it arrives
+     *   - emit `tool_use_start` once the parsed tool input is complete
+     *   - emit `usage_delta` at least once (final total) when usage is known
+     *   - emit `message_stop` as the LAST event and only once
+     *   - throw on transport errors (caller handles fallback)
+     *
+     * NOTE(review): the bundled `AnthropicProvider` emits `tool_use_start`
+     * as soon as the tool_use block begins, with an empty `input`, and
+     * delivers the parsed input via `message_stop.content` — it does not
+     * follow the second rule as written. Confirm which behaviour the
+     * contract should require.
+     */
+    stream(params: LLMStreamParams): AsyncGenerator<LLMStreamEvent>;
+}

package/dist/providers/types.js ADDED Viewed

@@ -0,0 +1,17 @@
+"use strict";
+// ─── LLM Provider abstraction ────────────────────────────────────────────────
+//
+// Goal: every concrete LLM family (Anthropic, OpenAI via LangChain, Gemini,
+// Grok, …) ships behind the same `LLMProvider` interface so the rest of the
+// SDK (runner loop, orchestrator, approval copywriter) is provider-agnostic.
+//
+// The interface is intentionally narrow — it only owns one thing: turn a
+// prompt + tool catalog into a stream of normalized events. The agentic
+// loop (tool dispatch, approval gating, model routing) stays where it is
+// in `AgentRunnerService` because it is identical across providers.
+//
+// Streaming-only on purpose: every modern provider supports streaming and
+// the runner converts streamed chunks into the `StreamChunk` shape the
+// platform's SSE controller already speaks. A non-streaming `run()` would
+// be a second code path with the same loop — pointless.
+Object.defineProperty(exports, "__esModule", { value: true });

package/dist/services/agent-runner.service.d.ts CHANGED Viewed

@@ -3,15 +3,19 @@ import type { AgentDefinition, AnthropicConfig } from '../types/config.types';
 import type { ToolRegistryService } from './tool-registry.service';
 import type { Logger } from './tool-registry.service';
 import { type ToolApprovalGate } from './tool-approval-gate';
+import { type LLMProvider } from '../providers';
 /**
- * Framework-free runner for Claude. Handles the agentic loop (tool calls) for
- * sync runs and exposes streaming as an `AsyncGenerator<StreamChunk>` so any
- * transport (SSE, fetch+ReadableStream, WebSocket, etc.) can consume it.
+ * Framework-free runner. Handles the agentic loop (tool calls, model
+ * routing, approval gating) and delegates the LLM call itself to a
+ * pluggable `LLMProvider` — Anthropic by default, or LangChain-backed
+ * (OpenAI/Grok/Gemini/…) when the host wires a different provider in.
+ *
+ * Streaming is exposed as `AsyncGenerator<StreamChunk>` so any transport
+ * (SSE, fetch+ReadableStream, WebSocket, etc.) can consume it.
  */
 export declare class AgentRunnerService {
-    private readonly anthropicConfig;
     private readonly toolRegistry;
-    private readonly client;
+    private readonly provider;
     private readonly logger;
     /**
      * Optional pre-dispatch gate. When supplied, every tool call passes
@@ -27,7 +31,30 @@ export declare class AgentRunnerService {
      * story behaviorally identical to the pre-gate codebase.
      */
     private readonly approvalGate;
-    constructor(anthropicConfig: AnthropicConfig, toolRegistry: ToolRegistryService, opts?: {
+    /**
+     * Default model id surfaced to per-turn `selectModel()` when neither
+     * the agent nor the overrides pin one. Kept on the runner (not the
+     * provider) because the routing strategy is provider-agnostic — the
+     * provider only validates that the resolved model id is one it can
+     * serve.
+     */
+    private readonly defaultModel;
+    /** Default `max_tokens` ceiling when the agent / overrides leave it
+     *  unset. Same rationale as `defaultModel` — provider-agnostic knob. */
+    private readonly defaultMaxTokens;
+    /**
+     * Two-form constructor for backwards compatibility:
+     *
+     *   new AgentRunnerService(anthropicConfig, toolRegistry, opts?)
+     *      ^ legacy form — wraps `anthropicConfig` in an `AnthropicProvider`
+     *      so existing callers keep working without changes.
+     *
+     *   new AgentRunnerService({ provider, defaultModel?, defaultMaxTokens? },
+     *                          toolRegistry, opts?)
+     *      ^ new form — caller supplies any `LLMProvider` (Anthropic,
+     *      LangChain, …). The provider owns the wire-level call.
+     */
+    constructor(providerOrLegacyConfig: AnthropicConfig | RunnerProviderConfig, toolRegistry: ToolRegistryService, opts?: {
         logger?: Logger;
         approvalGate?: ToolApprovalGate;
     });
@@ -78,3 +105,14 @@ export declare class AgentRunnerService {
     private buildToolList;
     private dispatchTool;
 }
+/**
+ * New-style runner config: a fully-wired `LLMProvider` plus the runner-
+ * level defaults (`defaultModel`, `defaultMaxTokens`). The Anthropic-shaped
+ * legacy config (`AnthropicConfig`) remains accepted by the runner for
+ * back-compat — see the constructor's two-form signature.
+ *
+ * `provider` is required; both defaults are optional. The fallback values
+ * used when they are omitted are not visible in this declaration file.
+ */
+export interface RunnerProviderConfig {
+    provider: LLMProvider;
+    defaultModel?: string;
+    defaultMaxTokens?: number;
+}

package/dist/services/agent-runner.service.js CHANGED Viewed

@@ -1,213 +1,113 @@
 "use strict";
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.AgentRunnerService = void 0;
-const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
 const crypto_1 = require("crypto");
 const tool_approval_gate_1 = require("./tool-approval-gate");
 const model_strategy_1 = require("../types/model-strategy");
+const providers_1 = require("../providers");
 const noopLogger = {
     log: () => { }, warn: () => { }, debug: () => { }, error: () => { },
 };
 /**
- * Anthropic's newer model families deprecated the `temperature` parameter
- * entirely — they auto-tune sampling internally and return 400
- * `invalid_request_error: \`temperature\` is deprecated for this model` if
- * the caller still sends one. Older families (3.x, the original 4.0
- * releases) accept it fine.
- *
- * Detection by string match on the model id rather than a hard-coded
- * allowlist: new model ids land between SDK releases, and we don't want
- * to break temperature on legacy agents the day a new family ships.
- * Pattern: anything that contains `-4-5`, `-4-6`, `-4-7`, …, `-5-*`,
- * `-6-*`, etc. counts as "newer." Old 4-0 / 4-1 / 3-x ids are unaffected.
+ * Framework-free runner. Handles the agentic loop (tool calls, model
+ * routing, approval gating) and delegates the LLM call itself to a
+ * pluggable `LLMProvider` — Anthropic by default, or LangChain-backed
+ * (OpenAI/Grok/Gemini/…) when the host wires a different provider in.
  *
- * Heuristic, not exhaustive — if a future family lands with a different
- * naming convention we'll have to extend this. The cost of being wrong
- * is a single 400 the operator can fix by clearing the temperature in
- * the editor; the cost of NOT filtering is the same 400 today.
- */
-function modelRejectsTemperature(model) {
-    if (!model)
-        return false;
-    // Normalize: ignore vendor prefixes like "anthropic/claude-..." and
-    // bracket suffixes like "claude-opus-4-7[1m]" (long-context variant).
-    const m = model.toLowerCase().replace(/\[[^\]]*\]/g, '');
-    // claude-*-4-5, 4-6, 4-7, 4-8 …
-    if (/claude-[a-z]+-4-([5-9])\b/.test(m))
-        return true;
-    // claude-*-5-x, claude-*-6-x, … (future major bumps)
-    if (/claude-[a-z]+-([5-9])-/.test(m))
-        return true;
-    return false;
-}
-/**
- * Framework-free runner for Claude. Handles the agentic loop (tool calls) for
- * sync runs and exposes streaming as an `AsyncGenerator<StreamChunk>` so any
- * transport (SSE, fetch+ReadableStream, WebSocket, etc.) can consume it.
+ * Streaming is exposed as `AsyncGenerator<StreamChunk>` so any transport
+ * (SSE, fetch+ReadableStream, WebSocket, etc.) can consume it.
  */
 class AgentRunnerService {
-    constructor(anthropicConfig, toolRegistry, opts = {}) {
-        this.anthropicConfig = anthropicConfig;
+    /**
+     * Two-form constructor for backwards compatibility:
+     *
+     *   new AgentRunnerService(anthropicConfig, toolRegistry, opts?)
+     *      ^ legacy form — wraps `anthropicConfig` in an `AnthropicProvider`
+     *      so existing callers keep working without changes.
+     *
+     *   new AgentRunnerService({ provider, defaultModel?, defaultMaxTokens? },
+     *                          toolRegistry, opts?)
+     *      ^ new form — caller supplies any `LLMProvider` (Anthropic,
+     *      LangChain, …). The provider owns the wire-level call.
+     */
+    constructor(providerOrLegacyConfig, toolRegistry, opts = {}) {
         this.toolRegistry = toolRegistry;
-        this.client = new sdk_1.default({
-            apiKey: anthropicConfig.apiKey,
-            baseURL: anthropicConfig.baseURL,
-        });
+        if (isRunnerProviderConfig(providerOrLegacyConfig)) {
+            this.provider = providerOrLegacyConfig.provider;
+            this.defaultModel = providerOrLegacyConfig.defaultModel ?? 'claude-opus-4-6';
+            this.defaultMaxTokens = providerOrLegacyConfig.defaultMaxTokens;
+        }
+        else {
+            // Legacy path: build an AnthropicProvider from the inline config so
+            // every caller written before the LLMProvider abstraction landed
+            // keeps the exact same behaviour.
+            this.provider = new providers_1.AnthropicProvider({
+                apiKey: providerOrLegacyConfig.apiKey,
+                baseURL: providerOrLegacyConfig.baseURL,
+            });
+            this.defaultModel = providerOrLegacyConfig.defaultModel ?? 'claude-opus-4-6';
+            this.defaultMaxTokens = providerOrLegacyConfig.defaultMaxTokens;
+        }
         this.logger = opts.logger ?? noopLogger;
         this.approvalGate = opts.approvalGate;
     }
     // ─── Run (non-streaming) ──────────────────────────────────────────────────
     async run(agent, messages, context, overrides) {
-        const messageId = (0, crypto_1.randomUUID)();
-        const runnerDefault = this.anthropicConfig.defaultModel ?? 'claude-opus-4-6';
-        const baseModel = overrides?.model ?? agent.model ?? runnerDefault;
-        const maxTokens = overrides?.maxTokens ?? agent.maxTokens ?? this.anthropicConfig.defaultMaxTokens ?? 4096;
-        // Anthropic's newer models (Sonnet 4.6+, Haiku 4.5+) reject
-        // `temperature` when tools are present — they auto-tune sampling for
-        // tool use. Only forward it when the operator/caller declared one
-        // explicitly; never inject a default. Old models that required it
-        // accept its absence too (they fall back to their own internal
-        // default of 1.0).
-        const temperature = overrides?.temperature ?? agent.temperature;
-        const { tools, extras } = this.buildToolList(agent, overrides);
-        const systemPrompt = this.buildSystemPrompt(agent, tools, overrides);
+        // `run()` is implemented on top of `stream()` to avoid two parallel
+        // loops drifting (every bug fixed in one path historically had to
+        // be ported by hand to the other). Streaming a non-streaming caller
+        // costs almost nothing — the events accumulate in-memory — and the
+        // single-source-of-truth loop is well worth the minor overhead.
         const toolCalls = [];
-        let currentMessages = [...messages];
-        // Pre-compute the signals the model router reads. `hasTools` and
-        // `hasApprovalTool` are constant across the agentic loop (we
-        // don't add tools mid-conversation); `estimatedInputTokens`
-        // starts from a word-count heuristic and gets replaced by the
-        // real `usage.input_tokens` once we have a response.
-        const turnSignals = {
-            hasTools: !!tools && tools.length > 0,
-            hasApprovalTool: hasApprovalGatedTool(agent),
-            estimatedInputTokens: estimateInputTokens(systemPrompt, currentMessages),
-        };
-        let totalUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
-        let finalContent = '';
+        const toolCallStartByUseId = new Map();
+        let usage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
+        const textParts = [];
         let stopReason = 'end_turn';
-        // Last model id the router chose. Surfaced on the response so
-        // callers (and the conversation/usage logs) record what
-        // actually ran, not what the agent's `model` field says.
-        let lastModel = baseModel;
-        while (true) {
-            // Per-turn model selection. When overrides force a model we
-            // honour it (manual `agent.runMessage({ overrides: { model }})`
-            // beats the strategy). Otherwise the strategy decides; absent
-            // strategy → behave exactly like before this feature landed.
-            const selection = overrides?.model
-                ? { model: overrides.model, reason: 'forced' }
-                : (0, model_strategy_1.selectModel)(agent.modelStrategy, turnSignals, baseModel);
-            const model = selection.model;
-            lastModel = model;
-            if (this.logger && selection.reason !== 'default' && selection.reason !== 'forced') {
-                this.logger.debug(`[modelRouter] agent=${agent.id} ${selection.reason}=${selection.trigger} → ${model}`);
+        let messageId = (0, crypto_1.randomUUID)();
+        const model = overrides?.model ?? agent.model ?? this.defaultModel;
+        for await (const chunk of this.stream(agent, messages, context, overrides)) {
+            if (chunk.type === 'text_delta') {
+                textParts.push(chunk.delta);
             }
-            // Per-turn temperature gating. The PER-MODEL filter runs INSIDE
-            // the loop because `model` can change between turns (model
-            // strategy can route a long-context turn to a different family
-            // than the short turns above it). Computing once outside would
-            // either over-strip (drop temperature for a legacy follow-up
-            // model) or under-strip (forward it to a new-family upgrade).
-            const includeTemperature = typeof temperature === 'number' && !modelRejectsTemperature(model);
-            const response = await this.client.messages.create({
-                model,
-                max_tokens: maxTokens,
-                ...(includeTemperature ? { temperature } : {}),
-                system: systemPrompt,
-                messages: currentMessages,
-                tools: tools,
-            });
-            // Update the signal for the NEXT iteration of the loop — the
-            // tool-result feedback we're about to add can balloon the
-            // context past the long-context threshold.
-            turnSignals.estimatedInputTokens = response.usage.input_tokens;
-            totalUsage = {
-                inputTokens: totalUsage.inputTokens + response.usage.input_tokens,
-                outputTokens: totalUsage.outputTokens + response.usage.output_tokens,
-                totalTokens: totalUsage.totalTokens + response.usage.input_tokens + response.usage.output_tokens,
-                cacheCreationInputTokens: (totalUsage.cacheCreationInputTokens ?? 0) +
-                    (response.usage.cache_creation_input_tokens ?? 0),
-                cacheReadInputTokens: (totalUsage.cacheReadInputTokens ?? 0) +
-                    (response.usage.cache_read_input_tokens ?? 0),
-            };
-            stopReason = response.stop_reason ?? 'end_turn';
-            if (response.stop_reason === 'tool_use') {
-                currentMessages = [...currentMessages, { role: 'assistant', content: response.content }];
-                const toolResults = [];
-                for (const block of response.content) {
-                    if (block.type === 'tool_use') {
-                        const start = Date.now();
-                        let output = '';
-                        let error;
-                        try {
-                            output = await this.dispatchTool(block.name, block.input, context, extras);
-                        }
-                        catch (err) {
-                            // Approval-gate signals are NOT tool execution errors —
-                            // they ARE the surface the caller of run() branches on.
-                            // Re-throw so the loop aborts and the consumer (executor /
-                            // conversation service) can persist the pause + surface
-                            // the approvalId. Without this re-throw, the runner would
-                            // feed back a `"Error executing tool …"` to the LLM,
-                            // hiding the pause behind a regular tool failure.
-                            if (err instanceof tool_approval_gate_1.ToolApprovalRequired ||
-                                err instanceof tool_approval_gate_1.ToolBlockedError) {
-                                throw err;
-                            }
-                            error = err instanceof Error ? err.message : String(err);
-                            output = `Error executing tool ${block.name}: ${error}`;
-                        }
-                        toolCalls.push({
-                            toolName: block.name,
-                            toolUseId: block.id,
-                            input: block.input,
-                            output,
-                            error,
-                            durationMs: Date.now() - start,
-                        });
-                        toolResults.push({
-                            type: 'tool_result',
-                            tool_use_id: block.id,
-                            // Sentinel keeps Anthropic happy when a tool produced
-                            // no string output (e.g. a mutation that returned void).
-                            content: output || '(tool completed with no output)',
-                            is_error: !!error,
-                        });
-                    }
-                }
-                // Same defensive break as the stream path — if tool_use was
-                // signalled but we resolved zero tool calls, don't append
-                // an empty user message.
-                if (toolResults.length === 0) {
-                    this.logger.warn(`Agent "${agent.id}" reported tool_use but emitted no resolvable tool calls. Closing the turn.`);
-                    finalContent = response.content
-                        .filter((b) => b.type === 'text')
-                        .map((b) => b.text)
-                        .join('');
-                    break;
+            else if (chunk.type === 'tool_use_start') {
+                toolCallStartByUseId.set(chunk.toolUseId, {
+                    name: chunk.toolName,
+                    start: Date.now(),
+                });
+            }
+            else if (chunk.type === 'tool_result') {
+                // Match up the result with whichever tool_use_start opened it.
+                // `tool_result` chunks from `stream()` carry only `toolName`
+                // and `result` (no toolUseId), so pair FIFO by tool name: the
+                // earliest still-pending start with the same name wins.
+                const pending = Array.from(toolCallStartByUseId.entries()).find(([, v]) => v.name === chunk.toolName);
+                if (pending) {
+                    const [id, { name, start }] = pending;
+                    toolCalls.push({
+                        toolName: name,
+                        toolUseId: id,
+                        input: {},
+                        output: chunk.result,
+                        durationMs: Date.now() - start,
+                    });
+                    toolCallStartByUseId.delete(id);
                 }
-                currentMessages = [...currentMessages, { role: 'user', content: toolResults }];
             }
-            else {
-                finalContent = response.content
-                    .filter((b) => b.type === 'text')
-                    .map((b) => b.text)
-                    .join('');
-                break;
+            else if (chunk.type === 'usage') {
+                usage = chunk.usage;
+            }
+            else if (chunk.type === 'done') {
+                messageId = chunk.messageId;
             }
         }
         return {
             messageId,
             conversationId: context.conversationId,
-            content: finalContent,
+            content: textParts.join(''),
             role: 'assistant',
             toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
-            usage: totalUsage,
-            model: lastModel,
+            usage,
+            model,
             stopReason,
             createdAt: new Date(),
         };
@@ -215,25 +115,29 @@ class AgentRunnerService {
     // ─── Run (streaming) ──────────────────────────────────────────────────────
     async *stream(agent, messages, context, overrides) {
         const messageId = (0, crypto_1.randomUUID)();
-        const runnerDefault = this.anthropicConfig.defaultModel ?? 'claude-opus-4-6';
-        const baseModel = overrides?.model ?? agent.model ?? runnerDefault;
-        const maxTokens = overrides?.maxTokens ?? agent.maxTokens ?? this.anthropicConfig.defaultMaxTokens ?? 4096;
-        // Anthropic's newer models (Sonnet 4.6+, Haiku 4.5+) reject
-        // `temperature` when tools are present — they auto-tune sampling for
-        // tool use. Only forward it when the operator/caller declared one
-        // explicitly; never inject a default. Old models that required it
-        // accept its absence too (they fall back to their own internal
-        // default of 1.0).
+        const baseModel = overrides?.model ?? agent.model ?? this.defaultModel;
+        const maxTokens = overrides?.maxTokens ??
+            agent.maxTokens ??
+            this.defaultMaxTokens ??
+            4096;
         const temperature = overrides?.temperature ?? agent.temperature;
         const { tools, extras } = this.buildToolList(agent, overrides);
         const systemPrompt = this.buildSystemPrompt(agent, tools, overrides);
-        let currentMessages = [...messages];
+        // Re-shape Anthropic-typed tool schemas to the provider-agnostic
+        // ones. The two shapes are identical today — Anthropic's `Tool`
+        // declares `name`, `description`, `input_schema` — so this is a
+        // type-level cast for callers feeding the runner from old code.
+        const llmTools = tools?.map((t) => ({
+            name: t.name,
+            description: t.description ?? '',
+            input_schema: t.input_schema,
+        }));
+        let currentMessages = anthropicMessagesToLLM(messages);
         let totalUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
-        // See `run()` above for the rationale on these signals.
         const turnSignals = {
-            hasTools: !!tools && tools.length > 0,
+            hasTools: !!llmTools && llmTools.length > 0,
             hasApprovalTool: hasApprovalGatedTool(agent),
-            estimatedInputTokens: estimateInputTokens(systemPrompt, currentMessages),
+            estimatedInputTokens: estimateInputTokens(systemPrompt, messages),
         };
         while (true) {
             const selection = overrides?.model
@@ -243,93 +147,94 @@ class AgentRunnerService {
             if (this.logger && selection.reason !== 'default' && selection.reason !== 'forced') {
                 this.logger.debug(`[modelRouter] agent=${agent.id} ${selection.reason}=${selection.trigger} → ${model}`);
             }
-            // Per-turn temperature gating — see `run()` above for rationale.
-            const includeTemperature = typeof temperature === 'number' && !modelRejectsTemperature(model);
-            const stream = this.client.messages.stream({
+            // Provider call. The provider decides per-call whether to honour
+            // `temperature` based on its own capabilities + the model id
+            // (Anthropic 4.5+ rejects it, OpenAI accepts it, …).
+            let stopReason = 'end_turn';
+            let finalContent = [];
+            for await (const event of this.provider.stream({
                 model,
-                max_tokens: maxTokens,
-                ...(includeTemperature ? { temperature } : {}),
-                system: systemPrompt,
+                systemPrompt,
                 messages: currentMessages,
-                tools: tools,
-            });
-            for await (const event of stream) {
-                if (event.type === 'content_block_start') {
-                    if (event.content_block.type === 'tool_use') {
-                        yield {
-                            type: 'tool_use_start',
-                            toolName: event.content_block.name,
-                            toolUseId: event.content_block.id,
-                        };
-                    }
+                tools: llmTools,
+                maxTokens,
+                temperature,
+            })) {
+                if (event.type === 'text_delta') {
+                    yield { type: 'text_delta', delta: event.delta };
                 }
-                else if (event.type === 'content_block_delta') {
-                    if (event.delta.type === 'text_delta') {
-                        yield { type: 'text_delta', delta: event.delta.text };
-                    }
+                else if (event.type === 'tool_use_start') {
+                    yield {
+                        type: 'tool_use_start',
+                        toolName: event.toolName,
+                        toolUseId: event.toolUseId,
+                    };
                 }
-                else if (event.type === 'message_delta') {
-                    const deltaUsage = event.usage;
+                else if (event.type === 'usage_delta') {
                     totalUsage = {
-                        inputTokens: totalUsage.inputTokens + (deltaUsage?.input_tokens ?? 0),
-                        outputTokens: totalUsage.outputTokens + (deltaUsage?.output_tokens ?? 0),
+                        inputTokens: totalUsage.inputTokens + (event.usage.inputTokens ?? 0),
+                        outputTokens: totalUsage.outputTokens + (event.usage.outputTokens ?? 0),
                         totalTokens: totalUsage.totalTokens +
-                            (deltaUsage?.input_tokens ?? 0) +
-                            (deltaUsage?.output_tokens ?? 0),
+                            (event.usage.inputTokens ?? 0) +
+                            (event.usage.outputTokens ?? 0),
+                        cacheCreationInputTokens: (totalUsage.cacheCreationInputTokens ?? 0) +
+                            (event.usage.cacheCreationInputTokens ?? 0),
+                        cacheReadInputTokens: (totalUsage.cacheReadInputTokens ?? 0) +
+                            (event.usage.cacheReadInputTokens ?? 0),
                     };
                 }
+                else if (event.type === 'message_stop') {
+                    stopReason = event.stopReason;
+                    finalContent = event.content;
+                }
             }
-            const finalMessage = await stream.finalMessage();
             // Refresh the input-token signal so the next iteration of the
             // tool loop has the post-tool-result context length, not the
             // initial estimate.
-            if (typeof finalMessage.usage?.input_tokens === 'number') {
-                turnSignals.estimatedInputTokens = finalMessage.usage.input_tokens;
+            if (totalUsage.inputTokens > 0) {
+                turnSignals.estimatedInputTokens = totalUsage.inputTokens;
             }
-            if (finalMessage.stop_reason === 'tool_use') {
-                currentMessages = [...currentMessages, { role: 'assistant', content: finalMessage.content }];
+            if (stopReason === 'tool_use') {
+                // Carry the assistant message (text + tool_use blocks) forward
+                // so the next turn sees its own previous tool calls.
+                currentMessages = [
+                    ...currentMessages,
+                    { role: 'assistant', content: finalContent },
+                ];
                 const toolResults = [];
-                for (const block of finalMessage.content) {
-                    if (block.type === 'tool_use') {
-                        let output = '';
-                        try {
-                            output = await this.dispatchTool(block.name, block.input, context, extras);
-                        }
-                        catch (err) {
-                            // Same rule as the sync path: gate signals propagate, regular
-                            // errors collapse into a `"Error: …"` tool result the LLM
-                            // can react to. The consumer of `stream()` catches the
-                            // ToolApprovalRequired and decides whether to emit a
-                            // structured chunk to the client or just end the stream.
-                            if (err instanceof tool_approval_gate_1.ToolApprovalRequired ||
-                                err instanceof tool_approval_gate_1.ToolBlockedError) {
-                                throw err;
-                            }
-                            output = `Error: ${err instanceof Error ? err.message : String(err)}`;
+                for (const block of finalContent) {
+                    if (block.type !== 'tool_use')
+                        continue;
+                    let output = '';
+                    try {
+                        output = await this.dispatchTool(block.name, block.input, context, extras);
+                    }
+                    catch (err) {
+                        // Approval-gate signals propagate; regular errors collapse
+                        // into a tool result the LLM can react to.
+                        if (err instanceof tool_approval_gate_1.ToolApprovalRequired ||
+                            err instanceof tool_approval_gate_1.ToolBlockedError) {
+                            throw err;
                         }
-                        yield { type: 'tool_result', toolName: block.name, result: output };
-                        toolResults.push({
-                            type: 'tool_result',
-                            tool_use_id: block.id,
-                            // Anthropic rejects empty tool_result content (part of
-                            // the "messages.N: user messages must have non-empty
-                            // content" 400). When the tool returned no string,
-                            // substitute a sentinel so the next planning step still
-                            // sees a coherent transcript.
-                            content: output || '(tool completed with no output)',
-                        });
+                        output = `Error: ${err instanceof Error ? err.message : String(err)}`;
                     }
+                    yield { type: 'tool_result', toolName: block.name, result: output };
+                    toolResults.push({
+                        type: 'tool_result',
+                        tool_use_id: block.id,
+                        // Anthropic rejects empty tool_result content. Sentinel
+                        // keeps every provider happy.
+                        content: output || '(tool completed with no output)',
+                    });
                 }
-                // Defensive: if the model said `tool_use` but emitted zero
-                // tool_use blocks (or all were filtered for unknown names),
-                // appending `{role:'user', content:[]}` triggers the same
-                // Anthropic 400. Break and let whatever text the model
-                // already produced stand as the final answer.
                 if (toolResults.length === 0) {
                     this.logger.warn(`Agent "${agent.id}" reported tool_use but emitted no resolvable tool calls. Closing the turn.`);
                     break;
                 }
-                currentMessages = [...currentMessages, { role: 'user', content: toolResults }];
+                currentMessages = [
+                    ...currentMessages,
+                    { role: 'user', content: toolResults },
+                ];
             }
             else {
                 break;
@@ -491,3 +396,56 @@ function hasApprovalGatedTool(agent) {
     }
     return false;
 }
+// ─── Message-shape translation ───────────────────────────────────────────────
+/**
+ * Translates the `AnthropicMessage[]` the runner's public API still
+ * accepts (back-compat with hosts wired before the provider abstraction
+ * landed) into the `LLMMessage[]` shape the loop talks internally.
+ * String content passes through as-is; block arrays are mapped per block:
+ * `text`, `tool_use` and `tool_result` keep their discriminator, anything
+ * else is flattened to a `[unsupported:<type>]` text marker.
+ * @param messages Conversation history in Anthropic message shape.
+ * @returns A new `LLMMessage[]`; the input array is not mutated.
+ */
+function anthropicMessagesToLLM(messages) {
+    return messages.map((m) => {
+        if (typeof m.content === 'string') {
+            return { role: m.role, content: m.content };
+        }
+        return {
+            role: m.role,
+            content: m.content.map((b) => {
+                if (b.type === 'text') {
+                    return { type: 'text', text: b.text };
+                }
+                if (b.type === 'tool_use') {
+                    return { type: 'tool_use', id: b.id, name: b.name, input: b.input ?? {} };
+                }
+                if (b.type === 'tool_result') {
+                    // `content` may be absent on a tool_result block, and
+                    // `JSON.stringify(undefined)` is `undefined`, not a
+                    // string. Substitute the same sentinel the stream loop
+                    // uses for output-less tools so content is always text.
+                    let content = '(tool completed with no output)';
+                    if (typeof b.content === 'string') {
+                        content = b.content;
+                    }
+                    else if (b.content != null) {
+                        content = JSON.stringify(b.content);
+                    }
+                    return { type: 'tool_result', tool_use_id: b.tool_use_id, content, is_error: b.is_error };
+                }
+                // Unknown block kinds (image, document, …) — flatten to a
+                // text marker so the LLM still sees something. Multimodal
+                // input plumbing is a follow-up.
+                return { type: 'text', text: `[unsupported:${b.type}]` };
+            }),
+        };
+    });
+}
+/** True when `v` is the new-form `{ provider, … }` runner config rather than a legacy `AnthropicConfig`. */
+function isRunnerProviderConfig(v) {
+    // `typeof null === 'object'`, so a null `provider` has to be ruled out explicitly.
+    const provider = typeof v === 'object' && v !== null ? v.provider : undefined;
+    return typeof provider === 'object' && provider !== null;
+}

package/dist/services/agent.service.js CHANGED Viewed

@@ -379,6 +379,11 @@ class AgentService {
         const filter = params.overrides?.extraToolsFilter;
         const fromConnectors = filter && resolvedExtras ? filter(resolvedExtras) : resolvedExtras;
         const extraTools = mergeExtraTools(params.overrides?.extraTools, fromConnectors);
+        // Hoisted accumulators so the post-loop persistence (after the
+        // try) can see the final list. Defined here, populated inside
+        // the for-await loop below.
+        const toolCallStartByUseId = new Map();
+        const accumulatedToolCalls = [];
         try {
             // Team orchestrators route through OrchestratorService.stream()
             // so the synthetic `delegate_to_*` tools the orchestrator was
@@ -403,11 +408,56 @@ class AgentService {
                     messageId: 'streaming',
                     agent: { timezone: agent.timezone },
                 }, { ...(params.overrides ?? {}), extraTools });
+            // Accumulate tool_use / tool_result chunks during streaming so
+            // we can persist them on the assistant message row (line ~700).
+            // Without this, `getHistory` returns the assistant's text but
+            // loses the tool calls — which means clients that render
+            // proposal cards (Recording Assist) or generic tool rows from
+            // history have nothing to rehydrate. FIFO matching mirrors the
+            // runner's heuristic so the shape stays consistent whether the
+            // turn streamed or used `run()`.
             for await (const chunk of stream) {
                 if (chunk.type === 'text_delta')
                     fullContent += chunk.delta;
                 if (chunk.type === 'usage')
                     finalUsage = chunk.usage;
+                if (chunk.type === 'tool_use_start') {
+                    toolCallStartByUseId.set(chunk.toolUseId, {
+                        name: chunk.toolName,
+                        start: Date.now(),
+                    });
+                }
+                if (chunk.type === 'tool_result') {
+                    // Match by toolUseId when present; otherwise FIFO on toolName
+                    // (matches AgentRunner.run's heuristic). Older runners that
+                    // don't emit `toolUseId` on result chunks still produce a
+                    // usable record.
+                    let entry = null;
+                    const useId = chunk.toolUseId;
+                    if (useId && toolCallStartByUseId.has(useId)) {
+                        const v = toolCallStartByUseId.get(useId);
+                        entry = { id: useId, name: v.name, start: v.start };
+                        toolCallStartByUseId.delete(useId);
+                    }
+                    else {
+                        const fifo = Array.from(toolCallStartByUseId.entries()).find(([, v]) => v.name === chunk.toolName);
+                        if (fifo) {
+                            entry = { id: fifo[0], name: fifo[1].name, start: fifo[1].start };
+                            toolCallStartByUseId.delete(fifo[0]);
+                        }
+                    }
+                    if (entry) {
+                        accumulatedToolCalls.push({
+                            toolName: entry.name,
+                            toolUseId: entry.id,
+                            input: {},
+                            output: typeof chunk.result === 'string'
+                                ? chunk.result
+                                : JSON.stringify(chunk.result),
+                            durationMs: Date.now() - entry.start,
+                        });
+                    }
+                }
                 yield chunk;
             }
         }
@@ -507,6 +557,12 @@ class AgentService {
                 role: 'assistant',
                 content: fullContent,
                 usage: finalUsage,
+                // Persist the accumulated tool calls so `getHistory` can
+                // surface them on reload — without this, proposal cards
+                // (Recording Assist) and other tool-result-driven UI
+                // disappear after refresh. Empty arrays drop to undefined
+                // so we don't pollute the column with `[]`.
+                toolCalls: accumulatedToolCalls.length > 0 ? accumulatedToolCalls : undefined,
             });
             const now = new Date();
             await this.dispatchUsage({

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@agentforge-io/core",
-  "version": "2.2.4",
-  "description": "Framework-free AI runtime SDK. Owns: agent loop (Anthropic), conversations, tools, streaming, agent-job queue, SdkHooks. Identity, billing, infra (email/uploads/secrets) live in the host's modules — not here.",
+  "version": "2.3.1",
+  "description": "Framework-free AI runtime SDK. Owns: agent loop (pluggable LLM provider — Anthropic by default, LangChain-backed providers as drop-ins), conversations, tools, streaming, agent-job queue, SdkHooks. Identity, billing, infra (email/uploads/secrets) live in the host's modules — not here.",
   "license": "MIT",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",