@agentforge-io/core 2.2.4 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,3 +15,6 @@ export { PREPARED_STREAM_STORE, type PreparedStreamStore, type PreparedStreamPay
15
15
  export { InMemoryPreparedStreamStore } from '../services/in-memory-prepared-stream.store';
16
16
  export { JOB_QUEUE, type JobQueue, type JobStatus, type JobState, type JobContext, type JobProcessor, type EnqueueOptions, type QueueMetrics, } from '../adapters/job-queue/job-queue.types';
17
17
  export { InMemoryJobQueue, type InMemoryJobQueueOptions, } from '../adapters/job-queue/in-memory';
18
+ export type { LLMProvider, LLMProviderCapabilities, LLMStreamParams, LLMStreamEvent, LLMMessage, LLMContentBlock, LLMToolSchema, } from '../providers/types';
19
+ export { AnthropicProvider, modelRejectsTemperature, } from '../providers/anthropic-provider';
20
+ export type { AnthropicProviderOptions } from '../providers/anthropic-provider';
package/dist/ai/index.js CHANGED
@@ -8,7 +8,7 @@
8
8
  //
9
9
  // Files still co-located physically; this is a logical seam.
10
10
  Object.defineProperty(exports, "__esModule", { value: true });
11
- exports.InMemoryJobQueue = exports.JOB_QUEUE = exports.InMemoryPreparedStreamStore = exports.PREPARED_STREAM_STORE = exports.AgentJobWorker = exports.AgentForbiddenError = exports.AgentService = exports.ConversationNotFoundError = exports.ConversationService = exports.PreparedStreamError = exports.PreparedStreamService = exports.OrchestratorService = exports.AgentRunnerService = exports.ToolRegistryService = exports.DEFAULT_SHORT_INPUT_TOKENS = exports.DEFAULT_LONG_CONTEXT_TOKENS = exports.selectModel = exports.CURRENT_USER = exports.AGENT_QUEUE_NAME = exports.AGENT_FORGE_CONFIG = void 0;
11
+ exports.modelRejectsTemperature = exports.AnthropicProvider = exports.InMemoryJobQueue = exports.JOB_QUEUE = exports.InMemoryPreparedStreamStore = exports.PREPARED_STREAM_STORE = exports.AgentJobWorker = exports.AgentForbiddenError = exports.AgentService = exports.ConversationNotFoundError = exports.ConversationService = exports.PreparedStreamError = exports.PreparedStreamService = exports.OrchestratorService = exports.AgentRunnerService = exports.ToolRegistryService = exports.DEFAULT_SHORT_INPUT_TOKENS = exports.DEFAULT_LONG_CONTEXT_TOKENS = exports.selectModel = exports.CURRENT_USER = exports.AGENT_QUEUE_NAME = exports.AGENT_FORGE_CONFIG = void 0;
12
12
  // ─── Constants ─────────────────────────────────────────────────────────────
13
13
  var constants_1 = require("../constants");
14
14
  Object.defineProperty(exports, "AGENT_FORGE_CONFIG", { enumerable: true, get: function () { return constants_1.AGENT_FORGE_CONFIG; } });
@@ -45,3 +45,6 @@ var job_queue_types_1 = require("../adapters/job-queue/job-queue.types");
45
45
  Object.defineProperty(exports, "JOB_QUEUE", { enumerable: true, get: function () { return job_queue_types_1.JOB_QUEUE; } });
46
46
  var in_memory_1 = require("../adapters/job-queue/in-memory");
47
47
  Object.defineProperty(exports, "InMemoryJobQueue", { enumerable: true, get: function () { return in_memory_1.InMemoryJobQueue; } });
48
+ var anthropic_provider_1 = require("../providers/anthropic-provider");
49
+ Object.defineProperty(exports, "AnthropicProvider", { enumerable: true, get: function () { return anthropic_provider_1.AnthropicProvider; } });
50
+ Object.defineProperty(exports, "modelRejectsTemperature", { enumerable: true, get: function () { return anthropic_provider_1.modelRejectsTemperature; } });
package/dist/index.d.ts CHANGED
@@ -9,6 +9,7 @@ export { InMemoryRateLimiter } from './adapters/rate-limiter/in-memory';
9
9
  export { RedisRateLimiter, type RedisLike } from './adapters/rate-limiter/redis';
10
10
  export { JOB_QUEUE, type JobQueue, type JobStatus, type JobState, type JobContext, type JobProcessor, type EnqueueOptions, type QueueMetrics, } from './adapters/job-queue/job-queue.types';
11
11
  export { InMemoryJobQueue, type InMemoryJobQueueOptions, } from './adapters/job-queue/in-memory';
12
+ export * from './providers';
12
13
  export * from './services';
13
14
  export type { AgentResolver, AgentRecord, AgentResolveParams, } from './services/agent.service';
14
15
  export { toAgentDefinition } from './services/agent.service';
package/dist/index.js CHANGED
@@ -52,6 +52,10 @@ var job_queue_types_1 = require("./adapters/job-queue/job-queue.types");
52
52
  Object.defineProperty(exports, "JOB_QUEUE", { enumerable: true, get: function () { return job_queue_types_1.JOB_QUEUE; } });
53
53
  var in_memory_2 = require("./adapters/job-queue/in-memory");
54
54
  Object.defineProperty(exports, "InMemoryJobQueue", { enumerable: true, get: function () { return in_memory_2.InMemoryJobQueue; } });
55
+ // ─── LLM providers (framework-free) ─────────────────────────────────────────
56
+ // Provider abstraction lets hosts swap Anthropic for LangChain-backed
57
+ // providers (OpenAI/Grok/Gemini) without modifying the runner.
58
+ __exportStar(require("./providers"), exports);
55
59
  // ─── Services (framework-free) ──────────────────────────────────────────────
56
60
  __exportStar(require("./services"), exports);
57
61
  // `toAgentDefinition` is the adapter from the host's `AgentRecord` shape
@@ -0,0 +1,27 @@
1
+ import type { LLMProvider, LLMProviderCapabilities, LLMStreamEvent, LLMStreamParams } from './types';
2
+ /**
3
+ * Anthropic's newer model families deprecated the `temperature` parameter
4
+ * entirely — they auto-tune sampling internally and return 400
5
+ * `invalid_request_error: \`temperature\` is deprecated for this model` if
6
+ * the caller still sends one. Older families (3.x, the original 4.0
7
+ * releases) accept it fine.
8
+ *
9
+ * Detection by string match on the model id rather than a hard-coded
10
+ * allowlist: new model ids land between SDK releases, and we don't want
11
+ * to break temperature on legacy agents the day a new family ships.
12
+ */
13
+ export declare function modelRejectsTemperature(model: string | undefined): boolean;
14
+ export interface AnthropicProviderOptions {
15
+ apiKey: string;
16
+ /** Optional override for the Anthropic API base URL (proxies, custom
17
+ * gateways). Defaults to the SDK's built-in production endpoint. */
18
+ baseURL?: string;
19
+ }
20
+ export declare class AnthropicProvider implements LLMProvider {
21
+ readonly id = "anthropic";
22
+ readonly displayName = "Anthropic";
23
+ readonly capabilities: LLMProviderCapabilities;
24
+ private readonly client;
25
+ constructor(opts: AnthropicProviderOptions);
26
+ stream(params: LLMStreamParams): AsyncGenerator<LLMStreamEvent>;
27
+ }
@@ -0,0 +1,206 @@
1
+ "use strict";
2
+ // ─── Anthropic provider ──────────────────────────────────────────────────────
3
+ //
4
+ // Wraps `@anthropic-ai/sdk` behind the framework-free `LLMProvider` contract.
5
+ // The runner no longer talks to Anthropic directly — it goes through this
6
+ // adapter, which keeps every Anthropic-specific quirk (stream event shape,
7
+ // the 4.5+ temperature deprecation, the `ToolResultBlockParam` content
8
+ // envelope) confined to one file.
9
+ //
10
+ // Behavioural parity with the pre-refactor runner is the goal: same streaming
11
+ // granularity, same model-router triggers, same temperature-rejection
12
+ // heuristic, same `(tool completed with no output)` sentinel. Anything else
13
+ // would be a silent behaviour change for every existing AgentForge install.
14
+ var __importDefault = (this && this.__importDefault) || function (mod) {
15
+ return (mod && mod.__esModule) ? mod : { "default": mod };
16
+ };
17
+ Object.defineProperty(exports, "__esModule", { value: true });
18
+ exports.AnthropicProvider = void 0;
19
+ exports.modelRejectsTemperature = modelRejectsTemperature;
20
+ const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
21
/**
 * Report whether a model id belongs to an Anthropic family that rejects the
 * `temperature` parameter.
 *
 * Anthropic's newer model families deprecated `temperature` entirely — they
 * auto-tune sampling internally and return 400
 * `invalid_request_error: \`temperature\` is deprecated for this model` if
 * the caller still sends one. Older families (3.x, the original 4.0
 * releases) accept it fine.
 *
 * Detection is a pattern match on the model id rather than a hard-coded
 * allowlist: new model ids land between SDK releases, and we don't want
 * to break temperature on legacy agents the day a new family ships.
 *
 * Matched shapes (case-insensitive, after bracket suffixes are removed):
 *   - `claude-<family>-4-<5..9>`  e.g. `claude-opus-4-7`
 *   - `claude-<family>-<5..9>`    e.g. `claude-opus-5`, `claude-opus-5-0`
 * Both shapes must end on a word boundary, so a bare alias without a minor
 * version or date suffix is recognised the same way as a fully-qualified id.
 *
 * @param {string | undefined} model - Model id; empty/undefined yields false.
 * @returns {boolean} True when `temperature` should be omitted for the model.
 */
function modelRejectsTemperature(model) {
    if (!model) {
        return false;
    }
    // Bracket suffixes like "claude-opus-4-7[1m]" (long-context variant) are
    // stripped first. Vendor prefixes like "anthropic/claude-..." need no
    // handling because the pattern below is not anchored to the start.
    const id = model.toLowerCase().replace(/\[[^\]]*\]/g, '');
    // A single boundary rule (`\b`) for both the 4.5+ and the 5+ shapes keeps
    // the two branches consistent; "-" is a non-word character, so ids with a
    // trailing "-<suffix>" still satisfy the boundary.
    return /claude-[a-z]+-(?:4-[5-9]|[5-9])\b/.test(id);
}
44
/**
 * `LLMProvider` implementation backed by the `@anthropic-ai/sdk` client.
 *
 * Translates provider-agnostic stream params into an Anthropic
 * `messages.stream()` call and re-emits the SDK's events as normalized
 * `text_delta` / `tool_use_start` / `usage_delta` / `message_stop` events.
 */
class AnthropicProvider {
    /**
     * @param {{ apiKey: string, baseURL?: string }} opts - `apiKey` is passed
     *   straight to the SDK client; `baseURL` optionally overrides the API
     *   endpoint (proxies, custom gateways).
     */
    constructor(opts) {
        this.id = 'anthropic';
        this.displayName = 'Anthropic';
        this.capabilities = {
            supportsTools: true,
            supportsStreaming: true,
            // Per-call gating still lives in `stream()` because the heuristic is
            // model-specific (Claude 4.0 vs 4.5+) — at the provider level we
            // declare "we know how to handle temperature when supplied" and let
            // the stream method decide on a per-turn basis.
            supportsTemperature: true,
            supportsParallelTools: true,
        };
        this.client = new sdk_1.default({
            apiKey: opts.apiKey,
            baseURL: opts.baseURL,
        });
    }
    /**
     * Stream one assistant turn.
     *
     * Yields, in order: zero or more `text_delta` / `tool_use_start` events
     * as the SDK stream progresses, then exactly one `usage_delta` carrying
     * the turn totals, then one final `message_stop`.
     *
     * @param {object} params - Reads `model`, `maxTokens`, `systemPrompt`,
     *   `messages`, `tools` and optional `temperature`.
     * @returns {AsyncGenerator<object>} Normalized stream events.
     */
    async *stream(params) {
        // Only forward `temperature` when the caller supplied a number AND the
        // model family is not known to reject it.
        const includeTemperature = typeof params.temperature === 'number' &&
            !modelRejectsTemperature(params.model);
        const stream = this.client.messages.stream({
            model: params.model,
            max_tokens: params.maxTokens,
            ...(includeTemperature ? { temperature: params.temperature } : {}),
            system: params.systemPrompt,
            messages: toAnthropicMessages(params.messages),
            tools: params.tools,
        });
        // Usage reported on `message_delta` events is cumulative, so the latest
        // value replaces the previous one instead of being added to it; a
        // null/absent field keeps whatever was seen earlier.
        let streamedInput = 0;
        let streamedOutput = 0;
        let streamedCacheCreate = 0;
        let streamedCacheRead = 0;
        // Mid-stream events — text deltas land here; tool_use blocks are
        // recognised at `content_block_start` so a `tool_use_start` event can
        // be yielded immediately. The full parsed input only lands once
        // `finalMessage()` resolves, so only the name + id are forwarded here.
        // NOTE(review): the `LLMProvider` contract text says `tool_use_start`
        // is emitted once the parsed input is complete; this provider emits it
        // early with an empty input — confirm consumers read the input from
        // `message_stop.content`.
        for await (const event of stream) {
            if (event.type === 'content_block_start') {
                if (event.content_block.type === 'tool_use') {
                    yield {
                        type: 'tool_use_start',
                        toolName: event.content_block.name,
                        toolUseId: event.content_block.id,
                        // Input arrives as JSON deltas; it is not available yet
                        // at `content_block_start`.
                        input: {},
                    };
                }
            }
            else if (event.type === 'content_block_delta') {
                if (event.delta.type === 'text_delta') {
                    yield { type: 'text_delta', delta: event.delta.text };
                }
            }
            else if (event.type === 'message_delta') {
                const deltaUsage = event.usage;
                if (deltaUsage) {
                    streamedInput = deltaUsage.input_tokens ?? streamedInput;
                    streamedOutput = deltaUsage.output_tokens ?? streamedOutput;
                    streamedCacheCreate =
                        deltaUsage.cache_creation_input_tokens ?? streamedCacheCreate;
                    streamedCacheRead =
                        deltaUsage.cache_read_input_tokens ?? streamedCacheRead;
                }
            }
        }
        const finalMessage = await stream.finalMessage();
        const finalUsage = finalMessage.usage;
        // Streamed values win when non-zero; otherwise fall back to the usage
        // on the final message. Cache counters use the same fallback so they
        // are not lost when the deltas omit them.
        const inputTokens = streamedInput || finalUsage?.input_tokens || 0;
        const outputTokens = streamedOutput || finalUsage?.output_tokens || 0;
        // Emit a single usage_delta with the total — keeps the contract simple
        // (no caller needs to know whether the wire format buffered usage).
        yield {
            type: 'usage_delta',
            usage: {
                inputTokens,
                outputTokens,
                totalTokens: inputTokens + outputTokens,
                cacheCreationInputTokens: streamedCacheCreate ||
                    finalUsage?.cache_creation_input_tokens ||
                    undefined,
                cacheReadInputTokens: streamedCacheRead ||
                    finalUsage?.cache_read_input_tokens ||
                    undefined,
            },
        };
        yield {
            type: 'message_stop',
            stopReason: normalizeStopReason(finalMessage.stop_reason),
            content: fromAnthropicContent(finalMessage.content),
        };
    }
}
133
+ exports.AnthropicProvider = AnthropicProvider;
134
+ // ─── Translation helpers ────────────────────────────────────────────────────
135
/**
 * Convert provider-agnostic `LLMMessage[]` into the message array handed to
 * the Anthropic client.
 *
 * String content is passed through untouched. Array content is mapped block
 * by block: `text` and `tool_use` blocks are copied field-for-field, and any
 * other block is emitted as a `tool_result`.
 *
 * @param {Array<{ role: string, content: string | object[] }>} messages
 * @returns {Array<{ role: string, content: string | object[] }>}
 */
function toAnthropicMessages(messages) {
    const convertBlock = (part) => {
        switch (part.type) {
            case 'text':
                return { type: 'text', text: part.text };
            case 'tool_use':
                return {
                    type: 'tool_use',
                    id: part.id,
                    name: part.name,
                    input: part.input,
                };
            default:
                return {
                    type: 'tool_result',
                    tool_use_id: part.tool_use_id,
                    content: part.content,
                    is_error: part.is_error,
                };
        }
    };
    const converted = [];
    for (const { role, content } of messages) {
        const body = typeof content === 'string'
            ? content
            : content.map((part) => convertBlock(part));
        converted.push({ role, content: body });
    }
    return converted;
}
170
/**
 * Reduce the content blocks of a final Anthropic message to the
 * provider-agnostic block shapes.
 *
 * Only `text` and `tool_use` blocks are kept, copied field-for-field and in
 * their original order. Every other block type (for example
 * `server_tool_use` / `web_search_tool_result`, Anthropic-only built-ins
 * that are not surfaced today) is dropped without error; add an explicit
 * case here if one ever needs to be exposed.
 *
 * @param {Iterable<object>} content - Blocks from the final message.
 * @returns {object[]} The retained blocks.
 */
function fromAnthropicContent(content) {
    return [...content].flatMap((part) => {
        switch (part.type) {
            case 'text':
                return [{ type: 'text', text: part.text }];
            case 'tool_use':
                return [{
                        type: 'tool_use',
                        id: part.id,
                        name: part.name,
                        input: part.input,
                    }];
            default:
                return [];
        }
    });
}
195
/**
 * Collapse a raw stop reason onto the four values used by `message_stop`.
 *
 * `tool_use`, `max_tokens` and `stop_sequence` pass through unchanged; any
 * other value (including null/undefined) is reported as `end_turn`.
 *
 * @param {string | null | undefined} raw - Stop reason from the final message.
 * @returns {'end_turn' | 'tool_use' | 'max_tokens' | 'stop_sequence'}
 */
function normalizeStopReason(raw) {
    const passesThrough = raw === 'tool_use' ||
        raw === 'max_tokens' ||
        raw === 'stop_sequence';
    return passesThrough ? raw : 'end_turn';
}
@@ -0,0 +1,3 @@
1
+ export type { LLMProvider, LLMProviderCapabilities, LLMStreamParams, LLMStreamEvent, LLMMessage, LLMContentBlock, LLMToolSchema, } from './types';
2
+ export { AnthropicProvider, modelRejectsTemperature } from './anthropic-provider';
3
+ export type { AnthropicProviderOptions } from './anthropic-provider';
@@ -0,0 +1,6 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.modelRejectsTemperature = exports.AnthropicProvider = void 0;
4
+ var anthropic_provider_1 = require("./anthropic-provider");
5
+ Object.defineProperty(exports, "AnthropicProvider", { enumerable: true, get: function () { return anthropic_provider_1.AnthropicProvider; } });
6
+ Object.defineProperty(exports, "modelRejectsTemperature", { enumerable: true, get: function () { return anthropic_provider_1.modelRejectsTemperature; } });
@@ -0,0 +1,135 @@
1
+ import type { TokenUsage } from '../types/agent.types';
2
+ export interface LLMProviderCapabilities {
3
+ /** Tool calling support. Old Anthropic 2.x and most fine-tunes don't
4
+ * expose tools; the runner falls back to text-only completions when
5
+ * this is false (no tools attached, no tool_use_start chunks). */
6
+ supportsTools: boolean;
7
+ /** Streaming (token-by-token) support. Required for the chat surface
8
+ * in AgentForge today; a `false` provider can still be used for
9
+ * one-shot helpers (approval copywriter) but not for chat. */
10
+ supportsStreaming: boolean;
11
+ /** Some Anthropic 4.5+ families reject the `temperature` parameter
12
+ * entirely (auto-tune sampling internally). Set false on those so
13
+ * the runner skips the param instead of emitting a 400. */
14
+ supportsTemperature: boolean;
15
+ /** Whether the provider lets the model emit multiple tool_use blocks
16
+ * in a single assistant turn. Anthropic does. OpenAI does. Some
17
+ * smaller models only do one. The runner doesn't *require* this —
18
+ * it loops regardless — but downstream UI can adapt. */
19
+ supportsParallelTools: boolean;
20
+ }
21
+ /**
22
+ * Provider-agnostic chat message. The runner translates the Anthropic-shaped
23
+ * `AnthropicMessage[]` it currently carries into this shape on the way INTO
24
+ * a provider call; the provider translates back into its own native shape
25
+ * just before hitting the wire. This keeps the runner's loop one-size-fits-
26
+ * all without leaking Anthropic types out of the AnthropicProvider.
27
+ */
28
+ export type LLMMessage = {
29
+ role: 'user';
30
+ content: string | LLMContentBlock[];
31
+ } | {
32
+ role: 'assistant';
33
+ content: string | LLMContentBlock[];
34
+ };
35
+ /**
36
+ * Multi-part message content. Mirrors Anthropic's content blocks without
37
+ * inheriting from `@anthropic-ai/sdk`. Providers that only speak plain text
38
+ * (some smaller LangChain models) flatten the array down to a string on
39
+ * their side.
40
+ */
41
+ export type LLMContentBlock = {
42
+ type: 'text';
43
+ text: string;
44
+ } | {
45
+ type: 'tool_use';
46
+ id: string;
47
+ name: string;
48
+ input: Record<string, unknown>;
49
+ } | {
50
+ type: 'tool_result';
51
+ tool_use_id: string;
52
+ content: string;
53
+ is_error?: boolean;
54
+ };
55
+ /**
56
+ * Tool description handed to the provider so the model knows what to call.
57
+ * Same shape as Anthropic's `Tool` type; OpenAI's `function` shape is
58
+ * derived inside the LangChain provider via `convertToOpenAITool`.
59
+ */
60
+ export interface LLMToolSchema {
61
+ name: string;
62
+ description: string;
63
+ input_schema: Record<string, unknown>;
64
+ }
65
+ export interface LLMStreamParams {
66
+ /** Concrete model id ('claude-opus-4-7', 'gpt-4o', 'grok-2-latest'). */
67
+ model: string;
68
+ systemPrompt: string;
69
+ messages: LLMMessage[];
70
+ tools?: LLMToolSchema[];
71
+ maxTokens: number;
72
+ /** Caller-supplied. The provider should drop it silently if
73
+ * `capabilities.supportsTemperature` is false. */
74
+ temperature?: number;
75
+ }
76
+ /**
77
+ * Normalized event the provider yields during `stream()`. The runner consumes
78
+ * these and translates them into `StreamChunk` (the platform-facing shape).
79
+ *
80
+ * `text_delta` → token-by-token assistant text.
81
+ * `tool_use_start` → model decided to call a tool; carries the input
82
+ * the provider parsed out of its native event stream.
83
+ * The runner dispatches the tool and feeds the result
84
+ * back via a follow-up message in the next iteration.
85
+ * `usage_delta` → cumulative token-usage update. Some providers only
86
+ * emit usage at the end (one event); others emit
87
+ * running totals — the runner sums whatever arrives.
88
+ * `message_stop` → end of the assistant turn. Carries the stop reason
89
+ * so the runner knows whether to loop again for tool
90
+ * results or finalize.
91
+ *
92
+ * NOTE: we deliberately don't surface `content_block_start`/`stop` etc. —
93
+ * those are Anthropic-specific transport details. Providers absorb them.
94
+ */
95
+ export type LLMStreamEvent = {
96
+ type: 'text_delta';
97
+ delta: string;
98
+ } | {
99
+ type: 'tool_use_start';
100
+ toolUseId: string;
101
+ toolName: string;
102
+ input: Record<string, unknown>;
103
+ } | {
104
+ type: 'usage_delta';
105
+ usage: Partial<TokenUsage>;
106
+ } | {
107
+ type: 'message_stop';
108
+ stopReason: 'end_turn' | 'tool_use' | 'max_tokens' | 'stop_sequence';
109
+ /** Final assistant content as a single array — used by the runner to
110
+ * rebuild the assistant message for the next loop iteration without
111
+ * re-asking the provider. Mirrors Anthropic's `finalMessage.content`. */
112
+ content: LLMContentBlock[];
113
+ };
114
+ export interface LLMProvider {
115
+ /** Stable id used by the platform's resolver to pick a provider via
116
+ * `af_settings['llm.active_provider']`. Lowercase, hyphenated. */
117
+ readonly id: string;
118
+ /** Human-readable label for admin UIs and telemetry. */
119
+ readonly displayName: string;
120
+ readonly capabilities: LLMProviderCapabilities;
121
+ /**
122
+ * Streaming chat completion. Yields normalized events for one assistant
123
+ * turn. The runner calls `stream()` once per loop iteration — when the
124
+ * stop reason is `tool_use`, it appends tool results to `params.messages`
125
+ * and calls `stream()` again on the next iteration.
126
+ *
127
+ * Implementations MUST:
128
+ * - emit `text_delta` events for assistant text as it arrives
129
+ * - emit `tool_use_start` once the parsed tool input is complete
130
+ * - emit `usage_delta` at least once (final total) when usage is known
131
+ * - emit `message_stop` as the LAST event and only once
132
+ * - throw on transport errors (caller handles fallback)
133
+ */
134
+ stream(params: LLMStreamParams): AsyncGenerator<LLMStreamEvent>;
135
+ }
@@ -0,0 +1,17 @@
1
+ "use strict";
2
+ // ─── LLM Provider abstraction ────────────────────────────────────────────────
3
+ //
4
+ // Goal: every concrete LLM family (Anthropic, OpenAI via LangChain, Gemini,
5
+ // Grok, …) ships behind the same `LLMProvider` interface so the rest of the
6
+ // SDK (runner loop, orchestrator, approval copywriter) is provider-agnostic.
7
+ //
8
+ // The interface is intentionally narrow — it only owns one thing: turn a
9
+ // prompt + tool catalog into a stream of normalized events. The agentic
10
+ // loop (tool dispatch, approval gating, model routing) stays where it is
11
+ // in `AgentRunnerService` because it is identical across providers.
12
+ //
13
+ // Streaming-only on purpose: every modern provider supports streaming and
14
+ // the runner converts streamed chunks into the `StreamChunk` shape the
15
+ // platform's SSE controller already speaks. A non-streaming `run()` would
16
+ // be a second code path with the same loop — pointless.
17
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -3,15 +3,19 @@ import type { AgentDefinition, AnthropicConfig } from '../types/config.types';
3
3
  import type { ToolRegistryService } from './tool-registry.service';
4
4
  import type { Logger } from './tool-registry.service';
5
5
  import { type ToolApprovalGate } from './tool-approval-gate';
6
+ import { type LLMProvider } from '../providers';
6
7
  /**
7
- * Framework-free runner for Claude. Handles the agentic loop (tool calls) for
8
- * sync runs and exposes streaming as an `AsyncGenerator<StreamChunk>` so any
9
- * transport (SSE, fetch+ReadableStream, WebSocket, etc.) can consume it.
8
+ * Framework-free runner. Handles the agentic loop (tool calls, model
9
+ * routing, approval gating) and delegates the LLM call itself to a
10
+ * pluggable `LLMProvider` — Anthropic by default, or LangChain-backed
11
+ * (OpenAI/Grok/Gemini/…) when the host wires a different provider in.
12
+ *
13
+ * Streaming is exposed as `AsyncGenerator<StreamChunk>` so any transport
14
+ * (SSE, fetch+ReadableStream, WebSocket, etc.) can consume it.
10
15
  */
11
16
  export declare class AgentRunnerService {
12
- private readonly anthropicConfig;
13
17
  private readonly toolRegistry;
14
- private readonly client;
18
+ private readonly provider;
15
19
  private readonly logger;
16
20
  /**
17
21
  * Optional pre-dispatch gate. When supplied, every tool call passes
@@ -27,7 +31,30 @@ export declare class AgentRunnerService {
27
31
  * story behaviorally identical to the pre-gate codebase.
28
32
  */
29
33
  private readonly approvalGate;
30
- constructor(anthropicConfig: AnthropicConfig, toolRegistry: ToolRegistryService, opts?: {
34
+ /**
35
+ * Default model id surfaced to per-turn `selectModel()` when neither
36
+ * the agent nor the overrides pin one. Kept on the runner (not the
37
+ * provider) because the routing strategy is provider-agnostic — the
38
+ * provider only validates that the resolved model id is one it can
39
+ * serve.
40
+ */
41
+ private readonly defaultModel;
42
+ /** Default `max_tokens` ceiling when the agent / overrides leave it
43
+ * unset. Same rationale as `defaultModel` — provider-agnostic knob. */
44
+ private readonly defaultMaxTokens;
45
+ /**
46
+ * Two-form constructor for backwards compatibility:
47
+ *
48
+ * new AgentRunnerService(anthropicConfig, toolRegistry, opts?)
49
+ * ^ legacy form — wraps `anthropicConfig` in an `AnthropicProvider`
50
+ * so existing callers keep working without changes.
51
+ *
52
+ * new AgentRunnerService({ provider, defaultModel?, defaultMaxTokens? },
53
+ * toolRegistry, opts?)
54
+ * ^ new form — caller supplies any `LLMProvider` (Anthropic,
55
+ * LangChain, …). The provider owns the wire-level call.
56
+ */
57
+ constructor(providerOrLegacyConfig: AnthropicConfig | RunnerProviderConfig, toolRegistry: ToolRegistryService, opts?: {
31
58
  logger?: Logger;
32
59
  approvalGate?: ToolApprovalGate;
33
60
  });
@@ -78,3 +105,14 @@ export declare class AgentRunnerService {
78
105
  private buildToolList;
79
106
  private dispatchTool;
80
107
  }
108
+ /**
109
+ * New-style runner config: a fully-wired `LLMProvider` plus the runner-
110
+ * level defaults (`defaultModel`, `defaultMaxTokens`). The Anthropic-shaped
111
+ * legacy config (`AnthropicConfig`) remains accepted by the runner for
112
+ * back-compat — see the constructor's two-form signature.
113
+ */
114
+ export interface RunnerProviderConfig {
115
+ provider: LLMProvider;
116
+ defaultModel?: string;
117
+ defaultMaxTokens?: number;
118
+ }
@@ -1,213 +1,113 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
3
  exports.AgentRunnerService = void 0;
7
- const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
8
4
  const crypto_1 = require("crypto");
9
5
  const tool_approval_gate_1 = require("./tool-approval-gate");
10
6
  const model_strategy_1 = require("../types/model-strategy");
7
+ const providers_1 = require("../providers");
11
8
  const noopLogger = {
12
9
  log: () => { }, warn: () => { }, debug: () => { }, error: () => { },
13
10
  };
14
11
  /**
15
- * Anthropic's newer model families deprecated the `temperature` parameter
16
- * entirely — they auto-tune sampling internally and return 400
17
- * `invalid_request_error: \`temperature\` is deprecated for this model` if
18
- * the caller still sends one. Older families (3.x, the original 4.0
19
- * releases) accept it fine.
20
- *
21
- * Detection by string match on the model id rather than a hard-coded
22
- * allowlist: new model ids land between SDK releases, and we don't want
23
- * to break temperature on legacy agents the day a new family ships.
24
- * Pattern: anything that contains `-4-5`, `-4-6`, `-4-7`, …, `-5-*`,
25
- * `-6-*`, etc. counts as "newer." Old 4-0 / 4-1 / 3-x ids are unaffected.
12
+ * Framework-free runner. Handles the agentic loop (tool calls, model
13
+ * routing, approval gating) and delegates the LLM call itself to a
14
+ * pluggable `LLMProvider` — Anthropic by default, or LangChain-backed
15
+ * (OpenAI/Grok/Gemini/…) when the host wires a different provider in.
26
16
  *
27
- * Heuristic, not exhaustive — if a future family lands with a different
28
- * naming convention we'll have to extend this. The cost of being wrong
29
- * is a single 400 the operator can fix by clearing the temperature in
30
- * the editor; the cost of NOT filtering is the same 400 today.
31
- */
32
- function modelRejectsTemperature(model) {
33
- if (!model)
34
- return false;
35
- // Normalize: ignore vendor prefixes like "anthropic/claude-..." and
36
- // bracket suffixes like "claude-opus-4-7[1m]" (long-context variant).
37
- const m = model.toLowerCase().replace(/\[[^\]]*\]/g, '');
38
- // claude-*-4-5, 4-6, 4-7, 4-8 …
39
- if (/claude-[a-z]+-4-([5-9])\b/.test(m))
40
- return true;
41
- // claude-*-5-x, claude-*-6-x, … (future major bumps)
42
- if (/claude-[a-z]+-([5-9])-/.test(m))
43
- return true;
44
- return false;
45
- }
46
- /**
47
- * Framework-free runner for Claude. Handles the agentic loop (tool calls) for
48
- * sync runs and exposes streaming as an `AsyncGenerator<StreamChunk>` so any
49
- * transport (SSE, fetch+ReadableStream, WebSocket, etc.) can consume it.
17
+ * Streaming is exposed as `AsyncGenerator<StreamChunk>` so any transport
18
+ * (SSE, fetch+ReadableStream, WebSocket, etc.) can consume it.
50
19
  */
51
20
  class AgentRunnerService {
52
- constructor(anthropicConfig, toolRegistry, opts = {}) {
53
- this.anthropicConfig = anthropicConfig;
21
+ /**
22
+ * Two-form constructor for backwards compatibility:
23
+ *
24
+ * new AgentRunnerService(anthropicConfig, toolRegistry, opts?)
25
+ * ^ legacy form — wraps `anthropicConfig` in an `AnthropicProvider`
26
+ * so existing callers keep working without changes.
27
+ *
28
+ * new AgentRunnerService({ provider, defaultModel?, defaultMaxTokens? },
29
+ * toolRegistry, opts?)
30
+ * ^ new form — caller supplies any `LLMProvider` (Anthropic,
31
+ * LangChain, …). The provider owns the wire-level call.
32
+ */
33
+ constructor(providerOrLegacyConfig, toolRegistry, opts = {}) {
54
34
  this.toolRegistry = toolRegistry;
55
- this.client = new sdk_1.default({
56
- apiKey: anthropicConfig.apiKey,
57
- baseURL: anthropicConfig.baseURL,
58
- });
35
+ if (isRunnerProviderConfig(providerOrLegacyConfig)) {
36
+ this.provider = providerOrLegacyConfig.provider;
37
+ this.defaultModel = providerOrLegacyConfig.defaultModel ?? 'claude-opus-4-6';
38
+ this.defaultMaxTokens = providerOrLegacyConfig.defaultMaxTokens;
39
+ }
40
+ else {
41
+ // Legacy path: build an AnthropicProvider from the inline config so
42
+ // every caller written before the LLMProvider abstraction landed
43
+ // keeps the exact same behaviour.
44
+ this.provider = new providers_1.AnthropicProvider({
45
+ apiKey: providerOrLegacyConfig.apiKey,
46
+ baseURL: providerOrLegacyConfig.baseURL,
47
+ });
48
+ this.defaultModel = providerOrLegacyConfig.defaultModel ?? 'claude-opus-4-6';
49
+ this.defaultMaxTokens = providerOrLegacyConfig.defaultMaxTokens;
50
+ }
59
51
  this.logger = opts.logger ?? noopLogger;
60
52
  this.approvalGate = opts.approvalGate;
61
53
  }
62
54
  // ─── Run (non-streaming) ──────────────────────────────────────────────────
63
55
  async run(agent, messages, context, overrides) {
64
- const messageId = (0, crypto_1.randomUUID)();
65
- const runnerDefault = this.anthropicConfig.defaultModel ?? 'claude-opus-4-6';
66
- const baseModel = overrides?.model ?? agent.model ?? runnerDefault;
67
- const maxTokens = overrides?.maxTokens ?? agent.maxTokens ?? this.anthropicConfig.defaultMaxTokens ?? 4096;
68
- // Anthropic's newer models (Sonnet 4.6+, Haiku 4.5+) reject
69
- // `temperature` when tools are present — they auto-tune sampling for
70
- // tool use. Only forward it when the operator/caller declared one
71
- // explicitly; never inject a default. Old models that required it
72
- // accept its absence too (they fall back to their own internal
73
- // default of 1.0).
74
- const temperature = overrides?.temperature ?? agent.temperature;
75
- const { tools, extras } = this.buildToolList(agent, overrides);
76
- const systemPrompt = this.buildSystemPrompt(agent, tools, overrides);
56
+ // `run()` is implemented on top of `stream()` to avoid two parallel
57
+ // loops drifting (every bug fixed in one path historically had to
58
+ // be ported by hand to the other). Streaming a non-streaming caller
59
+ // costs almost nothing — the events accumulate in-memory — and the
60
+ // single-source-of-truth loop is well worth the minor overhead.
77
61
  const toolCalls = [];
78
- let currentMessages = [...messages];
79
- // Pre-compute the signals the model router reads. `hasTools` and
80
- // `hasApprovalTool` are constant across the agentic loop (we
81
- // don't add tools mid-conversation); `estimatedInputTokens`
82
- // starts from a word-count heuristic and gets replaced by the
83
- // real `usage.input_tokens` once we have a response.
84
- const turnSignals = {
85
- hasTools: !!tools && tools.length > 0,
86
- hasApprovalTool: hasApprovalGatedTool(agent),
87
- estimatedInputTokens: estimateInputTokens(systemPrompt, currentMessages),
88
- };
89
- let totalUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
90
- let finalContent = '';
62
+ const toolCallStartByUseId = new Map();
63
+ let usage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
64
+ const textParts = [];
91
65
  let stopReason = 'end_turn';
92
- // Last model id the router chose. Surfaced on the response so
93
- // callers (and the conversation/usage logs) record what
94
- // actually ran, not what the agent's `model` field says.
95
- let lastModel = baseModel;
96
- while (true) {
97
- // Per-turn model selection. When overrides force a model we
98
- // honour it (manual `agent.runMessage({ overrides: { model }})`
99
- // beats the strategy). Otherwise the strategy decides; absent
100
- // strategy → behave exactly like before this feature landed.
101
- const selection = overrides?.model
102
- ? { model: overrides.model, reason: 'forced' }
103
- : (0, model_strategy_1.selectModel)(agent.modelStrategy, turnSignals, baseModel);
104
- const model = selection.model;
105
- lastModel = model;
106
- if (this.logger && selection.reason !== 'default' && selection.reason !== 'forced') {
107
- this.logger.debug(`[modelRouter] agent=${agent.id} ${selection.reason}=${selection.trigger} → ${model}`);
66
+ let messageId = (0, crypto_1.randomUUID)();
67
+ const model = overrides?.model ?? agent.model ?? this.defaultModel;
68
+ for await (const chunk of this.stream(agent, messages, context, overrides)) {
69
+ if (chunk.type === 'text_delta') {
70
+ textParts.push(chunk.delta);
108
71
  }
109
- // Per-turn temperature gating. The PER-MODEL filter runs INSIDE
110
- // the loop because `model` can change between turns (model
111
- // strategy can route a long-context turn to a different family
112
- // than the short turns above it). Computing once outside would
113
- // either over-strip (drop temperature for a legacy follow-up
114
- // model) or under-strip (forward it to a new-family upgrade).
115
- const includeTemperature = typeof temperature === 'number' && !modelRejectsTemperature(model);
116
- const response = await this.client.messages.create({
117
- model,
118
- max_tokens: maxTokens,
119
- ...(includeTemperature ? { temperature } : {}),
120
- system: systemPrompt,
121
- messages: currentMessages,
122
- tools: tools,
123
- });
124
- // Update the signal for the NEXT iteration of the loop — the
125
- // tool-result feedback we're about to add can balloon the
126
- // context past the long-context threshold.
127
- turnSignals.estimatedInputTokens = response.usage.input_tokens;
128
- totalUsage = {
129
- inputTokens: totalUsage.inputTokens + response.usage.input_tokens,
130
- outputTokens: totalUsage.outputTokens + response.usage.output_tokens,
131
- totalTokens: totalUsage.totalTokens + response.usage.input_tokens + response.usage.output_tokens,
132
- cacheCreationInputTokens: (totalUsage.cacheCreationInputTokens ?? 0) +
133
- (response.usage.cache_creation_input_tokens ?? 0),
134
- cacheReadInputTokens: (totalUsage.cacheReadInputTokens ?? 0) +
135
- (response.usage.cache_read_input_tokens ?? 0),
136
- };
137
- stopReason = response.stop_reason ?? 'end_turn';
138
- if (response.stop_reason === 'tool_use') {
139
- currentMessages = [...currentMessages, { role: 'assistant', content: response.content }];
140
- const toolResults = [];
141
- for (const block of response.content) {
142
- if (block.type === 'tool_use') {
143
- const start = Date.now();
144
- let output = '';
145
- let error;
146
- try {
147
- output = await this.dispatchTool(block.name, block.input, context, extras);
148
- }
149
- catch (err) {
150
- // Approval-gate signals are NOT tool execution errors —
151
- // they ARE the surface the caller of run() branches on.
152
- // Re-throw so the loop aborts and the consumer (executor /
153
- // conversation service) can persist the pause + surface
154
- // the approvalId. Without this re-throw, the runner would
155
- // feed back a `"Error executing tool …"` to the LLM,
156
- // hiding the pause behind a regular tool failure.
157
- if (err instanceof tool_approval_gate_1.ToolApprovalRequired ||
158
- err instanceof tool_approval_gate_1.ToolBlockedError) {
159
- throw err;
160
- }
161
- error = err instanceof Error ? err.message : String(err);
162
- output = `Error executing tool ${block.name}: ${error}`;
163
- }
164
- toolCalls.push({
165
- toolName: block.name,
166
- toolUseId: block.id,
167
- input: block.input,
168
- output,
169
- error,
170
- durationMs: Date.now() - start,
171
- });
172
- toolResults.push({
173
- type: 'tool_result',
174
- tool_use_id: block.id,
175
- // Sentinel keeps Anthropic happy when a tool produced
176
- // no string output (e.g. a mutation that returned void).
177
- content: output || '(tool completed with no output)',
178
- is_error: !!error,
179
- });
180
- }
181
- }
182
- // Same defensive break as the stream path — if tool_use was
183
- // signalled but we resolved zero tool calls, don't append
184
- // an empty user message.
185
- if (toolResults.length === 0) {
186
- this.logger.warn(`Agent "${agent.id}" reported tool_use but emitted no resolvable tool calls. Closing the turn.`);
187
- finalContent = response.content
188
- .filter((b) => b.type === 'text')
189
- .map((b) => b.text)
190
- .join('');
191
- break;
72
+ else if (chunk.type === 'tool_use_start') {
73
+ toolCallStartByUseId.set(chunk.toolUseId, {
74
+ name: chunk.toolName,
75
+ start: Date.now(),
76
+ });
77
+ }
78
+ else if (chunk.type === 'tool_result') {
79
+ // Match up the result with whichever tool_use_start opened it.
80
+ // We use a FIFO heuristic when the toolUseId isn't tracked
81
+ // (legacy chunk emitters that don't carry it) — for the
82
+ // current SDK they always carry it.
83
+ const pending = Array.from(toolCallStartByUseId.entries()).find(([, v]) => v.name === chunk.toolName);
84
+ if (pending) {
85
+ const [id, { name, start }] = pending;
86
+ toolCalls.push({
87
+ toolName: name,
88
+ toolUseId: id,
89
+ input: {},
90
+ output: chunk.result,
91
+ durationMs: Date.now() - start,
92
+ });
93
+ toolCallStartByUseId.delete(id);
192
94
  }
193
- currentMessages = [...currentMessages, { role: 'user', content: toolResults }];
194
95
  }
195
- else {
196
- finalContent = response.content
197
- .filter((b) => b.type === 'text')
198
- .map((b) => b.text)
199
- .join('');
200
- break;
96
+ else if (chunk.type === 'usage') {
97
+ usage = chunk.usage;
98
+ }
99
+ else if (chunk.type === 'done') {
100
+ messageId = chunk.messageId;
201
101
  }
202
102
  }
203
103
  return {
204
104
  messageId,
205
105
  conversationId: context.conversationId,
206
- content: finalContent,
106
+ content: textParts.join(''),
207
107
  role: 'assistant',
208
108
  toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
209
- usage: totalUsage,
210
- model: lastModel,
109
+ usage,
110
+ model,
211
111
  stopReason,
212
112
  createdAt: new Date(),
213
113
  };
@@ -215,25 +115,29 @@ class AgentRunnerService {
215
115
  // ─── Run (streaming) ──────────────────────────────────────────────────────
216
116
  async *stream(agent, messages, context, overrides) {
217
117
  const messageId = (0, crypto_1.randomUUID)();
218
- const runnerDefault = this.anthropicConfig.defaultModel ?? 'claude-opus-4-6';
219
- const baseModel = overrides?.model ?? agent.model ?? runnerDefault;
220
- const maxTokens = overrides?.maxTokens ?? agent.maxTokens ?? this.anthropicConfig.defaultMaxTokens ?? 4096;
221
- // Anthropic's newer models (Sonnet 4.6+, Haiku 4.5+) reject
222
- // `temperature` when tools are present — they auto-tune sampling for
223
- // tool use. Only forward it when the operator/caller declared one
224
- // explicitly; never inject a default. Old models that required it
225
- // accept its absence too (they fall back to their own internal
226
- // default of 1.0).
118
+ const baseModel = overrides?.model ?? agent.model ?? this.defaultModel;
119
+ const maxTokens = overrides?.maxTokens ??
120
+ agent.maxTokens ??
121
+ this.defaultMaxTokens ??
122
+ 4096;
227
123
  const temperature = overrides?.temperature ?? agent.temperature;
228
124
  const { tools, extras } = this.buildToolList(agent, overrides);
229
125
  const systemPrompt = this.buildSystemPrompt(agent, tools, overrides);
230
- let currentMessages = [...messages];
126
+ // Re-shape Anthropic-typed tool schemas to the provider-agnostic
127
+ // ones. The two shapes are identical today — Anthropic's `Tool`
128
+ // declares `name`, `description`, `input_schema` — so this is a
129
+ // type-level cast for callers feeding the runner from old code.
130
+ const llmTools = tools?.map((t) => ({
131
+ name: t.name,
132
+ description: t.description ?? '',
133
+ input_schema: t.input_schema,
134
+ }));
135
+ let currentMessages = anthropicMessagesToLLM(messages);
231
136
  let totalUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
232
- // See `run()` above for the rationale on these signals.
233
137
  const turnSignals = {
234
- hasTools: !!tools && tools.length > 0,
138
+ hasTools: !!llmTools && llmTools.length > 0,
235
139
  hasApprovalTool: hasApprovalGatedTool(agent),
236
- estimatedInputTokens: estimateInputTokens(systemPrompt, currentMessages),
140
+ estimatedInputTokens: estimateInputTokens(systemPrompt, messages),
237
141
  };
238
142
  while (true) {
239
143
  const selection = overrides?.model
@@ -243,93 +147,94 @@ class AgentRunnerService {
243
147
  if (this.logger && selection.reason !== 'default' && selection.reason !== 'forced') {
244
148
  this.logger.debug(`[modelRouter] agent=${agent.id} ${selection.reason}=${selection.trigger} → ${model}`);
245
149
  }
246
- // Per-turn temperature gating — see `run()` above for rationale.
247
- const includeTemperature = typeof temperature === 'number' && !modelRejectsTemperature(model);
248
- const stream = this.client.messages.stream({
150
+ // Provider call. The provider decides per-call whether to honour
151
+ // `temperature` based on its own capabilities + the model id
152
+ // (Anthropic 4.5+ rejects it, OpenAI accepts it, …).
153
+ let stopReason = 'end_turn';
154
+ let finalContent = [];
155
+ for await (const event of this.provider.stream({
249
156
  model,
250
- max_tokens: maxTokens,
251
- ...(includeTemperature ? { temperature } : {}),
252
- system: systemPrompt,
157
+ systemPrompt,
253
158
  messages: currentMessages,
254
- tools: tools,
255
- });
256
- for await (const event of stream) {
257
- if (event.type === 'content_block_start') {
258
- if (event.content_block.type === 'tool_use') {
259
- yield {
260
- type: 'tool_use_start',
261
- toolName: event.content_block.name,
262
- toolUseId: event.content_block.id,
263
- };
264
- }
159
+ tools: llmTools,
160
+ maxTokens,
161
+ temperature,
162
+ })) {
163
+ if (event.type === 'text_delta') {
164
+ yield { type: 'text_delta', delta: event.delta };
265
165
  }
266
- else if (event.type === 'content_block_delta') {
267
- if (event.delta.type === 'text_delta') {
268
- yield { type: 'text_delta', delta: event.delta.text };
269
- }
166
+ else if (event.type === 'tool_use_start') {
167
+ yield {
168
+ type: 'tool_use_start',
169
+ toolName: event.toolName,
170
+ toolUseId: event.toolUseId,
171
+ };
270
172
  }
271
- else if (event.type === 'message_delta') {
272
- const deltaUsage = event.usage;
173
+ else if (event.type === 'usage_delta') {
273
174
  totalUsage = {
274
- inputTokens: totalUsage.inputTokens + (deltaUsage?.input_tokens ?? 0),
275
- outputTokens: totalUsage.outputTokens + (deltaUsage?.output_tokens ?? 0),
175
+ inputTokens: totalUsage.inputTokens + (event.usage.inputTokens ?? 0),
176
+ outputTokens: totalUsage.outputTokens + (event.usage.outputTokens ?? 0),
276
177
  totalTokens: totalUsage.totalTokens +
277
- (deltaUsage?.input_tokens ?? 0) +
278
- (deltaUsage?.output_tokens ?? 0),
178
+ (event.usage.inputTokens ?? 0) +
179
+ (event.usage.outputTokens ?? 0),
180
+ cacheCreationInputTokens: (totalUsage.cacheCreationInputTokens ?? 0) +
181
+ (event.usage.cacheCreationInputTokens ?? 0),
182
+ cacheReadInputTokens: (totalUsage.cacheReadInputTokens ?? 0) +
183
+ (event.usage.cacheReadInputTokens ?? 0),
279
184
  };
280
185
  }
186
+ else if (event.type === 'message_stop') {
187
+ stopReason = event.stopReason;
188
+ finalContent = event.content;
189
+ }
281
190
  }
282
- const finalMessage = await stream.finalMessage();
283
191
  // Refresh the input-token signal so the next iteration of the
284
192
  // tool loop has the post-tool-result context length, not the
285
193
  // initial estimate.
286
- if (typeof finalMessage.usage?.input_tokens === 'number') {
287
- turnSignals.estimatedInputTokens = finalMessage.usage.input_tokens;
194
+ if (totalUsage.inputTokens > 0) {
195
+ turnSignals.estimatedInputTokens = totalUsage.inputTokens;
288
196
  }
289
- if (finalMessage.stop_reason === 'tool_use') {
290
- currentMessages = [...currentMessages, { role: 'assistant', content: finalMessage.content }];
197
+ if (stopReason === 'tool_use') {
198
+ // Carry the assistant message (text + tool_use blocks) forward
199
+ // so the next turn sees its own previous tool calls.
200
+ currentMessages = [
201
+ ...currentMessages,
202
+ { role: 'assistant', content: finalContent },
203
+ ];
291
204
  const toolResults = [];
292
- for (const block of finalMessage.content) {
293
- if (block.type === 'tool_use') {
294
- let output = '';
295
- try {
296
- output = await this.dispatchTool(block.name, block.input, context, extras);
297
- }
298
- catch (err) {
299
- // Same rule as the sync path: gate signals propagate, regular
300
- // errors collapse into a `"Error: …"` tool result the LLM
301
- // can react to. The consumer of `stream()` catches the
302
- // ToolApprovalRequired and decides whether to emit a
303
- // structured chunk to the client or just end the stream.
304
- if (err instanceof tool_approval_gate_1.ToolApprovalRequired ||
305
- err instanceof tool_approval_gate_1.ToolBlockedError) {
306
- throw err;
307
- }
308
- output = `Error: ${err instanceof Error ? err.message : String(err)}`;
205
+ for (const block of finalContent) {
206
+ if (block.type !== 'tool_use')
207
+ continue;
208
+ let output = '';
209
+ try {
210
+ output = await this.dispatchTool(block.name, block.input, context, extras);
211
+ }
212
+ catch (err) {
213
+ // Approval-gate signals propagate; regular errors collapse
214
+ // into a tool result the LLM can react to.
215
+ if (err instanceof tool_approval_gate_1.ToolApprovalRequired ||
216
+ err instanceof tool_approval_gate_1.ToolBlockedError) {
217
+ throw err;
309
218
  }
310
- yield { type: 'tool_result', toolName: block.name, result: output };
311
- toolResults.push({
312
- type: 'tool_result',
313
- tool_use_id: block.id,
314
- // Anthropic rejects empty tool_result content (part of
315
- // the "messages.N: user messages must have non-empty
316
- // content" 400). When the tool returned no string,
317
- // substitute a sentinel so the next planning step still
318
- // sees a coherent transcript.
319
- content: output || '(tool completed with no output)',
320
- });
219
+ output = `Error: ${err instanceof Error ? err.message : String(err)}`;
321
220
  }
221
+ yield { type: 'tool_result', toolName: block.name, result: output };
222
+ toolResults.push({
223
+ type: 'tool_result',
224
+ tool_use_id: block.id,
225
+ // Anthropic rejects empty tool_result content. Sentinel
226
+ // keeps every provider happy.
227
+ content: output || '(tool completed with no output)',
228
+ });
322
229
  }
323
- // Defensive: if the model said `tool_use` but emitted zero
324
- // tool_use blocks (or all were filtered for unknown names),
325
- // appending `{role:'user', content:[]}` triggers the same
326
- // Anthropic 400. Break and let whatever text the model
327
- // already produced stand as the final answer.
328
230
  if (toolResults.length === 0) {
329
231
  this.logger.warn(`Agent "${agent.id}" reported tool_use but emitted no resolvable tool calls. Closing the turn.`);
330
232
  break;
331
233
  }
332
- currentMessages = [...currentMessages, { role: 'user', content: toolResults }];
234
+ currentMessages = [
235
+ ...currentMessages,
236
+ { role: 'user', content: toolResults },
237
+ ];
333
238
  }
334
239
  else {
335
240
  break;
@@ -491,3 +396,56 @@ function hasApprovalGatedTool(agent) {
491
396
  }
492
397
  return false;
493
398
  }
399
+ // ─── Message-shape translation ───────────────────────────────────────────────
400
+ /**
401
+ * The runner's public API still accepts `AnthropicMessage[]` (kept for
402
+ * back-compat with every host wired before the provider abstraction
403
+ * landed). Internally the loop talks `LLMMessage[]`, so we translate
404
+ * on entry. The two shapes are deliberately close — text content is a
405
+ * plain string in both, multi-block content carries the same `text /
406
+ * tool_use / tool_result` discriminator — so this is mechanical.
407
+ */
408
+ function anthropicMessagesToLLM(messages) {
409
+ return messages.map((m) => {
410
+ if (typeof m.content === 'string') {
411
+ return { role: m.role, content: m.content };
412
+ }
413
+ return {
414
+ role: m.role,
415
+ content: m.content.map((block) => {
416
+ const b = block;
417
+ if (b.type === 'text') {
418
+ return { type: 'text', text: b.text };
419
+ }
420
+ if (b.type === 'tool_use') {
421
+ return {
422
+ type: 'tool_use',
423
+ id: b.id,
424
+ name: b.name,
425
+ input: (b.input ?? {}),
426
+ };
427
+ }
428
+ if (b.type === 'tool_result') {
429
+ return {
430
+ type: 'tool_result',
431
+ tool_use_id: b.tool_use_id,
432
+ content: typeof b.content === 'string'
433
+ ? b.content
434
+ : JSON.stringify(b.content),
435
+ is_error: b.is_error,
436
+ };
437
+ }
438
+ // Unknown block kinds (image, document, …) — flatten to a
439
+ // text marker so the LLM still sees something. Multimodal
440
+ // input plumbing is a follow-up.
441
+ return { type: 'text', text: `[unsupported:${b.type}]` };
442
+ }),
443
+ };
444
+ });
445
+ }
446
+ function isRunnerProviderConfig(v) {
447
+ return (typeof v === 'object' &&
448
+ v !== null &&
449
+ 'provider' in v &&
450
+ typeof v.provider === 'object');
451
+ }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@agentforge-io/core",
3
- "version": "2.2.4",
4
- "description": "Framework-free AI runtime SDK. Owns: agent loop (Anthropic), conversations, tools, streaming, agent-job queue, SdkHooks. Identity, billing, infra (email/uploads/secrets) live in the host's modules — not here.",
3
+ "version": "2.3.0",
4
+ "description": "Framework-free AI runtime SDK. Owns: agent loop (pluggable LLM provider — Anthropic by default, LangChain-backed providers as drop-ins), conversations, tools, streaming, agent-job queue, SdkHooks. Identity, billing, infra (email/uploads/secrets) live in the host's modules — not here.",
5
5
  "license": "MIT",
6
6
  "main": "dist/index.js",
7
7
  "types": "dist/index.d.ts",