@agentforge-io/core 2.2.4 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/index.d.ts +3 -0
- package/dist/ai/index.js +4 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.js +4 -0
- package/dist/providers/anthropic-provider.d.ts +27 -0
- package/dist/providers/anthropic-provider.js +206 -0
- package/dist/providers/index.d.ts +3 -0
- package/dist/providers/index.js +6 -0
- package/dist/providers/types.d.ts +135 -0
- package/dist/providers/types.js +17 -0
- package/dist/services/agent-runner.service.d.ts +44 -6
- package/dist/services/agent-runner.service.js +216 -258
- package/dist/services/agent.service.js +56 -0
- package/package.json +2 -2
package/dist/ai/index.d.ts
CHANGED
|
@@ -15,3 +15,6 @@ export { PREPARED_STREAM_STORE, type PreparedStreamStore, type PreparedStreamPay
|
|
|
15
15
|
export { InMemoryPreparedStreamStore } from '../services/in-memory-prepared-stream.store';
|
|
16
16
|
export { JOB_QUEUE, type JobQueue, type JobStatus, type JobState, type JobContext, type JobProcessor, type EnqueueOptions, type QueueMetrics, } from '../adapters/job-queue/job-queue.types';
|
|
17
17
|
export { InMemoryJobQueue, type InMemoryJobQueueOptions, } from '../adapters/job-queue/in-memory';
|
|
18
|
+
export type { LLMProvider, LLMProviderCapabilities, LLMStreamParams, LLMStreamEvent, LLMMessage, LLMContentBlock, LLMToolSchema, } from '../providers/types';
|
|
19
|
+
export { AnthropicProvider, modelRejectsTemperature, } from '../providers/anthropic-provider';
|
|
20
|
+
export type { AnthropicProviderOptions } from '../providers/anthropic-provider';
|
package/dist/ai/index.js
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
//
|
|
9
9
|
// Files still co-located physically; this is a logical seam.
|
|
10
10
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
-
exports.InMemoryJobQueue = exports.JOB_QUEUE = exports.InMemoryPreparedStreamStore = exports.PREPARED_STREAM_STORE = exports.AgentJobWorker = exports.AgentForbiddenError = exports.AgentService = exports.ConversationNotFoundError = exports.ConversationService = exports.PreparedStreamError = exports.PreparedStreamService = exports.OrchestratorService = exports.AgentRunnerService = exports.ToolRegistryService = exports.DEFAULT_SHORT_INPUT_TOKENS = exports.DEFAULT_LONG_CONTEXT_TOKENS = exports.selectModel = exports.CURRENT_USER = exports.AGENT_QUEUE_NAME = exports.AGENT_FORGE_CONFIG = void 0;
|
|
11
|
+
exports.modelRejectsTemperature = exports.AnthropicProvider = exports.InMemoryJobQueue = exports.JOB_QUEUE = exports.InMemoryPreparedStreamStore = exports.PREPARED_STREAM_STORE = exports.AgentJobWorker = exports.AgentForbiddenError = exports.AgentService = exports.ConversationNotFoundError = exports.ConversationService = exports.PreparedStreamError = exports.PreparedStreamService = exports.OrchestratorService = exports.AgentRunnerService = exports.ToolRegistryService = exports.DEFAULT_SHORT_INPUT_TOKENS = exports.DEFAULT_LONG_CONTEXT_TOKENS = exports.selectModel = exports.CURRENT_USER = exports.AGENT_QUEUE_NAME = exports.AGENT_FORGE_CONFIG = void 0;
|
|
12
12
|
// ─── Constants ─────────────────────────────────────────────────────────────
|
|
13
13
|
var constants_1 = require("../constants");
|
|
14
14
|
Object.defineProperty(exports, "AGENT_FORGE_CONFIG", { enumerable: true, get: function () { return constants_1.AGENT_FORGE_CONFIG; } });
|
|
@@ -45,3 +45,6 @@ var job_queue_types_1 = require("../adapters/job-queue/job-queue.types");
|
|
|
45
45
|
Object.defineProperty(exports, "JOB_QUEUE", { enumerable: true, get: function () { return job_queue_types_1.JOB_QUEUE; } });
|
|
46
46
|
var in_memory_1 = require("../adapters/job-queue/in-memory");
|
|
47
47
|
Object.defineProperty(exports, "InMemoryJobQueue", { enumerable: true, get: function () { return in_memory_1.InMemoryJobQueue; } });
|
|
48
|
+
var anthropic_provider_1 = require("../providers/anthropic-provider");
|
|
49
|
+
Object.defineProperty(exports, "AnthropicProvider", { enumerable: true, get: function () { return anthropic_provider_1.AnthropicProvider; } });
|
|
50
|
+
Object.defineProperty(exports, "modelRejectsTemperature", { enumerable: true, get: function () { return anthropic_provider_1.modelRejectsTemperature; } });
|
package/dist/index.d.ts
CHANGED
|
@@ -9,6 +9,7 @@ export { InMemoryRateLimiter } from './adapters/rate-limiter/in-memory';
|
|
|
9
9
|
export { RedisRateLimiter, type RedisLike } from './adapters/rate-limiter/redis';
|
|
10
10
|
export { JOB_QUEUE, type JobQueue, type JobStatus, type JobState, type JobContext, type JobProcessor, type EnqueueOptions, type QueueMetrics, } from './adapters/job-queue/job-queue.types';
|
|
11
11
|
export { InMemoryJobQueue, type InMemoryJobQueueOptions, } from './adapters/job-queue/in-memory';
|
|
12
|
+
export * from './providers';
|
|
12
13
|
export * from './services';
|
|
13
14
|
export type { AgentResolver, AgentRecord, AgentResolveParams, } from './services/agent.service';
|
|
14
15
|
export { toAgentDefinition } from './services/agent.service';
|
package/dist/index.js
CHANGED
|
@@ -52,6 +52,10 @@ var job_queue_types_1 = require("./adapters/job-queue/job-queue.types");
|
|
|
52
52
|
Object.defineProperty(exports, "JOB_QUEUE", { enumerable: true, get: function () { return job_queue_types_1.JOB_QUEUE; } });
|
|
53
53
|
var in_memory_2 = require("./adapters/job-queue/in-memory");
|
|
54
54
|
Object.defineProperty(exports, "InMemoryJobQueue", { enumerable: true, get: function () { return in_memory_2.InMemoryJobQueue; } });
|
|
55
|
+
// ─── LLM providers (framework-free) ─────────────────────────────────────────
|
|
56
|
+
// Provider abstraction lets hosts swap Anthropic for LangChain-backed
|
|
57
|
+
// providers (OpenAI/Grok/Gemini) without modifying the runner.
|
|
58
|
+
__exportStar(require("./providers"), exports);
|
|
55
59
|
// ─── Services (framework-free) ──────────────────────────────────────────────
|
|
56
60
|
__exportStar(require("./services"), exports);
|
|
57
61
|
// `toAgentDefinition` is the adapter from the host's `AgentRecord` shape
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { LLMProvider, LLMProviderCapabilities, LLMStreamEvent, LLMStreamParams } from './types';
|
|
2
|
+
/**
|
|
3
|
+
* Anthropic's newer model families deprecated the `temperature` parameter
|
|
4
|
+
* entirely — they auto-tune sampling internally and return 400
|
|
5
|
+
* `invalid_request_error: \`temperature\` is deprecated for this model` if
|
|
6
|
+
* the caller still sends one. Older families (3.x, the original 4.0
|
|
7
|
+
* releases) accept it fine.
|
|
8
|
+
*
|
|
9
|
+
* Detection by string match on the model id rather than a hard-coded
|
|
10
|
+
* allowlist: new model ids land between SDK releases, and we don't want
|
|
11
|
+
* to break temperature on legacy agents the day a new family ships.
|
|
12
|
+
*/
|
|
13
|
+
export declare function modelRejectsTemperature(model: string | undefined): boolean;
|
|
14
|
+
export interface AnthropicProviderOptions {
|
|
15
|
+
apiKey: string;
|
|
16
|
+
/** Optional override for the Anthropic API base URL (proxies, custom
|
|
17
|
+
* gateways). Defaults to the SDK's built-in production endpoint. */
|
|
18
|
+
baseURL?: string;
|
|
19
|
+
}
|
|
20
|
+
export declare class AnthropicProvider implements LLMProvider {
|
|
21
|
+
readonly id = "anthropic";
|
|
22
|
+
readonly displayName = "Anthropic";
|
|
23
|
+
readonly capabilities: LLMProviderCapabilities;
|
|
24
|
+
private readonly client;
|
|
25
|
+
constructor(opts: AnthropicProviderOptions);
|
|
26
|
+
stream(params: LLMStreamParams): AsyncGenerator<LLMStreamEvent>;
|
|
27
|
+
}
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// ─── Anthropic provider ──────────────────────────────────────────────────────
|
|
3
|
+
//
|
|
4
|
+
// Wraps `@anthropic-ai/sdk` behind the framework-free `LLMProvider` contract.
|
|
5
|
+
// The runner no longer talks to Anthropic directly — it goes through this
|
|
6
|
+
// adapter, which keeps every Anthropic-specific quirk (stream event shape,
|
|
7
|
+
// the 4.5+ temperature deprecation, the `ToolResultBlockParam` content
|
|
8
|
+
// envelope) confined to one file.
|
|
9
|
+
//
|
|
10
|
+
// Behavioural parity with the pre-refactor runner is the goal: same streaming
|
|
11
|
+
// granularity, same model-router triggers, same temperature-rejection
|
|
12
|
+
// heuristic, same `(tool completed with no output)` sentinel. Anything else
|
|
13
|
+
// would be a silent behaviour change for every existing AgentForge install.
|
|
14
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
15
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
16
|
+
};
|
|
17
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
18
|
+
exports.AnthropicProvider = void 0;
|
|
19
|
+
exports.modelRejectsTemperature = modelRejectsTemperature;
|
|
20
|
+
const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
|
|
21
|
+
/**
 * Reports whether an Anthropic model id belongs to a family that rejects
 * the `temperature` parameter.
 *
 * Anthropic's newer model families deprecated `temperature` entirely — they
 * auto-tune sampling internally and return 400
 * `invalid_request_error: \`temperature\` is deprecated for this model` if
 * the caller still sends one. Older families (3.x, the original 4.0 / 4.1
 * releases) accept it fine.
 *
 * Detection parses the version out of the model id rather than consulting a
 * hard-coded allowlist: new model ids land between SDK releases, and we
 * don't want to break temperature on legacy agents the day a new family
 * ships. Anything at version 4.5 or above counts as "newer":
 *
 *   - `claude-<family>-4-5`, `-4-6`, … including two-digit minors (`-4-10`)
 *   - dot-separated gateway spellings (`anthropic/claude-sonnet-4.5`)
 *   - future majors, with or without a minor (`claude-opus-5`, `-5-1`)
 *
 * Date suffixes (`claude-sonnet-4-20250514`) are never mistaken for a minor
 * version because a minor is at most two digits and must not be followed by
 * another digit. Legacy `claude-3-5-sonnet-…` ids put the version before the
 * family name and therefore never match.
 *
 * @param {string | undefined} model Model id as configured by the caller.
 * @returns {boolean} `true` when `temperature` must be omitted for the model.
 */
function modelRejectsTemperature(model) {
    if (!model)
        return false;
    // Strip bracket suffixes like "claude-opus-4-7[1m]" (long-context
    // variant). Vendor prefixes like "anthropic/claude-..." need no special
    // handling because the pattern below is unanchored.
    const id = model.toLowerCase().replace(/\[[^\]]*\]/g, '');
    // claude-<family>-<major>[<sep><minor>], where neither number may run
    // into further digits (that would be a date stamp, not a version).
    const versionPattern = /claude-[a-z]+-(\d{1,2})(?:[-.](\d{1,2}))?(?!\d)/g;
    for (const [, majorText, minorText] of id.matchAll(versionPattern)) {
        const major = Number(majorText);
        const minor = Number(minorText ?? 0);
        if (major > 4 || (major === 4 && minor >= 5))
            return true;
    }
    return false;
}
|
|
44
|
+
/**
 * `LLMProvider` implementation backed by `@anthropic-ai/sdk`.
 *
 * Translates provider-agnostic `LLMStreamParams` into an Anthropic
 * `messages.stream()` call and re-emits the SDK's stream events as
 * normalized `LLMStreamEvent`s: `text_delta` / `tool_use_start` while the
 * response streams, then exactly one `usage_delta` followed by one
 * `message_stop` once the final message has been assembled.
 */
class AnthropicProvider {
    /**
     * @param {{ apiKey: string, baseURL?: string }} opts API key plus an
     *   optional base-URL override; both are handed to the SDK client as-is.
     */
    constructor(opts) {
        this.id = 'anthropic';
        this.displayName = 'Anthropic';
        this.capabilities = {
            supportsTools: true,
            supportsStreaming: true,
            // Per-call gating still lives in `stream()` because the heuristic is
            // model-specific (Claude 4.0 vs 4.5+) — at the provider level we
            // declare "we know how to handle temperature when supplied" and let
            // the stream method decide on a per-turn basis.
            supportsTemperature: true,
            supportsParallelTools: true,
        };
        this.client = new sdk_1.default({
            apiKey: opts.apiKey,
            baseURL: opts.baseURL,
        });
    }
    /**
     * Streams one assistant turn.
     *
     * @param params Model id, system prompt, messages, optional tools,
     *   `maxTokens` and optional `temperature`.
     * @yields `text_delta` for each text fragment, `tool_use_start` when a
     *   tool_use block opens (with an empty `input` — see below), then one
     *   `usage_delta` and finally one `message_stop`.
     * @throws Whatever the SDK stream or `finalMessage()` rejects with; no
     *   error is caught here.
     */
    async *stream(params) {
        // Only forward temperature when the caller supplied a number AND the
        // target model family still accepts the parameter.
        const includeTemperature = typeof params.temperature === 'number' &&
            !modelRejectsTemperature(params.model);
        const stream = this.client.messages.stream({
            model: params.model,
            max_tokens: params.maxTokens,
            ...(includeTemperature ? { temperature: params.temperature } : {}),
            system: params.systemPrompt,
            messages: toAnthropicMessages(params.messages),
            tools: params.tools,
        });
        // Mid-stream events — text deltas land here; tool_use blocks are
        // recognised at `content_block_start` so the runner can yield a
        // `tool_use_start` chunk to its SSE consumer immediately. The full
        // parsed input only lands once `finalMessage()` resolves; the runner
        // doesn't need it mid-stream so we just forward the name+id.
        //
        // Usage reported on `message_delta` is CUMULATIVE for the message, so
        // each report overwrites the previous one instead of being added to
        // it; adding would double-count whenever more than one `message_delta`
        // arrives. Fields a report leaves out keep their last known value.
        let usageInput = 0;
        let usageOutput = 0;
        let usageCacheCreate = 0;
        let usageCacheRead = 0;
        for await (const event of stream) {
            if (event.type === 'content_block_start') {
                if (event.content_block.type === 'tool_use') {
                    yield {
                        type: 'tool_use_start',
                        toolName: event.content_block.name,
                        toolUseId: event.content_block.id,
                        // Input arrives as JSON deltas; we don't have it yet at
                        // `content_block_start`. Runner reads the parsed input
                        // from `message_stop.content` below.
                        input: {},
                    };
                }
            }
            else if (event.type === 'content_block_delta') {
                if (event.delta.type === 'text_delta') {
                    yield { type: 'text_delta', delta: event.delta.text };
                }
            }
            else if (event.type === 'message_delta') {
                const reported = event.usage;
                if (reported) {
                    usageInput = reported.input_tokens ?? usageInput;
                    usageOutput = reported.output_tokens ?? usageOutput;
                    usageCacheCreate = reported.cache_creation_input_tokens ?? usageCacheCreate;
                    usageCacheRead = reported.cache_read_input_tokens ?? usageCacheRead;
                }
            }
        }
        const finalMessage = await stream.finalMessage();
        const finalUsage = finalMessage.usage;
        // Prefer what the stream reported; fall back to the assembled final
        // message for anything the deltas never carried (a zero counts as
        // "not reported", matching the token fields' long-standing fallback).
        const inputTokens = usageInput || finalUsage?.input_tokens || 0;
        const outputTokens = usageOutput || finalUsage?.output_tokens || 0;
        // Emit a single usage_delta with the total — keeps the contract simple
        // (no caller needs to know whether the wire format buffered usage).
        yield {
            type: 'usage_delta',
            usage: {
                inputTokens,
                outputTokens,
                totalTokens: inputTokens + outputTokens,
                cacheCreationInputTokens: usageCacheCreate || finalUsage?.cache_creation_input_tokens || undefined,
                cacheReadInputTokens: usageCacheRead || finalUsage?.cache_read_input_tokens || undefined,
            },
        };
        yield {
            type: 'message_stop',
            stopReason: normalizeStopReason(finalMessage.stop_reason),
            content: fromAnthropicContent(finalMessage.content),
        };
    }
}
|
|
133
|
+
exports.AnthropicProvider = AnthropicProvider;
|
|
134
|
+
// ─── Translation helpers ────────────────────────────────────────────────────
|
|
135
|
+
/**
 * Converts provider-agnostic `LLMMessage[]` into the message array handed
 * to Anthropic's `messages.stream()`.
 *
 * String content is forwarded untouched. Block content is mapped block by
 * block: `text` and `tool_use` blocks are copied field for field, and every
 * other block is emitted as a `tool_result` built from its `tool_use_id`,
 * `content` and `is_error` fields.
 *
 * @param messages Conversation history in the provider-agnostic shape.
 * @returns A new array of `{ role, content }` objects; the input is not mutated.
 */
function toAnthropicMessages(messages) {
    const convertBlock = (block) => {
        switch (block.type) {
            case 'text':
                return { type: 'text', text: block.text };
            case 'tool_use':
                return {
                    type: 'tool_use',
                    id: block.id,
                    name: block.name,
                    input: block.input,
                };
            default:
                // Anything that is neither text nor tool_use is treated as a
                // tool_result block.
                return {
                    type: 'tool_result',
                    tool_use_id: block.tool_use_id,
                    content: block.content,
                    is_error: block.is_error,
                };
        }
    };
    return messages.map(({ role, content }) => ({
        role,
        content: typeof content === 'string'
            ? content
            : content.map((block) => convertBlock(block)),
    }));
}
|
|
170
|
+
/**
 * Converts the content array of an Anthropic final message into
 * provider-agnostic content blocks — the reverse direction of
 * `toAnthropicMessages`, used for the `message_stop` event.
 *
 * Only `text` and `tool_use` blocks are carried over, in their original
 * order; every other block type is left out of the result.
 *
 * @param content Iterable of Anthropic content blocks.
 * @returns A new array holding the retained blocks.
 */
function fromAnthropicContent(content) {
    const kept = [];
    for (const block of content) {
        switch (block.type) {
            case 'text':
                kept.push({ type: 'text', text: block.text });
                break;
            case 'tool_use':
                kept.push({
                    type: 'tool_use',
                    id: block.id,
                    name: block.name,
                    input: block.input,
                });
                break;
            default:
                // server_tool_use / web_search_tool_result are Anthropic-only
                // built-ins we don't surface today — silently drop them. If we
                // ever expose them, add explicit cases here.
                break;
        }
    }
    return kept;
}
|
|
195
|
+
/**
 * Maps an Anthropic `stop_reason` onto the four stop reasons used by the
 * `message_stop` event.
 *
 * `tool_use`, `max_tokens` and `stop_sequence` pass through unchanged; any
 * other value (including `null` / `undefined`) becomes `end_turn`.
 *
 * @param raw Stop reason as reported on the final message.
 * @returns One of `'end_turn' | 'tool_use' | 'max_tokens' | 'stop_sequence'`.
 */
function normalizeStopReason(raw) {
    const passthrough = ['tool_use', 'max_tokens', 'stop_sequence'];
    return passthrough.includes(raw) ? raw : 'end_turn';
}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
export type { LLMProvider, LLMProviderCapabilities, LLMStreamParams, LLMStreamEvent, LLMMessage, LLMContentBlock, LLMToolSchema, } from './types';
|
|
2
|
+
export { AnthropicProvider, modelRejectsTemperature } from './anthropic-provider';
|
|
3
|
+
export type { AnthropicProviderOptions } from './anthropic-provider';
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.modelRejectsTemperature = exports.AnthropicProvider = void 0;
|
|
4
|
+
var anthropic_provider_1 = require("./anthropic-provider");
|
|
5
|
+
Object.defineProperty(exports, "AnthropicProvider", { enumerable: true, get: function () { return anthropic_provider_1.AnthropicProvider; } });
|
|
6
|
+
Object.defineProperty(exports, "modelRejectsTemperature", { enumerable: true, get: function () { return anthropic_provider_1.modelRejectsTemperature; } });
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import type { TokenUsage } from '../types/agent.types';
|
|
2
|
+
export interface LLMProviderCapabilities {
|
|
3
|
+
/** Tool calling support. Old Anthropic 2.x and most fine-tunes don't
|
|
4
|
+
* expose tools; the runner falls back to text-only completions when
|
|
5
|
+
* this is false (no tools attached, no tool_use_start chunks). */
|
|
6
|
+
supportsTools: boolean;
|
|
7
|
+
/** Streaming (token-by-token) support. Required for the chat surface
|
|
8
|
+
* in AgentForge today; a `false` provider can still be used for
|
|
9
|
+
* one-shot helpers (approval copywriter) but not for chat. */
|
|
10
|
+
supportsStreaming: boolean;
|
|
11
|
+
/** Some Anthropic 4.5+ families reject the `temperature` parameter
|
|
12
|
+
* entirely (auto-tune sampling internally). Set false on those so
|
|
13
|
+
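* the runner skips the param instead of emitting a 400. A provider whose support varies per model (e.g. `AnthropicProvider`) keeps this true and drops the param itself on a per-call basis. */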
|
|
14
|
+
supportsTemperature: boolean;
|
|
15
|
+
/** Whether the provider lets the model emit multiple tool_use blocks
|
|
16
|
+
* in a single assistant turn. Anthropic does. OpenAI does. Some
|
|
17
|
+
* smaller models only do one. The runner doesn't *require* this —
|
|
18
|
+
* it loops regardless — but downstream UI can adapt. */
|
|
19
|
+
supportsParallelTools: boolean;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Provider-agnostic chat message. The runner translates the Anthropic-shaped
|
|
23
|
+
* `AnthropicMessage[]` it currently carries into this shape on the way INTO
|
|
24
|
+
* a provider call; the provider translates back into its own native shape
|
|
25
|
+
* just before hitting the wire. This keeps the runner's loop one-size-fits-
|
|
26
|
+
* all without leaking Anthropic types out of the AnthropicProvider.
|
|
27
|
+
*/
|
|
28
|
+
export type LLMMessage = {
|
|
29
|
+
role: 'user';
|
|
30
|
+
content: string | LLMContentBlock[];
|
|
31
|
+
} | {
|
|
32
|
+
role: 'assistant';
|
|
33
|
+
content: string | LLMContentBlock[];
|
|
34
|
+
};
|
|
35
|
+
/**
|
|
36
|
+
* Multi-part message content. Mirrors Anthropic's content blocks without
|
|
37
|
+
* inheriting from `@anthropic-ai/sdk`. Providers that only speak plain text
|
|
38
|
+
* (some smaller LangChain models) flatten the array down to a string on
|
|
39
|
+
* their side.
|
|
40
|
+
*/
|
|
41
|
+
export type LLMContentBlock = {
|
|
42
|
+
type: 'text';
|
|
43
|
+
text: string;
|
|
44
|
+
} | {
|
|
45
|
+
type: 'tool_use';
|
|
46
|
+
id: string;
|
|
47
|
+
name: string;
|
|
48
|
+
input: Record<string, unknown>;
|
|
49
|
+
} | {
|
|
50
|
+
type: 'tool_result';
|
|
51
|
+
tool_use_id: string;
|
|
52
|
+
content: string;
|
|
53
|
+
is_error?: boolean;
|
|
54
|
+
};
|
|
55
|
+
/**
|
|
56
|
+
* Tool description handed to the provider so the model knows what to call.
|
|
57
|
+
* Same shape as Anthropic's `Tool` type; OpenAI's `function` shape is
|
|
58
|
+
* derived inside the LangChain provider via `convertToOpenAITool`.
|
|
59
|
+
*/
|
|
60
|
+
export interface LLMToolSchema {
|
|
61
|
+
name: string;
|
|
62
|
+
description: string;
|
|
63
|
+
input_schema: Record<string, unknown>;
|
|
64
|
+
}
|
|
65
|
+
export interface LLMStreamParams {
|
|
66
|
+
/** Concrete model id ('claude-opus-4-7', 'gpt-4o', 'grok-2-latest'). */
|
|
67
|
+
model: string;
|
|
68
|
+
systemPrompt: string;
|
|
69
|
+
messages: LLMMessage[];
|
|
70
|
+
tools?: LLMToolSchema[];
|
|
71
|
+
maxTokens: number;
|
|
72
|
+
/** Caller-supplied. The provider should drop it silently if
|
|
73
|
+
* `capabilities.supportsTemperature` is false. */
|
|
74
|
+
temperature?: number;
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Normalized event the provider yields during `stream()`. The runner consumes
|
|
78
|
+
* these and translates them into `StreamChunk` (the platform-facing shape).
|
|
79
|
+
*
|
|
80
|
+
* `text_delta` → token-by-token assistant text.
|
|
81
|
+
* `tool_use_start` → model decided to call a tool; carries the input
|
|
82
|
+
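* the provider parsed out of its native event stream. `input` may still be `{}` when the provider announces the call before its arguments finish streaming (as `AnthropicProvider` does); the parsed input is then available in `message_stop.content`.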
|
|
83
|
+
* The runner dispatches the tool and feeds the result
|
|
84
|
+
* back via a follow-up message in the next iteration.
|
|
85
|
+
* `usage_delta` → cumulative token-usage update. Some providers only
|
|
86
|
+
* emit usage at the end (one event); others emit
|
|
87
|
+
* running totals — the runner sums whatever arrives.
|
|
88
|
+
* `message_stop` → end of the assistant turn. Carries the stop reason
|
|
89
|
+
* so the runner knows whether to loop again for tool
|
|
90
|
+
* results or finalize.
|
|
91
|
+
*
|
|
92
|
+
* NOTE: we deliberately don't surface `content_block_start`/`stop` etc. —
|
|
93
|
+
* those are Anthropic-specific transport details. Providers absorb them.
|
|
94
|
+
*/
|
|
95
|
+
export type LLMStreamEvent = {
|
|
96
|
+
type: 'text_delta';
|
|
97
|
+
delta: string;
|
|
98
|
+
} | {
|
|
99
|
+
type: 'tool_use_start';
|
|
100
|
+
toolUseId: string;
|
|
101
|
+
toolName: string;
|
|
102
|
+
input: Record<string, unknown>;
|
|
103
|
+
} | {
|
|
104
|
+
type: 'usage_delta';
|
|
105
|
+
usage: Partial<TokenUsage>;
|
|
106
|
+
} | {
|
|
107
|
+
type: 'message_stop';
|
|
108
|
+
stopReason: 'end_turn' | 'tool_use' | 'max_tokens' | 'stop_sequence';
|
|
109
|
+
/** Final assistant content as a single array — used by the runner to
|
|
110
|
+
* rebuild the assistant message for the next loop iteration without
|
|
111
|
+
* re-asking the provider. Mirrors Anthropic's `finalMessage.content`. */
|
|
112
|
+
content: LLMContentBlock[];
|
|
113
|
+
};
|
|
114
|
+
export interface LLMProvider {
|
|
115
|
+
/** Stable id used by the platform's resolver to pick a provider via
|
|
116
|
+
* `af_settings['llm.active_provider']`. Lowercase, hyphenated. */
|
|
117
|
+
readonly id: string;
|
|
118
|
+
/** Human-readable label for admin UIs and telemetry. */
|
|
119
|
+
readonly displayName: string;
|
|
120
|
+
readonly capabilities: LLMProviderCapabilities;
|
|
121
|
+
/**
|
|
122
|
+
* Streaming chat completion. Yields normalized events for one assistant
|
|
123
|
+
* turn. The runner calls `stream()` once per loop iteration — when the
|
|
124
|
+
* stop reason is `tool_use`, it appends tool results to `params.messages`
|
|
125
|
+
* and calls `stream()` again on the next iteration.
|
|
126
|
+
*
|
|
127
|
+
* Implementations MUST:
|
|
128
|
+
* - emit `text_delta` events for assistant text as it arrives
|
|
129
|
+
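* - emit `tool_use_start` once per tool call (`input` may be `{}` if emitted before the arguments finish streaming; `message_stop.content` carries the parsed input)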
|
|
130
|
+
* - emit `usage_delta` at least once (final total) when usage is known
|
|
131
|
+
* - emit `message_stop` as the LAST event and only once
|
|
132
|
+
* - throw on transport errors (caller handles fallback)
|
|
133
|
+
*/
|
|
134
|
+
stream(params: LLMStreamParams): AsyncGenerator<LLMStreamEvent>;
|
|
135
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// ─── LLM Provider abstraction ────────────────────────────────────────────────
|
|
3
|
+
//
|
|
4
|
+
// Goal: every concrete LLM family (Anthropic, OpenAI via LangChain, Gemini,
|
|
5
|
+
// Grok, …) ships behind the same `LLMProvider` interface so the rest of the
|
|
6
|
+
// SDK (runner loop, orchestrator, approval copywriter) is provider-agnostic.
|
|
7
|
+
//
|
|
8
|
+
// The interface is intentionally narrow — it only owns one thing: turn a
|
|
9
|
+
// prompt + tool catalog into a stream of normalized events. The agentic
|
|
10
|
+
// loop (tool dispatch, approval gating, model routing) stays where it is
|
|
11
|
+
// in `AgentRunnerService` because it is identical across providers.
|
|
12
|
+
//
|
|
13
|
+
// Streaming-only on purpose: every modern provider supports streaming and
|
|
14
|
+
// the runner converts streamed chunks into the `StreamChunk` shape the
|
|
15
|
+
// platform's SSE controller already speaks. A non-streaming `run()` would
|
|
16
|
+
// be a second code path with the same loop — pointless.
|
|
17
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
@@ -3,15 +3,19 @@ import type { AgentDefinition, AnthropicConfig } from '../types/config.types';
|
|
|
3
3
|
import type { ToolRegistryService } from './tool-registry.service';
|
|
4
4
|
import type { Logger } from './tool-registry.service';
|
|
5
5
|
import { type ToolApprovalGate } from './tool-approval-gate';
|
|
6
|
+
import { type LLMProvider } from '../providers';
|
|
6
7
|
/**
|
|
7
|
-
* Framework-free runner
|
|
8
|
-
*
|
|
9
|
-
*
|
|
8
|
+
* Framework-free runner. Handles the agentic loop (tool calls, model
|
|
9
|
+
* routing, approval gating) and delegates the LLM call itself to a
|
|
10
|
+
* pluggable `LLMProvider` — Anthropic by default, or LangChain-backed
|
|
11
|
+
* (OpenAI/Grok/Gemini/…) when the host wires a different provider in.
|
|
12
|
+
*
|
|
13
|
+
* Streaming is exposed as `AsyncGenerator<StreamChunk>` so any transport
|
|
14
|
+
* (SSE, fetch+ReadableStream, WebSocket, etc.) can consume it.
|
|
10
15
|
*/
|
|
11
16
|
export declare class AgentRunnerService {
|
|
12
|
-
private readonly anthropicConfig;
|
|
13
17
|
private readonly toolRegistry;
|
|
14
|
-
private readonly
|
|
18
|
+
private readonly provider;
|
|
15
19
|
private readonly logger;
|
|
16
20
|
/**
|
|
17
21
|
* Optional pre-dispatch gate. When supplied, every tool call passes
|
|
@@ -27,7 +31,30 @@ export declare class AgentRunnerService {
|
|
|
27
31
|
* story behaviorally identical to the pre-gate codebase.
|
|
28
32
|
*/
|
|
29
33
|
private readonly approvalGate;
|
|
30
|
-
|
|
34
|
+
/**
|
|
35
|
+
* Default model id surfaced to per-turn `selectModel()` when neither
|
|
36
|
+
* the agent nor the overrides pin one. Kept on the runner (not the
|
|
37
|
+
* provider) because the routing strategy is provider-agnostic — the
|
|
38
|
+
* provider only validates that the resolved model id is one it can
|
|
39
|
+
* serve.
|
|
40
|
+
*/
|
|
41
|
+
private readonly defaultModel;
|
|
42
|
+
/** Default `max_tokens` ceiling when the agent / overrides leave it
|
|
43
|
+
* unset. Same rationale as `defaultModel` — provider-agnostic knob. */
|
|
44
|
+
private readonly defaultMaxTokens;
|
|
45
|
+
/**
|
|
46
|
+
* Two-form constructor for backwards compatibility:
|
|
47
|
+
*
|
|
48
|
+
* new AgentRunnerService(anthropicConfig, toolRegistry, opts?)
|
|
49
|
+
* ^ legacy form — wraps `anthropicConfig` in an `AnthropicProvider`
|
|
50
|
+
* so existing callers keep working without changes.
|
|
51
|
+
*
|
|
52
|
+
* new AgentRunnerService({ provider, defaultModel?, defaultMaxTokens? },
|
|
53
|
+
* toolRegistry, opts?)
|
|
54
|
+
* ^ new form — caller supplies any `LLMProvider` (Anthropic,
|
|
55
|
+
* LangChain, …). The provider owns the wire-level call.
|
|
56
|
+
*/
|
|
57
|
+
constructor(providerOrLegacyConfig: AnthropicConfig | RunnerProviderConfig, toolRegistry: ToolRegistryService, opts?: {
|
|
31
58
|
logger?: Logger;
|
|
32
59
|
approvalGate?: ToolApprovalGate;
|
|
33
60
|
});
|
|
@@ -78,3 +105,14 @@ export declare class AgentRunnerService {
|
|
|
78
105
|
private buildToolList;
|
|
79
106
|
private dispatchTool;
|
|
80
107
|
}
|
|
108
|
+
/**
|
|
109
|
+
* New-style runner config: a fully-wired `LLMProvider` plus the runner-
|
|
110
|
+
* level defaults (`defaultModel`, `defaultMaxTokens`). The Anthropic-shaped
|
|
111
|
+
* legacy config (`AnthropicConfig`) remains accepted by the runner for
|
|
112
|
+
* back-compat — see the constructor's two-form signature.
|
|
113
|
+
*/
|
|
114
|
+
export interface RunnerProviderConfig {
|
|
115
|
+
provider: LLMProvider;
|
|
116
|
+
defaultModel?: string;
|
|
117
|
+
defaultMaxTokens?: number;
|
|
118
|
+
}
|
|
@@ -1,213 +1,113 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
3
|
exports.AgentRunnerService = void 0;
|
|
7
|
-
const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
|
|
8
4
|
const crypto_1 = require("crypto");
|
|
9
5
|
const tool_approval_gate_1 = require("./tool-approval-gate");
|
|
10
6
|
const model_strategy_1 = require("../types/model-strategy");
|
|
7
|
+
const providers_1 = require("../providers");
|
|
11
8
|
const noopLogger = {
|
|
12
9
|
log: () => { }, warn: () => { }, debug: () => { }, error: () => { },
|
|
13
10
|
};
|
|
14
11
|
/**
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
* `
|
|
18
|
-
*
|
|
19
|
-
* releases) accept it fine.
|
|
20
|
-
*
|
|
21
|
-
* Detection by string match on the model id rather than a hard-coded
|
|
22
|
-
* allowlist: new model ids land between SDK releases, and we don't want
|
|
23
|
-
* to break temperature on legacy agents the day a new family ships.
|
|
24
|
-
* Pattern: anything that contains `-4-5`, `-4-6`, `-4-7`, …, `-5-*`,
|
|
25
|
-
* `-6-*`, etc. counts as "newer." Old 4-0 / 4-1 / 3-x ids are unaffected.
|
|
12
|
+
* Framework-free runner. Handles the agentic loop (tool calls, model
|
|
13
|
+
* routing, approval gating) and delegates the LLM call itself to a
|
|
14
|
+
* pluggable `LLMProvider` — Anthropic by default, or LangChain-backed
|
|
15
|
+
* (OpenAI/Grok/Gemini/…) when the host wires a different provider in.
|
|
26
16
|
*
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
* is a single 400 the operator can fix by clearing the temperature in
|
|
30
|
-
* the editor; the cost of NOT filtering is the same 400 today.
|
|
31
|
-
*/
|
|
32
|
-
function modelRejectsTemperature(model) {
|
|
33
|
-
if (!model)
|
|
34
|
-
return false;
|
|
35
|
-
// Normalize: ignore vendor prefixes like "anthropic/claude-..." and
|
|
36
|
-
// bracket suffixes like "claude-opus-4-7[1m]" (long-context variant).
|
|
37
|
-
const m = model.toLowerCase().replace(/\[[^\]]*\]/g, '');
|
|
38
|
-
// claude-*-4-5, 4-6, 4-7, 4-8 …
|
|
39
|
-
if (/claude-[a-z]+-4-([5-9])\b/.test(m))
|
|
40
|
-
return true;
|
|
41
|
-
// claude-*-5-x, claude-*-6-x, … (future major bumps)
|
|
42
|
-
if (/claude-[a-z]+-([5-9])-/.test(m))
|
|
43
|
-
return true;
|
|
44
|
-
return false;
|
|
45
|
-
}
|
|
46
|
-
/**
|
|
47
|
-
* Framework-free runner for Claude. Handles the agentic loop (tool calls) for
|
|
48
|
-
* sync runs and exposes streaming as an `AsyncGenerator<StreamChunk>` so any
|
|
49
|
-
* transport (SSE, fetch+ReadableStream, WebSocket, etc.) can consume it.
|
|
17
|
+
* Streaming is exposed as `AsyncGenerator<StreamChunk>` so any transport
|
|
18
|
+
* (SSE, fetch+ReadableStream, WebSocket, etc.) can consume it.
|
|
50
19
|
*/
|
|
51
20
|
class AgentRunnerService {
|
|
52
|
-
|
|
53
|
-
|
|
21
|
+
/**
|
|
22
|
+
* Two-form constructor for backwards compatibility:
|
|
23
|
+
*
|
|
24
|
+
* new AgentRunnerService(anthropicConfig, toolRegistry, opts?)
|
|
25
|
+
* ^ legacy form — wraps `anthropicConfig` in an `AnthropicProvider`
|
|
26
|
+
* so existing callers keep working without changes.
|
|
27
|
+
*
|
|
28
|
+
* new AgentRunnerService({ provider, defaultModel?, defaultMaxTokens? },
|
|
29
|
+
* toolRegistry, opts?)
|
|
30
|
+
* ^ new form — caller supplies any `LLMProvider` (Anthropic,
|
|
31
|
+
* LangChain, …). The provider owns the wire-level call.
|
|
32
|
+
*/
|
|
33
|
+
constructor(providerOrLegacyConfig, toolRegistry, opts = {}) {
|
|
54
34
|
this.toolRegistry = toolRegistry;
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
35
|
+
if (isRunnerProviderConfig(providerOrLegacyConfig)) {
|
|
36
|
+
this.provider = providerOrLegacyConfig.provider;
|
|
37
|
+
this.defaultModel = providerOrLegacyConfig.defaultModel ?? 'claude-opus-4-6';
|
|
38
|
+
this.defaultMaxTokens = providerOrLegacyConfig.defaultMaxTokens;
|
|
39
|
+
}
|
|
40
|
+
else {
|
|
41
|
+
// Legacy path: build an AnthropicProvider from the inline config so
|
|
42
|
+
// every caller written before the LLMProvider abstraction landed
|
|
43
|
+
// keeps the exact same behaviour.
|
|
44
|
+
this.provider = new providers_1.AnthropicProvider({
|
|
45
|
+
apiKey: providerOrLegacyConfig.apiKey,
|
|
46
|
+
baseURL: providerOrLegacyConfig.baseURL,
|
|
47
|
+
});
|
|
48
|
+
this.defaultModel = providerOrLegacyConfig.defaultModel ?? 'claude-opus-4-6';
|
|
49
|
+
this.defaultMaxTokens = providerOrLegacyConfig.defaultMaxTokens;
|
|
50
|
+
}
|
|
59
51
|
this.logger = opts.logger ?? noopLogger;
|
|
60
52
|
this.approvalGate = opts.approvalGate;
|
|
61
53
|
}
|
|
62
54
|
// ─── Run (non-streaming) ──────────────────────────────────────────────────
|
|
63
55
|
async run(agent, messages, context, overrides) {
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
//
|
|
69
|
-
// `temperature` when tools are present — they auto-tune sampling for
|
|
70
|
-
// tool use. Only forward it when the operator/caller declared one
|
|
71
|
-
// explicitly; never inject a default. Old models that required it
|
|
72
|
-
// accept its absence too (they fall back to their own internal
|
|
73
|
-
// default of 1.0).
|
|
74
|
-
const temperature = overrides?.temperature ?? agent.temperature;
|
|
75
|
-
const { tools, extras } = this.buildToolList(agent, overrides);
|
|
76
|
-
const systemPrompt = this.buildSystemPrompt(agent, tools, overrides);
|
|
56
|
+
// `run()` is implemented on top of `stream()` to avoid two parallel
|
|
57
|
+
// loops drifting (every bug fixed in one path historically had to
|
|
58
|
+
// be ported by hand to the other). Streaming a non-streaming caller
|
|
59
|
+
// costs almost nothing — the events accumulate in-memory — and the
|
|
60
|
+
// single-source-of-truth loop is well worth the minor overhead.
|
|
77
61
|
const toolCalls = [];
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
// don't add tools mid-conversation); `estimatedInputTokens`
|
|
82
|
-
// starts from a word-count heuristic and gets replaced by the
|
|
83
|
-
// real `usage.input_tokens` once we have a response.
|
|
84
|
-
const turnSignals = {
|
|
85
|
-
hasTools: !!tools && tools.length > 0,
|
|
86
|
-
hasApprovalTool: hasApprovalGatedTool(agent),
|
|
87
|
-
estimatedInputTokens: estimateInputTokens(systemPrompt, currentMessages),
|
|
88
|
-
};
|
|
89
|
-
let totalUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
|
|
90
|
-
let finalContent = '';
|
|
62
|
+
const toolCallStartByUseId = new Map();
|
|
63
|
+
let usage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
|
|
64
|
+
const textParts = [];
|
|
91
65
|
let stopReason = 'end_turn';
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
// Per-turn model selection. When overrides force a model we
|
|
98
|
-
// honour it (manual `agent.runMessage({ overrides: { model }})`
|
|
99
|
-
// beats the strategy). Otherwise the strategy decides; absent
|
|
100
|
-
// strategy → behave exactly like before this feature landed.
|
|
101
|
-
const selection = overrides?.model
|
|
102
|
-
? { model: overrides.model, reason: 'forced' }
|
|
103
|
-
: (0, model_strategy_1.selectModel)(agent.modelStrategy, turnSignals, baseModel);
|
|
104
|
-
const model = selection.model;
|
|
105
|
-
lastModel = model;
|
|
106
|
-
if (this.logger && selection.reason !== 'default' && selection.reason !== 'forced') {
|
|
107
|
-
this.logger.debug(`[modelRouter] agent=${agent.id} ${selection.reason}=${selection.trigger} → ${model}`);
|
|
66
|
+
let messageId = (0, crypto_1.randomUUID)();
|
|
67
|
+
const model = overrides?.model ?? agent.model ?? this.defaultModel;
|
|
68
|
+
for await (const chunk of this.stream(agent, messages, context, overrides)) {
|
|
69
|
+
if (chunk.type === 'text_delta') {
|
|
70
|
+
textParts.push(chunk.delta);
|
|
108
71
|
}
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
totalTokens: totalUsage.totalTokens + response.usage.input_tokens + response.usage.output_tokens,
|
|
132
|
-
cacheCreationInputTokens: (totalUsage.cacheCreationInputTokens ?? 0) +
|
|
133
|
-
(response.usage.cache_creation_input_tokens ?? 0),
|
|
134
|
-
cacheReadInputTokens: (totalUsage.cacheReadInputTokens ?? 0) +
|
|
135
|
-
(response.usage.cache_read_input_tokens ?? 0),
|
|
136
|
-
};
|
|
137
|
-
stopReason = response.stop_reason ?? 'end_turn';
|
|
138
|
-
if (response.stop_reason === 'tool_use') {
|
|
139
|
-
currentMessages = [...currentMessages, { role: 'assistant', content: response.content }];
|
|
140
|
-
const toolResults = [];
|
|
141
|
-
for (const block of response.content) {
|
|
142
|
-
if (block.type === 'tool_use') {
|
|
143
|
-
const start = Date.now();
|
|
144
|
-
let output = '';
|
|
145
|
-
let error;
|
|
146
|
-
try {
|
|
147
|
-
output = await this.dispatchTool(block.name, block.input, context, extras);
|
|
148
|
-
}
|
|
149
|
-
catch (err) {
|
|
150
|
-
// Approval-gate signals are NOT tool execution errors —
|
|
151
|
-
// they ARE the surface the caller of run() branches on.
|
|
152
|
-
// Re-throw so the loop aborts and the consumer (executor /
|
|
153
|
-
// conversation service) can persist the pause + surface
|
|
154
|
-
// the approvalId. Without this re-throw, the runner would
|
|
155
|
-
// feed back a `"Error executing tool …"` to the LLM,
|
|
156
|
-
// hiding the pause behind a regular tool failure.
|
|
157
|
-
if (err instanceof tool_approval_gate_1.ToolApprovalRequired ||
|
|
158
|
-
err instanceof tool_approval_gate_1.ToolBlockedError) {
|
|
159
|
-
throw err;
|
|
160
|
-
}
|
|
161
|
-
error = err instanceof Error ? err.message : String(err);
|
|
162
|
-
output = `Error executing tool ${block.name}: ${error}`;
|
|
163
|
-
}
|
|
164
|
-
toolCalls.push({
|
|
165
|
-
toolName: block.name,
|
|
166
|
-
toolUseId: block.id,
|
|
167
|
-
input: block.input,
|
|
168
|
-
output,
|
|
169
|
-
error,
|
|
170
|
-
durationMs: Date.now() - start,
|
|
171
|
-
});
|
|
172
|
-
toolResults.push({
|
|
173
|
-
type: 'tool_result',
|
|
174
|
-
tool_use_id: block.id,
|
|
175
|
-
// Sentinel keeps Anthropic happy when a tool produced
|
|
176
|
-
// no string output (e.g. a mutation that returned void).
|
|
177
|
-
content: output || '(tool completed with no output)',
|
|
178
|
-
is_error: !!error,
|
|
179
|
-
});
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
// Same defensive break as the stream path — if tool_use was
|
|
183
|
-
// signalled but we resolved zero tool calls, don't append
|
|
184
|
-
// an empty user message.
|
|
185
|
-
if (toolResults.length === 0) {
|
|
186
|
-
this.logger.warn(`Agent "${agent.id}" reported tool_use but emitted no resolvable tool calls. Closing the turn.`);
|
|
187
|
-
finalContent = response.content
|
|
188
|
-
.filter((b) => b.type === 'text')
|
|
189
|
-
.map((b) => b.text)
|
|
190
|
-
.join('');
|
|
191
|
-
break;
|
|
72
|
+
else if (chunk.type === 'tool_use_start') {
|
|
73
|
+
toolCallStartByUseId.set(chunk.toolUseId, {
|
|
74
|
+
name: chunk.toolName,
|
|
75
|
+
start: Date.now(),
|
|
76
|
+
});
|
|
77
|
+
}
|
|
78
|
+
else if (chunk.type === 'tool_result') {
|
|
79
|
+
// Match the result to the oldest still-pending tool_use_start with
|
|
80
|
+
// the same tool name (FIFO). The `tool_result` chunks yielded by
|
|
81
|
+
// `stream()` carry only `toolName` and `result`, not a `toolUseId`,
|
|
82
|
+
// so the name is the only key available for pairing here.
|
|
83
|
+
const pending = Array.from(toolCallStartByUseId.entries()).find(([, v]) => v.name === chunk.toolName);
|
|
84
|
+
if (pending) {
|
|
85
|
+
const [id, { name, start }] = pending;
|
|
86
|
+
toolCalls.push({
|
|
87
|
+
toolName: name,
|
|
88
|
+
toolUseId: id,
|
|
89
|
+
input: {},
|
|
90
|
+
output: chunk.result,
|
|
91
|
+
durationMs: Date.now() - start,
|
|
92
|
+
});
|
|
93
|
+
toolCallStartByUseId.delete(id);
|
|
192
94
|
}
|
|
193
|
-
currentMessages = [...currentMessages, { role: 'user', content: toolResults }];
|
|
194
95
|
}
|
|
195
|
-
else {
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
break;
|
|
96
|
+
else if (chunk.type === 'usage') {
|
|
97
|
+
usage = chunk.usage;
|
|
98
|
+
}
|
|
99
|
+
else if (chunk.type === 'done') {
|
|
100
|
+
messageId = chunk.messageId;
|
|
201
101
|
}
|
|
202
102
|
}
|
|
203
103
|
return {
|
|
204
104
|
messageId,
|
|
205
105
|
conversationId: context.conversationId,
|
|
206
|
-
content:
|
|
106
|
+
content: textParts.join(''),
|
|
207
107
|
role: 'assistant',
|
|
208
108
|
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
|
|
209
|
-
usage
|
|
210
|
-
model
|
|
109
|
+
usage,
|
|
110
|
+
model,
|
|
211
111
|
stopReason,
|
|
212
112
|
createdAt: new Date(),
|
|
213
113
|
};
|
|
@@ -215,25 +115,29 @@ class AgentRunnerService {
|
|
|
215
115
|
// ─── Run (streaming) ──────────────────────────────────────────────────────
|
|
216
116
|
async *stream(agent, messages, context, overrides) {
|
|
217
117
|
const messageId = (0, crypto_1.randomUUID)();
|
|
218
|
-
const
|
|
219
|
-
const
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
// tool use. Only forward it when the operator/caller declared one
|
|
224
|
-
// explicitly; never inject a default. Old models that required it
|
|
225
|
-
// accept its absence too (they fall back to their own internal
|
|
226
|
-
// default of 1.0).
|
|
118
|
+
const baseModel = overrides?.model ?? agent.model ?? this.defaultModel;
|
|
119
|
+
const maxTokens = overrides?.maxTokens ??
|
|
120
|
+
agent.maxTokens ??
|
|
121
|
+
this.defaultMaxTokens ??
|
|
122
|
+
4096;
|
|
227
123
|
const temperature = overrides?.temperature ?? agent.temperature;
|
|
228
124
|
const { tools, extras } = this.buildToolList(agent, overrides);
|
|
229
125
|
const systemPrompt = this.buildSystemPrompt(agent, tools, overrides);
|
|
230
|
-
|
|
126
|
+
// Re-shape Anthropic-typed tool schemas to the provider-agnostic
|
|
127
|
+
// ones. The two shapes are identical today — Anthropic's `Tool`
|
|
128
|
+
// declares `name`, `description`, `input_schema` — so for old callers
|
|
129
|
+
// this is a plain field copy; a missing `description` becomes ''.
|
|
130
|
+
const llmTools = tools?.map((t) => ({
|
|
131
|
+
name: t.name,
|
|
132
|
+
description: t.description ?? '',
|
|
133
|
+
input_schema: t.input_schema,
|
|
134
|
+
}));
|
|
135
|
+
let currentMessages = anthropicMessagesToLLM(messages);
|
|
231
136
|
let totalUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
|
|
232
|
-
// See `run()` above for the rationale on these signals.
|
|
233
137
|
const turnSignals = {
|
|
234
|
-
hasTools: !!
|
|
138
|
+
hasTools: !!llmTools && llmTools.length > 0,
|
|
235
139
|
hasApprovalTool: hasApprovalGatedTool(agent),
|
|
236
|
-
estimatedInputTokens: estimateInputTokens(systemPrompt,
|
|
140
|
+
estimatedInputTokens: estimateInputTokens(systemPrompt, messages),
|
|
237
141
|
};
|
|
238
142
|
while (true) {
|
|
239
143
|
const selection = overrides?.model
|
|
@@ -243,93 +147,94 @@ class AgentRunnerService {
|
|
|
243
147
|
if (this.logger && selection.reason !== 'default' && selection.reason !== 'forced') {
|
|
244
148
|
this.logger.debug(`[modelRouter] agent=${agent.id} ${selection.reason}=${selection.trigger} → ${model}`);
|
|
245
149
|
}
|
|
246
|
-
//
|
|
247
|
-
|
|
248
|
-
|
|
150
|
+
// Provider call. The provider decides per-call whether to honour
|
|
151
|
+
// `temperature` based on its own capabilities + the model id
|
|
152
|
+
// (Anthropic 4.5+ rejects it, OpenAI accepts it, …).
|
|
153
|
+
let stopReason = 'end_turn';
|
|
154
|
+
let finalContent = [];
|
|
155
|
+
for await (const event of this.provider.stream({
|
|
249
156
|
model,
|
|
250
|
-
|
|
251
|
-
...(includeTemperature ? { temperature } : {}),
|
|
252
|
-
system: systemPrompt,
|
|
157
|
+
systemPrompt,
|
|
253
158
|
messages: currentMessages,
|
|
254
|
-
tools:
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
type: 'tool_use_start',
|
|
261
|
-
toolName: event.content_block.name,
|
|
262
|
-
toolUseId: event.content_block.id,
|
|
263
|
-
};
|
|
264
|
-
}
|
|
159
|
+
tools: llmTools,
|
|
160
|
+
maxTokens,
|
|
161
|
+
temperature,
|
|
162
|
+
})) {
|
|
163
|
+
if (event.type === 'text_delta') {
|
|
164
|
+
yield { type: 'text_delta', delta: event.delta };
|
|
265
165
|
}
|
|
266
|
-
else if (event.type === '
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
166
|
+
else if (event.type === 'tool_use_start') {
|
|
167
|
+
yield {
|
|
168
|
+
type: 'tool_use_start',
|
|
169
|
+
toolName: event.toolName,
|
|
170
|
+
toolUseId: event.toolUseId,
|
|
171
|
+
};
|
|
270
172
|
}
|
|
271
|
-
else if (event.type === '
|
|
272
|
-
const deltaUsage = event.usage;
|
|
173
|
+
else if (event.type === 'usage_delta') {
|
|
273
174
|
totalUsage = {
|
|
274
|
-
inputTokens: totalUsage.inputTokens + (
|
|
275
|
-
outputTokens: totalUsage.outputTokens + (
|
|
175
|
+
inputTokens: totalUsage.inputTokens + (event.usage.inputTokens ?? 0),
|
|
176
|
+
outputTokens: totalUsage.outputTokens + (event.usage.outputTokens ?? 0),
|
|
276
177
|
totalTokens: totalUsage.totalTokens +
|
|
277
|
-
(
|
|
278
|
-
(
|
|
178
|
+
(event.usage.inputTokens ?? 0) +
|
|
179
|
+
(event.usage.outputTokens ?? 0),
|
|
180
|
+
cacheCreationInputTokens: (totalUsage.cacheCreationInputTokens ?? 0) +
|
|
181
|
+
(event.usage.cacheCreationInputTokens ?? 0),
|
|
182
|
+
cacheReadInputTokens: (totalUsage.cacheReadInputTokens ?? 0) +
|
|
183
|
+
(event.usage.cacheReadInputTokens ?? 0),
|
|
279
184
|
};
|
|
280
185
|
}
|
|
186
|
+
else if (event.type === 'message_stop') {
|
|
187
|
+
stopReason = event.stopReason;
|
|
188
|
+
finalContent = event.content;
|
|
189
|
+
}
|
|
281
190
|
}
|
|
282
|
-
const finalMessage = await stream.finalMessage();
|
|
283
191
|
// Refresh the input-token signal so the next iteration of the
|
|
284
192
|
// tool loop has the post-tool-result context length, not the
|
|
285
193
|
// initial estimate.
|
|
286
|
-
if (
|
|
287
|
-
turnSignals.estimatedInputTokens =
|
|
194
|
+
if (totalUsage.inputTokens > 0) {
|
|
195
|
+
turnSignals.estimatedInputTokens = totalUsage.inputTokens;
|
|
288
196
|
}
|
|
289
|
-
if (
|
|
290
|
-
|
|
197
|
+
if (stopReason === 'tool_use') {
|
|
198
|
+
// Carry the assistant message (text + tool_use blocks) forward
|
|
199
|
+
// so the next turn sees its own previous tool calls.
|
|
200
|
+
currentMessages = [
|
|
201
|
+
...currentMessages,
|
|
202
|
+
{ role: 'assistant', content: finalContent },
|
|
203
|
+
];
|
|
291
204
|
const toolResults = [];
|
|
292
|
-
for (const block of
|
|
293
|
-
if (block.type
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
err instanceof tool_approval_gate_1.ToolBlockedError) {
|
|
306
|
-
throw err;
|
|
307
|
-
}
|
|
308
|
-
output = `Error: ${err instanceof Error ? err.message : String(err)}`;
|
|
205
|
+
for (const block of finalContent) {
|
|
206
|
+
if (block.type !== 'tool_use')
|
|
207
|
+
continue;
|
|
208
|
+
let output = '';
|
|
209
|
+
try {
|
|
210
|
+
output = await this.dispatchTool(block.name, block.input, context, extras);
|
|
211
|
+
}
|
|
212
|
+
catch (err) {
|
|
213
|
+
// Approval-gate signals propagate; regular errors collapse
|
|
214
|
+
// into a tool result the LLM can react to.
|
|
215
|
+
if (err instanceof tool_approval_gate_1.ToolApprovalRequired ||
|
|
216
|
+
err instanceof tool_approval_gate_1.ToolBlockedError) {
|
|
217
|
+
throw err;
|
|
309
218
|
}
|
|
310
|
-
|
|
311
|
-
toolResults.push({
|
|
312
|
-
type: 'tool_result',
|
|
313
|
-
tool_use_id: block.id,
|
|
314
|
-
// Anthropic rejects empty tool_result content (part of
|
|
315
|
-
// the "messages.N: user messages must have non-empty
|
|
316
|
-
// content" 400). When the tool returned no string,
|
|
317
|
-
// substitute a sentinel so the next planning step still
|
|
318
|
-
// sees a coherent transcript.
|
|
319
|
-
content: output || '(tool completed with no output)',
|
|
320
|
-
});
|
|
219
|
+
output = `Error: ${err instanceof Error ? err.message : String(err)}`;
|
|
321
220
|
}
|
|
221
|
+
yield { type: 'tool_result', toolName: block.name, result: output };
|
|
222
|
+
toolResults.push({
|
|
223
|
+
type: 'tool_result',
|
|
224
|
+
tool_use_id: block.id,
|
|
225
|
+
// Anthropic rejects empty tool_result content. Sentinel
|
|
226
|
+
// keeps every provider happy.
|
|
227
|
+
content: output || '(tool completed with no output)',
|
|
228
|
+
});
|
|
322
229
|
}
|
|
323
|
-
// Defensive: if the model said `tool_use` but emitted zero
|
|
324
|
-
// tool_use blocks (or all were filtered for unknown names),
|
|
325
|
-
// appending `{role:'user', content:[]}` triggers the same
|
|
326
|
-
// Anthropic 400. Break and let whatever text the model
|
|
327
|
-
// already produced stand as the final answer.
|
|
328
230
|
if (toolResults.length === 0) {
|
|
329
231
|
this.logger.warn(`Agent "${agent.id}" reported tool_use but emitted no resolvable tool calls. Closing the turn.`);
|
|
330
232
|
break;
|
|
331
233
|
}
|
|
332
|
-
currentMessages = [
|
|
234
|
+
currentMessages = [
|
|
235
|
+
...currentMessages,
|
|
236
|
+
{ role: 'user', content: toolResults },
|
|
237
|
+
];
|
|
333
238
|
}
|
|
334
239
|
else {
|
|
335
240
|
break;
|
|
@@ -491,3 +396,56 @@ function hasApprovalGatedTool(agent) {
|
|
|
491
396
|
}
|
|
492
397
|
return false;
|
|
493
398
|
}
|
|
399
|
+
// ─── Message-shape translation ───────────────────────────────────────────────
|
|
400
|
+
/**
|
|
401
|
+
* The runner's public API still accepts `AnthropicMessage[]` (kept for
|
|
402
|
+
* back-compat with every host wired before the provider abstraction
|
|
403
|
+
* landed). Internally the loop talks `LLMMessage[]`, so we translate
|
|
404
|
+
* on entry. The two shapes are deliberately close — text content is a
|
|
405
|
+
* plain string in both, multi-block content carries the same `text /
|
|
406
|
+
* tool_use / tool_result` discriminator — so this is mechanical.
|
|
407
|
+
*/
|
|
408
|
+
function anthropicMessagesToLLM(messages) {
|
|
409
|
+
return messages.map((m) => {
|
|
410
|
+
if (typeof m.content === 'string') {
|
|
411
|
+
return { role: m.role, content: m.content };
|
|
412
|
+
}
|
|
413
|
+
return {
|
|
414
|
+
role: m.role,
|
|
415
|
+
content: m.content.map((block) => {
|
|
416
|
+
const b = block;
|
|
417
|
+
if (b.type === 'text') {
|
|
418
|
+
return { type: 'text', text: b.text };
|
|
419
|
+
}
|
|
420
|
+
if (b.type === 'tool_use') {
|
|
421
|
+
return {
|
|
422
|
+
type: 'tool_use',
|
|
423
|
+
id: b.id,
|
|
424
|
+
name: b.name,
|
|
425
|
+
input: (b.input ?? {}),
|
|
426
|
+
};
|
|
427
|
+
}
|
|
428
|
+
if (b.type === 'tool_result') {
|
|
429
|
+
return {
|
|
430
|
+
type: 'tool_result',
|
|
431
|
+
tool_use_id: b.tool_use_id,
|
|
432
|
+
content: typeof b.content === 'string'
|
|
433
|
+
? b.content
|
|
434
|
+
: JSON.stringify(b.content),
|
|
435
|
+
is_error: b.is_error,
|
|
436
|
+
};
|
|
437
|
+
}
|
|
438
|
+
// Unknown block kinds (image, document, …) — flatten to a
|
|
439
|
+
// text marker so the LLM still sees something. Multimodal
|
|
440
|
+
// input plumbing is a follow-up.
|
|
441
|
+
return { type: 'text', text: `[unsupported:${b.type}]` };
|
|
442
|
+
}),
|
|
443
|
+
};
|
|
444
|
+
});
|
|
445
|
+
}
|
|
446
|
+
function isRunnerProviderConfig(v) {
|
|
447
|
+
return (typeof v === 'object' &&
|
|
448
|
+
v !== null &&
|
|
449
|
+
'provider' in v &&
|
|
450
|
+
typeof v.provider === 'object');
|
|
451
|
+
}
|
|
@@ -379,6 +379,11 @@ class AgentService {
|
|
|
379
379
|
const filter = params.overrides?.extraToolsFilter;
|
|
380
380
|
const fromConnectors = filter && resolvedExtras ? filter(resolvedExtras) : resolvedExtras;
|
|
381
381
|
const extraTools = mergeExtraTools(params.overrides?.extraTools, fromConnectors);
|
|
382
|
+
// Hoisted accumulators so the post-loop persistence (after the
|
|
383
|
+
// try) can see the final list. Defined here, populated inside
|
|
384
|
+
// the for-await loop below.
|
|
385
|
+
const toolCallStartByUseId = new Map();
|
|
386
|
+
const accumulatedToolCalls = [];
|
|
382
387
|
try {
|
|
383
388
|
// Team orchestrators route through OrchestratorService.stream()
|
|
384
389
|
// so the synthetic `delegate_to_*` tools the orchestrator was
|
|
@@ -403,11 +408,56 @@ class AgentService {
|
|
|
403
408
|
messageId: 'streaming',
|
|
404
409
|
agent: { timezone: agent.timezone },
|
|
405
410
|
}, { ...(params.overrides ?? {}), extraTools });
|
|
411
|
+
// Accumulate tool_use / tool_result chunks during streaming so
|
|
412
|
+
// we can persist them on the assistant message row after the loop.
|
|
413
|
+
// Without this, `getHistory` returns the assistant's text but
|
|
414
|
+
// loses the tool calls — which means clients that render
|
|
415
|
+
// proposal cards (Recording Assist) or generic tool rows from
|
|
416
|
+
// history have nothing to rehydrate. FIFO matching mirrors the
|
|
417
|
+
// runner's heuristic so the shape stays consistent whether the
|
|
418
|
+
// turn streamed or used `run()`.
|
|
406
419
|
for await (const chunk of stream) {
|
|
407
420
|
if (chunk.type === 'text_delta')
|
|
408
421
|
fullContent += chunk.delta;
|
|
409
422
|
if (chunk.type === 'usage')
|
|
410
423
|
finalUsage = chunk.usage;
|
|
424
|
+
if (chunk.type === 'tool_use_start') {
|
|
425
|
+
toolCallStartByUseId.set(chunk.toolUseId, {
|
|
426
|
+
name: chunk.toolName,
|
|
427
|
+
start: Date.now(),
|
|
428
|
+
});
|
|
429
|
+
}
|
|
430
|
+
if (chunk.type === 'tool_result') {
|
|
431
|
+
// Match by toolUseId when present; otherwise FIFO on toolName
|
|
432
|
+
// (matches AgentRunnerService.run's heuristic). Runners that don't
|
|
433
|
+
// emit `toolUseId` on result chunks (AgentRunnerService.stream is
|
|
434
|
+
// one of them) still produce a usable record.
|
|
435
|
+
let entry = null;
|
|
436
|
+
const useId = chunk.toolUseId;
|
|
437
|
+
if (useId && toolCallStartByUseId.has(useId)) {
|
|
438
|
+
const v = toolCallStartByUseId.get(useId);
|
|
439
|
+
entry = { id: useId, name: v.name, start: v.start };
|
|
440
|
+
toolCallStartByUseId.delete(useId);
|
|
441
|
+
}
|
|
442
|
+
else {
|
|
443
|
+
const fifo = Array.from(toolCallStartByUseId.entries()).find(([, v]) => v.name === chunk.toolName);
|
|
444
|
+
if (fifo) {
|
|
445
|
+
entry = { id: fifo[0], name: fifo[1].name, start: fifo[1].start };
|
|
446
|
+
toolCallStartByUseId.delete(fifo[0]);
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
if (entry) {
|
|
450
|
+
accumulatedToolCalls.push({
|
|
451
|
+
toolName: entry.name,
|
|
452
|
+
toolUseId: entry.id,
|
|
453
|
+
input: {},
|
|
454
|
+
output: typeof chunk.result === 'string'
|
|
455
|
+
? chunk.result
|
|
456
|
+
: JSON.stringify(chunk.result),
|
|
457
|
+
durationMs: Date.now() - entry.start,
|
|
458
|
+
});
|
|
459
|
+
}
|
|
460
|
+
}
|
|
411
461
|
yield chunk;
|
|
412
462
|
}
|
|
413
463
|
}
|
|
@@ -507,6 +557,12 @@ class AgentService {
|
|
|
507
557
|
role: 'assistant',
|
|
508
558
|
content: fullContent,
|
|
509
559
|
usage: finalUsage,
|
|
560
|
+
// Persist the accumulated tool calls so `getHistory` can
|
|
561
|
+
// surface them on reload — without this, proposal cards
|
|
562
|
+
// (Recording Assist) and other tool-result-driven UI
|
|
563
|
+
// disappear after refresh. Empty arrays drop to undefined
|
|
564
|
+
// so we don't pollute the column with `[]`.
|
|
565
|
+
toolCalls: accumulatedToolCalls.length > 0 ? accumulatedToolCalls : undefined,
|
|
510
566
|
});
|
|
511
567
|
const now = new Date();
|
|
512
568
|
await this.dispatchUsage({
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentforge-io/core",
|
|
3
|
-
"version": "2.
|
|
4
|
-
"description": "Framework-free AI runtime SDK. Owns: agent loop (Anthropic), conversations, tools, streaming, agent-job queue, SdkHooks. Identity, billing, infra (email/uploads/secrets) live in the host's modules — not here.",
|
|
3
|
+
"version": "2.3.1",
|
|
4
|
+
"description": "Framework-free AI runtime SDK. Owns: agent loop (pluggable LLM provider — Anthropic by default, LangChain-backed providers as drop-ins), conversations, tools, streaming, agent-job queue, SdkHooks. Identity, billing, infra (email/uploads/secrets) live in the host's modules — not here.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"types": "dist/index.d.ts",
|