@agentforge-io/core 2.0.23 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/index.d.ts +2 -0
- package/dist/ai/index.js +5 -1
- package/dist/factory.js +56 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.js +7 -1
- package/dist/services/agent-runner.service.js +117 -7
- package/dist/services/agent.service.d.ts +21 -1
- package/dist/services/agent.service.js +77 -10
- package/dist/services/orchestrator.service.d.ts +40 -1
- package/dist/services/orchestrator.service.js +220 -0
- package/dist/types/agent.types.d.ts +31 -6
- package/dist/types/config.types.d.ts +8 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +1 -0
- package/dist/types/model-strategy.d.ts +97 -0
- package/dist/types/model-strategy.js +83 -0
- package/package.json +1 -1
- package/dist/adapters/billing/billing-adapter.interface.d.ts +0 -41
- package/dist/adapters/billing/billing-adapter.interface.js +0 -5
- package/dist/adapters/billing/stripe/stripe.adapter.d.ts +0 -30
- package/dist/adapters/billing/stripe/stripe.adapter.js +0 -122
- package/dist/adapters/email/email-adapter.interface.d.ts +0 -25
- package/dist/adapters/email/email-adapter.interface.js +0 -6
- package/dist/adapters/email/noop.adapter.d.ts +0 -10
- package/dist/adapters/email/noop.adapter.js +0 -15
- package/dist/adapters/email/resend.adapter.d.ts +0 -8
- package/dist/adapters/email/resend.adapter.js +0 -39
- package/dist/adapters/upload/noop.adapter.d.ts +0 -9
- package/dist/adapters/upload/noop.adapter.js +0 -14
- package/dist/adapters/upload/s3.adapter.d.ts +0 -38
- package/dist/adapters/upload/s3.adapter.js +0 -69
- package/dist/adapters/upload/upload-adapter.interface.d.ts +0 -37
- package/dist/adapters/upload/upload-adapter.interface.js +0 -15
- package/dist/billing/index.d.ts +0 -12
- package/dist/billing/index.js +0 -28
- package/dist/domain/agent.d.ts +0 -59
- package/dist/domain/agent.js +0 -2
- package/dist/domain/api-key.d.ts +0 -28
- package/dist/domain/api-key.js +0 -2
- package/dist/domain/auth-identity.d.ts +0 -10
- package/dist/domain/auth-identity.js +0 -2
- package/dist/domain/email-token.d.ts +0 -11
- package/dist/domain/email-token.js +0 -2
- package/dist/domain/external-user.d.ts +0 -23
- package/dist/domain/external-user.js +0 -2
- package/dist/domain/plan.d.ts +0 -20
- package/dist/domain/plan.js +0 -2
- package/dist/domain/platform-secret.d.ts +0 -24
- package/dist/domain/platform-secret.js +0 -8
- package/dist/domain/refresh-token.d.ts +0 -15
- package/dist/domain/refresh-token.js +0 -2
- package/dist/domain/subscription.d.ts +0 -21
- package/dist/domain/subscription.js +0 -2
- package/dist/domain/tenant.d.ts +0 -21
- package/dist/domain/tenant.js +0 -2
- package/dist/domain/usage-record.d.ts +0 -15
- package/dist/domain/usage-record.js +0 -2
- package/dist/domain/user.d.ts +0 -43
- package/dist/domain/user.js +0 -2
- package/dist/services/agent-config.service.d.ts +0 -45
- package/dist/services/agent-config.service.js +0 -114
- package/dist/services/api-key.service.d.ts +0 -41
- package/dist/services/api-key.service.js +0 -80
- package/dist/services/auth.service.d.ts +0 -133
- package/dist/services/auth.service.js +0 -411
- package/dist/services/billing.service.d.ts +0 -67
- package/dist/services/billing.service.js +0 -254
- package/dist/services/email-templates.d.ts +0 -18
- package/dist/services/email-templates.js +0 -39
- package/dist/services/email.service.d.ts +0 -26
- package/dist/services/email.service.js +0 -42
- package/dist/services/errors.d.ts +0 -7
- package/dist/services/errors.js +0 -27
- package/dist/services/oauth.service.d.ts +0 -73
- package/dist/services/oauth.service.js +0 -174
- package/dist/services/plan.service.d.ts +0 -54
- package/dist/services/plan.service.js +0 -120
- package/dist/services/refresh-token.service.d.ts +0 -38
- package/dist/services/refresh-token.service.js +0 -73
- package/dist/services/secrets/crypto.d.ts +0 -37
- package/dist/services/secrets/crypto.js +0 -110
- package/dist/services/secrets/known-keys.d.ts +0 -38
- package/dist/services/secrets/known-keys.js +0 -50
- package/dist/services/secrets.service.d.ts +0 -91
- package/dist/services/secrets.service.js +0 -193
- package/dist/services/tenant-billing.service.d.ts +0 -121
- package/dist/services/tenant-billing.service.js +0 -290
- package/dist/services/tenant.service.d.ts +0 -54
- package/dist/services/tenant.service.js +0 -96
- package/dist/services/upload.service.d.ts +0 -37
- package/dist/services/upload.service.js +0 -84
- package/dist/services/usage.service.d.ts +0 -34
- package/dist/services/usage.service.js +0 -108
- package/dist/types/billing.types.d.ts +0 -82
- package/dist/types/billing.types.js +0 -3
|
@@ -39,10 +39,32 @@ class OrchestratorService {
|
|
|
39
39
|
baseURL: anthropicConfig.baseURL,
|
|
40
40
|
});
|
|
41
41
|
this.logger = opts.logger ?? noopLogger;
|
|
42
|
+
this.resolveAgentHook = opts.resolveAgent;
|
|
42
43
|
for (const agent of opts.agents) {
|
|
43
44
|
this.agentsMap.set(agent.id, agent);
|
|
44
45
|
}
|
|
45
46
|
}
|
|
47
|
+
/**
|
|
48
|
+
* Lookup with dynamic-resolver fallback. Hits the static map first
|
|
49
|
+
* (built from the constructor's `agents` list — covers the bootstrap
|
|
50
|
+
* use case), then falls back to the host-supplied `resolveAgent`
|
|
51
|
+
* hook (used by Team orchestrators whose members are loaded from
|
|
52
|
+
* the database per-tenant). Resolved agents are cached in the map
|
|
53
|
+
* for the lifetime of the service to avoid re-fetching across a
|
|
54
|
+
* multi-turn conversation.
|
|
55
|
+
*/
|
|
56
|
+
async resolveAgentDynamic(agentId) {
|
|
57
|
+
const cached = this.agentsMap.get(agentId);
|
|
58
|
+
if (cached)
|
|
59
|
+
return cached;
|
|
60
|
+
if (!this.resolveAgentHook)
|
|
61
|
+
return undefined;
|
|
62
|
+
const resolved = await this.resolveAgentHook(agentId);
|
|
63
|
+
if (!resolved)
|
|
64
|
+
return undefined;
|
|
65
|
+
this.agentsMap.set(resolved.id, resolved);
|
|
66
|
+
return resolved;
|
|
67
|
+
}
|
|
46
68
|
/**
|
|
47
69
|
* Run an agent. Orchestrators automatically get delegation tools injected.
|
|
48
70
|
* Non-orchestrator agents fall straight through to the runner.
|
|
@@ -55,6 +77,204 @@ class OrchestratorService {
|
|
|
55
77
|
this.logger.debug(`Running orchestrator "${agentId}" with subagents: ${agent.subAgents.join(', ')}`);
|
|
56
78
|
return this.runOrchestratorLoop(agent, messages, context);
|
|
57
79
|
}
|
|
80
|
+
/**
|
|
81
|
+
* Streaming variant. Orchestrators emit `delegation_start` /
|
|
82
|
+
* `delegation_result` chunks around each sub-agent invocation; the
|
|
83
|
+
* sub-agent's own chunks are forwarded byte-by-byte with their
|
|
84
|
+
* `actingAgentId` set so the client renders the member's avatar /
|
|
85
|
+
* name on the right bubble. Non-orchestrator agents short-circuit
|
|
86
|
+
* to the runner's stream.
|
|
87
|
+
*
|
|
88
|
+
* Implementation note: we drive the same Anthropic agentic loop as
|
|
89
|
+
* `runOrchestratorLoop` (no shortcut — the orchestrator's reasoning
|
|
90
|
+
* about WHO to delegate to is still a non-streamed messages.create
|
|
91
|
+
* call). The streaming part is the SUB-AGENT'S response, which is
|
|
92
|
+
* the part the visitor actually cares about seeing in real time.
|
|
93
|
+
*/
|
|
94
|
+
async *stream(agentId, messages, context) {
|
|
95
|
+
const agent = (await this.resolveAgentDynamic(agentId)) ?? null;
|
|
96
|
+
if (!agent) {
|
|
97
|
+
throw new OrchestratorError('agent_not_found', `Agent "${agentId}" not found`);
|
|
98
|
+
}
|
|
99
|
+
if (!agent.canOrchestrate || !agent.subAgents?.length) {
|
|
100
|
+
// No-op orchestration — fall straight through. The runner's
|
|
101
|
+
// chunks won't carry `actingAgentId`, which is exactly what we
|
|
102
|
+
// want: this conversation is bound to one agent.
|
|
103
|
+
yield* this.runner.stream(agent, messages, context);
|
|
104
|
+
return;
|
|
105
|
+
}
|
|
106
|
+
this.logger.debug(`Streaming orchestrator "${agentId}" with subagents: ${agent.subAgents.join(', ')}`);
|
|
107
|
+
const delegationTools = this.buildDelegationTools(agent);
|
|
108
|
+
const model = agent.model ?? this.anthropicConfig.defaultModel ?? 'claude-opus-4-6';
|
|
109
|
+
const maxTokens = agent.maxTokens ?? this.anthropicConfig.defaultMaxTokens ?? 4096;
|
|
110
|
+
const anthropicTools = delegationTools.map((t) => ({
|
|
111
|
+
name: t.name,
|
|
112
|
+
description: t.description,
|
|
113
|
+
input_schema: t.inputSchema,
|
|
114
|
+
}));
|
|
115
|
+
let currentMessages = [...messages];
|
|
116
|
+
let totalUsage = {
|
|
117
|
+
inputTokens: 0,
|
|
118
|
+
outputTokens: 0,
|
|
119
|
+
totalTokens: 0,
|
|
120
|
+
};
|
|
121
|
+
const messageId = (0, crypto_1.randomUUID)();
|
|
122
|
+
// Hard cap on orchestrator loops. Without one, a misbehaving model
|
|
123
|
+
// could ping-pong delegate → think → delegate forever. Three hops
|
|
124
|
+
// is enough for any well-formed team flow; the system prompt also
|
|
125
|
+
// discourages chaining so this is mostly defense in depth.
|
|
126
|
+
const MAX_LOOPS = 3;
|
|
127
|
+
let loopCount = 0;
|
|
128
|
+
while (true) {
|
|
129
|
+
if (loopCount++ > MAX_LOOPS) {
|
|
130
|
+
yield {
|
|
131
|
+
type: 'text_delta',
|
|
132
|
+
delta: '\n\n(Orchestrator stopped: too many delegations in one turn.)',
|
|
133
|
+
};
|
|
134
|
+
break;
|
|
135
|
+
}
|
|
136
|
+
// Orchestrator's planning step — non-streamed. The model picks a
|
|
137
|
+
// member, we forward its prose as a single text_delta so the
|
|
138
|
+
// chat shows the "Routing to …" reasoning, then we drain the
|
|
139
|
+
// tool_use blocks one by one.
|
|
140
|
+
const response = await this.client.messages.create({
|
|
141
|
+
model,
|
|
142
|
+
max_tokens: maxTokens,
|
|
143
|
+
system: agent.systemPrompt,
|
|
144
|
+
messages: currentMessages,
|
|
145
|
+
tools: anthropicTools,
|
|
146
|
+
});
|
|
147
|
+
totalUsage = {
|
|
148
|
+
inputTokens: totalUsage.inputTokens + response.usage.input_tokens,
|
|
149
|
+
outputTokens: totalUsage.outputTokens + response.usage.output_tokens,
|
|
150
|
+
totalTokens: totalUsage.totalTokens +
|
|
151
|
+
response.usage.input_tokens +
|
|
152
|
+
response.usage.output_tokens,
|
|
153
|
+
};
|
|
154
|
+
// Emit orchestrator's own prose (the "Routing to Ana…" line).
|
|
155
|
+
// No actingAgentId — that means "this is the team speaking as
|
|
156
|
+
// itself", which the client renders with the team's branding.
|
|
157
|
+
for (const block of response.content) {
|
|
158
|
+
if (block.type === 'text' && block.text.trim()) {
|
|
159
|
+
yield { type: 'text_delta', delta: block.text };
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
if (response.stop_reason !== 'tool_use') {
|
|
163
|
+
// No delegation this turn — we're done.
|
|
164
|
+
break;
|
|
165
|
+
}
|
|
166
|
+
// Append the orchestrator's tool-use turn so the next loop sees
|
|
167
|
+
// it as part of the planning history.
|
|
168
|
+
const assistantMsg = {
|
|
169
|
+
role: 'assistant',
|
|
170
|
+
content: response.content,
|
|
171
|
+
};
|
|
172
|
+
currentMessages = [...currentMessages, assistantMsg];
|
|
173
|
+
const toolResults = [];
|
|
174
|
+
for (const block of response.content) {
|
|
175
|
+
if (block.type !== 'tool_use')
|
|
176
|
+
continue;
|
|
177
|
+
const delegateTool = delegationTools.find((t) => t.name === block.name);
|
|
178
|
+
if (!delegateTool)
|
|
179
|
+
continue;
|
|
180
|
+
const { task } = block.input;
|
|
181
|
+
const { subAgentId } = delegateTool;
|
|
182
|
+
const subAgent = await this.resolveAgentDynamic(subAgentId);
|
|
183
|
+
// delegation_start carries the member's identity so the client
|
|
184
|
+
// can render the routing hint AND swap the active bubble's
|
|
185
|
+
// avatar before the first text_delta arrives.
|
|
186
|
+
yield {
|
|
187
|
+
type: 'delegation_start',
|
|
188
|
+
subAgentId,
|
|
189
|
+
subAgentName: subAgent?.name,
|
|
190
|
+
subAgentAvatarUrl: undefined,
|
|
191
|
+
task,
|
|
192
|
+
};
|
|
193
|
+
if (!subAgent) {
|
|
194
|
+
// Member was deleted between team config + the orchestrator
|
|
195
|
+
// call. Synthesize a tool_result so the orchestrator can
|
|
196
|
+
// apologize on its next loop.
|
|
197
|
+
const errMsg = `Sub-agent "${subAgentId}" is no longer available.`;
|
|
198
|
+
yield {
|
|
199
|
+
type: 'delegation_result',
|
|
200
|
+
subAgentId,
|
|
201
|
+
result: errMsg,
|
|
202
|
+
};
|
|
203
|
+
toolResults.push({
|
|
204
|
+
type: 'tool_result',
|
|
205
|
+
tool_use_id: block.id,
|
|
206
|
+
content: errMsg,
|
|
207
|
+
});
|
|
208
|
+
continue;
|
|
209
|
+
}
|
|
210
|
+
// Stream the sub-agent's reply through the runner, tagging every
|
|
211
|
+
// chunk with the member's id so the client renders it with the
|
|
212
|
+
// member's identity. We accumulate the text body so the
|
|
213
|
+
// orchestrator's next loop can see what the member said.
|
|
214
|
+
const subMessages = [{ role: 'user', content: task }];
|
|
215
|
+
let assembled = '';
|
|
216
|
+
let subUsage = {
|
|
217
|
+
inputTokens: 0,
|
|
218
|
+
outputTokens: 0,
|
|
219
|
+
totalTokens: 0,
|
|
220
|
+
};
|
|
221
|
+
for await (const chunk of this.runner.stream(subAgent, subMessages, {
|
|
222
|
+
...context,
|
|
223
|
+
agentId: subAgentId,
|
|
224
|
+
})) {
|
|
225
|
+
// text_delta is the only chunk type whose body we accumulate
|
|
226
|
+
// for the orchestrator's tool_result. Other chunks (tool
|
|
227
|
+
// calls inside the member, usage updates) we forward to the
|
|
228
|
+
// visitor but don't feed back to the orchestrator.
|
|
229
|
+
if (chunk.type === 'text_delta') {
|
|
230
|
+
assembled += chunk.delta;
|
|
231
|
+
yield { ...chunk, actingAgentId: subAgentId };
|
|
232
|
+
continue;
|
|
233
|
+
}
|
|
234
|
+
if (chunk.type === 'usage') {
|
|
235
|
+
subUsage = chunk.usage;
|
|
236
|
+
yield { ...chunk, actingAgentId: subAgentId };
|
|
237
|
+
continue;
|
|
238
|
+
}
|
|
239
|
+
if (chunk.type === 'done') {
|
|
240
|
+
// Swallow the inner `done` — the OUTER loop emits its own
|
|
241
|
+
// when the whole orchestrator turn ends.
|
|
242
|
+
continue;
|
|
243
|
+
}
|
|
244
|
+
// Pass-through with identity tag (tool_use_start, tool_result,
|
|
245
|
+
// awaiting_approval, tool_blocked, etc.).
|
|
246
|
+
yield { ...chunk, actingAgentId: subAgentId };
|
|
247
|
+
}
|
|
248
|
+
totalUsage.inputTokens += subUsage.inputTokens;
|
|
249
|
+
totalUsage.outputTokens += subUsage.outputTokens;
|
|
250
|
+
totalUsage.totalTokens += subUsage.totalTokens;
|
|
251
|
+
yield {
|
|
252
|
+
type: 'delegation_result',
|
|
253
|
+
subAgentId,
|
|
254
|
+
result: assembled,
|
|
255
|
+
};
|
|
256
|
+
toolResults.push({
|
|
257
|
+
type: 'tool_result',
|
|
258
|
+
tool_use_id: block.id,
|
|
259
|
+
// Anthropic rejects empty user-message content with a 400. A
|
|
260
|
+
// sub-agent can produce zero text_deltas legitimately (it ran
|
|
261
|
+
// only tools and never spoke) — when that happens we feed a
|
|
262
|
+
// sentinel string back into the orchestrator's next loop so
|
|
263
|
+
// the conversation stays well-formed. The sentinel doubles as
|
|
264
|
+
// a signal the orchestrator can interpret ("the member acted
|
|
265
|
+
// silently — decide whether to ask the user for confirmation").
|
|
266
|
+
content: assembled || '(member completed silently — no textual response)',
|
|
267
|
+
});
|
|
268
|
+
}
|
|
269
|
+
// Feed the tool results back into the orchestrator's next loop.
|
|
270
|
+
currentMessages = [
|
|
271
|
+
...currentMessages,
|
|
272
|
+
{ role: 'user', content: toolResults },
|
|
273
|
+
];
|
|
274
|
+
}
|
|
275
|
+
yield { type: 'usage', usage: totalUsage };
|
|
276
|
+
yield { type: 'done', messageId };
|
|
277
|
+
}
|
|
58
278
|
async runOrchestratorLoop(orchestrator, messages, context) {
|
|
59
279
|
const delegations = [];
|
|
60
280
|
const delegationTools = this.buildDelegationTools(orchestrator);
|
|
@@ -165,29 +165,54 @@ export interface ApprovalCopyBundle {
|
|
|
165
165
|
blockedBody: string;
|
|
166
166
|
expiresPrefix: string;
|
|
167
167
|
}
|
|
168
|
-
|
|
168
|
+
/**
|
|
169
|
+
* Per-chunk identity. When a Team orchestrator delegates a turn to a
|
|
170
|
+
* member, the streaming chunks coming out of the member carry the
|
|
171
|
+
* member's `agentId` here so the chat client can render a bubble with
|
|
172
|
+
* the member's avatar and name. Optional everywhere — when absent the
|
|
173
|
+
* client falls back to the session's primary agent identity (the
|
|
174
|
+
* default, non-team behaviour).
|
|
175
|
+
*
|
|
176
|
+
* The orchestrator's OWN chunks (the routing wrappers it speaks as
|
|
177
|
+
* itself) carry no `actingAgentId` so the client renders them as the
|
|
178
|
+
* team / orchestrator identity. The boundary between members is marked
|
|
179
|
+
* by `delegation_start` and `delegation_result`.
|
|
180
|
+
*/
|
|
181
|
+
export interface ActingAgentTag {
|
|
182
|
+
/** Id of the agent producing this chunk. When absent, treat the
|
|
183
|
+
* chunk as coming from the session's primary agent (legacy
|
|
184
|
+
* non-team behaviour). */
|
|
185
|
+
actingAgentId?: string;
|
|
186
|
+
}
|
|
187
|
+
export type StreamChunk = ({
|
|
169
188
|
type: 'text_delta';
|
|
170
189
|
delta: string;
|
|
171
|
-
} | {
|
|
190
|
+
} & ActingAgentTag) | ({
|
|
172
191
|
type: 'tool_use_start';
|
|
173
192
|
toolName: string;
|
|
174
193
|
toolUseId: string;
|
|
175
|
-
} | {
|
|
194
|
+
} & ActingAgentTag) | ({
|
|
176
195
|
type: 'tool_result';
|
|
177
196
|
toolName: string;
|
|
178
197
|
result: string;
|
|
179
|
-
} | {
|
|
198
|
+
} & ActingAgentTag) | {
|
|
180
199
|
type: 'delegation_start';
|
|
181
200
|
subAgentId: string;
|
|
182
201
|
task: string;
|
|
202
|
+
/** Display name of the sub-agent. Lets the client render the
|
|
203
|
+
* "Routing to <name>…" hint without a separate roster fetch. */
|
|
204
|
+
subAgentName?: string;
|
|
205
|
+
/** Optional avatar URL of the sub-agent — used by the chat client
|
|
206
|
+
* to swap the avatar column for the member's bubble. */
|
|
207
|
+
subAgentAvatarUrl?: string;
|
|
183
208
|
} | {
|
|
184
209
|
type: 'delegation_result';
|
|
185
210
|
subAgentId: string;
|
|
186
211
|
result: string;
|
|
187
|
-
} | {
|
|
212
|
+
} | ({
|
|
188
213
|
type: 'usage';
|
|
189
214
|
usage: TokenUsage;
|
|
190
|
-
} | {
|
|
215
|
+
} & ActingAgentTag) | {
|
|
191
216
|
/**
|
|
192
217
|
* Emitted by the runtime when a tool dispatch hit
|
|
193
218
|
* `ToolApprovalGate.check() → { kind: 'approval' }`. The stream
|
|
@@ -72,8 +72,15 @@ export interface AgentDefinition {
|
|
|
72
72
|
name: string;
|
|
73
73
|
/** Agent description (shown to users) */
|
|
74
74
|
description?: string;
|
|
75
|
-
/** Claude model to use */
|
|
75
|
+
/** Claude model to use. Ignored when `modelStrategy` is set —
|
|
76
|
+
* the strategy's `default` takes precedence as the base tier. */
|
|
76
77
|
model?: string;
|
|
78
|
+
/** Adaptive model selection per turn. When present the runner
|
|
79
|
+
* calls `selectModel(strategy, signals)` and routes between
|
|
80
|
+
* Haiku / Sonnet / Opus according to operator-declared rules.
|
|
81
|
+
* When absent the runner falls back to the legacy `model` /
|
|
82
|
+
* `defaultModel` chain — zero breaking change. */
|
|
83
|
+
modelStrategy?: import('./model-strategy').ModelStrategy;
|
|
77
84
|
/** System prompt */
|
|
78
85
|
systemPrompt: string;
|
|
79
86
|
/** Max tokens per response */
|
package/dist/types/index.d.ts
CHANGED
package/dist/types/index.js
CHANGED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/** The three Claude tiers the platform routes between today. New
|
|
2
|
+
* providers / families will land here once the SDK speaks them. */
|
|
3
|
+
export type ModelTier = 'haiku' | 'sonnet' | 'opus';
|
|
4
|
+
/** Conditions that can push a turn UP to the escalation tier. */
|
|
5
|
+
export type EscalateRule =
|
|
6
|
+
/** The agent has at least one tool attached to this turn. Tool use
|
|
7
|
+
* benefits disproportionately from a stronger reasoner — Haiku
|
|
8
|
+
* often picks the wrong tool or skips required arguments. */
|
|
9
|
+
'toolUse'
|
|
10
|
+
/** Estimated input tokens exceed `thresholds.longContextTokens`.
|
|
11
|
+
* Long context degrades quality fastest on cheaper tiers. */
|
|
12
|
+
| 'longContext'
|
|
13
|
+
/** At least one of the attached tools is in `approval` mode (a
|
|
14
|
+
* human will gate the run). Escalate so the reasoning the human
|
|
15
|
+
* reviews is as good as we can afford on a critical path. */
|
|
16
|
+
| 'approvalRequired';
|
|
17
|
+
/** Conditions that can drop a turn DOWN to the fallback tier. Only
|
|
18
|
+
* applied when no escalate rule fires — escalation always wins. */
|
|
19
|
+
export type FallbackRule =
|
|
20
|
+
/** Estimated input tokens below `thresholds.shortInputTokens`.
|
|
21
|
+
* "Hola", "thanks", confirmations, one-line clarifications —
|
|
22
|
+
* Haiku is more than enough and costs ~12× less than Opus. */
|
|
23
|
+
'shortInput';
|
|
24
|
+
export interface ModelStrategy {
|
|
25
|
+
kind: 'fixed' | 'tiered';
|
|
26
|
+
/** The base model. Used as-is when `kind === 'fixed'`, and as the
|
|
27
|
+
* middle tier when `kind === 'tiered'` (every turn that doesn't
|
|
28
|
+
* trigger an escalate / fallback rule lands here). Free-form
|
|
29
|
+
* string so future Anthropic model ids drop in without a SDK
|
|
30
|
+
* release. */
|
|
31
|
+
default: string;
|
|
32
|
+
/** Concrete model ids for each tier. Only consulted when
|
|
33
|
+
* `kind === 'tiered'`. Missing tiers mean "stay on default" so the
|
|
34
|
+
* operator can declare a partial ladder (e.g. only the Haiku
|
|
35
|
+
* fallback, no Opus escalation). `selectModel` itself only reads `opus` and `haiku`; the middle tier is `default`. */
|
|
36
|
+
tiers?: {
|
|
37
|
+
haiku?: string;
|
|
38
|
+
sonnet?: string;
|
|
39
|
+
opus?: string;
|
|
40
|
+
};
|
|
41
|
+
/** Conditions that escalate the turn to `tiers.opus`. Order does
|
|
42
|
+
* not matter — any rule firing escalates. */
|
|
43
|
+
escalate?: EscalateRule[];
|
|
44
|
+
/** Conditions that drop the turn to `tiers.haiku`. Only applied
|
|
45
|
+
* when no escalate rule fires. */
|
|
46
|
+
fallback?: FallbackRule[];
|
|
47
|
+
/** Thresholds the rules read. Sensible defaults below; operators
|
|
48
|
+
* can override per agent. */
|
|
49
|
+
thresholds?: {
|
|
50
|
+
/** `longContext` fires when estimated input exceeds this. */
|
|
51
|
+
longContextTokens?: number;
|
|
52
|
+
/** `shortInput` fires when estimated input is below this. */
|
|
53
|
+
shortInputTokens?: number;
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
/** Defaults applied when the operator doesn't set thresholds. Picked
|
|
57
|
+
* from the Anthropic cost / quality curve we've seen in practice:
|
|
58
|
+
*
|
|
59
|
+
* - 8k input tokens is where Sonnet's quality on long contexts
|
|
60
|
+
* starts to noticeably drift from Opus.
|
|
61
|
+
* - 200 tokens covers single-sentence inputs (greetings,
|
|
62
|
+
* confirmations) where Haiku is indistinguishable from Sonnet for
|
|
63
|
+
* the user.
|
|
64
|
+
*/
|
|
65
|
+
export declare const DEFAULT_LONG_CONTEXT_TOKENS = 8000;
|
|
66
|
+
export declare const DEFAULT_SHORT_INPUT_TOKENS = 200;
|
|
67
|
+
/** Inputs the runner provides per turn so the selector can decide
|
|
68
|
+
* without re-deriving anything. All optional — missing signals just
|
|
69
|
+
* cause the corresponding rules to no-op. */
|
|
70
|
+
export interface TurnSignals {
|
|
71
|
+
/** Estimated input tokens for this turn (system prompt + history
|
|
72
|
+
* + new message). The runner can use `usage.input_tokens` from
|
|
73
|
+
* the previous turn as a proxy; for the first turn a crude
|
|
74
|
+
* word-count heuristic is good enough. */
|
|
75
|
+
estimatedInputTokens?: number;
|
|
76
|
+
/** True if the agent has at least one tool attached to this turn
|
|
77
|
+
* (whether or not the model ends up calling it). */
|
|
78
|
+
hasTools?: boolean;
|
|
79
|
+
/** True if any attached tool is in `mode: 'approval'`. */
|
|
80
|
+
hasApprovalTool?: boolean;
|
|
81
|
+
}
|
|
82
|
+
export interface ModelSelection {
|
|
83
|
+
/** The model id the runner should pass to `messages.create`. */
|
|
84
|
+
model: string;
|
|
85
|
+
/** Which leg of the strategy fired. `default` = neither escalate
|
|
86
|
+
* nor fallback matched; `escalate` / `fallback` = a rule fired
|
|
87
|
+
* AND the corresponding tier was declared. `forced` = the
|
|
88
|
+
* strategy is `fixed`. */
|
|
89
|
+
reason: 'forced' | 'default' | 'escalate' | 'fallback';
|
|
90
|
+
/** When `reason === 'escalate' | 'fallback'`, the rule that fired.
|
|
91
|
+
* Used in telemetry so an operator can see why their bill
|
|
92
|
+
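* spiked. Also set alongside `reason: 'default'` when a rule fired but its tier was not declared (a near-miss). */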
|
|
93
|
+
trigger?: EscalateRule | FallbackRule;
|
|
94
|
+
}
|
|
95
|
+
/** Pure decision function. The runner calls this once per turn just
|
|
96
|
+
* before `messages.create`. */
|
|
97
|
+
export declare function selectModel(strategy: ModelStrategy | undefined, signals: TurnSignals, runnerDefault: string): ModelSelection;
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Adaptive model selection for a single agent turn.
|
|
3
|
+
//
|
|
4
|
+
// An agent may declare ONE of two strategies:
|
|
5
|
+
//
|
|
6
|
+
// - `fixed` → use `strategy.default` (or the runner's default) for every
|
|
7
|
+
// turn. The historical behaviour. Zero overhead, zero
|
|
8
|
+
// variance. Use this when cost is a non-issue or the
|
|
9
|
+
// domain is uniform (e.g. always-creative writing).
|
|
10
|
+
//
|
|
11
|
+
// - `tiered` → at each turn the runner inspects the request and
|
|
12
|
+
// may escalate to a more capable model OR fall back to
|
|
13
|
+
// a cheaper one based on a small set of heuristics
|
|
14
|
+
// declared by the operator. No LLM-router overhead —
|
|
15
|
+
// the decision is a pure function of the turn's shape
|
|
16
|
+
// (estimated input tokens, whether tools are attached,
|
|
17
|
+
// whether the turn will route through human approval).
|
|
18
|
+
//
|
|
19
|
+
// The decision lives in `selectModel(strategy, signals)` below so the
|
|
20
|
+
// runner only calls one function and the policy stays testable in
|
|
21
|
+
// isolation.
|
|
22
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
23
|
+
exports.DEFAULT_SHORT_INPUT_TOKENS = exports.DEFAULT_LONG_CONTEXT_TOKENS = void 0;
|
|
24
|
+
exports.selectModel = selectModel;
|
|
25
|
+
/** Defaults applied when the operator doesn't set thresholds. Picked
|
|
26
|
+
* from the Anthropic cost / quality curve we've seen in practice:
|
|
27
|
+
*
|
|
28
|
+
* - 8k input tokens is where Sonnet's quality on long contexts
|
|
29
|
+
* starts to noticeably drift from Opus.
|
|
30
|
+
* - 200 tokens covers single-sentence inputs (greetings,
|
|
31
|
+
* confirmations) where Haiku is indistinguishable from Sonnet for
|
|
32
|
+
* the user.
|
|
33
|
+
*/
|
|
34
|
+
exports.DEFAULT_LONG_CONTEXT_TOKENS = 8_000;
|
|
35
|
+
exports.DEFAULT_SHORT_INPUT_TOKENS = 200;
|
|
36
|
+
/**
 * Pure decision function: picks the model id for one turn.
 *
 * Resolution order:
 *  1. No strategy → `runnerDefault`, reason `default`.
 *  2. `kind === 'fixed'` → `strategy.default` (or `runnerDefault` when that
 *     is empty), reason `forced`.
 *  3. Tiered: the first escalate rule (in declared order) whose signal is
 *     present selects `tiers.opus`; otherwise the first matching fallback
 *     rule selects `tiers.haiku`; otherwise the base model, reason `default`.
 *
 * A rule that fires for an undeclared tier stays on the base model with
 * reason `default` but still reports the rule in `trigger`.
 *
 * @param strategy      Operator-declared strategy, or `undefined`.
 * @param signals       Per-turn signals; a missing field makes its rule a no-op.
 * @param runnerDefault Model id used whenever the strategy supplies none.
 * @returns `{ model, reason, trigger? }` describing the choice.
 */
function selectModel(strategy, signals, runnerDefault) {
    // No strategy → behave exactly like before this feature landed.
    if (!strategy) {
        return { model: runnerDefault, reason: 'default' };
    }
    if (strategy.kind === 'fixed') {
        return { model: strategy.default || runnerDefault, reason: 'forced' };
    }
    // Tiered. Escalate beats fallback: the escalate rules are evaluated
    // first and return as soon as one fires.
    const longCtx = strategy.thresholds?.longContextTokens ?? exports.DEFAULT_LONG_CONTEXT_TOKENS;
    const shortIn = strategy.thresholds?.shortInputTokens ?? exports.DEFAULT_SHORT_INPUT_TOKENS;
    const escalate = strategy.escalate ?? [];
    const fallback = strategy.fallback ?? [];
    for (const rule of escalate) {
        if (rule === 'toolUse' && signals.hasTools) {
            return pickTier(strategy, 'opus', 'escalate', rule, runnerDefault);
        }
        if (rule === 'longContext' &&
            typeof signals.estimatedInputTokens === 'number' &&
            signals.estimatedInputTokens > longCtx) {
            return pickTier(strategy, 'opus', 'escalate', rule, runnerDefault);
        }
        if (rule === 'approvalRequired' && signals.hasApprovalTool) {
            return pickTier(strategy, 'opus', 'escalate', rule, runnerDefault);
        }
    }
    for (const rule of fallback) {
        if (rule === 'shortInput' &&
            typeof signals.estimatedInputTokens === 'number' &&
            signals.estimatedInputTokens < shortIn) {
            return pickTier(strategy, 'haiku', 'fallback', rule, runnerDefault);
        }
    }
    return { model: strategy.default || runnerDefault, reason: 'default' };
}
/**
 * Build the selection for a rule that fired.
 *
 * @param strategy      The tiered strategy being evaluated.
 * @param tier          Tier key to look up in `strategy.tiers`.
 * @param reason        Reason to report when the tier is declared.
 * @param trigger       The rule that fired.
 * @param runnerDefault Model id used when the tier is undeclared and
 *   `strategy.default` is empty — the same fallback `selectModel` applies
 *   on its other paths.
 */
function pickTier(strategy, tier, reason, trigger, runnerDefault) {
    const tiered = strategy.tiers?.[tier];
    // No model declared for this tier → stay on the base model rather
    // than hand back an empty id. The rule that WOULD have fired is still
    // surfaced so the near-miss remains visible.
    if (!tiered) {
        return { model: strategy.default || runnerDefault, reason: 'default', trigger };
    }
    return { model: tiered, reason, trigger };
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentforge-io/core",
|
|
3
|
-
"version": "2.0.23",
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"description": "Framework-free AI runtime SDK. Owns: agent loop (Anthropic), conversations, tools, streaming, agent-job queue, SdkHooks. Identity, billing, infra (email/uploads/secrets) live in the host's modules — not here.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
import type { CheckoutSession, CreateCheckoutParams, CreateCustomerParams, CreateSubscriptionParams, SubscriptionResult, WebhookEvent } from '../../types';
|
|
2
|
-
/**
|
|
3
|
-
* IBillingAdapter defines the contract for payment providers.
|
|
4
|
-
* Implement this interface to support any payment gateway (Stripe, Paddle, LemonSqueezy, etc.)
|
|
5
|
-
*/
|
|
6
|
-
export interface IBillingAdapter {
|
|
7
|
-
/**
|
|
8
|
-
* Create a Stripe Checkout (or equivalent) session for a subscription or one-time payment.
|
|
9
|
-
*/
|
|
10
|
-
createCheckoutSession(params: CreateCheckoutParams): Promise<CheckoutSession>;
|
|
11
|
-
/**
|
|
12
|
-
* Programmatically create a subscription (without hosted checkout).
|
|
13
|
-
*/
|
|
14
|
-
createSubscription(params: CreateSubscriptionParams): Promise<SubscriptionResult>;
|
|
15
|
-
/**
|
|
16
|
-
* Cancel a subscription (optionally at period end).
|
|
17
|
-
*/
|
|
18
|
-
cancelSubscription(subscriptionId: string, atPeriodEnd?: boolean): Promise<void>;
|
|
19
|
-
/**
|
|
20
|
-
* Handle an incoming webhook from the payment provider.
|
|
21
|
-
* Returns a normalized WebhookEvent.
|
|
22
|
-
*/
|
|
23
|
-
handleWebhook(payload: Buffer, signature: string): Promise<WebhookEvent>;
|
|
24
|
-
/**
|
|
25
|
-
* Generate a customer portal URL where users can manage their subscription.
|
|
26
|
-
*/
|
|
27
|
-
getPortalUrl(customerId: string, returnUrl: string): Promise<string>;
|
|
28
|
-
/**
|
|
29
|
-
* Create a customer record in the payment provider.
|
|
30
|
-
* Returns the provider-specific customer ID.
|
|
31
|
-
*/
|
|
32
|
-
createCustomer(params: CreateCustomerParams): Promise<string>;
|
|
33
|
-
/**
|
|
34
|
-
* Get the current subscription status from the provider.
|
|
35
|
-
*/
|
|
36
|
-
getSubscription(subscriptionId: string): Promise<SubscriptionResult & {
|
|
37
|
-
status: string;
|
|
38
|
-
}>;
|
|
39
|
-
}
|
|
40
|
-
/** Token to inject the billing adapter via NestJS DI */
|
|
41
|
-
export declare const BILLING_ADAPTER = "BILLING_ADAPTER";
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
import Stripe from 'stripe';
|
|
2
|
-
import type { IBillingAdapter } from '../billing-adapter.interface';
|
|
3
|
-
import type { CheckoutSession, CreateCheckoutParams, CreateCustomerParams, CreateSubscriptionParams, PlanDefinition, StripeConfig, SubscriptionResult, WebhookEvent } from '../../../types';
|
|
4
|
-
interface MiniLogger {
|
|
5
|
-
debug?: (m: string) => void;
|
|
6
|
-
warn?: (m: string) => void;
|
|
7
|
-
log?: (m: string) => void;
|
|
8
|
-
}
|
|
9
|
-
/**
|
|
10
|
-
* StripeAdapter implements IBillingAdapter using the Stripe API.
|
|
11
|
-
* Handles subscriptions, pay-per-use checkouts, webhooks, and customer portal.
|
|
12
|
-
*/
|
|
13
|
-
export declare class StripeAdapter implements IBillingAdapter {
|
|
14
|
-
private readonly logger;
|
|
15
|
-
private readonly stripe;
|
|
16
|
-
private readonly plans;
|
|
17
|
-
private readonly webhookSecret;
|
|
18
|
-
constructor(config: StripeConfig, plans?: PlanDefinition[], logger?: MiniLogger);
|
|
19
|
-
createCustomer(params: CreateCustomerParams): Promise<string>;
|
|
20
|
-
createCheckoutSession(params: CreateCheckoutParams): Promise<CheckoutSession>;
|
|
21
|
-
createSubscription(params: CreateSubscriptionParams): Promise<SubscriptionResult>;
|
|
22
|
-
cancelSubscription(subscriptionId: string, atPeriodEnd?: boolean): Promise<void>;
|
|
23
|
-
getSubscription(subscriptionId: string): Promise<SubscriptionResult & {
|
|
24
|
-
status: string;
|
|
25
|
-
}>;
|
|
26
|
-
getPortalUrl(customerId: string, returnUrl: string): Promise<string>;
|
|
27
|
-
handleWebhook(payload: Buffer, signature: string): Promise<WebhookEvent>;
|
|
28
|
-
getStripeInstance(): Stripe;
|
|
29
|
-
}
|
|
30
|
-
export {};
|