@agentforge-io/core 2.0.23 → 2.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/index.d.ts +2 -0
- package/dist/ai/index.js +5 -1
- package/dist/services/agent-runner.service.js +99 -3
- package/dist/services/agent.service.js +35 -2
- package/dist/types/config.types.d.ts +8 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +1 -0
- package/dist/types/model-strategy.d.ts +97 -0
- package/dist/types/model-strategy.js +83 -0
- package/package.json +1 -1
package/dist/ai/index.d.ts
CHANGED
|
@@ -2,6 +2,8 @@ export { AGENT_FORGE_CONFIG, AGENT_QUEUE_NAME, CURRENT_USER, } from '../constant
|
|
|
2
2
|
export type { AgentDefinition, AnthropicConfig, McpServerConfig, AgentForgeConfig, DatabaseConfig, RedisConfig, QueueConfig, } from '../types/config.types';
|
|
3
3
|
export type { AgentResponse, AgentOverrides, StreamChunk, TokenUsage, ToolCallRecord, AgentToolDefinition, AgentJobPayload, AgentJobResult, AnthropicMessage, } from '../types/agent.types';
|
|
4
4
|
export type { SdkHooks, UsageEvent, TurnCompleteEvent, ToolCallEvent, } from '../types/hooks';
|
|
5
|
+
export type { ModelStrategy, ModelTier, EscalateRule, FallbackRule, TurnSignals, ModelSelection, } from '../types/model-strategy';
|
|
6
|
+
export { selectModel, DEFAULT_LONG_CONTEXT_TOKENS, DEFAULT_SHORT_INPUT_TOKENS, } from '../types/model-strategy';
|
|
5
7
|
export { ToolRegistryService, type Logger } from '../services/tool-registry.service';
|
|
6
8
|
export { AgentRunnerService } from '../services/agent-runner.service';
|
|
7
9
|
export { OrchestratorService } from '../services/orchestrator.service';
|
package/dist/ai/index.js
CHANGED
|
@@ -8,12 +8,16 @@
|
|
|
8
8
|
//
|
|
9
9
|
// Files still co-located physically; this is a logical seam.
|
|
10
10
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
-
exports.InMemoryJobQueue = exports.JOB_QUEUE = exports.InMemoryPreparedStreamStore = exports.PREPARED_STREAM_STORE = exports.AgentJobWorker = exports.AgentForbiddenError = exports.AgentService = exports.ConversationNotFoundError = exports.ConversationService = exports.PreparedStreamError = exports.PreparedStreamService = exports.OrchestratorService = exports.AgentRunnerService = exports.ToolRegistryService = exports.CURRENT_USER = exports.AGENT_QUEUE_NAME = exports.AGENT_FORGE_CONFIG = void 0;
|
|
11
|
+
exports.InMemoryJobQueue = exports.JOB_QUEUE = exports.InMemoryPreparedStreamStore = exports.PREPARED_STREAM_STORE = exports.AgentJobWorker = exports.AgentForbiddenError = exports.AgentService = exports.ConversationNotFoundError = exports.ConversationService = exports.PreparedStreamError = exports.PreparedStreamService = exports.OrchestratorService = exports.AgentRunnerService = exports.ToolRegistryService = exports.DEFAULT_SHORT_INPUT_TOKENS = exports.DEFAULT_LONG_CONTEXT_TOKENS = exports.selectModel = exports.CURRENT_USER = exports.AGENT_QUEUE_NAME = exports.AGENT_FORGE_CONFIG = void 0;
|
|
12
12
|
// ─── Constants ─────────────────────────────────────────────────────────────
|
|
13
13
|
var constants_1 = require("../constants");
|
|
14
14
|
Object.defineProperty(exports, "AGENT_FORGE_CONFIG", { enumerable: true, get: function () { return constants_1.AGENT_FORGE_CONFIG; } });
|
|
15
15
|
Object.defineProperty(exports, "AGENT_QUEUE_NAME", { enumerable: true, get: function () { return constants_1.AGENT_QUEUE_NAME; } });
|
|
16
16
|
Object.defineProperty(exports, "CURRENT_USER", { enumerable: true, get: function () { return constants_1.CURRENT_USER; } });
|
|
17
|
+
var model_strategy_1 = require("../types/model-strategy");
|
|
18
|
+
Object.defineProperty(exports, "selectModel", { enumerable: true, get: function () { return model_strategy_1.selectModel; } });
|
|
19
|
+
Object.defineProperty(exports, "DEFAULT_LONG_CONTEXT_TOKENS", { enumerable: true, get: function () { return model_strategy_1.DEFAULT_LONG_CONTEXT_TOKENS; } });
|
|
20
|
+
Object.defineProperty(exports, "DEFAULT_SHORT_INPUT_TOKENS", { enumerable: true, get: function () { return model_strategy_1.DEFAULT_SHORT_INPUT_TOKENS; } });
|
|
17
21
|
// ─── Services ──────────────────────────────────────────────────────────────
|
|
18
22
|
var tool_registry_service_1 = require("../services/tool-registry.service");
|
|
19
23
|
Object.defineProperty(exports, "ToolRegistryService", { enumerable: true, get: function () { return tool_registry_service_1.ToolRegistryService; } });
|
|
@@ -7,6 +7,7 @@ exports.AgentRunnerService = void 0;
|
|
|
7
7
|
const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
|
|
8
8
|
const crypto_1 = require("crypto");
|
|
9
9
|
const tool_approval_gate_1 = require("./tool-approval-gate");
|
|
10
|
+
const model_strategy_1 = require("../types/model-strategy");
|
|
10
11
|
const noopLogger = {
|
|
11
12
|
log: () => { }, warn: () => { }, debug: () => { }, error: () => { },
|
|
12
13
|
};
|
|
@@ -29,17 +30,44 @@ class AgentRunnerService {
|
|
|
29
30
|
// ─── Run (non-streaming) ──────────────────────────────────────────────────
|
|
30
31
|
async run(agent, messages, context, overrides) {
|
|
31
32
|
const messageId = (0, crypto_1.randomUUID)();
|
|
32
|
-
const
|
|
33
|
+
const runnerDefault = this.anthropicConfig.defaultModel ?? 'claude-opus-4-6';
|
|
34
|
+
const baseModel = overrides?.model ?? agent.model ?? runnerDefault;
|
|
33
35
|
const maxTokens = overrides?.maxTokens ?? agent.maxTokens ?? this.anthropicConfig.defaultMaxTokens ?? 4096;
|
|
34
36
|
const temperature = overrides?.temperature ?? agent.temperature ?? 1;
|
|
35
37
|
const { tools, extras } = this.buildToolList(agent, overrides);
|
|
36
38
|
const systemPrompt = this.buildSystemPrompt(agent, tools, overrides);
|
|
37
39
|
const toolCalls = [];
|
|
38
40
|
let currentMessages = [...messages];
|
|
41
|
+
// Pre-compute the signals the model router reads. `hasTools` and
|
|
42
|
+
// `hasApprovalTool` are constant across the agentic loop (we
|
|
43
|
+
// don't add tools mid-conversation); `estimatedInputTokens`
|
|
44
|
+
// starts from a word-count heuristic and gets replaced by the
|
|
45
|
+
// real `usage.input_tokens` once we have a response.
|
|
46
|
+
const turnSignals = {
|
|
47
|
+
hasTools: !!tools && tools.length > 0,
|
|
48
|
+
hasApprovalTool: hasApprovalGatedTool(agent),
|
|
49
|
+
estimatedInputTokens: estimateInputTokens(systemPrompt, currentMessages),
|
|
50
|
+
};
|
|
39
51
|
let totalUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
|
|
40
52
|
let finalContent = '';
|
|
41
53
|
let stopReason = 'end_turn';
|
|
54
|
+
// Last model id the router chose. Surfaced on the response so
|
|
55
|
+
// callers (and the conversation/usage logs) record what
|
|
56
|
+
// actually ran, not what the agent's `model` field says.
|
|
57
|
+
let lastModel = baseModel;
|
|
42
58
|
while (true) {
|
|
59
|
+
// Per-turn model selection. When overrides force a model we
|
|
60
|
+
// honour it (manual `agent.runMessage({ overrides: { model }})`
|
|
61
|
+
// beats the strategy). Otherwise the strategy decides; absent
|
|
62
|
+
// strategy → behave exactly like before this feature landed.
|
|
63
|
+
const selection = overrides?.model
|
|
64
|
+
? { model: overrides.model, reason: 'forced' }
|
|
65
|
+
: (0, model_strategy_1.selectModel)(agent.modelStrategy, turnSignals, baseModel);
|
|
66
|
+
const model = selection.model;
|
|
67
|
+
lastModel = model;
|
|
68
|
+
if (this.logger && selection.reason !== 'default' && selection.reason !== 'forced') {
|
|
69
|
+
this.logger.debug(`[modelRouter] agent=${agent.id} ${selection.reason}=${selection.trigger} → ${model}`);
|
|
70
|
+
}
|
|
43
71
|
const response = await this.client.messages.create({
|
|
44
72
|
model,
|
|
45
73
|
max_tokens: maxTokens,
|
|
@@ -48,6 +76,10 @@ class AgentRunnerService {
|
|
|
48
76
|
messages: currentMessages,
|
|
49
77
|
tools: tools,
|
|
50
78
|
});
|
|
79
|
+
// Update the signal for the NEXT iteration of the loop — the
|
|
80
|
+
// tool-result feedback we're about to add can balloon the
|
|
81
|
+
// context past the long-context threshold.
|
|
82
|
+
turnSignals.estimatedInputTokens = response.usage.input_tokens;
|
|
51
83
|
totalUsage = {
|
|
52
84
|
inputTokens: totalUsage.inputTokens + response.usage.input_tokens,
|
|
53
85
|
outputTokens: totalUsage.outputTokens + response.usage.output_tokens,
|
|
@@ -117,7 +149,7 @@ class AgentRunnerService {
|
|
|
117
149
|
role: 'assistant',
|
|
118
150
|
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
|
|
119
151
|
usage: totalUsage,
|
|
120
|
-
model,
|
|
152
|
+
model: lastModel,
|
|
121
153
|
stopReason,
|
|
122
154
|
createdAt: new Date(),
|
|
123
155
|
};
|
|
@@ -125,14 +157,28 @@ class AgentRunnerService {
|
|
|
125
157
|
// ─── Run (streaming) ──────────────────────────────────────────────────────
|
|
126
158
|
async *stream(agent, messages, context, overrides) {
|
|
127
159
|
const messageId = (0, crypto_1.randomUUID)();
|
|
128
|
-
const
|
|
160
|
+
const runnerDefault = this.anthropicConfig.defaultModel ?? 'claude-opus-4-6';
|
|
161
|
+
const baseModel = overrides?.model ?? agent.model ?? runnerDefault;
|
|
129
162
|
const maxTokens = overrides?.maxTokens ?? agent.maxTokens ?? this.anthropicConfig.defaultMaxTokens ?? 4096;
|
|
130
163
|
const temperature = overrides?.temperature ?? agent.temperature ?? 1;
|
|
131
164
|
const { tools, extras } = this.buildToolList(agent, overrides);
|
|
132
165
|
const systemPrompt = this.buildSystemPrompt(agent, tools, overrides);
|
|
133
166
|
let currentMessages = [...messages];
|
|
134
167
|
let totalUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
|
|
168
|
+
// See `run()` above for the rationale on these signals.
|
|
169
|
+
const turnSignals = {
|
|
170
|
+
hasTools: !!tools && tools.length > 0,
|
|
171
|
+
hasApprovalTool: hasApprovalGatedTool(agent),
|
|
172
|
+
estimatedInputTokens: estimateInputTokens(systemPrompt, currentMessages),
|
|
173
|
+
};
|
|
135
174
|
while (true) {
|
|
175
|
+
const selection = overrides?.model
|
|
176
|
+
? { model: overrides.model, reason: 'forced' }
|
|
177
|
+
: (0, model_strategy_1.selectModel)(agent.modelStrategy, turnSignals, baseModel);
|
|
178
|
+
const model = selection.model;
|
|
179
|
+
if (this.logger && selection.reason !== 'default' && selection.reason !== 'forced') {
|
|
180
|
+
this.logger.debug(`[modelRouter] agent=${agent.id} ${selection.reason}=${selection.trigger} → ${model}`);
|
|
181
|
+
}
|
|
136
182
|
const stream = this.client.messages.stream({
|
|
137
183
|
model,
|
|
138
184
|
max_tokens: maxTokens,
|
|
@@ -168,6 +214,12 @@ class AgentRunnerService {
|
|
|
168
214
|
}
|
|
169
215
|
}
|
|
170
216
|
const finalMessage = await stream.finalMessage();
|
|
217
|
+
// Refresh the input-token signal so the next iteration of the
|
|
218
|
+
// tool loop has the post-tool-result context length, not the
|
|
219
|
+
// initial estimate.
|
|
220
|
+
if (typeof finalMessage.usage?.input_tokens === 'number') {
|
|
221
|
+
turnSignals.estimatedInputTokens = finalMessage.usage.input_tokens;
|
|
222
|
+
}
|
|
171
223
|
if (finalMessage.stop_reason === 'tool_use') {
|
|
172
224
|
currentMessages = [...currentMessages, { role: 'assistant', content: finalMessage.content }];
|
|
173
225
|
const toolResults = [];
|
|
@@ -311,3 +363,47 @@ class AgentRunnerService {
|
|
|
311
363
|
}
|
|
312
364
|
}
|
|
313
365
|
exports.AgentRunnerService = AgentRunnerService;
|
|
366
|
+
/**
 * Cheap word-based token estimate for the first turn. Anthropic
 * reports real `usage.input_tokens` from the response onwards, so
 * this only needs to be accurate enough to fire `longContext` /
 * `shortInput` rules on the FIRST request before any usage is back.
 *
 * Heuristic: one token is treated as ~0.75 whitespace-split words
 * (i.e. ~1.33 tokens per word), hence `ceil(words / 0.75)`.
 *
 * Text that is counted: the system prompt, string message content,
 * `text` blocks, and the text carried by `tool_result` blocks (either
 * a plain string or nested `text` blocks). Every other block type
 * contributes nothing to the estimate.
 *
 * @param {string} systemPrompt - Rendered system prompt; a non-string
 *   value is counted as zero words instead of throwing.
 * @param {Array<{content: unknown}>} messages - Conversation history.
 * @returns {number} Estimated input tokens, rounded up.
 */
function estimateInputTokens(systemPrompt, messages) {
    let words = countWords(systemPrompt);
    for (const message of messages ?? []) {
        words += countContentWords(message?.content);
    }
    return Math.ceil(words / 0.75);
}
/** Number of whitespace-separated words in `text`; 0 for non-strings. */
function countWords(text) {
    if (typeof text !== 'string') {
        return 0;
    }
    return text.trim().split(/\s+/).filter(Boolean).length;
}
/** Words in a message `content` value (string or array of blocks). */
function countContentWords(content) {
    if (typeof content === 'string') {
        return countWords(content);
    }
    if (!Array.isArray(content)) {
        return 0;
    }
    let words = 0;
    for (const block of content) {
        if (!block || typeof block !== 'object') {
            continue;
        }
        if (block.type === 'text') {
            words += countWords(block.text);
        }
        else if (block.type === 'tool_result') {
            // Tool output fed back to the model is part of the prompt
            // and is often the largest part of a resumed history.
            words += countContentWords(block.content);
        }
    }
    return words;
}
|
|
396
|
+
/**
 * True when at least one tool declared on the agent requires human
 * approval at runtime, i.e. an object entry whose `mode` is
 * `'approval'`.
 *
 * Only `agent.tools` is inspected. Per-call `extraTools` are not
 * visible in this scope and are deliberately not considered here.
 * A missing agent, a missing `tools` list, or a non-iterable `tools`
 * value all yield `false` rather than throwing.
 *
 * @param {{tools?: Iterable<unknown>}} agent - Agent definition.
 * @returns {boolean} Whether any declared tool is approval-gated.
 */
function hasApprovalGatedTool(agent) {
    const tools = agent?.tools;
    if (!tools) {
        return false;
    }
    // `Array.from` accepts any iterable and yields [] for non-iterables,
    // so string tool ids and malformed values simply never match.
    return Array.from(tools).some((tool) => typeof tool === 'object' && tool !== null && tool.mode === 'approval');
}
|
|
@@ -212,7 +212,12 @@ class AgentService {
|
|
|
212
212
|
// caller's userId, which is the historical personal-agent path.
|
|
213
213
|
const resolvedExtras = await this.resolveExtraTools(agent.connectorOwnerUserId ?? params.userId);
|
|
214
214
|
const filter = params.overrides?.extraToolsFilter;
|
|
215
|
-
const
|
|
215
|
+
const fromConnectors = filter && resolvedExtras ? filter(resolvedExtras) : resolvedExtras;
|
|
216
|
+
// Merge connector tools with whatever the caller passed in
|
|
217
|
+
// `overrides.extraTools` (e.g. the remote-tool wrappers from
|
|
218
|
+
// ChatStreamController). Caller wins on name collisions so an
|
|
219
|
+
// explicit override always trumps an inherited connector tool.
|
|
220
|
+
const extraTools = mergeExtraTools(params.overrides?.extraTools, fromConnectors);
|
|
216
221
|
const response = await this.runner.run(agent, messages, {
|
|
217
222
|
userId: params.userId,
|
|
218
223
|
conversationId: params.conversationId,
|
|
@@ -278,7 +283,8 @@ class AgentService {
|
|
|
278
283
|
// toolbelt regardless of which visitor session is streaming.
|
|
279
284
|
const resolvedExtras = await this.resolveExtraTools(agent.connectorOwnerUserId ?? params.userId);
|
|
280
285
|
const filter = params.overrides?.extraToolsFilter;
|
|
281
|
-
const
|
|
286
|
+
const fromConnectors = filter && resolvedExtras ? filter(resolvedExtras) : resolvedExtras;
|
|
287
|
+
const extraTools = mergeExtraTools(params.overrides?.extraTools, fromConnectors);
|
|
282
288
|
try {
|
|
283
289
|
for await (const chunk of this.runner.stream(agent, messages, {
|
|
284
290
|
userId: params.userId,
|
|
@@ -509,3 +515,30 @@ function toAgentDefinition(record) {
|
|
|
509
515
|
...(extra.appearance !== undefined ? { appearance: extra.appearance } : {}),
|
|
510
516
|
};
|
|
511
517
|
}
|
|
518
|
+
/**
 * Merge two `extraTools` arrays so an explicit caller-provided list
 * (e.g. remote-tool wrappers from a chat-stream controller) is not
 * shadowed by an `undefined` result from the connector resolver.
 *
 * Caller wins on name collisions: caller tools are kept as passed and
 * a connector tool is appended only if its name has not been seen yet.
 * Names are tracked while appending, so two connector tools sharing a
 * name contribute a single entry (the first one) to the merged list.
 *
 * When only one side is non-empty that array is returned as-is (same
 * reference, no copy). Returns `undefined` (not `[]`) when both inputs
 * are empty so callers that test `extras?.length` keep working.
 *
 * @param {Array<{name: string}>|undefined} caller - Tools passed via overrides.
 * @param {Array<{name: string}>|undefined} connectors - Tools resolved from connectors.
 * @returns {Array<{name: string}>|undefined} The merged tool list.
 */
function mergeExtraTools(caller, connectors) {
    if (!caller?.length && !connectors?.length) {
        return undefined;
    }
    if (!caller?.length) {
        return connectors;
    }
    if (!connectors?.length) {
        return caller;
    }
    const seenNames = new Set(caller.map((tool) => tool.name));
    const merged = [...caller];
    for (const tool of connectors) {
        if (seenNames.has(tool.name)) {
            continue;
        }
        seenNames.add(tool.name);
        merged.push(tool);
    }
    return merged;
}
|
|
@@ -72,8 +72,15 @@ export interface AgentDefinition {
|
|
|
72
72
|
name: string;
|
|
73
73
|
/** Agent description (shown to users) */
|
|
74
74
|
description?: string;
|
|
75
|
-
/** Claude model to use
|
|
75
|
+
/** Claude model to use. Ignored when `modelStrategy` is set —
|
|
76
|
+
* the strategy's `default` takes precedence as the base tier. */
|
|
76
77
|
model?: string;
|
|
78
|
+
/** Adaptive model selection per turn. When present the runner
|
|
79
|
+
* calls `selectModel(strategy, signals, runnerDefault)` and routes between
|
|
80
|
+
* Haiku / Sonnet / Opus according to operator-declared rules.
|
|
81
|
+
* When absent the runner falls back to the legacy `model` /
|
|
82
|
+
* `defaultModel` chain — zero breaking change. */
|
|
83
|
+
modelStrategy?: import('./model-strategy').ModelStrategy;
|
|
77
84
|
/** System prompt */
|
|
78
85
|
systemPrompt: string;
|
|
79
86
|
/** Max tokens per response */
|
package/dist/types/index.d.ts
CHANGED
package/dist/types/index.js
CHANGED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/** The three Claude tiers the platform routes between today. New
|
|
2
|
+
* providers / families will land here once the SDK speaks them. */
|
|
3
|
+
export type ModelTier = 'haiku' | 'sonnet' | 'opus';
|
|
4
|
+
/** Conditions that can push a turn UP to the escalation tier. */
|
|
5
|
+
export type EscalateRule =
|
|
6
|
+
/** The agent has at least one tool attached to this turn. Tool use
|
|
7
|
+
* benefits disproportionately from a stronger reasoner — Haiku
|
|
8
|
+
* often picks the wrong tool or skips required arguments. */
|
|
9
|
+
'toolUse'
|
|
10
|
+
/** Estimated input tokens exceed `thresholds.longContextTokens`.
|
|
11
|
+
* Long context degrades quality fastest on cheaper tiers. */
|
|
12
|
+
| 'longContext'
|
|
13
|
+
/** At least one of the attached tools is in `approval` mode (a
|
|
14
|
+
* human will gate the run). Escalate so the reasoning the human
|
|
15
|
+
* reviews is as good as we can afford on a critical path. */
|
|
16
|
+
| 'approvalRequired';
|
|
17
|
+
/** Conditions that can drop a turn DOWN to the fallback tier. Only
|
|
18
|
+
* applied when no escalate rule fires — escalation always wins. */
|
|
19
|
+
export type FallbackRule =
|
|
20
|
+
/** Estimated input tokens below `thresholds.shortInputTokens`.
|
|
21
|
+
* "Hola", "thanks", confirmations, one-line clarifications —
|
|
22
|
+
* Haiku is more than enough and costs ~12× less than Opus. */
|
|
23
|
+
'shortInput';
|
|
24
|
+
export interface ModelStrategy {
|
|
25
|
+
kind: 'fixed' | 'tiered';
|
|
26
|
+
/** The base model. Used as-is when `kind === 'fixed'`, and as the
|
|
27
|
+
* middle tier when `kind === 'tiered'` (every turn that doesn't
|
|
28
|
+
* trigger an escalate / fallback rule lands here). Free-form
|
|
29
|
+
* string so future Anthropic model ids drop in without a SDK
|
|
30
|
+
* release. */
|
|
31
|
+
default: string;
|
|
32
|
+
/** Concrete model ids for each tier. Only consulted when
|
|
33
|
+
* `kind === 'tiered'`. Missing tiers mean "stay on default" so the
|
|
34
|
+
* operator can declare a partial ladder (e.g. only the Haiku
|
|
35
|
+
* fallback, no Opus escalation). */
|
|
36
|
+
tiers?: {
|
|
37
|
+
haiku?: string;
|
|
38
|
+
sonnet?: string;
|
|
39
|
+
opus?: string;
|
|
40
|
+
};
|
|
41
|
+
/** Conditions that escalate the turn to `tiers.opus`. Order does
|
|
42
|
+
* not matter — any rule firing escalates. */
|
|
43
|
+
escalate?: EscalateRule[];
|
|
44
|
+
/** Conditions that drop the turn to `tiers.haiku`. Only applied
|
|
45
|
+
* when no escalate rule fires. */
|
|
46
|
+
fallback?: FallbackRule[];
|
|
47
|
+
/** Thresholds the rules read. Sensible defaults below; operators
|
|
48
|
+
* can override per agent. */
|
|
49
|
+
thresholds?: {
|
|
50
|
+
/** `longContext` fires when estimated input exceeds this. */
|
|
51
|
+
longContextTokens?: number;
|
|
52
|
+
/** `shortInput` fires when estimated input is below this. */
|
|
53
|
+
shortInputTokens?: number;
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
/** Defaults applied when the operator doesn't set thresholds. Picked
|
|
57
|
+
* from the Anthropic cost / quality curve we've seen in practice:
|
|
58
|
+
*
|
|
59
|
+
* - 8k input tokens is where Sonnet's quality on long contexts
|
|
60
|
+
* starts to noticeably drift from Opus.
|
|
61
|
+
* - 200 tokens covers single-sentence inputs (greetings,
|
|
62
|
+
* confirmations) where Haiku is indistinguishable from Sonnet for
|
|
63
|
+
* the user.
|
|
64
|
+
*/
|
|
65
|
+
export declare const DEFAULT_LONG_CONTEXT_TOKENS = 8000;
|
|
66
|
+
export declare const DEFAULT_SHORT_INPUT_TOKENS = 200;
|
|
67
|
+
/** Inputs the runner provides per turn so the selector can decide
|
|
68
|
+
* without re-deriving anything. All optional — missing signals just
|
|
69
|
+
* cause the corresponding rules to no-op. */
|
|
70
|
+
export interface TurnSignals {
|
|
71
|
+
/** Estimated input tokens for this turn (system prompt + history
|
|
72
|
+
* + new message). The runner can use `usage.input_tokens` from
|
|
73
|
+
* the previous turn as a proxy; for the first turn a crude
|
|
74
|
+
* word-count heuristic is good enough. */
|
|
75
|
+
estimatedInputTokens?: number;
|
|
76
|
+
/** True if the agent has at least one tool attached to this turn
|
|
77
|
+
* (whether or not the model ends up calling it). */
|
|
78
|
+
hasTools?: boolean;
|
|
79
|
+
/** True if any attached tool is in `mode: 'approval'`. */
|
|
80
|
+
hasApprovalTool?: boolean;
|
|
81
|
+
}
|
|
82
|
+
export interface ModelSelection {
|
|
83
|
+
/** The model id the runner should pass to `messages.create`. */
|
|
84
|
+
model: string;
|
|
85
|
+
/** Which leg of the strategy fired. `default` = neither escalate
|
|
86
|
+
* nor fallback matched; `escalate` / `fallback` = a rule fired
|
|
87
|
+
* AND the corresponding tier was declared. `forced` = the
|
|
88
|
+
* strategy is `fixed`, or the runner pinned the model via `overrides.model`. */
|
|
89
|
+
reason: 'forced' | 'default' | 'escalate' | 'fallback';
|
|
90
|
+
/** When `reason === 'escalate' | 'fallback'`, the rule that fired.
|
|
91
|
+
* Used in telemetry so an operator can see why their bill
|
|
92
|
+
* spiked. */
|
|
93
|
+
trigger?: EscalateRule | FallbackRule;
|
|
94
|
+
}
|
|
95
|
+
/** Pure decision function. The runner calls this once per turn just
|
|
96
|
+
* before `messages.create`. */
|
|
97
|
+
export declare function selectModel(strategy: ModelStrategy | undefined, signals: TurnSignals, runnerDefault: string): ModelSelection;
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Adaptive model selection for a single agent turn.
|
|
3
|
+
//
|
|
4
|
+
// An agent may declare ONE of two strategies:
|
|
5
|
+
//
|
|
6
|
+
// - `fixed` → use `strategy.default` (or the runner's default) for every
|
|
7
|
+
// turn. The historical behaviour. Zero overhead, zero
|
|
8
|
+
// variance. Use this when cost is a non-issue or the
|
|
9
|
+
// domain is uniform (e.g. always-creative writing).
|
|
10
|
+
//
|
|
11
|
+
// - `tiered` → at each turn the runner inspects the request and
|
|
12
|
+
// may escalate to a more capable model OR fall back to
|
|
13
|
+
// a cheaper one based on a small set of heuristics
|
|
14
|
+
// declared by the operator. No LLM-router overhead —
|
|
15
|
+
// the decision is a pure function of the turn's shape
|
|
16
|
+
// (estimated input tokens, whether tools are attached,
|
|
17
|
+
// whether the turn will route through human approval).
|
|
18
|
+
//
|
|
19
|
+
// The decision lives in `selectModel(strategy, signals, runnerDefault)` below so the
|
|
20
|
+
// runner only calls one function and the policy stays testable in
|
|
21
|
+
// isolation.
|
|
22
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
23
|
+
exports.DEFAULT_SHORT_INPUT_TOKENS = exports.DEFAULT_LONG_CONTEXT_TOKENS = void 0;
|
|
24
|
+
exports.selectModel = selectModel;
|
|
25
|
+
/** Defaults applied when the operator doesn't set thresholds. Picked
|
|
26
|
+
* from the Anthropic cost / quality curve we've seen in practice:
|
|
27
|
+
*
|
|
28
|
+
* - 8k input tokens is where Sonnet's quality on long contexts
|
|
29
|
+
* starts to noticeably drift from Opus.
|
|
30
|
+
* - 200 tokens covers single-sentence inputs (greetings,
|
|
31
|
+
* confirmations) where Haiku is indistinguishable from Sonnet for
|
|
32
|
+
* the user.
|
|
33
|
+
*/
|
|
34
|
+
exports.DEFAULT_LONG_CONTEXT_TOKENS = 8_000;
|
|
35
|
+
exports.DEFAULT_SHORT_INPUT_TOKENS = 200;
|
|
36
|
+
/**
 * Pure decision function: picks the model id for a single turn.
 *
 * Resolution order:
 *  1. No strategy → `runnerDefault`, reason `default`.
 *  2. `kind === 'fixed'` → `strategy.default` (or `runnerDefault` when
 *     that is empty), reason `forced`.
 *  3. Otherwise (tiered): the first escalate rule that matches routes
 *     to `tiers.opus`; if none matches, the first fallback rule that
 *     matches routes to `tiers.haiku`; if nothing matches the base
 *     model is used with reason `default`.
 *
 * Escalate rules are checked before fallback rules, so a matching
 * escalate rule always wins even when the target tier is undeclared.
 *
 * @param {object|undefined} strategy - The agent's model strategy.
 * @param {{estimatedInputTokens?: number, hasTools?: boolean, hasApprovalTool?: boolean}} signals
 *   Per-turn signals; a missing signal makes its rule a no-op.
 * @param {string} runnerDefault - Model used when the strategy names none.
 * @returns {{model: string, reason: string, trigger?: string}} The selection.
 */
function selectModel(strategy, signals, runnerDefault) {
    // No strategy → behave exactly like before this feature landed.
    if (!strategy) {
        return { model: runnerDefault, reason: 'default' };
    }
    if (strategy.kind === 'fixed') {
        return { model: strategy.default || runnerDefault, reason: 'forced' };
    }
    const longCtx = strategy.thresholds?.longContextTokens ?? exports.DEFAULT_LONG_CONTEXT_TOKENS;
    const shortIn = strategy.thresholds?.shortInputTokens ?? exports.DEFAULT_SHORT_INPUT_TOKENS;
    const escalate = strategy.escalate ?? [];
    const fallback = strategy.fallback ?? [];
    const hasEstimate = typeof signals.estimatedInputTokens === 'number';
    for (const rule of escalate) {
        const fires = (rule === 'toolUse' && signals.hasTools) ||
            (rule === 'longContext' && hasEstimate && signals.estimatedInputTokens > longCtx) ||
            (rule === 'approvalRequired' && signals.hasApprovalTool);
        if (fires) {
            return pickTier(strategy, 'opus', 'escalate', rule, runnerDefault);
        }
    }
    for (const rule of fallback) {
        if (rule === 'shortInput' && hasEstimate && signals.estimatedInputTokens < shortIn) {
            return pickTier(strategy, 'haiku', 'fallback', rule, runnerDefault);
        }
    }
    return { model: strategy.default || runnerDefault, reason: 'default' };
}
/**
 * Resolve the model for a rule that matched.
 *
 * When the requested tier has no model declared, the turn stays on the
 * base model (`strategy.default`, or `runnerDefault` when that is
 * empty) with reason `default`; `trigger` is still returned so the
 * caller can see which rule matched.
 *
 * @param {object} strategy - The tiered strategy.
 * @param {'haiku'|'sonnet'|'opus'} tier - Tier the rule routes to.
 * @param {'escalate'|'fallback'} reason - Which leg matched.
 * @param {string} trigger - The rule that matched.
 * @param {string} [runnerDefault] - Base model used when `strategy.default` is empty.
 * @returns {{model: string, reason: string, trigger: string}} The selection.
 */
function pickTier(strategy, tier, reason, trigger, runnerDefault) {
    const tiered = strategy.tiers?.[tier];
    if (!tiered) {
        // Same base-model chain as every other path in `selectModel`,
        // so an empty `default` never produces an empty model id.
        return { model: strategy.default || runnerDefault, reason: 'default', trigger };
    }
    return { model: tiered, reason, trigger };
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentforge-io/core",
|
|
3
|
-
"version": "2.0.23",
|
|
3
|
+
"version": "2.0.24",
|
|
4
4
|
"description": "Framework-free AI runtime SDK. Owns: agent loop (Anthropic), conversations, tools, streaming, agent-job queue, SdkHooks. Identity, billing, infra (email/uploads/secrets) live in the host's modules — not here.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"main": "dist/index.js",
|