@agentforge-io/core 2.0.23 → 2.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,8 @@ export { AGENT_FORGE_CONFIG, AGENT_QUEUE_NAME, CURRENT_USER, } from '../constant
2
2
  export type { AgentDefinition, AnthropicConfig, McpServerConfig, AgentForgeConfig, DatabaseConfig, RedisConfig, QueueConfig, } from '../types/config.types';
3
3
  export type { AgentResponse, AgentOverrides, StreamChunk, TokenUsage, ToolCallRecord, AgentToolDefinition, AgentJobPayload, AgentJobResult, AnthropicMessage, } from '../types/agent.types';
4
4
  export type { SdkHooks, UsageEvent, TurnCompleteEvent, ToolCallEvent, } from '../types/hooks';
5
+ export type { ModelStrategy, ModelTier, EscalateRule, FallbackRule, TurnSignals, ModelSelection, } from '../types/model-strategy';
6
+ export { selectModel, DEFAULT_LONG_CONTEXT_TOKENS, DEFAULT_SHORT_INPUT_TOKENS, } from '../types/model-strategy';
5
7
  export { ToolRegistryService, type Logger } from '../services/tool-registry.service';
6
8
  export { AgentRunnerService } from '../services/agent-runner.service';
7
9
  export { OrchestratorService } from '../services/orchestrator.service';
package/dist/ai/index.js CHANGED
@@ -8,12 +8,16 @@
8
8
  //
9
9
  // Files still co-located physically; this is a logical seam.
10
10
  Object.defineProperty(exports, "__esModule", { value: true });
11
- exports.InMemoryJobQueue = exports.JOB_QUEUE = exports.InMemoryPreparedStreamStore = exports.PREPARED_STREAM_STORE = exports.AgentJobWorker = exports.AgentForbiddenError = exports.AgentService = exports.ConversationNotFoundError = exports.ConversationService = exports.PreparedStreamError = exports.PreparedStreamService = exports.OrchestratorService = exports.AgentRunnerService = exports.ToolRegistryService = exports.CURRENT_USER = exports.AGENT_QUEUE_NAME = exports.AGENT_FORGE_CONFIG = void 0;
11
+ exports.InMemoryJobQueue = exports.JOB_QUEUE = exports.InMemoryPreparedStreamStore = exports.PREPARED_STREAM_STORE = exports.AgentJobWorker = exports.AgentForbiddenError = exports.AgentService = exports.ConversationNotFoundError = exports.ConversationService = exports.PreparedStreamError = exports.PreparedStreamService = exports.OrchestratorService = exports.AgentRunnerService = exports.ToolRegistryService = exports.DEFAULT_SHORT_INPUT_TOKENS = exports.DEFAULT_LONG_CONTEXT_TOKENS = exports.selectModel = exports.CURRENT_USER = exports.AGENT_QUEUE_NAME = exports.AGENT_FORGE_CONFIG = void 0;
12
12
  // ─── Constants ─────────────────────────────────────────────────────────────
13
13
  var constants_1 = require("../constants");
14
14
  Object.defineProperty(exports, "AGENT_FORGE_CONFIG", { enumerable: true, get: function () { return constants_1.AGENT_FORGE_CONFIG; } });
15
15
  Object.defineProperty(exports, "AGENT_QUEUE_NAME", { enumerable: true, get: function () { return constants_1.AGENT_QUEUE_NAME; } });
16
16
  Object.defineProperty(exports, "CURRENT_USER", { enumerable: true, get: function () { return constants_1.CURRENT_USER; } });
17
+ var model_strategy_1 = require("../types/model-strategy");
18
+ Object.defineProperty(exports, "selectModel", { enumerable: true, get: function () { return model_strategy_1.selectModel; } });
19
+ Object.defineProperty(exports, "DEFAULT_LONG_CONTEXT_TOKENS", { enumerable: true, get: function () { return model_strategy_1.DEFAULT_LONG_CONTEXT_TOKENS; } });
20
+ Object.defineProperty(exports, "DEFAULT_SHORT_INPUT_TOKENS", { enumerable: true, get: function () { return model_strategy_1.DEFAULT_SHORT_INPUT_TOKENS; } });
17
21
  // ─── Services ──────────────────────────────────────────────────────────────
18
22
  var tool_registry_service_1 = require("../services/tool-registry.service");
19
23
  Object.defineProperty(exports, "ToolRegistryService", { enumerable: true, get: function () { return tool_registry_service_1.ToolRegistryService; } });
@@ -7,6 +7,7 @@ exports.AgentRunnerService = void 0;
7
7
  const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
8
8
  const crypto_1 = require("crypto");
9
9
  const tool_approval_gate_1 = require("./tool-approval-gate");
10
+ const model_strategy_1 = require("../types/model-strategy");
10
11
  const noopLogger = {
11
12
  log: () => { }, warn: () => { }, debug: () => { }, error: () => { },
12
13
  };
@@ -29,17 +30,44 @@ class AgentRunnerService {
29
30
  // ─── Run (non-streaming) ──────────────────────────────────────────────────
30
31
  async run(agent, messages, context, overrides) {
31
32
  const messageId = (0, crypto_1.randomUUID)();
32
- const model = overrides?.model ?? agent.model ?? this.anthropicConfig.defaultModel ?? 'claude-opus-4-6';
33
+ const runnerDefault = this.anthropicConfig.defaultModel ?? 'claude-opus-4-6';
34
+ const baseModel = overrides?.model ?? agent.model ?? runnerDefault;
33
35
  const maxTokens = overrides?.maxTokens ?? agent.maxTokens ?? this.anthropicConfig.defaultMaxTokens ?? 4096;
34
36
  const temperature = overrides?.temperature ?? agent.temperature ?? 1;
35
37
  const { tools, extras } = this.buildToolList(agent, overrides);
36
38
  const systemPrompt = this.buildSystemPrompt(agent, tools, overrides);
37
39
  const toolCalls = [];
38
40
  let currentMessages = [...messages];
41
+ // Pre-compute the signals the model router reads. `hasTools` and
42
+ // `hasApprovalTool` are constant across the agentic loop (we
43
+ // don't add tools mid-conversation); `estimatedInputTokens`
44
+ // starts from a word-count heuristic and gets replaced by the
45
+ // real `usage.input_tokens` once we have a response.
46
+ const turnSignals = {
47
+ hasTools: !!tools && tools.length > 0,
48
+ hasApprovalTool: hasApprovalGatedTool(agent),
49
+ estimatedInputTokens: estimateInputTokens(systemPrompt, currentMessages),
50
+ };
39
51
  let totalUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
40
52
  let finalContent = '';
41
53
  let stopReason = 'end_turn';
54
+ // Last model id the router chose. Surfaced on the response so
55
+ // callers (and the conversation/usage logs) record what
56
+ // actually ran, not what the agent's `model` field says.
57
+ let lastModel = baseModel;
42
58
  while (true) {
59
+ // Per-turn model selection. When overrides force a model we
60
+ // honour it (manual `agent.runMessage({ overrides: { model }})`
61
+ // beats the strategy). Otherwise the strategy decides; absent
62
+ // strategy → behave exactly like before this feature landed.
63
+ const selection = overrides?.model
64
+ ? { model: overrides.model, reason: 'forced' }
65
+ : (0, model_strategy_1.selectModel)(agent.modelStrategy, turnSignals, baseModel);
66
+ const model = selection.model;
67
+ lastModel = model;
68
+ if (this.logger && selection.reason !== 'default' && selection.reason !== 'forced') {
69
+ this.logger.debug(`[modelRouter] agent=${agent.id} ${selection.reason}=${selection.trigger} → ${model}`);
70
+ }
43
71
  const response = await this.client.messages.create({
44
72
  model,
45
73
  max_tokens: maxTokens,
@@ -48,6 +76,10 @@ class AgentRunnerService {
48
76
  messages: currentMessages,
49
77
  tools: tools,
50
78
  });
79
+ // Update the signal for the NEXT iteration of the loop — the
80
+ // tool-result feedback we're about to add can balloon the
81
+ // context past the long-context threshold.
82
+ turnSignals.estimatedInputTokens = response.usage.input_tokens;
51
83
  totalUsage = {
52
84
  inputTokens: totalUsage.inputTokens + response.usage.input_tokens,
53
85
  outputTokens: totalUsage.outputTokens + response.usage.output_tokens,
@@ -117,7 +149,7 @@ class AgentRunnerService {
117
149
  role: 'assistant',
118
150
  toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
119
151
  usage: totalUsage,
120
- model,
152
+ model: lastModel,
121
153
  stopReason,
122
154
  createdAt: new Date(),
123
155
  };
@@ -125,14 +157,28 @@ class AgentRunnerService {
125
157
  // ─── Run (streaming) ──────────────────────────────────────────────────────
126
158
  async *stream(agent, messages, context, overrides) {
127
159
  const messageId = (0, crypto_1.randomUUID)();
128
- const model = overrides?.model ?? agent.model ?? this.anthropicConfig.defaultModel ?? 'claude-opus-4-6';
160
+ const runnerDefault = this.anthropicConfig.defaultModel ?? 'claude-opus-4-6';
161
+ const baseModel = overrides?.model ?? agent.model ?? runnerDefault;
129
162
  const maxTokens = overrides?.maxTokens ?? agent.maxTokens ?? this.anthropicConfig.defaultMaxTokens ?? 4096;
130
163
  const temperature = overrides?.temperature ?? agent.temperature ?? 1;
131
164
  const { tools, extras } = this.buildToolList(agent, overrides);
132
165
  const systemPrompt = this.buildSystemPrompt(agent, tools, overrides);
133
166
  let currentMessages = [...messages];
134
167
  let totalUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
168
+ // See `run()` above for the rationale on these signals.
169
+ const turnSignals = {
170
+ hasTools: !!tools && tools.length > 0,
171
+ hasApprovalTool: hasApprovalGatedTool(agent),
172
+ estimatedInputTokens: estimateInputTokens(systemPrompt, currentMessages),
173
+ };
135
174
  while (true) {
175
+ const selection = overrides?.model
176
+ ? { model: overrides.model, reason: 'forced' }
177
+ : (0, model_strategy_1.selectModel)(agent.modelStrategy, turnSignals, baseModel);
178
+ const model = selection.model;
179
+ if (this.logger && selection.reason !== 'default' && selection.reason !== 'forced') {
180
+ this.logger.debug(`[modelRouter] agent=${agent.id} ${selection.reason}=${selection.trigger} → ${model}`);
181
+ }
136
182
  const stream = this.client.messages.stream({
137
183
  model,
138
184
  max_tokens: maxTokens,
@@ -168,6 +214,12 @@ class AgentRunnerService {
168
214
  }
169
215
  }
170
216
  const finalMessage = await stream.finalMessage();
217
+ // Refresh the input-token signal so the next iteration of the
218
+ // tool loop has the post-tool-result context length, not the
219
+ // initial estimate.
220
+ if (typeof finalMessage.usage?.input_tokens === 'number') {
221
+ turnSignals.estimatedInputTokens = finalMessage.usage.input_tokens;
222
+ }
171
223
  if (finalMessage.stop_reason === 'tool_use') {
172
224
  currentMessages = [...currentMessages, { role: 'assistant', content: finalMessage.content }];
173
225
  const toolResults = [];
@@ -311,3 +363,47 @@ class AgentRunnerService {
311
363
  }
312
364
  }
313
365
  exports.AgentRunnerService = AgentRunnerService;
366
/**
 * Count the whitespace-delimited words in `text`.
 *
 * Anything that is not a string (undefined, null, structured blocks, …)
 * counts as zero words instead of throwing.
 *
 * @param {unknown} text - Candidate text.
 * @returns {number} Number of non-empty whitespace-separated tokens.
 */
function countWhitespaceWords(text) {
    if (typeof text !== 'string') {
        return 0;
    }
    return text.trim().split(/\s+/).filter(Boolean).length;
}
/**
 * Cheap word-based token estimate for the first turn.
 *
 * Real `usage.input_tokens` replaces this value once a response is back,
 * so it only has to be accurate enough to fire the `longContext` /
 * `shortInput` rules on the FIRST request.
 *
 * Rule of thumb used: ~0.75 whitespace-split words per token, i.e. about
 * 1.33 tokens per word — hence the division by 0.75 below.
 *
 * Only plain-string message content and `{ type: 'text', text }` blocks
 * are counted; every other block shape contributes nothing to the estimate.
 *
 * @param {string} systemPrompt - System prompt text (non-strings count as 0 words).
 * @param {Array<{content: unknown}>} messages - Conversation messages.
 * @returns {number} Estimated input tokens, rounded up.
 */
function estimateInputTokens(systemPrompt, messages) {
    const WORDS_PER_TOKEN = 0.75;
    let words = countWhitespaceWords(systemPrompt);
    for (const message of messages ?? []) {
        const content = message?.content;
        if (typeof content === 'string') {
            words += countWhitespaceWords(content);
            continue;
        }
        if (!Array.isArray(content)) {
            continue;
        }
        for (const block of content) {
            // Only text blocks carry countable prose; skip nulls and other block types.
            if (block && typeof block === 'object' && block.type === 'text') {
                words += countWhitespaceWords(block.text);
            }
        }
    }
    return Math.ceil(words / WORDS_PER_TOKEN);
}
396
/**
 * Report whether the agent declares at least one tool whose `mode` is
 * `'approval'`.
 *
 * Only `agent.tools` is inspected; per-call `extraTools` are not examined
 * here. Entries that are not objects (e.g. plain string names) or are null
 * never match.
 *
 * @param {{tools?: Iterable<unknown>}} agent - Agent definition.
 * @returns {boolean} `true` when an approval-mode tool is declared.
 */
function hasApprovalGatedTool(agent) {
    const declared = agent.tools;
    if (declared) {
        for (const entry of declared) {
            const isObject = entry !== null && typeof entry === 'object';
            if (isObject && entry.mode === 'approval') {
                return true;
            }
        }
    }
    return false;
}
@@ -212,7 +212,12 @@ class AgentService {
212
212
  // caller's userId, which is the historical personal-agent path.
213
213
  const resolvedExtras = await this.resolveExtraTools(agent.connectorOwnerUserId ?? params.userId);
214
214
  const filter = params.overrides?.extraToolsFilter;
215
- const extraTools = filter && resolvedExtras ? filter(resolvedExtras) : resolvedExtras;
215
+ const fromConnectors = filter && resolvedExtras ? filter(resolvedExtras) : resolvedExtras;
216
+ // Merge connector tools with whatever the caller passed in
217
+ // `overrides.extraTools` (e.g. the remote-tool wrappers from
218
+ // ChatStreamController). Caller wins on name collisions so an
219
+ // explicit override always trumps an inherited connector tool.
220
+ const extraTools = mergeExtraTools(params.overrides?.extraTools, fromConnectors);
216
221
  const response = await this.runner.run(agent, messages, {
217
222
  userId: params.userId,
218
223
  conversationId: params.conversationId,
@@ -278,7 +283,8 @@ class AgentService {
278
283
  // toolbelt regardless of which visitor session is streaming.
279
284
  const resolvedExtras = await this.resolveExtraTools(agent.connectorOwnerUserId ?? params.userId);
280
285
  const filter = params.overrides?.extraToolsFilter;
281
- const extraTools = filter && resolvedExtras ? filter(resolvedExtras) : resolvedExtras;
286
+ const fromConnectors = filter && resolvedExtras ? filter(resolvedExtras) : resolvedExtras;
287
+ const extraTools = mergeExtraTools(params.overrides?.extraTools, fromConnectors);
282
288
  try {
283
289
  for await (const chunk of this.runner.stream(agent, messages, {
284
290
  userId: params.userId,
@@ -509,3 +515,30 @@ function toAgentDefinition(record) {
509
515
  ...(extra.appearance !== undefined ? { appearance: extra.appearance } : {}),
510
516
  };
511
517
  }
518
/**
 * Combine caller-supplied `extraTools` with the connector-resolved ones.
 *
 * - Both lists empty or missing → `undefined` (never `[]`).
 * - Exactly one list non-empty  → that list, returned as-is (same reference).
 * - Both non-empty              → a new array: every caller tool first, then
 *   each connector tool whose `name` is not already used by a caller tool.
 *   The caller therefore wins on name collisions.
 *
 * @param {Array<{name: string}>|undefined} caller - Tools passed explicitly by the caller.
 * @param {Array<{name: string}>|undefined} connectors - Tools resolved from connectors.
 * @returns {Array<{name: string}>|undefined} Merged tool list, or `undefined` when empty.
 */
function mergeExtraTools(caller, connectors) {
    const hasCaller = Boolean(caller?.length);
    const hasConnectors = Boolean(connectors?.length);
    if (!hasCaller) {
        return hasConnectors ? connectors : undefined;
    }
    if (!hasConnectors) {
        return caller;
    }
    const taken = new Set();
    for (const tool of caller) {
        taken.add(tool.name);
    }
    const inherited = connectors.filter((tool) => !taken.has(tool.name));
    return [...caller, ...inherited];
}
@@ -72,8 +72,15 @@ export interface AgentDefinition {
72
72
  name: string;
73
73
  /** Agent description (shown to users) */
74
74
  description?: string;
75
- /** Claude model to use */
75
+ /** Claude model to use. Ignored when `modelStrategy` is set —
76
+ * the strategy's `default` takes precedence as the base tier. */
76
77
  model?: string;
78
+ /** Adaptive model selection per turn. When present the runner
79
+ * calls `selectModel(strategy, signals, runnerDefault)` and routes between
80
+ * Haiku / Sonnet / Opus according to operator-declared rules.
81
+ * When absent the runner falls back to the legacy `model` /
82
+ * `defaultModel` chain — zero breaking change. */
83
+ modelStrategy?: import('./model-strategy').ModelStrategy;
77
84
  /** System prompt */
78
85
  systemPrompt: string;
79
86
  /** Max tokens per response */
@@ -1,3 +1,4 @@
1
1
  export * from './agent.types';
2
2
  export * from './config.types';
3
3
  export * from './hooks';
4
+ export * from './model-strategy';
@@ -17,3 +17,4 @@ Object.defineProperty(exports, "__esModule", { value: true });
17
17
  __exportStar(require("./agent.types"), exports);
18
18
  __exportStar(require("./config.types"), exports);
19
19
  __exportStar(require("./hooks"), exports);
20
+ __exportStar(require("./model-strategy"), exports);
@@ -0,0 +1,97 @@
1
+ /** The three Claude tiers the platform routes between today. New
2
+ * providers / families will land here once the SDK speaks them. */
3
+ export type ModelTier = 'haiku' | 'sonnet' | 'opus';
4
+ /** Conditions that can push a turn UP to the escalation tier. */
5
+ export type EscalateRule =
6
+ /** The agent has at least one tool attached to this turn. Tool use
7
+ * benefits disproportionately from a stronger reasoner — Haiku
8
+ * often picks the wrong tool or skips required arguments. */
9
+ 'toolUse'
10
+ /** Estimated input tokens exceed `thresholds.longContextTokens`.
11
+ * Long context degrades quality fastest on cheaper tiers. */
12
+ | 'longContext'
13
+ /** At least one of the attached tools is in `approval` mode (a
14
+ * human will gate the run). Escalate so the reasoning the human
15
+ * reviews is as good as we can afford on a critical path. */
16
+ | 'approvalRequired';
17
+ /** Conditions that can drop a turn DOWN to the fallback tier. Only
18
+ * applied when no escalate rule fires — escalation always wins. */
19
+ export type FallbackRule =
20
+ /** Estimated input tokens below `thresholds.shortInputTokens`.
21
+ * "Hola", "thanks", confirmations, one-line clarifications —
22
+ * Haiku is more than enough and costs ~12× less than Opus. */
23
+ 'shortInput';
24
+ export interface ModelStrategy {
25
+ kind: 'fixed' | 'tiered';
26
+ /** The base model. Used as-is when `kind === 'fixed'`, and as the
27
+ * middle tier when `kind === 'tiered'` (every turn that doesn't
28
+ * trigger an escalate / fallback rule lands here). Free-form
29
+ * string so future Anthropic model ids drop in without a SDK
30
+ * release. */
31
+ default: string;
32
+ /** Concrete model ids for each tier. Only consulted when
33
+ * `kind === 'tiered'`. Missing tiers mean "stay on default" so the
34
+ * operator can declare a partial ladder (e.g. only the Haiku
35
+ * fallback, no Opus escalation). */
36
+ tiers?: {
37
+ haiku?: string;
38
+ sonnet?: string;
39
+ opus?: string;
40
+ };
41
+ /** Conditions that escalate the turn to `tiers.opus`. Order does
42
+ * not matter — any rule firing escalates. */
43
+ escalate?: EscalateRule[];
44
+ /** Conditions that drop the turn to `tiers.haiku`. Only applied
45
+ * when no escalate rule fires. */
46
+ fallback?: FallbackRule[];
47
+ /** Thresholds the rules read. Sensible defaults below; operators
48
+ * can override per agent. */
49
+ thresholds?: {
50
+ /** `longContext` fires when estimated input exceeds this. */
51
+ longContextTokens?: number;
52
+ /** `shortInput` fires when estimated input is below this. */
53
+ shortInputTokens?: number;
54
+ };
55
+ }
56
+ /** Defaults applied when the operator doesn't set thresholds. Picked
57
+ * from the Anthropic cost / quality curve we've seen in practice:
58
+ *
59
+ * - 8k input tokens is where Sonnet's quality on long contexts
60
+ * starts to noticeably drift from Opus.
61
+ * - 200 tokens covers single-sentence inputs (greetings,
62
+ * confirmations) where Haiku is indistinguishable from Sonnet for
63
+ * the user.
64
+ */
65
+ export declare const DEFAULT_LONG_CONTEXT_TOKENS = 8000;
66
+ export declare const DEFAULT_SHORT_INPUT_TOKENS = 200;
67
+ /** Inputs the runner provides per turn so the selector can decide
68
+ * without re-deriving anything. All optional — missing signals just
69
+ * cause the corresponding rules to no-op. */
70
+ export interface TurnSignals {
71
+ /** Estimated input tokens for this turn (system prompt + history
72
+ * + new message). The runner can use `usage.input_tokens` from
73
+ * the previous turn as a proxy; for the first turn a crude
74
+ * word-count heuristic is good enough. */
75
+ estimatedInputTokens?: number;
76
+ /** True if the agent has at least one tool attached to this turn
77
+ * (whether or not the model ends up calling it). */
78
+ hasTools?: boolean;
79
+ /** True if any attached tool is in `mode: 'approval'`. */
80
+ hasApprovalTool?: boolean;
81
+ }
82
+ export interface ModelSelection {
83
+ /** The model id the runner should pass to `messages.create`. */
84
+ model: string;
85
+ /** Which leg of the strategy fired. `default` = no rule matched, or a
86
+ * rule matched but its tier is undeclared (near-miss; `trigger` is still set);
87
+ * `escalate` / `fallback` = a rule fired AND the corresponding tier was declared.
88
+ * `forced` = the strategy is `fixed` (the runner also reports it for a per-call model override). */
89
+ reason: 'forced' | 'default' | 'escalate' | 'fallback';
90
+ /** The rule that fired. Set when `reason` is `escalate` / `fallback`, and also
91
+ * on a `default` near-miss (rule fired, tier undeclared). Used in telemetry so
92
+ * an operator can see why their bill spiked. */
93
+ trigger?: EscalateRule | FallbackRule;
94
+ }
95
+ /** Pure decision function. The runner calls this once per turn just
96
+ * before `messages.create`. */
97
+ export declare function selectModel(strategy: ModelStrategy | undefined, signals: TurnSignals, runnerDefault: string): ModelSelection;
@@ -0,0 +1,83 @@
1
+ "use strict";
2
+ // Adaptive model selection for a single agent turn.
3
+ //
4
+ // An agent may declare ONE of two strategies:
5
+ //
6
+ // - `fixed` → use `strategy.default` (else `agent.model` / the runner's default) for every
7
+ // turn. The historical behaviour. Zero overhead, zero
8
+ // variance. Use this when cost is a non-issue or the
9
+ // domain is uniform (e.g. always-creative writing).
10
+ //
11
+ // - `tiered` → at each turn the runner inspects the request and
12
+ // may escalate to a more capable model OR fall back to
13
+ // a cheaper one based on a small set of heuristics
14
+ // declared by the operator. No LLM-router overhead —
15
+ // the decision is a pure function of the turn's shape
16
+ // (estimated input tokens, whether tools are attached,
17
+ // whether the turn will route through human approval).
18
+ //
19
+ // The decision lives in `selectModel(strategy, signals, runnerDefault)` below so the
20
+ // runner only calls one function and the policy stays testable in
21
+ // isolation.
22
+ Object.defineProperty(exports, "__esModule", { value: true });
23
+ exports.DEFAULT_SHORT_INPUT_TOKENS = exports.DEFAULT_LONG_CONTEXT_TOKENS = void 0;
24
+ exports.selectModel = selectModel;
25
+ /** Defaults applied when the operator doesn't set thresholds. Picked
26
+ * from the Anthropic cost / quality curve we've seen in practice:
27
+ *
28
+ * - 8k input tokens is where Sonnet's quality on long contexts
29
+ * starts to noticeably drift from Opus.
30
+ * - 200 tokens covers single-sentence inputs (greetings,
31
+ * confirmations) where Haiku is indistinguishable from Sonnet for
32
+ * the user.
33
+ */
34
+ exports.DEFAULT_LONG_CONTEXT_TOKENS = 8_000;
35
+ exports.DEFAULT_SHORT_INPUT_TOKENS = 200;
36
/**
 * Decide whether a single escalate rule matches the current turn.
 *
 * @param {string} rule - One of `'toolUse'`, `'longContext'`, `'approvalRequired'`.
 * @param {{estimatedInputTokens?: number, hasTools?: boolean, hasApprovalTool?: boolean}} signals
 * @param {number} longContextTokens - Threshold the `longContext` rule compares against.
 * @returns {boolean} `true` when the rule fires; unknown rules never fire.
 */
function escalateRuleFires(rule, signals, longContextTokens) {
    switch (rule) {
        case 'toolUse':
            return Boolean(signals.hasTools);
        case 'longContext':
            // A missing estimate makes the rule a no-op rather than a false positive.
            return typeof signals.estimatedInputTokens === 'number' &&
                signals.estimatedInputTokens > longContextTokens;
        case 'approvalRequired':
            return Boolean(signals.hasApprovalTool);
        default:
            return false;
    }
}
/**
 * Pure decision function. The runner calls this once per turn just
 * before `messages.create`.
 *
 * Resolution order:
 *   1. no strategy        → `runnerDefault`, reason `default`;
 *   2. `kind === 'fixed'` → `strategy.default || runnerDefault`, reason `forced`;
 *   3. otherwise (tiered) → the first escalate rule (in declaration order) that
 *      fires picks `tiers.opus`; if none fires, the first fallback rule that
 *      fires picks `tiers.haiku`; if nothing fires, the base model is used.
 *
 * Escalation beats fallback: once an escalate rule fires, fallback rules are
 * not consulted, even when the escalation tier is undeclared.
 *
 * @param {object|undefined} strategy - The agent's `modelStrategy`, if any.
 * @param {{estimatedInputTokens?: number, hasTools?: boolean, hasApprovalTool?: boolean}} signals
 * @param {string} runnerDefault - Model id to use when the strategy supplies none.
 * @returns {{model: string, reason: string, trigger?: string}} The selection.
 */
function selectModel(strategy, signals, runnerDefault) {
    // No strategy → behave exactly like before this feature landed.
    if (!strategy) {
        return { model: runnerDefault, reason: 'default' };
    }
    if (strategy.kind === 'fixed') {
        return { model: strategy.default || runnerDefault, reason: 'forced' };
    }
    const longCtx = strategy.thresholds?.longContextTokens ?? exports.DEFAULT_LONG_CONTEXT_TOKENS;
    const shortIn = strategy.thresholds?.shortInputTokens ?? exports.DEFAULT_SHORT_INPUT_TOKENS;
    const escalate = strategy.escalate ?? [];
    const fallback = strategy.fallback ?? [];
    for (const rule of escalate) {
        if (escalateRuleFires(rule, signals, longCtx)) {
            return pickTier(strategy, 'opus', 'escalate', rule, runnerDefault);
        }
    }
    for (const rule of fallback) {
        if (rule === 'shortInput' &&
            typeof signals.estimatedInputTokens === 'number' &&
            signals.estimatedInputTokens < shortIn) {
            return pickTier(strategy, 'haiku', 'fallback', rule, runnerDefault);
        }
    }
    return { model: strategy.default || runnerDefault, reason: 'default' };
}
/**
 * Resolve the concrete model id for a tier that a rule selected.
 *
 * When the tier has no declared model the turn stays on the base model
 * (`strategy.default`, else `runnerDefault`) with reason `default`, but the
 * rule that WOULD have fired is still reported in `trigger` so telemetry
 * captures the near-miss.
 *
 * @param {object} strategy - Tiered strategy.
 * @param {'haiku'|'sonnet'|'opus'} tier - Tier chosen by the rule.
 * @param {'escalate'|'fallback'} reason - Leg of the strategy that fired.
 * @param {string} trigger - Rule that fired.
 * @param {string} [runnerDefault] - Fallback model id when `strategy.default` is empty.
 * @returns {{model: string, reason: string, trigger: string}} The selection.
 */
function pickTier(strategy, tier, reason, trigger, runnerDefault) {
    const tiered = strategy.tiers?.[tier];
    if (!tiered) {
        // Same base-model chain as selectModel's default path, so an empty
        // `strategy.default` never leaks out as the model id.
        return { model: strategy.default || runnerDefault, reason: 'default', trigger };
    }
    return { model: tiered, reason, trigger };
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agentforge-io/core",
3
- "version": "2.0.23",
3
+ "version": "2.0.24",
4
4
  "description": "Framework-free AI runtime SDK. Owns: agent loop (Anthropic), conversations, tools, streaming, agent-job queue, SdkHooks. Identity, billing, infra (email/uploads/secrets) live in the host's modules — not here.",
5
5
  "license": "MIT",
6
6
  "main": "dist/index.js",