whale-code 6.5.4 → 6.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/services/agent-config.d.ts +25 -0
- package/dist/cli/services/agent-config.js +61 -0
- package/dist/cli/services/agent-loop.js +30 -9
- package/dist/cli/services/error-logger.d.ts +2 -3
- package/dist/cli/services/error-logger.js +43 -52
- package/dist/cli/services/subagent.js +11 -7
- package/dist/cli/services/teammate.js +28 -14
- package/dist/server/handlers/api-docs.d.ts +6 -0
- package/dist/server/handlers/api-docs.js +1478 -0
- package/dist/server/handlers/api-keys.js +16 -2
- package/dist/server/handlers/comms.d.ts +0 -53
- package/dist/server/handlers/comms.js +45 -27
- package/dist/server/handlers/voice.js +22 -0
- package/dist/server/index.js +57 -26
- package/dist/server/lib/clickhouse-client.js +2 -2
- package/dist/server/lib/pdf-renderer.d.ts +1 -1
- package/dist/server/lib/pdf-renderer.js +18 -4
- package/dist/server/lib/server-agent-loop.d.ts +6 -0
- package/dist/server/lib/server-agent-loop.js +20 -10
- package/dist/server/lib/server-subagent.d.ts +2 -0
- package/dist/server/lib/server-subagent.js +4 -2
- package/dist/server/providers/anthropic.js +4 -4
- package/dist/server/providers/bedrock.js +4 -4
- package/dist/server/tool-router.d.ts +13 -0
- package/dist/server/tool-router.js +3 -1
- package/dist/shared/agent-core.d.ts +86 -8
- package/dist/shared/agent-core.js +94 -19
- package/dist/shared/api-client.d.ts +1 -0
- package/dist/shared/api-client.js +2 -2
- package/dist/shared/tool-dispatch.d.ts +0 -2
- package/package.json +1 -1
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* Consolidates: streaming, prompt caching, context management betas, compaction,
|
|
9
9
|
* loop detection, parallel tool execution, subagent delegation, retry, cost tracking.
|
|
10
10
|
*/
|
|
11
|
-
import { LoopDetector, getContextManagement, getMaxOutputTokens, getThinkingConfig, addPromptCaching, estimateCostUsd, isRetryableError, sanitizeError, routeModel, resolveToolChoice, emitCostWarningIfNeeded, demoteSubagentModel,
|
|
11
|
+
import { LoopDetector, getContextManagement, getMaxOutputTokens, getThinkingConfig, addPromptCaching, estimateCostUsd, isRetryableError, sanitizeError, routeModel, resolveToolChoice, emitCostWarningIfNeeded, demoteSubagentModel, COMPACTION_TOTAL_BUDGET, DEFAULT_SESSION_COST_BUDGET_USD, } from "../../shared/agent-core.js";
|
|
12
12
|
import { processStreamWithCallbacks } from "../../shared/sse-parser.js";
|
|
13
13
|
import { MODELS } from "../../shared/constants.js";
|
|
14
14
|
import { dispatchTools, buildAssistantContent } from "../../shared/tool-dispatch.js";
|
|
@@ -50,10 +50,16 @@ function mapToolChoiceForAnthropic(tc) {
|
|
|
50
50
|
// UNIFIED AGENT LOOP
|
|
51
51
|
// ============================================================================
|
|
52
52
|
export async function runServerAgentLoop(opts) {
|
|
53
|
-
const { anthropic, model, systemPrompt, messages, tools: inputTools, maxTurns, temperature, enableDelegation = true, enablePromptCaching = true, enableStreaming = true, maxConcurrentTools = DEFAULT_MAX_CONCURRENT_TOOLS, maxCostUsd: maxCostUsdOpt, onText, onToolStart, onCitation, documents, clientDisconnected = { value: false }, startedAt = Date.now(), maxDurationMs = 15 * 60 * 1000, } = opts;
|
|
54
|
-
// Resolve cost budget: explicit opt > env var > default
|
|
53
|
+
const { anthropic, model, systemPrompt, messages, tools: inputTools, maxTurns, temperature, enableDelegation = true, enablePromptCaching = true, enableStreaming = true, enableModelRouting = true, maxConcurrentTools = DEFAULT_MAX_CONCURRENT_TOOLS, maxCostUsd: maxCostUsdOpt, onText, onToolStart, onCitation, documents, clientDisconnected = { value: false }, startedAt = Date.now(), maxDurationMs = 15 * 60 * 1000, } = opts;
|
|
54
|
+
// Resolve cost budget: context_config > explicit opt > env var > default
|
|
55
|
+
const ctxOverrides = opts.contextOverrides;
|
|
55
56
|
const envBudget = parseFloat(process.env.WHALE_COST_BUDGET_USD || "");
|
|
56
|
-
const maxCostUsd =
|
|
57
|
+
const maxCostUsd = ctxOverrides?.session_cost_budget_usd
|
|
58
|
+
?? maxCostUsdOpt
|
|
59
|
+
?? (isFinite(envBudget) ? envBudget : DEFAULT_SESSION_COST_BUDGET_USD);
|
|
60
|
+
// Resolve compaction budget from overrides (used for compaction exhaustion checks)
|
|
61
|
+
const effectiveCompactionTrigger = ctxOverrides?.compaction_trigger_tokens ?? 150_000;
|
|
62
|
+
const effectiveCompactionBudget = ctxOverrides?.compaction_total_budget ?? COMPACTION_TOTAL_BUDGET;
|
|
57
63
|
// Auto-inject delegate_task for all models (subagents always use Claude Haiku/Sonnet)
|
|
58
64
|
// activeTools is mutable — discover_tools adds to it during the session
|
|
59
65
|
const activeTools = [...inputTools];
|
|
@@ -122,7 +128,8 @@ export async function runServerAgentLoop(opts) {
|
|
|
122
128
|
loopDetector.resetTurn();
|
|
123
129
|
// Route model on the FIRST turn only — subsequent turns use the requested model
|
|
124
130
|
// since the conversation may have become complex after tool results.
|
|
125
|
-
|
|
131
|
+
// enableModelRouting=false prevents downgrade (e.g. Opus agents that should stay Opus)
|
|
132
|
+
const effectiveModel = (turnCount === 1 && enableModelRouting)
|
|
126
133
|
? routeModel(firstUserText, model)
|
|
127
134
|
: model;
|
|
128
135
|
if (turnCount === 1 && effectiveModel !== model) {
|
|
@@ -130,7 +137,7 @@ export async function runServerAgentLoop(opts) {
|
|
|
130
137
|
}
|
|
131
138
|
// Per-turn model config — uses effectiveModel so routed models get correct settings
|
|
132
139
|
const maxTokens = opts.maxTokens ?? getMaxOutputTokens(effectiveModel);
|
|
133
|
-
const ctxMgmt = getContextManagement(effectiveModel);
|
|
140
|
+
const ctxMgmt = getContextManagement(effectiveModel, opts.contextOverrides);
|
|
134
141
|
const thinkingCfg = getThinkingConfig(effectiveModel, true);
|
|
135
142
|
// Prepare tool definitions — use pre-computed cache, only rebuild when tools change
|
|
136
143
|
// (discover_tools can add tools mid-session, detected via length change)
|
|
@@ -288,8 +295,8 @@ export async function runServerAgentLoop(opts) {
|
|
|
288
295
|
compactionCount++;
|
|
289
296
|
log.info({ compactionCount }, "compaction — preserving last 2 messages, resuming");
|
|
290
297
|
// Budget enforcement: if cumulative compaction cost exceeds budget, force wrap-up
|
|
291
|
-
if (compactionCount *
|
|
292
|
-
log.warn({ compactionCount, triggerTokens:
|
|
298
|
+
if (compactionCount * effectiveCompactionTrigger >= effectiveCompactionBudget) {
|
|
299
|
+
log.warn({ compactionCount, triggerTokens: effectiveCompactionTrigger, estimatedTokens: compactionCount * effectiveCompactionTrigger }, "compaction budget exhausted");
|
|
293
300
|
onText?.("\n[Context budget reached — wrapping up.]");
|
|
294
301
|
// Rebuild messages: compaction summary + wrap-up instruction
|
|
295
302
|
const compactedMessages = [
|
|
@@ -462,7 +469,7 @@ export async function runServerAgentLoop(opts) {
|
|
|
462
469
|
if (response.stop_reason === "compaction" && nsCompactionContent !== null) {
|
|
463
470
|
compactionCount++;
|
|
464
471
|
log.info({ compactionCount, streaming: false }, "compaction — preserving last 2 messages");
|
|
465
|
-
if (compactionCount *
|
|
472
|
+
if (compactionCount * effectiveCompactionTrigger >= effectiveCompactionBudget) {
|
|
466
473
|
onText?.("\n[Context budget reached — wrapping up.]");
|
|
467
474
|
const compactedMessages = [
|
|
468
475
|
{ role: "assistant", content: [{ type: "compaction", content: nsCompactionContent }] },
|
|
@@ -548,7 +555,8 @@ function makeToolExecutor(opts, tools, allToolNames, subagentTokens, discoveredT
|
|
|
548
555
|
if (name === "delegate_task") {
|
|
549
556
|
const subPrompt = String(input.prompt || "");
|
|
550
557
|
const subModel = demoteSubagentModel(input.model ? String(input.model) : undefined);
|
|
551
|
-
const
|
|
558
|
+
const defaultSubMaxTurns = opts.subagentMaxTurns ?? 6;
|
|
559
|
+
const subMaxTurns = Math.min(Math.max(1, Number(input.max_turns) || defaultSubMaxTurns), 12);
|
|
552
560
|
const subTools = tools.filter((t) => t.name !== "delegate_task");
|
|
553
561
|
const subId = `sub-${Date.now().toString(36)}`;
|
|
554
562
|
onSubagentProgress?.({ subagentId: subId, event: "started", model: subModel });
|
|
@@ -558,6 +566,8 @@ function makeToolExecutor(opts, tools, allToolNames, subagentTokens, discoveredT
|
|
|
558
566
|
maxTurns: subMaxTurns, tools: subTools,
|
|
559
567
|
executeTool: async (toolName, args) => executeTool(toolName, args, "server_subagent"),
|
|
560
568
|
onProgress: onSubagentProgress, clientDisconnected, startedAt, maxDurationMs,
|
|
569
|
+
maxOutputTokens: opts.subagentMaxTokens,
|
|
570
|
+
temperature: opts.subagentTemperature,
|
|
561
571
|
});
|
|
562
572
|
onSubagentProgress?.({ subagentId: subId, event: "done", output: subResult.output });
|
|
563
573
|
// Audit log
|
|
@@ -64,7 +64,9 @@ export async function runServerSubagent(opts) {
|
|
|
64
64
|
let cacheRead = 0;
|
|
65
65
|
// Enable thinking for capable models; adjust max tokens for Opus
|
|
66
66
|
const shouldThink = modelAlias === "opus" || modelAlias === "sonnet";
|
|
67
|
-
const
|
|
67
|
+
const baseMaxTokens = opts.maxOutputTokens ?? SUBAGENT_MAX_OUTPUT_TOKENS;
|
|
68
|
+
const effectiveMaxTokens = modelAlias === "opus" ? Math.max(16384, baseMaxTokens) : baseMaxTokens;
|
|
69
|
+
const effectiveTemperature = opts.temperature ?? 0.3;
|
|
68
70
|
// Context management for subagent: clear at 60K, keep 2, no compaction
|
|
69
71
|
const apiConfig = buildAPIRequest({
|
|
70
72
|
model: modelId,
|
|
@@ -99,7 +101,7 @@ export async function runServerSubagent(opts) {
|
|
|
99
101
|
return await anthropic.beta.messages.create({
|
|
100
102
|
model: modelId,
|
|
101
103
|
max_tokens: apiConfig.maxTokens,
|
|
102
|
-
temperature: shouldThink ? 1 :
|
|
104
|
+
temperature: shouldThink ? 1 : effectiveTemperature, // Anthropic requires temp=1 with thinking
|
|
103
105
|
system,
|
|
104
106
|
tools: cachedTools,
|
|
105
107
|
messages: cachedMessages,
|
|
@@ -8,16 +8,16 @@
|
|
|
8
8
|
* output token limits, and compaction config for Anthropic/Claude models.
|
|
9
9
|
*/
|
|
10
10
|
import Anthropic from "@anthropic-ai/sdk";
|
|
11
|
-
import { sanitizeError } from "../../shared/agent-core.js";
|
|
11
|
+
import { sanitizeError, AGENT_DEFAULTS } from "../../shared/agent-core.js";
|
|
12
12
|
import { getCapabilities } from "../lib/provider-capabilities.js";
|
|
13
13
|
import { registerProvider } from "./registry.js";
|
|
14
14
|
import { jsonResponse, writeSSEHeaders } from "./shared.js";
|
|
15
15
|
// ============================================================================
|
|
16
16
|
// CONSTANTS — Anthropic-specific model config
|
|
17
17
|
// ============================================================================
|
|
18
|
-
const COMPACTION_TRIGGER_TOKENS =
|
|
19
|
-
const COMPACTION_TOTAL_BUDGET =
|
|
20
|
-
const DEFAULT_OUTPUT_TOKENS = 16384;
|
|
18
|
+
const COMPACTION_TRIGGER_TOKENS = AGENT_DEFAULTS.compactionTriggerTokens;
|
|
19
|
+
const COMPACTION_TOTAL_BUDGET = AGENT_DEFAULTS.compactionTotalBudget;
|
|
20
|
+
const DEFAULT_OUTPUT_TOKENS = 16384; // Per-response cap (model max is separate)
|
|
21
21
|
const MODEL_MAX_OUTPUT_TOKENS = {
|
|
22
22
|
"claude-opus-4-6": 128000,
|
|
23
23
|
"claude-sonnet-4-6": 64000,
|
|
@@ -8,16 +8,16 @@
|
|
|
8
8
|
* output token limits, and compaction config for Bedrock Claude models.
|
|
9
9
|
*/
|
|
10
10
|
import { BedrockRuntimeClient, InvokeModelWithResponseStreamCommand } from "@aws-sdk/client-bedrock-runtime";
|
|
11
|
-
import { sanitizeError } from "../../shared/agent-core.js";
|
|
11
|
+
import { sanitizeError, AGENT_DEFAULTS } from "../../shared/agent-core.js";
|
|
12
12
|
import { getCapabilities } from "../lib/provider-capabilities.js";
|
|
13
13
|
import { registerProvider } from "./registry.js";
|
|
14
14
|
import { jsonResponse, writeSSEHeaders, resolveProviderCredentials } from "./shared.js";
|
|
15
15
|
// ============================================================================
|
|
16
16
|
// CONSTANTS — Bedrock-specific model config
|
|
17
17
|
// ============================================================================
|
|
18
|
-
const COMPACTION_TRIGGER_TOKENS =
|
|
19
|
-
const COMPACTION_TOTAL_BUDGET =
|
|
20
|
-
const DEFAULT_OUTPUT_TOKENS = 16384;
|
|
18
|
+
const COMPACTION_TRIGGER_TOKENS = AGENT_DEFAULTS.compactionTriggerTokens;
|
|
19
|
+
const COMPACTION_TOTAL_BUDGET = AGENT_DEFAULTS.compactionTotalBudget;
|
|
20
|
+
const DEFAULT_OUTPUT_TOKENS = 16384; // Per-response cap (model max is separate)
|
|
21
21
|
const MODEL_MAX_OUTPUT_TOKENS = {
|
|
22
22
|
"anthropic.claude-sonnet-4-6": 64000,
|
|
23
23
|
"us.anthropic.claude-sonnet-4-20250514-v1:0": 64000,
|
|
@@ -70,6 +70,19 @@ export interface AgentConfig {
|
|
|
70
70
|
max_history_chars?: number;
|
|
71
71
|
max_tool_result_chars?: number;
|
|
72
72
|
max_message_chars?: number;
|
|
73
|
+
compaction_trigger_tokens?: number;
|
|
74
|
+
compaction_total_budget?: number;
|
|
75
|
+
clear_thinking_keep?: number;
|
|
76
|
+
clear_tool_uses_trigger?: number;
|
|
77
|
+
clear_tool_uses_keep?: number;
|
|
78
|
+
session_cost_budget_usd?: number;
|
|
79
|
+
max_concurrent_tools?: number;
|
|
80
|
+
enable_delegation?: boolean;
|
|
81
|
+
max_duration_ms?: number;
|
|
82
|
+
enable_model_routing?: boolean;
|
|
83
|
+
subagent_max_tokens?: number;
|
|
84
|
+
subagent_max_turns?: number;
|
|
85
|
+
subagent_temperature?: number;
|
|
73
86
|
} | null;
|
|
74
87
|
}
|
|
75
88
|
export interface ToolLoadResult {
|
|
@@ -45,6 +45,7 @@ import { handleKali } from "./handlers/kali.js";
|
|
|
45
45
|
import { handleLocalAgent } from "./handlers/local-agent.js";
|
|
46
46
|
import { handleEnrichment } from "./handlers/enrichment.js";
|
|
47
47
|
import { handleStorefront } from "./handlers/storefront.js";
|
|
48
|
+
import { handleApiDocs } from "./handlers/api-docs.js";
|
|
48
49
|
import { handleClickHouse } from "./handlers/clickhouse.js";
|
|
49
50
|
import { summarizeResult, withTimeout } from "./lib/utils.js";
|
|
50
51
|
// ============================================================================
|
|
@@ -300,6 +301,7 @@ export const TOOL_HANDLERS = {
|
|
|
300
301
|
browser: { handler: handleBrowser, timeout: 120_000, requiresStore: true },
|
|
301
302
|
discovery: { handler: handleDiscovery, timeout: DEFAULT_TIMEOUT, requiresStore: false },
|
|
302
303
|
api_keys: { handler: handleAPIKeys, timeout: DEFAULT_TIMEOUT, requiresStore: true },
|
|
304
|
+
api_docs: { handler: handleApiDocs, timeout: 5000, requiresStore: false },
|
|
303
305
|
// --- Advertising ---
|
|
304
306
|
meta_ads: { handler: handleMetaAds, timeout: 300_000, requiresStore: true },
|
|
305
307
|
// --- Security & Local ---
|
|
@@ -371,7 +373,7 @@ const TOOL_CATEGORIES = {
|
|
|
371
373
|
llm: "ai", embeddings: "ai", creations: "media",
|
|
372
374
|
// Platform
|
|
373
375
|
web_search: "platform", browser: "platform", discovery: "platform",
|
|
374
|
-
api_keys: "platform",
|
|
376
|
+
api_keys: "platform", api_docs: "platform",
|
|
375
377
|
// Security
|
|
376
378
|
kali: "security", local_agent: "platform",
|
|
377
379
|
// Advertising
|
|
@@ -52,7 +52,17 @@ export interface ContextManagementConfig {
|
|
|
52
52
|
edits: Array<Record<string, unknown>>;
|
|
53
53
|
};
|
|
54
54
|
}
|
|
55
|
-
/**
|
|
55
|
+
/** Overrides from ai_agent_config.context_config JSONB — all optional, falls back to defaults */
|
|
56
|
+
export interface ContextManagementOverrides {
|
|
57
|
+
compaction_trigger_tokens?: number;
|
|
58
|
+
compaction_total_budget?: number;
|
|
59
|
+
clear_thinking_keep?: number;
|
|
60
|
+
clear_tool_uses_trigger?: number;
|
|
61
|
+
clear_tool_uses_keep?: number;
|
|
62
|
+
session_cost_budget_usd?: number;
|
|
63
|
+
}
|
|
64
|
+
/** Legacy compaction trigger — used by getCompactionConfig() for non-Anthropic providers.
|
|
65
|
+
* For Anthropic models, getContextManagement() with DB overrides takes precedence. */
|
|
56
66
|
export declare const COMPACTION_TRIGGER_TOKENS = 120000;
|
|
57
67
|
/** Max cumulative tokens before forcing wrap-up (prevents runaway compaction cost) */
|
|
58
68
|
export declare const COMPACTION_TOTAL_BUDGET = 2000000;
|
|
@@ -75,7 +85,7 @@ export declare function getCompactionConfig(model: string): CompactionConfig;
|
|
|
75
85
|
* - All other Claude models: clear thinking + clear tools at 80K/keep 3
|
|
76
86
|
* - Non-Anthropic models (Gemini, OpenAI): no betas, no context management
|
|
77
87
|
*/
|
|
78
|
-
export declare function getContextManagement(model: string): ContextManagementConfig;
|
|
88
|
+
export declare function getContextManagement(model: string, overrides?: ContextManagementOverrides): ContextManagementConfig;
|
|
79
89
|
export declare function getMaxOutputTokens(model: string, agentMax?: number): number;
|
|
80
90
|
/**
|
|
81
91
|
* Add prompt cache breakpoints to tools and messages.
|
|
@@ -163,12 +173,6 @@ export declare function categorizeError(err: unknown): {
|
|
|
163
173
|
retryable: boolean;
|
|
164
174
|
};
|
|
165
175
|
export declare function isRetryableError(err: unknown): boolean;
|
|
166
|
-
/** @deprecated — Anthropic context_management handles limits. Use SAFETY_MAX_CHARS in tool-dispatch instead. */
|
|
167
|
-
export declare function truncateToolResult(content: string, maxChars: number): string;
|
|
168
|
-
/** @deprecated — Anthropic context_management handles limits. */
|
|
169
|
-
export declare function getMaxToolResultChars(contextConfig?: {
|
|
170
|
-
max_tool_result_chars?: number;
|
|
171
|
-
} | null): number;
|
|
172
176
|
/**
|
|
173
177
|
* Demote subagent model requests — single source of truth for server + CLI.
|
|
174
178
|
* - explore/research: always haiku
|
|
@@ -177,4 +181,78 @@ export declare function getMaxToolResultChars(contextConfig?: {
|
|
|
177
181
|
* - default/undefined: haiku
|
|
178
182
|
*/
|
|
179
183
|
export declare function demoteSubagentModel(requested: string | undefined, agentType?: string): "haiku" | "sonnet";
|
|
184
|
+
export type CallPath = "sse" | "workflow" | "channel";
|
|
185
|
+
/** Documented defaults — replaces scattered magic numbers across call paths */
|
|
186
|
+
export declare const AGENT_DEFAULTS: {
|
|
187
|
+
readonly maxTurns: 10;
|
|
188
|
+
readonly temperature: 0.7;
|
|
189
|
+
readonly maxConcurrentTools: 7;
|
|
190
|
+
readonly enableDelegation: true;
|
|
191
|
+
readonly maxDurationMs: 900000;
|
|
192
|
+
readonly maxDurationMsWorkflow: 120000;
|
|
193
|
+
readonly enableModelRouting: true;
|
|
194
|
+
readonly subagentMaxTokens: 8192;
|
|
195
|
+
readonly subagentMaxTurns: 6;
|
|
196
|
+
readonly subagentTemperature: 0.3;
|
|
197
|
+
readonly maxMessageChars: 100000;
|
|
198
|
+
readonly maxHistoryChars: 400000;
|
|
199
|
+
readonly compactionTriggerTokens: 150000;
|
|
200
|
+
readonly compactionTotalBudget: 2000000;
|
|
201
|
+
readonly sessionCostBudgetUsd: 5;
|
|
202
|
+
};
|
|
203
|
+
/** Context config shape from ai_agent_config.context_config JSONB */
|
|
204
|
+
export interface AgentContextConfig {
|
|
205
|
+
includeLocations?: boolean;
|
|
206
|
+
locationIds?: string[];
|
|
207
|
+
includeCustomers?: boolean;
|
|
208
|
+
customerSegments?: string[];
|
|
209
|
+
max_history_chars?: number;
|
|
210
|
+
max_tool_result_chars?: number;
|
|
211
|
+
max_message_chars?: number;
|
|
212
|
+
compaction_trigger_tokens?: number;
|
|
213
|
+
compaction_total_budget?: number;
|
|
214
|
+
clear_thinking_keep?: number;
|
|
215
|
+
clear_tool_uses_trigger?: number;
|
|
216
|
+
clear_tool_uses_keep?: number;
|
|
217
|
+
session_cost_budget_usd?: number;
|
|
218
|
+
max_concurrent_tools?: number;
|
|
219
|
+
enable_delegation?: boolean;
|
|
220
|
+
max_duration_ms?: number;
|
|
221
|
+
enable_model_routing?: boolean;
|
|
222
|
+
subagent_max_tokens?: number;
|
|
223
|
+
subagent_max_turns?: number;
|
|
224
|
+
subagent_temperature?: number;
|
|
225
|
+
}
|
|
226
|
+
export interface ResolvedAgentLoopConfig {
|
|
227
|
+
maxTurns: number;
|
|
228
|
+
temperature: number;
|
|
229
|
+
maxTokens: number;
|
|
230
|
+
maxConcurrentTools: number;
|
|
231
|
+
enableDelegation: boolean;
|
|
232
|
+
maxDurationMs: number;
|
|
233
|
+
enableModelRouting: boolean;
|
|
234
|
+
maxMessageChars: number;
|
|
235
|
+
maxHistoryChars: number;
|
|
236
|
+
contextOverrides: ContextManagementOverrides;
|
|
237
|
+
subagentMaxTokens: number;
|
|
238
|
+
subagentMaxTurns: number;
|
|
239
|
+
subagentTemperature: number;
|
|
240
|
+
/** Which fields fell back to AGENT_DEFAULTS (transparency for logging) */
|
|
241
|
+
defaultsUsed: string[];
|
|
242
|
+
}
|
|
243
|
+
/**
|
|
244
|
+
* Resolve all agent loop behavioral knobs from DB config.
|
|
245
|
+
* All 3 call paths MUST use this — structurally prevents hardcoded constants.
|
|
246
|
+
*
|
|
247
|
+
* @param agent - Agent config (DB row or subset)
|
|
248
|
+
* @param callPath - Which call path is invoking ("sse" | "workflow" | "channel")
|
|
249
|
+
* @param workflowMaxTurns - Optional cap from workflow step config (workflow path only)
|
|
250
|
+
*/
|
|
251
|
+
export declare function resolveAgentLoopConfig(agent: {
|
|
252
|
+
max_tool_calls: number;
|
|
253
|
+
temperature: number;
|
|
254
|
+
max_tokens: number;
|
|
255
|
+
model: string;
|
|
256
|
+
context_config: AgentContextConfig | null;
|
|
257
|
+
}, callPath: CallPath, workflowMaxTurns?: number): ResolvedAgentLoopConfig;
|
|
180
258
|
export declare function sanitizeError(err: unknown): string;
|
|
@@ -51,7 +51,8 @@ export function resolveToolChoice(opts) {
|
|
|
51
51
|
// ============================================================================
|
|
52
52
|
// MODEL-AWARE CONTEXT MANAGEMENT
|
|
53
53
|
// ============================================================================
|
|
54
|
-
/**
|
|
54
|
+
/** Legacy compaction trigger — used by getCompactionConfig() for non-Anthropic providers.
|
|
55
|
+
* For Anthropic models, getContextManagement() with DB overrides takes precedence. */
|
|
55
56
|
export const COMPACTION_TRIGGER_TOKENS = 120_000;
|
|
56
57
|
/** Max cumulative tokens before forcing wrap-up (prevents runaway compaction cost) */
|
|
57
58
|
export const COMPACTION_TOTAL_BUDGET = 2_000_000;
|
|
@@ -76,7 +77,7 @@ export function getCompactionConfig(model) {
|
|
|
76
77
|
* - All other Claude models: clear thinking + clear tools at 80K/keep 3
|
|
77
78
|
* - Non-Anthropic models (Gemini, OpenAI): no betas, no context management
|
|
78
79
|
*/
|
|
79
|
-
export function getContextManagement(model) {
|
|
80
|
+
export function getContextManagement(model, overrides) {
|
|
80
81
|
// Non-Anthropic models don't use Anthropic betas or context management
|
|
81
82
|
const provider = getProvider(model);
|
|
82
83
|
if (provider === "gemini" || provider === "openai") {
|
|
@@ -85,29 +86,34 @@ export function getContextManagement(model) {
|
|
|
85
86
|
const edits = [];
|
|
86
87
|
const betas = ["context-management-2025-06-27"];
|
|
87
88
|
// Thinking block clearing — must come FIRST in edits array (API requirement).
|
|
88
|
-
//
|
|
89
|
-
|
|
89
|
+
// Default: keep 1 thinking turn (matches Claude Code / Anthropic API defaults).
|
|
90
|
+
const clearThinkingKeep = overrides?.clear_thinking_keep ?? 1;
|
|
90
91
|
edits.push({
|
|
91
92
|
type: "clear_thinking_20251015",
|
|
92
|
-
keep: { type: "thinking_turns", value:
|
|
93
|
+
keep: { type: "thinking_turns", value: clearThinkingKeep },
|
|
93
94
|
});
|
|
94
95
|
// Server-side compaction for models that support compact_20260112.
|
|
96
|
+
// Default trigger: 150K tokens (matches Anthropic API default).
|
|
95
97
|
// pause_after_compaction: true enables the loop to preserve recent messages
|
|
96
98
|
// and track compaction count for budget enforcement.
|
|
97
99
|
const supportsCompaction = model.includes("opus-4-6") || model.includes("sonnet-4-6");
|
|
100
|
+
const compactionTrigger = overrides?.compaction_trigger_tokens ?? 150_000;
|
|
98
101
|
if (supportsCompaction) {
|
|
99
102
|
edits.push({
|
|
100
103
|
type: "compact_20260112",
|
|
101
|
-
trigger: { type: "input_tokens", value:
|
|
104
|
+
trigger: { type: "input_tokens", value: compactionTrigger },
|
|
102
105
|
pause_after_compaction: true,
|
|
103
106
|
instructions: "Summarize the conversation preserving: (1) task goals and constraints, (2) files created/modified with paths, (3) decisions made and rationale, (4) errors encountered and resolutions, (5) exact next steps. Be concise but preserve all state needed to continue work without repeating mistakes.",
|
|
104
107
|
});
|
|
105
108
|
betas.push("compact-2026-01-12");
|
|
106
109
|
}
|
|
110
|
+
// Clear tool uses — default: trigger at 100K, keep 3 (matches Anthropic API defaults).
|
|
111
|
+
const clearToolUsesTrigger = overrides?.clear_tool_uses_trigger ?? 100_000;
|
|
112
|
+
const clearToolUsesKeep = overrides?.clear_tool_uses_keep ?? 3;
|
|
107
113
|
edits.push({
|
|
108
114
|
type: "clear_tool_uses_20250919",
|
|
109
|
-
trigger: { type: "input_tokens", value:
|
|
110
|
-
keep: { type: "tool_uses", value:
|
|
115
|
+
trigger: { type: "input_tokens", value: clearToolUsesTrigger },
|
|
116
|
+
keep: { type: "tool_uses", value: clearToolUsesKeep },
|
|
111
117
|
});
|
|
112
118
|
return { betas, config: { edits } };
|
|
113
119
|
}
|
|
@@ -551,17 +557,6 @@ export function isRetryableError(err) {
|
|
|
551
557
|
// ============================================================================
|
|
552
558
|
// TOOL RESULT TRUNCATION (deprecated — Anthropic context_management handles limits)
|
|
553
559
|
// ============================================================================
|
|
554
|
-
/** @deprecated — Anthropic context_management handles limits. Use SAFETY_MAX_CHARS in tool-dispatch instead. */
|
|
555
|
-
export function truncateToolResult(content, maxChars) {
|
|
556
|
-
if (content.length <= maxChars)
|
|
557
|
-
return content;
|
|
558
|
-
return content.slice(0, maxChars) + `\n\n... (truncated — ${content.length.toLocaleString()} chars total)`;
|
|
559
|
-
}
|
|
560
|
-
/** @deprecated — Anthropic context_management handles limits. */
|
|
561
|
-
export function getMaxToolResultChars(contextConfig) {
|
|
562
|
-
return contextConfig?.max_tool_result_chars || 80_000;
|
|
563
|
-
}
|
|
564
|
-
// ============================================================================
|
|
565
560
|
// UTILITY — sanitize errors (strip API keys, passwords)
|
|
566
561
|
// ============================================================================
|
|
567
562
|
/**
|
|
@@ -582,6 +577,86 @@ export function demoteSubagentModel(requested, agentType) {
|
|
|
582
577
|
return agentType === "plan" ? "sonnet" : "haiku";
|
|
583
578
|
return "haiku";
|
|
584
579
|
}
|
|
580
|
+
/** Documented defaults — replaces scattered magic numbers across call paths */
|
|
581
|
+
export const AGENT_DEFAULTS = {
|
|
582
|
+
maxTurns: 10,
|
|
583
|
+
temperature: 0.7,
|
|
584
|
+
maxConcurrentTools: 7,
|
|
585
|
+
enableDelegation: true,
|
|
586
|
+
maxDurationMs: 900_000, // 15 minutes (SSE default)
|
|
587
|
+
maxDurationMsWorkflow: 120_000, // 2 minutes (workflow/channel)
|
|
588
|
+
enableModelRouting: true,
|
|
589
|
+
subagentMaxTokens: 8192,
|
|
590
|
+
subagentMaxTurns: 6,
|
|
591
|
+
subagentTemperature: 0.3,
|
|
592
|
+
maxMessageChars: 100_000,
|
|
593
|
+
maxHistoryChars: 400_000,
|
|
594
|
+
compactionTriggerTokens: 150_000,
|
|
595
|
+
compactionTotalBudget: 2_000_000,
|
|
596
|
+
sessionCostBudgetUsd: 5.00,
|
|
597
|
+
};
|
|
598
|
+
/**
|
|
599
|
+
* Resolve all agent loop behavioral knobs from DB config.
|
|
600
|
+
* All 3 call paths MUST use this — structurally prevents hardcoded constants.
|
|
601
|
+
*
|
|
602
|
+
* @param agent - Agent config (DB row or subset)
|
|
603
|
+
* @param callPath - Which call path is invoking ("sse" | "workflow" | "channel")
|
|
604
|
+
* @param workflowMaxTurns - Optional cap from workflow step config (workflow path only)
|
|
605
|
+
*/
|
|
606
|
+
export function resolveAgentLoopConfig(agent, callPath, workflowMaxTurns) {
|
|
607
|
+
const cc = agent.context_config;
|
|
608
|
+
const defaultsUsed = [];
|
|
609
|
+
function resolve(dbValue, defaultValue, name) {
|
|
610
|
+
if (dbValue != null)
|
|
611
|
+
return dbValue;
|
|
612
|
+
defaultsUsed.push(name);
|
|
613
|
+
return defaultValue;
|
|
614
|
+
}
|
|
615
|
+
// Core agent behavior
|
|
616
|
+
const agentMaxTurns = agent.max_tool_calls || AGENT_DEFAULTS.maxTurns;
|
|
617
|
+
if (!agent.max_tool_calls)
|
|
618
|
+
defaultsUsed.push("maxTurns");
|
|
619
|
+
const temperature = agent.temperature ?? AGENT_DEFAULTS.temperature;
|
|
620
|
+
if (agent.temperature == null)
|
|
621
|
+
defaultsUsed.push("temperature");
|
|
622
|
+
const maxTokens = getMaxOutputTokens(agent.model || "claude-sonnet-4-6", agent.max_tokens);
|
|
623
|
+
if (!agent.max_tokens)
|
|
624
|
+
defaultsUsed.push("maxTokens");
|
|
625
|
+
// Call-path-specific duration defaults
|
|
626
|
+
const durationDefault = callPath === "sse"
|
|
627
|
+
? AGENT_DEFAULTS.maxDurationMs
|
|
628
|
+
: AGENT_DEFAULTS.maxDurationMsWorkflow;
|
|
629
|
+
// Call-path-specific turn caps
|
|
630
|
+
let maxTurns = agentMaxTurns;
|
|
631
|
+
if (callPath === "channel")
|
|
632
|
+
maxTurns = Math.min(agentMaxTurns, 15);
|
|
633
|
+
if (callPath === "workflow" && workflowMaxTurns != null) {
|
|
634
|
+
maxTurns = Math.min(agentMaxTurns, workflowMaxTurns);
|
|
635
|
+
}
|
|
636
|
+
return {
|
|
637
|
+
maxTurns,
|
|
638
|
+
temperature,
|
|
639
|
+
maxTokens,
|
|
640
|
+
maxConcurrentTools: resolve(cc?.max_concurrent_tools, AGENT_DEFAULTS.maxConcurrentTools, "maxConcurrentTools"),
|
|
641
|
+
enableDelegation: resolve(cc?.enable_delegation, AGENT_DEFAULTS.enableDelegation, "enableDelegation"),
|
|
642
|
+
maxDurationMs: resolve(cc?.max_duration_ms, durationDefault, "maxDurationMs"),
|
|
643
|
+
enableModelRouting: resolve(cc?.enable_model_routing, AGENT_DEFAULTS.enableModelRouting, "enableModelRouting"),
|
|
644
|
+
maxMessageChars: resolve(cc?.max_message_chars, AGENT_DEFAULTS.maxMessageChars, "maxMessageChars"),
|
|
645
|
+
maxHistoryChars: resolve(cc?.max_history_chars, AGENT_DEFAULTS.maxHistoryChars, "maxHistoryChars"),
|
|
646
|
+
contextOverrides: {
|
|
647
|
+
compaction_trigger_tokens: cc?.compaction_trigger_tokens ?? undefined,
|
|
648
|
+
compaction_total_budget: cc?.compaction_total_budget ?? undefined,
|
|
649
|
+
clear_thinking_keep: cc?.clear_thinking_keep ?? undefined,
|
|
650
|
+
clear_tool_uses_trigger: cc?.clear_tool_uses_trigger ?? undefined,
|
|
651
|
+
clear_tool_uses_keep: cc?.clear_tool_uses_keep ?? undefined,
|
|
652
|
+
session_cost_budget_usd: cc?.session_cost_budget_usd ?? undefined,
|
|
653
|
+
},
|
|
654
|
+
subagentMaxTokens: resolve(cc?.subagent_max_tokens, AGENT_DEFAULTS.subagentMaxTokens, "subagentMaxTokens"),
|
|
655
|
+
subagentMaxTurns: resolve(cc?.subagent_max_turns, AGENT_DEFAULTS.subagentMaxTurns, "subagentMaxTurns"),
|
|
656
|
+
subagentTemperature: resolve(cc?.subagent_temperature, AGENT_DEFAULTS.subagentTemperature, "subagentTemperature"),
|
|
657
|
+
defaultsUsed,
|
|
658
|
+
};
|
|
659
|
+
}
|
|
585
660
|
export function sanitizeError(err) {
|
|
586
661
|
const msg = String(err);
|
|
587
662
|
return msg
|
|
@@ -18,6 +18,7 @@ export declare function buildAPIRequest(opts: {
|
|
|
18
18
|
contextProfile: ContextProfile;
|
|
19
19
|
thinkingEnabled?: boolean;
|
|
20
20
|
maxOutputTokens?: number;
|
|
21
|
+
contextOverrides?: import("./agent-core.js").ContextManagementOverrides;
|
|
21
22
|
}): APIRequestConfig;
|
|
22
23
|
export interface CallServerProxyConfig {
|
|
23
24
|
proxyUrl: string;
|
|
@@ -23,7 +23,7 @@ const RETRY_BASE_DELAY_MS = 1000;
|
|
|
23
23
|
* - 'teammate': clear at 80K/keep 3, no compaction
|
|
24
24
|
*/
|
|
25
25
|
export function buildAPIRequest(opts) {
|
|
26
|
-
const { model, contextProfile, thinkingEnabled = false, maxOutputTokens } = opts;
|
|
26
|
+
const { model, contextProfile, thinkingEnabled = false, maxOutputTokens, contextOverrides } = opts;
|
|
27
27
|
// Context management config per profile
|
|
28
28
|
// Non-Anthropic models (Gemini, OpenAI, etc.) don't support Anthropic betas or context management
|
|
29
29
|
const provider = getProvider(model);
|
|
@@ -32,7 +32,7 @@ export function buildAPIRequest(opts) {
|
|
|
32
32
|
let edits = [];
|
|
33
33
|
switch (contextProfile) {
|
|
34
34
|
case "main": {
|
|
35
|
-
const ctxMgmt = getContextManagement(model);
|
|
35
|
+
const ctxMgmt = getContextManagement(model, contextOverrides);
|
|
36
36
|
betas = [...ctxMgmt.betas];
|
|
37
37
|
edits = ctxMgmt.config.edits;
|
|
38
38
|
break;
|
|
@@ -11,8 +11,6 @@ export interface ToolDispatchOptions {
|
|
|
11
11
|
loopDetector: LoopDetector;
|
|
12
12
|
/** Max concurrent tool executions (7 for main, 1 for sequential) */
|
|
13
13
|
maxConcurrent?: number;
|
|
14
|
-
/** @deprecated — Anthropic context_management handles limits. Only SAFETY_MAX_CHARS applies. */
|
|
15
|
-
maxResultChars?: number;
|
|
16
14
|
/** Per-tool execution timeout in ms (default 120000 = 2 minutes) */
|
|
17
15
|
toolTimeoutMs?: number;
|
|
18
16
|
/** Called when a tool starts executing */
|