whale-code 6.5.4 → 6.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@
8
8
  * Consolidates: streaming, prompt caching, context management betas, compaction,
9
9
  * loop detection, parallel tool execution, subagent delegation, retry, cost tracking.
10
10
  */
11
- import { LoopDetector, getContextManagement, getMaxOutputTokens, getThinkingConfig, addPromptCaching, estimateCostUsd, isRetryableError, sanitizeError, routeModel, resolveToolChoice, emitCostWarningIfNeeded, demoteSubagentModel, COMPACTION_TRIGGER_TOKENS, COMPACTION_TOTAL_BUDGET, DEFAULT_SESSION_COST_BUDGET_USD, } from "../../shared/agent-core.js";
11
+ import { LoopDetector, getContextManagement, getMaxOutputTokens, getThinkingConfig, addPromptCaching, estimateCostUsd, isRetryableError, sanitizeError, routeModel, resolveToolChoice, emitCostWarningIfNeeded, demoteSubagentModel, COMPACTION_TOTAL_BUDGET, DEFAULT_SESSION_COST_BUDGET_USD, } from "../../shared/agent-core.js";
12
12
  import { processStreamWithCallbacks } from "../../shared/sse-parser.js";
13
13
  import { MODELS } from "../../shared/constants.js";
14
14
  import { dispatchTools, buildAssistantContent } from "../../shared/tool-dispatch.js";
@@ -50,10 +50,16 @@ function mapToolChoiceForAnthropic(tc) {
50
50
  // UNIFIED AGENT LOOP
51
51
  // ============================================================================
52
52
  export async function runServerAgentLoop(opts) {
53
- const { anthropic, model, systemPrompt, messages, tools: inputTools, maxTurns, temperature, enableDelegation = true, enablePromptCaching = true, enableStreaming = true, maxConcurrentTools = DEFAULT_MAX_CONCURRENT_TOOLS, maxCostUsd: maxCostUsdOpt, onText, onToolStart, onCitation, documents, clientDisconnected = { value: false }, startedAt = Date.now(), maxDurationMs = 15 * 60 * 1000, } = opts;
54
- // Resolve cost budget: explicit opt > env var > default
53
+ const { anthropic, model, systemPrompt, messages, tools: inputTools, maxTurns, temperature, enableDelegation = true, enablePromptCaching = true, enableStreaming = true, enableModelRouting = true, maxConcurrentTools = DEFAULT_MAX_CONCURRENT_TOOLS, maxCostUsd: maxCostUsdOpt, onText, onToolStart, onCitation, documents, clientDisconnected = { value: false }, startedAt = Date.now(), maxDurationMs = 15 * 60 * 1000, } = opts;
54
+ // Resolve cost budget: context_config > explicit opt > env var > default
55
+ const ctxOverrides = opts.contextOverrides;
55
56
  const envBudget = parseFloat(process.env.WHALE_COST_BUDGET_USD || "");
56
- const maxCostUsd = maxCostUsdOpt ?? (isFinite(envBudget) ? envBudget : DEFAULT_SESSION_COST_BUDGET_USD);
57
+ const maxCostUsd = ctxOverrides?.session_cost_budget_usd
58
+ ?? maxCostUsdOpt
59
+ ?? (isFinite(envBudget) ? envBudget : DEFAULT_SESSION_COST_BUDGET_USD);
60
+ // Resolve compaction budget from overrides (used for compaction exhaustion checks)
61
+ const effectiveCompactionTrigger = ctxOverrides?.compaction_trigger_tokens ?? 150_000;
62
+ const effectiveCompactionBudget = ctxOverrides?.compaction_total_budget ?? COMPACTION_TOTAL_BUDGET;
57
63
  // Auto-inject delegate_task for all models (subagents always use Claude Haiku/Sonnet)
58
64
  // activeTools is mutable — discover_tools adds to it during the session
59
65
  const activeTools = [...inputTools];
@@ -122,7 +128,8 @@ export async function runServerAgentLoop(opts) {
122
128
  loopDetector.resetTurn();
123
129
  // Route model on the FIRST turn only — subsequent turns use the requested model
124
130
  // since the conversation may have become complex after tool results.
125
- const effectiveModel = turnCount === 1
131
+ // enableModelRouting=false prevents downgrade (e.g. Opus agents that should stay Opus)
132
+ const effectiveModel = (turnCount === 1 && enableModelRouting)
126
133
  ? routeModel(firstUserText, model)
127
134
  : model;
128
135
  if (turnCount === 1 && effectiveModel !== model) {
@@ -130,7 +137,7 @@ export async function runServerAgentLoop(opts) {
130
137
  }
131
138
  // Per-turn model config — uses effectiveModel so routed models get correct settings
132
139
  const maxTokens = opts.maxTokens ?? getMaxOutputTokens(effectiveModel);
133
- const ctxMgmt = getContextManagement(effectiveModel);
140
+ const ctxMgmt = getContextManagement(effectiveModel, opts.contextOverrides);
134
141
  const thinkingCfg = getThinkingConfig(effectiveModel, true);
135
142
  // Prepare tool definitions — use pre-computed cache, only rebuild when tools change
136
143
  // (discover_tools can add tools mid-session, detected via length change)
@@ -288,8 +295,8 @@ export async function runServerAgentLoop(opts) {
288
295
  compactionCount++;
289
296
  log.info({ compactionCount }, "compaction — preserving last 2 messages, resuming");
290
297
  // Budget enforcement: if cumulative compaction cost exceeds budget, force wrap-up
291
- if (compactionCount * COMPACTION_TRIGGER_TOKENS >= COMPACTION_TOTAL_BUDGET) {
292
- log.warn({ compactionCount, triggerTokens: COMPACTION_TRIGGER_TOKENS, estimatedTokens: compactionCount * COMPACTION_TRIGGER_TOKENS }, "compaction budget exhausted");
298
+ if (compactionCount * effectiveCompactionTrigger >= effectiveCompactionBudget) {
299
+ log.warn({ compactionCount, triggerTokens: effectiveCompactionTrigger, estimatedTokens: compactionCount * effectiveCompactionTrigger }, "compaction budget exhausted");
293
300
  onText?.("\n[Context budget reached — wrapping up.]");
294
301
  // Rebuild messages: compaction summary + wrap-up instruction
295
302
  const compactedMessages = [
@@ -462,7 +469,7 @@ export async function runServerAgentLoop(opts) {
462
469
  if (response.stop_reason === "compaction" && nsCompactionContent !== null) {
463
470
  compactionCount++;
464
471
  log.info({ compactionCount, streaming: false }, "compaction — preserving last 2 messages");
465
- if (compactionCount * COMPACTION_TRIGGER_TOKENS >= COMPACTION_TOTAL_BUDGET) {
472
+ if (compactionCount * effectiveCompactionTrigger >= effectiveCompactionBudget) {
466
473
  onText?.("\n[Context budget reached — wrapping up.]");
467
474
  const compactedMessages = [
468
475
  { role: "assistant", content: [{ type: "compaction", content: nsCompactionContent }] },
@@ -548,7 +555,8 @@ function makeToolExecutor(opts, tools, allToolNames, subagentTokens, discoveredT
548
555
  if (name === "delegate_task") {
549
556
  const subPrompt = String(input.prompt || "");
550
557
  const subModel = demoteSubagentModel(input.model ? String(input.model) : undefined);
551
- const subMaxTurns = Math.min(Math.max(1, Number(input.max_turns) || 6), 12);
558
+ const defaultSubMaxTurns = opts.subagentMaxTurns ?? 6;
559
+ const subMaxTurns = Math.min(Math.max(1, Number(input.max_turns) || defaultSubMaxTurns), 12);
552
560
  const subTools = tools.filter((t) => t.name !== "delegate_task");
553
561
  const subId = `sub-${Date.now().toString(36)}`;
554
562
  onSubagentProgress?.({ subagentId: subId, event: "started", model: subModel });
@@ -558,6 +566,8 @@ function makeToolExecutor(opts, tools, allToolNames, subagentTokens, discoveredT
558
566
  maxTurns: subMaxTurns, tools: subTools,
559
567
  executeTool: async (toolName, args) => executeTool(toolName, args, "server_subagent"),
560
568
  onProgress: onSubagentProgress, clientDisconnected, startedAt, maxDurationMs,
569
+ maxOutputTokens: opts.subagentMaxTokens,
570
+ temperature: opts.subagentTemperature,
561
571
  });
562
572
  onSubagentProgress?.({ subagentId: subId, event: "done", output: subResult.output });
563
573
  // Audit log
@@ -54,6 +54,8 @@ export interface RunServerSubagentOptions {
54
54
  };
55
55
  startedAt: number;
56
56
  maxDurationMs: number;
57
+ maxOutputTokens?: number;
58
+ temperature?: number;
57
59
  }
58
60
  export declare const DELEGATE_TASK_TOOL_DEF: {
59
61
  name: string;
@@ -64,7 +64,9 @@ export async function runServerSubagent(opts) {
64
64
  let cacheRead = 0;
65
65
  // Enable thinking for capable models; adjust max tokens for Opus
66
66
  const shouldThink = modelAlias === "opus" || modelAlias === "sonnet";
67
- const effectiveMaxTokens = modelAlias === "opus" ? 16384 : SUBAGENT_MAX_OUTPUT_TOKENS;
67
+ const baseMaxTokens = opts.maxOutputTokens ?? SUBAGENT_MAX_OUTPUT_TOKENS;
68
+ const effectiveMaxTokens = modelAlias === "opus" ? Math.max(16384, baseMaxTokens) : baseMaxTokens;
69
+ const effectiveTemperature = opts.temperature ?? 0.3;
68
70
  // Context management for subagent: clear at 60K, keep 2, no compaction
69
71
  const apiConfig = buildAPIRequest({
70
72
  model: modelId,
@@ -99,7 +101,7 @@ export async function runServerSubagent(opts) {
99
101
  return await anthropic.beta.messages.create({
100
102
  model: modelId,
101
103
  max_tokens: apiConfig.maxTokens,
102
- temperature: shouldThink ? 1 : 0.3, // Anthropic requires temp=1 with thinking
104
+ temperature: shouldThink ? 1 : effectiveTemperature, // Anthropic requires temp=1 with thinking
103
105
  system,
104
106
  tools: cachedTools,
105
107
  messages: cachedMessages,
@@ -8,16 +8,16 @@
8
8
  * output token limits, and compaction config for Anthropic/Claude models.
9
9
  */
10
10
  import Anthropic from "@anthropic-ai/sdk";
11
- import { sanitizeError } from "../../shared/agent-core.js";
11
+ import { sanitizeError, AGENT_DEFAULTS } from "../../shared/agent-core.js";
12
12
  import { getCapabilities } from "../lib/provider-capabilities.js";
13
13
  import { registerProvider } from "./registry.js";
14
14
  import { jsonResponse, writeSSEHeaders } from "./shared.js";
15
15
  // ============================================================================
16
16
  // CONSTANTS — Anthropic-specific model config
17
17
  // ============================================================================
18
- const COMPACTION_TRIGGER_TOKENS = 120_000;
19
- const COMPACTION_TOTAL_BUDGET = 2_000_000;
20
- const DEFAULT_OUTPUT_TOKENS = 16384;
18
+ const COMPACTION_TRIGGER_TOKENS = AGENT_DEFAULTS.compactionTriggerTokens;
19
+ const COMPACTION_TOTAL_BUDGET = AGENT_DEFAULTS.compactionTotalBudget;
20
+ const DEFAULT_OUTPUT_TOKENS = 16384; // Per-response cap (model max is separate)
21
21
  const MODEL_MAX_OUTPUT_TOKENS = {
22
22
  "claude-opus-4-6": 128000,
23
23
  "claude-sonnet-4-6": 64000,
@@ -8,16 +8,16 @@
8
8
  * output token limits, and compaction config for Bedrock Claude models.
9
9
  */
10
10
  import { BedrockRuntimeClient, InvokeModelWithResponseStreamCommand } from "@aws-sdk/client-bedrock-runtime";
11
- import { sanitizeError } from "../../shared/agent-core.js";
11
+ import { sanitizeError, AGENT_DEFAULTS } from "../../shared/agent-core.js";
12
12
  import { getCapabilities } from "../lib/provider-capabilities.js";
13
13
  import { registerProvider } from "./registry.js";
14
14
  import { jsonResponse, writeSSEHeaders, resolveProviderCredentials } from "./shared.js";
15
15
  // ============================================================================
16
16
  // CONSTANTS — Bedrock-specific model config
17
17
  // ============================================================================
18
- const COMPACTION_TRIGGER_TOKENS = 120_000;
19
- const COMPACTION_TOTAL_BUDGET = 2_000_000;
20
- const DEFAULT_OUTPUT_TOKENS = 16384;
18
+ const COMPACTION_TRIGGER_TOKENS = AGENT_DEFAULTS.compactionTriggerTokens;
19
+ const COMPACTION_TOTAL_BUDGET = AGENT_DEFAULTS.compactionTotalBudget;
20
+ const DEFAULT_OUTPUT_TOKENS = 16384; // Per-response cap (model max is separate)
21
21
  const MODEL_MAX_OUTPUT_TOKENS = {
22
22
  "anthropic.claude-sonnet-4-6": 64000,
23
23
  "us.anthropic.claude-sonnet-4-20250514-v1:0": 64000,
@@ -70,6 +70,19 @@ export interface AgentConfig {
70
70
  max_history_chars?: number;
71
71
  max_tool_result_chars?: number;
72
72
  max_message_chars?: number;
73
+ compaction_trigger_tokens?: number;
74
+ compaction_total_budget?: number;
75
+ clear_thinking_keep?: number;
76
+ clear_tool_uses_trigger?: number;
77
+ clear_tool_uses_keep?: number;
78
+ session_cost_budget_usd?: number;
79
+ max_concurrent_tools?: number;
80
+ enable_delegation?: boolean;
81
+ max_duration_ms?: number;
82
+ enable_model_routing?: boolean;
83
+ subagent_max_tokens?: number;
84
+ subagent_max_turns?: number;
85
+ subagent_temperature?: number;
73
86
  } | null;
74
87
  }
75
88
  export interface ToolLoadResult {
@@ -45,6 +45,7 @@ import { handleKali } from "./handlers/kali.js";
45
45
  import { handleLocalAgent } from "./handlers/local-agent.js";
46
46
  import { handleEnrichment } from "./handlers/enrichment.js";
47
47
  import { handleStorefront } from "./handlers/storefront.js";
48
+ import { handleApiDocs } from "./handlers/api-docs.js";
48
49
  import { handleClickHouse } from "./handlers/clickhouse.js";
49
50
  import { summarizeResult, withTimeout } from "./lib/utils.js";
50
51
  // ============================================================================
@@ -300,6 +301,7 @@ export const TOOL_HANDLERS = {
300
301
  browser: { handler: handleBrowser, timeout: 120_000, requiresStore: true },
301
302
  discovery: { handler: handleDiscovery, timeout: DEFAULT_TIMEOUT, requiresStore: false },
302
303
  api_keys: { handler: handleAPIKeys, timeout: DEFAULT_TIMEOUT, requiresStore: true },
304
+ api_docs: { handler: handleApiDocs, timeout: 5000, requiresStore: false },
303
305
  // --- Advertising ---
304
306
  meta_ads: { handler: handleMetaAds, timeout: 300_000, requiresStore: true },
305
307
  // --- Security & Local ---
@@ -371,7 +373,7 @@ const TOOL_CATEGORIES = {
371
373
  llm: "ai", embeddings: "ai", creations: "media",
372
374
  // Platform
373
375
  web_search: "platform", browser: "platform", discovery: "platform",
374
- api_keys: "platform",
376
+ api_keys: "platform", api_docs: "platform",
375
377
  // Security
376
378
  kali: "security", local_agent: "platform",
377
379
  // Advertising
@@ -52,7 +52,17 @@ export interface ContextManagementConfig {
52
52
  edits: Array<Record<string, unknown>>;
53
53
  };
54
54
  }
55
- /** Compaction trigger threshold — shared so agent loops can track budget */
55
+ /** Overrides from ai_agent_config.context_config JSONB — all optional, falls back to defaults */
56
+ export interface ContextManagementOverrides {
57
+ compaction_trigger_tokens?: number;
58
+ compaction_total_budget?: number;
59
+ clear_thinking_keep?: number;
60
+ clear_tool_uses_trigger?: number;
61
+ clear_tool_uses_keep?: number;
62
+ session_cost_budget_usd?: number;
63
+ }
64
+ /** Legacy compaction trigger — used by getCompactionConfig() for non-Anthropic providers.
65
+ * For Anthropic models, getContextManagement() with DB overrides takes precedence. */
56
66
  export declare const COMPACTION_TRIGGER_TOKENS = 120000;
57
67
  /** Max cumulative tokens before forcing wrap-up (prevents runaway compaction cost) */
58
68
  export declare const COMPACTION_TOTAL_BUDGET = 2000000;
@@ -75,7 +85,7 @@ export declare function getCompactionConfig(model: string): CompactionConfig;
75
85
  * - All other Claude models: clear thinking + clear tools at 80K/keep 3
76
86
  * - Non-Anthropic models (Gemini, OpenAI): no betas, no context management
77
87
  */
78
- export declare function getContextManagement(model: string): ContextManagementConfig;
88
+ export declare function getContextManagement(model: string, overrides?: ContextManagementOverrides): ContextManagementConfig;
79
89
  export declare function getMaxOutputTokens(model: string, agentMax?: number): number;
80
90
  /**
81
91
  * Add prompt cache breakpoints to tools and messages.
@@ -163,12 +173,6 @@ export declare function categorizeError(err: unknown): {
163
173
  retryable: boolean;
164
174
  };
165
175
  export declare function isRetryableError(err: unknown): boolean;
166
- /** @deprecated — Anthropic context_management handles limits. Use SAFETY_MAX_CHARS in tool-dispatch instead. */
167
- export declare function truncateToolResult(content: string, maxChars: number): string;
168
- /** @deprecated — Anthropic context_management handles limits. */
169
- export declare function getMaxToolResultChars(contextConfig?: {
170
- max_tool_result_chars?: number;
171
- } | null): number;
172
176
  /**
173
177
  * Demote subagent model requests — single source of truth for server + CLI.
174
178
  * - explore/research: always haiku
@@ -177,4 +181,78 @@ export declare function getMaxToolResultChars(contextConfig?: {
177
181
  * - default/undefined: haiku
178
182
  */
179
183
  export declare function demoteSubagentModel(requested: string | undefined, agentType?: string): "haiku" | "sonnet";
184
+ export type CallPath = "sse" | "workflow" | "channel";
185
+ /** Documented defaults — replaces scattered magic numbers across call paths */
186
+ export declare const AGENT_DEFAULTS: {
187
+ readonly maxTurns: 10;
188
+ readonly temperature: 0.7;
189
+ readonly maxConcurrentTools: 7;
190
+ readonly enableDelegation: true;
191
+ readonly maxDurationMs: 900000;
192
+ readonly maxDurationMsWorkflow: 120000;
193
+ readonly enableModelRouting: true;
194
+ readonly subagentMaxTokens: 8192;
195
+ readonly subagentMaxTurns: 6;
196
+ readonly subagentTemperature: 0.3;
197
+ readonly maxMessageChars: 100000;
198
+ readonly maxHistoryChars: 400000;
199
+ readonly compactionTriggerTokens: 150000;
200
+ readonly compactionTotalBudget: 2000000;
201
+ readonly sessionCostBudgetUsd: 5;
202
+ };
203
+ /** Context config shape from ai_agent_config.context_config JSONB */
204
+ export interface AgentContextConfig {
205
+ includeLocations?: boolean;
206
+ locationIds?: string[];
207
+ includeCustomers?: boolean;
208
+ customerSegments?: string[];
209
+ max_history_chars?: number;
210
+ max_tool_result_chars?: number;
211
+ max_message_chars?: number;
212
+ compaction_trigger_tokens?: number;
213
+ compaction_total_budget?: number;
214
+ clear_thinking_keep?: number;
215
+ clear_tool_uses_trigger?: number;
216
+ clear_tool_uses_keep?: number;
217
+ session_cost_budget_usd?: number;
218
+ max_concurrent_tools?: number;
219
+ enable_delegation?: boolean;
220
+ max_duration_ms?: number;
221
+ enable_model_routing?: boolean;
222
+ subagent_max_tokens?: number;
223
+ subagent_max_turns?: number;
224
+ subagent_temperature?: number;
225
+ }
226
+ export interface ResolvedAgentLoopConfig {
227
+ maxTurns: number;
228
+ temperature: number;
229
+ maxTokens: number;
230
+ maxConcurrentTools: number;
231
+ enableDelegation: boolean;
232
+ maxDurationMs: number;
233
+ enableModelRouting: boolean;
234
+ maxMessageChars: number;
235
+ maxHistoryChars: number;
236
+ contextOverrides: ContextManagementOverrides;
237
+ subagentMaxTokens: number;
238
+ subagentMaxTurns: number;
239
+ subagentTemperature: number;
240
+ /** Which fields fell back to AGENT_DEFAULTS (transparency for logging) */
241
+ defaultsUsed: string[];
242
+ }
243
+ /**
244
+ * Resolve all agent loop behavioral knobs from DB config.
245
+ * All 3 call paths MUST use this — structurally prevents hardcoded constants.
246
+ *
247
+ * @param agent - Agent config (DB row or subset)
248
+ * @param callPath - Which call path is invoking ("sse" | "workflow" | "channel")
249
+ * @param workflowMaxTurns - Optional cap from workflow step config (workflow path only)
250
+ */
251
+ export declare function resolveAgentLoopConfig(agent: {
252
+ max_tool_calls: number;
253
+ temperature: number;
254
+ max_tokens: number;
255
+ model: string;
256
+ context_config: AgentContextConfig | null;
257
+ }, callPath: CallPath, workflowMaxTurns?: number): ResolvedAgentLoopConfig;
180
258
  export declare function sanitizeError(err: unknown): string;
@@ -51,7 +51,8 @@ export function resolveToolChoice(opts) {
51
51
  // ============================================================================
52
52
  // MODEL-AWARE CONTEXT MANAGEMENT
53
53
  // ============================================================================
54
- /** Compaction trigger threshold — shared so agent loops can track budget */
54
+ /** Legacy compaction trigger — used by getCompactionConfig() for non-Anthropic providers.
55
+ * For Anthropic models, getContextManagement() with DB overrides takes precedence. */
55
56
  export const COMPACTION_TRIGGER_TOKENS = 120_000;
56
57
  /** Max cumulative tokens before forcing wrap-up (prevents runaway compaction cost) */
57
58
  export const COMPACTION_TOTAL_BUDGET = 2_000_000;
@@ -76,7 +77,7 @@ export function getCompactionConfig(model) {
76
77
  * - All other Claude models: clear thinking + clear tools at 80K/keep 3
77
78
  * - Non-Anthropic models (Gemini, OpenAI): no betas, no context management
78
79
  */
79
- export function getContextManagement(model) {
80
+ export function getContextManagement(model, overrides) {
80
81
  // Non-Anthropic models don't use Anthropic betas or context management
81
82
  const provider = getProvider(model);
82
83
  if (provider === "gemini" || provider === "openai") {
@@ -85,29 +86,34 @@ export function getContextManagement(model) {
85
86
  const edits = [];
86
87
  const betas = ["context-management-2025-06-27"];
87
88
  // Thinking block clearing — must come FIRST in edits array (API requirement).
88
- // Keeps last 2 turns of thinking to maintain reasoning continuity while
89
- // preventing unbounded growth from extended thinking.
89
+ // Default: keep 1 thinking turn (matches Claude Code / Anthropic API defaults).
90
+ const clearThinkingKeep = overrides?.clear_thinking_keep ?? 1;
90
91
  edits.push({
91
92
  type: "clear_thinking_20251015",
92
- keep: { type: "thinking_turns", value: 2 },
93
+ keep: { type: "thinking_turns", value: clearThinkingKeep },
93
94
  });
94
95
  // Server-side compaction for models that support compact_20260112.
96
+ // Default trigger: 150K tokens (matches Anthropic API default).
95
97
  // pause_after_compaction: true enables the loop to preserve recent messages
96
98
  // and track compaction count for budget enforcement.
97
99
  const supportsCompaction = model.includes("opus-4-6") || model.includes("sonnet-4-6");
100
+ const compactionTrigger = overrides?.compaction_trigger_tokens ?? 150_000;
98
101
  if (supportsCompaction) {
99
102
  edits.push({
100
103
  type: "compact_20260112",
101
- trigger: { type: "input_tokens", value: COMPACTION_TRIGGER_TOKENS },
104
+ trigger: { type: "input_tokens", value: compactionTrigger },
102
105
  pause_after_compaction: true,
103
106
  instructions: "Summarize the conversation preserving: (1) task goals and constraints, (2) files created/modified with paths, (3) decisions made and rationale, (4) errors encountered and resolutions, (5) exact next steps. Be concise but preserve all state needed to continue work without repeating mistakes.",
104
107
  });
105
108
  betas.push("compact-2026-01-12");
106
109
  }
110
+ // Clear tool uses — default: trigger at 100K, keep 3 (matches Anthropic API defaults).
111
+ const clearToolUsesTrigger = overrides?.clear_tool_uses_trigger ?? 100_000;
112
+ const clearToolUsesKeep = overrides?.clear_tool_uses_keep ?? 3;
107
113
  edits.push({
108
114
  type: "clear_tool_uses_20250919",
109
- trigger: { type: "input_tokens", value: 80_000 },
110
- keep: { type: "tool_uses", value: 3 },
115
+ trigger: { type: "input_tokens", value: clearToolUsesTrigger },
116
+ keep: { type: "tool_uses", value: clearToolUsesKeep },
111
117
  });
112
118
  return { betas, config: { edits } };
113
119
  }
@@ -551,17 +557,6 @@ export function isRetryableError(err) {
551
557
  // ============================================================================
552
558
  // TOOL RESULT TRUNCATION (deprecated — Anthropic context_management handles limits)
553
559
  // ============================================================================
554
- /** @deprecated — Anthropic context_management handles limits. Use SAFETY_MAX_CHARS in tool-dispatch instead. */
555
- export function truncateToolResult(content, maxChars) {
556
- if (content.length <= maxChars)
557
- return content;
558
- return content.slice(0, maxChars) + `\n\n... (truncated — ${content.length.toLocaleString()} chars total)`;
559
- }
560
- /** @deprecated — Anthropic context_management handles limits. */
561
- export function getMaxToolResultChars(contextConfig) {
562
- return contextConfig?.max_tool_result_chars || 80_000;
563
- }
564
- // ============================================================================
565
560
  // UTILITY — sanitize errors (strip API keys, passwords)
566
561
  // ============================================================================
567
562
  /**
@@ -582,6 +577,86 @@ export function demoteSubagentModel(requested, agentType) {
582
577
  return agentType === "plan" ? "sonnet" : "haiku";
583
578
  return "haiku";
584
579
  }
580
+ /** Documented defaults — replaces scattered magic numbers across call paths */
581
+ export const AGENT_DEFAULTS = {
582
+ maxTurns: 10,
583
+ temperature: 0.7,
584
+ maxConcurrentTools: 7,
585
+ enableDelegation: true,
586
+ maxDurationMs: 900_000, // 15 minutes (SSE default)
587
+ maxDurationMsWorkflow: 120_000, // 2 minutes (workflow/channel)
588
+ enableModelRouting: true,
589
+ subagentMaxTokens: 8192,
590
+ subagentMaxTurns: 6,
591
+ subagentTemperature: 0.3,
592
+ maxMessageChars: 100_000,
593
+ maxHistoryChars: 400_000,
594
+ compactionTriggerTokens: 150_000,
595
+ compactionTotalBudget: 2_000_000,
596
+ sessionCostBudgetUsd: 5.00,
597
+ };
598
+ /**
599
+ * Resolve all agent loop behavioral knobs from DB config.
600
+ * All 3 call paths MUST use this — structurally prevents hardcoded constants.
601
+ *
602
+ * @param agent - Agent config (DB row or subset)
603
+ * @param callPath - Which call path is invoking ("sse" | "workflow" | "channel")
604
+ * @param workflowMaxTurns - Optional cap from workflow step config (workflow path only)
605
+ */
606
+ export function resolveAgentLoopConfig(agent, callPath, workflowMaxTurns) {
607
+ const cc = agent.context_config;
608
+ const defaultsUsed = [];
609
+ function resolve(dbValue, defaultValue, name) {
610
+ if (dbValue != null)
611
+ return dbValue;
612
+ defaultsUsed.push(name);
613
+ return defaultValue;
614
+ }
615
+ // Core agent behavior
616
+ const agentMaxTurns = agent.max_tool_calls || AGENT_DEFAULTS.maxTurns;
617
+ if (!agent.max_tool_calls)
618
+ defaultsUsed.push("maxTurns");
619
+ const temperature = agent.temperature ?? AGENT_DEFAULTS.temperature;
620
+ if (agent.temperature == null)
621
+ defaultsUsed.push("temperature");
622
+ const maxTokens = getMaxOutputTokens(agent.model || "claude-sonnet-4-6", agent.max_tokens);
623
+ if (!agent.max_tokens)
624
+ defaultsUsed.push("maxTokens");
625
+ // Call-path-specific duration defaults
626
+ const durationDefault = callPath === "sse"
627
+ ? AGENT_DEFAULTS.maxDurationMs
628
+ : AGENT_DEFAULTS.maxDurationMsWorkflow;
629
+ // Call-path-specific turn caps
630
+ let maxTurns = agentMaxTurns;
631
+ if (callPath === "channel")
632
+ maxTurns = Math.min(agentMaxTurns, 15);
633
+ if (callPath === "workflow" && workflowMaxTurns != null) {
634
+ maxTurns = Math.min(agentMaxTurns, workflowMaxTurns);
635
+ }
636
+ return {
637
+ maxTurns,
638
+ temperature,
639
+ maxTokens,
640
+ maxConcurrentTools: resolve(cc?.max_concurrent_tools, AGENT_DEFAULTS.maxConcurrentTools, "maxConcurrentTools"),
641
+ enableDelegation: resolve(cc?.enable_delegation, AGENT_DEFAULTS.enableDelegation, "enableDelegation"),
642
+ maxDurationMs: resolve(cc?.max_duration_ms, durationDefault, "maxDurationMs"),
643
+ enableModelRouting: resolve(cc?.enable_model_routing, AGENT_DEFAULTS.enableModelRouting, "enableModelRouting"),
644
+ maxMessageChars: resolve(cc?.max_message_chars, AGENT_DEFAULTS.maxMessageChars, "maxMessageChars"),
645
+ maxHistoryChars: resolve(cc?.max_history_chars, AGENT_DEFAULTS.maxHistoryChars, "maxHistoryChars"),
646
+ contextOverrides: {
647
+ compaction_trigger_tokens: cc?.compaction_trigger_tokens ?? undefined,
648
+ compaction_total_budget: cc?.compaction_total_budget ?? undefined,
649
+ clear_thinking_keep: cc?.clear_thinking_keep ?? undefined,
650
+ clear_tool_uses_trigger: cc?.clear_tool_uses_trigger ?? undefined,
651
+ clear_tool_uses_keep: cc?.clear_tool_uses_keep ?? undefined,
652
+ session_cost_budget_usd: cc?.session_cost_budget_usd ?? undefined,
653
+ },
654
+ subagentMaxTokens: resolve(cc?.subagent_max_tokens, AGENT_DEFAULTS.subagentMaxTokens, "subagentMaxTokens"),
655
+ subagentMaxTurns: resolve(cc?.subagent_max_turns, AGENT_DEFAULTS.subagentMaxTurns, "subagentMaxTurns"),
656
+ subagentTemperature: resolve(cc?.subagent_temperature, AGENT_DEFAULTS.subagentTemperature, "subagentTemperature"),
657
+ defaultsUsed,
658
+ };
659
+ }
585
660
  export function sanitizeError(err) {
586
661
  const msg = String(err);
587
662
  return msg
@@ -18,6 +18,7 @@ export declare function buildAPIRequest(opts: {
18
18
  contextProfile: ContextProfile;
19
19
  thinkingEnabled?: boolean;
20
20
  maxOutputTokens?: number;
21
+ contextOverrides?: import("./agent-core.js").ContextManagementOverrides;
21
22
  }): APIRequestConfig;
22
23
  export interface CallServerProxyConfig {
23
24
  proxyUrl: string;
@@ -23,7 +23,7 @@ const RETRY_BASE_DELAY_MS = 1000;
23
23
  * - 'teammate': clear at 80K/keep 3, no compaction
24
24
  */
25
25
  export function buildAPIRequest(opts) {
26
- const { model, contextProfile, thinkingEnabled = false, maxOutputTokens } = opts;
26
+ const { model, contextProfile, thinkingEnabled = false, maxOutputTokens, contextOverrides } = opts;
27
27
  // Context management config per profile
28
28
  // Non-Anthropic models (Gemini, OpenAI, etc.) don't support Anthropic betas or context management
29
29
  const provider = getProvider(model);
@@ -32,7 +32,7 @@ export function buildAPIRequest(opts) {
32
32
  let edits = [];
33
33
  switch (contextProfile) {
34
34
  case "main": {
35
- const ctxMgmt = getContextManagement(model);
35
+ const ctxMgmt = getContextManagement(model, contextOverrides);
36
36
  betas = [...ctxMgmt.betas];
37
37
  edits = ctxMgmt.config.edits;
38
38
  break;
@@ -11,8 +11,6 @@ export interface ToolDispatchOptions {
11
11
  loopDetector: LoopDetector;
12
12
  /** Max concurrent tool executions (7 for main, 1 for sequential) */
13
13
  maxConcurrent?: number;
14
- /** @deprecated — Anthropic context_management handles limits. Only SAFETY_MAX_CHARS applies. */
15
- maxResultChars?: number;
16
14
  /** Per-tool execution timeout in ms (default 120000 = 2 minutes) */
17
15
  toolTimeoutMs?: number;
18
16
  /** Called when a tool starts executing */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "whale-code",
3
- "version": "6.5.4",
3
+ "version": "6.5.5",
4
4
  "description": "whale code — local-first AI agent CLI for inventory, orders, and analytics powered by MCP",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",