whale-code 6.5.0 → 6.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/swagmanager-mcp.js +44 -0
- package/dist/cli/services/agent-loop.js +12 -19
- package/dist/cli/services/subagent.js +6 -17
- package/dist/cli/services/tools/file-ops.js +28 -14
- package/dist/server/lib/compaction-service.js +3 -2
- package/dist/server/lib/server-agent-loop.js +10 -37
- package/dist/server/lib/server-subagent.js +2 -2
- package/dist/shared/agent-core.d.ts +20 -0
- package/dist/shared/agent-core.js +48 -13
- package/dist/shared/tool-dispatch.js +14 -0
- package/dist/updater.js +1 -1
- package/package.json +1 -1
package/bin/swagmanager-mcp.js
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
* whale mcp list|add|remove Manage MCP servers
|
|
13
13
|
* whale doctor Run diagnostics
|
|
14
14
|
* whale config [key] [value] View/set configuration
|
|
15
|
+
* whale update Update to latest version
|
|
15
16
|
* whale help Show this help
|
|
16
17
|
* (non-TTY stdin) MCP stdio server for Claude Code / Cursor
|
|
17
18
|
*/
|
|
@@ -169,6 +170,24 @@ if (flags.version) {
|
|
|
169
170
|
process.exit(0);
|
|
170
171
|
}
|
|
171
172
|
|
|
173
|
+
// ── Background update check (non-blocking, TTY only) ──
const PACKAGE_NAME = "whale-code";
/**
 * Ask npm for the latest published version of this package and, when it
 * differs from the running version, print a two-line upgrade hint.
 *
 * The lookup uses the asynchronous `exec` so the event loop stays free while
 * npm runs; a synchronous call here would stall startup for up to the timeout.
 * `exec` buffers the child's stderr instead of forwarding it, so no
 * shell-specific redirection is needed to keep npm noise off the terminal.
 *
 * @returns {Promise<void>} Resolves once the check finishes; never rejects.
 */
async function checkForUpdate() {
    try {
        const [{ exec }, { promisify }] = await Promise.all([
            import("child_process"),
            import("util"),
        ]);
        const runCommand = promisify(exec);
        const { stdout } = await runCommand(`npm view ${PACKAGE_NAME} version`, { encoding: "utf-8", timeout: 5000 });
        const latest = stdout.trim();
        if (latest && latest !== PKG_VERSION) {
            // ANSI styling: dim, reset, cyan, bold.
            const d = "\x1b[2m";
            const r = "\x1b[0m";
            const c = "\x1b[36m";
            const B = "\x1b[1m";
            console.log(`\n${d} Update available: ${r}${c}v${PKG_VERSION}${r}${d} → ${r}${c}${B}v${latest}${r}`);
            console.log(`${d} Run ${r}${B}whale update${r}${d} or ${r}${B}npm i -g ${PACKAGE_NAME}@latest${r}\n`);
        }
    } catch { /* silent — network or npm not available */ }
}
if (process.stdin.isTTY && !flags.print && !flags.version && !flags.help) {
    // Fire and forget — don't block startup. checkForUpdate() swallows its own
    // errors, so the un-awaited promise cannot produce an unhandled rejection.
    void checkForUpdate();
}
|
|
190
|
+
|
|
172
191
|
// ── Help ──
|
|
173
192
|
if (flags.help && !subcommand) {
|
|
174
193
|
showHelp();
|
|
@@ -312,6 +331,31 @@ switch (command) {
|
|
|
312
331
|
break;
|
|
313
332
|
}
|
|
314
333
|
|
|
334
|
+
case "update":
|
|
335
|
+
case "upgrade": {
|
|
336
|
+
const { execSync } = await import("child_process");
|
|
337
|
+
const d = "\x1b[2m", r = "\x1b[0m", c = "\x1b[36m", g = "\x1b[32m", B = "\x1b[1m";
|
|
338
|
+
|
|
339
|
+
console.log(`${d} Checking for updates...${r}`);
|
|
340
|
+
try {
|
|
341
|
+
const latest = execSync(`npm view ${PACKAGE_NAME} version 2>/dev/null`, { encoding: "utf-8", timeout: 10000 }).trim();
|
|
342
|
+
if (latest === PKG_VERSION) {
|
|
343
|
+
console.log(`${g} ✓${r} whale-code v${PKG_VERSION} is already up to date`);
|
|
344
|
+
process.exit(0);
|
|
345
|
+
}
|
|
346
|
+
console.log(`${d} Updating ${r}v${PKG_VERSION}${d} → ${r}${B}v${latest}${r}`);
|
|
347
|
+
console.log();
|
|
348
|
+
execSync(`npm install -g ${PACKAGE_NAME}@latest`, { stdio: "inherit" });
|
|
349
|
+
console.log();
|
|
350
|
+
console.log(`${g} ✓${r} Updated to whale-code v${latest}`);
|
|
351
|
+
} catch (err) {
|
|
352
|
+
console.error(` ✗ Update failed: ${err.message}`);
|
|
353
|
+
console.error(` Try: sudo npm install -g ${PACKAGE_NAME}@latest`);
|
|
354
|
+
process.exit(1);
|
|
355
|
+
}
|
|
356
|
+
break;
|
|
357
|
+
}
|
|
358
|
+
|
|
315
359
|
case "agent": {
|
|
316
360
|
// Forward remaining args to local-agent CLI
|
|
317
361
|
// Rebuild process.argv so the agent sees: [node, script, subcommand, ...flags]
|
|
@@ -26,7 +26,7 @@ import { captureError, addBreadcrumb } from "./error-logger.js";
|
|
|
26
26
|
import { setGlobalEmitter, clearGlobalEmitter, } from "./agent-events.js";
|
|
27
27
|
import { mcpClientManager } from "./mcp-client.js";
|
|
28
28
|
import { loadHooks, runBeforeToolHook, runAfterToolHook, runSessionHook } from "./hooks.js";
|
|
29
|
-
import { LoopDetector, COMPACTION_TRIGGER_TOKENS, COMPACTION_TOTAL_BUDGET, getCompactionConfig } from "../../shared/agent-core.js";
|
|
29
|
+
import { LoopDetector, COMPACTION_TRIGGER_TOKENS, COMPACTION_TOTAL_BUDGET, getCompactionConfig, DEFAULT_SESSION_COST_BUDGET_USD, emitCostWarningIfNeeded } from "../../shared/agent-core.js";
|
|
30
30
|
import { parseSSEStream, processStreamWithCallbacks, collectStreamResult } from "../../shared/sse-parser.js";
|
|
31
31
|
import { callServerProxy, callTranscribe, buildAPIRequest, buildSystemBlocks, prepareWithCaching, trimGeminiContext, trimOpenAIContext, requestProviderCompaction } from "../../shared/api-client.js";
|
|
32
32
|
import { getProvider, MODELS } from "../../shared/constants.js";
|
|
@@ -308,6 +308,7 @@ export async function runAgentLoop(opts) {
|
|
|
308
308
|
let sessionCostUsd = 0;
|
|
309
309
|
let compactionCount = 0;
|
|
310
310
|
const costWarningsEmitted = new Set();
|
|
311
|
+
const effectiveBudget = opts.maxBudgetUsd ?? DEFAULT_SESSION_COST_BUDGET_USD;
|
|
311
312
|
const activeModel = getModel();
|
|
312
313
|
// Tool executor — routes to interactive, local, server, or MCP tools.
|
|
313
314
|
// Wraps execution with before/after hooks when hooks are loaded.
|
|
@@ -384,16 +385,16 @@ export async function runAgentLoop(opts) {
|
|
|
384
385
|
callbacks.onError("Cancelled", messages);
|
|
385
386
|
return;
|
|
386
387
|
}
|
|
387
|
-
// Budget enforcement
|
|
388
|
-
if (
|
|
389
|
-
logSpan({ action: "chat.budget_exceeded", durationMs: Date.now() - sessionStart, context: turnCtx, storeId: storeId || undefined, severity: "warn", details: { session_cost_usd: sessionCostUsd, max_budget_usd:
|
|
390
|
-
callbacks.onError(`Budget exceeded: $${sessionCostUsd.toFixed(4)} >= $${
|
|
388
|
+
// Budget enforcement — always enforced (defaults to DEFAULT_SESSION_COST_BUDGET_USD)
|
|
389
|
+
if (sessionCostUsd >= effectiveBudget) {
|
|
390
|
+
logSpan({ action: "chat.budget_exceeded", durationMs: Date.now() - sessionStart, context: turnCtx, storeId: storeId || undefined, severity: "warn", details: { session_cost_usd: sessionCostUsd, max_budget_usd: effectiveBudget, iteration } });
|
|
391
|
+
callbacks.onError(`Budget exceeded: $${sessionCostUsd.toFixed(4)} >= $${effectiveBudget}`, messages);
|
|
391
392
|
return;
|
|
392
393
|
}
|
|
393
394
|
const apiStart = Date.now();
|
|
394
395
|
const apiSpanId = generateSpanId();
|
|
395
396
|
const apiRowId = crypto.randomUUID(); // UUID for this span's row — children reference via parent_id
|
|
396
|
-
const costContext = `Session cost: $${sessionCostUsd.toFixed(2)}
|
|
397
|
+
const costContext = `Session cost: $${sessionCostUsd.toFixed(2)} | Budget remaining: $${(effectiveBudget - sessionCostUsd).toFixed(2)}`;
|
|
397
398
|
// Build API request config
|
|
398
399
|
const currentModel = getModel();
|
|
399
400
|
const apiConfig = buildAPIRequest({
|
|
@@ -516,20 +517,12 @@ export async function runAgentLoop(opts) {
|
|
|
516
517
|
totalThinking += result.thinkingTokens;
|
|
517
518
|
sessionCostUsd += estimateCostUsd(result.usage.inputTokens, result.usage.outputTokens, currentModel, result.thinkingTokens, result.usage.cacheReadTokens, result.usage.cacheCreationTokens);
|
|
518
519
|
// Graduated cost warnings
|
|
519
|
-
if (
|
|
520
|
-
|
|
521
|
-
if (!costWarningsEmitted.has(pct) && sessionCostUsd >= opts.maxBudgetUsd * (pct / 100)) {
|
|
522
|
-
costWarningsEmitted.add(pct);
|
|
523
|
-
const warnMsg = `\n[Cost warning: ${pct}% of budget used ($${sessionCostUsd.toFixed(2)}/$${opts.maxBudgetUsd.toFixed(2)}). ${pct >= 75 ? "Wrap up soon." : ""}]`;
|
|
524
|
-
if (emitter) {
|
|
525
|
-
emitter.emitText(warnMsg);
|
|
526
|
-
}
|
|
527
|
-
else {
|
|
528
|
-
callbacks.onText(warnMsg);
|
|
529
|
-
}
|
|
530
|
-
}
|
|
531
|
-
}
|
|
520
|
+
emitCostWarningIfNeeded(sessionCostUsd, effectiveBudget, costWarningsEmitted, (text) => { if (emitter) {
|
|
521
|
+
emitter.emitText(text);
|
|
532
522
|
}
|
|
523
|
+
else {
|
|
524
|
+
callbacks.onText(text);
|
|
525
|
+
} });
|
|
533
526
|
// Server-side context management notification
|
|
534
527
|
if (result.contextManagementApplied) {
|
|
535
528
|
callbacks.onAutoCompact?.(messages.length, messages.length, 0);
|
|
@@ -11,7 +11,7 @@ import { readFileSync, existsSync, writeFileSync, mkdirSync, appendFileSync } fr
|
|
|
11
11
|
import { join } from "path";
|
|
12
12
|
import { homedir, tmpdir } from "os";
|
|
13
13
|
import { LOCAL_TOOL_DEFINITIONS, } from "./local-tools.js";
|
|
14
|
-
import { LoopDetector, estimateCostUsd } from "../../shared/agent-core.js";
|
|
14
|
+
import { LoopDetector, estimateCostUsd, demoteSubagentModel } from "../../shared/agent-core.js";
|
|
15
15
|
import { MODEL_MAP } from "../../shared/constants.js";
|
|
16
16
|
import { loadServerToolDefinitions, } from "./server-tools.js";
|
|
17
17
|
import { logSpan, generateSpanId, generateTraceId } from "./telemetry.js";
|
|
@@ -283,20 +283,9 @@ function emitSubagentProgress(agentType, agentId, message, turn, toolName) {
|
|
|
283
283
|
export async function runSubagent(options) {
|
|
284
284
|
const { prompt, subagent_type, model, resume, max_turns, name, parentContext, parentTraceContext } = options;
|
|
285
285
|
const agentId = resume || generateAgentId();
|
|
286
|
-
// Sub-agents default to Haiku for cost control
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
const demoteModel = (m) => {
|
|
290
|
-
const isLightType = subagent_type === "explore" || subagent_type === "research";
|
|
291
|
-
if (isLightType)
|
|
292
|
-
return MODEL_MAP["haiku"];
|
|
293
|
-
if (!m)
|
|
294
|
-
return MODEL_MAP["haiku"];
|
|
295
|
-
if (m === "opus")
|
|
296
|
-
return MODEL_MAP["sonnet"];
|
|
297
|
-
return MODEL_MAP[m] || MODEL_MAP["haiku"];
|
|
298
|
-
};
|
|
299
|
-
const modelId = demoteModel(model);
|
|
286
|
+
// Sub-agents default to Haiku for cost control — uses shared demotion logic
|
|
287
|
+
const demotedAlias = demoteSubagentModel(model, subagent_type);
|
|
288
|
+
const modelId = MODEL_MAP[demotedAlias] || MODEL_MAP["haiku"];
|
|
300
289
|
const cwd = process.cwd();
|
|
301
290
|
const systemPrompt = buildAgentPrompt(subagent_type, cwd);
|
|
302
291
|
const startTime = Date.now();
|
|
@@ -646,8 +635,8 @@ Each agent completes autonomously and returns results.`,
|
|
|
646
635
|
},
|
|
647
636
|
model: {
|
|
648
637
|
type: "string",
|
|
649
|
-
enum: ["sonnet", "
|
|
650
|
-
description: "Optional model
|
|
638
|
+
enum: ["sonnet", "haiku"],
|
|
639
|
+
description: "Optional model. haiku (default) for fast tasks, sonnet for complex reasoning.",
|
|
651
640
|
},
|
|
652
641
|
resume: {
|
|
653
642
|
type: "string",
|
|
@@ -16,22 +16,33 @@ export function resolvePath(p) {
|
|
|
16
16
|
return p;
|
|
17
17
|
}
|
|
18
18
|
const READ_CACHE_MAX = 100;
|
|
19
|
+
const MAX_ENTRY_SIZE = 100_000; // 100KB — skip caching larger files
|
|
20
|
+
const MAX_CACHE_BYTES = 10_000_000; // 10MB total budget
|
|
21
|
+
let totalCacheBytes = 0;
|
|
19
22
|
const readCache = new Map();
|
|
20
23
|
/**
 * Reset the session-level read cache: zero the running size total and drop
 * every cached entry. Call on session reset.
 */
export function clearReadCache() {
    totalCacheBytes = 0;
    readCache.clear();
}
|
|
24
28
|
/**
 * Remove one path from the read cache (call on write/edit) and give its
 * content length back to the running size total. Does nothing when the path
 * is not cached.
 */
function invalidateCache(path) {
    const cached = readCache.get(path);
    if (!cached) {
        return;
    }
    totalCacheBytes -= cached.content.length;
    readCache.delete(path);
}
|
|
28
|
-
/**
 * Evict entries, oldest-inserted first, until the cache is back under both
 * the entry-count limit and the total-size budget.
 */
function evictIfNeeded() {
    const overLimit = () => readCache.size >= READ_CACHE_MAX || totalCacheBytes > MAX_CACHE_BYTES;
    while (overLimit()) {
        // A Map iterates in insertion order, so its first key is the oldest entry.
        const { value: oldestPath } = readCache.keys().next();
        if (!oldestPath) {
            break;
        }
        const evicted = readCache.get(oldestPath);
        if (evicted) {
            totalCacheBytes -= evicted.content.length;
        }
        readCache.delete(oldestPath);
    }
}
|
|
37
48
|
// ============================================================================
|
|
@@ -151,14 +162,17 @@ export async function readFile(input) {
|
|
|
151
162
|
}
|
|
152
163
|
const content = readFileSync(path, "utf-8");
|
|
153
164
|
const lines = content.split("\n");
|
|
154
|
-
// Cache the read
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
165
|
+
// Cache the read — skip caching files larger than MAX_ENTRY_SIZE
|
|
166
|
+
if (content.length <= MAX_ENTRY_SIZE) {
|
|
167
|
+
try {
|
|
168
|
+
const stat = statSync(path);
|
|
169
|
+
evictIfNeeded();
|
|
170
|
+
readCache.set(path, { content, mtimeMs: stat.mtimeMs, size: stat.size });
|
|
171
|
+
totalCacheBytes += content.length;
|
|
172
|
+
}
|
|
173
|
+
catch {
|
|
174
|
+
// stat failed — skip caching
|
|
175
|
+
}
|
|
162
176
|
}
|
|
163
177
|
return formatTextFileResult(lines, input, content);
|
|
164
178
|
}
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
// Used for non-Anthropic providers (OpenAI, Gemini) that lack native
|
|
4
4
|
// server-side compaction. Calls Haiku to produce a transparent summary
|
|
5
5
|
// using the same instructions as compact_20260112.
|
|
6
|
+
import { djb2Hash } from "../../shared/agent-core.js";
|
|
6
7
|
const COMPACTION_MODEL = "claude-haiku-4-5-20251001";
|
|
7
8
|
const COMPACTION_MAX_TOKENS = 4096;
|
|
8
9
|
const COMPACTION_TIMEOUT_MS = 30_000;
|
|
@@ -63,7 +64,7 @@ export function preCompact(messages) {
|
|
|
63
64
|
const rc = typeof block.content === "string" ? block.content : JSON.stringify(block.content);
|
|
64
65
|
if (rc.length < 200)
|
|
65
66
|
continue; // Skip small results — not worth deduping
|
|
66
|
-
const hashKey =
|
|
67
|
+
const hashKey = djb2Hash(rc);
|
|
67
68
|
const existing = contentHashes.get(hashKey);
|
|
68
69
|
if (existing) {
|
|
69
70
|
existing.count++;
|
|
@@ -84,7 +85,7 @@ export function preCompact(messages) {
|
|
|
84
85
|
const rc = typeof block.content === "string" ? block.content : JSON.stringify(block.content);
|
|
85
86
|
if (rc.length < 200)
|
|
86
87
|
return block;
|
|
87
|
-
const hashKey =
|
|
88
|
+
const hashKey = djb2Hash(rc);
|
|
88
89
|
const entry = contentHashes.get(hashKey);
|
|
89
90
|
if (entry && entry.count >= 2 && i !== entry.firstIdx) {
|
|
90
91
|
bytesRemoved += rc.length - 50;
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* Consolidates: streaming, prompt caching, context management betas, compaction,
|
|
9
9
|
* loop detection, parallel tool execution, subagent delegation, retry, cost tracking.
|
|
10
10
|
*/
|
|
11
|
-
import { LoopDetector, getContextManagement, getMaxOutputTokens, getThinkingConfig, addPromptCaching, estimateCostUsd, isRetryableError, sanitizeError, routeModel, resolveToolChoice, COMPACTION_TRIGGER_TOKENS, COMPACTION_TOTAL_BUDGET, DEFAULT_SESSION_COST_BUDGET_USD, } from "../../shared/agent-core.js";
|
|
11
|
+
import { LoopDetector, getContextManagement, getMaxOutputTokens, getThinkingConfig, addPromptCaching, estimateCostUsd, isRetryableError, sanitizeError, routeModel, resolveToolChoice, emitCostWarningIfNeeded, demoteSubagentModel, COMPACTION_TRIGGER_TOKENS, COMPACTION_TOTAL_BUDGET, DEFAULT_SESSION_COST_BUDGET_USD, } from "../../shared/agent-core.js";
|
|
12
12
|
import { processStreamWithCallbacks } from "../../shared/sse-parser.js";
|
|
13
13
|
import { MODELS } from "../../shared/constants.js";
|
|
14
14
|
import { dispatchTools, buildAssistantContent } from "../../shared/tool-dispatch.js";
|
|
@@ -50,7 +50,10 @@ function mapToolChoiceForAnthropic(tc) {
|
|
|
50
50
|
// UNIFIED AGENT LOOP
|
|
51
51
|
// ============================================================================
|
|
52
52
|
export async function runServerAgentLoop(opts) {
|
|
53
|
-
const { anthropic, model, systemPrompt, messages, tools: inputTools, maxTurns, temperature, enableDelegation = true, enablePromptCaching = true, enableStreaming = true, maxConcurrentTools = DEFAULT_MAX_CONCURRENT_TOOLS, maxCostUsd
|
|
53
|
+
const { anthropic, model, systemPrompt, messages, tools: inputTools, maxTurns, temperature, enableDelegation = true, enablePromptCaching = true, enableStreaming = true, maxConcurrentTools = DEFAULT_MAX_CONCURRENT_TOOLS, maxCostUsd: maxCostUsdOpt, onText, onToolStart, onCitation, documents, clientDisconnected = { value: false }, startedAt = Date.now(), maxDurationMs = 15 * 60 * 1000, } = opts;
|
|
54
|
+
// Resolve cost budget: explicit opt > env var > default
|
|
55
|
+
const envBudget = parseFloat(process.env.WHALE_COST_BUDGET_USD || "");
|
|
56
|
+
const maxCostUsd = maxCostUsdOpt ?? (isFinite(envBudget) ? envBudget : DEFAULT_SESSION_COST_BUDGET_USD);
|
|
54
57
|
// Auto-inject delegate_task for all models (subagents always use Claude Haiku/Sonnet)
|
|
55
58
|
// activeTools is mutable — discover_tools adds to it during the session
|
|
56
59
|
const activeTools = [...inputTools];
|
|
@@ -262,14 +265,7 @@ export async function runServerAgentLoop(opts) {
|
|
|
262
265
|
// Update cost (include cache tokens for accurate pricing)
|
|
263
266
|
sessionCostUsd = estimateCostUsd(totalIn, totalOut, model, 0, cacheReadTokens, cacheCreationTokens);
|
|
264
267
|
// Graduated cost warnings — give the LLM visibility into spend
|
|
265
|
-
|
|
266
|
-
for (const pct of [25, 50, 75]) {
|
|
267
|
-
if (!costWarningsEmitted.has(pct) && sessionCostUsd >= maxCostUsd * (pct / 100)) {
|
|
268
|
-
costWarningsEmitted.add(pct);
|
|
269
|
-
onText?.(`\n[Cost warning: ${pct}% of budget used ($${sessionCostUsd.toFixed(2)}/$${maxCostUsd.toFixed(2)}). ${pct >= 75 ? "Wrap up soon." : ""}]`);
|
|
270
|
-
}
|
|
271
|
-
}
|
|
272
|
-
}
|
|
268
|
+
emitCostWarningIfNeeded(sessionCostUsd, maxCostUsd, costWarningsEmitted, onText);
|
|
273
269
|
// Record per-turn metrics for observability
|
|
274
270
|
const turnToolNames = toolUseBlocks.map(b => b.name);
|
|
275
271
|
turnMetrics.push({
|
|
@@ -349,14 +345,7 @@ export async function runServerAgentLoop(opts) {
|
|
|
349
345
|
totalOut += subagentTokens.output;
|
|
350
346
|
sessionCostUsd = estimateCostUsd(totalIn, totalOut, model, 0, cacheReadTokens, cacheCreationTokens) + subagentTokens.costUsd;
|
|
351
347
|
// Cost warnings after subagent aggregation (subagents can be expensive)
|
|
352
|
-
|
|
353
|
-
for (const pct of [25, 50, 75]) {
|
|
354
|
-
if (!costWarningsEmitted.has(pct) && sessionCostUsd >= maxCostUsd * (pct / 100)) {
|
|
355
|
-
costWarningsEmitted.add(pct);
|
|
356
|
-
onText?.(`\n[Cost warning: ${pct}% of budget used ($${sessionCostUsd.toFixed(2)}/$${maxCostUsd.toFixed(2)}). ${pct >= 75 ? "Wrap up soon." : ""}]`);
|
|
357
|
-
}
|
|
358
|
-
}
|
|
359
|
-
}
|
|
348
|
+
emitCostWarningIfNeeded(sessionCostUsd, maxCostUsd, costWarningsEmitted, onText);
|
|
360
349
|
const assistantContent = buildAssistantContent({ text: currentText, toolUseBlocks, compactionContent });
|
|
361
350
|
messages.push({ role: "assistant", content: assistantContent });
|
|
362
351
|
messages.push({ role: "user", content: toolResults });
|
|
@@ -453,14 +442,7 @@ export async function runServerAgentLoop(opts) {
|
|
|
453
442
|
}
|
|
454
443
|
sessionCostUsd = estimateCostUsd(totalIn, totalOut, model, 0, cacheReadTokens, cacheCreationTokens);
|
|
455
444
|
// Graduated cost warnings (non-streaming path)
|
|
456
|
-
|
|
457
|
-
for (const pct of [25, 50, 75]) {
|
|
458
|
-
if (!costWarningsEmitted.has(pct) && sessionCostUsd >= maxCostUsd * (pct / 100)) {
|
|
459
|
-
costWarningsEmitted.add(pct);
|
|
460
|
-
onText?.(`\n[Cost warning: ${pct}% of budget used ($${sessionCostUsd.toFixed(2)}/$${maxCostUsd.toFixed(2)}). ${pct >= 75 ? "Wrap up soon." : ""}]`);
|
|
461
|
-
}
|
|
462
|
-
}
|
|
463
|
-
}
|
|
445
|
+
emitCostWarningIfNeeded(sessionCostUsd, maxCostUsd, costWarningsEmitted, onText);
|
|
464
446
|
// Record per-turn metrics (non-streaming)
|
|
465
447
|
const nsTurnToolNames = toolUseBlocks.map(b => b.name);
|
|
466
448
|
turnMetrics.push({
|
|
@@ -525,14 +507,7 @@ export async function runServerAgentLoop(opts) {
|
|
|
525
507
|
totalOut += nonStreamSubTokens.output;
|
|
526
508
|
sessionCostUsd = estimateCostUsd(totalIn, totalOut, model, 0, cacheReadTokens, cacheCreationTokens) + nonStreamSubTokens.costUsd;
|
|
527
509
|
// Cost warnings after subagent aggregation (non-streaming)
|
|
528
|
-
|
|
529
|
-
for (const pct of [25, 50, 75]) {
|
|
530
|
-
if (!costWarningsEmitted.has(pct) && sessionCostUsd >= maxCostUsd * (pct / 100)) {
|
|
531
|
-
costWarningsEmitted.add(pct);
|
|
532
|
-
onText?.(`\n[Cost warning: ${pct}% of budget used ($${sessionCostUsd.toFixed(2)}/$${maxCostUsd.toFixed(2)}). ${pct >= 75 ? "Wrap up soon." : ""}]`);
|
|
533
|
-
}
|
|
534
|
-
}
|
|
535
|
-
}
|
|
510
|
+
emitCostWarningIfNeeded(sessionCostUsd, maxCostUsd, costWarningsEmitted, onText);
|
|
536
511
|
const assistantContent = buildAssistantContent({ text: currentText, toolUseBlocks });
|
|
537
512
|
messages.push({ role: "assistant", content: assistantContent });
|
|
538
513
|
messages.push({ role: "user", content: toolResults });
|
|
@@ -572,9 +547,7 @@ function makeToolExecutor(opts, tools, allToolNames, subagentTokens, discoveredT
|
|
|
572
547
|
// Subagent delegation — demote models to control cost (sub-agents should never run Opus)
|
|
573
548
|
if (name === "delegate_task") {
|
|
574
549
|
const subPrompt = String(input.prompt || "");
|
|
575
|
-
const
|
|
576
|
-
const subModel = (subModelInput === "opus" ? "sonnet" :
|
|
577
|
-
subModelInput === "sonnet" ? "haiku" : "haiku");
|
|
550
|
+
const subModel = demoteSubagentModel(input.model ? String(input.model) : undefined);
|
|
578
551
|
const subMaxTurns = Math.min(Math.max(1, Number(input.max_turns) || 6), 12);
|
|
579
552
|
const subTools = tools.filter((t) => t.name !== "delegate_task");
|
|
580
553
|
const subId = `sub-${Date.now().toString(36)}`;
|
|
@@ -26,8 +26,8 @@ export const DELEGATE_TASK_TOOL_DEF = {
|
|
|
26
26
|
},
|
|
27
27
|
model: {
|
|
28
28
|
type: "string",
|
|
29
|
-
enum: ["haiku", "sonnet"
|
|
30
|
-
description: "haiku (fast/$1) for simple lookups. sonnet ($3) for multi-step chains.
|
|
29
|
+
enum: ["haiku", "sonnet"],
|
|
30
|
+
description: "haiku (fast/$1) for simple lookups. sonnet ($3) for multi-step chains. Default: haiku.",
|
|
31
31
|
},
|
|
32
32
|
max_turns: {
|
|
33
33
|
type: "number",
|
|
@@ -88,6 +88,8 @@ export declare function addPromptCaching(tools: Array<Record<string, unknown>>,
|
|
|
88
88
|
tools: Array<Record<string, unknown>>;
|
|
89
89
|
messages: Array<Record<string, unknown>>;
|
|
90
90
|
};
|
|
91
|
+
/** djb2 string hash — fast, deterministic, no dependencies */
|
|
92
|
+
export declare function djb2Hash(str: string): string;
|
|
91
93
|
export declare class LoopDetector {
|
|
92
94
|
private history;
|
|
93
95
|
private consecutiveErrors;
|
|
@@ -110,6 +112,11 @@ export declare class LoopDetector {
|
|
|
110
112
|
* are tracked per-action so e.g. voice/speak failing won't block voice/music_compose. */
|
|
111
113
|
private errorKey;
|
|
112
114
|
recordCall(name: string, input: Record<string, unknown>): LoopCheckResult;
|
|
115
|
+
/**
|
|
116
|
+
* Track file read frequency — call when the tool is known to be a file read.
|
|
117
|
+
* Blocks re-reading the same path more than FILE_READ_LIMIT times per session.
|
|
118
|
+
*/
|
|
119
|
+
trackRead(path: string): LoopCheckResult;
|
|
113
120
|
recordResult(name: string, success: boolean, input?: Record<string, unknown>): void;
|
|
114
121
|
endTurn(): BailCheckResult;
|
|
115
122
|
resetTurn(): void;
|
|
@@ -139,6 +146,11 @@ export declare const MODEL_PRICING: Record<string, {
|
|
|
139
146
|
outputPer1M: number;
|
|
140
147
|
thinkingPer1M?: number;
|
|
141
148
|
}>;
|
|
149
|
+
/**
|
|
150
|
+
* Emit graduated cost warnings at 25%, 50%, 75% thresholds.
|
|
151
|
+
* Single source of truth — replaces copy-pasted blocks in server + CLI.
|
|
152
|
+
*/
|
|
153
|
+
export declare function emitCostWarningIfNeeded(sessionCostUsd: number, maxCostUsd: number, costWarningsEmitted: Set<number>, onText?: (text: string) => void): void;
|
|
142
154
|
export declare function estimateCostUsd(inputTokens: number, outputTokens: number, model: string, thinkingTokens?: number, cacheReadTokens?: number, cacheCreationTokens?: number): number;
|
|
143
155
|
/**
|
|
144
156
|
* Route to cheaper model when the task is simple enough.
|
|
@@ -157,4 +169,12 @@ export declare function truncateToolResult(content: string, maxChars: number): s
|
|
|
157
169
|
export declare function getMaxToolResultChars(contextConfig?: {
|
|
158
170
|
max_tool_result_chars?: number;
|
|
159
171
|
} | null): number;
|
|
172
|
+
/**
|
|
173
|
+
* Demote subagent model requests — single source of truth for server + CLI.
|
|
174
|
+
* - explore/research: always haiku
|
|
175
|
+
* - opus: demoted to sonnet
|
|
176
|
+
* - sonnet: kept for plan, demoted to haiku for others
|
|
177
|
+
* - default/undefined: haiku
|
|
178
|
+
*/
|
|
179
|
+
export declare function demoteSubagentModel(requested: string | undefined, agentType?: string): "haiku" | "sonnet";
|
|
160
180
|
export declare function sanitizeError(err: unknown): string;
|
|
@@ -195,7 +195,7 @@ export function addPromptCaching(tools, messages) {
|
|
|
195
195
|
// LOOP DETECTION
|
|
196
196
|
// ============================================================================
|
|
197
197
|
/** djb2 string hash — fast, deterministic, no dependencies */
|
|
198
|
-
function djb2Hash(str) {
|
|
198
|
+
export function djb2Hash(str) {
|
|
199
199
|
let hash = 5381;
|
|
200
200
|
for (let i = 0; i < str.length; i++) {
|
|
201
201
|
hash = ((hash << 5) + hash + str.charCodeAt(i)) & 0xffffffff;
|
|
@@ -230,18 +230,6 @@ export class LoopDetector {
|
|
|
230
230
|
recordCall(name, input) {
|
|
231
231
|
const inputHash = djb2Hash(JSON.stringify({ name, ...input }));
|
|
232
232
|
const eKey = this.errorKey(name, input);
|
|
233
|
-
// File read frequency tracking — block re-reading the same file >3 times per session
|
|
234
|
-
if (name === "read_file" && typeof input.path === "string") {
|
|
235
|
-
const filePath = input.path;
|
|
236
|
-
const readCount = (this.fileReadCounts.get(filePath) || 0) + 1;
|
|
237
|
-
this.fileReadCounts.set(filePath, readCount);
|
|
238
|
-
if (readCount > LoopDetector.FILE_READ_LIMIT) {
|
|
239
|
-
return {
|
|
240
|
-
blocked: true,
|
|
241
|
-
reason: `File "${filePath}" already read ${readCount - 1} times this session. Use the content from a previous read instead of re-reading.`,
|
|
242
|
-
};
|
|
243
|
-
}
|
|
244
|
-
}
|
|
245
233
|
if (this.failedStrategies.has(inputHash)) {
|
|
246
234
|
return {
|
|
247
235
|
blocked: true,
|
|
@@ -282,6 +270,21 @@ export class LoopDetector {
|
|
|
282
270
|
}
|
|
283
271
|
return { blocked: false };
|
|
284
272
|
}
|
|
273
|
+
/**
|
|
274
|
+
* Track file read frequency — call when the tool is known to be a file read.
|
|
275
|
+
* Blocks re-reading the same path more than FILE_READ_LIMIT times per session.
|
|
276
|
+
*/
|
|
277
|
+
trackRead(path) {
|
|
278
|
+
const readCount = (this.fileReadCounts.get(path) || 0) + 1;
|
|
279
|
+
this.fileReadCounts.set(path, readCount);
|
|
280
|
+
if (readCount > LoopDetector.FILE_READ_LIMIT) {
|
|
281
|
+
return {
|
|
282
|
+
blocked: true,
|
|
283
|
+
reason: `File "${path}" already read ${readCount - 1} times this session. Use the content from a previous read instead of re-reading.`,
|
|
284
|
+
};
|
|
285
|
+
}
|
|
286
|
+
return { blocked: false };
|
|
287
|
+
}
|
|
285
288
|
recordResult(name, success, input) {
|
|
286
289
|
const eKey = this.errorKey(name, input);
|
|
287
290
|
if (success) {
|
|
@@ -445,6 +448,20 @@ export const MODEL_PRICING = {
|
|
|
445
448
|
"o3-mini": { inputPer1M: 1.10, outputPer1M: 4.40, thinkingPer1M: 4.40 },
|
|
446
449
|
"o4-mini": { inputPer1M: 1.10, outputPer1M: 4.40, thinkingPer1M: 4.40 },
|
|
447
450
|
};
|
|
451
|
+
/**
 * Emit graduated cost warnings at 25%, 50%, 75% thresholds.
 * Single source of truth — replaces copy-pasted blocks in server + CLI.
 *
 * Each threshold is announced at most once per `costWarningsEmitted` set. If a
 * single call crosses several thresholds, one message is emitted for each.
 *
 * @param {number} sessionCostUsd - Cost accumulated so far, in USD.
 * @param {number} maxCostUsd - Session budget in USD. The call is a no-op unless
 *   this is a finite, positive number.
 * @param {Set<number>} costWarningsEmitted - Thresholds already announced; mutated.
 * @param {(text: string) => void} [onText] - Sink for the warning text; when
 *   omitted nothing is emitted and no threshold is recorded.
 * @returns {void}
 */
export function emitCostWarningIfNeeded(sessionCostUsd, maxCostUsd, costWarningsEmitted, onText) {
    if (!onText) {
        return;
    }
    // Number.isFinite does not coerce: a string budget such as "5" would pass the
    // global isFinite() and then crash on .toFixed() below.
    if (typeof sessionCostUsd !== "number" || !Number.isFinite(maxCostUsd)) {
        return;
    }
    // With a zero or negative budget every threshold is already met at zero
    // spend, which would emit three meaningless warnings at once.
    if (maxCostUsd <= 0) {
        return;
    }
    for (const pct of [25, 50, 75]) {
        if (!costWarningsEmitted.has(pct) && sessionCostUsd >= maxCostUsd * (pct / 100)) {
            costWarningsEmitted.add(pct);
            onText(`\n[Cost warning: ${pct}% of budget used ($${sessionCostUsd.toFixed(2)}/$${maxCostUsd.toFixed(2)}).${pct >= 75 ? " Wrap up soon." : ""}]`);
        }
    }
}
|
|
448
465
|
export function estimateCostUsd(inputTokens, outputTokens, model, thinkingTokens = 0, cacheReadTokens = 0, cacheCreationTokens = 0) {
|
|
449
466
|
// Exact match first, then find a pricing key that is a prefix of the model ID
|
|
450
467
|
const pricing = MODEL_PRICING[model]
|
|
@@ -547,6 +564,24 @@ export function getMaxToolResultChars(contextConfig) {
|
|
|
547
564
|
// ============================================================================
|
|
548
565
|
// UTILITY — sanitize errors (strip API keys, passwords)
|
|
549
566
|
// ============================================================================
|
|
567
|
+
/**
 * Decide which model alias a subagent may run on — shared by server and CLI.
 *
 * Rules, in order:
 * - agent type "explore" or "research": always "haiku"
 * - no model requested: "haiku"
 * - "opus" requested: "sonnet"
 * - "sonnet" requested: "sonnet" for the "plan" agent type, otherwise "haiku"
 * - anything else: "haiku"
 *
 * @param {string | undefined} requested - Model alias asked for by the caller.
 * @param {string} [agentType] - Subagent type.
 * @returns {"haiku" | "sonnet"} The alias to use.
 */
export function demoteSubagentModel(requested, agentType) {
    const isLightweightType = agentType === "explore" || agentType === "research";
    if (isLightweightType || !requested) {
        return "haiku";
    }
    switch (requested) {
        case "opus":
            return "sonnet";
        case "sonnet":
            return agentType === "plan" ? "sonnet" : "haiku";
        default:
            return "haiku";
    }
}
|
|
550
585
|
export function sanitizeError(err) {
|
|
551
586
|
const msg = String(err);
|
|
552
587
|
return msg
|
|
@@ -45,6 +45,20 @@ export async function dispatchTools(toolCalls, executor, opts) {
|
|
|
45
45
|
});
|
|
46
46
|
return;
|
|
47
47
|
}
|
|
48
|
+
// File read frequency tracking — tool-dispatch knows about read_file,
|
|
49
|
+
// LoopDetector.trackRead() stays tool-name-agnostic.
|
|
50
|
+
if (tu.name === "read_file" && typeof tu.input.path === "string") {
|
|
51
|
+
const readCheck = loopDetector.trackRead(tu.input.path);
|
|
52
|
+
if (readCheck.blocked) {
|
|
53
|
+
onResult?.(tu.name, false, readCheck.reason, 0);
|
|
54
|
+
resultMap.set(tu.id, {
|
|
55
|
+
type: "tool_result",
|
|
56
|
+
tool_use_id: tu.id,
|
|
57
|
+
content: JSON.stringify({ error: readCheck.reason }),
|
|
58
|
+
});
|
|
59
|
+
return;
|
|
60
|
+
}
|
|
61
|
+
}
|
|
48
62
|
onStart?.(tu.name, tu.input);
|
|
49
63
|
const toolStart = Date.now();
|
|
50
64
|
let result;
|
package/dist/updater.js
CHANGED
|
@@ -15,7 +15,7 @@ import https from "https";
|
|
|
15
15
|
// ============================================================================
|
|
16
16
|
// CONFIG
|
|
17
17
|
// ============================================================================
|
|
18
|
-
const PACKAGE_NAME = "
|
|
18
|
+
const PACKAGE_NAME = "whale-code";
|
|
19
19
|
const CHECK_INTERVAL_MS = 4 * 60 * 60 * 1000; // 4 hours
|
|
20
20
|
const REGISTRY_URL = `https://registry.npmjs.org/${PACKAGE_NAME}/latest`;
|
|
21
21
|
// ============================================================================
|