whale-code 6.5.0 → 6.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@
12
12
  * whale mcp list|add|remove Manage MCP servers
13
13
  * whale doctor Run diagnostics
14
14
  * whale config [key] [value] View/set configuration
15
+ * whale update Update to latest version
15
16
  * whale help Show this help
16
17
  * (non-TTY stdin) MCP stdio server for Claude Code / Cursor
17
18
  */
@@ -169,6 +170,24 @@ if (flags.version) {
169
170
  process.exit(0);
170
171
  }
171
172
 
173
+ // ── Background update check (non-blocking, TTY only) ──
174
+ const PACKAGE_NAME = "whale-code";
175
/**
 * Look up the latest published version on the npm registry and, when it
 * differs from the running version, print a two-line upgrade hint.
 *
 * The lookup uses the asynchronous `exec` so the event loop stays free while
 * npm runs; `execSync` would stall startup for up to the full timeout even
 * though the call site does not await this function. `exec` buffers the
 * child's stderr instead of inheriting it, so no shell-specific
 * `2>/dev/null` redirection (which fails under cmd.exe) is needed to keep
 * npm's own output off the terminal.
 *
 * Never rejects: any failure (npm missing, offline, timeout) is swallowed
 * because the check is purely advisory.
 *
 * NOTE(review): because the lookup is now truly asynchronous, the hint may be
 * printed after the interactive UI has started drawing — confirm that is
 * acceptable for the TTY front end.
 *
 * @returns {Promise<void>}
 */
async function checkForUpdate() {
  try {
    const { exec } = await import("child_process");
    const { promisify } = await import("util");
    const { stdout } = await promisify(exec)(`npm view ${PACKAGE_NAME} version`, {
      encoding: "utf-8",
      timeout: 5000,
    });
    const latest = stdout.trim();
    if (!latest || latest === PKG_VERSION) {
      return;
    }
    // ANSI styles: dim, reset, cyan, bold.
    const d = "\x1b[2m";
    const r = "\x1b[0m";
    const c = "\x1b[36m";
    const B = "\x1b[1m";
    console.log(`\n${d} Update available: ${r}${c}v${PKG_VERSION}${r}${d} → ${r}${c}${B}v${latest}${r}`);
    console.log(`${d} Run ${r}${B}whale update${r}${d} or ${r}${B}npm i -g ${PACKAGE_NAME}@latest${r}\n`);
  } catch { /* silent — network or npm not available */ }
}
186
+ if (process.stdin.isTTY && !flags.print && !flags.version && !flags.help) {
187
+ // Fire and forget — don't block startup
188
+ checkForUpdate();
189
+ }
190
+
172
191
  // ── Help ──
173
192
  if (flags.help && !subcommand) {
174
193
  showHelp();
@@ -312,6 +331,31 @@ switch (command) {
312
331
  break;
313
332
  }
314
333
 
334
+ case "update":
335
+ case "upgrade": {
336
+ const { execSync } = await import("child_process");
337
+ const d = "\x1b[2m", r = "\x1b[0m", c = "\x1b[36m", g = "\x1b[32m", B = "\x1b[1m";
338
+
339
+ console.log(`${d} Checking for updates...${r}`);
340
+ try {
341
+ const latest = execSync(`npm view ${PACKAGE_NAME} version 2>/dev/null`, { encoding: "utf-8", timeout: 10000 }).trim();
342
+ if (latest === PKG_VERSION) {
343
+ console.log(`${g} ✓${r} whale-code v${PKG_VERSION} is already up to date`);
344
+ process.exit(0);
345
+ }
346
+ console.log(`${d} Updating ${r}v${PKG_VERSION}${d} → ${r}${B}v${latest}${r}`);
347
+ console.log();
348
+ execSync(`npm install -g ${PACKAGE_NAME}@latest`, { stdio: "inherit" });
349
+ console.log();
350
+ console.log(`${g} ✓${r} Updated to whale-code v${latest}`);
351
+ } catch (err) {
352
+ console.error(` ✗ Update failed: ${err.message}`);
353
+ console.error(` Try: sudo npm install -g ${PACKAGE_NAME}@latest`);
354
+ process.exit(1);
355
+ }
356
+ break;
357
+ }
358
+
315
359
  case "agent": {
316
360
  // Forward remaining args to local-agent CLI
317
361
  // Rebuild process.argv so the agent sees: [node, script, subcommand, ...flags]
@@ -26,7 +26,7 @@ import { captureError, addBreadcrumb } from "./error-logger.js";
26
26
  import { setGlobalEmitter, clearGlobalEmitter, } from "./agent-events.js";
27
27
  import { mcpClientManager } from "./mcp-client.js";
28
28
  import { loadHooks, runBeforeToolHook, runAfterToolHook, runSessionHook } from "./hooks.js";
29
- import { LoopDetector, COMPACTION_TRIGGER_TOKENS, COMPACTION_TOTAL_BUDGET, getCompactionConfig } from "../../shared/agent-core.js";
29
+ import { LoopDetector, COMPACTION_TRIGGER_TOKENS, COMPACTION_TOTAL_BUDGET, getCompactionConfig, DEFAULT_SESSION_COST_BUDGET_USD, emitCostWarningIfNeeded } from "../../shared/agent-core.js";
30
30
  import { parseSSEStream, processStreamWithCallbacks, collectStreamResult } from "../../shared/sse-parser.js";
31
31
  import { callServerProxy, callTranscribe, buildAPIRequest, buildSystemBlocks, prepareWithCaching, trimGeminiContext, trimOpenAIContext, requestProviderCompaction } from "../../shared/api-client.js";
32
32
  import { getProvider, MODELS } from "../../shared/constants.js";
@@ -308,6 +308,7 @@ export async function runAgentLoop(opts) {
308
308
  let sessionCostUsd = 0;
309
309
  let compactionCount = 0;
310
310
  const costWarningsEmitted = new Set();
311
+ const effectiveBudget = opts.maxBudgetUsd ?? DEFAULT_SESSION_COST_BUDGET_USD;
311
312
  const activeModel = getModel();
312
313
  // Tool executor — routes to interactive, local, server, or MCP tools.
313
314
  // Wraps execution with before/after hooks when hooks are loaded.
@@ -384,16 +385,16 @@ export async function runAgentLoop(opts) {
384
385
  callbacks.onError("Cancelled", messages);
385
386
  return;
386
387
  }
387
- // Budget enforcement
388
- if (opts.maxBudgetUsd && sessionCostUsd >= opts.maxBudgetUsd) {
389
- logSpan({ action: "chat.budget_exceeded", durationMs: Date.now() - sessionStart, context: turnCtx, storeId: storeId || undefined, severity: "warn", details: { session_cost_usd: sessionCostUsd, max_budget_usd: opts.maxBudgetUsd, iteration } });
390
- callbacks.onError(`Budget exceeded: $${sessionCostUsd.toFixed(4)} >= $${opts.maxBudgetUsd}`, messages);
388
+ // Budget enforcement — always enforced (defaults to DEFAULT_SESSION_COST_BUDGET_USD)
389
+ if (sessionCostUsd >= effectiveBudget) {
390
+ logSpan({ action: "chat.budget_exceeded", durationMs: Date.now() - sessionStart, context: turnCtx, storeId: storeId || undefined, severity: "warn", details: { session_cost_usd: sessionCostUsd, max_budget_usd: effectiveBudget, iteration } });
391
+ callbacks.onError(`Budget exceeded: $${sessionCostUsd.toFixed(4)} >= $${effectiveBudget}`, messages);
391
392
  return;
392
393
  }
393
394
  const apiStart = Date.now();
394
395
  const apiSpanId = generateSpanId();
395
396
  const apiRowId = crypto.randomUUID(); // UUID for this span's row — children reference via parent_id
396
- const costContext = `Session cost: $${sessionCostUsd.toFixed(2)}${opts.maxBudgetUsd ? ` | Budget remaining: $${(opts.maxBudgetUsd - sessionCostUsd).toFixed(2)}` : ""}`;
397
+ const costContext = `Session cost: $${sessionCostUsd.toFixed(2)} | Budget remaining: $${(effectiveBudget - sessionCostUsd).toFixed(2)}`;
397
398
  // Build API request config
398
399
  const currentModel = getModel();
399
400
  const apiConfig = buildAPIRequest({
@@ -516,20 +517,12 @@ export async function runAgentLoop(opts) {
516
517
  totalThinking += result.thinkingTokens;
517
518
  sessionCostUsd += estimateCostUsd(result.usage.inputTokens, result.usage.outputTokens, currentModel, result.thinkingTokens, result.usage.cacheReadTokens, result.usage.cacheCreationTokens);
518
519
  // Graduated cost warnings
519
- if (opts.maxBudgetUsd) {
520
- for (const pct of [25, 50, 75]) {
521
- if (!costWarningsEmitted.has(pct) && sessionCostUsd >= opts.maxBudgetUsd * (pct / 100)) {
522
- costWarningsEmitted.add(pct);
523
- const warnMsg = `\n[Cost warning: ${pct}% of budget used ($${sessionCostUsd.toFixed(2)}/$${opts.maxBudgetUsd.toFixed(2)}). ${pct >= 75 ? "Wrap up soon." : ""}]`;
524
- if (emitter) {
525
- emitter.emitText(warnMsg);
526
- }
527
- else {
528
- callbacks.onText(warnMsg);
529
- }
530
- }
531
- }
520
+ emitCostWarningIfNeeded(sessionCostUsd, effectiveBudget, costWarningsEmitted, (text) => { if (emitter) {
521
+ emitter.emitText(text);
532
522
  }
523
+ else {
524
+ callbacks.onText(text);
525
+ } });
533
526
  // Server-side context management notification
534
527
  if (result.contextManagementApplied) {
535
528
  callbacks.onAutoCompact?.(messages.length, messages.length, 0);
@@ -11,7 +11,7 @@ import { readFileSync, existsSync, writeFileSync, mkdirSync, appendFileSync } fr
11
11
  import { join } from "path";
12
12
  import { homedir, tmpdir } from "os";
13
13
  import { LOCAL_TOOL_DEFINITIONS, } from "./local-tools.js";
14
- import { LoopDetector, estimateCostUsd } from "../../shared/agent-core.js";
14
+ import { LoopDetector, estimateCostUsd, demoteSubagentModel } from "../../shared/agent-core.js";
15
15
  import { MODEL_MAP } from "../../shared/constants.js";
16
16
  import { loadServerToolDefinitions, } from "./server-tools.js";
17
17
  import { logSpan, generateSpanId, generateTraceId } from "./telemetry.js";
@@ -283,20 +283,9 @@ function emitSubagentProgress(agentType, agentId, message, turn, toolName) {
283
283
  export async function runSubagent(options) {
284
284
  const { prompt, subagent_type, model, resume, max_turns, name, parentContext, parentTraceContext } = options;
285
285
  const agentId = resume || generateAgentId();
286
- // Sub-agents default to Haiku for cost control, with model demotion:
287
- // - explore/research: always Haiku regardless of request
288
- // - general-purpose/plan: allow Sonnet max, demote Opus to Sonnet
289
- const demoteModel = (m) => {
290
- const isLightType = subagent_type === "explore" || subagent_type === "research";
291
- if (isLightType)
292
- return MODEL_MAP["haiku"];
293
- if (!m)
294
- return MODEL_MAP["haiku"];
295
- if (m === "opus")
296
- return MODEL_MAP["sonnet"];
297
- return MODEL_MAP[m] || MODEL_MAP["haiku"];
298
- };
299
- const modelId = demoteModel(model);
286
+ // Sub-agents default to Haiku for cost control; uses shared demotion logic
287
+ const demotedAlias = demoteSubagentModel(model, subagent_type);
288
+ const modelId = MODEL_MAP[demotedAlias] || MODEL_MAP["haiku"];
300
289
  const cwd = process.cwd();
301
290
  const systemPrompt = buildAgentPrompt(subagent_type, cwd);
302
291
  const startTime = Date.now();
@@ -646,8 +635,8 @@ Each agent completes autonomously and returns results.`,
646
635
  },
647
636
  model: {
648
637
  type: "string",
649
- enum: ["sonnet", "opus", "haiku"],
650
- description: "Optional model override. If not specified, inherits from parent. Prefer haiku for quick, straightforward tasks to minimize cost and latency.",
638
+ enum: ["sonnet", "haiku"],
639
+ description: "Optional model. haiku (default) for fast tasks, sonnet for complex reasoning.",
651
640
  },
652
641
  resume: {
653
642
  type: "string",
@@ -16,22 +16,33 @@ export function resolvePath(p) {
16
16
  return p;
17
17
  }
18
18
  const READ_CACHE_MAX = 100;
19
+ const MAX_ENTRY_SIZE = 100_000; // 100KB — skip caching larger files
20
+ const MAX_CACHE_BYTES = 10_000_000; // 10MB total budget
21
+ let totalCacheBytes = 0;
19
22
  const readCache = new Map();
20
23
  /** Clear the session-level read cache. Call on session reset. */
21
24
  export function clearReadCache() {
22
25
  readCache.clear();
26
+ totalCacheBytes = 0;
23
27
  }
24
28
  /** Invalidate a specific path from the cache (call on write/edit). */
25
29
  function invalidateCache(path) {
26
- readCache.delete(path);
30
+ const existing = readCache.get(path);
31
+ if (existing) {
32
+ totalCacheBytes -= existing.content.length;
33
+ readCache.delete(path);
34
+ }
27
35
  }
28
- /** LRU eviction: remove oldest entry when cache is full. */
36
+ /** LRU eviction: remove oldest entries when cache exceeds count or byte budget. */
29
37
  function evictIfNeeded() {
30
- if (readCache.size >= READ_CACHE_MAX) {
31
- // Map insertion order = LRU order; delete the first (oldest) entry
38
+ while (readCache.size >= READ_CACHE_MAX || totalCacheBytes > MAX_CACHE_BYTES) {
32
39
  const oldest = readCache.keys().next().value;
33
- if (oldest)
34
- readCache.delete(oldest);
40
+ if (!oldest)
41
+ break;
42
+ const entry = readCache.get(oldest);
43
+ if (entry)
44
+ totalCacheBytes -= entry.content.length;
45
+ readCache.delete(oldest);
35
46
  }
36
47
  }
37
48
  // ============================================================================
@@ -151,14 +162,17 @@ export async function readFile(input) {
151
162
  }
152
163
  const content = readFileSync(path, "utf-8");
153
164
  const lines = content.split("\n");
154
- // Cache the read
155
- try {
156
- const stat = statSync(path);
157
- evictIfNeeded();
158
- readCache.set(path, { content, mtimeMs: stat.mtimeMs, size: stat.size });
159
- }
160
- catch {
161
- // stat failed — skip caching
165
+ // Cache the read — skip caching files larger than MAX_ENTRY_SIZE
166
+ if (content.length <= MAX_ENTRY_SIZE) {
167
+ try {
168
+ const stat = statSync(path);
169
+ evictIfNeeded();
170
+ readCache.set(path, { content, mtimeMs: stat.mtimeMs, size: stat.size });
171
+ totalCacheBytes += content.length;
172
+ }
173
+ catch {
174
+ // stat failed — skip caching
175
+ }
162
176
  }
163
177
  return formatTextFileResult(lines, input, content);
164
178
  }
@@ -3,6 +3,7 @@
3
3
  // Used for non-Anthropic providers (OpenAI, Gemini) that lack native
4
4
  // server-side compaction. Calls Haiku to produce a transparent summary
5
5
  // using the same instructions as compact_20260112.
6
+ import { djb2Hash } from "../../shared/agent-core.js";
6
7
  const COMPACTION_MODEL = "claude-haiku-4-5-20251001";
7
8
  const COMPACTION_MAX_TOKENS = 4096;
8
9
  const COMPACTION_TIMEOUT_MS = 30_000;
@@ -63,7 +64,7 @@ export function preCompact(messages) {
63
64
  const rc = typeof block.content === "string" ? block.content : JSON.stringify(block.content);
64
65
  if (rc.length < 200)
65
66
  continue; // Skip small results — not worth deduping
66
- const hashKey = `${rc.length}:${rc.slice(0, 200)}`;
67
+ const hashKey = djb2Hash(rc);
67
68
  const existing = contentHashes.get(hashKey);
68
69
  if (existing) {
69
70
  existing.count++;
@@ -84,7 +85,7 @@ export function preCompact(messages) {
84
85
  const rc = typeof block.content === "string" ? block.content : JSON.stringify(block.content);
85
86
  if (rc.length < 200)
86
87
  return block;
87
- const hashKey = `${rc.length}:${rc.slice(0, 200)}`;
88
+ const hashKey = djb2Hash(rc);
88
89
  const entry = contentHashes.get(hashKey);
89
90
  if (entry && entry.count >= 2 && i !== entry.firstIdx) {
90
91
  bytesRemoved += rc.length - 50;
@@ -8,7 +8,7 @@
8
8
  * Consolidates: streaming, prompt caching, context management betas, compaction,
9
9
  * loop detection, parallel tool execution, subagent delegation, retry, cost tracking.
10
10
  */
11
- import { LoopDetector, getContextManagement, getMaxOutputTokens, getThinkingConfig, addPromptCaching, estimateCostUsd, isRetryableError, sanitizeError, routeModel, resolveToolChoice, COMPACTION_TRIGGER_TOKENS, COMPACTION_TOTAL_BUDGET, DEFAULT_SESSION_COST_BUDGET_USD, } from "../../shared/agent-core.js";
11
+ import { LoopDetector, getContextManagement, getMaxOutputTokens, getThinkingConfig, addPromptCaching, estimateCostUsd, isRetryableError, sanitizeError, routeModel, resolveToolChoice, emitCostWarningIfNeeded, demoteSubagentModel, COMPACTION_TRIGGER_TOKENS, COMPACTION_TOTAL_BUDGET, DEFAULT_SESSION_COST_BUDGET_USD, } from "../../shared/agent-core.js";
12
12
  import { processStreamWithCallbacks } from "../../shared/sse-parser.js";
13
13
  import { MODELS } from "../../shared/constants.js";
14
14
  import { dispatchTools, buildAssistantContent } from "../../shared/tool-dispatch.js";
@@ -50,7 +50,10 @@ function mapToolChoiceForAnthropic(tc) {
50
50
  // UNIFIED AGENT LOOP
51
51
  // ============================================================================
52
52
  export async function runServerAgentLoop(opts) {
53
- const { anthropic, model, systemPrompt, messages, tools: inputTools, maxTurns, temperature, enableDelegation = true, enablePromptCaching = true, enableStreaming = true, maxConcurrentTools = DEFAULT_MAX_CONCURRENT_TOOLS, maxCostUsd = DEFAULT_SESSION_COST_BUDGET_USD, onText, onToolStart, onCitation, documents, clientDisconnected = { value: false }, startedAt = Date.now(), maxDurationMs = 15 * 60 * 1000, } = opts;
53
+ const { anthropic, model, systemPrompt, messages, tools: inputTools, maxTurns, temperature, enableDelegation = true, enablePromptCaching = true, enableStreaming = true, maxConcurrentTools = DEFAULT_MAX_CONCURRENT_TOOLS, maxCostUsd: maxCostUsdOpt, onText, onToolStart, onCitation, documents, clientDisconnected = { value: false }, startedAt = Date.now(), maxDurationMs = 15 * 60 * 1000, } = opts;
54
+ // Resolve cost budget: explicit opt > env var > default
55
+ const envBudget = parseFloat(process.env.WHALE_COST_BUDGET_USD || "");
56
+ const maxCostUsd = maxCostUsdOpt ?? (isFinite(envBudget) ? envBudget : DEFAULT_SESSION_COST_BUDGET_USD);
54
57
  // Auto-inject delegate_task for all models (subagents always use Claude Haiku/Sonnet)
55
58
  // activeTools is mutable — discover_tools adds to it during the session
56
59
  const activeTools = [...inputTools];
@@ -262,14 +265,7 @@ export async function runServerAgentLoop(opts) {
262
265
  // Update cost (include cache tokens for accurate pricing)
263
266
  sessionCostUsd = estimateCostUsd(totalIn, totalOut, model, 0, cacheReadTokens, cacheCreationTokens);
264
267
  // Graduated cost warnings — give the LLM visibility into spend
265
- if (isFinite(maxCostUsd)) {
266
- for (const pct of [25, 50, 75]) {
267
- if (!costWarningsEmitted.has(pct) && sessionCostUsd >= maxCostUsd * (pct / 100)) {
268
- costWarningsEmitted.add(pct);
269
- onText?.(`\n[Cost warning: ${pct}% of budget used ($${sessionCostUsd.toFixed(2)}/$${maxCostUsd.toFixed(2)}). ${pct >= 75 ? "Wrap up soon." : ""}]`);
270
- }
271
- }
272
- }
268
+ emitCostWarningIfNeeded(sessionCostUsd, maxCostUsd, costWarningsEmitted, onText);
273
269
  // Record per-turn metrics for observability
274
270
  const turnToolNames = toolUseBlocks.map(b => b.name);
275
271
  turnMetrics.push({
@@ -349,14 +345,7 @@ export async function runServerAgentLoop(opts) {
349
345
  totalOut += subagentTokens.output;
350
346
  sessionCostUsd = estimateCostUsd(totalIn, totalOut, model, 0, cacheReadTokens, cacheCreationTokens) + subagentTokens.costUsd;
351
347
  // Cost warnings after subagent aggregation (subagents can be expensive)
352
- if (isFinite(maxCostUsd)) {
353
- for (const pct of [25, 50, 75]) {
354
- if (!costWarningsEmitted.has(pct) && sessionCostUsd >= maxCostUsd * (pct / 100)) {
355
- costWarningsEmitted.add(pct);
356
- onText?.(`\n[Cost warning: ${pct}% of budget used ($${sessionCostUsd.toFixed(2)}/$${maxCostUsd.toFixed(2)}). ${pct >= 75 ? "Wrap up soon." : ""}]`);
357
- }
358
- }
359
- }
348
+ emitCostWarningIfNeeded(sessionCostUsd, maxCostUsd, costWarningsEmitted, onText);
360
349
  const assistantContent = buildAssistantContent({ text: currentText, toolUseBlocks, compactionContent });
361
350
  messages.push({ role: "assistant", content: assistantContent });
362
351
  messages.push({ role: "user", content: toolResults });
@@ -453,14 +442,7 @@ export async function runServerAgentLoop(opts) {
453
442
  }
454
443
  sessionCostUsd = estimateCostUsd(totalIn, totalOut, model, 0, cacheReadTokens, cacheCreationTokens);
455
444
  // Graduated cost warnings (non-streaming path)
456
- if (isFinite(maxCostUsd)) {
457
- for (const pct of [25, 50, 75]) {
458
- if (!costWarningsEmitted.has(pct) && sessionCostUsd >= maxCostUsd * (pct / 100)) {
459
- costWarningsEmitted.add(pct);
460
- onText?.(`\n[Cost warning: ${pct}% of budget used ($${sessionCostUsd.toFixed(2)}/$${maxCostUsd.toFixed(2)}). ${pct >= 75 ? "Wrap up soon." : ""}]`);
461
- }
462
- }
463
- }
445
+ emitCostWarningIfNeeded(sessionCostUsd, maxCostUsd, costWarningsEmitted, onText);
464
446
  // Record per-turn metrics (non-streaming)
465
447
  const nsTurnToolNames = toolUseBlocks.map(b => b.name);
466
448
  turnMetrics.push({
@@ -525,14 +507,7 @@ export async function runServerAgentLoop(opts) {
525
507
  totalOut += nonStreamSubTokens.output;
526
508
  sessionCostUsd = estimateCostUsd(totalIn, totalOut, model, 0, cacheReadTokens, cacheCreationTokens) + nonStreamSubTokens.costUsd;
527
509
  // Cost warnings after subagent aggregation (non-streaming)
528
- if (isFinite(maxCostUsd)) {
529
- for (const pct of [25, 50, 75]) {
530
- if (!costWarningsEmitted.has(pct) && sessionCostUsd >= maxCostUsd * (pct / 100)) {
531
- costWarningsEmitted.add(pct);
532
- onText?.(`\n[Cost warning: ${pct}% of budget used ($${sessionCostUsd.toFixed(2)}/$${maxCostUsd.toFixed(2)}). ${pct >= 75 ? "Wrap up soon." : ""}]`);
533
- }
534
- }
535
- }
510
+ emitCostWarningIfNeeded(sessionCostUsd, maxCostUsd, costWarningsEmitted, onText);
536
511
  const assistantContent = buildAssistantContent({ text: currentText, toolUseBlocks });
537
512
  messages.push({ role: "assistant", content: assistantContent });
538
513
  messages.push({ role: "user", content: toolResults });
@@ -572,9 +547,7 @@ function makeToolExecutor(opts, tools, allToolNames, subagentTokens, discoveredT
572
547
  // Subagent delegation — demote models to control cost (sub-agents should never run Opus)
573
548
  if (name === "delegate_task") {
574
549
  const subPrompt = String(input.prompt || "");
575
- const subModelInput = String(input.model || "haiku");
576
- const subModel = (subModelInput === "opus" ? "sonnet" :
577
- subModelInput === "sonnet" ? "haiku" : "haiku");
550
+ const subModel = demoteSubagentModel(input.model ? String(input.model) : undefined);
578
551
  const subMaxTurns = Math.min(Math.max(1, Number(input.max_turns) || 6), 12);
579
552
  const subTools = tools.filter((t) => t.name !== "delegate_task");
580
553
  const subId = `sub-${Date.now().toString(36)}`;
@@ -26,8 +26,8 @@ export const DELEGATE_TASK_TOOL_DEF = {
26
26
  },
27
27
  model: {
28
28
  type: "string",
29
- enum: ["haiku", "sonnet", "opus"],
30
- description: "haiku (fast/$1) for simple lookups. sonnet ($3) for multi-step chains. opus ($5) for complex reasoning. Default: haiku.",
29
+ enum: ["haiku", "sonnet"],
30
+ description: "haiku (fast/$1) for simple lookups. sonnet ($3) for multi-step chains. Default: haiku.",
31
31
  },
32
32
  max_turns: {
33
33
  type: "number",
@@ -88,6 +88,8 @@ export declare function addPromptCaching(tools: Array<Record<string, unknown>>,
88
88
  tools: Array<Record<string, unknown>>;
89
89
  messages: Array<Record<string, unknown>>;
90
90
  };
91
+ /** djb2 string hash — fast, deterministic, no dependencies */
92
+ export declare function djb2Hash(str: string): string;
91
93
  export declare class LoopDetector {
92
94
  private history;
93
95
  private consecutiveErrors;
@@ -110,6 +112,11 @@ export declare class LoopDetector {
110
112
  * are tracked per-action so e.g. voice/speak failing won't block voice/music_compose. */
111
113
  private errorKey;
112
114
  recordCall(name: string, input: Record<string, unknown>): LoopCheckResult;
115
+ /**
116
+ * Track file read frequency — call when the tool is known to be a file read.
117
+ * Blocks re-reading the same path more than FILE_READ_LIMIT times per session.
118
+ */
119
+ trackRead(path: string): LoopCheckResult;
113
120
  recordResult(name: string, success: boolean, input?: Record<string, unknown>): void;
114
121
  endTurn(): BailCheckResult;
115
122
  resetTurn(): void;
@@ -139,6 +146,11 @@ export declare const MODEL_PRICING: Record<string, {
139
146
  outputPer1M: number;
140
147
  thinkingPer1M?: number;
141
148
  }>;
149
+ /**
150
+ * Emit graduated cost warnings at 25%, 50%, 75% thresholds.
151
+ * Single source of truth — replaces copy-pasted blocks in server + CLI.
152
+ */
153
+ export declare function emitCostWarningIfNeeded(sessionCostUsd: number, maxCostUsd: number, costWarningsEmitted: Set<number>, onText?: (text: string) => void): void;
142
154
  export declare function estimateCostUsd(inputTokens: number, outputTokens: number, model: string, thinkingTokens?: number, cacheReadTokens?: number, cacheCreationTokens?: number): number;
143
155
  /**
144
156
  * Route to cheaper model when the task is simple enough.
@@ -157,4 +169,12 @@ export declare function truncateToolResult(content: string, maxChars: number): s
157
169
  export declare function getMaxToolResultChars(contextConfig?: {
158
170
  max_tool_result_chars?: number;
159
171
  } | null): number;
172
+ /**
173
+ * Demote subagent model requests — single source of truth for server + CLI.
174
+ * - explore/research: always haiku
175
+ * - opus: demoted to sonnet
176
+ * - sonnet: kept for plan, demoted to haiku for others
177
+ * - default/undefined: haiku
178
+ */
179
+ export declare function demoteSubagentModel(requested: string | undefined, agentType?: string): "haiku" | "sonnet";
160
180
  export declare function sanitizeError(err: unknown): string;
@@ -195,7 +195,7 @@ export function addPromptCaching(tools, messages) {
195
195
  // LOOP DETECTION
196
196
  // ============================================================================
197
197
  /** djb2 string hash — fast, deterministic, no dependencies */
198
- function djb2Hash(str) {
198
+ export function djb2Hash(str) {
199
199
  let hash = 5381;
200
200
  for (let i = 0; i < str.length; i++) {
201
201
  hash = ((hash << 5) + hash + str.charCodeAt(i)) & 0xffffffff;
@@ -230,18 +230,6 @@ export class LoopDetector {
230
230
  recordCall(name, input) {
231
231
  const inputHash = djb2Hash(JSON.stringify({ name, ...input }));
232
232
  const eKey = this.errorKey(name, input);
233
- // File read frequency tracking — block re-reading the same file >3 times per session
234
- if (name === "read_file" && typeof input.path === "string") {
235
- const filePath = input.path;
236
- const readCount = (this.fileReadCounts.get(filePath) || 0) + 1;
237
- this.fileReadCounts.set(filePath, readCount);
238
- if (readCount > LoopDetector.FILE_READ_LIMIT) {
239
- return {
240
- blocked: true,
241
- reason: `File "${filePath}" already read ${readCount - 1} times this session. Use the content from a previous read instead of re-reading.`,
242
- };
243
- }
244
- }
245
233
  if (this.failedStrategies.has(inputHash)) {
246
234
  return {
247
235
  blocked: true,
@@ -282,6 +270,21 @@ export class LoopDetector {
282
270
  }
283
271
  return { blocked: false };
284
272
  }
273
+ /**
274
+ * Track file read frequency — call when the tool is known to be a file read.
275
+ * Blocks re-reading the same path more than FILE_READ_LIMIT times per session.
276
+ */
277
+ trackRead(path) {
278
+ const readCount = (this.fileReadCounts.get(path) || 0) + 1;
279
+ this.fileReadCounts.set(path, readCount);
280
+ if (readCount > LoopDetector.FILE_READ_LIMIT) {
281
+ return {
282
+ blocked: true,
283
+ reason: `File "${path}" already read ${readCount - 1} times this session. Use the content from a previous read instead of re-reading.`,
284
+ };
285
+ }
286
+ return { blocked: false };
287
+ }
285
288
  recordResult(name, success, input) {
286
289
  const eKey = this.errorKey(name, input);
287
290
  if (success) {
@@ -445,6 +448,20 @@ export const MODEL_PRICING = {
445
448
  "o3-mini": { inputPer1M: 1.10, outputPer1M: 4.40, thinkingPer1M: 4.40 },
446
449
  "o4-mini": { inputPer1M: 1.10, outputPer1M: 4.40, thinkingPer1M: 4.40 },
447
450
  };
451
/**
 * Emit graduated cost warnings at the 25%, 50% and 75% budget thresholds.
 *
 * Each threshold fires at most once per `costWarningsEmitted` set; a single
 * call emits one message for every threshold newly reached.
 *
 * Nothing is emitted when there is no `onText` sink or when the budget is
 * not a positive finite number. A numeric string budget is parsed first, so
 * it is reported instead of throwing on `.toFixed`; `null` and other
 * non-numeric values are ignored. A zero or negative budget is skipped
 * because every threshold would otherwise fire at zero spend.
 *
 * @param {number} sessionCostUsd - Spend accumulated so far, in USD.
 * @param {number} maxCostUsd - Session budget, in USD.
 * @param {Set<number>} costWarningsEmitted - Thresholds already reported; mutated.
 * @param {(text: string) => void} [onText] - Receives each warning message.
 * @returns {void}
 */
export function emitCostWarningIfNeeded(sessionCostUsd, maxCostUsd, costWarningsEmitted, onText) {
    if (!onText)
        return;
    const budget = typeof maxCostUsd === "string" ? Number.parseFloat(maxCostUsd) : maxCostUsd;
    if (typeof budget !== "number" || !Number.isFinite(budget) || budget <= 0)
        return;
    for (const pct of [25, 50, 75]) {
        // Keep the positive comparison: a NaN cost must never trigger a warning.
        if (!costWarningsEmitted.has(pct) && sessionCostUsd >= budget * (pct / 100)) {
            costWarningsEmitted.add(pct);
            onText(`\n[Cost warning: ${pct}% of budget used ($${sessionCostUsd.toFixed(2)}/$${budget.toFixed(2)}).${pct >= 75 ? " Wrap up soon." : ""}]`);
        }
    }
}
448
465
  export function estimateCostUsd(inputTokens, outputTokens, model, thinkingTokens = 0, cacheReadTokens = 0, cacheCreationTokens = 0) {
449
466
  // Exact match first, then find a pricing key that is a prefix of the model ID
450
467
  const pricing = MODEL_PRICING[model]
@@ -547,6 +564,24 @@ export function getMaxToolResultChars(contextConfig) {
547
564
  // ============================================================================
548
565
  // UTILITY — sanitize errors (strip API keys, passwords)
549
566
  // ============================================================================
567
/**
 * Demote subagent model requests — single source of truth for server + CLI.
 * - explore/research agents: always haiku
 * - opus: demoted to sonnet
 * - sonnet: kept for plan agents, demoted to haiku for others
 * - anything else (including undefined or non-string input): haiku
 *
 * Aliases and agent types are matched case-insensitively with surrounding
 * whitespace ignored, so "Opus" or " sonnet " follow the same rules as their
 * canonical spelling instead of silently falling through to haiku.
 *
 * @param {string | undefined} requested - Requested model alias.
 * @param {string} [agentType] - Subagent type, e.g. "explore", "research", "plan".
 * @returns {"haiku" | "sonnet"} The alias the subagent is allowed to run.
 */
export function demoteSubagentModel(requested, agentType) {
    const normalize = (value) => (typeof value === "string" ? value.trim().toLowerCase() : "");
    const type = normalize(agentType);
    if (type === "explore" || type === "research")
        return "haiku";
    switch (normalize(requested)) {
        case "opus":
            return "sonnet";
        case "sonnet":
            return type === "plan" ? "sonnet" : "haiku";
        default:
            return "haiku";
    }
}
550
585
  export function sanitizeError(err) {
551
586
  const msg = String(err);
552
587
  return msg
@@ -45,6 +45,20 @@ export async function dispatchTools(toolCalls, executor, opts) {
45
45
  });
46
46
  return;
47
47
  }
48
+ // File read frequency tracking — tool-dispatch knows about read_file,
49
+ // LoopDetector.trackRead() stays tool-name-agnostic.
50
+ if (tu.name === "read_file" && typeof tu.input.path === "string") {
51
+ const readCheck = loopDetector.trackRead(tu.input.path);
52
+ if (readCheck.blocked) {
53
+ onResult?.(tu.name, false, readCheck.reason, 0);
54
+ resultMap.set(tu.id, {
55
+ type: "tool_result",
56
+ tool_use_id: tu.id,
57
+ content: JSON.stringify({ error: readCheck.reason }),
58
+ });
59
+ return;
60
+ }
61
+ }
48
62
  onStart?.(tu.name, tu.input);
49
63
  const toolStart = Date.now();
50
64
  let result;
package/dist/updater.js CHANGED
@@ -15,7 +15,7 @@ import https from "https";
15
15
  // ============================================================================
16
16
  // CONFIG
17
17
  // ============================================================================
18
- const PACKAGE_NAME = "swagmanager-mcp";
18
+ const PACKAGE_NAME = "whale-code";
19
19
  const CHECK_INTERVAL_MS = 4 * 60 * 60 * 1000; // 4 hours
20
20
  const REGISTRY_URL = `https://registry.npmjs.org/${PACKAGE_NAME}/latest`;
21
21
  // ============================================================================
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "whale-code",
3
- "version": "6.5.0",
3
+ "version": "6.5.2",
4
4
  "description": "whale code — local-first AI agent CLI for inventory, orders, and analytics powered by MCP",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",