cascade-ai 0.5.1 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -4,21 +4,22 @@ import { glob } from 'glob';
4
4
  import Anthropic from '@anthropic-ai/sdk';
5
5
  import OpenAI, { AzureOpenAI } from 'openai';
6
6
  import { GoogleGenAI, HarmBlockThreshold, HarmCategory } from '@google/genai';
7
- import axios2 from 'axios';
8
- import fs3 from 'fs/promises';
9
- import path16 from 'path';
7
+ import fs4 from 'fs/promises';
8
+ import path18 from 'path';
9
+ import os4 from 'os';
10
10
  import * as ignoreFactory from 'ignore';
11
11
  import ignoreFactory__default from 'ignore';
12
12
  import { exec, execFile, execSync } from 'child_process';
13
13
  import { promisify } from 'util';
14
- import fs15 from 'fs';
14
+ import fs17 from 'fs';
15
15
  import { simpleGit } from 'simple-git';
16
16
  import PDFDocument from 'pdfkit';
17
+ import dns from 'dns/promises';
18
+ import net from 'net';
17
19
  import { Client } from '@modelcontextprotocol/sdk/client/index.js';
18
20
  import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
19
21
  import { z } from 'zod';
20
- import os3 from 'os';
21
- import { createContext, runInContext } from 'vm';
22
+ import { Worker } from 'worker_threads';
22
23
  import Database from 'better-sqlite3';
23
24
  import { createServer } from 'http';
24
25
  import { fileURLToPath } from 'url';
@@ -70,13 +71,13 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
70
71
  var keytar_default;
71
72
  var init_keytar = __esm({
72
73
  "node_modules/keytar/build/Release/keytar.node"() {
73
- keytar_default = "./keytar-F4YAPN53.node";
74
+ keytar_default = "./keytar-VMICNFEJ.node";
74
75
  }
75
76
  });
76
77
 
77
- // node-file:F:\Softwares\Github Softwares\Cascade-AI\node_modules\keytar\build\Release\keytar.node
78
+ // node-file:/home/runner/work/Cascade-AI/Cascade-AI/node_modules/keytar/build/Release/keytar.node
78
79
  var require_keytar = __commonJS({
79
- "node-file:F:\\Softwares\\Github Softwares\\Cascade-AI\\node_modules\\keytar\\build\\Release\\keytar.node"(exports$1, module) {
80
+ "node-file:/home/runner/work/Cascade-AI/Cascade-AI/node_modules/keytar/build/Release/keytar.node"(exports, module) {
80
81
  init_keytar();
81
82
  try {
82
83
  module.exports = __require(keytar_default);
@@ -87,7 +88,7 @@ var require_keytar = __commonJS({
87
88
 
88
89
  // node_modules/keytar/lib/keytar.js
89
90
  var require_keytar2 = __commonJS({
90
- "node_modules/keytar/lib/keytar.js"(exports$1, module) {
91
+ "node_modules/keytar/lib/keytar.js"(exports, module) {
91
92
  var keytar = require_keytar();
92
93
  function checkRequired(val, name) {
93
94
  if (!val || val.length <= 0) {
@@ -124,7 +125,7 @@ var require_keytar2 = __commonJS({
124
125
  });
125
126
 
126
127
  // src/constants.ts
127
- var CASCADE_VERSION = "0.5.1";
128
+ var CASCADE_VERSION = "0.9.6";
128
129
  var CASCADE_CONFIG_DIR = ".cascade";
129
130
  var CASCADE_MD_FILE = "CASCADE.md";
130
131
  var CASCADE_IGNORE_FILE = ".cascadeignore";
@@ -292,7 +293,7 @@ var MODELS = {
292
293
  isLocal: false
293
294
  },
294
295
  "gemini-2.5-pro": {
295
- id: "gemini-2.5-pro-preview-05-06",
296
+ id: "gemini-2.5-pro",
296
297
  name: "Gemini 2.5 Pro",
297
298
  provider: "gemini",
298
299
  contextWindow: 1e6,
@@ -304,7 +305,7 @@ var MODELS = {
304
305
  isLocal: false
305
306
  },
306
307
  "gemini-2.5-flash": {
307
- id: "gemini-2.5-flash-preview-04-17",
308
+ id: "gemini-2.5-flash",
308
309
  name: "Gemini 2.5 Flash",
309
310
  provider: "gemini",
310
311
  contextWindow: 1e6,
@@ -369,6 +370,9 @@ var MODELS = {
369
370
  minSizeB: 7
370
371
  }
371
372
  };
373
+ for (const _m of Object.values(MODELS)) {
374
+ if (_m.supportsToolUse === void 0) _m.supportsToolUse = !_m.isLocal;
375
+ }
372
376
  var T1_MODEL_PRIORITY = [
373
377
  "claude-opus-4",
374
378
  "claude-sonnet-4",
@@ -428,12 +432,15 @@ var TOOL_NAMES = {
428
432
  PDF_CREATE: "pdf_create",
429
433
  RUN_CODE: "run_code",
430
434
  PEER_MESSAGE: "peer_message",
431
- WEB_SEARCH: "web_search"
435
+ WEB_SEARCH: "web_search",
436
+ REQUEST_WORKERS: "request_workers"
432
437
  };
433
438
  var DEFAULT_APPROVAL_REQUIRED = [
434
439
  TOOL_NAMES.SHELL,
435
440
  TOOL_NAMES.FILE_DELETE,
436
441
  TOOL_NAMES.FILE_WRITE,
442
+ TOOL_NAMES.FILE_EDIT,
443
+ TOOL_NAMES.GIT,
437
444
  TOOL_NAMES.BROWSER,
438
445
  TOOL_NAMES.GITHUB,
439
446
  "pdf_create",
@@ -480,9 +487,16 @@ var AnthropicProvider = class extends BaseProvider {
480
487
  client;
481
488
  constructor(config, model) {
482
489
  super(config, model);
483
- this.client = new Anthropic({
484
- apiKey: config.apiKey
485
- });
490
+ if (config.authToken) {
491
+ this.client = new Anthropic({
492
+ authToken: config.authToken,
493
+ defaultHeaders: { "anthropic-beta": "oauth-2025-04-20" }
494
+ });
495
+ } else {
496
+ this.client = new Anthropic({
497
+ apiKey: config.apiKey
498
+ });
499
+ }
486
500
  }
487
501
  async generate(options) {
488
502
  const chunks = [];
@@ -505,7 +519,7 @@ var AnthropicProvider = class extends BaseProvider {
505
519
  system: options.systemPrompt,
506
520
  messages,
507
521
  tools: tools?.length ? tools : void 0
508
- });
522
+ }, { signal: options.signal });
509
523
  let isThinking = false;
510
524
  for await (const event of stream) {
511
525
  if (event.type === "content_block_delta") {
@@ -592,7 +606,7 @@ var AnthropicProvider = class extends BaseProvider {
592
606
  }
593
607
  async isAvailable() {
594
608
  try {
595
- return !!this.config.apiKey;
609
+ return !!(this.config.apiKey || this.config.authToken);
596
610
  } catch {
597
611
  return false;
598
612
  }
@@ -693,7 +707,7 @@ var OpenAIProvider = class extends BaseProvider {
693
707
  };
694
708
  let stream;
695
709
  try {
696
- stream = await this.client.chat.completions.create(params);
710
+ stream = await this.client.chat.completions.create(params, { signal: options.signal });
697
711
  } catch (err) {
698
712
  if (err.message && err.message.includes("max_completion_tokens")) {
699
713
  const fallbackParams = { ...params };
@@ -702,7 +716,7 @@ var OpenAIProvider = class extends BaseProvider {
702
716
  if (this.model.id.includes("o1") || this.model.id.includes("o3")) {
703
717
  fallbackParams.temperature = 1;
704
718
  }
705
- stream = await this.client.chat.completions.create(fallbackParams);
719
+ stream = await this.client.chat.completions.create(fallbackParams, { signal: options.signal });
706
720
  } else {
707
721
  throw err;
708
722
  }
@@ -916,7 +930,8 @@ var GeminiProvider = class extends BaseProvider {
916
930
  { category: HarmCategory.HARM_CATEGORY_HARASSMENT, threshold: HarmBlockThreshold.BLOCK_NONE },
917
931
  { category: HarmCategory.HARM_CATEGORY_HATE_SPEECH, threshold: HarmBlockThreshold.BLOCK_NONE }
918
932
  ],
919
- tools: options.tools?.length ? [{ functionDeclarations: options.tools.map(this.convertTool) }] : void 0
933
+ tools: options.tools?.length ? [{ functionDeclarations: options.tools.map(this.convertTool) }] : void 0,
934
+ abortSignal: options.signal
920
935
  }
921
936
  });
922
937
  let fullContent = "";
@@ -1118,6 +1133,8 @@ var GeminiProvider = class extends BaseProvider {
1118
1133
  };
1119
1134
  }
1120
1135
  };
1136
+
1137
+ // src/providers/ollama.ts
1121
1138
  var TOOL_CAPABLE_FAMILIES = [
1122
1139
  "llama3.1",
1123
1140
  "llama3.2",
@@ -1154,9 +1171,10 @@ var OllamaProvider = class extends BaseProvider {
1154
1171
  parameters: t.inputSchema
1155
1172
  }
1156
1173
  }));
1157
- const response = await axios2.post(
1158
- `${this.baseUrl}/api/chat`,
1159
- {
1174
+ const response = await fetch(`${this.baseUrl}/api/chat`, {
1175
+ method: "POST",
1176
+ headers: { "Content-Type": "application/json" },
1177
+ body: JSON.stringify({
1160
1178
  model: this.model.id,
1161
1179
  messages,
1162
1180
  stream: true,
@@ -1165,61 +1183,43 @@ var OllamaProvider = class extends BaseProvider {
1165
1183
  num_predict: options.maxTokens ?? this.model.maxOutputTokens,
1166
1184
  temperature: options.temperature ?? 0.7
1167
1185
  }
1168
- },
1169
- { responseType: "stream" }
1170
- );
1186
+ }),
1187
+ signal: options.signal
1188
+ });
1189
+ if (!response.ok || !response.body) {
1190
+ throw new Error(`Ollama chat request failed: ${response.status} ${response.statusText}`);
1191
+ }
1171
1192
  let fullContent = "";
1172
1193
  let inputTokens = 0;
1173
1194
  let outputTokens = 0;
1174
1195
  const pendingToolCalls = [];
1175
- await new Promise((resolve, reject) => {
1176
- let buffer = "";
1177
- response.data.on("data", (chunk) => {
1178
- buffer += chunk.toString();
1179
- const lines = buffer.split("\n");
1180
- buffer = lines.pop() ?? "";
1181
- for (const line of lines) {
1182
- if (!line.trim()) continue;
1183
- try {
1184
- const parsed = JSON.parse(line);
1185
- if (parsed.message?.content) {
1186
- fullContent += parsed.message.content;
1187
- onChunk({ text: parsed.message.content, finishReason: null });
1188
- }
1189
- if (parsed.message?.tool_calls?.length) {
1190
- pendingToolCalls.push(...parsed.message.tool_calls);
1191
- }
1192
- if (parsed.done) {
1193
- inputTokens = parsed.prompt_eval_count ?? 0;
1194
- outputTokens = parsed.eval_count ?? 0;
1195
- }
1196
- } catch {
1197
- }
1196
+ const handleLine = (line) => {
1197
+ if (!line.trim()) return;
1198
+ try {
1199
+ const parsed = JSON.parse(line);
1200
+ if (parsed.message?.content) {
1201
+ fullContent += parsed.message.content;
1202
+ onChunk({ text: parsed.message.content, finishReason: null });
1198
1203
  }
1199
- });
1200
- response.data.on("end", () => {
1201
- const tail = buffer.trim();
1202
- if (tail) {
1203
- try {
1204
- const parsed = JSON.parse(tail);
1205
- if (parsed.message?.content) {
1206
- fullContent += parsed.message.content;
1207
- onChunk({ text: parsed.message.content, finishReason: null });
1208
- }
1209
- if (parsed.message?.tool_calls?.length) {
1210
- pendingToolCalls.push(...parsed.message.tool_calls);
1211
- }
1212
- if (parsed.done) {
1213
- inputTokens = parsed.prompt_eval_count ?? inputTokens;
1214
- outputTokens = parsed.eval_count ?? outputTokens;
1215
- }
1216
- } catch {
1217
- }
1204
+ if (parsed.message?.tool_calls?.length) {
1205
+ pendingToolCalls.push(...parsed.message.tool_calls);
1218
1206
  }
1219
- resolve();
1220
- });
1221
- response.data.on("error", reject);
1222
- });
1207
+ if (parsed.done) {
1208
+ inputTokens = parsed.prompt_eval_count ?? inputTokens;
1209
+ outputTokens = parsed.eval_count ?? outputTokens;
1210
+ }
1211
+ } catch {
1212
+ }
1213
+ };
1214
+ let buffer = "";
1215
+ const decoder = new TextDecoder();
1216
+ for await (const chunk of response.body) {
1217
+ buffer += decoder.decode(chunk, { stream: true });
1218
+ const lines = buffer.split("\n");
1219
+ buffer = lines.pop() ?? "";
1220
+ for (const line of lines) handleLine(line);
1221
+ }
1222
+ handleLine(buffer);
1223
1223
  const toolCalls = pendingToolCalls.map((tc, i) => {
1224
1224
  let input;
1225
1225
  if (typeof tc.function.arguments === "string") {
@@ -1251,9 +1251,11 @@ var OllamaProvider = class extends BaseProvider {
1251
1251
  }
1252
1252
  async listModels() {
1253
1253
  try {
1254
- const response = await axios2.get(`${this.baseUrl}/api/tags`);
1254
+ const response = await fetch(`${this.baseUrl}/api/tags`);
1255
+ if (!response.ok) return [];
1256
+ const data = await response.json();
1255
1257
  const supportedKeywords = ["llama3", "llama2", "gemma", "mistral", "mixtral", "qwen", "phi3", "codellama", "deepseek", "llava", "starcoder", "stable-code", "nomic-embed"];
1256
- return response.data.models.filter((m) => {
1258
+ return data.models.filter((m) => {
1257
1259
  const name = m.name.toLowerCase();
1258
1260
  return supportedKeywords.some((k) => name.includes(k));
1259
1261
  }).map((m) => ({
@@ -1275,11 +1277,15 @@ var OllamaProvider = class extends BaseProvider {
1275
1277
  }
1276
1278
  }
1277
1279
  async isAvailable() {
1280
+ const ac = new AbortController();
1281
+ const timer = setTimeout(() => ac.abort(), 2e3);
1278
1282
  try {
1279
- await axios2.get(`${this.baseUrl}/api/tags`, { timeout: 2e3 });
1280
- return true;
1283
+ const response = await fetch(`${this.baseUrl}/api/tags`, { signal: ac.signal });
1284
+ return response.ok;
1281
1285
  } catch {
1282
1286
  return false;
1287
+ } finally {
1288
+ clearTimeout(timer);
1283
1289
  }
1284
1290
  }
1285
1291
  convertMessages(messages, systemPrompt) {
@@ -1382,6 +1388,19 @@ var ModelSelector = class {
1382
1388
  addDynamicModel(model) {
1383
1389
  this.availableModels.set(model.id, model);
1384
1390
  }
1391
+ /**
1392
+ * Permanently drop a model from the available set for this session. Used by
1393
+ * the router's 404 / "model not found" self-heal so a dead id is never
1394
+ * selected again after it fails once.
1395
+ */
1396
+ removeModel(id) {
1397
+ this.availableModels.delete(id);
1398
+ }
1399
+ /** Look up an available model by exact id (post-discovery/pricing lookups). */
1400
+ getModelById(id) {
1401
+ const m = this.availableModels.get(id);
1402
+ return m && this.availableProviders.has(m.provider) ? m : void 0;
1403
+ }
1385
1404
  getAvailableModelsForProvider(provider) {
1386
1405
  const models = /* @__PURE__ */ new Map();
1387
1406
  for (const model of this.availableModels.values()) {
@@ -1398,6 +1417,7 @@ var ModelSelector = class {
1398
1417
  model = this.resolveDynamicModel(overrideModelId);
1399
1418
  }
1400
1419
  if (model && this.availableProviders.has(model.provider)) return model;
1420
+ return null;
1401
1421
  }
1402
1422
  if (requireVision) {
1403
1423
  return this.selectVisionModel();
@@ -1460,6 +1480,14 @@ var ModelSelector = class {
1460
1480
  candidates.push(model);
1461
1481
  }
1462
1482
  }
1483
+ const localOnly = this.availableProviders.size > 0 && Array.from(this.availableProviders).every((p) => p === "ollama");
1484
+ if (localOnly) {
1485
+ for (const model of this.availableModels.values()) {
1486
+ if (model.isLocal && this.availableProviders.has(model.provider) && !candidates.some((c) => c.id === model.id)) {
1487
+ candidates.push(model);
1488
+ }
1489
+ }
1490
+ }
1463
1491
  return candidates;
1464
1492
  }
1465
1493
  isProviderAvailable(provider) {
@@ -1864,6 +1892,267 @@ var ModelProfiler = class {
1864
1892
  }
1865
1893
  };
1866
1894
 
1895
+ // src/core/router/savings.ts
1896
+ var NO_SAVINGS = { savedUsd: 0, savedPct: 0, counterfactualUsd: 0 };
1897
+ function computeDelegationSavings(stats, t1Model) {
1898
+ if (!t1Model) return NO_SAVINGS;
1899
+ let counterfactualUsd = 0;
1900
+ const tiers = /* @__PURE__ */ new Set([
1901
+ ...Object.keys(stats.inputTokensByTier),
1902
+ ...Object.keys(stats.outputTokensByTier)
1903
+ ]);
1904
+ for (const tier of tiers) {
1905
+ counterfactualUsd += calculateCost(
1906
+ stats.inputTokensByTier[tier] ?? 0,
1907
+ stats.outputTokensByTier[tier] ?? 0,
1908
+ t1Model
1909
+ );
1910
+ }
1911
+ const savedUsd = counterfactualUsd - stats.totalCostUsd;
1912
+ if (!(savedUsd > 0) || counterfactualUsd <= 0) {
1913
+ return { ...NO_SAVINGS, counterfactualUsd: Math.max(0, counterfactualUsd) };
1914
+ }
1915
+ return {
1916
+ savedUsd,
1917
+ savedPct: Math.round(savedUsd / counterfactualUsd * 1e3) / 10,
1918
+ counterfactualUsd
1919
+ };
1920
+ }
1921
+ var DEFAULT_SNAPSHOT_URL = "https://raw.githubusercontent.com/Varun-SV/Cascade-AI/main/src/core/router/benchmark-data.json";
1922
+ var OPENROUTER_MODELS_URL = "https://openrouter.ai/api/v1/models";
1923
+ var FETCH_TIMEOUT_MS = 8e3;
1924
+ var DEFAULT_CACHE_FILE = path18.join(os4.homedir(), GLOBAL_CONFIG_DIR, "benchmarks-cache.json");
1925
+ function normalizeModelId(id) {
1926
+ let s = id.toLowerCase();
1927
+ const slash = s.lastIndexOf("/");
1928
+ if (slash !== -1) s = s.slice(slash + 1);
1929
+ s = s.replace(/-preview(?:-\d{2}-\d{2})?$/, "");
1930
+ s = s.replace(/-\d{8}$/, "");
1931
+ s = s.replace(/[:@].*$/, "");
1932
+ return s;
1933
+ }
1934
+ var LiveDataProvider = class {
1935
+ snapshot = null;
1936
+ prices = /* @__PURE__ */ new Map();
1937
+ source = "bundled";
1938
+ fetchedAt = 0;
1939
+ loaded = false;
1940
+ refreshing = null;
1941
+ opts;
1942
+ constructor(opts = {}) {
1943
+ this.opts = {
1944
+ live: opts.live ?? true,
1945
+ pricingLive: opts.pricingLive ?? true,
1946
+ refreshHours: opts.refreshHours ?? 24,
1947
+ cacheFile: opts.cacheFile ?? DEFAULT_CACHE_FILE,
1948
+ sourceUrl: opts.sourceUrl
1949
+ };
1950
+ }
1951
+ /** Load cached data from disk (cheap, no network). Safe to call repeatedly. */
1952
+ async load() {
1953
+ if (this.loaded) return;
1954
+ this.loaded = true;
1955
+ try {
1956
+ const raw = await fs4.readFile(this.opts.cacheFile, "utf-8");
1957
+ const cache = JSON.parse(raw);
1958
+ if (cache.snapshot?.families) {
1959
+ this.snapshot = cache.snapshot;
1960
+ this.source = "cache";
1961
+ }
1962
+ if (cache.prices) {
1963
+ for (const [id, p] of Object.entries(cache.prices)) this.prices.set(id, p);
1964
+ }
1965
+ this.fetchedAt = cache.fetchedAt ?? 0;
1966
+ } catch {
1967
+ }
1968
+ }
1969
+ /**
1970
+ * Refresh from the network if the cache is older than the TTL. Coalesces
1971
+ * concurrent callers and never throws — failures keep last-known-good data.
1972
+ */
1973
+ async refresh(force = false) {
1974
+ if (this.refreshing) return this.refreshing;
1975
+ this.refreshing = this.doRefresh(force).finally(() => {
1976
+ this.refreshing = null;
1977
+ });
1978
+ return this.refreshing;
1979
+ }
1980
+ async doRefresh(force) {
1981
+ await this.load();
1982
+ const ttlMs = this.opts.refreshHours * 36e5;
1983
+ const fresh = ttlMs > 0 && Date.now() - this.fetchedAt < ttlMs;
1984
+ if (!force && fresh && this.source !== "bundled") return;
1985
+ const [snap, prices] = await Promise.all([
1986
+ this.opts.live ? this.fetchSnapshot() : Promise.resolve(null),
1987
+ this.opts.pricingLive ? this.fetchPrices() : Promise.resolve(null)
1988
+ ]);
1989
+ let changed = false;
1990
+ if (snap) {
1991
+ this.snapshot = snap;
1992
+ this.source = "live";
1993
+ changed = true;
1994
+ }
1995
+ if (prices && prices.size > 0) {
1996
+ this.prices = prices;
1997
+ changed = true;
1998
+ }
1999
+ if (changed) {
2000
+ this.fetchedAt = Date.now();
2001
+ await this.saveCache();
2002
+ }
2003
+ }
2004
+ async fetchSnapshot() {
2005
+ const url = this.opts.sourceUrl ?? DEFAULT_SNAPSHOT_URL;
2006
+ try {
2007
+ const resp = await withTimeout(fetch(url), FETCH_TIMEOUT_MS, "benchmark fetch timed out");
2008
+ if (!resp.ok) return null;
2009
+ const data = await resp.json();
2010
+ if (!data || typeof data !== "object" || !data.families || typeof data.families !== "object") {
2011
+ return null;
2012
+ }
2013
+ return data;
2014
+ } catch {
2015
+ return null;
2016
+ }
2017
+ }
2018
+ async fetchPrices() {
2019
+ try {
2020
+ const resp = await withTimeout(fetch(OPENROUTER_MODELS_URL), FETCH_TIMEOUT_MS, "pricing fetch timed out");
2021
+ if (!resp.ok) return null;
2022
+ const data = await resp.json();
2023
+ if (!Array.isArray(data?.data)) return null;
2024
+ const out = /* @__PURE__ */ new Map();
2025
+ for (const m of data.data) {
2026
+ if (!m?.id || !m.pricing) continue;
2027
+ const input = Number(m.pricing.prompt) * 1e3;
2028
+ const output = Number(m.pricing.completion) * 1e3;
2029
+ if (!Number.isFinite(input) || !Number.isFinite(output)) continue;
2030
+ out.set(normalizeModelId(m.id), { input, output });
2031
+ }
2032
+ return out;
2033
+ } catch {
2034
+ return null;
2035
+ }
2036
+ }
2037
+ async saveCache() {
2038
+ try {
2039
+ await fs4.mkdir(path18.dirname(this.opts.cacheFile), { recursive: true });
2040
+ const cache = {
2041
+ fetchedAt: this.fetchedAt,
2042
+ snapshot: this.snapshot ?? void 0,
2043
+ prices: Object.fromEntries(this.prices)
2044
+ };
2045
+ await fs4.writeFile(this.opts.cacheFile, JSON.stringify(cache, null, 2), "utf-8");
2046
+ } catch {
2047
+ }
2048
+ }
2049
+ /** Quality profile for a model family, or null when we have no live/cached data. */
2050
+ getQualityProfile(family) {
2051
+ return this.snapshot?.families?.[family] ?? null;
2052
+ }
2053
+ /** Current per-1k price for a model id, or null when unknown. */
2054
+ getLivePrice(modelId) {
2055
+ return this.prices.get(normalizeModelId(modelId)) ?? null;
2056
+ }
2057
+ /**
2058
+ * Returns a price-corrected copy of each model when live pricing is known,
2059
+ * leaving the original untouched (so the shared catalog is never mutated).
2060
+ */
2061
+ applyLivePricing(models) {
2062
+ return models.map((m) => {
2063
+ const p = this.getLivePrice(m.id);
2064
+ if (!p) return m;
2065
+ return { ...m, inputCostPer1kTokens: p.input, outputCostPer1kTokens: p.output };
2066
+ });
2067
+ }
2068
+ /** Where the active quality data came from — for /why and `cascade models`. */
2069
+ getDataSource() {
2070
+ return this.source;
2071
+ }
2072
+ getGeneratedAt() {
2073
+ return this.snapshot?.generatedAt ?? null;
2074
+ }
2075
+ hasLivePricing() {
2076
+ return this.prices.size > 0;
2077
+ }
2078
+ };
2079
+
2080
+ // src/core/router/benchmarks.ts
2081
+ var liveProvider = null;
2082
+ function setBenchmarkLiveProvider(provider) {
2083
+ liveProvider = provider;
2084
+ }
2085
+ var FAMILY_BENCHMARKS = {
2086
+ // Anthropic — strongest at coding and agentic tool-use.
2087
+ "claude-opus": { code: 95, analysis: 92, creative: 90, data: 88 },
2088
+ "claude-sonnet": { code: 93, analysis: 88, creative: 87, data: 85 },
2089
+ "claude-haiku": { code: 80, analysis: 75, creative: 76, data: 72 },
2090
+ // OpenAI — strong all-round, particularly creative/writing.
2091
+ "gpt-4.1": { code: 90, analysis: 89, creative: 91, data: 87 },
2092
+ "gpt-4.1-mini": { code: 82, analysis: 80, creative: 83, data: 79 },
2093
+ "gpt-4.1-nano": { code: 70, analysis: 68, creative: 72, data: 66 },
2094
+ "gpt-4o": { code: 86, analysis: 85, creative: 90, data: 84 },
2095
+ "gpt-4o-mini": { code: 76, analysis: 74, creative: 80, data: 72 },
2096
+ // Google — strongest at analysis/data and long-context.
2097
+ "gemini-2.5-pro": { code: 90, analysis: 93, creative: 86, data: 92 },
2098
+ "gemini-2.5-flash": { code: 82, analysis: 83, creative: 80, data: 82 },
2099
+ "gemini-1.5-pro": { code: 82, analysis: 84, creative: 82, data: 85 },
2100
+ "gemini-2.0-flash": { code: 79, analysis: 80, creative: 79, data: 80 },
2101
+ "gemini-flash-lite": { code: 68, analysis: 68, creative: 70, data: 68 },
2102
+ // Local (Ollama) — lower absolute scores; the ordering is what matters when a
2103
+ // tier is restricted to local-only models.
2104
+ "deepseek": { code: 80, analysis: 72, creative: 68, data: 74 },
2105
+ "qwen": { code: 78, analysis: 73, creative: 72, data: 74 },
2106
+ "codellama": { code: 76, analysis: 60, creative: 55, data: 60 },
2107
+ "llama-70b": { code: 74, analysis: 72, creative: 73, data: 70 },
2108
+ "mistral": { code: 62, analysis: 64, creative: 66, data: 60 },
2109
+ "gemma": { code: 58, analysis: 60, creative: 62, data: 57 },
2110
+ "llama-small": { code: 55, analysis: 56, creative: 60, data: 54 }
2111
+ };
2112
+ var FAMILY_MATCHERS = [
2113
+ [/opus/i, "claude-opus"],
2114
+ [/sonnet/i, "claude-sonnet"],
2115
+ [/haiku/i, "claude-haiku"],
2116
+ [/gpt-?4\.1-nano/i, "gpt-4.1-nano"],
2117
+ [/gpt-?4\.1-mini/i, "gpt-4.1-mini"],
2118
+ [/gpt-?4\.1/i, "gpt-4.1"],
2119
+ [/gpt-?4o-mini/i, "gpt-4o-mini"],
2120
+ [/gpt-?4o/i, "gpt-4o"],
2121
+ [/gemini-?2\.5-pro/i, "gemini-2.5-pro"],
2122
+ [/gemini-?2\.5-flash/i, "gemini-2.5-flash"],
2123
+ [/gemini-?1\.5-pro/i, "gemini-1.5-pro"],
2124
+ [/gemini-?2\.0-flash-lite/i, "gemini-flash-lite"],
2125
+ [/gemini-?2\.0-flash/i, "gemini-2.0-flash"],
2126
+ [/codellama|code-llama|starcoder|stable-code/i, "codellama"],
2127
+ [/deepseek/i, "deepseek"],
2128
+ [/qwen/i, "qwen"],
2129
+ [/llama.?3.*70b|llama3:70b|llama-3-70b/i, "llama-70b"],
2130
+ [/llama/i, "llama-small"],
2131
+ [/mistral|mixtral/i, "mistral"],
2132
+ [/gemma/i, "gemma"]
2133
+ ];
2134
+ function resolveFamily(model) {
2135
+ const hay = `${model.id} ${model.name}`;
2136
+ for (const [re, fam] of FAMILY_MATCHERS) {
2137
+ if (re.test(hay)) return fam;
2138
+ }
2139
+ return null;
2140
+ }
2141
+ function benchmarkScore01(model, taskType) {
2142
+ const fam = resolveFamily(model);
2143
+ if (!fam) return 0.5;
2144
+ const profile = liveProvider?.getQualityProfile(fam) ?? FAMILY_BENCHMARKS[fam];
2145
+ if (!profile) return 0.5;
2146
+ let score;
2147
+ if (taskType === "mixed") {
2148
+ const vals = Object.values(profile).filter((v) => typeof v === "number");
2149
+ score = vals.length ? vals.reduce((a, b) => a + b, 0) / vals.length : 50;
2150
+ } else {
2151
+ score = profile[taskType] ?? 50;
2152
+ }
2153
+ return Math.max(0, Math.min(1, score / 100));
2154
+ }
2155
+
1867
2156
  // src/core/router/index.ts
1868
2157
  var CascadeRouter = class _CascadeRouter extends EventEmitter {
1869
2158
  selector;
@@ -1882,6 +2171,12 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
1882
2171
  tierModels = /* @__PURE__ */ new Map();
1883
2172
  config;
1884
2173
  sessionCostUsd = 0;
2174
+ // Per-run accounting for the hard per-task cap. Reset by beginRun() at the
2175
+ // start of every `cascade run`, independent of the session-wide budget.
2176
+ runTokens = 0;
2177
+ runCostUsd = 0;
2178
+ runBudgetExceeded = false;
2179
+ runBudgetExceededReason;
1885
2180
  /**
1886
2181
  * Budget state machine — guards against two concurrent `generate()` calls
1887
2182
  * each firing the warning or both slipping past the hard cap. All
@@ -1892,6 +2187,12 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
1892
2187
  budgetExceededReason;
1893
2188
  tpmLimiter;
1894
2189
  localQueue;
2190
+ taskAnalyzer;
2191
+ liveData;
2192
+ /** Snapshot of configured/default tier models, taken before Cascade Auto overrides them. */
2193
+ originalTierModels;
2194
+ /** The current run's abort signal — injected into every provider call so a cancel aborts in-flight requests. */
2195
+ runSignal;
1895
2196
  /** Thrown when the configured budget is exceeded. */
1896
2197
  static BudgetExceededError = class extends Error {
1897
2198
  constructor(msg) {
@@ -1918,10 +2219,17 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
1918
2219
  if (!override) continue;
1919
2220
  const model = this.selector.selectForTier(tier, override);
1920
2221
  if (!model) {
1921
- throw new Error(`Configured model "${override}" for ${tier} could not be loaded. Check provider availability and exact model name.`);
1922
- }
1923
- if (model.id !== override && `${model.provider}:${model.id}` !== override) {
1924
- throw new Error(`Configured model "${override}" for ${tier} resolved to "${model.id}". Use the exact provider model ID or prefix the provider (e.g. gemini:${override}).`);
2222
+ const knownProviders = ["anthropic", "openai", "gemini", "azure", "openai-compatible", "ollama"];
2223
+ const hasProviderPrefix = override.includes(":") && knownProviders.some((p) => override.startsWith(p + ":"));
2224
+ if (hasProviderPrefix) {
2225
+ const provider = override.split(":")[0];
2226
+ throw new Error(
2227
+ `Configured model "${override}" for ${tier} cannot be used: provider '${provider}' is not available or unreachable. Check that the provider is running and accessible.`
2228
+ );
2229
+ }
2230
+ throw new Error(
2231
+ `Configured model "${override}" for ${tier} could not be loaded. Check provider availability and exact model name.`
2232
+ );
1925
2233
  }
1926
2234
  this.tierModels.set(tier, model);
1927
2235
  this.ensureProvider(model, config.providers);
@@ -1946,19 +2254,93 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
1946
2254
  profiler.profileAll(allModels).catch(() => {
1947
2255
  });
1948
2256
  }
2257
+ /**
2258
+ * Cascade Auto live data: discover/validate real model ids from each cloud
2259
+ * provider, then fetch current public quality scores + per-token prices and
2260
+ * apply the prices to the available-model set. Best-effort and safe to run in
2261
+ * the background — any failure leaves the bundled catalog/benchmarks in effect.
2262
+ */
2263
+ async refreshLiveData() {
2264
+ const benchCfg = this.config.benchmarks ?? {};
2265
+ if (!this.liveData) {
2266
+ this.liveData = new LiveDataProvider({
2267
+ live: benchCfg.live,
2268
+ pricingLive: benchCfg.pricingLive,
2269
+ refreshHours: benchCfg.refreshHours,
2270
+ sourceUrl: benchCfg.sourceUrl
2271
+ });
2272
+ setBenchmarkLiveProvider(this.liveData);
2273
+ }
2274
+ await this.discoverProviderModels();
2275
+ await this.liveData.refresh().catch(() => {
2276
+ });
2277
+ this.applyLivePricing();
2278
+ }
2279
+ /** Returns the live-data provider once refreshLiveData has run (UX/insight). */
2280
+ getLiveData() {
2281
+ return this.liveData;
2282
+ }
2283
+ /**
2284
+ * Query each available cloud provider's live model list and register the
2285
+ * results. Confirms catalog ids still exist and surfaces newly released
2286
+ * models without a package upgrade. Mirrors discoverOllamaModels.
2287
+ */
2288
+ async discoverProviderModels() {
2289
+ const cloud = ["anthropic", "openai", "gemini", "azure", "openai-compatible"];
2290
+ const tasks = cloud.map(async (type) => {
2291
+ if (!this.selector.isProviderAvailable(type)) return;
2292
+ const seed = this.getAnyModelForProvider(type);
2293
+ if (!seed) return;
2294
+ const cfg = this.config.providers.find((p) => p.type === type) ?? { type };
2295
+ try {
2296
+ const provider = this.createProvider(cfg, seed);
2297
+ if (typeof provider.listModels !== "function") return;
2298
+ const models = await provider.listModels();
2299
+ for (const m of models) this.selector.addDynamicModel(m);
2300
+ } catch {
2301
+ }
2302
+ });
2303
+ await Promise.allSettled(tasks);
2304
+ }
2305
+ /**
2306
+ * Replace available models with live-priced copies and refresh the already
2307
+ * resolved tier models so shared-tier cost accounting uses current prices.
2308
+ */
2309
+ applyLivePricing() {
2310
+ if (!this.liveData?.hasLivePricing()) return;
2311
+ const updated = this.liveData.applyLivePricing(this.selector.getAllAvailableModels());
2312
+ for (const m of updated) this.selector.addDynamicModel(m);
2313
+ for (const tier of ["T1", "T2", "T3"]) {
2314
+ const cur = this.tierModels.get(tier);
2315
+ if (!cur) continue;
2316
+ const fresh = this.selector.getModelById(cur.id);
2317
+ if (fresh) this.tierModels.set(tier, fresh);
2318
+ }
2319
+ }
1949
2320
  async generate(tier, options, onChunk, requireVision = false) {
1950
2321
  if (this.budgetState === "exceeded") {
1951
2322
  throw new _CascadeRouter.BudgetExceededError(
1952
2323
  this.budgetExceededReason ?? "Session budget exceeded."
1953
2324
  );
1954
2325
  }
2326
+ if (this.runBudgetExceeded) {
2327
+ throw new _CascadeRouter.BudgetExceededError(
2328
+ this.runBudgetExceededReason ?? "Per-task budget exceeded."
2329
+ );
2330
+ }
1955
2331
  const limits = this.config?.tierLimits;
1956
2332
  const tierKey = tier.toLowerCase();
1957
2333
  const tierMaxTokens = limits?.[`${tierKey}MaxTokens`];
1958
2334
  if (tierMaxTokens && (!options.maxTokens || options.maxTokens > tierMaxTokens)) {
1959
2335
  options = { ...options, maxTokens: tierMaxTokens };
1960
2336
  }
1961
- const model = requireVision ? this.selector.selectVisionModel() : this.tierModels.get(tier);
2337
+ if (this.runSignal && !options.signal) {
2338
+ options = { ...options, signal: this.runSignal };
2339
+ }
2340
+ if (options.model && !requireVision) {
2341
+ this.ensureProvider(options.model, this.config.providers);
2342
+ }
2343
+ const model = requireVision ? this.selector.selectVisionModel() : options.model ?? this.tierModels.get(tier);
1962
2344
  if (!model) throw new Error(`No model available for tier ${tier}`);
1963
2345
  const provider = this.getProvider(model);
1964
2346
  if (!provider) throw new Error(`No provider for model ${model.id}`);
@@ -1987,16 +2369,33 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
1987
2369
  `Local model ${model.id} inference timed out after ${inferenceTimeoutMs}ms`
1988
2370
  );
1989
2371
  } else if (useStream && onChunk) {
2372
+ const cloudTimeoutMs = this.config.cloudInferenceTimeoutMs ?? 12e4;
1990
2373
  try {
1991
- result = await provider.generateStream(options, (chunk) => {
1992
- const text = typeof chunk?.text === "string" ? chunk.text : "";
1993
- if (text) onChunk({ ...chunk, text });
1994
- });
1995
- } catch {
1996
- result = await provider.generate(options);
2374
+ result = await withTimeout(
2375
+ provider.generateStream(options, (chunk) => {
2376
+ const text = typeof chunk?.text === "string" ? chunk.text : "";
2377
+ if (text) onChunk({ ...chunk, text });
2378
+ }),
2379
+ cloudTimeoutMs,
2380
+ `Model ${model.id} stream timed out after ${cloudTimeoutMs}ms`
2381
+ );
2382
+ } catch (streamErr) {
2383
+ if (streamErr instanceof Error && streamErr.name === "AbortError" || this.runSignal?.aborted || options.signal?.aborted) {
2384
+ throw streamErr;
2385
+ }
2386
+ result = await withTimeout(
2387
+ provider.generate(options),
2388
+ cloudTimeoutMs,
2389
+ `Model ${model.id} inference timed out after ${cloudTimeoutMs}ms`
2390
+ );
1997
2391
  }
1998
2392
  } else {
1999
- result = await provider.generate(options);
2393
+ const cloudTimeoutMs = this.config.cloudInferenceTimeoutMs ?? 12e4;
2394
+ result = await withTimeout(
2395
+ provider.generate(options),
2396
+ cloudTimeoutMs,
2397
+ `Model ${model.id} inference timed out after ${cloudTimeoutMs}ms`
2398
+ );
2000
2399
  }
2001
2400
  const correctedCost = calculateCost(
2002
2401
  result.usage.inputTokens,
@@ -2017,6 +2416,9 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
2017
2416
  this.failover.recordSuccess(model.provider);
2018
2417
  return result;
2019
2418
  } catch (err) {
2419
+ if (err instanceof Error && err.name === "AbortError" || this.runSignal?.aborted || options.signal?.aborted) {
2420
+ throw new CascadeCancelledError("Run cancelled");
2421
+ }
2020
2422
  const errMsg = err instanceof Error ? err.message : String(err);
2021
2423
  if (this.isRateLimitError(errMsg)) {
2022
2424
  this.failover.recordFailure(model.provider, "rate_limit");
@@ -2024,11 +2426,35 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
2024
2426
  if (fallback) {
2025
2427
  this.tierModels.set(tier, fallback);
2026
2428
  this.ensureProvider(fallback, this.config.providers);
2429
+ this.emit("failover", {
2430
+ tier,
2431
+ from: `${model.provider}:${model.id}`,
2432
+ to: `${fallback.provider}:${fallback.id}`,
2433
+ reason: "rate limit"
2434
+ });
2027
2435
  releaseLocalSlot?.();
2028
2436
  releaseLocalSlot = void 0;
2029
2437
  return this.generate(tier, options, onChunk, requireVision);
2030
2438
  }
2031
2439
  }
2440
+ if (isModelNotFoundError(errMsg)) {
2441
+ this.selector.removeModel(model.id);
2442
+ const next = this.selector.selectForTier(tier);
2443
+ if (next && next.id !== model.id) {
2444
+ this.tierModels.set(tier, next);
2445
+ this.ensureProvider(next, this.config.providers);
2446
+ this.emit("failover", {
2447
+ tier,
2448
+ from: `${model.provider}:${model.id}`,
2449
+ to: `${next.provider}:${next.id}`,
2450
+ reason: "model not found"
2451
+ });
2452
+ releaseLocalSlot?.();
2453
+ releaseLocalSlot = void 0;
2454
+ const retryOpts = options.model && options.model.id === model.id ? { ...options, model: void 0 } : options;
2455
+ return this.generate(tier, retryOpts, onChunk, requireVision);
2456
+ }
2457
+ }
2032
2458
  throw err;
2033
2459
  } finally {
2034
2460
  releaseLocalSlot?.();
@@ -2037,18 +2463,74 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
2037
2463
  getModelForTier(tier) {
2038
2464
  return this.tierModels.get(tier);
2039
2465
  }
2466
+ /** Reflection settings for workers (config.reflection). Off unless enabled. */
2467
+ getReflectionConfig() {
2468
+ const r = this.config?.reflection;
2469
+ return { enabled: r?.enabled === true, maxRounds: r?.maxRounds ?? 1 };
2470
+ }
2471
+ /** T3→T2 reinforcement settings (config.reinforcements). Off unless enabled. */
2472
+ getReinforcementsConfig() {
2473
+ const r = this.config?.reinforcements;
2474
+ return { enabled: r?.enabled === true, maxPerSection: r?.maxPerSection ?? 4 };
2475
+ }
2476
+ /**
2477
+ * Resolved T3 wave execution mode. 'auto' becomes 'sequential' when the T3
2478
+ * tier resolves to a LOCAL model (the single-GPU queue serializes anyway, so
2479
+ * running them in parallel just thrashes it), and 'parallel' for cloud.
2480
+ */
2481
+ getT3ExecutionMode() {
2482
+ const mode = this.config?.t3Execution ?? "auto";
2483
+ if (mode === "parallel" || mode === "sequential") return mode;
2484
+ return this.tierModels.get("T3")?.isLocal ? "sequential" : "parallel";
2485
+ }
2040
2486
  /**
2041
2487
  * Cascade Auto: temporarily override the model for a tier.
2042
2488
  * Used by TaskAnalyzer to inject task-optimal models before execution.
2043
2489
  * The override is valid for the current task only — restored by restoreTierModels().
2044
2490
  */
2045
2491
  overrideTierModel(tier, model) {
2492
+ if (!this.originalTierModels) {
2493
+ this.originalTierModels = new Map(this.tierModels);
2494
+ }
2046
2495
  this.tierModels.set(tier, model);
2047
2496
  this.ensureProvider(model, this.config.providers);
2048
2497
  }
2498
+ /**
2499
+ * Restore tier models to the configured/default baseline captured before the
2500
+ * first Cascade Auto override. Called at the end of each run so `/why`, the
2501
+ * status bar, and the next run reflect the configured models, not stale picks.
2502
+ */
2503
+ restoreTierModels() {
2504
+ if (this.originalTierModels) {
2505
+ this.tierModels = new Map(this.originalTierModels);
2506
+ this.originalTierModels = void 0;
2507
+ }
2508
+ }
2509
+ /** Set (or clear) the current run's abort signal for instant cancellation. */
2510
+ setRunSignal(signal) {
2511
+ this.runSignal = signal;
2512
+ }
2049
2513
  getSelector() {
2050
2514
  return this.selector;
2051
2515
  }
2516
+ /** Wire the Cascade Auto task analyzer used for per-subtask model routing. */
2517
+ setTaskAnalyzer(analyzer) {
2518
+ this.taskAnalyzer = analyzer;
2519
+ }
2520
+ /**
2521
+ * Cascade Auto per-subtask routing: pick the benchmark-best model for a
2522
+ * specific subtask's text, scoped to the tier's eligible candidates. Returns
2523
+ * null when Cascade Auto is off (callers then use the shared tier model).
2524
+ * Pure heuristic — no extra LLM call.
2525
+ */
2526
+ async selectModelForSubtask(tier, text) {
2527
+ if (!this.config?.cascadeAuto || !this.taskAnalyzer || !text.trim()) return null;
2528
+ try {
2529
+ return await this.taskAnalyzer.selectModel(text, tier, this.selector);
2530
+ } catch {
2531
+ return null;
2532
+ }
2533
+ }
2052
2534
  getStats() {
2053
2535
  return {
2054
2536
  totalTokens: this.stats.totalTokens,
@@ -2061,6 +2543,14 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
2061
2543
  outputTokensByTier: { ...this.stats.outputTokensByTier }
2062
2544
  };
2063
2545
  }
2546
+ /**
2547
+ * What did delegation save? Compares actual spend against the
2548
+ * counterfactual of every call running on the T1 model. This is the
2549
+ * number only a tiered hierarchy can show.
2550
+ */
2551
+ getDelegationSavings() {
2552
+ return computeDelegationSavings(this.stats, this.tierModels.get("T1"));
2553
+ }
2064
2554
  /**
2065
2555
  * Returns a human-readable cost summary broken down by tier.
2066
2556
  * Example: { T1: "$0.0120 (2 calls, 1500 tokens)", T2: "$0.0043 (6 calls, 4200 tokens)", ... }
@@ -2119,6 +2609,11 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
2119
2609
  * Sets (or clears) a runtime session budget cap (USD).
2120
2610
  * Pass null to remove the cap.
2121
2611
  */
2612
+ /** Raise/set the per-task token cap at runtime (used by /continue resume). */
2613
+ setMaxTokensPerRun(maxTokens) {
2614
+ if (!this.config) return;
2615
+ this.config = { ...this.config, budget: { ...this.config.budget, maxTokensPerRun: maxTokens } };
2616
+ }
2122
2617
  setSessionBudget(usd) {
2123
2618
  if (!this.config) return;
2124
2619
  if (!this.config.budget) {
@@ -2221,7 +2716,39 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
2221
2716
  this.stats.tokensByTier[tier] = (this.stats.tokensByTier[tier] ?? 0) + usage.totalTokens;
2222
2717
  this.stats.inputTokensByTier[tier] = (this.stats.inputTokensByTier[tier] ?? 0) + usage.inputTokens;
2223
2718
  this.stats.outputTokensByTier[tier] = (this.stats.outputTokensByTier[tier] ?? 0) + usage.outputTokens;
2719
+ this.runTokens += usage.totalTokens;
2720
+ this.runCostUsd += usage.estimatedCostUsd;
2224
2721
  this.updateBudgetState();
2722
+ this.enforceRunBudget();
2723
+ }
2724
+ /**
2725
+ * Resets per-run accounting at the start of each `cascade run`. Session
2726
+ * totals and a session-wide budget halt are deliberately preserved; only the
2727
+ * per-task ceiling is cleared so the next task starts with a fresh allowance.
2728
+ */
2729
+ beginRun() {
2730
+ this.runTokens = 0;
2731
+ this.runCostUsd = 0;
2732
+ this.runBudgetExceeded = false;
2733
+ this.runBudgetExceededReason = void 0;
2734
+ }
2735
+ /**
2736
+ * Enforce the hard per-task ceiling. Once tripped, the flag makes every
2737
+ * subsequent (and concurrent) generate() call in this run fail fast.
2738
+ */
2739
+ enforceRunBudget() {
2740
+ if (this.runBudgetExceeded) return;
2741
+ const budget = this.config?.budget;
2742
+ const maxTokens = budget?.maxTokensPerRun;
2743
+ const maxCost = budget?.maxCostPerRunUsd;
2744
+ const overTokens = maxTokens != null && this.runTokens >= maxTokens;
2745
+ const overCost = maxCost != null && this.runCostUsd >= maxCost;
2746
+ if (!overTokens && !overCost) return;
2747
+ const reason = overTokens ? `Per-task token cap of ${maxTokens.toLocaleString()} reached (used ${this.runTokens.toLocaleString()}). Stopping this run to avoid runaway cost \u2014 raise budget.maxTokensPerRun for larger jobs.` : `Per-task cost cap of $${maxCost.toFixed(4)} reached (spent $${this.runCostUsd.toFixed(4)}). Stopping this run to avoid runaway cost.`;
2748
+ this.runBudgetExceeded = true;
2749
+ this.runBudgetExceededReason = reason;
2750
+ this.emit("budget:exceeded", { reason, spentUsd: this.sessionCostUsd });
2751
+ throw new _CascadeRouter.BudgetExceededError(reason);
2225
2752
  }
2226
2753
  /**
2227
2754
  * Single point of truth for budget state transitions. Called after each
@@ -2271,6 +2798,9 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
2271
2798
  return /rate.?limit|429|too.?many.?requests|quota/i.test(msg);
2272
2799
  }
2273
2800
  };
2801
/**
 * Heuristically decides whether a provider error message means "this model
 * does not exist or cannot serve this request".
 *
 * The HTTP status is matched as a standalone token (`\b404\b`) so that
 * unrelated numbers containing those digits (token counts, request ids such
 * as "14040") are not mistaken for a 404.
 *
 * @param {string} msg - Error message text from the provider call.
 * @returns {boolean} true when the message looks like a model-not-found error.
 */
function isModelNotFoundError(msg) {
  return /not[_\s]?found|\b404\b|does not exist|no such model|unknown model|invalid model|model_not_found|not supported for generatecontent|is not supported for/i.test(msg);
}
2274
2804
  var BaseTier = class extends EventEmitter {
2275
2805
  id;
2276
2806
  role;
@@ -2553,60 +3083,95 @@ var AuditLogger = class {
2553
3083
 
2554
3084
  // src/tools/text-tool-parser.ts
2555
3085
  var TOOL_CALL_RE = /<tool_call>\s*([\s\S]*?)\s*<\/tool_call>/g;
2556
- var JSON_BLOCK_RE = /```json\s*([\s\S]*?)\s*```/g;
2557
- var FUNCTION_OBJ_RE = /\{\s*"function"\s*:\s*\{[^}]*"name"\s*:[^}]*\}\s*\}/g;
3086
+ var JSON_BLOCK_RE = /```(?:json|tool_call|tool)?\s*([\s\S]*?)```/g;
2558
3087
  function parseTextToolCalls(text) {
2559
- const results = tryXmlBlocks(text);
2560
- if (results.length > 0) return results;
2561
- const jsonBlockResults = tryJsonCodeBlocks(text);
2562
- if (jsonBlockResults.length > 0) return jsonBlockResults;
2563
- return tryFunctionCallObjects(text);
3088
+ const xml = collect(text, TOOL_CALL_RE);
3089
+ if (xml.length > 0) return xml;
3090
+ const fenced = collect(text, JSON_BLOCK_RE);
3091
+ if (fenced.length > 0) return fenced;
3092
+ return tryBareObjects(text);
2564
3093
  }
2565
- function tryXmlBlocks(text) {
3094
+ function collect(text, re) {
2566
3095
  const results = [];
2567
3096
  let match;
2568
- TOOL_CALL_RE.lastIndex = 0;
2569
- while ((match = TOOL_CALL_RE.exec(text)) !== null) {
2570
- try {
2571
- const raw = JSON.parse(match[1]);
2572
- if (typeof raw.name !== "string") continue;
2573
- const input = typeof raw.input === "object" && raw.input !== null ? raw.input : {};
2574
- results.push({ name: raw.name, input });
2575
- } catch {
2576
- }
3097
+ re.lastIndex = 0;
3098
+ while ((match = re.exec(text)) !== null) {
3099
+ const body = (match[1] ?? "").trim();
3100
+ const parsed = parseJsonLoose(body);
3101
+ const call = coerceCall(parsed);
3102
+ if (call) results.push(call);
2577
3103
  }
2578
3104
  return results;
2579
3105
  }
2580
- function tryJsonCodeBlocks(text) {
3106
+ function tryBareObjects(text) {
2581
3107
  const results = [];
2582
- let match;
2583
- JSON_BLOCK_RE.lastIndex = 0;
2584
- while ((match = JSON_BLOCK_RE.exec(text)) !== null) {
2585
- try {
2586
- const raw = JSON.parse(match[1]);
2587
- if (typeof raw.name !== "string") continue;
2588
- const input = typeof raw.input === "object" && raw.input !== null ? raw.input : {};
2589
- results.push({ name: raw.name, input });
2590
- } catch {
3108
+ for (let i = 0; i < text.length; i++) {
3109
+ if (text[i] !== "{") continue;
3110
+ let depth = 0;
3111
+ let inStr = false;
3112
+ let esc = false;
3113
+ let end = -1;
3114
+ for (let j = i; j < text.length; j++) {
3115
+ const c = text[j];
3116
+ if (esc) {
3117
+ esc = false;
3118
+ continue;
3119
+ }
3120
+ if (c === "\\") {
3121
+ esc = true;
3122
+ continue;
3123
+ }
3124
+ if (c === '"') {
3125
+ inStr = !inStr;
3126
+ continue;
3127
+ }
3128
+ if (inStr) continue;
3129
+ if (c === "{") depth++;
3130
+ else if (c === "}") {
3131
+ depth--;
3132
+ if (depth === 0) {
3133
+ end = j;
3134
+ break;
3135
+ }
3136
+ }
2591
3137
  }
3138
+ if (end === -1) break;
3139
+ const candidate = text.slice(i, end + 1);
3140
+ if (/['"]name['"]\s*:/.test(candidate) && /['"](?:input|arguments)['"]\s*:/.test(candidate)) {
3141
+ const call = coerceCall(parseJsonLoose(candidate));
3142
+ if (call) results.push(call);
3143
+ }
3144
+ i = end;
2592
3145
  }
2593
3146
  return results;
2594
3147
  }
2595
- function tryFunctionCallObjects(text) {
2596
- const results = [];
2597
- let match;
2598
- FUNCTION_OBJ_RE.lastIndex = 0;
2599
- while ((match = FUNCTION_OBJ_RE.exec(text)) !== null) {
3148
+ function parseJsonLoose(raw) {
3149
+ if (!raw) return null;
3150
+ try {
3151
+ return JSON.parse(raw);
3152
+ } catch {
2600
3153
  try {
2601
- const raw = JSON.parse(match[0]);
2602
- const fn = raw.function;
2603
- if (!fn || typeof fn.name !== "string") continue;
2604
- const input = typeof fn.arguments === "object" && fn.arguments !== null ? fn.arguments : {};
2605
- results.push({ name: fn.name, input });
3154
+ return JSON.parse(raw.replace(/'/g, '"'));
2606
3155
  } catch {
3156
+ return null;
2607
3157
  }
2608
3158
  }
2609
- return results;
3159
+ }
3160
+ function coerceCall(raw) {
3161
+ if (!raw || typeof raw !== "object") return null;
3162
+ const obj = raw;
3163
+ const fn = obj.function && typeof obj.function === "object" ? obj.function : null;
3164
+ const name = typeof obj.name === "string" ? obj.name : fn && typeof fn.name === "string" ? fn.name : null;
3165
+ if (!name) return null;
3166
+ const rawInput = obj.input ?? obj.arguments ?? (fn ? fn.input ?? fn.arguments : void 0);
3167
+ let input = {};
3168
+ if (rawInput && typeof rawInput === "object") {
3169
+ input = rawInput;
3170
+ } else if (typeof rawInput === "string") {
3171
+ const parsed = parseJsonLoose(rawInput);
3172
+ if (parsed && typeof parsed === "object") input = parsed;
3173
+ }
3174
+ return { name, input };
2610
3175
  }
2611
3176
  function toToolCall(parsed, index) {
2612
3177
  return {
@@ -2617,32 +3182,59 @@ function toToolCall(parsed, index) {
2617
3182
  }
2618
3183
  function buildTextToolSystemPrompt(tools) {
2619
3184
  const toolDefs = tools.map((t) => {
2620
- const props = t.inputSchema?.properties ?? {};
2621
- const paramLines = Object.entries(props).map(([k, v]) => ` "${k}": "<${v.description ?? k}>"`);
2622
- return `\u2022 ${t.name}: ${t.description}
2623
- Input: {${paramLines.length ? "\n" + paramLines.join(",\n") + "\n " : ""}}`;
3185
+ const schema = t.inputSchema ?? {};
3186
+ const props = schema.properties && typeof schema.properties === "object" ? schema.properties : {};
3187
+ const required = Array.isArray(schema.required) ? schema.required : [];
3188
+ const paramLines = Object.entries(props).map(([k, v]) => {
3189
+ const type = typeof v.type === "string" ? v.type : "any";
3190
+ const desc = typeof v.description === "string" ? v.description : k;
3191
+ const req = required.includes(k) ? " [required]" : "";
3192
+ const enumVals = Array.isArray(v.enum) ? ` (one of: ${v.enum.map((e) => JSON.stringify(e)).join(", ")})` : "";
3193
+ return ` - ${k} (${type})${req}: ${desc}${enumVals}`;
3194
+ });
3195
+ return `\u2022 ${t.name} \u2014 ${t.description}${paramLines.length ? "\n" + paramLines.join("\n") : "\n (no parameters)"}`;
2624
3196
  }).join("\n");
2625
3197
  return `
2626
3198
  TOOL USE INSTRUCTIONS:
2627
- You do not have native tool-use capability. To call a tool, write a <tool_call> block:
3199
+ You do not have native tool-use capability. To call a tool, output a single <tool_call> block containing JSON with the tool name and its input arguments:
2628
3200
 
2629
3201
  <tool_call>
2630
- {"name": "<tool_name>", "input": {<parameters>}}
3202
+ {"name": "<tool_name>", "input": { ...arguments... }}
2631
3203
  </tool_call>
2632
3204
 
3205
+ Rules:
3206
+ - Use exactly the parameter names shown below and include every [required] parameter.
3207
+ - For parameters that list "one of", use one of those values verbatim.
3208
+ - Emit valid JSON with double quotes. Call only ONE tool at a time, then wait for the result.
3209
+
2633
3210
  Available tools:
2634
3211
  ${toolDefs}
2635
3212
 
2636
3213
  EXAMPLE \u2014 calling the "shell" tool to list files:
2637
3214
  <tool_call>
2638
- {"name": "shell", "input": {"command": "ls -la /workspace"}}
3215
+ {"name": "shell", "input": {"command": "ls -la"}}
2639
3216
  </tool_call>
2640
3217
 
2641
- You will then receive a user message with the result, then continue your work.
2642
- Only call one tool at a time. When you have enough information, provide your final answer.`;
3218
+ When you have enough information, stop calling tools and write your final answer.`;
2643
3219
  }
2644
3220
 
2645
3221
  // src/core/tiers/t3-worker.ts
3222
/**
 * Error carrying the name of the tool that failed, so a catch site can tell it
 * apart from ordinary errors via `instanceof` and report which tool it was.
 */
var CriticalToolError = class extends Error {
  /** Name of the tool whose failure produced this error. */
  toolName;
  /**
   * @param {string} message - Failure description.
   * @param {string} toolName - Name of the failing tool.
   */
  constructor(message, toolName) {
    super(message);
    this.toolName = toolName;
    this.name = "CriticalToolError";
  }
};
3230
/**
 * Error that carries whatever output had been produced when it was raised,
 * so a catch site can still surface that partial text.
 */
var WorkerStallError = class extends Error {
  /** Output text captured at the moment the error was created. */
  partialOutput;
  /**
   * @param {string} message - Description of the stall.
   * @param {string} partialOutput - Output text to attach to the error.
   */
  constructor(message, partialOutput) {
    super(message);
    this.partialOutput = partialOutput;
    this.name = "WorkerStallError";
  }
};
2646
3238
  var T3_SYSTEM_PROMPT = `You are a T3 Worker agent in the Cascade AI system. Your job is to execute a specific subtask completely and accurately.
2647
3239
 
2648
3240
  Rules:
@@ -2664,6 +3256,10 @@ var T3Worker = class extends BaseTier {
2664
3256
  store;
2665
3257
  audit;
2666
3258
  tools = [];
3259
+ /** 0 = top-level worker (may request reinforcements); 1 = a spawned reinforcement (may not). */
3260
+ reinforcementDepth = 0;
3261
+ /** Sibling-worker requests this worker made via request_workers (T3→T2). */
3262
+ pendingReinforcements = [];
2667
3263
  /** @deprecated — kept only as fallback when no escalator is attached */
2668
3264
  sessionApprovals = /* @__PURE__ */ new Map();
2669
3265
  peerBus;
@@ -2676,10 +3272,22 @@ var T3Worker = class extends BaseTier {
2676
3272
  this.log(`Peer message from ${msg.fromId}: ${msg.type}`);
2677
3273
  this.receivePeerSync(msg.fromId, msg.payload);
2678
3274
  });
3275
+ this.peerBus.on("broadcast", (msg) => {
3276
+ const payload = msg?.payload;
3277
+ if (payload?.type === "TOOL_CREATED" && payload.spec && this.toolCreator) {
3278
+ this.toolCreator.registerSpec(payload.spec);
3279
+ this.tools = this.toolRegistry.getToolDefinitions();
3280
+ this.log(`Registered peer tool "${payload.spec.name}" from broadcast.`);
3281
+ }
3282
+ });
2679
3283
  }
2680
3284
  setPermissionEscalator(escalator) {
2681
3285
  this.permissionEscalator = escalator;
2682
3286
  }
3287
+ /** Marks this worker as a spawned reinforcement (depth 1 — cannot request more). */
3288
+ markAsReinforcement() {
3289
+ this.reinforcementDepth = 1;
3290
+ }
2683
3291
  setToolCreator(creator) {
2684
3292
  this.toolCreator = creator;
2685
3293
  }
@@ -2700,6 +3308,31 @@ var T3Worker = class extends BaseTier {
2700
3308
  this.setLabel(assignment.subtaskTitle);
2701
3309
  this.setStatus("ACTIVE");
2702
3310
  this.tools = this.toolRegistry.getToolDefinitions();
3311
+ if (this.reinforcementDepth === 0 && this.router.getReinforcementsConfig?.()?.enabled) {
3312
+ this.tools = [...this.tools, {
3313
+ name: "request_workers",
3314
+ description: "Ask your manager to spawn additional sibling workers for sub-problems you discover are too large or parallelizable to finish alone. Use sparingly \u2014 only when the work genuinely needs to fan out.",
3315
+ inputSchema: {
3316
+ type: "object",
3317
+ properties: {
3318
+ subtasks: {
3319
+ type: "array",
3320
+ description: "New sibling subtasks for your manager to spawn.",
3321
+ items: {
3322
+ type: "object",
3323
+ properties: {
3324
+ title: { type: "string" },
3325
+ description: { type: "string" },
3326
+ expectedOutput: { type: "string" }
3327
+ },
3328
+ required: ["title", "description"]
3329
+ }
3330
+ }
3331
+ },
3332
+ required: ["subtasks"]
3333
+ }
3334
+ }];
3335
+ }
2703
3336
  if (assignment.dependsOn?.length && this.peerBus) {
2704
3337
  this.sendStatusUpdate({
2705
3338
  progressPct: 0,
@@ -2800,12 +3433,31 @@ Now execute your subtask using this context where relevant.`
2800
3433
  return this.buildResult("ESCALATED", output, { checksRun, passed, failed }, issues, correctionAttempts);
2801
3434
  }
2802
3435
  }
3436
+ const reflectCfg = this.router.getReflectionConfig?.() ?? { enabled: false, maxRounds: 1 };
3437
+ if (reflectCfg.enabled) {
3438
+ this.sendStatusUpdate({ progressPct: 85, currentAction: "Reflecting on output", status: "IN_PROGRESS" });
3439
+ output = await this.reflectAndImprove(assignment, output, reflectCfg.maxRounds);
3440
+ }
2803
3441
  this.setStatus("COMPLETED", output);
2804
3442
  this.sendStatusUpdate({ progressPct: 100, currentAction: "Subtask complete", status: "IN_PROGRESS", output });
2805
3443
  this.peerBus?.publish(this.id, assignment.subtaskId, output, "COMPLETED");
2806
3444
  return this.buildResult("COMPLETED", output, { checksRun, passed, failed }, issues, correctionAttempts);
2807
3445
  } catch (err) {
2808
3446
  const errMsg = err instanceof Error ? err.message : String(err);
3447
+ if (err instanceof WorkerStallError) {
3448
+ issues.push(`Stalled: ${errMsg}`);
3449
+ const finalOutput2 = err.partialOutput || output || errMsg;
3450
+ this.setStatus("FAILED", finalOutput2);
3451
+ this.peerBus?.publish(this.id, assignment.subtaskId, finalOutput2, "FAILED");
3452
+ return this.buildResult("ESCALATED", finalOutput2, { checksRun, passed, failed }, issues, correctionAttempts);
3453
+ }
3454
+ if (err instanceof CriticalToolError) {
3455
+ issues.push(`[CRITICAL_TOOL_ERROR] ${err.toolName}: ${errMsg}`);
3456
+ const finalOutput2 = output || `Tool "${err.toolName}" failed unrecoverably: ${errMsg}`;
3457
+ this.setStatus("FAILED", finalOutput2);
3458
+ this.peerBus?.publish(this.id, assignment.subtaskId, finalOutput2, "FAILED");
3459
+ return this.buildResult("ESCALATED", finalOutput2, { checksRun, passed, failed }, issues, correctionAttempts);
3460
+ }
2809
3461
  issues.push(`Execution error: ${errMsg}`);
2810
3462
  const finalOutput = output || errMsg;
2811
3463
  this.setStatus("FAILED", finalOutput);
@@ -2843,8 +3495,17 @@ Now execute your subtask using this context where relevant.`
2843
3495
  const MAX_ITERATIONS = 15;
2844
3496
  const requiresArtifact = this.requiresArtifact();
2845
3497
  tools = [...tools];
2846
- const t3Model = this.router.getModelForTier("T3");
2847
- const useTextTools = t3Model?.supportsToolUse === false && tools.length > 0;
3498
+ let subtaskModel;
3499
+ try {
3500
+ const subtaskText = `${this.assignment?.subtaskTitle ?? ""} ${this.assignment?.description ?? ""} ${this.assignment?.expectedOutput ?? ""}`;
3501
+ subtaskModel = await this.router.selectModelForSubtask("T3", subtaskText) ?? void 0;
3502
+ if (subtaskModel) {
3503
+ this.log(`Cascade Auto: routing this subtask to ${subtaskModel.provider}:${subtaskModel.id}`);
3504
+ }
3505
+ } catch {
3506
+ }
3507
+ const effectiveModel = subtaskModel ?? this.router.getModelForTier("T3");
3508
+ const useTextTools = effectiveModel?.supportsToolUse === false && tools.length > 0;
2848
3509
  const textToolSuffix = useTextTools ? buildTextToolSystemPrompt(tools) : "";
2849
3510
  while (iterations < MAX_ITERATIONS) {
2850
3511
  iterations++;
@@ -2856,7 +3517,8 @@ Now execute your subtask using this context where relevant.`
2856
3517
  HIERARCHY CONTEXT: ${this.hierarchyContext}` : "") + textToolSuffix,
2857
3518
  // Don't pass tools array when model can't use them natively
2858
3519
  tools: useTextTools ? void 0 : tools.length ? tools : void 0,
2859
- maxTokens: 4096
3520
+ maxTokens: 4096,
3521
+ ...subtaskModel ? { model: subtaskModel } : {}
2860
3522
  };
2861
3523
  const result = await this.router.generate(
2862
3524
  "T3",
@@ -2880,10 +3542,17 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : "") + textToolSuffix,
2880
3542
  }
2881
3543
  stalledArtifactIterations += 1;
2882
3544
  if (stalledArtifactIterations >= 2) {
3545
+ const partial = result.content || "";
2883
3546
  if (stalledArtifactIterations === 2) {
2884
- throw new Error(`Worker stalled waiting for artifact creation. Requesting dynamic tool generation from T2 Manager for: ${this.assignment?.subtaskTitle ?? "unknown task"}`);
3547
+ throw new WorkerStallError(
3548
+ `Worker stalled waiting for artifact creation. Requesting dynamic tool generation from T2 Manager for: ${this.assignment?.subtaskTitle ?? "unknown task"}`,
3549
+ partial
3550
+ );
2885
3551
  }
2886
- throw new Error("Artifact-producing task stalled without creating or verifying the required files");
3552
+ throw new WorkerStallError(
3553
+ "Artifact-producing task stalled without creating or verifying the required files",
3554
+ partial
3555
+ );
2887
3556
  }
2888
3557
  await this.context.addMessage({
2889
3558
  role: "user",
@@ -2920,7 +3589,41 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : "") + textToolSuffix,
2920
3589
  toolCalls: allToolCalls
2921
3590
  };
2922
3591
  }
3592
+ /**
3593
+ * Lightweight argument check against the tool's JSON Schema: required fields
3594
+ * present and enum values in range. Not a full validator — just the two
3595
+ * failure modes weak models hit most. Returns an error message, or null if OK.
3596
+ */
3597
+ validateToolInput(tc) {
3598
+ const def = this.tools.find((t) => t.name === tc.name);
3599
+ const schema = def?.inputSchema;
3600
+ if (!schema) return null;
3601
+ const required = Array.isArray(schema.required) ? schema.required : [];
3602
+ const missing = required.filter((k) => tc.input[k] === void 0 || tc.input[k] === null || tc.input[k] === "");
3603
+ if (missing.length) {
3604
+ return `Tool error: missing required parameter(s) for "${tc.name}": ${missing.join(", ")}. Expected: ${JSON.stringify(schema)}. Supply them and call the tool again.`;
3605
+ }
3606
+ if (schema.properties) {
3607
+ for (const [k, prop] of Object.entries(schema.properties)) {
3608
+ const allowed = Array.isArray(prop?.enum) ? prop.enum : null;
3609
+ if (allowed && tc.input[k] !== void 0 && !allowed.includes(tc.input[k])) {
3610
+ return `Tool error: invalid value for "${k}" in "${tc.name}": ${JSON.stringify(tc.input[k])}. Must be one of ${JSON.stringify(allowed)}.`;
3611
+ }
3612
+ }
3613
+ }
3614
+ return null;
3615
+ }
2923
3616
  async executeTool(tc) {
3617
+ if (tc.name === "request_workers") {
3618
+ const msg = this.recordReinforcements(tc.input);
3619
+ this.emit("tool:result", { id: tc.id, tierId: this.id, toolName: tc.name, output: msg, durationMs: 0 });
3620
+ return msg;
3621
+ }
3622
+ const validationError = this.validateToolInput(tc);
3623
+ if (validationError) {
3624
+ this.emit("tool:result", { id: tc.id, tierId: this.id, toolName: tc.name, error: validationError, durationMs: 0 });
3625
+ return validationError;
3626
+ }
2924
3627
  const needsApproval = this.toolRegistry.requiresApproval(tc.name);
2925
3628
  if (needsApproval) {
2926
3629
  if (this.permissionEscalator) {
@@ -2941,7 +3644,14 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : "") + textToolSuffix,
2941
3644
  const wasApproved = this.sessionApprovals.get(tc.name);
2942
3645
  if (!wasApproved) return `Tool ${tc.name} was denied by user.`;
2943
3646
  } else {
3647
+ const LEGACY_APPROVAL_TIMEOUT_MS = 6e5;
2944
3648
  const legacyDecision = await new Promise((resolve) => {
3649
+ const eventName = `tool:approval-response:${this.id}-${tc.id}`;
3650
+ const timer = setTimeout(() => {
3651
+ this.removeAllListeners(eventName);
3652
+ resolve({ approved: false });
3653
+ }, LEGACY_APPROVAL_TIMEOUT_MS);
3654
+ timer.unref?.();
2945
3655
  this.emit("tool:approval-request", {
2946
3656
  id: `${this.id}-${tc.id}`,
2947
3657
  tierId: this.id,
@@ -2950,7 +3660,10 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : "") + textToolSuffix,
2950
3660
  description: `T3 (${this.assignment?.subtaskTitle}) wants to run "${tc.name}"`,
2951
3661
  isDangerous: this.toolRegistry.isDangerous(tc.name)
2952
3662
  });
2953
- this.once(`tool:approval-response:${this.id}-${tc.id}`, resolve);
3663
+ this.once(eventName, (d) => {
3664
+ clearTimeout(timer);
3665
+ resolve(d);
3666
+ });
2954
3667
  });
2955
3668
  if (legacyDecision.always) this.sessionApprovals.set(tc.name, legacyDecision.approved);
2956
3669
  if (!legacyDecision.approved) return `Tool ${tc.name} was denied by user.`;
@@ -2969,8 +3682,8 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : "") + textToolSuffix,
2969
3682
  tierId: this.id,
2970
3683
  sessionId: this.taskId,
2971
3684
  requireApproval: false,
2972
- saveSnapshot: async (path17, content) => {
2973
- this.store?.addFileSnapshot(this.taskId, path17, content);
3685
+ saveSnapshot: async (path19, content) => {
3686
+ this.store?.addFileSnapshot(this.taskId, path19, content);
2974
3687
  },
2975
3688
  sendPeerSync: (to, syncType, content) => {
2976
3689
  this.peerBus?.send(this.id, to, syncType, this.assignment?.subtaskId ?? "", content);
@@ -2994,7 +3707,10 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : "") + textToolSuffix,
2994
3707
  const durationMs = Date.now() - toolStartMs;
2995
3708
  const errMsg = err instanceof Error ? err.message : String(err);
2996
3709
  this.emit("tool:result", { id: tc.id, tierId: this.id, toolName: tc.name, error: errMsg, durationMs });
2997
- return `Tool error: ${errMsg}`;
3710
+ if (/\b(429|rate.?limit|authentication|api.?key|forbidden|401|403)\b/i.test(errMsg)) {
3711
+ throw new CriticalToolError(errMsg, tc.name);
3712
+ }
3713
+ return await this.adaptiveFallback(tc, `Tool error: ${errMsg}`);
2998
3714
  }
2999
3715
  }
3000
3716
  /**
@@ -3072,6 +3788,11 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : "") + textToolSuffix,
3072
3788
  */
3073
3789
  async coordinateFileIntents(assignment) {
3074
3790
  if (!this.peerBus) return;
3791
+ const haystack = `${assignment.description}
3792
+ ${assignment.expectedOutput}`;
3793
+ if (!/\b(create|write|save|generate|produce|output|edit|update|modify|append|overwrite|rewrite)\b/i.test(haystack)) {
3794
+ return;
3795
+ }
3075
3796
  const plannedFiles = this.extractArtifactPaths(assignment);
3076
3797
  if (!plannedFiles.length) return;
3077
3798
  this.peerBus.broadcast(this.id, {
@@ -3082,16 +3803,22 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : "") + textToolSuffix,
3082
3803
  await new Promise((r) => setTimeout(r, 500));
3083
3804
  const sortedFiles = [...plannedFiles].sort();
3084
3805
  for (const filePath of sortedFiles) {
3085
- if (this.peerBus.isFileLocked(filePath)) {
3086
- this.log(`[T3] Waiting for file lock: ${filePath}`);
3087
- this.sendStatusUpdate({
3088
- progressPct: 5,
3089
- currentAction: `Waiting for peer to finish editing: ${filePath}`,
3090
- status: "IN_PROGRESS"
3806
+ try {
3807
+ if (this.peerBus.isFileLocked(filePath)) {
3808
+ this.log(`[T3] Waiting for file lock: ${filePath}`);
3809
+ this.sendStatusUpdate({
3810
+ progressPct: 5,
3811
+ currentAction: `Waiting for peer to finish editing: ${filePath}`,
3812
+ status: "IN_PROGRESS"
3813
+ });
3814
+ await this.peerBus.waitForFileRelease(filePath, 1e4).catch(() => {
3815
+ });
3816
+ }
3817
+ await this.peerBus.lockFile(this.id, filePath, 1e4).catch(() => {
3091
3818
  });
3092
- await this.peerBus.waitForFileRelease(filePath);
3819
+ } catch (err) {
3820
+ this.log(`[T3] Lock coordination skipped for ${filePath}: ${err instanceof Error ? err.message : String(err)}`);
3093
3821
  }
3094
- await this.peerBus.lockFile(this.id, filePath);
3095
3822
  }
3096
3823
  const origPublish = this.peerBus.publish.bind(this.peerBus);
3097
3824
  const bus = this.peerBus;
@@ -3122,13 +3849,13 @@ ${assignment.expectedOutput}`;
3122
3849
  const artifactPaths = this.extractArtifactPaths(assignment);
3123
3850
  if (!artifactPaths.length) return { ok: true, issues: [] };
3124
3851
  const issues = [];
3125
- const { exec: exec3 } = await import('child_process');
3852
+ const { exec: exec2 } = await import('child_process');
3126
3853
  const { promisify: promisify4 } = await import('util');
3127
- const execAsync2 = promisify4(exec3);
3854
+ const execAsync2 = promisify4(exec2);
3128
3855
  for (const artifactPath of artifactPaths) {
3129
- const absolutePath = path16.resolve(process.cwd(), artifactPath);
3856
+ const absolutePath = path18.resolve(process.cwd(), artifactPath);
3130
3857
  try {
3131
- const stat = await fs3.stat(absolutePath);
3858
+ const stat = await fs4.stat(absolutePath);
3132
3859
  if (!stat.isFile()) {
3133
3860
  issues.push(`Expected artifact is not a file: ${artifactPath}`);
3134
3861
  continue;
@@ -3138,7 +3865,7 @@ ${assignment.expectedOutput}`;
3138
3865
  continue;
3139
3866
  }
3140
3867
  if (!/\.pdf$/i.test(artifactPath)) {
3141
- const content = await fs3.readFile(absolutePath, "utf-8");
3868
+ const content = await fs4.readFile(absolutePath, "utf-8");
3142
3869
  if (!content.trim()) {
3143
3870
  issues.push(`Artifact content is empty: ${artifactPath}`);
3144
3871
  continue;
@@ -3147,7 +3874,7 @@ ${assignment.expectedOutput}`;
3147
3874
  issues.push(`PDF artifact looks too small to be valid: ${artifactPath}`);
3148
3875
  continue;
3149
3876
  }
3150
- const ext = path16.extname(absolutePath).toLowerCase();
3877
+ const ext = path18.extname(absolutePath).toLowerCase();
3151
3878
  try {
3152
3879
  if (ext === ".ts" || ext === ".tsx") {
3153
3880
  await execAsync2(`npx tsc --noEmit ${absolutePath}`, { timeout: 1e4 });
@@ -3169,6 +3896,61 @@ ${stdout}`);
3169
3896
  }
3170
3897
  return { ok: issues.length === 0, issues };
3171
3898
  }
3899
+ /**
3900
+ * Reflection / self-critique: critique the output against the broader GOAL
3901
+ * (not just the subtask spec the self-test checks) and revise once if it falls
3902
+ * short. Two cheap calls per round — a JSON verdict, then a rewrite only if
3903
+ * needed. Best-effort: any parse/error just keeps the current output.
3904
+ */
3905
+ async reflectAndImprove(assignment, output, maxRounds) {
3906
+ const sys = this.systemPromptOverride + (this.hierarchyContext ? `
3907
+
3908
+ HIERARCHY CONTEXT: ${this.hierarchyContext}` : "");
3909
+ let current = output;
3910
+ for (let round = 0; round < Math.max(1, maxRounds); round++) {
3911
+ try {
3912
+ const verdict = await this.router.generate("T3", {
3913
+ messages: [{
3914
+ role: "user",
3915
+ content: `Does this output FULLY achieve the goal \u2014 not just the literal task, but the intent behind it?
3916
+
3917
+ Goal / expected: ${assignment.expectedOutput}
3918
+ Subtask: ${assignment.description}
3919
+
3920
+ Output:
3921
+ ${current}
3922
+
3923
+ Reply with ONLY JSON: {"sufficient": true|false, "notes": "what is weak or missing if not sufficient"}`
3924
+ }],
3925
+ systemPrompt: sys,
3926
+ maxTokens: 400
3927
+ });
3928
+ const parsed = JSON.parse(/\{[\s\S]*\}/.exec(verdict.content)?.[0] ?? "{}");
3929
+ if (parsed.sufficient !== false) break;
3930
+ const improved = await this.router.generate("T3", {
3931
+ messages: [{
3932
+ role: "user",
3933
+ content: `Improve the following so it fully achieves the goal. Address specifically: ${parsed.notes ?? "gaps vs the goal"}.
3934
+ Output ONLY the improved result \u2014 no preamble, no commentary.
3935
+
3936
+ Goal / expected: ${assignment.expectedOutput}
3937
+
3938
+ Current output:
3939
+ ${current}`
3940
+ }],
3941
+ systemPrompt: sys,
3942
+ maxTokens: 4096
3943
+ });
3944
+ const next = (improved.content ?? "").trim();
3945
+ if (!next) break;
3946
+ current = next;
3947
+ this.log("Reflection: revised output for better goal alignment.");
3948
+ } catch {
3949
+ break;
3950
+ }
3951
+ }
3952
+ return current;
3953
+ }
3172
3954
  async selfTest(assignment, output) {
3173
3955
  const prompt = `Self-test this output against the assignment requirements.
3174
3956
 
@@ -3243,6 +4025,35 @@ ${assignment.constraints.map((c) => `- ${c}`).join("\n")}
3243
4025
 
3244
4026
  Begin execution now.`;
3245
4027
  }
4028
+ /**
4029
+ * Records a request_workers call (T3→T2 reinforcement). Capped at
4030
+ * maxPerSection; reinforcement workers (depth 1) cannot request more.
4031
+ */
4032
+ recordReinforcements(input) {
4033
+ if (this.reinforcementDepth !== 0) {
4034
+ return "request_workers is unavailable to reinforcement workers \u2014 complete your assigned subtask.";
4035
+ }
4036
+ const max = this.router.getReinforcementsConfig?.()?.maxPerSection ?? 4;
4037
+ const raw = Array.isArray(input.subtasks) ? input.subtasks : [];
4038
+ let added = 0;
4039
+ for (const s of raw) {
4040
+ if (this.pendingReinforcements.length >= max) break;
4041
+ const o = s;
4042
+ if (typeof o?.title !== "string" || typeof o?.description !== "string") continue;
4043
+ this.pendingReinforcements.push({
4044
+ subtaskId: `reinf-${this.id}-${this.pendingReinforcements.length + 1}`,
4045
+ subtaskTitle: o.title,
4046
+ description: o.description,
4047
+ expectedOutput: typeof o.expectedOutput === "string" ? o.expectedOutput : o.title,
4048
+ constraints: [],
4049
+ peerT3Ids: [],
4050
+ parentT2: this.parentId ?? "root",
4051
+ dependsOn: []
4052
+ });
4053
+ added++;
4054
+ }
4055
+ return added > 0 ? `Requested ${added} reinforcement worker(s) from your manager; they will run in parallel. Focus on your own part \u2014 do not redo their work.` : "No valid reinforcement subtasks (each needs a title and description), or the per-section limit was reached.";
4056
+ }
3246
4057
  buildResult(status, output, testResults, issues, correctionAttempts) {
3247
4058
  return {
3248
4059
  subtaskId: this.assignment?.subtaskId ?? "",
@@ -3251,7 +4062,8 @@ Begin execution now.`;
3251
4062
  testResults,
3252
4063
  issues,
3253
4064
  peerSyncsUsed: this.peerSyncBuffer.map((m) => m.fromId),
3254
- correctionAttempts
4065
+ correctionAttempts,
4066
+ reinforcements: this.pendingReinforcements.length ? this.pendingReinforcements : void 0
3255
4067
  };
3256
4068
  }
3257
4069
  isFileOperation(toolName) {
@@ -3270,6 +4082,17 @@ var PeerBus = class extends EventEmitter {
3270
4082
  /** Called when any peer message or broadcast is sent — used for dashboard visibility. */
3271
4083
  onPeerMessage;
3272
4084
  sessionId = "";
4085
+ /** Surface coordination traffic (locks, barriers) to the visibility hook. */
4086
+ emitCoordination(fromId, text) {
4087
+ this.onPeerMessage?.({
4088
+ fromId,
4089
+ toId: void 0,
4090
+ syncType: "COORDINATION",
4091
+ payload: text,
4092
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
4093
+ sessionId: this.sessionId
4094
+ });
4095
+ }
3273
4096
  register(peerId) {
3274
4097
  this.members.add(peerId);
3275
4098
  }
@@ -3406,8 +4229,10 @@ var PeerBus = class extends EventEmitter {
3406
4229
  const existing = this.fileLocks.get(filePath);
3407
4230
  if (!existing) {
3408
4231
  this.fileLocks.set(filePath, { holderId: tierId, lockedAt: (/* @__PURE__ */ new Date()).toISOString(), waiters: [] });
4232
+ this.emitCoordination(tierId, `\u{1F512} locked ${filePath}`);
3409
4233
  return;
3410
4234
  }
4235
+ this.emitCoordination(tierId, `\u23F3 waiting for ${filePath} (held by ${existing.holderId})`);
3411
4236
  return new Promise((resolve, reject) => {
3412
4237
  const timer = setTimeout(() => {
3413
4238
  reject(new Error(`File lock timeout for ${filePath} (held by ${existing.holderId})`));
@@ -3415,6 +4240,7 @@ var PeerBus = class extends EventEmitter {
3415
4240
  existing.waiters.push(() => {
3416
4241
  clearTimeout(timer);
3417
4242
  this.fileLocks.set(filePath, { holderId: tierId, lockedAt: (/* @__PURE__ */ new Date()).toISOString(), waiters: [] });
4243
+ this.emitCoordination(tierId, `\u{1F512} locked ${filePath}`);
3418
4244
  resolve();
3419
4245
  });
3420
4246
  });
@@ -3425,6 +4251,7 @@ var PeerBus = class extends EventEmitter {
3425
4251
  releaseFile(tierId, filePath) {
3426
4252
  const lock = this.fileLocks.get(filePath);
3427
4253
  if (!lock || lock.holderId !== tierId) return;
4254
+ this.emitCoordination(tierId, `\u{1F513} released ${filePath}`);
3428
4255
  const nextWaiter = lock.waiters.shift();
3429
4256
  if (nextWaiter) {
3430
4257
  nextWaiter();
@@ -3504,6 +4331,7 @@ var PeerBus = class extends EventEmitter {
3504
4331
  }
3505
4332
  const bar = this.barriers.get(barrierName);
3506
4333
  bar.arrived.add(peerId);
4334
+ this.emitCoordination(peerId, `\u22A8 barrier "${barrierName}" (${bar.arrived.size}/${bar.total})`);
3507
4335
  if (bar.arrived.size >= bar.total) {
3508
4336
  this.emit(`barrier:${barrierName}`);
3509
4337
  return;
@@ -3536,6 +4364,7 @@ var T2Manager = class extends BaseTier {
3536
4364
  router;
3537
4365
  toolRegistry;
3538
4366
  assignment;
4367
+ sectionModel;
3539
4368
  t3Workers = /* @__PURE__ */ new Map();
3540
4369
  escalations = [];
3541
4370
  peerSyncBuffer = [];
@@ -3545,6 +4374,8 @@ var T2Manager = class extends BaseTier {
3545
4374
  t2PeerBus;
3546
4375
  permissionEscalator;
3547
4376
  toolCreator;
4377
+ /** Optional boardroom gate (Moderate / root-T2 runs) — pauses after decomposition. */
4378
+ planApprovalCallback;
3548
4379
  /** AbortController for the current T3 wave — aborted on cancel-and-respawn */
3549
4380
  waveAbortController = null;
3550
4381
  setPeerBus(bus) {
@@ -3582,6 +4413,10 @@ var T2Manager = class extends BaseTier {
3582
4413
  setToolCreator(creator) {
3583
4414
  this.toolCreator = creator;
3584
4415
  }
4416
+ /** Boardroom gate for Moderate (root-T2) runs: pause after decomposition. */
4417
+ setPlanApprovalCallback(cb) {
4418
+ this.planApprovalCallback = cb;
4419
+ }
3585
4420
  /**
3586
4421
  * Phase 1 of T2 peer discussion: broadcast this section's plan so sibling T2s
3587
4422
  * and T1 can detect overlaps and coordinate execution order.
@@ -3635,9 +4470,39 @@ var T2Manager = class extends BaseTier {
3635
4470
  status: "IN_PROGRESS"
3636
4471
  });
3637
4472
  this.log(`T2 managing section: ${assignment.sectionTitle}`);
4473
+ this.sectionModel = void 0;
4474
+ try {
4475
+ const sectionText = `${assignment.sectionTitle} ${assignment.description} ${assignment.expectedOutput}`;
4476
+ this.sectionModel = await this.router.selectModelForSubtask("T2", sectionText) ?? void 0;
4477
+ if (this.sectionModel) {
4478
+ this.log(`Cascade Auto: routing this section to ${this.sectionModel.provider}:${this.sectionModel.id}`);
4479
+ }
4480
+ } catch {
4481
+ }
3638
4482
  try {
3639
4483
  this.throwIfCancelled();
3640
- const subtasks = assignment.t3Subtasks.length > 0 ? assignment.t3Subtasks : await this.decomposeSection(assignment);
4484
+ let subtasks = assignment.t3Subtasks.length > 0 ? assignment.t3Subtasks : await this.decomposeSection(assignment);
4485
+ if (this.planApprovalCallback) {
4486
+ const decision = await this.planApprovalCallback(subtasks, assignment.sectionTitle);
4487
+ if (!decision.approved) {
4488
+ const output = "Plan rejected \u2014 nothing was executed.";
4489
+ this.setStatus("COMPLETED", output);
4490
+ this.sendStatusUpdate({ progressPct: 100, currentAction: "Plan rejected by user", status: "IN_PROGRESS", output });
4491
+ return { sectionId: assignment.sectionId, sectionTitle: assignment.sectionTitle, status: "COMPLETED", t3Results: [], sectionSummary: output, issues: [] };
4492
+ }
4493
+ if (decision.keepSubtaskIds?.length) {
4494
+ const keep = new Set(decision.keepSubtaskIds);
4495
+ subtasks = subtasks.filter((s) => keep.has(s.subtaskId));
4496
+ }
4497
+ if (decision.note?.trim()) {
4498
+ subtasks = await this.decomposeSection({
4499
+ ...assignment,
4500
+ description: `${assignment.description}
4501
+
4502
+ Guidance (must be followed): ${decision.note}`
4503
+ });
4504
+ }
4505
+ }
3641
4506
  this.sendStatusUpdate({
3642
4507
  progressPct: 20,
3643
4508
  currentAction: `Dispatching ${subtasks.length} T3 workers`,
@@ -3711,7 +4576,8 @@ Return ONLY the JSON array.`;
3711
4576
  systemPrompt: this.systemPromptOverride + T2_SYSTEM_PROMPT + (this.hierarchyContext ? `
3712
4577
 
3713
4578
  HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
3714
- maxTokens: 2e3
4579
+ maxTokens: 2e3,
4580
+ ...this.sectionModel ? { model: this.sectionModel } : {}
3715
4581
  });
3716
4582
  try {
3717
4583
  const jsonMatch = /\[[\s\S]*\]/.exec(result.content);
@@ -3815,6 +4681,8 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
3815
4681
  let remaining = new Set(sanitizedAssignments.map((a) => a.subtaskId));
3816
4682
  let wave = 0;
3817
4683
  let respawnBudget = 1;
4684
+ const reinforceCfg = this.router.getReinforcementsConfig?.() ?? { enabled: false, maxPerSection: 4 };
4685
+ let reinforcementsAdded = 0;
3818
4686
  while (remaining.size > 0) {
3819
4687
  const runnableIds = [...remaining].filter((id) => (inDegree.get(id) ?? 0) === 0);
3820
4688
  if (runnableIds.length === 0) {
@@ -3839,15 +4707,27 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
3839
4707
  const waveSignal = AbortSignal.any(
3840
4708
  [this.signal, this.waveAbortController.signal].filter(Boolean)
3841
4709
  );
3842
- const waveResults = await Promise.allSettled(
3843
- runnableIds.map(async (id) => {
3844
- const assignment = sanitizedAssignments.find((a) => a.subtaskId === id);
3845
- const worker = workerMap.get(id);
3846
- const result = await worker.execute(assignment, taskId, waveSignal);
3847
- resultMap.set(id, result);
3848
- return result;
3849
- })
3850
- );
4710
+ const runOne = async (id) => {
4711
+ const assignment = sanitizedAssignments.find((a) => a.subtaskId === id);
4712
+ const worker = workerMap.get(id);
4713
+ const result = await worker.execute(assignment, taskId, waveSignal);
4714
+ resultMap.set(id, result);
4715
+ return result;
4716
+ };
4717
+ let waveResults;
4718
+ if (this.router.getT3ExecutionMode?.() === "sequential") {
4719
+ this.log(`Wave ${wave}: running ${runnableIds.length} subtask(s) sequentially (local tier)`);
4720
+ waveResults = [];
4721
+ for (const id of runnableIds) {
4722
+ try {
4723
+ waveResults.push({ status: "fulfilled", value: await runOne(id) });
4724
+ } catch (reason) {
4725
+ waveResults.push({ status: "rejected", reason });
4726
+ }
4727
+ }
4728
+ } else {
4729
+ waveResults = await Promise.allSettled(runnableIds.map(runOne));
4730
+ }
3851
4731
  const escalatedToolIdx = respawnBudget > 0 ? waveResults.findIndex(
3852
4732
  (r) => r.status === "fulfilled" && r.value.status === "ESCALATED" && r.value.issues.some((iss) => iss.includes("dynamic tool generation"))
3853
4733
  ) : -1;
@@ -3875,6 +4755,8 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
3875
4755
  [SYSTEM]: Dynamic tool "${toolName}" is now available \u2014 use it to complete your task.`;
3876
4756
  }
3877
4757
  }
4758
+ const spec = this.toolCreator.getSpec(toolName);
4759
+ if (spec) this.t3PeerBus.broadcast(this.id, { type: "TOOL_CREATED", spec });
3878
4760
  }
3879
4761
  for (const id of runnableIds) {
3880
4762
  this.t3PeerBus.clearOutput(id);
@@ -3920,6 +4802,35 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
3920
4802
  inDegree.set(dependent, Math.max(0, (inDegree.get(dependent) ?? 0) - 1));
3921
4803
  }
3922
4804
  }
4805
+ if (reinforceCfg.enabled && reinforcementsAdded < reinforceCfg.maxPerSection) {
4806
+ let addedThisWave = 0;
4807
+ for (const id of runnableIds) {
4808
+ for (const req of resultMap.get(id)?.reinforcements ?? []) {
4809
+ if (reinforcementsAdded >= reinforceCfg.maxPerSection) break;
4810
+ reinforcementsAdded++;
4811
+ addedThisWave++;
4812
+ const assignment = {
4813
+ ...req,
4814
+ subtaskId: `reinf-${this.id}-${reinforcementsAdded}`,
4815
+ dependsOn: [],
4816
+ peerT3Ids: []
4817
+ };
4818
+ sanitizedAssignments.push(assignment);
4819
+ adj.set(assignment.subtaskId, /* @__PURE__ */ new Set());
4820
+ inDegree.set(assignment.subtaskId, 0);
4821
+ remaining.add(assignment.subtaskId);
4822
+ const fresh = this.buildWorkerMap([assignment], taskId);
4823
+ for (const [k, v] of fresh) {
4824
+ v.markAsReinforcement();
4825
+ workerMap.set(k, v);
4826
+ }
4827
+ this.log(`Reinforcement: spawned worker "${assignment.subtaskTitle}" (requested by ${id})`);
4828
+ }
4829
+ }
4830
+ if (addedThisWave > 0) {
4831
+ this.sendStatusUpdate({ progressPct: 55, currentAction: `Added ${addedThisWave} reinforcement worker(s)`, status: "IN_PROGRESS" });
4832
+ }
4833
+ }
3923
4834
  }
3924
4835
  return [...resultMap.values()];
3925
4836
  }
@@ -4029,7 +4940,8 @@ NEW OUTPUTS TO INTEGRATE:
4029
4940
  systemPrompt: this.systemPromptOverride + "You are a T2 Manager. Summarize the work of your T3 workers succinctly." + (this.hierarchyContext ? `
4030
4941
 
4031
4942
  HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
4032
- maxTokens: 500
4943
+ maxTokens: 500,
4944
+ ...this.sectionModel ? { model: this.sectionModel } : {}
4033
4945
  });
4034
4946
  currentSummary = result.content;
4035
4947
  } catch (err) {
@@ -4078,7 +4990,8 @@ Reply with exactly one word: YES, NO, or UNSURE.`;
4078
4990
 
4079
4991
  HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
4080
4992
  maxTokens: 10,
4081
- temperature: 0
4993
+ temperature: 0,
4994
+ ...this.sectionModel ? { model: this.sectionModel } : {}
4082
4995
  });
4083
4996
  const answer = result.content.trim().toUpperCase();
4084
4997
  if (answer.includes("YES")) {
@@ -4186,6 +5099,7 @@ var T1Administrator = class extends BaseTier {
4186
5099
  taskGoal = "";
4187
5100
  peerMessageCallback;
4188
5101
  peerMessageSessionId = "";
5102
+ planApprovalCallback;
4189
5103
  constructor(router, toolRegistry, config) {
4190
5104
  super("T1", "T1");
4191
5105
  this.router = router;
@@ -4212,6 +5126,17 @@ var T1Administrator = class extends BaseTier {
4212
5126
  this.t2PeerBus.onPeerMessage = cb;
4213
5127
  this.t2PeerBus.sessionId = sessionId;
4214
5128
  }
5129
+ /**
5130
+ * Install a "boardroom" gate: called with T1's plan BEFORE any T2 manager
5131
+ * spawns. When unset, plans proceed immediately (headless/SDK unchanged).
5132
+ */
5133
+ setPlanApprovalCallback(cb) {
5134
+ this.planApprovalCallback = cb;
5135
+ }
5136
+ /** Decompose a prompt into a plan WITHOUT executing it (powers /plan preview). */
5137
+ async previewPlan(prompt) {
5138
+ return this.decomposeTask(prompt);
5139
+ }
4215
5140
  async execute(userPrompt, images, systemContext, signal) {
4216
5141
  this.signal = signal;
4217
5142
  this.taskId = randomUUID();
@@ -4230,29 +5155,72 @@ var T1Administrator = class extends BaseTier {
4230
5155
  enrichedPrompt = await this.analyzeImages(userPrompt, images);
4231
5156
  }
4232
5157
  this.throwIfCancelled();
4233
- const plan = await this.decomposeTask(enrichedPrompt, systemContext);
4234
- this.sendStatusUpdate({
4235
- progressPct: 10,
4236
- currentAction: `Plan ready: ${plan.complexity} \u2192 ${plan.sections.length} sections`,
4237
- status: "IN_PROGRESS"
4238
- });
4239
- this.emit("plan", { taskId: this.taskId, plan });
4240
- this.throwIfCancelled();
5158
+ let plan = await this.decomposeTask(enrichedPrompt, systemContext);
5159
+ this.sendStatusUpdate({
5160
+ progressPct: 10,
5161
+ currentAction: `Plan ready: ${plan.complexity} \u2192 ${plan.sections.length} sections`,
5162
+ status: "IN_PROGRESS"
5163
+ });
5164
+ this.emit("plan", { taskId: this.taskId, plan });
5165
+ if (this.planApprovalCallback) {
5166
+ const maxRounds = this.config.planReview?.maxRevisionRounds ?? 5;
5167
+ const reviewer = this.config.planReview?.autoReviewer === true;
5168
+ let round = 0;
5169
+ for (; ; ) {
5170
+ const critique = reviewer ? await this.reviewPlan(plan, enrichedPrompt) ?? void 0 : void 0;
5171
+ this.sendStatusUpdate({
5172
+ progressPct: 10,
5173
+ currentAction: "Boardroom: waiting for plan approval",
5174
+ status: "IN_PROGRESS"
5175
+ });
5176
+ const decision = await this.planApprovalCallback(plan, { critique });
5177
+ if (!decision.approved) {
5178
+ const output2 = "Plan rejected in the boardroom \u2014 nothing was executed. Rephrase the request or adjust the plan with a new prompt.";
5179
+ this.setStatus("COMPLETED", output2);
5180
+ this.sendStatusUpdate({ progressPct: 100, currentAction: "Plan rejected by user", status: "IN_PROGRESS", output: output2 });
5181
+ return { output: output2, t2Results: [], taskId: this.taskId, complexity: plan.complexity };
5182
+ }
5183
+ if (decision.editedPlan?.sections?.length) {
5184
+ plan = decision.editedPlan;
5185
+ try {
5186
+ this.validatePlan(plan);
5187
+ } catch {
5188
+ }
5189
+ this.emit("plan", { taskId: this.taskId, plan });
5190
+ }
5191
+ if (decision.note?.trim() && round < maxRounds) {
5192
+ round++;
5193
+ this.log(`Boardroom note \u2014 re-planning (round ${round}/${maxRounds}): ${decision.note}`);
5194
+ plan = await this.decomposeTask(
5195
+ `${enrichedPrompt}
5196
+
5197
+ Board guidance (must be followed in the plan): ${decision.note}`,
5198
+ systemContext
5199
+ );
5200
+ this.emit("plan", { taskId: this.taskId, plan });
5201
+ continue;
5202
+ }
5203
+ break;
5204
+ }
5205
+ }
5206
+ this.throwIfCancelled();
4241
5207
  let allT2Results = await this.dispatchT2Managers(plan.sections);
4242
5208
  let pass = 1;
4243
- const MAX_REPLAN_PASSES = 2;
4244
- while (pass <= MAX_REPLAN_PASSES) {
5209
+ const maxReplanPasses = this.config.maxReplanPasses ?? 2;
5210
+ const okCount = (rs) => rs.filter((r) => r.status === "COMPLETED" || r.status === "PARTIAL").length;
5211
+ while (pass <= maxReplanPasses) {
4245
5212
  const reviewResult = await this.reviewT2Outputs(enrichedPrompt, plan, allT2Results);
4246
5213
  if (reviewResult.approved) {
4247
5214
  this.log("T1 Review passed.");
4248
5215
  break;
4249
5216
  }
4250
- this.log(`T1 Review rejected outputs. Replanning (Pass ${pass}). Reason: ${reviewResult.reason}`);
5217
+ this.log(`T1 Review rejected outputs. Replanning (Pass ${pass}/${maxReplanPasses}). Reason: ${reviewResult.reason}`);
4251
5218
  this.sendStatusUpdate({
4252
5219
  progressPct: 80 + pass * 5,
4253
5220
  currentAction: `Review failed: ${reviewResult.reason}. Replanning...`,
4254
5221
  status: "IN_PROGRESS"
4255
5222
  });
5223
+ const okBefore = okCount(allT2Results);
4256
5224
  const correctionPlan = await this.decomposeTask(`The previous execution plan failed to fully satisfy the original goal or encountered errors.
4257
5225
  Review reason: ${reviewResult.reason}
4258
5226
 
@@ -4261,6 +5229,10 @@ Original goal: ${enrichedPrompt}
4261
5229
  Create a CORRECTION PLAN that contains only the new sections needed to fix the issues. Do not repeat successful sections.`);
4262
5230
  const correctionResults = await this.dispatchT2Managers(correctionPlan.sections);
4263
5231
  allT2Results = [...allT2Results, ...correctionResults];
5232
+ if (okCount(allT2Results) <= okBefore) {
5233
+ this.log("T1 Review: corrective pass made no net progress \u2014 stopping early with the best partial result.");
5234
+ break;
5235
+ }
4264
5236
  pass++;
4265
5237
  }
4266
5238
  this.sendStatusUpdate({
@@ -4329,6 +5301,34 @@ If no, reply with "REJECTED: [Detailed reason explaining exactly what is missing
4329
5301
 
4330
5302
  [Image context: ${result.content}]`;
4331
5303
  }
5304
+ /**
5305
+ * Automated reviewer pass: a single T1 critique of the plan before the user
5306
+ * sees it (planReview.autoReviewer). Best-effort — returns null on any error
5307
+ * so it never blocks the approval gate.
5308
+ */
5309
+ async reviewPlan(plan, goal) {
5310
+ try {
5311
+ const sections = plan.sections.map((s, i) => `${i + 1}. ${s.sectionTitle} \u2014 ${s.description} (${s.t3Subtasks?.length ?? 0} subtasks${s.dependsOn?.length ? `, depends on: ${s.dependsOn.join(", ")}` : ""})`).join("\n");
5312
+ const prompt = `You are a senior engineer reviewing an execution plan BEFORE it runs.
5313
+
5314
+ GOAL:
5315
+ ${goal}
5316
+
5317
+ PLAN (${plan.complexity}, ${plan.sections.length} sections):
5318
+ ${sections}
5319
+
5320
+ In 3-5 terse bullets, flag the most important RISKS, GAPS, or over-/under-decomposition the operator should weigh before approving. If the plan is sound, say so in one line. Output plain-text bullets only - no preamble.`;
5321
+ const result = await this.router.generate("T1", {
5322
+ messages: [{ role: "user", content: prompt }],
5323
+ systemPrompt: "You are a concise, critical plan reviewer. Be specific and brief.",
5324
+ maxTokens: 400
5325
+ });
5326
+ const text = (result.content ?? "").trim();
5327
+ return text.length ? text : null;
5328
+ } catch {
5329
+ return null;
5330
+ }
5331
+ }
4332
5332
  async decomposeTask(prompt, systemContext) {
4333
5333
  const contextSection = systemContext ? `
4334
5334
  Project context:
@@ -4628,7 +5628,14 @@ Leave dependsOn empty for sections that can run immediately in parallel.`;
4628
5628
  async compileFinalOutput(originalPrompt, plan, t2Results) {
4629
5629
  const completedSections = t2Results.filter((r) => r.status !== "FAILED");
4630
5630
  if (!completedSections.length) {
4631
- return "Task failed \u2014 all sections encountered errors. Please check the escalation log.";
5631
+ const allIssues = t2Results.flatMap((r) => r.t3Results.flatMap((t) => t.issues));
5632
+ const critical = allIssues.find((i) => i.includes("[CRITICAL_TOOL_ERROR]"));
5633
+ const stalled = allIssues.find((i) => /^Stalled:/.test(i));
5634
+ const topReason = critical ?? stalled ?? allIssues[0] ?? "no specific reason recorded";
5635
+ const sectionWord = t2Results.length === 1 ? "section" : "sections";
5636
+ return `Task failed \u2014 ${topReason}
5637
+
5638
+ All ${t2Results.length} ${sectionWord} encountered errors. Run \`/logs\` for details.`;
4632
5639
  }
4633
5640
  const sectionsText = completedSections.map((r) => `**${r.sectionTitle}**
4634
5641
  ${r.sectionSummary}
@@ -4748,7 +5755,7 @@ var ShellTool = class extends BaseTool {
4748
5755
  const timeout = input["timeout"] ?? 3e4;
4749
5756
  this.validateCommand(command);
4750
5757
  try {
4751
- const { stdout, stderr } = await execAsync(command, { cwd, timeout });
5758
+ const { stdout, stderr } = await execAsync(command, { cwd, timeout, windowsHide: true });
4752
5759
  const out = [stdout, stderr].filter(Boolean).join("\n").trim();
4753
5760
  return out || "(no output)";
4754
5761
  } catch (err) {
@@ -4762,11 +5769,14 @@ ${[e.stdout, e.stderr].filter(Boolean).join("\n").trim()}`;
4762
5769
  }
4763
5770
  validateCommand(command) {
4764
5771
  const builtinDangerous = [
4765
- /rm\s+-rf\s+\//,
4766
- />\s*\/dev\/sda/,
4767
- /mkfs\./,
4768
- /dd\s+if=.*of=\/dev\//,
4769
- /chmod\s+777\s+\//
5772
+ /\brm\s+(?:-\w+\s+)*-\w*[rf]\w*[rf]\w*\s+(?:\/|~|\$HOME)(?:\s|$)/,
5773
+ // rm -rf / , rm -fr ~
5774
+ />\s*\/dev\/[sh]d[a-z]/,
5775
+ /\bmkfs[.\s]/,
5776
+ /\bdd\s+.*\bof=\/dev\/[sh]d[a-z]/,
5777
+ /\bchmod\s+(?:-\w+\s+)*-?R?\s*777\s+\//,
5778
+ /:\(\)\s*\{\s*:\s*\|\s*:?\s*&\s*\}\s*;/
5779
+ // fork bomb :(){ :|:& };:
4770
5780
  ];
4771
5781
  for (const pattern of builtinDangerous) {
4772
5782
  if (pattern.test(command)) {
@@ -4805,16 +5815,16 @@ function resolveInWorkspace(workspaceRoot, input) {
4805
5815
  if (typeof input !== "string" || input.length === 0) {
4806
5816
  throw new WorkspaceSandboxError(String(input), workspaceRoot);
4807
5817
  }
4808
- const root = path16.resolve(workspaceRoot);
4809
- const abs = path16.isAbsolute(input) ? path16.resolve(input) : path16.resolve(root, input);
4810
- const rel = path16.relative(root, abs);
4811
- if (rel === "" || rel === ".") ; else if (rel.startsWith("..") || path16.isAbsolute(rel)) {
5818
+ const root = path18.resolve(workspaceRoot);
5819
+ const abs = path18.isAbsolute(input) ? path18.resolve(input) : path18.resolve(root, input);
5820
+ const rel = path18.relative(root, abs);
5821
+ if (rel === "" || rel === ".") ; else if (rel.startsWith("..") || path18.isAbsolute(rel)) {
4812
5822
  throw new WorkspaceSandboxError(input, root);
4813
5823
  }
4814
5824
  try {
4815
- const real = fs15.realpathSync(abs);
4816
- const realRel = path16.relative(root, real);
4817
- if (realRel !== "" && realRel !== "." && (realRel.startsWith("..") || path16.isAbsolute(realRel))) {
5825
+ const real = fs17.realpathSync(abs);
5826
+ const realRel = path18.relative(root, real);
5827
+ if (realRel !== "" && realRel !== "." && (realRel.startsWith("..") || path18.isAbsolute(realRel))) {
4818
5828
  throw new WorkspaceSandboxError(input, root);
4819
5829
  }
4820
5830
  } catch (e) {
@@ -4841,7 +5851,7 @@ var FileReadTool = class extends BaseTool {
4841
5851
  const absPath = resolveInWorkspace(this.workspaceRoot, filePath);
4842
5852
  const offset = input["offset"] ?? 1;
4843
5853
  const limit = input["limit"];
4844
- const content = await fs3.readFile(absPath, "utf-8");
5854
+ const content = await fs4.readFile(absPath, "utf-8");
4845
5855
  const lines = content.split("\n");
4846
5856
  const start = Math.max(0, offset - 1);
4847
5857
  const end = limit ? start + limit : lines.length;
@@ -4870,13 +5880,13 @@ var FileWriteTool = class extends BaseTool {
4870
5880
  const content = input["content"];
4871
5881
  if (options.saveSnapshot) {
4872
5882
  try {
4873
- const oldContent = await fs3.readFile(absPath, "utf-8");
5883
+ const oldContent = await fs4.readFile(absPath, "utf-8");
4874
5884
  await options.saveSnapshot(absPath, oldContent);
4875
5885
  } catch {
4876
5886
  }
4877
5887
  }
4878
- await fs3.mkdir(path16.dirname(absPath), { recursive: true });
4879
- await fs3.writeFile(absPath, content, "utf-8");
5888
+ await fs4.mkdir(path18.dirname(absPath), { recursive: true });
5889
+ await fs4.writeFile(absPath, content, "utf-8");
4880
5890
  return `Written ${content.length} characters to ${filePath}`;
4881
5891
  }
4882
5892
  };
@@ -4902,7 +5912,7 @@ var FileEditTool = class extends BaseTool {
4902
5912
  const oldString = input["old_string"];
4903
5913
  const newString = input["new_string"];
4904
5914
  const replaceAll = input["replace_all"] ?? false;
4905
- const rawContent = await fs3.readFile(absPath, "utf-8");
5915
+ const rawContent = await fs4.readFile(absPath, "utf-8");
4906
5916
  if (options.saveSnapshot) {
4907
5917
  await options.saveSnapshot(absPath, rawContent);
4908
5918
  }
@@ -4914,7 +5924,7 @@ var FileEditTool = class extends BaseTool {
4914
5924
  );
4915
5925
  }
4916
5926
  const updated = replaceAll ? content.split(normalizedOld).join(newString) : content.replace(normalizedOld, newString);
4917
- await fs3.writeFile(absPath, updated, "utf-8");
5927
+ await fs4.writeFile(absPath, updated, "utf-8");
4918
5928
  const count = replaceAll ? content.split(normalizedOld).length - 1 : 1;
4919
5929
  return `Replaced ${count} occurrence(s) in ${filePath}`;
4920
5930
  }
@@ -4937,12 +5947,12 @@ var FileDeleteTool = class extends BaseTool {
4937
5947
  const absPath = resolveInWorkspace(this.workspaceRoot, filePath);
4938
5948
  if (options.saveSnapshot) {
4939
5949
  try {
4940
- const oldContent = await fs3.readFile(absPath, "utf-8");
5950
+ const oldContent = await fs4.readFile(absPath, "utf-8");
4941
5951
  await options.saveSnapshot(absPath, oldContent);
4942
5952
  } catch {
4943
5953
  }
4944
5954
  }
4945
- await fs3.rm(absPath, { recursive: false });
5955
+ await fs4.rm(absPath, { recursive: false });
4946
5956
  return `Deleted ${filePath}`;
4947
5957
  }
4948
5958
  };
@@ -4959,7 +5969,7 @@ var FileListTool = class extends BaseTool {
4959
5969
  async execute(input, _options) {
4960
5970
  const inputPath = input["path"] || ".";
4961
5971
  const absPath = resolveInWorkspace(this.workspaceRoot, inputPath);
4962
- const entries = await fs3.readdir(absPath, { withFileTypes: true });
5972
+ const entries = await fs4.readdir(absPath, { withFileTypes: true });
4963
5973
  return entries.map((e) => `${e.isDirectory() ? "[DIR] " : " "}${e.name}`).join("\n") || "(empty directory)";
4964
5974
  }
4965
5975
  };
@@ -5052,6 +6062,8 @@ var GitTool = class extends BaseTool {
5052
6062
  return lines.join("\n") || "Working tree clean";
5053
6063
  }
5054
6064
  };
6065
+
6066
+ // src/tools/github.ts
5055
6067
  var GitHubTool = class extends BaseTool {
5056
6068
  name = "github";
5057
6069
  description = "Interact with GitHub or GitLab: create PRs, list issues, comment on issues.";
@@ -5076,6 +6088,34 @@ var GitHubTool = class extends BaseTool {
5076
6088
  isDangerous() {
5077
6089
  return true;
5078
6090
  }
6091
+ // ── fetch helpers (replace axios) ──────────────
6092
+ async request(url, init) {
6093
+ const res = await fetch(url, init);
6094
+ const text = await res.text();
6095
+ let data;
6096
+ try {
6097
+ data = text ? JSON.parse(text) : void 0;
6098
+ } catch {
6099
+ data = text;
6100
+ }
6101
+ if (!res.ok) {
6102
+ const err = new Error(`HTTP ${res.status}`);
6103
+ err.status = res.status;
6104
+ err.data = data;
6105
+ throw err;
6106
+ }
6107
+ return data;
6108
+ }
6109
+ apiGet(url, headers) {
6110
+ return this.request(url, { headers });
6111
+ }
6112
+ apiPost(url, body, headers) {
6113
+ return this.request(url, {
6114
+ method: "POST",
6115
+ headers: { ...headers, "Content-Type": "application/json" },
6116
+ body: JSON.stringify(body)
6117
+ });
6118
+ }
5079
6119
  async execute(input, _options) {
5080
6120
  const platform = input["platform"] ?? "github";
5081
6121
  const operation = input["operation"];
@@ -5098,10 +6138,10 @@ var GitHubTool = class extends BaseTool {
5098
6138
  }
5099
6139
  return await this.executeGitLab(operation, repo, token, input);
5100
6140
  } catch (err) {
5101
- const axiosErr = err;
5102
- if (axiosErr?.response?.status) {
5103
- const status = axiosErr.response.status;
5104
- const msg = axiosErr.response.data?.message ?? "";
6141
+ const httpErr = err;
6142
+ if (httpErr?.status) {
6143
+ const status = httpErr.status;
6144
+ const msg = httpErr.data?.message ?? "";
5105
6145
  switch (status) {
5106
6146
  case 401:
5107
6147
  return `Authentication failed: Your ${platform} token is invalid or expired. Check your token and try again.`;
@@ -5114,10 +6154,10 @@ var GitHubTool = class extends BaseTool {
5114
6154
  case 429:
5115
6155
  return `Rate limited by ${platform}. Please wait a moment before trying again.`;
5116
6156
  default:
5117
- return `${platform} API error (${status}): ${msg || (axiosErr.message ?? "Unknown error")}`;
6157
+ return `${platform} API error (${status}): ${msg || (httpErr.message ?? "Unknown error")}`;
5118
6158
  }
5119
6159
  }
5120
- return `${platform} request failed: ${axiosErr.message ?? String(err)}`;
6160
+ return `${platform} request failed: ${httpErr.message ?? String(err)}`;
5121
6161
  }
5122
6162
  }
5123
6163
  async executeGitHub(operation, repo, token, input) {
@@ -5128,35 +6168,35 @@ var GitHubTool = class extends BaseTool {
5128
6168
  const base = `https://api.github.com/repos/${repo}`;
5129
6169
  switch (operation) {
5130
6170
  case "list_issues": {
5131
- const response = await axios2.get(`${base}/issues`, { headers });
5132
- return response.data.map((i) => `#${i.number} [${i.state}] ${i.title}`).join("\n");
6171
+ const data = await this.apiGet(`${base}/issues`, headers);
6172
+ return data.map((i) => `#${i.number} [${i.state}] ${i.title}`).join("\n");
5133
6173
  }
5134
6174
  case "list_prs": {
5135
- const response = await axios2.get(`${base}/pulls`, { headers });
5136
- return response.data.map((p) => `#${p.number} [${p.state}] ${p.title} (${p.head.ref} \u2192 ${p.base.ref})`).join("\n");
6175
+ const data = await this.apiGet(`${base}/pulls`, headers);
6176
+ return data.map((p) => `#${p.number} [${p.state}] ${p.title} (${p.head.ref} \u2192 ${p.base.ref})`).join("\n");
5137
6177
  }
5138
6178
  case "create_pr": {
5139
- const response = await axios2.post(`${base}/pulls`, {
6179
+ const data = await this.apiPost(`${base}/pulls`, {
5140
6180
  title: input["title"],
5141
6181
  body: input["body"] ?? "",
5142
6182
  head: input["head"],
5143
6183
  base: input["base"] ?? "main"
5144
- }, { headers });
5145
- return `Created PR #${response.data.number}: ${response.data.html_url}`;
6184
+ }, headers);
6185
+ return `Created PR #${data.number}: ${data.html_url}`;
5146
6186
  }
5147
6187
  case "comment_issue": {
5148
6188
  const num = input["issue_number"];
5149
- await axios2.post(`${base}/issues/${num}/comments`, { body: input["body"] }, { headers });
6189
+ await this.apiPost(`${base}/issues/${num}/comments`, { body: input["body"] }, headers);
5150
6190
  return `Comment added to #${num}`;
5151
6191
  }
5152
6192
  case "get_pr": {
5153
6193
  const num = input["issue_number"];
5154
- const response = await axios2.get(`${base}/pulls/${num}`, { headers });
5155
- return `PR #${num}: ${response.data.title}
5156
- State: ${response.data.state}
5157
- ${response.data.html_url}
6194
+ const data = await this.apiGet(`${base}/pulls/${num}`, headers);
6195
+ return `PR #${num}: ${data.title}
6196
+ State: ${data.state}
6197
+ ${data.html_url}
5158
6198
 
5159
- ${response.data.body}`;
6199
+ ${data.body}`;
5160
6200
  }
5161
6201
  default:
5162
6202
  throw new Error(`Unknown GitHub operation: ${operation}`);
@@ -5168,35 +6208,35 @@ ${response.data.body}`;
5168
6208
  const base = `https://gitlab.com/api/v4/projects/${encodedRepo}`;
5169
6209
  switch (operation) {
5170
6210
  case "list_issues": {
5171
- const response = await axios2.get(`${base}/issues`, { headers });
5172
- return response.data.map((i) => `#${i.iid} [${i.state}] ${i.title}`).join("\n");
6211
+ const data = await this.apiGet(`${base}/issues`, headers);
6212
+ return data.map((i) => `#${i.iid} [${i.state}] ${i.title}`).join("\n");
5173
6213
  }
5174
6214
  case "create_pr": {
5175
- const response = await axios2.post(`${base}/merge_requests`, {
6215
+ const data = await this.apiPost(`${base}/merge_requests`, {
5176
6216
  title: input["title"],
5177
6217
  description: input["body"] ?? "",
5178
6218
  source_branch: input["head"],
5179
6219
  target_branch: input["base"] ?? "main"
5180
- }, { headers });
5181
- return `Created MR !${response.data.iid}: ${response.data.web_url}`;
6220
+ }, headers);
6221
+ return `Created MR !${data.iid}: ${data.web_url}`;
5182
6222
  }
5183
6223
  case "list_prs": {
5184
- const response = await axios2.get(`${base}/merge_requests`, { headers });
5185
- return response.data.map((p) => `!${p.iid} [${p.state}] ${p.title} (${p.source_branch} \u2192 ${p.target_branch})`).join("\n");
6224
+ const data = await this.apiGet(`${base}/merge_requests`, headers);
6225
+ return data.map((p) => `!${p.iid} [${p.state}] ${p.title} (${p.source_branch} \u2192 ${p.target_branch})`).join("\n");
5186
6226
  }
5187
6227
  case "comment_issue": {
5188
6228
  const num = input["issue_number"];
5189
- await axios2.post(`${base}/issues/${num}/notes`, { body: input["body"] }, { headers });
6229
+ await this.apiPost(`${base}/issues/${num}/notes`, { body: input["body"] }, headers);
5190
6230
  return `Comment added to #${num}`;
5191
6231
  }
5192
6232
  case "get_pr": {
5193
6233
  const num = input["issue_number"];
5194
- const response = await axios2.get(`${base}/merge_requests/${num}`, { headers });
5195
- return `MR !${num}: ${response.data.title}
5196
- State: ${response.data.state}
5197
- ${response.data.web_url}
6234
+ const data = await this.apiGet(`${base}/merge_requests/${num}`, headers);
6235
+ return `MR !${num}: ${data.title}
6236
+ State: ${data.state}
6237
+ ${data.web_url}
5198
6238
 
5199
- ${response.data.description}`;
6239
+ ${data.description}`;
5200
6240
  }
5201
6241
  default:
5202
6242
  throw new Error(`GitLab operation not supported: ${operation}`);
@@ -5342,8 +6382,8 @@ var ImageAnalyzeTool = class extends BaseTool {
5342
6382
  }
5343
6383
  };
5344
6384
  async function fileToImageAttachment(filePath) {
5345
- const data = await fs3.readFile(filePath);
5346
- const ext = path16.extname(filePath).toLowerCase();
6385
+ const data = await fs4.readFile(filePath);
6386
+ const ext = path18.extname(filePath).toLowerCase();
5347
6387
  const mimeMap = {
5348
6388
  ".jpg": "image/jpeg",
5349
6389
  ".jpeg": "image/jpeg",
@@ -5377,14 +6417,14 @@ var PDFCreateTool = class extends BaseTool {
5377
6417
  const filePath = input["path"];
5378
6418
  const content = input["content"];
5379
6419
  const title = input["title"];
5380
- const dir = path16.dirname(filePath);
5381
- if (!fs15.existsSync(dir)) {
5382
- fs15.mkdirSync(dir, { recursive: true });
6420
+ const dir = path18.dirname(filePath);
6421
+ if (!fs17.existsSync(dir)) {
6422
+ fs17.mkdirSync(dir, { recursive: true });
5383
6423
  }
5384
6424
  return new Promise((resolve, reject) => {
5385
6425
  try {
5386
6426
  const doc = new PDFDocument({ margin: 50 });
5387
- const stream = fs15.createWriteStream(filePath);
6427
+ const stream = fs17.createWriteStream(filePath);
5388
6428
  doc.pipe(stream);
5389
6429
  if (title) {
5390
6430
  doc.info["Title"] = title;
@@ -5462,24 +6502,22 @@ var CodeInterpreterTool = class extends BaseTool {
5462
6502
  }
5463
6503
  cmdPrefix = NODE_CMD;
5464
6504
  }
5465
- const tmpDir = path16.join(process.cwd(), ".cascade", "tmp");
5466
- if (!fs15.existsSync(tmpDir)) {
5467
- fs15.mkdirSync(tmpDir, { recursive: true });
6505
+ const tmpDir = path18.join(this.workspaceRoot, ".cascade", "tmp");
6506
+ if (!fs17.existsSync(tmpDir)) {
6507
+ fs17.mkdirSync(tmpDir, { recursive: true });
5468
6508
  }
5469
6509
  const extension = language === "python" ? "py" : "js";
5470
6510
  const fileName = `intp_${randomUUID().slice(0, 8)}.${extension}`;
5471
- const filePath = path16.join(tmpDir, fileName);
5472
- fs15.writeFileSync(filePath, code, "utf-8");
5473
- const quotedPath = `"${filePath}"`;
5474
- const quotedArgs = args.map((a) => `"${a}"`).join(" ");
5475
- const fullCmd = `${cmdPrefix} ${quotedPath}${quotedArgs ? " " + quotedArgs : ""}`;
6511
+ const filePath = path18.join(tmpDir, fileName);
6512
+ fs17.writeFileSync(filePath, code, "utf-8");
6513
+ const execArgs = [filePath, ...args];
5476
6514
  return new Promise((resolve) => {
5477
6515
  const startMs = Date.now();
5478
- exec(fullCmd, { cwd: process.cwd(), timeout: 3e4 }, (error, stdout, stderr) => {
6516
+ execFile(cmdPrefix, execArgs, { cwd: this.workspaceRoot, timeout: 3e4 }, (error, stdout, stderr) => {
5479
6517
  const duration = Date.now() - startMs;
5480
6518
  try {
5481
- if (fs15.existsSync(filePath)) {
5482
- fs15.unlinkSync(filePath);
6519
+ if (fs17.existsSync(filePath)) {
6520
+ fs17.unlinkSync(filePath);
5483
6521
  }
5484
6522
  } catch (cleanupErr) {
5485
6523
  console.error(`Failed to cleanup interpreter script ${filePath}:`, cleanupErr);
@@ -5758,7 +6796,7 @@ var GlobTool = class extends BaseTool {
5758
6796
  };
5759
6797
  async execute(input, _options) {
5760
6798
  const pattern = input["pattern"];
5761
- const searchPath = input["path"] ? path16.resolve(this.workspaceRoot, input["path"]) : this.workspaceRoot;
6799
+ const searchPath = input["path"] ? path18.resolve(this.workspaceRoot, input["path"]) : this.workspaceRoot;
5762
6800
  const matches = await glob(pattern, {
5763
6801
  cwd: searchPath,
5764
6802
  ignore: ["node_modules/**", ".git/**", "dist/**", "build/**"],
@@ -5771,7 +6809,7 @@ var GlobTool = class extends BaseTool {
5771
6809
  const withMtime = await Promise.all(
5772
6810
  matches.map(async (rel) => {
5773
6811
  try {
5774
- const stat = await fs3.stat(path16.join(searchPath, rel));
6812
+ const stat = await fs4.stat(path18.join(searchPath, rel));
5775
6813
  return { rel, mtime: stat.mtimeMs };
5776
6814
  } catch {
5777
6815
  return { rel, mtime: 0 };
@@ -5820,7 +6858,7 @@ var GrepTool = class extends BaseTool {
5820
6858
  };
5821
6859
  async execute(input, _options) {
5822
6860
  const pattern = input["pattern"];
5823
- const searchPath = input["path"] ? path16.resolve(this.workspaceRoot, input["path"]) : this.workspaceRoot;
6861
+ const searchPath = input["path"] ? path18.resolve(this.workspaceRoot, input["path"]) : this.workspaceRoot;
5824
6862
  const globPattern = input["glob"];
5825
6863
  const outputMode = input["output_mode"] ?? "content";
5826
6864
  const context = input["context"] ?? 0;
@@ -5874,15 +6912,15 @@ var GrepTool = class extends BaseTool {
5874
6912
  nodir: true
5875
6913
  });
5876
6914
  } catch {
5877
- files = [path16.relative(searchPath, searchPath) || "."];
6915
+ files = [path18.relative(searchPath, searchPath) || "."];
5878
6916
  }
5879
6917
  const results = [];
5880
6918
  let totalCount = 0;
5881
6919
  for (const rel of files) {
5882
- const abs = path16.join(searchPath, rel);
6920
+ const abs = path18.join(searchPath, rel);
5883
6921
  let content;
5884
6922
  try {
5885
- content = await fs3.readFile(abs, "utf-8");
6923
+ content = await fs4.readFile(abs, "utf-8");
5886
6924
  } catch {
5887
6925
  continue;
5888
6926
  }
@@ -5920,6 +6958,92 @@ Total: ${totalCount} matches`);
5920
6958
  return results.join("\n");
5921
6959
  }
5922
6960
  };
6961
/**
 * Error raised when a URL is refused by the outbound-request guard.
 *
 * Callers distinguish it from ordinary network failures with `instanceof`.
 */
var SsrfBlockedError = class extends Error {
  /**
   * @param {string} message - Human-readable reason for the refusal.
   * @param {{ cause?: unknown }} [options] - Standard `Error` options; lets a
   *   caller attach the underlying failure as `cause`. Optional, so existing
   *   single-argument call sites behave exactly as before.
   */
  constructor(message, options) {
    super(message, options);
    this.name = "SsrfBlockedError";
  }
};
6967
// URL schemes the guard accepts, in the `URL.protocol` form (trailing colon).
var ALLOWED_PROTOCOLS = /* @__PURE__ */ new Set(["http:", "https:"]);
// Upper bound on the number of redirects followed for one request.
var MAX_REDIRECTS = 5;

/**
 * Report whether the local-address checks are switched off.
 *
 * @returns {boolean} `true` only when the `CASCADE_ALLOW_LOCAL_FETCH`
 *   environment variable is exactly the string "1".
 */
function allowLocal() {
  const flag = process.env.CASCADE_ALLOW_LOCAL_FETCH;
  return flag === "1";
}
6972
/**
 * Classify an IP address literal as non-public.
 *
 * @param {string} ip - Candidate address.
 * @returns {boolean} The verdict of the IPv4 or IPv6 check that matches the
 *   literal's family; `true` when the string is not an IP literal at all, so
 *   unrecognised input is never treated as public.
 */
function isPrivateAddress(ip) {
  switch (net.isIP(ip)) {
    case 4:
      return isPrivateIPv4(ip);
    case 6:
      return isPrivateIPv6(ip);
    default:
      return true;
  }
}
6978
/**
 * Decide whether a dotted-quad IPv4 address is non-public.
 *
 * @param {string} ip - Address in `a.b.c.d` decimal notation.
 * @returns {boolean} `true` for loopback, private, link-local, shared
 *   (CGNAT), reserved/documentation/benchmarking, multicast and "this
 *   network" ranges, and for anything that is not four decimal octets
 *   (malformed input fails closed); `false` otherwise.
 */
function isPrivateIPv4(ip) {
  const fields = ip.split(".");
  // Strict decimal octets only: `Number()` alone would accept "", "0x7f" or "1e1".
  const wellFormed = fields.length === 4 && fields.every((f) => /^\d{1,3}$/.test(f) && Number(f) <= 255);
  if (!wellFormed) return true;
  const [a, b, c] = fields.map(Number);
  if (a === 0) return true; // 0.0.0.0/8 "this network"
  if (a === 10) return true; // 10.0.0.0/8 private
  if (a === 127) return true; // 127.0.0.0/8 loopback
  if (a === 169 && b === 254) return true; // 169.254.0.0/16 link-local
  if (a === 172 && b >= 16 && b <= 31) return true; // 172.16.0.0/12 private
  if (a === 192 && b === 168) return true; // 192.168.0.0/16 private
  if (a === 100 && b >= 64 && b <= 127) return true; // 100.64.0.0/10 shared address space
  if (a === 192 && b === 0 && (c === 0 || c === 2)) return true; // 192.0.0.0/24, 192.0.2.0/24
  if (a === 198 && (b === 18 || b === 19)) return true; // 198.18.0.0/15 benchmarking
  if (a === 198 && b === 51 && c === 100) return true; // 198.51.100.0/24 documentation
  if (a === 203 && b === 0 && c === 113) return true; // 203.0.113.0/24 documentation
  if (a >= 224) return true; // multicast and reserved
  return false;
}

/**
 * Decide whether an IPv6 address is non-public.
 *
 * The address is expanded to its eight 16-bit groups before classification,
 * so every spelling of the same address gets the same verdict. In particular
 * an IPv4-mapped address is recognised both as `::ffff:127.0.0.1` and in the
 * hexadecimal form `::ffff:7f00:1`.
 *
 * @param {string} ip - IPv6 literal; surrounding brackets and a `%zone`
 *   suffix are tolerated.
 * @returns {boolean} `true` for unspecified, loopback, link-local,
 *   site-local, unique-local, multicast and documentation addresses, for
 *   addresses embedding a non-public IPv4 address (IPv4-mapped,
 *   IPv4-compatible, NAT64 `64:ff9b::/96`), and for anything that cannot be
 *   parsed (fails closed); `false` otherwise.
 */
function isPrivateIPv6(ip) {
  let text = ip.toLowerCase().replace(/^\[|\]$/g, "");
  const zoneAt = text.indexOf("%");
  if (zoneAt !== -1) text = text.slice(0, zoneAt);

  // Rewrite a trailing dotted quad ("…:a.b.c.d") as two hexadecimal groups.
  if (text.includes(".")) {
    const split = text.lastIndexOf(":");
    const octets = text.slice(split + 1).split(".");
    const quadOk = split !== -1 && octets.length === 4 && octets.every((o) => /^\d{1,3}$/.test(o) && Number(o) <= 255);
    if (!quadOk) return true;
    const [o0, o1, o2, o3] = octets.map(Number);
    text = `${text.slice(0, split + 1)}${(o0 * 256 + o1).toString(16)}:${(o2 * 256 + o3).toString(16)}`;
  }

  // Expand "::" so that exactly eight groups remain.
  const halves = text.split("::");
  if (halves.length > 2) return true;
  const compressed = halves.length === 2;
  const fieldsOf = (part) => (part === "" ? [] : part.split(":"));
  const head = fieldsOf(halves[0]);
  const tail = compressed ? fieldsOf(halves[1]) : [];
  const gap = 8 - head.length - tail.length;
  if (compressed ? gap < 1 : gap !== 0) return true;
  const fields = [...head, ...new Array(compressed ? gap : 0).fill("0"), ...tail];
  if (!fields.every((f) => /^[0-9a-f]{1,4}$/.test(f))) return true;
  const groups = fields.map((f) => Number.parseInt(f, 16));

  const leadingZeros = (count) => groups.slice(0, count).every((g) => g === 0);

  // "::" (unspecified) and "::1" (loopback).
  if (leadingZeros(7) && groups[7] <= 1) return true;

  // Addresses that carry an IPv4 address in their low 32 bits are judged by it.
  const mapped = leadingZeros(5) && groups[5] === 0xffff; // ::ffff:0:0/96
  const compatible = leadingZeros(6); // ::/96 (deprecated)
  const nat64 = groups[0] === 0x64 && groups[1] === 0xff9b && groups.slice(2, 6).every((g) => g === 0);
  if (mapped || compatible || nat64) {
    const embedded = [groups[6] >> 8, groups[6] & 255, groups[7] >> 8, groups[7] & 255].join(".");
    return isPrivateIPv4(embedded);
  }

  const top = groups[0];
  if ((top & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local
  if ((top & 0xffc0) === 0xfec0) return true; // fec0::/10 site-local (deprecated)
  if ((top & 0xfe00) === 0xfc00) return true; // fc00::/7 unique local
  if ((top & 0xff00) === 0xff00) return true; // ff00::/8 multicast
  if (top === 0x2001 && groups[1] === 0xdb8) return true; // 2001:db8::/32 documentation
  return false;
}
7001
/**
 * Validate that a URL may be fetched: http(s) only, and not aimed at a
 * local or non-public destination.
 *
 * Order of checks: parse, scheme, then (unless `allowLocal()` is on) the
 * host. An IP-literal host is classified directly; a name is rejected when
 * it is a local name, otherwise it is resolved and rejected if it has no
 * addresses or if ANY of them is non-public.
 *
 * NOTE(review): the addresses checked here come from this function's own
 * lookup; the function only returns the URL, so whatever performs the
 * request afterwards resolves the name again. A host that answers
 * differently the second time is not covered by this check.
 *
 * @param {string} rawUrl - URL to validate.
 * @returns {Promise<URL>} The parsed URL when it is allowed.
 * @throws {SsrfBlockedError} When the URL is malformed, uses another scheme,
 *   targets a non-public address or local hostname, or cannot be resolved.
 *   The underlying parse/DNS error is handed to the constructor as the
 *   `cause` option.
 */
async function assertPublicUrl(rawUrl) {
  let url;
  try {
    url = new URL(rawUrl);
  } catch (err) {
    throw new SsrfBlockedError(`Invalid URL: ${rawUrl}`, { cause: err });
  }
  if (!ALLOWED_PROTOCOLS.has(url.protocol)) {
    throw new SsrfBlockedError(`Blocked URL scheme "${url.protocol}" \u2014 only http and https are allowed.`);
  }
  if (allowLocal()) return url;

  // URL.hostname keeps the brackets around an IPv6 literal; drop them.
  const host = url.hostname.replace(/^\[|\]$/g, "");
  if (net.isIP(host)) {
    if (isPrivateAddress(host)) {
      throw new SsrfBlockedError(`Blocked request to non-public address ${host}.`);
    }
    return url;
  }

  // Compare without a trailing root dot so "localhost." is refused here too,
  // rather than depending on what the resolver returns for it.
  const bareName = host.endsWith(".") ? host.slice(0, -1) : host;
  if (bareName === "localhost" || bareName.endsWith(".localhost") || bareName.endsWith(".local")) {
    throw new SsrfBlockedError(`Blocked request to local hostname "${host}".`);
  }

  let addresses;
  try {
    const records = await dns.lookup(host, { all: true });
    addresses = records.map((record) => record.address);
  } catch (err) {
    throw new SsrfBlockedError(`Could not resolve host "${host}".`, { cause: err });
  }
  // One non-public answer is enough to refuse the whole name.
  if (addresses.length === 0 || addresses.some((addr) => isPrivateAddress(addr))) {
    throw new SsrfBlockedError(`Blocked request to "${host}" \u2014 resolves to a non-public address.`);
  }
  return url;
}
7034
/**
 * `fetch` wrapper that validates the target with `assertPublicUrl` and
 * follows redirects by hand so that every hop is validated as well.
 *
 * Redirect handling, per hop:
 *  - the redirect response's body is cancelled so its connection is released;
 *  - when the next URL has a different origin, `Authorization`, `Cookie` and
 *    `Proxy-Authorization` request headers are dropped;
 *  - a 303 (for any method other than GET/HEAD) or a 301/302 answering a POST
 *    turns the follow-up into a body-less GET.
 *
 * @param {string} rawUrl - URL to fetch.
 * @param {object} [init] - `fetch` options; `redirect` is always forced to
 *   "manual".
 * @returns {Promise<Response>} The first response that is not a redirect
 *   carrying a `Location` header.
 * @throws {SsrfBlockedError} When any URL in the chain is refused, or after
 *   more than `MAX_REDIRECTS` redirects.
 */
async function safeFetch(rawUrl, init = {}) {
  let currentUrl = (await assertPublicUrl(rawUrl)).toString();
  let requestInit = { ...init, redirect: "manual" };
  for (let hop = 0; hop <= MAX_REDIRECTS; hop++) {
    const resp = await fetch(currentUrl, requestInit);
    if (resp.status < 300 || resp.status >= 400) return resp;
    const location = resp.headers.get("location");
    if (!location) return resp;

    // The redirect body is never read; cancel it so the connection is freed.
    await resp.body?.cancel().catch(() => {});

    const next = new URL(location, currentUrl);
    await assertPublicUrl(next.toString());

    const method = String(requestInit.method ?? "GET").toUpperCase();
    const becomesGet = (resp.status === 303 && method !== "GET" && method !== "HEAD") || ((resp.status === 301 || resp.status === 302) && method === "POST");
    const crossOrigin = next.origin !== new URL(currentUrl).origin;
    if (becomesGet || crossOrigin) {
      const headers = new Headers(requestInit.headers);
      if (crossOrigin) {
        // Never hand the caller's credentials to a different origin.
        for (const name of ["authorization", "cookie", "proxy-authorization"]) headers.delete(name);
      }
      if (becomesGet) {
        // The follow-up has no body, so its body-describing headers go too.
        for (const name of ["content-type", "content-length", "content-encoding", "content-language", "content-location"]) headers.delete(name);
        const { body: _discarded, ...withoutBody } = requestInit;
        requestInit = { ...withoutBody, method: "GET", headers };
      } else {
        requestInit = { ...requestInit, headers };
      }
    }
    currentUrl = next.toString();
  }
  throw new SsrfBlockedError(`Too many redirects (>${MAX_REDIRECTS}).`);
}
5923
7047
 
5924
7048
  // src/tools/web-fetch.ts
5925
7049
  var MAX_CHARS = 5e4;
@@ -5953,15 +7077,17 @@ var WebFetchTool = class extends BaseTool {
5953
7077
  const url = input["url"];
5954
7078
  let resp;
5955
7079
  try {
5956
- resp = await fetch(url, {
7080
+ resp = await safeFetch(url, {
5957
7081
  headers: {
5958
7082
  "User-Agent": "Cascade-AI/1.0 WebFetchTool",
5959
7083
  Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,text/plain;q=0.8,*/*;q=0.5"
5960
7084
  },
5961
- signal: AbortSignal.timeout(TIMEOUT_MS),
5962
- redirect: "follow"
7085
+ signal: AbortSignal.timeout(TIMEOUT_MS)
5963
7086
  });
5964
7087
  } catch (err) {
7088
+ if (err instanceof SsrfBlockedError) {
7089
+ return `Refused to fetch ${url}: ${err.message}`;
7090
+ }
5965
7091
  return `Failed to fetch ${url}: ${err instanceof Error ? err.message : String(err)}`;
5966
7092
  }
5967
7093
  if (!resp.ok) {
@@ -6153,10 +7279,10 @@ var ToolRegistry = class extends EventEmitter {
6153
7279
  }
6154
7280
  isIgnored(filePath) {
6155
7281
  if (!filePath) return false;
6156
- const abs = path16.resolve(this.workspaceRoot, filePath);
6157
- const rel = path16.relative(this.workspaceRoot, abs);
6158
- if (!rel || rel.startsWith("..") || path16.isAbsolute(rel)) return true;
6159
- const posixRel = rel.split(path16.sep).join("/");
7282
+ const abs = path18.resolve(this.workspaceRoot, filePath);
7283
+ const rel = path18.relative(this.workspaceRoot, abs);
7284
+ if (!rel || rel.startsWith("..") || path18.isAbsolute(rel)) return true;
7285
+ const posixRel = rel.split(path18.sep).join("/");
6160
7286
  return this.ignoreMatcher.ignores(posixRel);
6161
7287
  }
6162
7288
  };
@@ -6180,9 +7306,11 @@ var McpClient = class _McpClient {
6180
7306
  tools = /* @__PURE__ */ new Map();
6181
7307
  trustedServers;
6182
7308
  approvalCallback;
7309
+ onWarn;
6183
7310
  constructor(options = {}) {
6184
7311
  this.trustedServers = new Set(options.trustedServers ?? []);
6185
7312
  this.approvalCallback = options.approvalCallback;
7313
+ this.onWarn = options.onWarn ?? ((message) => console.warn(message));
6186
7314
  }
6187
7315
  async connect(server) {
6188
7316
  if (!this.trustedServers.has(server.name)) {
@@ -6211,7 +7339,7 @@ var McpClient = class _McpClient {
6211
7339
  for (const tool of toolsResult.tools) {
6212
7340
  for (const existing of this.tools.values()) {
6213
7341
  if (existing.name === tool.name && existing.serverName !== server.name) {
6214
- console.warn(
7342
+ this.onWarn(
6215
7343
  `[mcp] Tool "${tool.name}" is exposed by both "${existing.serverName}" and "${server.name}". Cascade disambiguates internally via mcp::<server>::<tool>.`
6216
7344
  );
6217
7345
  break;
@@ -6291,6 +7419,19 @@ var PermissionEscalator = class extends EventEmitter {
6291
7419
  t1Evaluator;
6292
7420
  /** Pending user-decision resolvers keyed by request ID */
6293
7421
  pendingUserDecisions = /* @__PURE__ */ new Map();
7422
+ /** ms to wait for a user approval decision before denying for safety. */
7423
+ approvalTimeoutMs;
7424
+ /** Autonomous mode (autonomy: 'auto'): non-dangerous tools auto-approve. */
7425
+ autonomous;
7426
+ constructor(approvalTimeoutMs = 6e5, autonomous = false) {
7427
+ super();
7428
+ this.approvalTimeoutMs = approvalTimeoutMs;
7429
+ this.autonomous = autonomous;
7430
+ }
7431
+ /** Toggle autonomous auto-approval at runtime (e.g. from /auto). */
7432
+ setAutonomous(on) {
7433
+ this.autonomous = on;
7434
+ }
6294
7435
  setT2Evaluator(evaluator) {
6295
7436
  this.t2Evaluator = evaluator;
6296
7437
  }
@@ -6303,7 +7444,7 @@ var PermissionEscalator = class extends EventEmitter {
6303
7444
  */
6304
7445
  async requestPermission(req) {
6305
7446
  const cacheKey = `${req.parentT2Id}:${req.toolName}`;
6306
- if (this.sessionCache.has(cacheKey)) {
7447
+ if (!req.forceReprompt && this.sessionCache.has(cacheKey)) {
6307
7448
  return {
6308
7449
  requestId: req.id,
6309
7450
  approved: this.sessionCache.get(cacheKey),
@@ -6323,6 +7464,15 @@ var PermissionEscalator = class extends EventEmitter {
6323
7464
  this.sessionCache.set(cacheKey, true);
6324
7465
  return decision;
6325
7466
  }
7467
+ if (this.autonomous && !req.isDangerous) {
7468
+ return {
7469
+ requestId: req.id,
7470
+ approved: true,
7471
+ always: false,
7472
+ decidedBy: "T1",
7473
+ reasoning: "Autonomous mode \u2014 non-dangerous tool auto-approved"
7474
+ };
7475
+ }
6326
7476
  if (this.t2Evaluator) {
6327
7477
  try {
6328
7478
  const t2Decision = await this.t2Evaluator(req);
@@ -6363,13 +7513,28 @@ var PermissionEscalator = class extends EventEmitter {
6363
7513
  }
6364
7514
  waitForUserDecision(req) {
6365
7515
  return new Promise((resolve) => {
7516
+ let timer;
6366
7517
  const wrappedResolver = (decision) => {
7518
+ if (timer) clearTimeout(timer);
6367
7519
  if (decision.always) {
6368
7520
  this.sessionCache.set(`${req.parentT2Id}:${req.toolName}`, decision.approved);
6369
7521
  }
6370
7522
  resolve(decision);
6371
7523
  };
6372
7524
  this.pendingUserDecisions.set(req.id, wrappedResolver);
7525
+ if (this.approvalTimeoutMs > 0 && Number.isFinite(this.approvalTimeoutMs)) {
7526
+ timer = setTimeout(() => {
7527
+ if (this.pendingUserDecisions.delete(req.id)) {
7528
+ resolve({
7529
+ requestId: req.id,
7530
+ approved: false,
7531
+ decidedBy: "USER",
7532
+ reasoning: `Approval timed out after ${this.approvalTimeoutMs}ms \u2014 denied for safety`
7533
+ });
7534
+ }
7535
+ }, this.approvalTimeoutMs);
7536
+ timer.unref?.();
7537
+ }
6373
7538
  this.emit("permission:user-required", req);
6374
7539
  });
6375
7540
  }
@@ -6387,11 +7552,14 @@ var PermissionEscalator = class extends EventEmitter {
6387
7552
  };
6388
7553
  var ProviderConfigSchema = z.object({
6389
7554
  type: z.enum(["anthropic", "openai", "gemini", "azure", "openai-compatible", "ollama"]),
7555
+ label: z.string().optional(),
6390
7556
  apiKey: z.string().optional(),
6391
7557
  baseUrl: z.string().url().optional(),
6392
7558
  deploymentName: z.string().optional(),
6393
7559
  apiVersion: z.string().optional(),
6394
- model: z.string().optional()
7560
+ model: z.string().optional(),
7561
+ authToken: z.string().optional(),
7562
+ credentialSource: z.string().optional()
6395
7563
  });
6396
7564
  var ModelOverridesSchema = z.object({
6397
7565
  t1: z.string().optional(),
@@ -6421,10 +7589,12 @@ var ToolsConfigSchema = z.object({
6421
7589
  requireApprovalFor: z.array(z.string()).default([]),
6422
7590
  browserEnabled: z.boolean().default(false),
6423
7591
  mcpServers: z.array(McpServerConfigSchema).optional(),
7592
+ mcpTrusted: z.array(z.string()).optional(),
6424
7593
  /** Web search backends — at least one should be configured for best results */
6425
7594
  webSearch: WebSearchConfigSchema.optional()
6426
7595
  });
6427
7596
  var HookDefinitionSchema = z.object({
7597
+ name: z.string().optional(),
6428
7598
  command: z.string(),
6429
7599
  tools: z.array(z.string()).optional(),
6430
7600
  timeout: z.number().optional()
@@ -6437,6 +7607,13 @@ var HooksConfigSchema = z.object({
6437
7607
  });
6438
7608
  var DashboardConfigSchema = z.object({
6439
7609
  port: z.number().default(4891),
7610
+ /**
7611
+ * Interface to bind the dashboard HTTP/WebSocket server to. Defaults to
7612
+ * loopback so the dashboard — which exposes /api/run (arbitrary task
7613
+ * execution) and config mutation — is never reachable from the network
7614
+ * unless the operator explicitly opts in (e.g. "0.0.0.0" for team mode).
7615
+ */
7616
+ host: z.string().default("127.0.0.1"),
6440
7617
  auth: z.boolean().default(true),
6441
7618
  teamMode: z.enum(["single", "multi"]).default("single"),
6442
7619
  secret: z.string().optional()
@@ -6459,6 +7636,15 @@ var TierLimitsSchema = z.object({
6459
7636
  var BudgetConfigSchema = z.object({
6460
7637
  dailyBudgetUsd: z.number().optional(),
6461
7638
  sessionBudgetUsd: z.number().optional(),
7639
+ /**
7640
+ * Hard per-task token ceiling. A single `cascade run` is stopped once its
7641
+ * combined token usage crosses this, so a mis-routed trivial task can never
7642
+ * fan out into a runaway multi-agent burn. Resets every run. Raise it for
7643
+ * genuinely large jobs. Defaults to 200k.
7644
+ */
7645
+ maxTokensPerRun: z.number().int().positive().default(2e5),
7646
+ /** Optional hard per-task cost ceiling (USD). Unset = only the token cap applies. */
7647
+ maxCostPerRunUsd: z.number().positive().optional(),
6462
7648
  warnAtPct: z.number().default(80)
6463
7649
  });
6464
7650
  var WorkspaceConfigSchema = z.object({
@@ -6487,6 +7673,32 @@ var CascadeConfigSchema = z.object({
6487
7673
  * Heuristic-first with AI inference fallback (adds ~0–500ms per task).
6488
7674
  */
6489
7675
  cascadeAuto: z.boolean().default(false),
7676
+ /**
7677
+ * Cascade Auto trade-off bias when picking a model for a task:
7678
+ * - 'balanced' (default): quality × cost-efficiency — cheap models win
7679
+ * trivial tasks, strong models win hard ones.
7680
+ * - 'quality': pick the highest-benchmark model; cost only breaks ties.
7681
+ * - 'cost': pick the cheapest model that clears a per-task quality floor.
7682
+ */
7683
+ autoBias: z.enum(["balanced", "quality", "cost"]).default("balanced"),
7684
+ /**
7685
+ * Public-benchmark data source for Cascade Auto. All fields have safe
7686
+ * defaults so zero config "just works" — live data is fetched in the
7687
+ * background and the bundled snapshot is used until it arrives (or offline).
7688
+ */
7689
+ benchmarks: z.object({
7690
+ /** Fetch current quality scores from a public source. Default: true. */
7691
+ live: z.boolean().default(true),
7692
+ /** How long a fetched snapshot stays fresh before re-fetching (hours). */
7693
+ refreshHours: z.number().min(0).default(24),
7694
+ /**
7695
+ * Override the quality-benchmark source URL (must return the snapshot
7696
+ * JSON shape). When unset, the maintained GitHub-raw snapshot is used.
7697
+ */
7698
+ sourceUrl: z.string().url().optional(),
7699
+ /** Fetch current per-token prices from OpenRouter (free, no key). */
7700
+ pricingLive: z.boolean().default(true)
7701
+ }).default({}),
6490
7702
  /**
6491
7703
  * Runtime Tool Creation: when true, T3 workers can generate and register new tools
6492
7704
  * at runtime via the ToolCreator when no existing tool can handle a required operation.
@@ -6494,6 +7706,13 @@ var CascadeConfigSchema = z.object({
6494
7706
  * HTTP calls from generated tools require approval.
6495
7707
  */
6496
7708
  enableToolCreation: z.boolean().default(true),
7709
+ /**
7710
+ * Persist runtime-generated tools to .cascade/dynamic-tools.json and reload them
7711
+ * on startup for cross-run dedup. Reloaded (and peer-received) tools are always
7712
+ * treated as UNTRUSTED — their dangerous actions re-escalate. Set false to disable
7713
+ * persistence entirely.
7714
+ */
7715
+ persistDynamicTools: z.boolean().default(true),
6497
7716
  /**
6498
7717
  * External plugin paths or npm package names to load at startup.
6499
7718
  * Each entry must export a default ToolPlugin object.
@@ -6510,7 +7729,89 @@ var CascadeConfigSchema = z.object({
6510
7729
  * Timeout in milliseconds for a single local model inference call.
6511
7730
  * Local models can take minutes for large parameter counts. Default: 5 minutes.
6512
7731
  */
6513
- localInferenceTimeoutMs: z.number().int().min(1e3).default(3e5)
7732
+ localInferenceTimeoutMs: z.number().int().min(1e3).default(3e5),
7733
+ /**
7734
+ * Timeout (ms) for a single cloud LLM call (streaming or not). Guards against
7735
+ * a stalled provider stream hanging the whole run with no output. On timeout
7736
+ * the call errors and the worker escalates. Default: 2 minutes.
7737
+ */
7738
+ cloudInferenceTimeoutMs: z.number().int().min(1e3).default(12e4),
7739
+ /**
7740
+ * Timeout (ms) for a tool-approval decision. If no decision arrives in time the
7741
+ * request is DENIED (never auto-approved) so the run continues rather than
7742
+ * hanging on an unanswered prompt. Default: 10 minutes.
7743
+ */
7744
+ approvalTimeoutMs: z.number().int().min(1e3).default(6e5),
7745
+ /**
7746
+ * Boardroom plan approval: pause after the plan is produced so the user can
7747
+ * review the org chart (sections, workers, estimated cost) before any worker
7748
+ * spawns. Scope:
7749
+ * 'never' — never pause (default; no behavior change).
7750
+ * 'complex' — pause Complex runs only ('always' is kept as an alias).
7751
+ * 'all' — pause Moderate and Complex runs.
7752
+ * Headless/SDK consumers without a listener auto-approve, so pausing is safe
7753
+ * outside the TUI.
7754
+ */
7755
+ planApproval: z.enum(["never", "complex", "all", "always"]).default("never"),
7756
+ /**
7757
+ * Plan-review behaviour for the boardroom gate:
7758
+ * autoReviewer — a reviewer model critiques the plan (gaps/risks/cost)
7759
+ * before you see it, and the critique is shown in the dialog.
7760
+ * editable — allow editing the plan (drop sections) in the dialog.
7761
+ * maxRevisionRounds — how many steering-note → re-plan → re-ask rounds the
7762
+ * boardroom allows before proceeding with the last plan.
7763
+ */
7764
+ planReview: z.object({
7765
+ autoReviewer: z.boolean().default(false),
7766
+ editable: z.boolean().default(true),
7767
+ maxRevisionRounds: z.number().int().min(1).max(20).default(5)
7768
+ }).default({}),
7769
+ /**
7770
+ * Autonomy level. 'manual' (default): plan + tool approvals prompt as usual.
7771
+ * 'auto': hands-off — the plan gate auto-approves and the escalator
7772
+ * auto-approves NON-dangerous tools, while dangerous tools still escalate and
7773
+ * budget caps remain the hard stop. Toggle at runtime with /auto.
7774
+ */
7775
+ autonomy: z.enum(["manual", "auto"]).default("manual"),
7776
+ /**
7777
+ * Max corrective re-plan passes T1's reviewer runs before returning the best
7778
+ * partial result. The run also stops early when a pass makes no net progress.
7779
+ */
7780
+ maxReplanPasses: z.number().int().min(0).max(10).default(2),
7781
+ /**
7782
+ * Reflection / self-critique. When enabled, after a worker's pass/fail self-test
7783
+ * succeeds it runs a goal-alignment critique and revises once if the output is
7784
+ * weak against the broader goal (not just the subtask spec). Off by default — it
7785
+ * adds an LLM call per worker.
7786
+ */
7787
+ reflection: z.object({
7788
+ enabled: z.boolean().default(false),
7789
+ maxRounds: z.number().int().min(1).max(3).default(1)
7790
+ }).default({}),
7791
+ /**
7792
+ * T3 worker execution within a dependency wave:
7793
+ * 'auto' (default) — sequential when the T3 tier is a LOCAL model (a single
7794
+ * GPU serializes anyway, so parallel just thrashes the queue), parallel for
7795
+ * cloud models.
7796
+ * 'parallel' / 'sequential' — force it.
7797
+ */
7798
+ t3Execution: z.enum(["auto", "parallel", "sequential"]).default("auto"),
7799
+ /**
7800
+ * T3→T2 reinforcement: when enabled, a worker that discovers its subtask should
7801
+ * fan out can call the `request_workers` tool to have its T2 manager spawn
7802
+ * sibling workers for the new pieces (no 4th tier; bounded). Off by default.
7803
+ */
7804
+ reinforcements: z.object({
7805
+ enabled: z.boolean().default(false),
7806
+ maxPerSection: z.number().int().min(1).max(20).default(4)
7807
+ }).default({}),
7808
+ /**
7809
+ * Render the TUI in the terminal's alternate screen buffer (like vim).
7810
+ * Flicker-proof and restores the shell on exit, but native scrollback is
7811
+ * unavailable — history scrolls in-app with PgUp/PgDn. Also enabled per
7812
+ * session with the --alt-screen flag. Default: off.
7813
+ */
7814
+ altScreen: z.boolean().default(false)
6514
7815
  });
6515
7816
 
6516
7817
  // src/config/validate.ts
@@ -6648,14 +7949,20 @@ var TASK_TYPE_TAGS = {
6648
7949
  };
6649
7950
  var TaskAnalyzer = class {
6650
7951
  tracker;
7952
+ bias;
6651
7953
  lastProfile = null;
6652
7954
  lastSelectedModels = /* @__PURE__ */ new Map();
6653
- constructor(tracker) {
7955
+ constructor(tracker, bias = "balanced") {
6654
7956
  this.tracker = tracker;
7957
+ this.bias = bias;
6655
7958
  }
6656
7959
  setTracker(tracker) {
6657
7960
  this.tracker = tracker;
6658
7961
  }
7962
+ /** Change the cost/quality bias at runtime (e.g. when config reloads). */
7963
+ setBias(bias) {
7964
+ this.bias = bias;
7965
+ }
6659
7966
  /** Returns the TaskProfile from the most recent analyze() call — used for outcome recording. */
6660
7967
  getLastProfile() {
6661
7968
  return this.lastProfile;
@@ -6715,7 +8022,16 @@ var TaskAnalyzer = class {
6715
8022
  const perf = this.tracker?.performanceScore(model.id, profile.type) ?? 0.5;
6716
8023
  const costEff = this.costEfficiency(model, profile.complexity);
6717
8024
  const match = this.taskMatchScore(model, profile);
6718
- return perf * costEff * match;
8025
+ const benchmark = 0.3 + 0.7 * benchmarkScore01(model, profile.type);
8026
+ switch (this.bias) {
8027
+ case "quality":
8028
+ return perf * match * benchmark ** 2 * (0.85 + 0.15 * costEff);
8029
+ case "cost":
8030
+ return perf * match * costEff ** 1.5 * Math.sqrt(benchmark);
8031
+ case "balanced":
8032
+ default:
8033
+ return perf * costEff * match * benchmark;
8034
+ }
6719
8035
  }
6720
8036
  costEfficiency(model, complexity) {
6721
8037
  if (this.tracker) return this.tracker.costEfficiencyScore(model, complexity);
@@ -6735,7 +8051,7 @@ var TaskAnalyzer = class {
6735
8051
  analysisCache.clear();
6736
8052
  }
6737
8053
  };
6738
- var DEFAULT_STATS_FILE = path16.join(os3.homedir(), ".cascade", "model-perf.json");
8054
+ var DEFAULT_STATS_FILE = path18.join(os4.homedir(), ".cascade", "model-perf.json");
6739
8055
  var ModelPerformanceTracker = class {
6740
8056
  stats = /* @__PURE__ */ new Map();
6741
8057
  statsFile;
@@ -6747,7 +8063,7 @@ var ModelPerformanceTracker = class {
6747
8063
  if (this.loaded) return;
6748
8064
  this.loaded = true;
6749
8065
  try {
6750
- const raw = await fs3.readFile(this.statsFile, "utf-8");
8066
+ const raw = await fs4.readFile(this.statsFile, "utf-8");
6751
8067
  const parsed = JSON.parse(raw);
6752
8068
  for (const [key, stat] of Object.entries(parsed)) {
6753
8069
  this.stats.set(key, stat);
@@ -6757,10 +8073,10 @@ var ModelPerformanceTracker = class {
6757
8073
  }
6758
8074
  async save() {
6759
8075
  try {
6760
- await fs3.mkdir(path16.dirname(this.statsFile), { recursive: true });
8076
+ await fs4.mkdir(path18.dirname(this.statsFile), { recursive: true });
6761
8077
  const obj = {};
6762
8078
  for (const [key, stat] of this.stats) obj[key] = stat;
6763
- await fs3.writeFile(this.statsFile, JSON.stringify(obj, null, 2), "utf-8");
8079
+ await fs4.writeFile(this.statsFile, JSON.stringify(obj, null, 2), "utf-8");
6764
8080
  } catch {
6765
8081
  }
6766
8082
  }
@@ -6808,6 +8124,96 @@ var ModelPerformanceTracker = class {
6808
8124
  return Math.max(0.1, 1 - normalised * complexityWeight);
6809
8125
  }
6810
8126
  };
8127
+ var DYNAMIC_TOOLS_FILE = "dynamic-tools.json";
8128
+ function normalizeToolSchema(schema) {
8129
+ if (schema && schema["type"] === "object" && typeof schema["properties"] === "object") {
8130
+ return schema;
8131
+ }
8132
+ const properties = schema && typeof schema === "object" ? schema : {};
8133
+ return {
8134
+ type: "object",
8135
+ properties,
8136
+ required: Object.keys(properties)
8137
+ };
8138
+ }
8139
+ function capabilityKey(text) {
8140
+ return Array.from(
8141
+ new Set((text.toLowerCase().match(/[a-z0-9]+/g) ?? []).filter((w) => w.length > 2))
8142
+ ).sort().join(" ");
8143
+ }
8144
+ var DYNAMIC_TOOL_TIMEOUT_MS = 15e3;
8145
+ var DYNAMIC_FETCH_MAX = 1e6;
8146
+ var HARNESS_SRC = `
8147
+ const { parentPort, workerData } = require('node:worker_threads');
8148
+ const { executeCode, input } = workerData;
8149
+ let nextId = 0;
8150
+ const pending = new Map();
8151
+ function bridge(kind, payload) {
8152
+ return new Promise((resolve, reject) => {
8153
+ const id = nextId++;
8154
+ pending.set(id, { resolve, reject });
8155
+ parentPort.postMessage(Object.assign({ kind, id }, payload));
8156
+ });
8157
+ }
8158
+ parentPort.on('message', (msg) => {
8159
+ const p = pending.get(msg.id);
8160
+ if (!p) return;
8161
+ pending.delete(msg.id);
8162
+ if (msg.error !== undefined) p.reject(new Error(msg.error));
8163
+ else p.resolve(msg.value);
8164
+ });
8165
+ const callTool = (name, toolInput) => bridge('callTool', { name: name, input: toolInput });
8166
+ const fetch = async (url, init) => {
8167
+ const safeInit = init && typeof init === 'object'
8168
+ ? { method: init.method, headers: init.headers, body: typeof init.body === 'string' ? init.body : undefined }
8169
+ : undefined;
8170
+ const r = await bridge('fetch', { url: url, init: safeInit });
8171
+ return {
8172
+ ok: r.ok, status: r.status, statusText: r.statusText,
8173
+ headers: { get: (k) => (String(k).toLowerCase() === 'content-type' ? r.contentType : null) },
8174
+ text: async () => r.body,
8175
+ json: async () => JSON.parse(r.body),
8176
+ };
8177
+ };
8178
+ (async () => {
8179
+ const AsyncFunction = Object.getPrototypeOf(async function () {}).constructor;
8180
+ const fn = new AsyncFunction('input', 'callTool', 'fetch', 'console', executeCode);
8181
+ return await fn(input, callTool, fetch, { log() {}, error() {} });
8182
+ })()
8183
+ .then((r) => parentPort.postMessage({ kind: 'result', value: String(r == null ? '' : r) }))
8184
+ .catch((e) => parentPort.postMessage({ kind: 'result', value: 'Tool error: ' + (e && e.message ? e.message : String(e)) }));
8185
+ `;
8186
+ function isExecutableToolCode(code) {
8187
+ try {
8188
+ const AsyncFunction = Object.getPrototypeOf(async function() {
8189
+ }).constructor;
8190
+ new AsyncFunction("input", "callTool", "fetch", "console", code);
8191
+ return true;
8192
+ } catch {
8193
+ return false;
8194
+ }
8195
+ }
8196
+ async function bridgeFetch(url, init) {
8197
+ try {
8198
+ const i = init && typeof init === "object" ? init : {};
8199
+ const resp = await safeFetch(url, {
8200
+ method: typeof i["method"] === "string" ? i["method"] : void 0,
8201
+ headers: i["headers"],
8202
+ body: typeof i["body"] === "string" ? i["body"] : void 0
8203
+ });
8204
+ const contentType = resp.headers.get("content-type") ?? "";
8205
+ let body = "";
8206
+ try {
8207
+ body = await resp.text();
8208
+ } catch {
8209
+ body = "";
8210
+ }
8211
+ if (body.length > DYNAMIC_FETCH_MAX) body = body.slice(0, DYNAMIC_FETCH_MAX);
8212
+ return { ok: resp.ok, status: resp.status, statusText: resp.statusText, contentType, body };
8213
+ } catch (err) {
8214
+ return { __error: err instanceof Error ? err.message : String(err) };
8215
+ }
8216
+ }
6811
8217
  var DynamicTool = class extends BaseTool {
6812
8218
  name;
6813
8219
  description;
@@ -6815,8 +8221,12 @@ var DynamicTool = class extends BaseTool {
6815
8221
  executeCode;
6816
8222
  _isDangerous;
6817
8223
  registry;
6818
- escalator;
6819
- constructor(spec, registry, escalator) {
8224
+ /** Resolve the CURRENT escalator at call time — covers tools registered before
8225
+ * the per-run escalator was wired (persisted at init, received from a peer). */
8226
+ getEscalator;
8227
+ /** Untrusted = loaded from disk / a peer; its dangerous calls always re-prompt. */
8228
+ trusted;
8229
+ constructor(spec, registry, getEscalator, trusted) {
6820
8230
  super();
6821
8231
  this.name = spec.name;
6822
8232
  this.description = spec.description;
@@ -6824,32 +8234,35 @@ var DynamicTool = class extends BaseTool {
6824
8234
  this.executeCode = spec.executeCode;
6825
8235
  this._isDangerous = spec.isDangerous;
6826
8236
  this.registry = registry;
6827
- this.escalator = escalator;
8237
+ this.getEscalator = getEscalator;
8238
+ this.trusted = trusted;
6828
8239
  }
6829
8240
  isDangerous() {
6830
8241
  return this._isDangerous;
6831
8242
  }
6832
8243
  async execute(input, options) {
6833
8244
  const registry = this.registry;
6834
- const escalator = this.escalator;
6835
8245
  const callTool = async (toolName, toolInput) => {
6836
8246
  if (!registry.hasTool(toolName)) return `Tool not found: ${toolName}`;
6837
8247
  if (registry.isDangerous(toolName)) {
6838
- if (escalator) {
6839
- const req = {
6840
- id: `dynamic-${this.name}-${toolName}-${Date.now()}`,
6841
- requestedBy: `dynamic_tool:${this.name}`,
6842
- parentT2Id: options.tierId,
6843
- toolName,
6844
- input: toolInput,
6845
- isDangerous: true,
6846
- subtaskContext: `Dynamic tool "${this.name}" requesting access to "${toolName}"`,
6847
- sectionContext: `Dynamic tool "${this.name}"`
6848
- };
6849
- const decision = await escalator.requestPermission(req);
6850
- if (!decision.approved) {
6851
- return `Permission denied for ${toolName} (decided by ${decision.decidedBy}).`;
6852
- }
8248
+ const escalator = this.getEscalator();
8249
+ if (!escalator) {
8250
+ return `Permission denied for "${toolName}": dynamic tool "${this.name}" has no approver available (default-deny).`;
8251
+ }
8252
+ const req = {
8253
+ id: `dynamic-${this.name}-${toolName}-${Date.now()}`,
8254
+ requestedBy: `dynamic_tool:${this.name}`,
8255
+ parentT2Id: options.tierId,
8256
+ toolName,
8257
+ input: toolInput,
8258
+ isDangerous: true,
8259
+ subtaskContext: `Dynamic tool "${this.name}" (${this.trusted ? "trusted" : "UNTRUSTED"}) requesting access to "${toolName}"`,
8260
+ sectionContext: `Dynamic tool "${this.name}"`,
8261
+ forceReprompt: !this.trusted
8262
+ };
8263
+ const decision = await escalator.requestPermission(req);
8264
+ if (!decision.approved) {
8265
+ return `Permission denied for ${toolName} (decided by ${decision.decidedBy}).`;
6853
8266
  }
6854
8267
  }
6855
8268
  try {
@@ -6859,41 +8272,52 @@ var DynamicTool = class extends BaseTool {
6859
8272
  return `Error calling ${toolName}: ${err instanceof Error ? err.message : String(err)}`;
6860
8273
  }
6861
8274
  };
6862
- const sandbox = {
6863
- input,
6864
- fetch: globalThis.fetch,
6865
- callTool,
6866
- JSON,
6867
- Math,
6868
- Date,
6869
- console: { log: () => {
6870
- }, error: () => {
6871
- } },
6872
- setTimeout,
6873
- clearTimeout,
6874
- Promise,
6875
- Error,
6876
- String,
6877
- Number,
6878
- Boolean,
6879
- Array,
6880
- Object,
6881
- result: void 0
6882
- };
6883
- const context = createContext(sandbox);
6884
- const wrapped = `(async () => { ${this.executeCode} })().then(r => { result = String(r ?? ''); }).catch(e => { result = 'Tool error: ' + e.message; });`;
6885
- try {
6886
- const promise = runInContext(wrapped, context, {
6887
- timeout: 15e3,
6888
- breakOnSigint: true,
6889
- filename: `dynamic_tool_${this.name}.js`,
6890
- displayErrors: true
8275
+ return this.runInWorker(input, callTool);
8276
+ }
8277
+ /** Spawn the worker, service its callTool/fetch bridge, enforce the kill timeout. */
8278
+ runInWorker(input, callTool) {
8279
+ const timeoutMs = Math.max(200, Number(process.env["CASCADE_DYNAMIC_TOOL_TIMEOUT_MS"]) || DYNAMIC_TOOL_TIMEOUT_MS);
8280
+ return new Promise((resolve) => {
8281
+ let settled = false;
8282
+ const worker = new Worker(HARNESS_SRC, {
8283
+ eval: true,
8284
+ workerData: { executeCode: this.executeCode, input },
8285
+ resourceLimits: { maxOldGenerationSizeMb: 128 }
6891
8286
  });
6892
- await promise;
6893
- return sandbox["result"] ?? "";
6894
- } catch (err) {
6895
- return `Dynamic tool error: ${err instanceof Error ? err.message : String(err)}`;
6896
- }
8287
+ const finish = (value) => {
8288
+ if (settled) return;
8289
+ settled = true;
8290
+ clearTimeout(timer);
8291
+ void worker.terminate();
8292
+ resolve(value);
8293
+ };
8294
+ const timer = setTimeout(
8295
+ () => finish(`Dynamic tool "${this.name}" timed out after ${timeoutMs}ms and was terminated.`),
8296
+ timeoutMs
8297
+ );
8298
+ timer.unref?.();
8299
+ worker.on("message", (msg) => {
8300
+ if (msg?.kind === "result") {
8301
+ finish(typeof msg.value === "string" ? msg.value : String(msg.value ?? ""));
8302
+ } else if (msg?.kind === "callTool") {
8303
+ void (async () => {
8304
+ const value = await callTool(String(msg.name), msg.input ?? {});
8305
+ if (!settled) worker.postMessage({ id: msg.id, value });
8306
+ })();
8307
+ } else if (msg?.kind === "fetch") {
8308
+ void (async () => {
8309
+ const r = await bridgeFetch(String(msg.url), msg.init);
8310
+ if (settled) return;
8311
+ if ("__error" in r) worker.postMessage({ id: msg.id, error: r.__error });
8312
+ else worker.postMessage({ id: msg.id, value: r });
8313
+ })();
8314
+ }
8315
+ });
8316
+ worker.on("error", (err) => finish(`Dynamic tool error: ${err instanceof Error ? err.message : String(err)}`));
8317
+ worker.on("exit", (code) => {
8318
+ if (code !== 0) finish(`Dynamic tool "${this.name}" exited unexpectedly (code ${code}).`);
8319
+ });
8320
+ });
6897
8321
  }
6898
8322
  };
6899
8323
  var TOOL_CREATOR_PROMPT = `You are a tool-generation assistant for the Cascade AI system.
@@ -6926,52 +8350,153 @@ var ToolCreator = class {
6926
8350
  router;
6927
8351
  registry;
6928
8352
  escalator;
6929
- createdTools = /* @__PURE__ */ new Set();
6930
- constructor(router, registry) {
8353
+ workspacePath;
8354
+ /** When false, persisted tools are neither loaded nor written. */
8355
+ persistEnabled;
8356
+ logger;
8357
+ /** name → spec, for persistence, broadcast, and re-registration. */
8358
+ specs = /* @__PURE__ */ new Map();
8359
+ /** capability fingerprint → tool name, so the same need isn't re-generated. */
8360
+ capabilityIndex = /* @__PURE__ */ new Map();
8361
+ constructor(router, registry, workspacePath, persistEnabled = true) {
6931
8362
  this.router = router;
6932
8363
  this.registry = registry;
8364
+ this.workspacePath = workspacePath;
8365
+ this.persistEnabled = persistEnabled;
6933
8366
  }
6934
8367
  setPermissionEscalator(escalator) {
6935
8368
  this.escalator = escalator;
6936
8369
  }
8370
+ /** Route diagnostics through the host (Cascade) so they survive the Ink TUI. */
8371
+ setLogger(fn) {
8372
+ this.logger = fn;
8373
+ }
8374
+ /** Returns the stored spec for a created tool (for peer broadcast). */
8375
+ getSpec(name) {
8376
+ return this.specs.get(name);
8377
+ }
8378
+ log(msg) {
8379
+ if (this.logger) this.logger(msg);
8380
+ }
6937
8381
  /**
6938
8382
  * Generate a new tool from a description and register it with the ToolRegistry.
6939
- * The generated tool has access to all registered cascade tools via callTool().
6940
- * Returns the tool name if successful, null if generation failed.
8383
+ * Returns the tool name on success, or null on failure (with a logged reason —
8384
+ * failures are no longer swallowed silently). Reuses an existing tool when the
8385
+ * same capability has already been created (dedup) so peers/runs don't
8386
+ * regenerate identical tools.
6941
8387
  */
6942
8388
  async createTool(description, context) {
8389
+ const key = capabilityKey(`${description} ${context}`);
8390
+ const existing = this.capabilityIndex.get(key);
8391
+ if (existing && this.registry.hasTool(existing)) {
8392
+ this.log(`[tool-creator] Reusing existing tool "${existing}" for: ${description.slice(0, 80)}`);
8393
+ return existing;
8394
+ }
6943
8395
  const prompt = `${TOOL_CREATOR_PROMPT}
6944
8396
 
6945
8397
  Task context: ${context.slice(0, 200)}
6946
8398
  Required capability: ${description.slice(0, 300)}`;
8399
+ let spec = null;
8400
+ for (let attempt = 1; attempt <= 2 && !spec; attempt++) {
8401
+ try {
8402
+ const result = await this.router.generate("T3", {
8403
+ messages: [{ role: "user", content: prompt }],
8404
+ maxTokens: 800
8405
+ });
8406
+ const jsonMatch = /\{[\s\S]*\}/.exec(result.content);
8407
+ if (!jsonMatch) {
8408
+ this.log(`[tool-creator] Attempt ${attempt}: model returned no JSON object.`);
8409
+ continue;
8410
+ }
8411
+ const parsed = JSON.parse(jsonMatch[0]);
8412
+ if (!parsed.name || !parsed.description || !parsed.executeCode || !parsed.inputSchema) {
8413
+ this.log(`[tool-creator] Attempt ${attempt}: spec missing required fields (name/description/executeCode/inputSchema).`);
8414
+ continue;
8415
+ }
8416
+ spec = parsed;
8417
+ } catch (err) {
8418
+ this.log(`[tool-creator] Attempt ${attempt} failed: ${err instanceof Error ? err.message : String(err)}`);
8419
+ }
8420
+ }
8421
+ if (!spec) {
8422
+ this.log(`[tool-creator] Could not generate a tool for: ${description.slice(0, 80)}`);
8423
+ return null;
8424
+ }
8425
+ spec.inputSchema = normalizeToolSchema(spec.inputSchema);
8426
+ if (this.specs.has(spec.name) || this.registry.hasTool(spec.name)) {
8427
+ spec.name = `${spec.name}_${Date.now() % 1e4}`;
8428
+ }
8429
+ if (!isExecutableToolCode(spec.executeCode)) {
8430
+ this.log(`[tool-creator] Generated code for "${spec.name}" has a syntax error \u2014 discarded.`);
8431
+ return null;
8432
+ }
8433
+ this.registerSpec(spec, true);
8434
+ this.capabilityIndex.set(key, spec.name);
8435
+ this.log(`[tool-creator] Created tool "${spec.name}".`);
8436
+ void this.persist();
8437
+ return spec.name;
8438
+ }
8439
+ /**
8440
+ * Register a spec (from createTool, disk, or a peer) into the registry.
8441
+ * Idempotent — a name already present is skipped. `trusted` is set by the
8442
+ * caller and never inherited from disk: createTool passes true; persisted and
8443
+ * peer-broadcast specs pass false, so their dangerous actions always re-escalate.
8444
+ * The DynamicTool resolves the escalator lazily (`() => this.escalator`) so a
8445
+ * later setPermissionEscalator covers tools registered before the run wired it.
8446
+ */
8447
+ registerSpec(spec, trusted = false) {
8448
+ spec.trusted = trusted;
8449
+ if (this.registry.hasTool(spec.name)) {
8450
+ this.specs.set(spec.name, spec);
8451
+ return;
8452
+ }
8453
+ const tool = new DynamicTool(spec, this.registry, () => this.escalator, trusted);
8454
+ this.registry.register(tool);
8455
+ this.specs.set(spec.name, spec);
8456
+ this.capabilityIndex.set(capabilityKey(`${spec.description}`), spec.name);
8457
+ }
8458
+ /** Load tools persisted by previous runs and register them — as UNTRUSTED, and
8459
+ * only after re-validating each spec (its source could have been tampered with
8460
+ * or authored during a prior prompt-injected run). Untrusted tools re-escalate
8461
+ * any dangerous action, so a silently-reloaded tool can't act without approval. */
8462
+ async loadPersistedTools() {
8463
+ if (!this.workspacePath || !this.persistEnabled) return;
8464
+ const file = path18.join(this.workspacePath, ".cascade", DYNAMIC_TOOLS_FILE);
6947
8465
  try {
6948
- const result = await this.router.generate("T3", {
6949
- messages: [{ role: "user", content: prompt }],
6950
- maxTokens: 800
6951
- });
6952
- const jsonMatch = /\{[\s\S]*\}/.exec(result.content);
6953
- if (!jsonMatch) return null;
6954
- const spec = JSON.parse(jsonMatch[0]);
6955
- if (!spec.name || !spec.description || !spec.executeCode || !spec.inputSchema) return null;
6956
- if (this.createdTools.has(spec.name) || this.registry.hasTool(spec.name)) {
6957
- spec.name = `${spec.name}_${Date.now() % 1e4}`;
8466
+ const raw = await fs4.readFile(file, "utf-8");
8467
+ const specs = JSON.parse(raw);
8468
+ if (!Array.isArray(specs)) return;
8469
+ let loaded = 0;
8470
+ let skipped = 0;
8471
+ for (const spec of specs) {
8472
+ if (!(spec?.name && spec.description && spec.executeCode && spec.inputSchema) || !isExecutableToolCode(spec.executeCode)) {
8473
+ skipped++;
8474
+ continue;
8475
+ }
8476
+ spec.inputSchema = normalizeToolSchema(spec.inputSchema);
8477
+ this.registerSpec(spec, false);
8478
+ loaded++;
6958
8479
  }
6959
- try {
6960
- new Function("input", "fetch", "callTool", spec.executeCode);
6961
- } catch {
6962
- return null;
8480
+ if (loaded || skipped) {
8481
+ this.log(`[tool-creator] Loaded ${loaded} persisted tool(s) as untrusted${skipped ? `, skipped ${skipped} invalid` : ""}.`);
6963
8482
  }
6964
- const tool = new DynamicTool(spec, this.registry, this.escalator);
6965
- this.registry.register(tool);
6966
- this.createdTools.add(spec.name);
6967
- return spec.name;
6968
8483
  } catch {
6969
- return null;
8484
+ }
8485
+ }
8486
+ async persist() {
8487
+ if (!this.workspacePath || !this.persistEnabled) return;
8488
+ const dir = path18.join(this.workspacePath, ".cascade");
8489
+ const file = path18.join(dir, DYNAMIC_TOOLS_FILE);
8490
+ try {
8491
+ await fs4.mkdir(dir, { recursive: true });
8492
+ await fs4.writeFile(file, JSON.stringify(Array.from(this.specs.values()), null, 2), "utf-8");
8493
+ } catch (err) {
8494
+ this.log(`[tool-creator] Failed to persist tools: ${err instanceof Error ? err.message : String(err)}`);
6970
8495
  }
6971
8496
  }
6972
8497
  /** Returns the names of all tools created in this session. */
6973
8498
  getCreatedTools() {
6974
- return Array.from(this.createdTools);
8499
+ return Array.from(this.specs.keys());
6975
8500
  }
6976
8501
  };
6977
8502
 
@@ -6981,7 +8506,11 @@ var Cascade = class _Cascade extends EventEmitter {
6981
8506
  toolRegistry;
6982
8507
  mcpClient;
6983
8508
  config;
8509
+ /** Orchestration decisions for the CURRENT run — cleared on each run(). */
8510
+ decisionLog = [];
6984
8511
  initialized = false;
8512
+ /** Last task that stopped at the budget cap — powers /continue (resumeRun). */
8513
+ lastInterruptedRun;
6985
8514
  initPromise;
6986
8515
  store;
6987
8516
  audit;
@@ -6989,15 +8518,23 @@ var Cascade = class _Cascade extends EventEmitter {
6989
8518
  taskAnalyzer;
6990
8519
  perfTracker;
6991
8520
  toolCreator;
8521
+ workspacePath;
6992
8522
  constructor(config, workspacePath, store) {
6993
8523
  super();
6994
8524
  this.config = validateConfig(config);
8525
+ this.workspacePath = workspacePath;
6995
8526
  this.store = store;
6996
8527
  this.router = new CascadeRouter();
6997
8528
  this.mcpClient = new McpClient({
6998
8529
  trustedServers: this.config.tools.mcpTrusted,
6999
8530
  approvalCallback: async (server) => {
7000
8531
  return await this.requestMcpApproval(server);
8532
+ },
8533
+ // Route warnings through the event stream when anyone is listening —
8534
+ // a raw console write while the TUI is live corrupts Ink's frame.
8535
+ onWarn: (message) => {
8536
+ if (this.listenerCount("log") > 0) this.emit("log", { level: "warn", message });
8537
+ else console.warn(message);
7001
8538
  }
7002
8539
  });
7003
8540
  this.toolRegistry = new ToolRegistry(this.config.tools, workspacePath);
@@ -7007,11 +8544,15 @@ var Cascade = class _Cascade extends EventEmitter {
7007
8544
  if (this.config.cascadeAuto === true) {
7008
8545
  this.perfTracker = new ModelPerformanceTracker();
7009
8546
  void this.perfTracker.load();
7010
- this.taskAnalyzer = new TaskAnalyzer(this.perfTracker);
8547
+ this.taskAnalyzer = new TaskAnalyzer(this.perfTracker, this.config.autoBias ?? "balanced");
8548
+ this.router.setTaskAnalyzer(this.taskAnalyzer);
7011
8549
  }
7012
8550
  const cfg = this.config;
7013
8551
  if (cfg["enableToolCreation"] === true) {
7014
- this.toolCreator = new ToolCreator(this.router, this.toolRegistry);
8552
+ this.toolCreator = new ToolCreator(this.router, this.toolRegistry, this.workspacePath, cfg["persistDynamicTools"] !== false);
8553
+ this.toolCreator.setLogger((m) => {
8554
+ if (this.listenerCount("log") > 0) this.emit("log", { level: "info", message: m });
8555
+ });
7015
8556
  }
7016
8557
  }
7017
8558
  setStore(store) {
@@ -7042,6 +8583,17 @@ var Cascade = class _Cascade extends EventEmitter {
7042
8583
  this.emit("mcp:approval-required", { server });
7043
8584
  });
7044
8585
  }
8586
+ recordDecision(kind, detail) {
8587
+ this.decisionLog.push({ at: (/* @__PURE__ */ new Date()).toISOString(), kind, detail });
8588
+ }
8589
+ /**
8590
+ * The orchestration decision trail for the most recent run: complexity
8591
+ * verdict (and why), which model served each tier, failovers, and
8592
+ * escalations. Powers the /why command.
8593
+ */
8594
+ getDecisionLog() {
8595
+ return [...this.decisionLog];
8596
+ }
7045
8597
  /** Resolve a pending MCP server approval from a REPL / dashboard listener. */
7046
8598
  resolveMcpApproval(serverName, approved) {
7047
8599
  const resolver = this.pendingMcpApprovals.get(serverName);
@@ -7050,6 +8602,125 @@ var Cascade = class _Cascade extends EventEmitter {
7050
8602
  resolver(approved);
7051
8603
  }
7052
8604
  }
8605
+ // ── Boardroom plan approval ─────────────────────────────────────────
8606
+ // Same gate pattern as MCP approvals, with the opposite default: plans
8607
+ // are work the user asked for, so no listener (SDK/headless) or a
8608
+ // timeout means PROCEED, not reject.
8609
+ pendingPlanApproval;
8610
+ async requestPlanApproval(plan, taskId, critique, summary) {
8611
+ if (this.config.autonomy === "auto") {
8612
+ return { approved: true };
8613
+ }
8614
+ if (this.listenerCount("plan:approval-required") === 0) {
8615
+ return { approved: true };
8616
+ }
8617
+ const t2Count = plan.sections.length;
8618
+ const t3Count = plan.sections.reduce((sum, s) => sum + (s.t3Subtasks?.length ?? 0), 0);
8619
+ return await new Promise((resolve) => {
8620
+ const timeout = setTimeout(() => {
8621
+ if (this.pendingPlanApproval) {
8622
+ this.pendingPlanApproval = void 0;
8623
+ resolve({ approved: true });
8624
+ }
8625
+ }, 12e4);
8626
+ this.pendingPlanApproval = (decision) => {
8627
+ clearTimeout(timeout);
8628
+ this.pendingPlanApproval = void 0;
8629
+ resolve(decision);
8630
+ };
8631
+ this.emit("plan:approval-required", {
8632
+ taskId,
8633
+ plan,
8634
+ t2Count,
8635
+ t3Count,
8636
+ estCostUsd: this.estimatePlanCost(plan),
8637
+ critique,
8638
+ summary
8639
+ });
8640
+ });
8641
+ }
8642
+ /**
8643
+ * Resolve a pending boardroom plan approval from a REPL / dashboard listener.
8644
+ * An optional `note` re-plans and re-asks; an optional `editedPlan` is applied
8645
+ * directly (no re-decompose).
8646
+ */
8647
+ resolvePlanApproval(approved, note, editedPlan) {
8648
+ this.pendingPlanApproval?.({ approved, note, editedPlan });
8649
+ }
8650
+ /**
8651
+ * Autonomy control (used by the /auto command). 'auto' makes the next run
8652
+ * hands-off: the plan gate auto-approves and non-dangerous tools auto-approve,
8653
+ * while dangerous tools still escalate and budget caps remain the hard stop.
8654
+ */
8655
+ setAutonomy(mode) {
8656
+ this.config = { ...this.config, autonomy: mode };
8657
+ }
8658
+ getAutonomy() {
8659
+ return this.config.autonomy === "auto" ? "auto" : "manual";
8660
+ }
8661
+ /**
8662
+ * Preview T1's decomposition for a prompt WITHOUT executing it (powers /plan).
8663
+ * Idempotent init guard, so it works before the first run.
8664
+ */
8665
+ async previewPlan(prompt) {
8666
+ await this.init();
8667
+ const t1 = new T1Administrator(this.router, this.toolRegistry, this.config);
8668
+ if (this.store) t1.setStore(this.store);
8669
+ return t1.previewPlan(prompt);
8670
+ }
8671
+ /** True when a task stopped at the budget cap and can be resumed via /continue. */
8672
+ hasResumableRun() {
8673
+ return this.lastInterruptedRun != null;
8674
+ }
8675
+ /**
8676
+ * Raise the per-run token budget for a resume and return the continuation
8677
+ * prompt (or null when nothing is resumable). Consumes the interrupted-run
8678
+ * state. The REPL submits the returned prompt through its normal flow so the
8679
+ * resumed run renders like any other; `resumeRun` wraps this for SDK callers.
8680
+ */
8681
+ prepareResume(opts = {}) {
8682
+ const last = this.lastInterruptedRun;
8683
+ if (!last) return null;
8684
+ this.lastInterruptedRun = void 0;
8685
+ const raised = opts.maxTokens ?? Math.round((this.config.budget?.maxTokensPerRun ?? 2e5) * 2);
8686
+ this.config = { ...this.config, budget: { ...this.config.budget, maxTokensPerRun: raised } };
8687
+ this.router.setMaxTokensPerRun(raised);
8688
+ return `Continue and FINISH this task. A previous attempt was interrupted before completion; any files already created are on disk \u2014 build on them, do NOT recreate them. Complete only the remaining work.
8689
+
8690
+ Original task: ${last.prompt}` + (last.partialOutput ? `
8691
+
8692
+ Partial result so far:
8693
+ ${last.partialOutput}` : "");
8694
+ }
8695
+ /**
8696
+ * Resume the last budget-capped task with a raised budget (SDK/headless).
8697
+ * Returns null when there is nothing to resume.
8698
+ */
8699
+ async resumeRun(opts = {}) {
8700
+ const prompt = this.prepareResume(opts);
8701
+ if (!prompt) return null;
8702
+ return this.run({ prompt });
8703
+ }
8704
+ /**
8705
+ * Rough pre-execution cost estimate for a plan: ~3 T2 calls per section
8706
+ * plus ~4 T3 calls per subtask at typical token volumes. A ballpark for
8707
+ * the approval dialog, not an invoice — always label it "est."
8708
+ */
8709
+ estimatePlanCost(plan) {
8710
+ const T2_CALLS_PER_SECTION = 3;
8711
+ const T3_CALLS_PER_SUBTASK = 4;
8712
+ const IN_TOKENS = 1500;
8713
+ const OUT_TOKENS = 700;
8714
+ const t2Model = this.router.getTierModel("T2");
8715
+ const t3Model = this.router.getTierModel("T3");
8716
+ let est = 0;
8717
+ for (const section of plan.sections) {
8718
+ if (t2Model) est += T2_CALLS_PER_SECTION * calculateCost(IN_TOKENS, OUT_TOKENS, t2Model);
8719
+ const subtasks = section.t3Subtasks?.length ?? 1;
8720
+ if (t3Model) est += subtasks * T3_CALLS_PER_SUBTASK * calculateCost(IN_TOKENS, OUT_TOKENS, t3Model);
8721
+ }
8722
+ return est;
8723
+ }
7053
8724
  async init() {
7054
8725
  if (this.initialized) return;
7055
8726
  if (this.initPromise) return this.initPromise;
@@ -7058,6 +8729,9 @@ var Cascade = class _Cascade extends EventEmitter {
7058
8729
  this.router.on("budget:warning", (payload) => {
7059
8730
  this.emit("budget:warning", payload);
7060
8731
  });
8732
+ this.router.on("failover", (e) => {
8733
+ this.recordDecision("failover", `${e.tier} ${e.from} \u2192 ${e.to} (${e.reason})`);
8734
+ });
7061
8735
  this.router.on("budget:exceeded", (payload) => {
7062
8736
  this.emit("budget:exceeded", payload);
7063
8737
  for (const [name, resolver] of this.pendingMcpApprovals) {
@@ -7095,7 +8769,12 @@ var Cascade = class _Cascade extends EventEmitter {
7095
8769
  this.router.profileModels(this.store).catch(() => {
7096
8770
  });
7097
8771
  }
8772
+ if (this.config.cascadeAuto) {
8773
+ this.router.refreshLiveData().catch(() => {
8774
+ });
8775
+ }
7098
8776
  this.initOptionalFeatures();
8777
+ if (this.toolCreator) await this.toolCreator.loadPersistedTools();
7099
8778
  this.initialized = true;
7100
8779
  })();
7101
8780
  try {
@@ -7121,6 +8800,20 @@ var Cascade = class _Cascade extends EventEmitter {
7121
8800
  const wordCount = prompt.trim().split(/\s+/).length;
7122
8801
  return wordCount <= 12 && LOW_COMPLEXITY.some((re) => re.test(prompt.trim()));
7123
8802
  }
8803
+ /**
8804
+ * Read-only inquiries about existing content ("read / review / explain /
8805
+ * summarize / analyze this file or codebase and tell me …") are single-agent
8806
+ * work — one worker with file/grep tools answers directly, no T1→T2→T3 fan-out.
8807
+ * They must NOT ask to create, build, implement, refactor, or save an artifact;
8808
+ * those stay on the heavier classifier path. This keeps trivial "what does this
8809
+ * do?" requests from being mis-routed into a multi-agent, multi-thousand-token run.
8810
+ */
8811
+ looksLikeReadOnlyInquiry(prompt) {
8812
+ const p = prompt.trim();
8813
+ const inquiry = /\b(?:read|review|explain|describe|summari[sz]e|analy[sz]e|assess|evaluate|inspect|examine|explore|go through|look at|tell me about|what (?:is|are|does|do)|is it|understand|novelty|novel idea)\b/i.test(p);
8814
+ const producesArtifact = /\b(?:create|build|implement|generate|write|refactor|rewrite|add|fix|deploy|install|migrate|scaffold|set up|save (?:a|the)|report|\.(?:pdf|md|txt|json|csv|py|js|ts|tsx|jsx|html|docx?))\b/i.test(p);
8815
+ return inquiry && !producesArtifact;
8816
+ }
7124
8817
  // Cache glob scan results per workspace path to avoid repeated I/O.
7125
8818
  static globCache = /* @__PURE__ */ new Map();
7126
8819
  async countWorkspaceFiles(workspacePath) {
@@ -7140,9 +8833,22 @@ var Cascade = class _Cascade extends EventEmitter {
7140
8833
  }
7141
8834
  }
7142
8835
  async determineComplexity(prompt, workspacePath, conversationHistory = []) {
7143
- if (this.isCasualGreeting(prompt)) return "Simple";
7144
- if (this.looksLikeSimpleArtifactTask(prompt)) return "Simple";
7145
- if (this.looksLikeConversational(prompt)) return "Simple";
8836
+ if (this.isCasualGreeting(prompt)) {
8837
+ this.recordDecision("complexity", "Simple \u2014 heuristic: casual greeting (no classifier call)");
8838
+ return "Simple";
8839
+ }
8840
+ if (this.looksLikeSimpleArtifactTask(prompt)) {
8841
+ this.recordDecision("complexity", "Simple \u2014 heuristic: single-file artifact task (no classifier call)");
8842
+ return "Simple";
8843
+ }
8844
+ if (this.looksLikeConversational(prompt)) {
8845
+ this.recordDecision("complexity", "Simple \u2014 heuristic: short conversational message (no classifier call)");
8846
+ return "Simple";
8847
+ }
8848
+ if (this.looksLikeReadOnlyInquiry(prompt)) {
8849
+ this.recordDecision("complexity", "Simple \u2014 heuristic: read-only inquiry over existing content (single agent, no classifier call)");
8850
+ return "Simple";
8851
+ }
7146
8852
  let workspaceContext = "";
7147
8853
  try {
7148
8854
  const count = await this.countWorkspaceFiles(workspacePath);
@@ -7162,10 +8868,12 @@ Classification:
7162
8868
  Important rules:
7163
8869
  - Treat short follow-ups like "proceed", "continue", "do it", "yes" as referring to the recent context.
7164
8870
  - If the earlier context is complex, keep the inherited complexity unless the user clearly narrows scope.
8871
+ - Reading, explaining, summarizing, or analyzing existing files/code and answering a question \u2014 WITHOUT creating files or implementing changes \u2014 is "Simple" (single agent), never "Complex".
7165
8872
  - If the task asks for a simple single-file artifact like hello.txt, it is usually Moderate.
7166
8873
  - If the task asks for a saved report, PDF, implementation, or deeper verification workflow, it is at least Moderate and often Complex.
7167
8874
 
7168
- Respond with exactly one word: Simple, Moderate, or Complex.`;
8875
+ Respond with the verdict word first, then a dash and a short reason (under 12 words).
8876
+ Format: <Simple|Moderate|Complex> \u2014 <reason>`;
7169
8877
  const recentHistory = conversationHistory.slice(-6);
7170
8878
  const contextBlock = recentHistory.map((message, index) => {
7171
8879
  const content = typeof message.content === "string" ? message.content : message.content.map((block) => block.type === "text" ? block.text : "[non-text]").join(" ");
@@ -7180,26 +8888,36 @@ ${prompt}` : prompt;
7180
8888
  const result = await this.router.generate("T1", {
7181
8889
  messages: [{ role: "user", content: routedPrompt }],
7182
8890
  systemPrompt: sysPrompt,
7183
- maxTokens: 8,
8891
+ maxTokens: 40,
7184
8892
  temperature: 0
7185
8893
  });
7186
- const content = result.content.trim().toLowerCase();
7187
- if (content.includes("simple")) return "Simple";
7188
- if (content.includes("moderate")) return "Moderate";
7189
- return "Complex";
8894
+ const content = result.content.trim();
8895
+ const firstWord = (content.split(/[\s—–-]+/)[0] ?? "").toLowerCase();
8896
+ const reason = content.replace(/^\S+\s*[—–-]*\s*/, "").trim();
8897
+ const verdict = firstWord.includes("simple") ? "Simple" : firstWord.includes("moderate") ? "Moderate" : "Complex";
8898
+ this.recordDecision("complexity", `${verdict} \u2014 classifier: ${reason || "no reason given"}`);
8899
+ return verdict;
7190
8900
  } catch {
7191
8901
  const followUpPrompt = /^(proceed|continue|go ahead|do it|yes|yep|ok|okay|carry on)$/i.test(prompt.trim());
7192
- if (followUpPrompt && recentHistory.length > 0) return "Complex";
7193
- return "Complex";
8902
+ if (followUpPrompt && recentHistory.length > 0) {
8903
+ this.recordDecision("complexity", "Complex \u2014 classifier unavailable; short follow-up inherits prior context");
8904
+ return "Complex";
8905
+ }
8906
+ this.recordDecision("complexity", "Moderate \u2014 classifier unavailable; defaulting to the mid-cost route");
8907
+ return "Moderate";
7194
8908
  }
7195
8909
  }
7196
8910
  async run(options) {
7197
8911
  await this.init();
8912
+ this.router.beginRun();
8913
+ this.router.setRunSignal(options.signal);
7198
8914
  const startMs = Date.now();
7199
8915
  const taskId = randomUUID();
7200
- const escalator = new PermissionEscalator();
8916
+ this.decisionLog = [];
8917
+ const escalator = new PermissionEscalator(this.config.approvalTimeoutMs ?? 6e5, this.config.autonomy === "auto");
7201
8918
  escalator.on("permission:user-required", async (req) => {
7202
8919
  this.emit("permission:user-required", req);
8920
+ this.recordDecision("escalation", `"${req.toolName}" by ${req.requestedBy} \u2014 T2 and T1 both unsure, escalated to user`);
7203
8921
  const enrichedRequest = {
7204
8922
  id: req.id,
7205
8923
  tierId: req.requestedBy,
@@ -7236,16 +8954,32 @@ ${prompt}` : prompt;
7236
8954
  toolCreationEnabled: this.config["enableToolCreation"] === true
7237
8955
  });
7238
8956
  this.emit("tier:root", { role: complexity === "Simple" ? "T3" : complexity === "Moderate" ? "T2" : "T1" });
8957
+ const tiersInPlay = complexity === "Simple" ? ["T3"] : complexity === "Moderate" ? ["T2", "T3"] : ["T1", "T2", "T3"];
7239
8958
  if (this.taskAnalyzer) {
7240
- const tiers = complexity === "Simple" ? ["T3"] : complexity === "Moderate" ? ["T2", "T3"] : ["T1", "T2", "T3"];
7241
- await Promise.all(tiers.map(async (tier) => {
8959
+ await Promise.all(tiersInPlay.map(async (tier) => {
8960
+ const tierKey = tier.toLowerCase();
8961
+ if (this.config.models?.[tierKey]) return;
7242
8962
  try {
7243
8963
  const model = await this.taskAnalyzer.selectModel(options.prompt, tier, this.router.getSelector());
7244
- if (model) this.router.overrideTierModel(tier, model);
8964
+ if (model) {
8965
+ this.router.overrideTierModel(tier, model);
8966
+ const taskType = this.taskAnalyzer.getLastProfile()?.type ?? "mixed";
8967
+ const bench = Math.round(benchmarkScore01(model, taskType) * 100);
8968
+ const price = model.inputCostPer1kTokens === 0 && model.outputCostPer1kTokens === 0 ? "free" : `$${model.outputCostPer1kTokens.toFixed(4)}/1K out`;
8969
+ const dataSrc = this.router.getLiveData()?.getDataSource() ?? "bundled";
8970
+ this.recordDecision(
8971
+ "model",
8972
+ `${tier} \u2192 ${model.provider}:${model.id} \u2014 Cascade Auto: best value for ${taskType} (bench ${bench}/100, ${price}, data: ${dataSrc})`
8973
+ );
8974
+ }
7245
8975
  } catch {
7246
8976
  }
7247
8977
  }));
7248
8978
  }
8979
+ this.recordDecision("model", tiersInPlay.map((tier) => {
8980
+ const m = this.router.getTierModel(tier);
8981
+ return m ? `${tier} ${m.provider}:${m.id}${m.isLocal ? " \u2302local" : ""}` : `${tier} (none)`;
8982
+ }).join(" \xB7 "));
7249
8983
  const toolCreator = this.toolCreator;
7250
8984
  if (toolCreator) toolCreator.setPermissionEscalator(escalator);
7251
8985
  let finalOutput = "";
@@ -7327,6 +9061,25 @@ ${prompt}` : prompt;
7327
9061
  if (toolCreator) t2.setToolCreator(toolCreator);
7328
9062
  t2.setPeerMessageCallback((e) => this.emit("peer:message", e), options.sessionId ?? "");
7329
9063
  bindTierEvents(t2);
9064
+ if (this.config.planApproval === "all") {
9065
+ t2.setPlanApprovalCallback(async (subtasks) => {
9066
+ const pseudoPlan = {
9067
+ complexity: "Moderate",
9068
+ reasoning: "",
9069
+ sections: subtasks.map((st) => ({
9070
+ sectionId: st.subtaskId,
9071
+ sectionTitle: st.subtaskTitle,
9072
+ description: st.description,
9073
+ t3Subtasks: []
9074
+ }))
9075
+ };
9076
+ const n = subtasks.length;
9077
+ const summary = `${n} worker${n !== 1 ? "s" : ""} \xB7 1 root manager \xB7 est. $${this.estimatePlanCost(pseudoPlan).toFixed(4)}`;
9078
+ const decision = await this.requestPlanApproval(pseudoPlan, taskId, void 0, summary);
9079
+ const keepSubtaskIds = decision.editedPlan?.sections?.map((s) => s.sectionId).filter((id) => Boolean(id));
9080
+ return { approved: decision.approved, note: decision.note, keepSubtaskIds };
9081
+ });
9082
+ }
7330
9083
  const assignment = {
7331
9084
  sectionId: taskId,
7332
9085
  sectionTitle: "Direct Task",
@@ -7358,17 +9111,33 @@ ${prompt}` : prompt;
7358
9111
  t1.setPeerMessageCallback((e) => this.emit("peer:message", e), options.sessionId ?? "");
7359
9112
  bindTierEvents(t1);
7360
9113
  t1.on("plan", (e) => this.emit("plan", e));
9114
+ if (this.config.planApproval != null && this.config.planApproval !== "never") {
9115
+ t1.setPlanApprovalCallback(async (plan, meta) => {
9116
+ const decision = await this.requestPlanApproval(plan, taskId, meta?.critique);
9117
+ this.recordDecision("escalation", decision.approved ? `Boardroom: plan approved (${plan.sections.length} sections)${decision.note ? " with a steering note" : ""}${decision.editedPlan ? " (edited)" : ""}` : "Boardroom: plan rejected \u2014 run stopped before any T2 spawned");
9118
+ return decision;
9119
+ });
9120
+ }
7361
9121
  const result = await t1.execute(options.prompt, options.images, void 0, options.signal);
7362
9122
  finalOutput = result.output;
7363
9123
  t2Results = result.t2Results;
7364
9124
  }
7365
9125
  } catch (err) {
7366
- if (err instanceof CascadeCancelledError) {
9126
+ if (err instanceof CascadeCancelledError || err instanceof Error && err.name === "AbortError" || options.signal?.aborted) {
7367
9127
  this.emit("run:cancelled", {
9128
+ taskId,
9129
+ reason: err instanceof Error ? err.message : "Task cancelled",
9130
+ partialOutput: finalOutput || ""
9131
+ });
9132
+ runError = null;
9133
+ } else if (err instanceof Error && err.name === "BudgetExceededError") {
9134
+ this.emit("run:budget-exceeded", {
7368
9135
  taskId,
7369
9136
  reason: err.message,
7370
9137
  partialOutput: finalOutput || ""
7371
9138
  });
9139
+ this.lastInterruptedRun = { prompt: options.prompt, partialOutput: finalOutput || "", taskId };
9140
+ if (!finalOutput) finalOutput = `\u26A0 Stopped to avoid runaway cost: ${err.message}`;
7372
9141
  runError = null;
7373
9142
  } else {
7374
9143
  runError = err;
@@ -7379,6 +9148,8 @@ ${prompt}` : prompt;
7379
9148
  escalator.cancelAllPending();
7380
9149
  } catch {
7381
9150
  }
9151
+ this.router.restoreTierModels();
9152
+ this.router.setRunSignal(void 0);
7382
9153
  if (this.taskAnalyzer) {
7383
9154
  try {
7384
9155
  const stats2 = this.router.getStats();
@@ -7486,7 +9257,7 @@ var Keystore = class {
7486
9257
  const creds = await this.keytar.findCredentials(KEYTAR_SERVICE);
7487
9258
  this.cache = Object.fromEntries(creds.map((c) => [c.account, c.password]));
7488
9259
  this.backend = "keytar";
7489
- if (password && fs15.existsSync(this.storePath)) {
9260
+ if (password && fs17.existsSync(this.storePath)) {
7490
9261
  try {
7491
9262
  const fileEntries = this.decryptFile(password);
7492
9263
  for (const [k, v] of Object.entries(fileEntries)) {
@@ -7505,7 +9276,7 @@ var Keystore = class {
7505
9276
  "Keystore unlock requires a password because the OS keychain (keytar) is not available on this system."
7506
9277
  );
7507
9278
  }
7508
- if (!fs15.existsSync(this.storePath)) {
9279
+ if (!fs17.existsSync(this.storePath)) {
7509
9280
  const salt = crypto.randomBytes(SALT_LEN);
7510
9281
  this.masterKey = this.deriveKey(password, salt);
7511
9282
  this.writeWithSalt({}, salt);
@@ -7519,7 +9290,7 @@ var Keystore = class {
7519
9290
  }
7520
9291
  /** Synchronous legacy unlock kept for AES-only environments. */
7521
9292
  unlockSync(password) {
7522
- if (!fs15.existsSync(this.storePath)) {
9293
+ if (!fs17.existsSync(this.storePath)) {
7523
9294
  const salt = crypto.randomBytes(SALT_LEN);
7524
9295
  this.masterKey = this.deriveKey(password, salt);
7525
9296
  this.writeWithSalt({}, salt);
@@ -7577,7 +9348,7 @@ var Keystore = class {
7577
9348
  }
7578
9349
  }
7579
9350
  decryptFile(password, knownSalt) {
7580
- if (!fs15.existsSync(this.storePath)) return {};
9351
+ if (!fs17.existsSync(this.storePath)) return {};
7581
9352
  try {
7582
9353
  const { salt, ciphertext, iv, tag } = this.readRaw();
7583
9354
  const useSalt = knownSalt ?? salt;
@@ -7599,8 +9370,8 @@ var Keystore = class {
7599
9370
  const ciphertext = Buffer.concat([cipher.update(plaintext), cipher.final()]);
7600
9371
  const tag = cipher.getAuthTag();
7601
9372
  const out = Buffer.concat([raw.salt, iv, tag, ciphertext]);
7602
- fs15.mkdirSync(path16.dirname(this.storePath), { recursive: true });
7603
- fs15.writeFileSync(this.storePath, out, { mode: 384 });
9373
+ fs17.mkdirSync(path18.dirname(this.storePath), { recursive: true });
9374
+ fs17.writeFileSync(this.storePath, out, { mode: 384 });
7604
9375
  }
7605
9376
  writeWithSalt(data, salt) {
7606
9377
  if (!this.masterKey) throw new Error("writeWithSalt called before masterKey was set");
@@ -7610,11 +9381,11 @@ var Keystore = class {
7610
9381
  const ciphertext = Buffer.concat([cipher.update(plaintext), cipher.final()]);
7611
9382
  const tag = cipher.getAuthTag();
7612
9383
  const out = Buffer.concat([salt, iv, tag, ciphertext]);
7613
- fs15.mkdirSync(path16.dirname(this.storePath), { recursive: true });
7614
- fs15.writeFileSync(this.storePath, out, { mode: 384 });
9384
+ fs17.mkdirSync(path18.dirname(this.storePath), { recursive: true });
9385
+ fs17.writeFileSync(this.storePath, out, { mode: 384 });
7615
9386
  }
7616
9387
  readRaw() {
7617
- const buf = fs15.readFileSync(this.storePath);
9388
+ const buf = fs17.readFileSync(this.storePath);
7618
9389
  let offset = 0;
7619
9390
  const salt = buf.subarray(offset, offset + SALT_LEN);
7620
9391
  offset += SALT_LEN;
@@ -7647,9 +9418,9 @@ var CascadeIgnore = class {
7647
9418
  ]);
7648
9419
  }
7649
9420
  async load(workspacePath) {
7650
- const filePath = path16.join(workspacePath, ".cascadeignore");
9421
+ const filePath = path18.join(workspacePath, ".cascadeignore");
7651
9422
  try {
7652
- const content = await fs3.readFile(filePath, "utf-8");
9423
+ const content = await fs4.readFile(filePath, "utf-8");
7653
9424
  const lines = content.split("\n").filter((l) => l.trim() && !l.startsWith("#"));
7654
9425
  this.ig.add(lines);
7655
9426
  this.loaded = true;
@@ -7658,7 +9429,7 @@ var CascadeIgnore = class {
7658
9429
  }
7659
9430
  isIgnored(filePath, workspacePath) {
7660
9431
  try {
7661
- const relative = workspacePath ? path16.relative(workspacePath, filePath) : filePath;
9432
+ const relative = workspacePath ? path18.relative(workspacePath, filePath) : filePath;
7662
9433
  return this.ig.ignores(relative);
7663
9434
  } catch {
7664
9435
  return false;
@@ -7669,9 +9440,9 @@ var CascadeIgnore = class {
7669
9440
  }
7670
9441
  };
7671
9442
  async function loadCascadeMd(workspacePath) {
7672
- const filePath = path16.join(workspacePath, "CASCADE.md");
9443
+ const filePath = path18.join(workspacePath, "CASCADE.md");
7673
9444
  try {
7674
- const raw = await fs3.readFile(filePath, "utf-8");
9445
+ const raw = await fs4.readFile(filePath, "utf-8");
7675
9446
  return parseCascadeMd(raw);
7676
9447
  } catch {
7677
9448
  return null;
@@ -7700,7 +9471,7 @@ ${raw.trim()}`;
7700
9471
  var MemoryStore = class _MemoryStore {
7701
9472
  db;
7702
9473
  constructor(dbPath) {
7703
- fs15.mkdirSync(path16.dirname(dbPath), { recursive: true });
9474
+ fs17.mkdirSync(path18.dirname(dbPath), { recursive: true });
7704
9475
  try {
7705
9476
  this.db = new Database(dbPath, { timeout: 5e3 });
7706
9477
  this.db.pragma("journal_mode = WAL");
@@ -8458,15 +10229,15 @@ var ConfigManager = class {
8458
10229
  globalDir;
8459
10230
  constructor(workspacePath = process.cwd()) {
8460
10231
  this.workspacePath = workspacePath;
8461
- this.globalDir = path16.join(os3.homedir(), GLOBAL_CONFIG_DIR);
10232
+ this.globalDir = path18.join(os4.homedir(), GLOBAL_CONFIG_DIR);
8462
10233
  }
8463
10234
  async load() {
8464
10235
  this.config = await this.loadConfig();
8465
10236
  this.ignore = new CascadeIgnore();
8466
10237
  await this.ignore.load(this.workspacePath);
8467
10238
  this.cascadeMd = await loadCascadeMd(this.workspacePath);
8468
- this.keystore = new Keystore(path16.join(this.globalDir, GLOBAL_KEYSTORE_FILE));
8469
- this.store = new MemoryStore(path16.join(this.workspacePath, CASCADE_DB_FILE));
10239
+ this.keystore = new Keystore(path18.join(this.globalDir, GLOBAL_KEYSTORE_FILE));
10240
+ this.store = new MemoryStore(path18.join(this.workspacePath, CASCADE_DB_FILE));
8470
10241
  await this.injectEnvKeys();
8471
10242
  await this.ensureDefaultIdentity();
8472
10243
  }
@@ -8489,9 +10260,9 @@ var ConfigManager = class {
8489
10260
  return this.workspacePath;
8490
10261
  }
8491
10262
  async save() {
8492
- const configPath = path16.join(this.workspacePath, CASCADE_CONFIG_FILE);
8493
- await fs3.mkdir(path16.dirname(configPath), { recursive: true });
8494
- await fs3.writeFile(configPath, JSON.stringify(this.config, null, 2), "utf-8");
10263
+ const configPath = path18.join(this.workspacePath, CASCADE_CONFIG_FILE);
10264
+ await fs4.mkdir(path18.dirname(configPath), { recursive: true });
10265
+ await fs4.writeFile(configPath, JSON.stringify(this.config, null, 2), "utf-8");
8495
10266
  }
8496
10267
  async updateConfig(updates) {
8497
10268
  this.config = validateConfig({ ...this.config, ...updates });
@@ -8514,9 +10285,9 @@ var ConfigManager = class {
8514
10285
  return configProvider?.apiKey;
8515
10286
  }
8516
10287
  async loadConfig() {
8517
- const configPath = path16.join(this.workspacePath, CASCADE_CONFIG_FILE);
10288
+ const configPath = path18.join(this.workspacePath, CASCADE_CONFIG_FILE);
8518
10289
  try {
8519
- const raw = await fs3.readFile(configPath, "utf-8");
10290
+ const raw = await fs4.readFile(configPath, "utf-8");
8520
10291
  return validateConfig(JSON.parse(raw));
8521
10292
  } catch (err) {
8522
10293
  if (err.code === "ENOENT") {
@@ -8595,12 +10366,13 @@ async function streamCascade(prompt, onToken, options = {}) {
8595
10366
  }
8596
10367
  });
8597
10368
  }
10369
+ var JWT_ALGORITHM = "HS256";
8598
10370
  function createToken(user, secret) {
8599
- return jwt.sign(user, secret, { expiresIn: "24h" });
10371
+ return jwt.sign(user, secret, { expiresIn: "24h", algorithm: JWT_ALGORITHM });
8600
10372
  }
8601
10373
  function verifyToken(token, secret) {
8602
10374
  try {
8603
- return jwt.verify(token, secret);
10375
+ return jwt.verify(token, secret, { algorithms: [JWT_ALGORITHM] });
8604
10376
  } catch {
8605
10377
  return null;
8606
10378
  }
@@ -8731,7 +10503,7 @@ var DashboardSocket = class {
8731
10503
  this.io.close();
8732
10504
  }
8733
10505
  };
8734
- var __dirname$1 = path16.dirname(fileURLToPath(import.meta.url));
10506
+ var __dirname$1 = path18.dirname(fileURLToPath(import.meta.url));
8735
10507
  var DashboardServer = class {
8736
10508
  app;
8737
10509
  httpServer;
@@ -8742,12 +10514,14 @@ var DashboardServer = class {
8742
10514
  globalStore = null;
8743
10515
  broadcastTimer = null;
8744
10516
  port;
10517
+ host;
8745
10518
  workspacePath;
8746
10519
  constructor(config, store, workspacePath = process.cwd()) {
8747
10520
  this.config = config;
8748
10521
  this.store = store;
8749
10522
  this.workspacePath = workspacePath;
8750
10523
  this.port = config.dashboard.port ?? DEFAULT_DASHBOARD_PORT;
10524
+ this.host = config.dashboard.host ?? "127.0.0.1";
8751
10525
  this.dashboardSecret = this.resolveDashboardSecret();
8752
10526
  this.app = express();
8753
10527
  this.httpServer = createServer(this.app);
@@ -8760,10 +10534,19 @@ var DashboardServer = class {
8760
10534
  this.setupRoutes();
8761
10535
  }
8762
10536
  async start() {
10537
+ const isLoopback = this.host === "127.0.0.1" || this.host === "::1" || this.host === "localhost";
10538
+ if (!isLoopback) {
10539
+ console.warn(
10540
+ `\u26A0 Dashboard is binding to ${this.host}:${this.port} \u2014 reachable from the network. It exposes task execution (/api/run) and config endpoints. Ensure dashboard.auth is enabled and CASCADE_DASHBOARD_PASSWORD is set.`
10541
+ );
10542
+ if (!this.config.dashboard.auth) {
10543
+ console.warn("\u26A0 Dashboard auth is DISABLED while bound to a non-loopback interface \u2014 this allows unauthenticated remote task execution.");
10544
+ }
10545
+ }
8763
10546
  await new Promise((resolve, reject) => {
8764
10547
  const onError = (err) => reject(err);
8765
10548
  this.httpServer.once("error", onError);
8766
- this.httpServer.listen(this.port, () => {
10549
+ this.httpServer.listen(this.port, this.host, () => {
8767
10550
  this.httpServer.off("error", onError);
8768
10551
  resolve();
8769
10552
  });
@@ -8797,15 +10580,15 @@ var DashboardServer = class {
8797
10580
  resolveDashboardSecret() {
8798
10581
  const fromConfig = this.config.dashboard.secret ?? process.env["CASCADE_DASHBOARD_SECRET"];
8799
10582
  if (fromConfig) return fromConfig;
8800
- const secretPath = path16.join(this.workspacePath, CASCADE_DASHBOARD_SECRET_FILE);
10583
+ const secretPath = path18.join(this.workspacePath, CASCADE_DASHBOARD_SECRET_FILE);
8801
10584
  try {
8802
- if (fs15.existsSync(secretPath)) {
8803
- const existing = fs15.readFileSync(secretPath, "utf-8").trim();
10585
+ if (fs17.existsSync(secretPath)) {
10586
+ const existing = fs17.readFileSync(secretPath, "utf-8").trim();
8804
10587
  if (existing.length >= 16) return existing;
8805
10588
  }
8806
10589
  const generated = randomUUID();
8807
- fs15.mkdirSync(path16.dirname(secretPath), { recursive: true });
8808
- fs15.writeFileSync(secretPath, generated, { encoding: "utf-8", mode: 384 });
10590
+ fs17.mkdirSync(path18.dirname(secretPath), { recursive: true });
10591
+ fs17.writeFileSync(secretPath, generated, { encoding: "utf-8", mode: 384 });
8809
10592
  if (this.config.dashboard.auth) {
8810
10593
  console.warn(
8811
10594
  `Dashboard auth enabled with no secret configured; persisted a generated secret to ${secretPath}. Set CASCADE_DASHBOARD_SECRET or config.dashboard.secret to override.`
@@ -8832,7 +10615,7 @@ var DashboardServer = class {
8832
10615
  // ── Setup ─────────────────────────────────────
8833
10616
  getGlobalStore() {
8834
10617
  if (!this.globalStore) {
8835
- const globalDbPath = path16.join(os3.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
10618
+ const globalDbPath = path18.join(os4.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
8836
10619
  this.globalStore = new MemoryStore(globalDbPath);
8837
10620
  }
8838
10621
  return this.globalStore;
@@ -8893,12 +10676,12 @@ var DashboardServer = class {
8893
10676
  }
8894
10677
  }
8895
10678
  watchRuntimeChanges() {
8896
- const workspaceDbPath = path16.join(this.workspacePath, CASCADE_DB_FILE);
8897
- const globalDbPath = path16.join(os3.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
10679
+ const workspaceDbPath = path18.join(this.workspacePath, CASCADE_DB_FILE);
10680
+ const globalDbPath = path18.join(os4.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
8898
10681
  const watchPaths = [workspaceDbPath, globalDbPath].filter((p, index, arr) => arr.indexOf(p) === index);
8899
10682
  for (const watchPath of watchPaths) {
8900
- if (!fs15.existsSync(watchPath)) continue;
8901
- fs15.watchFile(watchPath, { interval: 3e3 }, () => {
10683
+ if (!fs17.existsSync(watchPath)) continue;
10684
+ fs17.watchFile(watchPath, { interval: 3e3 }, () => {
8902
10685
  this.throttledBroadcast(watchPath === globalDbPath ? "global" : "workspace");
8903
10686
  });
8904
10687
  }
@@ -9028,7 +10811,7 @@ var DashboardServer = class {
9028
10811
  const sessionId = req.params.id;
9029
10812
  this.store.deleteSession(sessionId);
9030
10813
  this.store.deleteRuntimeSession(sessionId);
9031
- const globalDbPath = path16.join(os3.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
10814
+ const globalDbPath = path18.join(os4.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
9032
10815
  const globalStore = new MemoryStore(globalDbPath);
9033
10816
  try {
9034
10817
  globalStore.deleteRuntimeSession(sessionId);
@@ -9042,7 +10825,7 @@ var DashboardServer = class {
9042
10825
  });
9043
10826
  this.app.delete("/api/sessions", auth, (req, res) => {
9044
10827
  const body = req.body;
9045
- const globalDbPath = path16.join(os3.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
10828
+ const globalDbPath = path18.join(os4.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
9046
10829
  if (body?.ids && Array.isArray(body.ids) && body.ids.length > 0) {
9047
10830
  const globalStore = new MemoryStore(globalDbPath);
9048
10831
  try {
@@ -9065,7 +10848,7 @@ var DashboardServer = class {
9065
10848
  });
9066
10849
  this.app.delete("/api/runtime", auth, (_req, res) => {
9067
10850
  this.store.deleteAllRuntimeNodes();
9068
- const globalDbPath = path16.join(os3.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
10851
+ const globalDbPath = path18.join(os4.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
9069
10852
  const globalStore = new MemoryStore(globalDbPath);
9070
10853
  try {
9071
10854
  globalStore.deleteAllRuntimeNodes();
@@ -9138,12 +10921,12 @@ var DashboardServer = class {
9138
10921
  if (body["tierLimits"]) this.config.tierLimits = { ...this.config.tierLimits, ...body["tierLimits"] };
9139
10922
  if (body["budget"]) this.config.budget = { ...this.config.budget, ...body["budget"] };
9140
10923
  try {
9141
- const configPath = path16.join(this.workspacePath, CASCADE_CONFIG_FILE);
9142
- const existing = fs15.existsSync(configPath) ? JSON.parse(fs15.readFileSync(configPath, "utf-8")) : {};
10924
+ const configPath = path18.join(this.workspacePath, CASCADE_CONFIG_FILE);
10925
+ const existing = fs17.existsSync(configPath) ? JSON.parse(fs17.readFileSync(configPath, "utf-8")) : {};
9143
10926
  const updated = { ...existing, tierLimits: this.config.tierLimits, budget: this.config.budget };
9144
10927
  const tmp = configPath + ".tmp";
9145
- fs15.writeFileSync(tmp, JSON.stringify(updated, null, 2), "utf-8");
9146
- fs15.renameSync(tmp, configPath);
10928
+ fs17.writeFileSync(tmp, JSON.stringify(updated, null, 2), "utf-8");
10929
+ fs17.renameSync(tmp, configPath);
9147
10930
  res.json({ ok: true });
9148
10931
  } catch (err) {
9149
10932
  res.status(500).json({ error: `Failed to save config: ${err instanceof Error ? err.message : String(err)}` });
@@ -9171,7 +10954,7 @@ var DashboardServer = class {
9171
10954
  this.app.get("/api/runtime", auth, (req, res) => {
9172
10955
  const scope = req.query["scope"] ?? "workspace";
9173
10956
  if (scope === "global") {
9174
- const globalDbPath = path16.join(os3.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
10957
+ const globalDbPath = path18.join(os4.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
9175
10958
  const globalStore = new MemoryStore(globalDbPath);
9176
10959
  try {
9177
10960
  res.json({
@@ -9244,13 +11027,13 @@ var DashboardServer = class {
9244
11027
  }))
9245
11028
  });
9246
11029
  });
9247
- const prodPath = path16.resolve(__dirname$1, "../web/dist");
9248
- const devPath = path16.resolve(__dirname$1, "../../web/dist");
9249
- const webDistPath = fs15.existsSync(prodPath) ? prodPath : devPath;
9250
- if (fs15.existsSync(webDistPath)) {
11030
+ const prodPath = path18.resolve(__dirname$1, "../web/dist");
11031
+ const devPath = path18.resolve(__dirname$1, "../../web/dist");
11032
+ const webDistPath = fs17.existsSync(prodPath) ? prodPath : devPath;
11033
+ if (fs17.existsSync(webDistPath)) {
9251
11034
  this.app.use(express.static(webDistPath));
9252
11035
  this.app.get("*", (_req, res) => {
9253
- res.sendFile(path16.join(webDistPath, "index.html"));
11036
+ res.sendFile(path18.join(webDistPath, "index.html"));
9254
11037
  });
9255
11038
  } else {
9256
11039
  this.app.get("/", (_req, res) => {