cascade-ai 0.3.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2,23 +2,23 @@ import EventEmitter from 'events';
2
2
  import crypto, { randomUUID, timingSafeEqual } from 'crypto';
3
3
  import { glob } from 'glob';
4
4
  import Anthropic from '@anthropic-ai/sdk';
5
- import OpenAI from 'openai';
5
+ import OpenAI, { AzureOpenAI } from 'openai';
6
6
  import { GoogleGenAI, HarmBlockThreshold, HarmCategory } from '@google/genai';
7
7
  import axios2 from 'axios';
8
- import fs2 from 'fs/promises';
9
- import path13 from 'path';
8
+ import fs3 from 'fs/promises';
9
+ import path16 from 'path';
10
10
  import * as ignoreFactory from 'ignore';
11
11
  import ignoreFactory__default from 'ignore';
12
12
  import { exec, execFile, execSync } from 'child_process';
13
13
  import { promisify } from 'util';
14
+ import fs15 from 'fs';
14
15
  import { simpleGit } from 'simple-git';
15
- import fs11 from 'fs';
16
16
  import PDFDocument from 'pdfkit';
17
17
  import { Client } from '@modelcontextprotocol/sdk/client/index.js';
18
18
  import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
19
19
  import { z } from 'zod';
20
+ import os3 from 'os';
20
21
  import { createContext, runInContext } from 'vm';
21
- import os2 from 'os';
22
22
  import Database from 'better-sqlite3';
23
23
  import { createServer } from 'http';
24
24
  import { fileURLToPath } from 'url';
@@ -124,7 +124,7 @@ var require_keytar2 = __commonJS({
124
124
  });
125
125
 
126
126
  // src/constants.ts
127
- var CASCADE_VERSION = "0.3.0";
127
+ var CASCADE_VERSION = "0.5.1";
128
128
  var CASCADE_CONFIG_DIR = ".cascade";
129
129
  var CASCADE_MD_FILE = "CASCADE.md";
130
130
  var CASCADE_IGNORE_FILE = ".cascadeignore";
@@ -862,19 +862,21 @@ var OpenAIProvider = class extends BaseProvider {
862
862
  // src/providers/azure.ts
863
863
  var AzureOpenAIProvider = class extends OpenAIProvider {
864
864
  constructor(config, model) {
865
- const baseUrl = config.baseUrl ?? AZURE_BASE_URL_TEMPLATE.replace("{resource}", "YOUR_RESOURCE");
865
+ const rawUrl = config.baseUrl ?? AZURE_BASE_URL_TEMPLATE.replace("{resource}", "YOUR_RESOURCE");
866
+ const endpoint = rawUrl.replace(/\/+$/, "");
866
867
  super(
867
868
  {
868
869
  ...config,
869
- baseUrl: `${baseUrl}/openai/deployments/${config.deploymentName ?? model.id}`
870
+ baseUrl: endpoint
871
+ // Kept for superclass compatibility if it reads it
870
872
  },
871
873
  model
872
874
  );
873
- this.client = new OpenAI({
875
+ this.client = new AzureOpenAI({
874
876
  apiKey: config.apiKey,
875
- baseURL: `${baseUrl}/openai/deployments/${config.deploymentName ?? model.id}`,
876
- defaultQuery: { "api-version": config.apiVersion ?? "2024-08-01-preview" },
877
- defaultHeaders: { "api-key": config.apiKey ?? "" }
877
+ endpoint,
878
+ deployment: config.deploymentName ?? model.id,
879
+ apiVersion: config.apiVersion ?? "2024-08-01-preview"
878
880
  });
879
881
  }
880
882
  async listModels() {
@@ -1116,6 +1118,22 @@ var GeminiProvider = class extends BaseProvider {
1116
1118
  };
1117
1119
  }
1118
1120
  };
1121
// Lower-case name fragments of model families that are treated as supporting
// native tool calls. Matching is by substring (see isToolCapable).
var TOOL_CAPABLE_FAMILIES = [
  "llama3.1",
  "llama3.2",
  "llama3.3",
  "qwen2",
  "qwen2.5",
  "qwen3",
  "mistral-nemo",
  "mistral-small",
  "command-r",
  "firefunction"
];

/**
 * Report whether a model name contains one of the TOOL_CAPABLE_FAMILIES fragments.
 * The comparison is case-insensitive and matches anywhere in the name.
 *
 * @param {string} modelName - Model name, e.g. as listed by the provider.
 * @returns {boolean} True when any known family fragment occurs in the name.
 */
function isToolCapable(modelName) {
  const lowered = modelName.toLowerCase();
  for (const fragment of TOOL_CAPABLE_FAMILIES) {
    if (lowered.includes(fragment)) {
      return true;
    }
  }
  return false;
}
1119
1137
  var OllamaProvider = class extends BaseProvider {
1120
1138
  baseUrl;
1121
1139
  constructor(config, model) {
@@ -1128,12 +1146,21 @@ var OllamaProvider = class extends BaseProvider {
1128
1146
  }
1129
1147
  async generateStream(options, onChunk) {
1130
1148
  const messages = this.convertMessages(options.messages, options.systemPrompt);
1149
+ const ollamaTools = options.tools?.map((t) => ({
1150
+ type: "function",
1151
+ function: {
1152
+ name: t.name,
1153
+ description: t.description,
1154
+ parameters: t.inputSchema
1155
+ }
1156
+ }));
1131
1157
  const response = await axios2.post(
1132
1158
  `${this.baseUrl}/api/chat`,
1133
1159
  {
1134
1160
  model: this.model.id,
1135
1161
  messages,
1136
1162
  stream: true,
1163
+ tools: ollamaTools?.length ? ollamaTools : void 0,
1137
1164
  options: {
1138
1165
  num_predict: options.maxTokens ?? this.model.maxOutputTokens,
1139
1166
  temperature: options.temperature ?? 0.7
@@ -1144,6 +1171,7 @@ var OllamaProvider = class extends BaseProvider {
1144
1171
  let fullContent = "";
1145
1172
  let inputTokens = 0;
1146
1173
  let outputTokens = 0;
1174
+ const pendingToolCalls = [];
1147
1175
  await new Promise((resolve, reject) => {
1148
1176
  let buffer = "";
1149
1177
  response.data.on("data", (chunk) => {
@@ -1158,6 +1186,9 @@ var OllamaProvider = class extends BaseProvider {
1158
1186
  fullContent += parsed.message.content;
1159
1187
  onChunk({ text: parsed.message.content, finishReason: null });
1160
1188
  }
1189
+ if (parsed.message?.tool_calls?.length) {
1190
+ pendingToolCalls.push(...parsed.message.tool_calls);
1191
+ }
1161
1192
  if (parsed.done) {
1162
1193
  inputTokens = parsed.prompt_eval_count ?? 0;
1163
1194
  outputTokens = parsed.eval_count ?? 0;
@@ -1175,6 +1206,9 @@ var OllamaProvider = class extends BaseProvider {
1175
1206
  fullContent += parsed.message.content;
1176
1207
  onChunk({ text: parsed.message.content, finishReason: null });
1177
1208
  }
1209
+ if (parsed.message?.tool_calls?.length) {
1210
+ pendingToolCalls.push(...parsed.message.tool_calls);
1211
+ }
1178
1212
  if (parsed.done) {
1179
1213
  inputTokens = parsed.prompt_eval_count ?? inputTokens;
1180
1214
  outputTokens = parsed.eval_count ?? outputTokens;
@@ -1186,11 +1220,30 @@ var OllamaProvider = class extends BaseProvider {
1186
1220
  });
1187
1221
  response.data.on("error", reject);
1188
1222
  });
1189
- onChunk({ text: "", finishReason: "stop" });
1223
+ const toolCalls = pendingToolCalls.map((tc, i) => {
1224
+ let input;
1225
+ if (typeof tc.function.arguments === "string") {
1226
+ try {
1227
+ input = JSON.parse(tc.function.arguments);
1228
+ } catch {
1229
+ input = { __rawArguments: tc.function.arguments };
1230
+ }
1231
+ } else {
1232
+ input = tc.function.arguments;
1233
+ }
1234
+ return {
1235
+ id: `ollama-tool-${Date.now()}-${i}`,
1236
+ name: tc.function.name,
1237
+ input
1238
+ };
1239
+ });
1240
+ const finishReason = toolCalls.length ? "tool_use" : "stop";
1241
+ onChunk({ text: "", finishReason });
1190
1242
  return {
1191
1243
  content: fullContent,
1192
1244
  usage: this.makeUsage(inputTokens, outputTokens),
1193
- finishReason: "stop"
1245
+ toolCalls: toolCalls.length ? toolCalls : void 0,
1246
+ finishReason
1194
1247
  };
1195
1248
  }
1196
1249
  async countTokens(text) {
@@ -1214,6 +1267,7 @@ var OllamaProvider = class extends BaseProvider {
1214
1267
  maxOutputTokens: 4e3,
1215
1268
  supportsStreaming: true,
1216
1269
  isLocal: true,
1270
+ supportsToolUse: isToolCapable(m.name),
1217
1271
  minSizeB: this.parseSizeB(m.details?.parameter_size)
1218
1272
  }));
1219
1273
  } catch {
@@ -1236,6 +1290,26 @@ var OllamaProvider = class extends BaseProvider {
1236
1290
  result.push({ role: "system", content: typeof m.content === "string" ? m.content : "" });
1237
1291
  continue;
1238
1292
  }
1293
+ if (m.role === "tool") {
1294
+ result.push({
1295
+ role: "tool",
1296
+ content: typeof m.content === "string" ? m.content : JSON.stringify(m.content)
1297
+ });
1298
+ continue;
1299
+ }
1300
+ if (m.role === "assistant" && m.toolCalls?.length) {
1301
+ result.push({
1302
+ role: "assistant",
1303
+ content: typeof m.content === "string" ? m.content : "",
1304
+ tool_calls: m.toolCalls.map((tc) => ({
1305
+ function: {
1306
+ name: tc.name,
1307
+ arguments: tc.input
1308
+ }
1309
+ }))
1310
+ });
1311
+ continue;
1312
+ }
1239
1313
  if (typeof m.content === "string") {
1240
1314
  result.push({ role: m.role, content: m.content });
1241
1315
  continue;
@@ -1368,6 +1442,26 @@ var ModelSelector = class {
1368
1442
  return T3_MODEL_PRIORITY;
1369
1443
  }
1370
1444
  }
1445
+ getAllAvailableModels() {
1446
+ return Array.from(this.availableModels.values()).filter(
1447
+ (m) => this.availableProviders.has(m.provider)
1448
+ );
1449
+ }
1450
+ /**
1451
+ * Returns all available models eligible for the given tier, ordered by the
1452
+ * tier's priority chain. Use this as the candidate set for scored selection.
1453
+ */
1454
+ getCandidatesForTier(tier) {
1455
+ const priority = this.getPriorityList(tier);
1456
+ const candidates = [];
1457
+ for (const key of priority) {
1458
+ const model = this.availableModels.get(key);
1459
+ if (model && this.availableProviders.has(model.provider)) {
1460
+ candidates.push(model);
1461
+ }
1462
+ }
1463
+ return candidates;
1464
+ }
1371
1465
  isProviderAvailable(provider) {
1372
1466
  return this.availableProviders.has(provider);
1373
1467
  }
@@ -1573,11 +1667,203 @@ var TpmLimiter = class {
1573
1667
  }
1574
1668
  };
1575
1669
 
1670
+ // src/core/router/local-queue.ts
1671
/**
 * FIFO gate that limits how many callers may hold a slot at the same time.
 * Callers `acquire()` a slot, do their work, and then invoke the returned
 * release function; waiting callers are served oldest-first.
 */
var LocalRequestQueue = class {
  /** Upper bound on simultaneously held slots (never below 1, never NaN). */
  maxConcurrent;
  /** Number of slots currently held. */
  active = 0;
  /** Waiting callers, oldest first; each entry is invoked with a release function. */
  queue = [];
  /**
   * @param {number} [maxConcurrent=1] - Desired concurrency; values below 1
   *   (or values that are not numbers) are treated as 1.
   */
  constructor(maxConcurrent = 1) {
    const requested = Number(maxConcurrent);
    // Math.max(1, NaN) is NaN and `active < NaN` is always false, which would
    // make every acquire() wait forever. Fall back to a single slot instead.
    this.maxConcurrent = Number.isNaN(requested) ? 1 : Math.max(1, requested);
  }
  /**
   * Acquire a queue slot. Returns a `release` function that MUST be called
   * when the inference call is done (even on error). Rejects if the slot
   * cannot be acquired within `timeoutMs`.
   *
   * @param {number} [timeoutMs] - Maximum wait in milliseconds. When omitted
   *   or not positive, the caller waits indefinitely.
   * @returns {Promise<Function>} Resolves with an idempotent release function.
   */
  async acquire(timeoutMs) {
    if (this.active < this.maxConcurrent) {
      this.active++;
      return this.makeRelease();
    }
    return new Promise((resolve, reject) => {
      // `settled` guards against the timer and a hand-off both firing.
      let settled = false;
      let timer;
      const resolver = (release) => {
        if (settled) return;
        settled = true;
        if (timer !== void 0) clearTimeout(timer);
        resolve(release);
      };
      if (timeoutMs !== void 0 && timeoutMs > 0) {
        timer = setTimeout(() => {
          if (settled) return;
          settled = true;
          // Remove this waiter so a later release does not hand it a slot.
          const idx = this.queue.indexOf(resolver);
          if (idx !== -1) this.queue.splice(idx, 1);
          reject(new Error(
            `Local model queue: timed out waiting for a free slot after ${timeoutMs}ms. Active: ${this.active}, Queued: ${this.queue.length}. Consider increasing localConcurrency or localInferenceTimeoutMs in your config.`
          ));
        }, timeoutMs);
      }
      this.queue.push(resolver);
    });
  }
  /** Number of in-flight requests. */
  get activeCount() {
    return this.active;
  }
  /** Number of requests waiting for a slot. */
  get queueDepth() {
    return this.queue.length;
  }
  /**
   * Build a one-shot release function for a held slot. Calling it frees the
   * slot and, if someone is waiting, immediately hands the slot to the oldest
   * waiter. Repeated calls are ignored.
   */
  makeRelease() {
    let called = false;
    return () => {
      if (called) return;
      called = true;
      this.active--;
      const next = this.queue.shift();
      if (next) {
        this.active++;
        next(this.makeRelease());
      }
    };
  }
};
1733
+
1576
1734
  // src/utils/cost.ts
1577
1735
  function calculateCost(inputTokens, outputTokens, model) {
1578
1736
  return inputTokens / 1e3 * model.inputCostPer1kTokens + outputTokens / 1e3 * model.outputCostPer1kTokens;
1579
1737
  }
1580
1738
 
1739
+ // src/utils/retry.ts
1740
/**
 * Error type used to signal that a run was cancelled.
 * When no reason is supplied the message defaults to
 * "Run was cancelled via AbortSignal".
 */
var CascadeCancelledError = class extends Error {
  /**
   * @param {string} [reason] - Optional message; null/undefined selects the default.
   */
  constructor(reason) {
    const message = reason ?? "Run was cancelled via AbortSignal";
    super(message);
    this.name = "CascadeCancelledError";
  }
};
1746
/**
 * Error raised for tool failures. Carries a user-facing message plus a flag
 * telling callers whether retrying is reasonable, and keeps the underlying
 * failure reachable through the standard `cause` property.
 */
var CascadeToolError = class extends Error {
  /** A friendly message to show the user / T3 */
  userMessage;
  /** Whether this error class is retryable by default */
  retryable;
  /**
   * @param {string} userMessage - Friendly description of what failed.
   * @param {unknown} [cause] - Underlying error or value; appended to the
   *   message and exposed as `this.cause` when provided.
   * @param {boolean} [retryable=false] - Whether a retry is worthwhile.
   */
  constructor(userMessage, cause, retryable = false) {
    const hasCause = cause !== void 0 && cause !== null;
    const causeMsg = cause instanceof Error ? cause.message : String(cause);
    // Without a cause, avoid producing "<message>: undefined".
    const message = hasCause ? `${userMessage}: ${causeMsg}` : userMessage;
    // Only pass the options bag when there is a cause, so `cause` is not
    // defined as an own property holding undefined.
    if (hasCause) {
      super(message, { cause });
    } else {
      super(message);
    }
    this.name = "CascadeToolError";
    this.userMessage = userMessage;
    this.retryable = retryable;
  }
};
1759
/**
 * Await `promise`, but reject if it has not settled within `timeoutMs`.
 * The timer is always cleared once either side settles. The wrapped promise
 * itself is not cancelled; only the wait is abandoned.
 *
 * @param {Promise} promise - Work to wait for.
 * @param {number} timeoutMs - Deadline in milliseconds.
 * @param {string} [errorMessage="Operation timed out"] - Message of the timeout Error.
 * @returns {Promise} Settles like `promise`, or rejects with Error(errorMessage) on timeout.
 */
async function withTimeout(promise, timeoutMs, errorMessage = "Operation timed out") {
  let handle;
  const deadline = new Promise((_resolve, reject) => {
    handle = setTimeout(() => {
      reject(new Error(errorMessage));
    }, timeoutMs);
  });
  try {
    const winner = await Promise.race([promise, deadline]);
    return winner;
  } finally {
    if (handle !== void 0) {
      clearTimeout(handle);
    }
  }
}
1773
+
1774
+ // src/core/router/model-profiler.ts
1775
// Model ids/names matching this pattern are excluded from profiling.
var SKIP_PATTERN = /embed|dall-e|whisper|tts|vision|instruct-vision|rerank/i;

// Specialization label -> lower-case terms whose presence in a model
// description marks that specialization.
var SPECIALIZATION_KEYWORDS = {
  code: ["code", "coding", "programming", "developer", "software", "function", "debug", "typescript", "python", "javascript"],
  analysis: ["analysis", "analytical", "reasoning", "logic", "research", "evaluate", "assess", "explain"],
  creative: ["creative", "writing", "story", "poetry", "content", "blog", "essay", "narrative"],
  data: ["data", "sql", "statistics", "chart", "csv", "json", "excel", "spreadsheet", "math", "mathematical"],
  instruction: ["instruction", "instruction-following", "accurate", "precise", "factual"],
  multilingual: ["multilingual", "language", "translation", "linguistic"],
  long_context: ["long", "context", "document", "book", "summarize", "large"]
};

/**
 * Derive specialization labels from a free-text model description.
 * A label is included when any of its terms occurs as a substring of the
 * lower-cased description; labels keep the key order of SPECIALIZATION_KEYWORDS.
 *
 * @param {string} description - Model description text.
 * @returns {string[]} Matching specialization labels (possibly empty).
 */
function extractSpecializations(description) {
  const haystack = description.toLowerCase();
  return Object.entries(SPECIALIZATION_KEYWORDS)
    .filter(([, terms]) => terms.some((term) => haystack.includes(term)))
    .map(([label]) => label);
}
1795
/**
 * Fetch the public OpenRouter model catalogue.
 * Best-effort: any failure (network error, timeout after 8 s, non-2xx status,
 * unexpected payload shape) yields an empty list instead of throwing.
 *
 * @returns {Promise<Array>} The `data` array from the response, or [].
 */
async function fetchOpenRouterModels() {
  try {
    const resp = await fetch("https://openrouter.ai/api/v1/models", {
      // Report the real package version rather than a hard-coded one.
      headers: { "User-Agent": `Cascade-AI/${CASCADE_VERSION}` },
      signal: AbortSignal.timeout(8e3)
    });
    if (!resp.ok) return [];
    const payload = await resp.json();
    // Callers iterate the result, so never hand back a non-array `data` value.
    return Array.isArray(payload?.data) ? payload.data : [];
  } catch {
    // Deliberate: catalogue lookup is optional, so failures degrade to "no data".
    return [];
  }
}
1808
/**
 * Ask a model, through the router, to name its top specializations and parse
 * the JSON answer. Best-effort: any error or unparseable reply yields [].
 *
 * NOTE(review): `model` is never read, and the request is always issued for
 * the "T3" tier, so the reply comes from whichever model the router uses for
 * T3 rather than from the model being profiled. Confirm whether that is intended.
 *
 * @param router - Object exposing `generate(tier, options)`.
 * @param model - Currently unused.
 * @returns {Promise<string[]>} Up to five string entries from `specializations`.
 */
async function queryModelDirectly(router, model) {
  try {
    const request = {
      messages: [{
        role: "user",
        content: 'What are your top 3 task specializations? Reply with valid JSON only: {"specializations": ["<area1>", "<area2>", "<area3>"]}'
      }],
      maxTokens: 60
    };
    const reply = await router.generate("T3", request);
    // Take the first {...} span; the reply may wrap the JSON in extra prose.
    const jsonSpan = /\{[\s\S]*?\}/.exec(reply.content);
    if (jsonSpan === null) {
      return [];
    }
    const { specializations } = JSON.parse(jsonSpan[0]);
    if (!Array.isArray(specializations)) {
      return [];
    }
    const labels = [];
    for (const entry of specializations) {
      if (labels.length === 5) break;
      if (typeof entry === "string") {
        labels.push(entry);
      }
    }
    return labels;
  } catch {
    return [];
  }
}
1827
/**
 * Determines specialization labels for models and persists them via `store`.
 * Labels come from the OpenRouter catalogue description when one matches the
 * model id, otherwise from asking through the router (when one is supplied).
 */
var ModelProfiler = class {
  store;
  router;
  /**
   * @param store - Must provide getProfiledModelIds() and saveModelProfile(id, provider, specializations).
   * @param router - Optional; when falsy the direct-query fallback is skipped.
   */
  constructor(store, router) {
    this.store = store;
    this.router = router;
  }
  /**
   * Profile every model that the store does not already list as profiled and
   * that is not excluded by SKIP_PATTERN (checked against both id and name).
   * Per-model failures are isolated via Promise.allSettled and not reported.
   *
   * @param {Array} models - Candidate models with `id`, `name` and `provider`.
   * @returns {Promise<void>}
   */
  async profileAll(models) {
    const knownIds = new Set(this.store.getProfiledModelIds());
    const pending = models.filter((candidate) => {
      if (knownIds.has(candidate.id)) return false;
      return !SKIP_PATTERN.test(candidate.id) && !SKIP_PATTERN.test(candidate.name);
    });
    if (pending.length === 0) return;

    // Index the catalogue by full id and by the segment after the last "/",
    // both lower-cased, so either form of a model id can be looked up.
    const catalogue = /* @__PURE__ */ new Map();
    for (const entry of await fetchOpenRouterModels()) {
      catalogue.set(entry.id.toLowerCase(), entry);
      const tail = entry.id.split("/").pop();
      if (tail) catalogue.set(tail.toLowerCase(), entry);
    }

    const profileOne = async (model) => {
      const listing = catalogue.get(model.id.toLowerCase()) ?? catalogue.get(model.id.split("/").pop()?.toLowerCase() ?? "");
      let specializations = [];
      if (listing?.description) {
        specializations = extractSpecializations(listing.description);
      }
      // Fall back to asking through the router when the catalogue gave nothing.
      if (specializations.length === 0 && this.router) {
        specializations = await queryModelDirectly(this.router);
      }
      this.store.saveModelProfile(model.id, model.provider, specializations);
    };
    await Promise.allSettled(pending.map((model) => profileOne(model)));
  }
};
1866
+
1581
1867
  // src/core/router/index.ts
1582
1868
  var CascadeRouter = class _CascadeRouter extends EventEmitter {
1583
1869
  selector;
@@ -1605,6 +1891,7 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
1605
1891
  budgetState = "ok";
1606
1892
  budgetExceededReason;
1607
1893
  tpmLimiter;
1894
+ localQueue;
1608
1895
  /** Thrown when the configured budget is exceeded. */
1609
1896
  static BudgetExceededError = class extends Error {
1610
1897
  constructor(msg) {
@@ -1621,6 +1908,7 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
1621
1908
  this.selector = new ModelSelector(availableProviders);
1622
1909
  this.failover = new FailoverManager(this.selector);
1623
1910
  this.tpmLimiter = new TpmLimiter(config.rateLimits?.providerTpm ?? {});
1911
+ this.localQueue = new LocalRequestQueue(config.localConcurrency ?? 1);
1624
1912
  const ollamaCfg = config.providers.find((p) => p.type === "ollama");
1625
1913
  if (availableProviders.has("ollama")) {
1626
1914
  await this.discoverOllamaModels(ollamaCfg);
@@ -1632,7 +1920,7 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
1632
1920
  if (!model) {
1633
1921
  throw new Error(`Configured model "${override}" for ${tier} could not be loaded. Check provider availability and exact model name.`);
1634
1922
  }
1635
- if (model.id !== override) {
1923
+ if (model.id !== override && `${model.provider}:${model.id}` !== override) {
1636
1924
  throw new Error(`Configured model "${override}" for ${tier} resolved to "${model.id}". Use the exact provider model ID or prefix the provider (e.g. gemini:${override}).`);
1637
1925
  }
1638
1926
  this.tierModels.set(tier, model);
@@ -1647,6 +1935,17 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
1647
1935
  }
1648
1936
  }
1649
1937
  }
1938
+ /**
1939
+ * Run model specialization profiling in the background.
1940
+ * Only profiles models that haven't been profiled yet (cache-first).
1941
+ * No-op if store is not provided.
1942
+ */
1943
+ async profileModels(store) {
1944
+ const allModels = this.selector.getAllAvailableModels();
1945
+ const profiler = new ModelProfiler(store, this);
1946
+ profiler.profileAll(allModels).catch(() => {
1947
+ });
1948
+ }
1650
1949
  async generate(tier, options, onChunk, requireVision = false) {
1651
1950
  if (this.budgetState === "exceeded") {
1652
1951
  throw new _CascadeRouter.BudgetExceededError(
@@ -1668,9 +1967,26 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
1668
1967
  await this.tpmLimiter.acquire(model.provider, estimatedTokens);
1669
1968
  }
1670
1969
  const useStream = Boolean(onChunk) && model.supportsStreaming && typeof provider.generateStream === "function";
1970
+ let releaseLocalSlot;
1971
+ if (model.isLocal) {
1972
+ const inferenceTimeoutMs = this.config.localInferenceTimeoutMs ?? 3e5;
1973
+ const queueWaitMs = Math.round(inferenceTimeoutMs / 2);
1974
+ releaseLocalSlot = await this.localQueue.acquire(queueWaitMs);
1975
+ }
1671
1976
  try {
1672
1977
  let result;
1673
- if (useStream && onChunk) {
1978
+ if (model.isLocal) {
1979
+ const inferenceTimeoutMs = this.config.localInferenceTimeoutMs ?? 3e5;
1980
+ const inferencePromise = useStream && onChunk ? provider.generateStream(options, (chunk) => {
1981
+ const text = typeof chunk?.text === "string" ? chunk.text : "";
1982
+ if (text) onChunk({ ...chunk, text });
1983
+ }) : provider.generate(options);
1984
+ result = await withTimeout(
1985
+ inferencePromise,
1986
+ inferenceTimeoutMs,
1987
+ `Local model ${model.id} inference timed out after ${inferenceTimeoutMs}ms`
1988
+ );
1989
+ } else if (useStream && onChunk) {
1674
1990
  try {
1675
1991
  result = await provider.generateStream(options, (chunk) => {
1676
1992
  const text = typeof chunk?.text === "string" ? chunk.text : "";
@@ -1708,10 +2024,14 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
1708
2024
  if (fallback) {
1709
2025
  this.tierModels.set(tier, fallback);
1710
2026
  this.ensureProvider(fallback, this.config.providers);
2027
+ releaseLocalSlot?.();
2028
+ releaseLocalSlot = void 0;
1711
2029
  return this.generate(tier, options, onChunk, requireVision);
1712
2030
  }
1713
2031
  }
1714
2032
  throw err;
2033
+ } finally {
2034
+ releaseLocalSlot?.();
1715
2035
  }
1716
2036
  }
1717
2037
  getModelForTier(tier) {
@@ -1951,29 +2271,6 @@ var CascadeRouter = class _CascadeRouter extends EventEmitter {
1951
2271
  return /rate.?limit|429|too.?many.?requests|quota/i.test(msg);
1952
2272
  }
1953
2273
  };
1954
-
1955
- // src/utils/retry.ts
1956
- var CascadeCancelledError = class extends Error {
1957
- constructor(reason) {
1958
- super(reason ?? "Run was cancelled via AbortSignal");
1959
- this.name = "CascadeCancelledError";
1960
- }
1961
- };
1962
- var CascadeToolError = class extends Error {
1963
- /** A friendly message to show the user / T3 */
1964
- userMessage;
1965
- /** Whether this error class is retryable by default */
1966
- retryable;
1967
- constructor(userMessage, cause, retryable = false) {
1968
- const causeMsg = cause instanceof Error ? cause.message : String(cause);
1969
- super(`${userMessage}: ${causeMsg}`);
1970
- this.name = "CascadeToolError";
1971
- this.userMessage = userMessage;
1972
- this.retryable = retryable;
1973
- }
1974
- };
1975
-
1976
- // src/core/tiers/base.ts
1977
2274
  var BaseTier = class extends EventEmitter {
1978
2275
  id;
1979
2276
  role;
@@ -2254,6 +2551,97 @@ var AuditLogger = class {
2254
2551
  }
2255
2552
  };
2256
2553
 
2554
+ // src/tools/text-tool-parser.ts
2555
// Matches <tool_call>…</tool_call> blocks; group 1 is the payload between the tags.
var TOOL_CALL_RE = /<tool_call>\s*([\s\S]*?)\s*<\/tool_call>/g;
// Matches ```json fenced blocks; group 1 is the fenced payload.
var JSON_BLOCK_RE = /```json\s*([\s\S]*?)\s*```/g;
// Former single-regex matcher for {"function": {...}} objects. Its `[^}]*`
// segments cannot span a nested `arguments` object, so it is no longer used by
// tryFunctionCallObjects; the binding is kept in case other code references it.
var FUNCTION_OBJ_RE = /\{\s*"function"\s*:\s*\{[^}]*"name"\s*:[^}]*\}\s*\}/g;
// Finds where a {"function": …} object opens; its end is located by brace matching.
var FUNCTION_OBJ_START_RE = /\{\s*"function"\s*:/g;

/**
 * Extract tool calls that a model wrote as plain text. Formats are tried in
 * order and the first one that yields any call wins:
 *   1. <tool_call>{"name": …, "input": {…}}</tool_call>
 *   2. ```json fenced {"name": …, "input": {…}}
 *   3. {"function": {"name": …, "arguments": …}}
 *
 * @param {string} text - Raw model output.
 * @returns {Array<{name: string, input: object}>} Parsed calls (possibly empty).
 */
function parseTextToolCalls(text) {
  const results = tryXmlBlocks(text);
  if (results.length > 0) return results;
  const jsonBlockResults = tryJsonCodeBlocks(text);
  if (jsonBlockResults.length > 0) return jsonBlockResults;
  return tryFunctionCallObjects(text);
}

/**
 * Shared worker for the two {"name", "input"} formats: parse group 1 of every
 * match of `pattern` as JSON and keep entries that carry a string `name`.
 * A non-object `input` is replaced by {}. Malformed payloads are skipped.
 */
function collectTextToolCalls(pattern, text) {
  const results = [];
  // matchAll works on a copy of the regex, so the shared /g pattern keeps no state.
  for (const match of text.matchAll(pattern)) {
    try {
      const raw = JSON.parse(match[1]);
      if (typeof raw.name !== "string") continue;
      const input = typeof raw.input === "object" && raw.input !== null ? raw.input : {};
      results.push({ name: raw.name, input });
    } catch {
      // Best-effort parsing of model output: ignore blocks that are not valid JSON.
    }
  }
  return results;
}

/** Parse calls written as <tool_call>…</tool_call> blocks. */
function tryXmlBlocks(text) {
  return collectTextToolCalls(TOOL_CALL_RE, text);
}

/** Parse calls written as ```json fenced blocks. */
function tryJsonCodeBlocks(text) {
  return collectTextToolCalls(JSON_BLOCK_RE, text);
}

/**
 * Return the balanced {...} substring of `text` that opens at `start`, or null
 * when the braces never balance. Braces inside JSON string literals (including
 * after escaped quotes) are ignored.
 */
function sliceBalancedJsonObject(text, start) {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      if (depth === 0) return text.slice(start, i + 1);
    }
  }
  return null;
}

/**
 * Normalize a function call's `arguments` value to an object: objects pass
 * through, JSON-encoded strings are decoded when they hold an object, and
 * anything else becomes {}.
 */
function normalizeToolArguments(args) {
  if (typeof args === "object" && args !== null) return args;
  if (typeof args === "string") {
    try {
      const decoded = JSON.parse(args);
      if (typeof decoded === "object" && decoded !== null) return decoded;
    } catch {
      // Not JSON: treat as "no usable arguments".
    }
  }
  return {};
}

/**
 * Parse calls written as {"function": {"name": …, "arguments": …}} objects
 * embedded anywhere in the text. Object extents are found by brace matching so
 * that nested `arguments` objects are captured whole.
 */
function tryFunctionCallObjects(text) {
  const results = [];
  let match;
  FUNCTION_OBJ_START_RE.lastIndex = 0;
  while ((match = FUNCTION_OBJ_START_RE.exec(text)) !== null) {
    const candidate = sliceBalancedJsonObject(text, match.index);
    if (candidate === null) continue;
    try {
      const fn = JSON.parse(candidate).function;
      if (!fn || typeof fn.name !== "string") continue;
      results.push({ name: fn.name, input: normalizeToolArguments(fn.arguments) });
      // Resume after this object so its own contents are not rescanned.
      FUNCTION_OBJ_START_RE.lastIndex = match.index + candidate.length;
    } catch {
      // Best-effort parsing of model output: ignore candidates that are not valid JSON.
    }
  }
  return results;
}
2611
/**
 * Turn a parsed text tool call into a tool-call record with a generated id of
 * the form `text-tool-<Date.now()>-<index>`.
 *
 * @param {{name: string, input: object}} parsed - Output of the text tool parser.
 * @param {number} index - Position of the call within its response.
 * @returns {{id: string, name: string, input: object}}
 */
function toToolCall(parsed, index) {
  const id = `text-tool-${Date.now()}-${index}`;
  const { name, input } = parsed;
  return { id, name, input };
}
2618
+ function buildTextToolSystemPrompt(tools) {
2619
+ const toolDefs = tools.map((t) => {
2620
+ const props = t.inputSchema?.properties ?? {};
2621
+ const paramLines = Object.entries(props).map(([k, v]) => ` "${k}": "<${v.description ?? k}>"`);
2622
+ return `\u2022 ${t.name}: ${t.description}
2623
+ Input: {${paramLines.length ? "\n" + paramLines.join(",\n") + "\n " : ""}}`;
2624
+ }).join("\n");
2625
+ return `
2626
+ TOOL USE INSTRUCTIONS:
2627
+ You do not have native tool-use capability. To call a tool, write a <tool_call> block:
2628
+
2629
+ <tool_call>
2630
+ {"name": "<tool_name>", "input": {<parameters>}}
2631
+ </tool_call>
2632
+
2633
+ Available tools:
2634
+ ${toolDefs}
2635
+
2636
+ EXAMPLE \u2014 calling the "shell" tool to list files:
2637
+ <tool_call>
2638
+ {"name": "shell", "input": {"command": "ls -la /workspace"}}
2639
+ </tool_call>
2640
+
2641
+ You will then receive a user message with the result, then continue your work.
2642
+ Only call one tool at a time. When you have enough information, provide your final answer.`;
2643
+ }
2644
+
2257
2645
  // src/core/tiers/t3-worker.ts
2258
2646
  var T3_SYSTEM_PROMPT = `You are a T3 Worker agent in the Cascade AI system. Your job is to execute a specific subtask completely and accurately.
2259
2647
 
@@ -2455,6 +2843,9 @@ Now execute your subtask using this context where relevant.`
2455
2843
  const MAX_ITERATIONS = 15;
2456
2844
  const requiresArtifact = this.requiresArtifact();
2457
2845
  tools = [...tools];
2846
+ const t3Model = this.router.getModelForTier("T3");
2847
+ const useTextTools = t3Model?.supportsToolUse === false && tools.length > 0;
2848
+ const textToolSuffix = useTextTools ? buildTextToolSystemPrompt(tools) : "";
2458
2849
  while (iterations < MAX_ITERATIONS) {
2459
2850
  iterations++;
2460
2851
  this.throwIfCancelled();
@@ -2462,8 +2853,9 @@ Now execute your subtask using this context where relevant.`
2462
2853
  messages: this.context.getMessages(),
2463
2854
  systemPrompt: this.systemPromptOverride + systemPrompt + (this.hierarchyContext ? `
2464
2855
 
2465
- HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
2466
- tools: tools.length ? tools : void 0,
2856
+ HIERARCHY CONTEXT: ${this.hierarchyContext}` : "") + textToolSuffix,
2857
+ // Don't pass tools array when model can't use them natively
2858
+ tools: useTextTools ? void 0 : tools.length ? tools : void 0,
2467
2859
  maxTokens: 4096
2468
2860
  };
2469
2861
  const result = await this.router.generate(
@@ -2473,9 +2865,19 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
2473
2865
  this.emit("stream:token", { tierId: this.id, text: chunk.text });
2474
2866
  }
2475
2867
  );
2476
- await this.context.addMessage({ role: "assistant", content: result.content, toolCalls: result.toolCalls });
2477
- if (!result.toolCalls?.length) {
2868
+ let effectiveToolCalls = result.toolCalls ?? [];
2869
+ if (useTextTools && effectiveToolCalls.length === 0) {
2870
+ const textCalls = parseTextToolCalls(result.content);
2871
+ effectiveToolCalls = textCalls.map((tc, i) => toToolCall(tc, i));
2872
+ }
2873
+ const effectiveResult = { ...result, toolCalls: effectiveToolCalls };
2874
+ await this.context.addMessage({ role: "assistant", content: result.content, toolCalls: effectiveToolCalls });
2875
+ if (!effectiveResult.toolCalls?.length) {
2478
2876
  if (requiresArtifact) {
2877
+ const artifactCheck = await this.verifyArtifacts(this.assignment);
2878
+ if (artifactCheck.ok) {
2879
+ return { output: result.content, toolCalls: allToolCalls };
2880
+ }
2479
2881
  stalledArtifactIterations += 1;
2480
2882
  if (stalledArtifactIterations >= 2) {
2481
2883
  if (stalledArtifactIterations === 2) {
@@ -2485,17 +2887,24 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
2485
2887
  }
2486
2888
  await this.context.addMessage({
2487
2889
  role: "user",
2488
- content: "You have not yet created and verified the required artifact. Use tools to create the file in the workspace, verify it exists, and inspect the result before concluding."
2890
+ content: `You have not yet created and verified the required artifact. Issues: ${artifactCheck.issues.join("; ")}. Use tools to create the file in the workspace, verify it exists, and inspect the result before concluding.`
2489
2891
  });
2490
2892
  continue;
2491
2893
  }
2492
2894
  return { output: result.content, toolCalls: allToolCalls };
2493
2895
  }
2494
2896
  stalledArtifactIterations = 0;
2495
- if (result.finishReason === "stop" && !requiresArtifact) {
2496
- return { output: result.content, toolCalls: allToolCalls };
2897
+ if (effectiveResult.finishReason === "stop" && effectiveResult.toolCalls.length === 0) {
2898
+ if (requiresArtifact) {
2899
+ const artifactCheck = await this.verifyArtifacts(this.assignment);
2900
+ if (artifactCheck.ok) {
2901
+ return { output: result.content, toolCalls: allToolCalls };
2902
+ }
2903
+ } else {
2904
+ return { output: result.content, toolCalls: allToolCalls };
2905
+ }
2497
2906
  }
2498
- for (const tc of result.toolCalls) {
2907
+ for (const tc of effectiveResult.toolCalls) {
2499
2908
  allToolCalls.push(tc);
2500
2909
  const toolResult = await this.executeTool(tc);
2501
2910
  await this.context.addMessage({
@@ -2553,13 +2962,15 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
2553
2962
  currentAction: `Using tool: ${tc.name}`,
2554
2963
  status: "IN_PROGRESS"
2555
2964
  });
2965
+ this.emit("tool:call", { id: tc.id, tierId: this.id, toolName: tc.name, input: tc.input });
2966
+ const toolStartMs = Date.now();
2556
2967
  try {
2557
2968
  const result = await this.toolRegistry.execute(tc.name, tc.input, {
2558
2969
  tierId: this.id,
2559
2970
  sessionId: this.taskId,
2560
2971
  requireApproval: false,
2561
- saveSnapshot: async (path14, content) => {
2562
- this.store?.addFileSnapshot(this.taskId, path14, content);
2972
+ saveSnapshot: async (path17, content) => {
2973
+ this.store?.addFileSnapshot(this.taskId, path17, content);
2563
2974
  },
2564
2975
  sendPeerSync: (to, syncType, content) => {
2565
2976
  this.peerBus?.send(this.id, to, syncType, this.assignment?.subtaskId ?? "", content);
@@ -2576,11 +2987,83 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
2576
2987
  this.audit.fileChange(this.id, tc.input["path"] ?? "unknown", tc.name);
2577
2988
  }
2578
2989
  }
2579
- this.emit("tool:result", { tierId: this.id, toolName: tc.name, result });
2990
+ const durationMs = Date.now() - toolStartMs;
2991
+ this.emit("tool:result", { id: tc.id, tierId: this.id, toolName: tc.name, output: typeof result === "string" ? result : JSON.stringify(result), durationMs });
2580
2992
  return typeof result === "string" ? result : JSON.stringify(result);
2581
2993
  } catch (err) {
2582
- return `Tool error: ${err instanceof Error ? err.message : String(err)}`;
2994
+ const durationMs = Date.now() - toolStartMs;
2995
+ const errMsg = err instanceof Error ? err.message : String(err);
2996
+ this.emit("tool:result", { id: tc.id, tierId: this.id, toolName: tc.name, error: errMsg, durationMs });
2997
+ return `Tool error: ${errMsg}`;
2998
+ }
2999
+ }
3000
+ /**
3001
+ * Adaptive fallback cascade — invoked when executeTool() fails.
3002
+ * Strategy order:
3003
+ * 1. Find a semantically similar registered tool and retry with same input
3004
+ * 2. Synthesize a new tool via ToolCreator (if available) and run it
3005
+ * 3. Return the original error so the agent loop can decide what to do next
3006
+ */
3007
+ async adaptiveFallback(tc, originalError) {
3008
+ const altTool = this.findAlternativeTool(tc.name);
3009
+ if (altTool) {
3010
+ this.log(`Adaptive fallback: trying alternative tool "${altTool}" for failed "${tc.name}"`);
3011
+ this.sendStatusUpdate({ progressPct: 50, currentAction: `Fallback: trying ${altTool}`, status: "IN_PROGRESS" });
3012
+ try {
3013
+ const result = await this.toolRegistry.execute(altTool, tc.input, {
3014
+ tierId: this.id,
3015
+ sessionId: this.taskId,
3016
+ requireApproval: false
3017
+ });
3018
+ const str = typeof result === "string" ? result : JSON.stringify(result);
3019
+ if (!str.startsWith("Tool error:") && !str.startsWith("Error:")) {
3020
+ return `[Fallback via ${altTool}]: ${str}`;
3021
+ }
3022
+ } catch {
3023
+ }
3024
+ }
3025
+ if (this.toolCreator) {
3026
+ this.log(`Adaptive fallback: requesting dynamic tool synthesis for "${tc.name}"`);
3027
+ this.sendStatusUpdate({ progressPct: 55, currentAction: `Synthesizing fallback tool for: ${tc.name}`, status: "IN_PROGRESS" });
3028
+ try {
3029
+ const newToolName = await this.toolCreator.createTool(
3030
+ `Replacement for "${tc.name}" \u2014 original failed with: ${originalError.slice(0, 150)}`,
3031
+ this.assignment?.subtaskTitle ?? tc.name
3032
+ );
3033
+ if (newToolName) {
3034
+ this.log(`Adaptive fallback: synthesized "${newToolName}", retrying`);
3035
+ const result = await this.toolRegistry.execute(newToolName, tc.input, {
3036
+ tierId: this.id,
3037
+ sessionId: this.taskId,
3038
+ requireApproval: false
3039
+ });
3040
+ const str = typeof result === "string" ? result : JSON.stringify(result);
3041
+ if (!str.startsWith("Tool error:")) return `[Synthesized ${newToolName}]: ${str}`;
3042
+ }
3043
+ } catch {
3044
+ }
2583
3045
  }
3046
+ return originalError;
3047
+ }
3048
+ /**
3049
+ * Find a registered tool whose name/description semantically overlaps with
3050
+ * the failing tool. Returns the best candidate name, or null if none found.
3051
+ */
3052
+ findAlternativeTool(failedToolName) {
3053
+ const failedKeywords = failedToolName.toLowerCase().split(/[_\-\s]+/);
3054
+ const allTools = this.toolRegistry.getToolDefinitions();
3055
+ let bestScore = 0;
3056
+ let bestName = null;
3057
+ for (const tool of allTools) {
3058
+ if (tool.name === failedToolName) continue;
3059
+ const toolWords = tool.name.toLowerCase().split(/[_\-\s]+/);
3060
+ const score = failedKeywords.filter((k) => toolWords.includes(k)).length;
3061
+ if (score > bestScore && score >= 1) {
3062
+ bestScore = score;
3063
+ bestName = tool.name;
3064
+ }
3065
+ }
3066
+ return bestName;
2584
3067
  }
2585
3068
  /**
2586
3069
  * Announce which files this T3 plans to edit, then acquire locks on them
@@ -2640,12 +3123,12 @@ ${assignment.expectedOutput}`;
2640
3123
  if (!artifactPaths.length) return { ok: true, issues: [] };
2641
3124
  const issues = [];
2642
3125
  const { exec: exec3 } = await import('child_process');
2643
- const { promisify: promisify3 } = await import('util');
2644
- const execAsync2 = promisify3(exec3);
3126
+ const { promisify: promisify4 } = await import('util');
3127
+ const execAsync2 = promisify4(exec3);
2645
3128
  for (const artifactPath of artifactPaths) {
2646
- const absolutePath = path13.resolve(process.cwd(), artifactPath);
3129
+ const absolutePath = path16.resolve(process.cwd(), artifactPath);
2647
3130
  try {
2648
- const stat = await fs2.stat(absolutePath);
3131
+ const stat = await fs3.stat(absolutePath);
2649
3132
  if (!stat.isFile()) {
2650
3133
  issues.push(`Expected artifact is not a file: ${artifactPath}`);
2651
3134
  continue;
@@ -2655,7 +3138,7 @@ ${assignment.expectedOutput}`;
2655
3138
  continue;
2656
3139
  }
2657
3140
  if (!/\.pdf$/i.test(artifactPath)) {
2658
- const content = await fs2.readFile(absolutePath, "utf-8");
3141
+ const content = await fs3.readFile(absolutePath, "utf-8");
2659
3142
  if (!content.trim()) {
2660
3143
  issues.push(`Artifact content is empty: ${artifactPath}`);
2661
3144
  continue;
@@ -2664,7 +3147,7 @@ ${assignment.expectedOutput}`;
2664
3147
  issues.push(`PDF artifact looks too small to be valid: ${artifactPath}`);
2665
3148
  continue;
2666
3149
  }
2667
- const ext = path13.extname(absolutePath).toLowerCase();
3150
+ const ext = path16.extname(absolutePath).toLowerCase();
2668
3151
  try {
2669
3152
  if (ext === ".ts" || ext === ".tsx") {
2670
3153
  await execAsync2(`npx tsc --noEmit ${absolutePath}`, { timeout: 1e4 });
@@ -2782,6 +3265,11 @@ var PeerBus = class extends EventEmitter {
2782
3265
  barriers = /* @__PURE__ */ new Map();
2783
3266
  broadcastLog = [];
2784
3267
  fileLocks = /* @__PURE__ */ new Map();
3268
+ /** subtaskIds whose T3 is being retried by T2 — dependents should re-wait rather than fail fast */
3269
+ retryPending = /* @__PURE__ */ new Set();
3270
+ /** Called when any peer message or broadcast is sent — used for dashboard visibility. */
3271
+ onPeerMessage;
3272
+ sessionId = "";
2785
3273
  register(peerId) {
2786
3274
  this.members.add(peerId);
2787
3275
  }
@@ -2803,11 +3291,33 @@ var PeerBus = class extends EventEmitter {
2803
3291
  this.waiters.delete(subtaskId);
2804
3292
  }
2805
3293
  /**
2806
- * Wait for a specific subtask's output resolves immediately if already available
3294
+ * Mark a subtask as retry-pending so dependents re-wait instead of failing fast
3295
+ * when they see an ESCALATED status.
3296
+ */
3297
+ markRetryPending(subtaskId) {
3298
+ this.retryPending.add(subtaskId);
3299
+ this.outputs.delete(subtaskId);
3300
+ }
3301
+ /** Called by T2 after retry resolves (success or final failure). */
3302
+ clearRetryPending(subtaskId) {
3303
+ this.retryPending.delete(subtaskId);
3304
+ }
3305
+ /** Remove a single output entry so a respawned worker can republish without clearing prior-wave outputs. */
3306
+ clearOutput(subtaskId) {
3307
+ this.outputs.delete(subtaskId);
3308
+ this.waiters.delete(subtaskId);
3309
+ this.retryPending.delete(subtaskId);
3310
+ }
3311
+ isRetryPending(subtaskId) {
3312
+ return this.retryPending.has(subtaskId);
3313
+ }
3314
+ /**
3315
+ * Wait for a specific subtask's output — resolves immediately if already available.
3316
+ * If the output is ESCALATED but a retry is pending, waits for the retry result.
2807
3317
  */
2808
3318
  waitFor(subtaskId, timeoutMs = 12e4) {
2809
3319
  const existing = this.outputs.get(subtaskId);
2810
- if (existing) return Promise.resolve(existing);
3320
+ if (existing && !this.retryPending.has(subtaskId)) return Promise.resolve(existing);
2811
3321
  return new Promise((resolve, reject) => {
2812
3322
  const resolver = (output) => {
2813
3323
  clearTimeout(timer);
@@ -2838,6 +3348,7 @@ var PeerBus = class extends EventEmitter {
2838
3348
  * Also logs to broadcastLog so collect() can retrieve recent broadcasts.
2839
3349
  */
2840
3350
  broadcast(fromId, payload) {
3351
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString();
2841
3352
  const msg = {
2842
3353
  fromId,
2843
3354
  toId: "*",
@@ -2845,10 +3356,18 @@ var PeerBus = class extends EventEmitter {
2845
3356
  subtaskId: "",
2846
3357
  syncType: "SHARE_OUTPUT",
2847
3358
  payload,
2848
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
3359
+ timestamp
2849
3360
  };
2850
- this.broadcastLog.push({ fromId, payload, timestamp: msg.timestamp });
3361
+ this.broadcastLog.push({ fromId, payload, timestamp });
2851
3362
  this.emit("broadcast", msg);
3363
+ this.onPeerMessage?.({
3364
+ fromId,
3365
+ toId: void 0,
3366
+ syncType: "SHARE_OUTPUT",
3367
+ payload: typeof payload === "string" ? payload : JSON.stringify(payload),
3368
+ timestamp,
3369
+ sessionId: this.sessionId
3370
+ });
2852
3371
  }
2853
3372
  /**
2854
3373
  * Collect all broadcast messages received within a time window.
@@ -2934,6 +3453,16 @@ var PeerBus = class extends EventEmitter {
2934
3453
  isFileLocked(filePath) {
2935
3454
  return this.fileLocks.has(filePath);
2936
3455
  }
3456
+ /**
3457
+ * Reset all runtime output/waiter state for a fresh T3 respawn wave.
3458
+ * Preserves member registrations and barrier definitions.
3459
+ */
3460
+ reset() {
3461
+ this.outputs.clear();
3462
+ this.waiters.clear();
3463
+ this.retryPending.clear();
3464
+ this.broadcastLog = [];
3465
+ }
2937
3466
  /**
2938
3467
  * Clear broadcast log — call between phases to avoid stale announcements.
2939
3468
  */
@@ -2944,6 +3473,7 @@ var PeerBus = class extends EventEmitter {
2944
3473
  * Send a targeted message to a specific peer
2945
3474
  */
2946
3475
  send(fromId, toId, syncType, subtaskId, payload) {
3476
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString();
2947
3477
  const msg = {
2948
3478
  fromId,
2949
3479
  toId,
@@ -2951,10 +3481,18 @@ var PeerBus = class extends EventEmitter {
2951
3481
  subtaskId,
2952
3482
  syncType,
2953
3483
  payload,
2954
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
3484
+ timestamp
2955
3485
  };
2956
3486
  this.emit(`message:${toId}`, msg);
2957
3487
  this.emit("message", msg);
3488
+ this.onPeerMessage?.({
3489
+ fromId,
3490
+ toId,
3491
+ syncType: syncType ?? "SHARE_OUTPUT",
3492
+ payload: typeof payload === "string" ? payload : JSON.stringify(payload),
3493
+ timestamp,
3494
+ sessionId: this.sessionId
3495
+ });
2958
3496
  }
2959
3497
  /**
2960
3498
  * Barrier — wait until N peers have all reached this point
@@ -3007,6 +3545,8 @@ var T2Manager = class extends BaseTier {
3007
3545
  t2PeerBus;
3008
3546
  permissionEscalator;
3009
3547
  toolCreator;
3548
+ /** AbortController for the current T3 wave — aborted on cancel-and-respawn */
3549
+ waveAbortController = null;
3010
3550
  setPeerBus(bus) {
3011
3551
  this.t2PeerBus = bus;
3012
3552
  this.t2PeerBus.register(this.id);
@@ -3015,6 +3555,14 @@ var T2Manager = class extends BaseTier {
3015
3555
  this.receivePeerSync(msg.fromId, msg.payload);
3016
3556
  });
3017
3557
  }
3558
+ setPeerMessageCallback(cb, sessionId) {
3559
+ this.t3PeerBus.onPeerMessage = cb;
3560
+ this.t3PeerBus.sessionId = sessionId;
3561
+ if (this.t2PeerBus) {
3562
+ this.t2PeerBus.onPeerMessage = cb;
3563
+ this.t2PeerBus.sessionId = sessionId;
3564
+ }
3565
+ }
3018
3566
  constructor(router, toolRegistry, parentId) {
3019
3567
  super("T2", void 0, parentId);
3020
3568
  this.router = router;
@@ -3182,6 +3730,26 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
3182
3730
  }];
3183
3731
  }
3184
3732
  }
3733
+ buildWorkerMap(assignments, taskId) {
3734
+ const workerMap = /* @__PURE__ */ new Map();
3735
+ for (const a of assignments) {
3736
+ const worker = new T3Worker(this.router, this.toolRegistry, this.id);
3737
+ if (this.store) worker.setStore(this.store, taskId);
3738
+ worker.setPeerBus(this.t3PeerBus);
3739
+ if (this.permissionEscalator) worker.setPermissionEscalator(this.permissionEscalator);
3740
+ if (this.toolCreator) worker.setToolCreator(this.toolCreator);
3741
+ workerMap.set(a.subtaskId, worker);
3742
+ this.t3Workers.set(a.subtaskId, worker);
3743
+ worker.on("stream:token", (e) => this.emit("stream:token", e));
3744
+ worker.on("log", (e) => this.emit("log", e));
3745
+ worker.on("tier:status", (e) => this.emit("tier:status", e));
3746
+ worker.on("tool:approval-request", (e) => this.emit("tool:approval-request", {
3747
+ ...e,
3748
+ __cascadeResponder: (decision) => worker.emit(`tool:approval-response:${e.id}`, decision)
3749
+ }));
3750
+ }
3751
+ return workerMap;
3752
+ }
3185
3753
  async executeSubtasks(subtasks, taskId) {
3186
3754
  const assignments = subtasks.map((s) => ({
3187
3755
  ...s,
@@ -3208,6 +3776,8 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
3208
3776
  worker.on("stream:token", (e) => this.emit("stream:token", e));
3209
3777
  worker.on("log", (e) => this.emit("log", e));
3210
3778
  worker.on("tier:status", (e) => this.emit("tier:status", e));
3779
+ worker.on("tool:call", (e) => this.emit("tool:call", e));
3780
+ worker.on("tool:result", (e) => this.emit("tool:result", e));
3211
3781
  worker.on("tool:approval-request", (e) => this.emit("tool:approval-request", {
3212
3782
  ...e,
3213
3783
  __cascadeResponder: (decision) => worker.emit(`tool:approval-response:${e.id}`, decision)
@@ -3244,6 +3814,7 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
3244
3814
  const sanitizedAssignments = this.breakCycles(assignments, adj, inDegree);
3245
3815
  let remaining = new Set(sanitizedAssignments.map((a) => a.subtaskId));
3246
3816
  let wave = 0;
3817
+ let respawnBudget = 1;
3247
3818
  while (remaining.size > 0) {
3248
3819
  const runnableIds = [...remaining].filter((id) => (inDegree.get(id) ?? 0) === 0);
3249
3820
  if (runnableIds.length === 0) {
@@ -3264,15 +3835,62 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
3264
3835
  status: "IN_PROGRESS"
3265
3836
  });
3266
3837
  this.throwIfCancelled();
3838
+ this.waveAbortController = new AbortController();
3839
+ const waveSignal = AbortSignal.any(
3840
+ [this.signal, this.waveAbortController.signal].filter(Boolean)
3841
+ );
3267
3842
  const waveResults = await Promise.allSettled(
3268
3843
  runnableIds.map(async (id) => {
3269
3844
  const assignment = sanitizedAssignments.find((a) => a.subtaskId === id);
3270
3845
  const worker = workerMap.get(id);
3271
- const result = await worker.execute(assignment, taskId, this.signal);
3846
+ const result = await worker.execute(assignment, taskId, waveSignal);
3272
3847
  resultMap.set(id, result);
3273
3848
  return result;
3274
3849
  })
3275
3850
  );
3851
+ const escalatedToolIdx = respawnBudget > 0 ? waveResults.findIndex(
3852
+ (r) => r.status === "fulfilled" && r.value.status === "ESCALATED" && r.value.issues.some((iss) => iss.includes("dynamic tool generation"))
3853
+ ) : -1;
3854
+ if (escalatedToolIdx !== -1 && this.toolCreator) {
3855
+ respawnBudget--;
3856
+ this.waveAbortController.abort();
3857
+ const escalatedId = runnableIds[escalatedToolIdx];
3858
+ const escalatedAssignment = sanitizedAssignments.find((a) => a.subtaskId === escalatedId);
3859
+ this.log(`Wave ${wave}: tool escalation detected \u2014 synthesizing tool then respawning all ${runnableIds.length} worker(s)`);
3860
+ this.sendStatusUpdate({
3861
+ progressPct: 50,
3862
+ currentAction: `Synthesizing dynamic tool for: ${escalatedAssignment.subtaskTitle}`,
3863
+ status: "IN_PROGRESS"
3864
+ });
3865
+ const toolName = await this.toolCreator.createTool(
3866
+ `Help complete: ${escalatedAssignment.subtaskTitle}`,
3867
+ escalatedAssignment.description
3868
+ );
3869
+ if (toolName) {
3870
+ this.log(`Tool "${toolName}" created \u2014 respawning wave ${wave} workers`);
3871
+ for (const a of sanitizedAssignments) {
3872
+ if (runnableIds.includes(a.subtaskId)) {
3873
+ a.description += `
3874
+
3875
+ [SYSTEM]: Dynamic tool "${toolName}" is now available \u2014 use it to complete your task.`;
3876
+ }
3877
+ }
3878
+ }
3879
+ for (const id of runnableIds) {
3880
+ this.t3PeerBus.clearOutput(id);
3881
+ }
3882
+ const freshMap = this.buildWorkerMap(
3883
+ sanitizedAssignments.filter((a) => runnableIds.includes(a.subtaskId)),
3884
+ taskId
3885
+ );
3886
+ for (const [k, v] of freshMap) workerMap.set(k, v);
3887
+ for (const id of runnableIds) {
3888
+ remaining.add(id);
3889
+ inDegree.set(id, 0);
3890
+ }
3891
+ wave--;
3892
+ continue;
3893
+ }
3276
3894
  for (let i = 0; i < runnableIds.length; i++) {
3277
3895
  const id = runnableIds[i];
3278
3896
  remaining.delete(id);
@@ -3280,61 +3898,22 @@ HIERARCHY CONTEXT: ${this.hierarchyContext}` : ""),
3280
3898
  if (r.status === "rejected") {
3281
3899
  this.log(`T3 worker ${id} failed: ${r.reason instanceof Error ? r.reason.message : String(r.reason)} \u2014 retrying once`);
3282
3900
  const assignment = sanitizedAssignments.find((a) => a.subtaskId === id);
3283
- const retried = await this.retryT3(assignment, taskId);
3284
- resultMap.set(id, retried);
3285
- } else if (r.status === "fulfilled" && r.value.status === "ESCALATED" && r.value.issues.some((i2) => i2.includes("dynamic tool generation"))) {
3286
- const assignment = sanitizedAssignments.find((a) => a.subtaskId === id);
3287
- if (this.toolCreator) {
3288
- this.log(`T3 escalated for tool. T2 spawning Tool-Builder T3 for: ${assignment.subtaskTitle}`);
3289
- this.sendStatusUpdate({
3290
- progressPct: 50,
3291
- currentAction: `Spawning Tool-Builder T3 for: ${assignment.subtaskTitle}`,
3292
- status: "IN_PROGRESS"
3901
+ try {
3902
+ const retried = await this.retryT3(assignment, taskId);
3903
+ resultMap.set(id, retried);
3904
+ } catch (retryErr) {
3905
+ const msg = retryErr instanceof Error ? retryErr.message : String(retryErr);
3906
+ this.log(`T3 retry for ${id} threw before publishing \u2014 unblocking dependents with FAILED`);
3907
+ this.t3PeerBus.publish(this.id, id, `Retry failed: ${msg}`, "FAILED");
3908
+ resultMap.set(id, {
3909
+ subtaskId: id,
3910
+ status: "FAILED",
3911
+ output: `Retry threw: ${msg}`,
3912
+ testResults: { checksRun: [], passed: [], failed: [] },
3913
+ issues: [msg],
3914
+ peerSyncsUsed: [],
3915
+ correctionAttempts: 1
3293
3916
  });
3294
- const toolName = await this.toolCreator.createTool(
3295
- `Help complete: ${assignment.subtaskTitle}`,
3296
- assignment.description
3297
- );
3298
- if (toolName) {
3299
- this.log(`T2 verifying new tool: ${toolName}`);
3300
- this.sendStatusUpdate({
3301
- progressPct: 60,
3302
- currentAction: `T2 Verifying new tool: ${toolName}`,
3303
- status: "IN_PROGRESS"
3304
- });
3305
- try {
3306
- const verifyResult = await this.router.generate("T2", {
3307
- messages: [{ role: "user", content: `A new tool named "${toolName}" was just created dynamically to help with: ${assignment.description}. Based on its name and purpose, does this seem like a valid addition? Reply "VERIFIED" or "REJECTED".` }],
3308
- systemPrompt: this.systemPromptOverride + "You are T2 Manager verifying a dynamic tool.",
3309
- maxTokens: 50
3310
- });
3311
- if (!verifyResult.content.toUpperCase().includes("REJECTED")) {
3312
- this.log(`T2 verification passed for ${toolName}. Restarting original T3.`);
3313
- const retried = await this.retryT3({
3314
- ...assignment,
3315
- description: `${assignment.description}
3316
-
3317
- [SYSTEM NOTIFICATION]: A new dynamic tool "${toolName}" has been built and verified for you. Use it to complete your task.`
3318
- }, taskId);
3319
- resultMap.set(id, retried);
3320
- } else {
3321
- this.log(`T2 rejected the dynamic tool: ${toolName}`);
3322
- resultMap.set(id, r.value);
3323
- }
3324
- } catch {
3325
- const retried = await this.retryT3({
3326
- ...assignment,
3327
- description: `${assignment.description}
3328
-
3329
- [SYSTEM NOTIFICATION]: A new dynamic tool "${toolName}" has been built for you. Use it to complete your task.`
3330
- }, taskId);
3331
- resultMap.set(id, retried);
3332
- }
3333
- } else {
3334
- resultMap.set(id, r.value);
3335
- }
3336
- } else {
3337
- resultMap.set(id, r.value);
3338
3917
  }
3339
3918
  }
3340
3919
  for (const dependent of adj.get(id) ?? []) {
@@ -3605,6 +4184,8 @@ var T1Administrator = class extends BaseTier {
3605
4184
  toolCreator;
3606
4185
  /** Stored overall task goal — used when evaluating escalated permissions */
3607
4186
  taskGoal = "";
4187
+ peerMessageCallback;
4188
+ peerMessageSessionId = "";
3608
4189
  constructor(router, toolRegistry, config) {
3609
4190
  super("T1", "T1");
3610
4191
  this.router = router;
@@ -3625,6 +4206,12 @@ var T1Administrator = class extends BaseTier {
3625
4206
  setToolCreator(creator) {
3626
4207
  this.toolCreator = creator;
3627
4208
  }
4209
+ setPeerMessageCallback(cb, sessionId) {
4210
+ this.peerMessageCallback = cb;
4211
+ this.peerMessageSessionId = sessionId;
4212
+ this.t2PeerBus.onPeerMessage = cb;
4213
+ this.t2PeerBus.sessionId = sessionId;
4214
+ }
3628
4215
  async execute(userPrompt, images, systemContext, signal) {
3629
4216
  this.signal = signal;
3630
4217
  this.taskId = randomUUID();
@@ -3844,6 +4431,9 @@ Leave dependsOn empty for sections that can run immediately in parallel.`;
3844
4431
  manager.setStore(this.store);
3845
4432
  }
3846
4433
  manager.setPeerBus(this.t2PeerBus);
4434
+ if (this.peerMessageCallback) {
4435
+ manager.setPeerMessageCallback(this.peerMessageCallback, this.peerMessageSessionId);
4436
+ }
3847
4437
  if (this.permissionEscalator) {
3848
4438
  manager.setPermissionEscalator(this.permissionEscalator);
3849
4439
  }
@@ -3854,6 +4444,8 @@ Leave dependsOn empty for sections that can run immediately in parallel.`;
3854
4444
  bind(manager, "stream:token", (e) => this.emit("stream:token", e));
3855
4445
  bind(manager, "log", (e) => this.emit("log", e));
3856
4446
  bind(manager, "tier:status", (e) => this.emit("tier:status", e));
4447
+ bind(manager, "tool:call", (e) => this.emit("tool:call", e));
4448
+ bind(manager, "tool:result", (e) => this.emit("tool:result", e));
3857
4449
  bind(manager, "tool:approval-request", (e) => this.emit("tool:approval-request", e));
3858
4450
  bind(manager, "message", (msg) => {
3859
4451
  if (msg.type === "PEER_SYNC") {
@@ -4213,13 +4805,21 @@ function resolveInWorkspace(workspaceRoot, input) {
4213
4805
  if (typeof input !== "string" || input.length === 0) {
4214
4806
  throw new WorkspaceSandboxError(String(input), workspaceRoot);
4215
4807
  }
4216
- const root = path13.resolve(workspaceRoot);
4217
- const abs = path13.isAbsolute(input) ? path13.resolve(input) : path13.resolve(root, input);
4218
- const rel = path13.relative(root, abs);
4219
- if (rel === "" || rel === ".") return abs;
4220
- if (rel.startsWith("..") || path13.isAbsolute(rel)) {
4808
+ const root = path16.resolve(workspaceRoot);
4809
+ const abs = path16.isAbsolute(input) ? path16.resolve(input) : path16.resolve(root, input);
4810
+ const rel = path16.relative(root, abs);
4811
+ if (rel === "" || rel === ".") ; else if (rel.startsWith("..") || path16.isAbsolute(rel)) {
4221
4812
  throw new WorkspaceSandboxError(input, root);
4222
4813
  }
4814
+ try {
4815
+ const real = fs15.realpathSync(abs);
4816
+ const realRel = path16.relative(root, real);
4817
+ if (realRel !== "" && realRel !== "." && (realRel.startsWith("..") || path16.isAbsolute(realRel))) {
4818
+ throw new WorkspaceSandboxError(input, root);
4819
+ }
4820
+ } catch (e) {
4821
+ if (e instanceof WorkspaceSandboxError) throw e;
4822
+ }
4223
4823
  return abs;
4224
4824
  }
4225
4825
 
@@ -4241,7 +4841,7 @@ var FileReadTool = class extends BaseTool {
4241
4841
  const absPath = resolveInWorkspace(this.workspaceRoot, filePath);
4242
4842
  const offset = input["offset"] ?? 1;
4243
4843
  const limit = input["limit"];
4244
- const content = await fs2.readFile(absPath, "utf-8");
4844
+ const content = await fs3.readFile(absPath, "utf-8");
4245
4845
  const lines = content.split("\n");
4246
4846
  const start = Math.max(0, offset - 1);
4247
4847
  const end = limit ? start + limit : lines.length;
@@ -4270,13 +4870,13 @@ var FileWriteTool = class extends BaseTool {
4270
4870
  const content = input["content"];
4271
4871
  if (options.saveSnapshot) {
4272
4872
  try {
4273
- const oldContent = await fs2.readFile(absPath, "utf-8");
4873
+ const oldContent = await fs3.readFile(absPath, "utf-8");
4274
4874
  await options.saveSnapshot(absPath, oldContent);
4275
4875
  } catch {
4276
4876
  }
4277
4877
  }
4278
- await fs2.mkdir(path13.dirname(absPath), { recursive: true });
4279
- await fs2.writeFile(absPath, content, "utf-8");
4878
+ await fs3.mkdir(path16.dirname(absPath), { recursive: true });
4879
+ await fs3.writeFile(absPath, content, "utf-8");
4280
4880
  return `Written ${content.length} characters to ${filePath}`;
4281
4881
  }
4282
4882
  };
@@ -4302,7 +4902,7 @@ var FileEditTool = class extends BaseTool {
4302
4902
  const oldString = input["old_string"];
4303
4903
  const newString = input["new_string"];
4304
4904
  const replaceAll = input["replace_all"] ?? false;
4305
- const rawContent = await fs2.readFile(absPath, "utf-8");
4905
+ const rawContent = await fs3.readFile(absPath, "utf-8");
4306
4906
  if (options.saveSnapshot) {
4307
4907
  await options.saveSnapshot(absPath, rawContent);
4308
4908
  }
@@ -4314,7 +4914,7 @@ var FileEditTool = class extends BaseTool {
4314
4914
  );
4315
4915
  }
4316
4916
  const updated = replaceAll ? content.split(normalizedOld).join(newString) : content.replace(normalizedOld, newString);
4317
- await fs2.writeFile(absPath, updated, "utf-8");
4917
+ await fs3.writeFile(absPath, updated, "utf-8");
4318
4918
  const count = replaceAll ? content.split(normalizedOld).length - 1 : 1;
4319
4919
  return `Replaced ${count} occurrence(s) in ${filePath}`;
4320
4920
  }
@@ -4337,12 +4937,12 @@ var FileDeleteTool = class extends BaseTool {
4337
4937
  const absPath = resolveInWorkspace(this.workspaceRoot, filePath);
4338
4938
  if (options.saveSnapshot) {
4339
4939
  try {
4340
- const oldContent = await fs2.readFile(absPath, "utf-8");
4940
+ const oldContent = await fs3.readFile(absPath, "utf-8");
4341
4941
  await options.saveSnapshot(absPath, oldContent);
4342
4942
  } catch {
4343
4943
  }
4344
4944
  }
4345
- await fs2.rm(absPath, { recursive: false });
4945
+ await fs3.rm(absPath, { recursive: false });
4346
4946
  return `Deleted ${filePath}`;
4347
4947
  }
4348
4948
  };
@@ -4359,7 +4959,7 @@ var FileListTool = class extends BaseTool {
4359
4959
  async execute(input, _options) {
4360
4960
  const inputPath = input["path"] || ".";
4361
4961
  const absPath = resolveInWorkspace(this.workspaceRoot, inputPath);
4362
- const entries = await fs2.readdir(absPath, { withFileTypes: true });
4962
+ const entries = await fs3.readdir(absPath, { withFileTypes: true });
4363
4963
  return entries.map((e) => `${e.isDirectory() ? "[DIR] " : " "}${e.name}`).join("\n") || "(empty directory)";
4364
4964
  }
4365
4965
  };
@@ -4742,8 +5342,8 @@ var ImageAnalyzeTool = class extends BaseTool {
4742
5342
  }
4743
5343
  };
4744
5344
  async function fileToImageAttachment(filePath) {
4745
- const data = await fs2.readFile(filePath);
4746
- const ext = path13.extname(filePath).toLowerCase();
5345
+ const data = await fs3.readFile(filePath);
5346
+ const ext = path16.extname(filePath).toLowerCase();
4747
5347
  const mimeMap = {
4748
5348
  ".jpg": "image/jpeg",
4749
5349
  ".jpeg": "image/jpeg",
@@ -4777,14 +5377,14 @@ var PDFCreateTool = class extends BaseTool {
4777
5377
  const filePath = input["path"];
4778
5378
  const content = input["content"];
4779
5379
  const title = input["title"];
4780
- const dir = path13.dirname(filePath);
4781
- if (!fs11.existsSync(dir)) {
4782
- fs11.mkdirSync(dir, { recursive: true });
5380
+ const dir = path16.dirname(filePath);
5381
+ if (!fs15.existsSync(dir)) {
5382
+ fs15.mkdirSync(dir, { recursive: true });
4783
5383
  }
4784
5384
  return new Promise((resolve, reject) => {
4785
5385
  try {
4786
5386
  const doc = new PDFDocument({ margin: 50 });
4787
- const stream = fs11.createWriteStream(filePath);
5387
+ const stream = fs15.createWriteStream(filePath);
4788
5388
  doc.pipe(stream);
4789
5389
  if (title) {
4790
5390
  doc.info["Title"] = title;
@@ -4862,14 +5462,14 @@ var CodeInterpreterTool = class extends BaseTool {
4862
5462
  }
4863
5463
  cmdPrefix = NODE_CMD;
4864
5464
  }
4865
- const tmpDir = path13.join(process.cwd(), ".cascade", "tmp");
4866
- if (!fs11.existsSync(tmpDir)) {
4867
- fs11.mkdirSync(tmpDir, { recursive: true });
5465
+ const tmpDir = path16.join(process.cwd(), ".cascade", "tmp");
5466
+ if (!fs15.existsSync(tmpDir)) {
5467
+ fs15.mkdirSync(tmpDir, { recursive: true });
4868
5468
  }
4869
5469
  const extension = language === "python" ? "py" : "js";
4870
5470
  const fileName = `intp_${randomUUID().slice(0, 8)}.${extension}`;
4871
- const filePath = path13.join(tmpDir, fileName);
4872
- fs11.writeFileSync(filePath, code, "utf-8");
5471
+ const filePath = path16.join(tmpDir, fileName);
5472
+ fs15.writeFileSync(filePath, code, "utf-8");
4873
5473
  const quotedPath = `"${filePath}"`;
4874
5474
  const quotedArgs = args.map((a) => `"${a}"`).join(" ");
4875
5475
  const fullCmd = `${cmdPrefix} ${quotedPath}${quotedArgs ? " " + quotedArgs : ""}`;
@@ -4878,8 +5478,8 @@ var CodeInterpreterTool = class extends BaseTool {
4878
5478
  exec(fullCmd, { cwd: process.cwd(), timeout: 3e4 }, (error, stdout, stderr) => {
4879
5479
  const duration = Date.now() - startMs;
4880
5480
  try {
4881
- if (fs11.existsSync(filePath)) {
4882
- fs11.unlinkSync(filePath);
5481
+ if (fs15.existsSync(filePath)) {
5482
+ fs15.unlinkSync(filePath);
4883
5483
  }
4884
5484
  } catch (cleanupErr) {
4885
5485
  console.error(`Failed to cleanup interpreter script ${filePath}:`, cleanupErr);
@@ -5139,6 +5739,253 @@ var WebSearchTool = class extends BaseTool {
5139
5739
  return lines.join("\n");
5140
5740
  }
5141
5741
  };
5742
/**
 * Tool that lists files whose paths match a glob pattern.
 *
 * Input:
 *   - `pattern` (required): glob pattern, e.g. "src/**\/*.ts".
 *   - `path` (optional): directory to search, resolved against the workspace
 *     root; defaults to the workspace root itself.
 *
 * Output: newline-separated paths relative to the search directory, most
 * recently modified first, or a "No files matched" message.
 */
var GlobTool = class extends BaseTool {
  name = "glob";
  description = "Fast file pattern matching. Returns file paths matching a glob pattern, sorted by modification time. Use this to find files by name patterns.";
  inputSchema = {
    type: "object",
    properties: {
      pattern: {
        type: "string",
        description: 'Glob pattern to match files against, e.g. "**/*.ts", "src/**/*.tsx"'
      },
      path: {
        type: "string",
        description: "Directory to search in. Defaults to the workspace root."
      }
    },
    required: ["pattern"]
  };
  /**
   * Run the glob and return the matches ordered by modification time.
   *
   * @param {object} input - Tool input (`pattern`, optional `path`).
   * @param {object} _options - Execution options (unused).
   * @returns {Promise<string>} Newline-joined relative paths or a no-match message.
   */
  async execute(input, _options) {
    const pattern = input["pattern"];
    const searchPath = input["path"] ? path16.resolve(this.workspaceRoot, input["path"]) : this.workspaceRoot;
    const matches = await glob(pattern, {
      cwd: searchPath,
      // Ignore patterns are matched relative to cwd, so "node_modules/**" alone
      // only covers a top-level node_modules. The "**/" variant also excludes
      // nested dependency trees (monorepo packages, vendored projects).
      ignore: ["node_modules/**", "**/node_modules/**", ".git/**", "dist/**", "build/**"],
      nodir: true,
      dot: false
    });
    if (matches.length === 0) {
      return `No files matched pattern: ${pattern}`;
    }
    const withMtime = await Promise.all(
      matches.map(async (rel) => {
        try {
          const stat = await fs3.stat(path16.join(searchPath, rel));
          return { rel, mtime: stat.mtimeMs };
        } catch {
          // File vanished or is unreadable between glob and stat: keep it in
          // the listing but sort it last.
          return { rel, mtime: 0 };
        }
      })
    );
    withMtime.sort((a, b) => b.mtime - a.mtime);
    return withMtime.map((f) => f.rel).join("\n");
  }
};
5786
+ var execFileAsync = promisify(execFile);
5787
/**
 * Tool that searches file contents with a regular expression.
 *
 * It first shells out to ripgrep (`rg`). If ripgrep cannot be run or reports
 * an error, it falls back to a pure Node.js scan using JavaScript regex
 * semantics.
 *
 * Output modes:
 *   - "content" (default): `file:line:text` for matches, `file:line-text` for context lines.
 *   - "files_with_matches": one path per matching file.
 *   - "count": `file: N` per file (the Node fallback appends a total).
 */
var GrepTool = class extends BaseTool {
  name = "grep";
  description = "Search file contents using a regex pattern. Returns matching lines with file paths and line numbers. Tries ripgrep (rg) first, falls back to Node.js regex scan.";
  inputSchema = {
    type: "object",
    properties: {
      pattern: {
        type: "string",
        description: "Regular expression pattern to search for in file contents"
      },
      path: {
        type: "string",
        description: "File or directory to search in. Defaults to workspace root."
      },
      glob: {
        type: "string",
        description: 'Glob pattern to filter files, e.g. "*.ts", "**/*.tsx"'
      },
      output_mode: {
        type: "string",
        enum: ["content", "files_with_matches", "count"],
        description: '"content" shows matching lines (default), "files_with_matches" shows file paths only, "count" shows match counts'
      },
      context: {
        type: "number",
        description: "Lines of context around each match (content mode only). Default: 0."
      },
      case_insensitive: {
        type: "boolean",
        description: "Case-insensitive search. Default: false."
      }
    },
    required: ["pattern"]
  };
  /**
   * Entry point: normalise the input, try ripgrep, fall back to the Node scan.
   *
   * @param {object} input - Tool input as described by `inputSchema`.
   * @param {object} _options - Execution options (unused).
   * @returns {Promise<string>} Search output or a "No matches found" message.
   */
  async execute(input, _options) {
    const pattern = input["pattern"];
    const searchPath = input["path"] ? path16.resolve(this.workspaceRoot, input["path"]) : this.workspaceRoot;
    const globPattern = input["glob"];
    const outputMode = input["output_mode"] ?? "content";
    // Context must be a non-negative integer: a negative value would make the
    // fallback's context window empty and silently drop every match.
    const requestedContext = Math.trunc(Number(input["context"] ?? 0));
    const context = Number.isFinite(requestedContext) && requestedContext > 0 ? requestedContext : 0;
    const caseInsensitive = input["case_insensitive"] ?? false;
    try {
      return await this.runRipgrep(
        pattern,
        searchPath,
        globPattern,
        outputMode,
        context,
        caseInsensitive
      );
    } catch {
      // Deliberate best-effort: ripgrep missing, timed out, overflowed the
      // buffer or rejected the pattern. Use the built-in scanner instead.
    }
    return this.nodeScan(pattern, searchPath, globPattern, outputMode, context, caseInsensitive);
  }
  /**
   * Run ripgrep and return its trimmed stdout.
   *
   * @returns {Promise<string>} ripgrep output, or a no-match message.
   * @throws When ripgrep cannot be executed or exits with an error status.
   */
  async runRipgrep(pattern, searchPath, globPattern, outputMode, context, caseInsensitive) {
    const args = ["--no-heading"];
    if (caseInsensitive) args.push("-i");
    if (outputMode === "files_with_matches") args.push("-l");
    else if (outputMode === "count") args.push("-c");
    else {
      args.push("-n");
      if (context > 0) args.push(`-C${context}`);
    }
    if (globPattern) args.push("--glob", globPattern);
    // "--" stops option parsing so a pattern starting with "-" is not read as a flag.
    args.push("--", pattern, searchPath);
    try {
      const { stdout } = await execFileAsync("rg", args, {
        timeout: 15e3,
        maxBuffer: 2 * 1024 * 1024
      });
      const trimmed = stdout.trim();
      return trimmed || `No matches found for: ${pattern}`;
    } catch (err) {
      // ripgrep exits with status 1 when the search succeeded but matched
      // nothing. That is a valid answer, not a reason to rescan in Node.
      if (err?.code === 1) return `No matches found for: ${pattern}`;
      throw err;
    }
  }
  /**
   * Pure Node.js fallback: scan candidate files line by line.
   *
   * @returns {Promise<string>} Formatted results, a no-match message, or an
   *   "Invalid regex pattern" message when `pattern` is not a valid JS regex.
   */
  async nodeScan(pattern, searchPath, globPattern, outputMode, context, caseInsensitive) {
    let regex;
    try {
      // No "g" flag: test() then carries no lastIndex state between lines.
      regex = new RegExp(pattern, caseInsensitive ? "i" : "");
    } catch {
      return `Invalid regex pattern: ${pattern}`;
    }
    // `path` may name a single file; glob needs a directory as cwd, so search
    // that one file relative to its parent directory instead.
    let baseDir = searchPath;
    let files;
    const targetStat = await fs3.stat(searchPath).catch(() => null);
    if (targetStat?.isFile()) {
      baseDir = path16.dirname(searchPath);
      files = [path16.basename(searchPath)];
    } else {
      try {
        files = await glob(globPattern ?? "**/*", {
          cwd: searchPath,
          ignore: ["node_modules/**", ".git/**", "dist/**", "build/**"],
          nodir: true
        });
      } catch {
        files = [];
      }
    }
    const results = [];
    let totalCount = 0;
    for (const rel of files) {
      let content;
      try {
        content = await fs3.readFile(path16.join(baseDir, rel), "utf-8");
      } catch {
        // Unreadable entry (permissions, race with deletion): skip it.
        continue;
      }
      const lines = content.split("\n");
      const matchingLines = [];
      for (let i = 0; i < lines.length; i++) {
        if (regex.test(lines[i])) matchingLines.push(i);
      }
      if (matchingLines.length === 0) continue;
      totalCount += matchingLines.length;
      if (outputMode === "files_with_matches") {
        results.push(rel);
      } else if (outputMode === "count") {
        results.push(`${rel}: ${matchingLines.length}`);
      } else {
        const matched = new Set(matchingLines);
        const shown = new Set();
        for (const lineIdx of matchingLines) {
          const start = Math.max(0, lineIdx - context);
          const end = Math.min(lines.length - 1, lineIdx + context);
          for (let i = start; i <= end; i++) shown.add(i);
        }
        const sortedIdxs = [...shown].sort((a, b) => a - b);
        for (const i of sortedIdxs) {
          // ":" marks a matching line, "-" a context line (grep convention).
          const marker = matched.has(i) ? ":" : "-";
          results.push(`${rel}:${i + 1}${marker}${lines[i]}`);
        }
      }
    }
    if (results.length === 0) return `No matches found for: ${pattern}`;
    if (outputMode === "count") {
      results.push(`\nTotal: ${totalCount} matches`);
    }
    return results.join("\n");
  }
};
5923
+
5924
+ // src/tools/web-fetch.ts
5925
+ var MAX_CHARS = 5e4;
5926
+ var TIMEOUT_MS = 15e3;
5927
/**
 * Convert an HTML document into readable plain text.
 *
 * Steps: drop <script>/<style>/<noscript> blocks, turn common block-level
 * closing tags into line breaks, remove all remaining tags, decode character
 * references, then trim every line and discard empty ones.
 *
 * Character references are decoded in a single pass so already-decoded text
 * is never decoded a second time (`&amp;lt;` becomes the literal text `&lt;`,
 * not `<`). Supported: &amp; &lt; &gt; &quot; &nbsp; plus decimal (&#39;) and
 * hexadecimal (&#x27;) numeric references. Numeric references outside the
 * Unicode range are left untouched.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} Plain text with one non-empty, trimmed line per row.
 */
function stripHtml(html) {
  const namedEntities = { amp: "&", lt: "<", gt: ">", quot: '"', nbsp: " " };
  let text = html
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<noscript[\s\S]*?<\/noscript>/gi, "");
  text = text
    .replace(/<br\s*\/?>/gi, "\n")
    .replace(/<\/p>/gi, "\n")
    .replace(/<\/div>/gi, "\n")
    .replace(/<\/h[1-6]>/gi, "\n")
    .replace(/<\/li>/gi, "\n")
    .replace(/<\/tr>/gi, "\n")
    .replace(/<\/td>/gi, " ");
  text = text.replace(/<[^>]+>/g, "");
  text = text.replace(
    /&(?:(amp|lt|gt|quot|nbsp)|#(\d+)|#[xX]([0-9a-fA-F]+));/g,
    (entity, named, decimal, hex) => {
      if (named !== undefined) return namedEntities[named];
      const codePoint = decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hex, 16);
      // fromCodePoint handles astral characters (emoji etc.) and throws above
      // U+10FFFF, so out-of-range references are kept verbatim.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    }
  );
  return text
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.length > 0)
    .join("\n");
}
5935
/**
 * Tool that downloads a URL and returns its body as text.
 *
 * HTML responses are reduced to plain text with `stripHtml`; every other
 * content type is returned as-is. Output is capped at `MAX_CHARS` characters
 * and the request is aborted after `TIMEOUT_MS` milliseconds. Failures are
 * reported as a descriptive string rather than thrown.
 */
var WebFetchTool = class extends BaseTool {
  name = "web_fetch";
  description = "Fetch a URL and return its content as plain text (HTML stripped). Use for reading documentation, web pages, or any URL. Limit: 50,000 characters.";
  inputSchema = {
    type: "object",
    properties: {
      url: {
        type: "string",
        description: "The URL to fetch"
      },
      prompt: {
        type: "string",
        description: "Optional hint for what information to extract from the page (not used for filtering, just context)"
      }
    },
    required: ["url"]
  };
  /**
   * Fetch `input.url` and format the response for the model.
   *
   * @param {object} input - Tool input (`url`, optional `prompt` which is ignored here).
   * @param {object} _options - Execution options (unused).
   * @returns {Promise<string>} "URL / Content-Type / body" text, or an error description.
   */
  async execute(input, _options) {
    const url = input["url"];
    let resp;
    try {
      resp = await fetch(url, {
        headers: {
          "User-Agent": "Cascade-AI/1.0 WebFetchTool",
          Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,text/plain;q=0.8,*/*;q=0.5"
        },
        signal: AbortSignal.timeout(TIMEOUT_MS),
        redirect: "follow"
      });
    } catch (err) {
      return `Failed to fetch ${url}: ${err instanceof Error ? err.message : String(err)}`;
    }
    if (!resp.ok) {
      return `HTTP ${resp.status} ${resp.statusText} from ${url}`;
    }
    const contentType = resp.headers.get("content-type") ?? "";
    let text;
    try {
      const raw = await resp.text();
      // Media types are case-insensitive ("Text/HTML" is valid), so compare
      // in lower case; the header is still reported verbatim below.
      text = contentType.toLowerCase().includes("html") ? stripHtml(raw) : raw;
    } catch (err) {
      return `Failed to read response body: ${err instanceof Error ? err.message : String(err)}`;
    }
    if (text.length > MAX_CHARS) {
      text = text.slice(0, MAX_CHARS) + `\n\n[Content truncated at ${MAX_CHARS} characters]`;
    }
    return `URL: ${url}\nContent-Type: ${contentType}\n\n${text}`;
  }
};
5142
5989
 
5143
5990
  // src/tools/mcp.ts
5144
5991
  var McpToolWrapper = class extends BaseTool {
@@ -5164,7 +6011,7 @@ var McpToolWrapper = class extends BaseTool {
5164
6011
 
5165
6012
  // src/tools/registry.ts
5166
6013
  var ignore = ignoreFactory__default.default ?? ignoreFactory__default;
5167
- var ToolRegistry = class {
6014
+ var ToolRegistry = class extends EventEmitter {
5168
6015
  tools = /* @__PURE__ */ new Map();
5169
6016
  config;
5170
6017
  ignoreMatcher = ignore();
@@ -5172,12 +6019,36 @@ var ToolRegistry = class {
5172
6019
  /** Loaded plugins, keyed by plugin name */
5173
6020
  plugins = /* @__PURE__ */ new Map();
5174
6021
  constructor(config, workspaceRoot = process.cwd()) {
6022
+ super();
5175
6023
  this.config = config;
5176
6024
  this.workspaceRoot = workspaceRoot;
5177
6025
  this.registerDefaults();
5178
6026
  }
5179
6027
  register(tool) {
5180
6028
  this.tools.set(tool.name, tool);
6029
+ this.emit("tool:added", tool.name);
6030
+ }
6031
+ /**
6032
+ * Wait until a named tool is registered, resolving immediately if it already exists.
6033
+ * T3 workers can call this after encountering a missing-tool error to resume
6034
+ * automatically once T2 synthesizes the tool.
6035
+ */
6036
+ waitForTool(toolName, timeoutMs = 6e4) {
6037
+ if (this.tools.has(toolName)) return Promise.resolve();
6038
+ return new Promise((resolve, reject) => {
6039
+ const timer = setTimeout(() => {
6040
+ this.off("tool:added", handler);
6041
+ reject(new Error(`Timeout waiting for tool: ${toolName}`));
6042
+ }, timeoutMs);
6043
+ const handler = (name) => {
6044
+ if (name === toolName) {
6045
+ clearTimeout(timer);
6046
+ this.off("tool:added", handler);
6047
+ resolve();
6048
+ }
6049
+ };
6050
+ this.on("tool:added", handler);
6051
+ });
5181
6052
  }
5182
6053
  /**
5183
6054
  * Register a ToolPlugin, loading all its tools into the registry.
@@ -5262,7 +6133,10 @@ var ToolRegistry = class {
5262
6133
  new PDFCreateTool(),
5263
6134
  new CodeInterpreterTool(),
5264
6135
  new PeerCommunicationTool(),
5265
- new WebSearchTool(this.config.webSearch)
6136
+ new WebSearchTool(this.config.webSearch),
6137
+ new GlobTool(),
6138
+ new GrepTool(),
6139
+ new WebFetchTool()
5266
6140
  ];
5267
6141
  for (const tool of tools) {
5268
6142
  tool.setWorkspaceRoot(this.workspaceRoot);
@@ -5279,10 +6153,10 @@ var ToolRegistry = class {
5279
6153
  }
5280
6154
  isIgnored(filePath) {
5281
6155
  if (!filePath) return false;
5282
- const abs = path13.resolve(this.workspaceRoot, filePath);
5283
- const rel = path13.relative(this.workspaceRoot, abs);
5284
- if (!rel || rel.startsWith("..") || path13.isAbsolute(rel)) return true;
5285
- const posixRel = rel.split(path13.sep).join("/");
6156
+ const abs = path16.resolve(this.workspaceRoot, filePath);
6157
+ const rel = path16.relative(this.workspaceRoot, abs);
6158
+ if (!rel || rel.startsWith("..") || path16.isAbsolute(rel)) return true;
6159
+ const posixRel = rel.split(path16.sep).join("/");
5286
6160
  return this.ignoreMatcher.ignores(posixRel);
5287
6161
  }
5288
6162
  };
@@ -5619,7 +6493,24 @@ var CascadeConfigSchema = z.object({
5619
6493
  * Generated tools are session-scoped and sandboxed in node:vm.
5620
6494
  * HTTP calls from generated tools require approval.
5621
6495
  */
5622
- enableToolCreation: z.boolean().default(false)
6496
+ enableToolCreation: z.boolean().default(true),
6497
+ /**
6498
+ * External plugin paths or npm package names to load at startup.
6499
+ * Each entry must export a default ToolPlugin object.
6500
+ * Example: ["./plugins/my-tool.js", "cascade-plugin-slack"]
6501
+ */
6502
+ plugins: z.array(z.string()).default([]),
6503
+ /**
6504
+ * Maximum number of concurrent inference requests to any local model provider
6505
+ * (e.g. Ollama). Defaults to 1 to prevent GPU memory pressure when multiple
6506
+ * T3 workers run in parallel on a single-GPU machine.
6507
+ */
6508
+ localConcurrency: z.number().int().min(1).default(1),
6509
+ /**
6510
+ * Timeout in milliseconds for a single local model inference call.
6511
+ * Local models can take minutes for large parameter counts. Default: 5 minutes.
6512
+ */
6513
+ localInferenceTimeoutMs: z.number().int().min(1e3).default(3e5)
5623
6514
  });
5624
6515
 
5625
6516
  // src/config/validate.ts
@@ -5747,139 +6638,237 @@ function heuristicAnalyze(prompt) {
5747
6638
  const estimatedTokens = wordCount * 5;
5748
6639
  return { type, complexity, requiresReasoning, requiresVision, estimatedTokens, confidence };
5749
6640
  }
5750
- function selectModelFromProfile(profile, tier, selector) {
5751
- if (profile.requiresVision) {
5752
- return selector.selectVisionModel();
5753
- }
5754
- if (tier === "T1") {
5755
- if (profile.complexity >= 4) {
5756
- return selector.selectForTier("T1");
5757
- } else {
5758
- return selector.selectForTier("T2");
5759
- }
5760
- }
5761
- if (tier === "T2") {
5762
- if (profile.type === "code" || profile.type === "data") {
5763
- return selector.selectForTier("T2");
5764
- } else if (profile.complexity <= 2) {
5765
- return selector.selectForTier("T3");
5766
- }
5767
- return selector.selectForTier("T2");
5768
- }
5769
- if (tier === "T3") {
5770
- if (profile.complexity >= 4 || profile.requiresReasoning) {
5771
- return selector.selectForTier("T2");
5772
- } else if (profile.type === "creative") {
5773
- return selector.selectForTier("T2");
5774
- } else {
5775
- return selector.selectForTier("T3");
5776
- }
5777
- }
5778
- return selector.selectForTier(tier);
5779
- }
5780
6641
  var analysisCache = /* @__PURE__ */ new Map();
6642
+ var TASK_TYPE_TAGS = {
6643
+ code: ["code", "instruction"],
6644
+ analysis: ["analysis", "instruction"],
6645
+ creative: ["creative", "multilingual"],
6646
+ data: ["data", "code"],
6647
+ mixed: []
6648
+ };
5781
6649
  var TaskAnalyzer = class {
5782
- router;
5783
- constructor(router) {
5784
- this.router = router;
6650
+ tracker;
6651
+ lastProfile = null;
6652
+ lastSelectedModels = /* @__PURE__ */ new Map();
6653
+ constructor(tracker) {
6654
+ this.tracker = tracker;
6655
+ }
6656
+ setTracker(tracker) {
6657
+ this.tracker = tracker;
6658
+ }
6659
+ /** Returns the TaskProfile from the most recent analyze() call — used for outcome recording. */
6660
+ getLastProfile() {
6661
+ return this.lastProfile;
5785
6662
  }
5786
6663
  /**
5787
- * Analyze a prompt and return a TaskProfile.
5788
- * Uses heuristics first; falls back to AI inference if confidence is low.
6664
+ * Analyze a prompt and return a TaskProfile using pure heuristics.
6665
+ * Low confidence prompts fall back to a conservative mixed/moderate profile.
5789
6666
  */
5790
6667
  async analyze(prompt) {
5791
6668
  const cacheKey = prompt.slice(0, 200);
5792
6669
  const cached = analysisCache.get(cacheKey);
5793
- if (cached) return cached;
5794
- const heuristic = heuristicAnalyze(prompt);
5795
- if (heuristic.confidence < 0.7 && this.router) {
5796
- try {
5797
- const aiProfile = await this.aiInference(prompt);
5798
- const merged = {
5799
- type: aiProfile.type,
5800
- complexity: aiProfile.complexity,
5801
- requiresReasoning: aiProfile.requiresReasoning,
5802
- requiresVision: heuristic.requiresVision || aiProfile.requiresVision,
5803
- estimatedTokens: heuristic.estimatedTokens,
5804
- confidence: 0.9
5805
- // AI-backed
5806
- };
5807
- analysisCache.set(cacheKey, merged);
5808
- return merged;
5809
- } catch {
5810
- }
6670
+ if (cached) {
6671
+ this.lastProfile = cached;
6672
+ return cached;
5811
6673
  }
5812
- analysisCache.set(cacheKey, heuristic);
5813
- return heuristic;
6674
+ const profile = heuristicAnalyze(prompt);
6675
+ analysisCache.set(cacheKey, profile);
6676
+ this.lastProfile = profile;
6677
+ return profile;
5814
6678
  }
5815
6679
  /**
5816
- * Select the optimal model for a given tier based on task analysis.
6680
+ * Select the optimal model for a given tier.
6681
+ * Scores tier-eligible models using cost efficiency + historical performance.
6682
+ * Falls back to the priority-list default when no candidates have history.
5817
6683
  */
5818
6684
  async selectModel(prompt, tier, selector) {
5819
6685
  const profile = await this.analyze(prompt);
5820
- return selectModelFromProfile(profile, tier, selector);
6686
+ if (profile.requiresVision) {
6687
+ return selector.selectVisionModel();
6688
+ }
6689
+ const candidates = selector.getCandidatesForTier(tier);
6690
+ if (candidates.length === 0) return selector.selectForTier(tier);
6691
+ const scored = candidates.map((m) => ({
6692
+ model: m,
6693
+ score: this.scoreModel(m, profile)
6694
+ }));
6695
+ scored.sort((a, b) => b.score - a.score);
6696
+ const best = scored[0]?.model ?? selector.selectForTier(tier);
6697
+ if (best) this.lastSelectedModels.set(tier, best);
6698
+ return best;
5821
6699
  }
5822
- async aiInference(prompt) {
5823
- if (!this.router) throw new Error("No router for AI inference");
5824
- const inferencePrompt = `Analyze this task and return ONLY a JSON object \u2014 no other text.
5825
-
5826
- Task: "${prompt.slice(0, 300)}"
5827
-
5828
- Return: { "type": "code"|"analysis"|"creative"|"data"|"mixed", "complexity": 1-5, "requiresReasoning": true|false, "requiresVision": true|false }
5829
-
5830
- Where complexity: 1=trivial, 2=simple, 3=moderate, 4=complex, 5=research-grade.`;
5831
- const result = await this.router.generate("T3", {
5832
- messages: [{ role: "user", content: inferencePrompt }],
5833
- maxTokens: 80
5834
- });
5835
- const jsonMatch = /\{[\s\S]*?\}/.exec(result.content);
5836
- if (!jsonMatch) throw new Error("No JSON in AI inference response");
5837
- const parsed = JSON.parse(jsonMatch[0]);
5838
- const validTypes = ["code", "analysis", "creative", "data", "mixed"];
5839
- const type = validTypes.includes(parsed.type) ? parsed.type : "mixed";
5840
- const complexity = Math.max(1, Math.min(5, Math.round(parsed.complexity)));
5841
- return {
5842
- type,
5843
- complexity,
5844
- requiresReasoning: Boolean(parsed.requiresReasoning),
5845
- requiresVision: Boolean(parsed.requiresVision),
5846
- estimatedTokens: 0,
5847
- confidence: 0.9
5848
- };
6700
+ /**
6701
+ * Record the outcome of a completed run across all tiers that were selected
6702
+ * during this session and persist stats to disk.
6703
+ */
6704
+ recordRunOutcome(outcome, costByTier) {
6705
+ if (!this.tracker || !this.lastProfile) return;
6706
+ const taskType = this.lastProfile.type;
6707
+ for (const [tier, model] of this.lastSelectedModels) {
6708
+ const cost = costByTier[tier] ?? 0;
6709
+ this.tracker.record(model.id, taskType, outcome, 0, cost);
6710
+ }
6711
+ this.lastSelectedModels.clear();
6712
+ void this.tracker.save();
6713
+ }
6714
+ scoreModel(model, profile) {
6715
+ const perf = this.tracker?.performanceScore(model.id, profile.type) ?? 0.5;
6716
+ const costEff = this.costEfficiency(model, profile.complexity);
6717
+ const match = this.taskMatchScore(model, profile);
6718
+ return perf * costEff * match;
6719
+ }
6720
+ costEfficiency(model, complexity) {
6721
+ if (this.tracker) return this.tracker.costEfficiencyScore(model, complexity);
6722
+ const blended = model.inputCostPer1kTokens + model.outputCostPer1kTokens * 2;
6723
+ const normalised = Math.min(1, blended / 0.05);
6724
+ const complexityWeight = (6 - complexity) / 5;
6725
+ return Math.max(0.1, 1 - normalised * complexityWeight);
6726
+ }
6727
+ taskMatchScore(model, profile) {
6728
+ const expected = TASK_TYPE_TAGS[profile.type];
6729
+ if (!model.specializations?.length || expected.length === 0) return 1;
6730
+ const matches = expected.filter((tag) => model.specializations.includes(tag)).length;
6731
+ return matches > 0 ? 1 + matches / expected.length * 0.3 : 0.8;
5849
6732
  }
5850
6733
  /** Clear the analysis cache (call between sessions). */
5851
6734
  static clearCache() {
5852
6735
  analysisCache.clear();
5853
6736
  }
5854
6737
  };
6738
var DEFAULT_STATS_FILE = path16.join(os3.homedir(), ".cascade", "model-perf.json");

/**
 * True when `stat` has the shape written by ModelPerformanceTracker.record():
 * an object whose five counters are all finite numbers.
 */
function isValidModelPerfStat(stat) {
  if (stat === null || typeof stat !== "object") return false;
  return ["successCount", "failureCount", "totalRetries", "totalCostUsd", "sampleCount"].every(
    (field) => Number.isFinite(stat[field])
  );
}

/**
 * Accumulates outcome statistics per "modelId:taskType" pair and turns them
 * into scores used for model selection. Statistics can be loaded from and
 * saved to a JSON file (by default ~/.cascade/model-perf.json).
 */
var ModelPerformanceTracker = class {
  /** Map from "modelId:taskType" to its counters. */
  stats = /* @__PURE__ */ new Map();
  statsFile;
  loaded = false;
  /**
   * @param {string} [statsFile] - Path of the JSON file used by load()/save().
   */
  constructor(statsFile = DEFAULT_STATS_FILE) {
    this.statsFile = statsFile;
  }
  /**
   * Read persisted statistics into memory. Only the first call does any work.
   * A missing, unreadable or malformed file leaves the history empty, and
   * entries that do not look like stat records are ignored so corrupt data
   * cannot turn scores into NaN.
   */
  async load() {
    if (this.loaded) return;
    this.loaded = true;
    try {
      const raw = await fs3.readFile(this.statsFile, "utf-8");
      const parsed = JSON.parse(raw);
      if (parsed === null || typeof parsed !== "object") return;
      for (const [key, stat] of Object.entries(parsed)) {
        if (isValidModelPerfStat(stat)) this.stats.set(key, stat);
      }
    } catch {
      // Best-effort: no usable stats file means starting without history.
    }
  }
  /**
   * Write the current statistics to disk, creating the directory if needed.
   * Persistence is best-effort: failures are ignored.
   */
  async save() {
    try {
      await fs3.mkdir(path16.dirname(this.statsFile), { recursive: true });
      const obj = Object.fromEntries(this.stats);
      await fs3.writeFile(this.statsFile, JSON.stringify(obj, null, 2), "utf-8");
    } catch {
      // Best-effort: losing a stats update must not fail the run.
    }
  }
  /**
   * Add one sample for a model/task-type pair.
   *
   * @param {string} modelId
   * @param {string} taskType
   * @param {string} outcome - "success" or "failure"; any other value only bumps sampleCount.
   * @param {number} [retries=0] - Retries used by this run.
   * @param {number} [costUsd=0] - Cost attributed to this run.
   */
  record(modelId, taskType, outcome, retries = 0, costUsd = 0) {
    const key = `${modelId}:${taskType}`;
    const s = this.stats.get(key) ?? {
      successCount: 0,
      failureCount: 0,
      totalRetries: 0,
      totalCostUsd: 0,
      sampleCount: 0
    };
    this.stats.set(key, {
      successCount: s.successCount + (outcome === "success" ? 1 : 0),
      failureCount: s.failureCount + (outcome === "failure" ? 1 : 0),
      totalRetries: s.totalRetries + retries,
      totalCostUsd: s.totalCostUsd + costUsd,
      sampleCount: s.sampleCount + 1
    });
  }
  /**
   * Returns 0.05–1.0; defaults to 0.5 (neutral prior) when no history exists.
   * The score is the success rate reduced by a retry penalty of
   * avgRetries / 3, capped at 0.4.
   */
  performanceScore(modelId, taskType) {
    const key = `${modelId}:${taskType}`;
    const s = this.stats.get(key);
    if (!s || s.sampleCount === 0) return 0.5;
    const successRate = s.successCount / s.sampleCount;
    const avgRetries = s.totalRetries / s.sampleCount;
    const retryPenalty = Math.min(0.4, avgRetries / 3);
    return Math.max(0.05, successRate * (1 - retryPenalty));
  }
  /**
   * Returns 0.1–1.0. Cheaper models score higher, with the penalty scaled
   * down for complex tasks (where capability matters more than cost).
   *
   * blended cost = input + 2 × output, normalised over $0.05 as the
   * "expensive" ceiling. Missing or non-finite cost fields count as 0 (free),
   * and complexity is clamped to 1–5 (3 when not a finite number), so the
   * result is always a finite number inside the documented range.
   *
   * @param {object} model - Reads `inputCostPer1kTokens` and `outputCostPer1kTokens`.
   * @param {number} complexity - Task complexity, expected 1–5.
   * @returns {number} Cost-efficiency score in [0.1, 1].
   */
  costEfficiencyScore(model, complexity) {
    const inputCost = Number.isFinite(model?.inputCostPer1kTokens) ? model.inputCostPer1kTokens : 0;
    const outputCost = Number.isFinite(model?.outputCostPer1kTokens) ? model.outputCostPer1kTokens : 0;
    const blended = inputCost + outputCost * 2;
    const normalised = Math.min(1, Math.max(0, blended / 0.05));
    const level = Number.isFinite(complexity) ? Math.min(5, Math.max(1, complexity)) : 3;
    const complexityWeight = (6 - level) / 5;
    return Math.max(0.1, 1 - normalised * complexityWeight);
  }
};
5855
6811
  var DynamicTool = class extends BaseTool {
5856
6812
  name;
5857
6813
  description;
5858
6814
  inputSchema;
5859
6815
  executeCode;
5860
6816
  _isDangerous;
5861
- constructor(spec) {
6817
+ registry;
6818
+ escalator;
6819
+ constructor(spec, registry, escalator) {
5862
6820
  super();
5863
6821
  this.name = spec.name;
5864
6822
  this.description = spec.description;
5865
6823
  this.inputSchema = spec.inputSchema;
5866
6824
  this.executeCode = spec.executeCode;
5867
6825
  this._isDangerous = spec.isDangerous;
6826
+ this.registry = registry;
6827
+ this.escalator = escalator;
5868
6828
  }
5869
6829
  isDangerous() {
5870
6830
  return this._isDangerous;
5871
6831
  }
5872
- async execute(input, _options) {
6832
+ async execute(input, options) {
6833
+ const registry = this.registry;
6834
+ const escalator = this.escalator;
6835
+ const callTool = async (toolName, toolInput) => {
6836
+ if (!registry.hasTool(toolName)) return `Tool not found: ${toolName}`;
6837
+ if (registry.isDangerous(toolName)) {
6838
+ if (escalator) {
6839
+ const req = {
6840
+ id: `dynamic-${this.name}-${toolName}-${Date.now()}`,
6841
+ requestedBy: `dynamic_tool:${this.name}`,
6842
+ parentT2Id: options.tierId,
6843
+ toolName,
6844
+ input: toolInput,
6845
+ isDangerous: true,
6846
+ subtaskContext: `Dynamic tool "${this.name}" requesting access to "${toolName}"`,
6847
+ sectionContext: `Dynamic tool "${this.name}"`
6848
+ };
6849
+ const decision = await escalator.requestPermission(req);
6850
+ if (!decision.approved) {
6851
+ return `Permission denied for ${toolName} (decided by ${decision.decidedBy}).`;
6852
+ }
6853
+ }
6854
+ }
6855
+ try {
6856
+ const result = await registry.execute(toolName, toolInput, options);
6857
+ return typeof result === "string" ? result : JSON.stringify(result);
6858
+ } catch (err) {
6859
+ return `Error calling ${toolName}: ${err instanceof Error ? err.message : String(err)}`;
6860
+ }
6861
+ };
5873
6862
  const sandbox = {
5874
6863
  input,
5875
6864
  fetch: globalThis.fetch,
6865
+ callTool,
5876
6866
  JSON,
5877
6867
  Math,
5878
6868
  Date,
5879
6869
  console: { log: () => {
5880
6870
  }, error: () => {
5881
6871
  } },
5882
- // Silenced
5883
6872
  setTimeout,
5884
6873
  clearTimeout,
5885
6874
  Promise,
@@ -5912,29 +6901,42 @@ Generate a minimal, safe JavaScript tool function for the described operation.
5912
6901
 
5913
6902
  Rules:
5914
6903
  - Return ONLY a JSON object with these fields: name, description, inputSchema, executeCode, isDangerous
5915
- - executeCode is a self-contained JavaScript function body that:
5916
- - Receives: input (object), fetch (if HTTP needed)
6904
+ - executeCode is a self-contained JavaScript async function body that:
6905
+ - Receives: input (object), fetch (for HTTP), callTool(toolName, input) (to call any registered cascade tool)
5917
6906
  - Returns: a string result
5918
- - Uses no require(), no fs, no process \u2014 only fetch, JSON, Math, Date, String, Number, Array, Object
6907
+ - For file operations, prefer: await callTool('file_read', { path: input.path })
6908
+ - For shell commands, prefer: await callTool('shell', { command: 'ls -la' })
6909
+ - For pure computation / HTTP: use fetch or built-ins (JSON, Math, Date, String, Number, Array, Object)
5919
6910
  - Must complete in under 15 seconds
5920
- - isDangerous should be true only if the tool makes write operations or external HTTP calls
6911
+ - isDangerous: true if the tool calls dangerous cascade tools (shell, file_write, file_delete, git) or makes HTTP calls that write data
5921
6912
  - name must be snake_case, start with "dynamic_", max 40 chars
5922
6913
  - description must be \u2264 120 chars
5923
6914
 
5924
- Example executeCode for an HTTP tool:
5925
- "const res = await fetch(input.url); const text = await res.text(); return text.slice(0, 2000);"
6915
+ Example for a file-summary tool:
6916
+ {
6917
+ "name": "dynamic_summarize_file",
6918
+ "description": "Read a file and return a one-paragraph summary",
6919
+ "inputSchema": { "path": { "type": "string", "description": "File path to summarize" } },
6920
+ "executeCode": "const content = await callTool('file_read', { path: input.path }); return content.slice(0, 500);",
6921
+ "isDangerous": false
6922
+ }
5926
6923
 
5927
6924
  Return ONLY valid JSON \u2014 no other text.`;
5928
6925
  var ToolCreator = class {
5929
6926
  router;
5930
6927
  registry;
6928
+ escalator;
5931
6929
  createdTools = /* @__PURE__ */ new Set();
5932
6930
  constructor(router, registry) {
5933
6931
  this.router = router;
5934
6932
  this.registry = registry;
5935
6933
  }
6934
+ setPermissionEscalator(escalator) {
6935
+ this.escalator = escalator;
6936
+ }
5936
6937
  /**
5937
6938
  * Generate a new tool from a description and register it with the ToolRegistry.
6939
+ * The generated tool has access to all registered cascade tools via callTool().
5938
6940
  * Returns the tool name if successful, null if generation failed.
5939
6941
  */
5940
6942
  async createTool(description, context) {
@@ -5945,26 +6947,21 @@ Required capability: ${description.slice(0, 300)}`;
5945
6947
  try {
5946
6948
  const result = await this.router.generate("T3", {
5947
6949
  messages: [{ role: "user", content: prompt }],
5948
- maxTokens: 600
6950
+ maxTokens: 800
5949
6951
  });
5950
6952
  const jsonMatch = /\{[\s\S]*\}/.exec(result.content);
5951
- if (!jsonMatch) {
5952
- return null;
5953
- }
6953
+ if (!jsonMatch) return null;
5954
6954
  const spec = JSON.parse(jsonMatch[0]);
5955
- if (!spec.name || !spec.description || !spec.executeCode || !spec.inputSchema) {
5956
- return null;
5957
- }
6955
+ if (!spec.name || !spec.description || !spec.executeCode || !spec.inputSchema) return null;
5958
6956
  if (this.createdTools.has(spec.name) || this.registry.hasTool(spec.name)) {
5959
6957
  spec.name = `${spec.name}_${Date.now() % 1e4}`;
5960
6958
  }
5961
6959
  try {
5962
- createContext({ input: {}, fetch: globalThis.fetch });
5963
- new Function("input", "fetch", spec.executeCode);
5964
- } catch (err) {
6960
+ new Function("input", "fetch", "callTool", spec.executeCode);
6961
+ } catch {
5965
6962
  return null;
5966
6963
  }
5967
- const tool = new DynamicTool(spec);
6964
+ const tool = new DynamicTool(spec, this.registry, this.escalator);
5968
6965
  this.registry.register(tool);
5969
6966
  this.createdTools.add(spec.name);
5970
6967
  return spec.name;
@@ -5972,16 +6969,14 @@ Required capability: ${description.slice(0, 300)}`;
5972
6969
  return null;
5973
6970
  }
5974
6971
  }
5975
- /**
5976
- * Returns the names of all tools created in this session.
5977
- */
6972
+ /** Returns the names of all tools created in this session. */
5978
6973
  getCreatedTools() {
5979
6974
  return Array.from(this.createdTools);
5980
6975
  }
5981
6976
  };
5982
6977
 
5983
6978
  // src/core/cascade.ts
5984
- var Cascade = class extends EventEmitter {
6979
+ var Cascade = class _Cascade extends EventEmitter {
5985
6980
  router;
5986
6981
  toolRegistry;
5987
6982
  mcpClient;
@@ -5992,6 +6987,7 @@ var Cascade = class extends EventEmitter {
5992
6987
  audit;
5993
6988
  telemetry;
5994
6989
  taskAnalyzer;
6990
+ perfTracker;
5995
6991
  toolCreator;
5996
6992
  constructor(config, workspacePath, store) {
5997
6993
  super();
@@ -6008,10 +7004,12 @@ var Cascade = class extends EventEmitter {
6008
7004
  this.telemetry = config.telemetry?.enabled ? new Telemetry(config.telemetry, config.telemetry.distinctId ?? "anonymous") : noopTelemetry;
6009
7005
  }
6010
7006
  initOptionalFeatures() {
6011
- const cfg = this.config;
6012
- if (cfg["cascadeAuto"] === true) {
6013
- this.taskAnalyzer = new TaskAnalyzer(this.router);
7007
+ if (this.config.cascadeAuto === true) {
7008
+ this.perfTracker = new ModelPerformanceTracker();
7009
+ void this.perfTracker.load();
7010
+ this.taskAnalyzer = new TaskAnalyzer(this.perfTracker);
6014
7011
  }
7012
+ const cfg = this.config;
6015
7013
  if (cfg["enableToolCreation"] === true) {
6016
7014
  this.toolCreator = new ToolCreator(this.router, this.toolRegistry);
6017
7015
  }
@@ -6077,6 +7075,26 @@ var Cascade = class extends EventEmitter {
6077
7075
  }
6078
7076
  }
6079
7077
  }
7078
+ const pluginPaths = this.config["plugins"];
7079
+ if (pluginPaths?.length) {
7080
+ for (const pluginPath of pluginPaths) {
7081
+ try {
7082
+ const mod = await import(pluginPath);
7083
+ const plugin = mod.default ?? mod;
7084
+ if (plugin && Array.isArray(plugin.tools)) {
7085
+ this.toolRegistry.registerPlugin(plugin);
7086
+ } else {
7087
+ console.warn(`[cascade] Plugin "${pluginPath}" does not export a valid ToolPlugin.`);
7088
+ }
7089
+ } catch (err) {
7090
+ console.warn(`[cascade] Failed to load plugin "${pluginPath}":`, err);
7091
+ }
7092
+ }
7093
+ }
7094
+ if (this.config.cascadeAuto && this.store) {
7095
+ this.router.profileModels(this.store).catch(() => {
7096
+ });
7097
+ }
6080
7098
  this.initOptionalFeatures();
6081
7099
  this.initialized = true;
6082
7100
  })();
@@ -6087,21 +7105,48 @@ var Cascade = class extends EventEmitter {
6087
7105
  throw err;
6088
7106
  }
6089
7107
  }
7108
+ isCasualGreeting(prompt) {
7109
+ const casual = /^(hi|hello|hey|greetings|thanks|thank you|thx|bye|goodbye|cya)$/i.test(prompt.trim().replace(/[!?.]+$/, ""));
7110
+ return casual;
7111
+ }
6090
7112
  looksLikeSimpleArtifactTask(prompt) {
6091
7113
  return /create .*\.(txt|md|json|csv)\b/i.test(prompt) && !/(research|compare|thorough|pdf|report|analy[sz]e|architecture|multi-agent)/i.test(prompt);
6092
7114
  }
6093
- async determineComplexity(prompt, workspacePath, conversationHistory = []) {
6094
- if (this.looksLikeSimpleArtifactTask(prompt)) {
6095
- return "Simple";
6096
- }
6097
- let workspaceContext = "";
7115
+ looksLikeConversational(prompt) {
7116
+ const LOW_COMPLEXITY = [
7117
+ /^(?:hi|hello|hey|thanks|thank you|ok|okay|yes|no|sure|got it|sounds good)\b/i,
7118
+ /^(?:what is|what are|list|show me|tell me|who is|where is|when is|how do i)\b/i,
7119
+ /\b(?:simple|quick|brief|small|single|one-line|typo|rename)\b/i
7120
+ ];
7121
+ const wordCount = prompt.trim().split(/\s+/).length;
7122
+ return wordCount <= 12 && LOW_COMPLEXITY.some((re) => re.test(prompt.trim()));
7123
+ }
7124
+ // Cache glob scan results per workspace path to avoid repeated I/O.
7125
+ static globCache = /* @__PURE__ */ new Map();
7126
+ async countWorkspaceFiles(workspacePath) {
7127
+ const now = Date.now();
7128
+ const cached = _Cascade.globCache.get(workspacePath);
7129
+ if (cached && cached.expiresAt > now) return cached.count;
6098
7130
  try {
6099
7131
  const files = await glob("**/*.*", {
6100
7132
  cwd: workspacePath,
6101
7133
  ignore: ["node_modules/**", ".git/**", "dist/**", "build/**"],
6102
7134
  nodir: true
6103
7135
  });
6104
- workspaceContext = `Workspace Scout: Found ~${files.length} source files in the project.`;
7136
+ _Cascade.globCache.set(workspacePath, { count: files.length, expiresAt: now + 3e4 });
7137
+ return files.length;
7138
+ } catch {
7139
+ return 0;
7140
+ }
7141
+ }
7142
+ async determineComplexity(prompt, workspacePath, conversationHistory = []) {
7143
+ if (this.isCasualGreeting(prompt)) return "Simple";
7144
+ if (this.looksLikeSimpleArtifactTask(prompt)) return "Simple";
7145
+ if (this.looksLikeConversational(prompt)) return "Simple";
7146
+ let workspaceContext = "";
7147
+ try {
7148
+ const count = await this.countWorkspaceFiles(workspacePath);
7149
+ workspaceContext = `Workspace Scout: Found ~${count} source files in the project.`;
6105
7150
  } catch {
6106
7151
  workspaceContext = "Workspace Scout: Could not scan workspace.";
6107
7152
  }
@@ -6187,7 +7232,7 @@ ${prompt}` : prompt;
6187
7232
  this.telemetry.capture("cascade:session_start", {
6188
7233
  complexity,
6189
7234
  providerCount: this.config.providers.length,
6190
- cascadeAutoEnabled: this.config["cascadeAuto"] === true,
7235
+ cascadeAutoEnabled: this.config.cascadeAuto === true,
6191
7236
  toolCreationEnabled: this.config["enableToolCreation"] === true
6192
7237
  });
6193
7238
  this.emit("tier:root", { role: complexity === "Simple" ? "T3" : complexity === "Moderate" ? "T2" : "T1" });
@@ -6202,6 +7247,7 @@ ${prompt}` : prompt;
6202
7247
  }));
6203
7248
  }
6204
7249
  const toolCreator = this.toolCreator;
7250
+ if (toolCreator) toolCreator.setPermissionEscalator(escalator);
6205
7251
  let finalOutput = "";
6206
7252
  let t2Results = [];
6207
7253
  let runError = null;
@@ -6223,6 +7269,8 @@ ${prompt}` : prompt;
6223
7269
  });
6224
7270
  tier.on("log", (e) => this.emit("log", e));
6225
7271
  tier.on("tier:status", (e) => this.emit("tier:status", e));
7272
+ tier.on("tool:call", (e) => this.emit("tool:call", e));
7273
+ tier.on("tool:result", (e) => this.emit("tool:result", e));
6226
7274
  tier.on("tool:approval-request", async (request) => {
6227
7275
  this.emit("tool:approval-request", request);
6228
7276
  let decision = { approved: false };
@@ -6277,6 +7325,7 @@ ${prompt}` : prompt;
6277
7325
  }
6278
7326
  t2.setPermissionEscalator(escalator);
6279
7327
  if (toolCreator) t2.setToolCreator(toolCreator);
7328
+ t2.setPeerMessageCallback((e) => this.emit("peer:message", e), options.sessionId ?? "");
6280
7329
  bindTierEvents(t2);
6281
7330
  const assignment = {
6282
7331
  sectionId: taskId,
@@ -6306,6 +7355,7 @@ ${prompt}` : prompt;
6306
7355
  }
6307
7356
  t1.setPermissionEscalator(escalator);
6308
7357
  if (toolCreator) t1.setToolCreator(toolCreator);
7358
+ t1.setPeerMessageCallback((e) => this.emit("peer:message", e), options.sessionId ?? "");
6309
7359
  bindTierEvents(t1);
6310
7360
  t1.on("plan", (e) => this.emit("plan", e));
6311
7361
  const result = await t1.execute(options.prompt, options.images, void 0, options.signal);
@@ -6329,6 +7379,13 @@ ${prompt}` : prompt;
6329
7379
  escalator.cancelAllPending();
6330
7380
  } catch {
6331
7381
  }
7382
+ if (this.taskAnalyzer) {
7383
+ try {
7384
+ const stats2 = this.router.getStats();
7385
+ this.taskAnalyzer.recordRunOutcome(runError ? "failure" : "success", stats2.costByTier);
7386
+ } catch {
7387
+ }
7388
+ }
6332
7389
  try {
6333
7390
  const stats2 = this.router.getStats();
6334
7391
  const durationMs2 = Date.now() - startMs;
@@ -6429,7 +7486,7 @@ var Keystore = class {
6429
7486
  const creds = await this.keytar.findCredentials(KEYTAR_SERVICE);
6430
7487
  this.cache = Object.fromEntries(creds.map((c) => [c.account, c.password]));
6431
7488
  this.backend = "keytar";
6432
- if (password && fs11.existsSync(this.storePath)) {
7489
+ if (password && fs15.existsSync(this.storePath)) {
6433
7490
  try {
6434
7491
  const fileEntries = this.decryptFile(password);
6435
7492
  for (const [k, v] of Object.entries(fileEntries)) {
@@ -6448,7 +7505,7 @@ var Keystore = class {
6448
7505
  "Keystore unlock requires a password because the OS keychain (keytar) is not available on this system."
6449
7506
  );
6450
7507
  }
6451
- if (!fs11.existsSync(this.storePath)) {
7508
+ if (!fs15.existsSync(this.storePath)) {
6452
7509
  const salt = crypto.randomBytes(SALT_LEN);
6453
7510
  this.masterKey = this.deriveKey(password, salt);
6454
7511
  this.writeWithSalt({}, salt);
@@ -6462,7 +7519,7 @@ var Keystore = class {
6462
7519
  }
6463
7520
  /** Synchronous legacy unlock kept for AES-only environments. */
6464
7521
  unlockSync(password) {
6465
- if (!fs11.existsSync(this.storePath)) {
7522
+ if (!fs15.existsSync(this.storePath)) {
6466
7523
  const salt = crypto.randomBytes(SALT_LEN);
6467
7524
  this.masterKey = this.deriveKey(password, salt);
6468
7525
  this.writeWithSalt({}, salt);
@@ -6520,7 +7577,7 @@ var Keystore = class {
6520
7577
  }
6521
7578
  }
6522
7579
  decryptFile(password, knownSalt) {
6523
- if (!fs11.existsSync(this.storePath)) return {};
7580
+ if (!fs15.existsSync(this.storePath)) return {};
6524
7581
  try {
6525
7582
  const { salt, ciphertext, iv, tag } = this.readRaw();
6526
7583
  const useSalt = knownSalt ?? salt;
@@ -6542,8 +7599,8 @@ var Keystore = class {
6542
7599
  const ciphertext = Buffer.concat([cipher.update(plaintext), cipher.final()]);
6543
7600
  const tag = cipher.getAuthTag();
6544
7601
  const out = Buffer.concat([raw.salt, iv, tag, ciphertext]);
6545
- fs11.mkdirSync(path13.dirname(this.storePath), { recursive: true });
6546
- fs11.writeFileSync(this.storePath, out, { mode: 384 });
7602
+ fs15.mkdirSync(path16.dirname(this.storePath), { recursive: true });
7603
+ fs15.writeFileSync(this.storePath, out, { mode: 384 });
6547
7604
  }
6548
7605
  writeWithSalt(data, salt) {
6549
7606
  if (!this.masterKey) throw new Error("writeWithSalt called before masterKey was set");
@@ -6553,11 +7610,11 @@ var Keystore = class {
6553
7610
  const ciphertext = Buffer.concat([cipher.update(plaintext), cipher.final()]);
6554
7611
  const tag = cipher.getAuthTag();
6555
7612
  const out = Buffer.concat([salt, iv, tag, ciphertext]);
6556
- fs11.mkdirSync(path13.dirname(this.storePath), { recursive: true });
6557
- fs11.writeFileSync(this.storePath, out, { mode: 384 });
7613
+ fs15.mkdirSync(path16.dirname(this.storePath), { recursive: true });
7614
+ fs15.writeFileSync(this.storePath, out, { mode: 384 });
6558
7615
  }
6559
7616
  readRaw() {
6560
- const buf = fs11.readFileSync(this.storePath);
7617
+ const buf = fs15.readFileSync(this.storePath);
6561
7618
  let offset = 0;
6562
7619
  const salt = buf.subarray(offset, offset + SALT_LEN);
6563
7620
  offset += SALT_LEN;
@@ -6590,9 +7647,9 @@ var CascadeIgnore = class {
6590
7647
  ]);
6591
7648
  }
6592
7649
  async load(workspacePath) {
6593
- const filePath = path13.join(workspacePath, ".cascadeignore");
7650
+ const filePath = path16.join(workspacePath, ".cascadeignore");
6594
7651
  try {
6595
- const content = await fs2.readFile(filePath, "utf-8");
7652
+ const content = await fs3.readFile(filePath, "utf-8");
6596
7653
  const lines = content.split("\n").filter((l) => l.trim() && !l.startsWith("#"));
6597
7654
  this.ig.add(lines);
6598
7655
  this.loaded = true;
@@ -6601,7 +7658,7 @@ var CascadeIgnore = class {
6601
7658
  }
6602
7659
  isIgnored(filePath, workspacePath) {
6603
7660
  try {
6604
- const relative = workspacePath ? path13.relative(workspacePath, filePath) : filePath;
7661
+ const relative = workspacePath ? path16.relative(workspacePath, filePath) : filePath;
6605
7662
  return this.ig.ignores(relative);
6606
7663
  } catch {
6607
7664
  return false;
@@ -6612,9 +7669,9 @@ var CascadeIgnore = class {
6612
7669
  }
6613
7670
  };
6614
7671
  async function loadCascadeMd(workspacePath) {
6615
- const filePath = path13.join(workspacePath, "CASCADE.md");
7672
+ const filePath = path16.join(workspacePath, "CASCADE.md");
6616
7673
  try {
6617
- const raw = await fs2.readFile(filePath, "utf-8");
7674
+ const raw = await fs3.readFile(filePath, "utf-8");
6618
7675
  return parseCascadeMd(raw);
6619
7676
  } catch {
6620
7677
  return null;
@@ -6643,7 +7700,7 @@ ${raw.trim()}`;
6643
7700
  var MemoryStore = class _MemoryStore {
6644
7701
  db;
6645
7702
  constructor(dbPath) {
6646
- fs11.mkdirSync(path13.dirname(dbPath), { recursive: true });
7703
+ fs15.mkdirSync(path16.dirname(dbPath), { recursive: true });
6647
7704
  try {
6648
7705
  this.db = new Database(dbPath, { timeout: 5e3 });
6649
7706
  this.db.pragma("journal_mode = WAL");
@@ -7126,6 +8183,27 @@ Original error: ${err.message}`
7126
8183
  if (!row.oldest) return Infinity;
7127
8184
  return Date.now() - new Date(row.oldest).getTime();
7128
8185
  }
8186
+ saveModelProfile(modelId, provider, specializations) {
8187
+ const cacheKey = `${provider}:${modelId}`;
8188
+ const existing = this.db.prepare("SELECT metadata FROM model_cache WHERE id = ?").get(cacheKey);
8189
+ const meta = existing ? JSON.parse(existing.metadata) : { id: modelId, provider, name: modelId, contextWindow: 0, isVisionCapable: false, inputCostPer1kTokens: 0, outputCostPer1kTokens: 0, maxOutputTokens: 0, supportsStreaming: false, isLocal: false };
8190
+ meta.specializations = specializations;
8191
+ this.db.prepare(`
8192
+ INSERT INTO model_cache (id, provider, model_id, name, metadata, updated_at)
8193
+ VALUES (?, ?, ?, ?, ?, ?)
8194
+ ON CONFLICT(id) DO UPDATE SET metadata = excluded.metadata, updated_at = excluded.updated_at
8195
+ `).run(cacheKey, provider, modelId, meta.name ?? modelId, JSON.stringify(meta), (/* @__PURE__ */ new Date()).toISOString());
8196
+ }
8197
+ getModelProfile(modelId, provider) {
8198
+ const row = this.db.prepare("SELECT metadata FROM model_cache WHERE id = ?").get(`${provider}:${modelId}`);
8199
+ return row ? JSON.parse(row.metadata) : void 0;
8200
+ }
8201
+ getProfiledModelIds() {
8202
+ const rows = this.db.prepare(
8203
+ "SELECT model_id FROM model_cache WHERE json_extract(metadata, '$.specializations') IS NOT NULL"
8204
+ ).all();
8205
+ return rows.map((r) => r.model_id);
8206
+ }
7129
8207
  // ── Tool Result Cache (in-memory, TTL-based) ──────────────────────────
7130
8208
  // Avoids redundant calls for read-only tools within a short window.
7131
8209
  // Not persisted to DB — cleared on process restart.
@@ -7380,15 +8458,15 @@ var ConfigManager = class {
7380
8458
  globalDir;
7381
8459
  constructor(workspacePath = process.cwd()) {
7382
8460
  this.workspacePath = workspacePath;
7383
- this.globalDir = path13.join(os2.homedir(), GLOBAL_CONFIG_DIR);
8461
+ this.globalDir = path16.join(os3.homedir(), GLOBAL_CONFIG_DIR);
7384
8462
  }
7385
8463
  async load() {
7386
8464
  this.config = await this.loadConfig();
7387
8465
  this.ignore = new CascadeIgnore();
7388
8466
  await this.ignore.load(this.workspacePath);
7389
8467
  this.cascadeMd = await loadCascadeMd(this.workspacePath);
7390
- this.keystore = new Keystore(path13.join(this.globalDir, GLOBAL_KEYSTORE_FILE));
7391
- this.store = new MemoryStore(path13.join(this.workspacePath, CASCADE_DB_FILE));
8468
+ this.keystore = new Keystore(path16.join(this.globalDir, GLOBAL_KEYSTORE_FILE));
8469
+ this.store = new MemoryStore(path16.join(this.workspacePath, CASCADE_DB_FILE));
7392
8470
  await this.injectEnvKeys();
7393
8471
  await this.ensureDefaultIdentity();
7394
8472
  }
@@ -7411,9 +8489,9 @@ var ConfigManager = class {
7411
8489
  return this.workspacePath;
7412
8490
  }
7413
8491
  async save() {
7414
- const configPath = path13.join(this.workspacePath, CASCADE_CONFIG_FILE);
7415
- await fs2.mkdir(path13.dirname(configPath), { recursive: true });
7416
- await fs2.writeFile(configPath, JSON.stringify(this.config, null, 2), "utf-8");
8492
+ const configPath = path16.join(this.workspacePath, CASCADE_CONFIG_FILE);
8493
+ await fs3.mkdir(path16.dirname(configPath), { recursive: true });
8494
+ await fs3.writeFile(configPath, JSON.stringify(this.config, null, 2), "utf-8");
7417
8495
  }
7418
8496
  async updateConfig(updates) {
7419
8497
  this.config = validateConfig({ ...this.config, ...updates });
@@ -7436,9 +8514,9 @@ var ConfigManager = class {
7436
8514
  return configProvider?.apiKey;
7437
8515
  }
7438
8516
  async loadConfig() {
7439
- const configPath = path13.join(this.workspacePath, CASCADE_CONFIG_FILE);
8517
+ const configPath = path16.join(this.workspacePath, CASCADE_CONFIG_FILE);
7440
8518
  try {
7441
- const raw = await fs2.readFile(configPath, "utf-8");
8519
+ const raw = await fs3.readFile(configPath, "utf-8");
7442
8520
  return validateConfig(JSON.parse(raw));
7443
8521
  } catch (err) {
7444
8522
  if (err.code === "ENOENT") {
@@ -7602,6 +8680,9 @@ var DashboardSocket = class {
7602
8680
  emitStreamToken(tierId, text, sessionId) {
7603
8681
  this.io.to(`session:${sessionId}`).emit("stream:token", { tierId, text, sessionId });
7604
8682
  }
8683
+ emitPeerMessage(event) {
8684
+ this.io.to(`session:${event.sessionId}`).emit("peer:message", event);
8685
+ }
7605
8686
  emitApprovalRequest(request) {
7606
8687
  this.io.emit("permission:user-required", request);
7607
8688
  }
@@ -7644,16 +8725,13 @@ var DashboardSocket = class {
7644
8725
  const { sessionId } = normalizeSessionSubscriptionPayload(payload);
7645
8726
  socket.leave(`session:${sessionId}`);
7646
8727
  });
7647
- socket.on("join:tenant", (tenantId) => {
7648
- socket.join(`tenant:${tenantId}`);
7649
- });
7650
8728
  });
7651
8729
  }
7652
8730
  close() {
7653
8731
  this.io.close();
7654
8732
  }
7655
8733
  };
7656
- var __dirname$1 = path13.dirname(fileURLToPath(import.meta.url));
8734
+ var __dirname$1 = path16.dirname(fileURLToPath(import.meta.url));
7657
8735
  var DashboardServer = class {
7658
8736
  app;
7659
8737
  httpServer;
@@ -7719,15 +8797,15 @@ var DashboardServer = class {
7719
8797
  resolveDashboardSecret() {
7720
8798
  const fromConfig = this.config.dashboard.secret ?? process.env["CASCADE_DASHBOARD_SECRET"];
7721
8799
  if (fromConfig) return fromConfig;
7722
- const secretPath = path13.join(this.workspacePath, CASCADE_DASHBOARD_SECRET_FILE);
8800
+ const secretPath = path16.join(this.workspacePath, CASCADE_DASHBOARD_SECRET_FILE);
7723
8801
  try {
7724
- if (fs11.existsSync(secretPath)) {
7725
- const existing = fs11.readFileSync(secretPath, "utf-8").trim();
8802
+ if (fs15.existsSync(secretPath)) {
8803
+ const existing = fs15.readFileSync(secretPath, "utf-8").trim();
7726
8804
  if (existing.length >= 16) return existing;
7727
8805
  }
7728
8806
  const generated = randomUUID();
7729
- fs11.mkdirSync(path13.dirname(secretPath), { recursive: true });
7730
- fs11.writeFileSync(secretPath, generated, { encoding: "utf-8", mode: 384 });
8807
+ fs15.mkdirSync(path16.dirname(secretPath), { recursive: true });
8808
+ fs15.writeFileSync(secretPath, generated, { encoding: "utf-8", mode: 384 });
7731
8809
  if (this.config.dashboard.auth) {
7732
8810
  console.warn(
7733
8811
  `Dashboard auth enabled with no secret configured; persisted a generated secret to ${secretPath}. Set CASCADE_DASHBOARD_SECRET or config.dashboard.secret to override.`
@@ -7754,7 +8832,7 @@ var DashboardServer = class {
7754
8832
  // ── Setup ─────────────────────────────────────
7755
8833
  getGlobalStore() {
7756
8834
  if (!this.globalStore) {
7757
- const globalDbPath = path13.join(os2.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
8835
+ const globalDbPath = path16.join(os3.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
7758
8836
  this.globalStore = new MemoryStore(globalDbPath);
7759
8837
  }
7760
8838
  return this.globalStore;
@@ -7815,12 +8893,12 @@ var DashboardServer = class {
7815
8893
  }
7816
8894
  }
7817
8895
  watchRuntimeChanges() {
7818
- const workspaceDbPath = path13.join(this.workspacePath, CASCADE_DB_FILE);
7819
- const globalDbPath = path13.join(os2.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
8896
+ const workspaceDbPath = path16.join(this.workspacePath, CASCADE_DB_FILE);
8897
+ const globalDbPath = path16.join(os3.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
7820
8898
  const watchPaths = [workspaceDbPath, globalDbPath].filter((p, index, arr) => arr.indexOf(p) === index);
7821
8899
  for (const watchPath of watchPaths) {
7822
- if (!fs11.existsSync(watchPath)) continue;
7823
- fs11.watchFile(watchPath, { interval: 3e3 }, () => {
8900
+ if (!fs15.existsSync(watchPath)) continue;
8901
+ fs15.watchFile(watchPath, { interval: 3e3 }, () => {
7824
8902
  this.throttledBroadcast(watchPath === globalDbPath ? "global" : "workspace");
7825
8903
  });
7826
8904
  }
@@ -7851,6 +8929,21 @@ var DashboardServer = class {
7851
8929
  legacyHeaders: false,
7852
8930
  message: { error: "Too many login attempts. Try again in 15 minutes." }
7853
8931
  });
8932
+ const apiLimiter = rateLimit({
8933
+ windowMs: 60 * 1e3,
8934
+ limit: 60,
8935
+ standardHeaders: "draft-7",
8936
+ legacyHeaders: false,
8937
+ message: { error: "Too many requests. Slow down." }
8938
+ });
8939
+ this.app.use("/api", apiLimiter);
8940
+ const mutationLimiter = rateLimit({
8941
+ windowMs: 60 * 1e3,
8942
+ limit: 10,
8943
+ standardHeaders: "draft-7",
8944
+ legacyHeaders: false,
8945
+ message: { error: "Too many requests on this endpoint." }
8946
+ });
7854
8947
  this.app.post("/api/auth/login", loginLimiter, (req, res) => {
7855
8948
  const { username, password } = req.body ?? {};
7856
8949
  if (!authRequired) {
@@ -7886,22 +8979,33 @@ var DashboardServer = class {
7886
8979
  res.status(401).json({ error: "Invalid credentials" });
7887
8980
  }
7888
8981
  });
7889
- this.app.post("/api/force-halt", auth, (req, res) => {
7890
- const { sessionId, nodeId } = req.body;
8982
+ this.app.post("/api/force-halt", auth, mutationLimiter, (req, res) => {
8983
+ const body = req.body;
8984
+ const sessionId = typeof body["sessionId"] === "string" ? body["sessionId"] : void 0;
8985
+ const nodeId = typeof body["nodeId"] === "string" ? body["nodeId"] : void 0;
7891
8986
  const payload = { sessionId, nodeId, requestedAt: (/* @__PURE__ */ new Date()).toISOString() };
7892
8987
  this.socket.broadcast("session:halt", payload);
7893
8988
  if (sessionId) this.socket.broadcastToRoom(`session:${sessionId}`, "session:halt", payload);
7894
8989
  res.json({ success: true, ...payload });
7895
8990
  });
7896
- this.app.post("/api/approve", auth, (req, res) => {
7897
- const { nodeId, sessionId } = req.body;
8991
+ this.app.post("/api/approve", auth, mutationLimiter, (req, res) => {
8992
+ const body = req.body;
8993
+ const sessionId = typeof body["sessionId"] === "string" ? body["sessionId"] : void 0;
8994
+ const nodeId = typeof body["nodeId"] === "string" ? body["nodeId"] : void 0;
7898
8995
  const payload = { sessionId, nodeId, requestedAt: (/* @__PURE__ */ new Date()).toISOString() };
7899
8996
  this.socket.broadcast("session:approve", payload);
7900
8997
  if (sessionId) this.socket.broadcastToRoom(`session:${sessionId}`, "session:approve", payload);
7901
8998
  res.json({ success: true, ...payload });
7902
8999
  });
7903
- this.app.post("/api/inject", auth, (req, res) => {
7904
- const { message, sessionId, nodeId } = req.body;
9000
+ this.app.post("/api/inject", auth, mutationLimiter, (req, res) => {
9001
+ const body = req.body;
9002
+ const message = typeof body["message"] === "string" ? body["message"] : void 0;
9003
+ const sessionId = typeof body["sessionId"] === "string" ? body["sessionId"] : void 0;
9004
+ const nodeId = typeof body["nodeId"] === "string" ? body["nodeId"] : void 0;
9005
+ if (!message) {
9006
+ res.status(400).json({ error: "message is required and must be a string" });
9007
+ return;
9008
+ }
7905
9009
  const payload = { sessionId, nodeId, message, requestedAt: (/* @__PURE__ */ new Date()).toISOString() };
7906
9010
  this.socket.broadcast("session:message-injected", payload);
7907
9011
  if (sessionId) this.socket.broadcastToRoom(`session:${sessionId}`, "session:message-injected", payload);
@@ -7924,7 +9028,7 @@ var DashboardServer = class {
7924
9028
  const sessionId = req.params.id;
7925
9029
  this.store.deleteSession(sessionId);
7926
9030
  this.store.deleteRuntimeSession(sessionId);
7927
- const globalDbPath = path13.join(os2.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
9031
+ const globalDbPath = path16.join(os3.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
7928
9032
  const globalStore = new MemoryStore(globalDbPath);
7929
9033
  try {
7930
9034
  globalStore.deleteRuntimeSession(sessionId);
@@ -7938,7 +9042,7 @@ var DashboardServer = class {
7938
9042
  });
7939
9043
  this.app.delete("/api/sessions", auth, (req, res) => {
7940
9044
  const body = req.body;
7941
- const globalDbPath = path13.join(os2.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
9045
+ const globalDbPath = path16.join(os3.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
7942
9046
  if (body?.ids && Array.isArray(body.ids) && body.ids.length > 0) {
7943
9047
  const globalStore = new MemoryStore(globalDbPath);
7944
9048
  try {
@@ -7961,7 +9065,7 @@ var DashboardServer = class {
7961
9065
  });
7962
9066
  this.app.delete("/api/runtime", auth, (_req, res) => {
7963
9067
  this.store.deleteAllRuntimeNodes();
7964
- const globalDbPath = path13.join(os2.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
9068
+ const globalDbPath = path16.join(os3.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
7965
9069
  const globalStore = new MemoryStore(globalDbPath);
7966
9070
  try {
7967
9071
  globalStore.deleteAllRuntimeNodes();
@@ -8023,16 +9127,26 @@ var DashboardServer = class {
8023
9127
  });
8024
9128
  this.app.put("/api/config", auth, async (req, res) => {
8025
9129
  const body = req.body;
8026
- if (body.tierLimits) this.config.tierLimits = { ...this.config.tierLimits, ...body.tierLimits };
8027
- if (body.budget) this.config.budget = { ...this.config.budget, ...body.budget };
9130
+ if (body["tierLimits"] !== void 0 && (typeof body["tierLimits"] !== "object" || Array.isArray(body["tierLimits"]))) {
9131
+ res.status(400).json({ error: "tierLimits must be an object" });
9132
+ return;
9133
+ }
9134
+ if (body["budget"] !== void 0 && (typeof body["budget"] !== "object" || Array.isArray(body["budget"]))) {
9135
+ res.status(400).json({ error: "budget must be an object" });
9136
+ return;
9137
+ }
9138
+ if (body["tierLimits"]) this.config.tierLimits = { ...this.config.tierLimits, ...body["tierLimits"] };
9139
+ if (body["budget"]) this.config.budget = { ...this.config.budget, ...body["budget"] };
8028
9140
  try {
8029
- const configPath = path13.join(this.workspacePath, CASCADE_CONFIG_FILE);
8030
- const existing = fs11.existsSync(configPath) ? JSON.parse(fs11.readFileSync(configPath, "utf-8")) : {};
9141
+ const configPath = path16.join(this.workspacePath, CASCADE_CONFIG_FILE);
9142
+ const existing = fs15.existsSync(configPath) ? JSON.parse(fs15.readFileSync(configPath, "utf-8")) : {};
8031
9143
  const updated = { ...existing, tierLimits: this.config.tierLimits, budget: this.config.budget };
8032
- fs11.writeFileSync(configPath, JSON.stringify(updated, null, 2), "utf-8");
9144
+ const tmp = configPath + ".tmp";
9145
+ fs15.writeFileSync(tmp, JSON.stringify(updated, null, 2), "utf-8");
9146
+ fs15.renameSync(tmp, configPath);
8033
9147
  res.json({ ok: true });
8034
9148
  } catch (err) {
8035
- res.status(500).json({ error: `Failed to save config: ${String(err)}` });
9149
+ res.status(500).json({ error: `Failed to save config: ${err instanceof Error ? err.message : String(err)}` });
8036
9150
  }
8037
9151
  });
8038
9152
  this.app.get("/api/runtime/logs/:sessionId", auth, (req, res) => {
@@ -8057,7 +9171,7 @@ var DashboardServer = class {
8057
9171
  this.app.get("/api/runtime", auth, (req, res) => {
8058
9172
  const scope = req.query["scope"] ?? "workspace";
8059
9173
  if (scope === "global") {
8060
- const globalDbPath = path13.join(os2.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
9174
+ const globalDbPath = path16.join(os3.homedir(), GLOBAL_CONFIG_DIR, GLOBAL_RUNTIME_DB_FILE);
8061
9175
  const globalStore = new MemoryStore(globalDbPath);
8062
9176
  try {
8063
9177
  res.json({
@@ -8078,7 +9192,7 @@ var DashboardServer = class {
8078
9192
  logs: this.store.listRuntimeNodeLogs(void 0, void 0, 500)
8079
9193
  });
8080
9194
  });
8081
- this.app.post("/api/run", auth, (req, res) => {
9195
+ this.app.post("/api/run", auth, mutationLimiter, (req, res) => {
8082
9196
  const body = req.body;
8083
9197
  if (!body.prompt || typeof body.prompt !== "string") {
8084
9198
  res.status(400).json({ error: "prompt is required" });
@@ -8099,12 +9213,15 @@ var DashboardServer = class {
8099
9213
  cascade.on("permission:user-required", (e) => {
8100
9214
  this.socket.broadcastToRoom(`session:${sessionId}`, "permission:user-required", { sessionId, ...e });
8101
9215
  });
9216
+ cascade.on("peer:message", (e) => {
9217
+ this.socket.emitPeerMessage(e);
9218
+ });
8102
9219
  try {
8103
9220
  const result = await cascade.run({ prompt: body.prompt, identityId: body.identityId });
8104
9221
  this.socket.broadcast("cost:update", {
8105
9222
  sessionId,
8106
- tokens: result.usage.totalTokens,
8107
- costUsd: result.usage.estimatedCostUsd
9223
+ totalTokens: result.usage.totalTokens,
9224
+ totalCostUsd: result.usage.estimatedCostUsd
8108
9225
  });
8109
9226
  this.socket.broadcastToRoom(`session:${sessionId}`, "session:complete", { sessionId, result });
8110
9227
  this.throttledBroadcast("workspace");
@@ -8127,13 +9244,13 @@ var DashboardServer = class {
8127
9244
  }))
8128
9245
  });
8129
9246
  });
8130
- const prodPath = path13.resolve(__dirname$1, "../web/dist");
8131
- const devPath = path13.resolve(__dirname$1, "../../web/dist");
8132
- const webDistPath = fs11.existsSync(prodPath) ? prodPath : devPath;
8133
- if (fs11.existsSync(webDistPath)) {
9247
+ const prodPath = path16.resolve(__dirname$1, "../web/dist");
9248
+ const devPath = path16.resolve(__dirname$1, "../../web/dist");
9249
+ const webDistPath = fs15.existsSync(prodPath) ? prodPath : devPath;
9250
+ if (fs15.existsSync(webDistPath)) {
8134
9251
  this.app.use(express.static(webDistPath));
8135
9252
  this.app.get("*", (_req, res) => {
8136
- res.sendFile(path13.join(webDistPath, "index.html"));
9253
+ res.sendFile(path16.join(webDistPath, "index.html"));
8137
9254
  });
8138
9255
  } else {
8139
9256
  this.app.get("/", (_req, res) => {
@@ -8210,7 +9327,7 @@ var TaskScheduler = class {
8210
9327
  return cron.validate(expression);
8211
9328
  }
8212
9329
  };
8213
- var execFileAsync = promisify(execFile);
9330
+ var execFileAsync2 = promisify(execFile);
8214
9331
  var SAFE_ENV_NAME = /^[A-Z][A-Z0-9_]*$/;
8215
9332
  function sanitizeEnvValue(v) {
8216
9333
  const raw = typeof v === "string" ? v : JSON.stringify(v);
@@ -8249,7 +9366,7 @@ var HooksRunner = class {
8249
9366
  const isWin = process.platform === "win32";
8250
9367
  const shell = isWin ? "cmd.exe" : "/bin/sh";
8251
9368
  const shellArgs = isWin ? ["/d", "/s", "/c", hook.command] : ["-c", hook.command];
8252
- const { stdout } = await execFileAsync(shell, shellArgs, {
9369
+ const { stdout } = await execFileAsync2(shell, shellArgs, {
8253
9370
  timeout: hook.timeout ?? 1e4,
8254
9371
  env: { ...process.env, ...envVars },
8255
9372
  windowsHide: true