cascade-ai 0.9.7 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -9574,6 +9574,19 @@ var TaskAnalyzer = class {
9574
9574
  this.lastSelectedModels.clear();
9575
9575
  void this.tracker.save();
9576
9576
  }
9577
+ /**
9578
+ * Record an explicit user rating (good/bad) for the last run's selected models.
9579
+ * Explicit ratings carry 3× the weight of auto-detected outcomes.
9580
+ * Does NOT clear lastSelectedModels — the auto record already did that.
9581
+ */
9582
+ recordExplicitRating(rating) {
9583
+ if (!this.tracker || !this.lastProfile) return false;
9584
+ const taskType = this.lastProfile.type;
9585
+ for (const [, model] of this.lastSelectedModels) {
9586
+ this.tracker.recordExplicit(model.id, taskType, rating, 0);
9587
+ }
9588
+ return this.lastSelectedModels.size > 0;
9589
+ }
9577
9590
  scoreModel(model, profile) {
9578
9591
  const perf = this.tracker?.performanceScore(model.id, profile.type) ?? 0.5;
9579
9592
  const costEff = this.costEfficiency(model, profile.complexity);
@@ -9653,6 +9666,20 @@ var ModelPerformanceTracker = class {
9653
9666
  sampleCount: s.sampleCount + 1
9654
9667
  });
9655
9668
  }
9669
+ /**
9670
+ * Record an explicit user rating (good/bad). Counts as 3 automatic samples
9671
+ * so user feedback carries significantly more weight than auto-detected outcomes.
9672
+ */
9673
+ recordExplicit(modelId, taskType, rating, costUsd = 0) {
9674
+ const outcome = rating === "good" ? "success" : "failure";
9675
+ this.record(modelId, taskType, outcome, 0, costUsd);
9676
+ this.record(modelId, taskType, outcome, 0, 0);
9677
+ this.record(modelId, taskType, outcome, 0, 0);
9678
+ }
9679
+ /** Returns all stats keyed by "modelId:taskType" — used by `cascade stats`. */
9680
+ getAll() {
9681
+ return new Map(this.stats);
9682
+ }
9656
9683
  /**
9657
9684
  * Returns 0.05–1.0; defaults to 0.5 (neutral prior) when no history exists.
9658
9685
  * High retry counts penalise the score.
@@ -10257,6 +10284,18 @@ ${last.partialOutput}` : "");
10257
10284
  if (!prompt) return null;
10258
10285
  return this.run({ prompt });
10259
10286
  }
10287
+ /**
10288
+ * Record an explicit user rating for the last completed run.
10289
+ * Explicit ratings carry 3× the weight of auto-detected outcomes so user
10290
+ * feedback meaningfully shifts future routing decisions.
10291
+ * Returns false when called before any task has run in this session.
10292
+ */
10293
+ rateLastRun(rating) {
10294
+ if (!this.taskAnalyzer) return false;
10295
+ const recorded = this.taskAnalyzer.recordExplicitRating(rating);
10296
+ if (recorded) void this.perfTracker?.save();
10297
+ return recorded;
10298
+ }
10260
10299
  /**
10261
10300
  * Rough pre-execution cost estimate for a plan: ~3 T2 calls per section
10262
10301
  * plus ~4 T3 calls per subtask at typical token volumes. A ballpark for
@@ -11116,6 +11155,12 @@ var SlashCommandRegistry = class {
11116
11155
  description: "Explain how the last run was routed (complexity, models, failovers)",
11117
11156
  handler: (_args, ctx) => ({ output: ctx.onWhy(), handled: true })
11118
11157
  });
11158
// "/rate" slash command: hands the argument list to the host's `onRate`
// callback and returns its reply as the command output.
this.register({
  command: "/rate",
  description: "Rate the last task to improve auto-routing /rate good | bad",
  args: ["good", "bad"],
  handler: (args, ctx) => {
    const output = ctx.onRate(args);
    return { output, handled: true };
  }
});
11119
11164
  this.register({
11120
11165
  command: "/auto",
11121
11166
  description: "Toggle autonomous (hands-off) mode /auto [on | off | status]",
@@ -12443,6 +12488,17 @@ ${msg.content}`).join("\n\n");
12443
12488
  return method === "osc52" ? `\u2714 Copied ${which} (${msg.content.length} chars) via terminal escape \u2014 works over SSH if your terminal supports OSC 52.` : `\u2714 Copied ${which} (${msg.content.length} chars) to clipboard.`;
12444
12489
  },
12445
12490
  onWhy: () => formatDecisionTrail(decisionLogRef.current),
12491
+ onRate: (args) => {
12492
+ const cascade = cascadeRef.current;
12493
+ if (!cascade) return "Not ready yet.";
12494
+ const rating = (args[0] ?? "").toLowerCase();
12495
+ if (rating !== "good" && rating !== "bad") {
12496
+ return "Usage: /rate good | bad";
12497
+ }
12498
+ const recorded = cascade.rateLastRun(rating);
12499
+ if (!recorded) return "Nothing to rate \u2014 run a task first, or auto-routing is not enabled.";
12500
+ return rating === "good" ? "\u2714 Rated good \u2014 models used for this task type got a boost." : "\u2714 Rated bad \u2014 models used for this task type were penalised. Auto-routing will try alternatives next time.";
12501
+ },
12446
12502
  onComms: () => {
12447
12503
  dispatch({ type: "TOGGLE_COMMS" });
12448
12504
  return state.showComms ? "Agent comms feed hidden. /comms to bring it back." : "Agent comms feed enabled \u2014 agent-to-agent traffic will appear during runs.";
@@ -14457,6 +14513,31 @@ var DashboardSocket = class {
14457
14513
  const { sessionId } = normalizeSessionSubscriptionPayload(payload);
14458
14514
  socket.leave(`session:${sessionId}`);
14459
14515
  });
14516
+ socket.on("session:rate", (payload) => {
14517
+ const sessionId = typeof payload?.sessionId === "string" ? payload.sessionId : "";
14518
+ const rating = payload?.rating === "good" || payload?.rating === "bad" ? payload.rating : null;
14519
+ if (sessionId && rating) {
14520
+ this.io.emit("session:rate", { sessionId, rating });
14521
+ }
14522
+ });
14523
+ });
14524
+ }
14525
+ onSessionRate(callback) {
14526
+ this.io.on("connection", (socket) => {
14527
+ socket.on("session:rate", (payload) => {
14528
+ const sessionId = typeof payload?.sessionId === "string" ? payload.sessionId : "";
14529
+ const rating = payload?.rating === "good" || payload?.rating === "bad" ? payload.rating : null;
14530
+ if (sessionId && rating) callback(sessionId, rating);
14531
+ });
14532
+ });
14533
+ }
14534
+ onConfigUpdate(callback) {
14535
+ this.io.on("connection", (socket) => {
14536
+ socket.on("config:update", (payload) => {
14537
+ if (typeof payload === "object" && payload !== null) {
14538
+ callback(payload);
14539
+ }
14540
+ });
14460
14541
  });
14461
14542
  }
14462
14543
  close() {
@@ -14476,6 +14557,7 @@ var DashboardServer = class {
14476
14557
  store;
14477
14558
  globalStore = null;
14478
14559
  broadcastTimer = null;
14560
+ activeSessions = /* @__PURE__ */ new Map();
14479
14561
  port;
14480
14562
  host;
14481
14563
  workspacePath;
@@ -14495,6 +14577,30 @@ var DashboardServer = class {
14495
14577
  });
14496
14578
  this.setupMiddleware();
14497
14579
  this.setupRoutes();
14580
// Dashboard rating: route it to the Cascade instance registered for that
// session, if any.
// NOTE(review): sessions are removed from `activeSessions` in the `finally`
// block of the run that registers them, so a rating that arrives after the
// run has finished finds no entry and is dropped silently — confirm this is
// intended.
this.socket.onSessionRate((sessionId, rating) => {
  this.activeSessions.get(sessionId)?.rateLastRun(rating);
});
// Live configuration edits pushed from the dashboard. The payload comes from
// a socket client, so every section is shape-checked before it is applied;
// the original only tested truthiness, which let strings, arrays and
// non-string API keys through.
this.socket.onConfigUpdate((data) => {
  const isRecord = (value) => typeof value === "object" && value !== null && !Array.isArray(value);
  if (isRecord(data.keys)) {
    for (const [type, apiKey] of Object.entries(data.keys)) {
      // Skip empty and non-string keys instead of storing them.
      if (typeof apiKey !== "string" || apiKey === "") continue;
      const provider = this.config.providers.find((p) => p.type === type);
      if (provider) provider.apiKey = apiKey;
      else this.config.providers.push({ type, apiKey });
    }
  }
  if (isRecord(data.models)) {
    // Shallow merge: incoming entries override existing ones of the same name.
    this.config.models = { ...this.config.models, ...data.models };
  }
  if (isRecord(data.budget)) {
    const { maxCostPerRun, autoBias } = data.budget;
    // `typeof NaN` is "number", so NaN is excluded explicitly.
    if (typeof maxCostPerRun === "number" && !Number.isNaN(maxCostPerRun)) {
      this.config.budget.maxCostPerRunUsd = maxCostPerRun;
    }
    if (autoBias === "balanced" || autoBias === "quality" || autoBias === "cost") {
      this.config.autoBias = autoBias;
    }
  }
});
14498
14604
  }
14499
14605
  async start() {
14500
14606
  const isLoopback = this.host === "127.0.0.1" || this.host === "::1" || this.host === "localhost";
@@ -14948,6 +15054,7 @@ var DashboardServer = class {
14948
15054
  res.json({ sessionId, status: "ACTIVE" });
14949
15055
  void (async () => {
14950
15056
  const cascade = new Cascade(this.config, this.workspacePath, this.store);
15057
+ this.activeSessions.set(sessionId, cascade);
14951
15058
  cascade.on("stream:token", (e) => {
14952
15059
  this.socket.broadcast("stream:token", { sessionId, tierId: e.tierId, text: e.text });
14953
15060
  this.socket.broadcastToRoom(`session:${sessionId}`, "stream:token", { sessionId, tierId: e.tierId, text: e.text });
@@ -14976,6 +15083,8 @@ var DashboardServer = class {
14976
15083
  sessionId,
14977
15084
  error: err instanceof Error ? err.message : String(err)
14978
15085
  });
15086
+ } finally {
15087
+ this.activeSessions.delete(sessionId);
14979
15088
  }
14980
15089
  })();
14981
15090
  });
@@ -15427,6 +15536,44 @@ async function telemetryCommand(action) {
15427
15536
  }
15428
15537
  console.log();
15429
15538
  }
15539
// Task types listed by `cascade stats`, in display order. Kept as `var` to
// match the bundle's module-level declarations.
var TASK_TYPES = ["code", "analysis", "creative", "data", "mixed"];
/**
 * Implements `cascade stats`: loads the persisted performance tracker and
 * prints one table per task type with each model's success rate, sample
 * count and average cost.
 *
 * Stats are keyed "modelId:taskType". Only the task types in TASK_TYPES are
 * printed; entries filed under any other type are not shown. Within a table,
 * rows are ordered by success rate, then by sample count, both descending.
 */
async function statsCommand() {
  const tracker = new ModelPerformanceTracker();
  await tracker.load();
  const allStats = tracker.getAll();
  if (allStats.size === 0) {
    console.log(chalk11.dim("\n No routing history yet \u2014 run some tasks first.\n"));
    return;
  }
  console.log(chalk11.magenta("\n \u25C8 Auto-Routing History\n"));
  console.log(chalk11.dim(" Per-task-type model performance learned from past runs.\n"));
  const tableHeader = ` ${"Model".padEnd(36)} ${"Success".padEnd(9)} ${"Samples".padEnd(9)} Avg cost`;
  const divider = " " + "\u2500".repeat(62);
  for (const taskType of TASK_TYPES) {
    const suffix = `:${taskType}`;
    const rows = [...allStats].filter(([key]) => key.endsWith(suffix)).map(([key, stat]) => {
      const hasSamples = stat.sampleCount > 0;
      return {
        // Everything before the trailing ":taskType" is the model id.
        modelId: key.slice(0, -suffix.length),
        successRate: hasSamples ? stat.successCount / stat.sampleCount : 0,
        samples: stat.sampleCount,
        avgCostUsd: hasSamples ? stat.totalCostUsd / stat.sampleCount : 0
      };
    });
    if (rows.length === 0) continue;
    rows.sort((left, right) => {
      const byRate = right.successRate - left.successRate;
      return byRate || right.samples - left.samples;
    });
    console.log(chalk11.bold(` ${taskType.toUpperCase()}`));
    console.log(chalk11.dim(tableHeader));
    console.log(chalk11.dim(divider));
    for (const row of rows) {
      const pct = `${Math.round(row.successRate * 100)}%`;
      let cost;
      if (row.avgCostUsd < 1e-4) {
        cost = "<$0.0001";
      } else {
        cost = `$${row.avgCostUsd.toFixed(4)}`;
      }
      // Green from 80% success, yellow from 50%, red below that.
      let paint;
      if (row.successRate >= 0.8) {
        paint = chalk11.green;
      } else if (row.successRate >= 0.5) {
        paint = chalk11.yellow;
      } else {
        paint = chalk11.red;
      }
      console.log(
        ` ${row.modelId.padEnd(36)} ${paint(pct.padEnd(9))} ${String(row.samples).padEnd(9)} ${chalk11.dim(cost)}`
      );
    }
    console.log();
  }
  console.log(chalk11.dim(" tip: use /rate good | bad after a task to improve these scores.\n"));
}
15430
15577
 
15431
15578
  // src/cli/index.ts
15432
15579
  dotenv.config();
@@ -15526,6 +15673,9 @@ program.command("export").description("Export a session conversation to Markdown
15526
15673
  output: opts.output
15527
15674
  });
15528
15675
  });
15676
// `cascade stats`: prints the learned auto-routing history via statsCommand.
program
  .command("stats")
  .description("Show auto-routing history: which models work best per task type")
  .action(async () => statsCommand());
15529
15679
  async function startRepl(options) {
15530
15680
  const workspacePath = options.workspace ?? process.cwd();
15531
15681
  printBanner();