cascade-ai 0.10.1 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -9621,6 +9621,19 @@ var TaskAnalyzer = class {
9621
9621
  this.lastSelectedModels.clear();
9622
9622
  void this.tracker.save();
9623
9623
  }
9624
+ /**
9625
+ * Record an explicit user rating (good/bad) for the last run's selected models.
9626
+ * Explicit ratings carry 3× the weight of auto-detected outcomes.
9627
+ * Does NOT clear lastSelectedModels — the auto record already did that.
9628
+ */
9629
+ recordExplicitRating(rating) {
9630
+ if (!this.tracker || !this.lastProfile) return false;
9631
+ const taskType = this.lastProfile.type;
9632
+ for (const [, model] of this.lastSelectedModels) {
9633
+ this.tracker.recordExplicit(model.id, taskType, rating, 0);
9634
+ }
9635
+ return this.lastSelectedModels.size > 0;
9636
+ }
9624
9637
  scoreModel(model, profile) {
9625
9638
  const perf = this.tracker?.performanceScore(model.id, profile.type) ?? 0.5;
9626
9639
  const costEff = this.costEfficiency(model, profile.complexity);
@@ -9700,6 +9713,20 @@ var ModelPerformanceTracker = class {
9700
9713
  sampleCount: s.sampleCount + 1
9701
9714
  });
9702
9715
  }
9716
+ /**
9717
+ * Record an explicit user rating (good/bad). Counts as 3 automatic samples
9718
+ * so user feedback carries significantly more weight than auto-detected outcomes.
9719
+ */
9720
+ recordExplicit(modelId, taskType, rating, costUsd = 0) {
9721
+ const outcome = rating === "good" ? "success" : "failure";
9722
+ this.record(modelId, taskType, outcome, 0, costUsd);
9723
+ this.record(modelId, taskType, outcome, 0, 0);
9724
+ this.record(modelId, taskType, outcome, 0, 0);
9725
+ }
9726
+ /** Returns all stats keyed by "modelId:taskType" — used by `cascade stats`. */
9727
+ getAll() {
9728
+ return new Map(this.stats);
9729
+ }
9703
9730
  /**
9704
9731
  * Returns 0.05–1.0; defaults to 0.5 (neutral prior) when no history exists.
9705
9732
  * High retry counts penalise the score.
@@ -10304,6 +10331,18 @@ ${last.partialOutput}` : "");
10304
10331
  if (!prompt) return null;
10305
10332
  return this.run({ prompt });
10306
10333
  }
10334
+ /**
10335
+ * Record an explicit user rating for the last completed run.
10336
+ * Explicit ratings carry 3× the weight of auto-detected outcomes so user
10337
+ * feedback meaningfully shifts future routing decisions.
10338
+ * Returns false when called before any task has run in this session.
10339
+ */
10340
+ rateLastRun(rating) {
10341
+ if (!this.taskAnalyzer) return false;
10342
+ const recorded = this.taskAnalyzer.recordExplicitRating(rating);
10343
+ if (recorded) void this.perfTracker?.save();
10344
+ return recorded;
10345
+ }
10307
10346
  /**
10308
10347
  * Rough pre-execution cost estimate for a plan: ~3 T2 calls per section
10309
10348
  * plus ~4 T3 calls per subtask at typical token volumes. A ballpark for
@@ -11163,6 +11202,12 @@ var SlashCommandRegistry = class {
11163
11202
  description: "Explain how the last run was routed (complexity, models, failovers)",
11164
11203
  handler: (_args, ctx) => ({ output: ctx.onWhy(), handled: true })
11165
11204
  });
// `/rate good | bad` — hands the raw argument list to the context's onRate
// callback and reports whatever string it returns.
this.register({
  command: "/rate",
  description: "Rate the last task to improve auto-routing /rate good | bad",
  args: ["good", "bad"],
  handler: (args, ctx) => {
    const output = ctx.onRate(args);
    return { output, handled: true };
  }
});
11166
11211
  this.register({
11167
11212
  command: "/auto",
11168
11213
  description: "Toggle autonomous (hands-off) mode /auto [on | off | status]",
@@ -12490,6 +12535,17 @@ ${msg.content}`).join("\n\n");
12490
12535
  return method === "osc52" ? `\u2714 Copied ${which} (${msg.content.length} chars) via terminal escape \u2014 works over SSH if your terminal supports OSC 52.` : `\u2714 Copied ${which} (${msg.content.length} chars) to clipboard.`;
12491
12536
  },
12492
12537
  onWhy: () => formatDecisionTrail(decisionLogRef.current),
12538
+ onRate: (args) => {
12539
+ const cascade = cascadeRef.current;
12540
+ if (!cascade) return "Not ready yet.";
12541
+ const rating = (args[0] ?? "").toLowerCase();
12542
+ if (rating !== "good" && rating !== "bad") {
12543
+ return "Usage: /rate good | bad";
12544
+ }
12545
+ const recorded = cascade.rateLastRun(rating);
12546
+ if (!recorded) return "Nothing to rate \u2014 run a task first, or auto-routing is not enabled.";
12547
+ return rating === "good" ? "\u2714 Rated good \u2014 models used for this task type got a boost." : "\u2714 Rated bad \u2014 models used for this task type were penalised. Auto-routing will try alternatives next time.";
12548
+ },
12493
12549
  onComms: () => {
12494
12550
  dispatch({ type: "TOGGLE_COMMS" });
12495
12551
  return state.showComms ? "Agent comms feed hidden. /comms to bring it back." : "Agent comms feed enabled \u2014 agent-to-agent traffic will appear during runs.";
@@ -14504,6 +14560,31 @@ var DashboardSocket = class {
14504
14560
  const { sessionId } = normalizeSessionSubscriptionPayload(payload);
14505
14561
  socket.leave(`session:${sessionId}`);
14506
14562
  });
// Relay a validated rating: the payload needs a non-empty string `sessionId`
// and a `rating` of exactly "good" or "bad"; anything else is ignored.
// NOTE(review): the event is re-emitted through `this.io.emit`. If `this.io`
// is a Socket.IO server, that reaches every connected client — confirm that
// is intended.
socket.on("session:rate", (payload) => {
  const sessionId = payload?.sessionId;
  const rating = payload?.rating;
  if (typeof sessionId !== "string" || sessionId === "") {
    return;
  }
  if (rating !== "good" && rating !== "bad") {
    return;
  }
  this.io.emit("session:rate", { sessionId, rating });
});
14570
+ });
14571
+ }
14572
+ onSessionRate(callback) {
14573
+ this.io.on("connection", (socket) => {
14574
+ socket.on("session:rate", (payload) => {
14575
+ const sessionId = typeof payload?.sessionId === "string" ? payload.sessionId : "";
14576
+ const rating = payload?.rating === "good" || payload?.rating === "bad" ? payload.rating : null;
14577
+ if (sessionId && rating) callback(sessionId, rating);
14578
+ });
14579
+ });
14580
+ }
14581
+ onConfigUpdate(callback) {
14582
+ this.io.on("connection", (socket) => {
14583
+ socket.on("config:update", (payload) => {
14584
+ if (typeof payload === "object" && payload !== null) {
14585
+ callback(payload);
14586
+ }
14587
+ });
14507
14588
  });
14508
14589
  }
14509
14590
  close() {
@@ -14523,6 +14604,7 @@ var DashboardServer = class {
14523
14604
  store;
14524
14605
  globalStore = null;
14525
14606
  broadcastTimer = null;
14607
+ activeSessions = /* @__PURE__ */ new Map();
14526
14608
  port;
14527
14609
  host;
14528
14610
  workspacePath;
@@ -14542,6 +14624,30 @@ var DashboardServer = class {
14542
14624
  });
14543
14625
  this.setupMiddleware();
14544
14626
  this.setupRoutes();
// Route a dashboard rating to the Cascade instance registered for that session.
// NOTE(review): entries in `activeSessions` are removed in a `finally` block
// elsewhere in this class, so a rating that arrives after removal is silently
// ignored — confirm that ratings are expected while the run is still active.
this.socket.onSessionRate((sessionId, rating) => {
  this.activeSessions.get(sessionId)?.rateLastRun(rating);
});
// Apply configuration changes pushed by a dashboard client. The payload comes
// straight off the socket, so each section is shape-checked before use.
this.socket.onConfigUpdate((data) => {
  // Arrays and primitives would be expanded into index-keyed junk by
  // Object.entries / spread, so only plain objects are accepted.
  const isPlainObject = (value) => typeof value === "object" && value !== null && !Array.isArray(value);
  if (isPlainObject(data.keys)) {
    for (const [type, apiKey] of Object.entries(data.keys)) {
      // Only non-empty strings are usable credentials; skip everything else.
      if (typeof apiKey !== "string" || apiKey === "") continue;
      const provider = this.config.providers.find((p) => p.type === type);
      if (provider) {
        provider.apiKey = apiKey;
      } else {
        this.config.providers.push({ type, apiKey });
      }
    }
  }
  if (isPlainObject(data.models)) {
    // Shallow merge: incoming entries override existing ones key by key.
    this.config.models = { ...this.config.models, ...data.models };
  }
  if (data.budget) {
    // NOTE(review): assumes `this.config.budget` already exists — confirm.
    if (typeof data.budget.maxCostPerRun === "number") {
      this.config.budget.maxCostPerRunUsd = data.budget.maxCostPerRun;
    }
    if (data.budget.autoBias === "balanced" || data.budget.autoBias === "quality" || data.budget.autoBias === "cost") {
      this.config.autoBias = data.budget.autoBias;
    }
  }
});
14545
14651
  }
14546
14652
  async start() {
14547
14653
  const isLoopback = this.host === "127.0.0.1" || this.host === "::1" || this.host === "localhost";
@@ -14995,6 +15101,7 @@ var DashboardServer = class {
14995
15101
  res.json({ sessionId, status: "ACTIVE" });
14996
15102
  void (async () => {
14997
15103
  const cascade = new Cascade(this.config, this.workspacePath, this.store);
15104
+ this.activeSessions.set(sessionId, cascade);
14998
15105
  cascade.on("stream:token", (e) => {
14999
15106
  this.socket.broadcast("stream:token", { sessionId, tierId: e.tierId, text: e.text });
15000
15107
  this.socket.broadcastToRoom(`session:${sessionId}`, "stream:token", { sessionId, tierId: e.tierId, text: e.text });
@@ -15023,6 +15130,8 @@ var DashboardServer = class {
15023
15130
  sessionId,
15024
15131
  error: err instanceof Error ? err.message : String(err)
15025
15132
  });
15133
+ } finally {
15134
+ this.activeSessions.delete(sessionId);
15026
15135
  }
15027
15136
  })();
15028
15137
  });
@@ -15474,6 +15583,44 @@ async function telemetryCommand(action) {
15474
15583
  }
15475
15584
  console.log();
15476
15585
  }
// Task types reported by `cascade stats`, in display order.
var TASK_TYPES = ["code", "analysis", "creative", "data", "mixed"];
/**
 * Print the auto-routing history: one table per task type listing each
 * model's success rate, sample count and average cost.
 *
 * Stats are loaded from a fresh ModelPerformanceTracker. Keys have the form
 * "modelId:taskType"; keys whose suffix is not in TASK_TYPES are not shown.
 * Rows are ordered by success rate, then by sample count, both descending.
 *
 * @returns {Promise<void>}
 */
async function statsCommand() {
  const tracker = new ModelPerformanceTracker();
  await tracker.load();
  const all = tracker.getAll();
  const chalk = chalk11__default.default;
  if (all.size === 0) {
    console.log(chalk.dim("\n No routing history yet \u2014 run some tasks first.\n"));
    return;
  }
  console.log(chalk.magenta("\n \u25C8 Auto-Routing History\n"));
  console.log(chalk.dim(" Per-task-type model performance learned from past runs.\n"));
  for (const taskType of TASK_TYPES) {
    const suffix = `:${taskType}`;
    const rows = [...all]
      .filter(([key]) => key.endsWith(suffix))
      .map(([key, stat]) => {
        const samples = stat.sampleCount;
        const hasSamples = samples > 0;
        return {
          // Strip the ":taskType" suffix to recover the model id.
          modelId: key.slice(0, -suffix.length),
          successRate: hasSamples ? stat.successCount / samples : 0,
          samples,
          avgCostUsd: hasSamples ? stat.totalCostUsd / samples : 0
        };
      });
    if (rows.length === 0) {
      continue;
    }
    rows.sort((a, b) => b.successRate - a.successRate || b.samples - a.samples);
    console.log(chalk.bold(` ${taskType.toUpperCase()}`));
    const header = ` ${"Model".padEnd(36)} ${"Success".padEnd(9)} ${"Samples".padEnd(9)} Avg cost`;
    console.log(chalk.dim(header));
    console.log(chalk.dim(" " + "\u2500".repeat(62)));
    for (const row of rows) {
      const pct = `${Math.round(row.successRate * 100)}%`;
      const cost = row.avgCostUsd < 1e-4 ? "<$0.0001" : `$${row.avgCostUsd.toFixed(4)}`;
      // Green at 80%+ success, yellow at 50%+, red below that.
      let color = chalk.red;
      if (row.successRate >= 0.8) {
        color = chalk.green;
      } else if (row.successRate >= 0.5) {
        color = chalk.yellow;
      }
      console.log(
        ` ${row.modelId.padEnd(36)} ${color(pct.padEnd(9))} ${String(row.samples).padEnd(9)} ${chalk.dim(cost)}`
      );
    }
    console.log();
  }
  console.log(chalk.dim(" tip: use /rate good | bad after a task to improve these scores.\n"));
}
15477
15624
 
15478
15625
  // src/cli/index.ts
15479
15626
  dotenv__default.default.config();
@@ -15573,6 +15720,9 @@ program.command("export").description("Export a session conversation to Markdown
15573
15720
  output: opts.output
15574
15721
  });
15575
15722
  });
// `cascade stats` — prints the learned per-task-type routing history.
program
  .command("stats")
  .description("Show auto-routing history: which models work best per task type")
  .action(() => statsCommand());
15576
15726
  async function startRepl(options) {
15577
15727
  const workspacePath = options.workspace ?? process.cwd();
15578
15728
  printBanner();