npm - cascade-ai - Versions diffs - 0.9.7 → 0.10.3 - Mend

cascade-ai 0.9.7 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/cli.cjs CHANGED Viewed

@@ -9621,6 +9621,19 @@ var TaskAnalyzer = class {
     this.lastSelectedModels.clear();
     void this.tracker.save();
   }
+  /**
+   * Record an explicit user rating (good/bad) for the last run's selected models.
+   * Explicit ratings carry 3× the weight of auto-detected outcomes.
+   * Does NOT clear lastSelectedModels — the auto record already did that.
+   */
+  recordExplicitRating(rating) {
+    if (!this.tracker || !this.lastProfile) return false;
+    const taskType = this.lastProfile.type;
+    for (const [, model] of this.lastSelectedModels) {
+      this.tracker.recordExplicit(model.id, taskType, rating, 0);
+    }
+    return this.lastSelectedModels.size > 0;
+  }
   scoreModel(model, profile) {
     const perf = this.tracker?.performanceScore(model.id, profile.type) ?? 0.5;
     const costEff = this.costEfficiency(model, profile.complexity);
@@ -9700,6 +9713,20 @@ var ModelPerformanceTracker = class {
       sampleCount: s.sampleCount + 1
     });
   }
+  /**
+   * Record an explicit user rating (good/bad). Counts as 3 automatic samples
+   * so user feedback carries significantly more weight than auto-detected outcomes.
+   */
+  recordExplicit(modelId, taskType, rating, costUsd = 0) {
+    const outcome = rating === "good" ? "success" : "failure";
+    this.record(modelId, taskType, outcome, 0, costUsd);
+    this.record(modelId, taskType, outcome, 0, 0);
+    this.record(modelId, taskType, outcome, 0, 0);
+  }
+  /** Returns all stats keyed by "modelId:taskType" — used by `cascade stats`. */
+  getAll() {
+    return new Map(this.stats);
+  }
   /**
    * Returns 0.05–1.0; defaults to 0.5 (neutral prior) when no history exists.
    * High retry counts penalise the score.
@@ -10304,6 +10331,18 @@ ${last.partialOutput}` : "");
     if (!prompt) return null;
     return this.run({ prompt });
   }
+  /**
+   * Record an explicit user rating for the last completed run.
+   * Explicit ratings carry 3× the weight of auto-detected outcomes so user
+   * feedback meaningfully shifts future routing decisions.
+   * Returns false when called before any task has run in this session.
+   */
+  rateLastRun(rating) {
+    if (!this.taskAnalyzer) return false;
+    const recorded = this.taskAnalyzer.recordExplicitRating(rating);
+    if (recorded) void this.perfTracker?.save();
+    return recorded;
+  }
   /**
    * Rough pre-execution cost estimate for a plan: ~3 T2 calls per section
    * plus ~4 T3 calls per subtask at typical token volumes. A ballpark for
@@ -11163,6 +11202,12 @@ var SlashCommandRegistry = class {
       description: "Explain how the last run was routed (complexity, models, failovers)",
       handler: (_args, ctx) => ({ output: ctx.onWhy(), handled: true })
     });
+    this.register({
+      command: "/rate",
+      description: "Rate the last task to improve auto-routing  /rate good | bad",
+      args: ["good", "bad"],
+      handler: (args, ctx) => ({ output: ctx.onRate(args), handled: true })
+    });
     this.register({
       command: "/auto",
       description: "Toggle autonomous (hands-off) mode  /auto [on | off | status]",
@@ -12490,6 +12535,17 @@ ${msg.content}`).join("\n\n");
         return method === "osc52" ? `\u2714 Copied ${which} (${msg.content.length} chars) via terminal escape \u2014 works over SSH if your terminal supports OSC 52.` : `\u2714 Copied ${which} (${msg.content.length} chars) to clipboard.`;
       },
       onWhy: () => formatDecisionTrail(decisionLogRef.current),
+      onRate: (args) => {
+        const cascade = cascadeRef.current;
+        if (!cascade) return "Not ready yet.";
+        const rating = (args[0] ?? "").toLowerCase();
+        if (rating !== "good" && rating !== "bad") {
+          return "Usage: /rate good | bad";
+        }
+        const recorded = cascade.rateLastRun(rating);
+        if (!recorded) return "Nothing to rate \u2014 run a task first, or auto-routing is not enabled.";
+        return rating === "good" ? "\u2714 Rated good \u2014 models used for this task type got a boost." : "\u2714 Rated bad \u2014 models used for this task type were penalised. Auto-routing will try alternatives next time.";
+      },
       onComms: () => {
         dispatch({ type: "TOGGLE_COMMS" });
         return state.showComms ? "Agent comms feed hidden. /comms to bring it back." : "Agent comms feed enabled \u2014 agent-to-agent traffic will appear during runs.";
@@ -14504,6 +14560,31 @@ var DashboardSocket = class {
         const { sessionId } = normalizeSessionSubscriptionPayload(payload);
         socket.leave(`session:${sessionId}`);
       });
+      socket.on("session:rate", (payload) => { // a client submitted a rating for a session
+        const sessionId = typeof payload?.sessionId === "string" ? payload.sessionId : ""; // non-string ids collapse to "" and are dropped below
+        const rating = payload?.rating === "good" || payload?.rating === "bad" ? payload.rating : null; // anything other than "good"/"bad" becomes null
+        if (sessionId && rating) {
+          this.io.emit("session:rate", { sessionId, rating }); // re-emits only the sanitized fields via this.io, not the session room — presumably reaches every connected client; TODO confirm that is intended
+        }
+      });
+    });
+  }
+  onSessionRate(callback) {
+    this.io.on("connection", (socket) => {
+      socket.on("session:rate", (payload) => {
+        const sessionId = typeof payload?.sessionId === "string" ? payload.sessionId : "";
+        const rating = payload?.rating === "good" || payload?.rating === "bad" ? payload.rating : null;
+        if (sessionId && rating) callback(sessionId, rating);
+      });
+    });
+  }
+  onConfigUpdate(callback) {
+    this.io.on("connection", (socket) => {
+      socket.on("config:update", (payload) => {
+        if (typeof payload === "object" && payload !== null) {
+          callback(payload);
+        }
+      });
     });
   }
   close() {
@@ -14523,6 +14604,7 @@ var DashboardServer = class {
   store;
   globalStore = null;
   broadcastTimer = null;
+  activeSessions = /* @__PURE__ */ new Map(); // sessionId -> Cascade instance for a run in progress; looked up when a dashboard rating arrives
   port;
   host;
   workspacePath;
@@ -14542,6 +14624,30 @@ var DashboardServer = class {
     });
     this.setupMiddleware();
     this.setupRoutes();
+    this.socket.onSessionRate((sessionId, rating) => {
+      this.activeSessions.get(sessionId)?.rateLastRun(rating);
+    });
+    this.socket.onConfigUpdate((data) => {
+      if (data.keys) {
+        for (const [type, apiKey] of Object.entries(data.keys)) {
+          if (!apiKey) continue;
+          const provider = this.config.providers.find((p) => p.type === type);
+          if (provider) provider.apiKey = apiKey;
+          else this.config.providers.push({ type, apiKey });
+        }
+      }
+      if (data.models) {
+        this.config.models = { ...this.config.models, ...data.models };
+      }
+      if (data.budget) {
+        if (typeof data.budget.maxCostPerRun === "number") {
+          this.config.budget.maxCostPerRunUsd = data.budget.maxCostPerRun;
+        }
+        if (data.budget.autoBias === "balanced" || data.budget.autoBias === "quality" || data.budget.autoBias === "cost") {
+          this.config.autoBias = data.budget.autoBias;
+        }
+      }
+    });
   }
   async start() {
     const isLoopback = this.host === "127.0.0.1" || this.host === "::1" || this.host === "localhost";
@@ -14995,6 +15101,7 @@ var DashboardServer = class {
       res.json({ sessionId, status: "ACTIVE" });
       void (async () => {
         const cascade = new Cascade(this.config, this.workspacePath, this.store);
+        this.activeSessions.set(sessionId, cascade); // register this run so a "session:rate" event can reach its Cascade instance
         cascade.on("stream:token", (e) => {
           this.socket.broadcast("stream:token", { sessionId, tierId: e.tierId, text: e.text });
           this.socket.broadcastToRoom(`session:${sessionId}`, "stream:token", { sessionId, tierId: e.tierId, text: e.text });
@@ -15023,6 +15130,8 @@ var DashboardServer = class {
             sessionId,
             error: err instanceof Error ? err.message : String(err)
           });
+        } finally { // always unregister, whether the run succeeded or failed
+          this.activeSessions.delete(sessionId); // NOTE(review): after this, activeSessions.get(sessionId) is undefined, so a rating sent once the run has finished is presumably dropped silently — confirm this is intended
         }
       })();
     });
@@ -15474,6 +15583,44 @@ async function telemetryCommand(action) {
   }
   console.log();
 }
+var TASK_TYPES = ["code", "analysis", "creative", "data", "mixed"];
+async function statsCommand() {
+  const tracker = new ModelPerformanceTracker();
+  await tracker.load();
+  const all = tracker.getAll();
+  if (all.size === 0) {
+    console.log(chalk11__default.default.dim("\n  No routing history yet \u2014 run some tasks first.\n"));
+    return;
+  }
+  console.log(chalk11__default.default.magenta("\n  \u25C8 Auto-Routing History\n"));
+  console.log(chalk11__default.default.dim("  Per-task-type model performance learned from past runs.\n"));
+  for (const taskType of TASK_TYPES) {
+    const entries = [];
+    for (const [key, stat] of all) {
+      if (!key.endsWith(`:${taskType}`)) continue;
+      const modelId = key.slice(0, -(taskType.length + 1));
+      const successRate = stat.sampleCount > 0 ? stat.successCount / stat.sampleCount : 0;
+      const avgCostUsd = stat.sampleCount > 0 ? stat.totalCostUsd / stat.sampleCount : 0;
+      entries.push({ modelId, successRate, samples: stat.sampleCount, avgCostUsd });
+    }
+    if (entries.length === 0) continue;
+    entries.sort((a, b) => b.successRate - a.successRate || b.samples - a.samples);
+    console.log(chalk11__default.default.bold(`  ${taskType.toUpperCase()}`));
+    const header = `  ${"Model".padEnd(36)} ${"Success".padEnd(9)} ${"Samples".padEnd(9)} Avg cost`;
+    console.log(chalk11__default.default.dim(header));
+    console.log(chalk11__default.default.dim("  " + "\u2500".repeat(62)));
+    for (const e of entries) {
+      const pct = `${Math.round(e.successRate * 100)}%`;
+      const cost = e.avgCostUsd < 1e-4 ? "<$0.0001" : `$${e.avgCostUsd.toFixed(4)}`;
+      const color = e.successRate >= 0.8 ? chalk11__default.default.green : e.successRate >= 0.5 ? chalk11__default.default.yellow : chalk11__default.default.red;
+      console.log(
+        `  ${e.modelId.padEnd(36)} ${color(pct.padEnd(9))} ${String(e.samples).padEnd(9)} ${chalk11__default.default.dim(cost)}`
+      );
+    }
+    console.log();
+  }
+  console.log(chalk11__default.default.dim("  tip: use /rate good | bad after a task to improve these scores.\n"));
+}
 // src/cli/index.ts
 dotenv__default.default.config();
@@ -15573,6 +15720,9 @@ program.command("export").description("Export a session conversation to Markdown
     output: opts.output
   });
 });
+program.command("stats").description("Show auto-routing history: which models work best per task type").action(async () => {
+  await statsCommand();
+});
 async function startRepl(options) {
   const workspacePath = options.workspace ?? process.cwd();
   printBanner();