cascade-ai 0.9.7 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -691,6 +691,13 @@ declare class ModelSelector {
691
691
  private resolveDynamicModel;
692
692
  }
693
693
 
/**
 * Aggregate counters kept for a single tracked entry of the
 * ModelPerformanceTracker (entries are keyed by "modelId:taskType").
 */
interface ModelStat {
  /** Tally of outcomes recorded as 'success' (presumably; confirm against `record`). */
  successCount: number;
  /** Tally of outcomes recorded as 'failure' (presumably; confirm against `record`). */
  failureCount: number;
  /** Running sum of the `retries` values passed to `record` (presumably). */
  totalRetries: number;
  /** Running sum of the `costUsd` values passed to `record`, in US dollars (presumably). */
  totalCostUsd: number;
  /** Number of samples recorded for this entry. */
  sampleCount: number;
}
694
701
  declare class ModelPerformanceTracker {
695
702
  private stats;
696
703
  private readonly statsFile;
@@ -699,6 +706,13 @@ declare class ModelPerformanceTracker {
699
706
  load(): Promise<void>;
700
707
  save(): Promise<void>;
701
708
  record(modelId: string, taskType: TaskType, outcome: 'success' | 'failure', retries?: number, costUsd?: number): void;
709
+ /**
710
+ * Record an explicit user rating (good/bad). Counts as 3 automatic samples
711
+ * so user feedback carries significantly more weight than auto-detected outcomes.
712
+ */
713
+ recordExplicit(modelId: string, taskType: TaskType, rating: 'good' | 'bad', costUsd?: number): void;
714
+ /** Returns all stats keyed by "modelId:taskType" — used by `cascade stats`. */
715
+ getAll(): Map<string, ModelStat>;
702
716
  /**
703
717
  * Returns 0.05–1.0; defaults to 0.5 (neutral prior) when no history exists.
704
718
  * High retry counts penalise the score.
@@ -754,6 +768,12 @@ declare class TaskAnalyzer {
754
768
  * during this session and persist stats to disk.
755
769
  */
756
770
  recordRunOutcome(outcome: 'success' | 'failure', costByTier: Record<string, number>): void;
771
+ /**
772
+ * Record an explicit user rating (good/bad) for the last run's selected models.
773
+ * Explicit ratings carry 3× the weight of auto-detected outcomes.
774
+ * Does NOT clear lastSelectedModels — the auto record already did that.
775
+ */
776
+ recordExplicitRating(rating: 'good' | 'bad'): boolean;
757
777
  private scoreModel;
758
778
  private costEfficiency;
759
779
  private taskMatchScore;
@@ -1521,6 +1541,13 @@ declare class Cascade extends EventEmitter {
1521
1541
  resumeRun(opts?: {
1522
1542
  maxTokens?: number;
1523
1543
  }): Promise<CascadeRunResult | null>;
1544
+ /**
1545
+ * Record an explicit user rating for the last completed run.
1546
+ * Explicit ratings carry 3× the weight of auto-detected outcomes so user
1547
+ * feedback meaningfully shifts future routing decisions.
1548
+ * Returns false when called before any task has run in this session.
1549
+ */
1550
+ rateLastRun(rating: 'good' | 'bad'): boolean;
1524
1551
  /**
1525
1552
  * Rough pre-execution cost estimate for a plan: ~3 T2 calls per section
1526
1553
  * plus ~4 T3 calls per subtask at typical token volumes. A ballpark for
@@ -1988,6 +2015,15 @@ declare class DashboardSocket {
1988
2015
  emitApprovalRequest(request: PermissionRequest): void;
1989
2016
  onApprovalResponse(callback: (data: PermissionDecisionPayload) => void): void;
1990
2017
  private setupHandlers;
2018
+ onSessionRate(callback: (sessionId: string, rating: 'good' | 'bad') => void): void;
2019
+ onConfigUpdate(callback: (data: {
2020
+ keys?: Record<string, string>;
2021
+ models?: Record<string, string>;
2022
+ budget?: {
2023
+ maxCostPerRun?: number;
2024
+ autoBias?: string;
2025
+ };
2026
+ }) => void): void;
1991
2027
  close(): void;
1992
2028
  }
1993
2029
 
@@ -2000,6 +2036,7 @@ declare class DashboardServer {
2000
2036
  private store;
2001
2037
  private globalStore;
2002
2038
  private broadcastTimer;
2039
+ private activeSessions;
2003
2040
  private port;
2004
2041
  private host;
2005
2042
  private workspacePath;
package/dist/index.d.ts CHANGED
@@ -691,6 +691,13 @@ declare class ModelSelector {
691
691
  private resolveDynamicModel;
692
692
  }
693
693
 
/**
 * Aggregate counters kept for a single tracked entry of the
 * ModelPerformanceTracker (entries are keyed by "modelId:taskType").
 */
interface ModelStat {
  /** Tally of outcomes recorded as 'success' (presumably; confirm against `record`). */
  successCount: number;
  /** Tally of outcomes recorded as 'failure' (presumably; confirm against `record`). */
  failureCount: number;
  /** Running sum of the `retries` values passed to `record` (presumably). */
  totalRetries: number;
  /** Running sum of the `costUsd` values passed to `record`, in US dollars (presumably). */
  totalCostUsd: number;
  /** Number of samples recorded for this entry. */
  sampleCount: number;
}
694
701
  declare class ModelPerformanceTracker {
695
702
  private stats;
696
703
  private readonly statsFile;
@@ -699,6 +706,13 @@ declare class ModelPerformanceTracker {
699
706
  load(): Promise<void>;
700
707
  save(): Promise<void>;
701
708
  record(modelId: string, taskType: TaskType, outcome: 'success' | 'failure', retries?: number, costUsd?: number): void;
709
+ /**
710
+ * Record an explicit user rating (good/bad). Counts as 3 automatic samples
711
+ * so user feedback carries significantly more weight than auto-detected outcomes.
712
+ */
713
+ recordExplicit(modelId: string, taskType: TaskType, rating: 'good' | 'bad', costUsd?: number): void;
714
+ /** Returns all stats keyed by "modelId:taskType" — used by `cascade stats`. */
715
+ getAll(): Map<string, ModelStat>;
702
716
  /**
703
717
  * Returns 0.05–1.0; defaults to 0.5 (neutral prior) when no history exists.
704
718
  * High retry counts penalise the score.
@@ -754,6 +768,12 @@ declare class TaskAnalyzer {
754
768
  * during this session and persist stats to disk.
755
769
  */
756
770
  recordRunOutcome(outcome: 'success' | 'failure', costByTier: Record<string, number>): void;
771
+ /**
772
+ * Record an explicit user rating (good/bad) for the last run's selected models.
773
+ * Explicit ratings carry 3× the weight of auto-detected outcomes.
774
+ * Does NOT clear lastSelectedModels — the auto record already did that.
775
+ */
776
+ recordExplicitRating(rating: 'good' | 'bad'): boolean;
757
777
  private scoreModel;
758
778
  private costEfficiency;
759
779
  private taskMatchScore;
@@ -1521,6 +1541,13 @@ declare class Cascade extends EventEmitter {
1521
1541
  resumeRun(opts?: {
1522
1542
  maxTokens?: number;
1523
1543
  }): Promise<CascadeRunResult | null>;
1544
+ /**
1545
+ * Record an explicit user rating for the last completed run.
1546
+ * Explicit ratings carry 3× the weight of auto-detected outcomes so user
1547
+ * feedback meaningfully shifts future routing decisions.
1548
+ * Returns false when called before any task has run in this session.
1549
+ */
1550
+ rateLastRun(rating: 'good' | 'bad'): boolean;
1524
1551
  /**
1525
1552
  * Rough pre-execution cost estimate for a plan: ~3 T2 calls per section
1526
1553
  * plus ~4 T3 calls per subtask at typical token volumes. A ballpark for
@@ -1988,6 +2015,15 @@ declare class DashboardSocket {
1988
2015
  emitApprovalRequest(request: PermissionRequest): void;
1989
2016
  onApprovalResponse(callback: (data: PermissionDecisionPayload) => void): void;
1990
2017
  private setupHandlers;
2018
+ onSessionRate(callback: (sessionId: string, rating: 'good' | 'bad') => void): void;
2019
+ onConfigUpdate(callback: (data: {
2020
+ keys?: Record<string, string>;
2021
+ models?: Record<string, string>;
2022
+ budget?: {
2023
+ maxCostPerRun?: number;
2024
+ autoBias?: string;
2025
+ };
2026
+ }) => void): void;
1991
2027
  close(): void;
1992
2028
  }
1993
2029
 
@@ -2000,6 +2036,7 @@ declare class DashboardServer {
2000
2036
  private store;
2001
2037
  private globalStore;
2002
2038
  private broadcastTimer;
2039
+ private activeSessions;
2003
2040
  private port;
2004
2041
  private host;
2005
2042
  private workspacePath;
package/dist/index.js CHANGED
@@ -8018,6 +8018,19 @@ var TaskAnalyzer = class {
8018
8018
  this.lastSelectedModels.clear();
8019
8019
  void this.tracker.save();
8020
8020
  }
8021
+ /**
8022
+ * Record an explicit user rating (good/bad) for the last run's selected models.
8023
+ * Explicit ratings carry 3× the weight of auto-detected outcomes.
8024
+ * Does NOT clear lastSelectedModels — the auto record already did that.
8025
+ */
8026
+ recordExplicitRating(rating) {
8027
+ if (!this.tracker || !this.lastProfile) return false;
8028
+ const taskType = this.lastProfile.type;
8029
+ for (const [, model] of this.lastSelectedModels) {
8030
+ this.tracker.recordExplicit(model.id, taskType, rating, 0);
8031
+ }
8032
+ return this.lastSelectedModels.size > 0;
8033
+ }
8021
8034
  scoreModel(model, profile) {
8022
8035
  const perf = this.tracker?.performanceScore(model.id, profile.type) ?? 0.5;
8023
8036
  const costEff = this.costEfficiency(model, profile.complexity);
@@ -8097,6 +8110,20 @@ var ModelPerformanceTracker = class {
8097
8110
  sampleCount: s.sampleCount + 1
8098
8111
  });
8099
8112
  }
8113
+ /**
8114
+ * Record an explicit user rating (good/bad). Counts as 3 automatic samples
8115
+ * so user feedback carries significantly more weight than auto-detected outcomes.
8116
+ */
8117
+ recordExplicit(modelId, taskType, rating, costUsd = 0) {
8118
+ const outcome = rating === "good" ? "success" : "failure";
8119
+ this.record(modelId, taskType, outcome, 0, costUsd);
8120
+ this.record(modelId, taskType, outcome, 0, 0);
8121
+ this.record(modelId, taskType, outcome, 0, 0);
8122
+ }
8123
+ /** Returns all stats keyed by "modelId:taskType" — used by `cascade stats`. */
8124
+ getAll() {
8125
+ return new Map(this.stats);
8126
+ }
8100
8127
  /**
8101
8128
  * Returns 0.05–1.0; defaults to 0.5 (neutral prior) when no history exists.
8102
8129
  * High retry counts penalise the score.
@@ -8701,6 +8728,18 @@ ${last.partialOutput}` : "");
8701
8728
  if (!prompt) return null;
8702
8729
  return this.run({ prompt });
8703
8730
  }
8731
+ /**
8732
+ * Record an explicit user rating for the last completed run.
8733
+ * Explicit ratings carry 3× the weight of auto-detected outcomes so user
8734
+ * feedback meaningfully shifts future routing decisions.
8735
+ * Returns false when called before any task has run in this session.
8736
+ */
8737
+ rateLastRun(rating) {
8738
+ if (!this.taskAnalyzer) return false;
8739
+ const recorded = this.taskAnalyzer.recordExplicitRating(rating);
8740
+ if (recorded) void this.perfTracker?.save();
8741
+ return recorded;
8742
+ }
8704
8743
  /**
8705
8744
  * Rough pre-execution cost estimate for a plan: ~3 T2 calls per section
8706
8745
  * plus ~4 T3 calls per subtask at typical token volumes. A ballpark for
@@ -10497,6 +10536,31 @@ var DashboardSocket = class {
10497
10536
  const { sessionId } = normalizeSessionSubscriptionPayload(payload);
10498
10537
  socket.leave(`session:${sessionId}`);
10499
10538
  });
10539
+ socket.on("session:rate", (payload) => {
10540
+ const sessionId = typeof payload?.sessionId === "string" ? payload.sessionId : "";
10541
+ const rating = payload?.rating === "good" || payload?.rating === "bad" ? payload.rating : null;
10542
+ if (sessionId && rating) {
10543
+ this.io.emit("session:rate", { sessionId, rating });
10544
+ }
10545
+ });
10546
+ });
10547
+ }
10548
+ onSessionRate(callback) {
10549
+ this.io.on("connection", (socket) => {
10550
+ socket.on("session:rate", (payload) => {
10551
+ const sessionId = typeof payload?.sessionId === "string" ? payload.sessionId : "";
10552
+ const rating = payload?.rating === "good" || payload?.rating === "bad" ? payload.rating : null;
10553
+ if (sessionId && rating) callback(sessionId, rating);
10554
+ });
10555
+ });
10556
+ }
10557
+ onConfigUpdate(callback) {
10558
+ this.io.on("connection", (socket) => {
10559
+ socket.on("config:update", (payload) => {
10560
+ if (typeof payload === "object" && payload !== null) {
10561
+ callback(payload);
10562
+ }
10563
+ });
10500
10564
  });
10501
10565
  }
10502
10566
  close() {
@@ -10513,6 +10577,7 @@ var DashboardServer = class {
10513
10577
  store;
10514
10578
  globalStore = null;
10515
10579
  broadcastTimer = null;
10580
+ activeSessions = /* @__PURE__ */ new Map();
10516
10581
  port;
10517
10582
  host;
10518
10583
  workspacePath;
@@ -10532,6 +10597,30 @@ var DashboardServer = class {
10532
10597
  });
10533
10598
  this.setupMiddleware();
10534
10599
  this.setupRoutes();
// Forward a dashboard rating to the Cascade instance registered for that
// session; ratings for unknown session ids are dropped silently.
// NOTE(review): sessions appear to be removed from `activeSessions` once
// their run finishes, so a rating sent after completion may find no
// instance here. Confirm the intended lifetime.
this.socket.onSessionRate((sessionId, rating) => {
  this.activeSessions.get(sessionId)?.rateLastRun(rating);
});
// Apply configuration changes pushed by a dashboard client to the in-memory
// config. The payload comes straight off the socket, so every field is
// shape-checked before use and anything malformed is ignored.
// NOTE(review): no authorization check is visible at this point; confirm
// the socket layer authenticates clients before they can change API keys.
this.socket.onConfigUpdate((data) => {
  const isPlainObject = (value) => typeof value === "object" && value !== null && !Array.isArray(value);
  if (isPlainObject(data.keys)) {
    for (const [type, apiKey] of Object.entries(data.keys)) {
      // Only non-empty strings are accepted as API keys.
      if (typeof apiKey !== "string" || apiKey === "") continue;
      const provider = this.config.providers.find((p) => p.type === type);
      if (provider) provider.apiKey = apiKey;
      else this.config.providers.push({ type, apiKey });
    }
  }
  if (isPlainObject(data.models)) {
    // Keep only string-valued overrides so a malformed entry cannot replace
    // a configured model id with a non-string.
    const modelOverrides = Object.fromEntries(
      Object.entries(data.models).filter(([, modelId]) => typeof modelId === "string")
    );
    this.config.models = { ...this.config.models, ...modelOverrides };
  }
  if (isPlainObject(data.budget)) {
    const { maxCostPerRun, autoBias } = data.budget;
    // NOTE(review): negative values are still accepted, as before; confirm
    // whether they are meaningful for the budget.
    if (typeof maxCostPerRun === "number" && Number.isFinite(maxCostPerRun)) {
      this.config.budget.maxCostPerRunUsd = maxCostPerRun;
    }
    if (autoBias === "balanced" || autoBias === "quality" || autoBias === "cost") {
      this.config.autoBias = autoBias;
    }
  }
});
10535
10624
  }
10536
10625
  async start() {
10537
10626
  const isLoopback = this.host === "127.0.0.1" || this.host === "::1" || this.host === "localhost";
@@ -10985,6 +11074,7 @@ var DashboardServer = class {
10985
11074
  res.json({ sessionId, status: "ACTIVE" });
10986
11075
  void (async () => {
10987
11076
  const cascade = new Cascade(this.config, this.workspacePath, this.store);
11077
+ this.activeSessions.set(sessionId, cascade);
10988
11078
  cascade.on("stream:token", (e) => {
10989
11079
  this.socket.broadcast("stream:token", { sessionId, tierId: e.tierId, text: e.text });
10990
11080
  this.socket.broadcastToRoom(`session:${sessionId}`, "stream:token", { sessionId, tierId: e.tierId, text: e.text });
@@ -11013,6 +11103,8 @@ var DashboardServer = class {
11013
11103
  sessionId,
11014
11104
  error: err instanceof Error ? err.message : String(err)
11015
11105
  });
11106
+ } finally {
11107
+ this.activeSessions.delete(sessionId);
11016
11108
  }
11017
11109
  })();
11018
11110
  });