cascade-ai 0.4.0 → 0.9.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -14,6 +14,10 @@ interface ModelInfo {
14
14
  supportsStreaming: boolean;
15
15
  isLocal: boolean;
16
16
  minSizeB?: number;
17
+ /** Tool-use capability. False for Ollama; true for all cloud providers. */
18
+ supportsToolUse?: boolean;
19
+ /** Self-declared or API-sourced specialization categories. */
20
+ specializations?: string[];
17
21
  }
18
22
  interface ProviderConfig {
19
23
  type: ProviderType;
@@ -23,6 +27,14 @@ interface ProviderConfig {
23
27
  deploymentName?: string;
24
28
  apiVersion?: string;
25
29
  model?: string;
30
+ /**
31
+ * OAuth bearer token (e.g. a Claude Code subscription token) used instead
32
+ * of an API key. When set on an Anthropic provider, the request uses
33
+ * `Authorization: Bearer` + the oauth beta header rather than `x-api-key`.
34
+ */
35
+ authToken?: string;
36
+ /** Where an adopted credential came from, e.g. "Claude Code". Informational. */
37
+ credentialSource?: string;
26
38
  }
27
39
  interface StreamChunk {
28
40
  text: string;
@@ -43,6 +55,15 @@ interface GenerateOptions {
43
55
  tools?: ToolDefinition[];
44
56
  images?: ImageAttachment[];
45
57
  stream?: boolean;
58
+ /** Abort signal — when it fires, the provider aborts the in-flight request (instant cancel). */
59
+ signal?: AbortSignal;
60
+ /**
61
+ * Per-call model override. When set, this exact model is used for the call
62
+ * instead of the tier's default — lets Cascade Auto route each subtask to the
63
+ * best model for its type without disturbing the shared per-tier model that
64
+ * concurrent workers rely on. Ignored when a vision model is required.
65
+ */
66
+ model?: ModelInfo;
46
67
  }
47
68
  interface GenerateResult {
48
69
  content: string;
@@ -197,6 +218,8 @@ interface T3ResultPayload {
197
218
  issues: string[];
198
219
  peerSyncsUsed: string[];
199
220
  correctionAttempts: number;
221
+ /** Sibling workers this T3 asked its T2 to spawn (T3→T2 reinforcement request). */
222
+ reinforcements?: T2ToT3Assignment[];
200
223
  }
201
224
  interface T3Result extends T3ResultPayload {
202
225
  }
@@ -215,7 +238,7 @@ interface PeerSyncPayload {
215
238
  content: string | Record<string, unknown>;
216
239
  subtaskId?: string;
217
240
  }
218
- type PeerSyncType = 'SHARE_OUTPUT' | 'RESOLVE_CONFLICT' | 'DIVIDE_WORK' | 'CHECK_ASSUMPTION' | 'SIGNAL_READY';
241
+ type PeerSyncType = 'SHARE_OUTPUT' | 'RESOLVE_CONFLICT' | 'DIVIDE_WORK' | 'CHECK_ASSUMPTION' | 'SIGNAL_READY' | 'TOOL_CREATED' | 'COORDINATION';
219
242
  interface PeerMessage {
220
243
  fromId: string;
221
244
  toId: string;
@@ -225,6 +248,14 @@ interface PeerMessage {
225
248
  payload: unknown;
226
249
  timestamp: string;
227
250
  }
251
+ interface PeerMessageEvent {
252
+ fromId: string;
253
+ toId?: string;
254
+ syncType: PeerSyncType;
255
+ payload?: string;
256
+ timestamp: string;
257
+ sessionId: string;
258
+ }
228
259
  interface Session {
229
260
  id: string;
230
261
  title: string;
@@ -337,6 +368,54 @@ interface CascadeConfig {
337
368
  budget: BudgetConfig;
338
369
  theme: string;
339
370
  workspace: WorkspaceConfig;
371
+ cascadeAuto?: boolean;
372
+ /** Cascade Auto trade-off bias when picking a model. Default: 'balanced'. */
373
+ autoBias?: 'balanced' | 'quality' | 'cost';
374
+ /** Public-benchmark data source settings for Cascade Auto. */
375
+ benchmarks?: BenchmarksConfig;
376
+ enableToolCreation?: boolean;
377
+ /** Persist runtime-generated tools and reload them on startup (untrusted). Default: true. */
378
+ persistDynamicTools?: boolean;
379
+ plugins?: string[];
380
+ localConcurrency?: number;
381
+ localInferenceTimeoutMs?: number;
382
+ /** Timeout (ms) for a single cloud LLM call (streaming or not). Default: 120000. */
383
+ cloudInferenceTimeoutMs?: number;
384
+ /** Timeout (ms) for a tool-approval decision; denies (never auto-approves) on timeout. Default: 600000. */
385
+ approvalTimeoutMs?: number;
386
+ /**
387
+ * Pause for user approval of the plan. 'never' (default), 'complex' (Complex
388
+ * runs only; 'always' is an alias), or 'all' (Moderate + Complex).
389
+ */
390
+ planApproval?: 'never' | 'complex' | 'all' | 'always';
391
+ /** Plan-review behaviour for the boardroom gate. */
392
+ planReview?: PlanReviewConfig;
393
+ /** Autonomy level: 'manual' (default, prompts) or 'auto' (hands-off within guardrails). */
394
+ autonomy?: 'manual' | 'auto';
395
+ /** Max corrective re-plan passes before T1 returns the best partial. Default: 2. */
396
+ maxReplanPasses?: number;
397
+ /** Reflection / self-critique: goal-alignment critique + revise after self-test. Off by default. */
398
+ reflection?: {
399
+ enabled?: boolean;
400
+ maxRounds?: number;
401
+ };
402
+ /** T3 wave execution: 'auto' (sequential for local, parallel for cloud), or force one. Default: 'auto'. */
403
+ t3Execution?: 'auto' | 'parallel' | 'sequential';
404
+ /** T3→T2 reinforcement: let a worker ask its manager to spawn sibling workers. Off by default. */
405
+ reinforcements?: {
406
+ enabled?: boolean;
407
+ maxPerSection?: number;
408
+ };
409
+ /** Render the TUI in the alternate screen buffer (vim-style). Default: false. */
410
+ altScreen?: boolean;
411
+ }
412
+ interface PlanReviewConfig {
413
+ /** A reviewer model critiques the plan (gaps/risks/cost) before you see it. Default: false. */
414
+ autoReviewer?: boolean;
415
+ /** Allow editing the plan (drop sections) in the approval dialog. Default: true. */
416
+ editable?: boolean;
417
+ /** Steering-note → re-plan → re-ask rounds allowed before proceeding. Default: 5. */
418
+ maxRevisionRounds?: number;
340
419
  }
341
420
  interface ModelOverrides {
342
421
  t1?: string;
@@ -344,6 +423,16 @@ interface ModelOverrides {
344
423
  t3?: string;
345
424
  vision?: string;
346
425
  }
426
+ interface BenchmarksConfig {
427
+ /** Fetch current quality scores from a public source. Default: true. */
428
+ live?: boolean;
429
+ /** How long a fetched snapshot stays fresh before re-fetching (hours). Default: 24. */
430
+ refreshHours?: number;
431
+ /** Override the quality-benchmark source URL. When unset, the bundled GitHub-raw snapshot is used. */
432
+ sourceUrl?: string;
433
+ /** Fetch current per-token prices from OpenRouter (free, no key). Default: true. */
434
+ pricingLive?: boolean;
435
+ }
347
436
  interface ToolsConfig {
348
437
  shellAllowlist: string[];
349
438
  shellBlocklist: string[];
@@ -366,12 +455,14 @@ interface HooksConfig {
366
455
  postTask?: HookDefinition[];
367
456
  }
368
457
  interface HookDefinition {
458
+ name?: string;
369
459
  command: string;
370
460
  tools?: string[];
371
461
  timeout?: number;
372
462
  }
373
463
  interface DashboardConfig {
374
464
  port: number;
465
+ host: string;
375
466
  auth: boolean;
376
467
  teamMode: 'single' | 'multi';
377
468
  secret?: string;
@@ -394,6 +485,10 @@ interface TierLimits {
394
485
  interface BudgetConfig {
395
486
  dailyBudgetUsd?: number;
396
487
  sessionBudgetUsd?: number;
488
+ /** Hard per-task token ceiling. Resets each run. Default 200k. */
489
+ maxTokensPerRun?: number;
490
+ /** Optional hard per-task cost ceiling (USD). */
491
+ maxCostPerRunUsd?: number;
397
492
  warnAtPct: number;
398
493
  }
399
494
  interface WorkspaceConfig {
@@ -423,7 +518,7 @@ interface ThemeColors {
423
518
  t2Color: string;
424
519
  t3Color: string;
425
520
  }
426
- type CascadeEventType = 'task:start' | 'task:complete' | 'task:error' | 'tier:status' | 'tier:result' | 'stream:token' | 'stream:done' | 'tool:approval-request' | 'tool:approval-response' | 'tool:execute' | 'tool:result' | 'cost:update' | 'session:save' | 'escalation' | 'peer:sync';
521
+ type CascadeEventType = 'task:start' | 'task:complete' | 'task:error' | 'tier:status' | 'tier:result' | 'tier:root' | 'stream:token' | 'stream:done' | 'tool:approval-request' | 'tool:approval-response' | 'tool:execute' | 'tool:result' | 'tool:call' | 'cost:update' | 'session:save' | 'escalation' | 'peer:sync' | 'peer:message' | 'plan' | 'log' | 'run:cancelled' | 'budget:warning' | 'budget:exceeded' | 'permission:user-required' | 'mcp:approval-required' | 'plan:approval-required';
427
522
  interface CascadeEvent<T = unknown> {
428
523
  type: CascadeEventType;
429
524
  taskId?: string;
@@ -467,6 +562,13 @@ interface PermissionRequest {
467
562
  sectionContext: string;
468
563
  /** What T1's overall task goal is (injected when escalated to T1) */
469
564
  taskContext?: string;
565
+ /**
566
+ * When true, bypass the session approval cache so this request always reaches
567
+ * a fresh decision. Set for UNTRUSTED runtime tools (loaded from disk or
568
+ * received from a peer) so a prior `always` approval cannot silently
569
+ * auto-approve a later dangerous action.
570
+ */
571
+ forceReprompt?: boolean;
470
572
  }
471
573
  /**
472
574
  * A decision made at any tier (T2, T1, or USER) about a PermissionRequest.
@@ -558,11 +660,25 @@ declare class ModelSelector {
558
660
  private availableModels;
559
661
  constructor(availableProviders: Set<ProviderType>);
560
662
  addDynamicModel(model: ModelInfo): void;
663
+ /**
664
+ * Permanently drop a model from the available set for this session. Used by
665
+ * the router's 404 / "model not found" self-heal so a dead id is never
666
+ * selected again after it fails once.
667
+ */
668
+ removeModel(id: string): void;
669
+ /** Look up an available model by exact id (post-discovery/pricing lookups). */
670
+ getModelById(id: string): ModelInfo | undefined;
561
671
  getAvailableModelsForProvider(provider: ProviderType): ModelInfo[];
562
672
  selectForTier(tier: TierRole, overrideModelId?: string, requireVision?: boolean): ModelInfo | null;
563
673
  selectVisionModel(): ModelInfo | null;
564
674
  getNextFallback(currentModelId: string, tier: TierRole): ModelInfo | null;
565
675
  private getPriorityList;
676
+ getAllAvailableModels(): ModelInfo[];
677
+ /**
678
+ * Returns all available models eligible for the given tier, ordered by the
679
+ * tier's priority chain. Use this as the candidate set for scored selection.
680
+ */
681
+ getCandidatesForTier(tier: TierRole): ModelInfo[];
566
682
  isProviderAvailable(provider: ProviderType): boolean;
567
683
  markProviderUnavailable(provider: ProviderType): void;
568
684
  /**
@@ -575,6 +691,209 @@ declare class ModelSelector {
575
691
  private resolveDynamicModel;
576
692
  }
577
693
 
694
+ declare class ModelPerformanceTracker {
695
+ private stats;
696
+ private readonly statsFile;
697
+ private loaded;
698
+ constructor(statsFile?: string);
699
+ load(): Promise<void>;
700
+ save(): Promise<void>;
701
+ record(modelId: string, taskType: TaskType, outcome: 'success' | 'failure', retries?: number, costUsd?: number): void;
702
+ /**
703
+ * Returns 0.05–1.0; defaults to 0.5 (neutral prior) when no history exists.
704
+ * High retry counts penalise the score.
705
+ */
706
+ performanceScore(modelId: string, taskType: TaskType): number;
707
+ /**
708
+ * Returns 0.1–1.0. Cheaper models score higher, with the penalty scaled
709
+ * down for complex tasks (where capability matters more than cost).
710
+ *
711
+ * blended cost = input + 2 × output (output tokens are typically pricier).
712
+ * normalised over $0.05 blended as the "expensive" ceiling.
713
+ */
714
+ costEfficiencyScore(model: ModelInfo, complexity: 1 | 2 | 3 | 4 | 5): number;
715
+ }
716
+
717
+ type TaskType = 'code' | 'analysis' | 'creative' | 'data' | 'mixed';
718
+ /** Cascade Auto cost/quality trade-off bias. See CascadeConfig.autoBias. */
719
+ type AutoBias = 'balanced' | 'quality' | 'cost';
720
+ interface TaskProfile {
721
+ type: TaskType;
722
+ /** 1 = trivial, 5 = research-grade */
723
+ complexity: 1 | 2 | 3 | 4 | 5;
724
+ requiresReasoning: boolean;
725
+ requiresVision: boolean;
726
+ estimatedTokens: number;
727
+ /** 0.0–1.0 heuristic confidence; below 0.7 triggers AI fallback */
728
+ confidence: number;
729
+ }
730
+ declare class TaskAnalyzer {
731
+ private tracker?;
732
+ private bias;
733
+ private lastProfile;
734
+ private lastSelectedModels;
735
+ constructor(tracker?: ModelPerformanceTracker, bias?: AutoBias);
736
+ setTracker(tracker: ModelPerformanceTracker): void;
737
+ /** Change the cost/quality bias at runtime (e.g. when config reloads). */
738
+ setBias(bias: AutoBias): void;
739
+ /** Returns the TaskProfile from the most recent analyze() call — used for outcome recording. */
740
+ getLastProfile(): TaskProfile | null;
741
+ /**
742
+ * Analyze a prompt and return a TaskProfile using pure heuristics.
743
+ * Low confidence prompts fall back to a conservative mixed/moderate profile.
744
+ */
745
+ analyze(prompt: string): Promise<TaskProfile>;
746
+ /**
747
+ * Select the optimal model for a given tier.
748
+ * Scores tier-eligible models using cost efficiency + historical performance.
749
+ * Falls back to the priority-list default when no candidates have history.
750
+ */
751
+ selectModel(prompt: string, tier: TierRole, selector: ModelSelector): Promise<ModelInfo | null>;
752
+ /**
753
+ * Record the outcome of a completed run across all tiers that were selected
754
+ * during this session and persist stats to disk.
755
+ */
756
+ recordRunOutcome(outcome: 'success' | 'failure', costByTier: Record<string, number>): void;
757
+ private scoreModel;
758
+ private costEfficiency;
759
+ private taskMatchScore;
760
+ /** Clear the analysis cache (call between sessions). */
761
+ static clearCache(): void;
762
+ }
763
+
764
+ declare class MemoryStore {
765
+ private db;
766
+ constructor(dbPath: string);
767
+ private writeQueue;
768
+ private isProcessingQueue;
769
+ private processQueue;
770
+ private enqueueWrite;
771
+ createSession(session: Session): void;
772
+ updateSession(id: string, updates: Partial<Session>): void;
773
+ getSession(id: string): Session | null;
774
+ listSessions(identityId?: string, limit?: number): Session[];
775
+ deleteSession(id: string): void;
776
+ deleteAllSessions(): void;
777
+ deleteRuntimeSession(sessionId: string): void;
778
+ deleteAllRuntimeNodes(): void;
779
+ branchSession(originalId: string, newId: string): void;
780
+ upsertRuntimeSession(session: RuntimeSession): void;
781
+ listRuntimeSessions(limit?: number): RuntimeSession[];
782
+ upsertRuntimeNode(node: RuntimeNode): void;
783
+ listRuntimeNodes(sessionId?: string, limit?: number): RuntimeNode[];
784
+ addRuntimeNodeLog(log: RuntimeNodeLog): void;
785
+ listRuntimeNodeLogs(sessionId?: string, tierId?: string, limit?: number): RuntimeNodeLog[];
786
+ addMessage(message: StoredMessage): void;
787
+ getSessionMessages(sessionId: string): StoredMessage[];
788
+ searchMessages(query: string, limit?: number): StoredMessage[];
789
+ createIdentity(identity: Identity): void;
790
+ updateIdentity(id: string, updates: Partial<Identity>): void;
791
+ getIdentity(id: string): Identity | null;
792
+ getDefaultIdentity(): Identity | null;
793
+ listIdentities(): Identity[];
794
+ deleteIdentity(id: string): void;
795
+ saveScheduledTask(task: ScheduledTask): void;
796
+ listScheduledTasks(): ScheduledTask[];
797
+ deleteScheduledTask(id: string): void;
798
+ addAuditEntry(entry: AuditEntry): void;
799
+ getAuditLog(sessionId: string, limit?: number): AuditEntry[];
800
+ addFileSnapshot(sessionId: string, filePath: string, content: string): void;
801
+ getLatestFileSnapshots(sessionId: string): Array<{
802
+ filePath: string;
803
+ content: string;
804
+ }>;
805
+ upsertCachedModel(model: ModelInfo): void;
806
+ getCachedModels(provider?: ProviderType): ModelInfo[];
807
+ clearModelCache(provider?: ProviderType): void;
808
+ getCacheAge(): number;
809
+ saveModelProfile(modelId: string, provider: ProviderType, specializations: string[]): void;
810
+ getModelProfile(modelId: string, provider: ProviderType): ModelInfo | undefined;
811
+ getProfiledModelIds(): string[];
812
+ private toolResultCache;
813
+ private static CACHEABLE_TOOLS;
814
+ private static TOOL_TTL_MS;
815
+ /**
816
+ * Returns a cached tool result, or null if not cached / expired.
817
+ */
818
+ getToolResult(toolName: string, input: Record<string, unknown>): string | null;
819
+ /**
820
+ * Stores a tool result in the in-memory cache.
821
+ * Only caches read-only/safe tools (see CACHEABLE_TOOLS).
822
+ */
823
+ setToolResult(toolName: string, input: Record<string, unknown>, result: string): void;
824
+ /** Invalidate tool cache for a specific tool name, or all tools if omitted. */
825
+ invalidateToolCache(toolName?: string): void;
826
+ close(): void;
827
+ private migrate;
828
+ private deserializeSession;
829
+ private deserializeMessage;
830
+ private deserializeIdentity;
831
+ private deserializeScheduledTask;
832
+ }
833
+
834
+ interface DelegationSavings {
835
+ /** USD saved vs. running every call on the T1 model. 0 when nothing was saved. */
836
+ savedUsd: number;
837
+ /** Percentage of the counterfactual cost that was saved (0–100, one decimal). */
838
+ savedPct: number;
839
+ /** What the session would have cost if every call had used the T1 model. */
840
+ counterfactualUsd: number;
841
+ }
842
+
843
+ type BenchmarkProfile = Partial<Record<Exclude<TaskType, 'mixed'>, number>>;
844
+
845
+ type DataSource = 'live' | 'cache' | 'bundled';
846
+ interface PriceEntry {
847
+ input: number;
848
+ output: number;
849
+ }
850
+ interface LiveDataOptions {
851
+ /** Master switch for live quality fetch. Default: true. */
852
+ live?: boolean;
853
+ /** Master switch for live OpenRouter pricing. Default: true. */
854
+ pricingLive?: boolean;
855
+ /** Hours a fetched snapshot stays fresh before re-fetching. Default: 24. */
856
+ refreshHours?: number;
857
+ /** Override the quality snapshot URL. */
858
+ sourceUrl?: string;
859
+ /** Override the on-disk cache path (tests). */
860
+ cacheFile?: string;
861
+ }
862
+ declare class LiveDataProvider {
863
+ private snapshot;
864
+ private prices;
865
+ private source;
866
+ private fetchedAt;
867
+ private loaded;
868
+ private refreshing;
869
+ private readonly opts;
870
+ constructor(opts?: LiveDataOptions);
871
+ /** Load cached data from disk (cheap, no network). Safe to call repeatedly. */
872
+ load(): Promise<void>;
873
+ /**
874
+ * Refresh from the network if the cache is older than the TTL. Coalesces
875
+ * concurrent callers and never throws — failures keep last-known-good data.
876
+ */
877
+ refresh(force?: boolean): Promise<void>;
878
+ private doRefresh;
879
+ private fetchSnapshot;
880
+ private fetchPrices;
881
+ private saveCache;
882
+ /** Quality profile for a model family, or null when we have no live/cached data. */
883
+ getQualityProfile(family: string): BenchmarkProfile | null;
884
+ /** Current per-1k price for a model id, or null when unknown. */
885
+ getLivePrice(modelId: string): PriceEntry | null;
886
+ /**
887
+ * Returns a price-corrected copy of each model when live pricing is known,
888
+ * leaving the original untouched (so the shared catalog is never mutated).
889
+ */
890
+ applyLivePricing(models: ModelInfo[]): ModelInfo[];
891
+ /** Where the active quality data came from — for /why and `cascade models`. */
892
+ getDataSource(): DataSource;
893
+ getGeneratedAt(): string | null;
894
+ hasLivePricing(): boolean;
895
+ }
896
+
578
897
  interface RouterStats {
579
898
  totalTokens: number;
580
899
  totalCostUsd: number;
@@ -596,6 +915,10 @@ declare class CascadeRouter extends EventEmitter {
596
915
  private tierModels;
597
916
  private config;
598
917
  private sessionCostUsd;
918
+ private runTokens;
919
+ private runCostUsd;
920
+ private runBudgetExceeded;
921
+ private runBudgetExceededReason;
599
922
  /**
600
923
  * Budget state machine — guards against two concurrent `generate()` calls
601
924
  * each firing the warning or both slipping past the hard cap. All
@@ -605,6 +928,13 @@ declare class CascadeRouter extends EventEmitter {
605
928
  private budgetState;
606
929
  private budgetExceededReason;
607
930
  private tpmLimiter;
931
+ private localQueue;
932
+ private taskAnalyzer?;
933
+ private liveData?;
934
+ /** Snapshot of configured/default tier models, taken before Cascade Auto overrides them. */
935
+ private originalTierModels?;
936
+ /** The current run's abort signal — injected into every provider call so a cancel aborts in-flight requests. */
937
+ private runSignal?;
608
938
  /** Thrown when the configured budget is exceeded. */
609
939
  static BudgetExceededError: {
610
940
  new (msg: string): {
@@ -619,16 +949,81 @@ declare class CascadeRouter extends EventEmitter {
619
949
  };
620
950
  constructor();
621
951
  init(config: CascadeConfig): Promise<void>;
952
+ /**
953
+ * Run model specialization profiling in the background.
954
+ * Only profiles models that haven't been profiled yet (cache-first).
955
+ * No-op if store is not provided.
956
+ */
957
+ profileModels(store: MemoryStore): Promise<void>;
958
+ /**
959
+ * Cascade Auto live data: discover/validate real model ids from each cloud
960
+ * provider, then fetch current public quality scores + per-token prices and
961
+ * apply the prices to the available-model set. Best-effort and safe to run in
962
+ * the background — any failure leaves the bundled catalog/benchmarks in effect.
963
+ */
964
+ refreshLiveData(): Promise<void>;
965
+ /** Returns the live-data provider once refreshLiveData has run (UX/insight). */
966
+ getLiveData(): LiveDataProvider | undefined;
967
+ /**
968
+ * Query each available cloud provider's live model list and register the
969
+ * results. Confirms catalog ids still exist and surfaces newly released
970
+ * models without a package upgrade. Mirrors discoverOllamaModels.
971
+ */
972
+ private discoverProviderModels;
973
+ /**
974
+ * Replace available models with live-priced copies and refresh the already
975
+ * resolved tier models so shared-tier cost accounting uses current prices.
976
+ */
977
+ private applyLivePricing;
622
978
  generate(tier: TierRole, options: GenerateOptions, onChunk?: (chunk: StreamChunk) => void, requireVision?: boolean): Promise<GenerateResult>;
623
979
  getModelForTier(tier: TierRole): ModelInfo | undefined;
980
+ /** Reflection settings for workers (config.reflection). Off unless enabled. */
981
+ getReflectionConfig(): {
982
+ enabled: boolean;
983
+ maxRounds: number;
984
+ };
985
+ /** T3→T2 reinforcement settings (config.reinforcements). Off unless enabled. */
986
+ getReinforcementsConfig(): {
987
+ enabled: boolean;
988
+ maxPerSection: number;
989
+ };
990
+ /**
991
+ * Resolved T3 wave execution mode. 'auto' becomes 'sequential' when the T3
992
+ * tier resolves to a LOCAL model (the single-GPU queue serializes anyway, so
993
+ * running them in parallel just thrashes it), and 'parallel' for cloud.
994
+ */
995
+ getT3ExecutionMode(): 'parallel' | 'sequential';
624
996
  /**
625
997
  * Cascade Auto: temporarily override the model for a tier.
626
998
  * Used by TaskAnalyzer to inject task-optimal models before execution.
627
999
  * The override is valid for the current task only — restored by restoreTierModels().
628
1000
  */
629
1001
  overrideTierModel(tier: TierRole, model: ModelInfo): void;
1002
+ /**
1003
+ * Restore tier models to the configured/default baseline captured before the
1004
+ * first Cascade Auto override. Called at the end of each run so `/why`, the
1005
+ * status bar, and the next run reflect the configured models, not stale picks.
1006
+ */
1007
+ restoreTierModels(): void;
1008
+ /** Set (or clear) the current run's abort signal for instant cancellation. */
1009
+ setRunSignal(signal: AbortSignal | undefined): void;
630
1010
  getSelector(): ModelSelector;
1011
+ /** Wire the Cascade Auto task analyzer used for per-subtask model routing. */
1012
+ setTaskAnalyzer(analyzer: TaskAnalyzer): void;
1013
+ /**
1014
+ * Cascade Auto per-subtask routing: pick the benchmark-best model for a
1015
+ * specific subtask's text, scoped to the tier's eligible candidates. Returns
1016
+ * null when Cascade Auto is off (callers then use the shared tier model).
1017
+ * Pure heuristic — no extra LLM call.
1018
+ */
1019
+ selectModelForSubtask(tier: TierRole, text: string): Promise<ModelInfo | null>;
631
1020
  getStats(): RouterStats;
1021
+ /**
1022
+ * What did delegation save? Compares actual spend against the
1023
+ * counterfactual of every call running on the T1 model. This is the
1024
+ * number only a tiered hierarchy can show.
1025
+ */
1026
+ getDelegationSavings(): DelegationSavings;
632
1027
  /**
633
1028
  * Returns a human-readable cost summary broken down by tier.
634
1029
  * Example: { T1: "$0.0120 (2 calls, 1500 tokens)", T2: "$0.0043 (6 calls, 4200 tokens)", ... }
@@ -652,6 +1047,8 @@ declare class CascadeRouter extends EventEmitter {
652
1047
  * Sets (or clears) a runtime session budget cap (USD).
653
1048
  * Pass null to remove the cap.
654
1049
  */
1050
+ /** Raise/set the per-task token cap at runtime (used by /continue resume). */
1051
+ setMaxTokensPerRun(maxTokens: number): void;
655
1052
  setSessionBudget(usd: number | null): void;
656
1053
  /**
657
1054
  * Returns how much of the session budget has been used (USD).
@@ -674,6 +1071,17 @@ declare class CascadeRouter extends EventEmitter {
674
1071
  private createProvider;
675
1072
  private getAnyModelForProvider;
676
1073
  private recordStats;
1074
+ /**
1075
+ * Resets per-run accounting at the start of each `cascade run`. Session
1076
+ * totals and a session-wide budget halt are deliberately preserved; only the
1077
+ * per-task ceiling is cleared so the next task starts with a fresh allowance.
1078
+ */
1079
+ beginRun(): void;
1080
+ /**
1081
+ * Enforce the hard per-task ceiling. Once tripped, the flag makes every
1082
+ * subsequent (and concurrent) generate() call in this run fail fast.
1083
+ */
1084
+ private enforceRunBudget;
677
1085
  /**
678
1086
  * Single point of truth for budget state transitions. Called after each
679
1087
  * recordStats() so warning and hard-stop transitions are evaluated
@@ -721,6 +1129,12 @@ interface McpClientOptions {
721
1129
  trustedServers?: string[];
722
1130
  /** Approval gate invoked when a server is NOT in the trusted list. */
723
1131
  approvalCallback?: McpApprovalCallback;
1132
+ /**
1133
+ * Sink for non-fatal warnings. Hosts with a live TUI must route these
1134
+ * away from the terminal — a raw console write mid-frame corrupts Ink's
1135
+ * rendering. Defaults to console.warn.
1136
+ */
1137
+ onWarn?: (message: string) => void;
724
1138
  }
725
1139
  declare class McpClient {
726
1140
  private static activeProcessPids;
@@ -734,6 +1148,7 @@ declare class McpClient {
734
1148
  private tools;
735
1149
  private trustedServers;
736
1150
  private approvalCallback;
1151
+ private onWarn;
737
1152
  constructor(options?: McpClientOptions);
738
1153
  connect(server: McpServerConfig): Promise<void>;
739
1154
  disconnect(serverName: string): Promise<void>;
@@ -768,7 +1183,7 @@ interface ToolPlugin {
768
1183
  /** Called once when the plugin is registered */
769
1184
  onRegister?: (registry: ToolRegistry) => void;
770
1185
  }
771
- declare class ToolRegistry {
1186
+ declare class ToolRegistry extends EventEmitter {
772
1187
  private tools;
773
1188
  private config;
774
1189
  private ignoreMatcher;
@@ -777,6 +1192,12 @@ declare class ToolRegistry {
777
1192
  private plugins;
778
1193
  constructor(config: ToolsConfig, workspaceRoot?: string);
779
1194
  register(tool: BaseTool): void;
1195
+ /**
1196
+ * Wait until a named tool is registered, resolving immediately if it already exists.
1197
+ * T3 workers can call this after encountering a missing-tool error to resume
1198
+ * automatically once T2 synthesizes the tool.
1199
+ */
1200
+ waitForTool(toolName: string, timeoutMs?: number): Promise<void>;
780
1201
  /**
781
1202
  * Register a ToolPlugin, loading all its tools into the registry.
782
1203
  * Each tool is configured with the current workspace root.
@@ -802,114 +1223,6 @@ declare class ToolRegistry {
802
1223
  private isIgnored;
803
1224
  }
804
1225
 
805
- declare class MemoryStore {
806
- private db;
807
- constructor(dbPath: string);
808
- private writeQueue;
809
- private isProcessingQueue;
810
- private processQueue;
811
- private enqueueWrite;
812
- createSession(session: Session): void;
813
- updateSession(id: string, updates: Partial<Session>): void;
814
- getSession(id: string): Session | null;
815
- listSessions(identityId?: string, limit?: number): Session[];
816
- deleteSession(id: string): void;
817
- deleteAllSessions(): void;
818
- deleteRuntimeSession(sessionId: string): void;
819
- deleteAllRuntimeNodes(): void;
820
- branchSession(originalId: string, newId: string): void;
821
- upsertRuntimeSession(session: RuntimeSession): void;
822
- listRuntimeSessions(limit?: number): RuntimeSession[];
823
- upsertRuntimeNode(node: RuntimeNode): void;
824
- listRuntimeNodes(sessionId?: string, limit?: number): RuntimeNode[];
825
- addRuntimeNodeLog(log: RuntimeNodeLog): void;
826
- listRuntimeNodeLogs(sessionId?: string, tierId?: string, limit?: number): RuntimeNodeLog[];
827
- addMessage(message: StoredMessage): void;
828
- getSessionMessages(sessionId: string): StoredMessage[];
829
- searchMessages(query: string, limit?: number): StoredMessage[];
830
- createIdentity(identity: Identity): void;
831
- updateIdentity(id: string, updates: Partial<Identity>): void;
832
- getIdentity(id: string): Identity | null;
833
- getDefaultIdentity(): Identity | null;
834
- listIdentities(): Identity[];
835
- deleteIdentity(id: string): void;
836
- saveScheduledTask(task: ScheduledTask): void;
837
- listScheduledTasks(): ScheduledTask[];
838
- deleteScheduledTask(id: string): void;
839
- addAuditEntry(entry: AuditEntry): void;
840
- getAuditLog(sessionId: string, limit?: number): AuditEntry[];
841
- addFileSnapshot(sessionId: string, filePath: string, content: string): void;
842
- getLatestFileSnapshots(sessionId: string): Array<{
843
- filePath: string;
844
- content: string;
845
- }>;
846
- upsertCachedModel(model: ModelInfo): void;
847
- getCachedModels(provider?: ProviderType): ModelInfo[];
848
- clearModelCache(provider?: ProviderType): void;
849
- getCacheAge(): number;
850
- private toolResultCache;
851
- private static CACHEABLE_TOOLS;
852
- private static TOOL_TTL_MS;
853
- /**
854
- * Returns a cached tool result, or null if not cached / expired.
855
- */
856
- getToolResult(toolName: string, input: Record<string, unknown>): string | null;
857
- /**
858
- * Stores a tool result in the in-memory cache.
859
- * Only caches read-only/safe tools (see CACHEABLE_TOOLS).
860
- */
861
- setToolResult(toolName: string, input: Record<string, unknown>, result: string): void;
862
- /** Invalidate tool cache for a specific tool name, or all tools if omitted. */
863
- invalidateToolCache(toolName?: string): void;
864
- close(): void;
865
- private migrate;
866
- private deserializeSession;
867
- private deserializeMessage;
868
- private deserializeIdentity;
869
- private deserializeScheduledTask;
870
- }
871
-
872
- declare class Cascade extends EventEmitter {
873
- private router;
874
- private toolRegistry;
875
- private mcpClient;
876
- private config;
877
- private initialized;
878
- private initPromise?;
879
- private store?;
880
- private audit?;
881
- private telemetry;
882
- private taskAnalyzer?;
883
- private toolCreator?;
884
- constructor(config: CascadeConfig, workspacePath: string, store?: MemoryStore);
885
- private initOptionalFeatures;
886
- setStore(store: MemoryStore): void;
887
- /**
888
- * Emit an `mcp:approval-required` event and wait up to 30 s for a listener
889
- * to resolve it via `cascade.resolveMcpApproval(serverName, approved)`.
890
- *
891
- * If no listener is attached (e.g. a non-interactive SDK run), the default
892
- * is to reject — safer than silently spawning an arbitrary subprocess.
893
- */
894
- private pendingMcpApprovals;
895
- private requestMcpApproval;
896
- /** Resolve a pending MCP server approval from a REPL / dashboard listener. */
897
- resolveMcpApproval(serverName: string, approved: boolean): void;
898
- init(): Promise<void>;
899
- private isCasualGreeting;
900
- private looksLikeSimpleArtifactTask;
901
- private determineComplexity;
902
- run(options: CascadeRunOptions): Promise<CascadeRunResult>;
903
- getRouter(): CascadeRouter;
904
- getToolRegistry(): ToolRegistry;
905
- /**
906
- * Tear down MCP connections and flush any pending telemetry so long-lived
907
- * hosts (REPL, SDK embedders) don't leak child processes. Safe to call
908
- * multiple times.
909
- */
910
- close(): Promise<void>;
911
- }
912
-
913
1226
  declare abstract class BaseTier extends EventEmitter {
914
1227
  readonly id: string;
915
1228
  readonly role: TierRole;
@@ -959,6 +1272,13 @@ declare class PermissionEscalator extends EventEmitter {
959
1272
  private t1Evaluator?;
960
1273
  /** Pending user-decision resolvers keyed by request ID */
961
1274
  private pendingUserDecisions;
1275
+ /** ms to wait for a user approval decision before denying for safety. */
1276
+ private readonly approvalTimeoutMs;
1277
+ /** Autonomous mode (autonomy: 'auto'): non-dangerous tools auto-approve. */
1278
+ private autonomous;
1279
+ constructor(approvalTimeoutMs?: number, autonomous?: boolean);
1280
+ /** Toggle autonomous auto-approval at runtime (e.g. from /auto). */
1281
+ setAutonomous(on: boolean): void;
962
1282
  setT2Evaluator(evaluator: T2Evaluator): void;
963
1283
  setT1Evaluator(evaluator: T1Evaluator): void;
964
1284
  /**
@@ -978,22 +1298,84 @@ declare class PermissionEscalator extends EventEmitter {
978
1298
  cancelAllPending(): void;
979
1299
  }
980
1300
 
1301
+ interface GeneratedToolSpec {
1302
+ name: string;
1303
+ description: string;
1304
+ inputSchema: Record<string, unknown>;
1305
+ /** Raw JS function body — receives `input`, `fetch`, and `callTool`. Returns string | Promise<string> */
1306
+ executeCode: string;
1307
+ isDangerous: boolean;
1308
+ /**
1309
+ * Whether this tool's source is trusted (generated in THIS session) vs untrusted
1310
+ * (loaded from disk or received from a peer). Untrusted tools always re-escalate
1311
+ * their dangerous actions. Never persisted as trusted — forced false on reload.
1312
+ */
1313
+ trusted?: boolean;
1314
+ }
981
1315
  declare class ToolCreator {
982
1316
  private router;
983
1317
  private registry;
984
- private createdTools;
985
- constructor(router: CascadeRouter, registry: ToolRegistry);
1318
+ private escalator?;
1319
+ private workspacePath?;
1320
+ /** When false, persisted tools are neither loaded nor written. */
1321
+ private persistEnabled;
1322
+ private logger?;
1323
+ /** name → spec, for persistence, broadcast, and re-registration. */
1324
+ private specs;
1325
+ /** capability fingerprint → tool name, so the same need isn't re-generated. */
1326
+ private capabilityIndex;
1327
+ constructor(router: CascadeRouter, registry: ToolRegistry, workspacePath?: string, persistEnabled?: boolean);
1328
+ setPermissionEscalator(escalator: PermissionEscalator): void;
1329
+ /** Route diagnostics through the host (Cascade) so they survive the Ink TUI. */
1330
+ setLogger(fn: (msg: string) => void): void;
1331
+ /** Returns the stored spec for a created tool (for peer broadcast). */
1332
+ getSpec(name: string): GeneratedToolSpec | undefined;
1333
+ private log;
986
1334
  /**
987
1335
  * Generate a new tool from a description and register it with the ToolRegistry.
988
- * Returns the tool name if successful, null if generation failed.
1336
+ * Returns the tool name on success, or null on failure (with a logged reason —
1337
+ * failures are no longer swallowed silently). Reuses an existing tool when the
1338
+ * same capability has already been created (dedup) so peers/runs don't
1339
+ * regenerate identical tools.
989
1340
  */
990
1341
  createTool(description: string, context: string): Promise<string | null>;
991
1342
  /**
992
- * Returns the names of all tools created in this session.
1343
+ * Register a spec (from createTool, disk, or a peer) into the registry.
1344
+ * Idempotent — a name already present is skipped. `trusted` is set by the
1345
+ * caller and never inherited from disk: createTool passes true; persisted and
1346
+ * peer-broadcast specs pass false, so their dangerous actions always re-escalate.
1347
+ * The DynamicTool resolves the escalator lazily (`() => this.escalator`) so a
1348
+ * later setPermissionEscalator covers tools registered before the run wired it.
993
1349
  */
1350
+ registerSpec(spec: GeneratedToolSpec, trusted?: boolean): void;
1351
+ /** Load tools persisted by previous runs and register them — as UNTRUSTED, and
1352
+ * only after re-validating each spec (its source could have been tampered with
1353
+ * or authored during a prior prompt-injected run). Untrusted tools re-escalate
1354
+ * any dangerous action, so a silently-reloaded tool can't act without approval. */
1355
+ loadPersistedTools(): Promise<void>;
1356
+ private persist;
1357
+ /** Returns the names of all tools created in this session. */
994
1358
  getCreatedTools(): string[];
995
1359
  }
996
1360
 
1361
+ interface TaskPlan {
1362
+ complexity: TaskComplexity;
1363
+ sections: T1ToT2Assignment[];
1364
+ reasoning: string;
1365
+ }
1366
+ /** Decision returned by a plan-approval gate (the "boardroom"). */
1367
+ interface PlanApprovalDecision {
1368
+ approved: boolean;
1369
+ /** Optional steering note — triggers a re-plan pass, then re-asks (up to maxRevisionRounds). */
1370
+ note?: string;
1371
+ /** Optional user-edited plan — applied directly (no re-decompose) before proceeding. */
1372
+ editedPlan?: TaskPlan;
1373
+ }
1374
+ /** Extra context surfaced to the approval gate alongside the plan. */
1375
+ interface PlanApprovalMeta {
1376
+ /** Automated reviewer's critique of the plan (when planReview.autoReviewer is on). */
1377
+ critique?: string;
1378
+ }
997
1379
  declare class T1Administrator extends BaseTier {
998
1380
  private router;
999
1381
  private toolRegistry;
@@ -1006,6 +1388,9 @@ declare class T1Administrator extends BaseTier {
1006
1388
  private toolCreator?;
1007
1389
  /** Stored overall task goal — used when evaluating escalated permissions */
1008
1390
  private taskGoal;
1391
+ private peerMessageCallback?;
1392
+ private peerMessageSessionId;
1393
+ private planApprovalCallback?;
1009
1394
  constructor(router: CascadeRouter, toolRegistry: ToolRegistry, config: CascadeConfig);
1010
1395
  setStore(store: MemoryStore): void;
1011
1396
  /**
@@ -1014,6 +1399,14 @@ declare class T1Administrator extends BaseTier {
1014
1399
  */
1015
1400
  setPermissionEscalator(escalator: PermissionEscalator): void;
1016
1401
  setToolCreator(creator: ToolCreator): void;
1402
+ setPeerMessageCallback(cb: (event: PeerMessageEvent) => void, sessionId: string): void;
1403
+ /**
1404
+ * Install a "boardroom" gate: called with T1's plan BEFORE any T2 manager
1405
+ * spawns. When unset, plans proceed immediately (headless/SDK unchanged).
1406
+ */
1407
+ setPlanApprovalCallback(cb: (plan: TaskPlan, meta?: PlanApprovalMeta) => Promise<PlanApprovalDecision>): void;
1408
+ /** Decompose a prompt into a plan WITHOUT executing it (powers /plan preview). */
1409
+ previewPlan(prompt: string): Promise<TaskPlan>;
1017
1410
  execute(userPrompt: string, images?: ImageAttachment[], systemContext?: string, signal?: AbortSignal): Promise<{
1018
1411
  output: string;
1019
1412
  t2Results: T2Result[];
@@ -1023,6 +1416,12 @@ declare class T1Administrator extends BaseTier {
1023
1416
  getEscalations(): EscalationPayload[];
1024
1417
  private reviewT2Outputs;
1025
1418
  private analyzeImages;
1419
+ /**
1420
+ * Automated reviewer pass: a single T1 critique of the plan before the user
1421
+ * sees it (planReview.autoReviewer). Best-effort — returns null on any error
1422
+ * so it never blocks the approval gate.
1423
+ */
1424
+ private reviewPlan;
1026
1425
  private decomposeTask;
1027
1426
  private validatePlan;
1028
1427
  private dispatchT2Managers;
@@ -1039,6 +1438,122 @@ declare class T1Administrator extends BaseTier {
1039
1438
  private evaluatePermissionAtT1;
1040
1439
  }
1041
1440
 
1441
+ /** One entry in the per-run orchestration decision trail (see /why). */
1442
+ interface DecisionLogEntry {
1443
+ at: string;
1444
+ kind: 'complexity' | 'model' | 'failover' | 'escalation';
1445
+ detail: string;
1446
+ }
1447
+ declare class Cascade extends EventEmitter {
1448
+ private router;
1449
+ private toolRegistry;
1450
+ private mcpClient;
1451
+ private config;
1452
+ /** Orchestration decisions for the CURRENT run — cleared on each run(). */
1453
+ private decisionLog;
1454
+ private initialized;
1455
+ /** Last task that stopped at the budget cap — powers /continue (resumeRun). */
1456
+ private lastInterruptedRun?;
1457
+ private initPromise?;
1458
+ private store?;
1459
+ private audit?;
1460
+ private telemetry;
1461
+ private taskAnalyzer?;
1462
+ private perfTracker?;
1463
+ private toolCreator?;
1464
+ private workspacePath;
1465
+ constructor(config: CascadeConfig, workspacePath: string, store?: MemoryStore);
1466
+ private initOptionalFeatures;
1467
+ setStore(store: MemoryStore): void;
1468
+ /**
1469
+ * Emit an `mcp:approval-required` event and wait up to 30 s for a listener
1470
+ * to resolve it via `cascade.resolveMcpApproval(serverName, approved)`.
1471
+ *
1472
+ * If no listener is attached (e.g. a non-interactive SDK run), the default
1473
+ * is to reject — safer than silently spawning an arbitrary subprocess.
1474
+ */
1475
+ private pendingMcpApprovals;
1476
+ private requestMcpApproval;
1477
+ private recordDecision;
1478
+ /**
1479
+ * The orchestration decision trail for the most recent run: complexity
1480
+ * verdict (and why), which model served each tier, failovers, and
1481
+ * escalations. Powers the /why command.
1482
+ */
1483
+ getDecisionLog(): DecisionLogEntry[];
1484
+ /** Resolve a pending MCP server approval from a REPL / dashboard listener. */
1485
+ resolveMcpApproval(serverName: string, approved: boolean): void;
1486
+ private pendingPlanApproval?;
1487
+ private requestPlanApproval;
1488
+ /**
1489
+ * Resolve a pending boardroom plan approval from a REPL / dashboard listener.
1490
+ * An optional `note` re-plans and re-asks; an optional `editedPlan` is applied
1491
+ * directly (no re-decompose).
1492
+ */
1493
+ resolvePlanApproval(approved: boolean, note?: string, editedPlan?: TaskPlan): void;
1494
+ /**
1495
+ * Autonomy control (used by the /auto command). 'auto' makes the next run
1496
+ * hands-off: the plan gate auto-approves and non-dangerous tools auto-approve,
1497
+ * while dangerous tools still escalate and budget caps remain the hard stop.
1498
+ */
1499
+ setAutonomy(mode: 'manual' | 'auto'): void;
1500
+ getAutonomy(): 'manual' | 'auto';
1501
+ /**
1502
+ * Preview T1's decomposition for a prompt WITHOUT executing it (powers /plan).
1503
+ * Idempotent init guard, so it works before the first run.
1504
+ */
1505
+ previewPlan(prompt: string): Promise<TaskPlan>;
1506
+ /** True when a task stopped at the budget cap and can be resumed via /continue. */
1507
+ hasResumableRun(): boolean;
1508
+ /**
1509
+ * Raise the per-run token budget for a resume and return the continuation
1510
+ * prompt (or null when nothing is resumable). Consumes the interrupted-run
1511
+ * state. The REPL submits the returned prompt through its normal flow so the
1512
+ * resumed run renders like any other; `resumeRun` wraps this for SDK callers.
1513
+ */
1514
+ prepareResume(opts?: {
1515
+ maxTokens?: number;
1516
+ }): string | null;
1517
+ /**
1518
+ * Resume the last budget-capped task with a raised budget (SDK/headless).
1519
+ * Returns null when there is nothing to resume.
1520
+ */
1521
+ resumeRun(opts?: {
1522
+ maxTokens?: number;
1523
+ }): Promise<CascadeRunResult | null>;
1524
+ /**
1525
+ * Rough pre-execution cost estimate for a plan: ~3 T2 calls per section
1526
+ * plus ~4 T3 calls per subtask at typical token volumes. A ballpark for
1527
+ * the approval dialog, not an invoice — always label it "est."
1528
+ */
1529
+ private estimatePlanCost;
1530
+ init(): Promise<void>;
1531
+ private isCasualGreeting;
1532
+ private looksLikeSimpleArtifactTask;
1533
+ private looksLikeConversational;
1534
+ /**
1535
+ * Read-only inquiries about existing content ("read / review / explain /
1536
+ * summarize / analyze this file or codebase and tell me …") are single-agent
1537
+ * work — one worker with file/grep tools answers directly, no T1→T2→T3 fan-out.
1538
+ * They must NOT ask to create, build, implement, refactor, or save an artifact;
1539
+ * those stay on the heavier classifier path. This keeps trivial "what does this
1540
+ * do?" requests from being mis-routed into a multi-agent, multi-thousand-token run.
1541
+ */
1542
+ private looksLikeReadOnlyInquiry;
1543
+ private static globCache;
1544
+ private countWorkspaceFiles;
1545
+ private determineComplexity;
1546
+ run(options: CascadeRunOptions): Promise<CascadeRunResult>;
1547
+ getRouter(): CascadeRouter;
1548
+ getToolRegistry(): ToolRegistry;
1549
+ /**
1550
+ * Tear down MCP connections and flush any pending telemetry so long-lived
1551
+ * hosts (REPL, SDK embedders) don't leak child processes. Safe to call
1552
+ * multiple times.
1553
+ */
1554
+ close(): Promise<void>;
1555
+ }
1556
+
1042
1557
  interface PeerOutput {
1043
1558
  subtaskId: string;
1044
1559
  fromId: string;
@@ -1063,13 +1578,31 @@ declare class PeerBus extends EventEmitter {
1063
1578
  private barriers;
1064
1579
  private broadcastLog;
1065
1580
  private fileLocks;
1581
+ /** subtaskIds whose T3 is being retried by T2 — dependents should re-wait rather than fail fast */
1582
+ private retryPending;
1583
+ /** Called when any peer message or broadcast is sent — used for dashboard visibility. */
1584
+ onPeerMessage?: (event: PeerMessageEvent) => void;
1585
+ sessionId: string;
1586
+ /** Surface coordination traffic (locks, barriers) to the visibility hook. */
1587
+ private emitCoordination;
1066
1588
  register(peerId: string): void;
1067
1589
  /**
1068
1590
  * Publish output — unblocks any peers waiting on this subtaskId
1069
1591
  */
1070
1592
  publish(fromId: string, subtaskId: string, output: string, status: PeerOutput['status']): void;
1071
1593
  /**
1072
- * Wait for a specific subtask's output resolves immediately if already available
1594
+ * Mark a subtask as retry-pending so dependents re-wait instead of failing fast
1595
+ * when they see an ESCALATED status.
1596
+ */
1597
+ markRetryPending(subtaskId: string): void;
1598
+ /** Called by T2 after retry resolves (success or final failure). */
1599
+ clearRetryPending(subtaskId: string): void;
1600
+ /** Remove a single output entry so a respawned worker can republish without clearing prior-wave outputs. */
1601
+ clearOutput(subtaskId: string): void;
1602
+ isRetryPending(subtaskId: string): boolean;
1603
+ /**
1604
+ * Wait for a specific subtask's output — resolves immediately if already available.
1605
+ * If the output is ESCALATED but a retry is pending, waits for the retry result.
1073
1606
  */
1074
1607
  waitFor(subtaskId: string, timeoutMs?: number): Promise<PeerOutput>;
1075
1608
  /**
@@ -1104,6 +1637,11 @@ declare class PeerBus extends EventEmitter {
1104
1637
  * Check if a file is currently locked (non-blocking).
1105
1638
  */
1106
1639
  isFileLocked(filePath: string): boolean;
1640
+ /**
1641
+ * Reset all runtime output/waiter state for a fresh T3 respawn wave.
1642
+ * Preserves member registrations and barrier definitions.
1643
+ */
1644
+ reset(): void;
1107
1645
  /**
1108
1646
  * Clear broadcast log — call between phases to avoid stale announcements.
1109
1647
  */
@@ -1125,6 +1663,7 @@ declare class T2Manager extends BaseTier {
1125
1663
  private router;
1126
1664
  private toolRegistry;
1127
1665
  private assignment?;
1666
+ private sectionModel?;
1128
1667
  private t3Workers;
1129
1668
  private escalations;
1130
1669
  private peerSyncBuffer;
@@ -1133,7 +1672,12 @@ declare class T2Manager extends BaseTier {
1133
1672
  private t2PeerBus?;
1134
1673
  private permissionEscalator?;
1135
1674
  private toolCreator?;
1675
+ /** Optional boardroom gate (Moderate / root-T2 runs) — pauses after decomposition. */
1676
+ private planApprovalCallback?;
1677
+ /** AbortController for the current T3 wave — aborted on cancel-and-respawn */
1678
+ private waveAbortController;
1136
1679
  setPeerBus(bus: PeerBus): void;
1680
+ setPeerMessageCallback(cb: (event: PeerMessageEvent) => void, sessionId: string): void;
1137
1681
  constructor(router: CascadeRouter, toolRegistry: ToolRegistry, parentId: string);
1138
1682
  setStore(store: MemoryStore): void;
1139
1683
  /**
@@ -1142,6 +1686,16 @@ declare class T2Manager extends BaseTier {
1142
1686
  */
1143
1687
  setPermissionEscalator(escalator: PermissionEscalator): void;
1144
1688
  setToolCreator(creator: ToolCreator): void;
1689
+ /** Boardroom gate for Moderate (root-T2) runs: pause after decomposition. */
1690
+ setPlanApprovalCallback(cb: (subtasks: ReadonlyArray<{
1691
+ subtaskId: string;
1692
+ subtaskTitle: string;
1693
+ description: string;
1694
+ }>, sectionTitle: string) => Promise<{
1695
+ approved: boolean;
1696
+ note?: string;
1697
+ keepSubtaskIds?: string[];
1698
+ }>): void;
1145
1699
  /**
1146
1700
  * Phase 1 of T2 peer discussion: broadcast this section's plan so sibling T2s
1147
1701
  * and T1 can detect overlaps and coordinate execution order.
@@ -1157,6 +1711,7 @@ declare class T2Manager extends BaseTier {
1157
1711
  receivePeerSync(fromId: string, content: unknown): void;
1158
1712
  execute(assignment: T1ToT2Assignment, taskId: string, signal?: AbortSignal): Promise<T2Result>;
1159
1713
  private decomposeSection;
1714
+ private buildWorkerMap;
1160
1715
  private executeSubtasks;
1161
1716
  /**
1162
1717
  * Runs T3 workers respecting dependsOn declarations.
@@ -1199,6 +1754,10 @@ declare class T3Worker extends BaseTier {
1199
1754
  private store?;
1200
1755
  private audit?;
1201
1756
  private tools;
1757
+ /** 0 = top-level worker (may request reinforcements); 1 = a spawned reinforcement (may not). */
1758
+ private reinforcementDepth;
1759
+ /** Sibling-worker requests this worker made via request_workers (T3→T2). */
1760
+ private pendingReinforcements;
1202
1761
  /** @deprecated — kept only as fallback when no escalator is attached */
1203
1762
  private sessionApprovals;
1204
1763
  private peerBus?;
@@ -1206,6 +1765,8 @@ declare class T3Worker extends BaseTier {
1206
1765
  private toolCreator?;
1207
1766
  setPeerBus(bus: PeerBus): void;
1208
1767
  setPermissionEscalator(escalator: PermissionEscalator): void;
1768
+ /** Marks this worker as a spawned reinforcement (depth 1 — cannot request more). */
1769
+ markAsReinforcement(): void;
1209
1770
  setToolCreator(creator: ToolCreator): void;
1210
1771
  constructor(router: CascadeRouter, toolRegistry: ToolRegistry, parentId: string);
1211
1772
  setStore(store: MemoryStore, sessionId: string): void;
@@ -1215,7 +1776,26 @@ declare class T3Worker extends BaseTier {
1215
1776
  syncWithPeers(barrierName: string): Promise<void>;
1216
1777
  receivePeerSync(fromId: string, content: unknown): void;
1217
1778
  private runAgentLoop;
1779
+ /**
1780
+ * Lightweight argument check against the tool's JSON Schema: required fields
1781
+ * present and enum values in range. Not a full validator — just the two
1782
+ * failure modes weak models hit most. Returns an error message, or null if OK.
1783
+ */
1784
+ private validateToolInput;
1218
1785
  private executeTool;
1786
+ /**
1787
+ * Adaptive fallback cascade — invoked when executeTool() fails.
1788
+ * Strategy order:
1789
+ * 1. Find a semantically similar registered tool and retry with same input
1790
+ * 2. Synthesize a new tool via ToolCreator (if available) and run it
1791
+ * 3. Return the original error so the agent loop can decide what to do next
1792
+ */
1793
+ private adaptiveFallback;
1794
+ /**
1795
+ * Find a registered tool whose name/description semantically overlaps with
1796
+ * the failing tool. Returns the best candidate name, or null if none found.
1797
+ */
1798
+ private findAlternativeTool;
1219
1799
  /**
1220
1800
  * Announce which files this T3 plans to edit, then acquire locks on them
1221
1801
  * before competing siblings can claim them. T3s working on different files
@@ -1225,10 +1805,22 @@ declare class T3Worker extends BaseTier {
1225
1805
  private requiresArtifact;
1226
1806
  private extractArtifactPaths;
1227
1807
  private verifyArtifacts;
1808
+ /**
1809
+ * Reflection / self-critique: critique the output against the broader GOAL
1810
+ * (not just the subtask spec the self-test checks) and revise once if it falls
1811
+ * short. Two cheap calls per round — a JSON verdict, then a rewrite only if
1812
+ * needed. Best-effort: any parse/error just keeps the current output.
1813
+ */
1814
+ private reflectAndImprove;
1228
1815
  private selfTest;
1229
1816
  private correctOutput;
1230
1817
  private buildSystemPrompt;
1231
1818
  private buildInitialPrompt;
1819
+ /**
1820
+ * Records a request_workers call (T3→T2 reinforcement). Capped at
1821
+ * maxPerSection; reinforcement workers (depth 1) cannot request more.
1822
+ */
1823
+ private recordReinforcements;
1232
1824
  private buildResult;
1233
1825
  private isFileOperation;
1234
1826
  }
@@ -1392,6 +1984,7 @@ declare class DashboardSocket {
1392
1984
  emitCascadeEvent(ev: CascadeEvent): void;
1393
1985
  emitTierStatus(tierId: string, role: string, status: string, sessionId: string, action?: string): void;
1394
1986
  emitStreamToken(tierId: string, text: string, sessionId: string): void;
1987
+ emitPeerMessage(event: PeerMessageEvent): void;
1395
1988
  emitApprovalRequest(request: PermissionRequest): void;
1396
1989
  onApprovalResponse(callback: (data: PermissionDecisionPayload) => void): void;
1397
1990
  private setupHandlers;
@@ -1408,6 +2001,7 @@ declare class DashboardServer {
1408
2001
  private globalStore;
1409
2002
  private broadcastTimer;
1410
2003
  private port;
2004
+ private host;
1411
2005
  private workspacePath;
1412
2006
  constructor(config: CascadeConfig, store: MemoryStore, workspacePath?: string);
1413
2007
  start(): Promise<void>;
@@ -1585,7 +2179,7 @@ declare class Telemetry {
1585
2179
  shutdown(): Promise<void>;
1586
2180
  }
1587
2181
 
1588
- declare const CASCADE_VERSION = "0.4.0";
2182
+ declare const CASCADE_VERSION = "0.9.6";
1589
2183
  declare const CASCADE_CONFIG_DIR = ".cascade";
1590
2184
  declare const CASCADE_MD_FILE = "CASCADE.md";
1591
2185
  declare const CASCADE_IGNORE_FILE = ".cascadeignore";
@@ -1630,6 +2224,7 @@ declare const TOOL_NAMES: {
1630
2224
  readonly RUN_CODE: "run_code";
1631
2225
  readonly PEER_MESSAGE: "peer_message";
1632
2226
  readonly WEB_SEARCH: "web_search";
2227
+ readonly REQUEST_WORKERS: "request_workers";
1633
2228
  };
1634
2229
  declare const DEFAULT_APPROVAL_REQUIRED: string[];
1635
2230
  declare const PROVIDER_DISPLAY_NAMES: Record<ProviderType, string>;
@@ -1653,4 +2248,4 @@ declare class CascadeToolError extends Error {
1653
2248
  constructor(userMessage: string, cause?: unknown, retryable?: boolean);
1654
2249
  }
1655
2250
 
1656
- export { AZURE_BASE_URL_TEMPLATE, type ApprovalRequest, type ApprovalResponse, type AuditEntry, AuditLogger, type BudgetConfig, CASCADE_AUDIT_FILE, CASCADE_CONFIG_DIR, CASCADE_CONFIG_FILE, CASCADE_DASHBOARD_SECRET_FILE, CASCADE_DB_FILE, CASCADE_IGNORE_FILE, CASCADE_KEYSTORE_FILE, CASCADE_MD_FILE, CASCADE_VERSION, COMPLEXITY_T2_COUNT, Cascade, CascadeCancelledError, type CascadeConfig, type CascadeEvent, type CascadeEventType, CascadeIgnore, type CascadeMessage, CascadeRouter, type CascadeRunOptions, type CascadeRunResult, CascadeToolError, ConfigManager, type ConversationMessage, DEFAULT_API_PORT, DEFAULT_APPROVAL_REQUIRED, DEFAULT_AUTO_SUMMARIZE_AT, DEFAULT_CONTEXT_LIMIT, DEFAULT_DASHBOARD_PORT, DEFAULT_MAX_SESSION_MESSAGES, DEFAULT_RETENTION_DAYS, DEFAULT_THEME, type DashboardConfig, DashboardServer, type EscalationPayload, GLOBAL_CONFIG_DIR, GLOBAL_DB_FILE, GLOBAL_KEYSTORE_FILE, GLOBAL_RUNTIME_DB_FILE, type GenerateOptions, type GenerateResult, type HookDefinition, type HooksConfig, HooksRunner, type Identity, type ImageAttachment, Keystore, LM_STUDIO_BASE_URL, MODELS, McpClient, type McpServerConfig$1 as McpServerConfig, type MemoryConfig, MemoryStore, type Message, type MessageContent, type MessagePayload, type MessageStatus, type MessageType, type ModelInfo, type ModelOverrides, OLLAMA_BASE_URL, PROVIDER_DISPLAY_NAMES, type PeerMessage, type PeerSyncPayload, type PeerSyncType, type PermissionDecision, type PermissionDecisionPayload, type PermissionRequest, type ProviderConfig, type ProviderType, type RuntimeNode, type RuntimeNodeLog, type RuntimeRefreshPayload, type RuntimeScope, type RuntimeSession, type RuntimeSnapshotPayload, type ScheduledTask, type Session, type SessionCheckpoint, type SessionMetadata, type SessionSubscriptionPayload, type StatusUpdate, type StoredMessage, type StreamChunk, T1Administrator, type T1ToT2Assignment, T1_MODEL_PRIORITY, T2Manager, type T2Result, type T2ToT3Assignment, T2_MODEL_PRIORITY, type T3Result, type T3ResultPayload, 
type T3SubtaskSpec, T3Worker, T3_MODEL_PRIORITY, THEME_NAMES, TOOL_NAMES, type TaskComplexity, TaskScheduler, Telemetry, type TelemetryConfig, type Theme, type ThemeColors, type ThemeName, type TierConfig, type TierLimits, type TierRole, type TierStatus, type TokenUsage, type ToolCall, type ToolDefinition, type ToolExecuteOptions, ToolRegistry, type ToolResult, type ToolsConfig, VISION_MODEL_PRIORITY, type WebSearchConfig, type WebhookConfig, type WorkspaceConfig, createCascade, runCascade, streamCascade };
2251
+ export { AZURE_BASE_URL_TEMPLATE, type ApprovalRequest, type ApprovalResponse, type AuditEntry, AuditLogger, type BenchmarksConfig, type BudgetConfig, CASCADE_AUDIT_FILE, CASCADE_CONFIG_DIR, CASCADE_CONFIG_FILE, CASCADE_DASHBOARD_SECRET_FILE, CASCADE_DB_FILE, CASCADE_IGNORE_FILE, CASCADE_KEYSTORE_FILE, CASCADE_MD_FILE, CASCADE_VERSION, COMPLEXITY_T2_COUNT, Cascade, CascadeCancelledError, type CascadeConfig, type CascadeEvent, type CascadeEventType, CascadeIgnore, type CascadeMessage, CascadeRouter, type CascadeRunOptions, type CascadeRunResult, CascadeToolError, ConfigManager, type ConversationMessage, DEFAULT_API_PORT, DEFAULT_APPROVAL_REQUIRED, DEFAULT_AUTO_SUMMARIZE_AT, DEFAULT_CONTEXT_LIMIT, DEFAULT_DASHBOARD_PORT, DEFAULT_MAX_SESSION_MESSAGES, DEFAULT_RETENTION_DAYS, DEFAULT_THEME, type DashboardConfig, DashboardServer, type EscalationPayload, GLOBAL_CONFIG_DIR, GLOBAL_DB_FILE, GLOBAL_KEYSTORE_FILE, GLOBAL_RUNTIME_DB_FILE, type GenerateOptions, type GenerateResult, type HookDefinition, type HooksConfig, HooksRunner, type Identity, type ImageAttachment, Keystore, LM_STUDIO_BASE_URL, MODELS, McpClient, type McpServerConfig$1 as McpServerConfig, type MemoryConfig, MemoryStore, type Message, type MessageContent, type MessagePayload, type MessageStatus, type MessageType, type ModelInfo, type ModelOverrides, OLLAMA_BASE_URL, PROVIDER_DISPLAY_NAMES, type PeerMessage, type PeerMessageEvent, type PeerSyncPayload, type PeerSyncType, type PermissionDecision, type PermissionDecisionPayload, type PermissionRequest, type PlanReviewConfig, type ProviderConfig, type ProviderType, type RuntimeNode, type RuntimeNodeLog, type RuntimeRefreshPayload, type RuntimeScope, type RuntimeSession, type RuntimeSnapshotPayload, type ScheduledTask, type Session, type SessionCheckpoint, type SessionMetadata, type SessionSubscriptionPayload, type StatusUpdate, type StoredMessage, type StreamChunk, T1Administrator, type T1ToT2Assignment, T1_MODEL_PRIORITY, T2Manager, type T2Result, type 
T2ToT3Assignment, T2_MODEL_PRIORITY, type T3Result, type T3ResultPayload, type T3SubtaskSpec, T3Worker, T3_MODEL_PRIORITY, THEME_NAMES, TOOL_NAMES, type TaskComplexity, TaskScheduler, Telemetry, type TelemetryConfig, type Theme, type ThemeColors, type ThemeName, type TierConfig, type TierLimits, type TierRole, type TierStatus, type TokenUsage, type ToolCall, type ToolDefinition, type ToolExecuteOptions, ToolRegistry, type ToolResult, type ToolsConfig, VISION_MODEL_PRIORITY, type WebSearchConfig, type WebhookConfig, type WorkspaceConfig, createCascade, runCascade, streamCascade };