npm - cascade-ai - Versions diffs - 0.5.1 → 0.9.7 - Mend

cascade-ai 0.5.1 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/README.md +147 -28
package/bin/cascade.js +14 -1
package/dist/cli.cjs +4157 -1364
package/dist/cli.cjs.map +1 -1
package/dist/cli.js +4095 -1303
package/dist/cli.js.map +1 -1
package/dist/index.cjs +2214 -430
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +562 -70
package/dist/index.d.ts +562 -70
package/dist/index.js +2208 -425
package/dist/index.js.map +1 -1
package/dist/keytar-VMICNFEJ.node +0 -0
package/package.json +11 -12
package/web/dist/assets/index-DFRrUnoJ.js +246 -0
package/web/dist/assets/react-BP1N17hq.js +1 -0
package/web/dist/assets/reactflow-Clz8xC7C.js +33 -0
package/web/dist/index.html +2 -2
package/dist/keytar-F4YAPN53.node +0 -0
package/web/dist/assets/index-BFrwdYDg.js +0 -225
package/web/dist/assets/react-Cpp6qqoq.js +0 -1
package/web/dist/assets/reactflow-B1e2RnXD.js +0 -48

package/dist/index.d.cts (changed)

@@ -27,6 +27,14 @@ interface ProviderConfig {
     deploymentName?: string;
     apiVersion?: string;
     model?: string;
+    /**
+     * OAuth bearer token (e.g. a Claude Code subscription token) used instead
+     * of an API key. When set on an Anthropic provider, the request uses
+     * `Authorization: Bearer` + the oauth beta header rather than `x-api-key`.
+     */
+    authToken?: string;
+    /** Where an adopted credential came from, e.g. "Claude Code". Informational. */
+    credentialSource?: string;
 }
 interface StreamChunk {
     text: string;
@@ -47,6 +55,15 @@ interface GenerateOptions {
     tools?: ToolDefinition[];
     images?: ImageAttachment[];
     stream?: boolean;
+    /** Abort signal — when it fires, the provider aborts the in-flight request (instant cancel). */
+    signal?: AbortSignal;
+    /**
+     * Per-call model override. When set, this exact model is used for the call
+     * instead of the tier's default — lets Cascade Auto route each subtask to the
+     * best model for its type without disturbing the shared per-tier model that
+     * concurrent workers rely on. Ignored when a vision model is required.
+     */
+    model?: ModelInfo;
 }
 interface GenerateResult {
     content: string;
@@ -201,6 +218,8 @@ interface T3ResultPayload {
     issues: string[];
     peerSyncsUsed: string[];
     correctionAttempts: number;
+    /** Sibling workers this T3 asked its T2 to spawn (T3→T2 reinforcement request). */
+    reinforcements?: T2ToT3Assignment[];
 }
 interface T3Result extends T3ResultPayload {
 }
@@ -219,7 +238,7 @@ interface PeerSyncPayload {
     content: string | Record<string, unknown>;
     subtaskId?: string;
 }
-type PeerSyncType = 'SHARE_OUTPUT' | 'RESOLVE_CONFLICT' | 'DIVIDE_WORK' | 'CHECK_ASSUMPTION' | 'SIGNAL_READY';
+type PeerSyncType = 'SHARE_OUTPUT' | 'RESOLVE_CONFLICT' | 'DIVIDE_WORK' | 'CHECK_ASSUMPTION' | 'SIGNAL_READY' | 'TOOL_CREATED' | 'COORDINATION';
 interface PeerMessage {
     fromId: string;
     toId: string;
@@ -350,10 +369,53 @@ interface CascadeConfig {
     theme: string;
     workspace: WorkspaceConfig;
     cascadeAuto?: boolean;
+    /** Cascade Auto trade-off bias when picking a model. Default: 'balanced'. */
+    autoBias?: 'balanced' | 'quality' | 'cost';
+    /** Public-benchmark data source settings for Cascade Auto. */
+    benchmarks?: BenchmarksConfig;
     enableToolCreation?: boolean;
+    /** Persist runtime-generated tools and reload them on startup (untrusted). Default: true. */
+    persistDynamicTools?: boolean;
     plugins?: string[];
     localConcurrency?: number;
     localInferenceTimeoutMs?: number;
+    /** Timeout (ms) for a single cloud LLM call (streaming or not). Default: 120000. */
+    cloudInferenceTimeoutMs?: number;
+    /** Timeout (ms) for a tool-approval decision; denies (never auto-approves) on timeout. Default: 600000. */
+    approvalTimeoutMs?: number;
+    /**
+     * Pause for user approval of the plan. 'never' (default), 'complex' (Complex
+     * runs only; 'always' is an alias), or 'all' (Moderate + Complex).
+     */
+    planApproval?: 'never' | 'complex' | 'all' | 'always';
+    /** Plan-review behaviour for the boardroom gate. */
+    planReview?: PlanReviewConfig;
+    /** Autonomy level: 'manual' (default, prompts) or 'auto' (hands-off within guardrails). */
+    autonomy?: 'manual' | 'auto';
+    /** Max corrective re-plan passes before T1 returns the best partial. Default: 2. */
+    maxReplanPasses?: number;
+    /** Reflection / self-critique: goal-alignment critique + revise after self-test. Off by default. */
+    reflection?: {
+        enabled?: boolean;
+        maxRounds?: number;
+    };
+    /** T3 wave execution: 'auto' (sequential for local, parallel for cloud), or force one. Default: 'auto'. */
+    t3Execution?: 'auto' | 'parallel' | 'sequential';
+    /** T3→T2 reinforcement: let a worker ask its manager to spawn sibling workers. Off by default. */
+    reinforcements?: {
+        enabled?: boolean;
+        maxPerSection?: number;
+    };
+    /** Render the TUI in the alternate screen buffer (vim-style). Default: false. */
+    altScreen?: boolean;
+}
+interface PlanReviewConfig {
+    /** A reviewer model critiques the plan (gaps/risks/cost) before you see it. Default: false. */
+    autoReviewer?: boolean;
+    /** Allow editing the plan (drop sections) in the approval dialog. Default: true. */
+    editable?: boolean;
+    /** Steering-note → re-plan → re-ask rounds allowed before proceeding. Default: 5. */
+    maxRevisionRounds?: number;
 }
 interface ModelOverrides {
     t1?: string;
@@ -361,6 +423,16 @@ interface ModelOverrides {
     t3?: string;
     vision?: string;
 }
+interface BenchmarksConfig {
+    /** Fetch current quality scores from a public source. Default: true. */
+    live?: boolean;
+    /** How long a fetched snapshot stays fresh before re-fetching (hours). Default: 24. */
+    refreshHours?: number;
+    /** Override the quality-benchmark source URL. When unset, the bundled GitHub-raw snapshot is used. */
+    sourceUrl?: string;
+    /** Fetch current per-token prices from OpenRouter (free, no key). Default: true. */
+    pricingLive?: boolean;
+}
 interface ToolsConfig {
     shellAllowlist: string[];
     shellBlocklist: string[];
@@ -383,12 +455,14 @@ interface HooksConfig {
     postTask?: HookDefinition[];
 }
 interface HookDefinition {
+    name?: string;
     command: string;
     tools?: string[];
     timeout?: number;
 }
 interface DashboardConfig {
     port: number;
+    host: string;
     auth: boolean;
     teamMode: 'single' | 'multi';
     secret?: string;
@@ -411,6 +485,10 @@ interface TierLimits {
 interface BudgetConfig {
     dailyBudgetUsd?: number;
     sessionBudgetUsd?: number;
+    /** Hard per-task token ceiling. Resets each run. Default 200k. */
+    maxTokensPerRun?: number;
+    /** Optional hard per-task cost ceiling (USD). */
+    maxCostPerRunUsd?: number;
     warnAtPct: number;
 }
 interface WorkspaceConfig {
@@ -440,24 +518,7 @@ interface ThemeColors {
     t2Color: string;
     t3Color: string;
 }
-interface ToolCallBlock {
-    id: string;
-    toolName: string;
-    input: Record<string, unknown>;
-    output?: string;
-    error?: string;
-    status: 'pending' | 'running' | 'done' | 'error';
-    tierId: string;
-    durationMs?: number;
-}
-interface ReplMessage {
-    id: string;
-    role: 'user' | 'assistant' | 'system' | 'error';
-    content: string;
-    timestamp: string;
-    toolBlocks?: ToolCallBlock[];
-}
-type CascadeEventType = 'task:start' | 'task:complete' | 'task:error' | 'tier:status' | 'tier:result' | 'stream:token' | 'stream:done' | 'tool:approval-request' | 'tool:approval-response' | 'tool:execute' | 'tool:result' | 'cost:update' | 'session:save' | 'escalation' | 'peer:sync';
+type CascadeEventType = 'task:start' | 'task:complete' | 'task:error' | 'tier:status' | 'tier:result' | 'tier:root' | 'stream:token' | 'stream:done' | 'tool:approval-request' | 'tool:approval-response' | 'tool:execute' | 'tool:result' | 'tool:call' | 'cost:update' | 'session:save' | 'escalation' | 'peer:sync' | 'peer:message' | 'plan' | 'log' | 'run:cancelled' | 'budget:warning' | 'budget:exceeded' | 'permission:user-required' | 'mcp:approval-required' | 'plan:approval-required';
 interface CascadeEvent<T = unknown> {
     type: CascadeEventType;
     taskId?: string;
@@ -501,6 +562,13 @@ interface PermissionRequest {
     sectionContext: string;
     /** What T1's overall task goal is (injected when escalated to T1) */
     taskContext?: string;
+    /**
+     * When true, bypass the session approval cache so this request always reaches
+     * a fresh decision. Set for UNTRUSTED runtime tools (loaded from disk or
+     * received from a peer) so a prior `always` approval cannot silently
+     * auto-approve a later dangerous action.
+     */
+    forceReprompt?: boolean;
 }
 /**
  * A decision made at any tier (T2, T1, or USER) about a PermissionRequest.
@@ -592,6 +660,14 @@ declare class ModelSelector {
     private availableModels;
     constructor(availableProviders: Set<ProviderType>);
     addDynamicModel(model: ModelInfo): void;
+    /**
+     * Permanently drop a model from the available set for this session. Used by
+     * the router's 404 / "model not found" self-heal so a dead id is never
+     * selected again after it fails once.
+     */
+    removeModel(id: string): void;
+    /** Look up an available model by exact id (post-discovery/pricing lookups). */
+    getModelById(id: string): ModelInfo | undefined;
     getAvailableModelsForProvider(provider: ProviderType): ModelInfo[];
     selectForTier(tier: TierRole, overrideModelId?: string, requireVision?: boolean): ModelInfo | null;
     selectVisionModel(): ModelInfo | null;
@@ -615,6 +691,76 @@ declare class ModelSelector {
     private resolveDynamicModel;
 }
+declare class ModelPerformanceTracker {
+    private stats;
+    private readonly statsFile;
+    private loaded;
+    constructor(statsFile?: string);
+    load(): Promise<void>;
+    save(): Promise<void>;
+    record(modelId: string, taskType: TaskType, outcome: 'success' | 'failure', retries?: number, costUsd?: number): void;
+    /**
+     * Returns 0.05–1.0; defaults to 0.5 (neutral prior) when no history exists.
+     * High retry counts penalise the score.
+     */
+    performanceScore(modelId: string, taskType: TaskType): number;
+    /**
+     * Returns 0.1–1.0. Cheaper models score higher, with the penalty scaled
+     * down for complex tasks (where capability matters more than cost).
+     *
+     * blended cost = input + 2 × output (output tokens are typically pricier).
+     * normalised over $0.05 blended as the "expensive" ceiling.
+     */
+    costEfficiencyScore(model: ModelInfo, complexity: 1 | 2 | 3 | 4 | 5): number;
+}
+type TaskType = 'code' | 'analysis' | 'creative' | 'data' | 'mixed';
+/** Cascade Auto cost/quality trade-off bias. See CascadeConfig.autoBias. */
+type AutoBias = 'balanced' | 'quality' | 'cost';
+interface TaskProfile {
+    type: TaskType;
+    /** 1 = trivial, 5 = research-grade */
+    complexity: 1 | 2 | 3 | 4 | 5;
+    requiresReasoning: boolean;
+    requiresVision: boolean;
+    estimatedTokens: number;
+    /** 0.0–1.0 heuristic confidence; below 0.7 triggers AI fallback */
+    confidence: number;
+}
+declare class TaskAnalyzer {
+    private tracker?;
+    private bias;
+    private lastProfile;
+    private lastSelectedModels;
+    constructor(tracker?: ModelPerformanceTracker, bias?: AutoBias);
+    setTracker(tracker: ModelPerformanceTracker): void;
+    /** Change the cost/quality bias at runtime (e.g. when config reloads). */
+    setBias(bias: AutoBias): void;
+    /** Returns the TaskProfile from the most recent analyze() call — used for outcome recording. */
+    getLastProfile(): TaskProfile | null;
+    /**
+     * Analyze a prompt and return a TaskProfile using pure heuristics.
+     * Low confidence prompts fall back to a conservative mixed/moderate profile.
+     */
+    analyze(prompt: string): Promise<TaskProfile>;
+    /**
+     * Select the optimal model for a given tier.
+     * Scores tier-eligible models using cost efficiency + historical performance.
+     * Falls back to the priority-list default when no candidates have history.
+     */
+    selectModel(prompt: string, tier: TierRole, selector: ModelSelector): Promise<ModelInfo | null>;
+    /**
+     * Record the outcome of a completed run across all tiers that were selected
+     * during this session and persist stats to disk.
+     */
+    recordRunOutcome(outcome: 'success' | 'failure', costByTier: Record<string, number>): void;
+    private scoreModel;
+    private costEfficiency;
+    private taskMatchScore;
+    /** Clear the analysis cache (call between sessions). */
+    static clearCache(): void;
+}
 declare class MemoryStore {
     private db;
     constructor(dbPath: string);
@@ -685,6 +831,69 @@ declare class MemoryStore {
     private deserializeScheduledTask;
 }
+interface DelegationSavings {
+    /** USD saved vs. running every call on the T1 model. 0 when nothing was saved. */
+    savedUsd: number;
+    /** Percentage of the counterfactual cost that was saved (0–100, one decimal). */
+    savedPct: number;
+    /** What the session would have cost if every call had used the T1 model. */
+    counterfactualUsd: number;
+}
+type BenchmarkProfile = Partial<Record<Exclude<TaskType, 'mixed'>, number>>;
+type DataSource = 'live' | 'cache' | 'bundled';
+interface PriceEntry {
+    input: number;
+    output: number;
+}
+interface LiveDataOptions {
+    /** Master switch for live quality fetch. Default: true. */
+    live?: boolean;
+    /** Master switch for live OpenRouter pricing. Default: true. */
+    pricingLive?: boolean;
+    /** Hours a fetched snapshot stays fresh before re-fetching. Default: 24. */
+    refreshHours?: number;
+    /** Override the quality snapshot URL. */
+    sourceUrl?: string;
+    /** Override the on-disk cache path (tests). */
+    cacheFile?: string;
+}
+declare class LiveDataProvider {
+    private snapshot;
+    private prices;
+    private source;
+    private fetchedAt;
+    private loaded;
+    private refreshing;
+    private readonly opts;
+    constructor(opts?: LiveDataOptions);
+    /** Load cached data from disk (cheap, no network). Safe to call repeatedly. */
+    load(): Promise<void>;
+    /**
+     * Refresh from the network if the cache is older than the TTL. Coalesces
+     * concurrent callers and never throws — failures keep last-known-good data.
+     */
+    refresh(force?: boolean): Promise<void>;
+    private doRefresh;
+    private fetchSnapshot;
+    private fetchPrices;
+    private saveCache;
+    /** Quality profile for a model family, or null when we have no live/cached data. */
+    getQualityProfile(family: string): BenchmarkProfile | null;
+    /** Current per-1k price for a model id, or null when unknown. */
+    getLivePrice(modelId: string): PriceEntry | null;
+    /**
+     * Returns a price-corrected copy of each model when live pricing is known,
+     * leaving the original untouched (so the shared catalog is never mutated).
+     */
+    applyLivePricing(models: ModelInfo[]): ModelInfo[];
+    /** Where the active quality data came from — for /why and `cascade models`. */
+    getDataSource(): DataSource;
+    getGeneratedAt(): string | null;
+    hasLivePricing(): boolean;
+}
 interface RouterStats {
     totalTokens: number;
     totalCostUsd: number;
@@ -706,6 +915,10 @@ declare class CascadeRouter extends EventEmitter {
     private tierModels;
     private config;
     private sessionCostUsd;
+    private runTokens;
+    private runCostUsd;
+    private runBudgetExceeded;
+    private runBudgetExceededReason;
     /**
      * Budget state machine — guards against two concurrent `generate()` calls
      * each firing the warning or both slipping past the hard cap. All
@@ -716,6 +929,12 @@ declare class CascadeRouter extends EventEmitter {
     private budgetExceededReason;
     private tpmLimiter;
     private localQueue;
+    private taskAnalyzer?;
+    private liveData?;
+    /** Snapshot of configured/default tier models, taken before Cascade Auto overrides them. */
+    private originalTierModels?;
+    /** The current run's abort signal — injected into every provider call so a cancel aborts in-flight requests. */
+    private runSignal?;
     /** Thrown when the configured budget is exceeded. */
     static BudgetExceededError: {
         new (msg: string): {
@@ -736,16 +955,75 @@ declare class CascadeRouter extends EventEmitter {
      * No-op if store is not provided.
      */
     profileModels(store: MemoryStore): Promise<void>;
+    /**
+     * Cascade Auto live data: discover/validate real model ids from each cloud
+     * provider, then fetch current public quality scores + per-token prices and
+     * apply the prices to the available-model set. Best-effort and safe to run in
+     * the background — any failure leaves the bundled catalog/benchmarks in effect.
+     */
+    refreshLiveData(): Promise<void>;
+    /** Returns the live-data provider once refreshLiveData has run (UX/insight). */
+    getLiveData(): LiveDataProvider | undefined;
+    /**
+     * Query each available cloud provider's live model list and register the
+     * results. Confirms catalog ids still exist and surfaces newly released
+     * models without a package upgrade. Mirrors discoverOllamaModels.
+     */
+    private discoverProviderModels;
+    /**
+     * Replace available models with live-priced copies and refresh the already
+     * resolved tier models so shared-tier cost accounting uses current prices.
+     */
+    private applyLivePricing;
     generate(tier: TierRole, options: GenerateOptions, onChunk?: (chunk: StreamChunk) => void, requireVision?: boolean): Promise<GenerateResult>;
     getModelForTier(tier: TierRole): ModelInfo | undefined;
+    /** Reflection settings for workers (config.reflection). Off unless enabled. */
+    getReflectionConfig(): {
+        enabled: boolean;
+        maxRounds: number;
+    };
+    /** T3→T2 reinforcement settings (config.reinforcements). Off unless enabled. */
+    getReinforcementsConfig(): {
+        enabled: boolean;
+        maxPerSection: number;
+    };
+    /**
+     * Resolved T3 wave execution mode. 'auto' becomes 'sequential' when the T3
+     * tier resolves to a LOCAL model (the single-GPU queue serializes anyway, so
+     * running them in parallel just thrashes it), and 'parallel' for cloud.
+     */
+    getT3ExecutionMode(): 'parallel' | 'sequential';
     /**
      * Cascade Auto: temporarily override the model for a tier.
      * Used by TaskAnalyzer to inject task-optimal models before execution.
      * The override is valid for the current task only — restored by restoreTierModels().
      */
     overrideTierModel(tier: TierRole, model: ModelInfo): void;
+    /**
+     * Restore tier models to the configured/default baseline captured before the
+     * first Cascade Auto override. Called at the end of each run so `/why`, the
+     * status bar, and the next run reflect the configured models, not stale picks.
+     */
+    restoreTierModels(): void;
+    /** Set (or clear) the current run's abort signal for instant cancellation. */
+    setRunSignal(signal: AbortSignal | undefined): void;
     getSelector(): ModelSelector;
+    /** Wire the Cascade Auto task analyzer used for per-subtask model routing. */
+    setTaskAnalyzer(analyzer: TaskAnalyzer): void;
+    /**
+     * Cascade Auto per-subtask routing: pick the benchmark-best model for a
+     * specific subtask's text, scoped to the tier's eligible candidates. Returns
+     * null when Cascade Auto is off (callers then use the shared tier model).
+     * Pure heuristic — no extra LLM call.
+     */
+    selectModelForSubtask(tier: TierRole, text: string): Promise<ModelInfo | null>;
     getStats(): RouterStats;
+    /**
+     * What did delegation save? Compares actual spend against the
+     * counterfactual of every call running on the T1 model. This is the
+     * number only a tiered hierarchy can show.
+     */
+    getDelegationSavings(): DelegationSavings;
     /**
      * Returns a human-readable cost summary broken down by tier.
      * Example: { T1: "$0.0120 (2 calls, 1500 tokens)", T2: "$0.0043 (6 calls, 4200 tokens)", ... }
@@ -769,6 +1047,8 @@ declare class CascadeRouter extends EventEmitter {
      * Sets (or clears) a runtime session budget cap (USD).
      * Pass null to remove the cap.
      */
+    /** Raise/set the per-task token cap at runtime (used by /continue resume). */
+    setMaxTokensPerRun(maxTokens: number): void;
     setSessionBudget(usd: number | null): void;
     /**
      * Returns how much of the session budget has been used (USD).
@@ -791,6 +1071,17 @@ declare class CascadeRouter extends EventEmitter {
     private createProvider;
     private getAnyModelForProvider;
     private recordStats;
+    /**
+     * Resets per-run accounting at the start of each `cascade run`. Session
+     * totals and a session-wide budget halt are deliberately preserved; only the
+     * per-task ceiling is cleared so the next task starts with a fresh allowance.
+     */
+    beginRun(): void;
+    /**
+     * Enforce the hard per-task ceiling. Once tripped, the flag makes every
+     * subsequent (and concurrent) generate() call in this run fail fast.
+     */
+    private enforceRunBudget;
     /**
      * Single point of truth for budget state transitions. Called after each
      * recordStats() so warning and hard-stop transitions are evaluated
@@ -838,6 +1129,12 @@ interface McpClientOptions {
     trustedServers?: string[];
     /** Approval gate invoked when a server is NOT in the trusted list. */
     approvalCallback?: McpApprovalCallback;
+    /**
+     * Sink for non-fatal warnings. Hosts with a live TUI must route these
+     * away from the terminal — a raw console write mid-frame corrupts Ink's
+     * rendering. Defaults to console.warn.
+     */
+    onWarn?: (message: string) => void;
 }
 declare class McpClient {
     private static activeProcessPids;
@@ -851,6 +1148,7 @@ declare class McpClient {
     private tools;
     private trustedServers;
     private approvalCallback;
+    private onWarn;
     constructor(options?: McpClientOptions);
     connect(server: McpServerConfig): Promise<void>;
     disconnect(serverName: string): Promise<void>;
@@ -925,51 +1223,6 @@ declare class ToolRegistry extends EventEmitter {
     private isIgnored;
 }
-declare class Cascade extends EventEmitter {
-    private router;
-    private toolRegistry;
-    private mcpClient;
-    private config;
-    private initialized;
-    private initPromise?;
-    private store?;
-    private audit?;
-    private telemetry;
-    private taskAnalyzer?;
-    private perfTracker?;
-    private toolCreator?;
-    constructor(config: CascadeConfig, workspacePath: string, store?: MemoryStore);
-    private initOptionalFeatures;
-    setStore(store: MemoryStore): void;
-    /**
-     * Emit an `mcp:approval-required` event and wait up to 30 s for a listener
-     * to resolve it via `cascade.resolveMcpApproval(serverName, approved)`.
-     *
-     * If no listener is attached (e.g. a non-interactive SDK run), the default
-     * is to reject — safer than silently spawning an arbitrary subprocess.
-     */
-    private pendingMcpApprovals;
-    private requestMcpApproval;
-    /** Resolve a pending MCP server approval from a REPL / dashboard listener. */
-    resolveMcpApproval(serverName: string, approved: boolean): void;
-    init(): Promise<void>;
-    private isCasualGreeting;
-    private looksLikeSimpleArtifactTask;
-    private looksLikeConversational;
-    private static globCache;
-    private countWorkspaceFiles;
-    private determineComplexity;
-    run(options: CascadeRunOptions): Promise<CascadeRunResult>;
-    getRouter(): CascadeRouter;
-    getToolRegistry(): ToolRegistry;
-    /**
-     * Tear down MCP connections and flush any pending telemetry so long-lived
-     * hosts (REPL, SDK embedders) don't leak child processes. Safe to call
-     * multiple times.
-     */
-    close(): Promise<void>;
-}
 declare abstract class BaseTier extends EventEmitter {
     readonly id: string;
     readonly role: TierRole;
@@ -1019,6 +1272,13 @@ declare class PermissionEscalator extends EventEmitter {
     private t1Evaluator?;
     /** Pending user-decision resolvers keyed by request ID */
     private pendingUserDecisions;
+    /** ms to wait for a user approval decision before denying for safety. */
+    private readonly approvalTimeoutMs;
+    /** Autonomous mode (autonomy: 'auto'): non-dangerous tools auto-approve. */
+    private autonomous;
+    constructor(approvalTimeoutMs?: number, autonomous?: boolean);
+    /** Toggle autonomous auto-approval at runtime (e.g. from /auto). */
+    setAutonomous(on: boolean): void;
     setT2Evaluator(evaluator: T2Evaluator): void;
     setT1Evaluator(evaluator: T1Evaluator): void;
     /**
@@ -1038,23 +1298,84 @@ declare class PermissionEscalator extends EventEmitter {
     cancelAllPending(): void;
 }
+interface GeneratedToolSpec {
+    name: string;
+    description: string;
+    inputSchema: Record<string, unknown>;
+    /** Raw JS function body — receives `input`, `fetch`, and `callTool`. Returns string | Promise<string> */
+    executeCode: string;
+    isDangerous: boolean;
+    /**
+     * Whether this tool's source is trusted (generated in THIS session) vs untrusted
+     * (loaded from disk or received from a peer). Untrusted tools always re-escalate
+     * their dangerous actions. Never persisted as trusted — forced false on reload.
+     */
+    trusted?: boolean;
+}
 declare class ToolCreator {
     private router;
     private registry;
     private escalator?;
-    private createdTools;
-    constructor(router: CascadeRouter, registry: ToolRegistry);
+    private workspacePath?;
+    /** When false, persisted tools are neither loaded nor written. */
+    private persistEnabled;
+    private logger?;
+    /** name → spec, for persistence, broadcast, and re-registration. */
+    private specs;
+    /** capability fingerprint → tool name, so the same need isn't re-generated. */
+    private capabilityIndex;
+    constructor(router: CascadeRouter, registry: ToolRegistry, workspacePath?: string, persistEnabled?: boolean);
     setPermissionEscalator(escalator: PermissionEscalator): void;
+    /** Route diagnostics through the host (Cascade) so they survive the Ink TUI. */
+    setLogger(fn: (msg: string) => void): void;
+    /** Returns the stored spec for a created tool (for peer broadcast). */
+    getSpec(name: string): GeneratedToolSpec | undefined;
+    private log;
     /**
      * Generate a new tool from a description and register it with the ToolRegistry.
-     * The generated tool has access to all registered cascade tools via callTool().
-     * Returns the tool name if successful, null if generation failed.
+     * Returns the tool name on success, or null on failure (with a logged reason —
+     * failures are no longer swallowed silently). Reuses an existing tool when the
+     * same capability has already been created (dedup) so peers/runs don't
+     * regenerate identical tools.
      */
     createTool(description: string, context: string): Promise<string | null>;
+    /**
+     * Register a spec (from createTool, disk, or a peer) into the registry.
+     * Idempotent — a name already present is skipped. `trusted` is set by the
+     * caller and never inherited from disk: createTool passes true; persisted and
+     * peer-broadcast specs pass false, so their dangerous actions always re-escalate.
+     * The DynamicTool resolves the escalator lazily (`() => this.escalator`) so a
+     * later setPermissionEscalator covers tools registered before the run wired it.
+     */
+    registerSpec(spec: GeneratedToolSpec, trusted?: boolean): void;
+    /** Load tools persisted by previous runs and register them — as UNTRUSTED, and
+     *  only after re-validating each spec (its source could have been tampered with
+     *  or authored during a prior prompt-injected run). Untrusted tools re-escalate
+     *  any dangerous action, so a silently-reloaded tool can't act without approval. */
+    loadPersistedTools(): Promise<void>;
+    private persist;
     /** Returns the names of all tools created in this session. */
     getCreatedTools(): string[];
 }
+interface TaskPlan {
+    complexity: TaskComplexity;
+    sections: T1ToT2Assignment[];
+    reasoning: string;
+}
+/** Decision returned by a plan-approval gate (the "boardroom"). */
+interface PlanApprovalDecision {
+    approved: boolean;
+    /** Optional steering note — triggers a re-plan pass, then re-asks (up to maxRevisionRounds). */
+    note?: string;
+    /** Optional user-edited plan — applied directly (no re-decompose) before proceeding. */
+    editedPlan?: TaskPlan;
+}
+/** Extra context surfaced to the approval gate alongside the plan. */
+interface PlanApprovalMeta {
+    /** Automated reviewer's critique of the plan (when planReview.autoReviewer is on). */
+    critique?: string;
+}
 declare class T1Administrator extends BaseTier {
     private router;
     private toolRegistry;
@@ -1069,6 +1390,7 @@ declare class T1Administrator extends BaseTier {
     private taskGoal;
     private peerMessageCallback?;
     private peerMessageSessionId;
+    private planApprovalCallback?;
     constructor(router: CascadeRouter, toolRegistry: ToolRegistry, config: CascadeConfig);
     setStore(store: MemoryStore): void;
     /**
@@ -1078,6 +1400,13 @@ declare class T1Administrator extends BaseTier {
     setPermissionEscalator(escalator: PermissionEscalator): void;
     setToolCreator(creator: ToolCreator): void;
     setPeerMessageCallback(cb: (event: PeerMessageEvent) => void, sessionId: string): void;
+    /**
+     * Install a "boardroom" gate: called with T1's plan BEFORE any T2 manager
+     * spawns. When unset, plans proceed immediately (headless/SDK unchanged).
+     */
+    setPlanApprovalCallback(cb: (plan: TaskPlan, meta?: PlanApprovalMeta) => Promise<PlanApprovalDecision>): void;
+    /** Decompose a prompt into a plan WITHOUT executing it (powers /plan preview). */
+    previewPlan(prompt: string): Promise<TaskPlan>;
     execute(userPrompt: string, images?: ImageAttachment[], systemContext?: string, signal?: AbortSignal): Promise<{
         output: string;
         t2Results: T2Result[];
@@ -1087,6 +1416,12 @@ declare class T1Administrator extends BaseTier {
     getEscalations(): EscalationPayload[];
     private reviewT2Outputs;
     private analyzeImages;
+    /**
+     * Automated reviewer pass: a single T1 critique of the plan before the user
+     * sees it (planReview.autoReviewer). Best-effort — returns null on any error
+     * so it never blocks the approval gate.
+     */
+    private reviewPlan;
     private decomposeTask;
     private validatePlan;
     private dispatchT2Managers;
@@ -1103,6 +1438,122 @@ declare class T1Administrator extends BaseTier {
     private evaluatePermissionAtT1;
 }
+/**
+ * One entry in the per-run orchestration decision trail (see /why).
+ *
+ * Entries are returned by `Cascade.getDecisionLog()`.
+ *
+ * - `at`: when the decision was recorded. NOTE(review): presumably a timestamp
+ *   string; the exact format is not visible in this declaration — confirm.
+ * - `kind`: decision category — one of 'complexity', 'model', 'failover' or
+ *   'escalation'.
+ * - `detail`: text describing the decision (assumed human-readable, free-form).
+ */
+interface DecisionLogEntry {
+    at: string;
+    kind: 'complexity' | 'model' | 'failover' | 'escalation';
+    detail: string;
+}
+declare class Cascade extends EventEmitter {
+    private router;
+    private toolRegistry;
+    private mcpClient;
+    private config;
+    /**
+     * Orchestration decisions for the CURRENT run — cleared on each run().
+     * Exposed to callers through getDecisionLog().
+     */
+    private decisionLog;
+    private initialized;
+    /** Last task that stopped at the budget cap — powers /continue (resumeRun). */
+    private lastInterruptedRun?;
+    private initPromise?;
+    private store?;
+    private audit?;
+    private telemetry;
+    private taskAnalyzer?;
+    private perfTracker?;
+    private toolCreator?;
+    private workspacePath;
+    constructor(config: CascadeConfig, workspacePath: string, store?: MemoryStore);
+    private initOptionalFeatures;
+    setStore(store: MemoryStore): void;
+    /**
+     * Emit an `mcp:approval-required` event and wait up to 30 s for a listener
+     * to resolve it via `cascade.resolveMcpApproval(serverName, approved)`.
+     *
+     * If no listener is attached (e.g. a non-interactive SDK run), the default
+     * is to reject — safer than silently spawning an arbitrary subprocess.
+     *
+     * NOTE(review): this text reads as the description of `requestMcpApproval`
+     * (declared just below) rather than of the `pendingMcpApprovals` field it is
+     * attached to — confirm the comment placement in the original source.
+     */
+    private pendingMcpApprovals;
+    private requestMcpApproval;
+    private recordDecision;
+    /**
+     * The orchestration decision trail for the most recent run: complexity
+     * verdict (and why), which model served each tier, failovers, and
+     * escalations. Powers the /why command.
+     */
+    getDecisionLog(): DecisionLogEntry[];
+    /**
+     * Resolve a pending MCP server approval from a REPL / dashboard listener.
+     * This is the call a listener makes in response to an
+     * `mcp:approval-required` event.
+     */
+    resolveMcpApproval(serverName: string, approved: boolean): void;
+    private pendingPlanApproval?;
+    private requestPlanApproval;
+    /**
+     * Resolve a pending boardroom plan approval from a REPL / dashboard listener.
+     * An optional `note` re-plans and re-asks; an optional `editedPlan` is applied
+     * directly (no re-decompose).
+     */
+    resolvePlanApproval(approved: boolean, note?: string, editedPlan?: TaskPlan): void;
+    /**
+     * Autonomy control (used by the /auto command). 'auto' makes the next run
+     * hands-off: the plan gate auto-approves and non-dangerous tools auto-approve,
+     * while dangerous tools still escalate and budget caps remain the hard stop.
+     */
+    setAutonomy(mode: 'manual' | 'auto'): void;
+    getAutonomy(): 'manual' | 'auto';
+    /**
+     * Preview T1's decomposition for a prompt WITHOUT executing it (powers /plan).
+     * Idempotent init guard, so it works before the first run.
+     */
+    previewPlan(prompt: string): Promise<TaskPlan>;
+    /** True when a task stopped at the budget cap and can be resumed via /continue. */
+    hasResumableRun(): boolean;
+    /**
+     * Raise the per-run token budget for a resume and return the continuation
+     * prompt (or null when nothing is resumable). Consumes the interrupted-run
+     * state. The REPL submits the returned prompt through its normal flow so the
+     * resumed run renders like any other; `resumeRun` wraps this for SDK callers.
+     */
+    prepareResume(opts?: {
+        maxTokens?: number;
+    }): string | null;
+    /**
+     * Resume the last budget-capped task with a raised budget (SDK/headless).
+     * Returns null when there is nothing to resume.
+     */
+    resumeRun(opts?: {
+        maxTokens?: number;
+    }): Promise<CascadeRunResult | null>;
+    /**
+     * Rough pre-execution cost estimate for a plan: ~3 T2 calls per section
+     * plus ~4 T3 calls per subtask at typical token volumes. A ballpark for
+     * the approval dialog, not an invoice — always label it "est."
+     */
+    private estimatePlanCost;
+    init(): Promise<void>;
+    private isCasualGreeting;
+    private looksLikeSimpleArtifactTask;
+    private looksLikeConversational;
+    /**
+     * Read-only inquiries about existing content ("read / review / explain /
+     * summarize / analyze this file or codebase and tell me …") are single-agent
+     * work — one worker with file/grep tools answers directly, no T1→T2→T3 fan-out.
+     * They must NOT ask to create, build, implement, refactor, or save an artifact;
+     * those stay on the heavier classifier path. This keeps trivial "what does this
+     * do?" requests from being mis-routed into a multi-agent, multi-thousand-token run.
+     */
+    private looksLikeReadOnlyInquiry;
+    private static globCache;
+    private countWorkspaceFiles;
+    private determineComplexity;
+    run(options: CascadeRunOptions): Promise<CascadeRunResult>;
+    getRouter(): CascadeRouter;
+    getToolRegistry(): ToolRegistry;
+    /**
+     * Tear down MCP connections and flush any pending telemetry so long-lived
+     * hosts (REPL, SDK embedders) don't leak child processes. Safe to call
+     * multiple times.
+     */
+    close(): Promise<void>;
+}
 interface PeerOutput {
     subtaskId: string;
     fromId: string;
@@ -1132,6 +1583,8 @@ declare class PeerBus extends EventEmitter {
     /** Called when any peer message or broadcast is sent — used for dashboard visibility. */
     onPeerMessage?: (event: PeerMessageEvent) => void;
     sessionId: string;
+    /** Surface coordination traffic (locks, barriers) to the visibility hook. */
+    private emitCoordination;
     register(peerId: string): void;
     /**
      * Publish output — unblocks any peers waiting on this subtaskId
@@ -1210,6 +1663,7 @@ declare class T2Manager extends BaseTier {
     private router;
     private toolRegistry;
     private assignment?;
+    private sectionModel?;
     private t3Workers;
     private escalations;
     private peerSyncBuffer;
@@ -1218,6 +1672,8 @@ declare class T2Manager extends BaseTier {
     private t2PeerBus?;
     private permissionEscalator?;
     private toolCreator?;
+    /** Optional boardroom gate (Moderate / root-T2 runs) — pauses after decomposition. */
+    private planApprovalCallback?;
     /** AbortController for the current T3 wave — aborted on cancel-and-respawn */
     private waveAbortController;
     setPeerBus(bus: PeerBus): void;
@@ -1230,6 +1686,16 @@ declare class T2Manager extends BaseTier {
      */
     setPermissionEscalator(escalator: PermissionEscalator): void;
     setToolCreator(creator: ToolCreator): void;
+    /** Boardroom gate for Moderate (root-T2) runs: pause after decomposition. */
+    setPlanApprovalCallback(cb: (subtasks: ReadonlyArray<{
+        subtaskId: string;
+        subtaskTitle: string;
+        description: string;
+    }>, sectionTitle: string) => Promise<{
+        approved: boolean;
+        note?: string;
+        keepSubtaskIds?: string[];
+    }>): void;
     /**
      * Phase 1 of T2 peer discussion: broadcast this section's plan so sibling T2s
      * and T1 can detect overlaps and coordinate execution order.
@@ -1288,6 +1754,10 @@ declare class T3Worker extends BaseTier {
     private store?;
     private audit?;
     private tools;
+    /** 0 = top-level worker (may request reinforcements); 1 = a spawned reinforcement (may not). */
+    private reinforcementDepth;
+    /** Sibling-worker requests this worker made via request_workers (T3→T2). */
+    private pendingReinforcements;
     /** @deprecated — kept only as fallback when no escalator is attached */
     private sessionApprovals;
     private peerBus?;
@@ -1295,6 +1765,8 @@ declare class T3Worker extends BaseTier {
     private toolCreator?;
     setPeerBus(bus: PeerBus): void;
     setPermissionEscalator(escalator: PermissionEscalator): void;
+    /** Marks this worker as a spawned reinforcement (depth 1 — cannot request more). */
+    markAsReinforcement(): void;
     setToolCreator(creator: ToolCreator): void;
     constructor(router: CascadeRouter, toolRegistry: ToolRegistry, parentId: string);
     setStore(store: MemoryStore, sessionId: string): void;
@@ -1304,6 +1776,12 @@ declare class T3Worker extends BaseTier {
     syncWithPeers(barrierName: string): Promise<void>;
     receivePeerSync(fromId: string, content: unknown): void;
     private runAgentLoop;
+    /**
+     * Lightweight argument check against the tool's JSON Schema: required fields
+     * present and enum values in range. Not a full validator — just the two
+     * failure modes weak models hit most. Returns an error message, or null if OK.
+     */
+    private validateToolInput;
     private executeTool;
     /**
      * Adaptive fallback cascade — invoked when executeTool() fails.
@@ -1327,10 +1805,22 @@ declare class T3Worker extends BaseTier {
     private requiresArtifact;
     private extractArtifactPaths;
     private verifyArtifacts;
+    /**
+     * Reflection / self-critique: critique the output against the broader GOAL
+     * (not just the subtask spec the self-test checks) and revise once if it falls
+     * short. Two cheap calls per round — a JSON verdict, then a rewrite only if
+     * needed. Best-effort: any parse/error just keeps the current output.
+     */
+    private reflectAndImprove;
     private selfTest;
     private correctOutput;
     private buildSystemPrompt;
     private buildInitialPrompt;
+    /**
+     * Records a request_workers call (T3→T2 reinforcement). Capped at
+     * maxPerSection; reinforcement workers (depth 1) cannot request more.
+     */
+    private recordReinforcements;
     private buildResult;
     private isFileOperation;
 }
@@ -1511,6 +2001,7 @@ declare class DashboardServer {
     private globalStore;
     private broadcastTimer;
     private port;
+    private host;
     private workspacePath;
     constructor(config: CascadeConfig, store: MemoryStore, workspacePath?: string);
     start(): Promise<void>;
@@ -1688,7 +2179,7 @@ declare class Telemetry {
     shutdown(): Promise<void>;
 }
-declare const CASCADE_VERSION = "0.5.1";
+declare const CASCADE_VERSION = "0.9.6";
 declare const CASCADE_CONFIG_DIR = ".cascade";
 declare const CASCADE_MD_FILE = "CASCADE.md";
 declare const CASCADE_IGNORE_FILE = ".cascadeignore";
@@ -1733,6 +2224,7 @@ declare const TOOL_NAMES: {
     readonly RUN_CODE: "run_code";
     readonly PEER_MESSAGE: "peer_message";
     readonly WEB_SEARCH: "web_search";
+    readonly REQUEST_WORKERS: "request_workers";
 };
 declare const DEFAULT_APPROVAL_REQUIRED: string[];
 declare const PROVIDER_DISPLAY_NAMES: Record<ProviderType, string>;
@@ -1756,4 +2248,4 @@ declare class CascadeToolError extends Error {
     constructor(userMessage: string, cause?: unknown, retryable?: boolean);
 }
-export { AZURE_BASE_URL_TEMPLATE, type ApprovalRequest, type ApprovalResponse, type AuditEntry, AuditLogger, type BudgetConfig, CASCADE_AUDIT_FILE, CASCADE_CONFIG_DIR, CASCADE_CONFIG_FILE, CASCADE_DASHBOARD_SECRET_FILE, CASCADE_DB_FILE, CASCADE_IGNORE_FILE, CASCADE_KEYSTORE_FILE, CASCADE_MD_FILE, CASCADE_VERSION, COMPLEXITY_T2_COUNT, Cascade, CascadeCancelledError, type CascadeConfig, type CascadeEvent, type CascadeEventType, CascadeIgnore, type CascadeMessage, CascadeRouter, type CascadeRunOptions, type CascadeRunResult, CascadeToolError, ConfigManager, type ConversationMessage, DEFAULT_API_PORT, DEFAULT_APPROVAL_REQUIRED, DEFAULT_AUTO_SUMMARIZE_AT, DEFAULT_CONTEXT_LIMIT, DEFAULT_DASHBOARD_PORT, DEFAULT_MAX_SESSION_MESSAGES, DEFAULT_RETENTION_DAYS, DEFAULT_THEME, type DashboardConfig, DashboardServer, type EscalationPayload, GLOBAL_CONFIG_DIR, GLOBAL_DB_FILE, GLOBAL_KEYSTORE_FILE, GLOBAL_RUNTIME_DB_FILE, type GenerateOptions, type GenerateResult, type HookDefinition, type HooksConfig, HooksRunner, type Identity, type ImageAttachment, Keystore, LM_STUDIO_BASE_URL, MODELS, McpClient, type McpServerConfig$1 as McpServerConfig, type MemoryConfig, MemoryStore, type Message, type MessageContent, type MessagePayload, type MessageStatus, type MessageType, type ModelInfo, type ModelOverrides, OLLAMA_BASE_URL, PROVIDER_DISPLAY_NAMES, type PeerMessage, type PeerMessageEvent, type PeerSyncPayload, type PeerSyncType, type PermissionDecision, type PermissionDecisionPayload, type PermissionRequest, type ProviderConfig, type ProviderType, type ReplMessage, type RuntimeNode, type RuntimeNodeLog, type RuntimeRefreshPayload, type RuntimeScope, type RuntimeSession, type RuntimeSnapshotPayload, type ScheduledTask, type Session, type SessionCheckpoint, type SessionMetadata, type SessionSubscriptionPayload, type StatusUpdate, type StoredMessage, type StreamChunk, T1Administrator, type T1ToT2Assignment, T1_MODEL_PRIORITY, T2Manager, type T2Result, type T2ToT3Assignment, 
T2_MODEL_PRIORITY, type T3Result, type T3ResultPayload, type T3SubtaskSpec, T3Worker, T3_MODEL_PRIORITY, THEME_NAMES, TOOL_NAMES, type TaskComplexity, TaskScheduler, Telemetry, type TelemetryConfig, type Theme, type ThemeColors, type ThemeName, type TierConfig, type TierLimits, type TierRole, type TierStatus, type TokenUsage, type ToolCall, type ToolCallBlock, type ToolDefinition, type ToolExecuteOptions, ToolRegistry, type ToolResult, type ToolsConfig, VISION_MODEL_PRIORITY, type WebSearchConfig, type WebhookConfig, type WorkspaceConfig, createCascade, runCascade, streamCascade };
+export { AZURE_BASE_URL_TEMPLATE, type ApprovalRequest, type ApprovalResponse, type AuditEntry, AuditLogger, type BenchmarksConfig, type BudgetConfig, CASCADE_AUDIT_FILE, CASCADE_CONFIG_DIR, CASCADE_CONFIG_FILE, CASCADE_DASHBOARD_SECRET_FILE, CASCADE_DB_FILE, CASCADE_IGNORE_FILE, CASCADE_KEYSTORE_FILE, CASCADE_MD_FILE, CASCADE_VERSION, COMPLEXITY_T2_COUNT, Cascade, CascadeCancelledError, type CascadeConfig, type CascadeEvent, type CascadeEventType, CascadeIgnore, type CascadeMessage, CascadeRouter, type CascadeRunOptions, type CascadeRunResult, CascadeToolError, ConfigManager, type ConversationMessage, DEFAULT_API_PORT, DEFAULT_APPROVAL_REQUIRED, DEFAULT_AUTO_SUMMARIZE_AT, DEFAULT_CONTEXT_LIMIT, DEFAULT_DASHBOARD_PORT, DEFAULT_MAX_SESSION_MESSAGES, DEFAULT_RETENTION_DAYS, DEFAULT_THEME, type DashboardConfig, DashboardServer, type EscalationPayload, GLOBAL_CONFIG_DIR, GLOBAL_DB_FILE, GLOBAL_KEYSTORE_FILE, GLOBAL_RUNTIME_DB_FILE, type GenerateOptions, type GenerateResult, type HookDefinition, type HooksConfig, HooksRunner, type Identity, type ImageAttachment, Keystore, LM_STUDIO_BASE_URL, MODELS, McpClient, type McpServerConfig$1 as McpServerConfig, type MemoryConfig, MemoryStore, type Message, type MessageContent, type MessagePayload, type MessageStatus, type MessageType, type ModelInfo, type ModelOverrides, OLLAMA_BASE_URL, PROVIDER_DISPLAY_NAMES, type PeerMessage, type PeerMessageEvent, type PeerSyncPayload, type PeerSyncType, type PermissionDecision, type PermissionDecisionPayload, type PermissionRequest, type PlanReviewConfig, type ProviderConfig, type ProviderType, type RuntimeNode, type RuntimeNodeLog, type RuntimeRefreshPayload, type RuntimeScope, type RuntimeSession, type RuntimeSnapshotPayload, type ScheduledTask, type Session, type SessionCheckpoint, type SessionMetadata, type SessionSubscriptionPayload, type StatusUpdate, type StoredMessage, type StreamChunk, T1Administrator, type T1ToT2Assignment, T1_MODEL_PRIORITY, T2Manager, type T2Result, type 
T2ToT3Assignment, T2_MODEL_PRIORITY, type T3Result, type T3ResultPayload, type T3SubtaskSpec, T3Worker, T3_MODEL_PRIORITY, THEME_NAMES, TOOL_NAMES, type TaskComplexity, TaskScheduler, Telemetry, type TelemetryConfig, type Theme, type ThemeColors, type ThemeName, type TierConfig, type TierLimits, type TierRole, type TierStatus, type TokenUsage, type ToolCall, type ToolDefinition, type ToolExecuteOptions, ToolRegistry, type ToolResult, type ToolsConfig, VISION_MODEL_PRIORITY, type WebSearchConfig, type WebhookConfig, type WorkspaceConfig, createCascade, runCascade, streamCascade };