npm - mstro-app - Versions diffs - 0.5.1 → 0.5.6 - Mend

mstro-app 0.5.1 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (283) hide show

package/server/cli/eta-estimator.ts ADDED Viewed

@@ -0,0 +1,249 @@
+// Copyright (c) 2025-present Mstro, Inc. All rights reserved.
+/**
+ * ETA estimator for the chat composing indicator.
+ *
+ * Reads recent movements from `.mstro/history/*.json` and builds a small
+ * conditional-quantile table: for each elapsed-time checkpoint, the p50/p90
+ * of TOTAL movement duration among movements that hadn't finished yet at
+ * that elapsed time. The web indicator interpolates against this table to
+ * render "Composing · {elapsed} · ~{p50} typical · {tokens}".
+ *
+ * Why conditional-on-elapsed and not a regression on prompt features:
+ *   - prompt length is uncorrelated with duration (r≈0.05); tool count is
+ *     strong (r≈0.74) but unknown a priori. Conditioning on elapsed alone
+ *     beats a static estimate dramatically — accuracy at 5m elapsed is
+ *     ~38% MAPE vs 160% at 0s with the same lookup, because the longer the
+ *     run goes, the smaller the cohort it could still belong to.
+ *
+ * Why a quantile table and not a regression model:
+ *   - The duration distribution is heavily skewed (mean 4m20s, median 1m49s,
+ *     p99 29m). A point estimate from a regression would be misleading; the
+ *     web shows a typical/range pair so users see "around X, can be up to Y".
+ *
+ * Sample selection:
+ *   - Up to MAX_SAMPLE_FILES most recent files by mtime, keeping work bounded
+ *     and biasing toward recent behavior. Movements with durationMs < 1s or
+ *     above SANITY_CEILING_MS are dropped as outliers (cancelled before they
+ *     started, or runaway sessions that don't represent typical waits).
+ *
+ * Returns `null` when there are fewer than MIN_SAMPLES movements; the caller
+ * falls back to "no ETA" rather than inventing one from too little data.
+ */
+import { promises as fsp } from 'node:fs';
+import { join } from 'node:path';
+import type { SessionHistory } from './improvisation-types.js';
+/**
+ * Elapsed-time checkpoints, in milliseconds, for which a conditional
+ * quantile row is precomputed. Written in seconds and scaled to ms:
+ * 0s (a-priori), 10s, 30s, 1m, 2m, 5m, 10m, 15m, 25m, 40m, 60m.
+ */
+const ELAPSED_CHECKPOINTS_MS = [0, 10, 30, 60, 120, 300, 600, 900, 1_500, 2_400, 3_600]
+  .map(seconds => seconds * 1_000);
+/** Upper bound on how many history files (newest first) are sampled. */
+const MAX_SAMPLE_FILES = 200;
+/** Minimum number of movements required before a local profile is trusted. */
+const MIN_SAMPLES = 30;
+/** Durations under one second are treated as noise (errors, instant cancels). */
+const SANITY_FLOOR_MS = 1 * 1_000;
+/** Durations above six hours are treated as runaway sessions. */
+const SANITY_CEILING_MS = 21_600_000;
+/**
+ * One row of the conditional-quantile table: statistics over the movements
+ * whose total duration was strictly greater than `elapsedMs`.
+ */
+export interface EtaBucket {
+  /** Elapsed-ms threshold for this bucket. */
+  elapsedMs: number;
+  /** Conditional p50 of TOTAL duration among movements still running at elapsedMs. */
+  p50TotalMs: number;
+  /** Conditional p90 of TOTAL duration. */
+  p90TotalMs: number;
+  /** Sample count behind this bucket. */
+  n: number;
+}
+/**
+ * A full quantile table plus metadata about how it was built. `buckets` may
+ * be shorter than the checkpoint list: rows stop at the first checkpoint
+ * that no sampled movement outlived.
+ */
+export interface EtaProfile {
+  /** Buckets in ascending elapsedMs. */
+  buckets: EtaBucket[];
+  /** Number of movements the profile was built from (after sanity filtering). */
+  sampleSize: number;
+  /** ISO timestamp of when this profile was computed. */
+  computedAt: string;
+}
+/** Result of looking up an elapsed time against an {@link EtaProfile}. */
+export interface EtaPrediction {
+  /** Predicted total duration (p50). Always >= elapsed. */
+  p50TotalMs: number;
+  /** Predicted upper bound (p90). Always >= p50. */
+  p90TotalMs: number;
+  /** Sample size for the bucket used. */
+  n: number;
+}
+/**
+ * Build an EtaProfile from a `.mstro/history/` directory.
+ *
+ * @param historyDir Directory containing the per-session `*.json` history files.
+ * @param opts.maxFiles Cap on how many of the most recent files are read;
+ *   defaults to MAX_SAMPLE_FILES.
+ * @returns The profile, or `null` when fewer than MIN_SAMPLES usable
+ *   movements are available. "Usable" means after the sanity floor/ceiling
+ *   filter, so a history made mostly of sub-second or runaway movements
+ *   yields `null` rather than a profile backed by too few samples.
+ */
+export async function buildEtaProfile(
+  historyDir: string,
+  opts: { maxFiles?: number } = {},
+): Promise<EtaProfile | null> {
+  const maxFiles = opts.maxFiles ?? MAX_SAMPLE_FILES;
+  const durations = await collectRecentDurations(historyDir, maxFiles);
+  // Cheap early exit: not enough data even before filtering.
+  if (durations.length < MIN_SAMPLES) return null;
+  const profile = buildProfileFromDurations(durations);
+  // The builder discards out-of-range durations, so the raw count above can
+  // overstate the sample. Re-check against what the profile actually used.
+  return profile.sampleSize >= MIN_SAMPLES ? profile : null;
+}
+/** How long a computed profile is served before the history is rescanned. */
+const PROFILE_CACHE_TTL_MS = 5 * 60_000; // 5 minutes
+/** Cache keyed by historyDir; `pending` is present while a rebuild is in flight. */
+const profileCache = new Map<string, { profile: EtaProfile | null; expiresAt: number; pending?: Promise<EtaProfile | null> }>();
+/**
+ * Cached variant for the WebSocket flow. Many tabs of the same project ask
+ * for the same profile within minutes of each other, so results are cached
+ * per historyDir with a TTL; fresh movements feed back into the estimate
+ * once the entry expires. Concurrent callers share one in-flight rebuild.
+ *
+ * Resolves to BASELINE_ETA_PROFILE whenever the local profile is `null`
+ * (history too thin) or the rebuild fails, so new installs still get a
+ * sensible "Composing · Xs / ~Ys" indicator from prompt 1.
+ */
+export async function getEtaProfileCached(historyDir: string): Promise<EtaProfile | null> {
+  const startedAt = Date.now();
+  const cached = profileCache.get(historyDir);
+  if (cached !== undefined) {
+    if (cached.expiresAt > startedAt) return cached.profile ?? BASELINE_ETA_PROFILE;
+    if (cached.pending) return cached.pending;
+  }
+  // Store a finished result (or a failure, as null) with a fresh TTL.
+  const remember = (profile: EtaProfile | null): void => {
+    profileCache.set(historyDir, { profile, expiresAt: Date.now() + PROFILE_CACHE_TTL_MS });
+  };
+  const inFlight: Promise<EtaProfile | null> = (async () => {
+    try {
+      const built = await buildEtaProfile(historyDir);
+      remember(built);
+      return built ?? BASELINE_ETA_PROFILE;
+    } catch {
+      remember(null);
+      return BASELINE_ETA_PROFILE;
+    }
+  })();
+  // Publish the in-flight promise so callers arriving before it settles reuse it.
+  profileCache.set(historyDir, {
+    profile: cached?.profile ?? null,
+    expiresAt: cached?.expiresAt ?? 0,
+    pending: inFlight,
+  });
+  return inFlight;
+}
+/** Test hook: empty the in-process profile cache. */
+export function _clearEtaCache(): void {
+  profileCache.clear();
+}
+/**
+ * Baseline profile shipped in the package so a fresh install (no
+ * `.mstro/history`) still gets a useful "typical" estimate from the very
+ * first prompt. Numbers below were computed offline from the largest
+ * available real-world history sample (mstro's own project, 379 movements
+ * spanning short Q&A through multi-hour autonomous runs); they reflect a
+ * heavy mix of chat, planning, and execution prompts. Once a project
+ * accumulates >= MIN_SAMPLES local movements its own profile takes over.
+ *
+ * The table stops at the 25m checkpoint: there are no rows for the 40m and
+ * 60m entries of ELAPSED_CHECKPOINTS_MS (presumably no sampled movement
+ * outlived them — TODO confirm). Lookups past 25m therefore reuse the 25m
+ * row, which is backed by only 10 samples.
+ */
+export const BASELINE_ETA_PROFILE: EtaProfile = {
+  buckets: [
+    { elapsedMs: 0,         p50TotalMs: 108_000,  p90TotalMs: 768_000,   n: 379 },
+    { elapsedMs: 10_000,    p50TotalMs: 117_000,  p90TotalMs: 769_000,   n: 368 },
+    { elapsedMs: 30_000,    p50TotalMs: 155_000,  p90TotalMs: 860_000,   n: 328 },
+    { elapsedMs: 60_000,    p50TotalMs: 245_000,  p90TotalMs: 1_013_000, n: 252 },
+    { elapsedMs: 120_000,   p50TotalMs: 392_000,  p90TotalMs: 1_171_000, n: 182 },
+    { elapsedMs: 300_000,   p50TotalMs: 605_000,  p90TotalMs: 1_412_000, n: 116 },
+    { elapsedMs: 600_000,   p50TotalMs: 945_000,  p90TotalMs: 1_679_000, n:  58 },
+    { elapsedMs: 900_000,   p50TotalMs: 1_265_000, p90TotalMs: 1_845_000, n:  30 },
+    { elapsedMs: 1_500_000, p50TotalMs: 1_728_000, p90TotalMs: 1_986_000, n:  10 },
+  ],
+  sampleSize: 379,
+  computedAt: '2026-05-06T00:00:00.000Z',
+};
+/**
+ * Pure, synchronous profile builder over an in-memory list of durations (ms).
+ * Exposed for tests.
+ *
+ * Non-finite values and values outside [SANITY_FLOOR_MS, SANITY_CEILING_MS]
+ * are discarded. For each checkpoint, in ascending order, the p50/p90 of the
+ * durations strictly greater than the checkpoint are recorded; the table
+ * ends at the first checkpoint that nothing outlived. The input array is
+ * not mutated.
+ */
+export function buildProfileFromDurations(durationsMs: number[]): EtaProfile {
+  const withinBounds = (d: number): boolean =>
+    Number.isFinite(d) && d >= SANITY_FLOOR_MS && d <= SANITY_CEILING_MS;
+  const sane = durationsMs.filter(withinBounds);
+  sane.sort((x, y) => x - y);
+  const table: EtaBucket[] = [];
+  for (const checkpoint of ELAPSED_CHECKPOINTS_MS) {
+    const survivors = sane.filter(total => total > checkpoint);
+    if (survivors.length === 0) break;
+    const row: EtaBucket = {
+      elapsedMs: checkpoint,
+      p50TotalMs: quantile(survivors, 0.5),
+      p90TotalMs: quantile(survivors, 0.9),
+      n: survivors.length,
+    };
+    table.push(row);
+  }
+  const computedAt = new Date().toISOString();
+  return { buckets: table, sampleSize: sane.length, computedAt };
+}
+/**
+ * Predict total duration given current elapsed ms.
+ *
+ * Uses the last bucket whose `elapsedMs` is <= the elapsed time (buckets
+ * must be in ascending order); when elapsed is below the first bucket, the
+ * first bucket is used.
+ *
+ * @param profile Quantile table to look up against.
+ * @param elapsedMs Milliseconds since the movement started.
+ * @returns The prediction, or `null` if the profile has no buckets or
+ *   `elapsedMs` is not a finite number. The returned p50 is clamped to
+ *   elapsed so the indicator never shows a typical that has already passed,
+ *   and p90 is clamped to p50.
+ */
+export function predictEta(profile: EtaProfile, elapsedMs: number): EtaPrediction | null {
+  if (profile.buckets.length === 0) return null;
+  // NaN or ±Infinity (e.g. from a bad start timestamp) would flow through
+  // Math.max below and surface as a NaN/Infinity "typical". Report no ETA.
+  if (!Number.isFinite(elapsedMs)) return null;
+  let bucket: EtaBucket = profile.buckets[0];
+  for (const b of profile.buckets) {
+    if (b.elapsedMs <= elapsedMs) bucket = b;
+    else break;
+  }
+  // If elapsed has surpassed the last bucket's p50, the run is in the long
+  // tail. Keep the last bucket's quantiles but never report a "typical" that
+  // is shorter than elapsed itself — that would be nonsensical UX.
+  const p50TotalMs = Math.max(bucket.p50TotalMs, elapsedMs);
+  const p90TotalMs = Math.max(bucket.p90TotalMs, p50TotalMs);
+  return { p50TotalMs, p90TotalMs, n: bucket.n };
+}
+// -- internals --
+/**
+ * Collect every numeric, finite `durationMs` from the movements of the
+ * `maxFiles` most recently modified `*.json` files in `historyDir`.
+ *
+ * Best-effort by design: an unreadable directory yields `[]`, and any file
+ * that cannot be stat'ed, read, or parsed — or whose content is not an
+ * object with a `movements` array — is skipped rather than failing the whole
+ * scan. Non-object entries inside `movements` are skipped too.
+ *
+ * @param historyDir Directory to scan (not recursive).
+ * @param maxFiles Maximum number of files to read; values <= 0 or NaN read none.
+ * @returns Durations in file order (newest file first); no sanity filtering.
+ */
+async function collectRecentDurations(historyDir: string, maxFiles: number): Promise<number[]> {
+  let entries: string[];
+  try {
+    entries = (await fsp.readdir(historyDir)).filter(f => f.endsWith('.json'));
+  } catch {
+    return [];
+  }
+  if (entries.length === 0) return [];
+  // Sort by mtime DESC for recency. statting up to N files is acceptable —
+  // even a few thousand stats is sub-100ms on local disk.
+  const stats = await Promise.all(
+    entries.map(async name => {
+      try {
+        const full = join(historyDir, name);
+        const s = await fsp.stat(full);
+        return { full, mtime: s.mtimeMs };
+      } catch {
+        return null;
+      }
+    }),
+  );
+  // A negative limit would make slice() count from the end and silently drop
+  // files; treat any non-positive or NaN limit as "read nothing".
+  const limit = maxFiles > 0 ? maxFiles : 0;
+  const ordered = stats
+    .filter((x): x is { full: string; mtime: number } => x !== null)
+    .sort((a, b) => b.mtime - a.mtime)
+    .slice(0, limit);
+  const durations: number[] = [];
+  for (const { full } of ordered) {
+    let data: unknown;
+    try { data = JSON.parse(await fsp.readFile(full, 'utf-8')); } catch { continue; }
+    // Valid JSON is not necessarily an object: `null`, `42`, or `"x"` would
+    // make the property access below throw or be meaningless.
+    if (data === null || typeof data !== 'object') continue;
+    const { movements } = data as Partial<SessionHistory>;
+    if (!Array.isArray(movements)) continue;
+    for (const m of movements) {
+      if (m === null || typeof m !== 'object') continue;
+      const d = m.durationMs;
+      if (typeof d === 'number' && Number.isFinite(d)) durations.push(d);
+    }
+  }
+  return durations;
+}
+/**
+ * Pick the q-quantile of an ascending-sorted array using index
+ * `floor(length * q)`, capped at the last element (no interpolation).
+ *
+ * @param sortedAsc Values sorted ascending; not modified.
+ * @param q Fraction in [0, 1]. Out-of-range values are clamped into that
+ *   range and NaN is treated as 0, so the result is always an element of
+ *   the array instead of an out-of-bounds `undefined`.
+ * @returns The selected element, or 0 when the array is empty.
+ */
+function quantile(sortedAsc: number[], q: number): number {
+  const count = sortedAsc.length;
+  if (count === 0) return 0;
+  const fraction = Number.isNaN(q) ? 0 : Math.min(1, Math.max(0, q));
+  const idx = Math.min(count - 1, Math.floor(count * fraction));
+  return sortedAsc[idx];
+}

package/server/cli/headless/claude-invoker-process.ts CHANGED Viewed

@@ -138,7 +138,15 @@ export async function spawnAndRegister(
   runningProcesses: Map<number, ChildProcess>,
   perfStart: number,
 ): Promise<ChildProcess> {
-  const mcpConfigPath = generateMcpConfig(config.workingDir, config.verbose, prompt, randomUUID(), config.deployMode);
+  const askUserQuestionRouting = (config.tabId && config.mstroPort && config.bouncerSecret)
+    ? { tabId: config.tabId, port: config.mstroPort, bouncerSecret: config.bouncerSecret }
+    : undefined;
+  const mcpConfigPath = generateMcpConfig(config.workingDir, config.verbose, {
+    userPrompt: prompt,
+    sessionId: randomUUID(),
+    deployMode: config.deployMode,
+    askUserQuestionRouting,
+  });
   if (!mcpConfigPath && config.outputCallback) {
     config.outputCallback(

package/server/cli/headless/mcp-config.ts CHANGED Viewed

@@ -58,16 +58,36 @@ function truncatePrompt(prompt: string): string {
   return `${clean}... [truncated]`;
 }
+/**
+ * Routing context for the AskUserQuestion bridge. The bouncer subprocess
+ * uses these env vars to call back into the CLI server when Claude pauses
+ * on AskUserQuestion. Optional — without them the bouncer falls back to
+ * passing the tool through with no answers (same as legacy behavior).
+ *
+ * `generateMcpConfig` copies the fields into the bouncer environment as
+ * `MSTRO_PORT`, `MSTRO_TAB_ID`, and `MSTRO_BOUNCER_SECRET` respectively.
+ */
+export interface AskUserQuestionRouting {
+  /** Local CLI server port (e.g. 4101). Stringified into `MSTRO_PORT`. */
+  port: number;
+  /** Tab the question should be routed to in the web UI. */
+  tabId: string;
+  /** Per-process bouncer secret from `getBouncerSecret()`. */
+  bouncerSecret: string;
+}
+/** Optional settings for `generateMcpConfig`; every field may be omitted. */
+export interface GenerateMcpConfigOptions {
+  /**
+   * The user's original prompt, passed to the bouncer so its AI layer can
+   * distinguish user-requested operations from prompt injection.
+   */
+  userPrompt?: string;
+  /** Unique sessionId for the per-session config file name (filename only). */
+  sessionId?: string;
+  /** Enables deploy-mode patterns in the bouncer. */
+  deployMode?: boolean;
+  /** When present, AskUserQuestion routing env vars are added to the bouncer env. */
+  askUserQuestionRouting?: AskUserQuestionRouting;
+}
 /**
  * Generate MCP config with bouncer + user's MCP servers from ~/.claude.json.
  * Writes to ~/.mstro/mcp-config-{sessionId}.json for use with --mcp-config flag.
  * Per-session files prevent concurrent sessions from overwriting each other's config.
- *
- * @param userPrompt — The user's original prompt, passed to the bouncer so its
- *   AI layer can distinguish user-requested operations from prompt injection.
- * @param sessionId — Unique session identifier for per-session config isolation.
  */
-export function generateMcpConfig(workingDir: string, verbose: boolean = false, userPrompt?: string, sessionId?: string, deployMode?: boolean): string | null {
+export function generateMcpConfig(workingDir: string, verbose: boolean = false, options: GenerateMcpConfigOptions = {}): string | null {
+  const { userPrompt, sessionId, deployMode, askUserQuestionRouting } = options;
   try {
     if (!existsSync(MCP_SERVER_PATH)) {
       herror(`[${new Date().toISOString()}] MCP server not found at ${MCP_SERVER_PATH}`);
@@ -86,6 +106,11 @@ export function generateMcpConfig(workingDir: string, verbose: boolean = false,
         ? truncatePrompt(userPrompt)
         : userPrompt;
     }
+    if (askUserQuestionRouting) {
+      bouncerEnv.MSTRO_PORT = String(askUserQuestionRouting.port);
+      bouncerEnv.MSTRO_TAB_ID = askUserQuestionRouting.tabId;
+      bouncerEnv.MSTRO_BOUNCER_SECRET = askUserQuestionRouting.bouncerSecret;
+    }
     const mcpServers: Record<string, unknown> = {
       'mstro-bouncer': {

package/server/cli/headless/runner.ts CHANGED Viewed

@@ -8,6 +8,8 @@
  */
 import type { ChildProcess } from 'node:child_process';
+import { getCurrentMstroPort } from '../../services/runtime-info.js';
+import { getBouncerSecret } from '../../services/websocket/ask-user-question-bridge.js';
 import { type ClaudeInvokerOptions, executeClaudeCommand } from './claude-invoker.js';
 import { estimateTokensFromOutput } from './output-utils.js';
 import { enrichPromptWithContext } from './prompt-utils.js';
@@ -19,6 +21,22 @@ import type {
   SessionResult,
 } from './types.js';
+/**
+ * Default for `mstroPort` when the caller's config does not supply one:
+ * whatever the process-wide runtime-info singleton currently reports.
+ * Per the module authors this is undefined while the server hasn't started
+ * (e.g. unit tests that construct HeadlessRunner directly), in which case
+ * AskUserQuestion falls back to legacy "no answers" behavior.
+ */
+function readDefaultMstroPort(): number | undefined {
+  const port = getCurrentMstroPort();
+  return port;
+}
+/**
+ * Default for `bouncerSecret` when the caller's config does not supply one.
+ * The bridge module generates its secret at module-eval time, so in practice
+ * this is always defined; the `| undefined` return type only mirrors
+ * readDefaultMstroPort so the caller can null-coalesce both the same way.
+ */
+function readDefaultBouncerSecret(): string | undefined {
+  const secret = getBouncerSecret();
+  return secret;
+}
 // Re-export types for backward compatibility
 export type { ExecutionCheckpoint, HeadlessConfig, ImageAttachment, SessionResult, SessionState, ToolTimeoutProfile, ToolUseEvent } from './types.js';
@@ -129,6 +147,9 @@ export class HeadlessRunner {
       onToolTimeout: config.onToolTimeout,
       extraEnv: config.extraEnv,
       deployMode: config.deployMode,
+      tabId: config.tabId,
+      mstroPort: config.mstroPort ?? readDefaultMstroPort(),
+      bouncerSecret: config.bouncerSecret ?? readDefaultBouncerSecret(),
     };
   }

package/server/cli/headless/stall-assessor.ts CHANGED Viewed

@@ -10,6 +10,7 @@
  * best result, error classification) live in haiku-assessments.ts.
  */
+import type { EngineEvent } from '../../engines/EngineEvent.js';
 import { loadSkillPrompt } from '../../services/plan/agent-loader.js';
 import { spawnHaikuRaw } from './haiku-assessments.js';
 import { hlog } from './headless-logger.js';
@@ -36,6 +37,98 @@ export interface StallVerdict {
   reason: string;
 }
+/**
+ * Mutable tool-activity accumulator fed by an engine-agnostic `EngineEvent`
+ * stream. Consumed by {@link buildStallContext} to produce the tool-related
+ * fields of a {@link StallContext} without coupling to any specific engine's
+ * internal shapes.
+ *
+ * Instances are created by `createToolActivityState` and mutated in place by
+ * `applyEngineEventToActivity`.
+ */
+export interface ToolActivityState {
+  /** Tool calls observed by `tool.start` but not yet ended. */
+  pendingToolIds: Set<string>;
+  /** Names of tools still pending (used by the stall heuristic). */
+  pendingToolNames: Set<string>;
+  /** Map of toolId -> toolName so `tool.end` can drop names when the last id goes. */
+  pendingToolNameById: Map<string, string>;
+  /** Last tool name seen via `tool.start`. */
+  lastToolName?: string;
+  /** Short summary of the last tool input (url/query/command/prompt). */
+  lastToolInputSummary?: string;
+  /** Total number of `tool.start` events observed this session (never decremented). */
+  totalToolCalls: number;
+}
+/**
+ * Create an empty tool-activity state: no pending calls, zero total calls,
+ * and no "last tool" recorded. Each call returns independent collections.
+ */
+export function createToolActivityState(): ToolActivityState {
+  const pendingToolIds = new Set<string>();
+  const pendingToolNames = new Set<string>();
+  const pendingToolNameById = new Map<string, string>();
+  const fresh: ToolActivityState = {
+    pendingToolIds,
+    pendingToolNames,
+    pendingToolNameById,
+    totalToolCalls: 0,
+  };
+  return fresh;
+}
+/**
+ * Update a {@link ToolActivityState} from a single engine event. Non-tool
+ * events are ignored. This lets the stall assessor operate on any
+ * CodingAgentEngine's event stream without knowing the engine's internals.
+ *
+ * - `tool.start`: marks the call pending, records it as the last tool, and
+ *   increments `totalToolCalls`.
+ * - `tool.end`: clears the pending call and removes its name from
+ *   `pendingToolNames` unless another pending call has the same name.
+ *
+ * @param state Accumulator to mutate in place.
+ * @param event Event to apply.
+ */
+export function applyEngineEventToActivity(state: ToolActivityState, event: EngineEvent): void {
+  if (event.kind === 'tool.start') {
+    state.pendingToolIds.add(event.toolCallId);
+    state.pendingToolNames.add(event.toolName);
+    state.pendingToolNameById.set(event.toolCallId, event.toolName);
+    state.lastToolName = event.toolName;
+    state.lastToolInputSummary = summarizeToolInput(event.input);
+    state.totalToolCalls++;
+    return;
+  }
+  if (event.kind === 'tool.end') {
+    // Prefer the name recorded at tool.start. If the end event carries a
+    // different name for the same id, matching on the event's name would
+    // leave the started name in pendingToolNames indefinitely.
+    const endedName = state.pendingToolNameById.get(event.toolCallId) ?? event.toolName;
+    state.pendingToolIds.delete(event.toolCallId);
+    state.pendingToolNameById.delete(event.toolCallId);
+    // Only drop the name from pendingToolNames if no other pending call uses it.
+    let stillPending = false;
+    for (const name of state.pendingToolNameById.values()) {
+      if (name === endedName) {
+        stillPending = true;
+        break;
+      }
+    }
+    if (!stillPending) state.pendingToolNames.delete(endedName);
+  }
+}
+/**
+ * Combine an engine-agnostic activity state with caller-owned timing fields
+ * into a {@link StallContext}, the shape this module's stall heuristics and
+ * Haiku assessment consume.
+ *
+ * `pendingToolCount` is the number of pending ids, and `pendingToolNames`
+ * is a copy, so later updates to `activity` do not alter the returned
+ * context.
+ */
+export function buildStallContext(
+  activity: ToolActivityState,
+  timing: {
+    originalPrompt: string;
+    silenceMs: number;
+    elapsedTotalMs: number;
+    tokenSilenceMs?: number;
+  },
+): StallContext {
+  const { originalPrompt, silenceMs, elapsedTotalMs, tokenSilenceMs } = timing;
+  const { lastToolName, lastToolInputSummary, totalToolCalls } = activity;
+  const pendingToolCount = activity.pendingToolIds.size;
+  const pendingToolNames = new Set(activity.pendingToolNames);
+  return {
+    originalPrompt,
+    silenceMs,
+    elapsedTotalMs,
+    tokenSilenceMs,
+    lastToolName,
+    lastToolInputSummary,
+    pendingToolCount,
+    pendingToolNames,
+    totalToolCalls,
+  };
+}
+/**
+ * Produce a short, single-line summary of a tool call's input.
+ *
+ * The first truthy field among `url`, `query`, `command`, `prompt` wins and
+ * is rendered as `"<Label>: <value>"`; otherwise the whole input is rendered
+ * as JSON. The value part is truncated to 200 characters.
+ *
+ * Strings are used verbatim; object and array values are JSON-encoded
+ * instead of collapsing to "[object Object]". Values that cannot be
+ * serialized (circular structures, BigInt members) are skipped rather than
+ * thrown.
+ *
+ * @returns The summary, or `undefined` when nothing can be rendered.
+ */
+function summarizeToolInput(input: Record<string, unknown>): string | undefined {
+  const maxChars = 200;
+  // JSON.stringify throws on cycles/BigInt and returns undefined for
+  // unsupported top-level values; map both to "no text".
+  const toJson = (value: unknown): string | undefined => {
+    try {
+      return JSON.stringify(value);
+    } catch {
+      return undefined;
+    }
+  };
+  const render = (value: unknown): string | undefined =>
+    typeof value === 'object' ? toJson(value) : String(value);
+  const candidates: Array<[string, unknown]> = [
+    ['URL', input.url],
+    ['Query', input.query],
+    ['Command', input.command],
+    ['Prompt', input.prompt],
+  ];
+  for (const [label, value] of candidates) {
+    if (!value) continue;
+    const text = render(value);
+    if (text !== undefined) return `${label}: ${text.slice(0, maxChars)}`;
+  }
+  const serialized = toJson(input);
+  return serialized ? serialized.slice(0, maxChars) : undefined;
+}
 // ========== Fast Heuristic ==========
 function hasSubagentPending(pendingNames: Set<string>, lastToolName: string | undefined, hasPendingTools: boolean): boolean {

package/server/cli/headless/tool-watchdog.ts CHANGED Viewed

@@ -13,6 +13,7 @@
  * 3. Haiku tiebreaker: optional AI assessment before killing ambiguous cases
  */
+import type { EngineEvent } from '../../engines/EngineEvent.js';
 import { hlog } from './headless-logger.js';
 import type {
   ExecutionCheckpoint,
@@ -349,6 +350,26 @@ export class ToolWatchdog {
     }, extensionMs);
   }
+  /**
+   * Feed one engine-agnostic `EngineEvent` into the watchdog so any
+   * CodingAgentEngine can drive it without exposing engine-specific shapes.
+   *
+   * - `tool.start` begins a watch for the call; `onTimeout` is invoked with
+   *   that call's id when the watch's timeout callback runs.
+   * - `tool.end` clears the watch and, when the event carries a
+   *   non-negative numeric `durationMs`, records the completion time.
+   * - Every other event kind is ignored.
+   */
+  onEngineEvent(event: EngineEvent, onTimeout: (toolId: string) => void): void {
+    switch (event.kind) {
+      case 'tool.start': {
+        const notify = (): void => onTimeout(event.toolCallId);
+        this.startWatch(event.toolCallId, event.toolName, event.input, notify);
+        return;
+      }
+      case 'tool.end': {
+        this.clearWatch(event.toolCallId);
+        if (typeof event.durationMs === 'number' && event.durationMs >= 0) {
+          this.recordCompletion(event.toolName, event.durationMs);
+        }
+        return;
+      }
+      default:
+        return;
+    }
+  }
   /** Stop watching a tool (it completed normally) */
   clearWatch(toolId: string): void {
     const watch = this.activeWatches.get(toolId);

package/server/cli/headless/types.ts CHANGED Viewed

@@ -129,6 +129,18 @@ export interface HeadlessConfig {
   disallowedTools?: string[];
   /** Enable deploy-mode patterns in the bouncer (stricter rules for end-user-driven sessions) */
   deployMode?: boolean;
+  /**
+   * Tab id used to route AskUserQuestion calls back to the right web client.
+   * When set together with `mstroPort` and `bouncerSecret`, the MCP bouncer
+   * pauses Claude on AskUserQuestion and waits for the user to answer in the
+   * web UI before resuming. When unset (e.g. CLI ad-hoc runs), the bouncer
+   * falls back to legacy behavior (allow with no answers).
+   */
+  tabId?: string;
+  /** CLI server port for the AskUserQuestion bridge. Pairs with `tabId`. */
+  mstroPort?: number;
+  /** Per-process bouncer secret for the AskUserQuestion bridge. Pairs with `tabId`. */
+  bouncerSecret?: string;
 }
 export interface SessionState {
@@ -215,7 +227,7 @@ export interface ExecutionResult {
 }
 /** Resolved config with all defaults applied */
-export type ResolvedHeadlessConfig = Omit<Required<HeadlessConfig>, 'outputCallback' | 'thinkingCallback' | 'toolUseCallback' | 'tokenUsageCallback' | 'continueSession' | 'claudeSessionId' | 'imageAttachments' | 'model' | 'effortLevel' | 'toolTimeoutProfiles' | 'onToolTimeout' | 'extraEnv' | 'disallowedTools' | 'deployMode'> & {
+export type ResolvedHeadlessConfig = Omit<Required<HeadlessConfig>, 'outputCallback' | 'thinkingCallback' | 'toolUseCallback' | 'tokenUsageCallback' | 'continueSession' | 'claudeSessionId' | 'imageAttachments' | 'model' | 'effortLevel' | 'toolTimeoutProfiles' | 'onToolTimeout' | 'extraEnv' | 'disallowedTools' | 'deployMode' | 'tabId' | 'mstroPort' | 'bouncerSecret'> & {
   outputCallback?: (text: string) => void;
   thinkingCallback?: (text: string) => void;
   toolUseCallback?: (event: ToolUseEvent) => void;
@@ -230,6 +242,9 @@ export type ResolvedHeadlessConfig = Omit<Required<HeadlessConfig>, 'outputCallb
   extraEnv?: Record<string, string>;
   disallowedTools?: string[];
   deployMode?: boolean;
+  tabId?: string;
+  mstroPort?: number;
+  bouncerSecret?: string;
 };

package/server/cli/improvisation-history-store.ts CHANGED Viewed

@@ -39,7 +39,9 @@ export function loadHistory(historyPath: string, sessionId: string): SessionHist
   if (existsSync(historyPath)) {
     try {
       const data = readFileSync(historyPath, 'utf-8');
-      return JSON.parse(data) as SessionHistory;
+      const parsed = JSON.parse(data) as SessionHistory;
+      if (!parsed.engine) parsed.engine = 'claude-code';
+      return parsed;
     } catch (error) {
       herror('Failed to load history:', error);
     }
@@ -51,6 +53,7 @@ export function loadHistory(historyPath: string, sessionId: string): SessionHist
     lastActivityAt: now,
     totalTokens: 0,
     movements: [],
+    engine: 'claude-code',
   };
 }

package/server/cli/improvisation-output-queue.ts CHANGED Viewed

@@ -4,12 +4,29 @@
  * Small FIFO output buffer with a fixed-interval flush timer, used by the
  * improvisation session manager to coalesce rapid stdout writes into
  * steady `onOutput` emissions.
+ *
+ * ## Why coalesce inside `flush()`
+ *
+ * Claude's stdout arrives as many small chunks during streaming. Each chunk
+ * lands here via `queue_`. When `flush()` ran one `onEmit` per queued chunk,
+ * a streaming-heavy run produced thousands of `onOutput` events per minute,
+ * each becoming a tab-scoped broadcast that consumes a slot in the per-tab
+ * replay buffer (`tab-event-buffer.ts`). For 14-min runs with ~120 tool
+ * calls, that easily exceeded the buffer's 1000-event cap and triggered
+ * silent replay gaps on web reconnect.
+ *
+ * The flush window (50ms) is below the human-perceptible paint threshold and
+ * below WebSocket roundtrip latency, so concatenating all queued text into a
+ * single `onEmit` per tick is invisible to the user but cuts buffer pressure
+ * by 3-10× during streaming. No call site downstream depends on chunk
+ * boundaries — `onOutput` consumers (terminal renderer, history persistence)
+ * already treat the text as an opaque append.
  */
 const FLUSH_INTERVAL_MS = 50;
 export class OutputQueue {
-  private queue: Array<{ text: string; timestamp: number }> = [];
+  private queue: string[] = [];
   private timer: NodeJS.Timeout | null = null;
   constructor(private readonly onEmit: (text: string) => void) {}
@@ -20,15 +37,20 @@ export class OutputQueue {
   }
   queue_(text: string): void {
-    this.queue.push({ text, timestamp: Date.now() });
+    if (text.length === 0) return;
+    this.queue.push(text);
   }
-  /** Drain all buffered entries, emitting each via `onEmit` in order. */
+  /**
+   * Drain all buffered entries, emitting them as a single concatenated
+   * string via `onEmit`. Order is preserved (FIFO). No-op when the queue is
+   * empty so the periodic timer doesn't fire spurious empty-string emits.
+   */
   flush(): void {
-    while (this.queue.length > 0) {
-      const item = this.queue.shift();
-      if (item) this.onEmit(item.text);
-    }
+    if (this.queue.length === 0) return;
+    const merged = this.queue.join('');
+    this.queue.length = 0;
+    this.onEmit(merged);
   }
   /** Stop the flush timer. Does NOT drain; call `flush()` first if needed. */