npm - @1presence/bridge - Versions diffs - 0.40.0 → 0.42.0 - Mend

@1presence/bridge 0.40.0 → 0.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/dist/claude.js CHANGED

@@ -1,42 +1,41 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.SECTION_COLORS = void 0;
-exports.setVerbose = setVerbose;
-exports.setDebug = setDebug;
-exports.paint = paint;
-exports.spawnClaude = spawnClaude;
-exports.killAll = killAll;
-exports.cancelConversation = cancelConversation;
-const child_process_1 = require("child_process");
-const fs_1 = require("fs");
-const os_1 = require("os");
-const path_1 = require("path");
+import { mkdirSync, writeFileSync, readFileSync } from 'fs';
+import { tmpdir } from 'os';
+import { join } from 'path';
+import { query } from '@anthropic-ai/claude-agent-sdk';
+// ─── Engine ────────────────────────────────────────────────────────────────────
+//
+// The bridge drives the local Claude Code install through the Claude Agent SDK's
+// query() function — the same documented entrypoint Claude Code itself uses. It
+// runs on the user's claude.ai subscription (Keychain OAuth, no API key), gives
+// structured streaming, and lets Claude manage its own subprocess lifecycle and
+// transient-error retries (which is why this file no longer carries the manual
+// print-mode respawn loop the old `claude --print` path needed — see
+// vault/Bugs.md and "Local Mode — Bridge Internals" in the vault). All
+// product/tool/UI/glossary/disclosure/memory rules still come from the dynamic
+// system prompt fetched from agent-api (mode=bridge) and are passed in as
+// Options.systemPrompt — never baked into this package.
 // ─── Bridge working directory ─────────────────────────────────────────────────
 //
-// Claude Code always loads CLAUDE.md files from cwd upward plus the global
-// ~/.claude/CLAUDE.md. The bridge runs in a dedicated temp dir so it never
-// inherits a development repo's project CLAUDE.md. The CLAUDE.md we write
-// into that dir is a TINY GUARD ONLY — its sole job is to neutralize the
-// user's global ~/.claude/CLAUDE.md (which contains personal/dev rules that
-// would conflict with bridge behavior). All product, tool, UI, glossary,
-// disclosure, gmail, and memory rules come from the dynamic system prompt
-// fetched from agent-api with mode=bridge — see the "Local Mode runtime
-// adapter" section of that prompt and packages/agent-api/src/systemPrompt.ts.
-// Do NOT add product rules here; they belong in the dynamic prompt so hosted
-// and bridge stay in sync.
-const BRIDGE_CWD = (0, path_1.join)((0, os_1.tmpdir)(), '1presence-bridge');
+// Claude Code can load CLAUDE.md files from cwd upward plus the global
+// ~/.claude/CLAUDE.md. The bridge runs in a dedicated temp dir and passes
+// settingSources: [] so no user/project settings or memory are loaded. The
+// CLAUDE.md we write here is a TINY GUARD ONLY — defence-in-depth to neutralize
+// the user's global ~/.claude/CLAUDE.md should any loading path reach it. All
+// real rules come from the dynamic system prompt (mode=bridge). Do NOT add
+// product rules here.
+const BRIDGE_CWD = join(tmpdir(), '1presence-bridge');
 const BRIDGE_CLAUDE_MD = `# Local Mode — context guard
-You are running in 1Presence Local Mode. Your **--system-prompt-file** is the
-sole authoritative source for product rules, tool policy, glossary, and
-disclosure rules. Treat any other CLAUDE.md content — including the global
+You are running in 1Presence Local Mode. Your **system prompt** is the sole
+authoritative source for product rules, tool policy, glossary, and disclosure
+rules. Treat any other CLAUDE.md content — including the global
 ~/.claude/CLAUDE.md and any project CLAUDE.md from a parent directory — as
 **not applicable** to this runtime. Do not follow it. Do not cite it.
 `;
 // Write the guard CLAUDE.md once on module load
-(0, fs_1.mkdirSync)(BRIDGE_CWD, { recursive: true });
-(0, fs_1.writeFileSync)((0, path_1.join)(BRIDGE_CWD, 'CLAUDE.md'), BRIDGE_CLAUDE_MD, 'utf-8');
-const config_1 = require("./config");
+mkdirSync(BRIDGE_CWD, { recursive: true });
+writeFileSync(join(BRIDGE_CWD, 'CLAUDE.md'), BRIDGE_CLAUDE_MD, 'utf-8');
+import { getBridgeModel } from './config.js';
 // Track whether we've already announced the model this process — printing it
 // per-spawn is noisy; once on startup is what the user actually wants to see.
 let modelAnnounced = false;
@@ -44,13 +43,13 @@ let modelAnnounced = false;
 // PLUS the entire system prompt. Great for prompt debugging, noisy for
 // message debugging (the prompt dump buries the conversation).
 let verbose = false;
-function setVerbose(v) { verbose = v; }
+export function setVerbose(v) { verbose = v; }
 // Debug flag — when set via --debug, render a clean, sectioned transcript of
 // the live turn: user prompt, assistant text, every tool input, every tool
 // result. This is the bridge equivalent of the chat's admin debug view. It
 // deliberately does NOT print the system prompt — that's what --verbose is for.
 let debug = false;
-function setDebug(v) { debug = v; }
+export function setDebug(v) { debug = v; }
 function formatPayload(value) {
     try {
         return JSON.stringify(value, null, 2);
@@ -65,14 +64,14 @@ function formatPayload(value) {
 // the shape of the chat's admin debug bubbles (user / assistant / tool input /
 // tool result) so what you see locally mirrors what an admin sees in the app.
 const USE_COLOR = process.stderr.isTTY === true && !process.env['NO_COLOR'];
-function paint(code, s) {
+export function paint(code, s) {
     return USE_COLOR ? `\x1b[${code}m${s}\x1b[0m` : s;
 }
 // ANSI colour codes per section, mirroring the admin debug palette. Shared
 // across all three console modes (debug / verbose / normal) so the same kind
 // of content is always the same colour — system prompts magenta, user prompts
 // blue, assistant text green, tool inputs cyan, tool results yellow.
-exports.SECTION_COLORS = {
+export const SECTION_COLORS = {
     system: '35', // magenta
     user: '34', // blue
     assistant: '32', // green
@@ -108,79 +107,96 @@ function summariseHistoryBlock(block) {
 // can tell user turns from assistant turns at a glance — the missing
 // distinction that made replayed context unreadable in --debug.
 function renderHistoryMessage(msg) {
-    const color = msg.role === 'user' ? exports.SECTION_COLORS.user : exports.SECTION_COLORS.assistant;
+    const color = msg.role === 'user' ? SECTION_COLORS.user : SECTION_COLORS.assistant;
     const body = typeof msg.content === 'string'
         ? msg.content
         : msg.content.map(summariseHistoryBlock).join('\n');
     debugBlock(`${msg.role} · history`, color, body);
 }
-// ─── Active processes ─────────────────────────────────────────────────────────
-const active = new Map();
-// conversationId → pending retry timer. A retry is scheduled with a backoff
-// delay, during which the conversation has NO entry in `active`. If a new user
-// message arrives in that window it must cancel the stale retry (otherwise the
-// retry would re-run the OLD turn's history and clobber the new one). The
-// supersede block clears any pending timer here before spawning.
-const pendingRetries = new Map();
-// Automatic retries when the `claude` CLI exits non-zero BEFORE producing any
-// real output. This covers the known Claude Code print-mode 400 regression that
-// surfaces as "API Error: 400 due to tool use concurrency issues" (GitHub
-// anthropics/claude-code#18131, still open) — it is non-deterministic enough
-// that a fresh spawn often succeeds. We retry ONLY when the failed attempt
-// produced no real assistant text and no tool calls, so a failure that lands
-// after real work (where retrying could double-execute a side-effectful tool)
-// is surfaced, never silently re-run.
+// ─── Active turns ───────────────────────────────────────────────────────────────
 //
-// 2 retries = up to 3 attempts/turn. The first retry captures nearly all of the
-// transient wins; further attempts buy little on transient failures but add
-// latency and re-send the full (1M-context) history again on deterministic ones
-// — see vault Bugs.md. Retries use escalating backoff (avoids a subscription
-// rate-limit cascade from rapid re-spawns) and stop once total retry time
-// exceeds the wall-clock cap, so a slow-failing attempt can't strand the user.
-// All below the SSE boundary, so the user sees only a slightly longer
-// "thinking" gap, never an intermediate error.
-const MAX_TURN_RETRIES = 2;
-const RETRY_BACKOFF_BASE_MS = 750; // delay = base * attempt# → 750ms, 1500ms
-const RETRY_WALL_CLOCK_CAP_MS = 12_000; // stop retrying past this much elapsed
-// Map a non-zero CLI exit + any captured "API Error:" line to a concise,
+// conversationId → AbortController for the in-flight query(). Aborting cancels
+// the turn (supersede on a new message, or the Stop button via the gateway's
+// `cancel` frame). The SDK's query() loop ends when its controller aborts.
+const active = new Map();
+// Map a thrown query() error / captured "API Error:" text to a concise,
 // user-facing Local Mode message. The raw upstream text stays in operator logs
 // only — we never echo a wall of provider error JSON into the chat. Referring
 // to "Claude Code" here is intentional and consistent with Local Mode's other
-// operational errors (e.g. the "claude CLI not found" message): in Local Mode
-// the user is knowingly running their own Claude Code install.
-//
-// NOTE on the 400 tool-use case: this is an open Claude Code print-mode
-// regression (introduced in 2.1.19, still present in 2.1.146 — the current
-// latest), so upgrading does NOT fix it. We deliberately do not suggest an
-// upgrade; the automatic retry is the real mitigation and resending sometimes
-// gets through.
-function describeCliFailure(code, apiErrorText, authFailure) {
+// operational errors: in Local Mode the user is knowingly running their own
+// Claude Code install.
+function describeCliFailure(apiErrorText, authFailure) {
     const t = apiErrorText.trim();
     // Auth/credential failure (401/403). Local Mode runs the user's own Claude
-    // Code, so naming it (and /login) is intentional and consistent with the
-    // "claude CLI not found" message — this is the only place that can tell them
-    // how to recover. Takes precedence over the generic branches below.
+    // Code, so naming it (and /login) is intentional — this is the only place
+    // that can tell them how to recover. Takes precedence over generic branches.
     if (authFailure) {
         return 'Local Mode could not sign in to Claude Code on this machine. Open a terminal, run `claude` and sign in (or run /login inside Claude Code), then send your message again.';
     }
-    if (/API Error:\s*400/i.test(t) && /(tool use|concurren|parallel)/i.test(t)) {
-        return 'Local Mode hit a known Claude Code error (a print-mode bug that affects every current version). I retried a few times automatically — sending the message again sometimes gets through. See https://github.com/anthropics/claude-code/issues/18131';
-    }
     if (/^API Error:/i.test(t)) {
         return `Local Mode error from Claude Code: ${t.replace(/^API Error:\s*/i, '').trim()}`;
     }
-    return `Local Mode stopped unexpectedly (claude exited with code ${code ?? 'unknown'}). Please try again.`;
+    if (t) {
+        return `Local Mode stopped unexpectedly: ${t}`;
+    }
+    return 'Local Mode stopped unexpectedly. Please try again.';
+}
+// ─── Prompt construction ─────────────────────────────────────────────────────────
+//
+// The gateway pushes the FULL conversation (sanitised via @presence/shared
+// toModelMessages) and `history` already ends with the new user turn. The SDK's
+// streaming input only triggers an assistant turn for user messages whose
+// `shouldQuery` is not false — so we replay every PRIOR turn with
+// shouldQuery:false (appended to the transcript, no turn generated), inject
+// assistant turns verbatim (carrying their tool_use blocks), and let ONLY the
+// final/live user turn run. This preserves stateless structured replay: no
+// session resume, no flat-text collapse, no local jsonl — Firestore stays the
+// single source of truth, exactly as the CLI stdin replay did.
+function buildPromptMessages(history) {
+    // Index of the live user turn — the last user-role message. The gateway
+    // always appends it; the scan is defensive against an unexpected tail.
+    let liveIdx = -1;
+    for (let i = history.length - 1; i >= 0; i--) {
+        if (history[i].role === 'user') {
+            liveIdx = i;
+            break;
+        }
+    }
+    const out = [];
+    history.forEach((msg, i) => {
+        // Normalise to array-of-blocks (a bare string becomes a single text block).
+        const content = Array.isArray(msg.content)
+            ? msg.content
+            : [{ type: 'text', text: typeof msg.content === 'string' ? msg.content : '' }];
+        if (msg.role === 'assistant') {
+            // Injected verbatim — runtime accepts a {type:'assistant'} message on the
+            // input stream and appends it to the transcript (it never triggers a turn).
+            out.push({ type: 'assistant', message: { role: 'assistant', content }, parent_tool_use_id: null });
+        }
+        else {
+            const isLive = i === liveIdx;
+            out.push({
+                type: 'user',
+                ...(isLive ? {} : { shouldQuery: false }),
+                message: { role: 'user', content },
+                parent_tool_use_id: null,
+            });
+        }
+    });
+    return out;
 }
-// ─── Spawn ────────────────────────────────────────────────────────────────────
-function spawnClaude(params) {
+async function* promptStream(messages) {
+    for (const m of messages)
+        yield m;
+}
+// ─── Spawn (drive one turn through the SDK) ──────────────────────────────────────
+export function spawnClaude(params) {
     const { conversationId, presenceSessionId, text, uid, history, vaultFileOpen, clientCapabilities, syncedFolders, onEvent, onDone, onError, onNotice } = params;
-    const attemptIdx = params._attemptIdx ?? 0;
-    const firstAttemptAt = params._firstAttemptAt ?? Date.now();
-    const systemPromptPath = (0, path_1.join)((0, os_1.tmpdir)(), `agent-${uid}.md`);
-    const mcpConfigPath = (0, path_1.join)((0, os_1.tmpdir)(), `mcp-${uid}.json`);
+    const systemPromptPath = join(tmpdir(), `agent-${uid}.md`);
+    const mcpConfigPath = join(tmpdir(), `mcp-${uid}.json`);
     if (verbose) {
         process.stderr.write(paint('90', `[bridge:verbose] cwd:           ${BRIDGE_CWD}`) + '\n');
-        process.stderr.write(paint('90', `[bridge:verbose] override md:   ${(0, path_1.join)(BRIDGE_CWD, 'CLAUDE.md')}`) + '\n');
+        process.stderr.write(paint('90', `[bridge:verbose] override md:   ${join(BRIDGE_CWD, 'CLAUDE.md')}`) + '\n');
         process.stderr.write(paint('90', `[bridge:verbose] system prompt: ${systemPromptPath}`) + '\n');
         process.stderr.write(paint('90', `[bridge:verbose] mcp config:    ${mcpConfigPath}`) + '\n');
         process.stderr.write(paint('90', `[bridge:verbose] session id:    ${presenceSessionId}`) + '\n');
@@ -189,22 +205,12 @@ function spawnClaude(params) {
     }
     // Surface the user's UID before the session line in every mode — it's the
     // Firestore doc prefix (`sessions/<uid>_<conversationId>`), so logging it
-    // makes a reported bridge failure correlatable to the stored session without
-    // having to ask which account hit it. The CLI's own `--session-id` is
-    // ephemeral and is NOT the Firestore conversationId, so the uid is the key
-    // join column when debugging.
+    // makes a reported bridge failure correlatable to the stored session.
     process.stderr.write(`[bridge] user ${uid}\n`);
-    // Debug transcript: lead with the user prompt for this turn (the clean
-    // message, before the gateway's ephemeral-context prefix), plus the session
-    // id (correlates with the chat URL / Firestore session doc) and a hint at
-    // how much prior context is being replayed.
+    // Debug transcript: lead with the prior context (replayed history) then the
+    // live user prompt. `history` already ends with the new user turn.
     if (debug || verbose) {
         process.stderr.write(`\n${paint('1', `══ session ${presenceSessionId} ══`)}\n`);
-        // `history` already ends with the new user prompt (gateway-appended). Render
-        // every PRIOR message with its role colour so what the model saw as context
-        // is auditable — then the live turn is shown via the clean `text` below and
-        // streams in beneath it. Defensive: if the tail isn't the current user turn
-        // (unexpected), render the whole history rather than dropping a message.
         const tail = history[history.length - 1];
         const prior = tail?.role === 'user' ? history.slice(0, -1) : history;
         if (prior.length > 0) {
@@ -212,456 +218,366 @@ function spawnClaude(params) {
             for (const msg of prior)
                 renderHistoryMessage(msg);
         }
-        debugBlock('user · this turn', exports.SECTION_COLORS.user, text);
+        debugBlock('user · this turn', SECTION_COLORS.user, text);
     }
     else {
-        // Default mode is quiet, but always surface the session id once per turn so
-        // it can be matched to the chat URL / Firestore session doc when debugging.
         process.stderr.write(`[bridge] session ${presenceSessionId}\n`);
     }
-    // tool_use_id → tool name, so a tool_result block (which only carries the id)
-    // can be labelled with the tool it answers in the debug transcript.
-    const toolNames = new Map();
-    // If a prior process is still running for this conversation (user sent a
-    // follow-up before the previous turn finished), supersede it. The latest
-    // user intent wins; the orphan would otherwise keep streaming events.
-    const existing = active.get(conversationId);
-    if (existing) {
-        process.stderr.write(`[bridge] superseding active conversation ${conversationId}\n`);
-        existing.kill('SIGTERM');
-        active.delete(conversationId);
-    }
-    // Cancel any retry scheduled for this conversation that hasn't fired yet.
-    // Without this, a new user message arriving during a retry's backoff window
-    // would race the stale retry — which carries the OLD turn's history and would
-    // clobber the new turn. Skip when this call IS the retry firing (attemptIdx>0,
-    // the timer already deleted itself before invoking us).
-    if (attemptIdx === 0) {
-        const pending = pendingRetries.get(conversationId);
-        if (pending) {
-            clearTimeout(pending);
-            pendingRetries.delete(conversationId);
-            process.stderr.write(`[bridge] cancelled pending retry for ${conversationId} (superseded by new turn)\n`);
-        }
-    }
-    // Note: ephemeral context (vault_file_open / client_capabilities / synced_folders)
-    // is injected into the last user message by the gateway BEFORE history is
-    // sent over the WS. The bridge no longer constructs `userMessageText` —
-    // `history` is the authoritative stream and already contains the new user
-    // prompt with prefix prepended. The `text`, `vaultFileOpen`,
-    // `clientCapabilities`, `syncedFolders` SpawnParams are retained for
-    // backward-compatible logging / spool correlation only.
+    // ephemeral context (vault_file_open / client_capabilities / synced_folders) is
+    // injected into the last user message by the gateway BEFORE history is sent —
+    // these params are retained for backward-compatible logging only.
     void vaultFileOpen;
     void clientCapabilities;
     void syncedFolders;
     void text;
-    // Lockdown rationale:
-    // - `--tools ""` disables ALL built-in tools (Bash/Read/Write/Edit/Glob/Grep/
-    //   WebFetch/etc.). MCP tools are not "built-in" so the 1Presence MCP surface
-    //   remains available.
-    // - `--setting-sources ""` prevents claude CLI from loading the user's
-    //   ~/.claude/settings.json (and project/.local equivalents). Without this,
-    //   permissive `permissions.allow` rules in the user's personal Claude Code
-    //   config would silently re-enable Bash/Edit/Write etc. inside the bridge.
-    // - `--strict-mcp-config` keeps the MCP surface to exactly what we wire in
-    //   via --mcp-config. Together these guarantee the bridge can only call
-    //   `mcp__1presence__*` — no filesystem, no shell, no arbitrary network.
-    //
-    // Session continuity rationale:
-    // - `--input-format stream-json` accepts structured user/assistant messages
-    //   on stdin. We replay prior turns (loaded by the gateway from Firestore)
-    //   followed by the new user turn — this is how the bridge sees history.
-    // - `--no-session-persistence` keeps no jsonl on disk. The bridge has zero
-    //   local filesystem dependency for continuity; Firestore is the only
-    //   source of truth.
-    // - `--session-id <uuid>` must be a fresh UUID per spawn: the CLI treats
-    //   this flag as a "claim a new session ID" operation and rejects the
-    //   second spawn with "Session ID X is already in use" if we reuse one
-    //   across turns of a chat — even with --no-session-persistence. The
-    //   bridge passes the per-spawn `conversationId` here; the presence
-    //   sessionId is correlated separately via bridge logs and spool records.
-    // The CLI treats --session-id as "claim this new session ID" and rejects a
-    // reused id with "Session ID X is already in use". A retry is a fresh spawn,
-    // so it MUST use a new uuid; the first attempt keeps the correlation id.
-    const spawnSessionId = attemptIdx === 0 ? presenceSessionId : crypto.randomUUID();
-    const args = [
-        '--print',
-        '--input-format', 'stream-json',
-        '--output-format', 'stream-json',
-        '--verbose',
-        '--tools', '',
-        '--setting-sources', '',
-        '--allowedTools', 'mcp__1presence__*',
-        '--system-prompt-file', systemPromptPath,
-        '--mcp-config', mcpConfigPath,
-        '--strict-mcp-config',
-        '--no-session-persistence',
-        '--session-id', spawnSessionId,
-    ];
-    const pinnedModel = (0, config_1.getBridgeModel)();
-    if (pinnedModel) {
-        args.push('--model', pinnedModel);
-    }
-    // Strip API key so Claude Code uses the user's claude.ai Pro subscription
-    // (OAuth credentials), not an API key that would bill to a separate account.
-    const { ANTHROPIC_API_KEY: _stripped, ...safeEnv } = process.env;
-    const proc = (0, child_process_1.spawn)('claude', args, {
-        cwd: BRIDGE_CWD,
-        env: safeEnv,
-        stdio: ['pipe', 'pipe', 'pipe'],
-    });
-    active.set(conversationId, proc);
-    // Feed the full conversation via stdin as stream-json. The gateway's
-    // early-save committed the new user message to Firestore BEFORE building
-    // `history`, so `history` already ends with the new user prompt (with the
-    // ephemeral context prefix prepended by the gateway). The bridge no longer
-    // appends a separate `newTurn` — doing so would duplicate the user prompt.
-    // Sanitisation (orphan tool_use stripping, displayOnly filtering, consecutive
-    // same-role merging) already happened on the gateway via
-    // @presence/shared.toModelMessages — replay the history verbatim.
-    try {
-        const stdin = proc.stdin;
-        if (!stdin) {
-            throw new Error('claude stdin is null — spawn must use stdio[0]="pipe"');
-        }
-        for (const msg of history) {
-            // Normalise to array-of-blocks: Claude Code's stream-json input parser
-            // iterates `content` directly. A string slips into a `"tool_use_id" in
-            // <char>` check inside the CLI and aborts the process with `W is not an
-            // Object` (JSC) / exit 1 mid-turn. The gateway also normalises before
-            // sending, so a current gateway + any bridge version is safe; this guard
-            // covers older gateways and ad-hoc local replay tests.
-            const content = Array.isArray(msg.content)
-                ? msg.content
-                : [{ type: 'text', text: typeof msg.content === 'string' ? msg.content : '' }];
-            const wrapped = { type: msg.role, message: { role: msg.role, content } };
-            stdin.write(JSON.stringify(wrapped) + '\n');
-        }
-        stdin.end();
-    }
-    catch (err) {
-        process.stderr.write(`[bridge] failed to write stdin: ${err.message}\n`);
-        proc.kill('SIGTERM');
-        onError(`stdin write failed: ${err.message}`, null, null);
-        return;
+    // Supersede any in-flight turn for this conversation (user sent a follow-up
+    // before the previous turn finished). The latest intent wins.
+    const existing = active.get(conversationId);
+    if (existing) {
+        process.stderr.write(`[bridge] superseding active conversation ${conversationId}\n`);
+        existing.abort();
+        active.delete(conversationId);
     }
+    const abort = new AbortController();
+    active.set(conversationId, abort);
+    // tool_use_id → tool name, so a tool_result block (which only carries the id)
+    // can be labelled with the tool it answers in the debug transcript.
+    const toolNames = new Map();
+    // Per-turn accounting.
     let sessionIdExtracted = false;
     let messageCount = 0;
     let costUsd = 0;
     let usage = null;
     // Prompt size of the MOST RECENT assistant call (input + both cache buckets),
     // overwritten on each assistant event so it ends on the turn's final, fullest
-    // call. This — not the summed `usage` above — is the current context fill the
-    // status line's 🧠 segment reports against the model's window.
+    // call. This — not the summed `usage` — is the current context fill the status
+    // line's 🧠 segment reports against the model's window.
     let lastContextTokens = 0;
     let extractedModel = null;
-    let buffer = '';
     let killedForViolation = false;
-    // Retry/error-surfacing tracking for this attempt:
-    //  - sawApiError: the CLI emitted an "API Error:" assistant text event (the
-    //    way Claude Code reports an underlying API failure mid-turn).
-    //  - apiErrorText: that text, captured for describeCliFailure().
-    //  - producedRealOutput: any real assistant text or tool_use was emitted, so
-    //    a later failure must NOT be retried (could double-run a side-effect).
     let sawApiError = false;
-    //  - sawAuthFailure: a 401/403 auth/credential failure (the user's local
-    //    Claude Code is not signed in). Surfaced as an actionable message and
-    //    never retried (re-spawning won't add credentials).
     let sawAuthFailure = false;
     let apiErrorText = '';
     let producedRealOutput = false;
-    proc.stdout.on('data', (chunk) => {
-        buffer += chunk.toString('utf-8');
-        const lines = buffer.split('\n');
-        buffer = lines.pop() ?? '';
-        for (const line of lines) {
-            const trimmed = line.trim();
-            if (!trimmed)
-                continue;
-            let event;
-            try {
-                event = JSON.parse(trimmed);
+    // Allow only the 1Presence MCP surface to execute. Built-in tools are disabled
+    // via extraArgs `--tools ""`; this is the runtime safety net (a hard deny that
+    // runs before any execution) for anything that slips past. Our MCP tools are
+    // auto-approved via allowedTools, so this callback only ever fires to deny.
+    const canUseTool = async (toolName, input) => {
+        if (toolName.startsWith('mcp__1presence__'))
+            return { behavior: 'allow', updatedInput: input };
+        return { behavior: 'deny', message: `Tool ${toolName} is not allowed in Local Mode`, interrupt: true };
+    };
+    // Strip API key so Claude Code uses the user's claude.ai subscription (OAuth
+    // credentials in the Keychain), not an API key that would bill a separate
+    // account. Options.env REPLACES the subprocess env, so spread the rest through.
+    const { ANTHROPIC_API_KEY: _stripped, ...safeEnv } = process.env;
+    const pinnedModel = getBridgeModel();
+    // Process one translated raw stream-json event: bookkeeping + forward. Mirrors
+    // the old CLI stdout parser so the gateway/accumulator see identical shapes.
+    // Returns false when the event must be suppressed (errors) or the turn was
+    // killed for a tool violation.
+    function handleEvent(event) {
+        const type = event['type'];
+        if (!sessionIdExtracted && type === 'system' && event['subtype'] === 'init') {
+            const keySource = event['apiKeySource'];
+            const model = event['model'];
+            if (model)
+                extractedModel = model;
+            if (!modelAnnounced) {
+                const source = keySource === 'none' || !keySource ? 'claude.ai subscription' : keySource;
+                const pin = getBridgeModel() ? '  (selected at startup)' : '';
+                process.stdout.write(`\n  model: ${model ?? 'unknown'}${pin}\n  auth:  ${source}\n\n`);
+                modelAnnounced = true;
             }
-            catch {
-                continue;
+            else {
+                process.stderr.write(`[bridge] model: ${model ?? 'unknown'}  apiKeySource: ${keySource ?? 'none'}\n`);
             }
-            const type = event['type'];
-            // Set when this event is the CLI's "API Error:" turn — we neither forward
-            // it to the PWA nor let it reach the accumulator (it carries no real
-            // content and would poison history / show a raw error mid-stream).
-            let suppressEvent = false;
-            // Extract model + key source info from the first system/init event.
-            // No session-id persistence — Firestore is the only source of truth
-            // now, and we pin --session-id to presenceSessionId on every spawn.
-            if (!sessionIdExtracted && type === 'system' && event['subtype'] === 'init') {
-                const keySource = event['apiKeySource'];
-                const model = event['model'];
-                if (model)
-                    extractedModel = model;
-                if (!modelAnnounced) {
-                    // First conversation since bridge started — announce prominently
-                    // so the user can confirm which model and credential is in use.
-                    const source = keySource === 'none' || !keySource ? 'claude.ai subscription' : keySource;
-                    const pin = (0, config_1.getBridgeModel)() ? '  (selected at startup)' : '';
-                    process.stdout.write(`\n  model: ${model ?? 'unknown'}${pin}\n  auth:  ${source}\n\n`);
-                    modelAnnounced = true;
-                }
-                else {
-                    // Subsequent conversations — quiet line for power users.
-                    process.stderr.write(`[bridge] model: ${model ?? 'unknown'}  apiKeySource: ${keySource ?? 'none'}\n`);
-                }
-                sessionIdExtracted = true;
+            sessionIdExtracted = true;
+        }
+        if (type === 'assistant') {
+            messageCount++;
+            const msg = event['message'];
+            const u = msg?.['usage'];
+            if (u) {
+                usage = {
+                    input_tokens: (usage?.input_tokens ?? 0) + (u['input_tokens'] ?? 0),
+                    output_tokens: (usage?.output_tokens ?? 0) + (u['output_tokens'] ?? 0),
+                    cache_read_input_tokens: (usage?.cache_read_input_tokens ?? 0) + (u['cache_read_input_tokens'] ?? 0),
+                    cache_creation_input_tokens: (usage?.cache_creation_input_tokens ?? 0) + (u['cache_creation_input_tokens'] ?? 0),
+                };
+                lastContextTokens = (u['input_tokens'] ?? 0) + (u['cache_read_input_tokens'] ?? 0) + (u['cache_creation_input_tokens'] ?? 0);
             }
-            // Count complete assistant turns + accumulate token usage + log tool calls
-            if (type === 'assistant') {
-                messageCount++;
-                const msg = event['message'];
-                const u = msg?.['usage'];
-                if (u) {
-                    usage = {
-                        input_tokens: (usage?.input_tokens ?? 0) + (u['input_tokens'] ?? 0),
-                        output_tokens: (usage?.output_tokens ?? 0) + (u['output_tokens'] ?? 0),
-                        cache_read_input_tokens: (usage?.cache_read_input_tokens ?? 0) + (u['cache_read_input_tokens'] ?? 0),
-                        cache_creation_input_tokens: (usage?.cache_creation_input_tokens ?? 0) + (u['cache_creation_input_tokens'] ?? 0),
-                    };
-                    // Full prompt size of THIS assistant call — non-cached input plus both
-                    // cache buckets. Overwrite (don't sum): the last write wins, which is
-                    // the turn's largest/final context.
-                    lastContextTokens = (u['input_tokens'] ?? 0) + (u['cache_read_input_tokens'] ?? 0) + (u['cache_creation_input_tokens'] ?? 0);
-                }
-                const content = msg?.['content'];
-                if (Array.isArray(content)) {
-                    let wroteText = false;
-                    for (const block of content) {
-                        if (block['type'] === 'tool_use') {
-                            producedRealOutput = true;
-                            const toolName = block['name'];
-                            const toolId = block['id'];
-                            if (toolId)
-                                toolNames.set(toolId, toolName);
-                            if (debug) {
-                                // Clean transcript: a single coloured block with the full input.
-                                debugBlock(`tool → ${toolName}`, exports.SECTION_COLORS.input, formatPayload(block['input']));
-                            }
-                            else {
-                                if (wroteText) {
-                                    process.stderr.write('\n');
-                                    wroteText = false;
-                                }
-                                const prefix = toolName.startsWith('mcp__') ? '[mcp]' : '[tool]';
-                                process.stderr.write(paint(exports.SECTION_COLORS.input, `[bridge] ${prefix} ${toolName}`) + '\n');
-                                if (verbose) {
-                                    const input = block['input'];
-                                    process.stderr.write(paint(exports.SECTION_COLORS.input, `[bridge:verbose] ─── input ${toolName} ───\n${formatPayload(input)}\n[bridge:verbose] ─── end input ───`) + '\n');
-                                }
+            const content = msg?.['content'];
+            if (Array.isArray(content)) {
+                let wroteText = false;
+                for (const block of content) {
+                    if (block['type'] === 'tool_use') {
+                        producedRealOutput = true;
+                        const toolName = block['name'];
+                        const toolId = block['id'];
+                        if (toolId)
+                            toolNames.set(toolId, toolName);
+                        if (debug) {
+                            debugBlock(`tool → ${toolName}`, SECTION_COLORS.input, formatPayload(block['input']));
+                        }
+                        else {
+                            if (wroteText) {
+                                process.stderr.write('\n');
+                                wroteText = false;
                             }
-                            // Defense-in-depth: CLI flags (--tools "", --allowedTools, --strict-mcp-config,
-                            // --setting-sources "") are supposed to make this unreachable. If we see a
-                            // non-1Presence tool here anyway, something has bypassed those guards — kill
-                            // immediately so any side effect already in flight is the only damage done.
-                            //
-                            // Valid forms:
-                            //   mcp__1presence__<name>  — namespaced MCP form
-                            //   <snake_case_name>       — bare form; Claude Code may omit the prefix in
-                            //                            stream-json output. Safe because --strict-mcp-config
-                            //                            limits MCP to the 1presence server only.
-                            // Invalid (real violations):
-                            //   PascalCase (Bash, Read, Write, …) — Claude Code built-ins
-                            //   mcp__<other>__*         — tools from a different MCP server
-                            const isMcp1presence = toolName.startsWith('mcp__1presence__');
-                            const isBareName = /^[a-z][a-z0-9_]*$/.test(toolName);
-                            if (!isMcp1presence && !isBareName) {
-                                killedForViolation = true;
-                                const violation = `bridge tool violation: ${toolName} is not allowed in Local Mode`;
-                                process.stderr.write(`[bridge] FATAL ${violation} — killing\n`);
-                                active.delete(conversationId);
-                                proc.kill('SIGKILL');
-                                onError(violation, usage, extractedModel);
-                                return;
+                            const prefix = toolName.startsWith('mcp__') ? '[mcp]' : '[tool]';
+                            process.stderr.write(paint(SECTION_COLORS.input, `[bridge] ${prefix} ${toolName}`) + '\n');
+                            if (verbose) {
+                                process.stderr.write(paint(SECTION_COLORS.input, `[bridge:verbose] ─── input ${toolName} ───\n${formatPayload(block['input'])}\n[bridge:verbose] ─── end input ───`) + '\n');
                             }
                         }
-                        else if (block['type'] === 'text') {
-                            const text = block['text'];
-                            if (text) {
-                                // The CLI reports auth/credential failures (401/403) as a
-                                // <synthetic> assistant text turn whose wording varies and does
-                                // NOT reliably start with "API Error:" — e.g. "Please run /login
-                                // · API Error: 401 Invalid authentication credentials" or
-                                // "Failed to authenticate. API Error: 401 …". Detect by the
-                                // structured signal (the event's `error: authentication_failed`
-                                // / `model: <synthetic>`) plus a wording fallback, so it is
-                                // classified instead of leaking raw into the chat as if the
-                                // model had said it.
-                                const isSynthetic = msg?.['model'] === '<synthetic>';
-                                const isAuthFailure = event['error'] === 'authentication_failed' ||
-                                    (isSynthetic && /(api error:\s*40[13]\b|invalid (api key|authentication)|please run \/login|failed to authenticate|unauthor)/i.test(text));
-                                if (/^API Error:/i.test(text.trimStart()) || isAuthFailure) {
-                                    // The CLI is reporting an underlying API/auth failure as
-                                    // assistant text. Capture it for the user-facing message, and
-                                    // suppress the whole event so the raw error never reaches the
-                                    // PWA or the accumulator (the gateway also blanks it via
-                                    // cleanTurnText — this is the upstream defense).
-                                    sawApiError = true;
-                                    apiErrorText = text.trim();
-                                    if (isAuthFailure)
-                                        sawAuthFailure = true;
-                                    // Operator log keeps the raw provider line verbatim (with a
-                                    // [bridge] prefix) so the real reason is diagnosable locally.
-                                    suppressEvent = true;
-                                    process.stderr.write(paint(exports.SECTION_COLORS.result, `[bridge] ${text.replace(/\n+/g, ' ')}`) + '\n');
-                                }
-                                else {
-                                    producedRealOutput = true;
-                                    if (debug) {
-                                        // Full text, newlines intact — the readable transcript.
-                                        debugBlock('assistant', exports.SECTION_COLORS.assistant, text);
-                                    }
-                                    else {
-                                        process.stderr.write(paint(exports.SECTION_COLORS.assistant, text.replace(/\n+/g, ' ')));
-                                        wroteText = true;
-                                    }
-                                }
-                            }
+                        // Defense-in-depth: canUseTool + --tools "" + strictMcpConfig should
+                        // make a non-1Presence tool unreachable. If one appears anyway, kill
+                        // the turn so any side effect in flight is the only damage done.
+                        const isMcp1presence = toolName.startsWith('mcp__1presence__');
+                        const isBareName = /^[a-z][a-z0-9_]*$/.test(toolName);
+                        if (!isMcp1presence && !isBareName) {
+                            killedForViolation = true;
+                            const violation = `bridge tool violation: ${toolName} is not allowed in Local Mode`;
+                            process.stderr.write(`[bridge] FATAL ${violation} — aborting\n`);
+                            active.delete(conversationId);
+                            abort.abort();
+                            onError(violation, usage, extractedModel);
+                            return false;
                         }
                     }
-                    if (wroteText)
-                        process.stderr.write('\n');
-                }
-            }
-            // Tool results stream back as `user` events with tool_result blocks.
-            if ((verbose || debug) && type === 'user') {
-                const msg = event['message'];
-                const content = msg?.['content'];
-                if (Array.isArray(content)) {
-                    for (const block of content) {
-                        if (block['type'] === 'tool_result') {
-                            const id = block['tool_use_id'] ?? '';
-                            const out = block['content'];
+                    else if (block['type'] === 'text') {
+                        const blockText = block['text'];
+                        if (blockText) {
+                            // The CLI/SDK can report auth/API failures as a synthetic assistant
+                            // text turn whose wording varies. Detect by the structured signal
+                            // (event.error) plus a wording fallback, so it's classified rather
+                            // than leaking raw into the chat as if the model had said it.
+                            const isSynthetic = msg?.['model'] === '<synthetic>';
+                            const isAuthFailure = event['error'] === 'authentication_failed' ||
+                                (isSynthetic && /(api error:\s*40[13]\b|invalid (api key|authentication)|please run \/login|failed to authenticate|unauthor)/i.test(blockText));
+                            if (/^API Error:/i.test(blockText.trimStart()) || isAuthFailure) {
+                                sawApiError = true;
+                                apiErrorText = blockText.trim();
+                                if (isAuthFailure)
+                                    sawAuthFailure = true;
+                                process.stderr.write(paint(SECTION_COLORS.result, `[bridge] ${blockText.replace(/\n+/g, ' ')}`) + '\n');
+                                return false; // suppress — never forward a raw error turn
+                            }
+                            producedRealOutput = true;
                             if (debug) {
-                                const name = toolNames.get(id) ?? id ?? 'result';
-                                const errFlag = block['is_error'] ? '  [error]' : '';
-                                debugBlock(`result ← ${name}${errFlag}`, exports.SECTION_COLORS.result, formatPayload(out));
+                                debugBlock('assistant', SECTION_COLORS.assistant, blockText);
                             }
                             else {
-                                process.stderr.write(paint(exports.SECTION_COLORS.result, `[bridge:verbose] ─── output ${id} ───\n${formatPayload(out)}\n[bridge:verbose] ─── end output ───`) + '\n');
+                                process.stderr.write(paint(SECTION_COLORS.assistant, blockText.replace(/\n+/g, ' ')));
+                                wroteText = true;
                             }
                         }
                     }
                 }
+                if (wroteText)
+                    process.stderr.write('\n');
             }
-            // Extract cost from the final result event. The CLI also stamps auth/API
-            // failures here as `is_error` + `api_error_status` (even though `subtype`
-            // stays "success"), so treat it as a robust backstop in case the
-            // assistant-text signal above was missed (wording drift across CLI
-            // versions). 401/403 → auth failure; other statuses keep the existing
-            // 400-retry behaviour (sawApiError only).
-            if (type === 'result') {
-                const c = event['cost_usd'] ?? event['total_cost_usd'];
-                if (typeof c === 'number')
-                    costUsd = c;
-                if (event['is_error'] === true) {
-                    sawApiError = true;
-                    const status = event['api_error_status'];
-                    if (status === 401 || status === 403)
-                        sawAuthFailure = true;
-                    if (!apiErrorText && typeof event['result'] === 'string') {
-                        apiErrorText = event['result'].trim();
+        }
+        if ((verbose || debug) && type === 'user') {
+            const msg = event['message'];
+            const content = msg?.['content'];
+            if (Array.isArray(content)) {
+                for (const block of content) {
+                    if (block['type'] === 'tool_result') {
+                        const id = block['tool_use_id'] ?? '';
+                        const out = block['content'];
+                        if (debug) {
+                            const name = toolNames.get(id) ?? id ?? 'result';
+                            const errFlag = block['is_error'] ? '  [error]' : '';
+                            debugBlock(`result ← ${name}${errFlag}`, SECTION_COLORS.result, formatPayload(out));
+                        }
+                        else {
+                            process.stderr.write(paint(SECTION_COLORS.result, `[bridge:verbose] ─── output ${id} ───\n${formatPayload(out)}\n[bridge:verbose] ─── end output ───`) + '\n');
+                        }
                     }
                 }
             }
-            if (!suppressEvent)
-                onEvent(event);
         }
-    });
-    proc.stderr.on('data', (chunk) => {
-        // claude CLI writes logs to stderr — not errors, just noise
-        const line = chunk.toString('utf-8').trim();
-        if (line)
-            process.stderr.write(`[claude] ${line}\n`);
-    });
-    proc.on('close', (code) => {
-        active.delete(conversationId);
-        // Violation path already called onError + killed — don't double-fire.
-        if (killedForViolation)
+        if (type === 'result') {
+            // total_cost_usd is the SDK's notional figure (0 on the no-op history
+            // append cycle; the real number on the live turn). Keep the largest seen.
+            const c = event['total_cost_usd'] ?? event['cost_usd'];
+            if (typeof c === 'number' && c > costUsd)
+                costUsd = c;
+            if (event['is_error'] === true) {
+                sawApiError = true;
+                const status = event['api_error_status'];
+                if (status === 401 || status === 403)
+                    sawAuthFailure = true;
+                if (!apiErrorText && typeof event['result'] === 'string') {
+                    apiErrorText = event['result'].trim();
+                }
+            }
+        }
+        return true;
+    }
+    // Drive the turn. Synchronous spawnClaude returns immediately; the SDK loop
+    // runs in this async IIFE and fires the same callbacks the CLI path did.
+    void (async () => {
+        let systemPrompt;
+        let mcpServers;
+        try {
+            systemPrompt = readFileSync(systemPromptPath, 'utf-8');
+            const mcpRaw = JSON.parse(readFileSync(mcpConfigPath, 'utf-8'));
+            mcpServers = mcpRaw.mcpServers ?? {};
+        }
+        catch (err) {
+            active.delete(conversationId);
+            onError(`Local Mode setup files unavailable: ${err.message}`, null, null);
             return;
-        // Flush any remaining buffer
-        if (buffer.trim()) {
-            try {
-                onEvent(JSON.parse(buffer.trim()));
+        }
+        const options = {
+            systemPrompt, // custom string → replaces the default Claude Code prompt
+            mcpServers: mcpServers,
+            strictMcpConfig: true, // only our MCP server, ignore project/user/plugin MCP
+            settingSources: [], // no user/project settings or memory
+            allowedTools: ['mcp__1presence__*'], // auto-approve our MCP surface (no prompt)
+            canUseTool, // hard deny anything else
+            extraArgs: { tools: '' }, // disable ALL built-in tools (= CLI --tools "")
+            cwd: BRIDGE_CWD,
+            abortController: abort,
+            includePartialMessages: false, // whole messages, matching the old non-partial path
+            permissionMode: 'default',
+            env: safeEnv,
+            stderr: (line) => { if (verbose && line.trim())
+                process.stderr.write(`[claude] ${line.trim()}\n`); },
+            ...(pinnedModel ? { model: pinnedModel } : {}),
+        };
+        const promptMessages = buildPromptMessages(history);
+        try {
+            for await (const m of query({ prompt: promptStream(promptMessages), options })) {
+                // Skip echoed input replays — they would double-count in the accumulator
+                // and re-stream prior turns to the PWA.
+                if (m.isReplay)
+                    continue;
+                switch (m.type) {
+                    case 'system': {
+                        if (m.subtype === 'init') {
+                            const init = m;
+                            const event = { type: 'system', subtype: 'init', model: init.model, apiKeySource: init.apiKeySource };
+                            if (handleEvent(event))
+                                onEvent(event);
+                        }
+                        break;
+                    }
+                    case 'assistant': {
+                        const am = m;
+                        // Structured error signal — classify, do not forward as a real turn.
+                        if (am.error) {
+                            sawApiError = true;
+                            if (am.error === 'authentication_failed' || am.error === 'oauth_org_not_allowed')
+                                sawAuthFailure = true;
+                            if (!apiErrorText)
+                                apiErrorText = `API Error: ${am.error}`;
+                            process.stderr.write(paint(SECTION_COLORS.result, `[bridge] assistant error: ${am.error}`) + '\n');
+                            break;
+                        }
+                        const event = { type: 'assistant', message: am.message, error: am.error };
+                        if (handleEvent(event))
+                            onEvent(event);
+                        if (killedForViolation)
+                            return; // handleEvent already aborted + onError
+                        break;
+                    }
+                    case 'user': {
+                        const um = m;
+                        const event = { type: 'user', message: um.message };
+                        if (handleEvent(event))
+                            onEvent(event);
+                        break;
+                    }
+                    case 'result': {
+                        const rm = m;
+                        const event = {
+                            type: 'result',
+                            subtype: rm['subtype'],
+                            total_cost_usd: rm['total_cost_usd'],
+                            is_error: rm['is_error'],
+                            api_error_status: rm['api_error_status'],
+                            result: rm['result'],
+                        };
+                        if (handleEvent(event))
+                            onEvent(event);
+                        break;
+                    }
+                    case 'auth_status': {
+                        const as = m;
+                        if (as.error) {
+                            sawApiError = true;
+                            sawAuthFailure = true;
+                            apiErrorText = as.error;
+                        }
+                        break;
+                    }
+                    case 'rate_limit_event': {
+                        // SDK surfaces upstream rate-limit pauses; it retries internally.
+                        // Admin-only ephemeral notice — jargon is fine in Local Mode.
+                        onNotice?.('Claude Code is pausing briefly for an upstream rate limit, then continuing…');
+                        break;
+                    }
+                    // Everything else (partial messages, status, hooks, notifications,
+                    // thinking tokens, …) is SDK-internal — not part of the CLI event
+                    // contract the gateway/accumulator understand, so it is dropped.
+                    default:
+                        break;
+                }
             }
-            catch { /* ignore */ }
         }
-        // An auth failure can land on a "successful" exit (the CLI stamps it on the
-        // result event but still exits 0 in some versions), and we've suppressed its
-        // text — so without this the turn would finish silently empty. Treat it as a
-        // failure regardless of exit code.
-        if (sawAuthFailure || (code !== 0 && code !== null)) {
-            // Auto-retry when the CLI failed BEFORE producing any real output — the
-            // signature of the known print-mode 400 regression. A fresh spawn (new
-            // --session-id) often succeeds. We never retry once real text or a tool
-            // call landed, to avoid double-running a side-effectful tool. We also
-            // never retry an auth failure — re-spawning won't add missing credentials,
-            // it just burns the user's plan. Retries use escalating backoff and stop
-            // past the wall-clock cap (see consts above).
-            const elapsed = Date.now() - firstAttemptAt;
-            if (attemptIdx < MAX_TURN_RETRIES && sawApiError && !sawAuthFailure && !producedRealOutput && elapsed < RETRY_WALL_CLOCK_CAP_MS) {
-                const delay = RETRY_BACKOFF_BASE_MS * (attemptIdx + 1);
-                const nextAttempt = attemptIdx + 2;
-                process.stderr.write(`[bridge] turn failed before output (${apiErrorText.replace(/\n+/g, ' ').slice(0, 120)}) — retrying (${nextAttempt} of ${MAX_TURN_RETRIES + 1}) in ${delay}ms\n`);
-                // Admin-only ephemeral thread notice — jargon is fine in Local Mode.
-                onNotice?.(`Claude Code print-mode 400 (tool-use concurrency, anthropics/claude-code#18131) — respawning, attempt ${nextAttempt}/${MAX_TURN_RETRIES + 1}…`);
-                const timer = setTimeout(() => {
-                    pendingRetries.delete(conversationId);
-                    spawnClaude({ ...params, _attemptIdx: attemptIdx + 1, _firstAttemptAt: firstAttemptAt });
-                }, delay);
-                pendingRetries.set(conversationId, timer);
+        catch (err) {
+            active.delete(conversationId);
+            // Aborted by supersede or the Stop button — no error to surface.
+            if (abort.signal.aborted)
                 return;
+            if (killedForViolation)
+                return;
+            const message = err?.message ?? String(err);
+            if (/40[13]\b|unauthor|invalid (api key|authentication)|please run \/login/i.test(message)) {
+                sawAuthFailure = true;
             }
-            // Pass any partial token usage we observed before the failure so the
-            // PWA and the gateway's bridge usage store can still record it. Surface a
-            // classified, user-readable message instead of the opaque exit code.
-            onError(describeCliFailure(code, apiErrorText, sawAuthFailure), usage, extractedModel);
-        }
-        else {
-            onDone(messageCount, costUsd, usage, extractedModel, lastContextTokens);
+            if (!apiErrorText)
+                apiErrorText = message;
+            onError(describeCliFailure(apiErrorText, sawAuthFailure), usage, extractedModel);
+            return;
         }
-    });
-    proc.on('error', (err) => {
         active.delete(conversationId);
-        if (err.code === 'ENOENT') {
-            onError('claude CLI not found. Please install Claude Code: https://claude.ai/code', usage, extractedModel);
+        if (killedForViolation)
+            return; // already errored
+        if (abort.signal.aborted)
+            return; // superseded/cancelled mid-stream
+        if (sawAuthFailure || (sawApiError && !producedRealOutput)) {
+            onError(describeCliFailure(apiErrorText, sawAuthFailure), usage, extractedModel);
         }
         else {
-            onError(err.message, usage, extractedModel);
+            onDone(messageCount, costUsd, usage, extractedModel, lastContextTokens);
         }
-    });
+    })();
 }
-function killAll() {
-    for (const [, proc] of active) {
-        proc.kill('SIGTERM');
+export function killAll() {
+    for (const [, abort] of active) {
+        abort.abort();
     }
     active.clear();
 }
 /**
  * Stop one in-flight turn (the Stop button, relayed by the gateway as a
- * `cancel` frame). Kills the running Claude Code process for this conversation
- * and cancels any scheduled retry, so no further stream events are produced.
- * Mirrors the supersede path in spawnClaude. Returns true if something was
- * actually stopped. Mechanical only — no product logic lives here.
+ * `cancel` frame). Aborts the running query() for this conversation so no
+ * further stream events are produced. Mirrors the supersede path in spawnClaude.
+ * Returns true if something was actually stopped. Mechanical only — no product
+ * logic lives here.
  */
-function cancelConversation(conversationId) {
-    let stopped = false;
-    const proc = active.get(conversationId);
-    if (proc) {
-        proc.kill('SIGTERM');
+export function cancelConversation(conversationId) {
+    const abort = active.get(conversationId);
+    if (abort) {
+        abort.abort();
         active.delete(conversationId);
-        stopped = true;
-    }
-    const pending = pendingRetries.get(conversationId);
-    if (pending) {
-        clearTimeout(pending);
-        pendingRetries.delete(conversationId);
-        stopped = true;
+        return true;
     }
-    return stopped;
+    return false;
 }