npm - @1presence/bridge - Versions diffs - 0.34.0 → 0.35.0 - Mend

@1presence/bridge 0.34.0 → 0.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/claude.js CHANGED Viewed

@@ -111,15 +111,32 @@ function renderHistoryMessage(msg) {
 }
 // ─── Active processes ─────────────────────────────────────────────────────────
 const active = new Map();
-// Maximum automatic retries when the `claude` CLI exits non-zero BEFORE
-// producing any real output. This covers the known Claude Code print-mode 400
-// regression that surfaces as "API Error: 400 due to tool use concurrency
-// issues" (GitHub anthropics/claude-code#18131, still open) — it is
-// non-deterministic enough that a fresh spawn frequently succeeds. We retry
-// ONLY when the failed attempt produced no real assistant text and no tool
-// calls, so a failure that lands after real work (where retrying could
-// double-execute a side-effectful tool) is surfaced, never silently re-run.
-const MAX_TURN_RETRIES = 1;
+// conversationId → pending retry timer. A retry is scheduled with a backoff
+// delay, during which the conversation has NO entry in `active`. If a new user
+// message arrives in that window it must cancel the stale retry (otherwise the
+// retry would re-run the OLD turn's history and clobber the new one). The
+// supersede block clears any pending timer here before spawning.
+const pendingRetries = new Map();
+// Automatic retries when the `claude` CLI exits non-zero BEFORE producing any
+// real output. This covers the known Claude Code print-mode 400 regression that
+// surfaces as "API Error: 400 due to tool use concurrency issues" (GitHub
+// anthropics/claude-code#18131, still open) — it is non-deterministic enough
+// that a fresh spawn often succeeds. We retry ONLY when the failed attempt
+// produced no real assistant text and no tool calls, so a failure that lands
+// after real work (where retrying could double-execute a side-effectful tool)
+// is surfaced, never silently re-run.
+//
+// 2 retries = up to 3 attempts/turn. The first retry captures nearly all of the
+// transient wins; further attempts buy little on transient failures but add
+// latency and re-send the full (1M-context) history again on deterministic ones
+// — see vault Bugs.md. Retries use escalating backoff (avoids a subscription
+// rate-limit cascade from rapid re-spawns) and stop once the time since the first
+// attempt reaches the wall-clock cap, so a slow-failing attempt can't strand the user.
+// All below the SSE boundary, so the user sees only a slightly longer
+// "thinking" gap, never an intermediate error.
+const MAX_TURN_RETRIES = 2;
+const RETRY_BACKOFF_BASE_MS = 750; // delay = base * retry number → 750ms, then 1500ms
+const RETRY_WALL_CLOCK_CAP_MS = 12_000; // no new retry once this much time has elapsed since the first attempt
 // Map a non-zero CLI exit + any captured "API Error:" line to a concise,
 // user-facing Local Mode message. The raw upstream text stays in operator logs
 // only — we never echo a wall of provider error JSON into the chat. Referring
@@ -135,7 +152,7 @@ const MAX_TURN_RETRIES = 1;
 function describeCliFailure(code, apiErrorText) {
     const t = apiErrorText.trim();
     if (/API Error:\s*400/i.test(t) && /(tool use|concurren|parallel)/i.test(t)) {
-        return 'Local Mode hit a known Claude Code error (a print-mode bug that affects every current version). I retried once automatically — sending the message again sometimes gets through.';
+        return 'Local Mode hit a known Claude Code error (a print-mode bug that affects every current version). I retried a few times automatically — sending the message again sometimes gets through. See https://github.com/anthropics/claude-code/issues/18131';
     }
     if (/^API Error:/i.test(t)) {
         return `Local Mode error from Claude Code: ${t.replace(/^API Error:\s*/i, '').trim()}`;
@@ -144,8 +161,9 @@ function describeCliFailure(code, apiErrorText) {
 }
 // ─── Spawn ────────────────────────────────────────────────────────────────────
 function spawnClaude(params) {
-    const { conversationId, presenceSessionId, text, uid, history, vaultFileOpen, clientCapabilities, syncedFolders, onEvent, onDone, onError } = params;
+    const { conversationId, presenceSessionId, text, uid, history, vaultFileOpen, clientCapabilities, syncedFolders, onEvent, onDone, onError, onNotice } = params;
     const attemptIdx = params._attemptIdx ?? 0;
+    const firstAttemptAt = params._firstAttemptAt ?? Date.now();
     const systemPromptPath = (0, path_1.join)((0, os_1.tmpdir)(), `agent-${uid}.md`);
     const mcpConfigPath = (0, path_1.join)((0, os_1.tmpdir)(), `mcp-${uid}.json`);
     if (verbose) {
@@ -201,6 +219,19 @@ function spawnClaude(params) {
         existing.kill('SIGTERM');
         active.delete(conversationId);
     }
+    // Cancel any retry scheduled for this conversation that hasn't fired yet.
+    // Without this, a new user message arriving during a retry's backoff window
+    // would race the stale retry — which carries the OLD turn's history and would
+    // clobber the new turn. Skip when this call IS the retry firing (attemptIdx>0,
+    // the timer already deleted itself before invoking us).
+    if (attemptIdx === 0) {
+        const pending = pendingRetries.get(conversationId);
+        if (pending) {
+            clearTimeout(pending);
+            pendingRetries.delete(conversationId);
+            process.stderr.write(`[bridge] cancelled pending retry for ${conversationId} (superseded by new turn)\n`);
+        }
+    }
     // Note: ephemeral context (vault_file_open / client_capabilities / synced_folders)
     // is injected into the last user message by the gateway BEFORE history is
     // sent over the WS. The bridge no longer constructs `userMessageText` —
@@ -506,13 +537,23 @@ function spawnClaude(params) {
             catch { /* ignore */ }
         }
         if (code !== 0 && code !== null) {
-            // Auto-retry once when the CLI failed BEFORE producing any real output —
-            // the signature of the known print-mode 400 regression. A fresh spawn
-            // (new --session-id) usually succeeds. We never retry once real text or a
-            // tool call landed, to avoid double-running a side-effectful tool.
-            if (attemptIdx < MAX_TURN_RETRIES && sawApiError && !producedRealOutput) {
-                process.stderr.write(`[bridge] turn failed before output (${apiErrorText.replace(/\n+/g, ' ').slice(0, 120)}) — retrying once\n`);
-                spawnClaude({ ...params, _attemptIdx: attemptIdx + 1 });
+            // Auto-retry when the CLI failed BEFORE producing any real output — the
+            // signature of the known print-mode 400 regression. A fresh spawn (new
+            // --session-id) often succeeds. We never retry once real text or a tool
+            // call landed, to avoid double-running a side-effectful tool. Retries use
+            // escalating backoff and stop past the wall-clock cap (see consts above).
+            const elapsed = Date.now() - firstAttemptAt;
+            if (attemptIdx < MAX_TURN_RETRIES && sawApiError && !producedRealOutput && elapsed < RETRY_WALL_CLOCK_CAP_MS) {
+                const delay = RETRY_BACKOFF_BASE_MS * (attemptIdx + 1);
+                const nextAttempt = attemptIdx + 2;
+                process.stderr.write(`[bridge] turn failed before output (${apiErrorText.replace(/\n+/g, ' ').slice(0, 120)}) — retrying (${nextAttempt} of ${MAX_TURN_RETRIES + 1}) in ${delay}ms\n`);
+                // Admin-only ephemeral thread notice — jargon is fine in Local Mode.
+                onNotice?.(`Claude Code print-mode 400 (tool-use concurrency, anthropics/claude-code#18131) — respawning, attempt ${nextAttempt}/${MAX_TURN_RETRIES + 1}…`);
+                const timer = setTimeout(() => {
+                    pendingRetries.delete(conversationId);
+                    spawnClaude({ ...params, _attemptIdx: attemptIdx + 1, _firstAttemptAt: firstAttemptAt });
+                }, delay);
+                pendingRetries.set(conversationId, timer);
                 return;
             }
             // Pass any partial token usage we observed before the failure so the

package/dist/index.js CHANGED Viewed

@@ -244,6 +244,14 @@ async function handleMessage(conversationId, text, sessionId, history, auth, vau
                 currentWs.send(JSON.stringify({ type: 'stream', conversationId, event }));
             }
         },
+        onNotice: (message) => {
+            // Ephemeral, non-persisted thread notice (admin-only Local Mode). Relayed
+            // by the gateway to the PWA SSE stream as a `notice` AgentEvent; it does
+            // NOT go through the turn accumulator, so it never lands in history.
+            if (currentWs?.readyState === ws_1.default.OPEN) {
+                currentWs.send(JSON.stringify({ type: 'notice', conversationId, message }));
+            }
+        },
         onDone: (messageCount, costUsd, usage, model) => {
             const parts = [];
             if (usage)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@1presence/bridge",
-  "version": "0.34.0",
+  "version": "0.35.0",
   "description": "Run 1Presence on your Mac and use your Claude.ai Pro subscription from any device",
   "bin": {
     "1presence-bridge": "dist/index.js"