@offbynan/pi-cursor-provider 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/README.md +24 -2
  2. package/h2-bridge.mjs +29 -8
  3. package/package.json +1 -1
  4. package/proxy.ts +544 -215
package/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  - **Image support** — base64 `image_url` content parts forwarded to Cursor end-to-end; the upstream silently drops them
6
6
  - **Compaction support** — old turns archived as inline text to cut `getBlobArgs` round-trips from O(history) to O(tail); bridge termination errors surface as real failures instead of silent empty responses; checkpoint cleared after compaction to keep both sides in sync
7
- - **Reliability** — bridge timeouts hardened and configurable; SSE keepalive prevents pi from timing out during blob-fetching; conversation state and checkpoints survive transient failures and client disconnects
7
+ - **Reliability** — transparent retry for transient Cursor protocol errors (internal / unavailable / deadline_exceeded); HTTP/2 PING keepalive detects dead connections; stall timer kills stuck bridges; bridge timeouts hardened and configurable; SSE keepalive prevents pi from timing out during blob-fetching; conversation state and checkpoints survive transient failures and client disconnects
8
8
  - **Model support** — per-model context window inference (vs. hardcoded 200 k); runtime cap scaling when Cursor enforces a tighter window; detailed cost table for all current families; effort-suffix variants deduplicated so pi's reasoning-level setting drives the suffix automatically
9
9
  - **Thinking-tag filtering** — inline `<think>` / `<reasoning>` tags stripped from the response and routed to `reasoning_content`
10
10
  - **Fixes & observability** — `pi -p` exit hang fixed; dead TTL eviction code removed; opt-in JSONL debug logging with a bundled timeline viewer
@@ -59,6 +59,10 @@ pi → openai-completions → localhost:PORT/v1/chat/completions
59
59
  | `PI_CURSOR_PROVIDER_DEBUG_FILE` | auto in tmpdir | Override the debug log file path |
60
60
  | `PI_CURSOR_BRIDGE_INITIAL_TIMEOUT_MS` | `120000` | Kill bridge if no HTTP/2 activity within this many ms of spawn |
61
61
  | `PI_CURSOR_BRIDGE_ACTIVITY_TIMEOUT_MS` | `300000` | Kill bridge if no HTTP/2 activity for this many ms after the first frame |
62
+ | `PI_CURSOR_BRIDGE_PING_INTERVAL_MS` | `15000` | HTTP/2 PING interval to detect dead connections |
63
+ | `PI_CURSOR_BRIDGE_PING_TIMEOUT_MS` | `10000` | Timeout for each HTTP/2 PING before declaring the connection dead |
64
+ | `PI_CURSOR_BRIDGE_STALL_TIMEOUT_MS` | `120000` | Kill bridge if no data received from Cursor within this many ms |
65
+ | `PI_CURSOR_MAX_BRIDGE_RETRIES` | `2` | Max transparent retries on transient Cursor errors or bridge crashes |
62
66
  | `PI_CURSOR_TURN_ARCHIVE_THRESHOLD` | `20` | Keep this many recent turns as raw blobs; older turns are archived as inline text |
63
67
  | `PI_CURSOR_RAW_MODELS` | off | Set to disable model deduplication and see all raw Cursor model IDs |
64
68
 
@@ -150,6 +154,24 @@ The upstream has no observability. This fork adds opt-in JSONL event logging (se
150
154
  npm run debug:timeline -- --latest
151
155
  ```
152
156
 
157
+ ### Transparent retry for transient errors
158
+
159
+ When Cursor returns a retryable Connect-level error (`internal`, `unavailable`, `deadline_exceeded`) or the bridge process crashes mid-request, the proxy now automatically retries on a fresh HTTP/2 bridge — up to `PI_CURSOR_MAX_BRIDGE_RETRIES` times (default 2). The SSE response to pi stays open; the client sees at most a brief pause.
160
+
161
+ Retry is only attempted when no content has been streamed yet (so partial responses are never replayed). On retry the proxy rebuilds the Cursor request using the pre-turn checkpoint and replays cleanly.
162
+
163
+ Previously these transient errors were surfaced as `finish_reason: "error"`, requiring the user to manually continue each time.
164
+
165
+ ### HTTP/2 PING keepalive and stall detection
166
+
167
+ The bridge now configures HTTP/2-level PINGs (`PI_CURSOR_BRIDGE_PING_INTERVAL_MS` / `PI_CURSOR_BRIDGE_PING_TIMEOUT_MS`) so dead TCP connections (NAT timeout, load-balancer cycling) are detected within seconds rather than waiting for the 5-minute activity timeout.
168
+
169
+ Additionally, a stall timer (`PI_CURSOR_BRIDGE_STALL_TIMEOUT_MS`, default 120 s) kills the bridge if no data arrives from Cursor — catching cases where the HTTP/2 connection is technically alive but the server is stuck processing a stale checkpoint.
170
+
171
+ ### Usage reporting on tool-call continuations
172
+
173
+ When the proxy pauses mid-turn for a tool call and responds with pending tool calls (the partial-wait path), it now reports meaningful `usage` token counts instead of zeros. The stored `lastTotalTokens` from the previous stream segment is scaled proportionally if Cursor is enforcing a tighter context window than the model's nominal size. This lets pi track cumulative token usage accurately across multi-step tool-call turns.
174
+
153
175
  ### Bridge timeout hardening
154
176
 
155
177
  The upstream `h2-bridge.mjs` used a 30-second initial connection timeout and a 120-second activity timeout. Large conversations require Cursor to deserialise a big checkpoint and complete many `getBlobArgs` round-trips before it starts streaming tokens, which regularly exceeded these limits and caused compaction to fail with a `terminated` error.
@@ -276,7 +298,7 @@ Session state is cleared on pi lifecycle events — session switch, fork, `/tree
276
298
 
277
299
  ### Error resilience
278
300
 
279
- A bridge timeout or Connect-level error from Cursor does not wipe the stored checkpoint. The last good checkpoint survives transient failures and is used on the next retry. If Cursor sends a checkpoint before a client disconnect, that checkpoint is also preserved.
301
+ Transient Cursor errors (`internal`, `unavailable`, `deadline_exceeded`) and bridge crashes are retried automatically up to `PI_CURSOR_MAX_BRIDGE_RETRIES` times — without dropping the SSE connection to pi. The last good checkpoint survives all error types and is used on retry. If Cursor sends a checkpoint before a client disconnect, that checkpoint is also preserved.
280
302
 
281
303
  ## Requirements
282
304
 
package/h2-bridge.mjs CHANGED
@@ -87,15 +87,19 @@ if (!configBuf) process.exit(1);
87
87
  const config = JSON.parse(configBuf.toString("utf8"));
88
88
  const { accessToken, url, path: rpcPath, unary } = config;
89
89
 
90
- const client = http2.connect(url || "https://api2.cursor.sh");
91
-
92
- // Guard against initial connection failure. Reset on any h2 activity
93
- // so long-running agent conversations (with tool call round-trips) survive.
94
- // Initial timeout is generous because large conversations require Cursor to
95
- // deserialize a big checkpoint + run many getBlobArgs round-trips before it
96
- // starts streaming tokens — 30 s was too short and caused compaction failures.
97
90
  const INITIAL_TIMEOUT_MS = parseInt(process.env.PI_CURSOR_BRIDGE_INITIAL_TIMEOUT_MS ?? "") || 120_000;
98
91
  const ACTIVITY_TIMEOUT_MS = parseInt(process.env.PI_CURSOR_BRIDGE_ACTIVITY_TIMEOUT_MS ?? "") || 300_000;
92
+ const H2_PING_INTERVAL_MS = parseInt(process.env.PI_CURSOR_BRIDGE_PING_INTERVAL_MS ?? "") || 15_000;
93
+ const H2_PING_TIMEOUT_MS = parseInt(process.env.PI_CURSOR_BRIDGE_PING_TIMEOUT_MS ?? "") || 10_000;
94
+
95
+ const client = http2.connect(url || "https://api2.cursor.sh", {
96
+ // Detect dead TCP connections at the HTTP/2 level — without this, a silently
97
+ // dropped connection (NAT timeout, LB cycling) can leave the bridge waiting
98
+ // for up to ACTIVITY_TIMEOUT_MS (5 min) with no indication of failure.
99
+ pingInterval: H2_PING_INTERVAL_MS,
100
+ pingTimeout: H2_PING_TIMEOUT_MS,
101
+ });
102
+
99
103
  let timeout = setTimeout(killBridge, INITIAL_TIMEOUT_MS);
100
104
 
101
105
  function resetTimeout() {
@@ -105,12 +109,14 @@ function resetTimeout() {
105
109
 
106
110
  function killBridge() {
107
111
  clearTimeout(timeout);
112
+ process.stderr.write(JSON.stringify({ type: "exit_reason", reason: "timeout" }) + "\n");
108
113
  client.destroy();
109
- process.exit(1);
114
+ process.exit(2);
110
115
  }
111
116
 
112
117
  client.on("error", () => {
113
118
  clearTimeout(timeout);
119
+ process.stderr.write(JSON.stringify({ type: "exit_reason", reason: "connection_error" }) + "\n");
114
120
  process.exit(1);
115
121
  });
116
122
 
@@ -130,6 +136,20 @@ if (!unary) {
130
136
  }
131
137
  const h2Stream = client.request(headers);
132
138
 
139
+ // Read response headers: switch to activity timeout and forward status to stderr
140
+ h2Stream.on("response", (headers) => {
141
+ resetTimeout();
142
+ const status = headers[":status"] ?? null;
143
+ const grpcStatus = headers["grpc-status"] ?? null;
144
+ process.stderr.write(
145
+ JSON.stringify({
146
+ type: "response_headers",
147
+ status: status !== null ? Number(status) : null,
148
+ grpcStatus: grpcStatus !== null ? Number(grpcStatus) : null,
149
+ }) + "\n",
150
+ );
151
+ });
152
+
133
153
  // Forward H2 response data → stdout (length-prefixed)
134
154
  h2Stream.on("data", (chunk) => {
135
155
  resetTimeout();
@@ -145,6 +165,7 @@ h2Stream.on("end", () => {
145
165
 
146
166
  h2Stream.on("error", () => {
147
167
  clearTimeout(timeout);
168
+ process.stderr.write(JSON.stringify({ type: "exit_reason", reason: "stream_error" }) + "\n");
148
169
  client.close();
149
170
  process.exit(1);
150
171
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@offbynan/pi-cursor-provider",
3
- "version": "0.5.1",
3
+ "version": "0.5.3",
4
4
  "description": "Pi extension providing access to Cursor models via OAuth and a local OpenAI-compatible gRPC proxy",
5
5
  "type": "module",
6
6
  "license": "MIT",
package/proxy.ts CHANGED
@@ -154,6 +154,11 @@ interface PendingExec {
154
154
  decodedArgs: string;
155
155
  }
156
156
 
157
+ interface StderrData {
158
+ responseHeaders?: { status: number; grpcStatus: number | null };
159
+ exitReason?: "timeout" | "connection_error" | "stream_error";
160
+ }
161
+
157
162
  interface BridgeHandle {
158
163
  proc: Pick<ChildProcess, "kill">;
159
164
  readonly alive: boolean;
@@ -162,6 +167,7 @@ interface BridgeHandle {
162
167
  unref(): void;
163
168
  onData(cb: (chunk: Buffer) => void): void;
164
169
  onClose(cb: (code: number) => void): void;
170
+ getStderr(): StderrData;
165
171
  }
166
172
 
167
173
  export type BridgeFactory = (options: SpawnBridgeOptions) => BridgeHandle;
@@ -173,6 +179,7 @@ interface ActiveBridge {
173
179
  mcpTools: McpToolDefinition[];
174
180
  pendingExecs: PendingExec[];
175
181
  currentTurn: ParsedTurn;
182
+ lastTotalTokens: number;
176
183
  }
177
184
 
178
185
  export interface StoredConversation {
@@ -185,6 +192,15 @@ export interface StoredConversation {
185
192
  * our static inferContextWindow() estimate when Cursor enforces a tighter cap.
186
193
  */
187
194
  effectiveContextWindow?: number;
195
+ /**
196
+ * Last known usedTokens from Cursor's ConversationTokenDetails. Persisted
197
+ * so that tool-call continuations (which create a fresh StreamState) can
198
+ * report meaningful usage even if the checkpoint hasn't arrived yet in the
199
+ * new stream segment.
200
+ */
201
+ lastTotalTokens?: number;
202
+ /** Cached for transparent retry when a bridge dies mid-request. */
203
+ systemPrompt?: string;
188
204
  }
189
205
 
190
206
  interface StreamState {
@@ -263,6 +279,7 @@ interface ParsedMessages {
263
279
  // ── State ──
264
280
 
265
281
  const activeBridges = new Map<string, ActiveBridge>();
282
+ const sessionBridges = new Map<string, BridgeHandle>();
266
283
  const conversationStates = new Map<string, StoredConversation>();
267
284
  let bridgeFactory: BridgeFactory = spawnBridge;
268
285
  let debugRequestCounter = 0;
@@ -353,6 +370,7 @@ function nextDebugRequestId(): string {
353
370
 
354
371
  export const __testInternals = {
355
372
  activeBridges,
373
+ sessionBridges,
356
374
  conversationStates,
357
375
  };
358
376
 
@@ -395,7 +413,33 @@ function spawnBridge(options: SpawnBridgeOptions): BridgeHandle {
395
413
  unary: options.unary ?? false,
396
414
  });
397
415
  const proc = spawn("node", [BRIDGE_PATH], {
398
- stdio: ["pipe", "pipe", "ignore"],
416
+ stdio: ["pipe", "pipe", "pipe"],
417
+ });
418
+
419
+ const stderrData: StderrData = {};
420
+ let stderrBuf = "";
421
+ proc.stderr!.on("data", (chunk: Buffer) => {
422
+ stderrBuf += chunk.toString("utf8");
423
+ let nl: number;
424
+ while ((nl = stderrBuf.indexOf("\n")) !== -1) {
425
+ const line = stderrBuf.slice(0, nl).trim();
426
+ stderrBuf = stderrBuf.slice(nl + 1);
427
+ if (!line) continue;
428
+ debugLog("bridge.stderr", { rpcPath: options.rpcPath, line });
429
+ try {
430
+ const parsed = JSON.parse(line) as Record<string, unknown>;
431
+ if (parsed["type"] === "response_headers") {
432
+ stderrData.responseHeaders = {
433
+ status: parsed["status"] as number,
434
+ grpcStatus: parsed["grpcStatus"] as number | null,
435
+ };
436
+ } else if (parsed["type"] === "exit_reason") {
437
+ stderrData.exitReason = parsed["reason"] as StderrData["exitReason"];
438
+ }
439
+ } catch {
440
+ // not structured JSON — ignore
441
+ }
442
+ }
399
443
  });
400
444
 
401
445
  const config = JSON.stringify({
@@ -433,6 +477,7 @@ function spawnBridge(options: SpawnBridgeOptions): BridgeHandle {
433
477
  // loop alive after the bridge exits (critical for `pi -p` to exit cleanly).
434
478
  try { proc.stdout!.destroy(); } catch {}
435
479
  try { proc.stdin!.destroy(); } catch {}
480
+ try { proc.stderr!.destroy(); } catch {}
436
481
  debugLog("bridge.exit", { rpcPath: options.rpcPath, exitCode });
437
482
  cbs.close?.(exitCode);
438
483
  });
@@ -469,9 +514,43 @@ function spawnBridge(options: SpawnBridgeOptions): BridgeHandle {
469
514
  cbs.close = cb;
470
515
  }
471
516
  },
517
+ getStderr() {
518
+ return stderrData;
519
+ },
472
520
  };
473
521
  }
474
522
 
523
+ // ── Bridge failure classification ──
524
+
525
+ function classifyBridgeFailure(code: number, stderr: StderrData): string {
526
+ const hadHeaders = !!stderr.responseHeaders;
527
+ const status = stderr.responseHeaders?.status;
528
+ const reason = stderr.exitReason;
529
+
530
+ // Timeout (exit 2 or explicit reason)
531
+ if (code === 2 || reason === "timeout") {
532
+ return hadHeaders
533
+ ? "Cursor did not respond within 5 minutes — try again"
534
+ : "Could not reach Cursor's API within 2 minutes — check your network";
535
+ }
536
+
537
+ // Connection or stream error (exit 1)
538
+ if (status === 401 || status === 403) {
539
+ return "Cursor authentication expired — run /login cursor to re-authenticate";
540
+ }
541
+ if (status === 429) {
542
+ return "Cursor rate limited — try again shortly";
543
+ }
544
+ if (status !== undefined && status >= 500 && status < 600) {
545
+ return `Cursor server error (${status}) — try again`;
546
+ }
547
+ if (!hadHeaders) {
548
+ return "Could not connect to Cursor's API — check your network";
549
+ }
550
+
551
+ return `Cursor bridge terminated (exit ${code}) before response — try again or shorten the conversation`;
552
+ }
553
+
475
554
  // ── Unary RPC (for model discovery) ──
476
555
 
477
556
  export async function callCursorUnaryRpc(options: {
@@ -758,6 +837,7 @@ export function cleanupAllSessionState(): void {
758
837
  for (const [bridgeKey, active] of activeBridges) {
759
838
  cleanupBridge(active.bridge, active.heartbeatTimer, bridgeKey);
760
839
  }
840
+ sessionBridges.clear();
761
841
  conversationStates.clear();
762
842
  }
763
843
 
@@ -878,6 +958,7 @@ async function handleChatCompletion(
878
958
  res,
879
959
  body.stream !== false,
880
960
  requestId,
961
+ accessToken,
881
962
  );
882
963
  return;
883
964
  }
@@ -886,9 +967,7 @@ async function handleChatCompletion(
886
967
  }
887
968
 
888
969
  if (activeBridge && activeBridges.has(bridgeKey)) {
889
- clearInterval(activeBridge.heartbeatTimer);
890
- activeBridge.bridge.end();
891
- activeBridges.delete(bridgeKey);
970
+ cleanupBridge(activeBridge.bridge, activeBridge.heartbeatTimer, bridgeKey);
892
971
  }
893
972
 
894
973
  let stored = conversationStates.get(convKey);
@@ -902,6 +981,8 @@ async function handleChatCompletion(
902
981
  conversationStates.set(convKey, stored);
903
982
  }
904
983
 
984
+ stored.systemPrompt = systemPrompt;
985
+
905
986
  const mcpTools = buildMcpToolDefinitions(tools);
906
987
  const effectiveUserText =
907
988
  userText ||
@@ -942,11 +1023,12 @@ async function handleChatCompletion(
942
1023
  };
943
1024
 
944
1025
  if (body.stream === false) {
945
- debugLog("chat.dispatch_nonstream", { requestId, convKey });
1026
+ debugLog("chat.dispatch_nonstream", { requestId, bridgeKey, convKey });
946
1027
  await handleNonStreamingResponse(
947
1028
  payload,
948
1029
  accessToken,
949
1030
  modelId,
1031
+ bridgeKey,
950
1032
  convKey,
951
1033
  turns,
952
1034
  currentTurn,
@@ -2165,15 +2247,52 @@ function createConnectFrameParser(
2165
2247
  };
2166
2248
  }
2167
2249
 
2168
- function parseConnectEndStream(data: Uint8Array): Error | null {
2250
+ const CONTEXT_OVERFLOW_MSG =
2251
+ "context length exceeded — Cursor rejected the request as too large";
2252
+
2253
+ function isContextOverflowMessage(msg: string): boolean {
2254
+ return /context|token|length|overflow|too.?long|too.?large/i.test(msg);
2255
+ }
2256
+
2257
+ function mapConnectErrorCode(code: string, message: string): string {
2258
+ switch (code) {
2259
+ case "unauthenticated":
2260
+ return "Cursor authentication expired — run /login cursor";
2261
+ case "resource_exhausted":
2262
+ return CONTEXT_OVERFLOW_MSG;
2263
+ case "deadline_exceeded":
2264
+ return "Cursor request timed out server-side — try again";
2265
+ case "unavailable":
2266
+ return "Cursor service unavailable — try again";
2267
+ case "internal":
2268
+ return "Cursor internal error — try again";
2269
+ case "invalid_argument":
2270
+ return isContextOverflowMessage(message) ? CONTEXT_OVERFLOW_MSG : message;
2271
+ default:
2272
+ return message;
2273
+ }
2274
+ }
2275
+
2276
+ interface ConnectEndStreamError {
2277
+ message: string;
2278
+ retryable: boolean;
2279
+ }
2280
+
2281
+ const RETRYABLE_CONNECT_CODES = new Set(["internal", "unavailable", "deadline_exceeded"]);
2282
+
2283
+ function parseConnectEndStream(data: Uint8Array): ConnectEndStreamError | null {
2169
2284
  if (data.length === 0) return null;
2170
2285
  try {
2171
2286
  const payload = JSON.parse(new TextDecoder().decode(data));
2172
2287
  const error = payload?.error;
2173
- if (error)
2174
- return new Error(
2175
- `Connect error ${error.code ?? "unknown"}: ${error.message ?? "Unknown error"}`,
2176
- );
2288
+ if (error) {
2289
+ const code = String(error.code ?? "unknown");
2290
+ const rawMessage = String(error.message ?? "Unknown error");
2291
+ return {
2292
+ message: mapConnectErrorCode(code, rawMessage),
2293
+ retryable: RETRYABLE_CONNECT_CODES.has(code),
2294
+ };
2295
+ }
2177
2296
  return null;
2178
2297
  } catch {
2179
2298
  return null;
@@ -2212,11 +2331,29 @@ function computeUsage(state: StreamState) {
2212
2331
  return { prompt_tokens, completion_tokens, total_tokens };
2213
2332
  }
2214
2333
 
2334
+ function computeUsageFromStored(
2335
+ lastTotalTokens: number,
2336
+ convKey: string,
2337
+ modelId: string,
2338
+ ): { prompt_tokens: number; completion_tokens: number; total_tokens: number } | undefined {
2339
+ const totalTokens = lastTotalTokens || conversationStates.get(convKey)?.lastTotalTokens || 0;
2340
+ if (totalTokens === 0) return undefined;
2341
+ const stored = conversationStates.get(convKey);
2342
+ const cursorWindow = stored?.effectiveContextWindow ?? 0;
2343
+ const piWindow = inferContextWindow(modelId);
2344
+ let total_tokens = totalTokens;
2345
+ if (cursorWindow > 0 && piWindow > cursorWindow) {
2346
+ total_tokens = Math.round(totalTokens * piWindow / cursorWindow);
2347
+ }
2348
+ return { prompt_tokens: total_tokens, completion_tokens: 0, total_tokens };
2349
+ }
2350
+
2215
2351
  function respondWithPendingToolCalls(
2216
2352
  modelId: string,
2217
2353
  pendingExecs: PendingExec[],
2218
2354
  stream: boolean,
2219
2355
  res: ServerResponse,
2356
+ usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number },
2220
2357
  ): void {
2221
2358
  const completionId = `chatcmpl-${crypto.randomUUID().replace(/-/g, "").slice(0, 28)}`;
2222
2359
  const created = Math.floor(Date.now() / 1000);
@@ -2250,6 +2387,18 @@ function respondWithPendingToolCalls(
2250
2387
  })}\n\n`,
2251
2388
  );
2252
2389
  }
2390
+ if (usage) {
2391
+ res.write(
2392
+ `data: ${JSON.stringify({
2393
+ id: completionId,
2394
+ object: "chat.completion.chunk",
2395
+ created,
2396
+ model: modelId,
2397
+ choices: [],
2398
+ usage,
2399
+ })}\n\n`,
2400
+ );
2401
+ }
2253
2402
  res.write(
2254
2403
  `data: ${JSON.stringify({
2255
2404
  id: completionId,
@@ -2278,19 +2427,32 @@ function respondWithPendingToolCalls(
2278
2427
  finish_reason: "tool_calls",
2279
2428
  },
2280
2429
  ],
2281
- usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
2430
+ usage: usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
2282
2431
  }),
2283
2432
  );
2284
2433
  }
2285
2434
 
2286
2435
  // ── Streaming response ──
2287
2436
 
2288
- function startBridge(accessToken: string, requestBytes: Uint8Array) {
2437
+ function startBridge(accessToken: string, requestBytes: Uint8Array, bridgeKey: string) {
2438
+ let staleBridgeKilled = false;
2439
+ const existing = sessionBridges.get(bridgeKey);
2440
+ if (existing) {
2441
+ if (existing.alive) {
2442
+ console.error(
2443
+ `[cursor-provider] Stale bridge detected for session ${bridgeKey} — force-killing and replacing`,
2444
+ );
2445
+ staleBridgeKilled = true;
2446
+ try { existing.proc.kill(); } catch {}
2447
+ }
2448
+ sessionBridges.delete(bridgeKey);
2449
+ }
2450
+
2289
2451
  const bridge = bridgeFactory({
2290
2452
  accessToken,
2291
2453
  rpcPath: "/agent.v1.AgentService/Run",
2292
2454
  });
2293
- debugLog("bridge.start_run", { requestBytes });
2455
+ debugLog("bridge.start_run", { requestBytes, bridgeKey, staleBridgeKilled });
2294
2456
  bridge.write(frameConnectMessage(requestBytes));
2295
2457
  const heartbeatTimer = setInterval(
2296
2458
  () => bridge.write(makeHeartbeatBytes()),
@@ -2298,7 +2460,8 @@ function startBridge(accessToken: string, requestBytes: Uint8Array) {
2298
2460
  );
2299
2461
  // Don't hold the event loop open between heartbeats.
2300
2462
  heartbeatTimer.unref();
2301
- return { bridge, heartbeatTimer };
2463
+ sessionBridges.set(bridgeKey, bridge);
2464
+ return { bridge, heartbeatTimer, staleBridgeKilled };
2302
2465
  }
2303
2466
 
2304
2467
  function handleStreamingResponse(
@@ -2314,9 +2477,10 @@ function handleStreamingResponse(
2314
2477
  requestId: string,
2315
2478
  ): void {
2316
2479
  debugLog("stream.start", { requestId, bridgeKey, convKey, modelId });
2317
- const { bridge, heartbeatTimer } = startBridge(
2480
+ const { bridge, heartbeatTimer, staleBridgeKilled } = startBridge(
2318
2481
  accessToken,
2319
2482
  payload.requestBytes,
2483
+ bridgeKey,
2320
2484
  );
2321
2485
  writeSSEStream(
2322
2486
  bridge,
@@ -2331,6 +2495,8 @@ function handleStreamingResponse(
2331
2495
  req,
2332
2496
  res,
2333
2497
  requestId,
2498
+ staleBridgeKilled,
2499
+ accessToken,
2334
2500
  );
2335
2501
  }
2336
2502
 
@@ -2357,13 +2523,18 @@ function cleanupBridge(
2357
2523
  if (bridge.alive) {
2358
2524
  sendCancelAction(bridge);
2359
2525
  bridge.end();
2526
+ setTimeout(() => { try { bridge.proc.kill(); } catch {} }, 10_000);
2360
2527
  }
2528
+ if (sessionBridges.get(bridgeKey) === bridge) sessionBridges.delete(bridgeKey);
2361
2529
  activeBridges.delete(bridgeKey);
2362
2530
  }
2363
2531
 
2532
+ const MAX_BRIDGE_RETRIES =
2533
+ parseInt(process.env.PI_CURSOR_MAX_BRIDGE_RETRIES ?? "") || 2;
2534
+
2364
2535
  function writeSSEStream(
2365
- bridge: BridgeHandle,
2366
- heartbeatTimer: ReturnType<typeof setInterval>,
2536
+ initialBridge: BridgeHandle,
2537
+ initialHeartbeatTimer: ReturnType<typeof setInterval>,
2367
2538
  blobStore: Map<string, Uint8Array>,
2368
2539
  mcpTools: McpToolDefinition[],
2369
2540
  modelId: string,
@@ -2374,6 +2545,8 @@ function writeSSEStream(
2374
2545
  req: IncomingMessage,
2375
2546
  res: ServerResponse,
2376
2547
  requestId?: string,
2548
+ staleBridgeKilled = false,
2549
+ accessToken?: string,
2377
2550
  ): void {
2378
2551
  debugLog("stream.writer_start", {
2379
2552
  requestId,
@@ -2386,11 +2559,35 @@ function writeSSEStream(
2386
2559
  const completionId = `chatcmpl-${crypto.randomUUID().replace(/-/g, "").slice(0, 28)}`;
2387
2560
  const created = Math.floor(Date.now() / 1000);
2388
2561
 
2562
+ // Mutable bridge references — updated on retry.
2563
+ let activeBridge = initialBridge;
2564
+ let activeHeartbeatTimer = initialHeartbeatTimer;
2565
+ let retryCount = 0;
2566
+ let retryableConnectError = false;
2567
+
2568
+ // Snapshot the checkpoint before this turn so retries replay cleanly
2569
+ // without risking a mid-turn checkpoint that includes partial progress.
2570
+ const preTurnCheckpoint = (() => {
2571
+ const s = conversationStates.get(convKey);
2572
+ return s?.checkpoint ? new Uint8Array(s.checkpoint) : null;
2573
+ })();
2574
+
2389
2575
  res.writeHead(200, {
2390
2576
  "Content-Type": "text/event-stream",
2391
2577
  "Cache-Control": "no-cache",
2392
2578
  Connection: "close",
2393
2579
  });
2580
+ if (staleBridgeKilled) {
2581
+ res.write(
2582
+ `data: ${JSON.stringify({
2583
+ id: completionId,
2584
+ object: "chat.completion.chunk",
2585
+ created,
2586
+ model: modelId,
2587
+ choices: [{ index: 0, delta: { content: "\u26a0 A previous request for this session was still running and has been cancelled.\n" }, finish_reason: null }],
2588
+ })}\n\n`,
2589
+ );
2590
+ }
2394
2591
 
2395
2592
  let closed = false;
2396
2593
  let keepAliveTimer: ReturnType<typeof setInterval> | undefined;
@@ -2406,6 +2603,7 @@ function writeSSEStream(
2406
2603
  if (closed) return;
2407
2604
  closed = true;
2408
2605
  clearInterval(keepAliveTimer);
2606
+ if (stallTimer) clearTimeout(stallTimer);
2409
2607
  res.end();
2410
2608
  };
2411
2609
 
@@ -2438,11 +2636,10 @@ function writeSSEStream(
2438
2636
  toolCallIndex: 0,
2439
2637
  pendingExecs: [],
2440
2638
  outputTokens: 0,
2441
- totalTokens: 0,
2639
+ totalTokens: storedForState?.lastTotalTokens ?? 0,
2442
2640
  cursorContextWindow: storedForState?.effectiveContextWindow ?? 0,
2443
2641
  inferredContextWindow: inferContextWindow(modelId),
2444
2642
  };
2445
- const tagFilter = createThinkingTagFilter();
2446
2643
  let mcpExecReceived = false;
2447
2644
  let cancelled = false;
2448
2645
  let latestCheckpoint: Uint8Array | null = null;
@@ -2454,225 +2651,341 @@ function writeSSEStream(
2454
2651
  }, 15_000);
2455
2652
  keepAliveTimer.unref();
2456
2653
 
2654
+ // Stall detector: kill the bridge if no data arrives from Cursor for too
2655
+ // long. This catches cases where the H2 connection is technically alive
2656
+ // but Cursor's server is stuck processing a stale conversation checkpoint.
2657
+ // Reset on every incoming Connect frame.
2658
+ const STALL_TIMEOUT_MS = parseInt(process.env.PI_CURSOR_BRIDGE_STALL_TIMEOUT_MS ?? "") || 120_000;
2659
+ let stallTimer: ReturnType<typeof setTimeout> | undefined;
2660
+ const resetStallTimer = () => {
2661
+ if (stallTimer) clearTimeout(stallTimer);
2662
+ stallTimer = setTimeout(() => {
2663
+ if (cancelled || closed) return;
2664
+ debugLog("stream.stall_timeout", { requestId, bridgeKey, convKey, modelId });
2665
+ console.error(
2666
+ `[cursor-provider] Bridge stalled for ${STALL_TIMEOUT_MS / 1000}s \u2014 killing (${modelId})`,
2667
+ );
2668
+ cleanupBridge(activeBridge, activeHeartbeatTimer, bridgeKey);
2669
+ }, STALL_TIMEOUT_MS);
2670
+ stallTimer.unref?.();
2671
+ };
2672
+
2457
2673
  // Detect client disconnect (e.g. user pressed Escape in pi)
2458
2674
  const onClientClose = () => {
2459
2675
  if (cancelled || closed) return;
2460
2676
  debugLog("stream.client_close", { requestId, bridgeKey, convKey });
2461
2677
  cancelled = true;
2462
- cleanupBridge(bridge, heartbeatTimer, bridgeKey);
2678
+ cleanupBridge(activeBridge, activeHeartbeatTimer, bridgeKey);
2463
2679
  closeResponse();
2464
2680
  };
2465
2681
  req.on("close", onClientClose);
2466
2682
  res.on("close", onClientClose);
2467
2683
 
2468
- const processChunk = createConnectFrameParser(
2469
- (messageBytes) => {
2470
- try {
2471
- const serverMessage = fromBinary(
2472
- AgentServerMessageSchema,
2473
- messageBytes,
2474
- );
2475
- processServerMessage(
2476
- serverMessage,
2477
- blobStore,
2478
- mcpTools,
2479
- (data) => bridge.write(data),
2480
- state,
2481
- (text, isThinking) => {
2482
- if (isThinking) {
2483
- sendSSE(makeChunk({ reasoning_content: text }));
2484
- } else {
2485
- const { content, reasoning } = tagFilter.process(text);
2486
- if (reasoning)
2487
- sendSSE(makeChunk({ reasoning_content: reasoning }));
2488
- if (content) {
2489
- appendAssistantTextToTurn(currentTurn, content);
2490
- sendSSE(makeChunk({ content }));
2684
+ // Wire data/close handlers onto the current activeBridge. Called once on
2685
+ // initial setup and again on each transparent retry.
2686
+ function attachToBridge(): void {
2687
+ // Each attempt gets a fresh thinking-tag filter so retried output doesn't
2688
+ // inherit stale parser state from the dead bridge.
2689
+ const tagFilter = createThinkingTagFilter();
2690
+ let contentSent = false;
2691
+ mcpExecReceived = false;
2692
+ resetStallTimer();
2693
+
2694
+ const processChunk = createConnectFrameParser(
2695
+ (messageBytes) => {
2696
+ resetStallTimer();
2697
+ try {
2698
+ const serverMessage = fromBinary(
2699
+ AgentServerMessageSchema,
2700
+ messageBytes,
2701
+ );
2702
+ processServerMessage(
2703
+ serverMessage,
2704
+ blobStore,
2705
+ mcpTools,
2706
+ (data) => activeBridge.write(data),
2707
+ state,
2708
+ (text, isThinking) => {
2709
+ if (isThinking) {
2710
+ contentSent = true;
2711
+ sendSSE(makeChunk({ reasoning_content: text }));
2712
+ } else {
2713
+ const { content, reasoning } = tagFilter.process(text);
2714
+ if (reasoning) {
2715
+ contentSent = true;
2716
+ sendSSE(makeChunk({ reasoning_content: reasoning }));
2717
+ }
2718
+ if (content) {
2719
+ contentSent = true;
2720
+ appendAssistantTextToTurn(currentTurn, content);
2721
+ sendSSE(makeChunk({ content }));
2722
+ }
2723
+ }
2724
+ },
2725
+ (exec) => {
2726
+ state.pendingExecs.push(exec);
2727
+ mcpExecReceived = true;
2728
+
2729
+ const flushed = tagFilter.flush();
2730
+ if (flushed.reasoning)
2731
+ sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
2732
+ if (flushed.content) {
2733
+ appendAssistantTextToTurn(currentTurn, flushed.content);
2734
+ sendSSE(makeChunk({ content: flushed.content }));
2491
2735
  }
2492
- }
2493
- },
2494
- (exec) => {
2495
- state.pendingExecs.push(exec);
2496
- mcpExecReceived = true;
2497
-
2498
- const flushed = tagFilter.flush();
2499
- if (flushed.reasoning)
2500
- sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
2501
- if (flushed.content) {
2502
- appendAssistantTextToTurn(currentTurn, flushed.content);
2503
- sendSSE(makeChunk({ content: flushed.content }));
2504
- }
2505
-
2506
- currentTurn.steps.push({
2507
- kind: "toolCall",
2508
- toolCallId: exec.toolCallId,
2509
- toolName: exec.toolName,
2510
- arguments: parseToolCallArguments(exec.decodedArgs),
2511
- });
2512
2736
 
2513
- const toolCallIndex = state.toolCallIndex++;
2514
- sendSSE(
2515
- makeChunk({
2516
- tool_calls: [
2517
- {
2518
- index: toolCallIndex,
2519
- id: exec.toolCallId,
2520
- type: "function",
2521
- function: {
2522
- name: exec.toolName,
2523
- arguments: exec.decodedArgs,
2737
+ currentTurn.steps.push({
2738
+ kind: "toolCall",
2739
+ toolCallId: exec.toolCallId,
2740
+ toolName: exec.toolName,
2741
+ arguments: parseToolCallArguments(exec.decodedArgs),
2742
+ });
2743
+
2744
+ const toolCallIndex = state.toolCallIndex++;
2745
+ sendSSE(
2746
+ makeChunk({
2747
+ tool_calls: [
2748
+ {
2749
+ index: toolCallIndex,
2750
+ id: exec.toolCallId,
2751
+ type: "function",
2752
+ function: {
2753
+ name: exec.toolName,
2754
+ arguments: exec.decodedArgs,
2755
+ },
2524
2756
  },
2525
- },
2526
- ],
2527
- }),
2757
+ ],
2758
+ }),
2759
+ );
2760
+
2761
+ activeBridges.set(bridgeKey, {
2762
+ bridge: activeBridge,
2763
+ heartbeatTimer: activeHeartbeatTimer,
2764
+ blobStore,
2765
+ mcpTools,
2766
+ pendingExecs: state.pendingExecs,
2767
+ currentTurn,
2768
+ lastTotalTokens: state.totalTokens,
2769
+ });
2770
+ debugLog("stream.tool_call_pause", {
2771
+ requestId,
2772
+ bridgeKey,
2773
+ exec,
2774
+ pendingExecs: state.pendingExecs,
2775
+ currentTurn,
2776
+ });
2777
+
2778
+ sendSSE(makeUsageChunk());
2779
+ sendSSE(makeChunk({}, "tool_calls"));
2780
+ sendDone();
2781
+ closeResponse();
2782
+ },
2783
+ (checkpointBytes) => {
2784
+ latestCheckpoint = checkpointBytes;
2785
+ const stored = conversationStates.get(convKey);
2786
+ if (stored) {
2787
+ stored.checkpoint = checkpointBytes;
2788
+ for (const [k, v] of blobStore) stored.blobStore.set(k, v);
2789
+ if (state.cursorContextWindow > 0) {
2790
+ stored.effectiveContextWindow = state.cursorContextWindow;
2791
+ }
2792
+ if (state.totalTokens > 0) {
2793
+ stored.lastTotalTokens = state.totalTokens;
2794
+ }
2795
+ }
2796
+ debugLog("stream.checkpoint_buffered", {
2797
+ requestId,
2798
+ convKey,
2799
+ checkpointBytes,
2800
+ });
2801
+ },
2802
+ );
2803
+ } catch (err) {
2804
+ console.error(
2805
+ "[cursor-provider] Stream message processing error:",
2806
+ err instanceof Error ? err.message : err,
2807
+ );
2808
+ }
2809
+ },
2810
+ (endStreamBytes) => {
2811
+ resetStallTimer();
2812
+ const endError = parseConnectEndStream(endStreamBytes);
2813
+ clearInterval(activeHeartbeatTimer);
2814
+ if (stallTimer) clearTimeout(stallTimer);
2815
+ if (endError) {
2816
+ if (endError.retryable && !contentSent && !closed && accessToken && retryCount < MAX_BRIDGE_RETRIES) {
2817
+ debugLog("stream.retryable_connect_error", {
2818
+ requestId, bridgeKey, convKey, modelId,
2819
+ message: endError.message, attempt: retryCount + 1,
2820
+ });
2821
+ console.error(
2822
+ `[cursor-provider] Retryable Cursor error (${modelId}): ${endError.message} — will retry (${retryCount + 1}/${MAX_BRIDGE_RETRIES})`,
2528
2823
  );
2824
+ retryableConnectError = true;
2825
+ try { activeBridge.proc.kill(); } catch {}
2826
+ return;
2827
+ }
2828
+ console.error(
2829
+ `[cursor-provider] Cursor stream error (${modelId}):`,
2830
+ endError.message,
2831
+ );
2832
+ activeBridge.end();
2833
+ activeBridge.unref();
2834
+ sendSSE(makeChunk({ content: endError.message }, "error"));
2835
+ sendSSE(makeUsageChunk());
2836
+ sendDone();
2837
+ closeResponse();
2838
+ } else {
2839
+ activeBridge.end();
2840
+ activeBridge.unref();
2841
+ const flushed = tagFilter.flush();
2842
+ if (flushed.reasoning)
2843
+ sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
2844
+ if (flushed.content) {
2845
+ appendAssistantTextToTurn(currentTurn, flushed.content);
2846
+ sendSSE(makeChunk({ content: flushed.content }));
2847
+ }
2848
+ sendSSE(makeChunk({}, "stop"));
2849
+ sendSSE(makeUsageChunk());
2850
+ sendDone();
2851
+ closeResponse();
2852
+ }
2853
+ },
2854
+ );
2529
2855
 
2530
- activeBridges.set(bridgeKey, {
2531
- bridge,
2532
- heartbeatTimer,
2533
- blobStore,
2534
- mcpTools,
2535
- pendingExecs: state.pendingExecs,
2536
- currentTurn,
2537
- });
2538
- debugLog("stream.tool_call_pause", {
2539
- requestId,
2540
- bridgeKey,
2541
- exec,
2542
- pendingExecs: state.pendingExecs,
2543
- currentTurn,
2544
- });
2856
+ activeBridge.onData(processChunk);
2545
2857
 
2546
- sendSSE(makeChunk({}, "tool_calls"));
2547
- sendDone();
2548
- closeResponse();
2549
- },
2550
- (checkpointBytes) => {
2551
- latestCheckpoint = checkpointBytes;
2552
- const stored = conversationStates.get(convKey);
2553
- if (stored) {
2554
- stored.checkpoint = checkpointBytes;
2555
- for (const [k, v] of blobStore) stored.blobStore.set(k, v);
2556
- if (state.cursorContextWindow > 0) {
2557
- stored.effectiveContextWindow = state.cursorContextWindow;
2558
- }
2559
- }
2560
- debugLog("stream.checkpoint_buffered", {
2561
- requestId,
2562
- convKey,
2563
- checkpointBytes,
2564
- });
2565
- },
2566
- );
2567
- } catch (err) {
2568
- console.error(
2569
- "[cursor-provider] Stream message processing error:",
2570
- err instanceof Error ? err.message : err,
2571
- );
2572
- }
2573
- },
2574
- (endStreamBytes) => {
2575
- const endError = parseConnectEndStream(endStreamBytes);
2576
- // Always stop heartbeats and unref the bridge regardless of error/success
2577
- // so the parent process is not kept alive waiting for HTTP/2 END_STREAM.
2578
- clearInterval(heartbeatTimer);
2579
- bridge.end();
2580
- bridge.unref();
2581
- if (endError) {
2582
- console.error(
2583
- `[cursor-provider] Cursor stream error (${modelId}):`,
2584
- endError.message,
2585
- );
2586
- sendSSE(makeChunk({ content: endError.message }, "error"));
2587
- sendSSE(makeUsageChunk());
2588
- sendDone();
2589
- closeResponse();
2590
- } else {
2591
- // Cursor's Connect-level response is complete. Send the SSE response
2592
- // immediately without waiting for HTTP/2 END_STREAM, which Cursor can
2593
- // delay by several seconds after the Connect end-stream frame.
2594
- const flushed = tagFilter.flush();
2595
- if (flushed.reasoning)
2596
- sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
2597
- if (flushed.content) {
2598
- appendAssistantTextToTurn(currentTurn, flushed.content);
2599
- sendSSE(makeChunk({ content: flushed.content }));
2858
+ activeBridge.onClose((code) => {
2859
+ debugLog("stream.bridge_close", {
2860
+ requestId,
2861
+ bridgeKey,
2862
+ convKey,
2863
+ code,
2864
+ cancelled,
2865
+ mcpExecReceived,
2866
+ currentTurn,
2867
+ latestCheckpoint,
2868
+ retryCount,
2869
+ });
2870
+ clearInterval(activeHeartbeatTimer);
2871
+ if (stallTimer) clearTimeout(stallTimer);
2872
+ if (sessionBridges.get(bridgeKey) === activeBridge) sessionBridges.delete(bridgeKey);
2873
+ const stored = conversationStates.get(convKey);
2874
+ if (stored) {
2875
+ for (const [k, v] of blobStore) stored.blobStore.set(k, v);
2876
+ if (latestCheckpoint) {
2877
+ stored.checkpoint = latestCheckpoint;
2878
+ debugLog("stream.checkpoint_committed", { requestId, convKey, stored });
2879
+ }
2880
+ if (state.cursorContextWindow > 0) {
2881
+ stored.effectiveContextWindow = state.cursorContextWindow;
2882
+ }
2883
+ if (state.totalTokens > 0) {
2884
+ stored.lastTotalTokens = state.totalTokens;
2600
2885
  }
2601
- sendSSE(makeChunk({}, "stop"));
2602
- sendSSE(makeUsageChunk());
2603
- sendDone();
2604
- closeResponse();
2605
2886
  }
2606
- },
2607
- );
2887
+ if (cancelled) return;
2888
+
2889
+ // ── Transparent retry on bridge failure ──
2890
+ // When the bridge dies mid-request (connection killed by LB, TCP
2891
+ // timeout, etc.) or Cursor returns a retryable protocol error
2892
+ // (internal, unavailable, deadline_exceeded), rebuild the Cursor
2893
+ // request and replay on a fresh bridge. The SSE response stays
2894
+ // open — the client sees at most a brief pause.
2895
+ const shouldRetry = retryableConnectError || code !== 0;
2896
+ if (shouldRetry && !closed && accessToken && retryCount < MAX_BRIDGE_RETRIES) {
2897
+ const cp = preTurnCheckpoint ?? stored?.checkpoint ?? null;
2898
+ // For retryable Connect errors, allow retry even without a
2899
+ // checkpoint (first request in session) — buildCursorRequest
2900
+ // handles checkpoint=null by rebuilding from turns.
2901
+ if (stored && (cp || retryableConnectError)) {
2902
+ retryCount++;
2903
+ const wasConnectError = retryableConnectError;
2904
+ retryableConnectError = false;
2905
+ debugLog("stream.retry", {
2906
+ requestId,
2907
+ bridgeKey,
2908
+ convKey,
2909
+ attempt: retryCount,
2910
+ maxRetries: MAX_BRIDGE_RETRIES,
2911
+ connectError: wasConnectError,
2912
+ });
2913
+ console.error(
2914
+ `[cursor-provider] ${wasConnectError ? "Retryable Cursor error" : `Bridge died (exit ${code})`}, retry ${retryCount}/${MAX_BRIDGE_RETRIES} (${modelId})`,
2915
+ );
2608
2916
 
2609
- bridge.onData(processChunk);
2917
+ // Reset per-attempt stream state; keep cumulative token counts.
2918
+ state.pendingExecs = [];
2919
+ latestCheckpoint = null;
2920
+
2921
+ const retryPayload = buildCursorRequest(
2922
+ modelId,
2923
+ stored.systemPrompt || "You are a helpful assistant.",
2924
+ currentTurn.userText,
2925
+ completedTurns,
2926
+ stored.conversationId,
2927
+ cp,
2928
+ blobStore,
2929
+ currentTurn.images,
2930
+ );
2931
+ retryPayload.mcpTools = mcpTools;
2610
2932
 
2611
- bridge.onClose((code) => {
2612
- debugLog("stream.bridge_close", {
2613
- requestId,
2614
- bridgeKey,
2615
- convKey,
2616
- code,
2617
- cancelled,
2618
- mcpExecReceived,
2619
- currentTurn,
2620
- latestCheckpoint,
2621
- });
2622
- clearInterval(heartbeatTimer);
2623
- req.removeListener("close", onClientClose);
2624
- res.removeListener("close", onClientClose);
2625
- const stored = conversationStates.get(convKey);
2626
- if (stored) {
2627
- for (const [k, v] of blobStore) stored.blobStore.set(k, v);
2628
- if (latestCheckpoint) {
2629
- stored.checkpoint = latestCheckpoint;
2630
- debugLog("stream.checkpoint_committed", { requestId, convKey, stored });
2631
- }
2632
- if (state.cursorContextWindow > 0) {
2633
- stored.effectiveContextWindow = state.cursorContextWindow;
2933
+ const { bridge: newBridge, heartbeatTimer: newTimer } = startBridge(
2934
+ accessToken,
2935
+ retryPayload.requestBytes,
2936
+ bridgeKey,
2937
+ );
2938
+ activeBridge = newBridge;
2939
+ activeHeartbeatTimer = newTimer;
2940
+
2941
+ // No client-close re-registration is needed: onClientClose reads the mutable
2942
+ // activeBridge/activeHeartbeatTimer; only the bridge data/close handlers are re-attached.
2943
+ attachToBridge();
2944
+ return;
2945
+ }
2634
2946
  }
2635
- }
2636
- if (cancelled) return;
2637
- if (!mcpExecReceived) {
2638
- if (code !== 0) {
2639
- // Bridge was killed before receiving any response (e.g. timeout waiting
2640
- // for Cursor to process a large checkpoint during compaction). Treat as
2641
- // an error so callers (like pi compaction) see a real failure instead of
2642
- // an empty successful-looking response.
2643
- console.error(
2644
- `[cursor-provider] Bridge exited (code ${code}) before receiving response (${modelId})`,
2645
- );
2646
- sendSSE(makeChunk({ content: `Cursor bridge terminated (exit ${code}) before response — try again or shorten the conversation` }, "error"));
2947
+
2948
+ // No retry — remove client-close listeners since this bridge is done.
2949
+ req.removeListener("close", onClientClose);
2950
+ res.removeListener("close", onClientClose);
2951
+
2952
+ if (!mcpExecReceived) {
2953
+ if (code !== 0) {
2954
+ console.error(
2955
+ `[cursor-provider] Bridge exited (code ${code}) before receiving response (${modelId})`,
2956
+ );
2957
+ const failureMsg = classifyBridgeFailure(code, activeBridge.getStderr());
2958
+ sendSSE(makeChunk({ content: failureMsg }, "error"));
2959
+ sendSSE(makeUsageChunk());
2960
+ sendDone();
2961
+ closeResponse();
2962
+ } else {
2963
+ const flushed = tagFilter.flush();
2964
+ if (flushed.reasoning)
2965
+ sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
2966
+ if (flushed.content) {
2967
+ appendAssistantTextToTurn(currentTurn, flushed.content);
2968
+ sendSSE(makeChunk({ content: flushed.content }));
2969
+ }
2970
+ sendSSE(makeChunk({}, "stop"));
2971
+ sendSSE(makeUsageChunk());
2972
+ sendDone();
2973
+ closeResponse();
2974
+ }
2975
+ } else if (code !== 0) {
2976
+ sendSSE(makeChunk({ content: "Bridge connection lost" }, "error"));
2647
2977
  sendSSE(makeUsageChunk());
2648
2978
  sendDone();
2649
2979
  closeResponse();
2980
+ activeBridges.delete(bridgeKey);
2650
2981
  } else {
2651
- const flushed = tagFilter.flush();
2652
- if (flushed.reasoning)
2653
- sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
2654
- if (flushed.content) {
2655
- appendAssistantTextToTurn(currentTurn, flushed.content);
2656
- sendSSE(makeChunk({ content: flushed.content }));
2657
- }
2658
- sendSSE(makeChunk({}, "stop"));
2659
- sendSSE(makeUsageChunk());
2660
- sendDone();
2982
+ activeBridges.delete(bridgeKey);
2661
2983
  closeResponse();
2662
2984
  }
2663
- } else if (code !== 0) {
2664
- sendSSE(makeChunk({ content: "Bridge connection lost" }, "error"));
2665
- sendSSE(makeUsageChunk());
2666
- sendDone();
2667
- closeResponse();
2668
- activeBridges.delete(bridgeKey);
2669
- } else {
2670
- // Bridge closed cleanly after a tool call pause. The HTTP response was
2671
- // already ended by the MCP exec handler; just ensure cleanup.
2672
- activeBridges.delete(bridgeKey);
2673
- closeResponse();
2674
- }
2675
- });
2985
+ });
2986
+ }
2987
+
2988
+ attachToBridge();
2676
2989
  }
2677
2990
 
2678
2991
  export function writeSSEStreamForTests(args: {
@@ -2688,6 +3001,7 @@ export function writeSSEStreamForTests(args: {
2688
3001
  req: IncomingMessage;
2689
3002
  res: ServerResponse;
2690
3003
  requestId?: string;
3004
+ accessToken?: string;
2691
3005
  }): void {
2692
3006
  writeSSEStream(
2693
3007
  args.bridge,
@@ -2702,6 +3016,8 @@ export function writeSSEStreamForTests(args: {
2702
3016
  args.req,
2703
3017
  args.res,
2704
3018
  args.requestId,
3019
+ false,
3020
+ args.accessToken,
2705
3021
  );
2706
3022
  }
2707
3023
 
@@ -2718,6 +3034,7 @@ function handleToolResultResume(
2718
3034
  res: ServerResponse,
2719
3035
  stream: boolean,
2720
3036
  requestId?: string,
3037
+ accessToken?: string,
2721
3038
  ): void {
2722
3039
  const {
2723
3040
  bridge,
@@ -2758,6 +3075,7 @@ function handleToolResultResume(
2758
3075
  mcpTools,
2759
3076
  pendingExecs,
2760
3077
  currentTurn,
3078
+ lastTotalTokens: active.lastTotalTokens,
2761
3079
  });
2762
3080
  debugLog("tool_resume.partial_wait", {
2763
3081
  requestId,
@@ -2765,7 +3083,7 @@ function handleToolResultResume(
2765
3083
  unresolvedExecs,
2766
3084
  currentTurn,
2767
3085
  });
2768
- respondWithPendingToolCalls(modelId, unresolvedExecs, stream, res);
3086
+ respondWithPendingToolCalls(modelId, unresolvedExecs, stream, res, computeUsageFromStored(active.lastTotalTokens, convKey, modelId));
2769
3087
  return;
2770
3088
  }
2771
3089
 
@@ -2819,6 +3137,8 @@ function handleToolResultResume(
2819
3137
  req,
2820
3138
  res,
2821
3139
  requestId,
3140
+ false,
3141
+ accessToken,
2822
3142
  );
2823
3143
  }
2824
3144
 
@@ -2828,6 +3148,7 @@ async function handleNonStreamingResponse(
2828
3148
  payload: CursorRequestPayload,
2829
3149
  accessToken: string,
2830
3150
  modelId: string,
3151
+ bridgeKey: string,
2831
3152
  convKey: string,
2832
3153
  completedTurns: ParsedTurn[],
2833
3154
  currentTurn: ParsedTurn,
@@ -2837,6 +3158,7 @@ async function handleNonStreamingResponse(
2837
3158
  ): Promise<void> {
2838
3159
  debugLog("nonstream.start", {
2839
3160
  requestId,
3161
+ bridgeKey,
2840
3162
  convKey,
2841
3163
  modelId,
2842
3164
  currentTurn,
@@ -2848,6 +3170,7 @@ async function handleNonStreamingResponse(
2848
3170
  const { bridge, heartbeatTimer } = startBridge(
2849
3171
  accessToken,
2850
3172
  payload.requestBytes,
3173
+ bridgeKey,
2851
3174
  );
2852
3175
  let cancelled = false;
2853
3176
 
@@ -2868,13 +3191,13 @@ async function handleNonStreamingResponse(
2868
3191
  toolCallIndex: 0,
2869
3192
  pendingExecs: [],
2870
3193
  outputTokens: 0,
2871
- totalTokens: 0,
3194
+ totalTokens: storedForNonStream?.lastTotalTokens ?? 0,
2872
3195
  cursorContextWindow: storedForNonStream?.effectiveContextWindow ?? 0,
2873
3196
  inferredContextWindow: inferContextWindow(modelId),
2874
3197
  };
2875
3198
  const tagFilter = createThinkingTagFilter();
2876
3199
  let fullText = "";
2877
- let nonStreamError: Error | null = null;
3200
+ let nonStreamError: ConnectEndStreamError | null = null;
2878
3201
  let latestCheckpoint: Uint8Array | null = null;
2879
3202
 
2880
3203
  return new Promise((resolve) => {
@@ -2973,6 +3296,7 @@ async function handleNonStreamingResponse(
2973
3296
  bridge.onClose((code) => {
2974
3297
  debugLog("nonstream.bridge_close", {
2975
3298
  requestId,
3299
+ bridgeKey,
2976
3300
  convKey,
2977
3301
  code,
2978
3302
  cancelled,
@@ -2981,6 +3305,7 @@ async function handleNonStreamingResponse(
2981
3305
  latestCheckpoint,
2982
3306
  });
2983
3307
  clearInterval(heartbeatTimer);
3308
+ if (sessionBridges.get(bridgeKey) === bridge) sessionBridges.delete(bridgeKey);
2984
3309
  req.removeListener("close", onClientClose);
2985
3310
  res.removeListener("close", onClientClose);
2986
3311
  const stored = conversationStates.get(convKey);
@@ -2997,6 +3322,9 @@ async function handleNonStreamingResponse(
2997
3322
  if (state.cursorContextWindow > 0) {
2998
3323
  stored.effectiveContextWindow = state.cursorContextWindow;
2999
3324
  }
3325
+ if (state.totalTokens > 0) {
3326
+ stored.lastTotalTokens = state.totalTokens;
3327
+ }
3000
3328
  }
3001
3329
 
3002
3330
  if (cancelled) {
@@ -3035,11 +3363,12 @@ async function handleNonStreamingResponse(
3035
3363
  console.error(
3036
3364
  `[cursor-provider] Bridge exited (code ${code}) before non-stream response (${modelId})`,
3037
3365
  );
3366
+ const failureMsg = classifyBridgeFailure(code, bridge.getStderr());
3038
3367
  res.writeHead(502, { "Content-Type": "application/json" });
3039
3368
  res.end(
3040
3369
  JSON.stringify({
3041
3370
  error: {
3042
- message: `Cursor bridge terminated (exit ${code}) before response — try again or shorten the conversation`,
3371
+ message: failureMsg,
3043
3372
  type: "upstream_error",
3044
3373
  code: "bridge_terminated",
3045
3374
  },