@offbynan/pi-cursor-provider 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/README.md +24 -2
  2. package/h2-bridge.mjs +11 -7
  3. package/package.json +1 -1
  4. package/proxy.ts +399 -204
package/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  - **Image support** — base64 `image_url` content parts forwarded to Cursor end-to-end; the upstream silently drops them
6
6
  - **Compaction support** — old turns archived as inline text to cut `getBlobArgs` round-trips from O(history) to O(tail); bridge termination errors surface as real failures instead of silent empty responses; checkpoint cleared after compaction to keep both sides in sync
7
- - **Reliability** — bridge timeouts hardened and configurable; SSE keepalive prevents pi from timing out during blob-fetching; conversation state and checkpoints survive transient failures and client disconnects
7
+ - **Reliability** — transparent retry for transient Cursor protocol errors (internal / unavailable / deadline_exceeded); HTTP/2 PING keepalive detects dead connections; stall timer kills stuck bridges; bridge timeouts hardened and configurable; SSE keepalive prevents pi from timing out during blob-fetching; conversation state and checkpoints survive transient failures and client disconnects
8
8
  - **Model support** — per-model context window inference (vs. hardcoded 200 k); runtime cap scaling when Cursor enforces a tighter window; detailed cost table for all current families; effort-suffix variants deduplicated so pi's reasoning-level setting drives the suffix automatically
9
9
  - **Thinking-tag filtering** — inline `<think>` / `<reasoning>` tags stripped from the response and routed to `reasoning_content`
10
10
  - **Fixes & observability** — `pi -p` exit hang fixed; dead TTL eviction code removed; opt-in JSONL debug logging with a bundled timeline viewer
@@ -59,6 +59,10 @@ pi → openai-completions → localhost:PORT/v1/chat/completions
59
59
  | `PI_CURSOR_PROVIDER_DEBUG_FILE` | auto in tmpdir | Override the debug log file path |
60
60
  | `PI_CURSOR_BRIDGE_INITIAL_TIMEOUT_MS` | `120000` | Kill bridge if no HTTP/2 activity within this many ms of spawn |
61
61
  | `PI_CURSOR_BRIDGE_ACTIVITY_TIMEOUT_MS` | `300000` | Kill bridge if no HTTP/2 activity for this many ms after the first frame |
62
+ | `PI_CURSOR_BRIDGE_PING_INTERVAL_MS` | `15000` | HTTP/2 PING interval to detect dead connections |
63
+ | `PI_CURSOR_BRIDGE_PING_TIMEOUT_MS` | `10000` | Timeout for each HTTP/2 PING before declaring the connection dead |
64
+ | `PI_CURSOR_BRIDGE_STALL_TIMEOUT_MS` | `120000` | Kill bridge if no data received from Cursor within this many ms |
65
+ | `PI_CURSOR_MAX_BRIDGE_RETRIES` | `2` | Max transparent retries on transient Cursor errors or bridge crashes (`0` or a non-numeric value falls back to the default) |
62
66
  | `PI_CURSOR_TURN_ARCHIVE_THRESHOLD` | `20` | Keep this many recent turns as raw blobs; older turns are archived as inline text |
63
67
  | `PI_CURSOR_RAW_MODELS` | off | Set to disable model deduplication and see all raw Cursor model IDs |
64
68
 
@@ -150,6 +154,24 @@ The upstream has no observability. This fork adds opt-in JSONL event logging (se
150
154
  npm run debug:timeline -- --latest
151
155
  ```
152
156
 
157
+ ### Transparent retry for transient errors
158
+
159
+ When Cursor returns a retryable Connect-level error (`internal`, `unavailable`, `deadline_exceeded`) or the bridge process crashes mid-request, the proxy now automatically retries on a fresh HTTP/2 bridge — up to `PI_CURSOR_MAX_BRIDGE_RETRIES` times (default 2). The SSE response to pi stays open; the client sees at most a brief pause.
160
+
161
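+ For retryable Connect-level errors, retry is only attempted when no content has been streamed yet (so partial responses are never replayed); a retry triggered by a bridge crash (non-zero exit) does not make that check. On retry the proxy rebuilds the Cursor request using the pre-turn checkpoint and replays cleanly.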
162
+
163
+ Previously these transient errors were surfaced as `finish_reason: "error"`, requiring the user to manually continue each time.
164
+
165
+ ### HTTP/2 PING keepalive and stall detection
166
+
167
+ The bridge now configures HTTP/2-level PINGs (`PI_CURSOR_BRIDGE_PING_INTERVAL_MS` / `PI_CURSOR_BRIDGE_PING_TIMEOUT_MS`) so dead TCP connections (NAT timeout, load-balancer cycling) are detected within seconds rather than waiting for the 5-minute activity timeout.
168
+
169
+ Additionally, a stall timer (`PI_CURSOR_BRIDGE_STALL_TIMEOUT_MS`, default 120 s) kills the bridge if no data arrives from Cursor within that window — catching cases where the HTTP/2 connection is technically alive but the server is stuck processing a stale checkpoint.
170
+
171
+ ### Usage reporting on tool-call continuations
172
+
173
+ When the proxy pauses mid-turn for a tool call and responds with pending tool calls (the partial-wait path), it now reports meaningful `usage` token counts instead of zeros. The stored `lastTotalTokens` from the previous stream segment is scaled proportionally if Cursor is enforcing a tighter context window than the model's nominal size. This lets pi track cumulative token usage accurately across multi-step tool-call turns.
174
+
153
175
  ### Bridge timeout hardening
154
176
 
155
177
  The upstream `h2-bridge.mjs` used a 30-second initial connection timeout and a 120-second activity timeout. Large conversations require Cursor to deserialise a big checkpoint and complete many `getBlobArgs` round-trips before it starts streaming tokens, which regularly exceeded these limits and caused compaction to fail with a `terminated` error.
@@ -276,7 +298,7 @@ Session state is cleared on pi lifecycle events — session switch, fork, `/tree
276
298
 
277
299
  ### Error resilience
278
300
 
279
- A bridge timeout or Connect-level error from Cursor does not wipe the stored checkpoint. The last good checkpoint survives transient failures and is used on the next retry. If Cursor sends a checkpoint before a client disconnect, that checkpoint is also preserved.
301
+ Transient Cursor errors (`internal`, `unavailable`, `deadline_exceeded`) and bridge crashes are retried automatically up to `PI_CURSOR_MAX_BRIDGE_RETRIES` times — without dropping the SSE connection to pi. The last good checkpoint survives all error types and is used on retry. If Cursor sends a checkpoint before a client disconnect, that checkpoint is also preserved.
280
302
 
281
303
  ## Requirements
282
304
 
package/h2-bridge.mjs CHANGED
@@ -87,15 +87,19 @@ if (!configBuf) process.exit(1);
87
87
  const config = JSON.parse(configBuf.toString("utf8"));
88
88
  const { accessToken, url, path: rpcPath, unary } = config;
89
89
 
90
- const client = http2.connect(url || "https://api2.cursor.sh");
91
-
92
- // Guard against initial connection failure. Reset on any h2 activity
93
- // so long-running agent conversations (with tool call round-trips) survive.
94
- // Initial timeout is generous because large conversations require Cursor to
95
- // deserialize a big checkpoint + run many getBlobArgs round-trips before it
96
- // starts streaming tokens — 30 s was too short and caused compaction failures.
97
90
  const INITIAL_TIMEOUT_MS = parseInt(process.env.PI_CURSOR_BRIDGE_INITIAL_TIMEOUT_MS ?? "") || 120_000;
98
91
  const ACTIVITY_TIMEOUT_MS = parseInt(process.env.PI_CURSOR_BRIDGE_ACTIVITY_TIMEOUT_MS ?? "") || 300_000;
92
+ const H2_PING_INTERVAL_MS = parseInt(process.env.PI_CURSOR_BRIDGE_PING_INTERVAL_MS ?? "") || 15_000;
93
+ const H2_PING_TIMEOUT_MS = parseInt(process.env.PI_CURSOR_BRIDGE_PING_TIMEOUT_MS ?? "") || 10_000;
94
+
95
+ const client = http2.connect(url || "https://api2.cursor.sh", {
96
+ // Detect dead TCP connections at the HTTP/2 level — without this, a silently
97
+ // dropped connection (NAT timeout, LB cycling) can leave the bridge waiting
98
+ // for up to ACTIVITY_TIMEOUT_MS (5 min) with no indication of failure.
99
+ pingInterval: H2_PING_INTERVAL_MS,
100
+ pingTimeout: H2_PING_TIMEOUT_MS,
101
+ });
102
+
99
103
  let timeout = setTimeout(killBridge, INITIAL_TIMEOUT_MS);
100
104
 
101
105
  function resetTimeout() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@offbynan/pi-cursor-provider",
3
- "version": "0.5.2",
3
+ "version": "0.5.3",
4
4
  "description": "Pi extension providing access to Cursor models via OAuth and a local OpenAI-compatible gRPC proxy",
5
5
  "type": "module",
6
6
  "license": "MIT",
package/proxy.ts CHANGED
@@ -179,6 +179,7 @@ interface ActiveBridge {
179
179
  mcpTools: McpToolDefinition[];
180
180
  pendingExecs: PendingExec[];
181
181
  currentTurn: ParsedTurn;
182
+ lastTotalTokens: number;
182
183
  }
183
184
 
184
185
  export interface StoredConversation {
@@ -191,6 +192,15 @@ export interface StoredConversation {
191
192
  * our static inferContextWindow() estimate when Cursor enforces a tighter cap.
192
193
  */
193
194
  effectiveContextWindow?: number;
195
+ /**
196
+ * Last known usedTokens from Cursor's ConversationTokenDetails. Persisted
197
+ * so that tool-call continuations (which create a fresh StreamState) can
198
+ * report meaningful usage even if the checkpoint hasn't arrived yet in the
199
+ * new stream segment.
200
+ */
201
+ lastTotalTokens?: number;
202
+ /** Cached for transparent retry when a bridge dies mid-request. */
203
+ systemPrompt?: string;
194
204
  }
195
205
 
196
206
  interface StreamState {
@@ -948,6 +958,7 @@ async function handleChatCompletion(
948
958
  res,
949
959
  body.stream !== false,
950
960
  requestId,
961
+ accessToken,
951
962
  );
952
963
  return;
953
964
  }
@@ -970,6 +981,8 @@ async function handleChatCompletion(
970
981
  conversationStates.set(convKey, stored);
971
982
  }
972
983
 
984
+ stored.systemPrompt = systemPrompt;
985
+
973
986
  const mcpTools = buildMcpToolDefinitions(tools);
974
987
  const effectiveUserText =
975
988
  userText ||
@@ -2260,7 +2273,14 @@ function mapConnectErrorCode(code: string, message: string): string {
2260
2273
  }
2261
2274
  }
2262
2275
 
2263
- function parseConnectEndStream(data: Uint8Array): Error | null {
2276
+ interface ConnectEndStreamError {
2277
+ message: string;
2278
+ retryable: boolean;
2279
+ }
2280
+
2281
+ const RETRYABLE_CONNECT_CODES = new Set(["internal", "unavailable", "deadline_exceeded"]);
2282
+
2283
+ function parseConnectEndStream(data: Uint8Array): ConnectEndStreamError | null {
2264
2284
  if (data.length === 0) return null;
2265
2285
  try {
2266
2286
  const payload = JSON.parse(new TextDecoder().decode(data));
@@ -2268,7 +2288,10 @@ function parseConnectEndStream(data: Uint8Array): Error | null {
2268
2288
  if (error) {
2269
2289
  const code = String(error.code ?? "unknown");
2270
2290
  const rawMessage = String(error.message ?? "Unknown error");
2271
- return new Error(mapConnectErrorCode(code, rawMessage));
2291
+ return {
2292
+ message: mapConnectErrorCode(code, rawMessage),
2293
+ retryable: RETRYABLE_CONNECT_CODES.has(code),
2294
+ };
2272
2295
  }
2273
2296
  return null;
2274
2297
  } catch {
@@ -2308,11 +2331,29 @@ function computeUsage(state: StreamState) {
2308
2331
  return { prompt_tokens, completion_tokens, total_tokens };
2309
2332
  }
2310
2333
 
2334
+ function computeUsageFromStored(
2335
+ lastTotalTokens: number,
2336
+ convKey: string,
2337
+ modelId: string,
2338
+ ): { prompt_tokens: number; completion_tokens: number; total_tokens: number } | undefined {
2339
+ const totalTokens = lastTotalTokens || conversationStates.get(convKey)?.lastTotalTokens || 0;
2340
+ if (totalTokens === 0) return undefined;
2341
+ const stored = conversationStates.get(convKey);
2342
+ const cursorWindow = stored?.effectiveContextWindow ?? 0;
2343
+ const piWindow = inferContextWindow(modelId);
2344
+ let total_tokens = totalTokens;
2345
+ if (cursorWindow > 0 && piWindow > cursorWindow) {
2346
+ total_tokens = Math.round(totalTokens * piWindow / cursorWindow);
2347
+ }
2348
+ return { prompt_tokens: total_tokens, completion_tokens: 0, total_tokens };
2349
+ }
2350
+
2311
2351
  function respondWithPendingToolCalls(
2312
2352
  modelId: string,
2313
2353
  pendingExecs: PendingExec[],
2314
2354
  stream: boolean,
2315
2355
  res: ServerResponse,
2356
+ usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number },
2316
2357
  ): void {
2317
2358
  const completionId = `chatcmpl-${crypto.randomUUID().replace(/-/g, "").slice(0, 28)}`;
2318
2359
  const created = Math.floor(Date.now() / 1000);
@@ -2346,6 +2387,18 @@ function respondWithPendingToolCalls(
2346
2387
  })}\n\n`,
2347
2388
  );
2348
2389
  }
2390
+ if (usage) {
2391
+ res.write(
2392
+ `data: ${JSON.stringify({
2393
+ id: completionId,
2394
+ object: "chat.completion.chunk",
2395
+ created,
2396
+ model: modelId,
2397
+ choices: [],
2398
+ usage,
2399
+ })}\n\n`,
2400
+ );
2401
+ }
2349
2402
  res.write(
2350
2403
  `data: ${JSON.stringify({
2351
2404
  id: completionId,
@@ -2374,7 +2427,7 @@ function respondWithPendingToolCalls(
2374
2427
  finish_reason: "tool_calls",
2375
2428
  },
2376
2429
  ],
2377
- usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
2430
+ usage: usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
2378
2431
  }),
2379
2432
  );
2380
2433
  }
@@ -2443,6 +2496,7 @@ function handleStreamingResponse(
2443
2496
  res,
2444
2497
  requestId,
2445
2498
  staleBridgeKilled,
2499
+ accessToken,
2446
2500
  );
2447
2501
  }
2448
2502
 
@@ -2475,9 +2529,12 @@ function cleanupBridge(
2475
2529
  activeBridges.delete(bridgeKey);
2476
2530
  }
2477
2531
 
2532
+ const MAX_BRIDGE_RETRIES =
2533
+ parseInt(process.env.PI_CURSOR_MAX_BRIDGE_RETRIES ?? "") || 2;
2534
+
2478
2535
  function writeSSEStream(
2479
- bridge: BridgeHandle,
2480
- heartbeatTimer: ReturnType<typeof setInterval>,
2536
+ initialBridge: BridgeHandle,
2537
+ initialHeartbeatTimer: ReturnType<typeof setInterval>,
2481
2538
  blobStore: Map<string, Uint8Array>,
2482
2539
  mcpTools: McpToolDefinition[],
2483
2540
  modelId: string,
@@ -2489,6 +2546,7 @@ function writeSSEStream(
2489
2546
  res: ServerResponse,
2490
2547
  requestId?: string,
2491
2548
  staleBridgeKilled = false,
2549
+ accessToken?: string,
2492
2550
  ): void {
2493
2551
  debugLog("stream.writer_start", {
2494
2552
  requestId,
@@ -2501,6 +2559,19 @@ function writeSSEStream(
2501
2559
  const completionId = `chatcmpl-${crypto.randomUUID().replace(/-/g, "").slice(0, 28)}`;
2502
2560
  const created = Math.floor(Date.now() / 1000);
2503
2561
 
2562
+ // Mutable bridge references — updated on retry.
2563
+ let activeBridge = initialBridge;
2564
+ let activeHeartbeatTimer = initialHeartbeatTimer;
2565
+ let retryCount = 0;
2566
+ let retryableConnectError = false;
2567
+
2568
+ // Snapshot the checkpoint before this turn so retries replay cleanly
2569
+ // without risking a mid-turn checkpoint that includes partial progress.
2570
+ const preTurnCheckpoint = (() => {
2571
+ const s = conversationStates.get(convKey);
2572
+ return s?.checkpoint ? new Uint8Array(s.checkpoint) : null;
2573
+ })();
2574
+
2504
2575
  res.writeHead(200, {
2505
2576
  "Content-Type": "text/event-stream",
2506
2577
  "Cache-Control": "no-cache",
@@ -2532,6 +2603,7 @@ function writeSSEStream(
2532
2603
  if (closed) return;
2533
2604
  closed = true;
2534
2605
  clearInterval(keepAliveTimer);
2606
+ if (stallTimer) clearTimeout(stallTimer);
2535
2607
  res.end();
2536
2608
  };
2537
2609
 
@@ -2564,11 +2636,10 @@ function writeSSEStream(
2564
2636
  toolCallIndex: 0,
2565
2637
  pendingExecs: [],
2566
2638
  outputTokens: 0,
2567
- totalTokens: 0,
2639
+ totalTokens: storedForState?.lastTotalTokens ?? 0,
2568
2640
  cursorContextWindow: storedForState?.effectiveContextWindow ?? 0,
2569
2641
  inferredContextWindow: inferContextWindow(modelId),
2570
2642
  };
2571
- const tagFilter = createThinkingTagFilter();
2572
2643
  let mcpExecReceived = false;
2573
2644
  let cancelled = false;
2574
2645
  let latestCheckpoint: Uint8Array | null = null;
@@ -2580,227 +2651,341 @@ function writeSSEStream(
2580
2651
  }, 15_000);
2581
2652
  keepAliveTimer.unref();
2582
2653
 
2654
+ // Stall detector: kill the bridge if no data arrives from Cursor for too
2655
+ // long. This catches cases where the H2 connection is technically alive
2656
+ // but Cursor's server is stuck processing a stale conversation checkpoint.
2657
+ // Reset on every incoming Connect frame.
2658
+ const STALL_TIMEOUT_MS = parseInt(process.env.PI_CURSOR_BRIDGE_STALL_TIMEOUT_MS ?? "") || 120_000;
2659
+ let stallTimer: ReturnType<typeof setTimeout> | undefined;
2660
+ const resetStallTimer = () => {
2661
+ if (stallTimer) clearTimeout(stallTimer);
2662
+ stallTimer = setTimeout(() => {
2663
+ if (cancelled || closed) return;
2664
+ debugLog("stream.stall_timeout", { requestId, bridgeKey, convKey, modelId });
2665
+ console.error(
2666
+ `[cursor-provider] Bridge stalled for ${STALL_TIMEOUT_MS / 1000}s \u2014 killing (${modelId})`,
2667
+ );
2668
+ cleanupBridge(activeBridge, activeHeartbeatTimer, bridgeKey);
2669
+ }, STALL_TIMEOUT_MS);
2670
+ stallTimer.unref?.();
2671
+ };
2672
+
2583
2673
  // Detect client disconnect (e.g. user pressed Escape in pi)
2584
2674
  const onClientClose = () => {
2585
2675
  if (cancelled || closed) return;
2586
2676
  debugLog("stream.client_close", { requestId, bridgeKey, convKey });
2587
2677
  cancelled = true;
2588
- cleanupBridge(bridge, heartbeatTimer, bridgeKey);
2678
+ cleanupBridge(activeBridge, activeHeartbeatTimer, bridgeKey);
2589
2679
  closeResponse();
2590
2680
  };
2591
2681
  req.on("close", onClientClose);
2592
2682
  res.on("close", onClientClose);
2593
2683
 
2594
- const processChunk = createConnectFrameParser(
2595
- (messageBytes) => {
2596
- try {
2597
- const serverMessage = fromBinary(
2598
- AgentServerMessageSchema,
2599
- messageBytes,
2600
- );
2601
- processServerMessage(
2602
- serverMessage,
2603
- blobStore,
2604
- mcpTools,
2605
- (data) => bridge.write(data),
2606
- state,
2607
- (text, isThinking) => {
2608
- if (isThinking) {
2609
- sendSSE(makeChunk({ reasoning_content: text }));
2610
- } else {
2611
- const { content, reasoning } = tagFilter.process(text);
2612
- if (reasoning)
2613
- sendSSE(makeChunk({ reasoning_content: reasoning }));
2614
- if (content) {
2615
- appendAssistantTextToTurn(currentTurn, content);
2616
- sendSSE(makeChunk({ content }));
2684
+ // Wire data/close handlers onto the current activeBridge. Called once on
2685
+ // initial setup and again on each transparent retry.
2686
+ function attachToBridge(): void {
2687
+ // Each attempt gets a fresh thinking-tag filter so retried output doesn't
2688
+ // inherit stale parser state from the dead bridge.
2689
+ const tagFilter = createThinkingTagFilter();
2690
+ let contentSent = false;
2691
+ mcpExecReceived = false;
2692
+ resetStallTimer();
2693
+
2694
+ const processChunk = createConnectFrameParser(
2695
+ (messageBytes) => {
2696
+ resetStallTimer();
2697
+ try {
2698
+ const serverMessage = fromBinary(
2699
+ AgentServerMessageSchema,
2700
+ messageBytes,
2701
+ );
2702
+ processServerMessage(
2703
+ serverMessage,
2704
+ blobStore,
2705
+ mcpTools,
2706
+ (data) => activeBridge.write(data),
2707
+ state,
2708
+ (text, isThinking) => {
2709
+ if (isThinking) {
2710
+ contentSent = true;
2711
+ sendSSE(makeChunk({ reasoning_content: text }));
2712
+ } else {
2713
+ const { content, reasoning } = tagFilter.process(text);
2714
+ if (reasoning) {
2715
+ contentSent = true;
2716
+ sendSSE(makeChunk({ reasoning_content: reasoning }));
2717
+ }
2718
+ if (content) {
2719
+ contentSent = true;
2720
+ appendAssistantTextToTurn(currentTurn, content);
2721
+ sendSSE(makeChunk({ content }));
2722
+ }
2723
+ }
2724
+ },
2725
+ (exec) => {
2726
+ state.pendingExecs.push(exec);
2727
+ mcpExecReceived = true;
2728
+
2729
+ const flushed = tagFilter.flush();
2730
+ if (flushed.reasoning)
2731
+ sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
2732
+ if (flushed.content) {
2733
+ appendAssistantTextToTurn(currentTurn, flushed.content);
2734
+ sendSSE(makeChunk({ content: flushed.content }));
2617
2735
  }
2618
- }
2619
- },
2620
- (exec) => {
2621
- state.pendingExecs.push(exec);
2622
- mcpExecReceived = true;
2623
-
2624
- const flushed = tagFilter.flush();
2625
- if (flushed.reasoning)
2626
- sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
2627
- if (flushed.content) {
2628
- appendAssistantTextToTurn(currentTurn, flushed.content);
2629
- sendSSE(makeChunk({ content: flushed.content }));
2630
- }
2631
-
2632
- currentTurn.steps.push({
2633
- kind: "toolCall",
2634
- toolCallId: exec.toolCallId,
2635
- toolName: exec.toolName,
2636
- arguments: parseToolCallArguments(exec.decodedArgs),
2637
- });
2638
2736
 
2639
- const toolCallIndex = state.toolCallIndex++;
2640
- sendSSE(
2641
- makeChunk({
2642
- tool_calls: [
2643
- {
2644
- index: toolCallIndex,
2645
- id: exec.toolCallId,
2646
- type: "function",
2647
- function: {
2648
- name: exec.toolName,
2649
- arguments: exec.decodedArgs,
2737
+ currentTurn.steps.push({
2738
+ kind: "toolCall",
2739
+ toolCallId: exec.toolCallId,
2740
+ toolName: exec.toolName,
2741
+ arguments: parseToolCallArguments(exec.decodedArgs),
2742
+ });
2743
+
2744
+ const toolCallIndex = state.toolCallIndex++;
2745
+ sendSSE(
2746
+ makeChunk({
2747
+ tool_calls: [
2748
+ {
2749
+ index: toolCallIndex,
2750
+ id: exec.toolCallId,
2751
+ type: "function",
2752
+ function: {
2753
+ name: exec.toolName,
2754
+ arguments: exec.decodedArgs,
2755
+ },
2650
2756
  },
2651
- },
2652
- ],
2653
- }),
2757
+ ],
2758
+ }),
2759
+ );
2760
+
2761
+ activeBridges.set(bridgeKey, {
2762
+ bridge: activeBridge,
2763
+ heartbeatTimer: activeHeartbeatTimer,
2764
+ blobStore,
2765
+ mcpTools,
2766
+ pendingExecs: state.pendingExecs,
2767
+ currentTurn,
2768
+ lastTotalTokens: state.totalTokens,
2769
+ });
2770
+ debugLog("stream.tool_call_pause", {
2771
+ requestId,
2772
+ bridgeKey,
2773
+ exec,
2774
+ pendingExecs: state.pendingExecs,
2775
+ currentTurn,
2776
+ });
2777
+
2778
+ sendSSE(makeUsageChunk());
2779
+ sendSSE(makeChunk({}, "tool_calls"));
2780
+ sendDone();
2781
+ closeResponse();
2782
+ },
2783
+ (checkpointBytes) => {
2784
+ latestCheckpoint = checkpointBytes;
2785
+ const stored = conversationStates.get(convKey);
2786
+ if (stored) {
2787
+ stored.checkpoint = checkpointBytes;
2788
+ for (const [k, v] of blobStore) stored.blobStore.set(k, v);
2789
+ if (state.cursorContextWindow > 0) {
2790
+ stored.effectiveContextWindow = state.cursorContextWindow;
2791
+ }
2792
+ if (state.totalTokens > 0) {
2793
+ stored.lastTotalTokens = state.totalTokens;
2794
+ }
2795
+ }
2796
+ debugLog("stream.checkpoint_buffered", {
2797
+ requestId,
2798
+ convKey,
2799
+ checkpointBytes,
2800
+ });
2801
+ },
2802
+ );
2803
+ } catch (err) {
2804
+ console.error(
2805
+ "[cursor-provider] Stream message processing error:",
2806
+ err instanceof Error ? err.message : err,
2807
+ );
2808
+ }
2809
+ },
2810
+ (endStreamBytes) => {
2811
+ resetStallTimer();
2812
+ const endError = parseConnectEndStream(endStreamBytes);
2813
+ clearInterval(activeHeartbeatTimer);
2814
+ if (stallTimer) clearTimeout(stallTimer);
2815
+ if (endError) {
2816
+ if (endError.retryable && !contentSent && !closed && accessToken && retryCount < MAX_BRIDGE_RETRIES) {
2817
+ debugLog("stream.retryable_connect_error", {
2818
+ requestId, bridgeKey, convKey, modelId,
2819
+ message: endError.message, attempt: retryCount + 1,
2820
+ });
2821
+ console.error(
2822
+ `[cursor-provider] Retryable Cursor error (${modelId}): ${endError.message} — will retry (${retryCount + 1}/${MAX_BRIDGE_RETRIES})`,
2654
2823
  );
2824
+ retryableConnectError = true;
2825
+ try { activeBridge.proc.kill(); } catch {}
2826
+ return;
2827
+ }
2828
+ console.error(
2829
+ `[cursor-provider] Cursor stream error (${modelId}):`,
2830
+ endError.message,
2831
+ );
2832
+ activeBridge.end();
2833
+ activeBridge.unref();
2834
+ sendSSE(makeChunk({ content: endError.message }, "error"));
2835
+ sendSSE(makeUsageChunk());
2836
+ sendDone();
2837
+ closeResponse();
2838
+ } else {
2839
+ activeBridge.end();
2840
+ activeBridge.unref();
2841
+ const flushed = tagFilter.flush();
2842
+ if (flushed.reasoning)
2843
+ sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
2844
+ if (flushed.content) {
2845
+ appendAssistantTextToTurn(currentTurn, flushed.content);
2846
+ sendSSE(makeChunk({ content: flushed.content }));
2847
+ }
2848
+ sendSSE(makeChunk({}, "stop"));
2849
+ sendSSE(makeUsageChunk());
2850
+ sendDone();
2851
+ closeResponse();
2852
+ }
2853
+ },
2854
+ );
2655
2855
 
2656
- activeBridges.set(bridgeKey, {
2657
- bridge,
2658
- heartbeatTimer,
2659
- blobStore,
2660
- mcpTools,
2661
- pendingExecs: state.pendingExecs,
2662
- currentTurn,
2663
- });
2664
- debugLog("stream.tool_call_pause", {
2665
- requestId,
2666
- bridgeKey,
2667
- exec,
2668
- pendingExecs: state.pendingExecs,
2669
- currentTurn,
2670
- });
2856
+ activeBridge.onData(processChunk);
2671
2857
 
2672
- sendSSE(makeChunk({}, "tool_calls"));
2673
- sendDone();
2674
- closeResponse();
2675
- },
2676
- (checkpointBytes) => {
2677
- latestCheckpoint = checkpointBytes;
2678
- const stored = conversationStates.get(convKey);
2679
- if (stored) {
2680
- stored.checkpoint = checkpointBytes;
2681
- for (const [k, v] of blobStore) stored.blobStore.set(k, v);
2682
- if (state.cursorContextWindow > 0) {
2683
- stored.effectiveContextWindow = state.cursorContextWindow;
2684
- }
2685
- }
2686
- debugLog("stream.checkpoint_buffered", {
2687
- requestId,
2688
- convKey,
2689
- checkpointBytes,
2690
- });
2691
- },
2692
- );
2693
- } catch (err) {
2694
- console.error(
2695
- "[cursor-provider] Stream message processing error:",
2696
- err instanceof Error ? err.message : err,
2697
- );
2698
- }
2699
- },
2700
- (endStreamBytes) => {
2701
- const endError = parseConnectEndStream(endStreamBytes);
2702
- // Always stop heartbeats and unref the bridge regardless of error/success
2703
- // so the parent process is not kept alive waiting for HTTP/2 END_STREAM.
2704
- clearInterval(heartbeatTimer);
2705
- bridge.end();
2706
- bridge.unref();
2707
- if (endError) {
2708
- console.error(
2709
- `[cursor-provider] Cursor stream error (${modelId}):`,
2710
- endError.message,
2711
- );
2712
- sendSSE(makeChunk({ content: endError.message }, "error"));
2713
- sendSSE(makeUsageChunk());
2714
- sendDone();
2715
- closeResponse();
2716
- } else {
2717
- // Cursor's Connect-level response is complete. Send the SSE response
2718
- // immediately without waiting for HTTP/2 END_STREAM, which Cursor can
2719
- // delay by several seconds after the Connect end-stream frame.
2720
- const flushed = tagFilter.flush();
2721
- if (flushed.reasoning)
2722
- sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
2723
- if (flushed.content) {
2724
- appendAssistantTextToTurn(currentTurn, flushed.content);
2725
- sendSSE(makeChunk({ content: flushed.content }));
2858
+ activeBridge.onClose((code) => {
2859
+ debugLog("stream.bridge_close", {
2860
+ requestId,
2861
+ bridgeKey,
2862
+ convKey,
2863
+ code,
2864
+ cancelled,
2865
+ mcpExecReceived,
2866
+ currentTurn,
2867
+ latestCheckpoint,
2868
+ retryCount,
2869
+ });
2870
+ clearInterval(activeHeartbeatTimer);
2871
+ if (stallTimer) clearTimeout(stallTimer);
2872
+ if (sessionBridges.get(bridgeKey) === activeBridge) sessionBridges.delete(bridgeKey);
2873
+ const stored = conversationStates.get(convKey);
2874
+ if (stored) {
2875
+ for (const [k, v] of blobStore) stored.blobStore.set(k, v);
2876
+ if (latestCheckpoint) {
2877
+ stored.checkpoint = latestCheckpoint;
2878
+ debugLog("stream.checkpoint_committed", { requestId, convKey, stored });
2879
+ }
2880
+ if (state.cursorContextWindow > 0) {
2881
+ stored.effectiveContextWindow = state.cursorContextWindow;
2882
+ }
2883
+ if (state.totalTokens > 0) {
2884
+ stored.lastTotalTokens = state.totalTokens;
2726
2885
  }
2727
- sendSSE(makeChunk({}, "stop"));
2728
- sendSSE(makeUsageChunk());
2729
- sendDone();
2730
- closeResponse();
2731
2886
  }
2732
- },
2733
- );
2887
+ if (cancelled) return;
2888
+
2889
+ // ── Transparent retry on bridge failure ──
2890
+ // When the bridge dies mid-request (connection killed by LB, TCP
2891
+ // timeout, etc.) or Cursor returns a retryable protocol error
2892
+ // (internal, unavailable, deadline_exceeded), rebuild the Cursor
2893
+ // request and replay on a fresh bridge. The SSE response stays
2894
+ // open — the client sees at most a brief pause.
2895
+ const shouldRetry = retryableConnectError || code !== 0;
2896
+ if (shouldRetry && !closed && accessToken && retryCount < MAX_BRIDGE_RETRIES) {
2897
+ const cp = preTurnCheckpoint ?? stored?.checkpoint ?? null;
2898
+ // For retryable Connect errors, allow retry even without a
2899
+ // checkpoint (first request in session) — buildCursorRequest
2900
+ // handles checkpoint=null by rebuilding from turns.
2901
+ if (stored && (cp || retryableConnectError)) {
2902
+ retryCount++;
2903
+ const wasConnectError = retryableConnectError;
2904
+ retryableConnectError = false;
2905
+ debugLog("stream.retry", {
2906
+ requestId,
2907
+ bridgeKey,
2908
+ convKey,
2909
+ attempt: retryCount,
2910
+ maxRetries: MAX_BRIDGE_RETRIES,
2911
+ connectError: wasConnectError,
2912
+ });
2913
+ console.error(
2914
+ `[cursor-provider] ${wasConnectError ? "Retryable Cursor error" : `Bridge died (exit ${code})`}, retry ${retryCount}/${MAX_BRIDGE_RETRIES} (${modelId})`,
2915
+ );
2734
2916
 
2735
- bridge.onData(processChunk);
2917
+ // Reset per-attempt stream state; keep cumulative token counts.
2918
+ state.pendingExecs = [];
2919
+ latestCheckpoint = null;
2920
+
2921
+ const retryPayload = buildCursorRequest(
2922
+ modelId,
2923
+ stored.systemPrompt || "You are a helpful assistant.",
2924
+ currentTurn.userText,
2925
+ completedTurns,
2926
+ stored.conversationId,
2927
+ cp,
2928
+ blobStore,
2929
+ currentTurn.images,
2930
+ );
2931
+ retryPayload.mcpTools = mcpTools;
2736
2932
 
2737
- bridge.onClose((code) => {
2738
- debugLog("stream.bridge_close", {
2739
- requestId,
2740
- bridgeKey,
2741
- convKey,
2742
- code,
2743
- cancelled,
2744
- mcpExecReceived,
2745
- currentTurn,
2746
- latestCheckpoint,
2747
- });
2748
- clearInterval(heartbeatTimer);
2749
- if (sessionBridges.get(bridgeKey) === bridge) sessionBridges.delete(bridgeKey);
2750
- req.removeListener("close", onClientClose);
2751
- res.removeListener("close", onClientClose);
2752
- const stored = conversationStates.get(convKey);
2753
- if (stored) {
2754
- for (const [k, v] of blobStore) stored.blobStore.set(k, v);
2755
- if (latestCheckpoint) {
2756
- stored.checkpoint = latestCheckpoint;
2757
- debugLog("stream.checkpoint_committed", { requestId, convKey, stored });
2758
- }
2759
- if (state.cursorContextWindow > 0) {
2760
- stored.effectiveContextWindow = state.cursorContextWindow;
2933
+ const { bridge: newBridge, heartbeatTimer: newTimer } = startBridge(
2934
+ accessToken,
2935
+ retryPayload.requestBytes,
2936
+ bridgeKey,
2937
+ );
2938
+ activeBridge = newBridge;
2939
+ activeHeartbeatTimer = newTimer;
2940
+
2941
+ // No need to re-register the client-close listener: onClientClose reads the
2942
+ // mutable activeBridge/activeHeartbeatTimer, so it already targets the new bridge.
2943
+ attachToBridge();
2944
+ return;
2945
+ }
2761
2946
  }
2762
- }
2763
- if (cancelled) return;
2764
- if (!mcpExecReceived) {
2765
- if (code !== 0) {
2766
- // Bridge was killed before receiving any response (e.g. timeout waiting
2767
- // for Cursor to process a large checkpoint during compaction). Treat as
2768
- // an error so callers (like pi compaction) see a real failure instead of
2769
- // an empty successful-looking response.
2770
- console.error(
2771
- `[cursor-provider] Bridge exited (code ${code}) before receiving response (${modelId})`,
2772
- );
2773
- const failureMsg = classifyBridgeFailure(code, bridge.getStderr());
2774
- sendSSE(makeChunk({ content: failureMsg }, "error"));
2947
+
2948
+ // No retry — remove client-close listeners since this bridge is done.
2949
+ req.removeListener("close", onClientClose);
2950
+ res.removeListener("close", onClientClose);
2951
+
2952
+ if (!mcpExecReceived) {
2953
+ if (code !== 0) {
2954
+ console.error(
2955
+ `[cursor-provider] Bridge exited (code ${code}) before receiving response (${modelId})`,
2956
+ );
2957
+ const failureMsg = classifyBridgeFailure(code, activeBridge.getStderr());
2958
+ sendSSE(makeChunk({ content: failureMsg }, "error"));
2959
+ sendSSE(makeUsageChunk());
2960
+ sendDone();
2961
+ closeResponse();
2962
+ } else {
2963
+ const flushed = tagFilter.flush();
2964
+ if (flushed.reasoning)
2965
+ sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
2966
+ if (flushed.content) {
2967
+ appendAssistantTextToTurn(currentTurn, flushed.content);
2968
+ sendSSE(makeChunk({ content: flushed.content }));
2969
+ }
2970
+ sendSSE(makeChunk({}, "stop"));
2971
+ sendSSE(makeUsageChunk());
2972
+ sendDone();
2973
+ closeResponse();
2974
+ }
2975
+ } else if (code !== 0) {
2976
+ sendSSE(makeChunk({ content: "Bridge connection lost" }, "error"));
2775
2977
  sendSSE(makeUsageChunk());
2776
2978
  sendDone();
2777
2979
  closeResponse();
2980
+ activeBridges.delete(bridgeKey);
2778
2981
  } else {
2779
- const flushed = tagFilter.flush();
2780
- if (flushed.reasoning)
2781
- sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
2782
- if (flushed.content) {
2783
- appendAssistantTextToTurn(currentTurn, flushed.content);
2784
- sendSSE(makeChunk({ content: flushed.content }));
2785
- }
2786
- sendSSE(makeChunk({}, "stop"));
2787
- sendSSE(makeUsageChunk());
2788
- sendDone();
2982
+ activeBridges.delete(bridgeKey);
2789
2983
  closeResponse();
2790
2984
  }
2791
- } else if (code !== 0) {
2792
- sendSSE(makeChunk({ content: "Bridge connection lost" }, "error"));
2793
- sendSSE(makeUsageChunk());
2794
- sendDone();
2795
- closeResponse();
2796
- activeBridges.delete(bridgeKey);
2797
- } else {
2798
- // Bridge closed cleanly after a tool call pause. The HTTP response was
2799
- // already ended by the MCP exec handler; just ensure cleanup.
2800
- activeBridges.delete(bridgeKey);
2801
- closeResponse();
2802
- }
2803
- });
2985
+ });
2986
+ }
2987
+
2988
+ attachToBridge();
2804
2989
  }
2805
2990
 
2806
2991
  export function writeSSEStreamForTests(args: {
@@ -2816,6 +3001,7 @@ export function writeSSEStreamForTests(args: {
2816
3001
  req: IncomingMessage;
2817
3002
  res: ServerResponse;
2818
3003
  requestId?: string;
3004
+ accessToken?: string;
2819
3005
  }): void {
2820
3006
  writeSSEStream(
2821
3007
  args.bridge,
@@ -2830,6 +3016,8 @@ export function writeSSEStreamForTests(args: {
2830
3016
  args.req,
2831
3017
  args.res,
2832
3018
  args.requestId,
3019
+ false,
3020
+ args.accessToken,
2833
3021
  );
2834
3022
  }
2835
3023
 
@@ -2846,6 +3034,7 @@ function handleToolResultResume(
2846
3034
  res: ServerResponse,
2847
3035
  stream: boolean,
2848
3036
  requestId?: string,
3037
+ accessToken?: string,
2849
3038
  ): void {
2850
3039
  const {
2851
3040
  bridge,
@@ -2886,6 +3075,7 @@ function handleToolResultResume(
2886
3075
  mcpTools,
2887
3076
  pendingExecs,
2888
3077
  currentTurn,
3078
+ lastTotalTokens: active.lastTotalTokens,
2889
3079
  });
2890
3080
  debugLog("tool_resume.partial_wait", {
2891
3081
  requestId,
@@ -2893,7 +3083,7 @@ function handleToolResultResume(
2893
3083
  unresolvedExecs,
2894
3084
  currentTurn,
2895
3085
  });
2896
- respondWithPendingToolCalls(modelId, unresolvedExecs, stream, res);
3086
+ respondWithPendingToolCalls(modelId, unresolvedExecs, stream, res, computeUsageFromStored(active.lastTotalTokens, convKey, modelId));
2897
3087
  return;
2898
3088
  }
2899
3089
 
@@ -2947,6 +3137,8 @@ function handleToolResultResume(
2947
3137
  req,
2948
3138
  res,
2949
3139
  requestId,
3140
+ false,
3141
+ accessToken,
2950
3142
  );
2951
3143
  }
2952
3144
 
@@ -2999,13 +3191,13 @@ async function handleNonStreamingResponse(
2999
3191
  toolCallIndex: 0,
3000
3192
  pendingExecs: [],
3001
3193
  outputTokens: 0,
3002
- totalTokens: 0,
3194
+ totalTokens: storedForNonStream?.lastTotalTokens ?? 0,
3003
3195
  cursorContextWindow: storedForNonStream?.effectiveContextWindow ?? 0,
3004
3196
  inferredContextWindow: inferContextWindow(modelId),
3005
3197
  };
3006
3198
  const tagFilter = createThinkingTagFilter();
3007
3199
  let fullText = "";
3008
- let nonStreamError: Error | null = null;
3200
+ let nonStreamError: ConnectEndStreamError | null = null;
3009
3201
  let latestCheckpoint: Uint8Array | null = null;
3010
3202
 
3011
3203
  return new Promise((resolve) => {
@@ -3130,6 +3322,9 @@ async function handleNonStreamingResponse(
3130
3322
  if (state.cursorContextWindow > 0) {
3131
3323
  stored.effectiveContextWindow = state.cursorContextWindow;
3132
3324
  }
3325
+ if (state.totalTokens > 0) {
3326
+ stored.lastTotalTokens = state.totalTokens;
3327
+ }
3133
3328
  }
3134
3329
 
3135
3330
  if (cancelled) {