@offbynan/pi-cursor-provider 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -2
- package/h2-bridge.mjs +11 -7
- package/package.json +1 -1
- package/proxy.ts +399 -204
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
- **Image support** — base64 `image_url` content parts forwarded to Cursor end-to-end; the upstream silently drops them
|
|
6
6
|
- **Compaction support** — old turns archived as inline text to cut `getBlobArgs` round-trips from O(history) to O(tail); bridge termination errors surface as real failures instead of silent empty responses; checkpoint cleared after compaction to keep both sides in sync
|
|
7
|
-
- **Reliability** — bridge timeouts hardened and configurable; SSE keepalive prevents pi from timing out during blob-fetching; conversation state and checkpoints survive transient failures and client disconnects
|
|
7
|
+
- **Reliability** — transparent retry for transient Cursor protocol errors (internal / unavailable / deadline_exceeded); HTTP/2 PING keepalive detects dead connections; stall timer kills stuck bridges; bridge timeouts hardened and configurable; SSE keepalive prevents pi from timing out during blob-fetching; conversation state and checkpoints survive transient failures and client disconnects
|
|
8
8
|
- **Model support** — per-model context window inference (vs. hardcoded 200 k); runtime cap scaling when Cursor enforces a tighter window; detailed cost table for all current families; effort-suffix variants deduplicated so pi's reasoning-level setting drives the suffix automatically
|
|
9
9
|
- **Thinking-tag filtering** — inline `<think>` / `<reasoning>` tags stripped from the response and routed to `reasoning_content`
|
|
10
10
|
- **Fixes & observability** — `pi -p` exit hang fixed; dead TTL eviction code removed; opt-in JSONL debug logging with a bundled timeline viewer
|
|
@@ -59,6 +59,10 @@ pi → openai-completions → localhost:PORT/v1/chat/completions
|
|
|
59
59
|
| `PI_CURSOR_PROVIDER_DEBUG_FILE` | auto in tmpdir | Override the debug log file path |
|
|
60
60
|
| `PI_CURSOR_BRIDGE_INITIAL_TIMEOUT_MS` | `120000` | Kill bridge if no HTTP/2 activity within this many ms of spawn |
|
|
61
61
|
| `PI_CURSOR_BRIDGE_ACTIVITY_TIMEOUT_MS` | `300000` | Kill bridge if no HTTP/2 activity for this many ms after the first frame |
|
|
62
|
+
| `PI_CURSOR_BRIDGE_PING_INTERVAL_MS` | `15000` | HTTP/2 PING interval to detect dead connections |
|
|
63
|
+
| `PI_CURSOR_BRIDGE_PING_TIMEOUT_MS` | `10000` | Timeout for each HTTP/2 PING before declaring the connection dead |
|
|
64
|
+
| `PI_CURSOR_BRIDGE_STALL_TIMEOUT_MS` | `120000` | Kill bridge if no data is received from Cursor within this many ms |
|
|
65
|
+
| `PI_CURSOR_MAX_BRIDGE_RETRIES` | `2` | Max transparent retries on transient Cursor errors or bridge crashes |
|
|
62
66
|
| `PI_CURSOR_TURN_ARCHIVE_THRESHOLD` | `20` | Keep this many recent turns as raw blobs; older turns are archived as inline text |
|
|
63
67
|
| `PI_CURSOR_RAW_MODELS` | off | Set to disable model deduplication and see all raw Cursor model IDs |
|
|
64
68
|
|
|
@@ -150,6 +154,24 @@ The upstream has no observability. This fork adds opt-in JSONL event logging (se
|
|
|
150
154
|
npm run debug:timeline -- --latest
|
|
151
155
|
```
|
|
152
156
|
|
|
157
|
+
### Transparent retry for transient errors
|
|
158
|
+
|
|
159
|
+
When Cursor returns a retryable Connect-level error (`internal`, `unavailable`, `deadline_exceeded`) or the bridge process crashes mid-request, the proxy now automatically retries on a fresh HTTP/2 bridge — up to `PI_CURSOR_MAX_BRIDGE_RETRIES` times (default 2). The SSE response to pi stays open; the client sees at most a brief pause.
|
|
160
|
+
|
|
161
|
+
Retry is only attempted when no content has been streamed yet (so partial responses are never replayed). On retry the proxy rebuilds the Cursor request using the pre-turn checkpoint and replays cleanly.
|
|
162
|
+
|
|
163
|
+
Previously these transient errors were surfaced as `finish_reason: "error"`, requiring the user to manually continue each time.
|
|
164
|
+
|
|
165
|
+
### HTTP/2 PING keepalive and stall detection
|
|
166
|
+
|
|
167
|
+
The bridge now configures HTTP/2-level PINGs (`PI_CURSOR_BRIDGE_PING_INTERVAL_MS` / `PI_CURSOR_BRIDGE_PING_TIMEOUT_MS`) so dead TCP connections (NAT timeout, load-balancer cycling) are detected within seconds rather than waiting for the 5-minute activity timeout.
|
|
168
|
+
|
|
169
|
+
Additionally, a stall timer (`PI_CURSOR_BRIDGE_STALL_TIMEOUT_MS`, default 120 s) kills the bridge if no data arrives from Cursor within that window — catching cases where the HTTP/2 connection is technically alive but the server is stuck processing a stale checkpoint.
|
|
170
|
+
|
|
171
|
+
### Usage reporting on tool-call continuations
|
|
172
|
+
|
|
173
|
+
When the proxy pauses mid-turn for a tool call and responds with pending tool calls (the partial-wait path), it now reports meaningful `usage` token counts instead of zeros. The stored `lastTotalTokens` from the previous stream segment is scaled proportionally if Cursor is enforcing a tighter context window than the model's nominal size. This lets pi track cumulative token usage accurately across multi-step tool-call turns.
|
|
174
|
+
|
|
153
175
|
### Bridge timeout hardening
|
|
154
176
|
|
|
155
177
|
The upstream `h2-bridge.mjs` used a 30-second initial connection timeout and a 120-second activity timeout. Large conversations require Cursor to deserialise a big checkpoint and complete many `getBlobArgs` round-trips before it starts streaming tokens, which regularly exceeded these limits and caused compaction to fail with a `terminated` error.
|
|
@@ -276,7 +298,7 @@ Session state is cleared on pi lifecycle events — session switch, fork, `/tree
|
|
|
276
298
|
|
|
277
299
|
### Error resilience
|
|
278
300
|
|
|
279
|
-
|
|
301
|
+
Transient Cursor errors (`internal`, `unavailable`, `deadline_exceeded`) and bridge crashes are retried automatically — up to `PI_CURSOR_MAX_BRIDGE_RETRIES` times — without dropping the SSE connection to pi. The last good checkpoint survives all error types and is used on retry. If Cursor sends a checkpoint before a client disconnect, that checkpoint is also preserved.
|
|
280
302
|
|
|
281
303
|
## Requirements
|
|
282
304
|
|
package/h2-bridge.mjs
CHANGED
|
@@ -87,15 +87,19 @@ if (!configBuf) process.exit(1);
|
|
|
87
87
|
const config = JSON.parse(configBuf.toString("utf8"));
|
|
88
88
|
const { accessToken, url, path: rpcPath, unary } = config;
|
|
89
89
|
|
|
90
|
-
const client = http2.connect(url || "https://api2.cursor.sh");
|
|
91
|
-
|
|
92
|
-
// Guard against initial connection failure. Reset on any h2 activity
|
|
93
|
-
// so long-running agent conversations (with tool call round-trips) survive.
|
|
94
|
-
// Initial timeout is generous because large conversations require Cursor to
|
|
95
|
-
// deserialize a big checkpoint + run many getBlobArgs round-trips before it
|
|
96
|
-
// starts streaming tokens — 30 s was too short and caused compaction failures.
|
|
97
90
|
const INITIAL_TIMEOUT_MS = parseInt(process.env.PI_CURSOR_BRIDGE_INITIAL_TIMEOUT_MS ?? "") || 120_000;
|
|
98
91
|
const ACTIVITY_TIMEOUT_MS = parseInt(process.env.PI_CURSOR_BRIDGE_ACTIVITY_TIMEOUT_MS ?? "") || 300_000;
|
|
92
|
+
const H2_PING_INTERVAL_MS = parseInt(process.env.PI_CURSOR_BRIDGE_PING_INTERVAL_MS ?? "") || 15_000;
|
|
93
|
+
const H2_PING_TIMEOUT_MS = parseInt(process.env.PI_CURSOR_BRIDGE_PING_TIMEOUT_MS ?? "") || 10_000;
|
|
94
|
+
|
|
95
|
+
const client = http2.connect(url || "https://api2.cursor.sh", {
|
|
96
|
+
// Detect dead TCP connections at the HTTP/2 level — without this, a silently
|
|
97
|
+
// dropped connection (NAT timeout, LB cycling) can leave the bridge waiting
|
|
98
|
+
// for up to ACTIVITY_TIMEOUT_MS (5 min) with no indication of failure.
|
|
99
|
+
pingInterval: H2_PING_INTERVAL_MS,
|
|
100
|
+
pingTimeout: H2_PING_TIMEOUT_MS,
|
|
101
|
+
});
|
|
102
|
+
|
|
99
103
|
let timeout = setTimeout(killBridge, INITIAL_TIMEOUT_MS);
|
|
100
104
|
|
|
101
105
|
function resetTimeout() {
|
package/package.json
CHANGED
package/proxy.ts
CHANGED
|
@@ -179,6 +179,7 @@ interface ActiveBridge {
|
|
|
179
179
|
mcpTools: McpToolDefinition[];
|
|
180
180
|
pendingExecs: PendingExec[];
|
|
181
181
|
currentTurn: ParsedTurn;
|
|
182
|
+
lastTotalTokens: number;
|
|
182
183
|
}
|
|
183
184
|
|
|
184
185
|
export interface StoredConversation {
|
|
@@ -191,6 +192,15 @@ export interface StoredConversation {
|
|
|
191
192
|
* our static inferContextWindow() estimate when Cursor enforces a tighter cap.
|
|
192
193
|
*/
|
|
193
194
|
effectiveContextWindow?: number;
|
|
195
|
+
/**
|
|
196
|
+
* Last known usedTokens from Cursor's ConversationTokenDetails. Persisted
|
|
197
|
+
* so that tool-call continuations (which create a fresh StreamState) can
|
|
198
|
+
* report meaningful usage even if the checkpoint hasn't arrived yet in the
|
|
199
|
+
* new stream segment.
|
|
200
|
+
*/
|
|
201
|
+
lastTotalTokens?: number;
|
|
202
|
+
/** Cached for transparent retry when a bridge dies mid-request. */
|
|
203
|
+
systemPrompt?: string;
|
|
194
204
|
}
|
|
195
205
|
|
|
196
206
|
interface StreamState {
|
|
@@ -948,6 +958,7 @@ async function handleChatCompletion(
|
|
|
948
958
|
res,
|
|
949
959
|
body.stream !== false,
|
|
950
960
|
requestId,
|
|
961
|
+
accessToken,
|
|
951
962
|
);
|
|
952
963
|
return;
|
|
953
964
|
}
|
|
@@ -970,6 +981,8 @@ async function handleChatCompletion(
|
|
|
970
981
|
conversationStates.set(convKey, stored);
|
|
971
982
|
}
|
|
972
983
|
|
|
984
|
+
stored.systemPrompt = systemPrompt;
|
|
985
|
+
|
|
973
986
|
const mcpTools = buildMcpToolDefinitions(tools);
|
|
974
987
|
const effectiveUserText =
|
|
975
988
|
userText ||
|
|
@@ -2260,7 +2273,14 @@ function mapConnectErrorCode(code: string, message: string): string {
|
|
|
2260
2273
|
}
|
|
2261
2274
|
}
|
|
2262
2275
|
|
|
2263
|
-
|
|
2276
|
+
interface ConnectEndStreamError {
|
|
2277
|
+
message: string;
|
|
2278
|
+
retryable: boolean;
|
|
2279
|
+
}
|
|
2280
|
+
|
|
2281
|
+
const RETRYABLE_CONNECT_CODES = new Set(["internal", "unavailable", "deadline_exceeded"]);
|
|
2282
|
+
|
|
2283
|
+
function parseConnectEndStream(data: Uint8Array): ConnectEndStreamError | null {
|
|
2264
2284
|
if (data.length === 0) return null;
|
|
2265
2285
|
try {
|
|
2266
2286
|
const payload = JSON.parse(new TextDecoder().decode(data));
|
|
@@ -2268,7 +2288,10 @@ function parseConnectEndStream(data: Uint8Array): Error | null {
|
|
|
2268
2288
|
if (error) {
|
|
2269
2289
|
const code = String(error.code ?? "unknown");
|
|
2270
2290
|
const rawMessage = String(error.message ?? "Unknown error");
|
|
2271
|
-
return
|
|
2291
|
+
return {
|
|
2292
|
+
message: mapConnectErrorCode(code, rawMessage),
|
|
2293
|
+
retryable: RETRYABLE_CONNECT_CODES.has(code),
|
|
2294
|
+
};
|
|
2272
2295
|
}
|
|
2273
2296
|
return null;
|
|
2274
2297
|
} catch {
|
|
@@ -2308,11 +2331,29 @@ function computeUsage(state: StreamState) {
|
|
|
2308
2331
|
return { prompt_tokens, completion_tokens, total_tokens };
|
|
2309
2332
|
}
|
|
2310
2333
|
|
|
2334
|
+
function computeUsageFromStored(
|
|
2335
|
+
lastTotalTokens: number,
|
|
2336
|
+
convKey: string,
|
|
2337
|
+
modelId: string,
|
|
2338
|
+
): { prompt_tokens: number; completion_tokens: number; total_tokens: number } | undefined {
|
|
2339
|
+
const totalTokens = lastTotalTokens || conversationStates.get(convKey)?.lastTotalTokens || 0;
|
|
2340
|
+
if (totalTokens === 0) return undefined;
|
|
2341
|
+
const stored = conversationStates.get(convKey);
|
|
2342
|
+
const cursorWindow = stored?.effectiveContextWindow ?? 0;
|
|
2343
|
+
const piWindow = inferContextWindow(modelId);
|
|
2344
|
+
let total_tokens = totalTokens;
|
|
2345
|
+
if (cursorWindow > 0 && piWindow > cursorWindow) {
|
|
2346
|
+
total_tokens = Math.round(totalTokens * piWindow / cursorWindow);
|
|
2347
|
+
}
|
|
2348
|
+
return { prompt_tokens: total_tokens, completion_tokens: 0, total_tokens };
|
|
2349
|
+
}
|
|
2350
|
+
|
|
2311
2351
|
function respondWithPendingToolCalls(
|
|
2312
2352
|
modelId: string,
|
|
2313
2353
|
pendingExecs: PendingExec[],
|
|
2314
2354
|
stream: boolean,
|
|
2315
2355
|
res: ServerResponse,
|
|
2356
|
+
usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number },
|
|
2316
2357
|
): void {
|
|
2317
2358
|
const completionId = `chatcmpl-${crypto.randomUUID().replace(/-/g, "").slice(0, 28)}`;
|
|
2318
2359
|
const created = Math.floor(Date.now() / 1000);
|
|
@@ -2346,6 +2387,18 @@ function respondWithPendingToolCalls(
|
|
|
2346
2387
|
})}\n\n`,
|
|
2347
2388
|
);
|
|
2348
2389
|
}
|
|
2390
|
+
if (usage) {
|
|
2391
|
+
res.write(
|
|
2392
|
+
`data: ${JSON.stringify({
|
|
2393
|
+
id: completionId,
|
|
2394
|
+
object: "chat.completion.chunk",
|
|
2395
|
+
created,
|
|
2396
|
+
model: modelId,
|
|
2397
|
+
choices: [],
|
|
2398
|
+
usage,
|
|
2399
|
+
})}\n\n`,
|
|
2400
|
+
);
|
|
2401
|
+
}
|
|
2349
2402
|
res.write(
|
|
2350
2403
|
`data: ${JSON.stringify({
|
|
2351
2404
|
id: completionId,
|
|
@@ -2374,7 +2427,7 @@ function respondWithPendingToolCalls(
|
|
|
2374
2427
|
finish_reason: "tool_calls",
|
|
2375
2428
|
},
|
|
2376
2429
|
],
|
|
2377
|
-
usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
|
|
2430
|
+
usage: usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
|
|
2378
2431
|
}),
|
|
2379
2432
|
);
|
|
2380
2433
|
}
|
|
@@ -2443,6 +2496,7 @@ function handleStreamingResponse(
|
|
|
2443
2496
|
res,
|
|
2444
2497
|
requestId,
|
|
2445
2498
|
staleBridgeKilled,
|
|
2499
|
+
accessToken,
|
|
2446
2500
|
);
|
|
2447
2501
|
}
|
|
2448
2502
|
|
|
@@ -2475,9 +2529,12 @@ function cleanupBridge(
|
|
|
2475
2529
|
activeBridges.delete(bridgeKey);
|
|
2476
2530
|
}
|
|
2477
2531
|
|
|
2532
|
+
const MAX_BRIDGE_RETRIES =
|
|
2533
|
+
parseInt(process.env.PI_CURSOR_MAX_BRIDGE_RETRIES ?? "") || 2;
|
|
2534
|
+
|
|
2478
2535
|
function writeSSEStream(
|
|
2479
|
-
|
|
2480
|
-
|
|
2536
|
+
initialBridge: BridgeHandle,
|
|
2537
|
+
initialHeartbeatTimer: ReturnType<typeof setInterval>,
|
|
2481
2538
|
blobStore: Map<string, Uint8Array>,
|
|
2482
2539
|
mcpTools: McpToolDefinition[],
|
|
2483
2540
|
modelId: string,
|
|
@@ -2489,6 +2546,7 @@ function writeSSEStream(
|
|
|
2489
2546
|
res: ServerResponse,
|
|
2490
2547
|
requestId?: string,
|
|
2491
2548
|
staleBridgeKilled = false,
|
|
2549
|
+
accessToken?: string,
|
|
2492
2550
|
): void {
|
|
2493
2551
|
debugLog("stream.writer_start", {
|
|
2494
2552
|
requestId,
|
|
@@ -2501,6 +2559,19 @@ function writeSSEStream(
|
|
|
2501
2559
|
const completionId = `chatcmpl-${crypto.randomUUID().replace(/-/g, "").slice(0, 28)}`;
|
|
2502
2560
|
const created = Math.floor(Date.now() / 1000);
|
|
2503
2561
|
|
|
2562
|
+
// Mutable bridge references — updated on retry.
|
|
2563
|
+
let activeBridge = initialBridge;
|
|
2564
|
+
let activeHeartbeatTimer = initialHeartbeatTimer;
|
|
2565
|
+
let retryCount = 0;
|
|
2566
|
+
let retryableConnectError = false;
|
|
2567
|
+
|
|
2568
|
+
// Snapshot the checkpoint before this turn so retries replay cleanly
|
|
2569
|
+
// without risking a mid-turn checkpoint that includes partial progress.
|
|
2570
|
+
const preTurnCheckpoint = (() => {
|
|
2571
|
+
const s = conversationStates.get(convKey);
|
|
2572
|
+
return s?.checkpoint ? new Uint8Array(s.checkpoint) : null;
|
|
2573
|
+
})();
|
|
2574
|
+
|
|
2504
2575
|
res.writeHead(200, {
|
|
2505
2576
|
"Content-Type": "text/event-stream",
|
|
2506
2577
|
"Cache-Control": "no-cache",
|
|
@@ -2532,6 +2603,7 @@ function writeSSEStream(
|
|
|
2532
2603
|
if (closed) return;
|
|
2533
2604
|
closed = true;
|
|
2534
2605
|
clearInterval(keepAliveTimer);
|
|
2606
|
+
if (stallTimer) clearTimeout(stallTimer);
|
|
2535
2607
|
res.end();
|
|
2536
2608
|
};
|
|
2537
2609
|
|
|
@@ -2564,11 +2636,10 @@ function writeSSEStream(
|
|
|
2564
2636
|
toolCallIndex: 0,
|
|
2565
2637
|
pendingExecs: [],
|
|
2566
2638
|
outputTokens: 0,
|
|
2567
|
-
totalTokens: 0,
|
|
2639
|
+
totalTokens: storedForState?.lastTotalTokens ?? 0,
|
|
2568
2640
|
cursorContextWindow: storedForState?.effectiveContextWindow ?? 0,
|
|
2569
2641
|
inferredContextWindow: inferContextWindow(modelId),
|
|
2570
2642
|
};
|
|
2571
|
-
const tagFilter = createThinkingTagFilter();
|
|
2572
2643
|
let mcpExecReceived = false;
|
|
2573
2644
|
let cancelled = false;
|
|
2574
2645
|
let latestCheckpoint: Uint8Array | null = null;
|
|
@@ -2580,227 +2651,341 @@ function writeSSEStream(
|
|
|
2580
2651
|
}, 15_000);
|
|
2581
2652
|
keepAliveTimer.unref();
|
|
2582
2653
|
|
|
2654
|
+
// Stall detector: kill the bridge if no data arrives from Cursor for too
|
|
2655
|
+
// long. This catches cases where the H2 connection is technically alive
|
|
2656
|
+
// but Cursor's server is stuck processing a stale conversation checkpoint.
|
|
2657
|
+
// Reset on every incoming Connect frame.
|
|
2658
|
+
const STALL_TIMEOUT_MS = parseInt(process.env.PI_CURSOR_BRIDGE_STALL_TIMEOUT_MS ?? "") || 120_000;
|
|
2659
|
+
let stallTimer: ReturnType<typeof setTimeout> | undefined;
|
|
2660
|
+
const resetStallTimer = () => {
|
|
2661
|
+
if (stallTimer) clearTimeout(stallTimer);
|
|
2662
|
+
stallTimer = setTimeout(() => {
|
|
2663
|
+
if (cancelled || closed) return;
|
|
2664
|
+
debugLog("stream.stall_timeout", { requestId, bridgeKey, convKey, modelId });
|
|
2665
|
+
console.error(
|
|
2666
|
+
`[cursor-provider] Bridge stalled for ${STALL_TIMEOUT_MS / 1000}s \u2014 killing (${modelId})`,
|
|
2667
|
+
);
|
|
2668
|
+
cleanupBridge(activeBridge, activeHeartbeatTimer, bridgeKey);
|
|
2669
|
+
}, STALL_TIMEOUT_MS);
|
|
2670
|
+
stallTimer.unref?.();
|
|
2671
|
+
};
|
|
2672
|
+
|
|
2583
2673
|
// Detect client disconnect (e.g. user pressed Escape in pi)
|
|
2584
2674
|
const onClientClose = () => {
|
|
2585
2675
|
if (cancelled || closed) return;
|
|
2586
2676
|
debugLog("stream.client_close", { requestId, bridgeKey, convKey });
|
|
2587
2677
|
cancelled = true;
|
|
2588
|
-
cleanupBridge(
|
|
2678
|
+
cleanupBridge(activeBridge, activeHeartbeatTimer, bridgeKey);
|
|
2589
2679
|
closeResponse();
|
|
2590
2680
|
};
|
|
2591
2681
|
req.on("close", onClientClose);
|
|
2592
2682
|
res.on("close", onClientClose);
|
|
2593
2683
|
|
|
2594
|
-
|
|
2595
|
-
|
|
2596
|
-
|
|
2597
|
-
|
|
2598
|
-
|
|
2599
|
-
|
|
2600
|
-
|
|
2601
|
-
|
|
2602
|
-
|
|
2603
|
-
|
|
2604
|
-
|
|
2605
|
-
|
|
2606
|
-
|
|
2607
|
-
|
|
2608
|
-
|
|
2609
|
-
|
|
2610
|
-
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
|
|
2615
|
-
|
|
2616
|
-
|
|
2684
|
+
// Wire data/close handlers onto the current activeBridge. Called once on
|
|
2685
|
+
// initial setup and again on each transparent retry.
|
|
2686
|
+
function attachToBridge(): void {
|
|
2687
|
+
// Each attempt gets a fresh thinking-tag filter so retried output doesn't
|
|
2688
|
+
// inherit stale parser state from the dead bridge.
|
|
2689
|
+
const tagFilter = createThinkingTagFilter();
|
|
2690
|
+
let contentSent = false;
|
|
2691
|
+
mcpExecReceived = false;
|
|
2692
|
+
resetStallTimer();
|
|
2693
|
+
|
|
2694
|
+
const processChunk = createConnectFrameParser(
|
|
2695
|
+
(messageBytes) => {
|
|
2696
|
+
resetStallTimer();
|
|
2697
|
+
try {
|
|
2698
|
+
const serverMessage = fromBinary(
|
|
2699
|
+
AgentServerMessageSchema,
|
|
2700
|
+
messageBytes,
|
|
2701
|
+
);
|
|
2702
|
+
processServerMessage(
|
|
2703
|
+
serverMessage,
|
|
2704
|
+
blobStore,
|
|
2705
|
+
mcpTools,
|
|
2706
|
+
(data) => activeBridge.write(data),
|
|
2707
|
+
state,
|
|
2708
|
+
(text, isThinking) => {
|
|
2709
|
+
if (isThinking) {
|
|
2710
|
+
contentSent = true;
|
|
2711
|
+
sendSSE(makeChunk({ reasoning_content: text }));
|
|
2712
|
+
} else {
|
|
2713
|
+
const { content, reasoning } = tagFilter.process(text);
|
|
2714
|
+
if (reasoning) {
|
|
2715
|
+
contentSent = true;
|
|
2716
|
+
sendSSE(makeChunk({ reasoning_content: reasoning }));
|
|
2717
|
+
}
|
|
2718
|
+
if (content) {
|
|
2719
|
+
contentSent = true;
|
|
2720
|
+
appendAssistantTextToTurn(currentTurn, content);
|
|
2721
|
+
sendSSE(makeChunk({ content }));
|
|
2722
|
+
}
|
|
2723
|
+
}
|
|
2724
|
+
},
|
|
2725
|
+
(exec) => {
|
|
2726
|
+
state.pendingExecs.push(exec);
|
|
2727
|
+
mcpExecReceived = true;
|
|
2728
|
+
|
|
2729
|
+
const flushed = tagFilter.flush();
|
|
2730
|
+
if (flushed.reasoning)
|
|
2731
|
+
sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
|
|
2732
|
+
if (flushed.content) {
|
|
2733
|
+
appendAssistantTextToTurn(currentTurn, flushed.content);
|
|
2734
|
+
sendSSE(makeChunk({ content: flushed.content }));
|
|
2617
2735
|
}
|
|
2618
|
-
}
|
|
2619
|
-
},
|
|
2620
|
-
(exec) => {
|
|
2621
|
-
state.pendingExecs.push(exec);
|
|
2622
|
-
mcpExecReceived = true;
|
|
2623
|
-
|
|
2624
|
-
const flushed = tagFilter.flush();
|
|
2625
|
-
if (flushed.reasoning)
|
|
2626
|
-
sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
|
|
2627
|
-
if (flushed.content) {
|
|
2628
|
-
appendAssistantTextToTurn(currentTurn, flushed.content);
|
|
2629
|
-
sendSSE(makeChunk({ content: flushed.content }));
|
|
2630
|
-
}
|
|
2631
|
-
|
|
2632
|
-
currentTurn.steps.push({
|
|
2633
|
-
kind: "toolCall",
|
|
2634
|
-
toolCallId: exec.toolCallId,
|
|
2635
|
-
toolName: exec.toolName,
|
|
2636
|
-
arguments: parseToolCallArguments(exec.decodedArgs),
|
|
2637
|
-
});
|
|
2638
2736
|
|
|
2639
|
-
|
|
2640
|
-
|
|
2641
|
-
|
|
2642
|
-
|
|
2643
|
-
|
|
2644
|
-
|
|
2645
|
-
|
|
2646
|
-
|
|
2647
|
-
|
|
2648
|
-
|
|
2649
|
-
|
|
2737
|
+
currentTurn.steps.push({
|
|
2738
|
+
kind: "toolCall",
|
|
2739
|
+
toolCallId: exec.toolCallId,
|
|
2740
|
+
toolName: exec.toolName,
|
|
2741
|
+
arguments: parseToolCallArguments(exec.decodedArgs),
|
|
2742
|
+
});
|
|
2743
|
+
|
|
2744
|
+
const toolCallIndex = state.toolCallIndex++;
|
|
2745
|
+
sendSSE(
|
|
2746
|
+
makeChunk({
|
|
2747
|
+
tool_calls: [
|
|
2748
|
+
{
|
|
2749
|
+
index: toolCallIndex,
|
|
2750
|
+
id: exec.toolCallId,
|
|
2751
|
+
type: "function",
|
|
2752
|
+
function: {
|
|
2753
|
+
name: exec.toolName,
|
|
2754
|
+
arguments: exec.decodedArgs,
|
|
2755
|
+
},
|
|
2650
2756
|
},
|
|
2651
|
-
|
|
2652
|
-
|
|
2653
|
-
|
|
2757
|
+
],
|
|
2758
|
+
}),
|
|
2759
|
+
);
|
|
2760
|
+
|
|
2761
|
+
activeBridges.set(bridgeKey, {
|
|
2762
|
+
bridge: activeBridge,
|
|
2763
|
+
heartbeatTimer: activeHeartbeatTimer,
|
|
2764
|
+
blobStore,
|
|
2765
|
+
mcpTools,
|
|
2766
|
+
pendingExecs: state.pendingExecs,
|
|
2767
|
+
currentTurn,
|
|
2768
|
+
lastTotalTokens: state.totalTokens,
|
|
2769
|
+
});
|
|
2770
|
+
debugLog("stream.tool_call_pause", {
|
|
2771
|
+
requestId,
|
|
2772
|
+
bridgeKey,
|
|
2773
|
+
exec,
|
|
2774
|
+
pendingExecs: state.pendingExecs,
|
|
2775
|
+
currentTurn,
|
|
2776
|
+
});
|
|
2777
|
+
|
|
2778
|
+
sendSSE(makeUsageChunk());
|
|
2779
|
+
sendSSE(makeChunk({}, "tool_calls"));
|
|
2780
|
+
sendDone();
|
|
2781
|
+
closeResponse();
|
|
2782
|
+
},
|
|
2783
|
+
(checkpointBytes) => {
|
|
2784
|
+
latestCheckpoint = checkpointBytes;
|
|
2785
|
+
const stored = conversationStates.get(convKey);
|
|
2786
|
+
if (stored) {
|
|
2787
|
+
stored.checkpoint = checkpointBytes;
|
|
2788
|
+
for (const [k, v] of blobStore) stored.blobStore.set(k, v);
|
|
2789
|
+
if (state.cursorContextWindow > 0) {
|
|
2790
|
+
stored.effectiveContextWindow = state.cursorContextWindow;
|
|
2791
|
+
}
|
|
2792
|
+
if (state.totalTokens > 0) {
|
|
2793
|
+
stored.lastTotalTokens = state.totalTokens;
|
|
2794
|
+
}
|
|
2795
|
+
}
|
|
2796
|
+
debugLog("stream.checkpoint_buffered", {
|
|
2797
|
+
requestId,
|
|
2798
|
+
convKey,
|
|
2799
|
+
checkpointBytes,
|
|
2800
|
+
});
|
|
2801
|
+
},
|
|
2802
|
+
);
|
|
2803
|
+
} catch (err) {
|
|
2804
|
+
console.error(
|
|
2805
|
+
"[cursor-provider] Stream message processing error:",
|
|
2806
|
+
err instanceof Error ? err.message : err,
|
|
2807
|
+
);
|
|
2808
|
+
}
|
|
2809
|
+
},
|
|
2810
|
+
(endStreamBytes) => {
|
|
2811
|
+
resetStallTimer();
|
|
2812
|
+
const endError = parseConnectEndStream(endStreamBytes);
|
|
2813
|
+
clearInterval(activeHeartbeatTimer);
|
|
2814
|
+
if (stallTimer) clearTimeout(stallTimer);
|
|
2815
|
+
if (endError) {
|
|
2816
|
+
if (endError.retryable && !contentSent && !closed && accessToken && retryCount < MAX_BRIDGE_RETRIES) {
|
|
2817
|
+
debugLog("stream.retryable_connect_error", {
|
|
2818
|
+
requestId, bridgeKey, convKey, modelId,
|
|
2819
|
+
message: endError.message, attempt: retryCount + 1,
|
|
2820
|
+
});
|
|
2821
|
+
console.error(
|
|
2822
|
+
`[cursor-provider] Retryable Cursor error (${modelId}): ${endError.message} — will retry (${retryCount + 1}/${MAX_BRIDGE_RETRIES})`,
|
|
2654
2823
|
);
|
|
2824
|
+
retryableConnectError = true;
|
|
2825
|
+
try { activeBridge.proc.kill(); } catch {}
|
|
2826
|
+
return;
|
|
2827
|
+
}
|
|
2828
|
+
console.error(
|
|
2829
|
+
`[cursor-provider] Cursor stream error (${modelId}):`,
|
|
2830
|
+
endError.message,
|
|
2831
|
+
);
|
|
2832
|
+
activeBridge.end();
|
|
2833
|
+
activeBridge.unref();
|
|
2834
|
+
sendSSE(makeChunk({ content: endError.message }, "error"));
|
|
2835
|
+
sendSSE(makeUsageChunk());
|
|
2836
|
+
sendDone();
|
|
2837
|
+
closeResponse();
|
|
2838
|
+
} else {
|
|
2839
|
+
activeBridge.end();
|
|
2840
|
+
activeBridge.unref();
|
|
2841
|
+
const flushed = tagFilter.flush();
|
|
2842
|
+
if (flushed.reasoning)
|
|
2843
|
+
sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
|
|
2844
|
+
if (flushed.content) {
|
|
2845
|
+
appendAssistantTextToTurn(currentTurn, flushed.content);
|
|
2846
|
+
sendSSE(makeChunk({ content: flushed.content }));
|
|
2847
|
+
}
|
|
2848
|
+
sendSSE(makeChunk({}, "stop"));
|
|
2849
|
+
sendSSE(makeUsageChunk());
|
|
2850
|
+
sendDone();
|
|
2851
|
+
closeResponse();
|
|
2852
|
+
}
|
|
2853
|
+
},
|
|
2854
|
+
);
|
|
2655
2855
|
|
|
2656
|
-
|
|
2657
|
-
bridge,
|
|
2658
|
-
heartbeatTimer,
|
|
2659
|
-
blobStore,
|
|
2660
|
-
mcpTools,
|
|
2661
|
-
pendingExecs: state.pendingExecs,
|
|
2662
|
-
currentTurn,
|
|
2663
|
-
});
|
|
2664
|
-
debugLog("stream.tool_call_pause", {
|
|
2665
|
-
requestId,
|
|
2666
|
-
bridgeKey,
|
|
2667
|
-
exec,
|
|
2668
|
-
pendingExecs: state.pendingExecs,
|
|
2669
|
-
currentTurn,
|
|
2670
|
-
});
|
|
2856
|
+
activeBridge.onData(processChunk);
|
|
2671
2857
|
|
|
2672
|
-
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
|
|
2676
|
-
|
|
2677
|
-
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
|
|
2681
|
-
|
|
2682
|
-
|
|
2683
|
-
|
|
2684
|
-
|
|
2685
|
-
|
|
2686
|
-
|
|
2687
|
-
|
|
2688
|
-
|
|
2689
|
-
|
|
2690
|
-
|
|
2691
|
-
|
|
2692
|
-
|
|
2693
|
-
|
|
2694
|
-
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
)
|
|
2698
|
-
|
|
2699
|
-
},
|
|
2700
|
-
(endStreamBytes) => {
|
|
2701
|
-
const endError = parseConnectEndStream(endStreamBytes);
|
|
2702
|
-
// Always stop heartbeats and unref the bridge regardless of error/success
|
|
2703
|
-
// so the parent process is not kept alive waiting for HTTP/2 END_STREAM.
|
|
2704
|
-
clearInterval(heartbeatTimer);
|
|
2705
|
-
bridge.end();
|
|
2706
|
-
bridge.unref();
|
|
2707
|
-
if (endError) {
|
|
2708
|
-
console.error(
|
|
2709
|
-
`[cursor-provider] Cursor stream error (${modelId}):`,
|
|
2710
|
-
endError.message,
|
|
2711
|
-
);
|
|
2712
|
-
sendSSE(makeChunk({ content: endError.message }, "error"));
|
|
2713
|
-
sendSSE(makeUsageChunk());
|
|
2714
|
-
sendDone();
|
|
2715
|
-
closeResponse();
|
|
2716
|
-
} else {
|
|
2717
|
-
// Cursor's Connect-level response is complete. Send the SSE response
|
|
2718
|
-
// immediately without waiting for HTTP/2 END_STREAM, which Cursor can
|
|
2719
|
-
// delay by several seconds after the Connect end-stream frame.
|
|
2720
|
-
const flushed = tagFilter.flush();
|
|
2721
|
-
if (flushed.reasoning)
|
|
2722
|
-
sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
|
|
2723
|
-
if (flushed.content) {
|
|
2724
|
-
appendAssistantTextToTurn(currentTurn, flushed.content);
|
|
2725
|
-
sendSSE(makeChunk({ content: flushed.content }));
|
|
2858
|
+
activeBridge.onClose((code) => {
|
|
2859
|
+
debugLog("stream.bridge_close", {
|
|
2860
|
+
requestId,
|
|
2861
|
+
bridgeKey,
|
|
2862
|
+
convKey,
|
|
2863
|
+
code,
|
|
2864
|
+
cancelled,
|
|
2865
|
+
mcpExecReceived,
|
|
2866
|
+
currentTurn,
|
|
2867
|
+
latestCheckpoint,
|
|
2868
|
+
retryCount,
|
|
2869
|
+
});
|
|
2870
|
+
clearInterval(activeHeartbeatTimer);
|
|
2871
|
+
if (stallTimer) clearTimeout(stallTimer);
|
|
2872
|
+
if (sessionBridges.get(bridgeKey) === activeBridge) sessionBridges.delete(bridgeKey);
|
|
2873
|
+
const stored = conversationStates.get(convKey);
|
|
2874
|
+
if (stored) {
|
|
2875
|
+
for (const [k, v] of blobStore) stored.blobStore.set(k, v);
|
|
2876
|
+
if (latestCheckpoint) {
|
|
2877
|
+
stored.checkpoint = latestCheckpoint;
|
|
2878
|
+
debugLog("stream.checkpoint_committed", { requestId, convKey, stored });
|
|
2879
|
+
}
|
|
2880
|
+
if (state.cursorContextWindow > 0) {
|
|
2881
|
+
stored.effectiveContextWindow = state.cursorContextWindow;
|
|
2882
|
+
}
|
|
2883
|
+
if (state.totalTokens > 0) {
|
|
2884
|
+
stored.lastTotalTokens = state.totalTokens;
|
|
2726
2885
|
}
|
|
2727
|
-
sendSSE(makeChunk({}, "stop"));
|
|
2728
|
-
sendSSE(makeUsageChunk());
|
|
2729
|
-
sendDone();
|
|
2730
|
-
closeResponse();
|
|
2731
2886
|
}
|
|
2732
|
-
|
|
2733
|
-
|
|
2887
|
+
if (cancelled) return;
|
|
2888
|
+
|
|
2889
|
+
// ── Transparent retry on bridge failure ──
|
|
2890
|
+
// When the bridge dies mid-request (connection killed by LB, TCP
|
|
2891
|
+
// timeout, etc.) or Cursor returns a retryable protocol error
|
|
2892
|
+
// (internal, unavailable, deadline_exceeded), rebuild the Cursor
|
|
2893
|
+
// request and replay on a fresh bridge. The SSE response stays
|
|
2894
|
+
// open — the client sees at most a brief pause.
|
|
2895
|
+
const shouldRetry = retryableConnectError || code !== 0;
|
|
2896
|
+
if (shouldRetry && !closed && accessToken && retryCount < MAX_BRIDGE_RETRIES) {
|
|
2897
|
+
const cp = preTurnCheckpoint ?? stored?.checkpoint ?? null;
|
|
2898
|
+
// For retryable Connect errors, allow retry even without a
|
|
2899
|
+
// checkpoint (first request in session) — buildCursorRequest
|
|
2900
|
+
// handles checkpoint=null by rebuilding from turns.
|
|
2901
|
+
if (stored && (cp || retryableConnectError)) {
|
|
2902
|
+
retryCount++;
|
|
2903
|
+
const wasConnectError = retryableConnectError;
|
|
2904
|
+
retryableConnectError = false;
|
|
2905
|
+
debugLog("stream.retry", {
|
|
2906
|
+
requestId,
|
|
2907
|
+
bridgeKey,
|
|
2908
|
+
convKey,
|
|
2909
|
+
attempt: retryCount,
|
|
2910
|
+
maxRetries: MAX_BRIDGE_RETRIES,
|
|
2911
|
+
connectError: wasConnectError,
|
|
2912
|
+
});
|
|
2913
|
+
console.error(
|
|
2914
|
+
`[cursor-provider] ${wasConnectError ? "Retryable Cursor error" : `Bridge died (exit ${code})`}, retry ${retryCount}/${MAX_BRIDGE_RETRIES} (${modelId})`,
|
|
2915
|
+
);
|
|
2734
2916
|
|
|
2735
|
-
|
|
2917
|
+
// Reset per-attempt stream state; keep cumulative token counts.
|
|
2918
|
+
state.pendingExecs = [];
|
|
2919
|
+
latestCheckpoint = null;
|
|
2920
|
+
|
|
2921
|
+
const retryPayload = buildCursorRequest(
|
|
2922
|
+
modelId,
|
|
2923
|
+
stored.systemPrompt || "You are a helpful assistant.",
|
|
2924
|
+
currentTurn.userText,
|
|
2925
|
+
completedTurns,
|
|
2926
|
+
stored.conversationId,
|
|
2927
|
+
cp,
|
|
2928
|
+
blobStore,
|
|
2929
|
+
currentTurn.images,
|
|
2930
|
+
);
|
|
2931
|
+
retryPayload.mcpTools = mcpTools;
|
|
2736
2932
|
|
|
2737
|
-
|
|
2738
|
-
|
|
2739
|
-
|
|
2740
|
-
|
|
2741
|
-
|
|
2742
|
-
|
|
2743
|
-
|
|
2744
|
-
|
|
2745
|
-
|
|
2746
|
-
|
|
2747
|
-
|
|
2748
|
-
|
|
2749
|
-
|
|
2750
|
-
req.removeListener("close", onClientClose);
|
|
2751
|
-
res.removeListener("close", onClientClose);
|
|
2752
|
-
const stored = conversationStates.get(convKey);
|
|
2753
|
-
if (stored) {
|
|
2754
|
-
for (const [k, v] of blobStore) stored.blobStore.set(k, v);
|
|
2755
|
-
if (latestCheckpoint) {
|
|
2756
|
-
stored.checkpoint = latestCheckpoint;
|
|
2757
|
-
debugLog("stream.checkpoint_committed", { requestId, convKey, stored });
|
|
2758
|
-
}
|
|
2759
|
-
if (state.cursorContextWindow > 0) {
|
|
2760
|
-
stored.effectiveContextWindow = state.cursorContextWindow;
|
|
2933
|
+
const { bridge: newBridge, heartbeatTimer: newTimer } = startBridge(
|
|
2934
|
+
accessToken,
|
|
2935
|
+
retryPayload.requestBytes,
|
|
2936
|
+
bridgeKey,
|
|
2937
|
+
);
|
|
2938
|
+
activeBridge = newBridge;
|
|
2939
|
+
activeHeartbeatTimer = newTimer;
|
|
2940
|
+
|
|
2941
|
+
// Re-register client-close listener with new bridge refs (the old
|
|
2942
|
+
// listener already uses the mutable activeBridge/activeHeartbeatTimer).
|
|
2943
|
+
attachToBridge();
|
|
2944
|
+
return;
|
|
2945
|
+
}
|
|
2761
2946
|
}
|
|
2762
|
-
|
|
2763
|
-
|
|
2764
|
-
|
|
2765
|
-
|
|
2766
|
-
|
|
2767
|
-
|
|
2768
|
-
|
|
2769
|
-
|
|
2770
|
-
|
|
2771
|
-
|
|
2772
|
-
|
|
2773
|
-
|
|
2774
|
-
|
|
2947
|
+
|
|
2948
|
+
// No retry — remove client-close listeners since this bridge is done.
|
|
2949
|
+
req.removeListener("close", onClientClose);
|
|
2950
|
+
res.removeListener("close", onClientClose);
|
|
2951
|
+
|
|
2952
|
+
if (!mcpExecReceived) {
|
|
2953
|
+
if (code !== 0) {
|
|
2954
|
+
console.error(
|
|
2955
|
+
`[cursor-provider] Bridge exited (code ${code}) before receiving response (${modelId})`,
|
|
2956
|
+
);
|
|
2957
|
+
const failureMsg = classifyBridgeFailure(code, activeBridge.getStderr());
|
|
2958
|
+
sendSSE(makeChunk({ content: failureMsg }, "error"));
|
|
2959
|
+
sendSSE(makeUsageChunk());
|
|
2960
|
+
sendDone();
|
|
2961
|
+
closeResponse();
|
|
2962
|
+
} else {
|
|
2963
|
+
const flushed = tagFilter.flush();
|
|
2964
|
+
if (flushed.reasoning)
|
|
2965
|
+
sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
|
|
2966
|
+
if (flushed.content) {
|
|
2967
|
+
appendAssistantTextToTurn(currentTurn, flushed.content);
|
|
2968
|
+
sendSSE(makeChunk({ content: flushed.content }));
|
|
2969
|
+
}
|
|
2970
|
+
sendSSE(makeChunk({}, "stop"));
|
|
2971
|
+
sendSSE(makeUsageChunk());
|
|
2972
|
+
sendDone();
|
|
2973
|
+
closeResponse();
|
|
2974
|
+
}
|
|
2975
|
+
} else if (code !== 0) {
|
|
2976
|
+
sendSSE(makeChunk({ content: "Bridge connection lost" }, "error"));
|
|
2775
2977
|
sendSSE(makeUsageChunk());
|
|
2776
2978
|
sendDone();
|
|
2777
2979
|
closeResponse();
|
|
2980
|
+
activeBridges.delete(bridgeKey);
|
|
2778
2981
|
} else {
|
|
2779
|
-
|
|
2780
|
-
if (flushed.reasoning)
|
|
2781
|
-
sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
|
|
2782
|
-
if (flushed.content) {
|
|
2783
|
-
appendAssistantTextToTurn(currentTurn, flushed.content);
|
|
2784
|
-
sendSSE(makeChunk({ content: flushed.content }));
|
|
2785
|
-
}
|
|
2786
|
-
sendSSE(makeChunk({}, "stop"));
|
|
2787
|
-
sendSSE(makeUsageChunk());
|
|
2788
|
-
sendDone();
|
|
2982
|
+
activeBridges.delete(bridgeKey);
|
|
2789
2983
|
closeResponse();
|
|
2790
2984
|
}
|
|
2791
|
-
}
|
|
2792
|
-
|
|
2793
|
-
|
|
2794
|
-
|
|
2795
|
-
closeResponse();
|
|
2796
|
-
activeBridges.delete(bridgeKey);
|
|
2797
|
-
} else {
|
|
2798
|
-
// Bridge closed cleanly after a tool call pause. The HTTP response was
|
|
2799
|
-
// already ended by the MCP exec handler; just ensure cleanup.
|
|
2800
|
-
activeBridges.delete(bridgeKey);
|
|
2801
|
-
closeResponse();
|
|
2802
|
-
}
|
|
2803
|
-
});
|
|
2985
|
+
});
|
|
2986
|
+
}
|
|
2987
|
+
|
|
2988
|
+
attachToBridge();
|
|
2804
2989
|
}
|
|
2805
2990
|
|
|
2806
2991
|
export function writeSSEStreamForTests(args: {
|
|
@@ -2816,6 +3001,7 @@ export function writeSSEStreamForTests(args: {
|
|
|
2816
3001
|
req: IncomingMessage;
|
|
2817
3002
|
res: ServerResponse;
|
|
2818
3003
|
requestId?: string;
|
|
3004
|
+
accessToken?: string;
|
|
2819
3005
|
}): void {
|
|
2820
3006
|
writeSSEStream(
|
|
2821
3007
|
args.bridge,
|
|
@@ -2830,6 +3016,8 @@ export function writeSSEStreamForTests(args: {
|
|
|
2830
3016
|
args.req,
|
|
2831
3017
|
args.res,
|
|
2832
3018
|
args.requestId,
|
|
3019
|
+
false,
|
|
3020
|
+
args.accessToken,
|
|
2833
3021
|
);
|
|
2834
3022
|
}
|
|
2835
3023
|
|
|
@@ -2846,6 +3034,7 @@ function handleToolResultResume(
|
|
|
2846
3034
|
res: ServerResponse,
|
|
2847
3035
|
stream: boolean,
|
|
2848
3036
|
requestId?: string,
|
|
3037
|
+
accessToken?: string,
|
|
2849
3038
|
): void {
|
|
2850
3039
|
const {
|
|
2851
3040
|
bridge,
|
|
@@ -2886,6 +3075,7 @@ function handleToolResultResume(
|
|
|
2886
3075
|
mcpTools,
|
|
2887
3076
|
pendingExecs,
|
|
2888
3077
|
currentTurn,
|
|
3078
|
+
lastTotalTokens: active.lastTotalTokens,
|
|
2889
3079
|
});
|
|
2890
3080
|
debugLog("tool_resume.partial_wait", {
|
|
2891
3081
|
requestId,
|
|
@@ -2893,7 +3083,7 @@ function handleToolResultResume(
|
|
|
2893
3083
|
unresolvedExecs,
|
|
2894
3084
|
currentTurn,
|
|
2895
3085
|
});
|
|
2896
|
-
respondWithPendingToolCalls(modelId, unresolvedExecs, stream, res);
|
|
3086
|
+
respondWithPendingToolCalls(modelId, unresolvedExecs, stream, res, computeUsageFromStored(active.lastTotalTokens, convKey, modelId));
|
|
2897
3087
|
return;
|
|
2898
3088
|
}
|
|
2899
3089
|
|
|
@@ -2947,6 +3137,8 @@ function handleToolResultResume(
|
|
|
2947
3137
|
req,
|
|
2948
3138
|
res,
|
|
2949
3139
|
requestId,
|
|
3140
|
+
false,
|
|
3141
|
+
accessToken,
|
|
2950
3142
|
);
|
|
2951
3143
|
}
|
|
2952
3144
|
|
|
@@ -2999,13 +3191,13 @@ async function handleNonStreamingResponse(
|
|
|
2999
3191
|
toolCallIndex: 0,
|
|
3000
3192
|
pendingExecs: [],
|
|
3001
3193
|
outputTokens: 0,
|
|
3002
|
-
totalTokens: 0,
|
|
3194
|
+
totalTokens: storedForNonStream?.lastTotalTokens ?? 0,
|
|
3003
3195
|
cursorContextWindow: storedForNonStream?.effectiveContextWindow ?? 0,
|
|
3004
3196
|
inferredContextWindow: inferContextWindow(modelId),
|
|
3005
3197
|
};
|
|
3006
3198
|
const tagFilter = createThinkingTagFilter();
|
|
3007
3199
|
let fullText = "";
|
|
3008
|
-
let nonStreamError:
|
|
3200
|
+
let nonStreamError: ConnectEndStreamError | null = null;
|
|
3009
3201
|
let latestCheckpoint: Uint8Array | null = null;
|
|
3010
3202
|
|
|
3011
3203
|
return new Promise((resolve) => {
|
|
@@ -3130,6 +3322,9 @@ async function handleNonStreamingResponse(
|
|
|
3130
3322
|
if (state.cursorContextWindow > 0) {
|
|
3131
3323
|
stored.effectiveContextWindow = state.cursorContextWindow;
|
|
3132
3324
|
}
|
|
3325
|
+
if (state.totalTokens > 0) {
|
|
3326
|
+
stored.lastTotalTokens = state.totalTokens;
|
|
3327
|
+
}
|
|
3133
3328
|
}
|
|
3134
3329
|
|
|
3135
3330
|
if (cancelled) {
|