@offbynan/pi-cursor-provider 0.3.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +112 -75
- package/h2-bridge.mjs +7 -2
- package/package.json +1 -1
- package/proxy.ts +242 -22
package/README.md
CHANGED
|
@@ -1,26 +1,67 @@
|
|
|
1
1
|
# pi-cursor-provider
|
|
2
2
|
|
|
3
|
-
**This fork improves on the upstream
|
|
3
|
+
**This fork improves on the upstream across six areas:**
|
|
4
4
|
|
|
5
|
-
- **Image support** — base64 `image_url` content parts
|
|
6
|
-
-
|
|
7
|
-
- **
|
|
8
|
-
- **
|
|
9
|
-
- **
|
|
10
|
-
- **
|
|
11
|
-
- **Per-model cost estimation** — detailed price table (input / output / cache) covering all current model families
|
|
12
|
-
- **Model deduplication** — effort-suffix variants (`-low`, `-medium`, `-high`, …) are collapsed into one entry; pi's reasoning-level setting drives the suffix automatically
|
|
13
|
-
- **Thinking-tag filtering** — inline `<think>` / `<reasoning>` tags are stripped from the response and routed to `reasoning_content`
|
|
14
|
-
- **Structured debug logging** — opt-in JSONL event log (`PI_CURSOR_PROVIDER_DEBUG=1`) with a bundled timeline viewer
|
|
5
|
+
- **Image support** — base64 `image_url` content parts forwarded to Cursor end-to-end; the upstream silently drops them
|
|
6
|
+
- **Compaction support** — old turns archived as inline text to cut `getBlobArgs` round-trips from O(history) to O(tail); bridge termination errors surface as real failures instead of silent empty responses; checkpoint cleared after compaction to keep both sides in sync
|
|
7
|
+
- **Reliability** — bridge timeouts hardened and configurable; SSE keepalive prevents pi from timing out during blob-fetching; conversation state and checkpoints survive transient failures and client disconnects
|
|
8
|
+
- **Model support** — per-model context window inference (vs. hardcoded 200 k); runtime cap scaling when Cursor enforces a tighter window; detailed cost table for all current families; effort-suffix variants deduplicated so pi's reasoning-level setting drives the suffix automatically
|
|
9
|
+
- **Thinking-tag filtering** — inline `<think>` / `<reasoning>` tags stripped from the response and routed to `reasoning_content`
|
|
10
|
+
- **Fixes & observability** — `pi -p` exit hang fixed; dead TTL eviction code removed; opt-in JSONL debug logging with a bundled timeline viewer
|
|
15
11
|
|
|
16
|
-
|
|
12
|
+
[Pi](https://github.com/badlogic/pi-mono) extension that provides access to [Cursor](https://cursor.com) models (Claude, GPT, Gemini, Grok, Kimi, Composer) via OAuth and a local OpenAI-compatible proxy.
|
|
17
13
|
|
|
18
14
|
[View on npm](https://www.npmjs.com/package/@offbynan/pi-cursor-provider)
|
|
19
15
|
|
|
20
|
-
[Pi](https://github.com/badlogic/pi-mono) extension that provides access to [Cursor](https://cursor.com) models via OAuth authentication and a local OpenAI-compatible proxy.
|
|
21
|
-
|
|
22
16
|
Forked from [ndraiman/pi-cursor-provider](https://github.com/ndraiman/pi-cursor-provider).
|
|
23
17
|
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
# Via pi
|
|
22
|
+
pi install npm:@offbynan/pi-cursor-provider
|
|
23
|
+
|
|
24
|
+
# Or manually
|
|
25
|
+
git clone https://github.com/offbynan/pi-cursor-provider ~/.pi/agent/extensions/cursor-provider
|
|
26
|
+
cd ~/.pi/agent/extensions/cursor-provider
|
|
27
|
+
npm install
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Usage
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
/login cursor # authenticate via browser
|
|
34
|
+
/model # select a Cursor model
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## How it works
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
pi → openai-completions → localhost:PORT/v1/chat/completions
|
|
41
|
+
↓
|
|
42
|
+
proxy.ts (HTTP server)
|
|
43
|
+
↓
|
|
44
|
+
h2-bridge.mjs (Node HTTP/2)
|
|
45
|
+
↓
|
|
46
|
+
api2.cursor.sh gRPC
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
1. **PKCE OAuth** — browser-based login to Cursor, no client secret needed
|
|
50
|
+
2. **Model discovery** — queries Cursor's `GetUsableModels` gRPC endpoint
|
|
51
|
+
3. **Local proxy** — translates OpenAI `/v1/chat/completions` to Cursor's protobuf/HTTP2 Connect protocol
|
|
52
|
+
4. **Tool routing** — rejects Cursor's native tools, exposes pi's tools via MCP
|
|
53
|
+
|
|
54
|
+
## Configuration
|
|
55
|
+
|
|
56
|
+
| Env var | Default | Description |
|
|
57
|
+
| ------- | ------- | ----------- |
|
|
58
|
+
| `PI_CURSOR_PROVIDER_DEBUG` | off | Set to any truthy value to enable JSONL debug logging |
|
|
59
|
+
| `PI_CURSOR_PROVIDER_DEBUG_FILE` | auto in tmpdir | Override the debug log file path |
|
|
60
|
+
| `PI_CURSOR_BRIDGE_INITIAL_TIMEOUT_MS` | `120000` | Kill bridge if no HTTP/2 activity within this many ms of spawn |
|
|
61
|
+
| `PI_CURSOR_BRIDGE_ACTIVITY_TIMEOUT_MS` | `300000` | Kill bridge if no HTTP/2 activity for this many ms after the first frame |
|
|
62
|
+
| `PI_CURSOR_TURN_ARCHIVE_THRESHOLD` | `20` | Keep this many recent turns as raw blobs; older turns are archived as inline text |
|
|
63
|
+
| `PI_CURSOR_RAW_MODELS` | off | Set to disable model deduplication and see all raw Cursor model IDs |
|
|
64
|
+
|
|
24
65
|
## Changes vs upstream
|
|
25
66
|
|
|
26
67
|
### Image support
|
|
@@ -109,41 +150,55 @@ The upstream has no observability. This fork adds opt-in JSONL event logging (se
|
|
|
109
150
|
npm run debug:timeline -- --latest
|
|
110
151
|
```
|
|
111
152
|
|
|
112
|
-
|
|
153
|
+
### Bridge timeout hardening
|
|
113
154
|
|
|
114
|
-
|
|
115
|
-
pi → openai-completions → localhost:PORT/v1/chat/completions
|
|
116
|
-
↓
|
|
117
|
-
proxy.ts (HTTP server)
|
|
118
|
-
↓
|
|
119
|
-
h2-bridge.mjs (Node HTTP/2)
|
|
120
|
-
↓
|
|
121
|
-
api2.cursor.sh gRPC
|
|
122
|
-
```
|
|
155
|
+
The upstream `h2-bridge.mjs` used a 30-second initial connection timeout and a 120-second activity timeout. Large conversations require Cursor to deserialise a big checkpoint and complete many `getBlobArgs` round-trips before it starts streaming tokens, which regularly exceeded these limits and caused compaction to fail with a `terminated` error.
|
|
123
156
|
|
|
124
|
-
|
|
125
|
-
2. **Model discovery** — queries Cursor's `GetUsableModels` gRPC endpoint
|
|
126
|
-
3. **Local proxy** — translates OpenAI `/v1/chat/completions` to Cursor's protobuf/HTTP2 Connect protocol
|
|
127
|
-
4. **Tool routing** — rejects Cursor's native tools, exposes pi's tools via MCP
|
|
157
|
+
This fork raises the defaults (120 s initial, 300 s activity) and makes them configurable via `PI_CURSOR_BRIDGE_INITIAL_TIMEOUT_MS` and `PI_CURSOR_BRIDGE_ACTIVITY_TIMEOUT_MS` (see [Configuration](#configuration)).
|
|
128
158
|
|
|
129
|
-
|
|
159
|
+
### Bridge termination error propagation
|
|
130
160
|
|
|
131
|
-
|
|
132
|
-
# Via pi install
|
|
133
|
-
pi install npm:@offbynan/pi-cursor-provider
|
|
161
|
+
In the upstream, if the `h2-bridge` child process exits before producing any response (e.g. due to a timeout), the proxy sends a `finish_reason: "stop"` with empty content on the streaming path, and a silent 200 OK on the non-streaming path. Pi receives what looks like a successful but empty response, then fails compaction with an opaque `terminated` error.
|
|
134
162
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
```
|
|
163
|
+
This fork checks the bridge exit code in both paths:
|
|
164
|
+
- **Streaming path** — if the bridge exits with code ≠ 0 before any response, an SSE error chunk is sent so pi surfaces a real failure.
|
|
165
|
+
- **Non-streaming path** — same condition returns a 502 JSON error.
|
|
166
|
+
- **Both paths** — the conversation state is preserved so the next retry can resume from the last good checkpoint rather than rebuilding from scratch.
|
|
140
167
|
|
|
141
|
-
|
|
168
|
+
### Conversation history archiving
|
|
142
169
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
170
|
+
Cursor's `AgentService/Run` RPC is stateless per request: each turn sends the full conversation state as a checkpoint blob, and the server fetches individual turn blobs via `getBlobArgs` as needed. For a long conversation every request incurs O(history) round-trips; the compaction turn is the worst case because Cursor must read the entire history to generate a summary.
|
|
171
|
+
|
|
172
|
+
This fork folds turns older than a configurable tail into a single `ConversationSummaryArchive` protobuf blob that stores the transcript as **inline text**. The server reads one blob instead of hundreds, cutting round-trips from O(N) to O(tail):
|
|
173
|
+
|
|
174
|
+
| Scenario | `getBlobArgs` before | `getBlobArgs` after |
|
|
175
|
+
| ---------------------- | --------------------- | ------------------- |
|
|
176
|
+
| 100-turn compaction | ~300 | ~61 |
|
|
177
|
+
| 20-turn normal turn | ~60 | ~60 (unchanged) |
|
|
178
|
+
|
|
179
|
+
The tail size is configurable via `PI_CURSOR_TURN_ARCHIVE_THRESHOLD` (default 20, see [Configuration](#configuration)).
|
|
180
|
+
|
|
181
|
+
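Archiving is conservative: old turns are only replaced if, for every old turn, the turn blob and its user-message blob are already in the local store. If either is missing for any old turn, all turns are left as-is. A missing step blob does not block archiving — that step is simply omitted from the transcript — and long messages, tool arguments, and tool results are truncated in the archived text.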
|
|
182
|
+
|
|
183
|
+
### SSE keepalive during blob-fetching
|
|
184
|
+
|
|
185
|
+
Before the first token arrives, the proxy is silent: it sends HTTP 200 headers immediately but emits no SSE events while Cursor fetches conversation blobs. If pi's HTTP client has a request timeout (or a "time since last data" idle timeout), it fires during this window and the request is aborted with `Error: Request timed out.`
|
|
186
|
+
|
|
187
|
+
This fork starts a 15-second keepalive timer alongside the SSE stream. For as long as the response is open, the timer periodically writes an SSE comment (`: ping`), which is invisible to pi's message parser but resets any inactivity timer in the HTTP layer; the timer is cleared when the response is closed.
|
|
188
|
+
|
|
189
|
+
### Conversation state preserved on transient errors
|
|
190
|
+
|
|
191
|
+
Previously, a bridge timeout (`exit code ≠ 0`) or a Connect-level error from Cursor caused the proxy to call `conversationStates.delete(convKey)`, wiping the stored checkpoint. On the next request pi would rebuild the Cursor conversation from scratch — losing any context accumulated since the last compaction.
|
|
192
|
+
|
|
193
|
+
Neither failure mode actually invalidates the checkpoint. A bridge timeout means Cursor stopped responding to the current request, not that its conversation state is corrupt. A Connect error (e.g. rate limit, transient upstream failure) also leaves the prior checkpoint intact.
|
|
194
|
+
|
|
195
|
+
This fork removes both deletes. The last good checkpoint survives errors, so the next request resumes from where the conversation was rather than starting over.
|
|
196
|
+
|
|
197
|
+
### Checkpoint saved on client disconnect
|
|
198
|
+
|
|
199
|
+
When pi closes the SSE connection (e.g. its own request timeout fires), the proxy previously guarded checkpoint persistence behind `if (!cancelled)`, discarding any checkpoint that Cursor had already sent for that turn. On the next request the proxy used a stale checkpoint, losing the partial turn's context.
|
|
200
|
+
|
|
201
|
+
This fork removes the `!cancelled` guard. If Cursor sent a checkpoint before the disconnect, it is saved and the retry picks it up.
|
|
147
202
|
|
|
148
203
|
## Model Mapping
|
|
149
204
|
|
|
@@ -180,19 +235,17 @@ Models sharing the same `(base, variant)` with **≥2 effort levels** and a sens
|
|
|
180
235
|
The proxy inserts the effort before `-fast`/`-thinking`:
|
|
181
236
|
|
|
182
237
|
```
|
|
183
|
-
pi selects: gpt-5.4-fast + effort: high
|
|
238
|
+
pi selects: gpt-5.4-fast + effort: high → Cursor receives: gpt-5.4-high-fast
|
|
184
239
|
pi selects: gpt-5.4 + effort: medium → Cursor receives: gpt-5.4-medium
|
|
185
|
-
pi selects: composer-2
|
|
240
|
+
pi selects: composer-2 + (no effort) → Cursor receives: composer-2
|
|
186
241
|
```
|
|
187
242
|
|
|
188
|
-
When a group is **collapsed**, the proxy registers one model with `supportsReasoningEffort: true` and an internal effort map (see table above).
|
|
189
|
-
|
|
190
243
|
**Collapsed** when Cursor returns either:
|
|
191
244
|
|
|
192
245
|
- **Multiple** effort suffixes for the same `(base, -fast, -thinking)` group, or
|
|
193
|
-
- **A single** variant whose parsed effort suffix is **non-empty** (for example only `claude-4.5-opus-high` is listed). The suffix is removed from the displayed ID so
|
|
246
|
+
- **A single** variant whose parsed effort suffix is **non-empty** (for example only `claude-4.5-opus-high` is listed). The suffix is removed from the displayed ID so pi's reasoning-effort setting supplies it.
|
|
194
247
|
|
|
195
|
-
**Left as-is**
|
|
248
|
+
**Left as-is** when the group has **one** variant and the parsed effort suffix is **empty** — typically IDs with no effort segment, such as `composer-2`, `gemini-3.1-pro`, or `kimi-k2.5`.
|
|
196
249
|
|
|
197
250
|
### Disabling the mapping
|
|
198
251
|
|
|
@@ -204,42 +257,26 @@ PI_CURSOR_RAW_MODELS=1 pi
|
|
|
204
257
|
|
|
205
258
|
## Session Management
|
|
206
259
|
|
|
207
|
-
The proxy maintains conversation state
|
|
260
|
+
The proxy maintains per-session conversation state to enable multi-turn conversations with tool call continuations and clean lifecycle handling.
|
|
208
261
|
|
|
209
|
-
###
|
|
262
|
+
### State storage
|
|
210
263
|
|
|
211
|
-
- **
|
|
212
|
-
- **
|
|
213
|
-
- **
|
|
214
|
-
- **
|
|
264
|
+
- **Keyed by session ID** — pi injects its session ID into every request via a `before_provider_request` hook; the proxy uses it to key both bridge state and the stored conversation checkpoint.
|
|
265
|
+
- **Checkpoint** — Cursor sends a `conversationCheckpointUpdate` message after each completed turn. The proxy stores the latest checkpoint and reuses it on the next request, so Cursor picks up exactly where it left off without rebuilding the full conversation from scratch.
|
|
266
|
+
- **Blob store** — protobuf blobs referenced by the checkpoint are cached locally and served back to Cursor on demand via `getBlobArgs` / `setBlobArgs`.
|
|
267
|
+
- **In-memory only** — all state lives in process memory. A proxy restart loses checkpoints; the next request rebuilds from pi's message history.
|
|
215
268
|
|
|
216
269
|
### Tool continuations
|
|
217
270
|
|
|
218
|
-
When Cursor
|
|
219
|
-
|
|
220
|
-
### Interruptions
|
|
221
|
-
|
|
222
|
-
If the client disconnects or interrupts a turn mid-stream, the proxy cancels the upstream Cursor run and does **not** commit the pending checkpoint. Checkpoints are only committed after a turn finishes successfully.
|
|
223
|
-
|
|
224
|
-
### Session fork
|
|
225
|
-
|
|
226
|
-
When you navigate back in pi's session tree and branch from an earlier point, the proxy discards the stored checkpoint whenever the completed history no longer matches the stored checkpoint metadata. That includes both:
|
|
227
|
-
|
|
228
|
-
- completed turn count mismatches, and
|
|
229
|
-
- same-depth branch changes detected via completed-history fingerprint mismatch.
|
|
230
|
-
|
|
231
|
-
After discarding a stale checkpoint, the proxy reconstructs proper protobuf conversation turns from the message history pi sends, so Cursor sees the actual conversation structure at the fork point.
|
|
271
|
+
When Cursor requests a tool call, the proxy pauses the SSE stream, stores the live bridge in memory, and returns the tool call to pi. When pi sends the result on the next request, the proxy forwards it into the same in-flight Cursor run so the continuation stays part of the original turn.
|
|
232
272
|
|
|
233
|
-
###
|
|
273
|
+
### Lifecycle cleanup
|
|
234
274
|
|
|
235
|
-
|
|
275
|
+
Session state is cleared on pi lifecycle events — session switch, fork, `/tree`, shutdown, and post-compaction — so stale checkpoints never carry over into a new context.
|
|
236
276
|
|
|
237
|
-
|
|
277
|
+
### Error resilience
|
|
238
278
|
|
|
239
|
-
-
|
|
240
|
-
- tool calls
|
|
241
|
-
- tool results
|
|
242
|
-
- final assistant text after tool results
|
|
279
|
+
A bridge timeout or Connect-level error from Cursor does not wipe the stored checkpoint. The last good checkpoint survives transient failures and is used on the next retry. If Cursor sends a checkpoint before a client disconnect, that checkpoint is also preserved.
|
|
243
280
|
|
|
244
281
|
## Requirements
|
|
245
282
|
|
package/h2-bridge.mjs
CHANGED
|
@@ -91,11 +91,16 @@ const client = http2.connect(url || "https://api2.cursor.sh");
|
|
|
91
91
|
|
|
92
92
|
// Guard against initial connection failure. Reset on any h2 activity
|
|
93
93
|
// so long-running agent conversations (with tool call round-trips) survive.
|
|
94
|
-
|
|
94
|
+
// Initial timeout is generous because large conversations require Cursor to
|
|
95
|
+
// deserialize a big checkpoint + run many getBlobArgs round-trips before it
|
|
96
|
+
// starts streaming tokens — 30 s was too short and caused compaction failures.
|
|
97
|
+
const INITIAL_TIMEOUT_MS = parseInt(process.env.PI_CURSOR_BRIDGE_INITIAL_TIMEOUT_MS ?? "") || 120_000;
|
|
98
|
+
const ACTIVITY_TIMEOUT_MS = parseInt(process.env.PI_CURSOR_BRIDGE_ACTIVITY_TIMEOUT_MS ?? "") || 300_000;
|
|
99
|
+
let timeout = setTimeout(killBridge, INITIAL_TIMEOUT_MS);
|
|
95
100
|
|
|
96
101
|
function resetTimeout() {
|
|
97
102
|
clearTimeout(timeout);
|
|
98
|
-
timeout = setTimeout(killBridge,
|
|
103
|
+
timeout = setTimeout(killBridge, ACTIVITY_TIMEOUT_MS);
|
|
99
104
|
}
|
|
100
105
|
|
|
101
106
|
function killBridge() {
|
package/package.json
CHANGED
package/proxy.ts
CHANGED
|
@@ -78,6 +78,7 @@ import {
|
|
|
78
78
|
WriteResultSchema,
|
|
79
79
|
WriteShellStdinErrorSchema,
|
|
80
80
|
WriteShellStdinResultSchema,
|
|
81
|
+
ConversationSummaryArchiveSchema,
|
|
81
82
|
GetUsableModelsRequestSchema,
|
|
82
83
|
GetUsableModelsResponseSchema,
|
|
83
84
|
type AgentServerMessage,
|
|
@@ -598,8 +599,9 @@ function decodeConnectUnaryBody(payload: Uint8Array): Uint8Array | null {
|
|
|
598
599
|
* derive them from known model families. Update when new major versions ship.
|
|
599
600
|
*
|
|
600
601
|
* Sources:
|
|
601
|
-
* - Claude: platform.claude.ai/docs — claude-4.6-sonnet / claude-4.6-opus: 1M
|
|
602
|
-
*
|
|
602
|
+
* - Claude: platform.claude.ai/docs — claude-4.6-sonnet / claude-4.6-opus: native 1M context
|
|
603
|
+
* (GA Mar 2026), but Cursor enforces a 200k cap via ConversationTokenDetails.maxTokens.
|
|
604
|
+
* Registered at 200k to match Cursor's actual limit; all other Claude incl. 4.5, 4, Haiku: 200k.
|
|
603
605
|
* - Gemini: ai.google.dev/gemini-api/docs — all 2.5 / 3.x models: 1M.
|
|
604
606
|
* - GPT: chatai.guide — GPT-5.x: 400k; GPT-5.5+: 1M; nano/mini variants: 128k.
|
|
605
607
|
* - Grok 4: docs.x.ai — 256k.
|
|
@@ -612,9 +614,9 @@ export function inferContextWindow(id: string): number {
|
|
|
612
614
|
if (lower.includes("-1m")) return 1_048_576;
|
|
613
615
|
|
|
614
616
|
// ── Claude ────────────────────────────────────────────────────────────────
|
|
615
|
-
// Sonnet 4.6
|
|
616
|
-
//
|
|
617
|
-
|
|
617
|
+
// Sonnet 4.6 / Opus 4.6 natively support 1M but Cursor enforces 200k server-side.
|
|
618
|
+
// Registering at 200k avoids spurious 5× scaling in computeUsage.
|
|
619
|
+
// All other Claude (4.5, 4, Haiku, …) are also 200k.
|
|
618
620
|
if (lower.startsWith("claude-")) return 200_000;
|
|
619
621
|
|
|
620
622
|
// ── Gemini ────────────────────────────────────────────────────────────────
|
|
@@ -1331,6 +1333,103 @@ function buildTurnStepBytes(step: ParsedTurnStep): Uint8Array {
|
|
|
1331
1333
|
);
|
|
1332
1334
|
}
|
|
1333
1335
|
|
|
1336
|
+
// Number of most-recent turns to keep as raw blobs; older turns are folded
|
|
1337
|
+
// into a ConversationSummaryArchive blob with inline text. Keeping only the
|
|
1338
|
+
// tail as raw blobs caps the number of blob fetches the server needs per
|
|
1339
|
+
// request to O(THRESHOLD) instead of O(conversation_length), which is the
|
|
1340
|
+
// primary driver of compaction slowness for long sessions.
|
|
1341
|
+
const TURN_ARCHIVE_THRESHOLD =
|
|
1342
|
+
parseInt(process.env.PI_CURSOR_TURN_ARCHIVE_THRESHOLD ?? "") || 20;
|
|
1343
|
+
|
|
1344
|
+
/**
|
|
1345
|
+
* Renders parsed turns (already-decoded OpenAI messages) as plain text for
|
|
1346
|
+
* use as the `summary` field of a ConversationSummaryArchive. Tool results
|
|
1347
|
+
* are truncated so the archive blob stays small.
|
|
1348
|
+
*/
|
|
1349
|
+
function buildTurnsTranscript(turns: ParsedTurn[]): string {
|
|
1350
|
+
const parts: string[] = [
|
|
1351
|
+
`[Earlier conversation — ${turns.length} turn(s)]\n`,
|
|
1352
|
+
];
|
|
1353
|
+
for (const [i, turn] of turns.entries()) {
|
|
1354
|
+
parts.push(`Turn ${i + 1}:`);
|
|
1355
|
+
if (turn.userText) parts.push(`User: ${turn.userText.slice(0, 1000)}`);
|
|
1356
|
+
for (const step of turn.steps) {
|
|
1357
|
+
if (step.kind === "assistantText") {
|
|
1358
|
+
if (step.text) parts.push(`Assistant: ${step.text.slice(0, 800)}`);
|
|
1359
|
+
} else if (step.kind === "toolCall") {
|
|
1360
|
+
const argsStr = JSON.stringify(step.arguments).slice(0, 300);
|
|
1361
|
+
parts.push(`Tool: ${step.toolName}(${argsStr})`);
|
|
1362
|
+
if (step.result?.content) {
|
|
1363
|
+
parts.push(`Result: ${step.result.content.slice(0, 400)}`);
|
|
1364
|
+
}
|
|
1365
|
+
}
|
|
1366
|
+
}
|
|
1367
|
+
parts.push("");
|
|
1368
|
+
}
|
|
1369
|
+
return parts.join("\n");
|
|
1370
|
+
}
|
|
1371
|
+
|
|
1372
|
+
/**
|
|
1373
|
+
* Extracts a human-readable transcript of a single turn from the blob store.
|
|
1374
|
+
* Returns null if required blobs are missing (turn is left as a raw blob).
|
|
1375
|
+
*/
|
|
1376
|
+
function extractTextFromTurnBlob(
|
|
1377
|
+
turnBlobId: Uint8Array,
|
|
1378
|
+
blobStore: Map<string, Uint8Array>,
|
|
1379
|
+
): string | null {
|
|
1380
|
+
try {
|
|
1381
|
+
const turnData = blobStore.get(Buffer.from(turnBlobId).toString("hex"));
|
|
1382
|
+
if (!turnData) return null;
|
|
1383
|
+
|
|
1384
|
+
const turnStructure = fromBinary(ConversationTurnStructureSchema, turnData);
|
|
1385
|
+
if (turnStructure.turn.case !== "agentConversationTurn") return null;
|
|
1386
|
+
|
|
1387
|
+
const agentTurn = turnStructure.turn.value;
|
|
1388
|
+
const lines: string[] = [];
|
|
1389
|
+
|
|
1390
|
+
const userMsgData = blobStore.get(
|
|
1391
|
+
Buffer.from(agentTurn.userMessage).toString("hex"),
|
|
1392
|
+
);
|
|
1393
|
+
if (userMsgData) {
|
|
1394
|
+
const userMsg = fromBinary(UserMessageSchema, userMsgData);
|
|
1395
|
+
if (userMsg.text) lines.push(`User: ${userMsg.text.slice(0, 1000)}`);
|
|
1396
|
+
} else {
|
|
1397
|
+
return null; // can't represent this turn without its user message
|
|
1398
|
+
}
|
|
1399
|
+
|
|
1400
|
+
for (const stepBlobId of agentTurn.steps) {
|
|
1401
|
+
const stepData = blobStore.get(
|
|
1402
|
+
Buffer.from(stepBlobId).toString("hex"),
|
|
1403
|
+
);
|
|
1404
|
+
if (!stepData) continue;
|
|
1405
|
+
const step = fromBinary(ConversationStepSchema, stepData);
|
|
1406
|
+
if (step.message.case === "assistantMessage") {
|
|
1407
|
+
const text = step.message.value.text;
|
|
1408
|
+
if (text) lines.push(`Assistant: ${text.slice(0, 800)}`);
|
|
1409
|
+
} else if (step.message.case === "toolCall") {
|
|
1410
|
+
const tc = step.message.value;
|
|
1411
|
+
if (tc.tool.case === "mcpToolCall") {
|
|
1412
|
+
const mcp = tc.tool.value;
|
|
1413
|
+
const name = mcp.args?.name ?? "tool";
|
|
1414
|
+
lines.push(`Tool: ${name}`);
|
|
1415
|
+
if (mcp.result?.result.case === "success") {
|
|
1416
|
+
const content = mcp.result.result.value.content
|
|
1417
|
+
.map((c) =>
|
|
1418
|
+
c.content.case === "text" ? c.content.value.text : "",
|
|
1419
|
+
)
|
|
1420
|
+
.join("")
|
|
1421
|
+
.slice(0, 400);
|
|
1422
|
+
if (content) lines.push(`Result: ${content}`);
|
|
1423
|
+
}
|
|
1424
|
+
}
|
|
1425
|
+
}
|
|
1426
|
+
}
|
|
1427
|
+
return lines.join("\n") || null;
|
|
1428
|
+
} catch {
|
|
1429
|
+
return null;
|
|
1430
|
+
}
|
|
1431
|
+
}
|
|
1432
|
+
|
|
1334
1433
|
export function buildCursorRequest(
|
|
1335
1434
|
modelId: string,
|
|
1336
1435
|
systemPrompt: string,
|
|
@@ -1367,9 +1466,90 @@ export function buildCursorRequest(
|
|
|
1367
1466
|
ConversationStateStructureSchema,
|
|
1368
1467
|
checkpoint,
|
|
1369
1468
|
);
|
|
1469
|
+
// Archive old turns from the checkpoint when the tail is too long.
|
|
1470
|
+
// Each raw turn blob requires ~3 getBlobArgs round-trips from the server;
|
|
1471
|
+
// replacing old turns with a single ConversationSummaryArchive blob (inline
|
|
1472
|
+
// text) cuts that to 1 fetch for all archived history.
|
|
1473
|
+
if (conversationState.turns.length > TURN_ARCHIVE_THRESHOLD) {
|
|
1474
|
+
const oldTurnIds = conversationState.turns.slice(
|
|
1475
|
+
0,
|
|
1476
|
+
conversationState.turns.length - TURN_ARCHIVE_THRESHOLD,
|
|
1477
|
+
);
|
|
1478
|
+
const recentTurnIds = conversationState.turns.slice(
|
|
1479
|
+
-TURN_ARCHIVE_THRESHOLD,
|
|
1480
|
+
);
|
|
1481
|
+
|
|
1482
|
+
const archiveLines: string[] = [
|
|
1483
|
+
`[Earlier conversation \u2014 ${oldTurnIds.length} turn(s)]\n`,
|
|
1484
|
+
];
|
|
1485
|
+
let archivedCount = 0;
|
|
1486
|
+
for (const [i, oldTurnId] of oldTurnIds.entries()) {
|
|
1487
|
+
const text = extractTextFromTurnBlob(oldTurnId, blobStore);
|
|
1488
|
+
if (text === null) continue; // blob missing — leave turn as-is
|
|
1489
|
+
archiveLines.push(`Turn ${i + 1}:\n${text}`);
|
|
1490
|
+
archiveLines.push("");
|
|
1491
|
+
archivedCount++;
|
|
1492
|
+
}
|
|
1493
|
+
|
|
1494
|
+
// Only replace turns with archive if we could represent all of them;
|
|
1495
|
+
// a partial archive would silently drop context the server needs.
|
|
1496
|
+
if (archivedCount === oldTurnIds.length) {
|
|
1497
|
+
const archive = create(ConversationSummaryArchiveSchema, {
|
|
1498
|
+
summarizedMessages: oldTurnIds,
|
|
1499
|
+
summary: archiveLines.join("\n"),
|
|
1500
|
+
windowTail: oldTurnIds.length,
|
|
1501
|
+
summaryMessage: new Uint8Array(0),
|
|
1502
|
+
});
|
|
1503
|
+
const archiveBlobId = storeAsBlob(
|
|
1504
|
+
toBinary(ConversationSummaryArchiveSchema, archive),
|
|
1505
|
+
blobStore,
|
|
1506
|
+
);
|
|
1507
|
+
conversationState.turns = recentTurnIds;
|
|
1508
|
+
conversationState.summaryArchives = [
|
|
1509
|
+
...conversationState.summaryArchives,
|
|
1510
|
+
archiveBlobId,
|
|
1511
|
+
];
|
|
1512
|
+
debugLog("cursor_request.turns_archived", {
|
|
1513
|
+
archivedCount,
|
|
1514
|
+
remaining: recentTurnIds.length,
|
|
1515
|
+
totalArchives: conversationState.summaryArchives.length,
|
|
1516
|
+
});
|
|
1517
|
+
}
|
|
1518
|
+
}
|
|
1370
1519
|
} else {
|
|
1520
|
+
// When rebuilding from scratch (no checkpoint), archive old parsed turns
|
|
1521
|
+
// directly — we have their text, no blob parsing needed.
|
|
1522
|
+
const olderTurns =
|
|
1523
|
+
turns.length > TURN_ARCHIVE_THRESHOLD
|
|
1524
|
+
? turns.slice(0, turns.length - TURN_ARCHIVE_THRESHOLD)
|
|
1525
|
+
: [];
|
|
1526
|
+
const recentTurns =
|
|
1527
|
+
turns.length > TURN_ARCHIVE_THRESHOLD
|
|
1528
|
+
? turns.slice(-TURN_ARCHIVE_THRESHOLD)
|
|
1529
|
+
: turns;
|
|
1530
|
+
|
|
1531
|
+
const summaryArchives: Uint8Array[] = [];
|
|
1532
|
+
if (olderTurns.length > 0) {
|
|
1533
|
+
const archive = create(ConversationSummaryArchiveSchema, {
|
|
1534
|
+
summarizedMessages: [], // no blob IDs yet — turns haven't been stored
|
|
1535
|
+
summary: buildTurnsTranscript(olderTurns),
|
|
1536
|
+
windowTail: olderTurns.length,
|
|
1537
|
+
summaryMessage: new Uint8Array(0),
|
|
1538
|
+
});
|
|
1539
|
+
summaryArchives.push(
|
|
1540
|
+
storeAsBlob(
|
|
1541
|
+
toBinary(ConversationSummaryArchiveSchema, archive),
|
|
1542
|
+
blobStore,
|
|
1543
|
+
),
|
|
1544
|
+
);
|
|
1545
|
+
debugLog("cursor_request.turns_archived_from_scratch", {
|
|
1546
|
+
archivedCount: olderTurns.length,
|
|
1547
|
+
remaining: recentTurns.length,
|
|
1548
|
+
});
|
|
1549
|
+
}
|
|
1550
|
+
|
|
1371
1551
|
const turnBlobIds: Uint8Array[] = [];
|
|
1372
|
-
for (const turn of
|
|
1552
|
+
for (const turn of recentTurns) {
|
|
1373
1553
|
const userMsg = createUserMessage(
|
|
1374
1554
|
turn.userText,
|
|
1375
1555
|
selectedCtxBlob,
|
|
@@ -1408,7 +1588,7 @@ export function buildCursorRequest(
|
|
|
1408
1588
|
mode: 1,
|
|
1409
1589
|
fileStates: {},
|
|
1410
1590
|
fileStatesV2: {},
|
|
1411
|
-
summaryArchives
|
|
1591
|
+
summaryArchives,
|
|
1412
1592
|
turnTimings: [],
|
|
1413
1593
|
subagentStates: {},
|
|
1414
1594
|
selfSummaryCount: 0,
|
|
@@ -2213,6 +2393,7 @@ function writeSSEStream(
|
|
|
2213
2393
|
});
|
|
2214
2394
|
|
|
2215
2395
|
let closed = false;
|
|
2396
|
+
let keepAliveTimer: ReturnType<typeof setInterval> | undefined;
|
|
2216
2397
|
const sendSSE = (data: object) => {
|
|
2217
2398
|
if (closed) return;
|
|
2218
2399
|
res.write(`data: ${JSON.stringify(data)}\n\n`);
|
|
@@ -2224,6 +2405,7 @@ function writeSSEStream(
|
|
|
2224
2405
|
const closeResponse = () => {
|
|
2225
2406
|
if (closed) return;
|
|
2226
2407
|
closed = true;
|
|
2408
|
+
clearInterval(keepAliveTimer);
|
|
2227
2409
|
res.end();
|
|
2228
2410
|
};
|
|
2229
2411
|
|
|
@@ -2265,6 +2447,13 @@ function writeSSEStream(
|
|
|
2265
2447
|
let cancelled = false;
|
|
2266
2448
|
let latestCheckpoint: Uint8Array | null = null;
|
|
2267
2449
|
|
|
2450
|
+
// Keep the SSE connection alive during the silent blob-fetching phase so
|
|
2451
|
+
// pi's request timeout does not fire before the first token arrives.
|
|
2452
|
+
keepAliveTimer = setInterval(() => {
|
|
2453
|
+
if (!closed) res.write(": ping\n\n");
|
|
2454
|
+
}, 15_000);
|
|
2455
|
+
keepAliveTimer.unref();
|
|
2456
|
+
|
|
2268
2457
|
// Detect client disconnect (e.g. user pressed Escape in pi)
|
|
2269
2458
|
const onClientClose = () => {
|
|
2270
2459
|
if (cancelled || closed) return;
|
|
@@ -2394,7 +2583,6 @@ function writeSSEStream(
|
|
|
2394
2583
|
`[cursor-provider] Cursor stream error (${modelId}):`,
|
|
2395
2584
|
endError.message,
|
|
2396
2585
|
);
|
|
2397
|
-
conversationStates.delete(convKey);
|
|
2398
2586
|
sendSSE(makeChunk({ content: endError.message }, "error"));
|
|
2399
2587
|
sendSSE(makeUsageChunk());
|
|
2400
2588
|
sendDone();
|
|
@@ -2437,7 +2625,7 @@ function writeSSEStream(
|
|
|
2437
2625
|
const stored = conversationStates.get(convKey);
|
|
2438
2626
|
if (stored) {
|
|
2439
2627
|
for (const [k, v] of blobStore) stored.blobStore.set(k, v);
|
|
2440
|
-
if (
|
|
2628
|
+
if (latestCheckpoint) {
|
|
2441
2629
|
stored.checkpoint = latestCheckpoint;
|
|
2442
2630
|
debugLog("stream.checkpoint_committed", { requestId, convKey, stored });
|
|
2443
2631
|
}
|
|
@@ -2447,17 +2635,31 @@ function writeSSEStream(
|
|
|
2447
2635
|
}
|
|
2448
2636
|
if (cancelled) return;
|
|
2449
2637
|
if (!mcpExecReceived) {
|
|
2450
|
-
|
|
2451
|
-
|
|
2452
|
-
|
|
2453
|
-
|
|
2454
|
-
|
|
2455
|
-
|
|
2638
|
+
if (code !== 0) {
|
|
2639
|
+
// Bridge was killed before receiving any response (e.g. timeout waiting
|
|
2640
|
+
// for Cursor to process a large checkpoint during compaction). Treat as
|
|
2641
|
+
// an error so callers (like pi compaction) see a real failure instead of
|
|
2642
|
+
// an empty successful-looking response.
|
|
2643
|
+
console.error(
|
|
2644
|
+
`[cursor-provider] Bridge exited (code ${code}) before receiving response (${modelId})`,
|
|
2645
|
+
);
|
|
2646
|
+
sendSSE(makeChunk({ content: `Cursor bridge terminated (exit ${code}) before response — try again or shorten the conversation` }, "error"));
|
|
2647
|
+
sendSSE(makeUsageChunk());
|
|
2648
|
+
sendDone();
|
|
2649
|
+
closeResponse();
|
|
2650
|
+
} else {
|
|
2651
|
+
const flushed = tagFilter.flush();
|
|
2652
|
+
if (flushed.reasoning)
|
|
2653
|
+
sendSSE(makeChunk({ reasoning_content: flushed.reasoning }));
|
|
2654
|
+
if (flushed.content) {
|
|
2655
|
+
appendAssistantTextToTurn(currentTurn, flushed.content);
|
|
2656
|
+
sendSSE(makeChunk({ content: flushed.content }));
|
|
2657
|
+
}
|
|
2658
|
+
sendSSE(makeChunk({}, "stop"));
|
|
2659
|
+
sendSSE(makeUsageChunk());
|
|
2660
|
+
sendDone();
|
|
2661
|
+
closeResponse();
|
|
2456
2662
|
}
|
|
2457
|
-
sendSSE(makeChunk({}, "stop"));
|
|
2458
|
-
sendSSE(makeUsageChunk());
|
|
2459
|
-
sendDone();
|
|
2460
|
-
closeResponse();
|
|
2461
2663
|
} else if (code !== 0) {
|
|
2462
2664
|
sendSSE(makeChunk({ content: "Bridge connection lost" }, "error"));
|
|
2463
2665
|
sendSSE(makeUsageChunk());
|
|
@@ -2762,17 +2964,17 @@ async function handleNonStreamingResponse(
|
|
|
2762
2964
|
`[cursor-provider] Cursor non-stream error (${modelId}):`,
|
|
2763
2965
|
endError.message,
|
|
2764
2966
|
);
|
|
2765
|
-
conversationStates.delete(convKey);
|
|
2766
2967
|
nonStreamError = endError;
|
|
2767
2968
|
}
|
|
2768
2969
|
},
|
|
2769
2970
|
),
|
|
2770
2971
|
);
|
|
2771
2972
|
|
|
2772
|
-
bridge.onClose(() => {
|
|
2973
|
+
bridge.onClose((code) => {
|
|
2773
2974
|
debugLog("nonstream.bridge_close", {
|
|
2774
2975
|
requestId,
|
|
2775
2976
|
convKey,
|
|
2977
|
+
code,
|
|
2776
2978
|
cancelled,
|
|
2777
2979
|
nonStreamError: nonStreamError?.message,
|
|
2778
2980
|
currentTurn,
|
|
@@ -2784,7 +2986,7 @@ async function handleNonStreamingResponse(
|
|
|
2784
2986
|
const stored = conversationStates.get(convKey);
|
|
2785
2987
|
if (stored) {
|
|
2786
2988
|
for (const [k, v] of payload.blobStore) stored.blobStore.set(k, v);
|
|
2787
|
-
if (
|
|
2989
|
+
if (latestCheckpoint) {
|
|
2788
2990
|
stored.checkpoint = latestCheckpoint;
|
|
2789
2991
|
debugLog("nonstream.checkpoint_committed", {
|
|
2790
2992
|
requestId,
|
|
@@ -2829,6 +3031,24 @@ async function handleNonStreamingResponse(
|
|
|
2829
3031
|
return;
|
|
2830
3032
|
}
|
|
2831
3033
|
|
|
3034
|
+
if (code !== 0) {
|
|
3035
|
+
console.error(
|
|
3036
|
+
`[cursor-provider] Bridge exited (code ${code}) before non-stream response (${modelId})`,
|
|
3037
|
+
);
|
|
3038
|
+
res.writeHead(502, { "Content-Type": "application/json" });
|
|
3039
|
+
res.end(
|
|
3040
|
+
JSON.stringify({
|
|
3041
|
+
error: {
|
|
3042
|
+
message: `Cursor bridge terminated (exit ${code}) before response — try again or shorten the conversation`,
|
|
3043
|
+
type: "upstream_error",
|
|
3044
|
+
code: "bridge_terminated",
|
|
3045
|
+
},
|
|
3046
|
+
}),
|
|
3047
|
+
);
|
|
3048
|
+
resolve();
|
|
3049
|
+
return;
|
|
3050
|
+
}
|
|
3051
|
+
|
|
2832
3052
|
const flushed = tagFilter.flush();
|
|
2833
3053
|
fullText += flushed.content;
|
|
2834
3054
|
appendAssistantTextToTurn(currentTurn, flushed.content);
|