copilot-cursor-proxy 1.2.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -28,6 +28,14 @@ cd copilot-for-cursor
28
28
  bun run start.ts
29
29
  ```
30
30
 
31
+ ### Enable Max Mode (auto-compact long conversations)
32
+
33
+ ```bash
34
+ bun run start.ts --max
35
+ ```
36
+
37
+ > **Max mode** automatically compacts conversation history when the estimated token count exceeds 80% of the model's input token limit. It summarizes older messages into a structured summary while keeping the most recent messages intact — letting you have much longer coding sessions without hitting token limits.
38
+
31
39
  ### Then start an HTTPS tunnel
32
40
 
33
41
  Cursor requires HTTPS. In a second terminal:
@@ -66,6 +74,10 @@ Cursor → (HTTPS tunnel) → proxy-router (:4142) → copilot-api (:4141) → G
66
74
  | `stream-proxy.ts` | Streaming passthrough with chunk logging and error detection |
67
75
  | `debug-logger.ts` | Request/response debug logging helpers |
68
76
  | `start.ts` | One-command launcher for copilot-api + proxy-router |
77
+ | `max-mode.ts` | Auto-compaction for long conversations (`--max` flag) |
78
+ | `usage-db.ts` | Persistent request/token usage tracking |
79
+ | `auth-config.ts` | API key generation, validation, and config persistence |
80
+ | `upstream-auth.ts` | Upstream copilot-api authentication and key management |
69
81
 
70
82
  ---
71
83
 
@@ -139,6 +151,7 @@ Cursor → (HTTPS tunnel) → proxy-router (:4142) → copilot-api (:4141) → G
139
151
  * **💻 Terminal:** `Shell` (run commands)
140
152
  * **🔍 Search:** `Grep`, `Glob`, `SemanticSearch`
141
153
  * **🔌 MCP Tools:** External tools (Neon, Playwright, etc.)
154
+ * **🗜️ Max Mode:** Auto-compact long conversations to stay within token limits (`--max`)
142
155
 
143
156
  ---
144
157
 
@@ -187,6 +200,7 @@ Three tabs:
187
200
  | Plan mode | ✅ Works |
188
201
  | Agent mode | ✅ Works |
189
202
  | All GPT-5.x models | ✅ Works |
203
+ | Max mode (long session compaction) | ✅ Works (`--max` flag) |
190
204
  | Extended thinking (chain-of-thought) | ❌ Stripped |
191
205
  | Prompt caching (`cache_control`) | ❌ Stripped |
192
206
  | Claude Vision | ❌ Not supported via Copilot |
@@ -208,6 +222,9 @@ The proxy auto-routes these. Make sure you're running the latest version.
208
222
  **"connection refused":**
209
223
  Ensure services are running: `bun run start.ts` or check `http://localhost:4142`.
210
224
 
225
+ **Max mode not compacting:**
226
+ Compaction only triggers when the estimated token count exceeds 80% of the model's input token limit and the conversation has at least 15 messages. Check the console log for `🗜️ Max mode` messages.
227
+
211
228
  ---
212
229
 
213
230
  > ⚠️ **DISCLAIMER:** This project is **unofficial** and for **educational purposes only**. It interacts with undocumented internal APIs of GitHub Copilot and Cursor. Use at your own risk. The authors are not affiliated with GitHub, Microsoft, or Anysphere (Cursor). Please use your API credits responsibly and in accordance with the provider's Terms of Service.
@@ -121,6 +121,16 @@ const transformMessages = (json: any, isClaude: boolean): void => {
121
121
  }
122
122
  }
123
123
 
124
+ // Preserve any existing OpenAI-format tool_calls on the message
125
+ // (hybrid format: content is array but tool_calls are separate)
126
+ if (msg.tool_calls && Array.isArray(msg.tool_calls)) {
127
+ for (const tc of msg.tool_calls) {
128
+ if (!toolCalls.some(t => t.id === tc.id)) {
129
+ toolCalls.push(tc);
130
+ }
131
+ }
132
+ }
133
+
124
134
  const assistantMsg: any = { role: 'assistant' };
125
135
  assistantMsg.content = textParts.join('\n') || null;
126
136
  if (toolCalls.length > 0) assistantMsg.tool_calls = toolCalls;
package/max-mode.ts ADDED
@@ -0,0 +1,305 @@
1
+ import { getUpstreamAuthHeader } from './upstream-auth';
2
+ import { needsResponsesAPI } from './model-routing';
3
+
4
+ // ── Global config ─────────────────────────────────────────────────────────────
5
+ let maxModeEnabled = false;
6
+
7
+ export function enableMaxMode(): void {
8
+ maxModeEnabled = true;
9
+ }
10
+
11
+ export function isMaxMode(): boolean {
12
+ return maxModeEnabled;
13
+ }
14
+
15
+ // ── Model token limits cache ──────────────────────────────────────────────────
16
+ interface ModelLimits {
17
+ maxInputTokens: number;
18
+ maxOutputTokens: number;
19
+ }
20
+
21
+ const modelLimitsCache = new Map<string, ModelLimits>();
22
+
23
+ // Fallback defaults — only used when upstream /v1/models doesn't return capabilities.limits.
24
+ // Real limits are fetched dynamically from the copilot-api at startup via fetchAndCacheModelLimits().
25
+ // Output token values: Claude 64K (Sonnet 3.5/4 extended), GPT-4/5 16K, o1/o3 100K reasoning.
26
+ const DEFAULT_LIMITS: Record<string, ModelLimits> = {
27
+ 'claude': { maxInputTokens: 200000, maxOutputTokens: 64000 },
28
+ 'gpt-4': { maxInputTokens: 128000, maxOutputTokens: 16384 },
29
+ 'gpt-5': { maxInputTokens: 128000, maxOutputTokens: 16384 },
30
+ 'o1': { maxInputTokens: 200000, maxOutputTokens: 100000 },
31
+ 'o3': { maxInputTokens: 200000, maxOutputTokens: 100000 },
32
+ 'default': { maxInputTokens: 128000, maxOutputTokens: 16384 }, // conservative general-purpose fallback
33
+ };
34
+
35
+ function getDefaultLimits(model: string): ModelLimits {
36
+ const lower = model.toLowerCase();
37
+ for (const [prefix, limits] of Object.entries(DEFAULT_LIMITS)) {
38
+ if (prefix !== 'default' && lower.includes(prefix)) return limits;
39
+ }
40
+ return DEFAULT_LIMITS['default'];
41
+ }
42
+
43
+ export async function fetchAndCacheModelLimits(targetUrl: string): Promise<void> {
44
+ try {
45
+ const resp = await fetch(new URL('/v1/models', targetUrl).toString(), {
46
+ headers: { 'Authorization': getUpstreamAuthHeader() },
47
+ signal: AbortSignal.timeout(10000),
48
+ });
49
+ if (!resp.ok) return;
50
+ const data = await resp.json() as any;
51
+ if (!data.data || !Array.isArray(data.data)) return;
52
+
53
+ for (const model of data.data) {
54
+ const limits = model.capabilities?.limits;
55
+ if (limits) {
56
+ modelLimitsCache.set(model.id, {
57
+ maxInputTokens: limits.max_prompt_tokens || limits.max_input_tokens || getDefaultLimits(model.id).maxInputTokens,
58
+ maxOutputTokens: limits.max_output_tokens || getDefaultLimits(model.id).maxOutputTokens,
59
+ });
60
+ }
61
+ }
62
+ console.log(`📋 Max mode: cached token limits for ${modelLimitsCache.size} models`);
63
+ for (const [id, lim] of modelLimitsCache) {
64
+ console.log(` ${id}: input=${lim.maxInputTokens}, output=${lim.maxOutputTokens}`);
65
+ }
66
+ } catch (e: any) {
67
+ console.warn(`⚠️ Max mode: failed to fetch model limits: ${e?.message || e}`);
68
+ }
69
+ }
70
+
71
+ export function getModelLimits(model: string): ModelLimits {
72
+ return modelLimitsCache.get(model) || getDefaultLimits(model);
73
+ }
74
+
75
+ // ── Token estimation ──────────────────────────────────────────────────────────
76
+ // Simple char/4 heuristic — fast, zero-dependency, ~80% accurate for English.
77
+ // For mixed CJK content each character ≈ 1-2 tokens, so we use a blended ratio.
78
+
79
+ function estimateTokens(text: string): number {
80
+ if (!text) return 0;
81
+ // rough estimate: ascii chars / 4, non-ascii chars / 1.5
82
+ let ascii = 0, nonAscii = 0;
83
+ for (let i = 0; i < text.length; i++) {
84
+ if (text.charCodeAt(i) < 128) ascii++;
85
+ else nonAscii++;
86
+ }
87
+ return Math.ceil(ascii / 4 + nonAscii / 1.5);
88
+ }
89
+
90
+ function estimateMessagesTokens(messages: any[]): number {
91
+ let total = 0;
92
+ for (const msg of messages) {
93
+ // role overhead
94
+ total += 4;
95
+ if (typeof msg.content === 'string') {
96
+ total += estimateTokens(msg.content);
97
+ } else if (Array.isArray(msg.content)) {
98
+ for (const part of msg.content) {
99
+ if (part.type === 'text') total += estimateTokens(part.text || '');
100
+ else total += estimateTokens(JSON.stringify(part));
101
+ }
102
+ }
103
+ // tool calls overhead
104
+ if (msg.tool_calls) {
105
+ total += estimateTokens(JSON.stringify(msg.tool_calls));
106
+ }
107
+ }
108
+ return total;
109
+ }
110
+
111
+ // ── Helpers ───────────────────────────────────────────────────────────────────
112
+ function truncateContent(content: string, maxChars: number): string {
113
+ if (content.length <= maxChars) return content;
114
+ return content.slice(0, maxChars) + '\n... [truncated]';
115
+ }
116
+
117
+ function extractResponsesTextContent(data: any): string {
118
+ const outputMessages = (data.output || []).filter((item: any) =>
119
+ item.type === 'message' && Array.isArray(item.content)
120
+ );
121
+ const textParts = outputMessages
122
+ .flatMap((item: any) => item.content)
123
+ .filter((part: any) => part.type === 'output_text');
124
+ if (textParts.length === 0) {
125
+ console.warn('⚠️ Max mode: Responses summarization returned no output_text parts');
126
+ }
127
+ return textParts.map((part: any) => part.text).join('');
128
+ }
129
+
130
+ // ── Summarization prompt ──────────────────────────────────────────────────────
131
+ // Inspired by claude-code/opencode compaction prompts, adapted for proxy use.
132
+ const SUMMARIZATION_PROMPT = `Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and the assistant's previous actions.
133
+
134
+ Analyze each message chronologically and identify:
135
+ - The user's primary goals and requests
136
+ - Key technical concepts and decisions
137
+ - Files and code sections discussed or modified
138
+ - Problems encountered and solutions applied
139
+ - The current state of work in progress
140
+
141
+ Your summary MUST:
142
+ 1. Preserve all file paths, function names, variable names, and code snippets mentioned
143
+ 2. Retain exact error messages and their resolutions
144
+ 3. Capture the user's original intent and any refinements
145
+ 4. Note what has been completed vs what remains to be done
146
+ 5. Include enough technical detail to continue the conversation seamlessly
147
+
148
+ Format as a structured summary, not a conversation replay. Be concise but do NOT omit any technical details that would be needed to continue the work.`;
149
+
150
+ // ── Compaction logic ──────────────────────────────────────────────────────────
151
+ // Threshold: compact when estimated input tokens exceed this fraction of model max
152
+ const COMPACT_THRESHOLD = 0.80;
153
+ // Keep the most recent N messages untouched to preserve immediate context
154
+ const KEEP_RECENT_MESSAGES = 10;
155
+ // Never compact if total messages are below this count
156
+ const MIN_MESSAGES_FOR_COMPACTION = 15;
157
+ // Minimum old messages worth summarizing (below this, compaction is skipped)
158
+ const MIN_MESSAGES_TO_SUMMARIZE = 3;
159
+ // Max characters per individual message when building the summarization input
160
+ const MAX_MESSAGE_CHARS_FOR_SUMMARY = 8000;
161
+ // Acknowledgment message inserted after the summary to maintain conversation flow
162
+ const SUMMARY_ACKNOWLEDGMENT = 'Understood. I have the full context from the conversation summary. Let me continue.';
163
+
164
+ export async function compactIfNeeded(
165
+ json: any,
166
+ targetModel: string,
167
+ targetUrl: string,
168
+ ): Promise<any> {
169
+ if (!maxModeEnabled) return json;
170
+ if (!json.messages || !Array.isArray(json.messages) || json.messages.length < MIN_MESSAGES_FOR_COMPACTION) {
171
+ return json;
172
+ }
173
+
174
+ const limits = getModelLimits(targetModel);
175
+ const estimated = estimateMessagesTokens(json.messages);
176
+ const threshold = Math.floor(limits.maxInputTokens * COMPACT_THRESHOLD);
177
+
178
+ if (estimated <= threshold) {
179
+ return json;
180
+ }
181
+
182
+ console.log(`🗜️ Max mode: estimated ${estimated} tokens exceeds ${COMPACT_THRESHOLD * 100}% of ${limits.maxInputTokens} — compacting`);
183
+
184
+ // Split: system messages + old messages to summarize + recent messages to keep
185
+ const systemMsgs = json.messages.filter((m: any) => m.role === 'system');
186
+ const nonSystemMsgs = json.messages.filter((m: any) => m.role !== 'system');
187
+ // Keep at most half of non-system messages to ensure there's enough old content to summarize
188
+ const keepCount = Math.min(KEEP_RECENT_MESSAGES, Math.floor(nonSystemMsgs.length / 2));
189
+ const recentMsgs = nonSystemMsgs.slice(-keepCount);
190
+ const oldMsgs = nonSystemMsgs.slice(0, -keepCount);
191
+
192
+ if (oldMsgs.length < MIN_MESSAGES_TO_SUMMARIZE) return json; // nothing meaningful to compact
193
+
194
+ try {
195
+ const summary = await callSummarize(targetModel, oldMsgs, targetUrl);
196
+ if (!summary) return json; // summarization failed, pass through
197
+
198
+ console.log(`🗜️ Max mode: compacted ${oldMsgs.length} messages → 1 summary (${estimateTokens(summary)} est. tokens)`);
199
+
200
+ // Rebuild messages: system + summary-as-user-message + recent
201
+ json.messages = [
202
+ ...systemMsgs,
203
+ { role: 'user', content: `[Conversation Summary]\n${summary}` },
204
+ { role: 'assistant', content: SUMMARY_ACKNOWLEDGMENT },
205
+ ...recentMsgs,
206
+ ];
207
+
208
+ return json;
209
+ } catch (e: any) {
210
+ console.error(`❌ Max mode: compaction failed, passing through original:`, e?.message || e);
211
+ return json;
212
+ }
213
+ }
214
+
215
+ async function callSummarize(model: string, messages: any[], targetUrl: string): Promise<string | null> {
216
+ const conversationText = messages.map(m => {
217
+ const content = typeof m.content === 'string'
218
+ ? m.content
219
+ : Array.isArray(m.content)
220
+ ? m.content.map((p: any) => p.text || JSON.stringify(p)).join('\n')
221
+ : JSON.stringify(m.content);
222
+ const role = m.role || 'unknown';
223
+ const truncated = truncateContent(content, MAX_MESSAGE_CHARS_FOR_SUMMARY);
224
+ return `[${role}]: ${truncated}`;
225
+ }).join('\n\n');
226
+
227
+ console.log(`🗜️ Max mode: sending summarization request (${messages.length} messages → ${model})`);
228
+
229
+ if (needsResponsesAPI(model)) {
230
+ const responsesUrl = new URL('/v1/responses', targetUrl);
231
+ const responsesBody = JSON.stringify({
232
+ model,
233
+ instructions: SUMMARIZATION_PROMPT,
234
+ input: `Please summarize the following conversation:\n\n${conversationText}`,
235
+ max_output_tokens: 4096,
236
+ temperature: 0.2,
237
+ stream: false,
238
+ });
239
+
240
+ const resp = await fetch(responsesUrl.toString(), {
241
+ method: 'POST',
242
+ headers: {
243
+ 'Content-Type': 'application/json',
244
+ 'Authorization': getUpstreamAuthHeader(),
245
+ },
246
+ body: responsesBody,
247
+ });
248
+
249
+ if (!resp.ok) {
250
+ const errText = await resp.text();
251
+ console.error(`❌ Max mode summarization failed (${resp.status}):`, errText.slice(0, 500));
252
+ return null;
253
+ }
254
+
255
+ const data = await resp.json() as any;
256
+ const content = extractResponsesTextContent(data);
257
+
258
+ if (content) {
259
+ console.log(`🗜️ Max mode: summarization complete (${estimateTokens(content)} est. tokens)`);
260
+ }
261
+
262
+ return content || null;
263
+ }
264
+
265
+ const summarizeMessages = [
266
+ { role: 'system', content: SUMMARIZATION_PROMPT },
267
+ {
268
+ role: 'user',
269
+ content: `Please summarize the following conversation:\n\n${conversationText}`,
270
+ },
271
+ ];
272
+
273
+ const chatBody = JSON.stringify({
274
+ model,
275
+ messages: summarizeMessages,
276
+ max_tokens: 4096,
277
+ temperature: 0.2,
278
+ stream: false,
279
+ });
280
+
281
+ const chatUrl = new URL('/v1/chat/completions', targetUrl);
282
+ const resp = await fetch(chatUrl.toString(), {
283
+ method: 'POST',
284
+ headers: {
285
+ 'Content-Type': 'application/json',
286
+ 'Authorization': getUpstreamAuthHeader(),
287
+ },
288
+ body: chatBody,
289
+ });
290
+
291
+ if (!resp.ok) {
292
+ const errText = await resp.text();
293
+ console.error(`❌ Max mode summarization failed (${resp.status}):`, errText.slice(0, 500));
294
+ return null;
295
+ }
296
+
297
+ const data = await resp.json() as any;
298
+ const content = data.choices?.[0]?.message?.content;
299
+
300
+ if (content) {
301
+ console.log(`🗜️ Max mode: summarization complete (${estimateTokens(content)} est. tokens)`);
302
+ }
303
+
304
+ return content || null;
305
+ }
@@ -0,0 +1,3 @@
1
+ export function needsResponsesAPI(model: string): boolean {
2
+ return /^(?:gpt-5\.(?:[2-9]|\d{2,})(?:-codex)?|o\d+|goldeneye)/i.test(model);
3
+ }
package/package.json CHANGED
@@ -1,36 +1,36 @@
1
- {
2
- "name": "copilot-cursor-proxy",
3
- "version": "1.2.0",
4
- "description": "Proxy that bridges GitHub Copilot API to Cursor IDE — translates Anthropic format, bridges Responses API for GPT 5.x, and more",
5
- "bin": {
6
- "copilot-cursor-proxy": "bin/cli.js"
7
- },
8
- "files": [
9
- "bin/",
10
- "*.ts",
11
- "dashboard.html",
12
- "README.md"
13
- ],
14
- "scripts": {
15
- "build": "bun build start.ts proxy-router.ts anthropic-transforms.ts responses-bridge.ts responses-converters.ts stream-proxy.ts debug-logger.ts auth-config.ts upstream-auth.ts --outdir dist --target node",
16
- "dev": "bun run start.ts",
17
- "start": "bun dist/start.js"
18
- },
19
- "keywords": [
20
- "copilot",
21
- "cursor",
22
- "proxy",
23
- "anthropic",
24
- "openai",
25
- "responses-api"
26
- ],
27
- "license": "MIT",
28
- "repository": {
29
- "type": "git",
30
- "url": "git+https://github.com/CharlesYWL/copilot-for-cursor.git"
31
- },
32
- "engines": {
33
- "node": ">=18",
34
- "bun": ">=1.0"
35
- }
36
- }
1
+ {
2
+ "name": "copilot-cursor-proxy",
3
+ "version": "1.2.1",
4
+ "description": "Proxy that bridges GitHub Copilot API to Cursor IDE — translates Anthropic format, bridges Responses API for GPT 5.x, and more",
5
+ "bin": {
6
+ "copilot-cursor-proxy": "bin/cli.js"
7
+ },
8
+ "files": [
9
+ "bin/",
10
+ "*.ts",
11
+ "dashboard.html",
12
+ "README.md"
13
+ ],
14
+ "scripts": {
15
+ "build": "bun build start.ts --outdir dist --target node",
16
+ "dev": "bun run start.ts",
17
+ "start": "bun dist/start.js"
18
+ },
19
+ "keywords": [
20
+ "copilot",
21
+ "cursor",
22
+ "proxy",
23
+ "anthropic",
24
+ "openai",
25
+ "responses-api"
26
+ ],
27
+ "license": "MIT",
28
+ "repository": {
29
+ "type": "git",
30
+ "url": "git+https://github.com/CharlesYWL/copilot-for-cursor.git"
31
+ },
32
+ "engines": {
33
+ "node": ">=18",
34
+ "bun": ">=1.0"
35
+ }
36
+ }
package/proxy-router.ts CHANGED
@@ -5,6 +5,8 @@ import { logIncomingRequest, logTransformedRequest } from './debug-logger';
5
5
  import { addRequestLog, getNextRequestId, getUsageStats, flushToDisk, type RequestLog } from './usage-db';
6
6
  import { loadAuthConfig, saveAuthConfig, generateApiKey, validateApiKey } from './auth-config';
7
7
  import { getUpstreamAuthHeader, getUpstreamApiKeys, createUpstreamApiKey, deleteUpstreamApiKey } from './upstream-auth';
8
+ import { compactIfNeeded, isMaxMode } from './max-mode';
9
+ import { needsResponsesAPI } from './model-routing';
8
10
 
9
11
  // ── Console capture for SSE streaming ─────────────────────────────────────────
10
12
  interface ConsoleLine {
@@ -271,19 +273,24 @@ Bun.serve({
271
273
 
272
274
  logTransformedRequest(json);
273
275
 
276
+ // ── Max mode: compact long conversations before sending ───────────
277
+ if (isMaxMode()) {
278
+ json = await compactIfNeeded(json, targetModel, TARGET_URL);
279
+ }
280
+
274
281
  const headers = new Headers(req.headers);
275
282
  headers.set("host", targetUrl.host);
276
283
  headers.set("authorization", getUpstreamAuthHeader());
277
284
 
278
- const needsResponsesAPI = targetModel.match(/^gpt-5\.[2-9]|^gpt-5\.\d+-codex|^o[1-9]|^goldeneye/i);
285
+ const shouldUseResponsesAPI = needsResponsesAPI(targetModel);
279
286
 
280
- if (needsResponsesAPI && json.max_tokens) {
287
+ if (shouldUseResponsesAPI && json.max_tokens) {
281
288
  json.max_completion_tokens = json.max_tokens;
282
289
  delete json.max_tokens;
283
290
  console.log(`🔧 Converted max_tokens → max_completion_tokens`);
284
291
  }
285
292
 
286
- if (needsResponsesAPI) {
293
+ if (shouldUseResponsesAPI) {
287
294
  console.log(`🔀 Model ${targetModel} — using Responses API bridge`);
288
295
  const chatId = `chatcmpl-proxy-${++responseCounter}`;
289
296
  try {
package/start.ts CHANGED
@@ -7,6 +7,13 @@
7
7
  import { spawn, sleep } from 'bun';
8
8
  import { existsSync } from 'fs';
9
9
  import { getUpstreamAuthHeader } from './upstream-auth';
10
+ import { enableMaxMode, isMaxMode, fetchAndCacheModelLimits } from './max-mode';
11
+
12
+ // ── Parse CLI flags ──────────────────────────────────────────────────────────
13
+ const args = process.argv.slice(2);
14
+ if (args.includes('--max')) {
15
+ enableMaxMode();
16
+ }
10
17
 
11
18
  const COPILOT_API_PORT = 4141;
12
19
  const PROXY_PORT = 4142;
@@ -100,6 +107,12 @@ async function main() {
100
107
  console.log(`${GREEN}✅ copilot-api is ready on port ${COPILOT_API_PORT}${RESET}`);
101
108
  }
102
109
 
110
+ // 1.5 If --max mode, pre-fetch and cache model token limits
111
+ if (isMaxMode()) {
112
+ console.log(`${CYAN}🔥 Max mode enabled — will auto-compact long conversations${RESET}`);
113
+ await fetchAndCacheModelLimits(`http://localhost:${COPILOT_API_PORT}`);
114
+ }
115
+
103
116
  // 2. Check if proxy is already running
104
117
  const proxyAlreadyRunning = await isPortInUse(PROXY_PORT);
105
118
  if (proxyAlreadyRunning) {