copilot-cursor-proxy 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -0
- package/anthropic-transforms.ts +10 -0
- package/max-mode.ts +305 -0
- package/model-routing.ts +3 -0
- package/package.json +36 -36
- package/proxy-router.ts +10 -3
- package/start.ts +13 -0
package/README.md
CHANGED
@@ -28,6 +28,14 @@ cd copilot-for-cursor
 bun run start.ts
 ```
 
+### Enable Max Mode (auto-compact long conversations)
+
+```bash
+bun run start.ts --max
+```
+
+> **Max mode** automatically compacts conversation history when the estimated token count exceeds 80% of the model's input token limit. It summarizes older messages into a structured summary while keeping the most recent messages intact — letting you have much longer coding sessions without hitting token limits.
+
 ### Then start an HTTPS tunnel
 
 Cursor requires HTTPS. In a second terminal:
@@ -66,6 +74,10 @@ Cursor → (HTTPS tunnel) → proxy-router (:4142) → copilot-api (:4141) → G
 | `stream-proxy.ts` | Streaming passthrough with chunk logging and error detection |
 | `debug-logger.ts` | Request/response debug logging helpers |
 | `start.ts` | One-command launcher for copilot-api + proxy-router |
+| `max-mode.ts` | Auto-compaction for long conversations (`--max` flag) |
+| `usage-db.ts` | Persistent request/token usage tracking |
+| `auth-config.ts` | API key generation, validation, and config persistence |
+| `upstream-auth.ts` | Upstream copilot-api authentication and key management |
 
 ---
 
@@ -139,6 +151,7 @@ Cursor → (HTTPS tunnel) → proxy-router (:4142) → copilot-api (:4141) → G
 * **💻 Terminal:** `Shell` (run commands)
 * **🔍 Search:** `Grep`, `Glob`, `SemanticSearch`
 * **🔌 MCP Tools:** External tools (Neon, Playwright, etc.)
+* **🗜️ Max Mode:** Auto-compact long conversations to stay within token limits (`--max`)
 
 ---
 
@@ -187,6 +200,7 @@ Three tabs:
 | Plan mode | ✅ Works |
 | Agent mode | ✅ Works |
 | All GPT-5.x models | ✅ Works |
+| Max mode (long session compaction) | ✅ Works (`--max` flag) |
 | Extended thinking (chain-of-thought) | ❌ Stripped |
 | Prompt caching (`cache_control`) | ❌ Stripped |
 | Claude Vision | ❌ Not supported via Copilot |
@@ -208,6 +222,9 @@ The proxy auto-routes these. Make sure you're running the latest version.
 **"connection refused":**
 Ensure services are running: `bun run start.ts` or check `http://localhost:4142`.
 
+**Max mode not compacting:**
+Compaction only triggers when estimated tokens exceed 80% of the model's limit and there are at least 15 messages. Check the console log for `🗜️ Max mode` messages.
+
 ---
 
 > ⚠️ **DISCLAIMER:** This project is **unofficial** and for **educational purposes only**. It interacts with undocumented internal APIs of GitHub Copilot and Cursor. Use at your own risk. The authors are not affiliated with GitHub, Microsoft, or Anysphere (Cursor). Please use your API credits responsibly and in accordance with the provider's Terms of Service.
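To make the 80% trigger described in the README addition above concrete, here is a minimal sketch of the arithmetic. It is not part of the package: the constant names mirror `COMPACT_THRESHOLD`, `MIN_MESSAGES_FOR_COMPACTION`, and the `claude` fallback entry in `max-mode.ts` later in this diff, while the estimated token count and message count are made-up values.

```ts
// Sketch only: when does Max mode compact? Mirrors the check in max-mode.ts below.
const COMPACT_THRESHOLD = 0.80;           // fraction of the model's input limit
const MIN_MESSAGES_FOR_COMPACTION = 15;   // never compact shorter conversations
const maxInputTokens = 200_000;           // 'claude' fallback from DEFAULT_LIMITS

const threshold = Math.floor(maxInputTokens * COMPACT_THRESHOLD); // 160_000
const estimatedTokens = 175_000;          // hypothetical estimate for a long session
const messageCount = 42;                  // hypothetical message count

const shouldCompact =
  messageCount >= MIN_MESSAGES_FOR_COMPACTION && estimatedTokens > threshold; // true
```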
package/anthropic-transforms.ts
CHANGED
@@ -121,6 +121,16 @@ const transformMessages = (json: any, isClaude: boolean): void => {
       }
     }
 
+    // Preserve any existing OpenAI-format tool_calls on the message
+    // (hybrid format: content is array but tool_calls are separate)
+    if (msg.tool_calls && Array.isArray(msg.tool_calls)) {
+      for (const tc of msg.tool_calls) {
+        if (!toolCalls.some(t => t.id === tc.id)) {
+          toolCalls.push(tc);
+        }
+      }
+    }
+
     const assistantMsg: any = { role: 'assistant' };
     assistantMsg.content = textParts.join('\n') || null;
     if (toolCalls.length > 0) assistantMsg.tool_calls = toolCalls;
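The comment in the hunk above refers to a hybrid message shape: an assistant message whose `content` is an Anthropic-style array while OpenAI-style `tool_calls` sit alongside it on the same object. A made-up example of such a message, only to illustrate what the new dedup-by-id loop preserves (the tool name and arguments are illustrative):

```ts
// Hypothetical hybrid assistant message handled by the new block above.
// transformMessages() now copies msg.tool_calls into toolCalls unless an
// entry with the same id was already collected from the content array.
const msg = {
  role: 'assistant',
  content: [{ type: 'text', text: 'Searching the workspace now.' }],
  tool_calls: [
    {
      id: 'call_001', // skipped if a tool call with this id already exists
      type: 'function',
      function: { name: 'Grep', arguments: '{"pattern":"TODO"}' },
    },
  ],
};
```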
package/max-mode.ts
ADDED
@@ -0,0 +1,305 @@
+import { getUpstreamAuthHeader } from './upstream-auth';
+import { needsResponsesAPI } from './model-routing';
+
+// ── Global config ─────────────────────────────────────────────────────────────
+let maxModeEnabled = false;
+
+export function enableMaxMode(): void {
+  maxModeEnabled = true;
+}
+
+export function isMaxMode(): boolean {
+  return maxModeEnabled;
+}
+
+// ── Model token limits cache ──────────────────────────────────────────────────
+interface ModelLimits {
+  maxInputTokens: number;
+  maxOutputTokens: number;
+}
+
+const modelLimitsCache = new Map<string, ModelLimits>();
+
+// Fallback defaults — only used when upstream /v1/models doesn't return capabilities.limits.
+// Real limits are fetched dynamically from the copilot-api at startup via fetchAndCacheModelLimits().
+// Output token values: Claude 64K (Sonnet 3.5/4 extended), GPT-4/5 16K, o1/o3 100K reasoning.
+const DEFAULT_LIMITS: Record<string, ModelLimits> = {
+  'claude': { maxInputTokens: 200000, maxOutputTokens: 64000 },
+  'gpt-4': { maxInputTokens: 128000, maxOutputTokens: 16384 },
+  'gpt-5': { maxInputTokens: 128000, maxOutputTokens: 16384 },
+  'o1': { maxInputTokens: 200000, maxOutputTokens: 100000 },
+  'o3': { maxInputTokens: 200000, maxOutputTokens: 100000 },
+  'default': { maxInputTokens: 128000, maxOutputTokens: 16384 }, // conservative general-purpose fallback
+};
+
+function getDefaultLimits(model: string): ModelLimits {
+  const lower = model.toLowerCase();
+  for (const [prefix, limits] of Object.entries(DEFAULT_LIMITS)) {
+    if (prefix !== 'default' && lower.includes(prefix)) return limits;
+  }
+  return DEFAULT_LIMITS['default'];
+}
+
+export async function fetchAndCacheModelLimits(targetUrl: string): Promise<void> {
+  try {
+    const resp = await fetch(new URL('/v1/models', targetUrl).toString(), {
+      headers: { 'Authorization': getUpstreamAuthHeader() },
+      signal: AbortSignal.timeout(10000),
+    });
+    if (!resp.ok) return;
+    const data = await resp.json() as any;
+    if (!data.data || !Array.isArray(data.data)) return;
+
+    for (const model of data.data) {
+      const limits = model.capabilities?.limits;
+      if (limits) {
+        modelLimitsCache.set(model.id, {
+          maxInputTokens: limits.max_prompt_tokens || limits.max_input_tokens || getDefaultLimits(model.id).maxInputTokens,
+          maxOutputTokens: limits.max_output_tokens || getDefaultLimits(model.id).maxOutputTokens,
+        });
+      }
+    }
+    console.log(`📋 Max mode: cached token limits for ${modelLimitsCache.size} models`);
+    for (const [id, lim] of modelLimitsCache) {
+      console.log(`   ${id}: input=${lim.maxInputTokens}, output=${lim.maxOutputTokens}`);
+    }
+  } catch (e: any) {
+    console.warn(`⚠️ Max mode: failed to fetch model limits: ${e?.message || e}`);
+  }
+}
+
+export function getModelLimits(model: string): ModelLimits {
+  return modelLimitsCache.get(model) || getDefaultLimits(model);
+}
+
+// ── Token estimation ──────────────────────────────────────────────────────────
+// Simple char/4 heuristic — fast, zero-dependency, ~80% accurate for English.
+// For mixed CJK content each character ≈ 1-2 tokens, so we use a blended ratio.
+
+function estimateTokens(text: string): number {
+  if (!text) return 0;
+  // rough estimate: ascii chars / 4, non-ascii chars / 1.5
+  let ascii = 0, nonAscii = 0;
+  for (let i = 0; i < text.length; i++) {
+    if (text.charCodeAt(i) < 128) ascii++;
+    else nonAscii++;
+  }
+  return Math.ceil(ascii / 4 + nonAscii / 1.5);
+}
+
+function estimateMessagesTokens(messages: any[]): number {
+  let total = 0;
+  for (const msg of messages) {
+    // role overhead
+    total += 4;
+    if (typeof msg.content === 'string') {
+      total += estimateTokens(msg.content);
+    } else if (Array.isArray(msg.content)) {
+      for (const part of msg.content) {
+        if (part.type === 'text') total += estimateTokens(part.text || '');
+        else total += estimateTokens(JSON.stringify(part));
+      }
+    }
+    // tool calls overhead
+    if (msg.tool_calls) {
+      total += estimateTokens(JSON.stringify(msg.tool_calls));
+    }
+  }
+  return total;
+}
+
+// ── Helpers ───────────────────────────────────────────────────────────────────
+function truncateContent(content: string, maxChars: number): string {
+  if (content.length <= maxChars) return content;
+  return content.slice(0, maxChars) + '\n... [truncated]';
+}
+
+function extractResponsesTextContent(data: any): string {
+  const outputMessages = (data.output || []).filter((item: any) =>
+    item.type === 'message' && Array.isArray(item.content)
+  );
+  const textParts = outputMessages
+    .flatMap((item: any) => item.content)
+    .filter((part: any) => part.type === 'output_text');
+  if (textParts.length === 0) {
+    console.warn('⚠️ Max mode: Responses summarization returned no output_text parts');
+  }
+  return textParts.map((part: any) => part.text).join('');
+}
+
+// ── Summarization prompt ──────────────────────────────────────────────────────
+// Inspired by claude-code/opencode compaction prompts, adapted for proxy use.
+const SUMMARIZATION_PROMPT = `Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and the assistant's previous actions.
+
+Analyze each message chronologically and identify:
+- The user's primary goals and requests
+- Key technical concepts and decisions
+- Files and code sections discussed or modified
+- Problems encountered and solutions applied
+- The current state of work in progress
+
+Your summary MUST:
+1. Preserve all file paths, function names, variable names, and code snippets mentioned
+2. Retain exact error messages and their resolutions
+3. Capture the user's original intent and any refinements
+4. Note what has been completed vs what remains to be done
+5. Include enough technical detail to continue the conversation seamlessly
+
+Format as a structured summary, not a conversation replay. Be concise but do NOT omit any technical details that would be needed to continue the work.`;
+
+// ── Compaction logic ──────────────────────────────────────────────────────────
+// Threshold: compact when estimated input tokens exceed this fraction of model max
+const COMPACT_THRESHOLD = 0.80;
+// Keep the most recent N messages untouched to preserve immediate context
+const KEEP_RECENT_MESSAGES = 10;
+// Never compact if total messages are below this count
+const MIN_MESSAGES_FOR_COMPACTION = 15;
+// Minimum old messages worth summarizing (below this, compaction is skipped)
+const MIN_MESSAGES_TO_SUMMARIZE = 3;
+// Max characters per individual message when building the summarization input
+const MAX_MESSAGE_CHARS_FOR_SUMMARY = 8000;
+// Acknowledgment message inserted after the summary to maintain conversation flow
+const SUMMARY_ACKNOWLEDGMENT = 'Understood. I have the full context from the conversation summary. Let me continue.';
+
+export async function compactIfNeeded(
+  json: any,
+  targetModel: string,
+  targetUrl: string,
+): Promise<any> {
+  if (!maxModeEnabled) return json;
+  if (!json.messages || !Array.isArray(json.messages) || json.messages.length < MIN_MESSAGES_FOR_COMPACTION) {
+    return json;
+  }
+
+  const limits = getModelLimits(targetModel);
+  const estimated = estimateMessagesTokens(json.messages);
+  const threshold = Math.floor(limits.maxInputTokens * COMPACT_THRESHOLD);
+
+  if (estimated <= threshold) {
+    return json;
+  }
+
+  console.log(`🗜️ Max mode: estimated ${estimated} tokens exceeds ${COMPACT_THRESHOLD * 100}% of ${limits.maxInputTokens} — compacting`);
+
+  // Split: system messages + old messages to summarize + recent messages to keep
+  const systemMsgs = json.messages.filter((m: any) => m.role === 'system');
+  const nonSystemMsgs = json.messages.filter((m: any) => m.role !== 'system');
+  // Keep at most half of non-system messages to ensure there's enough old content to summarize
+  const keepCount = Math.min(KEEP_RECENT_MESSAGES, Math.floor(nonSystemMsgs.length / 2));
+  const recentMsgs = nonSystemMsgs.slice(-keepCount);
+  const oldMsgs = nonSystemMsgs.slice(0, -keepCount);
+
+  if (oldMsgs.length < MIN_MESSAGES_TO_SUMMARIZE) return json; // nothing meaningful to compact
+
+  try {
+    const summary = await callSummarize(targetModel, oldMsgs, targetUrl);
+    if (!summary) return json; // summarization failed, pass through
+
+    console.log(`🗜️ Max mode: compacted ${oldMsgs.length} messages → 1 summary (${estimateTokens(summary)} est. tokens)`);
+
+    // Rebuild messages: system + summary-as-user-message + recent
+    json.messages = [
+      ...systemMsgs,
+      { role: 'user', content: `[Conversation Summary]\n${summary}` },
+      { role: 'assistant', content: SUMMARY_ACKNOWLEDGMENT },
+      ...recentMsgs,
+    ];
+
+    return json;
+  } catch (e: any) {
+    console.error(`❌ Max mode: compaction failed, passing through original:`, e?.message || e);
+    return json;
+  }
+}
+
+async function callSummarize(model: string, messages: any[], targetUrl: string): Promise<string | null> {
+  const conversationText = messages.map(m => {
+    const content = typeof m.content === 'string'
+      ? m.content
+      : Array.isArray(m.content)
+        ? m.content.map((p: any) => p.text || JSON.stringify(p)).join('\n')
+        : JSON.stringify(m.content);
+    const role = m.role || 'unknown';
+    const truncated = truncateContent(content, MAX_MESSAGE_CHARS_FOR_SUMMARY);
+    return `[${role}]: ${truncated}`;
+  }).join('\n\n');
+
+  console.log(`🗜️ Max mode: sending summarization request (${messages.length} messages → ${model})`);
+
+  if (needsResponsesAPI(model)) {
+    const responsesUrl = new URL('/v1/responses', targetUrl);
+    const responsesBody = JSON.stringify({
+      model,
+      instructions: SUMMARIZATION_PROMPT,
+      input: `Please summarize the following conversation:\n\n${conversationText}`,
+      max_output_tokens: 4096,
+      temperature: 0.2,
+      stream: false,
+    });
+
+    const resp = await fetch(responsesUrl.toString(), {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'Authorization': getUpstreamAuthHeader(),
+      },
+      body: responsesBody,
+    });
+
+    if (!resp.ok) {
+      const errText = await resp.text();
+      console.error(`❌ Max mode summarization failed (${resp.status}):`, errText.slice(0, 500));
+      return null;
+    }
+
+    const data = await resp.json() as any;
+    const content = extractResponsesTextContent(data);
+
+    if (content) {
+      console.log(`🗜️ Max mode: summarization complete (${estimateTokens(content)} est. tokens)`);
+    }
+
+    return content || null;
+  }
+
+  const summarizeMessages = [
+    { role: 'system', content: SUMMARIZATION_PROMPT },
+    {
+      role: 'user',
+      content: `Please summarize the following conversation:\n\n${conversationText}`,
+    },
+  ];
+
+  const chatBody = JSON.stringify({
+    model,
+    messages: summarizeMessages,
+    max_tokens: 4096,
+    temperature: 0.2,
+    stream: false,
+  });
+
+  const chatUrl = new URL('/v1/chat/completions', targetUrl);
+  const resp = await fetch(chatUrl.toString(), {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'Authorization': getUpstreamAuthHeader(),
+    },
+    body: chatBody,
+  });
+
+  if (!resp.ok) {
+    const errText = await resp.text();
+    console.error(`❌ Max mode summarization failed (${resp.status}):`, errText.slice(0, 500));
+    return null;
+  }
+
+  const data = await resp.json() as any;
+  const content = data.choices?.[0]?.message?.content;
+
+  if (content) {
+    console.log(`🗜️ Max mode: summarization complete (${estimateTokens(content)} est. tokens)`);
+  }
+
+  return content || null;
+}
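As a sanity check on the char-based heuristic in `estimateTokens` above, here is a small standalone example. The function body is copied from the file; the sample strings and expected outputs are the editor's own illustration, not package code.

```ts
// Same blended heuristic as max-mode.ts: ASCII chars / 4, non-ASCII chars / 1.5.
function estimateTokens(text: string): number {
  if (!text) return 0;
  let ascii = 0, nonAscii = 0;
  for (let i = 0; i < text.length; i++) {
    if (text.charCodeAt(i) < 128) ascii++;
    else nonAscii++;
  }
  return Math.ceil(ascii / 4 + nonAscii / 1.5);
}

console.log(estimateTokens('a'.repeat(40)));   // 40 ASCII chars → 10 estimated tokens
console.log(estimateTokens('文'.repeat(12)));  // 12 CJK chars → ceil(12 / 1.5) = 8
```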
package/model-routing.ts
ADDED
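The body of `model-routing.ts` is not expanded in this diff, so the following is only an inference from its call sites: `proxy-router.ts` and `max-mode.ts` both call `needsResponsesAPI(model)` and branch to the Responses API bridge, which the package description ties to GPT 5.x models. A hypothetical minimal shape, not the published source:

```ts
// ASSUMPTION: reconstructed from call sites only; the real model-routing.ts
// is added in this release but its contents are not shown in the diff.
export function needsResponsesAPI(model: string): boolean {
  // Route GPT-5.x models through the /v1/responses bridge, per the README's
  // "bridges Responses API for GPT 5.x" description.
  return model.toLowerCase().includes('gpt-5');
}
```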
package/package.json
CHANGED
@@ -1,36 +1,36 @@
-{
-  "name": "copilot-cursor-proxy",
-  "version": "1.2.0",
-  "description": "Proxy that bridges GitHub Copilot API to Cursor IDE — translates Anthropic format, bridges Responses API for GPT 5.x, and more",
-  "bin": {
-    "copilot-cursor-proxy": "bin/cli.js"
-  },
-  "files": [
-    "bin/",
-    "*.ts",
-    "dashboard.html",
-    "README.md"
-  ],
-  "scripts": {
-    "build": "bun build start.ts
-    "dev": "bun run start.ts",
-    "start": "bun dist/start.js"
-  },
-  "keywords": [
-    "copilot",
-    "cursor",
-    "proxy",
-    "anthropic",
-    "openai",
-    "responses-api"
-  ],
-  "license": "MIT",
-  "repository": {
-    "type": "git",
-    "url": "git+https://github.com/CharlesYWL/copilot-for-cursor.git"
-  },
-  "engines": {
-    "node": ">=18",
-    "bun": ">=1.0"
-  }
-}
+{
+  "name": "copilot-cursor-proxy",
+  "version": "1.2.1",
+  "description": "Proxy that bridges GitHub Copilot API to Cursor IDE — translates Anthropic format, bridges Responses API for GPT 5.x, and more",
+  "bin": {
+    "copilot-cursor-proxy": "bin/cli.js"
+  },
+  "files": [
+    "bin/",
+    "*.ts",
+    "dashboard.html",
+    "README.md"
+  ],
+  "scripts": {
+    "build": "bun build start.ts --outdir dist --target node",
+    "dev": "bun run start.ts",
+    "start": "bun dist/start.js"
+  },
+  "keywords": [
+    "copilot",
+    "cursor",
+    "proxy",
+    "anthropic",
+    "openai",
+    "responses-api"
+  ],
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/CharlesYWL/copilot-for-cursor.git"
+  },
+  "engines": {
+    "node": ">=18",
+    "bun": ">=1.0"
+  }
+}
package/proxy-router.ts
CHANGED
@@ -5,6 +5,8 @@ import { logIncomingRequest, logTransformedRequest } from './debug-logger';
 import { addRequestLog, getNextRequestId, getUsageStats, flushToDisk, type RequestLog } from './usage-db';
 import { loadAuthConfig, saveAuthConfig, generateApiKey, validateApiKey } from './auth-config';
 import { getUpstreamAuthHeader, getUpstreamApiKeys, createUpstreamApiKey, deleteUpstreamApiKey } from './upstream-auth';
+import { compactIfNeeded, isMaxMode } from './max-mode';
+import { needsResponsesAPI } from './model-routing';
 
 // ── Console capture for SSE streaming ─────────────────────────────────────────
 interface ConsoleLine {
@@ -271,19 +273,24 @@ Bun.serve({
 
       logTransformedRequest(json);
 
+      // ── Max mode: compact long conversations before sending ───────────
+      if (isMaxMode()) {
+        json = await compactIfNeeded(json, targetModel, TARGET_URL);
+      }
+
       const headers = new Headers(req.headers);
       headers.set("host", targetUrl.host);
       headers.set("authorization", getUpstreamAuthHeader());
 
-      const
+      const shouldUseResponsesAPI = needsResponsesAPI(targetModel);
 
-      if (
+      if (shouldUseResponsesAPI && json.max_tokens) {
        json.max_completion_tokens = json.max_tokens;
        delete json.max_tokens;
        console.log(`🔧 Converted max_tokens → max_completion_tokens`);
       }
 
-      if (
+      if (shouldUseResponsesAPI) {
        console.log(`🔀 Model ${targetModel} — using Responses API bridge`);
        const chatId = `chatcmpl-proxy-${++responseCounter}`;
        try {
package/start.ts
CHANGED
@@ -7,6 +7,13 @@
 import { spawn, sleep } from 'bun';
 import { existsSync } from 'fs';
 import { getUpstreamAuthHeader } from './upstream-auth';
+import { enableMaxMode, isMaxMode, fetchAndCacheModelLimits } from './max-mode';
+
+// ── Parse CLI flags ──────────────────────────────────────────────────────────
+const args = process.argv.slice(2);
+if (args.includes('--max')) {
+  enableMaxMode();
+}
 
 const COPILOT_API_PORT = 4141;
 const PROXY_PORT = 4142;
@@ -100,6 +107,12 @@ async function main() {
     console.log(`${GREEN}✅ copilot-api is ready on port ${COPILOT_API_PORT}${RESET}`);
   }
 
+  // 1.5 If --max mode, pre-fetch and cache model token limits
+  if (isMaxMode()) {
+    console.log(`${CYAN}🔥 Max mode enabled — will auto-compact long conversations${RESET}`);
+    await fetchAndCacheModelLimits(`http://localhost:${COPILOT_API_PORT}`);
+  }
+
   // 2. Check if proxy is already running
   const proxyAlreadyRunning = await isPortInUse(PROXY_PORT);
   if (proxyAlreadyRunning) {