copilot-cursor-proxy 1.2.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -28,6 +28,14 @@ cd copilot-for-cursor
28
28
  bun run start.ts
29
29
  ```
30
30
 
31
+ ### Enable Max Mode (auto-compact long conversations)
32
+
33
+ ```bash
34
+ bun run start.ts --max
35
+ ```
36
+
37
+ > **Max mode** automatically compacts conversation history when the estimated token count exceeds 80% of the model's input token limit. It summarizes older messages into a structured summary while keeping the most recent messages intact — letting you have much longer coding sessions without hitting token limits.
38
+
31
39
  ### Then start an HTTPS tunnel
32
40
 
33
41
  Cursor requires HTTPS. In a second terminal:
@@ -66,6 +74,10 @@ Cursor → (HTTPS tunnel) → proxy-router (:4142) → copilot-api (:4141) → G
66
74
  | `stream-proxy.ts` | Streaming passthrough with chunk logging and error detection |
67
75
  | `debug-logger.ts` | Request/response debug logging helpers |
68
76
  | `start.ts` | One-command launcher for copilot-api + proxy-router |
77
+ | `max-mode.ts` | Auto-compaction for long conversations (`--max` flag) |
78
+ | `usage-db.ts` | Persistent request/token usage tracking |
79
+ | `auth-config.ts` | API key generation, validation, and config persistence |
80
+ | `upstream-auth.ts` | Upstream copilot-api authentication and key management |
69
81
 
70
82
  ---
71
83
 
@@ -139,6 +151,7 @@ Cursor → (HTTPS tunnel) → proxy-router (:4142) → copilot-api (:4141) → G
139
151
  * **💻 Terminal:** `Shell` (run commands)
140
152
  * **🔍 Search:** `Grep`, `Glob`, `SemanticSearch`
141
153
  * **🔌 MCP Tools:** External tools (Neon, Playwright, etc.)
154
+ * **🗜️ Max Mode:** Auto-compact long conversations to stay within token limits (`--max`)
142
155
 
143
156
  ---
144
157
 
@@ -187,6 +200,7 @@ Three tabs:
187
200
  | Plan mode | ✅ Works |
188
201
  | Agent mode | ✅ Works |
189
202
  | All GPT-5.x models | ✅ Works |
203
+ | Max mode (long session compaction) | ✅ Works (`--max` flag) |
190
204
  | Extended thinking (chain-of-thought) | ❌ Stripped |
191
205
  | Prompt caching (`cache_control`) | ❌ Stripped |
192
206
  | Claude Vision | ❌ Not supported via Copilot |
@@ -208,6 +222,9 @@ The proxy auto-routes these. Make sure you're running the latest version.
208
222
  **"connection refused":**
209
223
  Ensure services are running: `bun run start.ts` or check `http://localhost:4142`.
210
224
 
225
+ **Max mode not compacting:**
226
+ Compaction only triggers when the estimated token count exceeds 80% of the model's input token limit and the conversation has at least 15 messages. Check the console log for `🗜️ Max mode` messages.
227
+
211
228
  ---
212
229
 
213
230
  > ⚠️ **DISCLAIMER:** This project is **unofficial** and for **educational purposes only**. It interacts with undocumented internal APIs of GitHub Copilot and Cursor. Use at your own risk. The authors are not affiliated with GitHub, Microsoft, or Anysphere (Cursor). Please use your API credits responsibly and in accordance with the provider's Terms of Service.
@@ -121,6 +121,16 @@ const transformMessages = (json: any, isClaude: boolean): void => {
121
121
  }
122
122
  }
123
123
 
124
+ // Preserve any existing OpenAI-format tool_calls on the message
125
+ // (hybrid format: content is array but tool_calls are separate)
126
+ if (msg.tool_calls && Array.isArray(msg.tool_calls)) {
127
+ for (const tc of msg.tool_calls) {
128
+ if (!toolCalls.some(t => t.id === tc.id)) {
129
+ toolCalls.push(tc);
130
+ }
131
+ }
132
+ }
133
+
124
134
  const assistantMsg: any = { role: 'assistant' };
125
135
  assistantMsg.content = textParts.join('\n') || null;
126
136
  if (toolCalls.length > 0) assistantMsg.tool_calls = toolCalls;
package/max-mode.ts ADDED
@@ -0,0 +1,305 @@
1
+ import { getUpstreamAuthHeader } from './upstream-auth';
2
+ import { needsResponsesAPI } from './model-routing';
3
+
4
+ // ── Global config ─────────────────────────────────────────────────────────────
5
+ let maxModeEnabled = false;
6
+
7
+ export function enableMaxMode(): void {
8
+ maxModeEnabled = true;
9
+ }
10
+
11
+ export function isMaxMode(): boolean {
12
+ return maxModeEnabled;
13
+ }
14
+
15
+ // ── Model token limits cache ──────────────────────────────────────────────────
16
+ interface ModelLimits {
17
+ maxInputTokens: number;
18
+ maxOutputTokens: number;
19
+ }
20
+
21
+ const modelLimitsCache = new Map<string, ModelLimits>();
22
+
23
+ // Fallback defaults — only used when upstream /v1/models doesn't return capabilities.limits.
24
+ // Real limits are fetched dynamically from the copilot-api at startup via fetchAndCacheModelLimits().
25
+ // Output token values: Claude 64K (Sonnet 3.5/4 extended), GPT-4/5 16K, o1/o3 100K reasoning.
26
+ const DEFAULT_LIMITS: Record<string, ModelLimits> = {
27
+ 'claude': { maxInputTokens: 200000, maxOutputTokens: 64000 },
28
+ 'gpt-4': { maxInputTokens: 128000, maxOutputTokens: 16384 },
29
+ 'gpt-5': { maxInputTokens: 128000, maxOutputTokens: 16384 },
30
+ 'o1': { maxInputTokens: 200000, maxOutputTokens: 100000 },
31
+ 'o3': { maxInputTokens: 200000, maxOutputTokens: 100000 },
32
+ 'default': { maxInputTokens: 128000, maxOutputTokens: 16384 }, // conservative general-purpose fallback
33
+ };
34
+
35
+ function getDefaultLimits(model: string): ModelLimits {
36
+ const lower = model.toLowerCase();
37
+ for (const [prefix, limits] of Object.entries(DEFAULT_LIMITS)) {
38
+ if (prefix !== 'default' && lower.includes(prefix)) return limits;
39
+ }
40
+ return DEFAULT_LIMITS['default'];
41
+ }
42
+
43
+ export async function fetchAndCacheModelLimits(targetUrl: string): Promise<void> {
44
+ try {
45
+ const resp = await fetch(new URL('/v1/models', targetUrl).toString(), {
46
+ headers: { 'Authorization': getUpstreamAuthHeader() },
47
+ signal: AbortSignal.timeout(10000),
48
+ });
49
+ if (!resp.ok) return;
50
+ const data = await resp.json() as any;
51
+ if (!data.data || !Array.isArray(data.data)) return;
52
+
53
+ for (const model of data.data) {
54
+ const limits = model.capabilities?.limits;
55
+ if (limits) {
56
+ modelLimitsCache.set(model.id, {
57
+ maxInputTokens: limits.max_prompt_tokens || limits.max_input_tokens || getDefaultLimits(model.id).maxInputTokens,
58
+ maxOutputTokens: limits.max_output_tokens || getDefaultLimits(model.id).maxOutputTokens,
59
+ });
60
+ }
61
+ }
62
+ console.log(`📋 Max mode: cached token limits for ${modelLimitsCache.size} models`);
63
+ for (const [id, lim] of modelLimitsCache) {
64
+ console.log(` ${id}: input=${lim.maxInputTokens}, output=${lim.maxOutputTokens}`);
65
+ }
66
+ } catch (e: any) {
67
+ console.warn(`⚠️ Max mode: failed to fetch model limits: ${e?.message || e}`);
68
+ }
69
+ }
70
+
71
+ export function getModelLimits(model: string): ModelLimits {
72
+ return modelLimitsCache.get(model) || getDefaultLimits(model);
73
+ }
74
+
75
+ // ── Token estimation ──────────────────────────────────────────────────────────
76
+ // Simple char/4 heuristic — fast, zero-dependency, ~80% accurate for English.
77
+ // For mixed CJK content each character ≈ 1-2 tokens, so we use a blended ratio.
78
+
79
+ function estimateTokens(text: string): number {
80
+ if (!text) return 0;
81
+ // rough estimate: ascii chars / 4, non-ascii chars / 1.5
82
+ let ascii = 0, nonAscii = 0;
83
+ for (let i = 0; i < text.length; i++) {
84
+ if (text.charCodeAt(i) < 128) ascii++;
85
+ else nonAscii++;
86
+ }
87
+ return Math.ceil(ascii / 4 + nonAscii / 1.5);
88
+ }
89
+
90
+ function estimateMessagesTokens(messages: any[]): number {
91
+ let total = 0;
92
+ for (const msg of messages) {
93
+ // role overhead
94
+ total += 4;
95
+ if (typeof msg.content === 'string') {
96
+ total += estimateTokens(msg.content);
97
+ } else if (Array.isArray(msg.content)) {
98
+ for (const part of msg.content) {
99
+ if (part.type === 'text') total += estimateTokens(part.text || '');
100
+ else total += estimateTokens(JSON.stringify(part));
101
+ }
102
+ }
103
+ // tool calls overhead
104
+ if (msg.tool_calls) {
105
+ total += estimateTokens(JSON.stringify(msg.tool_calls));
106
+ }
107
+ }
108
+ return total;
109
+ }
110
+
111
+ // ── Helpers ───────────────────────────────────────────────────────────────────
112
+ function truncateContent(content: string, maxChars: number): string {
113
+ if (content.length <= maxChars) return content;
114
+ return content.slice(0, maxChars) + '\n... [truncated]';
115
+ }
116
+
117
+ function extractResponsesTextContent(data: any): string {
118
+ const outputMessages = (data.output || []).filter((item: any) =>
119
+ item.type === 'message' && Array.isArray(item.content)
120
+ );
121
+ const textParts = outputMessages
122
+ .flatMap((item: any) => item.content)
123
+ .filter((part: any) => part.type === 'output_text');
124
+ if (textParts.length === 0) {
125
+ console.warn('⚠️ Max mode: Responses summarization returned no output_text parts');
126
+ }
127
+ return textParts.map((part: any) => part.text).join('');
128
+ }
129
+
130
+ // ── Summarization prompt ──────────────────────────────────────────────────────
131
+ // Inspired by claude-code/opencode compaction prompts, adapted for proxy use.
132
+ const SUMMARIZATION_PROMPT = `Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and the assistant's previous actions.
133
+
134
+ Analyze each message chronologically and identify:
135
+ - The user's primary goals and requests
136
+ - Key technical concepts and decisions
137
+ - Files and code sections discussed or modified
138
+ - Problems encountered and solutions applied
139
+ - The current state of work in progress
140
+
141
+ Your summary MUST:
142
+ 1. Preserve all file paths, function names, variable names, and code snippets mentioned
143
+ 2. Retain exact error messages and their resolutions
144
+ 3. Capture the user's original intent and any refinements
145
+ 4. Note what has been completed vs what remains to be done
146
+ 5. Include enough technical detail to continue the conversation seamlessly
147
+
148
+ Format as a structured summary, not a conversation replay. Be concise but do NOT omit any technical details that would be needed to continue the work.`;
149
+
150
+ // ── Compaction logic ──────────────────────────────────────────────────────────
151
+ // Threshold: compact when estimated input tokens exceed this fraction of model max
152
+ const COMPACT_THRESHOLD = 0.80;
153
+ // Keep the most recent N messages untouched to preserve immediate context
154
+ const KEEP_RECENT_MESSAGES = 10;
155
+ // Never compact if total messages are below this count
156
+ const MIN_MESSAGES_FOR_COMPACTION = 15;
157
+ // Minimum old messages worth summarizing (below this, compaction is skipped)
158
+ const MIN_MESSAGES_TO_SUMMARIZE = 3;
159
+ // Max characters per individual message when building the summarization input
160
+ const MAX_MESSAGE_CHARS_FOR_SUMMARY = 8000;
161
+ // Acknowledgment message inserted after the summary to maintain conversation flow
162
+ const SUMMARY_ACKNOWLEDGMENT = 'Understood. I have the full context from the conversation summary. Let me continue.';
163
+
164
+ export async function compactIfNeeded(
165
+ json: any,
166
+ targetModel: string,
167
+ targetUrl: string,
168
+ ): Promise<any> {
169
+ if (!maxModeEnabled) return json;
170
+ if (!json.messages || !Array.isArray(json.messages) || json.messages.length < MIN_MESSAGES_FOR_COMPACTION) {
171
+ return json;
172
+ }
173
+
174
+ const limits = getModelLimits(targetModel);
175
+ const estimated = estimateMessagesTokens(json.messages);
176
+ const threshold = Math.floor(limits.maxInputTokens * COMPACT_THRESHOLD);
177
+
178
+ if (estimated <= threshold) {
179
+ return json;
180
+ }
181
+
182
+ console.log(`🗜️ Max mode: estimated ${estimated} tokens exceeds ${COMPACT_THRESHOLD * 100}% of ${limits.maxInputTokens} — compacting`);
183
+
184
+ // Split: system messages + old messages to summarize + recent messages to keep
185
+ const systemMsgs = json.messages.filter((m: any) => m.role === 'system');
186
+ const nonSystemMsgs = json.messages.filter((m: any) => m.role !== 'system');
187
+ // Keep at most half of non-system messages to ensure there's enough old content to summarize
188
+ const keepCount = Math.min(KEEP_RECENT_MESSAGES, Math.floor(nonSystemMsgs.length / 2));
189
+ const recentMsgs = nonSystemMsgs.slice(-keepCount);
190
+ const oldMsgs = nonSystemMsgs.slice(0, -keepCount);
191
+
192
+ if (oldMsgs.length < MIN_MESSAGES_TO_SUMMARIZE) return json; // nothing meaningful to compact
193
+
194
+ try {
195
+ const summary = await callSummarize(targetModel, oldMsgs, targetUrl);
196
+ if (!summary) return json; // summarization failed, pass through
197
+
198
+ console.log(`🗜️ Max mode: compacted ${oldMsgs.length} messages → 1 summary (${estimateTokens(summary)} est. tokens)`);
199
+
200
+ // Rebuild messages: system + summary-as-user-message + recent
201
+ json.messages = [
202
+ ...systemMsgs,
203
+ { role: 'user', content: `[Conversation Summary]\n${summary}` },
204
+ { role: 'assistant', content: SUMMARY_ACKNOWLEDGMENT },
205
+ ...recentMsgs,
206
+ ];
207
+
208
+ return json;
209
+ } catch (e: any) {
210
+ console.error(`❌ Max mode: compaction failed, passing through original:`, e?.message || e);
211
+ return json;
212
+ }
213
+ }
214
+
215
+ async function callSummarize(model: string, messages: any[], targetUrl: string): Promise<string | null> {
216
+ const conversationText = messages.map(m => {
217
+ const content = typeof m.content === 'string'
218
+ ? m.content
219
+ : Array.isArray(m.content)
220
+ ? m.content.map((p: any) => p.text || JSON.stringify(p)).join('\n')
221
+ : JSON.stringify(m.content);
222
+ const role = m.role || 'unknown';
223
+ const truncated = truncateContent(content, MAX_MESSAGE_CHARS_FOR_SUMMARY);
224
+ return `[${role}]: ${truncated}`;
225
+ }).join('\n\n');
226
+
227
+ console.log(`🗜️ Max mode: sending summarization request (${messages.length} messages → ${model})`);
228
+
229
+ if (needsResponsesAPI(model)) {
230
+ const responsesUrl = new URL('/v1/responses', targetUrl);
231
+ const responsesBody = JSON.stringify({
232
+ model,
233
+ instructions: SUMMARIZATION_PROMPT,
234
+ input: `Please summarize the following conversation:\n\n${conversationText}`,
235
+ max_output_tokens: 4096,
236
+ temperature: 0.2,
237
+ stream: false,
238
+ });
239
+
240
+ const resp = await fetch(responsesUrl.toString(), {
241
+ method: 'POST',
242
+ headers: {
243
+ 'Content-Type': 'application/json',
244
+ 'Authorization': getUpstreamAuthHeader(),
245
+ },
246
+ body: responsesBody,
247
+ });
248
+
249
+ if (!resp.ok) {
250
+ const errText = await resp.text();
251
+ console.error(`❌ Max mode summarization failed (${resp.status}):`, errText.slice(0, 500));
252
+ return null;
253
+ }
254
+
255
+ const data = await resp.json() as any;
256
+ const content = extractResponsesTextContent(data);
257
+
258
+ if (content) {
259
+ console.log(`🗜️ Max mode: summarization complete (${estimateTokens(content)} est. tokens)`);
260
+ }
261
+
262
+ return content || null;
263
+ }
264
+
265
+ const summarizeMessages = [
266
+ { role: 'system', content: SUMMARIZATION_PROMPT },
267
+ {
268
+ role: 'user',
269
+ content: `Please summarize the following conversation:\n\n${conversationText}`,
270
+ },
271
+ ];
272
+
273
+ const chatBody = JSON.stringify({
274
+ model,
275
+ messages: summarizeMessages,
276
+ max_tokens: 4096,
277
+ temperature: 0.2,
278
+ stream: false,
279
+ });
280
+
281
+ const chatUrl = new URL('/v1/chat/completions', targetUrl);
282
+ const resp = await fetch(chatUrl.toString(), {
283
+ method: 'POST',
284
+ headers: {
285
+ 'Content-Type': 'application/json',
286
+ 'Authorization': getUpstreamAuthHeader(),
287
+ },
288
+ body: chatBody,
289
+ });
290
+
291
+ if (!resp.ok) {
292
+ const errText = await resp.text();
293
+ console.error(`❌ Max mode summarization failed (${resp.status}):`, errText.slice(0, 500));
294
+ return null;
295
+ }
296
+
297
+ const data = await resp.json() as any;
298
+ const content = data.choices?.[0]?.message?.content;
299
+
300
+ if (content) {
301
+ console.log(`🗜️ Max mode: summarization complete (${estimateTokens(content)} est. tokens)`);
302
+ }
303
+
304
+ return content || null;
305
+ }
@@ -0,0 +1,3 @@
1
+ export function needsResponsesAPI(model: string): boolean {
2
+ return /^(?:gpt-5\.(?:[2-9]|\d{2,})(?:-codex)?|o\d+|goldeneye)/i.test(model);
3
+ }
package/package.json CHANGED
@@ -1,36 +1,36 @@
1
- {
2
- "name": "copilot-cursor-proxy",
3
- "version": "1.2.0",
4
- "description": "Proxy that bridges GitHub Copilot API to Cursor IDE — translates Anthropic format, bridges Responses API for GPT 5.x, and more",
5
- "bin": {
6
- "copilot-cursor-proxy": "bin/cli.js"
7
- },
8
- "files": [
9
- "bin/",
10
- "*.ts",
11
- "dashboard.html",
12
- "README.md"
13
- ],
14
- "scripts": {
15
- "build": "bun build start.ts proxy-router.ts anthropic-transforms.ts responses-bridge.ts responses-converters.ts stream-proxy.ts debug-logger.ts auth-config.ts upstream-auth.ts --outdir dist --target node",
16
- "dev": "bun run start.ts",
17
- "start": "bun dist/start.js"
18
- },
19
- "keywords": [
20
- "copilot",
21
- "cursor",
22
- "proxy",
23
- "anthropic",
24
- "openai",
25
- "responses-api"
26
- ],
27
- "license": "MIT",
28
- "repository": {
29
- "type": "git",
30
- "url": "git+https://github.com/CharlesYWL/copilot-for-cursor.git"
31
- },
32
- "engines": {
33
- "node": ">=18",
34
- "bun": ">=1.0"
35
- }
36
- }
1
+ {
2
+ "name": "copilot-cursor-proxy",
3
+ "version": "1.2.1",
4
+ "description": "Proxy that bridges GitHub Copilot API to Cursor IDE — translates Anthropic format, bridges Responses API for GPT 5.x, and more",
5
+ "bin": {
6
+ "copilot-cursor-proxy": "bin/cli.js"
7
+ },
8
+ "files": [
9
+ "bin/",
10
+ "*.ts",
11
+ "dashboard.html",
12
+ "README.md"
13
+ ],
14
+ "scripts": {
15
+ "build": "bun build start.ts --outdir dist --target node",
16
+ "dev": "bun run start.ts",
17
+ "start": "bun dist/start.js"
18
+ },
19
+ "keywords": [
20
+ "copilot",
21
+ "cursor",
22
+ "proxy",
23
+ "anthropic",
24
+ "openai",
25
+ "responses-api"
26
+ ],
27
+ "license": "MIT",
28
+ "repository": {
29
+ "type": "git",
30
+ "url": "git+https://github.com/CharlesYWL/copilot-for-cursor.git"
31
+ },
32
+ "engines": {
33
+ "node": ">=18",
34
+ "bun": ">=1.0"
35
+ }
36
+ }
package/proxy-router.ts CHANGED
@@ -5,6 +5,8 @@ import { logIncomingRequest, logTransformedRequest } from './debug-logger';
5
5
  import { addRequestLog, getNextRequestId, getUsageStats, flushToDisk, type RequestLog } from './usage-db';
6
6
  import { loadAuthConfig, saveAuthConfig, generateApiKey, validateApiKey } from './auth-config';
7
7
  import { getUpstreamAuthHeader, getUpstreamApiKeys, createUpstreamApiKey, deleteUpstreamApiKey } from './upstream-auth';
8
+ import { compactIfNeeded, isMaxMode } from './max-mode';
9
+ import { needsResponsesAPI } from './model-routing';
8
10
 
9
11
  // ── Console capture for SSE streaming ─────────────────────────────────────────
10
12
  interface ConsoleLine {
@@ -271,19 +273,24 @@ Bun.serve({
271
273
 
272
274
  logTransformedRequest(json);
273
275
 
276
+ // ── Max mode: compact long conversations before sending ───────────
277
+ if (isMaxMode()) {
278
+ json = await compactIfNeeded(json, targetModel, TARGET_URL);
279
+ }
280
+
274
281
  const headers = new Headers(req.headers);
275
282
  headers.set("host", targetUrl.host);
276
283
  headers.set("authorization", getUpstreamAuthHeader());
277
284
 
278
- const needsResponsesAPI = targetModel.match(/^gpt-5\.[2-9]|^gpt-5\.\d+-codex|^o[1-9]|^goldeneye/i);
285
+ const shouldUseResponsesAPI = needsResponsesAPI(targetModel);
279
286
 
280
- if (needsResponsesAPI && json.max_tokens) {
287
+ if (shouldUseResponsesAPI && json.max_tokens) {
281
288
  json.max_completion_tokens = json.max_tokens;
282
289
  delete json.max_tokens;
283
290
  console.log(`🔧 Converted max_tokens → max_completion_tokens`);
284
291
  }
285
292
 
286
- if (needsResponsesAPI) {
293
+ if (shouldUseResponsesAPI) {
287
294
  console.log(`🔀 Model ${targetModel} — using Responses API bridge`);
288
295
  const chatId = `chatcmpl-proxy-${++responseCounter}`;
289
296
  try {
package/start.ts CHANGED
@@ -7,6 +7,13 @@
7
7
  import { spawn, sleep } from 'bun';
8
8
  import { existsSync } from 'fs';
9
9
  import { getUpstreamAuthHeader } from './upstream-auth';
10
+ import { enableMaxMode, isMaxMode, fetchAndCacheModelLimits } from './max-mode';
11
+
12
+ // ── Parse CLI flags ──────────────────────────────────────────────────────────
13
+ const args = process.argv.slice(2);
14
+ if (args.includes('--max')) {
15
+ enableMaxMode();
16
+ }
10
17
 
11
18
  const COPILOT_API_PORT = 4141;
12
19
  const PROXY_PORT = 4142;
@@ -100,6 +107,12 @@ async function main() {
100
107
  console.log(`${GREEN}✅ copilot-api is ready on port ${COPILOT_API_PORT}${RESET}`);
101
108
  }
102
109
 
110
+ // 1.5 If --max mode, pre-fetch and cache model token limits
111
+ if (isMaxMode()) {
112
+ console.log(`${CYAN}🔥 Max mode enabled — will auto-compact long conversations${RESET}`);
113
+ await fetchAndCacheModelLimits(`http://localhost:${COPILOT_API_PORT}`);
114
+ }
115
+
103
116
  // 2. Check if proxy is already running
104
117
  const proxyAlreadyRunning = await isPortInUse(PROXY_PORT);
105
118
  if (proxyAlreadyRunning) {