@hsupu/copilot-api 0.7.7 → 0.7.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -46,7 +46,7 @@ const state = {
46
46
  accountType: "individual",
47
47
  manualApprove: false,
48
48
  showToken: false,
49
- autoCompact: false
49
+ autoCompact: true
50
50
  };
51
51
 
52
52
  //#endregion
@@ -246,8 +246,8 @@ async function getVSCodeVersion() {
246
246
  }
247
247
  });
248
248
  if (!response.ok) return FALLBACK;
249
- const version = (await response.json()).tag_name;
250
- if (version && /^\d+\.\d+\.\d+$/.test(version)) return version;
249
+ const version$1 = (await response.json()).tag_name;
250
+ if (version$1 && /^\d+\.\d+\.\d+$/.test(version$1)) return version$1;
251
251
  return FALLBACK;
252
252
  } catch {
253
253
  return FALLBACK;
@@ -434,13 +434,13 @@ const checkUsage = defineCommand({
434
434
  const premiumUsed = premiumTotal - premium.remaining;
435
435
  const premiumPercentUsed = premiumTotal > 0 ? premiumUsed / premiumTotal * 100 : 0;
436
436
  const premiumPercentRemaining = premium.percent_remaining;
437
- function summarizeQuota(name, snap) {
438
- if (!snap) return `${name}: N/A`;
437
+ function summarizeQuota(name$1, snap) {
438
+ if (!snap) return `${name$1}: N/A`;
439
439
  const total = snap.entitlement;
440
440
  const used = total - snap.remaining;
441
441
  const percentUsed = total > 0 ? used / total * 100 : 0;
442
442
  const percentRemaining = snap.percent_remaining;
443
- return `${name}: ${used}/${total} used (${percentUsed.toFixed(1)}% used, ${percentRemaining.toFixed(1)}% remaining)`;
443
+ return `${name$1}: ${used}/${total} used (${percentUsed.toFixed(1)}% used, ${percentRemaining.toFixed(1)}% remaining)`;
444
444
  }
445
445
  const premiumLine = `Premium: ${premiumUsed}/${premiumTotal} used (${premiumPercentUsed.toFixed(1)}% used, ${premiumPercentRemaining.toFixed(1)}% remaining)`;
446
446
  const chatLine = summarizeQuota("Chat", usage.quota_snapshots.chat);
@@ -481,9 +481,9 @@ async function checkTokenExists() {
481
481
  }
482
482
  }
483
483
  async function getDebugInfo() {
484
- const [version, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
484
+ const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
485
485
  return {
486
- version,
486
+ version: version$1,
487
487
  runtime: getRuntimeInfo(),
488
488
  paths: {
489
489
  APP_DIR: PATHS.APP_DIR,
@@ -571,8 +571,8 @@ const PATTERNS = {
571
571
  /**
572
572
  * Parse semver version string to comparable parts
573
573
  */
574
- function parseVersion(version) {
575
- return version.split(".").map((n) => Number.parseInt(n, 10) || 0);
574
+ function parseVersion(version$1) {
575
+ return version$1.split(".").map((n) => Number.parseInt(n, 10) || 0);
576
576
  }
577
577
  /**
578
578
  * Compare two semver versions
@@ -590,9 +590,9 @@ function compareVersions(a, b) {
590
590
  }
591
591
  return 0;
592
592
  }
593
- function getPatternTypeForVersion(version) {
594
- if (compareVersions(version, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
595
- if (compareVersions(version, SUPPORTED_VERSIONS.v2b.min) >= 0 && compareVersions(version, SUPPORTED_VERSIONS.v2b.max) <= 0) return "variable";
593
+ function getPatternTypeForVersion(version$1) {
594
+ if (compareVersions(version$1, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
595
+ if (compareVersions(version$1, SUPPORTED_VERSIONS.v2b.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2b.max) <= 0) return "variable";
596
596
  return null;
597
597
  }
598
598
  /**
@@ -624,8 +624,8 @@ function findInVoltaTools(voltaHome) {
624
624
  if (existsSync(packagesPath)) paths.push(packagesPath);
625
625
  const toolsDir = join(voltaHome, "tools", "image", "node");
626
626
  if (existsSync(toolsDir)) try {
627
- for (const version of readdirSync(toolsDir)) {
628
- const claudePath = join(toolsDir, version, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
627
+ for (const version$1 of readdirSync(toolsDir)) {
628
+ const claudePath = join(toolsDir, version$1, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
629
629
  if (existsSync(claudePath)) paths.push(claudePath);
630
630
  }
631
631
  } catch {}
@@ -668,23 +668,23 @@ function getCurrentLimit(content) {
668
668
  * Check if Claude Code version is supported for patching
669
669
  */
670
670
  function checkVersionSupport(cliPath) {
671
- const version = getClaudeCodeVersion(cliPath);
672
- if (!version) return {
671
+ const version$1 = getClaudeCodeVersion(cliPath);
672
+ if (!version$1) return {
673
673
  supported: false,
674
674
  version: null,
675
675
  patternType: null,
676
676
  error: "Could not detect Claude Code version"
677
677
  };
678
- const patternType = getPatternTypeForVersion(version);
678
+ const patternType = getPatternTypeForVersion(version$1);
679
679
  if (!patternType) return {
680
680
  supported: false,
681
- version,
681
+ version: version$1,
682
682
  patternType: null,
683
- error: `Version ${version} is not supported. Supported: ${getSupportedRangeString()}`
683
+ error: `Version ${version$1} is not supported. Supported: ${getSupportedRangeString()}`
684
684
  };
685
685
  return {
686
686
  supported: true,
687
- version,
687
+ version: version$1,
688
688
  patternType
689
689
  };
690
690
  }
@@ -735,8 +735,8 @@ function restoreClaudeCode(cliPath) {
735
735
  return true;
736
736
  }
737
737
  function showStatus(cliPath, currentLimit) {
738
- const version = getClaudeCodeVersion(cliPath);
739
- if (version) consola.info(`Claude Code version: ${version}`);
738
+ const version$1 = getClaudeCodeVersion(cliPath);
739
+ if (version$1) consola.info(`Claude Code version: ${version$1}`);
740
740
  if (currentLimit === null) {
741
741
  consola.warn("Could not detect current limit - CLI may have been updated");
742
742
  consola.info("Look for the BS9 variable or HR function pattern in cli.js");
@@ -818,6 +818,86 @@ const patchClaude = defineCommand({
818
818
  }
819
819
  });
820
820
 
821
+ //#endregion
822
+ //#region package.json
823
+ var name = "@hsupu/copilot-api";
824
+ var version = "0.7.9";
825
+ var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
826
+ var keywords = [
827
+ "proxy",
828
+ "github-copilot",
829
+ "openai-compatible",
830
+ "anthropic-compatible"
831
+ ];
832
+ var homepage = "https://github.com/puxu-msft/copilot-api-js";
833
+ var bugs = "https://github.com/puxu-msft/copilot-api-js/issues";
834
+ var repository = {
835
+ "type": "git",
836
+ "url": "git+https://github.com/puxu-msft/copilot-api-js.git"
837
+ };
838
+ var author = "hsupu";
839
+ var type = "module";
840
+ var bin = { "copilot-api": "dist/main.js" };
841
+ var files = ["dist"];
842
+ var scripts = {
843
+ "build": "npx tsdown",
844
+ "dev": "bun run --watch ./src/main.ts",
845
+ "knip": "knip-bun",
846
+ "lint": "eslint --cache",
847
+ "lint:all": "eslint --cache .",
848
+ "prepack": "npm run build",
849
+ "prepare": "npm run build && (command -v bun >/dev/null 2>&1 && simple-git-hooks || true)",
850
+ "release": "bumpp && npm publish --access public",
851
+ "start": "NODE_ENV=production bun run ./src/main.ts",
852
+ "typecheck": "tsc"
853
+ };
854
+ var simple_git_hooks = { "pre-commit": "bun x lint-staged" };
855
+ var lint_staged = { "*": "bun run lint --fix" };
856
+ var dependencies = {
857
+ "citty": "^0.1.6",
858
+ "clipboardy": "^5.0.0",
859
+ "consola": "^3.4.2",
860
+ "fetch-event-stream": "^0.1.5",
861
+ "gpt-tokenizer": "^3.0.1",
862
+ "hono": "^4.9.9",
863
+ "picocolors": "^1.1.1",
864
+ "proxy-from-env": "^1.1.0",
865
+ "srvx": "^0.8.9",
866
+ "tiny-invariant": "^1.3.3",
867
+ "undici": "^7.16.0"
868
+ };
869
+ var devDependencies = {
870
+ "@echristian/eslint-config": "^0.0.54",
871
+ "@types/bun": "^1.2.23",
872
+ "@types/proxy-from-env": "^1.0.4",
873
+ "bumpp": "^10.2.3",
874
+ "eslint": "^9.37.0",
875
+ "knip": "^5.64.1",
876
+ "lint-staged": "^16.2.3",
877
+ "prettier-plugin-packagejson": "^2.5.19",
878
+ "simple-git-hooks": "^2.13.1",
879
+ "tsdown": "^0.15.6",
880
+ "typescript": "^5.9.3"
881
+ };
882
+ var package_default = {
883
+ name,
884
+ version,
885
+ description,
886
+ keywords,
887
+ homepage,
888
+ bugs,
889
+ repository,
890
+ author,
891
+ type,
892
+ bin,
893
+ files,
894
+ scripts,
895
+ "simple-git-hooks": simple_git_hooks,
896
+ "lint-staged": lint_staged,
897
+ dependencies,
898
+ devDependencies
899
+ };
900
+
821
901
  //#endregion
822
902
  //#region src/lib/adaptive-rate-limiter.ts
823
903
  const DEFAULT_CONFIG$1 = {
@@ -1566,8 +1646,8 @@ var ConsoleRenderer = class {
1566
1646
  /**
1567
1647
  * Get log prefix based on log type
1568
1648
  */
1569
- getLogPrefix(type) {
1570
- switch (type) {
1649
+ getLogPrefix(type$1) {
1650
+ switch (type$1) {
1571
1651
  case "error":
1572
1652
  case "fatal": return pc.red("✖");
1573
1653
  case "warn": return pc.yellow("⚠");
@@ -2096,171 +2176,157 @@ const getTokenCount = async (payload, model) => {
2096
2176
  //#endregion
2097
2177
  //#region src/lib/auto-compact.ts
2098
2178
  const DEFAULT_CONFIG = {
2099
- targetTokens: 12e4,
2100
2179
  safetyMarginPercent: 2,
2101
2180
  maxRequestBodyBytes: 500 * 1024
2102
2181
  };
2182
+ /** Dynamic byte limit that adjusts based on 413 errors */
2183
+ let dynamicByteLimit = null;
2103
2184
  /**
2104
- * Dynamic byte limit that adjusts based on 413 errors.
2105
- * Starts at 500KB and can be adjusted when 413 errors are encountered.
2106
- */
2107
- let dynamicByteLimitOverride = null;
2108
- /**
2109
- * Called when a 413 error is encountered with a specific payload size.
2110
- * Adjusts the dynamic byte limit to 90% of the failing size.
2185
+ * Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
2111
2186
  */
2112
2187
  function onRequestTooLarge(failingBytes) {
2113
2188
  const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
2114
- dynamicByteLimitOverride = newLimit;
2115
- consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed, new limit: ${Math.round(newLimit / 1024)}KB`);
2189
+ dynamicByteLimit = newLimit;
2190
+ consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed ${Math.round(newLimit / 1024)}KB`);
2116
2191
  }
2117
- /**
2118
- * Check if payload needs compaction based on model limits OR request body size.
2119
- * Uses a safety margin to account for token counting differences.
2120
- */
2121
- async function checkNeedsCompaction(payload, model, config = {}) {
2122
- const cfg = {
2123
- ...DEFAULT_CONFIG,
2124
- ...config
2125
- };
2126
- const currentTokens = (await getTokenCount(payload, model)).input;
2127
- const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
2128
- const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
2129
- const currentBytes = JSON.stringify(payload).length;
2130
- const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
2131
- const exceedsTokens = currentTokens > tokenLimit;
2132
- const exceedsBytes = currentBytes > byteLimit;
2133
- let reason;
2134
- if (exceedsTokens && exceedsBytes) reason = "both";
2135
- else if (exceedsTokens) reason = "tokens";
2136
- else if (exceedsBytes) reason = "bytes";
2192
+ function calculateLimits(model, config) {
2193
+ const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
2194
+ const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
2195
+ const byteLimit = dynamicByteLimit ?? config.maxRequestBodyBytes;
2137
2196
  return {
2138
- needed: exceedsTokens || exceedsBytes,
2139
- currentTokens,
2140
2197
  tokenLimit,
2141
- currentBytes,
2142
- byteLimit,
2143
- reason
2198
+ byteLimit
2144
2199
  };
2145
2200
  }
2146
- /**
2147
- * Calculate approximate token count for a single message.
2148
- * This is a fast estimation for splitting decisions.
2149
- */
2150
- function estimateMessageTokens(message) {
2151
- let text = "";
2152
- if (typeof message.content === "string") text = message.content;
2153
- else if (Array.isArray(message.content)) {
2154
- for (const part of message.content) if (part.type === "text") text += part.text;
2155
- else if ("image_url" in part) text += part.image_url.url;
2201
+ /** Estimate tokens for a single message (fast approximation) */
2202
+ function estimateMessageTokens(msg) {
2203
+ let charCount = 0;
2204
+ if (typeof msg.content === "string") charCount = msg.content.length;
2205
+ else if (Array.isArray(msg.content)) {
2206
+ for (const part of msg.content) if (part.type === "text") charCount += part.text.length;
2207
+ else if ("image_url" in part) charCount += Math.min(part.image_url.url.length, 1e4);
2156
2208
  }
2157
- if (message.tool_calls) text += JSON.stringify(message.tool_calls);
2158
- return Math.ceil(text.length / 4) + 10;
2209
+ if (msg.tool_calls) charCount += JSON.stringify(msg.tool_calls).length;
2210
+ return Math.ceil(charCount / 4) + 10;
2159
2211
  }
2160
- /**
2161
- * Extract system messages from the beginning of the message list.
2162
- */
2212
+ /** Get byte size of a message */
2213
+ function getMessageBytes(msg) {
2214
+ return JSON.stringify(msg).length;
2215
+ }
2216
+ /** Extract system/developer messages from the beginning */
2163
2217
  function extractSystemMessages(messages) {
2164
- const systemMessages = [];
2165
- let i = 0;
2166
- while (i < messages.length) {
2167
- const msg = messages[i];
2168
- if (msg.role === "system" || msg.role === "developer") {
2169
- systemMessages.push(msg);
2170
- i++;
2171
- } else break;
2218
+ let splitIndex = 0;
2219
+ while (splitIndex < messages.length) {
2220
+ const role = messages[splitIndex].role;
2221
+ if (role !== "system" && role !== "developer") break;
2222
+ splitIndex++;
2172
2223
  }
2173
2224
  return {
2174
- systemMessages,
2175
- remainingMessages: messages.slice(i)
2225
+ systemMessages: messages.slice(0, splitIndex),
2226
+ conversationMessages: messages.slice(splitIndex)
2176
2227
  };
2177
2228
  }
2178
- /**
2179
- * Extract tool_use ids from assistant messages with tool_calls.
2180
- */
2181
- function getToolUseIds(message) {
2182
- if (message.role === "assistant" && message.tool_calls) return message.tool_calls.map((tc) => tc.id);
2229
+ /** Get tool_use IDs from an assistant message */
2230
+ function getToolCallIds(msg) {
2231
+ if (msg.role === "assistant" && msg.tool_calls) return msg.tool_calls.map((tc) => tc.id);
2183
2232
  return [];
2184
2233
  }
2185
- /**
2186
- * Find messages to keep from the end to stay under target tokens.
2187
- * Returns the starting index of messages to preserve.
2188
- */
2189
- function findPreserveIndex(messages, targetTokens, systemTokens) {
2190
- const availableTokens = targetTokens - systemTokens - 500;
2191
- let accumulatedTokens = 0;
2192
- for (let i = messages.length - 1; i >= 0; i--) {
2193
- const msgTokens = estimateMessageTokens(messages[i]);
2194
- if (accumulatedTokens + msgTokens > availableTokens) return i + 1;
2195
- accumulatedTokens += msgTokens;
2196
- }
2197
- return 0;
2198
- }
2199
- /**
2200
- * Filter out orphaned tool_result messages that don't have a matching tool_use
2201
- * in the preserved message list. This prevents API errors when truncation
2202
- * separates tool_use/tool_result pairs.
2203
- */
2234
+ /** Filter orphaned tool_result messages */
2204
2235
  function filterOrphanedToolResults(messages) {
2205
- const availableToolUseIds = /* @__PURE__ */ new Set();
2206
- for (const msg of messages) for (const id of getToolUseIds(msg)) availableToolUseIds.add(id);
2207
- const filteredMessages = [];
2236
+ const toolUseIds = /* @__PURE__ */ new Set();
2237
+ for (const msg of messages) for (const id of getToolCallIds(msg)) toolUseIds.add(id);
2208
2238
  let removedCount = 0;
2209
- for (const msg of messages) {
2210
- if (msg.role === "tool" && msg.tool_call_id && !availableToolUseIds.has(msg.tool_call_id)) {
2239
+ const filtered = messages.filter((msg) => {
2240
+ if (msg.role === "tool" && msg.tool_call_id && !toolUseIds.has(msg.tool_call_id)) {
2211
2241
  removedCount++;
2212
- continue;
2242
+ return false;
2213
2243
  }
2214
- filteredMessages.push(msg);
2215
- }
2216
- if (removedCount > 0) consola.info(`Auto-compact: Removed ${removedCount} orphaned tool_result message(s) without matching tool_use`);
2217
- return filteredMessages;
2244
+ return true;
2245
+ });
2246
+ if (removedCount > 0) consola.debug(`Auto-compact: Filtered ${removedCount} orphaned tool_result`);
2247
+ return filtered;
2218
2248
  }
2219
- /**
2220
- * Ensure the message list starts with a user message.
2221
- * If it starts with assistant or tool messages, skip them until we find a user message.
2222
- * This is required because OpenAI API expects conversations to start with user messages
2223
- * (after system messages).
2224
- */
2249
+ /** Ensure messages start with a user message */
2225
2250
  function ensureStartsWithUser(messages) {
2226
2251
  let startIndex = 0;
2227
- while (startIndex < messages.length) {
2228
- if (messages[startIndex].role === "user") break;
2229
- startIndex++;
2230
- }
2231
- if (startIndex > 0) consola.info(`Auto-compact: Skipped ${startIndex} leading non-user message(s) to ensure valid sequence`);
2252
+ while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
2253
+ if (startIndex > 0) consola.debug(`Auto-compact: Skipped ${startIndex} leading non-user messages`);
2232
2254
  return messages.slice(startIndex);
2233
2255
  }
2234
2256
  /**
2235
- * Calculate estimated tokens for system messages.
2257
+ * Find the optimal index from which to preserve messages.
2258
+ * Uses binary search with pre-calculated cumulative sums.
2259
+ * Returns the smallest index where the preserved portion fits within limits.
2236
2260
  */
2237
- function estimateSystemTokens(systemMessages) {
2238
- return systemMessages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
2261
+ function findOptimalPreserveIndex(params) {
2262
+ const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
2263
+ if (messages.length === 0) return 0;
2264
+ const markerBytes = 200;
2265
+ const availableTokens = tokenLimit - systemTokens - 50;
2266
+ const availableBytes = byteLimit - payloadOverhead - systemBytes - markerBytes;
2267
+ if (availableTokens <= 0 || availableBytes <= 0) return messages.length;
2268
+ const n = messages.length;
2269
+ const cumTokens = Array.from({ length: n + 1 }, () => 0);
2270
+ const cumBytes = Array.from({ length: n + 1 }, () => 0);
2271
+ for (let i = n - 1; i >= 0; i--) {
2272
+ const msg = messages[i];
2273
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
2274
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
2275
+ }
2276
+ let left = 0;
2277
+ let right = n;
2278
+ while (left < right) {
2279
+ const mid = left + right >>> 1;
2280
+ if (cumTokens[mid] <= availableTokens && cumBytes[mid] <= availableBytes) right = mid;
2281
+ else left = mid + 1;
2282
+ }
2283
+ return left;
2239
2284
  }
2240
2285
  /**
2241
- * Create a truncation marker message.
2286
+ * Check if payload needs compaction based on model limits or byte size.
2242
2287
  */
2288
+ async function checkNeedsCompaction(payload, model, config = {}) {
2289
+ const cfg = {
2290
+ ...DEFAULT_CONFIG,
2291
+ ...config
2292
+ };
2293
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
2294
+ const currentTokens = (await getTokenCount(payload, model)).input;
2295
+ const currentBytes = JSON.stringify(payload).length;
2296
+ const exceedsTokens = currentTokens > tokenLimit;
2297
+ const exceedsBytes = currentBytes > byteLimit;
2298
+ let reason;
2299
+ if (exceedsTokens && exceedsBytes) reason = "both";
2300
+ else if (exceedsTokens) reason = "tokens";
2301
+ else if (exceedsBytes) reason = "bytes";
2302
+ return {
2303
+ needed: exceedsTokens || exceedsBytes,
2304
+ currentTokens,
2305
+ tokenLimit,
2306
+ currentBytes,
2307
+ byteLimit,
2308
+ reason
2309
+ };
2310
+ }
2311
+ /** Create a truncation marker message */
2243
2312
  function createTruncationMarker(removedCount) {
2244
2313
  return {
2245
2314
  role: "user",
2246
- content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages were removed to fit context limits. The conversation continues below.]`
2315
+ content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages removed to fit context limits]`
2247
2316
  };
2248
2317
  }
2249
2318
  /**
2250
- * Perform auto-compaction on a payload that exceeds token or size limits.
2251
- * This uses simple truncation - no LLM calls required.
2252
- * Uses iterative approach with decreasing target tokens until under limit.
2319
+ * Perform auto-compaction on a payload that exceeds limits.
2320
+ * Uses binary search to find the optimal truncation point.
2253
2321
  */
2254
2322
  async function autoCompact(payload, model, config = {}) {
2255
2323
  const cfg = {
2256
2324
  ...DEFAULT_CONFIG,
2257
2325
  ...config
2258
2326
  };
2259
- const originalTokens = (await getTokenCount(payload, model)).input;
2260
- const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
2261
- const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
2327
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
2262
2328
  const originalBytes = JSON.stringify(payload).length;
2263
- const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
2329
+ const originalTokens = (await getTokenCount(payload, model)).input;
2264
2330
  if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
2265
2331
  payload,
2266
2332
  wasCompacted: false,
@@ -2274,60 +2340,33 @@ async function autoCompact(payload, model, config = {}) {
2274
2340
  if (exceedsTokens && exceedsBytes) reason = "tokens and size";
2275
2341
  else if (exceedsBytes) reason = "size";
2276
2342
  else reason = "tokens";
2277
- consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB), truncating...`);
2278
- const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
2279
- const systemTokens = estimateSystemTokens(systemMessages);
2280
- consola.debug(`Auto-compact: ${systemMessages.length} system messages (~${systemTokens} tokens)`);
2281
- const MAX_ITERATIONS = 5;
2282
- const MIN_TARGET = 2e4;
2283
- let currentTarget = Math.min(cfg.targetTokens, tokenLimit);
2284
- let lastResult = null;
2285
- for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
2286
- const result = await tryCompactWithTarget({
2343
+ consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
2344
+ const { systemMessages, conversationMessages } = extractSystemMessages(payload.messages);
2345
+ const messagesJson = JSON.stringify(payload.messages);
2346
+ const payloadOverhead = originalBytes - messagesJson.length;
2347
+ const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
2348
+ const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
2349
+ consola.debug(`Auto-compact: overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
2350
+ const preserveIndex = findOptimalPreserveIndex({
2351
+ messages: conversationMessages,
2352
+ systemBytes,
2353
+ systemTokens,
2354
+ payloadOverhead,
2355
+ tokenLimit,
2356
+ byteLimit
2357
+ });
2358
+ if (preserveIndex === 0) {
2359
+ consola.warn("Auto-compact: Cannot truncate, system messages too large");
2360
+ return {
2287
2361
  payload,
2288
- model,
2289
- systemMessages,
2290
- remainingMessages,
2291
- systemTokens,
2292
- targetTokens: currentTarget,
2293
- limit: tokenLimit,
2294
- originalTokens
2295
- });
2296
- if (!result.wasCompacted) return result;
2297
- lastResult = result;
2298
- const resultBytes = JSON.stringify(result.payload).length;
2299
- const underTokenLimit = result.compactedTokens <= tokenLimit;
2300
- const underByteLimit = resultBytes <= byteLimit;
2301
- if (underTokenLimit && underByteLimit) {
2302
- consola.info(`Auto-compact: ${originalTokens} → ${result.compactedTokens} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(resultBytes / 1024)}KB (removed ${result.removedMessageCount} messages)`);
2303
- return result;
2304
- }
2305
- const tokenStatus = underTokenLimit ? "OK" : `${result.compactedTokens} > ${tokenLimit}`;
2306
- const byteStatus = underByteLimit ? "OK" : `${Math.round(resultBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB`;
2307
- consola.warn(`Auto-compact: Still over limit (tokens: ${tokenStatus}, size: ${byteStatus}), trying more aggressive truncation`);
2308
- currentTarget = Math.floor(currentTarget * .7);
2309
- if (currentTarget < MIN_TARGET) {
2310
- consola.error("Auto-compact: Cannot reduce further, target too low");
2311
- return result;
2312
- }
2362
+ wasCompacted: false,
2363
+ originalTokens,
2364
+ compactedTokens: originalTokens,
2365
+ removedMessageCount: 0
2366
+ };
2313
2367
  }
2314
- consola.error(`Auto-compact: Exhausted ${MAX_ITERATIONS} iterations, returning best effort`);
2315
- return lastResult ?? {
2316
- payload,
2317
- wasCompacted: false,
2318
- originalTokens,
2319
- compactedTokens: originalTokens,
2320
- removedMessageCount: 0
2321
- };
2322
- }
2323
- /**
2324
- * Helper to attempt compaction with a specific target token count.
2325
- */
2326
- async function tryCompactWithTarget(opts) {
2327
- const { payload, model, systemMessages, remainingMessages, systemTokens, targetTokens, originalTokens } = opts;
2328
- const preserveIndex = findPreserveIndex(remainingMessages, targetTokens, systemTokens);
2329
- if (preserveIndex === 0) {
2330
- consola.warn("Auto-compact: Cannot truncate further without losing all conversation history");
2368
+ if (preserveIndex >= conversationMessages.length) {
2369
+ consola.warn("Auto-compact: Would need to remove all messages");
2331
2370
  return {
2332
2371
  payload,
2333
2372
  wasCompacted: false,
@@ -2336,13 +2375,12 @@ async function tryCompactWithTarget(opts) {
2336
2375
  removedMessageCount: 0
2337
2376
  };
2338
2377
  }
2339
- const removedMessages = remainingMessages.slice(0, preserveIndex);
2340
- let preservedMessages = remainingMessages.slice(preserveIndex);
2341
- preservedMessages = filterOrphanedToolResults(preservedMessages);
2342
- preservedMessages = ensureStartsWithUser(preservedMessages);
2343
- preservedMessages = filterOrphanedToolResults(preservedMessages);
2344
- if (preservedMessages.length === 0) {
2345
- consola.warn("Auto-compact: All messages were filtered out after cleanup, cannot compact");
2378
+ let preserved = conversationMessages.slice(preserveIndex);
2379
+ preserved = filterOrphanedToolResults(preserved);
2380
+ preserved = ensureStartsWithUser(preserved);
2381
+ preserved = filterOrphanedToolResults(preserved);
2382
+ if (preserved.length === 0) {
2383
+ consola.warn("Auto-compact: All messages filtered out after cleanup");
2346
2384
  return {
2347
2385
  payload,
2348
2386
  wasCompacted: false,
@@ -2351,27 +2389,30 @@ async function tryCompactWithTarget(opts) {
2351
2389
  removedMessageCount: 0
2352
2390
  };
2353
2391
  }
2354
- consola.debug(`Auto-compact: Removing ${removedMessages.length} messages, keeping ${preservedMessages.length}`);
2355
- const truncationMarker = createTruncationMarker(removedMessages.length);
2392
+ const removedCount = conversationMessages.length - preserved.length;
2393
+ const marker = createTruncationMarker(removedCount);
2356
2394
  const newPayload = {
2357
2395
  ...payload,
2358
2396
  messages: [
2359
2397
  ...systemMessages,
2360
- truncationMarker,
2361
- ...preservedMessages
2398
+ marker,
2399
+ ...preserved
2362
2400
  ]
2363
2401
  };
2402
+ const newBytes = JSON.stringify(newPayload).length;
2364
2403
  const newTokenCount = await getTokenCount(newPayload, model);
2404
+ consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
2405
+ if (newBytes > byteLimit) consola.warn(`Auto-compact: Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
2365
2406
  return {
2366
2407
  payload: newPayload,
2367
2408
  wasCompacted: true,
2368
2409
  originalTokens,
2369
2410
  compactedTokens: newTokenCount.input,
2370
- removedMessageCount: removedMessages.length
2411
+ removedMessageCount: removedCount
2371
2412
  };
2372
2413
  }
2373
2414
  /**
2374
- * Create a marker to append to responses indicating auto-compaction occurred.
2415
+ * Create a marker to prepend to responses indicating auto-compaction occurred.
2375
2416
  */
2376
2417
  function createCompactionMarker(result) {
2377
2418
  if (!result.wasCompacted) return "";
@@ -2633,7 +2674,7 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
2633
2674
  ...choice$1,
2634
2675
  message: {
2635
2676
  ...choice$1.message,
2636
- content: (choice$1.message.content ?? "") + marker
2677
+ content: marker + (choice$1.message.content ?? "")
2637
2678
  }
2638
2679
  } : choice$1)
2639
2680
  };
@@ -2694,18 +2735,13 @@ async function handleStreamingResponse$1(opts) {
2694
2735
  const { stream, response, payload, ctx } = opts;
2695
2736
  const acc = createStreamAccumulator();
2696
2737
  try {
2697
- for await (const chunk of response) {
2698
- consola.debug("Streaming chunk:", JSON.stringify(chunk));
2699
- parseStreamChunk(chunk, acc);
2700
- await stream.writeSSE(chunk);
2701
- }
2702
2738
  if (ctx.compactResult?.wasCompacted) {
2703
2739
  const marker = createCompactionMarker(ctx.compactResult);
2704
2740
  const markerChunk = {
2705
2741
  id: `compact-marker-${Date.now()}`,
2706
2742
  object: "chat.completion.chunk",
2707
2743
  created: Math.floor(Date.now() / 1e3),
2708
- model: acc.model || payload.model,
2744
+ model: payload.model,
2709
2745
  choices: [{
2710
2746
  index: 0,
2711
2747
  delta: { content: marker },
@@ -2719,6 +2755,11 @@ async function handleStreamingResponse$1(opts) {
2719
2755
  });
2720
2756
  acc.content += marker;
2721
2757
  }
2758
+ for await (const chunk of response) {
2759
+ consola.debug("Streaming chunk:", JSON.stringify(chunk));
2760
+ parseStreamChunk(chunk, acc);
2761
+ await stream.writeSSE(chunk);
2762
+ }
2722
2763
  recordStreamSuccess(acc, payload.model, ctx);
2723
2764
  completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
2724
2765
  } catch (error) {
@@ -4509,7 +4550,7 @@ function handleNonStreamingResponse(opts) {
4509
4550
  consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
4510
4551
  if (ctx.compactResult?.wasCompacted) {
4511
4552
  const marker = createCompactionMarker(ctx.compactResult);
4512
- anthropicResponse = appendMarkerToAnthropicResponse(anthropicResponse, marker);
4553
+ anthropicResponse = prependMarkerToAnthropicResponse(anthropicResponse, marker);
4513
4554
  }
4514
4555
  recordResponse(ctx.historyId, {
4515
4556
  success: true,
@@ -4541,16 +4582,16 @@ function handleNonStreamingResponse(opts) {
4541
4582
  });
4542
4583
  return c.json(anthropicResponse);
4543
4584
  }
4544
- function appendMarkerToAnthropicResponse(response, marker) {
4585
+ function prependMarkerToAnthropicResponse(response, marker) {
4545
4586
  const content = [...response.content];
4546
- const lastTextIndex = content.findLastIndex((block) => block.type === "text");
4547
- if (lastTextIndex !== -1) {
4548
- const textBlock = content[lastTextIndex];
4549
- if (textBlock.type === "text") content[lastTextIndex] = {
4587
+ const firstTextIndex = content.findIndex((block) => block.type === "text");
4588
+ if (firstTextIndex !== -1) {
4589
+ const textBlock = content[firstTextIndex];
4590
+ if (textBlock.type === "text") content[firstTextIndex] = {
4550
4591
  ...textBlock,
4551
- text: textBlock.text + marker
4592
+ text: marker + textBlock.text
4552
4593
  };
4553
- } else content.push({
4594
+ } else content.unshift({
4554
4595
  type: "text",
4555
4596
  text: marker
4556
4597
  });
@@ -4580,6 +4621,11 @@ async function handleStreamingResponse(opts) {
4580
4621
  };
4581
4622
  const acc = createAnthropicStreamAccumulator();
4582
4623
  try {
4624
+ if (ctx.compactResult?.wasCompacted) {
4625
+ const marker = createCompactionMarker(ctx.compactResult);
4626
+ await sendCompactionMarkerEvent(stream, streamState, marker);
4627
+ acc.content += marker;
4628
+ }
4583
4629
  await processStreamChunks({
4584
4630
  stream,
4585
4631
  response,
@@ -4587,11 +4633,6 @@ async function handleStreamingResponse(opts) {
4587
4633
  streamState,
4588
4634
  acc
4589
4635
  });
4590
- if (ctx.compactResult?.wasCompacted) {
4591
- const marker = createCompactionMarker(ctx.compactResult);
4592
- await sendCompactionMarkerEvent(stream, streamState, marker);
4593
- acc.content += marker;
4594
- }
4595
4636
  recordStreamingResponse(acc, anthropicPayload.model, ctx);
4596
4637
  completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
4597
4638
  } catch (error) {
@@ -4904,6 +4945,7 @@ function formatModelInfo(model) {
4904
4945
  return ` - ${model.id.padEnd(28)} context: ${contextK.padStart(5)}, output: ${outputK.padStart(4)}${featureStr}`;
4905
4946
  }
4906
4947
  async function runServer(options) {
4948
+ consola.info(`copilot-api v${package_default.version}`);
4907
4949
  if (options.proxyEnv) initProxyFromEnv();
4908
4950
  if (options.verbose) {
4909
4951
  consola.level = 5;
@@ -4921,7 +4963,7 @@ async function runServer(options) {
4921
4963
  consecutiveSuccessesForRecovery: options.consecutiveSuccesses
4922
4964
  });
4923
4965
  else consola.info("Rate limiting disabled");
4924
- if (options.autoCompact) consola.info("Auto-compact enabled: will compress context when exceeding token limits");
4966
+ if (!options.autoCompact) consola.info("Auto-compact disabled");
4925
4967
  initHistory(options.history, options.historyLimit);
4926
4968
  if (options.history) {
4927
4969
  const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
@@ -5063,10 +5105,10 @@ const start = defineCommand({
5063
5105
  default: "1000",
5064
5106
  description: "Maximum number of history entries to keep in memory (0 = unlimited)"
5065
5107
  },
5066
- "auto-compact": {
5108
+ "no-auto-compact": {
5067
5109
  type: "boolean",
5068
5110
  default: false,
5069
- description: "Automatically compress conversation history when exceeding model token limits"
5111
+ description: "Disable automatic conversation history compression when exceeding limits"
5070
5112
  }
5071
5113
  },
5072
5114
  run({ args }) {
@@ -5087,7 +5129,7 @@ const start = defineCommand({
5087
5129
  proxyEnv: args["proxy-env"],
5088
5130
  history: !args["no-history"],
5089
5131
  historyLimit: Number.parseInt(args["history-limit"], 10),
5090
- autoCompact: args["auto-compact"]
5132
+ autoCompact: !args["no-auto-compact"]
5091
5133
  });
5092
5134
  }
5093
5135
  });