@hsupu/copilot-api 0.7.8 → 0.7.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -46,7 +46,7 @@ const state = {
46
46
  accountType: "individual",
47
47
  manualApprove: false,
48
48
  showToken: false,
49
- autoCompact: false
49
+ autoCompact: true
50
50
  };
51
51
 
52
52
  //#endregion
@@ -558,15 +558,12 @@ const SUPPORTED_VERSIONS = {
558
558
  min: "2.0.0",
559
559
  max: "2.1.10"
560
560
  },
561
- v2b: {
562
- min: "2.1.11",
563
- max: "2.1.12"
564
- }
561
+ v2b: { min: "2.1.11" }
565
562
  };
566
563
  const PATTERNS = {
567
564
  funcOriginal: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return 200000\}/,
568
565
  funcPatched: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return \d+\}/,
569
- variable: /var BS9=(\d+)/
566
+ variable: /var ([A-Za-z_$]\w*)=(\d+)(?=,\w+=20000,)/
570
567
  };
571
568
  /**
572
569
  * Parse semver version string to comparable parts
@@ -592,14 +589,14 @@ function compareVersions(a, b) {
592
589
  }
593
590
  function getPatternTypeForVersion(version$1) {
594
591
  if (compareVersions(version$1, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
595
- if (compareVersions(version$1, SUPPORTED_VERSIONS.v2b.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2b.max) <= 0) return "variable";
592
+ if (compareVersions(version$1, SUPPORTED_VERSIONS.v2b.min) >= 0) return "variable";
596
593
  return null;
597
594
  }
598
595
  /**
599
596
  * Get supported version range string for error messages
600
597
  */
601
598
  function getSupportedRangeString() {
602
- return `${SUPPORTED_VERSIONS.v2a.min}-${SUPPORTED_VERSIONS.v2a.max}, ${SUPPORTED_VERSIONS.v2b.min}-${SUPPORTED_VERSIONS.v2b.max}`;
599
+ return `${SUPPORTED_VERSIONS.v2a.min}-${SUPPORTED_VERSIONS.v2a.max}, ${SUPPORTED_VERSIONS.v2b.min}+`;
603
600
  }
604
601
  /**
605
602
  * Get Claude Code version from package.json
@@ -632,9 +629,9 @@ function findInVoltaTools(voltaHome) {
632
629
  return paths;
633
630
  }
634
631
  /**
635
- * Find Claude Code CLI path by checking common locations
632
+ * Find all Claude Code CLI paths by checking common locations
636
633
  */
637
- function findClaudeCodePath() {
634
+ function findAllClaudeCodePaths() {
638
635
  const possiblePaths = [];
639
636
  const home = process.env.HOME || "";
640
637
  const voltaHome = process.env.VOLTA_HOME || join(home, ".volta");
@@ -649,22 +646,41 @@ function findClaudeCodePath() {
649
646
  for (const base of globalPaths) possiblePaths.push(join(base, "@anthropic-ai", "claude-code", "cli.js"));
650
647
  const bunGlobal = join(home, ".bun", "install", "global");
651
648
  if (existsSync(bunGlobal)) possiblePaths.push(join(bunGlobal, "node_modules", "@anthropic-ai", "claude-code", "cli.js"));
652
- return possiblePaths.find((p) => existsSync(p)) ?? null;
649
+ return [...new Set(possiblePaths.filter((p) => existsSync(p)))];
653
650
  }
654
651
  /**
655
- * Get current context limit from Claude Code
652
+ * Get installation info for a CLI path
656
653
  */
657
- function getCurrentLimit(content) {
654
+ function getInstallationInfo(cliPath) {
655
+ const version$1 = getClaudeCodeVersion(cliPath);
656
+ const content = readFileSync(cliPath, "utf8");
657
+ const limit = getCurrentLimit(content);
658
+ return {
659
+ path: cliPath,
660
+ version: version$1,
661
+ limit
662
+ };
663
+ }
664
+ function getCurrentLimitInfo(content) {
658
665
  const varMatch = content.match(PATTERNS.variable);
659
- if (varMatch) return Number.parseInt(varMatch[1], 10);
666
+ if (varMatch) return {
667
+ limit: Number.parseInt(varMatch[2], 10),
668
+ varName: varMatch[1]
669
+ };
660
670
  const funcMatch = content.match(PATTERNS.funcPatched);
661
671
  if (funcMatch) {
662
672
  const limitMatch = funcMatch[0].match(/return (\d+)\}$/);
663
- return limitMatch ? Number.parseInt(limitMatch[1], 10) : null;
673
+ return limitMatch ? { limit: Number.parseInt(limitMatch[1], 10) } : null;
664
674
  }
665
675
  return null;
666
676
  }
667
677
  /**
678
+ * Get current context limit from Claude Code (legacy wrapper)
679
+ */
680
+ function getCurrentLimit(content) {
681
+ return getCurrentLimitInfo(content)?.limit ?? null;
682
+ }
683
+ /**
668
684
  * Check if Claude Code version is supported for patching
669
685
  */
670
686
  function checkVersionSupport(cliPath) {
@@ -696,22 +712,25 @@ function patchClaudeCode(cliPath, newLimit) {
696
712
  const versionCheck = checkVersionSupport(cliPath);
697
713
  if (!versionCheck.supported) {
698
714
  consola.error(versionCheck.error);
699
- return false;
715
+ return "failed";
700
716
  }
701
717
  consola.info(`Claude Code version: ${versionCheck.version}`);
702
- if (getCurrentLimit(content) === newLimit) {
703
- consola.info(`Already patched with limit ${newLimit}`);
704
- return true;
705
- }
718
+ const limitInfo = getCurrentLimitInfo(content);
719
+ if (limitInfo?.limit === newLimit) return "already_patched";
706
720
  let newContent;
707
- if (versionCheck.patternType === "variable") newContent = content.replace(PATTERNS.variable, `var BS9=${newLimit}`);
708
- else {
721
+ if (versionCheck.patternType === "variable") {
722
+ if (!limitInfo?.varName) {
723
+ consola.error("Could not detect variable name for patching");
724
+ return "failed";
725
+ }
726
+ newContent = content.replace(PATTERNS.variable, `var ${limitInfo.varName}=${newLimit}`);
727
+ } else {
709
728
  const replacement = `function HR(A){if(A.includes("[1m]"))return 1e6;return ${newLimit}}`;
710
729
  const pattern = PATTERNS.funcOriginal.test(content) ? PATTERNS.funcOriginal : PATTERNS.funcPatched;
711
730
  newContent = content.replace(pattern, replacement);
712
731
  }
713
732
  writeFileSync(cliPath, newContent);
714
- return true;
733
+ return "success";
715
734
  }
716
735
  /**
717
736
  * Restore Claude Code to original 200k limit
@@ -724,13 +743,19 @@ function restoreClaudeCode(cliPath) {
724
743
  return false;
725
744
  }
726
745
  consola.info(`Claude Code version: ${versionCheck.version}`);
727
- if (getCurrentLimit(content) === 2e5) {
746
+ const limitInfo = getCurrentLimitInfo(content);
747
+ if (limitInfo?.limit === 2e5) {
728
748
  consola.info("Already at original 200000 limit");
729
749
  return true;
730
750
  }
731
751
  let newContent;
732
- if (versionCheck.patternType === "variable") newContent = content.replace(PATTERNS.variable, "var BS9=200000");
733
- else newContent = content.replace(PATTERNS.funcPatched, "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}");
752
+ if (versionCheck.patternType === "variable") {
753
+ if (!limitInfo?.varName) {
754
+ consola.error("Could not detect variable name for restoring");
755
+ return false;
756
+ }
757
+ newContent = content.replace(PATTERNS.variable, `var ${limitInfo.varName}=200000`);
758
+ } else newContent = content.replace(PATTERNS.funcPatched, "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}");
734
759
  writeFileSync(cliPath, newContent);
735
760
  return true;
736
761
  }
@@ -739,7 +764,7 @@ function showStatus(cliPath, currentLimit) {
739
764
  if (version$1) consola.info(`Claude Code version: ${version$1}`);
740
765
  if (currentLimit === null) {
741
766
  consola.warn("Could not detect current limit - CLI may have been updated");
742
- consola.info("Look for the BS9 variable or HR function pattern in cli.js");
767
+ consola.info("Look for a variable like 'var XXX=200000' followed by ',YYY=20000,' in cli.js");
743
768
  } else if (currentLimit === 2e5) consola.info("Status: Original (200k context window)");
744
769
  else consola.info(`Status: Patched (${currentLimit} context window)`);
745
770
  }
@@ -773,17 +798,42 @@ const patchClaude = defineCommand({
773
798
  description: "Show current patch status without modifying"
774
799
  }
775
800
  },
776
- run({ args }) {
777
- const cliPath = args.path || findClaudeCodePath();
778
- if (!cliPath) {
779
- consola.error("Could not find Claude Code installation");
780
- consola.info("Searched in: volta, npm global, bun global");
781
- consola.info("Use --path to specify the path to cli.js manually");
782
- process.exit(1);
783
- }
784
- if (!existsSync(cliPath)) {
785
- consola.error(`File not found: ${cliPath}`);
786
- process.exit(1);
801
+ async run({ args }) {
802
+ let cliPath;
803
+ if (args.path) {
804
+ cliPath = args.path;
805
+ if (!existsSync(cliPath)) {
806
+ consola.error(`File not found: ${cliPath}`);
807
+ process.exit(1);
808
+ }
809
+ } else {
810
+ const installations = findAllClaudeCodePaths();
811
+ if (installations.length === 0) {
812
+ consola.error("Could not find Claude Code installation");
813
+ consola.info("Searched in: volta, npm global, bun global");
814
+ consola.info("Use --path to specify the path to cli.js manually");
815
+ process.exit(1);
816
+ }
817
+ if (installations.length === 1) cliPath = installations[0];
818
+ else {
819
+ consola.info(`Found ${installations.length} Claude Code installations:`);
820
+ const options = installations.map((path$1) => {
821
+ const info = getInstallationInfo(path$1);
822
+ let status = "unknown";
823
+ if (info.limit === 2e5) status = "original";
824
+ else if (info.limit) status = `patched: ${info.limit}`;
825
+ return {
826
+ label: `v${info.version ?? "?"} (${status}) - ${path$1}`,
827
+ value: path$1
828
+ };
829
+ });
830
+ const selected = await consola.prompt("Select installation to patch:", {
831
+ type: "select",
832
+ options
833
+ });
834
+ if (typeof selected === "symbol") process.exit(0);
835
+ cliPath = selected;
836
+ }
787
837
  }
788
838
  consola.info(`Claude Code path: ${cliPath}`);
789
839
  const content = readFileSync(cliPath, "utf8");
@@ -806,13 +856,14 @@ const patchClaude = defineCommand({
806
856
  consola.error("Invalid limit value. Must be a number >= 1000");
807
857
  process.exit(1);
808
858
  }
809
- if (patchClaudeCode(cliPath, limit)) {
810
- consola.success(`Patched context window: 200000 → ${limit}`);
859
+ const result = patchClaudeCode(cliPath, limit);
860
+ if (result === "success") {
861
+ consola.success(`Patched context window: ${currentLimit ?? 2e5} → ${limit}`);
811
862
  consola.info("Note: You may need to re-run this after Claude Code updates");
812
- } else {
863
+ } else if (result === "already_patched") consola.success(`Already patched with limit ${limit}`);
864
+ else {
813
865
  consola.error("Failed to patch - pattern not found");
814
866
  consola.info("Claude Code may have been updated to a new version");
815
- consola.info("Check the cli.js for the HR function pattern");
816
867
  process.exit(1);
817
868
  }
818
869
  }
@@ -821,7 +872,7 @@ const patchClaude = defineCommand({
821
872
  //#endregion
822
873
  //#region package.json
823
874
  var name = "@hsupu/copilot-api";
824
- var version = "0.7.8";
875
+ var version = "0.7.10";
825
876
  var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
826
877
  var keywords = [
827
878
  "proxy",
@@ -2176,188 +2227,157 @@ const getTokenCount = async (payload, model) => {
2176
2227
  //#endregion
2177
2228
  //#region src/lib/auto-compact.ts
2178
2229
  const DEFAULT_CONFIG = {
2179
- targetTokens: 12e4,
2180
2230
  safetyMarginPercent: 2,
2181
2231
  maxRequestBodyBytes: 500 * 1024
2182
2232
  };
2233
+ /** Dynamic byte limit that adjusts based on 413 errors */
2234
+ let dynamicByteLimit = null;
2183
2235
  /**
2184
- * Dynamic byte limit that adjusts based on 413 errors.
2185
- * Starts at 500KB and can be adjusted when 413 errors are encountered.
2186
- */
2187
- let dynamicByteLimitOverride = null;
2188
- /**
2189
- * Called when a 413 error is encountered with a specific payload size.
2190
- * Adjusts the dynamic byte limit to 90% of the failing size.
2236
+ * Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
2191
2237
  */
2192
2238
  function onRequestTooLarge(failingBytes) {
2193
2239
  const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
2194
- dynamicByteLimitOverride = newLimit;
2195
- consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed, new limit: ${Math.round(newLimit / 1024)}KB`);
2240
+ dynamicByteLimit = newLimit;
2241
+ consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed ${Math.round(newLimit / 1024)}KB`);
2196
2242
  }
2197
- /**
2198
- * Check if payload needs compaction based on model limits OR request body size.
2199
- * Uses a safety margin to account for token counting differences.
2200
- */
2201
- async function checkNeedsCompaction(payload, model, config = {}) {
2202
- const cfg = {
2203
- ...DEFAULT_CONFIG,
2204
- ...config
2205
- };
2206
- const currentTokens = (await getTokenCount(payload, model)).input;
2207
- const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
2208
- const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
2209
- const currentBytes = JSON.stringify(payload).length;
2210
- const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
2211
- const exceedsTokens = currentTokens > tokenLimit;
2212
- const exceedsBytes = currentBytes > byteLimit;
2213
- let reason;
2214
- if (exceedsTokens && exceedsBytes) reason = "both";
2215
- else if (exceedsTokens) reason = "tokens";
2216
- else if (exceedsBytes) reason = "bytes";
2243
+ function calculateLimits(model, config) {
2244
+ const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
2245
+ const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
2246
+ const byteLimit = dynamicByteLimit ?? config.maxRequestBodyBytes;
2217
2247
  return {
2218
- needed: exceedsTokens || exceedsBytes,
2219
- currentTokens,
2220
2248
  tokenLimit,
2221
- currentBytes,
2222
- byteLimit,
2223
- reason
2249
+ byteLimit
2224
2250
  };
2225
2251
  }
2226
- /**
2227
- * Calculate approximate token count for a single message.
2228
- * This is a fast estimation for splitting decisions.
2229
- */
2230
- function estimateMessageTokens(message) {
2231
- let text = "";
2232
- if (typeof message.content === "string") text = message.content;
2233
- else if (Array.isArray(message.content)) {
2234
- for (const part of message.content) if (part.type === "text") text += part.text;
2235
- else if ("image_url" in part) text += part.image_url.url;
2252
+ /** Estimate tokens for a single message (fast approximation) */
2253
+ function estimateMessageTokens(msg) {
2254
+ let charCount = 0;
2255
+ if (typeof msg.content === "string") charCount = msg.content.length;
2256
+ else if (Array.isArray(msg.content)) {
2257
+ for (const part of msg.content) if (part.type === "text") charCount += part.text.length;
2258
+ else if ("image_url" in part) charCount += Math.min(part.image_url.url.length, 1e4);
2236
2259
  }
2237
- if (message.tool_calls) text += JSON.stringify(message.tool_calls);
2238
- return Math.ceil(text.length / 4) + 10;
2260
+ if (msg.tool_calls) charCount += JSON.stringify(msg.tool_calls).length;
2261
+ return Math.ceil(charCount / 4) + 10;
2239
2262
  }
2240
- /**
2241
- * Extract system messages from the beginning of the message list.
2242
- */
2263
+ /** Get byte size of a message */
2264
+ function getMessageBytes(msg) {
2265
+ return JSON.stringify(msg).length;
2266
+ }
2267
+ /** Extract system/developer messages from the beginning */
2243
2268
  function extractSystemMessages(messages) {
2244
- const systemMessages = [];
2245
- let i = 0;
2246
- while (i < messages.length) {
2247
- const msg = messages[i];
2248
- if (msg.role === "system" || msg.role === "developer") {
2249
- systemMessages.push(msg);
2250
- i++;
2251
- } else break;
2269
+ let splitIndex = 0;
2270
+ while (splitIndex < messages.length) {
2271
+ const role = messages[splitIndex].role;
2272
+ if (role !== "system" && role !== "developer") break;
2273
+ splitIndex++;
2252
2274
  }
2253
2275
  return {
2254
- systemMessages,
2255
- remainingMessages: messages.slice(i)
2276
+ systemMessages: messages.slice(0, splitIndex),
2277
+ conversationMessages: messages.slice(splitIndex)
2256
2278
  };
2257
2279
  }
2258
- /**
2259
- * Extract tool_use ids from assistant messages with tool_calls.
2260
- */
2261
- function getToolUseIds(message) {
2262
- if (message.role === "assistant" && message.tool_calls) return message.tool_calls.map((tc) => tc.id);
2280
+ /** Get tool_use IDs from an assistant message */
2281
+ function getToolCallIds(msg) {
2282
+ if (msg.role === "assistant" && msg.tool_calls) return msg.tool_calls.map((tc) => tc.id);
2263
2283
  return [];
2264
2284
  }
2265
- /**
2266
- * Estimate the byte size of a message (for binary search).
2267
- */
2268
- function estimateMessageBytes(message) {
2269
- return JSON.stringify(message).length;
2285
+ /** Filter orphaned tool_result messages */
2286
+ function filterOrphanedToolResults(messages) {
2287
+ const toolUseIds = /* @__PURE__ */ new Set();
2288
+ for (const msg of messages) for (const id of getToolCallIds(msg)) toolUseIds.add(id);
2289
+ let removedCount = 0;
2290
+ const filtered = messages.filter((msg) => {
2291
+ if (msg.role === "tool" && msg.tool_call_id && !toolUseIds.has(msg.tool_call_id)) {
2292
+ removedCount++;
2293
+ return false;
2294
+ }
2295
+ return true;
2296
+ });
2297
+ if (removedCount > 0) consola.debug(`Auto-compact: Filtered ${removedCount} orphaned tool_result`);
2298
+ return filtered;
2299
+ }
2300
+ /** Ensure messages start with a user message */
2301
+ function ensureStartsWithUser(messages) {
2302
+ let startIndex = 0;
2303
+ while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
2304
+ if (startIndex > 0) consola.debug(`Auto-compact: Skipped ${startIndex} leading non-user messages`);
2305
+ return messages.slice(startIndex);
2270
2306
  }
2271
2307
  /**
2272
- * Find the optimal preserve index using binary search.
2273
- * This finds the point where we keep as many messages as possible
2274
- * while staying under both token and byte limits.
2308
+ * Find the optimal index from which to preserve messages.
2309
+ * Uses binary search with pre-calculated cumulative sums.
2310
+ * Returns the smallest index where the preserved portion fits within limits.
2275
2311
  */
2276
- function findOptimalPreserveIndex(messages, systemMessages, targetTokens, targetBytes) {
2312
+ function findOptimalPreserveIndex(params) {
2313
+ const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
2277
2314
  if (messages.length === 0) return 0;
2278
- const systemTokens = systemMessages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
2279
- const systemBytes = systemMessages.reduce((sum, msg) => sum + estimateMessageBytes(msg), 0);
2280
- const markerOverhead = 200;
2281
- const availableTokens = targetTokens - systemTokens - markerOverhead / 4;
2282
- const availableBytes = targetBytes - systemBytes - markerOverhead;
2283
- const cumulativeTokens = Array.from({ length: messages.length + 1 }, () => 0);
2284
- const cumulativeBytes = Array.from({ length: messages.length + 1 }, () => 0);
2285
- for (let i = messages.length - 1; i >= 0; i--) {
2315
+ const markerBytes = 200;
2316
+ const availableTokens = tokenLimit - systemTokens - 50;
2317
+ const availableBytes = byteLimit - payloadOverhead - systemBytes - markerBytes;
2318
+ if (availableTokens <= 0 || availableBytes <= 0) return messages.length;
2319
+ const n = messages.length;
2320
+ const cumTokens = Array.from({ length: n + 1 }, () => 0);
2321
+ const cumBytes = Array.from({ length: n + 1 }, () => 0);
2322
+ for (let i = n - 1; i >= 0; i--) {
2286
2323
  const msg = messages[i];
2287
- cumulativeTokens[i] = cumulativeTokens[i + 1] + estimateMessageTokens(msg);
2288
- cumulativeBytes[i] = cumulativeBytes[i + 1] + estimateMessageBytes(msg);
2324
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
2325
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
2289
2326
  }
2290
2327
  let left = 0;
2291
- let right = messages.length;
2328
+ let right = n;
2292
2329
  while (left < right) {
2293
- const mid = Math.floor((left + right) / 2);
2294
- const tokensFromMid = cumulativeTokens[mid];
2295
- const bytesFromMid = cumulativeBytes[mid];
2296
- if (tokensFromMid <= availableTokens && bytesFromMid <= availableBytes) right = mid;
2330
+ const mid = left + right >>> 1;
2331
+ if (cumTokens[mid] <= availableTokens && cumBytes[mid] <= availableBytes) right = mid;
2297
2332
  else left = mid + 1;
2298
2333
  }
2299
2334
  return left;
2300
2335
  }
2301
2336
  /**
2302
- * Filter out orphaned tool_result messages that don't have a matching tool_use
2303
- * in the preserved message list. This prevents API errors when truncation
2304
- * separates tool_use/tool_result pairs.
2305
- */
2306
- function filterOrphanedToolResults(messages) {
2307
- const availableToolUseIds = /* @__PURE__ */ new Set();
2308
- for (const msg of messages) for (const id of getToolUseIds(msg)) availableToolUseIds.add(id);
2309
- const filteredMessages = [];
2310
- let removedCount = 0;
2311
- for (const msg of messages) {
2312
- if (msg.role === "tool" && msg.tool_call_id && !availableToolUseIds.has(msg.tool_call_id)) {
2313
- removedCount++;
2314
- continue;
2315
- }
2316
- filteredMessages.push(msg);
2317
- }
2318
- if (removedCount > 0) consola.info(`Auto-compact: Removed ${removedCount} orphaned tool_result message(s) without matching tool_use`);
2319
- return filteredMessages;
2320
- }
2321
- /**
2322
- * Ensure the message list starts with a user message.
2323
- * If it starts with assistant or tool messages, skip them until we find a user message.
2324
- * This is required because OpenAI API expects conversations to start with user messages
2325
- * (after system messages).
2337
+ * Check if payload needs compaction based on model limits or byte size.
2326
2338
  */
2327
- function ensureStartsWithUser(messages) {
2328
- let startIndex = 0;
2329
- while (startIndex < messages.length) {
2330
- if (messages[startIndex].role === "user") break;
2331
- startIndex++;
2332
- }
2333
- if (startIndex > 0) consola.info(`Auto-compact: Skipped ${startIndex} leading non-user message(s) to ensure valid sequence`);
2334
- return messages.slice(startIndex);
2339
+ async function checkNeedsCompaction(payload, model, config = {}) {
2340
+ const cfg = {
2341
+ ...DEFAULT_CONFIG,
2342
+ ...config
2343
+ };
2344
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
2345
+ const currentTokens = (await getTokenCount(payload, model)).input;
2346
+ const currentBytes = JSON.stringify(payload).length;
2347
+ const exceedsTokens = currentTokens > tokenLimit;
2348
+ const exceedsBytes = currentBytes > byteLimit;
2349
+ let reason;
2350
+ if (exceedsTokens && exceedsBytes) reason = "both";
2351
+ else if (exceedsTokens) reason = "tokens";
2352
+ else if (exceedsBytes) reason = "bytes";
2353
+ return {
2354
+ needed: exceedsTokens || exceedsBytes,
2355
+ currentTokens,
2356
+ tokenLimit,
2357
+ currentBytes,
2358
+ byteLimit,
2359
+ reason
2360
+ };
2335
2361
  }
2336
- /**
2337
- * Create a truncation marker message.
2338
- */
2362
+ /** Create a truncation marker message */
2339
2363
  function createTruncationMarker(removedCount) {
2340
2364
  return {
2341
2365
  role: "user",
2342
- content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages were removed to fit context limits. The conversation continues below.]`
2366
+ content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages removed to fit context limits]`
2343
2367
  };
2344
2368
  }
2345
2369
  /**
2346
- * Perform auto-compaction on a payload that exceeds token or size limits.
2347
- * This uses simple truncation with binary search - no LLM calls required.
2348
- * The algorithm finds the optimal truncation point to maximize preserved messages
2349
- * while staying under both token and byte limits.
2370
+ * Perform auto-compaction on a payload that exceeds limits.
2371
+ * Uses binary search to find the optimal truncation point.
2350
2372
  */
2351
2373
  async function autoCompact(payload, model, config = {}) {
2352
2374
  const cfg = {
2353
2375
  ...DEFAULT_CONFIG,
2354
2376
  ...config
2355
2377
  };
2356
- const originalTokens = (await getTokenCount(payload, model)).input;
2357
- const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
2358
- const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
2378
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
2359
2379
  const originalBytes = JSON.stringify(payload).length;
2360
- const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
2380
+ const originalTokens = (await getTokenCount(payload, model)).input;
2361
2381
  if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
2362
2382
  payload,
2363
2383
  wasCompacted: false,
@@ -2371,12 +2391,23 @@ async function autoCompact(payload, model, config = {}) {
2371
2391
  if (exceedsTokens && exceedsBytes) reason = "tokens and size";
2372
2392
  else if (exceedsBytes) reason = "size";
2373
2393
  else reason = "tokens";
2374
- consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB), truncating...`);
2375
- const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
2376
- consola.debug(`Auto-compact: ${systemMessages.length} system messages, ${remainingMessages.length} conversation messages`);
2377
- const preserveIndex = findOptimalPreserveIndex(remainingMessages, systemMessages, tokenLimit, byteLimit);
2394
+ consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
2395
+ const { systemMessages, conversationMessages } = extractSystemMessages(payload.messages);
2396
+ const messagesJson = JSON.stringify(payload.messages);
2397
+ const payloadOverhead = originalBytes - messagesJson.length;
2398
+ const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
2399
+ const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
2400
+ consola.debug(`Auto-compact: overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
2401
+ const preserveIndex = findOptimalPreserveIndex({
2402
+ messages: conversationMessages,
2403
+ systemBytes,
2404
+ systemTokens,
2405
+ payloadOverhead,
2406
+ tokenLimit,
2407
+ byteLimit
2408
+ });
2378
2409
  if (preserveIndex === 0) {
2379
- consola.warn("Auto-compact: Cannot truncate without losing all conversation history");
2410
+ consola.warn("Auto-compact: Cannot truncate, system messages too large");
2380
2411
  return {
2381
2412
  payload,
2382
2413
  wasCompacted: false,
@@ -2385,8 +2416,8 @@ async function autoCompact(payload, model, config = {}) {
2385
2416
  removedMessageCount: 0
2386
2417
  };
2387
2418
  }
2388
- if (preserveIndex >= remainingMessages.length) {
2389
- consola.warn("Auto-compact: Would need to remove all messages, cannot compact");
2419
+ if (preserveIndex >= conversationMessages.length) {
2420
+ consola.warn("Auto-compact: Would need to remove all messages");
2390
2421
  return {
2391
2422
  payload,
2392
2423
  wasCompacted: false,
@@ -2395,13 +2426,12 @@ async function autoCompact(payload, model, config = {}) {
2395
2426
  removedMessageCount: 0
2396
2427
  };
2397
2428
  }
2398
- const removedMessages = remainingMessages.slice(0, preserveIndex);
2399
- let preservedMessages = remainingMessages.slice(preserveIndex);
2400
- preservedMessages = filterOrphanedToolResults(preservedMessages);
2401
- preservedMessages = ensureStartsWithUser(preservedMessages);
2402
- preservedMessages = filterOrphanedToolResults(preservedMessages);
2403
- if (preservedMessages.length === 0) {
2404
- consola.warn("Auto-compact: All messages were filtered out after cleanup, cannot compact");
2429
+ let preserved = conversationMessages.slice(preserveIndex);
2430
+ preserved = filterOrphanedToolResults(preserved);
2431
+ preserved = ensureStartsWithUser(preserved);
2432
+ preserved = filterOrphanedToolResults(preserved);
2433
+ if (preserved.length === 0) {
2434
+ consola.warn("Auto-compact: All messages filtered out after cleanup");
2405
2435
  return {
2406
2436
  payload,
2407
2437
  wasCompacted: false,
@@ -2410,29 +2440,30 @@ async function autoCompact(payload, model, config = {}) {
2410
2440
  removedMessageCount: 0
2411
2441
  };
2412
2442
  }
2413
- consola.debug(`Auto-compact: Removing ${removedMessages.length} messages, keeping ${preservedMessages.length}`);
2414
- const truncationMarker = createTruncationMarker(removedMessages.length);
2443
+ const removedCount = conversationMessages.length - preserved.length;
2444
+ const marker = createTruncationMarker(removedCount);
2415
2445
  const newPayload = {
2416
2446
  ...payload,
2417
2447
  messages: [
2418
2448
  ...systemMessages,
2419
- truncationMarker,
2420
- ...preservedMessages
2449
+ marker,
2450
+ ...preserved
2421
2451
  ]
2422
2452
  };
2423
- const newTokenCount = await getTokenCount(newPayload, model);
2424
2453
  const newBytes = JSON.stringify(newPayload).length;
2425
- consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedMessages.length} messages)`);
2454
+ const newTokenCount = await getTokenCount(newPayload, model);
2455
+ consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
2456
+ if (newBytes > byteLimit) consola.warn(`Auto-compact: Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
2426
2457
  return {
2427
2458
  payload: newPayload,
2428
2459
  wasCompacted: true,
2429
2460
  originalTokens,
2430
2461
  compactedTokens: newTokenCount.input,
2431
- removedMessageCount: removedMessages.length
2462
+ removedMessageCount: removedCount
2432
2463
  };
2433
2464
  }
2434
2465
  /**
2435
- * Create a marker to append to responses indicating auto-compaction occurred.
2466
+ * Create a marker to prepend to responses indicating auto-compaction occurred.
2436
2467
  */
2437
2468
  function createCompactionMarker(result) {
2438
2469
  if (!result.wasCompacted) return "";
@@ -4129,16 +4160,33 @@ function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameM
4129
4160
  const otherMessages = anthropicMessages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, toolNameMapping));
4130
4161
  return [...systemMessages, ...otherMessages];
4131
4162
  }
4163
+ const RESERVED_KEYWORDS = ["x-anthropic-billing-header"];
4164
+ /**
4165
+ * Filter out reserved keywords from system prompt text.
4166
+ * Copilot API rejects requests containing these keywords.
4167
+ * Removes the entire line containing the keyword to keep the prompt clean.
4168
+ */
4169
+ function filterReservedKeywords(text) {
4170
+ let filtered = text;
4171
+ for (const keyword of RESERVED_KEYWORDS) if (text.includes(keyword)) {
4172
+ consola.debug(`[Reserved Keyword] Removing line containing "${keyword}"`);
4173
+ filtered = filtered.split("\n").filter((line) => !line.includes(keyword)).join("\n");
4174
+ }
4175
+ return filtered;
4176
+ }
4132
4177
  function handleSystemPrompt(system) {
4133
4178
  if (!system) return [];
4134
4179
  if (typeof system === "string") return [{
4135
4180
  role: "system",
4136
- content: system
4137
- }];
4138
- else return [{
4139
- role: "system",
4140
- content: system.map((block) => block.text).join("\n\n")
4181
+ content: filterReservedKeywords(system)
4141
4182
  }];
4183
+ else {
4184
+ const systemText = system.map((block) => block.text).join("\n\n");
4185
+ return [{
4186
+ role: "system",
4187
+ content: filterReservedKeywords(systemText)
4188
+ }];
4189
+ }
4142
4190
  }
4143
4191
  function handleUserMessage(message) {
4144
4192
  const newMessages = [];
@@ -4983,7 +5031,7 @@ async function runServer(options) {
4983
5031
  consecutiveSuccessesForRecovery: options.consecutiveSuccesses
4984
5032
  });
4985
5033
  else consola.info("Rate limiting disabled");
4986
- if (options.autoCompact) consola.info("Auto-compact enabled: will compress context when exceeding token limits");
5034
+ if (!options.autoCompact) consola.info("Auto-compact disabled");
4987
5035
  initHistory(options.history, options.historyLimit);
4988
5036
  if (options.history) {
4989
5037
  const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
@@ -5125,10 +5173,10 @@ const start = defineCommand({
5125
5173
  default: "1000",
5126
5174
  description: "Maximum number of history entries to keep in memory (0 = unlimited)"
5127
5175
  },
5128
- "auto-compact": {
5176
+ "no-auto-compact": {
5129
5177
  type: "boolean",
5130
5178
  default: false,
5131
- description: "Automatically compress conversation history when exceeding model token limits"
5179
+ description: "Disable automatic conversation history compression when exceeding limits"
5132
5180
  }
5133
5181
  },
5134
5182
  run({ args }) {
@@ -5149,7 +5197,7 @@ const start = defineCommand({
5149
5197
  proxyEnv: args["proxy-env"],
5150
5198
  history: !args["no-history"],
5151
5199
  historyLimit: Number.parseInt(args["history-limit"], 10),
5152
- autoCompact: args["auto-compact"]
5200
+ autoCompact: !args["no-auto-compact"]
5153
5201
  });
5154
5202
  }
5155
5203
  });