@hsupu/copilot-api 0.7.9 → 0.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -17,6 +17,7 @@ import pc from "picocolors";
17
17
  import { Hono } from "hono";
18
18
  import { cors } from "hono/cors";
19
19
  import { streamSSE } from "hono/streaming";
20
+ import { countTokens } from "@anthropic-ai/tokenizer";
20
21
  import { events } from "fetch-event-stream";
21
22
 
22
23
  //#region src/lib/paths.ts
@@ -46,7 +47,9 @@ const state = {
46
47
  accountType: "individual",
47
48
  manualApprove: false,
48
49
  showToken: false,
49
- autoCompact: true
50
+ verbose: false,
51
+ autoTruncate: true,
52
+ directAnthropicApi: true
50
53
  };
51
54
 
52
55
  //#endregion
@@ -480,9 +483,23 @@ async function checkTokenExists() {
480
483
  return false;
481
484
  }
482
485
  }
483
- async function getDebugInfo() {
486
+ async function getAccountInfo() {
487
+ try {
488
+ await ensurePaths();
489
+ await setupGitHubToken();
490
+ if (!state.githubToken) return null;
491
+ const [user, copilot] = await Promise.all([getGitHubUser(), getCopilotUsage()]);
492
+ return {
493
+ user,
494
+ copilot
495
+ };
496
+ } catch {
497
+ return null;
498
+ }
499
+ }
500
+ async function getDebugInfo(includeAccount) {
484
501
  const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
485
- return {
502
+ const info = {
486
503
  version: version$1,
487
504
  runtime: getRuntimeInfo(),
488
505
  paths: {
@@ -491,9 +508,14 @@ async function getDebugInfo() {
491
508
  },
492
509
  tokenExists
493
510
  };
511
+ if (includeAccount && tokenExists) {
512
+ const account = await getAccountInfo();
513
+ if (account) info.account = account;
514
+ }
515
+ return info;
494
516
  }
495
517
  function printDebugInfoPlain(info) {
496
- consola.info(`copilot-api debug
518
+ let output = `copilot-api debug
497
519
 
498
520
  Version: ${info.version}
499
521
  Runtime: ${info.runtime.name} ${info.runtime.version} (${info.runtime.platform} ${info.runtime.arch})
@@ -502,19 +524,24 @@ Paths:
502
524
  - APP_DIR: ${info.paths.APP_DIR}
503
525
  - GITHUB_TOKEN_PATH: ${info.paths.GITHUB_TOKEN_PATH}
504
526
 
505
- Token exists: ${info.tokenExists ? "Yes" : "No"}`);
527
+ Token exists: ${info.tokenExists ? "Yes" : "No"}`;
528
+ if (info.account) output += `
529
+
530
+ Account Info:
531
+ ${JSON.stringify(info.account, null, 2)}`;
532
+ consola.info(output);
506
533
  }
507
534
  function printDebugInfoJson(info) {
508
535
  console.log(JSON.stringify(info, null, 2));
509
536
  }
510
537
  async function runDebug(options) {
511
- const debugInfo = await getDebugInfo();
512
- if (options.json) printDebugInfoJson(debugInfo);
513
- else printDebugInfoPlain(debugInfo);
538
+ const debugInfo$1 = await getDebugInfo(true);
539
+ if (options.json) printDebugInfoJson(debugInfo$1);
540
+ else printDebugInfoPlain(debugInfo$1);
514
541
  }
515
- const debug = defineCommand({
542
+ const debugInfo = defineCommand({
516
543
  meta: {
517
- name: "debug",
544
+ name: "info",
518
545
  description: "Print debug information about the application"
519
546
  },
520
547
  args: { json: {
@@ -526,6 +553,48 @@ const debug = defineCommand({
526
553
  return runDebug({ json: args.json });
527
554
  }
528
555
  });
556
+ const debugModels = defineCommand({
557
+ meta: {
558
+ name: "models",
559
+ description: "Fetch and display raw model data from Copilot API"
560
+ },
561
+ args: {
562
+ "account-type": {
563
+ type: "string",
564
+ alias: "a",
565
+ default: "individual",
566
+ description: "The type of GitHub account (individual, business, enterprise)"
567
+ },
568
+ "github-token": {
569
+ type: "string",
570
+ alias: "g",
571
+ description: "GitHub token to use (skips interactive auth)"
572
+ }
573
+ },
574
+ async run({ args }) {
575
+ state.accountType = args["account-type"];
576
+ await ensurePaths();
577
+ if (args["github-token"]) {
578
+ state.githubToken = args["github-token"];
579
+ consola.info("Using provided GitHub token");
580
+ } else await setupGitHubToken();
581
+ const { token } = await getCopilotToken();
582
+ state.copilotToken = token;
583
+ consola.info("Fetching models from Copilot API...");
584
+ const models = await getModels();
585
+ console.log(JSON.stringify(models, null, 2));
586
+ }
587
+ });
588
+ const debug = defineCommand({
589
+ meta: {
590
+ name: "debug",
591
+ description: "Debug commands for troubleshooting"
592
+ },
593
+ subCommands: {
594
+ info: debugInfo,
595
+ models: debugModels
596
+ }
597
+ });
529
598
 
530
599
  //#endregion
531
600
  //#region src/logout.ts
@@ -558,15 +627,12 @@ const SUPPORTED_VERSIONS = {
558
627
  min: "2.0.0",
559
628
  max: "2.1.10"
560
629
  },
561
- v2b: {
562
- min: "2.1.11",
563
- max: "2.1.12"
564
- }
630
+ v2b: { min: "2.1.11" }
565
631
  };
566
632
  const PATTERNS = {
567
633
  funcOriginal: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return 200000\}/,
568
634
  funcPatched: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return \d+\}/,
569
- variable: /var BS9=(\d+)/
635
+ variable: /var ([A-Za-z_$]\w*)=(\d+)(?=,\w+=20000,)/
570
636
  };
571
637
  /**
572
638
  * Parse semver version string to comparable parts
@@ -592,14 +658,14 @@ function compareVersions(a, b) {
592
658
  }
593
659
  function getPatternTypeForVersion(version$1) {
594
660
  if (compareVersions(version$1, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
595
- if (compareVersions(version$1, SUPPORTED_VERSIONS.v2b.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2b.max) <= 0) return "variable";
661
+ if (compareVersions(version$1, SUPPORTED_VERSIONS.v2b.min) >= 0) return "variable";
596
662
  return null;
597
663
  }
598
664
  /**
599
665
  * Get supported version range string for error messages
600
666
  */
601
667
  function getSupportedRangeString() {
602
- return `${SUPPORTED_VERSIONS.v2a.min}-${SUPPORTED_VERSIONS.v2a.max}, ${SUPPORTED_VERSIONS.v2b.min}-${SUPPORTED_VERSIONS.v2b.max}`;
668
+ return `${SUPPORTED_VERSIONS.v2a.min}-${SUPPORTED_VERSIONS.v2a.max}, ${SUPPORTED_VERSIONS.v2b.min}+`;
603
669
  }
604
670
  /**
605
671
  * Get Claude Code version from package.json
@@ -632,9 +698,9 @@ function findInVoltaTools(voltaHome) {
632
698
  return paths;
633
699
  }
634
700
  /**
635
- * Find Claude Code CLI path by checking common locations
701
+ * Find all Claude Code CLI paths by checking common locations
636
702
  */
637
- function findClaudeCodePath() {
703
+ function findAllClaudeCodePaths() {
638
704
  const possiblePaths = [];
639
705
  const home = process.env.HOME || "";
640
706
  const voltaHome = process.env.VOLTA_HOME || join(home, ".volta");
@@ -649,22 +715,41 @@ function findClaudeCodePath() {
649
715
  for (const base of globalPaths) possiblePaths.push(join(base, "@anthropic-ai", "claude-code", "cli.js"));
650
716
  const bunGlobal = join(home, ".bun", "install", "global");
651
717
  if (existsSync(bunGlobal)) possiblePaths.push(join(bunGlobal, "node_modules", "@anthropic-ai", "claude-code", "cli.js"));
652
- return possiblePaths.find((p) => existsSync(p)) ?? null;
718
+ return [...new Set(possiblePaths.filter((p) => existsSync(p)))];
653
719
  }
654
720
  /**
655
- * Get current context limit from Claude Code
721
+ * Get installation info for a CLI path
656
722
  */
657
- function getCurrentLimit(content) {
723
+ function getInstallationInfo(cliPath) {
724
+ const version$1 = getClaudeCodeVersion(cliPath);
725
+ const content = readFileSync(cliPath, "utf8");
726
+ const limit = getCurrentLimit(content);
727
+ return {
728
+ path: cliPath,
729
+ version: version$1,
730
+ limit
731
+ };
732
+ }
733
+ function getCurrentLimitInfo(content) {
658
734
  const varMatch = content.match(PATTERNS.variable);
659
- if (varMatch) return Number.parseInt(varMatch[1], 10);
735
+ if (varMatch) return {
736
+ limit: Number.parseInt(varMatch[2], 10),
737
+ varName: varMatch[1]
738
+ };
660
739
  const funcMatch = content.match(PATTERNS.funcPatched);
661
740
  if (funcMatch) {
662
741
  const limitMatch = funcMatch[0].match(/return (\d+)\}$/);
663
- return limitMatch ? Number.parseInt(limitMatch[1], 10) : null;
742
+ return limitMatch ? { limit: Number.parseInt(limitMatch[1], 10) } : null;
664
743
  }
665
744
  return null;
666
745
  }
667
746
  /**
747
+ * Get current context limit from Claude Code (legacy wrapper)
748
+ */
749
+ function getCurrentLimit(content) {
750
+ return getCurrentLimitInfo(content)?.limit ?? null;
751
+ }
752
+ /**
668
753
  * Check if Claude Code version is supported for patching
669
754
  */
670
755
  function checkVersionSupport(cliPath) {
@@ -696,22 +781,25 @@ function patchClaudeCode(cliPath, newLimit) {
696
781
  const versionCheck = checkVersionSupport(cliPath);
697
782
  if (!versionCheck.supported) {
698
783
  consola.error(versionCheck.error);
699
- return false;
784
+ return "failed";
700
785
  }
701
786
  consola.info(`Claude Code version: ${versionCheck.version}`);
702
- if (getCurrentLimit(content) === newLimit) {
703
- consola.info(`Already patched with limit ${newLimit}`);
704
- return true;
705
- }
787
+ const limitInfo = getCurrentLimitInfo(content);
788
+ if (limitInfo?.limit === newLimit) return "already_patched";
706
789
  let newContent;
707
- if (versionCheck.patternType === "variable") newContent = content.replace(PATTERNS.variable, `var BS9=${newLimit}`);
708
- else {
790
+ if (versionCheck.patternType === "variable") {
791
+ if (!limitInfo?.varName) {
792
+ consola.error("Could not detect variable name for patching");
793
+ return "failed";
794
+ }
795
+ newContent = content.replace(PATTERNS.variable, `var ${limitInfo.varName}=${newLimit}`);
796
+ } else {
709
797
  const replacement = `function HR(A){if(A.includes("[1m]"))return 1e6;return ${newLimit}}`;
710
798
  const pattern = PATTERNS.funcOriginal.test(content) ? PATTERNS.funcOriginal : PATTERNS.funcPatched;
711
799
  newContent = content.replace(pattern, replacement);
712
800
  }
713
801
  writeFileSync(cliPath, newContent);
714
- return true;
802
+ return "success";
715
803
  }
716
804
  /**
717
805
  * Restore Claude Code to original 200k limit
@@ -724,13 +812,19 @@ function restoreClaudeCode(cliPath) {
724
812
  return false;
725
813
  }
726
814
  consola.info(`Claude Code version: ${versionCheck.version}`);
727
- if (getCurrentLimit(content) === 2e5) {
815
+ const limitInfo = getCurrentLimitInfo(content);
816
+ if (limitInfo?.limit === 2e5) {
728
817
  consola.info("Already at original 200000 limit");
729
818
  return true;
730
819
  }
731
820
  let newContent;
732
- if (versionCheck.patternType === "variable") newContent = content.replace(PATTERNS.variable, "var BS9=200000");
733
- else newContent = content.replace(PATTERNS.funcPatched, "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}");
821
+ if (versionCheck.patternType === "variable") {
822
+ if (!limitInfo?.varName) {
823
+ consola.error("Could not detect variable name for restoring");
824
+ return false;
825
+ }
826
+ newContent = content.replace(PATTERNS.variable, `var ${limitInfo.varName}=200000`);
827
+ } else newContent = content.replace(PATTERNS.funcPatched, "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}");
734
828
  writeFileSync(cliPath, newContent);
735
829
  return true;
736
830
  }
@@ -739,7 +833,7 @@ function showStatus(cliPath, currentLimit) {
739
833
  if (version$1) consola.info(`Claude Code version: ${version$1}`);
740
834
  if (currentLimit === null) {
741
835
  consola.warn("Could not detect current limit - CLI may have been updated");
742
- consola.info("Look for the BS9 variable or HR function pattern in cli.js");
836
+ consola.info("Look for a variable like 'var XXX=200000' followed by ',YYY=20000,' in cli.js");
743
837
  } else if (currentLimit === 2e5) consola.info("Status: Original (200k context window)");
744
838
  else consola.info(`Status: Patched (${currentLimit} context window)`);
745
839
  }
@@ -773,17 +867,42 @@ const patchClaude = defineCommand({
773
867
  description: "Show current patch status without modifying"
774
868
  }
775
869
  },
776
- run({ args }) {
777
- const cliPath = args.path || findClaudeCodePath();
778
- if (!cliPath) {
779
- consola.error("Could not find Claude Code installation");
780
- consola.info("Searched in: volta, npm global, bun global");
781
- consola.info("Use --path to specify the path to cli.js manually");
782
- process.exit(1);
783
- }
784
- if (!existsSync(cliPath)) {
785
- consola.error(`File not found: ${cliPath}`);
786
- process.exit(1);
870
+ async run({ args }) {
871
+ let cliPath;
872
+ if (args.path) {
873
+ cliPath = args.path;
874
+ if (!existsSync(cliPath)) {
875
+ consola.error(`File not found: ${cliPath}`);
876
+ process.exit(1);
877
+ }
878
+ } else {
879
+ const installations = findAllClaudeCodePaths();
880
+ if (installations.length === 0) {
881
+ consola.error("Could not find Claude Code installation");
882
+ consola.info("Searched in: volta, npm global, bun global");
883
+ consola.info("Use --path to specify the path to cli.js manually");
884
+ process.exit(1);
885
+ }
886
+ if (installations.length === 1) cliPath = installations[0];
887
+ else {
888
+ consola.info(`Found ${installations.length} Claude Code installations:`);
889
+ const options = installations.map((path$1) => {
890
+ const info = getInstallationInfo(path$1);
891
+ let status = "unknown";
892
+ if (info.limit === 2e5) status = "original";
893
+ else if (info.limit) status = `patched: ${info.limit}`;
894
+ return {
895
+ label: `v${info.version ?? "?"} (${status}) - ${path$1}`,
896
+ value: path$1
897
+ };
898
+ });
899
+ const selected = await consola.prompt("Select installation to patch:", {
900
+ type: "select",
901
+ options
902
+ });
903
+ if (typeof selected === "symbol") process.exit(0);
904
+ cliPath = selected;
905
+ }
787
906
  }
788
907
  consola.info(`Claude Code path: ${cliPath}`);
789
908
  const content = readFileSync(cliPath, "utf8");
@@ -806,13 +925,14 @@ const patchClaude = defineCommand({
806
925
  consola.error("Invalid limit value. Must be a number >= 1000");
807
926
  process.exit(1);
808
927
  }
809
- if (patchClaudeCode(cliPath, limit)) {
810
- consola.success(`Patched context window: 200000 → ${limit}`);
928
+ const result = patchClaudeCode(cliPath, limit);
929
+ if (result === "success") {
930
+ consola.success(`Patched context window: ${currentLimit ?? 2e5} → ${limit}`);
811
931
  consola.info("Note: You may need to re-run this after Claude Code updates");
812
- } else {
932
+ } else if (result === "already_patched") consola.success(`Already patched with limit ${limit}`);
933
+ else {
813
934
  consola.error("Failed to patch - pattern not found");
814
935
  consola.info("Claude Code may have been updated to a new version");
815
- consola.info("Check the cli.js for the HR function pattern");
816
936
  process.exit(1);
817
937
  }
818
938
  }
@@ -821,7 +941,7 @@ const patchClaude = defineCommand({
821
941
  //#endregion
822
942
  //#region package.json
823
943
  var name = "@hsupu/copilot-api";
824
- var version = "0.7.9";
944
+ var version = "0.7.11";
825
945
  var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
826
946
  var keywords = [
827
947
  "proxy",
@@ -854,6 +974,7 @@ var scripts = {
854
974
  var simple_git_hooks = { "pre-commit": "bun x lint-staged" };
855
975
  var lint_staged = { "*": "bun run lint --fix" };
856
976
  var dependencies = {
977
+ "@anthropic-ai/tokenizer": "^0.0.4",
857
978
  "citty": "^0.1.6",
858
979
  "clipboardy": "^5.0.0",
859
980
  "consola": "^3.4.2",
@@ -2155,9 +2276,61 @@ const numTokensForTools = (tools, encoder, constants) => {
2155
2276
  return funcTokenCount;
2156
2277
  };
2157
2278
  /**
2158
- * Calculate the token count of messages, supporting multiple GPT encoders
2279
+ * Check if a model is an Anthropic model
2280
+ */
2281
+ function isAnthropicModel(model) {
2282
+ return model.vendor === "Anthropic";
2283
+ }
2284
+ /**
2285
+ * Convert a message to plain text for Anthropic tokenizer
2286
+ */
2287
+ function messageToText(message) {
2288
+ const parts = [];
2289
+ parts.push(`${message.role}:`);
2290
+ if (typeof message.content === "string") parts.push(message.content);
2291
+ else if (Array.isArray(message.content)) {
2292
+ for (const part of message.content) if ("text" in part && part.text) parts.push(part.text);
2293
+ else if (part.type === "image_url") parts.push("[image]");
2294
+ }
2295
+ if (message.tool_calls) for (const tc of message.tool_calls) parts.push(JSON.stringify(tc));
2296
+ if ("tool_call_id" in message && message.tool_call_id) parts.push(`tool_call_id:${message.tool_call_id}`);
2297
+ return parts.join("\n");
2298
+ }
2299
+ /**
2300
+ * Convert tools to text for Anthropic tokenizer
2301
+ */
2302
+ function toolsToText(tools) {
2303
+ return tools.map((tool) => JSON.stringify(tool)).join("\n");
2304
+ }
2305
+ /**
2306
+ * Calculate token count using Anthropic's official tokenizer
2307
+ */
2308
+ function getAnthropicTokenCount(payload) {
2309
+ const inputMessages = payload.messages.filter((msg) => msg.role !== "assistant");
2310
+ const outputMessages = payload.messages.filter((msg) => msg.role === "assistant");
2311
+ const inputText = inputMessages.map((msg) => messageToText(msg)).join("\n\n");
2312
+ const outputText = outputMessages.map((msg) => messageToText(msg)).join("\n\n");
2313
+ let inputTokens = countTokens(inputText);
2314
+ let outputTokens = countTokens(outputText);
2315
+ if (payload.tools && payload.tools.length > 0) {
2316
+ const toolsText = toolsToText(payload.tools);
2317
+ inputTokens += countTokens(toolsText);
2318
+ }
2319
+ inputTokens += inputMessages.length * 3;
2320
+ outputTokens += outputMessages.length * 3;
2321
+ inputTokens += 3;
2322
+ return {
2323
+ input: inputTokens,
2324
+ output: outputTokens
2325
+ };
2326
+ }
2327
+ /**
2328
+ * Calculate the token count of messages.
2329
+ * Uses Anthropic's official tokenizer for Anthropic models,
2330
+ * and GPT tokenizers for other models.
2159
2331
  */
2160
2332
  const getTokenCount = async (payload, model) => {
2333
+ if (isAnthropicModel(model)) return getAnthropicTokenCount(payload);
2161
2334
  const tokenizer = getTokenizerFromModel(model);
2162
2335
  const encoder = await getEncodeChatFunction(tokenizer);
2163
2336
  const simplifiedMessages = payload.messages;
@@ -2174,10 +2347,10 @@ const getTokenCount = async (payload, model) => {
2174
2347
  };
2175
2348
 
2176
2349
  //#endregion
2177
- //#region src/lib/auto-compact.ts
2350
+ //#region src/lib/auto-truncate.ts
2178
2351
  const DEFAULT_CONFIG = {
2179
2352
  safetyMarginPercent: 2,
2180
- maxRequestBodyBytes: 500 * 1024
2353
+ maxRequestBodyBytes: 510 * 1024
2181
2354
  };
2182
2355
  /** Dynamic byte limit that adjusts based on 413 errors */
2183
2356
  let dynamicByteLimit = null;
@@ -2187,7 +2360,7 @@ let dynamicByteLimit = null;
2187
2360
  function onRequestTooLarge(failingBytes) {
2188
2361
  const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
2189
2362
  dynamicByteLimit = newLimit;
2190
- consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
2363
+ consola.info(`[AutoTruncate] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
2191
2364
  }
2192
2365
  function calculateLimits(model, config) {
2193
2366
  const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
@@ -2243,14 +2416,14 @@ function filterOrphanedToolResults(messages) {
2243
2416
  }
2244
2417
  return true;
2245
2418
  });
2246
- if (removedCount > 0) consola.debug(`Auto-compact: Filtered ${removedCount} orphaned tool_result`);
2419
+ if (removedCount > 0) consola.debug(`[AutoTruncate] Filtered ${removedCount} orphaned tool_result`);
2247
2420
  return filtered;
2248
2421
  }
2249
2422
  /** Ensure messages start with a user message */
2250
2423
  function ensureStartsWithUser(messages) {
2251
2424
  let startIndex = 0;
2252
2425
  while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
2253
- if (startIndex > 0) consola.debug(`Auto-compact: Skipped ${startIndex} leading non-user messages`);
2426
+ if (startIndex > 0) consola.debug(`[AutoTruncate] Skipped ${startIndex} leading non-user messages`);
2254
2427
  return messages.slice(startIndex);
2255
2428
  }
2256
2429
  /**
@@ -2316,10 +2489,10 @@ function createTruncationMarker(removedCount) {
2316
2489
  };
2317
2490
  }
2318
2491
  /**
2319
- * Perform auto-compaction on a payload that exceeds limits.
2492
+ * Perform auto-truncation on a payload that exceeds limits.
2320
2493
  * Uses binary search to find the optimal truncation point.
2321
2494
  */
2322
- async function autoCompact(payload, model, config = {}) {
2495
+ async function autoTruncate(payload, model, config = {}) {
2323
2496
  const cfg = {
2324
2497
  ...DEFAULT_CONFIG,
2325
2498
  ...config
@@ -2340,13 +2513,13 @@ async function autoCompact(payload, model, config = {}) {
2340
2513
  if (exceedsTokens && exceedsBytes) reason = "tokens and size";
2341
2514
  else if (exceedsBytes) reason = "size";
2342
2515
  else reason = "tokens";
2343
- consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
2516
+ consola.info(`[AutoTruncate] Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
2344
2517
  const { systemMessages, conversationMessages } = extractSystemMessages(payload.messages);
2345
2518
  const messagesJson = JSON.stringify(payload.messages);
2346
2519
  const payloadOverhead = originalBytes - messagesJson.length;
2347
2520
  const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
2348
2521
  const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
2349
- consola.debug(`Auto-compact: overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
2522
+ consola.debug(`[AutoTruncate] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
2350
2523
  const preserveIndex = findOptimalPreserveIndex({
2351
2524
  messages: conversationMessages,
2352
2525
  systemBytes,
@@ -2356,7 +2529,7 @@ async function autoCompact(payload, model, config = {}) {
2356
2529
  byteLimit
2357
2530
  });
2358
2531
  if (preserveIndex === 0) {
2359
- consola.warn("Auto-compact: Cannot truncate, system messages too large");
2532
+ consola.warn("[AutoTruncate] Cannot truncate, system messages too large");
2360
2533
  return {
2361
2534
  payload,
2362
2535
  wasCompacted: false,
@@ -2366,7 +2539,7 @@ async function autoCompact(payload, model, config = {}) {
2366
2539
  };
2367
2540
  }
2368
2541
  if (preserveIndex >= conversationMessages.length) {
2369
- consola.warn("Auto-compact: Would need to remove all messages");
2542
+ consola.warn("[AutoTruncate] Would need to remove all messages");
2370
2543
  return {
2371
2544
  payload,
2372
2545
  wasCompacted: false,
@@ -2380,7 +2553,7 @@ async function autoCompact(payload, model, config = {}) {
2380
2553
  preserved = ensureStartsWithUser(preserved);
2381
2554
  preserved = filterOrphanedToolResults(preserved);
2382
2555
  if (preserved.length === 0) {
2383
- consola.warn("Auto-compact: All messages filtered out after cleanup");
2556
+ consola.warn("[AutoTruncate] All messages filtered out after cleanup");
2384
2557
  return {
2385
2558
  payload,
2386
2559
  wasCompacted: false,
@@ -2401,8 +2574,8 @@ async function autoCompact(payload, model, config = {}) {
2401
2574
  };
2402
2575
  const newBytes = JSON.stringify(newPayload).length;
2403
2576
  const newTokenCount = await getTokenCount(newPayload, model);
2404
- consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
2405
- if (newBytes > byteLimit) consola.warn(`Auto-compact: Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
2577
+ consola.info(`[AutoTruncate] ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
2578
+ if (newBytes > byteLimit) consola.warn(`[AutoTruncate] Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
2406
2579
  return {
2407
2580
  payload: newPayload,
2408
2581
  wasCompacted: true,
@@ -2412,13 +2585,13 @@ async function autoCompact(payload, model, config = {}) {
2412
2585
  };
2413
2586
  }
2414
2587
  /**
2415
- * Create a marker to prepend to responses indicating auto-compaction occurred.
2588
+ * Create a marker to prepend to responses indicating auto-truncation occurred.
2416
2589
  */
2417
- function createCompactionMarker(result) {
2590
+ function createTruncationResponseMarker(result) {
2418
2591
  if (!result.wasCompacted) return "";
2419
2592
  const reduction = result.originalTokens - result.compactedTokens;
2420
2593
  const percentage = Math.round(reduction / result.originalTokens * 100);
2421
- return `\n\n---\n[Auto-compacted: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
2594
+ return `\n\n---\n[Auto-truncated: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
2422
2595
  }
2423
2596
 
2424
2597
  //#endregion
@@ -2506,37 +2679,37 @@ function recordStreamError(opts) {
2506
2679
  function isNonStreaming(response) {
2507
2680
  return Object.hasOwn(response, "choices");
2508
2681
  }
2509
- /** Build final payload with auto-compact if needed */
2682
+ /** Build final payload with auto-truncate if needed */
2510
2683
  async function buildFinalPayload(payload, model) {
2511
- if (!state.autoCompact || !model) {
2512
- if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
2684
+ if (!state.autoTruncate || !model) {
2685
+ if (state.autoTruncate && !model) consola.warn(`Auto-truncate: Model '${payload.model}' not found in cached models, skipping`);
2513
2686
  return {
2514
2687
  finalPayload: payload,
2515
- compactResult: null
2688
+ truncateResult: null
2516
2689
  };
2517
2690
  }
2518
2691
  try {
2519
2692
  const check = await checkNeedsCompaction(payload, model);
2520
- consola.debug(`Auto-compact check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
2693
+ consola.debug(`Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
2521
2694
  if (!check.needed) return {
2522
2695
  finalPayload: payload,
2523
- compactResult: null
2696
+ truncateResult: null
2524
2697
  };
2525
2698
  let reasonText;
2526
2699
  if (check.reason === "both") reasonText = "tokens and size";
2527
2700
  else if (check.reason === "bytes") reasonText = "size";
2528
2701
  else reasonText = "tokens";
2529
- consola.info(`Auto-compact triggered: exceeds ${reasonText} limit`);
2530
- const compactResult = await autoCompact(payload, model);
2702
+ consola.info(`Auto-truncate triggered: exceeds ${reasonText} limit`);
2703
+ const truncateResult = await autoTruncate(payload, model);
2531
2704
  return {
2532
- finalPayload: compactResult.payload,
2533
- compactResult
2705
+ finalPayload: truncateResult.payload,
2706
+ truncateResult
2534
2707
  };
2535
2708
  } catch (error) {
2536
- consola.warn("Auto-compact failed, proceeding with original payload:", error instanceof Error ? error.message : error);
2709
+ consola.warn("Auto-truncate failed, proceeding with original payload:", error instanceof Error ? error.message : error);
2537
2710
  return {
2538
2711
  finalPayload: payload,
2539
- compactResult: null
2712
+ truncateResult: null
2540
2713
  };
2541
2714
  }
2542
2715
  }
@@ -2580,7 +2753,7 @@ async function logPayloadSizeInfo(payload, model) {
2580
2753
  if (largeMessages > 0) consola.info(` Large messages (>50KB): ${largeMessages}`);
2581
2754
  consola.info("");
2582
2755
  consola.info(" Suggestions:");
2583
- if (!state.autoCompact) consola.info(" • Enable --auto-compact to automatically truncate history");
2756
+ if (!state.autoTruncate) consola.info(" • Enable --auto-truncate to automatically truncate history");
2584
2757
  if (imageCount > 0) consola.info(" • Remove or resize large images in the conversation");
2585
2758
  consola.info(" • Start a new conversation with /clear or /reset");
2586
2759
  consola.info(" • Reduce conversation history by deleting old messages");
@@ -2612,8 +2785,8 @@ async function handleCompletion$1(c) {
2612
2785
  };
2613
2786
  const selectedModel = state.models?.data.find((model) => model.id === originalPayload.model);
2614
2787
  await logTokenCount(originalPayload, selectedModel);
2615
- const { finalPayload, compactResult } = await buildFinalPayload(originalPayload, selectedModel);
2616
- if (compactResult) ctx.compactResult = compactResult;
2788
+ const { finalPayload, truncateResult } = await buildFinalPayload(originalPayload, selectedModel);
2789
+ if (truncateResult) ctx.truncateResult = truncateResult;
2617
2790
  const payload = isNullish(finalPayload.max_tokens) ? {
2618
2791
  ...finalPayload,
2619
2792
  max_tokens: selectedModel?.capabilities?.limits?.max_output_tokens
@@ -2666,8 +2839,8 @@ async function logTokenCount(payload, selectedModel) {
2666
2839
  function handleNonStreamingResponse$1(c, originalResponse, ctx) {
2667
2840
  consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
2668
2841
  let response = originalResponse;
2669
- if (ctx.compactResult?.wasCompacted && response.choices[0]?.message.content) {
2670
- const marker = createCompactionMarker(ctx.compactResult);
2842
+ if (state.verbose && ctx.truncateResult?.wasCompacted && response.choices[0]?.message.content) {
2843
+ const marker = createTruncationResponseMarker(ctx.truncateResult);
2671
2844
  response = {
2672
2845
  ...response,
2673
2846
  choices: response.choices.map((choice$1, i) => i === 0 ? {
@@ -2735,8 +2908,8 @@ async function handleStreamingResponse$1(opts) {
2735
2908
  const { stream, response, payload, ctx } = opts;
2736
2909
  const acc = createStreamAccumulator();
2737
2910
  try {
2738
- if (ctx.compactResult?.wasCompacted) {
2739
- const marker = createCompactionMarker(ctx.compactResult);
2911
+ if (state.verbose && ctx.truncateResult?.wasCompacted) {
2912
+ const marker = createTruncationResponseMarker(ctx.truncateResult);
2740
2913
  const markerChunk = {
2741
2914
  id: `compact-marker-${Date.now()}`,
2742
2915
  object: "chat.completion.chunk",
@@ -4109,16 +4282,33 @@ function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameM
4109
4282
  const otherMessages = anthropicMessages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, toolNameMapping));
4110
4283
  return [...systemMessages, ...otherMessages];
4111
4284
  }
4285
const RESERVED_KEYWORDS = ["x-anthropic-billing-header", "x-anthropic-billing"];
/**
 * Strip lines containing Copilot-reserved keywords from system prompt text.
 * The Copilot API rejects requests whose prompt contains these markers, so
 * each offending line is dropped wholesale to keep the prompt clean.
 */
function filterReservedKeywords(text) {
  let result = text;
  for (const keyword of RESERVED_KEYWORDS) {
    // Gate on the original text so each present keyword is logged once.
    if (!text.includes(keyword)) continue;
    consola.debug(`[Reserved Keyword] Removing line containing "${keyword}"`);
    const keptLines = result.split("\n").filter((line) => !line.includes(keyword));
    result = keptLines.join("\n");
  }
  return result;
}
4112
4299
/**
 * Convert an Anthropic `system` field (plain string or array of text blocks)
 * into OpenAI-style system messages, with reserved keywords filtered out.
 * Returns an empty array when no system prompt is present.
 */
function handleSystemPrompt(system) {
  if (!system) return [];
  // Array form: join block texts with blank lines before filtering.
  const systemText = typeof system === "string"
    ? system
    : system.map((block) => block.text).join("\n\n");
  return [{
    role: "system",
    content: filterReservedKeywords(systemText)
  }];
}
4123
4313
  function handleUserMessage(message) {
4124
4314
  const newMessages = [];
@@ -4317,7 +4507,10 @@ function getAnthropicToolUseBlocks(toolCalls, toolNameMapping) {
4317
4507
  //#endregion
4318
4508
  //#region src/routes/messages/count-tokens-handler.ts
4319
4509
  /**
4320
- * Handles token counting for Anthropic messages
4510
+ * Handles token counting for Anthropic messages.
4511
+ *
4512
+ * For Anthropic models (vendor === "Anthropic"), uses the official Anthropic tokenizer.
4513
+ * For other models, uses GPT tokenizers with appropriate buffers.
4321
4514
  */
4322
4515
  async function handleCountTokens(c) {
4323
4516
  try {
@@ -4329,6 +4522,7 @@ async function handleCountTokens(c) {
4329
4522
  consola.warn("Model not found, returning default token count");
4330
4523
  return c.json({ input_tokens: 1 });
4331
4524
  }
4525
+ const isAnthropicModel$1 = selectedModel.vendor === "Anthropic";
4332
4526
  const tokenCount = await getTokenCount(openAIPayload, selectedModel);
4333
4527
  if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
4334
4528
  let mcpToolExist = false;
@@ -4339,9 +4533,8 @@ async function handleCountTokens(c) {
4339
4533
  }
4340
4534
  }
4341
4535
  let finalTokenCount = tokenCount.input + tokenCount.output;
4342
- if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * 1.15);
4343
- else if (anthropicPayload.model.startsWith("grok")) finalTokenCount = Math.round(finalTokenCount * 1.03);
4344
- consola.debug("Token count:", finalTokenCount);
4536
+ if (!isAnthropicModel$1) finalTokenCount = anthropicPayload.model.startsWith("grok") ? Math.round(finalTokenCount * 1.03) : Math.round(finalTokenCount * 1.05);
4537
+ consola.debug(`Token count: ${finalTokenCount} (${isAnthropicModel$1 ? "Anthropic tokenizer" : "GPT tokenizer"})`);
4345
4538
  return c.json({ input_tokens: finalTokenCount });
4346
4539
  } catch (error) {
4347
4540
  consola.error("Error counting tokens:", error);
@@ -4349,6 +4542,101 @@ async function handleCountTokens(c) {
4349
4542
  }
4350
4543
  }
4351
4544
 
4545
+ //#endregion
4546
+ //#region src/services/copilot/create-anthropic-messages.ts
4547
/**
 * Fields that are supported by Copilot's Anthropic API endpoint.
 * Any other fields in the incoming request will be stripped.
 */
const COPILOT_SUPPORTED_FIELDS = new Set([
  "model",
  "messages",
  "max_tokens",
  "system",
  "metadata",
  "stop_sequences",
  "stream",
  "temperature",
  "top_p",
  "top_k",
  "tools",
  "tool_choice",
  "thinking",
  "service_tier"
]);
/**
 * Filter payload to only include fields supported by Copilot's Anthropic API.
 * This prevents errors like "Extra inputs are not permitted" for unsupported
 * fields like `output_config`.
 */
function filterPayloadForCopilot(payload) {
  const kept = {};
  const dropped = [];
  for (const key of Object.keys(payload)) {
    if (COPILOT_SUPPORTED_FIELDS.has(key)) kept[key] = payload[key];
    else dropped.push(key);
  }
  if (dropped.length > 0) consola.debug(`[DirectAnthropic] Filtered unsupported fields: ${dropped.join(", ")}`);
  return kept;
}
4580
/**
 * Adjust max_tokens if thinking is enabled.
 * According to Anthropic docs, max_tokens must be greater than
 * thinking.budget_tokens; max_tokens = thinking_budget + response_tokens.
 * Returns the payload unchanged (same reference) when no adjustment is needed.
 */
function adjustMaxTokensForThinking(payload) {
  const budget = payload.thinking?.budget_tokens;
  if (!budget) return payload;
  if (payload.max_tokens > budget) return payload;
  // Reserve room for the visible response on top of the thinking budget,
  // capped at 16384 extra tokens.
  const newMaxTokens = budget + Math.min(16384, budget);
  consola.debug(`[DirectAnthropic] Adjusted max_tokens: ${payload.max_tokens} → ${newMaxTokens} (thinking.budget_tokens=${budget})`);
  return {
    ...payload,
    max_tokens: newMaxTokens
  };
}
4600
/**
 * Create messages using Anthropic-style API directly.
 * This bypasses the OpenAI translation layer for Anthropic models.
 * Returns the parsed JSON body, or an SSE event iterator when streaming.
 * @throws Error when no Copilot token is available; HTTPError on non-2xx.
 */
async function createAnthropicMessages(payload) {
  if (!state.copilotToken) throw new Error("Copilot token not found");
  const body = adjustMaxTokensForThinking(filterPayloadForCopilot(payload));
  // Vision header is required whenever any message carries an image block.
  const enableVision = body.messages.some((msg) => {
    if (typeof msg.content === "string") return false;
    return msg.content.some((block) => block.type === "image");
  });
  // A prior assistant turn implies an agent-driven (multi-turn) call.
  const initiator = body.messages.some((msg) => msg.role === "assistant") ? "agent" : "user";
  const headers = {
    ...copilotHeaders(state, enableVision),
    "X-Initiator": initiator,
    "anthropic-version": "2023-06-01"
  };
  consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
  const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
    method: "POST",
    headers,
    body: JSON.stringify(body)
  });
  if (!response.ok) {
    consola.error("Failed to create Anthropic messages", response);
    throw await HTTPError.fromResponse("Failed to create Anthropic messages", response);
  }
  return payload.stream ? events(response) : await response.json();
}
4631
/**
 * Check if a model supports direct Anthropic API.
 * Returns true only when the feature flag is enabled and the model id
 * resolves to an Anthropic-vendor entry in the cached model list.
 */
function supportsDirectAnthropicApi(modelId) {
  if (!state.directAnthropicApi) return false;
  const model = state.models?.data.find((m) => m.id === modelId);
  return model?.vendor === "Anthropic";
}
4639
+
4352
4640
  //#endregion
4353
4641
  //#region src/routes/messages/stream-translation.ts
4354
4642
  function isToolBlockOpen(state$1) {
@@ -4511,11 +4799,128 @@ async function handleCompletion(c) {
4511
4799
  trackingId,
4512
4800
  startTime
4513
4801
  };
4802
+ if (supportsDirectAnthropicApi(anthropicPayload.model)) return handleDirectAnthropicCompletion(c, anthropicPayload, ctx);
4803
+ return handleTranslatedCompletion(c, anthropicPayload, ctx);
4804
+ }
4805
/**
 * Handle completion using direct Anthropic API (no translation needed).
 * Applies manual approval and adaptive rate limiting, then dispatches to the
 * streaming or non-streaming response handler; errors are recorded and rethrown.
 */
async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
  consola.debug("Using direct Anthropic API path for model:", anthropicPayload.model);
  if (state.manualApprove) await awaitApproval();
  try {
    const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createAnthropicMessages(anthropicPayload));
    ctx.queueWaitMs = queueWaitMs;
    // A non-iterable result is a complete JSON body — return it directly.
    if (!(Symbol.asyncIterator in response)) return handleDirectAnthropicNonStreamingResponse(c, response, ctx);
    consola.debug("Streaming response from Copilot (direct Anthropic)");
    updateTrackerStatus(ctx.trackingId, "streaming");
    return streamSSE(c, (stream) => handleDirectAnthropicStreamingResponse({
      stream,
      response,
      anthropicPayload,
      ctx
    }));
  } catch (error) {
    recordErrorResponse(ctx, anthropicPayload.model, error);
    throw error;
  }
}
4832
/**
 * Handle non-streaming direct Anthropic response.
 * Records the response (with content blocks normalized for history) and token
 * usage, then forwards the upstream body to the client unchanged.
 */
function handleDirectAnthropicNonStreamingResponse(c, response, ctx) {
  consola.debug("Non-streaming response from Copilot (direct Anthropic):", JSON.stringify(response).slice(-400));
  // Normalize a raw Anthropic content block into the shape stored in history;
  // tool inputs are serialized, unknown block types keep only their type tag.
  const toHistoryBlock = (block) => {
    switch (block.type) {
      case "text":
        return { type: "text", text: block.text };
      case "tool_use":
        return { type: "tool_use", id: block.id, name: block.name, input: JSON.stringify(block.input) };
      case "thinking":
        return { type: "thinking", thinking: block.thinking };
      default:
        return { type: block.type };
    }
  };
  recordResponse(ctx.historyId, {
    success: true,
    model: response.model,
    usage: response.usage,
    stop_reason: response.stop_reason ?? void 0,
    content: {
      role: "assistant",
      content: response.content.map(toHistoryBlock)
    },
    toolCalls: extractToolCallsFromAnthropicContent(response.content)
  }, Date.now() - ctx.startTime);
  if (ctx.trackingId) requestTracker.updateRequest(ctx.trackingId, {
    inputTokens: response.usage.input_tokens,
    outputTokens: response.usage.output_tokens,
    queueWaitMs: ctx.queueWaitMs
  });
  return c.json(response);
}
4873
/**
 * Handle streaming direct Anthropic response (passthrough SSE events).
 *
 * Forwards each upstream SSE frame to the client unchanged while feeding the
 * parsed event through an accumulator so usage and content can be recorded
 * when the stream ends. On failure, records the error and emits an
 * Anthropic-style error event so the client stream terminates cleanly.
 */
async function handleDirectAnthropicStreamingResponse(opts) {
  const { stream, response, anthropicPayload, ctx } = opts;
  // Accumulates content and token counts across events for history/tracking.
  const acc = createAnthropicStreamAccumulator();
  try {
    for await (const rawEvent of response) {
      consola.debug("Direct Anthropic raw stream event:", JSON.stringify(rawEvent));
      // "[DONE]" terminates the stream; frames without data are skipped.
      if (rawEvent.data === "[DONE]") break;
      if (!rawEvent.data) continue;
      let event;
      try {
        event = JSON.parse(rawEvent.data);
      } catch (parseError) {
        // Skip malformed events rather than aborting the whole stream.
        consola.error("Failed to parse Anthropic stream event:", parseError, rawEvent.data);
        continue;
      }
      processAnthropicEvent(event, acc);
      // Pass the frame through verbatim; fall back to the parsed `type` when
      // the upstream frame carried no explicit `event:` field.
      await stream.writeSSE({
        event: rawEvent.event || event.type,
        data: rawEvent.data
      });
    }
    recordStreamingResponse(acc, anthropicPayload.model, ctx);
    completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
  } catch (error) {
    consola.error("Direct Anthropic stream error:", error);
    recordStreamError({
      acc,
      fallbackModel: anthropicPayload.model,
      ctx,
      error
    });
    failTracking(ctx.trackingId, error);
    // Surface the failure to the client as a well-formed Anthropic error event.
    const errorEvent = translateErrorToAnthropicErrorEvent();
    await stream.writeSSE({
      event: errorEvent.type,
      data: JSON.stringify(errorEvent)
    });
  }
}
4915
+ /**
4916
+ * Handle completion using OpenAI translation path (legacy)
4917
+ */
4918
+ async function handleTranslatedCompletion(c, anthropicPayload, ctx) {
4514
4919
  const { payload: translatedPayload, toolNameMapping } = translateToOpenAI(anthropicPayload);
4515
4920
  consola.debug("Translated OpenAI request payload:", JSON.stringify(translatedPayload));
4516
4921
  const selectedModel = state.models?.data.find((model) => model.id === translatedPayload.model);
4517
- const { finalPayload: openAIPayload, compactResult } = await buildFinalPayload(translatedPayload, selectedModel);
4518
- if (compactResult) ctx.compactResult = compactResult;
4922
+ const { finalPayload: openAIPayload, truncateResult } = await buildFinalPayload(translatedPayload, selectedModel);
4923
+ if (truncateResult) ctx.truncateResult = truncateResult;
4519
4924
  if (state.manualApprove) await awaitApproval();
4520
4925
  try {
4521
4926
  const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(openAIPayload));
@@ -4527,7 +4932,7 @@ async function handleCompletion(c) {
4527
4932
  ctx
4528
4933
  });
4529
4934
  consola.debug("Streaming response from Copilot");
4530
- updateTrackerStatus(trackingId, "streaming");
4935
+ updateTrackerStatus(ctx.trackingId, "streaming");
4531
4936
  return streamSSE(c, async (stream) => {
4532
4937
  await handleStreamingResponse({
4533
4938
  stream,
@@ -4548,8 +4953,8 @@ function handleNonStreamingResponse(opts) {
4548
4953
  consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
4549
4954
  let anthropicResponse = translateToAnthropic(response, toolNameMapping);
4550
4955
  consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
4551
- if (ctx.compactResult?.wasCompacted) {
4552
- const marker = createCompactionMarker(ctx.compactResult);
4956
+ if (state.verbose && ctx.truncateResult?.wasCompacted) {
4957
+ const marker = createTruncationResponseMarker(ctx.truncateResult);
4553
4958
  anthropicResponse = prependMarkerToAnthropicResponse(anthropicResponse, marker);
4554
4959
  }
4555
4960
  recordResponse(ctx.historyId, {
@@ -4621,9 +5026,9 @@ async function handleStreamingResponse(opts) {
4621
5026
  };
4622
5027
  const acc = createAnthropicStreamAccumulator();
4623
5028
  try {
4624
- if (ctx.compactResult?.wasCompacted) {
4625
- const marker = createCompactionMarker(ctx.compactResult);
4626
- await sendCompactionMarkerEvent(stream, streamState, marker);
5029
+ if (ctx.truncateResult?.wasCompacted) {
5030
+ const marker = createTruncationResponseMarker(ctx.truncateResult);
5031
+ await sendTruncationMarkerEvent(stream, streamState, marker);
4627
5032
  acc.content += marker;
4628
5033
  }
4629
5034
  await processStreamChunks({
@@ -4651,7 +5056,7 @@ async function handleStreamingResponse(opts) {
4651
5056
  });
4652
5057
  }
4653
5058
  }
4654
- async function sendCompactionMarkerEvent(stream, streamState, marker) {
5059
+ async function sendTruncationMarkerEvent(stream, streamState, marker) {
4655
5060
  const blockStartEvent = {
4656
5061
  type: "content_block_start",
4657
5062
  index: streamState.contentBlockIndex,
@@ -4824,6 +5229,15 @@ function extractToolCallsFromContent(content) {
4824
5229
  });
4825
5230
  return tools.length > 0 ? tools : void 0;
4826
5231
  }
5232
/**
 * Collect tool_use blocks from Anthropic content as history tool-call
 * records (with serialized input), or undefined when there are none.
 */
function extractToolCallsFromAnthropicContent(content) {
  const toolCalls = content
    .filter((block) => block.type === "tool_use")
    .map((block) => ({
      id: block.id,
      name: block.name,
      input: JSON.stringify(block.input)
    }));
  return toolCalls.length > 0 ? toolCalls : void 0;
}
4827
5241
 
4828
5242
  //#endregion
4829
5243
  //#region src/routes/messages/route.ts
@@ -4950,12 +5364,14 @@ async function runServer(options) {
4950
5364
  if (options.verbose) {
4951
5365
  consola.level = 5;
4952
5366
  consola.info("Verbose logging enabled");
5367
+ state.verbose = true;
4953
5368
  }
4954
5369
  state.accountType = options.accountType;
4955
5370
  if (options.accountType !== "individual") consola.info(`Using ${options.accountType} plan GitHub account`);
4956
5371
  state.manualApprove = options.manual;
4957
5372
  state.showToken = options.showToken;
4958
- state.autoCompact = options.autoCompact;
5373
+ state.autoTruncate = options.autoTruncate;
5374
+ state.directAnthropicApi = options.directAnthropicApi;
4959
5375
  if (options.rateLimit) initAdaptiveRateLimiter({
4960
5376
  baseRetryIntervalSeconds: options.retryInterval,
4961
5377
  requestIntervalSeconds: options.requestInterval,
@@ -4963,7 +5379,8 @@ async function runServer(options) {
4963
5379
  consecutiveSuccessesForRecovery: options.consecutiveSuccesses
4964
5380
  });
4965
5381
  else consola.info("Rate limiting disabled");
4966
- if (!options.autoCompact) consola.info("Auto-compact disabled");
5382
+ if (!options.autoTruncate) consola.info("Auto-truncate disabled");
5383
+ if (!options.directAnthropicApi) consola.info("Direct Anthropic API disabled (using OpenAI translation)");
4967
5384
  initHistory(options.history, options.historyLimit);
4968
5385
  if (options.history) {
4969
5386
  const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
@@ -5105,10 +5522,15 @@ const start = defineCommand({
5105
5522
  default: "1000",
5106
5523
  description: "Maximum number of history entries to keep in memory (0 = unlimited)"
5107
5524
  },
5108
- "no-auto-compact": {
5525
+ "no-auto-truncate": {
5526
+ type: "boolean",
5527
+ default: false,
5528
+ description: "Disable automatic conversation history truncation when exceeding limits"
5529
+ },
5530
+ "no-direct-anthropic": {
5109
5531
  type: "boolean",
5110
5532
  default: false,
5111
- description: "Disable automatic conversation history compression when exceeding limits"
5533
+ description: "Disable direct Anthropic API for Anthropic models (use OpenAI translation instead)"
5112
5534
  }
5113
5535
  },
5114
5536
  run({ args }) {
@@ -5129,7 +5551,8 @@ const start = defineCommand({
5129
5551
  proxyEnv: args["proxy-env"],
5130
5552
  history: !args["no-history"],
5131
5553
  historyLimit: Number.parseInt(args["history-limit"], 10),
5132
- autoCompact: !args["no-auto-compact"]
5554
+ autoTruncate: !args["no-auto-truncate"],
5555
+ directAnthropicApi: !args["no-direct-anthropic"]
5133
5556
  });
5134
5557
  }
5135
5558
  });