@hsupu/copilot-api 0.7.9 → 0.7.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/main.js +535 -112
- package/dist/main.js.map +1 -1
- package/package.json +2 -1
package/dist/main.js
CHANGED
|
@@ -17,6 +17,7 @@ import pc from "picocolors";
|
|
|
17
17
|
import { Hono } from "hono";
|
|
18
18
|
import { cors } from "hono/cors";
|
|
19
19
|
import { streamSSE } from "hono/streaming";
|
|
20
|
+
import { countTokens } from "@anthropic-ai/tokenizer";
|
|
20
21
|
import { events } from "fetch-event-stream";
|
|
21
22
|
|
|
22
23
|
//#region src/lib/paths.ts
|
|
@@ -46,7 +47,9 @@ const state = {
|
|
|
46
47
|
accountType: "individual",
|
|
47
48
|
manualApprove: false,
|
|
48
49
|
showToken: false,
|
|
49
|
-
|
|
50
|
+
verbose: false,
|
|
51
|
+
autoTruncate: true,
|
|
52
|
+
directAnthropicApi: true
|
|
50
53
|
};
|
|
51
54
|
|
|
52
55
|
//#endregion
|
|
@@ -480,9 +483,23 @@ async function checkTokenExists() {
|
|
|
480
483
|
return false;
|
|
481
484
|
}
|
|
482
485
|
}
|
|
483
|
-
async function
|
|
486
|
+
/**
 * Collect GitHub user and Copilot usage details for the debug report.
 *
 * Best-effort: resolves to `null` when no GitHub token is available after
 * setup, or when any setup step or lookup throws.
 *
 * @returns {Promise<{user: unknown, copilot: unknown} | null>} Account details, or `null`.
 */
async function getAccountInfo() {
  try {
    await ensurePaths();
    await setupGitHubToken();
    if (!state.githubToken) return null;
    // The two lookups are independent, so run them concurrently.
    const [user, copilot] = await Promise.all([getGitHubUser(), getCopilotUsage()]);
    return { user, copilot };
  } catch (error) {
    // Remain best-effort (the debug report is still useful without account
    // data), but leave a debug-level trace instead of discarding the cause.
    consola.debug("Failed to fetch account info:", error instanceof Error ? error.message : error);
    return null;
  }
}
|
|
500
|
+
async function getDebugInfo(includeAccount) {
|
|
484
501
|
const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
|
|
485
|
-
|
|
502
|
+
const info = {
|
|
486
503
|
version: version$1,
|
|
487
504
|
runtime: getRuntimeInfo(),
|
|
488
505
|
paths: {
|
|
@@ -491,9 +508,14 @@ async function getDebugInfo() {
|
|
|
491
508
|
},
|
|
492
509
|
tokenExists
|
|
493
510
|
};
|
|
511
|
+
if (includeAccount && tokenExists) {
|
|
512
|
+
const account = await getAccountInfo();
|
|
513
|
+
if (account) info.account = account;
|
|
514
|
+
}
|
|
515
|
+
return info;
|
|
494
516
|
}
|
|
495
517
|
function printDebugInfoPlain(info) {
|
|
496
|
-
|
|
518
|
+
let output = `copilot-api debug
|
|
497
519
|
|
|
498
520
|
Version: ${info.version}
|
|
499
521
|
Runtime: ${info.runtime.name} ${info.runtime.version} (${info.runtime.platform} ${info.runtime.arch})
|
|
@@ -502,19 +524,24 @@ Paths:
|
|
|
502
524
|
- APP_DIR: ${info.paths.APP_DIR}
|
|
503
525
|
- GITHUB_TOKEN_PATH: ${info.paths.GITHUB_TOKEN_PATH}
|
|
504
526
|
|
|
505
|
-
Token exists: ${info.tokenExists ? "Yes" : "No"}
|
|
527
|
+
Token exists: ${info.tokenExists ? "Yes" : "No"}`;
|
|
528
|
+
if (info.account) output += `
|
|
529
|
+
|
|
530
|
+
Account Info:
|
|
531
|
+
${JSON.stringify(info.account, null, 2)}`;
|
|
532
|
+
consola.info(output);
|
|
506
533
|
}
|
|
507
534
|
/**
 * Write the debug report to stdout as JSON indented by two spaces.
 *
 * @param {object} info - Debug report to serialize.
 */
function printDebugInfoJson(info) {
  const serialized = JSON.stringify(info, null, 2);
  console.log(serialized);
}
|
|
510
537
|
/**
 * Gather the debug report (requesting account details) and print it.
 *
 * @param {{json?: boolean}} options - When `json` is truthy the report is
 *   printed as JSON, otherwise as plain text.
 */
async function runDebug(options) {
  const report = await getDebugInfo(true);
  const print = options.json ? printDebugInfoJson : printDebugInfoPlain;
  print(report);
}
|
|
515
|
-
const
|
|
542
|
+
const debugInfo = defineCommand({
|
|
516
543
|
meta: {
|
|
517
|
-
name: "
|
|
544
|
+
name: "info",
|
|
518
545
|
description: "Print debug information about the application"
|
|
519
546
|
},
|
|
520
547
|
args: { json: {
|
|
@@ -526,6 +553,48 @@ const debug = defineCommand({
|
|
|
526
553
|
return runDebug({ json: args.json });
|
|
527
554
|
}
|
|
528
555
|
});
|
|
556
|
+
/**
 * `models` subcommand: obtain a Copilot token (from a supplied GitHub token
 * or the regular token setup) and print the model list returned by
 * `getModels()` as indented JSON.
 */
const debugModels = defineCommand({
  meta: {
    name: "models",
    description: "Fetch and display raw model data from Copilot API"
  },
  args: {
    "account-type": {
      type: "string",
      alias: "a",
      default: "individual",
      description: "The type of GitHub account (individual, business, enterprise)"
    },
    "github-token": {
      type: "string",
      alias: "g",
      description: "GitHub token to use (skips interactive auth)"
    }
  },
  async run({ args }) {
    state.accountType = args["account-type"];
    await ensurePaths();

    const providedToken = args["github-token"];
    if (providedToken) {
      state.githubToken = providedToken;
      consola.info("Using provided GitHub token");
    } else {
      await setupGitHubToken();
    }

    const copilotAuth = await getCopilotToken();
    state.copilotToken = copilotAuth.token;

    consola.info("Fetching models from Copilot API...");
    const models = await getModels();
    // Printed with console.log (not consola) so stdout carries only the JSON.
    console.log(JSON.stringify(models, null, 2));
  }
});
|
|
588
|
+
/**
 * Parent `debug` command; it only groups the troubleshooting subcommands
 * `info` and `models`.
 */
const debug = defineCommand({
  meta: {
    name: "debug",
    description: "Debug commands for troubleshooting"
  },
  subCommands: {
    info: debugInfo,
    models: debugModels
  }
});
|
|
529
598
|
|
|
530
599
|
//#endregion
|
|
531
600
|
//#region src/logout.ts
|
|
@@ -558,15 +627,12 @@ const SUPPORTED_VERSIONS = {
|
|
|
558
627
|
min: "2.0.0",
|
|
559
628
|
max: "2.1.10"
|
|
560
629
|
},
|
|
561
|
-
v2b: {
|
|
562
|
-
min: "2.1.11",
|
|
563
|
-
max: "2.1.12"
|
|
564
|
-
}
|
|
630
|
+
v2b: { min: "2.1.11" }
|
|
565
631
|
};
|
|
566
632
|
/**
 * Regular expressions used to locate the context-window limit in cli.js.
 */
const PATTERNS = {
  // The `HR` function still returning the stock 200000.
  funcOriginal: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return 200000\}/,
  // The same function returning any integer (stock or patched).
  funcPatched: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return \d+\}/,
  // `var NAME=<digits>` immediately followed by `,OTHER=20000,`.
  // Group 1 is the variable name, group 2 the current limit; the lookahead
  // keeps the following declaration out of the match.
  variable: /var ([A-Za-z_$]\w*)=(\d+)(?=,\w+=20000,)/
};
|
|
571
637
|
/**
|
|
572
638
|
* Parse semver version string to comparable parts
|
|
@@ -592,14 +658,14 @@ function compareVersions(a, b) {
|
|
|
592
658
|
}
|
|
593
659
|
/**
 * Choose the patching strategy for a Claude Code version.
 *
 * @param {string} version$1 - Version string to classify.
 * @returns {"func" | "variable" | null} "func" inside the v2a range,
 *   "variable" from v2b.min upwards (no upper bound), otherwise null.
 */
function getPatternTypeForVersion(version$1) {
  const { v2a, v2b } = SUPPORTED_VERSIONS;
  const withinV2a = compareVersions(version$1, v2a.min) >= 0 && compareVersions(version$1, v2a.max) <= 0;
  if (withinV2a) return "func";
  return compareVersions(version$1, v2b.min) >= 0 ? "variable" : null;
}
|
|
598
664
|
/**
 * Get supported version range string for error messages.
 *
 * @returns {string} `<v2a.min>-<v2a.max>, <v2b.min>+`; the trailing `+`
 *   reflects that the v2b range has no upper bound.
 */
function getSupportedRangeString() {
  const { v2a, v2b } = SUPPORTED_VERSIONS;
  return `${v2a.min}-${v2a.max}, ${v2b.min}+`;
}
|
|
604
670
|
/**
|
|
605
671
|
* Get Claude Code version from package.json
|
|
@@ -632,9 +698,9 @@ function findInVoltaTools(voltaHome) {
|
|
|
632
698
|
return paths;
|
|
633
699
|
}
|
|
634
700
|
/**
|
|
635
|
-
* Find Claude Code CLI
|
|
701
|
+
* Find all Claude Code CLI paths by checking common locations
|
|
636
702
|
*/
|
|
637
|
-
function
|
|
703
|
+
function findAllClaudeCodePaths() {
|
|
638
704
|
const possiblePaths = [];
|
|
639
705
|
const home = process.env.HOME || "";
|
|
640
706
|
const voltaHome = process.env.VOLTA_HOME || join(home, ".volta");
|
|
@@ -649,22 +715,41 @@ function findClaudeCodePath() {
|
|
|
649
715
|
for (const base of globalPaths) possiblePaths.push(join(base, "@anthropic-ai", "claude-code", "cli.js"));
|
|
650
716
|
const bunGlobal = join(home, ".bun", "install", "global");
|
|
651
717
|
if (existsSync(bunGlobal)) possiblePaths.push(join(bunGlobal, "node_modules", "@anthropic-ai", "claude-code", "cli.js"));
|
|
652
|
-
return possiblePaths.
|
|
718
|
+
return [...new Set(possiblePaths.filter((p) => existsSync(p)))];
|
|
653
719
|
}
|
|
654
720
|
/**
 * Get installation info for a CLI path.
 *
 * @param {string} cliPath - Path to a Claude Code `cli.js`.
 * @returns {{path: string, version: *, limit: number | null}} The path, the
 *   value returned by `getClaudeCodeVersion`, and the limit detected in the
 *   file contents (null when none is detected).
 */
function getInstallationInfo(cliPath) {
  const detectedVersion = getClaudeCodeVersion(cliPath);
  const source = readFileSync(cliPath, "utf8");
  return {
    path: cliPath,
    version: detectedVersion,
    limit: getCurrentLimit(source)
  };
}
|
|
733
|
+
/**
 * Detect the current context limit in cli.js source text.
 *
 * The variable pattern is tried first; the `HR` function pattern is the
 * fallback.
 *
 * @param {string} content - Contents of cli.js.
 * @returns {{limit: number, varName?: string} | null} The limit, plus the
 *   variable name when the variable pattern matched; null if neither matched.
 */
function getCurrentLimitInfo(content) {
  const variableHit = content.match(PATTERNS.variable);
  if (variableHit) {
    const [, varName, digits] = variableHit;
    return {
      limit: Number.parseInt(digits, 10),
      varName
    };
  }

  const functionHit = content.match(PATTERNS.funcPatched);
  if (!functionHit) return null;

  // The limit is the integer in the function's final `return <n>}`.
  const tail = functionHit[0].match(/return (\d+)\}$/);
  if (!tail) return null;
  return { limit: Number.parseInt(tail[1], 10) };
}
|
|
667
746
|
/**
 * Get current context limit from Claude Code (legacy wrapper).
 *
 * @param {string} content - Contents of cli.js.
 * @returns {number | null} The `limit` reported by `getCurrentLimitInfo`,
 *   or null when nothing was detected.
 */
function getCurrentLimit(content) {
  const info = getCurrentLimitInfo(content);
  return info?.limit ?? null;
}
|
|
752
|
+
/**
|
|
668
753
|
* Check if Claude Code version is supported for patching
|
|
669
754
|
*/
|
|
670
755
|
function checkVersionSupport(cliPath) {
|
|
@@ -696,22 +781,25 @@ function patchClaudeCode(cliPath, newLimit) {
|
|
|
696
781
|
const versionCheck = checkVersionSupport(cliPath);
|
|
697
782
|
if (!versionCheck.supported) {
|
|
698
783
|
consola.error(versionCheck.error);
|
|
699
|
-
return
|
|
784
|
+
return "failed";
|
|
700
785
|
}
|
|
701
786
|
consola.info(`Claude Code version: ${versionCheck.version}`);
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
return true;
|
|
705
|
-
}
|
|
787
|
+
const limitInfo = getCurrentLimitInfo(content);
|
|
788
|
+
if (limitInfo?.limit === newLimit) return "already_patched";
|
|
706
789
|
let newContent;
|
|
707
|
-
if (versionCheck.patternType === "variable")
|
|
708
|
-
|
|
790
|
+
if (versionCheck.patternType === "variable") {
|
|
791
|
+
if (!limitInfo?.varName) {
|
|
792
|
+
consola.error("Could not detect variable name for patching");
|
|
793
|
+
return "failed";
|
|
794
|
+
}
|
|
795
|
+
newContent = content.replace(PATTERNS.variable, `var ${limitInfo.varName}=${newLimit}`);
|
|
796
|
+
} else {
|
|
709
797
|
const replacement = `function HR(A){if(A.includes("[1m]"))return 1e6;return ${newLimit}}`;
|
|
710
798
|
const pattern = PATTERNS.funcOriginal.test(content) ? PATTERNS.funcOriginal : PATTERNS.funcPatched;
|
|
711
799
|
newContent = content.replace(pattern, replacement);
|
|
712
800
|
}
|
|
713
801
|
writeFileSync(cliPath, newContent);
|
|
714
|
-
return
|
|
802
|
+
return "success";
|
|
715
803
|
}
|
|
716
804
|
/**
|
|
717
805
|
* Restore Claude Code to original 200k limit
|
|
@@ -724,13 +812,19 @@ function restoreClaudeCode(cliPath) {
|
|
|
724
812
|
return false;
|
|
725
813
|
}
|
|
726
814
|
consola.info(`Claude Code version: ${versionCheck.version}`);
|
|
727
|
-
|
|
815
|
+
const limitInfo = getCurrentLimitInfo(content);
|
|
816
|
+
if (limitInfo?.limit === 2e5) {
|
|
728
817
|
consola.info("Already at original 200000 limit");
|
|
729
818
|
return true;
|
|
730
819
|
}
|
|
731
820
|
let newContent;
|
|
732
|
-
if (versionCheck.patternType === "variable")
|
|
733
|
-
|
|
821
|
+
if (versionCheck.patternType === "variable") {
|
|
822
|
+
if (!limitInfo?.varName) {
|
|
823
|
+
consola.error("Could not detect variable name for restoring");
|
|
824
|
+
return false;
|
|
825
|
+
}
|
|
826
|
+
newContent = content.replace(PATTERNS.variable, `var ${limitInfo.varName}=200000`);
|
|
827
|
+
} else newContent = content.replace(PATTERNS.funcPatched, "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}");
|
|
734
828
|
writeFileSync(cliPath, newContent);
|
|
735
829
|
return true;
|
|
736
830
|
}
|
|
@@ -739,7 +833,7 @@ function showStatus(cliPath, currentLimit) {
|
|
|
739
833
|
if (version$1) consola.info(`Claude Code version: ${version$1}`);
|
|
740
834
|
if (currentLimit === null) {
|
|
741
835
|
consola.warn("Could not detect current limit - CLI may have been updated");
|
|
742
|
-
consola.info("Look for
|
|
836
|
+
consola.info("Look for a variable like 'var XXX=200000' followed by ',YYY=20000,' in cli.js");
|
|
743
837
|
} else if (currentLimit === 2e5) consola.info("Status: Original (200k context window)");
|
|
744
838
|
else consola.info(`Status: Patched (${currentLimit} context window)`);
|
|
745
839
|
}
|
|
@@ -773,17 +867,42 @@ const patchClaude = defineCommand({
|
|
|
773
867
|
description: "Show current patch status without modifying"
|
|
774
868
|
}
|
|
775
869
|
},
|
|
776
|
-
run({ args }) {
|
|
777
|
-
|
|
778
|
-
if (
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
870
|
+
async run({ args }) {
|
|
871
|
+
let cliPath;
|
|
872
|
+
if (args.path) {
|
|
873
|
+
cliPath = args.path;
|
|
874
|
+
if (!existsSync(cliPath)) {
|
|
875
|
+
consola.error(`File not found: ${cliPath}`);
|
|
876
|
+
process.exit(1);
|
|
877
|
+
}
|
|
878
|
+
} else {
|
|
879
|
+
const installations = findAllClaudeCodePaths();
|
|
880
|
+
if (installations.length === 0) {
|
|
881
|
+
consola.error("Could not find Claude Code installation");
|
|
882
|
+
consola.info("Searched in: volta, npm global, bun global");
|
|
883
|
+
consola.info("Use --path to specify the path to cli.js manually");
|
|
884
|
+
process.exit(1);
|
|
885
|
+
}
|
|
886
|
+
if (installations.length === 1) cliPath = installations[0];
|
|
887
|
+
else {
|
|
888
|
+
consola.info(`Found ${installations.length} Claude Code installations:`);
|
|
889
|
+
const options = installations.map((path$1) => {
|
|
890
|
+
const info = getInstallationInfo(path$1);
|
|
891
|
+
let status = "unknown";
|
|
892
|
+
if (info.limit === 2e5) status = "original";
|
|
893
|
+
else if (info.limit) status = `patched: ${info.limit}`;
|
|
894
|
+
return {
|
|
895
|
+
label: `v${info.version ?? "?"} (${status}) - ${path$1}`,
|
|
896
|
+
value: path$1
|
|
897
|
+
};
|
|
898
|
+
});
|
|
899
|
+
const selected = await consola.prompt("Select installation to patch:", {
|
|
900
|
+
type: "select",
|
|
901
|
+
options
|
|
902
|
+
});
|
|
903
|
+
if (typeof selected === "symbol") process.exit(0);
|
|
904
|
+
cliPath = selected;
|
|
905
|
+
}
|
|
787
906
|
}
|
|
788
907
|
consola.info(`Claude Code path: ${cliPath}`);
|
|
789
908
|
const content = readFileSync(cliPath, "utf8");
|
|
@@ -806,13 +925,14 @@ const patchClaude = defineCommand({
|
|
|
806
925
|
consola.error("Invalid limit value. Must be a number >= 1000");
|
|
807
926
|
process.exit(1);
|
|
808
927
|
}
|
|
809
|
-
|
|
810
|
-
|
|
928
|
+
const result = patchClaudeCode(cliPath, limit);
|
|
929
|
+
if (result === "success") {
|
|
930
|
+
consola.success(`Patched context window: ${currentLimit ?? 2e5} → ${limit}`);
|
|
811
931
|
consola.info("Note: You may need to re-run this after Claude Code updates");
|
|
812
|
-
} else {
|
|
932
|
+
} else if (result === "already_patched") consola.success(`Already patched with limit ${limit}`);
|
|
933
|
+
else {
|
|
813
934
|
consola.error("Failed to patch - pattern not found");
|
|
814
935
|
consola.info("Claude Code may have been updated to a new version");
|
|
815
|
-
consola.info("Check the cli.js for the HR function pattern");
|
|
816
936
|
process.exit(1);
|
|
817
937
|
}
|
|
818
938
|
}
|
|
@@ -821,7 +941,7 @@ const patchClaude = defineCommand({
|
|
|
821
941
|
//#endregion
|
|
822
942
|
//#region package.json
|
|
823
943
|
var name = "@hsupu/copilot-api";
|
|
824
|
-
var version = "0.7.
|
|
944
|
+
var version = "0.7.11";
|
|
825
945
|
var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
|
|
826
946
|
var keywords = [
|
|
827
947
|
"proxy",
|
|
@@ -854,6 +974,7 @@ var scripts = {
|
|
|
854
974
|
var simple_git_hooks = { "pre-commit": "bun x lint-staged" };
|
|
855
975
|
var lint_staged = { "*": "bun run lint --fix" };
|
|
856
976
|
var dependencies = {
|
|
977
|
+
"@anthropic-ai/tokenizer": "^0.0.4",
|
|
857
978
|
"citty": "^0.1.6",
|
|
858
979
|
"clipboardy": "^5.0.0",
|
|
859
980
|
"consola": "^3.4.2",
|
|
@@ -2155,9 +2276,61 @@ const numTokensForTools = (tools, encoder, constants) => {
|
|
|
2155
2276
|
return funcTokenCount;
|
|
2156
2277
|
};
|
|
2157
2278
|
/**
 * Check if a model is an Anthropic model.
 *
 * @param {{vendor?: string}} model - Model descriptor.
 * @returns {boolean} True only when `vendor` is exactly "Anthropic".
 */
function isAnthropicModel(model) {
  const { vendor } = model;
  return vendor === "Anthropic";
}
|
|
2284
|
+
/**
 * Convert a message to plain text for Anthropic tokenizer.
 *
 * The result is newline-joined: `<role>:`, then the content (the string
 * itself, or each non-empty `text` part, with "[image]" standing in for
 * `image_url` parts), then each tool call as JSON, then
 * `tool_call_id:<id>` when present.
 *
 * @param {object} message - Chat message.
 * @returns {string} Text representation of the message.
 */
function messageToText(message) {
  const { content, tool_calls: toolCalls } = message;
  const lines = [`${message.role}:`];

  if (typeof content === "string") {
    lines.push(content);
  } else if (Array.isArray(content)) {
    for (const part of content) {
      if ("text" in part && part.text) lines.push(part.text);
      else if (part.type === "image_url") lines.push("[image]");
    }
  }

  if (toolCalls) {
    for (const call of toolCalls) lines.push(JSON.stringify(call));
  }

  if ("tool_call_id" in message && message.tool_call_id) {
    lines.push(`tool_call_id:${message.tool_call_id}`);
  }

  return lines.join("\n");
}
|
|
2299
|
+
/**
 * Convert tools to text for Anthropic tokenizer.
 *
 * @param {Array<object>} tools - Tool definitions.
 * @returns {string} One JSON document per tool, separated by newlines.
 */
function toolsToText(tools) {
  const serialized = [];
  for (const tool of tools) serialized.push(JSON.stringify(tool));
  return serialized.join("\n");
}
|
|
2305
|
+
/**
 * Calculate token count using Anthropic's official tokenizer.
 *
 * Assistant messages are counted as output and all other roles as input.
 * Tool definitions, when present, are added to the input count. Each message
 * adds a fixed 3 tokens, and the input count gets a further 3 on top.
 *
 * @param {{messages: Array<object>, tools?: Array<object>}} payload - Chat payload.
 * @returns {{input: number, output: number}} Token counts.
 */
function getAnthropicTokenCount(payload) {
  const promptTexts = [];
  const completionTexts = [];
  for (const msg of payload.messages) {
    const bucket = msg.role === "assistant" ? completionTexts : promptTexts;
    bucket.push(messageToText(msg));
  }

  let input = countTokens(promptTexts.join("\n\n"));
  let output = countTokens(completionTexts.join("\n\n"));

  if (payload.tools && payload.tools.length > 0) {
    input += countTokens(toolsToText(payload.tools));
  }

  // Fixed allowance of 3 per message, plus 3 more on the input side.
  input += promptTexts.length * 3;
  output += completionTexts.length * 3;
  input += 3;

  return { input, output };
}
|
|
2327
|
+
/**
|
|
2328
|
+
* Calculate the token count of messages.
|
|
2329
|
+
* Uses Anthropic's official tokenizer for Anthropic models,
|
|
2330
|
+
* and GPT tokenizers for other models.
|
|
2159
2331
|
*/
|
|
2160
2332
|
const getTokenCount = async (payload, model) => {
|
|
2333
|
+
if (isAnthropicModel(model)) return getAnthropicTokenCount(payload);
|
|
2161
2334
|
const tokenizer = getTokenizerFromModel(model);
|
|
2162
2335
|
const encoder = await getEncodeChatFunction(tokenizer);
|
|
2163
2336
|
const simplifiedMessages = payload.messages;
|
|
@@ -2174,10 +2347,10 @@ const getTokenCount = async (payload, model) => {
|
|
|
2174
2347
|
};
|
|
2175
2348
|
|
|
2176
2349
|
//#endregion
|
|
2177
|
-
//#region src/lib/auto-
|
|
2350
|
+
//#region src/lib/auto-truncate.ts
|
|
2178
2351
|
/** Default auto-truncate settings; callers may override individual fields. */
const DEFAULT_CONFIG = {
  // Safety margin, expressed as a percentage.
  safetyMarginPercent: 2,
  // Request body size cap in bytes (510 KiB).
  maxRequestBodyBytes: 510 * 1024
};
|
|
2182
2355
|
/** Dynamic byte limit that adjusts based on 413 errors */
|
|
2183
2356
|
let dynamicByteLimit = null;
|
|
@@ -2187,7 +2360,7 @@ let dynamicByteLimit = null;
|
|
|
2187
2360
|
/**
 * Lower the dynamic byte limit after a request was rejected as too large.
 *
 * The new limit is 90% of the failing size (rounded down), but never below
 * 100 KiB. It is stored in `dynamicByteLimit` and reported via
 * `consola.info`.
 *
 * @param {number} failingBytes - Size in bytes of the rejected request.
 */
function onRequestTooLarge(failingBytes) {
  const toKB = (bytes) => Math.round(bytes / 1024);
  const floorLimit = 100 * 1024;
  const reduced = Math.floor(failingBytes * .9);
  dynamicByteLimit = Math.max(reduced, floorLimit);
  consola.info(`[AutoTruncate] Adjusted byte limit: ${toKB(failingBytes)}KB failed → ${toKB(dynamicByteLimit)}KB`);
}
|
|
2192
2365
|
function calculateLimits(model, config) {
|
|
2193
2366
|
const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
|
|
@@ -2243,14 +2416,14 @@ function filterOrphanedToolResults(messages) {
|
|
|
2243
2416
|
}
|
|
2244
2417
|
return true;
|
|
2245
2418
|
});
|
|
2246
|
-
if (removedCount > 0) consola.debug(`
|
|
2419
|
+
if (removedCount > 0) consola.debug(`[AutoTruncate] Filtered ${removedCount} orphaned tool_result`);
|
|
2247
2420
|
return filtered;
|
|
2248
2421
|
}
|
|
2249
2422
|
/**
 * Ensure messages start with a user message.
 *
 * @param {Array<{role: string}>} messages - Conversation messages.
 * @returns {Array<{role: string}>} A copy beginning at the first "user"
 *   message; empty when there is none.
 */
function ensureStartsWithUser(messages) {
  const firstUser = messages.findIndex((message) => message.role === "user");
  // No user message at all means every message is skipped.
  const startIndex = firstUser === -1 ? messages.length : firstUser;
  if (startIndex > 0) consola.debug(`[AutoTruncate] Skipped ${startIndex} leading non-user messages`);
  return messages.slice(startIndex);
}
|
|
2256
2429
|
/**
|
|
@@ -2316,10 +2489,10 @@ function createTruncationMarker(removedCount) {
|
|
|
2316
2489
|
};
|
|
2317
2490
|
}
|
|
2318
2491
|
/**
|
|
2319
|
-
* Perform auto-
|
|
2492
|
+
* Perform auto-truncation on a payload that exceeds limits.
|
|
2320
2493
|
* Uses binary search to find the optimal truncation point.
|
|
2321
2494
|
*/
|
|
2322
|
-
async function
|
|
2495
|
+
async function autoTruncate(payload, model, config = {}) {
|
|
2323
2496
|
const cfg = {
|
|
2324
2497
|
...DEFAULT_CONFIG,
|
|
2325
2498
|
...config
|
|
@@ -2340,13 +2513,13 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2340
2513
|
if (exceedsTokens && exceedsBytes) reason = "tokens and size";
|
|
2341
2514
|
else if (exceedsBytes) reason = "size";
|
|
2342
2515
|
else reason = "tokens";
|
|
2343
|
-
consola.info(`
|
|
2516
|
+
consola.info(`[AutoTruncate] Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
|
|
2344
2517
|
const { systemMessages, conversationMessages } = extractSystemMessages(payload.messages);
|
|
2345
2518
|
const messagesJson = JSON.stringify(payload.messages);
|
|
2346
2519
|
const payloadOverhead = originalBytes - messagesJson.length;
|
|
2347
2520
|
const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
|
|
2348
2521
|
const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
|
|
2349
|
-
consola.debug(`
|
|
2522
|
+
consola.debug(`[AutoTruncate] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
|
|
2350
2523
|
const preserveIndex = findOptimalPreserveIndex({
|
|
2351
2524
|
messages: conversationMessages,
|
|
2352
2525
|
systemBytes,
|
|
@@ -2356,7 +2529,7 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2356
2529
|
byteLimit
|
|
2357
2530
|
});
|
|
2358
2531
|
if (preserveIndex === 0) {
|
|
2359
|
-
consola.warn("
|
|
2532
|
+
consola.warn("[AutoTruncate] Cannot truncate, system messages too large");
|
|
2360
2533
|
return {
|
|
2361
2534
|
payload,
|
|
2362
2535
|
wasCompacted: false,
|
|
@@ -2366,7 +2539,7 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2366
2539
|
};
|
|
2367
2540
|
}
|
|
2368
2541
|
if (preserveIndex >= conversationMessages.length) {
|
|
2369
|
-
consola.warn("
|
|
2542
|
+
consola.warn("[AutoTruncate] Would need to remove all messages");
|
|
2370
2543
|
return {
|
|
2371
2544
|
payload,
|
|
2372
2545
|
wasCompacted: false,
|
|
@@ -2380,7 +2553,7 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2380
2553
|
preserved = ensureStartsWithUser(preserved);
|
|
2381
2554
|
preserved = filterOrphanedToolResults(preserved);
|
|
2382
2555
|
if (preserved.length === 0) {
|
|
2383
|
-
consola.warn("
|
|
2556
|
+
consola.warn("[AutoTruncate] All messages filtered out after cleanup");
|
|
2384
2557
|
return {
|
|
2385
2558
|
payload,
|
|
2386
2559
|
wasCompacted: false,
|
|
@@ -2401,8 +2574,8 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2401
2574
|
};
|
|
2402
2575
|
const newBytes = JSON.stringify(newPayload).length;
|
|
2403
2576
|
const newTokenCount = await getTokenCount(newPayload, model);
|
|
2404
|
-
consola.info(`
|
|
2405
|
-
if (newBytes > byteLimit) consola.warn(`
|
|
2577
|
+
consola.info(`[AutoTruncate] ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
|
|
2578
|
+
if (newBytes > byteLimit) consola.warn(`[AutoTruncate] Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
|
|
2406
2579
|
return {
|
|
2407
2580
|
payload: newPayload,
|
|
2408
2581
|
wasCompacted: true,
|
|
@@ -2412,13 +2585,13 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2412
2585
|
};
|
|
2413
2586
|
}
|
|
2414
2587
|
/**
 * Create a marker to prepend to responses indicating auto-truncation occurred.
 *
 * @param {{wasCompacted: boolean, originalTokens: number, compactedTokens: number, removedMessageCount: number}} result
 *   Outcome of an auto-truncate run.
 * @returns {string} Empty string when nothing was truncated; otherwise a
 *   summary line with removed-message count, token counts, and the rounded
 *   percentage reduction.
 */
function createTruncationResponseMarker(result) {
  if (!result.wasCompacted) return "";
  const { originalTokens, compactedTokens, removedMessageCount } = result;
  const saved = originalTokens - compactedTokens;
  const percentage = Math.round(saved / originalTokens * 100);
  return `\n\n---\n[Auto-truncated: ${removedMessageCount} messages removed, ${originalTokens} → ${compactedTokens} tokens (${percentage}% reduction)]`;
}
|
|
2423
2596
|
|
|
2424
2597
|
//#endregion
|
|
@@ -2506,37 +2679,37 @@ function recordStreamError(opts) {
|
|
|
2506
2679
|
/**
 * Tell whether a response is a complete (non-streaming) one.
 *
 * @param {object} response - Response object to inspect.
 * @returns {boolean} True when the response has its own `choices` property.
 */
function isNonStreaming(response) {
  const hasChoices = Object.hasOwn(response, "choices");
  return hasChoices;
}
|
|
2509
|
-
/** Build final payload with auto-
|
|
2682
|
+
/**
 * Build final payload with auto-truncate if needed.
 *
 * The original payload is passed through with `truncateResult: null` when
 * auto-truncate is disabled, when the model is unknown (a warning is logged),
 * when the check reports no truncation is needed, or when anything in the
 * process throws (a warning is logged).
 *
 * @param {object} payload - Chat completion payload.
 * @param {object | undefined} model - Model entry for `payload.model`, if known.
 * @returns {Promise<{finalPayload: object, truncateResult: object | null}>}
 */
async function buildFinalPayload(payload, model) {
  const untouched = () => ({
    finalPayload: payload,
    truncateResult: null
  });

  if (!state.autoTruncate) return untouched();
  if (!model) {
    consola.warn(`Auto-truncate: Model '${payload.model}' not found in cached models, skipping`);
    return untouched();
  }

  try {
    const check = await checkNeedsCompaction(payload, model);
    consola.debug(`Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
    if (!check.needed) return untouched();

    let reasonText;
    switch (check.reason) {
      case "both":
        reasonText = "tokens and size";
        break;
      case "bytes":
        reasonText = "size";
        break;
      default:
        reasonText = "tokens";
    }
    consola.info(`Auto-truncate triggered: exceeds ${reasonText} limit`);

    const truncateResult = await autoTruncate(payload, model);
    return {
      finalPayload: truncateResult.payload,
      truncateResult
    };
  } catch (error) {
    // Truncation is an optimisation: on failure, send the request unchanged.
    consola.warn("Auto-truncate failed, proceeding with original payload:", error instanceof Error ? error.message : error);
    return untouched();
  }
}
|
|
@@ -2580,7 +2753,7 @@ async function logPayloadSizeInfo(payload, model) {
|
|
|
2580
2753
|
if (largeMessages > 0) consola.info(` Large messages (>50KB): ${largeMessages}`);
|
|
2581
2754
|
consola.info("");
|
|
2582
2755
|
consola.info(" Suggestions:");
|
|
2583
|
-
if (!state.
|
|
2756
|
+
if (!state.autoTruncate) consola.info(" • Enable --auto-truncate to automatically truncate history");
|
|
2584
2757
|
if (imageCount > 0) consola.info(" • Remove or resize large images in the conversation");
|
|
2585
2758
|
consola.info(" • Start a new conversation with /clear or /reset");
|
|
2586
2759
|
consola.info(" • Reduce conversation history by deleting old messages");
|
|
@@ -2612,8 +2785,8 @@ async function handleCompletion$1(c) {
|
|
|
2612
2785
|
};
|
|
2613
2786
|
const selectedModel = state.models?.data.find((model) => model.id === originalPayload.model);
|
|
2614
2787
|
await logTokenCount(originalPayload, selectedModel);
|
|
2615
|
-
const { finalPayload,
|
|
2616
|
-
if (
|
|
2788
|
+
const { finalPayload, truncateResult } = await buildFinalPayload(originalPayload, selectedModel);
|
|
2789
|
+
if (truncateResult) ctx.truncateResult = truncateResult;
|
|
2617
2790
|
const payload = isNullish(finalPayload.max_tokens) ? {
|
|
2618
2791
|
...finalPayload,
|
|
2619
2792
|
max_tokens: selectedModel?.capabilities?.limits?.max_output_tokens
|
|
@@ -2666,8 +2839,8 @@ async function logTokenCount(payload, selectedModel) {
|
|
|
2666
2839
|
function handleNonStreamingResponse$1(c, originalResponse, ctx) {
|
|
2667
2840
|
consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
|
|
2668
2841
|
let response = originalResponse;
|
|
2669
|
-
if (ctx.
|
|
2670
|
-
const marker =
|
|
2842
|
+
if (state.verbose && ctx.truncateResult?.wasCompacted && response.choices[0]?.message.content) {
|
|
2843
|
+
const marker = createTruncationResponseMarker(ctx.truncateResult);
|
|
2671
2844
|
response = {
|
|
2672
2845
|
...response,
|
|
2673
2846
|
choices: response.choices.map((choice$1, i) => i === 0 ? {
|
|
@@ -2735,8 +2908,8 @@ async function handleStreamingResponse$1(opts) {
|
|
|
2735
2908
|
const { stream, response, payload, ctx } = opts;
|
|
2736
2909
|
const acc = createStreamAccumulator();
|
|
2737
2910
|
try {
|
|
2738
|
-
if (ctx.
|
|
2739
|
-
const marker =
|
|
2911
|
+
if (state.verbose && ctx.truncateResult?.wasCompacted) {
|
|
2912
|
+
const marker = createTruncationResponseMarker(ctx.truncateResult);
|
|
2740
2913
|
const markerChunk = {
|
|
2741
2914
|
id: `compact-marker-${Date.now()}`,
|
|
2742
2915
|
object: "chat.completion.chunk",
|
|
@@ -4109,16 +4282,33 @@ function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameM
|
|
|
4109
4282
|
const otherMessages = anthropicMessages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, toolNameMapping));
|
|
4110
4283
|
return [...systemMessages, ...otherMessages];
|
|
4111
4284
|
}
|
|
4285
|
+
const RESERVED_KEYWORDS = ["x-anthropic-billing-header", "x-anthropic-billing"];
/**
 * Filter out reserved keywords from system prompt text.
 * Copilot API rejects requests containing these keywords.
 * Removes the entire line containing the keyword to keep the prompt clean.
 *
 * @param {string} text - System prompt text.
 * @returns {string} The text without any line that contains a reserved keyword.
 */
function filterReservedKeywords(text) {
  let filtered = text;
  for (const keyword of RESERVED_KEYWORDS) {
    // Test the already-filtered text, not the original: one keyword is a
    // prefix of another, so a line removed for the first would otherwise be
    // logged and re-filtered a second time for no effect.
    if (!filtered.includes(keyword)) continue;
    consola.debug(`[Reserved Keyword] Removing line containing "${keyword}"`);
    filtered = filtered.split("\n").filter((line) => !line.includes(keyword)).join("\n");
  }
  return filtered;
}
|
|
4112
4299
|
/**
 * Convert an Anthropic `system` value into a list of OpenAI-style system messages.
 *
 * @param system - Either a plain string or an array of blocks carrying a `text` field.
 * @returns An empty array when `system` is falsy; otherwise a single
 *   `{ role: "system", content }` message whose content has been passed through
 *   `filterReservedKeywords`. Array blocks are joined with a blank line.
 */
function handleSystemPrompt(system) {
  if (!system) return [];
  const promptText = typeof system === "string"
    ? system
    : system.map((block) => block.text).join("\n\n");
  const systemMessage = {
    role: "system",
    content: filterReservedKeywords(promptText)
  };
  return [systemMessage];
}
|
|
4123
4313
|
function handleUserMessage(message) {
|
|
4124
4314
|
const newMessages = [];
|
|
@@ -4317,7 +4507,10 @@ function getAnthropicToolUseBlocks(toolCalls, toolNameMapping) {
|
|
|
4317
4507
|
//#endregion
|
|
4318
4508
|
//#region src/routes/messages/count-tokens-handler.ts
|
|
4319
4509
|
/**
|
|
4320
|
-
* Handles token counting for Anthropic messages
|
|
4510
|
+
* Handles token counting for Anthropic messages.
|
|
4511
|
+
*
|
|
4512
|
+
* For Anthropic models (vendor === "Anthropic"), uses the official Anthropic tokenizer.
|
|
4513
|
+
* For other models, uses GPT tokenizers with appropriate buffers.
|
|
4321
4514
|
*/
|
|
4322
4515
|
async function handleCountTokens(c) {
|
|
4323
4516
|
try {
|
|
@@ -4329,6 +4522,7 @@ async function handleCountTokens(c) {
|
|
|
4329
4522
|
consola.warn("Model not found, returning default token count");
|
|
4330
4523
|
return c.json({ input_tokens: 1 });
|
|
4331
4524
|
}
|
|
4525
|
+
const isAnthropicModel$1 = selectedModel.vendor === "Anthropic";
|
|
4332
4526
|
const tokenCount = await getTokenCount(openAIPayload, selectedModel);
|
|
4333
4527
|
if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
|
|
4334
4528
|
let mcpToolExist = false;
|
|
@@ -4339,9 +4533,8 @@ async function handleCountTokens(c) {
|
|
|
4339
4533
|
}
|
|
4340
4534
|
}
|
|
4341
4535
|
let finalTokenCount = tokenCount.input + tokenCount.output;
|
|
4342
|
-
if (anthropicPayload.model.startsWith("
|
|
4343
|
-
|
|
4344
|
-
consola.debug("Token count:", finalTokenCount);
|
|
4536
|
+
if (!isAnthropicModel$1) finalTokenCount = anthropicPayload.model.startsWith("grok") ? Math.round(finalTokenCount * 1.03) : Math.round(finalTokenCount * 1.05);
|
|
4537
|
+
consola.debug(`Token count: ${finalTokenCount} (${isAnthropicModel$1 ? "Anthropic tokenizer" : "GPT tokenizer"})`);
|
|
4345
4538
|
return c.json({ input_tokens: finalTokenCount });
|
|
4346
4539
|
} catch (error) {
|
|
4347
4540
|
consola.error("Error counting tokens:", error);
|
|
@@ -4349,6 +4542,101 @@ async function handleCountTokens(c) {
|
|
|
4349
4542
|
}
|
|
4350
4543
|
}
|
|
4351
4544
|
|
|
4545
|
+
//#endregion
|
|
4546
|
+
//#region src/services/copilot/create-anthropic-messages.ts
|
|
4547
|
+
/**
 * Top-level request fields that are forwarded to Copilot's Anthropic API endpoint.
 * Every other top-level field of an incoming request is dropped.
 */
const COPILOT_SUPPORTED_FIELDS = new Set([
  "model",
  "messages",
  "max_tokens",
  "system",
  "metadata",
  "stop_sequences",
  "stream",
  "temperature",
  "top_p",
  "top_k",
  "tools",
  "tool_choice",
  "thinking",
  "service_tier"
]);
/**
 * Build a copy of `payload` restricted to the fields in COPILOT_SUPPORTED_FIELDS.
 * This prevents errors like "Extra inputs are not permitted" for unsupported
 * fields like `output_config`.
 *
 * @param payload - Incoming Anthropic-style request body.
 * @returns A new object holding only the supported fields (values are not cloned).
 *   The names of dropped fields are reported through a single debug log line.
 */
function filterPayloadForCopilot(payload) {
  const accepted = {};
  const rejected = [];
  Object.keys(payload).forEach((field) => {
    if (!COPILOT_SUPPORTED_FIELDS.has(field)) {
      rejected.push(field);
      return;
    }
    accepted[field] = payload[field];
  });
  if (rejected.length > 0) {
    consola.debug(`[DirectAnthropic] Filtered unsupported fields: ${rejected.join(", ")}`);
  }
  return accepted;
}
|
|
4580
|
+
/**
 * Raise `max_tokens` when it does not exceed the thinking budget.
 * According to Anthropic docs, max_tokens must be greater than thinking.budget_tokens.
 * The raised value is budget + min(16384, budget), i.e. the thinking budget
 * plus room for the visible response.
 *
 * @param payload - Request body; only `thinking.budget_tokens` and `max_tokens` are read.
 * @returns The same `payload` object when no adjustment applies (no thinking
 *   config, no/zero budget, or max_tokens already above the budget); otherwise
 *   a shallow copy with `max_tokens` replaced.
 */
function adjustMaxTokensForThinking(payload) {
  const { thinking, max_tokens: currentMax } = payload;
  const budget = thinking ? thinking.budget_tokens : void 0;
  // The negated comparison keeps an undefined max_tokens on the "leave as is" path.
  if (!budget || !(currentMax <= budget)) return payload;
  const raisedMax = budget + Math.min(16384, budget);
  consola.debug(`[DirectAnthropic] Adjusted max_tokens: ${currentMax} → ${raisedMax} (thinking.budget_tokens=${budget})`);
  return {
    ...payload,
    max_tokens: raisedMax
  };
}
|
|
4600
|
+
/**
 * Send an Anthropic-style messages request straight to Copilot's `/v1/messages`
 * endpoint, bypassing the OpenAI translation layer.
 *
 * The payload is first reduced to the supported fields and its `max_tokens`
 * is adjusted for thinking. The vision flag passed to `copilotHeaders` is set
 * when any message has a top-level `image` block; `X-Initiator` is "agent"
 * when any message has the assistant role, otherwise "user".
 *
 * @param payload - Incoming Anthropic request body.
 * @returns The SSE event iterator from `events(response)` when `payload.stream`
 *   is truthy, otherwise the parsed JSON body.
 * @throws Error when no Copilot token is present in `state`; the error produced
 *   by `HTTPError.fromResponse` when the HTTP response is not ok.
 */
async function createAnthropicMessages(payload) {
  if (!state.copilotToken) throw new Error("Copilot token not found");
  const requestBody = adjustMaxTokensForThinking(filterPayloadForCopilot(payload));
  const hasImageBlock = (msg) => {
    // String content cannot carry image blocks.
    if (typeof msg.content === "string") return false;
    return msg.content.some((block) => block.type === "image");
  };
  const enableVision = requestBody.messages.some(hasImageBlock);
  const initiator = requestBody.messages.some((msg) => msg.role === "assistant") ? "agent" : "user";
  const headers = {
    ...copilotHeaders(state, enableVision),
    "X-Initiator": initiator,
    "anthropic-version": "2023-06-01"
  };
  consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
  const endpoint = `${copilotBaseUrl(state)}/v1/messages`;
  const response = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify(requestBody)
  });
  if (!response.ok) {
    consola.error("Failed to create Anthropic messages", response);
    throw await HTTPError.fromResponse("Failed to create Anthropic messages", response);
  }
  if (payload.stream) return events(response);
  return await response.json();
}
|
|
4631
|
+
/**
 * Decide whether a model should use the direct Anthropic API path.
 *
 * @param modelId - Model id to look up in `state.models.data`.
 * @returns true only when `state.directAnthropicApi` is enabled and the
 *   matching model entry has vendor "Anthropic"; false otherwise, including
 *   when the model list is missing or the id is unknown.
 */
function supportsDirectAnthropicApi(modelId) {
  const directEnabled = Boolean(state.directAnthropicApi);
  if (directEnabled === false) return false;
  const matchedModel = state.models?.data.find((candidate) => candidate.id === modelId);
  if (matchedModel == null) return false;
  return matchedModel.vendor === "Anthropic";
}
|
|
4639
|
+
|
|
4352
4640
|
//#endregion
|
|
4353
4641
|
//#region src/routes/messages/stream-translation.ts
|
|
4354
4642
|
function isToolBlockOpen(state$1) {
|
|
@@ -4511,11 +4799,128 @@ async function handleCompletion(c) {
|
|
|
4511
4799
|
trackingId,
|
|
4512
4800
|
startTime
|
|
4513
4801
|
};
|
|
4802
|
+
if (supportsDirectAnthropicApi(anthropicPayload.model)) return handleDirectAnthropicCompletion(c, anthropicPayload, ctx);
|
|
4803
|
+
return handleTranslatedCompletion(c, anthropicPayload, ctx);
|
|
4804
|
+
}
|
|
4805
|
+
/**
 * Handle a completion through the direct Anthropic API (no translation needed).
 *
 * Waits for manual approval when `state.manualApprove` is set, runs the request
 * through the adaptive rate limiter, stores the reported queue wait on `ctx`,
 * and then dispatches to the streaming or non-streaming handler depending on
 * whether the result is an async iterable.
 *
 * @param c - Hono request context.
 * @param anthropicPayload - Incoming Anthropic request body.
 * @param ctx - Per-request tracking context; `queueWaitMs` is written to it.
 * @returns The HTTP response produced by the selected handler.
 * @throws Re-throws any error after recording it with `recordErrorResponse`.
 */
async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
  consola.debug("Using direct Anthropic API path for model:", anthropicPayload.model);
  if (state.manualApprove) await awaitApproval();
  try {
    const outcome = await executeWithAdaptiveRateLimit(() => createAnthropicMessages(anthropicPayload));
    const response = outcome.result;
    ctx.queueWaitMs = outcome.queueWaitMs;
    const isStreaming = Symbol.asyncIterator in response;
    if (!isStreaming) return handleDirectAnthropicNonStreamingResponse(c, response, ctx);
    consola.debug("Streaming response from Copilot (direct Anthropic)");
    updateTrackerStatus(ctx.trackingId, "streaming");
    return streamSSE(c, async (stream) => {
      await handleDirectAnthropicStreamingResponse({
        stream,
        response,
        anthropicPayload,
        ctx
      });
    });
  } catch (error) {
    recordErrorResponse(ctx, anthropicPayload.model, error);
    throw error;
  }
}
|
|
4832
|
+
/**
 * Handle a non-streaming direct Anthropic response.
 *
 * Records a condensed copy of the response in history (text, tool_use and
 * thinking blocks keep their payload, any other block keeps only its type;
 * tool inputs are stored as JSON strings), updates the request tracker when a
 * tracking id exists, and returns the upstream response unchanged as JSON.
 *
 * @param c - Hono request context.
 * @param response - Parsed Anthropic message response.
 * @param ctx - Per-request context providing `historyId`, `startTime`,
 *   `trackingId` and `queueWaitMs`.
 * @returns The result of `c.json(response)`.
 */
function handleDirectAnthropicNonStreamingResponse(c, response, ctx) {
  consola.debug("Non-streaming response from Copilot (direct Anthropic):", JSON.stringify(response).slice(-400));
  const summarizeBlock = (block) => {
    if (block.type === "text") return {
      type: "text",
      text: block.text
    };
    if (block.type === "tool_use") return {
      type: "tool_use",
      id: block.id,
      name: block.name,
      input: JSON.stringify(block.input)
    };
    if (block.type === "thinking") return {
      type: "thinking",
      thinking: block.thinking
    };
    return { type: block.type };
  };
  const historyEntry = {
    success: true,
    model: response.model,
    usage: response.usage,
    stop_reason: response.stop_reason ?? void 0,
    content: {
      role: "assistant",
      content: response.content.map((block) => summarizeBlock(block))
    },
    toolCalls: extractToolCallsFromAnthropicContent(response.content)
  };
  recordResponse(ctx.historyId, historyEntry, Date.now() - ctx.startTime);
  if (ctx.trackingId) {
    const { input_tokens: inputTokens, output_tokens: outputTokens } = response.usage;
    requestTracker.updateRequest(ctx.trackingId, {
      inputTokens,
      outputTokens,
      queueWaitMs: ctx.queueWaitMs
    });
  }
  return c.json(response);
}
|
|
4873
|
+
/**
 * Handle a streaming direct Anthropic response (passthrough SSE events).
 *
 * Every upstream event with a JSON payload is fed to the stream accumulator
 * and then written to the client with its original data string. Events
 * without data, or whose data is not valid JSON, are skipped; a "[DONE]"
 * payload ends the loop. When the loop finishes the accumulated response is
 * recorded and tracking is completed. On any error the failure is recorded
 * and an Anthropic error event is written to the client.
 *
 * @param opts - `{ stream, response, anthropicPayload, ctx }`: the SSE writer,
 *   the upstream event iterator, the original request body and the
 *   per-request tracking context.
 */
async function handleDirectAnthropicStreamingResponse(opts) {
  const { stream, response, anthropicPayload, ctx } = opts;
  const acc = createAnthropicStreamAccumulator();
  try {
    for await (const sseMessage of response) {
      consola.debug("Direct Anthropic raw stream event:", JSON.stringify(sseMessage));
      const payloadText = sseMessage.data;
      if (payloadText === "[DONE]") break;
      if (!payloadText) continue;
      let parsedEvent;
      try {
        parsedEvent = JSON.parse(payloadText);
      } catch (parseError) {
        // A malformed event is logged and dropped; the stream keeps going.
        consola.error("Failed to parse Anthropic stream event:", parseError, payloadText);
        continue;
      }
      processAnthropicEvent(parsedEvent, acc);
      // Forward the original data string, not a re-serialized copy.
      await stream.writeSSE({
        event: sseMessage.event || parsedEvent.type,
        data: payloadText
      });
    }
    recordStreamingResponse(acc, anthropicPayload.model, ctx);
    completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
  } catch (error) {
    consola.error("Direct Anthropic stream error:", error);
    recordStreamError({
      acc,
      fallbackModel: anthropicPayload.model,
      ctx,
      error
    });
    failTracking(ctx.trackingId, error);
    const errorEvent = translateErrorToAnthropicErrorEvent();
    await stream.writeSSE({
      event: errorEvent.type,
      data: JSON.stringify(errorEvent)
    });
  }
}
|
|
4915
|
+
/**
|
|
4916
|
+
* Handle completion using OpenAI translation path (legacy)
|
|
4917
|
+
*/
|
|
4918
|
+
async function handleTranslatedCompletion(c, anthropicPayload, ctx) {
|
|
4514
4919
|
const { payload: translatedPayload, toolNameMapping } = translateToOpenAI(anthropicPayload);
|
|
4515
4920
|
consola.debug("Translated OpenAI request payload:", JSON.stringify(translatedPayload));
|
|
4516
4921
|
const selectedModel = state.models?.data.find((model) => model.id === translatedPayload.model);
|
|
4517
|
-
const { finalPayload: openAIPayload,
|
|
4518
|
-
if (
|
|
4922
|
+
const { finalPayload: openAIPayload, truncateResult } = await buildFinalPayload(translatedPayload, selectedModel);
|
|
4923
|
+
if (truncateResult) ctx.truncateResult = truncateResult;
|
|
4519
4924
|
if (state.manualApprove) await awaitApproval();
|
|
4520
4925
|
try {
|
|
4521
4926
|
const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(openAIPayload));
|
|
@@ -4527,7 +4932,7 @@ async function handleCompletion(c) {
|
|
|
4527
4932
|
ctx
|
|
4528
4933
|
});
|
|
4529
4934
|
consola.debug("Streaming response from Copilot");
|
|
4530
|
-
updateTrackerStatus(trackingId, "streaming");
|
|
4935
|
+
updateTrackerStatus(ctx.trackingId, "streaming");
|
|
4531
4936
|
return streamSSE(c, async (stream) => {
|
|
4532
4937
|
await handleStreamingResponse({
|
|
4533
4938
|
stream,
|
|
@@ -4548,8 +4953,8 @@ function handleNonStreamingResponse(opts) {
|
|
|
4548
4953
|
consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
|
|
4549
4954
|
let anthropicResponse = translateToAnthropic(response, toolNameMapping);
|
|
4550
4955
|
consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
|
|
4551
|
-
if (ctx.
|
|
4552
|
-
const marker =
|
|
4956
|
+
if (state.verbose && ctx.truncateResult?.wasCompacted) {
|
|
4957
|
+
const marker = createTruncationResponseMarker(ctx.truncateResult);
|
|
4553
4958
|
anthropicResponse = prependMarkerToAnthropicResponse(anthropicResponse, marker);
|
|
4554
4959
|
}
|
|
4555
4960
|
recordResponse(ctx.historyId, {
|
|
@@ -4621,9 +5026,9 @@ async function handleStreamingResponse(opts) {
|
|
|
4621
5026
|
};
|
|
4622
5027
|
const acc = createAnthropicStreamAccumulator();
|
|
4623
5028
|
try {
|
|
4624
|
-
if (ctx.
|
|
4625
|
-
const marker =
|
|
4626
|
-
await
|
|
5029
|
+
if (ctx.truncateResult?.wasCompacted) {
|
|
5030
|
+
const marker = createTruncationResponseMarker(ctx.truncateResult);
|
|
5031
|
+
await sendTruncationMarkerEvent(stream, streamState, marker);
|
|
4627
5032
|
acc.content += marker;
|
|
4628
5033
|
}
|
|
4629
5034
|
await processStreamChunks({
|
|
@@ -4651,7 +5056,7 @@ async function handleStreamingResponse(opts) {
|
|
|
4651
5056
|
});
|
|
4652
5057
|
}
|
|
4653
5058
|
}
|
|
4654
|
-
async function
|
|
5059
|
+
async function sendTruncationMarkerEvent(stream, streamState, marker) {
|
|
4655
5060
|
const blockStartEvent = {
|
|
4656
5061
|
type: "content_block_start",
|
|
4657
5062
|
index: streamState.contentBlockIndex,
|
|
@@ -4824,6 +5229,15 @@ function extractToolCallsFromContent(content) {
|
|
|
4824
5229
|
});
|
|
4825
5230
|
return tools.length > 0 ? tools : void 0;
|
|
4826
5231
|
}
|
|
5232
|
+
/**
 * Collect the `tool_use` blocks of an Anthropic content array.
 *
 * @param content - Iterable of Anthropic content blocks.
 * @returns An array of `{ id, name, input }` entries with `input` serialized
 *   as a JSON string, or `undefined` when there is no `tool_use` block.
 */
function extractToolCallsFromAnthropicContent(content) {
  const toolCalls = [...content]
    .filter((block) => block.type === "tool_use")
    .map((block) => ({
      id: block.id,
      name: block.name,
      input: JSON.stringify(block.input)
    }));
  return toolCalls.length === 0 ? void 0 : toolCalls;
}
|
|
4827
5241
|
|
|
4828
5242
|
//#endregion
|
|
4829
5243
|
//#region src/routes/messages/route.ts
|
|
@@ -4950,12 +5364,14 @@ async function runServer(options) {
|
|
|
4950
5364
|
if (options.verbose) {
|
|
4951
5365
|
consola.level = 5;
|
|
4952
5366
|
consola.info("Verbose logging enabled");
|
|
5367
|
+
state.verbose = true;
|
|
4953
5368
|
}
|
|
4954
5369
|
state.accountType = options.accountType;
|
|
4955
5370
|
if (options.accountType !== "individual") consola.info(`Using ${options.accountType} plan GitHub account`);
|
|
4956
5371
|
state.manualApprove = options.manual;
|
|
4957
5372
|
state.showToken = options.showToken;
|
|
4958
|
-
state.
|
|
5373
|
+
state.autoTruncate = options.autoTruncate;
|
|
5374
|
+
state.directAnthropicApi = options.directAnthropicApi;
|
|
4959
5375
|
if (options.rateLimit) initAdaptiveRateLimiter({
|
|
4960
5376
|
baseRetryIntervalSeconds: options.retryInterval,
|
|
4961
5377
|
requestIntervalSeconds: options.requestInterval,
|
|
@@ -4963,7 +5379,8 @@ async function runServer(options) {
|
|
|
4963
5379
|
consecutiveSuccessesForRecovery: options.consecutiveSuccesses
|
|
4964
5380
|
});
|
|
4965
5381
|
else consola.info("Rate limiting disabled");
|
|
4966
|
-
if (!options.
|
|
5382
|
+
if (!options.autoTruncate) consola.info("Auto-truncate disabled");
|
|
5383
|
+
if (!options.directAnthropicApi) consola.info("Direct Anthropic API disabled (using OpenAI translation)");
|
|
4967
5384
|
initHistory(options.history, options.historyLimit);
|
|
4968
5385
|
if (options.history) {
|
|
4969
5386
|
const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
|
|
@@ -5105,10 +5522,15 @@ const start = defineCommand({
|
|
|
5105
5522
|
default: "1000",
|
|
5106
5523
|
description: "Maximum number of history entries to keep in memory (0 = unlimited)"
|
|
5107
5524
|
},
|
|
5108
|
-
"no-auto-
|
|
5525
|
+
"no-auto-truncate": {
|
|
5526
|
+
type: "boolean",
|
|
5527
|
+
default: false,
|
|
5528
|
+
description: "Disable automatic conversation history truncation when exceeding limits"
|
|
5529
|
+
},
|
|
5530
|
+
"no-direct-anthropic": {
|
|
5109
5531
|
type: "boolean",
|
|
5110
5532
|
default: false,
|
|
5111
|
-
description: "Disable
|
|
5533
|
+
description: "Disable direct Anthropic API for Anthropic models (use OpenAI translation instead)"
|
|
5112
5534
|
}
|
|
5113
5535
|
},
|
|
5114
5536
|
run({ args }) {
|
|
@@ -5129,7 +5551,8 @@ const start = defineCommand({
|
|
|
5129
5551
|
proxyEnv: args["proxy-env"],
|
|
5130
5552
|
history: !args["no-history"],
|
|
5131
5553
|
historyLimit: Number.parseInt(args["history-limit"], 10),
|
|
5132
|
-
|
|
5554
|
+
autoTruncate: !args["no-auto-truncate"],
|
|
5555
|
+
directAnthropicApi: !args["no-direct-anthropic"]
|
|
5133
5556
|
});
|
|
5134
5557
|
}
|
|
5135
5558
|
});
|