@hsupu/copilot-api 0.7.8 → 0.7.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +255 -207
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -46,7 +46,7 @@ const state = {
|
|
|
46
46
|
accountType: "individual",
|
|
47
47
|
manualApprove: false,
|
|
48
48
|
showToken: false,
|
|
49
|
-
autoCompact:
|
|
49
|
+
autoCompact: true
|
|
50
50
|
};
|
|
51
51
|
|
|
52
52
|
//#endregion
|
|
@@ -558,15 +558,12 @@ const SUPPORTED_VERSIONS = {
|
|
|
558
558
|
min: "2.0.0",
|
|
559
559
|
max: "2.1.10"
|
|
560
560
|
},
|
|
561
|
-
v2b: {
|
|
562
|
-
min: "2.1.11",
|
|
563
|
-
max: "2.1.12"
|
|
564
|
-
}
|
|
561
|
+
v2b: { min: "2.1.11" }
|
|
565
562
|
};
|
|
566
563
|
const PATTERNS = {
|
|
567
564
|
funcOriginal: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return 200000\}/,
|
|
568
565
|
funcPatched: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return \d+\}/,
|
|
569
|
-
variable: /var
|
|
566
|
+
variable: /var ([A-Za-z_$]\w*)=(\d+)(?=,\w+=20000,)/
|
|
570
567
|
};
|
|
571
568
|
/**
|
|
572
569
|
* Parse semver version string to comparable parts
|
|
@@ -592,14 +589,14 @@ function compareVersions(a, b) {
|
|
|
592
589
|
}
|
|
593
590
|
function getPatternTypeForVersion(version$1) {
|
|
594
591
|
if (compareVersions(version$1, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
|
|
595
|
-
if (compareVersions(version$1, SUPPORTED_VERSIONS.v2b.min) >= 0
|
|
592
|
+
if (compareVersions(version$1, SUPPORTED_VERSIONS.v2b.min) >= 0) return "variable";
|
|
596
593
|
return null;
|
|
597
594
|
}
|
|
598
595
|
/**
|
|
599
596
|
* Get supported version range string for error messages
|
|
600
597
|
*/
|
|
601
598
|
function getSupportedRangeString() {
|
|
602
|
-
return `${SUPPORTED_VERSIONS.v2a.min}-${SUPPORTED_VERSIONS.v2a.max}, ${SUPPORTED_VERSIONS.v2b.min}
|
|
599
|
+
return `${SUPPORTED_VERSIONS.v2a.min}-${SUPPORTED_VERSIONS.v2a.max}, ${SUPPORTED_VERSIONS.v2b.min}+`;
|
|
603
600
|
}
|
|
604
601
|
/**
|
|
605
602
|
* Get Claude Code version from package.json
|
|
@@ -632,9 +629,9 @@ function findInVoltaTools(voltaHome) {
|
|
|
632
629
|
return paths;
|
|
633
630
|
}
|
|
634
631
|
/**
|
|
635
|
-
* Find Claude Code CLI
|
|
632
|
+
* Find all Claude Code CLI paths by checking common locations
|
|
636
633
|
*/
|
|
637
|
-
function
|
|
634
|
+
function findAllClaudeCodePaths() {
|
|
638
635
|
const possiblePaths = [];
|
|
639
636
|
const home = process.env.HOME || "";
|
|
640
637
|
const voltaHome = process.env.VOLTA_HOME || join(home, ".volta");
|
|
@@ -649,22 +646,41 @@ function findClaudeCodePath() {
|
|
|
649
646
|
for (const base of globalPaths) possiblePaths.push(join(base, "@anthropic-ai", "claude-code", "cli.js"));
|
|
650
647
|
const bunGlobal = join(home, ".bun", "install", "global");
|
|
651
648
|
if (existsSync(bunGlobal)) possiblePaths.push(join(bunGlobal, "node_modules", "@anthropic-ai", "claude-code", "cli.js"));
|
|
652
|
-
return possiblePaths.
|
|
649
|
+
return [...new Set(possiblePaths.filter((p) => existsSync(p)))];
|
|
653
650
|
}
|
|
654
651
|
/**
|
|
655
|
-
* Get
|
|
652
|
+
* Get installation info for a CLI path
|
|
656
653
|
*/
|
|
657
|
-
function
|
|
654
|
+
function getInstallationInfo(cliPath) {
|
|
655
|
+
const version$1 = getClaudeCodeVersion(cliPath);
|
|
656
|
+
const content = readFileSync(cliPath, "utf8");
|
|
657
|
+
const limit = getCurrentLimit(content);
|
|
658
|
+
return {
|
|
659
|
+
path: cliPath,
|
|
660
|
+
version: version$1,
|
|
661
|
+
limit
|
|
662
|
+
};
|
|
663
|
+
}
|
|
664
|
+
function getCurrentLimitInfo(content) {
|
|
658
665
|
const varMatch = content.match(PATTERNS.variable);
|
|
659
|
-
if (varMatch) return
|
|
666
|
+
if (varMatch) return {
|
|
667
|
+
limit: Number.parseInt(varMatch[2], 10),
|
|
668
|
+
varName: varMatch[1]
|
|
669
|
+
};
|
|
660
670
|
const funcMatch = content.match(PATTERNS.funcPatched);
|
|
661
671
|
if (funcMatch) {
|
|
662
672
|
const limitMatch = funcMatch[0].match(/return (\d+)\}$/);
|
|
663
|
-
return limitMatch ? Number.parseInt(limitMatch[1], 10) : null;
|
|
673
|
+
return limitMatch ? { limit: Number.parseInt(limitMatch[1], 10) } : null;
|
|
664
674
|
}
|
|
665
675
|
return null;
|
|
666
676
|
}
|
|
667
677
|
/**
|
|
678
|
+
* Get current context limit from Claude Code (legacy wrapper)
|
|
679
|
+
*/
|
|
680
|
+
function getCurrentLimit(content) {
|
|
681
|
+
return getCurrentLimitInfo(content)?.limit ?? null;
|
|
682
|
+
}
|
|
683
|
+
/**
|
|
668
684
|
* Check if Claude Code version is supported for patching
|
|
669
685
|
*/
|
|
670
686
|
function checkVersionSupport(cliPath) {
|
|
@@ -696,22 +712,25 @@ function patchClaudeCode(cliPath, newLimit) {
|
|
|
696
712
|
const versionCheck = checkVersionSupport(cliPath);
|
|
697
713
|
if (!versionCheck.supported) {
|
|
698
714
|
consola.error(versionCheck.error);
|
|
699
|
-
return
|
|
715
|
+
return "failed";
|
|
700
716
|
}
|
|
701
717
|
consola.info(`Claude Code version: ${versionCheck.version}`);
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
return true;
|
|
705
|
-
}
|
|
718
|
+
const limitInfo = getCurrentLimitInfo(content);
|
|
719
|
+
if (limitInfo?.limit === newLimit) return "already_patched";
|
|
706
720
|
let newContent;
|
|
707
|
-
if (versionCheck.patternType === "variable")
|
|
708
|
-
|
|
721
|
+
if (versionCheck.patternType === "variable") {
|
|
722
|
+
if (!limitInfo?.varName) {
|
|
723
|
+
consola.error("Could not detect variable name for patching");
|
|
724
|
+
return "failed";
|
|
725
|
+
}
|
|
726
|
+
newContent = content.replace(PATTERNS.variable, `var ${limitInfo.varName}=${newLimit}`);
|
|
727
|
+
} else {
|
|
709
728
|
const replacement = `function HR(A){if(A.includes("[1m]"))return 1e6;return ${newLimit}}`;
|
|
710
729
|
const pattern = PATTERNS.funcOriginal.test(content) ? PATTERNS.funcOriginal : PATTERNS.funcPatched;
|
|
711
730
|
newContent = content.replace(pattern, replacement);
|
|
712
731
|
}
|
|
713
732
|
writeFileSync(cliPath, newContent);
|
|
714
|
-
return
|
|
733
|
+
return "success";
|
|
715
734
|
}
|
|
716
735
|
/**
|
|
717
736
|
* Restore Claude Code to original 200k limit
|
|
@@ -724,13 +743,19 @@ function restoreClaudeCode(cliPath) {
|
|
|
724
743
|
return false;
|
|
725
744
|
}
|
|
726
745
|
consola.info(`Claude Code version: ${versionCheck.version}`);
|
|
727
|
-
|
|
746
|
+
const limitInfo = getCurrentLimitInfo(content);
|
|
747
|
+
if (limitInfo?.limit === 2e5) {
|
|
728
748
|
consola.info("Already at original 200000 limit");
|
|
729
749
|
return true;
|
|
730
750
|
}
|
|
731
751
|
let newContent;
|
|
732
|
-
if (versionCheck.patternType === "variable")
|
|
733
|
-
|
|
752
|
+
if (versionCheck.patternType === "variable") {
|
|
753
|
+
if (!limitInfo?.varName) {
|
|
754
|
+
consola.error("Could not detect variable name for restoring");
|
|
755
|
+
return false;
|
|
756
|
+
}
|
|
757
|
+
newContent = content.replace(PATTERNS.variable, `var ${limitInfo.varName}=200000`);
|
|
758
|
+
} else newContent = content.replace(PATTERNS.funcPatched, "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}");
|
|
734
759
|
writeFileSync(cliPath, newContent);
|
|
735
760
|
return true;
|
|
736
761
|
}
|
|
@@ -739,7 +764,7 @@ function showStatus(cliPath, currentLimit) {
|
|
|
739
764
|
if (version$1) consola.info(`Claude Code version: ${version$1}`);
|
|
740
765
|
if (currentLimit === null) {
|
|
741
766
|
consola.warn("Could not detect current limit - CLI may have been updated");
|
|
742
|
-
consola.info("Look for
|
|
767
|
+
consola.info("Look for a variable like 'var XXX=200000' followed by ',YYY=20000,' in cli.js");
|
|
743
768
|
} else if (currentLimit === 2e5) consola.info("Status: Original (200k context window)");
|
|
744
769
|
else consola.info(`Status: Patched (${currentLimit} context window)`);
|
|
745
770
|
}
|
|
@@ -773,17 +798,42 @@ const patchClaude = defineCommand({
|
|
|
773
798
|
description: "Show current patch status without modifying"
|
|
774
799
|
}
|
|
775
800
|
},
|
|
776
|
-
run({ args }) {
|
|
777
|
-
|
|
778
|
-
if (
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
801
|
+
async run({ args }) {
|
|
802
|
+
let cliPath;
|
|
803
|
+
if (args.path) {
|
|
804
|
+
cliPath = args.path;
|
|
805
|
+
if (!existsSync(cliPath)) {
|
|
806
|
+
consola.error(`File not found: ${cliPath}`);
|
|
807
|
+
process.exit(1);
|
|
808
|
+
}
|
|
809
|
+
} else {
|
|
810
|
+
const installations = findAllClaudeCodePaths();
|
|
811
|
+
if (installations.length === 0) {
|
|
812
|
+
consola.error("Could not find Claude Code installation");
|
|
813
|
+
consola.info("Searched in: volta, npm global, bun global");
|
|
814
|
+
consola.info("Use --path to specify the path to cli.js manually");
|
|
815
|
+
process.exit(1);
|
|
816
|
+
}
|
|
817
|
+
if (installations.length === 1) cliPath = installations[0];
|
|
818
|
+
else {
|
|
819
|
+
consola.info(`Found ${installations.length} Claude Code installations:`);
|
|
820
|
+
const options = installations.map((path$1) => {
|
|
821
|
+
const info = getInstallationInfo(path$1);
|
|
822
|
+
let status = "unknown";
|
|
823
|
+
if (info.limit === 2e5) status = "original";
|
|
824
|
+
else if (info.limit) status = `patched: ${info.limit}`;
|
|
825
|
+
return {
|
|
826
|
+
label: `v${info.version ?? "?"} (${status}) - ${path$1}`,
|
|
827
|
+
value: path$1
|
|
828
|
+
};
|
|
829
|
+
});
|
|
830
|
+
const selected = await consola.prompt("Select installation to patch:", {
|
|
831
|
+
type: "select",
|
|
832
|
+
options
|
|
833
|
+
});
|
|
834
|
+
if (typeof selected === "symbol") process.exit(0);
|
|
835
|
+
cliPath = selected;
|
|
836
|
+
}
|
|
787
837
|
}
|
|
788
838
|
consola.info(`Claude Code path: ${cliPath}`);
|
|
789
839
|
const content = readFileSync(cliPath, "utf8");
|
|
@@ -806,13 +856,14 @@ const patchClaude = defineCommand({
|
|
|
806
856
|
consola.error("Invalid limit value. Must be a number >= 1000");
|
|
807
857
|
process.exit(1);
|
|
808
858
|
}
|
|
809
|
-
|
|
810
|
-
|
|
859
|
+
const result = patchClaudeCode(cliPath, limit);
|
|
860
|
+
if (result === "success") {
|
|
861
|
+
consola.success(`Patched context window: ${currentLimit ?? 2e5} → ${limit}`);
|
|
811
862
|
consola.info("Note: You may need to re-run this after Claude Code updates");
|
|
812
|
-
} else {
|
|
863
|
+
} else if (result === "already_patched") consola.success(`Already patched with limit ${limit}`);
|
|
864
|
+
else {
|
|
813
865
|
consola.error("Failed to patch - pattern not found");
|
|
814
866
|
consola.info("Claude Code may have been updated to a new version");
|
|
815
|
-
consola.info("Check the cli.js for the HR function pattern");
|
|
816
867
|
process.exit(1);
|
|
817
868
|
}
|
|
818
869
|
}
|
|
@@ -821,7 +872,7 @@ const patchClaude = defineCommand({
|
|
|
821
872
|
//#endregion
|
|
822
873
|
//#region package.json
|
|
823
874
|
var name = "@hsupu/copilot-api";
|
|
824
|
-
var version = "0.7.
|
|
875
|
+
var version = "0.7.10";
|
|
825
876
|
var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
|
|
826
877
|
var keywords = [
|
|
827
878
|
"proxy",
|
|
@@ -2176,188 +2227,157 @@ const getTokenCount = async (payload, model) => {
|
|
|
2176
2227
|
//#endregion
|
|
2177
2228
|
//#region src/lib/auto-compact.ts
|
|
2178
2229
|
const DEFAULT_CONFIG = {
|
|
2179
|
-
targetTokens: 12e4,
|
|
2180
2230
|
safetyMarginPercent: 2,
|
|
2181
2231
|
maxRequestBodyBytes: 500 * 1024
|
|
2182
2232
|
};
|
|
2233
|
+
/** Dynamic byte limit that adjusts based on 413 errors */
|
|
2234
|
+
let dynamicByteLimit = null;
|
|
2183
2235
|
/**
|
|
2184
|
-
*
|
|
2185
|
-
* Starts at 500KB and can be adjusted when 413 errors are encountered.
|
|
2186
|
-
*/
|
|
2187
|
-
let dynamicByteLimitOverride = null;
|
|
2188
|
-
/**
|
|
2189
|
-
* Called when a 413 error is encountered with a specific payload size.
|
|
2190
|
-
* Adjusts the dynamic byte limit to 90% of the failing size.
|
|
2236
|
+
* Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
|
|
2191
2237
|
*/
|
|
2192
2238
|
function onRequestTooLarge(failingBytes) {
|
|
2193
2239
|
const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
|
|
2194
|
-
|
|
2195
|
-
consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed
|
|
2240
|
+
dynamicByteLimit = newLimit;
|
|
2241
|
+
consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
|
|
2196
2242
|
}
|
|
2197
|
-
|
|
2198
|
-
|
|
2199
|
-
|
|
2200
|
-
|
|
2201
|
-
async function checkNeedsCompaction(payload, model, config = {}) {
|
|
2202
|
-
const cfg = {
|
|
2203
|
-
...DEFAULT_CONFIG,
|
|
2204
|
-
...config
|
|
2205
|
-
};
|
|
2206
|
-
const currentTokens = (await getTokenCount(payload, model)).input;
|
|
2207
|
-
const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
|
|
2208
|
-
const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
|
|
2209
|
-
const currentBytes = JSON.stringify(payload).length;
|
|
2210
|
-
const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
|
|
2211
|
-
const exceedsTokens = currentTokens > tokenLimit;
|
|
2212
|
-
const exceedsBytes = currentBytes > byteLimit;
|
|
2213
|
-
let reason;
|
|
2214
|
-
if (exceedsTokens && exceedsBytes) reason = "both";
|
|
2215
|
-
else if (exceedsTokens) reason = "tokens";
|
|
2216
|
-
else if (exceedsBytes) reason = "bytes";
|
|
2243
|
+
function calculateLimits(model, config) {
|
|
2244
|
+
const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
|
|
2245
|
+
const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
|
|
2246
|
+
const byteLimit = dynamicByteLimit ?? config.maxRequestBodyBytes;
|
|
2217
2247
|
return {
|
|
2218
|
-
needed: exceedsTokens || exceedsBytes,
|
|
2219
|
-
currentTokens,
|
|
2220
2248
|
tokenLimit,
|
|
2221
|
-
|
|
2222
|
-
byteLimit,
|
|
2223
|
-
reason
|
|
2249
|
+
byteLimit
|
|
2224
2250
|
};
|
|
2225
2251
|
}
|
|
2226
|
-
/**
|
|
2227
|
-
|
|
2228
|
-
|
|
2229
|
-
|
|
2230
|
-
|
|
2231
|
-
|
|
2232
|
-
|
|
2233
|
-
else if (Array.isArray(message.content)) {
|
|
2234
|
-
for (const part of message.content) if (part.type === "text") text += part.text;
|
|
2235
|
-
else if ("image_url" in part) text += part.image_url.url;
|
|
2252
|
+
/** Estimate tokens for a single message (fast approximation) */
|
|
2253
|
+
function estimateMessageTokens(msg) {
|
|
2254
|
+
let charCount = 0;
|
|
2255
|
+
if (typeof msg.content === "string") charCount = msg.content.length;
|
|
2256
|
+
else if (Array.isArray(msg.content)) {
|
|
2257
|
+
for (const part of msg.content) if (part.type === "text") charCount += part.text.length;
|
|
2258
|
+
else if ("image_url" in part) charCount += Math.min(part.image_url.url.length, 1e4);
|
|
2236
2259
|
}
|
|
2237
|
-
if (
|
|
2238
|
-
return Math.ceil(
|
|
2260
|
+
if (msg.tool_calls) charCount += JSON.stringify(msg.tool_calls).length;
|
|
2261
|
+
return Math.ceil(charCount / 4) + 10;
|
|
2239
2262
|
}
|
|
2240
|
-
/**
|
|
2241
|
-
|
|
2242
|
-
|
|
2263
|
+
/** Get byte size of a message */
|
|
2264
|
+
function getMessageBytes(msg) {
|
|
2265
|
+
return JSON.stringify(msg).length;
|
|
2266
|
+
}
|
|
2267
|
+
/** Extract system/developer messages from the beginning */
|
|
2243
2268
|
function extractSystemMessages(messages) {
|
|
2244
|
-
|
|
2245
|
-
|
|
2246
|
-
|
|
2247
|
-
|
|
2248
|
-
|
|
2249
|
-
systemMessages.push(msg);
|
|
2250
|
-
i++;
|
|
2251
|
-
} else break;
|
|
2269
|
+
let splitIndex = 0;
|
|
2270
|
+
while (splitIndex < messages.length) {
|
|
2271
|
+
const role = messages[splitIndex].role;
|
|
2272
|
+
if (role !== "system" && role !== "developer") break;
|
|
2273
|
+
splitIndex++;
|
|
2252
2274
|
}
|
|
2253
2275
|
return {
|
|
2254
|
-
systemMessages,
|
|
2255
|
-
|
|
2276
|
+
systemMessages: messages.slice(0, splitIndex),
|
|
2277
|
+
conversationMessages: messages.slice(splitIndex)
|
|
2256
2278
|
};
|
|
2257
2279
|
}
|
|
2258
|
-
/**
|
|
2259
|
-
|
|
2260
|
-
|
|
2261
|
-
function getToolUseIds(message) {
|
|
2262
|
-
if (message.role === "assistant" && message.tool_calls) return message.tool_calls.map((tc) => tc.id);
|
|
2280
|
+
/** Get tool_use IDs from an assistant message */
|
|
2281
|
+
function getToolCallIds(msg) {
|
|
2282
|
+
if (msg.role === "assistant" && msg.tool_calls) return msg.tool_calls.map((tc) => tc.id);
|
|
2263
2283
|
return [];
|
|
2264
2284
|
}
|
|
2265
|
-
/**
|
|
2266
|
-
|
|
2267
|
-
*/
|
|
2268
|
-
|
|
2269
|
-
|
|
2285
|
+
/** Filter orphaned tool_result messages */
|
|
2286
|
+
function filterOrphanedToolResults(messages) {
|
|
2287
|
+
const toolUseIds = /* @__PURE__ */ new Set();
|
|
2288
|
+
for (const msg of messages) for (const id of getToolCallIds(msg)) toolUseIds.add(id);
|
|
2289
|
+
let removedCount = 0;
|
|
2290
|
+
const filtered = messages.filter((msg) => {
|
|
2291
|
+
if (msg.role === "tool" && msg.tool_call_id && !toolUseIds.has(msg.tool_call_id)) {
|
|
2292
|
+
removedCount++;
|
|
2293
|
+
return false;
|
|
2294
|
+
}
|
|
2295
|
+
return true;
|
|
2296
|
+
});
|
|
2297
|
+
if (removedCount > 0) consola.debug(`Auto-compact: Filtered ${removedCount} orphaned tool_result`);
|
|
2298
|
+
return filtered;
|
|
2299
|
+
}
|
|
2300
|
+
/** Ensure messages start with a user message */
|
|
2301
|
+
function ensureStartsWithUser(messages) {
|
|
2302
|
+
let startIndex = 0;
|
|
2303
|
+
while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
|
|
2304
|
+
if (startIndex > 0) consola.debug(`Auto-compact: Skipped ${startIndex} leading non-user messages`);
|
|
2305
|
+
return messages.slice(startIndex);
|
|
2270
2306
|
}
|
|
2271
2307
|
/**
|
|
2272
|
-
* Find the optimal
|
|
2273
|
-
*
|
|
2274
|
-
*
|
|
2308
|
+
* Find the optimal index from which to preserve messages.
|
|
2309
|
+
* Uses binary search with pre-calculated cumulative sums.
|
|
2310
|
+
* Returns the smallest index where the preserved portion fits within limits.
|
|
2275
2311
|
*/
|
|
2276
|
-
function findOptimalPreserveIndex(
|
|
2312
|
+
function findOptimalPreserveIndex(params) {
|
|
2313
|
+
const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
|
|
2277
2314
|
if (messages.length === 0) return 0;
|
|
2278
|
-
const
|
|
2279
|
-
const
|
|
2280
|
-
const
|
|
2281
|
-
|
|
2282
|
-
const
|
|
2283
|
-
const
|
|
2284
|
-
const
|
|
2285
|
-
for (let i =
|
|
2315
|
+
const markerBytes = 200;
|
|
2316
|
+
const availableTokens = tokenLimit - systemTokens - 50;
|
|
2317
|
+
const availableBytes = byteLimit - payloadOverhead - systemBytes - markerBytes;
|
|
2318
|
+
if (availableTokens <= 0 || availableBytes <= 0) return messages.length;
|
|
2319
|
+
const n = messages.length;
|
|
2320
|
+
const cumTokens = Array.from({ length: n + 1 }, () => 0);
|
|
2321
|
+
const cumBytes = Array.from({ length: n + 1 }, () => 0);
|
|
2322
|
+
for (let i = n - 1; i >= 0; i--) {
|
|
2286
2323
|
const msg = messages[i];
|
|
2287
|
-
|
|
2288
|
-
|
|
2324
|
+
cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
|
|
2325
|
+
cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
|
|
2289
2326
|
}
|
|
2290
2327
|
let left = 0;
|
|
2291
|
-
let right =
|
|
2328
|
+
let right = n;
|
|
2292
2329
|
while (left < right) {
|
|
2293
|
-
const mid =
|
|
2294
|
-
|
|
2295
|
-
const bytesFromMid = cumulativeBytes[mid];
|
|
2296
|
-
if (tokensFromMid <= availableTokens && bytesFromMid <= availableBytes) right = mid;
|
|
2330
|
+
const mid = left + right >>> 1;
|
|
2331
|
+
if (cumTokens[mid] <= availableTokens && cumBytes[mid] <= availableBytes) right = mid;
|
|
2297
2332
|
else left = mid + 1;
|
|
2298
2333
|
}
|
|
2299
2334
|
return left;
|
|
2300
2335
|
}
|
|
2301
2336
|
/**
|
|
2302
|
-
*
|
|
2303
|
-
* in the preserved message list. This prevents API errors when truncation
|
|
2304
|
-
* separates tool_use/tool_result pairs.
|
|
2305
|
-
*/
|
|
2306
|
-
function filterOrphanedToolResults(messages) {
|
|
2307
|
-
const availableToolUseIds = /* @__PURE__ */ new Set();
|
|
2308
|
-
for (const msg of messages) for (const id of getToolUseIds(msg)) availableToolUseIds.add(id);
|
|
2309
|
-
const filteredMessages = [];
|
|
2310
|
-
let removedCount = 0;
|
|
2311
|
-
for (const msg of messages) {
|
|
2312
|
-
if (msg.role === "tool" && msg.tool_call_id && !availableToolUseIds.has(msg.tool_call_id)) {
|
|
2313
|
-
removedCount++;
|
|
2314
|
-
continue;
|
|
2315
|
-
}
|
|
2316
|
-
filteredMessages.push(msg);
|
|
2317
|
-
}
|
|
2318
|
-
if (removedCount > 0) consola.info(`Auto-compact: Removed ${removedCount} orphaned tool_result message(s) without matching tool_use`);
|
|
2319
|
-
return filteredMessages;
|
|
2320
|
-
}
|
|
2321
|
-
/**
|
|
2322
|
-
* Ensure the message list starts with a user message.
|
|
2323
|
-
* If it starts with assistant or tool messages, skip them until we find a user message.
|
|
2324
|
-
* This is required because OpenAI API expects conversations to start with user messages
|
|
2325
|
-
* (after system messages).
|
|
2337
|
+
* Check if payload needs compaction based on model limits or byte size.
|
|
2326
2338
|
*/
|
|
2327
|
-
function
|
|
2328
|
-
|
|
2329
|
-
|
|
2330
|
-
|
|
2331
|
-
|
|
2332
|
-
}
|
|
2333
|
-
|
|
2334
|
-
|
|
2339
|
+
async function checkNeedsCompaction(payload, model, config = {}) {
|
|
2340
|
+
const cfg = {
|
|
2341
|
+
...DEFAULT_CONFIG,
|
|
2342
|
+
...config
|
|
2343
|
+
};
|
|
2344
|
+
const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
|
|
2345
|
+
const currentTokens = (await getTokenCount(payload, model)).input;
|
|
2346
|
+
const currentBytes = JSON.stringify(payload).length;
|
|
2347
|
+
const exceedsTokens = currentTokens > tokenLimit;
|
|
2348
|
+
const exceedsBytes = currentBytes > byteLimit;
|
|
2349
|
+
let reason;
|
|
2350
|
+
if (exceedsTokens && exceedsBytes) reason = "both";
|
|
2351
|
+
else if (exceedsTokens) reason = "tokens";
|
|
2352
|
+
else if (exceedsBytes) reason = "bytes";
|
|
2353
|
+
return {
|
|
2354
|
+
needed: exceedsTokens || exceedsBytes,
|
|
2355
|
+
currentTokens,
|
|
2356
|
+
tokenLimit,
|
|
2357
|
+
currentBytes,
|
|
2358
|
+
byteLimit,
|
|
2359
|
+
reason
|
|
2360
|
+
};
|
|
2335
2361
|
}
|
|
2336
|
-
/**
|
|
2337
|
-
* Create a truncation marker message.
|
|
2338
|
-
*/
|
|
2362
|
+
/** Create a truncation marker message */
|
|
2339
2363
|
function createTruncationMarker(removedCount) {
|
|
2340
2364
|
return {
|
|
2341
2365
|
role: "user",
|
|
2342
|
-
content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages
|
|
2366
|
+
content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages removed to fit context limits]`
|
|
2343
2367
|
};
|
|
2344
2368
|
}
|
|
2345
2369
|
/**
|
|
2346
|
-
* Perform auto-compaction on a payload that exceeds
|
|
2347
|
-
*
|
|
2348
|
-
* The algorithm finds the optimal truncation point to maximize preserved messages
|
|
2349
|
-
* while staying under both token and byte limits.
|
|
2370
|
+
* Perform auto-compaction on a payload that exceeds limits.
|
|
2371
|
+
* Uses binary search to find the optimal truncation point.
|
|
2350
2372
|
*/
|
|
2351
2373
|
async function autoCompact(payload, model, config = {}) {
|
|
2352
2374
|
const cfg = {
|
|
2353
2375
|
...DEFAULT_CONFIG,
|
|
2354
2376
|
...config
|
|
2355
2377
|
};
|
|
2356
|
-
const
|
|
2357
|
-
const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
|
|
2358
|
-
const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
|
|
2378
|
+
const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
|
|
2359
2379
|
const originalBytes = JSON.stringify(payload).length;
|
|
2360
|
-
const
|
|
2380
|
+
const originalTokens = (await getTokenCount(payload, model)).input;
|
|
2361
2381
|
if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
|
|
2362
2382
|
payload,
|
|
2363
2383
|
wasCompacted: false,
|
|
@@ -2371,12 +2391,23 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2371
2391
|
if (exceedsTokens && exceedsBytes) reason = "tokens and size";
|
|
2372
2392
|
else if (exceedsBytes) reason = "size";
|
|
2373
2393
|
else reason = "tokens";
|
|
2374
|
-
consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)
|
|
2375
|
-
const { systemMessages,
|
|
2376
|
-
|
|
2377
|
-
const
|
|
2394
|
+
consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
|
|
2395
|
+
const { systemMessages, conversationMessages } = extractSystemMessages(payload.messages);
|
|
2396
|
+
const messagesJson = JSON.stringify(payload.messages);
|
|
2397
|
+
const payloadOverhead = originalBytes - messagesJson.length;
|
|
2398
|
+
const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
|
|
2399
|
+
const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
|
|
2400
|
+
consola.debug(`Auto-compact: overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
|
|
2401
|
+
const preserveIndex = findOptimalPreserveIndex({
|
|
2402
|
+
messages: conversationMessages,
|
|
2403
|
+
systemBytes,
|
|
2404
|
+
systemTokens,
|
|
2405
|
+
payloadOverhead,
|
|
2406
|
+
tokenLimit,
|
|
2407
|
+
byteLimit
|
|
2408
|
+
});
|
|
2378
2409
|
if (preserveIndex === 0) {
|
|
2379
|
-
consola.warn("Auto-compact: Cannot truncate
|
|
2410
|
+
consola.warn("Auto-compact: Cannot truncate, system messages too large");
|
|
2380
2411
|
return {
|
|
2381
2412
|
payload,
|
|
2382
2413
|
wasCompacted: false,
|
|
@@ -2385,8 +2416,8 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2385
2416
|
removedMessageCount: 0
|
|
2386
2417
|
};
|
|
2387
2418
|
}
|
|
2388
|
-
if (preserveIndex >=
|
|
2389
|
-
consola.warn("Auto-compact: Would need to remove all messages
|
|
2419
|
+
if (preserveIndex >= conversationMessages.length) {
|
|
2420
|
+
consola.warn("Auto-compact: Would need to remove all messages");
|
|
2390
2421
|
return {
|
|
2391
2422
|
payload,
|
|
2392
2423
|
wasCompacted: false,
|
|
@@ -2395,13 +2426,12 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2395
2426
|
removedMessageCount: 0
|
|
2396
2427
|
};
|
|
2397
2428
|
}
|
|
2398
|
-
|
|
2399
|
-
|
|
2400
|
-
|
|
2401
|
-
|
|
2402
|
-
|
|
2403
|
-
|
|
2404
|
-
consola.warn("Auto-compact: All messages were filtered out after cleanup, cannot compact");
|
|
2429
|
+
let preserved = conversationMessages.slice(preserveIndex);
|
|
2430
|
+
preserved = filterOrphanedToolResults(preserved);
|
|
2431
|
+
preserved = ensureStartsWithUser(preserved);
|
|
2432
|
+
preserved = filterOrphanedToolResults(preserved);
|
|
2433
|
+
if (preserved.length === 0) {
|
|
2434
|
+
consola.warn("Auto-compact: All messages filtered out after cleanup");
|
|
2405
2435
|
return {
|
|
2406
2436
|
payload,
|
|
2407
2437
|
wasCompacted: false,
|
|
@@ -2410,29 +2440,30 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2410
2440
|
removedMessageCount: 0
|
|
2411
2441
|
};
|
|
2412
2442
|
}
|
|
2413
|
-
|
|
2414
|
-
const
|
|
2443
|
+
const removedCount = conversationMessages.length - preserved.length;
|
|
2444
|
+
const marker = createTruncationMarker(removedCount);
|
|
2415
2445
|
const newPayload = {
|
|
2416
2446
|
...payload,
|
|
2417
2447
|
messages: [
|
|
2418
2448
|
...systemMessages,
|
|
2419
|
-
|
|
2420
|
-
...
|
|
2449
|
+
marker,
|
|
2450
|
+
...preserved
|
|
2421
2451
|
]
|
|
2422
2452
|
};
|
|
2423
|
-
const newTokenCount = await getTokenCount(newPayload, model);
|
|
2424
2453
|
const newBytes = JSON.stringify(newPayload).length;
|
|
2425
|
-
|
|
2454
|
+
const newTokenCount = await getTokenCount(newPayload, model);
|
|
2455
|
+
consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
|
|
2456
|
+
if (newBytes > byteLimit) consola.warn(`Auto-compact: Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
|
|
2426
2457
|
return {
|
|
2427
2458
|
payload: newPayload,
|
|
2428
2459
|
wasCompacted: true,
|
|
2429
2460
|
originalTokens,
|
|
2430
2461
|
compactedTokens: newTokenCount.input,
|
|
2431
|
-
removedMessageCount:
|
|
2462
|
+
removedMessageCount: removedCount
|
|
2432
2463
|
};
|
|
2433
2464
|
}
|
|
2434
2465
|
/**
|
|
2435
|
-
* Create a marker to
|
|
2466
|
+
* Create a marker to prepend to responses indicating auto-compaction occurred.
|
|
2436
2467
|
*/
|
|
2437
2468
|
function createCompactionMarker(result) {
|
|
2438
2469
|
if (!result.wasCompacted) return "";
|
|
@@ -4129,16 +4160,33 @@ function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameM
|
|
|
4129
4160
|
const otherMessages = anthropicMessages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, toolNameMapping));
|
|
4130
4161
|
return [...systemMessages, ...otherMessages];
|
|
4131
4162
|
}
|
|
4163
|
+
const RESERVED_KEYWORDS = ["x-anthropic-billing-header"];
|
|
4164
|
+
/**
|
|
4165
|
+
* Filter out reserved keywords from system prompt text.
|
|
4166
|
+
* Copilot API rejects requests containing these keywords.
|
|
4167
|
+
* Removes the entire line containing the keyword to keep the prompt clean.
|
|
4168
|
+
*/
|
|
4169
|
+
function filterReservedKeywords(text) {
|
|
4170
|
+
let filtered = text;
|
|
4171
|
+
for (const keyword of RESERVED_KEYWORDS) if (text.includes(keyword)) {
|
|
4172
|
+
consola.debug(`[Reserved Keyword] Removing line containing "${keyword}"`);
|
|
4173
|
+
filtered = filtered.split("\n").filter((line) => !line.includes(keyword)).join("\n");
|
|
4174
|
+
}
|
|
4175
|
+
return filtered;
|
|
4176
|
+
}
|
|
4132
4177
|
function handleSystemPrompt(system) {
|
|
4133
4178
|
if (!system) return [];
|
|
4134
4179
|
if (typeof system === "string") return [{
|
|
4135
4180
|
role: "system",
|
|
4136
|
-
content: system
|
|
4137
|
-
}];
|
|
4138
|
-
else return [{
|
|
4139
|
-
role: "system",
|
|
4140
|
-
content: system.map((block) => block.text).join("\n\n")
|
|
4181
|
+
content: filterReservedKeywords(system)
|
|
4141
4182
|
}];
|
|
4183
|
+
else {
|
|
4184
|
+
const systemText = system.map((block) => block.text).join("\n\n");
|
|
4185
|
+
return [{
|
|
4186
|
+
role: "system",
|
|
4187
|
+
content: filterReservedKeywords(systemText)
|
|
4188
|
+
}];
|
|
4189
|
+
}
|
|
4142
4190
|
}
|
|
4143
4191
|
function handleUserMessage(message) {
|
|
4144
4192
|
const newMessages = [];
|
|
@@ -4983,7 +5031,7 @@ async function runServer(options) {
|
|
|
4983
5031
|
consecutiveSuccessesForRecovery: options.consecutiveSuccesses
|
|
4984
5032
|
});
|
|
4985
5033
|
else consola.info("Rate limiting disabled");
|
|
4986
|
-
if (options.autoCompact) consola.info("Auto-compact
|
|
5034
|
+
if (!options.autoCompact) consola.info("Auto-compact disabled");
|
|
4987
5035
|
initHistory(options.history, options.historyLimit);
|
|
4988
5036
|
if (options.history) {
|
|
4989
5037
|
const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
|
|
@@ -5125,10 +5173,10 @@ const start = defineCommand({
|
|
|
5125
5173
|
default: "1000",
|
|
5126
5174
|
description: "Maximum number of history entries to keep in memory (0 = unlimited)"
|
|
5127
5175
|
},
|
|
5128
|
-
"auto-compact": {
|
|
5176
|
+
"no-auto-compact": {
|
|
5129
5177
|
type: "boolean",
|
|
5130
5178
|
default: false,
|
|
5131
|
-
description: "
|
|
5179
|
+
description: "Disable automatic conversation history compression when exceeding limits"
|
|
5132
5180
|
}
|
|
5133
5181
|
},
|
|
5134
5182
|
run({ args }) {
|
|
@@ -5149,7 +5197,7 @@ const start = defineCommand({
|
|
|
5149
5197
|
proxyEnv: args["proxy-env"],
|
|
5150
5198
|
history: !args["no-history"],
|
|
5151
5199
|
historyLimit: Number.parseInt(args["history-limit"], 10),
|
|
5152
|
-
autoCompact: args["auto-compact"]
|
|
5200
|
+
autoCompact: !args["no-auto-compact"]
|
|
5153
5201
|
});
|
|
5154
5202
|
}
|
|
5155
5203
|
});
|