@hsupu/copilot-api 0.7.7 → 0.7.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +273 -231
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
package/dist/main.js
CHANGED
@@ -46,7 +46,7 @@ const state = {
  accountType: "individual",
  manualApprove: false,
  showToken: false,
- autoCompact:
+ autoCompact: true
  };

  //#endregion
@@ -246,8 +246,8 @@ async function getVSCodeVersion() {
  }
  });
  if (!response.ok) return FALLBACK;
- const version = (await response.json()).tag_name;
- if (version && /^\d+\.\d+\.\d+$/.test(version)) return version;
+ const version$1 = (await response.json()).tag_name;
+ if (version$1 && /^\d+\.\d+\.\d+$/.test(version$1)) return version$1;
  return FALLBACK;
  } catch {
  return FALLBACK;
@@ -434,13 +434,13 @@ const checkUsage = defineCommand({
  const premiumUsed = premiumTotal - premium.remaining;
  const premiumPercentUsed = premiumTotal > 0 ? premiumUsed / premiumTotal * 100 : 0;
  const premiumPercentRemaining = premium.percent_remaining;
- function summarizeQuota(name, snap) {
- if (!snap) return `${name}: N/A`;
+ function summarizeQuota(name$1, snap) {
+ if (!snap) return `${name$1}: N/A`;
  const total = snap.entitlement;
  const used = total - snap.remaining;
  const percentUsed = total > 0 ? used / total * 100 : 0;
  const percentRemaining = snap.percent_remaining;
- return `${name}: ${used}/${total} used (${percentUsed.toFixed(1)}% used, ${percentRemaining.toFixed(1)}% remaining)`;
+ return `${name$1}: ${used}/${total} used (${percentUsed.toFixed(1)}% used, ${percentRemaining.toFixed(1)}% remaining)`;
  }
  const premiumLine = `Premium: ${premiumUsed}/${premiumTotal} used (${premiumPercentUsed.toFixed(1)}% used, ${premiumPercentRemaining.toFixed(1)}% remaining)`;
  const chatLine = summarizeQuota("Chat", usage.quota_snapshots.chat);
@@ -481,9 +481,9 @@ async function checkTokenExists() {
  }
  }
  async function getDebugInfo() {
- const [version, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
+ const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
  return {
- version,
+ version: version$1,
  runtime: getRuntimeInfo(),
  paths: {
  APP_DIR: PATHS.APP_DIR,
@@ -571,8 +571,8 @@ const PATTERNS = {
  /**
  * Parse semver version string to comparable parts
  */
- function parseVersion(version) {
- return version.split(".").map((n) => Number.parseInt(n, 10) || 0);
+ function parseVersion(version$1) {
+ return version$1.split(".").map((n) => Number.parseInt(n, 10) || 0);
  }
  /**
  * Compare two semver versions
@@ -590,9 +590,9 @@ function compareVersions(a, b) {
  }
  return 0;
  }
- function getPatternTypeForVersion(version) {
- if (compareVersions(version, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
- if (compareVersions(version, SUPPORTED_VERSIONS.v2b.min) >= 0 && compareVersions(version, SUPPORTED_VERSIONS.v2b.max) <= 0) return "variable";
+ function getPatternTypeForVersion(version$1) {
+ if (compareVersions(version$1, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
+ if (compareVersions(version$1, SUPPORTED_VERSIONS.v2b.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2b.max) <= 0) return "variable";
  return null;
  }
  /**
@@ -624,8 +624,8 @@ function findInVoltaTools(voltaHome) {
  if (existsSync(packagesPath)) paths.push(packagesPath);
  const toolsDir = join(voltaHome, "tools", "image", "node");
  if (existsSync(toolsDir)) try {
- for (const version of readdirSync(toolsDir)) {
- const claudePath = join(toolsDir, version, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
+ for (const version$1 of readdirSync(toolsDir)) {
+ const claudePath = join(toolsDir, version$1, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
  if (existsSync(claudePath)) paths.push(claudePath);
  }
  } catch {}
@@ -668,23 +668,23 @@ function getCurrentLimit(content) {
  * Check if Claude Code version is supported for patching
  */
  function checkVersionSupport(cliPath) {
- const version = getClaudeCodeVersion(cliPath);
- if (!version) return {
+ const version$1 = getClaudeCodeVersion(cliPath);
+ if (!version$1) return {
  supported: false,
  version: null,
  patternType: null,
  error: "Could not detect Claude Code version"
  };
- const patternType = getPatternTypeForVersion(version);
+ const patternType = getPatternTypeForVersion(version$1);
  if (!patternType) return {
  supported: false,
- version,
+ version: version$1,
  patternType: null,
- error: `Version ${version} is not supported. Supported: ${getSupportedRangeString()}`
+ error: `Version ${version$1} is not supported. Supported: ${getSupportedRangeString()}`
  };
  return {
  supported: true,
- version,
+ version: version$1,
  patternType
  };
  }
@@ -735,8 +735,8 @@ function restoreClaudeCode(cliPath) {
  return true;
  }
  function showStatus(cliPath, currentLimit) {
- const version = getClaudeCodeVersion(cliPath);
- if (version) consola.info(`Claude Code version: ${version}`);
+ const version$1 = getClaudeCodeVersion(cliPath);
+ if (version$1) consola.info(`Claude Code version: ${version$1}`);
  if (currentLimit === null) {
  consola.warn("Could not detect current limit - CLI may have been updated");
  consola.info("Look for the BS9 variable or HR function pattern in cli.js");
@@ -818,6 +818,86 @@ const patchClaude = defineCommand({
  }
  });

+ //#endregion
+ //#region package.json
+ var name = "@hsupu/copilot-api";
+ var version = "0.7.9";
+ var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
+ var keywords = [
+ "proxy",
+ "github-copilot",
+ "openai-compatible",
+ "anthropic-compatible"
+ ];
+ var homepage = "https://github.com/puxu-msft/copilot-api-js";
+ var bugs = "https://github.com/puxu-msft/copilot-api-js/issues";
+ var repository = {
+ "type": "git",
+ "url": "git+https://github.com/puxu-msft/copilot-api-js.git"
+ };
+ var author = "hsupu";
+ var type = "module";
+ var bin = { "copilot-api": "dist/main.js" };
+ var files = ["dist"];
+ var scripts = {
+ "build": "npx tsdown",
+ "dev": "bun run --watch ./src/main.ts",
+ "knip": "knip-bun",
+ "lint": "eslint --cache",
+ "lint:all": "eslint --cache .",
+ "prepack": "npm run build",
+ "prepare": "npm run build && (command -v bun >/dev/null 2>&1 && simple-git-hooks || true)",
+ "release": "bumpp && npm publish --access public",
+ "start": "NODE_ENV=production bun run ./src/main.ts",
+ "typecheck": "tsc"
+ };
+ var simple_git_hooks = { "pre-commit": "bun x lint-staged" };
+ var lint_staged = { "*": "bun run lint --fix" };
+ var dependencies = {
+ "citty": "^0.1.6",
+ "clipboardy": "^5.0.0",
+ "consola": "^3.4.2",
+ "fetch-event-stream": "^0.1.5",
+ "gpt-tokenizer": "^3.0.1",
+ "hono": "^4.9.9",
+ "picocolors": "^1.1.1",
+ "proxy-from-env": "^1.1.0",
+ "srvx": "^0.8.9",
+ "tiny-invariant": "^1.3.3",
+ "undici": "^7.16.0"
+ };
+ var devDependencies = {
+ "@echristian/eslint-config": "^0.0.54",
+ "@types/bun": "^1.2.23",
+ "@types/proxy-from-env": "^1.0.4",
+ "bumpp": "^10.2.3",
+ "eslint": "^9.37.0",
+ "knip": "^5.64.1",
+ "lint-staged": "^16.2.3",
+ "prettier-plugin-packagejson": "^2.5.19",
+ "simple-git-hooks": "^2.13.1",
+ "tsdown": "^0.15.6",
+ "typescript": "^5.9.3"
+ };
+ var package_default = {
+ name,
+ version,
+ description,
+ keywords,
+ homepage,
+ bugs,
+ repository,
+ author,
+ type,
+ bin,
+ files,
+ scripts,
+ "simple-git-hooks": simple_git_hooks,
+ "lint-staged": lint_staged,
+ dependencies,
+ devDependencies
+ };
+
  //#endregion
  //#region src/lib/adaptive-rate-limiter.ts
  const DEFAULT_CONFIG$1 = {
@@ -1566,8 +1646,8 @@ var ConsoleRenderer = class {
  /**
  * Get log prefix based on log type
  */
- getLogPrefix(type) {
- switch (type) {
+ getLogPrefix(type$1) {
+ switch (type$1) {
  case "error":
  case "fatal": return pc.red("✖");
  case "warn": return pc.yellow("⚠");
@@ -2096,171 +2176,157 @@ const getTokenCount = async (payload, model) => {
  //#endregion
  //#region src/lib/auto-compact.ts
  const DEFAULT_CONFIG = {
- targetTokens: 12e4,
  safetyMarginPercent: 2,
  maxRequestBodyBytes: 500 * 1024
  };
+ /** Dynamic byte limit that adjusts based on 413 errors */
+ let dynamicByteLimit = null;
  /**
- *
- * Starts at 500KB and can be adjusted when 413 errors are encountered.
- */
- let dynamicByteLimitOverride = null;
- /**
- * Called when a 413 error is encountered with a specific payload size.
- * Adjusts the dynamic byte limit to 90% of the failing size.
+ * Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
  */
  function onRequestTooLarge(failingBytes) {
  const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
-
- consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed
+ dynamicByteLimit = newLimit;
+ consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
  }
-
-
-
-
- async function checkNeedsCompaction(payload, model, config = {}) {
- const cfg = {
- ...DEFAULT_CONFIG,
- ...config
- };
- const currentTokens = (await getTokenCount(payload, model)).input;
- const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
- const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
- const currentBytes = JSON.stringify(payload).length;
- const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
- const exceedsTokens = currentTokens > tokenLimit;
- const exceedsBytes = currentBytes > byteLimit;
- let reason;
- if (exceedsTokens && exceedsBytes) reason = "both";
- else if (exceedsTokens) reason = "tokens";
- else if (exceedsBytes) reason = "bytes";
+ function calculateLimits(model, config) {
+ const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
+ const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
+ const byteLimit = dynamicByteLimit ?? config.maxRequestBodyBytes;
  return {
- needed: exceedsTokens || exceedsBytes,
- currentTokens,
  tokenLimit,
-
- byteLimit,
- reason
+ byteLimit
  };
  }
- /**
-
-
-
-
-
-
- else if (Array.isArray(message.content)) {
- for (const part of message.content) if (part.type === "text") text += part.text;
- else if ("image_url" in part) text += part.image_url.url;
+ /** Estimate tokens for a single message (fast approximation) */
+ function estimateMessageTokens(msg) {
+ let charCount = 0;
+ if (typeof msg.content === "string") charCount = msg.content.length;
+ else if (Array.isArray(msg.content)) {
+ for (const part of msg.content) if (part.type === "text") charCount += part.text.length;
+ else if ("image_url" in part) charCount += Math.min(part.image_url.url.length, 1e4);
  }
- if (
- return Math.ceil(
+ if (msg.tool_calls) charCount += JSON.stringify(msg.tool_calls).length;
+ return Math.ceil(charCount / 4) + 10;
  }
- /**
-
-
+ /** Get byte size of a message */
+ function getMessageBytes(msg) {
+ return JSON.stringify(msg).length;
+ }
+ /** Extract system/developer messages from the beginning */
  function extractSystemMessages(messages) {
-
-
-
-
-
- systemMessages.push(msg);
- i++;
- } else break;
+ let splitIndex = 0;
+ while (splitIndex < messages.length) {
+ const role = messages[splitIndex].role;
+ if (role !== "system" && role !== "developer") break;
+ splitIndex++;
  }
  return {
- systemMessages,
-
+ systemMessages: messages.slice(0, splitIndex),
+ conversationMessages: messages.slice(splitIndex)
  };
  }
- /**
-
-
- function getToolUseIds(message) {
- if (message.role === "assistant" && message.tool_calls) return message.tool_calls.map((tc) => tc.id);
+ /** Get tool_use IDs from an assistant message */
+ function getToolCallIds(msg) {
+ if (msg.role === "assistant" && msg.tool_calls) return msg.tool_calls.map((tc) => tc.id);
  return [];
  }
- /**
- * Find messages to keep from the end to stay under target tokens.
- * Returns the starting index of messages to preserve.
- */
- function findPreserveIndex(messages, targetTokens, systemTokens) {
- const availableTokens = targetTokens - systemTokens - 500;
- let accumulatedTokens = 0;
- for (let i = messages.length - 1; i >= 0; i--) {
- const msgTokens = estimateMessageTokens(messages[i]);
- if (accumulatedTokens + msgTokens > availableTokens) return i + 1;
- accumulatedTokens += msgTokens;
- }
- return 0;
- }
- /**
- * Filter out orphaned tool_result messages that don't have a matching tool_use
- * in the preserved message list. This prevents API errors when truncation
- * separates tool_use/tool_result pairs.
- */
+ /** Filter orphaned tool_result messages */
  function filterOrphanedToolResults(messages) {
- const
- for (const msg of messages) for (const id of
- const filteredMessages = [];
+ const toolUseIds = /* @__PURE__ */ new Set();
+ for (const msg of messages) for (const id of getToolCallIds(msg)) toolUseIds.add(id);
  let removedCount = 0;
-
- if (msg.role === "tool" && msg.tool_call_id && !
+ const filtered = messages.filter((msg) => {
+ if (msg.role === "tool" && msg.tool_call_id && !toolUseIds.has(msg.tool_call_id)) {
  removedCount++;
-
+ return false;
  }
-
- }
- if (removedCount > 0) consola.
- return
+ return true;
+ });
+ if (removedCount > 0) consola.debug(`Auto-compact: Filtered ${removedCount} orphaned tool_result`);
+ return filtered;
  }
- /**
- * Ensure the message list starts with a user message.
- * If it starts with assistant or tool messages, skip them until we find a user message.
- * This is required because OpenAI API expects conversations to start with user messages
- * (after system messages).
- */
+ /** Ensure messages start with a user message */
  function ensureStartsWithUser(messages) {
  let startIndex = 0;
- while (startIndex < messages.length)
-
- startIndex++;
- }
- if (startIndex > 0) consola.info(`Auto-compact: Skipped ${startIndex} leading non-user message(s) to ensure valid sequence`);
+ while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
+ if (startIndex > 0) consola.debug(`Auto-compact: Skipped ${startIndex} leading non-user messages`);
  return messages.slice(startIndex);
  }
  /**
- *
+ * Find the optimal index from which to preserve messages.
+ * Uses binary search with pre-calculated cumulative sums.
+ * Returns the smallest index where the preserved portion fits within limits.
  */
- function
-
+ function findOptimalPreserveIndex(params) {
+ const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
+ if (messages.length === 0) return 0;
+ const markerBytes = 200;
+ const availableTokens = tokenLimit - systemTokens - 50;
+ const availableBytes = byteLimit - payloadOverhead - systemBytes - markerBytes;
+ if (availableTokens <= 0 || availableBytes <= 0) return messages.length;
+ const n = messages.length;
+ const cumTokens = Array.from({ length: n + 1 }, () => 0);
+ const cumBytes = Array.from({ length: n + 1 }, () => 0);
+ for (let i = n - 1; i >= 0; i--) {
+ const msg = messages[i];
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
+ }
+ let left = 0;
+ let right = n;
+ while (left < right) {
+ const mid = left + right >>> 1;
+ if (cumTokens[mid] <= availableTokens && cumBytes[mid] <= availableBytes) right = mid;
+ else left = mid + 1;
+ }
+ return left;
  }
  /**
- *
+ * Check if payload needs compaction based on model limits or byte size.
  */
+ async function checkNeedsCompaction(payload, model, config = {}) {
+ const cfg = {
+ ...DEFAULT_CONFIG,
+ ...config
+ };
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
+ const currentTokens = (await getTokenCount(payload, model)).input;
+ const currentBytes = JSON.stringify(payload).length;
+ const exceedsTokens = currentTokens > tokenLimit;
+ const exceedsBytes = currentBytes > byteLimit;
+ let reason;
+ if (exceedsTokens && exceedsBytes) reason = "both";
+ else if (exceedsTokens) reason = "tokens";
+ else if (exceedsBytes) reason = "bytes";
+ return {
+ needed: exceedsTokens || exceedsBytes,
+ currentTokens,
+ tokenLimit,
+ currentBytes,
+ byteLimit,
+ reason
+ };
+ }
+ /** Create a truncation marker message */
  function createTruncationMarker(removedCount) {
  return {
  role: "user",
- content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages
+ content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages removed to fit context limits]`
  };
  }
  /**
- * Perform auto-compaction on a payload that exceeds
- *
- * Uses iterative approach with decreasing target tokens until under limit.
+ * Perform auto-compaction on a payload that exceeds limits.
+ * Uses binary search to find the optimal truncation point.
  */
  async function autoCompact(payload, model, config = {}) {
  const cfg = {
  ...DEFAULT_CONFIG,
  ...config
  };
- const
- const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
- const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
  const originalBytes = JSON.stringify(payload).length;
- const
+ const originalTokens = (await getTokenCount(payload, model)).input;
  if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
  payload,
  wasCompacted: false,
@@ -2274,60 +2340,33 @@ async function autoCompact(payload, model, config = {}) {
  if (exceedsTokens && exceedsBytes) reason = "tokens and size";
  else if (exceedsBytes) reason = "size";
  else reason = "tokens";
- consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)
- const { systemMessages,
- const
-
- const
- const
-
-
-
-
+ consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
+ const { systemMessages, conversationMessages } = extractSystemMessages(payload.messages);
+ const messagesJson = JSON.stringify(payload.messages);
+ const payloadOverhead = originalBytes - messagesJson.length;
+ const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
+ const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
+ consola.debug(`Auto-compact: overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
+ const preserveIndex = findOptimalPreserveIndex({
+ messages: conversationMessages,
+ systemBytes,
+ systemTokens,
+ payloadOverhead,
+ tokenLimit,
+ byteLimit
+ });
+ if (preserveIndex === 0) {
+ consola.warn("Auto-compact: Cannot truncate, system messages too large");
+ return {
  payload,
-
-
-
-
-
- limit: tokenLimit,
- originalTokens
- });
- if (!result.wasCompacted) return result;
- lastResult = result;
- const resultBytes = JSON.stringify(result.payload).length;
- const underTokenLimit = result.compactedTokens <= tokenLimit;
- const underByteLimit = resultBytes <= byteLimit;
- if (underTokenLimit && underByteLimit) {
- consola.info(`Auto-compact: ${originalTokens} → ${result.compactedTokens} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(resultBytes / 1024)}KB (removed ${result.removedMessageCount} messages)`);
- return result;
- }
- const tokenStatus = underTokenLimit ? "OK" : `${result.compactedTokens} > ${tokenLimit}`;
- const byteStatus = underByteLimit ? "OK" : `${Math.round(resultBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB`;
- consola.warn(`Auto-compact: Still over limit (tokens: ${tokenStatus}, size: ${byteStatus}), trying more aggressive truncation`);
- currentTarget = Math.floor(currentTarget * .7);
- if (currentTarget < MIN_TARGET) {
- consola.error("Auto-compact: Cannot reduce further, target too low");
- return result;
- }
+ wasCompacted: false,
+ originalTokens,
+ compactedTokens: originalTokens,
+ removedMessageCount: 0
+ };
  }
-
-
- payload,
- wasCompacted: false,
- originalTokens,
- compactedTokens: originalTokens,
- removedMessageCount: 0
- };
- }
- /**
- * Helper to attempt compaction with a specific target token count.
- */
- async function tryCompactWithTarget(opts) {
- const { payload, model, systemMessages, remainingMessages, systemTokens, targetTokens, originalTokens } = opts;
- const preserveIndex = findPreserveIndex(remainingMessages, targetTokens, systemTokens);
- if (preserveIndex === 0) {
- consola.warn("Auto-compact: Cannot truncate further without losing all conversation history");
+ if (preserveIndex >= conversationMessages.length) {
+ consola.warn("Auto-compact: Would need to remove all messages");
  return {
  payload,
  wasCompacted: false,
@@ -2336,13 +2375,12 @@ async function tryCompactWithTarget(opts) {
  removedMessageCount: 0
  };
  }
-
-
-
-
-
-
- consola.warn("Auto-compact: All messages were filtered out after cleanup, cannot compact");
+ let preserved = conversationMessages.slice(preserveIndex);
+ preserved = filterOrphanedToolResults(preserved);
+ preserved = ensureStartsWithUser(preserved);
+ preserved = filterOrphanedToolResults(preserved);
+ if (preserved.length === 0) {
+ consola.warn("Auto-compact: All messages filtered out after cleanup");
  return {
  payload,
  wasCompacted: false,
@@ -2351,27 +2389,30 @@ async function tryCompactWithTarget(opts) {
  removedMessageCount: 0
  };
  }
-
- const
+ const removedCount = conversationMessages.length - preserved.length;
+ const marker = createTruncationMarker(removedCount);
  const newPayload = {
  ...payload,
  messages: [
  ...systemMessages,
-
- ...
+ marker,
+ ...preserved
  ]
  };
+ const newBytes = JSON.stringify(newPayload).length;
  const newTokenCount = await getTokenCount(newPayload, model);
+ consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
+ if (newBytes > byteLimit) consola.warn(`Auto-compact: Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
  return {
  payload: newPayload,
  wasCompacted: true,
  originalTokens,
  compactedTokens: newTokenCount.input,
- removedMessageCount:
+ removedMessageCount: removedCount
  };
  }
  /**
- * Create a marker to
+ * Create a marker to prepend to responses indicating auto-compaction occurred.
  */
  function createCompactionMarker(result) {
  if (!result.wasCompacted) return "";
@@ -2633,7 +2674,7 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
  ...choice$1,
  message: {
  ...choice$1.message,
- content: (choice$1.message.content ?? "")
+ content: marker + (choice$1.message.content ?? "")
  }
  } : choice$1)
  };
@@ -2694,18 +2735,13 @@ async function handleStreamingResponse$1(opts) {
  const { stream, response, payload, ctx } = opts;
  const acc = createStreamAccumulator();
  try {
- for await (const chunk of response) {
- consola.debug("Streaming chunk:", JSON.stringify(chunk));
- parseStreamChunk(chunk, acc);
- await stream.writeSSE(chunk);
- }
  if (ctx.compactResult?.wasCompacted) {
  const marker = createCompactionMarker(ctx.compactResult);
  const markerChunk = {
  id: `compact-marker-${Date.now()}`,
  object: "chat.completion.chunk",
  created: Math.floor(Date.now() / 1e3),
- model:
+ model: payload.model,
  choices: [{
  index: 0,
  delta: { content: marker },
@@ -2719,6 +2755,11 @@ async function handleStreamingResponse$1(opts) {
  });
  acc.content += marker;
  }
+ for await (const chunk of response) {
+ consola.debug("Streaming chunk:", JSON.stringify(chunk));
+ parseStreamChunk(chunk, acc);
+ await stream.writeSSE(chunk);
+ }
  recordStreamSuccess(acc, payload.model, ctx);
  completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
  } catch (error) {
@@ -4509,7 +4550,7 @@ function handleNonStreamingResponse(opts) {
  consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
  if (ctx.compactResult?.wasCompacted) {
  const marker = createCompactionMarker(ctx.compactResult);
- anthropicResponse =
+ anthropicResponse = prependMarkerToAnthropicResponse(anthropicResponse, marker);
  }
  recordResponse(ctx.historyId, {
  success: true,
@@ -4541,16 +4582,16 @@ function handleNonStreamingResponse(opts) {
  });
  return c.json(anthropicResponse);
  }
- function
+ function prependMarkerToAnthropicResponse(response, marker) {
  const content = [...response.content];
- const
- if (
- const textBlock = content[
- if (textBlock.type === "text") content[
+ const firstTextIndex = content.findIndex((block) => block.type === "text");
+ if (firstTextIndex !== -1) {
+ const textBlock = content[firstTextIndex];
+ if (textBlock.type === "text") content[firstTextIndex] = {
  ...textBlock,
- text: textBlock.text
+ text: marker + textBlock.text
  };
- } else content.
+ } else content.unshift({
  type: "text",
  text: marker
  });
@@ -4580,6 +4621,11 @@ async function handleStreamingResponse(opts) {
  };
  const acc = createAnthropicStreamAccumulator();
  try {
+ if (ctx.compactResult?.wasCompacted) {
+ const marker = createCompactionMarker(ctx.compactResult);
+ await sendCompactionMarkerEvent(stream, streamState, marker);
+ acc.content += marker;
+ }
  await processStreamChunks({
  stream,
  response,
@@ -4587,11 +4633,6 @@ async function handleStreamingResponse(opts) {
  streamState,
  acc
  });
- if (ctx.compactResult?.wasCompacted) {
- const marker = createCompactionMarker(ctx.compactResult);
- await sendCompactionMarkerEvent(stream, streamState, marker);
- acc.content += marker;
- }
  recordStreamingResponse(acc, anthropicPayload.model, ctx);
  completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
  } catch (error) {
@@ -4904,6 +4945,7 @@ function formatModelInfo(model) {
  return ` - ${model.id.padEnd(28)} context: ${contextK.padStart(5)}, output: ${outputK.padStart(4)}${featureStr}`;
  }
  async function runServer(options) {
+ consola.info(`copilot-api v${package_default.version}`);
  if (options.proxyEnv) initProxyFromEnv();
  if (options.verbose) {
  consola.level = 5;
@@ -4921,7 +4963,7 @@ async function runServer(options) {
  consecutiveSuccessesForRecovery: options.consecutiveSuccesses
  });
  else consola.info("Rate limiting disabled");
- if (options.autoCompact) consola.info("Auto-compact
+ if (!options.autoCompact) consola.info("Auto-compact disabled");
  initHistory(options.history, options.historyLimit);
  if (options.history) {
  const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
@@ -5063,10 +5105,10 @@ const start = defineCommand({
  default: "1000",
  description: "Maximum number of history entries to keep in memory (0 = unlimited)"
  },
- "auto-compact": {
+ "no-auto-compact": {
  type: "boolean",
  default: false,
- description: "
+ description: "Disable automatic conversation history compression when exceeding limits"
  }
  },
  run({ args }) {
@@ -5087,7 +5129,7 @@ const start = defineCommand({
  proxyEnv: args["proxy-env"],
  history: !args["no-history"],
  historyLimit: Number.parseInt(args["history-limit"], 10),
- autoCompact: args["auto-compact"]
+ autoCompact: !args["no-auto-compact"]
  });
  }
  });
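The net effect of the last three hunks is that auto-compaction is now enabled by default: the state default becomes "autoCompact: true" and the opt-in "auto-compact" flag on the start command is replaced by an opt-out "no-auto-compact" flag. A minimal sketch of the resulting flag resolution, assuming citty's boolean parsing of the start command arguments (illustrative only, not a verbatim excerpt from the bundle):

    // "no-auto-compact" defaults to false, so autoCompact resolves to true unless the flag is passed.
    const options = { autoCompact: !args["no-auto-compact"] };
    if (!options.autoCompact) consola.info("Auto-compact disabled");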