@hsupu/copilot-api 0.7.6 → 0.7.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +229 -118
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
package/dist/main.js
CHANGED
@@ -246,8 +246,8 @@ async function getVSCodeVersion() {
 }
 });
 if (!response.ok) return FALLBACK;
-const version = (await response.json()).tag_name;
-if (version && /^\d+\.\d+\.\d+$/.test(version)) return version;
+const version$1 = (await response.json()).tag_name;
+if (version$1 && /^\d+\.\d+\.\d+$/.test(version$1)) return version$1;
 return FALLBACK;
 } catch {
 return FALLBACK;
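The `version` → `version$1` renames in this hunk and in several hunks below are consistent with a scope-hoisting bundler (the package builds with tsdown, per the scripts inlined further down) deconflicting identifiers after package.json was inlined as top-level bindings; see the new `//#region package.json` hunk. A minimal illustration of the collision, reusing names from this diff rather than the package's actual source:

var version = "0.7.8"; // new top-level binding emitted for the inlined package.json

function parseVersion(version$1) { // formerly `version`; suffixed so every binding in the flat module scope stays unique
  return version$1.split(".").map((n) => Number.parseInt(n, 10) || 0);
}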
@@ -434,13 +434,13 @@ const checkUsage = defineCommand({
 const premiumUsed = premiumTotal - premium.remaining;
 const premiumPercentUsed = premiumTotal > 0 ? premiumUsed / premiumTotal * 100 : 0;
 const premiumPercentRemaining = premium.percent_remaining;
-function summarizeQuota(name, snap) {
-if (!snap) return `${name}: N/A`;
+function summarizeQuota(name$1, snap) {
+if (!snap) return `${name$1}: N/A`;
 const total = snap.entitlement;
 const used = total - snap.remaining;
 const percentUsed = total > 0 ? used / total * 100 : 0;
 const percentRemaining = snap.percent_remaining;
-return `${name}: ${used}/${total} used (${percentUsed.toFixed(1)}% used, ${percentRemaining.toFixed(1)}% remaining)`;
+return `${name$1}: ${used}/${total} used (${percentUsed.toFixed(1)}% used, ${percentRemaining.toFixed(1)}% remaining)`;
 }
 const premiumLine = `Premium: ${premiumUsed}/${premiumTotal} used (${premiumPercentUsed.toFixed(1)}% used, ${premiumPercentRemaining.toFixed(1)}% remaining)`;
 const chatLine = summarizeQuota("Chat", usage.quota_snapshots.chat);
@@ -481,9 +481,9 @@ async function checkTokenExists() {
 }
 }
 async function getDebugInfo() {
-const [version, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
+const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
 return {
-version,
+version: version$1,
 runtime: getRuntimeInfo(),
 paths: {
 APP_DIR: PATHS.APP_DIR,
@@ -571,8 +571,8 @@ const PATTERNS = {
 /**
 * Parse semver version string to comparable parts
 */
-function parseVersion(version) {
-return version.split(".").map((n) => Number.parseInt(n, 10) || 0);
+function parseVersion(version$1) {
+return version$1.split(".").map((n) => Number.parseInt(n, 10) || 0);
 }
 /**
 * Compare two semver versions
@@ -590,9 +590,9 @@ function compareVersions(a, b) {
 }
 return 0;
 }
-function getPatternTypeForVersion(version) {
-if (compareVersions(version, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
-if (compareVersions(version, SUPPORTED_VERSIONS.v2b.min) >= 0 && compareVersions(version, SUPPORTED_VERSIONS.v2b.max) <= 0) return "variable";
+function getPatternTypeForVersion(version$1) {
+if (compareVersions(version$1, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
+if (compareVersions(version$1, SUPPORTED_VERSIONS.v2b.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2b.max) <= 0) return "variable";
 return null;
 }
 /**
@@ -624,8 +624,8 @@ function findInVoltaTools(voltaHome) {
 if (existsSync(packagesPath)) paths.push(packagesPath);
 const toolsDir = join(voltaHome, "tools", "image", "node");
 if (existsSync(toolsDir)) try {
-for (const version of readdirSync(toolsDir)) {
-const claudePath = join(toolsDir, version, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
+for (const version$1 of readdirSync(toolsDir)) {
+const claudePath = join(toolsDir, version$1, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
 if (existsSync(claudePath)) paths.push(claudePath);
 }
 } catch {}
@@ -668,23 +668,23 @@ function getCurrentLimit(content) {
 * Check if Claude Code version is supported for patching
 */
 function checkVersionSupport(cliPath) {
-const version = getClaudeCodeVersion(cliPath);
-if (!version) return {
+const version$1 = getClaudeCodeVersion(cliPath);
+if (!version$1) return {
 supported: false,
 version: null,
 patternType: null,
 error: "Could not detect Claude Code version"
 };
-const patternType = getPatternTypeForVersion(version);
+const patternType = getPatternTypeForVersion(version$1);
 if (!patternType) return {
 supported: false,
-version,
+version: version$1,
 patternType: null,
-error: `Version ${version} is not supported. Supported: ${getSupportedRangeString()}`
+error: `Version ${version$1} is not supported. Supported: ${getSupportedRangeString()}`
 };
 return {
 supported: true,
-version,
+version: version$1,
 patternType
 };
 }
@@ -735,8 +735,8 @@ function restoreClaudeCode(cliPath) {
 return true;
 }
 function showStatus(cliPath, currentLimit) {
-const version = getClaudeCodeVersion(cliPath);
-if (version) consola.info(`Claude Code version: ${version}`);
+const version$1 = getClaudeCodeVersion(cliPath);
+if (version$1) consola.info(`Claude Code version: ${version$1}`);
 if (currentLimit === null) {
 consola.warn("Could not detect current limit - CLI may have been updated");
 consola.info("Look for the BS9 variable or HR function pattern in cli.js");
@@ -818,6 +818,86 @@ const patchClaude = defineCommand({
 }
 });
 
+//#endregion
+//#region package.json
+var name = "@hsupu/copilot-api";
+var version = "0.7.8";
+var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
+var keywords = [
+"proxy",
+"github-copilot",
+"openai-compatible",
+"anthropic-compatible"
+];
+var homepage = "https://github.com/puxu-msft/copilot-api-js";
+var bugs = "https://github.com/puxu-msft/copilot-api-js/issues";
+var repository = {
+"type": "git",
+"url": "git+https://github.com/puxu-msft/copilot-api-js.git"
+};
+var author = "hsupu";
+var type = "module";
+var bin = { "copilot-api": "dist/main.js" };
+var files = ["dist"];
+var scripts = {
+"build": "npx tsdown",
+"dev": "bun run --watch ./src/main.ts",
+"knip": "knip-bun",
+"lint": "eslint --cache",
+"lint:all": "eslint --cache .",
+"prepack": "npm run build",
+"prepare": "npm run build && (command -v bun >/dev/null 2>&1 && simple-git-hooks || true)",
+"release": "bumpp && npm publish --access public",
+"start": "NODE_ENV=production bun run ./src/main.ts",
+"typecheck": "tsc"
+};
+var simple_git_hooks = { "pre-commit": "bun x lint-staged" };
+var lint_staged = { "*": "bun run lint --fix" };
+var dependencies = {
+"citty": "^0.1.6",
+"clipboardy": "^5.0.0",
+"consola": "^3.4.2",
+"fetch-event-stream": "^0.1.5",
+"gpt-tokenizer": "^3.0.1",
+"hono": "^4.9.9",
+"picocolors": "^1.1.1",
+"proxy-from-env": "^1.1.0",
+"srvx": "^0.8.9",
+"tiny-invariant": "^1.3.3",
+"undici": "^7.16.0"
+};
+var devDependencies = {
+"@echristian/eslint-config": "^0.0.54",
+"@types/bun": "^1.2.23",
+"@types/proxy-from-env": "^1.0.4",
+"bumpp": "^10.2.3",
+"eslint": "^9.37.0",
+"knip": "^5.64.1",
+"lint-staged": "^16.2.3",
+"prettier-plugin-packagejson": "^2.5.19",
+"simple-git-hooks": "^2.13.1",
+"tsdown": "^0.15.6",
+"typescript": "^5.9.3"
+};
+var package_default = {
+name,
+version,
+description,
+keywords,
+homepage,
+bugs,
+repository,
+author,
+type,
+bin,
+files,
+scripts,
+"simple-git-hooks": simple_git_hooks,
+"lint-staged": lint_staged,
+dependencies,
+devDependencies
+};
+
 //#endregion
 //#region src/lib/adaptive-rate-limiter.ts
 const DEFAULT_CONFIG$1 = {
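This hunk inlines package.json into the bundle as plain `var` bindings plus a `package_default` aggregate, which is the usual output shape when a rolldown-based bundler such as tsdown resolves a JSON import. A plausible source-side change that would produce it (an assumption; the TypeScript source is not part of this diff):

// hypothetical import in src/main.ts
import packageJson from "../package.json";

// consumed by the new startup banner in the runServer hunk at the end of this diff:
consola.info(`copilot-api v${packageJson.version}`);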
@@ -1566,8 +1646,8 @@ var ConsoleRenderer = class {
 /**
 * Get log prefix based on log type
 */
-getLogPrefix(type) {
-switch (type) {
+getLogPrefix(type$1) {
+switch (type$1) {
 case "error":
 case "fatal": return pc.red("✖");
 case "warn": return pc.yellow("⚠");
@@ -2097,20 +2177,50 @@ const getTokenCount = async (payload, model) => {
 //#region src/lib/auto-compact.ts
 const DEFAULT_CONFIG = {
 targetTokens: 12e4,
-safetyMarginPercent: 2
+safetyMarginPercent: 2,
+maxRequestBodyBytes: 500 * 1024
 };
 /**
-*
+* Dynamic byte limit that adjusts based on 413 errors.
+* Starts at 500KB and can be adjusted when 413 errors are encountered.
+*/
+let dynamicByteLimitOverride = null;
+/**
+* Called when a 413 error is encountered with a specific payload size.
+* Adjusts the dynamic byte limit to 90% of the failing size.
+*/
+function onRequestTooLarge(failingBytes) {
+const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
+dynamicByteLimitOverride = newLimit;
+consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed, new limit: ${Math.round(newLimit / 1024)}KB`);
+}
+/**
+* Check if payload needs compaction based on model limits OR request body size.
 * Uses a safety margin to account for token counting differences.
 */
-async function checkNeedsCompaction(payload, model,
+async function checkNeedsCompaction(payload, model, config = {}) {
+const cfg = {
+...DEFAULT_CONFIG,
+...config
+};
 const currentTokens = (await getTokenCount(payload, model)).input;
 const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
-const
+const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
+const currentBytes = JSON.stringify(payload).length;
+const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
+const exceedsTokens = currentTokens > tokenLimit;
+const exceedsBytes = currentBytes > byteLimit;
+let reason;
+if (exceedsTokens && exceedsBytes) reason = "both";
+else if (exceedsTokens) reason = "tokens";
+else if (exceedsBytes) reason = "bytes";
 return {
-needed:
+needed: exceedsTokens || exceedsBytes,
 currentTokens,
-
+tokenLimit,
+currentBytes,
+byteLimit,
+reason
 };
 }
 /**
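`dynamicByteLimitOverride` gives the byte check a feedback loop: `logPayloadSizeInfo` (patched further down) now calls `onRequestTooLarge` whenever a 413 is logged, so the next `checkNeedsCompaction` run compacts against the lowered limit instead of the static 500KB default. A worked example of the adjustment rule, using only the arithmetic visible above:

// A 700KB body gets a 413, so the limit drops to 90% of the failing size,
// floored at 100KB: max(floor(700 * 1024 * 0.9), 100 * 1024) = 645120 bytes.
const failingBytes = 700 * 1024;
const newLimit = Math.max(Math.floor(failingBytes * 0.9), 100 * 1024);
console.log(Math.round(newLimit / 1024)); // 630 (KB)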
@@ -2153,18 +2263,40 @@ function getToolUseIds(message) {
 return [];
 }
 /**
-*
-* Returns the starting index of messages to preserve.
+* Estimate the byte size of a message (for binary search).
 */
-function
-
-
+function estimateMessageBytes(message) {
+return JSON.stringify(message).length;
+}
+/**
+* Find the optimal preserve index using binary search.
+* This finds the point where we keep as many messages as possible
+* while staying under both token and byte limits.
+*/
+function findOptimalPreserveIndex(messages, systemMessages, targetTokens, targetBytes) {
+if (messages.length === 0) return 0;
+const systemTokens = systemMessages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
+const systemBytes = systemMessages.reduce((sum, msg) => sum + estimateMessageBytes(msg), 0);
+const markerOverhead = 200;
+const availableTokens = targetTokens - systemTokens - markerOverhead / 4;
+const availableBytes = targetBytes - systemBytes - markerOverhead;
+const cumulativeTokens = Array.from({ length: messages.length + 1 }, () => 0);
+const cumulativeBytes = Array.from({ length: messages.length + 1 }, () => 0);
 for (let i = messages.length - 1; i >= 0; i--) {
-const
-
-
+const msg = messages[i];
+cumulativeTokens[i] = cumulativeTokens[i + 1] + estimateMessageTokens(msg);
+cumulativeBytes[i] = cumulativeBytes[i + 1] + estimateMessageBytes(msg);
 }
-
+let left = 0;
+let right = messages.length;
+while (left < right) {
+const mid = Math.floor((left + right) / 2);
+const tokensFromMid = cumulativeTokens[mid];
+const bytesFromMid = cumulativeBytes[mid];
+if (tokensFromMid <= availableTokens && bytesFromMid <= availableBytes) right = mid;
+else left = mid + 1;
+}
+return left;
 }
 /**
 * Filter out orphaned tool_result messages that don't have a matching tool_use
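`findOptimalPreserveIndex` works because the suffix sums are non-increasing in the start index: dropping more leading messages never raises the cost of what remains, so "does the tail starting at i fit the budget?" is a monotone predicate, and binary search finds the smallest viable i in O(log n) probes after an O(n) suffix-sum pass. A self-contained sketch of the same idea over a single cost dimension (hypothetical helper, not from the package):

// Returns the smallest i such that the total cost of items i..end fits in
// `budget`; a return value of costs.length means not even the last item fits.
function smallestPreserveIndex(costs, budget) {
  const suffix = new Array(costs.length + 1).fill(0);
  for (let i = costs.length - 1; i >= 0; i--) suffix[i] = suffix[i + 1] + costs[i];
  let lo = 0;
  let hi = costs.length;
  while (lo < hi) {
    const mid = Math.floor((lo + hi) / 2);
    if (suffix[mid] <= budget) hi = mid; // tail from mid fits; try keeping more
    else lo = mid + 1; // tail from mid is too big; drop more leading items
  }
  return lo;
}

smallestPreserveIndex([5, 3, 8, 2], 10); // → 2, since 8 + 2 = 10 fits but 3 + 8 + 2 = 13 does not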
@@ -2202,12 +2334,6 @@ function ensureStartsWithUser(messages) {
 return messages.slice(startIndex);
 }
 /**
-* Calculate estimated tokens for system messages.
-*/
-function estimateSystemTokens(systemMessages) {
-return systemMessages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
-}
-/**
 * Create a truncation marker message.
 */
 function createTruncationMarker(removedCount) {
@@ -2217,9 +2343,10 @@ function createTruncationMarker(removedCount) {
 };
 }
 /**
-* Perform auto-compaction on a payload that exceeds token limits.
-* This uses simple truncation - no LLM calls required.
-*
+* Perform auto-compaction on a payload that exceeds token or size limits.
+* This uses simple truncation with binary search - no LLM calls required.
+* The algorithm finds the optimal truncation point to maximize preserved messages
+* while staying under both token and byte limits.
 */
 async function autoCompact(payload, model, config = {}) {
 const cfg = {
@@ -2228,63 +2355,38 @@ async function autoCompact(payload, model, config = {}) {
 };
 const originalTokens = (await getTokenCount(payload, model)).input;
 const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
-const
-
+const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
+const originalBytes = JSON.stringify(payload).length;
+const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
+if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
 payload,
 wasCompacted: false,
 originalTokens,
 compactedTokens: originalTokens,
 removedMessageCount: 0
 };
-
+const exceedsTokens = originalTokens > tokenLimit;
+const exceedsBytes = originalBytes > byteLimit;
+let reason;
+if (exceedsTokens && exceedsBytes) reason = "tokens and size";
+else if (exceedsBytes) reason = "size";
+else reason = "tokens";
+consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB), truncating...`);
 const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
-
-
-
-
-
-let lastResult = null;
-for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
-const result = await tryCompactWithTarget({
+consola.debug(`Auto-compact: ${systemMessages.length} system messages, ${remainingMessages.length} conversation messages`);
+const preserveIndex = findOptimalPreserveIndex(remainingMessages, systemMessages, tokenLimit, byteLimit);
+if (preserveIndex === 0) {
+consola.warn("Auto-compact: Cannot truncate without losing all conversation history");
+return {
 payload,
-
-
-
-
-
-limit,
-originalTokens
-});
-if (!result.wasCompacted) return result;
-lastResult = result;
-if (result.compactedTokens <= limit) {
-consola.info(`Auto-compact: ${originalTokens} → ${result.compactedTokens} tokens (removed ${result.removedMessageCount} messages)`);
-return result;
-}
-consola.warn(`Auto-compact: Still over limit (${result.compactedTokens} > ${limit}), trying more aggressive truncation`);
-currentTarget = Math.floor(currentTarget * .7);
-if (currentTarget < MIN_TARGET) {
-consola.error("Auto-compact: Cannot reduce further, target too low");
-return result;
-}
+wasCompacted: false,
+originalTokens,
+compactedTokens: originalTokens,
+removedMessageCount: 0
+};
 }
-
-
-payload,
-wasCompacted: false,
-originalTokens,
-compactedTokens: originalTokens,
-removedMessageCount: 0
-};
-}
-/**
-* Helper to attempt compaction with a specific target token count.
-*/
-async function tryCompactWithTarget(opts) {
-const { payload, model, systemMessages, remainingMessages, systemTokens, targetTokens, originalTokens } = opts;
-const preserveIndex = findPreserveIndex(remainingMessages, targetTokens, systemTokens);
-if (preserveIndex === 0) {
-consola.warn("Auto-compact: Cannot truncate further without losing all conversation history");
+if (preserveIndex >= remainingMessages.length) {
+consola.warn("Auto-compact: Would need to remove all messages, cannot compact");
 return {
 payload,
 wasCompacted: false,
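The deleted block implemented compaction as a guess-and-retry loop: truncate toward a target, re-count tokens, and on failure shrink the target by 30% (`currentTarget * .7`) until `MAX_ITERATIONS` or the `MIN_TARGET` floor was hit, paying for another full token count on every retry. The binary-search replacement computes the cut point once. Shrink schedule of the retired loop, assuming the 12e4 default from `DEFAULT_CONFIG.targetTokens`:

// Attempt k tried floor(120000 * 0.7^k) tokens: 120000 → 84000 → 58800 → 41160 → ...
for (let k = 0, target = 120000; k < 4; k++, target = Math.floor(target * 0.7)) {
  console.log(`attempt ${k}: target ${target}`);
}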
@@ -2319,6 +2421,8 @@ async function tryCompactWithTarget(opts) {
 ]
 };
 const newTokenCount = await getTokenCount(newPayload, model);
+const newBytes = JSON.stringify(newPayload).length;
+consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedMessages.length} messages)`);
 return {
 payload: newPayload,
 wasCompacted: true,
@@ -2433,12 +2537,16 @@ async function buildFinalPayload(payload, model) {
 }
 try {
 const check = await checkNeedsCompaction(payload, model);
-consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.
+consola.debug(`Auto-compact check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
 if (!check.needed) return {
 finalPayload: payload,
 compactResult: null
 };
-
+let reasonText;
+if (check.reason === "both") reasonText = "tokens and size";
+else if (check.reason === "bytes") reasonText = "size";
+else reasonText = "tokens";
+consola.info(`Auto-compact triggered: exceeds ${reasonText} limit`);
 const compactResult = await autoCompact(payload, model);
 return {
 finalPayload: compactResult.payload,
@@ -2454,11 +2562,13 @@ async function buildFinalPayload(payload, model) {
 }
 /**
 * Log helpful debugging information when a 413 error occurs.
+* Also adjusts the dynamic byte limit for future requests.
 */
 async function logPayloadSizeInfo(payload, model) {
 const messageCount = payload.messages.length;
 const bodySize = JSON.stringify(payload).length;
 const bodySizeKB = Math.round(bodySize / 1024);
+onRequestTooLarge(bodySize);
 let imageCount = 0;
 let largeMessages = 0;
 let totalImageSize = 0;
@@ -2584,7 +2694,7 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
 ...choice$1,
 message: {
 ...choice$1.message,
-content: (choice$1.message.content ?? "")
+content: marker + (choice$1.message.content ?? "")
 }
 } : choice$1)
 };
@@ -2645,18 +2755,13 @@ async function handleStreamingResponse$1(opts) {
 const { stream, response, payload, ctx } = opts;
 const acc = createStreamAccumulator();
 try {
-for await (const chunk of response) {
-consola.debug("Streaming chunk:", JSON.stringify(chunk));
-parseStreamChunk(chunk, acc);
-await stream.writeSSE(chunk);
-}
 if (ctx.compactResult?.wasCompacted) {
 const marker = createCompactionMarker(ctx.compactResult);
 const markerChunk = {
 id: `compact-marker-${Date.now()}`,
 object: "chat.completion.chunk",
 created: Math.floor(Date.now() / 1e3),
-model:
+model: payload.model,
 choices: [{
 index: 0,
 delta: { content: marker },
@@ -2670,6 +2775,11 @@ async function handleStreamingResponse$1(opts) {
 });
 acc.content += marker;
 }
+for await (const chunk of response) {
+consola.debug("Streaming chunk:", JSON.stringify(chunk));
+parseStreamChunk(chunk, acc);
+await stream.writeSSE(chunk);
+}
 recordStreamSuccess(acc, payload.model, ctx);
 completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
 } catch (error) {
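These two hunks reorder the OpenAI-compatible streaming path: the compaction-marker chunk used to be appended after the upstream stream had drained, which put the notice at the end of the reply; it is now emitted before the relay loop so clients render it first. The Anthropic path receives the same reordering in the `@@ -4531` and `@@ -4538` hunks below. Resulting write order, sketched (the actual write call for `markerChunk` sits in unchanged lines not shown here; `emitMarker` is a hypothetical stand-in):

if (ctx.compactResult?.wasCompacted) {
  await emitMarker(markerChunk); // 1. the compaction notice goes out first
}
for await (const chunk of response) {
  await stream.writeSSE(chunk); // 2. then the upstream model chunks are relayed
}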
@@ -4460,7 +4570,7 @@ function handleNonStreamingResponse(opts) {
 consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
 if (ctx.compactResult?.wasCompacted) {
 const marker = createCompactionMarker(ctx.compactResult);
-anthropicResponse =
+anthropicResponse = prependMarkerToAnthropicResponse(anthropicResponse, marker);
 }
 recordResponse(ctx.historyId, {
 success: true,
@@ -4492,16 +4602,16 @@ function handleNonStreamingResponse(opts) {
 });
 return c.json(anthropicResponse);
 }
-function
+function prependMarkerToAnthropicResponse(response, marker) {
 const content = [...response.content];
-const
-if (
-const textBlock = content[
-if (textBlock.type === "text") content[
+const firstTextIndex = content.findIndex((block) => block.type === "text");
+if (firstTextIndex !== -1) {
+const textBlock = content[firstTextIndex];
+if (textBlock.type === "text") content[firstTextIndex] = {
 ...textBlock,
-text: textBlock.text
+text: marker + textBlock.text
 };
-} else content.
+} else content.unshift({
 type: "text",
 text: marker
 });
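The old side of this hunk is truncated by the diff viewer, so only the new helper is fully visible: it locates the first text block anywhere in `content` via findIndex and prepends the marker to it, falling back to unshifting a fresh text block when none exists. Hypothetical before/after for the visible content transformation (example values invented for illustration):

const response = {
  content: [
    { type: "tool_use", id: "tu_1", name: "bash", input: {} }, // no text block at index 0
    { type: "text", text: "Done." },
  ],
};
// prependMarkerToAnthropicResponse(response, "[context compacted] ") rewrites the
// second block to { type: "text", text: "[context compacted] Done." }; with no
// text block at all, { type: "text", text: marker } would be unshifted to the front.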
@@ -4531,6 +4641,11 @@ async function handleStreamingResponse(opts) {
 };
 const acc = createAnthropicStreamAccumulator();
 try {
+if (ctx.compactResult?.wasCompacted) {
+const marker = createCompactionMarker(ctx.compactResult);
+await sendCompactionMarkerEvent(stream, streamState, marker);
+acc.content += marker;
+}
 await processStreamChunks({
 stream,
 response,
@@ -4538,11 +4653,6 @@ async function handleStreamingResponse(opts) {
 streamState,
 acc
 });
-if (ctx.compactResult?.wasCompacted) {
-const marker = createCompactionMarker(ctx.compactResult);
-await sendCompactionMarkerEvent(stream, streamState, marker);
-acc.content += marker;
-}
 recordStreamingResponse(acc, anthropicPayload.model, ctx);
 completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
 } catch (error) {
@@ -4855,6 +4965,7 @@ function formatModelInfo(model) {
 return ` - ${model.id.padEnd(28)} context: ${contextK.padStart(5)}, output: ${outputK.padStart(4)}${featureStr}`;
 }
 async function runServer(options) {
+consola.info(`copilot-api v${package_default.version}`);
 if (options.proxyEnv) initProxyFromEnv();
 if (options.verbose) {
 consola.level = 5;