ghc-proxy 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +189 -77
- package/dist/{main.js → main.mjs} +280 -198
- package/dist/main.mjs.map +1 -0
- package/package.json +22 -20
- package/dist/main.js.map +0 -1
|
@@ -83,20 +83,6 @@ async function writeConfigField(field, value) {
|
|
|
83
83
|
}
|
|
84
84
|
}
|
|
85
85
|
|
|
86
|
-
//#endregion
|
|
87
|
-
//#region src/lib/state.ts
|
|
88
|
-
const state = {
|
|
89
|
-
auth: {},
|
|
90
|
-
config: {
|
|
91
|
-
accountType: "individual",
|
|
92
|
-
manualApprove: false,
|
|
93
|
-
rateLimitWait: false,
|
|
94
|
-
showToken: false
|
|
95
|
-
},
|
|
96
|
-
cache: {},
|
|
97
|
-
rateLimit: {}
|
|
98
|
-
};
|
|
99
|
-
|
|
100
86
|
//#endregion
|
|
101
87
|
//#region src/lib/api-config.ts
|
|
102
88
|
function standardHeaders() {
|
|
@@ -112,9 +98,9 @@ const API_VERSION = "2025-04-01";
|
|
|
112
98
|
function copilotBaseUrl(config) {
|
|
113
99
|
return config.accountType === "individual" ? "https://api.githubcopilot.com" : `https://api.${config.accountType}.githubcopilot.com`;
|
|
114
100
|
}
|
|
115
|
-
function copilotHeaders(auth
|
|
101
|
+
function copilotHeaders(auth, config, vision = false) {
|
|
116
102
|
const headers = {
|
|
117
|
-
"Authorization": `Bearer ${auth
|
|
103
|
+
"Authorization": `Bearer ${auth.copilotToken}`,
|
|
118
104
|
"content-type": standardHeaders()["content-type"],
|
|
119
105
|
"copilot-integration-id": "vscode-chat",
|
|
120
106
|
"editor-version": `vscode/${config.vsCodeVersion ?? "unknown"}`,
|
|
@@ -129,10 +115,10 @@ function copilotHeaders(auth$1, config, vision = false) {
|
|
|
129
115
|
return headers;
|
|
130
116
|
}
|
|
131
117
|
const GITHUB_API_BASE_URL = "https://api.github.com";
|
|
132
|
-
function githubHeaders(auth
|
|
118
|
+
function githubHeaders(auth, config) {
|
|
133
119
|
return {
|
|
134
120
|
...standardHeaders(),
|
|
135
|
-
"authorization": `token ${auth
|
|
121
|
+
"authorization": `token ${auth.githubToken}`,
|
|
136
122
|
"editor-version": `vscode/${config.vsCodeVersion ?? "unknown"}`,
|
|
137
123
|
"editor-plugin-version": EDITOR_PLUGIN_VERSION,
|
|
138
124
|
"user-agent": USER_AGENT,
|
|
@@ -169,6 +155,10 @@ async function forwardError(c, error) {
|
|
|
169
155
|
type: "error"
|
|
170
156
|
} }, error.response.status);
|
|
171
157
|
}
|
|
158
|
+
if (error instanceof Error && error.name === "AbortError") return c.json({ error: {
|
|
159
|
+
message: "Upstream request was aborted",
|
|
160
|
+
type: "timeout_error"
|
|
161
|
+
} }, 504);
|
|
172
162
|
return c.json({ error: {
|
|
173
163
|
message: error.message,
|
|
174
164
|
type: "error"
|
|
@@ -181,8 +171,8 @@ var CopilotClient = class {
|
|
|
181
171
|
auth;
|
|
182
172
|
config;
|
|
183
173
|
fetchImpl;
|
|
184
|
-
constructor(auth
|
|
185
|
-
this.auth = auth
|
|
174
|
+
constructor(auth, config, deps) {
|
|
175
|
+
this.auth = auth;
|
|
186
176
|
this.config = config;
|
|
187
177
|
this.fetchImpl = deps?.fetch ?? fetch;
|
|
188
178
|
}
|
|
@@ -223,35 +213,17 @@ var CopilotClient = class {
|
|
|
223
213
|
return await response.json();
|
|
224
214
|
}
|
|
225
215
|
};
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
//#region src/lib/client-config.ts
|
|
229
|
-
function getClientConfig(appState) {
|
|
230
|
-
return {
|
|
231
|
-
accountType: appState.config.accountType,
|
|
232
|
-
vsCodeVersion: appState.cache.vsCodeVersion
|
|
233
|
-
};
|
|
216
|
+
function isNonStreamingResponse(response) {
|
|
217
|
+
return Object.hasOwn(response, "choices");
|
|
234
218
|
}
|
|
235
219
|
|
|
236
220
|
//#endregion
|
|
237
|
-
//#region src/lib/
|
|
221
|
+
//#region src/lib/sleep.ts
|
|
238
222
|
function sleep(ms) {
|
|
239
223
|
return new Promise((resolve) => {
|
|
240
224
|
setTimeout(resolve, ms);
|
|
241
225
|
});
|
|
242
226
|
}
|
|
243
|
-
function isNullish(value) {
|
|
244
|
-
return value === null || value === void 0;
|
|
245
|
-
}
|
|
246
|
-
async function cacheModels(client) {
|
|
247
|
-
const models = await (client ?? new CopilotClient(state.auth, getClientConfig(state))).getModels();
|
|
248
|
-
state.cache.models = models;
|
|
249
|
-
}
|
|
250
|
-
async function cacheVSCodeVersion() {
|
|
251
|
-
const response = await getVSCodeVersion();
|
|
252
|
-
state.cache.vsCodeVersion = response;
|
|
253
|
-
consola.info(`Using VSCode version: ${response}`);
|
|
254
|
-
}
|
|
255
227
|
|
|
256
228
|
//#endregion
|
|
257
229
|
//#region src/clients/github-client.ts
|
|
@@ -259,8 +231,8 @@ var GitHubClient = class {
|
|
|
259
231
|
auth;
|
|
260
232
|
config;
|
|
261
233
|
fetchImpl;
|
|
262
|
-
constructor(auth
|
|
263
|
-
this.auth = auth
|
|
234
|
+
constructor(auth, config, deps) {
|
|
235
|
+
this.auth = auth;
|
|
264
236
|
this.config = config;
|
|
265
237
|
this.fetchImpl = deps?.fetch ?? fetch;
|
|
266
238
|
}
|
|
@@ -287,9 +259,10 @@ var GitHubClient = class {
|
|
|
287
259
|
return await response.json();
|
|
288
260
|
}
|
|
289
261
|
async pollAccessToken(deviceCode) {
|
|
262
|
+
const MAX_POLL_ATTEMPTS = 60;
|
|
290
263
|
const sleepDuration = (deviceCode.interval + 1) * 1e3;
|
|
291
264
|
consola.debug(`Polling access token with interval of ${sleepDuration}ms`);
|
|
292
|
-
|
|
265
|
+
for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt++) {
|
|
293
266
|
const response = await this.fetchImpl(`${GITHUB_BASE_URL}/login/oauth/access_token`, {
|
|
294
267
|
method: "POST",
|
|
295
268
|
headers: standardHeaders(),
|
|
@@ -309,6 +282,7 @@ var GitHubClient = class {
|
|
|
309
282
|
if (json.access_token) return json.access_token;
|
|
310
283
|
await sleep(sleepDuration);
|
|
311
284
|
}
|
|
285
|
+
throw new Error("Device code authorization timed out");
|
|
312
286
|
}
|
|
313
287
|
async getGitHubUser() {
|
|
314
288
|
const response = await this.fetchImpl(`${GITHUB_API_BASE_URL}/user`, { headers: {
|
|
@@ -339,6 +313,35 @@ async function getVSCodeVersion() {
|
|
|
339
313
|
}
|
|
340
314
|
}
|
|
341
315
|
|
|
316
|
+
//#endregion
|
|
317
|
+
//#region src/lib/state.ts
|
|
318
|
+
const state = {
|
|
319
|
+
auth: {},
|
|
320
|
+
config: {
|
|
321
|
+
accountType: "individual",
|
|
322
|
+
manualApprove: false,
|
|
323
|
+
rateLimitWait: false,
|
|
324
|
+
showToken: false
|
|
325
|
+
},
|
|
326
|
+
cache: {},
|
|
327
|
+
rateLimit: {}
|
|
328
|
+
};
|
|
329
|
+
function getClientConfig() {
|
|
330
|
+
return {
|
|
331
|
+
accountType: state.config.accountType,
|
|
332
|
+
vsCodeVersion: state.cache.vsCodeVersion
|
|
333
|
+
};
|
|
334
|
+
}
|
|
335
|
+
async function cacheModels(client) {
|
|
336
|
+
const models = await (client ?? new CopilotClient(state.auth, getClientConfig())).getModels();
|
|
337
|
+
state.cache.models = models;
|
|
338
|
+
}
|
|
339
|
+
async function cacheVSCodeVersion() {
|
|
340
|
+
const response = await getVSCodeVersion();
|
|
341
|
+
state.cache.vsCodeVersion = response;
|
|
342
|
+
consola.info(`Using VSCode version: ${response}`);
|
|
343
|
+
}
|
|
344
|
+
|
|
342
345
|
//#endregion
|
|
343
346
|
//#region src/lib/token.ts
|
|
344
347
|
async function writeGithubToken(token) {
|
|
@@ -355,10 +358,10 @@ async function setupCopilotToken() {
|
|
|
355
358
|
const refreshCopilotToken = async () => {
|
|
356
359
|
consola.debug("Refreshing Copilot token");
|
|
357
360
|
try {
|
|
358
|
-
const { token
|
|
359
|
-
state.auth.copilotToken = token
|
|
361
|
+
const { token } = await githubClient.getCopilotToken();
|
|
362
|
+
state.auth.copilotToken = token;
|
|
360
363
|
consola.debug("Copilot token refreshed");
|
|
361
|
-
if (state.config.showToken) consola.info("Refreshed Copilot token:", token
|
|
364
|
+
if (state.config.showToken) consola.info("Refreshed Copilot token:", token);
|
|
362
365
|
} catch (error) {
|
|
363
366
|
consola.error("Failed to refresh Copilot token:", error);
|
|
364
367
|
}
|
|
@@ -413,7 +416,7 @@ async function logUser() {
|
|
|
413
416
|
consola.info(`Logged in as ${user.login}`);
|
|
414
417
|
}
|
|
415
418
|
function createGitHubClient() {
|
|
416
|
-
return new GitHubClient(state.auth, getClientConfig(
|
|
419
|
+
return new GitHubClient(state.auth, getClientConfig());
|
|
417
420
|
}
|
|
418
421
|
async function ensureVSCodeVersion() {
|
|
419
422
|
if (!state.cache.vsCodeVersion) await cacheVSCodeVersion();
|
|
@@ -472,7 +475,7 @@ const checkUsage = defineCommand({
|
|
|
472
475
|
await cacheVSCodeVersion();
|
|
473
476
|
await setupGitHubToken();
|
|
474
477
|
try {
|
|
475
|
-
const usage = await new GitHubClient(state.auth, getClientConfig(
|
|
478
|
+
const usage = await new GitHubClient(state.auth, getClientConfig()).getCopilotUsage();
|
|
476
479
|
const premium = usage.quota_snapshots.premium_interactions;
|
|
477
480
|
const premiumTotal = premium.entitlement;
|
|
478
481
|
const premiumUsed = premiumTotal - premium.remaining;
|
|
@@ -629,8 +632,7 @@ function getShell() {
|
|
|
629
632
|
const { platform, ppid, env } = process;
|
|
630
633
|
if (platform === "win32") {
|
|
631
634
|
try {
|
|
632
|
-
|
|
633
|
-
if (execSync(command, { stdio: "pipe" }).toString().toLowerCase().includes("powershell.exe")) return "powershell";
|
|
635
|
+
if (execSync(`wmic process get ParentProcessId,Name | findstr "${ppid}"`, { stdio: "pipe" }).toString().toLowerCase().includes("powershell.exe")) return "powershell";
|
|
634
636
|
} catch {
|
|
635
637
|
return "cmd";
|
|
636
638
|
}
|
|
@@ -678,8 +680,8 @@ function generateEnvScript(envVars, commandToRun = "") {
|
|
|
678
680
|
|
|
679
681
|
//#endregion
|
|
680
682
|
//#region src/lib/request-logger.ts
|
|
681
|
-
function formatElapsed(start
|
|
682
|
-
const delta = Date.now() - start
|
|
683
|
+
function formatElapsed(start) {
|
|
684
|
+
const delta = Date.now() - start;
|
|
683
685
|
return delta < 1e3 ? `${delta}ms` : `${Math.round(delta / 1e3)}s`;
|
|
684
686
|
}
|
|
685
687
|
function formatPath(rawUrl) {
|
|
@@ -717,17 +719,17 @@ function formatModelMapping(info) {
|
|
|
717
719
|
}
|
|
718
720
|
const requestLogger = async (c, next) => {
|
|
719
721
|
const { method, url } = c.req;
|
|
720
|
-
const path
|
|
721
|
-
const start
|
|
722
|
+
const path = formatPath(url);
|
|
723
|
+
const start = Date.now();
|
|
722
724
|
try {
|
|
723
725
|
await next();
|
|
724
726
|
} finally {
|
|
725
|
-
const elapsed = formatElapsed(start
|
|
727
|
+
const elapsed = formatElapsed(start);
|
|
726
728
|
const status = c.res.status;
|
|
727
729
|
const modelInfo = c.get("modelMappingInfo");
|
|
728
730
|
const line = [
|
|
729
731
|
colorizeMethod(method),
|
|
730
|
-
colorize("white", path
|
|
732
|
+
colorize("white", path),
|
|
731
733
|
colorizeStatus(status),
|
|
732
734
|
colorize("dim", elapsed)
|
|
733
735
|
].join(" ");
|
|
@@ -746,33 +748,33 @@ async function awaitApproval() {
|
|
|
746
748
|
|
|
747
749
|
//#endregion
|
|
748
750
|
//#region src/lib/rate-limit.ts
|
|
749
|
-
async function checkRateLimit(state
|
|
750
|
-
if (state
|
|
751
|
+
async function checkRateLimit(state) {
|
|
752
|
+
if (state.config.rateLimitSeconds === void 0) return;
|
|
751
753
|
const now = Date.now();
|
|
752
|
-
if (!state
|
|
753
|
-
state
|
|
754
|
+
if (!state.rateLimit.lastRequestTimestamp) {
|
|
755
|
+
state.rateLimit.lastRequestTimestamp = now;
|
|
754
756
|
return;
|
|
755
757
|
}
|
|
756
|
-
const elapsedSeconds = (now - state
|
|
757
|
-
if (elapsedSeconds > state
|
|
758
|
-
state
|
|
758
|
+
const elapsedSeconds = (now - state.rateLimit.lastRequestTimestamp) / 1e3;
|
|
759
|
+
if (elapsedSeconds > state.config.rateLimitSeconds) {
|
|
760
|
+
state.rateLimit.lastRequestTimestamp = now;
|
|
759
761
|
return;
|
|
760
762
|
}
|
|
761
|
-
const waitTimeSeconds = Math.ceil(state
|
|
762
|
-
if (!state
|
|
763
|
+
const waitTimeSeconds = Math.ceil(state.config.rateLimitSeconds - elapsedSeconds);
|
|
764
|
+
if (!state.config.rateLimitWait) {
|
|
763
765
|
consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
|
|
764
766
|
throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
|
|
765
767
|
}
|
|
766
768
|
const waitTimeMs = waitTimeSeconds * 1e3;
|
|
767
769
|
consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
|
|
768
770
|
await sleep(waitTimeMs);
|
|
769
|
-
state
|
|
771
|
+
state.rateLimit.lastRequestTimestamp = now;
|
|
770
772
|
consola.info("Rate limit wait completed, proceeding with request");
|
|
771
773
|
}
|
|
772
774
|
|
|
773
775
|
//#endregion
|
|
774
776
|
//#region src/routes/middleware/request-guard.ts
|
|
775
|
-
const requestGuard = async (
|
|
777
|
+
const requestGuard = async (_c, next) => {
|
|
776
778
|
await checkRateLimit(state);
|
|
777
779
|
if (state.config.manualApprove) await awaitApproval();
|
|
778
780
|
await next();
|
|
@@ -788,16 +790,26 @@ const ENCODING_MAP = {
|
|
|
788
790
|
r50k_base: () => import("gpt-tokenizer/encoding/r50k_base")
|
|
789
791
|
};
|
|
790
792
|
const encodingCache = /* @__PURE__ */ new Map();
|
|
793
|
+
const TOKENS_PER_MESSAGE = 3;
|
|
794
|
+
const TOKENS_PER_NAME = 1;
|
|
795
|
+
const REPLY_PRIMING_TOKENS = 3;
|
|
796
|
+
const BASE_CONSTANTS = {
|
|
797
|
+
propertyInitOverhead: 3,
|
|
798
|
+
propertyKeyOverhead: 3,
|
|
799
|
+
enumOverhead: -3,
|
|
800
|
+
enumItemCost: 3,
|
|
801
|
+
functionEndOverhead: 12
|
|
802
|
+
};
|
|
791
803
|
/**
|
|
792
804
|
* Calculate tokens for tool calls
|
|
793
805
|
*/
|
|
794
806
|
function calculateToolCallsTokens(toolCalls, encoder, constants) {
|
|
795
807
|
let tokens = 0;
|
|
796
808
|
for (const toolCall of toolCalls) {
|
|
797
|
-
tokens += constants.
|
|
809
|
+
tokens += constants.functionInitOverhead;
|
|
798
810
|
tokens += encoder.encode(JSON.stringify(toolCall)).length;
|
|
799
811
|
}
|
|
800
|
-
tokens += constants.
|
|
812
|
+
tokens += constants.functionEndOverhead;
|
|
801
813
|
return tokens;
|
|
802
814
|
}
|
|
803
815
|
/**
|
|
@@ -813,12 +825,10 @@ function calculateContentPartsTokens(contentParts, encoder) {
|
|
|
813
825
|
* Calculate tokens for a single message
|
|
814
826
|
*/
|
|
815
827
|
function calculateMessageTokens(message, encoder, constants) {
|
|
816
|
-
|
|
817
|
-
const tokensPerName = 1;
|
|
818
|
-
let tokens = tokensPerMessage;
|
|
828
|
+
let tokens = TOKENS_PER_MESSAGE;
|
|
819
829
|
for (const [key, value] of Object.entries(message)) {
|
|
820
830
|
if (typeof value === "string") tokens += encoder.encode(value).length;
|
|
821
|
-
if (key === "name") tokens +=
|
|
831
|
+
if (key === "name") tokens += TOKENS_PER_NAME;
|
|
822
832
|
if (key === "tool_calls") tokens += calculateToolCallsTokens(value, encoder, constants);
|
|
823
833
|
if (key === "content" && Array.isArray(value)) tokens += calculateContentPartsTokens(value, encoder);
|
|
824
834
|
}
|
|
@@ -831,17 +841,15 @@ function calculateTokens(messages, encoder, constants) {
|
|
|
831
841
|
if (messages.length === 0) return 0;
|
|
832
842
|
let numTokens = 0;
|
|
833
843
|
for (const message of messages) numTokens += calculateMessageTokens(message, encoder, constants);
|
|
834
|
-
numTokens +=
|
|
844
|
+
numTokens += REPLY_PRIMING_TOKENS;
|
|
835
845
|
return numTokens;
|
|
836
846
|
}
|
|
837
847
|
/**
|
|
838
848
|
* Get the corresponding encoder module based on encoding type
|
|
839
849
|
*/
|
|
840
|
-
async function
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
if (cached) return cached;
|
|
844
|
-
}
|
|
850
|
+
async function getEncoder(encoding) {
|
|
851
|
+
const cached = encodingCache.get(encoding);
|
|
852
|
+
if (cached) return cached;
|
|
845
853
|
const supportedEncoding = encoding;
|
|
846
854
|
if (!(supportedEncoding in ENCODING_MAP)) {
|
|
847
855
|
const fallbackModule = await ENCODING_MAP.o200k_base();
|
|
@@ -862,20 +870,10 @@ function getTokenizerFromModel(model) {
|
|
|
862
870
|
* Get model-specific constants for token calculation
|
|
863
871
|
*/
|
|
864
872
|
function getModelConstants(model) {
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
enumInit: -3,
|
|
870
|
-
enumItem: 3,
|
|
871
|
-
funcEnd: 12
|
|
872
|
-
} : {
|
|
873
|
-
funcInit: 7,
|
|
874
|
-
propInit: 3,
|
|
875
|
-
propKey: 3,
|
|
876
|
-
enumInit: -3,
|
|
877
|
-
enumItem: 3,
|
|
878
|
-
funcEnd: 12
|
|
873
|
+
const isLegacy = model.id === "gpt-3.5-turbo" || model.id === "gpt-4";
|
|
874
|
+
return {
|
|
875
|
+
...BASE_CONSTANTS,
|
|
876
|
+
functionInitOverhead: isLegacy ? 10 : 7
|
|
879
877
|
};
|
|
880
878
|
}
|
|
881
879
|
/**
|
|
@@ -883,16 +881,16 @@ function getModelConstants(model) {
|
|
|
883
881
|
*/
|
|
884
882
|
function calculateParameterTokens(key, prop, context) {
|
|
885
883
|
const { encoder, constants } = context;
|
|
886
|
-
let tokens = constants.
|
|
884
|
+
let tokens = constants.propertyKeyOverhead;
|
|
887
885
|
if (typeof prop !== "object" || prop === null) return tokens;
|
|
888
886
|
const param = prop;
|
|
889
887
|
const paramName = key;
|
|
890
888
|
const paramType = param.type || "string";
|
|
891
889
|
let paramDesc = param.description || "";
|
|
892
890
|
if (param.enum && Array.isArray(param.enum)) {
|
|
893
|
-
tokens += constants.
|
|
891
|
+
tokens += constants.enumOverhead;
|
|
894
892
|
for (const item of param.enum) {
|
|
895
|
-
tokens += constants.
|
|
893
|
+
tokens += constants.enumItemCost;
|
|
896
894
|
tokens += encoder.encode(String(item)).length;
|
|
897
895
|
}
|
|
898
896
|
}
|
|
@@ -921,7 +919,7 @@ function calculateParametersTokens(parameters, encoder, constants) {
|
|
|
921
919
|
for (const [key, value] of Object.entries(params)) if (key === "properties") {
|
|
922
920
|
const properties = value;
|
|
923
921
|
if (Object.keys(properties).length > 0) {
|
|
924
|
-
tokens += constants.
|
|
922
|
+
tokens += constants.propertyInitOverhead;
|
|
925
923
|
for (const propKey of Object.keys(properties)) tokens += calculateParameterTokens(propKey, properties[propKey], {
|
|
926
924
|
encoder,
|
|
927
925
|
constants
|
|
@@ -937,12 +935,12 @@ function calculateParametersTokens(parameters, encoder, constants) {
|
|
|
937
935
|
* Calculate tokens for a single tool
|
|
938
936
|
*/
|
|
939
937
|
function calculateToolTokens(tool, encoder, constants) {
|
|
940
|
-
let tokens = constants.
|
|
938
|
+
let tokens = constants.functionInitOverhead;
|
|
941
939
|
const func = tool.function;
|
|
942
|
-
const
|
|
943
|
-
let
|
|
944
|
-
if (
|
|
945
|
-
const line = `${
|
|
940
|
+
const functionName = func.name;
|
|
941
|
+
let functionDescription = func.description || "";
|
|
942
|
+
if (functionDescription.endsWith(".")) functionDescription = functionDescription.slice(0, -1);
|
|
943
|
+
const line = `${functionName}:${functionDescription}`;
|
|
946
944
|
tokens += encoder.encode(line).length;
|
|
947
945
|
if (typeof func.parameters === "object" && func.parameters !== null) tokens += calculateParametersTokens(func.parameters, encoder, constants);
|
|
948
946
|
return tokens;
|
|
@@ -951,20 +949,18 @@ function calculateToolTokens(tool, encoder, constants) {
|
|
|
951
949
|
* Calculate token count for tools based on model
|
|
952
950
|
*/
|
|
953
951
|
function numTokensForTools(tools, encoder, constants) {
|
|
954
|
-
let
|
|
955
|
-
for (const tool of tools)
|
|
956
|
-
|
|
957
|
-
return
|
|
952
|
+
let toolTokenCount = 0;
|
|
953
|
+
for (const tool of tools) toolTokenCount += calculateToolTokens(tool, encoder, constants);
|
|
954
|
+
toolTokenCount += constants.functionEndOverhead;
|
|
955
|
+
return toolTokenCount;
|
|
958
956
|
}
|
|
959
957
|
/**
|
|
960
958
|
* Calculate the token count of messages, supporting multiple GPT encoders
|
|
961
959
|
*/
|
|
962
960
|
async function getTokenCount(payload, model) {
|
|
963
|
-
const
|
|
964
|
-
const
|
|
965
|
-
const
|
|
966
|
-
const inputMessages = simplifiedMessages.filter((msg) => msg.role !== "assistant");
|
|
967
|
-
const outputMessages = simplifiedMessages.filter((msg) => msg.role === "assistant");
|
|
961
|
+
const encoder = await getEncoder(getTokenizerFromModel(model));
|
|
962
|
+
const inputMessages = payload.messages.filter((msg) => msg.role !== "assistant");
|
|
963
|
+
const outputMessages = payload.messages.filter((msg) => msg.role === "assistant");
|
|
968
964
|
const constants = getModelConstants(model);
|
|
969
965
|
let inputTokens = calculateTokens(inputMessages, encoder, constants);
|
|
970
966
|
if (payload.tools && payload.tools.length > 0) inputTokens += numTokensForTools(payload.tools, encoder, constants);
|
|
@@ -975,6 +971,23 @@ async function getTokenCount(payload, model) {
|
|
|
975
971
|
};
|
|
976
972
|
}
|
|
977
973
|
|
|
974
|
+
//#endregion
|
|
975
|
+
//#region src/lib/upstream-signal.ts
|
|
976
|
+
const DEFAULT_TIMEOUT_MS = 3e5;
|
|
977
|
+
function createUpstreamSignal(clientSignal, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
978
|
+
const controller = new AbortController();
|
|
979
|
+
const timeout = timeoutMs > 0 ? setTimeout(() => controller.abort(), timeoutMs) : void 0;
|
|
980
|
+
const onAbort = () => controller.abort();
|
|
981
|
+
if (clientSignal && !clientSignal.aborted) clientSignal.addEventListener("abort", onAbort);
|
|
982
|
+
return {
|
|
983
|
+
signal: controller.signal,
|
|
984
|
+
cleanup: () => {
|
|
985
|
+
if (timeout) clearTimeout(timeout);
|
|
986
|
+
clientSignal?.removeEventListener("abort", onAbort);
|
|
987
|
+
}
|
|
988
|
+
};
|
|
989
|
+
}
|
|
990
|
+
|
|
978
991
|
//#endregion
|
|
979
992
|
//#region src/lib/validation.ts
|
|
980
993
|
const openAIMessageSchema = z.object({
|
|
@@ -1048,16 +1061,18 @@ async function handleCompletion$1(c) {
|
|
|
1048
1061
|
} catch (error) {
|
|
1049
1062
|
consola.warn("Failed to calculate token count:", error);
|
|
1050
1063
|
}
|
|
1051
|
-
if (
|
|
1064
|
+
if (payload.max_tokens == null) {
|
|
1052
1065
|
payload = {
|
|
1053
1066
|
...payload,
|
|
1054
1067
|
max_tokens: selectedModel?.capabilities.limits.max_output_tokens
|
|
1055
1068
|
};
|
|
1056
1069
|
consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
|
|
1057
1070
|
}
|
|
1058
|
-
const
|
|
1059
|
-
|
|
1071
|
+
const { signal, cleanup } = createUpstreamSignal(c.req.raw.signal, state.config.upstreamTimeoutSeconds !== void 0 ? state.config.upstreamTimeoutSeconds * 1e3 : void 0);
|
|
1072
|
+
const response = await new CopilotClient(state.auth, getClientConfig()).createChatCompletions(payload, { signal });
|
|
1073
|
+
if (isNonStreamingResponse(response)) {
|
|
1060
1074
|
consola.debug("Non-streaming response:", JSON.stringify(response));
|
|
1075
|
+
cleanup();
|
|
1061
1076
|
return c.json(response);
|
|
1062
1077
|
}
|
|
1063
1078
|
consola.debug("Streaming response");
|
|
@@ -1067,12 +1082,11 @@ async function handleCompletion$1(c) {
|
|
|
1067
1082
|
consola.debug("Streaming chunk:", JSON.stringify(chunk));
|
|
1068
1083
|
await stream.writeSSE(chunk);
|
|
1069
1084
|
}
|
|
1070
|
-
} finally {
|
|
1085
|
+
} finally {
|
|
1086
|
+
cleanup();
|
|
1087
|
+
}
|
|
1071
1088
|
});
|
|
1072
1089
|
}
|
|
1073
|
-
function isNonStreaming$1(response) {
|
|
1074
|
-
return Object.hasOwn(response, "choices");
|
|
1075
|
-
}
|
|
1076
1090
|
|
|
1077
1091
|
//#endregion
|
|
1078
1092
|
//#region src/routes/chat-completions/route.ts
|
|
@@ -1084,7 +1098,7 @@ completionRoutes.post("/", requestGuard, (c) => handleCompletion$1(c));
|
|
|
1084
1098
|
const embeddingRoutes = new Hono();
|
|
1085
1099
|
embeddingRoutes.post("/", async (c) => {
|
|
1086
1100
|
const payload = parseEmbeddingRequest(await c.req.json());
|
|
1087
|
-
const response = await new CopilotClient(state.auth, getClientConfig(
|
|
1101
|
+
const response = await new CopilotClient(state.auth, getClientConfig()).createEmbeddings(payload);
|
|
1088
1102
|
return c.json(response);
|
|
1089
1103
|
});
|
|
1090
1104
|
|
|
@@ -1099,6 +1113,13 @@ function mapOpenAIStopReasonToAnthropic(finishReason) {
|
|
|
1099
1113
|
content_filter: "end_turn"
|
|
1100
1114
|
}[finishReason];
|
|
1101
1115
|
}
|
|
1116
|
+
function mapOpenAIUsageToAnthropic(usage) {
|
|
1117
|
+
return {
|
|
1118
|
+
input_tokens: (usage?.prompt_tokens ?? 0) - (usage?.prompt_tokens_details?.cached_tokens ?? 0),
|
|
1119
|
+
output_tokens: usage?.completion_tokens ?? 0,
|
|
1120
|
+
...usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: usage.prompt_tokens_details.cached_tokens }
|
|
1121
|
+
};
|
|
1122
|
+
}
|
|
1102
1123
|
|
|
1103
1124
|
//#endregion
|
|
1104
1125
|
//#region src/translator/anthropic/anthropic-stream-translator.ts
|
|
@@ -1109,19 +1130,21 @@ var AnthropicStreamTranslator = class {
|
|
|
1109
1130
|
messageStartSent: false,
|
|
1110
1131
|
contentBlockIndex: 0,
|
|
1111
1132
|
contentBlockOpen: false,
|
|
1133
|
+
thinkingBlockOpen: false,
|
|
1112
1134
|
toolCalls: {}
|
|
1113
1135
|
};
|
|
1114
1136
|
}
|
|
1115
1137
|
onChunk(chunk) {
|
|
1116
1138
|
if (chunk.choices.length === 0) return [];
|
|
1117
|
-
const events
|
|
1139
|
+
const events = [];
|
|
1118
1140
|
const choice = chunk.choices[0];
|
|
1119
1141
|
const { delta } = choice;
|
|
1120
|
-
this.appendMessageStart(events
|
|
1121
|
-
this.
|
|
1122
|
-
this.
|
|
1123
|
-
this.
|
|
1124
|
-
|
|
1142
|
+
this.appendMessageStart(events, chunk);
|
|
1143
|
+
this.appendThinkingDelta(events, delta.reasoning_text);
|
|
1144
|
+
this.appendContentDelta(events, delta.content);
|
|
1145
|
+
this.appendToolCalls(events, delta.tool_calls);
|
|
1146
|
+
this.appendFinish(events, chunk, choice.finish_reason);
|
|
1147
|
+
return events;
|
|
1125
1148
|
}
|
|
1126
1149
|
onError(error) {
|
|
1127
1150
|
return [{
|
|
@@ -1147,9 +1170,9 @@ var AnthropicStreamTranslator = class {
|
|
|
1147
1170
|
return tc !== void 0 && tc.anthropicBlockIndex === this.state.contentBlockIndex;
|
|
1148
1171
|
});
|
|
1149
1172
|
}
|
|
1150
|
-
appendMessageStart(events
|
|
1173
|
+
appendMessageStart(events, chunk) {
|
|
1151
1174
|
if (this.state.messageStartSent) return;
|
|
1152
|
-
events
|
|
1175
|
+
events.push({
|
|
1153
1176
|
type: "message_start",
|
|
1154
1177
|
message: {
|
|
1155
1178
|
id: chunk.id,
|
|
@@ -1160,18 +1183,57 @@ var AnthropicStreamTranslator = class {
|
|
|
1160
1183
|
stop_reason: null,
|
|
1161
1184
|
stop_sequence: null,
|
|
1162
1185
|
usage: {
|
|
1163
|
-
|
|
1164
|
-
output_tokens: 0
|
|
1165
|
-
...chunk.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: chunk.usage.prompt_tokens_details.cached_tokens }
|
|
1186
|
+
...mapOpenAIUsageToAnthropic(chunk.usage),
|
|
1187
|
+
output_tokens: 0
|
|
1166
1188
|
}
|
|
1167
1189
|
}
|
|
1168
1190
|
});
|
|
1169
1191
|
this.state.messageStartSent = true;
|
|
1170
1192
|
}
|
|
1171
|
-
|
|
1193
|
+
appendThinkingDelta(events, reasoningText) {
|
|
1194
|
+
if (!reasoningText) return;
|
|
1195
|
+
if (this.state.contentBlockOpen && !this.state.thinkingBlockOpen) {
|
|
1196
|
+
events.push({
|
|
1197
|
+
type: "content_block_stop",
|
|
1198
|
+
index: this.state.contentBlockIndex
|
|
1199
|
+
});
|
|
1200
|
+
this.state.contentBlockIndex++;
|
|
1201
|
+
this.state.contentBlockOpen = false;
|
|
1202
|
+
}
|
|
1203
|
+
if (!this.state.thinkingBlockOpen) {
|
|
1204
|
+
events.push({
|
|
1205
|
+
type: "content_block_start",
|
|
1206
|
+
index: this.state.contentBlockIndex,
|
|
1207
|
+
content_block: {
|
|
1208
|
+
type: "thinking",
|
|
1209
|
+
thinking: ""
|
|
1210
|
+
}
|
|
1211
|
+
});
|
|
1212
|
+
this.state.contentBlockOpen = true;
|
|
1213
|
+
this.state.thinkingBlockOpen = true;
|
|
1214
|
+
}
|
|
1215
|
+
events.push({
|
|
1216
|
+
type: "content_block_delta",
|
|
1217
|
+
index: this.state.contentBlockIndex,
|
|
1218
|
+
delta: {
|
|
1219
|
+
type: "thinking_delta",
|
|
1220
|
+
thinking: reasoningText
|
|
1221
|
+
}
|
|
1222
|
+
});
|
|
1223
|
+
}
|
|
1224
|
+
appendContentDelta(events, content) {
|
|
1172
1225
|
if (!content) return;
|
|
1226
|
+
if (this.state.thinkingBlockOpen) {
|
|
1227
|
+
events.push({
|
|
1228
|
+
type: "content_block_stop",
|
|
1229
|
+
index: this.state.contentBlockIndex
|
|
1230
|
+
});
|
|
1231
|
+
this.state.contentBlockIndex++;
|
|
1232
|
+
this.state.contentBlockOpen = false;
|
|
1233
|
+
this.state.thinkingBlockOpen = false;
|
|
1234
|
+
}
|
|
1173
1235
|
if (this.isToolBlockOpen()) {
|
|
1174
|
-
events
|
|
1236
|
+
events.push({
|
|
1175
1237
|
type: "content_block_stop",
|
|
1176
1238
|
index: this.state.contentBlockIndex
|
|
1177
1239
|
});
|
|
@@ -1179,7 +1241,7 @@ var AnthropicStreamTranslator = class {
|
|
|
1179
1241
|
this.state.contentBlockOpen = false;
|
|
1180
1242
|
}
|
|
1181
1243
|
if (!this.state.contentBlockOpen) {
|
|
1182
|
-
events
|
|
1244
|
+
events.push({
|
|
1183
1245
|
type: "content_block_start",
|
|
1184
1246
|
index: this.state.contentBlockIndex,
|
|
1185
1247
|
content_block: {
|
|
@@ -1189,7 +1251,7 @@ var AnthropicStreamTranslator = class {
|
|
|
1189
1251
|
});
|
|
1190
1252
|
this.state.contentBlockOpen = true;
|
|
1191
1253
|
}
|
|
1192
|
-
events
|
|
1254
|
+
events.push({
|
|
1193
1255
|
type: "content_block_delta",
|
|
1194
1256
|
index: this.state.contentBlockIndex,
|
|
1195
1257
|
delta: {
|
|
@@ -1198,17 +1260,18 @@ var AnthropicStreamTranslator = class {
|
|
|
1198
1260
|
}
|
|
1199
1261
|
});
|
|
1200
1262
|
}
|
|
1201
|
-
appendToolCalls(events
|
|
1263
|
+
appendToolCalls(events, toolCalls) {
|
|
1202
1264
|
if (!toolCalls || toolCalls.length === 0) return;
|
|
1203
1265
|
for (const toolCall of toolCalls) {
|
|
1204
1266
|
if (toolCall.id && toolCall.function?.name) {
|
|
1205
1267
|
if (this.state.contentBlockOpen) {
|
|
1206
|
-
events
|
|
1268
|
+
events.push({
|
|
1207
1269
|
type: "content_block_stop",
|
|
1208
1270
|
index: this.state.contentBlockIndex
|
|
1209
1271
|
});
|
|
1210
1272
|
this.state.contentBlockIndex++;
|
|
1211
1273
|
this.state.contentBlockOpen = false;
|
|
1274
|
+
this.state.thinkingBlockOpen = false;
|
|
1212
1275
|
}
|
|
1213
1276
|
const anthropicBlockIndex = this.state.contentBlockIndex;
|
|
1214
1277
|
this.state.toolCalls[toolCall.index] = {
|
|
@@ -1216,7 +1279,7 @@ var AnthropicStreamTranslator = class {
|
|
|
1216
1279
|
name: toolCall.function.name,
|
|
1217
1280
|
anthropicBlockIndex
|
|
1218
1281
|
};
|
|
1219
|
-
events
|
|
1282
|
+
events.push({
|
|
1220
1283
|
type: "content_block_start",
|
|
1221
1284
|
index: anthropicBlockIndex,
|
|
1222
1285
|
content_block: {
|
|
@@ -1231,7 +1294,7 @@ var AnthropicStreamTranslator = class {
|
|
|
1231
1294
|
if (toolCall.function?.arguments) {
|
|
1232
1295
|
const toolCallInfo = this.state.toolCalls[toolCall.index];
|
|
1233
1296
|
if (!toolCallInfo) continue;
|
|
1234
|
-
events
|
|
1297
|
+
events.push({
|
|
1235
1298
|
type: "content_block_delta",
|
|
1236
1299
|
index: toolCallInfo.anthropicBlockIndex,
|
|
1237
1300
|
delta: {
|
|
@@ -1242,26 +1305,23 @@ var AnthropicStreamTranslator = class {
|
|
|
1242
1305
|
}
|
|
1243
1306
|
}
|
|
1244
1307
|
}
|
|
1245
|
-
appendFinish(events
|
|
1308
|
+
appendFinish(events, chunk, finishReason) {
|
|
1246
1309
|
if (!finishReason) return;
|
|
1247
1310
|
if (this.state.contentBlockOpen) {
|
|
1248
|
-
events
|
|
1311
|
+
events.push({
|
|
1249
1312
|
type: "content_block_stop",
|
|
1250
1313
|
index: this.state.contentBlockIndex
|
|
1251
1314
|
});
|
|
1252
1315
|
this.state.contentBlockOpen = false;
|
|
1316
|
+
this.state.thinkingBlockOpen = false;
|
|
1253
1317
|
}
|
|
1254
|
-
events
|
|
1318
|
+
events.push({
|
|
1255
1319
|
type: "message_delta",
|
|
1256
1320
|
delta: {
|
|
1257
1321
|
stop_reason: mapOpenAIStopReasonToAnthropic(finishReason),
|
|
1258
1322
|
stop_sequence: null
|
|
1259
1323
|
},
|
|
1260
|
-
usage:
|
|
1261
|
-
input_tokens: (chunk.usage?.prompt_tokens ?? 0) - (chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0),
|
|
1262
|
-
output_tokens: chunk.usage?.completion_tokens ?? 0,
|
|
1263
|
-
...chunk.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: chunk.usage.prompt_tokens_details.cached_tokens }
|
|
1264
|
-
}
|
|
1324
|
+
usage: mapOpenAIUsageToAnthropic(chunk.usage)
|
|
1265
1325
|
}, { type: "message_stop" });
|
|
1266
1326
|
}
|
|
1267
1327
|
};
|
|
@@ -1274,11 +1334,11 @@ const DEFAULT_FALLBACKS = {
|
|
|
1274
1334
|
claudeHaiku: "claude-haiku-4.5"
|
|
1275
1335
|
};
|
|
1276
1336
|
function getModelFallbackConfig() {
|
|
1277
|
-
const cachedConfig
|
|
1337
|
+
const cachedConfig = getCachedConfig();
|
|
1278
1338
|
return {
|
|
1279
|
-
claudeOpus: process.env.MODEL_FALLBACK_CLAUDE_OPUS || cachedConfig
|
|
1280
|
-
claudeSonnet: process.env.MODEL_FALLBACK_CLAUDE_SONNET || cachedConfig
|
|
1281
|
-
claudeHaiku: process.env.MODEL_FALLBACK_CLAUDE_HAIKU || cachedConfig
|
|
1339
|
+
claudeOpus: process.env.MODEL_FALLBACK_CLAUDE_OPUS || cachedConfig.modelFallback?.claudeOpus || DEFAULT_FALLBACKS.claudeOpus,
|
|
1340
|
+
claudeSonnet: process.env.MODEL_FALLBACK_CLAUDE_SONNET || cachedConfig.modelFallback?.claudeSonnet || DEFAULT_FALLBACKS.claudeSonnet,
|
|
1341
|
+
claudeHaiku: process.env.MODEL_FALLBACK_CLAUDE_HAIKU || cachedConfig.modelFallback?.claudeHaiku || DEFAULT_FALLBACKS.claudeHaiku
|
|
1282
1342
|
};
|
|
1283
1343
|
}
|
|
1284
1344
|
function resolveModel(modelId, knownModelIds, config) {
|
|
@@ -1293,17 +1353,20 @@ function resolveModel(modelId, knownModelIds, config) {
|
|
|
1293
1353
|
//#region src/translator/anthropic/anthropic-translator.ts
|
|
1294
1354
|
var AnthropicTranslator = class {
|
|
1295
1355
|
toOpenAI(payload) {
|
|
1356
|
+
const thinkingParams = this.translateThinking(payload.thinking, payload.model);
|
|
1357
|
+
const isThinkingActive = payload.thinking?.type === "enabled" || payload.thinking?.type === "adaptive";
|
|
1296
1358
|
return {
|
|
1297
1359
|
model: this.translateModelName(payload.model),
|
|
1298
1360
|
messages: this.translateAnthropicMessagesToOpenAI(payload.messages, payload.system),
|
|
1299
1361
|
max_tokens: payload.max_tokens,
|
|
1300
1362
|
stop: payload.stop_sequences,
|
|
1301
1363
|
stream: payload.stream,
|
|
1302
|
-
temperature: payload.temperature,
|
|
1303
|
-
top_p: payload.top_p,
|
|
1364
|
+
temperature: isThinkingActive ? void 0 : payload.temperature,
|
|
1365
|
+
top_p: isThinkingActive ? void 0 : payload.top_p,
|
|
1304
1366
|
user: payload.metadata?.user_id,
|
|
1305
1367
|
tools: this.translateAnthropicToolsToOpenAI(payload.tools),
|
|
1306
|
-
tool_choice: this.translateAnthropicToolChoiceToOpenAI(payload.tool_choice)
|
|
1368
|
+
tool_choice: this.translateAnthropicToolChoiceToOpenAI(payload.tool_choice),
|
|
1369
|
+
...thinkingParams
|
|
1307
1370
|
};
|
|
1308
1371
|
}
|
|
1309
1372
|
fromOpenAI(response) {
|
|
@@ -1326,20 +1389,32 @@ var AnthropicTranslator = class {
|
|
|
1326
1389
|
content: [...allTextBlocks, ...allToolUseBlocks],
|
|
1327
1390
|
stop_reason: mapOpenAIStopReasonToAnthropic(stopReason),
|
|
1328
1391
|
stop_sequence: null,
|
|
1329
|
-
usage:
|
|
1330
|
-
input_tokens: (response.usage?.prompt_tokens ?? 0) - (response.usage?.prompt_tokens_details?.cached_tokens ?? 0),
|
|
1331
|
-
output_tokens: response.usage?.completion_tokens ?? 0,
|
|
1332
|
-
...response.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: response.usage.prompt_tokens_details.cached_tokens }
|
|
1333
|
-
}
|
|
1392
|
+
usage: mapOpenAIUsageToAnthropic(response.usage)
|
|
1334
1393
|
};
|
|
1335
1394
|
}
|
|
1336
1395
|
createStreamTranslator() {
|
|
1337
1396
|
return new AnthropicStreamTranslator();
|
|
1338
1397
|
}
|
|
1339
1398
|
translateModelName(model) {
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1399
|
+
return resolveModel(model, state.cache.models ? new Set(state.cache.models.data.map((m) => m.id)) : void 0, getModelFallbackConfig());
|
|
1400
|
+
}
|
|
1401
|
+
translateThinking(thinking, model) {
|
|
1402
|
+
if (!thinking || thinking.type === "disabled") return {};
|
|
1403
|
+
const isClaude = model.startsWith("claude");
|
|
1404
|
+
if (thinking.type === "adaptive") return {
|
|
1405
|
+
reasoning_effort: "medium",
|
|
1406
|
+
...isClaude && { thinking_budget: 24e3 }
|
|
1407
|
+
};
|
|
1408
|
+
const budgetTokens = thinking.budget_tokens;
|
|
1409
|
+
return {
|
|
1410
|
+
reasoning_effort: this.budgetToReasoningEffort(budgetTokens),
|
|
1411
|
+
...isClaude && { thinking_budget: budgetTokens }
|
|
1412
|
+
};
|
|
1413
|
+
}
|
|
1414
|
+
budgetToReasoningEffort(budgetTokens) {
|
|
1415
|
+
if (budgetTokens <= 8e3) return "low";
|
|
1416
|
+
if (budgetTokens <= 24e3) return "medium";
|
|
1417
|
+
return "high";
|
|
1343
1418
|
}
|
|
1344
1419
|
translateAnthropicMessagesToOpenAI(anthropicMessages, system) {
|
|
1345
1420
|
const systemMessages = this.handleSystemPrompt(system);
|
|
@@ -1480,6 +1555,10 @@ var AnthropicTranslator = class {
|
|
|
1480
1555
|
|
|
1481
1556
|
//#endregion
|
|
1482
1557
|
//#region src/routes/messages/count-tokens-handler.ts
|
|
1558
|
+
const CLAUDE_TOOL_OVERHEAD_TOKENS = 346;
|
|
1559
|
+
const GROK_TOOL_OVERHEAD_TOKENS = 480;
|
|
1560
|
+
const CLAUDE_ESTIMATION_FACTOR = 1.15;
|
|
1561
|
+
const GROK_ESTIMATION_FACTOR = 1.03;
|
|
1483
1562
|
/**
|
|
1484
1563
|
* Handles token counting for Anthropic messages
|
|
1485
1564
|
*/
|
|
@@ -1498,13 +1577,13 @@ async function handleCountTokens(c) {
|
|
|
1498
1577
|
let mcpToolExist = false;
|
|
1499
1578
|
if (anthropicBeta?.startsWith("claude-code")) mcpToolExist = anthropicPayload.tools.some((tool) => tool.name.startsWith("mcp__"));
|
|
1500
1579
|
if (!mcpToolExist) {
|
|
1501
|
-
if (anthropicPayload.model.startsWith("claude")) tokenCount.input = tokenCount.input +
|
|
1502
|
-
else if (anthropicPayload.model.startsWith("grok")) tokenCount.input = tokenCount.input +
|
|
1580
|
+
if (anthropicPayload.model.startsWith("claude")) tokenCount.input = tokenCount.input + CLAUDE_TOOL_OVERHEAD_TOKENS;
|
|
1581
|
+
else if (anthropicPayload.model.startsWith("grok")) tokenCount.input = tokenCount.input + GROK_TOOL_OVERHEAD_TOKENS;
|
|
1503
1582
|
}
|
|
1504
1583
|
}
|
|
1505
1584
|
let finalTokenCount = tokenCount.input + tokenCount.output;
|
|
1506
|
-
if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount *
|
|
1507
|
-
else if (anthropicPayload.model.startsWith("grok")) finalTokenCount = Math.round(finalTokenCount *
|
|
1585
|
+
if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * CLAUDE_ESTIMATION_FACTOR);
|
|
1586
|
+
else if (anthropicPayload.model.startsWith("grok")) finalTokenCount = Math.round(finalTokenCount * GROK_ESTIMATION_FACTOR);
|
|
1508
1587
|
consola.info("Token count:", finalTokenCount);
|
|
1509
1588
|
return c.json({ input_tokens: finalTokenCount });
|
|
1510
1589
|
}
|
|
@@ -1522,11 +1601,13 @@ async function handleCompletion(c) {
|
|
|
1522
1601
|
});
|
|
1523
1602
|
consola.debug("Claude Code requested model:", anthropicPayload.model, "-> Copilot model:", openAIPayload.model);
|
|
1524
1603
|
consola.debug("Translated OpenAI request payload:", JSON.stringify(openAIPayload));
|
|
1525
|
-
const
|
|
1526
|
-
|
|
1527
|
-
|
|
1604
|
+
const { signal, cleanup } = createUpstreamSignal(c.req.raw.signal, state.config.upstreamTimeoutSeconds !== void 0 ? state.config.upstreamTimeoutSeconds * 1e3 : void 0);
|
|
1605
|
+
const response = await new CopilotClient(state.auth, getClientConfig()).createChatCompletions(openAIPayload, { signal });
|
|
1606
|
+
if (isNonStreamingResponse(response)) {
|
|
1607
|
+
consola.debug("Non-streaming response from Copilot (full):", JSON.stringify(response, null, 2));
|
|
1528
1608
|
const anthropicResponse = translator.fromOpenAI(response);
|
|
1529
1609
|
consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
|
|
1610
|
+
cleanup();
|
|
1530
1611
|
return c.json(anthropicResponse);
|
|
1531
1612
|
}
|
|
1532
1613
|
consola.debug("Streaming response from Copilot");
|
|
@@ -1538,8 +1619,8 @@ async function handleCompletion(c) {
|
|
|
1538
1619
|
if (rawEvent.data === "[DONE]") break;
|
|
1539
1620
|
if (!rawEvent.data) continue;
|
|
1540
1621
|
const chunk = JSON.parse(rawEvent.data);
|
|
1541
|
-
const events
|
|
1542
|
-
for (const event of events
|
|
1622
|
+
const events = streamTranslator.onChunk(chunk);
|
|
1623
|
+
for (const event of events) {
|
|
1543
1624
|
consola.debug("Translated Anthropic event:", JSON.stringify(event));
|
|
1544
1625
|
await stream.writeSSE({
|
|
1545
1626
|
event: event.type,
|
|
@@ -1558,12 +1639,11 @@ async function handleCompletion(c) {
|
|
|
1558
1639
|
event: event.type,
|
|
1559
1640
|
data: JSON.stringify(event)
|
|
1560
1641
|
});
|
|
1642
|
+
} finally {
|
|
1643
|
+
cleanup();
|
|
1561
1644
|
}
|
|
1562
1645
|
});
|
|
1563
1646
|
}
|
|
1564
|
-
function isNonStreaming(response) {
|
|
1565
|
-
return Object.hasOwn(response, "choices");
|
|
1566
|
-
}
|
|
1567
1647
|
|
|
1568
1648
|
//#endregion
|
|
1569
1649
|
//#region src/routes/messages/route.ts
|
|
@@ -1575,10 +1655,7 @@ messageRoutes.post("/count_tokens", (c) => handleCountTokens(c));
|
|
|
1575
1655
|
//#region src/routes/models/route.ts
|
|
1576
1656
|
const modelRoutes = new Hono();
|
|
1577
1657
|
modelRoutes.get("/", async (c) => {
|
|
1578
|
-
if (!state.cache.models)
|
|
1579
|
-
const copilotClient = new CopilotClient(state.auth, getClientConfig(state));
|
|
1580
|
-
await cacheModels(copilotClient);
|
|
1581
|
-
}
|
|
1658
|
+
if (!state.cache.models) await cacheModels(new CopilotClient(state.auth, getClientConfig()));
|
|
1582
1659
|
const models = state.cache.models?.data.map((model) => ({
|
|
1583
1660
|
id: model.id,
|
|
1584
1661
|
object: "model",
|
|
@@ -1606,7 +1683,7 @@ tokenRoute.get("/", (c) => {
|
|
|
1606
1683
|
//#region src/routes/usage/route.ts
|
|
1607
1684
|
const usageRoute = new Hono();
|
|
1608
1685
|
usageRoute.get("/", async (c) => {
|
|
1609
|
-
const usage = await new GitHubClient(state.auth, getClientConfig(
|
|
1686
|
+
const usage = await new GitHubClient(state.auth, getClientConfig()).getCopilotUsage();
|
|
1610
1687
|
return c.json(usage);
|
|
1611
1688
|
});
|
|
1612
1689
|
|
|
@@ -1677,17 +1754,13 @@ async function runServer(options) {
|
|
|
1677
1754
|
state.config.rateLimitSeconds = options.rateLimit;
|
|
1678
1755
|
state.config.rateLimitWait = options.rateLimitWait;
|
|
1679
1756
|
state.config.showToken = options.showToken;
|
|
1757
|
+
state.config.upstreamTimeoutSeconds = options.upstreamTimeoutSeconds;
|
|
1680
1758
|
await ensurePaths();
|
|
1681
1759
|
await readConfig();
|
|
1682
1760
|
await cacheVSCodeVersion();
|
|
1683
1761
|
if (!options.githubToken) await setupGitHubToken();
|
|
1684
1762
|
await setupCopilotToken();
|
|
1685
|
-
|
|
1686
|
-
...getClientConfig(state),
|
|
1687
|
-
accountType
|
|
1688
|
-
};
|
|
1689
|
-
const copilotClient = new CopilotClient(state.auth, clientConfig);
|
|
1690
|
-
await cacheModels(copilotClient);
|
|
1763
|
+
await cacheModels(new CopilotClient(state.auth, getClientConfig()));
|
|
1691
1764
|
consola.info(`Available models: \n${state.cache.models?.data.map((model) => `- ${model.id}`).join("\n")}`);
|
|
1692
1765
|
const serverUrl = `http://localhost:${options.port}`;
|
|
1693
1766
|
if (options.claudeCode) {
|
|
@@ -1700,6 +1773,15 @@ async function runServer(options) {
|
|
|
1700
1773
|
bun: options.idleTimeoutSeconds === void 0 ? void 0 : { idleTimeout: options.idleTimeoutSeconds }
|
|
1701
1774
|
});
|
|
1702
1775
|
}
|
|
1776
|
+
function parseIntArg(raw, name, fallbackMsg) {
|
|
1777
|
+
if (raw === void 0) return void 0;
|
|
1778
|
+
const n = Number.parseInt(raw, 10);
|
|
1779
|
+
if (Number.isNaN(n) || n < 0) {
|
|
1780
|
+
consola.warn(`Invalid --${name} value "${raw}". ${fallbackMsg}`);
|
|
1781
|
+
return;
|
|
1782
|
+
}
|
|
1783
|
+
return n;
|
|
1784
|
+
}
|
|
1703
1785
|
const start = defineCommand({
|
|
1704
1786
|
meta: {
|
|
1705
1787
|
name: "start",
|
|
@@ -1765,17 +1847,17 @@ const start = defineCommand({
|
|
|
1765
1847
|
type: "string",
|
|
1766
1848
|
default: "120",
|
|
1767
1849
|
description: "Bun server idle timeout in seconds"
|
|
1850
|
+
},
|
|
1851
|
+
"upstream-timeout": {
|
|
1852
|
+
type: "string",
|
|
1853
|
+
default: "300",
|
|
1854
|
+
description: "Upstream request timeout in seconds (0 to disable)"
|
|
1768
1855
|
}
|
|
1769
1856
|
},
|
|
1770
1857
|
run({ args }) {
|
|
1771
|
-
const
|
|
1772
|
-
const
|
|
1773
|
-
const
|
|
1774
|
-
let idleTimeoutSeconds = idleTimeoutRaw === void 0 ? void 0 : Number.parseInt(idleTimeoutRaw, 10);
|
|
1775
|
-
if (idleTimeoutSeconds !== void 0 && (Number.isNaN(idleTimeoutSeconds) || idleTimeoutSeconds < 0)) {
|
|
1776
|
-
consola.warn(`Invalid --idle-timeout value "${idleTimeoutRaw}". Falling back to Bun default.`);
|
|
1777
|
-
idleTimeoutSeconds = void 0;
|
|
1778
|
-
}
|
|
1858
|
+
const rateLimit = parseIntArg(args["rate-limit"], "rate-limit", "Rate limiting disabled.");
|
|
1859
|
+
const idleTimeoutSeconds = parseIntArg(args["idle-timeout"], "idle-timeout", "Falling back to Bun default.");
|
|
1860
|
+
const upstreamTimeoutSeconds = parseIntArg(args["upstream-timeout"], "upstream-timeout", "Falling back to default (300s).");
|
|
1779
1861
|
return runServer({
|
|
1780
1862
|
port: Number.parseInt(args.port, 10),
|
|
1781
1863
|
verbose: args.verbose,
|
|
@@ -1787,14 +1869,15 @@ const start = defineCommand({
|
|
|
1787
1869
|
claudeCode: args["claude-code"],
|
|
1788
1870
|
showToken: args["show-token"],
|
|
1789
1871
|
proxyEnv: args["proxy-env"],
|
|
1790
|
-
idleTimeoutSeconds
|
|
1872
|
+
idleTimeoutSeconds,
|
|
1873
|
+
upstreamTimeoutSeconds
|
|
1791
1874
|
});
|
|
1792
1875
|
}
|
|
1793
1876
|
});
|
|
1794
1877
|
|
|
1795
1878
|
//#endregion
|
|
1796
1879
|
//#region src/main.ts
|
|
1797
|
-
|
|
1880
|
+
runMain(defineCommand({
|
|
1798
1881
|
meta: {
|
|
1799
1882
|
name: "ghc-proxy",
|
|
1800
1883
|
description: "A wrapper around GitHub Copilot API to make it OpenAI compatible, making it usable for other tools."
|
|
@@ -1805,12 +1888,11 @@ const main = defineCommand({
|
|
|
1805
1888
|
"check-usage": checkUsage,
|
|
1806
1889
|
debug
|
|
1807
1890
|
}
|
|
1808
|
-
})
|
|
1809
|
-
runMain(main).catch((error) => {
|
|
1891
|
+
})).catch((error) => {
|
|
1810
1892
|
consola.error("Failed to start CLI:", error);
|
|
1811
1893
|
process.exitCode = 1;
|
|
1812
1894
|
});
|
|
1813
1895
|
|
|
1814
1896
|
//#endregion
|
|
1815
1897
|
export { };
|
|
1816
|
-
//# sourceMappingURL=main.
|
|
1898
|
+
//# sourceMappingURL=main.mjs.map
|