@hsupu/copilot-api 0.7.10 → 0.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -17,6 +17,7 @@ import pc from "picocolors";
17
17
  import { Hono } from "hono";
18
18
  import { cors } from "hono/cors";
19
19
  import { streamSSE } from "hono/streaming";
20
+ import { countTokens } from "@anthropic-ai/tokenizer";
20
21
  import { events } from "fetch-event-stream";
21
22
 
22
23
  //#region src/lib/paths.ts
@@ -46,7 +47,9 @@ const state = {
46
47
  accountType: "individual",
47
48
  manualApprove: false,
48
49
  showToken: false,
49
- autoCompact: true
50
+ verbose: false,
51
+ autoTruncate: true,
52
+ directAnthropicApi: true
50
53
  };
51
54
 
52
55
  //#endregion
@@ -480,9 +483,23 @@ async function checkTokenExists() {
480
483
  return false;
481
484
  }
482
485
  }
483
- async function getDebugInfo() {
486
/**
 * Collect account details for the debug report.
 *
 * Ensures the application paths exist and loads the GitHub token, then
 * fetches the GitHub user and the Copilot usage in parallel.
 *
 * @returns {Promise<{user: unknown, copilot: unknown} | null>} The account
 *   data, or `null` when no GitHub token is available or any step fails.
 */
async function getAccountInfo() {
	try {
		await ensurePaths();
		await setupGitHubToken();
		if (!state.githubToken) return null;
		const [user, copilot] = await Promise.all([getGitHubUser(), getCopilotUsage()]);
		return {
			user,
			copilot
		};
	} catch (error) {
		// Best-effort lookup: the debug report must still print without account
		// data, but leave a trace of why it is missing instead of hiding it.
		consola.debug("Failed to fetch account info:", error instanceof Error ? error.message : error);
		return null;
	}
}
500
+ async function getDebugInfo(includeAccount) {
484
501
  const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
485
- return {
502
+ const info = {
486
503
  version: version$1,
487
504
  runtime: getRuntimeInfo(),
488
505
  paths: {
@@ -491,9 +508,14 @@ async function getDebugInfo() {
491
508
  },
492
509
  tokenExists
493
510
  };
511
+ if (includeAccount && tokenExists) {
512
+ const account = await getAccountInfo();
513
+ if (account) info.account = account;
514
+ }
515
+ return info;
494
516
  }
495
517
  function printDebugInfoPlain(info) {
496
- consola.info(`copilot-api debug
518
+ let output = `copilot-api debug
497
519
 
498
520
  Version: ${info.version}
499
521
  Runtime: ${info.runtime.name} ${info.runtime.version} (${info.runtime.platform} ${info.runtime.arch})
@@ -502,19 +524,24 @@ Paths:
502
524
  - APP_DIR: ${info.paths.APP_DIR}
503
525
  - GITHUB_TOKEN_PATH: ${info.paths.GITHUB_TOKEN_PATH}
504
526
 
505
- Token exists: ${info.tokenExists ? "Yes" : "No"}`);
527
+ Token exists: ${info.tokenExists ? "Yes" : "No"}`;
528
+ if (info.account) output += `
529
+
530
+ Account Info:
531
+ ${JSON.stringify(info.account, null, 2)}`;
532
+ consola.info(output);
506
533
  }
507
534
/**
 * Write the debug info object to stdout as pretty-printed JSON
 * (2-space indentation).
 *
 * @param {object} info Debug info to serialize.
 */
function printDebugInfoJson(info) {
	const serialized = JSON.stringify(info, null, 2);
	console.log(serialized);
}
510
537
/**
 * Gather the debug report (account info requested) and print it.
 *
 * @param {{json?: boolean}} options When `options.json` is truthy the report
 *   is printed as JSON, otherwise as plain text.
 */
async function runDebug(options) {
	const report = await getDebugInfo(true);
	const print = options.json ? printDebugInfoJson : printDebugInfoPlain;
	print(report);
}
515
- const debug = defineCommand({
542
+ const debugInfo = defineCommand({
516
543
  meta: {
517
- name: "debug",
544
+ name: "info",
518
545
  description: "Print debug information about the application"
519
546
  },
520
547
  args: { json: {
@@ -526,6 +553,48 @@ const debug = defineCommand({
526
553
  return runDebug({ json: args.json });
527
554
  }
528
555
  });
556
/**
 * `models` subcommand: authenticates, fetches the model list and prints the
 * result to stdout as pretty-printed JSON.
 */
const debugModels = defineCommand({
	meta: {
		name: "models",
		description: "Fetch and display raw model data from Copilot API"
	},
	args: {
		"account-type": {
			type: "string",
			alias: "a",
			default: "individual",
			description: "The type of GitHub account (individual, business, enterprise)"
		},
		"github-token": {
			type: "string",
			alias: "g",
			description: "GitHub token to use (skips interactive auth)"
		}
	},
	async run({ args }) {
		state.accountType = args["account-type"];
		await ensurePaths();

		// A token passed on the command line takes precedence over the stored one.
		const providedToken = args["github-token"];
		if (providedToken) {
			state.githubToken = providedToken;
			consola.info("Using provided GitHub token");
		} else {
			await setupGitHubToken();
		}

		const copilotAuth = await getCopilotToken();
		state.copilotToken = copilotAuth.token;

		consola.info("Fetching models from Copilot API...");
		const modelList = await getModels();
		console.log(JSON.stringify(modelList, null, 2));
	}
});
588
/**
 * Parent `debug` command. It has no behavior of its own and only groups the
 * `info` and `models` subcommands.
 */
const debug = defineCommand({
	meta: { name: "debug", description: "Debug commands for troubleshooting" },
	subCommands: { info: debugInfo, models: debugModels }
});
529
598
 
530
599
  //#endregion
531
600
  //#region src/logout.ts
@@ -872,7 +941,7 @@ const patchClaude = defineCommand({
872
941
  //#endregion
873
942
  //#region package.json
874
943
  var name = "@hsupu/copilot-api";
875
- var version = "0.7.10";
944
+ var version = "0.7.11";
876
945
  var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
877
946
  var keywords = [
878
947
  "proxy",
@@ -905,6 +974,7 @@ var scripts = {
905
974
  var simple_git_hooks = { "pre-commit": "bun x lint-staged" };
906
975
  var lint_staged = { "*": "bun run lint --fix" };
907
976
  var dependencies = {
977
+ "@anthropic-ai/tokenizer": "^0.0.4",
908
978
  "citty": "^0.1.6",
909
979
  "clipboardy": "^5.0.0",
910
980
  "consola": "^3.4.2",
@@ -2206,9 +2276,61 @@ const numTokensForTools = (tools, encoder, constants) => {
2206
2276
  return funcTokenCount;
2207
2277
  };
2208
2278
/**
 * Check if a model is an Anthropic model.
 *
 * @param {{vendor?: string} | null | undefined} model Model entry. A missing
 *   model (for example a failed lookup) is tolerated and treated as
 *   "not Anthropic" instead of throwing.
 * @returns {boolean} `true` only when `model.vendor` is exactly "Anthropic".
 */
function isAnthropicModel(model) {
	return model?.vendor === "Anthropic";
}
2284
/**
 * Convert a message to plain text for Anthropic tokenizer.
 *
 * The result is newline-joined and consists of, in order: `<role>:`, the
 * message text (a string content as-is, or for array content each part's
 * non-empty `text` and `[image]` for every `image_url` part), each tool call
 * serialized as JSON, and `tool_call_id:<id>` when present.
 *
 * @param {object} message Chat message with `role` and optional `content`,
 *   `tool_calls` and `tool_call_id`.
 * @returns {string} Flattened text representation of the message.
 */
function messageToText(message) {
	const parts = [];
	parts.push(`${message.role}:`);
	if (typeof message.content === "string") parts.push(message.content);
	else if (Array.isArray(message.content)) {
		for (const part of message.content) {
			// The `in` operator throws a TypeError on null and primitives, so a
			// malformed content part must be skipped rather than abort counting.
			if (part === null || typeof part !== "object") continue;
			if ("text" in part && part.text) parts.push(part.text);
			else if (part.type === "image_url") parts.push("[image]");
		}
	}
	if (message.tool_calls) {
		for (const tc of message.tool_calls) parts.push(JSON.stringify(tc));
	}
	if ("tool_call_id" in message && message.tool_call_id) parts.push(`tool_call_id:${message.tool_call_id}`);
	return parts.join("\n");
}
2299
/**
 * Convert tools to text for Anthropic tokenizer.
 *
 * @param {object[]} tools Tool definitions.
 * @returns {string} One JSON document per tool, separated by newlines.
 */
function toolsToText(tools) {
	const serialized = [];
	for (const tool of tools) serialized.push(JSON.stringify(tool));
	return serialized.join("\n");
}
2305
/**
 * Calculate token count using Anthropic's official tokenizer.
 *
 * Assistant messages are counted as output, every other role as input. Each
 * side is flattened with `messageToText`, joined with blank lines and passed
 * to `countTokens`. Serialized tools, when present, are added to the input.
 *
 * @param {{messages: object[], tools?: object[]}} payload Chat payload.
 * @returns {{input: number, output: number}} Estimated token counts.
 */
function getAnthropicTokenCount(payload) {
	// Flat per-message allowance added on top of the tokenizer result.
	const PER_MESSAGE_OVERHEAD = 3;
	// Added once to the input side; presumably reply priming — TODO confirm.
	const FIXED_INPUT_OVERHEAD = 3;

	const inputTexts = [];
	const outputTexts = [];
	for (const msg of payload.messages) {
		const bucket = msg.role === "assistant" ? outputTexts : inputTexts;
		bucket.push(messageToText(msg));
	}

	let input = countTokens(inputTexts.join("\n\n"));
	let output = countTokens(outputTexts.join("\n\n"));

	const hasTools = payload.tools && payload.tools.length > 0;
	if (hasTools) input += countTokens(toolsToText(payload.tools));

	input += inputTexts.length * PER_MESSAGE_OVERHEAD;
	output += outputTexts.length * PER_MESSAGE_OVERHEAD;
	input += FIXED_INPUT_OVERHEAD;

	return { input, output };
}
2327
+ /**
2328
+ * Calculate the token count of messages.
2329
+ * Uses Anthropic's official tokenizer for Anthropic models,
2330
+ * and GPT tokenizers for other models.
2210
2331
  */
2211
2332
  const getTokenCount = async (payload, model) => {
2333
+ if (isAnthropicModel(model)) return getAnthropicTokenCount(payload);
2212
2334
  const tokenizer = getTokenizerFromModel(model);
2213
2335
  const encoder = await getEncodeChatFunction(tokenizer);
2214
2336
  const simplifiedMessages = payload.messages;
@@ -2225,10 +2347,10 @@ const getTokenCount = async (payload, model) => {
2225
2347
  };
2226
2348
 
2227
2349
  //#endregion
2228
- //#region src/lib/auto-compact.ts
2350
+ //#region src/lib/auto-truncate.ts
2229
2351
/** Default auto-truncate settings; callers may override individual keys. */
const DEFAULT_CONFIG = {
	// Safety margin, expressed as a percentage.
	safetyMarginPercent: 2,
	// Request body size ceiling in bytes (510 KiB).
	maxRequestBodyBytes: 1024 * 510
};
2233
2355
  /** Dynamic byte limit that adjusts based on 413 errors */
2234
2356
  let dynamicByteLimit = null;
@@ -2238,7 +2360,7 @@ let dynamicByteLimit = null;
2238
2360
function onRequestTooLarge(failingBytes) {
	// Back off to 90% of the size that was just rejected, never below 100 KiB.
	const floorBytes = 100 * 1024;
	const backedOff = Math.floor(failingBytes * .9);
	const newLimit = Math.max(backedOff, floorBytes);
	dynamicByteLimit = newLimit;

	const failedKB = Math.round(failingBytes / 1024);
	const limitKB = Math.round(newLimit / 1024);
	consola.info(`[AutoTruncate] Adjusted byte limit: ${failedKB}KB failed → ${limitKB}KB`);
}
2243
2365
  function calculateLimits(model, config) {
2244
2366
  const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
@@ -2294,14 +2416,14 @@ function filterOrphanedToolResults(messages) {
2294
2416
  }
2295
2417
  return true;
2296
2418
  });
2297
- if (removedCount > 0) consola.debug(`Auto-compact: Filtered ${removedCount} orphaned tool_result`);
2419
+ if (removedCount > 0) consola.debug(`[AutoTruncate] Filtered ${removedCount} orphaned tool_result`);
2298
2420
  return filtered;
2299
2421
  }
2300
2422
/**
 * Ensure messages start with a user message.
 *
 * Drops every leading message whose role is not "user". When no user message
 * exists at all, the result is empty.
 *
 * @param {object[]} messages Conversation messages.
 * @returns {object[]} A new array beginning at the first user message.
 */
function ensureStartsWithUser(messages) {
	const firstUserAt = messages.findIndex((message) => message.role === "user");
	// No user message at all means everything is skipped.
	const startIndex = firstUserAt === -1 ? messages.length : firstUserAt;
	if (startIndex > 0) consola.debug(`[AutoTruncate] Skipped ${startIndex} leading non-user messages`);
	return messages.slice(startIndex);
}
2307
2429
  /**
@@ -2367,10 +2489,10 @@ function createTruncationMarker(removedCount) {
2367
2489
  };
2368
2490
  }
2369
2491
  /**
2370
- * Perform auto-compaction on a payload that exceeds limits.
2492
+ * Perform auto-truncation on a payload that exceeds limits.
2371
2493
  * Uses binary search to find the optimal truncation point.
2372
2494
  */
2373
- async function autoCompact(payload, model, config = {}) {
2495
+ async function autoTruncate(payload, model, config = {}) {
2374
2496
  const cfg = {
2375
2497
  ...DEFAULT_CONFIG,
2376
2498
  ...config
@@ -2391,13 +2513,13 @@ async function autoCompact(payload, model, config = {}) {
2391
2513
  if (exceedsTokens && exceedsBytes) reason = "tokens and size";
2392
2514
  else if (exceedsBytes) reason = "size";
2393
2515
  else reason = "tokens";
2394
- consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
2516
+ consola.info(`[AutoTruncate] Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
2395
2517
  const { systemMessages, conversationMessages } = extractSystemMessages(payload.messages);
2396
2518
  const messagesJson = JSON.stringify(payload.messages);
2397
2519
  const payloadOverhead = originalBytes - messagesJson.length;
2398
2520
  const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
2399
2521
  const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
2400
- consola.debug(`Auto-compact: overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
2522
+ consola.debug(`[AutoTruncate] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
2401
2523
  const preserveIndex = findOptimalPreserveIndex({
2402
2524
  messages: conversationMessages,
2403
2525
  systemBytes,
@@ -2407,7 +2529,7 @@ async function autoCompact(payload, model, config = {}) {
2407
2529
  byteLimit
2408
2530
  });
2409
2531
  if (preserveIndex === 0) {
2410
- consola.warn("Auto-compact: Cannot truncate, system messages too large");
2532
+ consola.warn("[AutoTruncate] Cannot truncate, system messages too large");
2411
2533
  return {
2412
2534
  payload,
2413
2535
  wasCompacted: false,
@@ -2417,7 +2539,7 @@ async function autoCompact(payload, model, config = {}) {
2417
2539
  };
2418
2540
  }
2419
2541
  if (preserveIndex >= conversationMessages.length) {
2420
- consola.warn("Auto-compact: Would need to remove all messages");
2542
+ consola.warn("[AutoTruncate] Would need to remove all messages");
2421
2543
  return {
2422
2544
  payload,
2423
2545
  wasCompacted: false,
@@ -2431,7 +2553,7 @@ async function autoCompact(payload, model, config = {}) {
2431
2553
  preserved = ensureStartsWithUser(preserved);
2432
2554
  preserved = filterOrphanedToolResults(preserved);
2433
2555
  if (preserved.length === 0) {
2434
- consola.warn("Auto-compact: All messages filtered out after cleanup");
2556
+ consola.warn("[AutoTruncate] All messages filtered out after cleanup");
2435
2557
  return {
2436
2558
  payload,
2437
2559
  wasCompacted: false,
@@ -2452,8 +2574,8 @@ async function autoCompact(payload, model, config = {}) {
2452
2574
  };
2453
2575
  const newBytes = JSON.stringify(newPayload).length;
2454
2576
  const newTokenCount = await getTokenCount(newPayload, model);
2455
- consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
2456
- if (newBytes > byteLimit) consola.warn(`Auto-compact: Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
2577
+ consola.info(`[AutoTruncate] ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
2578
+ if (newBytes > byteLimit) consola.warn(`[AutoTruncate] Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
2457
2579
  return {
2458
2580
  payload: newPayload,
2459
2581
  wasCompacted: true,
@@ -2463,13 +2585,13 @@ async function autoCompact(payload, model, config = {}) {
2463
2585
  };
2464
2586
  }
2465
2587
/**
 * Create a marker to prepend to responses indicating auto-truncation occurred.
 *
 * @param {{wasCompacted: boolean, removedMessageCount: number, originalTokens: number, compactedTokens: number}} result
 *   Outcome of the truncation step.
 * @returns {string} Empty string when nothing was truncated, otherwise a
 *   human-readable footer with the removed message count and token reduction.
 */
function createTruncationResponseMarker(result) {
	if (!result.wasCompacted) return "";
	const reduction = result.originalTokens - result.compactedTokens;
	// Guard the division: a zero original count would otherwise print "NaN%".
	const percentage = result.originalTokens > 0 ? Math.round(reduction / result.originalTokens * 100) : 0;
	return `\n\n---\n[Auto-truncated: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
}
2474
2596
 
2475
2597
  //#endregion
@@ -2557,37 +2679,37 @@ function recordStreamError(opts) {
2557
2679
/**
 * Tell a complete (non-streaming) response apart from a stream by checking
 * for an own `choices` property.
 *
 * @param {object} response Response value to classify.
 * @returns {boolean} `true` when `response` has its own `choices` key.
 */
function isNonStreaming(response) {
	const hasChoices = Object.hasOwn(response, "choices");
	return hasChoices;
}
2560
/**
 * Build final payload with auto-truncate if needed.
 *
 * Returns the original payload untouched when auto-truncate is disabled, the
 * model is unknown, the size check reports no need, or anything throws along
 * the way. Otherwise returns the payload produced by `autoTruncate`.
 *
 * @param {object} payload Outgoing request payload.
 * @param {object | undefined} model Model entry for `payload.model`, if known.
 * @returns {Promise<{finalPayload: object, truncateResult: object | null}>}
 */
async function buildFinalPayload(payload, model) {
	const passthrough = () => ({
		finalPayload: payload,
		truncateResult: null
	});

	if (!state.autoTruncate) return passthrough();
	if (!model) {
		consola.warn(`Auto-truncate: Model '${payload.model}' not found in cached models, skipping`);
		return passthrough();
	}

	try {
		const check = await checkNeedsCompaction(payload, model);
		consola.debug(`Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
		if (!check.needed) return passthrough();

		let reasonText;
		switch (check.reason) {
			case "both":
				reasonText = "tokens and size";
				break;
			case "bytes":
				reasonText = "size";
				break;
			default:
				reasonText = "tokens";
		}
		consola.info(`Auto-truncate triggered: exceeds ${reasonText} limit`);

		const truncateResult = await autoTruncate(payload, model);
		return {
			finalPayload: truncateResult.payload,
			truncateResult
		};
	} catch (error) {
		// Truncation is an optimization; a failure must not block the request.
		consola.warn("Auto-truncate failed, proceeding with original payload:", error instanceof Error ? error.message : error);
		return passthrough();
	}
}
@@ -2631,7 +2753,7 @@ async function logPayloadSizeInfo(payload, model) {
2631
2753
  if (largeMessages > 0) consola.info(` Large messages (>50KB): ${largeMessages}`);
2632
2754
  consola.info("");
2633
2755
  consola.info(" Suggestions:");
2634
- if (!state.autoCompact) consola.info(" • Enable --auto-compact to automatically truncate history");
2756
+ if (!state.autoTruncate) consola.info(" • Enable --auto-truncate to automatically truncate history");
2635
2757
  if (imageCount > 0) consola.info(" • Remove or resize large images in the conversation");
2636
2758
  consola.info(" • Start a new conversation with /clear or /reset");
2637
2759
  consola.info(" • Reduce conversation history by deleting old messages");
@@ -2663,8 +2785,8 @@ async function handleCompletion$1(c) {
2663
2785
  };
2664
2786
  const selectedModel = state.models?.data.find((model) => model.id === originalPayload.model);
2665
2787
  await logTokenCount(originalPayload, selectedModel);
2666
- const { finalPayload, compactResult } = await buildFinalPayload(originalPayload, selectedModel);
2667
- if (compactResult) ctx.compactResult = compactResult;
2788
+ const { finalPayload, truncateResult } = await buildFinalPayload(originalPayload, selectedModel);
2789
+ if (truncateResult) ctx.truncateResult = truncateResult;
2668
2790
  const payload = isNullish(finalPayload.max_tokens) ? {
2669
2791
  ...finalPayload,
2670
2792
  max_tokens: selectedModel?.capabilities?.limits?.max_output_tokens
@@ -2717,8 +2839,8 @@ async function logTokenCount(payload, selectedModel) {
2717
2839
  function handleNonStreamingResponse$1(c, originalResponse, ctx) {
2718
2840
  consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
2719
2841
  let response = originalResponse;
2720
- if (ctx.compactResult?.wasCompacted && response.choices[0]?.message.content) {
2721
- const marker = createCompactionMarker(ctx.compactResult);
2842
+ if (state.verbose && ctx.truncateResult?.wasCompacted && response.choices[0]?.message.content) {
2843
+ const marker = createTruncationResponseMarker(ctx.truncateResult);
2722
2844
  response = {
2723
2845
  ...response,
2724
2846
  choices: response.choices.map((choice$1, i) => i === 0 ? {
@@ -2786,8 +2908,8 @@ async function handleStreamingResponse$1(opts) {
2786
2908
  const { stream, response, payload, ctx } = opts;
2787
2909
  const acc = createStreamAccumulator();
2788
2910
  try {
2789
- if (ctx.compactResult?.wasCompacted) {
2790
- const marker = createCompactionMarker(ctx.compactResult);
2911
+ if (state.verbose && ctx.truncateResult?.wasCompacted) {
2912
+ const marker = createTruncationResponseMarker(ctx.truncateResult);
2791
2913
  const markerChunk = {
2792
2914
  id: `compact-marker-${Date.now()}`,
2793
2915
  object: "chat.completion.chunk",
@@ -4160,7 +4282,7 @@ function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameM
4160
4282
  const otherMessages = anthropicMessages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, toolNameMapping));
4161
4283
  return [...systemMessages, ...otherMessages];
4162
4284
  }
4163
// Keywords stripped from system prompt text. The longer keyword is listed
// before the shorter one it starts with.
const RESERVED_KEYWORDS = [
	"x-anthropic-billing-header",
	"x-anthropic-billing"
];
4164
4286
  /**
4165
4287
  * Filter out reserved keywords from system prompt text.
4166
4288
  * Copilot API rejects requests containing these keywords.
@@ -4385,7 +4507,10 @@ function getAnthropicToolUseBlocks(toolCalls, toolNameMapping) {
4385
4507
  //#endregion
4386
4508
  //#region src/routes/messages/count-tokens-handler.ts
4387
4509
  /**
4388
- * Handles token counting for Anthropic messages
4510
+ * Handles token counting for Anthropic messages.
4511
+ *
4512
+ * For Anthropic models (vendor === "Anthropic"), uses the official Anthropic tokenizer.
4513
+ * For other models, uses GPT tokenizers with appropriate buffers.
4389
4514
  */
4390
4515
  async function handleCountTokens(c) {
4391
4516
  try {
@@ -4397,6 +4522,7 @@ async function handleCountTokens(c) {
4397
4522
  consola.warn("Model not found, returning default token count");
4398
4523
  return c.json({ input_tokens: 1 });
4399
4524
  }
4525
+ const isAnthropicModel$1 = selectedModel.vendor === "Anthropic";
4400
4526
  const tokenCount = await getTokenCount(openAIPayload, selectedModel);
4401
4527
  if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
4402
4528
  let mcpToolExist = false;
@@ -4407,9 +4533,8 @@ async function handleCountTokens(c) {
4407
4533
  }
4408
4534
  }
4409
4535
  let finalTokenCount = tokenCount.input + tokenCount.output;
4410
- if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * 1.15);
4411
- else if (anthropicPayload.model.startsWith("grok")) finalTokenCount = Math.round(finalTokenCount * 1.03);
4412
- consola.debug("Token count:", finalTokenCount);
4536
+ if (!isAnthropicModel$1) finalTokenCount = anthropicPayload.model.startsWith("grok") ? Math.round(finalTokenCount * 1.03) : Math.round(finalTokenCount * 1.05);
4537
+ consola.debug(`Token count: ${finalTokenCount} (${isAnthropicModel$1 ? "Anthropic tokenizer" : "GPT tokenizer"})`);
4413
4538
  return c.json({ input_tokens: finalTokenCount });
4414
4539
  } catch (error) {
4415
4540
  consola.error("Error counting tokens:", error);
@@ -4417,6 +4542,101 @@ async function handleCountTokens(c) {
4417
4542
  }
4418
4543
  }
4419
4544
 
4545
+ //#endregion
4546
+ //#region src/services/copilot/create-anthropic-messages.ts
4547
/**
 * Fields that are supported by Copilot's Anthropic API endpoint.
 * Any other fields in the incoming request will be stripped.
 */
const COPILOT_SUPPORTED_FIELDS = new Set([
	// Core request
	"model", "messages", "max_tokens", "system", "metadata",
	// Generation control
	"stop_sequences", "stream", "temperature", "top_p", "top_k",
	// Tools and extras
	"tools", "tool_choice", "thinking", "service_tier"
]);
4567
/**
 * Filter payload to only include fields supported by Copilot's Anthropic API.
 * This prevents errors like "Extra inputs are not permitted" for unsupported
 * fields like `output_config`.
 *
 * @param {object} payload Incoming Anthropic-style request body.
 * @returns {object} A new object holding only the supported top-level keys;
 *   the input is not modified.
 */
function filterPayloadForCopilot(payload) {
	const kept = {};
	const dropped = [];
	for (const [field, fieldValue] of Object.entries(payload)) {
		if (COPILOT_SUPPORTED_FIELDS.has(field)) {
			kept[field] = fieldValue;
		} else {
			dropped.push(field);
		}
	}
	if (dropped.length > 0) consola.debug(`[DirectAnthropic] Filtered unsupported fields: ${dropped.join(", ")}`);
	return kept;
}
4580
/**
 * Adjust max_tokens if thinking is enabled.
 * According to Anthropic docs, max_tokens must be greater than thinking.budget_tokens.
 * max_tokens = thinking_budget + response_tokens
 *
 * @param {object} payload Request body; never mutated.
 * @returns {object} The same object when no change is needed, otherwise a
 *   shallow copy with `max_tokens` raised to the budget plus
 *   `min(16384, budget)`.
 */
function adjustMaxTokensForThinking(payload) {
	const budgetTokens = payload.thinking?.budget_tokens;
	if (!budgetTokens) return payload;

	// Kept as `<=` (not a negated `>`) so a missing max_tokens compares false
	// and leaves the payload untouched.
	const needsRaise = payload.max_tokens <= budgetTokens;
	if (!needsRaise) return payload;

	const newMaxTokens = budgetTokens + Math.min(16384, budgetTokens);
	consola.debug(`[DirectAnthropic] Adjusted max_tokens: ${payload.max_tokens} → ${newMaxTokens} (thinking.budget_tokens=${budgetTokens})`);
	return {
		...payload,
		max_tokens: newMaxTokens
	};
}
4600
/**
 * Create messages using Anthropic-style API directly.
 * This bypasses the OpenAI translation layer for Anthropic models.
 *
 * @param {object} payload Anthropic-style request body.
 * @returns {Promise<object | AsyncIterable<object>>} An event stream when
 *   `payload.stream` is truthy, otherwise the parsed JSON response.
 * @throws {Error} When no Copilot token is set.
 * @throws {HTTPError} When the upstream response status is not OK.
 */
async function createAnthropicMessages(payload) {
	if (!state.copilotToken) throw new Error("Copilot token not found");

	const requestBody = adjustMaxTokensForThinking(filterPayloadForCopilot(payload));

	// Only top-level image blocks of a message are inspected.
	const hasImageBlock = (msg) => typeof msg.content !== "string" && msg.content.some((block) => block.type === "image");
	const enableVision = requestBody.messages.some(hasImageBlock);
	const isAgentCall = requestBody.messages.some((msg) => msg.role === "assistant");

	const headers = {
		...copilotHeaders(state, enableVision),
		"X-Initiator": isAgentCall ? "agent" : "user",
		"anthropic-version": "2023-06-01"
	};

	consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
	const endpoint = `${copilotBaseUrl(state)}/v1/messages`;
	const response = await fetch(endpoint, {
		method: "POST",
		headers,
		body: JSON.stringify(requestBody)
	});

	if (!response.ok) {
		consola.error("Failed to create Anthropic messages", response);
		throw await HTTPError.fromResponse("Failed to create Anthropic messages", response);
	}

	return payload.stream ? events(response) : await response.json();
}
4631
/**
 * Check if a model supports direct Anthropic API.
 * Returns true if direct Anthropic API is enabled and the model is from Anthropic vendor.
 *
 * @param {string} modelId Model id to look up in the cached model list.
 * @returns {boolean} `false` when the feature is off, the list is not
 *   loaded, or the id is unknown.
 */
function supportsDirectAnthropicApi(modelId) {
	if (!state.directAnthropicApi) return false;
	const cachedModel = state.models?.data.find((m) => m.id === modelId);
	return cachedModel?.vendor === "Anthropic";
}
4639
+
4420
4640
  //#endregion
4421
4641
  //#region src/routes/messages/stream-translation.ts
4422
4642
  function isToolBlockOpen(state$1) {
@@ -4579,11 +4799,128 @@ async function handleCompletion(c) {
4579
4799
  trackingId,
4580
4800
  startTime
4581
4801
  };
4802
+ if (supportsDirectAnthropicApi(anthropicPayload.model)) return handleDirectAnthropicCompletion(c, anthropicPayload, ctx);
4803
+ return handleTranslatedCompletion(c, anthropicPayload, ctx);
4804
+ }
4805
/**
 * Handle completion using direct Anthropic API (no translation needed).
 *
 * Waits for manual approval when enabled, sends the request through the
 * adaptive rate limiter, then dispatches to the streaming or non-streaming
 * handler. Errors are recorded and rethrown.
 *
 * @param {object} c Request context handed to the response helpers.
 * @param {object} anthropicPayload Anthropic-style request body.
 * @param {object} ctx Per-request tracking context; `queueWaitMs` is set on it.
 */
async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
	consola.debug("Using direct Anthropic API path for model:", anthropicPayload.model);
	if (state.manualApprove) await awaitApproval();
	try {
		const sendRequest = () => createAnthropicMessages(anthropicPayload);
		const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(sendRequest);
		ctx.queueWaitMs = queueWaitMs;

		const isStream = Symbol.asyncIterator in response;
		if (!isStream) return handleDirectAnthropicNonStreamingResponse(c, response, ctx);

		consola.debug("Streaming response from Copilot (direct Anthropic)");
		updateTrackerStatus(ctx.trackingId, "streaming");
		return streamSSE(c, async (stream) => {
			await handleDirectAnthropicStreamingResponse({
				stream,
				response,
				anthropicPayload,
				ctx
			});
		});
	} catch (error) {
		recordErrorResponse(ctx, anthropicPayload.model, error);
		throw error;
	}
}
4832
/**
 * Handle non-streaming direct Anthropic response.
 *
 * Records the response in history, updates the request tracker when a
 * tracking id exists, and returns the upstream response unchanged as JSON.
 *
 * @param {object} c Request context used to build the JSON reply.
 * @param {object} response Anthropic message response from upstream.
 * @param {object} ctx Per-request tracking context.
 */
function handleDirectAnthropicNonStreamingResponse(c, response, ctx) {
	consola.debug("Non-streaming response from Copilot (direct Anthropic):", JSON.stringify(response).slice(-400));

	// Reduce each content block to the fields kept in history; unknown block
	// types keep only their `type`.
	const toHistoryBlock = (block) => {
		if (block.type === "text") return {
			type: "text",
			text: block.text
		};
		if (block.type === "tool_use") return {
			type: "tool_use",
			id: block.id,
			name: block.name,
			input: JSON.stringify(block.input)
		};
		if (block.type === "thinking") return {
			type: "thinking",
			thinking: block.thinking
		};
		return { type: block.type };
	};

	recordResponse(ctx.historyId, {
		success: true,
		model: response.model,
		usage: response.usage,
		stop_reason: response.stop_reason ?? void 0,
		content: {
			role: "assistant",
			content: response.content.map((block) => toHistoryBlock(block))
		},
		toolCalls: extractToolCallsFromAnthropicContent(response.content)
	}, Date.now() - ctx.startTime);

	if (ctx.trackingId) {
		const { input_tokens: inputTokens, output_tokens: outputTokens } = response.usage;
		requestTracker.updateRequest(ctx.trackingId, {
			inputTokens,
			outputTokens,
			queueWaitMs: ctx.queueWaitMs
		});
	}
	return c.json(response);
}
4873
/**
 * Handle streaming direct Anthropic response (passthrough SSE events).
 *
 * Each upstream event is parsed for accounting and then forwarded with its
 * original `data` string. Events that fail to parse are logged and dropped.
 * On failure the error is recorded and an Anthropic error event is written
 * to the stream.
 *
 * @param {{stream: object, response: AsyncIterable<object>, anthropicPayload: object, ctx: object}} opts
 */
async function handleDirectAnthropicStreamingResponse(opts) {
	const { stream, response, anthropicPayload, ctx } = opts;
	const acc = createAnthropicStreamAccumulator();
	try {
		for await (const rawEvent of response) {
			consola.debug("Direct Anthropic raw stream event:", JSON.stringify(rawEvent));
			if (rawEvent.data === "[DONE]") break;
			if (!rawEvent.data) continue;

			let parsed;
			try {
				parsed = JSON.parse(rawEvent.data);
			} catch (parseError) {
				consola.error("Failed to parse Anthropic stream event:", parseError, rawEvent.data);
				continue;
			}

			processAnthropicEvent(parsed, acc);
			// Forward the original data string, not a re-serialized copy.
			const eventName = rawEvent.event || parsed.type;
			await stream.writeSSE({
				event: eventName,
				data: rawEvent.data
			});
		}
		recordStreamingResponse(acc, anthropicPayload.model, ctx);
		completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
	} catch (error) {
		consola.error("Direct Anthropic stream error:", error);
		recordStreamError({
			acc,
			fallbackModel: anthropicPayload.model,
			ctx,
			error
		});
		failTracking(ctx.trackingId, error);
		const errorEvent = translateErrorToAnthropicErrorEvent();
		await stream.writeSSE({
			event: errorEvent.type,
			data: JSON.stringify(errorEvent)
		});
	}
}
4915
+ /**
4916
+ * Handle completion using OpenAI translation path (legacy)
4917
+ */
4918
+ async function handleTranslatedCompletion(c, anthropicPayload, ctx) {
4582
4919
  const { payload: translatedPayload, toolNameMapping } = translateToOpenAI(anthropicPayload);
4583
4920
  consola.debug("Translated OpenAI request payload:", JSON.stringify(translatedPayload));
4584
4921
  const selectedModel = state.models?.data.find((model) => model.id === translatedPayload.model);
4585
- const { finalPayload: openAIPayload, compactResult } = await buildFinalPayload(translatedPayload, selectedModel);
4586
- if (compactResult) ctx.compactResult = compactResult;
4922
+ const { finalPayload: openAIPayload, truncateResult } = await buildFinalPayload(translatedPayload, selectedModel);
4923
+ if (truncateResult) ctx.truncateResult = truncateResult;
4587
4924
  if (state.manualApprove) await awaitApproval();
4588
4925
  try {
4589
4926
  const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(openAIPayload));
@@ -4595,7 +4932,7 @@ async function handleCompletion(c) {
4595
4932
  ctx
4596
4933
  });
4597
4934
  consola.debug("Streaming response from Copilot");
4598
- updateTrackerStatus(trackingId, "streaming");
4935
+ updateTrackerStatus(ctx.trackingId, "streaming");
4599
4936
  return streamSSE(c, async (stream) => {
4600
4937
  await handleStreamingResponse({
4601
4938
  stream,
@@ -4616,8 +4953,8 @@ function handleNonStreamingResponse(opts) {
4616
4953
  consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
4617
4954
  let anthropicResponse = translateToAnthropic(response, toolNameMapping);
4618
4955
  consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
4619
- if (ctx.compactResult?.wasCompacted) {
4620
- const marker = createCompactionMarker(ctx.compactResult);
4956
+ if (state.verbose && ctx.truncateResult?.wasCompacted) {
4957
+ const marker = createTruncationResponseMarker(ctx.truncateResult);
4621
4958
  anthropicResponse = prependMarkerToAnthropicResponse(anthropicResponse, marker);
4622
4959
  }
4623
4960
  recordResponse(ctx.historyId, {
@@ -4689,9 +5026,9 @@ async function handleStreamingResponse(opts) {
4689
5026
  };
4690
5027
  const acc = createAnthropicStreamAccumulator();
4691
5028
  try {
4692
- if (ctx.compactResult?.wasCompacted) {
4693
- const marker = createCompactionMarker(ctx.compactResult);
4694
- await sendCompactionMarkerEvent(stream, streamState, marker);
5029
+ if (ctx.truncateResult?.wasCompacted) {
5030
+ const marker = createTruncationResponseMarker(ctx.truncateResult);
5031
+ await sendTruncationMarkerEvent(stream, streamState, marker);
4695
5032
  acc.content += marker;
4696
5033
  }
4697
5034
  await processStreamChunks({
@@ -4719,7 +5056,7 @@ async function handleStreamingResponse(opts) {
4719
5056
  });
4720
5057
  }
4721
5058
  }
4722
- async function sendCompactionMarkerEvent(stream, streamState, marker) {
5059
+ async function sendTruncationMarkerEvent(stream, streamState, marker) {
4723
5060
  const blockStartEvent = {
4724
5061
  type: "content_block_start",
4725
5062
  index: streamState.contentBlockIndex,
@@ -4892,6 +5229,15 @@ function extractToolCallsFromContent(content) {
4892
5229
  });
4893
5230
  return tools.length > 0 ? tools : void 0;
4894
5231
  }
5232
+ function extractToolCallsFromAnthropicContent(content) {
5233
+ const tools = [];
5234
+ for (const block of content) if (block.type === "tool_use") tools.push({
5235
+ id: block.id,
5236
+ name: block.name,
5237
+ input: JSON.stringify(block.input)
5238
+ });
5239
+ return tools.length > 0 ? tools : void 0;
5240
+ }
4895
5241
 
4896
5242
  //#endregion
4897
5243
  //#region src/routes/messages/route.ts
@@ -5018,12 +5364,14 @@ async function runServer(options) {
5018
5364
  if (options.verbose) {
5019
5365
  consola.level = 5;
5020
5366
  consola.info("Verbose logging enabled");
5367
+ state.verbose = true;
5021
5368
  }
5022
5369
  state.accountType = options.accountType;
5023
5370
  if (options.accountType !== "individual") consola.info(`Using ${options.accountType} plan GitHub account`);
5024
5371
  state.manualApprove = options.manual;
5025
5372
  state.showToken = options.showToken;
5026
- state.autoCompact = options.autoCompact;
5373
+ state.autoTruncate = options.autoTruncate;
5374
+ state.directAnthropicApi = options.directAnthropicApi;
5027
5375
  if (options.rateLimit) initAdaptiveRateLimiter({
5028
5376
  baseRetryIntervalSeconds: options.retryInterval,
5029
5377
  requestIntervalSeconds: options.requestInterval,
@@ -5031,7 +5379,8 @@ async function runServer(options) {
5031
5379
  consecutiveSuccessesForRecovery: options.consecutiveSuccesses
5032
5380
  });
5033
5381
  else consola.info("Rate limiting disabled");
5034
- if (!options.autoCompact) consola.info("Auto-compact disabled");
5382
+ if (!options.autoTruncate) consola.info("Auto-truncate disabled");
5383
+ if (!options.directAnthropicApi) consola.info("Direct Anthropic API disabled (using OpenAI translation)");
5035
5384
  initHistory(options.history, options.historyLimit);
5036
5385
  if (options.history) {
5037
5386
  const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
@@ -5173,10 +5522,15 @@ const start = defineCommand({
5173
5522
  default: "1000",
5174
5523
  description: "Maximum number of history entries to keep in memory (0 = unlimited)"
5175
5524
  },
5176
- "no-auto-compact": {
5525
+ "no-auto-truncate": {
5526
+ type: "boolean",
5527
+ default: false,
5528
+ description: "Disable automatic conversation history truncation when exceeding limits"
5529
+ },
5530
+ "no-direct-anthropic": {
5177
5531
  type: "boolean",
5178
5532
  default: false,
5179
- description: "Disable automatic conversation history compression when exceeding limits"
5533
+ description: "Disable direct Anthropic API for Anthropic models (use OpenAI translation instead)"
5180
5534
  }
5181
5535
  },
5182
5536
  run({ args }) {
@@ -5197,7 +5551,8 @@ const start = defineCommand({
5197
5551
  proxyEnv: args["proxy-env"],
5198
5552
  history: !args["no-history"],
5199
5553
  historyLimit: Number.parseInt(args["history-limit"], 10),
5200
- autoCompact: !args["no-auto-compact"]
5554
+ autoTruncate: !args["no-auto-truncate"],
5555
+ directAnthropicApi: !args["no-direct-anthropic"]
5201
5556
  });
5202
5557
  }
5203
5558
  });