@hsupu/copilot-api 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -12,12 +12,40 @@ import { getProxyForUrl } from "proxy-from-env";
  import { Agent, ProxyAgent, setGlobalDispatcher } from "undici";
  import { execSync } from "node:child_process";
  import process$1 from "node:process";
+ import { Box, Text, render, useInput, useStdout } from "ink";
+ import React, { useEffect, useState } from "react";
+ import { Fragment, jsx, jsxs } from "react/jsx-runtime";
  import { Hono } from "hono";
  import { cors } from "hono/cors";
- import { logger } from "hono/logger";
  import { streamSSE } from "hono/streaming";
  import { events } from "fetch-event-stream";

+ //#region rolldown:runtime
+ var __create = Object.create;
+ var __defProp = Object.defineProperty;
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+ var __getOwnPropNames = Object.getOwnPropertyNames;
+ var __getProtoOf = Object.getPrototypeOf;
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
+ var __commonJS = (cb, mod) => function() {
+ return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
+ };
+ var __copyProps = (to, from, except, desc) => {
+ if (from && typeof from === "object" || typeof from === "function") for (var keys = __getOwnPropNames(from), i = 0, n = keys.length, key; i < n; i++) {
+ key = keys[i];
+ if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, {
+ get: ((k) => from[k]).bind(null, key),
+ enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
+ });
+ }
+ return to;
+ };
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", {
+ value: mod,
+ enumerable: true
+ }) : target, mod));
+
+ //#endregion
  //#region src/lib/paths.ts
  const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api");
  const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token");
@@ -45,7 +73,8 @@ const state = {
  accountType: "individual",
  manualApprove: false,
  rateLimitWait: false,
- showToken: false
+ showToken: false,
+ autoCompact: false
  };

  //#endregion
@@ -104,6 +133,27 @@ var HTTPError = class HTTPError extends Error {
  return new HTTPError(message, response.status, text);
  }
  };
+ /** Parse token limit info from error message */
+ function parseTokenLimitError(message) {
+ const match = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
+ if (match) return {
+ current: Number.parseInt(match[1], 10),
+ limit: Number.parseInt(match[2], 10)
+ };
+ return null;
+ }
+ /** Format Anthropic-compatible error for token limit exceeded */
+ function formatTokenLimitError(current, limit) {
+ const excess = current - limit;
+ const percentage = Math.round(excess / limit * 100);
+ return {
+ type: "error",
+ error: {
+ type: "invalid_request_error",
+ message: `prompt is too long: ${current} tokens > ${limit} maximum (${excess} tokens over, ${percentage}% excess)`
+ }
+ };
+ }
  async function forwardError(c, error) {
  consola.error("Error occurred:", error);
  if (error instanceof HTTPError) {
@@ -114,6 +164,15 @@ async function forwardError(c, error) {
  errorJson = error.responseText;
  }
  consola.error("HTTP error:", errorJson);
+ const copilotError = errorJson;
+ if (copilotError.error?.code === "model_max_prompt_tokens_exceeded") {
+ const tokenInfo = parseTokenLimitError(copilotError.error.message ?? "");
+ if (tokenInfo) {
+ const formattedError = formatTokenLimitError(tokenInfo.current, tokenInfo.limit);
+ consola.debug("Returning formatted token limit error:", formattedError);
+ return c.json(formattedError, 400);
+ }
+ }
  return c.json({ error: {
  message: error.responseText,
  type: "error"
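Note: the new parseTokenLimitError/formatTokenLimitError pair rewrites Copilot's model_max_prompt_tokens_exceeded error into an Anthropic-style invalid_request_error with a 400 status. A minimal sketch of the round trip (the sample message below is hypothetical, shaped to match the regex above):

    // Assumed upstream message shape, per the regex in parseTokenLimitError.
    const msg = "prompt token count of 150000 exceeds the limit of 128000";
    const info = parseTokenLimitError(msg);
    // info => { current: 150000, limit: 128000 }
    if (info) {
      const body = formatTokenLimitError(info.current, info.limit);
      // body.error.message =>
      // "prompt is too long: 150000 tokens > 128000 maximum (22000 tokens over, 17% excess)"
    }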
@@ -476,7 +535,7 @@ const logout = defineCommand({

  //#endregion
  //#region src/lib/history.ts
- function generateId() {
+ function generateId$1() {
  return Date.now().toString(36) + Math.random().toString(36).slice(2, 9);
  }
  const historyState = {
@@ -492,7 +551,7 @@ function initHistory(enabled, maxEntries) {
  historyState.maxEntries = maxEntries;
  historyState.entries = [];
  historyState.sessions = /* @__PURE__ */ new Map();
- historyState.currentSessionId = enabled ? generateId() : "";
+ historyState.currentSessionId = enabled ? generateId$1() : "";
  }
  function isHistoryEnabled() {
  return historyState.enabled;
@@ -506,7 +565,7 @@ function getCurrentSession(endpoint) {
  return historyState.currentSessionId;
  }
  }
- const sessionId = generateId();
+ const sessionId = generateId$1();
  historyState.currentSessionId = sessionId;
  historyState.sessions.set(sessionId, {
  id: sessionId,
@@ -526,7 +585,7 @@ function recordRequest(endpoint, request) {
  const session = historyState.sessions.get(sessionId);
  if (!session) return "";
  const entry = {
- id: generateId(),
+ id: generateId$1(),
  sessionId,
  timestamp: Date.now(),
  endpoint,
@@ -543,7 +602,11 @@ function recordRequest(endpoint, request) {
  historyState.entries.push(entry);
  session.requestCount++;
  if (!session.models.includes(request.model)) session.models.push(request.model);
- while (historyState.entries.length > historyState.maxEntries) {
+ if (request.tools && request.tools.length > 0) {
+ if (!session.toolsUsed) session.toolsUsed = [];
+ for (const tool of request.tools) if (!session.toolsUsed.includes(tool.name)) session.toolsUsed.push(tool.name);
+ }
+ while (historyState.maxEntries > 0 && historyState.entries.length > historyState.maxEntries) {
  const removed = historyState.entries.shift();
  if (removed) {
  if (historyState.entries.filter((e) => e.sessionId === removed.sessionId).length === 0) historyState.sessions.delete(removed.sessionId);
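Note: the reworked guard only trims when maxEntries is positive, so a non-positive maxEntries now effectively means "keep everything". A sketch of the same bounded-FIFO eviction in isolation (names are illustrative, not the package's API):

    // Illustrative only: evict oldest entries past a cap, dropping
    // sessions that no longer own any remaining entries.
    function trim(entries: { sessionId: string }[], sessions: Map<string, unknown>, maxEntries: number): void {
      while (maxEntries > 0 && entries.length > maxEntries) {
        const removed = entries.shift();
        if (removed && !entries.some((e) => e.sessionId === removed.sessionId)) {
          sessions.delete(removed.sessionId);
        }
      }
    }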
@@ -622,13 +685,13 @@ function getSessionEntries(sessionId) {
  function clearHistory() {
  historyState.entries = [];
  historyState.sessions = /* @__PURE__ */ new Map();
- historyState.currentSessionId = generateId();
+ historyState.currentSessionId = generateId$1();
  }
  function deleteSession(sessionId) {
  if (!historyState.sessions.has(sessionId)) return false;
  historyState.entries = historyState.entries.filter((e) => e.sessionId !== sessionId);
  historyState.sessions.delete(sessionId);
- if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId();
+ if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId$1();
  return true;
  }
  function getStats() {
@@ -768,7 +831,7 @@ function initProxyFromEnv() {
  //#endregion
  //#region src/lib/shell.ts
  function getShell() {
- const { platform, ppid, env } = process$1;
+ const { platform, ppid, env: env$1 } = process$1;
  if (platform === "win32") {
  try {
  const command = `wmic process get ParentProcessId,Name | findstr "${ppid}"`;
@@ -778,7 +841,7 @@ function getShell() {
  }
  return "cmd";
  } else {
- const shellPath = env.SHELL;
+ const shellPath = env$1.SHELL;
  if (shellPath) {
  if (shellPath.endsWith("zsh")) return "zsh";
  if (shellPath.endsWith("fish")) return "fish";
@@ -800,16 +863,16 @@ function generateEnvScript(envVars, commandToRun = "") {
  let commandBlock;
  switch (shell) {
  case "powershell":
- commandBlock = filteredEnvVars.map(([key, value]) => `$env:${key} = "${value.replace(/"/g, "`\"")}"`).join("; ");
+ commandBlock = filteredEnvVars.map(([key, value]) => `$env:${key} = "${value.replaceAll("\"", "`\"")}"`).join("; ");
  break;
  case "cmd":
  commandBlock = filteredEnvVars.map(([key, value]) => `set ${key}=${value}`).join(" & ");
  break;
  case "fish":
- commandBlock = filteredEnvVars.map(([key, value]) => `set -gx ${key} "${value.replace(/"/g, "\\\"")}"`).join("; ");
+ commandBlock = filteredEnvVars.map(([key, value]) => `set -gx ${key} "${value.replaceAll("\"", String.raw`\"`)}"`).join("; ");
  break;
  default: {
- const assignments = filteredEnvVars.map(([key, value]) => `${key}="${value.replace(/"/g, "\\\"")}"`).join(" ");
+ const assignments = filteredEnvVars.map(([key, value]) => `${key}="${value.replaceAll("\"", String.raw`\"`)}"`).join(" ");
  commandBlock = filteredEnvVars.length > 0 ? `export ${assignments}` : "";
  break;
  }
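Note: the quoting change swaps regex-based `replace(/"/g, ...)` for `replaceAll` with a String.raw escape; the output should be identical. For example:

    const value = 'a"b';
    value.replace(/"/g, '\\"');              // a\"b (old form)
    value.replaceAll('"', String.raw`\"`);   // a\"b (new form)
    // POSIX shells then receive: KEY="a\"b"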
@@ -819,68 +882,721 @@ function generateEnvScript(envVars, commandToRun = "") {
  }

  //#endregion
- //#region src/lib/approval.ts
- const awaitApproval = async () => {
- if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", 403, JSON.stringify({ message: "Request rejected" }));
+ //#region node_modules/picocolors/picocolors.js
+ var require_picocolors = /* @__PURE__ */ __commonJS({ "node_modules/picocolors/picocolors.js": ((exports, module) => {
+ let p = process || {}, argv = p.argv || [], env = p.env || {};
+ let isColorSupported = !(!!env.NO_COLOR || argv.includes("--no-color")) && (!!env.FORCE_COLOR || argv.includes("--color") || p.platform === "win32" || (p.stdout || {}).isTTY && env.TERM !== "dumb" || !!env.CI);
+ let formatter = (open, close, replace = open) => (input) => {
+ let string = "" + input, index = string.indexOf(close, open.length);
+ return ~index ? open + replaceClose(string, close, replace, index) + close : open + string + close;
+ };
+ let replaceClose = (string, close, replace, index) => {
+ let result = "", cursor = 0;
+ do {
+ result += string.substring(cursor, index) + replace;
+ cursor = index + close.length;
+ index = string.indexOf(close, cursor);
+ } while (~index);
+ return result + string.substring(cursor);
+ };
+ let createColors = (enabled = isColorSupported) => {
+ let f = enabled ? formatter : () => String;
+ return {
+ isColorSupported: enabled,
+ reset: f("\x1B[0m", "\x1B[0m"),
+ bold: f("\x1B[1m", "\x1B[22m", "\x1B[22m\x1B[1m"),
+ dim: f("\x1B[2m", "\x1B[22m", "\x1B[22m\x1B[2m"),
+ italic: f("\x1B[3m", "\x1B[23m"),
+ underline: f("\x1B[4m", "\x1B[24m"),
+ inverse: f("\x1B[7m", "\x1B[27m"),
+ hidden: f("\x1B[8m", "\x1B[28m"),
+ strikethrough: f("\x1B[9m", "\x1B[29m"),
+ black: f("\x1B[30m", "\x1B[39m"),
+ red: f("\x1B[31m", "\x1B[39m"),
+ green: f("\x1B[32m", "\x1B[39m"),
+ yellow: f("\x1B[33m", "\x1B[39m"),
+ blue: f("\x1B[34m", "\x1B[39m"),
+ magenta: f("\x1B[35m", "\x1B[39m"),
+ cyan: f("\x1B[36m", "\x1B[39m"),
+ white: f("\x1B[37m", "\x1B[39m"),
+ gray: f("\x1B[90m", "\x1B[39m"),
+ bgBlack: f("\x1B[40m", "\x1B[49m"),
+ bgRed: f("\x1B[41m", "\x1B[49m"),
+ bgGreen: f("\x1B[42m", "\x1B[49m"),
+ bgYellow: f("\x1B[43m", "\x1B[49m"),
+ bgBlue: f("\x1B[44m", "\x1B[49m"),
+ bgMagenta: f("\x1B[45m", "\x1B[49m"),
+ bgCyan: f("\x1B[46m", "\x1B[49m"),
+ bgWhite: f("\x1B[47m", "\x1B[49m"),
+ blackBright: f("\x1B[90m", "\x1B[39m"),
+ redBright: f("\x1B[91m", "\x1B[39m"),
+ greenBright: f("\x1B[92m", "\x1B[39m"),
+ yellowBright: f("\x1B[93m", "\x1B[39m"),
+ blueBright: f("\x1B[94m", "\x1B[39m"),
+ magentaBright: f("\x1B[95m", "\x1B[39m"),
+ cyanBright: f("\x1B[96m", "\x1B[39m"),
+ whiteBright: f("\x1B[97m", "\x1B[39m"),
+ bgBlackBright: f("\x1B[100m", "\x1B[49m"),
+ bgRedBright: f("\x1B[101m", "\x1B[49m"),
+ bgGreenBright: f("\x1B[102m", "\x1B[49m"),
+ bgYellowBright: f("\x1B[103m", "\x1B[49m"),
+ bgBlueBright: f("\x1B[104m", "\x1B[49m"),
+ bgMagentaBright: f("\x1B[105m", "\x1B[49m"),
+ bgCyanBright: f("\x1B[106m", "\x1B[49m"),
+ bgWhiteBright: f("\x1B[107m", "\x1B[49m")
+ };
+ };
+ module.exports = createColors();
+ module.exports.createColors = createColors;
+ }) });
+
+ //#endregion
+ //#region src/lib/tui/console-renderer.ts
+ var import_picocolors = /* @__PURE__ */ __toESM(require_picocolors(), 1);
+ const CLEAR_LINE = "\x1B[2K\r";
+ function formatDuration$1(ms) {
+ if (ms < 1e3) return `${ms}ms`;
+ return `${(ms / 1e3).toFixed(1)}s`;
+ }
+ function formatNumber$1(n) {
+ if (n >= 1e6) return `${(n / 1e6).toFixed(1)}M`;
+ if (n >= 1e3) return `${(n / 1e3).toFixed(1)}K`;
+ return String(n);
+ }
+ function formatTokens$1(input, output) {
+ if (input === void 0 || output === void 0) return "-";
+ return `${formatNumber$1(input)}/${formatNumber$1(output)}`;
+ }
+ /**
+ * Console renderer that shows request lifecycle with apt-get style footer
+ *
+ * Log format:
+ * - Start: [....] METHOD /path model-name
+ * - Streaming: [<-->] METHOD /path model-name streaming...
+ * - Complete: [ OK ] METHOD /path 200 1.2s 1.5K/500 model-name
+ *
+ * Features:
+ * - /history API requests are displayed in gray (dim)
+ * - Sticky footer shows active request count, updated in-place on the last line
+ * - Footer disappears when all requests complete
+ */
+ var ConsoleRenderer = class {
+ activeRequests = /* @__PURE__ */ new Map();
+ showActive;
+ footerVisible = false;
+ isTTY;
+ constructor(options) {
+ this.showActive = options?.showActive ?? true;
+ this.isTTY = process.stdout.isTTY;
+ }
+ /**
+ * Get footer text based on active request count
+ */
+ getFooterText() {
+ const activeCount = this.activeRequests.size;
+ if (activeCount === 0) return "";
+ const plural = activeCount === 1 ? "" : "s";
+ return import_picocolors.default.dim(`[....] ${activeCount} request${plural} in progress...`);
+ }
+ /**
+ * Render footer in-place on current line (no newline)
+ * Only works on TTY terminals
+ */
+ renderFooter() {
+ if (!this.isTTY) return;
+ const footerText = this.getFooterText();
+ if (footerText) {
+ process.stdout.write(CLEAR_LINE + footerText);
+ this.footerVisible = true;
+ } else if (this.footerVisible) {
+ process.stdout.write(CLEAR_LINE);
+ this.footerVisible = false;
+ }
+ }
+ /**
+ * Clear footer and prepare for log output
+ */
+ clearFooterForLog() {
+ if (this.footerVisible && this.isTTY) {
+ process.stdout.write(CLEAR_LINE);
+ this.footerVisible = false;
+ }
+ }
+ /**
+ * Print a log line with proper footer handling
+ * 1. Clear footer if visible
+ * 2. Print log with newline
+ * 3. Re-render footer on new line (no newline after footer)
+ */
+ printLog(message, isGray = false) {
+ this.clearFooterForLog();
+ if (isGray) consola.log(import_picocolors.default.dim(message));
+ else consola.log(message);
+ this.renderFooter();
+ }
+ onRequestStart(request) {
+ this.activeRequests.set(request.id, request);
+ if (this.showActive) {
+ const modelInfo = request.model ? ` ${request.model}` : "";
+ const queueInfo = request.queuePosition !== void 0 && request.queuePosition > 0 ? ` [q#${request.queuePosition}]` : "";
+ const message = `[....] ${request.method} ${request.path}${modelInfo}${queueInfo}`;
+ this.printLog(message, request.isHistoryAccess);
+ }
+ }
+ onRequestUpdate(id, update) {
+ const request = this.activeRequests.get(id);
+ if (!request) return;
+ Object.assign(request, update);
+ if (this.showActive && update.status === "streaming") {
+ const modelInfo = request.model ? ` ${request.model}` : "";
+ const message = `[<-->] ${request.method} ${request.path}${modelInfo} streaming...`;
+ this.printLog(message, request.isHistoryAccess);
+ }
+ }
+ onRequestComplete(request) {
+ this.activeRequests.delete(request.id);
+ const status = request.statusCode ?? 0;
+ const duration = formatDuration$1(request.durationMs ?? 0);
+ const tokens = request.model ? formatTokens$1(request.inputTokens, request.outputTokens) : "";
+ const modelInfo = request.model ? ` ${request.model}` : "";
+ const isError = request.status === "error" || status >= 400;
+ const prefix = isError ? "[FAIL]" : "[ OK ]";
+ const tokensPart = tokens ? ` ${tokens}` : "";
+ let content = `${prefix} ${request.method} ${request.path} ${status} ${duration}${tokensPart}${modelInfo}`;
+ if (isError) {
+ const errorInfo = request.error ? `: ${request.error}` : "";
+ content += errorInfo;
+ }
+ this.printLog(content, request.isHistoryAccess);
+ }
+ destroy() {
+ if (this.footerVisible && this.isTTY) {
+ process.stdout.write(CLEAR_LINE);
+ this.footerVisible = false;
+ }
+ this.activeRequests.clear();
+ }
  };

  //#endregion
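Note: ConsoleRenderer keeps its apt-get style footer pinned to the last terminal row by rewriting it with "\x1B[2K\r" (erase line + carriage return) around every log line. A standalone sketch of that technique (not the package's API):

    const CLEAR = "\x1B[2K\r";
    function logAboveFooter(line: string, footer: string): void {
      process.stdout.write(CLEAR);          // erase the sticky footer
      process.stdout.write(line + "\n");    // print the log line, newline included
      process.stdout.write(CLEAR + footer); // redraw footer in place, no newline
    }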
- //#region src/lib/queue.ts
- var RequestQueue = class {
- queue = [];
- processing = false;
- lastRequestTime = 0;
- async enqueue(execute, rateLimitSeconds) {
- return new Promise((resolve, reject) => {
- this.queue.push({
- execute,
- resolve,
- reject
- });
- if (this.queue.length > 1) {
- const waitTime = Math.ceil((this.queue.length - 1) * rateLimitSeconds);
- consola.info(`Request queued. Position: ${this.queue.length}, estimated wait: ${waitTime}s`);
- }
- this.processQueue(rateLimitSeconds);
+ //#region src/lib/tui/fullscreen-renderer.tsx
+ const tuiState = {
+ activeRequests: /* @__PURE__ */ new Map(),
+ completedRequests: [],
+ errorRequests: []
+ };
+ const listeners = [];
+ function notifyListeners() {
+ for (const listener of listeners) listener();
+ }
+ function formatDuration(ms) {
+ if (ms < 1e3) return `${ms}ms`;
+ return `${(ms / 1e3).toFixed(1)}s`;
+ }
+ function formatNumber(n) {
+ if (n >= 1e6) return `${(n / 1e6).toFixed(1)}M`;
+ if (n >= 1e3) return `${(n / 1e3).toFixed(1)}K`;
+ return String(n);
+ }
+ function formatTokens(input, output) {
+ if (input === void 0 || output === void 0) return "-";
+ return `${formatNumber(input)}/${formatNumber(output)}`;
+ }
+ function getElapsedTime(startTime) {
+ return formatDuration(Date.now() - startTime);
+ }
+ function TabHeader({ currentTab, counts }) {
+ const tabs = [
+ {
+ key: "active",
+ label: "Active",
+ count: counts.active
+ },
+ {
+ key: "completed",
+ label: "Completed",
+ count: counts.completed
+ },
+ {
+ key: "errors",
+ label: "Errors",
+ count: counts.errors
+ }
+ ];
+ return /* @__PURE__ */ jsxs(Box, {
+ borderStyle: "single",
+ paddingX: 1,
+ children: [tabs.map((tab, idx) => /* @__PURE__ */ jsxs(React.Fragment, { children: [idx > 0 && /* @__PURE__ */ jsx(Text, { children: " │ " }), /* @__PURE__ */ jsxs(Text, {
+ bold: currentTab === tab.key,
+ color: currentTab === tab.key ? "cyan" : void 0,
+ inverse: currentTab === tab.key,
+ children: [
+ " ",
+ "[",
+ idx + 1,
+ "] ",
+ tab.label,
+ " (",
+ tab.count,
+ ")",
+ " "
+ ]
+ })] }, tab.key)), /* @__PURE__ */ jsx(Text, {
+ dimColor: true,
+ children: " │ Press 1/2/3 to switch tabs, q to quit"
+ })]
+ });
+ }
+ function getStatusColor(status) {
+ if (status === "streaming") return "yellow";
+ if (status === "queued") return "gray";
+ return "blue";
+ }
+ function getStatusIcon(status) {
+ if (status === "streaming") return "⟳";
+ if (status === "queued") return "◷";
+ return "●";
+ }
+ function ActiveRequestRow({ request }) {
+ const [, setTick] = useState(0);
+ useEffect(() => {
+ const interval = setInterval(() => setTick((t) => t + 1), 1e3);
+ return () => clearInterval(interval);
+ }, []);
+ const statusColor = getStatusColor(request.status);
+ const statusIcon = getStatusIcon(request.status);
+ return /* @__PURE__ */ jsxs(Box, { children: [
+ /* @__PURE__ */ jsxs(Text, {
+ color: statusColor,
+ children: [statusIcon, " "]
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ bold: true,
+ children: request.method
+ }),
+ /* @__PURE__ */ jsxs(Text, { children: [
+ " ",
+ request.path,
+ " "
+ ] }),
+ /* @__PURE__ */ jsxs(Text, {
+ dimColor: true,
+ children: [getElapsedTime(request.startTime), " "]
+ }),
+ request.queuePosition !== void 0 && request.queuePosition > 0 && /* @__PURE__ */ jsxs(Text, {
+ color: "gray",
+ children: [
+ "[queue #",
+ request.queuePosition,
+ "] "
+ ]
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ color: "magenta",
+ children: request.model
+ })
+ ] });
+ }
+ function CompletedRequestRow({ request }) {
+ const isError = request.status === "error" || (request.statusCode ?? 0) >= 400;
+ return /* @__PURE__ */ jsxs(Box, { children: [
+ /* @__PURE__ */ jsxs(Text, {
+ color: isError ? "red" : "green",
+ children: [isError ? "✗" : "✓", " "]
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ bold: true,
+ children: request.method
+ }),
+ /* @__PURE__ */ jsxs(Text, { children: [
+ " ",
+ request.path,
+ " "
+ ] }),
+ /* @__PURE__ */ jsxs(Text, {
+ color: isError ? "red" : "green",
+ children: [request.statusCode ?? "-", " "]
+ }),
+ /* @__PURE__ */ jsxs(Text, {
+ dimColor: true,
+ children: [formatDuration(request.durationMs ?? 0), " "]
+ }),
+ /* @__PURE__ */ jsxs(Text, { children: [formatTokens(request.inputTokens, request.outputTokens), " "] }),
+ /* @__PURE__ */ jsx(Text, {
+ color: "magenta",
+ children: request.model
+ })
+ ] });
+ }
+ function ErrorRequestRow({ request }) {
+ return /* @__PURE__ */ jsxs(Box, {
+ flexDirection: "column",
+ children: [/* @__PURE__ */ jsxs(Box, { children: [
+ /* @__PURE__ */ jsx(Text, {
+ color: "red",
+ children: "✗ "
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ bold: true,
+ children: request.method
+ }),
+ /* @__PURE__ */ jsxs(Text, { children: [
+ " ",
+ request.path,
+ " "
+ ] }),
+ /* @__PURE__ */ jsxs(Text, {
+ color: "red",
+ children: [request.statusCode ?? "-", " "]
+ }),
+ /* @__PURE__ */ jsxs(Text, {
+ dimColor: true,
+ children: [formatDuration(request.durationMs ?? 0), " "]
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ color: "magenta",
+ children: request.model
+ })
+ ] }), request.error && /* @__PURE__ */ jsx(Box, {
+ marginLeft: 2,
+ children: /* @__PURE__ */ jsxs(Text, {
+ color: "red",
+ dimColor: true,
+ children: ["└─ ", request.error]
+ })
+ })]
+ });
+ }
+ function ContentPanel({ currentTab, activeList, completedList, errorList, contentHeight }) {
+ if (currentTab === "active") {
+ if (activeList.length === 0) return /* @__PURE__ */ jsx(Text, {
+ dimColor: true,
+ children: "No active requests"
  });
+ return /* @__PURE__ */ jsx(Fragment, { children: activeList.slice(0, contentHeight).map((req) => /* @__PURE__ */ jsx(ActiveRequestRow, { request: req }, req.id)) });
  }
- async processQueue(rateLimitSeconds) {
- if (this.processing) return;
- this.processing = true;
- while (this.queue.length > 0) {
- const elapsedMs = Date.now() - this.lastRequestTime;
- const requiredMs = rateLimitSeconds * 1e3;
- if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
- const waitMs = requiredMs - elapsedMs;
- consola.debug(`Rate limit: waiting ${Math.ceil(waitMs / 1e3)}s`);
- await new Promise((resolve) => setTimeout(resolve, waitMs));
- }
- const request = this.queue.shift();
- if (!request) break;
- this.lastRequestTime = Date.now();
- try {
- const result = await request.execute();
- request.resolve(result);
- } catch (error) {
- request.reject(error);
- }
+ if (currentTab === "completed") {
+ if (completedList.length === 0) return /* @__PURE__ */ jsx(Text, {
+ dimColor: true,
+ children: "No completed requests"
+ });
+ return /* @__PURE__ */ jsx(Fragment, { children: completedList.slice(-contentHeight).reverse().map((req) => /* @__PURE__ */ jsx(CompletedRequestRow, { request: req }, req.id)) });
+ }
+ if (errorList.length === 0) return /* @__PURE__ */ jsx(Text, {
+ dimColor: true,
+ children: "No errors"
+ });
+ return /* @__PURE__ */ jsx(Fragment, { children: errorList.slice(-contentHeight).reverse().map((req) => /* @__PURE__ */ jsx(ErrorRequestRow, { request: req }, req.id)) });
+ }
+ function TuiApp() {
+ const [currentTab, setCurrentTab] = useState("active");
+ const [, forceUpdate] = useState(0);
+ const { stdout } = useStdout();
+ useEffect(() => {
+ const listener = () => forceUpdate((n) => n + 1);
+ listeners.push(listener);
+ return () => {
+ const idx = listeners.indexOf(listener);
+ if (idx !== -1) listeners.splice(idx, 1);
+ };
+ }, []);
+ useInput((input, key) => {
+ switch (input) {
+ case "1":
+ setCurrentTab("active");
+ break;
+ case "2":
+ setCurrentTab("completed");
+ break;
+ case "3":
+ setCurrentTab("errors");
+ break;
+ default: if (input === "q" || key.ctrl && input === "c") process.exit(0);
  }
- this.processing = false;
+ });
+ const activeList = Array.from(tuiState.activeRequests.values());
+ const completedList = tuiState.completedRequests;
+ const errorList = tuiState.errorRequests;
+ const counts = {
+ active: activeList.length,
+ completed: completedList.length,
+ errors: errorList.length
+ };
+ const terminalHeight = stdout.rows || 24;
+ const contentHeight = terminalHeight - 3 - 1 - 2;
+ return /* @__PURE__ */ jsxs(Box, {
+ flexDirection: "column",
+ height: terminalHeight,
+ children: [
+ /* @__PURE__ */ jsx(TabHeader, {
+ currentTab,
+ counts
+ }),
+ /* @__PURE__ */ jsx(Box, {
+ flexDirection: "column",
+ height: contentHeight,
+ borderStyle: "single",
+ paddingX: 1,
+ overflow: "hidden",
+ children: /* @__PURE__ */ jsx(ContentPanel, {
+ currentTab,
+ activeList,
+ completedList,
+ errorList,
+ contentHeight
+ })
+ }),
+ /* @__PURE__ */ jsx(Box, {
+ paddingX: 1,
+ children: /* @__PURE__ */ jsxs(Text, {
+ dimColor: true,
+ children: [
+ "copilot-api │ Active: ",
+ counts.active,
+ " │ Completed: ",
+ counts.completed,
+ " ",
+ "│ Errors: ",
+ counts.errors
+ ]
+ })
+ })
+ ]
+ });
+ }
+ /**
+ * Fullscreen TUI renderer using Ink
+ * Provides interactive terminal interface with tabs
+ */
+ var FullscreenRenderer = class {
+ inkInstance = null;
+ maxHistory = 100;
+ constructor(options) {
+ if (options?.maxHistory !== void 0) this.maxHistory = options.maxHistory;
  }
- get length() {
- return this.queue.length;
+ start() {
+ if (this.inkInstance) return;
+ this.inkInstance = render(/* @__PURE__ */ jsx(TuiApp, {}), {});
+ }
+ onRequestStart(request) {
+ tuiState.activeRequests.set(request.id, { ...request });
+ notifyListeners();
+ }
+ onRequestUpdate(id, update) {
+ const request = tuiState.activeRequests.get(id);
+ if (!request) return;
+ Object.assign(request, update);
+ notifyListeners();
+ }
+ onRequestComplete(request) {
+ tuiState.activeRequests.delete(request.id);
+ if (request.status === "error" || (request.statusCode ?? 0) >= 400) {
+ tuiState.errorRequests.push({ ...request });
+ while (tuiState.errorRequests.length > this.maxHistory) tuiState.errorRequests.shift();
+ }
+ tuiState.completedRequests.push({ ...request });
+ while (tuiState.completedRequests.length > this.maxHistory) tuiState.completedRequests.shift();
+ notifyListeners();
+ }
+ destroy() {
+ if (this.inkInstance) {
+ this.inkInstance.unmount();
+ this.inkInstance = null;
+ }
+ tuiState.activeRequests.clear();
+ tuiState.completedRequests = [];
+ tuiState.errorRequests = [];
  }
  };
- const requestQueue = new RequestQueue();
+
+ //#endregion
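Note: the fullscreen renderer mutates module-level tuiState and re-renders by pinging listener callbacks that each component registers. A minimal hedged sketch of the same subscription pattern with Ink (standalone; not the package's API):

    import React, { useEffect, useState } from "react";
    import { render, Text } from "ink";

    const listeners: Array<() => void> = [];
    const notify = () => { for (const l of listeners) l(); };

    function Status() {
      const [, force] = useState(0);
      useEffect(() => {
        const l = () => force((n) => n + 1);   // external change → re-render
        listeners.push(l);
        return () => { const i = listeners.indexOf(l); if (i !== -1) listeners.splice(i, 1); };
      }, []);
      return React.createElement(Text, null, `rendered at ${Date.now()}`);
    }

    render(React.createElement(Status));
    setInterval(notify, 1000);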
+ //#region src/lib/tui/tracker.ts
+ function generateId() {
+ return Date.now().toString(36) + Math.random().toString(36).slice(2, 6);
+ }
+ var RequestTracker = class {
+ requests = /* @__PURE__ */ new Map();
+ renderer = null;
+ completedQueue = [];
+ historySize = 5;
+ completedDisplayMs = 2e3;
+ setRenderer(renderer) {
+ this.renderer = renderer;
+ }
+ setOptions(options) {
+ if (options.historySize !== void 0) this.historySize = options.historySize;
+ if (options.completedDisplayMs !== void 0) this.completedDisplayMs = options.completedDisplayMs;
+ }
+ /**
+ * Start tracking a new request
+ * Returns the tracking ID
+ */
+ startRequest(options) {
+ const id = generateId();
+ const request = {
+ id,
+ method: options.method,
+ path: options.path,
+ model: options.model,
+ startTime: Date.now(),
+ status: "executing",
+ isHistoryAccess: options.isHistoryAccess
+ };
+ this.requests.set(id, request);
+ this.renderer?.onRequestStart(request);
+ return id;
+ }
+ /**
+ * Update request status
+ */
+ updateRequest(id, update) {
+ const request = this.requests.get(id);
+ if (!request) return;
+ if (update.status !== void 0) request.status = update.status;
+ if (update.statusCode !== void 0) request.statusCode = update.statusCode;
+ if (update.durationMs !== void 0) request.durationMs = update.durationMs;
+ if (update.inputTokens !== void 0) request.inputTokens = update.inputTokens;
+ if (update.outputTokens !== void 0) request.outputTokens = update.outputTokens;
+ if (update.error !== void 0) request.error = update.error;
+ if (update.queuePosition !== void 0) request.queuePosition = update.queuePosition;
+ this.renderer?.onRequestUpdate(id, update);
+ }
+ /**
+ * Mark request as completed
+ */
+ completeRequest(id, statusCode, usage) {
+ const request = this.requests.get(id);
+ if (!request) return;
+ request.status = statusCode >= 200 && statusCode < 400 ? "completed" : "error";
+ request.statusCode = statusCode;
+ request.durationMs = Date.now() - request.startTime;
+ if (usage) {
+ request.inputTokens = usage.inputTokens;
+ request.outputTokens = usage.outputTokens;
+ }
+ this.renderer?.onRequestComplete(request);
+ this.requests.delete(id);
+ this.completedQueue.push(request);
+ while (this.completedQueue.length > this.historySize) this.completedQueue.shift();
+ setTimeout(() => {
+ const idx = this.completedQueue.indexOf(request);
+ if (idx !== -1) this.completedQueue.splice(idx, 1);
+ }, this.completedDisplayMs);
+ }
+ /**
+ * Mark request as failed with error
+ */
+ failRequest(id, error) {
+ const request = this.requests.get(id);
+ if (!request) return;
+ request.status = "error";
+ request.error = error;
+ request.durationMs = Date.now() - request.startTime;
+ this.renderer?.onRequestComplete(request);
+ this.requests.delete(id);
+ this.completedQueue.push(request);
+ while (this.completedQueue.length > this.historySize) this.completedQueue.shift();
+ }
+ /**
+ * Get all active requests
+ */
+ getActiveRequests() {
+ return Array.from(this.requests.values());
+ }
+ /**
+ * Get recently completed requests
+ */
+ getCompletedRequests() {
+ return [...this.completedQueue];
+ }
+ /**
+ * Get request by ID
+ */
+ getRequest(id) {
+ return this.requests.get(id);
+ }
+ /**
+ * Clear all tracked requests
+ */
+ clear() {
+ this.requests.clear();
+ this.completedQueue = [];
+ }
+ };
+ const requestTracker = new RequestTracker();
+
+ //#endregion
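Note: RequestTracker is renderer-agnostic; callers drive the lifecycle with startRequest / updateRequest / completeRequest (or failRequest) and whichever renderer is attached mirrors it. A hedged usage sketch (the model id and numbers are illustrative):

    const id = requestTracker.startRequest({
      method: "POST",
      path: "/v1/chat/completions",
      model: "gpt-4o",          // illustrative model id
      isHistoryAccess: false,
    });
    requestTracker.updateRequest(id, { status: "streaming" });
    requestTracker.completeRequest(id, 200, { inputTokens: 1500, outputTokens: 500 });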
+ //#region src/lib/tui/middleware.ts
  /**
- * Execute a request with rate limiting via queue.
- * Requests are queued and processed sequentially at the configured rate.
+ * Custom logger middleware that tracks requests through the TUI system
+ * Shows single-line output: METHOD /path 200 1.2s 1.5K/500 model-name
+ *
+ * For streaming responses (SSE), the handler is responsible for calling
+ * completeRequest after the stream finishes.
  */
- async function executeWithRateLimit(state$1, execute) {
- if (state$1.rateLimitSeconds === void 0) return execute();
- return requestQueue.enqueue(execute, state$1.rateLimitSeconds);
+ function tuiLogger() {
+ return async (c, next) => {
+ const method = c.req.method;
+ const path$1 = c.req.path;
+ const isHistoryAccess = path$1.startsWith("/history");
+ const trackingId = requestTracker.startRequest({
+ method,
+ path: path$1,
+ model: "",
+ isHistoryAccess
+ });
+ c.set("trackingId", trackingId);
+ try {
+ await next();
+ if ((c.res.headers.get("content-type") ?? "").includes("text/event-stream")) return;
+ const status = c.res.status;
+ const inputTokens = c.res.headers.get("x-input-tokens");
+ const outputTokens = c.res.headers.get("x-output-tokens");
+ const model = c.res.headers.get("x-model");
+ if (model) {
+ const request = requestTracker.getRequest(trackingId);
+ if (request) request.model = model;
+ }
+ requestTracker.completeRequest(trackingId, status, inputTokens && outputTokens ? {
+ inputTokens: Number.parseInt(inputTokens, 10),
+ outputTokens: Number.parseInt(outputTokens, 10)
+ } : void 0);
+ } catch (error) {
+ requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Unknown error");
+ throw error;
+ }
+ };
+ }
+
+ //#endregion
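Note: tuiLogger() takes over from the hono/logger import removed at the top of this diff; it tags each request with a trackingId and, on the non-SSE path, completes tracking from the x-input-tokens / x-output-tokens / x-model response headers. Wiring it into a Hono app would look roughly like this (sketch; the handler name is illustrative):

    const app = new Hono();
    app.use(tuiLogger()); // must run before handlers so c.get("trackingId") is set
    app.post("/v1/chat/completions", handleCompletion); // illustrative handler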
+ //#region src/lib/tui/index.ts
+ /**
+ * Initialize the TUI system
+ * @param options.mode - "console" for simple log output (default), "fullscreen" for interactive TUI
+ */
+ function initTui(options) {
+ const enabled = options?.enabled ?? process.stdout.isTTY;
+ const mode = options?.mode ?? "console";
+ if (enabled) if (mode === "fullscreen") {
+ const renderer = new FullscreenRenderer({ maxHistory: options?.historySize ?? 100 });
+ requestTracker.setRenderer(renderer);
+ renderer.start();
+ } else {
+ const renderer = new ConsoleRenderer();
+ requestTracker.setRenderer(renderer);
+ }
+ if (options?.historySize !== void 0 || options?.completedDisplayMs !== void 0) requestTracker.setOptions({
+ historySize: options.historySize,
+ completedDisplayMs: options.completedDisplayMs
+ });
  }

+ //#endregion
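Note: initTui defaults to the console renderer and only enables itself on a TTY unless overridden. A hedged usage sketch (how CLI flags map to these options is not shown in this diff):

    initTui({ mode: "fullscreen", historySize: 200 }); // interactive tab UI
    initTui();                                         // console mode, TTY only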
+ //#region src/lib/approval.ts
+ const awaitApproval = async () => {
+ if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", 403, JSON.stringify({ message: "Request rejected" }));
+ };
+
  //#endregion
  //#region src/lib/tokenizer.ts
  const ENCODING_MAP = {
@@ -1085,6 +1801,231 @@ const getTokenCount = async (payload, model) => {
  };
  };

+ //#endregion
+ //#region src/lib/auto-compact.ts
+ const DEFAULT_CONFIG = {
+ targetTokens: 1e5,
+ safetyMarginPercent: 10
+ };
+ /**
+ * Check if payload needs compaction based on model limits.
+ * Uses a safety margin to account for token counting differences.
+ */
+ async function checkNeedsCompaction(payload, model, safetyMarginPercent = 10) {
+ const currentTokens = (await getTokenCount(payload, model)).input;
+ const rawLimit = model.capabilities.limits.max_prompt_tokens ?? 128e3;
+ const limit = Math.floor(rawLimit * (1 - safetyMarginPercent / 100));
+ return {
+ needed: currentTokens > limit,
+ currentTokens,
+ limit
+ };
+ }
+ /**
+ * Calculate approximate token count for a single message.
+ * This is a fast estimation for splitting decisions.
+ */
+ function estimateMessageTokens(message) {
+ let text = "";
+ if (typeof message.content === "string") text = message.content;
+ else if (Array.isArray(message.content)) {
+ for (const part of message.content) if (part.type === "text") text += part.text;
+ else if ("image_url" in part) text += part.image_url.url;
+ }
+ if (message.tool_calls) text += JSON.stringify(message.tool_calls);
+ return Math.ceil(text.length / 4) + 10;
+ }
+ /**
+ * Extract system messages from the beginning of the message list.
+ */
+ function extractSystemMessages(messages) {
+ const systemMessages = [];
+ let i = 0;
+ while (i < messages.length) {
+ const msg = messages[i];
+ if (msg.role === "system" || msg.role === "developer") {
+ systemMessages.push(msg);
+ i++;
+ } else break;
+ }
+ return {
+ systemMessages,
+ remainingMessages: messages.slice(i)
+ };
+ }
+ /**
+ * Find messages to keep from the end to stay under target tokens.
+ * Returns the starting index of messages to preserve.
+ */
+ function findPreserveIndex(messages, targetTokens, systemTokens) {
+ const availableTokens = targetTokens - systemTokens - 500;
+ let accumulatedTokens = 0;
+ for (let i = messages.length - 1; i >= 0; i--) {
+ const msgTokens = estimateMessageTokens(messages[i]);
+ if (accumulatedTokens + msgTokens > availableTokens) return i + 1;
+ accumulatedTokens += msgTokens;
+ }
+ return 0;
+ }
+ /**
+ * Calculate estimated tokens for system messages.
+ */
+ function estimateSystemTokens(systemMessages) {
+ return systemMessages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
+ }
+ /**
+ * Create a truncation marker message.
+ */
+ function createTruncationMarker(removedCount) {
+ return {
+ role: "user",
+ content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages were removed to fit context limits. The conversation continues below.]`
+ };
+ }
+ /**
+ * Perform auto-compaction on a payload that exceeds token limits.
+ * This uses simple truncation - no LLM calls required.
+ */
+ async function autoCompact(payload, model, config = {}) {
+ const cfg = {
+ ...DEFAULT_CONFIG,
+ ...config
+ };
+ const originalTokens = (await getTokenCount(payload, model)).input;
+ const rawLimit = model.capabilities.limits.max_prompt_tokens ?? 128e3;
+ const limit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
+ if (originalTokens <= limit) return {
+ payload,
+ wasCompacted: false,
+ originalTokens,
+ compactedTokens: originalTokens,
+ removedMessageCount: 0
+ };
+ consola.info(`Auto-compact: ${originalTokens} tokens exceeds limit of ${limit}, truncating...`);
+ const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
+ const systemTokens = estimateSystemTokens(systemMessages);
+ consola.debug(`Auto-compact: ${systemMessages.length} system messages (~${systemTokens} tokens)`);
+ const effectiveTarget = Math.min(cfg.targetTokens, limit);
+ const preserveIndex = findPreserveIndex(remainingMessages, effectiveTarget, systemTokens);
+ if (preserveIndex === 0) {
+ consola.warn("Auto-compact: Cannot truncate further without losing all conversation history");
+ return {
+ payload,
+ wasCompacted: false,
+ originalTokens,
+ compactedTokens: originalTokens,
+ removedMessageCount: 0
+ };
+ }
+ const removedMessages = remainingMessages.slice(0, preserveIndex);
+ const preservedMessages = remainingMessages.slice(preserveIndex);
+ consola.info(`Auto-compact: Removing ${removedMessages.length} messages, keeping ${preservedMessages.length}`);
+ const truncationMarker = createTruncationMarker(removedMessages.length);
+ const newPayload = {
+ ...payload,
+ messages: [
+ ...systemMessages,
+ truncationMarker,
+ ...preservedMessages
+ ]
+ };
+ const newTokenCount = await getTokenCount(newPayload, model);
+ consola.info(`Auto-compact: Reduced from ${originalTokens} to ${newTokenCount.input} tokens`);
+ if (newTokenCount.input > limit) {
+ consola.warn(`Auto-compact: Still over limit (${newTokenCount.input} > ${limit}), trying more aggressive truncation`);
+ const aggressiveTarget = Math.floor(effectiveTarget * .7);
+ if (aggressiveTarget < 2e4) {
+ consola.error("Auto-compact: Cannot reduce further, target too low");
+ return {
+ payload: newPayload,
+ wasCompacted: true,
+ originalTokens,
+ compactedTokens: newTokenCount.input,
+ removedMessageCount: removedMessages.length
+ };
+ }
+ return autoCompact(payload, model, {
+ ...cfg,
+ targetTokens: aggressiveTarget
+ });
+ }
+ return {
+ payload: newPayload,
+ wasCompacted: true,
+ originalTokens,
+ compactedTokens: newTokenCount.input,
+ removedMessageCount: removedMessages.length
+ };
+ }
+ /**
+ * Create a marker to append to responses indicating auto-compaction occurred.
+ */
+ function createCompactionMarker(result) {
+ if (!result.wasCompacted) return "";
+ const reduction = result.originalTokens - result.compactedTokens;
+ const percentage = Math.round(reduction / result.originalTokens * 100);
+ return `\n\n---\n[Auto-compacted: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
+ }
+
+ //#endregion
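Note: with the default 10% safety margin, a model advertising max_prompt_tokens = 128000 compacts once the estimate exceeds floor(128000 × 0.9) = 115200, truncating toward targetTokens = 100000 while keeping leading system/developer messages and the newest turns. A hedged usage sketch (inside an async handler; the numbers are illustrative):

    const check = await checkNeedsCompaction(payload, selectedModel);
    if (check.needed) {
      // e.g. { needed: true, currentTokens: 120000, limit: 115200 }
      const result = await autoCompact(payload, selectedModel);
      // result.payload.messages => [...system, truncation marker, ...recent turns]
      console.log(createCompactionMarker(result));
    }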
+ //#region src/lib/queue.ts
+ var RequestQueue = class {
+ queue = [];
+ processing = false;
+ lastRequestTime = 0;
+ async enqueue(execute, rateLimitSeconds) {
+ return new Promise((resolve, reject) => {
+ this.queue.push({
+ execute,
+ resolve,
+ reject
+ });
+ if (this.queue.length > 1) {
+ const position = this.queue.length;
+ const waitTime = Math.ceil((position - 1) * rateLimitSeconds);
+ (waitTime > 10 ? consola.warn : consola.info)(`Rate limit: request queued (position ${position}, ~${waitTime}s wait)`);
+ }
+ this.processQueue(rateLimitSeconds);
+ });
+ }
+ async processQueue(rateLimitSeconds) {
+ if (this.processing) return;
+ this.processing = true;
+ while (this.queue.length > 0) {
+ const elapsedMs = Date.now() - this.lastRequestTime;
+ const requiredMs = rateLimitSeconds * 1e3;
+ if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
+ const waitMs = requiredMs - elapsedMs;
+ const waitSec = Math.ceil(waitMs / 1e3);
+ (waitSec > 10 ? consola.warn : consola.info)(`Rate limit: waiting ${waitSec}s before next request...`);
+ await new Promise((resolve) => setTimeout(resolve, waitMs));
+ }
+ const request = this.queue.shift();
+ if (!request) break;
+ this.lastRequestTime = Date.now();
+ try {
+ const result = await request.execute();
+ request.resolve(result);
+ } catch (error) {
+ request.reject(error);
+ }
+ }
+ this.processing = false;
+ }
+ get length() {
+ return this.queue.length;
+ }
+ };
+ const requestQueue = new RequestQueue();
+ /**
+ * Execute a request with rate limiting via queue.
+ * Requests are queued and processed sequentially at the configured rate.
+ */
+ async function executeWithRateLimit(state$1, execute) {
+ if (state$1.rateLimitSeconds === void 0) return execute();
+ return requestQueue.enqueue(execute, state$1.rateLimitSeconds);
+ }
+
  //#endregion
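Note: the queue is only moved and its logging reworded in 0.7.1 (warn instead of info past a ~10 s wait); its behavior appears unchanged. Usage stays the same (sketch; p1–p3 are illustrative payloads):

    // With state.rateLimitSeconds = 2, three concurrent calls execute
    // sequentially, at least 2s apart; queue logs appear for #2 and #3.
    const results = await Promise.all([
      executeWithRateLimit(state, () => createChatCompletions(p1)),
      executeWithRateLimit(state, () => createChatCompletions(p2)),
      executeWithRateLimit(state, () => createChatCompletions(p3)),
    ]);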
  //#region src/services/copilot/create-chat-completions.ts
  const createChatCompletions = async (payload) => {
@@ -1104,176 +2045,329 @@ const createChatCompletions = async (payload) => {
  consola.error("Failed to create chat completions", response);
  throw await HTTPError.fromResponse("Failed to create chat completions", response);
  }
- if (payload.stream) return events(response);
- return await response.json();
- };
-
- //#endregion
- //#region src/routes/chat-completions/handler.ts
- async function handleCompletion$1(c) {
- const startTime = Date.now();
- let payload = await c.req.json();
- consola.debug("Request payload:", JSON.stringify(payload).slice(-400));
- const historyId = recordRequest("openai", {
- model: payload.model,
- messages: convertOpenAIMessages(payload.messages),
- stream: payload.stream ?? false,
- tools: payload.tools?.map((t) => ({
- name: t.function.name,
- description: t.function.description
- })),
- max_tokens: payload.max_tokens ?? void 0,
- temperature: payload.temperature ?? void 0
- });
- const selectedModel = state.models?.data.find((model) => model.id === payload.model);
+ if (payload.stream) return events(response);
+ return await response.json();
+ };
+
+ //#endregion
+ //#region src/routes/chat-completions/handler.ts
+ async function handleCompletion$1(c) {
+ const startTime = Date.now();
+ const originalPayload = await c.req.json();
+ consola.debug("Request payload:", JSON.stringify(originalPayload).slice(-400));
+ const trackingId = c.get("trackingId");
+ updateTrackerModel$1(trackingId, originalPayload.model);
+ const ctx = {
+ historyId: recordRequest("openai", {
+ model: originalPayload.model,
+ messages: convertOpenAIMessages(originalPayload.messages),
+ stream: originalPayload.stream ?? false,
+ tools: originalPayload.tools?.map((t) => ({
+ name: t.function.name,
+ description: t.function.description
+ })),
+ max_tokens: originalPayload.max_tokens ?? void 0,
+ temperature: originalPayload.temperature ?? void 0
+ }),
+ trackingId,
+ startTime
+ };
+ const selectedModel = state.models?.data.find((model) => model.id === originalPayload.model);
+ await logTokenCount(originalPayload, selectedModel);
+ const { finalPayload, compactResult } = await buildFinalPayload$1(originalPayload, selectedModel);
+ if (compactResult) ctx.compactResult = compactResult;
+ const payload = isNullish(finalPayload.max_tokens) ? {
+ ...finalPayload,
+ max_tokens: selectedModel?.capabilities.limits.max_output_tokens
+ } : finalPayload;
+ if (isNullish(originalPayload.max_tokens)) consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
+ if (state.manualApprove) await awaitApproval();
+ try {
+ const response = await executeWithRateLimit(state, () => createChatCompletions(payload));
+ if (isNonStreaming$1(response)) return handleNonStreamingResponse$1(c, response, ctx);
+ consola.debug("Streaming response");
+ updateTrackerStatus$1(trackingId, "streaming");
+ return streamSSE(c, async (stream) => {
+ await handleStreamingResponse$1({
+ stream,
+ response,
+ payload,
+ ctx
+ });
+ });
+ } catch (error) {
+ recordErrorResponse$1(ctx, payload.model, error);
+ throw error;
+ }
+ }
+ async function buildFinalPayload$1(payload, model) {
+ if (!state.autoCompact || !model) {
+ if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
+ return {
+ finalPayload: payload,
+ compactResult: null
+ };
+ }
+ try {
+ const check = await checkNeedsCompaction(payload, model);
+ consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
+ if (!check.needed) return {
+ finalPayload: payload,
+ compactResult: null
+ };
+ consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
+ const compactResult = await autoCompact(payload, model);
+ return {
+ finalPayload: compactResult.payload,
+ compactResult
+ };
+ } catch (error) {
+ consola.warn("Auto-compact failed, proceeding with original payload:", error);
+ return {
+ finalPayload: payload,
+ compactResult: null
+ };
+ }
+ }
+ async function logTokenCount(payload, selectedModel) {
  try {
  if (selectedModel) {
  const tokenCount = await getTokenCount(payload, selectedModel);
- consola.info("Current token count:", tokenCount);
- } else consola.warn("No model selected, skipping token count calculation");
+ consola.debug("Current token count:", tokenCount);
+ } else consola.debug("No model selected, skipping token count calculation");
  } catch (error) {
- consola.warn("Failed to calculate token count:", error);
+ consola.debug("Failed to calculate token count:", error);
  }
- if (state.manualApprove) await awaitApproval();
- if (isNullish(payload.max_tokens)) {
- payload = {
- ...payload,
- max_tokens: selectedModel?.capabilities.limits.max_output_tokens
+ }
+ function updateTrackerModel$1(trackingId, model) {
+ if (!trackingId) return;
+ const request = requestTracker.getRequest(trackingId);
+ if (request) request.model = model;
+ }
+ function updateTrackerStatus$1(trackingId, status) {
+ if (!trackingId) return;
+ requestTracker.updateRequest(trackingId, { status });
+ }
+ function recordErrorResponse$1(ctx, model, error) {
+ recordResponse(ctx.historyId, {
+ success: false,
+ model,
+ usage: {
+ input_tokens: 0,
+ output_tokens: 0
+ },
+ error: error instanceof Error ? error.message : "Unknown error",
+ content: null
+ }, Date.now() - ctx.startTime);
+ }
+ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
+ consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
+ let response = originalResponse;
+ if (ctx.compactResult?.wasCompacted && response.choices[0]?.message.content) {
+ const marker = createCompactionMarker(ctx.compactResult);
+ response = {
+ ...response,
+ choices: response.choices.map((choice$1, i) => i === 0 ? {
+ ...choice$1,
+ message: {
+ ...choice$1.message,
+ content: (choice$1.message.content ?? "") + marker
+ }
+ } : choice$1)
  };
- consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
  }
+ const choice = response.choices[0];
+ const usage = response.usage;
+ recordResponse(ctx.historyId, {
+ success: true,
+ model: response.model,
+ usage: {
+ input_tokens: usage?.prompt_tokens ?? 0,
+ output_tokens: usage?.completion_tokens ?? 0
+ },
+ stop_reason: choice.finish_reason,
+ content: buildResponseContent(choice),
+ toolCalls: extractToolCalls(choice)
+ }, Date.now() - ctx.startTime);
+ if (ctx.trackingId && usage) requestTracker.updateRequest(ctx.trackingId, {
+ inputTokens: usage.prompt_tokens,
+ outputTokens: usage.completion_tokens
+ });
+ return c.json(response);
+ }
+ function buildResponseContent(choice) {
+ return {
+ role: choice.message.role,
+ content: typeof choice.message.content === "string" ? choice.message.content : JSON.stringify(choice.message.content),
+ tool_calls: choice.message.tool_calls?.map((tc) => ({
+ id: tc.id,
+ type: tc.type,
+ function: {
+ name: tc.function.name,
+ arguments: tc.function.arguments
+ }
+ }))
+ };
+ }
+ function extractToolCalls(choice) {
+ return choice.message.tool_calls?.map((tc) => ({
+ id: tc.id,
+ name: tc.function.name,
+ input: tc.function.arguments
+ }));
+ }
+ function createStreamAccumulator() {
+ return {
+ model: "",
+ inputTokens: 0,
+ outputTokens: 0,
+ finishReason: "",
+ content: "",
+ toolCalls: [],
+ toolCallMap: /* @__PURE__ */ new Map()
+ };
+ }
+ async function handleStreamingResponse$1(opts) {
+ const { stream, response, payload, ctx } = opts;
+ const acc = createStreamAccumulator();
  try {
- const response = await executeWithRateLimit(state, () => createChatCompletions(payload));
- if (isNonStreaming$1(response)) {
- consola.debug("Non-streaming response:", JSON.stringify(response));
- const choice = response.choices[0];
- recordResponse(historyId, {
- success: true,
- model: response.model,
- usage: {
- input_tokens: response.usage?.prompt_tokens ?? 0,
- output_tokens: response.usage?.completion_tokens ?? 0
- },
- stop_reason: choice?.finish_reason ?? void 0,
- content: choice?.message ? {
- role: choice.message.role,
- content: typeof choice.message.content === "string" ? choice.message.content : JSON.stringify(choice.message.content),
- tool_calls: choice.message.tool_calls?.map((tc) => ({
- id: tc.id,
- type: tc.type,
- function: {
- name: tc.function.name,
- arguments: tc.function.arguments
- }
- }))
- } : null,
- toolCalls: choice?.message?.tool_calls?.map((tc) => ({
- id: tc.id,
- name: tc.function.name,
- input: tc.function.arguments
- }))
- }, Date.now() - startTime);
- return c.json(response);
+ for await (const chunk of response) {
+ consola.debug("Streaming chunk:", JSON.stringify(chunk));
+ parseStreamChunk(chunk, acc);
+ await stream.writeSSE(chunk);
  }
- consola.debug("Streaming response");
- return streamSSE(c, async (stream) => {
- let streamModel = "";
- let streamInputTokens = 0;
- let streamOutputTokens = 0;
- let streamFinishReason = "";
- let streamContent = "";
- const streamToolCalls = [];
- const toolCallAccumulators = /* @__PURE__ */ new Map();
- try {
- for await (const chunk of response) {
- consola.debug("Streaming chunk:", JSON.stringify(chunk));
- if (chunk.data && chunk.data !== "[DONE]") try {
- const parsed = JSON.parse(chunk.data);
- if (parsed.model && !streamModel) streamModel = parsed.model;
- if (parsed.usage) {
- streamInputTokens = parsed.usage.prompt_tokens;
- streamOutputTokens = parsed.usage.completion_tokens;
- }
- const choice = parsed.choices[0];
- if (choice?.delta?.content) streamContent += choice.delta.content;
- if (choice?.delta?.tool_calls) for (const tc of choice.delta.tool_calls) {
- const idx = tc.index;
- if (!toolCallAccumulators.has(idx)) toolCallAccumulators.set(idx, {
- id: tc.id || "",
- name: tc.function?.name || "",
- arguments: ""
- });
- const acc = toolCallAccumulators.get(idx);
- if (acc) {
- if (tc.id) acc.id = tc.id;
- if (tc.function?.name) acc.name = tc.function.name;
- if (tc.function?.arguments) acc.arguments += tc.function.arguments;
- }
- }
- if (choice?.finish_reason) streamFinishReason = choice.finish_reason;
- } catch {}
- await stream.writeSSE(chunk);
- }
- for (const tc of toolCallAccumulators.values()) if (tc.id && tc.name) streamToolCalls.push({
- id: tc.id,
- name: tc.name,
- arguments: tc.arguments
- });
- const toolCallsForContent = streamToolCalls.map((tc) => ({
- id: tc.id,
- type: "function",
- function: {
- name: tc.name,
- arguments: tc.arguments
- }
- }));
- recordResponse(historyId, {
- success: true,
- model: streamModel || payload.model,
- usage: {
- input_tokens: streamInputTokens,
- output_tokens: streamOutputTokens
- },
- stop_reason: streamFinishReason || void 0,
- content: {
- role: "assistant",
- content: streamContent || void 0,
- tool_calls: toolCallsForContent.length > 0 ? toolCallsForContent : void 0
- },
- toolCalls: streamToolCalls.length > 0 ? streamToolCalls.map((tc) => ({
- id: tc.id,
- name: tc.name,
- input: tc.arguments
- })) : void 0
- }, Date.now() - startTime);
- } catch (error) {
- recordResponse(historyId, {
- success: false,
- model: streamModel || payload.model,
- usage: {
- input_tokens: 0,
- output_tokens: 0
- },
- error: error instanceof Error ? error.message : "Stream error",
- content: null
- }, Date.now() - startTime);
- throw error;
- }
- });
+ if (ctx.compactResult?.wasCompacted) {
+ const marker = createCompactionMarker(ctx.compactResult);
+ const markerChunk = {
+ id: `compact-marker-${Date.now()}`,
+ object: "chat.completion.chunk",
+ created: Math.floor(Date.now() / 1e3),
+ model: acc.model || payload.model,
+ choices: [{
+ index: 0,
+ delta: { content: marker },
+ finish_reason: null,
+ logprobs: null
+ }]
+ };
+ await stream.writeSSE({
+ data: JSON.stringify(markerChunk),
+ event: "message"
+ });
+ acc.content += marker;
+ }
+ recordStreamSuccess(acc, payload.model, ctx);
+ completeTracking$1(ctx.trackingId, acc.inputTokens, acc.outputTokens);
  } catch (error) {
- recordResponse(historyId, {
- success: false,
- model: payload.model,
- usage: {
- input_tokens: 0,
- output_tokens: 0
- },
- error: error instanceof Error ? error.message : "Unknown error",
1272
- content: null
1273
- }, Date.now() - startTime);
2262
+ recordStreamError({
2263
+ acc,
2264
+ fallbackModel: payload.model,
2265
+ ctx,
2266
+ error
2267
+ });
2268
+ failTracking$1(ctx.trackingId, error);
1274
2269
  throw error;
1275
2270
  }
1276
2271
  }
2272
+ function parseStreamChunk(chunk, acc) {
2273
+ if (!chunk.data || chunk.data === "[DONE]") return;
2274
+ try {
2275
+ const parsed = JSON.parse(chunk.data);
2276
+ accumulateModel(parsed, acc);
2277
+ accumulateUsage(parsed, acc);
2278
+ accumulateChoice(parsed.choices[0], acc);
2279
+ } catch {}
2280
+ }
2281
+ function accumulateModel(parsed, acc) {
2282
+ if (parsed.model && !acc.model) acc.model = parsed.model;
2283
+ }
2284
+ function accumulateUsage(parsed, acc) {
2285
+ if (parsed.usage) {
2286
+ acc.inputTokens = parsed.usage.prompt_tokens;
2287
+ acc.outputTokens = parsed.usage.completion_tokens;
2288
+ }
2289
+ }
2290
+ function accumulateChoice(choice, acc) {
2291
+ if (!choice) return;
2292
+ if (choice.delta.content) acc.content += choice.delta.content;
2293
+ if (choice.delta.tool_calls) accumulateToolCalls(choice.delta.tool_calls, acc);
2294
+ if (choice.finish_reason) acc.finishReason = choice.finish_reason;
2295
+ }
2296
+ function accumulateToolCalls(toolCalls, acc) {
2297
+ if (!toolCalls) return;
2298
+ for (const tc of toolCalls) {
2299
+ const idx = tc.index;
2300
+ if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
2301
+ id: tc.id ?? "",
2302
+ name: tc.function?.name ?? "",
2303
+ arguments: ""
2304
+ });
2305
+ const item = acc.toolCallMap.get(idx);
2306
+ if (item) {
2307
+ if (tc.id) item.id = tc.id;
2308
+ if (tc.function?.name) item.name = tc.function.name;
2309
+ if (tc.function?.arguments) item.arguments += tc.function.arguments;
2310
+ }
2311
+ }
2312
+ }
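The index-keyed map is what reassembles a tool call that the stream splits across chunks: the first delta usually carries `id` and `name`, later deltas append `arguments` fragments. A self-contained sketch with invented deltas:

const toolCallMap = new Map();
const deltas = [
  { index: 0, id: "call_1", function: { name: "search" } },
  { index: 0, function: { arguments: "{\"q\":" } },
  { index: 0, function: { arguments: "\"hono\"}" } }
];
for (const tc of deltas) {
  if (!toolCallMap.has(tc.index)) {
    toolCallMap.set(tc.index, { id: tc.id ?? "", name: tc.function?.name ?? "", arguments: "" });
  }
  const item = toolCallMap.get(tc.index);
  if (tc.id) item.id = tc.id;
  if (tc.function?.name) item.name = tc.function.name;
  if (tc.function?.arguments) item.arguments += tc.function.arguments;
}
// toolCallMap.get(0) -> { id: "call_1", name: "search", arguments: "{\"q\":\"hono\"}" }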
2313
+ function recordStreamSuccess(acc, fallbackModel, ctx) {
2314
+ for (const tc of acc.toolCallMap.values()) if (tc.id && tc.name) acc.toolCalls.push(tc);
2315
+ const toolCalls = acc.toolCalls.map((tc) => ({
2316
+ id: tc.id,
2317
+ type: "function",
2318
+ function: {
2319
+ name: tc.name,
2320
+ arguments: tc.arguments
2321
+ }
2322
+ }));
2323
+ recordResponse(ctx.historyId, {
2324
+ success: true,
2325
+ model: acc.model || fallbackModel,
2326
+ usage: {
2327
+ input_tokens: acc.inputTokens,
2328
+ output_tokens: acc.outputTokens
2329
+ },
2330
+ stop_reason: acc.finishReason || void 0,
2331
+ content: {
2332
+ role: "assistant",
2333
+ content: acc.content,
2334
+ tool_calls: toolCalls.length > 0 ? toolCalls : void 0
2335
+ },
2336
+ toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls.map((tc) => ({
2337
+ id: tc.id,
2338
+ name: tc.name,
2339
+ input: tc.arguments
2340
+ })) : void 0
2341
+ }, Date.now() - ctx.startTime);
2342
+ }
2343
+ function recordStreamError(opts) {
2344
+ const { acc, fallbackModel, ctx, error } = opts;
2345
+ recordResponse(ctx.historyId, {
2346
+ success: false,
2347
+ model: acc.model || fallbackModel,
2348
+ usage: {
2349
+ input_tokens: 0,
2350
+ output_tokens: 0
2351
+ },
2352
+ error: error instanceof Error ? error.message : "Stream error",
2353
+ content: null
2354
+ }, Date.now() - ctx.startTime);
2355
+ }
2356
+ function completeTracking$1(trackingId, inputTokens, outputTokens) {
2357
+ if (!trackingId) return;
2358
+ requestTracker.updateRequest(trackingId, {
2359
+ inputTokens,
2360
+ outputTokens
2361
+ });
2362
+ requestTracker.completeRequest(trackingId, 200, {
2363
+ inputTokens,
2364
+ outputTokens
2365
+ });
2366
+ }
2367
+ function failTracking$1(trackingId, error) {
2368
+ if (!trackingId) return;
2369
+ requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
2370
+ }
1277
2371
  const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
1278
2372
  function convertOpenAIMessages(messages) {
1279
2373
  return messages.map((msg) => {
@@ -1461,6 +2555,78 @@ function getContentText(content) {
1461
2555
  return JSON.stringify(content, null, 2);
1462
2556
  }
1463
2557
 
2558
+ // Extract real user text, skipping system tags like <system-reminder>, <ide_opened_file>, etc.
2559
+ function extractRealUserText(content) {
2560
+ if (!content) return '';
2561
+ let text = '';
2562
+ if (typeof content === 'string') {
2563
+ text = content;
2564
+ } else if (Array.isArray(content)) {
2565
+ text = content
2566
+ .filter(c => c.type === 'text' && c.text)
2567
+ .map(c => c.text)
2568
+ .join('\\n');
2569
+ }
2570
+ if (!text) return '';
2571
+
2572
+ // Remove system tags and their content
2573
+ const systemTags = [
2574
+ 'system-reminder',
2575
+ 'ide_opened_file',
2576
+ 'ide_selection',
2577
+ 'ide_visible_files',
2578
+ 'ide_diagnostics',
2579
+ 'ide_cursor_position',
2580
+ 'user-prompt-submit-hook',
2581
+ 'antml:function_calls',
2582
+ 'antml:invoke',
2583
+ 'antml:parameter'
2584
+ ];
2585
+
2586
+ let cleaned = text;
2587
+ for (const tag of systemTags) {
2588
+ // Remove <tag>...</tag> blocks (including multiline)
2589
+ const regex = new RegExp('<' + tag + '[^>]*>[\\\\s\\\\S]*?</' + tag + '>', 'gi');
2590
+ cleaned = cleaned.replace(regex, '');
2591
+ // Remove self-closing <tag ... /> forms (an opened tag with no closing tag is left in place)
2592
+ const selfClosingRegex = new RegExp('<' + tag + '[^>]*/>', 'gi');
2593
+ cleaned = cleaned.replace(selfClosingRegex, '');
2594
+ }
2595
+
2596
+ // Trim whitespace and return
2597
+ return cleaned.trim();
2598
+ }
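The doubled backslashes above exist only because this code lives inside a template literal; once rendered, the pattern is an ordinary non-greedy `[\s\S]*?` block match. Equivalent standalone form, with an invented sample input:

const tag = "system-reminder";
const blockRegex = new RegExp("<" + tag + "[^>]*>[\\s\\S]*?</" + tag + ">", "gi");
const input = "Fix the login bug.\n<system-reminder>internal note</system-reminder>";
console.log(input.replace(blockRegex, "").trim()); // "Fix the login bug."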
2599
+
2600
+ // Get preview text from assistant message content
2601
+ function getAssistantPreview(content) {
2602
+ if (!content) return '';
2603
+ if (typeof content === 'string') {
2604
+ const text = content.trim();
2605
+ if (text.length > 0) {
2606
+ return text.length > 80 ? text.slice(0, 80) + '...' : text;
2607
+ }
2608
+ return '';
2609
+ }
2610
+ if (Array.isArray(content)) {
2611
+ // First try to get text content
2612
+ const textParts = content.filter(c => c.type === 'text' && c.text).map(c => c.text);
2613
+ if (textParts.length > 0) {
2614
+ const text = textParts.join('\\n').trim();
2615
+ if (text.length > 0) {
2616
+ return text.length > 80 ? text.slice(0, 80) + '...' : text;
2617
+ }
2618
+ }
2619
+ // If no text, show tool_use info
2620
+ const toolUses = content.filter(c => c.type === 'tool_use');
2621
+ if (toolUses.length === 1) {
2622
+ return '[tool_use: ' + toolUses[0].name + ']';
2623
+ } else if (toolUses.length > 1) {
2624
+ return '[' + toolUses.length + ' tool_uses]';
2625
+ }
2626
+ }
2627
+ return '';
2628
+ }
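Expected behaviour of the helper for the three content shapes it distinguishes (calls assume `getAssistantPreview` is in scope; the arrays are invented):

getAssistantPreview("Sure, updating the file now.");        // "Sure, updating the file now."
getAssistantPreview([{ type: "tool_use", name: "Bash" }]);  // "[tool_use: Bash]"
getAssistantPreview([
  { type: "tool_use", name: "Read" },
  { type: "tool_use", name: "Edit" }
]);                                                         // "[2 tool_uses]"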
2629
+
1464
2630
  function formatContentForDisplay(content) {
1465
2631
  if (!content) return { summary: '', raw: 'null' };
1466
2632
  if (typeof content === 'string') return { summary: content, raw: JSON.stringify(content) };
@@ -1516,6 +2682,7 @@ async function loadSessions() {
1516
2682
  for (const s of data.sessions) {
1517
2683
  const isActive = currentSessionId === s.id;
1518
2684
  const shortId = s.id.slice(0, 8);
2685
+ const toolCount = s.toolsUsed ? s.toolsUsed.length : 0;
1519
2686
  html += \`
1520
2687
  <div class="session-item\${isActive ? ' active' : ''}" onclick="selectSession('\${s.id}')">
1521
2688
  <div class="session-meta">
@@ -1526,6 +2693,7 @@ async function loadSessions() {
1526
2693
  <span style="color:var(--text-dim);font-family:monospace;font-size:10px;">\${shortId}</span>
1527
2694
  <span>\${s.requestCount} req</span>
1528
2695
  <span>\${formatNumber(s.totalInputTokens + s.totalOutputTokens)} tok</span>
2696
+ \${toolCount > 0 ? '<span class="badge tool">' + toolCount + ' tools</span>' : ''}
1529
2697
  <span class="badge \${s.endpoint}">\${s.endpoint}</span>
1530
2698
  </div>
1531
2699
  </div>
@@ -1584,6 +2752,37 @@ async function loadEntries() {
1584
2752
  const tokens = e.response ? formatNumber(e.response.usage.input_tokens) + '/' + formatNumber(e.response.usage.output_tokens) : '-';
1585
2753
  const shortId = e.id.slice(0, 8);
1586
2754
 
2755
+ // Get preview: show meaningful context about the request
2756
+ let lastUserMsg = '';
2757
+ const messages = e.request.messages;
2758
+ const lastMsg = messages[messages.length - 1];
2759
+
2760
+ // If last message is tool_result, look at the previous assistant message for context
2761
+ if (lastMsg && lastMsg.role === 'user') {
2762
+ const content = lastMsg.content;
2763
+ if (Array.isArray(content) && content.length > 0 && content[0].type === 'tool_result') {
2764
+ // This is a tool_result response - look for previous assistant message
2765
+ const prevMsg = messages.length >= 2 ? messages[messages.length - 2] : null;
2766
+ if (prevMsg && prevMsg.role === 'assistant') {
2767
+ lastUserMsg = getAssistantPreview(prevMsg.content);
2768
+ }
2769
+ // If no meaningful preview from assistant, show tool_result count
2770
+ if (!lastUserMsg) {
2771
+ const toolResults = content.filter(c => c.type === 'tool_result');
2772
+ lastUserMsg = '[' + toolResults.length + ' tool_result' + (toolResults.length > 1 ? 's' : '') + ']';
2773
+ }
2774
+ } else {
2775
+ // Regular user message, extract real text
2776
+ const realText = extractRealUserText(lastMsg.content);
2777
+ if (realText.length > 0) {
2778
+ lastUserMsg = realText.slice(0, 80);
2779
+ if (realText.length > 80) lastUserMsg += '...';
2780
+ }
2781
+ }
2782
+ } else if (lastMsg && lastMsg.role === 'assistant') {
2783
+ lastUserMsg = getAssistantPreview(lastMsg.content);
2784
+ }
2785
+
1587
2786
  html += \`
1588
2787
  <div class="entry-item\${isSelected ? ' selected' : ''}" onclick="showDetail('\${e.id}')">
1589
2788
  <div class="entry-header">
@@ -1596,6 +2795,7 @@ async function loadEntries() {
1596
2795
  <span class="entry-tokens">\${tokens}</span>
1597
2796
  <span class="entry-duration">\${formatDuration(e.durationMs)}</span>
1598
2797
  </div>
2798
+ \${lastUserMsg ? '<div class="entry-preview">' + escapeHtml(lastUserMsg) + '</div>' : ''}
1599
2799
  </div>
1600
2800
  \`;
1601
2801
  }
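The preview line is user-controlled text interpolated into markup, which is why it passes through `escapeHtml` first. The real helper is defined elsewhere in this file; a typical shape, shown only as a hedged sketch under a hypothetical name, is:

function escapeHtmlSketch(s) {
  // Map each HTML-significant character to its entity.
  const map = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  return s.replace(/[&<>"']/g, (ch) => map[ch]);
}
// escapeHtmlSketch('<img onerror=alert(1)>') -> "&lt;img onerror=alert(1)&gt;"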
@@ -1655,7 +2855,7 @@ async function showDetail(id) {
1655
2855
  <div class="info-item"><div class="info-label">Duration</div><div class="info-value">\${formatDuration(entry.durationMs)}</div></div>
1656
2856
  <div class="info-item"><div class="info-label">Stop Reason</div><div class="info-value">\${entry.response.stop_reason || '-'}</div></div>
1657
2857
  </div>
1658
- \${entry.response.error ? '<div style="color:var(--error);margin-top:8px;">Error: ' + entry.response.error + '</div>' : ''}
2858
+ \${entry.response.error ? '<div class="error-detail"><div class="error-label">Error Details</div><pre class="error-content">' + escapeHtml(entry.response.error) + '</pre></div>' : ''}
1659
2859
  </div>
1660
2860
  \`;
1661
2861
  }
@@ -2023,6 +3223,14 @@ input::placeholder { color: var(--text-dim); }
2023
3223
  .entry-model { font-weight: 500; flex: 1; }
2024
3224
  .entry-tokens { font-size: 11px; color: var(--text-dim); }
2025
3225
  .entry-duration { font-size: 11px; color: var(--text-dim); min-width: 50px; text-align: right; }
3226
+ .entry-preview {
3227
+ padding: 0 16px 8px 16px;
3228
+ font-size: 11px;
3229
+ color: var(--text-muted);
3230
+ overflow: hidden;
3231
+ text-overflow: ellipsis;
3232
+ white-space: nowrap;
3233
+ }
2026
3234
 
2027
3235
  /* Badges */
2028
3236
  .badge {
@@ -2038,6 +3246,7 @@ input::placeholder { color: var(--text-dim); }
2038
3246
  .badge.anthropic { background: rgba(163, 113, 247, 0.15); color: var(--purple); }
2039
3247
  .badge.openai { background: rgba(210, 153, 34, 0.15); color: var(--warning); }
2040
3248
  .badge.stream { background: rgba(57, 197, 207, 0.15); color: var(--cyan); }
3249
+ .badge.tool { background: rgba(88, 166, 255, 0.15); color: var(--primary); }
2041
3250
 
2042
3251
  /* Detail panel */
2043
3252
  .detail-panel {
@@ -2133,6 +3342,32 @@ input::placeholder { color: var(--text-dim); }
2133
3342
  .info-label { font-size: 11px; color: var(--text-muted); }
2134
3343
  .info-value { font-weight: 500; }
2135
3344
 
3345
+ /* Error detail display */
3346
+ .error-detail {
3347
+ margin-top: 12px;
3348
+ padding: 12px;
3349
+ background: rgba(248, 81, 73, 0.1);
3350
+ border: 1px solid rgba(248, 81, 73, 0.3);
3351
+ border-radius: 6px;
3352
+ }
3353
+ .error-label {
3354
+ font-size: 11px;
3355
+ color: var(--error);
3356
+ font-weight: 600;
3357
+ margin-bottom: 8px;
3358
+ text-transform: uppercase;
3359
+ }
3360
+ .error-content {
3361
+ margin: 0;
3362
+ font-family: 'SF Mono', Monaco, 'Courier New', monospace;
3363
+ font-size: 12px;
3364
+ color: var(--error);
3365
+ white-space: pre-wrap;
3366
+ word-break: break-word;
3367
+ max-height: 300px;
3368
+ overflow-y: auto;
3369
+ }
3370
+
2136
3371
  /* Empty state */
2137
3372
  .empty-state {
2138
3373
  text-align: center;
@@ -2388,12 +3623,12 @@ function translateModelName(model) {
2388
3623
  haiku: "claude-haiku-4.5"
2389
3624
  };
2390
3625
  if (shortNameMap[model]) return shortNameMap[model];
2391
- if (model.match(/^claude-sonnet-4-5-\d+$/)) return "claude-sonnet-4.5";
2392
- if (model.match(/^claude-sonnet-4-\d+$/)) return "claude-sonnet-4";
2393
- if (model.match(/^claude-opus-4-5-\d+$/)) return "claude-opus-4.5";
2394
- if (model.match(/^claude-opus-4-\d+$/)) return "claude-opus-4.5";
2395
- if (model.match(/^claude-haiku-4-5-\d+$/)) return "claude-haiku-4.5";
2396
- if (model.match(/^claude-haiku-3-5-\d+$/)) return "claude-haiku-4.5";
3626
+ if (/^claude-sonnet-4-5-\d+$/.test(model)) return "claude-sonnet-4.5";
3627
+ if (/^claude-sonnet-4-\d+$/.test(model)) return "claude-sonnet-4";
3628
+ if (/^claude-opus-4-5-\d+$/.test(model)) return "claude-opus-4.5";
3629
+ if (/^claude-opus-4-\d+$/.test(model)) return "claude-opus-4.5";
3630
+ if (/^claude-haiku-4-5-\d+$/.test(model)) return "claude-haiku-4.5";
3631
+ if (/^claude-haiku-3-5-\d+$/.test(model)) return "claude-haiku-4.5";
2397
3632
  return model;
2398
3633
  }
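Switching from `String.prototype.match` to `RegExp.prototype.test` is the idiomatic form when only a yes/no answer is needed; `match` allocates a result array these branches never read:

const model = "claude-sonnet-4-5-20250929"; // example id, invented
/^claude-sonnet-4-5-\d+$/.test(model);           // true, no allocation
model.match(/^claude-sonnet-4-5-\d+$/) !== null; // same answer, extra array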
2399
3634
  function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameMapping) {
@@ -2490,7 +3725,7 @@ function getTruncatedToolName(originalName, toolNameMapping) {
2490
3725
  if (existingTruncated) return existingTruncated;
2491
3726
  let hash = 0;
2492
3727
  for (let i = 0; i < originalName.length; i++) {
2493
- const char = originalName.charCodeAt(i);
3728
+ const char = originalName.codePointAt(i) ?? 0;
2494
3729
  hash = (hash << 5) - hash + char;
2495
3730
  hash = hash & hash;
2496
3731
  }
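`codePointAt` and `charCodeAt` agree for everything in the Basic Multilingual Plane; they differ at a leading surrogate, where `codePointAt` returns the full code point, and the `?? 0` covers the out-of-range `undefined` case:

const s = "a\u{1F600}";             // "a" + an emoji, stored as 3 UTF-16 units
console.log(s.charCodeAt(1));       // 55357 (lead surrogate half)
console.log(s.codePointAt(1));      // 128512 (full code point U+1F600)
console.log(s.codePointAt(9) ?? 0); // 0 (index past end -> undefined)

One consequence: names containing characters outside the BMP now hash slightly differently than under the old `charCodeAt` loop.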
@@ -2527,8 +3762,9 @@ function translateAnthropicToolChoiceToOpenAI(anthropicToolChoice, toolNameMappi
2527
3762
  default: return;
2528
3763
  }
2529
3764
  }
2530
- function translateToAnthropic(response, toolNameMapping) {
2531
- if (response.choices.length === 0) return {
3765
+ /** Create empty response for edge case of no choices */
3766
+ function createEmptyResponse(response) {
3767
+ return {
2532
3768
  id: response.id,
2533
3769
  type: "message",
2534
3770
  role: "assistant",
@@ -2541,6 +3777,18 @@ function translateToAnthropic(response, toolNameMapping) {
2541
3777
  output_tokens: response.usage?.completion_tokens ?? 0
2542
3778
  }
2543
3779
  };
3780
+ }
3781
+ /** Build usage object from response */
3782
+ function buildUsageObject(response) {
3783
+ const cachedTokens = response.usage?.prompt_tokens_details?.cached_tokens;
3784
+ return {
3785
+ input_tokens: (response.usage?.prompt_tokens ?? 0) - (cachedTokens ?? 0),
3786
+ output_tokens: response.usage?.completion_tokens ?? 0,
3787
+ ...cachedTokens !== void 0 && { cache_read_input_tokens: cachedTokens }
3788
+ };
3789
+ }
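A worked example of the arithmetic: cached prompt tokens are carved out of `input_tokens` and surfaced separately through the conditional spread (numbers invented):

const response = {
  usage: {
    prompt_tokens: 1200,
    completion_tokens: 80,
    prompt_tokens_details: { cached_tokens: 900 }
  }
};
const cachedTokens = response.usage?.prompt_tokens_details?.cached_tokens;
const usage = {
  input_tokens: (response.usage?.prompt_tokens ?? 0) - (cachedTokens ?? 0),
  output_tokens: response.usage?.completion_tokens ?? 0,
  ...cachedTokens !== undefined && { cache_read_input_tokens: cachedTokens }
};
// -> { input_tokens: 300, output_tokens: 80, cache_read_input_tokens: 900 }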
3790
+ function translateToAnthropic(response, toolNameMapping) {
3791
+ if (response.choices.length === 0) return createEmptyResponse(response);
2544
3792
  const allTextBlocks = [];
2545
3793
  const allToolUseBlocks = [];
2546
3794
  let stopReason = null;
@@ -2560,11 +3808,7 @@ function translateToAnthropic(response, toolNameMapping) {
2560
3808
  content: [...allTextBlocks, ...allToolUseBlocks],
2561
3809
  stop_reason: mapOpenAIStopReasonToAnthropic(stopReason),
2562
3810
  stop_sequence: null,
2563
- usage: {
2564
- input_tokens: (response.usage?.prompt_tokens ?? 0) - (response.usage?.prompt_tokens_details?.cached_tokens ?? 0),
2565
- output_tokens: response.usage?.completion_tokens ?? 0,
2566
- ...response.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: response.usage.prompt_tokens_details.cached_tokens }
2567
- }
3811
+ usage: buildUsageObject(response)
2568
3812
  };
2569
3813
  }
2570
3814
  function getAnthropicTextBlocks(messageContent) {
@@ -2624,7 +3868,7 @@ async function handleCountTokens(c) {
2624
3868
  let finalTokenCount = tokenCount.input + tokenCount.output;
2625
3869
  if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * 1.15);
2626
3870
  else if (anthropicPayload.model.startsWith("grok")) finalTokenCount = Math.round(finalTokenCount * 1.03);
2627
- consola.info("Token count:", finalTokenCount);
3871
+ consola.debug("Token count:", finalTokenCount);
2628
3872
  return c.json({ input_tokens: finalTokenCount });
2629
3873
  } catch (error) {
2630
3874
  consola.error("Error counting tokens:", error);
@@ -2776,175 +4020,365 @@ async function handleCompletion(c) {
2776
4020
  const startTime = Date.now();
2777
4021
  const anthropicPayload = await c.req.json();
2778
4022
  consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
2779
- const historyId = recordRequest("anthropic", {
2780
- model: anthropicPayload.model,
2781
- messages: convertAnthropicMessages(anthropicPayload.messages),
2782
- stream: anthropicPayload.stream ?? false,
2783
- tools: anthropicPayload.tools?.map((t) => ({
2784
- name: t.name,
2785
- description: t.description
2786
- })),
2787
- max_tokens: anthropicPayload.max_tokens,
2788
- temperature: anthropicPayload.temperature,
2789
- system: extractSystemPrompt(anthropicPayload.system)
2790
- });
2791
- const { payload: openAIPayload, toolNameMapping } = translateToOpenAI(anthropicPayload);
2792
- consola.debug("Translated OpenAI request payload:", JSON.stringify(openAIPayload));
4023
+ const trackingId = c.get("trackingId");
4024
+ updateTrackerModel(trackingId, anthropicPayload.model);
4025
+ const ctx = {
4026
+ historyId: recordRequest("anthropic", {
4027
+ model: anthropicPayload.model,
4028
+ messages: convertAnthropicMessages(anthropicPayload.messages),
4029
+ stream: anthropicPayload.stream ?? false,
4030
+ tools: anthropicPayload.tools?.map((t) => ({
4031
+ name: t.name,
4032
+ description: t.description
4033
+ })),
4034
+ max_tokens: anthropicPayload.max_tokens,
4035
+ temperature: anthropicPayload.temperature,
4036
+ system: extractSystemPrompt(anthropicPayload.system)
4037
+ }),
4038
+ trackingId,
4039
+ startTime
4040
+ };
4041
+ const { payload: translatedPayload, toolNameMapping } = translateToOpenAI(anthropicPayload);
4042
+ consola.debug("Translated OpenAI request payload:", JSON.stringify(translatedPayload));
4043
+ const selectedModel = state.models?.data.find((model) => model.id === translatedPayload.model);
4044
+ const { finalPayload: openAIPayload, compactResult } = await buildFinalPayload(translatedPayload, selectedModel);
4045
+ if (compactResult) ctx.compactResult = compactResult;
2793
4046
  if (state.manualApprove) await awaitApproval();
2794
4047
  try {
2795
4048
  const response = await executeWithRateLimit(state, () => createChatCompletions(openAIPayload));
2796
- if (isNonStreaming(response)) {
2797
- consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
2798
- const anthropicResponse = translateToAnthropic(response, toolNameMapping);
2799
- consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
2800
- recordResponse(historyId, {
2801
- success: true,
2802
- model: anthropicResponse.model,
2803
- usage: anthropicResponse.usage,
2804
- stop_reason: anthropicResponse.stop_reason ?? void 0,
2805
- content: {
2806
- role: "assistant",
2807
- content: anthropicResponse.content.map((block) => {
2808
- if (block.type === "text") return {
2809
- type: "text",
2810
- text: block.text
2811
- };
2812
- if (block.type === "tool_use") return {
2813
- type: "tool_use",
2814
- id: block.id,
2815
- name: block.name,
2816
- input: JSON.stringify(block.input)
2817
- };
2818
- return { type: block.type };
2819
- })
2820
- },
2821
- toolCalls: extractToolCallsFromContent(anthropicResponse.content)
2822
- }, Date.now() - startTime);
2823
- return c.json(anthropicResponse);
2824
- }
4049
+ if (isNonStreaming(response)) return handleNonStreamingResponse({
4050
+ c,
4051
+ response,
4052
+ toolNameMapping,
4053
+ ctx
4054
+ });
2825
4055
  consola.debug("Streaming response from Copilot");
4056
+ updateTrackerStatus(trackingId, "streaming");
2826
4057
  return streamSSE(c, async (stream) => {
2827
- const streamState = {
2828
- messageStartSent: false,
2829
- contentBlockIndex: 0,
2830
- contentBlockOpen: false,
2831
- toolCalls: {}
2832
- };
2833
- let streamModel = "";
2834
- let streamInputTokens = 0;
2835
- let streamOutputTokens = 0;
2836
- let streamStopReason = "";
2837
- let streamContent = "";
2838
- const streamToolCalls = [];
2839
- let currentToolCall = null;
2840
- try {
2841
- for await (const rawEvent of response) {
2842
- consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent));
2843
- if (rawEvent.data === "[DONE]") break;
2844
- if (!rawEvent.data) continue;
2845
- let chunk;
2846
- try {
2847
- chunk = JSON.parse(rawEvent.data);
2848
- } catch (parseError) {
2849
- consola.error("Failed to parse stream chunk:", parseError, rawEvent.data);
2850
- continue;
2851
- }
2852
- if (chunk.model && !streamModel) streamModel = chunk.model;
2853
- const events$1 = translateChunkToAnthropicEvents(chunk, streamState, toolNameMapping);
2854
- for (const event of events$1) {
2855
- consola.debug("Translated Anthropic event:", JSON.stringify(event));
2856
- switch (event.type) {
2857
- case "content_block_delta":
2858
- if ("text" in event.delta) streamContent += event.delta.text;
2859
- else if ("partial_json" in event.delta && currentToolCall) currentToolCall.input += event.delta.partial_json;
2860
- break;
2861
- case "content_block_start":
2862
- if (event.content_block.type === "tool_use") currentToolCall = {
2863
- id: event.content_block.id,
2864
- name: event.content_block.name,
2865
- input: ""
2866
- };
2867
- break;
2868
- case "content_block_stop":
2869
- if (currentToolCall) {
2870
- streamToolCalls.push(currentToolCall);
2871
- currentToolCall = null;
2872
- }
2873
- break;
2874
- case "message_delta":
2875
- if (event.delta.stop_reason) streamStopReason = event.delta.stop_reason;
2876
- if (event.usage) {
2877
- streamInputTokens = event.usage.input_tokens ?? 0;
2878
- streamOutputTokens = event.usage.output_tokens;
2879
- }
2880
- break;
2881
- }
2882
- await stream.writeSSE({
2883
- event: event.type,
2884
- data: JSON.stringify(event)
2885
- });
2886
- }
2887
- }
2888
- const contentBlocks = [];
2889
- if (streamContent) contentBlocks.push({
4058
+ await handleStreamingResponse({
4059
+ stream,
4060
+ response,
4061
+ toolNameMapping,
4062
+ anthropicPayload,
4063
+ ctx
4064
+ });
4065
+ });
4066
+ } catch (error) {
4067
+ recordErrorResponse(ctx, anthropicPayload.model, error);
4068
+ throw error;
4069
+ }
4070
+ }
4071
+ function updateTrackerModel(trackingId, model) {
4072
+ if (!trackingId) return;
4073
+ const request = requestTracker.getRequest(trackingId);
4074
+ if (request) request.model = model;
4075
+ }
4076
+ async function buildFinalPayload(payload, model) {
4077
+ if (!state.autoCompact || !model) {
4078
+ if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
4079
+ return {
4080
+ finalPayload: payload,
4081
+ compactResult: null
4082
+ };
4083
+ }
4084
+ try {
4085
+ const check = await checkNeedsCompaction(payload, model);
4086
+ consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
4087
+ if (!check.needed) return {
4088
+ finalPayload: payload,
4089
+ compactResult: null
4090
+ };
4091
+ consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
4092
+ const compactResult = await autoCompact(payload, model);
4093
+ return {
4094
+ finalPayload: compactResult.payload,
4095
+ compactResult
4096
+ };
4097
+ } catch (error) {
4098
+ consola.warn("Auto-compact failed, proceeding with original payload:", error);
4099
+ return {
4100
+ finalPayload: payload,
4101
+ compactResult: null
4102
+ };
4103
+ }
4104
+ }
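Every failure branch above returns the original payload, so compaction can only ever be a no-op, never the cause of a failed request. The policy in isolation, as a minimal sketch with stubbed, hypothetical helpers:

async function failOpen(payload, check, compact) {
  try {
    const c = await check(payload);          // may throw, e.g. if token counting fails
    if (!c.needed) return payload;           // under the limit: pass through
    return (await compact(payload)).payload; // over the limit: use the compacted payload
  } catch {
    return payload;                          // on any error, proceed uncompacted
  }
}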
4105
+ function updateTrackerStatus(trackingId, status) {
4106
+ if (!trackingId) return;
4107
+ requestTracker.updateRequest(trackingId, { status });
4108
+ }
4109
+ function recordErrorResponse(ctx, model, error) {
4110
+ recordResponse(ctx.historyId, {
4111
+ success: false,
4112
+ model,
4113
+ usage: {
4114
+ input_tokens: 0,
4115
+ output_tokens: 0
4116
+ },
4117
+ error: error instanceof Error ? error.message : "Unknown error",
4118
+ content: null
4119
+ }, Date.now() - ctx.startTime);
4120
+ }
4121
+ function handleNonStreamingResponse(opts) {
4122
+ const { c, response, toolNameMapping, ctx } = opts;
4123
+ consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
4124
+ let anthropicResponse = translateToAnthropic(response, toolNameMapping);
4125
+ consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
4126
+ if (ctx.compactResult?.wasCompacted) {
4127
+ const marker = createCompactionMarker(ctx.compactResult);
4128
+ anthropicResponse = appendMarkerToAnthropicResponse(anthropicResponse, marker);
4129
+ }
4130
+ recordResponse(ctx.historyId, {
4131
+ success: true,
4132
+ model: anthropicResponse.model,
4133
+ usage: anthropicResponse.usage,
4134
+ stop_reason: anthropicResponse.stop_reason ?? void 0,
4135
+ content: {
4136
+ role: "assistant",
4137
+ content: anthropicResponse.content.map((block) => {
4138
+ if (block.type === "text") return {
2890
4139
  type: "text",
2891
- text: streamContent
2892
- });
2893
- for (const tc of streamToolCalls) contentBlocks.push({
4140
+ text: block.text
4141
+ };
4142
+ if (block.type === "tool_use") return {
2894
4143
  type: "tool_use",
2895
- ...tc
2896
- });
2897
- recordResponse(historyId, {
2898
- success: true,
2899
- model: streamModel || anthropicPayload.model,
2900
- usage: {
2901
- input_tokens: streamInputTokens,
2902
- output_tokens: streamOutputTokens
2903
- },
2904
- stop_reason: streamStopReason || void 0,
2905
- content: contentBlocks.length > 0 ? {
2906
- role: "assistant",
2907
- content: contentBlocks
2908
- } : null,
2909
- toolCalls: streamToolCalls.length > 0 ? streamToolCalls.map((tc) => ({
2910
- id: tc.id,
2911
- name: tc.name,
2912
- input: tc.input
2913
- })) : void 0
2914
- }, Date.now() - startTime);
2915
- } catch (error) {
2916
- consola.error("Stream error:", error);
2917
- recordResponse(historyId, {
2918
- success: false,
2919
- model: streamModel || anthropicPayload.model,
2920
- usage: {
2921
- input_tokens: 0,
2922
- output_tokens: 0
2923
- },
2924
- error: error instanceof Error ? error.message : "Stream error",
2925
- content: null
2926
- }, Date.now() - startTime);
2927
- const errorEvent = translateErrorToAnthropicErrorEvent();
2928
- await stream.writeSSE({
2929
- event: errorEvent.type,
2930
- data: JSON.stringify(errorEvent)
2931
- });
2932
- }
4144
+ id: block.id,
4145
+ name: block.name,
4146
+ input: JSON.stringify(block.input)
4147
+ };
4148
+ return { type: block.type };
4149
+ })
4150
+ },
4151
+ toolCalls: extractToolCallsFromContent(anthropicResponse.content)
4152
+ }, Date.now() - ctx.startTime);
4153
+ if (ctx.trackingId) requestTracker.updateRequest(ctx.trackingId, {
4154
+ inputTokens: anthropicResponse.usage.input_tokens,
4155
+ outputTokens: anthropicResponse.usage.output_tokens
4156
+ });
4157
+ return c.json(anthropicResponse);
4158
+ }
4159
+ function appendMarkerToAnthropicResponse(response, marker) {
4160
+ const content = [...response.content];
4161
+ const lastTextIndex = content.findLastIndex((block) => block.type === "text");
4162
+ if (lastTextIndex !== -1) {
4163
+ const textBlock = content[lastTextIndex];
4164
+ if (textBlock.type === "text") content[lastTextIndex] = {
4165
+ ...textBlock,
4166
+ text: textBlock.text + marker
4167
+ };
4168
+ } else content.push({
4169
+ type: "text",
4170
+ text: marker
4171
+ });
4172
+ return {
4173
+ ...response,
4174
+ content
4175
+ };
4176
+ }
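`findLastIndex` (ES2023, so Node 18+) targets the last text block, which keeps the marker at the end of the visible reply rather than after a trailing `tool_use` block, and the copy-then-replace leaves the original response object untouched. Illustration with an invented content array:

const content = [
  { type: "text", text: "Done." },
  { type: "tool_use", id: "t1", name: "Bash", input: {} }
];
content.findLastIndex((b) => b.type === "text"); // 0 -> marker is appended to "Done."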
4177
+ function createAnthropicStreamAccumulator() {
4178
+ return {
4179
+ model: "",
4180
+ inputTokens: 0,
4181
+ outputTokens: 0,
4182
+ stopReason: "",
4183
+ content: "",
4184
+ toolCalls: [],
4185
+ currentToolCall: null
4186
+ };
4187
+ }
4188
+ async function handleStreamingResponse(opts) {
4189
+ const { stream, response, toolNameMapping, anthropicPayload, ctx } = opts;
4190
+ const streamState = {
4191
+ messageStartSent: false,
4192
+ contentBlockIndex: 0,
4193
+ contentBlockOpen: false,
4194
+ toolCalls: {}
4195
+ };
4196
+ const acc = createAnthropicStreamAccumulator();
4197
+ try {
4198
+ await processStreamChunks({
4199
+ stream,
4200
+ response,
4201
+ toolNameMapping,
4202
+ streamState,
4203
+ acc
2933
4204
  });
4205
+ if (ctx.compactResult?.wasCompacted) {
4206
+ const marker = createCompactionMarker(ctx.compactResult);
4207
+ await sendCompactionMarkerEvent(stream, streamState, marker);
4208
+ acc.content += marker;
4209
+ }
4210
+ recordStreamingResponse(acc, anthropicPayload.model, ctx);
4211
+ completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens);
2934
4212
  } catch (error) {
2935
- recordResponse(historyId, {
2936
- success: false,
2937
- model: anthropicPayload.model,
2938
- usage: {
2939
- input_tokens: 0,
2940
- output_tokens: 0
2941
- },
2942
- error: error instanceof Error ? error.message : "Unknown error",
2943
- content: null
2944
- }, Date.now() - startTime);
2945
- throw error;
4213
+ consola.error("Stream error:", error);
4214
+ recordStreamingError({
4215
+ acc,
4216
+ fallbackModel: anthropicPayload.model,
4217
+ ctx,
4218
+ error
4219
+ });
4220
+ failTracking(ctx.trackingId, error);
4221
+ const errorEvent = translateErrorToAnthropicErrorEvent();
4222
+ await stream.writeSSE({
4223
+ event: errorEvent.type,
4224
+ data: JSON.stringify(errorEvent)
4225
+ });
4226
+ }
4227
+ }
4228
+ async function sendCompactionMarkerEvent(stream, streamState, marker) {
4229
+ const blockStartEvent = {
4230
+ type: "content_block_start",
4231
+ index: streamState.contentBlockIndex,
4232
+ content_block: {
4233
+ type: "text",
4234
+ text: ""
4235
+ }
4236
+ };
4237
+ await stream.writeSSE({
4238
+ event: "content_block_start",
4239
+ data: JSON.stringify(blockStartEvent)
4240
+ });
4241
+ const deltaEvent = {
4242
+ type: "content_block_delta",
4243
+ index: streamState.contentBlockIndex,
4244
+ delta: {
4245
+ type: "text_delta",
4246
+ text: marker
4247
+ }
4248
+ };
4249
+ await stream.writeSSE({
4250
+ event: "content_block_delta",
4251
+ data: JSON.stringify(deltaEvent)
4252
+ });
4253
+ const blockStopEvent = {
4254
+ type: "content_block_stop",
4255
+ index: streamState.contentBlockIndex
4256
+ };
4257
+ await stream.writeSSE({
4258
+ event: "content_block_stop",
4259
+ data: JSON.stringify(blockStopEvent)
4260
+ });
4261
+ streamState.contentBlockIndex++;
4262
+ }
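On the wire this emits one complete Anthropic content block, i.e. three SSE events at the next free index. Roughly, assuming index 1 and a placeholder marker text (the real text comes from `createCompactionMarker`, which is defined elsewhere):

event: content_block_start
data: {"type":"content_block_start","index":1,"content_block":{"type":"text","text":""}}

event: content_block_delta
data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"<marker text>"}}

event: content_block_stop
data: {"type":"content_block_stop","index":1}

Bumping `contentBlockIndex` afterwards keeps any later blocks from colliding with this index.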
4263
+ async function processStreamChunks(opts) {
4264
+ const { stream, response, toolNameMapping, streamState, acc } = opts;
4265
+ for await (const rawEvent of response) {
4266
+ consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent));
4267
+ if (rawEvent.data === "[DONE]") break;
4268
+ if (!rawEvent.data) continue;
4269
+ let chunk;
4270
+ try {
4271
+ chunk = JSON.parse(rawEvent.data);
4272
+ } catch (parseError) {
4273
+ consola.error("Failed to parse stream chunk:", parseError, rawEvent.data);
4274
+ continue;
4275
+ }
4276
+ if (chunk.model && !acc.model) acc.model = chunk.model;
4277
+ const events$1 = translateChunkToAnthropicEvents(chunk, streamState, toolNameMapping);
4278
+ for (const event of events$1) {
4279
+ consola.debug("Translated Anthropic event:", JSON.stringify(event));
4280
+ processAnthropicEvent(event, acc);
4281
+ await stream.writeSSE({
4282
+ event: event.type,
4283
+ data: JSON.stringify(event)
4284
+ });
4285
+ }
4286
+ }
4287
+ }
4288
+ function processAnthropicEvent(event, acc) {
4289
+ switch (event.type) {
4290
+ case "content_block_delta":
4291
+ handleContentBlockDelta(event.delta, acc);
4292
+ break;
4293
+ case "content_block_start":
4294
+ handleContentBlockStart(event.content_block, acc);
4295
+ break;
4296
+ case "content_block_stop":
4297
+ handleContentBlockStop(acc);
4298
+ break;
4299
+ case "message_delta":
4300
+ handleMessageDelta(event.delta, event.usage, acc);
4301
+ break;
4302
+ default: break;
4303
+ }
4304
+ }
4305
+ function handleContentBlockDelta(delta, acc) {
4306
+ if (delta.type === "text_delta") acc.content += delta.text;
4307
+ else if (delta.type === "input_json_delta" && acc.currentToolCall) acc.currentToolCall.input += delta.partial_json;
4308
+ }
4309
+ function handleContentBlockStart(block, acc) {
4310
+ if (block.type === "tool_use") acc.currentToolCall = {
4311
+ id: block.id,
4312
+ name: block.name,
4313
+ input: ""
4314
+ };
4315
+ }
4316
+ function handleContentBlockStop(acc) {
4317
+ if (acc.currentToolCall) {
4318
+ acc.toolCalls.push(acc.currentToolCall);
4319
+ acc.currentToolCall = null;
2946
4320
  }
2947
4321
  }
4322
+ function handleMessageDelta(delta, usage, acc) {
4323
+ if (delta.stop_reason) acc.stopReason = delta.stop_reason;
4324
+ if (usage) {
4325
+ acc.inputTokens = usage.input_tokens ?? 0;
4326
+ acc.outputTokens = usage.output_tokens;
4327
+ }
4328
+ }
4329
+ function recordStreamingResponse(acc, fallbackModel, ctx) {
4330
+ const contentBlocks = [];
4331
+ if (acc.content) contentBlocks.push({
4332
+ type: "text",
4333
+ text: acc.content
4334
+ });
4335
+ for (const tc of acc.toolCalls) contentBlocks.push({
4336
+ type: "tool_use",
4337
+ ...tc
4338
+ });
4339
+ recordResponse(ctx.historyId, {
4340
+ success: true,
4341
+ model: acc.model || fallbackModel,
4342
+ usage: {
4343
+ input_tokens: acc.inputTokens,
4344
+ output_tokens: acc.outputTokens
4345
+ },
4346
+ stop_reason: acc.stopReason || void 0,
4347
+ content: contentBlocks.length > 0 ? {
4348
+ role: "assistant",
4349
+ content: contentBlocks
4350
+ } : null,
4351
+ toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
4352
+ }, Date.now() - ctx.startTime);
4353
+ }
4354
+ function recordStreamingError(opts) {
4355
+ const { acc, fallbackModel, ctx, error } = opts;
4356
+ recordResponse(ctx.historyId, {
4357
+ success: false,
4358
+ model: acc.model || fallbackModel,
4359
+ usage: {
4360
+ input_tokens: 0,
4361
+ output_tokens: 0
4362
+ },
4363
+ error: error instanceof Error ? error.message : "Stream error",
4364
+ content: null
4365
+ }, Date.now() - ctx.startTime);
4366
+ }
4367
+ function completeTracking(trackingId, inputTokens, outputTokens) {
4368
+ if (!trackingId) return;
4369
+ requestTracker.updateRequest(trackingId, {
4370
+ inputTokens,
4371
+ outputTokens
4372
+ });
4373
+ requestTracker.completeRequest(trackingId, 200, {
4374
+ inputTokens,
4375
+ outputTokens
4376
+ });
4377
+ }
4378
+ function failTracking(trackingId, error) {
4379
+ if (!trackingId) return;
4380
+ requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
4381
+ }
2948
4382
  function convertAnthropicMessages(messages) {
2949
4383
  return messages.map((msg) => {
2950
4384
  if (typeof msg.content === "string") return {
@@ -3025,7 +4459,21 @@ modelRoutes.get("/", async (c) => {
3025
4459
  created: 0,
3026
4460
  created_at: (/* @__PURE__ */ new Date(0)).toISOString(),
3027
4461
  owned_by: model.vendor,
3028
- display_name: model.name
4462
+ display_name: model.name,
4463
+ capabilities: {
4464
+ family: model.capabilities.family,
4465
+ type: model.capabilities.type,
4466
+ tokenizer: model.capabilities.tokenizer,
4467
+ limits: {
4468
+ max_context_window_tokens: model.capabilities.limits.max_context_window_tokens,
4469
+ max_output_tokens: model.capabilities.limits.max_output_tokens,
4470
+ max_prompt_tokens: model.capabilities.limits.max_prompt_tokens
4471
+ },
4472
+ supports: {
4473
+ tool_calls: model.capabilities.supports.tool_calls,
4474
+ parallel_tool_calls: model.capabilities.supports.parallel_tool_calls
4475
+ }
4476
+ }
3029
4477
  }));
3030
4478
  return c.json({
3031
4479
  object: "list",
@@ -3063,7 +4511,7 @@ usageRoute.get("/", async (c) => {
3063
4511
  //#endregion
3064
4512
  //#region src/server.ts
3065
4513
  const server = new Hono();
3066
- server.use(logger());
4514
+ server.use(tuiLogger());
3067
4515
  server.use(cors());
3068
4516
  server.get("/", (c) => c.text("Server running"));
3069
4517
  server.get("/health", (c) => {
@@ -3103,8 +4551,17 @@ async function runServer(options) {
3103
4551
  state.rateLimitSeconds = options.rateLimit;
3104
4552
  state.rateLimitWait = options.rateLimitWait;
3105
4553
  state.showToken = options.showToken;
4554
+ state.autoCompact = options.autoCompact;
4555
+ if (options.autoCompact) consola.info("Auto-compact enabled: will compress context when exceeding token limits");
3106
4556
  initHistory(options.history, options.historyLimit);
3107
- if (options.history) consola.info(`History recording enabled (max ${options.historyLimit} entries)`);
4557
+ if (options.history) {
4558
+ const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
4559
+ consola.info(`History recording enabled (${limitText} entries)`);
4560
+ }
4561
+ initTui({
4562
+ enabled: true,
4563
+ mode: options.tui
4564
+ });
3108
4565
  await ensurePaths();
3109
4566
  await cacheVSCodeVersion();
3110
4567
  if (options.githubToken) {
@@ -3224,7 +4681,17 @@ const start = defineCommand({
3224
4681
  "history-limit": {
3225
4682
  type: "string",
3226
4683
  default: "1000",
3227
- description: "Maximum number of history entries to keep in memory"
4684
+ description: "Maximum number of history entries to keep in memory (0 = unlimited)"
4685
+ },
4686
+ tui: {
4687
+ type: "string",
4688
+ default: "console",
4689
+ description: "TUI mode: 'console' for simple log output, 'fullscreen' for interactive terminal UI with tabs"
4690
+ },
4691
+ "auto-compact": {
4692
+ type: "boolean",
4693
+ default: false,
4694
+ description: "Automatically compress conversation history when exceeding model token limits"
3228
4695
  }
3229
4696
  },
3230
4697
  run({ args }) {
@@ -3243,7 +4710,9 @@ const start = defineCommand({
3243
4710
  showToken: args["show-token"],
3244
4711
  proxyEnv: args["proxy-env"],
3245
4712
  history: args.history,
3246
- historyLimit: Number.parseInt(args["history-limit"], 10)
4713
+ historyLimit: Number.parseInt(args["history-limit"], 10),
4714
+ tui: args.tui,
4715
+ autoCompact: args["auto-compact"]
3247
4716
  });
3248
4717
  }
3249
4718
  });
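Taken together with the flag definitions above, a start command exercising the new options might look like the following (assuming the package exposes a `copilot-api` binary; only flags defined above are used):

copilot-api start --auto-compact --tui fullscreen --history-limit 0

`--history-limit 0` now means unlimited per the updated description, and `--tui fullscreen` selects the interactive terminal UI instead of plain console logging.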