@hsupu/copilot-api 0.7.0 → 0.8.0

package/dist/main.js CHANGED
@@ -12,9 +12,11 @@ import { getProxyForUrl } from "proxy-from-env";
  import { Agent, ProxyAgent, setGlobalDispatcher } from "undici";
  import { execSync } from "node:child_process";
  import process$1 from "node:process";
+ import { Box, Text, render, useInput, useStdout } from "ink";
+ import React, { useEffect, useState } from "react";
+ import { Fragment, jsx, jsxs } from "react/jsx-runtime";
  import { Hono } from "hono";
  import { cors } from "hono/cors";
- import { logger } from "hono/logger";
  import { streamSSE } from "hono/streaming";
  import { events } from "fetch-event-stream";

@@ -45,7 +47,8 @@ const state = {
  accountType: "individual",
  manualApprove: false,
  rateLimitWait: false,
- showToken: false
+ showToken: false,
+ autoCompact: false
  };

  //#endregion
@@ -104,6 +107,27 @@ var HTTPError = class HTTPError extends Error {
  return new HTTPError(message, response.status, text);
  }
  };
+ /** Parse token limit info from error message */
+ function parseTokenLimitError(message) {
+ const match = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
+ if (match) return {
+ current: Number.parseInt(match[1], 10),
+ limit: Number.parseInt(match[2], 10)
+ };
+ return null;
+ }
+ /** Format Anthropic-compatible error for token limit exceeded */
+ function formatTokenLimitError(current, limit) {
+ const excess = current - limit;
+ const percentage = Math.round(excess / limit * 100);
+ return {
+ type: "error",
+ error: {
+ type: "invalid_request_error",
+ message: `prompt is too long: ${current} tokens > ${limit} maximum (${excess} tokens over, ${percentage}% excess)`
+ }
+ };
+ }
  async function forwardError(c, error) {
  consola.error("Error occurred:", error);
  if (error instanceof HTTPError) {
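A quick illustration of what the two new helpers above produce. The sample numbers here are hypothetical, not taken from a real response:

// Hypothetical example (not from a real Copilot response):
// parseTokenLimitError("prompt token count of 150000 exceeds the limit of 128000")
//   -> { current: 150000, limit: 128000 }
// formatTokenLimitError(150000, 128000) ->
//   { type: "error", error: { type: "invalid_request_error",
//     message: "prompt is too long: 150000 tokens > 128000 maximum (22000 tokens over, 17% excess)" } }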
@@ -114,6 +138,15 @@ async function forwardError(c, error) {
  errorJson = error.responseText;
  }
  consola.error("HTTP error:", errorJson);
+ const copilotError = errorJson;
+ if (copilotError.error?.code === "model_max_prompt_tokens_exceeded") {
+ const tokenInfo = parseTokenLimitError(copilotError.error.message ?? "");
+ if (tokenInfo) {
+ const formattedError = formatTokenLimitError(tokenInfo.current, tokenInfo.limit);
+ consola.info("Returning formatted token limit error:", formattedError);
+ return c.json(formattedError, 400);
+ }
+ }
  return c.json({ error: {
  message: error.responseText,
  type: "error"
@@ -476,7 +509,7 @@ const logout = defineCommand({

  //#endregion
  //#region src/lib/history.ts
- function generateId() {
+ function generateId$1() {
  return Date.now().toString(36) + Math.random().toString(36).slice(2, 9);
  }
  const historyState = {
@@ -492,7 +525,7 @@ function initHistory(enabled, maxEntries) {
  historyState.maxEntries = maxEntries;
  historyState.entries = [];
  historyState.sessions = /* @__PURE__ */ new Map();
- historyState.currentSessionId = enabled ? generateId() : "";
+ historyState.currentSessionId = enabled ? generateId$1() : "";
  }
  function isHistoryEnabled() {
  return historyState.enabled;
@@ -506,7 +539,7 @@ function getCurrentSession(endpoint) {
  return historyState.currentSessionId;
  }
  }
- const sessionId = generateId();
+ const sessionId = generateId$1();
  historyState.currentSessionId = sessionId;
  historyState.sessions.set(sessionId, {
  id: sessionId,
@@ -526,7 +559,7 @@ function recordRequest(endpoint, request) {
  const session = historyState.sessions.get(sessionId);
  if (!session) return "";
  const entry = {
- id: generateId(),
+ id: generateId$1(),
  sessionId,
  timestamp: Date.now(),
  endpoint,
@@ -543,7 +576,11 @@ function recordRequest(endpoint, request) {
  historyState.entries.push(entry);
  session.requestCount++;
  if (!session.models.includes(request.model)) session.models.push(request.model);
- while (historyState.entries.length > historyState.maxEntries) {
+ if (request.tools && request.tools.length > 0) {
+ if (!session.toolsUsed) session.toolsUsed = [];
+ for (const tool of request.tools) if (!session.toolsUsed.includes(tool.name)) session.toolsUsed.push(tool.name);
+ }
+ while (historyState.maxEntries > 0 && historyState.entries.length > historyState.maxEntries) {
  const removed = historyState.entries.shift();
  if (removed) {
  if (historyState.entries.filter((e) => e.sessionId === removed.sessionId).length === 0) historyState.sessions.delete(removed.sessionId);
@@ -622,13 +659,13 @@ function getSessionEntries(sessionId) {
  function clearHistory() {
  historyState.entries = [];
  historyState.sessions = /* @__PURE__ */ new Map();
- historyState.currentSessionId = generateId();
+ historyState.currentSessionId = generateId$1();
  }
  function deleteSession(sessionId) {
  if (!historyState.sessions.has(sessionId)) return false;
  historyState.entries = historyState.entries.filter((e) => e.sessionId !== sessionId);
  historyState.sessions.delete(sessionId);
- if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId();
+ if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId$1();
  return true;
  }
  function getStats() {
@@ -800,16 +837,16 @@ function generateEnvScript(envVars, commandToRun = "") {
  let commandBlock;
  switch (shell) {
  case "powershell":
- commandBlock = filteredEnvVars.map(([key, value]) => `$env:${key} = "${value.replace(/"/g, "`\"")}"`).join("; ");
+ commandBlock = filteredEnvVars.map(([key, value]) => `$env:${key} = "${value.replaceAll("\"", "`\"")}"`).join("; ");
  break;
  case "cmd":
  commandBlock = filteredEnvVars.map(([key, value]) => `set ${key}=${value}`).join(" & ");
  break;
  case "fish":
- commandBlock = filteredEnvVars.map(([key, value]) => `set -gx ${key} "${value.replace(/"/g, "\\\"")}"`).join("; ");
+ commandBlock = filteredEnvVars.map(([key, value]) => `set -gx ${key} "${value.replaceAll("\"", String.raw`\"`)}"`).join("; ");
  break;
  default: {
- const assignments = filteredEnvVars.map(([key, value]) => `${key}="${value.replace(/"/g, "\\\"")}"`).join(" ");
+ const assignments = filteredEnvVars.map(([key, value]) => `${key}="${value.replaceAll("\"", String.raw`\"`)}"`).join(" ");
  commandBlock = filteredEnvVars.length > 0 ? `export ${assignments}` : "";
  break;
  }
@@ -819,68 +856,580 @@ function generateEnvScript(envVars, commandToRun = "") {
  }

  //#endregion
- //#region src/lib/approval.ts
- const awaitApproval = async () => {
- if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", 403, JSON.stringify({ message: "Request rejected" }));
+ //#region src/lib/tui/console-renderer.ts
+ function formatDuration$1(ms) {
+ if (ms < 1e3) return `${ms}ms`;
+ return `${(ms / 1e3).toFixed(1)}s`;
+ }
+ function formatNumber$1(n) {
+ if (n >= 1e6) return `${(n / 1e6).toFixed(1)}M`;
+ if (n >= 1e3) return `${(n / 1e3).toFixed(1)}K`;
+ return String(n);
+ }
+ function formatTokens$1(input, output) {
+ if (input === void 0 || output === void 0) return "-";
+ return `${formatNumber$1(input)}/${formatNumber$1(output)}`;
+ }
+ /**
+ * Console renderer that shows request lifecycle
+ * Start: METHOD /path model-name
+ * Complete: METHOD /path 200 1.2s 1.5K/500 model-name
+ */
+ var ConsoleRenderer = class {
+ activeRequests = /* @__PURE__ */ new Map();
+ showActive;
+ constructor(options) {
+ this.showActive = options?.showActive ?? true;
+ }
+ onRequestStart(request) {
+ this.activeRequests.set(request.id, request);
+ if (this.showActive) {
+ const modelInfo = request.model ? ` ${request.model}` : "";
+ const queueInfo = request.queuePosition !== void 0 && request.queuePosition > 0 ? ` [q#${request.queuePosition}]` : "";
+ consola.log(`[....] ${request.method} ${request.path}${modelInfo}${queueInfo}`);
+ }
+ }
+ onRequestUpdate(id, update) {
+ const request = this.activeRequests.get(id);
+ if (!request) return;
+ Object.assign(request, update);
+ if (this.showActive && update.status === "streaming") {
+ const modelInfo = request.model ? ` ${request.model}` : "";
+ consola.log(`[<-->] ${request.method} ${request.path}${modelInfo} streaming...`);
+ }
+ }
+ onRequestComplete(request) {
+ this.activeRequests.delete(request.id);
+ const status = request.statusCode ?? 0;
+ const duration = formatDuration$1(request.durationMs ?? 0);
+ const tokens = request.model ? formatTokens$1(request.inputTokens, request.outputTokens) : "";
+ const modelInfo = request.model ? ` ${request.model}` : "";
+ const isError = request.status === "error" || status >= 400;
+ const prefix = isError ? "[FAIL]" : "[ OK ]";
+ const tokensPart = tokens ? ` ${tokens}` : "";
+ const content = `${prefix} ${request.method} ${request.path} ${status} ${duration}${tokensPart}${modelInfo}`;
+ if (isError) {
+ const errorInfo = request.error ? `: ${request.error}` : "";
+ consola.log(content + errorInfo);
+ } else consola.log(content);
+ }
+ destroy() {
+ this.activeRequests.clear();
+ }
  };

  //#endregion
- //#region src/lib/queue.ts
- var RequestQueue = class {
- queue = [];
- processing = false;
- lastRequestTime = 0;
- async enqueue(execute, rateLimitSeconds) {
- return new Promise((resolve, reject) => {
- this.queue.push({
- execute,
- resolve,
- reject
- });
- if (this.queue.length > 1) {
- const waitTime = Math.ceil((this.queue.length - 1) * rateLimitSeconds);
- consola.info(`Request queued. Position: ${this.queue.length}, estimated wait: ${waitTime}s`);
- }
- this.processQueue(rateLimitSeconds);
+ //#region src/lib/tui/fullscreen-renderer.tsx
+ const tuiState = {
+ activeRequests: /* @__PURE__ */ new Map(),
+ completedRequests: [],
+ errorRequests: []
+ };
+ const listeners = [];
+ function notifyListeners() {
+ for (const listener of listeners) listener();
+ }
+ function formatDuration(ms) {
+ if (ms < 1e3) return `${ms}ms`;
+ return `${(ms / 1e3).toFixed(1)}s`;
+ }
+ function formatNumber(n) {
+ if (n >= 1e6) return `${(n / 1e6).toFixed(1)}M`;
+ if (n >= 1e3) return `${(n / 1e3).toFixed(1)}K`;
+ return String(n);
+ }
+ function formatTokens(input, output) {
+ if (input === void 0 || output === void 0) return "-";
+ return `${formatNumber(input)}/${formatNumber(output)}`;
+ }
+ function getElapsedTime(startTime) {
+ return formatDuration(Date.now() - startTime);
+ }
+ function TabHeader({ currentTab, counts }) {
+ const tabs = [
+ {
+ key: "active",
+ label: "Active",
+ count: counts.active
+ },
+ {
+ key: "completed",
+ label: "Completed",
+ count: counts.completed
+ },
+ {
+ key: "errors",
+ label: "Errors",
+ count: counts.errors
+ }
+ ];
+ return /* @__PURE__ */ jsxs(Box, {
+ borderStyle: "single",
+ paddingX: 1,
+ children: [tabs.map((tab, idx) => /* @__PURE__ */ jsxs(React.Fragment, { children: [idx > 0 && /* @__PURE__ */ jsx(Text, { children: " │ " }), /* @__PURE__ */ jsxs(Text, {
+ bold: currentTab === tab.key,
+ color: currentTab === tab.key ? "cyan" : void 0,
+ inverse: currentTab === tab.key,
+ children: [
+ " ",
+ "[",
+ idx + 1,
+ "] ",
+ tab.label,
+ " (",
+ tab.count,
+ ")",
+ " "
+ ]
+ })] }, tab.key)), /* @__PURE__ */ jsx(Text, {
+ dimColor: true,
+ children: " │ Press 1/2/3 to switch tabs, q to quit"
+ })]
+ });
+ }
+ function getStatusColor(status) {
+ if (status === "streaming") return "yellow";
+ if (status === "queued") return "gray";
+ return "blue";
+ }
+ function getStatusIcon(status) {
+ if (status === "streaming") return "⟳";
+ if (status === "queued") return "◷";
+ return "●";
+ }
+ function ActiveRequestRow({ request }) {
+ const [, setTick] = useState(0);
+ useEffect(() => {
+ const interval = setInterval(() => setTick((t) => t + 1), 1e3);
+ return () => clearInterval(interval);
+ }, []);
+ const statusColor = getStatusColor(request.status);
+ const statusIcon = getStatusIcon(request.status);
+ return /* @__PURE__ */ jsxs(Box, { children: [
+ /* @__PURE__ */ jsxs(Text, {
+ color: statusColor,
+ children: [statusIcon, " "]
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ bold: true,
+ children: request.method
+ }),
+ /* @__PURE__ */ jsxs(Text, { children: [
+ " ",
+ request.path,
+ " "
+ ] }),
+ /* @__PURE__ */ jsxs(Text, {
+ dimColor: true,
+ children: [getElapsedTime(request.startTime), " "]
+ }),
+ request.queuePosition !== void 0 && request.queuePosition > 0 && /* @__PURE__ */ jsxs(Text, {
+ color: "gray",
+ children: [
+ "[queue #",
+ request.queuePosition,
+ "] "
+ ]
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ color: "magenta",
+ children: request.model
+ })
+ ] });
+ }
+ function CompletedRequestRow({ request }) {
+ const isError = request.status === "error" || (request.statusCode ?? 0) >= 400;
+ return /* @__PURE__ */ jsxs(Box, { children: [
+ /* @__PURE__ */ jsxs(Text, {
+ color: isError ? "red" : "green",
+ children: [isError ? "✗" : "✓", " "]
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ bold: true,
+ children: request.method
+ }),
+ /* @__PURE__ */ jsxs(Text, { children: [
+ " ",
+ request.path,
+ " "
+ ] }),
+ /* @__PURE__ */ jsxs(Text, {
+ color: isError ? "red" : "green",
+ children: [request.statusCode ?? "-", " "]
+ }),
+ /* @__PURE__ */ jsxs(Text, {
+ dimColor: true,
+ children: [formatDuration(request.durationMs ?? 0), " "]
+ }),
+ /* @__PURE__ */ jsxs(Text, { children: [formatTokens(request.inputTokens, request.outputTokens), " "] }),
+ /* @__PURE__ */ jsx(Text, {
+ color: "magenta",
+ children: request.model
+ })
+ ] });
+ }
+ function ErrorRequestRow({ request }) {
+ return /* @__PURE__ */ jsxs(Box, {
+ flexDirection: "column",
+ children: [/* @__PURE__ */ jsxs(Box, { children: [
+ /* @__PURE__ */ jsx(Text, {
+ color: "red",
+ children: "✗ "
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ bold: true,
+ children: request.method
+ }),
+ /* @__PURE__ */ jsxs(Text, { children: [
+ " ",
+ request.path,
+ " "
+ ] }),
+ /* @__PURE__ */ jsxs(Text, {
+ color: "red",
+ children: [request.statusCode ?? "-", " "]
+ }),
+ /* @__PURE__ */ jsxs(Text, {
+ dimColor: true,
+ children: [formatDuration(request.durationMs ?? 0), " "]
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ color: "magenta",
+ children: request.model
+ })
+ ] }), request.error && /* @__PURE__ */ jsx(Box, {
+ marginLeft: 2,
+ children: /* @__PURE__ */ jsxs(Text, {
+ color: "red",
+ dimColor: true,
+ children: ["└─ ", request.error]
+ })
+ })]
+ });
+ }
+ function ContentPanel({ currentTab, activeList, completedList, errorList, contentHeight }) {
+ if (currentTab === "active") {
+ if (activeList.length === 0) return /* @__PURE__ */ jsx(Text, {
+ dimColor: true,
+ children: "No active requests"
  });
+ return /* @__PURE__ */ jsx(Fragment, { children: activeList.slice(0, contentHeight).map((req) => /* @__PURE__ */ jsx(ActiveRequestRow, { request: req }, req.id)) });
  }
- async processQueue(rateLimitSeconds) {
- if (this.processing) return;
- this.processing = true;
- while (this.queue.length > 0) {
- const elapsedMs = Date.now() - this.lastRequestTime;
- const requiredMs = rateLimitSeconds * 1e3;
- if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
- const waitMs = requiredMs - elapsedMs;
- consola.debug(`Rate limit: waiting ${Math.ceil(waitMs / 1e3)}s`);
- await new Promise((resolve) => setTimeout(resolve, waitMs));
- }
- const request = this.queue.shift();
- if (!request) break;
- this.lastRequestTime = Date.now();
- try {
- const result = await request.execute();
- request.resolve(result);
- } catch (error) {
- request.reject(error);
- }
+ if (currentTab === "completed") {
+ if (completedList.length === 0) return /* @__PURE__ */ jsx(Text, {
+ dimColor: true,
+ children: "No completed requests"
+ });
+ return /* @__PURE__ */ jsx(Fragment, { children: completedList.slice(-contentHeight).reverse().map((req) => /* @__PURE__ */ jsx(CompletedRequestRow, { request: req }, req.id)) });
+ }
+ if (errorList.length === 0) return /* @__PURE__ */ jsx(Text, {
+ dimColor: true,
+ children: "No errors"
+ });
+ return /* @__PURE__ */ jsx(Fragment, { children: errorList.slice(-contentHeight).reverse().map((req) => /* @__PURE__ */ jsx(ErrorRequestRow, { request: req }, req.id)) });
+ }
+ function TuiApp() {
+ const [currentTab, setCurrentTab] = useState("active");
+ const [, forceUpdate] = useState(0);
+ const { stdout } = useStdout();
+ useEffect(() => {
+ const listener = () => forceUpdate((n) => n + 1);
+ listeners.push(listener);
+ return () => {
+ const idx = listeners.indexOf(listener);
+ if (idx !== -1) listeners.splice(idx, 1);
+ };
+ }, []);
+ useInput((input, key) => {
+ switch (input) {
+ case "1":
+ setCurrentTab("active");
+ break;
+ case "2":
+ setCurrentTab("completed");
+ break;
+ case "3":
+ setCurrentTab("errors");
+ break;
+ default: if (input === "q" || key.ctrl && input === "c") process.exit(0);
  }
- this.processing = false;
+ });
+ const activeList = Array.from(tuiState.activeRequests.values());
+ const completedList = tuiState.completedRequests;
+ const errorList = tuiState.errorRequests;
+ const counts = {
+ active: activeList.length,
+ completed: completedList.length,
+ errors: errorList.length
+ };
+ const terminalHeight = stdout.rows || 24;
+ const contentHeight = terminalHeight - 3 - 1 - 2;
+ return /* @__PURE__ */ jsxs(Box, {
+ flexDirection: "column",
+ height: terminalHeight,
+ children: [
+ /* @__PURE__ */ jsx(TabHeader, {
+ currentTab,
+ counts
+ }),
+ /* @__PURE__ */ jsx(Box, {
+ flexDirection: "column",
+ height: contentHeight,
+ borderStyle: "single",
+ paddingX: 1,
+ overflow: "hidden",
+ children: /* @__PURE__ */ jsx(ContentPanel, {
+ currentTab,
+ activeList,
+ completedList,
+ errorList,
+ contentHeight
+ })
+ }),
+ /* @__PURE__ */ jsx(Box, {
+ paddingX: 1,
+ children: /* @__PURE__ */ jsxs(Text, {
+ dimColor: true,
+ children: [
+ "copilot-api │ Active: ",
+ counts.active,
+ " │ Completed: ",
+ counts.completed,
+ " ",
+ "│ Errors: ",
+ counts.errors
+ ]
+ })
+ })
+ ]
+ });
+ }
+ /**
+ * Fullscreen TUI renderer using Ink
+ * Provides interactive terminal interface with tabs
+ */
+ var FullscreenRenderer = class {
+ inkInstance = null;
+ maxHistory = 100;
+ constructor(options) {
+ if (options?.maxHistory !== void 0) this.maxHistory = options.maxHistory;
  }
- get length() {
- return this.queue.length;
+ start() {
+ if (this.inkInstance) return;
+ this.inkInstance = render(/* @__PURE__ */ jsx(TuiApp, {}), {});
+ }
+ onRequestStart(request) {
+ tuiState.activeRequests.set(request.id, { ...request });
+ notifyListeners();
+ }
+ onRequestUpdate(id, update) {
+ const request = tuiState.activeRequests.get(id);
+ if (!request) return;
+ Object.assign(request, update);
+ notifyListeners();
+ }
+ onRequestComplete(request) {
+ tuiState.activeRequests.delete(request.id);
+ if (request.status === "error" || (request.statusCode ?? 0) >= 400) {
+ tuiState.errorRequests.push({ ...request });
+ while (tuiState.errorRequests.length > this.maxHistory) tuiState.errorRequests.shift();
+ }
+ tuiState.completedRequests.push({ ...request });
+ while (tuiState.completedRequests.length > this.maxHistory) tuiState.completedRequests.shift();
+ notifyListeners();
+ }
+ destroy() {
+ if (this.inkInstance) {
+ this.inkInstance.unmount();
+ this.inkInstance = null;
+ }
+ tuiState.activeRequests.clear();
+ tuiState.completedRequests = [];
+ tuiState.errorRequests = [];
  }
  };
- const requestQueue = new RequestQueue();
+
+ //#endregion
+ //#region src/lib/tui/tracker.ts
+ function generateId() {
+ return Date.now().toString(36) + Math.random().toString(36).slice(2, 6);
+ }
+ var RequestTracker = class {
+ requests = /* @__PURE__ */ new Map();
+ renderer = null;
+ completedQueue = [];
+ historySize = 5;
+ completedDisplayMs = 2e3;
+ setRenderer(renderer) {
+ this.renderer = renderer;
+ }
+ setOptions(options) {
+ if (options.historySize !== void 0) this.historySize = options.historySize;
+ if (options.completedDisplayMs !== void 0) this.completedDisplayMs = options.completedDisplayMs;
+ }
+ /**
+ * Start tracking a new request
+ * Returns the tracking ID
+ */
+ startRequest(method, path$1, model) {
+ const id = generateId();
+ const request = {
+ id,
+ method,
+ path: path$1,
+ model,
+ startTime: Date.now(),
+ status: "executing"
+ };
+ this.requests.set(id, request);
+ this.renderer?.onRequestStart(request);
+ return id;
+ }
+ /**
+ * Update request status
+ */
+ updateRequest(id, update) {
+ const request = this.requests.get(id);
+ if (!request) return;
+ if (update.status !== void 0) request.status = update.status;
+ if (update.statusCode !== void 0) request.statusCode = update.statusCode;
+ if (update.durationMs !== void 0) request.durationMs = update.durationMs;
+ if (update.inputTokens !== void 0) request.inputTokens = update.inputTokens;
+ if (update.outputTokens !== void 0) request.outputTokens = update.outputTokens;
+ if (update.error !== void 0) request.error = update.error;
+ if (update.queuePosition !== void 0) request.queuePosition = update.queuePosition;
+ this.renderer?.onRequestUpdate(id, update);
+ }
+ /**
+ * Mark request as completed
+ */
+ completeRequest(id, statusCode, usage) {
+ const request = this.requests.get(id);
+ if (!request) return;
+ request.status = statusCode >= 200 && statusCode < 400 ? "completed" : "error";
+ request.statusCode = statusCode;
+ request.durationMs = Date.now() - request.startTime;
+ if (usage) {
+ request.inputTokens = usage.inputTokens;
+ request.outputTokens = usage.outputTokens;
+ }
+ this.renderer?.onRequestComplete(request);
+ this.requests.delete(id);
+ this.completedQueue.push(request);
+ while (this.completedQueue.length > this.historySize) this.completedQueue.shift();
+ setTimeout(() => {
+ const idx = this.completedQueue.indexOf(request);
+ if (idx !== -1) this.completedQueue.splice(idx, 1);
+ }, this.completedDisplayMs);
+ }
+ /**
+ * Mark request as failed with error
+ */
+ failRequest(id, error) {
+ const request = this.requests.get(id);
+ if (!request) return;
+ request.status = "error";
+ request.error = error;
+ request.durationMs = Date.now() - request.startTime;
+ this.renderer?.onRequestComplete(request);
+ this.requests.delete(id);
+ this.completedQueue.push(request);
+ while (this.completedQueue.length > this.historySize) this.completedQueue.shift();
+ }
+ /**
+ * Get all active requests
+ */
+ getActiveRequests() {
+ return Array.from(this.requests.values());
+ }
+ /**
+ * Get recently completed requests
+ */
+ getCompletedRequests() {
+ return [...this.completedQueue];
+ }
+ /**
+ * Get request by ID
+ */
+ getRequest(id) {
+ return this.requests.get(id);
+ }
+ /**
+ * Clear all tracked requests
+ */
+ clear() {
+ this.requests.clear();
+ this.completedQueue = [];
+ }
+ };
+ const requestTracker = new RequestTracker();
+
+ //#endregion
+ //#region src/lib/tui/middleware.ts
  /**
- * Execute a request with rate limiting via queue.
- * Requests are queued and processed sequentially at the configured rate.
+ * Custom logger middleware that tracks requests through the TUI system
+ * Shows single-line output: METHOD /path 200 1.2s 1.5K/500 model-name
+ *
+ * For streaming responses (SSE), the handler is responsible for calling
+ * completeRequest after the stream finishes.
  */
- async function executeWithRateLimit(state$1, execute) {
- if (state$1.rateLimitSeconds === void 0) return execute();
- return requestQueue.enqueue(execute, state$1.rateLimitSeconds);
+ function tuiLogger() {
+ return async (c, next) => {
+ const method = c.req.method;
+ const path$1 = c.req.path;
+ const trackingId = requestTracker.startRequest(method, path$1, "");
+ c.set("trackingId", trackingId);
+ try {
+ await next();
+ if ((c.res.headers.get("content-type") ?? "").includes("text/event-stream")) return;
+ const status = c.res.status;
+ const inputTokens = c.res.headers.get("x-input-tokens");
+ const outputTokens = c.res.headers.get("x-output-tokens");
+ const model = c.res.headers.get("x-model");
+ if (model) {
+ const request = requestTracker.getRequest(trackingId);
+ if (request) request.model = model;
+ }
+ requestTracker.completeRequest(trackingId, status, inputTokens && outputTokens ? {
+ inputTokens: Number.parseInt(inputTokens, 10),
+ outputTokens: Number.parseInt(outputTokens, 10)
+ } : void 0);
+ } catch (error) {
+ requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Unknown error");
+ throw error;
+ }
+ };
+ }
+
+ //#endregion
+ //#region src/lib/tui/index.ts
+ /**
+ * Initialize the TUI system
+ * @param options.mode - "console" for simple log output (default), "fullscreen" for interactive TUI
+ */
+ function initTui(options) {
+ const enabled = options?.enabled ?? process.stdout.isTTY;
+ const mode = options?.mode ?? "console";
+ if (enabled) if (mode === "fullscreen") {
+ const renderer = new FullscreenRenderer({ maxHistory: options?.historySize ?? 100 });
+ requestTracker.setRenderer(renderer);
+ renderer.start();
+ } else {
+ const renderer = new ConsoleRenderer();
+ requestTracker.setRenderer(renderer);
+ }
+ if (options?.historySize !== void 0 || options?.completedDisplayMs !== void 0) requestTracker.setOptions({
+ historySize: options.historySize,
+ completedDisplayMs: options.completedDisplayMs
+ });
  }

+ //#endregion
+ //#region src/lib/approval.ts
+ const awaitApproval = async () => {
+ if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", 403, JSON.stringify({ message: "Request rejected" }));
+ };
+
  //#endregion
  //#region src/lib/tokenizer.ts
  const ENCODING_MAP = {
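A minimal sketch of how the new TUI pieces above fit together, assuming a Hono app like the one in this bundle. The wiring itself is illustrative, not the package's documented API:

// Illustrative wiring only:
const app = new Hono();
app.use(tuiLogger()); // every request gets tracked via requestTracker
initTui({ mode: "console" }); // or { mode: "fullscreen", historySize: 100 }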
@@ -1085,6 +1634,229 @@ const getTokenCount = async (payload, model) => {
  };
  };

+ //#endregion
+ //#region src/lib/auto-compact.ts
+ const DEFAULT_CONFIG = {
+ targetTokens: 1e5,
+ safetyMarginPercent: 10
+ };
+ /**
+ * Check if payload needs compaction based on model limits.
+ * Uses a safety margin to account for token counting differences.
+ */
+ async function checkNeedsCompaction(payload, model, safetyMarginPercent = 10) {
+ const currentTokens = (await getTokenCount(payload, model)).input;
+ const rawLimit = model.capabilities.limits.max_prompt_tokens ?? 128e3;
+ const limit = Math.floor(rawLimit * (1 - safetyMarginPercent / 100));
+ return {
+ needed: currentTokens > limit,
+ currentTokens,
+ limit
+ };
+ }
+ /**
+ * Calculate approximate token count for a single message.
+ * This is a fast estimation for splitting decisions.
+ */
+ function estimateMessageTokens(message) {
+ let text = "";
+ if (typeof message.content === "string") text = message.content;
+ else if (Array.isArray(message.content)) {
+ for (const part of message.content) if (part.type === "text") text += part.text;
+ else if ("image_url" in part) text += part.image_url.url;
+ }
+ if (message.tool_calls) text += JSON.stringify(message.tool_calls);
+ return Math.ceil(text.length / 4) + 10;
+ }
+ /**
+ * Extract system messages from the beginning of the message list.
+ */
+ function extractSystemMessages(messages) {
+ const systemMessages = [];
+ let i = 0;
+ while (i < messages.length) {
+ const msg = messages[i];
+ if (msg.role === "system" || msg.role === "developer") {
+ systemMessages.push(msg);
+ i++;
+ } else break;
+ }
+ return {
+ systemMessages,
+ remainingMessages: messages.slice(i)
+ };
+ }
+ /**
+ * Find messages to keep from the end to stay under target tokens.
+ * Returns the starting index of messages to preserve.
+ */
+ function findPreserveIndex(messages, targetTokens, systemTokens) {
+ const availableTokens = targetTokens - systemTokens - 500;
+ let accumulatedTokens = 0;
+ for (let i = messages.length - 1; i >= 0; i--) {
+ const msgTokens = estimateMessageTokens(messages[i]);
+ if (accumulatedTokens + msgTokens > availableTokens) return i + 1;
+ accumulatedTokens += msgTokens;
+ }
+ return 0;
+ }
+ /**
+ * Calculate estimated tokens for system messages.
+ */
+ function estimateSystemTokens(systemMessages) {
+ return systemMessages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
+ }
+ /**
+ * Create a truncation marker message.
+ */
+ function createTruncationMarker(removedCount) {
+ return {
+ role: "user",
+ content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages were removed to fit context limits. The conversation continues below.]`
+ };
+ }
+ /**
+ * Perform auto-compaction on a payload that exceeds token limits.
+ * This uses simple truncation - no LLM calls required.
+ */
+ async function autoCompact(payload, model, config = {}) {
+ const cfg = {
+ ...DEFAULT_CONFIG,
+ ...config
+ };
+ const originalTokens = (await getTokenCount(payload, model)).input;
+ const rawLimit = model.capabilities.limits.max_prompt_tokens ?? 128e3;
+ const limit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
+ if (originalTokens <= limit) return {
+ payload,
+ wasCompacted: false,
+ originalTokens,
+ compactedTokens: originalTokens,
+ removedMessageCount: 0
+ };
+ consola.info(`Auto-compact: ${originalTokens} tokens exceeds limit of ${limit}, truncating...`);
+ const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
+ const systemTokens = estimateSystemTokens(systemMessages);
+ consola.debug(`Auto-compact: ${systemMessages.length} system messages (~${systemTokens} tokens)`);
+ const effectiveTarget = Math.min(cfg.targetTokens, limit);
+ const preserveIndex = findPreserveIndex(remainingMessages, effectiveTarget, systemTokens);
+ if (preserveIndex === 0) {
+ consola.warn("Auto-compact: Cannot truncate further without losing all conversation history");
+ return {
+ payload,
+ wasCompacted: false,
+ originalTokens,
+ compactedTokens: originalTokens,
+ removedMessageCount: 0
+ };
+ }
+ const removedMessages = remainingMessages.slice(0, preserveIndex);
+ const preservedMessages = remainingMessages.slice(preserveIndex);
+ consola.info(`Auto-compact: Removing ${removedMessages.length} messages, keeping ${preservedMessages.length}`);
+ const truncationMarker = createTruncationMarker(removedMessages.length);
+ const newPayload = {
+ ...payload,
+ messages: [
+ ...systemMessages,
+ truncationMarker,
+ ...preservedMessages
+ ]
+ };
+ const newTokenCount = await getTokenCount(newPayload, model);
+ consola.info(`Auto-compact: Reduced from ${originalTokens} to ${newTokenCount.input} tokens`);
+ if (newTokenCount.input > limit) {
+ consola.warn(`Auto-compact: Still over limit (${newTokenCount.input} > ${limit}), trying more aggressive truncation`);
+ const aggressiveTarget = Math.floor(effectiveTarget * .7);
+ if (aggressiveTarget < 2e4) {
+ consola.error("Auto-compact: Cannot reduce further, target too low");
+ return {
+ payload: newPayload,
+ wasCompacted: true,
+ originalTokens,
+ compactedTokens: newTokenCount.input,
+ removedMessageCount: removedMessages.length
+ };
+ }
+ return autoCompact(payload, model, {
+ ...cfg,
+ targetTokens: aggressiveTarget
+ });
+ }
+ return {
+ payload: newPayload,
+ wasCompacted: true,
+ originalTokens,
+ compactedTokens: newTokenCount.input,
+ removedMessageCount: removedMessages.length
+ };
+ }
+ /**
+ * Create a marker to append to responses indicating auto-compaction occurred.
+ */
+ function createCompactionMarker(result) {
+ if (!result.wasCompacted) return "";
+ const reduction = result.originalTokens - result.compactedTokens;
+ const percentage = Math.round(reduction / result.originalTokens * 100);
+ return `\n\n---\n[Auto-compacted: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
+ }
+
+ //#endregion
+ //#region src/lib/queue.ts
+ var RequestQueue = class {
+ queue = [];
+ processing = false;
+ lastRequestTime = 0;
+ async enqueue(execute, rateLimitSeconds) {
+ return new Promise((resolve, reject) => {
+ this.queue.push({
+ execute,
+ resolve,
+ reject
+ });
+ if (this.queue.length > 1) {
+ const waitTime = Math.ceil((this.queue.length - 1) * rateLimitSeconds);
+ consola.info(`Request queued. Position: ${this.queue.length}, estimated wait: ${waitTime}s`);
+ }
+ this.processQueue(rateLimitSeconds);
+ });
+ }
+ async processQueue(rateLimitSeconds) {
+ if (this.processing) return;
+ this.processing = true;
+ while (this.queue.length > 0) {
+ const elapsedMs = Date.now() - this.lastRequestTime;
+ const requiredMs = rateLimitSeconds * 1e3;
+ if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
+ const waitMs = requiredMs - elapsedMs;
+ consola.debug(`Rate limit: waiting ${Math.ceil(waitMs / 1e3)}s`);
+ await new Promise((resolve) => setTimeout(resolve, waitMs));
+ }
+ const request = this.queue.shift();
+ if (!request) break;
+ this.lastRequestTime = Date.now();
+ try {
+ const result = await request.execute();
+ request.resolve(result);
+ } catch (error) {
+ request.reject(error);
+ }
+ }
+ this.processing = false;
+ }
+ get length() {
+ return this.queue.length;
+ }
+ };
+ const requestQueue = new RequestQueue();
+ /**
+ * Execute a request with rate limiting via queue.
+ * Requests are queued and processed sequentially at the configured rate.
+ */
+ async function executeWithRateLimit(state$1, execute) {
+ if (state$1.rateLimitSeconds === void 0) return execute();
+ return requestQueue.enqueue(execute, state$1.rateLimitSeconds);
+ }
+
  //#endregion
  //#region src/services/copilot/create-chat-completions.ts
  const createChatCompletions = async (payload) => {
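Worked numbers for the auto-compact thresholds introduced above, using DEFAULT_CONFIG's 10% margin. The 128000-token figure is just the code's fallback value, not a specific model's limit:

// limit = Math.floor(128000 * (1 - 10 / 100)) // 115200
// effectiveTarget = Math.min(1e5, 115200) // 100000
// estimateMessageTokens for a 2000-character message: Math.ceil(2000 / 4) + 10 = 510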
@@ -1112,20 +1884,83 @@ const createChatCompletions = async (payload) => {
  //#region src/routes/chat-completions/handler.ts
  async function handleCompletion$1(c) {
  const startTime = Date.now();
- let payload = await c.req.json();
- consola.debug("Request payload:", JSON.stringify(payload).slice(-400));
- const historyId = recordRequest("openai", {
- model: payload.model,
- messages: convertOpenAIMessages(payload.messages),
- stream: payload.stream ?? false,
- tools: payload.tools?.map((t) => ({
- name: t.function.name,
- description: t.function.description
- })),
- max_tokens: payload.max_tokens ?? void 0,
- temperature: payload.temperature ?? void 0
- });
- const selectedModel = state.models?.data.find((model) => model.id === payload.model);
+ const originalPayload = await c.req.json();
+ consola.debug("Request payload:", JSON.stringify(originalPayload).slice(-400));
+ const trackingId = c.get("trackingId");
+ updateTrackerModel$1(trackingId, originalPayload.model);
+ const ctx = {
+ historyId: recordRequest("openai", {
+ model: originalPayload.model,
+ messages: convertOpenAIMessages(originalPayload.messages),
+ stream: originalPayload.stream ?? false,
+ tools: originalPayload.tools?.map((t) => ({
+ name: t.function.name,
+ description: t.function.description
+ })),
+ max_tokens: originalPayload.max_tokens ?? void 0,
+ temperature: originalPayload.temperature ?? void 0
+ }),
+ trackingId,
+ startTime
+ };
+ const selectedModel = state.models?.data.find((model) => model.id === originalPayload.model);
+ await logTokenCount(originalPayload, selectedModel);
+ const { finalPayload, compactResult } = await buildFinalPayload$1(originalPayload, selectedModel);
+ if (compactResult) ctx.compactResult = compactResult;
+ const payload = isNullish(finalPayload.max_tokens) ? {
+ ...finalPayload,
+ max_tokens: selectedModel?.capabilities.limits.max_output_tokens
+ } : finalPayload;
+ if (isNullish(originalPayload.max_tokens)) consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
+ if (state.manualApprove) await awaitApproval();
+ try {
+ const response = await executeWithRateLimit(state, () => createChatCompletions(payload));
+ if (isNonStreaming$1(response)) return handleNonStreamingResponse$1(c, response, ctx);
+ consola.debug("Streaming response");
+ updateTrackerStatus$1(trackingId, "streaming");
+ return streamSSE(c, async (stream) => {
+ await handleStreamingResponse$1({
+ stream,
+ response,
+ payload,
+ ctx
+ });
+ });
+ } catch (error) {
+ recordErrorResponse$1(ctx, payload.model, error);
+ throw error;
+ }
+ }
+ async function buildFinalPayload$1(payload, model) {
+ if (!state.autoCompact || !model) {
+ if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
+ return {
+ finalPayload: payload,
+ compactResult: null
+ };
+ }
+ try {
+ const check = await checkNeedsCompaction(payload, model);
+ consola.info(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
+ if (!check.needed) return {
+ finalPayload: payload,
+ compactResult: null
+ };
+ consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
+ const compactResult = await autoCompact(payload, model);
+ return {
+ finalPayload: compactResult.payload,
+ compactResult
+ };
+ } catch (error) {
+ consola.warn("Auto-compact failed, proceeding with original payload:", error);
+ return {
+ finalPayload: payload,
+ compactResult: null
+ };
+ }
+ }
+ async function logTokenCount(payload, selectedModel) {
  try {
  if (selectedModel) {
  const tokenCount = await getTokenCount(payload, selectedModel);
@@ -1134,146 +1969,236 @@ async function handleCompletion$1(c) {
  } catch (error) {
  consola.warn("Failed to calculate token count:", error);
  }
- if (state.manualApprove) await awaitApproval();
- if (isNullish(payload.max_tokens)) {
- payload = {
- ...payload,
- max_tokens: selectedModel?.capabilities.limits.max_output_tokens
+ }
+ function updateTrackerModel$1(trackingId, model) {
+ if (!trackingId) return;
+ const request = requestTracker.getRequest(trackingId);
+ if (request) request.model = model;
+ }
+ function updateTrackerStatus$1(trackingId, status) {
+ if (!trackingId) return;
+ requestTracker.updateRequest(trackingId, { status });
+ }
+ function recordErrorResponse$1(ctx, model, error) {
+ recordResponse(ctx.historyId, {
+ success: false,
+ model,
+ usage: {
+ input_tokens: 0,
+ output_tokens: 0
+ },
+ error: error instanceof Error ? error.message : "Unknown error",
+ content: null
+ }, Date.now() - ctx.startTime);
+ }
+ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
+ consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
+ let response = originalResponse;
+ if (ctx.compactResult?.wasCompacted && response.choices[0]?.message.content) {
+ const marker = createCompactionMarker(ctx.compactResult);
+ response = {
+ ...response,
+ choices: response.choices.map((choice$1, i) => i === 0 ? {
+ ...choice$1,
+ message: {
+ ...choice$1.message,
+ content: (choice$1.message.content ?? "") + marker
+ }
+ } : choice$1)
  };
- consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
  }
+ const choice = response.choices[0];
+ const usage = response.usage;
+ recordResponse(ctx.historyId, {
+ success: true,
+ model: response.model,
+ usage: {
+ input_tokens: usage?.prompt_tokens ?? 0,
+ output_tokens: usage?.completion_tokens ?? 0
+ },
+ stop_reason: choice.finish_reason,
+ content: buildResponseContent(choice),
+ toolCalls: extractToolCalls(choice)
+ }, Date.now() - ctx.startTime);
+ if (ctx.trackingId && usage) requestTracker.updateRequest(ctx.trackingId, {
+ inputTokens: usage.prompt_tokens,
+ outputTokens: usage.completion_tokens
+ });
+ return c.json(response);
+ }
+ function buildResponseContent(choice) {
+ return {
+ role: choice.message.role,
+ content: typeof choice.message.content === "string" ? choice.message.content : JSON.stringify(choice.message.content),
+ tool_calls: choice.message.tool_calls?.map((tc) => ({
+ id: tc.id,
+ type: tc.type,
+ function: {
+ name: tc.function.name,
+ arguments: tc.function.arguments
+ }
+ }))
+ };
+ }
+ function extractToolCalls(choice) {
+ return choice.message.tool_calls?.map((tc) => ({
+ id: tc.id,
+ name: tc.function.name,
+ input: tc.function.arguments
+ }));
+ }
+ function createStreamAccumulator() {
+ return {
+ model: "",
+ inputTokens: 0,
+ outputTokens: 0,
+ finishReason: "",
+ content: "",
+ toolCalls: [],
+ toolCallMap: /* @__PURE__ */ new Map()
+ };
+ }
+ async function handleStreamingResponse$1(opts) {
+ const { stream, response, payload, ctx } = opts;
+ const acc = createStreamAccumulator();
  try {
- const response = await executeWithRateLimit(state, () => createChatCompletions(payload));
- if (isNonStreaming$1(response)) {
- consola.debug("Non-streaming response:", JSON.stringify(response));
- const choice = response.choices[0];
- recordResponse(historyId, {
- success: true,
- model: response.model,
- usage: {
- input_tokens: response.usage?.prompt_tokens ?? 0,
- output_tokens: response.usage?.completion_tokens ?? 0
- },
- stop_reason: choice?.finish_reason ?? void 0,
- content: choice?.message ? {
- role: choice.message.role,
- content: typeof choice.message.content === "string" ? choice.message.content : JSON.stringify(choice.message.content),
- tool_calls: choice.message.tool_calls?.map((tc) => ({
- id: tc.id,
- type: tc.type,
- function: {
- name: tc.function.name,
- arguments: tc.function.arguments
- }
- }))
- } : null,
- toolCalls: choice?.message?.tool_calls?.map((tc) => ({
- id: tc.id,
- name: tc.function.name,
- input: tc.function.arguments
- }))
- }, Date.now() - startTime);
- return c.json(response);
+ for await (const chunk of response) {
+ consola.debug("Streaming chunk:", JSON.stringify(chunk));
+ parseStreamChunk(chunk, acc);
+ await stream.writeSSE(chunk);
  }
- consola.debug("Streaming response");
- return streamSSE(c, async (stream) => {
- let streamModel = "";
- let streamInputTokens = 0;
- let streamOutputTokens = 0;
- let streamFinishReason = "";
- let streamContent = "";
- const streamToolCalls = [];
- const toolCallAccumulators = /* @__PURE__ */ new Map();
- try {
- for await (const chunk of response) {
- consola.debug("Streaming chunk:", JSON.stringify(chunk));
- if (chunk.data && chunk.data !== "[DONE]") try {
- const parsed = JSON.parse(chunk.data);
- if (parsed.model && !streamModel) streamModel = parsed.model;
- if (parsed.usage) {
- streamInputTokens = parsed.usage.prompt_tokens;
- streamOutputTokens = parsed.usage.completion_tokens;
- }
- const choice = parsed.choices[0];
- if (choice?.delta?.content) streamContent += choice.delta.content;
- if (choice?.delta?.tool_calls) for (const tc of choice.delta.tool_calls) {
- const idx = tc.index;
- if (!toolCallAccumulators.has(idx)) toolCallAccumulators.set(idx, {
- id: tc.id || "",
- name: tc.function?.name || "",
- arguments: ""
- });
- const acc = toolCallAccumulators.get(idx);
- if (acc) {
- if (tc.id) acc.id = tc.id;
- if (tc.function?.name) acc.name = tc.function.name;
- if (tc.function?.arguments) acc.arguments += tc.function.arguments;
- }
- }
- if (choice?.finish_reason) streamFinishReason = choice.finish_reason;
- } catch {}
- await stream.writeSSE(chunk);
- }
- for (const tc of toolCallAccumulators.values()) if (tc.id && tc.name) streamToolCalls.push({
- id: tc.id,
- name: tc.name,
- arguments: tc.arguments
- });
- const toolCallsForContent = streamToolCalls.map((tc) => ({
- id: tc.id,
- type: "function",
- function: {
- name: tc.name,
- arguments: tc.arguments
- }
- }));
- recordResponse(historyId, {
- success: true,
- model: streamModel || payload.model,
- usage: {
- input_tokens: streamInputTokens,
- output_tokens: streamOutputTokens
- },
- stop_reason: streamFinishReason || void 0,
- content: {
- role: "assistant",
- content: streamContent || void 0,
- tool_calls: toolCallsForContent.length > 0 ? toolCallsForContent : void 0
- },
- toolCalls: streamToolCalls.length > 0 ? streamToolCalls.map((tc) => ({
- id: tc.id,
- name: tc.name,
- input: tc.arguments
- })) : void 0
- }, Date.now() - startTime);
- } catch (error) {
- recordResponse(historyId, {
- success: false,
- model: streamModel || payload.model,
- usage: {
- input_tokens: 0,
- output_tokens: 0
- },
- error: error instanceof Error ? error.message : "Stream error",
- content: null
- }, Date.now() - startTime);
- throw error;
- }
- });
+ if (ctx.compactResult?.wasCompacted) {
+ const marker = createCompactionMarker(ctx.compactResult);
+ const markerChunk = {
+ id: `compact-marker-${Date.now()}`,
+ object: "chat.completion.chunk",
+ created: Math.floor(Date.now() / 1e3),
+ model: acc.model || payload.model,
+ choices: [{
+ index: 0,
+ delta: { content: marker },
+ finish_reason: null,
+ logprobs: null
+ }]
+ };
+ await stream.writeSSE({
+ data: JSON.stringify(markerChunk),
+ event: "message"
+ });
+ acc.content += marker;
+ }
+ recordStreamSuccess(acc, payload.model, ctx);
+ completeTracking$1(ctx.trackingId, acc.inputTokens, acc.outputTokens);
  } catch (error) {
- recordResponse(historyId, {
- success: false,
- model: payload.model,
- usage: {
- input_tokens: 0,
- output_tokens: 0
- },
- error: error instanceof Error ? error.message : "Unknown error",
- content: null
- }, Date.now() - startTime);
+ recordStreamError({
+ acc,
+ fallbackModel: payload.model,
+ ctx,
+ error
+ });
+ failTracking$1(ctx.trackingId, error);
  throw error;
  }
  }
+ function parseStreamChunk(chunk, acc) {
+ if (!chunk.data || chunk.data === "[DONE]") return;
+ try {
+ const parsed = JSON.parse(chunk.data);
+ accumulateModel(parsed, acc);
+ accumulateUsage(parsed, acc);
+ accumulateChoice(parsed.choices[0], acc);
+ } catch {}
+ }
+ function accumulateModel(parsed, acc) {
+ if (parsed.model && !acc.model) acc.model = parsed.model;
+ }
+ function accumulateUsage(parsed, acc) {
+ if (parsed.usage) {
+ acc.inputTokens = parsed.usage.prompt_tokens;
+ acc.outputTokens = parsed.usage.completion_tokens;
+ }
+ }
+ function accumulateChoice(choice, acc) {
+ if (!choice) return;
+ if (choice.delta.content) acc.content += choice.delta.content;
+ if (choice.delta.tool_calls) accumulateToolCalls(choice.delta.tool_calls, acc);
+ if (choice.finish_reason) acc.finishReason = choice.finish_reason;
+ }
+ function accumulateToolCalls(toolCalls, acc) {
+ if (!toolCalls) return;
+ for (const tc of toolCalls) {
+ const idx = tc.index;
+ if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
+ id: tc.id ?? "",
+ name: tc.function?.name ?? "",
+ arguments: ""
+ });
+ const item = acc.toolCallMap.get(idx);
+ if (item) {
+ if (tc.id) item.id = tc.id;
+ if (tc.function?.name) item.name = tc.function.name;
+ if (tc.function?.arguments) item.arguments += tc.function.arguments;
+ }
+ }
+ }
+ function recordStreamSuccess(acc, fallbackModel, ctx) {
+ for (const tc of acc.toolCallMap.values()) if (tc.id && tc.name) acc.toolCalls.push(tc);
+ const toolCalls = acc.toolCalls.map((tc) => ({
+ id: tc.id,
+ type: "function",
+ function: {
+ name: tc.name,
+ arguments: tc.arguments
+ }
+ }));
+ recordResponse(ctx.historyId, {
+ success: true,
+ model: acc.model || fallbackModel,
+ usage: {
+ input_tokens: acc.inputTokens,
+ output_tokens: acc.outputTokens
+ },
+ stop_reason: acc.finishReason || void 0,
+ content: {
+ role: "assistant",
+ content: acc.content,
+ tool_calls: toolCalls.length > 0 ? toolCalls : void 0
+ },
+ toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls.map((tc) => ({
+ id: tc.id,
+ name: tc.name,
+ input: tc.arguments
+ })) : void 0
+ }, Date.now() - ctx.startTime);
+ }
+ function recordStreamError(opts) {
+ const { acc, fallbackModel, ctx, error } = opts;
+ recordResponse(ctx.historyId, {
+ success: false,
+ model: acc.model || fallbackModel,
+ usage: {
+ input_tokens: 0,
+ output_tokens: 0
+ },
+ error: error instanceof Error ? error.message : "Stream error",
+ content: null
+ }, Date.now() - ctx.startTime);
+ }
+ function completeTracking$1(trackingId, inputTokens, outputTokens) {
+ if (!trackingId) return;
+ requestTracker.updateRequest(trackingId, {
+ inputTokens,
+ outputTokens
+ });
+ requestTracker.completeRequest(trackingId, 200, {
+ inputTokens,
+ outputTokens
+ });
+ }
+ function failTracking$1(trackingId, error) {
+ if (!trackingId) return;
+ requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
+ }
  const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
  function convertOpenAIMessages(messages) {
  return messages.map((msg) => {
@@ -1461,6 +2386,78 @@ function getContentText(content) {
1461
2386
  return JSON.stringify(content, null, 2);
1462
2387
  }
1463
2388
 
2389
+ // Extract real user text, skipping system tags like <system-reminder>, <ide_opened_file>, etc.
2390
+ function extractRealUserText(content) {
2391
+ if (!content) return '';
2392
+ let text = '';
2393
+ if (typeof content === 'string') {
2394
+ text = content;
2395
+ } else if (Array.isArray(content)) {
2396
+ text = content
2397
+ .filter(c => c.type === 'text' && c.text)
2398
+ .map(c => c.text)
2399
+ .join('\\n');
2400
+ }
2401
+ if (!text) return '';
2402
+
2403
+ // Remove system tags and their content
2404
+ const systemTags = [
2405
+ 'system-reminder',
2406
+ 'ide_opened_file',
2407
+ 'ide_selection',
2408
+ 'ide_visible_files',
2409
+ 'ide_diagnostics',
2410
+ 'ide_cursor_position',
2411
+ 'user-prompt-submit-hook',
2412
+ 'antml:function_calls',
2413
+ 'antml:invoke',
2414
+ 'antml:parameter'
2415
+ ];
2416
+
2417
+ let cleaned = text;
2418
+ for (const tag of systemTags) {
2419
+ // Remove <tag>...</tag> blocks (including multiline)
2420
+ const regex = new RegExp('<' + tag + '[^>]*>[\\\\s\\\\S]*?</' + tag + '>', 'gi');
2421
+ cleaned = cleaned.replace(regex, '');
2422
+ // Remove self-closing <tag ... /> or <tag ...>content without closing
2423
+ const selfClosingRegex = new RegExp('<' + tag + '[^>]*/>', 'gi');
2424
+ cleaned = cleaned.replace(selfClosingRegex, '');
2425
+ }
2426
+
2427
+ // Trim whitespace and return
2428
+ return cleaned.trim();
2429
+ }
2430
+
2431
+ // Get preview text from assistant message content
2432
+ function getAssistantPreview(content) {
2433
+ if (!content) return '';
2434
+ if (typeof content === 'string') {
2435
+ const text = content.trim();
2436
+ if (text.length > 0) {
2437
+ return text.length > 80 ? text.slice(0, 80) + '...' : text;
2438
+ }
2439
+ return '';
2440
+ }
2441
+ if (Array.isArray(content)) {
2442
+ // First try to get text content
2443
+ const textParts = content.filter(c => c.type === 'text' && c.text).map(c => c.text);
2444
+ if (textParts.length > 0) {
2445
+ const text = textParts.join('\\n').trim();
2446
+ if (text.length > 0) {
2447
+ return text.length > 80 ? text.slice(0, 80) + '...' : text;
2448
+ }
2449
+ }
2450
+ // If no text, show tool_use info
2451
+ const toolUses = content.filter(c => c.type === 'tool_use');
2452
+ if (toolUses.length === 1) {
2453
+ return '[tool_use: ' + toolUses[0].name + ']';
2454
+ } else if (toolUses.length > 1) {
2455
+ return '[' + toolUses.length + ' tool_uses]';
2456
+ }
2457
+ }
2458
+ return '';
2459
+ }
2460
+
1464
2461
  function formatContentForDisplay(content) {
1465
2462
  if (!content) return { summary: '', raw: 'null' };
1466
2463
  if (typeof content === 'string') return { summary: content, raw: JSON.stringify(content) };
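A self-contained sketch of one pass of the tag-stripping loop in extractRealUserText above. Note the source shows doubled backslashes ([\\s\\S]) because the dashboard script is embedded in a template literal; standalone JavaScript needs only one level of escaping:

  // Strip one system tag and its content, as the loop above does per tag.
  const stripTag = (text, tag) =>
    text.replace(new RegExp("<" + tag + "[^>]*>[\\s\\S]*?</" + tag + ">", "gi"), "").trim();

  console.log(stripTag("<system-reminder>internal note</system-reminder>\nFix the failing test", "system-reminder"));
  // -> "Fix the failing test"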
@@ -1516,6 +2513,7 @@ async function loadSessions() {
1516
2513
  for (const s of data.sessions) {
1517
2514
  const isActive = currentSessionId === s.id;
1518
2515
  const shortId = s.id.slice(0, 8);
2516
+ const toolCount = s.toolsUsed ? s.toolsUsed.length : 0;
1519
2517
  html += \`
1520
2518
  <div class="session-item\${isActive ? ' active' : ''}" onclick="selectSession('\${s.id}')">
1521
2519
  <div class="session-meta">
@@ -1526,6 +2524,7 @@ async function loadSessions() {
1526
2524
  <span style="color:var(--text-dim);font-family:monospace;font-size:10px;">\${shortId}</span>
1527
2525
  <span>\${s.requestCount} req</span>
1528
2526
  <span>\${formatNumber(s.totalInputTokens + s.totalOutputTokens)} tok</span>
2527
+ \${toolCount > 0 ? '<span class="badge tool">' + toolCount + ' tool' + (toolCount > 1 ? 's' : '') + '</span>' : ''}
1529
2528
  <span class="badge \${s.endpoint}">\${s.endpoint}</span>
1530
2529
  </div>
1531
2530
  </div>
@@ -1584,6 +2583,37 @@ async function loadEntries() {
1584
2583
  const tokens = e.response ? formatNumber(e.response.usage.input_tokens) + '/' + formatNumber(e.response.usage.output_tokens) : '-';
1585
2584
  const shortId = e.id.slice(0, 8);
1586
2585
 
2586
+ // Get preview: show meaningful context about the request
2587
+ let lastUserMsg = '';
2588
+ const messages = e.request.messages;
2589
+ const lastMsg = messages[messages.length - 1];
2590
+
2591
+ // Last user message: may wrap tool_results (fall back to the previous assistant turn) or carry real text
2592
+ if (lastMsg && lastMsg.role === 'user') {
2593
+ const content = lastMsg.content;
2594
+ if (Array.isArray(content) && content.length > 0 && content[0].type === 'tool_result') {
2595
+ // This is a tool_result response - look for previous assistant message
2596
+ const prevMsg = messages.length >= 2 ? messages[messages.length - 2] : null;
2597
+ if (prevMsg && prevMsg.role === 'assistant') {
2598
+ lastUserMsg = getAssistantPreview(prevMsg.content);
2599
+ }
2600
+ // If no meaningful preview from assistant, show tool_result count
2601
+ if (!lastUserMsg) {
2602
+ const toolResults = content.filter(c => c.type === 'tool_result');
2603
+ lastUserMsg = '[' + toolResults.length + ' tool_result' + (toolResults.length > 1 ? 's' : '') + ']';
2604
+ }
2605
+ } else {
2606
+ // Regular user message, extract real text
2607
+ const realText = extractRealUserText(lastMsg.content);
2608
+ if (realText.length > 0) {
2609
+ lastUserMsg = realText.slice(0, 80);
2610
+ if (realText.length > 80) lastUserMsg += '...';
2611
+ }
2612
+ }
2613
+ } else if (lastMsg && lastMsg.role === 'assistant') {
2614
+ lastUserMsg = getAssistantPreview(lastMsg.content);
2615
+ }
2616
+
1587
2617
  html += \`
1588
2618
  <div class="entry-item\${isSelected ? ' selected' : ''}" onclick="showDetail('\${e.id}')">
1589
2619
  <div class="entry-header">
@@ -1596,6 +2626,7 @@ async function loadEntries() {
1596
2626
  <span class="entry-tokens">\${tokens}</span>
1597
2627
  <span class="entry-duration">\${formatDuration(e.durationMs)}</span>
1598
2628
  </div>
2629
+ \${lastUserMsg ? '<div class="entry-preview">' + escapeHtml(lastUserMsg) + '</div>' : ''}
1599
2630
  </div>
1600
2631
  \`;
1601
2632
  }
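To illustrate the fallback order in the preview logic above, consider a hypothetical message tail (values invented):

  // Last message is a tool_result wrapper, so it carries no user text...
  const tail = [
    { role: "assistant", content: [{ type: "tool_use", name: "read_file" }] },
    { role: "user", content: [{ type: "tool_result", tool_use_id: "call_1" }] }
  ];
  // ...the preview falls back to the preceding assistant turn, rendering
  // "[tool_use: read_file]"; if that yields nothing, "[1 tool_result]" is shown.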
@@ -1655,7 +2686,7 @@ async function showDetail(id) {
1655
2686
  <div class="info-item"><div class="info-label">Duration</div><div class="info-value">\${formatDuration(entry.durationMs)}</div></div>
1656
2687
  <div class="info-item"><div class="info-label">Stop Reason</div><div class="info-value">\${entry.response.stop_reason || '-'}</div></div>
1657
2688
  </div>
1658
- \${entry.response.error ? '<div style="color:var(--error);margin-top:8px;">Error: ' + entry.response.error + '</div>' : ''}
2689
+ \${entry.response.error ? '<div class="error-detail"><div class="error-label">Error Details</div><pre class="error-content">' + escapeHtml(entry.response.error) + '</pre></div>' : ''}
1659
2690
  </div>
1660
2691
  \`;
1661
2692
  }
@@ -2023,6 +3054,14 @@ input::placeholder { color: var(--text-dim); }
2023
3054
  .entry-model { font-weight: 500; flex: 1; }
2024
3055
  .entry-tokens { font-size: 11px; color: var(--text-dim); }
2025
3056
  .entry-duration { font-size: 11px; color: var(--text-dim); min-width: 50px; text-align: right; }
3057
+ .entry-preview {
3058
+ padding: 0 16px 8px 16px;
3059
+ font-size: 11px;
3060
+ color: var(--text-muted);
3061
+ overflow: hidden;
3062
+ text-overflow: ellipsis;
3063
+ white-space: nowrap;
3064
+ }
2026
3065
 
2027
3066
  /* Badges */
2028
3067
  .badge {
@@ -2038,6 +3077,7 @@ input::placeholder { color: var(--text-dim); }
2038
3077
  .badge.anthropic { background: rgba(163, 113, 247, 0.15); color: var(--purple); }
2039
3078
  .badge.openai { background: rgba(210, 153, 34, 0.15); color: var(--warning); }
2040
3079
  .badge.stream { background: rgba(57, 197, 207, 0.15); color: var(--cyan); }
3080
+ .badge.tool { background: rgba(88, 166, 255, 0.15); color: var(--primary); }
2041
3081
 
2042
3082
  /* Detail panel */
2043
3083
  .detail-panel {
@@ -2133,6 +3173,32 @@ input::placeholder { color: var(--text-dim); }
2133
3173
  .info-label { font-size: 11px; color: var(--text-muted); }
2134
3174
  .info-value { font-weight: 500; }
2135
3175
 
3176
+ /* Error detail display */
3177
+ .error-detail {
3178
+ margin-top: 12px;
3179
+ padding: 12px;
3180
+ background: rgba(248, 81, 73, 0.1);
3181
+ border: 1px solid rgba(248, 81, 73, 0.3);
3182
+ border-radius: 6px;
3183
+ }
3184
+ .error-label {
3185
+ font-size: 11px;
3186
+ color: var(--error);
3187
+ font-weight: 600;
3188
+ margin-bottom: 8px;
3189
+ text-transform: uppercase;
3190
+ }
3191
+ .error-content {
3192
+ margin: 0;
3193
+ font-family: 'SF Mono', Monaco, 'Courier New', monospace;
3194
+ font-size: 12px;
3195
+ color: var(--error);
3196
+ white-space: pre-wrap;
3197
+ word-break: break-word;
3198
+ max-height: 300px;
3199
+ overflow-y: auto;
3200
+ }
3201
+
2136
3202
  /* Empty state */
2137
3203
  .empty-state {
2138
3204
  text-align: center;
@@ -2388,12 +3454,12 @@ function translateModelName(model) {
2388
3454
  haiku: "claude-haiku-4.5"
2389
3455
  };
2390
3456
  if (shortNameMap[model]) return shortNameMap[model];
2391
- if (model.match(/^claude-sonnet-4-5-\d+$/)) return "claude-sonnet-4.5";
2392
- if (model.match(/^claude-sonnet-4-\d+$/)) return "claude-sonnet-4";
2393
- if (model.match(/^claude-opus-4-5-\d+$/)) return "claude-opus-4.5";
2394
- if (model.match(/^claude-opus-4-\d+$/)) return "claude-opus-4.5";
2395
- if (model.match(/^claude-haiku-4-5-\d+$/)) return "claude-haiku-4.5";
2396
- if (model.match(/^claude-haiku-3-5-\d+$/)) return "claude-haiku-4.5";
3457
+ if (/^claude-sonnet-4-5-\d+$/.test(model)) return "claude-sonnet-4.5";
3458
+ if (/^claude-sonnet-4-\d+$/.test(model)) return "claude-sonnet-4";
3459
+ if (/^claude-opus-4-5-\d+$/.test(model)) return "claude-opus-4.5";
3460
+ if (/^claude-opus-4-\d+$/.test(model)) return "claude-opus-4.5";
3461
+ if (/^claude-haiku-4-5-\d+$/.test(model)) return "claude-haiku-4.5";
3462
+ if (/^claude-haiku-3-5-\d+$/.test(model)) return "claude-haiku-4.5";
2397
3463
  return model;
2398
3464
  }
2399
3465
  function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameMapping) {
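Expected behavior of the rewritten patterns, using hypothetical dated ids that happen to match them (the opus-4 and haiku-3-5 branches intentionally map to the newer tier):

  // translateModelName("claude-sonnet-4-5-20250929") -> "claude-sonnet-4.5"
  // translateModelName("claude-opus-4-1")            -> "claude-opus-4.5"  (upgrade mapping)
  // translateModelName("claude-haiku-3-5-20241022")  -> "claude-haiku-4.5" (upgrade mapping)
  // translateModelName("gpt-4o")                     -> "gpt-4o"           (no pattern matches; passes through)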
@@ -2490,7 +3556,7 @@ function getTruncatedToolName(originalName, toolNameMapping) {
2490
3556
  if (existingTruncated) return existingTruncated;
2491
3557
  let hash = 0;
2492
3558
  for (let i = 0; i < originalName.length; i++) {
2493
- const char = originalName.charCodeAt(i);
3559
+ const char = originalName.codePointAt(i) ?? 0;
2494
3560
  hash = (hash << 5) - hash + char;
2495
3561
  hash = hash & hash;
2496
3562
  }
@@ -2527,8 +3593,9 @@ function translateAnthropicToolChoiceToOpenAI(anthropicToolChoice, toolNameMappi
2527
3593
  default: return;
2528
3594
  }
2529
3595
  }
2530
- function translateToAnthropic(response, toolNameMapping) {
2531
- if (response.choices.length === 0) return {
3596
+ /** Create empty response for edge case of no choices */
3597
+ function createEmptyResponse(response) {
3598
+ return {
2532
3599
  id: response.id,
2533
3600
  type: "message",
2534
3601
  role: "assistant",
@@ -2541,6 +3608,18 @@ function translateToAnthropic(response, toolNameMapping) {
2541
3608
  output_tokens: response.usage?.completion_tokens ?? 0
2542
3609
  }
2543
3610
  };
3611
+ }
3612
+ /** Build usage object from response */
3613
+ function buildUsageObject(response) {
3614
+ const cachedTokens = response.usage?.prompt_tokens_details?.cached_tokens;
3615
+ return {
3616
+ input_tokens: (response.usage?.prompt_tokens ?? 0) - (cachedTokens ?? 0),
3617
+ output_tokens: response.usage?.completion_tokens ?? 0,
3618
+ ...cachedTokens !== void 0 && { cache_read_input_tokens: cachedTokens }
3619
+ };
3620
+ }
3621
+ function translateToAnthropic(response, toolNameMapping) {
3622
+ if (response.choices.length === 0) return createEmptyResponse(response);
2544
3623
  const allTextBlocks = [];
2545
3624
  const allToolUseBlocks = [];
2546
3625
  let stopReason = null;
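A worked example of buildUsageObject's arithmetic, with invented token counts:

  const usage = { prompt_tokens: 1200, completion_tokens: 50, prompt_tokens_details: { cached_tokens: 800 } };
  const cached = usage.prompt_tokens_details?.cached_tokens;
  console.log({
    input_tokens: usage.prompt_tokens - (cached ?? 0),               // 400 (uncached portion only)
    output_tokens: usage.completion_tokens,                          // 50
    ...(cached !== undefined && { cache_read_input_tokens: cached }) // 800; omitted when details are absent
  });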
@@ -2560,11 +3639,7 @@ function translateToAnthropic(response, toolNameMapping) {
2560
3639
  content: [...allTextBlocks, ...allToolUseBlocks],
2561
3640
  stop_reason: mapOpenAIStopReasonToAnthropic(stopReason),
2562
3641
  stop_sequence: null,
2563
- usage: {
2564
- input_tokens: (response.usage?.prompt_tokens ?? 0) - (response.usage?.prompt_tokens_details?.cached_tokens ?? 0),
2565
- output_tokens: response.usage?.completion_tokens ?? 0,
2566
- ...response.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: response.usage.prompt_tokens_details.cached_tokens }
2567
- }
3642
+ usage: buildUsageObject(response)
2568
3643
  };
2569
3644
  }
2570
3645
  function getAnthropicTextBlocks(messageContent) {
@@ -2776,175 +3851,365 @@ async function handleCompletion(c) {
2776
3851
  const startTime = Date.now();
2777
3852
  const anthropicPayload = await c.req.json();
2778
3853
  consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
2779
- const historyId = recordRequest("anthropic", {
2780
- model: anthropicPayload.model,
2781
- messages: convertAnthropicMessages(anthropicPayload.messages),
2782
- stream: anthropicPayload.stream ?? false,
2783
- tools: anthropicPayload.tools?.map((t) => ({
2784
- name: t.name,
2785
- description: t.description
2786
- })),
2787
- max_tokens: anthropicPayload.max_tokens,
2788
- temperature: anthropicPayload.temperature,
2789
- system: extractSystemPrompt(anthropicPayload.system)
2790
- });
2791
- const { payload: openAIPayload, toolNameMapping } = translateToOpenAI(anthropicPayload);
2792
- consola.debug("Translated OpenAI request payload:", JSON.stringify(openAIPayload));
3854
+ const trackingId = c.get("trackingId");
3855
+ updateTrackerModel(trackingId, anthropicPayload.model);
3856
+ const ctx = {
3857
+ historyId: recordRequest("anthropic", {
3858
+ model: anthropicPayload.model,
3859
+ messages: convertAnthropicMessages(anthropicPayload.messages),
3860
+ stream: anthropicPayload.stream ?? false,
3861
+ tools: anthropicPayload.tools?.map((t) => ({
3862
+ name: t.name,
3863
+ description: t.description
3864
+ })),
3865
+ max_tokens: anthropicPayload.max_tokens,
3866
+ temperature: anthropicPayload.temperature,
3867
+ system: extractSystemPrompt(anthropicPayload.system)
3868
+ }),
3869
+ trackingId,
3870
+ startTime
3871
+ };
3872
+ const { payload: translatedPayload, toolNameMapping } = translateToOpenAI(anthropicPayload);
3873
+ consola.debug("Translated OpenAI request payload:", JSON.stringify(translatedPayload));
3874
+ const selectedModel = state.models?.data.find((model) => model.id === translatedPayload.model);
3875
+ const { finalPayload: openAIPayload, compactResult } = await buildFinalPayload(translatedPayload, selectedModel);
3876
+ if (compactResult) ctx.compactResult = compactResult;
2793
3877
  if (state.manualApprove) await awaitApproval();
2794
3878
  try {
2795
3879
  const response = await executeWithRateLimit(state, () => createChatCompletions(openAIPayload));
2796
- if (isNonStreaming(response)) {
2797
- consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
2798
- const anthropicResponse = translateToAnthropic(response, toolNameMapping);
2799
- consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
2800
- recordResponse(historyId, {
2801
- success: true,
2802
- model: anthropicResponse.model,
2803
- usage: anthropicResponse.usage,
2804
- stop_reason: anthropicResponse.stop_reason ?? void 0,
2805
- content: {
2806
- role: "assistant",
2807
- content: anthropicResponse.content.map((block) => {
2808
- if (block.type === "text") return {
2809
- type: "text",
2810
- text: block.text
2811
- };
2812
- if (block.type === "tool_use") return {
2813
- type: "tool_use",
2814
- id: block.id,
2815
- name: block.name,
2816
- input: JSON.stringify(block.input)
2817
- };
2818
- return { type: block.type };
2819
- })
2820
- },
2821
- toolCalls: extractToolCallsFromContent(anthropicResponse.content)
2822
- }, Date.now() - startTime);
2823
- return c.json(anthropicResponse);
2824
- }
3880
+ if (isNonStreaming(response)) return handleNonStreamingResponse({
3881
+ c,
3882
+ response,
3883
+ toolNameMapping,
3884
+ ctx
3885
+ });
2825
3886
  consola.debug("Streaming response from Copilot");
3887
+ updateTrackerStatus(trackingId, "streaming");
2826
3888
  return streamSSE(c, async (stream) => {
2827
- const streamState = {
2828
- messageStartSent: false,
2829
- contentBlockIndex: 0,
2830
- contentBlockOpen: false,
2831
- toolCalls: {}
2832
- };
2833
- let streamModel = "";
2834
- let streamInputTokens = 0;
2835
- let streamOutputTokens = 0;
2836
- let streamStopReason = "";
2837
- let streamContent = "";
2838
- const streamToolCalls = [];
2839
- let currentToolCall = null;
2840
- try {
2841
- for await (const rawEvent of response) {
2842
- consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent));
2843
- if (rawEvent.data === "[DONE]") break;
2844
- if (!rawEvent.data) continue;
2845
- let chunk;
2846
- try {
2847
- chunk = JSON.parse(rawEvent.data);
2848
- } catch (parseError) {
2849
- consola.error("Failed to parse stream chunk:", parseError, rawEvent.data);
2850
- continue;
2851
- }
2852
- if (chunk.model && !streamModel) streamModel = chunk.model;
2853
- const events$1 = translateChunkToAnthropicEvents(chunk, streamState, toolNameMapping);
2854
- for (const event of events$1) {
2855
- consola.debug("Translated Anthropic event:", JSON.stringify(event));
2856
- switch (event.type) {
2857
- case "content_block_delta":
2858
- if ("text" in event.delta) streamContent += event.delta.text;
2859
- else if ("partial_json" in event.delta && currentToolCall) currentToolCall.input += event.delta.partial_json;
2860
- break;
2861
- case "content_block_start":
2862
- if (event.content_block.type === "tool_use") currentToolCall = {
2863
- id: event.content_block.id,
2864
- name: event.content_block.name,
2865
- input: ""
2866
- };
2867
- break;
2868
- case "content_block_stop":
2869
- if (currentToolCall) {
2870
- streamToolCalls.push(currentToolCall);
2871
- currentToolCall = null;
2872
- }
2873
- break;
2874
- case "message_delta":
2875
- if (event.delta.stop_reason) streamStopReason = event.delta.stop_reason;
2876
- if (event.usage) {
2877
- streamInputTokens = event.usage.input_tokens ?? 0;
2878
- streamOutputTokens = event.usage.output_tokens;
2879
- }
2880
- break;
2881
- }
2882
- await stream.writeSSE({
2883
- event: event.type,
2884
- data: JSON.stringify(event)
2885
- });
2886
- }
2887
- }
2888
- const contentBlocks = [];
2889
- if (streamContent) contentBlocks.push({
3889
+ await handleStreamingResponse({
3890
+ stream,
3891
+ response,
3892
+ toolNameMapping,
3893
+ anthropicPayload,
3894
+ ctx
3895
+ });
3896
+ });
3897
+ } catch (error) {
3898
+ recordErrorResponse(ctx, anthropicPayload.model, error);
3899
+ throw error;
3900
+ }
3901
+ }
3902
+ function updateTrackerModel(trackingId, model) {
3903
+ if (!trackingId) return;
3904
+ const request = requestTracker.getRequest(trackingId);
3905
+ if (request) request.model = model;
3906
+ }
3907
+ async function buildFinalPayload(payload, model) {
3908
+ if (!state.autoCompact || !model) {
3909
+ if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
3910
+ return {
3911
+ finalPayload: payload,
3912
+ compactResult: null
3913
+ };
3914
+ }
3915
+ try {
3916
+ const check = await checkNeedsCompaction(payload, model);
3917
+ consola.info(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
3918
+ if (!check.needed) return {
3919
+ finalPayload: payload,
3920
+ compactResult: null
3921
+ };
3922
+ consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
3923
+ const compactResult = await autoCompact(payload, model);
3924
+ return {
3925
+ finalPayload: compactResult.payload,
3926
+ compactResult
3927
+ };
3928
+ } catch (error) {
3929
+ consola.warn("Auto-compact failed, proceeding with original payload:", error);
3930
+ return {
3931
+ finalPayload: payload,
3932
+ compactResult: null
3933
+ };
3934
+ }
3935
+ }
3936
+ function updateTrackerStatus(trackingId, status) {
3937
+ if (!trackingId) return;
3938
+ requestTracker.updateRequest(trackingId, { status });
3939
+ }
3940
+ function recordErrorResponse(ctx, model, error) {
3941
+ recordResponse(ctx.historyId, {
3942
+ success: false,
3943
+ model,
3944
+ usage: {
3945
+ input_tokens: 0,
3946
+ output_tokens: 0
3947
+ },
3948
+ error: error instanceof Error ? error.message : "Unknown error",
3949
+ content: null
3950
+ }, Date.now() - ctx.startTime);
3951
+ }
3952
+ function handleNonStreamingResponse(opts) {
3953
+ const { c, response, toolNameMapping, ctx } = opts;
3954
+ consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
3955
+ let anthropicResponse = translateToAnthropic(response, toolNameMapping);
3956
+ consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
3957
+ if (ctx.compactResult?.wasCompacted) {
3958
+ const marker = createCompactionMarker(ctx.compactResult);
3959
+ anthropicResponse = appendMarkerToAnthropicResponse(anthropicResponse, marker);
3960
+ }
3961
+ recordResponse(ctx.historyId, {
3962
+ success: true,
3963
+ model: anthropicResponse.model,
3964
+ usage: anthropicResponse.usage,
3965
+ stop_reason: anthropicResponse.stop_reason ?? void 0,
3966
+ content: {
3967
+ role: "assistant",
3968
+ content: anthropicResponse.content.map((block) => {
3969
+ if (block.type === "text") return {
2890
3970
  type: "text",
2891
- text: streamContent
2892
- });
2893
- for (const tc of streamToolCalls) contentBlocks.push({
3971
+ text: block.text
3972
+ };
3973
+ if (block.type === "tool_use") return {
2894
3974
  type: "tool_use",
2895
- ...tc
2896
- });
2897
- recordResponse(historyId, {
2898
- success: true,
2899
- model: streamModel || anthropicPayload.model,
2900
- usage: {
2901
- input_tokens: streamInputTokens,
2902
- output_tokens: streamOutputTokens
2903
- },
2904
- stop_reason: streamStopReason || void 0,
2905
- content: contentBlocks.length > 0 ? {
2906
- role: "assistant",
2907
- content: contentBlocks
2908
- } : null,
2909
- toolCalls: streamToolCalls.length > 0 ? streamToolCalls.map((tc) => ({
2910
- id: tc.id,
2911
- name: tc.name,
2912
- input: tc.input
2913
- })) : void 0
2914
- }, Date.now() - startTime);
2915
- } catch (error) {
2916
- consola.error("Stream error:", error);
2917
- recordResponse(historyId, {
2918
- success: false,
2919
- model: streamModel || anthropicPayload.model,
2920
- usage: {
2921
- input_tokens: 0,
2922
- output_tokens: 0
2923
- },
2924
- error: error instanceof Error ? error.message : "Stream error",
2925
- content: null
2926
- }, Date.now() - startTime);
2927
- const errorEvent = translateErrorToAnthropicErrorEvent();
2928
- await stream.writeSSE({
2929
- event: errorEvent.type,
2930
- data: JSON.stringify(errorEvent)
2931
- });
2932
- }
3975
+ id: block.id,
3976
+ name: block.name,
3977
+ input: JSON.stringify(block.input)
3978
+ };
3979
+ return { type: block.type };
3980
+ })
3981
+ },
3982
+ toolCalls: extractToolCallsFromContent(anthropicResponse.content)
3983
+ }, Date.now() - ctx.startTime);
3984
+ if (ctx.trackingId) requestTracker.updateRequest(ctx.trackingId, {
3985
+ inputTokens: anthropicResponse.usage.input_tokens,
3986
+ outputTokens: anthropicResponse.usage.output_tokens
3987
+ });
3988
+ return c.json(anthropicResponse);
3989
+ }
3990
+ function appendMarkerToAnthropicResponse(response, marker) {
3991
+ const content = [...response.content];
3992
+ const lastTextIndex = content.findLastIndex((block) => block.type === "text");
3993
+ if (lastTextIndex !== -1) {
3994
+ const textBlock = content[lastTextIndex];
3995
+ if (textBlock.type === "text") content[lastTextIndex] = {
3996
+ ...textBlock,
3997
+ text: textBlock.text + marker
3998
+ };
3999
+ } else content.push({
4000
+ type: "text",
4001
+ text: marker
4002
+ });
4003
+ return {
4004
+ ...response,
4005
+ content
4006
+ };
4007
+ }
4008
+ function createAnthropicStreamAccumulator() {
4009
+ return {
4010
+ model: "",
4011
+ inputTokens: 0,
4012
+ outputTokens: 0,
4013
+ stopReason: "",
4014
+ content: "",
4015
+ toolCalls: [],
4016
+ currentToolCall: null
4017
+ };
4018
+ }
4019
+ async function handleStreamingResponse(opts) {
4020
+ const { stream, response, toolNameMapping, anthropicPayload, ctx } = opts;
4021
+ const streamState = {
4022
+ messageStartSent: false,
4023
+ contentBlockIndex: 0,
4024
+ contentBlockOpen: false,
4025
+ toolCalls: {}
4026
+ };
4027
+ const acc = createAnthropicStreamAccumulator();
4028
+ try {
4029
+ await processStreamChunks({
4030
+ stream,
4031
+ response,
4032
+ toolNameMapping,
4033
+ streamState,
4034
+ acc
2933
4035
  });
4036
+ if (ctx.compactResult?.wasCompacted) {
4037
+ const marker = createCompactionMarker(ctx.compactResult);
4038
+ await sendCompactionMarkerEvent(stream, streamState, marker);
4039
+ acc.content += marker;
4040
+ }
4041
+ recordStreamingResponse(acc, anthropicPayload.model, ctx);
4042
+ completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens);
2934
4043
  } catch (error) {
2935
- recordResponse(historyId, {
2936
- success: false,
2937
- model: anthropicPayload.model,
2938
- usage: {
2939
- input_tokens: 0,
2940
- output_tokens: 0
2941
- },
2942
- error: error instanceof Error ? error.message : "Unknown error",
2943
- content: null
2944
- }, Date.now() - startTime);
2945
- throw error;
4044
+ consola.error("Stream error:", error);
4045
+ recordStreamingError({
4046
+ acc,
4047
+ fallbackModel: anthropicPayload.model,
4048
+ ctx,
4049
+ error
4050
+ });
4051
+ failTracking(ctx.trackingId, error);
4052
+ const errorEvent = translateErrorToAnthropicErrorEvent();
4053
+ await stream.writeSSE({
4054
+ event: errorEvent.type,
4055
+ data: JSON.stringify(errorEvent)
4056
+ });
4057
+ }
4058
+ }
4059
+ async function sendCompactionMarkerEvent(stream, streamState, marker) {
4060
+ const blockStartEvent = {
4061
+ type: "content_block_start",
4062
+ index: streamState.contentBlockIndex,
4063
+ content_block: {
4064
+ type: "text",
4065
+ text: ""
4066
+ }
4067
+ };
4068
+ await stream.writeSSE({
4069
+ event: "content_block_start",
4070
+ data: JSON.stringify(blockStartEvent)
4071
+ });
4072
+ const deltaEvent = {
4073
+ type: "content_block_delta",
4074
+ index: streamState.contentBlockIndex,
4075
+ delta: {
4076
+ type: "text_delta",
4077
+ text: marker
4078
+ }
4079
+ };
4080
+ await stream.writeSSE({
4081
+ event: "content_block_delta",
4082
+ data: JSON.stringify(deltaEvent)
4083
+ });
4084
+ const blockStopEvent = {
4085
+ type: "content_block_stop",
4086
+ index: streamState.contentBlockIndex
4087
+ };
4088
+ await stream.writeSSE({
4089
+ event: "content_block_stop",
4090
+ data: JSON.stringify(blockStopEvent)
4091
+ });
4092
+ streamState.contentBlockIndex++;
4093
+ }
4094
+ async function processStreamChunks(opts) {
4095
+ const { stream, response, toolNameMapping, streamState, acc } = opts;
4096
+ for await (const rawEvent of response) {
4097
+ consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent));
4098
+ if (rawEvent.data === "[DONE]") break;
4099
+ if (!rawEvent.data) continue;
4100
+ let chunk;
4101
+ try {
4102
+ chunk = JSON.parse(rawEvent.data);
4103
+ } catch (parseError) {
4104
+ consola.error("Failed to parse stream chunk:", parseError, rawEvent.data);
4105
+ continue;
4106
+ }
4107
+ if (chunk.model && !acc.model) acc.model = chunk.model;
4108
+ const events$1 = translateChunkToAnthropicEvents(chunk, streamState, toolNameMapping);
4109
+ for (const event of events$1) {
4110
+ consola.debug("Translated Anthropic event:", JSON.stringify(event));
4111
+ processAnthropicEvent(event, acc);
4112
+ await stream.writeSSE({
4113
+ event: event.type,
4114
+ data: JSON.stringify(event)
4115
+ });
4116
+ }
2946
4117
  }
2947
4118
  }
4119
+ function processAnthropicEvent(event, acc) {
4120
+ switch (event.type) {
4121
+ case "content_block_delta":
4122
+ handleContentBlockDelta(event.delta, acc);
4123
+ break;
4124
+ case "content_block_start":
4125
+ handleContentBlockStart(event.content_block, acc);
4126
+ break;
4127
+ case "content_block_stop":
4128
+ handleContentBlockStop(acc);
4129
+ break;
4130
+ case "message_delta":
4131
+ handleMessageDelta(event.delta, event.usage, acc);
4132
+ break;
4133
+ default: break;
4134
+ }
4135
+ }
4136
+ function handleContentBlockDelta(delta, acc) {
4137
+ if (delta.type === "text_delta") acc.content += delta.text;
4138
+ else if (delta.type === "input_json_delta" && acc.currentToolCall) acc.currentToolCall.input += delta.partial_json;
4139
+ }
4140
+ function handleContentBlockStart(block, acc) {
4141
+ if (block.type === "tool_use") acc.currentToolCall = {
4142
+ id: block.id,
4143
+ name: block.name,
4144
+ input: ""
4145
+ };
4146
+ }
4147
+ function handleContentBlockStop(acc) {
4148
+ if (acc.currentToolCall) {
4149
+ acc.toolCalls.push(acc.currentToolCall);
4150
+ acc.currentToolCall = null;
4151
+ }
4152
+ }
4153
+ function handleMessageDelta(delta, usage, acc) {
4154
+ if (delta.stop_reason) acc.stopReason = delta.stop_reason;
4155
+ if (usage) {
4156
+ acc.inputTokens = usage.input_tokens ?? 0;
4157
+ acc.outputTokens = usage.output_tokens;
4158
+ }
4159
+ }
4160
+ function recordStreamingResponse(acc, fallbackModel, ctx) {
4161
+ const contentBlocks = [];
4162
+ if (acc.content) contentBlocks.push({
4163
+ type: "text",
4164
+ text: acc.content
4165
+ });
4166
+ for (const tc of acc.toolCalls) contentBlocks.push({
4167
+ type: "tool_use",
4168
+ ...tc
4169
+ });
4170
+ recordResponse(ctx.historyId, {
4171
+ success: true,
4172
+ model: acc.model || fallbackModel,
4173
+ usage: {
4174
+ input_tokens: acc.inputTokens,
4175
+ output_tokens: acc.outputTokens
4176
+ },
4177
+ stop_reason: acc.stopReason || void 0,
4178
+ content: contentBlocks.length > 0 ? {
4179
+ role: "assistant",
4180
+ content: contentBlocks
4181
+ } : null,
4182
+ toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
4183
+ }, Date.now() - ctx.startTime);
4184
+ }
4185
+ function recordStreamingError(opts) {
4186
+ const { acc, fallbackModel, ctx, error } = opts;
4187
+ recordResponse(ctx.historyId, {
4188
+ success: false,
4189
+ model: acc.model || fallbackModel,
4190
+ usage: {
4191
+ input_tokens: 0,
4192
+ output_tokens: 0
4193
+ },
4194
+ error: error instanceof Error ? error.message : "Stream error",
4195
+ content: null
4196
+ }, Date.now() - ctx.startTime);
4197
+ }
4198
+ function completeTracking(trackingId, inputTokens, outputTokens) {
4199
+ if (!trackingId) return;
4200
+ requestTracker.updateRequest(trackingId, {
4201
+ inputTokens,
4202
+ outputTokens
4203
+ });
4204
+ requestTracker.completeRequest(trackingId, 200, {
4205
+ inputTokens,
4206
+ outputTokens
4207
+ });
4208
+ }
4209
+ function failTracking(trackingId, error) {
4210
+ if (!trackingId) return;
4211
+ requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
4212
+ }
2948
4213
  function convertAnthropicMessages(messages) {
2949
4214
  return messages.map((msg) => {
2950
4215
  if (typeof msg.content === "string") return {
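For reference, when buildFinalPayload actually compacted the conversation (ctx.compactResult.wasCompacted), sendCompactionMarkerEvent above emits the marker as a standard Anthropic text block, i.e. three SSE events. The index and marker text below are placeholders; the real string comes from createCompactionMarker, which is defined outside this hunk:

  event: content_block_start
  data: {"type":"content_block_start","index":2,"content_block":{"type":"text","text":""}}

  event: content_block_delta
  data: {"type":"content_block_delta","index":2,"delta":{"type":"text_delta","text":"<compaction marker>"}}

  event: content_block_stop
  data: {"type":"content_block_stop","index":2}

Anthropic-protocol clients therefore render the marker as ordinary assistant text at the end of the stream.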
@@ -3025,7 +4290,21 @@ modelRoutes.get("/", async (c) => {
3025
4290
  created: 0,
3026
4291
  created_at: (/* @__PURE__ */ new Date(0)).toISOString(),
3027
4292
  owned_by: model.vendor,
3028
- display_name: model.name
4293
+ display_name: model.name,
4294
+ capabilities: {
4295
+ family: model.capabilities.family,
4296
+ type: model.capabilities.type,
4297
+ tokenizer: model.capabilities.tokenizer,
4298
+ limits: {
4299
+ max_context_window_tokens: model.capabilities.limits.max_context_window_tokens,
4300
+ max_output_tokens: model.capabilities.limits.max_output_tokens,
4301
+ max_prompt_tokens: model.capabilities.limits.max_prompt_tokens
4302
+ },
4303
+ supports: {
4304
+ tool_calls: model.capabilities.supports.tool_calls,
4305
+ parallel_tool_calls: model.capabilities.supports.parallel_tool_calls
4306
+ }
4307
+ }
3029
4308
  }));
3030
4309
  return c.json({
3031
4310
  object: "list",
@@ -3063,7 +4342,7 @@ usageRoute.get("/", async (c) => {
3063
4342
  //#endregion
3064
4343
  //#region src/server.ts
3065
4344
  const server = new Hono();
3066
- server.use(logger());
4345
+ server.use(tuiLogger());
3067
4346
  server.use(cors());
3068
4347
  server.get("/", (c) => c.text("Server running"));
3069
4348
  server.get("/health", (c) => {
@@ -3103,8 +4382,17 @@ async function runServer(options) {
3103
4382
  state.rateLimitSeconds = options.rateLimit;
3104
4383
  state.rateLimitWait = options.rateLimitWait;
3105
4384
  state.showToken = options.showToken;
4385
+ state.autoCompact = options.autoCompact;
4386
+ if (options.autoCompact) consola.info("Auto-compact enabled: will compress context when exceeding token limits");
3106
4387
  initHistory(options.history, options.historyLimit);
3107
- if (options.history) consola.info(`History recording enabled (max ${options.historyLimit} entries)`);
4388
+ if (options.history) {
4389
+ const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
4390
+ consola.info(`History recording enabled (${limitText} entries)`);
4391
+ }
4392
+ initTui({
4393
+ enabled: true,
4394
+ mode: options.tui
4395
+ });
3108
4396
  await ensurePaths();
3109
4397
  await cacheVSCodeVersion();
3110
4398
  if (options.githubToken) {
@@ -3224,7 +4512,17 @@ const start = defineCommand({
3224
4512
  "history-limit": {
3225
4513
  type: "string",
3226
4514
  default: "1000",
3227
- description: "Maximum number of history entries to keep in memory"
4515
+ description: "Maximum number of history entries to keep in memory (0 = unlimited)"
4516
+ },
4517
+ tui: {
4518
+ type: "string",
4519
+ default: "console",
4520
+ description: "TUI mode: 'console' for simple log output, 'fullscreen' for interactive terminal UI with tabs"
4521
+ },
4522
+ "auto-compact": {
4523
+ type: "boolean",
4524
+ default: false,
4525
+ description: "Automatically compress conversation history when exceeding model token limits"
3228
4526
  }
3229
4527
  },
3230
4528
  run({ args }) {
@@ -3243,7 +4541,9 @@ const start = defineCommand({
3243
4541
  showToken: args["show-token"],
3244
4542
  proxyEnv: args["proxy-env"],
3245
4543
  history: args.history,
3246
- historyLimit: Number.parseInt(args["history-limit"], 10)
4544
+ historyLimit: Number.parseInt(args["history-limit"], 10),
4545
+ tui: args.tui,
4546
+ autoCompact: args["auto-compact"]
3247
4547
  });
3248
4548
  }
3249
4549
  });
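Taken together, the new flags can be exercised like this (the executable name is assumed from the package name; substitute whatever alias the CLI is installed under):

  copilot-api start --tui fullscreen --auto-compact --history-limit 0

Here --tui fullscreen selects the interactive terminal UI instead of plain console logging, --auto-compact turns on the context-compression path in handleCompletion, and --history-limit 0 now means an unlimited number of history entries.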