@dianshuv/copilot-api 0.2.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +6 -1
  2. package/dist/main.mjs +1027 -20
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -10,6 +10,11 @@
10
10
 
11
11
  - **Responses API endpoint**: `/v1/responses` passthrough for codex models (e.g., `gpt-5.2-codex`, `gpt-5.3-codex`) used by tools like OpenCode. Includes stream ID synchronization for `@ai-sdk/openai` compatibility.
12
12
  - **SubagentStart marker support**: Detects `__SUBAGENT_MARKER__` injected by Claude Code hooks to override `X-Initiator` header to `"agent"` for subagent requests, ensuring correct credit tier usage. Includes a ready-to-use Claude plugin (`claude-plugin/`).
13
+ - **Token analytics tab**: The `/history` page includes a Tokens tab with per-model token usage summary table and cumulative ECharts line chart for visualizing API consumption over time.
14
+ - **Real-time history updates**: The `/history` UI uses WebSocket for live updates instead of polling, with automatic fallback to polling and exponential backoff reconnection.
15
+ - **Graceful shutdown**: 4-phase shutdown sequence — stops accepting requests, waits for in-flight requests to complete, sends abort signal, then force-closes. Configurable via `--shutdown-graceful-wait` and `--shutdown-abort-wait`.
16
+ - **Stream repetition detection**: Detects when models get stuck in repetitive output loops using KMP-based pattern matching and logs a warning.
17
+ - **Stale request reaping**: Automatically force-fails requests that exceed a configurable maximum age (default 600s) to prevent resource leaks.
13
18
 
14
19
  ## Quick Start
15
20
 
@@ -97,7 +102,7 @@ copilot-api start
97
102
  | `/usage` | GET | Copilot usage stats |
98
103
  | `/token` | GET | Current Copilot token |
99
104
  | `/health` | GET | Health check |
100
- | `/history` | GET | Request history Web UI (enabled by default) |
105
+ | `/history` | GET | Request history Web UI with token analytics (enabled by default) |
101
106
  | `/history/api/*` | GET/DELETE | History API endpoints |
102
107
 
103
108
  ## Using with Claude Code
package/dist/main.mjs CHANGED
@@ -50,7 +50,10 @@ const state = {
50
50
  autoTruncate: true,
51
51
  compressToolResults: false,
52
52
  redirectAnthropic: false,
53
- rewriteAnthropicTools: true
53
+ rewriteAnthropicTools: true,
54
+ staleRequestMaxAge: 600,
55
+ shutdownGracefulWait: 60,
56
+ shutdownAbortWait: 120
54
57
  };
55
58
 
56
59
  //#endregion
@@ -1017,11 +1020,11 @@ const patchClaude = defineCommand({
1017
1020
 
1018
1021
  //#endregion
1019
1022
  //#region package.json
1020
- var version = "0.2.3";
1023
+ var version = "0.4.0";
1021
1024
 
1022
1025
  //#endregion
1023
1026
  //#region src/lib/adaptive-rate-limiter.ts
1024
- const DEFAULT_CONFIG = {
1027
+ const DEFAULT_CONFIG$1 = {
1025
1028
  baseRetryIntervalSeconds: 10,
1026
1029
  maxRetryIntervalSeconds: 120,
1027
1030
  requestIntervalSeconds: 10,
@@ -1050,7 +1053,7 @@ var AdaptiveRateLimiter = class {
1050
1053
  recoveryStepIndex = 0;
1051
1054
  constructor(config = {}) {
1052
1055
  this.config = {
1053
- ...DEFAULT_CONFIG,
1056
+ ...DEFAULT_CONFIG$1,
1054
1057
  ...config
1055
1058
  };
1056
1059
  }
@@ -1272,6 +1275,16 @@ var AdaptiveRateLimiter = class {
1272
1275
  return new Promise((resolve) => setTimeout(resolve, ms));
1273
1276
  }
1274
1277
  /**
1278
+ * Reject all currently queued requests during shutdown.
1279
+ * Returns the number of requests that were rejected.
1280
+ */
1281
+ rejectQueued() {
1282
+ const count = this.queue.length;
1283
+ for (const request of this.queue) request.reject(/* @__PURE__ */ new Error("Server is shutting down"));
1284
+ this.queue = [];
1285
+ return count;
1286
+ }
1287
+ /**
1275
1288
  * Get current status for debugging/monitoring
1276
1289
  */
1277
1290
  getStatus() {
@@ -1289,15 +1302,21 @@ let rateLimiterInstance = null;
1289
1302
  */
1290
1303
  function initAdaptiveRateLimiter(config = {}) {
1291
1304
  rateLimiterInstance = new AdaptiveRateLimiter(config);
1292
- const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
1293
- const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
1294
- const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
1295
- const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
1296
- const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
1297
- const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
1305
+ const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG$1.baseRetryIntervalSeconds;
1306
+ const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG$1.maxRetryIntervalSeconds;
1307
+ const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG$1.requestIntervalSeconds;
1308
+ const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG$1.recoveryTimeoutMinutes;
1309
+ const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG$1.consecutiveSuccessesForRecovery;
1310
+ const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG$1.gradualRecoverySteps;
1298
1311
  consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
1299
1312
  }
1300
1313
  /**
1314
+ * Get the rate limiter instance
1315
+ */
1316
+ function getAdaptiveRateLimiter() {
1317
+ return rateLimiterInstance;
1318
+ }
1319
+ /**
1301
1320
  * Execute a request with adaptive rate limiting.
1302
1321
  * If rate limiter is not initialized, executes immediately.
1303
1322
  * Returns the result along with queue wait time.
@@ -1310,6 +1329,292 @@ async function executeWithAdaptiveRateLimit(fn) {
1310
1329
  return rateLimiterInstance.execute(fn);
1311
1330
  }
1312
1331
 
1332
+ //#endregion
1333
+ //#region src/lib/context/request.ts
1334
+ let idCounter = 0;
1335
+ function createRequestContext(opts) {
1336
+ const id = `req_${Date.now()}_${++idCounter}`;
1337
+ const startTime = Date.now();
1338
+ const onEvent = opts.onEvent;
1339
+ let _state = "pending";
1340
+ let _originalRequest = null;
1341
+ let _response = null;
1342
+ let settled = false;
1343
+ function emit(event) {
1344
+ try {
1345
+ onEvent(event);
1346
+ } catch {}
1347
+ }
1348
+ const ctx = {
1349
+ id,
1350
+ tuiLogId: opts.tuiLogId,
1351
+ startTime,
1352
+ endpoint: opts.endpoint,
1353
+ get state() {
1354
+ return _state;
1355
+ },
1356
+ get durationMs() {
1357
+ return Date.now() - startTime;
1358
+ },
1359
+ get settled() {
1360
+ return settled;
1361
+ },
1362
+ get originalRequest() {
1363
+ return _originalRequest;
1364
+ },
1365
+ get response() {
1366
+ return _response;
1367
+ },
1368
+ setOriginalRequest(req) {
1369
+ _originalRequest = req;
1370
+ emit({
1371
+ type: "updated",
1372
+ context: ctx,
1373
+ field: "originalRequest"
1374
+ });
1375
+ },
1376
+ transition(newState) {
1377
+ const previousState = _state;
1378
+ _state = newState;
1379
+ emit({
1380
+ type: "state_changed",
1381
+ context: ctx,
1382
+ previousState
1383
+ });
1384
+ },
1385
+ complete(response) {
1386
+ if (settled) return;
1387
+ settled = true;
1388
+ _response = response;
1389
+ _state = "completed";
1390
+ emit({
1391
+ type: "completed",
1392
+ context: ctx,
1393
+ entry: ctx.toHistoryEntry()
1394
+ });
1395
+ },
1396
+ fail(model, error) {
1397
+ if (settled) return;
1398
+ settled = true;
1399
+ _response = {
1400
+ success: false,
1401
+ model,
1402
+ usage: {
1403
+ input_tokens: 0,
1404
+ output_tokens: 0
1405
+ },
1406
+ error: error instanceof Error ? error.message : String(error),
1407
+ content: null
1408
+ };
1409
+ _state = "failed";
1410
+ emit({
1411
+ type: "failed",
1412
+ context: ctx,
1413
+ entry: ctx.toHistoryEntry()
1414
+ });
1415
+ },
1416
+ toHistoryEntry() {
1417
+ const entry = {
1418
+ id,
1419
+ endpoint: opts.endpoint,
1420
+ timestamp: startTime,
1421
+ durationMs: Date.now() - startTime,
1422
+ request: {
1423
+ model: _originalRequest?.model,
1424
+ messages: _originalRequest?.messages,
1425
+ stream: _originalRequest?.stream,
1426
+ tools: _originalRequest?.tools,
1427
+ system: _originalRequest?.system
1428
+ }
1429
+ };
1430
+ if (_response) entry.response = _response;
1431
+ return entry;
1432
+ }
1433
+ };
1434
+ return ctx;
1435
+ }
1436
+
1437
+ //#endregion
1438
+ //#region src/lib/context/manager.ts
1439
+ /**
1440
+ * RequestContextManager — Active request management
1441
+ *
1442
+ * Manages all in-flight RequestContext instances. Publishes events for
1443
+ * WebSocket push and history persistence.
1444
+ */
1445
+ let _manager = null;
1446
+ function initRequestContextManager(staleMaxAgeSec) {
1447
+ _manager = createRequestContextManager(staleMaxAgeSec);
1448
+ return _manager;
1449
+ }
1450
+ const REAPER_INTERVAL_MS = 6e4;
1451
+ const DEFAULT_STALE_MAX_AGE_SEC = 600;
1452
+ function createRequestContextManager(staleMaxAgeSec) {
1453
+ const maxAgeSec = staleMaxAgeSec ?? DEFAULT_STALE_MAX_AGE_SEC;
1454
+ const activeContexts = /* @__PURE__ */ new Map();
1455
+ const listeners = /* @__PURE__ */ new Set();
1456
+ let reaperTimer = null;
1457
+ function runReaperOnce() {
1458
+ if (maxAgeSec <= 0) return;
1459
+ const maxAgeMs = maxAgeSec * 1e3;
1460
+ for (const [id, ctx] of activeContexts) if (ctx.durationMs > maxAgeMs) {
1461
+ consola.warn(`[context] Force-failing stale request ${id} (endpoint: ${ctx.endpoint}, model: ${ctx.originalRequest?.model ?? "unknown"}, state: ${ctx.state}, age: ${Math.round(ctx.durationMs / 1e3)}s, max: ${maxAgeSec}s)`);
1462
+ ctx.fail(ctx.originalRequest?.model ?? "unknown", /* @__PURE__ */ new Error(`Request exceeded maximum age of ${maxAgeSec}s (stale context reaper)`));
1463
+ }
1464
+ }
1465
+ function startReaper() {
1466
+ if (reaperTimer) return;
1467
+ reaperTimer = setInterval(runReaperOnce, REAPER_INTERVAL_MS);
1468
+ }
1469
+ function stopReaper() {
1470
+ if (reaperTimer) {
1471
+ clearInterval(reaperTimer);
1472
+ reaperTimer = null;
1473
+ }
1474
+ }
1475
+ function emit(event) {
1476
+ for (const listener of listeners) try {
1477
+ listener(event);
1478
+ } catch {}
1479
+ }
1480
+ function handleContextEvent(rawEvent) {
1481
+ const { type, context } = rawEvent;
1482
+ switch (type) {
1483
+ case "state_changed":
1484
+ if (rawEvent.previousState) emit({
1485
+ type: "state_changed",
1486
+ context,
1487
+ previousState: rawEvent.previousState
1488
+ });
1489
+ break;
1490
+ case "updated":
1491
+ if (rawEvent.field) emit({
1492
+ type: "updated",
1493
+ context,
1494
+ field: rawEvent.field
1495
+ });
1496
+ break;
1497
+ case "completed":
1498
+ if (rawEvent.entry) emit({
1499
+ type: "completed",
1500
+ context,
1501
+ entry: rawEvent.entry
1502
+ });
1503
+ activeContexts.delete(context.id);
1504
+ break;
1505
+ case "failed":
1506
+ if (rawEvent.entry) emit({
1507
+ type: "failed",
1508
+ context,
1509
+ entry: rawEvent.entry
1510
+ });
1511
+ activeContexts.delete(context.id);
1512
+ break;
1513
+ default: break;
1514
+ }
1515
+ }
1516
+ return {
1517
+ create(opts) {
1518
+ const ctx = createRequestContext({
1519
+ endpoint: opts.endpoint,
1520
+ tuiLogId: opts.tuiLogId,
1521
+ onEvent: handleContextEvent
1522
+ });
1523
+ activeContexts.set(ctx.id, ctx);
1524
+ emit({
1525
+ type: "created",
1526
+ context: ctx
1527
+ });
1528
+ return ctx;
1529
+ },
1530
+ get(id) {
1531
+ return activeContexts.get(id);
1532
+ },
1533
+ getAll() {
1534
+ return Array.from(activeContexts.values());
1535
+ },
1536
+ get activeCount() {
1537
+ return activeContexts.size;
1538
+ },
1539
+ on(_event, listener) {
1540
+ listeners.add(listener);
1541
+ },
1542
+ off(_event, listener) {
1543
+ listeners.delete(listener);
1544
+ },
1545
+ startReaper,
1546
+ stopReaper,
1547
+ _runReaperOnce: runReaperOnce
1548
+ };
1549
+ }
1550
+
1551
+ //#endregion
1552
+ //#region src/lib/history-ws.ts
1553
+ /**
1554
+ * WebSocket support for History API.
1555
+ * Enables real-time updates when new requests are recorded.
1556
+ */
1557
+ const clients = /* @__PURE__ */ new Set();
1558
+ function getClientCount() {
1559
+ return clients.size;
1560
+ }
1561
+ function closeAllClients() {
1562
+ for (const client of clients) try {
1563
+ client.close(1001, "Server shutting down");
1564
+ } catch {}
1565
+ clients.clear();
1566
+ }
1567
+ function broadcast(message) {
1568
+ const data = JSON.stringify(message);
1569
+ for (const client of clients) try {
1570
+ if (client.readyState === WebSocket.OPEN) client.send(data);
1571
+ else clients.delete(client);
1572
+ } catch (error) {
1573
+ consola.debug("WebSocket send failed, removing client:", error);
1574
+ clients.delete(client);
1575
+ }
1576
+ }
1577
+ function notifyEntryAdded(summary) {
1578
+ if (clients.size === 0) return;
1579
+ broadcast({
1580
+ type: "entry_added",
1581
+ data: summary,
1582
+ timestamp: Date.now()
1583
+ });
1584
+ }
1585
+ function notifyEntryUpdated(summary) {
1586
+ if (clients.size === 0) return;
1587
+ broadcast({
1588
+ type: "entry_updated",
1589
+ data: summary,
1590
+ timestamp: Date.now()
1591
+ });
1592
+ }
1593
+ function notifyStatsUpdated(stats) {
1594
+ if (clients.size === 0) return;
1595
+ broadcast({
1596
+ type: "stats_updated",
1597
+ data: stats,
1598
+ timestamp: Date.now()
1599
+ });
1600
+ }
1601
+ function notifyHistoryCleared() {
1602
+ if (clients.size === 0) return;
1603
+ broadcast({
1604
+ type: "history_cleared",
1605
+ data: null,
1606
+ timestamp: Date.now()
1607
+ });
1608
+ }
1609
+ function notifySessionDeleted(sessionId) {
1610
+ if (clients.size === 0) return;
1611
+ broadcast({
1612
+ type: "session_deleted",
1613
+ data: { sessionId },
1614
+ timestamp: Date.now()
1615
+ });
1616
+ }
1617
+
1313
1618
  //#endregion
1314
1619
  //#region src/lib/history.ts
1315
1620
  function generateId$1() {
@@ -1389,6 +1694,13 @@ function recordRequest(endpoint, request) {
1389
1694
  if (historyState.entries.filter((e) => e.sessionId === removed.sessionId).length === 0) historyState.sessions.delete(removed.sessionId);
1390
1695
  }
1391
1696
  }
1697
+ notifyEntryAdded({
1698
+ id: entry.id,
1699
+ endpoint,
1700
+ model: request.model,
1701
+ stream: request.stream,
1702
+ timestamp: entry.timestamp
1703
+ });
1392
1704
  return entry.id;
1393
1705
  }
1394
1706
  function recordResponse(id, response, durationMs) {
@@ -1403,6 +1715,20 @@ function recordResponse(id, response, durationMs) {
1403
1715
  session.totalOutputTokens += response.usage.output_tokens;
1404
1716
  session.lastActivity = Date.now();
1405
1717
  }
1718
+ notifyEntryUpdated({
1719
+ id: entry.id,
1720
+ endpoint: entry.endpoint,
1721
+ model: response.model,
1722
+ success: response.success,
1723
+ durationMs,
1724
+ inputTokens: response.usage.input_tokens,
1725
+ outputTokens: response.usage.output_tokens
1726
+ });
1727
+ notifyStatsUpdated({
1728
+ totalRequests: historyState.entries.length,
1729
+ totalInputTokens: session?.totalInputTokens ?? 0,
1730
+ totalOutputTokens: session?.totalOutputTokens ?? 0
1731
+ });
1406
1732
  }
1407
1733
  }
1408
1734
  function getHistory(options = {}) {
@@ -1477,12 +1803,14 @@ function clearHistory() {
1477
1803
  historyState.entries = [];
1478
1804
  historyState.sessions = /* @__PURE__ */ new Map();
1479
1805
  historyState.currentSessionId = generateId$1();
1806
+ notifyHistoryCleared();
1480
1807
  }
1481
1808
  function deleteSession(sessionId) {
1482
1809
  if (!historyState.sessions.has(sessionId)) return false;
1483
1810
  historyState.entries = historyState.entries.filter((e) => e.sessionId !== sessionId);
1484
1811
  historyState.sessions.delete(sessionId);
1485
1812
  if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId$1();
1813
+ notifySessionDeleted(sessionId);
1486
1814
  return true;
1487
1815
  }
1488
1816
  function getStats() {
@@ -1533,6 +1861,37 @@ function getStats() {
1533
1861
  activeSessions
1534
1862
  };
1535
1863
  }
1864
+ function getTokenStats() {
1865
+ const models = {};
1866
+ const timeline = [];
1867
+ for (const entry of historyState.entries) {
1868
+ if (!entry.response) continue;
1869
+ const model = entry.response.model || entry.request.model;
1870
+ const inputTokens = entry.response.usage.input_tokens;
1871
+ const outputTokens = entry.response.usage.output_tokens;
1872
+ const existing = models[model];
1873
+ if (existing) {
1874
+ existing.inputTokens += inputTokens;
1875
+ existing.outputTokens += outputTokens;
1876
+ existing.requestCount++;
1877
+ } else models[model] = {
1878
+ inputTokens,
1879
+ outputTokens,
1880
+ requestCount: 1
1881
+ };
1882
+ timeline.push({
1883
+ timestamp: entry.timestamp,
1884
+ model,
1885
+ inputTokens,
1886
+ outputTokens
1887
+ });
1888
+ }
1889
+ timeline.sort((a, b) => a.timestamp - b.timestamp);
1890
+ return {
1891
+ models,
1892
+ timeline
1893
+ };
1894
+ }
1536
1895
  function exportHistory(format = "json") {
1537
1896
  if (format === "json") return JSON.stringify({
1538
1897
  sessions: Array.from(historyState.sessions.values()),
@@ -1700,6 +2059,142 @@ function generateEnvScript(envVars, commandToRun = "") {
1700
2059
  return commandBlock || commandToRun;
1701
2060
  }
1702
2061
 
2062
+ //#endregion
2063
+ //#region src/lib/shutdown.ts
2064
+ const DRAIN_POLL_INTERVAL_MS = 500;
2065
+ const DRAIN_PROGRESS_INTERVAL_MS = 5e3;
2066
+ let serverInstance = null;
2067
+ let _isShuttingDown = false;
2068
+ let shutdownResolve = null;
2069
+ let shutdownAbortController = null;
2070
+ function getIsShuttingDown() {
2071
+ return _isShuttingDown;
2072
+ }
2073
+ function setServerInstance(server) {
2074
+ serverInstance = server;
2075
+ }
2076
+ function formatActiveRequestsSummary(requests) {
2077
+ const now = Date.now();
2078
+ const lines = requests.map((req) => {
2079
+ const age = Math.round((now - req.startTime) / 1e3);
2080
+ const model = req.model || "unknown";
2081
+ const tags = req.tags?.length ? ` [${req.tags.join(", ")}]` : "";
2082
+ return ` ${req.method} ${req.path} ${model} (${req.status}, ${age}s)${tags}`;
2083
+ });
2084
+ return `Waiting for ${requests.length} active request(s):\n${lines.join("\n")}`;
2085
+ }
2086
+ async function drainActiveRequests(timeoutMs, tracker, opts) {
2087
+ const pollInterval = opts?.pollIntervalMs ?? DRAIN_POLL_INTERVAL_MS;
2088
+ const progressInterval = opts?.progressIntervalMs ?? DRAIN_PROGRESS_INTERVAL_MS;
2089
+ const deadline = Date.now() + timeoutMs;
2090
+ let lastProgressLog = 0;
2091
+ while (Date.now() < deadline) {
2092
+ const active = tracker.getActiveRequests();
2093
+ if (active.length === 0) return "drained";
2094
+ const now = Date.now();
2095
+ if (now - lastProgressLog >= progressInterval) {
2096
+ lastProgressLog = now;
2097
+ consola.info(formatActiveRequestsSummary(active));
2098
+ }
2099
+ await new Promise((resolve) => setTimeout(resolve, pollInterval));
2100
+ }
2101
+ return "timeout";
2102
+ }
2103
+ async function gracefulShutdown(signal, deps) {
2104
+ const tracker = deps?.tracker;
2105
+ const server = deps?.server ?? serverInstance;
2106
+ const rateLimiter = deps?.rateLimiter !== void 0 ? deps.rateLimiter : getAdaptiveRateLimiter();
2107
+ const stopRefresh = deps?.stopTokenRefreshFn ?? (() => {});
2108
+ const closeWsClients = deps?.closeAllClientsFn ?? closeAllClients;
2109
+ const getWsCount = deps?.getClientCountFn ?? getClientCount;
2110
+ const gracefulWaitMs = deps?.gracefulWaitMs ?? state.shutdownGracefulWait * 1e3;
2111
+ const abortWaitMs = deps?.abortWaitMs ?? state.shutdownAbortWait * 1e3;
2112
+ const drainOpts = {
2113
+ pollIntervalMs: deps?.drainPollIntervalMs ?? DRAIN_POLL_INTERVAL_MS,
2114
+ progressIntervalMs: deps?.drainProgressIntervalMs ?? DRAIN_PROGRESS_INTERVAL_MS
2115
+ };
2116
+ _isShuttingDown = true;
2117
+ shutdownAbortController = new AbortController();
2118
+ consola.info(`Received ${signal}, shutting down gracefully...`);
2119
+ try {
2120
+ deps?.contextManager?.stopReaper();
2121
+ } catch {}
2122
+ stopRefresh();
2123
+ const wsClients = getWsCount();
2124
+ if (wsClients > 0) {
2125
+ closeWsClients();
2126
+ consola.info(`Disconnected ${wsClients} WebSocket client(s)`);
2127
+ }
2128
+ if (rateLimiter) {
2129
+ const rejected = rateLimiter.rejectQueued();
2130
+ if (rejected > 0) consola.info(`Rejected ${rejected} queued request(s) from rate limiter`);
2131
+ }
2132
+ if (server) {
2133
+ server.close(false).catch((error) => {
2134
+ consola.error("Error stopping listener:", error);
2135
+ });
2136
+ consola.info("Stopped accepting new connections");
2137
+ }
2138
+ if (tracker) {
2139
+ const activeCount = tracker.getActiveRequests().length;
2140
+ if (activeCount > 0) {
2141
+ consola.info(`Phase 2: Waiting up to ${gracefulWaitMs / 1e3}s for ${activeCount} active request(s)...`);
2142
+ try {
2143
+ if (await drainActiveRequests(gracefulWaitMs, tracker, drainOpts) === "drained") {
2144
+ consola.info("All requests completed naturally");
2145
+ finalize(tracker);
2146
+ return;
2147
+ }
2148
+ } catch (error) {
2149
+ consola.error("Error during Phase 2 drain:", error);
2150
+ }
2151
+ const remaining = tracker.getActiveRequests().length;
2152
+ consola.info(`Phase 3: Sending abort signal to ${remaining} remaining request(s), waiting up to ${abortWaitMs / 1e3}s...`);
2153
+ shutdownAbortController.abort();
2154
+ try {
2155
+ if (await drainActiveRequests(abortWaitMs, tracker, drainOpts) === "drained") {
2156
+ consola.info("All requests completed after abort signal");
2157
+ finalize(tracker);
2158
+ return;
2159
+ }
2160
+ } catch (error) {
2161
+ consola.error("Error during Phase 3 drain:", error);
2162
+ }
2163
+ const forceRemaining = tracker.getActiveRequests().length;
2164
+ consola.warn(`Phase 4: Force-closing ${forceRemaining} remaining request(s)`);
2165
+ if (server) try {
2166
+ await server.close(true);
2167
+ } catch (error) {
2168
+ consola.error("Error force-closing server:", error);
2169
+ }
2170
+ }
2171
+ finalize(tracker);
2172
+ } else {
2173
+ consola.info("Shutdown complete");
2174
+ shutdownResolve?.();
2175
+ }
2176
+ }
2177
+ function finalize(tracker) {
2178
+ tracker.destroy();
2179
+ consola.info("Shutdown complete");
2180
+ shutdownResolve?.();
2181
+ }
2182
+ function setupShutdownHandlers() {
2183
+ const handler = (signal) => {
2184
+ if (_isShuttingDown) {
2185
+ consola.warn("Second signal received, forcing immediate exit");
2186
+ process.exit(1);
2187
+ }
2188
+ gracefulShutdown(signal).catch((error) => {
2189
+ consola.error("Fatal error during shutdown:", error);
2190
+ shutdownResolve?.();
2191
+ process.exit(1);
2192
+ });
2193
+ };
2194
+ process.on("SIGINT", () => handler("SIGINT"));
2195
+ process.on("SIGTERM", () => handler("SIGTERM"));
2196
+ }
2197
+
1703
2198
  //#endregion
1704
2199
  //#region src/lib/tui/console-renderer.ts
1705
2200
  const CLEAR_LINE = "\x1B[2K\r";
@@ -2060,6 +2555,7 @@ const requestTracker = new RequestTracker();
2060
2555
  */
2061
2556
  function tuiLogger() {
2062
2557
  return async (c, next) => {
2558
+ if (getIsShuttingDown()) return c.json({ error: "Server is shutting down" }, 503);
2063
2559
  const method = c.req.method;
2064
2560
  const path = c.req.path;
2065
2561
  const isHistoryAccess = path.startsWith("/history");
@@ -2783,6 +3279,127 @@ function createTruncationResponseMarkerOpenAI(result) {
2783
3279
  return `\n\n---\n[Auto-truncated: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
2784
3280
  }
2785
3281
 
3282
+ //#endregion
3283
+ //#region src/lib/repetition-detector.ts
3284
+ /**
3285
+ * Stream repetition detector.
3286
+ *
3287
+ * Uses the KMP failure function (prefix function) to detect repeated patterns
3288
+ * in streaming text output. When a model gets stuck in a repetitive loop,
3289
+ * it wastes tokens producing the same content over and over. This detector
3290
+ * identifies such loops early so the caller can take action (log warning,
3291
+ * abort stream, etc.).
3292
+ *
3293
+ * The algorithm works by maintaining a sliding buffer of recent text and
3294
+ * computing the longest proper prefix that is also a suffix — if this
3295
+ * length exceeds `(text.length - period) >= minRepetitions * period`,
3296
+ * it means a pattern of length `period` has repeated enough times.
3297
+ */
3298
+ const DEFAULT_CONFIG = {
3299
+ minPatternLength: 10,
3300
+ minRepetitions: 3,
3301
+ maxBufferSize: 5e3
3302
+ };
3303
+ var RepetitionDetector = class {
3304
+ buffer = "";
3305
+ config;
3306
+ detected = false;
3307
+ constructor(config) {
3308
+ this.config = {
3309
+ ...DEFAULT_CONFIG,
3310
+ ...config
3311
+ };
3312
+ }
3313
+ /**
3314
+ * Feed a text chunk into the detector.
3315
+ * Returns `true` if repetition has been detected (now or previously).
3316
+ * Once detected, subsequent calls return `true` without further analysis.
3317
+ */
3318
+ feed(text) {
3319
+ if (this.detected) return true;
3320
+ if (!text) return false;
3321
+ this.buffer += text;
3322
+ if (this.buffer.length > this.config.maxBufferSize) this.buffer = this.buffer.slice(-this.config.maxBufferSize);
3323
+ const minRequired = this.config.minPatternLength * this.config.minRepetitions;
3324
+ if (this.buffer.length < minRequired) return false;
3325
+ this.detected = detectRepetition(this.buffer, this.config.minPatternLength, this.config.minRepetitions);
3326
+ return this.detected;
3327
+ }
3328
+ /** Reset detector state for a new stream */
3329
+ reset() {
3330
+ this.buffer = "";
3331
+ this.detected = false;
3332
+ }
3333
+ /** Whether repetition has been detected */
3334
+ get isDetected() {
3335
+ return this.detected;
3336
+ }
3337
+ };
3338
+ /**
3339
+ * Detect if the tail of `text` contains a repeating pattern.
3340
+ *
3341
+ * Uses the KMP prefix function: for a string S, the prefix function π[i]
3342
+ * gives the length of the longest proper prefix of S[0..i] that is also
3343
+ * a suffix. If π[n-1] ≥ (n - period) where period = n - π[n-1], then
3344
+ * the string is composed of a repeating unit of length `period`.
3345
+ *
3346
+ * We check the suffix of the buffer (last `checkLength` chars) to detect
3347
+ * if a pattern of at least `minPatternLength` chars repeats at least
3348
+ * `minRepetitions` times.
3349
+ */
3350
+ function detectRepetition(text, minPatternLength, minRepetitions) {
3351
+ const minWindow = minPatternLength * minRepetitions;
3352
+ const maxWindow = Math.min(text.length, 2e3);
3353
+ const windowSizes = [
3354
+ minWindow,
3355
+ Math.floor(maxWindow * .5),
3356
+ maxWindow
3357
+ ].filter((w) => w >= minWindow && w <= text.length);
3358
+ for (const windowSize of windowSizes) {
3359
+ const window = text.slice(-windowSize);
3360
+ const period = findRepeatingPeriod(window);
3361
+ if (period >= minPatternLength) {
3362
+ if (Math.floor(window.length / period) >= minRepetitions) return true;
3363
+ }
3364
+ }
3365
+ return false;
3366
+ }
3367
+ /**
3368
+ * Find the shortest repeating period in a string using KMP prefix function.
3369
+ * Returns the period length, or the string length if no repetition found.
3370
+ */
3371
+ function findRepeatingPeriod(s) {
3372
+ const n = s.length;
3373
+ if (n === 0) return 0;
3374
+ const pi = new Int32Array(n);
3375
+ for (let i = 1; i < n; i++) {
3376
+ let j = pi[i - 1] ?? 0;
3377
+ while (j > 0 && s[i] !== s[j]) j = pi[j - 1] ?? 0;
3378
+ if (s[i] === s[j]) j++;
3379
+ pi[i] = j;
3380
+ }
3381
+ const period = n - pi[n - 1];
3382
+ if (period < n && n % period === 0) return period;
3383
+ if (period < n && pi[n - 1] >= period) return period;
3384
+ return n;
3385
+ }
3386
+ /**
3387
+ * Create a repetition detector callback for use in stream processing.
3388
+ * Returns a function that accepts text deltas and logs a warning on first detection.
3389
+ */
3390
+ function createStreamRepetitionChecker(label, config) {
3391
+ const detector = new RepetitionDetector(config);
3392
+ let warned = false;
3393
+ return (textDelta) => {
3394
+ const isRepetitive = detector.feed(textDelta);
3395
+ if (isRepetitive && !warned) {
3396
+ warned = true;
3397
+ consola.warn(`[RepetitionDetector] ${label}: Repetitive output detected in stream`);
3398
+ }
3399
+ return isRepetitive;
3400
+ };
3401
+ }
3402
+
2786
3403
  //#endregion
2787
3404
  //#region src/services/copilot/create-chat-completions.ts
2788
3405
  const createChatCompletions = async (payload, options) => {
@@ -3119,6 +3736,7 @@ function createStreamAccumulator() {
3119
3736
  async function handleStreamingResponse$1(opts) {
3120
3737
  const { stream, response, payload, ctx } = opts;
3121
3738
  const acc = createStreamAccumulator();
3739
+ const checkRepetition = createStreamRepetitionChecker(`openai:${payload.model}`);
3122
3740
  try {
3123
3741
  if (state.verbose && ctx.truncateResult?.wasCompacted) {
3124
3742
  const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
@@ -3142,7 +3760,7 @@ async function handleStreamingResponse$1(opts) {
3142
3760
  }
3143
3761
  for await (const chunk of response) {
3144
3762
  consola.debug("Streaming chunk:", JSON.stringify(chunk));
3145
- parseStreamChunk(chunk, acc);
3763
+ parseStreamChunk(chunk, acc, checkRepetition);
3146
3764
  await stream.writeSSE(chunk);
3147
3765
  }
3148
3766
  recordStreamSuccess(acc, payload.model, ctx);
@@ -3158,7 +3776,7 @@ async function handleStreamingResponse$1(opts) {
3158
3776
  throw error;
3159
3777
  }
3160
3778
  }
3161
- function parseStreamChunk(chunk, acc) {
3779
+ function parseStreamChunk(chunk, acc, checkRepetition) {
3162
3780
  if (!chunk.data || chunk.data === "[DONE]") return;
3163
3781
  try {
3164
3782
  const parsed = JSON.parse(chunk.data);
@@ -3169,7 +3787,10 @@ function parseStreamChunk(chunk, acc) {
3169
3787
  }
3170
3788
  const choice = parsed.choices[0];
3171
3789
  if (choice) {
3172
- if (choice.delta.content) acc.content += choice.delta.content;
3790
+ if (choice.delta.content) {
3791
+ acc.content += choice.delta.content;
3792
+ checkRepetition(choice.delta.content);
3793
+ }
3173
3794
  if (choice.delta.tool_calls) for (const tc of choice.delta.tool_calls) {
3174
3795
  const idx = tc.index;
3175
3796
  if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
@@ -3360,6 +3981,11 @@ function handleDeleteSession(c) {
3360
3981
  message: "Session deleted"
3361
3982
  });
3362
3983
  }
3984
+ function handleGetTokenStats(c) {
3985
+ if (!isHistoryEnabled()) return c.json({ error: "History recording is not enabled" }, 400);
3986
+ const stats = getTokenStats();
3987
+ return c.json(stats);
3988
+ }
3363
3989
 
3364
3990
  //#endregion
3365
3991
  //#region src/routes/history/ui/script.ts
@@ -3903,11 +4529,274 @@ document.addEventListener('keydown', (e) => {
3903
4529
  }
3904
4530
  });
3905
4531
 
3906
- // Auto-refresh every 10 seconds
3907
- setInterval(() => {
4532
+ // Auto-refresh every 10 seconds (fallback when WebSocket is not available)
4533
+ let autoRefreshTimer = setInterval(() => {
3908
4534
  loadStats();
3909
4535
  loadSessions();
3910
4536
  }, 10000);
4537
+
4538
+ // WebSocket real-time updates
4539
+ let reconnectAttempts = 0;
4540
// Opens the live-update socket at /history/ws (wss: on https pages, ws: otherwise).
// While the socket is open the 10 second polling timer is stopped; when it closes
// polling is re-armed and a reconnect is scheduled with a doubling delay
// (5s, 10s, ... capped at 60s) for at most 10 consecutive attempts.
// A failure to construct the socket is only logged, leaving polling untouched.
function connectWebSocket() {
  const scheme = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
  const endpoint = scheme + '//' + window.location.host + '/history/ws';

  // Message types after which both the session list and the stats are reloaded.
  const fullRefreshTypes = ['entry_added', 'entry_updated', 'history_cleared', 'session_deleted'];

  let socket;
  try {
    socket = new WebSocket(endpoint);
  } catch (err) {
    console.warn('[History] WebSocket not available:', err);
    return;
  }

  socket.onopen = function() {
    console.log('[History] WebSocket connected');
    reconnectAttempts = 0;
    // Push updates replace polling for as long as the socket stays open.
    clearInterval(autoRefreshTimer);
  };

  socket.onmessage = function(event) {
    try {
      const kind = JSON.parse(event.data).type;
      if (fullRefreshTypes.includes(kind)) {
        loadSessions();
        loadStats();
      } else if (kind === 'stats_updated') {
        loadStats();
      }
    } catch (err) {
      console.warn('[History] Failed to parse WS message:', err);
    }
  };

  socket.onclose = function() {
    console.log('[History] WebSocket disconnected, falling back to polling');
    // Clear before re-arming so two polling timers never run side by side.
    clearInterval(autoRefreshTimer);
    autoRefreshTimer = setInterval(function() {
      loadStats();
      loadSessions();
    }, 10000);

    // Give up reconnecting after 10 attempts; polling keeps the page fresh.
    if (reconnectAttempts >= 10) return;
    const waitMs = Math.min(5000 * Math.pow(2, reconnectAttempts), 60000);
    reconnectAttempts += 1;
    setTimeout(connectWebSocket, waitMs);
  };

  // Nothing to do on error: the close handler above performs the fallback.
  socket.onerror = function() {};
}
4600
+
4601
+ // Start WebSocket connection
4602
+ connectWebSocket();
4603
+
4604
+ // Tab switching
4605
// Activates the tab named by the argument ('requests' or 'tokens' in the template):
// marks its tab item as active, hides every tab panel and shows the matching one
// ('flex' layout for the requests panel, 'block' for any other).
// Opening the tokens tab also flags the panel as loaded and refreshes its data.
// An unknown tab name is ignored (with a console warning) before anything is
// changed, so the page is never left with no active tab.
function switchTab(tab) {
  const tabItem = document.querySelector('.tab-item[data-tab="' + tab + '"]');
  const panel = document.getElementById('tab-' + tab);
  if (!tabItem || !panel) {
    console.warn('[History] Unknown tab:', tab);
    return;
  }

  document.querySelectorAll('.tab-item').forEach(function(item) {
    item.classList.remove('active');
  });
  tabItem.classList.add('active');

  document.querySelectorAll('.tab-panel').forEach(function(other) {
    other.style.display = 'none';
  });
  panel.style.display = tab === 'requests' ? 'flex' : 'block';

  if (tab === 'tokens') {
    // data-loaded is read by the color-scheme listener to decide whether to re-render.
    panel.setAttribute('data-loaded', 'true');
    loadTokenStats();
  }
}
4618
+
4619
// Loads per-model token totals from /history/api/token-stats, renders them as a
// table (largest total first, with a totals footer) and passes the timeline to
// renderTokenChart().
// An empty-state message replaces the table when the API returns an error payload
// or reports no models. Any error thrown while fetching or rendering is shown in
// place of the table, passed through escapeHtml() like the model names are.
// NOTE(review): this script appears to be embedded in a template string, which is
// presumably why strings are joined with + here - keep it that way.
async function loadTokenStats() {
  const container = document.getElementById('tokens-table-container');
  container.innerHTML = '<div class="loading">Loading...</div>';

  try {
    const res = await fetch('/history/api/token-stats');
    const data = await res.json();
    if (data.error) {
      container.innerHTML = '<div class="empty-state"><h3>History Not Enabled</h3><p>Start server with --history</p></div>';
      return;
    }

    const modelNames = Object.keys(data.models);
    if (modelNames.length === 0) {
      container.innerHTML = '<div class="empty-state"><h3>No token data</h3><p>Make some API requests first</p></div>';
      const fallback = document.getElementById('chart-fallback');
      fallback.style.display = 'block';
      fallback.textContent = 'No data available for chart.';
      return;
    }

    // Sort models by total tokens descending
    modelNames.sort((a, b) => {
      const totalA = data.models[a].inputTokens + data.models[a].outputTokens;
      const totalB = data.models[b].inputTokens + data.models[b].outputTokens;
      return totalB - totalA;
    });

    // Reset chart fallback state left behind by an earlier empty or failed render
    document.getElementById('chart-fallback').style.display = 'none';
    document.getElementById('token-chart').style.display = '';

    // Render table
    let totalInput = 0;
    let totalOutput = 0;
    let totalReqs = 0;
    let rows = '';
    for (const model of modelNames) {
      const m = data.models[model];
      const total = m.inputTokens + m.outputTokens;
      totalInput += m.inputTokens;
      totalOutput += m.outputTokens;
      totalReqs += m.requestCount;
      rows += '<tr>'
        + '<td>' + escapeHtml(model) + '</td>'
        + '<td class="number">' + formatNumber(m.inputTokens) + '</td>'
        + '<td class="number">' + formatNumber(m.outputTokens) + '</td>'
        + '<td class="number">' + formatNumber(total) + '</td>'
        + '<td class="number">' + m.requestCount + '</td>'
        + '</tr>';
    }

    container.innerHTML = '<table class="tokens-table">'
      + '<thead><tr><th>Model</th><th class="number">Input Tokens</th><th class="number">Output Tokens</th><th class="number">Total Tokens</th><th class="number">Requests</th></tr></thead>'
      + '<tbody>' + rows + '</tbody>'
      + '<tfoot><tr><td>Total</td>'
      + '<td class="number">' + formatNumber(totalInput) + '</td>'
      + '<td class="number">' + formatNumber(totalOutput) + '</td>'
      + '<td class="number">' + formatNumber(totalInput + totalOutput) + '</td>'
      + '<td class="number">' + totalReqs + '</td>'
      + '</tr></tfoot></table>';

    // Render chart
    renderTokenChart(data.timeline, modelNames);
  } catch (e) {
    // The message is inserted as HTML, so it must be escaped like any other
    // dynamic text on this page; non-Error throwables are stringified.
    const message = e && e.message ? e.message : String(e);
    container.innerHTML = '<div class="empty-state">Error: ' + escapeHtml(message) + '</div>';
  }
}
4684
+
4685
+ let tokenChart = null;
4686
+ let tokenChartListenersAdded = false;
4687
+
4688
// Draws the cumulative token usage chart: one line series per entry of
// modelNames, each point being [timestamp, running total of input + output tokens].
//
// timeline   - array of { timestamp, model, inputTokens, outputTokens } points,
//              expected to be ordered by the backend; a missing value is treated
//              as an empty timeline.
// modelNames - series names, in legend order.
//
// When the global echarts object is absent the chart element is hidden and the
// fallback notice is shown instead. Any previous chart instance is disposed before
// a new one is created. The window resize and color-scheme listeners are
// registered only on the first successful render.
function renderTokenChart(timeline, modelNames) {
  if (typeof echarts === 'undefined') {
    const fallback = document.getElementById('chart-fallback');
    // Restore the notice text: the empty-data path reuses this element for a
    // different message, which would otherwise linger here.
    fallback.textContent = 'ECharts library failed to load. Token chart is unavailable.';
    fallback.style.display = 'block';
    document.getElementById('token-chart').style.display = 'none';
    return;
  }

  const chartDom = document.getElementById('token-chart');
  const isDark = window.matchMedia('(prefers-color-scheme: dark)').matches;

  if (tokenChart) {
    tokenChart.dispose();
  }
  tokenChart = echarts.init(chartDom, isDark ? 'dark' : null);

  // Per-model running totals and point lists. Prototype-less objects keep model
  // names such as "constructor" from colliding with inherited properties.
  const cumulative = Object.create(null);
  const seriesData = Object.create(null);
  for (const name of modelNames) {
    cumulative[name] = 0;
    seriesData[name] = [];
  }

  // Build cumulative data per model (timeline is already sorted by backend)
  for (const point of timeline || []) {
    // A point for a model without a series cannot be plotted; skip it instead of throwing.
    if (!(point.model in seriesData)) continue;
    cumulative[point.model] += point.inputTokens + point.outputTokens;
    seriesData[point.model].push([point.timestamp, cumulative[point.model]]);
  }

  const colors = ['#58a6ff', '#3fb950', '#f85149', '#d29922', '#a371f7', '#39c5cf', '#f778ba', '#79c0ff', '#7ee787', '#ffa657'];

  const series = modelNames.map((name, i) => ({
    name: name,
    type: 'line',
    data: seriesData[name],
    smooth: true,
    symbol: 'circle',
    symbolSize: 4,
    lineStyle: { width: 2 },
    itemStyle: { color: colors[i % colors.length] },
    areaStyle: { opacity: 0.05 },
  }));

  // Chart colors follow the page's CSS custom properties.
  const style = getComputedStyle(document.documentElement);
  const textColor = style.getPropertyValue('--text').trim();
  const borderColor = style.getPropertyValue('--border').trim();
  const bgColor = style.getPropertyValue('--bg').trim();

  const option = {
    backgroundColor: 'transparent',
    tooltip: {
      trigger: 'item',
      backgroundColor: bgColor,
      borderColor: borderColor,
      textStyle: { color: textColor, fontSize: 12 },
      formatter: function(params) {
        const d = new Date(params.data[0]);
        const time = d.toLocaleDateString() + ' ' + d.toLocaleTimeString();
        // The returned string is markup, so the series (model) name is escaped
        // here just as it is in the summary table.
        return '<b>' + escapeHtml(params.seriesName) + '</b><br/>'
          + time + '<br/>'
          + 'Cumulative: ' + formatNumber(params.data[1]) + ' tokens';
      }
    },
    legend: {
      data: modelNames,
      textStyle: { color: textColor, fontSize: 12 },
      top: 0,
    },
    grid: {
      left: 60,
      right: 20,
      top: 40,
      bottom: 40,
    },
    xAxis: {
      type: 'time',
      axisLine: { lineStyle: { color: borderColor } },
      axisLabel: { color: textColor, fontSize: 11 },
      splitLine: { show: false },
    },
    yAxis: {
      type: 'value',
      axisLine: { lineStyle: { color: borderColor } },
      axisLabel: {
        color: textColor,
        fontSize: 11,
        formatter: function(v) { return formatNumber(v); }
      },
      splitLine: { lineStyle: { color: borderColor, opacity: 0.3 } },
    },
    series: series,
  };

  tokenChart.setOption(option);

  // Add global listeners only once
  if (!tokenChartListenersAdded) {
    tokenChartListenersAdded = true;
    window.addEventListener('resize', function() {
      if (tokenChart) tokenChart.resize();
    });
    // A theme switch needs a full re-render because the theme is fixed at init time.
    window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', function() {
      if (document.getElementById('tab-tokens').getAttribute('data-loaded') === 'true') {
        loadTokenStats();
      }
    });
  }
}
3911
4800
  `;
3912
4801
 
3913
4802
  //#endregion
@@ -3948,10 +4837,39 @@ body {
3948
4837
  color: var(--text);
3949
4838
  line-height: 1.4;
3950
4839
  font-size: 13px;
4840
+ height: 100vh;
4841
+ display: flex;
4842
+ flex-direction: column;
4843
+ }
4844
+
4845
+ /* Tab bar */
4846
+ .tab-bar {
4847
+ display: flex;
4848
+ gap: 0;
4849
+ border-bottom: 1px solid var(--border);
4850
+ background: var(--bg-secondary);
4851
+ padding: 0 16px;
4852
+ flex-shrink: 0;
4853
+ }
4854
+ .tab-item {
4855
+ padding: 10px 20px;
4856
+ cursor: pointer;
4857
+ font-size: 13px;
4858
+ font-weight: 500;
4859
+ color: var(--text-muted);
4860
+ border-bottom: 2px solid transparent;
4861
+ transition: all 0.15s;
4862
+ user-select: none;
4863
+ }
4864
+ .tab-item:hover { color: var(--text); }
4865
+ .tab-item.active {
4866
+ color: var(--primary);
4867
+ border-bottom-color: var(--primary);
3951
4868
  }
4869
+ .tab-panel { flex: 1; overflow: hidden; }
3952
4870
 
3953
4871
  /* Layout */
3954
- .layout { display: flex; height: 100vh; }
4872
+ .layout { display: flex; height: 100%; }
3955
4873
  .sidebar {
3956
4874
  width: 280px;
3957
4875
  border-right: 1px solid var(--border);
@@ -4286,11 +5204,67 @@ input::placeholder { color: var(--text-dim); }
4286
5204
  white-space: pre-wrap;
4287
5205
  word-break: break-word;
4288
5206
  }
5207
+
5208
+ /* Tokens tab */
5209
+ .tokens-container {
5210
+ height: 100%;
5211
+ display: flex;
5212
+ flex-direction: column;
5213
+ overflow-y: auto;
5214
+ }
5215
+ .tokens-header {
5216
+ padding: 12px 16px;
5217
+ border-bottom: 1px solid var(--border);
5218
+ background: var(--bg-secondary);
5219
+ }
5220
+ .tokens-header h1 { font-size: 16px; font-weight: 600; }
5221
+ .tokens-table {
5222
+ width: 100%;
5223
+ border-collapse: collapse;
5224
+ font-size: 13px;
5225
+ }
5226
+ .tokens-table th {
5227
+ text-align: left;
5228
+ padding: 10px 16px;
5229
+ border-bottom: 2px solid var(--border);
5230
+ color: var(--text-muted);
5231
+ font-size: 11px;
5232
+ text-transform: uppercase;
5233
+ letter-spacing: 0.5px;
5234
+ font-weight: 600;
5235
+ }
5236
+ .tokens-table td {
5237
+ padding: 10px 16px;
5238
+ border-bottom: 1px solid var(--border);
5239
+ }
5240
+ .tokens-table tr:hover td { background: var(--bg-secondary); }
5241
+ .tokens-table .number { text-align: right; font-family: 'SF Mono', Monaco, 'Courier New', monospace; }
5242
+ .tokens-table tfoot td {
5243
+ font-weight: 600;
5244
+ border-top: 2px solid var(--border);
5245
+ }
5246
+ .chart-section { flex: 1; min-height: 0; display: flex; flex-direction: column; padding: 16px; }
5247
+ .chart-title { font-size: 14px; font-weight: 600; margin-bottom: 12px; }
5248
+ .chart-container { flex: 1; min-height: 400px; }
5249
+ .chart-fallback {
5250
+ padding: 40px 20px;
5251
+ text-align: center;
5252
+ color: var(--text-muted);
5253
+ background: var(--bg-secondary);
5254
+ border-radius: 8px;
5255
+ border: 1px solid var(--border);
5256
+ }
4289
5257
  `;
4290
5258
 
4291
5259
  //#endregion
4292
5260
  //#region src/routes/history/ui/template.ts
4293
5261
  const template = `
5262
+ <div class="tab-bar">
5263
+ <div class="tab-item active" onclick="switchTab('requests')" data-tab="requests">Requests</div>
5264
+ <div class="tab-item" onclick="switchTab('tokens')" data-tab="tokens">Tokens</div>
5265
+ </div>
5266
+
5267
+ <div id="tab-requests" class="tab-panel">
4294
5268
  <div class="layout">
4295
5269
  <!-- Sidebar: Sessions -->
4296
5270
  <div class="sidebar">
@@ -4354,6 +5328,25 @@ const template = `
4354
5328
  </div>
4355
5329
  </div>
4356
5330
  </div>
5331
+ </div>
5332
+
5333
+ <div id="tab-tokens" class="tab-panel" style="display:none" data-loaded="false">
5334
+ <div class="tokens-container">
5335
+ <div class="tokens-header">
5336
+ <h1>Token Analytics</h1>
5337
+ </div>
5338
+ <div id="tokens-table-container">
5339
+ <div class="loading">Loading...</div>
5340
+ </div>
5341
+ <div class="chart-section">
5342
+ <h2 class="chart-title">Cumulative Token Usage</h2>
5343
+ <div class="chart-container" id="token-chart"></div>
5344
+ <div class="chart-fallback" id="chart-fallback" style="display:none">
5345
+ ECharts library failed to load. Token chart is unavailable.
5346
+ </div>
5347
+ </div>
5348
+ </div>
5349
+ </div>
4357
5350
 
4358
5351
  <!-- Raw JSON Modal -->
4359
5352
  <div class="modal-overlay" id="raw-modal" onclick="closeRawModal(event)">
@@ -4382,6 +5375,7 @@ function getHistoryUI() {
4382
5375
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
4383
5376
  <title>Copilot API - Request History</title>
4384
5377
  <link rel="icon" href="data:,">
5378
+ <script defer src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"><\/script>
4385
5379
  <style>${styles}</style>
4386
5380
  </head>
4387
5381
  <body>
@@ -4398,10 +5392,16 @@ historyRoutes.get("/api/entries", handleGetEntries);
4398
5392
  historyRoutes.get("/api/entries/:id", handleGetEntry);
4399
5393
  historyRoutes.delete("/api/entries", handleDeleteEntries);
4400
5394
  historyRoutes.get("/api/stats", handleGetStats);
5395
+ historyRoutes.get("/api/token-stats", handleGetTokenStats);
4401
5396
  historyRoutes.get("/api/export", handleExport);
4402
5397
  historyRoutes.get("/api/sessions", handleGetSessions);
4403
5398
  historyRoutes.get("/api/sessions/:id", handleGetSession);
4404
5399
  historyRoutes.delete("/api/sessions/:id", handleDeleteSession);
5400
/**
 * WebSocket upgrade endpoint for the history UI (the page connects to /history/ws).
 *
 * Responds 426 when the request does not ask for a WebSocket upgrade, hands the
 * raw request to the server's upgrade() when one is available on c.env, and
 * responds 500 when that is missing or reports failure.
 */
historyRoutes.get("/ws", (c) => {
	// RFC 6455 defines the Upgrade token as ASCII case-insensitive, and some
	// clients send "WebSocket", so compare in lower case.
	const upgradeHeader = c.req.header("Upgrade") ?? "";
	if (upgradeHeader.toLowerCase() !== "websocket") return c.text("Expected WebSocket upgrade", 426);
	// NOTE(review): c.env.server is presumably the runtime's server handle; when
	// it is absent the optional chain yields undefined and the 500 below is sent.
	if (c.env?.server?.upgrade(c.req.raw)) return new Response(null, { status: 101 });
	return c.text("WebSocket upgrade failed", 500);
});
4405
5405
  historyRoutes.get("/", (c) => {
4406
5406
  return c.html(getHistoryUI());
4407
5407
  });
@@ -5853,6 +6853,7 @@ function prependMarkerToAnthropicResponse$1(response, marker) {
5853
6853
  async function handleDirectAnthropicStreamingResponse(opts) {
5854
6854
  const { stream, response, anthropicPayload, ctx } = opts;
5855
6855
  const acc = createAnthropicStreamAccumulator();
6856
+ const checkRepetition = createStreamRepetitionChecker(`anthropic:${anthropicPayload.model}`);
5856
6857
  try {
5857
6858
  for await (const rawEvent of response) {
5858
6859
  consola.debug("Direct Anthropic raw stream event:", JSON.stringify(rawEvent));
@@ -5866,6 +6867,7 @@ async function handleDirectAnthropicStreamingResponse(opts) {
5866
6867
  continue;
5867
6868
  }
5868
6869
  processAnthropicEvent(event, acc);
6870
+ if (event.type === "content_block_delta" && event.delta.type === "text_delta") checkRepetition(event.delta.text);
5869
6871
  await stream.writeSSE({
5870
6872
  event: rawEvent.event || event.type,
5871
6873
  data: rawEvent.data
@@ -6065,6 +7067,7 @@ async function handleStreamingResponse(opts) {
6065
7067
  toolCalls: {}
6066
7068
  };
6067
7069
  const acc = createAnthropicStreamAccumulator();
7070
+ const checkRepetition = createStreamRepetitionChecker(`translated:${anthropicPayload.model}`);
6068
7071
  try {
6069
7072
  if (ctx.truncateResult?.wasCompacted) {
6070
7073
  const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
@@ -6076,7 +7079,8 @@ async function handleStreamingResponse(opts) {
6076
7079
  response,
6077
7080
  toolNameMapping,
6078
7081
  streamState,
6079
- acc
7082
+ acc,
7083
+ checkRepetition
6080
7084
  });
6081
7085
  recordStreamingResponse(acc, anthropicPayload.model, ctx);
6082
7086
  completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
@@ -6132,7 +7136,7 @@ async function sendTruncationMarkerEvent(stream, streamState, marker) {
6132
7136
  streamState.contentBlockIndex++;
6133
7137
  }
6134
7138
  async function processStreamChunks(opts) {
6135
- const { stream, response, toolNameMapping, streamState, acc } = opts;
7139
+ const { stream, response, toolNameMapping, streamState, acc, checkRepetition } = opts;
6136
7140
  for await (const rawEvent of response) {
6137
7141
  consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent));
6138
7142
  if (rawEvent.data === "[DONE]") break;
@@ -6149,6 +7153,7 @@ async function processStreamChunks(opts) {
6149
7153
  for (const event of events) {
6150
7154
  consola.debug("Translated Anthropic event:", JSON.stringify(event));
6151
7155
  processAnthropicEvent(event, acc);
7156
+ if (event.type === "content_block_delta" && event.delta.type === "text_delta") checkRepetition(event.delta.text);
6152
7157
  await stream.writeSSE({
6153
7158
  event: event.type,
6154
7159
  data: JSON.stringify(event)
@@ -6632,6 +7637,7 @@ async function runServer(options) {
6632
7637
  consola.info(`History recording enabled (${limitText} entries)`);
6633
7638
  }
6634
7639
  initTui({ enabled: true });
7640
+ initRequestContextManager(state.staleRequestMaxAge).startReaper();
6635
7641
  await ensurePaths();
6636
7642
  await cacheVSCodeVersion();
6637
7643
  if (options.githubToken) {
@@ -6671,11 +7677,12 @@ async function runServer(options) {
6671
7677
  }
6672
7678
  }
6673
7679
  consola.box(`🌐 Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage${options.history ? `\nšŸ“œ History UI: ${serverUrl}/history` : ""}`);
6674
- serve({
7680
+ setupShutdownHandlers();
7681
+ setServerInstance(serve({
6675
7682
  fetch: server.fetch,
6676
7683
  port: options.port,
6677
7684
  hostname: options.host
6678
- });
7685
+ }));
6679
7686
  }
6680
7687
  const start = defineCommand({
6681
7688
  meta: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dianshuv/copilot-api",
3
- "version": "0.2.3",
3
+ "version": "0.4.0",
4
4
  "description": "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!",
5
5
  "author": "dianshuv",
6
6
  "type": "module",