@dianshuv/copilot-api 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +5 -0
  2. package/dist/main.mjs +687 -19
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -10,6 +10,11 @@
10
10
 
11
11
  - **Responses API endpoint**: `/v1/responses` passthrough for codex models (e.g., `gpt-5.2-codex`, `gpt-5.3-codex`) used by tools like OpenCode. Includes stream ID synchronization for `@ai-sdk/openai` compatibility.
12
12
  - **SubagentStart marker support**: Detects `__SUBAGENT_MARKER__` injected by Claude Code hooks to override `X-Initiator` header to `"agent"` for subagent requests, ensuring correct credit tier usage. Includes a ready-to-use Claude plugin (`claude-plugin/`).
13
+ - **Token analytics tab**: The `/history` page includes a Tokens tab with per-model token usage summary table and cumulative ECharts line chart for visualizing API consumption over time.
14
+ - **Real-time history updates**: The `/history` UI uses WebSocket for live updates instead of polling, with automatic fallback to polling and exponential backoff reconnection.
15
+ - **Graceful shutdown**: 4-phase shutdown sequence — stops accepting requests, waits for in-flight requests to complete, sends abort signal, then force-closes. Configurable via `--shutdown-graceful-wait` and `--shutdown-abort-wait`.
16
+ - **Stream repetition detection**: Detects when models get stuck in repetitive output loops using KMP-based pattern matching and logs a warning.
17
+ - **Stale request reaping**: Automatically force-fails requests that exceed a configurable maximum age (default 600s) to prevent resource leaks.
13
18
 
14
19
  ## Quick Start
15
20
 
package/dist/main.mjs CHANGED
@@ -50,7 +50,10 @@ const state = {
50
50
  autoTruncate: true,
51
51
  compressToolResults: false,
52
52
  redirectAnthropic: false,
53
- rewriteAnthropicTools: true
53
+ rewriteAnthropicTools: true,
54
+ staleRequestMaxAge: 600,
55
+ shutdownGracefulWait: 60,
56
+ shutdownAbortWait: 120
54
57
  };
55
58
 
56
59
  //#endregion
@@ -1017,11 +1020,11 @@ const patchClaude = defineCommand({
1017
1020
 
1018
1021
  //#endregion
1019
1022
  //#region package.json
1020
- var version = "0.3.0";
1023
+ var version = "0.4.0";
1021
1024
 
1022
1025
  //#endregion
1023
1026
  //#region src/lib/adaptive-rate-limiter.ts
1024
- const DEFAULT_CONFIG = {
1027
+ const DEFAULT_CONFIG$1 = {
1025
1028
  baseRetryIntervalSeconds: 10,
1026
1029
  maxRetryIntervalSeconds: 120,
1027
1030
  requestIntervalSeconds: 10,
@@ -1050,7 +1053,7 @@ var AdaptiveRateLimiter = class {
1050
1053
  recoveryStepIndex = 0;
1051
1054
  constructor(config = {}) {
1052
1055
  this.config = {
1053
- ...DEFAULT_CONFIG,
1056
+ ...DEFAULT_CONFIG$1,
1054
1057
  ...config
1055
1058
  };
1056
1059
  }
@@ -1272,6 +1275,16 @@ var AdaptiveRateLimiter = class {
1272
1275
  return new Promise((resolve) => setTimeout(resolve, ms));
1273
1276
  }
1274
1277
  /**
1278
+ * Reject all currently queued requests during shutdown.
1279
+ * Returns the number of requests that were rejected.
1280
+ */
1281
+ rejectQueued() {
1282
+ const count = this.queue.length;
1283
+ for (const request of this.queue) request.reject(/* @__PURE__ */ new Error("Server is shutting down"));
1284
+ this.queue = [];
1285
+ return count;
1286
+ }
1287
+ /**
1275
1288
  * Get current status for debugging/monitoring
1276
1289
  */
1277
1290
  getStatus() {
@@ -1289,15 +1302,21 @@ let rateLimiterInstance = null;
1289
1302
  */
1290
1303
  function initAdaptiveRateLimiter(config = {}) {
1291
1304
  rateLimiterInstance = new AdaptiveRateLimiter(config);
1292
- const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
1293
- const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
1294
- const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
1295
- const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
1296
- const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
1297
- const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
1305
+ const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG$1.baseRetryIntervalSeconds;
1306
+ const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG$1.maxRetryIntervalSeconds;
1307
+ const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG$1.requestIntervalSeconds;
1308
+ const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG$1.recoveryTimeoutMinutes;
1309
+ const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG$1.consecutiveSuccessesForRecovery;
1310
+ const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG$1.gradualRecoverySteps;
1298
1311
  consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
1299
1312
  }
1300
1313
  /**
1314
+ * Get the rate limiter instance
1315
+ */
1316
+ function getAdaptiveRateLimiter() {
1317
+ return rateLimiterInstance;
1318
+ }
1319
+ /**
1301
1320
  * Execute a request with adaptive rate limiting.
1302
1321
  * If rate limiter is not initialized, executes immediately.
1303
1322
  * Returns the result along with queue wait time.
@@ -1310,6 +1329,292 @@ async function executeWithAdaptiveRateLimit(fn) {
1310
1329
  return rateLimiterInstance.execute(fn);
1311
1330
  }
1312
1331
 
1332
+ //#endregion
1333
+ //#region src/lib/context/request.ts
1334
+ let idCounter = 0;
1335
+ function createRequestContext(opts) {
1336
+ const id = `req_${Date.now()}_${++idCounter}`;
1337
+ const startTime = Date.now();
1338
+ const onEvent = opts.onEvent;
1339
+ let _state = "pending";
1340
+ let _originalRequest = null;
1341
+ let _response = null;
1342
+ let settled = false;
1343
+ function emit(event) {
1344
+ try {
1345
+ onEvent(event);
1346
+ } catch {}
1347
+ }
1348
+ const ctx = {
1349
+ id,
1350
+ tuiLogId: opts.tuiLogId,
1351
+ startTime,
1352
+ endpoint: opts.endpoint,
1353
+ get state() {
1354
+ return _state;
1355
+ },
1356
+ get durationMs() {
1357
+ return Date.now() - startTime;
1358
+ },
1359
+ get settled() {
1360
+ return settled;
1361
+ },
1362
+ get originalRequest() {
1363
+ return _originalRequest;
1364
+ },
1365
+ get response() {
1366
+ return _response;
1367
+ },
1368
+ setOriginalRequest(req) {
1369
+ _originalRequest = req;
1370
+ emit({
1371
+ type: "updated",
1372
+ context: ctx,
1373
+ field: "originalRequest"
1374
+ });
1375
+ },
1376
+ transition(newState) {
1377
+ const previousState = _state;
1378
+ _state = newState;
1379
+ emit({
1380
+ type: "state_changed",
1381
+ context: ctx,
1382
+ previousState
1383
+ });
1384
+ },
1385
+ complete(response) {
1386
+ if (settled) return;
1387
+ settled = true;
1388
+ _response = response;
1389
+ _state = "completed";
1390
+ emit({
1391
+ type: "completed",
1392
+ context: ctx,
1393
+ entry: ctx.toHistoryEntry()
1394
+ });
1395
+ },
1396
+ fail(model, error) {
1397
+ if (settled) return;
1398
+ settled = true;
1399
+ _response = {
1400
+ success: false,
1401
+ model,
1402
+ usage: {
1403
+ input_tokens: 0,
1404
+ output_tokens: 0
1405
+ },
1406
+ error: error instanceof Error ? error.message : String(error),
1407
+ content: null
1408
+ };
1409
+ _state = "failed";
1410
+ emit({
1411
+ type: "failed",
1412
+ context: ctx,
1413
+ entry: ctx.toHistoryEntry()
1414
+ });
1415
+ },
1416
+ toHistoryEntry() {
1417
+ const entry = {
1418
+ id,
1419
+ endpoint: opts.endpoint,
1420
+ timestamp: startTime,
1421
+ durationMs: Date.now() - startTime,
1422
+ request: {
1423
+ model: _originalRequest?.model,
1424
+ messages: _originalRequest?.messages,
1425
+ stream: _originalRequest?.stream,
1426
+ tools: _originalRequest?.tools,
1427
+ system: _originalRequest?.system
1428
+ }
1429
+ };
1430
+ if (_response) entry.response = _response;
1431
+ return entry;
1432
+ }
1433
+ };
1434
+ return ctx;
1435
+ }
1436
+
1437
+ //#endregion
1438
+ //#region src/lib/context/manager.ts
1439
+ /**
1440
+ * RequestContextManager — Active request management
1441
+ *
1442
+ * Manages all in-flight RequestContext instances. Publishes events for
1443
+ * WebSocket push and history persistence.
1444
+ */
1445
+ let _manager = null;
1446
+ function initRequestContextManager(staleMaxAgeSec) {
1447
+ _manager = createRequestContextManager(staleMaxAgeSec);
1448
+ return _manager;
1449
+ }
1450
+ const REAPER_INTERVAL_MS = 6e4;
1451
+ const DEFAULT_STALE_MAX_AGE_SEC = 600;
1452
+ function createRequestContextManager(staleMaxAgeSec) {
1453
+ const maxAgeSec = staleMaxAgeSec ?? DEFAULT_STALE_MAX_AGE_SEC;
1454
+ const activeContexts = /* @__PURE__ */ new Map();
1455
+ const listeners = /* @__PURE__ */ new Set();
1456
+ let reaperTimer = null;
1457
+ function runReaperOnce() {
1458
+ if (maxAgeSec <= 0) return;
1459
+ const maxAgeMs = maxAgeSec * 1e3;
1460
+ for (const [id, ctx] of activeContexts) if (ctx.durationMs > maxAgeMs) {
1461
+ consola.warn(`[context] Force-failing stale request ${id} (endpoint: ${ctx.endpoint}, model: ${ctx.originalRequest?.model ?? "unknown"}, state: ${ctx.state}, age: ${Math.round(ctx.durationMs / 1e3)}s, max: ${maxAgeSec}s)`);
1462
+ ctx.fail(ctx.originalRequest?.model ?? "unknown", /* @__PURE__ */ new Error(`Request exceeded maximum age of ${maxAgeSec}s (stale context reaper)`));
1463
+ }
1464
+ }
1465
+ function startReaper() {
1466
+ if (reaperTimer) return;
1467
+ reaperTimer = setInterval(runReaperOnce, REAPER_INTERVAL_MS);
1468
+ }
1469
+ function stopReaper() {
1470
+ if (reaperTimer) {
1471
+ clearInterval(reaperTimer);
1472
+ reaperTimer = null;
1473
+ }
1474
+ }
1475
+ function emit(event) {
1476
+ for (const listener of listeners) try {
1477
+ listener(event);
1478
+ } catch {}
1479
+ }
1480
+ function handleContextEvent(rawEvent) {
1481
+ const { type, context } = rawEvent;
1482
+ switch (type) {
1483
+ case "state_changed":
1484
+ if (rawEvent.previousState) emit({
1485
+ type: "state_changed",
1486
+ context,
1487
+ previousState: rawEvent.previousState
1488
+ });
1489
+ break;
1490
+ case "updated":
1491
+ if (rawEvent.field) emit({
1492
+ type: "updated",
1493
+ context,
1494
+ field: rawEvent.field
1495
+ });
1496
+ break;
1497
+ case "completed":
1498
+ if (rawEvent.entry) emit({
1499
+ type: "completed",
1500
+ context,
1501
+ entry: rawEvent.entry
1502
+ });
1503
+ activeContexts.delete(context.id);
1504
+ break;
1505
+ case "failed":
1506
+ if (rawEvent.entry) emit({
1507
+ type: "failed",
1508
+ context,
1509
+ entry: rawEvent.entry
1510
+ });
1511
+ activeContexts.delete(context.id);
1512
+ break;
1513
+ default: break;
1514
+ }
1515
+ }
1516
+ return {
1517
+ create(opts) {
1518
+ const ctx = createRequestContext({
1519
+ endpoint: opts.endpoint,
1520
+ tuiLogId: opts.tuiLogId,
1521
+ onEvent: handleContextEvent
1522
+ });
1523
+ activeContexts.set(ctx.id, ctx);
1524
+ emit({
1525
+ type: "created",
1526
+ context: ctx
1527
+ });
1528
+ return ctx;
1529
+ },
1530
+ get(id) {
1531
+ return activeContexts.get(id);
1532
+ },
1533
+ getAll() {
1534
+ return Array.from(activeContexts.values());
1535
+ },
1536
+ get activeCount() {
1537
+ return activeContexts.size;
1538
+ },
1539
+ on(_event, listener) {
1540
+ listeners.add(listener);
1541
+ },
1542
+ off(_event, listener) {
1543
+ listeners.delete(listener);
1544
+ },
1545
+ startReaper,
1546
+ stopReaper,
1547
+ _runReaperOnce: runReaperOnce
1548
+ };
1549
+ }
1550
+
1551
+ //#endregion
1552
+ //#region src/lib/history-ws.ts
1553
+ /**
1554
+ * WebSocket support for History API.
1555
+ * Enables real-time updates when new requests are recorded.
1556
+ */
1557
+ const clients = /* @__PURE__ */ new Set();
1558
+ function getClientCount() {
1559
+ return clients.size;
1560
+ }
1561
+ function closeAllClients() {
1562
+ for (const client of clients) try {
1563
+ client.close(1001, "Server shutting down");
1564
+ } catch {}
1565
+ clients.clear();
1566
+ }
1567
+ function broadcast(message) {
1568
+ const data = JSON.stringify(message);
1569
+ for (const client of clients) try {
1570
+ if (client.readyState === WebSocket.OPEN) client.send(data);
1571
+ else clients.delete(client);
1572
+ } catch (error) {
1573
+ consola.debug("WebSocket send failed, removing client:", error);
1574
+ clients.delete(client);
1575
+ }
1576
+ }
1577
+ function notifyEntryAdded(summary) {
1578
+ if (clients.size === 0) return;
1579
+ broadcast({
1580
+ type: "entry_added",
1581
+ data: summary,
1582
+ timestamp: Date.now()
1583
+ });
1584
+ }
1585
+ function notifyEntryUpdated(summary) {
1586
+ if (clients.size === 0) return;
1587
+ broadcast({
1588
+ type: "entry_updated",
1589
+ data: summary,
1590
+ timestamp: Date.now()
1591
+ });
1592
+ }
1593
+ function notifyStatsUpdated(stats) {
1594
+ if (clients.size === 0) return;
1595
+ broadcast({
1596
+ type: "stats_updated",
1597
+ data: stats,
1598
+ timestamp: Date.now()
1599
+ });
1600
+ }
1601
+ function notifyHistoryCleared() {
1602
+ if (clients.size === 0) return;
1603
+ broadcast({
1604
+ type: "history_cleared",
1605
+ data: null,
1606
+ timestamp: Date.now()
1607
+ });
1608
+ }
1609
+ function notifySessionDeleted(sessionId) {
1610
+ if (clients.size === 0) return;
1611
+ broadcast({
1612
+ type: "session_deleted",
1613
+ data: { sessionId },
1614
+ timestamp: Date.now()
1615
+ });
1616
+ }
1617
+
1313
1618
  //#endregion
1314
1619
  //#region src/lib/history.ts
1315
1620
  function generateId$1() {
@@ -1389,6 +1694,13 @@ function recordRequest(endpoint, request) {
1389
1694
  if (historyState.entries.filter((e) => e.sessionId === removed.sessionId).length === 0) historyState.sessions.delete(removed.sessionId);
1390
1695
  }
1391
1696
  }
1697
+ notifyEntryAdded({
1698
+ id: entry.id,
1699
+ endpoint,
1700
+ model: request.model,
1701
+ stream: request.stream,
1702
+ timestamp: entry.timestamp
1703
+ });
1392
1704
  return entry.id;
1393
1705
  }
1394
1706
  function recordResponse(id, response, durationMs) {
@@ -1403,6 +1715,20 @@ function recordResponse(id, response, durationMs) {
1403
1715
  session.totalOutputTokens += response.usage.output_tokens;
1404
1716
  session.lastActivity = Date.now();
1405
1717
  }
1718
+ notifyEntryUpdated({
1719
+ id: entry.id,
1720
+ endpoint: entry.endpoint,
1721
+ model: response.model,
1722
+ success: response.success,
1723
+ durationMs,
1724
+ inputTokens: response.usage.input_tokens,
1725
+ outputTokens: response.usage.output_tokens
1726
+ });
1727
+ notifyStatsUpdated({
1728
+ totalRequests: historyState.entries.length,
1729
+ totalInputTokens: session?.totalInputTokens ?? 0,
1730
+ totalOutputTokens: session?.totalOutputTokens ?? 0
1731
+ });
1406
1732
  }
1407
1733
  }
1408
1734
  function getHistory(options = {}) {
@@ -1477,12 +1803,14 @@ function clearHistory() {
1477
1803
  historyState.entries = [];
1478
1804
  historyState.sessions = /* @__PURE__ */ new Map();
1479
1805
  historyState.currentSessionId = generateId$1();
1806
+ notifyHistoryCleared();
1480
1807
  }
1481
1808
  function deleteSession(sessionId) {
1482
1809
  if (!historyState.sessions.has(sessionId)) return false;
1483
1810
  historyState.entries = historyState.entries.filter((e) => e.sessionId !== sessionId);
1484
1811
  historyState.sessions.delete(sessionId);
1485
1812
  if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId$1();
1813
+ notifySessionDeleted(sessionId);
1486
1814
  return true;
1487
1815
  }
1488
1816
  function getStats() {
@@ -1731,6 +2059,142 @@ function generateEnvScript(envVars, commandToRun = "") {
1731
2059
  return commandBlock || commandToRun;
1732
2060
  }
1733
2061
 
2062
+ //#endregion
2063
+ //#region src/lib/shutdown.ts
2064
+ const DRAIN_POLL_INTERVAL_MS = 500;
2065
+ const DRAIN_PROGRESS_INTERVAL_MS = 5e3;
2066
+ let serverInstance = null;
2067
+ let _isShuttingDown = false;
2068
+ let shutdownResolve = null;
2069
+ let shutdownAbortController = null;
2070
+ function getIsShuttingDown() {
2071
+ return _isShuttingDown;
2072
+ }
2073
+ function setServerInstance(server) {
2074
+ serverInstance = server;
2075
+ }
2076
+ function formatActiveRequestsSummary(requests) {
2077
+ const now = Date.now();
2078
+ const lines = requests.map((req) => {
2079
+ const age = Math.round((now - req.startTime) / 1e3);
2080
+ const model = req.model || "unknown";
2081
+ const tags = req.tags?.length ? ` [${req.tags.join(", ")}]` : "";
2082
+ return ` ${req.method} ${req.path} ${model} (${req.status}, ${age}s)${tags}`;
2083
+ });
2084
+ return `Waiting for ${requests.length} active request(s):\n${lines.join("\n")}`;
2085
+ }
2086
+ async function drainActiveRequests(timeoutMs, tracker, opts) {
2087
+ const pollInterval = opts?.pollIntervalMs ?? DRAIN_POLL_INTERVAL_MS;
2088
+ const progressInterval = opts?.progressIntervalMs ?? DRAIN_PROGRESS_INTERVAL_MS;
2089
+ const deadline = Date.now() + timeoutMs;
2090
+ let lastProgressLog = 0;
2091
+ while (Date.now() < deadline) {
2092
+ const active = tracker.getActiveRequests();
2093
+ if (active.length === 0) return "drained";
2094
+ const now = Date.now();
2095
+ if (now - lastProgressLog >= progressInterval) {
2096
+ lastProgressLog = now;
2097
+ consola.info(formatActiveRequestsSummary(active));
2098
+ }
2099
+ await new Promise((resolve) => setTimeout(resolve, pollInterval));
2100
+ }
2101
+ return "timeout";
2102
+ }
2103
+ async function gracefulShutdown(signal, deps) {
2104
+ const tracker = deps?.tracker;
2105
+ const server = deps?.server ?? serverInstance;
2106
+ const rateLimiter = deps?.rateLimiter !== void 0 ? deps.rateLimiter : getAdaptiveRateLimiter();
2107
+ const stopRefresh = deps?.stopTokenRefreshFn ?? (() => {});
2108
+ const closeWsClients = deps?.closeAllClientsFn ?? closeAllClients;
2109
+ const getWsCount = deps?.getClientCountFn ?? getClientCount;
2110
+ const gracefulWaitMs = deps?.gracefulWaitMs ?? state.shutdownGracefulWait * 1e3;
2111
+ const abortWaitMs = deps?.abortWaitMs ?? state.shutdownAbortWait * 1e3;
2112
+ const drainOpts = {
2113
+ pollIntervalMs: deps?.drainPollIntervalMs ?? DRAIN_POLL_INTERVAL_MS,
2114
+ progressIntervalMs: deps?.drainProgressIntervalMs ?? DRAIN_PROGRESS_INTERVAL_MS
2115
+ };
2116
+ _isShuttingDown = true;
2117
+ shutdownAbortController = new AbortController();
2118
+ consola.info(`Received ${signal}, shutting down gracefully...`);
2119
+ try {
2120
+ deps?.contextManager?.stopReaper();
2121
+ } catch {}
2122
+ stopRefresh();
2123
+ const wsClients = getWsCount();
2124
+ if (wsClients > 0) {
2125
+ closeWsClients();
2126
+ consola.info(`Disconnected ${wsClients} WebSocket client(s)`);
2127
+ }
2128
+ if (rateLimiter) {
2129
+ const rejected = rateLimiter.rejectQueued();
2130
+ if (rejected > 0) consola.info(`Rejected ${rejected} queued request(s) from rate limiter`);
2131
+ }
2132
+ if (server) {
2133
+ server.close(false).catch((error) => {
2134
+ consola.error("Error stopping listener:", error);
2135
+ });
2136
+ consola.info("Stopped accepting new connections");
2137
+ }
2138
+ if (tracker) {
2139
+ const activeCount = tracker.getActiveRequests().length;
2140
+ if (activeCount > 0) {
2141
+ consola.info(`Phase 2: Waiting up to ${gracefulWaitMs / 1e3}s for ${activeCount} active request(s)...`);
2142
+ try {
2143
+ if (await drainActiveRequests(gracefulWaitMs, tracker, drainOpts) === "drained") {
2144
+ consola.info("All requests completed naturally");
2145
+ finalize(tracker);
2146
+ return;
2147
+ }
2148
+ } catch (error) {
2149
+ consola.error("Error during Phase 2 drain:", error);
2150
+ }
2151
+ const remaining = tracker.getActiveRequests().length;
2152
+ consola.info(`Phase 3: Sending abort signal to ${remaining} remaining request(s), waiting up to ${abortWaitMs / 1e3}s...`);
2153
+ shutdownAbortController.abort();
2154
+ try {
2155
+ if (await drainActiveRequests(abortWaitMs, tracker, drainOpts) === "drained") {
2156
+ consola.info("All requests completed after abort signal");
2157
+ finalize(tracker);
2158
+ return;
2159
+ }
2160
+ } catch (error) {
2161
+ consola.error("Error during Phase 3 drain:", error);
2162
+ }
2163
+ const forceRemaining = tracker.getActiveRequests().length;
2164
+ consola.warn(`Phase 4: Force-closing ${forceRemaining} remaining request(s)`);
2165
+ if (server) try {
2166
+ await server.close(true);
2167
+ } catch (error) {
2168
+ consola.error("Error force-closing server:", error);
2169
+ }
2170
+ }
2171
+ finalize(tracker);
2172
+ } else {
2173
+ consola.info("Shutdown complete");
2174
+ shutdownResolve?.();
2175
+ }
2176
+ }
2177
+ function finalize(tracker) {
2178
+ tracker.destroy();
2179
+ consola.info("Shutdown complete");
2180
+ shutdownResolve?.();
2181
+ }
2182
+ function setupShutdownHandlers() {
2183
+ const handler = (signal) => {
2184
+ if (_isShuttingDown) {
2185
+ consola.warn("Second signal received, forcing immediate exit");
2186
+ process.exit(1);
2187
+ }
2188
+ gracefulShutdown(signal).catch((error) => {
2189
+ consola.error("Fatal error during shutdown:", error);
2190
+ shutdownResolve?.();
2191
+ process.exit(1);
2192
+ });
2193
+ };
2194
+ process.on("SIGINT", () => handler("SIGINT"));
2195
+ process.on("SIGTERM", () => handler("SIGTERM"));
2196
+ }
2197
+
1734
2198
  //#endregion
1735
2199
  //#region src/lib/tui/console-renderer.ts
1736
2200
  const CLEAR_LINE = "\x1B[2K\r";
@@ -2091,6 +2555,7 @@ const requestTracker = new RequestTracker();
2091
2555
  */
2092
2556
  function tuiLogger() {
2093
2557
  return async (c, next) => {
2558
+ if (getIsShuttingDown()) return c.json({ error: "Server is shutting down" }, 503);
2094
2559
  const method = c.req.method;
2095
2560
  const path = c.req.path;
2096
2561
  const isHistoryAccess = path.startsWith("/history");
@@ -2814,6 +3279,127 @@ function createTruncationResponseMarkerOpenAI(result) {
2814
3279
  return `\n\n---\n[Auto-truncated: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
2815
3280
  }
2816
3281
 
3282
+ //#endregion
3283
+ //#region src/lib/repetition-detector.ts
3284
+ /**
3285
+ * Stream repetition detector.
3286
+ *
3287
+ * Uses the KMP failure function (prefix function) to detect repeated patterns
3288
+ * in streaming text output. When a model gets stuck in a repetitive loop,
3289
+ * it wastes tokens producing the same content over and over. This detector
3290
+ * identifies such loops early so the caller can take action (log warning,
3291
+ * abort stream, etc.).
3292
+ *
3293
+ * The algorithm works by maintaining a sliding buffer of recent text and
3294
+ * computing the longest proper prefix that is also a suffix, which implies
3295
+ * a candidate `period`. If `(text.length - period) >= minRepetitions * period`,
3296
+ * it means a pattern of length `period` has repeated enough times.
3297
+ */
3298
+ const DEFAULT_CONFIG = {
3299
+ minPatternLength: 10,
3300
+ minRepetitions: 3,
3301
+ maxBufferSize: 5e3
3302
+ };
3303
+ var RepetitionDetector = class {
3304
+ buffer = "";
3305
+ config;
3306
+ detected = false;
3307
+ constructor(config) {
3308
+ this.config = {
3309
+ ...DEFAULT_CONFIG,
3310
+ ...config
3311
+ };
3312
+ }
3313
+ /**
3314
+ * Feed a text chunk into the detector.
3315
+ * Returns `true` if repetition has been detected (now or previously).
3316
+ * Once detected, subsequent calls return `true` without further analysis.
3317
+ */
3318
+ feed(text) {
3319
+ if (this.detected) return true;
3320
+ if (!text) return false;
3321
+ this.buffer += text;
3322
+ if (this.buffer.length > this.config.maxBufferSize) this.buffer = this.buffer.slice(-this.config.maxBufferSize);
3323
+ const minRequired = this.config.minPatternLength * this.config.minRepetitions;
3324
+ if (this.buffer.length < minRequired) return false;
3325
+ this.detected = detectRepetition(this.buffer, this.config.minPatternLength, this.config.minRepetitions);
3326
+ return this.detected;
3327
+ }
3328
+ /** Reset detector state for a new stream */
3329
+ reset() {
3330
+ this.buffer = "";
3331
+ this.detected = false;
3332
+ }
3333
+ /** Whether repetition has been detected */
3334
+ get isDetected() {
3335
+ return this.detected;
3336
+ }
3337
+ };
3338
+ /**
3339
+ * Detect if the tail of `text` contains a repeating pattern.
3340
+ *
3341
+ * Uses the KMP prefix function: for a string S, the prefix function π[i]
3342
+ * gives the length of the longest proper prefix of S[0..i] that is also
3343
+ * a suffix. If π[n-1] ≥ (n - period) where period = n - π[n-1], then
3344
+ * the string is composed of a repeating unit of length `period`.
3345
+ *
3346
+ * We check suffixes of the buffer (its last `windowSize` chars, for several window sizes) to detect
3347
+ * if a pattern of at least `minPatternLength` chars repeats at least
3348
+ * `minRepetitions` times.
3349
+ */
3350
+ function detectRepetition(text, minPatternLength, minRepetitions) {
3351
+ const minWindow = minPatternLength * minRepetitions;
3352
+ const maxWindow = Math.min(text.length, 2e3);
3353
+ const windowSizes = [
3354
+ minWindow,
3355
+ Math.floor(maxWindow * .5),
3356
+ maxWindow
3357
+ ].filter((w) => w >= minWindow && w <= text.length);
3358
+ for (const windowSize of windowSizes) {
3359
+ const window = text.slice(-windowSize);
3360
+ const period = findRepeatingPeriod(window);
3361
+ if (period >= minPatternLength) {
3362
+ if (Math.floor(window.length / period) >= minRepetitions) return true;
3363
+ }
3364
+ }
3365
+ return false;
3366
+ }
3367
+ /**
3368
+ * Find the shortest repeating period in a string using KMP prefix function.
3369
+ * Returns the period length, or the string length if no repetition found.
3370
+ */
3371
+ function findRepeatingPeriod(s) {
3372
+ const n = s.length;
3373
+ if (n === 0) return 0;
3374
+ const pi = new Int32Array(n);
3375
+ for (let i = 1; i < n; i++) {
3376
+ let j = pi[i - 1] ?? 0;
3377
+ while (j > 0 && s[i] !== s[j]) j = pi[j - 1] ?? 0;
3378
+ if (s[i] === s[j]) j++;
3379
+ pi[i] = j;
3380
+ }
3381
+ const period = n - pi[n - 1];
3382
+ if (period < n && n % period === 0) return period;
3383
+ if (period < n && pi[n - 1] >= period) return period;
3384
+ return n;
3385
+ }
3386
+ /**
3387
+ * Create a repetition detector callback for use in stream processing.
3388
+ * Returns a function that accepts text deltas and logs a warning on first detection.
3389
+ */
3390
+ function createStreamRepetitionChecker(label, config) {
3391
+ const detector = new RepetitionDetector(config);
3392
+ let warned = false;
3393
+ return (textDelta) => {
3394
+ const isRepetitive = detector.feed(textDelta);
3395
+ if (isRepetitive && !warned) {
3396
+ warned = true;
3397
+ consola.warn(`[RepetitionDetector] ${label}: Repetitive output detected in stream`);
3398
+ }
3399
+ return isRepetitive;
3400
+ };
3401
+ }
3402
+
2817
3403
  //#endregion
2818
3404
  //#region src/services/copilot/create-chat-completions.ts
2819
3405
  const createChatCompletions = async (payload, options) => {
@@ -3150,6 +3736,7 @@ function createStreamAccumulator() {
3150
3736
  async function handleStreamingResponse$1(opts) {
3151
3737
  const { stream, response, payload, ctx } = opts;
3152
3738
  const acc = createStreamAccumulator();
3739
+ const checkRepetition = createStreamRepetitionChecker(`openai:${payload.model}`);
3153
3740
  try {
3154
3741
  if (state.verbose && ctx.truncateResult?.wasCompacted) {
3155
3742
  const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
@@ -3173,7 +3760,7 @@ async function handleStreamingResponse$1(opts) {
3173
3760
  }
3174
3761
  for await (const chunk of response) {
3175
3762
  consola.debug("Streaming chunk:", JSON.stringify(chunk));
3176
- parseStreamChunk(chunk, acc);
3763
+ parseStreamChunk(chunk, acc, checkRepetition);
3177
3764
  await stream.writeSSE(chunk);
3178
3765
  }
3179
3766
  recordStreamSuccess(acc, payload.model, ctx);
@@ -3189,7 +3776,7 @@ async function handleStreamingResponse$1(opts) {
3189
3776
  throw error;
3190
3777
  }
3191
3778
  }
3192
- function parseStreamChunk(chunk, acc) {
3779
+ function parseStreamChunk(chunk, acc, checkRepetition) {
3193
3780
  if (!chunk.data || chunk.data === "[DONE]") return;
3194
3781
  try {
3195
3782
  const parsed = JSON.parse(chunk.data);
@@ -3200,7 +3787,10 @@ function parseStreamChunk(chunk, acc) {
3200
3787
  }
3201
3788
  const choice = parsed.choices[0];
3202
3789
  if (choice) {
3203
- if (choice.delta.content) acc.content += choice.delta.content;
3790
+ if (choice.delta.content) {
3791
+ acc.content += choice.delta.content;
3792
+ checkRepetition(choice.delta.content);
3793
+ }
3204
3794
  if (choice.delta.tool_calls) for (const tc of choice.delta.tool_calls) {
3205
3795
  const idx = tc.index;
3206
3796
  if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
@@ -3939,12 +4529,78 @@ document.addEventListener('keydown', (e) => {
3939
4529
  }
3940
4530
  });
3941
4531
 
3942
- // Auto-refresh every 10 seconds
3943
- setInterval(() => {
4532
+ // Auto-refresh every 10 seconds (fallback when WebSocket is not available)
4533
+ let autoRefreshTimer = setInterval(() => {
3944
4534
  loadStats();
3945
4535
  loadSessions();
3946
4536
  }, 10000);
3947
4537
 
4538
+ // WebSocket real-time updates
4539
+ let reconnectAttempts = 0;
4540
+ function connectWebSocket() {
4541
+ const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
4542
+ const wsUrl = protocol + '//' + window.location.host + '/history/ws';
4543
+
4544
+ try {
4545
+ const ws = new WebSocket(wsUrl);
4546
+
4547
+ ws.onopen = function() {
4548
+ console.log('[History] WebSocket connected');
4549
+ reconnectAttempts = 0;
4550
+ // Disable polling when WS is active
4551
+ clearInterval(autoRefreshTimer);
4552
+ };
4553
+
4554
+ ws.onmessage = function(event) {
4555
+ try {
4556
+ const msg = JSON.parse(event.data);
4557
+ switch (msg.type) {
4558
+ case 'entry_added':
4559
+ case 'entry_updated':
4560
+ loadSessions();
4561
+ loadStats();
4562
+ break;
4563
+ case 'stats_updated':
4564
+ loadStats();
4565
+ break;
4566
+ case 'history_cleared':
4567
+ case 'session_deleted':
4568
+ loadSessions();
4569
+ loadStats();
4570
+ break;
4571
+ }
4572
+ } catch (e) {
4573
+ console.warn('[History] Failed to parse WS message:', e);
4574
+ }
4575
+ };
4576
+
4577
+ ws.onclose = function() {
4578
+ console.log('[History] WebSocket disconnected, falling back to polling');
4579
+ // Re-enable polling as fallback (clear first to avoid duplicates)
4580
+ clearInterval(autoRefreshTimer);
4581
+ autoRefreshTimer = setInterval(() => {
4582
+ loadStats();
4583
+ loadSessions();
4584
+ }, 10000);
4585
+ // Reconnect with exponential backoff, max 10 attempts
4586
+ if (reconnectAttempts < 10) {
4587
+ const delay = Math.min(5000 * Math.pow(2, reconnectAttempts), 60000);
4588
+ reconnectAttempts++;
4589
+ setTimeout(connectWebSocket, delay);
4590
+ }
4591
+ };
4592
+
4593
+ ws.onerror = function() {
4594
+ // Will trigger onclose
4595
+ };
4596
+ } catch (e) {
4597
+ console.warn('[History] WebSocket not available:', e);
4598
+ }
4599
+ }
4600
+
4601
+ // Start WebSocket connection
4602
+ connectWebSocket();
4603
+
3948
4604
  // Tab switching
3949
4605
  function switchTab(tab) {
3950
4606
  document.querySelectorAll('.tab-item').forEach(t => t.classList.remove('active'));
@@ -4741,6 +5397,11 @@ historyRoutes.get("/api/export", handleExport);
4741
5397
  historyRoutes.get("/api/sessions", handleGetSessions);
4742
5398
  historyRoutes.get("/api/sessions/:id", handleGetSession);
4743
5399
  historyRoutes.delete("/api/sessions/:id", handleDeleteSession);
5400
+ historyRoutes.get("/ws", (c) => {
5401
+ if (c.req.header("Upgrade") !== "websocket") return c.text("Expected WebSocket upgrade", 426);
5402
+ if (c.env?.server?.upgrade(c.req.raw)) return new Response(null, { status: 101 });
5403
+ return c.text("WebSocket upgrade failed", 500);
5404
+ });
4744
5405
  historyRoutes.get("/", (c) => {
4745
5406
  return c.html(getHistoryUI());
4746
5407
  });
@@ -6192,6 +6853,7 @@ function prependMarkerToAnthropicResponse$1(response, marker) {
6192
6853
  async function handleDirectAnthropicStreamingResponse(opts) {
6193
6854
  const { stream, response, anthropicPayload, ctx } = opts;
6194
6855
  const acc = createAnthropicStreamAccumulator();
6856
+ const checkRepetition = createStreamRepetitionChecker(`anthropic:${anthropicPayload.model}`);
6195
6857
  try {
6196
6858
  for await (const rawEvent of response) {
6197
6859
  consola.debug("Direct Anthropic raw stream event:", JSON.stringify(rawEvent));
@@ -6205,6 +6867,7 @@ async function handleDirectAnthropicStreamingResponse(opts) {
6205
6867
  continue;
6206
6868
  }
6207
6869
  processAnthropicEvent(event, acc);
6870
+ if (event.type === "content_block_delta" && event.delta.type === "text_delta") checkRepetition(event.delta.text);
6208
6871
  await stream.writeSSE({
6209
6872
  event: rawEvent.event || event.type,
6210
6873
  data: rawEvent.data
@@ -6404,6 +7067,7 @@ async function handleStreamingResponse(opts) {
6404
7067
  toolCalls: {}
6405
7068
  };
6406
7069
  const acc = createAnthropicStreamAccumulator();
7070
+ const checkRepetition = createStreamRepetitionChecker(`translated:${anthropicPayload.model}`);
6407
7071
  try {
6408
7072
  if (ctx.truncateResult?.wasCompacted) {
6409
7073
  const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
@@ -6415,7 +7079,8 @@ async function handleStreamingResponse(opts) {
6415
7079
  response,
6416
7080
  toolNameMapping,
6417
7081
  streamState,
6418
- acc
7082
+ acc,
7083
+ checkRepetition
6419
7084
  });
6420
7085
  recordStreamingResponse(acc, anthropicPayload.model, ctx);
6421
7086
  completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
@@ -6471,7 +7136,7 @@ async function sendTruncationMarkerEvent(stream, streamState, marker) {
6471
7136
  streamState.contentBlockIndex++;
6472
7137
  }
6473
7138
  async function processStreamChunks(opts) {
6474
- const { stream, response, toolNameMapping, streamState, acc } = opts;
7139
+ const { stream, response, toolNameMapping, streamState, acc, checkRepetition } = opts;
6475
7140
  for await (const rawEvent of response) {
6476
7141
  consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent));
6477
7142
  if (rawEvent.data === "[DONE]") break;
@@ -6488,6 +7153,7 @@ async function processStreamChunks(opts) {
6488
7153
  for (const event of events) {
6489
7154
  consola.debug("Translated Anthropic event:", JSON.stringify(event));
6490
7155
  processAnthropicEvent(event, acc);
7156
+ if (event.type === "content_block_delta" && event.delta.type === "text_delta") checkRepetition(event.delta.text);
6491
7157
  await stream.writeSSE({
6492
7158
  event: event.type,
6493
7159
  data: JSON.stringify(event)
@@ -6971,6 +7637,7 @@ async function runServer(options) {
6971
7637
  consola.info(`History recording enabled (${limitText} entries)`);
6972
7638
  }
6973
7639
  initTui({ enabled: true });
7640
+ initRequestContextManager(state.staleRequestMaxAge).startReaper();
6974
7641
  await ensurePaths();
6975
7642
  await cacheVSCodeVersion();
6976
7643
  if (options.githubToken) {
@@ -7010,11 +7677,12 @@ async function runServer(options) {
7010
7677
  }
7011
7678
  }
7012
7679
  consola.box(`🌐 Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage${options.history ? `\n📜 History UI: ${serverUrl}/history` : ""}`);
7013
- serve({
7680
+ setupShutdownHandlers();
7681
+ setServerInstance(serve({
7014
7682
  fetch: server.fetch,
7015
7683
  port: options.port,
7016
7684
  hostname: options.host
7017
- });
7685
+ }));
7018
7686
  }
7019
7687
  const start = defineCommand({
7020
7688
  meta: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dianshuv/copilot-api",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "description": "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!",
5
5
  "author": "dianshuv",
6
6
  "type": "module",