@dianshuv/copilot-api 0.2.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -1
- package/dist/main.mjs +1027 -20
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,6 +10,11 @@
|
|
|
10
10
|
|
|
11
11
|
- **Responses API endpoint**: `/v1/responses` passthrough for codex models (e.g., `gpt-5.2-codex`, `gpt-5.3-codex`) used by tools like OpenCode. Includes stream ID synchronization for `@ai-sdk/openai` compatibility.
|
|
12
12
|
- **SubagentStart marker support**: Detects `__SUBAGENT_MARKER__` injected by Claude Code hooks to override `X-Initiator` header to `"agent"` for subagent requests, ensuring correct credit tier usage. Includes a ready-to-use Claude plugin (`claude-plugin/`).
|
|
13
|
+
- **Token analytics tab**: The `/history` page includes a Tokens tab with per-model token usage summary table and cumulative ECharts line chart for visualizing API consumption over time.
|
|
14
|
+
- **Real-time history updates**: The `/history` UI uses WebSocket for live updates instead of polling, with automatic fallback to polling and exponential backoff reconnection.
|
|
15
|
+
- **Graceful shutdown**: 4-phase shutdown sequence — stops accepting requests, waits for in-flight requests to complete, sends abort signal, then force-closes. Configurable via `--shutdown-graceful-wait` and `--shutdown-abort-wait`.
|
|
16
|
+
- **Stream repetition detection**: Detects when models get stuck in repetitive output loops using KMP-based pattern matching and logs a warning.
|
|
17
|
+
- **Stale request reaping**: Automatically force-fails requests that exceed a configurable maximum age (default 600s) to prevent resource leaks.
|
|
13
18
|
|
|
14
19
|
## Quick Start
|
|
15
20
|
|
|
@@ -97,7 +102,7 @@ copilot-api start
|
|
|
97
102
|
| `/usage` | GET | Copilot usage stats |
|
|
98
103
|
| `/token` | GET | Current Copilot token |
|
|
99
104
|
| `/health` | GET | Health check |
|
|
100
|
-
| `/history` | GET | Request history Web UI (enabled by default) |
|
|
105
|
+
| `/history` | GET | Request history Web UI with token analytics (enabled by default) |
|
|
101
106
|
| `/history/api/*` | GET/DELETE | History API endpoints |
|
|
102
107
|
|
|
103
108
|
## Using with Claude Code
|
package/dist/main.mjs
CHANGED
|
@@ -50,7 +50,10 @@ const state = {
|
|
|
50
50
|
autoTruncate: true,
|
|
51
51
|
compressToolResults: false,
|
|
52
52
|
redirectAnthropic: false,
|
|
53
|
-
rewriteAnthropicTools: true
|
|
53
|
+
rewriteAnthropicTools: true,
|
|
54
|
+
staleRequestMaxAge: 600,
|
|
55
|
+
shutdownGracefulWait: 60,
|
|
56
|
+
shutdownAbortWait: 120
|
|
54
57
|
};
|
|
55
58
|
|
|
56
59
|
//#endregion
|
|
@@ -1017,11 +1020,11 @@ const patchClaude = defineCommand({
|
|
|
1017
1020
|
|
|
1018
1021
|
//#endregion
|
|
1019
1022
|
//#region package.json
|
|
1020
|
-
var version = "0.
|
|
1023
|
+
var version = "0.4.0";
|
|
1021
1024
|
|
|
1022
1025
|
//#endregion
|
|
1023
1026
|
//#region src/lib/adaptive-rate-limiter.ts
|
|
1024
|
-
const DEFAULT_CONFIG = {
|
|
1027
|
+
const DEFAULT_CONFIG$1 = {
|
|
1025
1028
|
baseRetryIntervalSeconds: 10,
|
|
1026
1029
|
maxRetryIntervalSeconds: 120,
|
|
1027
1030
|
requestIntervalSeconds: 10,
|
|
@@ -1050,7 +1053,7 @@ var AdaptiveRateLimiter = class {
|
|
|
1050
1053
|
recoveryStepIndex = 0;
|
|
1051
1054
|
constructor(config = {}) {
|
|
1052
1055
|
this.config = {
|
|
1053
|
-
...DEFAULT_CONFIG,
|
|
1056
|
+
...DEFAULT_CONFIG$1,
|
|
1054
1057
|
...config
|
|
1055
1058
|
};
|
|
1056
1059
|
}
|
|
@@ -1272,6 +1275,16 @@ var AdaptiveRateLimiter = class {
|
|
|
1272
1275
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
1273
1276
|
}
|
|
1274
1277
|
/**
|
|
1278
|
+
* Reject all currently queued requests during shutdown.
|
|
1279
|
+
* Returns the number of requests that were rejected.
|
|
1280
|
+
*/
|
|
1281
|
+
rejectQueued() {
|
|
1282
|
+
const count = this.queue.length;
|
|
1283
|
+
for (const request of this.queue) request.reject(/* @__PURE__ */ new Error("Server is shutting down"));
|
|
1284
|
+
this.queue = [];
|
|
1285
|
+
return count;
|
|
1286
|
+
}
|
|
1287
|
+
/**
|
|
1275
1288
|
* Get current status for debugging/monitoring
|
|
1276
1289
|
*/
|
|
1277
1290
|
getStatus() {
|
|
@@ -1289,15 +1302,21 @@ let rateLimiterInstance = null;
|
|
|
1289
1302
|
*/
|
|
1290
1303
|
function initAdaptiveRateLimiter(config = {}) {
|
|
1291
1304
|
rateLimiterInstance = new AdaptiveRateLimiter(config);
|
|
1292
|
-
const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
|
|
1293
|
-
const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
|
|
1294
|
-
const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
|
|
1295
|
-
const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
|
|
1296
|
-
const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
|
|
1297
|
-
const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
|
|
1305
|
+
const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG$1.baseRetryIntervalSeconds;
|
|
1306
|
+
const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG$1.maxRetryIntervalSeconds;
|
|
1307
|
+
const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG$1.requestIntervalSeconds;
|
|
1308
|
+
const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG$1.recoveryTimeoutMinutes;
|
|
1309
|
+
const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG$1.consecutiveSuccessesForRecovery;
|
|
1310
|
+
const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG$1.gradualRecoverySteps;
|
|
1298
1311
|
consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
|
|
1299
1312
|
}
|
|
1300
1313
|
/**
|
|
1314
|
+
* Get the rate limiter instance
|
|
1315
|
+
*/
|
|
1316
|
+
function getAdaptiveRateLimiter() {
|
|
1317
|
+
return rateLimiterInstance;
|
|
1318
|
+
}
|
|
1319
|
+
/**
|
|
1301
1320
|
* Execute a request with adaptive rate limiting.
|
|
1302
1321
|
* If rate limiter is not initialized, executes immediately.
|
|
1303
1322
|
* Returns the result along with queue wait time.
|
|
@@ -1310,6 +1329,292 @@ async function executeWithAdaptiveRateLimit(fn) {
|
|
|
1310
1329
|
return rateLimiterInstance.execute(fn);
|
|
1311
1330
|
}
|
|
1312
1331
|
|
|
1332
|
+
//#endregion
|
|
1333
|
+
//#region src/lib/context/request.ts
|
|
1334
|
+
let idCounter = 0;
/**
 * Create a request context: a small state holder for one in-flight request.
 *
 * The context tracks a lifecycle state (initially "pending"), the original
 * request payload and the final response. Every mutation is reported to
 * `opts.onEvent` as an event object (`updated`, `state_changed`, `completed`
 * or `failed`).
 *
 * `complete()` and `fail()` are terminal: the first one to run wins and later
 * calls are ignored. `transition()` is likewise ignored once the context is
 * settled, so a context that was already completed or failed cannot be moved
 * back to a non-terminal state.
 *
 * @param opts Options object; reads `endpoint`, `tuiLogId` and `onEvent`.
 * @returns The context object (id, timing getters, mutators, `toHistoryEntry`).
 */
function createRequestContext(opts) {
	const id = `req_${Date.now()}_${++idCounter}`;
	const startTime = Date.now();
	const onEvent = opts.onEvent;
	let _state = "pending";
	let _originalRequest = null;
	let _response = null;
	let settled = false;
	/** Deliver an event to the listener; listener errors never reach the caller. */
	function emit(event) {
		try {
			onEvent(event);
		} catch {}
	}
	const ctx = {
		id,
		tuiLogId: opts.tuiLogId,
		startTime,
		endpoint: opts.endpoint,
		get state() {
			return _state;
		},
		/** Milliseconds elapsed since the context was created (keeps growing after settle). */
		get durationMs() {
			return Date.now() - startTime;
		},
		get settled() {
			return settled;
		},
		get originalRequest() {
			return _originalRequest;
		},
		get response() {
			return _response;
		},
		/** Store the incoming request payload and emit an `updated` event. */
		setOriginalRequest(req) {
			_originalRequest = req;
			emit({
				type: "updated",
				context: ctx,
				field: "originalRequest"
			});
		},
		/** Move to a new lifecycle state and emit `state_changed`; no-op once settled. */
		transition(newState) {
			// A settled context has already emitted its terminal event; letting it
			// change state again would resurrect a completed/failed request.
			if (settled) return;
			const previousState = _state;
			_state = newState;
			emit({
				type: "state_changed",
				context: ctx,
				previousState
			});
		},
		/** Settle successfully with `response`; ignored if already settled. */
		complete(response) {
			if (settled) return;
			settled = true;
			_response = response;
			_state = "completed";
			emit({
				type: "completed",
				context: ctx,
				entry: ctx.toHistoryEntry()
			});
		},
		/** Settle as failed with a synthesized zero-usage response; ignored if already settled. */
		fail(model, error) {
			if (settled) return;
			settled = true;
			_response = {
				success: false,
				model,
				usage: {
					input_tokens: 0,
					output_tokens: 0
				},
				error: error instanceof Error ? error.message : String(error),
				content: null
			};
			_state = "failed";
			emit({
				type: "failed",
				context: ctx,
				entry: ctx.toHistoryEntry()
			});
		},
		/** Snapshot the context as a history entry; `response` is present only once set. */
		toHistoryEntry() {
			const entry = {
				id,
				endpoint: opts.endpoint,
				timestamp: startTime,
				durationMs: Date.now() - startTime,
				request: {
					model: _originalRequest?.model,
					messages: _originalRequest?.messages,
					stream: _originalRequest?.stream,
					tools: _originalRequest?.tools,
					system: _originalRequest?.system
				}
			};
			if (_response) entry.response = _response;
			return entry;
		}
	};
	return ctx;
}
|
|
1436
|
+
|
|
1437
|
+
//#endregion
|
|
1438
|
+
//#region src/lib/context/manager.ts
|
|
1439
|
+
/**
|
|
1440
|
+
* RequestContextManager — Active request management
|
|
1441
|
+
*
|
|
1442
|
+
* Manages all in-flight RequestContext instances. Publishes events for
|
|
1443
|
+
* WebSocket push and history persistence.
|
|
1444
|
+
*/
|
|
1445
|
+
let _manager = null;
|
|
1446
|
+
function initRequestContextManager(staleMaxAgeSec) {
|
|
1447
|
+
_manager = createRequestContextManager(staleMaxAgeSec);
|
|
1448
|
+
return _manager;
|
|
1449
|
+
}
|
|
1450
|
+
const REAPER_INTERVAL_MS = 6e4;
|
|
1451
|
+
const DEFAULT_STALE_MAX_AGE_SEC = 600;
|
|
1452
|
+
function createRequestContextManager(staleMaxAgeSec) {
|
|
1453
|
+
const maxAgeSec = staleMaxAgeSec ?? DEFAULT_STALE_MAX_AGE_SEC;
|
|
1454
|
+
const activeContexts = /* @__PURE__ */ new Map();
|
|
1455
|
+
const listeners = /* @__PURE__ */ new Set();
|
|
1456
|
+
let reaperTimer = null;
|
|
1457
|
+
function runReaperOnce() {
|
|
1458
|
+
if (maxAgeSec <= 0) return;
|
|
1459
|
+
const maxAgeMs = maxAgeSec * 1e3;
|
|
1460
|
+
for (const [id, ctx] of activeContexts) if (ctx.durationMs > maxAgeMs) {
|
|
1461
|
+
consola.warn(`[context] Force-failing stale request ${id} (endpoint: ${ctx.endpoint}, model: ${ctx.originalRequest?.model ?? "unknown"}, state: ${ctx.state}, age: ${Math.round(ctx.durationMs / 1e3)}s, max: ${maxAgeSec}s)`);
|
|
1462
|
+
ctx.fail(ctx.originalRequest?.model ?? "unknown", /* @__PURE__ */ new Error(`Request exceeded maximum age of ${maxAgeSec}s (stale context reaper)`));
|
|
1463
|
+
}
|
|
1464
|
+
}
|
|
1465
|
+
function startReaper() {
|
|
1466
|
+
if (reaperTimer) return;
|
|
1467
|
+
reaperTimer = setInterval(runReaperOnce, REAPER_INTERVAL_MS);
|
|
1468
|
+
}
|
|
1469
|
+
function stopReaper() {
|
|
1470
|
+
if (reaperTimer) {
|
|
1471
|
+
clearInterval(reaperTimer);
|
|
1472
|
+
reaperTimer = null;
|
|
1473
|
+
}
|
|
1474
|
+
}
|
|
1475
|
+
function emit(event) {
|
|
1476
|
+
for (const listener of listeners) try {
|
|
1477
|
+
listener(event);
|
|
1478
|
+
} catch {}
|
|
1479
|
+
}
|
|
1480
|
+
function handleContextEvent(rawEvent) {
|
|
1481
|
+
const { type, context } = rawEvent;
|
|
1482
|
+
switch (type) {
|
|
1483
|
+
case "state_changed":
|
|
1484
|
+
if (rawEvent.previousState) emit({
|
|
1485
|
+
type: "state_changed",
|
|
1486
|
+
context,
|
|
1487
|
+
previousState: rawEvent.previousState
|
|
1488
|
+
});
|
|
1489
|
+
break;
|
|
1490
|
+
case "updated":
|
|
1491
|
+
if (rawEvent.field) emit({
|
|
1492
|
+
type: "updated",
|
|
1493
|
+
context,
|
|
1494
|
+
field: rawEvent.field
|
|
1495
|
+
});
|
|
1496
|
+
break;
|
|
1497
|
+
case "completed":
|
|
1498
|
+
if (rawEvent.entry) emit({
|
|
1499
|
+
type: "completed",
|
|
1500
|
+
context,
|
|
1501
|
+
entry: rawEvent.entry
|
|
1502
|
+
});
|
|
1503
|
+
activeContexts.delete(context.id);
|
|
1504
|
+
break;
|
|
1505
|
+
case "failed":
|
|
1506
|
+
if (rawEvent.entry) emit({
|
|
1507
|
+
type: "failed",
|
|
1508
|
+
context,
|
|
1509
|
+
entry: rawEvent.entry
|
|
1510
|
+
});
|
|
1511
|
+
activeContexts.delete(context.id);
|
|
1512
|
+
break;
|
|
1513
|
+
default: break;
|
|
1514
|
+
}
|
|
1515
|
+
}
|
|
1516
|
+
return {
|
|
1517
|
+
create(opts) {
|
|
1518
|
+
const ctx = createRequestContext({
|
|
1519
|
+
endpoint: opts.endpoint,
|
|
1520
|
+
tuiLogId: opts.tuiLogId,
|
|
1521
|
+
onEvent: handleContextEvent
|
|
1522
|
+
});
|
|
1523
|
+
activeContexts.set(ctx.id, ctx);
|
|
1524
|
+
emit({
|
|
1525
|
+
type: "created",
|
|
1526
|
+
context: ctx
|
|
1527
|
+
});
|
|
1528
|
+
return ctx;
|
|
1529
|
+
},
|
|
1530
|
+
get(id) {
|
|
1531
|
+
return activeContexts.get(id);
|
|
1532
|
+
},
|
|
1533
|
+
getAll() {
|
|
1534
|
+
return Array.from(activeContexts.values());
|
|
1535
|
+
},
|
|
1536
|
+
get activeCount() {
|
|
1537
|
+
return activeContexts.size;
|
|
1538
|
+
},
|
|
1539
|
+
on(_event, listener) {
|
|
1540
|
+
listeners.add(listener);
|
|
1541
|
+
},
|
|
1542
|
+
off(_event, listener) {
|
|
1543
|
+
listeners.delete(listener);
|
|
1544
|
+
},
|
|
1545
|
+
startReaper,
|
|
1546
|
+
stopReaper,
|
|
1547
|
+
_runReaperOnce: runReaperOnce
|
|
1548
|
+
};
|
|
1549
|
+
}
|
|
1550
|
+
|
|
1551
|
+
//#endregion
|
|
1552
|
+
//#region src/lib/history-ws.ts
|
|
1553
|
+
/**
|
|
1554
|
+
* WebSocket support for History API.
|
|
1555
|
+
* Enables real-time updates when new requests are recorded.
|
|
1556
|
+
*/
|
|
1557
|
+
const clients = /* @__PURE__ */ new Set();
|
|
1558
|
+
function getClientCount() {
|
|
1559
|
+
return clients.size;
|
|
1560
|
+
}
|
|
1561
|
+
function closeAllClients() {
|
|
1562
|
+
for (const client of clients) try {
|
|
1563
|
+
client.close(1001, "Server shutting down");
|
|
1564
|
+
} catch {}
|
|
1565
|
+
clients.clear();
|
|
1566
|
+
}
|
|
1567
|
+
function broadcast(message) {
|
|
1568
|
+
const data = JSON.stringify(message);
|
|
1569
|
+
for (const client of clients) try {
|
|
1570
|
+
if (client.readyState === WebSocket.OPEN) client.send(data);
|
|
1571
|
+
else clients.delete(client);
|
|
1572
|
+
} catch (error) {
|
|
1573
|
+
consola.debug("WebSocket send failed, removing client:", error);
|
|
1574
|
+
clients.delete(client);
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
function notifyEntryAdded(summary) {
|
|
1578
|
+
if (clients.size === 0) return;
|
|
1579
|
+
broadcast({
|
|
1580
|
+
type: "entry_added",
|
|
1581
|
+
data: summary,
|
|
1582
|
+
timestamp: Date.now()
|
|
1583
|
+
});
|
|
1584
|
+
}
|
|
1585
|
+
function notifyEntryUpdated(summary) {
|
|
1586
|
+
if (clients.size === 0) return;
|
|
1587
|
+
broadcast({
|
|
1588
|
+
type: "entry_updated",
|
|
1589
|
+
data: summary,
|
|
1590
|
+
timestamp: Date.now()
|
|
1591
|
+
});
|
|
1592
|
+
}
|
|
1593
|
+
function notifyStatsUpdated(stats) {
|
|
1594
|
+
if (clients.size === 0) return;
|
|
1595
|
+
broadcast({
|
|
1596
|
+
type: "stats_updated",
|
|
1597
|
+
data: stats,
|
|
1598
|
+
timestamp: Date.now()
|
|
1599
|
+
});
|
|
1600
|
+
}
|
|
1601
|
+
function notifyHistoryCleared() {
|
|
1602
|
+
if (clients.size === 0) return;
|
|
1603
|
+
broadcast({
|
|
1604
|
+
type: "history_cleared",
|
|
1605
|
+
data: null,
|
|
1606
|
+
timestamp: Date.now()
|
|
1607
|
+
});
|
|
1608
|
+
}
|
|
1609
|
+
function notifySessionDeleted(sessionId) {
|
|
1610
|
+
if (clients.size === 0) return;
|
|
1611
|
+
broadcast({
|
|
1612
|
+
type: "session_deleted",
|
|
1613
|
+
data: { sessionId },
|
|
1614
|
+
timestamp: Date.now()
|
|
1615
|
+
});
|
|
1616
|
+
}
|
|
1617
|
+
|
|
1313
1618
|
//#endregion
|
|
1314
1619
|
//#region src/lib/history.ts
|
|
1315
1620
|
function generateId$1() {
|
|
@@ -1389,6 +1694,13 @@ function recordRequest(endpoint, request) {
|
|
|
1389
1694
|
if (historyState.entries.filter((e) => e.sessionId === removed.sessionId).length === 0) historyState.sessions.delete(removed.sessionId);
|
|
1390
1695
|
}
|
|
1391
1696
|
}
|
|
1697
|
+
notifyEntryAdded({
|
|
1698
|
+
id: entry.id,
|
|
1699
|
+
endpoint,
|
|
1700
|
+
model: request.model,
|
|
1701
|
+
stream: request.stream,
|
|
1702
|
+
timestamp: entry.timestamp
|
|
1703
|
+
});
|
|
1392
1704
|
return entry.id;
|
|
1393
1705
|
}
|
|
1394
1706
|
function recordResponse(id, response, durationMs) {
|
|
@@ -1403,6 +1715,20 @@ function recordResponse(id, response, durationMs) {
|
|
|
1403
1715
|
session.totalOutputTokens += response.usage.output_tokens;
|
|
1404
1716
|
session.lastActivity = Date.now();
|
|
1405
1717
|
}
|
|
1718
|
+
notifyEntryUpdated({
|
|
1719
|
+
id: entry.id,
|
|
1720
|
+
endpoint: entry.endpoint,
|
|
1721
|
+
model: response.model,
|
|
1722
|
+
success: response.success,
|
|
1723
|
+
durationMs,
|
|
1724
|
+
inputTokens: response.usage.input_tokens,
|
|
1725
|
+
outputTokens: response.usage.output_tokens
|
|
1726
|
+
});
|
|
1727
|
+
notifyStatsUpdated({
|
|
1728
|
+
totalRequests: historyState.entries.length,
|
|
1729
|
+
totalInputTokens: session?.totalInputTokens ?? 0,
|
|
1730
|
+
totalOutputTokens: session?.totalOutputTokens ?? 0
|
|
1731
|
+
});
|
|
1406
1732
|
}
|
|
1407
1733
|
}
|
|
1408
1734
|
function getHistory(options = {}) {
|
|
@@ -1477,12 +1803,14 @@ function clearHistory() {
|
|
|
1477
1803
|
historyState.entries = [];
|
|
1478
1804
|
historyState.sessions = /* @__PURE__ */ new Map();
|
|
1479
1805
|
historyState.currentSessionId = generateId$1();
|
|
1806
|
+
notifyHistoryCleared();
|
|
1480
1807
|
}
|
|
1481
1808
|
function deleteSession(sessionId) {
|
|
1482
1809
|
if (!historyState.sessions.has(sessionId)) return false;
|
|
1483
1810
|
historyState.entries = historyState.entries.filter((e) => e.sessionId !== sessionId);
|
|
1484
1811
|
historyState.sessions.delete(sessionId);
|
|
1485
1812
|
if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId$1();
|
|
1813
|
+
notifySessionDeleted(sessionId);
|
|
1486
1814
|
return true;
|
|
1487
1815
|
}
|
|
1488
1816
|
function getStats() {
|
|
@@ -1533,6 +1861,37 @@ function getStats() {
|
|
|
1533
1861
|
activeSessions
|
|
1534
1862
|
};
|
|
1535
1863
|
}
|
|
1864
|
+
/**
 * Aggregate token usage over every history entry that has a response.
 *
 * @returns An object with:
 *   - `models`: per-model totals (`inputTokens`, `outputTokens`, `requestCount`),
 *     keyed by the response model, falling back to the request model;
 *   - `timeline`: one point per entry (`timestamp`, `model`, `inputTokens`,
 *     `outputTokens`), sorted by ascending timestamp.
 */
function getTokenStats() {
	// Model names come from request/response payloads. Aggregating in a Map keeps
	// names such as "constructor" or "__proto__" from resolving to members
	// inherited from Object.prototype.
	const perModel = /* @__PURE__ */ new Map();
	const timeline = [];
	for (const entry of historyState.entries) {
		if (!entry.response) continue;
		const model = entry.response.model || entry.request.model;
		const inputTokens = entry.response.usage.input_tokens;
		const outputTokens = entry.response.usage.output_tokens;
		const existing = perModel.get(model);
		if (existing) {
			existing.inputTokens += inputTokens;
			existing.outputTokens += outputTokens;
			existing.requestCount++;
		} else perModel.set(model, {
			inputTokens,
			outputTokens,
			requestCount: 1
		});
		timeline.push({
			timestamp: entry.timestamp,
			model,
			inputTokens,
			outputTokens
		});
	}
	timeline.sort((a, b) => a.timestamp - b.timestamp);
	return {
		// Object.fromEntries defines own data properties, so every model name
		// (including "__proto__") ends up as a plain key of the result.
		models: Object.fromEntries(perModel),
		timeline
	};
}
|
|
1536
1895
|
function exportHistory(format = "json") {
|
|
1537
1896
|
if (format === "json") return JSON.stringify({
|
|
1538
1897
|
sessions: Array.from(historyState.sessions.values()),
|
|
@@ -1700,6 +2059,142 @@ function generateEnvScript(envVars, commandToRun = "") {
|
|
|
1700
2059
|
return commandBlock || commandToRun;
|
|
1701
2060
|
}
|
|
1702
2061
|
|
|
2062
|
+
//#endregion
|
|
2063
|
+
//#region src/lib/shutdown.ts
|
|
2064
|
+
const DRAIN_POLL_INTERVAL_MS = 500;
|
|
2065
|
+
const DRAIN_PROGRESS_INTERVAL_MS = 5e3;
|
|
2066
|
+
let serverInstance = null;
|
|
2067
|
+
let _isShuttingDown = false;
|
|
2068
|
+
let shutdownResolve = null;
|
|
2069
|
+
let shutdownAbortController = null;
|
|
2070
|
+
function getIsShuttingDown() {
|
|
2071
|
+
return _isShuttingDown;
|
|
2072
|
+
}
|
|
2073
|
+
function setServerInstance(server) {
|
|
2074
|
+
serverInstance = server;
|
|
2075
|
+
}
|
|
2076
|
+
function formatActiveRequestsSummary(requests) {
|
|
2077
|
+
const now = Date.now();
|
|
2078
|
+
const lines = requests.map((req) => {
|
|
2079
|
+
const age = Math.round((now - req.startTime) / 1e3);
|
|
2080
|
+
const model = req.model || "unknown";
|
|
2081
|
+
const tags = req.tags?.length ? ` [${req.tags.join(", ")}]` : "";
|
|
2082
|
+
return ` ${req.method} ${req.path} ${model} (${req.status}, ${age}s)${tags}`;
|
|
2083
|
+
});
|
|
2084
|
+
return `Waiting for ${requests.length} active request(s):\n${lines.join("\n")}`;
|
|
2085
|
+
}
|
|
2086
|
+
/**
 * Poll the tracker until no requests are active or the timeout elapses.
 *
 * The active list is always checked before the deadline, so the function
 * reports "drained" whenever nothing is active at the moment it looks, even
 * when `timeoutMs` is 0 or the last request finished during the final sleep.
 * Each sleep is capped at the time remaining, so the wait does not overshoot
 * the deadline by a full poll interval.
 *
 * @param timeoutMs Maximum time to wait, in milliseconds.
 * @param tracker Object exposing `getActiveRequests()` returning an array.
 * @param opts Optional `pollIntervalMs` / `progressIntervalMs` overrides.
 * @returns "drained" when no requests remain, otherwise "timeout".
 */
async function drainActiveRequests(timeoutMs, tracker, opts) {
	const pollInterval = opts?.pollIntervalMs ?? DRAIN_POLL_INTERVAL_MS;
	const progressInterval = opts?.progressIntervalMs ?? DRAIN_PROGRESS_INTERVAL_MS;
	const deadline = Date.now() + timeoutMs;
	let lastProgressLog = 0;
	for (;;) {
		const active = tracker.getActiveRequests();
		if (active.length === 0) return "drained";
		const now = Date.now();
		if (now >= deadline) return "timeout";
		// lastProgressLog starts at 0, so the first pass always logs a summary.
		if (now - lastProgressLog >= progressInterval) {
			lastProgressLog = now;
			consola.info(formatActiveRequestsSummary(active));
		}
		const waitMs = Math.min(pollInterval, deadline - now);
		await new Promise((resolve) => setTimeout(resolve, waitMs));
	}
}
|
|
2103
|
+
async function gracefulShutdown(signal, deps) {
|
|
2104
|
+
const tracker = deps?.tracker;
|
|
2105
|
+
const server = deps?.server ?? serverInstance;
|
|
2106
|
+
const rateLimiter = deps?.rateLimiter !== void 0 ? deps.rateLimiter : getAdaptiveRateLimiter();
|
|
2107
|
+
const stopRefresh = deps?.stopTokenRefreshFn ?? (() => {});
|
|
2108
|
+
const closeWsClients = deps?.closeAllClientsFn ?? closeAllClients;
|
|
2109
|
+
const getWsCount = deps?.getClientCountFn ?? getClientCount;
|
|
2110
|
+
const gracefulWaitMs = deps?.gracefulWaitMs ?? state.shutdownGracefulWait * 1e3;
|
|
2111
|
+
const abortWaitMs = deps?.abortWaitMs ?? state.shutdownAbortWait * 1e3;
|
|
2112
|
+
const drainOpts = {
|
|
2113
|
+
pollIntervalMs: deps?.drainPollIntervalMs ?? DRAIN_POLL_INTERVAL_MS,
|
|
2114
|
+
progressIntervalMs: deps?.drainProgressIntervalMs ?? DRAIN_PROGRESS_INTERVAL_MS
|
|
2115
|
+
};
|
|
2116
|
+
_isShuttingDown = true;
|
|
2117
|
+
shutdownAbortController = new AbortController();
|
|
2118
|
+
consola.info(`Received ${signal}, shutting down gracefully...`);
|
|
2119
|
+
try {
|
|
2120
|
+
deps?.contextManager?.stopReaper();
|
|
2121
|
+
} catch {}
|
|
2122
|
+
stopRefresh();
|
|
2123
|
+
const wsClients = getWsCount();
|
|
2124
|
+
if (wsClients > 0) {
|
|
2125
|
+
closeWsClients();
|
|
2126
|
+
consola.info(`Disconnected ${wsClients} WebSocket client(s)`);
|
|
2127
|
+
}
|
|
2128
|
+
if (rateLimiter) {
|
|
2129
|
+
const rejected = rateLimiter.rejectQueued();
|
|
2130
|
+
if (rejected > 0) consola.info(`Rejected ${rejected} queued request(s) from rate limiter`);
|
|
2131
|
+
}
|
|
2132
|
+
if (server) {
|
|
2133
|
+
server.close(false).catch((error) => {
|
|
2134
|
+
consola.error("Error stopping listener:", error);
|
|
2135
|
+
});
|
|
2136
|
+
consola.info("Stopped accepting new connections");
|
|
2137
|
+
}
|
|
2138
|
+
if (tracker) {
|
|
2139
|
+
const activeCount = tracker.getActiveRequests().length;
|
|
2140
|
+
if (activeCount > 0) {
|
|
2141
|
+
consola.info(`Phase 2: Waiting up to ${gracefulWaitMs / 1e3}s for ${activeCount} active request(s)...`);
|
|
2142
|
+
try {
|
|
2143
|
+
if (await drainActiveRequests(gracefulWaitMs, tracker, drainOpts) === "drained") {
|
|
2144
|
+
consola.info("All requests completed naturally");
|
|
2145
|
+
finalize(tracker);
|
|
2146
|
+
return;
|
|
2147
|
+
}
|
|
2148
|
+
} catch (error) {
|
|
2149
|
+
consola.error("Error during Phase 2 drain:", error);
|
|
2150
|
+
}
|
|
2151
|
+
const remaining = tracker.getActiveRequests().length;
|
|
2152
|
+
consola.info(`Phase 3: Sending abort signal to ${remaining} remaining request(s), waiting up to ${abortWaitMs / 1e3}s...`);
|
|
2153
|
+
shutdownAbortController.abort();
|
|
2154
|
+
try {
|
|
2155
|
+
if (await drainActiveRequests(abortWaitMs, tracker, drainOpts) === "drained") {
|
|
2156
|
+
consola.info("All requests completed after abort signal");
|
|
2157
|
+
finalize(tracker);
|
|
2158
|
+
return;
|
|
2159
|
+
}
|
|
2160
|
+
} catch (error) {
|
|
2161
|
+
consola.error("Error during Phase 3 drain:", error);
|
|
2162
|
+
}
|
|
2163
|
+
const forceRemaining = tracker.getActiveRequests().length;
|
|
2164
|
+
consola.warn(`Phase 4: Force-closing ${forceRemaining} remaining request(s)`);
|
|
2165
|
+
if (server) try {
|
|
2166
|
+
await server.close(true);
|
|
2167
|
+
} catch (error) {
|
|
2168
|
+
consola.error("Error force-closing server:", error);
|
|
2169
|
+
}
|
|
2170
|
+
}
|
|
2171
|
+
finalize(tracker);
|
|
2172
|
+
} else {
|
|
2173
|
+
consola.info("Shutdown complete");
|
|
2174
|
+
shutdownResolve?.();
|
|
2175
|
+
}
|
|
2176
|
+
}
|
|
2177
|
+
function finalize(tracker) {
|
|
2178
|
+
tracker.destroy();
|
|
2179
|
+
consola.info("Shutdown complete");
|
|
2180
|
+
shutdownResolve?.();
|
|
2181
|
+
}
|
|
2182
|
+
function setupShutdownHandlers() {
|
|
2183
|
+
const handler = (signal) => {
|
|
2184
|
+
if (_isShuttingDown) {
|
|
2185
|
+
consola.warn("Second signal received, forcing immediate exit");
|
|
2186
|
+
process.exit(1);
|
|
2187
|
+
}
|
|
2188
|
+
gracefulShutdown(signal).catch((error) => {
|
|
2189
|
+
consola.error("Fatal error during shutdown:", error);
|
|
2190
|
+
shutdownResolve?.();
|
|
2191
|
+
process.exit(1);
|
|
2192
|
+
});
|
|
2193
|
+
};
|
|
2194
|
+
process.on("SIGINT", () => handler("SIGINT"));
|
|
2195
|
+
process.on("SIGTERM", () => handler("SIGTERM"));
|
|
2196
|
+
}
|
|
2197
|
+
|
|
1703
2198
|
//#endregion
|
|
1704
2199
|
//#region src/lib/tui/console-renderer.ts
|
|
1705
2200
|
const CLEAR_LINE = "\x1B[2K\r";
|
|
@@ -2060,6 +2555,7 @@ const requestTracker = new RequestTracker();
|
|
|
2060
2555
|
*/
|
|
2061
2556
|
function tuiLogger() {
|
|
2062
2557
|
return async (c, next) => {
|
|
2558
|
+
if (getIsShuttingDown()) return c.json({ error: "Server is shutting down" }, 503);
|
|
2063
2559
|
const method = c.req.method;
|
|
2064
2560
|
const path = c.req.path;
|
|
2065
2561
|
const isHistoryAccess = path.startsWith("/history");
|
|
@@ -2783,6 +3279,127 @@ function createTruncationResponseMarkerOpenAI(result) {
|
|
|
2783
3279
|
return `\n\n---\n[Auto-truncated: ${result.removedMessageCount} messages removed, ${result.originalTokens} ā ${result.compactedTokens} tokens (${percentage}% reduction)]`;
|
|
2784
3280
|
}
|
|
2785
3281
|
|
|
3282
|
+
//#endregion
|
|
3283
|
+
//#region src/lib/repetition-detector.ts
|
|
3284
|
+
/**
|
|
3285
|
+
* Stream repetition detector.
|
|
3286
|
+
*
|
|
3287
|
+
* Uses the KMP failure function (prefix function) to detect repeated patterns
|
|
3288
|
+
* in streaming text output. When a model gets stuck in a repetitive loop,
|
|
3289
|
+
* it wastes tokens producing the same content over and over. This detector
|
|
3290
|
+
* identifies such loops early so the caller can take action (log warning,
|
|
3291
|
+
* abort stream, etc.).
|
|
3292
|
+
*
|
|
3293
|
+
* The algorithm works by maintaining a sliding buffer of recent text and
|
|
3294
|
+
* computing the longest proper prefix that is also a suffix — if this
|
|
3295
|
+
* length exceeds `(text.length - period) >= minRepetitions * period`,
|
|
3296
|
+
* it means a pattern of length `period` has repeated enough times.
|
|
3297
|
+
*/
|
|
3298
|
+
const DEFAULT_CONFIG = {
|
|
3299
|
+
minPatternLength: 10,
|
|
3300
|
+
minRepetitions: 3,
|
|
3301
|
+
maxBufferSize: 5e3
|
|
3302
|
+
};
|
|
3303
|
+
var RepetitionDetector = class {
|
|
3304
|
+
buffer = "";
|
|
3305
|
+
config;
|
|
3306
|
+
detected = false;
|
|
3307
|
+
constructor(config) {
|
|
3308
|
+
this.config = {
|
|
3309
|
+
...DEFAULT_CONFIG,
|
|
3310
|
+
...config
|
|
3311
|
+
};
|
|
3312
|
+
}
|
|
3313
|
+
/**
|
|
3314
|
+
* Feed a text chunk into the detector.
|
|
3315
|
+
* Returns `true` if repetition has been detected (now or previously).
|
|
3316
|
+
* Once detected, subsequent calls return `true` without further analysis.
|
|
3317
|
+
*/
|
|
3318
|
+
feed(text) {
|
|
3319
|
+
if (this.detected) return true;
|
|
3320
|
+
if (!text) return false;
|
|
3321
|
+
this.buffer += text;
|
|
3322
|
+
if (this.buffer.length > this.config.maxBufferSize) this.buffer = this.buffer.slice(-this.config.maxBufferSize);
|
|
3323
|
+
const minRequired = this.config.minPatternLength * this.config.minRepetitions;
|
|
3324
|
+
if (this.buffer.length < minRequired) return false;
|
|
3325
|
+
this.detected = detectRepetition(this.buffer, this.config.minPatternLength, this.config.minRepetitions);
|
|
3326
|
+
return this.detected;
|
|
3327
|
+
}
|
|
3328
|
+
/** Reset detector state for a new stream */
|
|
3329
|
+
reset() {
|
|
3330
|
+
this.buffer = "";
|
|
3331
|
+
this.detected = false;
|
|
3332
|
+
}
|
|
3333
|
+
/** Whether repetition has been detected */
|
|
3334
|
+
get isDetected() {
|
|
3335
|
+
return this.detected;
|
|
3336
|
+
}
|
|
3337
|
+
};
|
|
3338
|
+
/**
|
|
3339
|
+
* Detect if the tail of `text` contains a repeating pattern.
|
|
3340
|
+
*
|
|
3341
|
+
* Uses the KMP prefix function: for a string S, the prefix function π[i]
|
|
3342
|
+
* gives the length of the longest proper prefix of S[0..i] that is also
|
|
3343
|
+
* a suffix. If π[n-1] ≥ (n - period) where period = n - π[n-1], then
|
|
3344
|
+
* the string is composed of a repeating unit of length `period`.
|
|
3345
|
+
*
|
|
3346
|
+
* We check the suffix of the buffer (last `checkLength` chars) to detect
|
|
3347
|
+
* if a pattern of at least `minPatternLength` chars repeats at least
|
|
3348
|
+
* `minRepetitions` times.
|
|
3349
|
+
*/
|
|
3350
|
+
function detectRepetition(text, minPatternLength, minRepetitions) {
|
|
3351
|
+
const minWindow = minPatternLength * minRepetitions;
|
|
3352
|
+
const maxWindow = Math.min(text.length, 2e3);
|
|
3353
|
+
const windowSizes = [
|
|
3354
|
+
minWindow,
|
|
3355
|
+
Math.floor(maxWindow * .5),
|
|
3356
|
+
maxWindow
|
|
3357
|
+
].filter((w) => w >= minWindow && w <= text.length);
|
|
3358
|
+
for (const windowSize of windowSizes) {
|
|
3359
|
+
const window = text.slice(-windowSize);
|
|
3360
|
+
const period = findRepeatingPeriod(window);
|
|
3361
|
+
if (period >= minPatternLength) {
|
|
3362
|
+
if (Math.floor(window.length / period) >= minRepetitions) return true;
|
|
3363
|
+
}
|
|
3364
|
+
}
|
|
3365
|
+
return false;
|
|
3366
|
+
}
|
|
3367
|
+
/**
 * Find the shortest repeating period in a string using the KMP prefix function.
 *
 * The prefix function gives, for each position, the length of the longest
 * proper prefix that is also a suffix (the "border"). With `border` being that
 * value for the whole string, the candidate period is `n - border`. It is
 * accepted only when the border is at least one period long, i.e. the unit
 * occurs at least twice (the last occurrence may be followed by a partial one).
 *
 * @param s String to analyse.
 * @returns The period length, the string length when no repetition is found,
 *   or 0 for an empty string.
 */
function findRepeatingPeriod(s) {
	const n = s.length;
	if (n === 0) return 0;
	// Int32Array is zero-initialised and always yields numbers, so no fallback
	// is needed when reading it.
	const pi = new Int32Array(n);
	for (let i = 1; i < n; i++) {
		let j = pi[i - 1];
		while (j > 0 && s[i] !== s[j]) j = pi[j - 1];
		if (s[i] === s[j]) j++;
		pi[i] = j;
	}
	const border = pi[n - 1];
	const period = n - border;
	// border >= period already implies period < n, and it also covers the case
	// where n is an exact multiple of the period.
	return border >= period ? period : n;
}
|
|
3386
|
+
/**
 * Build a per-stream repetition checker.
 *
 * The returned callback forwards each text delta to a fresh
 * `RepetitionDetector` and hands back whatever `feed` reports. The first time
 * the detector reports repetition a warning tagged with `label` is logged;
 * later detections stay silent.
 *
 * @param {string} label - Identifier included in the warning message.
 * @param {object} [config] - Passed through unchanged to `RepetitionDetector`.
 * @returns {(textDelta: string) => boolean} Callback reporting whether the stream looks repetitive.
 */
function createStreamRepetitionChecker(label, config) {
	const repetitionDetector = new RepetitionDetector(config);
	let alreadyWarned = false;
	return (textDelta) => {
		const repetitive = repetitionDetector.feed(textDelta);
		if (!repetitive || alreadyWarned) return repetitive;
		alreadyWarned = true;
		consola.warn(`[RepetitionDetector] ${label}: Repetitive output detected in stream`);
		return repetitive;
	};
}
|
|
3402
|
+
|
|
2786
3403
|
//#endregion
|
|
2787
3404
|
//#region src/services/copilot/create-chat-completions.ts
|
|
2788
3405
|
const createChatCompletions = async (payload, options) => {
|
|
@@ -3119,6 +3736,7 @@ function createStreamAccumulator() {
|
|
|
3119
3736
|
async function handleStreamingResponse$1(opts) {
|
|
3120
3737
|
const { stream, response, payload, ctx } = opts;
|
|
3121
3738
|
const acc = createStreamAccumulator();
|
|
3739
|
+
const checkRepetition = createStreamRepetitionChecker(`openai:${payload.model}`);
|
|
3122
3740
|
try {
|
|
3123
3741
|
if (state.verbose && ctx.truncateResult?.wasCompacted) {
|
|
3124
3742
|
const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
|
|
@@ -3142,7 +3760,7 @@ async function handleStreamingResponse$1(opts) {
|
|
|
3142
3760
|
}
|
|
3143
3761
|
for await (const chunk of response) {
|
|
3144
3762
|
consola.debug("Streaming chunk:", JSON.stringify(chunk));
|
|
3145
|
-
parseStreamChunk(chunk, acc);
|
|
3763
|
+
parseStreamChunk(chunk, acc, checkRepetition);
|
|
3146
3764
|
await stream.writeSSE(chunk);
|
|
3147
3765
|
}
|
|
3148
3766
|
recordStreamSuccess(acc, payload.model, ctx);
|
|
@@ -3158,7 +3776,7 @@ async function handleStreamingResponse$1(opts) {
|
|
|
3158
3776
|
throw error;
|
|
3159
3777
|
}
|
|
3160
3778
|
}
|
|
3161
|
-
function parseStreamChunk(chunk, acc) {
|
|
3779
|
+
function parseStreamChunk(chunk, acc, checkRepetition) {
|
|
3162
3780
|
if (!chunk.data || chunk.data === "[DONE]") return;
|
|
3163
3781
|
try {
|
|
3164
3782
|
const parsed = JSON.parse(chunk.data);
|
|
@@ -3169,7 +3787,10 @@ function parseStreamChunk(chunk, acc) {
|
|
|
3169
3787
|
}
|
|
3170
3788
|
const choice = parsed.choices[0];
|
|
3171
3789
|
if (choice) {
|
|
3172
|
-
if (choice.delta.content)
|
|
3790
|
+
if (choice.delta.content) {
|
|
3791
|
+
acc.content += choice.delta.content;
|
|
3792
|
+
checkRepetition(choice.delta.content);
|
|
3793
|
+
}
|
|
3173
3794
|
if (choice.delta.tool_calls) for (const tc of choice.delta.tool_calls) {
|
|
3174
3795
|
const idx = tc.index;
|
|
3175
3796
|
if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
|
|
@@ -3360,6 +3981,11 @@ function handleDeleteSession(c) {
|
|
|
3360
3981
|
message: "Session deleted"
|
|
3361
3982
|
});
|
|
3362
3983
|
}
|
|
3984
|
+
/**
 * Route handler returning the token statistics as JSON.
 *
 * Responds with a 400 error payload when history recording is disabled;
 * otherwise serialises the result of `getTokenStats()`.
 *
 * @param c - Request context providing `json(body, status?)`.
 * @returns The JSON response produced by `c.json`.
 */
function handleGetTokenStats(c) {
	if (isHistoryEnabled()) return c.json(getTokenStats());
	return c.json({ error: "History recording is not enabled" }, 400);
}
|
|
3363
3989
|
|
|
3364
3990
|
//#endregion
|
|
3365
3991
|
//#region src/routes/history/ui/script.ts
|
|
@@ -3903,11 +4529,274 @@ document.addEventListener('keydown', (e) => {
|
|
|
3903
4529
|
}
|
|
3904
4530
|
});
|
|
3905
4531
|
|
|
3906
|
-
// Auto-refresh every 10 seconds
|
|
3907
|
-
setInterval(() => {
|
|
4532
|
+
// Auto-refresh every 10 seconds (fallback when WebSocket is not available)
|
|
4533
|
+
let autoRefreshTimer = setInterval(() => {
|
|
3908
4534
|
loadStats();
|
|
3909
4535
|
loadSessions();
|
|
3910
4536
|
}, 10000);
|
|
4537
|
+
|
|
4538
|
+
// WebSocket real-time updates
|
|
4539
|
+
let reconnectAttempts = 0;
|
|
4540
|
+
/**
 * Open the history WebSocket and wire up live updates.
 *
 * While the socket is open the 10-second polling timer is stopped and each
 * server message triggers the matching reload. When the socket closes,
 * polling is restored and a reconnect is scheduled with exponential backoff
 * (5s doubling up to a 60s cap) for at most 10 attempts.
 */
function connectWebSocket() {
  const scheme = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
  const wsUrl = scheme + '//' + window.location.host + '/history/ws';

  let socket;
  try {
    socket = new WebSocket(wsUrl);
  } catch (e) {
    console.warn('[History] WebSocket not available:', e);
    return;
  }

  socket.onopen = function() {
    console.log('[History] WebSocket connected');
    reconnectAttempts = 0;
    // Live updates replace polling while the socket is open
    clearInterval(autoRefreshTimer);
  };

  socket.onmessage = function(event) {
    try {
      const type = JSON.parse(event.data).type;
      if (type === 'stats_updated') {
        loadStats();
      } else if (
        type === 'entry_added'
        || type === 'entry_updated'
        || type === 'history_cleared'
        || type === 'session_deleted'
      ) {
        loadSessions();
        loadStats();
      }
    } catch (e) {
      console.warn('[History] Failed to parse WS message:', e);
    }
  };

  socket.onclose = function() {
    console.log('[History] WebSocket disconnected, falling back to polling');
    // Clear before re-arming so two polling timers never run at once
    clearInterval(autoRefreshTimer);
    autoRefreshTimer = setInterval(function() {
      loadStats();
      loadSessions();
    }, 10000);
    // Give up reconnecting after 10 attempts; polling keeps the page fresh
    if (reconnectAttempts >= 10) return;
    const delay = Math.min(5000 * 2 ** reconnectAttempts, 60000);
    reconnectAttempts += 1;
    setTimeout(connectWebSocket, delay);
  };

  socket.onerror = function() {
    // Will trigger onclose
  };
}
|
|
4600
|
+
|
|
4601
|
+
// Start WebSocket connection
|
|
4602
|
+
connectWebSocket();
|
|
4603
|
+
|
|
4604
|
+
// Tab switching
|
|
4605
|
+
/**
 * Activate one of the top-level tabs.
 *
 * Marks the matching tab item as active, hides every tab panel and shows the
 * selected one (flex layout for the requests panel, block otherwise).
 * Selecting the tokens tab also flags its panel as loaded and refreshes the
 * token statistics.
 *
 * @param {string} tab - Tab key, matching the data-tab attribute and the "tab-" id suffix.
 */
function switchTab(tab) {
  for (const item of document.querySelectorAll('.tab-item')) {
    item.classList.remove('active');
  }
  const selectedItem = document.querySelector('.tab-item[data-tab="' + tab + '"]');
  selectedItem.classList.add('active');

  for (const other of document.querySelectorAll('.tab-panel')) {
    other.style.display = 'none';
  }
  const panel = document.getElementById('tab-' + tab);
  if (tab === 'requests') {
    panel.style.display = 'flex';
  } else {
    panel.style.display = 'block';
  }

  if (tab !== 'tokens') return;
  panel.setAttribute('data-loaded', 'true');
  loadTokenStats();
}
|
|
4618
|
+
|
|
4619
|
+
/**
 * Fetch per-model token statistics and render the Tokens tab.
 *
 * Requests /history/api/token-stats and then either shows a disabled/empty
 * state, or builds the summary table (models sorted by total tokens,
 * descending, with a totals footer) and passes the timeline on to
 * renderTokenChart. Any failure is reported inside the table container.
 */
async function loadTokenStats() {
  const container = document.getElementById('tokens-table-container');
  container.innerHTML = '<div class="loading">Loading...</div>';

  try {
    const res = await fetch('/history/api/token-stats');
    const data = await res.json();
    if (data.error) {
      container.innerHTML = '<div class="empty-state"><h3>History Not Enabled</h3><p>Start server with --history</p></div>';
      return;
    }

    const modelNames = Object.keys(data.models);
    if (modelNames.length === 0) {
      container.innerHTML = '<div class="empty-state"><h3>No token data</h3><p>Make some API requests first</p></div>';
      document.getElementById('chart-fallback').style.display = 'block';
      document.getElementById('chart-fallback').textContent = 'No data available for chart.';
      return;
    }

    // Sort models by total tokens descending
    modelNames.sort((a, b) => {
      const totalA = data.models[a].inputTokens + data.models[a].outputTokens;
      const totalB = data.models[b].inputTokens + data.models[b].outputTokens;
      return totalB - totalA;
    });

    // Reset chart fallback state
    document.getElementById('chart-fallback').style.display = 'none';
    document.getElementById('token-chart').style.display = '';

    // Render table, accumulating the footer totals along the way
    let totalInput = 0, totalOutput = 0, totalReqs = 0;
    let rows = '';
    for (const model of modelNames) {
      const m = data.models[model];
      const total = m.inputTokens + m.outputTokens;
      totalInput += m.inputTokens;
      totalOutput += m.outputTokens;
      totalReqs += m.requestCount;
      rows += '<tr>'
        + '<td>' + escapeHtml(model) + '</td>'
        + '<td class="number">' + formatNumber(m.inputTokens) + '</td>'
        + '<td class="number">' + formatNumber(m.outputTokens) + '</td>'
        + '<td class="number">' + formatNumber(total) + '</td>'
        + '<td class="number">' + m.requestCount + '</td>'
        + '</tr>';
    }

    container.innerHTML = '<table class="tokens-table">'
      + '<thead><tr><th>Model</th><th class="number">Input Tokens</th><th class="number">Output Tokens</th><th class="number">Total Tokens</th><th class="number">Requests</th></tr></thead>'
      + '<tbody>' + rows + '</tbody>'
      + '<tfoot><tr><td>Total</td>'
      + '<td class="number">' + formatNumber(totalInput) + '</td>'
      + '<td class="number">' + formatNumber(totalOutput) + '</td>'
      + '<td class="number">' + formatNumber(totalInput + totalOutput) + '</td>'
      + '<td class="number">' + totalReqs + '</td>'
      + '</tr></tfoot></table>';

    // Render chart
    renderTokenChart(data.timeline, modelNames);
  } catch (e) {
    // Error text may echo response content (JSON parse errors can quote the
    // payload), so escape it like every other dynamic value written via innerHTML.
    container.innerHTML = '<div class="empty-state">Error: ' + escapeHtml(String(e.message)) + '</div>';
  }
}
|
|
4684
|
+
|
|
4685
|
+
// Current ECharts instance; disposed and re-created on every render
let tokenChart = null;
// Ensures the window resize / colour-scheme listeners are registered only once
let tokenChartListenersAdded = false;

/**
 * Draw the cumulative token usage line chart for the Tokens tab.
 *
 * When the ECharts global is missing, the fallback notice is shown instead.
 * Otherwise any previous chart is disposed, a new one is created (dark theme
 * when the OS prefers it) and one line series per model is plotted, each
 * point being the running sum of input + output tokens for that model.
 *
 * @param {Array} timeline - Points carrying model, timestamp, inputTokens and outputTokens; consumed in the order given.
 * @param {string[]} modelNames - Models to plot; every timeline point must reference one of them.
 */
function renderTokenChart(timeline, modelNames) {
  if (typeof echarts === 'undefined') {
    document.getElementById('chart-fallback').style.display = 'block';
    document.getElementById('token-chart').style.display = 'none';
    return;
  }

  const chartDom = document.getElementById('token-chart');
  const isDark = window.matchMedia('(prefers-color-scheme: dark)').matches;

  if (tokenChart) {
    tokenChart.dispose();
  }
  tokenChart = echarts.init(chartDom, isDark ? 'dark' : null);

  // Group timeline by model and compute cumulative totals
  const seriesData = {};
  for (const name of modelNames) {
    seriesData[name] = [];
  }

  // Build cumulative data per model (timeline is already sorted by backend)
  const cumulative = {};
  for (const name of modelNames) {
    cumulative[name] = 0;
  }

  for (const point of timeline) {
    const total = point.inputTokens + point.outputTokens;
    cumulative[point.model] += total;
    seriesData[point.model].push([point.timestamp, cumulative[point.model]]);
  }

  const colors = ['#58a6ff', '#3fb950', '#f85149', '#d29922', '#a371f7', '#39c5cf', '#f778ba', '#79c0ff', '#7ee787', '#ffa657'];

  const series = modelNames.map((name, i) => ({
    name: name,
    type: 'line',
    data: seriesData[name],
    smooth: true,
    symbol: 'circle',
    symbolSize: 4,
    lineStyle: { width: 2 },
    itemStyle: { color: colors[i % colors.length] },
    areaStyle: { opacity: 0.05 },
  }));

  // Colours come from the page's CSS custom properties so the chart follows the theme
  const style = getComputedStyle(document.documentElement);
  const textColor = style.getPropertyValue('--text').trim();
  const borderColor = style.getPropertyValue('--border').trim();
  const bgColor = style.getPropertyValue('--bg').trim();

  const option = {
    backgroundColor: 'transparent',
    tooltip: {
      trigger: 'item',
      backgroundColor: bgColor,
      borderColor: borderColor,
      textStyle: { color: textColor, fontSize: 12 },
      formatter: function(params) {
        const d = new Date(params.data[0]);
        const time = d.toLocaleDateString() + ' ' + d.toLocaleTimeString();
        // The returned string is rendered as HTML, so the model name is
        // escaped just like it is in the summary table.
        return '<b>' + escapeHtml(params.seriesName) + '</b><br/>'
          + time + '<br/>'
          + 'Cumulative: ' + formatNumber(params.data[1]) + ' tokens';
      }
    },
    legend: {
      data: modelNames,
      textStyle: { color: textColor, fontSize: 12 },
      top: 0,
    },
    grid: {
      left: 60,
      right: 20,
      top: 40,
      bottom: 40,
    },
    xAxis: {
      type: 'time',
      axisLine: { lineStyle: { color: borderColor } },
      axisLabel: { color: textColor, fontSize: 11 },
      splitLine: { show: false },
    },
    yAxis: {
      type: 'value',
      axisLine: { lineStyle: { color: borderColor } },
      axisLabel: {
        color: textColor,
        fontSize: 11,
        formatter: function(v) { return formatNumber(v); }
      },
      splitLine: { lineStyle: { color: borderColor, opacity: 0.3 } },
    },
    series: series,
  };

  tokenChart.setOption(option);

  // Add global listeners only once
  if (!tokenChartListenersAdded) {
    tokenChartListenersAdded = true;
    window.addEventListener('resize', function() {
      if (tokenChart) tokenChart.resize();
    });
    // Re-render with the new theme, but only if the Tokens tab has been opened
    window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', function() {
      if (document.getElementById('tab-tokens').getAttribute('data-loaded') === 'true') {
        loadTokenStats();
      }
    });
  }
}
|
|
3911
4800
|
`;
|
|
3912
4801
|
|
|
3913
4802
|
//#endregion
|
|
@@ -3948,10 +4837,39 @@ body {
|
|
|
3948
4837
|
color: var(--text);
|
|
3949
4838
|
line-height: 1.4;
|
|
3950
4839
|
font-size: 13px;
|
|
4840
|
+
height: 100vh;
|
|
4841
|
+
display: flex;
|
|
4842
|
+
flex-direction: column;
|
|
4843
|
+
}
|
|
4844
|
+
|
|
4845
|
+
/* Tab bar */
|
|
4846
|
+
.tab-bar {
|
|
4847
|
+
display: flex;
|
|
4848
|
+
gap: 0;
|
|
4849
|
+
border-bottom: 1px solid var(--border);
|
|
4850
|
+
background: var(--bg-secondary);
|
|
4851
|
+
padding: 0 16px;
|
|
4852
|
+
flex-shrink: 0;
|
|
4853
|
+
}
|
|
4854
|
+
.tab-item {
|
|
4855
|
+
padding: 10px 20px;
|
|
4856
|
+
cursor: pointer;
|
|
4857
|
+
font-size: 13px;
|
|
4858
|
+
font-weight: 500;
|
|
4859
|
+
color: var(--text-muted);
|
|
4860
|
+
border-bottom: 2px solid transparent;
|
|
4861
|
+
transition: all 0.15s;
|
|
4862
|
+
user-select: none;
|
|
4863
|
+
}
|
|
4864
|
+
.tab-item:hover { color: var(--text); }
|
|
4865
|
+
.tab-item.active {
|
|
4866
|
+
color: var(--primary);
|
|
4867
|
+
border-bottom-color: var(--primary);
|
|
3951
4868
|
}
|
|
4869
|
+
.tab-panel { flex: 1; overflow: hidden; }
|
|
3952
4870
|
|
|
3953
4871
|
/* Layout */
|
|
3954
|
-
.layout { display: flex; height:
|
|
4872
|
+
.layout { display: flex; height: 100%; }
|
|
3955
4873
|
.sidebar {
|
|
3956
4874
|
width: 280px;
|
|
3957
4875
|
border-right: 1px solid var(--border);
|
|
@@ -4286,11 +5204,67 @@ input::placeholder { color: var(--text-dim); }
|
|
|
4286
5204
|
white-space: pre-wrap;
|
|
4287
5205
|
word-break: break-word;
|
|
4288
5206
|
}
|
|
5207
|
+
|
|
5208
|
+
/* Tokens tab */
|
|
5209
|
+
.tokens-container {
|
|
5210
|
+
height: 100%;
|
|
5211
|
+
display: flex;
|
|
5212
|
+
flex-direction: column;
|
|
5213
|
+
overflow-y: auto;
|
|
5214
|
+
}
|
|
5215
|
+
.tokens-header {
|
|
5216
|
+
padding: 12px 16px;
|
|
5217
|
+
border-bottom: 1px solid var(--border);
|
|
5218
|
+
background: var(--bg-secondary);
|
|
5219
|
+
}
|
|
5220
|
+
.tokens-header h1 { font-size: 16px; font-weight: 600; }
|
|
5221
|
+
.tokens-table {
|
|
5222
|
+
width: 100%;
|
|
5223
|
+
border-collapse: collapse;
|
|
5224
|
+
font-size: 13px;
|
|
5225
|
+
}
|
|
5226
|
+
.tokens-table th {
|
|
5227
|
+
text-align: left;
|
|
5228
|
+
padding: 10px 16px;
|
|
5229
|
+
border-bottom: 2px solid var(--border);
|
|
5230
|
+
color: var(--text-muted);
|
|
5231
|
+
font-size: 11px;
|
|
5232
|
+
text-transform: uppercase;
|
|
5233
|
+
letter-spacing: 0.5px;
|
|
5234
|
+
font-weight: 600;
|
|
5235
|
+
}
|
|
5236
|
+
.tokens-table td {
|
|
5237
|
+
padding: 10px 16px;
|
|
5238
|
+
border-bottom: 1px solid var(--border);
|
|
5239
|
+
}
|
|
5240
|
+
.tokens-table tr:hover td { background: var(--bg-secondary); }
|
|
5241
|
+
.tokens-table .number { text-align: right; font-family: 'SF Mono', Monaco, 'Courier New', monospace; }
|
|
5242
|
+
.tokens-table tfoot td {
|
|
5243
|
+
font-weight: 600;
|
|
5244
|
+
border-top: 2px solid var(--border);
|
|
5245
|
+
}
|
|
5246
|
+
.chart-section { flex: 1; min-height: 0; display: flex; flex-direction: column; padding: 16px; }
|
|
5247
|
+
.chart-title { font-size: 14px; font-weight: 600; margin-bottom: 12px; }
|
|
5248
|
+
.chart-container { flex: 1; min-height: 400px; }
|
|
5249
|
+
.chart-fallback {
|
|
5250
|
+
padding: 40px 20px;
|
|
5251
|
+
text-align: center;
|
|
5252
|
+
color: var(--text-muted);
|
|
5253
|
+
background: var(--bg-secondary);
|
|
5254
|
+
border-radius: 8px;
|
|
5255
|
+
border: 1px solid var(--border);
|
|
5256
|
+
}
|
|
4289
5257
|
`;
|
|
4290
5258
|
|
|
4291
5259
|
//#endregion
|
|
4292
5260
|
//#region src/routes/history/ui/template.ts
|
|
4293
5261
|
const template = `
|
|
5262
|
+
<div class="tab-bar">
|
|
5263
|
+
<div class="tab-item active" onclick="switchTab('requests')" data-tab="requests">Requests</div>
|
|
5264
|
+
<div class="tab-item" onclick="switchTab('tokens')" data-tab="tokens">Tokens</div>
|
|
5265
|
+
</div>
|
|
5266
|
+
|
|
5267
|
+
<div id="tab-requests" class="tab-panel">
|
|
4294
5268
|
<div class="layout">
|
|
4295
5269
|
<!-- Sidebar: Sessions -->
|
|
4296
5270
|
<div class="sidebar">
|
|
@@ -4354,6 +5328,25 @@ const template = `
|
|
|
4354
5328
|
</div>
|
|
4355
5329
|
</div>
|
|
4356
5330
|
</div>
|
|
5331
|
+
</div>
|
|
5332
|
+
|
|
5333
|
+
<div id="tab-tokens" class="tab-panel" style="display:none" data-loaded="false">
|
|
5334
|
+
<div class="tokens-container">
|
|
5335
|
+
<div class="tokens-header">
|
|
5336
|
+
<h1>Token Analytics</h1>
|
|
5337
|
+
</div>
|
|
5338
|
+
<div id="tokens-table-container">
|
|
5339
|
+
<div class="loading">Loading...</div>
|
|
5340
|
+
</div>
|
|
5341
|
+
<div class="chart-section">
|
|
5342
|
+
<h2 class="chart-title">Cumulative Token Usage</h2>
|
|
5343
|
+
<div class="chart-container" id="token-chart"></div>
|
|
5344
|
+
<div class="chart-fallback" id="chart-fallback" style="display:none">
|
|
5345
|
+
ECharts library failed to load. Token chart is unavailable.
|
|
5346
|
+
</div>
|
|
5347
|
+
</div>
|
|
5348
|
+
</div>
|
|
5349
|
+
</div>
|
|
4357
5350
|
|
|
4358
5351
|
<!-- Raw JSON Modal -->
|
|
4359
5352
|
<div class="modal-overlay" id="raw-modal" onclick="closeRawModal(event)">
|
|
@@ -4382,6 +5375,7 @@ function getHistoryUI() {
|
|
|
4382
5375
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
4383
5376
|
<title>Copilot API - Request History</title>
|
|
4384
5377
|
<link rel="icon" href="data:,">
|
|
5378
|
+
<script defer src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"><\/script>
|
|
4385
5379
|
<style>${styles}</style>
|
|
4386
5380
|
</head>
|
|
4387
5381
|
<body>
|
|
@@ -4398,10 +5392,16 @@ historyRoutes.get("/api/entries", handleGetEntries);
|
|
|
4398
5392
|
historyRoutes.get("/api/entries/:id", handleGetEntry);
|
|
4399
5393
|
historyRoutes.delete("/api/entries", handleDeleteEntries);
|
|
4400
5394
|
historyRoutes.get("/api/stats", handleGetStats);
|
|
5395
|
+
historyRoutes.get("/api/token-stats", handleGetTokenStats);
|
|
4401
5396
|
historyRoutes.get("/api/export", handleExport);
|
|
4402
5397
|
historyRoutes.get("/api/sessions", handleGetSessions);
|
|
4403
5398
|
historyRoutes.get("/api/sessions/:id", handleGetSession);
|
|
4404
5399
|
historyRoutes.delete("/api/sessions/:id", handleDeleteSession);
|
|
5400
|
+
// WebSocket upgrade endpoint for live history updates. Non-upgrade requests
// get 426; otherwise the upgrade is delegated to the runtime server exposed on
// c.env (when present), and 500 is returned if that fails or is unavailable.
historyRoutes.get("/ws", (c) => {
	// RFC 6455 treats the Upgrade token as case-insensitive, so values such as
	// "WebSocket" sent by some clients must be accepted as well.
	if (c.req.header("Upgrade")?.toLowerCase() !== "websocket") return c.text("Expected WebSocket upgrade", 426);
	if (c.env?.server?.upgrade(c.req.raw)) return new Response(null, { status: 101 });
	return c.text("WebSocket upgrade failed", 500);
});
|
|
4405
5405
|
historyRoutes.get("/", (c) => {
|
|
4406
5406
|
return c.html(getHistoryUI());
|
|
4407
5407
|
});
|
|
@@ -5853,6 +6853,7 @@ function prependMarkerToAnthropicResponse$1(response, marker) {
|
|
|
5853
6853
|
async function handleDirectAnthropicStreamingResponse(opts) {
|
|
5854
6854
|
const { stream, response, anthropicPayload, ctx } = opts;
|
|
5855
6855
|
const acc = createAnthropicStreamAccumulator();
|
|
6856
|
+
const checkRepetition = createStreamRepetitionChecker(`anthropic:${anthropicPayload.model}`);
|
|
5856
6857
|
try {
|
|
5857
6858
|
for await (const rawEvent of response) {
|
|
5858
6859
|
consola.debug("Direct Anthropic raw stream event:", JSON.stringify(rawEvent));
|
|
@@ -5866,6 +6867,7 @@ async function handleDirectAnthropicStreamingResponse(opts) {
|
|
|
5866
6867
|
continue;
|
|
5867
6868
|
}
|
|
5868
6869
|
processAnthropicEvent(event, acc);
|
|
6870
|
+
if (event.type === "content_block_delta" && event.delta.type === "text_delta") checkRepetition(event.delta.text);
|
|
5869
6871
|
await stream.writeSSE({
|
|
5870
6872
|
event: rawEvent.event || event.type,
|
|
5871
6873
|
data: rawEvent.data
|
|
@@ -6065,6 +7067,7 @@ async function handleStreamingResponse(opts) {
|
|
|
6065
7067
|
toolCalls: {}
|
|
6066
7068
|
};
|
|
6067
7069
|
const acc = createAnthropicStreamAccumulator();
|
|
7070
|
+
const checkRepetition = createStreamRepetitionChecker(`translated:${anthropicPayload.model}`);
|
|
6068
7071
|
try {
|
|
6069
7072
|
if (ctx.truncateResult?.wasCompacted) {
|
|
6070
7073
|
const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
|
|
@@ -6076,7 +7079,8 @@ async function handleStreamingResponse(opts) {
|
|
|
6076
7079
|
response,
|
|
6077
7080
|
toolNameMapping,
|
|
6078
7081
|
streamState,
|
|
6079
|
-
acc
|
|
7082
|
+
acc,
|
|
7083
|
+
checkRepetition
|
|
6080
7084
|
});
|
|
6081
7085
|
recordStreamingResponse(acc, anthropicPayload.model, ctx);
|
|
6082
7086
|
completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
|
|
@@ -6132,7 +7136,7 @@ async function sendTruncationMarkerEvent(stream, streamState, marker) {
|
|
|
6132
7136
|
streamState.contentBlockIndex++;
|
|
6133
7137
|
}
|
|
6134
7138
|
async function processStreamChunks(opts) {
|
|
6135
|
-
const { stream, response, toolNameMapping, streamState, acc } = opts;
|
|
7139
|
+
const { stream, response, toolNameMapping, streamState, acc, checkRepetition } = opts;
|
|
6136
7140
|
for await (const rawEvent of response) {
|
|
6137
7141
|
consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent));
|
|
6138
7142
|
if (rawEvent.data === "[DONE]") break;
|
|
@@ -6149,6 +7153,7 @@ async function processStreamChunks(opts) {
|
|
|
6149
7153
|
for (const event of events) {
|
|
6150
7154
|
consola.debug("Translated Anthropic event:", JSON.stringify(event));
|
|
6151
7155
|
processAnthropicEvent(event, acc);
|
|
7156
|
+
if (event.type === "content_block_delta" && event.delta.type === "text_delta") checkRepetition(event.delta.text);
|
|
6152
7157
|
await stream.writeSSE({
|
|
6153
7158
|
event: event.type,
|
|
6154
7159
|
data: JSON.stringify(event)
|
|
@@ -6632,6 +7637,7 @@ async function runServer(options) {
|
|
|
6632
7637
|
consola.info(`History recording enabled (${limitText} entries)`);
|
|
6633
7638
|
}
|
|
6634
7639
|
initTui({ enabled: true });
|
|
7640
|
+
initRequestContextManager(state.staleRequestMaxAge).startReaper();
|
|
6635
7641
|
await ensurePaths();
|
|
6636
7642
|
await cacheVSCodeVersion();
|
|
6637
7643
|
if (options.githubToken) {
|
|
@@ -6671,11 +7677,12 @@ async function runServer(options) {
|
|
|
6671
7677
|
}
|
|
6672
7678
|
}
|
|
6673
7679
|
consola.box(`š Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage${options.history ? `\nš History UI: ${serverUrl}/history` : ""}`);
|
|
6674
|
-
|
|
7680
|
+
setupShutdownHandlers();
|
|
7681
|
+
setServerInstance(serve({
|
|
6675
7682
|
fetch: server.fetch,
|
|
6676
7683
|
port: options.port,
|
|
6677
7684
|
hostname: options.host
|
|
6678
|
-
});
|
|
7685
|
+
}));
|
|
6679
7686
|
}
|
|
6680
7687
|
const start = defineCommand({
|
|
6681
7688
|
meta: {
|