@dianshuv/copilot-api 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -0
- package/dist/main.mjs +910 -47
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,6 +10,11 @@
|
|
|
10
10
|
|
|
11
11
|
- **Responses API endpoint**: `/v1/responses` passthrough for codex models (e.g., `gpt-5.2-codex`, `gpt-5.3-codex`) used by tools like OpenCode. Includes stream ID synchronization for `@ai-sdk/openai` compatibility.
|
|
12
12
|
- **SubagentStart marker support**: Detects `__SUBAGENT_MARKER__` injected by Claude Code hooks to override `X-Initiator` header to `"agent"` for subagent requests, ensuring correct credit tier usage. Includes a ready-to-use Claude plugin (`claude-plugin/`).
|
|
13
|
+
- **Token analytics tab**: The `/history` page includes a Tokens tab with per-model token usage summary table and cumulative ECharts line chart for visualizing API consumption over time.
|
|
14
|
+
- **Real-time history updates**: The `/history` UI uses WebSocket for live updates instead of polling, with automatic fallback to polling and exponential backoff reconnection.
|
|
15
|
+
- **Graceful shutdown**: 4-phase shutdown sequence — stops accepting requests, waits for in-flight requests to complete, sends abort signal, then force-closes. Configurable via `--shutdown-graceful-wait` and `--shutdown-abort-wait`.
|
|
16
|
+
- **Stream repetition detection**: Detects when models get stuck in repetitive output loops using KMP-based pattern matching and logs a warning.
|
|
17
|
+
- **Stale request reaping**: Automatically force-fails requests that exceed a configurable maximum age (default 600s) to prevent resource leaks.
|
|
13
18
|
|
|
14
19
|
## Quick Start
|
|
15
20
|
|
package/dist/main.mjs
CHANGED
|
@@ -50,7 +50,10 @@ const state = {
|
|
|
50
50
|
autoTruncate: true,
|
|
51
51
|
compressToolResults: false,
|
|
52
52
|
redirectAnthropic: false,
|
|
53
|
-
rewriteAnthropicTools: true
|
|
53
|
+
rewriteAnthropicTools: true,
|
|
54
|
+
staleRequestMaxAge: 600,
|
|
55
|
+
shutdownGracefulWait: 60,
|
|
56
|
+
shutdownAbortWait: 120
|
|
54
57
|
};
|
|
55
58
|
|
|
56
59
|
//#endregion
|
|
@@ -1017,11 +1020,11 @@ const patchClaude = defineCommand({
|
|
|
1017
1020
|
|
|
1018
1021
|
//#endregion
|
|
1019
1022
|
//#region package.json
|
|
1020
|
-
var version = "0.3.0";
|
|
1023
|
+
var version = "0.4.1";
|
|
1021
1024
|
|
|
1022
1025
|
//#endregion
|
|
1023
1026
|
//#region src/lib/adaptive-rate-limiter.ts
|
|
1024
|
-
const DEFAULT_CONFIG = {
|
|
1027
|
+
const DEFAULT_CONFIG$1 = {
|
|
1025
1028
|
baseRetryIntervalSeconds: 10,
|
|
1026
1029
|
maxRetryIntervalSeconds: 120,
|
|
1027
1030
|
requestIntervalSeconds: 10,
|
|
@@ -1050,7 +1053,7 @@ var AdaptiveRateLimiter = class {
|
|
|
1050
1053
|
recoveryStepIndex = 0;
|
|
1051
1054
|
constructor(config = {}) {
|
|
1052
1055
|
this.config = {
|
|
1053
|
-
...DEFAULT_CONFIG,
|
|
1056
|
+
...DEFAULT_CONFIG$1,
|
|
1054
1057
|
...config
|
|
1055
1058
|
};
|
|
1056
1059
|
}
|
|
@@ -1272,6 +1275,16 @@ var AdaptiveRateLimiter = class {
|
|
|
1272
1275
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
1273
1276
|
}
|
|
1274
1277
|
/**
|
|
1278
|
+
* Reject all currently queued requests during shutdown.
|
|
1279
|
+
* Returns the number of requests that were rejected.
|
|
1280
|
+
*/
|
|
1281
|
+
rejectQueued() {
|
|
1282
|
+
const count = this.queue.length;
|
|
1283
|
+
for (const request of this.queue) request.reject(/* @__PURE__ */ new Error("Server is shutting down"));
|
|
1284
|
+
this.queue = [];
|
|
1285
|
+
return count;
|
|
1286
|
+
}
|
|
1287
|
+
/**
|
|
1275
1288
|
* Get current status for debugging/monitoring
|
|
1276
1289
|
*/
|
|
1277
1290
|
getStatus() {
|
|
@@ -1289,15 +1302,21 @@ let rateLimiterInstance = null;
|
|
|
1289
1302
|
*/
|
|
1290
1303
|
function initAdaptiveRateLimiter(config = {}) {
|
|
1291
1304
|
rateLimiterInstance = new AdaptiveRateLimiter(config);
|
|
1292
|
-
const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
|
|
1293
|
-
const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
|
|
1294
|
-
const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
|
|
1295
|
-
const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
|
|
1296
|
-
const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
|
|
1297
|
-
const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
|
|
1305
|
+
const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG$1.baseRetryIntervalSeconds;
|
|
1306
|
+
const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG$1.maxRetryIntervalSeconds;
|
|
1307
|
+
const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG$1.requestIntervalSeconds;
|
|
1308
|
+
const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG$1.recoveryTimeoutMinutes;
|
|
1309
|
+
const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG$1.consecutiveSuccessesForRecovery;
|
|
1310
|
+
const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG$1.gradualRecoverySteps;
|
|
1298
1311
|
consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
|
|
1299
1312
|
}
|
|
1300
1313
|
/**
|
|
1314
|
+
* Get the rate limiter instance
|
|
1315
|
+
*/
|
|
1316
|
+
function getAdaptiveRateLimiter() {
|
|
1317
|
+
return rateLimiterInstance;
|
|
1318
|
+
}
|
|
1319
|
+
/**
|
|
1301
1320
|
* Execute a request with adaptive rate limiting.
|
|
1302
1321
|
* If rate limiter is not initialized, executes immediately.
|
|
1303
1322
|
* Returns the result along with queue wait time.
|
|
@@ -1310,6 +1329,292 @@ async function executeWithAdaptiveRateLimit(fn) {
|
|
|
1310
1329
|
return rateLimiterInstance.execute(fn);
|
|
1311
1330
|
}
|
|
1312
1331
|
|
|
1332
|
+
//#endregion
|
|
1333
|
+
//#region src/lib/context/request.ts
|
|
1334
|
+
let idCounter = 0;
|
|
1335
|
+
function createRequestContext(opts) {
|
|
1336
|
+
const id = `req_${Date.now()}_${++idCounter}`;
|
|
1337
|
+
const startTime = Date.now();
|
|
1338
|
+
const onEvent = opts.onEvent;
|
|
1339
|
+
let _state = "pending";
|
|
1340
|
+
let _originalRequest = null;
|
|
1341
|
+
let _response = null;
|
|
1342
|
+
let settled = false;
|
|
1343
|
+
function emit(event) {
|
|
1344
|
+
try {
|
|
1345
|
+
onEvent(event);
|
|
1346
|
+
} catch {}
|
|
1347
|
+
}
|
|
1348
|
+
const ctx = {
|
|
1349
|
+
id,
|
|
1350
|
+
tuiLogId: opts.tuiLogId,
|
|
1351
|
+
startTime,
|
|
1352
|
+
endpoint: opts.endpoint,
|
|
1353
|
+
get state() {
|
|
1354
|
+
return _state;
|
|
1355
|
+
},
|
|
1356
|
+
get durationMs() {
|
|
1357
|
+
return Date.now() - startTime;
|
|
1358
|
+
},
|
|
1359
|
+
get settled() {
|
|
1360
|
+
return settled;
|
|
1361
|
+
},
|
|
1362
|
+
get originalRequest() {
|
|
1363
|
+
return _originalRequest;
|
|
1364
|
+
},
|
|
1365
|
+
get response() {
|
|
1366
|
+
return _response;
|
|
1367
|
+
},
|
|
1368
|
+
setOriginalRequest(req) {
|
|
1369
|
+
_originalRequest = req;
|
|
1370
|
+
emit({
|
|
1371
|
+
type: "updated",
|
|
1372
|
+
context: ctx,
|
|
1373
|
+
field: "originalRequest"
|
|
1374
|
+
});
|
|
1375
|
+
},
|
|
1376
|
+
transition(newState) {
|
|
1377
|
+
const previousState = _state;
|
|
1378
|
+
_state = newState;
|
|
1379
|
+
emit({
|
|
1380
|
+
type: "state_changed",
|
|
1381
|
+
context: ctx,
|
|
1382
|
+
previousState
|
|
1383
|
+
});
|
|
1384
|
+
},
|
|
1385
|
+
complete(response) {
|
|
1386
|
+
if (settled) return;
|
|
1387
|
+
settled = true;
|
|
1388
|
+
_response = response;
|
|
1389
|
+
_state = "completed";
|
|
1390
|
+
emit({
|
|
1391
|
+
type: "completed",
|
|
1392
|
+
context: ctx,
|
|
1393
|
+
entry: ctx.toHistoryEntry()
|
|
1394
|
+
});
|
|
1395
|
+
},
|
|
1396
|
+
fail(model, error) {
|
|
1397
|
+
if (settled) return;
|
|
1398
|
+
settled = true;
|
|
1399
|
+
_response = {
|
|
1400
|
+
success: false,
|
|
1401
|
+
model,
|
|
1402
|
+
usage: {
|
|
1403
|
+
input_tokens: 0,
|
|
1404
|
+
output_tokens: 0
|
|
1405
|
+
},
|
|
1406
|
+
error: error instanceof Error ? error.message : String(error),
|
|
1407
|
+
content: null
|
|
1408
|
+
};
|
|
1409
|
+
_state = "failed";
|
|
1410
|
+
emit({
|
|
1411
|
+
type: "failed",
|
|
1412
|
+
context: ctx,
|
|
1413
|
+
entry: ctx.toHistoryEntry()
|
|
1414
|
+
});
|
|
1415
|
+
},
|
|
1416
|
+
toHistoryEntry() {
|
|
1417
|
+
const entry = {
|
|
1418
|
+
id,
|
|
1419
|
+
endpoint: opts.endpoint,
|
|
1420
|
+
timestamp: startTime,
|
|
1421
|
+
durationMs: Date.now() - startTime,
|
|
1422
|
+
request: {
|
|
1423
|
+
model: _originalRequest?.model,
|
|
1424
|
+
messages: _originalRequest?.messages,
|
|
1425
|
+
stream: _originalRequest?.stream,
|
|
1426
|
+
tools: _originalRequest?.tools,
|
|
1427
|
+
system: _originalRequest?.system
|
|
1428
|
+
}
|
|
1429
|
+
};
|
|
1430
|
+
if (_response) entry.response = _response;
|
|
1431
|
+
return entry;
|
|
1432
|
+
}
|
|
1433
|
+
};
|
|
1434
|
+
return ctx;
|
|
1435
|
+
}
|
|
1436
|
+
|
|
1437
|
+
//#endregion
|
|
1438
|
+
//#region src/lib/context/manager.ts
|
|
1439
|
+
/**
|
|
1440
|
+
* RequestContextManager — Active request management
|
|
1441
|
+
*
|
|
1442
|
+
* Manages all in-flight RequestContext instances. Publishes events for
|
|
1443
|
+
* WebSocket push and history persistence.
|
|
1444
|
+
*/
|
|
1445
|
+
let _manager = null;
|
|
1446
|
+
function initRequestContextManager(staleMaxAgeSec) {
|
|
1447
|
+
_manager = createRequestContextManager(staleMaxAgeSec);
|
|
1448
|
+
return _manager;
|
|
1449
|
+
}
|
|
1450
|
+
const REAPER_INTERVAL_MS = 6e4;
|
|
1451
|
+
const DEFAULT_STALE_MAX_AGE_SEC = 600;
|
|
1452
|
+
function createRequestContextManager(staleMaxAgeSec) {
|
|
1453
|
+
const maxAgeSec = staleMaxAgeSec ?? DEFAULT_STALE_MAX_AGE_SEC;
|
|
1454
|
+
const activeContexts = /* @__PURE__ */ new Map();
|
|
1455
|
+
const listeners = /* @__PURE__ */ new Set();
|
|
1456
|
+
let reaperTimer = null;
|
|
1457
|
+
function runReaperOnce() {
|
|
1458
|
+
if (maxAgeSec <= 0) return;
|
|
1459
|
+
const maxAgeMs = maxAgeSec * 1e3;
|
|
1460
|
+
for (const [id, ctx] of activeContexts) if (ctx.durationMs > maxAgeMs) {
|
|
1461
|
+
consola.warn(`[context] Force-failing stale request ${id} (endpoint: ${ctx.endpoint}, model: ${ctx.originalRequest?.model ?? "unknown"}, state: ${ctx.state}, age: ${Math.round(ctx.durationMs / 1e3)}s, max: ${maxAgeSec}s)`);
|
|
1462
|
+
ctx.fail(ctx.originalRequest?.model ?? "unknown", /* @__PURE__ */ new Error(`Request exceeded maximum age of ${maxAgeSec}s (stale context reaper)`));
|
|
1463
|
+
}
|
|
1464
|
+
}
|
|
1465
|
+
function startReaper() {
|
|
1466
|
+
if (reaperTimer) return;
|
|
1467
|
+
reaperTimer = setInterval(runReaperOnce, REAPER_INTERVAL_MS);
|
|
1468
|
+
}
|
|
1469
|
+
function stopReaper() {
|
|
1470
|
+
if (reaperTimer) {
|
|
1471
|
+
clearInterval(reaperTimer);
|
|
1472
|
+
reaperTimer = null;
|
|
1473
|
+
}
|
|
1474
|
+
}
|
|
1475
|
+
function emit(event) {
|
|
1476
|
+
for (const listener of listeners) try {
|
|
1477
|
+
listener(event);
|
|
1478
|
+
} catch {}
|
|
1479
|
+
}
|
|
1480
|
+
function handleContextEvent(rawEvent) {
|
|
1481
|
+
const { type, context } = rawEvent;
|
|
1482
|
+
switch (type) {
|
|
1483
|
+
case "state_changed":
|
|
1484
|
+
if (rawEvent.previousState) emit({
|
|
1485
|
+
type: "state_changed",
|
|
1486
|
+
context,
|
|
1487
|
+
previousState: rawEvent.previousState
|
|
1488
|
+
});
|
|
1489
|
+
break;
|
|
1490
|
+
case "updated":
|
|
1491
|
+
if (rawEvent.field) emit({
|
|
1492
|
+
type: "updated",
|
|
1493
|
+
context,
|
|
1494
|
+
field: rawEvent.field
|
|
1495
|
+
});
|
|
1496
|
+
break;
|
|
1497
|
+
case "completed":
|
|
1498
|
+
if (rawEvent.entry) emit({
|
|
1499
|
+
type: "completed",
|
|
1500
|
+
context,
|
|
1501
|
+
entry: rawEvent.entry
|
|
1502
|
+
});
|
|
1503
|
+
activeContexts.delete(context.id);
|
|
1504
|
+
break;
|
|
1505
|
+
case "failed":
|
|
1506
|
+
if (rawEvent.entry) emit({
|
|
1507
|
+
type: "failed",
|
|
1508
|
+
context,
|
|
1509
|
+
entry: rawEvent.entry
|
|
1510
|
+
});
|
|
1511
|
+
activeContexts.delete(context.id);
|
|
1512
|
+
break;
|
|
1513
|
+
default: break;
|
|
1514
|
+
}
|
|
1515
|
+
}
|
|
1516
|
+
return {
|
|
1517
|
+
create(opts) {
|
|
1518
|
+
const ctx = createRequestContext({
|
|
1519
|
+
endpoint: opts.endpoint,
|
|
1520
|
+
tuiLogId: opts.tuiLogId,
|
|
1521
|
+
onEvent: handleContextEvent
|
|
1522
|
+
});
|
|
1523
|
+
activeContexts.set(ctx.id, ctx);
|
|
1524
|
+
emit({
|
|
1525
|
+
type: "created",
|
|
1526
|
+
context: ctx
|
|
1527
|
+
});
|
|
1528
|
+
return ctx;
|
|
1529
|
+
},
|
|
1530
|
+
get(id) {
|
|
1531
|
+
return activeContexts.get(id);
|
|
1532
|
+
},
|
|
1533
|
+
getAll() {
|
|
1534
|
+
return Array.from(activeContexts.values());
|
|
1535
|
+
},
|
|
1536
|
+
get activeCount() {
|
|
1537
|
+
return activeContexts.size;
|
|
1538
|
+
},
|
|
1539
|
+
on(_event, listener) {
|
|
1540
|
+
listeners.add(listener);
|
|
1541
|
+
},
|
|
1542
|
+
off(_event, listener) {
|
|
1543
|
+
listeners.delete(listener);
|
|
1544
|
+
},
|
|
1545
|
+
startReaper,
|
|
1546
|
+
stopReaper,
|
|
1547
|
+
_runReaperOnce: runReaperOnce
|
|
1548
|
+
};
|
|
1549
|
+
}
|
|
1550
|
+
|
|
1551
|
+
//#endregion
|
|
1552
|
+
//#region src/lib/history-ws.ts
|
|
1553
|
+
/**
|
|
1554
|
+
* WebSocket support for History API.
|
|
1555
|
+
* Enables real-time updates when new requests are recorded.
|
|
1556
|
+
*/
|
|
1557
|
+
const clients = /* @__PURE__ */ new Set();
|
|
1558
|
+
function getClientCount() {
|
|
1559
|
+
return clients.size;
|
|
1560
|
+
}
|
|
1561
|
+
function closeAllClients() {
|
|
1562
|
+
for (const client of clients) try {
|
|
1563
|
+
client.close(1001, "Server shutting down");
|
|
1564
|
+
} catch {}
|
|
1565
|
+
clients.clear();
|
|
1566
|
+
}
|
|
1567
|
+
function broadcast(message) {
|
|
1568
|
+
const data = JSON.stringify(message);
|
|
1569
|
+
for (const client of clients) try {
|
|
1570
|
+
if (client.readyState === WebSocket.OPEN) client.send(data);
|
|
1571
|
+
else clients.delete(client);
|
|
1572
|
+
} catch (error) {
|
|
1573
|
+
consola.debug("WebSocket send failed, removing client:", error);
|
|
1574
|
+
clients.delete(client);
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
function notifyEntryAdded(summary) {
|
|
1578
|
+
if (clients.size === 0) return;
|
|
1579
|
+
broadcast({
|
|
1580
|
+
type: "entry_added",
|
|
1581
|
+
data: summary,
|
|
1582
|
+
timestamp: Date.now()
|
|
1583
|
+
});
|
|
1584
|
+
}
|
|
1585
|
+
function notifyEntryUpdated(summary) {
|
|
1586
|
+
if (clients.size === 0) return;
|
|
1587
|
+
broadcast({
|
|
1588
|
+
type: "entry_updated",
|
|
1589
|
+
data: summary,
|
|
1590
|
+
timestamp: Date.now()
|
|
1591
|
+
});
|
|
1592
|
+
}
|
|
1593
|
+
function notifyStatsUpdated(stats) {
|
|
1594
|
+
if (clients.size === 0) return;
|
|
1595
|
+
broadcast({
|
|
1596
|
+
type: "stats_updated",
|
|
1597
|
+
data: stats,
|
|
1598
|
+
timestamp: Date.now()
|
|
1599
|
+
});
|
|
1600
|
+
}
|
|
1601
|
+
function notifyHistoryCleared() {
|
|
1602
|
+
if (clients.size === 0) return;
|
|
1603
|
+
broadcast({
|
|
1604
|
+
type: "history_cleared",
|
|
1605
|
+
data: null,
|
|
1606
|
+
timestamp: Date.now()
|
|
1607
|
+
});
|
|
1608
|
+
}
|
|
1609
|
+
function notifySessionDeleted(sessionId) {
|
|
1610
|
+
if (clients.size === 0) return;
|
|
1611
|
+
broadcast({
|
|
1612
|
+
type: "session_deleted",
|
|
1613
|
+
data: { sessionId },
|
|
1614
|
+
timestamp: Date.now()
|
|
1615
|
+
});
|
|
1616
|
+
}
|
|
1617
|
+
|
|
1313
1618
|
//#endregion
|
|
1314
1619
|
//#region src/lib/history.ts
|
|
1315
1620
|
function generateId$1() {
|
|
@@ -1389,6 +1694,13 @@ function recordRequest(endpoint, request) {
|
|
|
1389
1694
|
if (historyState.entries.filter((e) => e.sessionId === removed.sessionId).length === 0) historyState.sessions.delete(removed.sessionId);
|
|
1390
1695
|
}
|
|
1391
1696
|
}
|
|
1697
|
+
notifyEntryAdded({
|
|
1698
|
+
id: entry.id,
|
|
1699
|
+
endpoint,
|
|
1700
|
+
model: request.model,
|
|
1701
|
+
stream: request.stream,
|
|
1702
|
+
timestamp: entry.timestamp
|
|
1703
|
+
});
|
|
1392
1704
|
return entry.id;
|
|
1393
1705
|
}
|
|
1394
1706
|
function recordResponse(id, response, durationMs) {
|
|
@@ -1403,6 +1715,20 @@ function recordResponse(id, response, durationMs) {
|
|
|
1403
1715
|
session.totalOutputTokens += response.usage.output_tokens;
|
|
1404
1716
|
session.lastActivity = Date.now();
|
|
1405
1717
|
}
|
|
1718
|
+
notifyEntryUpdated({
|
|
1719
|
+
id: entry.id,
|
|
1720
|
+
endpoint: entry.endpoint,
|
|
1721
|
+
model: response.model,
|
|
1722
|
+
success: response.success,
|
|
1723
|
+
durationMs,
|
|
1724
|
+
inputTokens: response.usage.input_tokens,
|
|
1725
|
+
outputTokens: response.usage.output_tokens
|
|
1726
|
+
});
|
|
1727
|
+
notifyStatsUpdated({
|
|
1728
|
+
totalRequests: historyState.entries.length,
|
|
1729
|
+
totalInputTokens: session?.totalInputTokens ?? 0,
|
|
1730
|
+
totalOutputTokens: session?.totalOutputTokens ?? 0
|
|
1731
|
+
});
|
|
1406
1732
|
}
|
|
1407
1733
|
}
|
|
1408
1734
|
function getHistory(options = {}) {
|
|
@@ -1477,12 +1803,14 @@ function clearHistory() {
|
|
|
1477
1803
|
historyState.entries = [];
|
|
1478
1804
|
historyState.sessions = /* @__PURE__ */ new Map();
|
|
1479
1805
|
historyState.currentSessionId = generateId$1();
|
|
1806
|
+
notifyHistoryCleared();
|
|
1480
1807
|
}
|
|
1481
1808
|
function deleteSession(sessionId) {
|
|
1482
1809
|
if (!historyState.sessions.has(sessionId)) return false;
|
|
1483
1810
|
historyState.entries = historyState.entries.filter((e) => e.sessionId !== sessionId);
|
|
1484
1811
|
historyState.sessions.delete(sessionId);
|
|
1485
1812
|
if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId$1();
|
|
1813
|
+
notifySessionDeleted(sessionId);
|
|
1486
1814
|
return true;
|
|
1487
1815
|
}
|
|
1488
1816
|
function getStats() {
|
|
@@ -1731,6 +2059,142 @@ function generateEnvScript(envVars, commandToRun = "") {
|
|
|
1731
2059
|
return commandBlock || commandToRun;
|
|
1732
2060
|
}
|
|
1733
2061
|
|
|
2062
|
+
//#endregion
|
|
2063
|
+
//#region src/lib/shutdown.ts
|
|
2064
|
+
const DRAIN_POLL_INTERVAL_MS = 500;
|
|
2065
|
+
const DRAIN_PROGRESS_INTERVAL_MS = 5e3;
|
|
2066
|
+
let serverInstance = null;
|
|
2067
|
+
let _isShuttingDown = false;
|
|
2068
|
+
let shutdownResolve = null;
|
|
2069
|
+
let shutdownAbortController = null;
|
|
2070
|
+
function getIsShuttingDown() {
|
|
2071
|
+
return _isShuttingDown;
|
|
2072
|
+
}
|
|
2073
|
+
function setServerInstance(server) {
|
|
2074
|
+
serverInstance = server;
|
|
2075
|
+
}
|
|
2076
|
+
function formatActiveRequestsSummary(requests) {
|
|
2077
|
+
const now = Date.now();
|
|
2078
|
+
const lines = requests.map((req) => {
|
|
2079
|
+
const age = Math.round((now - req.startTime) / 1e3);
|
|
2080
|
+
const model = req.model || "unknown";
|
|
2081
|
+
const tags = req.tags?.length ? ` [${req.tags.join(", ")}]` : "";
|
|
2082
|
+
return ` ${req.method} ${req.path} ${model} (${req.status}, ${age}s)${tags}`;
|
|
2083
|
+
});
|
|
2084
|
+
return `Waiting for ${requests.length} active request(s):\n${lines.join("\n")}`;
|
|
2085
|
+
}
|
|
2086
|
+
async function drainActiveRequests(timeoutMs, tracker, opts) {
|
|
2087
|
+
const pollInterval = opts?.pollIntervalMs ?? DRAIN_POLL_INTERVAL_MS;
|
|
2088
|
+
const progressInterval = opts?.progressIntervalMs ?? DRAIN_PROGRESS_INTERVAL_MS;
|
|
2089
|
+
const deadline = Date.now() + timeoutMs;
|
|
2090
|
+
let lastProgressLog = 0;
|
|
2091
|
+
while (Date.now() < deadline) {
|
|
2092
|
+
const active = tracker.getActiveRequests();
|
|
2093
|
+
if (active.length === 0) return "drained";
|
|
2094
|
+
const now = Date.now();
|
|
2095
|
+
if (now - lastProgressLog >= progressInterval) {
|
|
2096
|
+
lastProgressLog = now;
|
|
2097
|
+
consola.info(formatActiveRequestsSummary(active));
|
|
2098
|
+
}
|
|
2099
|
+
await new Promise((resolve) => setTimeout(resolve, pollInterval));
|
|
2100
|
+
}
|
|
2101
|
+
return "timeout";
|
|
2102
|
+
}
|
|
2103
|
+
async function gracefulShutdown(signal, deps) {
|
|
2104
|
+
const tracker = deps?.tracker;
|
|
2105
|
+
const server = deps?.server ?? serverInstance;
|
|
2106
|
+
const rateLimiter = deps?.rateLimiter !== void 0 ? deps.rateLimiter : getAdaptiveRateLimiter();
|
|
2107
|
+
const stopRefresh = deps?.stopTokenRefreshFn ?? (() => {});
|
|
2108
|
+
const closeWsClients = deps?.closeAllClientsFn ?? closeAllClients;
|
|
2109
|
+
const getWsCount = deps?.getClientCountFn ?? getClientCount;
|
|
2110
|
+
const gracefulWaitMs = deps?.gracefulWaitMs ?? state.shutdownGracefulWait * 1e3;
|
|
2111
|
+
const abortWaitMs = deps?.abortWaitMs ?? state.shutdownAbortWait * 1e3;
|
|
2112
|
+
const drainOpts = {
|
|
2113
|
+
pollIntervalMs: deps?.drainPollIntervalMs ?? DRAIN_POLL_INTERVAL_MS,
|
|
2114
|
+
progressIntervalMs: deps?.drainProgressIntervalMs ?? DRAIN_PROGRESS_INTERVAL_MS
|
|
2115
|
+
};
|
|
2116
|
+
_isShuttingDown = true;
|
|
2117
|
+
shutdownAbortController = new AbortController();
|
|
2118
|
+
consola.info(`Received ${signal}, shutting down gracefully...`);
|
|
2119
|
+
try {
|
|
2120
|
+
deps?.contextManager?.stopReaper();
|
|
2121
|
+
} catch {}
|
|
2122
|
+
stopRefresh();
|
|
2123
|
+
const wsClients = getWsCount();
|
|
2124
|
+
if (wsClients > 0) {
|
|
2125
|
+
closeWsClients();
|
|
2126
|
+
consola.info(`Disconnected ${wsClients} WebSocket client(s)`);
|
|
2127
|
+
}
|
|
2128
|
+
if (rateLimiter) {
|
|
2129
|
+
const rejected = rateLimiter.rejectQueued();
|
|
2130
|
+
if (rejected > 0) consola.info(`Rejected ${rejected} queued request(s) from rate limiter`);
|
|
2131
|
+
}
|
|
2132
|
+
if (server) {
|
|
2133
|
+
server.close(false).catch((error) => {
|
|
2134
|
+
consola.error("Error stopping listener:", error);
|
|
2135
|
+
});
|
|
2136
|
+
consola.info("Stopped accepting new connections");
|
|
2137
|
+
}
|
|
2138
|
+
if (tracker) {
|
|
2139
|
+
const activeCount = tracker.getActiveRequests().length;
|
|
2140
|
+
if (activeCount > 0) {
|
|
2141
|
+
consola.info(`Phase 2: Waiting up to ${gracefulWaitMs / 1e3}s for ${activeCount} active request(s)...`);
|
|
2142
|
+
try {
|
|
2143
|
+
if (await drainActiveRequests(gracefulWaitMs, tracker, drainOpts) === "drained") {
|
|
2144
|
+
consola.info("All requests completed naturally");
|
|
2145
|
+
finalize(tracker);
|
|
2146
|
+
return;
|
|
2147
|
+
}
|
|
2148
|
+
} catch (error) {
|
|
2149
|
+
consola.error("Error during Phase 2 drain:", error);
|
|
2150
|
+
}
|
|
2151
|
+
const remaining = tracker.getActiveRequests().length;
|
|
2152
|
+
consola.info(`Phase 3: Sending abort signal to ${remaining} remaining request(s), waiting up to ${abortWaitMs / 1e3}s...`);
|
|
2153
|
+
shutdownAbortController.abort();
|
|
2154
|
+
try {
|
|
2155
|
+
if (await drainActiveRequests(abortWaitMs, tracker, drainOpts) === "drained") {
|
|
2156
|
+
consola.info("All requests completed after abort signal");
|
|
2157
|
+
finalize(tracker);
|
|
2158
|
+
return;
|
|
2159
|
+
}
|
|
2160
|
+
} catch (error) {
|
|
2161
|
+
consola.error("Error during Phase 3 drain:", error);
|
|
2162
|
+
}
|
|
2163
|
+
const forceRemaining = tracker.getActiveRequests().length;
|
|
2164
|
+
consola.warn(`Phase 4: Force-closing ${forceRemaining} remaining request(s)`);
|
|
2165
|
+
if (server) try {
|
|
2166
|
+
await server.close(true);
|
|
2167
|
+
} catch (error) {
|
|
2168
|
+
consola.error("Error force-closing server:", error);
|
|
2169
|
+
}
|
|
2170
|
+
}
|
|
2171
|
+
finalize(tracker);
|
|
2172
|
+
} else {
|
|
2173
|
+
consola.info("Shutdown complete");
|
|
2174
|
+
shutdownResolve?.();
|
|
2175
|
+
}
|
|
2176
|
+
}
|
|
2177
|
+
function finalize(tracker) {
|
|
2178
|
+
tracker.destroy();
|
|
2179
|
+
consola.info("Shutdown complete");
|
|
2180
|
+
shutdownResolve?.();
|
|
2181
|
+
}
|
|
2182
|
+
function setupShutdownHandlers() {
|
|
2183
|
+
const handler = (signal) => {
|
|
2184
|
+
if (_isShuttingDown) {
|
|
2185
|
+
consola.warn("Second signal received, forcing immediate exit");
|
|
2186
|
+
process.exit(1);
|
|
2187
|
+
}
|
|
2188
|
+
gracefulShutdown(signal).catch((error) => {
|
|
2189
|
+
consola.error("Fatal error during shutdown:", error);
|
|
2190
|
+
shutdownResolve?.();
|
|
2191
|
+
process.exit(1);
|
|
2192
|
+
});
|
|
2193
|
+
};
|
|
2194
|
+
process.on("SIGINT", () => handler("SIGINT"));
|
|
2195
|
+
process.on("SIGTERM", () => handler("SIGTERM"));
|
|
2196
|
+
}
|
|
2197
|
+
|
|
1734
2198
|
//#endregion
|
|
1735
2199
|
//#region src/lib/tui/console-renderer.ts
|
|
1736
2200
|
const CLEAR_LINE = "\x1B[2K\r";
|
|
@@ -2091,6 +2555,7 @@ const requestTracker = new RequestTracker();
|
|
|
2091
2555
|
*/
|
|
2092
2556
|
function tuiLogger() {
|
|
2093
2557
|
return async (c, next) => {
|
|
2558
|
+
if (getIsShuttingDown()) return c.json({ error: "Server is shutting down" }, 503);
|
|
2094
2559
|
const method = c.req.method;
|
|
2095
2560
|
const path = c.req.path;
|
|
2096
2561
|
const isHistoryAccess = path.startsWith("/history");
|
|
@@ -2814,6 +3279,127 @@ function createTruncationResponseMarkerOpenAI(result) {
|
|
|
2814
3279
|
return `\n\n---\n[Auto-truncated: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
|
|
2815
3280
|
}
|
|
2816
3281
|
|
|
3282
|
+
//#endregion
|
|
3283
|
+
//#region src/lib/repetition-detector.ts
|
|
3284
|
+
/**
|
|
3285
|
+
* Stream repetition detector.
|
|
3286
|
+
*
|
|
3287
|
+
* Uses the KMP failure function (prefix function) to detect repeated patterns
|
|
3288
|
+
* in streaming text output. When a model gets stuck in a repetitive loop,
|
|
3289
|
+
* it wastes tokens producing the same content over and over. This detector
|
|
3290
|
+
* identifies such loops early so the caller can take action (log warning,
|
|
3291
|
+
* abort stream, etc.).
|
|
3292
|
+
*
|
|
3293
|
+
* The algorithm works by maintaining a sliding buffer of recent text and
|
|
3294
|
+
* computing the longest proper prefix that is also a suffix — if this
|
|
3295
|
+
* length exceeds `(text.length - period) >= minRepetitions * period`,
|
|
3296
|
+
* it means a pattern of length `period` has repeated enough times.
|
|
3297
|
+
*/
|
|
3298
|
+
const DEFAULT_CONFIG = {
|
|
3299
|
+
minPatternLength: 10,
|
|
3300
|
+
minRepetitions: 3,
|
|
3301
|
+
maxBufferSize: 5e3
|
|
3302
|
+
};
|
|
3303
|
+
var RepetitionDetector = class {
|
|
3304
|
+
buffer = "";
|
|
3305
|
+
config;
|
|
3306
|
+
detected = false;
|
|
3307
|
+
constructor(config) {
|
|
3308
|
+
this.config = {
|
|
3309
|
+
...DEFAULT_CONFIG,
|
|
3310
|
+
...config
|
|
3311
|
+
};
|
|
3312
|
+
}
|
|
3313
|
+
/**
|
|
3314
|
+
* Feed a text chunk into the detector.
|
|
3315
|
+
* Returns `true` if repetition has been detected (now or previously).
|
|
3316
|
+
* Once detected, subsequent calls return `true` without further analysis.
|
|
3317
|
+
*/
|
|
3318
|
+
feed(text) {
|
|
3319
|
+
if (this.detected) return true;
|
|
3320
|
+
if (!text) return false;
|
|
3321
|
+
this.buffer += text;
|
|
3322
|
+
if (this.buffer.length > this.config.maxBufferSize) this.buffer = this.buffer.slice(-this.config.maxBufferSize);
|
|
3323
|
+
const minRequired = this.config.minPatternLength * this.config.minRepetitions;
|
|
3324
|
+
if (this.buffer.length < minRequired) return false;
|
|
3325
|
+
this.detected = detectRepetition(this.buffer, this.config.minPatternLength, this.config.minRepetitions);
|
|
3326
|
+
return this.detected;
|
|
3327
|
+
}
|
|
3328
|
+
/** Reset detector state for a new stream */
|
|
3329
|
+
reset() {
|
|
3330
|
+
this.buffer = "";
|
|
3331
|
+
this.detected = false;
|
|
3332
|
+
}
|
|
3333
|
+
/** Whether repetition has been detected */
|
|
3334
|
+
get isDetected() {
|
|
3335
|
+
return this.detected;
|
|
3336
|
+
}
|
|
3337
|
+
};
|
|
3338
|
+
/**
|
|
3339
|
+
* Detect if the tail of `text` contains a repeating pattern.
|
|
3340
|
+
*
|
|
3341
|
+
* Uses the KMP prefix function: for a string S, the prefix function π[i]
|
|
3342
|
+
* gives the length of the longest proper prefix of S[0..i] that is also
|
|
3343
|
+
* a suffix. If π[n-1] ≥ (n - period) where period = n - π[n-1], then
|
|
3344
|
+
* the string is composed of a repeating unit of length `period`.
|
|
3345
|
+
*
|
|
3346
|
+
* We check the suffix of the buffer (last `checkLength` chars) to detect
|
|
3347
|
+
* if a pattern of at least `minPatternLength` chars repeats at least
|
|
3348
|
+
* `minRepetitions` times.
|
|
3349
|
+
*/
|
|
3350
|
+
function detectRepetition(text, minPatternLength, minRepetitions) {
|
|
3351
|
+
const minWindow = minPatternLength * minRepetitions;
|
|
3352
|
+
const maxWindow = Math.min(text.length, 2e3);
|
|
3353
|
+
const windowSizes = [
|
|
3354
|
+
minWindow,
|
|
3355
|
+
Math.floor(maxWindow * .5),
|
|
3356
|
+
maxWindow
|
|
3357
|
+
].filter((w) => w >= minWindow && w <= text.length);
|
|
3358
|
+
for (const windowSize of windowSizes) {
|
|
3359
|
+
const window = text.slice(-windowSize);
|
|
3360
|
+
const period = findRepeatingPeriod(window);
|
|
3361
|
+
if (period >= minPatternLength) {
|
|
3362
|
+
if (Math.floor(window.length / period) >= minRepetitions) return true;
|
|
3363
|
+
}
|
|
3364
|
+
}
|
|
3365
|
+
return false;
|
|
3366
|
+
}
|
|
3367
|
+
/**
|
|
3368
|
+
* Find the shortest repeating period in a string using KMP prefix function.
|
|
3369
|
+
* Returns the period length, or the string length if no repetition found.
|
|
3370
|
+
*/
|
|
3371
|
+
function findRepeatingPeriod(s) {
|
|
3372
|
+
const n = s.length;
|
|
3373
|
+
if (n === 0) return 0;
|
|
3374
|
+
const pi = new Int32Array(n);
|
|
3375
|
+
for (let i = 1; i < n; i++) {
|
|
3376
|
+
let j = pi[i - 1] ?? 0;
|
|
3377
|
+
while (j > 0 && s[i] !== s[j]) j = pi[j - 1] ?? 0;
|
|
3378
|
+
if (s[i] === s[j]) j++;
|
|
3379
|
+
pi[i] = j;
|
|
3380
|
+
}
|
|
3381
|
+
const period = n - pi[n - 1];
|
|
3382
|
+
if (period < n && n % period === 0) return period;
|
|
3383
|
+
if (period < n && pi[n - 1] >= period) return period;
|
|
3384
|
+
return n;
|
|
3385
|
+
}
|
|
3386
|
+
/**
|
|
3387
|
+
* Create a repetition detector callback for use in stream processing.
|
|
3388
|
+
* Returns a function that accepts text deltas and logs a warning on first detection.
|
|
3389
|
+
*/
|
|
3390
|
+
function createStreamRepetitionChecker(label, config) {
|
|
3391
|
+
const detector = new RepetitionDetector(config);
|
|
3392
|
+
let warned = false;
|
|
3393
|
+
return (textDelta) => {
|
|
3394
|
+
const isRepetitive = detector.feed(textDelta);
|
|
3395
|
+
if (isRepetitive && !warned) {
|
|
3396
|
+
warned = true;
|
|
3397
|
+
consola.warn(`[RepetitionDetector] ${label}: Repetitive output detected in stream`);
|
|
3398
|
+
}
|
|
3399
|
+
return isRepetitive;
|
|
3400
|
+
};
|
|
3401
|
+
}
|
|
3402
|
+
|
|
2817
3403
|
//#endregion
|
|
2818
3404
|
//#region src/services/copilot/create-chat-completions.ts
|
|
2819
3405
|
const createChatCompletions = async (payload, options) => {
|
|
@@ -3150,6 +3736,7 @@ function createStreamAccumulator() {
|
|
|
3150
3736
|
async function handleStreamingResponse$1(opts) {
|
|
3151
3737
|
const { stream, response, payload, ctx } = opts;
|
|
3152
3738
|
const acc = createStreamAccumulator();
|
|
3739
|
+
const checkRepetition = createStreamRepetitionChecker(`openai:${payload.model}`);
|
|
3153
3740
|
try {
|
|
3154
3741
|
if (state.verbose && ctx.truncateResult?.wasCompacted) {
|
|
3155
3742
|
const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
|
|
@@ -3173,7 +3760,7 @@ async function handleStreamingResponse$1(opts) {
|
|
|
3173
3760
|
}
|
|
3174
3761
|
for await (const chunk of response) {
|
|
3175
3762
|
consola.debug("Streaming chunk:", JSON.stringify(chunk));
|
|
3176
|
-
parseStreamChunk(chunk, acc);
|
|
3763
|
+
parseStreamChunk(chunk, acc, checkRepetition);
|
|
3177
3764
|
await stream.writeSSE(chunk);
|
|
3178
3765
|
}
|
|
3179
3766
|
recordStreamSuccess(acc, payload.model, ctx);
|
|
@@ -3189,7 +3776,7 @@ async function handleStreamingResponse$1(opts) {
|
|
|
3189
3776
|
throw error;
|
|
3190
3777
|
}
|
|
3191
3778
|
}
|
|
3192
|
-
function parseStreamChunk(chunk, acc) {
|
|
3779
|
+
function parseStreamChunk(chunk, acc, checkRepetition) {
|
|
3193
3780
|
if (!chunk.data || chunk.data === "[DONE]") return;
|
|
3194
3781
|
try {
|
|
3195
3782
|
const parsed = JSON.parse(chunk.data);
|
|
@@ -3200,7 +3787,10 @@ function parseStreamChunk(chunk, acc) {
|
|
|
3200
3787
|
}
|
|
3201
3788
|
const choice = parsed.choices[0];
|
|
3202
3789
|
if (choice) {
|
|
3203
|
-
if (choice.delta.content)
|
|
3790
|
+
if (choice.delta.content) {
|
|
3791
|
+
acc.content += choice.delta.content;
|
|
3792
|
+
checkRepetition(choice.delta.content);
|
|
3793
|
+
}
|
|
3204
3794
|
if (choice.delta.tool_calls) for (const tc of choice.delta.tool_calls) {
|
|
3205
3795
|
const idx = tc.index;
|
|
3206
3796
|
if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
|
|
@@ -3939,12 +4529,78 @@ document.addEventListener('keydown', (e) => {
|
|
|
3939
4529
|
}
|
|
3940
4530
|
});
|
|
3941
4531
|
|
|
3942
|
-
// Auto-refresh every 10 seconds
|
|
3943
|
-
setInterval(() => {
|
|
4532
|
+
// Auto-refresh every 10 seconds (fallback when WebSocket is not available)
|
|
4533
|
+
let autoRefreshTimer = setInterval(() => {
|
|
3944
4534
|
loadStats();
|
|
3945
4535
|
loadSessions();
|
|
3946
4536
|
}, 10000);
|
|
3947
4537
|
|
|
4538
|
+
// WebSocket real-time updates
|
|
4539
|
+
let reconnectAttempts = 0;
|
|
4540
|
+
// Open the live-update WebSocket for the history page.
// While the socket is open the 10-second polling timer is stopped; when it
// closes, polling is restarted and a reconnect is scheduled with exponential
// backoff (5s, 10s, 20s, ... capped at 60s) for at most 10 consecutive attempts.
function connectWebSocket() {
  // Use a secure socket when the page itself was loaded over https.
  const scheme = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
  const endpoint = scheme + '//' + window.location.host + '/history/ws';

  // Restart the polling fallback; clearing first prevents stacked timers.
  function resumePolling() {
    clearInterval(autoRefreshTimer);
    autoRefreshTimer = setInterval(() => {
      loadStats();
      loadSessions();
    }, 10000);
  }

  // Queue the next connection attempt unless the retry budget is used up.
  function scheduleReconnect() {
    if (reconnectAttempts >= 10) return;
    const delay = Math.min(5000 * Math.pow(2, reconnectAttempts), 60000);
    reconnectAttempts++;
    setTimeout(connectWebSocket, delay);
  }

  // Refresh the parts of the page affected by a server notification.
  function handleMessage(event) {
    try {
      const msg = JSON.parse(event.data);
      const type = msg.type;
      if (type === 'stats_updated') {
        loadStats();
      } else if (
        type === 'entry_added' ||
        type === 'entry_updated' ||
        type === 'history_cleared' ||
        type === 'session_deleted'
      ) {
        loadSessions();
        loadStats();
      }
    } catch (e) {
      console.warn('[History] Failed to parse WS message:', e);
    }
  }

  try {
    const ws = new WebSocket(endpoint);

    ws.onopen = function() {
      console.log('[History] WebSocket connected');
      reconnectAttempts = 0;
      // Disable polling when WS is active
      clearInterval(autoRefreshTimer);
    };

    ws.onmessage = handleMessage;

    ws.onclose = function() {
      console.log('[History] WebSocket disconnected, falling back to polling');
      resumePolling();
      scheduleReconnect();
    };

    ws.onerror = function() {
      // Nothing to do here: the close handler takes care of fallback and retry.
    };
  } catch (e) {
    console.warn('[History] WebSocket not available:', e);
  }
}
|
|
4600
|
+
|
|
4601
|
+
// Start WebSocket connection
|
|
4602
|
+
connectWebSocket();
|
|
4603
|
+
|
|
3948
4604
|
// Tab switching
|
|
3949
4605
|
function switchTab(tab) {
|
|
3950
4606
|
document.querySelectorAll('.tab-item').forEach(t => t.classList.remove('active'));
|
|
@@ -4741,6 +5397,11 @@ historyRoutes.get("/api/export", handleExport);
|
|
|
4741
5397
|
historyRoutes.get("/api/sessions", handleGetSessions);
|
|
4742
5398
|
historyRoutes.get("/api/sessions/:id", handleGetSession);
|
|
4743
5399
|
historyRoutes.delete("/api/sessions/:id", handleDeleteSession);
|
|
5400
|
+
// WebSocket endpoint used by the history UI for live updates.
// Responds 426 when the request is not a WebSocket upgrade, 101 when the
// runtime server accepted the upgrade, and 500 when the upgrade was refused
// or no upgrade-capable server is exposed on c.env.
historyRoutes.get("/ws", (c) => {
  // RFC 6455 defines the Upgrade token as ASCII case-insensitive, so clients
  // sending "WebSocket" or "WEBSOCKET" must be accepted as well.
  const upgradeHeader = c.req.header("Upgrade");
  if (upgradeHeader?.trim().toLowerCase() !== "websocket") return c.text("Expected WebSocket upgrade", 426);
  // NOTE(review): c.env.server.upgrade looks like the Bun server API; confirm for other runtimes.
  if (c.env?.server?.upgrade(c.req.raw)) return new Response(null, { status: 101 });
  return c.text("WebSocket upgrade failed", 500);
});
|
|
4744
5405
|
historyRoutes.get("/", (c) => {
|
|
4745
5406
|
return c.html(getHistoryUI());
|
|
4746
5407
|
});
|
|
@@ -6192,6 +6853,7 @@ function prependMarkerToAnthropicResponse$1(response, marker) {
|
|
|
6192
6853
|
async function handleDirectAnthropicStreamingResponse(opts) {
|
|
6193
6854
|
const { stream, response, anthropicPayload, ctx } = opts;
|
|
6194
6855
|
const acc = createAnthropicStreamAccumulator();
|
|
6856
|
+
const checkRepetition = createStreamRepetitionChecker(`anthropic:${anthropicPayload.model}`);
|
|
6195
6857
|
try {
|
|
6196
6858
|
for await (const rawEvent of response) {
|
|
6197
6859
|
consola.debug("Direct Anthropic raw stream event:", JSON.stringify(rawEvent));
|
|
@@ -6205,6 +6867,7 @@ async function handleDirectAnthropicStreamingResponse(opts) {
|
|
|
6205
6867
|
continue;
|
|
6206
6868
|
}
|
|
6207
6869
|
processAnthropicEvent(event, acc);
|
|
6870
|
+
if (event.type === "content_block_delta" && event.delta.type === "text_delta") checkRepetition(event.delta.text);
|
|
6208
6871
|
await stream.writeSSE({
|
|
6209
6872
|
event: rawEvent.event || event.type,
|
|
6210
6873
|
data: rawEvent.data
|
|
@@ -6404,6 +7067,7 @@ async function handleStreamingResponse(opts) {
|
|
|
6404
7067
|
toolCalls: {}
|
|
6405
7068
|
};
|
|
6406
7069
|
const acc = createAnthropicStreamAccumulator();
|
|
7070
|
+
const checkRepetition = createStreamRepetitionChecker(`translated:${anthropicPayload.model}`);
|
|
6407
7071
|
try {
|
|
6408
7072
|
if (ctx.truncateResult?.wasCompacted) {
|
|
6409
7073
|
const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
|
|
@@ -6415,7 +7079,8 @@ async function handleStreamingResponse(opts) {
|
|
|
6415
7079
|
response,
|
|
6416
7080
|
toolNameMapping,
|
|
6417
7081
|
streamState,
|
|
6418
|
-
acc
|
|
7082
|
+
acc,
|
|
7083
|
+
checkRepetition
|
|
6419
7084
|
});
|
|
6420
7085
|
recordStreamingResponse(acc, anthropicPayload.model, ctx);
|
|
6421
7086
|
completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
|
|
@@ -6471,7 +7136,7 @@ async function sendTruncationMarkerEvent(stream, streamState, marker) {
|
|
|
6471
7136
|
streamState.contentBlockIndex++;
|
|
6472
7137
|
}
|
|
6473
7138
|
async function processStreamChunks(opts) {
|
|
6474
|
-
const { stream, response, toolNameMapping, streamState, acc } = opts;
|
|
7139
|
+
const { stream, response, toolNameMapping, streamState, acc, checkRepetition } = opts;
|
|
6475
7140
|
for await (const rawEvent of response) {
|
|
6476
7141
|
consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent));
|
|
6477
7142
|
if (rawEvent.data === "[DONE]") break;
|
|
@@ -6488,6 +7153,7 @@ async function processStreamChunks(opts) {
|
|
|
6488
7153
|
for (const event of events) {
|
|
6489
7154
|
consola.debug("Translated Anthropic event:", JSON.stringify(event));
|
|
6490
7155
|
processAnthropicEvent(event, acc);
|
|
7156
|
+
if (event.type === "content_block_delta" && event.delta.type === "text_delta") checkRepetition(event.delta.text);
|
|
6491
7157
|
await stream.writeSSE({
|
|
6492
7158
|
event: event.type,
|
|
6493
7159
|
data: JSON.stringify(event)
|
|
@@ -6779,50 +7445,229 @@ const containsVisionContent = (value) => {
|
|
|
6779
7445
|
if (Array.isArray(record.content)) return record.content.some((entry) => containsVisionContent(entry));
|
|
6780
7446
|
return false;
|
|
6781
7447
|
};
|
|
7448
|
+
/**
 * Convert Responses API input to history MessageContent format.
 *
 * - A plain string becomes a single user message.
 * - `function_call` items become assistant messages carrying one tool call.
 * - `function_call_output` items become tool messages.
 * - `reasoning` items are dropped.
 * - Any other item that has a `role` becomes a message whose non-string
 *   content is JSON-serialized.
 *
 * The conversion is used for history recording only, so malformed input
 * (non-array input, null or primitive entries) is skipped instead of throwing.
 *
 * @param {string|Array<object>|null|undefined} input - The request's `input` field.
 * @returns {Array<object>} History messages; empty when there is nothing to record.
 */
function convertResponsesInputToMessages(input) {
  if (!input) return [];
  if (typeof input === "string") return [{
    role: "user",
    content: input
  }];
  // Anything that is neither a string nor an array cannot be iterated safely.
  if (!Array.isArray(input)) return [];
  const toText = (value) => typeof value === "string" ? value : JSON.stringify(value);
  const messages = [];
  for (const item of input) {
    // `"role" in item` and `item.type` throw on null/primitives; skip those entries.
    if (item === null || typeof item !== "object") continue;
    switch (item.type) {
      case "function_call":
        messages.push({
          role: "assistant",
          content: "",
          tool_calls: [{
            id: item.call_id,
            type: "function",
            function: {
              name: item.name,
              arguments: item.arguments
            }
          }]
        });
        break;
      case "function_call_output":
        messages.push({
          role: "tool",
          content: toText(item.output),
          tool_call_id: item.call_id
        });
        break;
      case "reasoning":
        break;
      default:
        if ("role" in item) messages.push({
          role: item.role,
          content: toText(item.content)
        });
    }
  }
  return messages;
}
|
|
7496
|
+
/**
 * Convert Responses API tools to history ToolDefinition format.
 *
 * Only tools whose `type` is "function" are kept. Each definition carries the
 * tool name, plus the description when it is truthy.
 *
 * @param {Array<object>|null|undefined} tools - The request's `tools` field.
 * @returns {Array<{name: string, description?: string}>} Tool definitions; empty when `tools` is missing.
 */
function convertResponsesToolsToDefinitions(tools) {
  if (!tools) return [];
  return tools.flatMap((tool) => {
    if (tool.type !== "function") return [];
    const definition = { name: tool.name };
    if (tool.description) definition.description = tool.description;
    return [definition];
  });
}
|
|
7506
|
+
/**
 * Extract response content and tool calls from a ResponsesResult in one pass
 * over `result.output`.
 *
 * Text is the concatenation, in output order, of every string `text` or
 * `refusal` found in the content blocks of `message` items. Each
 * `function_call` item is recorded twice: once in chat-message shape and once
 * in history shape.
 *
 * @param {{output: Array<object>}} result - Responses API result.
 * @returns {{content: object|null, toolCalls: Array<object>|undefined}}
 *   `content` is null when there is neither text nor a tool call.
 */
function extractResponseData(result) {
  const outputItems = result.output;
  if (outputItems.length === 0) return { content: null, toolCalls: void 0 };

  const textPieces = [];
  const messageToolCalls = [];
  const recordedToolCalls = [];
  for (const entry of outputItems) {
    if (entry.type === "message" && "content" in entry && entry.content) {
      for (const part of entry.content) {
        if ("text" in part && typeof part.text === "string") textPieces.push(part.text);
        else if ("refusal" in part && typeof part.refusal === "string") textPieces.push(part.refusal);
      }
      continue;
    }
    if (entry.type !== "function_call") continue;
    const { call_id: callId, name, arguments: args } = entry;
    messageToolCalls.push({
      id: callId,
      type: "function",
      function: { name, arguments: args }
    });
    recordedToolCalls.push({ id: callId, name, input: args });
  }

  const text = textPieces.join("");
  const hasToolCalls = messageToolCalls.length > 0;
  if (!text && !hasToolCalls) return { content: null, toolCalls: void 0 };

  const content = { role: "assistant", content: text };
  if (hasToolCalls) content.tool_calls = messageToolCalls;
  return {
    content,
    toolCalls: hasToolCalls ? recordedToolCalls : void 0
  };
}
|
|
7548
|
+
/**
 * Map ResponsesResult.status to a stop_reason string.
 *
 * "completed" -> "stop", "incomplete" -> "length", "failed" -> "error";
 * any other status is returned unchanged.
 *
 * @param {{status: string}} result - Responses API result.
 * @returns {string} The stop reason recorded in history.
 */
function extractResponseStopReason(result) {
  const { status } = result;
  if (status === "completed") return "stop";
  if (status === "incomplete") return "length";
  if (status === "failed") return "error";
  return status;
}
|
|
6782
7557
|
|
|
6783
7558
|
//#endregion
|
|
6784
7559
|
//#region src/routes/responses/handler.ts
|
|
6785
7560
|
const RESPONSES_ENDPOINT = "/responses";
|
|
7561
|
+
/**
 * Stream event names whose payload is inspected for the final response
 * object or an error message while a Responses stream is forwarded.
 */
const TERMINAL_EVENTS = new Set(["response.completed", "response.incomplete", "response.failed", "error"]);
|
|
6786
7567
|
/**
 * Route handler for the Responses endpoint.
 *
 * Flow:
 *  1. Parse the JSON payload, normalise it (useFunctionApplyPatch,
 *     removeWebSearchTool) and record the request in history.
 *  2. Reply 400 when the selected model does not list the responses endpoint
 *     among its supported endpoints.
 *  3. Call createResponses through the adaptive rate limiter.
 *  4. Streaming: forward every chunk as an SSE event, passing its data
 *     through fixStreamIds, and remember the final response or error message
 *     from terminal events so history and tracking can be completed.
 *     Non-streaming: record the result and return it as JSON.
 *
 * Errors from the upstream call or from the stream are recorded, reported to
 * the tracker and rethrown.
 *
 * @param {object} c - Request context (provides `req`, `get`, `json`).
 * @returns {Promise<Response>} JSON response or SSE stream.
 */
const handleResponses = async (c) => {
  const payload = await c.req.json();
  consola.debug("Responses request payload:", JSON.stringify(payload));

  const trackingId = c.get("trackingId");
  // Use the tracker's start time when the request is tracked so durations agree.
  const trackedRequest = trackingId ? requestTracker.getRequest(trackingId) : void 0;
  const startTime = trackedRequest?.startTime ?? Date.now();

  updateTrackerModel(trackingId, payload.model);
  useFunctionApplyPatch(payload);
  removeWebSearchTool(payload);

  const model = payload.model;
  const stream = payload.stream ?? false;
  const tools = convertResponsesToolsToDefinitions(payload.tools);
  const historyId = recordRequest("openai", {
    model,
    messages: convertResponsesInputToMessages(payload.input),
    stream,
    tools: tools.length > 0 ? tools : void 0,
    max_tokens: payload.max_output_tokens ?? void 0,
    temperature: payload.temperature ?? void 0,
    system: payload.instructions ?? void 0
  });
  const ctx = { historyId, trackingId, startTime };

  const selectedModel = state.models?.data.find((m) => m.id === payload.model);
  const supportsResponses = selectedModel?.supported_endpoints?.includes(RESPONSES_ENDPOINT) ?? false;
  if (!supportsResponses) {
    recordErrorResponse(ctx, model, new Error("This model does not support the responses endpoint."));
    return c.json({
      error: {
        message: "This model does not support the responses endpoint. Please choose a different model.",
        type: "invalid_request_error"
      }
    }, 400);
  }

  const { vision, initiator } = getResponsesRequestOptions(payload);
  if (state.manualApprove) await awaitApproval();

  try {
    const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(
      () => createResponses(payload, { vision, initiator })
    );
    ctx.queueWaitMs = queueWaitMs;

    if (isStreamingRequested(payload) && isAsyncIterable(response)) {
      consola.debug("Forwarding native Responses stream");
      updateTrackerStatus(trackingId, "streaming");
      return streamSSE(c, async (sse) => {
        const idTracker = createStreamIdTracker();
        let finalResult;
        let streamErrorMessage;
        try {
          for await (const chunk of response) {
            consola.debug("Responses stream chunk:", JSON.stringify(chunk));
            const eventType = chunk.event;
            const rawData = chunk.data ?? "";

            if (eventType && TERMINAL_EVENTS.has(eventType)) {
              try {
                const parsed = JSON.parse(rawData);
                if ("response" in parsed) finalResult = parsed.response;
                else if (eventType === "error" && "message" in parsed) streamErrorMessage = parsed.message;
              } catch {
                // Best effort: an unparsable terminal payload is still forwarded below.
              }
            }

            await sse.writeSSE({
              id: chunk.id,
              event: eventType,
              data: fixStreamIds(rawData, eventType, idTracker)
            });
          }

          if (finalResult) {
            recordResponseResult(finalResult, model, historyId, startTime);
            const finalUsage = finalResult.usage;
            completeTracking(trackingId, finalUsage?.input_tokens ?? 0, finalUsage?.output_tokens ?? 0, queueWaitMs);
          } else {
            // No final response was seen; record the stream error if one was reported.
            if (streamErrorMessage) {
              recordResponse(historyId, {
                success: false,
                model,
                usage: {
                  input_tokens: 0,
                  output_tokens: 0
                },
                error: streamErrorMessage,
                content: null
              }, Date.now() - startTime);
            }
            completeTracking(trackingId, 0, 0, queueWaitMs);
          }
        } catch (error) {
          recordStreamError({
            acc: { model: finalResult?.model || model },
            fallbackModel: model,
            ctx,
            error
          });
          failTracking(trackingId, error);
          throw error;
        }
      });
    }

    const result = response;
    const usage = result.usage;
    recordResponseResult(result, model, historyId, startTime);
    completeTracking(trackingId, usage?.input_tokens ?? 0, usage?.output_tokens ?? 0, ctx.queueWaitMs);
    consola.debug("Forwarding native Responses result:", JSON.stringify(result).slice(-400));
    return c.json(result);
  } catch (error) {
    recordErrorResponse(ctx, model, error);
    failTracking(trackingId, error);
    throw error;
  }
};
|
|
6827
7672
|
const isAsyncIterable = (value) => Boolean(value) && typeof value[Symbol.asyncIterator] === "function";
|
|
6828
7673
|
const isStreamingRequested = (payload) => Boolean(payload.stream);
|
|
@@ -6857,6 +7702,22 @@ const removeWebSearchTool = (payload) => {
|
|
|
6857
7702
|
return t.type !== "web_search";
|
|
6858
7703
|
});
|
|
6859
7704
|
};
|
|
7705
|
+
/**
 * Record a ResponsesResult to history.
 *
 * The entry counts as successful unless the result status is "failed".
 * Missing usage counters are recorded as 0, and the duration is measured from
 * `startTime` to now.
 *
 * @param {object} result - Responses API result (`status`, `model`, `usage`, `output`).
 * @param {string} fallbackModel - Model name used when the result carries none.
 * @param {string} historyId - History entry to attach the response to.
 * @param {number} startTime - Request start, in epoch milliseconds.
 * @returns {void}
 */
function recordResponseResult(result, fallbackModel, historyId, startTime) {
  const tokenUsage = result.usage;
  const extracted = extractResponseData(result);
  const entry = {
    success: result.status !== "failed",
    model: result.model || fallbackModel,
    usage: {
      input_tokens: tokenUsage?.input_tokens ?? 0,
      output_tokens: tokenUsage?.output_tokens ?? 0
    },
    stop_reason: extractResponseStopReason(result),
    content: extracted.content,
    toolCalls: extracted.toolCalls
  };
  recordResponse(historyId, entry, Date.now() - startTime);
}
|
|
6860
7721
|
|
|
6861
7722
|
//#endregion
|
|
6862
7723
|
//#region src/routes/responses/route.ts
|
|
@@ -6971,6 +7832,7 @@ async function runServer(options) {
|
|
|
6971
7832
|
consola.info(`History recording enabled (${limitText} entries)`);
|
|
6972
7833
|
}
|
|
6973
7834
|
initTui({ enabled: true });
|
|
7835
|
+
initRequestContextManager(state.staleRequestMaxAge).startReaper();
|
|
6974
7836
|
await ensurePaths();
|
|
6975
7837
|
await cacheVSCodeVersion();
|
|
6976
7838
|
if (options.githubToken) {
|
|
@@ -7010,11 +7872,12 @@ async function runServer(options) {
|
|
|
7010
7872
|
}
|
|
7011
7873
|
}
|
|
7012
7874
|
consola.box(`š Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage${options.history ? `\nš History UI: ${serverUrl}/history` : ""}`);
|
|
7013
|
-
|
|
7875
|
+
setupShutdownHandlers();
|
|
7876
|
+
setServerInstance(serve({
|
|
7014
7877
|
fetch: server.fetch,
|
|
7015
7878
|
port: options.port,
|
|
7016
7879
|
hostname: options.host
|
|
7017
|
-
});
|
|
7880
|
+
}));
|
|
7018
7881
|
}
|
|
7019
7882
|
const start = defineCommand({
|
|
7020
7883
|
meta: {
|