@hsupu/copilot-api 0.7.17-beta.0 → 0.7.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +192 -144
- package/dist/main.mjs +1197 -919
- package/dist/main.mjs.map +1 -1
- package/package.json +7 -3
package/dist/main.mjs
CHANGED
|
@@ -17,7 +17,7 @@ import { trimTrailingSlash } from "hono/trailing-slash";
|
|
|
17
17
|
import { streamSSE } from "hono/streaming";
|
|
18
18
|
import { events } from "fetch-event-stream";
|
|
19
19
|
|
|
20
|
-
//#region src/lib/paths.ts
|
|
20
|
+
//#region src/lib/config/paths.ts
|
|
21
21
|
const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api");
|
|
22
22
|
const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token");
|
|
23
23
|
const PATHS = {
|
|
@@ -55,7 +55,7 @@ const state = {
|
|
|
55
55
|
};
|
|
56
56
|
|
|
57
57
|
//#endregion
|
|
58
|
-
//#region src/lib/api
|
|
58
|
+
//#region src/lib/config/api.ts
|
|
59
59
|
const standardHeaders = () => ({
|
|
60
60
|
"content-type": "application/json",
|
|
61
61
|
accept: "application/json"
|
|
@@ -618,6 +618,118 @@ function forwardError(c, error) {
|
|
|
618
618
|
type: "error"
|
|
619
619
|
} }, 500);
|
|
620
620
|
}
|
|
621
|
+
/**
|
|
622
|
+
* Classify a raw error into a structured ApiError.
|
|
623
|
+
* Used by the pipeline to route errors to appropriate RetryStrategies.
|
|
624
|
+
*/
|
|
625
|
+
function classifyError(error) {
|
|
626
|
+
if (error instanceof HTTPError) return classifyHTTPError(error);
|
|
627
|
+
if (error instanceof TypeError && error.message.includes("fetch")) return {
|
|
628
|
+
type: "network_error",
|
|
629
|
+
status: 0,
|
|
630
|
+
message: error.message,
|
|
631
|
+
raw: error
|
|
632
|
+
};
|
|
633
|
+
if (error instanceof Error) return {
|
|
634
|
+
type: "bad_request",
|
|
635
|
+
status: 0,
|
|
636
|
+
message: error.message,
|
|
637
|
+
raw: error
|
|
638
|
+
};
|
|
639
|
+
return {
|
|
640
|
+
type: "bad_request",
|
|
641
|
+
status: 0,
|
|
642
|
+
message: String(error),
|
|
643
|
+
raw: error
|
|
644
|
+
};
|
|
645
|
+
}
|
|
646
|
+
function classifyHTTPError(error) {
|
|
647
|
+
const { status, responseText, message } = error;
|
|
648
|
+
if (status === 429) return {
|
|
649
|
+
type: "rate_limited",
|
|
650
|
+
status,
|
|
651
|
+
message,
|
|
652
|
+
retryAfter: extractRetryAfterFromBody(responseText),
|
|
653
|
+
raw: error
|
|
654
|
+
};
|
|
655
|
+
if (status === 413) return {
|
|
656
|
+
type: "payload_too_large",
|
|
657
|
+
status,
|
|
658
|
+
message,
|
|
659
|
+
raw: error
|
|
660
|
+
};
|
|
661
|
+
if (status >= 500) return {
|
|
662
|
+
type: "server_error",
|
|
663
|
+
status,
|
|
664
|
+
message,
|
|
665
|
+
raw: error
|
|
666
|
+
};
|
|
667
|
+
if (status === 401 || status === 403) return {
|
|
668
|
+
type: "auth_expired",
|
|
669
|
+
status,
|
|
670
|
+
message,
|
|
671
|
+
raw: error
|
|
672
|
+
};
|
|
673
|
+
if (status === 400) {
|
|
674
|
+
const tokenLimit = tryExtractTokenLimit(responseText);
|
|
675
|
+
if (tokenLimit) return {
|
|
676
|
+
type: "token_limit",
|
|
677
|
+
status,
|
|
678
|
+
message,
|
|
679
|
+
tokenLimit: tokenLimit.limit,
|
|
680
|
+
tokenCurrent: tokenLimit.current,
|
|
681
|
+
raw: error
|
|
682
|
+
};
|
|
683
|
+
if (isRateLimitedInBody(responseText)) return {
|
|
684
|
+
type: "rate_limited",
|
|
685
|
+
status,
|
|
686
|
+
message,
|
|
687
|
+
retryAfter: extractRetryAfterFromBody(responseText),
|
|
688
|
+
raw: error
|
|
689
|
+
};
|
|
690
|
+
}
|
|
691
|
+
return {
|
|
692
|
+
type: "bad_request",
|
|
693
|
+
status,
|
|
694
|
+
message,
|
|
695
|
+
raw: error
|
|
696
|
+
};
|
|
697
|
+
}
|
|
698
|
+
/** Extract retry_after from JSON response body */
|
|
699
|
+
function extractRetryAfterFromBody(responseText) {
|
|
700
|
+
try {
|
|
701
|
+
const parsed = JSON.parse(responseText);
|
|
702
|
+
if (parsed && typeof parsed === "object") {
|
|
703
|
+
if ("retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
|
|
704
|
+
if ("error" in parsed) {
|
|
705
|
+
const err = parsed.error;
|
|
706
|
+
if (err && typeof err === "object" && "retry_after" in err && typeof err.retry_after === "number") return err.retry_after;
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
} catch {}
|
|
710
|
+
}
|
|
711
|
+
/** Check if response body contains rate_limited code */
|
|
712
|
+
function isRateLimitedInBody(responseText) {
|
|
713
|
+
try {
|
|
714
|
+
const parsed = JSON.parse(responseText);
|
|
715
|
+
if (parsed && typeof parsed === "object" && "error" in parsed) {
|
|
716
|
+
const err = parsed.error;
|
|
717
|
+
if (err && typeof err === "object" && "code" in err) return err.code === "rate_limited";
|
|
718
|
+
}
|
|
719
|
+
} catch {}
|
|
720
|
+
return false;
|
|
721
|
+
}
|
|
722
|
+
/** Try to extract token limit info from response body */
|
|
723
|
+
function tryExtractTokenLimit(responseText) {
|
|
724
|
+
try {
|
|
725
|
+
const parsed = JSON.parse(responseText);
|
|
726
|
+
if (parsed && typeof parsed === "object" && "error" in parsed) {
|
|
727
|
+
const err = parsed.error;
|
|
728
|
+
if (err && typeof err === "object" && "message" in err && typeof err.message === "string") return parseTokenLimitError(err.message);
|
|
729
|
+
}
|
|
730
|
+
} catch {}
|
|
731
|
+
return null;
|
|
732
|
+
}
|
|
621
733
|
|
|
622
734
|
//#endregion
|
|
623
735
|
//#region src/services/github/get-copilot-token.ts
|
|
@@ -1444,7 +1556,7 @@ const debug = defineCommand({
|
|
|
1444
1556
|
});
|
|
1445
1557
|
|
|
1446
1558
|
//#endregion
|
|
1447
|
-
//#region src/lib/history
|
|
1559
|
+
//#region src/lib/history/ws.ts
|
|
1448
1560
|
const clients = /* @__PURE__ */ new Set();
|
|
1449
1561
|
function addClient(ws) {
|
|
1450
1562
|
clients.add(ws);
|
|
@@ -1496,144 +1608,432 @@ function notifyEntryUpdated(entry) {
|
|
|
1496
1608
|
}
|
|
1497
1609
|
|
|
1498
1610
|
//#endregion
|
|
1499
|
-
//#region src/lib/shutdown.ts
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
/** Drain timeouts based on active request types */
|
|
1504
|
-
const THINKING_DRAIN_TIMEOUT_MS = 18e4;
|
|
1505
|
-
const NORMAL_DRAIN_TIMEOUT_MS = 6e4;
|
|
1506
|
-
const MIN_DRAIN_TIMEOUT_MS = 5e3;
|
|
1507
|
-
const DRAIN_POLL_INTERVAL_MS = 500;
|
|
1508
|
-
const DRAIN_PROGRESS_INTERVAL_MS = 5e3;
|
|
1509
|
-
/** Check if the server is in shutdown state (used by middleware to reject new requests) */
|
|
1510
|
-
function getIsShuttingDown() {
|
|
1511
|
-
return _isShuttingDown;
|
|
1512
|
-
}
|
|
1513
|
-
/**
|
|
1514
|
-
* Returns a promise that resolves when the server is shut down via signal.
|
|
1515
|
-
* Used by runServer() to keep the async function alive until shutdown.
|
|
1516
|
-
*/
|
|
1517
|
-
function waitForShutdown() {
|
|
1518
|
-
return new Promise((resolve) => {
|
|
1519
|
-
shutdownResolve = resolve;
|
|
1520
|
-
});
|
|
1611
|
+
//#region src/lib/history/store.ts
|
|
1612
|
+
function formatLocalTimestamp(ts) {
|
|
1613
|
+
const d = new Date(ts);
|
|
1614
|
+
return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")} ${String(d.getHours()).padStart(2, "0")}:${String(d.getMinutes()).padStart(2, "0")}:${String(d.getSeconds()).padStart(2, "0")}`;
|
|
1521
1615
|
}
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1616
|
+
const historyState = {
|
|
1617
|
+
enabled: false,
|
|
1618
|
+
entries: [],
|
|
1619
|
+
sessions: /* @__PURE__ */ new Map(),
|
|
1620
|
+
currentSessionId: "",
|
|
1621
|
+
maxEntries: 200
|
|
1622
|
+
};
|
|
1623
|
+
function initHistory(enabled, maxEntries) {
|
|
1624
|
+
historyState.enabled = enabled;
|
|
1625
|
+
historyState.maxEntries = maxEntries;
|
|
1626
|
+
historyState.entries = [];
|
|
1627
|
+
historyState.sessions = /* @__PURE__ */ new Map();
|
|
1628
|
+
historyState.currentSessionId = enabled ? generateId() : "";
|
|
1525
1629
|
}
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
* Thinking requests get more time because they can take 120s+.
|
|
1529
|
-
*/
|
|
1530
|
-
function computeDrainTimeout() {
|
|
1531
|
-
const active = requestTracker.getActiveRequests();
|
|
1532
|
-
if (active.length === 0) return MIN_DRAIN_TIMEOUT_MS;
|
|
1533
|
-
return active.some((r) => r.tags?.some((t) => t.startsWith("thinking:"))) ? THINKING_DRAIN_TIMEOUT_MS : NORMAL_DRAIN_TIMEOUT_MS;
|
|
1630
|
+
function isHistoryEnabled() {
|
|
1631
|
+
return historyState.enabled;
|
|
1534
1632
|
}
|
|
1535
|
-
|
|
1536
|
-
|
|
1633
|
+
function getCurrentSession(endpoint) {
|
|
1634
|
+
if (historyState.currentSessionId) {
|
|
1635
|
+
const session = historyState.sessions.get(historyState.currentSessionId);
|
|
1636
|
+
if (session) {
|
|
1637
|
+
session.lastActivity = Date.now();
|
|
1638
|
+
return historyState.currentSessionId;
|
|
1639
|
+
}
|
|
1640
|
+
}
|
|
1537
1641
|
const now = Date.now();
|
|
1538
|
-
const lines = requests.map((req) => {
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1642
|
+
const sessionId = generateId();
|
|
1643
|
+
historyState.currentSessionId = sessionId;
|
|
1644
|
+
historyState.sessions.set(sessionId, {
|
|
1645
|
+
id: sessionId,
|
|
1646
|
+
startTime: now,
|
|
1647
|
+
lastActivity: now,
|
|
1648
|
+
requestCount: 0,
|
|
1649
|
+
totalInputTokens: 0,
|
|
1650
|
+
totalOutputTokens: 0,
|
|
1651
|
+
models: [],
|
|
1652
|
+
endpoint
|
|
1543
1653
|
});
|
|
1544
|
-
|
|
1654
|
+
return sessionId;
|
|
1545
1655
|
}
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
const deadline = Date.now() + timeoutMs;
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1656
|
+
function recordRequest(endpoint, request) {
|
|
1657
|
+
if (!historyState.enabled) return "";
|
|
1658
|
+
const sessionId = getCurrentSession(endpoint);
|
|
1659
|
+
const session = historyState.sessions.get(sessionId);
|
|
1660
|
+
if (!session) return "";
|
|
1661
|
+
const entry = {
|
|
1662
|
+
id: generateId(),
|
|
1663
|
+
sessionId,
|
|
1664
|
+
timestamp: Date.now(),
|
|
1665
|
+
endpoint,
|
|
1666
|
+
request: {
|
|
1667
|
+
model: request.model,
|
|
1668
|
+
messages: request.messages,
|
|
1669
|
+
stream: request.stream,
|
|
1670
|
+
tools: request.tools,
|
|
1671
|
+
max_tokens: request.max_tokens,
|
|
1672
|
+
temperature: request.temperature,
|
|
1673
|
+
system: request.system
|
|
1560
1674
|
}
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
consola.info(`Received ${signal}, shutting down gracefully...`);
|
|
1569
|
-
stopTokenRefresh();
|
|
1570
|
-
const wsClients = getClientCount();
|
|
1571
|
-
if (wsClients > 0) {
|
|
1572
|
-
closeAllClients();
|
|
1573
|
-
consola.info(`Disconnected ${wsClients} WebSocket client(s)`);
|
|
1675
|
+
};
|
|
1676
|
+
historyState.entries.push(entry);
|
|
1677
|
+
session.requestCount++;
|
|
1678
|
+
if (!session.models.includes(request.model)) session.models.push(request.model);
|
|
1679
|
+
if (request.tools && request.tools.length > 0) {
|
|
1680
|
+
if (!session.toolsUsed) session.toolsUsed = [];
|
|
1681
|
+
for (const tool of request.tools) if (!session.toolsUsed.includes(tool.name)) session.toolsUsed.push(tool.name);
|
|
1574
1682
|
}
|
|
1575
|
-
|
|
1576
|
-
const activeCount = requestTracker.getActiveRequests().length;
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
consola.info(`Draining ${activeCount} active request(s), timeout ${drainTimeout / 1e3}s`);
|
|
1580
|
-
if (await drainActiveRequests(drainTimeout) === "timeout") {
|
|
1581
|
-
const remaining = requestTracker.getActiveRequests();
|
|
1582
|
-
consola.warn(`Drain timeout, force-closing ${remaining.length} remaining request(s)`);
|
|
1583
|
-
} else consola.info("All requests completed");
|
|
1584
|
-
}
|
|
1585
|
-
try {
|
|
1586
|
-
await serverInstance.close(true);
|
|
1587
|
-
} catch (error) {
|
|
1588
|
-
consola.error("Error closing server:", error);
|
|
1683
|
+
while (historyState.maxEntries > 0 && historyState.entries.length > historyState.maxEntries) {
|
|
1684
|
+
const removed = historyState.entries.shift();
|
|
1685
|
+
if (removed) {
|
|
1686
|
+
if (historyState.entries.filter((e) => e.sessionId === removed.sessionId).length === 0) historyState.sessions.delete(removed.sessionId);
|
|
1589
1687
|
}
|
|
1590
1688
|
}
|
|
1591
|
-
|
|
1592
|
-
|
|
1689
|
+
notifyEntryAdded(entry);
|
|
1690
|
+
return entry.id;
|
|
1593
1691
|
}
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
const handler = (signal) => {
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
|
|
1692
|
+
function recordResponse(id, response, durationMs) {
|
|
1693
|
+
if (!historyState.enabled || !id) return;
|
|
1694
|
+
const entry = historyState.entries.find((e) => e.id === id);
|
|
1695
|
+
if (entry) {
|
|
1696
|
+
entry.response = response;
|
|
1697
|
+
entry.durationMs = durationMs;
|
|
1698
|
+
const session = historyState.sessions.get(entry.sessionId);
|
|
1699
|
+
if (session) {
|
|
1700
|
+
session.totalInputTokens += response.usage.input_tokens;
|
|
1701
|
+
session.totalOutputTokens += response.usage.output_tokens;
|
|
1702
|
+
session.lastActivity = Date.now();
|
|
1600
1703
|
}
|
|
1601
|
-
|
|
1602
|
-
}
|
|
1603
|
-
process.on("SIGINT", () => handler("SIGINT"));
|
|
1604
|
-
process.on("SIGTERM", () => handler("SIGTERM"));
|
|
1704
|
+
notifyEntryUpdated(entry);
|
|
1705
|
+
}
|
|
1605
1706
|
}
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
completedTimeouts = /* @__PURE__ */ new Map();
|
|
1614
|
-
historySize = 5;
|
|
1615
|
-
completedDisplayMs = 2e3;
|
|
1616
|
-
setRenderer(renderer) {
|
|
1617
|
-
this.renderer = renderer;
|
|
1707
|
+
function recordRewrites(id, rewrites) {
|
|
1708
|
+
if (!historyState.enabled || !id) return;
|
|
1709
|
+
const entry = historyState.entries.find((e) => e.id === id);
|
|
1710
|
+
if (entry) {
|
|
1711
|
+
entry.rewrites = rewrites;
|
|
1712
|
+
if (rewrites.truncation) entry.truncation = rewrites.truncation;
|
|
1713
|
+
notifyEntryUpdated(entry);
|
|
1618
1714
|
}
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
|
|
1715
|
+
}
|
|
1716
|
+
function getHistory(options = {}) {
|
|
1717
|
+
const { page = 1, limit = 50, model, endpoint, success, from, to, search, sessionId } = options;
|
|
1718
|
+
let filtered = [...historyState.entries];
|
|
1719
|
+
if (sessionId) filtered = filtered.filter((e) => e.sessionId === sessionId);
|
|
1720
|
+
if (model) {
|
|
1721
|
+
const modelLower = model.toLowerCase();
|
|
1722
|
+
filtered = filtered.filter((e) => e.request.model.toLowerCase().includes(modelLower) || e.response?.model.toLowerCase().includes(modelLower));
|
|
1622
1723
|
}
|
|
1623
|
-
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
const id = generateId();
|
|
1629
|
-
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
|
|
1633
|
-
|
|
1634
|
-
|
|
1635
|
-
|
|
1636
|
-
|
|
1724
|
+
if (endpoint) filtered = filtered.filter((e) => e.endpoint === endpoint);
|
|
1725
|
+
if (success !== void 0) filtered = filtered.filter((e) => e.response?.success === success);
|
|
1726
|
+
if (from) filtered = filtered.filter((e) => e.timestamp >= from);
|
|
1727
|
+
if (to) filtered = filtered.filter((e) => e.timestamp <= to);
|
|
1728
|
+
if (search) {
|
|
1729
|
+
const searchLower = search.toLowerCase();
|
|
1730
|
+
filtered = filtered.filter((e) => {
|
|
1731
|
+
if (e.request.model.toLowerCase().includes(searchLower) || e.response?.model && e.response.model.toLowerCase().includes(searchLower)) return true;
|
|
1732
|
+
if (e.response?.error && e.response.error.toLowerCase().includes(searchLower)) return true;
|
|
1733
|
+
if (e.request.system?.toLowerCase().includes(searchLower)) return true;
|
|
1734
|
+
if (e.request.messages.some((m) => {
|
|
1735
|
+
if (typeof m.content === "string") return m.content.toLowerCase().includes(searchLower);
|
|
1736
|
+
if (Array.isArray(m.content)) return m.content.some((c) => {
|
|
1737
|
+
if (c.text && c.text.toLowerCase().includes(searchLower)) return true;
|
|
1738
|
+
if (c.type === "tool_use") {
|
|
1739
|
+
const name = c.name;
|
|
1740
|
+
if (name && name.toLowerCase().includes(searchLower)) return true;
|
|
1741
|
+
if (c.input) {
|
|
1742
|
+
if ((typeof c.input === "string" ? c.input : JSON.stringify(c.input)).toLowerCase().includes(searchLower)) return true;
|
|
1743
|
+
}
|
|
1744
|
+
}
|
|
1745
|
+
if (c.type === "tool_result" && c.content) {
|
|
1746
|
+
if ((typeof c.content === "string" ? c.content : JSON.stringify(c.content)).toLowerCase().includes(searchLower)) return true;
|
|
1747
|
+
}
|
|
1748
|
+
if (c.type === "thinking") {
|
|
1749
|
+
const thinking = c.thinking;
|
|
1750
|
+
if (thinking && thinking.toLowerCase().includes(searchLower)) return true;
|
|
1751
|
+
}
|
|
1752
|
+
return false;
|
|
1753
|
+
});
|
|
1754
|
+
return false;
|
|
1755
|
+
})) return true;
|
|
1756
|
+
if (e.response?.content) {
|
|
1757
|
+
const rc = e.response.content;
|
|
1758
|
+
if (typeof rc.content === "string" && rc.content.toLowerCase().includes(searchLower)) return true;
|
|
1759
|
+
if (Array.isArray(rc.content)) {
|
|
1760
|
+
if (rc.content.some((c) => {
|
|
1761
|
+
if (c.text && c.text.toLowerCase().includes(searchLower)) return true;
|
|
1762
|
+
if (c.type === "tool_use" && c.name && c.name.toLowerCase().includes(searchLower)) return true;
|
|
1763
|
+
if (c.type === "thinking" && c.thinking && c.thinking.toLowerCase().includes(searchLower)) return true;
|
|
1764
|
+
return false;
|
|
1765
|
+
})) return true;
|
|
1766
|
+
}
|
|
1767
|
+
}
|
|
1768
|
+
if (e.response?.toolCalls?.some((t) => t.name.toLowerCase().includes(searchLower))) return true;
|
|
1769
|
+
return false;
|
|
1770
|
+
});
|
|
1771
|
+
}
|
|
1772
|
+
filtered.sort((a, b) => b.timestamp - a.timestamp);
|
|
1773
|
+
const total = filtered.length;
|
|
1774
|
+
const totalPages = Math.ceil(total / limit);
|
|
1775
|
+
const start = (page - 1) * limit;
|
|
1776
|
+
return {
|
|
1777
|
+
entries: filtered.slice(start, start + limit),
|
|
1778
|
+
total,
|
|
1779
|
+
page,
|
|
1780
|
+
limit,
|
|
1781
|
+
totalPages
|
|
1782
|
+
};
|
|
1783
|
+
}
|
|
1784
|
+
function getEntry(id) {
|
|
1785
|
+
return historyState.entries.find((e) => e.id === id);
|
|
1786
|
+
}
|
|
1787
|
+
function getSessions() {
|
|
1788
|
+
const sessions = Array.from(historyState.sessions.values()).sort((a, b) => b.lastActivity - a.lastActivity);
|
|
1789
|
+
return {
|
|
1790
|
+
sessions,
|
|
1791
|
+
total: sessions.length
|
|
1792
|
+
};
|
|
1793
|
+
}
|
|
1794
|
+
function getSession(id) {
|
|
1795
|
+
return historyState.sessions.get(id);
|
|
1796
|
+
}
|
|
1797
|
+
function getSessionEntries(sessionId) {
|
|
1798
|
+
return historyState.entries.filter((e) => e.sessionId === sessionId).sort((a, b) => a.timestamp - b.timestamp);
|
|
1799
|
+
}
|
|
1800
|
+
function clearHistory() {
|
|
1801
|
+
historyState.entries = [];
|
|
1802
|
+
historyState.sessions = /* @__PURE__ */ new Map();
|
|
1803
|
+
historyState.currentSessionId = generateId();
|
|
1804
|
+
}
|
|
1805
|
+
function deleteSession(sessionId) {
|
|
1806
|
+
if (!historyState.sessions.has(sessionId)) return false;
|
|
1807
|
+
historyState.entries = historyState.entries.filter((e) => e.sessionId !== sessionId);
|
|
1808
|
+
historyState.sessions.delete(sessionId);
|
|
1809
|
+
if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId();
|
|
1810
|
+
return true;
|
|
1811
|
+
}
|
|
1812
|
+
function getStats() {
|
|
1813
|
+
const entries = historyState.entries;
|
|
1814
|
+
const modelDist = {};
|
|
1815
|
+
const endpointDist = {};
|
|
1816
|
+
const hourlyActivity = {};
|
|
1817
|
+
let totalInput = 0;
|
|
1818
|
+
let totalOutput = 0;
|
|
1819
|
+
let totalDuration = 0;
|
|
1820
|
+
let durationCount = 0;
|
|
1821
|
+
let successCount = 0;
|
|
1822
|
+
let failCount = 0;
|
|
1823
|
+
for (const entry of entries) {
|
|
1824
|
+
const model = entry.response?.model || entry.request.model;
|
|
1825
|
+
modelDist[model] = (modelDist[model] || 0) + 1;
|
|
1826
|
+
endpointDist[entry.endpoint] = (endpointDist[entry.endpoint] || 0) + 1;
|
|
1827
|
+
const d = new Date(entry.timestamp);
|
|
1828
|
+
const hour = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}T${String(d.getHours()).padStart(2, "0")}`;
|
|
1829
|
+
hourlyActivity[hour] = (hourlyActivity[hour] || 0) + 1;
|
|
1830
|
+
if (entry.response) {
|
|
1831
|
+
if (entry.response.success) successCount++;
|
|
1832
|
+
else failCount++;
|
|
1833
|
+
totalInput += entry.response.usage.input_tokens;
|
|
1834
|
+
totalOutput += entry.response.usage.output_tokens;
|
|
1835
|
+
}
|
|
1836
|
+
if (entry.durationMs) {
|
|
1837
|
+
totalDuration += entry.durationMs;
|
|
1838
|
+
durationCount++;
|
|
1839
|
+
}
|
|
1840
|
+
}
|
|
1841
|
+
const recentActivity = Object.entries(hourlyActivity).sort(([a], [b]) => a.localeCompare(b)).slice(-24).map(([hour, count]) => ({
|
|
1842
|
+
hour,
|
|
1843
|
+
count
|
|
1844
|
+
}));
|
|
1845
|
+
return {
|
|
1846
|
+
totalRequests: entries.length,
|
|
1847
|
+
successfulRequests: successCount,
|
|
1848
|
+
failedRequests: failCount,
|
|
1849
|
+
totalInputTokens: totalInput,
|
|
1850
|
+
totalOutputTokens: totalOutput,
|
|
1851
|
+
averageDurationMs: durationCount > 0 ? totalDuration / durationCount : 0,
|
|
1852
|
+
modelDistribution: modelDist,
|
|
1853
|
+
endpointDistribution: endpointDist,
|
|
1854
|
+
recentActivity,
|
|
1855
|
+
activeSessions: historyState.sessions.size
|
|
1856
|
+
};
|
|
1857
|
+
}
|
|
1858
|
+
function exportHistory(format = "json") {
|
|
1859
|
+
if (format === "json") return JSON.stringify({
|
|
1860
|
+
sessions: Array.from(historyState.sessions.values()),
|
|
1861
|
+
entries: historyState.entries
|
|
1862
|
+
}, null, 2);
|
|
1863
|
+
const headers = [
|
|
1864
|
+
"id",
|
|
1865
|
+
"session_id",
|
|
1866
|
+
"timestamp",
|
|
1867
|
+
"endpoint",
|
|
1868
|
+
"request_model",
|
|
1869
|
+
"message_count",
|
|
1870
|
+
"stream",
|
|
1871
|
+
"success",
|
|
1872
|
+
"response_model",
|
|
1873
|
+
"input_tokens",
|
|
1874
|
+
"output_tokens",
|
|
1875
|
+
"duration_ms",
|
|
1876
|
+
"stop_reason",
|
|
1877
|
+
"error"
|
|
1878
|
+
];
|
|
1879
|
+
const rows = historyState.entries.map((e) => [
|
|
1880
|
+
e.id,
|
|
1881
|
+
e.sessionId,
|
|
1882
|
+
formatLocalTimestamp(e.timestamp),
|
|
1883
|
+
e.endpoint,
|
|
1884
|
+
e.request.model,
|
|
1885
|
+
e.request.messages.length,
|
|
1886
|
+
e.request.stream,
|
|
1887
|
+
e.response?.success ?? "",
|
|
1888
|
+
e.response?.model ?? "",
|
|
1889
|
+
e.response?.usage.input_tokens ?? "",
|
|
1890
|
+
e.response?.usage.output_tokens ?? "",
|
|
1891
|
+
e.durationMs ?? "",
|
|
1892
|
+
e.response?.stop_reason ?? "",
|
|
1893
|
+
e.response?.error ?? ""
|
|
1894
|
+
]);
|
|
1895
|
+
return [headers.join(","), ...rows.map((r) => r.join(","))].join("\n");
|
|
1896
|
+
}
|
|
1897
|
+
|
|
1898
|
+
//#endregion
|
|
1899
|
+
//#region src/lib/shutdown.ts
|
|
1900
|
+
let serverInstance = null;
|
|
1901
|
+
let _isShuttingDown = false;
|
|
1902
|
+
let shutdownResolve = null;
|
|
1903
|
+
/** Drain timeouts based on active request types */
|
|
1904
|
+
const THINKING_DRAIN_TIMEOUT_MS = 18e4;
|
|
1905
|
+
const NORMAL_DRAIN_TIMEOUT_MS = 6e4;
|
|
1906
|
+
const MIN_DRAIN_TIMEOUT_MS = 5e3;
|
|
1907
|
+
const DRAIN_POLL_INTERVAL_MS = 500;
|
|
1908
|
+
const DRAIN_PROGRESS_INTERVAL_MS = 5e3;
|
|
1909
|
+
/** Check if the server is in shutdown state (used by middleware to reject new requests) */
|
|
1910
|
+
function getIsShuttingDown() {
|
|
1911
|
+
return _isShuttingDown;
|
|
1912
|
+
}
|
|
1913
|
+
/**
|
|
1914
|
+
* Returns a promise that resolves when the server is shut down via signal.
|
|
1915
|
+
* Used by runServer() to keep the async function alive until shutdown.
|
|
1916
|
+
*/
|
|
1917
|
+
function waitForShutdown() {
|
|
1918
|
+
return new Promise((resolve) => {
|
|
1919
|
+
shutdownResolve = resolve;
|
|
1920
|
+
});
|
|
1921
|
+
}
|
|
1922
|
+
/** Store the server instance for shutdown */
|
|
1923
|
+
function setServerInstance(server) {
|
|
1924
|
+
serverInstance = server;
|
|
1925
|
+
}
|
|
1926
|
+
/**
|
|
1927
|
+
* Compute drain timeout based on currently active requests.
|
|
1928
|
+
* Thinking requests get more time because they can take 120s+.
|
|
1929
|
+
*/
|
|
1930
|
+
function computeDrainTimeout() {
|
|
1931
|
+
const active = requestTracker.getActiveRequests();
|
|
1932
|
+
if (active.length === 0) return MIN_DRAIN_TIMEOUT_MS;
|
|
1933
|
+
return active.some((r) => r.tags?.some((t) => t.startsWith("thinking:"))) ? THINKING_DRAIN_TIMEOUT_MS : NORMAL_DRAIN_TIMEOUT_MS;
|
|
1934
|
+
}
|
|
1935
|
+
/** Log a summary of active requests during drain */
|
|
1936
|
+
function logActiveRequestsSummary(requests) {
|
|
1937
|
+
const now = Date.now();
|
|
1938
|
+
const lines = requests.map((req) => {
|
|
1939
|
+
const age = Math.round((now - req.startTime) / 1e3);
|
|
1940
|
+
const model = req.model || "unknown";
|
|
1941
|
+
const tags = req.tags?.length ? ` [${req.tags.join(", ")}]` : "";
|
|
1942
|
+
return ` ${req.method} ${req.path} ${model} (${req.status}, ${age}s)${tags}`;
|
|
1943
|
+
});
|
|
1944
|
+
consola.info(`Waiting for ${requests.length} active request(s):\n${lines.join("\n")}`);
|
|
1945
|
+
}
|
|
1946
|
+
/**
|
|
1947
|
+
* Wait for all active requests to complete, with periodic progress logging.
|
|
1948
|
+
* Returns "drained" when all requests finish, "timeout" if deadline is reached.
|
|
1949
|
+
*/
|
|
1950
|
+
async function drainActiveRequests(timeoutMs) {
|
|
1951
|
+
const deadline = Date.now() + timeoutMs;
|
|
1952
|
+
let lastProgressLog = 0;
|
|
1953
|
+
while (Date.now() < deadline) {
|
|
1954
|
+
const active = requestTracker.getActiveRequests();
|
|
1955
|
+
if (active.length === 0) return "drained";
|
|
1956
|
+
const now = Date.now();
|
|
1957
|
+
if (now - lastProgressLog >= DRAIN_PROGRESS_INTERVAL_MS) {
|
|
1958
|
+
lastProgressLog = now;
|
|
1959
|
+
logActiveRequestsSummary(active);
|
|
1960
|
+
}
|
|
1961
|
+
await new Promise((resolve) => setTimeout(resolve, DRAIN_POLL_INTERVAL_MS));
|
|
1962
|
+
}
|
|
1963
|
+
return "timeout";
|
|
1964
|
+
}
|
|
1965
|
+
/** Perform graceful shutdown */
|
|
1966
|
+
async function gracefulShutdown(signal) {
|
|
1967
|
+
_isShuttingDown = true;
|
|
1968
|
+
consola.info(`Received ${signal}, shutting down gracefully...`);
|
|
1969
|
+
stopTokenRefresh();
|
|
1970
|
+
const wsClients = getClientCount();
|
|
1971
|
+
if (wsClients > 0) {
|
|
1972
|
+
closeAllClients();
|
|
1973
|
+
consola.info(`Disconnected ${wsClients} WebSocket client(s)`);
|
|
1974
|
+
}
|
|
1975
|
+
if (serverInstance) {
|
|
1976
|
+
const activeCount = requestTracker.getActiveRequests().length;
|
|
1977
|
+
const drainTimeout = computeDrainTimeout();
|
|
1978
|
+
if (activeCount > 0) {
|
|
1979
|
+
consola.info(`Draining ${activeCount} active request(s), timeout ${drainTimeout / 1e3}s`);
|
|
1980
|
+
if (await drainActiveRequests(drainTimeout) === "timeout") {
|
|
1981
|
+
const remaining = requestTracker.getActiveRequests();
|
|
1982
|
+
consola.warn(`Drain timeout, force-closing ${remaining.length} remaining request(s)`);
|
|
1983
|
+
} else consola.info("All requests completed");
|
|
1984
|
+
}
|
|
1985
|
+
try {
|
|
1986
|
+
await serverInstance.close(true);
|
|
1987
|
+
} catch (error) {
|
|
1988
|
+
consola.error("Error closing server:", error);
|
|
1989
|
+
}
|
|
1990
|
+
}
|
|
1991
|
+
consola.info("Shutdown complete");
|
|
1992
|
+
shutdownResolve?.();
|
|
1993
|
+
}
|
|
1994
|
+
/** Setup process signal handlers for graceful shutdown */
|
|
1995
|
+
function setupShutdownHandlers() {
|
|
1996
|
+
const handler = (signal) => {
|
|
1997
|
+
if (_isShuttingDown) {
|
|
1998
|
+
consola.warn("Second signal received, forcing immediate exit");
|
|
1999
|
+
process.exit(1);
|
|
2000
|
+
}
|
|
2001
|
+
gracefulShutdown(signal);
|
|
2002
|
+
};
|
|
2003
|
+
process.on("SIGINT", () => handler("SIGINT"));
|
|
2004
|
+
process.on("SIGTERM", () => handler("SIGTERM"));
|
|
2005
|
+
}
|
|
2006
|
+
|
|
2007
|
+
//#endregion
|
|
2008
|
+
//#region src/lib/tui/tracker.ts
|
|
2009
|
+
var RequestTracker = class {
|
|
2010
|
+
requests = /* @__PURE__ */ new Map();
|
|
2011
|
+
renderer = null;
|
|
2012
|
+
completedQueue = [];
|
|
2013
|
+
completedTimeouts = /* @__PURE__ */ new Map();
|
|
2014
|
+
historySize = 5;
|
|
2015
|
+
completedDisplayMs = 2e3;
|
|
2016
|
+
setRenderer(renderer) {
|
|
2017
|
+
this.renderer = renderer;
|
|
2018
|
+
}
|
|
2019
|
+
setOptions(options) {
|
|
2020
|
+
if (options.historySize !== void 0) this.historySize = options.historySize;
|
|
2021
|
+
if (options.completedDisplayMs !== void 0) this.completedDisplayMs = options.completedDisplayMs;
|
|
2022
|
+
}
|
|
2023
|
+
/**
|
|
2024
|
+
* Start tracking a new request
|
|
2025
|
+
* Returns the tracking ID
|
|
2026
|
+
*/
|
|
2027
|
+
startRequest(options) {
|
|
2028
|
+
const id = generateId();
|
|
2029
|
+
const request = {
|
|
2030
|
+
id,
|
|
2031
|
+
method: options.method,
|
|
2032
|
+
path: options.path,
|
|
2033
|
+
model: options.model,
|
|
2034
|
+
startTime: Date.now(),
|
|
2035
|
+
status: "executing",
|
|
2036
|
+
isHistoryAccess: options.isHistoryAccess
|
|
1637
2037
|
};
|
|
1638
2038
|
this.requests.set(id, request);
|
|
1639
2039
|
this.renderer?.onRequestStart(request);
|
|
@@ -2293,7 +2693,7 @@ const setupClaudeCode = defineCommand({
|
|
|
2293
2693
|
|
|
2294
2694
|
//#endregion
|
|
2295
2695
|
//#region package.json
|
|
2296
|
-
var version = "0.7.17-beta.0";
|
|
2696
|
+
var version = "0.7.17";
|
|
2297
2697
|
|
|
2298
2698
|
//#endregion
|
|
2299
2699
|
//#region src/lib/adaptive-rate-limiter.ts
|
|
@@ -2324,558 +2724,270 @@ var AdaptiveRateLimiter = class {
|
|
|
2324
2724
|
lastRequestTime = 0;
|
|
2325
2725
|
/** Current step in gradual recovery (index into gradualRecoverySteps) */
|
|
2326
2726
|
recoveryStepIndex = 0;
|
|
2327
|
-
constructor(config = {}) {
|
|
2328
|
-
this.config = {
|
|
2329
|
-
...DEFAULT_CONFIG,
|
|
2330
|
-
...config
|
|
2331
|
-
};
|
|
2332
|
-
}
|
|
2333
|
-
/**
|
|
2334
|
-
* Execute a request with adaptive rate limiting.
|
|
2335
|
-
* Returns a promise that resolves when the request succeeds.
|
|
2336
|
-
* The request will be retried automatically on 429 errors.
|
|
2337
|
-
*/
|
|
2338
|
-
async execute(fn) {
|
|
2339
|
-
if (this.mode === "normal") return this.executeInNormalMode(fn);
|
|
2340
|
-
if (this.mode === "recovering") return this.executeInRecoveringMode(fn);
|
|
2341
|
-
return this.enqueue(fn);
|
|
2342
|
-
}
|
|
2343
|
-
/**
|
|
2344
|
-
* Check if an error is a rate limit error (429) and extract Retry-After if available
|
|
2345
|
-
*/
|
|
2346
|
-
isRateLimitError(error) {
|
|
2347
|
-
if (error && typeof error === "object") {
|
|
2348
|
-
if ("status" in error && error.status === 429) return {
|
|
2349
|
-
isRateLimit: true,
|
|
2350
|
-
retryAfter: this.extractRetryAfter(error)
|
|
2351
|
-
};
|
|
2352
|
-
if ("responseText" in error && typeof error.responseText === "string") try {
|
|
2353
|
-
const parsed = JSON.parse(error.responseText);
|
|
2354
|
-
if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "code" in parsed.error && parsed.error.code === "rate_limited") return { isRateLimit: true };
|
|
2355
|
-
} catch {}
|
|
2356
|
-
}
|
|
2357
|
-
return { isRateLimit: false };
|
|
2358
|
-
}
|
|
2359
|
-
/**
|
|
2360
|
-
* Extract Retry-After value from error response
|
|
2361
|
-
*/
|
|
2362
|
-
extractRetryAfter(error) {
|
|
2363
|
-
if (!error || typeof error !== "object") return void 0;
|
|
2364
|
-
if ("responseText" in error && typeof error.responseText === "string") try {
|
|
2365
|
-
const parsed = JSON.parse(error.responseText);
|
|
2366
|
-
if (parsed && typeof parsed === "object" && "retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
|
|
2367
|
-
if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "retry_after" in parsed.error && typeof parsed.error.retry_after === "number") return parsed.error.retry_after;
|
|
2368
|
-
} catch {}
|
|
2369
|
-
}
|
|
2370
|
-
/**
|
|
2371
|
-
* Execute in normal mode - full speed
|
|
2372
|
-
*/
|
|
2373
|
-
async executeInNormalMode(fn) {
|
|
2374
|
-
try {
|
|
2375
|
-
return {
|
|
2376
|
-
result: await fn(),
|
|
2377
|
-
queueWaitMs: 0
|
|
2378
|
-
};
|
|
2379
|
-
} catch (error) {
|
|
2380
|
-
const { isRateLimit, retryAfter } = this.isRateLimitError(error);
|
|
2381
|
-
if (isRateLimit) {
|
|
2382
|
-
this.enterRateLimitedMode();
|
|
2383
|
-
return this.enqueue(fn, retryAfter);
|
|
2384
|
-
}
|
|
2385
|
-
throw error;
|
|
2386
|
-
}
|
|
2387
|
-
}
|
|
2388
|
-
/**
|
|
2389
|
-
* Execute in recovering mode - gradual speedup
|
|
2390
|
-
*/
|
|
2391
|
-
async executeInRecoveringMode(fn) {
|
|
2392
|
-
const startTime = Date.now();
|
|
2393
|
-
const currentInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
|
|
2394
|
-
if (currentInterval > 0) {
|
|
2395
|
-
const elapsedMs = Date.now() - this.lastRequestTime;
|
|
2396
|
-
const requiredMs = currentInterval * 1e3;
|
|
2397
|
-
if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
|
|
2398
|
-
const waitMs = requiredMs - elapsedMs;
|
|
2399
|
-
await this.sleep(waitMs);
|
|
2400
|
-
}
|
|
2401
|
-
}
|
|
2402
|
-
this.lastRequestTime = Date.now();
|
|
2403
|
-
try {
|
|
2404
|
-
const result = await fn();
|
|
2405
|
-
this.recoveryStepIndex++;
|
|
2406
|
-
if (this.recoveryStepIndex >= this.config.gradualRecoverySteps.length) this.completeRecovery();
|
|
2407
|
-
else {
|
|
2408
|
-
const nextInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
|
|
2409
|
-
consola.info(`[RateLimiter] Ramp-up step ${this.recoveryStepIndex}/${this.config.gradualRecoverySteps.length} (next interval: ${nextInterval}s)`);
|
|
2410
|
-
}
|
|
2411
|
-
return {
|
|
2412
|
-
result,
|
|
2413
|
-
queueWaitMs: Date.now() - startTime
|
|
2414
|
-
};
|
|
2415
|
-
} catch (error) {
|
|
2416
|
-
const { isRateLimit, retryAfter } = this.isRateLimitError(error);
|
|
2417
|
-
if (isRateLimit) {
|
|
2418
|
-
consola.warn("[RateLimiter] Hit rate limit during ramp-up, returning to rate-limited mode");
|
|
2419
|
-
this.enterRateLimitedMode();
|
|
2420
|
-
return this.enqueue(fn, retryAfter);
|
|
2421
|
-
}
|
|
2422
|
-
throw error;
|
|
2423
|
-
}
|
|
2727
|
+
constructor(config = {}) {
|
|
2728
|
+
this.config = {
|
|
2729
|
+
...DEFAULT_CONFIG,
|
|
2730
|
+
...config
|
|
2731
|
+
};
|
|
2424
2732
|
}
|
|
2425
2733
|
/**
|
|
2426
|
-
*
|
|
2734
|
+
* Execute a request with adaptive rate limiting.
|
|
2735
|
+
* Returns a promise that resolves when the request succeeds.
|
|
2736
|
+
* The request will be retried automatically on 429 errors.
|
|
2427
2737
|
*/
|
|
2428
|
-
|
|
2429
|
-
if (this.mode === "
|
|
2430
|
-
this.mode
|
|
2431
|
-
this.
|
|
2432
|
-
this.consecutiveSuccesses = 0;
|
|
2433
|
-
consola.warn(`[RateLimiter] Entering rate-limited mode. Requests will be queued with exponential backoff (base: ${this.config.baseRetryIntervalSeconds}s).`);
|
|
2738
|
+
async execute(fn) {
|
|
2739
|
+
if (this.mode === "normal") return this.executeInNormalMode(fn);
|
|
2740
|
+
if (this.mode === "recovering") return this.executeInRecoveringMode(fn);
|
|
2741
|
+
return this.enqueue(fn);
|
|
2434
2742
|
}
|
|
2435
2743
|
/**
|
|
2436
|
-
* Check if
|
|
2744
|
+
* Check if an error is a rate limit error (429) and extract Retry-After if available
|
|
2437
2745
|
*/
|
|
2438
|
-
|
|
2439
|
-
if (
|
|
2440
|
-
|
|
2441
|
-
|
|
2442
|
-
|
|
2443
|
-
|
|
2444
|
-
if (
|
|
2445
|
-
|
|
2446
|
-
return true;
|
|
2447
|
-
}
|
|
2746
|
+
isRateLimitError(error) {
|
|
2747
|
+
if (error && typeof error === "object") {
|
|
2748
|
+
if ("status" in error && error.status === 429) return {
|
|
2749
|
+
isRateLimit: true,
|
|
2750
|
+
retryAfter: this.extractRetryAfter(error)
|
|
2751
|
+
};
|
|
2752
|
+
if ("responseText" in error && typeof error.responseText === "string") try {
|
|
2753
|
+
const parsed = JSON.parse(error.responseText);
|
|
2754
|
+
if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "code" in parsed.error && parsed.error.code === "rate_limited") return { isRateLimit: true };
|
|
2755
|
+
} catch {}
|
|
2448
2756
|
}
|
|
2449
|
-
return false;
|
|
2450
|
-
}
|
|
2451
|
-
/**
|
|
2452
|
-
* Start gradual recovery mode
|
|
2453
|
-
*/
|
|
2454
|
-
startGradualRecovery() {
|
|
2455
|
-
this.mode = "recovering";
|
|
2456
|
-
this.recoveryStepIndex = 0;
|
|
2457
|
-
this.rateLimitedAt = null;
|
|
2458
|
-
this.consecutiveSuccesses = 0;
|
|
2459
|
-
const firstInterval = this.config.gradualRecoverySteps[0] ?? 0;
|
|
2460
|
-
consola.info(`[RateLimiter] Starting ramp-up (${this.config.gradualRecoverySteps.length} steps, first interval: ${firstInterval}s)`);
|
|
2757
|
+
return { isRateLimit: false };
|
|
2461
2758
|
}
|
|
2462
2759
|
/**
|
|
2463
|
-
*
|
|
2760
|
+
* Extract Retry-After value from error response
|
|
2464
2761
|
*/
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
|
|
2762
|
+
extractRetryAfter(error) {
|
|
2763
|
+
if (!error || typeof error !== "object") return void 0;
|
|
2764
|
+
if ("responseText" in error && typeof error.responseText === "string") try {
|
|
2765
|
+
const parsed = JSON.parse(error.responseText);
|
|
2766
|
+
if (parsed && typeof parsed === "object" && "retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
|
|
2767
|
+
if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "retry_after" in parsed.error && typeof parsed.error.retry_after === "number") return parsed.error.retry_after;
|
|
2768
|
+
} catch {}
|
|
2469
2769
|
}
|
|
2470
2770
|
/**
|
|
2471
|
-
*
|
|
2771
|
+
* Execute in normal mode - full speed
|
|
2472
2772
|
*/
|
|
2473
|
-
|
|
2474
|
-
|
|
2475
|
-
|
|
2476
|
-
|
|
2477
|
-
|
|
2478
|
-
reject,
|
|
2479
|
-
retryCount: 0,
|
|
2480
|
-
retryAfterSeconds,
|
|
2481
|
-
enqueuedAt: Date.now()
|
|
2773
|
+
async executeInNormalMode(fn) {
|
|
2774
|
+
try {
|
|
2775
|
+
return {
|
|
2776
|
+
result: await fn(),
|
|
2777
|
+
queueWaitMs: 0
|
|
2482
2778
|
};
|
|
2483
|
-
|
|
2484
|
-
|
|
2485
|
-
|
|
2486
|
-
|
|
2487
|
-
|
|
2488
|
-
}
|
|
2489
|
-
this.processQueue();
|
|
2490
|
-
});
|
|
2491
|
-
}
|
|
2492
|
-
/**
|
|
2493
|
-
* Calculate retry interval with exponential backoff
|
|
2494
|
-
*/
|
|
2495
|
-
calculateRetryInterval(request) {
|
|
2496
|
-
if (request.retryAfterSeconds !== void 0 && request.retryAfterSeconds > 0) return request.retryAfterSeconds;
|
|
2497
|
-
const backoff = this.config.baseRetryIntervalSeconds * Math.pow(2, request.retryCount);
|
|
2498
|
-
return Math.min(backoff, this.config.maxRetryIntervalSeconds);
|
|
2499
|
-
}
|
|
2500
|
-
/**
|
|
2501
|
-
* Process the queue
|
|
2502
|
-
*/
|
|
2503
|
-
async processQueue() {
|
|
2504
|
-
if (this.processing) return;
|
|
2505
|
-
this.processing = true;
|
|
2506
|
-
while (this.queue.length > 0) {
|
|
2507
|
-
const request = this.queue[0];
|
|
2508
|
-
if (this.shouldAttemptRecovery()) this.startGradualRecovery();
|
|
2509
|
-
const elapsedMs = Date.now() - this.lastRequestTime;
|
|
2510
|
-
const requiredMs = (request.retryCount > 0 ? this.calculateRetryInterval(request) : this.config.requestIntervalSeconds) * 1e3;
|
|
2511
|
-
if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
|
|
2512
|
-
const waitMs = requiredMs - elapsedMs;
|
|
2513
|
-
const waitSec = Math.ceil(waitMs / 1e3);
|
|
2514
|
-
consola.info(`[RateLimiter] Waiting ${waitSec}s before next request...`);
|
|
2515
|
-
await this.sleep(waitMs);
|
|
2516
|
-
}
|
|
2517
|
-
this.lastRequestTime = Date.now();
|
|
2518
|
-
try {
|
|
2519
|
-
const result = await request.execute();
|
|
2520
|
-
this.queue.shift();
|
|
2521
|
-
this.consecutiveSuccesses++;
|
|
2522
|
-
request.retryAfterSeconds = void 0;
|
|
2523
|
-
const queueWaitMs = Date.now() - request.enqueuedAt;
|
|
2524
|
-
request.resolve({
|
|
2525
|
-
result,
|
|
2526
|
-
queueWaitMs
|
|
2527
|
-
});
|
|
2528
|
-
if (this.mode === "rate-limited") consola.info(`[RateLimiter] Request succeeded (${this.consecutiveSuccesses}/${this.config.consecutiveSuccessesForRecovery} for ramp-up)`);
|
|
2529
|
-
} catch (error) {
|
|
2530
|
-
const { isRateLimit, retryAfter } = this.isRateLimitError(error);
|
|
2531
|
-
if (isRateLimit) {
|
|
2532
|
-
request.retryCount++;
|
|
2533
|
-
request.retryAfterSeconds = retryAfter;
|
|
2534
|
-
this.consecutiveSuccesses = 0;
|
|
2535
|
-
this.rateLimitedAt = Date.now();
|
|
2536
|
-
const nextInterval = this.calculateRetryInterval(request);
|
|
2537
|
-
const source = retryAfter ? "server Retry-After" : "exponential backoff";
|
|
2538
|
-
consola.warn(`[RateLimiter] Request failed with 429 (retry #${request.retryCount}). Retrying in ${nextInterval}s (${source})...`);
|
|
2539
|
-
} else {
|
|
2540
|
-
this.queue.shift();
|
|
2541
|
-
request.reject(error);
|
|
2542
|
-
}
|
|
2779
|
+
} catch (error) {
|
|
2780
|
+
const { isRateLimit, retryAfter } = this.isRateLimitError(error);
|
|
2781
|
+
if (isRateLimit) {
|
|
2782
|
+
this.enterRateLimitedMode();
|
|
2783
|
+
return this.enqueue(fn, retryAfter);
|
|
2543
2784
|
}
|
|
2785
|
+
throw error;
|
|
2544
2786
|
}
|
|
2545
|
-
this.processing = false;
|
|
2546
|
-
}
|
|
2547
|
-
sleep(ms) {
|
|
2548
|
-
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
2549
|
-
}
|
|
2550
|
-
/**
|
|
2551
|
-
* Get current status for debugging/monitoring
|
|
2552
|
-
*/
|
|
2553
|
-
getStatus() {
|
|
2554
|
-
return {
|
|
2555
|
-
mode: this.mode,
|
|
2556
|
-
queueLength: this.queue.length,
|
|
2557
|
-
consecutiveSuccesses: this.consecutiveSuccesses,
|
|
2558
|
-
rateLimitedAt: this.rateLimitedAt
|
|
2559
|
-
};
|
|
2560
|
-
}
|
|
2561
|
-
};
|
|
2562
|
-
let rateLimiterInstance = null;
|
|
2563
|
-
/**
|
|
2564
|
-
* Initialize the adaptive rate limiter with configuration
|
|
2565
|
-
*/
|
|
2566
|
-
function initAdaptiveRateLimiter(config = {}) {
|
|
2567
|
-
rateLimiterInstance = new AdaptiveRateLimiter(config);
|
|
2568
|
-
const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
|
|
2569
|
-
const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
|
|
2570
|
-
const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
|
|
2571
|
-
const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
|
|
2572
|
-
const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
|
|
2573
|
-
const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
|
|
2574
|
-
consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
|
|
2575
|
-
}
|
|
2576
|
-
/**
|
|
2577
|
-
* Execute a request with adaptive rate limiting.
|
|
2578
|
-
* If rate limiter is not initialized, executes immediately.
|
|
2579
|
-
* Returns the result along with queue wait time.
|
|
2580
|
-
*/
|
|
2581
|
-
async function executeWithAdaptiveRateLimit(fn) {
|
|
2582
|
-
if (!rateLimiterInstance) return {
|
|
2583
|
-
result: await fn(),
|
|
2584
|
-
queueWaitMs: 0
|
|
2585
|
-
};
|
|
2586
|
-
return rateLimiterInstance.execute(fn);
|
|
2587
|
-
}
|
|
2588
|
-
|
|
2589
|
-
//#endregion
|
|
2590
|
-
//#region src/lib/history.ts
|
|
2591
|
-
function formatLocalTimestamp(ts) {
|
|
2592
|
-
const d = new Date(ts);
|
|
2593
|
-
return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")} ${String(d.getHours()).padStart(2, "0")}:${String(d.getMinutes()).padStart(2, "0")}:${String(d.getSeconds()).padStart(2, "0")}`;
|
|
2594
|
-
}
|
|
2595
|
-
const historyState = {
|
|
2596
|
-
enabled: false,
|
|
2597
|
-
entries: [],
|
|
2598
|
-
sessions: /* @__PURE__ */ new Map(),
|
|
2599
|
-
currentSessionId: "",
|
|
2600
|
-
maxEntries: 200
|
|
2601
|
-
};
|
|
2602
|
-
function initHistory(enabled, maxEntries) {
|
|
2603
|
-
historyState.enabled = enabled;
|
|
2604
|
-
historyState.maxEntries = maxEntries;
|
|
2605
|
-
historyState.entries = [];
|
|
2606
|
-
historyState.sessions = /* @__PURE__ */ new Map();
|
|
2607
|
-
historyState.currentSessionId = enabled ? generateId() : "";
|
|
2608
|
-
}
|
|
2609
|
-
function isHistoryEnabled() {
|
|
2610
|
-
return historyState.enabled;
|
|
2611
|
-
}
|
|
2612
|
-
function getCurrentSession(endpoint) {
|
|
2613
|
-
if (historyState.currentSessionId) {
|
|
2614
|
-
const session = historyState.sessions.get(historyState.currentSessionId);
|
|
2615
|
-
if (session) {
|
|
2616
|
-
session.lastActivity = Date.now();
|
|
2617
|
-
return historyState.currentSessionId;
|
|
2618
|
-
}
|
|
2619
|
-
}
|
|
2620
|
-
const now = Date.now();
|
|
2621
|
-
const sessionId = generateId();
|
|
2622
|
-
historyState.currentSessionId = sessionId;
|
|
2623
|
-
historyState.sessions.set(sessionId, {
|
|
2624
|
-
id: sessionId,
|
|
2625
|
-
startTime: now,
|
|
2626
|
-
lastActivity: now,
|
|
2627
|
-
requestCount: 0,
|
|
2628
|
-
totalInputTokens: 0,
|
|
2629
|
-
totalOutputTokens: 0,
|
|
2630
|
-
models: [],
|
|
2631
|
-
endpoint
|
|
2632
|
-
});
|
|
2633
|
-
return sessionId;
|
|
2634
|
-
}
|
|
2635
|
-
function recordRequest(endpoint, request) {
|
|
2636
|
-
if (!historyState.enabled) return "";
|
|
2637
|
-
const sessionId = getCurrentSession(endpoint);
|
|
2638
|
-
const session = historyState.sessions.get(sessionId);
|
|
2639
|
-
if (!session) return "";
|
|
2640
|
-
const entry = {
|
|
2641
|
-
id: generateId(),
|
|
2642
|
-
sessionId,
|
|
2643
|
-
timestamp: Date.now(),
|
|
2644
|
-
endpoint,
|
|
2645
|
-
request: {
|
|
2646
|
-
model: request.model,
|
|
2647
|
-
messages: request.messages,
|
|
2648
|
-
stream: request.stream,
|
|
2649
|
-
tools: request.tools,
|
|
2650
|
-
max_tokens: request.max_tokens,
|
|
2651
|
-
temperature: request.temperature,
|
|
2652
|
-
system: request.system
|
|
2653
|
-
}
|
|
2654
|
-
};
|
|
2655
|
-
historyState.entries.push(entry);
|
|
2656
|
-
session.requestCount++;
|
|
2657
|
-
if (!session.models.includes(request.model)) session.models.push(request.model);
|
|
2658
|
-
if (request.tools && request.tools.length > 0) {
|
|
2659
|
-
if (!session.toolsUsed) session.toolsUsed = [];
|
|
2660
|
-
for (const tool of request.tools) if (!session.toolsUsed.includes(tool.name)) session.toolsUsed.push(tool.name);
|
|
2661
2787
|
}
|
|
2662
|
-
|
|
2663
|
-
|
|
2664
|
-
|
|
2665
|
-
|
|
2788
|
+
/**
|
|
2789
|
+
* Execute in recovering mode - gradual speedup
|
|
2790
|
+
*/
|
|
2791
|
+
async executeInRecoveringMode(fn) {
|
|
2792
|
+
const startTime = Date.now();
|
|
2793
|
+
const currentInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
|
|
2794
|
+
if (currentInterval > 0) {
|
|
2795
|
+
const elapsedMs = Date.now() - this.lastRequestTime;
|
|
2796
|
+
const requiredMs = currentInterval * 1e3;
|
|
2797
|
+
if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
|
|
2798
|
+
const waitMs = requiredMs - elapsedMs;
|
|
2799
|
+
await this.sleep(waitMs);
|
|
2800
|
+
}
|
|
2801
|
+
}
|
|
2802
|
+
this.lastRequestTime = Date.now();
|
|
2803
|
+
try {
|
|
2804
|
+
const result = await fn();
|
|
2805
|
+
this.recoveryStepIndex++;
|
|
2806
|
+
if (this.recoveryStepIndex >= this.config.gradualRecoverySteps.length) this.completeRecovery();
|
|
2807
|
+
else {
|
|
2808
|
+
const nextInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
|
|
2809
|
+
consola.info(`[RateLimiter] Ramp-up step ${this.recoveryStepIndex}/${this.config.gradualRecoverySteps.length} (next interval: ${nextInterval}s)`);
|
|
2810
|
+
}
|
|
2811
|
+
return {
|
|
2812
|
+
result,
|
|
2813
|
+
queueWaitMs: Date.now() - startTime
|
|
2814
|
+
};
|
|
2815
|
+
} catch (error) {
|
|
2816
|
+
const { isRateLimit, retryAfter } = this.isRateLimitError(error);
|
|
2817
|
+
if (isRateLimit) {
|
|
2818
|
+
consola.warn("[RateLimiter] Hit rate limit during ramp-up, returning to rate-limited mode");
|
|
2819
|
+
this.enterRateLimitedMode();
|
|
2820
|
+
return this.enqueue(fn, retryAfter);
|
|
2821
|
+
}
|
|
2822
|
+
throw error;
|
|
2666
2823
|
}
|
|
2667
2824
|
}
|
|
2668
|
-
|
|
2669
|
-
|
|
2670
|
-
|
|
2671
|
-
|
|
2672
|
-
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
|
|
2676
|
-
|
|
2677
|
-
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
|
|
2681
|
-
|
|
2825
|
+
/**
|
|
2826
|
+
* Enter rate-limited mode
|
|
2827
|
+
*/
|
|
2828
|
+
enterRateLimitedMode() {
|
|
2829
|
+
if (this.mode === "rate-limited") return;
|
|
2830
|
+
this.mode = "rate-limited";
|
|
2831
|
+
this.rateLimitedAt = Date.now();
|
|
2832
|
+
this.consecutiveSuccesses = 0;
|
|
2833
|
+
consola.warn(`[RateLimiter] Entering rate-limited mode. Requests will be queued with exponential backoff (base: ${this.config.baseRetryIntervalSeconds}s).`);
|
|
2834
|
+
}
|
|
2835
|
+
/**
|
|
2836
|
+
* Check if we should try to recover to normal mode
|
|
2837
|
+
*/
|
|
2838
|
+
shouldAttemptRecovery() {
|
|
2839
|
+
if (this.consecutiveSuccesses >= this.config.consecutiveSuccessesForRecovery) {
|
|
2840
|
+
consola.info(`[RateLimiter] ${this.consecutiveSuccesses} consecutive successes. Starting ramp-up.`);
|
|
2841
|
+
return true;
|
|
2682
2842
|
}
|
|
2683
|
-
|
|
2843
|
+
if (this.rateLimitedAt) {
|
|
2844
|
+
if (Date.now() - this.rateLimitedAt >= this.config.recoveryTimeoutMinutes * 60 * 1e3) {
|
|
2845
|
+
consola.info(`[RateLimiter] ${this.config.recoveryTimeoutMinutes} minutes elapsed. Starting ramp-up.`);
|
|
2846
|
+
return true;
|
|
2847
|
+
}
|
|
2848
|
+
}
|
|
2849
|
+
return false;
|
|
2684
2850
|
}
|
|
2685
|
-
|
|
2686
|
-
|
|
2687
|
-
|
|
2688
|
-
|
|
2689
|
-
|
|
2690
|
-
|
|
2691
|
-
|
|
2692
|
-
|
|
2851
|
+
/**
|
|
2852
|
+
* Start gradual recovery mode
|
|
2853
|
+
*/
|
|
2854
|
+
startGradualRecovery() {
|
|
2855
|
+
this.mode = "recovering";
|
|
2856
|
+
this.recoveryStepIndex = 0;
|
|
2857
|
+
this.rateLimitedAt = null;
|
|
2858
|
+
this.consecutiveSuccesses = 0;
|
|
2859
|
+
const firstInterval = this.config.gradualRecoverySteps[0] ?? 0;
|
|
2860
|
+
consola.info(`[RateLimiter] Starting ramp-up (${this.config.gradualRecoverySteps.length} steps, first interval: ${firstInterval}s)`);
|
|
2693
2861
|
}
|
|
2694
|
-
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
|
|
2698
|
-
|
|
2699
|
-
|
|
2700
|
-
|
|
2701
|
-
filtered = filtered.filter((e) => e.request.model.toLowerCase().includes(modelLower) || e.response?.model.toLowerCase().includes(modelLower));
|
|
2862
|
+
/**
|
|
2863
|
+
* Complete recovery to normal mode
|
|
2864
|
+
*/
|
|
2865
|
+
completeRecovery() {
|
|
2866
|
+
this.mode = "normal";
|
|
2867
|
+
this.recoveryStepIndex = 0;
|
|
2868
|
+
consola.success("[RateLimiter] Exiting rate-limited mode.");
|
|
2702
2869
|
}
|
|
2703
|
-
|
|
2704
|
-
|
|
2705
|
-
|
|
2706
|
-
|
|
2707
|
-
|
|
2708
|
-
|
|
2709
|
-
|
|
2710
|
-
|
|
2711
|
-
|
|
2712
|
-
|
|
2713
|
-
|
|
2714
|
-
|
|
2715
|
-
|
|
2716
|
-
|
|
2717
|
-
|
|
2718
|
-
|
|
2719
|
-
|
|
2720
|
-
|
|
2721
|
-
if ((typeof c.input === "string" ? c.input : JSON.stringify(c.input)).toLowerCase().includes(searchLower)) return true;
|
|
2722
|
-
}
|
|
2723
|
-
}
|
|
2724
|
-
if (c.type === "tool_result" && c.content) {
|
|
2725
|
-
if ((typeof c.content === "string" ? c.content : JSON.stringify(c.content)).toLowerCase().includes(searchLower)) return true;
|
|
2726
|
-
}
|
|
2727
|
-
if (c.type === "thinking") {
|
|
2728
|
-
const thinking = c.thinking;
|
|
2729
|
-
if (thinking && thinking.toLowerCase().includes(searchLower)) return true;
|
|
2730
|
-
}
|
|
2731
|
-
return false;
|
|
2732
|
-
});
|
|
2733
|
-
return false;
|
|
2734
|
-
})) return true;
|
|
2735
|
-
if (e.response?.content) {
|
|
2736
|
-
const rc = e.response.content;
|
|
2737
|
-
if (typeof rc.content === "string" && rc.content.toLowerCase().includes(searchLower)) return true;
|
|
2738
|
-
if (Array.isArray(rc.content)) {
|
|
2739
|
-
if (rc.content.some((c) => {
|
|
2740
|
-
if (c.text && c.text.toLowerCase().includes(searchLower)) return true;
|
|
2741
|
-
if (c.type === "tool_use" && c.name && c.name.toLowerCase().includes(searchLower)) return true;
|
|
2742
|
-
if (c.type === "thinking" && c.thinking && c.thinking.toLowerCase().includes(searchLower)) return true;
|
|
2743
|
-
return false;
|
|
2744
|
-
})) return true;
|
|
2745
|
-
}
|
|
2870
|
+
/**
|
|
2871
|
+
* Enqueue a request for later execution
|
|
2872
|
+
*/
|
|
2873
|
+
enqueue(fn, retryAfterSeconds) {
|
|
2874
|
+
return new Promise((resolve, reject) => {
|
|
2875
|
+
const request = {
|
|
2876
|
+
execute: fn,
|
|
2877
|
+
resolve,
|
|
2878
|
+
reject,
|
|
2879
|
+
retryCount: 0,
|
|
2880
|
+
retryAfterSeconds,
|
|
2881
|
+
enqueuedAt: Date.now()
|
|
2882
|
+
};
|
|
2883
|
+
this.queue.push(request);
|
|
2884
|
+
if (this.queue.length > 1) {
|
|
2885
|
+
const position = this.queue.length;
|
|
2886
|
+
const estimatedWait = (position - 1) * this.config.requestIntervalSeconds;
|
|
2887
|
+
consola.info(`[RateLimiter] Request queued (position ${position}, ~${estimatedWait}s wait)`);
|
|
2746
2888
|
}
|
|
2747
|
-
|
|
2748
|
-
return false;
|
|
2889
|
+
this.processQueue();
|
|
2749
2890
|
});
|
|
2750
2891
|
}
|
|
2751
|
-
|
|
2752
|
-
|
|
2753
|
-
|
|
2754
|
-
|
|
2755
|
-
|
|
2756
|
-
|
|
2757
|
-
|
|
2758
|
-
|
|
2759
|
-
|
|
2760
|
-
|
|
2761
|
-
|
|
2762
|
-
|
|
2763
|
-
|
|
2764
|
-
|
|
2765
|
-
|
|
2766
|
-
|
|
2767
|
-
|
|
2768
|
-
|
|
2769
|
-
|
|
2770
|
-
|
|
2771
|
-
|
|
2772
|
-
|
|
2773
|
-
|
|
2774
|
-
|
|
2775
|
-
}
|
|
2776
|
-
|
|
2777
|
-
|
|
2778
|
-
|
|
2779
|
-
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
|
|
2783
|
-
|
|
2784
|
-
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
-
|
|
2792
|
-
|
|
2793
|
-
|
|
2794
|
-
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
|
|
2798
|
-
|
|
2799
|
-
|
|
2800
|
-
|
|
2801
|
-
|
|
2802
|
-
|
|
2803
|
-
const model = entry.response?.model || entry.request.model;
|
|
2804
|
-
modelDist[model] = (modelDist[model] || 0) + 1;
|
|
2805
|
-
endpointDist[entry.endpoint] = (endpointDist[entry.endpoint] || 0) + 1;
|
|
2806
|
-
const d = new Date(entry.timestamp);
|
|
2807
|
-
const hour = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}T${String(d.getHours()).padStart(2, "0")}`;
|
|
2808
|
-
hourlyActivity[hour] = (hourlyActivity[hour] || 0) + 1;
|
|
2809
|
-
if (entry.response) {
|
|
2810
|
-
if (entry.response.success) successCount++;
|
|
2811
|
-
else failCount++;
|
|
2812
|
-
totalInput += entry.response.usage.input_tokens;
|
|
2813
|
-
totalOutput += entry.response.usage.output_tokens;
|
|
2814
|
-
}
|
|
2815
|
-
if (entry.durationMs) {
|
|
2816
|
-
totalDuration += entry.durationMs;
|
|
2817
|
-
durationCount++;
|
|
2892
|
+
/**
|
|
2893
|
+
* Calculate retry interval with exponential backoff
|
|
2894
|
+
*/
|
|
2895
|
+
calculateRetryInterval(request) {
|
|
2896
|
+
if (request.retryAfterSeconds !== void 0 && request.retryAfterSeconds > 0) return request.retryAfterSeconds;
|
|
2897
|
+
const backoff = this.config.baseRetryIntervalSeconds * Math.pow(2, request.retryCount);
|
|
2898
|
+
return Math.min(backoff, this.config.maxRetryIntervalSeconds);
|
|
2899
|
+
}
|
|
2900
|
+
/**
|
|
2901
|
+
* Process the queue
|
|
2902
|
+
*/
|
|
2903
|
+
async processQueue() {
|
|
2904
|
+
if (this.processing) return;
|
|
2905
|
+
this.processing = true;
|
|
2906
|
+
while (this.queue.length > 0) {
|
|
2907
|
+
const request = this.queue[0];
|
|
2908
|
+
if (this.shouldAttemptRecovery()) this.startGradualRecovery();
|
|
2909
|
+
const elapsedMs = Date.now() - this.lastRequestTime;
|
|
2910
|
+
const requiredMs = (request.retryCount > 0 ? this.calculateRetryInterval(request) : this.config.requestIntervalSeconds) * 1e3;
|
|
2911
|
+
if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
|
|
2912
|
+
const waitMs = requiredMs - elapsedMs;
|
|
2913
|
+
const waitSec = Math.ceil(waitMs / 1e3);
|
|
2914
|
+
consola.info(`[RateLimiter] Waiting ${waitSec}s before next request...`);
|
|
2915
|
+
await this.sleep(waitMs);
|
|
2916
|
+
}
|
|
2917
|
+
this.lastRequestTime = Date.now();
|
|
2918
|
+
try {
|
|
2919
|
+
const result = await request.execute();
|
|
2920
|
+
this.queue.shift();
|
|
2921
|
+
this.consecutiveSuccesses++;
|
|
2922
|
+
request.retryAfterSeconds = void 0;
|
|
2923
|
+
const queueWaitMs = Date.now() - request.enqueuedAt;
|
|
2924
|
+
request.resolve({
|
|
2925
|
+
result,
|
|
2926
|
+
queueWaitMs
|
|
2927
|
+
});
|
|
2928
|
+
if (this.mode === "rate-limited") consola.info(`[RateLimiter] Request succeeded (${this.consecutiveSuccesses}/${this.config.consecutiveSuccessesForRecovery} for ramp-up)`);
|
|
2929
|
+
} catch (error) {
|
|
2930
|
+
const { isRateLimit, retryAfter } = this.isRateLimitError(error);
|
|
2931
|
+
if (isRateLimit) {
|
|
2932
|
+
request.retryCount++;
|
|
2933
|
+
request.retryAfterSeconds = retryAfter;
|
|
2934
|
+
this.consecutiveSuccesses = 0;
|
|
2935
|
+
this.rateLimitedAt = Date.now();
|
|
2936
|
+
const nextInterval = this.calculateRetryInterval(request);
|
|
2937
|
+
const source = retryAfter ? "server Retry-After" : "exponential backoff";
|
|
2938
|
+
consola.warn(`[RateLimiter] Request failed with 429 (retry #${request.retryCount}). Retrying in ${nextInterval}s (${source})...`);
|
|
2939
|
+
} else {
|
|
2940
|
+
this.queue.shift();
|
|
2941
|
+
request.reject(error);
|
|
2942
|
+
}
|
|
2943
|
+
}
|
|
2818
2944
|
}
|
|
2945
|
+
this.processing = false;
|
|
2819
2946
|
}
|
|
2820
|
-
|
|
2821
|
-
|
|
2822
|
-
|
|
2823
|
-
|
|
2824
|
-
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
2828
|
-
|
|
2829
|
-
|
|
2830
|
-
|
|
2831
|
-
|
|
2832
|
-
|
|
2833
|
-
|
|
2834
|
-
|
|
2835
|
-
|
|
2947
|
+
sleep(ms) {
|
|
2948
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
2949
|
+
}
|
|
2950
|
+
/**
|
|
2951
|
+
* Get current status for debugging/monitoring
|
|
2952
|
+
*/
|
|
2953
|
+
getStatus() {
|
|
2954
|
+
return {
|
|
2955
|
+
mode: this.mode,
|
|
2956
|
+
queueLength: this.queue.length,
|
|
2957
|
+
consecutiveSuccesses: this.consecutiveSuccesses,
|
|
2958
|
+
rateLimitedAt: this.rateLimitedAt
|
|
2959
|
+
};
|
|
2960
|
+
}
|
|
2961
|
+
};
|
|
2962
|
+
let rateLimiterInstance = null;
|
|
2963
|
+
/**
|
|
2964
|
+
* Initialize the adaptive rate limiter with configuration
|
|
2965
|
+
*/
|
|
2966
|
+
function initAdaptiveRateLimiter(config = {}) {
|
|
2967
|
+
rateLimiterInstance = new AdaptiveRateLimiter(config);
|
|
2968
|
+
const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
|
|
2969
|
+
const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
|
|
2970
|
+
const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
|
|
2971
|
+
const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
|
|
2972
|
+
const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
|
|
2973
|
+
const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
|
|
2974
|
+
consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
|
|
2836
2975
|
}
|
|
2837
|
-
|
|
2838
|
-
|
|
2839
|
-
|
|
2840
|
-
|
|
2841
|
-
|
|
2842
|
-
|
|
2843
|
-
|
|
2844
|
-
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
|
|
2848
|
-
"message_count",
|
|
2849
|
-
"stream",
|
|
2850
|
-
"success",
|
|
2851
|
-
"response_model",
|
|
2852
|
-
"input_tokens",
|
|
2853
|
-
"output_tokens",
|
|
2854
|
-
"duration_ms",
|
|
2855
|
-
"stop_reason",
|
|
2856
|
-
"error"
|
|
2857
|
-
];
|
|
2858
|
-
const rows = historyState.entries.map((e) => [
|
|
2859
|
-
e.id,
|
|
2860
|
-
e.sessionId,
|
|
2861
|
-
formatLocalTimestamp(e.timestamp),
|
|
2862
|
-
e.endpoint,
|
|
2863
|
-
e.request.model,
|
|
2864
|
-
e.request.messages.length,
|
|
2865
|
-
e.request.stream,
|
|
2866
|
-
e.response?.success ?? "",
|
|
2867
|
-
e.response?.model ?? "",
|
|
2868
|
-
e.response?.usage.input_tokens ?? "",
|
|
2869
|
-
e.response?.usage.output_tokens ?? "",
|
|
2870
|
-
e.durationMs ?? "",
|
|
2871
|
-
e.response?.stop_reason ?? "",
|
|
2872
|
-
e.response?.error ?? ""
|
|
2873
|
-
]);
|
|
2874
|
-
return [headers.join(","), ...rows.map((r) => r.join(","))].join("\n");
|
|
2976
|
+
/**
|
|
2977
|
+
* Execute a request with adaptive rate limiting.
|
|
2978
|
+
* If rate limiter is not initialized, executes immediately.
|
|
2979
|
+
* Returns the result along with queue wait time.
|
|
2980
|
+
*/
|
|
2981
|
+
async function executeWithAdaptiveRateLimit(fn) {
|
|
2982
|
+
if (!rateLimiterInstance) return {
|
|
2983
|
+
result: await fn(),
|
|
2984
|
+
queueWaitMs: 0
|
|
2985
|
+
};
|
|
2986
|
+
return rateLimiterInstance.execute(fn);
|
|
2875
2987
|
}
|
|
2876
2988
|
|
|
2877
2989
|
//#endregion
|
|
2878
|
-
//#region src/lib/proxy.ts
|
|
2990
|
+
//#region src/lib/config/proxy.ts
|
|
2879
2991
|
/**
|
|
2880
2992
|
* Custom dispatcher that routes requests through proxies based on environment variables.
|
|
2881
2993
|
* Extends Agent to properly inherit the Dispatcher interface.
|
|
@@ -3700,7 +3812,7 @@ function sanitizeOpenAIMessages(payload) {
|
|
|
3700
3812
|
}
|
|
3701
3813
|
|
|
3702
3814
|
//#endregion
|
|
3703
|
-
//#region src/lib/tokenizer.ts
|
|
3815
|
+
//#region src/lib/models/tokenizer.ts
|
|
3704
3816
|
const ENCODING_MAP = {
|
|
3705
3817
|
o200k_base: () => import("gpt-tokenizer/encoding/o200k_base"),
|
|
3706
3818
|
cl100k_base: () => import("gpt-tokenizer/encoding/cl100k_base"),
|
|
@@ -4278,7 +4390,7 @@ function createTruncationResponseMarkerOpenAI(result) {
|
|
|
4278
4390
|
}
|
|
4279
4391
|
|
|
4280
4392
|
//#endregion
|
|
4281
|
-
//#region src/lib/
|
|
4393
|
+
//#region src/lib/models/resolver.ts
|
|
4282
4394
|
/**
|
|
4283
4395
|
* Unified model name resolution and normalization.
|
|
4284
4396
|
*
|
|
@@ -4397,11 +4509,77 @@ const createChatCompletions = async (payload) => {
|
|
|
4397
4509
|
};
|
|
4398
4510
|
|
|
4399
4511
|
//#endregion
|
|
4400
|
-
//#region src/routes/shared.ts
|
|
4512
|
+
//#region src/routes/shared/payload.ts
|
|
4513
|
+
/**
|
|
4514
|
+
* Payload utilities for request handlers.
|
|
4515
|
+
*/
|
|
4516
|
+
/** Build final payload with sanitization (no pre-truncation — truncation is now reactive) */
|
|
4517
|
+
function buildFinalPayload(payload, _model) {
|
|
4518
|
+
const { payload: sanitizedPayload, removedCount: sanitizeRemovedCount, systemReminderRemovals } = sanitizeOpenAIMessages(payload);
|
|
4519
|
+
return {
|
|
4520
|
+
finalPayload: sanitizedPayload,
|
|
4521
|
+
truncateResult: null,
|
|
4522
|
+
sanitizeRemovedCount,
|
|
4523
|
+
systemReminderRemovals
|
|
4524
|
+
};
|
|
4525
|
+
}
|
|
4401
4526
|
/**
|
|
4402
|
-
*
|
|
4403
|
-
*
|
|
4527
|
+
* Log helpful debugging information when a 413 error occurs.
|
|
4528
|
+
* Also adjusts the dynamic byte limit for future requests.
|
|
4404
4529
|
*/
|
|
4530
|
+
async function logPayloadSizeInfo(payload, model) {
	// Size figures are JSON string lengths; the same number is reported to onRequestTooLarge.
	const LARGE_MESSAGE_THRESHOLD = 5e4;
	const messageCount = payload.messages.length;
	const bodySize = JSON.stringify(payload).length;
	const bodySizeKB = bytesToKB(bodySize);
	onRequestTooLarge(bodySize);
	// Single pass over the messages: count images, inline (data:) image size, and oversized messages.
	const stats = {
		images: 0,
		inlineImageSize: 0,
		largeMessages: 0
	};
	for (const message of payload.messages) {
		const content = message.content;
		if (Array.isArray(content)) {
			for (const part of content) {
				if (part.type !== "image_url") continue;
				stats.images += 1;
				const url = part.image_url.url;
				if (url.startsWith("data:")) stats.inlineImageSize += url.length;
			}
		}
		const contentLength = typeof content === "string" ? content.length : JSON.stringify(content).length;
		if (contentLength > LARGE_MESSAGE_THRESHOLD) stats.largeMessages += 1;
	}
	const header = [
		"",
		"╭─────────────────────────────────────────────────────────╮",
		"│ 413 Request Entity Too Large │",
		"╰─────────────────────────────────────────────────────────╯",
		"",
		` Request body size: ${bodySizeKB} KB (${bodySize.toLocaleString()} bytes)`,
		` Message count: ${messageCount}`
	];
	for (const line of header) consola.info(line);
	if (model) {
		// Token estimation is best-effort; a failure is only logged at debug level.
		try {
			const tokenCount = await getTokenCount(payload, model);
			const limit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
			consola.info(` Estimated tokens: ${tokenCount.input.toLocaleString()} / ${limit.toLocaleString()}`);
		} catch (error) {
			consola.debug("Token count estimation failed:", error);
		}
	}
	const hasImages = stats.images > 0;
	if (hasImages) {
		const imageSizeKB = bytesToKB(stats.inlineImageSize);
		consola.info(` Images: ${stats.images} (${imageSizeKB} KB base64 data)`);
	}
	if (stats.largeMessages > 0) consola.info(` Large messages (>50KB): ${stats.largeMessages}`);
	consola.info("");
	consola.info(" Suggestions:");
	if (hasImages) consola.info(" • Remove or resize large images in the conversation");
	consola.info(" • Start a new conversation with /clear or /reset");
	consola.info(" • Reduce conversation history by deleting old messages");
	consola.info("");
}
|
|
4573
|
+
|
|
4574
|
+
//#endregion
|
|
4575
|
+
//#region src/routes/shared/response.ts
|
|
4576
|
+
/**
 * Type guard for non-streaming responses.
 * A response counts as non-streaming when it carries its own `choices` property.
 *
 * @param response - Value returned by the chat completions call
 * @returns true when `choices` is an own property of the response
 */
function isNonStreaming(response) {
	const ownsChoices = Object.hasOwn(response, "choices");
	return ownsChoices;
}
|
|
4580
|
+
|
|
4581
|
+
//#endregion
|
|
4582
|
+
//#region src/routes/shared/tracking.ts
|
|
4405
4583
|
/** Helper to update tracker model */
|
|
4406
4584
|
function updateTrackerModel(trackingId, model) {
|
|
4407
4585
|
if (!trackingId) return;
|
|
@@ -4413,40 +4591,6 @@ function updateTrackerStatus(trackingId, status) {
|
|
|
4413
4591
|
if (!trackingId) return;
|
|
4414
4592
|
requestTracker.updateRequest(trackingId, { status });
|
|
4415
4593
|
}
|
|
4416
|
-
/** Record error response to history, preserving full error details for debugging */
|
|
4417
|
-
function recordErrorResponse(ctx, model, error) {
|
|
4418
|
-
const errorMessage = getErrorMessage(error);
|
|
4419
|
-
let content = null;
|
|
4420
|
-
if (error instanceof Error && "responseText" in error && typeof error.responseText === "string") {
|
|
4421
|
-
const responseText = error.responseText;
|
|
4422
|
-
const status = "status" in error ? error.status : void 0;
|
|
4423
|
-
if (responseText) {
|
|
4424
|
-
let formattedBody;
|
|
4425
|
-
try {
|
|
4426
|
-
formattedBody = JSON.stringify(JSON.parse(responseText), null, 2);
|
|
4427
|
-
} catch {
|
|
4428
|
-
formattedBody = responseText;
|
|
4429
|
-
}
|
|
4430
|
-
content = {
|
|
4431
|
-
role: "assistant",
|
|
4432
|
-
content: [{
|
|
4433
|
-
type: "text",
|
|
4434
|
-
text: `[API Error Response${status ? ` - HTTP ${status}` : ""}]\n\n${formattedBody}`
|
|
4435
|
-
}]
|
|
4436
|
-
};
|
|
4437
|
-
}
|
|
4438
|
-
}
|
|
4439
|
-
recordResponse(ctx.historyId, {
|
|
4440
|
-
success: false,
|
|
4441
|
-
model,
|
|
4442
|
-
usage: {
|
|
4443
|
-
input_tokens: 0,
|
|
4444
|
-
output_tokens: 0
|
|
4445
|
-
},
|
|
4446
|
-
error: errorMessage,
|
|
4447
|
-
content
|
|
4448
|
-
}, Date.now() - ctx.startTime);
|
|
4449
|
-
}
|
|
4450
4594
|
/** Complete TUI tracking */
|
|
4451
4595
|
function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs) {
|
|
4452
4596
|
if (!trackingId) return;
|
|
@@ -4465,16 +4609,39 @@ function failTracking(trackingId, error) {
|
|
|
4465
4609
|
if (!trackingId) return;
|
|
4466
4610
|
requestTracker.failRequest(trackingId, getErrorMessage(error, "Stream error"));
|
|
4467
4611
|
}
|
|
4468
|
-
/**
|
|
4469
|
-
|
|
4470
|
-
|
|
4471
|
-
|
|
4472
|
-
|
|
4473
|
-
|
|
4474
|
-
|
|
4475
|
-
|
|
4476
|
-
|
|
4477
|
-
|
|
4612
|
+
/**
 * Record a failed request in history, keeping the upstream error body for debugging.
 *
 * When the error is an Error carrying a non-empty string `responseText`, that body
 * (pretty-printed if it parses as JSON) is stored as a synthetic assistant text block;
 * otherwise the recorded content is null.
 *
 * @param ctx - Request context; `historyId` and `startTime` are read
 * @param model - Model name stored with the history entry
 * @param error - The caught error value
 */
function recordErrorResponse(ctx, model, error) {
	const errorMessage = getErrorMessage(error);
	const carriesBody = error instanceof Error && "responseText" in error && typeof error.responseText === "string";
	let content = null;
	if (carriesBody && error.responseText) {
		const rawBody = error.responseText;
		const status = "status" in error ? error.status : void 0;
		let prettyBody = rawBody;
		try {
			prettyBody = JSON.stringify(JSON.parse(rawBody), null, 2);
		} catch {
			// Body is not JSON: keep the raw text as-is.
		}
		const statusSuffix = status ? ` - HTTP ${status}` : "";
		content = {
			role: "assistant",
			content: [{
				type: "text",
				text: `[API Error Response${statusSuffix}]\n\n${prettyBody}`
			}]
		};
	}
	const entry = {
		success: false,
		model,
		usage: {
			input_tokens: 0,
			output_tokens: 0
		},
		error: errorMessage,
		content
	};
	recordResponse(ctx.historyId, entry, Date.now() - ctx.startTime);
}
|
|
4479
4646
|
/** Record streaming error to history, preserving any data accumulated before the error */
|
|
4480
4647
|
function recordStreamError(opts) {
|
|
@@ -4496,66 +4663,168 @@ function recordStreamError(opts) {
|
|
|
4496
4663
|
} : null
|
|
4497
4664
|
}, Date.now() - ctx.startTime);
|
|
4498
4665
|
}
|
|
4499
|
-
|
|
4500
|
-
|
|
4501
|
-
|
|
4502
|
-
|
|
4503
|
-
|
|
4504
|
-
|
|
4505
|
-
|
|
4506
|
-
|
|
4507
|
-
|
|
4508
|
-
|
|
4509
|
-
|
|
4510
|
-
|
|
4511
|
-
}
|
|
4666
|
+
|
|
4667
|
+
//#endregion
|
|
4668
|
+
//#region src/routes/shared/truncation.ts
|
|
4669
|
+
/**
 * Create a marker to prepend to responses indicating auto-truncation occurred.
 * Works with both OpenAI and Anthropic truncate results.
 *
 * @param result - Truncate result; reads `wasCompacted`, `originalTokens`,
 *   `compactedTokens` and `removedMessageCount`
 * @returns "" when nothing was compacted; a generic marker when any statistic is
 *   undefined; otherwise a marker with message/token counts and the reduction percentage
 */
function createTruncationMarker$1(result) {
	if (!result.wasCompacted) return "";
	const { originalTokens, compactedTokens, removedMessageCount } = result;
	const statsMissing = [
		originalTokens,
		compactedTokens,
		removedMessageCount
	].includes(void 0);
	if (statsMissing) return `\n\n---\n[Auto-truncated: conversation history was reduced to fit context limits]`;
	// Guard the division: a zero original count would otherwise render "NaN%" or "-Infinity%".
	const percent = originalTokens > 0 ? Math.round((originalTokens - compactedTokens) / originalTokens * 100) : 0;
	return `\n\n---\n[Auto-truncated: ${removedMessageCount} messages removed, ${originalTokens} → ${compactedTokens} tokens (${percent}% reduction)]`;
}
|
|
4680
|
+
|
|
4681
|
+
//#endregion
|
|
4682
|
+
//#region src/routes/shared/pipeline.ts
|
|
4513
4683
|
/**
|
|
4514
|
-
*
|
|
4515
|
-
*
|
|
4684
|
+
* Request execution pipeline with pluggable retry strategies.
|
|
4685
|
+
*
|
|
4686
|
+
* Unifies the retry loop pattern shared by direct-anthropic-handler,
|
|
4687
|
+
* translated-handler, and the completions handler.
|
|
4516
4688
|
*/
|
|
4517
|
-
|
|
4518
|
-
|
|
4519
|
-
|
|
4520
|
-
|
|
4521
|
-
|
|
4522
|
-
|
|
4523
|
-
|
|
4524
|
-
|
|
4525
|
-
|
|
4526
|
-
|
|
4527
|
-
|
|
4528
|
-
|
|
4529
|
-
|
|
4689
|
+
/**
 * Execute a request through the pipeline with retry strategies.
 *
 * Flow:
 * 1. Execute API call with the current payload
 * 2. On success → return response
 * 3. On failure → classify error → find first matching strategy → handle
 *    - retry → use new payload, loop back to step 1
 *    - abort, strategy failure, or no strategy → throw the upstream error
 *
 * @param opts - `adapter` (execute, logPayloadSize), `strategies`, `payload`,
 *   `originalPayload`, `model`, optional `maxRetries` (default 3) and the optional
 *   `onBeforeAttempt` / `onRetry` callbacks
 * @returns `{ response, effectivePayload, queueWaitMs, totalRetries }` on success
 * @throws The last upstream error (wrapped in an Error with `cause` when it is not an Error)
 */
async function executeRequestPipeline(opts) {
	const { adapter, strategies, originalPayload, model, maxRetries = 3, onBeforeAttempt, onRetry } = opts;
	let effectivePayload = opts.payload;
	let lastError = null;
	let totalQueueWaitMs = 0;
	for (let attempt = 0; attempt <= maxRetries; attempt++) {
		onBeforeAttempt?.(attempt, effectivePayload);
		try {
			const { result: response, queueWaitMs } = await adapter.execute(effectivePayload);
			totalQueueWaitMs += queueWaitMs;
			return {
				response,
				effectivePayload,
				queueWaitMs: totalQueueWaitMs,
				totalRetries: attempt
			};
		} catch (error) {
			lastError = error;
			if (attempt >= maxRetries) break;
			const apiError = classifyError(error);
			let handled = false;
			// Only the first strategy that claims the error is consulted; anything
			// other than a "retry" decision ends the pipeline.
			for (const strategy of strategies) {
				if (!strategy.canHandle(apiError)) continue;
				const retryContext = {
					attempt,
					originalPayload,
					model,
					maxRetries
				};
				try {
					const action = await strategy.handle(apiError, effectivePayload, retryContext);
					if (action.action === "retry") {
						consola.debug(`[Pipeline] Strategy "${strategy.name}" requests retry (attempt ${attempt + 1}/${maxRetries + 1})`);
						// waitMs is only added to the reported total here; no delay is performed in this function.
						if (action.waitMs && action.waitMs > 0) totalQueueWaitMs += action.waitMs;
						effectivePayload = action.payload;
						onRetry?.(attempt, strategy.name, action.payload, action.meta);
						handled = true;
					}
				} catch (strategyError) {
					consola.warn(`[Pipeline] Strategy "${strategy.name}" failed on attempt ${attempt + 1}:`, strategyError instanceof Error ? strategyError.message : strategyError);
				}
				break;
			}
			if (!handled) break;
		}
	}
	if (lastError) {
		if (classifyError(lastError).type === "payload_too_large") {
			// Diagnostics are best-effort: a failure while logging must not replace
			// the upstream error that callers record and rethrow.
			try {
				await adapter.logPayloadSize(effectivePayload);
			} catch (logError) {
				consola.debug("[Pipeline] Failed to log payload size diagnostics:", logError);
			}
		}
		throw lastError instanceof Error ? lastError : new Error("Unknown error", { cause: lastError });
	}
	throw new Error("Unexpected state in pipeline retry loop");
}
|
|
4753
|
+
|
|
4754
|
+
//#endregion
|
|
4755
|
+
//#region src/routes/shared/strategies/auto-truncate.ts
|
|
4756
|
+
/**
|
|
4757
|
+
* Auto-truncate retry strategy.
|
|
4758
|
+
*
|
|
4759
|
+
* Handles 413 (body too large) and token limit errors by truncating the
|
|
4760
|
+
* message payload and retrying.
|
|
4761
|
+
*/
|
|
4762
|
+
/**
 * Create an auto-truncate retry strategy.
 *
 * @param opts - Strategy options:
 *   - truncate: format-specific truncation function `(payload, model, options)`
 *   - resanitize: format-specific re-sanitization applied to the truncated payload
 *   - isEnabled: returns whether auto-truncate is currently enabled
 *   - label: prefix used in log messages
 * @returns Strategy object with `name`, `canHandle(error)` and async `handle(error, currentPayload, context)`
 */
function createAutoTruncateStrategy(opts) {
	const { truncate, resanitize, isEnabled, label } = opts;
	const HANDLED_TYPES = ["payload_too_large", "token_limit"];
	return {
		name: "auto-truncate",
		canHandle(error) {
			return isEnabled() ? HANDLED_TYPES.includes(error.type) : false;
		},
		async handle(error, currentPayload, context) {
			const { attempt, originalPayload, model, maxRetries } = context;
			const abort = {
				action: "abort",
				error
			};
			if (!model) return abort;
			const rawError = error.raw;
			if (!(rawError instanceof HTTPError)) return abort;
			// Size is the JSON string length of the payload that just failed.
			const payloadBytes = JSON.stringify(currentPayload).length;
			const parsed = tryParseAndLearnLimit(rawError, model.id, payloadBytes);
			if (!parsed) return abort;
			const attemptLabel = `${attempt + 1}/${maxRetries + 1}`;
			const limits = {
				targetTokenLimit: void 0,
				targetByteLimitBytes: void 0
			};
			if (parsed.type === "token_limit" && parsed.limit) {
				limits.targetTokenLimit = Math.floor(parsed.limit * AUTO_TRUNCATE_RETRY_FACTOR);
				consola.info(`[${label}] Attempt ${attemptLabel}: Token limit error (${parsed.current}>${parsed.limit}), retrying with limit ${limits.targetTokenLimit}...`);
			} else if (parsed.type === "body_too_large") {
				limits.targetByteLimitBytes = Math.floor(payloadBytes * AUTO_TRUNCATE_RETRY_FACTOR);
				consola.info(`[${label}] Attempt ${attemptLabel}: Body too large (${bytesToKB(payloadBytes)}KB), retrying with limit ${bytesToKB(limits.targetByteLimitBytes)}KB...`);
			}
			// Truncation always starts again from the original payload, not the previous retry's payload.
			const truncateResult = await truncate(originalPayload, model, {
				checkTokenLimit: true,
				checkByteLimit: true,
				...limits
			});
			if (!truncateResult.wasCompacted) return abort;
			const { payload, removedCount, systemReminderRemovals } = resanitize(truncateResult.payload);
			return {
				action: "retry",
				payload,
				meta: {
					truncateResult,
					sanitization: {
						removedCount,
						systemReminderRemovals
					},
					attempt: attempt + 1
				}
			};
		}
	};
}
|
|
4560
4829
|
|
|
4561
4830
|
//#endregion
|
|
@@ -4606,19 +4875,46 @@ async function handleCompletion$1(c) {
|
|
|
4606
4875
|
return executeRequest({
|
|
4607
4876
|
c,
|
|
4608
4877
|
payload,
|
|
4878
|
+
originalPayload,
|
|
4609
4879
|
selectedModel,
|
|
4610
4880
|
ctx,
|
|
4611
4881
|
trackingId
|
|
4612
4882
|
});
|
|
4613
4883
|
}
|
|
4614
4884
|
/**
|
|
4615
|
-
* Execute the API call with
|
|
4885
|
+
* Execute the API call with reactive retry pipeline.
|
|
4886
|
+
* Handles 413 and token limit errors with auto-truncation.
|
|
4616
4887
|
*/
|
|
4617
4888
|
async function executeRequest(opts) {
|
|
4618
|
-
const { c, payload, selectedModel, ctx, trackingId } = opts;
|
|
4889
|
+
const { c, payload, originalPayload, selectedModel, ctx, trackingId } = opts;
|
|
4890
|
+
const adapter = {
|
|
4891
|
+
format: "openai",
|
|
4892
|
+
sanitize: (p) => sanitizeOpenAIMessages(p),
|
|
4893
|
+
execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p)),
|
|
4894
|
+
logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
|
|
4895
|
+
};
|
|
4896
|
+
const strategies = [createAutoTruncateStrategy({
|
|
4897
|
+
truncate: (p, model, truncOpts) => autoTruncateOpenAI(p, model, truncOpts),
|
|
4898
|
+
resanitize: (p) => sanitizeOpenAIMessages(p),
|
|
4899
|
+
isEnabled: () => state.autoTruncate,
|
|
4900
|
+
label: "Completions"
|
|
4901
|
+
})];
|
|
4619
4902
|
try {
|
|
4620
|
-
const
|
|
4621
|
-
|
|
4903
|
+
const result = await executeRequestPipeline({
|
|
4904
|
+
adapter,
|
|
4905
|
+
strategies,
|
|
4906
|
+
payload,
|
|
4907
|
+
originalPayload,
|
|
4908
|
+
model: selectedModel,
|
|
4909
|
+
maxRetries: MAX_AUTO_TRUNCATE_RETRIES,
|
|
4910
|
+
onRetry: (attempt, _strategyName, _newPayload, meta) => {
|
|
4911
|
+
const retryTruncateResult = meta?.truncateResult;
|
|
4912
|
+
if (retryTruncateResult) ctx.truncateResult = retryTruncateResult;
|
|
4913
|
+
if (trackingId) requestTracker.updateRequest(trackingId, { tags: ["compact", `retry-${attempt + 1}`] });
|
|
4914
|
+
}
|
|
4915
|
+
});
|
|
4916
|
+
ctx.queueWaitMs = result.queueWaitMs;
|
|
4917
|
+
const response = result.response;
|
|
4622
4918
|
if (isNonStreaming(response)) return handleNonStreamingResponse$1(c, response, ctx);
|
|
4623
4919
|
consola.debug("Streaming response");
|
|
4624
4920
|
updateTrackerStatus(trackingId, "streaming");
|
|
@@ -4631,7 +4927,6 @@ async function executeRequest(opts) {
|
|
|
4631
4927
|
});
|
|
4632
4928
|
});
|
|
4633
4929
|
} catch (error) {
|
|
4634
|
-
if (error instanceof HTTPError && error.status === 413) await logPayloadSizeInfo(payload, selectedModel);
|
|
4635
4930
|
recordErrorResponse(ctx, payload.model, error);
|
|
4636
4931
|
throw error;
|
|
4637
4932
|
}
|
|
@@ -5715,7 +6010,7 @@ function mapOpenAIStopReasonToAnthropic(finishReason) {
|
|
|
5715
6010
|
}
|
|
5716
6011
|
|
|
5717
6012
|
//#endregion
|
|
5718
|
-
//#region src/
|
|
6013
|
+
//#region src/lib/translation/non-stream.ts
|
|
5719
6014
|
const OPENAI_TOOL_NAME_LIMIT = 64;
|
|
5720
6015
|
/**
|
|
5721
6016
|
* Ensure all tool_use blocks have corresponding tool_result responses,
|
|
@@ -6650,7 +6945,47 @@ function handleMessageDelta(delta, usage, acc) {
|
|
|
6650
6945
|
}
|
|
6651
6946
|
|
|
6652
6947
|
//#endregion
|
|
6653
|
-
//#region src/
|
|
6948
|
+
//#region src/lib/translation/message-mapping.ts
|
|
6949
|
+
/**
 * Heuristic check that a rewritten message corresponds to a given original message.
 *
 * Roles must be equal. Two string contents match when either one starts with the
 * first 100 characters of the other. Otherwise only the first content block of each
 * side is compared: types must agree, and tool_use / tool_result blocks must also
 * share their id / tool_use_id. If either side has no array blocks the messages are
 * treated as matching.
 *
 * @param orig - Message from the original payload
 * @param rewritten - Message from the rewritten payload
 * @returns true when the two messages are considered the same message
 */
function messagesMatch(orig, rewritten) {
	if (orig.role !== rewritten.role) return false;
	const origContent = orig.content;
	const rwContent = rewritten.content;
	if (typeof origContent === "string" && typeof rwContent === "string") {
		const PREFIX_LENGTH = 100;
		return rwContent.startsWith(origContent.slice(0, PREFIX_LENGTH)) || origContent.startsWith(rwContent.slice(0, PREFIX_LENGTH));
	}
	const origBlocks = Array.isArray(origContent) ? origContent : [];
	const rwBlocks = Array.isArray(rwContent) ? rwContent : [];
	if (!origBlocks.length || !rwBlocks.length) return true;
	const origHead = origBlocks[0];
	const rwHead = rwBlocks[0];
	if (origHead.type !== rwHead.type) return false;
	// Types are equal from here on, so switching on one side is enough.
	switch (origHead.type) {
		case "tool_use": return origHead.id === rwHead.id;
		case "tool_result": return origHead.tool_use_id === rwHead.tool_use_id;
		default: return true;
	}
}
|
|
6967
|
+
/**
 * Build messageMapping (rwIdx → origIdx) for the direct Anthropic path.
 *
 * Walks both lists in order: each rewritten message is matched against the originals
 * that come after the previous match, using messagesMatch. A rewritten message with
 * no match maps to -1 and does NOT consume any originals, so one unmatched message
 * no longer forces every later message to -1.
 *
 * @param original - Messages of the original payload
 * @param rewritten - Messages of the rewritten (sanitized/truncated) payload
 * @returns Array with one entry per rewritten message: the original index, or -1
 */
function buildMessageMapping(original, rewritten) {
	const mapping = [];
	let nextOrigIdx = 0;
	for (const element of rewritten) {
		let matchIdx = -1;
		for (let i = nextOrigIdx; i < original.length; i++) {
			if (messagesMatch(original[i], element)) {
				matchIdx = i;
				break;
			}
		}
		mapping.push(matchIdx);
		// Advance only on a match so later messages can still find their originals.
		if (matchIdx !== -1) nextOrigIdx = matchIdx + 1;
	}
	return mapping;
}
|
|
6986
|
+
|
|
6987
|
+
//#endregion
|
|
6988
|
+
//#region src/lib/translation/stream.ts
|
|
6654
6989
|
function isToolBlockOpen(state) {
|
|
6655
6990
|
if (!state.contentBlockOpen) return false;
|
|
6656
6991
|
return Object.values(state.toolCalls).some((tc) => tc.anthropicBlockIndex === state.contentBlockIndex);
|
|
@@ -6823,12 +7158,57 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
|
|
|
6823
7158
|
if (initialSanitized.thinking && initialSanitized.thinking.type !== "disabled") tags.push(`thinking:${initialSanitized.thinking.type}`);
|
|
6824
7159
|
if (tags.length > 0) requestTracker.updateRequest(ctx.trackingId, { tags });
|
|
6825
7160
|
}
|
|
6826
|
-
|
|
7161
|
+
const adapter = {
|
|
7162
|
+
format: "anthropic",
|
|
7163
|
+
sanitize: (p) => sanitizeAnthropicMessages(p),
|
|
7164
|
+
execute: (p) => executeWithAdaptiveRateLimit(() => createAnthropicMessages(p)),
|
|
7165
|
+
logPayloadSize: (p) => logPayloadSizeInfoAnthropic(p, selectedModel)
|
|
7166
|
+
};
|
|
7167
|
+
const strategies = [createAutoTruncateStrategy({
|
|
7168
|
+
truncate: (p, model, opts) => autoTruncateAnthropic(p, model, opts),
|
|
7169
|
+
resanitize: (p) => sanitizeAnthropicMessages(p),
|
|
7170
|
+
isEnabled: () => state.autoTruncate,
|
|
7171
|
+
label: "Anthropic"
|
|
7172
|
+
})];
|
|
6827
7173
|
let truncateResult;
|
|
6828
|
-
|
|
6829
|
-
|
|
6830
|
-
|
|
6831
|
-
|
|
7174
|
+
try {
|
|
7175
|
+
const result = await executeRequestPipeline({
|
|
7176
|
+
adapter,
|
|
7177
|
+
strategies,
|
|
7178
|
+
payload: initialSanitized,
|
|
7179
|
+
originalPayload: anthropicPayload,
|
|
7180
|
+
model: selectedModel,
|
|
7181
|
+
maxRetries: MAX_AUTO_TRUNCATE_RETRIES,
|
|
7182
|
+
onRetry: (_attempt, _strategyName, newPayload, meta) => {
|
|
7183
|
+
const retryTruncateResult = meta?.truncateResult;
|
|
7184
|
+
if (retryTruncateResult) truncateResult = retryTruncateResult;
|
|
7185
|
+
const retrySanitization = meta?.sanitization;
|
|
7186
|
+
const retryMessageMapping = buildMessageMapping(anthropicPayload.messages, newPayload.messages);
|
|
7187
|
+
recordRewrites(ctx.historyId, {
|
|
7188
|
+
truncation: retryTruncateResult ? {
|
|
7189
|
+
removedMessageCount: retryTruncateResult.removedMessageCount,
|
|
7190
|
+
originalTokens: retryTruncateResult.originalTokens,
|
|
7191
|
+
compactedTokens: retryTruncateResult.compactedTokens,
|
|
7192
|
+
processingTimeMs: retryTruncateResult.processingTimeMs
|
|
7193
|
+
} : void 0,
|
|
7194
|
+
sanitization: retrySanitization && (retrySanitization.removedCount > 0 || retrySanitization.systemReminderRemovals > 0) ? {
|
|
7195
|
+
removedBlockCount: retrySanitization.removedCount,
|
|
7196
|
+
systemReminderRemovals: retrySanitization.systemReminderRemovals
|
|
7197
|
+
} : void 0,
|
|
7198
|
+
rewrittenMessages: convertAnthropicMessages(newPayload.messages),
|
|
7199
|
+
rewrittenSystem: typeof newPayload.system === "string" ? newPayload.system : void 0,
|
|
7200
|
+
messageMapping: retryMessageMapping
|
|
7201
|
+
});
|
|
7202
|
+
if (ctx.trackingId) {
|
|
7203
|
+
const retryTags = ["compact", `retry-${meta?.attempt ?? 1}`];
|
|
7204
|
+
if (newPayload.thinking && newPayload.thinking.type !== "disabled") retryTags.push(`thinking:${newPayload.thinking.type}`);
|
|
7205
|
+
requestTracker.updateRequest(ctx.trackingId, { tags: retryTags });
|
|
7206
|
+
}
|
|
7207
|
+
}
|
|
7208
|
+
});
|
|
7209
|
+
ctx.queueWaitMs = result.queueWaitMs;
|
|
7210
|
+
const response = result.response;
|
|
7211
|
+
const effectivePayload = result.effectivePayload;
|
|
6832
7212
|
if (Symbol.asyncIterator in response) {
|
|
6833
7213
|
consola.debug("Streaming response from Copilot (direct Anthropic)");
|
|
6834
7214
|
updateTrackerStatus(ctx.trackingId, "streaming");
|
|
@@ -6843,67 +7223,9 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
|
|
|
6843
7223
|
}
|
|
6844
7224
|
return handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult);
|
|
6845
7225
|
} catch (error) {
|
|
6846
|
-
|
|
6847
|
-
|
|
6848
|
-
const payloadBytes = JSON.stringify(effectivePayload).length;
|
|
6849
|
-
const parsed = tryParseAndLearnLimit(error, selectedModel.id, payloadBytes);
|
|
6850
|
-
if (parsed) {
|
|
6851
|
-
let targetTokenLimit;
|
|
6852
|
-
let targetByteLimitBytes;
|
|
6853
|
-
if (parsed.type === "token_limit" && parsed.limit) {
|
|
6854
|
-
targetTokenLimit = Math.floor(parsed.limit * AUTO_TRUNCATE_RETRY_FACTOR);
|
|
6855
|
-
consola.info(`[Anthropic] Attempt ${attempt + 1}/${MAX_AUTO_TRUNCATE_RETRIES + 1}: Token limit error (${parsed.current}>${parsed.limit}), retrying with limit ${targetTokenLimit}...`);
|
|
6856
|
-
} else if (parsed.type === "body_too_large") {
|
|
6857
|
-
targetByteLimitBytes = Math.floor(payloadBytes * AUTO_TRUNCATE_RETRY_FACTOR);
|
|
6858
|
-
consola.info(`[Anthropic] Attempt ${attempt + 1}/${MAX_AUTO_TRUNCATE_RETRIES + 1}: Body too large (${bytesToKB(payloadBytes)}KB), retrying with limit ${bytesToKB(targetByteLimitBytes)}KB...`);
|
|
6859
|
-
}
|
|
6860
|
-
try {
|
|
6861
|
-
truncateResult = await autoTruncateAnthropic(anthropicPayload, selectedModel, {
|
|
6862
|
-
checkTokenLimit: true,
|
|
6863
|
-
checkByteLimit: true,
|
|
6864
|
-
targetTokenLimit,
|
|
6865
|
-
targetByteLimitBytes
|
|
6866
|
-
});
|
|
6867
|
-
if (truncateResult.wasCompacted) {
|
|
6868
|
-
const { payload: retrySanitized, removedCount: retryOrphanedRemovals, systemReminderRemovals: retrySystemRemovals } = sanitizeAnthropicMessages(truncateResult.payload);
|
|
6869
|
-
effectivePayload = retrySanitized;
|
|
6870
|
-
const retryMessageMapping = buildMessageMapping(anthropicPayload.messages, effectivePayload.messages);
|
|
6871
|
-
recordRewrites(ctx.historyId, {
|
|
6872
|
-
truncation: {
|
|
6873
|
-
removedMessageCount: truncateResult.removedMessageCount,
|
|
6874
|
-
originalTokens: truncateResult.originalTokens,
|
|
6875
|
-
compactedTokens: truncateResult.compactedTokens,
|
|
6876
|
-
processingTimeMs: truncateResult.processingTimeMs
|
|
6877
|
-
},
|
|
6878
|
-
sanitization: retryOrphanedRemovals > 0 || retrySystemRemovals > 0 ? {
|
|
6879
|
-
removedBlockCount: retryOrphanedRemovals,
|
|
6880
|
-
systemReminderRemovals: retrySystemRemovals
|
|
6881
|
-
} : void 0,
|
|
6882
|
-
rewrittenMessages: convertAnthropicMessages(effectivePayload.messages),
|
|
6883
|
-
rewrittenSystem: typeof effectivePayload.system === "string" ? effectivePayload.system : void 0,
|
|
6884
|
-
messageMapping: retryMessageMapping
|
|
6885
|
-
});
|
|
6886
|
-
if (ctx.trackingId) {
|
|
6887
|
-
const retryTags = ["compact", `retry-${attempt + 1}`];
|
|
6888
|
-
if (effectivePayload.thinking && effectivePayload.thinking.type !== "disabled") retryTags.push(`thinking:${effectivePayload.thinking.type}`);
|
|
6889
|
-
requestTracker.updateRequest(ctx.trackingId, { tags: retryTags });
|
|
6890
|
-
}
|
|
6891
|
-
continue;
|
|
6892
|
-
} else break;
|
|
6893
|
-
} catch (truncateError) {
|
|
6894
|
-
consola.warn(`[Anthropic] Auto-truncate failed on attempt ${attempt + 1}:`, truncateError instanceof Error ? truncateError.message : truncateError);
|
|
6895
|
-
break;
|
|
6896
|
-
}
|
|
6897
|
-
}
|
|
6898
|
-
}
|
|
6899
|
-
break;
|
|
6900
|
-
}
|
|
6901
|
-
if (lastError) {
|
|
6902
|
-
if (lastError instanceof HTTPError && lastError.status === 413) logPayloadSizeInfoAnthropic(effectivePayload, selectedModel);
|
|
6903
|
-
recordErrorResponse(ctx, anthropicPayload.model, lastError);
|
|
6904
|
-
throw lastError instanceof Error ? lastError : /* @__PURE__ */ new Error("Unknown error");
|
|
7226
|
+
recordErrorResponse(ctx, anthropicPayload.model, error);
|
|
7227
|
+
throw error;
|
|
6905
7228
|
}
|
|
6906
|
-
throw new Error("Unexpected state in retry loop");
|
|
6907
7229
|
}
|
|
6908
7230
|
/**
|
|
6909
7231
|
* Log payload size info for debugging 413 errors
|
|
@@ -7078,43 +7400,6 @@ function recordStreamingResponse$1(acc, fallbackModel, ctx) {
|
|
|
7078
7400
|
toolCalls
|
|
7079
7401
|
}, Date.now() - ctx.startTime);
|
|
7080
7402
|
}
|
|
7081
|
-
/**
|
|
7082
|
-
* Check if two messages likely correspond to the same original message.
|
|
7083
|
-
* Used by buildMessageMapping to handle cases where sanitization removes
|
|
7084
|
-
* content blocks within a message (changing its shape) or removes entire messages.
|
|
7085
|
-
*/
|
|
7086
|
-
function messagesMatch(orig, rewritten) {
|
|
7087
|
-
if (orig.role !== rewritten.role) return false;
|
|
7088
|
-
if (typeof orig.content === "string" && typeof rewritten.content === "string") return rewritten.content.startsWith(orig.content.slice(0, 100)) || orig.content.startsWith(rewritten.content.slice(0, 100));
|
|
7089
|
-
const origBlocks = Array.isArray(orig.content) ? orig.content : [];
|
|
7090
|
-
const rwBlocks = Array.isArray(rewritten.content) ? rewritten.content : [];
|
|
7091
|
-
if (origBlocks.length === 0 || rwBlocks.length === 0) return true;
|
|
7092
|
-
const ob = origBlocks[0];
|
|
7093
|
-
const rb = rwBlocks[0];
|
|
7094
|
-
if (ob.type !== rb.type) return false;
|
|
7095
|
-
if (ob.type === "tool_use" && rb.type === "tool_use") return ob.id === rb.id;
|
|
7096
|
-
if (ob.type === "tool_result" && rb.type === "tool_result") return ob.tool_use_id === rb.tool_use_id;
|
|
7097
|
-
return true;
|
|
7098
|
-
}
|
|
7099
|
-
/**
|
|
7100
|
-
* Build messageMapping (rwIdx → origIdx) for the direct Anthropic path.
|
|
7101
|
-
* Uses a two-pointer approach since rewritten messages maintain the same relative
|
|
7102
|
-
* order as originals (all transformations are deletions, never reorderings).
|
|
7103
|
-
*/
|
|
7104
|
-
function buildMessageMapping(original, rewritten) {
|
|
7105
|
-
const mapping = [];
|
|
7106
|
-
let origIdx = 0;
|
|
7107
|
-
for (const element of rewritten) while (origIdx < original.length) {
|
|
7108
|
-
if (messagesMatch(original[origIdx], element)) {
|
|
7109
|
-
mapping.push(origIdx);
|
|
7110
|
-
origIdx++;
|
|
7111
|
-
break;
|
|
7112
|
-
}
|
|
7113
|
-
origIdx++;
|
|
7114
|
-
}
|
|
7115
|
-
while (mapping.length < rewritten.length) mapping.push(-1);
|
|
7116
|
-
return mapping;
|
|
7117
|
-
}
|
|
7118
7403
|
|
|
7119
7404
|
//#endregion
|
|
7120
7405
|
//#region src/routes/messages/translated-handler.ts
|
|
@@ -7152,11 +7437,38 @@ async function handleTranslatedCompletion(c, anthropicPayload, ctx) {
|
|
|
7152
7437
|
if (anthropicPayload.thinking && anthropicPayload.thinking.type !== "disabled") tags.push(`thinking:${anthropicPayload.thinking.type}`);
|
|
7153
7438
|
if (tags.length > 0) requestTracker.updateRequest(ctx.trackingId, { tags });
|
|
7154
7439
|
}
|
|
7155
|
-
|
|
7156
|
-
|
|
7157
|
-
|
|
7158
|
-
|
|
7159
|
-
|
|
7440
|
+
const adapter = {
|
|
7441
|
+
format: "openai",
|
|
7442
|
+
sanitize: (p) => sanitizeOpenAIMessages(p),
|
|
7443
|
+
execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p)),
|
|
7444
|
+
logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
|
|
7445
|
+
};
|
|
7446
|
+
const strategies = [createAutoTruncateStrategy({
|
|
7447
|
+
truncate: (p, model, opts) => autoTruncateOpenAI(p, model, opts),
|
|
7448
|
+
resanitize: (p) => sanitizeOpenAIMessages(p),
|
|
7449
|
+
isEnabled: () => state.autoTruncate,
|
|
7450
|
+
label: "Translated"
|
|
7451
|
+
})];
|
|
7452
|
+
try {
|
|
7453
|
+
const result = await executeRequestPipeline({
|
|
7454
|
+
adapter,
|
|
7455
|
+
strategies,
|
|
7456
|
+
payload: initialOpenAIPayload,
|
|
7457
|
+
originalPayload: translatedPayload,
|
|
7458
|
+
model: selectedModel,
|
|
7459
|
+
maxRetries: MAX_AUTO_TRUNCATE_RETRIES,
|
|
7460
|
+
onRetry: (attempt, _strategyName, _newPayload, meta) => {
|
|
7461
|
+
const retryTruncateResult = meta?.truncateResult;
|
|
7462
|
+
if (retryTruncateResult) ctx.truncateResult = retryTruncateResult;
|
|
7463
|
+
if (ctx.trackingId) {
|
|
7464
|
+
const retryTags = ["compact", `retry-${attempt + 1}`];
|
|
7465
|
+
if (anthropicPayload.thinking && anthropicPayload.thinking.type !== "disabled") retryTags.push(`thinking:${anthropicPayload.thinking.type}`);
|
|
7466
|
+
requestTracker.updateRequest(ctx.trackingId, { tags: retryTags });
|
|
7467
|
+
}
|
|
7468
|
+
}
|
|
7469
|
+
});
|
|
7470
|
+
ctx.queueWaitMs = result.queueWaitMs;
|
|
7471
|
+
const response = result.response;
|
|
7160
7472
|
if (isNonStreaming(response)) return handleNonStreamingResponse({
|
|
7161
7473
|
c,
|
|
7162
7474
|
response,
|
|
@@ -7175,52 +7487,9 @@ async function handleTranslatedCompletion(c, anthropicPayload, ctx) {
|
|
|
7175
7487
|
});
|
|
7176
7488
|
});
|
|
7177
7489
|
} catch (error) {
|
|
7178
|
-
|
|
7179
|
-
|
|
7180
|
-
const payloadBytes = JSON.stringify(effectivePayload).length;
|
|
7181
|
-
const parsed = tryParseAndLearnLimit(error, selectedModel.id, payloadBytes);
|
|
7182
|
-
if (parsed) {
|
|
7183
|
-
let targetTokenLimit;
|
|
7184
|
-
let targetByteLimitBytes;
|
|
7185
|
-
if (parsed.type === "token_limit" && parsed.limit) {
|
|
7186
|
-
targetTokenLimit = Math.floor(parsed.limit * AUTO_TRUNCATE_RETRY_FACTOR);
|
|
7187
|
-
consola.info(`[Translated] Attempt ${attempt + 1}/${MAX_AUTO_TRUNCATE_RETRIES + 1}: Token limit error (${parsed.current}>${parsed.limit}), retrying with limit ${targetTokenLimit}...`);
|
|
7188
|
-
} else if (parsed.type === "body_too_large") {
|
|
7189
|
-
targetByteLimitBytes = Math.floor(payloadBytes * AUTO_TRUNCATE_RETRY_FACTOR);
|
|
7190
|
-
consola.info(`[Translated] Attempt ${attempt + 1}/${MAX_AUTO_TRUNCATE_RETRIES + 1}: Body too large (${bytesToKB(payloadBytes)}KB), retrying with limit ${bytesToKB(targetByteLimitBytes)}KB...`);
|
|
7191
|
-
}
|
|
7192
|
-
try {
|
|
7193
|
-
const retryTruncateResult = await autoTruncateOpenAI(translatedPayload, selectedModel, {
|
|
7194
|
-
checkTokenLimit: true,
|
|
7195
|
-
checkByteLimit: true,
|
|
7196
|
-
targetTokenLimit,
|
|
7197
|
-
targetByteLimitBytes
|
|
7198
|
-
});
|
|
7199
|
-
if (retryTruncateResult.wasCompacted) {
|
|
7200
|
-
const { payload: retrySanitized } = sanitizeOpenAIMessages(retryTruncateResult.payload);
|
|
7201
|
-
effectivePayload = retrySanitized;
|
|
7202
|
-
ctx.truncateResult = retryTruncateResult;
|
|
7203
|
-
if (ctx.trackingId) {
|
|
7204
|
-
const retryTags = ["compact", `retry-${attempt + 1}`];
|
|
7205
|
-
if (anthropicPayload.thinking && anthropicPayload.thinking.type !== "disabled") retryTags.push(`thinking:${anthropicPayload.thinking.type}`);
|
|
7206
|
-
requestTracker.updateRequest(ctx.trackingId, { tags: retryTags });
|
|
7207
|
-
}
|
|
7208
|
-
continue;
|
|
7209
|
-
} else break;
|
|
7210
|
-
} catch (truncateError) {
|
|
7211
|
-
consola.warn(`[Translated] Auto-truncate failed on attempt ${attempt + 1}:`, truncateError instanceof Error ? truncateError.message : truncateError);
|
|
7212
|
-
break;
|
|
7213
|
-
}
|
|
7214
|
-
}
|
|
7215
|
-
}
|
|
7216
|
-
break;
|
|
7217
|
-
}
|
|
7218
|
-
if (lastError) {
|
|
7219
|
-
if (lastError instanceof HTTPError && lastError.status === 413) await logPayloadSizeInfo(effectivePayload, selectedModel);
|
|
7220
|
-
recordErrorResponse(ctx, anthropicPayload.model, lastError);
|
|
7221
|
-
throw lastError instanceof Error ? lastError : /* @__PURE__ */ new Error("Unknown error");
|
|
7490
|
+
recordErrorResponse(ctx, anthropicPayload.model, error);
|
|
7491
|
+
throw error;
|
|
7222
7492
|
}
|
|
7223
|
-
throw new Error("Unexpected state in retry loop");
|
|
7224
7493
|
}
|
|
7225
7494
|
function handleNonStreamingResponse(opts) {
|
|
7226
7495
|
const { c, response, toolNameMapping, ctx } = opts;
|
|
@@ -7596,6 +7865,25 @@ usageRoute.get("/", async (c) => {
|
|
|
7596
7865
|
}
|
|
7597
7866
|
});
|
|
7598
7867
|
|
|
7868
|
+
//#endregion
|
|
7869
|
+
//#region src/routes/index.ts
|
|
7870
|
+
/**
|
|
7871
|
+
* Register all API routes on the given Hono app.
|
|
7872
|
+
*/
|
|
7873
|
+
function registerRoutes(app) {
|
|
7874
|
+
app.route("/chat/completions", completionRoutes);
|
|
7875
|
+
app.route("/models", modelRoutes);
|
|
7876
|
+
app.route("/embeddings", embeddingRoutes);
|
|
7877
|
+
app.route("/usage", usageRoute);
|
|
7878
|
+
app.route("/token", tokenRoute);
|
|
7879
|
+
app.route("/v1/chat/completions", completionRoutes);
|
|
7880
|
+
app.route("/v1/models", modelRoutes);
|
|
7881
|
+
app.route("/v1/embeddings", embeddingRoutes);
|
|
7882
|
+
app.route("/v1/messages", messageRoutes);
|
|
7883
|
+
app.route("/api/event_logging", eventLoggingRoutes);
|
|
7884
|
+
app.route("/history", historyRoutes);
|
|
7885
|
+
}
|
|
7886
|
+
|
|
7599
7887
|
//#endregion
|
|
7600
7888
|
//#region src/server.ts
|
|
7601
7889
|
const server = new Hono();
|
|
@@ -7622,17 +7910,7 @@ server.get("/health", (c) => {
|
|
|
7622
7910
|
}
|
|
7623
7911
|
}, healthy ? 200 : 503);
|
|
7624
7912
|
});
|
|
7625
|
-
server
|
|
7626
|
-
server.route("/models", modelRoutes);
|
|
7627
|
-
server.route("/embeddings", embeddingRoutes);
|
|
7628
|
-
server.route("/usage", usageRoute);
|
|
7629
|
-
server.route("/token", tokenRoute);
|
|
7630
|
-
server.route("/v1/chat/completions", completionRoutes);
|
|
7631
|
-
server.route("/v1/models", modelRoutes);
|
|
7632
|
-
server.route("/v1/embeddings", embeddingRoutes);
|
|
7633
|
-
server.route("/v1/messages", messageRoutes);
|
|
7634
|
-
server.route("/api/event_logging", eventLoggingRoutes);
|
|
7635
|
-
server.route("/history", historyRoutes);
|
|
7913
|
+
registerRoutes(server);
|
|
7636
7914
|
|
|
7637
7915
|
//#endregion
|
|
7638
7916
|
//#region src/start.ts
|