@hsupu/copilot-api 0.7.17-beta.0 → 0.7.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.mjs CHANGED
@@ -17,7 +17,7 @@ import { trimTrailingSlash } from "hono/trailing-slash";
17
17
  import { streamSSE } from "hono/streaming";
18
18
  import { events } from "fetch-event-stream";
19
19
 
20
- //#region src/lib/paths.ts
20
+ //#region src/lib/config/paths.ts
21
21
  const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api");
22
22
  const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token");
23
23
  const PATHS = {
@@ -55,7 +55,7 @@ const state = {
55
55
  };
56
56
 
57
57
  //#endregion
58
- //#region src/lib/api-config.ts
58
+ //#region src/lib/config/api.ts
59
59
  const standardHeaders = () => ({
60
60
  "content-type": "application/json",
61
61
  accept: "application/json"
@@ -618,6 +618,118 @@ function forwardError(c, error) {
618
618
  type: "error"
619
619
  } }, 500);
620
620
  }
621
/**
 * Turn an arbitrary thrown value into a structured ApiError record.
 * The pipeline uses the resulting `type` to route the error to a RetryStrategy.
 *
 * @param {unknown} error - Whatever was thrown or rejected.
 * @returns {{type: string, status: number, message: string, raw: unknown}}
 *   HTTPError instances are delegated to classifyHTTPError; everything else
 *   is reported with status 0 because no HTTP status is available.
 */
function classifyError(error) {
  if (error instanceof HTTPError) {
    return classifyHTTPError(error);
  }
  const isError = error instanceof Error;
  // Only a TypeError whose message mentions "fetch" counts as a network failure.
  const isFetchFailure = error instanceof TypeError && error.message.includes("fetch");
  return {
    type: isFetchFailure ? "network_error" : "bad_request",
    status: 0,
    message: isError ? error.message : String(error),
    raw: error
  };
}
646
/**
 * Map an HTTPError onto a structured ApiError using its status code and,
 * for 400 responses, hints found in the response body.
 *
 * Checks run in this order: 429, 413, 5xx, 401/403, 400 (token limit first,
 * then a rate-limit code in the body), and finally a generic "bad_request".
 *
 * @param {{status: number, responseText: string, message: string}} error
 * @returns {object} Record with `type`, `status`, `message`, optional extras
 *   (`retryAfter`, `tokenLimit`, `tokenCurrent`) and `raw` (the original error).
 */
function classifyHTTPError(error) {
  const { status, responseText, message } = error;
  // Common fields for every classification; extras are placed before `raw`.
  const build = (type, extras = {}) => ({
    type,
    status,
    message,
    ...extras,
    raw: error
  });

  if (status === 429) {
    return build("rate_limited", { retryAfter: extractRetryAfterFromBody(responseText) });
  }
  if (status === 413) {
    return build("payload_too_large");
  }
  if (status >= 500) {
    return build("server_error");
  }
  if (status === 401 || status === 403) {
    return build("auth_expired");
  }
  if (status === 400) {
    const limitInfo = tryExtractTokenLimit(responseText);
    if (limitInfo) {
      return build("token_limit", {
        tokenLimit: limitInfo.limit,
        tokenCurrent: limitInfo.current
      });
    }
    if (isRateLimitedInBody(responseText)) {
      return build("rate_limited", { retryAfter: extractRetryAfterFromBody(responseText) });
    }
  }
  return build("bad_request");
}
698
/**
 * Pull a numeric `retry_after` out of a JSON response body.
 *
 * The top-level `retry_after` wins; otherwise `error.retry_after` is used.
 *
 * @param {string} responseText - Raw response body.
 * @returns {number | undefined} The value when it is a number; undefined when
 *   the body is not JSON or carries no numeric `retry_after`.
 */
function extractRetryAfterFromBody(responseText) {
  let body;
  try {
    body = JSON.parse(responseText);
  } catch {
    // Not JSON: nothing to extract.
    return undefined;
  }
  // Look at the top level first, then at the nested `error` object.
  for (const holder of [body, body?.error]) {
    if (holder && typeof holder === "object" && typeof holder.retry_after === "number") {
      return holder.retry_after;
    }
  }
  return undefined;
}
711
/**
 * Report whether a JSON response body carries `error.code === "rate_limited"`.
 *
 * @param {string} responseText - Raw response body.
 * @returns {boolean} false when the body is not JSON, has no object-valued
 *   `error`, or the code differs.
 */
function isRateLimitedInBody(responseText) {
  let body;
  try {
    body = JSON.parse(responseText);
  } catch {
    return false;
  }
  const detail = body !== null && typeof body === "object" ? body.error : undefined;
  if (detail === null || typeof detail !== "object") {
    return false;
  }
  return detail.code === "rate_limited";
}
722
/**
 * Try to read token-limit details from a JSON response body.
 *
 * When the body has a string `error.message`, that message is handed to
 * parseTokenLimitError and its result is returned as-is.
 *
 * @param {string} responseText - Raw response body.
 * @returns {*} Result of parseTokenLimitError, or null when the body is not
 *   JSON, has no string `error.message`, or parsing throws.
 */
function tryExtractTokenLimit(responseText) {
  try {
    const detail = JSON.parse(responseText)?.error;
    const text = detail !== null && typeof detail === "object" ? detail.message : undefined;
    // The parser call stays inside the try so that a throw from it also yields null.
    if (typeof text === "string") {
      return parseTokenLimitError(text);
    }
  } catch {}
  return null;
}
621
733
 
622
734
  //#endregion
623
735
  //#region src/services/github/get-copilot-token.ts
@@ -1444,7 +1556,7 @@ const debug = defineCommand({
1444
1556
  });
1445
1557
 
1446
1558
  //#endregion
1447
- //#region src/lib/history-ws.ts
1559
+ //#region src/lib/history/ws.ts
1448
1560
  const clients = /* @__PURE__ */ new Set();
1449
1561
  function addClient(ws) {
1450
1562
  clients.add(ws);
@@ -1496,144 +1608,432 @@ function notifyEntryUpdated(entry) {
1496
1608
  }
1497
1609
 
1498
1610
  //#endregion
1499
- //#region src/lib/shutdown.ts
1500
- let serverInstance = null;
1501
- let _isShuttingDown = false;
1502
- let shutdownResolve = null;
1503
- /** Drain timeouts based on active request types */
1504
- const THINKING_DRAIN_TIMEOUT_MS = 18e4;
1505
- const NORMAL_DRAIN_TIMEOUT_MS = 6e4;
1506
- const MIN_DRAIN_TIMEOUT_MS = 5e3;
1507
- const DRAIN_POLL_INTERVAL_MS = 500;
1508
- const DRAIN_PROGRESS_INTERVAL_MS = 5e3;
1509
- /** Check if the server is in shutdown state (used by middleware to reject new requests) */
1510
- function getIsShuttingDown() {
1511
- return _isShuttingDown;
1512
- }
1513
- /**
1514
- * Returns a promise that resolves when the server is shut down via signal.
1515
- * Used by runServer() to keep the async function alive until shutdown.
1516
- */
1517
- function waitForShutdown() {
1518
- return new Promise((resolve) => {
1519
- shutdownResolve = resolve;
1520
- });
1611
+ //#region src/lib/history/store.ts
1612
+ function formatLocalTimestamp(ts) {
1613
+ const d = new Date(ts);
1614
+ return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")} ${String(d.getHours()).padStart(2, "0")}:${String(d.getMinutes()).padStart(2, "0")}:${String(d.getSeconds()).padStart(2, "0")}`;
1521
1615
  }
1522
- /** Store the server instance for shutdown */
1523
- function setServerInstance(server) {
1524
- serverInstance = server;
1616
/**
 * Module-level, in-memory store backing the history functions of this region.
 */
const historyState = {
  // Recording switch; starts off.
  enabled: false,
  // Recorded request/response entries, in insertion order.
  entries: [],
  // Session records keyed by session id.
  sessions: /* @__PURE__ */ new Map(),
  // Id of the session new requests are attached to; "" when none is set.
  currentSessionId: "",
  // Cap on `entries`; trimming only applies while this is greater than 0.
  maxEntries: 200
};
1623
+ function initHistory(enabled, maxEntries) {
1624
+ historyState.enabled = enabled;
1625
+ historyState.maxEntries = maxEntries;
1626
+ historyState.entries = [];
1627
+ historyState.sessions = /* @__PURE__ */ new Map();
1628
+ historyState.currentSessionId = enabled ? generateId() : "";
1525
1629
  }
1526
- /**
1527
- * Compute drain timeout based on currently active requests.
1528
- * Thinking requests get more time because they can take 120s+.
1529
- */
1530
- function computeDrainTimeout() {
1531
- const active = requestTracker.getActiveRequests();
1532
- if (active.length === 0) return MIN_DRAIN_TIMEOUT_MS;
1533
- return active.some((r) => r.tags?.some((t) => t.startsWith("thinking:"))) ? THINKING_DRAIN_TIMEOUT_MS : NORMAL_DRAIN_TIMEOUT_MS;
1630
+ function isHistoryEnabled() {
1631
+ return historyState.enabled;
1534
1632
  }
1535
- /** Log a summary of active requests during drain */
1536
- function logActiveRequestsSummary(requests) {
1633
+ function getCurrentSession(endpoint) {
1634
+ if (historyState.currentSessionId) {
1635
+ const session = historyState.sessions.get(historyState.currentSessionId);
1636
+ if (session) {
1637
+ session.lastActivity = Date.now();
1638
+ return historyState.currentSessionId;
1639
+ }
1640
+ }
1537
1641
  const now = Date.now();
1538
- const lines = requests.map((req) => {
1539
- const age = Math.round((now - req.startTime) / 1e3);
1540
- const model = req.model || "unknown";
1541
- const tags = req.tags?.length ? ` [${req.tags.join(", ")}]` : "";
1542
- return ` ${req.method} ${req.path} ${model} (${req.status}, ${age}s)${tags}`;
1642
+ const sessionId = generateId();
1643
+ historyState.currentSessionId = sessionId;
1644
+ historyState.sessions.set(sessionId, {
1645
+ id: sessionId,
1646
+ startTime: now,
1647
+ lastActivity: now,
1648
+ requestCount: 0,
1649
+ totalInputTokens: 0,
1650
+ totalOutputTokens: 0,
1651
+ models: [],
1652
+ endpoint
1543
1653
  });
1544
- consola.info(`Waiting for ${requests.length} active request(s):\n${lines.join("\n")}`);
1654
+ return sessionId;
1545
1655
  }
1546
- /**
1547
- * Wait for all active requests to complete, with periodic progress logging.
1548
- * Returns "drained" when all requests finish, "timeout" if deadline is reached.
1549
- */
1550
- async function drainActiveRequests(timeoutMs) {
1551
- const deadline = Date.now() + timeoutMs;
1552
- let lastProgressLog = 0;
1553
- while (Date.now() < deadline) {
1554
- const active = requestTracker.getActiveRequests();
1555
- if (active.length === 0) return "drained";
1556
- const now = Date.now();
1557
- if (now - lastProgressLog >= DRAIN_PROGRESS_INTERVAL_MS) {
1558
- lastProgressLog = now;
1559
- logActiveRequestsSummary(active);
1656
+ function recordRequest(endpoint, request) {
1657
+ if (!historyState.enabled) return "";
1658
+ const sessionId = getCurrentSession(endpoint);
1659
+ const session = historyState.sessions.get(sessionId);
1660
+ if (!session) return "";
1661
+ const entry = {
1662
+ id: generateId(),
1663
+ sessionId,
1664
+ timestamp: Date.now(),
1665
+ endpoint,
1666
+ request: {
1667
+ model: request.model,
1668
+ messages: request.messages,
1669
+ stream: request.stream,
1670
+ tools: request.tools,
1671
+ max_tokens: request.max_tokens,
1672
+ temperature: request.temperature,
1673
+ system: request.system
1560
1674
  }
1561
- await new Promise((resolve) => setTimeout(resolve, DRAIN_POLL_INTERVAL_MS));
1562
- }
1563
- return "timeout";
1564
- }
1565
- /** Perform graceful shutdown */
1566
- async function gracefulShutdown(signal) {
1567
- _isShuttingDown = true;
1568
- consola.info(`Received ${signal}, shutting down gracefully...`);
1569
- stopTokenRefresh();
1570
- const wsClients = getClientCount();
1571
- if (wsClients > 0) {
1572
- closeAllClients();
1573
- consola.info(`Disconnected ${wsClients} WebSocket client(s)`);
1675
+ };
1676
+ historyState.entries.push(entry);
1677
+ session.requestCount++;
1678
+ if (!session.models.includes(request.model)) session.models.push(request.model);
1679
+ if (request.tools && request.tools.length > 0) {
1680
+ if (!session.toolsUsed) session.toolsUsed = [];
1681
+ for (const tool of request.tools) if (!session.toolsUsed.includes(tool.name)) session.toolsUsed.push(tool.name);
1574
1682
  }
1575
- if (serverInstance) {
1576
- const activeCount = requestTracker.getActiveRequests().length;
1577
- const drainTimeout = computeDrainTimeout();
1578
- if (activeCount > 0) {
1579
- consola.info(`Draining ${activeCount} active request(s), timeout ${drainTimeout / 1e3}s`);
1580
- if (await drainActiveRequests(drainTimeout) === "timeout") {
1581
- const remaining = requestTracker.getActiveRequests();
1582
- consola.warn(`Drain timeout, force-closing ${remaining.length} remaining request(s)`);
1583
- } else consola.info("All requests completed");
1584
- }
1585
- try {
1586
- await serverInstance.close(true);
1587
- } catch (error) {
1588
- consola.error("Error closing server:", error);
1683
+ while (historyState.maxEntries > 0 && historyState.entries.length > historyState.maxEntries) {
1684
+ const removed = historyState.entries.shift();
1685
+ if (removed) {
1686
+ if (historyState.entries.filter((e) => e.sessionId === removed.sessionId).length === 0) historyState.sessions.delete(removed.sessionId);
1589
1687
  }
1590
1688
  }
1591
- consola.info("Shutdown complete");
1592
- shutdownResolve?.();
1689
+ notifyEntryAdded(entry);
1690
+ return entry.id;
1593
1691
  }
1594
- /** Setup process signal handlers for graceful shutdown */
1595
- function setupShutdownHandlers() {
1596
- const handler = (signal) => {
1597
- if (_isShuttingDown) {
1598
- consola.warn("Second signal received, forcing immediate exit");
1599
- process.exit(1);
1692
+ function recordResponse(id, response, durationMs) {
1693
+ if (!historyState.enabled || !id) return;
1694
+ const entry = historyState.entries.find((e) => e.id === id);
1695
+ if (entry) {
1696
+ entry.response = response;
1697
+ entry.durationMs = durationMs;
1698
+ const session = historyState.sessions.get(entry.sessionId);
1699
+ if (session) {
1700
+ session.totalInputTokens += response.usage.input_tokens;
1701
+ session.totalOutputTokens += response.usage.output_tokens;
1702
+ session.lastActivity = Date.now();
1600
1703
  }
1601
- gracefulShutdown(signal);
1602
- };
1603
- process.on("SIGINT", () => handler("SIGINT"));
1604
- process.on("SIGTERM", () => handler("SIGTERM"));
1704
+ notifyEntryUpdated(entry);
1705
+ }
1605
1706
  }
1606
-
1607
- //#endregion
1608
- //#region src/lib/tui/tracker.ts
1609
- var RequestTracker = class {
1610
- requests = /* @__PURE__ */ new Map();
1611
- renderer = null;
1612
- completedQueue = [];
1613
- completedTimeouts = /* @__PURE__ */ new Map();
1614
- historySize = 5;
1615
- completedDisplayMs = 2e3;
1616
- setRenderer(renderer) {
1617
- this.renderer = renderer;
1707
+ function recordRewrites(id, rewrites) {
1708
+ if (!historyState.enabled || !id) return;
1709
+ const entry = historyState.entries.find((e) => e.id === id);
1710
+ if (entry) {
1711
+ entry.rewrites = rewrites;
1712
+ if (rewrites.truncation) entry.truncation = rewrites.truncation;
1713
+ notifyEntryUpdated(entry);
1618
1714
  }
1619
- setOptions(options) {
1620
- if (options.historySize !== void 0) this.historySize = options.historySize;
1621
- if (options.completedDisplayMs !== void 0) this.completedDisplayMs = options.completedDisplayMs;
1715
+ }
1716
+ function getHistory(options = {}) {
1717
+ const { page = 1, limit = 50, model, endpoint, success, from, to, search, sessionId } = options;
1718
+ let filtered = [...historyState.entries];
1719
+ if (sessionId) filtered = filtered.filter((e) => e.sessionId === sessionId);
1720
+ if (model) {
1721
+ const modelLower = model.toLowerCase();
1722
+ filtered = filtered.filter((e) => e.request.model.toLowerCase().includes(modelLower) || e.response?.model.toLowerCase().includes(modelLower));
1622
1723
  }
1623
- /**
1624
- * Start tracking a new request
1625
- * Returns the tracking ID
1626
- */
1627
- startRequest(options) {
1628
- const id = generateId();
1629
- const request = {
1630
- id,
1631
- method: options.method,
1632
- path: options.path,
1633
- model: options.model,
1634
- startTime: Date.now(),
1635
- status: "executing",
1636
- isHistoryAccess: options.isHistoryAccess
1724
+ if (endpoint) filtered = filtered.filter((e) => e.endpoint === endpoint);
1725
+ if (success !== void 0) filtered = filtered.filter((e) => e.response?.success === success);
1726
+ if (from) filtered = filtered.filter((e) => e.timestamp >= from);
1727
+ if (to) filtered = filtered.filter((e) => e.timestamp <= to);
1728
+ if (search) {
1729
+ const searchLower = search.toLowerCase();
1730
+ filtered = filtered.filter((e) => {
1731
+ if (e.request.model.toLowerCase().includes(searchLower) || e.response?.model && e.response.model.toLowerCase().includes(searchLower)) return true;
1732
+ if (e.response?.error && e.response.error.toLowerCase().includes(searchLower)) return true;
1733
+ if (e.request.system?.toLowerCase().includes(searchLower)) return true;
1734
+ if (e.request.messages.some((m) => {
1735
+ if (typeof m.content === "string") return m.content.toLowerCase().includes(searchLower);
1736
+ if (Array.isArray(m.content)) return m.content.some((c) => {
1737
+ if (c.text && c.text.toLowerCase().includes(searchLower)) return true;
1738
+ if (c.type === "tool_use") {
1739
+ const name = c.name;
1740
+ if (name && name.toLowerCase().includes(searchLower)) return true;
1741
+ if (c.input) {
1742
+ if ((typeof c.input === "string" ? c.input : JSON.stringify(c.input)).toLowerCase().includes(searchLower)) return true;
1743
+ }
1744
+ }
1745
+ if (c.type === "tool_result" && c.content) {
1746
+ if ((typeof c.content === "string" ? c.content : JSON.stringify(c.content)).toLowerCase().includes(searchLower)) return true;
1747
+ }
1748
+ if (c.type === "thinking") {
1749
+ const thinking = c.thinking;
1750
+ if (thinking && thinking.toLowerCase().includes(searchLower)) return true;
1751
+ }
1752
+ return false;
1753
+ });
1754
+ return false;
1755
+ })) return true;
1756
+ if (e.response?.content) {
1757
+ const rc = e.response.content;
1758
+ if (typeof rc.content === "string" && rc.content.toLowerCase().includes(searchLower)) return true;
1759
+ if (Array.isArray(rc.content)) {
1760
+ if (rc.content.some((c) => {
1761
+ if (c.text && c.text.toLowerCase().includes(searchLower)) return true;
1762
+ if (c.type === "tool_use" && c.name && c.name.toLowerCase().includes(searchLower)) return true;
1763
+ if (c.type === "thinking" && c.thinking && c.thinking.toLowerCase().includes(searchLower)) return true;
1764
+ return false;
1765
+ })) return true;
1766
+ }
1767
+ }
1768
+ if (e.response?.toolCalls?.some((t) => t.name.toLowerCase().includes(searchLower))) return true;
1769
+ return false;
1770
+ });
1771
+ }
1772
+ filtered.sort((a, b) => b.timestamp - a.timestamp);
1773
+ const total = filtered.length;
1774
+ const totalPages = Math.ceil(total / limit);
1775
+ const start = (page - 1) * limit;
1776
+ return {
1777
+ entries: filtered.slice(start, start + limit),
1778
+ total,
1779
+ page,
1780
+ limit,
1781
+ totalPages
1782
+ };
1783
+ }
1784
/**
 * Look up a single history entry by its id.
 *
 * @param {string} id - Entry id to search for.
 * @returns {object | undefined} The first entry with that id, or undefined.
 */
function getEntry(id) {
  for (const candidate of historyState.entries) {
    if (candidate.id === id) {
      return candidate;
    }
  }
  return undefined;
}
1787
/**
 * List every known session, most recently active first.
 *
 * @returns {{sessions: object[], total: number}} Sessions sorted by descending
 *   `lastActivity`, plus their count.
 */
function getSessions() {
  const ordered = [...historyState.sessions.values()];
  ordered.sort((left, right) => right.lastActivity - left.lastActivity);
  return {
    sessions: ordered,
    total: ordered.length
  };
}
1794
/**
 * Fetch one session record by id.
 *
 * @param {string} id - Session id.
 * @returns {object | undefined} The stored session, or undefined when unknown.
 */
function getSession(id) {
  const { sessions } = historyState;
  return sessions.get(id);
}
1797
/**
 * Collect the entries that belong to one session, oldest first.
 *
 * @param {string} sessionId - Session whose entries are wanted.
 * @returns {object[]} A new array sorted by ascending `timestamp`.
 */
function getSessionEntries(sessionId) {
  const owned = [];
  for (const entry of historyState.entries) {
    if (entry.sessionId === sessionId) {
      owned.push(entry);
    }
  }
  owned.sort((earlier, later) => earlier.timestamp - later.timestamp);
  return owned;
}
1800
/**
 * Drop every recorded entry and session, then start over with a fresh
 * current-session id.
 */
function clearHistory() {
  Object.assign(historyState, {
    entries: [],
    sessions: /* @__PURE__ */ new Map()
  });
  // Generated only after the store has been emptied, as before.
  historyState.currentSessionId = generateId();
}
1805
/**
 * Remove one session together with all of its entries.
 *
 * If the removed session was the current one, a new current-session id is
 * generated.
 *
 * @param {string} sessionId - Session to remove.
 * @returns {boolean} true when the session existed and was removed, false otherwise.
 */
function deleteSession(sessionId) {
  const known = historyState.sessions.has(sessionId);
  if (known) {
    historyState.entries = historyState.entries.filter((entry) => entry.sessionId !== sessionId);
    historyState.sessions.delete(sessionId);
    const wasCurrent = historyState.currentSessionId === sessionId;
    if (wasCurrent) {
      historyState.currentSessionId = generateId();
    }
  }
  return known;
}
1812
/**
 * Aggregate statistics over every recorded history entry.
 *
 * Per entry it counts the model (response model when present, otherwise the
 * request model), the endpoint and the local-time hour bucket
 * (`YYYY-MM-DDTHH`). Success/failure counts and token totals only include
 * entries that have a response; the average duration only includes entries
 * with a truthy `durationMs`.
 *
 * @returns {object} Totals, distributions, the last 24 hour buckets in
 *   ascending order (`recentActivity`) and the number of stored sessions.
 */
function getStats() {
  const { entries, sessions } = historyState;
  const twoDigits = (value) => String(value).padStart(2, "0");
  const increment = (tally, key) => {
    tally[key] = (tally[key] || 0) + 1;
  };

  const modelDistribution = {};
  const endpointDistribution = {};
  const perHour = {};
  const totals = { input: 0, output: 0, duration: 0, timed: 0, ok: 0, failed: 0 };

  for (const entry of entries) {
    increment(modelDistribution, entry.response?.model || entry.request.model);
    increment(endpointDistribution, entry.endpoint);

    const when = new Date(entry.timestamp);
    const bucket = `${when.getFullYear()}-${twoDigits(when.getMonth() + 1)}-${twoDigits(when.getDate())}T${twoDigits(when.getHours())}`;
    increment(perHour, bucket);

    const { response, durationMs } = entry;
    if (response) {
      if (response.success) {
        totals.ok += 1;
      } else {
        totals.failed += 1;
      }
      totals.input += response.usage.input_tokens;
      totals.output += response.usage.output_tokens;
    }
    if (durationMs) {
      totals.duration += durationMs;
      totals.timed += 1;
    }
  }

  // Bucket keys sort chronologically as strings; keep only the newest 24.
  const recentActivity = Object.entries(perHour)
    .sort(([first], [second]) => first.localeCompare(second))
    .slice(-24)
    .map(([hour, count]) => ({ hour, count }));

  return {
    totalRequests: entries.length,
    successfulRequests: totals.ok,
    failedRequests: totals.failed,
    totalInputTokens: totals.input,
    totalOutputTokens: totals.output,
    averageDurationMs: totals.timed > 0 ? totals.duration / totals.timed : 0,
    modelDistribution,
    endpointDistribution,
    recentActivity,
    activeSessions: sessions.size
  };
}
1858
/**
 * Serialize the recorded history.
 *
 * With `format === "json"` the result is pretty-printed JSON containing all
 * sessions and entries. Any other value produces CSV: one header line
 * followed by one line per entry.
 *
 * CSV cells are quoted following RFC 4180: a cell containing a comma, a
 * double quote, CR or LF is wrapped in double quotes and embedded quotes are
 * doubled. Free-text fields such as the error message therefore no longer
 * break the row layout. Cells without those characters are emitted unchanged.
 *
 * @param {string} [format="json"] - "json" for JSON, anything else for CSV.
 * @returns {string} The serialized history.
 */
function exportHistory(format = "json") {
  if (format === "json") {
    return JSON.stringify({
      sessions: Array.from(historyState.sessions.values()),
      entries: historyState.entries
    }, null, 2);
  }

  const toCsvCell = (value) => {
    // null/undefined become empty cells, matching what Array.prototype.join does.
    const text = value == null ? "" : String(value);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  const headers = [
    "id",
    "session_id",
    "timestamp",
    "endpoint",
    "request_model",
    "message_count",
    "stream",
    "success",
    "response_model",
    "input_tokens",
    "output_tokens",
    "duration_ms",
    "stop_reason",
    "error"
  ];
  const rows = historyState.entries.map((e) => [
    e.id,
    e.sessionId,
    formatLocalTimestamp(e.timestamp),
    e.endpoint,
    e.request.model,
    e.request.messages.length,
    e.request.stream,
    e.response?.success ?? "",
    e.response?.model ?? "",
    e.response?.usage.input_tokens ?? "",
    e.response?.usage.output_tokens ?? "",
    e.durationMs ?? "",
    e.response?.stop_reason ?? "",
    e.response?.error ?? ""
  ]);
  return [headers, ...rows]
    .map((cells) => cells.map(toCsvCell).join(","))
    .join("\n");
}
1897
+
1898
+ //#endregion
1899
+ //#region src/lib/shutdown.ts
1900
/** Server handle stored by setServerInstance(); null until one is registered. */
let serverInstance = null;
/** Set to true when gracefulShutdown() starts. */
let _isShuttingDown = false;
/** Resolver for the promise created by waitForShutdown(); null until that is called. */
let shutdownResolve = null;
/** Drain budget when any active request carries a "thinking:" tag (180 s). */
const THINKING_DRAIN_TIMEOUT_MS = 180_000;
/** Drain budget for other active requests (60 s). */
const NORMAL_DRAIN_TIMEOUT_MS = 60_000;
/** Drain budget reported when nothing is active (5 s). */
const MIN_DRAIN_TIMEOUT_MS = 5_000;
/** Delay between two polls of the active-request list while draining. */
const DRAIN_POLL_INTERVAL_MS = 500;
/** Minimum spacing between two progress summaries while draining. */
const DRAIN_PROGRESS_INTERVAL_MS = 5_000;
1909
/**
 * Tell whether shutdown has begun. Middleware uses this to reject new requests.
 *
 * @returns {boolean} Current value of the shutdown flag.
 */
function getIsShuttingDown() {
  return _isShuttingDown;
}
1913
/** Promise handed to every waitForShutdown() caller; created on first use. */
let pendingShutdownPromise = null;
/**
 * Returns a promise that resolves when the server is shut down via signal.
 * Used by runServer() to keep the async function alive until shutdown.
 *
 * The promise is created once and shared. Previously each call replaced
 * `shutdownResolve`, so only the most recent caller was ever released and
 * promises from earlier calls stayed pending forever.
 *
 * @returns {Promise<void>} Settles when `shutdownResolve` is invoked.
 */
function waitForShutdown() {
  if (pendingShutdownPromise === null) {
    pendingShutdownPromise = new Promise((resolve) => {
      shutdownResolve = resolve;
    });
  }
  return pendingShutdownPromise;
}
1922
/**
 * Remember the running server so that shutdown can close it later.
 *
 * @param {object} server - Server handle; gracefulShutdown() calls `close(true)` on it.
 */
function setServerInstance(server) {
  serverInstance = server;
}
1926
/**
 * Pick the drain budget from what is currently in flight.
 *
 * Requests tagged "thinking:…" get the longest budget because they can run
 * for 120s or more.
 *
 * @returns {number} MIN_DRAIN_TIMEOUT_MS when nothing is active,
 *   THINKING_DRAIN_TIMEOUT_MS when any active request has a tag starting with
 *   "thinking:", otherwise NORMAL_DRAIN_TIMEOUT_MS.
 */
function computeDrainTimeout() {
  const inFlight = requestTracker.getActiveRequests();
  if (inFlight.length === 0) {
    return MIN_DRAIN_TIMEOUT_MS;
  }
  for (const request of inFlight) {
    if (request.tags?.some((tag) => tag.startsWith("thinking:"))) {
      return THINKING_DRAIN_TIMEOUT_MS;
    }
  }
  return NORMAL_DRAIN_TIMEOUT_MS;
}
1935
/**
 * Print one info message listing the requests that are still being waited on.
 *
 * Each request becomes one line with method, path, model ("unknown" when
 * missing), status, age in whole seconds and, if present, its tags.
 *
 * @param {object[]} requests - Active requests to describe.
 */
function logActiveRequestsSummary(requests) {
  const reference = Date.now();
  const describe = ({ method, path: route, model, status, startTime, tags }) => {
    const seconds = Math.round((reference - startTime) / 1e3);
    const tagSuffix = tags?.length ? ` [${tags.join(", ")}]` : "";
    return ` ${method} ${route} ${model || "unknown"} (${status}, ${seconds}s)${tagSuffix}`;
  };
  const listing = requests.map((request) => describe(request)).join("\n");
  consola.info(`Waiting for ${requests.length} active request(s):\n${listing}`);
}
1946
/**
 * Wait for all active requests to complete, with periodic progress logging.
 *
 * The active list is polled every DRAIN_POLL_INTERVAL_MS, but a sleep never
 * extends past the deadline, and the list is always checked before the
 * deadline is evaluated. Compared with a fixed-length sleep this fixes two
 * issues: a timeout shorter than the poll interval no longer waits a full
 * interval, and requests that finish during the last sleep are reported as
 * "drained" instead of "timeout".
 *
 * @param {number} timeoutMs - Maximum time to wait, in milliseconds.
 * @returns {Promise<"drained" | "timeout">} "drained" when no request is
 *   active any more, "timeout" when the deadline passes first.
 */
async function drainActiveRequests(timeoutMs) {
  const deadline = Date.now() + timeoutMs;
  let lastProgressLog = 0;
  for (;;) {
    const active = requestTracker.getActiveRequests();
    if (active.length === 0) return "drained";
    const now = Date.now();
    const remaining = deadline - now;
    if (remaining <= 0) return "timeout";
    // lastProgressLog starts at 0, so the first pass always logs a summary.
    if (now - lastProgressLog >= DRAIN_PROGRESS_INTERVAL_MS) {
      lastProgressLog = now;
      logActiveRequestsSummary(active);
    }
    const pause = Math.min(DRAIN_POLL_INTERVAL_MS, remaining);
    await new Promise((resolve) => setTimeout(resolve, pause));
  }
}
1965
/**
 * Run the graceful shutdown sequence for the given signal.
 *
 * Steps, in order: mark the process as shutting down, stop the token refresh,
 * disconnect WebSocket clients, and, if a server was registered, drain active
 * requests (bounded by computeDrainTimeout()) and close the server. Finally
 * the shutdown resolver is invoked if one is set.
 *
 * @param {string} signal - Name of the signal that triggered shutdown (used for logging).
 * @returns {Promise<void>}
 */
async function gracefulShutdown(signal) {
  _isShuttingDown = true;
  consola.info(`Received ${signal}, shutting down gracefully...`);
  stopTokenRefresh();

  const socketCount = getClientCount();
  if (socketCount > 0) {
    closeAllClients();
    consola.info(`Disconnected ${socketCount} WebSocket client(s)`);
  }

  if (serverInstance) {
    const pending = requestTracker.getActiveRequests().length;
    const budgetMs = computeDrainTimeout();
    if (pending > 0) {
      consola.info(`Draining ${pending} active request(s), timeout ${budgetMs / 1e3}s`);
      const outcome = await drainActiveRequests(budgetMs);
      if (outcome === "timeout") {
        const stuck = requestTracker.getActiveRequests();
        consola.warn(`Drain timeout, force-closing ${stuck.length} remaining request(s)`);
      } else {
        consola.info("All requests completed");
      }
    }
    // A failing close is logged but does not stop the rest of the sequence.
    try {
      await serverInstance.close(true);
    } catch (closeError) {
      consola.error("Error closing server:", closeError);
    }
  }

  consola.info("Shutdown complete");
  shutdownResolve?.();
}
1994
/**
 * Install SIGINT and SIGTERM handlers that trigger a graceful shutdown.
 *
 * The first signal starts gracefulShutdown(). A signal received while
 * shutdown is already in progress forces an immediate exit with code 1.
 *
 * The promise returned by gracefulShutdown() is no longer left floating: a
 * rejection is logged and the process exits with code 1 instead of surfacing
 * as an unhandled rejection.
 */
function setupShutdownHandlers() {
  const handler = (signal) => {
    if (_isShuttingDown) {
      consola.warn("Second signal received, forcing immediate exit");
      process.exit(1);
      // process.exit does not return; the explicit return keeps this branch
      // from starting a second shutdown if exit is ever intercepted.
      return;
    }
    gracefulShutdown(signal).catch((error) => {
      consola.error("Graceful shutdown failed:", error);
      process.exit(1);
    });
  };
  for (const signal of ["SIGINT", "SIGTERM"]) {
    process.on(signal, () => handler(signal));
  }
}
2006
+
2007
+ //#endregion
2008
+ //#region src/lib/tui/tracker.ts
2009
+ var RequestTracker = class {
2010
+ requests = /* @__PURE__ */ new Map();
2011
+ renderer = null;
2012
+ completedQueue = [];
2013
+ completedTimeouts = /* @__PURE__ */ new Map();
2014
+ historySize = 5;
2015
+ completedDisplayMs = 2e3;
2016
+ setRenderer(renderer) {
2017
+ this.renderer = renderer;
2018
+ }
2019
+ setOptions(options) {
2020
+ if (options.historySize !== void 0) this.historySize = options.historySize;
2021
+ if (options.completedDisplayMs !== void 0) this.completedDisplayMs = options.completedDisplayMs;
2022
+ }
2023
+ /**
2024
+ * Start tracking a new request
2025
+ * Returns the tracking ID
2026
+ */
2027
+ startRequest(options) {
2028
+ const id = generateId();
2029
+ const request = {
2030
+ id,
2031
+ method: options.method,
2032
+ path: options.path,
2033
+ model: options.model,
2034
+ startTime: Date.now(),
2035
+ status: "executing",
2036
+ isHistoryAccess: options.isHistoryAccess
1637
2037
  };
1638
2038
  this.requests.set(id, request);
1639
2039
  this.renderer?.onRequestStart(request);
@@ -2293,7 +2693,7 @@ const setupClaudeCode = defineCommand({
2293
2693
 
2294
2694
  //#endregion
2295
2695
  //#region package.json
2296
- var version = "0.7.17-beta.0";
2696
+ var version = "0.7.17";
2297
2697
 
2298
2698
  //#endregion
2299
2699
  //#region src/lib/adaptive-rate-limiter.ts
@@ -2324,558 +2724,270 @@ var AdaptiveRateLimiter = class {
2324
2724
  lastRequestTime = 0;
2325
2725
  /** Current step in gradual recovery (index into gradualRecoverySteps) */
2326
2726
  recoveryStepIndex = 0;
2327
- constructor(config = {}) {
2328
- this.config = {
2329
- ...DEFAULT_CONFIG,
2330
- ...config
2331
- };
2332
- }
2333
- /**
2334
- * Execute a request with adaptive rate limiting.
2335
- * Returns a promise that resolves when the request succeeds.
2336
- * The request will be retried automatically on 429 errors.
2337
- */
2338
- async execute(fn) {
2339
- if (this.mode === "normal") return this.executeInNormalMode(fn);
2340
- if (this.mode === "recovering") return this.executeInRecoveringMode(fn);
2341
- return this.enqueue(fn);
2342
- }
2343
- /**
2344
- * Check if an error is a rate limit error (429) and extract Retry-After if available
2345
- */
2346
- isRateLimitError(error) {
2347
- if (error && typeof error === "object") {
2348
- if ("status" in error && error.status === 429) return {
2349
- isRateLimit: true,
2350
- retryAfter: this.extractRetryAfter(error)
2351
- };
2352
- if ("responseText" in error && typeof error.responseText === "string") try {
2353
- const parsed = JSON.parse(error.responseText);
2354
- if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "code" in parsed.error && parsed.error.code === "rate_limited") return { isRateLimit: true };
2355
- } catch {}
2356
- }
2357
- return { isRateLimit: false };
2358
- }
2359
- /**
2360
- * Extract Retry-After value from error response
2361
- */
2362
- extractRetryAfter(error) {
2363
- if (!error || typeof error !== "object") return void 0;
2364
- if ("responseText" in error && typeof error.responseText === "string") try {
2365
- const parsed = JSON.parse(error.responseText);
2366
- if (parsed && typeof parsed === "object" && "retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
2367
- if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "retry_after" in parsed.error && typeof parsed.error.retry_after === "number") return parsed.error.retry_after;
2368
- } catch {}
2369
- }
2370
- /**
2371
- * Execute in normal mode - full speed
2372
- */
2373
- async executeInNormalMode(fn) {
2374
- try {
2375
- return {
2376
- result: await fn(),
2377
- queueWaitMs: 0
2378
- };
2379
- } catch (error) {
2380
- const { isRateLimit, retryAfter } = this.isRateLimitError(error);
2381
- if (isRateLimit) {
2382
- this.enterRateLimitedMode();
2383
- return this.enqueue(fn, retryAfter);
2384
- }
2385
- throw error;
2386
- }
2387
- }
2388
- /**
2389
- * Execute in recovering mode - gradual speedup
2390
- */
2391
- async executeInRecoveringMode(fn) {
2392
- const startTime = Date.now();
2393
- const currentInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
2394
- if (currentInterval > 0) {
2395
- const elapsedMs = Date.now() - this.lastRequestTime;
2396
- const requiredMs = currentInterval * 1e3;
2397
- if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
2398
- const waitMs = requiredMs - elapsedMs;
2399
- await this.sleep(waitMs);
2400
- }
2401
- }
2402
- this.lastRequestTime = Date.now();
2403
- try {
2404
- const result = await fn();
2405
- this.recoveryStepIndex++;
2406
- if (this.recoveryStepIndex >= this.config.gradualRecoverySteps.length) this.completeRecovery();
2407
- else {
2408
- const nextInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
2409
- consola.info(`[RateLimiter] Ramp-up step ${this.recoveryStepIndex}/${this.config.gradualRecoverySteps.length} (next interval: ${nextInterval}s)`);
2410
- }
2411
- return {
2412
- result,
2413
- queueWaitMs: Date.now() - startTime
2414
- };
2415
- } catch (error) {
2416
- const { isRateLimit, retryAfter } = this.isRateLimitError(error);
2417
- if (isRateLimit) {
2418
- consola.warn("[RateLimiter] Hit rate limit during ramp-up, returning to rate-limited mode");
2419
- this.enterRateLimitedMode();
2420
- return this.enqueue(fn, retryAfter);
2421
- }
2422
- throw error;
2423
- }
2727
+ constructor(config = {}) {
2728
+ this.config = {
2729
+ ...DEFAULT_CONFIG,
2730
+ ...config
2731
+ };
2424
2732
  }
2425
2733
  /**
2426
- * Enter rate-limited mode
2734
+ * Execute a request with adaptive rate limiting.
2735
+ * Returns a promise that resolves when the request succeeds.
2736
+ * The request will be retried automatically on 429 errors.
2427
2737
  */
2428
- enterRateLimitedMode() {
2429
- if (this.mode === "rate-limited") return;
2430
- this.mode = "rate-limited";
2431
- this.rateLimitedAt = Date.now();
2432
- this.consecutiveSuccesses = 0;
2433
- consola.warn(`[RateLimiter] Entering rate-limited mode. Requests will be queued with exponential backoff (base: ${this.config.baseRetryIntervalSeconds}s).`);
2738
+ async execute(fn) {
2739
+ if (this.mode === "normal") return this.executeInNormalMode(fn);
2740
+ if (this.mode === "recovering") return this.executeInRecoveringMode(fn);
2741
+ return this.enqueue(fn);
2434
2742
  }
2435
2743
  /**
2436
- * Check if we should try to recover to normal mode
2744
+ * Check if an error is a rate limit error (429) and extract Retry-After if available
2437
2745
  */
2438
- shouldAttemptRecovery() {
2439
- if (this.consecutiveSuccesses >= this.config.consecutiveSuccessesForRecovery) {
2440
- consola.info(`[RateLimiter] ${this.consecutiveSuccesses} consecutive successes. Starting ramp-up.`);
2441
- return true;
2442
- }
2443
- if (this.rateLimitedAt) {
2444
- if (Date.now() - this.rateLimitedAt >= this.config.recoveryTimeoutMinutes * 60 * 1e3) {
2445
- consola.info(`[RateLimiter] ${this.config.recoveryTimeoutMinutes} minutes elapsed. Starting ramp-up.`);
2446
- return true;
2447
- }
2746
+ isRateLimitError(error) {
2747
+ if (error && typeof error === "object") {
2748
+ if ("status" in error && error.status === 429) return {
2749
+ isRateLimit: true,
2750
+ retryAfter: this.extractRetryAfter(error)
2751
+ };
2752
+ if ("responseText" in error && typeof error.responseText === "string") try {
2753
+ const parsed = JSON.parse(error.responseText);
2754
+ if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "code" in parsed.error && parsed.error.code === "rate_limited") return { isRateLimit: true };
2755
+ } catch {}
2448
2756
  }
2449
- return false;
2450
- }
2451
- /**
2452
- * Start gradual recovery mode
2453
- */
2454
- startGradualRecovery() {
2455
- this.mode = "recovering";
2456
- this.recoveryStepIndex = 0;
2457
- this.rateLimitedAt = null;
2458
- this.consecutiveSuccesses = 0;
2459
- const firstInterval = this.config.gradualRecoverySteps[0] ?? 0;
2460
- consola.info(`[RateLimiter] Starting ramp-up (${this.config.gradualRecoverySteps.length} steps, first interval: ${firstInterval}s)`);
2757
+ return { isRateLimit: false };
2461
2758
  }
2462
2759
  /**
2463
- * Complete recovery to normal mode
2760
+ * Extract Retry-After value from error response
2464
2761
  */
2465
- completeRecovery() {
2466
- this.mode = "normal";
2467
- this.recoveryStepIndex = 0;
2468
- consola.success("[RateLimiter] Exiting rate-limited mode.");
2762
+ extractRetryAfter(error) {
2763
+ if (!error || typeof error !== "object") return void 0;
2764
+ if ("responseText" in error && typeof error.responseText === "string") try {
2765
+ const parsed = JSON.parse(error.responseText);
2766
+ if (parsed && typeof parsed === "object" && "retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
2767
+ if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "retry_after" in parsed.error && typeof parsed.error.retry_after === "number") return parsed.error.retry_after;
2768
+ } catch {}
2469
2769
  }
2470
2770
  /**
2471
- * Enqueue a request for later execution
2771
+ * Execute in normal mode - full speed
2472
2772
  */
2473
- enqueue(fn, retryAfterSeconds) {
2474
- return new Promise((resolve, reject) => {
2475
- const request = {
2476
- execute: fn,
2477
- resolve,
2478
- reject,
2479
- retryCount: 0,
2480
- retryAfterSeconds,
2481
- enqueuedAt: Date.now()
2773
+ async executeInNormalMode(fn) {
2774
+ try {
2775
+ return {
2776
+ result: await fn(),
2777
+ queueWaitMs: 0
2482
2778
  };
2483
- this.queue.push(request);
2484
- if (this.queue.length > 1) {
2485
- const position = this.queue.length;
2486
- const estimatedWait = (position - 1) * this.config.requestIntervalSeconds;
2487
- consola.info(`[RateLimiter] Request queued (position ${position}, ~${estimatedWait}s wait)`);
2488
- }
2489
- this.processQueue();
2490
- });
2491
- }
2492
- /**
2493
- * Calculate retry interval with exponential backoff
2494
- */
2495
- calculateRetryInterval(request) {
2496
- if (request.retryAfterSeconds !== void 0 && request.retryAfterSeconds > 0) return request.retryAfterSeconds;
2497
- const backoff = this.config.baseRetryIntervalSeconds * Math.pow(2, request.retryCount);
2498
- return Math.min(backoff, this.config.maxRetryIntervalSeconds);
2499
- }
2500
- /**
2501
- * Process the queue
2502
- */
2503
- async processQueue() {
2504
- if (this.processing) return;
2505
- this.processing = true;
2506
- while (this.queue.length > 0) {
2507
- const request = this.queue[0];
2508
- if (this.shouldAttemptRecovery()) this.startGradualRecovery();
2509
- const elapsedMs = Date.now() - this.lastRequestTime;
2510
- const requiredMs = (request.retryCount > 0 ? this.calculateRetryInterval(request) : this.config.requestIntervalSeconds) * 1e3;
2511
- if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
2512
- const waitMs = requiredMs - elapsedMs;
2513
- const waitSec = Math.ceil(waitMs / 1e3);
2514
- consola.info(`[RateLimiter] Waiting ${waitSec}s before next request...`);
2515
- await this.sleep(waitMs);
2516
- }
2517
- this.lastRequestTime = Date.now();
2518
- try {
2519
- const result = await request.execute();
2520
- this.queue.shift();
2521
- this.consecutiveSuccesses++;
2522
- request.retryAfterSeconds = void 0;
2523
- const queueWaitMs = Date.now() - request.enqueuedAt;
2524
- request.resolve({
2525
- result,
2526
- queueWaitMs
2527
- });
2528
- if (this.mode === "rate-limited") consola.info(`[RateLimiter] Request succeeded (${this.consecutiveSuccesses}/${this.config.consecutiveSuccessesForRecovery} for ramp-up)`);
2529
- } catch (error) {
2530
- const { isRateLimit, retryAfter } = this.isRateLimitError(error);
2531
- if (isRateLimit) {
2532
- request.retryCount++;
2533
- request.retryAfterSeconds = retryAfter;
2534
- this.consecutiveSuccesses = 0;
2535
- this.rateLimitedAt = Date.now();
2536
- const nextInterval = this.calculateRetryInterval(request);
2537
- const source = retryAfter ? "server Retry-After" : "exponential backoff";
2538
- consola.warn(`[RateLimiter] Request failed with 429 (retry #${request.retryCount}). Retrying in ${nextInterval}s (${source})...`);
2539
- } else {
2540
- this.queue.shift();
2541
- request.reject(error);
2542
- }
2779
+ } catch (error) {
2780
+ const { isRateLimit, retryAfter } = this.isRateLimitError(error);
2781
+ if (isRateLimit) {
2782
+ this.enterRateLimitedMode();
2783
+ return this.enqueue(fn, retryAfter);
2543
2784
  }
2785
+ throw error;
2544
2786
  }
2545
- this.processing = false;
2546
- }
2547
- sleep(ms) {
2548
- return new Promise((resolve) => setTimeout(resolve, ms));
2549
- }
2550
- /**
2551
- * Get current status for debugging/monitoring
2552
- */
2553
- getStatus() {
2554
- return {
2555
- mode: this.mode,
2556
- queueLength: this.queue.length,
2557
- consecutiveSuccesses: this.consecutiveSuccesses,
2558
- rateLimitedAt: this.rateLimitedAt
2559
- };
2560
- }
2561
- };
2562
- let rateLimiterInstance = null;
2563
- /**
2564
- * Initialize the adaptive rate limiter with configuration
2565
- */
2566
- function initAdaptiveRateLimiter(config = {}) {
2567
- rateLimiterInstance = new AdaptiveRateLimiter(config);
2568
- const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
2569
- const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
2570
- const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
2571
- const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
2572
- const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
2573
- const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
2574
- consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
2575
- }
2576
- /**
2577
- * Execute a request with adaptive rate limiting.
2578
- * If rate limiter is not initialized, executes immediately.
2579
- * Returns the result along with queue wait time.
2580
- */
2581
- async function executeWithAdaptiveRateLimit(fn) {
2582
- if (!rateLimiterInstance) return {
2583
- result: await fn(),
2584
- queueWaitMs: 0
2585
- };
2586
- return rateLimiterInstance.execute(fn);
2587
- }
2588
-
2589
- //#endregion
2590
- //#region src/lib/history.ts
2591
- function formatLocalTimestamp(ts) {
2592
- const d = new Date(ts);
2593
- return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")} ${String(d.getHours()).padStart(2, "0")}:${String(d.getMinutes()).padStart(2, "0")}:${String(d.getSeconds()).padStart(2, "0")}`;
2594
- }
2595
- const historyState = {
2596
- enabled: false,
2597
- entries: [],
2598
- sessions: /* @__PURE__ */ new Map(),
2599
- currentSessionId: "",
2600
- maxEntries: 200
2601
- };
2602
- function initHistory(enabled, maxEntries) {
2603
- historyState.enabled = enabled;
2604
- historyState.maxEntries = maxEntries;
2605
- historyState.entries = [];
2606
- historyState.sessions = /* @__PURE__ */ new Map();
2607
- historyState.currentSessionId = enabled ? generateId() : "";
2608
- }
2609
- function isHistoryEnabled() {
2610
- return historyState.enabled;
2611
- }
2612
- function getCurrentSession(endpoint) {
2613
- if (historyState.currentSessionId) {
2614
- const session = historyState.sessions.get(historyState.currentSessionId);
2615
- if (session) {
2616
- session.lastActivity = Date.now();
2617
- return historyState.currentSessionId;
2618
- }
2619
- }
2620
- const now = Date.now();
2621
- const sessionId = generateId();
2622
- historyState.currentSessionId = sessionId;
2623
- historyState.sessions.set(sessionId, {
2624
- id: sessionId,
2625
- startTime: now,
2626
- lastActivity: now,
2627
- requestCount: 0,
2628
- totalInputTokens: 0,
2629
- totalOutputTokens: 0,
2630
- models: [],
2631
- endpoint
2632
- });
2633
- return sessionId;
2634
- }
2635
- function recordRequest(endpoint, request) {
2636
- if (!historyState.enabled) return "";
2637
- const sessionId = getCurrentSession(endpoint);
2638
- const session = historyState.sessions.get(sessionId);
2639
- if (!session) return "";
2640
- const entry = {
2641
- id: generateId(),
2642
- sessionId,
2643
- timestamp: Date.now(),
2644
- endpoint,
2645
- request: {
2646
- model: request.model,
2647
- messages: request.messages,
2648
- stream: request.stream,
2649
- tools: request.tools,
2650
- max_tokens: request.max_tokens,
2651
- temperature: request.temperature,
2652
- system: request.system
2653
- }
2654
- };
2655
- historyState.entries.push(entry);
2656
- session.requestCount++;
2657
- if (!session.models.includes(request.model)) session.models.push(request.model);
2658
- if (request.tools && request.tools.length > 0) {
2659
- if (!session.toolsUsed) session.toolsUsed = [];
2660
- for (const tool of request.tools) if (!session.toolsUsed.includes(tool.name)) session.toolsUsed.push(tool.name);
2661
2787
  }
2662
- while (historyState.maxEntries > 0 && historyState.entries.length > historyState.maxEntries) {
2663
- const removed = historyState.entries.shift();
2664
- if (removed) {
2665
- if (historyState.entries.filter((e) => e.sessionId === removed.sessionId).length === 0) historyState.sessions.delete(removed.sessionId);
2788
+ /**
2789
+ * Execute in recovering mode - gradual speedup
2790
+ */
2791
+ async executeInRecoveringMode(fn) {
2792
+ const startTime = Date.now();
2793
+ const currentInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
2794
+ if (currentInterval > 0) {
2795
+ const elapsedMs = Date.now() - this.lastRequestTime;
2796
+ const requiredMs = currentInterval * 1e3;
2797
+ if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
2798
+ const waitMs = requiredMs - elapsedMs;
2799
+ await this.sleep(waitMs);
2800
+ }
2801
+ }
2802
+ this.lastRequestTime = Date.now();
2803
+ try {
2804
+ const result = await fn();
2805
+ this.recoveryStepIndex++;
2806
+ if (this.recoveryStepIndex >= this.config.gradualRecoverySteps.length) this.completeRecovery();
2807
+ else {
2808
+ const nextInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
2809
+ consola.info(`[RateLimiter] Ramp-up step ${this.recoveryStepIndex}/${this.config.gradualRecoverySteps.length} (next interval: ${nextInterval}s)`);
2810
+ }
2811
+ return {
2812
+ result,
2813
+ queueWaitMs: Date.now() - startTime
2814
+ };
2815
+ } catch (error) {
2816
+ const { isRateLimit, retryAfter } = this.isRateLimitError(error);
2817
+ if (isRateLimit) {
2818
+ consola.warn("[RateLimiter] Hit rate limit during ramp-up, returning to rate-limited mode");
2819
+ this.enterRateLimitedMode();
2820
+ return this.enqueue(fn, retryAfter);
2821
+ }
2822
+ throw error;
2666
2823
  }
2667
2824
  }
2668
- notifyEntryAdded(entry);
2669
- return entry.id;
2670
- }
2671
- function recordResponse(id, response, durationMs) {
2672
- if (!historyState.enabled || !id) return;
2673
- const entry = historyState.entries.find((e) => e.id === id);
2674
- if (entry) {
2675
- entry.response = response;
2676
- entry.durationMs = durationMs;
2677
- const session = historyState.sessions.get(entry.sessionId);
2678
- if (session) {
2679
- session.totalInputTokens += response.usage.input_tokens;
2680
- session.totalOutputTokens += response.usage.output_tokens;
2681
- session.lastActivity = Date.now();
2825
+ /**
2826
+ * Enter rate-limited mode
2827
+ */
2828
+ enterRateLimitedMode() {
2829
+ if (this.mode === "rate-limited") return;
2830
+ this.mode = "rate-limited";
2831
+ this.rateLimitedAt = Date.now();
2832
+ this.consecutiveSuccesses = 0;
2833
+ consola.warn(`[RateLimiter] Entering rate-limited mode. Requests will be queued with exponential backoff (base: ${this.config.baseRetryIntervalSeconds}s).`);
2834
+ }
2835
+ /**
2836
+ * Check if we should try to recover to normal mode
2837
+ */
2838
+ shouldAttemptRecovery() {
2839
+ if (this.consecutiveSuccesses >= this.config.consecutiveSuccessesForRecovery) {
2840
+ consola.info(`[RateLimiter] ${this.consecutiveSuccesses} consecutive successes. Starting ramp-up.`);
2841
+ return true;
2682
2842
  }
2683
- notifyEntryUpdated(entry);
2843
+ if (this.rateLimitedAt) {
2844
+ if (Date.now() - this.rateLimitedAt >= this.config.recoveryTimeoutMinutes * 60 * 1e3) {
2845
+ consola.info(`[RateLimiter] ${this.config.recoveryTimeoutMinutes} minutes elapsed. Starting ramp-up.`);
2846
+ return true;
2847
+ }
2848
+ }
2849
+ return false;
2684
2850
  }
2685
- }
2686
- function recordRewrites(id, rewrites) {
2687
- if (!historyState.enabled || !id) return;
2688
- const entry = historyState.entries.find((e) => e.id === id);
2689
- if (entry) {
2690
- entry.rewrites = rewrites;
2691
- if (rewrites.truncation) entry.truncation = rewrites.truncation;
2692
- notifyEntryUpdated(entry);
2851
+ /**
2852
+ * Start gradual recovery mode
2853
+ */
2854
+ startGradualRecovery() {
2855
+ this.mode = "recovering";
2856
+ this.recoveryStepIndex = 0;
2857
+ this.rateLimitedAt = null;
2858
+ this.consecutiveSuccesses = 0;
2859
+ const firstInterval = this.config.gradualRecoverySteps[0] ?? 0;
2860
+ consola.info(`[RateLimiter] Starting ramp-up (${this.config.gradualRecoverySteps.length} steps, first interval: ${firstInterval}s)`);
2693
2861
  }
2694
- }
2695
- function getHistory(options = {}) {
2696
- const { page = 1, limit = 50, model, endpoint, success, from, to, search, sessionId } = options;
2697
- let filtered = [...historyState.entries];
2698
- if (sessionId) filtered = filtered.filter((e) => e.sessionId === sessionId);
2699
- if (model) {
2700
- const modelLower = model.toLowerCase();
2701
- filtered = filtered.filter((e) => e.request.model.toLowerCase().includes(modelLower) || e.response?.model.toLowerCase().includes(modelLower));
2862
+ /**
2863
+ * Complete recovery to normal mode
2864
+ */
2865
+ completeRecovery() {
2866
+ this.mode = "normal";
2867
+ this.recoveryStepIndex = 0;
2868
+ consola.success("[RateLimiter] Exiting rate-limited mode.");
2702
2869
  }
2703
- if (endpoint) filtered = filtered.filter((e) => e.endpoint === endpoint);
2704
- if (success !== void 0) filtered = filtered.filter((e) => e.response?.success === success);
2705
- if (from) filtered = filtered.filter((e) => e.timestamp >= from);
2706
- if (to) filtered = filtered.filter((e) => e.timestamp <= to);
2707
- if (search) {
2708
- const searchLower = search.toLowerCase();
2709
- filtered = filtered.filter((e) => {
2710
- if (e.request.model.toLowerCase().includes(searchLower) || e.response?.model && e.response.model.toLowerCase().includes(searchLower)) return true;
2711
- if (e.response?.error && e.response.error.toLowerCase().includes(searchLower)) return true;
2712
- if (e.request.system?.toLowerCase().includes(searchLower)) return true;
2713
- if (e.request.messages.some((m) => {
2714
- if (typeof m.content === "string") return m.content.toLowerCase().includes(searchLower);
2715
- if (Array.isArray(m.content)) return m.content.some((c) => {
2716
- if (c.text && c.text.toLowerCase().includes(searchLower)) return true;
2717
- if (c.type === "tool_use") {
2718
- const name = c.name;
2719
- if (name && name.toLowerCase().includes(searchLower)) return true;
2720
- if (c.input) {
2721
- if ((typeof c.input === "string" ? c.input : JSON.stringify(c.input)).toLowerCase().includes(searchLower)) return true;
2722
- }
2723
- }
2724
- if (c.type === "tool_result" && c.content) {
2725
- if ((typeof c.content === "string" ? c.content : JSON.stringify(c.content)).toLowerCase().includes(searchLower)) return true;
2726
- }
2727
- if (c.type === "thinking") {
2728
- const thinking = c.thinking;
2729
- if (thinking && thinking.toLowerCase().includes(searchLower)) return true;
2730
- }
2731
- return false;
2732
- });
2733
- return false;
2734
- })) return true;
2735
- if (e.response?.content) {
2736
- const rc = e.response.content;
2737
- if (typeof rc.content === "string" && rc.content.toLowerCase().includes(searchLower)) return true;
2738
- if (Array.isArray(rc.content)) {
2739
- if (rc.content.some((c) => {
2740
- if (c.text && c.text.toLowerCase().includes(searchLower)) return true;
2741
- if (c.type === "tool_use" && c.name && c.name.toLowerCase().includes(searchLower)) return true;
2742
- if (c.type === "thinking" && c.thinking && c.thinking.toLowerCase().includes(searchLower)) return true;
2743
- return false;
2744
- })) return true;
2745
- }
2870
+ /**
2871
+ * Enqueue a request for later execution
2872
+ */
2873
+ enqueue(fn, retryAfterSeconds) {
2874
+ return new Promise((resolve, reject) => {
2875
+ const request = {
2876
+ execute: fn,
2877
+ resolve,
2878
+ reject,
2879
+ retryCount: 0,
2880
+ retryAfterSeconds,
2881
+ enqueuedAt: Date.now()
2882
+ };
2883
+ this.queue.push(request);
2884
+ if (this.queue.length > 1) {
2885
+ const position = this.queue.length;
2886
+ const estimatedWait = (position - 1) * this.config.requestIntervalSeconds;
2887
+ consola.info(`[RateLimiter] Request queued (position ${position}, ~${estimatedWait}s wait)`);
2746
2888
  }
2747
- if (e.response?.toolCalls?.some((t) => t.name.toLowerCase().includes(searchLower))) return true;
2748
- return false;
2889
+ this.processQueue();
2749
2890
  });
2750
2891
  }
2751
- filtered.sort((a, b) => b.timestamp - a.timestamp);
2752
- const total = filtered.length;
2753
- const totalPages = Math.ceil(total / limit);
2754
- const start = (page - 1) * limit;
2755
- return {
2756
- entries: filtered.slice(start, start + limit),
2757
- total,
2758
- page,
2759
- limit,
2760
- totalPages
2761
- };
2762
- }
2763
- function getEntry(id) {
2764
- return historyState.entries.find((e) => e.id === id);
2765
- }
2766
- function getSessions() {
2767
- const sessions = Array.from(historyState.sessions.values()).sort((a, b) => b.lastActivity - a.lastActivity);
2768
- return {
2769
- sessions,
2770
- total: sessions.length
2771
- };
2772
- }
2773
- function getSession(id) {
2774
- return historyState.sessions.get(id);
2775
- }
2776
- function getSessionEntries(sessionId) {
2777
- return historyState.entries.filter((e) => e.sessionId === sessionId).sort((a, b) => a.timestamp - b.timestamp);
2778
- }
2779
- function clearHistory() {
2780
- historyState.entries = [];
2781
- historyState.sessions = /* @__PURE__ */ new Map();
2782
- historyState.currentSessionId = generateId();
2783
- }
2784
- function deleteSession(sessionId) {
2785
- if (!historyState.sessions.has(sessionId)) return false;
2786
- historyState.entries = historyState.entries.filter((e) => e.sessionId !== sessionId);
2787
- historyState.sessions.delete(sessionId);
2788
- if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId();
2789
- return true;
2790
- }
2791
- function getStats() {
2792
- const entries = historyState.entries;
2793
- const modelDist = {};
2794
- const endpointDist = {};
2795
- const hourlyActivity = {};
2796
- let totalInput = 0;
2797
- let totalOutput = 0;
2798
- let totalDuration = 0;
2799
- let durationCount = 0;
2800
- let successCount = 0;
2801
- let failCount = 0;
2802
- for (const entry of entries) {
2803
- const model = entry.response?.model || entry.request.model;
2804
- modelDist[model] = (modelDist[model] || 0) + 1;
2805
- endpointDist[entry.endpoint] = (endpointDist[entry.endpoint] || 0) + 1;
2806
- const d = new Date(entry.timestamp);
2807
- const hour = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}T${String(d.getHours()).padStart(2, "0")}`;
2808
- hourlyActivity[hour] = (hourlyActivity[hour] || 0) + 1;
2809
- if (entry.response) {
2810
- if (entry.response.success) successCount++;
2811
- else failCount++;
2812
- totalInput += entry.response.usage.input_tokens;
2813
- totalOutput += entry.response.usage.output_tokens;
2814
- }
2815
- if (entry.durationMs) {
2816
- totalDuration += entry.durationMs;
2817
- durationCount++;
2892
+ /**
2893
+ * Calculate retry interval with exponential backoff
2894
+ */
2895
+ calculateRetryInterval(request) {
2896
+ if (request.retryAfterSeconds !== void 0 && request.retryAfterSeconds > 0) return request.retryAfterSeconds;
2897
+ const backoff = this.config.baseRetryIntervalSeconds * Math.pow(2, request.retryCount);
2898
+ return Math.min(backoff, this.config.maxRetryIntervalSeconds);
2899
+ }
2900
+ /**
2901
+ * Process the queue
2902
+ */
2903
+ async processQueue() {
2904
+ if (this.processing) return;
2905
+ this.processing = true;
2906
+ while (this.queue.length > 0) {
2907
+ const request = this.queue[0];
2908
+ if (this.shouldAttemptRecovery()) this.startGradualRecovery();
2909
+ const elapsedMs = Date.now() - this.lastRequestTime;
2910
+ const requiredMs = (request.retryCount > 0 ? this.calculateRetryInterval(request) : this.config.requestIntervalSeconds) * 1e3;
2911
+ if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
2912
+ const waitMs = requiredMs - elapsedMs;
2913
+ const waitSec = Math.ceil(waitMs / 1e3);
2914
+ consola.info(`[RateLimiter] Waiting ${waitSec}s before next request...`);
2915
+ await this.sleep(waitMs);
2916
+ }
2917
+ this.lastRequestTime = Date.now();
2918
+ try {
2919
+ const result = await request.execute();
2920
+ this.queue.shift();
2921
+ this.consecutiveSuccesses++;
2922
+ request.retryAfterSeconds = void 0;
2923
+ const queueWaitMs = Date.now() - request.enqueuedAt;
2924
+ request.resolve({
2925
+ result,
2926
+ queueWaitMs
2927
+ });
2928
+ if (this.mode === "rate-limited") consola.info(`[RateLimiter] Request succeeded (${this.consecutiveSuccesses}/${this.config.consecutiveSuccessesForRecovery} for ramp-up)`);
2929
+ } catch (error) {
2930
+ const { isRateLimit, retryAfter } = this.isRateLimitError(error);
2931
+ if (isRateLimit) {
2932
+ request.retryCount++;
2933
+ request.retryAfterSeconds = retryAfter;
2934
+ this.consecutiveSuccesses = 0;
2935
+ this.rateLimitedAt = Date.now();
2936
+ const nextInterval = this.calculateRetryInterval(request);
2937
+ const source = retryAfter ? "server Retry-After" : "exponential backoff";
2938
+ consola.warn(`[RateLimiter] Request failed with 429 (retry #${request.retryCount}). Retrying in ${nextInterval}s (${source})...`);
2939
+ } else {
2940
+ this.queue.shift();
2941
+ request.reject(error);
2942
+ }
2943
+ }
2818
2944
  }
2945
+ this.processing = false;
2819
2946
  }
2820
- const recentActivity = Object.entries(hourlyActivity).sort(([a], [b]) => a.localeCompare(b)).slice(-24).map(([hour, count]) => ({
2821
- hour,
2822
- count
2823
- }));
2824
- return {
2825
- totalRequests: entries.length,
2826
- successfulRequests: successCount,
2827
- failedRequests: failCount,
2828
- totalInputTokens: totalInput,
2829
- totalOutputTokens: totalOutput,
2830
- averageDurationMs: durationCount > 0 ? totalDuration / durationCount : 0,
2831
- modelDistribution: modelDist,
2832
- endpointDistribution: endpointDist,
2833
- recentActivity,
2834
- activeSessions: historyState.sessions.size
2835
- };
2947
+ sleep(ms) {
2948
+ return new Promise((resolve) => setTimeout(resolve, ms));
2949
+ }
2950
+ /**
2951
+ * Get current status for debugging/monitoring
2952
+ */
2953
+ getStatus() {
2954
+ return {
2955
+ mode: this.mode,
2956
+ queueLength: this.queue.length,
2957
+ consecutiveSuccesses: this.consecutiveSuccesses,
2958
+ rateLimitedAt: this.rateLimitedAt
2959
+ };
2960
+ }
2961
+ };
2962
+ let rateLimiterInstance = null;
2963
+ /**
2964
+ * Initialize the adaptive rate limiter with configuration
2965
+ */
2966
+ function initAdaptiveRateLimiter(config = {}) {
2967
+ rateLimiterInstance = new AdaptiveRateLimiter(config);
2968
+ const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
2969
+ const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
2970
+ const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
2971
+ const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
2972
+ const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
2973
+ const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
2974
+ consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
2836
2975
  }
2837
- function exportHistory(format = "json") {
2838
- if (format === "json") return JSON.stringify({
2839
- sessions: Array.from(historyState.sessions.values()),
2840
- entries: historyState.entries
2841
- }, null, 2);
2842
- const headers = [
2843
- "id",
2844
- "session_id",
2845
- "timestamp",
2846
- "endpoint",
2847
- "request_model",
2848
- "message_count",
2849
- "stream",
2850
- "success",
2851
- "response_model",
2852
- "input_tokens",
2853
- "output_tokens",
2854
- "duration_ms",
2855
- "stop_reason",
2856
- "error"
2857
- ];
2858
- const rows = historyState.entries.map((e) => [
2859
- e.id,
2860
- e.sessionId,
2861
- formatLocalTimestamp(e.timestamp),
2862
- e.endpoint,
2863
- e.request.model,
2864
- e.request.messages.length,
2865
- e.request.stream,
2866
- e.response?.success ?? "",
2867
- e.response?.model ?? "",
2868
- e.response?.usage.input_tokens ?? "",
2869
- e.response?.usage.output_tokens ?? "",
2870
- e.durationMs ?? "",
2871
- e.response?.stop_reason ?? "",
2872
- e.response?.error ?? ""
2873
- ]);
2874
- return [headers.join(","), ...rows.map((r) => r.join(","))].join("\n");
2976
+ /**
2977
+ * Execute a request with adaptive rate limiting.
2978
+ * If rate limiter is not initialized, executes immediately.
2979
+ * Returns the result along with queue wait time.
2980
+ */
2981
+ async function executeWithAdaptiveRateLimit(fn) {
2982
+ if (!rateLimiterInstance) return {
2983
+ result: await fn(),
2984
+ queueWaitMs: 0
2985
+ };
2986
+ return rateLimiterInstance.execute(fn);
2875
2987
  }
2876
2988
 
2877
2989
  //#endregion
2878
- //#region src/lib/proxy.ts
2990
+ //#region src/lib/config/proxy.ts
2879
2991
  /**
2880
2992
  * Custom dispatcher that routes requests through proxies based on environment variables.
2881
2993
  * Extends Agent to properly inherit the Dispatcher interface.
@@ -3700,7 +3812,7 @@ function sanitizeOpenAIMessages(payload) {
3700
3812
  }
3701
3813
 
3702
3814
  //#endregion
3703
- //#region src/lib/tokenizer.ts
3815
+ //#region src/lib/models/tokenizer.ts
3704
3816
  const ENCODING_MAP = {
3705
3817
  o200k_base: () => import("gpt-tokenizer/encoding/o200k_base"),
3706
3818
  cl100k_base: () => import("gpt-tokenizer/encoding/cl100k_base"),
@@ -4278,7 +4390,7 @@ function createTruncationResponseMarkerOpenAI(result) {
4278
4390
  }
4279
4391
 
4280
4392
  //#endregion
4281
- //#region src/lib/model-resolver.ts
4393
+ //#region src/lib/models/resolver.ts
4282
4394
  /**
4283
4395
  * Unified model name resolution and normalization.
4284
4396
  *
@@ -4397,11 +4509,77 @@ const createChatCompletions = async (payload) => {
4397
4509
  };
4398
4510
 
4399
4511
  //#endregion
4400
- //#region src/routes/shared.ts
4512
+ //#region src/routes/shared/payload.ts
4513
+ /**
4514
+ * Payload utilities for request handlers.
4515
+ */
4516
/**
 * Produce the payload that is sent upstream.
 * Only sanitization runs here (truncation is reactive, not applied up front),
 * so `truncateResult` in the returned object is always null.
 */
function buildFinalPayload(payload, _model) {
  const sanitized = sanitizeOpenAIMessages(payload);
  return {
    finalPayload: sanitized.payload,
    truncateResult: null,
    sanitizeRemovedCount: sanitized.removedCount,
    systemReminderRemovals: sanitized.systemReminderRemovals
  };
}
4401
4526
  /**
4402
- * Shared utilities for request handlers.
4403
- * Contains common functions used by both OpenAI and Anthropic message handlers.
4527
+ * Log helpful debugging information when a 413 error occurs.
4528
+ * Also adjusts the dynamic byte limit for future requests.
4404
4529
  */
4530
async function logPayloadSizeInfo(payload, model) {
  // Prints a diagnostic report for a request rejected as too large and
  // forwards the measured body size to onRequestTooLarge().
  //   payload - request body; `payload.messages` is walked for images and
  //             oversized messages.
  //   model   - optional; when truthy a token estimate is added to the report.
  const messageCount = payload.messages.length;
  const bodySize = JSON.stringify(payload).length;
  const bodySizeKB = bytesToKB(bodySize);
  onRequestTooLarge(bodySize);
  let imageCount = 0;
  let largeMessages = 0;
  let totalImageSize = 0;
  for (const msg of payload.messages) {
    if (Array.isArray(msg.content)) {
      for (const part of msg.content) {
        if (part.type !== "image_url") continue;
        imageCount++;
        // Only inline data: URLs contribute to the request body size.
        if (part.image_url.url.startsWith("data:")) totalImageSize += part.image_url.url.length;
      }
    }
    // JSON.stringify(undefined) returns undefined, so a message without a
    // `content` field used to throw here and hide the real error. Treat a
    // missing content like null.
    const contentLength = typeof msg.content === "string" ? msg.content.length : JSON.stringify(msg.content ?? null).length;
    if (contentLength > 5e4) largeMessages++;
  }
  consola.info("");
  consola.info("╭─────────────────────────────────────────────────────────╮");
  consola.info("│ 413 Request Entity Too Large │");
  consola.info("╰─────────────────────────────────────────────────────────╯");
  consola.info("");
  consola.info(` Request body size: ${bodySizeKB} KB (${bodySize.toLocaleString()} bytes)`);
  consola.info(` Message count: ${messageCount}`);
  if (model) {
    try {
      const tokenCount = await getTokenCount(payload, model);
      const limit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
      consola.info(` Estimated tokens: ${tokenCount.input.toLocaleString()} / ${limit.toLocaleString()}`);
    } catch (error) {
      // The estimate is optional; a tokenizer failure must not abort the report.
      consola.debug("Token count estimation failed:", error);
    }
  }
  if (imageCount > 0) {
    const imageSizeKB = bytesToKB(totalImageSize);
    consola.info(` Images: ${imageCount} (${imageSizeKB} KB base64 data)`);
  }
  if (largeMessages > 0) consola.info(` Large messages (>50KB): ${largeMessages}`);
  consola.info("");
  consola.info(" Suggestions:");
  if (imageCount > 0) consola.info(" • Remove or resize large images in the conversation");
  consola.info(" • Start a new conversation with /clear or /reset");
  consola.info(" • Reduce conversation history by deleting old messages");
  consola.info("");
}
4573
+
4574
+ //#endregion
4575
+ //#region src/routes/shared/response.ts
4576
/** Narrowing check: a response that owns a `choices` property is treated as non-streaming. */
function isNonStreaming(response) {
  const marker = "choices";
  return Object.hasOwn(response, marker);
}
4580
+
4581
+ //#endregion
4582
+ //#region src/routes/shared/tracking.ts
4405
4583
  /** Helper to update tracker model */
4406
4584
  function updateTrackerModel(trackingId, model) {
4407
4585
  if (!trackingId) return;
@@ -4413,40 +4591,6 @@ function updateTrackerStatus(trackingId, status) {
4413
4591
  if (!trackingId) return;
4414
4592
  requestTracker.updateRequest(trackingId, { status });
4415
4593
  }
4416
- /** Record error response to history, preserving full error details for debugging */
4417
- function recordErrorResponse(ctx, model, error) {
4418
- const errorMessage = getErrorMessage(error);
4419
- let content = null;
4420
- if (error instanceof Error && "responseText" in error && typeof error.responseText === "string") {
4421
- const responseText = error.responseText;
4422
- const status = "status" in error ? error.status : void 0;
4423
- if (responseText) {
4424
- let formattedBody;
4425
- try {
4426
- formattedBody = JSON.stringify(JSON.parse(responseText), null, 2);
4427
- } catch {
4428
- formattedBody = responseText;
4429
- }
4430
- content = {
4431
- role: "assistant",
4432
- content: [{
4433
- type: "text",
4434
- text: `[API Error Response${status ? ` - HTTP ${status}` : ""}]\n\n${formattedBody}`
4435
- }]
4436
- };
4437
- }
4438
- }
4439
- recordResponse(ctx.historyId, {
4440
- success: false,
4441
- model,
4442
- usage: {
4443
- input_tokens: 0,
4444
- output_tokens: 0
4445
- },
4446
- error: errorMessage,
4447
- content
4448
- }, Date.now() - ctx.startTime);
4449
- }
4450
4594
  /** Complete TUI tracking */
4451
4595
  function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs) {
4452
4596
  if (!trackingId) return;
@@ -4465,16 +4609,39 @@ function failTracking(trackingId, error) {
4465
4609
  if (!trackingId) return;
4466
4610
  requestTracker.failRequest(trackingId, getErrorMessage(error, "Stream error"));
4467
4611
  }
4468
- /**
4469
- * Create a marker to prepend to responses indicating auto-truncation occurred.
4470
- * Works with both OpenAI and Anthropic truncate results.
4471
- */
4472
- function createTruncationMarker$1(result) {
4473
- if (!result.wasCompacted) return "";
4474
- const { originalTokens, compactedTokens, removedMessageCount } = result;
4475
- if (originalTokens === void 0 || compactedTokens === void 0 || removedMessageCount === void 0) return `\n\n---\n[Auto-truncated: conversation history was reduced to fit context limits]`;
4476
- const reduction = originalTokens - compactedTokens;
4477
- return `\n\n---\n[Auto-truncated: ${removedMessageCount} messages removed, ${originalTokens} → ${compactedTokens} tokens (${Math.round(reduction / originalTokens * 100)}% reduction)]`;
4612
/**
 * Write a failed request into history.
 *
 * When the error is an Error carrying a non-empty string `responseText`, that
 * body is stored as an assistant text block (pretty-printed if it parses as
 * JSON, verbatim otherwise) and prefixed with the HTTP status when the error
 * has a truthy `status`. Token usage is recorded as zero.
 *
 * @param ctx - Supplies `historyId` and `startTime` (used for the duration).
 * @param model - Model name stored with the history record.
 * @param error - The value that was thrown.
 */
function recordErrorResponse(ctx, model, error) {
  const errorMessage = getErrorMessage(error);
  const rawBody = error instanceof Error && "responseText" in error ? error.responseText : void 0;
  let content = null;
  if (typeof rawBody === "string" && rawBody) {
    const status = "status" in error ? error.status : void 0;
    const statusSuffix = status ? ` - HTTP ${status}` : "";
    let body = rawBody;
    try {
      body = JSON.stringify(JSON.parse(rawBody), null, 2);
    } catch {
      // Not JSON: keep the raw response text.
    }
    content = {
      role: "assistant",
      content: [{
        type: "text",
        text: `[API Error Response${statusSuffix}]\n\n${body}`
      }]
    };
  }
  const elapsedMs = Date.now() - ctx.startTime;
  recordResponse(ctx.historyId, {
    success: false,
    model,
    usage: {
      input_tokens: 0,
      output_tokens: 0
    },
    error: errorMessage,
    content
  }, elapsedMs);
}
4479
4646
  /** Record streaming error to history, preserving any data accumulated before the error */
4480
4647
  function recordStreamError(opts) {
@@ -4496,66 +4663,168 @@ function recordStreamError(opts) {
4496
4663
  } : null
4497
4664
  }, Date.now() - ctx.startTime);
4498
4665
  }
4499
- /** Type guard for non-streaming responses */
4500
- function isNonStreaming(response) {
4501
- return Object.hasOwn(response, "choices");
4502
- }
4503
- /** Build final payload with sanitization (no pre-truncation — truncation is now reactive) */
4504
- function buildFinalPayload(payload, _model) {
4505
- const { payload: sanitizedPayload, removedCount: sanitizeRemovedCount, systemReminderRemovals } = sanitizeOpenAIMessages(payload);
4506
- return {
4507
- finalPayload: sanitizedPayload,
4508
- truncateResult: null,
4509
- sanitizeRemovedCount,
4510
- systemReminderRemovals
4511
- };
4666
+
4667
+ //#endregion
4668
+ //#region src/routes/shared/truncation.ts
4669
/**
 * Build the note that tells the reader auto-truncation happened.
 *
 * Returns an empty string when `result.wasCompacted` is falsy, a generic note
 * when any of the token/message statistics is undefined, and otherwise a
 * detailed note with the removed message count and percentage reduction.
 */
function createTruncationMarker$1(result) {
  if (!result.wasCompacted) return "";
  const { originalTokens: before, compactedTokens: after, removedMessageCount: removed } = result;
  const statsMissing = [before, after, removed].some((value) => value === void 0);
  if (statsMissing) {
    return `\n\n---\n[Auto-truncated: conversation history was reduced to fit context limits]`;
  }
  const percent = Math.round((before - after) / before * 100);
  return `\n\n---\n[Auto-truncated: ${removed} messages removed, ${before} → ${after} tokens (${percent}% reduction)]`;
}
4680
+
4681
+ //#endregion
4682
+ //#region src/routes/shared/pipeline.ts
4513
4683
  /**
4514
- * Log helpful debugging information when a 413 error occurs.
4515
- * Also adjusts the dynamic byte limit for future requests.
4684
+ * Request execution pipeline with pluggable retry strategies.
4685
+ *
4686
+ * Unifies the retry loop pattern shared by direct-anthropic-handler,
4687
+ * translated-handler, and (soon) completions handler.
4516
4688
  */
4517
- async function logPayloadSizeInfo(payload, model) {
4518
- const messageCount = payload.messages.length;
4519
- const bodySize = JSON.stringify(payload).length;
4520
- const bodySizeKB = bytesToKB(bodySize);
4521
- onRequestTooLarge(bodySize);
4522
- let imageCount = 0;
4523
- let largeMessages = 0;
4524
- let totalImageSize = 0;
4525
- for (const msg of payload.messages) {
4526
- if (Array.isArray(msg.content)) {
4527
- for (const part of msg.content) if (part.type === "image_url") {
4528
- imageCount++;
4529
- if (part.image_url.url.startsWith("data:")) totalImageSize += part.image_url.url.length;
4689
/**
 * Execute a request through the pipeline with retry strategies.
 *
 * Flow:
 * 1. Execute API call with the current payload
 * 2. On success → return response
 * 3. On failure → classify error → find first matching strategy → handle
 *    - retry → use new payload, loop back to step 1
 *    - abort or no strategy → throw error
 *
 * @param opts.adapter - Provides `execute(payload)` resolving to
 *   `{ result, queueWaitMs }` and `logPayloadSize(payload)` for diagnostics.
 * @param opts.strategies - Ordered list; the first whose `canHandle(apiError)`
 *   is true gets to `handle` the failure. Only that one strategy is consulted.
 * @param opts.payload - Payload used for the first attempt.
 * @param opts.originalPayload - Passed to strategies in the retry context.
 * @param opts.model - Passed to strategies in the retry context.
 * @param opts.maxRetries - Retries allowed after the first attempt (default 3).
 * @param opts.onBeforeAttempt - Optional `(attempt, payload)` hook run before each attempt.
 * @param opts.onRetry - Optional `(attempt, strategyName, newPayload, meta)` hook.
 * @returns `{ response, effectivePayload, queueWaitMs, totalRetries }`.
 * @throws The last request error. A non-Error thrown value is wrapped in an
 *   `Error("Unknown error")` whose `cause` is the original value.
 */
async function executeRequestPipeline(opts) {
  const { adapter, strategies, originalPayload, model, maxRetries = 3, onBeforeAttempt, onRetry } = opts;
  let effectivePayload = opts.payload;
  let lastError = null;
  let totalQueueWaitMs = 0;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    onBeforeAttempt?.(attempt, effectivePayload);
    try {
      const { result: response, queueWaitMs } = await adapter.execute(effectivePayload);
      totalQueueWaitMs += queueWaitMs;
      return {
        response,
        effectivePayload,
        queueWaitMs: totalQueueWaitMs,
        totalRetries: attempt
      };
    } catch (error) {
      lastError = error;
      if (attempt >= maxRetries) break;
      const apiError = classifyError(error);
      let handled = false;
      for (const strategy of strategies) {
        if (!strategy.canHandle(apiError)) continue;
        const retryContext = {
          attempt,
          originalPayload,
          model,
          maxRetries
        };
        try {
          const action = await strategy.handle(apiError, effectivePayload, retryContext);
          if (action.action === "retry") {
            consola.debug(`[Pipeline] Strategy "${strategy.name}" requests retry (attempt ${attempt + 1}/${maxRetries + 1})`);
            // waitMs is only added to the reported wait time; no sleep happens here.
            if (action.waitMs && action.waitMs > 0) totalQueueWaitMs += action.waitMs;
            effectivePayload = action.payload;
            onRetry?.(attempt, strategy.name, action.payload, action.meta);
            handled = true;
          }
        } catch (strategyError) {
          consola.warn(`[Pipeline] Strategy "${strategy.name}" failed on attempt ${attempt + 1}:`, strategyError instanceof Error ? strategyError.message : strategyError);
        }
        // The first matching strategy decides the outcome, whatever it was.
        break;
      }
      if (!handled) break;
    }
  }
  if (lastError) {
    if (classifyError(lastError).type === "payload_too_large") {
      try {
        await adapter.logPayloadSize(effectivePayload);
      } catch (logError) {
        // Diagnostics are best-effort: a logging failure must not replace
        // the request error the caller needs to see.
        consola.debug("[Pipeline] Failed to log payload size:", logError);
      }
    }
    throw lastError instanceof Error ? lastError : new Error("Unknown error", { cause: lastError });
  }
  throw new Error("Unexpected state in pipeline retry loop");
}
4753
+
4754
+ //#endregion
4755
+ //#region src/routes/shared/strategies/auto-truncate.ts
4756
+ /**
4757
+ * Auto-truncate retry strategy.
4758
+ *
4759
+ * Handles 413 (body too large) and token limit errors by truncating the
4760
+ * message payload and retrying.
4761
+ */
4762
/**
 * Build the "auto-truncate" retry strategy.
 *
 * The strategy accepts `payload_too_large` and `token_limit` errors while
 * `isEnabled()` is true. Handling aborts unless a model is known, the raw
 * error is an HTTPError, the limit can be parsed from it, and truncation
 * actually compacted the payload; otherwise it asks for a retry with the
 * truncated, re-sanitized payload.
 *
 * @param opts.truncate - Format-specific truncation `(payload, model, options)`.
 * @param opts.resanitize - Format-specific sanitization run on the truncated payload.
 * @param opts.isEnabled - Returns whether auto-truncate is currently enabled.
 * @param opts.label - Prefix used in the log lines.
 * @returns A strategy object with `name`, `canHandle(error)` and `handle(error, currentPayload, context)`.
 */
function createAutoTruncateStrategy(opts) {
  const { truncate, resanitize, isEnabled, label } = opts;
  const handledTypes = ["payload_too_large", "token_limit"];
  return {
    name: "auto-truncate",
    canHandle(error) {
      return isEnabled() ? handledTypes.includes(error.type) : false;
    },
    async handle(error, currentPayload, context) {
      const abort = () => ({ action: "abort", error });
      const { attempt, originalPayload, model, maxRetries } = context;
      if (!model || !(error.raw instanceof HTTPError)) return abort();
      const payloadBytes = JSON.stringify(currentPayload).length;
      const parsed = tryParseAndLearnLimit(error.raw, model.id, payloadBytes);
      if (!parsed) return abort();
      const attemptLabel = `Attempt ${attempt + 1}/${maxRetries + 1}`;
      let targetTokenLimit;
      let targetByteLimitBytes;
      if (parsed.type === "token_limit" && parsed.limit) {
        targetTokenLimit = Math.floor(parsed.limit * AUTO_TRUNCATE_RETRY_FACTOR);
        consola.info(`[${label}] ${attemptLabel}: Token limit error (${parsed.current}>${parsed.limit}), retrying with limit ${targetTokenLimit}...`);
      } else if (parsed.type === "body_too_large") {
        targetByteLimitBytes = Math.floor(payloadBytes * AUTO_TRUNCATE_RETRY_FACTOR);
        consola.info(`[${label}] ${attemptLabel}: Body too large (${bytesToKB(payloadBytes)}KB), retrying with limit ${bytesToKB(targetByteLimitBytes)}KB...`);
      }
      // Truncation always restarts from the original payload, not the current one.
      const truncateResult = await truncate(originalPayload, model, {
        checkTokenLimit: true,
        checkByteLimit: true,
        targetTokenLimit,
        targetByteLimitBytes
      });
      if (!truncateResult.wasCompacted) return abort();
      const { payload, removedCount, systemReminderRemovals } = resanitize(truncateResult.payload);
      return {
        action: "retry",
        payload,
        meta: {
          truncateResult,
          sanitization: { removedCount, systemReminderRemovals },
          attempt: attempt + 1
        }
      };
    }
  };
}
4560
4829
 
4561
4830
  //#endregion
@@ -4606,19 +4875,46 @@ async function handleCompletion$1(c) {
4606
4875
  return executeRequest({
4607
4876
  c,
4608
4877
  payload,
4878
+ originalPayload,
4609
4879
  selectedModel,
4610
4880
  ctx,
4611
4881
  trackingId
4612
4882
  });
4613
4883
  }
4614
4884
  /**
4615
- * Execute the API call with enhanced error handling for 413 errors.
4885
+ * Execute the API call with reactive retry pipeline.
4886
+ * Handles 413 and token limit errors with auto-truncation.
4616
4887
  */
4617
4888
  async function executeRequest(opts) {
4618
- const { c, payload, selectedModel, ctx, trackingId } = opts;
4889
+ const { c, payload, originalPayload, selectedModel, ctx, trackingId } = opts;
4890
+ const adapter = {
4891
+ format: "openai",
4892
+ sanitize: (p) => sanitizeOpenAIMessages(p),
4893
+ execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p)),
4894
+ logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
4895
+ };
4896
+ const strategies = [createAutoTruncateStrategy({
4897
+ truncate: (p, model, truncOpts) => autoTruncateOpenAI(p, model, truncOpts),
4898
+ resanitize: (p) => sanitizeOpenAIMessages(p),
4899
+ isEnabled: () => state.autoTruncate,
4900
+ label: "Completions"
4901
+ })];
4619
4902
  try {
4620
- const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(payload));
4621
- ctx.queueWaitMs = queueWaitMs;
4903
+ const result = await executeRequestPipeline({
4904
+ adapter,
4905
+ strategies,
4906
+ payload,
4907
+ originalPayload,
4908
+ model: selectedModel,
4909
+ maxRetries: MAX_AUTO_TRUNCATE_RETRIES,
4910
+ onRetry: (attempt, _strategyName, _newPayload, meta) => {
4911
+ const retryTruncateResult = meta?.truncateResult;
4912
+ if (retryTruncateResult) ctx.truncateResult = retryTruncateResult;
4913
+ if (trackingId) requestTracker.updateRequest(trackingId, { tags: ["compact", `retry-${attempt + 1}`] });
4914
+ }
4915
+ });
4916
+ ctx.queueWaitMs = result.queueWaitMs;
4917
+ const response = result.response;
4622
4918
  if (isNonStreaming(response)) return handleNonStreamingResponse$1(c, response, ctx);
4623
4919
  consola.debug("Streaming response");
4624
4920
  updateTrackerStatus(trackingId, "streaming");
@@ -4631,7 +4927,6 @@ async function executeRequest(opts) {
4631
4927
  });
4632
4928
  });
4633
4929
  } catch (error) {
4634
- if (error instanceof HTTPError && error.status === 413) await logPayloadSizeInfo(payload, selectedModel);
4635
4930
  recordErrorResponse(ctx, payload.model, error);
4636
4931
  throw error;
4637
4932
  }
@@ -5715,7 +6010,7 @@ function mapOpenAIStopReasonToAnthropic(finishReason) {
5715
6010
  }
5716
6011
 
5717
6012
  //#endregion
5718
- //#region src/routes/messages/non-stream-translation.ts
6013
+ //#region src/lib/translation/non-stream.ts
5719
6014
  const OPENAI_TOOL_NAME_LIMIT = 64;
5720
6015
  /**
5721
6016
  * Ensure all tool_use blocks have corresponding tool_result responses,
@@ -6650,7 +6945,47 @@ function handleMessageDelta(delta, usage, acc) {
6650
6945
  }
6651
6946
 
6652
6947
  //#endregion
6653
- //#region src/routes/messages/stream-translation.ts
6948
+ //#region src/lib/translation/message-mapping.ts
6949
/**
 * Heuristic test for "these two messages are the same original message".
 *
 * Roles must be equal. Two string contents match when either one starts with
 * the first 100 characters of the other. In every other case only the first
 * content block of each side is compared: types must agree, `tool_use` blocks
 * must share an `id`, and `tool_result` blocks must share a `tool_use_id`.
 * If either side has no block array (or an empty one) the messages are
 * considered matching.
 */
function messagesMatch(orig, rewritten) {
  if (orig.role !== rewritten.role) return false;
  const before = orig.content;
  const after = rewritten.content;
  if (typeof before === "string" && typeof after === "string") {
    return after.startsWith(before.slice(0, 100)) || before.startsWith(after.slice(0, 100));
  }
  const beforeBlocks = Array.isArray(before) ? before : [];
  const afterBlocks = Array.isArray(after) ? after : [];
  if (beforeBlocks.length === 0 || afterBlocks.length === 0) return true;
  const firstBefore = beforeBlocks[0];
  const firstAfter = afterBlocks[0];
  if (firstBefore.type !== firstAfter.type) return false;
  // Types are equal from here on, so switching on one side is enough.
  switch (firstBefore.type) {
    case "tool_use":
      return firstBefore.id === firstAfter.id;
    case "tool_result":
      return firstBefore.tool_use_id === firstAfter.tool_use_id;
    default:
      return true;
  }
}
6967
/**
 * Map each rewritten message index to the index of its original message.
 *
 * A single cursor walks forward through `original`; each rewritten message
 * takes the next original that `messagesMatch` accepts. Rewritten messages
 * left over once the originals are exhausted are mapped to -1.
 *
 * @returns An array of length `rewritten.length` holding original indices or -1.
 */
function buildMessageMapping(original, rewritten) {
  const mapping = [];
  let cursor = 0;
  for (const candidate of rewritten) {
    let matchedAt = -1;
    while (cursor < original.length && matchedAt === -1) {
      if (messagesMatch(original[cursor], candidate)) matchedAt = cursor;
      cursor += 1;
    }
    // Originals exhausted: nothing later can match either.
    if (matchedAt === -1) break;
    mapping.push(matchedAt);
  }
  while (mapping.length < rewritten.length) mapping.push(-1);
  return mapping;
}
+
6987
+ //#endregion
6988
+ //#region src/lib/translation/stream.ts
6654
6989
  function isToolBlockOpen(state) {
6655
6990
  if (!state.contentBlockOpen) return false;
6656
6991
  return Object.values(state.toolCalls).some((tc) => tc.anthropicBlockIndex === state.contentBlockIndex);
@@ -6823,12 +7158,57 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
6823
7158
  if (initialSanitized.thinking && initialSanitized.thinking.type !== "disabled") tags.push(`thinking:${initialSanitized.thinking.type}`);
6824
7159
  if (tags.length > 0) requestTracker.updateRequest(ctx.trackingId, { tags });
6825
7160
  }
6826
- let effectivePayload = initialSanitized;
7161
+ const adapter = {
7162
+ format: "anthropic",
7163
+ sanitize: (p) => sanitizeAnthropicMessages(p),
7164
+ execute: (p) => executeWithAdaptiveRateLimit(() => createAnthropicMessages(p)),
7165
+ logPayloadSize: (p) => logPayloadSizeInfoAnthropic(p, selectedModel)
7166
+ };
7167
+ const strategies = [createAutoTruncateStrategy({
7168
+ truncate: (p, model, opts) => autoTruncateAnthropic(p, model, opts),
7169
+ resanitize: (p) => sanitizeAnthropicMessages(p),
7170
+ isEnabled: () => state.autoTruncate,
7171
+ label: "Anthropic"
7172
+ })];
6827
7173
  let truncateResult;
6828
- let lastError = null;
6829
- for (let attempt = 0; attempt <= MAX_AUTO_TRUNCATE_RETRIES; attempt++) try {
6830
- const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createAnthropicMessages(effectivePayload));
6831
- ctx.queueWaitMs = queueWaitMs;
7174
+ try {
7175
+ const result = await executeRequestPipeline({
7176
+ adapter,
7177
+ strategies,
7178
+ payload: initialSanitized,
7179
+ originalPayload: anthropicPayload,
7180
+ model: selectedModel,
7181
+ maxRetries: MAX_AUTO_TRUNCATE_RETRIES,
7182
+ onRetry: (_attempt, _strategyName, newPayload, meta) => {
7183
+ const retryTruncateResult = meta?.truncateResult;
7184
+ if (retryTruncateResult) truncateResult = retryTruncateResult;
7185
+ const retrySanitization = meta?.sanitization;
7186
+ const retryMessageMapping = buildMessageMapping(anthropicPayload.messages, newPayload.messages);
7187
+ recordRewrites(ctx.historyId, {
7188
+ truncation: retryTruncateResult ? {
7189
+ removedMessageCount: retryTruncateResult.removedMessageCount,
7190
+ originalTokens: retryTruncateResult.originalTokens,
7191
+ compactedTokens: retryTruncateResult.compactedTokens,
7192
+ processingTimeMs: retryTruncateResult.processingTimeMs
7193
+ } : void 0,
7194
+ sanitization: retrySanitization && (retrySanitization.removedCount > 0 || retrySanitization.systemReminderRemovals > 0) ? {
7195
+ removedBlockCount: retrySanitization.removedCount,
7196
+ systemReminderRemovals: retrySanitization.systemReminderRemovals
7197
+ } : void 0,
7198
+ rewrittenMessages: convertAnthropicMessages(newPayload.messages),
7199
+ rewrittenSystem: typeof newPayload.system === "string" ? newPayload.system : void 0,
7200
+ messageMapping: retryMessageMapping
7201
+ });
7202
+ if (ctx.trackingId) {
7203
+ const retryTags = ["compact", `retry-${meta?.attempt ?? 1}`];
7204
+ if (newPayload.thinking && newPayload.thinking.type !== "disabled") retryTags.push(`thinking:${newPayload.thinking.type}`);
7205
+ requestTracker.updateRequest(ctx.trackingId, { tags: retryTags });
7206
+ }
7207
+ }
7208
+ });
7209
+ ctx.queueWaitMs = result.queueWaitMs;
7210
+ const response = result.response;
7211
+ const effectivePayload = result.effectivePayload;
6832
7212
  if (Symbol.asyncIterator in response) {
6833
7213
  consola.debug("Streaming response from Copilot (direct Anthropic)");
6834
7214
  updateTrackerStatus(ctx.trackingId, "streaming");
@@ -6843,67 +7223,9 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
6843
7223
  }
6844
7224
  return handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult);
6845
7225
  } catch (error) {
6846
- lastError = error;
6847
- if (state.autoTruncate && error instanceof HTTPError && selectedModel && attempt < MAX_AUTO_TRUNCATE_RETRIES) {
6848
- const payloadBytes = JSON.stringify(effectivePayload).length;
6849
- const parsed = tryParseAndLearnLimit(error, selectedModel.id, payloadBytes);
6850
- if (parsed) {
6851
- let targetTokenLimit;
6852
- let targetByteLimitBytes;
6853
- if (parsed.type === "token_limit" && parsed.limit) {
6854
- targetTokenLimit = Math.floor(parsed.limit * AUTO_TRUNCATE_RETRY_FACTOR);
6855
- consola.info(`[Anthropic] Attempt ${attempt + 1}/${MAX_AUTO_TRUNCATE_RETRIES + 1}: Token limit error (${parsed.current}>${parsed.limit}), retrying with limit ${targetTokenLimit}...`);
6856
- } else if (parsed.type === "body_too_large") {
6857
- targetByteLimitBytes = Math.floor(payloadBytes * AUTO_TRUNCATE_RETRY_FACTOR);
6858
- consola.info(`[Anthropic] Attempt ${attempt + 1}/${MAX_AUTO_TRUNCATE_RETRIES + 1}: Body too large (${bytesToKB(payloadBytes)}KB), retrying with limit ${bytesToKB(targetByteLimitBytes)}KB...`);
6859
- }
6860
- try {
6861
- truncateResult = await autoTruncateAnthropic(anthropicPayload, selectedModel, {
6862
- checkTokenLimit: true,
6863
- checkByteLimit: true,
6864
- targetTokenLimit,
6865
- targetByteLimitBytes
6866
- });
6867
- if (truncateResult.wasCompacted) {
6868
- const { payload: retrySanitized, removedCount: retryOrphanedRemovals, systemReminderRemovals: retrySystemRemovals } = sanitizeAnthropicMessages(truncateResult.payload);
6869
- effectivePayload = retrySanitized;
6870
- const retryMessageMapping = buildMessageMapping(anthropicPayload.messages, effectivePayload.messages);
6871
- recordRewrites(ctx.historyId, {
6872
- truncation: {
6873
- removedMessageCount: truncateResult.removedMessageCount,
6874
- originalTokens: truncateResult.originalTokens,
6875
- compactedTokens: truncateResult.compactedTokens,
6876
- processingTimeMs: truncateResult.processingTimeMs
6877
- },
6878
- sanitization: retryOrphanedRemovals > 0 || retrySystemRemovals > 0 ? {
6879
- removedBlockCount: retryOrphanedRemovals,
6880
- systemReminderRemovals: retrySystemRemovals
6881
- } : void 0,
6882
- rewrittenMessages: convertAnthropicMessages(effectivePayload.messages),
6883
- rewrittenSystem: typeof effectivePayload.system === "string" ? effectivePayload.system : void 0,
6884
- messageMapping: retryMessageMapping
6885
- });
6886
- if (ctx.trackingId) {
6887
- const retryTags = ["compact", `retry-${attempt + 1}`];
6888
- if (effectivePayload.thinking && effectivePayload.thinking.type !== "disabled") retryTags.push(`thinking:${effectivePayload.thinking.type}`);
6889
- requestTracker.updateRequest(ctx.trackingId, { tags: retryTags });
6890
- }
6891
- continue;
6892
- } else break;
6893
- } catch (truncateError) {
6894
- consola.warn(`[Anthropic] Auto-truncate failed on attempt ${attempt + 1}:`, truncateError instanceof Error ? truncateError.message : truncateError);
6895
- break;
6896
- }
6897
- }
6898
- }
6899
- break;
6900
- }
6901
- if (lastError) {
6902
- if (lastError instanceof HTTPError && lastError.status === 413) logPayloadSizeInfoAnthropic(effectivePayload, selectedModel);
6903
- recordErrorResponse(ctx, anthropicPayload.model, lastError);
6904
- throw lastError instanceof Error ? lastError : /* @__PURE__ */ new Error("Unknown error");
7226
+ recordErrorResponse(ctx, anthropicPayload.model, error);
7227
+ throw error;
6905
7228
  }
6906
- throw new Error("Unexpected state in retry loop");
6907
7229
  }
6908
7230
  /**
6909
7231
  * Log payload size info for debugging 413 errors
@@ -7078,43 +7400,6 @@ function recordStreamingResponse$1(acc, fallbackModel, ctx) {
7078
7400
  toolCalls
7079
7401
  }, Date.now() - ctx.startTime);
7080
7402
  }
7081
- /**
7082
- * Check if two messages likely correspond to the same original message.
7083
- * Used by buildMessageMapping to handle cases where sanitization removes
7084
- * content blocks within a message (changing its shape) or removes entire messages.
7085
- */
7086
- function messagesMatch(orig, rewritten) {
7087
- if (orig.role !== rewritten.role) return false;
7088
- if (typeof orig.content === "string" && typeof rewritten.content === "string") return rewritten.content.startsWith(orig.content.slice(0, 100)) || orig.content.startsWith(rewritten.content.slice(0, 100));
7089
- const origBlocks = Array.isArray(orig.content) ? orig.content : [];
7090
- const rwBlocks = Array.isArray(rewritten.content) ? rewritten.content : [];
7091
- if (origBlocks.length === 0 || rwBlocks.length === 0) return true;
7092
- const ob = origBlocks[0];
7093
- const rb = rwBlocks[0];
7094
- if (ob.type !== rb.type) return false;
7095
- if (ob.type === "tool_use" && rb.type === "tool_use") return ob.id === rb.id;
7096
- if (ob.type === "tool_result" && rb.type === "tool_result") return ob.tool_use_id === rb.tool_use_id;
7097
- return true;
7098
- }
7099
- /**
7100
- * Build messageMapping (rwIdx → origIdx) for the direct Anthropic path.
7101
- * Uses a two-pointer approach since rewritten messages maintain the same relative
7102
- * order as originals (all transformations are deletions, never reorderings).
7103
- */
7104
- function buildMessageMapping(original, rewritten) {
7105
- const mapping = [];
7106
- let origIdx = 0;
7107
- for (const element of rewritten) while (origIdx < original.length) {
7108
- if (messagesMatch(original[origIdx], element)) {
7109
- mapping.push(origIdx);
7110
- origIdx++;
7111
- break;
7112
- }
7113
- origIdx++;
7114
- }
7115
- while (mapping.length < rewritten.length) mapping.push(-1);
7116
- return mapping;
7117
- }
7118
7403
 
7119
7404
  //#endregion
7120
7405
  //#region src/routes/messages/translated-handler.ts
@@ -7152,11 +7437,38 @@ async function handleTranslatedCompletion(c, anthropicPayload, ctx) {
7152
7437
  if (anthropicPayload.thinking && anthropicPayload.thinking.type !== "disabled") tags.push(`thinking:${anthropicPayload.thinking.type}`);
7153
7438
  if (tags.length > 0) requestTracker.updateRequest(ctx.trackingId, { tags });
7154
7439
  }
7155
- let effectivePayload = initialOpenAIPayload;
7156
- let lastError = null;
7157
- for (let attempt = 0; attempt <= MAX_AUTO_TRUNCATE_RETRIES; attempt++) try {
7158
- const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(effectivePayload));
7159
- ctx.queueWaitMs = queueWaitMs;
7440
+ const adapter = {
7441
+ format: "openai",
7442
+ sanitize: (p) => sanitizeOpenAIMessages(p),
7443
+ execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p)),
7444
+ logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
7445
+ };
7446
+ const strategies = [createAutoTruncateStrategy({
7447
+ truncate: (p, model, opts) => autoTruncateOpenAI(p, model, opts),
7448
+ resanitize: (p) => sanitizeOpenAIMessages(p),
7449
+ isEnabled: () => state.autoTruncate,
7450
+ label: "Translated"
7451
+ })];
7452
+ try {
7453
+ const result = await executeRequestPipeline({
7454
+ adapter,
7455
+ strategies,
7456
+ payload: initialOpenAIPayload,
7457
+ originalPayload: translatedPayload,
7458
+ model: selectedModel,
7459
+ maxRetries: MAX_AUTO_TRUNCATE_RETRIES,
7460
+ onRetry: (attempt, _strategyName, _newPayload, meta) => {
7461
+ const retryTruncateResult = meta?.truncateResult;
7462
+ if (retryTruncateResult) ctx.truncateResult = retryTruncateResult;
7463
+ if (ctx.trackingId) {
7464
+ const retryTags = ["compact", `retry-${attempt + 1}`];
7465
+ if (anthropicPayload.thinking && anthropicPayload.thinking.type !== "disabled") retryTags.push(`thinking:${anthropicPayload.thinking.type}`);
7466
+ requestTracker.updateRequest(ctx.trackingId, { tags: retryTags });
7467
+ }
7468
+ }
7469
+ });
7470
+ ctx.queueWaitMs = result.queueWaitMs;
7471
+ const response = result.response;
7160
7472
  if (isNonStreaming(response)) return handleNonStreamingResponse({
7161
7473
  c,
7162
7474
  response,
@@ -7175,52 +7487,9 @@ async function handleTranslatedCompletion(c, anthropicPayload, ctx) {
7175
7487
  });
7176
7488
  });
7177
7489
  } catch (error) {
7178
- lastError = error;
7179
- if (state.autoTruncate && error instanceof HTTPError && selectedModel && attempt < MAX_AUTO_TRUNCATE_RETRIES) {
7180
- const payloadBytes = JSON.stringify(effectivePayload).length;
7181
- const parsed = tryParseAndLearnLimit(error, selectedModel.id, payloadBytes);
7182
- if (parsed) {
7183
- let targetTokenLimit;
7184
- let targetByteLimitBytes;
7185
- if (parsed.type === "token_limit" && parsed.limit) {
7186
- targetTokenLimit = Math.floor(parsed.limit * AUTO_TRUNCATE_RETRY_FACTOR);
7187
- consola.info(`[Translated] Attempt ${attempt + 1}/${MAX_AUTO_TRUNCATE_RETRIES + 1}: Token limit error (${parsed.current}>${parsed.limit}), retrying with limit ${targetTokenLimit}...`);
7188
- } else if (parsed.type === "body_too_large") {
7189
- targetByteLimitBytes = Math.floor(payloadBytes * AUTO_TRUNCATE_RETRY_FACTOR);
7190
- consola.info(`[Translated] Attempt ${attempt + 1}/${MAX_AUTO_TRUNCATE_RETRIES + 1}: Body too large (${bytesToKB(payloadBytes)}KB), retrying with limit ${bytesToKB(targetByteLimitBytes)}KB...`);
7191
- }
7192
- try {
7193
- const retryTruncateResult = await autoTruncateOpenAI(translatedPayload, selectedModel, {
7194
- checkTokenLimit: true,
7195
- checkByteLimit: true,
7196
- targetTokenLimit,
7197
- targetByteLimitBytes
7198
- });
7199
- if (retryTruncateResult.wasCompacted) {
7200
- const { payload: retrySanitized } = sanitizeOpenAIMessages(retryTruncateResult.payload);
7201
- effectivePayload = retrySanitized;
7202
- ctx.truncateResult = retryTruncateResult;
7203
- if (ctx.trackingId) {
7204
- const retryTags = ["compact", `retry-${attempt + 1}`];
7205
- if (anthropicPayload.thinking && anthropicPayload.thinking.type !== "disabled") retryTags.push(`thinking:${anthropicPayload.thinking.type}`);
7206
- requestTracker.updateRequest(ctx.trackingId, { tags: retryTags });
7207
- }
7208
- continue;
7209
- } else break;
7210
- } catch (truncateError) {
7211
- consola.warn(`[Translated] Auto-truncate failed on attempt ${attempt + 1}:`, truncateError instanceof Error ? truncateError.message : truncateError);
7212
- break;
7213
- }
7214
- }
7215
- }
7216
- break;
7217
- }
7218
- if (lastError) {
7219
- if (lastError instanceof HTTPError && lastError.status === 413) await logPayloadSizeInfo(effectivePayload, selectedModel);
7220
- recordErrorResponse(ctx, anthropicPayload.model, lastError);
7221
- throw lastError instanceof Error ? lastError : /* @__PURE__ */ new Error("Unknown error");
7490
+ recordErrorResponse(ctx, anthropicPayload.model, error);
7491
+ throw error;
7222
7492
  }
7223
- throw new Error("Unexpected state in retry loop");
7224
7493
  }
7225
7494
  function handleNonStreamingResponse(opts) {
7226
7495
  const { c, response, toolNameMapping, ctx } = opts;
@@ -7596,6 +7865,25 @@ usageRoute.get("/", async (c) => {
7596
7865
  }
7597
7866
  });
7598
7867
 
7868
+ //#endregion
7869
+ //#region src/routes/index.ts
7870
+ /**
7871
+ * Register all API routes on the given Hono app.
7872
+ */
7873
+ function registerRoutes(app) {
7874
+ app.route("/chat/completions", completionRoutes);
7875
+ app.route("/models", modelRoutes);
7876
+ app.route("/embeddings", embeddingRoutes);
7877
+ app.route("/usage", usageRoute);
7878
+ app.route("/token", tokenRoute);
7879
+ app.route("/v1/chat/completions", completionRoutes);
7880
+ app.route("/v1/models", modelRoutes);
7881
+ app.route("/v1/embeddings", embeddingRoutes);
7882
+ app.route("/v1/messages", messageRoutes);
7883
+ app.route("/api/event_logging", eventLoggingRoutes);
7884
+ app.route("/history", historyRoutes);
7885
+ }
7886
+
7599
7887
  //#endregion
7600
7888
  //#region src/server.ts
7601
7889
  const server = new Hono();
@@ -7622,17 +7910,7 @@ server.get("/health", (c) => {
7622
7910
  }
7623
7911
  }, healthy ? 200 : 503);
7624
7912
  });
7625
- server.route("/chat/completions", completionRoutes);
7626
- server.route("/models", modelRoutes);
7627
- server.route("/embeddings", embeddingRoutes);
7628
- server.route("/usage", usageRoute);
7629
- server.route("/token", tokenRoute);
7630
- server.route("/v1/chat/completions", completionRoutes);
7631
- server.route("/v1/models", modelRoutes);
7632
- server.route("/v1/embeddings", embeddingRoutes);
7633
- server.route("/v1/messages", messageRoutes);
7634
- server.route("/api/event_logging", eventLoggingRoutes);
7635
- server.route("/history", historyRoutes);
7913
+ registerRoutes(server);
7636
7914
 
7637
7915
  //#endregion
7638
7916
  //#region src/start.ts