@hsupu/copilot-api 0.8.1-beta.2 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/config.example.yaml +29 -0
  2. package/dist/main.mjs +3032 -895
  3. package/dist/main.mjs.map +1 -1
  4. package/package.json +10 -5
  5. package/ui/history-v3/dist/assets/LogsPage-CkzMEjbU.css +1 -0
  6. package/ui/history-v3/dist/assets/LogsPage-CyM6vZyg.js +1 -0
  7. package/ui/history-v3/dist/assets/VActivityPage-DGZYvP1d.css +1 -0
  8. package/ui/history-v3/dist/assets/VActivityPage-qSJRqJGU.js +3 -0
  9. package/ui/history-v3/dist/assets/VCard-B2q0rBgl.js +1 -0
  10. package/ui/history-v3/dist/assets/VCard-v_d_q0Rh.css +1 -0
  11. package/ui/history-v3/dist/assets/VConfigPage-DisddwD3.css +1 -0
  12. package/ui/history-v3/dist/assets/VConfigPage-zQu5b6tC.js +1 -0
  13. package/ui/history-v3/dist/assets/VDashboardPage-CsNfE_go.js +1 -0
  14. package/ui/history-v3/dist/assets/VDashboardPage-oAbMfN9y.css +1 -0
  15. package/ui/history-v3/dist/assets/VDialog-B5tAv-4d.js +1 -0
  16. package/ui/history-v3/dist/assets/VDialog-CFhCWS_I.css +1 -0
  17. package/ui/history-v3/dist/assets/VModelsPage-C1y0gG_S.js +1 -0
  18. package/ui/history-v3/dist/assets/VModelsPage-DMPq4SEZ.css +1 -0
  19. package/ui/history-v3/dist/assets/VSelect-CahLC2X3.js +1 -0
  20. package/ui/history-v3/dist/assets/VSelect-CiSADOyl.css +1 -0
  21. package/ui/history-v3/dist/assets/VSheet-BhXOOy9y.js +1 -0
  22. package/ui/history-v3/dist/assets/VSheet-DI_DMcjz.css +1 -0
  23. package/ui/history-v3/dist/assets/index-Cxye_z0t.js +18 -0
  24. package/ui/history-v3/dist/assets/index-sVLYkWM6.css +1 -0
  25. package/ui/history-v3/dist/assets/useLogs-C_hg5wZk.js +1 -0
  26. package/ui/history-v3/dist/assets/{vendor-CmGvxZwr.js → vendor-tLv7SZ-i.js} +1 -1
  27. package/ui/history-v3/dist/assets/vue-McY99x0M.js +1 -0
  28. package/ui/history-v3/dist/assets/ws-status-BJ5xglsi.js +1 -0
  29. package/ui/history-v3/dist/index.html +3 -3
  30. package/ui/history-v3/dist/assets/BaseSelect-CttLMFCN.js +0 -1
  31. package/ui/history-v3/dist/assets/BaseSelect-N-W6HPTu.css +0 -1
  32. package/ui/history-v3/dist/assets/DashboardPage-BYXNxjXb.css +0 -1
  33. package/ui/history-v3/dist/assets/DashboardPage-CgXivoWS.js +0 -1
  34. package/ui/history-v3/dist/assets/DetailPanel-CGpPxDDa.css +0 -1
  35. package/ui/history-v3/dist/assets/DetailPanel-EOQma5fZ.js +0 -3
  36. package/ui/history-v3/dist/assets/HistoryPage-PA0Yh3n3.css +0 -1
  37. package/ui/history-v3/dist/assets/HistoryPage-rhEb_UZG.js +0 -1
  38. package/ui/history-v3/dist/assets/LogsPage-BuPou1cg.css +0 -1
  39. package/ui/history-v3/dist/assets/LogsPage-CNXQuqMj.js +0 -1
  40. package/ui/history-v3/dist/assets/ModelsPage-Bpi7Y9GS.js +0 -1
  41. package/ui/history-v3/dist/assets/ModelsPage-F2KTxq2i.css +0 -1
  42. package/ui/history-v3/dist/assets/ProgressBar-6xzx-ZSc.js +0 -1
  43. package/ui/history-v3/dist/assets/ProgressBar-CtfiTXLy.css +0 -1
  44. package/ui/history-v3/dist/assets/UsagePage-COyq-DOU.css +0 -1
  45. package/ui/history-v3/dist/assets/UsagePage-CZfgTYCP.js +0 -1
  46. package/ui/history-v3/dist/assets/VChip-9UyCCNyg.js +0 -1
  47. package/ui/history-v3/dist/assets/VChip-B_fbAfwz.css +0 -1
  48. package/ui/history-v3/dist/assets/VDashboardPage-DXtj4agW.js +0 -1
  49. package/ui/history-v3/dist/assets/VDashboardPage-axfQtTiR.css +0 -1
  50. package/ui/history-v3/dist/assets/VDivider-D8zdArq0.js +0 -1
  51. package/ui/history-v3/dist/assets/VDivider-DITF6qCr.css +0 -1
  52. package/ui/history-v3/dist/assets/VHistoryPage-Dj0qUmWz.js +0 -1
  53. package/ui/history-v3/dist/assets/VHistoryPage-DqpLWYXo.css +0 -1
  54. package/ui/history-v3/dist/assets/VList-Bf0AJT_N.css +0 -1
  55. package/ui/history-v3/dist/assets/VList-Ct6gdZ-F.js +0 -1
  56. package/ui/history-v3/dist/assets/VLogsPage-BOA_17HS.js +0 -1
  57. package/ui/history-v3/dist/assets/VLogsPage-Dr3my9y3.css +0 -1
  58. package/ui/history-v3/dist/assets/VModelsPage-Bkon7sFs.css +0 -1
  59. package/ui/history-v3/dist/assets/VModelsPage-D-IbiiwR.js +0 -1
  60. package/ui/history-v3/dist/assets/VSpacer-DfbUir7X.css +0 -1
  61. package/ui/history-v3/dist/assets/VSpacer-F4vloCsf.js +0 -1
  62. package/ui/history-v3/dist/assets/VTable-B4qROCQu.js +0 -1
  63. package/ui/history-v3/dist/assets/VTable-BTui1tPX.css +0 -1
  64. package/ui/history-v3/dist/assets/VTooltip-9iBP-JhF.js +0 -1
  65. package/ui/history-v3/dist/assets/VTooltip-C1DKovoh.css +0 -1
  66. package/ui/history-v3/dist/assets/VUsagePage-B8WkBKET.js +0 -1
  67. package/ui/history-v3/dist/assets/VUsagePage-Se7R-H-Y.css +0 -1
  68. package/ui/history-v3/dist/assets/index-B8CP-fZd.css +0 -1
  69. package/ui/history-v3/dist/assets/index-cupXJxSz.js +0 -18
  70. package/ui/history-v3/dist/assets/useInjectedHistoryStore-Dx7UlhLw.js +0 -1
  71. package/ui/history-v3/dist/assets/useLogs-Bz9naVOB.js +0 -1
  72. package/ui/history-v3/dist/assets/usePolling-CRd-nhvF.js +0 -1
  73. package/ui/history-v3/dist/assets/vue-Bmo88J5t.js +0 -1
package/dist/main.mjs CHANGED
@@ -3,20 +3,21 @@ import { defineCommand, runMain } from "citty";
3
3
  import consola, { consola as consola$1 } from "consola";
4
4
  import * as fs$1 from "node:fs/promises";
5
5
  import fs, { access, constants, readFile } from "node:fs/promises";
6
+ import { randomBytes, randomUUID } from "node:crypto";
6
7
  import os, { homedir } from "node:os";
7
8
  import * as path$1 from "node:path";
8
9
  import path, { dirname, join, resolve } from "node:path";
9
10
  import tls from "node:tls";
10
11
  import { getProxyForUrl } from "proxy-from-env";
11
12
  import { SocksClient } from "socks";
12
- import { Agent, ProxyAgent, setGlobalDispatcher } from "undici";
13
- import { randomBytes, randomUUID } from "node:crypto";
13
+ import { Agent, ProxyAgent, WebSocket as WebSocket$1, setGlobalDispatcher } from "undici";
14
14
  import pc from "picocolors";
15
15
  import { existsSync, promises, readFileSync, readdirSync } from "node:fs";
16
16
  import invariant from "tiny-invariant";
17
17
  import { events } from "fetch-event-stream";
18
18
  import { Hono } from "hono";
19
19
  import { streamSSE } from "hono/streaming";
20
+ import { parseDocument } from "yaml";
20
21
  import { cors } from "hono/cors";
21
22
  import { trimTrailingSlash } from "hono/trailing-slash";
22
23
  //#region src/lib/state.ts
@@ -82,47 +83,110 @@ const DEFAULT_MODEL_OVERRIDES = {
82
83
  sonnet: "claude-sonnet-4.6",
83
84
  haiku: "claude-haiku-4.5"
84
85
  };
85
- const mutableState = {
86
- accountType: "individual",
87
- autoTruncate: true,
88
- compressToolResultsBeforeTruncate: true,
89
- contextEditingMode: "off",
86
+ /**
87
+ * Default values for config-managed scalar/runtime fields.
88
+ * Single source of truth for mutableState initialization and resetConfigManagedState().
89
+ * Model overrides continue to use DEFAULT_MODEL_OVERRIDES.
90
+ */
91
+ const CONFIG_MANAGED_DEFAULTS = {
90
92
  stripServerTools: false,
91
93
  immutableThinkingMessages: false,
92
94
  dedupToolCalls: false,
95
+ stripReadToolResultTags: false,
96
+ contextEditingMode: "off",
97
+ contextEditingTrigger: 1e5,
98
+ contextEditingKeepTools: 3,
99
+ contextEditingKeepThinking: 1,
100
+ toolSearchEnabled: true,
101
+ cacheControlMode: "proxied",
102
+ nonDeferredTools: [],
103
+ rewriteSystemReminders: false,
104
+ systemPromptOverrides: [],
105
+ compressToolResultsBeforeTruncate: true,
93
106
  fetchTimeout: 300,
107
+ streamIdleTimeout: 300,
108
+ staleRequestMaxAge: 600,
109
+ modelRefreshInterval: 600,
110
+ shutdownGracefulWait: 60,
111
+ shutdownAbortWait: 120,
94
112
  historyLimit: 200,
95
113
  historyMinEntries: 50,
114
+ normalizeResponsesCallIds: true,
115
+ upstreamWebSocket: false
116
+ };
117
+ function resetConfigManagedState() {
118
+ setAnthropicBehavior({
119
+ stripServerTools: CONFIG_MANAGED_DEFAULTS.stripServerTools,
120
+ immutableThinkingMessages: CONFIG_MANAGED_DEFAULTS.immutableThinkingMessages,
121
+ dedupToolCalls: CONFIG_MANAGED_DEFAULTS.dedupToolCalls,
122
+ stripReadToolResultTags: CONFIG_MANAGED_DEFAULTS.stripReadToolResultTags,
123
+ contextEditingMode: CONFIG_MANAGED_DEFAULTS.contextEditingMode,
124
+ contextEditingTrigger: CONFIG_MANAGED_DEFAULTS.contextEditingTrigger,
125
+ contextEditingKeepTools: CONFIG_MANAGED_DEFAULTS.contextEditingKeepTools,
126
+ contextEditingKeepThinking: CONFIG_MANAGED_DEFAULTS.contextEditingKeepThinking,
127
+ toolSearchEnabled: CONFIG_MANAGED_DEFAULTS.toolSearchEnabled,
128
+ cacheControlMode: CONFIG_MANAGED_DEFAULTS.cacheControlMode,
129
+ nonDeferredTools: [...CONFIG_MANAGED_DEFAULTS.nonDeferredTools],
130
+ rewriteSystemReminders: CONFIG_MANAGED_DEFAULTS.rewriteSystemReminders,
131
+ systemPromptOverrides: [...CONFIG_MANAGED_DEFAULTS.systemPromptOverrides],
132
+ compressToolResultsBeforeTruncate: CONFIG_MANAGED_DEFAULTS.compressToolResultsBeforeTruncate
133
+ });
134
+ setModelOverrides({ ...DEFAULT_MODEL_OVERRIDES });
135
+ setTimeoutConfig({
136
+ fetchTimeout: CONFIG_MANAGED_DEFAULTS.fetchTimeout,
137
+ streamIdleTimeout: CONFIG_MANAGED_DEFAULTS.streamIdleTimeout,
138
+ staleRequestMaxAge: CONFIG_MANAGED_DEFAULTS.staleRequestMaxAge,
139
+ modelRefreshInterval: CONFIG_MANAGED_DEFAULTS.modelRefreshInterval
140
+ });
141
+ setShutdownConfig({
142
+ shutdownGracefulWait: CONFIG_MANAGED_DEFAULTS.shutdownGracefulWait,
143
+ shutdownAbortWait: CONFIG_MANAGED_DEFAULTS.shutdownAbortWait
144
+ });
145
+ setHistoryConfig({
146
+ historyLimit: CONFIG_MANAGED_DEFAULTS.historyLimit,
147
+ historyMinEntries: CONFIG_MANAGED_DEFAULTS.historyMinEntries
148
+ });
149
+ setHistoryMaxEntries(CONFIG_MANAGED_DEFAULTS.historyLimit);
150
+ setResponsesConfig({
151
+ normalizeResponsesCallIds: CONFIG_MANAGED_DEFAULTS.normalizeResponsesCallIds,
152
+ upstreamWebSocket: CONFIG_MANAGED_DEFAULTS.upstreamWebSocket
153
+ });
154
+ }
155
+ const mutableState = {
156
+ accountType: "individual",
157
+ autoTruncate: true,
158
+ compressToolResultsBeforeTruncate: CONFIG_MANAGED_DEFAULTS.compressToolResultsBeforeTruncate,
159
+ contextEditingMode: CONFIG_MANAGED_DEFAULTS.contextEditingMode,
160
+ contextEditingTrigger: CONFIG_MANAGED_DEFAULTS.contextEditingTrigger,
161
+ contextEditingKeepTools: CONFIG_MANAGED_DEFAULTS.contextEditingKeepTools,
162
+ contextEditingKeepThinking: CONFIG_MANAGED_DEFAULTS.contextEditingKeepThinking,
163
+ toolSearchEnabled: CONFIG_MANAGED_DEFAULTS.toolSearchEnabled,
164
+ cacheControlMode: CONFIG_MANAGED_DEFAULTS.cacheControlMode,
165
+ nonDeferredTools: [...CONFIG_MANAGED_DEFAULTS.nonDeferredTools],
166
+ stripServerTools: CONFIG_MANAGED_DEFAULTS.stripServerTools,
167
+ immutableThinkingMessages: CONFIG_MANAGED_DEFAULTS.immutableThinkingMessages,
168
+ dedupToolCalls: CONFIG_MANAGED_DEFAULTS.dedupToolCalls,
169
+ fetchTimeout: CONFIG_MANAGED_DEFAULTS.fetchTimeout,
170
+ historyLimit: CONFIG_MANAGED_DEFAULTS.historyLimit,
171
+ historyMinEntries: CONFIG_MANAGED_DEFAULTS.historyMinEntries,
96
172
  modelIds: /* @__PURE__ */ new Set(),
97
173
  modelIndex: /* @__PURE__ */ new Map(),
98
174
  modelOverrides: { ...DEFAULT_MODEL_OVERRIDES },
99
- rewriteSystemReminders: false,
175
+ rewriteSystemReminders: CONFIG_MANAGED_DEFAULTS.rewriteSystemReminders,
100
176
  showGitHubToken: false,
101
- shutdownAbortWait: 120,
102
- shutdownGracefulWait: 60,
103
- staleRequestMaxAge: 600,
104
- streamIdleTimeout: 300,
105
- systemPromptOverrides: [],
106
- stripReadToolResultTags: false,
107
- normalizeResponsesCallIds: true,
177
+ shutdownAbortWait: CONFIG_MANAGED_DEFAULTS.shutdownAbortWait,
178
+ shutdownGracefulWait: CONFIG_MANAGED_DEFAULTS.shutdownGracefulWait,
179
+ staleRequestMaxAge: CONFIG_MANAGED_DEFAULTS.staleRequestMaxAge,
180
+ modelRefreshInterval: CONFIG_MANAGED_DEFAULTS.modelRefreshInterval,
181
+ streamIdleTimeout: CONFIG_MANAGED_DEFAULTS.streamIdleTimeout,
182
+ systemPromptOverrides: [...CONFIG_MANAGED_DEFAULTS.systemPromptOverrides],
183
+ stripReadToolResultTags: CONFIG_MANAGED_DEFAULTS.stripReadToolResultTags,
184
+ normalizeResponsesCallIds: CONFIG_MANAGED_DEFAULTS.normalizeResponsesCallIds,
185
+ upstreamWebSocket: CONFIG_MANAGED_DEFAULTS.upstreamWebSocket,
108
186
  verbose: false
109
187
  };
110
188
  const state = mutableState;
111
189
  //#endregion
112
- //#region src/lib/utils.ts
113
- const sleep = (ms) => new Promise((resolve) => {
114
- setTimeout(resolve, ms);
115
- });
116
- const isNullish = (value) => value === null || value === void 0;
117
- /** Convert bytes to KB with rounding */
118
- function bytesToKB(bytes) {
119
- return Math.round(bytes / 1024);
120
- }
121
- /** Generate unique ID (timestamp + random) */
122
- function generateId(randomLength = 7) {
123
- return Date.now().toString(36) + Math.random().toString(36).slice(2, 2 + randomLength);
124
- }
125
- //#endregion
126
190
  //#region src/lib/ws/broadcast.ts
127
191
  /** Connected clients indexed by their raw WebSocket instance */
128
192
  const clients = /* @__PURE__ */ new Map();
@@ -332,7 +396,8 @@ const historyIndexes = {
332
396
  summaryIndex: /* @__PURE__ */ new Map(),
333
397
  sessionEntryCount: /* @__PURE__ */ new Map(),
334
398
  sessionModelsSet: /* @__PURE__ */ new Map(),
335
- sessionToolsSet: /* @__PURE__ */ new Map()
399
+ sessionToolsSet: /* @__PURE__ */ new Map(),
400
+ responseSessionIndex: /* @__PURE__ */ new Map()
336
401
  };
337
402
  const historyStatsCache = {
338
403
  dirty: true,
@@ -344,6 +409,7 @@ function resetHistoryIndexes() {
344
409
  historyIndexes.sessionEntryCount.clear();
345
410
  historyIndexes.sessionModelsSet.clear();
346
411
  historyIndexes.sessionToolsSet.clear();
412
+ historyIndexes.responseSessionIndex.clear();
347
413
  }
348
414
  function invalidateHistoryStats() {
349
415
  historyStatsCache.dirty = true;
@@ -400,7 +466,7 @@ function initHistory(enabled, maxEntries) {
400
466
  historyState.maxEntries = maxEntries;
401
467
  historyState.entries = [];
402
468
  historyState.sessions = /* @__PURE__ */ new Map();
403
- historyState.currentSessionId = enabled ? generateId() : "";
469
+ historyState.currentSessionId = "";
404
470
  resetHistoryIndexes();
405
471
  invalidateHistoryStats();
406
472
  }
@@ -438,7 +504,7 @@ function getStats() {
438
504
  const model = entry.response?.model || entry.request.model || "unknown";
439
505
  modelDist[model] = (modelDist[model] || 0) + 1;
440
506
  endpointDist[entry.endpoint] = (endpointDist[entry.endpoint] || 0) + 1;
441
- const date = new Date(entry.timestamp);
507
+ const date = new Date(entry.startedAt);
442
508
  const hourKey = `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, "0")}-${String(date.getDate()).padStart(2, "0")}T${String(date.getHours()).padStart(2, "0")}`;
443
509
  hourlyActivity[hourKey] = (hourlyActivity[hourKey] || 0) + 1;
444
510
  if (entry.response) {
@@ -480,7 +546,7 @@ function exportHistory(format = "json") {
480
546
  const headers = [
481
547
  "id",
482
548
  "session_id",
483
- "timestamp",
549
+ "started_at",
484
550
  "endpoint",
485
551
  "request_model",
486
552
  "message_count",
@@ -495,8 +561,8 @@ function exportHistory(format = "json") {
495
561
  ];
496
562
  const rows = historyState.entries.map((entry) => [
497
563
  entry.id,
498
- entry.sessionId,
499
- formatLocalTimestamp(entry.timestamp),
564
+ entry.sessionId ?? "",
565
+ formatLocalTimestamp(entry.startedAt),
500
566
  entry.endpoint,
501
567
  entry.request.model,
502
568
  entry.request.messages?.length,
@@ -512,6 +578,118 @@ function exportHistory(format = "json") {
512
578
  return [headers.join(","), ...rows.map((row) => row.map((value) => escapeCsvValue(value)).join(","))].join("\n");
513
579
  }
514
580
  //#endregion
581
+ //#region src/lib/history/sessions.ts
582
+ const SESSION_HEADER_CANDIDATES = [
583
+ "x-session-id",
584
+ "x-conversation-id",
585
+ "x-chat-session-id",
586
+ "x-thread-id",
587
+ "x-interaction-id"
588
+ ];
589
+ function normalizeSessionId(value) {
590
+ if (typeof value !== "string") return void 0;
591
+ const trimmed = value.trim();
592
+ return trimmed.length > 0 ? trimmed : void 0;
593
+ }
594
+ function ensureSession(sessionId, endpoint) {
595
+ const existing = historyState.sessions.get(sessionId);
596
+ if (existing) {
597
+ existing.lastActivity = Date.now();
598
+ if (!existing.endpoints.includes(endpoint)) existing.endpoints.push(endpoint);
599
+ historyState.currentSessionId = sessionId;
600
+ return existing;
601
+ }
602
+ const now = Date.now();
603
+ const session = {
604
+ id: sessionId,
605
+ startTime: now,
606
+ lastActivity: now,
607
+ requestCount: 0,
608
+ totalInputTokens: 0,
609
+ totalOutputTokens: 0,
610
+ models: [],
611
+ endpoints: [endpoint]
612
+ };
613
+ historyState.sessions.set(sessionId, session);
614
+ historyIndexes.sessionModelsSet.set(sessionId, /* @__PURE__ */ new Set());
615
+ historyIndexes.sessionToolsSet.set(sessionId, /* @__PURE__ */ new Set());
616
+ historyState.currentSessionId = sessionId;
617
+ return session;
618
+ }
619
+ function getSessionIdFromHeaders(headers) {
620
+ for (const name of SESSION_HEADER_CANDIDATES) {
621
+ const normalized = normalizeSessionId(headers instanceof Headers ? headers.get(name) : headers[name]);
622
+ if (normalized) return normalized;
623
+ }
624
+ }
625
+ function resolveResponseSessionId(previousResponseId) {
626
+ const normalized = normalizeSessionId(previousResponseId);
627
+ if (!normalized) return void 0;
628
+ return historyIndexes.responseSessionIndex.get(normalized) ?? normalized;
629
+ }
630
+ function registerResponseSession(responseId, sessionId) {
631
+ const normalizedResponseId = normalizeSessionId(responseId);
632
+ const normalizedSessionId = normalizeSessionId(sessionId);
633
+ if (!normalizedResponseId || !normalizedSessionId) return;
634
+ historyIndexes.responseSessionIndex.set(normalizedResponseId, normalizedSessionId);
635
+ }
636
+ /**
637
+ * Get or create a tracked session when the caller has a real session identifier.
638
+ * Returns undefined when no trustworthy identifier is available.
639
+ */
640
+ function getCurrentSession(endpoint, sessionId) {
641
+ const normalized = normalizeSessionId(sessionId);
642
+ if (!normalized) return void 0;
643
+ ensureSession(normalized, endpoint);
644
+ return normalized;
645
+ }
646
+ function getSessions() {
647
+ const sessions = Array.from(historyState.sessions.values()).sort((a, b) => b.lastActivity - a.lastActivity);
648
+ return {
649
+ sessions,
650
+ total: sessions.length
651
+ };
652
+ }
653
+ function getSession(id) {
654
+ return historyState.sessions.get(id);
655
+ }
656
+ function getSessionEntries(sessionId, options = {}) {
657
+ const { cursor, limit = 50 } = options;
658
+ const all = historyState.entries.filter((entry) => entry.sessionId === sessionId).sort((a, b) => a.startedAt - b.startedAt);
659
+ const total = all.length;
660
+ let startIdx = 0;
661
+ if (cursor) {
662
+ const cursorIdx = all.findIndex((entry) => entry.id === cursor);
663
+ if (cursorIdx !== -1) startIdx = cursorIdx + 1;
664
+ }
665
+ const entries = all.slice(startIdx, startIdx + limit);
666
+ return {
667
+ entries,
668
+ total,
669
+ nextCursor: startIdx + limit < total ? entries.at(-1)?.id ?? null : null,
670
+ prevCursor: startIdx > 0 ? entries[0]?.id ?? null : null
671
+ };
672
+ }
673
+ function deleteSession(sessionId) {
674
+ if (!historyState.sessions.has(sessionId)) return false;
675
+ const remaining = [];
676
+ for (const entry of historyState.entries) if (entry.sessionId === sessionId) {
677
+ historyIndexes.entryIndex.delete(entry.id);
678
+ historyIndexes.summaryIndex.delete(entry.id);
679
+ } else remaining.push(entry);
680
+ historyState.entries = remaining;
681
+ historyState.sessions.delete(sessionId);
682
+ historyIndexes.sessionEntryCount.delete(sessionId);
683
+ historyIndexes.sessionModelsSet.delete(sessionId);
684
+ historyIndexes.sessionToolsSet.delete(sessionId);
685
+ for (const [responseId, mappedSessionId] of historyIndexes.responseSessionIndex) if (mappedSessionId === sessionId) historyIndexes.responseSessionIndex.delete(responseId);
686
+ invalidateHistoryStats();
687
+ if (historyState.currentSessionId === sessionId) historyState.currentSessionId = "";
688
+ notifySessionDeleted(sessionId);
689
+ notifyStatsUpdated(getStats());
690
+ return true;
691
+ }
692
+ //#endregion
515
693
  //#region src/lib/history/entries.ts
516
694
  /** Extract a preview from the last user message (first 100 chars) */
517
695
  function extractPreviewText(entry) {
@@ -542,9 +720,17 @@ function extractPreviewText(entry) {
542
720
  function toSummary(entry) {
543
721
  return {
544
722
  id: entry.id,
545
- sessionId: entry.sessionId,
546
- timestamp: entry.timestamp,
723
+ ...entry.sessionId ? { sessionId: entry.sessionId } : {},
724
+ rawPath: entry.rawPath,
725
+ startedAt: entry.startedAt,
726
+ endedAt: entry.endedAt,
547
727
  endpoint: entry.endpoint,
728
+ state: entry.state,
729
+ active: entry.active,
730
+ lastUpdatedAt: entry.lastUpdatedAt,
731
+ queueWaitMs: entry.queueWaitMs,
732
+ attemptCount: entry.attemptCount,
733
+ currentStrategy: entry.currentStrategy,
548
734
  requestModel: entry.request.model,
549
735
  stream: entry.request.stream,
550
736
  messageCount: entry.request.messages?.length ?? 0,
@@ -558,6 +744,7 @@ function toSummary(entry) {
558
744
  };
559
745
  }
560
746
  function updateSessionMetadata(entry) {
747
+ if (!entry.sessionId) return;
561
748
  const session = historyState.sessions.get(entry.sessionId);
562
749
  if (!session) return;
563
750
  const model = entry.request.model;
@@ -581,6 +768,30 @@ function updateSessionMetadata(entry) {
581
768
  }
582
769
  }
583
770
  }
771
+ function attachEntryToSession(entry) {
772
+ if (!entry.sessionId) return;
773
+ const sessionId = getCurrentSession(entry.endpoint, entry.sessionId);
774
+ if (!sessionId) return;
775
+ const session = historyState.sessions.get(sessionId);
776
+ if (!session) return;
777
+ entry.sessionId = sessionId;
778
+ session.requestCount++;
779
+ historyIndexes.sessionEntryCount.set(sessionId, (historyIndexes.sessionEntryCount.get(sessionId) ?? 0) + 1);
780
+ updateSessionMetadata(entry);
781
+ }
782
+ function detachEntryFromSession(entry) {
783
+ if (!entry.sessionId) return;
784
+ const sessionId = entry.sessionId;
785
+ const session = historyState.sessions.get(sessionId);
786
+ if (session) session.requestCount = Math.max(0, session.requestCount - 1);
787
+ const sessionCount = (historyIndexes.sessionEntryCount.get(sessionId) ?? 1) - 1;
788
+ if (sessionCount <= 0) {
789
+ historyIndexes.sessionEntryCount.delete(sessionId);
790
+ historyIndexes.sessionModelsSet.delete(sessionId);
791
+ historyIndexes.sessionToolsSet.delete(sessionId);
792
+ historyState.sessions.delete(sessionId);
793
+ } else historyIndexes.sessionEntryCount.set(sessionId, sessionCount);
794
+ }
584
795
  function removeOldestEntries(count) {
585
796
  if (count <= 0 || historyState.entries.length === 0) return 0;
586
797
  const actualCount = Math.min(count, historyState.entries.length);
@@ -588,13 +799,7 @@ function removeOldestEntries(count) {
588
799
  for (const entry of removed) {
589
800
  historyIndexes.entryIndex.delete(entry.id);
590
801
  historyIndexes.summaryIndex.delete(entry.id);
591
- const sessionCount = (historyIndexes.sessionEntryCount.get(entry.sessionId) ?? 1) - 1;
592
- if (sessionCount <= 0) {
593
- historyIndexes.sessionEntryCount.delete(entry.sessionId);
594
- historyIndexes.sessionModelsSet.delete(entry.sessionId);
595
- historyIndexes.sessionToolsSet.delete(entry.sessionId);
596
- historyState.sessions.delete(entry.sessionId);
597
- } else historyIndexes.sessionEntryCount.set(entry.sessionId, sessionCount);
802
+ detachEntryFromSession(entry);
598
803
  }
599
804
  if (removed.length > 0) invalidateHistoryStats();
600
805
  return removed.length;
@@ -606,13 +811,9 @@ function evictOldestEntries(count) {
606
811
  }
607
812
  function insertEntry(entry) {
608
813
  if (!historyState.enabled) return;
609
- const session = historyState.sessions.get(entry.sessionId);
610
- if (!session) return;
611
814
  historyState.entries.push(entry);
612
815
  historyIndexes.entryIndex.set(entry.id, entry);
613
- session.requestCount++;
614
- historyIndexes.sessionEntryCount.set(entry.sessionId, (historyIndexes.sessionEntryCount.get(entry.sessionId) ?? 0) + 1);
615
- updateSessionMetadata(entry);
816
+ attachEntryToSession(entry);
616
817
  const summary = toSummary(entry);
617
818
  historyIndexes.summaryIndex.set(entry.id, summary);
618
819
  if (historyState.maxEntries > 0 && historyState.entries.length > historyState.maxEntries) removeOldestEntries(historyState.entries.length - historyState.maxEntries);
@@ -624,19 +825,35 @@ function updateEntry(id, update) {
624
825
  if (!historyState.enabled) return;
625
826
  const entry = historyIndexes.entryIndex.get(id);
626
827
  if (!entry) return;
828
+ if (update.sessionId !== void 0 && update.sessionId !== entry.sessionId) {
829
+ detachEntryFromSession(entry);
830
+ entry.sessionId = update.sessionId;
831
+ attachEntryToSession(entry);
832
+ }
627
833
  if (update.request) {
628
834
  entry.request = update.request;
629
835
  updateSessionMetadata(entry);
630
836
  }
837
+ if (update.rawPath !== void 0) entry.rawPath = update.rawPath;
838
+ if (update.state !== void 0) entry.state = update.state;
839
+ if (update.active !== void 0) entry.active = update.active;
840
+ if (update.lastUpdatedAt !== void 0) entry.lastUpdatedAt = update.lastUpdatedAt;
841
+ if (update.queueWaitMs !== void 0) entry.queueWaitMs = update.queueWaitMs;
842
+ if (update.attemptCount !== void 0) entry.attemptCount = update.attemptCount;
843
+ if (update.currentStrategy !== void 0) entry.currentStrategy = update.currentStrategy;
631
844
  if (update.response) entry.response = update.response;
632
845
  if (update.pipelineInfo) entry.pipelineInfo = update.pipelineInfo;
633
846
  if (update.durationMs !== void 0) entry.durationMs = update.durationMs;
847
+ if (update.startedAt !== void 0) entry.startedAt = update.startedAt;
848
+ if (update.endedAt !== void 0) entry.endedAt = update.endedAt;
849
+ if (update.transport !== void 0) entry.transport = update.transport;
634
850
  if (update.sseEvents) entry.sseEvents = update.sseEvents;
635
851
  if (update.effectiveRequest) entry.effectiveRequest = update.effectiveRequest;
636
852
  if (update.wireRequest) entry.wireRequest = update.wireRequest;
637
853
  if (update.attempts) entry.attempts = update.attempts;
854
+ if (update.warningMessages) entry.warningMessages = update.warningMessages;
638
855
  if (update.response) {
639
- const session = historyState.sessions.get(entry.sessionId);
856
+ const session = entry.sessionId ? historyState.sessions.get(entry.sessionId) : void 0;
640
857
  if (session) {
641
858
  session.totalInputTokens += update.response.usage.input_tokens;
642
859
  session.totalOutputTokens += update.response.usage.output_tokens;
@@ -652,7 +869,7 @@ function updateEntry(id, update) {
652
869
  function clearHistory() {
653
870
  historyState.entries = [];
654
871
  historyState.sessions = /* @__PURE__ */ new Map();
655
- historyState.currentSessionId = generateId();
872
+ historyState.currentSessionId = "";
656
873
  resetHistoryIndexes();
657
874
  invalidateHistoryStats();
658
875
  notifyHistoryCleared();
@@ -673,13 +890,13 @@ function getHistorySummaries(options = {}) {
673
890
  }
674
891
  if (endpoint) summaries = summaries.filter((summary) => summary.endpoint === endpoint);
675
892
  if (success !== void 0) summaries = summaries.filter((summary) => summary.responseSuccess === success);
676
- if (from) summaries = summaries.filter((summary) => summary.timestamp >= from);
677
- if (to) summaries = summaries.filter((summary) => summary.timestamp <= to);
893
+ if (from) summaries = summaries.filter((summary) => summary.startedAt >= from);
894
+ if (to) summaries = summaries.filter((summary) => summary.startedAt <= to);
678
895
  if (search) {
679
896
  const needle = search.toLowerCase();
680
897
  summaries = summaries.filter((summary) => ensureSearchText(summary.id).includes(needle));
681
898
  }
682
- summaries.sort((a, b) => b.timestamp - a.timestamp || b.id.localeCompare(a.id));
899
+ summaries.sort((a, b) => b.startedAt - a.startedAt || b.id.localeCompare(a.id));
683
900
  const total = summaries.length;
684
901
  let startIdx = 0;
685
902
  if (cursor) {
@@ -695,85 +912,6 @@ function getHistorySummaries(options = {}) {
695
912
  };
696
913
  }
697
914
  //#endregion
698
- //#region src/lib/history/sessions.ts
699
- /**
700
- * Get or create current session.
701
- * Currently treats all requests as belonging to one session per server lifetime,
702
- * since clients don't provide session identifiers yet.
703
- * TODO: When clients support session headers, use that to group requests.
704
- */
705
- function getCurrentSession(endpoint) {
706
- if (historyState.currentSessionId) {
707
- const session = historyState.sessions.get(historyState.currentSessionId);
708
- if (session) {
709
- session.lastActivity = Date.now();
710
- if (!session.endpoints.includes(endpoint)) session.endpoints.push(endpoint);
711
- return historyState.currentSessionId;
712
- }
713
- }
714
- const now = Date.now();
715
- const sessionId = generateId();
716
- historyState.currentSessionId = sessionId;
717
- historyIndexes.sessionModelsSet.set(sessionId, /* @__PURE__ */ new Set());
718
- historyIndexes.sessionToolsSet.set(sessionId, /* @__PURE__ */ new Set());
719
- historyState.sessions.set(sessionId, {
720
- id: sessionId,
721
- startTime: now,
722
- lastActivity: now,
723
- requestCount: 0,
724
- totalInputTokens: 0,
725
- totalOutputTokens: 0,
726
- models: [],
727
- endpoints: [endpoint]
728
- });
729
- return sessionId;
730
- }
731
- function getSessions() {
732
- const sessions = Array.from(historyState.sessions.values()).sort((a, b) => b.lastActivity - a.lastActivity);
733
- return {
734
- sessions,
735
- total: sessions.length
736
- };
737
- }
738
- function getSession(id) {
739
- return historyState.sessions.get(id);
740
- }
741
- function getSessionEntries(sessionId, options = {}) {
742
- const { cursor, limit = 50 } = options;
743
- const all = historyState.entries.filter((entry) => entry.sessionId === sessionId).sort((a, b) => a.timestamp - b.timestamp);
744
- const total = all.length;
745
- let startIdx = 0;
746
- if (cursor) {
747
- const cursorIdx = all.findIndex((entry) => entry.id === cursor);
748
- if (cursorIdx !== -1) startIdx = cursorIdx + 1;
749
- }
750
- const entries = all.slice(startIdx, startIdx + limit);
751
- return {
752
- entries,
753
- total,
754
- nextCursor: startIdx + limit < total ? entries.at(-1)?.id ?? null : null,
755
- prevCursor: startIdx > 0 ? entries[0]?.id ?? null : null
756
- };
757
- }
758
- function deleteSession(sessionId) {
759
- if (!historyState.sessions.has(sessionId)) return false;
760
- const remaining = [];
761
- for (const entry of historyState.entries) if (entry.sessionId === sessionId) {
762
- historyIndexes.entryIndex.delete(entry.id);
763
- historyIndexes.summaryIndex.delete(entry.id);
764
- } else remaining.push(entry);
765
- historyState.entries = remaining;
766
- historyState.sessions.delete(sessionId);
767
- historyIndexes.sessionEntryCount.delete(sessionId);
768
- historyIndexes.sessionModelsSet.delete(sessionId);
769
- historyIndexes.sessionToolsSet.delete(sessionId);
770
- invalidateHistoryStats();
771
- if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId();
772
- notifySessionDeleted(sessionId);
773
- notifyStatsUpdated(getStats());
774
- return true;
775
- }
776
- //#endregion
777
915
  //#region src/lib/history/memory-pressure.ts
778
916
  /**
779
917
  * Memory pressure monitor — proactively evicts old history entries
@@ -881,392 +1019,162 @@ function getMemoryPressureStats() {
881
1019
  };
882
1020
  }
883
1021
  //#endregion
884
- //#region src/lib/config/paths.ts
885
- const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api");
886
- const PATHS = {
887
- APP_DIR,
888
- GITHUB_TOKEN_PATH: path.join(APP_DIR, "github_token"),
889
- CONFIG_YAML: path.join(APP_DIR, "config.yaml"),
890
- LEARNED_LIMITS: path.join(APP_DIR, "learned-limits.json"),
891
- ERROR_DIR: path.join(APP_DIR, "errmsgs")
892
- };
893
- async function ensurePaths() {
894
- await fs.mkdir(PATHS.APP_DIR, { recursive: true });
895
- await ensureFile(PATHS.GITHUB_TOKEN_PATH);
896
- }
897
- async function ensureFile(filePath) {
898
- const isWindows = process.platform === "win32";
899
- try {
900
- await fs.access(filePath, fs.constants.W_OK);
901
- if (!isWindows) {
902
- if (((await fs.stat(filePath)).mode & 511) !== 384) await fs.chmod(filePath, 384);
903
- }
904
- } catch {
905
- await fs.writeFile(filePath, "");
906
- if (!isWindows) await fs.chmod(filePath, 384);
907
- }
908
- }
909
- //#endregion
910
- //#region src/lib/config/config.ts
1022
+ //#region src/lib/copilot-api.ts
1023
+ const standardHeaders = () => ({
1024
+ "content-type": "application/json",
1025
+ accept: "application/json"
1026
+ });
1027
+ const COPILOT_VERSION = "0.38.0";
1028
+ const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`;
1029
+ const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`;
1030
+ /** Copilot Chat API version (for chat/completions requests) */
1031
+ const COPILOT_API_VERSION = "2025-05-01";
1032
+ /** Copilot internal API version (for token & usage endpoints) */
1033
+ const COPILOT_INTERNAL_API_VERSION = "2025-04-01";
1034
+ /** GitHub public API version (for /user, repos, etc.) */
1035
+ const GITHUB_API_VERSION = "2022-11-28";
911
1036
  /**
912
- * Application configuration: types, YAML loading, and state application.
913
- *
914
- * All config types live here as the single source of truth.
915
- * config.yaml is loaded with mtime-based caching.
1037
+ * Session-level interaction ID.
1038
+ * Used to correlate all requests within a single server session.
1039
+ * Unlike x-request-id (per-request UUID), this stays constant for the server lifetime.
916
1040
  */
917
- /** Compile a raw rewrite rule into a CompiledRewriteRule. Returns null for invalid regex. */
918
- function compileRewriteRule(raw) {
919
- const method = raw.method ?? "regex";
920
- let modelPattern;
921
- if (raw.model) try {
922
- modelPattern = new RegExp(raw.model, "i");
923
- } catch (err) {
924
- consola.warn(`[config] Invalid model regex in rewrite rule: "${raw.model}"`, err);
925
- return null;
926
- }
927
- if (method === "line") return {
928
- from: raw.from,
929
- to: raw.to,
930
- method,
931
- modelPattern
1041
+ const INTERACTION_ID = randomUUID();
1042
+ const copilotBaseUrl = (state) => state.accountType === "individual" ? "https://api.githubcopilot.com" : `https://api.${state.accountType}.githubcopilot.com`;
1043
+ const copilotWsUrl = (state) => copilotBaseUrl(state).replace(/^https:\/\//u, "wss://") + "/responses";
1044
+ const copilotHeaders = (state, opts) => {
1045
+ const requestId = randomUUID();
1046
+ const interactionType = opts?.intent ?? "conversation-panel";
1047
+ const headers = {
1048
+ Authorization: `Bearer ${state.copilotToken}`,
1049
+ "content-type": standardHeaders()["content-type"],
1050
+ "copilot-integration-id": "vscode-chat",
1051
+ "editor-version": `vscode/${state.vsCodeVersion}`,
1052
+ "editor-plugin-version": EDITOR_PLUGIN_VERSION,
1053
+ "user-agent": USER_AGENT,
1054
+ "openai-intent": interactionType,
1055
+ "x-github-api-version": COPILOT_API_VERSION,
1056
+ "x-request-id": requestId,
1057
+ "X-Interaction-Id": INTERACTION_ID,
1058
+ "X-Interaction-Type": interactionType,
1059
+ "X-Agent-Task-Id": requestId,
1060
+ "x-vscode-user-agent-library-version": "electron-fetch"
932
1061
  };
933
- try {
934
- let pattern = raw.from;
935
- let flags = "gms";
936
- const inlineMatch = pattern.match(/^\(\?([a-z]+)\)/i);
937
- if (inlineMatch) {
938
- pattern = pattern.slice(inlineMatch[0].length);
939
- for (const f of inlineMatch[1]) if (!flags.includes(f)) flags += f;
940
- }
941
- return {
942
- from: new RegExp(pattern, flags),
943
- to: raw.to,
944
- method,
945
- modelPattern
946
- };
947
- } catch (err) {
948
- consola.warn(`[config] Invalid regex in rewrite rule: "${raw.from}"`, err);
949
- return null;
1062
+ if (opts?.vision) headers["copilot-vision-request"] = "true";
1063
+ if (opts?.modelRequestHeaders) {
1064
+ const coreKeysLower = new Set(Object.keys(headers).map((k) => k.toLowerCase()));
1065
+ for (const [key, value] of Object.entries(opts.modelRequestHeaders)) if (!coreKeysLower.has(key.toLowerCase())) headers[key] = value;
950
1066
  }
1067
+ return headers;
1068
+ };
1069
+ const GITHUB_API_BASE_URL = "https://api.github.com";
1070
+ const githubHeaders = (state) => ({
1071
+ ...standardHeaders(),
1072
+ authorization: `token ${state.githubToken}`,
1073
+ "editor-version": `vscode/${state.vsCodeVersion}`,
1074
+ "editor-plugin-version": EDITOR_PLUGIN_VERSION,
1075
+ "user-agent": USER_AGENT,
1076
+ "x-github-api-version": GITHUB_API_VERSION,
1077
+ "x-vscode-user-agent-library-version": "electron-fetch"
1078
+ });
1079
+ const GITHUB_BASE_URL = "https://github.com";
1080
+ const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98";
1081
+ ["read:user"].join(" ");
1082
+ /** Fallback VSCode version when GitHub API is unavailable */
1083
+ const VSCODE_VERSION_FALLBACK = "1.104.3";
1084
+ /** GitHub API endpoint for latest VSCode release */
1085
+ const VSCODE_RELEASE_URL = "https://api.github.com/repos/microsoft/vscode/releases/latest";
1086
+ /** Fetch the latest VSCode version and cache in global state */
1087
+ async function cacheVSCodeVersion() {
1088
+ setVSCodeVersion(await getVSCodeVersion());
951
1089
  }
952
- /** Compile an array of raw rewrite rules, skipping invalid ones */
953
- function compileRewriteRules(raws) {
954
- return raws.map((r) => compileRewriteRule(r)).filter((r) => r !== null);
955
- }
956
- let cachedConfig = null;
957
- let configLastMtimeMs = 0;
958
- /** Time-based debounce: skip stat() if checked recently */
959
- let lastStatTimeMs = 0;
960
- const STAT_DEBOUNCE_MS = 2e3;
961
- async function loadConfig() {
1090
+ /** Fetch the latest VSCode version from GitHub releases, falling back to a hardcoded version */
1091
+ async function getVSCodeVersion() {
1092
+ const controller = new AbortController();
1093
+ const timeout = setTimeout(() => {
1094
+ controller.abort();
1095
+ }, 5e3);
962
1096
  try {
963
- const now = Date.now();
964
- if (cachedConfig && now - lastStatTimeMs < STAT_DEBOUNCE_MS) return cachedConfig;
965
- const stat = await fs.stat(PATHS.CONFIG_YAML);
966
- lastStatTimeMs = now;
967
- if (cachedConfig && stat.mtimeMs === configLastMtimeMs) return cachedConfig;
968
- const content = await fs.readFile(PATHS.CONFIG_YAML, "utf8");
969
- const { parse } = await import("yaml");
970
- cachedConfig = parse(content) ?? {};
971
- configLastMtimeMs = stat.mtimeMs;
972
- return cachedConfig;
973
- } catch (err) {
974
- if (err.code === "ENOENT") return {};
975
- try {
976
- configLastMtimeMs = (await fs.stat(PATHS.CONFIG_YAML)).mtimeMs;
977
- } catch {}
978
- consola.warn("[config] Failed to load config.yaml:", err);
979
- return {};
1097
+ const response = await fetch(VSCODE_RELEASE_URL, {
1098
+ signal: controller.signal,
1099
+ headers: {
1100
+ Accept: "application/vnd.github.v3+json",
1101
+ "User-Agent": "copilot-api"
1102
+ }
1103
+ });
1104
+ if (!response.ok) return VSCODE_VERSION_FALLBACK;
1105
+ const version = (await response.json()).tag_name;
1106
+ if (version && /^\d+\.\d+\.\d+$/.test(version)) return version;
1107
+ return VSCODE_VERSION_FALLBACK;
1108
+ } catch {
1109
+ return VSCODE_VERSION_FALLBACK;
1110
+ } finally {
1111
+ clearTimeout(timeout);
980
1112
  }
981
1113
  }
982
- /** Get the mtime of the currently cached config (0 if not loaded) */
983
- function getConfigMtimeMs() {
984
- return configLastMtimeMs;
985
- }
986
- let hasApplied = false;
987
- let lastAppliedMtimeMs = 0;
988
- /**
989
- * Load config.yaml and apply all hot-reloadable settings to global state.
990
- *
991
- * Scalar fields: only overridden when explicitly present in config (deleted keys keep current runtime value).
992
- * Collection fields (model_overrides, rewrite_system_reminders array): entire replacement when present.
993
- *
994
- * Safe to call per-request — loadConfig() is mtime-cached, so unchanged config
995
- * only costs one stat() syscall.
996
- *
997
- * NOT hot-reloaded: rate_limiter (stateful singleton initialized at startup).
998
- */
999
- async function applyConfigToState() {
1000
- const config = await loadConfig();
1001
- if (config.anthropic) {
1002
- const a = config.anthropic;
1003
- if (a.strip_server_tools !== void 0) setAnthropicBehavior({ stripServerTools: a.strip_server_tools });
1004
- if (a.immutable_thinking_messages !== void 0) setAnthropicBehavior({ immutableThinkingMessages: a.immutable_thinking_messages });
1005
- if (a.dedup_tool_calls !== void 0) setAnthropicBehavior({ dedupToolCalls: a.dedup_tool_calls === true ? "input" : a.dedup_tool_calls });
1006
- if (a.strip_read_tool_result_tags !== void 0) setAnthropicBehavior({ stripReadToolResultTags: a.strip_read_tool_result_tags });
1007
- if (a.context_editing !== void 0) setAnthropicBehavior({ contextEditingMode: a.context_editing });
1008
- if (a.rewrite_system_reminders !== void 0) {
1009
- if (typeof a.rewrite_system_reminders === "boolean") setAnthropicBehavior({ rewriteSystemReminders: a.rewrite_system_reminders });
1010
- else if (Array.isArray(a.rewrite_system_reminders)) setAnthropicBehavior({ rewriteSystemReminders: compileRewriteRules(a.rewrite_system_reminders) });
1011
- }
1012
- }
1013
- if (Array.isArray(config.system_prompt_overrides)) setAnthropicBehavior({ systemPromptOverrides: config.system_prompt_overrides.length > 0 ? compileRewriteRules(config.system_prompt_overrides) : [] });
1014
- if (config.model_overrides) setModelOverrides({
1015
- ...DEFAULT_MODEL_OVERRIDES,
1016
- ...config.model_overrides
1017
- });
1018
- if (config.compress_tool_results_before_truncate !== void 0) setAnthropicBehavior({ compressToolResultsBeforeTruncate: config.compress_tool_results_before_truncate });
1019
- if (config.history) {
1020
- const h = config.history;
1021
- if (h.limit !== void 0) {
1022
- setHistoryConfig({ historyLimit: h.limit });
1023
- setHistoryMaxEntries(h.limit);
1024
- }
1025
- if (h.min_entries !== void 0) setHistoryConfig({ historyMinEntries: h.min_entries });
1114
+ //#endregion
1115
+ //#region src/lib/error/http-error.ts
1116
+ var HTTPError = class HTTPError extends Error {
1117
+ status;
1118
+ responseText;
1119
+ /** Model ID that caused the error (if known) */
1120
+ modelId;
1121
+ /** Original response headers (for Retry-After, quota snapshots, etc.) */
1122
+ responseHeaders;
1123
+ constructor(message, status, responseText, modelId, responseHeaders) {
1124
+ super(message);
1125
+ this.status = status;
1126
+ this.responseText = responseText;
1127
+ this.modelId = modelId;
1128
+ this.responseHeaders = responseHeaders;
1026
1129
  }
1027
- if (config.shutdown) {
1028
- const s = config.shutdown;
1029
- if (s.graceful_wait !== void 0) setShutdownConfig({ shutdownGracefulWait: s.graceful_wait });
1030
- if (s.abort_wait !== void 0) setShutdownConfig({ shutdownAbortWait: s.abort_wait });
1130
+ static async fromResponse(message, response, modelId) {
1131
+ const text = await response.text();
1132
+ return new HTTPError(message, response.status, text, modelId, response.headers);
1031
1133
  }
1032
- if (config.fetch_timeout !== void 0) setTimeoutConfig({ fetchTimeout: config.fetch_timeout });
1033
- if (config.stream_idle_timeout !== void 0) setTimeoutConfig({ streamIdleTimeout: config.stream_idle_timeout });
1034
- if (config.stale_request_max_age !== void 0) setTimeoutConfig({ staleRequestMaxAge: config.stale_request_max_age });
1035
- const responsesConfig = config["openai-responses"];
1036
- if (responsesConfig && responsesConfig.normalize_call_ids !== void 0) setResponsesConfig({ normalizeResponsesCallIds: responsesConfig.normalize_call_ids });
1037
- const currentMtime = getConfigMtimeMs();
1038
- if (hasApplied && currentMtime !== lastAppliedMtimeMs) consola.info("[config] Reloaded config.yaml");
1039
- hasApplied = true;
1040
- lastAppliedMtimeMs = currentMtime;
1041
- return config;
1042
- }
1134
+ };
1043
1135
  //#endregion
1044
- //#region src/lib/proxy.ts
1045
- /**
1046
- * Proxy configuration: HTTP/HTTPS and SOCKS5/5h proxy support.
1047
- *
1048
- * Priority: explicit proxy URL (CLI --proxy or config.yaml) > env vars (--http-proxy-from-env).
1049
- * On Node.js, proxying works via undici's global dispatcher.
1050
- * On Bun, HTTP proxies are set via env vars (Bun handles them natively); SOCKS5 is not supported.
1051
- */
1052
- /**
1053
- * Initialize proxy for all outgoing fetch requests.
1054
- *
1055
- * On Node.js: sets undici's global dispatcher.
1056
- * On Bun: sets process.env.HTTP_PROXY/HTTPS_PROXY for HTTP proxies (Bun handles natively).
1057
- *
1058
- * Must be called before any network requests.
1059
- */
1060
- function initProxy(options) {
1061
- if (typeof Bun !== "undefined") {
1062
- initProxyBun(options);
1063
- return;
1064
- }
1065
- initProxyNode(options);
1136
+ //#region src/lib/error/parsing.ts
1137
+ /** Parse token limit info from error message text. */
1138
+ function parseTokenLimitError(message) {
1139
+ const openaiMatch = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
1140
+ if (openaiMatch) return {
1141
+ current: Number.parseInt(openaiMatch[1], 10),
1142
+ limit: Number.parseInt(openaiMatch[2], 10)
1143
+ };
1144
+ const anthropicMatch = message.match(/prompt is too long: (\d+) tokens > (\d+) maximum/);
1145
+ if (anthropicMatch) return {
1146
+ current: Number.parseInt(anthropicMatch[1], 10),
1147
+ limit: Number.parseInt(anthropicMatch[2], 10)
1148
+ };
1149
+ return null;
1066
1150
  }
1067
- /** Format a proxy URL for display (strip credentials) */
1068
- function formatProxyDisplay(proxyUrl) {
1151
+ /** Extract retry_after from JSON response body. */
1152
+ function extractRetryAfterFromBody(responseText) {
1069
1153
  try {
1070
- const u = new URL(proxyUrl);
1071
- const auth = u.username ? `${u.username}:***@` : "";
1072
- return `${u.protocol}//${auth}${u.host}`;
1073
- } catch {
1074
- return proxyUrl;
1075
- }
1154
+ const parsed = JSON.parse(responseText);
1155
+ if (parsed && typeof parsed === "object") {
1156
+ if ("retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
1157
+ if ("error" in parsed) {
1158
+ const err = parsed.error;
1159
+ if (err && typeof err === "object" && "retry_after" in err && typeof err.retry_after === "number") return err.retry_after;
1160
+ }
1161
+ }
1162
+ } catch {}
1076
1163
  }
1077
- function initProxyNode(options) {
1164
+ /** Check if a 503 response body indicates upstream provider rate limiting. */
1165
+ function isUpstreamRateLimited(responseText) {
1078
1166
  try {
1079
- if (options.url) {
1080
- setGlobalDispatcher(createDispatcherForUrl(options.url));
1081
- consola.debug(`Proxy configured: ${formatProxyDisplay(options.url)}`);
1082
- return;
1083
- }
1084
- if (options.fromEnv) {
1085
- setGlobalDispatcher(new EnvProxyDispatcher());
1086
- consola.debug("HTTP proxy configured from environment (per-URL)");
1087
- }
1088
- } catch (err) {
1089
- consola.error("Proxy setup failed:", err);
1090
- throw err;
1091
- }
1092
- }
1093
- /** Create the appropriate undici dispatcher for a proxy URL scheme */
1094
- function createDispatcherForUrl(proxyUrl) {
1095
- const url = new URL(proxyUrl);
1096
- const protocol = url.protocol.toLowerCase();
1097
- if (protocol === "http:" || protocol === "https:") return new ProxyAgent(proxyUrl);
1098
- if (protocol === "socks5:" || protocol === "socks5h:") return createSocksAgent(url);
1099
- throw new Error(`Unsupported proxy protocol: ${protocol}. Supported: http, https, socks5, socks5h`);
1100
- }
1101
- /**
1102
- * Create an undici Agent that routes connections through a SOCKS5/5h proxy.
1103
- *
1104
- * For socks5h:// the proxy performs DNS resolution (hostname passed as-is).
1105
- * For socks5:// the hostname is also passed to the proxy (proxy resolves).
1106
- * Both protocols support username/password authentication via URL credentials.
1107
- */
1108
- function createSocksAgent(proxyUrl) {
1109
- const proxy = {
1110
- host: proxyUrl.hostname,
1111
- port: Number(proxyUrl.port) || 1080,
1112
- type: 5
1113
- };
1114
- if (proxyUrl.username) {
1115
- proxy.userId = decodeURIComponent(proxyUrl.username);
1116
- proxy.password = proxyUrl.password ? decodeURIComponent(proxyUrl.password) : void 0;
1117
- }
1118
- return new Agent({ connect(opts, callback) {
1119
- const destPort = Number(opts.port) || (opts.protocol === "https:" ? 443 : 80);
1120
- SocksClient.createConnection({
1121
- proxy,
1122
- command: "connect",
1123
- destination: {
1124
- host: opts.hostname,
1125
- port: destPort
1126
- }
1127
- }).then(({ socket }) => {
1128
- if (opts.protocol === "https:") callback(null, tls.connect({
1129
- socket,
1130
- servername: opts.servername ?? opts.hostname
1131
- }));
1132
- else callback(null, socket);
1133
- }).catch((err) => {
1134
- callback(err instanceof Error ? err : new Error(String(err)), null);
1135
- });
1136
- } });
1137
- }
1138
- /**
1139
- * Custom dispatcher that routes requests through proxies based on environment variables.
1140
- * Uses proxy-from-env to resolve HTTP_PROXY/HTTPS_PROXY/NO_PROXY per-URL.
1141
- */
1142
- var EnvProxyDispatcher = class extends Agent {
1143
- proxies = /* @__PURE__ */ new Map();
1144
- dispatch(options, handler) {
1145
- try {
1146
- const origin = this.getOriginUrl(options.origin);
1147
- const proxyUrl = this.getProxyUrl(origin);
1148
- if (!proxyUrl) {
1149
- consola.debug(`HTTP proxy bypass: ${origin.hostname}`);
1150
- return super.dispatch(options, handler);
1151
- }
1152
- const agent = this.getOrCreateProxyAgent(proxyUrl);
1153
- consola.debug(`HTTP proxy route: ${origin.hostname} via ${formatProxyDisplay(proxyUrl)}`);
1154
- return agent.dispatch(options, handler);
1155
- } catch {
1156
- return super.dispatch(options, handler);
1157
- }
1158
- }
1159
- getOriginUrl(origin) {
1160
- return typeof origin === "string" ? new URL(origin) : origin;
1161
- }
1162
- getProxyUrl(origin) {
1163
- const raw = getProxyForUrl(origin.toString());
1164
- return raw && raw.length > 0 ? raw : void 0;
1165
- }
1166
- getOrCreateProxyAgent(proxyUrl) {
1167
- let agent = this.proxies.get(proxyUrl);
1168
- if (!agent) {
1169
- agent = new ProxyAgent(proxyUrl);
1170
- this.proxies.set(proxyUrl, agent);
1171
- }
1172
- return agent;
1173
- }
1174
- async close() {
1175
- await super.close();
1176
- await Promise.all([...this.proxies.values()].map((p) => p.close()));
1177
- this.proxies.clear();
1178
- }
1179
- destroy(errOrCallback, callback) {
1180
- for (const agent of this.proxies.values()) if (typeof errOrCallback === "function") agent.destroy(errOrCallback);
1181
- else if (callback) agent.destroy(errOrCallback ?? null, callback);
1182
- else agent.destroy(errOrCallback ?? null).catch(() => {});
1183
- this.proxies.clear();
1184
- if (typeof errOrCallback === "function") {
1185
- super.destroy(errOrCallback);
1186
- return;
1187
- } else if (callback) {
1188
- super.destroy(errOrCallback ?? null, callback);
1189
- return;
1190
- } else return super.destroy(errOrCallback ?? null);
1191
- }
1192
- };
1193
- /**
1194
- * Initialize proxy for Bun runtime.
1195
- * Bun handles HTTP_PROXY/HTTPS_PROXY env vars natively.
1196
- * SOCKS5 proxies are not supported on Bun.
1197
- */
1198
- function initProxyBun(options) {
1199
- if (!options.url) return;
1200
- const protocol = new URL(options.url).protocol.toLowerCase();
1201
- if (protocol === "socks5:" || protocol === "socks5h:") throw new Error("SOCKS5 proxy is not supported on Bun runtime. Use Node.js or an HTTP proxy instead.");
1202
- process.env.HTTP_PROXY = options.url;
1203
- process.env.HTTPS_PROXY = options.url;
1204
- consola.debug(`Proxy configured (Bun env): ${formatProxyDisplay(options.url)}`);
1205
- }
1206
- //#endregion
1207
- //#region src/lib/error/http-error.ts
1208
- var HTTPError = class HTTPError extends Error {
1209
- status;
1210
- responseText;
1211
- /** Model ID that caused the error (if known) */
1212
- modelId;
1213
- /** Original response headers (for Retry-After, quota snapshots, etc.) */
1214
- responseHeaders;
1215
- constructor(message, status, responseText, modelId, responseHeaders) {
1216
- super(message);
1217
- this.status = status;
1218
- this.responseText = responseText;
1219
- this.modelId = modelId;
1220
- this.responseHeaders = responseHeaders;
1221
- }
1222
- static async fromResponse(message, response, modelId) {
1223
- const text = await response.text();
1224
- return new HTTPError(message, response.status, text, modelId, response.headers);
1225
- }
1226
- };
1227
- //#endregion
1228
- //#region src/lib/error/parsing.ts
1229
- /** Parse token limit info from error message text. */
1230
- function parseTokenLimitError(message) {
1231
- const openaiMatch = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
1232
- if (openaiMatch) return {
1233
- current: Number.parseInt(openaiMatch[1], 10),
1234
- limit: Number.parseInt(openaiMatch[2], 10)
1235
- };
1236
- const anthropicMatch = message.match(/prompt is too long: (\d+) tokens > (\d+) maximum/);
1237
- if (anthropicMatch) return {
1238
- current: Number.parseInt(anthropicMatch[1], 10),
1239
- limit: Number.parseInt(anthropicMatch[2], 10)
1240
- };
1241
- return null;
1242
- }
1243
- /** Extract retry_after from JSON response body. */
1244
- function extractRetryAfterFromBody(responseText) {
1245
- try {
1246
- const parsed = JSON.parse(responseText);
1247
- if (parsed && typeof parsed === "object") {
1248
- if ("retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
1249
- if ("error" in parsed) {
1250
- const err = parsed.error;
1251
- if (err && typeof err === "object" && "retry_after" in err && typeof err.retry_after === "number") return err.retry_after;
1252
- }
1253
- }
1254
- } catch {}
1255
- }
1256
- /** Check if a 503 response body indicates upstream provider rate limiting. */
1257
- function isUpstreamRateLimited(responseText) {
1258
- try {
1259
- const parsed = JSON.parse(responseText);
1260
- if (parsed && typeof parsed === "object" && "error" in parsed) {
1261
- const err = parsed.error;
1262
- if (err && typeof err === "object") {
1263
- const errObj = err;
1264
- if (typeof errObj.code === "string" && errObj.code.includes("rate")) return true;
1265
- if (typeof errObj.message === "string") {
1266
- const msg = errObj.message.toLowerCase();
1267
- if (msg.includes("rate limit") || msg.includes("too many requests") || msg.includes("quota")) return true;
1268
- }
1269
- }
1167
+ const parsed = JSON.parse(responseText);
1168
+ if (parsed && typeof parsed === "object" && "error" in parsed) {
1169
+ const err = parsed.error;
1170
+ if (err && typeof err === "object") {
1171
+ const errObj = err;
1172
+ if (typeof errObj.code === "string" && errObj.code.includes("rate")) return true;
1173
+ if (typeof errObj.message === "string") {
1174
+ const msg = errObj.message.toLowerCase();
1175
+ if (msg.includes("rate limit") || msg.includes("too many requests") || msg.includes("quota")) return true;
1176
+ }
1177
+ }
1270
1178
  }
1271
1179
  } catch {
1272
1180
  const lower = responseText.toLowerCase();
@@ -1625,92 +1533,446 @@ function truncateForLog(text, maxLen) {
1625
1533
  return `${text.slice(0, maxLen)}… (${text.length} bytes total)`;
1626
1534
  }
1627
1535
  //#endregion
1628
- //#region src/lib/copilot-api.ts
1629
- const standardHeaders = () => ({
1630
- "content-type": "application/json",
1631
- accept: "application/json"
1632
- });
1633
- const COPILOT_VERSION = "0.38.0";
1634
- const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`;
1635
- const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`;
1636
- /** Copilot Chat API version (for chat/completions requests) */
1637
- const COPILOT_API_VERSION = "2025-05-01";
1638
- /** Copilot internal API version (for token & usage endpoints) */
1639
- const COPILOT_INTERNAL_API_VERSION = "2025-04-01";
1640
- /** GitHub public API version (for /user, repos, etc.) */
1641
- const GITHUB_API_VERSION = "2022-11-28";
1536
+ //#region src/lib/fetch-utils.ts
1537
+ const SENSITIVE_HEADER_NAMES = new Set([
1538
+ "authorization",
1539
+ "proxy-authorization",
1540
+ "x-api-key",
1541
+ "api-key"
1542
+ ]);
1642
1543
  /**
1643
- * Session-level interaction ID.
1644
- * Used to correlate all requests within a single server session.
1645
- * Unlike x-request-id (per-request UUID), this stays constant for the server lifetime.
1544
+ * Create an AbortSignal for fetch timeout if configured.
1545
+ * Controls the time from request start to receiving response headers.
1546
+ * Returns undefined if fetchTimeout is 0 (disabled).
1646
1547
  */
1647
- const INTERACTION_ID = randomUUID();
1648
- const copilotBaseUrl = (state) => state.accountType === "individual" ? "https://api.githubcopilot.com" : `https://api.${state.accountType}.githubcopilot.com`;
1649
- const copilotHeaders = (state, opts) => {
1650
- const headers = {
1651
- Authorization: `Bearer ${state.copilotToken}`,
1652
- "content-type": standardHeaders()["content-type"],
1653
- "copilot-integration-id": "vscode-chat",
1654
- "editor-version": `vscode/${state.vsCodeVersion}`,
1655
- "editor-plugin-version": EDITOR_PLUGIN_VERSION,
1656
- "user-agent": USER_AGENT,
1657
- "openai-intent": opts?.intent ?? "conversation-panel",
1658
- "x-github-api-version": COPILOT_API_VERSION,
1659
- "x-request-id": randomUUID(),
1660
- "X-Interaction-Id": INTERACTION_ID,
1661
- "x-vscode-user-agent-library-version": "electron-fetch"
1662
- };
1663
- if (opts?.vision) headers["copilot-vision-request"] = "true";
1664
- if (opts?.modelRequestHeaders) {
1665
- const coreKeysLower = new Set(Object.keys(headers).map((k) => k.toLowerCase()));
1666
- for (const [key, value] of Object.entries(opts.modelRequestHeaders)) if (!coreKeysLower.has(key.toLowerCase())) headers[key] = value;
1667
- }
1668
- return headers;
1669
- };
1670
- const GITHUB_API_BASE_URL = "https://api.github.com";
1671
- const githubHeaders = (state) => ({
1672
- ...standardHeaders(),
1673
- authorization: `token ${state.githubToken}`,
1674
- "editor-version": `vscode/${state.vsCodeVersion}`,
1675
- "editor-plugin-version": EDITOR_PLUGIN_VERSION,
1676
- "user-agent": USER_AGENT,
1677
- "x-github-api-version": GITHUB_API_VERSION,
1678
- "x-vscode-user-agent-library-version": "electron-fetch"
1679
- });
1680
- const GITHUB_BASE_URL = "https://github.com";
1681
- const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98";
1682
- ["read:user"].join(" ");
1683
- /** Fallback VSCode version when GitHub API is unavailable */
1684
- const VSCODE_VERSION_FALLBACK = "1.104.3";
1685
- /** GitHub API endpoint for latest VSCode release */
1686
- const VSCODE_RELEASE_URL = "https://api.github.com/repos/microsoft/vscode/releases/latest";
1687
- /** Fetch the latest VSCode version and cache in global state */
1688
- async function cacheVSCodeVersion() {
1689
- setVSCodeVersion(await getVSCodeVersion());
1548
+ function createFetchSignal() {
1549
+ return state.fetchTimeout > 0 ? AbortSignal.timeout(state.fetchTimeout * 1e3) : void 0;
1690
1550
  }
1691
- /** Fetch the latest VSCode version from GitHub releases, falling back to a hardcoded version */
1692
- async function getVSCodeVersion() {
1693
- const controller = new AbortController();
1694
- const timeout = setTimeout(() => {
1695
- controller.abort();
1696
- }, 5e3);
1551
+ /**
1552
+ * Populate a HeadersCapture object with request and response headers.
1553
+ * Should be called immediately after fetch(), before !response.ok check,
1554
+ * so headers are captured even for error responses.
1555
+ */
1556
+ function captureHttpHeaders(capture, requestHeaders, response) {
1557
+ capture.request = sanitizeHeadersForHistory(requestHeaders);
1558
+ capture.response = Object.fromEntries(response.headers.entries());
1559
+ }
1560
+ /** Return a copy of headers safe to persist in history/error artifacts. */
1561
+ function sanitizeHeadersForHistory(headers) {
1562
+ return Object.fromEntries(Object.entries(headers).map(([name, value]) => [name, SENSITIVE_HEADER_NAMES.has(name.toLowerCase()) ? "***" : value]));
1563
+ }
1564
+ //#endregion
1565
+ //#region src/lib/models/client.ts
1566
+ /** Fetch models from Copilot API and cache in global state */
1567
+ async function cacheModels() {
1568
+ setModels(await getModels());
1569
+ }
1570
+ const getModels = async () => {
1571
+ const response = await fetch(`${copilotBaseUrl(state)}/models`, {
1572
+ headers: copilotHeaders(state),
1573
+ signal: createFetchSignal()
1574
+ });
1575
+ if (!response.ok) throw await HTTPError.fromResponse("Failed to get models", response);
1576
+ return await response.json();
1577
+ };
1578
+ //#endregion
1579
+ //#region src/lib/models/refresh-loop.ts
1580
+ let refreshTimer = null;
1581
+ let refreshLoopRunning = false;
1582
+ let refreshIntervalSeconds = state.modelRefreshInterval;
1583
+ let refreshModelsImpl = cacheModels;
1584
+ function clearRefreshTimer() {
1585
+ if (refreshTimer) {
1586
+ clearTimeout(refreshTimer);
1587
+ refreshTimer = null;
1588
+ }
1589
+ }
1590
+ function logRefreshFailure(error) {
1591
+ const message = error instanceof Error ? error.message : String(error);
1592
+ if (state.models?.data.length) {
1593
+ consola.warn(`[Models] Periodic refresh failed, keeping cached models: ${message}`);
1594
+ return;
1595
+ }
1596
+ consola.error(`[Models] Periodic refresh failed with no cached models: ${message}`);
1597
+ }
1598
+ function scheduleNextRefresh() {
1599
+ clearRefreshTimer();
1600
+ if (!refreshLoopRunning || refreshIntervalSeconds <= 0) return;
1601
+ refreshTimer = setTimeout(() => {
1602
+ refreshModelsImpl().catch(logRefreshFailure).finally(() => {
1603
+ scheduleNextRefresh();
1604
+ });
1605
+ }, refreshIntervalSeconds * 1e3);
1606
+ }
1607
+ function startModelRefreshLoop(refreshModels = cacheModels) {
1608
+ refreshLoopRunning = true;
1609
+ refreshModelsImpl = refreshModels;
1610
+ refreshIntervalSeconds = state.modelRefreshInterval;
1611
+ scheduleNextRefresh();
1612
+ return () => {
1613
+ refreshLoopRunning = false;
1614
+ clearRefreshTimer();
1615
+ };
1616
+ }
1617
+ function syncModelRefreshLoop(intervalSeconds = state.modelRefreshInterval) {
1618
+ refreshIntervalSeconds = intervalSeconds;
1619
+ if (!refreshLoopRunning) return;
1620
+ scheduleNextRefresh();
1621
+ }
1622
+ //#endregion
1623
+ //#region src/lib/config/paths.ts
1624
+ const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api");
1625
+ const PATHS = {
1626
+ APP_DIR,
1627
+ GITHUB_TOKEN_PATH: path.join(APP_DIR, "github_token"),
1628
+ CONFIG_YAML: path.join(APP_DIR, "config.yaml"),
1629
+ LEARNED_LIMITS: path.join(APP_DIR, "learned-limits.json"),
1630
+ REQUEST_TELEMETRY: path.join(APP_DIR, "request-telemetry.json"),
1631
+ ERROR_DIR: path.join(APP_DIR, "errmsgs")
1632
+ };
1633
+ async function ensurePaths() {
1634
+ await fs.mkdir(PATHS.APP_DIR, { recursive: true });
1635
+ await ensureFile(PATHS.GITHUB_TOKEN_PATH);
1636
+ }
1637
+ async function ensureFile(filePath) {
1638
+ const isWindows = process.platform === "win32";
1697
1639
  try {
1698
- const response = await fetch(VSCODE_RELEASE_URL, {
1699
- signal: controller.signal,
1700
- headers: {
1701
- Accept: "application/vnd.github.v3+json",
1702
- "User-Agent": "copilot-api"
1640
+ await fs.access(filePath, fs.constants.W_OK);
1641
+ if (!isWindows) {
1642
+ if (((await fs.stat(filePath)).mode & 511) !== 384) await fs.chmod(filePath, 384);
1643
+ }
1644
+ } catch {
1645
+ await fs.writeFile(filePath, "");
1646
+ if (!isWindows) await fs.chmod(filePath, 384);
1647
+ }
1648
+ }
1649
+ //#endregion
1650
+ //#region src/lib/config/config.ts
1651
+ /**
1652
+ * Application configuration: types, YAML loading, and state application.
1653
+ *
1654
+ * All config types live here as the single source of truth.
1655
+ * config.yaml is loaded with mtime-based caching.
1656
+ */
1657
+ /** Compile a raw rewrite rule into a CompiledRewriteRule. Returns null for invalid regex. */
1658
+ function compileRewriteRule(raw) {
1659
+ const method = raw.method ?? "regex";
1660
+ let modelPattern;
1661
+ if (raw.model) try {
1662
+ modelPattern = new RegExp(raw.model, "i");
1663
+ } catch (err) {
1664
+ consola.warn(`[config] Invalid model regex in rewrite rule: "${raw.model}"`, err);
1665
+ return null;
1666
+ }
1667
+ if (method === "line") return {
1668
+ from: raw.from,
1669
+ to: raw.to,
1670
+ method,
1671
+ modelPattern
1672
+ };
1673
+ try {
1674
+ let pattern = raw.from;
1675
+ let flags = "gms";
1676
+ const inlineMatch = pattern.match(/^\(\?([a-z]+)\)/i);
1677
+ if (inlineMatch) {
1678
+ pattern = pattern.slice(inlineMatch[0].length);
1679
+ for (const f of inlineMatch[1]) if (!flags.includes(f)) flags += f;
1680
+ }
1681
+ return {
1682
+ from: new RegExp(pattern, flags),
1683
+ to: raw.to,
1684
+ method,
1685
+ modelPattern
1686
+ };
1687
+ } catch (err) {
1688
+ consola.warn(`[config] Invalid regex in rewrite rule: "${raw.from}"`, err);
1689
+ return null;
1690
+ }
1691
+ }
1692
+ /** Compile an array of raw rewrite rules, skipping invalid ones */
1693
+ function compileRewriteRules(raws) {
1694
+ return raws.map((r) => compileRewriteRule(r)).filter((r) => r !== null);
1695
+ }
1696
+ let cachedConfig = null;
1697
+ let configLastMtimeMs = 0;
1698
+ /** Time-based debounce: skip stat() if checked recently */
1699
+ let lastStatTimeMs = 0;
1700
+ const STAT_DEBOUNCE_MS = 2e3;
1701
+ async function loadRawConfigFile() {
1702
+ try {
1703
+ const content = await fs.readFile(PATHS.CONFIG_YAML, "utf8");
1704
+ const { parse } = await import("yaml");
1705
+ const parsed = parse(content);
1706
+ if (parsed === null || parsed === void 0) return {};
1707
+ if (typeof parsed !== "object" || Array.isArray(parsed)) throw new TypeError("config.yaml must contain a top-level mapping");
1708
+ return parsed;
1709
+ } catch (err) {
1710
+ if (err.code === "ENOENT") return {};
1711
+ throw err;
1712
+ }
1713
+ }
1714
+ async function loadConfig() {
1715
+ try {
1716
+ const now = Date.now();
1717
+ if (cachedConfig && now - lastStatTimeMs < STAT_DEBOUNCE_MS) return cachedConfig;
1718
+ const stat = await fs.stat(PATHS.CONFIG_YAML);
1719
+ lastStatTimeMs = now;
1720
+ if (cachedConfig && stat.mtimeMs === configLastMtimeMs) return cachedConfig;
1721
+ cachedConfig = await loadRawConfigFile();
1722
+ configLastMtimeMs = stat.mtimeMs;
1723
+ return cachedConfig;
1724
+ } catch (err) {
1725
+ if (err.code === "ENOENT") return {};
1726
+ try {
1727
+ configLastMtimeMs = (await fs.stat(PATHS.CONFIG_YAML)).mtimeMs;
1728
+ } catch {}
1729
+ consola.warn("[config] Failed to load config.yaml:", err);
1730
+ return {};
1731
+ }
1732
+ }
1733
+ /** Get the mtime of the currently cached config (0 if not loaded) */
1734
+ function getConfigMtimeMs() {
1735
+ return configLastMtimeMs;
1736
+ }
1737
+ /** Exposed for testing: reset the mtime cache */
1738
+ function resetConfigCache() {
1739
+ cachedConfig = null;
1740
+ configLastMtimeMs = 0;
1741
+ lastStatTimeMs = 0;
1742
+ }
1743
+ let hasApplied = false;
1744
+ let lastAppliedMtimeMs = 0;
1745
+ /**
1746
+ * Load config.yaml and apply all hot-reloadable settings to global state.
1747
+ *
1748
+ * Scalar fields: only overridden when explicitly present in config (deleted keys keep current runtime value).
1749
+ * Collection fields (model_overrides, rewrite_system_reminders array): entire replacement when present.
1750
+ *
1751
+ * Safe to call per-request — loadConfig() is mtime-cached, so unchanged config
1752
+ * only costs one stat() syscall.
1753
+ *
1754
+ * NOT hot-reloaded: rate_limiter (stateful singleton initialized at startup).
1755
+ */
1756
+ async function applyConfigToState() {
1757
+ const config = await loadConfig();
1758
+ if (config.anthropic) {
1759
+ const a = config.anthropic;
1760
+ if (a.strip_server_tools !== void 0) setAnthropicBehavior({ stripServerTools: a.strip_server_tools });
1761
+ if (a.immutable_thinking_messages !== void 0) setAnthropicBehavior({ immutableThinkingMessages: a.immutable_thinking_messages });
1762
+ if (a.dedup_tool_calls !== void 0) setAnthropicBehavior({ dedupToolCalls: a.dedup_tool_calls === true ? "input" : a.dedup_tool_calls });
1763
+ if (a.strip_read_tool_result_tags !== void 0) setAnthropicBehavior({ stripReadToolResultTags: a.strip_read_tool_result_tags });
1764
+ if (a.context_editing !== void 0) setAnthropicBehavior({ contextEditingMode: a.context_editing });
1765
+ if (a.context_editing_trigger !== void 0) setAnthropicBehavior({ contextEditingTrigger: a.context_editing_trigger });
1766
+ if (a.context_editing_keep_tools !== void 0) setAnthropicBehavior({ contextEditingKeepTools: a.context_editing_keep_tools });
1767
+ if (a.context_editing_keep_thinking !== void 0) setAnthropicBehavior({ contextEditingKeepThinking: a.context_editing_keep_thinking });
1768
+ if (a.tool_search !== void 0) setAnthropicBehavior({ toolSearchEnabled: a.tool_search });
1769
+ if (a.cache_control !== void 0) setAnthropicBehavior({ cacheControlMode: a.cache_control });
1770
+ else if (a.auto_cache_control !== void 0) {
1771
+ const mapped = a.auto_cache_control ? "proxied" : "disabled";
1772
+ consola.warn(`[Config] anthropic.auto_cache_control is deprecated, use cache_control: "${mapped}" instead`);
1773
+ setAnthropicBehavior({ cacheControlMode: mapped });
1774
+ }
1775
+ if (Array.isArray(a.non_deferred_tools)) setAnthropicBehavior({ nonDeferredTools: a.non_deferred_tools });
1776
+ if (a.rewrite_system_reminders !== void 0) {
1777
+ if (typeof a.rewrite_system_reminders === "boolean") setAnthropicBehavior({ rewriteSystemReminders: a.rewrite_system_reminders });
1778
+ else if (Array.isArray(a.rewrite_system_reminders)) setAnthropicBehavior({ rewriteSystemReminders: compileRewriteRules(a.rewrite_system_reminders) });
1779
+ }
1780
+ }
1781
+ if (Array.isArray(config.system_prompt_overrides)) setAnthropicBehavior({ systemPromptOverrides: config.system_prompt_overrides.length > 0 ? compileRewriteRules(config.system_prompt_overrides) : [] });
1782
+ if (config.model_overrides) setModelOverrides({
1783
+ ...DEFAULT_MODEL_OVERRIDES,
1784
+ ...config.model_overrides
1785
+ });
1786
+ if (config.compress_tool_results_before_truncate !== void 0) setAnthropicBehavior({ compressToolResultsBeforeTruncate: config.compress_tool_results_before_truncate });
1787
+ if (config.history) {
1788
+ const h = config.history;
1789
+ if (h.limit !== void 0) {
1790
+ setHistoryConfig({ historyLimit: h.limit });
1791
+ setHistoryMaxEntries(h.limit);
1792
+ }
1793
+ if (h.min_entries !== void 0) setHistoryConfig({ historyMinEntries: h.min_entries });
1794
+ }
1795
+ if (config.shutdown) {
1796
+ const s = config.shutdown;
1797
+ if (s.graceful_wait !== void 0) setShutdownConfig({ shutdownGracefulWait: s.graceful_wait });
1798
+ if (s.abort_wait !== void 0) setShutdownConfig({ shutdownAbortWait: s.abort_wait });
1799
+ }
1800
+ if (config.fetch_timeout !== void 0) setTimeoutConfig({ fetchTimeout: config.fetch_timeout });
1801
+ if (config.stream_idle_timeout !== void 0) setTimeoutConfig({ streamIdleTimeout: config.stream_idle_timeout });
1802
+ if (config.stale_request_max_age !== void 0) setTimeoutConfig({ staleRequestMaxAge: config.stale_request_max_age });
1803
+ if (config.model_refresh_interval !== void 0) setTimeoutConfig({ modelRefreshInterval: config.model_refresh_interval });
1804
+ const responsesConfig = config["openai-responses"];
1805
+ if (responsesConfig && responsesConfig.normalize_call_ids !== void 0) setResponsesConfig({ normalizeResponsesCallIds: responsesConfig.normalize_call_ids });
1806
+ if (responsesConfig && responsesConfig.upstream_websocket !== void 0) setResponsesConfig({ upstreamWebSocket: responsesConfig.upstream_websocket });
1807
+ syncModelRefreshLoop();
1808
+ const currentMtime = getConfigMtimeMs();
1809
+ if (hasApplied && currentMtime !== lastAppliedMtimeMs) consola.info("[config] Reloaded config.yaml");
1810
+ hasApplied = true;
1811
+ lastAppliedMtimeMs = currentMtime;
1812
+ return config;
1813
+ }
1814
+ //#endregion
1815
+ //#region src/lib/proxy.ts
1816
+ /**
1817
+ * Proxy configuration: HTTP/HTTPS and SOCKS5/5h proxy support.
1818
+ *
1819
+ * Priority: explicit proxy URL (CLI --proxy or config.yaml) > env vars (--http-proxy-from-env).
1820
+ * On Node.js, proxying works via undici's global dispatcher.
1821
+ * On Bun, HTTP proxies are set via env vars (Bun handles them natively); SOCKS5 is not supported.
1822
+ */
1823
+ /**
1824
+ * Initialize proxy for all outgoing fetch requests.
1825
+ *
1826
+ * On Node.js: sets undici's global dispatcher.
1827
+ * On Bun: sets process.env.HTTP_PROXY/HTTPS_PROXY for HTTP proxies (Bun handles natively).
1828
+ *
1829
+ * Must be called before any network requests.
1830
+ */
1831
+ function initProxy(options) {
1832
+ if (typeof Bun !== "undefined") {
1833
+ initProxyBun(options);
1834
+ return;
1835
+ }
1836
+ initProxyNode(options);
1837
+ }
1838
+ /** Format a proxy URL for display (strip credentials) */
1839
+ function formatProxyDisplay(proxyUrl) {
1840
+ try {
1841
+ const u = new URL(proxyUrl);
1842
+ const auth = u.username ? `${u.username}:***@` : "";
1843
+ return `${u.protocol}//${auth}${u.host}`;
1844
+ } catch {
1845
+ return proxyUrl;
1846
+ }
1847
+ }
1848
+ function initProxyNode(options) {
1849
+ try {
1850
+ if (options.url) {
1851
+ setGlobalDispatcher(createDispatcherForUrl(options.url));
1852
+ consola.debug(`Proxy configured: ${formatProxyDisplay(options.url)}`);
1853
+ return;
1854
+ }
1855
+ if (options.fromEnv) {
1856
+ setGlobalDispatcher(new EnvProxyDispatcher());
1857
+ consola.debug("HTTP proxy configured from environment (per-URL)");
1858
+ }
1859
+ } catch (err) {
1860
+ consola.error("Proxy setup failed:", err);
1861
+ throw err;
1862
+ }
1863
+ }
1864
+ /** Create the appropriate undici dispatcher for a proxy URL scheme */
1865
+ function createDispatcherForUrl(proxyUrl) {
1866
+ const url = new URL(proxyUrl);
1867
+ const protocol = url.protocol.toLowerCase();
1868
+ if (protocol === "http:" || protocol === "https:") return new ProxyAgent(proxyUrl);
1869
+ if (protocol === "socks5:" || protocol === "socks5h:") return createSocksAgent(url);
1870
+ throw new Error(`Unsupported proxy protocol: ${protocol}. Supported: http, https, socks5, socks5h`);
1871
+ }
1872
+ /**
1873
+ * Create an undici Agent that routes connections through a SOCKS5/5h proxy.
1874
+ *
1875
+ * For socks5h:// the proxy performs DNS resolution (hostname passed as-is).
1876
+ * For socks5:// the hostname is also passed to the proxy (proxy resolves).
1877
+ * Both protocols support username/password authentication via URL credentials.
1878
+ */
1879
+ function createSocksAgent(proxyUrl) {
1880
+ const proxy = {
1881
+ host: proxyUrl.hostname,
1882
+ port: Number(proxyUrl.port) || 1080,
1883
+ type: 5
1884
+ };
1885
+ if (proxyUrl.username) {
1886
+ proxy.userId = decodeURIComponent(proxyUrl.username);
1887
+ proxy.password = proxyUrl.password ? decodeURIComponent(proxyUrl.password) : void 0;
1888
+ }
1889
+ return new Agent({ connect(opts, callback) {
1890
+ const destPort = Number(opts.port) || (opts.protocol === "https:" ? 443 : 80);
1891
+ SocksClient.createConnection({
1892
+ proxy,
1893
+ command: "connect",
1894
+ destination: {
1895
+ host: opts.hostname,
1896
+ port: destPort
1703
1897
  }
1898
+ }).then(({ socket }) => {
1899
+ if (opts.protocol === "https:") callback(null, tls.connect({
1900
+ socket,
1901
+ servername: opts.servername ?? opts.hostname
1902
+ }));
1903
+ else callback(null, socket);
1904
+ }).catch((err) => {
1905
+ callback(err instanceof Error ? err : new Error(String(err)), null);
1704
1906
  });
1705
- if (!response.ok) return VSCODE_VERSION_FALLBACK;
1706
- const version = (await response.json()).tag_name;
1707
- if (version && /^\d+\.\d+\.\d+$/.test(version)) return version;
1708
- return VSCODE_VERSION_FALLBACK;
1709
- } catch {
1710
- return VSCODE_VERSION_FALLBACK;
1711
- } finally {
1712
- clearTimeout(timeout);
1907
+ } });
1908
+ }
1909
+ /**
1910
+ * Custom dispatcher that routes requests through proxies based on environment variables.
1911
+ * Uses proxy-from-env to resolve HTTP_PROXY/HTTPS_PROXY/NO_PROXY per-URL.
1912
+ */
1913
+ var EnvProxyDispatcher = class extends Agent {
1914
+ proxies = /* @__PURE__ */ new Map();
1915
+ dispatch(options, handler) {
1916
+ try {
1917
+ const origin = this.getOriginUrl(options.origin);
1918
+ const proxyUrl = this.getProxyUrl(origin);
1919
+ if (!proxyUrl) {
1920
+ consola.debug(`HTTP proxy bypass: ${origin.hostname}`);
1921
+ return super.dispatch(options, handler);
1922
+ }
1923
+ const agent = this.getOrCreateProxyAgent(proxyUrl);
1924
+ consola.debug(`HTTP proxy route: ${origin.hostname} via ${formatProxyDisplay(proxyUrl)}`);
1925
+ return agent.dispatch(options, handler);
1926
+ } catch {
1927
+ return super.dispatch(options, handler);
1928
+ }
1929
+ }
1930
+ getOriginUrl(origin) {
1931
+ return typeof origin === "string" ? new URL(origin) : origin;
1932
+ }
1933
+ getProxyUrl(origin) {
1934
+ const raw = getProxyForUrl(origin.toString());
1935
+ return raw && raw.length > 0 ? raw : void 0;
1936
+ }
1937
+ getOrCreateProxyAgent(proxyUrl) {
1938
+ let agent = this.proxies.get(proxyUrl);
1939
+ if (!agent) {
1940
+ agent = new ProxyAgent(proxyUrl);
1941
+ this.proxies.set(proxyUrl, agent);
1942
+ }
1943
+ return agent;
1944
+ }
1945
+ async close() {
1946
+ await super.close();
1947
+ await Promise.all([...this.proxies.values()].map((p) => p.close()));
1948
+ this.proxies.clear();
1949
+ }
1950
+ destroy(errOrCallback, callback) {
1951
+ for (const agent of this.proxies.values()) if (typeof errOrCallback === "function") agent.destroy(errOrCallback);
1952
+ else if (callback) agent.destroy(errOrCallback ?? null, callback);
1953
+ else agent.destroy(errOrCallback ?? null).catch(() => {});
1954
+ this.proxies.clear();
1955
+ if (typeof errOrCallback === "function") {
1956
+ super.destroy(errOrCallback);
1957
+ return;
1958
+ } else if (callback) {
1959
+ super.destroy(errOrCallback ?? null, callback);
1960
+ return;
1961
+ } else return super.destroy(errOrCallback ?? null);
1713
1962
  }
1963
+ };
1964
+ /**
1965
+ * Initialize proxy for Bun runtime.
1966
+ * Bun handles HTTP_PROXY/HTTPS_PROXY env vars natively.
1967
+ * SOCKS5 proxies are not supported on Bun.
1968
+ */
1969
+ function initProxyBun(options) {
1970
+ if (!options.url) return;
1971
+ const protocol = new URL(options.url).protocol.toLowerCase();
1972
+ if (protocol === "socks5:" || protocol === "socks5h:") throw new Error("SOCKS5 proxy is not supported on Bun runtime. Use Node.js or an HTTP proxy instead.");
1973
+ process.env.HTTP_PROXY = options.url;
1974
+ process.env.HTTPS_PROXY = options.url;
1975
+ consola.debug(`Proxy configured (Bun env): ${formatProxyDisplay(options.url)}`);
1714
1976
  }
1715
1977
  //#endregion
1716
1978
  //#region src/lib/token/copilot-client.ts
@@ -1914,6 +2176,20 @@ var CopilotTokenManager = class {
1914
2176
  }
1915
2177
  };
1916
2178
  //#endregion
2179
+ //#region src/lib/utils.ts
2180
+ const sleep = (ms) => new Promise((resolve) => {
2181
+ setTimeout(resolve, ms);
2182
+ });
2183
+ const isNullish = (value) => value === null || value === void 0;
2184
+ /** Convert bytes to KB with rounding */
2185
+ function bytesToKB(bytes) {
2186
+ return Math.round(bytes / 1024);
2187
+ }
2188
+ /** Generate unique ID (timestamp + random) */
2189
+ function generateId(randomLength = 7) {
2190
+ return Date.now().toString(36) + Math.random().toString(36).slice(2, 2 + randomLength);
2191
+ }
2192
+ //#endregion
1917
2193
  //#region src/lib/token/github-client.ts
1918
2194
  /** GitHub OAuth API client — device code flow and user info */
1919
2195
  const getGitHubUser = async () => {
@@ -2473,49 +2749,6 @@ const checkUsage = defineCommand({
2473
2749
  }
2474
2750
  });
2475
2751
  //#endregion
2476
- //#region src/lib/fetch-utils.ts
2477
- const SENSITIVE_HEADER_NAMES = new Set([
2478
- "authorization",
2479
- "proxy-authorization",
2480
- "x-api-key",
2481
- "api-key"
2482
- ]);
2483
- /**
2484
- * Create an AbortSignal for fetch timeout if configured.
2485
- * Controls the time from request start to receiving response headers.
2486
- * Returns undefined if fetchTimeout is 0 (disabled).
2487
- */
2488
- function createFetchSignal() {
2489
- return state.fetchTimeout > 0 ? AbortSignal.timeout(state.fetchTimeout * 1e3) : void 0;
2490
- }
2491
- /**
2492
- * Populate a HeadersCapture object with request and response headers.
2493
- * Should be called immediately after fetch(), before !response.ok check,
2494
- * so headers are captured even for error responses.
2495
- */
2496
- function captureHttpHeaders(capture, requestHeaders, response) {
2497
- capture.request = sanitizeHeadersForHistory(requestHeaders);
2498
- capture.response = Object.fromEntries(response.headers.entries());
2499
- }
2500
- /** Return a copy of headers safe to persist in history/error artifacts. */
2501
- function sanitizeHeadersForHistory(headers) {
2502
- return Object.fromEntries(Object.entries(headers).map(([name, value]) => [name, SENSITIVE_HEADER_NAMES.has(name.toLowerCase()) ? "***" : value]));
2503
- }
2504
- //#endregion
2505
- //#region src/lib/models/client.ts
2506
- /** Fetch models from Copilot API and cache in global state */
2507
- async function cacheModels() {
2508
- setModels(await getModels());
2509
- }
2510
- const getModels = async () => {
2511
- const response = await fetch(`${copilotBaseUrl(state)}/models`, {
2512
- headers: copilotHeaders(state),
2513
- signal: createFetchSignal()
2514
- });
2515
- if (!response.ok) throw await HTTPError.fromResponse("Failed to get models", response);
2516
- return await response.json();
2517
- };
2518
- //#endregion
2519
2752
  //#region src/debug.ts
2520
2753
  async function getPackageVersion() {
2521
2754
  try {
@@ -3004,6 +3237,284 @@ async function executeWithAdaptiveRateLimit(fn) {
3004
3237
  return rateLimiterInstance.execute(fn);
3005
3238
  }
3006
3239
  //#endregion
3240
+ //#region src/lib/request-telemetry.ts
3241
+ const BUCKET_MS = 300 * 1e3;
3242
+ const WINDOW_MS = 10080 * 60 * 1e3;
3243
+ const PERSIST_INTERVAL_MS = 60 * 1e3;
3244
+ let acceptedSinceStart = 0;
3245
+ let bucketCounts = /* @__PURE__ */ new Map();
3246
+ let modelStatsSinceStart = /* @__PURE__ */ new Map();
3247
+ let modelBucketStats = /* @__PURE__ */ new Map();
3248
+ let persistTimer$1 = null;
3249
+ let telemetryFilePath = PATHS.REQUEST_TELEMETRY;
3250
+ function getBucketStart(timestamp) {
3251
+ return Math.floor(timestamp / BUCKET_MS) * BUCKET_MS;
3252
+ }
3253
+ function createEmptyModelTelemetry() {
3254
+ return {
3255
+ requestCount: 0,
3256
+ successCount: 0,
3257
+ failureCount: 0,
3258
+ totalDurationMs: 0,
3259
+ inputTokens: 0,
3260
+ outputTokens: 0,
3261
+ cacheReadInputTokens: 0,
3262
+ cacheCreationInputTokens: 0,
3263
+ reasoningTokens: 0
3264
+ };
3265
+ }
3266
+ function isValidPersistedModelTelemetry(value) {
3267
+ if (!value || typeof value !== "object") return false;
3268
+ const stats = value;
3269
+ return typeof stats.requestCount === "number" && typeof stats.successCount === "number" && typeof stats.failureCount === "number" && typeof stats.totalDurationMs === "number" && typeof stats.inputTokens === "number" && typeof stats.outputTokens === "number" && typeof stats.cacheReadInputTokens === "number" && typeof stats.cacheCreationInputTokens === "number" && typeof stats.reasoningTokens === "number";
3270
+ }
3271
+ function copyPersistedTelemetry(stats) {
3272
+ return {
3273
+ requestCount: stats.requestCount,
3274
+ successCount: stats.successCount,
3275
+ failureCount: stats.failureCount,
3276
+ totalDurationMs: stats.totalDurationMs,
3277
+ inputTokens: stats.inputTokens,
3278
+ outputTokens: stats.outputTokens,
3279
+ cacheReadInputTokens: stats.cacheReadInputTokens,
3280
+ cacheCreationInputTokens: stats.cacheCreationInputTokens,
3281
+ reasoningTokens: stats.reasoningTokens
3282
+ };
3283
+ }
3284
+ function getOrCreateModelStats(target, model) {
3285
+ const normalizedModel = model.trim() || "unknown";
3286
+ let stats = target.get(normalizedModel);
3287
+ if (!stats) {
3288
+ stats = createEmptyModelTelemetry();
3289
+ target.set(normalizedModel, stats);
3290
+ }
3291
+ return stats;
3292
+ }
3293
+ function getOrCreateModelBucket(timestamp) {
3294
+ let bucket = modelBucketStats.get(timestamp);
3295
+ if (!bucket) {
3296
+ bucket = /* @__PURE__ */ new Map();
3297
+ modelBucketStats.set(timestamp, bucket);
3298
+ }
3299
+ return bucket;
3300
+ }
3301
+ function applySettledTelemetry(stats, opts) {
3302
+ const durationMs = Math.max(0, opts.endedAt - opts.startedAt);
3303
+ const usage = opts.usage;
3304
+ stats.requestCount += 1;
3305
+ if (opts.success) stats.successCount += 1;
3306
+ else stats.failureCount += 1;
3307
+ stats.totalDurationMs += durationMs;
3308
+ stats.inputTokens += usage?.input_tokens ?? 0;
3309
+ stats.outputTokens += usage?.output_tokens ?? 0;
3310
+ stats.cacheReadInputTokens += usage?.cache_read_input_tokens ?? 0;
3311
+ stats.cacheCreationInputTokens += usage?.cache_creation_input_tokens ?? 0;
3312
+ stats.reasoningTokens += usage?.output_tokens_details?.reasoning_tokens ?? 0;
3313
+ }
3314
+ function pruneBuckets(now = Date.now()) {
3315
+ const earliest = getBucketStart(now - WINDOW_MS);
3316
+ for (const key of bucketCounts.keys()) if (key < earliest) bucketCounts.delete(key);
3317
+ for (const key of modelBucketStats.keys()) if (key < earliest) modelBucketStats.delete(key);
3318
+ }
3319
+ function buildFilledBuckets(now = Date.now()) {
3320
+ const latestBucket = getBucketStart(now);
3321
+ const bucketCount = Math.floor(WINDOW_MS / BUCKET_MS);
3322
+ const firstBucket = latestBucket - (bucketCount - 1) * BUCKET_MS;
3323
+ const result = [];
3324
+ for (let index = 0; index < bucketCount; index++) {
3325
+ const timestamp = firstBucket + index * BUCKET_MS;
3326
+ result.push({
3327
+ timestamp,
3328
+ count: bucketCounts.get(timestamp) ?? 0
3329
+ });
3330
+ }
3331
+ return result;
3332
+ }
3333
+ function buildModelSnapshots(source) {
3334
+ return [...source].map(([model, stats]) => toModelSnapshot(model, stats)).sort((left, right) => right.requestCount - left.requestCount || right.usage.totalTokens - left.usage.totalTokens || right.totalDurationMs - left.totalDurationMs || left.model.localeCompare(right.model));
3335
+ }
3336
+ function toUsageTotals(stats) {
3337
+ return {
3338
+ inputTokens: stats.inputTokens,
3339
+ outputTokens: stats.outputTokens,
3340
+ totalTokens: stats.inputTokens + stats.outputTokens,
3341
+ cacheReadInputTokens: stats.cacheReadInputTokens,
3342
+ cacheCreationInputTokens: stats.cacheCreationInputTokens,
3343
+ reasoningTokens: stats.reasoningTokens
3344
+ };
3345
+ }
3346
+ function toModelSnapshot(model, stats) {
3347
+ return {
3348
+ model,
3349
+ requestCount: stats.requestCount,
3350
+ successCount: stats.successCount,
3351
+ failureCount: stats.failureCount,
3352
+ totalDurationMs: stats.totalDurationMs,
3353
+ averageDurationMs: stats.requestCount > 0 ? stats.totalDurationMs / stats.requestCount : 0,
3354
+ usage: toUsageTotals(stats)
3355
+ };
3356
+ }
3357
+ function buildLast7dModelSnapshots(now = Date.now()) {
3358
+ pruneBuckets(now);
3359
+ const aggregate = /* @__PURE__ */ new Map();
3360
+ const series = /* @__PURE__ */ new Map();
3361
+ for (const [timestamp, bucket] of modelBucketStats.entries()) for (const [model, stats] of bucket.entries()) {
3362
+ const target = getOrCreateModelStats(aggregate, model);
3363
+ target.requestCount += stats.requestCount;
3364
+ target.successCount += stats.successCount;
3365
+ target.failureCount += stats.failureCount;
3366
+ target.totalDurationMs += stats.totalDurationMs;
3367
+ target.inputTokens += stats.inputTokens;
3368
+ target.outputTokens += stats.outputTokens;
3369
+ target.cacheReadInputTokens += stats.cacheReadInputTokens;
3370
+ target.cacheCreationInputTokens += stats.cacheCreationInputTokens;
3371
+ target.reasoningTokens += stats.reasoningTokens;
3372
+ let buckets = series.get(model);
3373
+ if (!buckets) {
3374
+ buckets = [];
3375
+ series.set(model, buckets);
3376
+ }
3377
+ buckets.push({
3378
+ timestamp,
3379
+ requestCount: stats.requestCount,
3380
+ successCount: stats.successCount,
3381
+ failureCount: stats.failureCount,
3382
+ totalDurationMs: stats.totalDurationMs,
3383
+ averageDurationMs: stats.requestCount > 0 ? stats.totalDurationMs / stats.requestCount : 0,
3384
+ usage: toUsageTotals(stats)
3385
+ });
3386
+ }
3387
+ return [...aggregate.entries()].map(([model, stats]) => ({
3388
+ ...toModelSnapshot(model, stats),
3389
+ buckets: (series.get(model) ?? []).sort((left, right) => left.timestamp - right.timestamp)
3390
+ })).sort((left, right) => right.requestCount - left.requestCount || right.usage.totalTokens - left.usage.totalTokens || right.totalDurationMs - left.totalDurationMs || left.model.localeCompare(right.model));
3391
+ }
3392
+ function startPeriodicPersistence() {
3393
+ if (persistTimer$1) return;
3394
+ persistTimer$1 = setInterval(() => {
3395
+ persistRequestTelemetry();
3396
+ }, PERSIST_INTERVAL_MS);
3397
+ }
3398
+ function stopPeriodicPersistence() {
3399
+ if (!persistTimer$1) return;
3400
+ clearInterval(persistTimer$1);
3401
+ persistTimer$1 = null;
3402
+ }
3403
+ function loadModelBuckets(raw) {
3404
+ modelBucketStats = new Map(Object.entries(raw).map(([bucketKey, bucketValue]) => {
3405
+ const bucketTimestamp = Number(bucketKey);
3406
+ if (!Number.isFinite(bucketTimestamp) || !bucketValue || typeof bucketValue !== "object") return null;
3407
+ const bucket = /* @__PURE__ */ new Map();
3408
+ for (const [model, stats] of Object.entries(bucketValue)) if (isValidPersistedModelTelemetry(stats)) bucket.set(model, copyPersistedTelemetry(stats));
3409
+ return [bucketTimestamp, bucket];
3410
+ }).filter((entry) => Boolean(entry)));
3411
+ }
3412
+ async function initRequestTelemetry() {
3413
+ stopPeriodicPersistence();
3414
+ acceptedSinceStart = 0;
3415
+ bucketCounts = /* @__PURE__ */ new Map();
3416
+ modelStatsSinceStart = /* @__PURE__ */ new Map();
3417
+ modelBucketStats = /* @__PURE__ */ new Map();
3418
+ try {
3419
+ const raw = await fs.readFile(telemetryFilePath, "utf8");
3420
+ const parsed = JSON.parse(raw);
3421
+ if (parsed.buckets && typeof parsed.buckets === "object") bucketCounts = new Map(Object.entries(parsed.buckets).map(([key, value]) => [Number(key), value]).filter(([key, value]) => Number.isFinite(key) && typeof value === "number" && value >= 0));
3422
+ if (parsed.version === 2 && parsed.modelBuckets && typeof parsed.modelBuckets === "object") loadModelBuckets(parsed.modelBuckets);
3423
+ } catch {}
3424
+ pruneBuckets();
3425
+ startPeriodicPersistence();
3426
+ }
3427
+ function recordAcceptedRequest(timestamp = Date.now()) {
3428
+ acceptedSinceStart += 1;
3429
+ const bucket = getBucketStart(timestamp);
3430
+ bucketCounts.set(bucket, (bucketCounts.get(bucket) ?? 0) + 1);
3431
+ pruneBuckets(timestamp);
3432
+ }
3433
+ function recordSettledRequest(model, opts) {
3434
+ const normalizedModel = model.trim() || "unknown";
3435
+ applySettledTelemetry(getOrCreateModelStats(modelStatsSinceStart, normalizedModel), opts);
3436
+ applySettledTelemetry(getOrCreateModelStats(getOrCreateModelBucket(getBucketStart(opts.startedAt)), normalizedModel), opts);
3437
+ pruneBuckets(opts.startedAt);
3438
+ }
3439
+ function getRequestTelemetrySnapshot(now = Date.now()) {
3440
+ pruneBuckets(now);
3441
+ const buckets = buildFilledBuckets(now);
3442
+ const totalLast7d = buckets.reduce((sum, bucket) => sum + bucket.count, 0);
3443
+ return {
3444
+ acceptedSinceStart,
3445
+ bucketSizeMinutes: BUCKET_MS / (60 * 1e3),
3446
+ windowDays: WINDOW_MS / (1440 * 60 * 1e3),
3447
+ totalLast7d,
3448
+ buckets,
3449
+ modelsSinceStart: buildModelSnapshots(modelStatsSinceStart.entries()),
3450
+ modelsLast7d: buildLast7dModelSnapshots(now)
3451
+ };
3452
+ }
3453
+ async function persistRequestTelemetry() {
3454
+ pruneBuckets();
3455
+ const file = {
3456
+ version: 2,
3457
+ buckets: Object.fromEntries([...bucketCounts.entries()].map(([key, value]) => [String(key), value])),
3458
+ modelBuckets: Object.fromEntries([...modelBucketStats.entries()].map(([bucketTimestamp, bucket]) => [String(bucketTimestamp), Object.fromEntries([...bucket.entries()].map(([model, stats]) => [model, {
3459
+ requestCount: stats.requestCount,
3460
+ successCount: stats.successCount,
3461
+ failureCount: stats.failureCount,
3462
+ totalDurationMs: stats.totalDurationMs,
3463
+ inputTokens: stats.inputTokens,
3464
+ outputTokens: stats.outputTokens,
3465
+ cacheReadInputTokens: stats.cacheReadInputTokens,
3466
+ cacheCreationInputTokens: stats.cacheCreationInputTokens,
3467
+ reasoningTokens: stats.reasoningTokens
3468
+ }]))]))
3469
+ };
3470
+ try {
3471
+ await fs.writeFile(telemetryFilePath, JSON.stringify(file, null, 2), "utf8");
3472
+ } catch {}
3473
+ }
3474
+ async function shutdownRequestTelemetry() {
3475
+ stopPeriodicPersistence();
3476
+ await persistRequestTelemetry();
3477
+ }
3478
+ //#endregion
3479
+ //#region src/lib/context/activity-summary.ts
3480
+ function isActiveRequestState(state) {
3481
+ return state !== "completed" && state !== "failed";
3482
+ }
3483
+ function summarizeRequestContext(context) {
3484
+ const state = context.state ?? "pending";
3485
+ return {
3486
+ id: context.id,
3487
+ endpoint: context.endpoint,
3488
+ ...context.rawPath ? { rawPath: context.rawPath } : {},
3489
+ state,
3490
+ active: isActiveRequestState(state),
3491
+ startTime: context.startTime,
3492
+ durationMs: context.durationMs ?? 0,
3493
+ lastUpdatedAt: Date.now(),
3494
+ model: context.originalRequest?.model,
3495
+ stream: context.originalRequest?.stream,
3496
+ attemptCount: context.attempts?.length ?? 0,
3497
+ currentStrategy: context.currentAttempt?.strategy,
3498
+ queueWaitMs: context.queueWaitMs ?? 0,
3499
+ ...context.transport ? { transport: context.transport } : {}
3500
+ };
3501
+ }
3502
+ function buildHistoryActivityPatch(context) {
3503
+ const snapshot = summarizeRequestContext(context);
3504
+ return {
3505
+ ...snapshot.rawPath ? { rawPath: snapshot.rawPath } : {},
3506
+ startedAt: snapshot.startTime,
3507
+ state: snapshot.state,
3508
+ active: snapshot.active,
3509
+ lastUpdatedAt: snapshot.lastUpdatedAt,
3510
+ queueWaitMs: snapshot.queueWaitMs,
3511
+ attemptCount: snapshot.attemptCount,
3512
+ currentStrategy: snapshot.currentStrategy,
3513
+ durationMs: snapshot.durationMs,
3514
+ ...snapshot.transport ? { transport: snapshot.transport } : {}
3515
+ };
3516
+ }
3517
+ //#endregion
3007
3518
  //#region src/lib/models/resolver.ts
3008
3519
  /**
3009
3520
  * Unified model name resolution and normalization.
@@ -3198,13 +3709,16 @@ function createRequestContext(opts) {
3198
3709
  const startTime = Date.now();
3199
3710
  const onEvent = opts.onEvent;
3200
3711
  let _state = "pending";
3712
+ let _sessionId = opts.sessionId;
3201
3713
  let _originalRequest = null;
3202
3714
  let _response = null;
3203
3715
  let _pipelineInfo = null;
3204
3716
  let _sseEvents = null;
3205
3717
  let _httpHeaders = null;
3206
3718
  let _queueWaitMs = 0;
3719
+ const _warningMessages = [];
3207
3720
  const _attempts = [];
3721
+ let _endTime = null;
3208
3722
  /** Guard: once complete() or fail() is called, subsequent calls are no-ops */
3209
3723
  let settled = false;
3210
3724
  function emit(event) {
@@ -3214,8 +3728,15 @@ function createRequestContext(opts) {
3214
3728
  }
3215
3729
  const ctx = {
3216
3730
  id,
3731
+ get sessionId() {
3732
+ return _sessionId;
3733
+ },
3217
3734
  tuiLogId: opts.tuiLogId,
3735
+ rawPath: opts.rawPath,
3218
3736
  startTime,
3737
+ get endTime() {
3738
+ return _endTime;
3739
+ },
3219
3740
  endpoint: opts.endpoint,
3220
3741
  get state() {
3221
3742
  return _state;
@@ -3238,6 +3759,9 @@ function createRequestContext(opts) {
3238
3759
  get httpHeaders() {
3239
3760
  return _httpHeaders;
3240
3761
  },
3762
+ get transport() {
3763
+ return _attempts.findLast((attempt) => attempt.response)?.transport ?? _attempts.at(-1)?.transport ?? null;
3764
+ },
3241
3765
  get attempts() {
3242
3766
  return _attempts;
3243
3767
  },
@@ -3247,6 +3771,12 @@ function createRequestContext(opts) {
3247
3771
  get queueWaitMs() {
3248
3772
  return _queueWaitMs;
3249
3773
  },
3774
+ get warningMessages() {
3775
+ return _warningMessages;
3776
+ },
3777
+ setSessionId(sessionId) {
3778
+ _sessionId = sessionId;
3779
+ },
3250
3780
  setOriginalRequest(req) {
3251
3781
  _originalRequest = req;
3252
3782
  emit({
@@ -3272,6 +3802,15 @@ function createRequestContext(opts) {
3272
3802
  response: capture.response
3273
3803
  };
3274
3804
  },
3805
+ addWarningMessage(warning) {
3806
+ if (_warningMessages.some((existing) => existing.code === warning.code && existing.message === warning.message)) return;
3807
+ _warningMessages.push(warning);
3808
+ emit({
3809
+ type: "updated",
3810
+ context: ctx,
3811
+ field: "warningMessages"
3812
+ });
3813
+ },
3275
3814
  beginAttempt(attemptOpts) {
3276
3815
  const attempt = {
3277
3816
  index: _attempts.length,
@@ -3279,6 +3818,7 @@ function createRequestContext(opts) {
3279
3818
  wireRequest: null,
3280
3819
  response: null,
3281
3820
  error: null,
3821
+ transport: attemptOpts.transport ?? "http",
3282
3822
  strategy: attemptOpts.strategy,
3283
3823
  truncation: attemptOpts.truncation,
3284
3824
  waitMs: attemptOpts.waitMs,
@@ -3298,11 +3838,36 @@ function createRequestContext(opts) {
3298
3838
  },
3299
3839
  setAttemptEffectiveRequest(req) {
3300
3840
  const attempt = ctx.currentAttempt;
3301
- if (attempt) attempt.effectiveRequest = req;
3841
+ if (attempt) {
3842
+ attempt.effectiveRequest = req;
3843
+ emit({
3844
+ type: "updated",
3845
+ context: ctx,
3846
+ field: "attempts"
3847
+ });
3848
+ }
3302
3849
  },
3303
3850
  setAttemptWireRequest(req) {
3304
3851
  const attempt = ctx.currentAttempt;
3305
- if (attempt) attempt.wireRequest = req;
3852
+ if (attempt) {
3853
+ attempt.wireRequest = req;
3854
+ emit({
3855
+ type: "updated",
3856
+ context: ctx,
3857
+ field: "attempts"
3858
+ });
3859
+ }
3860
+ },
3861
+ setAttemptTransport(transport) {
3862
+ const attempt = ctx.currentAttempt;
3863
+ if (attempt) {
3864
+ attempt.transport = transport;
3865
+ emit({
3866
+ type: "updated",
3867
+ context: ctx,
3868
+ field: "attempts"
3869
+ });
3870
+ }
3306
3871
  },
3307
3872
  setAttemptResponse(response) {
3308
3873
  const attempt = ctx.currentAttempt;
@@ -3320,6 +3885,11 @@ function createRequestContext(opts) {
3320
3885
  },
3321
3886
  addQueueWaitMs(ms) {
3322
3887
  _queueWaitMs += ms;
3888
+ emit({
3889
+ type: "updated",
3890
+ context: ctx,
3891
+ field: "queueWaitMs"
3892
+ });
3323
3893
  },
3324
3894
  transition(newState, meta) {
3325
3895
  const previousState = _state;
@@ -3334,6 +3904,7 @@ function createRequestContext(opts) {
3334
3904
  complete(response) {
3335
3905
  if (settled) return;
3336
3906
  settled = true;
3907
+ _endTime = Date.now();
3337
3908
  if (response.model) response.model = normalizeModelId(response.model);
3338
3909
  _response = response;
3339
3910
  ctx.setAttemptResponse(response);
@@ -3347,6 +3918,7 @@ function createRequestContext(opts) {
3347
3918
  fail(model, error) {
3348
3919
  if (settled) return;
3349
3920
  settled = true;
3921
+ _endTime = Date.now();
3350
3922
  const errorMsg = getErrorMessage(error);
3351
3923
  _response = {
3352
3924
  success: false,
@@ -3372,11 +3944,23 @@ function createRequestContext(opts) {
3372
3944
  },
3373
3945
  toHistoryEntry() {
3374
3946
  const p = _originalRequest?.payload;
3947
+ const endedAt = _endTime ?? Date.now();
3375
3948
  const entry = {
3376
3949
  id,
3377
3950
  endpoint: opts.endpoint,
3378
- timestamp: startTime,
3379
- durationMs: Date.now() - startTime,
3951
+ ..._sessionId ? { sessionId: _sessionId } : {},
3952
+ ...opts.rawPath ? { rawPath: opts.rawPath } : {},
3953
+ startedAt: startTime,
3954
+ endedAt,
3955
+ state: _state,
3956
+ active: false,
3957
+ lastUpdatedAt: endedAt,
3958
+ queueWaitMs: _queueWaitMs,
3959
+ attemptCount: _attempts.length,
3960
+ currentStrategy: _attempts.at(-1)?.strategy,
3961
+ durationMs: endedAt - startTime,
3962
+ ...ctx.transport ? { transport: ctx.transport } : {},
3963
+ ..._warningMessages.length > 0 && { warningMessages: [..._warningMessages] },
3380
3964
  request: {
3381
3965
  model: _originalRequest?.model,
3382
3966
  messages: _originalRequest?.messages,
@@ -3402,7 +3986,7 @@ function createRequestContext(opts) {
3402
3986
  format: ep.format,
3403
3987
  messageCount: ep.messages.length,
3404
3988
  messages: ep.messages,
3405
- system: ep.payload?.system,
3989
+ system: ep.payload.system,
3406
3990
  payload: ep.payload
3407
3991
  };
3408
3992
  }
@@ -3413,7 +3997,7 @@ function createRequestContext(opts) {
3413
3997
  format: wp.format,
3414
3998
  messageCount: wp.messages.length,
3415
3999
  messages: wp.messages,
3416
- system: wp.payload?.system,
4000
+ system: wp.payload.system,
3417
4001
  payload: wp.payload,
3418
4002
  headers: wp.headers
3419
4003
  };
@@ -3422,10 +4006,11 @@ function createRequestContext(opts) {
3422
4006
  index: a.index,
3423
4007
  strategy: a.strategy,
3424
4008
  durationMs: a.durationMs,
4009
+ transport: a.transport,
3425
4010
  error: a.error?.message,
3426
4011
  truncation: a.truncation,
3427
4012
  sanitization: a.sanitization,
3428
- effectiveMessageCount: a.effectiveRequest?.messages?.length
4013
+ effectiveMessageCount: a.effectiveRequest?.messages.length
3429
4014
  }));
3430
4015
  return entry;
3431
4016
  }
@@ -3500,7 +4085,7 @@ function createRequestContextManager() {
3500
4085
  });
3501
4086
  notifyActiveRequestChanged({
3502
4087
  action: "state_changed",
3503
- request: summarizeContext(context),
4088
+ request: summarizeRequestContext(context),
3504
4089
  activeCount: activeContexts.size
3505
4090
  });
3506
4091
  }
@@ -3513,11 +4098,19 @@ function createRequestContextManager() {
3513
4098
  });
3514
4099
  break;
3515
4100
  case "completed":
3516
- if (rawEvent.entry) emit({
3517
- type: "completed",
3518
- context,
3519
- entry: rawEvent.entry
3520
- });
4101
+ if (rawEvent.entry) {
4102
+ recordSettledRequest(rawEvent.entry.response?.model ?? rawEvent.entry.request.model ?? "unknown", {
4103
+ startedAt: rawEvent.entry.startedAt,
4104
+ endedAt: rawEvent.entry.endedAt,
4105
+ success: rawEvent.entry.response?.success ?? true,
4106
+ usage: rawEvent.entry.response?.usage
4107
+ });
4108
+ emit({
4109
+ type: "completed",
4110
+ context,
4111
+ entry: rawEvent.entry
4112
+ });
4113
+ }
3521
4114
  activeContexts.delete(context.id);
3522
4115
  notifyActiveRequestChanged({
3523
4116
  action: "completed",
@@ -3526,11 +4119,19 @@ function createRequestContextManager() {
3526
4119
  });
3527
4120
  break;
3528
4121
  case "failed":
3529
- if (rawEvent.entry) emit({
3530
- type: "failed",
3531
- context,
3532
- entry: rawEvent.entry
3533
- });
4122
+ if (rawEvent.entry) {
4123
+ recordSettledRequest(rawEvent.entry.response?.model ?? rawEvent.entry.request.model ?? "unknown", {
4124
+ startedAt: rawEvent.entry.startedAt,
4125
+ endedAt: rawEvent.entry.endedAt,
4126
+ success: rawEvent.entry.response?.success ?? false,
4127
+ usage: rawEvent.entry.response?.usage
4128
+ });
4129
+ emit({
4130
+ type: "failed",
4131
+ context,
4132
+ entry: rawEvent.entry
4133
+ });
4134
+ }
3534
4135
  activeContexts.delete(context.id);
3535
4136
  notifyActiveRequestChanged({
3536
4137
  action: "failed",
@@ -3541,28 +4142,16 @@ function createRequestContextManager() {
3541
4142
  default: break;
3542
4143
  }
3543
4144
  }
3544
- /** Build a lightweight summary of a context for WS broadcast */
3545
- function summarizeContext(ctx) {
3546
- return {
3547
- id: ctx.id,
3548
- endpoint: ctx.endpoint,
3549
- state: ctx.state,
3550
- startTime: ctx.startTime,
3551
- durationMs: ctx.durationMs,
3552
- model: ctx.originalRequest?.model,
3553
- stream: ctx.originalRequest?.stream,
3554
- attemptCount: ctx.attempts.length,
3555
- currentStrategy: ctx.currentAttempt?.strategy,
3556
- queueWaitMs: ctx.queueWaitMs
3557
- };
3558
- }
3559
4145
  return {
3560
4146
  create(opts) {
3561
4147
  const ctx = createRequestContext({
3562
4148
  endpoint: opts.endpoint,
4149
+ sessionId: opts.sessionId,
3563
4150
  tuiLogId: opts.tuiLogId,
4151
+ rawPath: opts.rawPath,
3564
4152
  onEvent: handleContextEvent
3565
4153
  });
4154
+ recordAcceptedRequest(ctx.startTime);
3566
4155
  activeContexts.set(ctx.id, ctx);
3567
4156
  emit({
3568
4157
  type: "created",
@@ -3570,31 +4159,363 @@ function createRequestContextManager() {
3570
4159
  });
3571
4160
  notifyActiveRequestChanged({
3572
4161
  action: "created",
3573
- request: summarizeContext(ctx),
4162
+ request: summarizeRequestContext(ctx),
3574
4163
  activeCount: activeContexts.size
3575
4164
  });
3576
- return ctx;
4165
+ return ctx;
4166
+ },
4167
+ get(id) {
4168
+ return activeContexts.get(id);
4169
+ },
4170
+ getAll() {
4171
+ return Array.from(activeContexts.values());
4172
+ },
4173
+ get activeCount() {
4174
+ return activeContexts.size;
4175
+ },
4176
+ on(_event, listener) {
4177
+ listeners.add(listener);
4178
+ },
4179
+ off(_event, listener) {
4180
+ listeners.delete(listener);
4181
+ },
4182
+ startReaper,
4183
+ stopReaper,
4184
+ _runReaperOnce: runReaperOnce
4185
+ };
4186
+ }
4187
+ //#endregion
4188
+ //#region src/lib/openai/upstream-ws-connection.ts
4189
+ const DEFAULT_IDLE_TIMEOUT_MS = 5 * 6e4;
4190
+ const CLOSE_CODE_GOING_AWAY = 1001;
4191
+ const TERMINAL_EVENTS$1 = new Set([
4192
+ "response.completed",
4193
+ "response.failed",
4194
+ "response.incomplete",
4195
+ "error"
4196
+ ]);
4197
+ function createUpstreamWsConnection(opts) {
4198
+ const createSocket = opts.createSocket ?? ((url, headers) => new WebSocket$1(url, { headers }));
4199
+ const idleTimeoutMs = opts.idleTimeoutMs ?? DEFAULT_IDLE_TIMEOUT_MS;
4200
+ let socket = null;
4201
+ let busy = false;
4202
+ let statefulMarker;
4203
+ let currentQueue = null;
4204
+ let currentAbortCleanup = null;
4205
+ let idleTimer = null;
4206
+ const clearIdleTimer = () => {
4207
+ if (idleTimer) {
4208
+ clearTimeout(idleTimer);
4209
+ idleTimer = null;
4210
+ }
4211
+ };
4212
+ const scheduleIdleClose = () => {
4213
+ clearIdleTimer();
4214
+ if (!socket || busy || socket.readyState !== socket.OPEN || idleTimeoutMs <= 0) return;
4215
+ idleTimer = setTimeout(() => {
4216
+ socket?.close(CLOSE_CODE_GOING_AWAY, "Idle timeout");
4217
+ }, idleTimeoutMs);
4218
+ };
4219
+ const finishRequest = () => {
4220
+ busy = false;
4221
+ currentAbortCleanup?.();
4222
+ currentAbortCleanup = null;
4223
+ currentQueue?.close();
4224
+ currentQueue = null;
4225
+ scheduleIdleClose();
4226
+ };
4227
+ const failRequest = (error) => {
4228
+ busy = false;
4229
+ currentAbortCleanup?.();
4230
+ currentAbortCleanup = null;
4231
+ currentQueue?.fail(error);
4232
+ currentQueue = null;
4233
+ };
4234
+ const handleMessage = (event) => {
4235
+ if (!(event instanceof MessageEvent)) return;
4236
+ if (!currentQueue) return;
4237
+ clearIdleTimer();
4238
+ try {
4239
+ const parsed = parseWebSocketEvent(event.data);
4240
+ currentQueue.push(parsed);
4241
+ if (parsed.type === "response.completed") statefulMarker = parsed.response.id;
4242
+ if (TERMINAL_EVENTS$1.has(parsed.type)) finishRequest();
4243
+ } catch (error) {
4244
+ failRequest(error instanceof Error ? error : new Error(String(error)));
4245
+ }
4246
+ };
4247
+ const handleError = () => {
4248
+ if (!busy || !currentQueue) return;
4249
+ failRequest(/* @__PURE__ */ new Error("Upstream WebSocket error"));
4250
+ };
4251
+ const handleClose = (event) => {
4252
+ clearIdleTimer();
4253
+ socket?.removeEventListener("message", handleMessage);
4254
+ socket?.removeEventListener("error", handleError);
4255
+ socket?.removeEventListener("close", handleClose);
4256
+ socket = null;
4257
+ opts.onClose?.();
4258
+ if (!busy || !currentQueue) return;
4259
+ const closeEvent = event;
4260
+ failRequest(/* @__PURE__ */ new Error(`Upstream WebSocket closed (${closeEvent.code}: ${closeEvent.reason || "unknown"})`));
4261
+ };
4262
+ return {
4263
+ async connect(connectOpts) {
4264
+ const existingSocket = socket;
4265
+ if (existingSocket && existingSocket.readyState === existingSocket.OPEN) return;
4266
+ if (existingSocket && existingSocket.readyState === existingSocket.CONNECTING) throw new Error("Upstream WebSocket is already connecting");
4267
+ const ws = createSocket(copilotWsUrl(state), opts.headers);
4268
+ socket = ws;
4269
+ ws.addEventListener("message", handleMessage);
4270
+ ws.addEventListener("error", handleError);
4271
+ ws.addEventListener("close", handleClose);
4272
+ await new Promise((resolve, reject) => {
4273
+ const signal = connectOpts?.signal;
4274
+ const activeSocket = ws;
4275
+ const cleanup = () => {
4276
+ activeSocket.removeEventListener("open", onOpen);
4277
+ activeSocket.removeEventListener("error", onOpenError);
4278
+ signal?.removeEventListener("abort", onAbort);
4279
+ };
4280
+ const onOpen = () => {
4281
+ cleanup();
4282
+ resolve();
4283
+ };
4284
+ const onOpenError = () => {
4285
+ cleanup();
4286
+ activeSocket.close(CLOSE_CODE_GOING_AWAY, "Handshake failed");
4287
+ reject(/* @__PURE__ */ new Error("Upstream WebSocket handshake failed"));
4288
+ };
4289
+ const onAbort = () => {
4290
+ cleanup();
4291
+ activeSocket.close(CLOSE_CODE_GOING_AWAY, "Aborted");
4292
+ reject(/* @__PURE__ */ new Error("Upstream WebSocket connection aborted"));
4293
+ };
4294
+ activeSocket.addEventListener("open", onOpen, { once: true });
4295
+ activeSocket.addEventListener("error", onOpenError, { once: true });
4296
+ signal?.addEventListener("abort", onAbort, { once: true });
4297
+ if (signal?.aborted) onAbort();
4298
+ });
4299
+ scheduleIdleClose();
4300
+ },
4301
+ sendRequest(payload, requestOpts) {
4302
+ if (!socket || socket.readyState !== socket.OPEN) throw new Error("Upstream WebSocket is not connected");
4303
+ if (busy) throw new Error("Upstream WebSocket connection is busy");
4304
+ clearIdleTimer();
4305
+ busy = true;
4306
+ currentQueue = createAsyncQueue();
4307
+ const abortSignal = requestOpts?.abortSignal;
4308
+ const onAbort = () => {
4309
+ failRequest(/* @__PURE__ */ new Error("Upstream WebSocket request aborted"));
4310
+ };
4311
+ currentAbortCleanup = () => {
4312
+ abortSignal?.removeEventListener("abort", onAbort);
4313
+ };
4314
+ abortSignal?.addEventListener("abort", onAbort, { once: true });
4315
+ try {
4316
+ const { stream: _stream, ...wire } = payload;
4317
+ socket.send(JSON.stringify({
4318
+ type: "response.create",
4319
+ ...wire
4320
+ }));
4321
+ } catch (error) {
4322
+ currentAbortCleanup();
4323
+ currentAbortCleanup = null;
4324
+ failRequest(error instanceof Error ? error : new Error(String(error)));
4325
+ }
4326
+ const queue = currentQueue;
4327
+ return (async function* () {
4328
+ try {
4329
+ yield* queue.iterate();
4330
+ } finally {
4331
+ currentAbortCleanup?.();
4332
+ currentAbortCleanup = null;
4333
+ }
4334
+ })();
4335
+ },
4336
+ get isOpen() {
4337
+ return socket !== null && socket.readyState === socket.OPEN;
4338
+ },
4339
+ get isBusy() {
4340
+ return busy;
4341
+ },
4342
+ get statefulMarker() {
4343
+ return statefulMarker;
4344
+ },
4345
+ get model() {
4346
+ return opts.model;
4347
+ },
4348
+ close() {
4349
+ clearIdleTimer();
4350
+ socket?.close(CLOSE_CODE_GOING_AWAY, "Going away");
4351
+ }
4352
+ };
4353
+ }
4354
+ function createAsyncQueue() {
4355
+ const values = [];
4356
+ const waiters = [];
4357
+ let closed = false;
4358
+ let failure = null;
4359
+ const drain = () => {
4360
+ while (waiters.length > 0) {
4361
+ if (failure) {
4362
+ waiters.shift()?.reject(failure);
4363
+ continue;
4364
+ }
4365
+ if (values.length > 0) {
4366
+ waiters.shift()?.resolve({
4367
+ done: false,
4368
+ value: values.shift()
4369
+ });
4370
+ continue;
4371
+ }
4372
+ if (closed) {
4373
+ waiters.shift()?.resolve({
4374
+ done: true,
4375
+ value: void 0
4376
+ });
4377
+ continue;
4378
+ }
4379
+ break;
4380
+ }
4381
+ };
4382
+ return {
4383
+ push(value) {
4384
+ if (closed || failure) return;
4385
+ values.push(value);
4386
+ drain();
4387
+ },
4388
+ close() {
4389
+ closed = true;
4390
+ drain();
4391
+ },
4392
+ fail(error) {
4393
+ if (failure) return;
4394
+ failure = error;
4395
+ drain();
4396
+ },
4397
+ async *iterate() {
4398
+ for (;;) {
4399
+ if (failure) throw failure;
4400
+ if (values.length > 0) {
4401
+ yield values.shift();
4402
+ continue;
4403
+ }
4404
+ if (closed) return;
4405
+ const next = await new Promise((resolve, reject) => {
4406
+ waiters.push({
4407
+ resolve,
4408
+ reject
4409
+ });
4410
+ drain();
4411
+ });
4412
+ if (next.done) return;
4413
+ yield next.value;
4414
+ }
4415
+ }
4416
+ };
4417
+ }
4418
+ function parseWebSocketEvent(input) {
4419
+ let text = null;
4420
+ if (typeof input === "string") text = input;
4421
+ else if (input instanceof ArrayBuffer) text = Buffer.from(input).toString("utf8");
4422
+ else if (ArrayBuffer.isView(input)) text = Buffer.from(input.buffer, input.byteOffset, input.byteLength).toString("utf8");
4423
+ if (text === null) throw new Error("Unsupported upstream WebSocket frame");
4424
+ const parsed = JSON.parse(text);
4425
+ if (isCapiWebSocketError(parsed)) return {
4426
+ type: "error",
4427
+ code: parsed.error.code,
4428
+ message: parsed.error.message,
4429
+ sequence_number: typeof parsed.sequence_number === "number" ? parsed.sequence_number : 0
4430
+ };
4431
+ return parsed;
4432
+ }
4433
+ function isCapiWebSocketError(input) {
4434
+ if (!input || typeof input !== "object") return false;
4435
+ const record = input;
4436
+ if (record.type !== "error") return false;
4437
+ if (!record.error || typeof record.error !== "object") return false;
4438
+ const error = record.error;
4439
+ return typeof error.code === "string" && typeof error.message === "string";
4440
+ }
4441
+ //#endregion
4442
+ //#region src/lib/openai/upstream-ws.ts
4443
+ const MAX_CONSECUTIVE_WS_FALLBACKS = 3;
4444
+ let connectionFactory = createUpstreamWsConnection;
4445
+ function createUpstreamWsManager() {
4446
+ const connections = /* @__PURE__ */ new Map();
4447
+ let stopped = false;
4448
+ let consecutiveFallbacks = 0;
4449
+ let temporarilyDisabled = false;
4450
+ return {
4451
+ findReusable({ previousResponseId, model }) {
4452
+ if (stopped || temporarilyDisabled) return void 0;
4453
+ for (const connection of connections.values()) {
4454
+ if (!connection.isOpen) continue;
4455
+ if (connection.isBusy) continue;
4456
+ if (connection.statefulMarker !== previousResponseId) continue;
4457
+ if (connection.model !== model) continue;
4458
+ return connection;
4459
+ }
4460
+ },
4461
+ create({ headers, model }) {
4462
+ if (stopped) throw new Error("Upstream WebSocket manager is not accepting new work");
4463
+ const key = randomUUID();
4464
+ const connection = connectionFactory({
4465
+ headers,
4466
+ model,
4467
+ onClose: () => {
4468
+ connections.delete(key);
4469
+ }
4470
+ });
4471
+ connections.set(key, connection);
4472
+ return Promise.resolve(connection);
3577
4473
  },
3578
- get(id) {
3579
- return activeContexts.get(id);
4474
+ stopNew() {
4475
+ stopped = true;
3580
4476
  },
3581
- getAll() {
3582
- return Array.from(activeContexts.values());
4477
+ closeAll() {
4478
+ for (const connection of connections.values()) connection.close();
4479
+ connections.clear();
4480
+ },
4481
+ resetRuntimeState() {
4482
+ stopped = false;
4483
+ consecutiveFallbacks = 0;
4484
+ temporarilyDisabled = false;
4485
+ this.closeAll();
4486
+ },
4487
+ recordSuccessfulStart() {
4488
+ consecutiveFallbacks = 0;
4489
+ temporarilyDisabled = false;
4490
+ },
4491
+ recordFallback() {
4492
+ consecutiveFallbacks += 1;
4493
+ if (consecutiveFallbacks >= MAX_CONSECUTIVE_WS_FALLBACKS) temporarilyDisabled = true;
3583
4494
  },
3584
4495
  get activeCount() {
3585
- return activeContexts.size;
4496
+ let count = 0;
4497
+ for (const connection of connections.values()) if (connection.isOpen) count += 1;
4498
+ return count;
3586
4499
  },
3587
- on(_event, listener) {
3588
- listeners.add(listener);
4500
+ get consecutiveFallbacks() {
4501
+ return consecutiveFallbacks;
3589
4502
  },
3590
- off(_event, listener) {
3591
- listeners.delete(listener);
4503
+ get temporarilyDisabled() {
4504
+ return temporarilyDisabled;
3592
4505
  },
3593
- startReaper,
3594
- stopReaper,
3595
- _runReaperOnce: runReaperOnce
4506
+ get stopped() {
4507
+ return stopped;
4508
+ }
3596
4509
  };
3597
4510
  }
4511
+ let manager = null;
4512
+ function getUpstreamWsManager() {
4513
+ manager ??= createUpstreamWsManager();
4514
+ return manager;
4515
+ }
4516
+ function peekUpstreamWsManager() {
4517
+ return manager;
4518
+ }
3598
4519
  let serverInstance = null;
3599
4520
  let _isShuttingDown = false;
3600
4521
  let shutdownResolve = null;
@@ -3718,6 +4639,7 @@ async function gracefulShutdown(signal, deps) {
3718
4639
  } catch {}
3719
4640
  stopRefresh();
3720
4641
  stopMemoryPressureMonitor();
4642
+ peekUpstreamWsManager()?.stopNew();
3721
4643
  const wsClients = getWsClientCount();
3722
4644
  if (wsClients > 0) {
3723
4645
  closeWsClients();
@@ -3775,6 +4697,7 @@ async function gracefulShutdown(signal, deps) {
3775
4697
  } catch (error) {
3776
4698
  consola.error("Error force-closing server:", error);
3777
4699
  }
4700
+ peekUpstreamWsManager()?.closeAll();
3778
4701
  }
3779
4702
  finalize(tracker);
3780
4703
  }
@@ -3783,26 +4706,37 @@ function finalize(tracker) {
3783
4706
  setPhase("finalized");
3784
4707
  shutdownDrainAbortController = null;
3785
4708
  tracker.destroy();
3786
- consola.info("Shutdown complete");
3787
- shutdownResolve?.();
4709
+ shutdownRequestTelemetry().finally(() => {
4710
+ consola.info("Shutdown complete");
4711
+ shutdownResolve?.();
4712
+ });
3788
4713
  }
3789
4714
  function handleShutdownSignal(signal, opts) {
3790
4715
  const shutdownFn = opts?.gracefulShutdownFn ?? ((shutdownSignal) => gracefulShutdown(shutdownSignal));
3791
4716
  const exitFn = opts?.exitFn ?? ((code) => process.exit(code));
3792
- if (_isShuttingDown) {
3793
- if (shutdownPhase === "phase2") {
4717
+ if (_isShuttingDown) switch (shutdownPhase) {
4718
+ case "phase1":
4719
+ consola.warn("Signal received during Phase 1 setup, waiting for shutdown to proceed");
4720
+ return shutdownPromise ?? void 0;
4721
+ case "phase2":
3794
4722
  consola.warn("Second signal received, escalating shutdown to abort active requests");
3795
4723
  shutdownDrainAbortController?.abort();
3796
4724
  return shutdownPromise ?? void 0;
3797
- }
3798
- if (shutdownPhase === "phase3") {
4725
+ case "phase3":
3799
4726
  consola.warn("Additional signal received, escalating shutdown to force-close remaining requests");
3800
4727
  shutdownDrainAbortController?.abort();
3801
4728
  return shutdownPromise ?? void 0;
3802
- }
3803
- consola.warn("Additional signal received during forced shutdown, exiting immediately");
3804
- exitFn(1);
3805
- return shutdownPromise ?? void 0;
4729
+ case "phase4":
4730
+ consola.warn("Additional signal received during forced shutdown, exiting immediately");
4731
+ exitFn(1);
4732
+ return shutdownPromise ?? void 0;
4733
+ case "finalized":
4734
+ consola.info("Signal received after shutdown finalized, ignoring");
4735
+ return shutdownPromise ?? void 0;
4736
+ default:
4737
+ consola.warn("Signal received in unexpected shutdown phase, exiting immediately");
4738
+ exitFn(1);
4739
+ return shutdownPromise ?? void 0;
3806
4740
  }
3807
4741
  shutdownPromise = shutdownFn(signal).catch((error) => {
3808
4742
  consola.error("Fatal error during shutdown:", error);
@@ -4579,54 +5513,8 @@ const setupClaudeCode = defineCommand({
4579
5513
  }
4580
5514
  });
4581
5515
  //#endregion
4582
- //#region src/lib/serve.ts
4583
- /** Start the HTTP server and return a ServerInstance. */
4584
- async function startServer(options) {
4585
- if (typeof globalThis.Bun !== "undefined") return startBunServer(options);
4586
- return startNodeServer(options);
4587
- }
4588
- async function startNodeServer(options) {
4589
- const { createAdaptorServer } = await import("./dist-B3gFwWti.mjs");
4590
- const nodeServer = createAdaptorServer({ fetch: options.fetch });
4591
- await new Promise((resolve, reject) => {
4592
- nodeServer.once("error", reject);
4593
- nodeServer.listen({
4594
- port: options.port,
4595
- host: options.hostname,
4596
- exclusive: false
4597
- }, () => {
4598
- nodeServer.removeListener("error", reject);
4599
- resolve();
4600
- });
4601
- });
4602
- return {
4603
- nodeServer,
4604
- close(force) {
4605
- return new Promise((resolve, reject) => {
4606
- if (force && "closeAllConnections" in nodeServer) nodeServer.closeAllConnections();
4607
- nodeServer.close((err) => err ? reject(err) : resolve());
4608
- });
4609
- }
4610
- };
4611
- }
4612
- async function startBunServer(options) {
4613
- const bunServer = Bun.serve({
4614
- fetch(request, server) {
4615
- return options.fetch(request, { server });
4616
- },
4617
- port: options.port,
4618
- hostname: options.hostname,
4619
- idleTimeout: 255,
4620
- ...options.bunWebSocket ? { websocket: options.bunWebSocket } : {}
4621
- });
4622
- return { close(force) {
4623
- bunServer.stop(force ?? false);
4624
- return Promise.resolve();
4625
- } };
4626
- }
4627
- //#endregion
4628
5516
  //#region package.json
4629
- var version = "0.8.1-beta.2";
5517
+ var version = "0.8.3";
4630
5518
  //#endregion
4631
5519
  //#region src/lib/system-prompt/override.ts
4632
5520
  /**
@@ -5188,7 +6076,9 @@ function handleErrorPersistence(event) {
5188
6076
  const MAX_MESSAGES_IN_DUMP = 50;
5189
6077
  async function writeErrorEntry(entry) {
5190
6078
  const meta = {
5191
- timestamp: new Date(entry.timestamp).toISOString(),
6079
+ timestamp: new Date(entry.startedAt).toISOString(),
6080
+ startedAt: new Date(entry.startedAt).toISOString(),
6081
+ endedAt: new Date(entry.endedAt).toISOString(),
5192
6082
  id: entry.id,
5193
6083
  endpoint: entry.endpoint,
5194
6084
  durationMs: entry.durationMs,
@@ -5251,12 +6141,12 @@ function handleHistoryEvent(event) {
5251
6141
  const orig = event.context.originalRequest;
5252
6142
  if (!orig) break;
5253
6143
  const ctx = event.context;
5254
- const sessionId = getCurrentSession(ctx.endpoint);
5255
6144
  insertEntry({
5256
6145
  id: ctx.id,
5257
- sessionId,
5258
- timestamp: ctx.startTime,
6146
+ ...ctx.sessionId ? { sessionId: ctx.sessionId } : {},
6147
+ ...ctx.rawPath ? { rawPath: ctx.rawPath } : {},
5259
6148
  endpoint: ctx.endpoint,
6149
+ ...buildHistoryActivityPatch(ctx),
5260
6150
  request: {
5261
6151
  model: orig.model,
5262
6152
  messages: orig.messages,
@@ -5266,16 +6156,33 @@ function handleHistoryEvent(event) {
5266
6156
  }
5267
6157
  });
5268
6158
  }
6159
+ if (event.field === "attempts" || event.field === "queueWaitMs") updateEntry(event.context.id, buildHistoryActivityPatch(event.context));
6160
+ if (event.field === "warningMessages" && event.context.warningMessages.length > 0) updateEntry(event.context.id, { warningMessages: [...event.context.warningMessages] });
5269
6161
  if (event.field === "pipelineInfo" && event.context.pipelineInfo) updateEntry(event.context.id, { pipelineInfo: event.context.pipelineInfo });
5270
6162
  break;
6163
+ case "state_changed":
6164
+ updateEntry(event.context.id, buildHistoryActivityPatch(event.context));
6165
+ break;
5271
6166
  case "completed":
5272
6167
  case "failed": {
5273
6168
  const entryData = event.entry;
5274
6169
  const response = toHistoryResponse(entryData);
5275
6170
  updateEntry(entryData.id, {
6171
+ rawPath: entryData.rawPath,
6172
+ sessionId: entryData.sessionId,
6173
+ state: entryData.state,
6174
+ active: entryData.active,
6175
+ lastUpdatedAt: entryData.lastUpdatedAt,
6176
+ queueWaitMs: entryData.queueWaitMs,
6177
+ attemptCount: entryData.attemptCount,
6178
+ currentStrategy: entryData.currentStrategy,
5276
6179
  response,
6180
+ startedAt: entryData.startedAt,
6181
+ endedAt: entryData.endedAt,
5277
6182
  durationMs: entryData.durationMs,
6183
+ transport: entryData.transport,
5278
6184
  sseEvents: entryData.sseEvents,
6185
+ ...entryData.warningMessages && { warningMessages: entryData.warningMessages },
5279
6186
  ...entryData.effectiveRequest && { effectiveRequest: {
5280
6187
  model: entryData.effectiveRequest.model,
5281
6188
  format: entryData.effectiveRequest.format,
@@ -5317,6 +6224,10 @@ function handleTuiEvent(event) {
5317
6224
  const attempt = event.context.currentAttempt;
5318
6225
  if (attempt?.strategy) tuiLogger.updateRequest(tuiLogId, { tags: [attempt.strategy] });
5319
6226
  }
6227
+ if (event.field === "attempts") {
6228
+ const transportTag = toTransportTag(event.context.currentAttempt?.transport);
6229
+ if (transportTag) tuiLogger.updateRequest(tuiLogId, { tags: [transportTag] });
6230
+ }
5320
6231
  break;
5321
6232
  }
5322
6233
  case "completed": {
@@ -5370,6 +6281,10 @@ function toHistoryResponse(entryData) {
5370
6281
  headers: entryData.httpHeaders?.response
5371
6282
  };
5372
6283
  }
6284
+ function toTransportTag(transport) {
6285
+ if (transport === "upstream-ws") return "ws";
6286
+ if (transport === "upstream-ws-fallback") return "ws→http";
6287
+ }
5373
6288
  function registerContextConsumers(manager) {
5374
6289
  manager.on("change", handleHistoryEvent);
5375
6290
  manager.on("change", handleTuiEvent);
@@ -5419,6 +6334,63 @@ function isEndpointSupported(model, endpoint) {
5419
6334
  function isResponsesSupported(model) {
5420
6335
  return isEndpointSupported(model, ENDPOINT.RESPONSES) || isEndpointSupported(model, ENDPOINT.WS_RESPONSES);
5421
6336
  }
6337
+ /**
6338
+ * Check if a model explicitly supports upstream WebSocket transport for Responses API.
6339
+ *
6340
+ * Unlike `isEndpointSupported`, legacy models without `supported_endpoints` do not
6341
+ * implicitly gain WebSocket support. We only enable this transport when Copilot has
6342
+ * advertised the dedicated `ws:/responses` capability.
6343
+ */
6344
+ function isWsResponsesSupported(model) {
6345
+ if (!model?.supported_endpoints) return false;
6346
+ return model.supported_endpoints.includes(ENDPOINT.WS_RESPONSES);
6347
+ }
6348
+ //#endregion
6349
+ //#region src/lib/serve.ts
6350
+ /** Start the HTTP server and return a ServerInstance. */
6351
+ async function startServer(options) {
6352
+ if (typeof globalThis.Bun !== "undefined") return startBunServer(options);
6353
+ return startNodeServer(options);
6354
+ }
6355
+ async function startNodeServer(options) {
6356
+ const { createAdaptorServer } = await import("./dist-B3gFwWti.mjs");
6357
+ const nodeServer = createAdaptorServer({ fetch: options.fetch });
6358
+ await new Promise((resolve, reject) => {
6359
+ nodeServer.once("error", reject);
6360
+ nodeServer.listen({
6361
+ port: options.port,
6362
+ host: options.hostname,
6363
+ exclusive: false
6364
+ }, () => {
6365
+ nodeServer.removeListener("error", reject);
6366
+ resolve();
6367
+ });
6368
+ });
6369
+ return {
6370
+ nodeServer,
6371
+ close(force) {
6372
+ return new Promise((resolve, reject) => {
6373
+ if (force && "closeAllConnections" in nodeServer) nodeServer.closeAllConnections();
6374
+ nodeServer.close((err) => err ? reject(err) : resolve());
6375
+ });
6376
+ }
6377
+ };
6378
+ }
6379
+ async function startBunServer(options) {
6380
+ const bunServer = Bun.serve({
6381
+ fetch(request, server) {
6382
+ return options.fetch(request, { server });
6383
+ },
6384
+ port: options.port,
6385
+ hostname: options.hostname,
6386
+ idleTimeout: 255,
6387
+ ...options.bunWebSocket ? { websocket: options.bunWebSocket } : {}
6388
+ });
6389
+ return { close(force) {
6390
+ bunServer.stop(force ?? false);
6391
+ return Promise.resolve();
6392
+ } };
6393
+ }
5422
6394
  //#endregion
5423
6395
  //#region src/lib/openai/responses-conversion.ts
5424
6396
  /**
@@ -6012,6 +6984,47 @@ const createResponses = async (payload, opts) => {
6012
6984
  wire: prepared.wire,
6013
6985
  headers: sanitizeHeadersForHistory(prepared.headers)
6014
6986
  });
6987
+ const { wire } = prepared;
6988
+ let usedFallback = false;
6989
+ if (wire.stream && canUseUpstreamWebSocket(opts?.resolvedModel)) {
6990
+ const manager = getUpstreamWsManager();
6991
+ const connection = (typeof wire.previous_response_id === "string" ? manager.findReusable({
6992
+ previousResponseId: wire.previous_response_id,
6993
+ model: wire.model
6994
+ }) : void 0) ?? await manager.create({
6995
+ headers: prepared.headers,
6996
+ model: wire.model
6997
+ });
6998
+ try {
6999
+ if (!connection.isOpen) await connection.connect({ signal: createFetchSignal() });
7000
+ const iterator = connection.sendRequest(wire)[Symbol.asyncIterator]();
7001
+ const first = await awaitFirstEvent(iterator);
7002
+ manager.recordSuccessfulStart();
7003
+ opts?.onTransport?.("upstream-ws");
7004
+ return (async function* () {
7005
+ yield toSseMessage(first);
7006
+ for (;;) {
7007
+ const result = await iterator.next();
7008
+ if (result.done) return;
7009
+ yield toSseMessage(result.value);
7010
+ }
7011
+ })();
7012
+ } catch (error) {
7013
+ manager.recordFallback();
7014
+ opts?.onTransport?.("upstream-ws-fallback");
7015
+ usedFallback = true;
7016
+ connection.close();
7017
+ consola.warn(`[responses] Upstream WS failed before first event, falling back to HTTP (${manager.consecutiveFallbacks}/3): ${error instanceof Error ? error.message : String(error)}`);
7018
+ }
7019
+ }
7020
+ if (!usedFallback) opts?.onTransport?.("http");
7021
+ return createResponsesViaHttp(prepared, opts?.headersCapture);
7022
+ };
7023
+ function canUseUpstreamWebSocket(model) {
7024
+ const manager = getUpstreamWsManager();
7025
+ return state.upstreamWebSocket && !manager.temporarilyDisabled && !manager.stopped && isWsResponsesSupported(model);
7026
+ }
7027
+ async function createResponsesViaHttp(prepared, headersCapture) {
6015
7028
  const { wire, headers } = prepared;
6016
7029
  const fetchSignal = createFetchSignal();
6017
7030
  const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
@@ -6020,14 +7033,46 @@ const createResponses = async (payload, opts) => {
6020
7033
  body: JSON.stringify(wire),
6021
7034
  signal: fetchSignal
6022
7035
  });
6023
- if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
7036
+ if (headersCapture) captureHttpHeaders(headersCapture, headers, response);
6024
7037
  if (!response.ok) {
6025
7038
  consola.error("Failed to create responses", response);
6026
7039
  throw await HTTPError.fromResponse("Failed to create responses", response, wire.model);
6027
7040
  }
6028
7041
  if (wire.stream) return events(response);
6029
7042
  return await response.json();
6030
- };
7043
+ }
7044
+ async function awaitFirstEvent(iterator) {
7045
+ const signal = createFetchSignal();
7046
+ if (!signal) {
7047
+ const first = await iterator.next();
7048
+ if (first.done) throw new Error("Upstream WebSocket closed before first event");
7049
+ return first.value;
7050
+ }
7051
+ return await new Promise((resolve, reject) => {
7052
+ const onAbort = () => {
7053
+ signal.removeEventListener("abort", onAbort);
7054
+ reject(/* @__PURE__ */ new Error("Upstream WebSocket timed out before first event"));
7055
+ };
7056
+ signal.addEventListener("abort", onAbort, { once: true });
7057
+ iterator.next().then((result) => {
7058
+ signal.removeEventListener("abort", onAbort);
7059
+ if (result.done) {
7060
+ reject(/* @__PURE__ */ new Error("Upstream WebSocket closed before first event"));
7061
+ return;
7062
+ }
7063
+ resolve(result.value);
7064
+ }).catch((error) => {
7065
+ signal.removeEventListener("abort", onAbort);
7066
+ reject(error instanceof Error ? error : new Error(String(error)));
7067
+ });
7068
+ });
7069
+ }
7070
+ function toSseMessage(event) {
7071
+ return {
7072
+ event: event.type,
7073
+ data: JSON.stringify(event)
7074
+ };
7075
+ }
6031
7076
  //#endregion
6032
7077
  //#region src/lib/request/strategies/network-retry.ts
6033
7078
  /**
@@ -6129,7 +7174,7 @@ function createTokenRefreshStrategy() {
6129
7174
  * centralizes that configuration to avoid duplication.
6130
7175
  */
6131
7176
  /** Create the FormatAdapter for Responses API pipeline execution */
6132
- function createResponsesAdapter(selectedModel, headersCapture, onPrepared) {
7177
+ function createResponsesAdapter(selectedModel, headersCapture, onPrepared, onTransport) {
6133
7178
  return {
6134
7179
  format: "openai-responses",
6135
7180
  sanitize: (p) => ({
@@ -6140,10 +7185,11 @@ function createResponsesAdapter(selectedModel, headersCapture, onPrepared) {
6140
7185
  execute: (p) => executeWithAdaptiveRateLimit(() => createResponses(p, {
6141
7186
  resolvedModel: selectedModel,
6142
7187
  headersCapture,
7188
+ onTransport,
6143
7189
  onPrepared: ({ wire, headers }) => {
6144
7190
  onPrepared?.({
6145
7191
  model: typeof wire.model === "string" ? wire.model : p.model,
6146
- messages: [],
7192
+ messages: extractInputItems(wire.input),
6147
7193
  payload: wire,
6148
7194
  headers,
6149
7195
  format: "openai-responses"
@@ -6160,6 +7206,14 @@ function createResponsesAdapter(selectedModel, headersCapture, onPrepared) {
6160
7206
  function createResponsesStrategies() {
6161
7207
  return [createNetworkRetryStrategy(), createTokenRefreshStrategy()];
6162
7208
  }
7209
+ function extractInputItems(input) {
7210
+ if (typeof input === "string") return [{
7211
+ type: "message",
7212
+ role: "user",
7213
+ content: input
7214
+ }];
7215
+ return input;
7216
+ }
6163
7217
  const CALL_PREFIX = "call_";
6164
7218
  const FC_PREFIX = "fc_";
6165
7219
  /**
@@ -6258,7 +7312,9 @@ async function handleResponseCreate(ws, rawPayload) {
6258
7312
  });
6259
7313
  const reqCtx = getRequestContextManager().create({
6260
7314
  endpoint: "openai-responses",
6261
- tuiLogId
7315
+ sessionId: resolveResponseSessionId(payload.previous_response_id),
7316
+ tuiLogId,
7317
+ rawPath: "/v1/responses"
6262
7318
  });
6263
7319
  reqCtx.setOriginalRequest({
6264
7320
  model: requestedModel,
@@ -6275,6 +7331,8 @@ async function handleResponseCreate(ws, rawPayload) {
6275
7331
  const headersCapture = {};
6276
7332
  const adapter = createResponsesAdapter(selectedModel, headersCapture, (wireRequest) => {
6277
7333
  reqCtx.setAttemptWireRequest(wireRequest);
7334
+ }, (transport) => {
7335
+ reqCtx.setAttemptTransport(transport);
6278
7336
  });
6279
7337
  const strategies = createResponsesStrategies();
6280
7338
  try {
@@ -6312,6 +7370,8 @@ async function handleResponseCreate(ws, rawPayload) {
6312
7370
  consola.debug("[WS] Skipping unparseable SSE event");
6313
7371
  }
6314
7372
  }
7373
+ if (!reqCtx.sessionId && acc.responseId) reqCtx.setSessionId(acc.responseId);
7374
+ registerResponseSession(acc.responseId, reqCtx.sessionId);
6315
7375
  const responseData = buildResponsesResponseData(acc, resolvedModel);
6316
7376
  reqCtx.complete(responseData);
6317
7377
  ws.close(1e3, "done");
@@ -7216,34 +8276,370 @@ function createOpenAIStreamAccumulator() {
7216
8276
  toolCallMap: /* @__PURE__ */ new Map()
7217
8277
  };
7218
8278
  }
7219
- /** Accumulate a single parsed OpenAI chunk into the accumulator */
7220
- function accumulateOpenAIStreamEvent(parsed, acc) {
7221
- if (parsed.model && !acc.model) acc.model = parsed.model;
7222
- if (parsed.usage) {
7223
- acc.inputTokens = parsed.usage.prompt_tokens;
7224
- acc.outputTokens = parsed.usage.completion_tokens;
7225
- if (parsed.usage.prompt_tokens_details?.cached_tokens !== void 0) acc.cachedTokens = parsed.usage.prompt_tokens_details.cached_tokens;
7226
- if (parsed.usage.completion_tokens_details?.reasoning_tokens !== void 0) acc.reasoningTokens = parsed.usage.completion_tokens_details.reasoning_tokens;
8279
+ /** Accumulate a single parsed OpenAI chunk into the accumulator */
8280
+ function accumulateOpenAIStreamEvent(parsed, acc) {
8281
+ if (parsed.model && !acc.model) acc.model = parsed.model;
8282
+ if (parsed.usage) {
8283
+ acc.inputTokens = parsed.usage.prompt_tokens;
8284
+ acc.outputTokens = parsed.usage.completion_tokens;
8285
+ if (parsed.usage.prompt_tokens_details?.cached_tokens !== void 0) acc.cachedTokens = parsed.usage.prompt_tokens_details.cached_tokens;
8286
+ if (parsed.usage.completion_tokens_details?.reasoning_tokens !== void 0) acc.reasoningTokens = parsed.usage.completion_tokens_details.reasoning_tokens;
8287
+ }
8288
+ const choice = parsed.choices[0];
8289
+ if (choice) {
8290
+ if (choice.delta.content) acc.rawContent += choice.delta.content;
8291
+ if (choice.delta.tool_calls) for (const tc of choice.delta.tool_calls) {
8292
+ const idx = tc.index;
8293
+ if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
8294
+ id: tc.id ?? "",
8295
+ name: tc.function?.name ?? "",
8296
+ argumentParts: []
8297
+ });
8298
+ const item = acc.toolCallMap.get(idx);
8299
+ if (item) {
8300
+ if (tc.id) item.id = tc.id;
8301
+ if (tc.function?.name) item.name = tc.function.name;
8302
+ if (tc.function?.arguments) item.argumentParts.push(tc.function.arguments);
8303
+ }
8304
+ }
8305
+ if (choice.finish_reason) acc.finishReason = choice.finish_reason;
8306
+ }
8307
+ }
8308
+ //#endregion
8309
+ //#region src/lib/openai/translate/cc-to-responses.ts
8310
+ const DROPPED_PARAMS = [
8311
+ "stop",
8312
+ "n",
8313
+ "frequency_penalty",
8314
+ "presence_penalty",
8315
+ "logit_bias",
8316
+ "logprobs",
8317
+ "seed"
8318
+ ];
8319
+ function splitInstructionsAndConversation(messages) {
8320
+ const systemTexts = [];
8321
+ const conversationMessages = [];
8322
+ for (const message of messages) {
8323
+ if (message.role === "system" || message.role === "developer") {
8324
+ const text = extractTextContent(message.content);
8325
+ if (text) systemTexts.push(text);
8326
+ continue;
8327
+ }
8328
+ conversationMessages.push(message);
8329
+ }
8330
+ return {
8331
+ instructions: systemTexts.length > 0 ? systemTexts.join("\n\n") : void 0,
8332
+ conversationMessages
8333
+ };
8334
+ }
8335
+ function translateChatCompletionsToResponses(payload) {
8336
+ const droppedParams = DROPPED_PARAMS.filter((key) => payload[key] !== void 0 && payload[key] !== null);
8337
+ const { instructions, conversationMessages } = splitInstructionsAndConversation(payload.messages);
8338
+ return {
8339
+ payload: {
8340
+ model: payload.model,
8341
+ input: translateMessages(conversationMessages),
8342
+ ...instructions !== void 0 && { instructions },
8343
+ ...payload.temperature !== void 0 && payload.temperature !== null && { temperature: payload.temperature },
8344
+ ...payload.top_p !== void 0 && payload.top_p !== null && { top_p: payload.top_p },
8345
+ ...payload.max_tokens !== void 0 && payload.max_tokens !== null && { max_output_tokens: payload.max_tokens },
8346
+ ...payload.stream !== void 0 && payload.stream !== null && { stream: payload.stream },
8347
+ ...payload.parallel_tool_calls !== void 0 && payload.parallel_tool_calls !== null && { parallel_tool_calls: payload.parallel_tool_calls },
8348
+ ...payload.user !== void 0 && { user: payload.user },
8349
+ ...payload.service_tier !== void 0 && { service_tier: payload.service_tier },
8350
+ ...payload.top_logprobs !== void 0 && payload.top_logprobs !== null && { top_logprobs: payload.top_logprobs },
8351
+ ...payload.tools && { tools: translateTools(payload.tools) },
8352
+ ...payload.tool_choice && { tool_choice: translateToolChoice(payload.tool_choice) },
8353
+ ...payload.response_format && { text: { format: translateResponseFormat(payload.response_format) } },
8354
+ ...payload.stream_options?.include_usage && { include: ["usage"] }
8355
+ },
8356
+ droppedParams
8357
+ };
8358
+ }
8359
+ function translateMessages(messages) {
8360
+ const items = [];
8361
+ for (const message of messages) switch (message.role) {
8362
+ case "user":
8363
+ items.push(convertUserMessage(message));
8364
+ break;
8365
+ case "assistant":
8366
+ items.push(...convertAssistantMessage(message));
8367
+ break;
8368
+ case "tool":
8369
+ items.push(convertToolMessage(message));
8370
+ break;
8371
+ default: break;
8372
+ }
8373
+ return items;
8374
+ }
8375
+ function convertUserMessage(message) {
8376
+ if (typeof message.content === "string") return {
8377
+ type: "message",
8378
+ role: "user",
8379
+ content: [{
8380
+ type: "input_text",
8381
+ text: message.content
8382
+ }]
8383
+ };
8384
+ if (!Array.isArray(message.content)) return {
8385
+ type: "message",
8386
+ role: "user",
8387
+ content: [{
8388
+ type: "input_text",
8389
+ text: ""
8390
+ }]
8391
+ };
8392
+ return {
8393
+ type: "message",
8394
+ role: "user",
8395
+ content: message.content.map((part) => convertUserContentPart(part))
8396
+ };
8397
+ }
8398
+ function convertUserContentPart(part) {
8399
+ if (part.type === "text") return {
8400
+ type: "input_text",
8401
+ text: part.text
8402
+ };
8403
+ return {
8404
+ type: "input_image",
8405
+ image_url: part.image_url.url,
8406
+ detail: part.image_url.detail
8407
+ };
8408
+ }
8409
+ function convertAssistantMessage(message) {
8410
+ const items = [];
8411
+ const text = extractTextContent(message.content);
8412
+ if (text) items.push({
8413
+ type: "message",
8414
+ role: "assistant",
8415
+ content: [{
8416
+ type: "output_text",
8417
+ text
8418
+ }]
8419
+ });
8420
+ for (const toolCall of message.tool_calls ?? []) items.push({
8421
+ type: "function_call",
8422
+ id: toolCall.id,
8423
+ call_id: toolCall.id,
8424
+ name: toolCall.function.name,
8425
+ arguments: toolCall.function.arguments
8426
+ });
8427
+ return items;
8428
+ }
8429
+ function convertToolMessage(message) {
8430
+ let output = "";
8431
+ if (typeof message.content === "string") output = message.content;
8432
+ else if (Array.isArray(message.content)) {
8433
+ const textParts = message.content.filter((part) => part.type === "text").map((part) => part.text);
8434
+ output = textParts.length > 0 ? textParts.join("") : JSON.stringify(message.content);
8435
+ }
8436
+ return {
8437
+ type: "function_call_output",
8438
+ call_id: message.tool_call_id ?? "",
8439
+ output
8440
+ };
8441
+ }
8442
+ function extractTextContent(content) {
8443
+ if (typeof content === "string") return content;
8444
+ if (!Array.isArray(content)) return "";
8445
+ return content.filter((part) => part.type === "text").map((part) => part.text).join("");
8446
+ }
8447
+ function translateTools(tools) {
8448
+ return tools.map((tool) => ({
8449
+ type: "function",
8450
+ name: tool.function.name,
8451
+ description: tool.function.description,
8452
+ parameters: tool.function.parameters,
8453
+ strict: tool.function.strict
8454
+ }));
8455
+ }
8456
+ function translateToolChoice(choice) {
8457
+ if (typeof choice === "string") return choice;
8458
+ return {
8459
+ type: "function",
8460
+ name: choice.function.name
8461
+ };
8462
+ }
8463
+ function translateResponseFormat(format) {
8464
+ if (format.type === "json_schema") return {
8465
+ type: "json_schema",
8466
+ name: format.json_schema.name,
8467
+ description: format.json_schema.description,
8468
+ schema: format.json_schema.schema,
8469
+ strict: format.json_schema.strict
8470
+ };
8471
+ return { type: format.type };
8472
+ }
8473
+ //#endregion
8474
+ //#region src/lib/openai/translate/responses-to-cc.ts
8475
+ function translateResponsesResponseToCC(response) {
8476
+ if (response.status === "failed") throw new HTTPError(response.error?.message ?? "Upstream response failed", 500, JSON.stringify(response.error ?? { status: response.status }), response.model);
8477
+ return {
8478
+ id: response.id,
8479
+ object: "chat.completion",
8480
+ created: response.created_at,
8481
+ model: response.model,
8482
+ choices: [{
8483
+ index: 0,
8484
+ message: extractMessageFromOutput(response.output),
8485
+ finish_reason: mapFinishReason(response.status, response.output, response.incomplete_details),
8486
+ logprobs: null
8487
+ }],
8488
+ ...response.usage && { usage: mapUsage(response.usage) },
8489
+ ...response.service_tier !== void 0 && { service_tier: response.service_tier }
8490
+ };
8491
+ }
8492
+ function extractMessageFromOutput(output) {
8493
+ const textParts = [];
8494
+ const toolCalls = [];
8495
+ for (const item of output) {
8496
+ if (item.type === "message") for (const part of item.content) {
8497
+ if (part.type === "output_text") textParts.push(part.text);
8498
+ if (part.type === "refusal") textParts.push(part.refusal);
8499
+ }
8500
+ if (item.type === "function_call") toolCalls.push({
8501
+ id: item.call_id,
8502
+ type: "function",
8503
+ function: {
8504
+ name: item.name,
8505
+ arguments: item.arguments
8506
+ }
8507
+ });
8508
+ }
8509
+ return {
8510
+ role: "assistant",
8511
+ content: textParts.join("") || null,
8512
+ ...toolCalls.length > 0 && { tool_calls: toolCalls }
8513
+ };
8514
+ }
8515
+ function mapFinishReason(status, output, incompleteDetails) {
8516
+ if (output.some((item) => item.type === "function_call")) return "tool_calls";
8517
+ switch (status) {
8518
+ case "completed": return "stop";
8519
+ case "incomplete": return mapIncompleteFinishReason(incompleteDetails);
8520
+ default: return "stop";
7227
8521
  }
7228
- const choice = parsed.choices[0];
7229
- if (choice) {
7230
- if (choice.delta.content) acc.rawContent += choice.delta.content;
7231
- if (choice.delta.tool_calls) for (const tc of choice.delta.tool_calls) {
7232
- const idx = tc.index;
7233
- if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
7234
- id: tc.id ?? "",
7235
- name: tc.function?.name ?? "",
7236
- argumentParts: []
7237
- });
7238
- const item = acc.toolCallMap.get(idx);
7239
- if (item) {
7240
- if (tc.id) item.id = tc.id;
7241
- if (tc.function?.name) item.name = tc.function.name;
7242
- if (tc.function?.arguments) item.argumentParts.push(tc.function.arguments);
8522
+ }
8523
+ function mapIncompleteFinishReason(incompleteDetails) {
8524
+ if (incompleteDetails?.reason === "content_filter") return "content_filter";
8525
+ return "length";
8526
+ }
8527
+ function mapUsage(usage) {
8528
+ return {
8529
+ prompt_tokens: usage.input_tokens,
8530
+ completion_tokens: usage.output_tokens,
8531
+ total_tokens: usage.total_tokens,
8532
+ ...usage.input_tokens_details?.cached_tokens !== void 0 && { prompt_tokens_details: { cached_tokens: usage.input_tokens_details.cached_tokens } },
8533
+ ...usage.output_tokens_details?.reasoning_tokens !== void 0 && { completion_tokens_details: { reasoning_tokens: usage.output_tokens_details.reasoning_tokens } }
8534
+ };
8535
+ }
8536
+ //#endregion
8537
+ //#region src/lib/openai/translate/responses-to-cc-stream.ts
8538
+ function createStreamTranslator(opts) {
8539
+ const state = {
8540
+ sentFirstChunk: false,
8541
+ responseId: "",
8542
+ model: "",
8543
+ toolCallIndexMap: /* @__PURE__ */ new Map(),
8544
+ nextToolCallIndex: 0,
8545
+ toolCallIds: /* @__PURE__ */ new Map(),
8546
+ includeUsage: opts.includeUsage
8547
+ };
8548
+ function translate(event) {
8549
+ switch (event.type) {
8550
+ case "response.created":
8551
+ state.responseId = event.response.id;
8552
+ state.model = event.response.model;
8553
+ state.sentFirstChunk = true;
8554
+ return [buildChunk(state, { role: "assistant" })];
8555
+ case "response.output_text.delta": return [buildChunk(state, { content: event.delta })];
8556
+ case "response.refusal.delta": return [buildChunk(state, { content: event.delta })];
8557
+ case "response.output_item.added": {
8558
+ if (event.item.type !== "function_call") return [];
8559
+ const toolCallIndex = state.nextToolCallIndex++;
8560
+ const callId = event.item.call_id || event.item.id;
8561
+ state.toolCallIndexMap.set(event.output_index, toolCallIndex);
8562
+ state.toolCallIds.set(event.output_index, callId);
8563
+ return [buildChunk(state, { tool_calls: [{
8564
+ index: toolCallIndex,
8565
+ id: callId,
8566
+ type: "function",
8567
+ function: { name: event.item.name }
8568
+ }] })];
8569
+ }
8570
+ case "response.function_call_arguments.delta": {
8571
+ const toolCallIndex = state.toolCallIndexMap.get(event.output_index);
8572
+ if (toolCallIndex === void 0) return [];
8573
+ return [buildChunk(state, { tool_calls: [{
8574
+ index: toolCallIndex,
8575
+ function: { arguments: event.delta }
8576
+ }] })];
7243
8577
  }
8578
+ case "response.completed": {
8579
+ syncStateFromResponse(state, event.response);
8580
+ const chunks = [buildChunk(state, {}, state.nextToolCallIndex > 0 ? "tool_calls" : "stop")];
8581
+ if (state.includeUsage && event.response.usage) chunks.push(buildUsageChunk(state, event.response));
8582
+ return chunks;
8583
+ }
8584
+ case "response.incomplete":
8585
+ syncStateFromResponse(state, event.response);
8586
+ return [buildChunk(state, {}, mapIncompleteFinishReason(event.response.incomplete_details))];
8587
+ case "response.failed": throw new Error(event.response.error?.message ?? "Upstream response failed");
8588
+ case "error": throw new Error(event.message ?? "Upstream error");
8589
+ default: return [];
7244
8590
  }
7245
- if (choice.finish_reason) acc.finishReason = choice.finish_reason;
7246
8591
  }
8592
+ return {
8593
+ translate,
8594
+ getState: () => state
8595
+ };
8596
+ }
8597
+ async function* translateResponsesStream(upstream, translator) {
8598
+ for await (const rawEvent of upstream) {
8599
+ if (!rawEvent.data || rawEvent.data === "[DONE]") continue;
8600
+ const event = JSON.parse(rawEvent.data);
8601
+ const chunks = translator.translate(event);
8602
+ for (const chunk of chunks) yield {
8603
+ data: JSON.stringify(chunk),
8604
+ event: "message"
8605
+ };
8606
+ }
8607
+ yield { data: "[DONE]" };
8608
+ }
8609
+ function syncStateFromResponse(state, response) {
8610
+ if (!state.responseId) state.responseId = response.id;
8611
+ if (!state.model) state.model = response.model;
8612
+ }
8613
+ function buildChunk(state, delta, finishReason = null) {
8614
+ return {
8615
+ id: state.responseId,
8616
+ object: "chat.completion.chunk",
8617
+ created: Math.floor(Date.now() / 1e3),
8618
+ model: state.model,
8619
+ choices: [{
8620
+ index: 0,
8621
+ delta,
8622
+ finish_reason: finishReason,
8623
+ logprobs: null
8624
+ }]
8625
+ };
8626
+ }
8627
+ function buildUsageChunk(state, response) {
8628
+ const usage = response.usage;
8629
+ return {
8630
+ id: state.responseId,
8631
+ object: "chat.completion.chunk",
8632
+ created: Math.floor(Date.now() / 1e3),
8633
+ model: state.model,
8634
+ choices: [],
8635
+ ...usage && { usage: {
8636
+ prompt_tokens: usage.input_tokens,
8637
+ completion_tokens: usage.output_tokens,
8638
+ total_tokens: usage.total_tokens,
8639
+ ...usage.input_tokens_details?.cached_tokens !== void 0 && { prompt_tokens_details: { cached_tokens: usage.input_tokens_details.cached_tokens } },
8640
+ ...usage.output_tokens_details?.reasoning_tokens !== void 0 && { completion_tokens_details: { reasoning_tokens: usage.output_tokens_details.reasoning_tokens } }
8641
+ } }
8642
+ };
7247
8643
  }
7248
8644
  //#endregion
7249
8645
  //#region src/lib/request/payload.ts
@@ -7416,6 +8812,7 @@ function createTruncationMarker$1(result) {
7416
8812
  }
7417
8813
  //#endregion
7418
8814
  //#region src/routes/chat-completions/handler.ts
8815
+ const DROPPED_CC_PARAMS_WARNING_CODE = "cc_to_responses_dropped_params";
7419
8816
  async function handleChatCompletion(c) {
7420
8817
  const originalPayload = await c.req.json();
7421
8818
  const clientModel = originalPayload.model;
@@ -7425,15 +8822,13 @@ async function handleChatCompletion(c) {
7425
8822
  originalPayload.model = resolvedModel;
7426
8823
  }
7427
8824
  const selectedModel = state.modelIndex.get(originalPayload.model);
7428
- if (!isEndpointSupported(selectedModel, ENDPOINT.CHAT_COMPLETIONS)) {
7429
- const msg = `Model "${originalPayload.model}" does not support the ${ENDPOINT.CHAT_COMPLETIONS} endpoint`;
7430
- throw new HTTPError(msg, 400, msg);
7431
- }
7432
8825
  originalPayload.messages = await processOpenAIMessages(originalPayload.messages, originalPayload.model);
7433
8826
  const tuiLogId = c.get("tuiLogId");
7434
8827
  const reqCtx = getRequestContextManager().create({
7435
8828
  endpoint: "openai-chat-completions",
7436
- tuiLogId
8829
+ sessionId: getSessionIdFromHeaders(c.req.raw.headers),
8830
+ tuiLogId,
8831
+ rawPath: c.req.path
7437
8832
  });
7438
8833
  reqCtx.setOriginalRequest({
7439
8834
  model: clientModel,
@@ -7455,13 +8850,25 @@ async function handleChatCompletion(c) {
7455
8850
  max_tokens: selectedModel?.capabilities?.limits?.max_output_tokens
7456
8851
  } : sanitizedPayload;
7457
8852
  if (isNullish(originalPayload.max_tokens)) consola.debug("Set max_tokens to:", JSON.stringify(finalPayload.max_tokens));
7458
- return executeRequest({
8853
+ if (isEndpointSupported(selectedModel, ENDPOINT.CHAT_COMPLETIONS)) return executeRequest({
7459
8854
  c,
7460
8855
  payload: finalPayload,
7461
8856
  originalPayload,
7462
8857
  selectedModel,
7463
8858
  reqCtx
7464
8859
  });
8860
+ if (isResponsesSupported(selectedModel)) {
8861
+ if (tuiLogId) tuiLogger.updateRequest(tuiLogId, { tags: ["via-responses"] });
8862
+ return executeRequestViaResponses({
8863
+ c,
8864
+ payload: finalPayload,
8865
+ originalPayload,
8866
+ selectedModel,
8867
+ reqCtx
8868
+ });
8869
+ }
8870
+ const msg = `Model "${originalPayload.model}" does not support the ${ENDPOINT.CHAT_COMPLETIONS} endpoint`;
8871
+ throw new HTTPError(msg, 400, msg);
7465
8872
  }
7466
8873
  /**
7467
8874
  * Execute the API call with reactive retry pipeline.
@@ -7470,34 +8877,102 @@ async function handleChatCompletion(c) {
7470
8877
  async function executeRequest(opts) {
7471
8878
  const { c, payload, originalPayload, selectedModel, reqCtx } = opts;
7472
8879
  const headersCapture = {};
7473
- const adapter = {
7474
- format: "openai-chat-completions",
7475
- sanitize: (p) => sanitizeOpenAIMessages(p),
7476
- execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p, {
7477
- resolvedModel: selectedModel,
7478
- headersCapture,
7479
- onPrepared: ({ wire, headers }) => {
7480
- reqCtx.setAttemptWireRequest({
7481
- model: typeof wire.model === "string" ? wire.model : payload.model,
7482
- messages: Array.isArray(wire.messages) ? wire.messages : [],
7483
- payload: wire,
7484
- headers,
7485
- format: "openai-chat-completions"
7486
- });
7487
- }
7488
- })),
7489
- logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
7490
- };
7491
- const strategies = [
8880
+ return executeRequestWithAdapter({
8881
+ c,
8882
+ payload,
8883
+ originalPayload,
8884
+ selectedModel,
8885
+ reqCtx,
8886
+ adapter: {
8887
+ format: "openai-chat-completions",
8888
+ sanitize: (p) => sanitizeOpenAIMessages(p),
8889
+ execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p, {
8890
+ resolvedModel: selectedModel,
8891
+ headersCapture,
8892
+ onPrepared: ({ wire, headers }) => {
8893
+ reqCtx.setAttemptWireRequest({
8894
+ model: typeof wire.model === "string" ? wire.model : payload.model,
8895
+ messages: Array.isArray(wire.messages) ? wire.messages : [],
8896
+ payload: wire,
8897
+ headers,
8898
+ format: "openai-chat-completions"
8899
+ });
8900
+ }
8901
+ })),
8902
+ logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
8903
+ },
8904
+ strategies: createChatCompletionsStrategies("Completions"),
8905
+ headersCapture
8906
+ });
8907
+ }
8908
+ async function executeRequestViaResponses(opts) {
8909
+ const { c, payload, originalPayload, selectedModel, reqCtx } = opts;
8910
+ const headersCapture = {};
8911
+ return executeRequestWithAdapter({
8912
+ c,
8913
+ payload,
8914
+ originalPayload,
8915
+ selectedModel,
8916
+ reqCtx,
8917
+ adapter: {
8918
+ format: "openai-chat-completions",
8919
+ sanitize: (p) => sanitizeOpenAIMessages(p),
8920
+ execute: async (ccPayload) => {
8921
+ const { payload: responsesPayload, droppedParams } = translateChatCompletionsToResponses(ccPayload);
8922
+ if (droppedParams.length > 0) recordDroppedCcParamsWarning(reqCtx, ccPayload.model, droppedParams);
8923
+ const finalPayload = state.normalizeResponsesCallIds ? normalizeCallIds(responsesPayload) : responsesPayload;
8924
+ const result = await executeWithAdaptiveRateLimit(() => createResponses(finalPayload, {
8925
+ resolvedModel: selectedModel,
8926
+ headersCapture,
8927
+ onPrepared: ({ wire, headers }) => {
8928
+ reqCtx.setAttemptWireRequest({
8929
+ model: typeof wire.model === "string" ? wire.model : ccPayload.model,
8930
+ messages: extractInputItems(wire.input),
8931
+ payload: wire,
8932
+ headers,
8933
+ format: "openai-responses"
8934
+ });
8935
+ }
8936
+ }));
8937
+ if (!ccPayload.stream) return {
8938
+ result: translateResponsesResponseToCC(result.result),
8939
+ queueWaitMs: result.queueWaitMs
8940
+ };
8941
+ return {
8942
+ result: translateResponsesStream(result.result, createStreamTranslator({ includeUsage: ccPayload.stream_options?.include_usage ?? false })),
8943
+ queueWaitMs: result.queueWaitMs
8944
+ };
8945
+ },
8946
+ logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
8947
+ },
8948
+ strategies: createChatCompletionsStrategies("Completions(→Responses)"),
8949
+ headersCapture
8950
+ });
8951
+ }
8952
+ function recordDroppedCcParamsWarning(reqCtx, model, droppedParams) {
8953
+ const message = `Chat Completions -> Responses translation dropped unsupported params: ${droppedParams.join(", ")}`;
8954
+ if (reqCtx.warningMessages.some((warning) => warning.code === DROPPED_CC_PARAMS_WARNING_CODE && warning.message === message)) return;
8955
+ consola.warn(`[CC→Responses] model=${model} ${message}`);
8956
+ reqCtx.addWarningMessage({
8957
+ code: DROPPED_CC_PARAMS_WARNING_CODE,
8958
+ message
8959
+ });
8960
+ if (reqCtx.tuiLogId) tuiLogger.updateRequest(reqCtx.tuiLogId, { tags: ["dropped-params"] });
8961
+ }
8962
+ function createChatCompletionsStrategies(label) {
8963
+ return [
7492
8964
  createNetworkRetryStrategy(),
7493
8965
  createTokenRefreshStrategy(),
7494
8966
  createAutoTruncateStrategy({
7495
8967
  truncate: (p, model, truncOpts) => autoTruncateOpenAI(p, model, truncOpts),
7496
8968
  resanitize: (p) => sanitizeOpenAIMessages(p),
7497
8969
  isEnabled: () => state.autoTruncate,
7498
- label: "Completions"
8970
+ label
7499
8971
  })
7500
8972
  ];
8973
+ }
8974
+ async function executeRequestWithAdapter(opts) {
8975
+ const { c, payload, originalPayload, selectedModel, reqCtx, adapter, strategies, headersCapture } = opts;
7501
8976
  let truncateResult;
7502
8977
  try {
7503
8978
  const result = await executeRequestPipeline({
@@ -7648,7 +9123,7 @@ chatCompletionRoutes.post("/", async (c) => {
7648
9123
  });
7649
9124
  //#endregion
7650
9125
  //#region src/routes/config/route.ts
7651
- /** Current effective runtime configuration (read-only, sanitized) */
9126
+ /** Current effective runtime configuration and editable config.yaml routes */
7652
9127
  const configRoutes = new Hono();
7653
9128
  configRoutes.get("/", (c) => {
7654
9129
  return c.json({
@@ -7659,13 +9134,21 @@ configRoutes.get("/", (c) => {
7659
9134
  immutableThinkingMessages: state.immutableThinkingMessages,
7660
9135
  dedupToolCalls: state.dedupToolCalls,
7661
9136
  contextEditingMode: state.contextEditingMode,
9137
+ contextEditingTrigger: state.contextEditingTrigger,
9138
+ contextEditingKeepTools: state.contextEditingKeepTools,
9139
+ contextEditingKeepThinking: state.contextEditingKeepThinking,
9140
+ toolSearchEnabled: state.toolSearchEnabled,
9141
+ cacheControlMode: state.cacheControlMode,
9142
+ nonDeferredTools: state.nonDeferredTools,
7662
9143
  rewriteSystemReminders: serializeRewriteSystemReminders(state.rewriteSystemReminders),
7663
9144
  stripReadToolResultTags: state.stripReadToolResultTags,
7664
9145
  systemPromptOverridesCount: state.systemPromptOverrides.length,
7665
9146
  normalizeResponsesCallIds: state.normalizeResponsesCallIds,
9147
+ upstreamWebSocket: state.upstreamWebSocket,
7666
9148
  fetchTimeout: state.fetchTimeout,
7667
9149
  streamIdleTimeout: state.streamIdleTimeout,
7668
9150
  staleRequestMaxAge: state.staleRequestMaxAge,
9151
+ modelRefreshInterval: state.modelRefreshInterval,
7669
9152
  shutdownGracefulWait: state.shutdownGracefulWait,
7670
9153
  shutdownAbortWait: state.shutdownAbortWait,
7671
9154
  historyLimit: state.historyLimit,
@@ -7674,6 +9157,49 @@ configRoutes.get("/", (c) => {
7674
9157
  rateLimiter: state.adaptiveRateLimitConfig ?? null
7675
9158
  });
7676
9159
  });
9160
// GET /config/yaml — return the contents of config.yaml as JSON.
configRoutes.get("/yaml", async (c) => {
  try {
    const config = await loadRawConfigFile();
    return c.json(config);
  } catch (error) {
    // Surface the read/parse failure in the same { error, details } shape
    // the PUT handler uses, with "$" as the root field path.
    const message = error instanceof Error ? error.message : "Failed to read config.yaml";
    return c.json({
      error: "Failed to read config.yaml",
      details: [{
        field: "$",
        message
      }]
    }, 500);
  }
});
9175
// PUT /config/yaml — validate the JSON body, merge it into the existing
// YAML document, persist it, then re-apply the config to the running state.
configRoutes.put("/yaml", async (c) => {
  let body;
  try {
    body = await c.req.json();
  } catch {
    return c.json({
      error: "Invalid JSON body",
      details: [{
        field: "$",
        message: "Request body must be valid JSON"
      }]
    }, 400);
  }
  // Reject the whole update on any validation error; details carry
  // per-field messages produced by validateConfigBody.
  const validation = validateConfigBody(body);
  if (!validation.valid) return c.json({
    error: "Config validation failed",
    details: validation.details
  }, 400);
  const doc = await loadEditableConfigDocument();
  mergeConfigIntoDocument(doc, validation.value);
  // Ensure the app directory exists before writing (first-run case).
  await fs.mkdir(PATHS.APP_DIR, { recursive: true });
  await fs.writeFile(PATHS.CONFIG_YAML, doc.toString(), "utf8");
  // Invalidate caches and push the new configuration into runtime state.
  resetConfigCache();
  resetConfigManagedState();
  await applyConfigToState();
  // Echo back the freshly persisted config so the client sees the result.
  const saved = await loadRawConfigFile();
  return c.json(saved);
});
7677
9203
  /**
7678
9204
  * Serialize rewriteSystemReminders for API output.
7679
9205
  * CompiledRewriteRule contains RegExp objects which don't serialize well —
@@ -7688,6 +9214,322 @@ function serializeRewriteSystemReminders(value) {
7688
9214
  ...rule.modelPattern ? { model: rule.modelPattern.source } : {}
7689
9215
  }));
7690
9216
  }
9217
/** Allowed top-level keys in a config.yaml update body; anything else is rejected. */
const TOP_LEVEL_KEYS = new Set([
  "proxy",
  "model_overrides",
  "stream_idle_timeout",
  "fetch_timeout",
  "stale_request_max_age",
  "model_refresh_interval",
  "shutdown",
  "history",
  "anthropic",
  "openai-responses",
  "rate_limiter",
  "compress_tool_results_before_truncate",
  "system_prompt_overrides",
  "system_prompt_prepend",
  "system_prompt_append"
]);
/** Allowed keys under the `anthropic` section. */
const ANTHROPIC_KEYS = new Set([
  "strip_server_tools",
  "dedup_tool_calls",
  "immutable_thinking_messages",
  "strip_read_tool_result_tags",
  "context_editing",
  "context_editing_trigger",
  "context_editing_keep_tools",
  "context_editing_keep_thinking",
  "tool_search",
  "cache_control",
  "auto_cache_control",
  "non_deferred_tools",
  "rewrite_system_reminders"
]);
/** Allowed keys for the simple nested sections (validated as scalar maps). */
const SHUTDOWN_KEYS = new Set(["graceful_wait", "abort_wait"]);
const HISTORY_KEYS = new Set(["limit", "min_entries"]);
const RESPONSES_KEYS = new Set(["normalize_call_ids", "upstream_websocket"]);
const RATE_LIMITER_KEYS = new Set([
  "retry_interval",
  "request_interval",
  "recovery_timeout",
  "consecutive_successes"
]);
/**
 * `anthropic` keys whose values are collections; these are excluded from the
 * scalar merge and replaced wholesale in mergeConfigIntoDocument.
 */
const ANTHROPIC_COLLECTION_KEYS = new Set(["rewrite_system_reminders", "non_deferred_tools"]);
9259
/**
 * Validate a PUT /config/yaml request body.
 *
 * Returns `{ valid: true, value }` on success or
 * `{ valid: false, details: [{ field, message, value? }] }` on failure.
 * Unknown keys at any level are reported; each known key is type-checked
 * by the dedicated validate* helper. All errors are accumulated into
 * `details` rather than failing fast.
 */
function validateConfigBody(input) {
  if (!isPlainObject(input)) return {
    valid: false,
    details: [{
      field: "$",
      message: "Config body must be a JSON object",
      value: input
    }]
  };
  const body = input;
  const details = [];
  // Reject unknown top-level fields first, then check each known field.
  validateUnknownKeys(body, TOP_LEVEL_KEYS, "", details);
  if (hasOwn(body, "proxy")) validateOptionalString(body.proxy, "proxy", details, { validateUrlScheme: true });
  if (hasOwn(body, "model_overrides")) validateStringMap(body.model_overrides, "model_overrides", details);
  // Timeouts / intervals: non-negative integers (null clears the setting).
  if (hasOwn(body, "stream_idle_timeout")) validateNonNegativeInteger(body.stream_idle_timeout, "stream_idle_timeout", details);
  if (hasOwn(body, "fetch_timeout")) validateNonNegativeInteger(body.fetch_timeout, "fetch_timeout", details);
  if (hasOwn(body, "stale_request_max_age")) validateNonNegativeInteger(body.stale_request_max_age, "stale_request_max_age", details);
  if (hasOwn(body, "model_refresh_interval")) validateNonNegativeInteger(body.model_refresh_interval, "model_refresh_interval", details);
  if (hasOwn(body, "compress_tool_results_before_truncate")) validateBoolean(body.compress_tool_results_before_truncate, "compress_tool_results_before_truncate", details);
  if (hasOwn(body, "system_prompt_prepend")) validateOptionalString(body.system_prompt_prepend, "system_prompt_prepend", details);
  if (hasOwn(body, "system_prompt_append")) validateOptionalString(body.system_prompt_append, "system_prompt_append", details);
  // Only top-level system_prompt_overrides rules may carry a `model` pattern.
  if (hasOwn(body, "system_prompt_overrides")) validateRewriteRules(body.system_prompt_overrides, "system_prompt_overrides", details, { allowModel: true });
  // Nested sections: each is an object (or null) with a fixed key set.
  if (hasOwn(body, "shutdown")) validateNestedObject(body.shutdown, "shutdown", SHUTDOWN_KEYS, details, (value, path) => validateNonNegativeInteger(value, path, details));
  if (hasOwn(body, "history")) validateNestedObject(body.history, "history", HISTORY_KEYS, details, (value, path) => validateNonNegativeInteger(value, path, details));
  if (hasOwn(body, "openai-responses")) validateNestedObject(body["openai-responses"], "openai-responses", RESPONSES_KEYS, details, (value, path) => validateBoolean(value, path, details));
  if (hasOwn(body, "rate_limiter")) validateNestedObject(body.rate_limiter, "rate_limiter", RATE_LIMITER_KEYS, details, (value, path) => validateNonNegativeInteger(value, path, details));
  if (hasOwn(body, "anthropic")) validateAnthropic(body.anthropic, details);
  if (details.length > 0) return {
    valid: false,
    details
  };
  return {
    valid: true,
    value: input
  };
}
9295
/**
 * Validate the `anthropic` section of a config update body.
 * Accumulates field-level errors into `details`; `null` clears the section
 * and is always accepted.
 */
function validateAnthropic(value, details) {
  if (value === null) return;
  if (!isPlainObject(value)) {
    pushDetail(details, "anthropic", "Must be an object or null", value);
    return;
  }
  validateUnknownKeys(value, ANTHROPIC_KEYS, "anthropic", details);
  if (hasOwn(value, "strip_server_tools")) validateBoolean(value.strip_server_tools, "anthropic.strip_server_tools", details);
  if (hasOwn(value, "immutable_thinking_messages")) validateBoolean(value.immutable_thinking_messages, "anthropic.immutable_thinking_messages", details);
  if (hasOwn(value, "strip_read_tool_result_tags")) validateBoolean(value.strip_read_tool_result_tags, "anthropic.strip_read_tool_result_tags", details);
  if (hasOwn(value, "dedup_tool_calls")) {
    // Mixed-type enum: booleans toggle the feature, strings select the mode.
    const allowed = new Set([
      false,
      true,
      "input",
      "result"
    ]);
    validateEnum(value.dedup_tool_calls, "anthropic.dedup_tool_calls", allowed, details);
  }
  if (hasOwn(value, "context_editing")) validateEnum(value.context_editing, "anthropic.context_editing", new Set([
    "off",
    "clear-thinking",
    "clear-tooluse",
    "clear-both"
  ]), details);
  if (hasOwn(value, "context_editing_trigger")) validateNonNegativeInteger(value.context_editing_trigger, "anthropic.context_editing_trigger", details);
  if (hasOwn(value, "context_editing_keep_tools")) validateNonNegativeInteger(value.context_editing_keep_tools, "anthropic.context_editing_keep_tools", details);
  if (hasOwn(value, "context_editing_keep_thinking")) validateNonNegativeInteger(value.context_editing_keep_thinking, "anthropic.context_editing_keep_thinking", details);
  if (hasOwn(value, "tool_search")) validateBoolean(value.tool_search, "anthropic.tool_search", details);
  if (hasOwn(value, "cache_control")) {
    // NOTE(review): unlike validateEnum, this inline check rejects null
    // (null is not in `valid`) — confirm whether null should clear the field.
    const valid = [
      "disabled",
      "passthrough",
      "sanitize",
      "proxied"
    ];
    if (!valid.includes(value.cache_control)) details.push({
      field: "anthropic.cache_control",
      message: `Must be one of: ${valid.join(", ")}`,
      value: value.cache_control
    });
  }
  if (hasOwn(value, "auto_cache_control")) validateBoolean(value.auto_cache_control, "anthropic.auto_cache_control (deprecated)", details);
  if (hasOwn(value, "non_deferred_tools")) validateStringArray(value.non_deferred_tools, "anthropic.non_deferred_tools", details);
  if (hasOwn(value, "rewrite_system_reminders")) {
    const rewrite = value.rewrite_system_reminders;
    // A plain boolean toggles the feature and needs no rule validation.
    // (This `return` exits the whole function, which is safe only while
    // rewrite_system_reminders remains the last field checked.)
    if (typeof rewrite === "boolean") return;
    validateRewriteRules(rewrite, "anthropic.rewrite_system_reminders", details, { allowModel: false });
  }
}
9345
/** Report every key of `object` that is not listed in `allowedKeys`. */
function validateUnknownKeys(object, allowedKeys, parentPath, details) {
  for (const [key, value] of Object.entries(object)) {
    if (!allowedKeys.has(key)) {
      const fieldPath = parentPath ? `${parentPath}.${key}` : key;
      pushDetail(details, fieldPath, "Unknown config field", value);
    }
  }
}
9351
/**
 * Validate a nested config section: must be a plain object (or null).
 * Unknown keys are reported, then `validateValue` runs on every entry.
 */
function validateNestedObject(value, field, allowedKeys, details, validateValue) {
  if (value === null) return;
  if (!isPlainObject(value)) {
    pushDetail(details, field, "Must be an object or null", value);
    return;
  }
  validateUnknownKeys(value, allowedKeys, field, details);
  for (const [childKey, childValue] of Object.entries(value)) {
    validateValue(childValue, `${field}.${childKey}`);
  }
}
9360
/**
 * Validate a string→string map (e.g. model_overrides): every key and every
 * target must be a non-empty string. `null` clears the map and is accepted.
 */
function validateStringMap(value, field, details) {
  if (value === null) return;
  if (!isPlainObject(value)) {
    pushDetail(details, field, "Must be an object or null", value);
    return;
  }
  for (const [key, target] of Object.entries(value)) {
    const keyIsBlank = key.trim().length === 0;
    if (keyIsBlank) {
      pushDetail(details, `${field}.${key}`, "Override key must be a non-empty string", key);
    }
    const targetIsValid = typeof target === "string" && target.trim().length > 0;
    if (!targetIsValid) {
      pushDetail(details, `${field}.${key}`, "Override target must be a non-empty string", target);
    }
  }
}
9371
/** Validate an array of non-empty strings; `null` is accepted (clears it). */
function validateStringArray(value, field, details) {
  if (value === null) return;
  if (!Array.isArray(value)) {
    pushDetail(details, field, "Must be an array of strings or null", value);
    return;
  }
  value.forEach((item, index) => {
    const isNonEmptyString = typeof item === "string" && item.trim().length > 0;
    if (!isNonEmptyString) {
      pushDetail(details, `${field}.${index}`, "Must be a non-empty string", item);
    }
  });
}
9379
/**
 * Validate an array of rewrite rules ({ from, to, method?, model? }).
 *
 * `options.allowModel` controls whether a per-rule `model` pattern is
 * permitted (true for system_prompt_overrides, false for
 * anthropic.rewrite_system_reminders). `null` clears the list and is
 * accepted; callers handle the boolean form before calling this.
 */
function validateRewriteRules(value, field, details, options) {
  if (value === null) return;
  if (!Array.isArray(value)) {
    pushDetail(details, field, "Must be an array, boolean, or null", value);
    return;
  }
  for (const [index, item] of value.entries()) {
    const itemField = `${field}.${index}`;
    if (!isPlainObject(item)) {
      pushDetail(details, itemField, "Rule must be an object", item);
      continue;
    }
    validateUnknownKeys(item, options.allowModel ? new Set([
      "from",
      "to",
      "method",
      "model"
    ]) : new Set([
      "from",
      "to",
      "method"
    ]), itemField, details);
    // `from` is mandatory; without it the remaining checks are pointless.
    if (typeof item.from !== "string" || item.from.length === 0) {
      pushDetail(details, `${itemField}.from`, "Must be a non-empty string", item.from);
      continue;
    }
    if (typeof item.to !== "string") pushDetail(details, `${itemField}.to`, "Must be a string", item.to);
    if (item.method !== void 0 && item.method !== "line" && item.method !== "regex") pushDetail(details, `${itemField}.method`, "Must be 'line' or 'regex'", item.method);
    if (!options.allowModel && hasOwn(item, "model")) pushDetail(details, `${itemField}.model`, "Field is not supported here", item.model);
    if (options.allowModel && item.model !== void 0 && typeof item.model !== "string") pushDetail(details, `${itemField}.model`, "Must be a string", item.model);
    // Only attempt regex compilation when this rule produced no field errors.
    if (details.some((detail) => detail.field.startsWith(`${itemField}.`))) continue;
    if (compileRewriteRule({
      from: item.from,
      to: item.to,
      ...item.method ? { method: item.method } : {},
      ...options.allowModel && typeof item.model === "string" ? { model: item.model } : {}
    }) === null) pushDetail(details, `${itemField}.from`, "Invalid rewrite rule regex", item.from);
  }
}
9418
/**
 * Validate a nullable string field. With `options.validateUrlScheme` set,
 * the string is additionally checked as a proxy URL.
 */
function validateOptionalString(value, field, details, options) {
  if (value === null) return;
  if (typeof value === "string") {
    if (options?.validateUrlScheme) validateProxy(value, field, details);
    return;
  }
  pushDetail(details, field, "Must be a string or null", value);
}
9426
/** Validate a proxy URL: must parse and use an http/https/socks5(h) scheme. */
function validateProxy(value, field, details) {
  const allowedSchemes = [
    "http:",
    "https:",
    "socks5:",
    "socks5h:"
  ];
  let parsed;
  try {
    parsed = new URL(value);
  } catch {
    pushDetail(details, field, "Proxy must be a valid URL", value);
    return;
  }
  if (!allowedSchemes.includes(parsed.protocol)) {
    pushDetail(details, field, "Proxy must use http, https, socks5, or socks5h scheme", value);
  }
}
9439
/** Validate a nullable boolean field. */
function validateBoolean(value, field, details) {
  if (value === null || typeof value === "boolean") return;
  pushDetail(details, field, "Must be a boolean or null", value);
}
9443
/** Validate a nullable non-negative-integer field. */
function validateNonNegativeInteger(value, field, details) {
  if (value === null) return;
  const isValid = Number.isInteger(value) && value >= 0;
  if (!isValid) pushDetail(details, field, "Must be a non-negative integer or null", value);
}
9447
/** Validate a nullable enum field against a Set of allowed values. */
function validateEnum(value, field, allowed, details) {
  if (value === null || allowed.has(value)) return;
  const choices = [...allowed].map(String).join(", ");
  pushDetail(details, field, `Must be one of: ${choices}`, value);
}
9451
/**
 * Append a validation error to `details`. The `value` property is only
 * attached when the offending value is defined.
 */
function pushDetail(details, field, message, value) {
  const detail = { field, message };
  if (value !== void 0) detail.value = value;
  details.push(detail);
}
9458
/** Prototype-safe own-property check. */
function hasOwn(object, key) {
  return Object.hasOwn(object, key);
}
9461
/** True for non-null, non-array objects (the JSON "object" shape). */
function isPlainObject(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
9464
/**
 * Load config.yaml as an editable YAML Document so edits can be merged in
 * without discarding the file's existing structure.
 * A missing file is treated as an empty mapping rather than an error.
 */
async function loadEditableConfigDocument() {
  try {
    return parseExistingDocument(await fs.readFile(PATHS.CONFIG_YAML, "utf8"));
  } catch (err) {
    // ENOENT → start from an empty mapping; any other error is fatal.
    if (err.code === "ENOENT") return parseDocument("{}\n");
    throw err;
  }
}
9472
/**
 * Parse existing config.yaml text into a YAML Document.
 * Throws on YAML syntax errors and when the top level is not a mapping
 * (scalars and sequences are rejected; empty/blank content becomes `{}`).
 */
function parseExistingDocument(content) {
  const doc = parseDocument(content.trim().length > 0 ? content : "{}\n");
  if (doc.errors.length > 0) throw new Error(doc.errors[0]?.message ?? "Invalid config.yaml");
  // toJSON() of an empty document is null/undefined, which is acceptable.
  const raw = doc.toJSON();
  if (raw !== null && raw !== void 0 && (typeof raw !== "object" || Array.isArray(raw))) throw new Error("config.yaml must contain a top-level mapping");
  return doc;
}
9479
/**
 * Merge a validated config body into the YAML document in place.
 *
 * Scalars are set (or deleted when null/undefined) key by key; collections
 * (model_overrides, rewrite rules, non_deferred_tools) are replaced
 * wholesale. Keys absent from `body` are left untouched, so a partial PUT
 * only updates the fields it mentions.
 */
function mergeConfigIntoDocument(doc, body) {
  if (hasOwn(body, "proxy")) setScalar(doc, ["proxy"], body.proxy);
  if (hasOwn(body, "stream_idle_timeout")) setScalar(doc, ["stream_idle_timeout"], body.stream_idle_timeout);
  if (hasOwn(body, "fetch_timeout")) setScalar(doc, ["fetch_timeout"], body.fetch_timeout);
  if (hasOwn(body, "stale_request_max_age")) setScalar(doc, ["stale_request_max_age"], body.stale_request_max_age);
  if (hasOwn(body, "model_refresh_interval")) setScalar(doc, ["model_refresh_interval"], body.model_refresh_interval);
  if (hasOwn(body, "compress_tool_results_before_truncate")) setScalar(doc, ["compress_tool_results_before_truncate"], body.compress_tool_results_before_truncate);
  if (hasOwn(body, "system_prompt_prepend")) setScalar(doc, ["system_prompt_prepend"], body.system_prompt_prepend);
  if (hasOwn(body, "system_prompt_append")) setScalar(doc, ["system_prompt_append"], body.system_prompt_append);
  if (hasOwn(body, "model_overrides")) replaceCollection(doc, ["model_overrides"], body.model_overrides);
  if (hasOwn(body, "system_prompt_overrides")) replaceCollection(doc, ["system_prompt_overrides"], body.system_prompt_overrides);
  if (hasOwn(body, "rate_limiter")) setNestedScalarContainer(doc, ["rate_limiter"], body.rate_limiter);
  if (hasOwn(body, "shutdown")) setNestedScalarContainer(doc, ["shutdown"], body.shutdown);
  if (hasOwn(body, "history")) setNestedScalarContainer(doc, ["history"], body.history);
  if (hasOwn(body, "openai-responses")) setNestedScalarContainer(doc, ["openai-responses"], body["openai-responses"]);
  if (hasOwn(body, "anthropic")) {
    const anthropic = body.anthropic;
    // null deletes the whole section; otherwise merge scalar keys and
    // replace the collection keys separately.
    if (anthropic === null) doc.deleteIn(["anthropic"]);
    else if (anthropic) {
      setNestedScalarContainer(doc, ["anthropic"], anthropic, { excludeKeys: ANTHROPIC_COLLECTION_KEYS });
      if (hasOwn(anthropic, "rewrite_system_reminders")) {
        const rewrite = anthropic.rewrite_system_reminders;
        // An empty rules array is persisted as `false` (feature disabled).
        replaceCollection(doc, ["anthropic", "rewrite_system_reminders"], Array.isArray(rewrite) && rewrite.length === 0 ? false : rewrite);
      }
      if (hasOwn(anthropic, "non_deferred_tools")) replaceCollection(doc, ["anthropic", "non_deferred_tools"], anthropic.non_deferred_tools);
    }
  }
}
9507
/**
 * Set a scalar at `path` in the YAML document; a nullish value deletes
 * the key instead.
 */
function setScalar(doc, path, value) {
  if (value == null) {
    doc.deleteIn(path);
  } else {
    doc.setIn(path, value);
  }
}
9514
/**
 * Merge a flat object of scalars under `path`, key by key. A nullish value
 * deletes the whole container; keys listed in `options.excludeKeys` are
 * skipped (they are handled as collections elsewhere).
 */
function setNestedScalarContainer(doc, path, value, options) {
  if (value == null) {
    doc.deleteIn(path);
    return;
  }
  if (!isPlainObject(value)) return;
  const excluded = options?.excludeKeys;
  for (const key of Object.keys(value)) {
    if (excluded?.has(key)) continue;
    setScalar(doc, path.concat(key), value[key]);
  }
}
9525
/**
 * Replace a collection at `path` wholesale: always delete the existing node,
 * then set the new value unless it is nullish (nullish just clears it).
 */
function replaceCollection(doc, path, value) {
  doc.deleteIn(path);
  if (value == null) return;
  doc.setIn(path, value);
}
7691
9533
  //#endregion
7692
9534
  //#region src/lib/openai/embeddings.ts
7693
9535
  const createEmbeddings = async (payload) => {
@@ -7743,7 +9585,9 @@ function handleGetEntries(c) {
7743
9585
  }
7744
9586
  function handleGetEntry(c) {
7745
9587
  if (!isHistoryEnabled()) return c.json({ error: "History recording is not enabled" }, 400);
7746
- const entry = getEntry(c.req.param("id"));
9588
+ const id = c.req.param("id");
9589
+ if (!id) return c.json({ error: "Entry id is required" }, 400);
9590
+ const entry = getEntry(id);
7747
9591
  if (!entry) return c.json({ error: "Entry not found" }, 404);
7748
9592
  return c.json(entry);
7749
9593
  }
@@ -7782,6 +9626,7 @@ function handleGetSessions(c) {
7782
9626
  function handleGetSession(c) {
7783
9627
  if (!isHistoryEnabled()) return c.json({ error: "History recording is not enabled" }, 400);
7784
9628
  const id = c.req.param("id");
9629
+ if (!id) return c.json({ error: "Session id is required" }, 400);
7785
9630
  const session = getSession(id);
7786
9631
  if (!session) return c.json({ error: "Session not found" }, 404);
7787
9632
  const query = c.req.query();
@@ -7796,7 +9641,9 @@ function handleGetSession(c) {
7796
9641
  }
7797
9642
  function handleDeleteSession(c) {
7798
9643
  if (!isHistoryEnabled()) return c.json({ error: "History recording is not enabled" }, 400);
7799
- if (!deleteSession(c.req.param("id"))) return c.json({ error: "Session not found" }, 404);
9644
+ const id = c.req.param("id");
9645
+ if (!id) return c.json({ error: "Session id is required" }, 400);
9646
+ if (!deleteSession(id)) return c.json({ error: "Session not found" }, 404);
7800
9647
  return c.json({
7801
9648
  success: true,
7802
9649
  message: "Session deleted"
@@ -7805,6 +9652,7 @@ function handleDeleteSession(c) {
7805
9652
  //#endregion
7806
9653
  //#region src/routes/history/route.ts
7807
9654
  const historyRoutes = new Hono();
9655
+ historyRoutes.get("/", (c) => c.redirect("/ui#/v/activity", 302));
7808
9656
  historyRoutes.all("/", (c) => c.json({ error: "Not Found" }, 404));
7809
9657
  /** API endpoints */
7810
9658
  historyRoutes.get("/api/entries", handleGetEntries);
@@ -8535,7 +10383,7 @@ function processToolBlocks(messages, tools) {
8535
10383
  orphanedToolResultCount++;
8536
10384
  continue;
8537
10385
  }
8538
- } else if (block.type !== "text" && block.type !== "image") {
10386
+ } else if (block.type !== "text" && block.type !== "image" && block.type !== "document") {
8539
10387
  orphanedToolResultCount++;
8540
10388
  continue;
8541
10389
  }
@@ -9082,12 +10930,12 @@ function isAnthropicFeatureUnsupported(modelId, feature) {
9082
10930
  /**
9083
10931
  * Context editing is supported by a broader set of models:
9084
10932
  * - Claude Haiku 4.5
9085
- * - Claude Sonnet 4/4.5
10933
+ * - Claude Sonnet 4/4.5/4.6
9086
10934
  * - Claude Opus 4/4.1/4.5/4.6
9087
10935
  */
9088
10936
  function modelSupportsContextEditing(modelId) {
9089
10937
  const normalized = normalizeForMatching(modelId);
9090
- return normalized.startsWith("claude-haiku-4-5") || normalized.startsWith("claude-sonnet-4-5") || normalized.startsWith("claude-sonnet-4") || normalized.startsWith("claude-opus-4-5") || normalized.startsWith("claude-opus-4-6") || normalized.startsWith("claude-opus-4-1") || normalized.startsWith("claude-opus-4");
10938
+ return normalized.startsWith("claude-haiku-4-5") || normalized.startsWith("claude-sonnet-4-6") || normalized.startsWith("claude-sonnet-4-5") || normalized === "claude-sonnet-4" || normalized.startsWith("claude-opus-4-5") || normalized.startsWith("claude-opus-4-6") || normalized.startsWith("claude-opus-4-1") || normalized === "claude-opus-41" || normalized === "claude-opus-4";
9091
10939
  }
9092
10940
  /**
9093
10941
  * Check if context editing is enabled for a model.
@@ -9099,11 +10947,12 @@ function isContextEditingEnabled(modelId) {
9099
10947
  }
9100
10948
  /**
9101
10949
  * Tool search is supported by:
10950
+ * - Claude Sonnet 4.5/4.6
9102
10951
  * - Claude Opus 4.5/4.6
9103
10952
  */
9104
10953
  function modelSupportsToolSearch(modelId) {
9105
10954
  const normalized = normalizeForMatching(modelId);
9106
- return normalized.startsWith("claude-opus-4-5") || normalized.startsWith("claude-opus-4-6");
10955
+ return normalized.startsWith("claude-sonnet-4-5") || normalized.startsWith("claude-sonnet-4-6") || normalized.startsWith("claude-opus-4-5") || normalized.startsWith("claude-opus-4-6");
9107
10956
  }
9108
10957
  /**
9109
10958
  * Check if a model supports adaptive thinking (from model metadata).
@@ -9156,10 +11005,9 @@ function buildAnthropicBetaHeaders(modelId, resolvedModel, opts) {
9156
11005
  */
9157
11006
  function buildContextManagement(mode, hasThinking) {
9158
11007
  if (mode === "off") return;
9159
- const triggerType = "input_tokens";
9160
- const triggerValue = 1e5;
9161
- const keepCount = 3;
9162
- const thinkingKeepTurns = 1;
11008
+ const triggerValue = state.contextEditingTrigger;
11009
+ const keepCount = state.contextEditingKeepTools;
11010
+ const thinkingKeepTurns = state.contextEditingKeepThinking;
9163
11011
  const edits = [];
9164
11012
  if ((mode === "clear-thinking" || mode === "clear-both") && hasThinking) edits.push({
9165
11013
  type: "clear_thinking_20251015",
@@ -9171,7 +11019,7 @@ function buildContextManagement(mode, hasThinking) {
9171
11019
  if (mode === "clear-tooluse" || mode === "clear-both") edits.push({
9172
11020
  type: "clear_tool_uses_20250919",
9173
11021
  trigger: {
9174
- type: triggerType,
11022
+ type: "input_tokens",
9175
11023
  value: triggerValue
9176
11024
  },
9177
11025
  keep: {
@@ -9301,8 +11149,10 @@ function buildHistoryToolStubs(historyToolNames) {
9301
11149
  */
9302
11150
  function processToolPipeline(tools, modelId, messages) {
9303
11151
  const existingNamesLower = new Set(tools.map((t) => t.name.toLowerCase()));
9304
- const toolSearchEnabled = modelSupportsToolSearch(modelId);
11152
+ const toolSearchEnabled = state.toolSearchEnabled && modelSupportsToolSearch(modelId);
9305
11153
  const historyToolNames = toolSearchEnabled ? collectHistoryToolNames(messages) : void 0;
11154
+ const nonDeferred = [];
11155
+ const deferred = [];
9306
11156
  const result = [];
9307
11157
  if (toolSearchEnabled) result.push({
9308
11158
  name: TOOL_SEARCH_TOOL_NAME,
@@ -9311,11 +11161,11 @@ function processToolPipeline(tools, modelId, messages) {
9311
11161
  });
9312
11162
  for (const tool of tools) {
9313
11163
  const normalized = tool.type ? tool : ensureInputSchema(tool);
9314
- const shouldDefer = toolSearchEnabled && tool.defer_loading !== false && !NON_DEFERRED_TOOL_NAMES.has(tool.name) && !historyToolNames?.has(tool.name);
9315
- result.push(shouldDefer ? {
11164
+ if (toolSearchEnabled && tool.defer_loading !== false && !NON_DEFERRED_TOOL_NAMES.has(tool.name) && !state.nonDeferredTools.includes(tool.name) && !historyToolNames?.has(tool.name)) deferred.push({
9316
11165
  ...normalized,
9317
11166
  defer_loading: true
9318
- } : normalized);
11167
+ });
11168
+ else nonDeferred.push(normalized);
9319
11169
  }
9320
11170
  for (const name of CLAUDE_CODE_OFFICIAL_TOOLS) if (!existingNamesLower.has(name.toLowerCase())) {
9321
11171
  const stub = {
@@ -9323,19 +11173,24 @@ function processToolPipeline(tools, modelId, messages) {
9323
11173
  description: `Claude Code ${name} tool`,
9324
11174
  input_schema: EMPTY_INPUT_SCHEMA
9325
11175
  };
9326
- result.push(stub);
11176
+ nonDeferred.push(stub);
9327
11177
  }
9328
11178
  if (historyToolNames) {
9329
- const allResultNames = new Set(result.map((t) => t.name));
11179
+ const allResultNames = new Set([
11180
+ ...nonDeferred,
11181
+ ...deferred,
11182
+ ...result
11183
+ ].map((t) => t.name));
9330
11184
  for (const name of historyToolNames) if (!allResultNames.has(name)) {
9331
11185
  consola.debug(`[ToolPipeline] Injecting stub for history-referenced tool: ${name}`);
9332
- result.push({
11186
+ nonDeferred.push({
9333
11187
  name,
9334
11188
  description: `Stub for tool referenced in conversation history`,
9335
11189
  input_schema: EMPTY_INPUT_SCHEMA
9336
11190
  });
9337
11191
  }
9338
11192
  }
11193
+ result.push(...nonDeferred, ...deferred);
9339
11194
  const deferredCount = result.filter((t) => t.defer_loading === true).length;
9340
11195
  const injectedCount = result.length - tools.length;
9341
11196
  if (deferredCount > 0 || injectedCount > 0) consola.debug(`[ToolPipeline] ${result.length} tools (${deferredCount} deferred, ${injectedCount} injected, tool_search: ${toolSearchEnabled})`);
@@ -9422,10 +11277,13 @@ function stripServerTools(tools) {
9422
11277
  }
9423
11278
  //#endregion
9424
11279
  //#region src/lib/anthropic/request-preparation.ts
9425
- const COPILOT_REJECTED_FIELDS = new Set(["output_config", "inference_geo"]);
11280
+ const COPILOT_REJECTED_FIELDS = new Set(["inference_geo"]);
11281
+ const CACHE_CONTROL_BREAKPOINT_LIMIT = 4;
11282
+ const EPHEMERAL_CACHE_CONTROL = { type: "ephemeral" };
9426
11283
  function prepareAnthropicRequest(payload, opts) {
9427
11284
  const wire = buildWirePayload(payload);
9428
- adjustThinkingBudget(wire);
11285
+ adjustThinkingBudget(wire, opts?.resolvedModel);
11286
+ applyCacheControlMode(wire);
9429
11287
  const model = wire.model;
9430
11288
  const messages = wire.messages;
9431
11289
  const thinking = wire.thinking;
@@ -9469,18 +11327,139 @@ function buildWirePayload(payload) {
9469
11327
  if (wire.tools) wire.tools = stripServerTools(wire.tools);
9470
11328
  return wire;
9471
11329
  }
9472
- function adjustThinkingBudget(wire) {
11330
+ function adjustThinkingBudget(wire, resolvedModel) {
9473
11331
  const thinking = wire.thinking;
9474
11332
  if (!thinking || thinking.type === "disabled" || thinking.type === "adaptive") return;
9475
11333
  const budgetTokens = thinking.budget_tokens;
9476
11334
  if (!budgetTokens) return;
11335
+ let adjusted = budgetTokens;
11336
+ const minBudget = resolvedModel?.capabilities?.supports?.min_thinking_budget;
11337
+ const maxBudget = resolvedModel?.capabilities?.supports?.max_thinking_budget;
9477
11338
  const maxTokens = wire.max_tokens;
9478
- if (budgetTokens >= maxTokens) {
9479
- const adjusted = maxTokens - 1;
11339
+ if (typeof minBudget === "number" && adjusted < minBudget) adjusted = minBudget;
11340
+ if (typeof maxBudget === "number" && adjusted > maxBudget) adjusted = maxBudget;
11341
+ if (typeof maxTokens === "number" && adjusted >= maxTokens) adjusted = maxTokens - 1;
11342
+ if (adjusted !== budgetTokens) {
9480
11343
  wire.thinking.budget_tokens = adjusted;
9481
11344
  consola.debug(`[DirectAnthropic] Capped thinking.budget_tokens: ${budgetTokens} → ${adjusted} (max_tokens=${maxTokens})`);
9482
11345
  }
9483
11346
  }
11347
/**
 * Dispatch cache_control handling based on the configured mode.
 * - disabled: strip all cache_control from the wire payload
 * - passthrough: leave everything as-is
 * - sanitize: normalize all cache_control to { type: "ephemeral" }
 * - proxied: strip client cache_control then auto-inject breakpoints
 */
function applyCacheControlMode(wire) {
  switch (state.cacheControlMode) {
    case "disabled":
      // A handler returning undefined deletes each cache_control field.
      walkCacheControl(wire, () => void 0);
      break;
    case "passthrough": break;
    case "sanitize":
      // Replace whatever the client sent with the plain ephemeral marker.
      walkCacheControl(wire, () => EPHEMERAL_CACHE_CONTROL);
      break;
    case "proxied":
      // Strip everything first so the injected breakpoints are the only ones.
      walkCacheControl(wire, () => void 0);
      addToolsAndSystemCacheControl(wire);
      break;
  }
}
11369
/**
 * Inject cache breakpoints on the last non-deferred tool and the last
 * system block, staying within the overall breakpoint budget.
 */
function addToolsAndSystemCacheControl(wire) {
  let budget = CACHE_CONTROL_BREAKPOINT_LIMIT - countExistingCacheBreakpoints(wire);
  if (budget <= 0) return;
  const toolUpdate = addToolCacheControl(wire.tools, budget);
  if (toolUpdate.changed) {
    wire.tools = toolUpdate.tools;
    budget = toolUpdate.remaining;
  }
  if (budget <= 0) return;
  const systemUpdate = addSystemCacheControl(wire.system, budget);
  if (systemUpdate.changed) {
    wire.system = systemUpdate.system;
  }
}
11381
/** Total cache_control occurrences already present across the payload. */
function countExistingCacheBreakpoints(wire) {
  const sections = [wire.messages, wire.system, wire.tools];
  return sections.reduce((total, section) => total + countCacheControlOccurrences(section), 0);
}
11384
/**
 * Recursively count truthy `cache_control` fields in a payload subtree.
 *
 * Arrays are summed element-wise; non-objects contribute 0. The
 * `cache_control` value itself is not descended into (it was already
 * counted for its containing object).
 *
 * Fix: the original skipped recursion by reference equality with
 * `record.cache_control`, which also skipped any *sibling* key that
 * happened to alias the same object, undercounting. Skipping by key name
 * expresses the intent directly and counts aliased siblings correctly.
 */
function countCacheControlOccurrences(value) {
  if (Array.isArray(value)) return value.reduce((count, item) => count + countCacheControlOccurrences(item), 0);
  if (!value || typeof value !== "object") return 0;
  const record = value;
  let count = record.cache_control ? 1 : 0;
  for (const [key, nested] of Object.entries(record)) {
    if (key === "cache_control") continue;
    count += countCacheControlOccurrences(nested);
  }
  return count;
}
11392
/**
 * Put an ephemeral cache breakpoint on the last non-deferred tool.
 * Returns { tools, remaining, changed }; no-op when there is no budget,
 * no eligible tool, or the tool already carries cache_control.
 * The input array is never mutated — a shallow copy is returned on change.
 */
function addToolCacheControl(tools, remaining) {
  const unchanged = {
    tools,
    remaining,
    changed: false
  };
  if (!tools || remaining <= 0) return unchanged;
  const targetIndex = findLastIndex(tools, (tool) => tool.defer_loading !== true);
  if (targetIndex < 0 || tools[targetIndex].cache_control) return unchanged;
  const updatedTools = tools.slice();
  updatedTools[targetIndex] = {
    ...updatedTools[targetIndex],
    cache_control: EPHEMERAL_CACHE_CONTROL
  };
  return {
    tools: updatedTools,
    remaining: remaining - 1,
    changed: true
  };
}
11415
/**
 * Put an ephemeral cache breakpoint on the last system block.
 * Returns { system, changed }; no-op when system is not an array, the
 * budget is spent, the array is empty, or the last block already has
 * cache_control. The input array is never mutated.
 */
function addSystemCacheControl(system, remaining) {
  const unchanged = {
    system,
    changed: false
  };
  if (!Array.isArray(system) || remaining <= 0) return unchanged;
  const lastIndex = system.length - 1;
  if (lastIndex < 0 || system[lastIndex].cache_control) return unchanged;
  const updatedSystem = system.slice();
  updatedSystem[lastIndex] = {
    ...updatedSystem[lastIndex],
    cache_control: EPHEMERAL_CACHE_CONTROL
  };
  return {
    system: updatedSystem,
    changed: true
  };
}
11435
/** Index of the last element satisfying `predicate`, or -1 if none do. */
function findLastIndex(items, predicate) {
  let index = items.length;
  while (index-- > 0) {
    if (predicate(items[index])) return index;
  }
  return -1;
}
11439
/**
 * Walk all cache_control occurrences in the wire payload (system, messages, tools)
 * and apply a handler. The handler receives the existing cache_control value and returns:
 * - undefined: delete the cache_control field
 * - an object: replace the cache_control field with this value
 */
function walkCacheControl(wire, handler) {
  const sections = ["system", "messages", "tools"];
  for (const sectionKey of sections) {
    const section = wire[sectionKey];
    if (Array.isArray(section)) walkCacheControlArray(section, handler);
  }
}
11452
/**
 * Apply the cache_control handler to each object in `items`, mutating in
 * place, and recurse into nested `content` arrays. Non-object entries and
 * falsy cache_control values are left untouched.
 */
function walkCacheControlArray(items, handler) {
  for (const entry of items) {
    if (!entry || typeof entry !== "object") continue;
    if ("cache_control" in entry && entry.cache_control) {
      const next = handler(entry.cache_control);
      if (next === void 0) {
        delete entry.cache_control;
      } else {
        entry.cache_control = next;
      }
    }
    const { content } = entry;
    if (Array.isArray(content)) walkCacheControlArray(content, handler);
  }
}
9484
11463
  //#endregion
9485
11464
  //#region src/lib/anthropic/client.ts
9486
11465
  /**
@@ -10193,7 +12172,9 @@ async function handleMessages(c) {
10193
12172
  consola.debug(`[AnthropicRouting] ${anthropicPayload.model}: ${routingDecision.reason}`);
10194
12173
  const reqCtx = getRequestContextManager().create({
10195
12174
  endpoint: "anthropic-messages",
10196
- tuiLogId
12175
+ sessionId: getSessionIdFromHeaders(c.req.raw.headers),
12176
+ tuiLogId,
12177
+ rawPath: c.req.path
10197
12178
  });
10198
12179
  reqCtx.setOriginalRequest({
10199
12180
  model: clientModelName ?? anthropicPayload.model,
@@ -10459,39 +12440,18 @@ messagesRoutes.post("/count_tokens", async (c) => {
10459
12440
  //#endregion
10460
12441
  //#region src/routes/models/route.ts
10461
12442
  const modelsRoutes = new Hono();
10462
- const EPOCH_ISO = (/* @__PURE__ */ new Date(0)).toISOString();
10463
- function formatModel(model) {
10464
- return {
10465
- id: model.id,
10466
- object: "model",
10467
- type: "model",
10468
- created: 0,
10469
- created_at: EPOCH_ISO,
10470
- owned_by: model.vendor,
10471
- display_name: model.name,
10472
- capabilities: model.capabilities
10473
- };
10474
- }
10475
- function formatModelDetail(model) {
10476
- return {
10477
- ...formatModel(model),
10478
- version: model.version,
10479
- preview: model.preview,
10480
- model_picker_enabled: model.model_picker_enabled,
10481
- model_picker_category: model.model_picker_category,
10482
- supported_endpoints: model.supported_endpoints,
10483
- billing: model.billing
10484
- };
12443
/** Strip internal fields that should not be exposed to external consumers. */
function stripInternalFields(model) {
  // Destructure request_headers away; everything else is public.
  const { request_headers: _ignored, ...publicFields } = model;
  return publicFields;
}
10486
12448
  modelsRoutes.get("/", async (c) => {
10487
12449
  try {
10488
12450
  if (!state.models) await cacheModels();
10489
- const formatter = c.req.query("detail") === "true" ? formatModelDetail : formatModel;
10490
- const models = state.models?.data.map((m) => formatter(m));
12451
+ const models = state.models?.data.map(stripInternalFields);
10491
12452
  return c.json({
10492
- object: "list",
10493
- data: models,
10494
- has_more: false
12453
+ object: state.models?.object ?? "list",
12454
+ data: models
10495
12455
  });
10496
12456
  } catch (error) {
10497
12457
  return forwardError(c, error);
@@ -10508,7 +12468,7 @@ modelsRoutes.get("/:model", async (c) => {
10508
12468
  param: "model",
10509
12469
  code: "model_not_found"
10510
12470
  } }, 404);
10511
- return c.json(formatModelDetail(model));
12471
+ return c.json(stripInternalFields(model));
10512
12472
  } catch (error) {
10513
12473
  return forwardError(c, error);
10514
12474
  }
@@ -10533,7 +12493,9 @@ async function handleResponses(c) {
10533
12493
  const tuiLogId = c.get("tuiLogId");
10534
12494
  const reqCtx = getRequestContextManager().create({
10535
12495
  endpoint: "openai-responses",
10536
- tuiLogId
12496
+ sessionId: getSessionIdFromHeaders(c.req.raw.headers) ?? resolveResponseSessionId(payload.previous_response_id),
12497
+ tuiLogId,
12498
+ rawPath: c.req.path
10537
12499
  });
10538
12500
  reqCtx.setOriginalRequest({
10539
12501
  model: clientModel,
@@ -10560,6 +12522,8 @@ async function handleDirectResponses(opts) {
10560
12522
  const headersCapture = {};
10561
12523
  const adapter = createResponsesAdapter(selectedModel, headersCapture, (wireRequest) => {
10562
12524
  reqCtx.setAttemptWireRequest(wireRequest);
12525
+ }, (transport) => {
12526
+ reqCtx.setAttemptTransport(transport);
10563
12527
  });
10564
12528
  const strategies = createResponsesStrategies();
10565
12529
  try {
@@ -10576,6 +12540,8 @@ async function handleDirectResponses(opts) {
10576
12540
  const response = pipelineResult.response;
10577
12541
  if (!payload.stream) {
10578
12542
  const responsesResponse = response;
12543
+ if (!reqCtx.sessionId && responsesResponse.id) reqCtx.setSessionId(responsesResponse.id);
12544
+ registerResponseSession(responsesResponse.id, reqCtx.sessionId);
10579
12545
  const content = responsesOutputToContent(responsesResponse.output);
10580
12546
  reqCtx.complete({
10581
12547
  success: true,
@@ -10628,6 +12594,8 @@ async function handleDirectResponses(opts) {
10628
12594
  } catch {}
10629
12595
  }
10630
12596
  }
12597
+ if (!reqCtx.sessionId && acc.responseId) reqCtx.setSessionId(acc.responseId);
12598
+ registerResponseSession(acc.responseId, reqCtx.sessionId);
10631
12599
  const responseData = buildResponsesResponseData(acc, payload.model);
10632
12600
  reqCtx.complete(responseData);
10633
12601
  } catch (error) {
@@ -10675,7 +12643,18 @@ statusRoutes.get("/", async (c) => {
10675
12643
  const now = Date.now();
10676
12644
  const limiter = getAdaptiveRateLimiter();
10677
12645
  const limiterStatus = limiter?.getStatus();
12646
+ let serverStatus;
12647
+ if (getIsShuttingDown()) serverStatus = "shutting_down";
12648
+ else if (state.copilotToken && state.githubToken) serverStatus = "healthy";
12649
+ else serverStatus = "unhealthy";
12650
+ const rateLimiter = limiter && limiterStatus ? {
12651
+ enabled: true,
12652
+ ...limiterStatus,
12653
+ config: limiter.getConfig()
12654
+ } : { enabled: false };
10678
12655
  const memStats = getMemoryPressureStats();
12656
+ const requestTelemetry = getRequestTelemetrySnapshot(now);
12657
+ const upstreamWs = peekUpstreamWsManager();
10679
12658
  let activeCount = 0;
10680
12659
  try {
10681
12660
  activeCount = getRequestContextManager().activeCount;
@@ -10692,7 +12671,7 @@ statusRoutes.get("/", async (c) => {
10692
12671
  };
10693
12672
  } catch {}
10694
12673
  return c.json({
10695
- status: getIsShuttingDown() ? "shutting_down" : state.copilotToken && state.githubToken ? "healthy" : "unhealthy",
12674
+ status: serverStatus,
10696
12675
  uptime: serverStartTime > 0 ? Math.floor((now - serverStartTime) / 1e3) : 0,
10697
12676
  version,
10698
12677
  vsCodeVersion: state.vsCodeVersion ?? null,
@@ -10704,11 +12683,8 @@ statusRoutes.get("/", async (c) => {
10704
12683
  },
10705
12684
  quota,
10706
12685
  activeRequests: { count: activeCount },
10707
- rateLimiter: limiterStatus ? {
10708
- enabled: true,
10709
- ...limiterStatus,
10710
- config: limiter.getConfig()
10711
- } : { enabled: false },
12686
+ rateLimiter,
12687
+ requestTelemetry,
10712
12688
  memory: {
10713
12689
  heapUsedMB: memStats.heapUsedMB,
10714
12690
  heapLimitMB: memStats.heapLimitMB,
@@ -10720,6 +12696,12 @@ statusRoutes.get("/", async (c) => {
10720
12696
  models: {
10721
12697
  totalCount: state.models?.data.length ?? 0,
10722
12698
  availableCount: state.modelIds.size
12699
+ },
12700
+ upstream_websocket: {
12701
+ enabled: state.upstreamWebSocket,
12702
+ active_connections: upstreamWs?.activeCount ?? 0,
12703
+ consecutive_fallbacks: upstreamWs?.consecutiveFallbacks ?? 0,
12704
+ temporarily_disabled: upstreamWs?.temporarilyDisabled ?? false
10723
12705
  }
10724
12706
  });
10725
12707
  });
@@ -10760,7 +12742,29 @@ function getMimeType(path) {
10760
12742
  }
10761
12743
  //#endregion
10762
12744
  //#region src/routes/ui/route.ts
10763
- const uiRoutes = new Hono();
12745
/** Pathname prefix under which the web UI is mounted on this server. */
const UI_MOUNT_PREFIX = "/ui";
/** Upstream content types whose response bodies are buffered and rewritten before being returned. */
const TEXT_RESPONSE_TYPES = [
  "text/html",
  "text/css",
  "text/javascript",
  "application/javascript",
  "application/x-javascript"
];
/** Content types treated as JavaScript; bare parenthesized path rewrites are skipped for these. */
const JAVASCRIPT_RESPONSE_TYPES = [
  "text/javascript",
  "application/javascript",
  "application/x-javascript"
];
/** Vite dev-server path prefixes that must be remapped under the /ui mount when proxying. */
const VITE_DEV_PATH_PREFIXES = [
  "/@vite",
  "/@fs/",
  "/@id/",
  "/src/",
  "/node_modules/",
  "/__vite_ping",
  "/__open-in-editor",
  "/vite.svg"
];
10764
12768
  /**
10765
12769
  * Resolve a UI directory that exists at runtime.
10766
12770
  * In dev mode this file lives at src/routes/ui/ — 3 levels below project root.
@@ -10771,6 +12775,68 @@ function resolveUiDir(subpath) {
10771
12775
  return candidates.find((candidate) => existsSync(candidate)) ?? candidates[0];
10772
12776
  }
10773
12777
  const uiDir = resolveUiDir("history-v3/dist");
12778
/** Remove any run of trailing slashes from a pathname, keeping a bare "/" intact. */
function stripTrailingSlash(pathname) {
  if (pathname === "/") return pathname;
  return pathname.replace(/\/+$/, "");
}
12781
/** Escape regex metacharacters so `value` can be embedded in a RegExp literally. */
function escapeRegExp(value) {
  const specials = /[.*+?^${}()|[\]\\]/g;
  return value.replace(specials, (match) => `\\${match}`);
}
12784
/**
 * Join a base pathname with a request pathname without producing duplicate
 * slashes. A base of "/" yields the request path unchanged; the request path
 * is coerced to start with "/".
 */
function joinUrlPath(basePathname, requestPathname) {
  const base = stripTrailingSlash(basePathname);
  let request = requestPathname;
  if (!request.startsWith("/")) request = `/${request}`;
  return base === "/" ? request : `${base}${request}`;
}
12790
/**
 * Strip the local /ui mount prefix from a request path before proxying.
 * "/ui" becomes "/", "/ui/x" becomes "/x"; unrelated paths pass through.
 */
function stripUiMountPrefix(pathname) {
  if (pathname === UI_MOUNT_PREFIX) return "/";
  // Derive the slice offset from the constant instead of the hard-coded `3`
  // so a future change to UI_MOUNT_PREFIX cannot silently break this slice.
  if (pathname.startsWith(`${UI_MOUNT_PREFIX}/`)) return pathname.slice(UI_MOUNT_PREFIX.length);
  return pathname;
}
12795
/** True when the upstream content-type is one whose body we rewrite as text. */
function isTextResponse(contentType) {
  if (!contentType) return false;
  return TEXT_RESPONSE_TYPES.some((type) => contentType.includes(type));
}
12798
/** True when the upstream content-type denotes a JavaScript response. */
function isJavaScriptResponse(contentType) {
  if (!contentType) return false;
  return JAVASCRIPT_RESPONSE_TYPES.some((type) => contentType.includes(type));
}
12801
/** Point Vite's injected "BASE_URL": "/" literal at the local /ui mount. */
function rewriteBaseUrlLiteral(content) {
  const baseUrlLiteral = /("BASE_URL"\s*:\s*")\/(")/g;
  return content.replace(baseUrlLiteral, (_match, head, tail) => `${head}${UI_MOUNT_PREFIX}/${tail}`);
}
12804
/** Replace `fromPrefix` with `toPrefix` wherever it immediately follows a quote character. */
function rewriteQuotedPathPrefixes(content, fromPrefix, toPrefix) {
  const quoted = new RegExp(`(["'\`])${escapeRegExp(fromPrefix)}`, "g");
  return content.replace(quoted, (_match, quote) => `${quote}${toPrefix}`);
}
12808
/** Replace `fromPrefix` with `toPrefix` wherever it immediately follows an opening parenthesis. */
function rewriteParenthesizedPathPrefixes(content, fromPrefix, toPrefix) {
  const parenthesized = new RegExp(`(\\()${escapeRegExp(fromPrefix)}`, "g");
  return content.replace(parenthesized, (_match, paren) => `${paren}${toPrefix}`);
}
12812
/**
 * Rewrite a proxied text body (HTML/CSS/JS) so that Vite dev-server paths and
 * the injected BASE_URL literal resolve under this server's /ui mount instead
 * of the external UI origin.
 *
 * @param {string} content - Raw upstream response body.
 * @param {string} externalUiUrl - Normalized external UI base URL.
 * @param {string | null} contentType - Upstream content-type header value.
 * @returns {string} The rewritten body.
 */
function rewriteProxyTextResponse(content, externalUiUrl, contentType) {
  const externalBase = new URL(externalUiUrl);
  const externalBasePath = stripTrailingSlash(externalBase.pathname);
  // Fix the "BASE_URL": "/" literal first, then fold each known Vite dev
  // prefix over the content string.
  const rewrittenBase = rewriteBaseUrlLiteral(content);
  // NOTE(review): bare parenthesized rewrites are skipped for JavaScript
  // responses — presumably to avoid corrupting non-URL tokens after "(";
  // confirm the intent.
  const rewriteBareParenthesizedPaths = !isJavaScriptResponse(contentType);
  return VITE_DEV_PATH_PREFIXES.reduce((current, vitePathPrefix) => {
    // Prefix as it appears relative to the external server's base path.
    const externalPathPrefix = externalBasePath === "/" ? vitePathPrefix : `${externalBasePath}${vitePathPrefix}`;
    // Prefix as browsers must see it under this server's /ui mount.
    const localPathPrefix = `${UI_MOUNT_PREFIX}${vitePathPrefix}`;
    // Absolute (origin-qualified) occurrences are rewritten before relative
    // ones so the relative pass cannot partially match the absolute form.
    const absoluteExternalPrefix = `${externalBase.origin}${externalPathPrefix}`;
    const rewrittenQuotedRelative = rewriteQuotedPathPrefixes(rewriteQuotedPathPrefixes(current, absoluteExternalPrefix, localPathPrefix), externalPathPrefix, localPathPrefix);
    if (!rewriteBareParenthesizedPaths) return rewrittenQuotedRelative;
    return rewriteParenthesizedPathPrefixes(rewriteParenthesizedPathPrefixes(rewrittenQuotedRelative, absoluteExternalPrefix, localPathPrefix), externalPathPrefix, localPathPrefix);
  }, rewrittenBase);
}
12826
/**
 * Rewrite an upstream Location header so redirects stay under the local /ui
 * mount. Cross-origin locations are returned unchanged.
 *
 * @param {string} location - Raw Location header value (absolute or relative).
 * @param {string} externalUiUrl - Normalized external UI base URL.
 * @returns {string} The rewritten (or original cross-origin) location.
 */
function rewriteLocationHeader(location, externalUiUrl) {
  const externalBase = new URL(externalUiUrl);
  const resolvedLocation = new URL(location, externalBase);
  // Only same-origin redirects are remapped; foreign targets pass through.
  if (resolvedLocation.origin !== externalBase.origin) return location;
  const externalBasePath = stripTrailingSlash(externalBase.pathname);
  // Strip the external base path (when present) before re-prefixing with the
  // /ui mount; previously this was a single triple-nested ternary.
  let pathname = resolvedLocation.pathname;
  if (externalBasePath !== "/") {
    const withinBase = pathname === externalBasePath || pathname.startsWith(`${externalBasePath}/`);
    // An exact base-path match collapses to "/" so the result is "/ui/".
    if (withinBase) pathname = pathname.slice(externalBasePath.length) || "/";
  }
  return `${UI_MOUNT_PREFIX}${pathname}${resolvedLocation.search}${resolvedLocation.hash}`;
}
12833
/**
 * Validate and canonicalize the --external-ui-url option value.
 * Throws for non-http(s) protocols or when a query/hash is present.
 * Returns origin plus pathname with any trailing slash removed.
 */
function normalizeExternalUiUrl(externalUiUrl) {
  const url = new URL(externalUiUrl);
  const isHttp = url.protocol === "http:" || url.protocol === "https:";
  if (!isHttp) throw new Error(`Unsupported external UI URL protocol: ${url.protocol}. Use http:// or https://`);
  if (url.search || url.hash) throw new Error("--external-ui-url must not include query parameters or hash fragments");
  const pathname = stripTrailingSlash(url.pathname);
  if (pathname === "/") return url.origin;
  return `${url.origin}${pathname}`;
}
10774
12840
  async function serveIndexHtml(c) {
10775
12841
  try {
10776
12842
  await access(join(uiDir, "index.html"), constants.R_OK);
@@ -10780,7 +12846,7 @@ async function serveIndexHtml(c) {
10780
12846
  return c.notFound();
10781
12847
  }
10782
12848
  }
10783
- async function serveAsset(c) {
12849
+ async function serveStaticAsset(c) {
10784
12850
  const assetsIdx = c.req.path.indexOf("/assets/");
10785
12851
  if (assetsIdx === -1) return c.notFound();
10786
12852
  const filePath = c.req.path.slice(assetsIdx);
@@ -10797,14 +12863,59 @@ async function serveAsset(c) {
10797
12863
  return c.notFound();
10798
12864
  }
10799
12865
  }
10800
- uiRoutes.get("/", serveIndexHtml);
10801
- uiRoutes.get("/assets/*", serveAsset);
12866
/**
 * Forward a /ui request to the external UI server and adapt the response:
 * Location headers and text bodies are rewritten so they resolve under the
 * local /ui mount.
 *
 * @param c - Hono context for the incoming request.
 * @param {string} externalUiUrl - Normalized external UI base URL.
 * @returns {Promise<Response>} The proxied (possibly rewritten) response.
 */
async function proxyExternalUiRequest(c, externalUiUrl) {
  const requestUrl = new URL(c.req.url);
  const externalBase = new URL(externalUiUrl);
  const upstreamUrl = new URL(externalBase);
  // Map the local path (minus the /ui prefix) onto the external base path,
  // preserving the original query string verbatim.
  upstreamUrl.pathname = joinUrlPath(externalBase.pathname, stripUiMountPrefix(c.req.path));
  upstreamUrl.search = requestUrl.search;
  const requestHeaders = new Headers(c.req.raw.headers);
  // Point Host at the upstream while recording the original host/protocol
  // in the standard forwarding headers.
  requestHeaders.set("host", upstreamUrl.host);
  requestHeaders.set("x-forwarded-host", requestUrl.host);
  requestHeaders.set("x-forwarded-proto", requestUrl.protocol.replace(":", ""));
  // GET/HEAD must not carry a body; all other methods forward it verbatim.
  const body = c.req.method === "GET" || c.req.method === "HEAD" ? void 0 : await c.req.raw.arrayBuffer();
  // redirect: "manual" so redirects are rewritten below, not followed here.
  const upstreamResponse = await fetch(upstreamUrl, {
    method: c.req.method,
    headers: requestHeaders,
    body,
    redirect: "manual"
  });
  const responseHeaders = new Headers(upstreamResponse.headers);
  const location = responseHeaders.get("location");
  if (location) responseHeaders.set("location", rewriteLocationHeader(location, externalUiUrl));
  if (isTextResponse(responseHeaders.get("content-type"))) {
    // Buffer and rewrite text bodies; the length changes, so the stale
    // content-length header must be dropped.
    const rewritten = rewriteProxyTextResponse(await upstreamResponse.text(), externalUiUrl, responseHeaders.get("content-type"));
    responseHeaders.delete("content-length");
    return new Response(rewritten, {
      status: upstreamResponse.status,
      statusText: upstreamResponse.statusText,
      headers: responseHeaders
    });
  }
  // Non-text responses are streamed through untouched.
  return new Response(upstreamResponse.body, {
    status: upstreamResponse.status,
    statusText: upstreamResponse.statusText,
    headers: responseHeaders
  });
}
12901
/**
 * Build the /ui router. With `externalUiUrl` set, every request (any method)
 * is proxied to that server; otherwise the bundled static build is served.
 */
function createUiRoutes(options = {}) {
  const uiRoutes = new Hono();
  const { externalUiUrl } = options;
  if (!externalUiUrl) {
    uiRoutes.get("/", serveIndexHtml);
    uiRoutes.get("/assets/*", serveStaticAsset);
    return uiRoutes;
  }
  const normalizedExternalUiUrl = normalizeExternalUiUrl(externalUiUrl);
  const proxy = (c) => proxyExternalUiRequest(c, normalizedExternalUiUrl);
  uiRoutes.all("/", proxy);
  uiRoutes.all("/*", proxy);
  return uiRoutes;
}
10802
12913
  //#endregion
10803
12914
  //#region src/routes/index.ts
10804
12915
  /**
10805
12916
  * Register all HTTP routes on the given Hono app.
10806
12917
  */
10807
- function registerHttpRoutes(app) {
12918
+ function registerHttpRoutes(app, options = {}) {
10808
12919
  app.route("/chat/completions", chatCompletionRoutes);
10809
12920
  app.route("/models", modelsRoutes);
10810
12921
  app.route("/embeddings", embeddingsRoutes);
@@ -10820,7 +12931,7 @@ function registerHttpRoutes(app) {
10820
12931
  app.route("/api/config", configRoutes);
10821
12932
  app.route("/api/logs", logsRoutes);
10822
12933
  app.route("/history", historyRoutes);
10823
- app.route("/ui", uiRoutes);
12934
+ app.route("/ui", createUiRoutes(options));
10824
12935
  }
10825
12936
  /**
10826
12937
  * Register all WebSocket routes on the given Hono app.
@@ -10831,41 +12942,44 @@ function registerWsRoutes(app, wsUpgrade) {
10831
12942
  }
10832
12943
  //#endregion
10833
12944
  //#region src/server.ts
10834
- const server = new Hono();
10835
- server.onError((error, c) => {
10836
- if (c.req.header("upgrade")?.toLowerCase() === "websocket") {
10837
- consola.debug("WebSocket error:", error);
10838
- return c.text("", 500);
10839
- }
10840
- consola.error(`Unhandled route error in ${c.req.method} ${c.req.path}:`, error);
10841
- return forwardError(c, error);
10842
- });
10843
- const browserProbePaths = new Set(["/favicon.ico", "/.well-known/appspecific/com.chrome.devtools.json"]);
10844
- server.notFound((c) => {
10845
- if (browserProbePaths.has(c.req.path)) return c.body(null, 204);
10846
- return c.json({ error: "Not Found" }, 404);
10847
- });
10848
- server.use(async (_c, next) => {
10849
- await applyConfigToState();
10850
- await ensureValidCopilotToken();
10851
- await next();
10852
- });
10853
- server.use(tuiMiddleware());
10854
- server.use(cors());
10855
- server.use(trimTrailingSlash());
10856
- server.get("/", (c) => c.text("Server running"));
10857
- server.get("/health", (c) => {
10858
- const healthy = Boolean(state.copilotToken && state.githubToken);
10859
- return c.json({
10860
- status: healthy ? "healthy" : "unhealthy",
10861
- checks: {
10862
- copilotToken: Boolean(state.copilotToken),
10863
- githubToken: Boolean(state.githubToken),
10864
- models: Boolean(state.models)
10865
- }
10866
- }, healthy ? 200 : 503);
10867
- });
10868
- registerHttpRoutes(server);
12945
/**
 * Build and configure the Hono application: error and 404 handlers, a
 * per-request config/token middleware, TUI logging, CORS, trailing-slash
 * trimming, root and health endpoints, and all HTTP routes.
 *
 * @param {{ externalUiUrl?: string }} [options] - Forwarded to the /ui router
 *   to enable proxying to an external frontend server.
 * @returns The configured Hono server (WebSocket routes are registered separately).
 */
function createServer(options = {}) {
  const server = new Hono();
  server.onError((error, c) => {
    // WebSocket upgrade errors are expected noise: log at debug, empty 500.
    if (c.req.header("upgrade")?.toLowerCase() === "websocket") {
      consola.debug("WebSocket error:", error);
      return c.text("", 500);
    }
    consola.error(`Unhandled route error in ${c.req.method} ${c.req.path}:`, error);
    return forwardError(c, error);
  });
  // Paths browsers probe automatically; answer 204 instead of a JSON 404.
  const browserProbePaths = new Set(["/favicon.ico", "/.well-known/appspecific/com.chrome.devtools.json"]);
  server.notFound((c) => {
    if (browserProbePaths.has(c.req.path)) return c.body(null, 204);
    return c.json({ error: "Not Found" }, 404);
  });
  // Ensure config has been applied and the Copilot token is valid before
  // handling each request.
  server.use(async (_c, next) => {
    await applyConfigToState();
    await ensureValidCopilotToken();
    await next();
  });
  server.use(tuiMiddleware());
  server.use(cors());
  server.use(trimTrailingSlash());
  server.get("/", (c) => c.text("Server running"));
  // Health probe: 200 when both tokens are present, otherwise 503.
  server.get("/health", (c) => {
    const healthy = Boolean(state.copilotToken && state.githubToken);
    return c.json({
      status: healthy ? "healthy" : "unhealthy",
      checks: {
        copilotToken: Boolean(state.copilotToken),
        githubToken: Boolean(state.githubToken),
        models: Boolean(state.models)
      }
    }, healthy ? 200 : 503);
  });
  registerHttpRoutes(server, { externalUiUrl: options.externalUiUrl });
  return server;
}
10869
12983
  //#endregion
10870
12984
  //#region src/start.ts
10871
12985
  /** Format limit values as "Xk" or "?" if not available */
@@ -10917,6 +13031,13 @@ async function runServer(options) {
10917
13031
  consola.error(`Invalid account type: "${options.accountType}". Must be one of: ${VALID_ACCOUNT_TYPES.join(", ")}`);
10918
13032
  process.exit(1);
10919
13033
  }
13034
+ let externalUiUrl;
13035
+ if (options.externalUiUrl) try {
13036
+ externalUiUrl = normalizeExternalUiUrl(options.externalUiUrl);
13037
+ } catch (error) {
13038
+ consola.error(error instanceof Error ? error.message : String(error));
13039
+ process.exit(1);
13040
+ }
10920
13041
  if (options.verbose) {
10921
13042
  consola.level = 5;
10922
13043
  setCliState({ verbose: true });
@@ -10948,11 +13069,13 @@ async function runServer(options) {
10948
13069
  });
10949
13070
  initHistory(true, state.historyLimit);
10950
13071
  startMemoryPressureMonitor();
13072
+ await initRequestTelemetry();
10951
13073
  const contextManager = initRequestContextManager();
10952
13074
  registerContextConsumers(contextManager);
10953
13075
  setConnectedDataFactory(() => contextManager.getAll().map((ctx) => ({
10954
13076
  id: ctx.id,
10955
13077
  endpoint: ctx.endpoint,
13078
+ rawPath: ctx.rawPath,
10956
13079
  state: ctx.state,
10957
13080
  startTime: ctx.startTime,
10958
13081
  durationMs: ctx.durationMs,
@@ -10971,11 +13094,14 @@ async function runServer(options) {
10971
13094
  process.exit(1);
10972
13095
  }
10973
13096
  consola.info(`Available models:\n${state.models?.data.map((m) => formatModelInfo(m)).join("\n")}`);
13097
+ const stopModelRefreshLoop = startModelRefreshLoop();
10974
13098
  await loadPersistedLimits();
10975
13099
  const serverUrl = `http://${options.host ?? "localhost"}:${options.port}`;
13100
+ const server = createServer({ externalUiUrl });
10976
13101
  const wsAdapter = await createWebSocketAdapter(server);
10977
13102
  registerWsRoutes(server, wsAdapter.upgradeWebSocket);
10978
- consola.info(`Web UI: ${serverUrl}/ui`);
13103
+ if (externalUiUrl) consola.info(`Web UI: ${serverUrl}/ui (proxied from ${externalUiUrl})`);
13104
+ else consola.info(`Web UI: ${serverUrl}/ui`);
10979
13105
  const bunWebSocket = typeof globalThis.Bun !== "undefined" ? (await import("hono/bun")).websocket : void 0;
10980
13106
  let serverInstance;
10981
13107
  try {
@@ -10994,7 +13120,11 @@ async function runServer(options) {
10994
13120
  setServerInstance(serverInstance);
10995
13121
  setupShutdownHandlers();
10996
13122
  if (wsAdapter.injectWebSocket && serverInstance.nodeServer) wsAdapter.injectWebSocket(serverInstance.nodeServer);
10997
- await waitForShutdown();
13123
+ try {
13124
+ await waitForShutdown();
13125
+ } finally {
13126
+ stopModelRefreshLoop();
13127
+ }
10998
13128
  }
10999
13129
  const start = defineCommand({
11000
13130
  meta: {
@@ -11053,6 +13183,10 @@ const start = defineCommand({
11053
13183
  type: "boolean",
11054
13184
  default: true,
11055
13185
  description: "Reactive auto-truncate: retries with truncated payload on limit errors (disable with --no-auto-truncate)"
13186
+ },
13187
+ "external-ui-url": {
13188
+ type: "string",
13189
+ description: "Proxy /ui to an external frontend dev/build server (for example http://localhost:5173)"
11056
13190
  }
11057
13191
  },
11058
13192
  run({ args }) {
@@ -11078,7 +13212,9 @@ const start = defineCommand({
11078
13212
  "http-proxy-from-env",
11079
13213
  "httpProxyFromEnv",
11080
13214
  "auto-truncate",
11081
- "autoTruncate"
13215
+ "autoTruncate",
13216
+ "external-ui-url",
13217
+ "externalUiUrl"
11082
13218
  ]);
11083
13219
  const unknownArgs = Object.keys(args).filter((key) => !knownArgs.has(key));
11084
13220
  if (unknownArgs.length > 0) consola.warn(`Unknown argument(s): ${unknownArgs.map((a) => `--${a}`).join(", ")}`);
@@ -11092,7 +13228,8 @@ const start = defineCommand({
11092
13228
  showGitHubToken: args["show-github-token"],
11093
13229
  proxy: args.proxy,
11094
13230
  httpProxyFromEnv: args["http-proxy-from-env"],
11095
- autoTruncate: args["auto-truncate"]
13231
+ autoTruncate: args["auto-truncate"],
13232
+ externalUiUrl: args["external-ui-url"]
11096
13233
  });
11097
13234
  }
11098
13235
  });