@hsupu/copilot-api 0.8.1-beta.2 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/config.example.yaml +25 -0
  2. package/dist/main.mjs +2967 -903
  3. package/dist/main.mjs.map +1 -1
  4. package/package.json +10 -5
  5. package/ui/history-v3/dist/assets/LogsPage-CkzMEjbU.css +1 -0
  6. package/ui/history-v3/dist/assets/LogsPage-CyM6vZyg.js +1 -0
  7. package/ui/history-v3/dist/assets/VActivityPage-DGZYvP1d.css +1 -0
  8. package/ui/history-v3/dist/assets/VActivityPage-qSJRqJGU.js +3 -0
  9. package/ui/history-v3/dist/assets/VCard-B2q0rBgl.js +1 -0
  10. package/ui/history-v3/dist/assets/VCard-v_d_q0Rh.css +1 -0
  11. package/ui/history-v3/dist/assets/VConfigPage-DisddwD3.css +1 -0
  12. package/ui/history-v3/dist/assets/VConfigPage-zQu5b6tC.js +1 -0
  13. package/ui/history-v3/dist/assets/VDashboardPage-CsNfE_go.js +1 -0
  14. package/ui/history-v3/dist/assets/VDashboardPage-oAbMfN9y.css +1 -0
  15. package/ui/history-v3/dist/assets/VDialog-B5tAv-4d.js +1 -0
  16. package/ui/history-v3/dist/assets/VDialog-CFhCWS_I.css +1 -0
  17. package/ui/history-v3/dist/assets/VModelsPage-C1y0gG_S.js +1 -0
  18. package/ui/history-v3/dist/assets/VModelsPage-DMPq4SEZ.css +1 -0
  19. package/ui/history-v3/dist/assets/VSelect-CahLC2X3.js +1 -0
  20. package/ui/history-v3/dist/assets/VSelect-CiSADOyl.css +1 -0
  21. package/ui/history-v3/dist/assets/VSheet-BhXOOy9y.js +1 -0
  22. package/ui/history-v3/dist/assets/VSheet-DI_DMcjz.css +1 -0
  23. package/ui/history-v3/dist/assets/index-Cxye_z0t.js +18 -0
  24. package/ui/history-v3/dist/assets/index-sVLYkWM6.css +1 -0
  25. package/ui/history-v3/dist/assets/useLogs-C_hg5wZk.js +1 -0
  26. package/ui/history-v3/dist/assets/{vendor-CmGvxZwr.js → vendor-tLv7SZ-i.js} +1 -1
  27. package/ui/history-v3/dist/assets/vue-McY99x0M.js +1 -0
  28. package/ui/history-v3/dist/assets/ws-status-BJ5xglsi.js +1 -0
  29. package/ui/history-v3/dist/index.html +3 -3
  30. package/ui/history-v3/dist/assets/BaseSelect-CttLMFCN.js +0 -1
  31. package/ui/history-v3/dist/assets/BaseSelect-N-W6HPTu.css +0 -1
  32. package/ui/history-v3/dist/assets/DashboardPage-BYXNxjXb.css +0 -1
  33. package/ui/history-v3/dist/assets/DashboardPage-CgXivoWS.js +0 -1
  34. package/ui/history-v3/dist/assets/DetailPanel-CGpPxDDa.css +0 -1
  35. package/ui/history-v3/dist/assets/DetailPanel-EOQma5fZ.js +0 -3
  36. package/ui/history-v3/dist/assets/HistoryPage-PA0Yh3n3.css +0 -1
  37. package/ui/history-v3/dist/assets/HistoryPage-rhEb_UZG.js +0 -1
  38. package/ui/history-v3/dist/assets/LogsPage-BuPou1cg.css +0 -1
  39. package/ui/history-v3/dist/assets/LogsPage-CNXQuqMj.js +0 -1
  40. package/ui/history-v3/dist/assets/ModelsPage-Bpi7Y9GS.js +0 -1
  41. package/ui/history-v3/dist/assets/ModelsPage-F2KTxq2i.css +0 -1
  42. package/ui/history-v3/dist/assets/ProgressBar-6xzx-ZSc.js +0 -1
  43. package/ui/history-v3/dist/assets/ProgressBar-CtfiTXLy.css +0 -1
  44. package/ui/history-v3/dist/assets/UsagePage-COyq-DOU.css +0 -1
  45. package/ui/history-v3/dist/assets/UsagePage-CZfgTYCP.js +0 -1
  46. package/ui/history-v3/dist/assets/VChip-9UyCCNyg.js +0 -1
  47. package/ui/history-v3/dist/assets/VChip-B_fbAfwz.css +0 -1
  48. package/ui/history-v3/dist/assets/VDashboardPage-DXtj4agW.js +0 -1
  49. package/ui/history-v3/dist/assets/VDashboardPage-axfQtTiR.css +0 -1
  50. package/ui/history-v3/dist/assets/VDivider-D8zdArq0.js +0 -1
  51. package/ui/history-v3/dist/assets/VDivider-DITF6qCr.css +0 -1
  52. package/ui/history-v3/dist/assets/VHistoryPage-Dj0qUmWz.js +0 -1
  53. package/ui/history-v3/dist/assets/VHistoryPage-DqpLWYXo.css +0 -1
  54. package/ui/history-v3/dist/assets/VList-Bf0AJT_N.css +0 -1
  55. package/ui/history-v3/dist/assets/VList-Ct6gdZ-F.js +0 -1
  56. package/ui/history-v3/dist/assets/VLogsPage-BOA_17HS.js +0 -1
  57. package/ui/history-v3/dist/assets/VLogsPage-Dr3my9y3.css +0 -1
  58. package/ui/history-v3/dist/assets/VModelsPage-Bkon7sFs.css +0 -1
  59. package/ui/history-v3/dist/assets/VModelsPage-D-IbiiwR.js +0 -1
  60. package/ui/history-v3/dist/assets/VSpacer-DfbUir7X.css +0 -1
  61. package/ui/history-v3/dist/assets/VSpacer-F4vloCsf.js +0 -1
  62. package/ui/history-v3/dist/assets/VTable-B4qROCQu.js +0 -1
  63. package/ui/history-v3/dist/assets/VTable-BTui1tPX.css +0 -1
  64. package/ui/history-v3/dist/assets/VTooltip-9iBP-JhF.js +0 -1
  65. package/ui/history-v3/dist/assets/VTooltip-C1DKovoh.css +0 -1
  66. package/ui/history-v3/dist/assets/VUsagePage-B8WkBKET.js +0 -1
  67. package/ui/history-v3/dist/assets/VUsagePage-Se7R-H-Y.css +0 -1
  68. package/ui/history-v3/dist/assets/index-B8CP-fZd.css +0 -1
  69. package/ui/history-v3/dist/assets/index-cupXJxSz.js +0 -18
  70. package/ui/history-v3/dist/assets/useInjectedHistoryStore-Dx7UlhLw.js +0 -1
  71. package/ui/history-v3/dist/assets/useLogs-Bz9naVOB.js +0 -1
  72. package/ui/history-v3/dist/assets/usePolling-CRd-nhvF.js +0 -1
  73. package/ui/history-v3/dist/assets/vue-Bmo88J5t.js +0 -1
package/dist/main.mjs CHANGED
@@ -3,20 +3,21 @@ import { defineCommand, runMain } from "citty";
3
3
  import consola, { consola as consola$1 } from "consola";
4
4
  import * as fs$1 from "node:fs/promises";
5
5
  import fs, { access, constants, readFile } from "node:fs/promises";
6
+ import { randomBytes, randomUUID } from "node:crypto";
6
7
  import os, { homedir } from "node:os";
7
8
  import * as path$1 from "node:path";
8
9
  import path, { dirname, join, resolve } from "node:path";
9
10
  import tls from "node:tls";
10
11
  import { getProxyForUrl } from "proxy-from-env";
11
12
  import { SocksClient } from "socks";
12
- import { Agent, ProxyAgent, setGlobalDispatcher } from "undici";
13
- import { randomBytes, randomUUID } from "node:crypto";
13
+ import { Agent, ProxyAgent, WebSocket as WebSocket$1, setGlobalDispatcher } from "undici";
14
14
  import pc from "picocolors";
15
15
  import { existsSync, promises, readFileSync, readdirSync } from "node:fs";
16
16
  import invariant from "tiny-invariant";
17
17
  import { events } from "fetch-event-stream";
18
18
  import { Hono } from "hono";
19
19
  import { streamSSE } from "hono/streaming";
20
+ import { parseDocument } from "yaml";
20
21
  import { cors } from "hono/cors";
21
22
  import { trimTrailingSlash } from "hono/trailing-slash";
22
23
  //#region src/lib/state.ts
@@ -82,47 +83,110 @@ const DEFAULT_MODEL_OVERRIDES = {
82
83
  sonnet: "claude-sonnet-4.6",
83
84
  haiku: "claude-haiku-4.5"
84
85
  };
85
- const mutableState = {
86
- accountType: "individual",
87
- autoTruncate: true,
88
- compressToolResultsBeforeTruncate: true,
89
- contextEditingMode: "off",
86
+ /**
87
+ * Default values for config-managed scalar/runtime fields.
88
+ * Single source of truth for mutableState initialization and resetConfigManagedState().
89
+ * Model overrides continue to use DEFAULT_MODEL_OVERRIDES.
90
+ */
91
+ const CONFIG_MANAGED_DEFAULTS = {
90
92
  stripServerTools: false,
91
93
  immutableThinkingMessages: false,
92
94
  dedupToolCalls: false,
95
+ stripReadToolResultTags: false,
96
+ contextEditingMode: "off",
97
+ contextEditingTrigger: 1e5,
98
+ contextEditingKeepTools: 3,
99
+ contextEditingKeepThinking: 1,
100
+ toolSearchEnabled: true,
101
+ autoCacheControl: true,
102
+ nonDeferredTools: [],
103
+ rewriteSystemReminders: false,
104
+ systemPromptOverrides: [],
105
+ compressToolResultsBeforeTruncate: true,
93
106
  fetchTimeout: 300,
107
+ streamIdleTimeout: 300,
108
+ staleRequestMaxAge: 600,
109
+ modelRefreshInterval: 600,
110
+ shutdownGracefulWait: 60,
111
+ shutdownAbortWait: 120,
94
112
  historyLimit: 200,
95
113
  historyMinEntries: 50,
114
+ normalizeResponsesCallIds: true,
115
+ upstreamWebSocket: false
116
+ };
117
+ function resetConfigManagedState() {
118
+ setAnthropicBehavior({
119
+ stripServerTools: CONFIG_MANAGED_DEFAULTS.stripServerTools,
120
+ immutableThinkingMessages: CONFIG_MANAGED_DEFAULTS.immutableThinkingMessages,
121
+ dedupToolCalls: CONFIG_MANAGED_DEFAULTS.dedupToolCalls,
122
+ stripReadToolResultTags: CONFIG_MANAGED_DEFAULTS.stripReadToolResultTags,
123
+ contextEditingMode: CONFIG_MANAGED_DEFAULTS.contextEditingMode,
124
+ contextEditingTrigger: CONFIG_MANAGED_DEFAULTS.contextEditingTrigger,
125
+ contextEditingKeepTools: CONFIG_MANAGED_DEFAULTS.contextEditingKeepTools,
126
+ contextEditingKeepThinking: CONFIG_MANAGED_DEFAULTS.contextEditingKeepThinking,
127
+ toolSearchEnabled: CONFIG_MANAGED_DEFAULTS.toolSearchEnabled,
128
+ autoCacheControl: CONFIG_MANAGED_DEFAULTS.autoCacheControl,
129
+ nonDeferredTools: [...CONFIG_MANAGED_DEFAULTS.nonDeferredTools],
130
+ rewriteSystemReminders: CONFIG_MANAGED_DEFAULTS.rewriteSystemReminders,
131
+ systemPromptOverrides: [...CONFIG_MANAGED_DEFAULTS.systemPromptOverrides],
132
+ compressToolResultsBeforeTruncate: CONFIG_MANAGED_DEFAULTS.compressToolResultsBeforeTruncate
133
+ });
134
+ setModelOverrides({ ...DEFAULT_MODEL_OVERRIDES });
135
+ setTimeoutConfig({
136
+ fetchTimeout: CONFIG_MANAGED_DEFAULTS.fetchTimeout,
137
+ streamIdleTimeout: CONFIG_MANAGED_DEFAULTS.streamIdleTimeout,
138
+ staleRequestMaxAge: CONFIG_MANAGED_DEFAULTS.staleRequestMaxAge,
139
+ modelRefreshInterval: CONFIG_MANAGED_DEFAULTS.modelRefreshInterval
140
+ });
141
+ setShutdownConfig({
142
+ shutdownGracefulWait: CONFIG_MANAGED_DEFAULTS.shutdownGracefulWait,
143
+ shutdownAbortWait: CONFIG_MANAGED_DEFAULTS.shutdownAbortWait
144
+ });
145
+ setHistoryConfig({
146
+ historyLimit: CONFIG_MANAGED_DEFAULTS.historyLimit,
147
+ historyMinEntries: CONFIG_MANAGED_DEFAULTS.historyMinEntries
148
+ });
149
+ setHistoryMaxEntries(CONFIG_MANAGED_DEFAULTS.historyLimit);
150
+ setResponsesConfig({
151
+ normalizeResponsesCallIds: CONFIG_MANAGED_DEFAULTS.normalizeResponsesCallIds,
152
+ upstreamWebSocket: CONFIG_MANAGED_DEFAULTS.upstreamWebSocket
153
+ });
154
+ }
155
+ const mutableState = {
156
+ accountType: "individual",
157
+ autoTruncate: true,
158
+ compressToolResultsBeforeTruncate: CONFIG_MANAGED_DEFAULTS.compressToolResultsBeforeTruncate,
159
+ contextEditingMode: CONFIG_MANAGED_DEFAULTS.contextEditingMode,
160
+ contextEditingTrigger: CONFIG_MANAGED_DEFAULTS.contextEditingTrigger,
161
+ contextEditingKeepTools: CONFIG_MANAGED_DEFAULTS.contextEditingKeepTools,
162
+ contextEditingKeepThinking: CONFIG_MANAGED_DEFAULTS.contextEditingKeepThinking,
163
+ toolSearchEnabled: CONFIG_MANAGED_DEFAULTS.toolSearchEnabled,
164
+ autoCacheControl: CONFIG_MANAGED_DEFAULTS.autoCacheControl,
165
+ nonDeferredTools: [...CONFIG_MANAGED_DEFAULTS.nonDeferredTools],
166
+ stripServerTools: CONFIG_MANAGED_DEFAULTS.stripServerTools,
167
+ immutableThinkingMessages: CONFIG_MANAGED_DEFAULTS.immutableThinkingMessages,
168
+ dedupToolCalls: CONFIG_MANAGED_DEFAULTS.dedupToolCalls,
169
+ fetchTimeout: CONFIG_MANAGED_DEFAULTS.fetchTimeout,
170
+ historyLimit: CONFIG_MANAGED_DEFAULTS.historyLimit,
171
+ historyMinEntries: CONFIG_MANAGED_DEFAULTS.historyMinEntries,
96
172
  modelIds: /* @__PURE__ */ new Set(),
97
173
  modelIndex: /* @__PURE__ */ new Map(),
98
174
  modelOverrides: { ...DEFAULT_MODEL_OVERRIDES },
99
- rewriteSystemReminders: false,
175
+ rewriteSystemReminders: CONFIG_MANAGED_DEFAULTS.rewriteSystemReminders,
100
176
  showGitHubToken: false,
101
- shutdownAbortWait: 120,
102
- shutdownGracefulWait: 60,
103
- staleRequestMaxAge: 600,
104
- streamIdleTimeout: 300,
105
- systemPromptOverrides: [],
106
- stripReadToolResultTags: false,
107
- normalizeResponsesCallIds: true,
177
+ shutdownAbortWait: CONFIG_MANAGED_DEFAULTS.shutdownAbortWait,
178
+ shutdownGracefulWait: CONFIG_MANAGED_DEFAULTS.shutdownGracefulWait,
179
+ staleRequestMaxAge: CONFIG_MANAGED_DEFAULTS.staleRequestMaxAge,
180
+ modelRefreshInterval: CONFIG_MANAGED_DEFAULTS.modelRefreshInterval,
181
+ streamIdleTimeout: CONFIG_MANAGED_DEFAULTS.streamIdleTimeout,
182
+ systemPromptOverrides: [...CONFIG_MANAGED_DEFAULTS.systemPromptOverrides],
183
+ stripReadToolResultTags: CONFIG_MANAGED_DEFAULTS.stripReadToolResultTags,
184
+ normalizeResponsesCallIds: CONFIG_MANAGED_DEFAULTS.normalizeResponsesCallIds,
185
+ upstreamWebSocket: CONFIG_MANAGED_DEFAULTS.upstreamWebSocket,
108
186
  verbose: false
109
187
  };
110
188
  const state = mutableState;
111
189
  //#endregion
112
- //#region src/lib/utils.ts
113
- const sleep = (ms) => new Promise((resolve) => {
114
- setTimeout(resolve, ms);
115
- });
116
- const isNullish = (value) => value === null || value === void 0;
117
- /** Convert bytes to KB with rounding */
118
- function bytesToKB(bytes) {
119
- return Math.round(bytes / 1024);
120
- }
121
- /** Generate unique ID (timestamp + random) */
122
- function generateId(randomLength = 7) {
123
- return Date.now().toString(36) + Math.random().toString(36).slice(2, 2 + randomLength);
124
- }
125
- //#endregion
126
190
  //#region src/lib/ws/broadcast.ts
127
191
  /** Connected clients indexed by their raw WebSocket instance */
128
192
  const clients = /* @__PURE__ */ new Map();
@@ -332,7 +396,8 @@ const historyIndexes = {
332
396
  summaryIndex: /* @__PURE__ */ new Map(),
333
397
  sessionEntryCount: /* @__PURE__ */ new Map(),
334
398
  sessionModelsSet: /* @__PURE__ */ new Map(),
335
- sessionToolsSet: /* @__PURE__ */ new Map()
399
+ sessionToolsSet: /* @__PURE__ */ new Map(),
400
+ responseSessionIndex: /* @__PURE__ */ new Map()
336
401
  };
337
402
  const historyStatsCache = {
338
403
  dirty: true,
@@ -344,6 +409,7 @@ function resetHistoryIndexes() {
344
409
  historyIndexes.sessionEntryCount.clear();
345
410
  historyIndexes.sessionModelsSet.clear();
346
411
  historyIndexes.sessionToolsSet.clear();
412
+ historyIndexes.responseSessionIndex.clear();
347
413
  }
348
414
  function invalidateHistoryStats() {
349
415
  historyStatsCache.dirty = true;
@@ -400,7 +466,7 @@ function initHistory(enabled, maxEntries) {
400
466
  historyState.maxEntries = maxEntries;
401
467
  historyState.entries = [];
402
468
  historyState.sessions = /* @__PURE__ */ new Map();
403
- historyState.currentSessionId = enabled ? generateId() : "";
469
+ historyState.currentSessionId = "";
404
470
  resetHistoryIndexes();
405
471
  invalidateHistoryStats();
406
472
  }
@@ -438,7 +504,7 @@ function getStats() {
438
504
  const model = entry.response?.model || entry.request.model || "unknown";
439
505
  modelDist[model] = (modelDist[model] || 0) + 1;
440
506
  endpointDist[entry.endpoint] = (endpointDist[entry.endpoint] || 0) + 1;
441
- const date = new Date(entry.timestamp);
507
+ const date = new Date(entry.startedAt);
442
508
  const hourKey = `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, "0")}-${String(date.getDate()).padStart(2, "0")}T${String(date.getHours()).padStart(2, "0")}`;
443
509
  hourlyActivity[hourKey] = (hourlyActivity[hourKey] || 0) + 1;
444
510
  if (entry.response) {
@@ -480,7 +546,7 @@ function exportHistory(format = "json") {
480
546
  const headers = [
481
547
  "id",
482
548
  "session_id",
483
- "timestamp",
549
+ "started_at",
484
550
  "endpoint",
485
551
  "request_model",
486
552
  "message_count",
@@ -495,8 +561,8 @@ function exportHistory(format = "json") {
495
561
  ];
496
562
  const rows = historyState.entries.map((entry) => [
497
563
  entry.id,
498
- entry.sessionId,
499
- formatLocalTimestamp(entry.timestamp),
564
+ entry.sessionId ?? "",
565
+ formatLocalTimestamp(entry.startedAt),
500
566
  entry.endpoint,
501
567
  entry.request.model,
502
568
  entry.request.messages?.length,
@@ -512,6 +578,118 @@ function exportHistory(format = "json") {
512
578
  return [headers.join(","), ...rows.map((row) => row.map((value) => escapeCsvValue(value)).join(","))].join("\n");
513
579
  }
514
580
  //#endregion
581
+ //#region src/lib/history/sessions.ts
582
+ const SESSION_HEADER_CANDIDATES = [
583
+ "x-session-id",
584
+ "x-conversation-id",
585
+ "x-chat-session-id",
586
+ "x-thread-id",
587
+ "x-interaction-id"
588
+ ];
589
+ function normalizeSessionId(value) {
590
+ if (typeof value !== "string") return void 0;
591
+ const trimmed = value.trim();
592
+ return trimmed.length > 0 ? trimmed : void 0;
593
+ }
594
+ function ensureSession(sessionId, endpoint) {
595
+ const existing = historyState.sessions.get(sessionId);
596
+ if (existing) {
597
+ existing.lastActivity = Date.now();
598
+ if (!existing.endpoints.includes(endpoint)) existing.endpoints.push(endpoint);
599
+ historyState.currentSessionId = sessionId;
600
+ return existing;
601
+ }
602
+ const now = Date.now();
603
+ const session = {
604
+ id: sessionId,
605
+ startTime: now,
606
+ lastActivity: now,
607
+ requestCount: 0,
608
+ totalInputTokens: 0,
609
+ totalOutputTokens: 0,
610
+ models: [],
611
+ endpoints: [endpoint]
612
+ };
613
+ historyState.sessions.set(sessionId, session);
614
+ historyIndexes.sessionModelsSet.set(sessionId, /* @__PURE__ */ new Set());
615
+ historyIndexes.sessionToolsSet.set(sessionId, /* @__PURE__ */ new Set());
616
+ historyState.currentSessionId = sessionId;
617
+ return session;
618
+ }
619
+ function getSessionIdFromHeaders(headers) {
620
+ for (const name of SESSION_HEADER_CANDIDATES) {
621
+ const normalized = normalizeSessionId(headers instanceof Headers ? headers.get(name) : headers[name]);
622
+ if (normalized) return normalized;
623
+ }
624
+ }
625
+ function resolveResponseSessionId(previousResponseId) {
626
+ const normalized = normalizeSessionId(previousResponseId);
627
+ if (!normalized) return void 0;
628
+ return historyIndexes.responseSessionIndex.get(normalized) ?? normalized;
629
+ }
630
+ function registerResponseSession(responseId, sessionId) {
631
+ const normalizedResponseId = normalizeSessionId(responseId);
632
+ const normalizedSessionId = normalizeSessionId(sessionId);
633
+ if (!normalizedResponseId || !normalizedSessionId) return;
634
+ historyIndexes.responseSessionIndex.set(normalizedResponseId, normalizedSessionId);
635
+ }
636
+ /**
637
+ * Get or create a tracked session when the caller has a real session identifier.
638
+ * Returns undefined when no trustworthy identifier is available.
639
+ */
640
+ function getCurrentSession(endpoint, sessionId) {
641
+ const normalized = normalizeSessionId(sessionId);
642
+ if (!normalized) return void 0;
643
+ ensureSession(normalized, endpoint);
644
+ return normalized;
645
+ }
646
+ function getSessions() {
647
+ const sessions = Array.from(historyState.sessions.values()).sort((a, b) => b.lastActivity - a.lastActivity);
648
+ return {
649
+ sessions,
650
+ total: sessions.length
651
+ };
652
+ }
653
+ function getSession(id) {
654
+ return historyState.sessions.get(id);
655
+ }
656
+ function getSessionEntries(sessionId, options = {}) {
657
+ const { cursor, limit = 50 } = options;
658
+ const all = historyState.entries.filter((entry) => entry.sessionId === sessionId).sort((a, b) => a.startedAt - b.startedAt);
659
+ const total = all.length;
660
+ let startIdx = 0;
661
+ if (cursor) {
662
+ const cursorIdx = all.findIndex((entry) => entry.id === cursor);
663
+ if (cursorIdx !== -1) startIdx = cursorIdx + 1;
664
+ }
665
+ const entries = all.slice(startIdx, startIdx + limit);
666
+ return {
667
+ entries,
668
+ total,
669
+ nextCursor: startIdx + limit < total ? entries.at(-1)?.id ?? null : null,
670
+ prevCursor: startIdx > 0 ? entries[0]?.id ?? null : null
671
+ };
672
+ }
673
+ function deleteSession(sessionId) {
674
+ if (!historyState.sessions.has(sessionId)) return false;
675
+ const remaining = [];
676
+ for (const entry of historyState.entries) if (entry.sessionId === sessionId) {
677
+ historyIndexes.entryIndex.delete(entry.id);
678
+ historyIndexes.summaryIndex.delete(entry.id);
679
+ } else remaining.push(entry);
680
+ historyState.entries = remaining;
681
+ historyState.sessions.delete(sessionId);
682
+ historyIndexes.sessionEntryCount.delete(sessionId);
683
+ historyIndexes.sessionModelsSet.delete(sessionId);
684
+ historyIndexes.sessionToolsSet.delete(sessionId);
685
+ for (const [responseId, mappedSessionId] of historyIndexes.responseSessionIndex) if (mappedSessionId === sessionId) historyIndexes.responseSessionIndex.delete(responseId);
686
+ invalidateHistoryStats();
687
+ if (historyState.currentSessionId === sessionId) historyState.currentSessionId = "";
688
+ notifySessionDeleted(sessionId);
689
+ notifyStatsUpdated(getStats());
690
+ return true;
691
+ }
692
+ //#endregion
515
693
  //#region src/lib/history/entries.ts
516
694
  /** Extract a preview from the last user message (first 100 chars) */
517
695
  function extractPreviewText(entry) {
@@ -542,9 +720,17 @@ function extractPreviewText(entry) {
542
720
  function toSummary(entry) {
543
721
  return {
544
722
  id: entry.id,
545
- sessionId: entry.sessionId,
546
- timestamp: entry.timestamp,
723
+ ...entry.sessionId ? { sessionId: entry.sessionId } : {},
724
+ rawPath: entry.rawPath,
725
+ startedAt: entry.startedAt,
726
+ endedAt: entry.endedAt,
547
727
  endpoint: entry.endpoint,
728
+ state: entry.state,
729
+ active: entry.active,
730
+ lastUpdatedAt: entry.lastUpdatedAt,
731
+ queueWaitMs: entry.queueWaitMs,
732
+ attemptCount: entry.attemptCount,
733
+ currentStrategy: entry.currentStrategy,
548
734
  requestModel: entry.request.model,
549
735
  stream: entry.request.stream,
550
736
  messageCount: entry.request.messages?.length ?? 0,
@@ -558,6 +744,7 @@ function toSummary(entry) {
558
744
  };
559
745
  }
560
746
  function updateSessionMetadata(entry) {
747
+ if (!entry.sessionId) return;
561
748
  const session = historyState.sessions.get(entry.sessionId);
562
749
  if (!session) return;
563
750
  const model = entry.request.model;
@@ -581,6 +768,30 @@ function updateSessionMetadata(entry) {
581
768
  }
582
769
  }
583
770
  }
771
+ function attachEntryToSession(entry) {
772
+ if (!entry.sessionId) return;
773
+ const sessionId = getCurrentSession(entry.endpoint, entry.sessionId);
774
+ if (!sessionId) return;
775
+ const session = historyState.sessions.get(sessionId);
776
+ if (!session) return;
777
+ entry.sessionId = sessionId;
778
+ session.requestCount++;
779
+ historyIndexes.sessionEntryCount.set(sessionId, (historyIndexes.sessionEntryCount.get(sessionId) ?? 0) + 1);
780
+ updateSessionMetadata(entry);
781
+ }
782
+ function detachEntryFromSession(entry) {
783
+ if (!entry.sessionId) return;
784
+ const sessionId = entry.sessionId;
785
+ const session = historyState.sessions.get(sessionId);
786
+ if (session) session.requestCount = Math.max(0, session.requestCount - 1);
787
+ const sessionCount = (historyIndexes.sessionEntryCount.get(sessionId) ?? 1) - 1;
788
+ if (sessionCount <= 0) {
789
+ historyIndexes.sessionEntryCount.delete(sessionId);
790
+ historyIndexes.sessionModelsSet.delete(sessionId);
791
+ historyIndexes.sessionToolsSet.delete(sessionId);
792
+ historyState.sessions.delete(sessionId);
793
+ } else historyIndexes.sessionEntryCount.set(sessionId, sessionCount);
794
+ }
584
795
  function removeOldestEntries(count) {
585
796
  if (count <= 0 || historyState.entries.length === 0) return 0;
586
797
  const actualCount = Math.min(count, historyState.entries.length);
@@ -588,13 +799,7 @@ function removeOldestEntries(count) {
588
799
  for (const entry of removed) {
589
800
  historyIndexes.entryIndex.delete(entry.id);
590
801
  historyIndexes.summaryIndex.delete(entry.id);
591
- const sessionCount = (historyIndexes.sessionEntryCount.get(entry.sessionId) ?? 1) - 1;
592
- if (sessionCount <= 0) {
593
- historyIndexes.sessionEntryCount.delete(entry.sessionId);
594
- historyIndexes.sessionModelsSet.delete(entry.sessionId);
595
- historyIndexes.sessionToolsSet.delete(entry.sessionId);
596
- historyState.sessions.delete(entry.sessionId);
597
- } else historyIndexes.sessionEntryCount.set(entry.sessionId, sessionCount);
802
+ detachEntryFromSession(entry);
598
803
  }
599
804
  if (removed.length > 0) invalidateHistoryStats();
600
805
  return removed.length;
@@ -606,13 +811,9 @@ function evictOldestEntries(count) {
606
811
  }
607
812
  function insertEntry(entry) {
608
813
  if (!historyState.enabled) return;
609
- const session = historyState.sessions.get(entry.sessionId);
610
- if (!session) return;
611
814
  historyState.entries.push(entry);
612
815
  historyIndexes.entryIndex.set(entry.id, entry);
613
- session.requestCount++;
614
- historyIndexes.sessionEntryCount.set(entry.sessionId, (historyIndexes.sessionEntryCount.get(entry.sessionId) ?? 0) + 1);
615
- updateSessionMetadata(entry);
816
+ attachEntryToSession(entry);
616
817
  const summary = toSummary(entry);
617
818
  historyIndexes.summaryIndex.set(entry.id, summary);
618
819
  if (historyState.maxEntries > 0 && historyState.entries.length > historyState.maxEntries) removeOldestEntries(historyState.entries.length - historyState.maxEntries);
@@ -624,19 +825,35 @@ function updateEntry(id, update) {
624
825
  if (!historyState.enabled) return;
625
826
  const entry = historyIndexes.entryIndex.get(id);
626
827
  if (!entry) return;
828
+ if (update.sessionId !== void 0 && update.sessionId !== entry.sessionId) {
829
+ detachEntryFromSession(entry);
830
+ entry.sessionId = update.sessionId;
831
+ attachEntryToSession(entry);
832
+ }
627
833
  if (update.request) {
628
834
  entry.request = update.request;
629
835
  updateSessionMetadata(entry);
630
836
  }
837
+ if (update.rawPath !== void 0) entry.rawPath = update.rawPath;
838
+ if (update.state !== void 0) entry.state = update.state;
839
+ if (update.active !== void 0) entry.active = update.active;
840
+ if (update.lastUpdatedAt !== void 0) entry.lastUpdatedAt = update.lastUpdatedAt;
841
+ if (update.queueWaitMs !== void 0) entry.queueWaitMs = update.queueWaitMs;
842
+ if (update.attemptCount !== void 0) entry.attemptCount = update.attemptCount;
843
+ if (update.currentStrategy !== void 0) entry.currentStrategy = update.currentStrategy;
631
844
  if (update.response) entry.response = update.response;
632
845
  if (update.pipelineInfo) entry.pipelineInfo = update.pipelineInfo;
633
846
  if (update.durationMs !== void 0) entry.durationMs = update.durationMs;
847
+ if (update.startedAt !== void 0) entry.startedAt = update.startedAt;
848
+ if (update.endedAt !== void 0) entry.endedAt = update.endedAt;
849
+ if (update.transport !== void 0) entry.transport = update.transport;
634
850
  if (update.sseEvents) entry.sseEvents = update.sseEvents;
635
851
  if (update.effectiveRequest) entry.effectiveRequest = update.effectiveRequest;
636
852
  if (update.wireRequest) entry.wireRequest = update.wireRequest;
637
853
  if (update.attempts) entry.attempts = update.attempts;
854
+ if (update.warningMessages) entry.warningMessages = update.warningMessages;
638
855
  if (update.response) {
639
- const session = historyState.sessions.get(entry.sessionId);
856
+ const session = entry.sessionId ? historyState.sessions.get(entry.sessionId) : void 0;
640
857
  if (session) {
641
858
  session.totalInputTokens += update.response.usage.input_tokens;
642
859
  session.totalOutputTokens += update.response.usage.output_tokens;
@@ -652,7 +869,7 @@ function updateEntry(id, update) {
652
869
  function clearHistory() {
653
870
  historyState.entries = [];
654
871
  historyState.sessions = /* @__PURE__ */ new Map();
655
- historyState.currentSessionId = generateId();
872
+ historyState.currentSessionId = "";
656
873
  resetHistoryIndexes();
657
874
  invalidateHistoryStats();
658
875
  notifyHistoryCleared();
@@ -673,13 +890,13 @@ function getHistorySummaries(options = {}) {
673
890
  }
674
891
  if (endpoint) summaries = summaries.filter((summary) => summary.endpoint === endpoint);
675
892
  if (success !== void 0) summaries = summaries.filter((summary) => summary.responseSuccess === success);
676
- if (from) summaries = summaries.filter((summary) => summary.timestamp >= from);
677
- if (to) summaries = summaries.filter((summary) => summary.timestamp <= to);
893
+ if (from) summaries = summaries.filter((summary) => summary.startedAt >= from);
894
+ if (to) summaries = summaries.filter((summary) => summary.startedAt <= to);
678
895
  if (search) {
679
896
  const needle = search.toLowerCase();
680
897
  summaries = summaries.filter((summary) => ensureSearchText(summary.id).includes(needle));
681
898
  }
682
- summaries.sort((a, b) => b.timestamp - a.timestamp || b.id.localeCompare(a.id));
899
+ summaries.sort((a, b) => b.startedAt - a.startedAt || b.id.localeCompare(a.id));
683
900
  const total = summaries.length;
684
901
  let startIdx = 0;
685
902
  if (cursor) {
@@ -695,85 +912,6 @@ function getHistorySummaries(options = {}) {
695
912
  };
696
913
  }
697
914
  //#endregion
698
- //#region src/lib/history/sessions.ts
699
- /**
700
- * Get or create current session.
701
- * Currently treats all requests as belonging to one session per server lifetime,
702
- * since clients don't provide session identifiers yet.
703
- * TODO: When clients support session headers, use that to group requests.
704
- */
705
- function getCurrentSession(endpoint) {
706
- if (historyState.currentSessionId) {
707
- const session = historyState.sessions.get(historyState.currentSessionId);
708
- if (session) {
709
- session.lastActivity = Date.now();
710
- if (!session.endpoints.includes(endpoint)) session.endpoints.push(endpoint);
711
- return historyState.currentSessionId;
712
- }
713
- }
714
- const now = Date.now();
715
- const sessionId = generateId();
716
- historyState.currentSessionId = sessionId;
717
- historyIndexes.sessionModelsSet.set(sessionId, /* @__PURE__ */ new Set());
718
- historyIndexes.sessionToolsSet.set(sessionId, /* @__PURE__ */ new Set());
719
- historyState.sessions.set(sessionId, {
720
- id: sessionId,
721
- startTime: now,
722
- lastActivity: now,
723
- requestCount: 0,
724
- totalInputTokens: 0,
725
- totalOutputTokens: 0,
726
- models: [],
727
- endpoints: [endpoint]
728
- });
729
- return sessionId;
730
- }
731
- function getSessions() {
732
- const sessions = Array.from(historyState.sessions.values()).sort((a, b) => b.lastActivity - a.lastActivity);
733
- return {
734
- sessions,
735
- total: sessions.length
736
- };
737
- }
738
- function getSession(id) {
739
- return historyState.sessions.get(id);
740
- }
741
- function getSessionEntries(sessionId, options = {}) {
742
- const { cursor, limit = 50 } = options;
743
- const all = historyState.entries.filter((entry) => entry.sessionId === sessionId).sort((a, b) => a.timestamp - b.timestamp);
744
- const total = all.length;
745
- let startIdx = 0;
746
- if (cursor) {
747
- const cursorIdx = all.findIndex((entry) => entry.id === cursor);
748
- if (cursorIdx !== -1) startIdx = cursorIdx + 1;
749
- }
750
- const entries = all.slice(startIdx, startIdx + limit);
751
- return {
752
- entries,
753
- total,
754
- nextCursor: startIdx + limit < total ? entries.at(-1)?.id ?? null : null,
755
- prevCursor: startIdx > 0 ? entries[0]?.id ?? null : null
756
- };
757
- }
758
- function deleteSession(sessionId) {
759
- if (!historyState.sessions.has(sessionId)) return false;
760
- const remaining = [];
761
- for (const entry of historyState.entries) if (entry.sessionId === sessionId) {
762
- historyIndexes.entryIndex.delete(entry.id);
763
- historyIndexes.summaryIndex.delete(entry.id);
764
- } else remaining.push(entry);
765
- historyState.entries = remaining;
766
- historyState.sessions.delete(sessionId);
767
- historyIndexes.sessionEntryCount.delete(sessionId);
768
- historyIndexes.sessionModelsSet.delete(sessionId);
769
- historyIndexes.sessionToolsSet.delete(sessionId);
770
- invalidateHistoryStats();
771
- if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId();
772
- notifySessionDeleted(sessionId);
773
- notifyStatsUpdated(getStats());
774
- return true;
775
- }
776
- //#endregion
777
915
  //#region src/lib/history/memory-pressure.ts
778
916
  /**
779
917
  * Memory pressure monitor — proactively evicts old history entries
@@ -881,392 +1019,162 @@ function getMemoryPressureStats() {
881
1019
  };
882
1020
  }
883
1021
  //#endregion
884
- //#region src/lib/config/paths.ts
885
- const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api");
886
- const PATHS = {
887
- APP_DIR,
888
- GITHUB_TOKEN_PATH: path.join(APP_DIR, "github_token"),
889
- CONFIG_YAML: path.join(APP_DIR, "config.yaml"),
890
- LEARNED_LIMITS: path.join(APP_DIR, "learned-limits.json"),
891
- ERROR_DIR: path.join(APP_DIR, "errmsgs")
892
- };
893
- async function ensurePaths() {
894
- await fs.mkdir(PATHS.APP_DIR, { recursive: true });
895
- await ensureFile(PATHS.GITHUB_TOKEN_PATH);
896
- }
897
- async function ensureFile(filePath) {
898
- const isWindows = process.platform === "win32";
899
- try {
900
- await fs.access(filePath, fs.constants.W_OK);
901
- if (!isWindows) {
902
- if (((await fs.stat(filePath)).mode & 511) !== 384) await fs.chmod(filePath, 384);
903
- }
904
- } catch {
905
- await fs.writeFile(filePath, "");
906
- if (!isWindows) await fs.chmod(filePath, 384);
907
- }
908
- }
909
- //#endregion
910
- //#region src/lib/config/config.ts
1022
+ //#region src/lib/copilot-api.ts
1023
+ const standardHeaders = () => ({
1024
+ "content-type": "application/json",
1025
+ accept: "application/json"
1026
+ });
1027
+ const COPILOT_VERSION = "0.38.0";
1028
+ const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`;
1029
+ const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`;
1030
+ /** Copilot Chat API version (for chat/completions requests) */
1031
+ const COPILOT_API_VERSION = "2025-05-01";
1032
+ /** Copilot internal API version (for token & usage endpoints) */
1033
+ const COPILOT_INTERNAL_API_VERSION = "2025-04-01";
1034
+ /** GitHub public API version (for /user, repos, etc.) */
1035
+ const GITHUB_API_VERSION = "2022-11-28";
911
1036
  /**
912
- * Application configuration: types, YAML loading, and state application.
913
- *
914
- * All config types live here as the single source of truth.
915
- * config.yaml is loaded with mtime-based caching.
1037
+ * Session-level interaction ID.
1038
+ * Used to correlate all requests within a single server session.
1039
+ * Unlike x-request-id (per-request UUID), this stays constant for the server lifetime.
916
1040
  */
917
- /** Compile a raw rewrite rule into a CompiledRewriteRule. Returns null for invalid regex. */
918
- function compileRewriteRule(raw) {
919
- const method = raw.method ?? "regex";
920
- let modelPattern;
921
- if (raw.model) try {
922
- modelPattern = new RegExp(raw.model, "i");
923
- } catch (err) {
924
- consola.warn(`[config] Invalid model regex in rewrite rule: "${raw.model}"`, err);
925
- return null;
926
- }
927
- if (method === "line") return {
928
- from: raw.from,
929
- to: raw.to,
930
- method,
931
- modelPattern
1041
+ const INTERACTION_ID = randomUUID();
1042
+ const copilotBaseUrl = (state) => state.accountType === "individual" ? "https://api.githubcopilot.com" : `https://api.${state.accountType}.githubcopilot.com`;
1043
+ const copilotWsUrl = (state) => copilotBaseUrl(state).replace(/^https:\/\//u, "wss://") + "/responses";
1044
+ const copilotHeaders = (state, opts) => {
1045
+ const requestId = randomUUID();
1046
+ const interactionType = opts?.intent ?? "conversation-panel";
1047
+ const headers = {
1048
+ Authorization: `Bearer ${state.copilotToken}`,
1049
+ "content-type": standardHeaders()["content-type"],
1050
+ "copilot-integration-id": "vscode-chat",
1051
+ "editor-version": `vscode/${state.vsCodeVersion}`,
1052
+ "editor-plugin-version": EDITOR_PLUGIN_VERSION,
1053
+ "user-agent": USER_AGENT,
1054
+ "openai-intent": interactionType,
1055
+ "x-github-api-version": COPILOT_API_VERSION,
1056
+ "x-request-id": requestId,
1057
+ "X-Interaction-Id": INTERACTION_ID,
1058
+ "X-Interaction-Type": interactionType,
1059
+ "X-Agent-Task-Id": requestId,
1060
+ "x-vscode-user-agent-library-version": "electron-fetch"
932
1061
  };
933
- try {
934
- let pattern = raw.from;
935
- let flags = "gms";
936
- const inlineMatch = pattern.match(/^\(\?([a-z]+)\)/i);
937
- if (inlineMatch) {
938
- pattern = pattern.slice(inlineMatch[0].length);
939
- for (const f of inlineMatch[1]) if (!flags.includes(f)) flags += f;
940
- }
941
- return {
942
- from: new RegExp(pattern, flags),
943
- to: raw.to,
944
- method,
945
- modelPattern
946
- };
947
- } catch (err) {
948
- consola.warn(`[config] Invalid regex in rewrite rule: "${raw.from}"`, err);
949
- return null;
1062
+ if (opts?.vision) headers["copilot-vision-request"] = "true";
1063
+ if (opts?.modelRequestHeaders) {
1064
+ const coreKeysLower = new Set(Object.keys(headers).map((k) => k.toLowerCase()));
1065
+ for (const [key, value] of Object.entries(opts.modelRequestHeaders)) if (!coreKeysLower.has(key.toLowerCase())) headers[key] = value;
950
1066
  }
1067
+ return headers;
1068
+ };
1069
+ const GITHUB_API_BASE_URL = "https://api.github.com";
1070
+ const githubHeaders = (state) => ({
1071
+ ...standardHeaders(),
1072
+ authorization: `token ${state.githubToken}`,
1073
+ "editor-version": `vscode/${state.vsCodeVersion}`,
1074
+ "editor-plugin-version": EDITOR_PLUGIN_VERSION,
1075
+ "user-agent": USER_AGENT,
1076
+ "x-github-api-version": GITHUB_API_VERSION,
1077
+ "x-vscode-user-agent-library-version": "electron-fetch"
1078
+ });
1079
+ const GITHUB_BASE_URL = "https://github.com";
1080
+ const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98";
1081
+ ["read:user"].join(" ");
1082
+ /** Fallback VSCode version when GitHub API is unavailable */
1083
+ const VSCODE_VERSION_FALLBACK = "1.104.3";
1084
+ /** GitHub API endpoint for latest VSCode release */
1085
+ const VSCODE_RELEASE_URL = "https://api.github.com/repos/microsoft/vscode/releases/latest";
1086
+ /** Fetch the latest VSCode version and cache in global state */
1087
+ async function cacheVSCodeVersion() {
1088
+ setVSCodeVersion(await getVSCodeVersion());
951
1089
  }
952
- /** Compile an array of raw rewrite rules, skipping invalid ones */
953
- function compileRewriteRules(raws) {
954
- return raws.map((r) => compileRewriteRule(r)).filter((r) => r !== null);
955
- }
956
- let cachedConfig = null;
957
- let configLastMtimeMs = 0;
958
- /** Time-based debounce: skip stat() if checked recently */
959
- let lastStatTimeMs = 0;
960
- const STAT_DEBOUNCE_MS = 2e3;
961
- async function loadConfig() {
1090
+ /** Fetch the latest VSCode version from GitHub releases, falling back to a hardcoded version */
1091
+ async function getVSCodeVersion() {
1092
+ const controller = new AbortController();
1093
+ const timeout = setTimeout(() => {
1094
+ controller.abort();
1095
+ }, 5e3);
962
1096
  try {
963
- const now = Date.now();
964
- if (cachedConfig && now - lastStatTimeMs < STAT_DEBOUNCE_MS) return cachedConfig;
965
- const stat = await fs.stat(PATHS.CONFIG_YAML);
966
- lastStatTimeMs = now;
967
- if (cachedConfig && stat.mtimeMs === configLastMtimeMs) return cachedConfig;
968
- const content = await fs.readFile(PATHS.CONFIG_YAML, "utf8");
969
- const { parse } = await import("yaml");
970
- cachedConfig = parse(content) ?? {};
971
- configLastMtimeMs = stat.mtimeMs;
972
- return cachedConfig;
973
- } catch (err) {
974
- if (err.code === "ENOENT") return {};
975
- try {
976
- configLastMtimeMs = (await fs.stat(PATHS.CONFIG_YAML)).mtimeMs;
977
- } catch {}
978
- consola.warn("[config] Failed to load config.yaml:", err);
979
- return {};
1097
+ const response = await fetch(VSCODE_RELEASE_URL, {
1098
+ signal: controller.signal,
1099
+ headers: {
1100
+ Accept: "application/vnd.github.v3+json",
1101
+ "User-Agent": "copilot-api"
1102
+ }
1103
+ });
1104
+ if (!response.ok) return VSCODE_VERSION_FALLBACK;
1105
+ const version = (await response.json()).tag_name;
1106
+ if (version && /^\d+\.\d+\.\d+$/.test(version)) return version;
1107
+ return VSCODE_VERSION_FALLBACK;
1108
+ } catch {
1109
+ return VSCODE_VERSION_FALLBACK;
1110
+ } finally {
1111
+ clearTimeout(timeout);
980
1112
  }
981
1113
  }
982
- /** Get the mtime of the currently cached config (0 if not loaded) */
983
- function getConfigMtimeMs() {
984
- return configLastMtimeMs;
985
- }
986
- let hasApplied = false;
987
- let lastAppliedMtimeMs = 0;
988
- /**
989
- * Load config.yaml and apply all hot-reloadable settings to global state.
990
- *
991
- * Scalar fields: only overridden when explicitly present in config (deleted keys keep current runtime value).
992
- * Collection fields (model_overrides, rewrite_system_reminders array): entire replacement when present.
993
- *
994
- * Safe to call per-request — loadConfig() is mtime-cached, so unchanged config
995
- * only costs one stat() syscall.
996
- *
997
- * NOT hot-reloaded: rate_limiter (stateful singleton initialized at startup).
998
- */
999
- async function applyConfigToState() {
1000
- const config = await loadConfig();
1001
- if (config.anthropic) {
1002
- const a = config.anthropic;
1003
- if (a.strip_server_tools !== void 0) setAnthropicBehavior({ stripServerTools: a.strip_server_tools });
1004
- if (a.immutable_thinking_messages !== void 0) setAnthropicBehavior({ immutableThinkingMessages: a.immutable_thinking_messages });
1005
- if (a.dedup_tool_calls !== void 0) setAnthropicBehavior({ dedupToolCalls: a.dedup_tool_calls === true ? "input" : a.dedup_tool_calls });
1006
- if (a.strip_read_tool_result_tags !== void 0) setAnthropicBehavior({ stripReadToolResultTags: a.strip_read_tool_result_tags });
1007
- if (a.context_editing !== void 0) setAnthropicBehavior({ contextEditingMode: a.context_editing });
1008
- if (a.rewrite_system_reminders !== void 0) {
1009
- if (typeof a.rewrite_system_reminders === "boolean") setAnthropicBehavior({ rewriteSystemReminders: a.rewrite_system_reminders });
1010
- else if (Array.isArray(a.rewrite_system_reminders)) setAnthropicBehavior({ rewriteSystemReminders: compileRewriteRules(a.rewrite_system_reminders) });
1011
- }
1012
- }
1013
- if (Array.isArray(config.system_prompt_overrides)) setAnthropicBehavior({ systemPromptOverrides: config.system_prompt_overrides.length > 0 ? compileRewriteRules(config.system_prompt_overrides) : [] });
1014
- if (config.model_overrides) setModelOverrides({
1015
- ...DEFAULT_MODEL_OVERRIDES,
1016
- ...config.model_overrides
1017
- });
1018
- if (config.compress_tool_results_before_truncate !== void 0) setAnthropicBehavior({ compressToolResultsBeforeTruncate: config.compress_tool_results_before_truncate });
1019
- if (config.history) {
1020
- const h = config.history;
1021
- if (h.limit !== void 0) {
1022
- setHistoryConfig({ historyLimit: h.limit });
1023
- setHistoryMaxEntries(h.limit);
1024
- }
1025
- if (h.min_entries !== void 0) setHistoryConfig({ historyMinEntries: h.min_entries });
1114
+ //#endregion
1115
+ //#region src/lib/error/http-error.ts
1116
+ var HTTPError = class HTTPError extends Error {
1117
+ status;
1118
+ responseText;
1119
+ /** Model ID that caused the error (if known) */
1120
+ modelId;
1121
+ /** Original response headers (for Retry-After, quota snapshots, etc.) */
1122
+ responseHeaders;
1123
+ constructor(message, status, responseText, modelId, responseHeaders) {
1124
+ super(message);
1125
+ this.status = status;
1126
+ this.responseText = responseText;
1127
+ this.modelId = modelId;
1128
+ this.responseHeaders = responseHeaders;
1026
1129
  }
1027
- if (config.shutdown) {
1028
- const s = config.shutdown;
1029
- if (s.graceful_wait !== void 0) setShutdownConfig({ shutdownGracefulWait: s.graceful_wait });
1030
- if (s.abort_wait !== void 0) setShutdownConfig({ shutdownAbortWait: s.abort_wait });
1130
+ static async fromResponse(message, response, modelId) {
1131
+ const text = await response.text();
1132
+ return new HTTPError(message, response.status, text, modelId, response.headers);
1031
1133
  }
1032
- if (config.fetch_timeout !== void 0) setTimeoutConfig({ fetchTimeout: config.fetch_timeout });
1033
- if (config.stream_idle_timeout !== void 0) setTimeoutConfig({ streamIdleTimeout: config.stream_idle_timeout });
1034
- if (config.stale_request_max_age !== void 0) setTimeoutConfig({ staleRequestMaxAge: config.stale_request_max_age });
1035
- const responsesConfig = config["openai-responses"];
1036
- if (responsesConfig && responsesConfig.normalize_call_ids !== void 0) setResponsesConfig({ normalizeResponsesCallIds: responsesConfig.normalize_call_ids });
1037
- const currentMtime = getConfigMtimeMs();
1038
- if (hasApplied && currentMtime !== lastAppliedMtimeMs) consola.info("[config] Reloaded config.yaml");
1039
- hasApplied = true;
1040
- lastAppliedMtimeMs = currentMtime;
1041
- return config;
1042
- }
1134
+ };
1043
1135
  //#endregion
1044
- //#region src/lib/proxy.ts
1045
- /**
1046
- * Proxy configuration: HTTP/HTTPS and SOCKS5/5h proxy support.
1047
- *
1048
- * Priority: explicit proxy URL (CLI --proxy or config.yaml) > env vars (--http-proxy-from-env).
1049
- * On Node.js, proxying works via undici's global dispatcher.
1050
- * On Bun, HTTP proxies are set via env vars (Bun handles them natively); SOCKS5 is not supported.
1051
- */
1052
- /**
1053
- * Initialize proxy for all outgoing fetch requests.
1054
- *
1055
- * On Node.js: sets undici's global dispatcher.
1056
- * On Bun: sets process.env.HTTP_PROXY/HTTPS_PROXY for HTTP proxies (Bun handles natively).
1057
- *
1058
- * Must be called before any network requests.
1059
- */
1060
- function initProxy(options) {
1061
- if (typeof Bun !== "undefined") {
1062
- initProxyBun(options);
1063
- return;
1064
- }
1065
- initProxyNode(options);
1136
+ //#region src/lib/error/parsing.ts
1137
+ /** Parse token limit info from error message text. */
1138
+ function parseTokenLimitError(message) {
1139
+ const openaiMatch = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
1140
+ if (openaiMatch) return {
1141
+ current: Number.parseInt(openaiMatch[1], 10),
1142
+ limit: Number.parseInt(openaiMatch[2], 10)
1143
+ };
1144
+ const anthropicMatch = message.match(/prompt is too long: (\d+) tokens > (\d+) maximum/);
1145
+ if (anthropicMatch) return {
1146
+ current: Number.parseInt(anthropicMatch[1], 10),
1147
+ limit: Number.parseInt(anthropicMatch[2], 10)
1148
+ };
1149
+ return null;
1066
1150
  }
1067
- /** Format a proxy URL for display (strip credentials) */
1068
- function formatProxyDisplay(proxyUrl) {
1151
+ /** Extract retry_after from JSON response body. */
1152
+ function extractRetryAfterFromBody(responseText) {
1069
1153
  try {
1070
- const u = new URL(proxyUrl);
1071
- const auth = u.username ? `${u.username}:***@` : "";
1072
- return `${u.protocol}//${auth}${u.host}`;
1073
- } catch {
1074
- return proxyUrl;
1075
- }
1154
+ const parsed = JSON.parse(responseText);
1155
+ if (parsed && typeof parsed === "object") {
1156
+ if ("retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
1157
+ if ("error" in parsed) {
1158
+ const err = parsed.error;
1159
+ if (err && typeof err === "object" && "retry_after" in err && typeof err.retry_after === "number") return err.retry_after;
1160
+ }
1161
+ }
1162
+ } catch {}
1076
1163
  }
1077
- function initProxyNode(options) {
1164
+ /** Check if a 503 response body indicates upstream provider rate limiting. */
1165
+ function isUpstreamRateLimited(responseText) {
1078
1166
  try {
1079
- if (options.url) {
1080
- setGlobalDispatcher(createDispatcherForUrl(options.url));
1081
- consola.debug(`Proxy configured: ${formatProxyDisplay(options.url)}`);
1082
- return;
1083
- }
1084
- if (options.fromEnv) {
1085
- setGlobalDispatcher(new EnvProxyDispatcher());
1086
- consola.debug("HTTP proxy configured from environment (per-URL)");
1087
- }
1088
- } catch (err) {
1089
- consola.error("Proxy setup failed:", err);
1090
- throw err;
1091
- }
1092
- }
1093
- /** Create the appropriate undici dispatcher for a proxy URL scheme */
1094
- function createDispatcherForUrl(proxyUrl) {
1095
- const url = new URL(proxyUrl);
1096
- const protocol = url.protocol.toLowerCase();
1097
- if (protocol === "http:" || protocol === "https:") return new ProxyAgent(proxyUrl);
1098
- if (protocol === "socks5:" || protocol === "socks5h:") return createSocksAgent(url);
1099
- throw new Error(`Unsupported proxy protocol: ${protocol}. Supported: http, https, socks5, socks5h`);
1100
- }
1101
- /**
1102
- * Create an undici Agent that routes connections through a SOCKS5/5h proxy.
1103
- *
1104
- * For socks5h:// the proxy performs DNS resolution (hostname passed as-is).
1105
- * For socks5:// the hostname is also passed to the proxy (proxy resolves).
1106
- * Both protocols support username/password authentication via URL credentials.
1107
- */
1108
- function createSocksAgent(proxyUrl) {
1109
- const proxy = {
1110
- host: proxyUrl.hostname,
1111
- port: Number(proxyUrl.port) || 1080,
1112
- type: 5
1113
- };
1114
- if (proxyUrl.username) {
1115
- proxy.userId = decodeURIComponent(proxyUrl.username);
1116
- proxy.password = proxyUrl.password ? decodeURIComponent(proxyUrl.password) : void 0;
1117
- }
1118
- return new Agent({ connect(opts, callback) {
1119
- const destPort = Number(opts.port) || (opts.protocol === "https:" ? 443 : 80);
1120
- SocksClient.createConnection({
1121
- proxy,
1122
- command: "connect",
1123
- destination: {
1124
- host: opts.hostname,
1125
- port: destPort
1126
- }
1127
- }).then(({ socket }) => {
1128
- if (opts.protocol === "https:") callback(null, tls.connect({
1129
- socket,
1130
- servername: opts.servername ?? opts.hostname
1131
- }));
1132
- else callback(null, socket);
1133
- }).catch((err) => {
1134
- callback(err instanceof Error ? err : new Error(String(err)), null);
1135
- });
1136
- } });
1137
- }
1138
- /**
1139
- * Custom dispatcher that routes requests through proxies based on environment variables.
1140
- * Uses proxy-from-env to resolve HTTP_PROXY/HTTPS_PROXY/NO_PROXY per-URL.
1141
- */
1142
- var EnvProxyDispatcher = class extends Agent {
1143
- proxies = /* @__PURE__ */ new Map();
1144
- dispatch(options, handler) {
1145
- try {
1146
- const origin = this.getOriginUrl(options.origin);
1147
- const proxyUrl = this.getProxyUrl(origin);
1148
- if (!proxyUrl) {
1149
- consola.debug(`HTTP proxy bypass: ${origin.hostname}`);
1150
- return super.dispatch(options, handler);
1151
- }
1152
- const agent = this.getOrCreateProxyAgent(proxyUrl);
1153
- consola.debug(`HTTP proxy route: ${origin.hostname} via ${formatProxyDisplay(proxyUrl)}`);
1154
- return agent.dispatch(options, handler);
1155
- } catch {
1156
- return super.dispatch(options, handler);
1157
- }
1158
- }
1159
- getOriginUrl(origin) {
1160
- return typeof origin === "string" ? new URL(origin) : origin;
1161
- }
1162
- getProxyUrl(origin) {
1163
- const raw = getProxyForUrl(origin.toString());
1164
- return raw && raw.length > 0 ? raw : void 0;
1165
- }
1166
- getOrCreateProxyAgent(proxyUrl) {
1167
- let agent = this.proxies.get(proxyUrl);
1168
- if (!agent) {
1169
- agent = new ProxyAgent(proxyUrl);
1170
- this.proxies.set(proxyUrl, agent);
1171
- }
1172
- return agent;
1173
- }
1174
- async close() {
1175
- await super.close();
1176
- await Promise.all([...this.proxies.values()].map((p) => p.close()));
1177
- this.proxies.clear();
1178
- }
1179
- destroy(errOrCallback, callback) {
1180
- for (const agent of this.proxies.values()) if (typeof errOrCallback === "function") agent.destroy(errOrCallback);
1181
- else if (callback) agent.destroy(errOrCallback ?? null, callback);
1182
- else agent.destroy(errOrCallback ?? null).catch(() => {});
1183
- this.proxies.clear();
1184
- if (typeof errOrCallback === "function") {
1185
- super.destroy(errOrCallback);
1186
- return;
1187
- } else if (callback) {
1188
- super.destroy(errOrCallback ?? null, callback);
1189
- return;
1190
- } else return super.destroy(errOrCallback ?? null);
1191
- }
1192
- };
1193
- /**
1194
- * Initialize proxy for Bun runtime.
1195
- * Bun handles HTTP_PROXY/HTTPS_PROXY env vars natively.
1196
- * SOCKS5 proxies are not supported on Bun.
1197
- */
1198
- function initProxyBun(options) {
1199
- if (!options.url) return;
1200
- const protocol = new URL(options.url).protocol.toLowerCase();
1201
- if (protocol === "socks5:" || protocol === "socks5h:") throw new Error("SOCKS5 proxy is not supported on Bun runtime. Use Node.js or an HTTP proxy instead.");
1202
- process.env.HTTP_PROXY = options.url;
1203
- process.env.HTTPS_PROXY = options.url;
1204
- consola.debug(`Proxy configured (Bun env): ${formatProxyDisplay(options.url)}`);
1205
- }
1206
- //#endregion
1207
- //#region src/lib/error/http-error.ts
1208
- var HTTPError = class HTTPError extends Error {
1209
- status;
1210
- responseText;
1211
- /** Model ID that caused the error (if known) */
1212
- modelId;
1213
- /** Original response headers (for Retry-After, quota snapshots, etc.) */
1214
- responseHeaders;
1215
- constructor(message, status, responseText, modelId, responseHeaders) {
1216
- super(message);
1217
- this.status = status;
1218
- this.responseText = responseText;
1219
- this.modelId = modelId;
1220
- this.responseHeaders = responseHeaders;
1221
- }
1222
- static async fromResponse(message, response, modelId) {
1223
- const text = await response.text();
1224
- return new HTTPError(message, response.status, text, modelId, response.headers);
1225
- }
1226
- };
1227
- //#endregion
1228
- //#region src/lib/error/parsing.ts
1229
- /** Parse token limit info from error message text. */
1230
- function parseTokenLimitError(message) {
1231
- const openaiMatch = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
1232
- if (openaiMatch) return {
1233
- current: Number.parseInt(openaiMatch[1], 10),
1234
- limit: Number.parseInt(openaiMatch[2], 10)
1235
- };
1236
- const anthropicMatch = message.match(/prompt is too long: (\d+) tokens > (\d+) maximum/);
1237
- if (anthropicMatch) return {
1238
- current: Number.parseInt(anthropicMatch[1], 10),
1239
- limit: Number.parseInt(anthropicMatch[2], 10)
1240
- };
1241
- return null;
1242
- }
1243
- /** Extract retry_after from JSON response body. */
1244
- function extractRetryAfterFromBody(responseText) {
1245
- try {
1246
- const parsed = JSON.parse(responseText);
1247
- if (parsed && typeof parsed === "object") {
1248
- if ("retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
1249
- if ("error" in parsed) {
1250
- const err = parsed.error;
1251
- if (err && typeof err === "object" && "retry_after" in err && typeof err.retry_after === "number") return err.retry_after;
1252
- }
1253
- }
1254
- } catch {}
1255
- }
1256
- /** Check if a 503 response body indicates upstream provider rate limiting. */
1257
- function isUpstreamRateLimited(responseText) {
1258
- try {
1259
- const parsed = JSON.parse(responseText);
1260
- if (parsed && typeof parsed === "object" && "error" in parsed) {
1261
- const err = parsed.error;
1262
- if (err && typeof err === "object") {
1263
- const errObj = err;
1264
- if (typeof errObj.code === "string" && errObj.code.includes("rate")) return true;
1265
- if (typeof errObj.message === "string") {
1266
- const msg = errObj.message.toLowerCase();
1267
- if (msg.includes("rate limit") || msg.includes("too many requests") || msg.includes("quota")) return true;
1268
- }
1269
- }
1167
+ const parsed = JSON.parse(responseText);
1168
+ if (parsed && typeof parsed === "object" && "error" in parsed) {
1169
+ const err = parsed.error;
1170
+ if (err && typeof err === "object") {
1171
+ const errObj = err;
1172
+ if (typeof errObj.code === "string" && errObj.code.includes("rate")) return true;
1173
+ if (typeof errObj.message === "string") {
1174
+ const msg = errObj.message.toLowerCase();
1175
+ if (msg.includes("rate limit") || msg.includes("too many requests") || msg.includes("quota")) return true;
1176
+ }
1177
+ }
1270
1178
  }
1271
1179
  } catch {
1272
1180
  const lower = responseText.toLowerCase();
@@ -1625,92 +1533,441 @@ function truncateForLog(text, maxLen) {
1625
1533
  return `${text.slice(0, maxLen)}… (${text.length} bytes total)`;
1626
1534
  }
1627
1535
  //#endregion
1628
- //#region src/lib/copilot-api.ts
1629
- const standardHeaders = () => ({
1630
- "content-type": "application/json",
1631
- accept: "application/json"
1632
- });
1633
- const COPILOT_VERSION = "0.38.0";
1634
- const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`;
1635
- const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`;
1636
- /** Copilot Chat API version (for chat/completions requests) */
1637
- const COPILOT_API_VERSION = "2025-05-01";
1638
- /** Copilot internal API version (for token & usage endpoints) */
1639
- const COPILOT_INTERNAL_API_VERSION = "2025-04-01";
1640
- /** GitHub public API version (for /user, repos, etc.) */
1641
- const GITHUB_API_VERSION = "2022-11-28";
1536
+ //#region src/lib/fetch-utils.ts
1537
+ const SENSITIVE_HEADER_NAMES = new Set([
1538
+ "authorization",
1539
+ "proxy-authorization",
1540
+ "x-api-key",
1541
+ "api-key"
1542
+ ]);
1642
1543
  /**
1643
- * Session-level interaction ID.
1644
- * Used to correlate all requests within a single server session.
1645
- * Unlike x-request-id (per-request UUID), this stays constant for the server lifetime.
1544
+ * Create an AbortSignal for fetch timeout if configured.
1545
+ * Controls the time from request start to receiving response headers.
1546
+ * Returns undefined if fetchTimeout is 0 (disabled).
1646
1547
  */
1647
- const INTERACTION_ID = randomUUID();
1648
- const copilotBaseUrl = (state) => state.accountType === "individual" ? "https://api.githubcopilot.com" : `https://api.${state.accountType}.githubcopilot.com`;
1649
- const copilotHeaders = (state, opts) => {
1650
- const headers = {
1651
- Authorization: `Bearer ${state.copilotToken}`,
1652
- "content-type": standardHeaders()["content-type"],
1653
- "copilot-integration-id": "vscode-chat",
1654
- "editor-version": `vscode/${state.vsCodeVersion}`,
1655
- "editor-plugin-version": EDITOR_PLUGIN_VERSION,
1656
- "user-agent": USER_AGENT,
1657
- "openai-intent": opts?.intent ?? "conversation-panel",
1658
- "x-github-api-version": COPILOT_API_VERSION,
1659
- "x-request-id": randomUUID(),
1660
- "X-Interaction-Id": INTERACTION_ID,
1661
- "x-vscode-user-agent-library-version": "electron-fetch"
1662
- };
1663
- if (opts?.vision) headers["copilot-vision-request"] = "true";
1664
- if (opts?.modelRequestHeaders) {
1665
- const coreKeysLower = new Set(Object.keys(headers).map((k) => k.toLowerCase()));
1666
- for (const [key, value] of Object.entries(opts.modelRequestHeaders)) if (!coreKeysLower.has(key.toLowerCase())) headers[key] = value;
1667
- }
1668
- return headers;
1669
- };
1670
- const GITHUB_API_BASE_URL = "https://api.github.com";
1671
- const githubHeaders = (state) => ({
1672
- ...standardHeaders(),
1673
- authorization: `token ${state.githubToken}`,
1674
- "editor-version": `vscode/${state.vsCodeVersion}`,
1675
- "editor-plugin-version": EDITOR_PLUGIN_VERSION,
1676
- "user-agent": USER_AGENT,
1677
- "x-github-api-version": GITHUB_API_VERSION,
1678
- "x-vscode-user-agent-library-version": "electron-fetch"
1679
- });
1680
- const GITHUB_BASE_URL = "https://github.com";
1681
- const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98";
1682
- ["read:user"].join(" ");
1683
- /** Fallback VSCode version when GitHub API is unavailable */
1684
- const VSCODE_VERSION_FALLBACK = "1.104.3";
1685
- /** GitHub API endpoint for latest VSCode release */
1686
- const VSCODE_RELEASE_URL = "https://api.github.com/repos/microsoft/vscode/releases/latest";
1687
- /** Fetch the latest VSCode version and cache in global state */
1688
- async function cacheVSCodeVersion() {
1689
- setVSCodeVersion(await getVSCodeVersion());
1548
+ function createFetchSignal() {
1549
+ return state.fetchTimeout > 0 ? AbortSignal.timeout(state.fetchTimeout * 1e3) : void 0;
1690
1550
  }
1691
- /** Fetch the latest VSCode version from GitHub releases, falling back to a hardcoded version */
1692
- async function getVSCodeVersion() {
1693
- const controller = new AbortController();
1694
- const timeout = setTimeout(() => {
1695
- controller.abort();
1696
- }, 5e3);
1551
+ /**
1552
+ * Populate a HeadersCapture object with request and response headers.
1553
+ * Should be called immediately after fetch(), before !response.ok check,
1554
+ * so headers are captured even for error responses.
1555
+ */
1556
+ function captureHttpHeaders(capture, requestHeaders, response) {
1557
+ capture.request = sanitizeHeadersForHistory(requestHeaders);
1558
+ capture.response = Object.fromEntries(response.headers.entries());
1559
+ }
1560
+ /** Return a copy of headers safe to persist in history/error artifacts. */
1561
+ function sanitizeHeadersForHistory(headers) {
1562
+ return Object.fromEntries(Object.entries(headers).map(([name, value]) => [name, SENSITIVE_HEADER_NAMES.has(name.toLowerCase()) ? "***" : value]));
1563
+ }
1564
+ //#endregion
1565
+ //#region src/lib/models/client.ts
1566
+ /** Fetch models from Copilot API and cache in global state */
1567
+ async function cacheModels() {
1568
+ setModels(await getModels());
1569
+ }
1570
+ const getModels = async () => {
1571
+ const response = await fetch(`${copilotBaseUrl(state)}/models`, {
1572
+ headers: copilotHeaders(state),
1573
+ signal: createFetchSignal()
1574
+ });
1575
+ if (!response.ok) throw await HTTPError.fromResponse("Failed to get models", response);
1576
+ return await response.json();
1577
+ };
1578
+ //#endregion
1579
+ //#region src/lib/models/refresh-loop.ts
1580
+ let refreshTimer = null;
1581
+ let refreshLoopRunning = false;
1582
+ let refreshIntervalSeconds = state.modelRefreshInterval;
1583
+ let refreshModelsImpl = cacheModels;
1584
+ function clearRefreshTimer() {
1585
+ if (refreshTimer) {
1586
+ clearTimeout(refreshTimer);
1587
+ refreshTimer = null;
1588
+ }
1589
+ }
1590
+ function logRefreshFailure(error) {
1591
+ const message = error instanceof Error ? error.message : String(error);
1592
+ if (state.models?.data.length) {
1593
+ consola.warn(`[Models] Periodic refresh failed, keeping cached models: ${message}`);
1594
+ return;
1595
+ }
1596
+ consola.error(`[Models] Periodic refresh failed with no cached models: ${message}`);
1597
+ }
1598
+ function scheduleNextRefresh() {
1599
+ clearRefreshTimer();
1600
+ if (!refreshLoopRunning || refreshIntervalSeconds <= 0) return;
1601
+ refreshTimer = setTimeout(() => {
1602
+ refreshModelsImpl().catch(logRefreshFailure).finally(() => {
1603
+ scheduleNextRefresh();
1604
+ });
1605
+ }, refreshIntervalSeconds * 1e3);
1606
+ }
1607
+ function startModelRefreshLoop(refreshModels = cacheModels) {
1608
+ refreshLoopRunning = true;
1609
+ refreshModelsImpl = refreshModels;
1610
+ refreshIntervalSeconds = state.modelRefreshInterval;
1611
+ scheduleNextRefresh();
1612
+ return () => {
1613
+ refreshLoopRunning = false;
1614
+ clearRefreshTimer();
1615
+ };
1616
+ }
1617
+ function syncModelRefreshLoop(intervalSeconds = state.modelRefreshInterval) {
1618
+ refreshIntervalSeconds = intervalSeconds;
1619
+ if (!refreshLoopRunning) return;
1620
+ scheduleNextRefresh();
1621
+ }
1622
+ //#endregion
1623
+ //#region src/lib/config/paths.ts
1624
+ const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api");
1625
+ const PATHS = {
1626
+ APP_DIR,
1627
+ GITHUB_TOKEN_PATH: path.join(APP_DIR, "github_token"),
1628
+ CONFIG_YAML: path.join(APP_DIR, "config.yaml"),
1629
+ LEARNED_LIMITS: path.join(APP_DIR, "learned-limits.json"),
1630
+ REQUEST_TELEMETRY: path.join(APP_DIR, "request-telemetry.json"),
1631
+ ERROR_DIR: path.join(APP_DIR, "errmsgs")
1632
+ };
1633
/** Create the app data directory and the GitHub token file if missing. */
async function ensurePaths() {
	await fs.mkdir(PATHS.APP_DIR, { recursive: true });
	await ensureFile(PATHS.GITHUB_TOKEN_PATH);
}
/**
 * Ensure `filePath` exists, is writable, and (on POSIX) has 0o600 permissions.
 * Existing file content is preserved; a missing or unwritable file is
 * recreated empty. Windows has no POSIX mode bits, so chmod is skipped there.
 */
async function ensureFile(filePath) {
	const isWindows = process.platform === "win32";
	const SECRET_MODE = 0o600; // owner read/write only — the file holds a token
	try {
		await fs.access(filePath, fs.constants.W_OK);
		if (!isWindows) {
			// Tighten permissions if they drifted from owner-only.
			const { mode } = await fs.stat(filePath);
			if ((mode & 0o777) !== SECRET_MODE) await fs.chmod(filePath, SECRET_MODE);
		}
	} catch {
		// Missing/unwritable: (re)create empty. Passing `mode` at creation avoids
		// a window where the file briefly exists with default (umask) permissions.
		await fs.writeFile(filePath, "", { mode: SECRET_MODE });
		if (!isWindows) await fs.chmod(filePath, SECRET_MODE); // exact bits regardless of umask
	}
}
1649
+ //#endregion
1650
+ //#region src/lib/config/config.ts
1651
+ /**
1652
+ * Application configuration: types, YAML loading, and state application.
1653
+ *
1654
+ * All config types live here as the single source of truth.
1655
+ * config.yaml is loaded with mtime-based caching.
1656
+ */
1657
/**
 * Compile a raw rewrite rule into a CompiledRewriteRule.
 * Returns null (after logging) when either regex fails to compile.
 * Regex rules default to `gms` flags; a leading `(?flags)` inline group
 * is stripped from the pattern and merged into the flag set.
 */
function compileRewriteRule(raw) {
	const method = raw.method ?? "regex";
	let modelPattern;
	if (raw.model) {
		try {
			modelPattern = new RegExp(raw.model, "i");
		} catch (err) {
			consola.warn(`[config] Invalid model regex in rewrite rule: "${raw.model}"`, err);
			return null;
		}
	}
	// Line rules keep `from` as a plain string.
	if (method === "line") {
		return { from: raw.from, to: raw.to, method, modelPattern };
	}
	try {
		let source = raw.from;
		let flags = "gms";
		const inline = /^\(\?([a-z]+)\)/i.exec(source);
		if (inline) {
			source = source.slice(inline[0].length);
			for (const flag of inline[1]) {
				if (!flags.includes(flag)) flags += flag;
			}
		}
		return { from: new RegExp(source, flags), to: raw.to, method, modelPattern };
	} catch (err) {
		consola.warn(`[config] Invalid regex in rewrite rule: "${raw.from}"`, err);
		return null;
	}
}
1692
/** Compile an array of raw rewrite rules, skipping invalid ones */
function compileRewriteRules(raws) {
	const compiled = [];
	for (const raw of raws) {
		const rule = compileRewriteRule(raw);
		if (rule !== null) compiled.push(rule);
	}
	return compiled;
}
1696
// Parsed-config cache, invalidated via mtime comparison.
let cachedConfig = null;
let configLastMtimeMs = 0;
/** Time-based debounce: skip stat() if checked recently */
let lastStatTimeMs = 0;
const STAT_DEBOUNCE_MS = 2e3;
/**
 * Read and parse config.yaml. A missing file or empty document yields {};
 * a non-mapping top level is rejected with a TypeError.
 */
async function loadRawConfigFile() {
	try {
		const content = await fs.readFile(PATHS.CONFIG_YAML, "utf8");
		const { parse } = await import("yaml");
		const parsed = parse(content);
		if (parsed == null) return {};
		const isMapping = typeof parsed === "object" && !Array.isArray(parsed);
		if (!isMapping) throw new TypeError("config.yaml must contain a top-level mapping");
		return parsed;
	} catch (err) {
		if (err.code === "ENOENT") return {};
		throw err;
	}
}
/**
 * Load config.yaml with two-level caching: a short stat() debounce first,
 * then an mtime comparison before re-reading and re-parsing the file.
 * Any failure other than a missing file is logged and degrades to {}.
 */
async function loadConfig() {
	try {
		const now = Date.now();
		const recentlyChecked = cachedConfig && now - lastStatTimeMs < STAT_DEBOUNCE_MS;
		if (recentlyChecked) return cachedConfig;
		const fileStat = await fs.stat(PATHS.CONFIG_YAML);
		lastStatTimeMs = now;
		const unchanged = cachedConfig && fileStat.mtimeMs === configLastMtimeMs;
		if (unchanged) return cachedConfig;
		cachedConfig = await loadRawConfigFile();
		configLastMtimeMs = fileStat.mtimeMs;
		return cachedConfig;
	} catch (err) {
		if (err.code === "ENOENT") return {};
		try {
			// Remember the failing file's mtime so we don't re-parse it every call.
			configLastMtimeMs = (await fs.stat(PATHS.CONFIG_YAML)).mtimeMs;
		} catch {
			// File vanished between failures; keep the previous mtime.
		}
		consola.warn("[config] Failed to load config.yaml:", err);
		return {};
	}
}
1733
/** Get the mtime of the currently cached config (0 if not loaded) */
function getConfigMtimeMs() {
	return configLastMtimeMs;
}
/** Exposed for testing: reset the mtime cache */
function resetConfigCache() {
	cachedConfig = null;
	configLastMtimeMs = 0;
	lastStatTimeMs = 0;
}
// Whether applyConfigToState has run at least once, and the config mtime it
// last applied — used to log "Reloaded" only when the file actually changed.
let hasApplied = false;
let lastAppliedMtimeMs = 0;
/**
 * Load config.yaml and apply all hot-reloadable settings to global state.
 *
 * Scalar fields: only overridden when explicitly present in config (deleted keys keep current runtime value).
 * Collection fields (model_overrides, rewrite_system_reminders array): entire replacement when present.
 *
 * Safe to call per-request — loadConfig() is mtime-cached, so unchanged config
 * only costs one stat() syscall.
 *
 * NOT hot-reloaded: rate_limiter (stateful singleton initialized at startup).
 */
async function applyConfigToState() {
	const config = await loadConfig();
	// Anthropic request/response behavior toggles (each applied independently).
	if (config.anthropic) {
		const a = config.anthropic;
		if (a.strip_server_tools !== void 0) setAnthropicBehavior({ stripServerTools: a.strip_server_tools });
		if (a.immutable_thinking_messages !== void 0) setAnthropicBehavior({ immutableThinkingMessages: a.immutable_thinking_messages });
		// `true` is shorthand for the "input" dedup mode.
		if (a.dedup_tool_calls !== void 0) setAnthropicBehavior({ dedupToolCalls: a.dedup_tool_calls === true ? "input" : a.dedup_tool_calls });
		if (a.strip_read_tool_result_tags !== void 0) setAnthropicBehavior({ stripReadToolResultTags: a.strip_read_tool_result_tags });
		if (a.context_editing !== void 0) setAnthropicBehavior({ contextEditingMode: a.context_editing });
		if (a.context_editing_trigger !== void 0) setAnthropicBehavior({ contextEditingTrigger: a.context_editing_trigger });
		if (a.context_editing_keep_tools !== void 0) setAnthropicBehavior({ contextEditingKeepTools: a.context_editing_keep_tools });
		if (a.context_editing_keep_thinking !== void 0) setAnthropicBehavior({ contextEditingKeepThinking: a.context_editing_keep_thinking });
		if (a.tool_search !== void 0) setAnthropicBehavior({ toolSearchEnabled: a.tool_search });
		if (a.auto_cache_control !== void 0) setAnthropicBehavior({ autoCacheControl: a.auto_cache_control });
		if (Array.isArray(a.non_deferred_tools)) setAnthropicBehavior({ nonDeferredTools: a.non_deferred_tools });
		// Boolean enables/disables the feature; an array replaces the rule set.
		if (a.rewrite_system_reminders !== void 0) {
			if (typeof a.rewrite_system_reminders === "boolean") setAnthropicBehavior({ rewriteSystemReminders: a.rewrite_system_reminders });
			else if (Array.isArray(a.rewrite_system_reminders)) setAnthropicBehavior({ rewriteSystemReminders: compileRewriteRules(a.rewrite_system_reminders) });
		}
	}
	if (Array.isArray(config.system_prompt_overrides)) setAnthropicBehavior({ systemPromptOverrides: config.system_prompt_overrides.length > 0 ? compileRewriteRules(config.system_prompt_overrides) : [] });
	// Model overrides replace the whole map, layered over the defaults.
	if (config.model_overrides) setModelOverrides({
		...DEFAULT_MODEL_OVERRIDES,
		...config.model_overrides
	});
	if (config.compress_tool_results_before_truncate !== void 0) setAnthropicBehavior({ compressToolResultsBeforeTruncate: config.compress_tool_results_before_truncate });
	// History retention settings.
	if (config.history) {
		const h = config.history;
		if (h.limit !== void 0) {
			setHistoryConfig({ historyLimit: h.limit });
			setHistoryMaxEntries(h.limit);
		}
		if (h.min_entries !== void 0) setHistoryConfig({ historyMinEntries: h.min_entries });
	}
	// Graceful-shutdown wait windows.
	if (config.shutdown) {
		const s = config.shutdown;
		if (s.graceful_wait !== void 0) setShutdownConfig({ shutdownGracefulWait: s.graceful_wait });
		if (s.abort_wait !== void 0) setShutdownConfig({ shutdownAbortWait: s.abort_wait });
	}
	// Network/stream timeouts and the model refresh interval.
	if (config.fetch_timeout !== void 0) setTimeoutConfig({ fetchTimeout: config.fetch_timeout });
	if (config.stream_idle_timeout !== void 0) setTimeoutConfig({ streamIdleTimeout: config.stream_idle_timeout });
	if (config.stale_request_max_age !== void 0) setTimeoutConfig({ staleRequestMaxAge: config.stale_request_max_age });
	if (config.model_refresh_interval !== void 0) setTimeoutConfig({ modelRefreshInterval: config.model_refresh_interval });
	const responsesConfig = config["openai-responses"];
	if (responsesConfig && responsesConfig.normalize_call_ids !== void 0) setResponsesConfig({ normalizeResponsesCallIds: responsesConfig.normalize_call_ids });
	if (responsesConfig && responsesConfig.upstream_websocket !== void 0) setResponsesConfig({ upstreamWebSocket: responsesConfig.upstream_websocket });
	// Re-arm the refresh loop in case model_refresh_interval changed above.
	syncModelRefreshLoop();
	const currentMtime = getConfigMtimeMs();
	if (hasApplied && currentMtime !== lastAppliedMtimeMs) consola.info("[config] Reloaded config.yaml");
	hasApplied = true;
	lastAppliedMtimeMs = currentMtime;
	return config;
}
1809
+ //#endregion
1810
+ //#region src/lib/proxy.ts
1811
+ /**
1812
+ * Proxy configuration: HTTP/HTTPS and SOCKS5/5h proxy support.
1813
+ *
1814
+ * Priority: explicit proxy URL (CLI --proxy or config.yaml) > env vars (--http-proxy-from-env).
1815
+ * On Node.js, proxying works via undici's global dispatcher.
1816
+ * On Bun, HTTP proxies are set via env vars (Bun handles them natively); SOCKS5 is not supported.
1817
+ */
1818
+ /**
1819
+ * Initialize proxy for all outgoing fetch requests.
1820
+ *
1821
+ * On Node.js: sets undici's global dispatcher.
1822
+ * On Bun: sets process.env.HTTP_PROXY/HTTPS_PROXY for HTTP proxies (Bun handles natively).
1823
+ *
1824
+ * Must be called before any network requests.
1825
+ */
1826
/**
 * Initialize proxying for all outgoing fetch requests, dispatching to the
 * runtime-appropriate implementation (Bun env vars vs. undici dispatcher).
 * Must be called before any network requests.
 */
function initProxy(options) {
	const runningOnBun = typeof Bun !== "undefined";
	if (runningOnBun) {
		initProxyBun(options);
	} else {
		initProxyNode(options);
	}
}
1833
/** Format a proxy URL for display (strip credentials) */
function formatProxyDisplay(proxyUrl) {
	try {
		const parsed = new URL(proxyUrl);
		let auth = "";
		if (parsed.username) auth = `${parsed.username}:***@`;
		return `${parsed.protocol}//${auth}${parsed.host}`;
	} catch {
		// Unparseable input is returned verbatim.
		return proxyUrl;
	}
}
1843
/**
 * Node.js proxy setup: install a global undici dispatcher.
 * An explicit URL wins over env-var-based per-URL resolution.
 * Setup failures are logged and rethrown.
 */
function initProxyNode(options) {
	try {
		if (options.url) {
			const dispatcher = createDispatcherForUrl(options.url);
			setGlobalDispatcher(dispatcher);
			consola.debug(`Proxy configured: ${formatProxyDisplay(options.url)}`);
		} else if (options.fromEnv) {
			setGlobalDispatcher(new EnvProxyDispatcher());
			consola.debug("HTTP proxy configured from environment (per-URL)");
		}
	} catch (err) {
		consola.error("Proxy setup failed:", err);
		throw err;
	}
}
1859
/** Create the appropriate undici dispatcher for a proxy URL scheme */
function createDispatcherForUrl(proxyUrl) {
	const url = new URL(proxyUrl);
	const protocol = url.protocol.toLowerCase();
	switch (protocol) {
		case "http:":
		case "https:":
			return new ProxyAgent(proxyUrl);
		case "socks5:":
		case "socks5h:":
			return createSocksAgent(url);
		default:
			throw new Error(`Unsupported proxy protocol: ${protocol}. Supported: http, https, socks5, socks5h`);
	}
}
1867
+ /**
1868
+ * Create an undici Agent that routes connections through a SOCKS5/5h proxy.
1869
+ *
1870
+ * For socks5h:// the proxy performs DNS resolution (hostname passed as-is).
1871
+ * For socks5:// the hostname is also passed to the proxy (proxy resolves).
1872
+ * Both protocols support username/password authentication via URL credentials.
1873
+ */
1874
function createSocksAgent(proxyUrl) {
	// SOCKS5 endpoint; default port 1080 when the URL omits one.
	const proxy = {
		host: proxyUrl.hostname,
		port: Number(proxyUrl.port) || 1080,
		type: 5
	};
	// Optional username/password auth from URL credentials.
	if (proxyUrl.username) {
		proxy.userId = decodeURIComponent(proxyUrl.username);
		proxy.password = proxyUrl.password ? decodeURIComponent(proxyUrl.password) : void 0;
	}
	// Custom connect hook: open a tunnel via the SOCKS proxy, then wrap the
	// socket in TLS for https destinations.
	const connect = (opts, callback) => {
		const isTls = opts.protocol === "https:";
		const destPort = Number(opts.port) || (isTls ? 443 : 80);
		SocksClient.createConnection({
			proxy,
			command: "connect",
			destination: {
				host: opts.hostname,
				port: destPort
			}
		}).then(({ socket }) => {
			if (isTls) {
				callback(null, tls.connect({
					socket,
					servername: opts.servername ?? opts.hostname
				}));
			} else {
				callback(null, socket);
			}
		}).catch((err) => {
			callback(err instanceof Error ? err : new Error(String(err)), null);
		});
	};
	return new Agent({ connect });
}
1904
+ /**
1905
+ * Custom dispatcher that routes requests through proxies based on environment variables.
1906
+ * Uses proxy-from-env to resolve HTTP_PROXY/HTTPS_PROXY/NO_PROXY per-URL.
1907
+ */
1908
var EnvProxyDispatcher = class extends Agent {
	// One cached ProxyAgent per distinct proxy URL string.
	proxies = /* @__PURE__ */ new Map();
	// Route each request through the proxy (if any) that proxy-from-env
	// resolves for its origin; any resolution failure degrades to a direct
	// (non-proxied) dispatch via the base Agent.
	dispatch(options, handler) {
		try {
			const origin = this.getOriginUrl(options.origin);
			const proxyUrl = this.getProxyUrl(origin);
			if (!proxyUrl) {
				consola.debug(`HTTP proxy bypass: ${origin.hostname}`);
				return super.dispatch(options, handler);
			}
			const agent = this.getOrCreateProxyAgent(proxyUrl);
			consola.debug(`HTTP proxy route: ${origin.hostname} via ${formatProxyDisplay(proxyUrl)}`);
			return agent.dispatch(options, handler);
		} catch {
			// Fall back to a direct connection rather than failing the request.
			return super.dispatch(options, handler);
		}
	}
	// Normalize undici's origin (string | URL) to a URL instance.
	getOriginUrl(origin) {
		return typeof origin === "string" ? new URL(origin) : origin;
	}
	// Resolve HTTP_PROXY/HTTPS_PROXY/NO_PROXY for this origin; undefined = direct.
	getProxyUrl(origin) {
		const raw = getProxyForUrl(origin.toString());
		return raw && raw.length > 0 ? raw : void 0;
	}
	// Lazily create and cache one ProxyAgent per proxy URL.
	getOrCreateProxyAgent(proxyUrl) {
		let agent = this.proxies.get(proxyUrl);
		if (!agent) {
			agent = new ProxyAgent(proxyUrl);
			this.proxies.set(proxyUrl, agent);
		}
		return agent;
	}
	// Graceful close: drain own pool first, then every cached proxy agent.
	async close() {
		await super.close();
		await Promise.all([...this.proxies.values()].map((p) => p.close()));
		this.proxies.clear();
	}
	// Mirrors the undici Dispatcher#destroy overloads:
	//   destroy(callback) | destroy(err, callback) | destroy(err?) -> Promise
	// Each cached agent is destroyed with the same argument shape as `super`.
	// NOTE(review): when a callback is supplied it is passed to every cached
	// agent's destroy as well as super's, so it may be invoked multiple times —
	// confirm callers tolerate that.
	destroy(errOrCallback, callback) {
		for (const agent of this.proxies.values()) if (typeof errOrCallback === "function") agent.destroy(errOrCallback);
		else if (callback) agent.destroy(errOrCallback ?? null, callback);
		else agent.destroy(errOrCallback ?? null).catch(() => {});
		this.proxies.clear();
		if (typeof errOrCallback === "function") {
			super.destroy(errOrCallback);
			return;
		} else if (callback) {
			super.destroy(errOrCallback ?? null, callback);
			return;
		} else return super.destroy(errOrCallback ?? null);
	}
};
1959
+ /**
1960
+ * Initialize proxy for Bun runtime.
1961
+ * Bun handles HTTP_PROXY/HTTPS_PROXY env vars natively.
1962
+ * SOCKS5 proxies are not supported on Bun.
1963
+ */
1964
function initProxyBun(options) {
	const { url } = options;
	// Env-var proxying only applies when an explicit proxy URL was given.
	if (!url) return;
	const scheme = new URL(url).protocol.toLowerCase();
	const isSocks = scheme === "socks5:" || scheme === "socks5h:";
	if (isSocks) {
		throw new Error("SOCKS5 proxy is not supported on Bun runtime. Use Node.js or an HTTP proxy instead.");
	}
	// Bun consumes these env vars natively for outgoing fetches.
	process.env.HTTP_PROXY = url;
	process.env.HTTPS_PROXY = url;
	consola.debug(`Proxy configured (Bun env): ${formatProxyDisplay(url)}`);
}
1715
1972
  //#endregion
1716
1973
  //#region src/lib/token/copilot-client.ts
@@ -1914,6 +2171,20 @@ var CopilotTokenManager = class {
1914
2171
  }
1915
2172
  };
1916
2173
  //#endregion
2174
+ //#region src/lib/utils.ts
2175
/** Resolve after `ms` milliseconds. */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
/** True only for null/undefined — other falsy values (0, "", false) are not nullish. */
const isNullish = (value) => value == null;
/** Convert bytes to KB with rounding */
function bytesToKB(bytes) {
	const kilobytes = bytes / 1024;
	return Math.round(kilobytes);
}
/** Generate unique ID (timestamp + random) */
function generateId(randomLength = 7) {
	const timePart = Date.now().toString(36);
	const randomPart = Math.random().toString(36).slice(2, 2 + randomLength);
	return timePart + randomPart;
}
2187
+ //#endregion
1917
2188
  //#region src/lib/token/github-client.ts
1918
2189
  /** GitHub OAuth API client — device code flow and user info */
1919
2190
  const getGitHubUser = async () => {
@@ -2473,49 +2744,6 @@ const checkUsage = defineCommand({
2473
2744
  }
2474
2745
  });
2475
2746
  //#endregion
2476
- //#region src/lib/fetch-utils.ts
2477
- const SENSITIVE_HEADER_NAMES = new Set([
2478
- "authorization",
2479
- "proxy-authorization",
2480
- "x-api-key",
2481
- "api-key"
2482
- ]);
2483
- /**
2484
- * Create an AbortSignal for fetch timeout if configured.
2485
- * Controls the time from request start to receiving response headers.
2486
- * Returns undefined if fetchTimeout is 0 (disabled).
2487
- */
2488
- function createFetchSignal() {
2489
- return state.fetchTimeout > 0 ? AbortSignal.timeout(state.fetchTimeout * 1e3) : void 0;
2490
- }
2491
- /**
2492
- * Populate a HeadersCapture object with request and response headers.
2493
- * Should be called immediately after fetch(), before !response.ok check,
2494
- * so headers are captured even for error responses.
2495
- */
2496
- function captureHttpHeaders(capture, requestHeaders, response) {
2497
- capture.request = sanitizeHeadersForHistory(requestHeaders);
2498
- capture.response = Object.fromEntries(response.headers.entries());
2499
- }
2500
- /** Return a copy of headers safe to persist in history/error artifacts. */
2501
- function sanitizeHeadersForHistory(headers) {
2502
- return Object.fromEntries(Object.entries(headers).map(([name, value]) => [name, SENSITIVE_HEADER_NAMES.has(name.toLowerCase()) ? "***" : value]));
2503
- }
2504
- //#endregion
2505
- //#region src/lib/models/client.ts
2506
- /** Fetch models from Copilot API and cache in global state */
2507
- async function cacheModels() {
2508
- setModels(await getModels());
2509
- }
2510
- const getModels = async () => {
2511
- const response = await fetch(`${copilotBaseUrl(state)}/models`, {
2512
- headers: copilotHeaders(state),
2513
- signal: createFetchSignal()
2514
- });
2515
- if (!response.ok) throw await HTTPError.fromResponse("Failed to get models", response);
2516
- return await response.json();
2517
- };
2518
- //#endregion
2519
2747
  //#region src/debug.ts
2520
2748
  async function getPackageVersion() {
2521
2749
  try {
@@ -3004,6 +3232,284 @@ async function executeWithAdaptiveRateLimit(fn) {
3004
3232
  return rateLimiterInstance.execute(fn);
3005
3233
  }
3006
3234
  //#endregion
3235
+ //#region src/lib/request-telemetry.ts
3236
// 5-minute buckets (300 s) over a rolling 7-day window (10080 min),
// flushed to disk every 60 s.
const BUCKET_MS = 300 * 1e3;
const WINDOW_MS = 10080 * 60 * 1e3;
const PERSIST_INTERVAL_MS = 60 * 1e3;
// In-memory telemetry state; reset and rehydrated by initRequestTelemetry().
let acceptedSinceStart = 0;
let bucketCounts = /* @__PURE__ */ new Map();
let modelStatsSinceStart = /* @__PURE__ */ new Map();
let modelBucketStats = /* @__PURE__ */ new Map();
let persistTimer$1 = null;
let telemetryFilePath = PATHS.REQUEST_TELEMETRY;
/** Floor a timestamp to the start of its fixed-size bucket. */
function getBucketStart(timestamp) {
	return Math.floor(timestamp / BUCKET_MS) * BUCKET_MS;
}
3248
// Numeric field names shared by every per-model telemetry record.
const MODEL_TELEMETRY_KEYS = [
	"requestCount",
	"successCount",
	"failureCount",
	"totalDurationMs",
	"inputTokens",
	"outputTokens",
	"cacheReadInputTokens",
	"cacheCreationInputTokens",
	"reasoningTokens"
];
/** Fresh all-zero telemetry record for one model. */
function createEmptyModelTelemetry() {
	const record = {};
	for (const key of MODEL_TELEMETRY_KEYS) record[key] = 0;
	return record;
}
/** True when a persisted value is an object carrying every numeric telemetry field. */
function isValidPersistedModelTelemetry(value) {
	if (!value || typeof value !== "object") return false;
	return MODEL_TELEMETRY_KEYS.every((key) => typeof value[key] === "number");
}
/** Defensive copy of a persisted record, keeping only the known fields. */
function copyPersistedTelemetry(stats) {
	const copy = {};
	for (const key of MODEL_TELEMETRY_KEYS) copy[key] = stats[key];
	return copy;
}
3279
/** Look up (or lazily create) the telemetry record for `model` in `target`. */
function getOrCreateModelStats(target, model) {
	const normalizedModel = model.trim() || "unknown";
	const existing = target.get(normalizedModel);
	if (existing) return existing;
	const fresh = createEmptyModelTelemetry();
	target.set(normalizedModel, fresh);
	return fresh;
}
/** Look up (or lazily create) the per-model map for a bucket timestamp. */
function getOrCreateModelBucket(timestamp) {
	const existing = modelBucketStats.get(timestamp);
	if (existing) return existing;
	const fresh = new Map();
	modelBucketStats.set(timestamp, fresh);
	return fresh;
}
3296
/** Fold one settled request (outcome, duration, token usage) into a stats record. */
function applySettledTelemetry(stats, opts) {
	const { success, usage } = opts;
	// Clamp: clock skew must not produce negative durations.
	const durationMs = Math.max(0, opts.endedAt - opts.startedAt);
	stats.requestCount += 1;
	if (success) {
		stats.successCount += 1;
	} else {
		stats.failureCount += 1;
	}
	stats.totalDurationMs += durationMs;
	stats.inputTokens += usage?.input_tokens ?? 0;
	stats.outputTokens += usage?.output_tokens ?? 0;
	stats.cacheReadInputTokens += usage?.cache_read_input_tokens ?? 0;
	stats.cacheCreationInputTokens += usage?.cache_creation_input_tokens ?? 0;
	stats.reasoningTokens += usage?.output_tokens_details?.reasoning_tokens ?? 0;
}
/** Drop every bucket that has aged out of the rolling window. */
function pruneBuckets(now = Date.now()) {
	const cutoff = getBucketStart(now - WINDOW_MS);
	for (const timestamp of [...bucketCounts.keys()]) {
		if (timestamp < cutoff) bucketCounts.delete(timestamp);
	}
	for (const timestamp of [...modelBucketStats.keys()]) {
		if (timestamp < cutoff) modelBucketStats.delete(timestamp);
	}
}
3314
/** Produce the full fixed-length bucket series for the window, zero-filling gaps. */
function buildFilledBuckets(now = Date.now()) {
	const bucketCount = Math.floor(WINDOW_MS / BUCKET_MS);
	const latestBucket = getBucketStart(now);
	const firstBucket = latestBucket - (bucketCount - 1) * BUCKET_MS;
	return Array.from({ length: bucketCount }, (_, index) => {
		const timestamp = firstBucket + index * BUCKET_MS;
		return { timestamp, count: bucketCounts.get(timestamp) ?? 0 };
	});
}
/** Stable ordering for snapshot rows: busiest (then heaviest, slowest) first. */
function compareModelSnapshots(left, right) {
	return right.requestCount - left.requestCount || right.usage.totalTokens - left.usage.totalTokens || right.totalDurationMs - left.totalDurationMs || left.model.localeCompare(right.model);
}
/** Convert [model, stats] entries into sorted snapshot rows. */
function buildModelSnapshots(source) {
	const snapshots = [...source].map(([model, stats]) => toModelSnapshot(model, stats));
	return snapshots.sort(compareModelSnapshots);
}
/** Extract the token-usage totals from a telemetry record. */
function toUsageTotals(stats) {
	const { inputTokens, outputTokens, cacheReadInputTokens, cacheCreationInputTokens, reasoningTokens } = stats;
	return {
		inputTokens,
		outputTokens,
		totalTokens: inputTokens + outputTokens,
		cacheReadInputTokens,
		cacheCreationInputTokens,
		reasoningTokens
	};
}
/** Build one snapshot row (with derived average duration) for a model. */
function toModelSnapshot(model, stats) {
	const { requestCount, successCount, failureCount, totalDurationMs } = stats;
	return {
		model,
		requestCount,
		successCount,
		failureCount,
		totalDurationMs,
		averageDurationMs: requestCount > 0 ? totalDurationMs / requestCount : 0,
		usage: toUsageTotals(stats)
	};
}
3352
/**
 * Aggregate the rolling-window per-model buckets into one totals row per model,
 * each carrying that model's chronological bucket series. Rows are ordered by
 * request count, then total tokens, then total duration, then model name.
 */
function buildLast7dModelSnapshots(now = Date.now()) {
	pruneBuckets(now);
	const aggregate = /* @__PURE__ */ new Map();
	const series = /* @__PURE__ */ new Map();
	for (const [timestamp, bucket] of modelBucketStats.entries()) for (const [model, stats] of bucket.entries()) {
		// Fold this bucket's numbers into the model's window-wide totals…
		const target = getOrCreateModelStats(aggregate, model);
		target.requestCount += stats.requestCount;
		target.successCount += stats.successCount;
		target.failureCount += stats.failureCount;
		target.totalDurationMs += stats.totalDurationMs;
		target.inputTokens += stats.inputTokens;
		target.outputTokens += stats.outputTokens;
		target.cacheReadInputTokens += stats.cacheReadInputTokens;
		target.cacheCreationInputTokens += stats.cacheCreationInputTokens;
		target.reasoningTokens += stats.reasoningTokens;
		// …and append a per-bucket point to the model's time series.
		let buckets = series.get(model);
		if (!buckets) {
			buckets = [];
			series.set(model, buckets);
		}
		buckets.push({
			timestamp,
			requestCount: stats.requestCount,
			successCount: stats.successCount,
			failureCount: stats.failureCount,
			totalDurationMs: stats.totalDurationMs,
			averageDurationMs: stats.requestCount > 0 ? stats.totalDurationMs / stats.requestCount : 0,
			usage: toUsageTotals(stats)
		});
	}
	// Series buckets are sorted chronologically; rows use the same ordering as
	// buildModelSnapshots (busiest first, name as final tiebreaker).
	return [...aggregate.entries()].map(([model, stats]) => ({
		...toModelSnapshot(model, stats),
		buckets: (series.get(model) ?? []).sort((left, right) => left.timestamp - right.timestamp)
	})).sort((left, right) => right.requestCount - left.requestCount || right.usage.totalTokens - left.usage.totalTokens || right.totalDurationMs - left.totalDurationMs || left.model.localeCompare(right.model));
}
3387
/** Begin flushing telemetry to disk every PERSIST_INTERVAL_MS (idempotent). */
function startPeriodicPersistence() {
	if (persistTimer$1) return;
	persistTimer$1 = setInterval(() => {
		persistRequestTelemetry();
	}, PERSIST_INTERVAL_MS);
}
/** Stop the periodic flush timer, if one is running. */
function stopPeriodicPersistence() {
	if (persistTimer$1 === null) return;
	clearInterval(persistTimer$1);
	persistTimer$1 = null;
}
/** Rebuild modelBucketStats from the persisted v2 `modelBuckets` object, dropping malformed entries. */
function loadModelBuckets(raw) {
	const entries = [];
	for (const [bucketKey, bucketValue] of Object.entries(raw)) {
		const bucketTimestamp = Number(bucketKey);
		if (!Number.isFinite(bucketTimestamp) || !bucketValue || typeof bucketValue !== "object") continue;
		const bucket = new Map();
		for (const [model, stats] of Object.entries(bucketValue)) {
			if (isValidPersistedModelTelemetry(stats)) bucket.set(model, copyPersistedTelemetry(stats));
		}
		entries.push([bucketTimestamp, bucket]);
	}
	modelBucketStats = new Map(entries);
}
/** Reset in-memory telemetry, hydrate it from disk (best-effort), prune, and start flushing. */
async function initRequestTelemetry() {
	stopPeriodicPersistence();
	acceptedSinceStart = 0;
	bucketCounts = new Map();
	modelStatsSinceStart = new Map();
	modelBucketStats = new Map();
	try {
		const raw = await fs.readFile(telemetryFilePath, "utf8");
		const parsed = JSON.parse(raw);
		if (parsed.buckets && typeof parsed.buckets === "object") {
			const entries = Object.entries(parsed.buckets)
				.map(([key, value]) => [Number(key), value])
				.filter(([key, value]) => Number.isFinite(key) && typeof value === "number" && value >= 0);
			bucketCounts = new Map(entries);
		}
		if (parsed.version === 2 && parsed.modelBuckets && typeof parsed.modelBuckets === "object") loadModelBuckets(parsed.modelBuckets);
	} catch {
		// Missing or corrupt telemetry file: start from a clean slate.
	}
	pruneBuckets();
	startPeriodicPersistence();
}
3422
/** Count an accepted request in both the lifetime total and its time bucket. */
function recordAcceptedRequest(timestamp = Date.now()) {
	acceptedSinceStart += 1;
	const bucketKey = getBucketStart(timestamp);
	const previous = bucketCounts.get(bucketKey) ?? 0;
	bucketCounts.set(bucketKey, previous + 1);
	pruneBuckets(timestamp);
}
/** Fold a finished request's outcome into both lifetime and bucketed per-model stats. */
function recordSettledRequest(model, opts) {
	const normalizedModel = model.trim() || "unknown";
	const lifetime = getOrCreateModelStats(modelStatsSinceStart, normalizedModel);
	applySettledTelemetry(lifetime, opts);
	const bucket = getOrCreateModelBucket(getBucketStart(opts.startedAt));
	applySettledTelemetry(getOrCreateModelStats(bucket, normalizedModel), opts);
	pruneBuckets(opts.startedAt);
}
/** Build the full telemetry snapshot (totals, filled buckets, per-model views). */
function getRequestTelemetrySnapshot(now = Date.now()) {
	pruneBuckets(now);
	const buckets = buildFilledBuckets(now);
	let totalLast7d = 0;
	for (const bucket of buckets) totalLast7d += bucket.count;
	return {
		acceptedSinceStart,
		bucketSizeMinutes: BUCKET_MS / (60 * 1e3),
		windowDays: WINDOW_MS / (1440 * 60 * 1e3),
		totalLast7d,
		buckets,
		modelsSinceStart: buildModelSnapshots(modelStatsSinceStart.entries()),
		modelsLast7d: buildLast7dModelSnapshots(now)
	};
}
3448
/** Serialize current telemetry (v2 format) and write it to disk, best-effort. */
async function persistRequestTelemetry() {
	pruneBuckets();
	// Bucket keys are stringified so they survive JSON round-tripping.
	const serializedBuckets = {};
	for (const [timestamp, count] of bucketCounts) {
		serializedBuckets[String(timestamp)] = count;
	}
	const serializedModelBuckets = {};
	for (const [timestamp, bucket] of modelBucketStats) {
		const models = {};
		for (const [model, stats] of bucket) {
			// copyPersistedTelemetry keeps exactly the persisted field set.
			models[model] = copyPersistedTelemetry(stats);
		}
		serializedModelBuckets[String(timestamp)] = models;
	}
	const file = {
		version: 2,
		buckets: serializedBuckets,
		modelBuckets: serializedModelBuckets
	};
	try {
		await fs.writeFile(telemetryFilePath, JSON.stringify(file, null, 2), "utf8");
	} catch {
		// Best-effort persistence: losing one flush is acceptable.
	}
}
/** Flush telemetry once on shutdown after stopping the periodic timer. */
async function shutdownRequestTelemetry() {
	stopPeriodicPersistence();
	await persistRequestTelemetry();
}
3473
+ //#endregion
3474
+ //#region src/lib/context/activity-summary.ts
3475
/** A request is active until it reaches a terminal state. */
function isActiveRequestState(state) {
	const terminal = state === "completed" || state === "failed";
	return !terminal;
}
/** Flatten a request context into the activity-feed summary shape. */
function summarizeRequestContext(context) {
	const state = context.state ?? "pending";
	return {
		id: context.id,
		endpoint: context.endpoint,
		// Optional keys are included only when present on the context.
		...(context.rawPath ? { rawPath: context.rawPath } : {}),
		state,
		active: isActiveRequestState(state),
		startTime: context.startTime,
		durationMs: context.durationMs ?? 0,
		lastUpdatedAt: Date.now(),
		model: context.originalRequest?.model,
		stream: context.originalRequest?.stream,
		attemptCount: context.attempts?.length ?? 0,
		currentStrategy: context.currentAttempt?.strategy,
		queueWaitMs: context.queueWaitMs ?? 0,
		...(context.transport ? { transport: context.transport } : {})
	};
}
3497
/** Derive the history-store activity patch from a context's current summary. */
function buildHistoryActivityPatch(context) {
	const summary = summarizeRequestContext(context);
	const patch = {
		startedAt: summary.startTime,
		state: summary.state,
		active: summary.active,
		lastUpdatedAt: summary.lastUpdatedAt,
		queueWaitMs: summary.queueWaitMs,
		attemptCount: summary.attemptCount,
		currentStrategy: summary.currentStrategy,
		durationMs: summary.durationMs
	};
	// Optional keys are added only when the summary carries them.
	if (summary.rawPath) patch.rawPath = summary.rawPath;
	if (summary.transport) patch.transport = summary.transport;
	return patch;
}
3007
3513
  //#region src/lib/models/resolver.ts
3008
3514
  /**
3009
3515
  * Unified model name resolution and normalization.
@@ -3198,13 +3704,16 @@ function createRequestContext(opts) {
3198
3704
  const startTime = Date.now();
3199
3705
  const onEvent = opts.onEvent;
3200
3706
  let _state = "pending";
3707
+ let _sessionId = opts.sessionId;
3201
3708
  let _originalRequest = null;
3202
3709
  let _response = null;
3203
3710
  let _pipelineInfo = null;
3204
3711
  let _sseEvents = null;
3205
3712
  let _httpHeaders = null;
3206
3713
  let _queueWaitMs = 0;
3714
+ const _warningMessages = [];
3207
3715
  const _attempts = [];
3716
+ let _endTime = null;
3208
3717
  /** Guard: once complete() or fail() is called, subsequent calls are no-ops */
3209
3718
  let settled = false;
3210
3719
  function emit(event) {
@@ -3214,8 +3723,15 @@ function createRequestContext(opts) {
3214
3723
  }
3215
3724
  const ctx = {
3216
3725
  id,
3726
+ get sessionId() {
3727
+ return _sessionId;
3728
+ },
3217
3729
  tuiLogId: opts.tuiLogId,
3730
+ rawPath: opts.rawPath,
3218
3731
  startTime,
3732
+ get endTime() {
3733
+ return _endTime;
3734
+ },
3219
3735
  endpoint: opts.endpoint,
3220
3736
  get state() {
3221
3737
  return _state;
@@ -3238,6 +3754,9 @@ function createRequestContext(opts) {
3238
3754
  get httpHeaders() {
3239
3755
  return _httpHeaders;
3240
3756
  },
3757
+ get transport() {
3758
+ return _attempts.findLast((attempt) => attempt.response)?.transport ?? _attempts.at(-1)?.transport ?? null;
3759
+ },
3241
3760
  get attempts() {
3242
3761
  return _attempts;
3243
3762
  },
@@ -3247,6 +3766,12 @@ function createRequestContext(opts) {
3247
3766
  get queueWaitMs() {
3248
3767
  return _queueWaitMs;
3249
3768
  },
3769
+ get warningMessages() {
3770
+ return _warningMessages;
3771
+ },
3772
+ setSessionId(sessionId) {
3773
+ _sessionId = sessionId;
3774
+ },
3250
3775
  setOriginalRequest(req) {
3251
3776
  _originalRequest = req;
3252
3777
  emit({
@@ -3272,6 +3797,15 @@ function createRequestContext(opts) {
3272
3797
  response: capture.response
3273
3798
  };
3274
3799
  },
3800
+ addWarningMessage(warning) {
3801
+ if (_warningMessages.some((existing) => existing.code === warning.code && existing.message === warning.message)) return;
3802
+ _warningMessages.push(warning);
3803
+ emit({
3804
+ type: "updated",
3805
+ context: ctx,
3806
+ field: "warningMessages"
3807
+ });
3808
+ },
3275
3809
  beginAttempt(attemptOpts) {
3276
3810
  const attempt = {
3277
3811
  index: _attempts.length,
@@ -3279,6 +3813,7 @@ function createRequestContext(opts) {
3279
3813
  wireRequest: null,
3280
3814
  response: null,
3281
3815
  error: null,
3816
+ transport: attemptOpts.transport ?? "http",
3282
3817
  strategy: attemptOpts.strategy,
3283
3818
  truncation: attemptOpts.truncation,
3284
3819
  waitMs: attemptOpts.waitMs,
@@ -3298,11 +3833,36 @@ function createRequestContext(opts) {
3298
3833
  },
3299
3834
  setAttemptEffectiveRequest(req) {
3300
3835
  const attempt = ctx.currentAttempt;
3301
- if (attempt) attempt.effectiveRequest = req;
3836
+ if (attempt) {
3837
+ attempt.effectiveRequest = req;
3838
+ emit({
3839
+ type: "updated",
3840
+ context: ctx,
3841
+ field: "attempts"
3842
+ });
3843
+ }
3302
3844
  },
3303
3845
  setAttemptWireRequest(req) {
3304
3846
  const attempt = ctx.currentAttempt;
3305
- if (attempt) attempt.wireRequest = req;
3847
+ if (attempt) {
3848
+ attempt.wireRequest = req;
3849
+ emit({
3850
+ type: "updated",
3851
+ context: ctx,
3852
+ field: "attempts"
3853
+ });
3854
+ }
3855
+ },
3856
+ setAttemptTransport(transport) {
3857
+ const attempt = ctx.currentAttempt;
3858
+ if (attempt) {
3859
+ attempt.transport = transport;
3860
+ emit({
3861
+ type: "updated",
3862
+ context: ctx,
3863
+ field: "attempts"
3864
+ });
3865
+ }
3306
3866
  },
3307
3867
  setAttemptResponse(response) {
3308
3868
  const attempt = ctx.currentAttempt;
@@ -3320,6 +3880,11 @@ function createRequestContext(opts) {
3320
3880
  },
3321
3881
  addQueueWaitMs(ms) {
3322
3882
  _queueWaitMs += ms;
3883
+ emit({
3884
+ type: "updated",
3885
+ context: ctx,
3886
+ field: "queueWaitMs"
3887
+ });
3323
3888
  },
3324
3889
  transition(newState, meta) {
3325
3890
  const previousState = _state;
@@ -3334,6 +3899,7 @@ function createRequestContext(opts) {
3334
3899
  complete(response) {
3335
3900
  if (settled) return;
3336
3901
  settled = true;
3902
+ _endTime = Date.now();
3337
3903
  if (response.model) response.model = normalizeModelId(response.model);
3338
3904
  _response = response;
3339
3905
  ctx.setAttemptResponse(response);
@@ -3347,6 +3913,7 @@ function createRequestContext(opts) {
3347
3913
  fail(model, error) {
3348
3914
  if (settled) return;
3349
3915
  settled = true;
3916
+ _endTime = Date.now();
3350
3917
  const errorMsg = getErrorMessage(error);
3351
3918
  _response = {
3352
3919
  success: false,
@@ -3372,11 +3939,23 @@ function createRequestContext(opts) {
3372
3939
  },
3373
3940
  toHistoryEntry() {
3374
3941
  const p = _originalRequest?.payload;
3942
+ const endedAt = _endTime ?? Date.now();
3375
3943
  const entry = {
3376
3944
  id,
3377
3945
  endpoint: opts.endpoint,
3378
- timestamp: startTime,
3379
- durationMs: Date.now() - startTime,
3946
+ ..._sessionId ? { sessionId: _sessionId } : {},
3947
+ ...opts.rawPath ? { rawPath: opts.rawPath } : {},
3948
+ startedAt: startTime,
3949
+ endedAt,
3950
+ state: _state,
3951
+ active: false,
3952
+ lastUpdatedAt: endedAt,
3953
+ queueWaitMs: _queueWaitMs,
3954
+ attemptCount: _attempts.length,
3955
+ currentStrategy: _attempts.at(-1)?.strategy,
3956
+ durationMs: endedAt - startTime,
3957
+ ...ctx.transport ? { transport: ctx.transport } : {},
3958
+ ..._warningMessages.length > 0 && { warningMessages: [..._warningMessages] },
3380
3959
  request: {
3381
3960
  model: _originalRequest?.model,
3382
3961
  messages: _originalRequest?.messages,
@@ -3402,7 +3981,7 @@ function createRequestContext(opts) {
3402
3981
  format: ep.format,
3403
3982
  messageCount: ep.messages.length,
3404
3983
  messages: ep.messages,
3405
- system: ep.payload?.system,
3984
+ system: ep.payload.system,
3406
3985
  payload: ep.payload
3407
3986
  };
3408
3987
  }
@@ -3413,7 +3992,7 @@ function createRequestContext(opts) {
3413
3992
  format: wp.format,
3414
3993
  messageCount: wp.messages.length,
3415
3994
  messages: wp.messages,
3416
- system: wp.payload?.system,
3995
+ system: wp.payload.system,
3417
3996
  payload: wp.payload,
3418
3997
  headers: wp.headers
3419
3998
  };
@@ -3422,10 +4001,11 @@ function createRequestContext(opts) {
3422
4001
  index: a.index,
3423
4002
  strategy: a.strategy,
3424
4003
  durationMs: a.durationMs,
4004
+ transport: a.transport,
3425
4005
  error: a.error?.message,
3426
4006
  truncation: a.truncation,
3427
4007
  sanitization: a.sanitization,
3428
- effectiveMessageCount: a.effectiveRequest?.messages?.length
4008
+ effectiveMessageCount: a.effectiveRequest?.messages.length
3429
4009
  }));
3430
4010
  return entry;
3431
4011
  }
@@ -3500,7 +4080,7 @@ function createRequestContextManager() {
3500
4080
  });
3501
4081
  notifyActiveRequestChanged({
3502
4082
  action: "state_changed",
3503
- request: summarizeContext(context),
4083
+ request: summarizeRequestContext(context),
3504
4084
  activeCount: activeContexts.size
3505
4085
  });
3506
4086
  }
@@ -3513,11 +4093,19 @@ function createRequestContextManager() {
3513
4093
  });
3514
4094
  break;
3515
4095
  case "completed":
3516
- if (rawEvent.entry) emit({
3517
- type: "completed",
3518
- context,
3519
- entry: rawEvent.entry
3520
- });
4096
+ if (rawEvent.entry) {
4097
+ recordSettledRequest(rawEvent.entry.response?.model ?? rawEvent.entry.request.model ?? "unknown", {
4098
+ startedAt: rawEvent.entry.startedAt,
4099
+ endedAt: rawEvent.entry.endedAt,
4100
+ success: rawEvent.entry.response?.success ?? true,
4101
+ usage: rawEvent.entry.response?.usage
4102
+ });
4103
+ emit({
4104
+ type: "completed",
4105
+ context,
4106
+ entry: rawEvent.entry
4107
+ });
4108
+ }
3521
4109
  activeContexts.delete(context.id);
3522
4110
  notifyActiveRequestChanged({
3523
4111
  action: "completed",
@@ -3526,11 +4114,19 @@ function createRequestContextManager() {
3526
4114
  });
3527
4115
  break;
3528
4116
  case "failed":
3529
- if (rawEvent.entry) emit({
3530
- type: "failed",
3531
- context,
3532
- entry: rawEvent.entry
3533
- });
4117
+ if (rawEvent.entry) {
4118
+ recordSettledRequest(rawEvent.entry.response?.model ?? rawEvent.entry.request.model ?? "unknown", {
4119
+ startedAt: rawEvent.entry.startedAt,
4120
+ endedAt: rawEvent.entry.endedAt,
4121
+ success: rawEvent.entry.response?.success ?? false,
4122
+ usage: rawEvent.entry.response?.usage
4123
+ });
4124
+ emit({
4125
+ type: "failed",
4126
+ context,
4127
+ entry: rawEvent.entry
4128
+ });
4129
+ }
3534
4130
  activeContexts.delete(context.id);
3535
4131
  notifyActiveRequestChanged({
3536
4132
  action: "failed",
@@ -3541,28 +4137,16 @@ function createRequestContextManager() {
3541
4137
  default: break;
3542
4138
  }
3543
4139
  }
3544
- /** Build a lightweight summary of a context for WS broadcast */
3545
- function summarizeContext(ctx) {
3546
- return {
3547
- id: ctx.id,
3548
- endpoint: ctx.endpoint,
3549
- state: ctx.state,
3550
- startTime: ctx.startTime,
3551
- durationMs: ctx.durationMs,
3552
- model: ctx.originalRequest?.model,
3553
- stream: ctx.originalRequest?.stream,
3554
- attemptCount: ctx.attempts.length,
3555
- currentStrategy: ctx.currentAttempt?.strategy,
3556
- queueWaitMs: ctx.queueWaitMs
3557
- };
3558
- }
3559
4140
  return {
3560
4141
  create(opts) {
3561
4142
  const ctx = createRequestContext({
3562
4143
  endpoint: opts.endpoint,
4144
+ sessionId: opts.sessionId,
3563
4145
  tuiLogId: opts.tuiLogId,
4146
+ rawPath: opts.rawPath,
3564
4147
  onEvent: handleContextEvent
3565
4148
  });
4149
+ recordAcceptedRequest(ctx.startTime);
3566
4150
  activeContexts.set(ctx.id, ctx);
3567
4151
  emit({
3568
4152
  type: "created",
@@ -3570,7 +4154,7 @@ function createRequestContextManager() {
3570
4154
  });
3571
4155
  notifyActiveRequestChanged({
3572
4156
  action: "created",
3573
- request: summarizeContext(ctx),
4157
+ request: summarizeRequestContext(ctx),
3574
4158
  activeCount: activeContexts.size
3575
4159
  });
3576
4160
  return ctx;
@@ -3578,23 +4162,355 @@ function createRequestContextManager() {
3578
4162
  get(id) {
3579
4163
  return activeContexts.get(id);
3580
4164
  },
3581
- getAll() {
3582
- return Array.from(activeContexts.values());
4165
+ getAll() {
4166
+ return Array.from(activeContexts.values());
4167
+ },
4168
+ get activeCount() {
4169
+ return activeContexts.size;
4170
+ },
4171
+ on(_event, listener) {
4172
+ listeners.add(listener);
4173
+ },
4174
+ off(_event, listener) {
4175
+ listeners.delete(listener);
4176
+ },
4177
+ startReaper,
4178
+ stopReaper,
4179
+ _runReaperOnce: runReaperOnce
4180
+ };
4181
+ }
4182
+ //#endregion
4183
+ //#region src/lib/openai/upstream-ws-connection.ts
4184
+ const DEFAULT_IDLE_TIMEOUT_MS = 5 * 6e4;
4185
+ const CLOSE_CODE_GOING_AWAY = 1001;
4186
+ const TERMINAL_EVENTS$1 = new Set([
4187
+ "response.completed",
4188
+ "response.failed",
4189
+ "response.incomplete",
4190
+ "error"
4191
+ ]);
4192
+ function createUpstreamWsConnection(opts) {
4193
+ const createSocket = opts.createSocket ?? ((url, headers) => new WebSocket$1(url, { headers }));
4194
+ const idleTimeoutMs = opts.idleTimeoutMs ?? DEFAULT_IDLE_TIMEOUT_MS;
4195
+ let socket = null;
4196
+ let busy = false;
4197
+ let statefulMarker;
4198
+ let currentQueue = null;
4199
+ let currentAbortCleanup = null;
4200
+ let idleTimer = null;
4201
+ const clearIdleTimer = () => {
4202
+ if (idleTimer) {
4203
+ clearTimeout(idleTimer);
4204
+ idleTimer = null;
4205
+ }
4206
+ };
4207
+ const scheduleIdleClose = () => {
4208
+ clearIdleTimer();
4209
+ if (!socket || busy || socket.readyState !== socket.OPEN || idleTimeoutMs <= 0) return;
4210
+ idleTimer = setTimeout(() => {
4211
+ socket?.close(CLOSE_CODE_GOING_AWAY, "Idle timeout");
4212
+ }, idleTimeoutMs);
4213
+ };
4214
+ const finishRequest = () => {
4215
+ busy = false;
4216
+ currentAbortCleanup?.();
4217
+ currentAbortCleanup = null;
4218
+ currentQueue?.close();
4219
+ currentQueue = null;
4220
+ scheduleIdleClose();
4221
+ };
4222
+ const failRequest = (error) => {
4223
+ busy = false;
4224
+ currentAbortCleanup?.();
4225
+ currentAbortCleanup = null;
4226
+ currentQueue?.fail(error);
4227
+ currentQueue = null;
4228
+ };
4229
+ const handleMessage = (event) => {
4230
+ if (!(event instanceof MessageEvent)) return;
4231
+ if (!currentQueue) return;
4232
+ clearIdleTimer();
4233
+ try {
4234
+ const parsed = parseWebSocketEvent(event.data);
4235
+ currentQueue.push(parsed);
4236
+ if (parsed.type === "response.completed") statefulMarker = parsed.response.id;
4237
+ if (TERMINAL_EVENTS$1.has(parsed.type)) finishRequest();
4238
+ } catch (error) {
4239
+ failRequest(error instanceof Error ? error : new Error(String(error)));
4240
+ }
4241
+ };
4242
+ const handleError = () => {
4243
+ if (!busy || !currentQueue) return;
4244
+ failRequest(/* @__PURE__ */ new Error("Upstream WebSocket error"));
4245
+ };
4246
+ const handleClose = (event) => {
4247
+ clearIdleTimer();
4248
+ socket?.removeEventListener("message", handleMessage);
4249
+ socket?.removeEventListener("error", handleError);
4250
+ socket?.removeEventListener("close", handleClose);
4251
+ socket = null;
4252
+ opts.onClose?.();
4253
+ if (!busy || !currentQueue) return;
4254
+ const closeEvent = event;
4255
+ failRequest(/* @__PURE__ */ new Error(`Upstream WebSocket closed (${closeEvent.code}: ${closeEvent.reason || "unknown"})`));
4256
+ };
4257
+ return {
4258
+ async connect(connectOpts) {
4259
+ const existingSocket = socket;
4260
+ if (existingSocket && existingSocket.readyState === existingSocket.OPEN) return;
4261
+ if (existingSocket && existingSocket.readyState === existingSocket.CONNECTING) throw new Error("Upstream WebSocket is already connecting");
4262
+ const ws = createSocket(copilotWsUrl(state), opts.headers);
4263
+ socket = ws;
4264
+ ws.addEventListener("message", handleMessage);
4265
+ ws.addEventListener("error", handleError);
4266
+ ws.addEventListener("close", handleClose);
4267
+ await new Promise((resolve, reject) => {
4268
+ const signal = connectOpts?.signal;
4269
+ const activeSocket = ws;
4270
+ const cleanup = () => {
4271
+ activeSocket.removeEventListener("open", onOpen);
4272
+ activeSocket.removeEventListener("error", onOpenError);
4273
+ signal?.removeEventListener("abort", onAbort);
4274
+ };
4275
+ const onOpen = () => {
4276
+ cleanup();
4277
+ resolve();
4278
+ };
4279
+ const onOpenError = () => {
4280
+ cleanup();
4281
+ activeSocket.close(CLOSE_CODE_GOING_AWAY, "Handshake failed");
4282
+ reject(/* @__PURE__ */ new Error("Upstream WebSocket handshake failed"));
4283
+ };
4284
+ const onAbort = () => {
4285
+ cleanup();
4286
+ activeSocket.close(CLOSE_CODE_GOING_AWAY, "Aborted");
4287
+ reject(/* @__PURE__ */ new Error("Upstream WebSocket connection aborted"));
4288
+ };
4289
+ activeSocket.addEventListener("open", onOpen, { once: true });
4290
+ activeSocket.addEventListener("error", onOpenError, { once: true });
4291
+ signal?.addEventListener("abort", onAbort, { once: true });
4292
+ if (signal?.aborted) onAbort();
4293
+ });
4294
+ scheduleIdleClose();
4295
+ },
4296
+ sendRequest(payload, requestOpts) {
4297
+ if (!socket || socket.readyState !== socket.OPEN) throw new Error("Upstream WebSocket is not connected");
4298
+ if (busy) throw new Error("Upstream WebSocket connection is busy");
4299
+ clearIdleTimer();
4300
+ busy = true;
4301
+ currentQueue = createAsyncQueue();
4302
+ const abortSignal = requestOpts?.abortSignal;
4303
+ const onAbort = () => {
4304
+ failRequest(/* @__PURE__ */ new Error("Upstream WebSocket request aborted"));
4305
+ };
4306
+ currentAbortCleanup = () => {
4307
+ abortSignal?.removeEventListener("abort", onAbort);
4308
+ };
4309
+ abortSignal?.addEventListener("abort", onAbort, { once: true });
4310
+ try {
4311
+ const { stream: _stream, ...wire } = payload;
4312
+ socket.send(JSON.stringify({
4313
+ type: "response.create",
4314
+ ...wire
4315
+ }));
4316
+ } catch (error) {
4317
+ currentAbortCleanup();
4318
+ currentAbortCleanup = null;
4319
+ failRequest(error instanceof Error ? error : new Error(String(error)));
4320
+ }
4321
+ const queue = currentQueue;
4322
+ return (async function* () {
4323
+ try {
4324
+ yield* queue.iterate();
4325
+ } finally {
4326
+ currentAbortCleanup?.();
4327
+ currentAbortCleanup = null;
4328
+ }
4329
+ })();
4330
+ },
4331
+ get isOpen() {
4332
+ return socket !== null && socket.readyState === socket.OPEN;
4333
+ },
4334
+ get isBusy() {
4335
+ return busy;
4336
+ },
4337
+ get statefulMarker() {
4338
+ return statefulMarker;
4339
+ },
4340
+ get model() {
4341
+ return opts.model;
4342
+ },
4343
+ close() {
4344
+ clearIdleTimer();
4345
+ socket?.close(CLOSE_CODE_GOING_AWAY, "Going away");
4346
+ }
4347
+ };
4348
+ }
4349
+ function createAsyncQueue() {
4350
+ const values = [];
4351
+ const waiters = [];
4352
+ let closed = false;
4353
+ let failure = null;
4354
+ const drain = () => {
4355
+ while (waiters.length > 0) {
4356
+ if (failure) {
4357
+ waiters.shift()?.reject(failure);
4358
+ continue;
4359
+ }
4360
+ if (values.length > 0) {
4361
+ waiters.shift()?.resolve({
4362
+ done: false,
4363
+ value: values.shift()
4364
+ });
4365
+ continue;
4366
+ }
4367
+ if (closed) {
4368
+ waiters.shift()?.resolve({
4369
+ done: true,
4370
+ value: void 0
4371
+ });
4372
+ continue;
4373
+ }
4374
+ break;
4375
+ }
4376
+ };
4377
+ return {
4378
+ push(value) {
4379
+ if (closed || failure) return;
4380
+ values.push(value);
4381
+ drain();
4382
+ },
4383
+ close() {
4384
+ closed = true;
4385
+ drain();
4386
+ },
4387
+ fail(error) {
4388
+ if (failure) return;
4389
+ failure = error;
4390
+ drain();
4391
+ },
4392
+ async *iterate() {
4393
+ for (;;) {
4394
+ if (failure) throw failure;
4395
+ if (values.length > 0) {
4396
+ yield values.shift();
4397
+ continue;
4398
+ }
4399
+ if (closed) return;
4400
+ const next = await new Promise((resolve, reject) => {
4401
+ waiters.push({
4402
+ resolve,
4403
+ reject
4404
+ });
4405
+ drain();
4406
+ });
4407
+ if (next.done) return;
4408
+ yield next.value;
4409
+ }
4410
+ }
4411
+ };
4412
+ }
4413
+ function parseWebSocketEvent(input) {
4414
+ let text = null;
4415
+ if (typeof input === "string") text = input;
4416
+ else if (input instanceof ArrayBuffer) text = Buffer.from(input).toString("utf8");
4417
+ else if (ArrayBuffer.isView(input)) text = Buffer.from(input.buffer, input.byteOffset, input.byteLength).toString("utf8");
4418
+ if (text === null) throw new Error("Unsupported upstream WebSocket frame");
4419
+ const parsed = JSON.parse(text);
4420
+ if (isCapiWebSocketError(parsed)) return {
4421
+ type: "error",
4422
+ code: parsed.error.code,
4423
+ message: parsed.error.message,
4424
+ sequence_number: typeof parsed.sequence_number === "number" ? parsed.sequence_number : 0
4425
+ };
4426
+ return parsed;
4427
+ }
4428
+ function isCapiWebSocketError(input) {
4429
+ if (!input || typeof input !== "object") return false;
4430
+ const record = input;
4431
+ if (record.type !== "error") return false;
4432
+ if (!record.error || typeof record.error !== "object") return false;
4433
+ const error = record.error;
4434
+ return typeof error.code === "string" && typeof error.message === "string";
4435
+ }
4436
+ //#endregion
4437
+ //#region src/lib/openai/upstream-ws.ts
4438
+ const MAX_CONSECUTIVE_WS_FALLBACKS = 3;
4439
+ let connectionFactory = createUpstreamWsConnection;
4440
+ function createUpstreamWsManager() {
4441
+ const connections = /* @__PURE__ */ new Map();
4442
+ let stopped = false;
4443
+ let consecutiveFallbacks = 0;
4444
+ let temporarilyDisabled = false;
4445
+ return {
4446
+ findReusable({ previousResponseId, model }) {
4447
+ if (stopped || temporarilyDisabled) return void 0;
4448
+ for (const connection of connections.values()) {
4449
+ if (!connection.isOpen) continue;
4450
+ if (connection.isBusy) continue;
4451
+ if (connection.statefulMarker !== previousResponseId) continue;
4452
+ if (connection.model !== model) continue;
4453
+ return connection;
4454
+ }
4455
+ },
4456
+ create({ headers, model }) {
4457
+ if (stopped) throw new Error("Upstream WebSocket manager is not accepting new work");
4458
+ const key = randomUUID();
4459
+ const connection = connectionFactory({
4460
+ headers,
4461
+ model,
4462
+ onClose: () => {
4463
+ connections.delete(key);
4464
+ }
4465
+ });
4466
+ connections.set(key, connection);
4467
+ return Promise.resolve(connection);
4468
+ },
4469
+ stopNew() {
4470
+ stopped = true;
4471
+ },
4472
+ closeAll() {
4473
+ for (const connection of connections.values()) connection.close();
4474
+ connections.clear();
4475
+ },
4476
+ resetRuntimeState() {
4477
+ stopped = false;
4478
+ consecutiveFallbacks = 0;
4479
+ temporarilyDisabled = false;
4480
+ this.closeAll();
4481
+ },
4482
+ recordSuccessfulStart() {
4483
+ consecutiveFallbacks = 0;
4484
+ temporarilyDisabled = false;
4485
+ },
4486
+ recordFallback() {
4487
+ consecutiveFallbacks += 1;
4488
+ if (consecutiveFallbacks >= MAX_CONSECUTIVE_WS_FALLBACKS) temporarilyDisabled = true;
3583
4489
  },
3584
4490
  get activeCount() {
3585
- return activeContexts.size;
4491
+ let count = 0;
4492
+ for (const connection of connections.values()) if (connection.isOpen) count += 1;
4493
+ return count;
3586
4494
  },
3587
- on(_event, listener) {
3588
- listeners.add(listener);
4495
+ get consecutiveFallbacks() {
4496
+ return consecutiveFallbacks;
3589
4497
  },
3590
- off(_event, listener) {
3591
- listeners.delete(listener);
4498
+ get temporarilyDisabled() {
4499
+ return temporarilyDisabled;
3592
4500
  },
3593
- startReaper,
3594
- stopReaper,
3595
- _runReaperOnce: runReaperOnce
4501
+ get stopped() {
4502
+ return stopped;
4503
+ }
3596
4504
  };
3597
4505
  }
4506
+ let manager = null;
4507
+ function getUpstreamWsManager() {
4508
+ manager ??= createUpstreamWsManager();
4509
+ return manager;
4510
+ }
4511
+ function peekUpstreamWsManager() {
4512
+ return manager;
4513
+ }
3598
4514
  let serverInstance = null;
3599
4515
  let _isShuttingDown = false;
3600
4516
  let shutdownResolve = null;
@@ -3718,6 +4634,7 @@ async function gracefulShutdown(signal, deps) {
3718
4634
  } catch {}
3719
4635
  stopRefresh();
3720
4636
  stopMemoryPressureMonitor();
4637
+ peekUpstreamWsManager()?.stopNew();
3721
4638
  const wsClients = getWsClientCount();
3722
4639
  if (wsClients > 0) {
3723
4640
  closeWsClients();
@@ -3775,6 +4692,7 @@ async function gracefulShutdown(signal, deps) {
3775
4692
  } catch (error) {
3776
4693
  consola.error("Error force-closing server:", error);
3777
4694
  }
4695
+ peekUpstreamWsManager()?.closeAll();
3778
4696
  }
3779
4697
  finalize(tracker);
3780
4698
  }
@@ -3783,8 +4701,10 @@ function finalize(tracker) {
3783
4701
  setPhase("finalized");
3784
4702
  shutdownDrainAbortController = null;
3785
4703
  tracker.destroy();
3786
- consola.info("Shutdown complete");
3787
- shutdownResolve?.();
4704
+ shutdownRequestTelemetry().finally(() => {
4705
+ consola.info("Shutdown complete");
4706
+ shutdownResolve?.();
4707
+ });
3788
4708
  }
3789
4709
  function handleShutdownSignal(signal, opts) {
3790
4710
  const shutdownFn = opts?.gracefulShutdownFn ?? ((shutdownSignal) => gracefulShutdown(shutdownSignal));
@@ -4579,54 +5499,8 @@ const setupClaudeCode = defineCommand({
4579
5499
  }
4580
5500
  });
4581
5501
  //#endregion
4582
- //#region src/lib/serve.ts
4583
- /** Start the HTTP server and return a ServerInstance. */
4584
- async function startServer(options) {
4585
- if (typeof globalThis.Bun !== "undefined") return startBunServer(options);
4586
- return startNodeServer(options);
4587
- }
4588
- async function startNodeServer(options) {
4589
- const { createAdaptorServer } = await import("./dist-B3gFwWti.mjs");
4590
- const nodeServer = createAdaptorServer({ fetch: options.fetch });
4591
- await new Promise((resolve, reject) => {
4592
- nodeServer.once("error", reject);
4593
- nodeServer.listen({
4594
- port: options.port,
4595
- host: options.hostname,
4596
- exclusive: false
4597
- }, () => {
4598
- nodeServer.removeListener("error", reject);
4599
- resolve();
4600
- });
4601
- });
4602
- return {
4603
- nodeServer,
4604
- close(force) {
4605
- return new Promise((resolve, reject) => {
4606
- if (force && "closeAllConnections" in nodeServer) nodeServer.closeAllConnections();
4607
- nodeServer.close((err) => err ? reject(err) : resolve());
4608
- });
4609
- }
4610
- };
4611
- }
4612
- async function startBunServer(options) {
4613
- const bunServer = Bun.serve({
4614
- fetch(request, server) {
4615
- return options.fetch(request, { server });
4616
- },
4617
- port: options.port,
4618
- hostname: options.hostname,
4619
- idleTimeout: 255,
4620
- ...options.bunWebSocket ? { websocket: options.bunWebSocket } : {}
4621
- });
4622
- return { close(force) {
4623
- bunServer.stop(force ?? false);
4624
- return Promise.resolve();
4625
- } };
4626
- }
4627
- //#endregion
4628
5502
  //#region package.json
4629
- var version = "0.8.1-beta.2";
5503
+ var version = "0.8.2";
4630
5504
  //#endregion
4631
5505
  //#region src/lib/system-prompt/override.ts
4632
5506
  /**
@@ -5188,7 +6062,9 @@ function handleErrorPersistence(event) {
5188
6062
  const MAX_MESSAGES_IN_DUMP = 50;
5189
6063
  async function writeErrorEntry(entry) {
5190
6064
  const meta = {
5191
- timestamp: new Date(entry.timestamp).toISOString(),
6065
+ timestamp: new Date(entry.startedAt).toISOString(),
6066
+ startedAt: new Date(entry.startedAt).toISOString(),
6067
+ endedAt: new Date(entry.endedAt).toISOString(),
5192
6068
  id: entry.id,
5193
6069
  endpoint: entry.endpoint,
5194
6070
  durationMs: entry.durationMs,
@@ -5251,12 +6127,12 @@ function handleHistoryEvent(event) {
5251
6127
  const orig = event.context.originalRequest;
5252
6128
  if (!orig) break;
5253
6129
  const ctx = event.context;
5254
- const sessionId = getCurrentSession(ctx.endpoint);
5255
6130
  insertEntry({
5256
6131
  id: ctx.id,
5257
- sessionId,
5258
- timestamp: ctx.startTime,
6132
+ ...ctx.sessionId ? { sessionId: ctx.sessionId } : {},
6133
+ ...ctx.rawPath ? { rawPath: ctx.rawPath } : {},
5259
6134
  endpoint: ctx.endpoint,
6135
+ ...buildHistoryActivityPatch(ctx),
5260
6136
  request: {
5261
6137
  model: orig.model,
5262
6138
  messages: orig.messages,
@@ -5266,16 +6142,33 @@ function handleHistoryEvent(event) {
5266
6142
  }
5267
6143
  });
5268
6144
  }
6145
+ if (event.field === "attempts" || event.field === "queueWaitMs") updateEntry(event.context.id, buildHistoryActivityPatch(event.context));
6146
+ if (event.field === "warningMessages" && event.context.warningMessages.length > 0) updateEntry(event.context.id, { warningMessages: [...event.context.warningMessages] });
5269
6147
  if (event.field === "pipelineInfo" && event.context.pipelineInfo) updateEntry(event.context.id, { pipelineInfo: event.context.pipelineInfo });
5270
6148
  break;
6149
+ case "state_changed":
6150
+ updateEntry(event.context.id, buildHistoryActivityPatch(event.context));
6151
+ break;
5271
6152
  case "completed":
5272
6153
  case "failed": {
5273
6154
  const entryData = event.entry;
5274
6155
  const response = toHistoryResponse(entryData);
5275
6156
  updateEntry(entryData.id, {
6157
+ rawPath: entryData.rawPath,
6158
+ sessionId: entryData.sessionId,
6159
+ state: entryData.state,
6160
+ active: entryData.active,
6161
+ lastUpdatedAt: entryData.lastUpdatedAt,
6162
+ queueWaitMs: entryData.queueWaitMs,
6163
+ attemptCount: entryData.attemptCount,
6164
+ currentStrategy: entryData.currentStrategy,
5276
6165
  response,
6166
+ startedAt: entryData.startedAt,
6167
+ endedAt: entryData.endedAt,
5277
6168
  durationMs: entryData.durationMs,
6169
+ transport: entryData.transport,
5278
6170
  sseEvents: entryData.sseEvents,
6171
+ ...entryData.warningMessages && { warningMessages: entryData.warningMessages },
5279
6172
  ...entryData.effectiveRequest && { effectiveRequest: {
5280
6173
  model: entryData.effectiveRequest.model,
5281
6174
  format: entryData.effectiveRequest.format,
@@ -5317,6 +6210,10 @@ function handleTuiEvent(event) {
5317
6210
  const attempt = event.context.currentAttempt;
5318
6211
  if (attempt?.strategy) tuiLogger.updateRequest(tuiLogId, { tags: [attempt.strategy] });
5319
6212
  }
6213
+ if (event.field === "attempts") {
6214
+ const transportTag = toTransportTag(event.context.currentAttempt?.transport);
6215
+ if (transportTag) tuiLogger.updateRequest(tuiLogId, { tags: [transportTag] });
6216
+ }
5320
6217
  break;
5321
6218
  }
5322
6219
  case "completed": {
@@ -5370,6 +6267,10 @@ function toHistoryResponse(entryData) {
5370
6267
  headers: entryData.httpHeaders?.response
5371
6268
  };
5372
6269
  }
6270
+ function toTransportTag(transport) {
6271
+ if (transport === "upstream-ws") return "ws";
6272
+ if (transport === "upstream-ws-fallback") return "ws→http";
6273
+ }
5373
6274
  function registerContextConsumers(manager) {
5374
6275
  manager.on("change", handleHistoryEvent);
5375
6276
  manager.on("change", handleTuiEvent);
@@ -5419,6 +6320,63 @@ function isEndpointSupported(model, endpoint) {
5419
6320
  function isResponsesSupported(model) {
5420
6321
  return isEndpointSupported(model, ENDPOINT.RESPONSES) || isEndpointSupported(model, ENDPOINT.WS_RESPONSES);
5421
6322
  }
6323
+ /**
6324
+ * Check if a model explicitly supports upstream WebSocket transport for Responses API.
6325
+ *
6326
+ * Unlike `isEndpointSupported`, legacy models without `supported_endpoints` do not
6327
+ * implicitly gain WebSocket support. We only enable this transport when Copilot has
6328
+ * advertised the dedicated `ws:/responses` capability.
6329
+ */
6330
+ function isWsResponsesSupported(model) {
6331
+ if (!model?.supported_endpoints) return false;
6332
+ return model.supported_endpoints.includes(ENDPOINT.WS_RESPONSES);
6333
+ }
6334
+ //#endregion
6335
+ //#region src/lib/serve.ts
6336
+ /** Start the HTTP server and return a ServerInstance. */
6337
+ async function startServer(options) {
6338
+ if (typeof globalThis.Bun !== "undefined") return startBunServer(options);
6339
+ return startNodeServer(options);
6340
+ }
6341
+ async function startNodeServer(options) {
6342
+ const { createAdaptorServer } = await import("./dist-B3gFwWti.mjs");
6343
+ const nodeServer = createAdaptorServer({ fetch: options.fetch });
6344
+ await new Promise((resolve, reject) => {
6345
+ nodeServer.once("error", reject);
6346
+ nodeServer.listen({
6347
+ port: options.port,
6348
+ host: options.hostname,
6349
+ exclusive: false
6350
+ }, () => {
6351
+ nodeServer.removeListener("error", reject);
6352
+ resolve();
6353
+ });
6354
+ });
6355
+ return {
6356
+ nodeServer,
6357
+ close(force) {
6358
+ return new Promise((resolve, reject) => {
6359
+ if (force && "closeAllConnections" in nodeServer) nodeServer.closeAllConnections();
6360
+ nodeServer.close((err) => err ? reject(err) : resolve());
6361
+ });
6362
+ }
6363
+ };
6364
+ }
6365
+ async function startBunServer(options) {
6366
+ const bunServer = Bun.serve({
6367
+ fetch(request, server) {
6368
+ return options.fetch(request, { server });
6369
+ },
6370
+ port: options.port,
6371
+ hostname: options.hostname,
6372
+ idleTimeout: 255,
6373
+ ...options.bunWebSocket ? { websocket: options.bunWebSocket } : {}
6374
+ });
6375
+ return { close(force) {
6376
+ bunServer.stop(force ?? false);
6377
+ return Promise.resolve();
6378
+ } };
6379
+ }
5422
6380
  //#endregion
5423
6381
  //#region src/lib/openai/responses-conversion.ts
5424
6382
  /**
@@ -6012,6 +6970,47 @@ const createResponses = async (payload, opts) => {
6012
6970
  wire: prepared.wire,
6013
6971
  headers: sanitizeHeadersForHistory(prepared.headers)
6014
6972
  });
6973
+ const { wire } = prepared;
6974
+ let usedFallback = false;
6975
+ if (wire.stream && canUseUpstreamWebSocket(opts?.resolvedModel)) {
6976
+ const manager = getUpstreamWsManager();
6977
+ const connection = (typeof wire.previous_response_id === "string" ? manager.findReusable({
6978
+ previousResponseId: wire.previous_response_id,
6979
+ model: wire.model
6980
+ }) : void 0) ?? await manager.create({
6981
+ headers: prepared.headers,
6982
+ model: wire.model
6983
+ });
6984
+ try {
6985
+ if (!connection.isOpen) await connection.connect({ signal: createFetchSignal() });
6986
+ const iterator = connection.sendRequest(wire)[Symbol.asyncIterator]();
6987
+ const first = await awaitFirstEvent(iterator);
6988
+ manager.recordSuccessfulStart();
6989
+ opts?.onTransport?.("upstream-ws");
6990
+ return (async function* () {
6991
+ yield toSseMessage(first);
6992
+ for (;;) {
6993
+ const result = await iterator.next();
6994
+ if (result.done) return;
6995
+ yield toSseMessage(result.value);
6996
+ }
6997
+ })();
6998
+ } catch (error) {
6999
+ manager.recordFallback();
7000
+ opts?.onTransport?.("upstream-ws-fallback");
7001
+ usedFallback = true;
7002
+ connection.close();
7003
+ consola.warn(`[responses] Upstream WS failed before first event, falling back to HTTP (${manager.consecutiveFallbacks}/3): ${error instanceof Error ? error.message : String(error)}`);
7004
+ }
7005
+ }
7006
+ if (!usedFallback) opts?.onTransport?.("http");
7007
+ return createResponsesViaHttp(prepared, opts?.headersCapture);
7008
+ };
7009
+ function canUseUpstreamWebSocket(model) {
7010
+ const manager = getUpstreamWsManager();
7011
+ return state.upstreamWebSocket && !manager.temporarilyDisabled && !manager.stopped && isWsResponsesSupported(model);
7012
+ }
7013
+ async function createResponsesViaHttp(prepared, headersCapture) {
6015
7014
  const { wire, headers } = prepared;
6016
7015
  const fetchSignal = createFetchSignal();
6017
7016
  const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
@@ -6020,14 +7019,46 @@ const createResponses = async (payload, opts) => {
6020
7019
  body: JSON.stringify(wire),
6021
7020
  signal: fetchSignal
6022
7021
  });
6023
- if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
7022
+ if (headersCapture) captureHttpHeaders(headersCapture, headers, response);
6024
7023
  if (!response.ok) {
6025
7024
  consola.error("Failed to create responses", response);
6026
7025
  throw await HTTPError.fromResponse("Failed to create responses", response, wire.model);
6027
7026
  }
6028
7027
  if (wire.stream) return events(response);
6029
7028
  return await response.json();
6030
- };
7029
+ }
7030
+ async function awaitFirstEvent(iterator) {
7031
+ const signal = createFetchSignal();
7032
+ if (!signal) {
7033
+ const first = await iterator.next();
7034
+ if (first.done) throw new Error("Upstream WebSocket closed before first event");
7035
+ return first.value;
7036
+ }
7037
+ return await new Promise((resolve, reject) => {
7038
+ const onAbort = () => {
7039
+ signal.removeEventListener("abort", onAbort);
7040
+ reject(/* @__PURE__ */ new Error("Upstream WebSocket timed out before first event"));
7041
+ };
7042
+ signal.addEventListener("abort", onAbort, { once: true });
7043
+ iterator.next().then((result) => {
7044
+ signal.removeEventListener("abort", onAbort);
7045
+ if (result.done) {
7046
+ reject(/* @__PURE__ */ new Error("Upstream WebSocket closed before first event"));
7047
+ return;
7048
+ }
7049
+ resolve(result.value);
7050
+ }).catch((error) => {
7051
+ signal.removeEventListener("abort", onAbort);
7052
+ reject(error instanceof Error ? error : new Error(String(error)));
7053
+ });
7054
+ });
7055
+ }
7056
+ function toSseMessage(event) {
7057
+ return {
7058
+ event: event.type,
7059
+ data: JSON.stringify(event)
7060
+ };
7061
+ }
6031
7062
  //#endregion
6032
7063
  //#region src/lib/request/strategies/network-retry.ts
6033
7064
  /**
@@ -6129,7 +7160,7 @@ function createTokenRefreshStrategy() {
6129
7160
  * centralizes that configuration to avoid duplication.
6130
7161
  */
6131
7162
  /** Create the FormatAdapter for Responses API pipeline execution */
6132
- function createResponsesAdapter(selectedModel, headersCapture, onPrepared) {
7163
+ function createResponsesAdapter(selectedModel, headersCapture, onPrepared, onTransport) {
6133
7164
  return {
6134
7165
  format: "openai-responses",
6135
7166
  sanitize: (p) => ({
@@ -6140,10 +7171,11 @@ function createResponsesAdapter(selectedModel, headersCapture, onPrepared) {
6140
7171
  execute: (p) => executeWithAdaptiveRateLimit(() => createResponses(p, {
6141
7172
  resolvedModel: selectedModel,
6142
7173
  headersCapture,
7174
+ onTransport,
6143
7175
  onPrepared: ({ wire, headers }) => {
6144
7176
  onPrepared?.({
6145
7177
  model: typeof wire.model === "string" ? wire.model : p.model,
6146
- messages: [],
7178
+ messages: extractInputItems(wire.input),
6147
7179
  payload: wire,
6148
7180
  headers,
6149
7181
  format: "openai-responses"
@@ -6160,6 +7192,14 @@ function createResponsesAdapter(selectedModel, headersCapture, onPrepared) {
6160
7192
  function createResponsesStrategies() {
6161
7193
  return [createNetworkRetryStrategy(), createTokenRefreshStrategy()];
6162
7194
  }
7195
+ function extractInputItems(input) {
7196
+ if (typeof input === "string") return [{
7197
+ type: "message",
7198
+ role: "user",
7199
+ content: input
7200
+ }];
7201
+ return input;
7202
+ }
6163
7203
  const CALL_PREFIX = "call_";
6164
7204
  const FC_PREFIX = "fc_";
6165
7205
  /**
@@ -6258,7 +7298,9 @@ async function handleResponseCreate(ws, rawPayload) {
6258
7298
  });
6259
7299
  const reqCtx = getRequestContextManager().create({
6260
7300
  endpoint: "openai-responses",
6261
- tuiLogId
7301
+ sessionId: resolveResponseSessionId(payload.previous_response_id),
7302
+ tuiLogId,
7303
+ rawPath: "/v1/responses"
6262
7304
  });
6263
7305
  reqCtx.setOriginalRequest({
6264
7306
  model: requestedModel,
@@ -6275,6 +7317,8 @@ async function handleResponseCreate(ws, rawPayload) {
6275
7317
  const headersCapture = {};
6276
7318
  const adapter = createResponsesAdapter(selectedModel, headersCapture, (wireRequest) => {
6277
7319
  reqCtx.setAttemptWireRequest(wireRequest);
7320
+ }, (transport) => {
7321
+ reqCtx.setAttemptTransport(transport);
6278
7322
  });
6279
7323
  const strategies = createResponsesStrategies();
6280
7324
  try {
@@ -6312,6 +7356,8 @@ async function handleResponseCreate(ws, rawPayload) {
6312
7356
  consola.debug("[WS] Skipping unparseable SSE event");
6313
7357
  }
6314
7358
  }
7359
+ if (!reqCtx.sessionId && acc.responseId) reqCtx.setSessionId(acc.responseId);
7360
+ registerResponseSession(acc.responseId, reqCtx.sessionId);
6315
7361
  const responseData = buildResponsesResponseData(acc, resolvedModel);
6316
7362
  reqCtx.complete(responseData);
6317
7363
  ws.close(1e3, "done");
@@ -7193,57 +8239,393 @@ function sanitizeOpenAIMessages(payload) {
7193
8239
  const blocksRemoved = originalCount - messages.length;
7194
8240
  if (blocksRemoved > 0) consola.info(`[Sanitizer:OpenAI] Filtered ${blocksRemoved} orphaned tool messages`);
7195
8241
  return {
7196
- payload: {
7197
- ...payload,
7198
- messages: allMessages
7199
- },
7200
- blocksRemoved,
7201
- systemReminderRemovals
8242
+ payload: {
8243
+ ...payload,
8244
+ messages: allMessages
8245
+ },
8246
+ blocksRemoved,
8247
+ systemReminderRemovals
8248
+ };
8249
+ }
8250
+ //#endregion
8251
+ //#region src/lib/openai/stream-accumulator.ts
8252
+ function createOpenAIStreamAccumulator() {
8253
+ return {
8254
+ model: "",
8255
+ inputTokens: 0,
8256
+ outputTokens: 0,
8257
+ cachedTokens: 0,
8258
+ reasoningTokens: 0,
8259
+ finishReason: "",
8260
+ rawContent: "",
8261
+ toolCalls: [],
8262
+ toolCallMap: /* @__PURE__ */ new Map()
8263
+ };
8264
+ }
8265
+ /** Accumulate a single parsed OpenAI chunk into the accumulator */
8266
+ function accumulateOpenAIStreamEvent(parsed, acc) {
8267
+ if (parsed.model && !acc.model) acc.model = parsed.model;
8268
+ if (parsed.usage) {
8269
+ acc.inputTokens = parsed.usage.prompt_tokens;
8270
+ acc.outputTokens = parsed.usage.completion_tokens;
8271
+ if (parsed.usage.prompt_tokens_details?.cached_tokens !== void 0) acc.cachedTokens = parsed.usage.prompt_tokens_details.cached_tokens;
8272
+ if (parsed.usage.completion_tokens_details?.reasoning_tokens !== void 0) acc.reasoningTokens = parsed.usage.completion_tokens_details.reasoning_tokens;
8273
+ }
8274
+ const choice = parsed.choices[0];
8275
+ if (choice) {
8276
+ if (choice.delta.content) acc.rawContent += choice.delta.content;
8277
+ if (choice.delta.tool_calls) for (const tc of choice.delta.tool_calls) {
8278
+ const idx = tc.index;
8279
+ if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
8280
+ id: tc.id ?? "",
8281
+ name: tc.function?.name ?? "",
8282
+ argumentParts: []
8283
+ });
8284
+ const item = acc.toolCallMap.get(idx);
8285
+ if (item) {
8286
+ if (tc.id) item.id = tc.id;
8287
+ if (tc.function?.name) item.name = tc.function.name;
8288
+ if (tc.function?.arguments) item.argumentParts.push(tc.function.arguments);
8289
+ }
8290
+ }
8291
+ if (choice.finish_reason) acc.finishReason = choice.finish_reason;
8292
+ }
8293
+ }
8294
+ //#endregion
8295
+ //#region src/lib/openai/translate/cc-to-responses.ts
8296
+ const DROPPED_PARAMS = [
8297
+ "stop",
8298
+ "n",
8299
+ "frequency_penalty",
8300
+ "presence_penalty",
8301
+ "logit_bias",
8302
+ "logprobs",
8303
+ "seed"
8304
+ ];
8305
+ function splitInstructionsAndConversation(messages) {
8306
+ const systemTexts = [];
8307
+ const conversationMessages = [];
8308
+ for (const message of messages) {
8309
+ if (message.role === "system" || message.role === "developer") {
8310
+ const text = extractTextContent(message.content);
8311
+ if (text) systemTexts.push(text);
8312
+ continue;
8313
+ }
8314
+ conversationMessages.push(message);
8315
+ }
8316
+ return {
8317
+ instructions: systemTexts.length > 0 ? systemTexts.join("\n\n") : void 0,
8318
+ conversationMessages
8319
+ };
8320
+ }
8321
+ function translateChatCompletionsToResponses(payload) {
8322
+ const droppedParams = DROPPED_PARAMS.filter((key) => payload[key] !== void 0 && payload[key] !== null);
8323
+ const { instructions, conversationMessages } = splitInstructionsAndConversation(payload.messages);
8324
+ return {
8325
+ payload: {
8326
+ model: payload.model,
8327
+ input: translateMessages(conversationMessages),
8328
+ ...instructions !== void 0 && { instructions },
8329
+ ...payload.temperature !== void 0 && payload.temperature !== null && { temperature: payload.temperature },
8330
+ ...payload.top_p !== void 0 && payload.top_p !== null && { top_p: payload.top_p },
8331
+ ...payload.max_tokens !== void 0 && payload.max_tokens !== null && { max_output_tokens: payload.max_tokens },
8332
+ ...payload.stream !== void 0 && payload.stream !== null && { stream: payload.stream },
8333
+ ...payload.parallel_tool_calls !== void 0 && payload.parallel_tool_calls !== null && { parallel_tool_calls: payload.parallel_tool_calls },
8334
+ ...payload.user !== void 0 && { user: payload.user },
8335
+ ...payload.service_tier !== void 0 && { service_tier: payload.service_tier },
8336
+ ...payload.top_logprobs !== void 0 && payload.top_logprobs !== null && { top_logprobs: payload.top_logprobs },
8337
+ ...payload.tools && { tools: translateTools(payload.tools) },
8338
+ ...payload.tool_choice && { tool_choice: translateToolChoice(payload.tool_choice) },
8339
+ ...payload.response_format && { text: { format: translateResponseFormat(payload.response_format) } },
8340
+ ...payload.stream_options?.include_usage && { include: ["usage"] }
8341
+ },
8342
+ droppedParams
8343
+ };
8344
+ }
8345
+ function translateMessages(messages) {
8346
+ const items = [];
8347
+ for (const message of messages) switch (message.role) {
8348
+ case "user":
8349
+ items.push(convertUserMessage(message));
8350
+ break;
8351
+ case "assistant":
8352
+ items.push(...convertAssistantMessage(message));
8353
+ break;
8354
+ case "tool":
8355
+ items.push(convertToolMessage(message));
8356
+ break;
8357
+ default: break;
8358
+ }
8359
+ return items;
8360
+ }
8361
+ function convertUserMessage(message) {
8362
+ if (typeof message.content === "string") return {
8363
+ type: "message",
8364
+ role: "user",
8365
+ content: [{
8366
+ type: "input_text",
8367
+ text: message.content
8368
+ }]
8369
+ };
8370
+ if (!Array.isArray(message.content)) return {
8371
+ type: "message",
8372
+ role: "user",
8373
+ content: [{
8374
+ type: "input_text",
8375
+ text: ""
8376
+ }]
8377
+ };
8378
+ return {
8379
+ type: "message",
8380
+ role: "user",
8381
+ content: message.content.map((part) => convertUserContentPart(part))
8382
+ };
8383
+ }
8384
+ function convertUserContentPart(part) {
8385
+ if (part.type === "text") return {
8386
+ type: "input_text",
8387
+ text: part.text
8388
+ };
8389
+ return {
8390
+ type: "input_image",
8391
+ image_url: part.image_url.url,
8392
+ detail: part.image_url.detail
8393
+ };
8394
+ }
8395
+ function convertAssistantMessage(message) {
8396
+ const items = [];
8397
+ const text = extractTextContent(message.content);
8398
+ if (text) items.push({
8399
+ type: "message",
8400
+ role: "assistant",
8401
+ content: [{
8402
+ type: "output_text",
8403
+ text
8404
+ }]
8405
+ });
8406
+ for (const toolCall of message.tool_calls ?? []) items.push({
8407
+ type: "function_call",
8408
+ id: toolCall.id,
8409
+ call_id: toolCall.id,
8410
+ name: toolCall.function.name,
8411
+ arguments: toolCall.function.arguments
8412
+ });
8413
+ return items;
8414
+ }
8415
+ function convertToolMessage(message) {
8416
+ let output = "";
8417
+ if (typeof message.content === "string") output = message.content;
8418
+ else if (Array.isArray(message.content)) {
8419
+ const textParts = message.content.filter((part) => part.type === "text").map((part) => part.text);
8420
+ output = textParts.length > 0 ? textParts.join("") : JSON.stringify(message.content);
8421
+ }
8422
+ return {
8423
+ type: "function_call_output",
8424
+ call_id: message.tool_call_id ?? "",
8425
+ output
8426
+ };
8427
+ }
8428
+ function extractTextContent(content) {
8429
+ if (typeof content === "string") return content;
8430
+ if (!Array.isArray(content)) return "";
8431
+ return content.filter((part) => part.type === "text").map((part) => part.text).join("");
8432
+ }
8433
+ function translateTools(tools) {
8434
+ return tools.map((tool) => ({
8435
+ type: "function",
8436
+ name: tool.function.name,
8437
+ description: tool.function.description,
8438
+ parameters: tool.function.parameters,
8439
+ strict: tool.function.strict
8440
+ }));
8441
+ }
8442
+ function translateToolChoice(choice) {
8443
+ if (typeof choice === "string") return choice;
8444
+ return {
8445
+ type: "function",
8446
+ name: choice.function.name
8447
+ };
8448
+ }
8449
+ function translateResponseFormat(format) {
8450
+ if (format.type === "json_schema") return {
8451
+ type: "json_schema",
8452
+ name: format.json_schema.name,
8453
+ description: format.json_schema.description,
8454
+ schema: format.json_schema.schema,
8455
+ strict: format.json_schema.strict
8456
+ };
8457
+ return { type: format.type };
8458
+ }
8459
+ //#endregion
8460
+ //#region src/lib/openai/translate/responses-to-cc.ts
8461
+ function translateResponsesResponseToCC(response) {
8462
+ if (response.status === "failed") throw new HTTPError(response.error?.message ?? "Upstream response failed", 500, JSON.stringify(response.error ?? { status: response.status }), response.model);
8463
+ return {
8464
+ id: response.id,
8465
+ object: "chat.completion",
8466
+ created: response.created_at,
8467
+ model: response.model,
8468
+ choices: [{
8469
+ index: 0,
8470
+ message: extractMessageFromOutput(response.output),
8471
+ finish_reason: mapFinishReason(response.status, response.output, response.incomplete_details),
8472
+ logprobs: null
8473
+ }],
8474
+ ...response.usage && { usage: mapUsage(response.usage) },
8475
+ ...response.service_tier !== void 0 && { service_tier: response.service_tier }
8476
+ };
8477
+ }
8478
+ function extractMessageFromOutput(output) {
8479
+ const textParts = [];
8480
+ const toolCalls = [];
8481
+ for (const item of output) {
8482
+ if (item.type === "message") for (const part of item.content) {
8483
+ if (part.type === "output_text") textParts.push(part.text);
8484
+ if (part.type === "refusal") textParts.push(part.refusal);
8485
+ }
8486
+ if (item.type === "function_call") toolCalls.push({
8487
+ id: item.call_id,
8488
+ type: "function",
8489
+ function: {
8490
+ name: item.name,
8491
+ arguments: item.arguments
8492
+ }
8493
+ });
8494
+ }
8495
+ return {
8496
+ role: "assistant",
8497
+ content: textParts.join("") || null,
8498
+ ...toolCalls.length > 0 && { tool_calls: toolCalls }
7202
8499
  };
7203
8500
  }
7204
- //#endregion
7205
- //#region src/lib/openai/stream-accumulator.ts
7206
- function createOpenAIStreamAccumulator() {
8501
+ function mapFinishReason(status, output, incompleteDetails) {
8502
+ if (output.some((item) => item.type === "function_call")) return "tool_calls";
8503
+ switch (status) {
8504
+ case "completed": return "stop";
8505
+ case "incomplete": return mapIncompleteFinishReason(incompleteDetails);
8506
+ default: return "stop";
8507
+ }
8508
+ }
8509
+ function mapIncompleteFinishReason(incompleteDetails) {
8510
+ if (incompleteDetails?.reason === "content_filter") return "content_filter";
8511
+ return "length";
8512
+ }
8513
+ function mapUsage(usage) {
7207
8514
  return {
7208
- model: "",
7209
- inputTokens: 0,
7210
- outputTokens: 0,
7211
- cachedTokens: 0,
7212
- reasoningTokens: 0,
7213
- finishReason: "",
7214
- rawContent: "",
7215
- toolCalls: [],
7216
- toolCallMap: /* @__PURE__ */ new Map()
8515
+ prompt_tokens: usage.input_tokens,
8516
+ completion_tokens: usage.output_tokens,
8517
+ total_tokens: usage.total_tokens,
8518
+ ...usage.input_tokens_details?.cached_tokens !== void 0 && { prompt_tokens_details: { cached_tokens: usage.input_tokens_details.cached_tokens } },
8519
+ ...usage.output_tokens_details?.reasoning_tokens !== void 0 && { completion_tokens_details: { reasoning_tokens: usage.output_tokens_details.reasoning_tokens } }
7217
8520
  };
7218
8521
  }
7219
- /** Accumulate a single parsed OpenAI chunk into the accumulator */
7220
- function accumulateOpenAIStreamEvent(parsed, acc) {
7221
- if (parsed.model && !acc.model) acc.model = parsed.model;
7222
- if (parsed.usage) {
7223
- acc.inputTokens = parsed.usage.prompt_tokens;
7224
- acc.outputTokens = parsed.usage.completion_tokens;
7225
- if (parsed.usage.prompt_tokens_details?.cached_tokens !== void 0) acc.cachedTokens = parsed.usage.prompt_tokens_details.cached_tokens;
7226
- if (parsed.usage.completion_tokens_details?.reasoning_tokens !== void 0) acc.reasoningTokens = parsed.usage.completion_tokens_details.reasoning_tokens;
7227
- }
7228
- const choice = parsed.choices[0];
7229
- if (choice) {
7230
- if (choice.delta.content) acc.rawContent += choice.delta.content;
7231
- if (choice.delta.tool_calls) for (const tc of choice.delta.tool_calls) {
7232
- const idx = tc.index;
7233
- if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
7234
- id: tc.id ?? "",
7235
- name: tc.function?.name ?? "",
7236
- argumentParts: []
7237
- });
7238
- const item = acc.toolCallMap.get(idx);
7239
- if (item) {
7240
- if (tc.id) item.id = tc.id;
7241
- if (tc.function?.name) item.name = tc.function.name;
7242
- if (tc.function?.arguments) item.argumentParts.push(tc.function.arguments);
8522
+ //#endregion
8523
+ //#region src/lib/openai/translate/responses-to-cc-stream.ts
8524
+ function createStreamTranslator(opts) {
8525
+ const state = {
8526
+ sentFirstChunk: false,
8527
+ responseId: "",
8528
+ model: "",
8529
+ toolCallIndexMap: /* @__PURE__ */ new Map(),
8530
+ nextToolCallIndex: 0,
8531
+ toolCallIds: /* @__PURE__ */ new Map(),
8532
+ includeUsage: opts.includeUsage
8533
+ };
8534
+ function translate(event) {
8535
+ switch (event.type) {
8536
+ case "response.created":
8537
+ state.responseId = event.response.id;
8538
+ state.model = event.response.model;
8539
+ state.sentFirstChunk = true;
8540
+ return [buildChunk(state, { role: "assistant" })];
8541
+ case "response.output_text.delta": return [buildChunk(state, { content: event.delta })];
8542
+ case "response.refusal.delta": return [buildChunk(state, { content: event.delta })];
8543
+ case "response.output_item.added": {
8544
+ if (event.item.type !== "function_call") return [];
8545
+ const toolCallIndex = state.nextToolCallIndex++;
8546
+ const callId = event.item.call_id || event.item.id;
8547
+ state.toolCallIndexMap.set(event.output_index, toolCallIndex);
8548
+ state.toolCallIds.set(event.output_index, callId);
8549
+ return [buildChunk(state, { tool_calls: [{
8550
+ index: toolCallIndex,
8551
+ id: callId,
8552
+ type: "function",
8553
+ function: { name: event.item.name }
8554
+ }] })];
7243
8555
  }
8556
+ case "response.function_call_arguments.delta": {
8557
+ const toolCallIndex = state.toolCallIndexMap.get(event.output_index);
8558
+ if (toolCallIndex === void 0) return [];
8559
+ return [buildChunk(state, { tool_calls: [{
8560
+ index: toolCallIndex,
8561
+ function: { arguments: event.delta }
8562
+ }] })];
8563
+ }
8564
+ case "response.completed": {
8565
+ syncStateFromResponse(state, event.response);
8566
+ const chunks = [buildChunk(state, {}, state.nextToolCallIndex > 0 ? "tool_calls" : "stop")];
8567
+ if (state.includeUsage && event.response.usage) chunks.push(buildUsageChunk(state, event.response));
8568
+ return chunks;
8569
+ }
8570
+ case "response.incomplete":
8571
+ syncStateFromResponse(state, event.response);
8572
+ return [buildChunk(state, {}, mapIncompleteFinishReason(event.response.incomplete_details))];
8573
+ case "response.failed": throw new Error(event.response.error?.message ?? "Upstream response failed");
8574
+ case "error": throw new Error(event.message ?? "Upstream error");
8575
+ default: return [];
7244
8576
  }
7245
- if (choice.finish_reason) acc.finishReason = choice.finish_reason;
7246
8577
  }
8578
+ return {
8579
+ translate,
8580
+ getState: () => state
8581
+ };
8582
+ }
8583
+ async function* translateResponsesStream(upstream, translator) {
8584
+ for await (const rawEvent of upstream) {
8585
+ if (!rawEvent.data || rawEvent.data === "[DONE]") continue;
8586
+ const event = JSON.parse(rawEvent.data);
8587
+ const chunks = translator.translate(event);
8588
+ for (const chunk of chunks) yield {
8589
+ data: JSON.stringify(chunk),
8590
+ event: "message"
8591
+ };
8592
+ }
8593
+ yield { data: "[DONE]" };
8594
+ }
8595
+ function syncStateFromResponse(state, response) {
8596
+ if (!state.responseId) state.responseId = response.id;
8597
+ if (!state.model) state.model = response.model;
8598
+ }
8599
+ function buildChunk(state, delta, finishReason = null) {
8600
+ return {
8601
+ id: state.responseId,
8602
+ object: "chat.completion.chunk",
8603
+ created: Math.floor(Date.now() / 1e3),
8604
+ model: state.model,
8605
+ choices: [{
8606
+ index: 0,
8607
+ delta,
8608
+ finish_reason: finishReason,
8609
+ logprobs: null
8610
+ }]
8611
+ };
8612
+ }
8613
+ function buildUsageChunk(state, response) {
8614
+ const usage = response.usage;
8615
+ return {
8616
+ id: state.responseId,
8617
+ object: "chat.completion.chunk",
8618
+ created: Math.floor(Date.now() / 1e3),
8619
+ model: state.model,
8620
+ choices: [],
8621
+ ...usage && { usage: {
8622
+ prompt_tokens: usage.input_tokens,
8623
+ completion_tokens: usage.output_tokens,
8624
+ total_tokens: usage.total_tokens,
8625
+ ...usage.input_tokens_details?.cached_tokens !== void 0 && { prompt_tokens_details: { cached_tokens: usage.input_tokens_details.cached_tokens } },
8626
+ ...usage.output_tokens_details?.reasoning_tokens !== void 0 && { completion_tokens_details: { reasoning_tokens: usage.output_tokens_details.reasoning_tokens } }
8627
+ } }
8628
+ };
7247
8629
  }
7248
8630
  //#endregion
7249
8631
  //#region src/lib/request/payload.ts
@@ -7416,6 +8798,7 @@ function createTruncationMarker$1(result) {
7416
8798
  }
7417
8799
  //#endregion
7418
8800
  //#region src/routes/chat-completions/handler.ts
8801
+ const DROPPED_CC_PARAMS_WARNING_CODE = "cc_to_responses_dropped_params";
7419
8802
  async function handleChatCompletion(c) {
7420
8803
  const originalPayload = await c.req.json();
7421
8804
  const clientModel = originalPayload.model;
@@ -7425,15 +8808,13 @@ async function handleChatCompletion(c) {
7425
8808
  originalPayload.model = resolvedModel;
7426
8809
  }
7427
8810
  const selectedModel = state.modelIndex.get(originalPayload.model);
7428
- if (!isEndpointSupported(selectedModel, ENDPOINT.CHAT_COMPLETIONS)) {
7429
- const msg = `Model "${originalPayload.model}" does not support the ${ENDPOINT.CHAT_COMPLETIONS} endpoint`;
7430
- throw new HTTPError(msg, 400, msg);
7431
- }
7432
8811
  originalPayload.messages = await processOpenAIMessages(originalPayload.messages, originalPayload.model);
7433
8812
  const tuiLogId = c.get("tuiLogId");
7434
8813
  const reqCtx = getRequestContextManager().create({
7435
8814
  endpoint: "openai-chat-completions",
7436
- tuiLogId
8815
+ sessionId: getSessionIdFromHeaders(c.req.raw.headers),
8816
+ tuiLogId,
8817
+ rawPath: c.req.path
7437
8818
  });
7438
8819
  reqCtx.setOriginalRequest({
7439
8820
  model: clientModel,
@@ -7455,13 +8836,25 @@ async function handleChatCompletion(c) {
7455
8836
  max_tokens: selectedModel?.capabilities?.limits?.max_output_tokens
7456
8837
  } : sanitizedPayload;
7457
8838
  if (isNullish(originalPayload.max_tokens)) consola.debug("Set max_tokens to:", JSON.stringify(finalPayload.max_tokens));
7458
- return executeRequest({
8839
+ if (isEndpointSupported(selectedModel, ENDPOINT.CHAT_COMPLETIONS)) return executeRequest({
7459
8840
  c,
7460
8841
  payload: finalPayload,
7461
8842
  originalPayload,
7462
8843
  selectedModel,
7463
8844
  reqCtx
7464
8845
  });
8846
+ if (isResponsesSupported(selectedModel)) {
8847
+ if (tuiLogId) tuiLogger.updateRequest(tuiLogId, { tags: ["via-responses"] });
8848
+ return executeRequestViaResponses({
8849
+ c,
8850
+ payload: finalPayload,
8851
+ originalPayload,
8852
+ selectedModel,
8853
+ reqCtx
8854
+ });
8855
+ }
8856
+ const msg = `Model "${originalPayload.model}" does not support the ${ENDPOINT.CHAT_COMPLETIONS} endpoint`;
8857
+ throw new HTTPError(msg, 400, msg);
7465
8858
  }
7466
8859
  /**
7467
8860
  * Execute the API call with reactive retry pipeline.
@@ -7470,34 +8863,102 @@ async function handleChatCompletion(c) {
7470
8863
  async function executeRequest(opts) {
7471
8864
  const { c, payload, originalPayload, selectedModel, reqCtx } = opts;
7472
8865
  const headersCapture = {};
7473
- const adapter = {
7474
- format: "openai-chat-completions",
7475
- sanitize: (p) => sanitizeOpenAIMessages(p),
7476
- execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p, {
7477
- resolvedModel: selectedModel,
7478
- headersCapture,
7479
- onPrepared: ({ wire, headers }) => {
7480
- reqCtx.setAttemptWireRequest({
7481
- model: typeof wire.model === "string" ? wire.model : payload.model,
7482
- messages: Array.isArray(wire.messages) ? wire.messages : [],
7483
- payload: wire,
7484
- headers,
7485
- format: "openai-chat-completions"
7486
- });
7487
- }
7488
- })),
7489
- logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
7490
- };
7491
- const strategies = [
8866
+ return executeRequestWithAdapter({
8867
+ c,
8868
+ payload,
8869
+ originalPayload,
8870
+ selectedModel,
8871
+ reqCtx,
8872
+ adapter: {
8873
+ format: "openai-chat-completions",
8874
+ sanitize: (p) => sanitizeOpenAIMessages(p),
8875
+ execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p, {
8876
+ resolvedModel: selectedModel,
8877
+ headersCapture,
8878
+ onPrepared: ({ wire, headers }) => {
8879
+ reqCtx.setAttemptWireRequest({
8880
+ model: typeof wire.model === "string" ? wire.model : payload.model,
8881
+ messages: Array.isArray(wire.messages) ? wire.messages : [],
8882
+ payload: wire,
8883
+ headers,
8884
+ format: "openai-chat-completions"
8885
+ });
8886
+ }
8887
+ })),
8888
+ logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
8889
+ },
8890
+ strategies: createChatCompletionsStrategies("Completions"),
8891
+ headersCapture
8892
+ });
8893
+ }
8894
+ async function executeRequestViaResponses(opts) {
8895
+ const { c, payload, originalPayload, selectedModel, reqCtx } = opts;
8896
+ const headersCapture = {};
8897
+ return executeRequestWithAdapter({
8898
+ c,
8899
+ payload,
8900
+ originalPayload,
8901
+ selectedModel,
8902
+ reqCtx,
8903
+ adapter: {
8904
+ format: "openai-chat-completions",
8905
+ sanitize: (p) => sanitizeOpenAIMessages(p),
8906
+ execute: async (ccPayload) => {
8907
+ const { payload: responsesPayload, droppedParams } = translateChatCompletionsToResponses(ccPayload);
8908
+ if (droppedParams.length > 0) recordDroppedCcParamsWarning(reqCtx, ccPayload.model, droppedParams);
8909
+ const finalPayload = state.normalizeResponsesCallIds ? normalizeCallIds(responsesPayload) : responsesPayload;
8910
+ const result = await executeWithAdaptiveRateLimit(() => createResponses(finalPayload, {
8911
+ resolvedModel: selectedModel,
8912
+ headersCapture,
8913
+ onPrepared: ({ wire, headers }) => {
8914
+ reqCtx.setAttemptWireRequest({
8915
+ model: typeof wire.model === "string" ? wire.model : ccPayload.model,
8916
+ messages: extractInputItems(wire.input),
8917
+ payload: wire,
8918
+ headers,
8919
+ format: "openai-responses"
8920
+ });
8921
+ }
8922
+ }));
8923
+ if (!ccPayload.stream) return {
8924
+ result: translateResponsesResponseToCC(result.result),
8925
+ queueWaitMs: result.queueWaitMs
8926
+ };
8927
+ return {
8928
+ result: translateResponsesStream(result.result, createStreamTranslator({ includeUsage: ccPayload.stream_options?.include_usage ?? false })),
8929
+ queueWaitMs: result.queueWaitMs
8930
+ };
8931
+ },
8932
+ logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
8933
+ },
8934
+ strategies: createChatCompletionsStrategies("Completions(→Responses)"),
8935
+ headersCapture
8936
+ });
8937
+ }
8938
+ function recordDroppedCcParamsWarning(reqCtx, model, droppedParams) {
8939
+ const message = `Chat Completions -> Responses translation dropped unsupported params: ${droppedParams.join(", ")}`;
8940
+ if (reqCtx.warningMessages.some((warning) => warning.code === DROPPED_CC_PARAMS_WARNING_CODE && warning.message === message)) return;
8941
+ consola.warn(`[CC→Responses] model=${model} ${message}`);
8942
+ reqCtx.addWarningMessage({
8943
+ code: DROPPED_CC_PARAMS_WARNING_CODE,
8944
+ message
8945
+ });
8946
+ if (reqCtx.tuiLogId) tuiLogger.updateRequest(reqCtx.tuiLogId, { tags: ["dropped-params"] });
8947
+ }
8948
+ function createChatCompletionsStrategies(label) {
8949
+ return [
7492
8950
  createNetworkRetryStrategy(),
7493
8951
  createTokenRefreshStrategy(),
7494
8952
  createAutoTruncateStrategy({
7495
8953
  truncate: (p, model, truncOpts) => autoTruncateOpenAI(p, model, truncOpts),
7496
8954
  resanitize: (p) => sanitizeOpenAIMessages(p),
7497
8955
  isEnabled: () => state.autoTruncate,
7498
- label: "Completions"
8956
+ label
7499
8957
  })
7500
8958
  ];
8959
+ }
8960
+ async function executeRequestWithAdapter(opts) {
8961
+ const { c, payload, originalPayload, selectedModel, reqCtx, adapter, strategies, headersCapture } = opts;
7501
8962
  let truncateResult;
7502
8963
  try {
7503
8964
  const result = await executeRequestPipeline({
@@ -7648,7 +9109,7 @@ chatCompletionRoutes.post("/", async (c) => {
7648
9109
  });
7649
9110
  //#endregion
7650
9111
  //#region src/routes/config/route.ts
7651
- /** Current effective runtime configuration (read-only, sanitized) */
9112
+ /** Current effective runtime configuration and editable config.yaml routes */
7652
9113
  const configRoutes = new Hono();
7653
9114
  configRoutes.get("/", (c) => {
7654
9115
  return c.json({
@@ -7659,13 +9120,21 @@ configRoutes.get("/", (c) => {
7659
9120
  immutableThinkingMessages: state.immutableThinkingMessages,
7660
9121
  dedupToolCalls: state.dedupToolCalls,
7661
9122
  contextEditingMode: state.contextEditingMode,
9123
+ contextEditingTrigger: state.contextEditingTrigger,
9124
+ contextEditingKeepTools: state.contextEditingKeepTools,
9125
+ contextEditingKeepThinking: state.contextEditingKeepThinking,
9126
+ toolSearchEnabled: state.toolSearchEnabled,
9127
+ autoCacheControl: state.autoCacheControl,
9128
+ nonDeferredTools: state.nonDeferredTools,
7662
9129
  rewriteSystemReminders: serializeRewriteSystemReminders(state.rewriteSystemReminders),
7663
9130
  stripReadToolResultTags: state.stripReadToolResultTags,
7664
9131
  systemPromptOverridesCount: state.systemPromptOverrides.length,
7665
9132
  normalizeResponsesCallIds: state.normalizeResponsesCallIds,
9133
+ upstreamWebSocket: state.upstreamWebSocket,
7666
9134
  fetchTimeout: state.fetchTimeout,
7667
9135
  streamIdleTimeout: state.streamIdleTimeout,
7668
9136
  staleRequestMaxAge: state.staleRequestMaxAge,
9137
+ modelRefreshInterval: state.modelRefreshInterval,
7669
9138
  shutdownGracefulWait: state.shutdownGracefulWait,
7670
9139
  shutdownAbortWait: state.shutdownAbortWait,
7671
9140
  historyLimit: state.historyLimit,
@@ -7674,6 +9143,49 @@ configRoutes.get("/", (c) => {
7674
9143
  rateLimiter: state.adaptiveRateLimitConfig ?? null
7675
9144
  });
7676
9145
  });
9146
+ configRoutes.get("/yaml", async (c) => {
9147
+ try {
9148
+ const config = await loadRawConfigFile();
9149
+ return c.json(config);
9150
+ } catch (error) {
9151
+ const message = error instanceof Error ? error.message : "Failed to read config.yaml";
9152
+ return c.json({
9153
+ error: "Failed to read config.yaml",
9154
+ details: [{
9155
+ field: "$",
9156
+ message
9157
+ }]
9158
+ }, 500);
9159
+ }
9160
+ });
9161
+ configRoutes.put("/yaml", async (c) => {
9162
+ let body;
9163
+ try {
9164
+ body = await c.req.json();
9165
+ } catch {
9166
+ return c.json({
9167
+ error: "Invalid JSON body",
9168
+ details: [{
9169
+ field: "$",
9170
+ message: "Request body must be valid JSON"
9171
+ }]
9172
+ }, 400);
9173
+ }
9174
+ const validation = validateConfigBody(body);
9175
+ if (!validation.valid) return c.json({
9176
+ error: "Config validation failed",
9177
+ details: validation.details
9178
+ }, 400);
9179
+ const doc = await loadEditableConfigDocument();
9180
+ mergeConfigIntoDocument(doc, validation.value);
9181
+ await fs.mkdir(PATHS.APP_DIR, { recursive: true });
9182
+ await fs.writeFile(PATHS.CONFIG_YAML, doc.toString(), "utf8");
9183
+ resetConfigCache();
9184
+ resetConfigManagedState();
9185
+ await applyConfigToState();
9186
+ const saved = await loadRawConfigFile();
9187
+ return c.json(saved);
9188
+ });
7677
9189
  /**
7678
9190
  * Serialize rewriteSystemReminders for API output.
7679
9191
  * CompiledRewriteRule contains RegExp objects which don't serialize well —
@@ -7688,6 +9200,308 @@ function serializeRewriteSystemReminders(value) {
7688
9200
  ...rule.modelPattern ? { model: rule.modelPattern.source } : {}
7689
9201
  }));
7690
9202
  }
9203
+ const TOP_LEVEL_KEYS = new Set([
9204
+ "proxy",
9205
+ "model_overrides",
9206
+ "stream_idle_timeout",
9207
+ "fetch_timeout",
9208
+ "stale_request_max_age",
9209
+ "model_refresh_interval",
9210
+ "shutdown",
9211
+ "history",
9212
+ "anthropic",
9213
+ "openai-responses",
9214
+ "rate_limiter",
9215
+ "compress_tool_results_before_truncate",
9216
+ "system_prompt_overrides",
9217
+ "system_prompt_prepend",
9218
+ "system_prompt_append"
9219
+ ]);
9220
+ const ANTHROPIC_KEYS = new Set([
9221
+ "strip_server_tools",
9222
+ "dedup_tool_calls",
9223
+ "immutable_thinking_messages",
9224
+ "strip_read_tool_result_tags",
9225
+ "context_editing",
9226
+ "context_editing_trigger",
9227
+ "context_editing_keep_tools",
9228
+ "context_editing_keep_thinking",
9229
+ "tool_search",
9230
+ "auto_cache_control",
9231
+ "non_deferred_tools",
9232
+ "rewrite_system_reminders"
9233
+ ]);
9234
+ const SHUTDOWN_KEYS = new Set(["graceful_wait", "abort_wait"]);
9235
+ const HISTORY_KEYS = new Set(["limit", "min_entries"]);
9236
+ const RESPONSES_KEYS = new Set(["normalize_call_ids", "upstream_websocket"]);
9237
+ const RATE_LIMITER_KEYS = new Set([
9238
+ "retry_interval",
9239
+ "request_interval",
9240
+ "recovery_timeout",
9241
+ "consecutive_successes"
9242
+ ]);
9243
+ const ANTHROPIC_COLLECTION_KEYS = new Set(["rewrite_system_reminders", "non_deferred_tools"]);
9244
+ function validateConfigBody(input) {
9245
+ if (!isPlainObject(input)) return {
9246
+ valid: false,
9247
+ details: [{
9248
+ field: "$",
9249
+ message: "Config body must be a JSON object",
9250
+ value: input
9251
+ }]
9252
+ };
9253
+ const body = input;
9254
+ const details = [];
9255
+ validateUnknownKeys(body, TOP_LEVEL_KEYS, "", details);
9256
+ if (hasOwn(body, "proxy")) validateOptionalString(body.proxy, "proxy", details, { validateUrlScheme: true });
9257
+ if (hasOwn(body, "model_overrides")) validateStringMap(body.model_overrides, "model_overrides", details);
9258
+ if (hasOwn(body, "stream_idle_timeout")) validateNonNegativeInteger(body.stream_idle_timeout, "stream_idle_timeout", details);
9259
+ if (hasOwn(body, "fetch_timeout")) validateNonNegativeInteger(body.fetch_timeout, "fetch_timeout", details);
9260
+ if (hasOwn(body, "stale_request_max_age")) validateNonNegativeInteger(body.stale_request_max_age, "stale_request_max_age", details);
9261
+ if (hasOwn(body, "model_refresh_interval")) validateNonNegativeInteger(body.model_refresh_interval, "model_refresh_interval", details);
9262
+ if (hasOwn(body, "compress_tool_results_before_truncate")) validateBoolean(body.compress_tool_results_before_truncate, "compress_tool_results_before_truncate", details);
9263
+ if (hasOwn(body, "system_prompt_prepend")) validateOptionalString(body.system_prompt_prepend, "system_prompt_prepend", details);
9264
+ if (hasOwn(body, "system_prompt_append")) validateOptionalString(body.system_prompt_append, "system_prompt_append", details);
9265
+ if (hasOwn(body, "system_prompt_overrides")) validateRewriteRules(body.system_prompt_overrides, "system_prompt_overrides", details, { allowModel: true });
9266
+ if (hasOwn(body, "shutdown")) validateNestedObject(body.shutdown, "shutdown", SHUTDOWN_KEYS, details, (value, path) => validateNonNegativeInteger(value, path, details));
9267
+ if (hasOwn(body, "history")) validateNestedObject(body.history, "history", HISTORY_KEYS, details, (value, path) => validateNonNegativeInteger(value, path, details));
9268
+ if (hasOwn(body, "openai-responses")) validateNestedObject(body["openai-responses"], "openai-responses", RESPONSES_KEYS, details, (value, path) => validateBoolean(value, path, details));
9269
+ if (hasOwn(body, "rate_limiter")) validateNestedObject(body.rate_limiter, "rate_limiter", RATE_LIMITER_KEYS, details, (value, path) => validateNonNegativeInteger(value, path, details));
9270
+ if (hasOwn(body, "anthropic")) validateAnthropic(body.anthropic, details);
9271
+ if (details.length > 0) return {
9272
+ valid: false,
9273
+ details
9274
+ };
9275
+ return {
9276
+ valid: true,
9277
+ value: input
9278
+ };
9279
+ }
9280
+ function validateAnthropic(value, details) {
9281
+ if (value === null) return;
9282
+ if (!isPlainObject(value)) {
9283
+ pushDetail(details, "anthropic", "Must be an object or null", value);
9284
+ return;
9285
+ }
9286
+ validateUnknownKeys(value, ANTHROPIC_KEYS, "anthropic", details);
9287
+ if (hasOwn(value, "strip_server_tools")) validateBoolean(value.strip_server_tools, "anthropic.strip_server_tools", details);
9288
+ if (hasOwn(value, "immutable_thinking_messages")) validateBoolean(value.immutable_thinking_messages, "anthropic.immutable_thinking_messages", details);
9289
+ if (hasOwn(value, "strip_read_tool_result_tags")) validateBoolean(value.strip_read_tool_result_tags, "anthropic.strip_read_tool_result_tags", details);
9290
+ if (hasOwn(value, "dedup_tool_calls")) {
9291
+ const allowed = new Set([
9292
+ false,
9293
+ true,
9294
+ "input",
9295
+ "result"
9296
+ ]);
9297
+ validateEnum(value.dedup_tool_calls, "anthropic.dedup_tool_calls", allowed, details);
9298
+ }
9299
+ if (hasOwn(value, "context_editing")) validateEnum(value.context_editing, "anthropic.context_editing", new Set([
9300
+ "off",
9301
+ "clear-thinking",
9302
+ "clear-tooluse",
9303
+ "clear-both"
9304
+ ]), details);
9305
+ if (hasOwn(value, "context_editing_trigger")) validateNonNegativeInteger(value.context_editing_trigger, "anthropic.context_editing_trigger", details);
9306
+ if (hasOwn(value, "context_editing_keep_tools")) validateNonNegativeInteger(value.context_editing_keep_tools, "anthropic.context_editing_keep_tools", details);
9307
+ if (hasOwn(value, "context_editing_keep_thinking")) validateNonNegativeInteger(value.context_editing_keep_thinking, "anthropic.context_editing_keep_thinking", details);
9308
+ if (hasOwn(value, "tool_search")) validateBoolean(value.tool_search, "anthropic.tool_search", details);
9309
+ if (hasOwn(value, "auto_cache_control")) validateBoolean(value.auto_cache_control, "anthropic.auto_cache_control", details);
9310
+ if (hasOwn(value, "non_deferred_tools")) validateStringArray(value.non_deferred_tools, "anthropic.non_deferred_tools", details);
9311
+ if (hasOwn(value, "rewrite_system_reminders")) {
9312
+ const rewrite = value.rewrite_system_reminders;
9313
+ if (typeof rewrite === "boolean") return;
9314
+ validateRewriteRules(rewrite, "anthropic.rewrite_system_reminders", details, { allowModel: false });
9315
+ }
9316
+ }
9317
+ function validateUnknownKeys(object, allowedKeys, parentPath, details) {
9318
+ for (const key of Object.keys(object)) {
9319
+ if (allowedKeys.has(key)) continue;
9320
+ pushDetail(details, parentPath ? `${parentPath}.${key}` : key, "Unknown config field", object[key]);
9321
+ }
9322
+ }
9323
+ function validateNestedObject(value, field, allowedKeys, details, validateValue) {
9324
+ if (value === null) return;
9325
+ if (!isPlainObject(value)) {
9326
+ pushDetail(details, field, "Must be an object or null", value);
9327
+ return;
9328
+ }
9329
+ validateUnknownKeys(value, allowedKeys, field, details);
9330
+ for (const [key, child] of Object.entries(value)) validateValue(child, `${field}.${key}`);
9331
+ }
9332
+ function validateStringMap(value, field, details) {
9333
+ if (value === null) return;
9334
+ if (!isPlainObject(value)) {
9335
+ pushDetail(details, field, "Must be an object or null", value);
9336
+ return;
9337
+ }
9338
+ for (const [key, target] of Object.entries(value)) {
9339
+ if (key.trim().length === 0) pushDetail(details, `${field}.${key}`, "Override key must be a non-empty string", key);
9340
+ if (typeof target !== "string" || target.trim().length === 0) pushDetail(details, `${field}.${key}`, "Override target must be a non-empty string", target);
9341
+ }
9342
+ }
9343
+ function validateStringArray(value, field, details) {
9344
+ if (value === null) return;
9345
+ if (!Array.isArray(value)) {
9346
+ pushDetail(details, field, "Must be an array of strings or null", value);
9347
+ return;
9348
+ }
9349
+ for (const [index, item] of value.entries()) if (typeof item !== "string" || item.trim().length === 0) pushDetail(details, `${field}.${index}`, "Must be a non-empty string", item);
9350
+ }
9351
+ function validateRewriteRules(value, field, details, options) {
9352
+ if (value === null) return;
9353
+ if (!Array.isArray(value)) {
9354
+ pushDetail(details, field, "Must be an array, boolean, or null", value);
9355
+ return;
9356
+ }
9357
+ for (const [index, item] of value.entries()) {
9358
+ const itemField = `${field}.${index}`;
9359
+ if (!isPlainObject(item)) {
9360
+ pushDetail(details, itemField, "Rule must be an object", item);
9361
+ continue;
9362
+ }
9363
+ validateUnknownKeys(item, options.allowModel ? new Set([
9364
+ "from",
9365
+ "to",
9366
+ "method",
9367
+ "model"
9368
+ ]) : new Set([
9369
+ "from",
9370
+ "to",
9371
+ "method"
9372
+ ]), itemField, details);
9373
+ if (typeof item.from !== "string" || item.from.length === 0) {
9374
+ pushDetail(details, `${itemField}.from`, "Must be a non-empty string", item.from);
9375
+ continue;
9376
+ }
9377
+ if (typeof item.to !== "string") pushDetail(details, `${itemField}.to`, "Must be a string", item.to);
9378
+ if (item.method !== void 0 && item.method !== "line" && item.method !== "regex") pushDetail(details, `${itemField}.method`, "Must be 'line' or 'regex'", item.method);
9379
+ if (!options.allowModel && hasOwn(item, "model")) pushDetail(details, `${itemField}.model`, "Field is not supported here", item.model);
9380
+ if (options.allowModel && item.model !== void 0 && typeof item.model !== "string") pushDetail(details, `${itemField}.model`, "Must be a string", item.model);
9381
+ if (details.some((detail) => detail.field.startsWith(`${itemField}.`))) continue;
9382
+ if (compileRewriteRule({
9383
+ from: item.from,
9384
+ to: item.to,
9385
+ ...item.method ? { method: item.method } : {},
9386
+ ...options.allowModel && typeof item.model === "string" ? { model: item.model } : {}
9387
+ }) === null) pushDetail(details, `${itemField}.from`, "Invalid rewrite rule regex", item.from);
9388
+ }
9389
+ }
9390
+ function validateOptionalString(value, field, details, options) {
9391
+ if (value === null) return;
9392
+ if (typeof value !== "string") {
9393
+ pushDetail(details, field, "Must be a string or null", value);
9394
+ return;
9395
+ }
9396
+ if (options?.validateUrlScheme) validateProxy(value, field, details);
9397
+ }
9398
+ function validateProxy(value, field, details) {
9399
+ try {
9400
+ const url = new URL(value);
9401
+ if (![
9402
+ "http:",
9403
+ "https:",
9404
+ "socks5:",
9405
+ "socks5h:"
9406
+ ].includes(url.protocol)) pushDetail(details, field, "Proxy must use http, https, socks5, or socks5h scheme", value);
9407
+ } catch {
9408
+ pushDetail(details, field, "Proxy must be a valid URL", value);
9409
+ }
9410
+ }
9411
+ function validateBoolean(value, field, details) {
9412
+ if (value === null) return;
9413
+ if (typeof value !== "boolean") pushDetail(details, field, "Must be a boolean or null", value);
9414
+ }
9415
+ function validateNonNegativeInteger(value, field, details) {
9416
+ if (value === null) return;
9417
+ if (!Number.isInteger(value) || Number(value) < 0) pushDetail(details, field, "Must be a non-negative integer or null", value);
9418
+ }
9419
+ function validateEnum(value, field, allowed, details) {
9420
+ if (value === null) return;
9421
+ if (!allowed.has(value)) pushDetail(details, field, `Must be one of: ${[...allowed].map(String).join(", ")}`, value);
9422
+ }
9423
+ function pushDetail(details, field, message, value) {
9424
+ details.push({
9425
+ field,
9426
+ message,
9427
+ ...value !== void 0 ? { value } : {}
9428
+ });
9429
+ }
9430
+ function hasOwn(object, key) {
9431
+ return Object.prototype.hasOwnProperty.call(object, key);
9432
+ }
9433
+ function isPlainObject(value) {
9434
+ return typeof value === "object" && value !== null && !Array.isArray(value);
9435
+ }
9436
+ async function loadEditableConfigDocument() {
9437
+ try {
9438
+ return parseExistingDocument(await fs.readFile(PATHS.CONFIG_YAML, "utf8"));
9439
+ } catch (err) {
9440
+ if (err.code === "ENOENT") return parseDocument("{}\n");
9441
+ throw err;
9442
+ }
9443
+ }
9444
+ function parseExistingDocument(content) {
9445
+ const doc = parseDocument(content.trim().length > 0 ? content : "{}\n");
9446
+ if (doc.errors.length > 0) throw new Error(doc.errors[0]?.message ?? "Invalid config.yaml");
9447
+ const raw = doc.toJSON();
9448
+ if (raw !== null && raw !== void 0 && (typeof raw !== "object" || Array.isArray(raw))) throw new Error("config.yaml must contain a top-level mapping");
9449
+ return doc;
9450
+ }
9451
+ function mergeConfigIntoDocument(doc, body) {
9452
+ if (hasOwn(body, "proxy")) setScalar(doc, ["proxy"], body.proxy);
9453
+ if (hasOwn(body, "stream_idle_timeout")) setScalar(doc, ["stream_idle_timeout"], body.stream_idle_timeout);
9454
+ if (hasOwn(body, "fetch_timeout")) setScalar(doc, ["fetch_timeout"], body.fetch_timeout);
9455
+ if (hasOwn(body, "stale_request_max_age")) setScalar(doc, ["stale_request_max_age"], body.stale_request_max_age);
9456
+ if (hasOwn(body, "model_refresh_interval")) setScalar(doc, ["model_refresh_interval"], body.model_refresh_interval);
9457
+ if (hasOwn(body, "compress_tool_results_before_truncate")) setScalar(doc, ["compress_tool_results_before_truncate"], body.compress_tool_results_before_truncate);
9458
+ if (hasOwn(body, "system_prompt_prepend")) setScalar(doc, ["system_prompt_prepend"], body.system_prompt_prepend);
9459
+ if (hasOwn(body, "system_prompt_append")) setScalar(doc, ["system_prompt_append"], body.system_prompt_append);
9460
+ if (hasOwn(body, "model_overrides")) replaceCollection(doc, ["model_overrides"], body.model_overrides);
9461
+ if (hasOwn(body, "system_prompt_overrides")) replaceCollection(doc, ["system_prompt_overrides"], body.system_prompt_overrides);
9462
+ if (hasOwn(body, "rate_limiter")) setNestedScalarContainer(doc, ["rate_limiter"], body.rate_limiter);
9463
+ if (hasOwn(body, "shutdown")) setNestedScalarContainer(doc, ["shutdown"], body.shutdown);
9464
+ if (hasOwn(body, "history")) setNestedScalarContainer(doc, ["history"], body.history);
9465
+ if (hasOwn(body, "openai-responses")) setNestedScalarContainer(doc, ["openai-responses"], body["openai-responses"]);
9466
+ if (hasOwn(body, "anthropic")) {
9467
+ const anthropic = body.anthropic;
9468
+ if (anthropic === null) doc.deleteIn(["anthropic"]);
9469
+ else if (anthropic) {
9470
+ setNestedScalarContainer(doc, ["anthropic"], anthropic, { excludeKeys: ANTHROPIC_COLLECTION_KEYS });
9471
+ if (hasOwn(anthropic, "rewrite_system_reminders")) {
9472
+ const rewrite = anthropic.rewrite_system_reminders;
9473
+ replaceCollection(doc, ["anthropic", "rewrite_system_reminders"], Array.isArray(rewrite) && rewrite.length === 0 ? false : rewrite);
9474
+ }
9475
+ if (hasOwn(anthropic, "non_deferred_tools")) replaceCollection(doc, ["anthropic", "non_deferred_tools"], anthropic.non_deferred_tools);
9476
+ }
9477
+ }
9478
+ }
9479
+ function setScalar(doc, path, value) {
9480
+ if (value === null || value === void 0) {
9481
+ doc.deleteIn(path);
9482
+ return;
9483
+ }
9484
+ doc.setIn(path, value);
9485
+ }
9486
+ function setNestedScalarContainer(doc, path, value, options) {
9487
+ if (value === null || value === void 0) {
9488
+ doc.deleteIn(path);
9489
+ return;
9490
+ }
9491
+ if (!isPlainObject(value)) return;
9492
+ for (const [key, child] of Object.entries(value)) {
9493
+ if (options?.excludeKeys?.has(key)) continue;
9494
+ setScalar(doc, [...path, key], child);
9495
+ }
9496
+ }
9497
+ function replaceCollection(doc, path, value) {
9498
+ if (value === null || value === void 0) {
9499
+ doc.deleteIn(path);
9500
+ return;
9501
+ }
9502
+ doc.deleteIn(path);
9503
+ doc.setIn(path, value);
9504
+ }
7691
9505
  //#endregion
7692
9506
  //#region src/lib/openai/embeddings.ts
7693
9507
  const createEmbeddings = async (payload) => {
@@ -7743,7 +9557,9 @@ function handleGetEntries(c) {
7743
9557
  }
7744
9558
  function handleGetEntry(c) {
7745
9559
  if (!isHistoryEnabled()) return c.json({ error: "History recording is not enabled" }, 400);
7746
- const entry = getEntry(c.req.param("id"));
9560
+ const id = c.req.param("id");
9561
+ if (!id) return c.json({ error: "Entry id is required" }, 400);
9562
+ const entry = getEntry(id);
7747
9563
  if (!entry) return c.json({ error: "Entry not found" }, 404);
7748
9564
  return c.json(entry);
7749
9565
  }
@@ -7782,6 +9598,7 @@ function handleGetSessions(c) {
7782
9598
  function handleGetSession(c) {
7783
9599
  if (!isHistoryEnabled()) return c.json({ error: "History recording is not enabled" }, 400);
7784
9600
  const id = c.req.param("id");
9601
+ if (!id) return c.json({ error: "Session id is required" }, 400);
7785
9602
  const session = getSession(id);
7786
9603
  if (!session) return c.json({ error: "Session not found" }, 404);
7787
9604
  const query = c.req.query();
@@ -7796,7 +9613,9 @@ function handleGetSession(c) {
7796
9613
  }
7797
9614
  function handleDeleteSession(c) {
7798
9615
  if (!isHistoryEnabled()) return c.json({ error: "History recording is not enabled" }, 400);
7799
- if (!deleteSession(c.req.param("id"))) return c.json({ error: "Session not found" }, 404);
9616
+ const id = c.req.param("id");
9617
+ if (!id) return c.json({ error: "Session id is required" }, 400);
9618
+ if (!deleteSession(id)) return c.json({ error: "Session not found" }, 404);
7800
9619
  return c.json({
7801
9620
  success: true,
7802
9621
  message: "Session deleted"
@@ -7805,6 +9624,7 @@ function handleDeleteSession(c) {
7805
9624
  //#endregion
7806
9625
  //#region src/routes/history/route.ts
7807
9626
  const historyRoutes = new Hono();
9627
+ historyRoutes.get("/", (c) => c.redirect("/ui#/v/activity", 302));
7808
9628
  historyRoutes.all("/", (c) => c.json({ error: "Not Found" }, 404));
7809
9629
  /** API endpoints */
7810
9630
  historyRoutes.get("/api/entries", handleGetEntries);
@@ -8535,7 +10355,7 @@ function processToolBlocks(messages, tools) {
8535
10355
  orphanedToolResultCount++;
8536
10356
  continue;
8537
10357
  }
8538
- } else if (block.type !== "text" && block.type !== "image") {
10358
+ } else if (block.type !== "text" && block.type !== "image" && block.type !== "document") {
8539
10359
  orphanedToolResultCount++;
8540
10360
  continue;
8541
10361
  }
@@ -9082,12 +10902,12 @@ function isAnthropicFeatureUnsupported(modelId, feature) {
9082
10902
  /**
9083
10903
  * Context editing is supported by a broader set of models:
9084
10904
  * - Claude Haiku 4.5
9085
- * - Claude Sonnet 4/4.5
10905
+ * - Claude Sonnet 4/4.5/4.6
9086
10906
  * - Claude Opus 4/4.1/4.5/4.6
9087
10907
  */
9088
10908
  function modelSupportsContextEditing(modelId) {
9089
10909
  const normalized = normalizeForMatching(modelId);
9090
- return normalized.startsWith("claude-haiku-4-5") || normalized.startsWith("claude-sonnet-4-5") || normalized.startsWith("claude-sonnet-4") || normalized.startsWith("claude-opus-4-5") || normalized.startsWith("claude-opus-4-6") || normalized.startsWith("claude-opus-4-1") || normalized.startsWith("claude-opus-4");
10910
+ return normalized.startsWith("claude-haiku-4-5") || normalized.startsWith("claude-sonnet-4-6") || normalized.startsWith("claude-sonnet-4-5") || normalized === "claude-sonnet-4" || normalized.startsWith("claude-opus-4-5") || normalized.startsWith("claude-opus-4-6") || normalized.startsWith("claude-opus-4-1") || normalized === "claude-opus-41" || normalized === "claude-opus-4";
9091
10911
  }
9092
10912
  /**
9093
10913
  * Check if context editing is enabled for a model.
@@ -9099,11 +10919,12 @@ function isContextEditingEnabled(modelId) {
9099
10919
  }
9100
10920
  /**
9101
10921
  * Tool search is supported by:
10922
+ * - Claude Sonnet 4.5/4.6
9102
10923
  * - Claude Opus 4.5/4.6
9103
10924
  */
9104
10925
  function modelSupportsToolSearch(modelId) {
9105
10926
  const normalized = normalizeForMatching(modelId);
9106
- return normalized.startsWith("claude-opus-4-5") || normalized.startsWith("claude-opus-4-6");
10927
+ return normalized.startsWith("claude-sonnet-4-5") || normalized.startsWith("claude-sonnet-4-6") || normalized.startsWith("claude-opus-4-5") || normalized.startsWith("claude-opus-4-6");
9107
10928
  }
9108
10929
  /**
9109
10930
  * Check if a model supports adaptive thinking (from model metadata).
@@ -9156,10 +10977,9 @@ function buildAnthropicBetaHeaders(modelId, resolvedModel, opts) {
9156
10977
  */
9157
10978
  function buildContextManagement(mode, hasThinking) {
9158
10979
  if (mode === "off") return;
9159
- const triggerType = "input_tokens";
9160
- const triggerValue = 1e5;
9161
- const keepCount = 3;
9162
- const thinkingKeepTurns = 1;
10980
+ const triggerValue = state.contextEditingTrigger;
10981
+ const keepCount = state.contextEditingKeepTools;
10982
+ const thinkingKeepTurns = state.contextEditingKeepThinking;
9163
10983
  const edits = [];
9164
10984
  if ((mode === "clear-thinking" || mode === "clear-both") && hasThinking) edits.push({
9165
10985
  type: "clear_thinking_20251015",
@@ -9171,7 +10991,7 @@ function buildContextManagement(mode, hasThinking) {
9171
10991
  if (mode === "clear-tooluse" || mode === "clear-both") edits.push({
9172
10992
  type: "clear_tool_uses_20250919",
9173
10993
  trigger: {
9174
- type: triggerType,
10994
+ type: "input_tokens",
9175
10995
  value: triggerValue
9176
10996
  },
9177
10997
  keep: {
@@ -9301,8 +11121,10 @@ function buildHistoryToolStubs(historyToolNames) {
9301
11121
  */
9302
11122
  function processToolPipeline(tools, modelId, messages) {
9303
11123
  const existingNamesLower = new Set(tools.map((t) => t.name.toLowerCase()));
9304
- const toolSearchEnabled = modelSupportsToolSearch(modelId);
11124
+ const toolSearchEnabled = state.toolSearchEnabled && modelSupportsToolSearch(modelId);
9305
11125
  const historyToolNames = toolSearchEnabled ? collectHistoryToolNames(messages) : void 0;
11126
+ const nonDeferred = [];
11127
+ const deferred = [];
9306
11128
  const result = [];
9307
11129
  if (toolSearchEnabled) result.push({
9308
11130
  name: TOOL_SEARCH_TOOL_NAME,
@@ -9311,11 +11133,11 @@ function processToolPipeline(tools, modelId, messages) {
9311
11133
  });
9312
11134
  for (const tool of tools) {
9313
11135
  const normalized = tool.type ? tool : ensureInputSchema(tool);
9314
- const shouldDefer = toolSearchEnabled && tool.defer_loading !== false && !NON_DEFERRED_TOOL_NAMES.has(tool.name) && !historyToolNames?.has(tool.name);
9315
- result.push(shouldDefer ? {
11136
+ if (toolSearchEnabled && tool.defer_loading !== false && !NON_DEFERRED_TOOL_NAMES.has(tool.name) && !state.nonDeferredTools.includes(tool.name) && !historyToolNames?.has(tool.name)) deferred.push({
9316
11137
  ...normalized,
9317
11138
  defer_loading: true
9318
- } : normalized);
11139
+ });
11140
+ else nonDeferred.push(normalized);
9319
11141
  }
9320
11142
  for (const name of CLAUDE_CODE_OFFICIAL_TOOLS) if (!existingNamesLower.has(name.toLowerCase())) {
9321
11143
  const stub = {
@@ -9323,19 +11145,24 @@ function processToolPipeline(tools, modelId, messages) {
9323
11145
  description: `Claude Code ${name} tool`,
9324
11146
  input_schema: EMPTY_INPUT_SCHEMA
9325
11147
  };
9326
- result.push(stub);
11148
+ nonDeferred.push(stub);
9327
11149
  }
9328
11150
  if (historyToolNames) {
9329
- const allResultNames = new Set(result.map((t) => t.name));
11151
+ const allResultNames = new Set([
11152
+ ...nonDeferred,
11153
+ ...deferred,
11154
+ ...result
11155
+ ].map((t) => t.name));
9330
11156
  for (const name of historyToolNames) if (!allResultNames.has(name)) {
9331
11157
  consola.debug(`[ToolPipeline] Injecting stub for history-referenced tool: ${name}`);
9332
- result.push({
11158
+ nonDeferred.push({
9333
11159
  name,
9334
11160
  description: `Stub for tool referenced in conversation history`,
9335
11161
  input_schema: EMPTY_INPUT_SCHEMA
9336
11162
  });
9337
11163
  }
9338
11164
  }
11165
+ result.push(...nonDeferred, ...deferred);
9339
11166
  const deferredCount = result.filter((t) => t.defer_loading === true).length;
9340
11167
  const injectedCount = result.length - tools.length;
9341
11168
  if (deferredCount > 0 || injectedCount > 0) consola.debug(`[ToolPipeline] ${result.length} tools (${deferredCount} deferred, ${injectedCount} injected, tool_search: ${toolSearchEnabled})`);
@@ -9422,10 +11249,13 @@ function stripServerTools(tools) {
9422
11249
  }
9423
11250
  //#endregion
9424
11251
  //#region src/lib/anthropic/request-preparation.ts
9425
- const COPILOT_REJECTED_FIELDS = new Set(["output_config", "inference_geo"]);
11252
+ const COPILOT_REJECTED_FIELDS = new Set(["inference_geo"]);
11253
+ const CACHE_CONTROL_BREAKPOINT_LIMIT = 4;
11254
+ const EPHEMERAL_CACHE_CONTROL = { type: "ephemeral" };
9426
11255
  function prepareAnthropicRequest(payload, opts) {
9427
11256
  const wire = buildWirePayload(payload);
9428
- adjustThinkingBudget(wire);
11257
+ adjustThinkingBudget(wire, opts?.resolvedModel);
11258
+ addToolsAndSystemCacheControl(wire);
9429
11259
  const model = wire.model;
9430
11260
  const messages = wire.messages;
9431
11261
  const thinking = wire.thinking;
@@ -9469,18 +11299,94 @@ function buildWirePayload(payload) {
9469
11299
  if (wire.tools) wire.tools = stripServerTools(wire.tools);
9470
11300
  return wire;
9471
11301
  }
9472
- function adjustThinkingBudget(wire) {
11302
+ function adjustThinkingBudget(wire, resolvedModel) {
9473
11303
  const thinking = wire.thinking;
9474
11304
  if (!thinking || thinking.type === "disabled" || thinking.type === "adaptive") return;
9475
11305
  const budgetTokens = thinking.budget_tokens;
9476
11306
  if (!budgetTokens) return;
11307
+ let adjusted = budgetTokens;
11308
+ const minBudget = resolvedModel?.capabilities?.supports?.min_thinking_budget;
11309
+ const maxBudget = resolvedModel?.capabilities?.supports?.max_thinking_budget;
9477
11310
  const maxTokens = wire.max_tokens;
9478
- if (budgetTokens >= maxTokens) {
9479
- const adjusted = maxTokens - 1;
11311
+ if (typeof minBudget === "number" && adjusted < minBudget) adjusted = minBudget;
11312
+ if (typeof maxBudget === "number" && adjusted > maxBudget) adjusted = maxBudget;
11313
+ if (typeof maxTokens === "number" && adjusted >= maxTokens) adjusted = maxTokens - 1;
11314
+ if (adjusted !== budgetTokens) {
9480
11315
  wire.thinking.budget_tokens = adjusted;
9481
11316
  consola.debug(`[DirectAnthropic] Capped thinking.budget_tokens: ${budgetTokens} → ${adjusted} (max_tokens=${maxTokens})`);
9482
11317
  }
9483
11318
  }
11319
+ function addToolsAndSystemCacheControl(wire) {
11320
+ if (!state.autoCacheControl) return;
11321
+ let remaining = CACHE_CONTROL_BREAKPOINT_LIMIT - countExistingCacheBreakpoints(wire);
11322
+ if (remaining <= 0) return;
11323
+ const toolResult = addToolCacheControl(wire.tools, remaining);
11324
+ if (toolResult.changed) {
11325
+ wire.tools = toolResult.tools;
11326
+ remaining = toolResult.remaining;
11327
+ }
11328
+ if (remaining <= 0) return;
11329
+ const systemResult = addSystemCacheControl(wire.system, remaining);
11330
+ if (systemResult.changed) wire.system = systemResult.system;
11331
+ }
11332
+ function countExistingCacheBreakpoints(wire) {
11333
+ return countCacheControlOccurrences(wire.messages) + countCacheControlOccurrences(wire.system) + countCacheControlOccurrences(wire.tools);
11334
+ }
11335
+ function countCacheControlOccurrences(value) {
11336
+ if (Array.isArray(value)) return value.reduce((count, item) => count + countCacheControlOccurrences(item), 0);
11337
+ if (!value || typeof value !== "object") return 0;
11338
+ const record = value;
11339
+ let count = record.cache_control ? 1 : 0;
11340
+ for (const nested of Object.values(record)) if (nested !== record.cache_control) count += countCacheControlOccurrences(nested);
11341
+ return count;
11342
+ }
11343
+ function addToolCacheControl(tools, remaining) {
11344
+ if (!tools || remaining <= 0) return {
11345
+ tools,
11346
+ remaining,
11347
+ changed: false
11348
+ };
11349
+ const lastNonDeferredIndex = findLastIndex(tools, (tool) => tool.defer_loading !== true);
11350
+ if (lastNonDeferredIndex < 0 || tools[lastNonDeferredIndex].cache_control) return {
11351
+ tools,
11352
+ remaining,
11353
+ changed: false
11354
+ };
11355
+ const updatedTools = [...tools];
11356
+ updatedTools[lastNonDeferredIndex] = {
11357
+ ...updatedTools[lastNonDeferredIndex],
11358
+ cache_control: EPHEMERAL_CACHE_CONTROL
11359
+ };
11360
+ return {
11361
+ tools: updatedTools,
11362
+ remaining: remaining - 1,
11363
+ changed: true
11364
+ };
11365
+ }
11366
+ function addSystemCacheControl(system, remaining) {
11367
+ if (!Array.isArray(system) || remaining <= 0) return {
11368
+ system,
11369
+ changed: false
11370
+ };
11371
+ const lastSystemIndex = system.length - 1;
11372
+ if (lastSystemIndex < 0 || system[lastSystemIndex].cache_control) return {
11373
+ system,
11374
+ changed: false
11375
+ };
11376
+ const updatedSystem = [...system];
11377
+ updatedSystem[lastSystemIndex] = {
11378
+ ...updatedSystem[lastSystemIndex],
11379
+ cache_control: EPHEMERAL_CACHE_CONTROL
11380
+ };
11381
+ return {
11382
+ system: updatedSystem,
11383
+ changed: true
11384
+ };
11385
+ }
11386
+ function findLastIndex(items, predicate) {
11387
+ for (let index = items.length - 1; index >= 0; index--) if (predicate(items[index])) return index;
11388
+ return -1;
11389
+ }
9484
11390
  //#endregion
9485
11391
  //#region src/lib/anthropic/client.ts
9486
11392
  /**
@@ -10193,7 +12099,9 @@ async function handleMessages(c) {
10193
12099
  consola.debug(`[AnthropicRouting] ${anthropicPayload.model}: ${routingDecision.reason}`);
10194
12100
  const reqCtx = getRequestContextManager().create({
10195
12101
  endpoint: "anthropic-messages",
10196
- tuiLogId
12102
+ sessionId: getSessionIdFromHeaders(c.req.raw.headers),
12103
+ tuiLogId,
12104
+ rawPath: c.req.path
10197
12105
  });
10198
12106
  reqCtx.setOriginalRequest({
10199
12107
  model: clientModelName ?? anthropicPayload.model,
@@ -10459,39 +12367,18 @@ messagesRoutes.post("/count_tokens", async (c) => {
10459
12367
  //#endregion
10460
12368
  //#region src/routes/models/route.ts
10461
12369
  const modelsRoutes = new Hono();
10462
- const EPOCH_ISO = (/* @__PURE__ */ new Date(0)).toISOString();
10463
- function formatModel(model) {
10464
- return {
10465
- id: model.id,
10466
- object: "model",
10467
- type: "model",
10468
- created: 0,
10469
- created_at: EPOCH_ISO,
10470
- owned_by: model.vendor,
10471
- display_name: model.name,
10472
- capabilities: model.capabilities
10473
- };
10474
- }
10475
- function formatModelDetail(model) {
10476
- return {
10477
- ...formatModel(model),
10478
- version: model.version,
10479
- preview: model.preview,
10480
- model_picker_enabled: model.model_picker_enabled,
10481
- model_picker_category: model.model_picker_category,
10482
- supported_endpoints: model.supported_endpoints,
10483
- billing: model.billing
10484
- };
12370
/** Strip internal fields that should not be exposed to external consumers. */
function stripInternalFields(model) {
	const publicModel = { ...model };
	delete publicModel.request_headers;
	return publicModel;
}
10486
12375
  modelsRoutes.get("/", async (c) => {
10487
12376
  try {
10488
12377
  if (!state.models) await cacheModels();
10489
- const formatter = c.req.query("detail") === "true" ? formatModelDetail : formatModel;
10490
- const models = state.models?.data.map((m) => formatter(m));
12378
+ const models = state.models?.data.map(stripInternalFields);
10491
12379
  return c.json({
10492
- object: "list",
10493
- data: models,
10494
- has_more: false
12380
+ object: state.models?.object ?? "list",
12381
+ data: models
10495
12382
  });
10496
12383
  } catch (error) {
10497
12384
  return forwardError(c, error);
@@ -10508,7 +12395,7 @@ modelsRoutes.get("/:model", async (c) => {
10508
12395
  param: "model",
10509
12396
  code: "model_not_found"
10510
12397
  } }, 404);
10511
- return c.json(formatModelDetail(model));
12398
+ return c.json(stripInternalFields(model));
10512
12399
  } catch (error) {
10513
12400
  return forwardError(c, error);
10514
12401
  }
@@ -10533,7 +12420,9 @@ async function handleResponses(c) {
10533
12420
  const tuiLogId = c.get("tuiLogId");
10534
12421
  const reqCtx = getRequestContextManager().create({
10535
12422
  endpoint: "openai-responses",
10536
- tuiLogId
12423
+ sessionId: getSessionIdFromHeaders(c.req.raw.headers) ?? resolveResponseSessionId(payload.previous_response_id),
12424
+ tuiLogId,
12425
+ rawPath: c.req.path
10537
12426
  });
10538
12427
  reqCtx.setOriginalRequest({
10539
12428
  model: clientModel,
@@ -10560,6 +12449,8 @@ async function handleDirectResponses(opts) {
10560
12449
  const headersCapture = {};
10561
12450
  const adapter = createResponsesAdapter(selectedModel, headersCapture, (wireRequest) => {
10562
12451
  reqCtx.setAttemptWireRequest(wireRequest);
12452
+ }, (transport) => {
12453
+ reqCtx.setAttemptTransport(transport);
10563
12454
  });
10564
12455
  const strategies = createResponsesStrategies();
10565
12456
  try {
@@ -10576,6 +12467,8 @@ async function handleDirectResponses(opts) {
10576
12467
  const response = pipelineResult.response;
10577
12468
  if (!payload.stream) {
10578
12469
  const responsesResponse = response;
12470
+ if (!reqCtx.sessionId && responsesResponse.id) reqCtx.setSessionId(responsesResponse.id);
12471
+ registerResponseSession(responsesResponse.id, reqCtx.sessionId);
10579
12472
  const content = responsesOutputToContent(responsesResponse.output);
10580
12473
  reqCtx.complete({
10581
12474
  success: true,
@@ -10628,6 +12521,8 @@ async function handleDirectResponses(opts) {
10628
12521
  } catch {}
10629
12522
  }
10630
12523
  }
12524
+ if (!reqCtx.sessionId && acc.responseId) reqCtx.setSessionId(acc.responseId);
12525
+ registerResponseSession(acc.responseId, reqCtx.sessionId);
10631
12526
  const responseData = buildResponsesResponseData(acc, payload.model);
10632
12527
  reqCtx.complete(responseData);
10633
12528
  } catch (error) {
@@ -10675,7 +12570,18 @@ statusRoutes.get("/", async (c) => {
10675
12570
  const now = Date.now();
10676
12571
  const limiter = getAdaptiveRateLimiter();
10677
12572
  const limiterStatus = limiter?.getStatus();
12573
+ let serverStatus;
12574
+ if (getIsShuttingDown()) serverStatus = "shutting_down";
12575
+ else if (state.copilotToken && state.githubToken) serverStatus = "healthy";
12576
+ else serverStatus = "unhealthy";
12577
+ const rateLimiter = limiter && limiterStatus ? {
12578
+ enabled: true,
12579
+ ...limiterStatus,
12580
+ config: limiter.getConfig()
12581
+ } : { enabled: false };
10678
12582
  const memStats = getMemoryPressureStats();
12583
+ const requestTelemetry = getRequestTelemetrySnapshot(now);
12584
+ const upstreamWs = peekUpstreamWsManager();
10679
12585
  let activeCount = 0;
10680
12586
  try {
10681
12587
  activeCount = getRequestContextManager().activeCount;
@@ -10692,7 +12598,7 @@ statusRoutes.get("/", async (c) => {
10692
12598
  };
10693
12599
  } catch {}
10694
12600
  return c.json({
10695
- status: getIsShuttingDown() ? "shutting_down" : state.copilotToken && state.githubToken ? "healthy" : "unhealthy",
12601
+ status: serverStatus,
10696
12602
  uptime: serverStartTime > 0 ? Math.floor((now - serverStartTime) / 1e3) : 0,
10697
12603
  version,
10698
12604
  vsCodeVersion: state.vsCodeVersion ?? null,
@@ -10704,11 +12610,8 @@ statusRoutes.get("/", async (c) => {
10704
12610
  },
10705
12611
  quota,
10706
12612
  activeRequests: { count: activeCount },
10707
- rateLimiter: limiterStatus ? {
10708
- enabled: true,
10709
- ...limiterStatus,
10710
- config: limiter.getConfig()
10711
- } : { enabled: false },
12613
+ rateLimiter,
12614
+ requestTelemetry,
10712
12615
  memory: {
10713
12616
  heapUsedMB: memStats.heapUsedMB,
10714
12617
  heapLimitMB: memStats.heapLimitMB,
@@ -10720,6 +12623,12 @@ statusRoutes.get("/", async (c) => {
10720
12623
  models: {
10721
12624
  totalCount: state.models?.data.length ?? 0,
10722
12625
  availableCount: state.modelIds.size
12626
+ },
12627
+ upstream_websocket: {
12628
+ enabled: state.upstreamWebSocket,
12629
+ active_connections: upstreamWs?.activeCount ?? 0,
12630
+ consecutive_fallbacks: upstreamWs?.consecutiveFallbacks ?? 0,
12631
+ temporarily_disabled: upstreamWs?.temporarilyDisabled ?? false
10723
12632
  }
10724
12633
  });
10725
12634
  });
@@ -10760,7 +12669,29 @@ function getMimeType(path) {
10760
12669
  }
10761
12670
  //#endregion
10762
12671
  //#region src/routes/ui/route.ts
10763
- const uiRoutes = new Hono();
12672
// Path prefix under which the web UI is mounted on this server.
const UI_MOUNT_PREFIX = "/ui";
// Content types whose response bodies are rewritten when proxying the
// external UI (paths inside them must be remapped onto the /ui mount).
const TEXT_RESPONSE_TYPES = [
	"text/html",
	"text/css",
	"text/javascript",
	"application/javascript",
	"application/x-javascript"
];
// Content types treated as JavaScript; bare parenthesized path rewriting is
// skipped for these so code containing "(" is not corrupted.
const JAVASCRIPT_RESPONSE_TYPES = [
	"text/javascript",
	"application/javascript",
	"application/x-javascript"
];
// Path prefixes emitted by a Vite dev server that must be remapped under
// UI_MOUNT_PREFIX when the UI is proxied from an external dev server.
const VITE_DEV_PATH_PREFIXES = [
	"/@vite",
	"/@fs/",
	"/@id/",
	"/src/",
	"/node_modules/",
	"/__vite_ping",
	"/__open-in-editor",
	"/vite.svg"
];
10764
12695
  /**
10765
12696
  * Resolve a UI directory that exists at runtime.
10766
12697
  * In dev mode this file lives at src/routes/ui/ — 3 levels below project root.
@@ -10771,6 +12702,68 @@ function resolveUiDir(subpath) {
10771
12702
  return candidates.find((candidate) => existsSync(candidate)) ?? candidates[0];
10772
12703
  }
10773
12704
  const uiDir = resolveUiDir("history-v3/dist");
12705
/** Remove trailing slashes from a pathname, preserving the bare root "/". */
function stripTrailingSlash(pathname) {
	if (pathname === "/") return pathname;
	return pathname.replace(/\/+$/, "");
}
12708
/** Escape all RegExp metacharacters so `value` matches literally. */
function escapeRegExp(value) {
	return value.replace(/[.*+?^${}()|[\]\\]/g, (ch) => `\\${ch}`);
}
12711
/**
 * Join an upstream base pathname with a request pathname without producing
 * duplicate or missing separators. A base of "/" contributes nothing.
 */
function joinUrlPath(basePathname, requestPathname) {
	const base = stripTrailingSlash(basePathname);
	let request = requestPathname;
	if (!request.startsWith("/")) request = `/${request}`;
	if (base === "/") return request;
	return `${base}${request}`;
}
12717
/**
 * Strip the UI mount prefix from an incoming pathname, yielding the path
 * relative to the UI root. Paths outside the mount are returned unchanged.
 */
function stripUiMountPrefix(pathname) {
	if (pathname === UI_MOUNT_PREFIX) return "/";
	// Slice by the prefix length instead of the hard-coded 3 so a future
	// change to UI_MOUNT_PREFIX cannot silently truncate the wrong characters.
	if (pathname.startsWith(`${UI_MOUNT_PREFIX}/`)) return pathname.slice(UI_MOUNT_PREFIX.length);
	return pathname;
}
12722
/** True when the content type is one whose body we rewrite while proxying. */
function isTextResponse(contentType) {
	if (!contentType) return false;
	return TEXT_RESPONSE_TYPES.some((type) => contentType.includes(type));
}
12725
/** True when the content type identifies a JavaScript response. */
function isJavaScriptResponse(contentType) {
	if (!contentType) return false;
	return JAVASCRIPT_RESPONSE_TYPES.some((type) => contentType.includes(type));
}
12728
/** Rewrite Vite's `"BASE_URL": "/"` literal so assets resolve under /ui/. */
function rewriteBaseUrlLiteral(content) {
	return content.replace(/("BASE_URL"\s*:\s*")\/(")/g, (_match, prefix, suffix) => `${prefix}${UI_MOUNT_PREFIX}/${suffix}`);
}
12731
/** Replace `fromPrefix` with `toPrefix` wherever it directly follows a quote. */
function rewriteQuotedPathPrefixes(content, fromPrefix, toPrefix) {
	const pattern = new RegExp(`(["'\`])${escapeRegExp(fromPrefix)}`, "g");
	return content.replace(pattern, (_match, quote) => `${quote}${toPrefix}`);
}
12735
/** Replace `fromPrefix` with `toPrefix` wherever it directly follows "(". */
function rewriteParenthesizedPathPrefixes(content, fromPrefix, toPrefix) {
	const pattern = new RegExp(`(\\()${escapeRegExp(fromPrefix)}`, "g");
	return content.replace(pattern, (_match, paren) => `${paren}${toPrefix}`);
}
12739
/**
 * Rewrite a text response fetched from the external UI server so every
 * dev-server path (Vite internals, /src/, /node_modules/, …) points back at
 * this server's /ui mount instead of the upstream origin.
 *
 * Both relative ("/src/…") and absolute ("http://host/src/…") forms of each
 * prefix are rewritten. For JavaScript responses only quoted occurrences are
 * rewritten; bare parenthesized rewrites are skipped there because "(" also
 * precedes ordinary code, which a blind rewrite would corrupt.
 */
function rewriteProxyTextResponse(content, externalUiUrl, contentType) {
	const externalBase = new URL(externalUiUrl);
	const externalBasePath = stripTrailingSlash(externalBase.pathname);
	// Fix Vite's import.meta.env BASE_URL literal before the prefix passes.
	const rewrittenBase = rewriteBaseUrlLiteral(content);
	const rewriteBareParenthesizedPaths = !isJavaScriptResponse(contentType);
	return VITE_DEV_PATH_PREFIXES.reduce((current, vitePathPrefix) => {
		// The prefix as it appears in upstream output (absolute and relative),
		// and its replacement under this server's /ui mount.
		const externalPathPrefix = externalBasePath === "/" ? vitePathPrefix : `${externalBasePath}${vitePathPrefix}`;
		const localPathPrefix = `${UI_MOUNT_PREFIX}${vitePathPrefix}`;
		const absoluteExternalPrefix = `${externalBase.origin}${externalPathPrefix}`;
		// Absolute form first so the relative pass cannot partially match it.
		const rewrittenQuotedRelative = rewriteQuotedPathPrefixes(rewriteQuotedPathPrefixes(current, absoluteExternalPrefix, localPathPrefix), externalPathPrefix, localPathPrefix);
		if (!rewriteBareParenthesizedPaths) return rewrittenQuotedRelative;
		return rewriteParenthesizedPathPrefixes(rewriteParenthesizedPathPrefixes(rewrittenQuotedRelative, absoluteExternalPrefix, localPathPrefix), externalPathPrefix, localPathPrefix);
	}, rewrittenBase);
}
12753
/**
 * Map an upstream redirect Location header back onto this server's /ui mount.
 * Cross-origin redirects are passed through untouched.
 */
function rewriteLocationHeader(location, externalUiUrl) {
	const externalBase = new URL(externalUiUrl);
	const resolvedLocation = new URL(location, externalBase);
	if (resolvedLocation.origin !== externalBase.origin) return location;
	const externalBasePath = stripTrailingSlash(externalBase.pathname);
	let localPathname = resolvedLocation.pathname;
	if (externalBasePath !== "/") {
		const withinBase = localPathname === externalBasePath || localPathname.startsWith(`${externalBasePath}/`);
		// Drop the upstream base path; an empty remainder becomes the root.
		if (withinBase) localPathname = localPathname.slice(externalBasePath.length) || "/";
	}
	return `${UI_MOUNT_PREFIX}${localPathname}${resolvedLocation.search}${resolvedLocation.hash}`;
}
12760
/**
 * Validate and canonicalize the --external-ui-url value. Only http/https URLs
 * without query or hash are accepted; the returned string carries no trailing
 * slash so it can be safely concatenated with request paths.
 * @throws {Error} on an unsupported protocol or when query/hash is present.
 */
function normalizeExternalUiUrl(externalUiUrl) {
	const url = new URL(externalUiUrl);
	const isHttp = url.protocol === "http:" || url.protocol === "https:";
	if (!isHttp) throw new Error(`Unsupported external UI URL protocol: ${url.protocol}. Use http:// or https://`);
	if (url.search || url.hash) throw new Error("--external-ui-url must not include query parameters or hash fragments");
	const pathname = stripTrailingSlash(url.pathname);
	if (pathname === "/") return url.origin;
	return `${url.origin}${pathname}`;
}
10774
12767
  async function serveIndexHtml(c) {
10775
12768
  try {
10776
12769
  await access(join(uiDir, "index.html"), constants.R_OK);
@@ -10780,7 +12773,7 @@ async function serveIndexHtml(c) {
10780
12773
  return c.notFound();
10781
12774
  }
10782
12775
  }
10783
- async function serveAsset(c) {
12776
+ async function serveStaticAsset(c) {
10784
12777
  const assetsIdx = c.req.path.indexOf("/assets/");
10785
12778
  if (assetsIdx === -1) return c.notFound();
10786
12779
  const filePath = c.req.path.slice(assetsIdx);
@@ -10797,14 +12790,59 @@ async function serveAsset(c) {
10797
12790
  return c.notFound();
10798
12791
  }
10799
12792
  }
10800
- uiRoutes.get("/", serveIndexHtml);
10801
- uiRoutes.get("/assets/*", serveAsset);
12793
/**
 * Proxy a /ui request to the external UI dev/build server, rewriting path
 * references in text responses and redirect Location headers so the proxied
 * UI keeps working under this server's /ui mount.
 */
async function proxyExternalUiRequest(c, externalUiUrl) {
	const requestUrl = new URL(c.req.url);
	const externalBase = new URL(externalUiUrl);
	// Map /ui/<path> onto <externalBase>/<path>, preserving the query string.
	const upstreamUrl = new URL(externalBase);
	upstreamUrl.pathname = joinUrlPath(externalBase.pathname, stripUiMountPrefix(c.req.path));
	upstreamUrl.search = requestUrl.search;
	const requestHeaders = new Headers(c.req.raw.headers);
	requestHeaders.set("host", upstreamUrl.host);
	requestHeaders.set("x-forwarded-host", requestUrl.host);
	requestHeaders.set("x-forwarded-proto", requestUrl.protocol.replace(":", ""));
	// GET/HEAD requests must not carry a body; everything else is buffered.
	const body = c.req.method === "GET" || c.req.method === "HEAD" ? void 0 : await c.req.raw.arrayBuffer();
	const upstreamResponse = await fetch(upstreamUrl, {
		method: c.req.method,
		headers: requestHeaders,
		body,
		redirect: "manual"
	});
	const responseHeaders = new Headers(upstreamResponse.headers);
	const location = responseHeaders.get("location");
	if (location) responseHeaders.set("location", rewriteLocationHeader(location, externalUiUrl));
	// fetch() transparently decompresses the upstream body, so upstream
	// content-encoding/content-length headers no longer describe the bytes we
	// forward; strip them to avoid sending a corrupt/mismatched response.
	if (upstreamResponse.headers.get("content-encoding")) {
		responseHeaders.delete("content-encoding");
		responseHeaders.delete("content-length");
	}
	const contentType = responseHeaders.get("content-type");
	if (isTextResponse(contentType)) {
		const rewritten = rewriteProxyTextResponse(await upstreamResponse.text(), externalUiUrl, contentType);
		// Rewriting changes the body size; let the runtime recompute length.
		responseHeaders.delete("content-length");
		return new Response(rewritten, {
			status: upstreamResponse.status,
			statusText: upstreamResponse.statusText,
			headers: responseHeaders
		});
	}
	// Binary/other responses are streamed through unmodified.
	return new Response(upstreamResponse.body, {
		status: upstreamResponse.status,
		statusText: upstreamResponse.statusText,
		headers: responseHeaders
	});
}
12828
/**
 * Build the Hono sub-app serving /ui: either a reverse proxy to an external
 * frontend server (--external-ui-url) or the bundled static assets.
 */
function createUiRoutes(options = {}) {
	const { externalUiUrl } = options;
	const uiRoutes = new Hono();
	if (!externalUiUrl) {
		// Default: serve the prebuilt SPA shipped with the package.
		uiRoutes.get("/", serveIndexHtml);
		uiRoutes.get("/assets/*", serveStaticAsset);
		return uiRoutes;
	}
	// Validate once at startup; proxy every method and path to the dev server.
	const normalizedExternalUiUrl = normalizeExternalUiUrl(externalUiUrl);
	const proxyHandler = (c) => proxyExternalUiRequest(c, normalizedExternalUiUrl);
	uiRoutes.all("/", proxyHandler);
	uiRoutes.all("/*", proxyHandler);
	return uiRoutes;
}
10802
12840
  //#endregion
10803
12841
  //#region src/routes/index.ts
10804
12842
  /**
10805
12843
  * Register all HTTP routes on the given Hono app.
10806
12844
  */
10807
- function registerHttpRoutes(app) {
12845
+ function registerHttpRoutes(app, options = {}) {
10808
12846
  app.route("/chat/completions", chatCompletionRoutes);
10809
12847
  app.route("/models", modelsRoutes);
10810
12848
  app.route("/embeddings", embeddingsRoutes);
@@ -10820,7 +12858,7 @@ function registerHttpRoutes(app) {
10820
12858
  app.route("/api/config", configRoutes);
10821
12859
  app.route("/api/logs", logsRoutes);
10822
12860
  app.route("/history", historyRoutes);
10823
- app.route("/ui", uiRoutes);
12861
+ app.route("/ui", createUiRoutes(options));
10824
12862
  }
10825
12863
  /**
10826
12864
  * Register all WebSocket routes on the given Hono app.
@@ -10831,41 +12869,44 @@ function registerWsRoutes(app, wsUpgrade) {
10831
12869
  }
10832
12870
  //#endregion
10833
12871
  //#region src/server.ts
10834
- const server = new Hono();
10835
- server.onError((error, c) => {
10836
- if (c.req.header("upgrade")?.toLowerCase() === "websocket") {
10837
- consola.debug("WebSocket error:", error);
10838
- return c.text("", 500);
10839
- }
10840
- consola.error(`Unhandled route error in ${c.req.method} ${c.req.path}:`, error);
10841
- return forwardError(c, error);
10842
- });
10843
- const browserProbePaths = new Set(["/favicon.ico", "/.well-known/appspecific/com.chrome.devtools.json"]);
10844
- server.notFound((c) => {
10845
- if (browserProbePaths.has(c.req.path)) return c.body(null, 204);
10846
- return c.json({ error: "Not Found" }, 404);
10847
- });
10848
- server.use(async (_c, next) => {
10849
- await applyConfigToState();
10850
- await ensureValidCopilotToken();
10851
- await next();
10852
- });
10853
- server.use(tuiMiddleware());
10854
- server.use(cors());
10855
- server.use(trimTrailingSlash());
10856
- server.get("/", (c) => c.text("Server running"));
10857
- server.get("/health", (c) => {
10858
- const healthy = Boolean(state.copilotToken && state.githubToken);
10859
- return c.json({
10860
- status: healthy ? "healthy" : "unhealthy",
10861
- checks: {
10862
- copilotToken: Boolean(state.copilotToken),
10863
- githubToken: Boolean(state.githubToken),
10864
- models: Boolean(state.models)
10865
- }
10866
- }, healthy ? 200 : 503);
10867
- });
10868
- registerHttpRoutes(server);
12872
/**
 * Build the root Hono application: global error handler, 404 handler,
 * token-refresh middleware, TUI/CORS/trailing-slash middleware, the "/" and
 * "/health" endpoints, and all registered HTTP routes.
 * Middleware registration order here is load-bearing — do not reorder.
 * @param {{ externalUiUrl?: string }} [options] forwarded to the /ui routes.
 */
function createServer(options = {}) {
	const server = new Hono();
	server.onError((error, c) => {
		// WebSocket upgrade failures are expected noise; log at debug only.
		if (c.req.header("upgrade")?.toLowerCase() === "websocket") {
			consola.debug("WebSocket error:", error);
			return c.text("", 500);
		}
		consola.error(`Unhandled route error in ${c.req.method} ${c.req.path}:`, error);
		return forwardError(c, error);
	});
	// Common browser probes get a quiet 204 instead of a JSON 404.
	const browserProbePaths = new Set(["/favicon.ico", "/.well-known/appspecific/com.chrome.devtools.json"]);
	server.notFound((c) => {
		if (browserProbePaths.has(c.req.path)) return c.body(null, 204);
		return c.json({ error: "Not Found" }, 404);
	});
	// Refresh config state and the Copilot token before handling any request.
	server.use(async (_c, next) => {
		await applyConfigToState();
		await ensureValidCopilotToken();
		await next();
	});
	server.use(tuiMiddleware());
	server.use(cors());
	server.use(trimTrailingSlash());
	server.get("/", (c) => c.text("Server running"));
	// Liveness/readiness probe: 503 until both tokens are present.
	server.get("/health", (c) => {
		const healthy = Boolean(state.copilotToken && state.githubToken);
		return c.json({
			status: healthy ? "healthy" : "unhealthy",
			checks: {
				copilotToken: Boolean(state.copilotToken),
				githubToken: Boolean(state.githubToken),
				models: Boolean(state.models)
			}
		}, healthy ? 200 : 503);
	});
	registerHttpRoutes(server, { externalUiUrl: options.externalUiUrl });
	return server;
}
10869
12910
  //#endregion
10870
12911
  //#region src/start.ts
10871
12912
  /** Format limit values as "Xk" or "?" if not available */
@@ -10917,6 +12958,13 @@ async function runServer(options) {
10917
12958
  consola.error(`Invalid account type: "${options.accountType}". Must be one of: ${VALID_ACCOUNT_TYPES.join(", ")}`);
10918
12959
  process.exit(1);
10919
12960
  }
12961
+ let externalUiUrl;
12962
+ if (options.externalUiUrl) try {
12963
+ externalUiUrl = normalizeExternalUiUrl(options.externalUiUrl);
12964
+ } catch (error) {
12965
+ consola.error(error instanceof Error ? error.message : String(error));
12966
+ process.exit(1);
12967
+ }
10920
12968
  if (options.verbose) {
10921
12969
  consola.level = 5;
10922
12970
  setCliState({ verbose: true });
@@ -10948,11 +12996,13 @@ async function runServer(options) {
10948
12996
  });
10949
12997
  initHistory(true, state.historyLimit);
10950
12998
  startMemoryPressureMonitor();
12999
+ await initRequestTelemetry();
10951
13000
  const contextManager = initRequestContextManager();
10952
13001
  registerContextConsumers(contextManager);
10953
13002
  setConnectedDataFactory(() => contextManager.getAll().map((ctx) => ({
10954
13003
  id: ctx.id,
10955
13004
  endpoint: ctx.endpoint,
13005
+ rawPath: ctx.rawPath,
10956
13006
  state: ctx.state,
10957
13007
  startTime: ctx.startTime,
10958
13008
  durationMs: ctx.durationMs,
@@ -10971,11 +13021,14 @@ async function runServer(options) {
10971
13021
  process.exit(1);
10972
13022
  }
10973
13023
  consola.info(`Available models:\n${state.models?.data.map((m) => formatModelInfo(m)).join("\n")}`);
13024
+ const stopModelRefreshLoop = startModelRefreshLoop();
10974
13025
  await loadPersistedLimits();
10975
13026
  const serverUrl = `http://${options.host ?? "localhost"}:${options.port}`;
13027
+ const server = createServer({ externalUiUrl });
10976
13028
  const wsAdapter = await createWebSocketAdapter(server);
10977
13029
  registerWsRoutes(server, wsAdapter.upgradeWebSocket);
10978
- consola.info(`Web UI: ${serverUrl}/ui`);
13030
+ if (externalUiUrl) consola.info(`Web UI: ${serverUrl}/ui (proxied from ${externalUiUrl})`);
13031
+ else consola.info(`Web UI: ${serverUrl}/ui`);
10979
13032
  const bunWebSocket = typeof globalThis.Bun !== "undefined" ? (await import("hono/bun")).websocket : void 0;
10980
13033
  let serverInstance;
10981
13034
  try {
@@ -10994,7 +13047,11 @@ async function runServer(options) {
10994
13047
  setServerInstance(serverInstance);
10995
13048
  setupShutdownHandlers();
10996
13049
  if (wsAdapter.injectWebSocket && serverInstance.nodeServer) wsAdapter.injectWebSocket(serverInstance.nodeServer);
10997
- await waitForShutdown();
13050
+ try {
13051
+ await waitForShutdown();
13052
+ } finally {
13053
+ stopModelRefreshLoop();
13054
+ }
10998
13055
  }
10999
13056
  const start = defineCommand({
11000
13057
  meta: {
@@ -11053,6 +13110,10 @@ const start = defineCommand({
11053
13110
  type: "boolean",
11054
13111
  default: true,
11055
13112
  description: "Reactive auto-truncate: retries with truncated payload on limit errors (disable with --no-auto-truncate)"
13113
+ },
13114
+ "external-ui-url": {
13115
+ type: "string",
13116
+ description: "Proxy /ui to an external frontend dev/build server (for example http://localhost:5173)"
11056
13117
  }
11057
13118
  },
11058
13119
  run({ args }) {
@@ -11078,7 +13139,9 @@ const start = defineCommand({
11078
13139
  "http-proxy-from-env",
11079
13140
  "httpProxyFromEnv",
11080
13141
  "auto-truncate",
11081
- "autoTruncate"
13142
+ "autoTruncate",
13143
+ "external-ui-url",
13144
+ "externalUiUrl"
11082
13145
  ]);
11083
13146
  const unknownArgs = Object.keys(args).filter((key) => !knownArgs.has(key));
11084
13147
  if (unknownArgs.length > 0) consola.warn(`Unknown argument(s): ${unknownArgs.map((a) => `--${a}`).join(", ")}`);
@@ -11092,7 +13155,8 @@ const start = defineCommand({
11092
13155
  showGitHubToken: args["show-github-token"],
11093
13156
  proxy: args.proxy,
11094
13157
  httpProxyFromEnv: args["http-proxy-from-env"],
11095
- autoTruncate: args["auto-truncate"]
13158
+ autoTruncate: args["auto-truncate"],
13159
+ externalUiUrl: args["external-ui-url"]
11096
13160
  });
11097
13161
  }
11098
13162
  });