@hsupu/copilot-api 0.7.23 → 0.8.1-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/config.example.yaml +4 -0
  2. package/dist/dist-8vhX0s4a.mjs +429 -0
  3. package/dist/dist-8vhX0s4a.mjs.map +1 -0
  4. package/dist/main.mjs +884 -248
  5. package/dist/main.mjs.map +1 -1
  6. package/package.json +1 -3
  7. package/ui/history-v3/dist/assets/DashboardPage-B1uByGH4.js +1 -0
  8. package/ui/history-v3/dist/assets/DashboardPage-CabImjCg.css +1 -0
  9. package/ui/history-v3/dist/assets/HistoryPage-CcuFI_7q.js +3 -0
  10. package/ui/history-v3/dist/assets/HistoryPage-MBmz_wrp.css +1 -0
  11. package/ui/history-v3/dist/assets/LogsPage-CTboGme2.js +1 -0
  12. package/ui/history-v3/dist/assets/LogsPage-DnfMvMyI.css +1 -0
  13. package/ui/history-v3/dist/assets/ModelsPage-BCijEIH1.css +1 -0
  14. package/ui/history-v3/dist/assets/ModelsPage-B_sjc_hg.js +1 -0
  15. package/ui/history-v3/dist/assets/ProgressBar-BQxTnwIj.js +1 -0
  16. package/ui/history-v3/dist/assets/ProgressBar-B_0VSeE9.css +1 -0
  17. package/ui/history-v3/dist/assets/UsagePage-Nx_clyfV.css +1 -0
  18. package/ui/history-v3/dist/assets/UsagePage-Nxy64EGo.js +1 -0
  19. package/ui/history-v3/dist/assets/index-D5HcAJB-.css +1 -0
  20. package/ui/history-v3/dist/assets/index-DOp9l2lW.js +2 -0
  21. package/ui/history-v3/dist/assets/useFormatters-BBIZmSf2.js +1 -0
  22. package/ui/history-v3/dist/assets/{vendor-BGG3lVOP.js → vendor-BJ2Uq5vY.js} +1 -1
  23. package/ui/history-v3/dist/assets/vue-CYcm9SLm.js +1 -0
  24. package/ui/history-v3/dist/index.html +3 -4
  25. package/ui/history-v1/index.html +0 -149
  26. package/ui/history-v1/script.js +0 -1799
  27. package/ui/history-v1/styles.css +0 -1467
  28. package/ui/history-v3/dist/assets/index-CaOzq3V0.js +0 -3
  29. package/ui/history-v3/dist/assets/index-Dfh3zN1X.css +0 -1
  30. package/ui/history-v3/dist/assets/vue-CJ6HbDRX.js +0 -1
package/dist/main.mjs CHANGED
@@ -14,7 +14,6 @@ import { randomBytes, randomUUID } from "node:crypto";
14
14
  import pc from "picocolors";
15
15
  import { existsSync, promises, readFileSync, readdirSync } from "node:fs";
16
16
  import invariant from "tiny-invariant";
17
- import { serve } from "srvx";
18
17
  import { Hono } from "hono";
19
18
  import { events } from "fetch-event-stream";
20
19
  import { cors } from "hono/cors";
@@ -22,6 +21,12 @@ import { trimTrailingSlash } from "hono/trailing-slash";
22
21
  import { streamSSE } from "hono/streaming";
23
22
 
24
23
  //#region src/lib/state.ts
24
+ /** Epoch ms when the server started (set once in runServer) */
25
+ let serverStartTime = 0;
26
+ /** Set the server start time (called once from runServer) */
27
+ function setServerStartTime(ts) {
28
+ serverStartTime = ts;
29
+ }
25
30
  /**
26
31
  * Rebuild model lookup indexes from state.models.
27
32
  * Called by cacheModels() in production; call directly in tests after setting state.models.
@@ -42,6 +47,7 @@ const state = {
42
47
  compressToolResultsBeforeTruncate: true,
43
48
  contextEditingMode: "off",
44
49
  stripServerTools: false,
50
+ immutableThinkingMessages: false,
45
51
  dedupToolCalls: false,
46
52
  fetchTimeout: 300,
47
53
  historyLimit: 200,
@@ -77,16 +83,15 @@ function generateId(randomLength = 7) {
77
83
  }
78
84
 
79
85
  //#endregion
80
- //#region src/lib/history/ws.ts
81
- /**
82
- * WebSocket support for History API.
83
- * Enables real-time updates when new requests are recorded.
84
- */
85
- /** Track connected WebSocket clients */
86
- const clients = /* @__PURE__ */ new Set();
87
- /** Register a new WebSocket client and send connection confirmation */
86
+ //#region src/lib/ws/index.ts
87
+ /** Connected clients indexed by their raw WebSocket instance */
88
+ const clients = /* @__PURE__ */ new Map();
89
+ /** Register a new WebSocket client (starts with no topic subscriptions = receive all) */
88
90
  function addClient(ws) {
89
- clients.add(ws);
91
+ clients.set(ws, {
92
+ ws,
93
+ topics: /* @__PURE__ */ new Set()
94
+ });
90
95
  const msg = {
91
96
  type: "connected",
92
97
  data: { clientCount: clients.size },
@@ -104,19 +109,41 @@ function getClientCount() {
104
109
  }
105
110
  /** Close all connected WebSocket clients */
106
111
  function closeAllClients() {
107
- for (const client of clients) try {
108
- client.close(1001, "Server shutting down");
112
+ for (const { ws } of clients.values()) try {
113
+ ws.close(1001, "Server shutting down");
109
114
  } catch {}
110
115
  clients.clear();
111
116
  }
112
- function broadcast(message) {
117
+ /** Handle an incoming message from a client (topic subscription) */
118
+ function handleClientMessage(ws, data) {
119
+ try {
120
+ const parsed = JSON.parse(data);
121
+ if (!isSubscribeMessage(parsed)) return;
122
+ const client = clients.get(ws);
123
+ if (!client) return;
124
+ client.topics = new Set(parsed.topics);
125
+ consola.debug(`[WS] Client subscribed to topics: [${[...client.topics].join(", ")}]`);
126
+ } catch {}
127
+ }
128
+ /**
129
+ * Broadcast a message to clients subscribed to a specific topic.
130
+ *
131
+ * - Clients with no subscriptions (empty topics) receive the message (wildcard).
132
+ * - Clients subscribed to the given topic receive the message.
133
+ * - Clients subscribed to other topics (but not this one) are skipped.
134
+ */
135
+ function broadcast(message, topic) {
136
+ if (clients.size === 0) return;
113
137
  const data = JSON.stringify(message);
114
- for (const client of clients) try {
115
- if (client.readyState === WebSocket.OPEN) client.send(data);
116
- else clients.delete(client);
117
- } catch (error) {
118
- consola.debug("WebSocket send failed, removing client:", error);
119
- clients.delete(client);
138
+ for (const [rawWs, client] of clients) {
139
+ if (client.topics.size > 0 && !client.topics.has(topic)) continue;
140
+ try {
141
+ if (rawWs.readyState === WebSocket.OPEN) rawWs.send(data);
142
+ else clients.delete(rawWs);
143
+ } catch (error) {
144
+ consola.debug("WebSocket send failed, removing client:", error);
145
+ clients.delete(rawWs);
146
+ }
120
147
  }
121
148
  }
122
149
  /** Called when a new entry is recorded */
@@ -126,7 +153,7 @@ function notifyEntryAdded(summary) {
126
153
  type: "entry_added",
127
154
  data: summary,
128
155
  timestamp: Date.now()
129
- });
156
+ }, "history");
130
157
  }
131
158
  /** Called when an entry is updated (e.g., response received) */
132
159
  function notifyEntryUpdated(summary) {
@@ -135,7 +162,7 @@ function notifyEntryUpdated(summary) {
135
162
  type: "entry_updated",
136
163
  data: summary,
137
164
  timestamp: Date.now()
138
- });
165
+ }, "history");
139
166
  }
140
167
  /** Called when stats change */
141
168
  function notifyStatsUpdated(stats) {
@@ -144,7 +171,7 @@ function notifyStatsUpdated(stats) {
144
171
  type: "stats_updated",
145
172
  data: stats,
146
173
  timestamp: Date.now()
147
- });
174
+ }, "history");
148
175
  }
149
176
  /** Called when all history is cleared */
150
177
  function notifyHistoryCleared() {
@@ -153,7 +180,7 @@ function notifyHistoryCleared() {
153
180
  type: "history_cleared",
154
181
  data: null,
155
182
  timestamp: Date.now()
156
- });
183
+ }, "history");
157
184
  }
158
185
  /** Called when a session is deleted */
159
186
  function notifySessionDeleted(sessionId) {
@@ -162,7 +189,65 @@ function notifySessionDeleted(sessionId) {
162
189
  type: "session_deleted",
163
190
  data: { sessionId },
164
191
  timestamp: Date.now()
165
- });
192
+ }, "history");
193
+ }
194
+ /** Called when active request state changes (topic: "requests") */
195
+ function notifyActiveRequestChanged(data) {
196
+ if (clients.size === 0) return;
197
+ broadcast({
198
+ type: "active_request_changed",
199
+ data,
200
+ timestamp: Date.now()
201
+ }, "requests");
202
+ }
203
+ /** Called when rate limiter state changes (topic: "status") */
204
+ function notifyRateLimiterChanged(data) {
205
+ if (clients.size === 0) return;
206
+ broadcast({
207
+ type: "rate_limiter_changed",
208
+ data,
209
+ timestamp: Date.now()
210
+ }, "status");
211
+ }
212
+ /** Called when shutdown phase changes (topic: "status") */
213
+ function notifyShutdownPhaseChanged(data) {
214
+ if (clients.size === 0) return;
215
+ broadcast({
216
+ type: "shutdown_phase_changed",
217
+ data,
218
+ timestamp: Date.now()
219
+ }, "status");
220
+ }
221
+ /**
222
+ * Initialize the global WebSocket endpoint at `/ws`.
223
+ * Registers the route on the root Hono app using the shared WebSocket adapter.
224
+ *
225
+ * @param rootApp - The root Hono app instance
226
+ * @param upgradeWs - Shared WebSocket upgrade function from createWebSocketAdapter
227
+ */
228
+ function initWebSocket(rootApp, upgradeWs) {
229
+ rootApp.get("/ws", upgradeWs(() => ({
230
+ onOpen(_event, ws) {
231
+ addClient(ws.raw);
232
+ },
233
+ onClose(_event, ws) {
234
+ removeClient(ws.raw);
235
+ },
236
+ onMessage(event, ws) {
237
+ const raw = typeof event.data === "string" ? event.data : String(event.data);
238
+ handleClientMessage(ws.raw, raw);
239
+ },
240
+ onError(event, ws) {
241
+ consola.debug("WebSocket error:", event);
242
+ removeClient(ws.raw);
243
+ }
244
+ })));
245
+ }
246
+ /** Type guard for subscribe messages from the client */
247
+ function isSubscribeMessage(value) {
248
+ if (typeof value !== "object" || value === null) return false;
249
+ const msg = value;
250
+ return msg.type === "subscribe" && Array.isArray(msg.topics);
166
251
  }
167
252
 
168
253
  //#endregion
@@ -449,7 +534,9 @@ function updateEntry(id, update) {
449
534
  if (update.pipelineInfo) entry.pipelineInfo = update.pipelineInfo;
450
535
  if (update.durationMs !== void 0) entry.durationMs = update.durationMs;
451
536
  if (update.sseEvents) entry.sseEvents = update.sseEvents;
452
- if (update.httpHeaders) entry.httpHeaders = update.httpHeaders;
537
+ if (update.effectiveRequest) entry.effectiveRequest = update.effectiveRequest;
538
+ if (update.wireRequest) entry.wireRequest = update.wireRequest;
539
+ if (update.attempts) entry.attempts = update.attempts;
453
540
  if (update.response) {
454
541
  const session = historyState.sessions.get(entry.sessionId);
455
542
  if (session) {
@@ -468,13 +555,12 @@ function getEntry(id) {
468
555
  return entryIndex.get(id) ?? historyState.entries.find((e) => e.id === id);
469
556
  }
470
557
  /**
471
- * Efficient summary-only query for list views. Filters and paginates using
472
- * the lightweight summaryIndex instead of full entries.
473
- * Search matches against the pre-computed `searchText` field — O(n) string
474
- * includes instead of O(n*m*b) deep content block traversal.
558
+ * Efficient summary-only query for list views with cursor-based pagination.
559
+ * Filters using the lightweight summaryIndex. Search matches against the
560
+ * pre-computed `searchText` field.
475
561
  */
476
562
  function getHistorySummaries(options = {}) {
477
- const { page = 1, limit = 50, model, endpoint, success, from, to, search, sessionId } = options;
563
+ const { cursor, limit = 50, direction = "older", model, endpoint, success, from, to, search, sessionId } = options;
478
564
  let summaries = Array.from(summaryIndex.values());
479
565
  if (sessionId) summaries = summaries.filter((s) => s.sessionId === sessionId);
480
566
  if (model) {
@@ -495,16 +581,19 @@ function getHistorySummaries(options = {}) {
495
581
  return s.searchText.includes(needle);
496
582
  });
497
583
  }
498
- summaries.sort((a, b) => b.timestamp - a.timestamp);
584
+ summaries.sort((a, b) => b.timestamp - a.timestamp || b.id.localeCompare(a.id));
499
585
  const total = summaries.length;
500
- const totalPages = Math.ceil(total / limit);
501
- const start = (page - 1) * limit;
586
+ let startIdx = 0;
587
+ if (cursor) {
588
+ const cursorIdx = summaries.findIndex((s) => s.id === cursor);
589
+ if (cursorIdx !== -1) startIdx = direction === "older" ? cursorIdx + 1 : Math.max(0, cursorIdx - limit);
590
+ }
591
+ const entries = summaries.slice(startIdx, startIdx + limit);
502
592
  return {
503
- entries: summaries.slice(start, start + limit),
593
+ entries,
504
594
  total,
505
- page,
506
- limit,
507
- totalPages
595
+ nextCursor: startIdx + limit < total ? entries.at(-1)?.id ?? null : null,
596
+ prevCursor: startIdx > 0 ? entries[0]?.id ?? null : null
508
597
  };
509
598
  }
510
599
  function getSessions() {
@@ -518,17 +607,20 @@ function getSession(id) {
518
607
  return historyState.sessions.get(id);
519
608
  }
520
609
  function getSessionEntries(sessionId, options = {}) {
521
- const { page = 1, limit = 50 } = options;
610
+ const { cursor, limit = 50 } = options;
522
611
  const all = historyState.entries.filter((e) => e.sessionId === sessionId).sort((a, b) => a.timestamp - b.timestamp);
523
612
  const total = all.length;
524
- const totalPages = Math.max(1, Math.ceil(total / limit));
525
- const start = (page - 1) * limit;
613
+ let startIdx = 0;
614
+ if (cursor) {
615
+ const cursorIdx = all.findIndex((e) => e.id === cursor);
616
+ if (cursorIdx !== -1) startIdx = cursorIdx + 1;
617
+ }
618
+ const entries = all.slice(startIdx, startIdx + limit);
526
619
  return {
527
- entries: all.slice(start, start + limit),
620
+ entries,
528
621
  total,
529
- page,
530
- limit,
531
- totalPages
622
+ nextCursor: startIdx + limit < total ? entries.at(-1)?.id ?? null : null,
623
+ prevCursor: startIdx > 0 ? entries[0]?.id ?? null : null
532
624
  };
533
625
  }
534
626
  function clearHistory() {
@@ -758,6 +850,16 @@ function stopMemoryPressureMonitor() {
758
850
  timer = null;
759
851
  }
760
852
  }
853
+ /** Get memory pressure diagnostics */
854
+ function getMemoryPressureStats() {
855
+ const { heapUsed } = process.memoryUsage();
856
+ return {
857
+ totalEvictedCount,
858
+ currentMaxEntries: historyState.maxEntries,
859
+ heapUsedMB: Math.round(heapUsed / 1024 / 1024),
860
+ heapLimitMB: resolvedHeapLimit ? Math.round(resolvedHeapLimit / 1024 / 1024) : null
861
+ };
862
+ }
761
863
 
762
864
  //#endregion
763
865
  //#region src/lib/config/paths.ts
@@ -882,6 +984,7 @@ async function applyConfigToState() {
882
984
  if (config.anthropic) {
883
985
  const a = config.anthropic;
884
986
  if (a.strip_server_tools !== void 0) state.stripServerTools = a.strip_server_tools;
987
+ if (a.immutable_thinking_messages !== void 0) state.immutableThinkingMessages = a.immutable_thinking_messages;
885
988
  if (a.dedup_tool_calls !== void 0) state.dedupToolCalls = a.dedup_tool_calls === true ? "input" : a.dedup_tool_calls;
886
989
  if (a.strip_read_tool_result_tags !== void 0) state.stripReadToolResultTags = a.strip_read_tool_result_tags;
887
990
  if (a.context_editing !== void 0) state.contextEditingMode = a.context_editing;
@@ -2791,6 +2894,12 @@ const checkUsage = defineCommand({
2791
2894
 
2792
2895
  //#endregion
2793
2896
  //#region src/lib/fetch-utils.ts
2897
+ const SENSITIVE_HEADER_NAMES = new Set([
2898
+ "authorization",
2899
+ "proxy-authorization",
2900
+ "x-api-key",
2901
+ "api-key"
2902
+ ]);
2794
2903
  /**
2795
2904
  * Create an AbortSignal for fetch timeout if configured.
2796
2905
  * Controls the time from request start to receiving response headers.
@@ -2805,9 +2914,13 @@ function createFetchSignal() {
2805
2914
  * so headers are captured even for error responses.
2806
2915
  */
2807
2916
  function captureHttpHeaders(capture, requestHeaders, response) {
2808
- capture.request = { ...requestHeaders };
2917
+ capture.request = sanitizeHeadersForHistory(requestHeaders);
2809
2918
  capture.response = Object.fromEntries(response.headers.entries());
2810
2919
  }
2920
+ /** Return a copy of headers safe to persist in history/error artifacts. */
2921
+ function sanitizeHeadersForHistory(headers) {
2922
+ return Object.fromEntries(Object.entries(headers).map(([name, value]) => [name, SENSITIVE_HEADER_NAMES.has(name.toLowerCase()) ? "***" : value]));
2923
+ }
2811
2924
 
2812
2925
  //#endregion
2813
2926
  //#region src/lib/models/client.ts
@@ -3100,10 +3213,18 @@ var AdaptiveRateLimiter = class {
3100
3213
  */
3101
3214
  enterRateLimitedMode() {
3102
3215
  if (this.mode === "rate-limited") return;
3216
+ const previousMode = this.mode;
3103
3217
  this.mode = "rate-limited";
3104
3218
  this.rateLimitedAt = Date.now();
3105
3219
  this.consecutiveSuccesses = 0;
3106
3220
  consola.warn(`[RateLimiter] Entering rate-limited mode. Requests will be queued with exponential backoff (base: ${this.config.baseRetryIntervalSeconds}s).`);
3221
+ notifyRateLimiterChanged({
3222
+ mode: this.mode,
3223
+ previousMode,
3224
+ queueLength: this.queue.length,
3225
+ consecutiveSuccesses: this.consecutiveSuccesses,
3226
+ rateLimitedAt: this.rateLimitedAt
3227
+ });
3107
3228
  }
3108
3229
  /**
3109
3230
  * Check if we should try to recover to normal mode
@@ -3125,20 +3246,36 @@ var AdaptiveRateLimiter = class {
3125
3246
  * Start gradual recovery mode
3126
3247
  */
3127
3248
  startGradualRecovery() {
3249
+ const previousMode = this.mode;
3128
3250
  this.mode = "recovering";
3129
3251
  this.recoveryStepIndex = 0;
3130
3252
  this.rateLimitedAt = null;
3131
3253
  this.consecutiveSuccesses = 0;
3132
3254
  const firstInterval = this.config.gradualRecoverySteps[0] ?? 0;
3133
3255
  consola.info(`[RateLimiter] Starting ramp-up (${this.config.gradualRecoverySteps.length} steps, first interval: ${firstInterval}s)`);
3256
+ notifyRateLimiterChanged({
3257
+ mode: this.mode,
3258
+ previousMode,
3259
+ queueLength: this.queue.length,
3260
+ consecutiveSuccesses: this.consecutiveSuccesses,
3261
+ rateLimitedAt: this.rateLimitedAt
3262
+ });
3134
3263
  }
3135
3264
  /**
3136
3265
  * Complete recovery to normal mode
3137
3266
  */
3138
3267
  completeRecovery() {
3268
+ const previousMode = this.mode;
3139
3269
  this.mode = "normal";
3140
3270
  this.recoveryStepIndex = 0;
3141
3271
  consola.success("[RateLimiter] Exiting rate-limited mode.");
3272
+ notifyRateLimiterChanged({
3273
+ mode: this.mode,
3274
+ previousMode,
3275
+ queueLength: this.queue.length,
3276
+ consecutiveSuccesses: this.consecutiveSuccesses,
3277
+ rateLimitedAt: this.rateLimitedAt
3278
+ });
3142
3279
  }
3143
3280
  /**
3144
3281
  * Enqueue a request for later execution
@@ -3256,6 +3393,10 @@ var AdaptiveRateLimiter = class {
3256
3393
  rateLimitedAt: this.rateLimitedAt
3257
3394
  };
3258
3395
  }
3396
+ /** Get the effective configuration */
3397
+ getConfig() {
3398
+ return { ...this.config };
3399
+ }
3259
3400
  };
3260
3401
  /** Singleton instance */
3261
3402
  let rateLimiterInstance = null;
@@ -3490,10 +3631,8 @@ function createRequestContext(opts) {
3490
3631
  let _originalRequest = null;
3491
3632
  let _response = null;
3492
3633
  let _pipelineInfo = null;
3493
- let _preprocessInfo = null;
3494
3634
  let _sseEvents = null;
3495
3635
  let _httpHeaders = null;
3496
- const _sanitizationHistory = [];
3497
3636
  let _queueWaitMs = 0;
3498
3637
  const _attempts = [];
3499
3638
  /** Guard: once complete() or fail() is called, subsequent calls are no-ops */
@@ -3526,9 +3665,6 @@ function createRequestContext(opts) {
3526
3665
  get pipelineInfo() {
3527
3666
  return _pipelineInfo;
3528
3667
  },
3529
- get preprocessInfo() {
3530
- return _preprocessInfo;
3531
- },
3532
3668
  get httpHeaders() {
3533
3669
  return _httpHeaders;
3534
3670
  },
@@ -3549,18 +3685,8 @@ function createRequestContext(opts) {
3549
3685
  field: "originalRequest"
3550
3686
  });
3551
3687
  },
3552
- setPreprocessInfo(info) {
3553
- _preprocessInfo = info;
3554
- },
3555
- addSanitizationInfo(info) {
3556
- _sanitizationHistory.push(info);
3557
- },
3558
3688
  setPipelineInfo(info) {
3559
- _pipelineInfo = {
3560
- ..._preprocessInfo && { preprocessing: _preprocessInfo },
3561
- ..._sanitizationHistory.length > 0 && { sanitization: _sanitizationHistory },
3562
- ...info
3563
- };
3689
+ _pipelineInfo = info;
3564
3690
  emit({
3565
3691
  type: "updated",
3566
3692
  context: ctx,
@@ -3580,6 +3706,7 @@ function createRequestContext(opts) {
3580
3706
  const attempt = {
3581
3707
  index: _attempts.length,
3582
3708
  effectiveRequest: null,
3709
+ wireRequest: null,
3583
3710
  response: null,
3584
3711
  error: null,
3585
3712
  strategy: attemptOpts.strategy,
@@ -3603,6 +3730,10 @@ function createRequestContext(opts) {
3603
3730
  const attempt = ctx.currentAttempt;
3604
3731
  if (attempt) attempt.effectiveRequest = req;
3605
3732
  },
3733
+ setAttemptWireRequest(req) {
3734
+ const attempt = ctx.currentAttempt;
3735
+ if (attempt) attempt.wireRequest = req;
3736
+ },
3606
3737
  setAttemptResponse(response) {
3607
3738
  const attempt = ctx.currentAttempt;
3608
3739
  if (attempt) {
@@ -3643,24 +3774,6 @@ function createRequestContext(opts) {
3643
3774
  entry: ctx.toHistoryEntry()
3644
3775
  });
3645
3776
  },
3646
- completeFromStream(acc) {
3647
- const response = {
3648
- success: true,
3649
- model: acc.model,
3650
- usage: {
3651
- input_tokens: acc.inputTokens,
3652
- output_tokens: acc.outputTokens,
3653
- ...acc.cacheReadTokens > 0 && { cache_read_input_tokens: acc.cacheReadTokens },
3654
- ...acc.cacheCreationTokens > 0 && { cache_creation_input_tokens: acc.cacheCreationTokens }
3655
- },
3656
- content: acc.contentBlocks.length > 0 ? {
3657
- role: "assistant",
3658
- content: acc.contentBlocks
3659
- } : null,
3660
- stop_reason: acc.stopReason || void 0
3661
- };
3662
- ctx.complete(response);
3663
- },
3664
3777
  fail(model, error) {
3665
3778
  if (settled) return;
3666
3779
  settled = true;
@@ -3688,6 +3801,7 @@ function createRequestContext(opts) {
3688
3801
  });
3689
3802
  },
3690
3803
  toHistoryEntry() {
3804
+ const p = _originalRequest?.payload;
3691
3805
  const entry = {
3692
3806
  id,
3693
3807
  endpoint: opts.endpoint,
@@ -3698,7 +3812,10 @@ function createRequestContext(opts) {
3698
3812
  messages: _originalRequest?.messages,
3699
3813
  stream: _originalRequest?.stream,
3700
3814
  tools: _originalRequest?.tools,
3701
- system: _originalRequest?.system
3815
+ system: _originalRequest?.system,
3816
+ max_tokens: typeof p?.max_tokens === "number" ? p.max_tokens : void 0,
3817
+ temperature: typeof p?.temperature === "number" ? p.temperature : void 0,
3818
+ thinking: p?.thinking ?? void 0
3702
3819
  }
3703
3820
  };
3704
3821
  if (_response) entry.response = _response;
@@ -3707,12 +3824,38 @@ function createRequestContext(opts) {
3707
3824
  if (_pipelineInfo) entry.pipelineInfo = _pipelineInfo;
3708
3825
  if (_sseEvents) entry.sseEvents = _sseEvents;
3709
3826
  if (_httpHeaders) entry.httpHeaders = _httpHeaders;
3710
- if (_attempts.length > 1) entry.attempts = _attempts.map((a) => ({
3827
+ const finalAttempt = _attempts.at(-1);
3828
+ if (finalAttempt?.effectiveRequest) {
3829
+ const ep = finalAttempt.effectiveRequest;
3830
+ entry.effectiveRequest = {
3831
+ model: ep.model,
3832
+ format: ep.format,
3833
+ messageCount: ep.messages.length,
3834
+ messages: ep.messages,
3835
+ system: ep.payload?.system,
3836
+ payload: ep.payload
3837
+ };
3838
+ }
3839
+ if (finalAttempt?.wireRequest) {
3840
+ const wp = finalAttempt.wireRequest;
3841
+ entry.wireRequest = {
3842
+ model: wp.model,
3843
+ format: wp.format,
3844
+ messageCount: wp.messages.length,
3845
+ messages: wp.messages,
3846
+ system: wp.payload?.system,
3847
+ payload: wp.payload,
3848
+ headers: wp.headers
3849
+ };
3850
+ }
3851
+ if (_attempts.length > 0) entry.attempts = _attempts.map((a) => ({
3711
3852
  index: a.index,
3712
3853
  strategy: a.strategy,
3713
3854
  durationMs: a.durationMs,
3714
3855
  error: a.error?.message,
3715
- truncation: a.truncation
3856
+ truncation: a.truncation,
3857
+ sanitization: a.sanitization,
3858
+ effectiveMessageCount: a.effectiveRequest?.messages?.length
3716
3859
  }));
3717
3860
  return entry;
3718
3861
  }
@@ -3779,12 +3922,19 @@ function createRequestContextManager() {
3779
3922
  const { type, context } = rawEvent;
3780
3923
  switch (type) {
3781
3924
  case "state_changed":
3782
- if (rawEvent.previousState) emit({
3783
- type: "state_changed",
3784
- context,
3785
- previousState: rawEvent.previousState,
3786
- meta: rawEvent.meta
3787
- });
3925
+ if (rawEvent.previousState) {
3926
+ emit({
3927
+ type: "state_changed",
3928
+ context,
3929
+ previousState: rawEvent.previousState,
3930
+ meta: rawEvent.meta
3931
+ });
3932
+ notifyActiveRequestChanged({
3933
+ action: "state_changed",
3934
+ request: summarizeContext(context),
3935
+ activeCount: activeContexts.size
3936
+ });
3937
+ }
3788
3938
  break;
3789
3939
  case "updated":
3790
3940
  if (rawEvent.field) emit({
@@ -3800,6 +3950,11 @@ function createRequestContextManager() {
3800
3950
  entry: rawEvent.entry
3801
3951
  });
3802
3952
  activeContexts.delete(context.id);
3953
+ notifyActiveRequestChanged({
3954
+ action: "completed",
3955
+ requestId: context.id,
3956
+ activeCount: activeContexts.size
3957
+ });
3803
3958
  break;
3804
3959
  case "failed":
3805
3960
  if (rawEvent.entry) emit({
@@ -3808,10 +3963,30 @@ function createRequestContextManager() {
3808
3963
  entry: rawEvent.entry
3809
3964
  });
3810
3965
  activeContexts.delete(context.id);
3966
+ notifyActiveRequestChanged({
3967
+ action: "failed",
3968
+ requestId: context.id,
3969
+ activeCount: activeContexts.size
3970
+ });
3811
3971
  break;
3812
3972
  default: break;
3813
3973
  }
3814
3974
  }
3975
+ /** Build a lightweight summary of a context for WS broadcast */
3976
+ function summarizeContext(ctx) {
3977
+ return {
3978
+ id: ctx.id,
3979
+ endpoint: ctx.endpoint,
3980
+ state: ctx.state,
3981
+ startTime: ctx.startTime,
3982
+ durationMs: ctx.durationMs,
3983
+ model: ctx.originalRequest?.model,
3984
+ stream: ctx.originalRequest?.stream,
3985
+ attemptCount: ctx.attempts.length,
3986
+ currentStrategy: ctx.currentAttempt?.strategy,
3987
+ queueWaitMs: ctx.queueWaitMs
3988
+ };
3989
+ }
3815
3990
  return {
3816
3991
  create(opts) {
3817
3992
  const ctx = createRequestContext({
@@ -3824,6 +3999,11 @@ function createRequestContextManager() {
3824
3999
  type: "created",
3825
4000
  context: ctx
3826
4001
  });
4002
+ notifyActiveRequestChanged({
4003
+ action: "created",
4004
+ request: summarizeContext(ctx),
4005
+ activeCount: activeContexts.size
4006
+ });
3827
4007
  return ctx;
3828
4008
  },
3829
4009
  get(id) {
@@ -3857,10 +4037,26 @@ let serverInstance = null;
3857
4037
  let _isShuttingDown = false;
3858
4038
  let shutdownResolve = null;
3859
4039
  let shutdownAbortController = null;
4040
+ let shutdownDrainAbortController = null;
4041
+ let shutdownPhase = "idle";
4042
+ let shutdownPromise = null;
4043
+ /** Transition shutdown phase and broadcast via WebSocket */
4044
+ function setPhase(phase) {
4045
+ const prev = shutdownPhase;
4046
+ shutdownPhase = phase;
4047
+ if (prev !== phase) notifyShutdownPhaseChanged({
4048
+ phase,
4049
+ previousPhase: prev
4050
+ });
4051
+ }
3860
4052
  /** Check if the server is in shutdown state (used by middleware to reject new requests) */
3861
4053
  function getIsShuttingDown() {
3862
4054
  return _isShuttingDown;
3863
4055
  }
4056
+ /** Get the current shutdown phase */
4057
+ function getShutdownPhase() {
4058
+ return shutdownPhase;
4059
+ }
3864
4060
  /**
3865
4061
  * Get the shutdown abort signal.
3866
4062
  * Returns undefined before shutdown starts. During Phase 1–2 the signal is
@@ -3900,9 +4096,11 @@ function formatActiveRequestsSummary(requests) {
3900
4096
  async function drainActiveRequests(timeoutMs, tracker, opts) {
3901
4097
  const pollInterval = opts?.pollIntervalMs ?? DRAIN_POLL_INTERVAL_MS;
3902
4098
  const progressInterval = opts?.progressIntervalMs ?? DRAIN_PROGRESS_INTERVAL_MS;
4099
+ const abortSignal = opts?.abortSignal;
3903
4100
  const deadline = Date.now() + timeoutMs;
3904
4101
  let lastProgressLog = 0;
3905
4102
  while (Date.now() < deadline) {
4103
+ if (abortSignal?.aborted) return "aborted";
3906
4104
  const active = tracker.getActiveRequests();
3907
4105
  if (active.length === 0) return "drained";
3908
4106
  const now = Date.now();
@@ -3910,7 +4108,23 @@ async function drainActiveRequests(timeoutMs, tracker, opts) {
3910
4108
  lastProgressLog = now;
3911
4109
  consola.info(formatActiveRequestsSummary(active));
3912
4110
  }
3913
- await new Promise((resolve) => setTimeout(resolve, pollInterval));
4111
+ if (await new Promise((resolve) => {
4112
+ let settled = false;
4113
+ let onAbort;
4114
+ const finish = (value) => {
4115
+ if (settled) return;
4116
+ settled = true;
4117
+ if (abortSignal && onAbort) abortSignal.removeEventListener("abort", onAbort);
4118
+ resolve(value);
4119
+ };
4120
+ const timeoutId = setTimeout(() => finish("timer"), pollInterval);
4121
+ if (!abortSignal) return;
4122
+ onAbort = () => {
4123
+ clearTimeout(timeoutId);
4124
+ finish("aborted");
4125
+ };
4126
+ abortSignal.addEventListener("abort", onAbort, { once: true });
4127
+ }) === "aborted") return "aborted";
3914
4128
  }
3915
4129
  return "timeout";
3916
4130
  }
@@ -3935,6 +4149,7 @@ async function gracefulShutdown(signal, deps) {
3935
4149
  };
3936
4150
  _isShuttingDown = true;
3937
4151
  shutdownAbortController = new AbortController();
4152
+ setPhase("phase1");
3938
4153
  consola.info(`Received ${signal}, shutting down gracefully...`);
3939
4154
  try {
3940
4155
  (deps?.contextManager ?? getRequestContextManager()).stopReaper();
@@ -3959,8 +4174,13 @@ async function gracefulShutdown(signal, deps) {
3959
4174
  const activeCount = tracker.getActiveRequests().length;
3960
4175
  if (activeCount > 0) {
3961
4176
  consola.info(`Phase 2: Waiting up to ${gracefulWaitMs / 1e3}s for ${activeCount} active request(s)...`);
4177
+ setPhase("phase2");
4178
+ shutdownDrainAbortController = new AbortController();
3962
4179
  try {
3963
- if (await drainActiveRequests(gracefulWaitMs, tracker, drainOpts) === "drained") {
4180
+ if (await drainActiveRequests(gracefulWaitMs, tracker, {
4181
+ ...drainOpts,
4182
+ abortSignal: shutdownDrainAbortController.signal
4183
+ }) === "drained") {
3964
4184
  consola.info("All requests completed naturally");
3965
4185
  finalize(tracker);
3966
4186
  return;
@@ -3970,9 +4190,14 @@ async function gracefulShutdown(signal, deps) {
3970
4190
  }
3971
4191
  const remaining = tracker.getActiveRequests().length;
3972
4192
  consola.info(`Phase 3: Sending abort signal to ${remaining} remaining request(s), waiting up to ${abortWaitMs / 1e3}s...`);
4193
+ setPhase("phase3");
4194
+ shutdownDrainAbortController = new AbortController();
3973
4195
  shutdownAbortController.abort();
3974
4196
  try {
3975
- if (await drainActiveRequests(abortWaitMs, tracker, drainOpts) === "drained") {
4197
+ if (await drainActiveRequests(abortWaitMs, tracker, {
4198
+ ...drainOpts,
4199
+ abortSignal: shutdownDrainAbortController.signal
4200
+ }) === "drained") {
3976
4201
  consola.info("All requests completed after abort signal");
3977
4202
  finalize(tracker);
3978
4203
  return;
@@ -3980,6 +4205,7 @@ async function gracefulShutdown(signal, deps) {
3980
4205
  } catch (error) {
3981
4206
  consola.error("Error during Phase 3 drain:", error);
3982
4207
  }
4208
+ setPhase("phase4");
3983
4209
  const forceRemaining = tracker.getActiveRequests().length;
3984
4210
  consola.warn(`Phase 4: Force-closing ${forceRemaining} remaining request(s)`);
3985
4211
  if (server) try {
@@ -3992,22 +4218,41 @@ async function gracefulShutdown(signal, deps) {
3992
4218
  }
3993
4219
  /** Final cleanup after drain/force-close */
3994
4220
  function finalize(tracker) {
4221
+ setPhase("finalized");
4222
+ shutdownDrainAbortController = null;
3995
4223
  tracker.destroy();
3996
4224
  consola.info("Shutdown complete");
3997
4225
  shutdownResolve?.();
3998
4226
  }
4227
/**
 * React to a process shutdown signal, escalating if a shutdown is already running.
 *
 * First signal: starts the shutdown function and stores its promise in
 * `shutdownPromise`. Any failure (asynchronous rejection or synchronous throw)
 * is logged, `shutdownResolve` is invoked when set, and `exitFn(1)` is called.
 *
 * Later signals while `_isShuttingDown` is true:
 * - during "phase2" or "phase3" the current `shutdownDrainAbortController` is
 *   aborted, which cuts the drain wait short;
 * - in any other phase `exitFn(1)` is called immediately.
 *
 * @param signal Signal name forwarded to the shutdown function.
 * @param opts Optional overrides: `gracefulShutdownFn(signal)` and `exitFn(code)`;
 *   they default to `gracefulShutdown` and `process.exit`.
 * @returns The in-flight shutdown promise, or `undefined` when none is stored.
 */
function handleShutdownSignal(signal, opts) {
  const shutdownFn = opts?.gracefulShutdownFn ?? ((shutdownSignal) => gracefulShutdown(shutdownSignal));
  const exitFn = opts?.exitFn ?? ((code) => process.exit(code));
  if (_isShuttingDown) {
    if (shutdownPhase === "phase2" || shutdownPhase === "phase3") {
      consola.warn(
        shutdownPhase === "phase2"
          ? "Second signal received, escalating shutdown to abort active requests"
          : "Additional signal received, escalating shutdown to force-close remaining requests"
      );
      shutdownDrainAbortController?.abort();
    } else {
      consola.warn("Additional signal received during forced shutdown, exiting immediately");
      exitFn(1);
    }
    return shutdownPromise ?? void 0;
  }
  // The executor runs synchronously, so the shutdown function still starts right
  // away, but a synchronous throw or a non-promise return value is routed through
  // the same fatal-error handler instead of escaping to the signal listener.
  shutdownPromise = new Promise((resolve) => resolve(shutdownFn(signal))).catch((error) => {
    consola.error("Fatal error during shutdown:", error);
    shutdownResolve?.();
    exitFn(1);
  });
  return shutdownPromise;
}
3999
4252
  /** Setup process signal handlers for graceful shutdown */
4000
4253
  function setupShutdownHandlers() {
4001
4254
  const handler = (signal) => {
4002
- if (_isShuttingDown) {
4003
- consola.warn("Second signal received, forcing immediate exit");
4004
- process.exit(1);
4005
- }
4006
- gracefulShutdown(signal).catch((error) => {
4007
- consola.error("Fatal error during shutdown:", error);
4008
- shutdownResolve?.();
4009
- process.exit(1);
4010
- });
4255
+ handleShutdownSignal(signal);
4011
4256
  };
4012
4257
  process.on("SIGINT", () => handler("SIGINT"));
4013
4258
  process.on("SIGTERM", () => handler("SIGTERM"));
@@ -4780,9 +5025,56 @@ const setupClaudeCode = defineCommand({
4780
5025
  }
4781
5026
  });
4782
5027
 
5028
+ //#endregion
5029
+ //#region src/lib/serve.ts
5030
/**
 * Start the HTTP server on the current runtime.
 *
 * Delegates to `startBunServer` when a global `Bun` object exists and to
 * `startNodeServer` otherwise; resolves to whatever the chosen starter returns.
 */
async function startServer(options) {
  const runningOnBun = typeof globalThis.Bun !== "undefined";
  const start = runningOnBun ? startBunServer : startNodeServer;
  return start(options);
}
5035
/**
 * Start a Node.js server for `options.fetch` and wait until it is listening.
 *
 * The server is created with `createAdaptorServer` from the bundled adaptor
 * chunk and bound to `options.port` / `options.hostname`. The returned promise
 * rejects if the server emits "error" before it starts listening.
 *
 * @returns `{ nodeServer, close(force) }`. `close` resolves once the server has
 *   closed; with a truthy `force` it first calls `closeAllConnections()` when
 *   the server object provides it.
 */
async function startNodeServer(options) {
  const { createAdaptorServer } = await import("./dist-8vhX0s4a.mjs");
  const nodeServer = createAdaptorServer({ fetch: options.fetch });
  const listenOptions = {
    port: options.port,
    host: options.hostname,
    exclusive: false
  };
  await new Promise((resolve, reject) => {
    const onListening = () => {
      // Startup succeeded: the one-shot startup error handler is no longer needed.
      nodeServer.removeListener("error", reject);
      resolve();
    };
    nodeServer.once("error", reject);
    nodeServer.listen(listenOptions, onListening);
  });
  const close = (force) => new Promise((resolve, reject) => {
    if (force && "closeAllConnections" in nodeServer) nodeServer.closeAllConnections();
    nodeServer.close((err) => {
      if (err) reject(err);
      else resolve();
    });
  });
  return {
    nodeServer,
    close
  };
}
5059
/**
 * Start the server through `Bun.serve`.
 *
 * Each request is passed to `options.fetch` together with `{ server }`.
 * A `websocket` handler is configured only when `options.bunWebSocket` is set.
 *
 * @returns `{ close(force) }`, which calls `bunServer.stop(force ?? false)` and
 *   resolves immediately without waiting on the value `stop` returns.
 */
async function startBunServer(options) {
  const serveConfig = {
    fetch(request, server) {
      return options.fetch(request, { server });
    },
    port: options.port,
    hostname: options.hostname,
    // NOTE(review): presumably seconds and Bun's upper bound — confirm against Bun docs.
    idleTimeout: 255
  };
  if (options.bunWebSocket) serveConfig.websocket = options.bunWebSocket;
  const bunServer = Bun.serve(serveConfig);
  return {
    close(force) {
      bunServer.stop(force ?? false);
      return Promise.resolve();
    }
  };
}
5074
+
4783
5075
  //#endregion
4784
5076
  //#region package.json
4785
- var version = "0.7.23";
5077
+ var version = "0.8.1-beta.1";
4786
5078
 
4787
5079
  //#endregion
4788
5080
  //#region src/lib/context/error-persistence.ts
@@ -4799,6 +5091,8 @@ var version = "0.7.23";
4799
5091
  * Files:
4800
5092
  * - meta.json: structured metadata (timestamp, endpoint, model, error, attempts)
4801
5093
  * - request.json: full request payload (messages capped at 50 for size)
5094
+ * - effective-request.json: logical request after sanitize/truncate/retry
5095
+ * - wire-request.json: final outbound HTTP payload + headers sent upstream
4802
5096
  * - response.txt: raw upstream response body (if available)
4803
5097
  * - sse-events.json: recorded SSE events (if streaming request failed mid-stream)
4804
5098
  */
@@ -4823,6 +5117,14 @@ async function writeErrorEntry(entry) {
4823
5117
  messageCount: entry.request.messages?.length,
4824
5118
  toolCount: entry.request.tools?.length
4825
5119
  },
5120
+ effective: entry.effectiveRequest ? {
5121
+ model: entry.effectiveRequest.model,
5122
+ messageCount: entry.effectiveRequest.messageCount
5123
+ } : void 0,
5124
+ wire: entry.wireRequest ? {
5125
+ model: entry.wireRequest.model,
5126
+ messageCount: entry.wireRequest.messageCount
5127
+ } : void 0,
4826
5128
  response: entry.response ? {
4827
5129
  success: entry.response.success,
4828
5130
  model: entry.response.model,
@@ -4844,6 +5146,8 @@ async function writeErrorEntry(entry) {
4844
5146
  }
4845
5147
  if (entry.response?.responseText) files.push(["response.txt", entry.response.responseText]);
4846
5148
  if (entry.sseEvents?.length) files.push(["sse-events.json", JSON.stringify(entry.sseEvents, null, 2)]);
5149
+ if (entry.effectiveRequest) files.push(["effective-request.json", JSON.stringify(entry.effectiveRequest.payload ?? entry.effectiveRequest, null, 2)]);
5150
+ if (entry.wireRequest) files.push(["wire-request.json", JSON.stringify(entry.wireRequest, null, 2)]);
4847
5151
  const id = randomBytes(4).toString("hex");
4848
5152
  const dirPath = path$1.join(PATHS.ERROR_DIR, `${formatTimestamp()}_${id}`);
4849
5153
  await fs$1.mkdir(dirPath, { recursive: true });
@@ -4861,34 +5165,26 @@ function formatTimestamp() {
4861
5165
  function handleHistoryEvent(event) {
4862
5166
  if (!isHistoryEnabled()) return;
4863
5167
  switch (event.type) {
4864
- case "created": {
4865
- const ctx = event.context;
4866
- const sessionId = getCurrentSession(ctx.endpoint);
4867
- insertEntry({
4868
- id: ctx.id,
4869
- sessionId,
4870
- timestamp: ctx.startTime,
4871
- endpoint: ctx.endpoint,
4872
- request: {
4873
- model: ctx.originalRequest?.model,
4874
- messages: ctx.originalRequest?.messages,
4875
- stream: ctx.originalRequest?.stream,
4876
- tools: ctx.originalRequest?.tools,
4877
- system: ctx.originalRequest?.system
4878
- }
4879
- });
4880
- break;
4881
- }
5168
+ case "created": break;
4882
5169
  case "updated":
4883
- if (event.field === "originalRequest" && event.context.originalRequest) {
5170
+ if (event.field === "originalRequest") {
4884
5171
  const orig = event.context.originalRequest;
4885
- updateEntry(event.context.id, { request: {
4886
- model: orig.model,
4887
- messages: orig.messages,
4888
- stream: orig.stream,
4889
- tools: orig.tools,
4890
- system: orig.system
4891
- } });
5172
+ if (!orig) break;
5173
+ const ctx = event.context;
5174
+ const sessionId = getCurrentSession(ctx.endpoint);
5175
+ insertEntry({
5176
+ id: ctx.id,
5177
+ sessionId,
5178
+ timestamp: ctx.startTime,
5179
+ endpoint: ctx.endpoint,
5180
+ request: {
5181
+ model: orig.model,
5182
+ messages: orig.messages,
5183
+ stream: orig.stream,
5184
+ tools: orig.tools,
5185
+ system: orig.system
5186
+ }
5187
+ });
4892
5188
  }
4893
5189
  if (event.field === "pipelineInfo" && event.context.pipelineInfo) updateEntry(event.context.id, { pipelineInfo: event.context.pipelineInfo });
4894
5190
  break;
@@ -4900,7 +5196,24 @@ function handleHistoryEvent(event) {
4900
5196
  response,
4901
5197
  durationMs: entryData.durationMs,
4902
5198
  sseEvents: entryData.sseEvents,
4903
- httpHeaders: entryData.httpHeaders
5199
+ ...entryData.effectiveRequest && { effectiveRequest: {
5200
+ model: entryData.effectiveRequest.model,
5201
+ format: entryData.effectiveRequest.format,
5202
+ messageCount: entryData.effectiveRequest.messageCount,
5203
+ messages: entryData.effectiveRequest.messages,
5204
+ system: entryData.effectiveRequest.system,
5205
+ payload: entryData.effectiveRequest.payload
5206
+ } },
5207
+ ...entryData.wireRequest && { wireRequest: {
5208
+ model: entryData.wireRequest.model,
5209
+ format: entryData.wireRequest.format,
5210
+ messageCount: entryData.wireRequest.messageCount,
5211
+ messages: entryData.wireRequest.messages,
5212
+ system: entryData.wireRequest.system,
5213
+ payload: entryData.wireRequest.payload,
5214
+ headers: entryData.wireRequest.headers ?? entryData.httpHeaders?.request
5215
+ } },
5216
+ ...entryData.attempts && { attempts: entryData.attempts }
4904
5217
  });
4905
5218
  break;
4906
5219
  }
@@ -4971,7 +5284,10 @@ function toHistoryResponse(entryData) {
4971
5284
  },
4972
5285
  stop_reason: r.stop_reason,
4973
5286
  error: r.error,
4974
- content: r.content
5287
+ status: r.status,
5288
+ content: r.content,
5289
+ rawBody: r.responseText,
5290
+ headers: entryData.httpHeaders?.response
4975
5291
  };
4976
5292
  }
4977
5293
  function registerContextConsumers(manager) {
@@ -4986,6 +5302,7 @@ const ENDPOINT = {
4986
5302
  MESSAGES: "/v1/messages",
4987
5303
  CHAT_COMPLETIONS: "/chat/completions",
4988
5304
  RESPONSES: "/responses",
5305
+ WS_RESPONSES: "ws:/responses",
4989
5306
  EMBEDDINGS: "/v1/embeddings"
4990
5307
  };
4991
5308
  /** Capability type → default endpoints for legacy models without `supported_endpoints` */
@@ -5016,9 +5333,16 @@ function isEndpointSupported(model, endpoint) {
5016
5333
  if (!model?.supported_endpoints) return true;
5017
5334
  return model.supported_endpoints.includes(endpoint);
5018
5335
  }
5336
/**
 * Check whether a model can serve the Responses API over at least one
 * transport: HTTP (`/responses`) or WebSocket (`ws:/responses`).
 */
function isResponsesSupported(model) {
  const overHttp = isEndpointSupported(model, ENDPOINT.RESPONSES);
  if (overHttp) return overHttp;
  return isEndpointSupported(model, ENDPOINT.WS_RESPONSES);
}
5019
5343
 
5020
5344
  //#endregion
5021
- //#region src/lib/ws.ts
5345
+ //#region src/lib/ws-adapter.ts
5022
5346
  /** Create a shared WebSocket adapter for the given Hono app */
5023
5347
  async function createWebSocketAdapter(app) {
5024
5348
  if (typeof globalThis.Bun !== "undefined") {
@@ -5039,8 +5363,9 @@ function handleGetEntries(c) {
5039
5363
  if (!isHistoryEnabled()) return c.json({ error: "History recording is not enabled" }, 400);
5040
5364
  const query = c.req.query();
5041
5365
  const result = getHistorySummaries({
5042
- page: query.page ? Number.parseInt(query.page, 10) : void 0,
5366
+ cursor: query.cursor || void 0,
5043
5367
  limit: query.limit ? Number.parseInt(query.limit, 10) : void 0,
5368
+ direction: query.direction || void 0,
5044
5369
  model: query.model || void 0,
5045
5370
  endpoint: query.endpoint,
5046
5371
  success: query.success ? query.success === "true" : void 0,
@@ -5096,7 +5421,7 @@ function handleGetSession(c) {
5096
5421
  if (!session) return c.json({ error: "Session not found" }, 404);
5097
5422
  const query = c.req.query();
5098
5423
  const result = getSessionEntries(id, {
5099
- page: query.page ? Number.parseInt(query.page, 10) : void 0,
5424
+ cursor: query.cursor || void 0,
5100
5425
  limit: query.limit ? Number.parseInt(query.limit, 10) : void 0
5101
5426
  });
5102
5427
  return c.json({
@@ -5155,7 +5480,10 @@ function initHistoryWebSocket(rootApp, upgradeWs) {
5155
5480
  onClose(_event, ws) {
5156
5481
  removeClient(ws.raw);
5157
5482
  },
5158
- onMessage(_event, _ws) {},
5483
+ onMessage(event, ws) {
5484
+ const raw = typeof event.data === "string" ? event.data : String(event.data);
5485
+ handleClientMessage(ws.raw, raw);
5486
+ },
5159
5487
  onError(event, ws) {
5160
5488
  consola.debug("WebSocket error:", event);
5161
5489
  removeClient(ws.raw);
@@ -5465,9 +5793,21 @@ async function executeRequestPipeline(opts) {
5465
5793
  let effectivePayload = opts.payload;
5466
5794
  let lastError = null;
5467
5795
  let totalQueueWaitMs = 0;
5796
+ let lastStrategyName;
5468
5797
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
5798
+ requestContext?.beginAttempt({ strategy: attempt > 0 ? lastStrategyName : void 0 });
5799
+ lastStrategyName = void 0;
5800
+ if (requestContext) {
5801
+ const p = effectivePayload;
5802
+ requestContext.setAttemptEffectiveRequest({
5803
+ model: typeof p.model === "string" ? p.model : "",
5804
+ resolvedModel: model,
5805
+ messages: Array.isArray(p.messages) ? p.messages : [],
5806
+ payload: effectivePayload,
5807
+ format: adapter.format
5808
+ });
5809
+ }
5469
5810
  onBeforeAttempt?.(attempt, effectivePayload);
5470
- requestContext?.beginAttempt({ strategy: attempt > 0 ? "retry" : void 0 });
5471
5811
  requestContext?.transition("executing");
5472
5812
  try {
5473
5813
  const { result: response, queueWaitMs } = await adapter.execute(effectivePayload);
@@ -5501,6 +5841,8 @@ async function executeRequestPipeline(opts) {
5501
5841
  totalQueueWaitMs += action.waitMs;
5502
5842
  requestContext?.addQueueWaitMs(action.waitMs);
5503
5843
  }
5844
+ if (action.meta?.sanitization && requestContext) requestContext.setAttemptSanitization(action.meta.sanitization);
5845
+ lastStrategyName = strategy.name;
5504
5846
  effectivePayload = action.payload;
5505
5847
  onRetry?.(attempt, strategy.name, action.payload, action.meta);
5506
5848
  handled = true;
@@ -5872,33 +6214,45 @@ async function processResponsesInstructions(instructions, model) {
5872
6214
 
5873
6215
  //#endregion
5874
6216
  //#region src/lib/openai/responses-client.ts
6217
/**
 * Build the outbound body and headers for a Copilot `/responses` call.
 *
 * The payload is used as the wire body unchanged. Headers come from
 * `copilotHeaders`, with:
 * - vision enabled only when the input has vision content and the resolved
 *   model does not explicitly report `supports.vision === false`;
 * - agent intent / initiator when the input array contains an assistant item,
 *   a `function_call` or a `function_call_output`; user/panel otherwise.
 *
 * @returns `{ wire, headers }`
 */
function prepareResponsesRequest(payload, opts) {
  const wire = payload;
  const resolvedModel = opts?.resolvedModel;
  const enableVision = hasVisionContent(wire.input);
  const isAgentItem = (item) => item.role === "assistant" || ["function_call", "function_call_output"].includes(item.type);
  const isAgentCall = Array.isArray(wire.input) && wire.input.some(isAgentItem);
  const modelSupportsVision = resolvedModel?.capabilities?.supports?.vision !== false;
  const [intent, initiator] = isAgentCall ? ["conversation-agent", "agent"] : ["conversation-panel", "user"];
  const baseHeaders = copilotHeaders(state, {
    vision: enableVision && modelSupportsVision,
    modelRequestHeaders: resolvedModel?.request_headers,
    intent
  });
  const headers = {
    ...baseHeaders,
    "X-Initiator": initiator
  };
  return {
    wire,
    headers
  };
}
5875
6234
  /** Call Copilot /responses endpoint */
5876
6235
  const createResponses = async (payload, opts) => {
5877
6236
  if (!state.copilotToken) throw new Error("Copilot token not found");
5878
- const enableVision = hasVisionContent(payload.input);
5879
- const isAgentCall = Array.isArray(payload.input) && payload.input.some((item) => item.role === "assistant" || item.type === "function_call" || item.type === "function_call_output");
5880
- const modelSupportsVision = opts?.resolvedModel?.capabilities?.supports?.vision !== false;
5881
- const headers = {
5882
- ...copilotHeaders(state, {
5883
- vision: enableVision && modelSupportsVision,
5884
- modelRequestHeaders: opts?.resolvedModel?.request_headers,
5885
- intent: isAgentCall ? "conversation-agent" : "conversation-panel"
5886
- }),
5887
- "X-Initiator": isAgentCall ? "agent" : "user"
5888
- };
6237
+ const prepared = prepareResponsesRequest(payload, opts);
6238
+ opts?.onPrepared?.({
6239
+ wire: prepared.wire,
6240
+ headers: sanitizeHeadersForHistory(prepared.headers)
6241
+ });
6242
+ const { wire, headers } = prepared;
5889
6243
  const fetchSignal = createFetchSignal();
5890
6244
  const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
5891
6245
  method: "POST",
5892
6246
  headers,
5893
- body: JSON.stringify(payload),
6247
+ body: JSON.stringify(wire),
5894
6248
  signal: fetchSignal
5895
6249
  });
5896
6250
  if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
5897
6251
  if (!response.ok) {
5898
6252
  consola.error("Failed to create responses", response);
5899
- throw await HTTPError.fromResponse("Failed to create responses", response, payload.model);
6253
+ throw await HTTPError.fromResponse("Failed to create responses", response, wire.model);
5900
6254
  }
5901
- if (payload.stream) return events(response);
6255
+ if (wire.stream) return events(response);
5902
6256
  return await response.json();
5903
6257
  };
5904
6258
  /** Check if the input contains any image content */
@@ -6010,7 +6364,7 @@ function createTokenRefreshStrategy() {
6010
6364
  * centralizes that configuration to avoid duplication.
6011
6365
  */
6012
6366
  /** Create the FormatAdapter for Responses API pipeline execution */
6013
- function createResponsesAdapter(selectedModel, headersCapture) {
6367
+ function createResponsesAdapter(selectedModel, headersCapture, onPrepared) {
6014
6368
  return {
6015
6369
  format: "openai-responses",
6016
6370
  sanitize: (p) => ({
@@ -6020,7 +6374,16 @@ function createResponsesAdapter(selectedModel, headersCapture) {
6020
6374
  }),
6021
6375
  execute: (p) => executeWithAdaptiveRateLimit(() => createResponses(p, {
6022
6376
  resolvedModel: selectedModel,
6023
- headersCapture
6377
+ headersCapture,
6378
+ onPrepared: ({ wire, headers }) => {
6379
+ onPrepared?.({
6380
+ model: typeof wire.model === "string" ? wire.model : p.model,
6381
+ messages: [],
6382
+ payload: wire,
6383
+ headers,
6384
+ format: "openai-responses"
6385
+ });
6386
+ }
6024
6387
  })),
6025
6388
  logPayloadSize: (p) => {
6026
6389
  const count = typeof p.input === "string" ? 1 : p.input.length;
@@ -6118,7 +6481,7 @@ async function handleResponseCreate(ws, rawPayload) {
6118
6481
  const resolvedModel = resolveModelName(requestedModel);
6119
6482
  payload.model = resolvedModel;
6120
6483
  const selectedModel = state.modelIndex.get(resolvedModel);
6121
- if (!isEndpointSupported(selectedModel, ENDPOINT.RESPONSES)) {
6484
+ if (!isResponsesSupported(selectedModel)) {
6122
6485
  sendErrorAndClose(ws, `Model "${resolvedModel}" does not support the Responses API`, "invalid_request_error");
6123
6486
  return;
6124
6487
  }
@@ -6146,7 +6509,9 @@ async function handleResponseCreate(ws, rawPayload) {
6146
6509
  clientModel: requestedModel
6147
6510
  });
6148
6511
  const headersCapture = {};
6149
- const adapter = createResponsesAdapter(selectedModel, headersCapture);
6512
+ const adapter = createResponsesAdapter(selectedModel, headersCapture, (wireRequest) => {
6513
+ reqCtx.setAttemptWireRequest(wireRequest);
6514
+ });
6150
6515
  const strategies = createResponsesStrategies();
6151
6516
  try {
6152
6517
  const pipelineResult = await executeRequestPipeline({
@@ -6155,15 +6520,16 @@ async function handleResponseCreate(ws, rawPayload) {
6155
6520
  payload,
6156
6521
  originalPayload: payload,
6157
6522
  model: selectedModel,
6158
- maxRetries: 1
6523
+ maxRetries: 1,
6524
+ requestContext: reqCtx
6159
6525
  });
6160
6526
  reqCtx.setHttpHeaders(headersCapture);
6161
6527
  const iterator = pipelineResult.response[Symbol.asyncIterator]();
6162
6528
  const acc = createResponsesStreamAccumulator();
6163
6529
  const idleTimeoutMs = state.streamIdleTimeout > 0 ? state.streamIdleTimeout * 1e3 : 0;
6164
- const shutdownSignal = getShutdownSignal();
6165
6530
  let eventsReceived = 0;
6166
6531
  while (true) {
6532
+ const shutdownSignal = getShutdownSignal();
6167
6533
  const result = await raceIteratorNext(iterator.next(), {
6168
6534
  idleTimeoutMs,
6169
6535
  abortSignal: shutdownSignal ?? void 0
@@ -6954,32 +7320,44 @@ function createTruncationResponseMarkerOpenAI(result) {
6954
7320
 
6955
7321
  //#endregion
6956
7322
  //#region src/lib/openai/client.ts
6957
- const createChatCompletions = async (payload, opts) => {
6958
- if (!state.copilotToken) throw new Error("Copilot token not found");
6959
- const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x) => x.type === "image_url"));
6960
- const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
7323
+ function prepareChatCompletionsRequest(payload, opts) {
7324
+ const wire = payload;
7325
+ const enableVision = wire.messages.some((x) => typeof x.content !== "string" && x.content?.some((x) => x.type === "image_url"));
7326
+ const isAgentCall = wire.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
6961
7327
  const modelSupportsVision = opts?.resolvedModel?.capabilities?.supports?.vision !== false;
6962
- const headers = {
6963
- ...copilotHeaders(state, {
6964
- vision: enableVision && modelSupportsVision,
6965
- modelRequestHeaders: opts?.resolvedModel?.request_headers,
6966
- intent: isAgentCall ? "conversation-agent" : "conversation-panel"
6967
- }),
6968
- "X-Initiator": isAgentCall ? "agent" : "user"
7328
+ return {
7329
+ wire,
7330
+ headers: {
7331
+ ...copilotHeaders(state, {
7332
+ vision: enableVision && modelSupportsVision,
7333
+ modelRequestHeaders: opts?.resolvedModel?.request_headers,
7334
+ intent: isAgentCall ? "conversation-agent" : "conversation-panel"
7335
+ }),
7336
+ "X-Initiator": isAgentCall ? "agent" : "user"
7337
+ }
6969
7338
  };
7339
+ }
7340
+ const createChatCompletions = async (payload, opts) => {
7341
+ if (!state.copilotToken) throw new Error("Copilot token not found");
7342
+ const prepared = prepareChatCompletionsRequest(payload, opts);
7343
+ opts?.onPrepared?.({
7344
+ wire: prepared.wire,
7345
+ headers: sanitizeHeadersForHistory(prepared.headers)
7346
+ });
7347
+ const { wire, headers } = prepared;
6970
7348
  const fetchSignal = createFetchSignal();
6971
7349
  const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
6972
7350
  method: "POST",
6973
7351
  headers,
6974
- body: JSON.stringify(payload),
7352
+ body: JSON.stringify(wire),
6975
7353
  signal: fetchSignal
6976
7354
  });
6977
7355
  if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
6978
7356
  if (!response.ok) {
6979
7357
  consola.error("Failed to create chat completions", response);
6980
- throw await HTTPError.fromResponse("Failed to create chat completions", response, payload.model);
7358
+ throw await HTTPError.fromResponse("Failed to create chat completions", response, wire.model);
6981
7359
  }
6982
- if (payload.stream) return events(response);
7360
+ if (wire.stream) return events(response);
6983
7361
  return await response.json();
6984
7362
  };
6985
7363
 
@@ -7364,7 +7742,16 @@ async function executeRequest(opts) {
7364
7742
  sanitize: (p) => sanitizeOpenAIMessages(p),
7365
7743
  execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p, {
7366
7744
  resolvedModel: selectedModel,
7367
- headersCapture
7745
+ headersCapture,
7746
+ onPrepared: ({ wire, headers }) => {
7747
+ reqCtx.setAttemptWireRequest({
7748
+ model: typeof wire.model === "string" ? wire.model : payload.model,
7749
+ messages: Array.isArray(wire.messages) ? wire.messages : [],
7750
+ payload: wire,
7751
+ headers,
7752
+ format: "openai-chat-completions"
7753
+ });
7754
+ }
7368
7755
  })),
7369
7756
  logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
7370
7757
  };
@@ -7476,8 +7863,8 @@ async function handleStreamingResponse(opts) {
7476
7863
  acc.rawContent += marker;
7477
7864
  }
7478
7865
  const iterator = response[Symbol.asyncIterator]();
7479
- const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbortSignal);
7480
7866
  for (;;) {
7867
+ const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbortSignal);
7481
7868
  const result = await raceIteratorNext(iterator.next(), {
7482
7869
  idleTimeoutMs,
7483
7870
  abortSignal
@@ -7528,6 +7915,48 @@ chatCompletionRoutes.post("/", async (c) => {
7528
7915
  }
7529
7916
  });
7530
7917
 
7918
+ //#endregion
7919
+ //#region src/routes/config/route.ts
7920
/**
 * Read-only route exposing the current effective runtime configuration.
 *
 * `GET /` responds with a JSON snapshot of selected `state` fields. Rewrite
 * rules are serialized, system prompt overrides are reported as a count only,
 * and the rate limiter config falls back to `null` when unset.
 */
const configRoutes = new Hono();
configRoutes.get("/", (c) => {
  const {
    autoTruncate,
    compressToolResultsBeforeTruncate,
    stripServerTools,
    immutableThinkingMessages,
    dedupToolCalls,
    contextEditingMode,
    rewriteSystemReminders,
    stripReadToolResultTags,
    systemPromptOverrides,
    normalizeResponsesCallIds,
    fetchTimeout,
    streamIdleTimeout,
    staleRequestMaxAge,
    shutdownGracefulWait,
    shutdownAbortWait,
    historyLimit,
    historyMinEntries,
    modelOverrides,
    adaptiveRateLimitConfig
  } = state;
  const snapshot = {
    autoTruncate,
    compressToolResultsBeforeTruncate,
    stripServerTools,
    immutableThinkingMessages,
    dedupToolCalls,
    contextEditingMode,
    rewriteSystemReminders: serializeRewriteSystemReminders(rewriteSystemReminders),
    stripReadToolResultTags,
    systemPromptOverridesCount: systemPromptOverrides.length,
    normalizeResponsesCallIds,
    fetchTimeout,
    streamIdleTimeout,
    staleRequestMaxAge,
    shutdownGracefulWait,
    shutdownAbortWait,
    historyLimit,
    historyMinEntries,
    modelOverrides,
    rateLimiter: adaptiveRateLimitConfig ?? null
  };
  return c.json(snapshot);
});
7945
/**
 * Serialize the `rewriteSystemReminders` setting for API output.
 *
 * Compiled rules hold RegExp objects, which do not serialize to JSON in a
 * useful way, so each rule is converted to plain data:
 * - `from`: the pattern source when it is a RegExp (flags are not included),
 *   otherwise the value as-is;
 * - `to`: copied unchanged;
 * - `method`: included only when set;
 * - `model`: source of `modelPattern`, included only when a pattern is set.
 *
 * @param value A boolean, an array of compiled rules, or an unset value.
 * @returns The value unchanged when it is not an array, otherwise the
 *   serialized rules.
 */
function serializeRewriteSystemReminders(value) {
  // Booleans and unset values have no rules to convert; returning them as-is
  // keeps the config endpoint from failing on a missing setting.
  if (!Array.isArray(value)) return value;
  return value.map((rule) => {
    const serialized = {
      from: rule.from instanceof RegExp ? rule.from.source : rule.from,
      to: rule.to
    };
    if (rule.method) serialized.method = rule.method;
    if (rule.modelPattern) serialized.model = rule.modelPattern.source;
    return serialized;
  });
}
7959
+
7531
7960
  //#endregion
7532
7961
  //#region src/lib/openai/embeddings.ts
7533
7962
  const createEmbeddings = async (payload) => {
@@ -7565,6 +7994,25 @@ eventLoggingRoutes.post("/batch", (c) => {
7565
7994
  return c.text("OK", 200);
7566
7995
  });
7567
7996
 
7997
+ //#endregion
7998
+ //#region src/routes/logs/route.ts
7999
/**
 * Live log endpoint — snapshot of recent entry summaries for the log viewer page.
 *
 * `GET /` responds with `{ entries, total }` as reported by
 * `getHistorySummaries`, or with a 400 error when history recording is disabled.
 *
 * The `limit` query parameter is truncated to an integer and clamped to
 * 1..500. Missing, non-numeric, zero or negative values fall back to 100, so
 * the downstream query never receives a negative or fractional limit.
 *
 * After the initial load, the web client subscribes to the existing /history/ws
 * WebSocket for real-time `entry_added` / `entry_updated` events.
 */
const logsRoutes = new Hono();
logsRoutes.get("/", (c) => {
  if (!isHistoryEnabled()) return c.json({ error: "History recording is not enabled" }, 400);
  const requested = Math.trunc(Number(c.req.query("limit")));
  // NaN fails the comparison, so it takes the default together with 0 and negatives.
  const limit = requested > 0 ? Math.min(requested, 500) : 100;
  const result = getHistorySummaries({ limit });
  return c.json({
    entries: result.entries,
    total: result.total
  });
});
8015
+
7568
8016
  //#endregion
7569
8017
  //#region src/types/api/anthropic.ts
7570
8018
  /** Type guard for server-side tool result blocks (web_search, tool_search, code_execution, etc.) */
@@ -7574,6 +8022,28 @@ function isServerToolResultBlock(block) {
7574
8022
  return type !== "tool_result" && type.endsWith("_tool_result") && "tool_use_id" in block;
7575
8023
  }
7576
8024
 
8025
+ //#endregion
8026
+ //#region src/lib/anthropic/thinking-immutability.ts
8027
/**
 * Whether an assistant message contains signature-bound thinking content.
 *
 * Returns true only for messages with role "assistant" whose content is an
 * array holding at least one `thinking` or `redacted_thinking` block.
 * String content, other roles and malformed (null / non-object) entries in
 * the content array never match and never throw.
 */
function hasThinkingSignatureBlocks(msg) {
  if (msg.role !== "assistant" || !Array.isArray(msg.content)) return false;
  return msg.content.some((block) => block?.type === "thinking" || block?.type === "redacted_thinking");
}
8037
/**
 * Strong preservation check for assistant messages with thinking blocks.
 *
 * When `state.immutableThinkingMessages` is enabled, a message for which
 * `hasThinkingSignatureBlocks` holds is to be treated as immutable by
 * client-side rewrite passes. When the setting is off, the falsy setting
 * value itself is returned.
 */
function isImmutableThinkingAssistantMessage(msg) {
  const enabled = state.immutableThinkingMessages;
  return enabled ? hasThinkingSignatureBlocks(msg) : enabled;
}
8046
+
7577
8047
  //#endregion
7578
8048
  //#region src/lib/anthropic/sanitize.ts
7579
8049
  /**
@@ -7676,6 +8146,7 @@ function sanitizeMessageParamContent(msg) {
7676
8146
  content: blocks
7677
8147
  } : msg;
7678
8148
  }
8149
+ if (isImmutableThinkingAssistantMessage(msg)) return msg;
7679
8150
  const { blocks, modified } = sanitizeTextBlocksInArray(msg.content, (b) => b.type === "text" && "text" in b ? b.text : void 0, (b, text) => ({
7680
8151
  ...b,
7681
8152
  text
@@ -7738,7 +8209,7 @@ function sanitizeAnthropicSystemPrompt(system) {
7738
8209
  function filterEmptyAnthropicTextBlocks(messages) {
7739
8210
  return messages.map((msg) => {
7740
8211
  if (typeof msg.content === "string") return msg;
7741
- if (msg.role === "assistant" && msg.content.some((b) => b.type === "thinking" || b.type === "redacted_thinking")) return msg;
8212
+ if (msg.role === "assistant" && hasThinkingSignatureBlocks(msg)) return msg;
7742
8213
  const filtered = msg.content.filter((block) => {
7743
8214
  if (block.type === "text" && "text" in block) return block.text.trim() !== "";
7744
8215
  return true;
@@ -7805,6 +8276,10 @@ function processToolBlocks(messages, tools) {
7805
8276
  continue;
7806
8277
  }
7807
8278
  if (msg.role === "assistant") {
8279
+ if (isImmutableThinkingAssistantMessage(msg)) {
8280
+ result.push(msg);
8281
+ continue;
8282
+ }
7808
8283
  const newContent = [];
7809
8284
  let modified = false;
7810
8285
  for (const block of msg.content) if (block.type === "tool_use") {
@@ -7945,7 +8420,7 @@ function deduplicateToolCalls(messages, mode = "input") {
7945
8420
  const protectedIds = /* @__PURE__ */ new Set();
7946
8421
  for (const msg of messages) {
7947
8422
  if (msg.role !== "assistant" || typeof msg.content === "string") continue;
7948
- if (!msg.content.some((b) => b.type === "thinking" || b.type === "redacted_thinking")) continue;
8423
+ if (!hasThinkingSignatureBlocks(msg)) continue;
7949
8424
  for (const block of msg.content) if (block.type === "tool_use") protectedIds.add(block.id);
7950
8425
  }
7951
8426
  const removedIds = /* @__PURE__ */ new Set();
@@ -7989,6 +8464,10 @@ function deduplicateToolCalls(messages, mode = "input") {
7989
8464
  for (const msg of filtered) {
7990
8465
  const prev = merged.at(-1);
7991
8466
  if (prev && prev.role === msg.role) {
8467
+ if (prev.role === "assistant" && (isImmutableThinkingAssistantMessage(prev) || isImmutableThinkingAssistantMessage(msg))) {
8468
+ merged.push(msg);
8469
+ continue;
8470
+ }
7992
8471
  const prevContent = typeof prev.content === "string" ? [{
7993
8472
  type: "text",
7994
8473
  text: prev.content
@@ -8361,6 +8840,7 @@ function stripThinkingBlocks(messages, preserveRecentCount) {
8361
8840
  return {
8362
8841
  messages: messages.map((msg, i) => {
8363
8842
  if (i >= stripBefore || msg.role !== "assistant" || !Array.isArray(msg.content)) return msg;
8843
+ if (isImmutableThinkingAssistantMessage(msg)) return msg;
8364
8844
  if (!msg.content.some((block) => block.type === "thinking" || block.type === "redacted_thinking")) return msg;
8365
8845
  const filtered = msg.content.filter((block) => {
8366
8846
  if (block.type === "thinking" || block.type === "redacted_thinking") {
@@ -8802,6 +9282,28 @@ async function handleCountTokens(c) {
8802
9282
  }
8803
9283
  }
8804
9284
 
9285
+ //#endregion
9286
+ //#region src/lib/anthropic/feature-negotiation.ts
9287
+ const NEGOTIATION_TTL_MS = 600 * 1e3;
9288
+ const unsupportedFeatures = /* @__PURE__ */ new Map();
9289
+ function makeKey(modelId, feature) {
9290
+ return `${copilotBaseUrl(state)}|anthropic-messages|${normalizeForMatching(modelId)}|${feature}`;
9291
+ }
9292
+ function isFresh(expiresAt) {
9293
+ return expiresAt > Date.now();
9294
+ }
9295
+ function markAnthropicFeatureUnsupported(modelId, feature) {
9296
+ unsupportedFeatures.set(makeKey(modelId, feature), Date.now() + NEGOTIATION_TTL_MS);
9297
+ }
9298
+ function isAnthropicFeatureUnsupported(modelId, feature) {
9299
+ const key = makeKey(modelId, feature);
9300
+ const expiresAt = unsupportedFeatures.get(key);
9301
+ if (!expiresAt) return false;
9302
+ if (isFresh(expiresAt)) return true;
9303
+ unsupportedFeatures.delete(key);
9304
+ return false;
9305
+ }
9306
+
8805
9307
  //#endregion
8806
9308
  //#region src/lib/anthropic/features.ts
8807
9309
  /**
@@ -8857,11 +9359,11 @@ function modelHasAdaptiveThinking(resolvedModel) {
8857
9359
  * The resolvedModel parameter provides model metadata for capability-based
8858
9360
  * decisions. When unavailable, falls back to name-based detection.
8859
9361
  */
8860
- function buildAnthropicBetaHeaders(modelId, resolvedModel) {
9362
+ function buildAnthropicBetaHeaders(modelId, resolvedModel, opts) {
8861
9363
  const headers = {};
8862
9364
  const betaFeatures = [];
8863
9365
  if (!modelHasAdaptiveThinking(resolvedModel)) betaFeatures.push("interleaved-thinking-2025-05-14");
8864
- if (isContextEditingEnabled(modelId)) betaFeatures.push("context-management-2025-06-27");
9366
+ if (!opts?.disableContextManagement && isContextEditingEnabled(modelId)) betaFeatures.push("context-management-2025-06-27");
8865
9367
  if (modelSupportsToolSearch(modelId)) betaFeatures.push("advanced-tool-use-2025-11-20");
8866
9368
  if (betaFeatures.length > 0) headers["anthropic-beta"] = betaFeatures.join(",");
8867
9369
  return headers;
@@ -9197,11 +9699,44 @@ function adjustThinkingBudget(wire) {
9197
9699
  */
9198
9700
  async function createAnthropicMessages(payload, opts) {
9199
9701
  if (!state.copilotToken) throw new Error("Copilot token not found");
9702
+ const prepared = prepareAnthropicRequest(payload, opts);
9703
+ opts?.onPrepared?.({
9704
+ wire: prepared.wire,
9705
+ headers: sanitizeHeadersForHistory(prepared.headers)
9706
+ });
9707
+ const { wire, headers } = prepared;
9708
+ const model = wire.model;
9709
+ const messages = wire.messages;
9710
+ const tools = wire.tools;
9711
+ const thinking = wire.thinking;
9712
+ consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
9713
+ const fetchSignal = createFetchSignal();
9714
+ const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
9715
+ method: "POST",
9716
+ headers,
9717
+ body: JSON.stringify(wire),
9718
+ signal: fetchSignal
9719
+ });
9720
+ if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
9721
+ if (!response.ok) {
9722
+ consola.debug("Request failed:", {
9723
+ model,
9724
+ max_tokens: wire.max_tokens,
9725
+ stream: wire.stream,
9726
+ toolCount: tools?.length ?? 0,
9727
+ thinking,
9728
+ messageCount: messages.length
9729
+ });
9730
+ throw await HTTPError.fromResponse("Failed to create Anthropic messages", response, model);
9731
+ }
9732
+ if (payload.stream) return events(response);
9733
+ return await response.json();
9734
+ }
9735
+ function prepareAnthropicRequest(payload, opts) {
9200
9736
  const wire = buildWirePayload(payload);
9201
9737
  adjustThinkingBudget(wire);
9202
9738
  const model = wire.model;
9203
9739
  const messages = wire.messages;
9204
- const tools = wire.tools;
9205
9740
  const thinking = wire.thinking;
9206
9741
  const enableVision = messages.some((msg) => {
9207
9742
  if (typeof msg.content === "string") return false;
@@ -9209,6 +9744,8 @@ async function createAnthropicMessages(payload, opts) {
9209
9744
  });
9210
9745
  const isAgentCall = messages.some((msg) => msg.role === "assistant");
9211
9746
  const modelSupportsVision = opts?.resolvedModel?.capabilities?.supports?.vision !== false;
9747
+ const contextManagementDisabled = wire.context_management === null || isAnthropicFeatureUnsupported(model, "context_management");
9748
+ if (contextManagementDisabled) delete wire.context_management;
9212
9749
  const headers = {
9213
9750
  ...copilotHeaders(state, {
9214
9751
  vision: enableVision && modelSupportsVision,
@@ -9217,9 +9754,9 @@ async function createAnthropicMessages(payload, opts) {
9217
9754
  }),
9218
9755
  "X-Initiator": isAgentCall ? "agent" : "user",
9219
9756
  "anthropic-version": "2023-06-01",
9220
- ...buildAnthropicBetaHeaders(model, opts?.resolvedModel)
9757
+ ...buildAnthropicBetaHeaders(model, opts?.resolvedModel, { disableContextManagement: contextManagementDisabled })
9221
9758
  };
9222
- if (!wire.context_management && isContextEditingEnabled(model)) {
9759
+ if (!contextManagementDisabled && !("context_management" in wire) && isContextEditingEnabled(model)) {
9223
9760
  const hasThinking = Boolean(thinking && thinking.type !== "disabled");
9224
9761
  const contextManagement = buildContextManagement(state.contextEditingMode, hasThinking);
9225
9762
  if (contextManagement) {
@@ -9227,28 +9764,10 @@ async function createAnthropicMessages(payload, opts) {
9227
9764
  consola.debug("[DirectAnthropic] Added context_management:", JSON.stringify(contextManagement));
9228
9765
  }
9229
9766
  }
9230
- consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
9231
- const fetchSignal = createFetchSignal();
9232
- const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
9233
- method: "POST",
9234
- headers,
9235
- body: JSON.stringify(wire),
9236
- signal: fetchSignal
9237
- });
9238
- if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
9239
- if (!response.ok) {
9240
- consola.debug("Request failed:", {
9241
- model,
9242
- max_tokens: wire.max_tokens,
9243
- stream: wire.stream,
9244
- toolCount: tools?.length ?? 0,
9245
- thinking,
9246
- messageCount: messages.length
9247
- });
9248
- throw await HTTPError.fromResponse("Failed to create Anthropic messages", response, model);
9249
- }
9250
- if (payload.stream) return events(response);
9251
- return await response.json();
9767
+ return {
9768
+ wire,
9769
+ headers
9770
+ };
9252
9771
  }
9253
9772
 
9254
9773
  //#endregion
@@ -9605,11 +10124,11 @@ function supportsDirectAnthropicApi(modelId) {
9605
10124
  * and the shutdown abort signal — so a stalled upstream connection can be
9606
10125
  * interrupted by either mechanism without waiting for the next event.
9607
10126
  */
9608
- async function* processAnthropicStream(response, acc, clientAbortSignal) {
10127
+ async function* processAnthropicStream(response, acc, clientAbortSignal, shutdownSignalProvider = getShutdownSignal) {
9609
10128
  const idleTimeoutMs = state.streamIdleTimeout * 1e3;
9610
10129
  const iterator = response[Symbol.asyncIterator]();
9611
- const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbortSignal);
9612
10130
  for (;;) {
10131
+ const abortSignal = combineAbortSignals(shutdownSignalProvider(), clientAbortSignal);
9613
10132
  const result = await raceIteratorNext(iterator.next(), {
9614
10133
  idleTimeoutMs,
9615
10134
  abortSignal
@@ -9759,6 +10278,48 @@ function createStreamRepetitionChecker(label, config) {
9759
10278
  };
9760
10279
  }
9761
10280
 
10281
+ //#endregion
10282
+ //#region src/lib/request/strategies/context-management-retry.ts
10283
+ const EXTRA_INPUTS_PATTERN = /context_management:\s*Extra inputs are not permitted/i;
10284
+ function parseContextManagementExtraInputsError(message) {
10285
+ return EXTRA_INPUTS_PATTERN.test(message);
10286
+ }
10287
+ function extractErrorMessage(error) {
10288
+ if (parseContextManagementExtraInputsError(error.message)) return error.message;
10289
+ const raw = error.raw;
10290
+ if (!raw || typeof raw !== "object" || !("responseText" in raw) || typeof raw.responseText !== "string") return null;
10291
+ try {
10292
+ return JSON.parse(raw.responseText).error?.message ?? raw.responseText;
10293
+ } catch {
10294
+ return raw.responseText;
10295
+ }
10296
+ }
10297
+ function createContextManagementRetryStrategy() {
10298
+ return {
10299
+ name: "context-management-retry",
10300
+ canHandle(error) {
10301
+ if (error.type !== "bad_request" || error.status !== 400) return false;
10302
+ const message = extractErrorMessage(error);
10303
+ return message ? parseContextManagementExtraInputsError(message) : false;
10304
+ },
10305
+ handle(error, currentPayload, _context) {
10306
+ markAnthropicFeatureUnsupported(currentPayload.model, "context_management");
10307
+ if (currentPayload.context_management === null) return Promise.resolve({
10308
+ action: "abort",
10309
+ error
10310
+ });
10311
+ return Promise.resolve({
10312
+ action: "retry",
10313
+ payload: {
10314
+ ...currentPayload,
10315
+ context_management: null
10316
+ },
10317
+ meta: { disabledContextManagement: true }
10318
+ });
10319
+ }
10320
+ };
10321
+ }
10322
+
9762
10323
  //#endregion
9763
10324
  //#region src/lib/request/strategies/deferred-tool-retry.ts
9764
10325
  /**
@@ -9903,23 +10464,22 @@ async function handleMessages(c) {
9903
10464
  });
9904
10465
  const preprocessed = preprocessAnthropicMessages(anthropicPayload.messages);
9905
10466
  anthropicPayload.messages = preprocessed.messages;
9906
- reqCtx.setPreprocessInfo({
10467
+ return handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx, {
9907
10468
  strippedReadTagCount: preprocessed.strippedReadTagCount,
9908
10469
  dedupedToolCallCount: preprocessed.dedupedToolCallCount
9909
10470
  });
9910
- return handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx);
9911
10471
  }
9912
- async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx) {
10472
+ async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx, preprocessInfo) {
9913
10473
  consola.debug("Using direct Anthropic API path for model:", anthropicPayload.model);
9914
10474
  const selectedModel = state.modelIndex.get(anthropicPayload.model);
9915
10475
  const { payload: initialSanitized, stats: sanitizationStats } = sanitizeAnthropicMessages(preprocessTools(anthropicPayload));
9916
- reqCtx.addSanitizationInfo(toSanitizationInfo(sanitizationStats));
9917
- const hasPreprocessing = reqCtx.preprocessInfo ? reqCtx.preprocessInfo.dedupedToolCallCount > 0 || reqCtx.preprocessInfo.strippedReadTagCount > 0 : false;
10476
+ const initialSanitizationInfo = toSanitizationInfo(sanitizationStats);
10477
+ const hasPreprocessing = preprocessInfo.dedupedToolCallCount > 0 || preprocessInfo.strippedReadTagCount > 0;
9918
10478
  if (sanitizationStats.totalBlocksRemoved > 0 || sanitizationStats.systemReminderRemovals > 0 || sanitizationStats.fixedNameCount > 0 || hasPreprocessing) {
9919
10479
  const messageMapping = buildMessageMapping(anthropicPayload.messages, initialSanitized.messages);
9920
10480
  reqCtx.setPipelineInfo({
9921
- rewrittenMessages: initialSanitized.messages,
9922
- rewrittenSystem: typeof initialSanitized.system === "string" ? initialSanitized.system : void 0,
10481
+ preprocessing: preprocessInfo,
10482
+ sanitization: [initialSanitizationInfo],
9923
10483
  messageMapping
9924
10484
  });
9925
10485
  }
@@ -9934,13 +10494,23 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx) {
9934
10494
  sanitize: (p) => sanitizeAnthropicMessages(preprocessTools(p)),
9935
10495
  execute: (p) => executeWithAdaptiveRateLimit(() => createAnthropicMessages(p, {
9936
10496
  resolvedModel: selectedModel,
9937
- headersCapture
10497
+ headersCapture,
10498
+ onPrepared: ({ wire, headers }) => {
10499
+ reqCtx.setAttemptWireRequest({
10500
+ model: typeof wire.model === "string" ? wire.model : anthropicPayload.model,
10501
+ messages: Array.isArray(wire.messages) ? wire.messages : [],
10502
+ payload: wire,
10503
+ headers,
10504
+ format: "anthropic-messages"
10505
+ });
10506
+ }
9938
10507
  })),
9939
10508
  logPayloadSize: (p) => logPayloadSizeInfoAnthropic(p, selectedModel)
9940
10509
  };
9941
10510
  const strategies = [
9942
10511
  createNetworkRetryStrategy(),
9943
10512
  createTokenRefreshStrategy(),
10513
+ createContextManagementRetryStrategy(),
9944
10514
  createDeferredToolRetryStrategy(),
9945
10515
  createAutoTruncateStrategy({
9946
10516
  truncate: (p, model, opts) => autoTruncateAnthropic(p, model, opts),
@@ -9963,17 +10533,18 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx) {
9963
10533
  const retryTruncateResult = meta?.truncateResult;
9964
10534
  if (retryTruncateResult) truncateResult = retryTruncateResult;
9965
10535
  const retrySanitization = meta?.sanitization;
9966
- if (retrySanitization) reqCtx.addSanitizationInfo(toSanitizationInfo(retrySanitization));
10536
+ const allSanitization = [initialSanitizationInfo, ...retrySanitization ? [toSanitizationInfo(retrySanitization)] : []];
9967
10537
  const retryMessageMapping = buildMessageMapping(anthropicPayload.messages, newPayload.messages);
9968
10538
  reqCtx.setPipelineInfo({
10539
+ preprocessing: preprocessInfo,
10540
+ sanitization: allSanitization,
9969
10541
  truncation: retryTruncateResult ? {
10542
+ wasTruncated: true,
9970
10543
  removedMessageCount: retryTruncateResult.removedMessageCount,
9971
10544
  originalTokens: retryTruncateResult.originalTokens,
9972
10545
  compactedTokens: retryTruncateResult.compactedTokens,
9973
10546
  processingTimeMs: retryTruncateResult.processingTimeMs
9974
10547
  } : void 0,
9975
- rewrittenMessages: newPayload.messages,
9976
- rewrittenSystem: typeof newPayload.system === "string" ? newPayload.system : void 0,
9977
10548
  messageMapping: retryMessageMapping
9978
10549
  });
9979
10550
  if (reqCtx.tuiLogId) {
@@ -10158,10 +10729,22 @@ function formatModel(model) {
10158
10729
  capabilities: model.capabilities
10159
10730
  };
10160
10731
  }
10732
+ function formatModelDetail(model) {
10733
+ return {
10734
+ ...formatModel(model),
10735
+ version: model.version,
10736
+ preview: model.preview,
10737
+ model_picker_enabled: model.model_picker_enabled,
10738
+ model_picker_category: model.model_picker_category,
10739
+ supported_endpoints: model.supported_endpoints,
10740
+ billing: model.billing
10741
+ };
10742
+ }
10161
10743
  modelsRoutes.get("/", async (c) => {
10162
10744
  try {
10163
10745
  if (!state.models) await cacheModels();
10164
- const models = state.models?.data.map((m) => formatModel(m));
10746
+ const formatter = c.req.query("detail") === "true" ? formatModelDetail : formatModel;
10747
+ const models = state.models?.data.map((m) => formatter(m));
10165
10748
  return c.json({
10166
10749
  object: "list",
10167
10750
  data: models,
@@ -10182,7 +10765,7 @@ modelsRoutes.get("/:model", async (c) => {
10182
10765
  param: "model",
10183
10766
  code: "model_not_found"
10184
10767
  } }, 404);
10185
- return c.json(formatModel(model));
10768
+ return c.json(formatModelDetail(model));
10186
10769
  } catch (error) {
10187
10770
  return forwardError(c, error);
10188
10771
  }
@@ -10199,7 +10782,7 @@ async function handleResponses(c) {
10199
10782
  consola.debug(`Model name resolved: ${clientModel} → ${resolvedModel}`);
10200
10783
  payload.model = resolvedModel;
10201
10784
  }
10202
- if (!isEndpointSupported(state.modelIndex.get(payload.model), ENDPOINT.RESPONSES)) {
10785
+ if (!isResponsesSupported(state.modelIndex.get(payload.model))) {
10203
10786
  const msg = `Model "${payload.model}" does not support the ${ENDPOINT.RESPONSES} endpoint`;
10204
10787
  throw new HTTPError(msg, 400, msg);
10205
10788
  }
@@ -10233,7 +10816,9 @@ async function handleDirectResponses(opts) {
10233
10816
  const { c, payload, reqCtx } = opts;
10234
10817
  const selectedModel = state.modelIndex.get(payload.model);
10235
10818
  const headersCapture = {};
10236
- const adapter = createResponsesAdapter(selectedModel, headersCapture);
10819
+ const adapter = createResponsesAdapter(selectedModel, headersCapture, (wireRequest) => {
10820
+ reqCtx.setAttemptWireRequest(wireRequest);
10821
+ });
10237
10822
  const strategies = createResponsesStrategies();
10238
10823
  try {
10239
10824
  const pipelineResult = await executeRequestPipeline({
@@ -10247,7 +10832,6 @@ async function handleDirectResponses(opts) {
10247
10832
  });
10248
10833
  reqCtx.setHttpHeaders(headersCapture);
10249
10834
  const response = pipelineResult.response;
10250
- reqCtx.addQueueWaitMs(pipelineResult.queueWaitMs);
10251
10835
  if (!payload.stream) {
10252
10836
  const responsesResponse = response;
10253
10837
  const content = responsesOutputToContent(responsesResponse.output);
@@ -10276,8 +10860,8 @@ async function handleDirectResponses(opts) {
10276
10860
  let eventsIn = 0;
10277
10861
  try {
10278
10862
  const iterator = response[Symbol.asyncIterator]();
10279
- const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbort.signal);
10280
10863
  for (;;) {
10864
+ const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbort.signal);
10281
10865
  const result = await raceIteratorNext(iterator.next(), {
10282
10866
  idleTimeoutMs,
10283
10867
  abortSignal
@@ -10340,23 +10924,82 @@ responsesRoutes.post("/", async (c) => {
10340
10924
  });
10341
10925
 
10342
10926
  //#endregion
10343
- //#region src/routes/token/route.ts
10344
- const tokenRoutes = new Hono();
10345
- tokenRoutes.get("/", (c) => {
10927
+ //#region src/routes/status/route.ts
10928
+ /**
10929
+ * Aggregated server status endpoint.
10930
+ * Returns health, auth, quota, rate limiter, memory, shutdown, and model counts
10931
+ * in a single request.
10932
+ */
10933
+ const statusRoutes = new Hono();
10934
+ statusRoutes.get("/", async (c) => {
10935
+ const now = Date.now();
10936
+ const limiter = getAdaptiveRateLimiter();
10937
+ const limiterStatus = limiter?.getStatus();
10938
+ const memStats = getMemoryPressureStats();
10939
+ let activeCount = 0;
10346
10940
  try {
10347
- return c.json({ token: state.copilotToken });
10348
- } catch (error) {
10349
- return forwardError(c, error);
10350
- }
10941
+ activeCount = getRequestContextManager().activeCount;
10942
+ } catch {}
10943
+ let quota = null;
10944
+ try {
10945
+ const usage = await getCopilotUsage();
10946
+ quota = {
10947
+ plan: usage.copilot_plan,
10948
+ resetDate: usage.quota_reset_date,
10949
+ chat: usage.quota_snapshots.chat,
10950
+ completions: usage.quota_snapshots.completions,
10951
+ premiumInteractions: usage.quota_snapshots.premium_interactions
10952
+ };
10953
+ } catch {}
10954
+ return c.json({
10955
+ status: getIsShuttingDown() ? "shutting_down" : state.copilotToken && state.githubToken ? "healthy" : "unhealthy",
10956
+ uptime: serverStartTime > 0 ? Math.floor((now - serverStartTime) / 1e3) : 0,
10957
+ version,
10958
+ auth: {
10959
+ accountType: state.accountType,
10960
+ tokenSource: state.tokenInfo?.source ?? null,
10961
+ tokenExpiresAt: state.tokenInfo?.expiresAt ?? null,
10962
+ copilotTokenExpiresAt: state.copilotTokenInfo ? state.copilotTokenInfo.expiresAt * 1e3 : null
10963
+ },
10964
+ quota,
10965
+ activeRequests: { count: activeCount },
10966
+ rateLimiter: limiterStatus ? {
10967
+ ...limiterStatus,
10968
+ config: limiter.getConfig()
10969
+ } : null,
10970
+ memory: {
10971
+ heapUsedMB: memStats.heapUsedMB,
10972
+ heapLimitMB: memStats.heapLimitMB,
10973
+ historyEntryCount: historyState.entries.length,
10974
+ historyMaxEntries: memStats.currentMaxEntries,
10975
+ totalEvictedCount: memStats.totalEvictedCount
10976
+ },
10977
+ shutdown: { phase: getShutdownPhase() },
10978
+ models: {
10979
+ totalCount: state.models?.data.length ?? 0,
10980
+ availableCount: state.modelIds.size
10981
+ }
10982
+ });
10351
10983
  });
10352
10984
 
10353
10985
  //#endregion
10354
- //#region src/routes/usage/route.ts
10355
- const usageRoutes = new Hono();
10356
- usageRoutes.get("/", async (c) => {
10986
+ //#region src/routes/token/route.ts
10987
+ const tokenRoutes = new Hono();
10988
+ tokenRoutes.get("/", (c) => {
10357
10989
  try {
10358
- const usage = await getCopilotUsage();
10359
- return c.json(usage);
10990
+ return c.json({
10991
+ github: state.tokenInfo ? {
10992
+ token: state.tokenInfo.token,
10993
+ source: state.tokenInfo.source,
10994
+ expiresAt: state.tokenInfo.expiresAt ?? null,
10995
+ refreshable: state.tokenInfo.refreshable
10996
+ } : null,
10997
+ copilot: state.copilotTokenInfo ? {
10998
+ token: state.copilotTokenInfo.token,
10999
+ expiresAt: state.copilotTokenInfo.expiresAt,
11000
+ refreshIn: state.copilotTokenInfo.refreshIn
11001
+ } : null
11002
+ });
10360
11003
  } catch (error) {
10361
11004
  return forwardError(c, error);
10362
11005
  }
@@ -10378,8 +11021,10 @@ function registerRoutes(app) {
10378
11021
  app.route("/v1/responses", responsesRoutes);
10379
11022
  app.route("/v1/messages", messagesRoutes);
10380
11023
  app.route("/api/event_logging", eventLoggingRoutes);
10381
- app.route("/usage", usageRoutes);
10382
- app.route("/token", tokenRoutes);
11024
+ app.route("/api/status", statusRoutes);
11025
+ app.route("/api/tokens", tokenRoutes);
11026
+ app.route("/api/config", configRoutes);
11027
+ app.route("/api/logs", logsRoutes);
10383
11028
  app.route("/history", historyRoutes);
10384
11029
  }
10385
11030
 
@@ -10518,6 +11163,7 @@ async function runServer(options) {
10518
11163
  else off("[model_overrides]", "Model overrides");
10519
11164
  if (state.dedupToolCalls) on("[anthropic.dedup_tool_calls]", "Dedup tool calls", `mode: ${state.dedupToolCalls}`);
10520
11165
  else off("[anthropic.dedup_tool_calls]", "Dedup tool calls");
11166
+ toggle(state.immutableThinkingMessages, "[anthropic.immutable_thinking_messages]", "Immutable thinking messages");
10521
11167
  toggle(state.stripReadToolResultTags, "[anthropic.strip_read_tool_result_tags]", "Strip Read tool result tags");
10522
11168
  if (state.rewriteSystemReminders === true) on("[anthropic.rewrite_system_reminders]", "Rewrite system reminders", "remove all");
10523
11169
  else if (state.rewriteSystemReminders === false) off("[anthropic.rewrite_system_reminders]", "Rewrite system reminders");
@@ -10567,39 +11213,29 @@ async function runServer(options) {
10567
11213
  }).join("\n");
10568
11214
  if (overrideLines) consola.info(`Model overrides:\n${overrideLines}`);
10569
11215
  const serverUrl = `http://${options.host ?? "localhost"}:${options.port}`;
10570
- if (typeof globalThis.Bun !== "undefined") server.use("*", async (c, next) => {
10571
- const runtime = c.req.raw.runtime;
10572
- if (runtime?.bun?.server) c.env = { server: runtime.bun.server };
10573
- await next();
10574
- });
10575
11216
  const wsAdapter = await createWebSocketAdapter(server);
11217
+ initWebSocket(server, wsAdapter.upgradeWebSocket);
10576
11218
  initHistoryWebSocket(server, wsAdapter.upgradeWebSocket);
10577
11219
  initResponsesWebSocket(server, wsAdapter.upgradeWebSocket);
10578
11220
  consola.box(`Web UI:\n🌐 Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage\nšŸ“œ History UI: ${serverUrl}/history`);
10579
11221
  const bunWebSocket = typeof globalThis.Bun !== "undefined" ? (await import("hono/bun")).websocket : void 0;
10580
11222
  let serverInstance;
10581
11223
  try {
10582
- serverInstance = serve({
11224
+ serverInstance = await startServer({
10583
11225
  fetch: server.fetch,
10584
11226
  port: options.port,
10585
11227
  hostname: options.host,
10586
- reusePort: true,
10587
- gracefulShutdown: false,
10588
- bun: {
10589
- idleTimeout: 255,
10590
- ...bunWebSocket && { websocket: bunWebSocket }
10591
- }
11228
+ bunWebSocket
10592
11229
  });
10593
11230
  } catch (error) {
10594
11231
  consola.error(`Failed to start server on port ${options.port}. Is the port already in use?`, error);
10595
11232
  process.exit(1);
10596
11233
  }
11234
+ consola.info(`Listening on ${serverUrl}`);
11235
+ setServerStartTime(Date.now());
10597
11236
  setServerInstance(serverInstance);
10598
11237
  setupShutdownHandlers();
10599
- if (wsAdapter.injectWebSocket) {
10600
- const nodeServer = serverInstance.node?.server;
10601
- if (nodeServer && "on" in nodeServer) wsAdapter.injectWebSocket(nodeServer);
10602
- }
11238
+ if (wsAdapter.injectWebSocket && serverInstance.nodeServer) wsAdapter.injectWebSocket(serverInstance.nodeServer);
10603
11239
  await waitForShutdown();
10604
11240
  }
10605
11241
  const start = defineCommand({