@oh-my-pi/pi-ai 15.5.3 → 15.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,38 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.5.6] - 2026-05-27
6
+ ### Added
7
+
8
+ - Added `PI_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS` to control how long an idle Codex WebSocket stays eligible for reuse, with `0` disabling the check
9
+
10
+ ### Fixed
11
+
12
+ - Fixed stalled requests on reused Codex WebSocket connections: a connection that has been silent for longer than the idle-reuse threshold is now dropped and replaced with a fresh handshake before the next request is sent
13
+ - Fixed stale response frames from a completed turn lingering in the WebSocket queue; they are now discarded before the next request is sent, so it no longer processes terminal frames from the previous response
14
+ - Fixed websocket dead-socket detection to fail a stale connection when no inbound traffic or pong is observed after a ping timeout, improving recovery on runtimes that do not emit pong events
15
+
16
+ ## [15.5.5] - 2026-05-27
17
+
18
+ ### Added
19
+
20
+ - Added `PI_CODEX_WEBSOCKET_PING_INTERVAL_MS` to configure the interval for Codex WebSocket protocol ping heartbeats
21
+ - Added `PI_CODEX_WEBSOCKET_PONG_TIMEOUT_MS` to configure the Codex WebSocket pong timeout used to detect unresponsive connections
22
+ - Added `PI_CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY` to configure the maximum buffered Codex WebSocket inbound queue size before transport fallback
23
+
24
+ ### Changed
25
+
26
+ - Improved Codex WebSocket timeout diagnostics to include last event type and time since last progress event
27
+ - Enhanced Codex WebSocket error classification to recognize ping, pong, send, and queue-overflow failures as retryable
28
+
29
+ ### Fixed
30
+
31
+ - Fixed Codex WebSocket send failures by wrapping socket.send() in try-catch and surfacing errors as retryable transport errors
32
+ - Fixed Codex WebSocket inbound queue overflow by adding capacity bounds and triggering fallback to SSE when exceeded
33
+ - Fixed Codex WebSocket pong timeout detection by tracking pong events and failing the connection when no pong is received within the configured timeout
34
+ - Fixed Anthropic streaming to suppress hallucinated meta-prompt thinking blocks (the recent "I don't see any current rewritten thinking..." regression). When the marker phrase `rewritten thinking` appears in a streamed thinking summary the block is collapsed to a plain `Thinking...` placeholder and its signature is dropped so subsequent turns can't re-anchor on the garbled chain.
35
+ - Fixed Codex WebSocket silent stalls by adding protocol pings, inbound queue bounding, clearer idle-timeout diagnostics, and SDK retry clamping for first-event timeouts.
36
+
5
37
  ## [15.5.0] - 2026-05-26
6
38
  ### Added
7
39
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.5.3",
4
+ "version": "15.5.6",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -40,7 +40,7 @@
40
40
  "dependencies": {
41
41
  "@anthropic-ai/sdk": "^0.94.0",
42
42
  "@bufbuild/protobuf": "^2.12.0",
43
- "@oh-my-pi/pi-utils": "15.5.3",
43
+ "@oh-my-pi/pi-utils": "15.5.6",
44
44
  "openai": "^6.36.0",
45
45
  "partial-json": "^0.1.7",
46
46
  "zod": "4.4.3"
@@ -66,6 +66,7 @@ import { notifyProviderResponse } from "../utils/provider-response";
66
66
  import { isCopilotTransientModelError } from "../utils/retry";
67
67
  import { COMBINATOR_KEYS, NO_STRICT, toolWireSchema } from "../utils/schema";
68
68
  import { spillToDescription } from "../utils/schema/spill";
69
+ import { createSdkStreamRequestOptions } from "../utils/sdk-stream-timeout";
69
70
  import { notifyRawSseEvent, wrapFetchForSseDebug } from "../utils/sse-debug";
70
71
  import {
71
72
  buildCopilotDynamicHeaders,
@@ -547,6 +548,17 @@ function convertContentBlocks(
547
548
  return blocks;
548
549
  }
549
550
 
551
/**
 * Marker phrase that Claude has been observed to hallucinate inside reasoning summaries
 * (e.g. "I don't see any current rewritten thinking or next thinking to process. Could
 * you provide..."). When this substring appears in a streamed thinking block we collapse
 * the entire block to {@link BROKEN_THINKING_REPLACEMENT} and drop the signature so
 * downstream UI/transcripts don't surface the meta-prompt and replay can't re-anchor on
 * the garbled chain.
 *
 * Detection is a plain, case-sensitive substring test (`String.prototype.includes`)
 * against the accumulated thinking text.
 */
const BROKEN_THINKING_MARKER = "rewritten thinking";
/** Placeholder that replaces the entire text of a thinking block once it has been suppressed. */
const BROKEN_THINKING_REPLACEMENT = "Thinking...";
561
+
550
562
  export type AnthropicEffort = "low" | "medium" | "high" | "xhigh" | "max";
551
563
  export type AnthropicThinkingDisplay = "summarized" | "omitted";
552
564
 
@@ -1096,6 +1108,12 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1096
1108
  const requestTimeoutMs =
1097
1109
  firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0 ? firstEventTimeoutMs : undefined;
1098
1110
  const blocks = output.content as Block[];
1111
+ // Recent Claude releases occasionally hallucinate meta-prompts asking the operator
1112
+ // to supply "rewritten thinking" / "next thinking" as reasoning content. The summary
1113
+ // is useless and confuses the UI, so we collapse any thinking block whose stream
1114
+ // contains the marker phrase down to a plain "Thinking..." placeholder and drop the
1115
+ // (now invalid) signature so subsequent turns don't replay the garbled chain.
1116
+ const suppressedThinkingBlocks = new WeakSet<Block>();
1099
1117
  stream.push({ type: "start", partial: output });
1100
1118
  // Retry loop for transient errors from the stream.
1101
1119
  // Provider-level transport/rate-limit failures: only before any streamed content starts.
@@ -1106,10 +1124,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1106
1124
  while (true) {
1107
1125
  activeAbortTracker = createAbortSourceTracker(options?.signal);
1108
1126
  const { requestSignal } = activeAbortTracker;
1109
- const requestOptions =
1110
- requestTimeoutMs === undefined
1111
- ? { signal: requestSignal }
1112
- : { signal: requestSignal, timeout: requestTimeoutMs };
1127
+ const requestOptions = createSdkStreamRequestOptions(requestSignal, requestTimeoutMs);
1113
1128
  const anthropicRequest = client.messages.create({ ...params, stream: true }, requestOptions);
1114
1129
  let streamedReplayUnsafeContent = false;
1115
1130
 
@@ -1253,7 +1268,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1253
1268
  const index = blocks.findIndex(b => b.index === event.index);
1254
1269
  const block = blocks[index];
1255
1270
  if (block && block.type === "thinking") {
1271
+ if (suppressedThinkingBlocks.has(block)) continue;
1256
1272
  block.thinking += event.delta.thinking;
1273
+ if (block.thinking.includes(BROKEN_THINKING_MARKER)) {
1274
+ suppressedThinkingBlocks.add(block);
1275
+ block.thinking = BROKEN_THINKING_REPLACEMENT;
1276
+ block.thinkingSignature = "";
1277
+ continue;
1278
+ }
1257
1279
  stream.push({
1258
1280
  type: "thinking_delta",
1259
1281
  contentIndex: index,
@@ -1277,7 +1299,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1277
1299
  } else if (event.delta.type === "signature_delta") {
1278
1300
  const index = blocks.findIndex(b => b.index === event.index);
1279
1301
  const block = blocks[index];
1280
- if (block && block.type === "thinking") {
1302
+ if (block && block.type === "thinking" && !suppressedThinkingBlocks.has(block)) {
1281
1303
  block.thinkingSignature = block.thinkingSignature || "";
1282
1304
  block.thinkingSignature += event.delta.signature;
1283
1305
  }
@@ -1295,6 +1317,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1295
1317
  partial: output,
1296
1318
  });
1297
1319
  } else if (block.type === "thinking") {
1320
+ if (
1321
+ !suppressedThinkingBlocks.has(block) &&
1322
+ block.thinking.includes(BROKEN_THINKING_MARKER)
1323
+ ) {
1324
+ suppressedThinkingBlocks.add(block);
1325
+ block.thinking = BROKEN_THINKING_REPLACEMENT;
1326
+ block.thinkingSignature = "";
1327
+ }
1298
1328
  stream.push({
1299
1329
  type: "thinking_end",
1300
1330
  contentIndex: index,
@@ -93,6 +93,23 @@ const CODEX_DEBUG = $flag("PI_CODEX_DEBUG");
93
93
  const CODEX_MAX_RETRIES = 5;
94
94
  const CODEX_RETRY_DELAY_MS = 500;
95
95
  const CODEX_WEBSOCKET_CONNECT_TIMEOUT_MS = 10000;
96
/**
 * Default interval, in milliseconds, between heartbeat pings on an open Codex
 * WebSocket. Overridable through `PI_CODEX_WEBSOCKET_PING_INTERVAL_MS`; when the
 * resolved value is not positive no heartbeat timer is started.
 */
const CODEX_WEBSOCKET_PING_INTERVAL_MS = 10_000;
/**
 * Default pong timeout, in milliseconds, used by the heartbeat to decide that a
 * pinged socket is dead. Overridable through `PI_CODEX_WEBSOCKET_PONG_TIMEOUT_MS`;
 * the heartbeat skips the timeout check when the resolved value is not positive.
 */
const CODEX_WEBSOCKET_PONG_TIMEOUT_MS = 60_000;
/**
 * Default cap on the number of inbound frames buffered in a connection's queue.
 * When a new frame arrives while the queue is already at this size, the
 * connection is failed with a "websocket message queue exceeded" transport error.
 * Overridable through `PI_CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY`.
 */
const CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY = 4096;
/**
 * Maximum quiet period (no inbound frames AND no observed pong) we'll trust a
 * reused WebSocket for before forcing a fresh handshake. Codex backends and
 * intermediaries occasionally evict idle sockets server-side without sending a
 * FIN, leaving the local `readyState` as OPEN while the next `send()` becomes a
 * write into a half-open buffer. Reusing such a socket parks the next request
 * at `#nextMessage` until the first-event/idle timeout fires (issue #1450). The
 * heartbeat below also catches dead sockets, but only after `pongTimeoutMs`
 * (default 60s) and only while a request is active — this gate closes the door
 * earlier and even when the gap between requests is purely client-side (tool
 * execution, user typing, etc.). Set `PI_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS=0`
 * to disable.
 */
const CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS = 30_000;
96
113
  /**
97
114
  * Steady-state liveness ceiling for the Codex WebSocket transport. Distinct from
98
115
  * the OMP-wide stream watchdog removed in #1392: a WebSocket can stay TCP-open
@@ -132,6 +149,24 @@ function isCodexStreamProgressEvent(event: unknown): boolean {
132
149
  return typeof type === "string" && CODEX_ADDITIONAL_PROGRESS_EVENT_TYPES.has(type);
133
150
  }
134
151
 
152
/**
 * Bookkeeping captured by the WebSocket read loop and attached to timeout
 * errors: when the latest inbound event and the latest progress event were
 * seen, and what their `type` was (absent when no typed event has been seen).
 */
type CodexWebSocketTimeoutDetails = {
  lastEventAt: number;
  lastEventType?: string;
  lastProgressAt: number;
  lastProgressEventType?: string;
};

/**
 * Builds a timeout error message that carries stream diagnostics.
 *
 * @param reason Base description of the timeout; becomes the message prefix.
 * @param details Last-event / last-progress timestamps and event types.
 * @returns `"<reason> (last event: <…>; last progress: <…>)"`, where each slot is
 *   either `"<type> <n>ms ago"` or `"none"` when the corresponding type is
 *   missing or empty. Ages are clamped so they are never negative.
 */
function createCodexWebSocketTimeoutMessage(reason: string, details: CodexWebSocketTimeoutDetails): string {
  // One clock read so both ages are measured against the same instant.
  const capturedAt = Date.now();
  const describe = (eventType: string | undefined, seenAt: number): string => {
    if (!eventType) return "none";
    const ageMs = Math.max(0, capturedAt - seenAt);
    return `${eventType} ${ageMs}ms ago`;
  };
  const eventPart = describe(details.lastEventType, details.lastEventAt);
  const progressPart = describe(details.lastProgressEventType, details.lastProgressAt);
  return `${reason} (last event: ${eventPart}; last progress: ${progressPart})`;
}
169
+
135
170
  type CodexTransport = "sse" | "websocket";
136
171
  type CodexEventItem = ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | ResponseCustomToolCall;
137
172
  type CodexOutputBlock = ThinkingContent | TextContent | (ToolCall & { partialJson: string });
@@ -254,6 +289,25 @@ function getCodexWebSocketFirstEventTimeoutMs(): number {
254
289
  );
255
290
  }
256
291
 
292
/**
 * Resolves the heartbeat ping interval (ms) by handing the
 * `PI_CODEX_WEBSOCKET_PING_INTERVAL_MS` environment value and the built-in
 * default to `parseCodexNonNegativeInteger`.
 */
function getCodexWebSocketPingIntervalMs(): number {
  const override = $env.PI_CODEX_WEBSOCKET_PING_INTERVAL_MS;
  return parseCodexNonNegativeInteger(override, CODEX_WEBSOCKET_PING_INTERVAL_MS);
}
295
+
296
/**
 * Resolves the heartbeat pong timeout (ms) by handing the
 * `PI_CODEX_WEBSOCKET_PONG_TIMEOUT_MS` environment value and the built-in
 * default to `parseCodexNonNegativeInteger`.
 */
function getCodexWebSocketPongTimeoutMs(): number {
  const override = $env.PI_CODEX_WEBSOCKET_PONG_TIMEOUT_MS;
  return parseCodexNonNegativeInteger(override, CODEX_WEBSOCKET_PONG_TIMEOUT_MS);
}
299
+
300
/**
 * Resolves the inbound queue capacity by handing the
 * `PI_CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY` environment value and the built-in
 * default to `parseCodexPositiveInteger`.
 */
function getCodexWebSocketMessageQueueCapacity(): number {
  const override = $env.PI_CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY;
  return parseCodexPositiveInteger(override, CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY);
}
306
+
307
/**
 * Resolves the maximum idle period (ms) a socket may be reused after, by handing
 * the `PI_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS` environment value and the built-in
 * default to `parseCodexNonNegativeInteger`.
 */
function getCodexWebSocketMaxIdleReuseMs(): number {
  const override = $env.PI_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS;
  return parseCodexNonNegativeInteger(override, CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS);
}
310
+
257
311
  function createCodexProviderSessionState(): CodexProviderSessionState {
258
312
  const state: CodexProviderSessionState = {
259
313
  webSocketSessions: new Map(),
@@ -301,6 +355,10 @@ function isCodexWebSocketRetryableStreamError(error: unknown): boolean {
301
355
  message.includes("websocket closed (") ||
302
356
  message.includes("websocket closed before response completion") ||
303
357
  message.includes("websocket connection is unavailable") ||
358
+ message.includes("websocket send failed") ||
359
+ message.includes("websocket ping failed") ||
360
+ message.includes("websocket pong timeout") ||
361
+ message.includes("websocket message queue exceeded") ||
304
362
  message.includes("idle timeout waiting for websocket") ||
305
363
  message.includes("timeout waiting for first websocket event") ||
306
364
  message.includes("syntaxerror") ||
@@ -1986,6 +2044,17 @@ class CodexWebSocketConnection {
1986
2044
  #connectPromise?: Promise<void>;
1987
2045
  #activeRequest = false;
1988
2046
  #streamObserver?: (event: RawSseEvent) => void;
2047
/** Handle of the `setInterval` timer that drives heartbeat pings; `undefined` while no heartbeat is running. */
#heartbeatInterval: NodeJS.Timeout | undefined;
/** Detaches the `pong` listener registered when the heartbeat started; unset when no listener is attached. */
#removePongListener?: () => void;
/**
 * Wall-clock of the most recent inbound activity on this socket — any
 * decoded message, any pong, or the moment the handshake completed. Used
 * by {@link isHealthyForReuse} so we don't write a continuation frame into
 * a TCP-open-but-server-evicted socket whose `readyState` still says OPEN.
 */
#lastInboundAt = 0;
/** Wall-clock of the last heartbeat ping we issued; 0 if none yet. */
#lastPingAt = 0;
1989
2058
 
1990
2059
  constructor(url: string, headers: Record<string, string>, options: CodexWebSocketConnectionOptions) {
1991
2060
  this.#url = url;
@@ -1997,6 +2066,29 @@ class CodexWebSocketConnection {
1997
2066
  return this.#socket?.readyState === WebSocket.OPEN;
1998
2067
  }
1999
2068
 
2069
/**
 * Reuse gate for the connection pool; stricter than {@link isOpen}.
 *
 * A socket passes only if it is open AND has shown inbound activity within
 * {@link CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS} (or its env override). Bun's
 * `WebSocket` does not always surface server-side eviction (no `onclose`, no
 * `onerror`), so `readyState` can stay OPEN long after the upstream dropped
 * the connection. Writing the next `response.create` into such a socket parks
 * the reader until the first-event / idle timeout fires (issue #1450);
 * reconnecting on any suspect socket trades a sub-second handshake for a
 * 60–300 s stall.
 *
 * @returns `true` when the socket may be reused for the next request.
 */
isHealthyForReuse(): boolean {
  if (!this.isOpen()) return false;

  const idleBudgetMs = getCodexWebSocketMaxIdleReuseMs();
  // A non-positive budget switches the idle check off: open is good enough.
  if (idleBudgetMs <= 0) return true;

  // The handshake stamps #lastInboundAt and every message/pong refreshes it,
  // so 0 means the clock was never initialized — a desync we treat as unhealthy.
  const lastSeenAt = this.#lastInboundAt;
  return lastSeenAt !== 0 && Date.now() - lastSeenAt <= idleBudgetMs;
}
2091
+
2000
2092
  matchesAuth(headers: Record<string, string>): boolean {
2001
2093
  return this.#headers.authorization === headers.authorization;
2002
2094
  }
@@ -2009,6 +2101,7 @@ class CodexWebSocketConnection {
2009
2101
  this.#socket.close(1000, reason);
2010
2102
  }
2011
2103
  this.#socket = null;
2104
+ this.#stopHeartbeat();
2012
2105
  }
2013
2106
 
2014
2107
  async connect(signal?: AbortSignal): Promise<void> {
@@ -2058,7 +2151,9 @@ class CodexWebSocketConnection {
2058
2151
  if (!settled) {
2059
2152
  settled = true;
2060
2153
  clearPending();
2154
+ this.#lastInboundAt = Date.now();
2061
2155
  this.#captureHandshakeHeaders(socket, event);
2156
+ this.#startHeartbeat(socket);
2062
2157
  resolve();
2063
2158
  }
2064
2159
  };
@@ -2079,6 +2174,7 @@ class CodexWebSocketConnection {
2079
2174
  };
2080
2175
  socket.onclose = event => {
2081
2176
  this.#socket = null;
2177
+ this.#stopHeartbeat();
2082
2178
  if (!settled) {
2083
2179
  settled = true;
2084
2180
  clearPending();
@@ -2089,6 +2185,10 @@ class CodexWebSocketConnection {
2089
2185
  this.#push(null);
2090
2186
  };
2091
2187
  socket.onmessage = event => {
2188
+ // Stamp inbound activity before parsing so even malformed frames refresh
2189
+ // the liveness clock — what matters for reuse health is that the upstream
2190
+ // is still talking to us, not that every frame is well-formed.
2191
+ this.#lastInboundAt = Date.now();
2092
2192
  try {
2093
2193
  const text = typeof event.data === "string" ? event.data : Buffer.from(event.data).toString("utf-8");
2094
2194
  if (!text) return;
@@ -2132,6 +2232,17 @@ class CodexWebSocketConnection {
2132
2232
  }
2133
2233
  this.#activeRequest = true;
2134
2234
  this.#streamObserver = onSseEvent;
2235
+ // Drain any non-error frames left over from a prior request before sending.
2236
+ // `processCodexResponseStream` breaks its `for-await` on the terminal event,
2237
+ // which interrupts our generator at `yield next` (the post-yield `break`
2238
+ // never runs). Any frame that landed between the consumer's break and the
2239
+ // generator's `finally` lingers in `#queue` and would otherwise become the
2240
+ // first frame of THIS request — a stale `response.completed` would end the
2241
+ // turn immediately with empty output, and a stale non-progress frame would
2242
+ // flip `sawFirstEvent` and silently downgrade the first-event timeout to
2243
+ // the longer idle timeout. Transport errors are preserved so we surface
2244
+ // the death signal instead of writing into a dead socket.
2245
+ this.#dropStaleFrames();
2135
2246
  const onAbort = () => {
2136
2247
  this.close("aborted");
2137
2248
  this.#push(createCodexWebSocketTransportError("request was aborted"));
@@ -2147,23 +2258,48 @@ class CodexWebSocketConnection {
2147
2258
  try {
2148
2259
  const requestPayload = JSON.stringify(request);
2149
2260
  notifyCodexWebSocketOutbound(onSseEvent, request, requestPayload);
2150
- this.#socket.send(requestPayload);
2261
+ try {
2262
+ this.#socket.send(requestPayload);
2263
+ } catch (error) {
2264
+ throw createCodexWebSocketTransportError(
2265
+ `websocket send failed: ${error instanceof Error ? error.message : String(error)}`,
2266
+ );
2267
+ }
2151
2268
  let sawFirstEvent = false;
2152
2269
  const { idleTimeoutMs, firstEventTimeoutMs } = timeouts;
2153
2270
  let lastProgressAt = Date.now();
2271
+ let lastProgressEventType: string | undefined;
2272
+ let lastEventAt = lastProgressAt;
2273
+ let lastEventType: string | undefined;
2154
2274
  while (true) {
2155
2275
  let timeoutMs: number | undefined;
2156
2276
  let timeoutReason: string;
2157
2277
  if (sawFirstEvent) {
2158
- timeoutReason = "idle timeout waiting for websocket";
2278
+ timeoutReason = createCodexWebSocketTimeoutMessage("idle timeout waiting for websocket", {
2279
+ lastEventAt,
2280
+ lastEventType,
2281
+ lastProgressAt,
2282
+ lastProgressEventType,
2283
+ });
2159
2284
  if (idleTimeoutMs !== undefined && idleTimeoutMs > 0) {
2160
2285
  timeoutMs = idleTimeoutMs - (Date.now() - lastProgressAt);
2161
2286
  if (timeoutMs <= 0) {
2287
+ logCodexDebug("codex websocket idle timeout", {
2288
+ lastEventType,
2289
+ lastProgressEventType,
2290
+ msSinceLastEvent: Date.now() - lastEventAt,
2291
+ msSinceLastProgress: Date.now() - lastProgressAt,
2292
+ });
2162
2293
  throw createCodexWebSocketTransportError(timeoutReason);
2163
2294
  }
2164
2295
  }
2165
2296
  } else {
2166
- timeoutReason = "timeout waiting for first websocket event";
2297
+ timeoutReason = createCodexWebSocketTimeoutMessage("timeout waiting for first websocket event", {
2298
+ lastEventAt,
2299
+ lastEventType,
2300
+ lastProgressAt,
2301
+ lastProgressEventType,
2302
+ });
2167
2303
  if (firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0) {
2168
2304
  timeoutMs = firstEventTimeoutMs;
2169
2305
  }
@@ -2176,11 +2312,14 @@ class CodexWebSocketConnection {
2176
2312
  throw createCodexWebSocketTransportError("websocket closed before response completion");
2177
2313
  }
2178
2314
  sawFirstEvent = true;
2315
+ const eventType = typeof next.type === "string" ? next.type : "";
2316
+ lastEventAt = Date.now();
2317
+ lastEventType = eventType || undefined;
2179
2318
  if (isCodexStreamProgressEvent(next)) {
2180
- lastProgressAt = Date.now();
2319
+ lastProgressAt = lastEventAt;
2320
+ lastProgressEventType = lastEventType;
2181
2321
  }
2182
2322
  yield next;
2183
- const eventType = typeof next.type === "string" ? next.type : "";
2184
2323
  if (
2185
2324
  eventType === "response.completed" ||
2186
2325
  eventType === "response.done" ||
@@ -2207,7 +2346,133 @@ class CodexWebSocketConnection {
2207
2346
  this.#onHandshakeHeaders(headers);
2208
2347
  }
2209
2348
 
2349
/**
 * Starts the liveness heartbeat for `socket`, replacing any heartbeat that is
 * already running.
 *
 * On every tick (interval from `getCodexWebSocketPingIntervalMs()`; a value of
 * 0 or less disables the heartbeat) the timer:
 *  1. stops itself if `socket` is no longer the current, OPEN socket;
 *  2. fails the connection with "websocket pong timeout" when a ping has been
 *     outstanding for longer than `getCodexWebSocketPongTimeoutMs()` without
 *     ANY inbound traffic (data frame or pong) arriving since it was sent;
 *  3. otherwise sends a protocol ping, failing the connection with
 *     "websocket ping failed: …" if `ping()` throws.
 *
 * The timer is `unref()`ed so the heartbeat alone does not keep the process
 * alive.
 *
 * @param socket The freshly connected socket to monitor.
 */
#startHeartbeat(socket: Bun.WebSocket): void {
  this.#stopHeartbeat();
  const intervalMs = getCodexWebSocketPingIntervalMs();
  if (intervalMs <= 0) return;

  this.#lastPingAt = 0;
  // Timestamp of the OLDEST ping that no inbound traffic has answered yet
  // (0 = nothing outstanding). `#lastPingAt` is overwritten on every tick, so
  // measuring the timeout against it can never exceed roughly one ping
  // interval — with the defaults (10 s interval, 60 s timeout) a dead socket
  // would never trip the check. Anchoring on the first unanswered ping lets
  // the silence actually accumulate up to `pongTimeoutMs`.
  let unansweredPingAt = 0;

  const socketEventTarget = socket as EventTarget;
  const onPong = () => {
    // Pongs are inbound activity — refresh the reuse-health clock so a quiet
    // but ping-responsive socket stays trustworthy across requests.
    this.#lastInboundAt = Date.now();
  };
  if (
    typeof socketEventTarget.addEventListener === "function" &&
    typeof socketEventTarget.removeEventListener === "function"
  ) {
    socketEventTarget.addEventListener("pong", onPong);
    this.#removePongListener = () => socketEventTarget.removeEventListener("pong", onPong);
  }

  this.#heartbeatInterval = setInterval(() => {
    if (this.#socket !== socket || socket.readyState !== WebSocket.OPEN) {
      this.#stopHeartbeat();
      return;
    }
    // Any inbound activity at or after the outstanding ping counts as its
    // answer, whether or not the runtime surfaced an explicit `pong` event.
    if (unansweredPingAt > 0 && this.#lastInboundAt >= unansweredPingAt) {
      unansweredPingAt = 0;
    }
    // Fail-closed on inbound silence even when no pong has ever been observed:
    // some runtimes do not surface a `pong` event for our outgoing pings
    // (issue #1450), and requiring an observed pong first would let truly dead
    // sockets sit in the pool until the per-request first-event / idle timeout
    // (60–300 s) finally fired.
    const pongTimeoutMs = getCodexWebSocketPongTimeoutMs();
    if (pongTimeoutMs > 0 && unansweredPingAt > 0 && Date.now() - unansweredPingAt > pongTimeoutMs) {
      this.#failQueue(createCodexWebSocketTransportError("websocket pong timeout"), "pong-timeout");
      return;
    }
    if (typeof socket.ping !== "function") {
      // Runtime cannot send protocol pings; nothing for the heartbeat to do.
      this.#stopHeartbeat();
      return;
    }
    try {
      socket.ping();
      this.#lastPingAt = Date.now();
      // Only arm the timeout clock when no earlier ping is still waiting.
      if (unansweredPingAt === 0) unansweredPingAt = this.#lastPingAt;
    } catch (error) {
      this.#failQueue(
        createCodexWebSocketTransportError(
          `websocket ping failed: ${error instanceof Error ? error.message : String(error)}`,
        ),
        "ping-failed",
      );
    }
  }, intervalMs);
  this.#heartbeatInterval.unref();
}
2410
+
2411
/**
 * Tears down the heartbeat: cancels the ping timer, detaches the `pong`
 * listener, and resets the last-ping timestamp. Safe to call when no
 * heartbeat is running.
 */
#stopHeartbeat(): void {
  const timer = this.#heartbeatInterval;
  if (timer) {
    clearInterval(timer);
    this.#heartbeatInterval = undefined;
  }

  const detachPong = this.#removePongListener;
  if (detachPong) {
    detachPong();
    this.#removePongListener = undefined;
  }

  this.#lastPingAt = 0;
}
2422
+
2423
/**
 * Fails the connection with a transport error: logs it, replaces everything
 * buffered in the inbound queue with that single error, closes the socket
 * using `closeReason`, and wakes every pending reader so it observes the error.
 *
 * @param error Transport error to surface to the reader.
 * @param closeReason Reason string passed to `close()`.
 */
#failQueue(error: Error, closeReason: string): void {
  const { message } = error;
  logCodexDebug("codex websocket transport failure", { error: message, closeReason });
  // In-place swap: drop all buffered items and leave only the failure marker.
  this.#queue.splice(0, this.#queue.length, error);
  this.close(closeReason);
  this.#wakeWaiters();
}
2430
+
2431
/**
 * Purges leftovers of a previous request from `#queue` before a new request
 * is sent. Every queued item that is not an `Error` is discarded; queued
 * transport errors (from `onerror` / `onclose` / `#failQueue`) are kept, in
 * order, so the next `#nextMessage` surfaces the death signal instead of
 * waiting it out.
 *
 * @returns The number of items dropped (test/debug visibility only).
 */
#dropStaleFrames(): number {
  const before = this.#queue.length;
  // Compact the surviving errors toward the front of the same array.
  let kept = 0;
  for (let i = 0; i < before; i++) {
    const entry = this.#queue[i];
    if (entry instanceof Error) {
      this.#queue[kept] = entry;
      kept += 1;
    }
  }
  const dropped = before - kept;
  if (dropped === 0) return 0;
  this.#queue.length = kept;
  logCodexDebug("codex websocket dropped stale frames before request", { dropped });
  return dropped;
}
2449
+
2450
/** Resumes every pending reader, in FIFO order, leaving `#waiters` empty. */
#wakeWaiters(): void {
  let resume = this.#waiters.shift();
  while (resume) {
    resume();
    resume = this.#waiters.shift();
  }
}
2457
+
2210
2458
  #push(item: Record<string, unknown> | Error | null): void {
2459
+ if (item instanceof Error) {
2460
+ if (!(this.#queue[0] instanceof Error)) {
2461
+ this.#queue.length = 0;
2462
+ }
2463
+ this.#queue.push(item);
2464
+ this.#wakeWaiters();
2465
+ return;
2466
+ }
2467
+ if (item !== null && this.#queue.length >= getCodexWebSocketMessageQueueCapacity()) {
2468
+ this.#failQueue(
2469
+ createCodexWebSocketTransportError(
2470
+ `websocket message queue exceeded ${getCodexWebSocketMessageQueueCapacity()} items`,
2471
+ ),
2472
+ "queue-overflow",
2473
+ );
2474
+ return;
2475
+ }
2211
2476
  this.#queue.push(item);
2212
2477
  const waiter = this.#waiters.shift();
2213
2478
  if (waiter) waiter();
@@ -2250,12 +2515,22 @@ async function getOrCreateCodexWebSocketConnection(
2250
2515
  ): Promise<CodexWebSocketConnection> {
2251
2516
  const headerRecord = headersToRecord(headers);
2252
2517
  if (state.connection?.isOpen()) {
2253
- if (state.connection.matchesAuth(headerRecord)) {
2518
+ if (!state.connection.matchesAuth(headerRecord)) {
2519
+ state.connection.close("token-refresh");
2520
+ resetCodexWebSocketAppendState(state);
2521
+ } else if (state.connection.isHealthyForReuse()) {
2254
2522
  logger.time("codexWs:reuseOpenSocket");
2255
2523
  return state.connection;
2524
+ } else {
2525
+ // Open in readyState but no inbound traffic recently — likely server-
2526
+ // evicted (issue #1450). Force a fresh handshake instead of writing
2527
+ // `response.create` into a half-open buffer and waiting out the
2528
+ // first-event timeout. Drop append state because the new socket
2529
+ // won't carry the prior `previous_response_id` context.
2530
+ logCodexDebug("codex websocket reuse rejected by health check", {});
2531
+ state.connection.close("stale-reuse");
2532
+ resetCodexWebSocketAppendState(state);
2256
2533
  }
2257
- state.connection.close("token-refresh");
2258
- resetCodexWebSocketAppendState(state);
2259
2534
  }
2260
2535
  state.connection?.close("reconnect");
2261
2536
  resetCodexWebSocketAppendState(state);
@@ -41,6 +41,7 @@ import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
41
41
  import { notifyProviderResponse } from "../utils/provider-response";
42
42
  import { callWithCopilotModelRetry } from "../utils/retry";
43
43
  import { adaptSchemaForStrict, NO_STRICT, sanitizeSchemaForOpenAIResponses, toolWireSchema } from "../utils/schema";
44
+ import { createSdkStreamRequestOptions } from "../utils/sdk-stream-timeout";
44
45
  import { wrapFetchForSseDebug } from "../utils/sse-debug";
45
46
  import { mapToOpenAIResponsesToolChoice, type OpenAIResponsesToolChoice } from "../utils/tool-choice";
46
47
  import {
@@ -208,10 +209,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
208
209
  };
209
210
  const openaiStream = await callWithCopilotModelRetry(
210
211
  async () => {
211
- const requestOptions =
212
- requestTimeoutMs === undefined
213
- ? { signal: requestSignal }
214
- : { signal: requestSignal, timeout: requestTimeoutMs };
212
+ const requestOptions = createSdkStreamRequestOptions(requestSignal, requestTimeoutMs);
215
213
  let requestTimeout: NodeJS.Timeout | undefined;
216
214
  if (requestTimeoutMs !== undefined) {
217
215
  requestTimeout = setTimeout(