@oh-my-pi/pi-ai 15.5.3 → 15.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,38 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.5.6] - 2026-05-27
|
|
6
|
+
### Added
|
|
7
|
+
|
|
8
|
+
- Added `PI_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS` to control how long an idle Codex WebSocket stays eligible for reuse, with `0` disabling the check
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
|
|
12
|
+
- Fixed reused Codex WebSocket connections that had gone silent without activity to be dropped and replaced with a fresh handshake after the idle-reuse threshold, preventing stalled next requests
|
|
13
|
+
- Fixed stale response frames left in the websocket queue from a completed turn so subsequent requests no longer process terminal frames from the previous response
|
|
14
|
+
- Fixed websocket dead-socket detection to fail a stale connection when no inbound traffic or pong is observed after a ping timeout, improving recovery on runtimes that do not emit pong events
|
|
15
|
+
|
|
16
|
+
## [15.5.5] - 2026-05-27
|
|
17
|
+
|
|
18
|
+
### Added
|
|
19
|
+
|
|
20
|
+
- Added `PI_CODEX_WEBSOCKET_PING_INTERVAL_MS` to configure the interval for Codex WebSocket protocol ping heartbeats
|
|
21
|
+
- Added `PI_CODEX_WEBSOCKET_PONG_TIMEOUT_MS` to configure the Codex WebSocket pong timeout used to detect unresponsive connections
|
|
22
|
+
- Added `PI_CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY` to configure the maximum buffered Codex WebSocket inbound queue size before transport fallback
|
|
23
|
+
|
|
24
|
+
### Changed
|
|
25
|
+
|
|
26
|
+
- Improved Codex WebSocket timeout diagnostics to include last event type and time since last progress event
|
|
27
|
+
- Enhanced Codex WebSocket error classification to recognize ping, pong, send, and queue-overflow failures as retryable
|
|
28
|
+
|
|
29
|
+
### Fixed
|
|
30
|
+
|
|
31
|
+
- Fixed Codex WebSocket send failures by wrapping socket.send() in try-catch and surfacing errors as retryable transport errors
|
|
32
|
+
- Fixed Codex WebSocket inbound queue overflow by adding capacity bounds and triggering fallback to SSE when exceeded
|
|
33
|
+
- Fixed Codex WebSocket pong timeout detection by tracking pong events and failing the connection when no pong is received within the configured timeout
|
|
34
|
+
- Fixed Anthropic streaming to suppress hallucinated meta-prompt thinking blocks (the recent "I don't see any current rewritten thinking..." regression). When the marker phrase `rewritten thinking` appears in a streamed thinking summary the block is collapsed to a plain `Thinking...` placeholder and its signature is dropped so subsequent turns can't re-anchor on the garbled chain.
|
|
35
|
+
- Fixed Codex WebSocket silent stalls by adding protocol pings, inbound queue bounding, clearer idle-timeout diagnostics, and SDK retry clamping for first-event timeouts.
|
|
36
|
+
|
|
5
37
|
## [15.5.0] - 2026-05-26
|
|
6
38
|
### Added
|
|
7
39
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "15.5.3",
|
|
4
|
+
"version": "15.5.6",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
"dependencies": {
|
|
41
41
|
"@anthropic-ai/sdk": "^0.94.0",
|
|
42
42
|
"@bufbuild/protobuf": "^2.12.0",
|
|
43
|
-
"@oh-my-pi/pi-utils": "15.5.3",
|
|
43
|
+
"@oh-my-pi/pi-utils": "15.5.6",
|
|
44
44
|
"openai": "^6.36.0",
|
|
45
45
|
"partial-json": "^0.1.7",
|
|
46
46
|
"zod": "4.4.3"
|
|
@@ -66,6 +66,7 @@ import { notifyProviderResponse } from "../utils/provider-response";
|
|
|
66
66
|
import { isCopilotTransientModelError } from "../utils/retry";
|
|
67
67
|
import { COMBINATOR_KEYS, NO_STRICT, toolWireSchema } from "../utils/schema";
|
|
68
68
|
import { spillToDescription } from "../utils/schema/spill";
|
|
69
|
+
import { createSdkStreamRequestOptions } from "../utils/sdk-stream-timeout";
|
|
69
70
|
import { notifyRawSseEvent, wrapFetchForSseDebug } from "../utils/sse-debug";
|
|
70
71
|
import {
|
|
71
72
|
buildCopilotDynamicHeaders,
|
|
@@ -547,6 +548,17 @@ function convertContentBlocks(
|
|
|
547
548
|
return blocks;
|
|
548
549
|
}
|
|
549
550
|
|
|
551
|
+
/**
|
|
552
|
+
* Marker phrase that Claude has been observed to hallucinate inside reasoning summaries
|
|
553
|
+
* (e.g. "I don't see any current rewritten thinking or next thinking to process. Could
|
|
554
|
+
* you provide..."). When this substring appears in a streamed thinking block we collapse
|
|
555
|
+
* the entire block to {@link BROKEN_THINKING_REPLACEMENT} and drop the signature so
|
|
556
|
+
* downstream UI/transcripts don't surface the meta-prompt and replay can't re-anchor on
|
|
557
|
+
* the garbled chain.
|
|
558
|
+
*/
|
|
559
|
+
const BROKEN_THINKING_MARKER = "rewritten thinking";
|
|
560
|
+
const BROKEN_THINKING_REPLACEMENT = "Thinking...";
|
|
561
|
+
|
|
550
562
|
export type AnthropicEffort = "low" | "medium" | "high" | "xhigh" | "max";
|
|
551
563
|
export type AnthropicThinkingDisplay = "summarized" | "omitted";
|
|
552
564
|
|
|
@@ -1096,6 +1108,12 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1096
1108
|
const requestTimeoutMs =
|
|
1097
1109
|
firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0 ? firstEventTimeoutMs : undefined;
|
|
1098
1110
|
const blocks = output.content as Block[];
|
|
1111
|
+
// Recent Claude releases occasionally hallucinate meta-prompts asking the operator
|
|
1112
|
+
// to supply "rewritten thinking" / "next thinking" as reasoning content. The summary
|
|
1113
|
+
// is useless and confuses the UI, so we collapse any thinking block whose stream
|
|
1114
|
+
// contains the marker phrase down to a plain "Thinking..." placeholder and drop the
|
|
1115
|
+
// (now invalid) signature so subsequent turns don't replay the garbled chain.
|
|
1116
|
+
const suppressedThinkingBlocks = new WeakSet<Block>();
|
|
1099
1117
|
stream.push({ type: "start", partial: output });
|
|
1100
1118
|
// Retry loop for transient errors from the stream.
|
|
1101
1119
|
// Provider-level transport/rate-limit failures: only before any streamed content starts.
|
|
@@ -1106,10 +1124,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1106
1124
|
while (true) {
|
|
1107
1125
|
activeAbortTracker = createAbortSourceTracker(options?.signal);
|
|
1108
1126
|
const { requestSignal } = activeAbortTracker;
|
|
1109
|
-
const requestOptions =
|
|
1110
|
-
requestTimeoutMs === undefined
|
|
1111
|
-
? { signal: requestSignal }
|
|
1112
|
-
: { signal: requestSignal, timeout: requestTimeoutMs };
|
|
1127
|
+
const requestOptions = createSdkStreamRequestOptions(requestSignal, requestTimeoutMs);
|
|
1113
1128
|
const anthropicRequest = client.messages.create({ ...params, stream: true }, requestOptions);
|
|
1114
1129
|
let streamedReplayUnsafeContent = false;
|
|
1115
1130
|
|
|
@@ -1253,7 +1268,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1253
1268
|
const index = blocks.findIndex(b => b.index === event.index);
|
|
1254
1269
|
const block = blocks[index];
|
|
1255
1270
|
if (block && block.type === "thinking") {
|
|
1271
|
+
if (suppressedThinkingBlocks.has(block)) continue;
|
|
1256
1272
|
block.thinking += event.delta.thinking;
|
|
1273
|
+
if (block.thinking.includes(BROKEN_THINKING_MARKER)) {
|
|
1274
|
+
suppressedThinkingBlocks.add(block);
|
|
1275
|
+
block.thinking = BROKEN_THINKING_REPLACEMENT;
|
|
1276
|
+
block.thinkingSignature = "";
|
|
1277
|
+
continue;
|
|
1278
|
+
}
|
|
1257
1279
|
stream.push({
|
|
1258
1280
|
type: "thinking_delta",
|
|
1259
1281
|
contentIndex: index,
|
|
@@ -1277,7 +1299,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1277
1299
|
} else if (event.delta.type === "signature_delta") {
|
|
1278
1300
|
const index = blocks.findIndex(b => b.index === event.index);
|
|
1279
1301
|
const block = blocks[index];
|
|
1280
|
-
if (block && block.type === "thinking") {
|
|
1302
|
+
if (block && block.type === "thinking" && !suppressedThinkingBlocks.has(block)) {
|
|
1281
1303
|
block.thinkingSignature = block.thinkingSignature || "";
|
|
1282
1304
|
block.thinkingSignature += event.delta.signature;
|
|
1283
1305
|
}
|
|
@@ -1295,6 +1317,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1295
1317
|
partial: output,
|
|
1296
1318
|
});
|
|
1297
1319
|
} else if (block.type === "thinking") {
|
|
1320
|
+
if (
|
|
1321
|
+
!suppressedThinkingBlocks.has(block) &&
|
|
1322
|
+
block.thinking.includes(BROKEN_THINKING_MARKER)
|
|
1323
|
+
) {
|
|
1324
|
+
suppressedThinkingBlocks.add(block);
|
|
1325
|
+
block.thinking = BROKEN_THINKING_REPLACEMENT;
|
|
1326
|
+
block.thinkingSignature = "";
|
|
1327
|
+
}
|
|
1298
1328
|
stream.push({
|
|
1299
1329
|
type: "thinking_end",
|
|
1300
1330
|
contentIndex: index,
|
|
@@ -93,6 +93,23 @@ const CODEX_DEBUG = $flag("PI_CODEX_DEBUG");
|
|
|
93
93
|
const CODEX_MAX_RETRIES = 5;
|
|
94
94
|
const CODEX_RETRY_DELAY_MS = 500;
|
|
95
95
|
const CODEX_WEBSOCKET_CONNECT_TIMEOUT_MS = 10000;
|
|
96
|
+
const CODEX_WEBSOCKET_PING_INTERVAL_MS = 10_000;
|
|
97
|
+
const CODEX_WEBSOCKET_PONG_TIMEOUT_MS = 60_000;
|
|
98
|
+
const CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY = 4096;
|
|
99
|
+
/**
|
|
100
|
+
* Maximum quiet period (no inbound frames AND no observed pong) we'll trust a
|
|
101
|
+
* reused WebSocket for before forcing a fresh handshake. Codex backends and
|
|
102
|
+
* intermediaries occasionally evict idle sockets server-side without sending a
|
|
103
|
+
* FIN, leaving the local `readyState` as OPEN while the next `send()` becomes a
|
|
104
|
+
* write into a half-open buffer. Reusing such a socket parks the next request
|
|
105
|
+
* at `#nextMessage` until the first-event/idle timeout fires (issue #1450). The
|
|
106
|
+
* heartbeat below also catches dead sockets, but only after `pongTimeoutMs`
|
|
107
|
+
* (default 60s) and only while a request is active — this gate closes the door
|
|
108
|
+
* earlier and even when the gap between requests is purely client-side (tool
|
|
109
|
+
* execution, user typing, etc.). Set `PI_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS=0`
|
|
110
|
+
* to disable.
|
|
111
|
+
*/
|
|
112
|
+
const CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS = 30_000;
|
|
96
113
|
/**
|
|
97
114
|
* Steady-state liveness ceiling for the Codex WebSocket transport. Distinct from
|
|
98
115
|
* the OMP-wide stream watchdog removed in #1392: a WebSocket can stay TCP-open
|
|
@@ -132,6 +149,24 @@ function isCodexStreamProgressEvent(event: unknown): boolean {
|
|
|
132
149
|
return typeof type === "string" && CODEX_ADDITIONAL_PROGRESS_EVENT_TYPES.has(type);
|
|
133
150
|
}
|
|
134
151
|
|
|
152
|
+
type CodexWebSocketTimeoutDetails = {
|
|
153
|
+
lastEventAt: number;
|
|
154
|
+
lastEventType?: string;
|
|
155
|
+
lastProgressAt: number;
|
|
156
|
+
lastProgressEventType?: string;
|
|
157
|
+
};
|
|
158
|
+
|
|
159
|
+
function createCodexWebSocketTimeoutMessage(reason: string, details: CodexWebSocketTimeoutDetails): string {
|
|
160
|
+
const now = Date.now();
|
|
161
|
+
const lastEvent = details.lastEventType
|
|
162
|
+
? `${details.lastEventType} ${Math.max(0, now - details.lastEventAt)}ms ago`
|
|
163
|
+
: "none";
|
|
164
|
+
const lastProgress = details.lastProgressEventType
|
|
165
|
+
? `${details.lastProgressEventType} ${Math.max(0, now - details.lastProgressAt)}ms ago`
|
|
166
|
+
: "none";
|
|
167
|
+
return `${reason} (last event: ${lastEvent}; last progress: ${lastProgress})`;
|
|
168
|
+
}
|
|
169
|
+
|
|
135
170
|
type CodexTransport = "sse" | "websocket";
|
|
136
171
|
type CodexEventItem = ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | ResponseCustomToolCall;
|
|
137
172
|
type CodexOutputBlock = ThinkingContent | TextContent | (ToolCall & { partialJson: string });
|
|
@@ -254,6 +289,25 @@ function getCodexWebSocketFirstEventTimeoutMs(): number {
|
|
|
254
289
|
);
|
|
255
290
|
}
|
|
256
291
|
|
|
292
|
+
function getCodexWebSocketPingIntervalMs(): number {
|
|
293
|
+
return parseCodexNonNegativeInteger($env.PI_CODEX_WEBSOCKET_PING_INTERVAL_MS, CODEX_WEBSOCKET_PING_INTERVAL_MS);
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
function getCodexWebSocketPongTimeoutMs(): number {
|
|
297
|
+
return parseCodexNonNegativeInteger($env.PI_CODEX_WEBSOCKET_PONG_TIMEOUT_MS, CODEX_WEBSOCKET_PONG_TIMEOUT_MS);
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
function getCodexWebSocketMessageQueueCapacity(): number {
|
|
301
|
+
return parseCodexPositiveInteger(
|
|
302
|
+
$env.PI_CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY,
|
|
303
|
+
CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY,
|
|
304
|
+
);
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
function getCodexWebSocketMaxIdleReuseMs(): number {
|
|
308
|
+
return parseCodexNonNegativeInteger($env.PI_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS, CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS);
|
|
309
|
+
}
|
|
310
|
+
|
|
257
311
|
function createCodexProviderSessionState(): CodexProviderSessionState {
|
|
258
312
|
const state: CodexProviderSessionState = {
|
|
259
313
|
webSocketSessions: new Map(),
|
|
@@ -301,6 +355,10 @@ function isCodexWebSocketRetryableStreamError(error: unknown): boolean {
|
|
|
301
355
|
message.includes("websocket closed (") ||
|
|
302
356
|
message.includes("websocket closed before response completion") ||
|
|
303
357
|
message.includes("websocket connection is unavailable") ||
|
|
358
|
+
message.includes("websocket send failed") ||
|
|
359
|
+
message.includes("websocket ping failed") ||
|
|
360
|
+
message.includes("websocket pong timeout") ||
|
|
361
|
+
message.includes("websocket message queue exceeded") ||
|
|
304
362
|
message.includes("idle timeout waiting for websocket") ||
|
|
305
363
|
message.includes("timeout waiting for first websocket event") ||
|
|
306
364
|
message.includes("syntaxerror") ||
|
|
@@ -1986,6 +2044,17 @@ class CodexWebSocketConnection {
|
|
|
1986
2044
|
#connectPromise?: Promise<void>;
|
|
1987
2045
|
#activeRequest = false;
|
|
1988
2046
|
#streamObserver?: (event: RawSseEvent) => void;
|
|
2047
|
+
#heartbeatInterval: NodeJS.Timeout | undefined;
|
|
2048
|
+
#removePongListener?: () => void;
|
|
2049
|
+
/**
|
|
2050
|
+
* Wall-clock of the most recent inbound activity on this socket — any
|
|
2051
|
+
* decoded message, any pong, or the moment the handshake completed. Used
|
|
2052
|
+
* by {@link isHealthyForReuse} so we don't write a continuation frame into
|
|
2053
|
+
* a TCP-open-but-server-evicted socket whose `readyState` still says OPEN.
|
|
2054
|
+
*/
|
|
2055
|
+
#lastInboundAt = 0;
|
|
2056
|
+
/** Wall-clock of the last heartbeat ping we issued; 0 if none yet. */
|
|
2057
|
+
#lastPingAt = 0;
|
|
1989
2058
|
|
|
1990
2059
|
constructor(url: string, headers: Record<string, string>, options: CodexWebSocketConnectionOptions) {
|
|
1991
2060
|
this.#url = url;
|
|
@@ -1997,6 +2066,29 @@ class CodexWebSocketConnection {
|
|
|
1997
2066
|
return this.#socket?.readyState === WebSocket.OPEN;
|
|
1998
2067
|
}
|
|
1999
2068
|
|
|
2069
|
+
/**
|
|
2070
|
+
* Stricter variant of {@link isOpen} for the connection-pool reuse gate.
|
|
2071
|
+
* Refuses sockets that have been silent past {@link CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS}.
|
|
2072
|
+
*
|
|
2073
|
+
* Bun's `WebSocket` does not always surface server-side eviction (no
|
|
2074
|
+
* `onclose`, no `onerror`), so a socket can sit in readyState OPEN long
|
|
2075
|
+
* after the upstream has dropped it. Reusing such a socket sends the next
|
|
2076
|
+
* `response.create` into a half-open write buffer and parks the reader
|
|
2077
|
+
* until the first-event / idle timeout fires (issue #1450). Forcing a
|
|
2078
|
+
* reconnect on any suspect socket trades a sub-second handshake for a
|
|
2079
|
+
* 60–300 s stall.
|
|
2080
|
+
*/
|
|
2081
|
+
isHealthyForReuse(): boolean {
|
|
2082
|
+
if (!this.isOpen()) return false;
|
|
2083
|
+
const maxIdleMs = getCodexWebSocketMaxIdleReuseMs();
|
|
2084
|
+
if (maxIdleMs <= 0) return true;
|
|
2085
|
+
// Initial connect sets #lastInboundAt; any later message or pong refreshes
|
|
2086
|
+
// it. A zero value means the field was never initialized, which itself is
|
|
2087
|
+
// a desync — treat as unhealthy.
|
|
2088
|
+
if (this.#lastInboundAt === 0) return false;
|
|
2089
|
+
return Date.now() - this.#lastInboundAt <= maxIdleMs;
|
|
2090
|
+
}
|
|
2091
|
+
|
|
2000
2092
|
matchesAuth(headers: Record<string, string>): boolean {
|
|
2001
2093
|
return this.#headers.authorization === headers.authorization;
|
|
2002
2094
|
}
|
|
@@ -2009,6 +2101,7 @@ class CodexWebSocketConnection {
|
|
|
2009
2101
|
this.#socket.close(1000, reason);
|
|
2010
2102
|
}
|
|
2011
2103
|
this.#socket = null;
|
|
2104
|
+
this.#stopHeartbeat();
|
|
2012
2105
|
}
|
|
2013
2106
|
|
|
2014
2107
|
async connect(signal?: AbortSignal): Promise<void> {
|
|
@@ -2058,7 +2151,9 @@ class CodexWebSocketConnection {
|
|
|
2058
2151
|
if (!settled) {
|
|
2059
2152
|
settled = true;
|
|
2060
2153
|
clearPending();
|
|
2154
|
+
this.#lastInboundAt = Date.now();
|
|
2061
2155
|
this.#captureHandshakeHeaders(socket, event);
|
|
2156
|
+
this.#startHeartbeat(socket);
|
|
2062
2157
|
resolve();
|
|
2063
2158
|
}
|
|
2064
2159
|
};
|
|
@@ -2079,6 +2174,7 @@ class CodexWebSocketConnection {
|
|
|
2079
2174
|
};
|
|
2080
2175
|
socket.onclose = event => {
|
|
2081
2176
|
this.#socket = null;
|
|
2177
|
+
this.#stopHeartbeat();
|
|
2082
2178
|
if (!settled) {
|
|
2083
2179
|
settled = true;
|
|
2084
2180
|
clearPending();
|
|
@@ -2089,6 +2185,10 @@ class CodexWebSocketConnection {
|
|
|
2089
2185
|
this.#push(null);
|
|
2090
2186
|
};
|
|
2091
2187
|
socket.onmessage = event => {
|
|
2188
|
+
// Stamp inbound activity before parsing so even malformed frames refresh
|
|
2189
|
+
// the liveness clock — what matters for reuse health is that the upstream
|
|
2190
|
+
// is still talking to us, not that every frame is well-formed.
|
|
2191
|
+
this.#lastInboundAt = Date.now();
|
|
2092
2192
|
try {
|
|
2093
2193
|
const text = typeof event.data === "string" ? event.data : Buffer.from(event.data).toString("utf-8");
|
|
2094
2194
|
if (!text) return;
|
|
@@ -2132,6 +2232,17 @@ class CodexWebSocketConnection {
|
|
|
2132
2232
|
}
|
|
2133
2233
|
this.#activeRequest = true;
|
|
2134
2234
|
this.#streamObserver = onSseEvent;
|
|
2235
|
+
// Drain any non-error frames left over from a prior request before sending.
|
|
2236
|
+
// `processCodexResponseStream` breaks its `for-await` on the terminal event,
|
|
2237
|
+
// which interrupts our generator at `yield next` (the post-yield `break`
|
|
2238
|
+
// never runs). Any frame that landed between the consumer's break and the
|
|
2239
|
+
// generator's `finally` lingers in `#queue` and would otherwise become the
|
|
2240
|
+
// first frame of THIS request — a stale `response.completed` would end the
|
|
2241
|
+
// turn immediately with empty output, and a stale non-progress frame would
|
|
2242
|
+
// flip `sawFirstEvent` and silently downgrade the first-event timeout to
|
|
2243
|
+
// the longer idle timeout. Transport errors are preserved so we surface
|
|
2244
|
+
// the death signal instead of writing into a dead socket.
|
|
2245
|
+
this.#dropStaleFrames();
|
|
2135
2246
|
const onAbort = () => {
|
|
2136
2247
|
this.close("aborted");
|
|
2137
2248
|
this.#push(createCodexWebSocketTransportError("request was aborted"));
|
|
@@ -2147,23 +2258,48 @@ class CodexWebSocketConnection {
|
|
|
2147
2258
|
try {
|
|
2148
2259
|
const requestPayload = JSON.stringify(request);
|
|
2149
2260
|
notifyCodexWebSocketOutbound(onSseEvent, request, requestPayload);
|
|
2150
|
-
|
|
2261
|
+
try {
|
|
2262
|
+
this.#socket.send(requestPayload);
|
|
2263
|
+
} catch (error) {
|
|
2264
|
+
throw createCodexWebSocketTransportError(
|
|
2265
|
+
`websocket send failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
2266
|
+
);
|
|
2267
|
+
}
|
|
2151
2268
|
let sawFirstEvent = false;
|
|
2152
2269
|
const { idleTimeoutMs, firstEventTimeoutMs } = timeouts;
|
|
2153
2270
|
let lastProgressAt = Date.now();
|
|
2271
|
+
let lastProgressEventType: string | undefined;
|
|
2272
|
+
let lastEventAt = lastProgressAt;
|
|
2273
|
+
let lastEventType: string | undefined;
|
|
2154
2274
|
while (true) {
|
|
2155
2275
|
let timeoutMs: number | undefined;
|
|
2156
2276
|
let timeoutReason: string;
|
|
2157
2277
|
if (sawFirstEvent) {
|
|
2158
|
-
timeoutReason = "idle timeout waiting for websocket";
|
|
2278
|
+
timeoutReason = createCodexWebSocketTimeoutMessage("idle timeout waiting for websocket", {
|
|
2279
|
+
lastEventAt,
|
|
2280
|
+
lastEventType,
|
|
2281
|
+
lastProgressAt,
|
|
2282
|
+
lastProgressEventType,
|
|
2283
|
+
});
|
|
2159
2284
|
if (idleTimeoutMs !== undefined && idleTimeoutMs > 0) {
|
|
2160
2285
|
timeoutMs = idleTimeoutMs - (Date.now() - lastProgressAt);
|
|
2161
2286
|
if (timeoutMs <= 0) {
|
|
2287
|
+
logCodexDebug("codex websocket idle timeout", {
|
|
2288
|
+
lastEventType,
|
|
2289
|
+
lastProgressEventType,
|
|
2290
|
+
msSinceLastEvent: Date.now() - lastEventAt,
|
|
2291
|
+
msSinceLastProgress: Date.now() - lastProgressAt,
|
|
2292
|
+
});
|
|
2162
2293
|
throw createCodexWebSocketTransportError(timeoutReason);
|
|
2163
2294
|
}
|
|
2164
2295
|
}
|
|
2165
2296
|
} else {
|
|
2166
|
-
timeoutReason = "timeout waiting for first websocket event";
|
|
2297
|
+
timeoutReason = createCodexWebSocketTimeoutMessage("timeout waiting for first websocket event", {
|
|
2298
|
+
lastEventAt,
|
|
2299
|
+
lastEventType,
|
|
2300
|
+
lastProgressAt,
|
|
2301
|
+
lastProgressEventType,
|
|
2302
|
+
});
|
|
2167
2303
|
if (firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0) {
|
|
2168
2304
|
timeoutMs = firstEventTimeoutMs;
|
|
2169
2305
|
}
|
|
@@ -2176,11 +2312,14 @@ class CodexWebSocketConnection {
|
|
|
2176
2312
|
throw createCodexWebSocketTransportError("websocket closed before response completion");
|
|
2177
2313
|
}
|
|
2178
2314
|
sawFirstEvent = true;
|
|
2315
|
+
const eventType = typeof next.type === "string" ? next.type : "";
|
|
2316
|
+
lastEventAt = Date.now();
|
|
2317
|
+
lastEventType = eventType || undefined;
|
|
2179
2318
|
if (isCodexStreamProgressEvent(next)) {
|
|
2180
|
-
lastProgressAt = Date.now();
|
|
2319
|
+
lastProgressAt = lastEventAt;
|
|
2320
|
+
lastProgressEventType = lastEventType;
|
|
2181
2321
|
}
|
|
2182
2322
|
yield next;
|
|
2183
|
-
const eventType = typeof next.type === "string" ? next.type : "";
|
|
2184
2323
|
if (
|
|
2185
2324
|
eventType === "response.completed" ||
|
|
2186
2325
|
eventType === "response.done" ||
|
|
@@ -2207,7 +2346,133 @@ class CodexWebSocketConnection {
|
|
|
2207
2346
|
this.#onHandshakeHeaders(headers);
|
|
2208
2347
|
}
|
|
2209
2348
|
|
|
2349
|
+
#startHeartbeat(socket: Bun.WebSocket): void {
|
|
2350
|
+
this.#stopHeartbeat();
|
|
2351
|
+
const intervalMs = getCodexWebSocketPingIntervalMs();
|
|
2352
|
+
if (intervalMs <= 0) return;
|
|
2353
|
+
|
|
2354
|
+
this.#lastPingAt = 0;
|
|
2355
|
+
const socketEventTarget = socket as EventTarget;
|
|
2356
|
+
const onPong = () => {
|
|
2357
|
+
// Pongs are inbound activity — refresh the reuse-health clock so a quiet
|
|
2358
|
+
// but ping-responsive socket stays trustworthy across requests.
|
|
2359
|
+
this.#lastInboundAt = Date.now();
|
|
2360
|
+
};
|
|
2361
|
+
if (
|
|
2362
|
+
typeof socketEventTarget.addEventListener === "function" &&
|
|
2363
|
+
typeof socketEventTarget.removeEventListener === "function"
|
|
2364
|
+
) {
|
|
2365
|
+
socketEventTarget.addEventListener("pong", onPong);
|
|
2366
|
+
this.#removePongListener = () => socketEventTarget.removeEventListener("pong", onPong);
|
|
2367
|
+
}
|
|
2368
|
+
|
|
2369
|
+
this.#heartbeatInterval = setInterval(() => {
|
|
2370
|
+
if (this.#socket !== socket || socket.readyState !== WebSocket.OPEN) {
|
|
2371
|
+
this.#stopHeartbeat();
|
|
2372
|
+
return;
|
|
2373
|
+
}
|
|
2374
|
+
// Fail-closed on missing pongs even when no pong has ever been observed.
|
|
2375
|
+
// The previous `#observedPong &&` guard disabled the timeout entirely on
|
|
2376
|
+
// runtimes where Bun does not surface a `pong` event for our outgoing
|
|
2377
|
+
// pings (issue #1450) — letting truly dead sockets sail through the
|
|
2378
|
+
// pool until the per-request first-event / idle timeout (60–300 s)
|
|
2379
|
+
// finally fired. Instead, trigger on inbound silence: if we sent a
|
|
2380
|
+
// ping at least `pongTimeoutMs` ago and have received no traffic of
|
|
2381
|
+
// any kind (data frame or pong) since, the socket is unhealthy.
|
|
2382
|
+
const pongTimeoutMs = getCodexWebSocketPongTimeoutMs();
|
|
2383
|
+
if (
|
|
2384
|
+
pongTimeoutMs > 0 &&
|
|
2385
|
+
this.#lastPingAt > 0 &&
|
|
2386
|
+
this.#lastPingAt > this.#lastInboundAt &&
|
|
2387
|
+
Date.now() - this.#lastPingAt > pongTimeoutMs
|
|
2388
|
+
) {
|
|
2389
|
+
this.#failQueue(createCodexWebSocketTransportError("websocket pong timeout"), "pong-timeout");
|
|
2390
|
+
return;
|
|
2391
|
+
}
|
|
2392
|
+
if (typeof socket.ping !== "function") {
|
|
2393
|
+
this.#stopHeartbeat();
|
|
2394
|
+
return;
|
|
2395
|
+
}
|
|
2396
|
+
try {
|
|
2397
|
+
socket.ping();
|
|
2398
|
+
this.#lastPingAt = Date.now();
|
|
2399
|
+
} catch (error) {
|
|
2400
|
+
this.#failQueue(
|
|
2401
|
+
createCodexWebSocketTransportError(
|
|
2402
|
+
`websocket ping failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
2403
|
+
),
|
|
2404
|
+
"ping-failed",
|
|
2405
|
+
);
|
|
2406
|
+
}
|
|
2407
|
+
}, intervalMs);
|
|
2408
|
+
this.#heartbeatInterval.unref();
|
|
2409
|
+
}
|
|
2410
|
+
|
|
2411
|
+
#stopHeartbeat(): void {
|
|
2412
|
+
if (this.#heartbeatInterval) {
|
|
2413
|
+
clearInterval(this.#heartbeatInterval);
|
|
2414
|
+
this.#heartbeatInterval = undefined;
|
|
2415
|
+
}
|
|
2416
|
+
if (this.#removePongListener) {
|
|
2417
|
+
this.#removePongListener();
|
|
2418
|
+
this.#removePongListener = undefined;
|
|
2419
|
+
}
|
|
2420
|
+
this.#lastPingAt = 0;
|
|
2421
|
+
}
|
|
2422
|
+
|
|
2423
|
+
#failQueue(error: Error, closeReason: string): void {
|
|
2424
|
+
logCodexDebug("codex websocket transport failure", { error: error.message, closeReason });
|
|
2425
|
+
this.#queue.length = 0;
|
|
2426
|
+
this.#queue.push(error);
|
|
2427
|
+
this.close(closeReason);
|
|
2428
|
+
this.#wakeWaiters();
|
|
2429
|
+
}
|
|
2430
|
+
|
|
2431
|
+
/**
|
|
2432
|
+
* Discard data frames from a previous request that remained in `#queue`
|
|
2433
|
+
* after the consumer broke out on the terminal event. Preserves any queued
|
|
2434
|
+
* transport error (from `onerror` / `onclose` / `#failQueue`) so the next
|
|
2435
|
+
* `#nextMessage` surfaces the death signal instead of waiting it out.
|
|
2436
|
+
*
|
|
2437
|
+
* Returns the number of frames dropped (test/debug visibility only).
|
|
2438
|
+
*/
|
|
2439
|
+
#dropStaleFrames(): number {
|
|
2440
|
+
if (this.#queue.length === 0) return 0;
|
|
2441
|
+
const surviving = this.#queue.filter(item => item instanceof Error);
|
|
2442
|
+
const dropped = this.#queue.length - surviving.length;
|
|
2443
|
+
if (dropped === 0) return 0;
|
|
2444
|
+
this.#queue.length = 0;
|
|
2445
|
+
for (const item of surviving) this.#queue.push(item);
|
|
2446
|
+
logCodexDebug("codex websocket dropped stale frames before request", { dropped });
|
|
2447
|
+
return dropped;
|
|
2448
|
+
}
|
|
2449
|
+
|
|
2450
|
+
#wakeWaiters(): void {
|
|
2451
|
+
for (;;) {
|
|
2452
|
+
const waiter = this.#waiters.shift();
|
|
2453
|
+
if (!waiter) break;
|
|
2454
|
+
waiter();
|
|
2455
|
+
}
|
|
2456
|
+
}
|
|
2457
|
+
|
|
2210
2458
|
#push(item: Record<string, unknown> | Error | null): void {
|
|
2459
|
+
if (item instanceof Error) {
|
|
2460
|
+
if (!(this.#queue[0] instanceof Error)) {
|
|
2461
|
+
this.#queue.length = 0;
|
|
2462
|
+
}
|
|
2463
|
+
this.#queue.push(item);
|
|
2464
|
+
this.#wakeWaiters();
|
|
2465
|
+
return;
|
|
2466
|
+
}
|
|
2467
|
+
if (item !== null && this.#queue.length >= getCodexWebSocketMessageQueueCapacity()) {
|
|
2468
|
+
this.#failQueue(
|
|
2469
|
+
createCodexWebSocketTransportError(
|
|
2470
|
+
`websocket message queue exceeded ${getCodexWebSocketMessageQueueCapacity()} items`,
|
|
2471
|
+
),
|
|
2472
|
+
"queue-overflow",
|
|
2473
|
+
);
|
|
2474
|
+
return;
|
|
2475
|
+
}
|
|
2211
2476
|
this.#queue.push(item);
|
|
2212
2477
|
const waiter = this.#waiters.shift();
|
|
2213
2478
|
if (waiter) waiter();
|
|
@@ -2250,12 +2515,22 @@ async function getOrCreateCodexWebSocketConnection(
|
|
|
2250
2515
|
): Promise<CodexWebSocketConnection> {
|
|
2251
2516
|
const headerRecord = headersToRecord(headers);
|
|
2252
2517
|
if (state.connection?.isOpen()) {
|
|
2253
|
-
if (state.connection.matchesAuth(headerRecord)) {
|
|
2518
|
+
if (!state.connection.matchesAuth(headerRecord)) {
|
|
2519
|
+
state.connection.close("token-refresh");
|
|
2520
|
+
resetCodexWebSocketAppendState(state);
|
|
2521
|
+
} else if (state.connection.isHealthyForReuse()) {
|
|
2254
2522
|
logger.time("codexWs:reuseOpenSocket");
|
|
2255
2523
|
return state.connection;
|
|
2524
|
+
} else {
|
|
2525
|
+
// Open in readyState but no inbound traffic recently — likely server-
|
|
2526
|
+
// evicted (issue #1450). Force a fresh handshake instead of writing
|
|
2527
|
+
// `response.create` into a half-open buffer and waiting out the
|
|
2528
|
+
// first-event timeout. Drop append state because the new socket
|
|
2529
|
+
// won't carry the prior `previous_response_id` context.
|
|
2530
|
+
logCodexDebug("codex websocket reuse rejected by health check", {});
|
|
2531
|
+
state.connection.close("stale-reuse");
|
|
2532
|
+
resetCodexWebSocketAppendState(state);
|
|
2256
2533
|
}
|
|
2257
|
-
state.connection.close("token-refresh");
|
|
2258
|
-
resetCodexWebSocketAppendState(state);
|
|
2259
2534
|
}
|
|
2260
2535
|
state.connection?.close("reconnect");
|
|
2261
2536
|
resetCodexWebSocketAppendState(state);
|
|
@@ -41,6 +41,7 @@ import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
|
|
|
41
41
|
import { notifyProviderResponse } from "../utils/provider-response";
|
|
42
42
|
import { callWithCopilotModelRetry } from "../utils/retry";
|
|
43
43
|
import { adaptSchemaForStrict, NO_STRICT, sanitizeSchemaForOpenAIResponses, toolWireSchema } from "../utils/schema";
|
|
44
|
+
import { createSdkStreamRequestOptions } from "../utils/sdk-stream-timeout";
|
|
44
45
|
import { wrapFetchForSseDebug } from "../utils/sse-debug";
|
|
45
46
|
import { mapToOpenAIResponsesToolChoice, type OpenAIResponsesToolChoice } from "../utils/tool-choice";
|
|
46
47
|
import {
|
|
@@ -208,10 +209,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|
|
208
209
|
};
|
|
209
210
|
const openaiStream = await callWithCopilotModelRetry(
|
|
210
211
|
async () => {
|
|
211
|
-
const requestOptions =
|
|
212
|
-
requestTimeoutMs === undefined
|
|
213
|
-
? { signal: requestSignal }
|
|
214
|
-
: { signal: requestSignal, timeout: requestTimeoutMs };
|
|
212
|
+
const requestOptions = createSdkStreamRequestOptions(requestSignal, requestTimeoutMs);
|
|
215
213
|
let requestTimeout: NodeJS.Timeout | undefined;
|
|
216
214
|
if (requestTimeoutMs !== undefined) {
|
|
217
215
|
requestTimeout = setTimeout(
|