@oh-my-pi/pi-ai 15.11.7 → 15.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.12.0] - 2026-06-12
6
+
7
+ ### Fixed
8
+
9
+ - Fixed Anthropic requests bypassing lone-surrogate sanitization after payload hooks or Anthropic-origin tool-call replay: the model itself can emit unpaired surrogate escapes in its own tool-argument JSON (streamed out fine, then rejected with `400 The request body is not valid JSON` on every subsequent request, bricking the session). The final Anthropic payload is now deep-sanitized with `toWellFormed()` immediately before SDK serialization; the pass is identity-preserving, so well-formed arguments stay byte-identical and prompt-cache prefixes are unaffected.
10
+
11
+ ## [15.11.8] - 2026-06-12
12
+
13
+ ### Breaking Changes
14
+
15
+ - Removed the Codex SSE stateful transport path, so SSE turns no longer send `previous_response_id` with delta input and now always send the full transcript
16
+
17
+ ### Changed
18
+
19
+ - Scoped `x-codex-turn-state` handling to within-turn continuations so only tool-loop follow-ups include the turn-state header and new user turns start without it
20
+
21
+ ### Removed
22
+
23
+ - Removed the `statefulResponses` option from `OpenAICodexResponsesOptions`, and SSE stateful mode is no longer controlled by the `PI_CODEX_STATEFUL` env flag
24
+
25
+ ### Fixed
26
+
27
+ - Fixed the platform OpenAI Responses and Codex websocket stale-chain classifiers missing the "Unsupported parameter: previous_response_id" rejection phrasing (FastAPI-style `detail` body with no `error.code`), so a chained turn now falls back to a full-transcript replay instead of surfacing the 400
28
+ - Fixed the HTTP-400 raw-request dump for Codex SSE to record the body actually sent on the wire instead of the pre-transport request body, which made chained-request failures look like the rejected parameter was never sent
29
+
5
30
  ## [15.11.7] - 2026-06-12
6
31
 
7
32
  ### Added
@@ -3338,4 +3363,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
3338
3363
 
3339
3364
  ## [0.9.4] - 2025-11-26
3340
3365
 
3341
- Initial release with multi-provider LLM support.
3366
+ Initial release with multi-provider LLM support.
@@ -11,12 +11,6 @@ export interface OpenAICodexResponsesOptions extends StreamOptions {
11
11
  codexMode?: boolean;
12
12
  toolChoice?: ToolChoice;
13
13
  preferWebsockets?: boolean;
14
- /**
15
- * Enable stateful SSE turns: chain via `previous_response_id` + delta input
16
- * instead of replaying the full transcript. Requires `sessionId` +
17
- * `providerSessionState`. `false` vetoes the `PI_CODEX_STATEFUL` env flag.
18
- */
19
- statefulResponses?: boolean;
20
14
  serviceTier?: ServiceTier;
21
15
  /**
22
16
  * Opt into the Responses Lite transport contract. Sends
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.11.7",
4
+ "version": "15.12.0",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -38,8 +38,8 @@
38
38
  },
39
39
  "dependencies": {
40
40
  "@bufbuild/protobuf": "^2.12.0",
41
- "@oh-my-pi/pi-catalog": "15.11.7",
42
- "@oh-my-pi/pi-utils": "15.11.7",
41
+ "@oh-my-pi/pi-catalog": "15.12.0",
42
+ "@oh-my-pi/pi-utils": "15.12.0",
43
43
  "openai": "^6.39.0",
44
44
  "partial-json": "^0.1.7",
45
45
  "zod": "4.4.3"
@@ -1639,6 +1639,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1639
1639
  if (replacementPayload !== undefined) {
1640
1640
  nextParams = replacementPayload as typeof nextParams;
1641
1641
  }
1642
+ nextParams = toWellFormedDeep(nextParams) as typeof nextParams;
1642
1643
  rawRequestDump = {
1643
1644
  provider: model.provider,
1644
1645
  api: output.api,
@@ -2911,11 +2912,22 @@ function ensureErrorToolResultWireContent(
2911
2912
  : [{ type: "text", text: EMPTY_ERROR_TOOL_RESULT_TEXT }];
2912
2913
  }
2913
2914
 
2914
- function buildToolResultBlock(model: Model<"anthropic-messages">, msg: ToolResultMessage): ContentBlockParam {
2915
- const content = ensureErrorToolResultWireContent(
2916
- convertContentBlocks(msg.content, model.input.includes("image")),
2917
- msg.isError,
2918
- );
2915
+ function buildToolResultBlock(
2916
+ model: Model<"anthropic-messages">,
2917
+ msg: ToolResultMessage,
2918
+ hoistedImages: ContentBlockParam[],
2919
+ ): ContentBlockParam {
2920
+ let content = convertContentBlocks(msg.content, model.input.includes("image"));
2921
+ // Anthropic rejects images inside error tool results ("all content must be
2922
+ // type `text` if `is_error` is true") — keep the text in the block and
2923
+ // hoist the images after the message's tool_result run.
2924
+ if (msg.isError && typeof content !== "string" && content.some(block => block.type === "image")) {
2925
+ for (const block of content) {
2926
+ if (block.type === "image") hoistedImages.push(block);
2927
+ }
2928
+ content = content.filter(block => block.type === "text");
2929
+ }
2930
+ content = ensureErrorToolResultWireContent(content, msg.isError);
2919
2931
  const block: ContentBlockParam = {
2920
2932
  type: "tool_result",
2921
2933
  tool_use_id: msg.toolCallId,
@@ -3064,13 +3076,12 @@ export function convertAnthropicMessages(
3064
3076
  type: "tool_use",
3065
3077
  id: block.id,
3066
3078
  name: isOAuthToken ? applyClaudeToolPrefix(block.name) : block.name,
3067
- // Anthropic-origin arguments are guaranteed well-formed (they came
3068
- // from the API's own JSON); cross-API replays can carry lone
3069
- // surrogates that Anthropic's strict UTF-8 validation rejects.
3070
- input:
3071
- msg.api === "anthropic-messages"
3072
- ? (block.arguments ?? {})
3073
- : toWellFormedDeep(block.arguments ?? {}),
3079
+ // Always sanitize: the model itself can emit lone-surrogate escapes
3080
+ // in tool-argument JSON (streamed out fine, rejected with a 400 on
3081
+ // replay by Anthropic's strict UTF-8 validation). toWellFormedDeep
3082
+ // is identity-preserving, so well-formed arguments stay
3083
+ // byte-identical and prompt-cache prefixes are unaffected.
3084
+ input: toWellFormedDeep(block.arguments ?? {}),
3074
3085
  });
3075
3086
  }
3076
3087
  }
@@ -3082,21 +3093,30 @@ export function convertAnthropicMessages(
3082
3093
  } else if (msg.role === "toolResult") {
3083
3094
  // Collect all consecutive toolResult messages, needed for z.ai Anthropic endpoint
3084
3095
  const toolResults: ContentBlockParam[] = [];
3096
+ // Images stripped out of error tool results, re-attached after the run.
3097
+ const hoistedImages: ContentBlockParam[] = [];
3085
3098
 
3086
3099
  // Add the current tool result
3087
- toolResults.push(buildToolResultBlock(model, msg));
3100
+ toolResults.push(buildToolResultBlock(model, msg, hoistedImages));
3088
3101
 
3089
3102
  // Look ahead for consecutive toolResult messages
3090
3103
  let j = i + 1;
3091
3104
  while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") {
3092
3105
  const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult
3093
- toolResults.push(buildToolResultBlock(model, nextMsg));
3106
+ toolResults.push(buildToolResultBlock(model, nextMsg, hoistedImages));
3094
3107
  j++;
3095
3108
  }
3096
3109
 
3097
3110
  // Skip the messages we've already processed
3098
3111
  i = j - 1;
3099
3112
 
3113
+ if (hoistedImages.length > 0) {
3114
+ toolResults.push(
3115
+ { type: "text", text: "Attached image(s) from the tool result(s) above:" },
3116
+ ...hoistedImages,
3117
+ );
3118
+ }
3119
+
3100
3120
  // Add a single user message with all tool results
3101
3121
  params.push({
3102
3122
  role: "user",
@@ -96,12 +96,6 @@ export interface OpenAICodexResponsesOptions extends StreamOptions {
96
96
  codexMode?: boolean;
97
97
  toolChoice?: ToolChoice;
98
98
  preferWebsockets?: boolean;
99
- /**
100
- * Enable stateful SSE turns: chain via `previous_response_id` + delta input
101
- * instead of replaying the full transcript. Requires `sessionId` +
102
- * `providerSessionState`. `false` vetoes the `PI_CODEX_STATEFUL` env flag.
103
- */
104
- statefulResponses?: boolean;
105
99
  serviceTier?: ServiceTier;
106
100
  /**
107
101
  * Opt into the Responses Lite transport contract. Sends
@@ -190,9 +184,6 @@ const CODEX_WHITESPACE_TOOL_CALL_ARGUMENT_DELTA_EVENT_LIMIT = 256;
190
184
  const CODEX_WHITESPACE_TOOL_CALL_ARGUMENT_DELTA_CHAR_LIMIT = 16 * 1024;
191
185
  const CODEX_WHITESPACE_LOOP_RETRY_LIMIT = 2;
192
186
  const CODEX_WHITESPACE_LOOP_RETRY_DELAY_MS = 250;
193
- /** Consecutive stale-previous-response SSE failures before chaining is disabled for the session. */
194
- const CODEX_SSE_CHAIN_STALE_FAILURE_LIMIT = 3;
195
- const CODEX_STATEFUL_DEFAULT = true;
196
187
 
197
188
  function isCodexStreamProgressEvent(event: unknown): boolean {
198
189
  if (isOpenAIResponsesProgressEvent(event)) return true;
@@ -237,9 +228,9 @@ export interface OpenAICodexWebSocketDebugStats {
237
228
  }
238
229
 
239
230
  /**
240
- * Per-session transport state shared by BOTH transports: turn chaining
241
- * (`previous_response_id` baseline), turn-state/models-etag headers, websocket
242
- * connection pooling, and debug stats. The name is historical — SSE-only
231
+ * Per-session transport state shared by BOTH transports: websocket turn
232
+ * chaining (`previous_response_id` baseline), turn-state/models-etag headers,
233
+ * websocket connection pooling, and debug stats. The name is historical — SSE-only
243
234
  * sessions use it too.
244
235
  */
245
236
  type CodexWebSocketSessionState = {
@@ -248,12 +239,6 @@ type CodexWebSocketSessionState = {
248
239
  lastResponseId?: string;
249
240
  lastResponseItems?: InputItem[];
250
241
  canAppend: boolean;
251
- /** Transport that minted lastResponseId; chaining requires the next request to use the same transport. */
252
- chainTransport?: CodexTransport;
253
- /** Set once SSE chaining is judged unsupported for this session (circuit breaker). */
254
- disableSseChaining: boolean;
255
- /** Consecutive stale-previous-response failures on SSE; reset on a successful chained completion. */
256
- sseChainStaleFailures: number;
257
242
  turnState?: string;
258
243
  modelsEtag?: string;
259
244
  connection?: CodexWebSocketConnection;
@@ -295,10 +280,6 @@ interface CodexStreamRuntime {
295
280
  eventStream: AsyncGenerator<Record<string, unknown>>;
296
281
  requestBodyForState: RequestBody;
297
282
  transport: CodexTransport;
298
- /** Whether this request may capture/reset SSE chain state (stateful SSE gating + session state present). */
299
- sseChainingEnabled: boolean;
300
- /** previous_response_id sent on the wire for the currently open stream, if the request was a delta. */
301
- sentPreviousResponseId?: string;
302
283
  websocketState?: CodexWebSocketSessionState;
303
284
  currentItem: CodexEventItem | null;
304
285
  currentBlock: CodexOutputBlock | null;
@@ -357,11 +338,6 @@ function isCodexWebSocketEnvEnabled(): boolean {
357
338
  return $flag("PI_CODEX_WEBSOCKET");
358
339
  }
359
340
 
360
- function isCodexSseStatefulEnabled(options: OpenAICodexResponsesOptions | undefined): boolean {
361
- if (options?.statefulResponses === false) return false;
362
- return options?.statefulResponses === true || $flag("PI_CODEX_STATEFUL", CODEX_STATEFUL_DEFAULT);
363
- }
364
-
365
341
  function getCodexWebSocketRetryBudget(): number {
366
342
  return parseCodexNonNegativeInteger($env.PI_CODEX_WEBSOCKET_RETRY_BUDGET, CODEX_WEBSOCKET_RETRY_BUDGET);
367
343
  }
@@ -754,6 +730,11 @@ async function buildCodexRequestContext(
754
730
  }
755
731
  const websocketState =
756
732
  sessionKey && providerSessionState ? getCodexWebSocketSessionState(sessionKey, providerSessionState) : undefined;
733
+ if (websocketState && !isCodexWithinTurnContinuation(context)) {
734
+ // codex-rs scopes `x-codex-turn-state` to a single user turn: tool-loop
735
+ // follow-ups echo it, a new user turn starts without it.
736
+ websocketState.turnState = undefined;
737
+ }
757
738
  return {
758
739
  apiKey,
759
740
  accountId,
@@ -856,7 +837,6 @@ async function openInitialCodexEventStream(
856
837
  eventStream: AsyncGenerator<Record<string, unknown>>;
857
838
  requestBodyForState: RequestBody;
858
839
  transport: CodexTransport;
859
- sentPreviousResponseId?: string;
860
840
  }> {
861
841
  const { transformedBody, websocketState } = requestContext;
862
842
  if (websocketState && shouldUseCodexWebSocket(model, websocketState, options?.preferWebsockets)) {
@@ -906,18 +886,17 @@ async function openCodexWebSocketTransport(
906
886
  eventStream: AsyncGenerator<Record<string, unknown>>;
907
887
  requestBodyForState: RequestBody;
908
888
  transport: CodexTransport;
909
- sentPreviousResponseId?: string;
910
889
  }> {
911
- const chained = buildCodexChainedRequestBody(requestContext.transformedBody, websocketState, "websocket");
890
+ const chainedBody = buildCodexChainedRequestBody(requestContext.transformedBody, websocketState);
912
891
  // WebSocket frames cannot carry per-request HTTP headers, so the Responses
913
892
  // Lite marker rides in `client_metadata` on every `response.create`.
914
893
  const websocketRequest: Record<string, unknown> = {
915
894
  type: "response.create",
916
- ...chained.body,
895
+ ...chainedBody,
917
896
  ...(requestContext.responsesLite
918
897
  ? {
919
898
  client_metadata: {
920
- ...(chained.body.client_metadata ?? {}),
899
+ ...(chainedBody.client_metadata ?? {}),
921
900
  [CODEX_WS_RESPONSES_LITE_CLIENT_METADATA_KEY]: "true",
922
901
  },
923
902
  }
@@ -960,10 +939,24 @@ async function openCodexWebSocketTransport(
960
939
  eventStream,
961
940
  requestBodyForState,
962
941
  transport: "websocket",
963
- sentPreviousResponseId: chained.previousResponseId,
964
942
  };
965
943
  }
966
944
 
945
+ /**
946
+ * True when the request continues the current turn (everything after the
947
+ * last assistant message is tool results), false when a new user turn starts.
948
+ * Mirrors codex-rs, which scopes `x-codex-turn-state` to a single turn and
949
+ * clears it when the next one begins.
950
+ */
951
+ function isCodexWithinTurnContinuation(context: Context): boolean {
952
+ for (let i = context.messages.length - 1; i >= 0; i--) {
953
+ const role = context.messages[i]?.role;
954
+ if (role === "toolResult") continue;
955
+ return role === "assistant";
956
+ }
957
+ return false;
958
+ }
959
+
967
960
  async function openCodexSseTransport(
968
961
  model: Model<"openai-codex-responses">,
969
962
  requestContext: CodexRequestContext,
@@ -975,10 +968,11 @@ async function openCodexSseTransport(
975
968
  eventStream: AsyncGenerator<Record<string, unknown>>;
976
969
  requestBodyForState: RequestBody;
977
970
  transport: CodexTransport;
978
- sentPreviousResponseId?: string;
979
971
  }> {
980
- const open = async (wireBody: RequestBody) =>
981
- requestSetup.wrapCodexSseStream(
972
+ const open = async (wireBody: RequestBody) => {
973
+ // Keep the 400 dump honest: record the body actually sent on the wire.
974
+ requestContext.rawRequestDump.body = wireBody;
975
+ return requestSetup.wrapCodexSseStream(
982
976
  await openCodexSseEventStream(
983
977
  requestContext.url,
984
978
  requestContext.requestHeaders,
@@ -993,24 +987,8 @@ async function openCodexSseTransport(
993
987
  options?.fetch,
994
988
  ),
995
989
  );
996
- const statefulEnabled = isCodexSseStatefulEnabled(options) && state !== undefined && !state.disableSseChaining;
997
- const chained: CodexChainedRequest = statefulEnabled ? buildCodexChainedRequestBody(body, state, "sse") : { body };
998
- if (state && chained.previousResponseId) {
999
- try {
1000
- return {
1001
- eventStream: await open(chained.body),
1002
- requestBodyForState: structuredCloneJSON(body),
1003
- transport: "sse",
1004
- sentPreviousResponseId: chained.previousResponseId,
1005
- };
1006
- } catch (error) {
1007
- if (options?.signal?.aborted || !isCodexStalePreviousResponseError(error)) throw error;
1008
- // Server rejected the chain baseline: reset and retry this open once
1009
- // with the full transcript. Structurally cannot loop — the retry body
1010
- // carries no previous_response_id.
1011
- registerCodexSseChainStaleFailure(state, error);
1012
- }
1013
- }
990
+ };
991
+ recordCodexWebSocketRequestStats(state, body);
1014
992
  return { eventStream: await open(body), requestBodyForState: structuredCloneJSON(body), transport: "sse" };
1015
993
  }
1016
994
 
@@ -1030,7 +1008,6 @@ async function reopenCodexWebSocketRuntimeStream(
1030
1008
  runtime.eventStream = next.eventStream;
1031
1009
  runtime.requestBodyForState = next.requestBodyForState;
1032
1010
  runtime.transport = next.transport;
1033
- runtime.sentPreviousResponseId = next.sentPreviousResponseId;
1034
1011
  state.lastTransport = next.transport;
1035
1012
  } catch (error) {
1036
1013
  const wsError = error instanceof Error ? error : new Error(String(error));
@@ -1062,7 +1039,6 @@ async function reopenCodexSseRuntimeStream(
1062
1039
  runtime.eventStream = next.eventStream;
1063
1040
  runtime.requestBodyForState = next.requestBodyForState;
1064
1041
  runtime.transport = next.transport;
1065
- runtime.sentPreviousResponseId = next.sentPreviousResponseId;
1066
1042
  if (state) {
1067
1043
  state.lastTransport = next.transport;
1068
1044
  }
@@ -1072,16 +1048,12 @@ function createCodexStreamRuntime(initial: {
1072
1048
  eventStream: AsyncGenerator<Record<string, unknown>>;
1073
1049
  requestBodyForState: RequestBody;
1074
1050
  transport: CodexTransport;
1075
- sentPreviousResponseId?: string;
1076
1051
  websocketState?: CodexWebSocketSessionState;
1077
- sseChainingEnabled: boolean;
1078
1052
  }): CodexStreamRuntime {
1079
1053
  return {
1080
1054
  eventStream: initial.eventStream,
1081
1055
  requestBodyForState: initial.requestBodyForState,
1082
1056
  transport: initial.transport,
1083
- sseChainingEnabled: initial.sseChainingEnabled,
1084
- sentPreviousResponseId: initial.sentPreviousResponseId,
1085
1057
  websocketState: initial.websocketState,
1086
1058
  currentItem: null,
1087
1059
  currentBlock: null,
@@ -1594,12 +1566,7 @@ function handleOutputItemDone(
1594
1566
  function handleResponseCreated(runtime: CodexStreamRuntime, rawEvent: Record<string, unknown>): void {
1595
1567
  const response = (rawEvent as { response?: { id?: string } }).response;
1596
1568
  const state = runtime.websocketState;
1597
- if (
1598
- state &&
1599
- (runtime.transport === "websocket" || runtime.sseChainingEnabled) &&
1600
- typeof response?.id === "string" &&
1601
- response.id.length > 0
1602
- ) {
1569
+ if (state && runtime.transport === "websocket" && typeof response?.id === "string" && response.id.length > 0) {
1603
1570
  state.lastResponseId = response.id;
1604
1571
  }
1605
1572
  }
@@ -1635,17 +1602,22 @@ function handleResponseCompleted(
1635
1602
  }
1636
1603
 
1637
1604
  const state = runtime.websocketState;
1638
- if (state && (runtime.transport === "websocket" || runtime.sseChainingEnabled)) {
1639
- state.lastRequest = structuredCloneJSON(runtime.requestBodyForState);
1640
- if (typeof response?.id === "string" && response.id.length > 0) {
1641
- state.lastResponseId = response.id;
1642
- state.lastResponseItems = stripInputItemIds(structuredCloneJSON(runtime.nativeOutputItems));
1643
- state.canAppend = rawEvent.type === "response.done" || rawEvent.type === "response.completed";
1644
- state.chainTransport = runtime.transport;
1645
- if (runtime.sentPreviousResponseId) state.sseChainStaleFailures = 0;
1605
+ if (state) {
1606
+ if (runtime.transport !== "websocket") {
1607
+ // SSE turns never chain (previous_response_id is websocket-only on this
1608
+ // endpoint); a completed SSE turn also invalidates any websocket append
1609
+ // baseline, which no longer matches the transcript.
1610
+ resetCodexWebSocketAppendState(state);
1646
1611
  } else {
1647
- // Without a response id the append baseline cannot be trusted.
1648
- state.canAppend = false;
1612
+ state.lastRequest = structuredCloneJSON(runtime.requestBodyForState);
1613
+ if (typeof response?.id === "string" && response.id.length > 0) {
1614
+ state.lastResponseId = response.id;
1615
+ state.lastResponseItems = stripInputItemIds(structuredCloneJSON(runtime.nativeOutputItems));
1616
+ state.canAppend = rawEvent.type === "response.done" || rawEvent.type === "response.completed";
1617
+ } else {
1618
+ // Without a response id the append baseline cannot be trusted.
1619
+ state.canAppend = false;
1620
+ }
1649
1621
  }
1650
1622
  }
1651
1623
 
@@ -1753,7 +1725,7 @@ async function tryRecoverCodexWhitespaceToolCallLoop(
1753
1725
 
1754
1726
  runtime.whitespaceLoopRetries += 1;
1755
1727
  const websocketState = context.requestContext.websocketState;
1756
- if (websocketState && (runtime.transport === "websocket" || runtime.sseChainingEnabled)) {
1728
+ if (websocketState) {
1757
1729
  resetCodexWebSocketAppendState(websocketState);
1758
1730
  resetCodexSessionMetadata(websocketState);
1759
1731
  }
@@ -1860,21 +1832,14 @@ function isCodexStalePreviousResponseError(error: unknown): boolean {
1860
1832
  if (error instanceof CodexProviderStreamError) return error.code === "previous_response_not_found";
1861
1833
  if (!(error instanceof Error)) return false;
1862
1834
  if ((error as { code?: string }).code === "previous_response_not_found") return true;
1863
- return /previous[ _]?response/i.test(error.message) && /not[ _]?found|invalid|expired|stale/i.test(error.message);
1864
- }
1865
-
1866
- function registerCodexSseChainStaleFailure(state: CodexWebSocketSessionState, error: unknown): void {
1867
- resetCodexWebSocketAppendState(state);
1868
- resetCodexSessionMetadata(state);
1869
- state.sseChainStaleFailures += 1;
1870
- if (state.sseChainStaleFailures >= CODEX_SSE_CHAIN_STALE_FAILURE_LIMIT && !state.disableSseChaining) {
1871
- state.disableSseChaining = true;
1872
- }
1873
- logCodexDebug("codex sse previous_response_id rejected; falling back to full context", {
1874
- error: error instanceof Error ? error.message : String(error),
1875
- consecutiveFailures: state.sseChainStaleFailures,
1876
- disabled: state.disableSseChaining,
1877
- });
1835
+ // "unsupported": the backend intermittently rejects the parameter outright
1836
+ // with `{"detail":"Unsupported parameter: previous_response_id"}` (no
1837
+ // `error.code`); treat it like a stale chain so the turn replays with full
1838
+ // context instead of surfacing the 400.
1839
+ return (
1840
+ /previous[ _]?response/i.test(error.message) &&
1841
+ /not[ _]?found|invalid|expired|stale|unsupported/i.test(error.message)
1842
+ );
1878
1843
  }
1879
1844
 
1880
1845
  async function tryRecoverCodexPreviousResponseNotFound(
@@ -1892,18 +1857,14 @@ async function tryRecoverCodexPreviousResponseNotFound(
1892
1857
  ) {
1893
1858
  return false;
1894
1859
  }
1895
- if (runtime.transport !== "websocket" && !runtime.sentPreviousResponseId) {
1896
- // SSE error unrelated to chaining — let other recovery handle it.
1860
+ if (runtime.transport !== "websocket") {
1861
+ // SSE never sends previous_response_id; let other recovery handle it.
1897
1862
  return false;
1898
1863
  }
1899
1864
 
1900
1865
  runtime.providerRetryAttempt += 1;
1901
- if (runtime.transport === "websocket") {
1902
- resetCodexWebSocketAppendState(websocketState);
1903
- resetCodexSessionMetadata(websocketState);
1904
- } else {
1905
- registerCodexSseChainStaleFailure(websocketState, error);
1906
- }
1866
+ resetCodexWebSocketAppendState(websocketState);
1867
+ resetCodexSessionMetadata(websocketState);
1907
1868
  runtime.currentItem = null;
1908
1869
  runtime.currentBlock = null;
1909
1870
  runtime.sawTerminalEvent = false;
@@ -1913,13 +1874,8 @@ async function tryRecoverCodexPreviousResponseNotFound(
1913
1874
 
1914
1875
  logCodexDebug("codex previous_response_id expired; retrying with full context", {
1915
1876
  retry: runtime.providerRetryAttempt,
1916
- transport: runtime.transport,
1917
1877
  });
1918
- if (runtime.transport === "websocket") {
1919
- await reopenCodexWebSocketRuntimeStream(context, runtime, websocketState);
1920
- } else {
1921
- await reopenCodexSseRuntimeStream(context, runtime, websocketState);
1922
- }
1878
+ await reopenCodexWebSocketRuntimeStream(context, runtime, websocketState);
1923
1879
  return true;
1924
1880
  }
1925
1881
 
@@ -1996,7 +1952,7 @@ async function tryRetryCodexProviderError(
1996
1952
 
1997
1953
  runtime.providerRetryAttempt += 1;
1998
1954
  const websocketState = context.requestContext.websocketState;
1999
- if (websocketState && (runtime.transport === "websocket" || runtime.sseChainingEnabled)) {
1955
+ if (websocketState) {
2000
1956
  resetCodexWebSocketAppendState(websocketState);
2001
1957
  resetCodexSessionMetadata(websocketState);
2002
1958
  }
@@ -2037,7 +1993,7 @@ function finalizeCodexResponse(
2037
1993
  throw new Error("Request was aborted");
2038
1994
  }
2039
1995
  if (!runtime.sawTerminalEvent) {
2040
- if (context.requestContext.websocketState && (runtime.transport === "websocket" || runtime.sseChainingEnabled)) {
1996
+ if (context.requestContext.websocketState) {
2041
1997
  resetCodexWebSocketAppendState(context.requestContext.websocketState);
2042
1998
  resetCodexSessionMetadata(context.requestContext.websocketState);
2043
1999
  }
@@ -2102,7 +2058,6 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
2102
2058
  const runtime = createCodexStreamRuntime({
2103
2059
  ...initialTransport,
2104
2060
  websocketState: requestContext.websocketState,
2105
- sseChainingEnabled: isCodexSseStatefulEnabled(options) && requestContext.websocketState !== undefined,
2106
2061
  });
2107
2062
  if (requestContext.websocketState) {
2108
2063
  requestContext.websocketState.lastTransport = initialTransport.transport;
@@ -2262,8 +2217,6 @@ function getCodexWebSocketSessionState(
2262
2217
  const created: CodexWebSocketSessionState = {
2263
2218
  disableWebsocket: false,
2264
2219
  canAppend: false,
2265
- disableSseChaining: false,
2266
- sseChainStaleFailures: 0,
2267
2220
  fallbackCount: 0,
2268
2221
  prewarmed: false,
2269
2222
  stats: {
@@ -2281,7 +2234,6 @@ function resetCodexWebSocketAppendState(state: CodexWebSocketSessionState): void
2281
2234
  state.lastRequest = undefined;
2282
2235
  state.lastResponseId = undefined;
2283
2236
  state.lastResponseItems = undefined;
2284
- state.chainTransport = undefined;
2285
2237
  }
2286
2238
 
2287
2239
  function resetCodexSessionMetadata(state: CodexWebSocketSessionState): void {
@@ -2413,40 +2365,32 @@ function recordCodexWebSocketRequestStats(
2413
2365
  state.stats.lastPreviousResponseId = undefined;
2414
2366
  }
2415
2367
 
2416
- interface CodexChainedRequest {
2417
- body: RequestBody;
2418
- /** Set iff the body carries previous_response_id (delta request). */
2419
- previousResponseId?: string;
2420
- }
2421
-
2422
2368
  /**
2423
- * Shape the next turn's request for either transport: when the session's
2424
- * append baseline is intact (same options, strict history prefix, same
2425
- * transport), chain via `previous_response_id` + delta-only `input`; otherwise
2426
- * break the chain and replay the full transcript.
2369
+ * Shape the next websocket turn's request body: when the session's append
2370
+ * baseline is intact (same options, strict history prefix), chain via
2371
+ * `previous_response_id` + delta-only `input`; otherwise break the chain and
2372
+ * replay the full transcript. SSE requests never chain: the HTTP endpoint's
2373
+ * request schema has no `previous_response_id` (codex-rs carries it only on
2374
+ * websocket `response.create` frames) and strict gateway validators 400 it
2375
+ * with `{"detail":"Unsupported parameter: previous_response_id"}`.
2427
2376
  */
2428
2377
  function buildCodexChainedRequestBody(
2429
2378
  requestBody: RequestBody,
2430
2379
  state: CodexWebSocketSessionState | undefined,
2431
- transport: CodexTransport,
2432
- ): CodexChainedRequest {
2433
- const chainable = state?.canAppend === true && state.chainTransport === transport;
2380
+ ): RequestBody {
2381
+ const chainable = state?.canAppend === true;
2434
2382
  const appendInput = chainable
2435
2383
  ? buildResponsesDeltaInput<InputItem>(state.lastRequest, state.lastResponseItems, requestBody)
2436
2384
  : null;
2437
2385
  if (appendInput && appendInput.length > 0 && state?.lastResponseId) {
2438
2386
  const body: RequestBody = { ...requestBody, previous_response_id: state.lastResponseId, input: appendInput };
2439
2387
  recordCodexWebSocketRequestStats(state, body);
2440
- return { body, previousResponseId: state.lastResponseId };
2388
+ return body;
2441
2389
  }
2442
2390
  if (chainable && state) {
2443
- // Chaining was eligible on this transport but the prefix/options check
2444
- // failed: history mutated or options changed — break the chain. A bare
2445
- // transport flip (chainTransport mismatch) deliberately does NOT reset:
2446
- // turn-state/models-etag must keep replaying on the other transport, and
2447
- // the next completion overwrites the baseline anyway.
2391
+ // Chaining was eligible but the prefix/options check failed: history
2392
+ // mutated or options changed — break the chain.
2448
2393
  logCodexDebug("codex append reset", {
2449
- transport,
2450
2394
  hadTurnStateHeader: Boolean(state.turnState),
2451
2395
  hadModelsEtagHeader: Boolean(state.modelsEtag),
2452
2396
  });
@@ -2454,7 +2398,7 @@ function buildCodexChainedRequestBody(
2454
2398
  resetCodexSessionMetadata(state);
2455
2399
  }
2456
2400
  recordCodexWebSocketRequestStats(state, requestBody);
2457
- return { body: requestBody };
2401
+ return requestBody;
2458
2402
  }
2459
2403
 
2460
2404
  function toWebSocketUrl(url: string): string {
@@ -285,7 +285,12 @@ function buildOpenAIResponsesChainedParams(
285
285
  function isOpenAIResponsesStalePreviousResponseError(error: unknown): boolean {
286
286
  if (!(error instanceof Error)) return false;
287
287
  if ((error as { code?: string }).code === "previous_response_not_found") return true;
288
- return /previous[ _]?response/i.test(error.message) && /not[ _]?found|invalid|expired|stale/i.test(error.message);
288
+ // "unsupported" covers endpoints that reject the parameter outright
289
+ // (e.g. "Unsupported parameter: previous_response_id").
290
+ return (
291
+ /previous[ _]?response/i.test(error.message) &&
292
+ /not[ _]?found|invalid|expired|stale|unsupported/i.test(error.message)
293
+ );
289
294
  }
290
295
 
291
296
  /**