@prometheus-ai/ai 0.5.4 → 0.5.8

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/dist/types/auth-broker/remote-store.d.ts +2 -1
  2. package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
  3. package/dist/types/auth-gateway/server.d.ts +19 -0
  4. package/dist/types/auth-gateway/types.d.ts +9 -3
  5. package/dist/types/auth-retry.d.ts +119 -0
  6. package/dist/types/auth-storage.d.ts +217 -8
  7. package/dist/types/errors.d.ts +24 -0
  8. package/dist/types/index.d.ts +5 -9
  9. package/dist/types/provider-details.d.ts +1 -1
  10. package/dist/types/providers/amazon-bedrock.d.ts +12 -6
  11. package/dist/types/providers/anthropic-client.d.ts +10 -3
  12. package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
  13. package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
  14. package/dist/types/providers/anthropic-wire.d.ts +3 -3
  15. package/dist/types/providers/anthropic.d.ts +41 -34
  16. package/dist/types/providers/aws-credentials.d.ts +8 -0
  17. package/dist/types/providers/azure-openai-responses.d.ts +1 -0
  18. package/dist/types/providers/google-gemini-cli.d.ts +22 -1
  19. package/dist/types/providers/google-shared.d.ts +22 -0
  20. package/dist/types/providers/google-types.d.ts +13 -1
  21. package/dist/types/providers/mock.d.ts +8 -3
  22. package/dist/types/providers/ollama.d.ts +6 -0
  23. package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
  24. package/dist/types/providers/openai-chat-server.d.ts +3 -3
  25. package/dist/types/providers/openai-chat-wire.d.ts +644 -0
  26. package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
  27. package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
  28. package/dist/types/providers/openai-codex-responses.d.ts +31 -2
  29. package/dist/types/providers/openai-completions-compat.d.ts +2 -25
  30. package/dist/types/providers/openai-completions.d.ts +2 -10
  31. package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
  32. package/dist/types/providers/openai-responses-server.d.ts +2 -2
  33. package/dist/types/providers/openai-responses-shared.d.ts +49 -9
  34. package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
  35. package/dist/types/providers/openai-responses.d.ts +13 -4
  36. package/dist/types/providers/prometheus-native-client.d.ts +9 -0
  37. package/dist/types/providers/prometheus-native-server.d.ts +4 -3
  38. package/dist/types/providers/transform-messages.d.ts +1 -2
  39. package/dist/types/rate-limit-utils.d.ts +3 -2
  40. package/dist/types/registry/aimlapi.d.ts +4 -0
  41. package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
  42. package/dist/types/registry/amazon-bedrock.d.ts +5 -0
  43. package/dist/types/registry/anthropic.d.ts +10 -0
  44. package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
  45. package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
  46. package/dist/types/registry/cerebras.d.ts +7 -0
  47. package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
  48. package/dist/types/registry/cursor.d.ts +7 -0
  49. package/dist/types/registry/deepseek.d.ts +8 -0
  50. package/dist/types/registry/derived.d.ts +5 -0
  51. package/dist/types/registry/firepass.d.ts +16 -0
  52. package/dist/types/registry/fireworks.d.ts +7 -0
  53. package/dist/types/registry/github-copilot.d.ts +7 -0
  54. package/dist/types/registry/gitlab-duo.d.ts +9 -0
  55. package/dist/types/registry/google-antigravity.d.ts +9 -0
  56. package/dist/types/registry/google-gemini-cli.d.ts +9 -0
  57. package/dist/types/registry/google-vertex.d.ts +5 -0
  58. package/dist/types/registry/google.d.ts +4 -0
  59. package/dist/types/registry/groq.d.ts +4 -0
  60. package/dist/types/registry/huggingface.d.ts +7 -0
  61. package/dist/types/registry/index.d.ts +4 -0
  62. package/dist/types/registry/kagi.d.ts +14 -0
  63. package/dist/types/registry/kilo.d.ts +7 -0
  64. package/dist/types/registry/kimi-code.d.ts +7 -0
  65. package/dist/types/registry/litellm.d.ts +13 -0
  66. package/dist/types/registry/lm-studio.d.ts +8 -0
  67. package/dist/types/registry/minimax-code-cn.d.ts +6 -0
  68. package/dist/types/registry/minimax-code.d.ts +6 -0
  69. package/dist/types/registry/minimax.d.ts +4 -0
  70. package/dist/types/registry/mistral.d.ts +4 -0
  71. package/dist/types/registry/moonshot.d.ts +7 -0
  72. package/dist/types/registry/nanogpt.d.ts +7 -0
  73. package/dist/types/registry/nvidia.d.ts +7 -0
  74. package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  75. package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
  76. package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
  77. package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
  78. package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
  79. package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
  80. package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
  81. package/dist/types/registry/ollama-cloud.d.ts +7 -0
  82. package/dist/types/registry/ollama.d.ts +12 -0
  83. package/dist/types/registry/openai-codex-device.d.ts +8 -0
  84. package/dist/types/registry/openai-codex.d.ts +9 -0
  85. package/dist/types/registry/openai.d.ts +4 -0
  86. package/dist/types/registry/opencode-go.d.ts +6 -0
  87. package/dist/types/registry/opencode-zen.d.ts +6 -0
  88. package/dist/types/registry/openrouter.d.ts +13 -0
  89. package/dist/types/registry/parallel.d.ts +14 -0
  90. package/dist/types/registry/perplexity.d.ts +7 -0
  91. package/dist/types/registry/qianfan.d.ts +7 -0
  92. package/dist/types/registry/qwen-portal.d.ts +7 -0
  93. package/dist/types/registry/registry.d.ts +272 -0
  94. package/dist/types/registry/synthetic.d.ts +6 -0
  95. package/dist/types/registry/tavily.d.ts +14 -0
  96. package/dist/types/registry/together.d.ts +6 -0
  97. package/dist/types/registry/types.d.ts +51 -0
  98. package/dist/types/registry/venice.d.ts +13 -0
  99. package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
  100. package/dist/types/registry/vllm.d.ts +7 -0
  101. package/dist/types/registry/wafer-pass.d.ts +6 -0
  102. package/dist/types/registry/wafer-serverless.d.ts +6 -0
  103. package/dist/types/registry/xai-oauth.d.ts +7 -0
  104. package/dist/types/registry/xai.d.ts +4 -0
  105. package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
  106. package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
  107. package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
  108. package/dist/types/registry/xiaomi.d.ts +6 -0
  109. package/dist/types/registry/zai.d.ts +7 -0
  110. package/dist/types/registry/zenmux.d.ts +7 -0
  111. package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
  112. package/dist/types/stream.d.ts +9 -1
  113. package/dist/types/types.d.ts +56 -295
  114. package/dist/types/usage/google-antigravity.d.ts +15 -1
  115. package/dist/types/usage/openai-codex-reset.d.ts +79 -0
  116. package/dist/types/usage/openai-codex.d.ts +1 -0
  117. package/dist/types/usage.d.ts +77 -4
  118. package/dist/types/utils/abort.d.ts +6 -0
  119. package/dist/types/utils/event-stream.d.ts +2 -0
  120. package/dist/types/utils/http-inspector.d.ts +0 -1
  121. package/dist/types/utils/idle-iterator.d.ts +35 -0
  122. package/dist/types/utils/openai-http.d.ts +58 -0
  123. package/dist/types/utils/request-debug.d.ts +3 -0
  124. package/dist/types/utils/retry-after.d.ts +1 -0
  125. package/dist/types/utils/schema/fields.d.ts +5 -0
  126. package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
  127. package/dist/types/utils/schema/stamps.d.ts +7 -15
  128. package/dist/types/utils/sse-debug.d.ts +0 -5
  129. package/dist/types/utils/stream-markup-healing.d.ts +2 -0
  130. package/dist/types/utils.d.ts +1 -5
  131. package/package.json +17 -29
  132. package/src/auth-broker/remote-store.ts +10 -1
  133. package/src/auth-broker/snapshot-cache.ts +1 -1
  134. package/src/auth-broker/wire-schemas.ts +1 -1
  135. package/src/auth-gateway/http.ts +1 -1
  136. package/src/auth-gateway/server.ts +95 -30
  137. package/src/auth-gateway/types.ts +10 -2
  138. package/src/auth-retry.ts +238 -0
  139. package/src/auth-storage.ts +935 -430
  140. package/src/errors.ts +32 -0
  141. package/src/index.ts +9 -14
  142. package/src/provider-details.ts +1 -1
  143. package/src/providers/__tests__/google-auth.test.ts +144 -0
  144. package/src/providers/amazon-bedrock.ts +70 -40
  145. package/src/providers/anthropic-client.ts +15 -13
  146. package/src/providers/anthropic-messages-server-schema.ts +17 -7
  147. package/src/providers/anthropic-messages-server.ts +88 -20
  148. package/src/providers/anthropic-wire.ts +4 -3
  149. package/src/providers/anthropic.ts +1234 -621
  150. package/src/providers/aws-credentials.ts +47 -5
  151. package/src/providers/aws-eventstream.ts +5 -0
  152. package/src/providers/azure-openai-responses.ts +117 -67
  153. package/src/providers/cursor.ts +30 -30
  154. package/src/providers/github-copilot-headers.ts +1 -1
  155. package/src/providers/gitlab-duo.ts +36 -29
  156. package/src/providers/google-auth.ts +71 -8
  157. package/src/providers/google-gemini-cli.ts +118 -22
  158. package/src/providers/google-shared.ts +163 -43
  159. package/src/providers/google-types.ts +10 -1
  160. package/src/providers/kimi.ts +1 -1
  161. package/src/providers/mock.ts +11 -3
  162. package/src/providers/ollama.ts +64 -7
  163. package/src/providers/openai-anthropic-shim.ts +17 -8
  164. package/src/providers/openai-chat-server-schema.ts +9 -3
  165. package/src/providers/openai-chat-server.ts +82 -16
  166. package/src/providers/openai-chat-wire.ts +847 -0
  167. package/src/providers/openai-codex/request-transformer.ts +129 -34
  168. package/src/providers/openai-codex/response-handler.ts +22 -1
  169. package/src/providers/openai-codex-responses.ts +699 -247
  170. package/src/providers/openai-completions-compat.ts +8 -308
  171. package/src/providers/openai-completions.ts +416 -267
  172. package/src/providers/openai-responses-server-schema.ts +15 -9
  173. package/src/providers/openai-responses-server.ts +162 -114
  174. package/src/providers/openai-responses-shared.ts +320 -82
  175. package/src/providers/openai-responses-wire.ts +6391 -0
  176. package/src/providers/openai-responses.ts +382 -176
  177. package/src/providers/prometheus-native-client.ts +27 -11
  178. package/src/providers/prometheus-native-server.ts +44 -17
  179. package/src/providers/transform-messages.ts +311 -120
  180. package/src/providers/vision-guard.ts +5 -3
  181. package/src/rate-limit-utils.ts +13 -3
  182. package/src/registry/aimlapi.ts +6 -0
  183. package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
  184. package/src/registry/amazon-bedrock.ts +22 -0
  185. package/src/registry/anthropic.ts +26 -0
  186. package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
  187. package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
  188. package/src/{utils/oauth → registry}/cerebras.ts +8 -1
  189. package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
  190. package/src/registry/cursor.ts +20 -0
  191. package/src/{utils/oauth → registry}/deepseek.ts +9 -17
  192. package/src/registry/derived.ts +9 -0
  193. package/src/{utils/oauth → registry}/firepass.ts +10 -2
  194. package/src/{utils/oauth → registry}/fireworks.ts +8 -1
  195. package/src/registry/github-copilot.ts +22 -0
  196. package/src/registry/gitlab-duo.ts +19 -0
  197. package/src/registry/google-antigravity.ts +21 -0
  198. package/src/registry/google-gemini-cli.ts +21 -0
  199. package/src/registry/google-vertex.ts +38 -0
  200. package/src/registry/google.ts +6 -0
  201. package/src/registry/groq.ts +6 -0
  202. package/src/{utils/oauth → registry}/huggingface.ts +8 -19
  203. package/src/registry/index.ts +4 -0
  204. package/src/{utils/oauth → registry}/kagi.ts +9 -11
  205. package/src/{utils/oauth → registry}/kilo.ts +11 -6
  206. package/src/registry/kimi-code.ts +17 -0
  207. package/src/{utils/oauth → registry}/litellm.ts +8 -12
  208. package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
  209. package/src/registry/minimax-code-cn.ts +12 -0
  210. package/src/registry/minimax-code.ts +12 -0
  211. package/src/registry/minimax.ts +6 -0
  212. package/src/registry/mistral.ts +6 -0
  213. package/src/{utils/oauth → registry}/moonshot.ts +8 -9
  214. package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
  215. package/src/{utils/oauth → registry}/nvidia.ts +8 -18
  216. package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
  217. package/src/{utils → registry}/oauth/anthropic.ts +38 -17
  218. package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
  219. package/src/registry/oauth/gitlab-duo.ts +198 -0
  220. package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
  221. package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
  222. package/src/registry/oauth/index.ts +164 -0
  223. package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
  224. package/src/{utils → registry}/oauth/types.ts +7 -51
  225. package/src/{utils → registry}/oauth/wafer.ts +1 -1
  226. package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
  227. package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
  228. package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
  229. package/src/{utils/oauth → registry}/ollama.ts +8 -13
  230. package/src/registry/openai-codex-device.ts +18 -0
  231. package/src/registry/openai-codex.ts +19 -0
  232. package/src/registry/openai.ts +6 -0
  233. package/src/registry/opencode-go.ts +12 -0
  234. package/src/registry/opencode-zen.ts +12 -0
  235. package/src/{utils/oauth → registry}/openrouter.ts +10 -2
  236. package/src/{utils/oauth → registry}/parallel.ts +9 -11
  237. package/src/registry/perplexity.ts +13 -0
  238. package/src/{utils/oauth → registry}/qianfan.ts +8 -17
  239. package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
  240. package/src/registry/registry.ts +149 -0
  241. package/src/{utils/oauth → registry}/synthetic.ts +7 -1
  242. package/src/{utils/oauth → registry}/tavily.ts +10 -12
  243. package/src/{utils/oauth → registry}/together.ts +7 -1
  244. package/src/registry/types.ts +56 -0
  245. package/src/{utils/oauth → registry}/venice.ts +8 -12
  246. package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
  247. package/src/{utils/oauth → registry}/vllm.ts +9 -16
  248. package/src/registry/wafer-pass.ts +12 -0
  249. package/src/registry/wafer-serverless.ts +12 -0
  250. package/src/registry/xai-oauth.ts +17 -0
  251. package/src/registry/xai.ts +6 -0
  252. package/src/registry/xiaomi-token-plan-ams.ts +12 -0
  253. package/src/registry/xiaomi-token-plan-cn.ts +12 -0
  254. package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
  255. package/src/registry/xiaomi.ts +12 -0
  256. package/src/{utils/oauth → registry}/zai.ts +10 -22
  257. package/src/{utils/oauth → registry}/zenmux.ts +8 -1
  258. package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
  259. package/src/stream.ts +229 -199
  260. package/src/types.ts +63 -384
  261. package/src/usage/claude.ts +4 -2
  262. package/src/usage/github-copilot.ts +4 -2
  263. package/src/usage/google-antigravity.ts +196 -28
  264. package/src/usage/kimi.ts +1 -1
  265. package/src/usage/minimax-code.ts +5 -6
  266. package/src/usage/openai-codex-reset.ts +174 -0
  267. package/src/usage/openai-codex.ts +19 -2
  268. package/src/usage/zai.ts +2 -1
  269. package/src/usage.ts +93 -4
  270. package/src/utils/abort.ts +14 -0
  271. package/src/utils/event-stream.ts +17 -0
  272. package/src/utils/http-inspector.ts +4 -12
  273. package/src/utils/idle-iterator.ts +250 -79
  274. package/src/utils/openai-http.ts +157 -0
  275. package/src/utils/request-debug.ts +67 -19
  276. package/src/utils/retry-after.ts +1 -1
  277. package/src/utils/retry.ts +23 -2
  278. package/src/utils/schema/CONSTRAINTS.md +4 -2
  279. package/src/utils/schema/fields.ts +16 -0
  280. package/src/utils/schema/json-schema-validator.ts +19 -1
  281. package/src/utils/schema/normalize.ts +80 -8
  282. package/src/utils/schema/stamps.ts +22 -10
  283. package/src/utils/schema/wire.ts +2 -2
  284. package/src/utils/sse-debug.ts +0 -271
  285. package/src/utils/stream-markup-healing.ts +50 -8
  286. package/src/utils/validation.ts +49 -13
  287. package/src/utils.ts +2 -26
  288. package/dist/types/model-cache.d.ts +0 -17
  289. package/dist/types/model-manager.d.ts +0 -64
  290. package/dist/types/model-thinking.d.ts +0 -100
  291. package/dist/types/models.d.ts +0 -12
  292. package/dist/types/provider-models/bundled-references.d.ts +0 -4
  293. package/dist/types/provider-models/descriptors.d.ts +0 -50
  294. package/dist/types/provider-models/google.d.ts +0 -24
  295. package/dist/types/provider-models/index.d.ts +0 -5
  296. package/dist/types/provider-models/ollama.d.ts +0 -7
  297. package/dist/types/provider-models/openai-compat.d.ts +0 -323
  298. package/dist/types/provider-models/special.d.ts +0 -16
  299. package/dist/types/utils/discovery/antigravity.d.ts +0 -61
  300. package/dist/types/utils/discovery/codex.d.ts +0 -38
  301. package/dist/types/utils/discovery/cursor.d.ts +0 -23
  302. package/dist/types/utils/discovery/gemini.d.ts +0 -25
  303. package/dist/types/utils/discovery/index.d.ts +0 -4
  304. package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
  305. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
  306. package/dist/types/utils/oauth/cerebras.d.ts +0 -1
  307. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
  308. package/dist/types/utils/oauth/deepseek.d.ts +0 -10
  309. package/dist/types/utils/oauth/firepass.d.ts +0 -1
  310. package/dist/types/utils/oauth/fireworks.d.ts +0 -1
  311. package/dist/types/utils/oauth/huggingface.d.ts +0 -19
  312. package/dist/types/utils/oauth/kagi.d.ts +0 -17
  313. package/dist/types/utils/oauth/kilo.d.ts +0 -5
  314. package/dist/types/utils/oauth/litellm.d.ts +0 -18
  315. package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
  316. package/dist/types/utils/oauth/moonshot.d.ts +0 -1
  317. package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
  318. package/dist/types/utils/oauth/nvidia.d.ts +0 -18
  319. package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
  320. package/dist/types/utils/oauth/ollama.d.ts +0 -18
  321. package/dist/types/utils/oauth/openrouter.d.ts +0 -1
  322. package/dist/types/utils/oauth/parallel.d.ts +0 -17
  323. package/dist/types/utils/oauth/qianfan.d.ts +0 -17
  324. package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
  325. package/dist/types/utils/oauth/synthetic.d.ts +0 -1
  326. package/dist/types/utils/oauth/tavily.d.ts +0 -17
  327. package/dist/types/utils/oauth/together.d.ts +0 -1
  328. package/dist/types/utils/oauth/venice.d.ts +0 -18
  329. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
  330. package/dist/types/utils/oauth/vllm.d.ts +0 -16
  331. package/dist/types/utils/oauth/zai.d.ts +0 -18
  332. package/dist/types/utils/oauth/zenmux.d.ts +0 -1
  333. package/dist/types/utils/oauth/zhipu.d.ts +0 -18
  334. package/src/model-cache.ts +0 -129
  335. package/src/model-manager.ts +0 -469
  336. package/src/model-thinking.ts +0 -756
  337. package/src/models.json +0 -60287
  338. package/src/models.json.d.ts +0 -9
  339. package/src/models.ts +0 -56
  340. package/src/provider-models/bundled-references.ts +0 -38
  341. package/src/provider-models/descriptors.ts +0 -364
  342. package/src/provider-models/google.ts +0 -88
  343. package/src/provider-models/index.ts +0 -5
  344. package/src/provider-models/ollama.ts +0 -153
  345. package/src/provider-models/openai-compat.ts +0 -2904
  346. package/src/provider-models/special.ts +0 -67
  347. package/src/utils/discovery/antigravity.ts +0 -261
  348. package/src/utils/discovery/codex.ts +0 -371
  349. package/src/utils/discovery/cursor.ts +0 -306
  350. package/src/utils/discovery/gemini.ts +0 -248
  351. package/src/utils/discovery/index.ts +0 -4
  352. package/src/utils/discovery/openai-compatible.ts +0 -224
  353. package/src/utils/oauth/gitlab-duo.ts +0 -123
  354. package/src/utils/oauth/index.ts +0 -502
  355. /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
  356. /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
  357. /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
  358. /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
  359. /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
  360. /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
  361. /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
  362. /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
  363. /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
  364. /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
  365. /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
  366. /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
  367. /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
  368. /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
  369. /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
  370. /package/src/{utils → registry}/oauth/cursor.ts +0 -0
  371. /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
  372. /package/src/{utils → registry}/oauth/kimi.ts +0 -0
  373. /package/src/{utils → registry}/oauth/oauth.html +0 -0
  374. /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
  375. /package/src/{utils → registry}/oauth/opencode.ts +0 -0
  376. /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
  377. /package/src/{utils → registry}/oauth/pkce.ts +0 -0
@@ -1,5 +1,12 @@
1
1
  import * as os from "node:os";
2
2
  import { scheduler } from "node:timers/promises";
3
+ import { calculateCost } from "@prometheus-ai/catalog/models";
4
+ import {
5
+ CODEX_BASE_URL,
6
+ getCodexAccountId,
7
+ OPENAI_HEADER_VALUES,
8
+ OPENAI_HEADERS,
9
+ } from "@prometheus-ai/catalog/wire/codex";
3
10
  import {
4
11
  $env,
5
12
  $flag,
@@ -10,17 +17,7 @@ import {
10
17
  readSseJson,
11
18
  structuredCloneJSON,
12
19
  } from "@prometheus-ai/utils";
13
- import type OpenAI from "openai";
14
- import type {
15
- ResponseCustomToolCall,
16
- ResponseFunctionToolCall,
17
- ResponseInput,
18
- ResponseInputContent,
19
- ResponseOutputMessage,
20
- ResponseReasoningItem,
21
- } from "openai/resources/responses/responses";
22
20
  import packageJson from "../../package.json" with { type: "json" };
23
- import { calculateCost } from "../models";
24
21
  import { getEnvApiKey } from "../stream";
25
22
  import {
26
23
  type Api,
@@ -58,17 +55,18 @@ import { createRequestDebugSession, isRequestDebugEnabled, type RequestDebugResp
58
55
  import { adaptSchemaForStrict, NO_STRICT, sanitizeSchemaForOpenAIResponses, toolWireSchema } from "../utils/schema";
59
56
  import { notifyRawSseEvent } from "../utils/sse-debug";
60
57
  import { compactGrammarDefinition } from "./grammar";
61
- import { CODEX_BASE_URL, getCodexAccountId, OPENAI_HEADER_VALUES, OPENAI_HEADERS } from "./openai-codex/constants";
62
58
  import {
59
+ type CodexReasoningContext,
63
60
  type CodexRequestOptions,
64
61
  type InputItem,
65
62
  type RequestBody,
66
63
  transformRequestBody,
67
64
  } from "./openai-codex/request-transformer";
68
- import { parseCodexError } from "./openai-codex/response-handler";
65
+ import { CodexApiError } from "./openai-codex/response-handler";
69
66
  import { normalizeOpenAIResponsesPromptCacheKey } from "./openai-responses";
70
67
  import {
71
68
  appendResponsesToolResultMessages,
69
+ buildResponsesDeltaInput,
72
70
  convertResponsesAssistantMessage,
73
71
  convertResponsesInputContent,
74
72
  encodeResponsesToolCallId,
@@ -77,17 +75,49 @@ import {
77
75
  mapOpenAIResponsesStopReason,
78
76
  populateResponsesUsageFromResponse,
79
77
  } from "./openai-responses-shared";
78
+ import type {
79
+ ResponseCustomToolCall,
80
+ ResponseFunctionToolCall,
81
+ ResponseInput,
82
+ ResponseInputContent,
83
+ ResponseOutputMessage,
84
+ ResponseReasoningItem,
85
+ ResponseStatus,
86
+ } from "./openai-responses-wire";
80
87
  import { transformMessages } from "./transform-messages";
81
88
 
82
89
  export interface OpenAICodexResponsesOptions extends StreamOptions {
83
90
  reasoning?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh";
84
91
  reasoningSummary?: "auto" | "concise" | "detailed" | null;
92
+ /** `reasoning.context` replay scope. Defaults to `all_turns` under {@link OpenAICodexResponsesOptions.responsesLite}, otherwise omitted (server default is `current_turn`). */
93
+ reasoningContext?: CodexReasoningContext;
85
94
  textVerbosity?: "low" | "medium" | "high";
86
95
  include?: string[];
87
96
  codexMode?: boolean;
88
97
  toolChoice?: ToolChoice;
89
98
  preferWebsockets?: boolean;
90
99
  serviceTier?: ServiceTier;
100
+ /**
101
+ * Opt into the Responses Lite transport contract. Sends
102
+ * `x-openai-internal-codex-responses-lite: true` on HTTP requests and on the
103
+ * WebSocket upgrade (the marker is connection-scoped there, so lite and
104
+ * non-lite turns never share a pooled socket), strips image detail from
105
+ * input, and defaults `reasoning.context` to `all_turns` — mirroring codex-rs.
106
+ */
107
+ responsesLite?: boolean;
108
+ /**
109
+ * Extra `client_metadata` to include in the request body on both transports.
110
+ * The canonical Codex envelope is `client_metadata["x-codex-turn-metadata"]`
111
+ * (JSON string of thread/turn identifiers); flat keys are also accepted.
112
+ */
113
+ clientMetadata?: Record<string, string>;
114
+ /**
115
+ * Invoked when the server streams a `response.metadata` event carrying
116
+ * ChatGPT moderation metadata (`metadata.openai_chatgpt_moderation_metadata`)
117
+ * for first-party presentation parity. Diagnostic observer: failures are
118
+ * swallowed and must not alter the stream.
119
+ */
120
+ onModerationMetadata?: (metadata: unknown) => void;
91
121
  }
92
122
 
93
123
  const CODEX_DEBUG = $flag("PROMETHEUS_CODEX_DEBUG");
@@ -107,8 +137,8 @@ const CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY = 4096;
107
137
  * heartbeat below also catches dead sockets, but only after `pongTimeoutMs`
108
138
  * (default 60s) and only while a request is active — this gate closes the door
109
139
  * earlier and even when the gap between requests is purely client-side (tool
110
- * execution, user typing, etc.). Set `PROMETHEUS_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS=0`
111
- * to disable.
140
+ * execution, user typing, etc.). Set
141
+ * `PROMETHEUS_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS=0` to disable.
112
142
  */
113
143
  const CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS = 30_000;
114
144
  /**
@@ -136,12 +166,24 @@ const CODEX_RETRYABLE_EVENT_MESSAGE =
136
166
  const CODEX_PROVIDER_SESSION_STATE_KEY = "openai-codex-responses";
137
167
  const X_CODEX_TURN_STATE_HEADER = "x-codex-turn-state";
138
168
  const X_MODELS_ETAG_HEADER = "x-models-etag";
139
- const X_REASONING_INCLUDED_HEADER = "x-reasoning-included";
169
+ const X_OPENAI_INTERNAL_CODEX_RESPONSES_LITE_HEADER = "x-openai-internal-codex-responses-lite";
170
+ /** WebSocket frames cannot carry per-request HTTP headers; codex-rs mirrors the lite marker into `client_metadata` under this key. */
171
+ const CODEX_WS_RESPONSES_LITE_CLIENT_METADATA_KEY = "ws_request_header_x_openai_internal_codex_responses_lite";
172
+ /** `response.metadata` payload key carrying ChatGPT moderation metadata. */
173
+ const CODEX_MODERATION_METADATA_KEY = "openai_chatgpt_moderation_metadata";
140
174
  /** Connection-level websocket failures that should immediately fall back to SSE without retrying. */
141
175
  const CODEX_WEBSOCKET_FATAL_PATTERNS = ["websocket error:", "websocket closed before open", "connection timeout"];
142
176
  /** Max total time to spend retrying 429s with server-provided delays (5 minutes). */
143
177
  const CODEX_RATE_LIMIT_BUDGET_MS = 5 * 60 * 1000;
144
178
  const CODEX_ADDITIONAL_PROGRESS_EVENT_TYPES = new Set(["response.done", "response.incomplete"]);
179
+ // Provider/model failure mode: Codex can keep a response alive by streaming
180
+ // whitespace-only function-call argument deltas forever. Those frames count as
181
+ // transport activity, so idle timers never fire; cap the run before raw debug
182
+ // buffers and partial JSON grow without semantic progress.
183
+ const CODEX_WHITESPACE_TOOL_CALL_ARGUMENT_DELTA_EVENT_LIMIT = 256;
184
+ const CODEX_WHITESPACE_TOOL_CALL_ARGUMENT_DELTA_CHAR_LIMIT = 16 * 1024;
185
+ const CODEX_WHITESPACE_LOOP_RETRY_LIMIT = 2;
186
+ const CODEX_WHITESPACE_LOOP_RETRY_DELAY_MS = 250;
145
187
 
146
188
  function isCodexStreamProgressEvent(event: unknown): boolean {
147
189
  if (isOpenAIResponsesProgressEvent(event)) return true;
@@ -172,6 +214,11 @@ type CodexTransport = "sse" | "websocket";
172
214
  type CodexEventItem = ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | ResponseCustomToolCall;
173
215
  type CodexOutputBlock = ThinkingContent | TextContent | (ToolCall & { partialJson: string; lastParseLen?: number });
174
216
 
217
+ /**
218
+ * Per-session request-shape counters. Despite the name, these cover both
219
+ * transports: once stateful SSE chaining is enabled, SSE requests are counted
220
+ * too (the shared chained-request builder records every request it shapes).
221
+ */
175
222
  export interface OpenAICodexWebSocketDebugStats {
176
223
  fullContextRequests: number;
177
224
  deltaRequests: number;
@@ -180,6 +227,12 @@ export interface OpenAICodexWebSocketDebugStats {
180
227
  lastPreviousResponseId?: string;
181
228
  }
182
229
 
230
+ /**
231
+ * Per-session transport state shared by BOTH transports: websocket turn
232
+ * chaining (`previous_response_id` baseline), turn-state/models-etag headers,
233
+ * websocket connection pooling, and debug stats. The name is historical — SSE-only
234
+ * sessions use it too.
235
+ */
183
236
  type CodexWebSocketSessionState = {
184
237
  disableWebsocket: boolean;
185
238
  lastRequest?: RequestBody;
@@ -188,7 +241,6 @@ type CodexWebSocketSessionState = {
188
241
  canAppend: boolean;
189
242
  turnState?: string;
190
243
  modelsEtag?: string;
191
- reasoningIncluded?: boolean;
192
244
  connection?: CodexWebSocketConnection;
193
245
  lastTransport?: CodexTransport;
194
246
  fallbackCount: number;
@@ -211,6 +263,7 @@ interface CodexRequestContext {
211
263
  transportSessionId?: string;
212
264
  providerSessionState?: CodexProviderSessionState;
213
265
  websocketState?: CodexWebSocketSessionState;
266
+ responsesLite: boolean;
214
267
  transformedBody: RequestBody;
215
268
  rawRequestDump: RawHttpRequestDump;
216
269
  }
@@ -219,6 +272,7 @@ interface CodexRequestSetup {
219
272
  requestSignal: AbortSignal;
220
273
  wrapCodexSseStream: (source: AsyncGenerator<Record<string, unknown>>) => AsyncGenerator<Record<string, unknown>>;
221
274
  requestAbortController: AbortController;
275
+ firstEventTimeoutMs: number | undefined;
222
276
  websocketIdleTimeoutMs: number | undefined;
223
277
  websocketFirstEventTimeoutMs: number | undefined;
224
278
  }
@@ -235,6 +289,21 @@ interface CodexStreamRuntime {
235
289
  providerRetryAttempt: number;
236
290
  sawTerminalEvent: boolean;
237
291
  canSafelyReplayWebsocketOverSse: boolean;
292
+ whitespaceToolCallArgumentsDelta?: CodexWhitespaceToolCallArgumentsDeltaState;
293
+ whitespaceLoopRetries: number;
294
+ }
295
+
296
+ interface CodexWhitespaceToolCallArgumentsDeltaState {
297
+ itemId: string;
298
+ outputIndex?: number;
299
+ consecutiveEvents: number;
300
+ consecutiveChars: number;
301
+ firstSequenceNumber?: number;
302
+ lastSequenceNumber?: number;
303
+ }
304
+
305
+ interface CodexWhitespaceToolCallArgumentsDeltaInterruption {
306
+ message: string;
238
307
  }
239
308
 
240
309
  interface CodexStreamProcessingContext {
@@ -266,54 +335,55 @@ function parseCodexPositiveInteger(value: string | undefined, fallback: number):
266
335
  return Math.trunc(parsed);
267
336
  }
268
337
 
338
+ function getCodexEnv(name: string): string | undefined {
339
+ return $env[`PROMETHEUS_${name}`] ?? $env[`PI_${name}`];
340
+ }
341
+
269
342
  function isCodexWebSocketEnvEnabled(): boolean {
270
343
  return $flag("PROMETHEUS_CODEX_WEBSOCKET");
271
344
  }
272
345
 
273
346
  function getCodexWebSocketRetryBudget(): number {
274
- return parseCodexNonNegativeInteger($env.PROMETHEUS_CODEX_WEBSOCKET_RETRY_BUDGET, CODEX_WEBSOCKET_RETRY_BUDGET);
347
+ return parseCodexNonNegativeInteger(getCodexEnv("CODEX_WEBSOCKET_RETRY_BUDGET"), CODEX_WEBSOCKET_RETRY_BUDGET);
275
348
  }
276
349
 
277
350
  function getCodexWebSocketRetryDelayMs(retry: number): number {
278
- const baseDelay = parseCodexPositiveInteger($env.PROMETHEUS_CODEX_WEBSOCKET_RETRY_DELAY_MS, CODEX_RETRY_DELAY_MS);
351
+ const baseDelay = parseCodexPositiveInteger(getCodexEnv("CODEX_WEBSOCKET_RETRY_DELAY_MS"), CODEX_RETRY_DELAY_MS);
279
352
  return baseDelay * Math.max(1, retry);
280
353
  }
281
354
 
282
355
  function getCodexWebSocketIdleTimeoutMs(): number {
283
- return parseCodexPositiveInteger($env.PROMETHEUS_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS, CODEX_WEBSOCKET_IDLE_TIMEOUT_MS);
356
+ return parseCodexPositiveInteger(getCodexEnv("CODEX_WEBSOCKET_IDLE_TIMEOUT_MS"), CODEX_WEBSOCKET_IDLE_TIMEOUT_MS);
284
357
  }
285
358
 
286
359
  function getCodexWebSocketFirstEventTimeoutMs(): number {
287
360
  return parseCodexPositiveInteger(
288
- $env.PROMETHEUS_CODEX_WEBSOCKET_FIRST_EVENT_TIMEOUT_MS,
361
+ getCodexEnv("CODEX_WEBSOCKET_FIRST_EVENT_TIMEOUT_MS"),
289
362
  CODEX_WEBSOCKET_FIRST_EVENT_TIMEOUT_MS,
290
363
  );
291
364
  }
292
365
 
293
366
  function getCodexWebSocketPingIntervalMs(): number {
294
367
  return parseCodexNonNegativeInteger(
295
- $env.PROMETHEUS_CODEX_WEBSOCKET_PING_INTERVAL_MS,
368
+ getCodexEnv("CODEX_WEBSOCKET_PING_INTERVAL_MS"),
296
369
  CODEX_WEBSOCKET_PING_INTERVAL_MS,
297
370
  );
298
371
  }
299
372
 
300
373
  function getCodexWebSocketPongTimeoutMs(): number {
301
- return parseCodexNonNegativeInteger(
302
- $env.PROMETHEUS_CODEX_WEBSOCKET_PONG_TIMEOUT_MS,
303
- CODEX_WEBSOCKET_PONG_TIMEOUT_MS,
304
- );
374
+ return parseCodexNonNegativeInteger(getCodexEnv("CODEX_WEBSOCKET_PONG_TIMEOUT_MS"), CODEX_WEBSOCKET_PONG_TIMEOUT_MS);
305
375
  }
306
376
 
307
377
  function getCodexWebSocketMessageQueueCapacity(): number {
308
378
  return parseCodexPositiveInteger(
309
- $env.PROMETHEUS_CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY,
379
+ getCodexEnv("CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY"),
310
380
  CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY,
311
381
  );
312
382
  }
313
383
 
314
384
  function getCodexWebSocketMaxIdleReuseMs(): number {
315
385
  return parseCodexNonNegativeInteger(
316
- $env.PROMETHEUS_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS,
386
+ getCodexEnv("CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS"),
317
387
  CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS,
318
388
  );
319
389
  }
@@ -369,6 +439,7 @@ function isCodexWebSocketRetryableStreamError(error: unknown): boolean {
369
439
  message.includes("websocket ping failed") ||
370
440
  message.includes("websocket pong timeout") ||
371
441
  message.includes("websocket message queue exceeded") ||
442
+ message.includes("websocket request already in progress") ||
372
443
  message.includes("idle timeout waiting for websocket") ||
373
444
  message.includes("timeout waiting for first websocket event") ||
374
445
  message.includes("syntaxerror") ||
@@ -420,11 +491,6 @@ function updateCodexSessionMetadataFromHeaders(
420
491
  if (modelsEtag && modelsEtag.length > 0) {
421
492
  state.modelsEtag = modelsEtag;
422
493
  }
423
- const reasoningIncluded = resolvedHeaders.get(X_REASONING_INCLUDED_HEADER);
424
- if (reasoningIncluded !== null) {
425
- const normalized = reasoningIncluded.trim().toLowerCase();
426
- state.reasoningIncluded = normalized.length === 0 ? true : normalized !== "false";
427
- }
428
494
  }
429
495
 
430
496
  function extractCodexWebSocketHandshakeHeaders(socket: Bun.WebSocket, openEvent?: Event): Headers | undefined {
@@ -499,13 +565,16 @@ export function normalizeCodexToolChoice(
499
565
  if (!choice) return undefined;
500
566
  if (typeof choice === "string") return choice;
501
567
  const allowFreeform = model ? supportsFreeformApplyPatchCodex(model) : false;
502
- const mapName = (name: string): Record<string, string> => {
568
+ const mapName = (name: string): Record<string, string> | undefined => {
569
+ const directTool = tools.find(tool => tool.name === name);
503
570
  const customTool = allowFreeform
504
571
  ? tools.find(tool => tool.customFormat && (tool.name === name || tool.customWireName === name))
505
572
  : undefined;
573
+ const offeredTool = customTool ?? directTool;
574
+ if (!offeredTool) return undefined;
506
575
  return customTool
507
576
  ? { type: "custom", name: customTool.customWireName ?? customTool.name }
508
- : { type: "function", name };
577
+ : { type: "function", name: offeredTool.name };
509
578
  };
510
579
  if (choice.type === "function") {
511
580
  if ("function" in choice && choice.function?.name) {
@@ -597,6 +666,7 @@ function resetOutputState(output: AssistantMessage): void {
597
666
  output.content.length = 0;
598
667
  output.usage = createEmptyUsage();
599
668
  output.stopReason = "stop";
669
+ output.stopDetails = undefined;
600
670
  }
601
671
 
602
672
  function removeTransientBlockIndices(output: AssistantMessage): void {
@@ -631,6 +701,7 @@ function createRequestSetup(options: OpenAICodexResponsesOptions | undefined): C
631
701
  requestAbortController,
632
702
  requestSignal,
633
703
  wrapCodexSseStream,
704
+ firstEventTimeoutMs,
634
705
  websocketIdleTimeoutMs,
635
706
  websocketFirstEventTimeoutMs,
636
707
  };
@@ -666,13 +737,19 @@ async function buildCodexRequestContext(
666
737
  };
667
738
 
668
739
  const providerSessionState = getCodexProviderSessionState(options?.providerSessionState);
669
- const sessionKey = getCodexWebSocketSessionKey(transportSessionId, model, accountId, baseUrl);
740
+ const responsesLite = options?.responsesLite === true;
741
+ const sessionKey = getCodexWebSocketSessionKey(transportSessionId, model, accountId, baseUrl, responsesLite);
670
742
  const publicSessionKey = getCodexPublicSessionKey(transportSessionId, model, baseUrl);
671
743
  if (sessionKey && publicSessionKey) {
672
744
  providerSessionState?.webSocketPublicToPrivate.set(publicSessionKey, sessionKey);
673
745
  }
674
746
  const websocketState =
675
747
  sessionKey && providerSessionState ? getCodexWebSocketSessionState(sessionKey, providerSessionState) : undefined;
748
+ if (websocketState && !isCodexWithinTurnContinuation(context)) {
749
+ // codex-rs scopes `x-codex-turn-state` to a single user turn: tool-loop
750
+ // follow-ups echo it, a new user turn starts without it.
751
+ websocketState.turnState = undefined;
752
+ }
676
753
  return {
677
754
  apiKey,
678
755
  accountId,
@@ -682,6 +759,7 @@ async function buildCodexRequestContext(
682
759
  transportSessionId,
683
760
  providerSessionState,
684
761
  websocketState,
762
+ responsesLite,
685
763
  transformedBody,
686
764
  rawRequestDump,
687
765
  };
@@ -695,14 +773,14 @@ async function buildTransformedCodexRequestBody(
695
773
  ): Promise<RequestBody> {
696
774
  const params: RequestBody = {
697
775
  model: model.id,
698
- input: [...convertMessages(model, context)],
776
+ input: convertMessages(model, context),
699
777
  stream: true,
700
778
  prompt_cache_key: promptCacheKey,
701
779
  };
702
780
 
703
- if (options?.maxTokens) {
704
- params.max_output_tokens = options.maxTokens;
705
- }
781
+ // `maxTokens` is intentionally not forwarded: transformRequestBody strips
782
+ // `max_output_tokens`/`max_completion_tokens` (the Codex backend rejects
783
+ // caller-supplied output caps).
706
784
  if (options?.temperature !== undefined) {
707
785
  params.temperature = options.temperature;
708
786
  }
@@ -750,11 +828,16 @@ async function buildTransformedCodexRequestBody(
750
828
  params.instructions = systemPrompts[0];
751
829
  }
752
830
  const developerMessages = systemPrompts.slice(1);
831
+ if (options?.clientMetadata && Object.keys(options.clientMetadata).length > 0) {
832
+ params.client_metadata = { ...options.clientMetadata };
833
+ }
753
834
  const codexOptions: CodexRequestOptions = {
754
835
  reasoningEffort: options?.reasoning,
755
- reasoningSummary: options?.reasoningSummary ?? "auto",
836
+ reasoningSummary: options?.reasoningSummary === undefined ? "auto" : options.reasoningSummary,
837
+ reasoningContext: options?.reasoningContext,
756
838
  textVerbosity: options?.textVerbosity,
757
839
  include: options?.include,
840
+ responsesLite: options?.responsesLite,
758
841
  };
759
842
 
760
843
  return transformRequestBody(params, model, codexOptions, { developerMessages });
@@ -819,7 +902,21 @@ async function openCodexWebSocketTransport(
819
902
  requestBodyForState: RequestBody;
820
903
  transport: CodexTransport;
821
904
  }> {
822
- const websocketRequest = buildCodexWebSocketRequest(requestContext.transformedBody, websocketState);
905
+ const chainedBody = buildCodexChainedRequestBody(requestContext.transformedBody, websocketState);
906
+ // WebSocket frames cannot carry per-request HTTP headers, so the Responses
907
+ // Lite marker rides in `client_metadata` on every `response.create`.
908
+ const websocketRequest: Record<string, unknown> = {
909
+ type: "response.create",
910
+ ...chainedBody,
911
+ ...(requestContext.responsesLite
912
+ ? {
913
+ client_metadata: {
914
+ ...(chainedBody.client_metadata ?? {}),
915
+ [CODEX_WS_RESPONSES_LITE_CLIENT_METADATA_KEY]: "true",
916
+ },
917
+ }
918
+ : {}),
919
+ };
823
920
  const websocketHeaders = createCodexHeaders(
824
921
  requestContext.requestHeaders,
825
922
  requestContext.accountId,
@@ -827,6 +924,7 @@ async function openCodexWebSocketTransport(
827
924
  requestContext.transportSessionId,
828
925
  "websocket",
829
926
  websocketState,
927
+ requestContext.responsesLite,
830
928
  );
831
929
  const requestBodyForState = structuredCloneJSON(requestContext.transformedBody);
832
930
  logCodexDebug("codex websocket request", {
@@ -852,7 +950,26 @@ async function openCodexWebSocketTransport(
852
950
  requestSetup.requestSignal,
853
951
  onSseEvent,
854
952
  );
855
- return { eventStream, requestBodyForState, transport: "websocket" };
953
+ return {
954
+ eventStream,
955
+ requestBodyForState,
956
+ transport: "websocket",
957
+ };
958
+ }
959
+
960
+ /**
961
+ * True when the request continues the current turn (everything after the
962
+ * last assistant message is tool results), false when a new user turn starts.
963
+ * Mirrors codex-rs, which scopes `x-codex-turn-state` to a single turn and
964
+ * clears it when the next one begins.
965
+ */
966
+ function isCodexWithinTurnContinuation(context: Context): boolean {
967
+ for (let i = context.messages.length - 1; i >= 0; i--) {
968
+ const role = context.messages[i]?.role;
969
+ if (role === "toolResult") continue;
970
+ return role === "assistant";
971
+ }
972
+ return false;
856
973
  }
857
974
 
858
975
  async function openCodexSseTransport(
@@ -867,21 +984,28 @@ async function openCodexSseTransport(
867
984
  requestBodyForState: RequestBody;
868
985
  transport: CodexTransport;
869
986
  }> {
870
- const eventStream = requestSetup.wrapCodexSseStream(
871
- await openCodexSseEventStream(
872
- requestContext.url,
873
- requestContext.requestHeaders,
874
- requestContext.accountId,
875
- requestContext.apiKey,
876
- requestContext.transportSessionId,
877
- body,
878
- state,
879
- requestSetup.requestSignal,
880
- event => options?.onSseEvent?.(event, model),
881
- options?.fetch,
882
- ),
883
- );
884
- return { eventStream, requestBodyForState: structuredCloneJSON(body), transport: "sse" };
987
+ const open = async (wireBody: RequestBody) => {
988
+ // Keep the 400 dump honest: record the body actually sent on the wire.
989
+ requestContext.rawRequestDump.body = wireBody;
990
+ return requestSetup.wrapCodexSseStream(
991
+ await openCodexSseEventStream(
992
+ requestContext.url,
993
+ requestContext.requestHeaders,
994
+ requestContext.accountId,
995
+ requestContext.apiKey,
996
+ requestContext.transportSessionId,
997
+ wireBody,
998
+ state,
999
+ requestContext.responsesLite,
1000
+ requestSetup.requestSignal,
1001
+ requestSetup.firstEventTimeoutMs,
1002
+ event => options?.onSseEvent?.(event, model),
1003
+ options?.fetch,
1004
+ ),
1005
+ );
1006
+ };
1007
+ recordCodexWebSocketRequestStats(state, body);
1008
+ return { eventStream: await open(body), requestBodyForState: structuredCloneJSON(body), transport: "sse" };
885
1009
  }
886
1010
 
887
1011
  async function reopenCodexWebSocketRuntimeStream(
@@ -952,11 +1076,94 @@ function createCodexStreamRuntime(initial: {
952
1076
  nativeOutputItems: [],
953
1077
  websocketStreamRetries: 0,
954
1078
  providerRetryAttempt: 0,
1079
+ whitespaceLoopRetries: 0,
955
1080
  sawTerminalEvent: false,
956
1081
  canSafelyReplayWebsocketOverSse: true,
1082
+ whitespaceToolCallArgumentsDelta: undefined,
1083
+ };
1084
+ }
1085
+
1086
+ function resetWhitespaceToolCallArgumentsDelta(runtime: CodexStreamRuntime): void {
1087
+ runtime.whitespaceToolCallArgumentsDelta = undefined;
1088
+ }
1089
+
1090
+ function isJsonWhitespaceOnly(value: string): boolean {
1091
+ for (let index = 0; index < value.length; index += 1) {
1092
+ const code = value.charCodeAt(index);
1093
+ if (code !== 0x09 && code !== 0x0a && code !== 0x0d && code !== 0x20) {
1094
+ return false;
1095
+ }
1096
+ }
1097
+ return true;
1098
+ }
1099
+
1100
+ function readOptionalInteger(value: unknown): number | undefined {
1101
+ if (typeof value !== "number" || !Number.isFinite(value)) return undefined;
1102
+ return Math.trunc(value);
1103
+ }
1104
+
1105
+ function observeWhitespaceToolCallArgumentsDelta(
1106
+ runtime: CodexStreamRuntime,
1107
+ rawEvent: Record<string, unknown>,
1108
+ delta: string,
1109
+ ): CodexWhitespaceToolCallArgumentsDeltaInterruption | undefined {
1110
+ if (!isJsonWhitespaceOnly(delta)) {
1111
+ resetWhitespaceToolCallArgumentsDelta(runtime);
1112
+ return undefined;
1113
+ }
1114
+
1115
+ const itemId =
1116
+ typeof rawEvent.item_id === "string" && rawEvent.item_id.length > 0
1117
+ ? rawEvent.item_id
1118
+ : (runtime.currentItem?.id ?? "");
1119
+ const outputIndex = readOptionalInteger(rawEvent.output_index);
1120
+ const sequenceNumber = readOptionalInteger(rawEvent.sequence_number);
1121
+ let state = runtime.whitespaceToolCallArgumentsDelta;
1122
+ if (!state || state.itemId !== itemId || state.outputIndex !== outputIndex) {
1123
+ state = {
1124
+ itemId,
1125
+ outputIndex,
1126
+ consecutiveEvents: 0,
1127
+ consecutiveChars: 0,
1128
+ firstSequenceNumber: sequenceNumber,
1129
+ };
1130
+ runtime.whitespaceToolCallArgumentsDelta = state;
1131
+ }
1132
+
1133
+ state.consecutiveEvents += 1;
1134
+ state.consecutiveChars += delta.length;
1135
+ state.lastSequenceNumber = sequenceNumber;
1136
+ if (
1137
+ state.consecutiveEvents < CODEX_WHITESPACE_TOOL_CALL_ARGUMENT_DELTA_EVENT_LIMIT &&
1138
+ state.consecutiveChars < CODEX_WHITESPACE_TOOL_CALL_ARGUMENT_DELTA_CHAR_LIMIT
1139
+ ) {
1140
+ return undefined;
1141
+ }
1142
+
1143
+ const itemLabel = itemId ? ` for item ${itemId}` : "";
1144
+ const sequenceLabel =
1145
+ state.firstSequenceNumber === undefined || state.lastSequenceNumber === undefined
1146
+ ? ""
1147
+ : `, sequence ${state.firstSequenceNumber}..${state.lastSequenceNumber}`;
1148
+ return {
1149
+ message: `Interrupted OpenAI Codex response after ${state.consecutiveEvents} consecutive whitespace-only tool-call argument delta events (${state.consecutiveChars} chars${sequenceLabel})${itemLabel}.`,
957
1150
  };
958
1151
  }
959
1152
 
1153
+ function interruptWhitespaceToolCallArgumentsDelta(
1154
+ runtime: CodexStreamRuntime,
1155
+ interruption: CodexWhitespaceToolCallArgumentsDeltaInterruption,
1156
+ ): never {
1157
+ // Close the degenerate websocket so the server stops streaming whitespace
1158
+ // frames. Do NOT abort requestSetup.requestAbortController: reopen*RuntimeStream
1159
+ // reuses the same setup across retries, and requestSignal is an AbortSignal.any
1160
+ // over that controller — aborting it stays latched and makes recovery
1161
+ // impossible. Throwing unwinds the for-await, which returns the SSE generator
1162
+ // and cancels its underlying body.
1163
+ runtime.websocketState?.connection?.close("degenerate-tool-call");
1164
+ throw new CodexWhitespaceToolCallLoopError(interruption.message);
1165
+ }
1166
+
960
1167
  async function processCodexResponseStream(
961
1168
  context: CodexStreamProcessingContext,
962
1169
  runtime: CodexStreamRuntime,
@@ -968,12 +1175,7 @@ async function processCodexResponseStream(
968
1175
  try {
969
1176
  let firstTokenTime = context.firstTokenTime;
970
1177
  for await (const rawEvent of runtime.eventStream) {
971
- firstTokenTime = handleCodexStreamEvent({
972
- ...context,
973
- runtime,
974
- rawEvent,
975
- firstTokenTime,
976
- });
1178
+ firstTokenTime = handleCodexStreamEvent(context, runtime, rawEvent, firstTokenTime);
977
1179
  if (runtime.sawTerminalEvent) break;
978
1180
  }
979
1181
  return { firstTokenTime };
@@ -986,23 +1188,18 @@ async function processCodexResponseStream(
986
1188
  }
987
1189
  }
988
1190
 
989
- function handleCodexStreamEvent(args: {
990
- model: Model<"openai-codex-responses">;
991
- output: AssistantMessage;
992
- stream: AssistantMessageEventStream;
993
- runtime: CodexStreamRuntime;
994
- rawEvent: Record<string, unknown>;
995
- firstTokenTime?: number;
996
- }): number | undefined {
997
- const { model, output, stream, runtime, rawEvent } = args;
1191
+ function handleCodexStreamEvent(
1192
+ context: CodexStreamProcessingContext,
1193
+ runtime: CodexStreamRuntime,
1194
+ rawEvent: Record<string, unknown>,
1195
+ firstTokenTime: number | undefined,
1196
+ ): number | undefined {
1197
+ const { model, output, stream } = context;
998
1198
  const eventType = typeof rawEvent.type === "string" ? rawEvent.type : "";
999
- if (!eventType) return args.firstTokenTime;
1000
-
1001
- const blocks = output.content;
1002
- const blockIndex = () => blocks.length - 1;
1003
- let firstTokenTime = args.firstTokenTime;
1199
+ if (!eventType) return firstTokenTime;
1004
1200
 
1005
1201
  if (eventType === "response.output_item.added") {
1202
+ resetWhitespaceToolCallArgumentsDelta(runtime);
1006
1203
  if (!firstTokenTime) firstTokenTime = Date.now();
1007
1204
  const item = rawEvent.item as CodexEventItem;
1008
1205
  runtime.currentItem = item;
@@ -1011,7 +1208,7 @@ function handleCodexStreamEvent(args: {
1011
1208
  output.content.push(runtime.currentBlock);
1012
1209
  stream.push({
1013
1210
  type: getOutputBlockStartEventType(runtime.currentBlock),
1014
- contentIndex: blockIndex(),
1211
+ contentIndex: output.content.length - 1,
1015
1212
  partial: output,
1016
1213
  });
1017
1214
  return firstTokenTime;
@@ -1023,12 +1220,12 @@ function handleCodexStreamEvent(args: {
1023
1220
  }
1024
1221
 
1025
1222
  if (eventType === "response.reasoning_summary_text.delta") {
1026
- handleReasoningSummaryTextDelta(runtime.currentItem, runtime.currentBlock, rawEvent, stream, output, blockIndex);
1223
+ handleReasoningSummaryTextDelta(runtime.currentItem, runtime.currentBlock, rawEvent, stream, output);
1027
1224
  return firstTokenTime;
1028
1225
  }
1029
1226
 
1030
1227
  if (eventType === "response.reasoning_summary_part.done") {
1031
- handleReasoningSummaryPartDone(runtime.currentItem, runtime.currentBlock, stream, output, blockIndex);
1228
+ handleReasoningSummaryPartDone(runtime.currentItem, runtime.currentBlock, stream, output);
1032
1229
  return firstTokenTime;
1033
1230
  }
1034
1231
 
@@ -1038,58 +1235,48 @@ function handleCodexStreamEvent(args: {
1038
1235
  }
1039
1236
 
1040
1237
  if (eventType === "response.output_text.delta") {
1041
- handleMessageTextDelta(
1042
- runtime.currentItem,
1043
- runtime.currentBlock,
1044
- rawEvent,
1045
- stream,
1046
- output,
1047
- blockIndex,
1048
- "output_text",
1049
- );
1238
+ handleMessageTextDelta(runtime.currentItem, runtime.currentBlock, rawEvent, stream, output, "output_text");
1050
1239
  return firstTokenTime;
1051
1240
  }
1052
1241
 
1053
1242
  if (eventType === "response.refusal.delta") {
1054
- handleMessageTextDelta(
1055
- runtime.currentItem,
1056
- runtime.currentBlock,
1057
- rawEvent,
1058
- stream,
1059
- output,
1060
- blockIndex,
1061
- "refusal",
1062
- );
1243
+ handleMessageTextDelta(runtime.currentItem, runtime.currentBlock, rawEvent, stream, output, "refusal");
1063
1244
  return firstTokenTime;
1064
1245
  }
1065
1246
 
1066
1247
  if (eventType === "response.function_call_arguments.delta") {
1067
- handleToolCallArgumentsDelta(runtime.currentItem, runtime.currentBlock, rawEvent, stream, output, blockIndex);
1248
+ const interruption = handleToolCallArgumentsDelta(runtime, rawEvent, stream, output);
1249
+ if (interruption) interruptWhitespaceToolCallArgumentsDelta(runtime, interruption);
1068
1250
  return firstTokenTime;
1069
1251
  }
1070
1252
 
1071
1253
  if (eventType === "response.function_call_arguments.done") {
1254
+ resetWhitespaceToolCallArgumentsDelta(runtime);
1072
1255
  handleToolCallArgumentsDone(runtime.currentItem, runtime.currentBlock, rawEvent);
1073
1256
  return firstTokenTime;
1074
1257
  }
1075
1258
 
1076
1259
  if (eventType === "response.custom_tool_call_input.delta") {
1077
- handleCustomToolCallInputDelta(runtime.currentItem, runtime.currentBlock, rawEvent, stream, output, blockIndex);
1260
+ const interruption = handleCustomToolCallInputDelta(runtime, rawEvent, stream, output);
1261
+ if (interruption) interruptWhitespaceToolCallArgumentsDelta(runtime, interruption);
1078
1262
  return firstTokenTime;
1079
1263
  }
1080
1264
 
1081
1265
  if (eventType === "response.custom_tool_call_input.done") {
1266
+ resetWhitespaceToolCallArgumentsDelta(runtime);
1082
1267
  handleCustomToolCallInputDone(runtime.currentItem, runtime.currentBlock, rawEvent);
1083
1268
  return firstTokenTime;
1084
1269
  }
1085
1270
 
1086
1271
  if (eventType === "response.output_item.done") {
1087
- handleOutputItemDone(model, output, stream, runtime, rawEvent, blockIndex);
1272
+ resetWhitespaceToolCallArgumentsDelta(runtime);
1273
+ handleOutputItemDone(model, output, stream, runtime, rawEvent);
1088
1274
  return firstTokenTime;
1089
1275
  }
1090
1276
 
1091
1277
  if (eventType === "response.created") {
1092
- return handleResponseCreated(runtime, rawEvent);
1278
+ handleResponseCreated(runtime, rawEvent);
1279
+ return firstTokenTime;
1093
1280
  }
1094
1281
 
1095
1282
  if (eventType === "response.completed" || eventType === "response.done" || eventType === "response.incomplete") {
@@ -1097,6 +1284,18 @@ function handleCodexStreamEvent(args: {
1097
1284
  return firstTokenTime;
1098
1285
  }
1099
1286
 
1287
+ if (eventType === "response.metadata") {
1288
+ const moderation = asRecord(rawEvent.metadata)?.[CODEX_MODERATION_METADATA_KEY];
1289
+ if (moderation !== undefined) {
1290
+ try {
1291
+ context.options?.onModerationMetadata?.(moderation);
1292
+ } catch {
1293
+ // Diagnostic observer: failures must not disturb the stream.
1294
+ }
1295
+ }
1296
+ return firstTokenTime;
1297
+ }
1298
+
1100
1299
  if (eventType === "error" || eventType === "response.failed") {
1101
1300
  throw createCodexProviderStreamError(rawEvent);
1102
1301
  }
@@ -1154,7 +1353,6 @@ function handleReasoningSummaryTextDelta(
1154
1353
  rawEvent: Record<string, unknown>,
1155
1354
  stream: AssistantMessageEventStream,
1156
1355
  output: AssistantMessage,
1157
- blockIndex: () => number,
1158
1356
  ): void {
1159
1357
  if (currentItem?.type !== "reasoning" || currentBlock?.type !== "thinking") return;
1160
1358
  currentItem.summary = currentItem.summary || [];
@@ -1163,7 +1361,7 @@ function handleReasoningSummaryTextDelta(
1163
1361
  const delta = (rawEvent as { delta?: string }).delta || "";
1164
1362
  currentBlock.thinking += delta;
1165
1363
  lastPart.text += delta;
1166
- stream.push({ type: "thinking_delta", contentIndex: blockIndex(), delta, partial: output });
1364
+ stream.push({ type: "thinking_delta", contentIndex: output.content.length - 1, delta, partial: output });
1167
1365
  }
1168
1366
 
1169
1367
  function handleReasoningSummaryPartDone(
@@ -1171,7 +1369,6 @@ function handleReasoningSummaryPartDone(
1171
1369
  currentBlock: CodexOutputBlock | null,
1172
1370
  stream: AssistantMessageEventStream,
1173
1371
  output: AssistantMessage,
1174
- blockIndex: () => number,
1175
1372
  ): void {
1176
1373
  if (currentItem?.type !== "reasoning" || currentBlock?.type !== "thinking") return;
1177
1374
  currentItem.summary = currentItem.summary || [];
@@ -1179,7 +1376,7 @@ function handleReasoningSummaryPartDone(
1179
1376
  if (!lastPart) return;
1180
1377
  currentBlock.thinking += "\n\n";
1181
1378
  lastPart.text += "\n\n";
1182
- stream.push({ type: "thinking_delta", contentIndex: blockIndex(), delta: "\n\n", partial: output });
1379
+ stream.push({ type: "thinking_delta", contentIndex: output.content.length - 1, delta: "\n\n", partial: output });
1183
1380
  }
1184
1381
 
1185
1382
  function handleContentPartAdded(currentItem: CodexEventItem | null, rawEvent: Record<string, unknown>): void {
@@ -1197,13 +1394,20 @@ function handleMessageTextDelta(
1197
1394
  rawEvent: Record<string, unknown>,
1198
1395
  stream: AssistantMessageEventStream,
1199
1396
  output: AssistantMessage,
1200
- blockIndex: () => number,
1201
1397
  partType: "output_text" | "refusal",
1202
1398
  ): void {
1203
1399
  if (currentItem?.type !== "message" || currentBlock?.type !== "text") return;
1204
- if (!currentItem.content || currentItem.content.length === 0) return;
1205
- const lastPart = currentItem.content[currentItem.content.length - 1];
1206
- if (!lastPart || lastPart.type !== partType) return;
1400
+ currentItem.content = currentItem.content || [];
1401
+ let lastPart = currentItem.content[currentItem.content.length - 1];
1402
+ if (lastPart?.type !== partType) {
1403
+ // `content_part.added` never arrived (lossy proxy) — synthesize the part
1404
+ // so live text still streams instead of freezing until output_item.done.
1405
+ lastPart =
1406
+ partType === "output_text"
1407
+ ? { type: "output_text", text: "", annotations: [] }
1408
+ : { type: "refusal", refusal: "" };
1409
+ currentItem.content.push(lastPart);
1410
+ }
1207
1411
  const delta = (rawEvent as { delta?: string }).delta || "";
1208
1412
  currentBlock.text += delta;
1209
1413
  if (lastPart.type === "output_text") {
@@ -1211,26 +1415,33 @@ function handleMessageTextDelta(
1211
1415
  } else {
1212
1416
  lastPart.refusal += delta;
1213
1417
  }
1214
- stream.push({ type: "text_delta", contentIndex: blockIndex(), delta, partial: output });
1418
+ stream.push({ type: "text_delta", contentIndex: output.content.length - 1, delta, partial: output });
1215
1419
  }
1216
1420
 
1217
1421
  function handleToolCallArgumentsDelta(
1218
- currentItem: CodexEventItem | null,
1219
- currentBlock: CodexOutputBlock | null,
1422
+ runtime: CodexStreamRuntime,
1220
1423
  rawEvent: Record<string, unknown>,
1221
1424
  stream: AssistantMessageEventStream,
1222
1425
  output: AssistantMessage,
1223
- blockIndex: () => number,
1224
- ): void {
1225
- if (currentItem?.type !== "function_call" || currentBlock?.type !== "toolCall") return;
1426
+ ): CodexWhitespaceToolCallArgumentsDeltaInterruption | undefined {
1226
1427
  const delta = (rawEvent as { delta?: string }).delta || "";
1428
+ // Observe BEFORE the item/block guard: degenerate whitespace frames can keep
1429
+ // arriving after the item closed (currentBlock detached) and still count as
1430
+ // progress for the idle watchdogs — dropping them unobserved would reopen
1431
+ // the infinite-loop hole the breaker exists for.
1432
+ const interruption = observeWhitespaceToolCallArgumentsDelta(runtime, rawEvent, delta);
1433
+ if (interruption) return interruption;
1434
+ const currentItem = runtime.currentItem;
1435
+ const currentBlock = runtime.currentBlock;
1436
+ if (currentItem?.type !== "function_call" || currentBlock?.type !== "toolCall") return undefined;
1227
1437
  currentBlock.partialJson += delta;
1228
1438
  const throttled = parseStreamingJsonThrottled(currentBlock.partialJson, currentBlock.lastParseLen ?? 0);
1229
1439
  if (throttled) {
1230
1440
  currentBlock.arguments = throttled.value;
1231
1441
  currentBlock.lastParseLen = throttled.parsedLen;
1232
1442
  }
1233
- stream.push({ type: "toolcall_delta", contentIndex: blockIndex(), delta, partial: output });
1443
+ stream.push({ type: "toolcall_delta", contentIndex: output.content.length - 1, delta, partial: output });
1444
+ return undefined;
1234
1445
  }
1235
1446
 
1236
1447
  function handleToolCallArgumentsDone(
@@ -1249,18 +1460,22 @@ function handleToolCallArgumentsDone(
1249
1460
  }
1250
1461
 
1251
1462
  function handleCustomToolCallInputDelta(
1252
- currentItem: CodexEventItem | null,
1253
- currentBlock: CodexOutputBlock | null,
1463
+ runtime: CodexStreamRuntime,
1254
1464
  rawEvent: Record<string, unknown>,
1255
1465
  stream: AssistantMessageEventStream,
1256
1466
  output: AssistantMessage,
1257
- blockIndex: () => number,
1258
- ): void {
1259
- if (currentItem?.type !== "custom_tool_call" || currentBlock?.type !== "toolCall") return;
1467
+ ): CodexWhitespaceToolCallArgumentsDeltaInterruption | undefined {
1260
1468
  const delta = (rawEvent as { delta?: string }).delta || "";
1469
+ // Observe BEFORE the item/block guard — see handleToolCallArgumentsDelta.
1470
+ const interruption = observeWhitespaceToolCallArgumentsDelta(runtime, rawEvent, delta);
1471
+ if (interruption) return interruption;
1472
+ const currentItem = runtime.currentItem;
1473
+ const currentBlock = runtime.currentBlock;
1474
+ if (currentItem?.type !== "custom_tool_call" || currentBlock?.type !== "toolCall") return undefined;
1261
1475
  currentBlock.partialJson += delta;
1262
- currentBlock.arguments = { input: currentBlock.partialJson };
1263
- stream.push({ type: "toolcall_delta", contentIndex: blockIndex(), delta, partial: output });
1476
+ (currentBlock.arguments as { input?: string }).input = currentBlock.partialJson;
1477
+ stream.push({ type: "toolcall_delta", contentIndex: output.content.length - 1, delta, partial: output });
1478
+ return undefined;
1264
1479
  }
1265
1480
 
1266
1481
  function handleCustomToolCallInputDone(
@@ -1282,9 +1497,10 @@ function handleOutputItemDone(
1282
1497
  stream: AssistantMessageEventStream,
1283
1498
  runtime: CodexStreamRuntime,
1284
1499
  rawEvent: Record<string, unknown>,
1285
- blockIndex: () => number,
1286
1500
  ): void {
1287
- const item = structuredCloneJSON(rawEvent.item) as CodexEventItem;
1501
+ const rawItem = rawEvent.item;
1502
+ if (!rawItem || typeof rawItem !== "object") return;
1503
+ const item = structuredCloneJSON(rawItem) as CodexEventItem;
1288
1504
  runtime.nativeOutputItems.push(item as unknown as Record<string, unknown>);
1289
1505
 
1290
1506
  if (item.type === "reasoning" && runtime.currentBlock?.type === "thinking") {
@@ -1292,7 +1508,7 @@ function handleOutputItemDone(
1292
1508
  runtime.currentBlock.thinkingSignature = JSON.stringify(item);
1293
1509
  stream.push({
1294
1510
  type: "thinking_end",
1295
- contentIndex: blockIndex(),
1511
+ contentIndex: output.content.length - 1,
1296
1512
  content: runtime.currentBlock.thinking,
1297
1513
  partial: output,
1298
1514
  });
@@ -1308,7 +1524,7 @@ function handleOutputItemDone(
1308
1524
  runtime.currentBlock.textSignature = encodeTextSignatureV1(item.id, phase);
1309
1525
  stream.push({
1310
1526
  type: "text_end",
1311
- contentIndex: blockIndex(),
1527
+ contentIndex: output.content.length - 1,
1312
1528
  content: runtime.currentBlock.text,
1313
1529
  partial: output,
1314
1530
  });
@@ -1329,9 +1545,12 @@ function handleOutputItemDone(
1329
1545
  runtime.currentBlock.arguments = toolCall.arguments;
1330
1546
  delete (runtime.currentBlock as { partialJson?: string }).partialJson;
1331
1547
  delete (runtime.currentBlock as { lastParseLen?: number }).lastParseLen;
1548
+ // Detach so a late/duplicate arguments.delta cannot append to the
1549
+ // finished block or trip the whitespace-loop guard against it.
1550
+ runtime.currentBlock = null;
1332
1551
  }
1333
1552
  runtime.canSafelyReplayWebsocketOverSse = false;
1334
- stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
1553
+ stream.push({ type: "toolcall_end", contentIndex: output.content.length - 1, toolCall, partial: output });
1335
1554
  return;
1336
1555
  }
1337
1556
 
@@ -1347,21 +1566,25 @@ function handleOutputItemDone(
1347
1566
  arguments: { input: rawInput },
1348
1567
  customWireName: item.name,
1349
1568
  };
1569
+ if (runtime.currentBlock?.type === "toolCall") {
1570
+ runtime.currentBlock.arguments = { input: rawInput };
1571
+ delete (runtime.currentBlock as { partialJson?: string }).partialJson;
1572
+ runtime.currentBlock = null;
1573
+ }
1350
1574
  runtime.canSafelyReplayWebsocketOverSse = false;
1351
- stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
1575
+ stream.push({ type: "toolcall_end", contentIndex: output.content.length - 1, toolCall, partial: output });
1352
1576
  return;
1353
1577
  }
1354
1578
 
1355
1579
  void model;
1356
1580
  }
1357
1581
 
1358
- function handleResponseCreated(runtime: CodexStreamRuntime, rawEvent: Record<string, unknown>): number | undefined {
1582
+ function handleResponseCreated(runtime: CodexStreamRuntime, rawEvent: Record<string, unknown>): void {
1359
1583
  const response = (rawEvent as { response?: { id?: string } }).response;
1360
1584
  const state = runtime.websocketState;
1361
- if (runtime.transport === "websocket" && state && typeof response?.id === "string" && response.id.length > 0) {
1585
+ if (state && runtime.transport === "websocket" && typeof response?.id === "string" && response.id.length > 0) {
1362
1586
  state.lastResponseId = response.id;
1363
1587
  }
1364
- return undefined;
1365
1588
  }
1366
1589
 
1367
1590
  function handleResponseCompleted(
@@ -1384,6 +1607,7 @@ function handleResponseCompleted(
1384
1607
  };
1385
1608
  status?: string;
1386
1609
  service_tier?: ServiceTier | "default";
1610
+ end_turn?: boolean;
1387
1611
  };
1388
1612
  }
1389
1613
  ).response;
@@ -1394,21 +1618,58 @@ function handleResponseCompleted(
1394
1618
  }
1395
1619
 
1396
1620
  const state = runtime.websocketState;
1397
- if (runtime.transport === "websocket" && state) {
1398
- state.lastRequest = structuredCloneJSON(runtime.requestBodyForState);
1399
- if (typeof response?.id === "string" && response.id.length > 0) {
1400
- state.lastResponseId = response.id;
1401
- state.lastResponseItems = stripInputItemIds(structuredCloneJSON(runtime.nativeOutputItems));
1621
+ if (state) {
1622
+ if (runtime.transport !== "websocket") {
1623
+ // SSE turns never chain (previous_response_id is websocket-only on this
1624
+ // endpoint); a completed SSE turn also invalidates any websocket append
1625
+ // baseline, which no longer matches the transcript.
1626
+ resetCodexWebSocketAppendState(state);
1627
+ } else {
1628
+ state.lastRequest = structuredCloneJSON(runtime.requestBodyForState);
1629
+ if (typeof response?.id === "string" && response.id.length > 0) {
1630
+ state.lastResponseId = response.id;
1631
+ state.lastResponseItems = stripInputItemIds(structuredCloneJSON(runtime.nativeOutputItems));
1632
+ state.canAppend = rawEvent.type === "response.done" || rawEvent.type === "response.completed";
1633
+ } else {
1634
+ // Without a response id the append baseline cannot be trusted.
1635
+ state.canAppend = false;
1636
+ }
1637
+ }
1638
+ }
1639
+
1640
+ // Finalize any toolCall block whose output_item.done never arrived: the
1641
+ // throttled delta parser may have left block.arguments stale, and the
1642
+ // toolUse promotion below would hand the agent incomplete arguments.
1643
+ // Mirrors the shared decoder's response.completed sweep; also strips the
1644
+ // transient partialJson/lastParseLen fields so they never persist.
1645
+ for (const block of output.content) {
1646
+ if (block.type !== "toolCall") continue;
1647
+ const pending = block as ToolCall & { partialJson?: string; lastParseLen?: number };
1648
+ if (pending.partialJson) {
1649
+ pending.arguments =
1650
+ pending.customWireName !== undefined
1651
+ ? { input: pending.partialJson }
1652
+ : parseStreamingJson(pending.partialJson);
1402
1653
  }
1403
- state.canAppend = rawEvent.type === "response.done" || rawEvent.type === "response.completed";
1654
+ delete pending.partialJson;
1655
+ delete pending.lastParseLen;
1404
1656
  }
1405
1657
 
1406
1658
  calculateCost(model, output.usage);
1407
1659
  applyCodexServiceTierPricing(model, output.usage, response?.service_tier, runtime.requestBodyForState.service_tier);
1408
- output.stopReason = mapOpenAIResponsesStopReason(response?.status as OpenAI.Responses.ResponseStatus | undefined);
1660
+ output.stopReason = mapOpenAIResponsesStopReason(response?.status as ResponseStatus | undefined);
1409
1661
  if (output.content.some(block => block.type === "toolCall") && output.stopReason === "stop") {
1410
1662
  output.stopReason = "toolUse";
1411
1663
  }
1664
+ // The Codex backend marks an unfinished turn with `end_turn: false` on the
1665
+ // terminal event: this response ended on commentary only and the model
1666
+ // expects to be sampled again (mirrors codex-rs `needs_follow_up`). Surface
1667
+ // it as a non-terminal stop so the agent loop replays history and
1668
+ // re-samples instead of ending the turn. Gated on "stop": with tool calls
1669
+ // present the loop continues through tool execution anyway.
1670
+ if (response?.end_turn === false && output.stopReason === "stop") {
1671
+ output.stopDetails = { type: "pause_turn" };
1672
+ }
1412
1673
  }
1413
1674
 
1414
1675
  async function recoverCodexStreamError(
@@ -1416,6 +1677,9 @@ async function recoverCodexStreamError(
1416
1677
  runtime: CodexStreamRuntime,
1417
1678
  error: unknown,
1418
1679
  ): Promise<boolean> {
1680
+ if (await tryRecoverCodexWhitespaceToolCallLoop(context, runtime, error)) {
1681
+ return true;
1682
+ }
1419
1683
  if (await tryReconnectCodexWebSocketOnConnectionLimit(context, runtime, error)) {
1420
1684
  return true;
1421
1685
  }
@@ -1431,11 +1695,90 @@ async function recoverCodexStreamError(
1431
1695
  return false;
1432
1696
  }
1433
1697
 
1698
+ /**
1699
+ * Pop the half-built degenerate tool-call block (the one whose arguments were
1700
+ * nothing but whitespace) off the output accumulator so it never surfaces in the
1701
+ * caller's message. Any legitimate content produced before it is preserved.
1702
+ */
1703
+ function dropTrailingDegenerateToolCall(output: AssistantMessage, runtime: CodexStreamRuntime): void {
1704
+ const block = runtime.currentBlock;
1705
+ if (block && block.type === "toolCall" && output.content[output.content.length - 1] === block) {
1706
+ output.content.pop();
1707
+ }
1708
+ runtime.currentItem = null;
1709
+ runtime.currentBlock = null;
1710
+ }
1711
+
1712
+ /**
1713
+ * Recover from the degenerate whitespace-only tool-call argument loop
1714
+ * ({@link CodexWhitespaceToolCallLoopError}). The interrupted function call has
1715
+ * no usable arguments, so drop the partial turn and replay the request from
1716
+ * scratch — bounded by {@link CODEX_WHITESPACE_LOOP_RETRY_LIMIT}. Sampling
1717
+ * nondeterminism usually breaks the loop on a fresh attempt; once the budget is
1718
+ * exhausted the original error is surfaced (now without the junk tool call
1719
+ * polluting the message). Replay is refused once a toolcall_end was already
1720
+ * delivered to the consumer (`canSafelyReplayWebsocketOverSse`) — it would
1721
+ * re-emit the same tool calls.
1722
+ */
1723
+ async function tryRecoverCodexWhitespaceToolCallLoop(
1724
+ context: CodexStreamProcessingContext,
1725
+ runtime: CodexStreamRuntime,
1726
+ error: unknown,
1727
+ ): Promise<boolean> {
1728
+ if (!(error instanceof CodexWhitespaceToolCallLoopError)) {
1729
+ return false;
1730
+ }
1731
+ // Drop the half-built degenerate tool call whether or not we retry, so it
1732
+ // never reaches the caller's message.
1733
+ dropTrailingDegenerateToolCall(context.output, runtime);
1734
+ if (
1735
+ runtime.whitespaceLoopRetries >= CODEX_WHITESPACE_LOOP_RETRY_LIMIT ||
1736
+ !runtime.canSafelyReplayWebsocketOverSse ||
1737
+ context.options?.signal?.aborted
1738
+ ) {
1739
+ return false;
1740
+ }
1741
+
1742
+ runtime.whitespaceLoopRetries += 1;
1743
+ const websocketState = context.requestContext.websocketState;
1744
+ if (websocketState) {
1745
+ resetCodexWebSocketAppendState(websocketState);
1746
+ resetCodexSessionMetadata(websocketState);
1747
+ }
1748
+
1749
+ logCodexDebug("retrying codex turn after whitespace-only tool-call argument loop", {
1750
+ retry: runtime.whitespaceLoopRetries,
1751
+ retryBudget: CODEX_WHITESPACE_LOOP_RETRY_LIMIT,
1752
+ transport: runtime.transport,
1753
+ });
1754
+
1755
+ runtime.currentItem = null;
1756
+ runtime.currentBlock = null;
1757
+ runtime.sawTerminalEvent = false;
1758
+ runtime.nativeOutputItems.length = 0;
1759
+ resetWhitespaceToolCallArgumentsDelta(runtime);
1760
+ resetOutputState(context.output);
1761
+ context.firstTokenTime = undefined;
1762
+ await scheduler.wait(CODEX_WHITESPACE_LOOP_RETRY_DELAY_MS * runtime.whitespaceLoopRetries, {
1763
+ signal: context.requestSetup.requestSignal,
1764
+ });
1765
+
1766
+ if (runtime.transport === "websocket" && websocketState) {
1767
+ await reopenCodexWebSocketRuntimeStream(context, runtime, websocketState);
1768
+ return true;
1769
+ }
1770
+
1771
+ await reopenCodexSseRuntimeStream(context, runtime, websocketState);
1772
+ return true;
1773
+ }
1774
+
1434
1775
  /**
1435
1776
  * Handles `websocket_connection_limit_reached` errors by closing the stale connection
1436
1777
  * and opening a fresh websocket. If content has already been emitted to the caller,
1437
1778
  * falls back to SSE replay (same as other WS failures) since we cannot safely
1438
- * continue a partial response on a new connection.
1779
+ * continue a partial response on a new connection. If a tool call was already
1780
+ * delivered (`canSafelyReplayWebsocketOverSse` is false), the error surfaces
1781
+ * instead — replaying would re-emit the same tool calls.
1439
1782
  */
1440
1783
  async function tryReconnectCodexWebSocketOnConnectionLimit(
1441
1784
  context: CodexStreamProcessingContext,
@@ -1455,6 +1798,12 @@ async function tryReconnectCodexWebSocketOnConnectionLimit(
1455
1798
  websocketState.connection = undefined;
1456
1799
  resetCodexWebSocketAppendState(websocketState);
1457
1800
 
1801
+ if (context.output.content.length > 0 && !runtime.canSafelyReplayWebsocketOverSse) {
1802
+ // A toolcall_end already reached the consumer; a full replay would emit
1803
+ // the same tool calls a second time. Let the error surface instead.
1804
+ return false;
1805
+ }
1806
+
1458
1807
  logCodexDebug("codex websocket connection limit reached, reconnecting", {
1459
1808
  hadContent: context.output.content.length > 0,
1460
1809
  retry: runtime.websocketStreamRetries,
@@ -1463,7 +1812,6 @@ async function tryReconnectCodexWebSocketOnConnectionLimit(
1463
1812
  if (context.output.content.length > 0) {
1464
1813
  // Content already emitted to the caller — cannot safely continue on a new WS.
1465
1814
  // Reset and replay the full request over SSE.
1466
- runtime.canSafelyReplayWebsocketOverSse = true;
1467
1815
  runtime.currentItem = null;
1468
1816
  runtime.currentBlock = null;
1469
1817
  runtime.nativeOutputItems.length = 0;
@@ -1474,14 +1822,40 @@ async function tryReconnectCodexWebSocketOnConnectionLimit(
1474
1822
  return true;
1475
1823
  }
1476
1824
 
1477
- // No content emitted yet — reconnect over websocket.
1825
+ // No content emitted yet — clear accumulator state from the failed attempt
1826
+ // (blockless native items can exist even with empty content) and reconnect
1827
+ // over websocket, bounded by the shared retry budget: an account-scoped
1828
+ // limit can reject every fresh connection, and an unbounded loop would
1829
+ // hammer the endpoint with zero backoff.
1830
+ runtime.currentItem = null;
1831
+ runtime.currentBlock = null;
1832
+ runtime.nativeOutputItems.length = 0;
1833
+ context.firstTokenTime = undefined;
1834
+ if (runtime.websocketStreamRetries >= getCodexWebSocketRetryBudget()) {
1835
+ recordCodexWebSocketFailure(websocketState, true);
1836
+ await reopenCodexSseRuntimeStream(context, runtime, websocketState);
1837
+ return true;
1838
+ }
1478
1839
  runtime.websocketStreamRetries += 1;
1840
+ await scheduler.wait(getCodexWebSocketRetryDelayMs(runtime.websocketStreamRetries), {
1841
+ signal: context.requestSetup.requestSignal,
1842
+ });
1479
1843
  await reopenCodexWebSocketRuntimeStream(context, runtime, websocketState);
1480
1844
  return true;
1481
1845
  }
1482
1846
 
1483
- function isCodexPreviousResponseNotFound(error: unknown): boolean {
1484
- return error instanceof CodexProviderStreamError && error.code === "previous_response_not_found";
1847
+ function isCodexStalePreviousResponseError(error: unknown): boolean {
1848
+ if (error instanceof CodexProviderStreamError) return error.code === "previous_response_not_found";
1849
+ if (!(error instanceof Error)) return false;
1850
+ if ((error as { code?: string }).code === "previous_response_not_found") return true;
1851
+ // "unsupported": the backend intermittently rejects the parameter outright
1852
+ // with `{"detail":"Unsupported parameter: previous_response_id"}` (no
1853
+ // `error.code`); treat it like a stale chain so the turn replays with full
1854
+ // context instead of surfacing the 400.
1855
+ return (
1856
+ /previous[ _]?response/i.test(error.message) &&
1857
+ /not[ _]?found|invalid|expired|stale|unsupported/i.test(error.message)
1858
+ );
1485
1859
  }
1486
1860
 
1487
1861
  async function tryRecoverCodexPreviousResponseNotFound(
@@ -1491,15 +1865,18 @@ async function tryRecoverCodexPreviousResponseNotFound(
1491
1865
  ): Promise<boolean> {
1492
1866
  const websocketState = context.requestContext.websocketState;
1493
1867
  if (
1494
- !isCodexPreviousResponseNotFound(error) ||
1868
+ !isCodexStalePreviousResponseError(error) ||
1495
1869
  !websocketState ||
1496
- runtime.transport !== "websocket" ||
1497
1870
  context.output.content.length > 0 ||
1498
1871
  context.options?.signal?.aborted ||
1499
1872
  runtime.providerRetryAttempt >= CODEX_MAX_RETRIES
1500
1873
  ) {
1501
1874
  return false;
1502
1875
  }
1876
+ if (runtime.transport !== "websocket") {
1877
+ // SSE never sends previous_response_id; let other recovery handle it.
1878
+ return false;
1879
+ }
1503
1880
 
1504
1881
  runtime.providerRetryAttempt += 1;
1505
1882
  resetCodexWebSocketAppendState(websocketState);
@@ -1551,6 +1928,13 @@ async function tryReplayWebsocketFailureOverSse(
1551
1928
 
1552
1929
  if (!activateFallback) {
1553
1930
  runtime.websocketStreamRetries += 1;
1931
+ // Full re-send on a fresh socket: clear accumulator state from the failed
1932
+ // attempt. Content is empty here, but blockless native items (e.g.
1933
+ // web_search_call) may already have accumulated.
1934
+ runtime.currentItem = null;
1935
+ runtime.currentBlock = null;
1936
+ runtime.nativeOutputItems.length = 0;
1937
+ context.firstTokenTime = undefined;
1554
1938
  await scheduler.wait(getCodexWebSocketRetryDelayMs(runtime.websocketStreamRetries), {
1555
1939
  signal: context.requestSetup.requestSignal,
1556
1940
  });
@@ -1558,14 +1942,11 @@ async function tryReplayWebsocketFailureOverSse(
1558
1942
  return true;
1559
1943
  }
1560
1944
 
1561
- if (replayingBufferedOutputOverSse) {
1562
- runtime.canSafelyReplayWebsocketOverSse = true;
1563
- runtime.currentItem = null;
1564
- runtime.currentBlock = null;
1565
- runtime.nativeOutputItems.length = 0;
1566
- resetOutputState(context.output);
1567
- context.firstTokenTime = undefined;
1568
- }
1945
+ runtime.currentItem = null;
1946
+ runtime.currentBlock = null;
1947
+ runtime.nativeOutputItems.length = 0;
1948
+ resetOutputState(context.output);
1949
+ context.firstTokenTime = undefined;
1569
1950
 
1570
1951
  await reopenCodexSseRuntimeStream(context, runtime, state);
1571
1952
  return true;
@@ -1587,7 +1968,7 @@ async function tryRetryCodexProviderError(
1587
1968
 
1588
1969
  runtime.providerRetryAttempt += 1;
1589
1970
  const websocketState = context.requestContext.websocketState;
1590
- if (runtime.transport === "websocket" && websocketState) {
1971
+ if (websocketState) {
1591
1972
  resetCodexWebSocketAppendState(websocketState);
1592
1973
  resetCodexSessionMetadata(websocketState);
1593
1974
  }
@@ -1602,6 +1983,7 @@ async function tryRetryCodexProviderError(
1602
1983
  runtime.currentItem = null;
1603
1984
  runtime.currentBlock = null;
1604
1985
  runtime.sawTerminalEvent = false;
1986
+ runtime.nativeOutputItems.length = 0;
1605
1987
  resetOutputState(context.output);
1606
1988
  context.firstTokenTime = undefined;
1607
1989
  await scheduler.wait(CODEX_RETRY_DELAY_MS * runtime.providerRetryAttempt, {
@@ -1627,7 +2009,7 @@ function finalizeCodexResponse(
1627
2009
  throw new Error("Request was aborted");
1628
2010
  }
1629
2011
  if (!runtime.sawTerminalEvent) {
1630
- if (runtime.transport === "websocket" && context.requestContext.websocketState) {
2012
+ if (context.requestContext.websocketState) {
1631
2013
  resetCodexWebSocketAppendState(context.requestContext.websocketState);
1632
2014
  resetCodexSessionMetadata(context.requestContext.websocketState);
1633
2015
  }
@@ -1684,9 +2066,10 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
1684
2066
  const output = createAssistantOutput(model);
1685
2067
  const requestSetup = createRequestSetup(options);
1686
2068
  let processingContext: CodexStreamProcessingContext | undefined;
2069
+ let requestContext: CodexRequestContext | undefined;
1687
2070
 
1688
2071
  try {
1689
- const requestContext = await buildCodexRequestContext(model, context, options, output);
2072
+ requestContext = await buildCodexRequestContext(model, context, options, output);
1690
2073
  const initialTransport = await openInitialCodexEventStream(model, options, requestSetup, requestContext);
1691
2074
  const runtime = createCodexStreamRuntime({
1692
2075
  ...initialTransport,
@@ -1720,12 +2103,13 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
1720
2103
  stream,
1721
2104
  options,
1722
2105
  requestSetup,
1723
- requestContext: {
2106
+ requestContext: requestContext ?? {
1724
2107
  apiKey: "",
1725
2108
  accountId: "",
1726
2109
  baseUrl: model.baseUrl || CODEX_BASE_URL,
1727
2110
  url: "",
1728
2111
  requestHeaders: {},
2112
+ responsesLite: options?.responsesLite === true,
1729
2113
  transformedBody: { model: model.id },
1730
2114
  rawRequestDump: {
1731
2115
  provider: model.provider,
@@ -1738,8 +2122,19 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
1738
2122
  },
1739
2123
  startTime,
1740
2124
  } satisfies CodexStreamProcessingContext);
1741
- const failure = await handleCodexStreamFailure(failureContext, error);
1742
- stream.push({ type: "error", reason: failure.stopReason as "error" | "aborted", error: failure });
2125
+ try {
2126
+ const failure = await handleCodexStreamFailure(failureContext, error);
2127
+ stream.push({ type: "error", reason: failure.stopReason as "error" | "aborted", error: failure });
2128
+ } catch (failureError) {
2129
+ // Last resort — the failure handler itself threw (exotic error object or
2130
+ // request-dump formatting). Never leave the stream un-ended.
2131
+ logger.error("Codex stream failure handler threw", {
2132
+ error: failureError instanceof Error ? failureError.message : String(failureError),
2133
+ });
2134
+ output.stopReason = "error";
2135
+ output.errorMessage ??= error instanceof Error ? error.message : String(error);
2136
+ stream.push({ type: "error", reason: "error", error: output });
2137
+ }
1743
2138
  stream.end();
1744
2139
  }
1745
2140
  })();
@@ -1751,7 +2146,7 @@ export async function prewarmOpenAICodexResponses(
1751
2146
  model: Model<"openai-codex-responses">,
1752
2147
  options?: Pick<
1753
2148
  OpenAICodexResponsesOptions,
1754
- "apiKey" | "headers" | "sessionId" | "signal" | "preferWebsockets" | "providerSessionState"
2149
+ "apiKey" | "headers" | "sessionId" | "signal" | "preferWebsockets" | "providerSessionState" | "responsesLite"
1755
2150
  >,
1756
2151
  ): Promise<void> {
1757
2152
  const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
@@ -1761,7 +2156,8 @@ export async function prewarmOpenAICodexResponses(
1761
2156
  const url = resolveCodexResponsesUrl(baseUrl);
1762
2157
  const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(options?.sessionId);
1763
2158
  const providerSessionState = getCodexProviderSessionState(options?.providerSessionState);
1764
- const sessionKey = getCodexWebSocketSessionKey(promptCacheKey, model, accountId, baseUrl);
2159
+ const responsesLite = options?.responsesLite === true;
2160
+ const sessionKey = getCodexWebSocketSessionKey(promptCacheKey, model, accountId, baseUrl, responsesLite);
1765
2161
  const publicSessionKey = getCodexPublicSessionKey(promptCacheKey, model, baseUrl);
1766
2162
  if (publicSessionKey && sessionKey) {
1767
2163
  providerSessionState?.webSocketPublicToPrivate.set(publicSessionKey, sessionKey);
@@ -1778,6 +2174,7 @@ export async function prewarmOpenAICodexResponses(
1778
2174
  promptCacheKey,
1779
2175
  "websocket",
1780
2176
  state,
2177
+ responsesLite,
1781
2178
  );
1782
2179
  await logger.time(
1783
2180
  "prewarmCodex:establishWs",
@@ -1807,10 +2204,14 @@ function getCodexWebSocketSessionKey(
1807
2204
  model: Model<"openai-codex-responses">,
1808
2205
  accountId: string,
1809
2206
  baseUrl: string,
2207
+ responsesLite: boolean,
1810
2208
  ): string | undefined {
1811
2209
  const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(sessionId);
1812
2210
  if (!promptCacheKey) return undefined;
1813
- return `${accountId}:${baseUrl}:${model.id}:${promptCacheKey}`;
2211
+ // Responses Lite is connection-scoped on the WebSocket upgrade, so lite and
2212
+ // non-lite turns must never share a pooled socket or append state.
2213
+ const liteSuffix = responsesLite ? ":lite" : "";
2214
+ return `${accountId}:${baseUrl}:${model.id}:${promptCacheKey}${liteSuffix}`;
1814
2215
  }
1815
2216
 
1816
2217
  function getCodexPublicSessionKey(
@@ -1854,13 +2255,18 @@ function resetCodexWebSocketAppendState(state: CodexWebSocketSessionState): void
1854
2255
  function resetCodexSessionMetadata(state: CodexWebSocketSessionState): void {
1855
2256
  state.turnState = undefined;
1856
2257
  state.modelsEtag = undefined;
1857
- state.reasoningIncluded = undefined;
1858
2258
  }
1859
2259
 
1860
2260
  function recordCodexWebSocketFailure(state: CodexWebSocketSessionState, activateFallback: boolean): void {
1861
2261
  resetCodexWebSocketAppendState(state);
1862
- state.connection?.close("fallback");
1863
- state.connection = undefined;
2262
+ // Never tear down a CONNECTING socket: it belongs to a concurrent caller's
2263
+ // in-flight handshake (prewarm/request race); closing it would reject that
2264
+ // caller with a fatal "websocket closed before open" and disable websockets
2265
+ // for the whole session.
2266
+ if (state.connection && !state.connection.isConnecting()) {
2267
+ state.connection.close("fallback");
2268
+ state.connection = undefined;
2269
+ }
1864
2270
  state.lastFallbackAt = Date.now();
1865
2271
  if (activateFallback && !state.disableWebsocket) {
1866
2272
  state.disableWebsocket = true;
@@ -1949,28 +2355,6 @@ export function getOpenAICodexTransportDetails(
1949
2355
  };
1950
2356
  }
1951
2357
 
1952
- function buildAppendInput(
1953
- previous: RequestBody | undefined,
1954
- previousResponseItems: InputItem[] | undefined,
1955
- current: RequestBody,
1956
- ): InputItem[] | null {
1957
- if (!previous) return null;
1958
- if (!Array.isArray(previous.input) || !Array.isArray(current.input)) return null;
1959
- const previousWithoutInput = { ...previous, input: undefined };
1960
- const currentWithoutInput = { ...current, input: undefined };
1961
- if (JSON.stringify(previousWithoutInput) !== JSON.stringify(currentWithoutInput)) {
1962
- return null;
1963
- }
1964
- const baseline = [...previous.input, ...(previousResponseItems ?? [])];
1965
- if (current.input.length <= baseline.length) return null;
1966
- for (let index = 0; index < baseline.length; index += 1) {
1967
- if (JSON.stringify(baseline[index]) !== JSON.stringify(current.input[index])) {
1968
- return null;
1969
- }
1970
- }
1971
- return current.input.slice(baseline.length) as InputItem[];
1972
- }
1973
-
1974
2358
  function stripInputItemIds(items: Array<Record<string, unknown>>): InputItem[] {
1975
2359
  return items.map(item => {
1976
2360
  if (item.id == null) return item as InputItem;
@@ -1997,37 +2381,40 @@ function recordCodexWebSocketRequestStats(
1997
2381
  state.stats.lastPreviousResponseId = undefined;
1998
2382
  }
1999
2383
 
2000
- function buildCodexWebSocketRequest(
2384
+ /**
2385
+ * Shape the next websocket turn's request body: when the session's append
2386
+ * baseline is intact (same options, strict history prefix), chain via
2387
+ * `previous_response_id` + delta-only `input`; otherwise break the chain and
2388
+ * replay the full transcript. SSE requests never chain — the HTTP endpoint's
2389
+ * request schema has no `previous_response_id` (codex-rs carries it only on
2390
+ * websocket `response.create` frames) and strict gateway validators 400 it
2391
+ * with `{"detail":"Unsupported parameter: previous_response_id"}`.
2392
+ */
2393
+ function buildCodexChainedRequestBody(
2001
2394
  requestBody: RequestBody,
2002
2395
  state: CodexWebSocketSessionState | undefined,
2003
- ): Record<string, unknown> {
2004
- const appendInput = state?.canAppend
2005
- ? buildAppendInput(state.lastRequest, state.lastResponseItems, requestBody)
2396
+ ): RequestBody {
2397
+ const chainable = state?.canAppend === true;
2398
+ const appendInput = chainable
2399
+ ? buildResponsesDeltaInput<InputItem>(state.lastRequest, state.lastResponseItems, requestBody)
2006
2400
  : null;
2007
2401
  if (appendInput && appendInput.length > 0 && state?.lastResponseId) {
2008
- const request = {
2009
- type: "response.create",
2010
- ...requestBody,
2011
- previous_response_id: state.lastResponseId,
2012
- input: appendInput,
2013
- };
2014
- recordCodexWebSocketRequestStats(state, request);
2015
- return request;
2016
- }
2017
- if (state?.canAppend) {
2018
- logCodexDebug("codex websocket append reset", {
2402
+ const body: RequestBody = { ...requestBody, previous_response_id: state.lastResponseId, input: appendInput };
2403
+ recordCodexWebSocketRequestStats(state, body);
2404
+ return body;
2405
+ }
2406
+ if (chainable && state) {
2407
+ // Chaining was eligible but the prefix/options check failed: history
2408
+ // mutated or options changed — break the chain.
2409
+ logCodexDebug("codex append reset", {
2019
2410
  hadTurnStateHeader: Boolean(state.turnState),
2020
2411
  hadModelsEtagHeader: Boolean(state.modelsEtag),
2021
2412
  });
2022
2413
  resetCodexWebSocketAppendState(state);
2023
2414
  resetCodexSessionMetadata(state);
2024
2415
  }
2025
- const request = {
2026
- type: "response.create",
2027
- ...requestBody,
2028
- };
2029
- recordCodexWebSocketRequestStats(state, request);
2030
- return request;
2416
+ recordCodexWebSocketRequestStats(state, requestBody);
2417
+ return requestBody;
2031
2418
  }
2032
2419
 
2033
2420
  function toWebSocketUrl(url: string): string {
@@ -2091,6 +2478,11 @@ class CodexWebSocketConnection {
2091
2478
  return this.#socket?.readyState === WebSocket.OPEN;
2092
2479
  }
2093
2480
 
2481
+ /** True while a handshake (possibly started by another caller) is still in flight. */
2482
+ isConnecting(): boolean {
2483
+ return this.#connectPromise !== undefined;
2484
+ }
2485
+
2094
2486
  /**
2095
2487
  * Stricter variant of {@link isOpen} for the connection-pool reuse gate.
2096
2488
  * Refuses sockets that have been silent past {@link CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS}.
@@ -2146,10 +2538,18 @@ class CodexWebSocketConnection {
2146
2538
  this.#socket = socket;
2147
2539
  let settled = false;
2148
2540
  let timeout: NodeJS.Timeout | undefined;
2541
+ const clearPending = () => {
2542
+ if (timeout !== undefined) {
2543
+ clearTimeout(timeout);
2544
+ timeout = undefined;
2545
+ }
2546
+ if (signal) signal.removeEventListener("abort", onAbort);
2547
+ };
2149
2548
  const onAbort = () => {
2150
2549
  socket.close(1000, "aborted");
2151
2550
  if (!settled) {
2152
2551
  settled = true;
2552
+ clearPending();
2153
2553
  reject(createCodexWebSocketTransportError("request was aborted"));
2154
2554
  }
2155
2555
  };
@@ -2160,17 +2560,16 @@ class CodexWebSocketConnection {
2160
2560
  signal.addEventListener("abort", onAbort, { once: true });
2161
2561
  }
2162
2562
  }
2163
- const clearPending = () => {
2164
- if (timeout) clearTimeout(timeout);
2165
- if (signal) signal.removeEventListener("abort", onAbort);
2166
- };
2167
- timeout = setTimeout(() => {
2168
- socket.close(1000, "connect-timeout");
2169
- if (!settled) {
2170
- settled = true;
2171
- reject(createCodexWebSocketTransportError("connection timeout"));
2172
- }
2173
- }, CODEX_WEBSOCKET_CONNECT_TIMEOUT_MS);
2563
+ if (!settled) {
2564
+ timeout = setTimeout(() => {
2565
+ socket.close(1000, "connect-timeout");
2566
+ if (!settled) {
2567
+ settled = true;
2568
+ clearPending();
2569
+ reject(createCodexWebSocketTransportError("connection timeout"));
2570
+ }
2571
+ }, CODEX_WEBSOCKET_CONNECT_TIMEOUT_MS);
2572
+ }
2174
2573
 
2175
2574
  socket.onopen = event => {
2176
2575
  if (!settled) {
@@ -2256,6 +2655,9 @@ class CodexWebSocketConnection {
2256
2655
  if (this.#activeRequest) {
2257
2656
  throw createCodexWebSocketTransportError("websocket request already in progress");
2258
2657
  }
2658
+ if (signal?.aborted) {
2659
+ throw createCodexWebSocketTransportError("request was aborted");
2660
+ }
2259
2661
  this.#activeRequest = true;
2260
2662
  this.#streamObserver = onSseEvent;
2261
2663
  // Drain any non-error frames left over from a prior request before sending.
@@ -2273,13 +2675,7 @@ class CodexWebSocketConnection {
2273
2675
  this.close("aborted");
2274
2676
  this.#push(createCodexWebSocketTransportError("request was aborted"));
2275
2677
  };
2276
- if (signal) {
2277
- if (signal.aborted) {
2278
- onAbort();
2279
- } else {
2280
- signal.addEventListener("abort", onAbort, { once: true });
2281
- }
2282
- }
2678
+ if (signal) signal.addEventListener("abort", onAbort, { once: true });
2283
2679
 
2284
2680
  try {
2285
2681
  const debugSession = isRequestDebugEnabled()
@@ -2297,8 +2693,13 @@ class CodexWebSocketConnection {
2297
2693
 
2298
2694
  const requestPayload = JSON.stringify(request);
2299
2695
  notifyCodexWebSocketOutbound(onSseEvent, request, requestPayload);
2696
+ // Re-check liveness: the debug-session await above can outlive the socket.
2697
+ const socket = this.#socket;
2698
+ if (!socket || socket.readyState !== WebSocket.OPEN) {
2699
+ throw createCodexWebSocketTransportError("websocket connection is unavailable");
2700
+ }
2300
2701
  try {
2301
- this.#socket.send(requestPayload);
2702
+ socket.send(requestPayload);
2302
2703
  } catch (error) {
2303
2704
  throw createCodexWebSocketTransportError(
2304
2705
  `websocket send failed: ${error instanceof Error ? error.message : String(error)}`,
@@ -2517,9 +2918,11 @@ class CodexWebSocketConnection {
2517
2918
 
2518
2919
  #push(item: Record<string, unknown> | Error | null): void {
2519
2920
  if (item instanceof Error) {
2520
- if (!(this.#queue[0] instanceof Error)) {
2521
- this.#queue.length = 0;
2522
- }
2921
+ // Append after frames already received instead of wiping them: a queued
2922
+ // terminal event (e.g. `response.completed` followed by an eager server
2923
+ // close) must still reach the consumer rather than morph into a spurious
2924
+ // transport failure. `#dropStaleFrames` keeps errors across requests, so
2925
+ // the death signal still surfaces if the data frames go unconsumed.
2523
2926
  this.#queue.push(item);
2524
2927
  this.#wakeWaiters();
2525
2928
  return;
@@ -2574,6 +2977,22 @@ async function getOrCreateCodexWebSocketConnection(
2574
2977
  signal?: AbortSignal,
2575
2978
  ): Promise<CodexWebSocketConnection> {
2576
2979
  const headerRecord = headersToRecord(headers);
2980
+ // Join an in-flight handshake instead of tearing it down: closing a
2981
+ // CONNECTING socket rejects the concurrent caller (prewarm racing the first
2982
+ // request) with a fatal "websocket closed before open", which would disable
2983
+ // websockets for the entire session.
2984
+ // Bounded re-join: a fresh handshake may have been started by yet another
2985
+ // caller while we awaited the previous one.
2986
+ for (let joinAttempt = 0; joinAttempt < 3; joinAttempt += 1) {
2987
+ const pending = state.connection;
2988
+ if (!pending || pending.isOpen() || !pending.isConnecting()) break;
2989
+ try {
2990
+ await pending.connect(signal);
2991
+ } catch {
2992
+ // The handshake owner surfaces its own failure; re-evaluate below
2993
+ // (state.connection may have been replaced or cleared).
2994
+ }
2995
+ }
2577
2996
  if (state.connection?.isOpen()) {
2578
2997
  if (!state.connection.matchesAuth(headerRecord)) {
2579
2998
  state.connection.close("token-refresh");
@@ -2612,11 +3031,13 @@ async function openCodexSseEventStream(
2612
3031
  sessionId: string | undefined,
2613
3032
  body: RequestBody,
2614
3033
  state: CodexWebSocketSessionState | undefined,
2615
- signal?: AbortSignal,
3034
+ responsesLite: boolean,
3035
+ signal: AbortSignal | undefined,
3036
+ firstEventTimeoutMs: number | undefined,
2616
3037
  onSseEvent?: OpenAICodexResponsesOptions["onSseEvent"],
2617
3038
  fetchOverride?: FetchImpl,
2618
3039
  ): Promise<AsyncGenerator<Record<string, unknown>>> {
2619
- const headers = createCodexHeaders(requestHeaders, accountId, apiKey, sessionId, "sse", state);
3040
+ const headers = createCodexHeaders(requestHeaders, accountId, apiKey, sessionId, "sse", state, responsesLite);
2620
3041
  logCodexDebug("codex request", {
2621
3042
  url,
2622
3043
  model: body.model,
@@ -2624,15 +3045,31 @@ async function openCodexSseEventStream(
2624
3045
  sentTurnStateHeader: headers.has(X_CODEX_TURN_STATE_HEADER),
2625
3046
  sentModelsEtagHeader: headers.has(X_MODELS_ETAG_HEADER),
2626
3047
  });
3048
+ // `wrapCodexSseStream` arms a first-event watchdog only after this fetch
3049
+ // resolves (it wraps the SSE generator). With `timeout: false` disabling
3050
+ // Bun's native 300s ceiling, a stalled pre-response request needs its own
3051
+ // watchdog — combine the caller signal with a fresh
3052
+ // `AbortSignal.timeout(firstEventTimeoutMs)` so headers must arrive
3053
+ // within the configured budget (issue #2422).
3054
+ const preResponseWatchdog =
3055
+ firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
3056
+ ? AbortSignal.timeout(firstEventTimeoutMs)
3057
+ : undefined;
3058
+ const fetchSignal = preResponseWatchdog
3059
+ ? signal
3060
+ ? AbortSignal.any([signal, preResponseWatchdog])
3061
+ : preResponseWatchdog
3062
+ : signal;
2627
3063
  const response = await fetchWithRetry(url, {
2628
3064
  method: "POST",
2629
3065
  headers,
2630
3066
  body: JSON.stringify(body),
2631
- signal,
3067
+ signal: fetchSignal,
2632
3068
  maxAttempts: CODEX_MAX_RETRIES + 1,
2633
3069
  defaultDelayMs: attempt => CODEX_RETRY_DELAY_MS * (attempt + 1),
2634
3070
  maxDelayMs: CODEX_RATE_LIMIT_BUDGET_MS,
2635
3071
  fetch: fetchOverride,
3072
+ timeout: false,
2636
3073
  });
2637
3074
  logCodexDebug("codex response", {
2638
3075
  url: response.url,
@@ -2641,14 +3078,10 @@ async function openCodexSseEventStream(
2641
3078
  contentType: response.headers.get("content-type") || null,
2642
3079
  cfRay: response.headers.get("cf-ray") || null,
2643
3080
  });
2644
- updateCodexSessionMetadataFromHeaders(state, response.headers);
2645
3081
  if (!response.ok) {
2646
- const info = await parseCodexError(response);
2647
- const error = new Error(info.friendlyMessage || info.message);
2648
- (error as { headers?: Headers; status?: number }).headers = response.headers;
2649
- (error as { headers?: Headers; status?: number }).status = response.status;
2650
- throw error;
3082
+ throw await CodexApiError.fromResponse(response);
2651
3083
  }
3084
+ updateCodexSessionMetadataFromHeaders(state, response.headers);
2652
3085
  if (!response.body) {
2653
3086
  throw new Error("No response body");
2654
3087
  }
@@ -2677,6 +3110,7 @@ function createCodexHeaders(
2677
3110
  sessionId?: string,
2678
3111
  transport: CodexTransport = "sse",
2679
3112
  state?: CodexWebSocketSessionState,
3113
+ responsesLite = false,
2680
3114
  ): Headers {
2681
3115
  const headers = new Headers(initHeaders ?? {});
2682
3116
  headers.delete("x-api-key");
@@ -2698,6 +3132,7 @@ function createCodexHeaders(
2698
3132
  } else {
2699
3133
  headers.delete(OPENAI_HEADERS.CONVERSATION_ID);
2700
3134
  headers.delete(OPENAI_HEADERS.SESSION_ID);
3135
+ headers.delete("x-client-request-id");
2701
3136
  }
2702
3137
  if (state?.turnState) {
2703
3138
  headers.set(X_CODEX_TURN_STATE_HEADER, state.turnState);
@@ -2709,6 +3144,11 @@ function createCodexHeaders(
2709
3144
  } else {
2710
3145
  headers.delete(X_MODELS_ETAG_HEADER);
2711
3146
  }
3147
+ if (responsesLite) {
3148
+ headers.set(X_OPENAI_INTERNAL_CODEX_RESPONSES_LITE_HEADER, "true");
3149
+ } else {
3150
+ headers.delete(X_OPENAI_INTERNAL_CODEX_RESPONSES_LITE_HEADER);
3151
+ }
2712
3152
  if (transport === "sse") {
2713
3153
  headers.set("accept", "text/event-stream");
2714
3154
  headers.set("content-type", "application/json");
@@ -2736,6 +3176,7 @@ function redactHeaders(headers: Headers): Record<string, string> {
2736
3176
  lower.includes("account") ||
2737
3177
  lower.includes("session") ||
2738
3178
  lower.includes("conversation") ||
3179
+ lower === "x-client-request-id" ||
2739
3180
  lower === "cookie"
2740
3181
  ) {
2741
3182
  redacted[key] = "[redacted]";
@@ -2815,11 +3256,13 @@ function convertMessages(model: Model<"openai-codex-responses">, context: Contex
2815
3256
 
2816
3257
  if (msg.role === "assistant") {
2817
3258
  const assistantMsg = msg as AssistantMessage;
2818
- const providerPayload = getOpenAIResponsesHistoryPayload(
2819
- assistantMsg.providerPayload,
2820
- model.provider,
2821
- assistantMsg.provider,
2822
- );
3259
+ // Native items are model-bound (reasoning carries encrypted content
3260
+ // minted by the producing model); after a mid-session model switch fall
3261
+ // back to block re-encode, which strips foreign signatures.
3262
+ const providerPayload =
3263
+ assistantMsg.api === model.api && assistantMsg.model === model.id
3264
+ ? getOpenAIResponsesHistoryPayload(assistantMsg.providerPayload, model.provider, assistantMsg.provider)
3265
+ : undefined;
2823
3266
  const historyItems = providerPayload?.items as Array<ResponseInput[number]> | undefined;
2824
3267
  if (historyItems) {
2825
3268
  for (const item of historyItems) {
@@ -2941,6 +3384,13 @@ function getString(value: unknown): string | undefined {
2941
3384
  return typeof value === "string" ? value : undefined;
2942
3385
  }
2943
3386
 
3387
+ class CodexWhitespaceToolCallLoopError extends Error {
3388
+ constructor(message: string) {
3389
+ super(message);
3390
+ this.name = "CodexWhitespaceToolCallLoopError";
3391
+ }
3392
+ }
3393
+
2944
3394
  class CodexProviderStreamError extends Error {
2945
3395
  readonly retryable: boolean;
2946
3396
  readonly code?: string;
@@ -2965,7 +3415,9 @@ function isRetryableCodexFailureEvent(rawEvent: Record<string, unknown>): boolea
2965
3415
  }
2966
3416
 
2967
3417
  function createCodexProviderStreamError(rawEvent: Record<string, unknown>): CodexProviderStreamError {
2968
- const code = getString(rawEvent.code) ?? "";
3418
+ const response = asRecord(rawEvent.response);
3419
+ const nestedError = asRecord(rawEvent.error) ?? (response ? asRecord(response.error) : null);
3420
+ const code = getString(rawEvent.code) ?? getString(nestedError?.code) ?? getString(nestedError?.type) ?? "";
2969
3421
  const message = getString(rawEvent.message) ?? "";
2970
3422
  const formattedMessage =
2971
3423
  typeof rawEvent.type === "string" && rawEvent.type === "error"