@prometheus-ai/ai 0.5.4 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377) hide show
  1. package/dist/types/auth-broker/remote-store.d.ts +2 -1
  2. package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
  3. package/dist/types/auth-gateway/server.d.ts +19 -0
  4. package/dist/types/auth-gateway/types.d.ts +9 -3
  5. package/dist/types/auth-retry.d.ts +119 -0
  6. package/dist/types/auth-storage.d.ts +217 -8
  7. package/dist/types/errors.d.ts +24 -0
  8. package/dist/types/index.d.ts +5 -9
  9. package/dist/types/provider-details.d.ts +1 -1
  10. package/dist/types/providers/amazon-bedrock.d.ts +12 -6
  11. package/dist/types/providers/anthropic-client.d.ts +10 -3
  12. package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
  13. package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
  14. package/dist/types/providers/anthropic-wire.d.ts +3 -3
  15. package/dist/types/providers/anthropic.d.ts +41 -34
  16. package/dist/types/providers/aws-credentials.d.ts +8 -0
  17. package/dist/types/providers/azure-openai-responses.d.ts +1 -0
  18. package/dist/types/providers/google-gemini-cli.d.ts +22 -1
  19. package/dist/types/providers/google-shared.d.ts +22 -0
  20. package/dist/types/providers/google-types.d.ts +13 -1
  21. package/dist/types/providers/mock.d.ts +8 -3
  22. package/dist/types/providers/ollama.d.ts +6 -0
  23. package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
  24. package/dist/types/providers/openai-chat-server.d.ts +3 -3
  25. package/dist/types/providers/openai-chat-wire.d.ts +644 -0
  26. package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
  27. package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
  28. package/dist/types/providers/openai-codex-responses.d.ts +31 -2
  29. package/dist/types/providers/openai-completions-compat.d.ts +2 -25
  30. package/dist/types/providers/openai-completions.d.ts +2 -10
  31. package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
  32. package/dist/types/providers/openai-responses-server.d.ts +2 -2
  33. package/dist/types/providers/openai-responses-shared.d.ts +49 -9
  34. package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
  35. package/dist/types/providers/openai-responses.d.ts +13 -4
  36. package/dist/types/providers/prometheus-native-client.d.ts +9 -0
  37. package/dist/types/providers/prometheus-native-server.d.ts +4 -3
  38. package/dist/types/providers/transform-messages.d.ts +1 -2
  39. package/dist/types/rate-limit-utils.d.ts +3 -2
  40. package/dist/types/registry/aimlapi.d.ts +4 -0
  41. package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
  42. package/dist/types/registry/amazon-bedrock.d.ts +5 -0
  43. package/dist/types/registry/anthropic.d.ts +10 -0
  44. package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
  45. package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
  46. package/dist/types/registry/cerebras.d.ts +7 -0
  47. package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
  48. package/dist/types/registry/cursor.d.ts +7 -0
  49. package/dist/types/registry/deepseek.d.ts +8 -0
  50. package/dist/types/registry/derived.d.ts +5 -0
  51. package/dist/types/registry/firepass.d.ts +16 -0
  52. package/dist/types/registry/fireworks.d.ts +7 -0
  53. package/dist/types/registry/github-copilot.d.ts +7 -0
  54. package/dist/types/registry/gitlab-duo.d.ts +9 -0
  55. package/dist/types/registry/google-antigravity.d.ts +9 -0
  56. package/dist/types/registry/google-gemini-cli.d.ts +9 -0
  57. package/dist/types/registry/google-vertex.d.ts +5 -0
  58. package/dist/types/registry/google.d.ts +4 -0
  59. package/dist/types/registry/groq.d.ts +4 -0
  60. package/dist/types/registry/huggingface.d.ts +7 -0
  61. package/dist/types/registry/index.d.ts +4 -0
  62. package/dist/types/registry/kagi.d.ts +14 -0
  63. package/dist/types/registry/kilo.d.ts +7 -0
  64. package/dist/types/registry/kimi-code.d.ts +7 -0
  65. package/dist/types/registry/litellm.d.ts +13 -0
  66. package/dist/types/registry/lm-studio.d.ts +8 -0
  67. package/dist/types/registry/minimax-code-cn.d.ts +6 -0
  68. package/dist/types/registry/minimax-code.d.ts +6 -0
  69. package/dist/types/registry/minimax.d.ts +4 -0
  70. package/dist/types/registry/mistral.d.ts +4 -0
  71. package/dist/types/registry/moonshot.d.ts +7 -0
  72. package/dist/types/registry/nanogpt.d.ts +7 -0
  73. package/dist/types/registry/nvidia.d.ts +7 -0
  74. package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  75. package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
  76. package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
  77. package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
  78. package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
  79. package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
  80. package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
  81. package/dist/types/registry/ollama-cloud.d.ts +7 -0
  82. package/dist/types/registry/ollama.d.ts +12 -0
  83. package/dist/types/registry/openai-codex-device.d.ts +8 -0
  84. package/dist/types/registry/openai-codex.d.ts +9 -0
  85. package/dist/types/registry/openai.d.ts +4 -0
  86. package/dist/types/registry/opencode-go.d.ts +6 -0
  87. package/dist/types/registry/opencode-zen.d.ts +6 -0
  88. package/dist/types/registry/openrouter.d.ts +13 -0
  89. package/dist/types/registry/parallel.d.ts +14 -0
  90. package/dist/types/registry/perplexity.d.ts +7 -0
  91. package/dist/types/registry/qianfan.d.ts +7 -0
  92. package/dist/types/registry/qwen-portal.d.ts +7 -0
  93. package/dist/types/registry/registry.d.ts +272 -0
  94. package/dist/types/registry/synthetic.d.ts +6 -0
  95. package/dist/types/registry/tavily.d.ts +14 -0
  96. package/dist/types/registry/together.d.ts +6 -0
  97. package/dist/types/registry/types.d.ts +51 -0
  98. package/dist/types/registry/venice.d.ts +13 -0
  99. package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
  100. package/dist/types/registry/vllm.d.ts +7 -0
  101. package/dist/types/registry/wafer-pass.d.ts +6 -0
  102. package/dist/types/registry/wafer-serverless.d.ts +6 -0
  103. package/dist/types/registry/xai-oauth.d.ts +7 -0
  104. package/dist/types/registry/xai.d.ts +4 -0
  105. package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
  106. package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
  107. package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
  108. package/dist/types/registry/xiaomi.d.ts +6 -0
  109. package/dist/types/registry/zai.d.ts +7 -0
  110. package/dist/types/registry/zenmux.d.ts +7 -0
  111. package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
  112. package/dist/types/stream.d.ts +9 -1
  113. package/dist/types/types.d.ts +56 -295
  114. package/dist/types/usage/google-antigravity.d.ts +15 -1
  115. package/dist/types/usage/openai-codex-reset.d.ts +79 -0
  116. package/dist/types/usage/openai-codex.d.ts +1 -0
  117. package/dist/types/usage.d.ts +77 -4
  118. package/dist/types/utils/abort.d.ts +6 -0
  119. package/dist/types/utils/event-stream.d.ts +2 -0
  120. package/dist/types/utils/http-inspector.d.ts +0 -1
  121. package/dist/types/utils/idle-iterator.d.ts +35 -0
  122. package/dist/types/utils/openai-http.d.ts +58 -0
  123. package/dist/types/utils/request-debug.d.ts +3 -0
  124. package/dist/types/utils/retry-after.d.ts +1 -0
  125. package/dist/types/utils/schema/fields.d.ts +5 -0
  126. package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
  127. package/dist/types/utils/schema/stamps.d.ts +7 -15
  128. package/dist/types/utils/sse-debug.d.ts +0 -5
  129. package/dist/types/utils/stream-markup-healing.d.ts +2 -0
  130. package/dist/types/utils.d.ts +1 -5
  131. package/package.json +17 -29
  132. package/src/auth-broker/remote-store.ts +10 -1
  133. package/src/auth-broker/snapshot-cache.ts +1 -1
  134. package/src/auth-broker/wire-schemas.ts +1 -1
  135. package/src/auth-gateway/http.ts +1 -1
  136. package/src/auth-gateway/server.ts +95 -30
  137. package/src/auth-gateway/types.ts +10 -2
  138. package/src/auth-retry.ts +238 -0
  139. package/src/auth-storage.ts +935 -430
  140. package/src/errors.ts +32 -0
  141. package/src/index.ts +9 -14
  142. package/src/provider-details.ts +1 -1
  143. package/src/providers/__tests__/google-auth.test.ts +144 -0
  144. package/src/providers/amazon-bedrock.ts +70 -40
  145. package/src/providers/anthropic-client.ts +15 -13
  146. package/src/providers/anthropic-messages-server-schema.ts +17 -7
  147. package/src/providers/anthropic-messages-server.ts +88 -20
  148. package/src/providers/anthropic-wire.ts +4 -3
  149. package/src/providers/anthropic.ts +1234 -621
  150. package/src/providers/aws-credentials.ts +47 -5
  151. package/src/providers/aws-eventstream.ts +5 -0
  152. package/src/providers/azure-openai-responses.ts +117 -67
  153. package/src/providers/cursor.ts +30 -30
  154. package/src/providers/github-copilot-headers.ts +1 -1
  155. package/src/providers/gitlab-duo.ts +36 -29
  156. package/src/providers/google-auth.ts +71 -8
  157. package/src/providers/google-gemini-cli.ts +118 -22
  158. package/src/providers/google-shared.ts +163 -43
  159. package/src/providers/google-types.ts +10 -1
  160. package/src/providers/kimi.ts +1 -1
  161. package/src/providers/mock.ts +11 -3
  162. package/src/providers/ollama.ts +64 -7
  163. package/src/providers/openai-anthropic-shim.ts +17 -8
  164. package/src/providers/openai-chat-server-schema.ts +9 -3
  165. package/src/providers/openai-chat-server.ts +82 -16
  166. package/src/providers/openai-chat-wire.ts +847 -0
  167. package/src/providers/openai-codex/request-transformer.ts +129 -34
  168. package/src/providers/openai-codex/response-handler.ts +22 -1
  169. package/src/providers/openai-codex-responses.ts +699 -247
  170. package/src/providers/openai-completions-compat.ts +8 -308
  171. package/src/providers/openai-completions.ts +416 -267
  172. package/src/providers/openai-responses-server-schema.ts +15 -9
  173. package/src/providers/openai-responses-server.ts +162 -114
  174. package/src/providers/openai-responses-shared.ts +320 -82
  175. package/src/providers/openai-responses-wire.ts +6391 -0
  176. package/src/providers/openai-responses.ts +382 -176
  177. package/src/providers/prometheus-native-client.ts +27 -11
  178. package/src/providers/prometheus-native-server.ts +44 -17
  179. package/src/providers/transform-messages.ts +311 -120
  180. package/src/providers/vision-guard.ts +5 -3
  181. package/src/rate-limit-utils.ts +13 -3
  182. package/src/registry/aimlapi.ts +6 -0
  183. package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
  184. package/src/registry/amazon-bedrock.ts +22 -0
  185. package/src/registry/anthropic.ts +26 -0
  186. package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
  187. package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
  188. package/src/{utils/oauth → registry}/cerebras.ts +8 -1
  189. package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
  190. package/src/registry/cursor.ts +20 -0
  191. package/src/{utils/oauth → registry}/deepseek.ts +9 -17
  192. package/src/registry/derived.ts +9 -0
  193. package/src/{utils/oauth → registry}/firepass.ts +10 -2
  194. package/src/{utils/oauth → registry}/fireworks.ts +8 -1
  195. package/src/registry/github-copilot.ts +22 -0
  196. package/src/registry/gitlab-duo.ts +19 -0
  197. package/src/registry/google-antigravity.ts +21 -0
  198. package/src/registry/google-gemini-cli.ts +21 -0
  199. package/src/registry/google-vertex.ts +38 -0
  200. package/src/registry/google.ts +6 -0
  201. package/src/registry/groq.ts +6 -0
  202. package/src/{utils/oauth → registry}/huggingface.ts +8 -19
  203. package/src/registry/index.ts +4 -0
  204. package/src/{utils/oauth → registry}/kagi.ts +9 -11
  205. package/src/{utils/oauth → registry}/kilo.ts +11 -6
  206. package/src/registry/kimi-code.ts +17 -0
  207. package/src/{utils/oauth → registry}/litellm.ts +8 -12
  208. package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
  209. package/src/registry/minimax-code-cn.ts +12 -0
  210. package/src/registry/minimax-code.ts +12 -0
  211. package/src/registry/minimax.ts +6 -0
  212. package/src/registry/mistral.ts +6 -0
  213. package/src/{utils/oauth → registry}/moonshot.ts +8 -9
  214. package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
  215. package/src/{utils/oauth → registry}/nvidia.ts +8 -18
  216. package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
  217. package/src/{utils → registry}/oauth/anthropic.ts +38 -17
  218. package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
  219. package/src/registry/oauth/gitlab-duo.ts +198 -0
  220. package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
  221. package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
  222. package/src/registry/oauth/index.ts +164 -0
  223. package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
  224. package/src/{utils → registry}/oauth/types.ts +7 -51
  225. package/src/{utils → registry}/oauth/wafer.ts +1 -1
  226. package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
  227. package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
  228. package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
  229. package/src/{utils/oauth → registry}/ollama.ts +8 -13
  230. package/src/registry/openai-codex-device.ts +18 -0
  231. package/src/registry/openai-codex.ts +19 -0
  232. package/src/registry/openai.ts +6 -0
  233. package/src/registry/opencode-go.ts +12 -0
  234. package/src/registry/opencode-zen.ts +12 -0
  235. package/src/{utils/oauth → registry}/openrouter.ts +10 -2
  236. package/src/{utils/oauth → registry}/parallel.ts +9 -11
  237. package/src/registry/perplexity.ts +13 -0
  238. package/src/{utils/oauth → registry}/qianfan.ts +8 -17
  239. package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
  240. package/src/registry/registry.ts +149 -0
  241. package/src/{utils/oauth → registry}/synthetic.ts +7 -1
  242. package/src/{utils/oauth → registry}/tavily.ts +10 -12
  243. package/src/{utils/oauth → registry}/together.ts +7 -1
  244. package/src/registry/types.ts +56 -0
  245. package/src/{utils/oauth → registry}/venice.ts +8 -12
  246. package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
  247. package/src/{utils/oauth → registry}/vllm.ts +9 -16
  248. package/src/registry/wafer-pass.ts +12 -0
  249. package/src/registry/wafer-serverless.ts +12 -0
  250. package/src/registry/xai-oauth.ts +17 -0
  251. package/src/registry/xai.ts +6 -0
  252. package/src/registry/xiaomi-token-plan-ams.ts +12 -0
  253. package/src/registry/xiaomi-token-plan-cn.ts +12 -0
  254. package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
  255. package/src/registry/xiaomi.ts +12 -0
  256. package/src/{utils/oauth → registry}/zai.ts +10 -22
  257. package/src/{utils/oauth → registry}/zenmux.ts +8 -1
  258. package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
  259. package/src/stream.ts +229 -199
  260. package/src/types.ts +63 -384
  261. package/src/usage/claude.ts +4 -2
  262. package/src/usage/github-copilot.ts +4 -2
  263. package/src/usage/google-antigravity.ts +196 -28
  264. package/src/usage/kimi.ts +1 -1
  265. package/src/usage/minimax-code.ts +5 -6
  266. package/src/usage/openai-codex-reset.ts +174 -0
  267. package/src/usage/openai-codex.ts +19 -2
  268. package/src/usage/zai.ts +2 -1
  269. package/src/usage.ts +93 -4
  270. package/src/utils/abort.ts +14 -0
  271. package/src/utils/event-stream.ts +17 -0
  272. package/src/utils/http-inspector.ts +4 -12
  273. package/src/utils/idle-iterator.ts +250 -79
  274. package/src/utils/openai-http.ts +157 -0
  275. package/src/utils/request-debug.ts +67 -19
  276. package/src/utils/retry-after.ts +1 -1
  277. package/src/utils/retry.ts +23 -2
  278. package/src/utils/schema/CONSTRAINTS.md +4 -2
  279. package/src/utils/schema/fields.ts +16 -0
  280. package/src/utils/schema/json-schema-validator.ts +19 -1
  281. package/src/utils/schema/normalize.ts +80 -8
  282. package/src/utils/schema/stamps.ts +22 -10
  283. package/src/utils/schema/wire.ts +2 -2
  284. package/src/utils/sse-debug.ts +0 -271
  285. package/src/utils/stream-markup-healing.ts +50 -8
  286. package/src/utils/validation.ts +49 -13
  287. package/src/utils.ts +2 -26
  288. package/dist/types/model-cache.d.ts +0 -17
  289. package/dist/types/model-manager.d.ts +0 -64
  290. package/dist/types/model-thinking.d.ts +0 -100
  291. package/dist/types/models.d.ts +0 -12
  292. package/dist/types/provider-models/bundled-references.d.ts +0 -4
  293. package/dist/types/provider-models/descriptors.d.ts +0 -50
  294. package/dist/types/provider-models/google.d.ts +0 -24
  295. package/dist/types/provider-models/index.d.ts +0 -5
  296. package/dist/types/provider-models/ollama.d.ts +0 -7
  297. package/dist/types/provider-models/openai-compat.d.ts +0 -323
  298. package/dist/types/provider-models/special.d.ts +0 -16
  299. package/dist/types/utils/discovery/antigravity.d.ts +0 -61
  300. package/dist/types/utils/discovery/codex.d.ts +0 -38
  301. package/dist/types/utils/discovery/cursor.d.ts +0 -23
  302. package/dist/types/utils/discovery/gemini.d.ts +0 -25
  303. package/dist/types/utils/discovery/index.d.ts +0 -4
  304. package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
  305. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
  306. package/dist/types/utils/oauth/cerebras.d.ts +0 -1
  307. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
  308. package/dist/types/utils/oauth/deepseek.d.ts +0 -10
  309. package/dist/types/utils/oauth/firepass.d.ts +0 -1
  310. package/dist/types/utils/oauth/fireworks.d.ts +0 -1
  311. package/dist/types/utils/oauth/huggingface.d.ts +0 -19
  312. package/dist/types/utils/oauth/kagi.d.ts +0 -17
  313. package/dist/types/utils/oauth/kilo.d.ts +0 -5
  314. package/dist/types/utils/oauth/litellm.d.ts +0 -18
  315. package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
  316. package/dist/types/utils/oauth/moonshot.d.ts +0 -1
  317. package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
  318. package/dist/types/utils/oauth/nvidia.d.ts +0 -18
  319. package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
  320. package/dist/types/utils/oauth/ollama.d.ts +0 -18
  321. package/dist/types/utils/oauth/openrouter.d.ts +0 -1
  322. package/dist/types/utils/oauth/parallel.d.ts +0 -17
  323. package/dist/types/utils/oauth/qianfan.d.ts +0 -17
  324. package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
  325. package/dist/types/utils/oauth/synthetic.d.ts +0 -1
  326. package/dist/types/utils/oauth/tavily.d.ts +0 -17
  327. package/dist/types/utils/oauth/together.d.ts +0 -1
  328. package/dist/types/utils/oauth/venice.d.ts +0 -18
  329. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
  330. package/dist/types/utils/oauth/vllm.d.ts +0 -16
  331. package/dist/types/utils/oauth/zai.d.ts +0 -18
  332. package/dist/types/utils/oauth/zenmux.d.ts +0 -1
  333. package/dist/types/utils/oauth/zhipu.d.ts +0 -18
  334. package/src/model-cache.ts +0 -129
  335. package/src/model-manager.ts +0 -469
  336. package/src/model-thinking.ts +0 -756
  337. package/src/models.json +0 -60287
  338. package/src/models.json.d.ts +0 -9
  339. package/src/models.ts +0 -56
  340. package/src/provider-models/bundled-references.ts +0 -38
  341. package/src/provider-models/descriptors.ts +0 -364
  342. package/src/provider-models/google.ts +0 -88
  343. package/src/provider-models/index.ts +0 -5
  344. package/src/provider-models/ollama.ts +0 -153
  345. package/src/provider-models/openai-compat.ts +0 -2904
  346. package/src/provider-models/special.ts +0 -67
  347. package/src/utils/discovery/antigravity.ts +0 -261
  348. package/src/utils/discovery/codex.ts +0 -371
  349. package/src/utils/discovery/cursor.ts +0 -306
  350. package/src/utils/discovery/gemini.ts +0 -248
  351. package/src/utils/discovery/index.ts +0 -4
  352. package/src/utils/discovery/openai-compatible.ts +0 -224
  353. package/src/utils/oauth/gitlab-duo.ts +0 -123
  354. package/src/utils/oauth/index.ts +0 -502
  355. /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
  356. /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
  357. /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
  358. /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
  359. /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
  360. /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
  361. /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
  362. /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
  363. /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
  364. /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
  365. /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
  366. /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
  367. /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
  368. /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
  369. /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
  370. /package/src/{utils → registry}/oauth/cursor.ts +0 -0
  371. /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
  372. /package/src/{utils → registry}/oauth/kimi.ts +0 -0
  373. /package/src/{utils → registry}/oauth/oauth.html +0 -0
  374. /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
  375. /package/src/{utils → registry}/oauth/opencode.ts +0 -0
  376. /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
  377. /package/src/{utils → registry}/oauth/pkce.ts +0 -0
@@ -1,20 +1,14 @@
1
- import { $env, extractHttpStatusFromError, structuredCloneJSON } from "@prometheus-ai/utils";
2
- import OpenAI, { APIConnectionTimeoutError as OpenAIConnectionTimeoutError } from "openai";
3
- import type {
4
- Tool as OpenAITool,
5
- ResponseCreateParamsStreaming,
6
- ResponseInput,
7
- } from "openai/resources/responses/responses";
1
+ import { hostMatchesUrl } from "@prometheus-ai/catalog/hosts";
2
+ import { parseGitHubCopilotApiKey } from "@prometheus-ai/catalog/wire/github-copilot";
3
+ import { $env, $flag, extractHttpStatusFromError, logger, structuredCloneJSON } from "@prometheus-ai/utils";
8
4
  import { getEnvApiKey } from "../stream";
9
5
  import type {
10
6
  AssistantMessage,
11
- CacheRetention,
12
7
  Context,
13
- FetchImpl,
14
8
  MessageAttribution,
15
9
  Model,
16
- OpenAICompat,
17
10
  ProviderSessionState,
11
+ RawSseEvent,
18
12
  ServiceTier,
19
13
  StreamFunction,
20
14
  StreamOptions,
@@ -37,12 +31,10 @@ import {
37
31
  getOpenAIStreamIdleTimeoutMs,
38
32
  iterateWithIdleTimeout,
39
33
  } from "../utils/idle-iterator";
40
- import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
34
+ import { postOpenAIStream } from "../utils/openai-http";
41
35
  import { notifyProviderResponse } from "../utils/provider-response";
42
36
  import { callWithCopilotModelRetry } from "../utils/retry";
43
37
  import { adaptSchemaForStrict, NO_STRICT, sanitizeSchemaForOpenAIResponses, toolWireSchema } from "../utils/schema";
44
- import { createSdkStreamRequestOptions } from "../utils/sdk-stream-timeout";
45
- import { wrapFetchForSseDebug } from "../utils/sse-debug";
46
38
  import { mapToOpenAIResponsesToolChoice, type OpenAIResponsesToolChoice } from "../utils/tool-choice";
47
39
  import {
48
40
  buildCopilotDynamicHeaders,
@@ -54,6 +46,7 @@ import {
54
46
  appendResponsesToolResultMessages,
55
47
  applyCommonResponsesSamplingParams,
56
48
  applyResponsesReasoningParams,
49
+ buildResponsesDeltaInput,
57
50
  collectCustomCallIds,
58
51
  collectKnownCallIds,
59
52
  convertResponsesAssistantMessage,
@@ -62,24 +55,17 @@ import {
62
55
  isOpenAIResponsesProgressEvent,
63
56
  normalizeResponsesToolCallIdForTransform,
64
57
  processResponsesStream,
58
+ repairOrphanResponsesToolCalls,
65
59
  repairOrphanResponsesToolOutputs,
66
60
  } from "./openai-responses-shared";
61
+ import type {
62
+ Tool as OpenAITool,
63
+ ResponseCreateParamsStreaming,
64
+ ResponseInput,
65
+ ResponseStreamEvent,
66
+ } from "./openai-responses-wire";
67
67
  import { transformMessages } from "./transform-messages";
68
68
 
69
- /**
70
- * Get prompt cache retention based on cacheRetention and base URL.
71
- * Only applies to direct OpenAI API calls (api.openai.com).
72
- */
73
- function getPromptCacheRetention(baseUrl: string, cacheRetention: CacheRetention): "24h" | undefined {
74
- if (cacheRetention !== "long") {
75
- return undefined;
76
- }
77
- if (baseUrl.includes("api.openai.com")) {
78
- return "24h";
79
- }
80
- return undefined;
81
- }
82
-
83
69
  export function normalizeOpenAIResponsesPromptCacheKey(sessionId: string | undefined): string | undefined {
84
70
  if (!sessionId || sessionId.length === 0) return undefined;
85
71
  const wellFormed = sessionId.toWellFormed();
@@ -93,6 +79,16 @@ export interface OpenAIResponsesOptions extends StreamOptions {
93
79
  reasoningSummary?: "auto" | "detailed" | "concise" | null;
94
80
  serviceTier?: ServiceTier;
95
81
  toolChoice?: ToolChoice;
82
+ /**
83
+ * Stateful turns: chain via `previous_response_id` + delta input instead of
84
+ * replaying the full transcript. Forces `store: true` (the platform only
85
+ * resolves stored responses). Defaults ON against the official OpenAI API
86
+ * and OFF for other Responses endpoints; `PROMETHEUS_OPENAI_STATEFUL` overrides the
87
+ * default, and `false` here vetoes everything. Requires `sessionId` +
88
+ * `providerSessionState`. Falls back to a full replay whenever history
89
+ * mutates or the server reports a stale id.
90
+ */
91
+ statefulResponses?: boolean;
96
92
  /**
97
93
  * Enforce strict tool call/result pairing when building Responses API inputs.
98
94
  * Azure OpenAI and GitHub Copilot Responses paths require tool results to match prior tool calls.
@@ -120,8 +116,8 @@ export interface OpenAIResponsesOptions extends StreamOptions {
120
116
  */
121
117
  omitReasoningEffort?: boolean;
122
118
  /**
123
- * Extra request headers merged onto the underlying client's
124
- * defaultHeaders. Used by adapter wrappers to inject provider-specific
119
+ * Extra request headers merged onto the model/copilot defaults. Used by
120
+ * adapter wrappers to inject provider-specific
125
121
  * routing or cache hints.
126
122
  */
127
123
  headers?: Record<string, string>;
@@ -136,16 +132,38 @@ export interface OpenAIResponsesOptions extends StreamOptions {
136
132
  const OPENAI_RESPONSES_PROVIDER_SESSION_STATE_PREFIX = "openai-responses:";
137
133
  const OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE =
138
134
  "OpenAI responses stream timed out while waiting for the first event";
135
+ /** Consecutive stale-previous-response failures before chaining is disabled for the session. */
136
+ const OPENAI_RESPONSES_CHAIN_STALE_FAILURE_LIMIT = 3;
139
137
 
140
138
  interface OpenAIResponsesProviderSessionState extends ProviderSessionState {
141
139
  nativeHistoryReplayWarmed: boolean;
140
+ /** Stateful `previous_response_id` chain baselines, keyed by baseUrl/model/session. */
141
+ chains: Map<string, OpenAIResponsesChainState>;
142
+ }
143
+
144
+ interface OpenAIResponsesChainState {
145
+ /**
146
+ * Wire params of the last successful turn, with per-turn trailing
147
+ * scaffolding stripped from `input` (never carries previous_response_id).
148
+ */
149
+ lastParams?: OpenAIResponsesSamplingParams;
150
+ lastResponseId?: string;
151
+ /** Output items of the last response, in replay-sanitized form (matches next-turn input). */
152
+ lastResponseItems?: ResponseInput;
153
+ canAppend: boolean;
154
+ /** Consecutive stale-previous-response failures; reset on a successful chained completion. */
155
+ staleFailures: number;
156
+ /** Set once chaining is judged unsupported for this session (circuit breaker). */
157
+ disabled: boolean;
142
158
  }
143
159
 
144
160
  function createOpenAIResponsesProviderSessionState(): OpenAIResponsesProviderSessionState {
145
161
  const state: OpenAIResponsesProviderSessionState = {
146
162
  nativeHistoryReplayWarmed: false,
163
+ chains: new Map(),
147
164
  close: () => {
148
165
  state.nativeHistoryReplayWarmed = false;
166
+ state.chains.clear();
149
167
  },
150
168
  };
151
169
  return state;
@@ -174,6 +192,142 @@ function canReplayOpenAIResponsesNativeHistory(
174
192
  return providerSessionState?.nativeHistoryReplayWarmed ?? true;
175
193
  }
176
194
 
195
+ function isOpenAIResponsesStatefulEnabled(
196
+ options: OpenAIResponsesOptions | undefined,
197
+ baseUrl: string | undefined,
198
+ ): boolean {
199
+ if (options?.statefulResponses === false) return false;
200
+ if (options?.statefulResponses === true) return true;
201
+ // Default ON only against the official OpenAI API: chaining forces
202
+ // `store: true`, and third-party /v1/responses proxies routinely ignore or
203
+ // reject `previous_response_id`. An unset baseUrl means the default
204
+ // endpoint (api.openai.com).
205
+ return $flag("PROMETHEUS_OPENAI_STATEFUL", !baseUrl || hostMatchesUrl(baseUrl, "openai"));
206
+ }
207
+
208
+ function getOpenAIResponsesChainState(
209
+ providerSessionState: OpenAIResponsesProviderSessionState,
210
+ model: Model<"openai-responses">,
211
+ sessionId: string,
212
+ ): OpenAIResponsesChainState {
213
+ const key = `${model.baseUrl ?? ""}\u0000${model.id}\u0000${sessionId}`;
214
+ const existing = providerSessionState.chains.get(key);
215
+ if (existing) return existing;
216
+ const created: OpenAIResponsesChainState = { canAppend: false, staleFailures: 0, disabled: false };
217
+ providerSessionState.chains.set(key, created);
218
+ return created;
219
+ }
220
+
221
+ function resetOpenAIResponsesChainState(state: OpenAIResponsesChainState): void {
222
+ state.canAppend = false;
223
+ state.lastParams = undefined;
224
+ state.lastResponseId = undefined;
225
+ state.lastResponseItems = undefined;
226
+ }
227
+
228
+ interface OpenAIResponsesChainedParams {
229
+ params: OpenAIResponsesSamplingParams;
230
+ /** Set iff the params carry previous_response_id (delta request). */
231
+ previousResponseId?: string;
232
+ }
233
+
234
+ /**
235
+ * Drop the per-turn trailing scaffolding (the GPT-5 "Juice: 0" developer item)
236
+ * from `input`, yielding the wire form of the conversation arguments alone.
237
+ */
238
+ function stripTrailingScaffolding(
239
+ params: OpenAIResponsesSamplingParams,
240
+ trailingScaffoldingItems: number,
241
+ ): OpenAIResponsesSamplingParams {
242
+ if (trailingScaffoldingItems <= 0 || !Array.isArray(params.input)) return params;
243
+ return { ...params, input: params.input.slice(0, params.input.length - trailingScaffoldingItems) };
244
+ }
245
+
246
+ /**
247
+ * Shape the next turn's request: when the session's append baseline is intact
248
+ * (same options, strict history prefix), chain via `previous_response_id` +
249
+ * delta-only `input`; otherwise break the chain and replay the full transcript.
250
+ *
251
+ * The prefix check runs on the wire form of the conversation arguments alone:
252
+ * per-turn trailing scaffolding is excluded from both sides and re-appended to
253
+ * the delta, so a decoration that trails every request can never masquerade as
254
+ * a history mutation.
255
+ */
256
+ function buildOpenAIResponsesChainedParams(
257
+ params: OpenAIResponsesSamplingParams,
258
+ trailingScaffoldingItems: number,
259
+ chain: OpenAIResponsesChainState,
260
+ ): OpenAIResponsesChainedParams {
261
+ const historyParams = stripTrailingScaffolding(params, trailingScaffoldingItems);
262
+ const deltaInput = chain.canAppend
263
+ ? buildResponsesDeltaInput<ResponseInput[number]>(chain.lastParams, chain.lastResponseItems, historyParams)
264
+ : null;
265
+ if (deltaInput && deltaInput.length > 0 && chain.lastResponseId) {
266
+ const scaffolding =
267
+ historyParams !== params && Array.isArray(params.input)
268
+ ? params.input.slice(params.input.length - trailingScaffoldingItems)
269
+ : [];
270
+ return {
271
+ params: { ...params, previous_response_id: chain.lastResponseId, input: [...deltaInput, ...scaffolding] },
272
+ previousResponseId: chain.lastResponseId,
273
+ };
274
+ }
275
+ if (chain.canAppend) {
276
+ // History mutated or options changed — break the chain and replay in full.
277
+ resetOpenAIResponsesChainState(chain);
278
+ }
279
+ return { params };
280
+ }
281
+
282
+ function isOpenAIResponsesStalePreviousResponseError(error: unknown): boolean {
283
+ if (!(error instanceof Error)) return false;
284
+ if ((error as { code?: string }).code === "previous_response_not_found") return true;
285
+ // "unsupported" covers endpoints that reject the parameter outright
286
+ // (e.g. "Unsupported parameter: previous_response_id").
287
+ return (
288
+ /previous[ _]?response/i.test(error.message) &&
289
+ /not[ _]?found|invalid|expired|stale|unsupported/i.test(error.message)
290
+ );
291
+ }
292
+
293
+ /**
294
+ * Zero Data Retention orgs accept `store: true` but refuse to resolve any
295
+ * `previous_response_id` — the prior response was never persisted server-side.
296
+ * The 400 carries a fixed phrasing ("Zero Data Retention") that the generic
297
+ * stale-id regex above does not match, so it is classified separately and
298
+ * disables chaining categorically (one strike, not three).
299
+ */
300
+ function isOpenAIResponsesZeroDataRetentionError(error: unknown): boolean {
301
+ if (!(error instanceof Error)) return false;
302
+ return /previous[ _]?response/i.test(error.message) && /zero[ _-]?data[ _-]?retention/i.test(error.message);
303
+ }
304
+
305
+ function registerOpenAIResponsesChainStaleFailure(chain: OpenAIResponsesChainState, error: unknown): void {
306
+ resetOpenAIResponsesChainState(chain);
307
+ chain.staleFailures += 1;
308
+ if (chain.staleFailures >= OPENAI_RESPONSES_CHAIN_STALE_FAILURE_LIMIT) {
309
+ chain.disabled = true;
310
+ }
311
+ logger.debug("OpenAI responses previous_response_id rejected; falling back to full context", {
312
+ error: error instanceof Error ? error.message : String(error),
313
+ consecutiveFailures: chain.staleFailures,
314
+ disabled: chain.disabled,
315
+ });
316
+ }
317
+
318
+ /**
319
+ * One-shot ZDR signal: the org will never resolve a stored response, so skip
320
+ * the staleFailures counter and disable chaining immediately for this session.
321
+ */
322
+ function markOpenAIResponsesChainZeroDataRetention(chain: OpenAIResponsesChainState, error: unknown): void {
323
+ resetOpenAIResponsesChainState(chain);
324
+ chain.disabled = true;
325
+ chain.staleFailures = OPENAI_RESPONSES_CHAIN_STALE_FAILURE_LIMIT;
326
+ logger.debug("OpenAI responses chaining disabled (Zero Data Retention)", {
327
+ error: error instanceof Error ? error.message : String(error),
328
+ });
329
+ }
330
+
177
331
  type OpenAIResponsesSamplingParams = ResponseCreateParamsStreaming & {
178
332
  top_p?: number;
179
333
  top_k?: number;
@@ -204,9 +358,32 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
204
358
  model.id,
205
359
  );
206
360
  let rawRequestDump: RawHttpRequestDump | undefined;
361
+ let chainState: OpenAIResponsesChainState | undefined;
362
+ let sentPreviousResponseId: string | undefined;
207
363
  const abortTracker = createAbortSourceTracker(options?.signal);
208
364
  const firstEventTimeoutAbortError = new Error(OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE);
209
365
  const { requestAbortController, requestSignal } = abortTracker;
366
+ const onSseEvent = options?.onSseEvent;
367
+ const rawSseObserver = onSseEvent
368
+ ? (event: RawSseEvent) => {
369
+ if (!event.event && event.data && event.data !== "[DONE]") {
370
+ try {
371
+ const parsed = JSON.parse(event.data);
372
+ const resolvedEvent =
373
+ typeof parsed.type === "string"
374
+ ? parsed.type
375
+ : typeof parsed.object === "string"
376
+ ? parsed.object
377
+ : null;
378
+ if (resolvedEvent) {
379
+ event.event = resolvedEvent;
380
+ event.raw = [`event: ${resolvedEvent}`, ...event.raw];
381
+ }
382
+ } catch {}
383
+ }
384
+ onSseEvent(event, model);
385
+ }
386
+ : undefined;
210
387
 
211
388
  try {
212
389
  // Keep request routing on `sessionId` while allowing callers to pin a
@@ -214,88 +391,138 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
214
391
  // avoid perturbing provider conversation state without cold-starting the cache.
215
392
  const routingSessionId = getOpenAIResponsesRoutingSessionId(options);
216
393
  const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
217
- const { client, copilotPremiumRequests, baseUrl } = createClient(
218
- model,
219
- context,
220
- apiKey,
221
- options?.headers,
222
- options?.initiatorOverride,
223
- routingSessionId,
224
- options?.onSseEvent,
225
- options?.fetch,
226
- );
394
+ const {
395
+ headers: requestHeaders,
396
+ copilotPremiumRequests,
397
+ baseUrl,
398
+ } = createRequestSetup(model, context, apiKey, options?.headers, options?.initiatorOverride, routingSessionId);
227
399
  const premiumRequestsTotal = copilotPremiumRequests;
228
400
  const providerSessionState = getOpenAIResponsesProviderSessionState(model, options?.providerSessionState);
229
- const { params } = buildParams(model, context, options, providerSessionState, baseUrl);
401
+ const { params, trailingScaffoldingItems } = buildParams(model, context, options, providerSessionState);
402
+ if (isOpenAIResponsesStatefulEnabled(options, baseUrl) && routingSessionId && providerSessionState) {
403
+ chainState = getOpenAIResponsesChainState(providerSessionState, model, routingSessionId);
404
+ if (!chainState.disabled) {
405
+ // Platform `previous_response_id` chaining only resolves stored responses.
406
+ params.store = true;
407
+ }
408
+ }
409
+ const chained: OpenAIResponsesChainedParams =
410
+ chainState && !chainState.disabled
411
+ ? buildOpenAIResponsesChainedParams(params, trailingScaffoldingItems, chainState)
412
+ : { params };
413
+ sentPreviousResponseId = chained.previousResponseId;
230
414
  const idleTimeoutMs = options?.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs();
231
415
  const firstEventTimeoutMs =
232
416
  options?.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);
233
417
  const requestTimeoutMs =
234
418
  firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0 ? firstEventTimeoutMs : undefined;
235
419
  options?.onPayload?.(params);
420
+ const requestUrl = `${(baseUrl ?? "https://api.openai.com/v1").replace(/\/+$/, "")}/responses`;
236
421
  rawRequestDump = {
237
422
  provider: model.provider,
238
423
  api: output.api,
239
424
  model: model.id,
240
425
  method: "POST",
241
- url: `${baseUrl ?? "https://api.openai.com/v1"}/responses`,
242
- body: params,
426
+ url: requestUrl,
427
+ body: chained.params,
243
428
  };
244
- const openaiStream = await callWithCopilotModelRetry(
245
- async () => {
246
- const requestOptions = createSdkStreamRequestOptions(requestSignal, requestTimeoutMs);
247
- let requestTimeout: NodeJS.Timeout | undefined;
248
- if (requestTimeoutMs !== undefined) {
249
- requestTimeout = setTimeout(
250
- () => abortTracker.abortLocally(firstEventTimeoutAbortError),
251
- requestTimeoutMs,
252
- );
253
- }
254
- try {
255
- const { data, response, request_id } = await client.responses
256
- .create(params, requestOptions)
257
- .withResponse();
258
- await notifyProviderResponse(options, response, model, request_id);
259
- return data;
260
- } catch (error) {
261
- if (error instanceof OpenAIConnectionTimeoutError && !abortTracker.wasCallerAbort()) {
262
- throw firstEventTimeoutAbortError;
429
+ const openResponsesStream = (requestParams: OpenAIResponsesSamplingParams) =>
430
+ callWithCopilotModelRetry(
431
+ async () => {
432
+ let requestTimeout: NodeJS.Timeout | undefined;
433
+ if (requestTimeoutMs !== undefined) {
434
+ requestTimeout = setTimeout(
435
+ () => abortTracker.abortLocally(firstEventTimeoutAbortError),
436
+ requestTimeoutMs,
437
+ );
263
438
  }
264
- throw error;
265
- } finally {
266
- if (requestTimeout !== undefined) clearTimeout(requestTimeout);
267
- }
268
- },
269
- { provider: model.provider, signal: requestSignal },
270
- );
439
+ try {
440
+ const headers = { ...requestHeaders };
441
+ if (requestTimeoutMs !== undefined) {
442
+ headers["X-Stainless-Timeout"] = Math.floor(requestTimeoutMs / 1000).toString();
443
+ }
444
+ const { events, response, requestId } = await postOpenAIStream<ResponseStreamEvent>({
445
+ url: requestUrl,
446
+ headers,
447
+ body: requestParams,
448
+ signal: requestSignal,
449
+ fetch: options?.fetch,
450
+ // With a first-event watchdog armed, transport retries must
451
+ // not silently extend the caller's deadline.
452
+ maxAttempts: requestTimeoutMs !== undefined ? 1 : undefined,
453
+ onSseEvent: rawSseObserver,
454
+ });
455
+ // Disarm the first-event watchdog as soon as headers arrive — a slow
456
+ // onResponse callback must not abort an already-connected stream.
457
+ if (requestTimeout !== undefined) {
458
+ clearTimeout(requestTimeout);
459
+ requestTimeout = undefined;
460
+ }
461
+ await notifyProviderResponse(options, response, model, requestId);
462
+ return events;
463
+ } finally {
464
+ if (requestTimeout !== undefined) clearTimeout(requestTimeout);
465
+ }
466
+ },
467
+ { provider: model.provider, signal: requestSignal },
468
+ );
469
+ let openaiStream: AsyncIterable<ResponseStreamEvent>;
470
+ try {
471
+ openaiStream = await openResponsesStream(chained.params);
472
+ } catch (error) {
473
+ if (!chainState || !sentPreviousResponseId || requestSignal.aborted) {
474
+ throw error;
475
+ }
476
+ const zdrRejection = isOpenAIResponsesZeroDataRetentionError(error);
477
+ if (!zdrRejection && !isOpenAIResponsesStalePreviousResponseError(error)) {
478
+ throw error;
479
+ }
480
+ // Server rejected the chain baseline: reset, count the failure (or
481
+ // disable categorically on ZDR), and retry once with the full
482
+ // transcript. Structurally cannot loop — the retry carries no
483
+ // previous_response_id.
484
+ if (zdrRejection) {
485
+ markOpenAIResponsesChainZeroDataRetention(chainState, error);
486
+ // ZDR orgs cannot store responses; the original request forced
487
+ // `store: true` for chaining, which is meaningless here and would
488
+ // otherwise leave subsequent turns asking the server to retain
489
+ // data it must discard.
490
+ params.store = false;
491
+ } else {
492
+ registerOpenAIResponsesChainStaleFailure(chainState, error);
493
+ }
494
+ sentPreviousResponseId = undefined;
495
+ rawRequestDump.body = params;
496
+ openaiStream = await openResponsesStream(params);
497
+ }
271
498
  if (premiumRequestsTotal !== undefined) output.usage.premiumRequests = premiumRequestsTotal;
272
499
  stream.push({ type: "start", partial: output });
273
500
 
274
501
  const nativeOutputItems: Array<Record<string, unknown>> = [];
275
- await processResponsesStream(
276
- iterateWithIdleTimeout(openaiStream, {
277
- idleTimeoutMs,
278
- firstItemTimeoutMs: firstEventTimeoutMs,
279
- firstItemErrorMessage: OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE,
280
- errorMessage: "OpenAI responses stream stalled while waiting for the next event",
281
- onFirstItemTimeout: () => abortTracker.abortLocally(firstEventTimeoutAbortError),
282
- onIdle: () => requestAbortController.abort(),
283
- abortSignal: options?.signal,
284
- isProgressItem: isOpenAIResponsesProgressEvent,
285
- }),
286
- output,
287
- stream,
288
- model,
289
- {
290
- onFirstToken: () => {
291
- if (!firstTokenTime) firstTokenTime = Date.now();
292
- },
293
- onOutputItemDone: item => {
294
- nativeOutputItems.push(structuredCloneJSON<unknown>(item) as unknown as Record<string, unknown>);
295
- },
502
+ let sawCompleted = false;
503
+ const timedOpenaiStream = iterateWithIdleTimeout(openaiStream, {
504
+ idleTimeoutMs,
505
+ firstItemTimeoutMs: firstEventTimeoutMs,
506
+ firstItemErrorMessage: OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE,
507
+ errorMessage: "OpenAI responses stream stalled while waiting for the next event",
508
+ onFirstItemTimeout: () => abortTracker.abortLocally(firstEventTimeoutAbortError),
509
+ onIdle: () => requestAbortController.abort(),
510
+ abortSignal: options?.signal,
511
+ isProgressItem: isOpenAIResponsesProgressEvent,
512
+ });
513
+ await processResponsesStream(timedOpenaiStream, output, stream, model, {
514
+ onFirstToken: () => {
515
+ if (!firstTokenTime) firstTokenTime = Date.now();
296
516
  },
297
- );
298
- if (premiumRequestsTotal !== undefined) output.usage.premiumRequests = premiumRequestsTotal;
517
+ onOutputItemDone: item => {
518
+ // `processResponsesStream` hands over a private clone already; no
519
+ // second deep copy needed (reasoning items carry multi-KB blobs).
520
+ nativeOutputItems.push(item as unknown as Record<string, unknown>);
521
+ },
522
+ onCompleted: () => {
523
+ sawCompleted = true;
524
+ },
525
+ });
299
526
 
300
527
  const firstEventTimeoutError = abortTracker.getLocalAbortReason();
301
528
  if (firstEventTimeoutError) {
@@ -305,12 +532,36 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
305
532
  throw new Error("Request was aborted");
306
533
  }
307
534
 
535
+ // Detect premature stream closure: the HTTP stream ended without the
536
+ // provider sending `response.completed`. Custom/proxy providers may
537
+ // drop the connection mid-stream; without this guard the incomplete
538
+ // output is silently surfaced as a successful "stop".
539
+ if (!sawCompleted) {
540
+ throw new Error("OpenAI responses stream closed before response.completed was received");
541
+ }
542
+
308
543
  if (output.stopReason === "aborted" || output.stopReason === "error") {
309
544
  throw new Error(output.errorMessage ?? "An unknown error occurred");
310
545
  }
311
546
 
312
547
  output.providerPayload = createOpenAIResponsesHistoryPayload(model.provider, nativeOutputItems);
313
548
  if (providerSessionState) providerSessionState.nativeHistoryReplayWarmed = true;
549
+ if (chainState) {
550
+ chainState.lastParams = structuredCloneJSON(stripTrailingScaffolding(params, trailingScaffoldingItems));
551
+ if (output.responseId) {
552
+ chainState.lastResponseId = output.responseId;
553
+ chainState.lastResponseItems = sanitizeOpenAIResponsesHistoryItemsForReplay(
554
+ structuredCloneJSON(nativeOutputItems),
555
+ );
556
+ chainState.canAppend = true;
557
+ // Only a successful CHAINED completion clears the stale counter — a
558
+ // full-context success must not mask categorical rejection.
559
+ if (sentPreviousResponseId) chainState.staleFailures = 0;
560
+ } else {
561
+ // Without a response id the append baseline cannot be trusted.
562
+ chainState.canAppend = false;
563
+ }
564
+ }
314
565
 
315
566
  output.duration = Date.now() - startTime;
316
567
  if (firstTokenTime) output.ttft = firstTokenTime - startTime;
@@ -318,6 +569,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
318
569
  stream.end();
319
570
  } catch (error) {
320
571
  for (const block of output.content) delete (block as { index?: number }).index;
572
+ if (chainState) resetOpenAIResponsesChainState(chainState);
321
573
  const firstEventTimeoutError = abortTracker.getLocalAbortReason();
322
574
  output.stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
323
575
  output.errorStatus = extractHttpStatusFromError(error);
@@ -333,17 +585,15 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
333
585
  return stream;
334
586
  };
335
587
 
336
- function createClient(
588
+ function createRequestSetup(
337
589
  model: Model<"openai-responses">,
338
590
  context: Context,
339
591
  apiKey?: string,
340
592
  extraHeaders?: Record<string, string>,
341
593
  initiatorOverride?: MessageAttribution,
342
594
  sessionId?: string,
343
- onSseEvent?: OpenAIResponsesOptions["onSseEvent"],
344
- fetchOverride?: FetchImpl,
345
595
  ): {
346
- client: OpenAI;
596
+ headers: Record<string, string>;
347
597
  copilotPremiumRequests: number | undefined;
348
598
  baseUrl: string | undefined;
349
599
  } {
@@ -375,23 +625,12 @@ function createClient(
375
625
  copilotPremiumRequests = copilot.premiumRequests;
376
626
  baseUrl = resolveGitHubCopilotBaseUrl(model.baseUrl, rawApiKey) ?? model.baseUrl;
377
627
  }
378
- if (sessionId && model.provider === "openai" && (baseUrl ?? "").toLowerCase().includes("api.openai.com")) {
628
+ if (sessionId && model.provider === "openai") {
379
629
  headers.session_id ??= sessionId;
380
630
  headers["x-client-request-id"] ??= sessionId;
381
631
  }
382
- const baseFetch = fetchOverride ?? fetch;
383
- return {
384
- client: new OpenAI({
385
- apiKey,
386
- baseURL: baseUrl,
387
- dangerouslyAllowBrowser: true,
388
- maxRetries: 5,
389
- defaultHeaders: headers,
390
- fetch: onSseEvent ? wrapFetchForSseDebug(baseFetch, event => onSseEvent(event, model)) : baseFetch,
391
- }),
392
- copilotPremiumRequests,
393
- baseUrl,
394
- };
632
+ headers.Authorization ??= `Bearer ${apiKey}`;
633
+ return { headers, copilotPremiumRequests, baseUrl };
395
634
  }
396
635
 
397
636
  function getOpenAIResponsesPromptCacheKey(
@@ -418,24 +657,14 @@ function buildParams(
418
657
  context: Context,
419
658
  options: OpenAIResponsesOptions | undefined,
420
659
  providerSessionState: OpenAIResponsesProviderSessionState | undefined,
421
- resolvedBaseUrl?: string,
422
- ): { conversationMessages: ResponseInput; params: OpenAIResponsesSamplingParams } {
423
- const strictResponsesPairing =
424
- options?.strictResponsesPairing ??
425
- (isAzureOpenAIBaseUrl(model.baseUrl ?? "") || model.provider === "github-copilot");
426
- const conversationMessages = convertConversationMessages(
427
- model,
428
- context,
429
- strictResponsesPairing,
430
- providerSessionState,
431
- options,
432
- );
433
- const messages: ResponseInput = [...conversationMessages];
660
+ ): { params: OpenAIResponsesSamplingParams; trailingScaffoldingItems: number } {
661
+ const strictResponsesPairing = options?.strictResponsesPairing ?? model.compat.strictResponsesPairing;
662
+ const messages = convertConversationMessages(model, context, strictResponsesPairing, providerSessionState, options);
434
663
 
435
664
  const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
436
665
  let systemInstructions: string | undefined;
437
666
  if (systemPrompts.length > 0) {
438
- const needsDeveloperRole = model.reasoning && supportsDeveloperRole(resolvedBaseUrl ?? model);
667
+ const needsDeveloperRole = model.reasoning && model.compat.supportsDeveloperRole;
439
668
  if (needsDeveloperRole) {
440
669
  // Reasoning models on known OpenAI-compatible endpoints require the
441
670
  // `developer` role. Send all system prompts inline in `input`.
@@ -453,12 +682,16 @@ function buildParams(
453
682
  const cacheRetention = resolveCacheRetention(options?.cacheRetention);
454
683
  const promptCacheKey = getOpenAIResponsesPromptCacheKey(options);
455
684
  const params: OpenAIResponsesSamplingParams = {
456
- model: model.id,
685
+ model: model.requestModelId ?? model.id,
457
686
  input: messages,
458
687
  instructions: systemInstructions,
459
688
  stream: true,
460
689
  prompt_cache_key: promptCacheKey,
461
- prompt_cache_retention: promptCacheKey ? getPromptCacheRetention(model.baseUrl, cacheRetention) : undefined,
690
+ prompt_cache_retention: promptCacheKey
691
+ ? cacheRetention === "long" && model.compat.supportsLongPromptCacheRetention
692
+ ? "24h"
693
+ : undefined
694
+ : undefined,
462
695
  store: false,
463
696
  stream_options: model.provider === "openai" ? { include_obfuscation: false } : undefined,
464
697
  };
@@ -469,8 +702,8 @@ function buildParams(
469
702
  // TODO: openai responses has no top-level `frequency_penalty` field as of the current SDK;
470
703
  // `StreamOptions.frequencyPenalty` is intentionally dropped for this provider.
471
704
 
472
- if (context.tools && context.tools.length > 0) {
473
- params.tools = convertTools(context.tools, supportsStrictMode(model), model);
705
+ if (context.tools) {
706
+ params.tools = convertTools(context.tools, model.compat.supportsStrictMode, model);
474
707
  if (options?.toolChoice) {
475
708
  params.tool_choice = mapOpenAIResponsesToolChoiceForTools(options.toolChoice, context.tools, model);
476
709
  }
@@ -485,16 +718,15 @@ function buildParams(
485
718
  }
486
719
  }
487
720
 
488
- applyResponsesReasoningParams(
721
+ const trailingScaffoldingItems = applyResponsesReasoningParams(
489
722
  params,
490
723
  model,
491
724
  options,
492
725
  messages,
493
726
  effort =>
494
- mapReasoningEffort(
495
- effort as NonNullable<OpenAIResponsesOptions["reasoning"]>,
496
- model.compat?.reasoningEffortMap,
497
- ),
727
+ model.compat.reasoningEffortMap?.[effort as NonNullable<OpenAIResponsesOptions["reasoning"]>] ??
728
+ model.thinking?.effortMap?.[effort as NonNullable<OpenAIResponsesOptions["reasoning"]>] ??
729
+ effort,
498
730
  options?.includeEncryptedReasoning ?? true,
499
731
  options?.omitReasoningEffort ?? false,
500
732
  );
@@ -503,42 +735,7 @@ function buildParams(
503
735
  Object.assign(params, options.extraBody);
504
736
  }
505
737
 
506
- return { conversationMessages, params };
507
- }
508
-
509
- function mapReasoningEffort(
510
- effort: NonNullable<OpenAIResponsesOptions["reasoning"]>,
511
- reasoningEffortMap: OpenAICompat["reasoningEffortMap"] | undefined,
512
- ): string {
513
- return reasoningEffortMap?.[effort] ?? effort;
514
- }
515
-
516
- function isAzureOpenAIBaseUrl(baseUrl: string): boolean {
517
- return baseUrl.includes(".openai.azure.com") || baseUrl.includes("azure.com/openai");
518
- }
519
-
520
- function supportsStrictMode(model: Model<"openai-responses">): boolean {
521
- if (model.provider === "openai" || model.provider === "azure" || model.provider === "github-copilot") return true;
522
-
523
- const baseUrl = model.baseUrl.toLowerCase();
524
- return (
525
- baseUrl.includes("api.openai.com") ||
526
- baseUrl.includes(".openai.azure.com") ||
527
- baseUrl.includes("models.inference.ai.azure.com")
528
- );
529
- }
530
-
531
- export function supportsDeveloperRole(modelOrBaseUrl: Pick<Model, "provider" | "baseUrl"> | string): boolean {
532
- const baseUrl =
533
- typeof modelOrBaseUrl === "string" ? modelOrBaseUrl.toLowerCase() : (modelOrBaseUrl.baseUrl ?? "").toLowerCase();
534
- return (
535
- baseUrl.includes("api.openai.com") ||
536
- baseUrl.includes(".openai.azure.com") ||
537
- baseUrl.includes("azure.com/openai") ||
538
- baseUrl.includes("models.inference.ai.azure.com") ||
539
- baseUrl.includes("githubcopilot.com") ||
540
- baseUrl.includes("copilot-api.")
541
- );
738
+ return { params, trailingScaffoldingItems };
542
739
  }
543
740
 
544
741
  function convertConversationMessages(
@@ -581,9 +778,13 @@ function convertConversationMessages(
581
778
  messages.push({ role: "user", content });
582
779
  } else if (msg.role === "assistant") {
583
780
  const assistantMsg = msg as AssistantMessage;
584
- const providerPayload = shouldReplayNativeHistory
585
- ? getOpenAIResponsesHistoryPayload(assistantMsg.providerPayload, model.provider, assistantMsg.provider)
586
- : undefined;
781
+ // Native items are model-bound (reasoning carries encrypted content minted
782
+ // by the producing model); after a mid-session model switch fall back to
783
+ // block re-encode, which strips foreign signatures.
784
+ const providerPayload =
785
+ shouldReplayNativeHistory && assistantMsg.api === model.api && assistantMsg.model === model.id
786
+ ? getOpenAIResponsesHistoryPayload(assistantMsg.providerPayload, model.provider, assistantMsg.provider)
787
+ : undefined;
587
788
  const historyItems = providerPayload?.items;
588
789
  if (historyItems) {
589
790
  const sanitizedHistoryItems = sanitizeOpenAIResponsesHistoryItemsForReplay(filterReasoning(historyItems));
@@ -614,7 +815,7 @@ function convertConversationMessages(
614
815
  msgIndex++;
615
816
  }
616
817
 
617
- return repairOrphanResponsesToolOutputs(messages);
818
+ return repairOrphanResponsesToolCalls(repairOrphanResponsesToolOutputs(messages));
618
819
  }
619
820
 
620
821
  /**
@@ -635,13 +836,18 @@ export function mapOpenAIResponsesToolChoiceForTools(
635
836
  model: Model<"openai-responses">,
636
837
  ): OpenAIResponsesToolChoice {
637
838
  const mapped = mapToOpenAIResponsesToolChoice(choice);
638
- if (!mapped || typeof mapped === "string" || mapped.type !== "function" || !supportsFreeformApplyPatch(model)) {
839
+ if (!mapped || typeof mapped === "string" || mapped.type !== "function") {
639
840
  return mapped;
640
841
  }
641
842
 
642
- const customTool = tools.find(
643
- tool => tool.customFormat && (tool.name === mapped.name || tool.customWireName === mapped.name),
644
- );
843
+ const directTool = tools.find(tool => tool.name === mapped.name);
844
+ const customTool = supportsFreeformApplyPatch(model)
845
+ ? tools.find(tool => tool.customFormat && (tool.name === mapped.name || tool.customWireName === mapped.name))
846
+ : undefined;
847
+ const offeredTool = customTool ?? directTool;
848
+ if (!offeredTool) {
849
+ return undefined;
850
+ }
645
851
  return customTool ? { type: "custom", name: customTool.customWireName ?? customTool.name } : mapped;
646
852
  }
647
853