@prometheus-ai/ai 0.5.4 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/dist/types/auth-broker/remote-store.d.ts +2 -1
  2. package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
  3. package/dist/types/auth-gateway/server.d.ts +19 -0
  4. package/dist/types/auth-gateway/types.d.ts +9 -3
  5. package/dist/types/auth-retry.d.ts +119 -0
  6. package/dist/types/auth-storage.d.ts +217 -8
  7. package/dist/types/errors.d.ts +24 -0
  8. package/dist/types/index.d.ts +5 -9
  9. package/dist/types/provider-details.d.ts +1 -1
  10. package/dist/types/providers/amazon-bedrock.d.ts +12 -6
  11. package/dist/types/providers/anthropic-client.d.ts +10 -3
  12. package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
  13. package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
  14. package/dist/types/providers/anthropic-wire.d.ts +3 -3
  15. package/dist/types/providers/anthropic.d.ts +41 -34
  16. package/dist/types/providers/aws-credentials.d.ts +8 -0
  17. package/dist/types/providers/azure-openai-responses.d.ts +1 -0
  18. package/dist/types/providers/google-gemini-cli.d.ts +22 -1
  19. package/dist/types/providers/google-shared.d.ts +22 -0
  20. package/dist/types/providers/google-types.d.ts +13 -1
  21. package/dist/types/providers/mock.d.ts +8 -3
  22. package/dist/types/providers/ollama.d.ts +6 -0
  23. package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
  24. package/dist/types/providers/openai-chat-server.d.ts +3 -3
  25. package/dist/types/providers/openai-chat-wire.d.ts +644 -0
  26. package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
  27. package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
  28. package/dist/types/providers/openai-codex-responses.d.ts +31 -2
  29. package/dist/types/providers/openai-completions-compat.d.ts +2 -25
  30. package/dist/types/providers/openai-completions.d.ts +2 -10
  31. package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
  32. package/dist/types/providers/openai-responses-server.d.ts +2 -2
  33. package/dist/types/providers/openai-responses-shared.d.ts +49 -9
  34. package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
  35. package/dist/types/providers/openai-responses.d.ts +13 -4
  36. package/dist/types/providers/prometheus-native-client.d.ts +9 -0
  37. package/dist/types/providers/prometheus-native-server.d.ts +4 -3
  38. package/dist/types/providers/transform-messages.d.ts +1 -2
  39. package/dist/types/rate-limit-utils.d.ts +3 -2
  40. package/dist/types/registry/aimlapi.d.ts +4 -0
  41. package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
  42. package/dist/types/registry/amazon-bedrock.d.ts +5 -0
  43. package/dist/types/registry/anthropic.d.ts +10 -0
  44. package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
  45. package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
  46. package/dist/types/registry/cerebras.d.ts +7 -0
  47. package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
  48. package/dist/types/registry/cursor.d.ts +7 -0
  49. package/dist/types/registry/deepseek.d.ts +8 -0
  50. package/dist/types/registry/derived.d.ts +5 -0
  51. package/dist/types/registry/firepass.d.ts +16 -0
  52. package/dist/types/registry/fireworks.d.ts +7 -0
  53. package/dist/types/registry/github-copilot.d.ts +7 -0
  54. package/dist/types/registry/gitlab-duo.d.ts +9 -0
  55. package/dist/types/registry/google-antigravity.d.ts +9 -0
  56. package/dist/types/registry/google-gemini-cli.d.ts +9 -0
  57. package/dist/types/registry/google-vertex.d.ts +5 -0
  58. package/dist/types/registry/google.d.ts +4 -0
  59. package/dist/types/registry/groq.d.ts +4 -0
  60. package/dist/types/registry/huggingface.d.ts +7 -0
  61. package/dist/types/registry/index.d.ts +4 -0
  62. package/dist/types/registry/kagi.d.ts +14 -0
  63. package/dist/types/registry/kilo.d.ts +7 -0
  64. package/dist/types/registry/kimi-code.d.ts +7 -0
  65. package/dist/types/registry/litellm.d.ts +13 -0
  66. package/dist/types/registry/lm-studio.d.ts +8 -0
  67. package/dist/types/registry/minimax-code-cn.d.ts +6 -0
  68. package/dist/types/registry/minimax-code.d.ts +6 -0
  69. package/dist/types/registry/minimax.d.ts +4 -0
  70. package/dist/types/registry/mistral.d.ts +4 -0
  71. package/dist/types/registry/moonshot.d.ts +7 -0
  72. package/dist/types/registry/nanogpt.d.ts +7 -0
  73. package/dist/types/registry/nvidia.d.ts +7 -0
  74. package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  75. package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
  76. package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
  77. package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
  78. package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
  79. package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
  80. package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
  81. package/dist/types/registry/ollama-cloud.d.ts +7 -0
  82. package/dist/types/registry/ollama.d.ts +12 -0
  83. package/dist/types/registry/openai-codex-device.d.ts +8 -0
  84. package/dist/types/registry/openai-codex.d.ts +9 -0
  85. package/dist/types/registry/openai.d.ts +4 -0
  86. package/dist/types/registry/opencode-go.d.ts +6 -0
  87. package/dist/types/registry/opencode-zen.d.ts +6 -0
  88. package/dist/types/registry/openrouter.d.ts +13 -0
  89. package/dist/types/registry/parallel.d.ts +14 -0
  90. package/dist/types/registry/perplexity.d.ts +7 -0
  91. package/dist/types/registry/qianfan.d.ts +7 -0
  92. package/dist/types/registry/qwen-portal.d.ts +7 -0
  93. package/dist/types/registry/registry.d.ts +272 -0
  94. package/dist/types/registry/synthetic.d.ts +6 -0
  95. package/dist/types/registry/tavily.d.ts +14 -0
  96. package/dist/types/registry/together.d.ts +6 -0
  97. package/dist/types/registry/types.d.ts +51 -0
  98. package/dist/types/registry/venice.d.ts +13 -0
  99. package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
  100. package/dist/types/registry/vllm.d.ts +7 -0
  101. package/dist/types/registry/wafer-pass.d.ts +6 -0
  102. package/dist/types/registry/wafer-serverless.d.ts +6 -0
  103. package/dist/types/registry/xai-oauth.d.ts +7 -0
  104. package/dist/types/registry/xai.d.ts +4 -0
  105. package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
  106. package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
  107. package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
  108. package/dist/types/registry/xiaomi.d.ts +6 -0
  109. package/dist/types/registry/zai.d.ts +7 -0
  110. package/dist/types/registry/zenmux.d.ts +7 -0
  111. package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
  112. package/dist/types/stream.d.ts +9 -1
  113. package/dist/types/types.d.ts +56 -295
  114. package/dist/types/usage/google-antigravity.d.ts +15 -1
  115. package/dist/types/usage/openai-codex-reset.d.ts +79 -0
  116. package/dist/types/usage/openai-codex.d.ts +1 -0
  117. package/dist/types/usage.d.ts +77 -4
  118. package/dist/types/utils/abort.d.ts +6 -0
  119. package/dist/types/utils/event-stream.d.ts +2 -0
  120. package/dist/types/utils/http-inspector.d.ts +0 -1
  121. package/dist/types/utils/idle-iterator.d.ts +35 -0
  122. package/dist/types/utils/openai-http.d.ts +58 -0
  123. package/dist/types/utils/request-debug.d.ts +3 -0
  124. package/dist/types/utils/retry-after.d.ts +1 -0
  125. package/dist/types/utils/schema/fields.d.ts +5 -0
  126. package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
  127. package/dist/types/utils/schema/stamps.d.ts +7 -15
  128. package/dist/types/utils/sse-debug.d.ts +0 -5
  129. package/dist/types/utils/stream-markup-healing.d.ts +2 -0
  130. package/dist/types/utils.d.ts +1 -5
  131. package/package.json +17 -29
  132. package/src/auth-broker/remote-store.ts +10 -1
  133. package/src/auth-broker/snapshot-cache.ts +1 -1
  134. package/src/auth-broker/wire-schemas.ts +1 -1
  135. package/src/auth-gateway/http.ts +1 -1
  136. package/src/auth-gateway/server.ts +95 -30
  137. package/src/auth-gateway/types.ts +10 -2
  138. package/src/auth-retry.ts +238 -0
  139. package/src/auth-storage.ts +935 -430
  140. package/src/errors.ts +32 -0
  141. package/src/index.ts +9 -14
  142. package/src/provider-details.ts +1 -1
  143. package/src/providers/__tests__/google-auth.test.ts +144 -0
  144. package/src/providers/amazon-bedrock.ts +70 -40
  145. package/src/providers/anthropic-client.ts +15 -13
  146. package/src/providers/anthropic-messages-server-schema.ts +17 -7
  147. package/src/providers/anthropic-messages-server.ts +88 -20
  148. package/src/providers/anthropic-wire.ts +4 -3
  149. package/src/providers/anthropic.ts +1234 -621
  150. package/src/providers/aws-credentials.ts +47 -5
  151. package/src/providers/aws-eventstream.ts +5 -0
  152. package/src/providers/azure-openai-responses.ts +117 -67
  153. package/src/providers/cursor.ts +30 -30
  154. package/src/providers/github-copilot-headers.ts +1 -1
  155. package/src/providers/gitlab-duo.ts +36 -29
  156. package/src/providers/google-auth.ts +71 -8
  157. package/src/providers/google-gemini-cli.ts +118 -22
  158. package/src/providers/google-shared.ts +163 -43
  159. package/src/providers/google-types.ts +10 -1
  160. package/src/providers/kimi.ts +1 -1
  161. package/src/providers/mock.ts +11 -3
  162. package/src/providers/ollama.ts +64 -7
  163. package/src/providers/openai-anthropic-shim.ts +17 -8
  164. package/src/providers/openai-chat-server-schema.ts +9 -3
  165. package/src/providers/openai-chat-server.ts +82 -16
  166. package/src/providers/openai-chat-wire.ts +847 -0
  167. package/src/providers/openai-codex/request-transformer.ts +129 -34
  168. package/src/providers/openai-codex/response-handler.ts +22 -1
  169. package/src/providers/openai-codex-responses.ts +699 -247
  170. package/src/providers/openai-completions-compat.ts +8 -308
  171. package/src/providers/openai-completions.ts +416 -267
  172. package/src/providers/openai-responses-server-schema.ts +15 -9
  173. package/src/providers/openai-responses-server.ts +162 -114
  174. package/src/providers/openai-responses-shared.ts +320 -82
  175. package/src/providers/openai-responses-wire.ts +6391 -0
  176. package/src/providers/openai-responses.ts +382 -176
  177. package/src/providers/prometheus-native-client.ts +27 -11
  178. package/src/providers/prometheus-native-server.ts +44 -17
  179. package/src/providers/transform-messages.ts +311 -120
  180. package/src/providers/vision-guard.ts +5 -3
  181. package/src/rate-limit-utils.ts +13 -3
  182. package/src/registry/aimlapi.ts +6 -0
  183. package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
  184. package/src/registry/amazon-bedrock.ts +22 -0
  185. package/src/registry/anthropic.ts +26 -0
  186. package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
  187. package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
  188. package/src/{utils/oauth → registry}/cerebras.ts +8 -1
  189. package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
  190. package/src/registry/cursor.ts +20 -0
  191. package/src/{utils/oauth → registry}/deepseek.ts +9 -17
  192. package/src/registry/derived.ts +9 -0
  193. package/src/{utils/oauth → registry}/firepass.ts +10 -2
  194. package/src/{utils/oauth → registry}/fireworks.ts +8 -1
  195. package/src/registry/github-copilot.ts +22 -0
  196. package/src/registry/gitlab-duo.ts +19 -0
  197. package/src/registry/google-antigravity.ts +21 -0
  198. package/src/registry/google-gemini-cli.ts +21 -0
  199. package/src/registry/google-vertex.ts +38 -0
  200. package/src/registry/google.ts +6 -0
  201. package/src/registry/groq.ts +6 -0
  202. package/src/{utils/oauth → registry}/huggingface.ts +8 -19
  203. package/src/registry/index.ts +4 -0
  204. package/src/{utils/oauth → registry}/kagi.ts +9 -11
  205. package/src/{utils/oauth → registry}/kilo.ts +11 -6
  206. package/src/registry/kimi-code.ts +17 -0
  207. package/src/{utils/oauth → registry}/litellm.ts +8 -12
  208. package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
  209. package/src/registry/minimax-code-cn.ts +12 -0
  210. package/src/registry/minimax-code.ts +12 -0
  211. package/src/registry/minimax.ts +6 -0
  212. package/src/registry/mistral.ts +6 -0
  213. package/src/{utils/oauth → registry}/moonshot.ts +8 -9
  214. package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
  215. package/src/{utils/oauth → registry}/nvidia.ts +8 -18
  216. package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
  217. package/src/{utils → registry}/oauth/anthropic.ts +38 -17
  218. package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
  219. package/src/registry/oauth/gitlab-duo.ts +198 -0
  220. package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
  221. package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
  222. package/src/registry/oauth/index.ts +164 -0
  223. package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
  224. package/src/{utils → registry}/oauth/types.ts +7 -51
  225. package/src/{utils → registry}/oauth/wafer.ts +1 -1
  226. package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
  227. package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
  228. package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
  229. package/src/{utils/oauth → registry}/ollama.ts +8 -13
  230. package/src/registry/openai-codex-device.ts +18 -0
  231. package/src/registry/openai-codex.ts +19 -0
  232. package/src/registry/openai.ts +6 -0
  233. package/src/registry/opencode-go.ts +12 -0
  234. package/src/registry/opencode-zen.ts +12 -0
  235. package/src/{utils/oauth → registry}/openrouter.ts +10 -2
  236. package/src/{utils/oauth → registry}/parallel.ts +9 -11
  237. package/src/registry/perplexity.ts +13 -0
  238. package/src/{utils/oauth → registry}/qianfan.ts +8 -17
  239. package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
  240. package/src/registry/registry.ts +149 -0
  241. package/src/{utils/oauth → registry}/synthetic.ts +7 -1
  242. package/src/{utils/oauth → registry}/tavily.ts +10 -12
  243. package/src/{utils/oauth → registry}/together.ts +7 -1
  244. package/src/registry/types.ts +56 -0
  245. package/src/{utils/oauth → registry}/venice.ts +8 -12
  246. package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
  247. package/src/{utils/oauth → registry}/vllm.ts +9 -16
  248. package/src/registry/wafer-pass.ts +12 -0
  249. package/src/registry/wafer-serverless.ts +12 -0
  250. package/src/registry/xai-oauth.ts +17 -0
  251. package/src/registry/xai.ts +6 -0
  252. package/src/registry/xiaomi-token-plan-ams.ts +12 -0
  253. package/src/registry/xiaomi-token-plan-cn.ts +12 -0
  254. package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
  255. package/src/registry/xiaomi.ts +12 -0
  256. package/src/{utils/oauth → registry}/zai.ts +10 -22
  257. package/src/{utils/oauth → registry}/zenmux.ts +8 -1
  258. package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
  259. package/src/stream.ts +229 -199
  260. package/src/types.ts +63 -384
  261. package/src/usage/claude.ts +4 -2
  262. package/src/usage/github-copilot.ts +4 -2
  263. package/src/usage/google-antigravity.ts +196 -28
  264. package/src/usage/kimi.ts +1 -1
  265. package/src/usage/minimax-code.ts +5 -6
  266. package/src/usage/openai-codex-reset.ts +174 -0
  267. package/src/usage/openai-codex.ts +19 -2
  268. package/src/usage/zai.ts +2 -1
  269. package/src/usage.ts +93 -4
  270. package/src/utils/abort.ts +14 -0
  271. package/src/utils/event-stream.ts +17 -0
  272. package/src/utils/http-inspector.ts +4 -12
  273. package/src/utils/idle-iterator.ts +250 -79
  274. package/src/utils/openai-http.ts +157 -0
  275. package/src/utils/request-debug.ts +67 -19
  276. package/src/utils/retry-after.ts +1 -1
  277. package/src/utils/retry.ts +23 -2
  278. package/src/utils/schema/CONSTRAINTS.md +4 -2
  279. package/src/utils/schema/fields.ts +16 -0
  280. package/src/utils/schema/json-schema-validator.ts +19 -1
  281. package/src/utils/schema/normalize.ts +80 -8
  282. package/src/utils/schema/stamps.ts +22 -10
  283. package/src/utils/schema/wire.ts +2 -2
  284. package/src/utils/sse-debug.ts +0 -271
  285. package/src/utils/stream-markup-healing.ts +50 -8
  286. package/src/utils/validation.ts +49 -13
  287. package/src/utils.ts +2 -26
  288. package/dist/types/model-cache.d.ts +0 -17
  289. package/dist/types/model-manager.d.ts +0 -64
  290. package/dist/types/model-thinking.d.ts +0 -100
  291. package/dist/types/models.d.ts +0 -12
  292. package/dist/types/provider-models/bundled-references.d.ts +0 -4
  293. package/dist/types/provider-models/descriptors.d.ts +0 -50
  294. package/dist/types/provider-models/google.d.ts +0 -24
  295. package/dist/types/provider-models/index.d.ts +0 -5
  296. package/dist/types/provider-models/ollama.d.ts +0 -7
  297. package/dist/types/provider-models/openai-compat.d.ts +0 -323
  298. package/dist/types/provider-models/special.d.ts +0 -16
  299. package/dist/types/utils/discovery/antigravity.d.ts +0 -61
  300. package/dist/types/utils/discovery/codex.d.ts +0 -38
  301. package/dist/types/utils/discovery/cursor.d.ts +0 -23
  302. package/dist/types/utils/discovery/gemini.d.ts +0 -25
  303. package/dist/types/utils/discovery/index.d.ts +0 -4
  304. package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
  305. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
  306. package/dist/types/utils/oauth/cerebras.d.ts +0 -1
  307. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
  308. package/dist/types/utils/oauth/deepseek.d.ts +0 -10
  309. package/dist/types/utils/oauth/firepass.d.ts +0 -1
  310. package/dist/types/utils/oauth/fireworks.d.ts +0 -1
  311. package/dist/types/utils/oauth/huggingface.d.ts +0 -19
  312. package/dist/types/utils/oauth/kagi.d.ts +0 -17
  313. package/dist/types/utils/oauth/kilo.d.ts +0 -5
  314. package/dist/types/utils/oauth/litellm.d.ts +0 -18
  315. package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
  316. package/dist/types/utils/oauth/moonshot.d.ts +0 -1
  317. package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
  318. package/dist/types/utils/oauth/nvidia.d.ts +0 -18
  319. package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
  320. package/dist/types/utils/oauth/ollama.d.ts +0 -18
  321. package/dist/types/utils/oauth/openrouter.d.ts +0 -1
  322. package/dist/types/utils/oauth/parallel.d.ts +0 -17
  323. package/dist/types/utils/oauth/qianfan.d.ts +0 -17
  324. package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
  325. package/dist/types/utils/oauth/synthetic.d.ts +0 -1
  326. package/dist/types/utils/oauth/tavily.d.ts +0 -17
  327. package/dist/types/utils/oauth/together.d.ts +0 -1
  328. package/dist/types/utils/oauth/venice.d.ts +0 -18
  329. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
  330. package/dist/types/utils/oauth/vllm.d.ts +0 -16
  331. package/dist/types/utils/oauth/zai.d.ts +0 -18
  332. package/dist/types/utils/oauth/zenmux.d.ts +0 -1
  333. package/dist/types/utils/oauth/zhipu.d.ts +0 -18
  334. package/src/model-cache.ts +0 -129
  335. package/src/model-manager.ts +0 -469
  336. package/src/model-thinking.ts +0 -756
  337. package/src/models.json +0 -60287
  338. package/src/models.json.d.ts +0 -9
  339. package/src/models.ts +0 -56
  340. package/src/provider-models/bundled-references.ts +0 -38
  341. package/src/provider-models/descriptors.ts +0 -364
  342. package/src/provider-models/google.ts +0 -88
  343. package/src/provider-models/index.ts +0 -5
  344. package/src/provider-models/ollama.ts +0 -153
  345. package/src/provider-models/openai-compat.ts +0 -2904
  346. package/src/provider-models/special.ts +0 -67
  347. package/src/utils/discovery/antigravity.ts +0 -261
  348. package/src/utils/discovery/codex.ts +0 -371
  349. package/src/utils/discovery/cursor.ts +0 -306
  350. package/src/utils/discovery/gemini.ts +0 -248
  351. package/src/utils/discovery/index.ts +0 -4
  352. package/src/utils/discovery/openai-compatible.ts +0 -224
  353. package/src/utils/oauth/gitlab-duo.ts +0 -123
  354. package/src/utils/oauth/index.ts +0 -502
  355. /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
  356. /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
  357. /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
  358. /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
  359. /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
  360. /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
  361. /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
  362. /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
  363. /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
  364. /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
  365. /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
  366. /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
  367. /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
  368. /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
  369. /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
  370. /package/src/{utils → registry}/oauth/cursor.ts +0 -0
  371. /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
  372. /package/src/{utils → registry}/oauth/kimi.ts +0 -0
  373. /package/src/{utils → registry}/oauth/oauth.html +0 -0
  374. /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
  375. /package/src/{utils → registry}/oauth/opencode.ts +0 -0
  376. /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
  377. /package/src/{utils → registry}/oauth/pkce.ts +0 -0
@@ -1,22 +1,11 @@
1
- import { structuredCloneJSON } from "@prometheus-ai/utils";
2
- import type OpenAI from "openai";
3
- import type {
4
- ResponseCustomToolCall,
5
- ResponseFunctionToolCall,
6
- ResponseInput,
7
- ResponseInputContent,
8
- ResponseInputImage,
9
- ResponseInputText,
10
- ResponseOutputItem,
11
- ResponseOutputMessage,
12
- ResponseReasoningItem,
13
- } from "openai/resources/responses/responses";
14
- import { calculateCost } from "../models";
1
+ import { calculateCost } from "@prometheus-ai/catalog/models";
2
+ import { logger, structuredCloneJSON } from "@prometheus-ai/utils";
15
3
  import {
16
4
  type Api,
17
5
  type AssistantMessage,
18
6
  type ImageContent,
19
7
  type Model,
8
+ OPENAI_MAX_OUTPUT_TOKENS,
20
9
  resolveServiceTier,
21
10
  type ServiceTier,
22
11
  type StopReason,
@@ -31,6 +20,20 @@ import {
31
20
  import { normalizeResponsesToolCallId } from "../utils";
32
21
  import type { AssistantMessageEventStream } from "../utils/event-stream";
33
22
  import { parseStreamingJson, parseStreamingJsonThrottled } from "../utils/json-parse";
23
+ import type {
24
+ ResponseCreateParamsStreaming,
25
+ ResponseCustomToolCall,
26
+ ResponseFunctionToolCall,
27
+ ResponseInput,
28
+ ResponseInputContent,
29
+ ResponseInputImage,
30
+ ResponseInputText,
31
+ ResponseOutputItem,
32
+ ResponseOutputMessage,
33
+ ResponseReasoningItem,
34
+ ResponseStatus,
35
+ ResponseStreamEvent,
36
+ } from "./openai-responses-wire";
34
37
  import { joinTextWithImagePlaceholder, NON_VISION_IMAGE_PLACEHOLDER, partitionVisionContent } from "./vision-guard";
35
38
  export const OPENAI_RESPONSES_PROGRESS_EVENT_TYPES: ReadonlySet<string> = new Set([
36
39
  "response.created",
@@ -48,6 +51,7 @@ export const OPENAI_RESPONSES_PROGRESS_EVENT_TYPES: ReadonlySet<string> = new Se
48
51
  "response.custom_tool_call_input.done",
49
52
  "response.output_item.done",
50
53
  "response.completed",
54
+ "response.incomplete",
51
55
  "response.failed",
52
56
  "error",
53
57
  ]);
@@ -212,6 +216,59 @@ export function repairOrphanResponsesToolOutputs(input: ResponseInput): Response
212
216
  });
213
217
  }
214
218
 
219
+ /** Placeholder output for a tool call whose result is absent from the input. */
220
+ const ORPHAN_TOOL_CALL_PLACEHOLDER =
221
+ "[No tool output recorded: the tool call was interrupted before it produced a result.]";
222
+
223
+ /**
224
+ * Synthesize a placeholder `function_call_output` / `custom_tool_call_output`
225
+ * for every `function_call` / `custom_tool_call` whose `call_id` has no matching
226
+ * output later in the same input. The Responses API rejects an unpaired call
227
+ * with `400 No tool output found for function call …`.
228
+ *
229
+ * Orphan calls surface when the user branches/navigates the session tree to a
230
+ * node that ends on a tool call (the tool-result child is excluded from the
231
+ * reconstructed history) or when a turn is aborted/crashes after the call
232
+ * streamed but before its result persisted. Dropping the call would erase the
233
+ * assistant's action; a placeholder output keeps the call visible so the model
234
+ * can recover (e.g. re-issue the call). Symmetric to
235
+ * {@link repairOrphanResponsesToolOutputs}.
236
+ */
237
+ export function repairOrphanResponsesToolCalls(input: ResponseInput): ResponseInput {
238
+ const outputCallIds = new Set<string>();
239
+ for (const item of input) {
240
+ const t = (item as { type?: string }).type;
241
+ if (t !== "function_call_output" && t !== "custom_tool_call_output") continue;
242
+ const callId = (item as { call_id?: unknown }).call_id;
243
+ if (typeof callId === "string") outputCallIds.add(callId);
244
+ }
245
+ let hasOrphan = false;
246
+ for (const item of input) {
247
+ const t = (item as { type?: string }).type;
248
+ if (t !== "function_call" && t !== "custom_tool_call") continue;
249
+ const callId = (item as { call_id?: unknown }).call_id;
250
+ if (typeof callId === "string" && !outputCallIds.has(callId)) {
251
+ hasOrphan = true;
252
+ break;
253
+ }
254
+ }
255
+ if (!hasOrphan) return input;
256
+ const repaired: ResponseInput = [];
257
+ for (const item of input) {
258
+ repaired.push(item);
259
+ const t = (item as { type?: string }).type;
260
+ if (t !== "function_call" && t !== "custom_tool_call") continue;
261
+ const callId = (item as { call_id?: unknown }).call_id;
262
+ if (typeof callId !== "string" || outputCallIds.has(callId)) continue;
263
+ repaired.push({
264
+ type: t === "custom_tool_call" ? "custom_tool_call_output" : "function_call_output",
265
+ call_id: callId,
266
+ output: ORPHAN_TOOL_CALL_PLACEHOLDER,
267
+ } as ResponseInput[number]);
268
+ }
269
+ return repaired;
270
+ }
271
+
215
272
  export function convertResponsesInputContent(
216
273
  content: string | Array<TextContent | ImageContent>,
217
274
  supportsImages: boolean,
@@ -234,7 +291,7 @@ export function convertResponsesInputContent(
234
291
  for (const item of imageBlocks) {
235
292
  normalizedContent.push({
236
293
  type: "input_image",
237
- detail: "auto",
294
+ detail: item.detail ?? "auto",
238
295
  image_url: `data:${item.mimeType};base64,${item.data}`,
239
296
  } satisfies ResponseInputImage);
240
297
  }
@@ -256,6 +313,7 @@ export function convertResponsesAssistantMessage<TApi extends Api>(
256
313
  customCallIds?: Set<string>,
257
314
  ): ResponseInput {
258
315
  const outputItems: ResponseInput = [];
316
+ let unsignedTextBlocks = 0;
259
317
  const isDifferentModel =
260
318
  assistantMsg.model !== model.id && assistantMsg.provider === model.provider && assistantMsg.api === model.api;
261
319
 
@@ -265,7 +323,12 @@ export function convertResponsesAssistantMessage<TApi extends Api>(
265
323
  continue;
266
324
  }
267
325
  if (block.thinkingSignature) {
268
- outputItems.push(JSON.parse(block.thinkingSignature) as ResponseReasoningItem);
326
+ try {
327
+ outputItems.push(JSON.parse(block.thinkingSignature) as ResponseReasoningItem);
328
+ } catch {
329
+ // Legacy/corrupt persisted signature — skip the reasoning item
330
+ // rather than failing the whole request build.
331
+ }
269
332
  }
270
333
  continue;
271
334
  }
@@ -274,7 +337,10 @@ export function convertResponsesAssistantMessage<TApi extends Api>(
274
337
  const parsedSignature = parseTextSignature(block.textSignature);
275
338
  let msgId = parsedSignature?.id;
276
339
  if (!msgId) {
277
- msgId = `msg_${msgIndex}`;
340
+ // Distinct ids per unsigned block: several text blocks in one message
341
+ // (cross-provider replay downgrades thinking → text) must not share an id.
342
+ msgId = unsignedTextBlocks === 0 ? `msg_${msgIndex}` : `msg_${msgIndex}_${unsignedTextBlocks}`;
343
+ unsignedTextBlocks += 1;
278
344
  } else if (msgId.length > 64) {
279
345
  msgId = `msg_${Bun.hash(msgId).toString(36)}`;
280
346
  }
@@ -339,10 +405,6 @@ export function appendResponsesToolResultMessages<TApi extends Api>(
339
405
  const hasImages = toolResult.content.some((block): block is ImageContent => block.type === "image");
340
406
  const omittedImages = hasImages && !supportsImages;
341
407
  const normalized = normalizeResponsesToolCallId(toolResult.toolCallId);
342
- if (strictResponsesPairing && !knownCallIds.has(normalized.callId)) {
343
- return;
344
- }
345
-
346
408
  const output = (
347
409
  omittedImages
348
410
  ? joinTextWithImagePlaceholder(textResult, true)
@@ -350,6 +412,19 @@ export function appendResponsesToolResultMessages<TApi extends Api>(
350
412
  ? textResult
351
413
  : "(see attached image)"
352
414
  ).toWellFormed();
415
+ if (strictResponsesPairing && !knownCallIds.has(normalized.callId)) {
416
+ // Strict backends (Azure, Copilot) reject unpaired outputs outright, but
417
+ // silently dropping the result loses information the model needs. Fold it
418
+ // into an assistant note instead (same shape as repairOrphanResponsesToolOutputs).
419
+ const limit = 16_000;
420
+ const noteText = output.length > limit ? `${output.slice(0, limit)}\n...[truncated]` : output;
421
+ messages.push({
422
+ type: "message",
423
+ role: "assistant",
424
+ content: `[Orphan ${toolResult.toolName || "tool"} result; call_id=${normalized.callId}]: ${noteText}`,
425
+ } as ResponseInput[number]);
426
+ return;
427
+ }
353
428
  if (customCallIds?.has(normalized.callId)) {
354
429
  messages.push({
355
430
  type: "custom_tool_call_output",
@@ -375,7 +450,7 @@ export function appendResponsesToolResultMessages<TApi extends Api>(
375
450
  if (block.type === "image") {
376
451
  contentParts.push({
377
452
  type: "input_image",
378
- detail: "auto",
453
+ detail: block.detail ?? "auto",
379
454
  image_url: `data:${block.mimeType};base64,${block.data}`,
380
455
  } satisfies ResponseInputImage);
381
456
  }
@@ -386,10 +461,18 @@ export function appendResponsesToolResultMessages<TApi extends Api>(
386
461
  export interface ProcessResponsesStreamOptions {
387
462
  onFirstToken?: () => void;
388
463
  onOutputItemDone?: (item: ResponseOutputItem) => void;
464
+ /**
465
+ * Called when a terminal `response.completed` or `response.incomplete` event
466
+ * is successfully processed. Only invoked on the successful-completion path;
467
+ * thrown failure (`response.failed`) and cancellation paths never call this.
468
+ * Used by callers to detect premature stream closure (i.e. the stream ended
469
+ * without a recognized terminal event).
470
+ */
471
+ onCompleted?: () => void;
389
472
  }
390
473
 
391
474
  export async function processResponsesStream<TApi extends Api>(
392
- openaiStream: AsyncIterable<OpenAI.Responses.ResponseStreamEvent>,
475
+ openaiStream: AsyncIterable<ResponseStreamEvent>,
393
476
  output: AssistantMessage,
394
477
  stream: AssistantMessageEventStream,
395
478
  model: Model<TApi>,
@@ -403,9 +486,16 @@ export async function processResponsesStream<TApi extends Api>(
403
486
 
404
487
  // Multiple items (parallel function_calls in particular) can be open at the same
405
488
  // time. OpenAI's spec routes every per-item event by `output_index`/`item_id`;
406
- // see https://github.com/uttamtrivedi/Prometheus/issues/1880 — llama.cpp emits parallel
489
+ // see https://github.com/uttamtrivedi/prometheus/issues/1880 — llama.cpp emits parallel
407
490
  // function_call deltas interleaved, and a singleton `current` reference would
408
491
  // fold them into the wrong block and drop arguments on every call but the last.
492
+ //
493
+ // llama.cpp's `to_json_oaicompat_resp` (issue #2015) compounds this: `output_item.added`
494
+ // for function_call/custom_tool_call carries `item.call_id` but no `item.id` and no
495
+ // `output_index`, while the matching `function_call_arguments.delta` carries
496
+ // `item_id = "fc_<call_id>"`. Registering function-call items by `call_id` as a
497
+ // secondary key lets the delta lookup find the right block on hosts that emit one
498
+ // identifier but not the other.
409
499
  const openItemsByOutputIndex = new Map<number, StreamingItem>();
410
500
  const openItemsByItemId = new Map<string, StreamingItem>();
411
501
  let lastOpenItem: StreamingItem | null = null;
@@ -415,9 +505,11 @@ export async function processResponsesStream<TApi extends Api>(
415
505
  outputIndex: number | undefined,
416
506
  itemId: string | undefined,
417
507
  entry: StreamingItem,
508
+ alternateItemKey?: string,
418
509
  ): void => {
419
510
  if (typeof outputIndex === "number") openItemsByOutputIndex.set(outputIndex, entry);
420
511
  if (itemId) openItemsByItemId.set(itemId, entry);
512
+ if (alternateItemKey && alternateItemKey !== itemId) openItemsByItemId.set(alternateItemKey, entry);
421
513
  openItemsInOrder.push(entry);
422
514
  lastOpenItem = entry;
423
515
  };
@@ -455,9 +547,11 @@ export async function processResponsesStream<TApi extends Api>(
455
547
  outputIndex: number | undefined,
456
548
  itemId: string | undefined,
457
549
  entry: StreamingItem | undefined,
550
+ alternateItemKey?: string,
458
551
  ): void => {
459
552
  if (typeof outputIndex === "number") openItemsByOutputIndex.delete(outputIndex);
460
553
  if (itemId) openItemsByItemId.delete(itemId);
554
+ if (alternateItemKey && alternateItemKey !== itemId) openItemsByItemId.delete(alternateItemKey);
461
555
  if (entry) {
462
556
  const index = openItemsInOrder.indexOf(entry);
463
557
  if (index >= 0) openItemsInOrder.splice(index, 1);
@@ -497,7 +591,7 @@ export async function processResponsesStream<TApi extends Api>(
497
591
  partialJson: item.arguments || "",
498
592
  };
499
593
  output.content.push(block);
500
- registerOpenItem(event.output_index, item.id, { item, block });
594
+ registerOpenItem(event.output_index, item.id, { item, block }, item.call_id);
501
595
  stream.push({ type: "toolcall_start", contentIndex: contentIndexOf(block), partial: output });
502
596
  } else if (item.type === "custom_tool_call") {
503
597
  const block: StreamingToolCallBlock = {
@@ -515,7 +609,7 @@ export async function processResponsesStream<TApi extends Api>(
515
609
  partialJson: item.input ?? "",
516
610
  };
517
611
  output.content.push(block);
518
- registerOpenItem(event.output_index, item.id, { item, block });
612
+ registerOpenItem(event.output_index, item.id, { item, block }, item.call_id);
519
613
  stream.push({ type: "toolcall_start", contentIndex: contentIndexOf(block), partial: output });
520
614
  }
521
615
  } else if (event.type === "response.reasoning_summary_part.added") {
@@ -580,32 +674,42 @@ export async function processResponsesStream<TApi extends Api>(
580
674
  } else if (event.type === "response.output_text.delta") {
581
675
  const entry = lookupOpenItem(event);
582
676
  if (entry?.item.type === "message" && entry.block.type === "text") {
583
- const lastPart = entry.item.content?.[entry.item.content.length - 1];
584
- if (lastPart?.type === "output_text") {
585
- entry.block.text += event.delta;
586
- lastPart.text += event.delta;
587
- stream.push({
588
- type: "text_delta",
589
- contentIndex: contentIndexOf(entry.block),
590
- delta: event.delta,
591
- partial: output,
592
- });
677
+ entry.item.content = entry.item.content || [];
678
+ let lastPart = entry.item.content[entry.item.content.length - 1];
679
+ if (lastPart?.type !== "output_text") {
680
+ // `content_part.added` never arrived (lossy proxy) — synthesize the
681
+ // part so live text still streams instead of freezing until the
682
+ // item's output_item.done recovers the final text.
683
+ lastPart = { type: "output_text", text: "", annotations: [] };
684
+ entry.item.content.push(lastPart);
593
685
  }
686
+ entry.block.text += event.delta;
687
+ lastPart.text += event.delta;
688
+ stream.push({
689
+ type: "text_delta",
690
+ contentIndex: contentIndexOf(entry.block),
691
+ delta: event.delta,
692
+ partial: output,
693
+ });
594
694
  }
595
695
  } else if (event.type === "response.refusal.delta") {
596
696
  const entry = lookupOpenItem(event);
597
697
  if (entry?.item.type === "message" && entry.block.type === "text") {
598
- const lastPart = entry.item.content?.[entry.item.content.length - 1];
599
- if (lastPart?.type === "refusal") {
600
- entry.block.text += event.delta;
601
- lastPart.refusal += event.delta;
602
- stream.push({
603
- type: "text_delta",
604
- contentIndex: contentIndexOf(entry.block),
605
- delta: event.delta,
606
- partial: output,
607
- });
698
+ entry.item.content = entry.item.content || [];
699
+ let lastPart = entry.item.content[entry.item.content.length - 1];
700
+ if (lastPart?.type !== "refusal") {
701
+ // Same lossy-proxy hardening as the output_text branch above.
702
+ lastPart = { type: "refusal", refusal: "" };
703
+ entry.item.content.push(lastPart);
608
704
  }
705
+ entry.block.text += event.delta;
706
+ lastPart.refusal += event.delta;
707
+ stream.push({
708
+ type: "text_delta",
709
+ contentIndex: contentIndexOf(entry.block),
710
+ delta: event.delta,
711
+ partial: output,
712
+ });
609
713
  }
610
714
  } else if (event.type === "response.function_call_arguments.delta") {
611
715
  const entry = lookupOpenFunctionCallItem(event);
@@ -656,7 +760,10 @@ export async function processResponsesStream<TApi extends Api>(
656
760
  } else if (event.type === "response.output_item.done") {
657
761
  const item = structuredCloneJSON(event.item);
658
762
  options?.onOutputItemDone?.(item);
659
- const entry = lookupOpenItem({ output_index: event.output_index, item_id: item.id });
763
+ const entry =
764
+ item.type === "function_call" || item.type === "custom_tool_call"
765
+ ? lookupOpenItem({ output_index: event.output_index, item_id: item.id ?? item.call_id })
766
+ : lookupOpenItem({ output_index: event.output_index, item_id: item.id });
660
767
  if (item.type === "reasoning") {
661
768
  const thinking =
662
769
  item.summary?.length > 0
@@ -664,9 +771,15 @@ export async function processResponsesStream<TApi extends Api>(
664
771
  : item.content?.[0]?.type === "reasoning_text"
665
772
  ? (item.content[0].text ?? "")
666
773
  : "";
667
- const reasoningBlock = output.content.find(
668
- b => b.type === "thinking" && (b as ThinkingContent).itemId === item.id,
669
- ) as ThinkingContent | undefined;
774
+ // Prefer the routed entry; the bare itemId find misroutes when ids are
775
+ // absent (`undefined === undefined` matches the FIRST thinking block) and
776
+ // misses entirely when the done-event id drifts from the added-event id.
777
+ const reasoningBlock =
778
+ entry?.block.type === "thinking"
779
+ ? entry.block
780
+ : (output.content.find(b => b.type === "thinking" && (b as ThinkingContent).itemId === item.id) as
781
+ | ThinkingContent
782
+ | undefined);
670
783
  if (reasoningBlock) {
671
784
  reasoningBlock.thinking = thinking;
672
785
  reasoningBlock.thinkingSignature = JSON.stringify(item);
@@ -678,18 +791,25 @@ export async function processResponsesStream<TApi extends Api>(
678
791
  });
679
792
  }
680
793
  closeOpenItem(event.output_index, item.id, entry);
681
- } else if (item.type === "message" && entry?.block.type === "text") {
682
- const block = entry.block;
683
- block.text = item.content
794
+ } else if (item.type === "message") {
795
+ const block = entry?.block.type === "text" ? entry.block : undefined;
796
+ const text = item.content
684
797
  .map(part => (part.type === "output_text" ? (part.text ?? "") : (part.refusal ?? "")))
685
798
  .join("");
686
- block.textSignature = encodeTextSignatureV1(item.id, item.phase ?? undefined);
687
- stream.push({
688
- type: "text_end",
689
- contentIndex: contentIndexOf(block),
690
- content: block.text,
691
- partial: output,
692
- });
799
+ const textSignature = encodeTextSignatureV1(item.id, item.phase ?? undefined);
800
+ let contentIndex: number;
801
+ if (block) {
802
+ block.text = text;
803
+ block.textSignature = textSignature;
804
+ contentIndex = contentIndexOf(block);
805
+ } else {
806
+ // `output_item.added` never arrived (lossy proxy) — synthesize the
807
+ // block so the final message still carries the authoritative text.
808
+ const synthesized: TextContent = { type: "text", text, textSignature };
809
+ output.content.push(synthesized);
810
+ contentIndex = output.content.length - 1;
811
+ }
812
+ stream.push({ type: "text_end", contentIndex, content: text, partial: output });
693
813
  closeOpenItem(event.output_index, item.id, entry);
694
814
  } else if (item.type === "function_call") {
695
815
  const block = entry?.block.type === "toolCall" ? entry.block : undefined;
@@ -704,6 +824,7 @@ export async function processResponsesStream<TApi extends Api>(
704
824
  name: item.name,
705
825
  arguments: args,
706
826
  };
827
+ let contentIndex: number;
707
828
  if (block) {
708
829
  // Persist the authoritative final args on the stored block. The
709
830
  // throttled delta parser may have skipped the last partial parse,
@@ -713,9 +834,15 @@ export async function processResponsesStream<TApi extends Api>(
713
834
  delete (block as { partialJson?: string }).partialJson;
714
835
  delete (block as { lastParseLen?: number }).lastParseLen;
715
836
  delete (block as { argumentsDone?: boolean }).argumentsDone;
837
+ contentIndex = contentIndexOf(block);
838
+ } else {
839
+ // `output_item.added` never arrived (lossy proxy) — synthesize the
840
+ // block so the final message carries the call the consumer was told
841
+ // completed (the agent loop executes tools from message.content).
842
+ output.content.push(toolCall);
843
+ contentIndex = output.content.length - 1;
716
844
  }
717
- const contentIndex = block ? contentIndexOf(block) : output.content.length - 1;
718
- closeOpenItem(event.output_index, item.id, entry);
845
+ closeOpenItem(event.output_index, item.id, entry, item.call_id);
719
846
  stream.push({ type: "toolcall_end", contentIndex, toolCall, partial: output });
720
847
  } else if (item.type === "custom_tool_call") {
721
848
  const block = entry?.block.type === "toolCall" ? entry.block : undefined;
@@ -727,12 +854,39 @@ export async function processResponsesStream<TApi extends Api>(
727
854
  arguments: { input: rawInput },
728
855
  customWireName: item.name,
729
856
  };
730
- const contentIndex = block ? contentIndexOf(block) : output.content.length - 1;
731
- closeOpenItem(event.output_index, item.id, entry);
857
+ let contentIndex: number;
858
+ if (block) {
859
+ // Persist the final input on the stored block and drop the transient
860
+ // accumulation buffer, mirroring the function_call branch above.
861
+ block.arguments = { input: rawInput };
862
+ delete (block as { partialJson?: string }).partialJson;
863
+ delete (block as { lastParseLen?: number }).lastParseLen;
864
+ contentIndex = contentIndexOf(block);
865
+ } else {
866
+ output.content.push(toolCall);
867
+ contentIndex = output.content.length - 1;
868
+ }
869
+ closeOpenItem(event.output_index, item.id, entry, item.call_id);
732
870
  stream.push({ type: "toolcall_end", contentIndex, toolCall, partial: output });
733
871
  }
734
- } else if (event.type === "response.completed") {
872
+ } else if (event.type === "response.completed" || event.type === "response.incomplete") {
735
873
  const response = event.response;
874
+ // Finalize any toolCall block whose output_item.done never arrived: the
875
+ // throttled delta parser may have left block.arguments stale, and the
876
+ // toolUse override below would hand the agent incomplete arguments.
877
+ for (const open of openItemsInOrder) {
878
+ if (open.block.type !== "toolCall") continue;
879
+ const block = open.block;
880
+ if (block.partialJson && !block.argumentsDone) {
881
+ block.arguments =
882
+ open.item.type === "custom_tool_call"
883
+ ? { input: block.partialJson }
884
+ : parseStreamingJson(block.partialJson);
885
+ }
886
+ delete (block as { partialJson?: string }).partialJson;
887
+ delete (block as { lastParseLen?: number }).lastParseLen;
888
+ delete (block as { argumentsDone?: boolean }).argumentsDone;
889
+ }
736
890
  if (response?.id) {
737
891
  output.responseId = response.id;
738
892
  }
@@ -752,12 +906,40 @@ export async function processResponsesStream<TApi extends Api>(
752
906
  : "Unknown error (no error details in response)";
753
907
  throw new Error(message);
754
908
  }
909
+ if (response?.status === "incomplete" && response.incomplete_details?.reason === "content_filter") {
910
+ // A content-filtered turn is a failure, not a token-cap truncation —
911
+ // mapping it to "length" would route the agent loop into "shorten your
912
+ // output" recovery against a filtered prompt.
913
+ throw new Error("incomplete: content_filter");
914
+ }
755
915
  if (output.content.some(block => block.type === "toolCall") && output.stopReason === "stop") {
756
916
  output.stopReason = "toolUse";
757
917
  }
918
+ // Codex-lineage backends/gateways mark an unfinished turn with
919
+ // `end_turn: false` on the terminal event (the response ended on
920
+ // commentary only). Not in the SDK types or the platform API today —
921
+ // inert when absent. Same mapping as openai-codex-responses: surface a
922
+ // non-terminal stop so the agent loop re-samples instead of ending the
923
+ // turn.
924
+ if ((response as { end_turn?: boolean } | undefined)?.end_turn === false && output.stopReason === "stop") {
925
+ output.stopDetails = { type: "pause_turn" };
926
+ }
927
+ options?.onCompleted?.();
928
+ // `response.completed`/`response.incomplete` is the last event of a
929
+ // Responses stream. Stop pulling instead of waiting for the server to
930
+ // close the connection: misbehaving providers keep the socket open
931
+ // after the terminal event, which would park this loop until the idle
932
+ // watchdog converts an already-successful turn into a timeout error.
933
+ // Breaking unwinds the iterator chain (the consumer's `.return()`
934
+ // reaches the SDK stream), actively releasing the connection.
935
+ break;
758
936
  } else if (event.type === "error") {
759
- throw new Error(`Error Code ${event.code}: ${event.message}` || "Unknown error");
937
+ const err = (event as any).error ?? event;
938
+ const code = err.code ?? "unknown";
939
+ const message = err.message ?? "no message";
940
+ throw new Error(`Error Code ${code}: ${message}`);
760
941
  } else if (event.type === "response.failed") {
942
+ populateResponsesUsageFromResponse(output, event.response?.usage);
761
943
  const error = event.response?.error ?? (event.response as any)?.status_details?.error;
762
944
  const details = event.response?.incomplete_details;
763
945
  const message = error
@@ -770,7 +952,7 @@ export async function processResponsesStream<TApi extends Api>(
770
952
  }
771
953
  }
772
954
 
773
- export function mapOpenAIResponsesStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason {
955
+ export function mapOpenAIResponsesStopReason(status: ResponseStatus | undefined): StopReason {
774
956
  if (!status) return "stop";
775
957
  switch (status) {
776
958
  case "completed":
@@ -784,8 +966,12 @@ export function mapOpenAIResponsesStopReason(status: OpenAI.Responses.ResponseSt
784
966
  case "queued":
785
967
  return "stop";
786
968
  default: {
969
+ // Compile-time exhaustiveness; at runtime a brand-new status from the
970
+ // server must degrade gracefully instead of failing a fully-streamed
971
+ // response.
787
972
  const exhaustive: never = status;
788
- throw new Error(`Unhandled stop reason: ${exhaustive}`);
973
+ logger.warn("Unhandled OpenAI Responses stop reason", { status: exhaustive });
974
+ return "stop";
789
975
  }
790
976
  }
791
977
  }
@@ -820,7 +1006,7 @@ export type ResponsesSamplingParamsExtras = {
820
1006
  repetition_penalty?: number;
821
1007
  };
822
1008
 
823
- type CommonResponsesParams = OpenAI.Responses.ResponseCreateParamsStreaming & ResponsesSamplingParamsExtras;
1009
+ type CommonResponsesParams = ResponseCreateParamsStreaming & ResponsesSamplingParamsExtras;
824
1010
 
825
1011
  type CommonSamplingOptions = Pick<
826
1012
  StreamOptions,
@@ -839,9 +1025,15 @@ type CommonSamplingOptions = Pick<
839
1025
  export function applyCommonResponsesSamplingParams<P extends CommonResponsesParams>(
840
1026
  params: P,
841
1027
  options: CommonSamplingOptions | undefined,
842
- model: Pick<Model, "provider" | "omitMaxOutputTokens">,
1028
+ model: Pick<Model, "provider" | "omitMaxOutputTokens" | "maxTokens">,
843
1029
  ): void {
844
- if (options?.maxTokens && !model.omitMaxOutputTokens) params.max_output_tokens = options.maxTokens;
1030
+ if (options?.maxTokens && !model.omitMaxOutputTokens) {
1031
+ params.max_output_tokens = Math.min(
1032
+ options.maxTokens,
1033
+ model.maxTokens ?? Number.POSITIVE_INFINITY,
1034
+ OPENAI_MAX_OUTPUT_TOKENS,
1035
+ );
1036
+ }
845
1037
  if (options?.temperature !== undefined) params.temperature = options.temperature;
846
1038
  if (options?.topP !== undefined) params.top_p = options.topP;
847
1039
  if (options?.topK !== undefined) params.top_k = options.topK;
@@ -863,8 +1055,12 @@ type ReasoningOptions = {
863
1055
 
864
1056
  /**
865
1057
  * Apply reasoning-related Responses parameters: enable encrypted reasoning content for replay,
866
- * set effort/summary when requested, and otherwise inject the GPT-5 "Juice: 0" no-reasoning hack.
867
- * Mutates `params` and may push a developer message into `messages`.
1058
+ * set effort/summary when requested, and otherwise inject the "Juice: 0" no-reasoning hack
1059
+ * when `model.compat.requiresJuiceZeroHack` is set (GPT-5 family by default).
1060
+ * Mutates `params` and may push a developer message into `messages`. Returns
1061
+ * the number of per-turn trailing scaffolding items appended to `messages`
1062
+ * (the "Juice: 0" developer item), so callers doing stateful
1063
+ * `previous_response_id` chaining can exclude them from append-baseline math.
868
1064
  *
869
1065
  * @param omitReasoningEffort - When `true`, suppresses `params.reasoning.effort` from the wire
870
1066
  * body. Set by `xai-responses.ts` via {@link OpenAIResponsesOptions.omitReasoningEffort} for
@@ -875,21 +1071,23 @@ type ReasoningOptions = {
875
1071
  * without needing explicit activation. Callers that pass `options.reasoning` for such models
876
1072
  * should expect this documented downgrade: the model will reason, but at its default effort.
877
1073
  */
878
- export function applyResponsesReasoningParams<P extends OpenAI.Responses.ResponseCreateParamsStreaming>(
1074
+ export function applyResponsesReasoningParams<P extends ResponseCreateParamsStreaming>(
879
1075
  params: P,
880
- model: Model<Api>,
1076
+ model: Model<"openai-responses" | "azure-openai-responses" | "openai-codex-responses">,
881
1077
  options: ReasoningOptions | undefined,
882
1078
  messages: ResponseInput,
883
1079
  mapEffort?: (effort: string) => string,
884
1080
  includeEncryptedReasoning: boolean = true,
885
1081
  omitReasoningEffort: boolean = false,
886
- ): void {
887
- if (!model.reasoning) return;
1082
+ ): number {
1083
+ if (!model.reasoning) return 0;
888
1084
  // Always request encrypted reasoning content so reasoning items can be replayed in
889
1085
  // multi-turn conversations when store is false (items aren't persisted server-side, so
890
- // we must include the full content). See: https://github.com/uttamtrivedi/Prometheus/issues/41
1086
+ // we must include the full content). See: https://github.com/uttamtrivedi/prometheus/issues/41
891
1087
  if (includeEncryptedReasoning) {
892
- params.include = ["reasoning.encrypted_content"];
1088
+ const include = params.include ?? [];
1089
+ if (!include.includes("reasoning.encrypted_content")) include.push("reasoning.encrypted_content");
1090
+ params.include = include;
893
1091
  }
894
1092
 
895
1093
  if (options?.reasoning || options?.reasoningSummary !== undefined) {
@@ -904,12 +1102,12 @@ export function applyResponsesReasoningParams<P extends OpenAI.Responses.Respons
904
1102
  // When only options.reasoning (effort level) is set, params.reasoning
905
1103
  // is intentionally omitted — see @param omitReasoningEffort above.
906
1104
  if (options?.reasoningSummary !== undefined && options?.reasoningSummary !== null) {
907
- type ReasoningParam = NonNullable<OpenAI.Responses.ResponseCreateParamsStreaming["reasoning"]>;
1105
+ type ReasoningParam = NonNullable<ResponseCreateParamsStreaming["reasoning"]>;
908
1106
  params.reasoning = { summary: options.reasoningSummary || "auto" } as P["reasoning"] & ReasoningParam;
909
1107
  }
910
1108
  } else {
911
1109
  const requested = options?.reasoning || "medium";
912
- type ReasoningParam = NonNullable<OpenAI.Responses.ResponseCreateParamsStreaming["reasoning"]>;
1110
+ type ReasoningParam = NonNullable<ResponseCreateParamsStreaming["reasoning"]>;
913
1111
  const reasoningParams: ReasoningParam = {
914
1112
  effort: (mapEffort ? mapEffort(requested) : requested) as ReasoningParam["effort"],
915
1113
  };
@@ -918,13 +1116,15 @@ export function applyResponsesReasoningParams<P extends OpenAI.Responses.Respons
918
1116
  }
919
1117
  params.reasoning = reasoningParams as P["reasoning"];
920
1118
  }
921
- } else if (model.name.toLowerCase().startsWith("gpt-5")) {
1119
+ } else if (model.compat.requiresJuiceZeroHack) {
922
1120
  // Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
923
1121
  messages.push({
924
1122
  role: "developer",
925
1123
  content: [{ type: "input_text", text: "# Juice: 0 !important" }],
926
1124
  });
1125
+ return 1;
927
1126
  }
1127
+ return 0;
928
1128
  }
929
1129
 
930
1130
  /** Populate `output.usage` from a Responses-API `response.usage` payload. Does not invoke `calculateCost`. */
@@ -944,6 +1144,10 @@ export function populateResponsesUsageFromResponse(
944
1144
  if (!usage) return;
945
1145
  const cachedTokens = usage.input_tokens_details?.cached_tokens || 0;
946
1146
  const reasoningTokens = usage.output_tokens_details?.reasoning_tokens || 0;
1147
+ // Wholesale replacement must not drop provider-annotated extras (Copilot
1148
+ // premium-request accounting): the failed/cancelled paths throw right after
1149
+ // this call with no later chance to re-apply.
1150
+ const premiumRequests = output.usage.premiumRequests;
947
1151
  output.usage = {
948
1152
  input: (usage.input_tokens || 0) - cachedTokens,
949
1153
  output: usage.output_tokens || 0,
@@ -953,4 +1157,38 @@ export function populateResponsesUsageFromResponse(
953
1157
  ...(reasoningTokens > 0 ? { reasoningTokens } : {}),
954
1158
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
955
1159
  };
1160
+ if (premiumRequests !== undefined) {
1161
+ output.usage.premiumRequests = premiumRequests;
1162
+ }
1163
+ }
1164
+
1165
+ /**
1166
+ * Strict-prefix delta for stateful `previous_response_id` chaining (used by the
1167
+ * platform Responses provider and the Codex provider on both transports):
1168
+ * returns the input items the current request appends beyond the previous
1169
+ * request's input plus the previous response's output items, or null when the
1170
+ * previous request is missing, either input is not an array, the request options differ, history mutated, or nothing new was appended (the chain must break). Per-turn
1171
+ * `client_metadata` (e.g. rotating turn ids) is excluded from the option
1172
+ * comparison; codex-rs excludes it from the same check.
1173
+ */
1174
+ export function buildResponsesDeltaInput<TItem>(
1175
+ previous: { input?: unknown } | undefined,
1176
+ previousResponseItems: readonly TItem[] | undefined,
1177
+ current: { input?: unknown },
1178
+ ): TItem[] | null {
1179
+ if (!previous) return null;
1180
+ if (!Array.isArray(previous.input) || !Array.isArray(current.input)) return null;
1181
+ const previousWithoutInput = { ...previous, input: undefined, client_metadata: undefined };
1182
+ const currentWithoutInput = { ...current, input: undefined, client_metadata: undefined };
1183
+ if (JSON.stringify(previousWithoutInput) !== JSON.stringify(currentWithoutInput)) {
1184
+ return null;
1185
+ }
1186
+ const baseline = [...previous.input, ...(previousResponseItems ?? [])];
1187
+ if (current.input.length <= baseline.length) return null;
1188
+ for (let index = 0; index < baseline.length; index += 1) {
1189
+ if (JSON.stringify(baseline[index]) !== JSON.stringify(current.input[index])) {
1190
+ return null;
1191
+ }
1192
+ }
1193
+ return current.input.slice(baseline.length) as TItem[];
956
1194
  }