@prometheus-ai/ai 0.5.4 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377) hide show
  1. package/dist/types/auth-broker/remote-store.d.ts +2 -1
  2. package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
  3. package/dist/types/auth-gateway/server.d.ts +19 -0
  4. package/dist/types/auth-gateway/types.d.ts +9 -3
  5. package/dist/types/auth-retry.d.ts +119 -0
  6. package/dist/types/auth-storage.d.ts +217 -8
  7. package/dist/types/errors.d.ts +24 -0
  8. package/dist/types/index.d.ts +5 -9
  9. package/dist/types/provider-details.d.ts +1 -1
  10. package/dist/types/providers/amazon-bedrock.d.ts +12 -6
  11. package/dist/types/providers/anthropic-client.d.ts +10 -3
  12. package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
  13. package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
  14. package/dist/types/providers/anthropic-wire.d.ts +3 -3
  15. package/dist/types/providers/anthropic.d.ts +41 -34
  16. package/dist/types/providers/aws-credentials.d.ts +8 -0
  17. package/dist/types/providers/azure-openai-responses.d.ts +1 -0
  18. package/dist/types/providers/google-gemini-cli.d.ts +22 -1
  19. package/dist/types/providers/google-shared.d.ts +22 -0
  20. package/dist/types/providers/google-types.d.ts +13 -1
  21. package/dist/types/providers/mock.d.ts +8 -3
  22. package/dist/types/providers/ollama.d.ts +6 -0
  23. package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
  24. package/dist/types/providers/openai-chat-server.d.ts +3 -3
  25. package/dist/types/providers/openai-chat-wire.d.ts +644 -0
  26. package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
  27. package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
  28. package/dist/types/providers/openai-codex-responses.d.ts +31 -2
  29. package/dist/types/providers/openai-completions-compat.d.ts +2 -25
  30. package/dist/types/providers/openai-completions.d.ts +2 -10
  31. package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
  32. package/dist/types/providers/openai-responses-server.d.ts +2 -2
  33. package/dist/types/providers/openai-responses-shared.d.ts +49 -9
  34. package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
  35. package/dist/types/providers/openai-responses.d.ts +13 -4
  36. package/dist/types/providers/prometheus-native-client.d.ts +9 -0
  37. package/dist/types/providers/prometheus-native-server.d.ts +4 -3
  38. package/dist/types/providers/transform-messages.d.ts +1 -2
  39. package/dist/types/rate-limit-utils.d.ts +3 -2
  40. package/dist/types/registry/aimlapi.d.ts +4 -0
  41. package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
  42. package/dist/types/registry/amazon-bedrock.d.ts +5 -0
  43. package/dist/types/registry/anthropic.d.ts +10 -0
  44. package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
  45. package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
  46. package/dist/types/registry/cerebras.d.ts +7 -0
  47. package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
  48. package/dist/types/registry/cursor.d.ts +7 -0
  49. package/dist/types/registry/deepseek.d.ts +8 -0
  50. package/dist/types/registry/derived.d.ts +5 -0
  51. package/dist/types/registry/firepass.d.ts +16 -0
  52. package/dist/types/registry/fireworks.d.ts +7 -0
  53. package/dist/types/registry/github-copilot.d.ts +7 -0
  54. package/dist/types/registry/gitlab-duo.d.ts +9 -0
  55. package/dist/types/registry/google-antigravity.d.ts +9 -0
  56. package/dist/types/registry/google-gemini-cli.d.ts +9 -0
  57. package/dist/types/registry/google-vertex.d.ts +5 -0
  58. package/dist/types/registry/google.d.ts +4 -0
  59. package/dist/types/registry/groq.d.ts +4 -0
  60. package/dist/types/registry/huggingface.d.ts +7 -0
  61. package/dist/types/registry/index.d.ts +4 -0
  62. package/dist/types/registry/kagi.d.ts +14 -0
  63. package/dist/types/registry/kilo.d.ts +7 -0
  64. package/dist/types/registry/kimi-code.d.ts +7 -0
  65. package/dist/types/registry/litellm.d.ts +13 -0
  66. package/dist/types/registry/lm-studio.d.ts +8 -0
  67. package/dist/types/registry/minimax-code-cn.d.ts +6 -0
  68. package/dist/types/registry/minimax-code.d.ts +6 -0
  69. package/dist/types/registry/minimax.d.ts +4 -0
  70. package/dist/types/registry/mistral.d.ts +4 -0
  71. package/dist/types/registry/moonshot.d.ts +7 -0
  72. package/dist/types/registry/nanogpt.d.ts +7 -0
  73. package/dist/types/registry/nvidia.d.ts +7 -0
  74. package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  75. package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
  76. package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
  77. package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
  78. package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
  79. package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
  80. package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
  81. package/dist/types/registry/ollama-cloud.d.ts +7 -0
  82. package/dist/types/registry/ollama.d.ts +12 -0
  83. package/dist/types/registry/openai-codex-device.d.ts +8 -0
  84. package/dist/types/registry/openai-codex.d.ts +9 -0
  85. package/dist/types/registry/openai.d.ts +4 -0
  86. package/dist/types/registry/opencode-go.d.ts +6 -0
  87. package/dist/types/registry/opencode-zen.d.ts +6 -0
  88. package/dist/types/registry/openrouter.d.ts +13 -0
  89. package/dist/types/registry/parallel.d.ts +14 -0
  90. package/dist/types/registry/perplexity.d.ts +7 -0
  91. package/dist/types/registry/qianfan.d.ts +7 -0
  92. package/dist/types/registry/qwen-portal.d.ts +7 -0
  93. package/dist/types/registry/registry.d.ts +272 -0
  94. package/dist/types/registry/synthetic.d.ts +6 -0
  95. package/dist/types/registry/tavily.d.ts +14 -0
  96. package/dist/types/registry/together.d.ts +6 -0
  97. package/dist/types/registry/types.d.ts +51 -0
  98. package/dist/types/registry/venice.d.ts +13 -0
  99. package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
  100. package/dist/types/registry/vllm.d.ts +7 -0
  101. package/dist/types/registry/wafer-pass.d.ts +6 -0
  102. package/dist/types/registry/wafer-serverless.d.ts +6 -0
  103. package/dist/types/registry/xai-oauth.d.ts +7 -0
  104. package/dist/types/registry/xai.d.ts +4 -0
  105. package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
  106. package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
  107. package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
  108. package/dist/types/registry/xiaomi.d.ts +6 -0
  109. package/dist/types/registry/zai.d.ts +7 -0
  110. package/dist/types/registry/zenmux.d.ts +7 -0
  111. package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
  112. package/dist/types/stream.d.ts +9 -1
  113. package/dist/types/types.d.ts +56 -295
  114. package/dist/types/usage/google-antigravity.d.ts +15 -1
  115. package/dist/types/usage/openai-codex-reset.d.ts +79 -0
  116. package/dist/types/usage/openai-codex.d.ts +1 -0
  117. package/dist/types/usage.d.ts +77 -4
  118. package/dist/types/utils/abort.d.ts +6 -0
  119. package/dist/types/utils/event-stream.d.ts +2 -0
  120. package/dist/types/utils/http-inspector.d.ts +0 -1
  121. package/dist/types/utils/idle-iterator.d.ts +35 -0
  122. package/dist/types/utils/openai-http.d.ts +58 -0
  123. package/dist/types/utils/request-debug.d.ts +3 -0
  124. package/dist/types/utils/retry-after.d.ts +1 -0
  125. package/dist/types/utils/schema/fields.d.ts +5 -0
  126. package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
  127. package/dist/types/utils/schema/stamps.d.ts +7 -15
  128. package/dist/types/utils/sse-debug.d.ts +0 -5
  129. package/dist/types/utils/stream-markup-healing.d.ts +2 -0
  130. package/dist/types/utils.d.ts +1 -5
  131. package/package.json +17 -29
  132. package/src/auth-broker/remote-store.ts +10 -1
  133. package/src/auth-broker/snapshot-cache.ts +1 -1
  134. package/src/auth-broker/wire-schemas.ts +1 -1
  135. package/src/auth-gateway/http.ts +1 -1
  136. package/src/auth-gateway/server.ts +95 -30
  137. package/src/auth-gateway/types.ts +10 -2
  138. package/src/auth-retry.ts +238 -0
  139. package/src/auth-storage.ts +935 -430
  140. package/src/errors.ts +32 -0
  141. package/src/index.ts +9 -14
  142. package/src/provider-details.ts +1 -1
  143. package/src/providers/__tests__/google-auth.test.ts +144 -0
  144. package/src/providers/amazon-bedrock.ts +70 -40
  145. package/src/providers/anthropic-client.ts +15 -13
  146. package/src/providers/anthropic-messages-server-schema.ts +17 -7
  147. package/src/providers/anthropic-messages-server.ts +88 -20
  148. package/src/providers/anthropic-wire.ts +4 -3
  149. package/src/providers/anthropic.ts +1234 -621
  150. package/src/providers/aws-credentials.ts +47 -5
  151. package/src/providers/aws-eventstream.ts +5 -0
  152. package/src/providers/azure-openai-responses.ts +117 -67
  153. package/src/providers/cursor.ts +30 -30
  154. package/src/providers/github-copilot-headers.ts +1 -1
  155. package/src/providers/gitlab-duo.ts +36 -29
  156. package/src/providers/google-auth.ts +71 -8
  157. package/src/providers/google-gemini-cli.ts +118 -22
  158. package/src/providers/google-shared.ts +163 -43
  159. package/src/providers/google-types.ts +10 -1
  160. package/src/providers/kimi.ts +1 -1
  161. package/src/providers/mock.ts +11 -3
  162. package/src/providers/ollama.ts +64 -7
  163. package/src/providers/openai-anthropic-shim.ts +17 -8
  164. package/src/providers/openai-chat-server-schema.ts +9 -3
  165. package/src/providers/openai-chat-server.ts +82 -16
  166. package/src/providers/openai-chat-wire.ts +847 -0
  167. package/src/providers/openai-codex/request-transformer.ts +129 -34
  168. package/src/providers/openai-codex/response-handler.ts +22 -1
  169. package/src/providers/openai-codex-responses.ts +699 -247
  170. package/src/providers/openai-completions-compat.ts +8 -308
  171. package/src/providers/openai-completions.ts +416 -267
  172. package/src/providers/openai-responses-server-schema.ts +15 -9
  173. package/src/providers/openai-responses-server.ts +162 -114
  174. package/src/providers/openai-responses-shared.ts +320 -82
  175. package/src/providers/openai-responses-wire.ts +6391 -0
  176. package/src/providers/openai-responses.ts +382 -176
  177. package/src/providers/prometheus-native-client.ts +27 -11
  178. package/src/providers/prometheus-native-server.ts +44 -17
  179. package/src/providers/transform-messages.ts +311 -120
  180. package/src/providers/vision-guard.ts +5 -3
  181. package/src/rate-limit-utils.ts +13 -3
  182. package/src/registry/aimlapi.ts +6 -0
  183. package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
  184. package/src/registry/amazon-bedrock.ts +22 -0
  185. package/src/registry/anthropic.ts +26 -0
  186. package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
  187. package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
  188. package/src/{utils/oauth → registry}/cerebras.ts +8 -1
  189. package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
  190. package/src/registry/cursor.ts +20 -0
  191. package/src/{utils/oauth → registry}/deepseek.ts +9 -17
  192. package/src/registry/derived.ts +9 -0
  193. package/src/{utils/oauth → registry}/firepass.ts +10 -2
  194. package/src/{utils/oauth → registry}/fireworks.ts +8 -1
  195. package/src/registry/github-copilot.ts +22 -0
  196. package/src/registry/gitlab-duo.ts +19 -0
  197. package/src/registry/google-antigravity.ts +21 -0
  198. package/src/registry/google-gemini-cli.ts +21 -0
  199. package/src/registry/google-vertex.ts +38 -0
  200. package/src/registry/google.ts +6 -0
  201. package/src/registry/groq.ts +6 -0
  202. package/src/{utils/oauth → registry}/huggingface.ts +8 -19
  203. package/src/registry/index.ts +4 -0
  204. package/src/{utils/oauth → registry}/kagi.ts +9 -11
  205. package/src/{utils/oauth → registry}/kilo.ts +11 -6
  206. package/src/registry/kimi-code.ts +17 -0
  207. package/src/{utils/oauth → registry}/litellm.ts +8 -12
  208. package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
  209. package/src/registry/minimax-code-cn.ts +12 -0
  210. package/src/registry/minimax-code.ts +12 -0
  211. package/src/registry/minimax.ts +6 -0
  212. package/src/registry/mistral.ts +6 -0
  213. package/src/{utils/oauth → registry}/moonshot.ts +8 -9
  214. package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
  215. package/src/{utils/oauth → registry}/nvidia.ts +8 -18
  216. package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
  217. package/src/{utils → registry}/oauth/anthropic.ts +38 -17
  218. package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
  219. package/src/registry/oauth/gitlab-duo.ts +198 -0
  220. package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
  221. package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
  222. package/src/registry/oauth/index.ts +164 -0
  223. package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
  224. package/src/{utils → registry}/oauth/types.ts +7 -51
  225. package/src/{utils → registry}/oauth/wafer.ts +1 -1
  226. package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
  227. package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
  228. package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
  229. package/src/{utils/oauth → registry}/ollama.ts +8 -13
  230. package/src/registry/openai-codex-device.ts +18 -0
  231. package/src/registry/openai-codex.ts +19 -0
  232. package/src/registry/openai.ts +6 -0
  233. package/src/registry/opencode-go.ts +12 -0
  234. package/src/registry/opencode-zen.ts +12 -0
  235. package/src/{utils/oauth → registry}/openrouter.ts +10 -2
  236. package/src/{utils/oauth → registry}/parallel.ts +9 -11
  237. package/src/registry/perplexity.ts +13 -0
  238. package/src/{utils/oauth → registry}/qianfan.ts +8 -17
  239. package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
  240. package/src/registry/registry.ts +149 -0
  241. package/src/{utils/oauth → registry}/synthetic.ts +7 -1
  242. package/src/{utils/oauth → registry}/tavily.ts +10 -12
  243. package/src/{utils/oauth → registry}/together.ts +7 -1
  244. package/src/registry/types.ts +56 -0
  245. package/src/{utils/oauth → registry}/venice.ts +8 -12
  246. package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
  247. package/src/{utils/oauth → registry}/vllm.ts +9 -16
  248. package/src/registry/wafer-pass.ts +12 -0
  249. package/src/registry/wafer-serverless.ts +12 -0
  250. package/src/registry/xai-oauth.ts +17 -0
  251. package/src/registry/xai.ts +6 -0
  252. package/src/registry/xiaomi-token-plan-ams.ts +12 -0
  253. package/src/registry/xiaomi-token-plan-cn.ts +12 -0
  254. package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
  255. package/src/registry/xiaomi.ts +12 -0
  256. package/src/{utils/oauth → registry}/zai.ts +10 -22
  257. package/src/{utils/oauth → registry}/zenmux.ts +8 -1
  258. package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
  259. package/src/stream.ts +229 -199
  260. package/src/types.ts +63 -384
  261. package/src/usage/claude.ts +4 -2
  262. package/src/usage/github-copilot.ts +4 -2
  263. package/src/usage/google-antigravity.ts +196 -28
  264. package/src/usage/kimi.ts +1 -1
  265. package/src/usage/minimax-code.ts +5 -6
  266. package/src/usage/openai-codex-reset.ts +174 -0
  267. package/src/usage/openai-codex.ts +19 -2
  268. package/src/usage/zai.ts +2 -1
  269. package/src/usage.ts +93 -4
  270. package/src/utils/abort.ts +14 -0
  271. package/src/utils/event-stream.ts +17 -0
  272. package/src/utils/http-inspector.ts +4 -12
  273. package/src/utils/idle-iterator.ts +250 -79
  274. package/src/utils/openai-http.ts +157 -0
  275. package/src/utils/request-debug.ts +67 -19
  276. package/src/utils/retry-after.ts +1 -1
  277. package/src/utils/retry.ts +23 -2
  278. package/src/utils/schema/CONSTRAINTS.md +4 -2
  279. package/src/utils/schema/fields.ts +16 -0
  280. package/src/utils/schema/json-schema-validator.ts +19 -1
  281. package/src/utils/schema/normalize.ts +80 -8
  282. package/src/utils/schema/stamps.ts +22 -10
  283. package/src/utils/schema/wire.ts +2 -2
  284. package/src/utils/sse-debug.ts +0 -271
  285. package/src/utils/stream-markup-healing.ts +50 -8
  286. package/src/utils/validation.ts +49 -13
  287. package/src/utils.ts +2 -26
  288. package/dist/types/model-cache.d.ts +0 -17
  289. package/dist/types/model-manager.d.ts +0 -64
  290. package/dist/types/model-thinking.d.ts +0 -100
  291. package/dist/types/models.d.ts +0 -12
  292. package/dist/types/provider-models/bundled-references.d.ts +0 -4
  293. package/dist/types/provider-models/descriptors.d.ts +0 -50
  294. package/dist/types/provider-models/google.d.ts +0 -24
  295. package/dist/types/provider-models/index.d.ts +0 -5
  296. package/dist/types/provider-models/ollama.d.ts +0 -7
  297. package/dist/types/provider-models/openai-compat.d.ts +0 -323
  298. package/dist/types/provider-models/special.d.ts +0 -16
  299. package/dist/types/utils/discovery/antigravity.d.ts +0 -61
  300. package/dist/types/utils/discovery/codex.d.ts +0 -38
  301. package/dist/types/utils/discovery/cursor.d.ts +0 -23
  302. package/dist/types/utils/discovery/gemini.d.ts +0 -25
  303. package/dist/types/utils/discovery/index.d.ts +0 -4
  304. package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
  305. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
  306. package/dist/types/utils/oauth/cerebras.d.ts +0 -1
  307. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
  308. package/dist/types/utils/oauth/deepseek.d.ts +0 -10
  309. package/dist/types/utils/oauth/firepass.d.ts +0 -1
  310. package/dist/types/utils/oauth/fireworks.d.ts +0 -1
  311. package/dist/types/utils/oauth/huggingface.d.ts +0 -19
  312. package/dist/types/utils/oauth/kagi.d.ts +0 -17
  313. package/dist/types/utils/oauth/kilo.d.ts +0 -5
  314. package/dist/types/utils/oauth/litellm.d.ts +0 -18
  315. package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
  316. package/dist/types/utils/oauth/moonshot.d.ts +0 -1
  317. package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
  318. package/dist/types/utils/oauth/nvidia.d.ts +0 -18
  319. package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
  320. package/dist/types/utils/oauth/ollama.d.ts +0 -18
  321. package/dist/types/utils/oauth/openrouter.d.ts +0 -1
  322. package/dist/types/utils/oauth/parallel.d.ts +0 -17
  323. package/dist/types/utils/oauth/qianfan.d.ts +0 -17
  324. package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
  325. package/dist/types/utils/oauth/synthetic.d.ts +0 -1
  326. package/dist/types/utils/oauth/tavily.d.ts +0 -17
  327. package/dist/types/utils/oauth/together.d.ts +0 -1
  328. package/dist/types/utils/oauth/venice.d.ts +0 -18
  329. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
  330. package/dist/types/utils/oauth/vllm.d.ts +0 -16
  331. package/dist/types/utils/oauth/zai.d.ts +0 -18
  332. package/dist/types/utils/oauth/zenmux.d.ts +0 -1
  333. package/dist/types/utils/oauth/zhipu.d.ts +0 -18
  334. package/src/model-cache.ts +0 -129
  335. package/src/model-manager.ts +0 -469
  336. package/src/model-thinking.ts +0 -756
  337. package/src/models.json +0 -60287
  338. package/src/models.json.d.ts +0 -9
  339. package/src/models.ts +0 -56
  340. package/src/provider-models/bundled-references.ts +0 -38
  341. package/src/provider-models/descriptors.ts +0 -364
  342. package/src/provider-models/google.ts +0 -88
  343. package/src/provider-models/index.ts +0 -5
  344. package/src/provider-models/ollama.ts +0 -153
  345. package/src/provider-models/openai-compat.ts +0 -2904
  346. package/src/provider-models/special.ts +0 -67
  347. package/src/utils/discovery/antigravity.ts +0 -261
  348. package/src/utils/discovery/codex.ts +0 -371
  349. package/src/utils/discovery/cursor.ts +0 -306
  350. package/src/utils/discovery/gemini.ts +0 -248
  351. package/src/utils/discovery/index.ts +0 -4
  352. package/src/utils/discovery/openai-compatible.ts +0 -224
  353. package/src/utils/oauth/gitlab-duo.ts +0 -123
  354. package/src/utils/oauth/index.ts +0 -502
  355. /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
  356. /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
  357. /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
  358. /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
  359. /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
  360. /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
  361. /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
  362. /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
  363. /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
  364. /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
  365. /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
  366. /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
  367. /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
  368. /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
  369. /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
  370. /package/src/{utils → registry}/oauth/cursor.ts +0 -0
  371. /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
  372. /package/src/{utils → registry}/oauth/kimi.ts +0 -0
  373. /package/src/{utils → registry}/oauth/oauth.html +0 -0
  374. /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
  375. /package/src/{utils → registry}/oauth/opencode.ts +0 -0
  376. /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
  377. /package/src/{utils → registry}/oauth/pkce.ts +0 -0
@@ -2,8 +2,10 @@
2
2
  * Shared utilities for Google Generative AI and Google Cloud Code Assist providers.
3
3
  */
4
4
 
5
+ import { scheduler } from "node:timers/promises";
6
+ import { calculateCost } from "@prometheus-ai/catalog/models";
5
7
  import { extractHttpStatusFromError, readSseJson } from "@prometheus-ai/utils";
6
- import { calculateCost } from "../models";
8
+ import { ProviderHttpError } from "../errors";
7
9
  import type {
8
10
  Api,
9
11
  AssistantMessage,
@@ -20,7 +22,7 @@ import type {
20
22
  } from "../types";
21
23
  import { normalizeSystemPrompts } from "../utils";
22
24
  import { AssistantMessageEventStream } from "../utils/event-stream";
23
- import { finalizeErrorMessage, type RawHttpRequestDump, withHttpStatus } from "../utils/http-inspector";
25
+ import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
24
26
  import { normalizeSchemaForCCA, normalizeSchemaForGoogle, toolWireSchema } from "../utils/schema";
25
27
  import type {
26
28
  Content,
@@ -45,6 +47,11 @@ export type {
45
47
  } from "./google-types";
46
48
  export { normalizeSchemaForGoogle };
47
49
 
50
+ /** Non-2xx response (or in-stream error chunk) from the Google Generative Language / Vertex API. */
51
+ export class GoogleApiError extends ProviderHttpError {
52
+ override readonly name = "GoogleApiError";
53
+ }
54
+
48
55
  type GoogleApiType = "google-generative-ai" | "google-gemini-cli" | "google-vertex";
49
56
 
50
57
  /**
@@ -160,7 +167,19 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
160
167
 
161
168
  const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId);
162
169
 
170
+ // Gemini < 3 image tool results go in a separate user turn, but parallel tool results must
171
+ // stay a single contiguous functionResponse turn ("number of function response parts is not
172
+ // equal to number of function call parts"). Buffer image turns and flush them only after the
173
+ // merged functionResponse turn is complete.
174
+ let pendingToolImageParts: Part[] = [];
175
+ const flushPendingToolImages = () => {
176
+ if (pendingToolImageParts.length === 0) return;
177
+ contents.push({ role: "user", parts: pendingToolImageParts });
178
+ pendingToolImageParts = [];
179
+ };
180
+
163
181
  for (const msg of transformedMessages) {
182
+ if (msg.role !== "toolResult") flushPendingToolImages();
164
183
  if (msg.role === "user" || msg.role === "developer") {
165
184
  if (typeof msg.content === "string") {
166
185
  // Skip empty user messages
@@ -314,15 +333,13 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
314
333
  });
315
334
  }
316
335
 
317
- // For Gemini < 3, add images in a separate user message
336
+ // For Gemini < 3, buffer images for a separate user message after the functionResponse turn
318
337
  if (hasImages && !modelSupportsMultimodalFunctionResponse) {
319
- contents.push({
320
- role: "user",
321
- parts: [{ text: "Tool result image:" }, ...imageParts],
322
- });
338
+ pendingToolImageParts.push({ text: "Tool result image:" }, ...imageParts);
323
339
  }
324
340
  }
325
341
  }
342
+ flushPendingToolImages();
326
343
 
327
344
  return contents;
328
345
  }
@@ -355,7 +372,7 @@ export function convertTools(
355
372
  description: tool.description || "",
356
373
  ...(useParameters
357
374
  ? { parameters: normalizeSchemaForCCA(toolWireSchema(tool)) }
358
- : { parametersJsonSchema: toolWireSchema(tool) }),
375
+ : { parametersJsonSchema: normalizeSchemaForGoogle(toolWireSchema(tool)) }),
359
376
  })),
360
377
  },
361
378
  ];
@@ -422,6 +439,47 @@ export function mapStopReasonString(reason: string): StopReason {
422
439
  }
423
440
  }
424
441
 
442
+ /**
443
+ * Bounded retries for the well-known Gemini "empty response" failure: a benign
444
+ * `finishReason: STOP` carrying only an empty/whitespace text part and no tool call.
445
+ * Shared by the public/Vertex `streamGoogleGenAI` path and the Cloud Code Assist
446
+ * (`google-gemini-cli`/`google-antigravity`) provider so both apply the same policy.
447
+ */
448
+ export const MAX_EMPTY_STREAM_RETRIES = 2;
449
+ export const EMPTY_STREAM_BASE_DELAY_MS = 500;
450
+
451
+ /**
452
+ * Whether a completed Google assistant message carries content worth delivering.
453
+ *
454
+ * A tool call or any non-whitespace text counts as meaningful. An empty/whitespace-only
455
+ * text part — or thinking that never produced an answer — is the "empty response" failure:
456
+ * delivered as-is the agent loop has nothing to act on and silently halts, so the request
457
+ * must be retried instead of surfaced.
458
+ */
459
+ export function hasMeaningfulGoogleContent(output: AssistantMessage): boolean {
460
+ for (const block of output.content) {
461
+ if (block.type === "toolCall") return true;
462
+ if (block.type === "text" && block.text.trim().length > 0) return true;
463
+ }
464
+ return false;
465
+ }
466
+
467
+ /** Wipe a streamed message between empty-response retries so the next attempt starts clean. */
468
+ function resetGoogleStreamOutputForRetry(output: AssistantMessage): void {
469
+ output.content = [];
470
+ output.usage = {
471
+ input: 0,
472
+ output: 0,
473
+ cacheRead: 0,
474
+ cacheWrite: 0,
475
+ totalTokens: 0,
476
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
477
+ };
478
+ output.stopReason = "stop";
479
+ output.errorMessage = undefined;
480
+ output.timestamp = Date.now();
481
+ }
482
+
425
483
  /**
426
484
  * Module-local counter for generating unique tool call IDs across Google providers.
427
485
  * Shared so that a single monotonically-increasing sequence is used regardless of which
@@ -527,6 +585,7 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
527
585
  const blockIndex = () => blocks.length - 1;
528
586
  let currentBlock: TextContent | ThinkingContent | null = null;
529
587
  let firstTokenSeen = false;
588
+ let sawFinishReason = false;
530
589
 
531
590
  const flushCurrent = () => {
532
591
  if (!currentBlock) return;
@@ -534,10 +593,23 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
534
593
  };
535
594
 
536
595
  for await (const chunk of googleStream) {
596
+ if (chunk.error) {
597
+ const detail = chunk.error.message || chunk.error.status || "unknown error";
598
+ const message = `Google API stream error: ${detail}`;
599
+ throw typeof chunk.error.code === "number" && chunk.error.code >= 400
600
+ ? new GoogleApiError(message, chunk.error.code)
601
+ : new Error(message);
602
+ }
603
+ if (!chunk.candidates?.length && chunk.promptFeedback?.blockReason) {
604
+ const detail = chunk.promptFeedback.blockReasonMessage;
605
+ throw new Error(
606
+ `Request blocked by Google (${chunk.promptFeedback.blockReason})${detail ? `: ${detail}` : ""}`,
607
+ );
608
+ }
537
609
  const candidate = chunk.candidates?.[0];
538
610
  if (candidate?.content?.parts) {
539
611
  for (const part of candidate.content.parts) {
540
- if (part.text !== undefined) {
612
+ if (part.text !== undefined && part.text !== "") {
541
613
  if (!firstTokenSeen) {
542
614
  firstTokenSeen = true;
543
615
  onFirstToken?.();
@@ -578,6 +650,18 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
578
650
  partial: output,
579
651
  });
580
652
  }
653
+ } else if (part.text === "" && part.thoughtSignature && currentBlock && !part.functionCall) {
654
+ if (currentBlock.type === "thinking") {
655
+ currentBlock.thinkingSignature = retainThoughtSignature(
656
+ currentBlock.thinkingSignature,
657
+ part.thoughtSignature,
658
+ );
659
+ } else if (retainTextSignature) {
660
+ currentBlock.textSignature = retainThoughtSignature(
661
+ currentBlock.textSignature,
662
+ part.thoughtSignature,
663
+ );
664
+ }
581
665
  }
582
666
 
583
667
  if (part.functionCall) {
@@ -606,9 +690,17 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
606
690
  }
607
691
 
608
692
  if (candidate?.finishReason) {
609
- output.stopReason = mapStopReason(candidate.finishReason);
610
- if (output.content.some(b => b.type === "toolCall")) {
693
+ sawFinishReason = true;
694
+ const mapped = mapStopReason(candidate.finishReason);
695
+ // Only let a trailing tool call upgrade benign finishes; SAFETY/MALFORMED_FUNCTION_CALL
696
+ // and friends must surface as errors even when earlier chunks carried valid tool calls.
697
+ if ((mapped === "stop" || mapped === "length") && output.content.some(b => b.type === "toolCall")) {
611
698
  output.stopReason = "toolUse";
699
+ } else {
700
+ output.stopReason = mapped;
701
+ if (mapped === "error") {
702
+ output.errorMessage = `Generation failed with finish reason: ${candidate.finishReason}`;
703
+ }
612
704
  }
613
705
  }
614
706
 
@@ -645,6 +737,10 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
645
737
  throw new Error("Request was aborted");
646
738
  }
647
739
 
740
+ if (!sawFinishReason) {
741
+ throw new Error("Google API stream ended without a finish reason (connection dropped or response truncated)");
742
+ }
743
+
648
744
  if (output.stopReason === "aborted" || output.stopReason === "error") {
649
745
  throw new Error(output.errorMessage ?? "An unknown error occurred");
650
746
  }
@@ -804,41 +900,65 @@ export function streamGoogleGenAI<T extends "google-generative-ai" | "google-ver
804
900
  headers: plan.headers,
805
901
  };
806
902
 
807
- const wireBody = paramsToWireBody(params);
903
+ const bodyJson = JSON.stringify(paramsToWireBody(params));
808
904
  const fetchImpl = plan.fetch ?? options?.fetch ?? (globalThis.fetch.bind(globalThis) as FetchImpl);
809
- const response = await fetchImpl(plan.url, {
810
- method: "POST",
811
- headers: { ...plan.headers, "Content-Type": "application/json", Accept: "text/event-stream" },
812
- body: JSON.stringify(wireBody),
813
- signal: options?.signal,
814
- });
815
- if (!response.ok) {
816
- const errorText = await response.text().catch(() => "");
817
- throw withHttpStatus(
818
- new Error(`Google API error (${response.status}): ${extractGoogleErrorMessage(errorText)}`),
819
- response.status,
820
- );
821
- }
822
- if (!response.body) {
823
- throw new Error("Google API returned an empty response body");
824
- }
825
-
826
- const googleStream = readSseJson<GenerateContentResponse>(response.body, options?.signal, event =>
827
- options?.onSseEvent?.({ event: event.event, data: event.data, raw: [...event.raw] }, model),
828
- );
905
+ const openStream = async (): Promise<ReadableStream<Uint8Array>> => {
906
+ const response = await fetchImpl(plan.url, {
907
+ method: "POST",
908
+ headers: { ...plan.headers, "Content-Type": "application/json", Accept: "text/event-stream" },
909
+ body: bodyJson,
910
+ signal: options?.signal,
911
+ });
912
+ if (!response.ok) {
913
+ const errorText = await response.text().catch(() => "");
914
+ throw new GoogleApiError(
915
+ `Google API error (${response.status}): ${extractGoogleErrorMessage(errorText)}`,
916
+ response.status,
917
+ { headers: response.headers },
918
+ );
919
+ }
920
+ if (!response.body) {
921
+ throw new Error("Google API returned an empty response body");
922
+ }
923
+ return response.body as ReadableStream<Uint8Array>;
924
+ };
829
925
 
926
+ let body = await openStream();
830
927
  stream.push({ type: "start", partial: output });
831
- await consumeGoogleStream({
832
- googleStream,
833
- output,
834
- stream,
835
- model,
836
- options,
837
- retainTextSignature,
838
- onFirstToken: () => {
839
- firstTokenTime = Date.now();
840
- },
841
- });
928
+
929
+ // Gemini occasionally finishes with `finishReason: STOP` while emitting only an empty
930
+ // text part and no tool call. Delivered as-is the agent receives a blank message and
931
+ // silently halts mid-task, so retry a bounded number of times before giving up.
932
+ for (let emptyAttempt = 0; ; emptyAttempt++) {
933
+ const googleStream = readSseJson<GenerateContentResponse>(body, options?.signal, event =>
934
+ options?.onSseEvent?.({ event: event.event, data: event.data, raw: [...event.raw] }, model),
935
+ );
936
+ await consumeGoogleStream({
937
+ googleStream,
938
+ output,
939
+ stream,
940
+ model,
941
+ options,
942
+ retainTextSignature,
943
+ onFirstToken: () => {
944
+ firstTokenTime = Date.now();
945
+ },
946
+ });
947
+
948
+ if (output.stopReason !== "stop" || hasMeaningfulGoogleContent(output)) break;
949
+ if (emptyAttempt >= MAX_EMPTY_STREAM_RETRIES) {
950
+ throw new Error(
951
+ `Google API returned an empty response (finishReason STOP with no content) after ${MAX_EMPTY_STREAM_RETRIES + 1} attempts`,
952
+ );
953
+ }
954
+ try {
955
+ await scheduler.wait(EMPTY_STREAM_BASE_DELAY_MS * 2 ** emptyAttempt, { signal: options?.signal });
956
+ } catch {
957
+ throw new Error("Request was aborted");
958
+ }
959
+ resetGoogleStreamOutputForRetry(output);
960
+ body = await openStream();
961
+ }
842
962
 
843
963
  output.duration = Date.now() - startTime;
844
964
  if (firstTokenTime) output.ttft = firstTokenTime - startTime;
@@ -157,11 +157,20 @@ export interface UsageMetadata {
157
157
  cachedContentTokenCount?: number;
158
158
  }
159
159
 
160
+ /** Prompt-level safety feedback; `blockReason` is set (with no candidates) when the prompt is blocked. */
161
+ export interface PromptFeedback {
162
+ blockReason?: string;
163
+ blockReasonMessage?: string;
164
+ [key: string]: unknown;
165
+ }
166
+
160
167
  /** Single SSE chunk's parsed JSON body. */
161
168
  export interface GenerateContentResponse {
162
169
  candidates?: Candidate[];
163
170
  usageMetadata?: UsageMetadata;
164
171
  modelVersion?: string;
165
172
  responseId?: string;
166
- promptFeedback?: Record<string, unknown>;
173
+ promptFeedback?: PromptFeedback;
174
+ /** In-band stream failure (quota, internal error) delivered as a final JSON event. */
175
+ error?: { code?: number; message?: string; status?: string };
167
176
  }
@@ -9,9 +9,9 @@
9
9
  * Note: Kimi calculates TPM rate limits based on max_tokens, not actual output.
10
10
  */
11
11
 
12
+ import { getKimiCommonHeaders } from "../registry/oauth/kimi";
12
13
  import type { Api, Context, Model } from "../types";
13
14
  import type { AssistantMessageEventStream } from "../utils/event-stream";
14
- import { getKimiCommonHeaders } from "../utils/oauth/kimi";
15
15
  import {
16
16
  type OpenAIAnthropicApiFormat,
17
17
  type OpenAIAnthropicShimOptions,
@@ -2,7 +2,7 @@
2
2
  * Mock provider for tests.
3
3
  *
4
4
  * Implements `Model<"mock">` + `streamMock` so test code can drive
5
- * pi-agent-core / streamSimple-shaped consumers without an HTTP client.
5
+ * agent-core / streamSimple-shaped consumers without an HTTP client.
6
6
  *
7
7
  * Usage:
8
8
  *
@@ -49,6 +49,7 @@ import type {
49
49
  Context,
50
50
  Model,
51
51
  SimpleStreamOptions,
52
+ StopDetails,
52
53
  StopReason,
53
54
  TextContent,
54
55
  ThinkingContent,
@@ -81,6 +82,10 @@ export interface MockResponse {
81
82
  content?: ReadonlyArray<MockContent>;
82
83
  /** Stop reason. Defaults to `"toolUse"` when content has tool calls, else `"stop"`. */
83
84
  stopReason?: StopReason;
85
+ /** Structured terminal stop classification, e.g. Anthropic refusal metadata. */
86
+ stopDetails?: StopDetails | null;
87
+ /** Error text paired with an explicit `"error"` stop reason. */
88
+ errorMessage?: string;
84
89
  /** Usage stats. Missing fields default to 0; missing `cost.total` is recomputed from components. */
85
90
  usage?: Partial<Omit<Usage, "cost">> & { cost?: Partial<Usage["cost"]> };
86
91
  /** Pre-set responseId. */
@@ -168,6 +173,7 @@ export class MockModel implements Model<MockApi> {
168
173
  readonly cost: Model["cost"];
169
174
  readonly contextWindow: number;
170
175
  readonly maxTokens: number;
176
+ readonly compat = undefined;
171
177
 
172
178
  /** Recorded calls in invocation order. */
173
179
  readonly calls: MockCall[] = [];
@@ -226,7 +232,7 @@ export function createMockModel(options: MockModelOptions = {}): MockModel {
226
232
  return new MockModel(options);
227
233
  }
228
234
 
229
- /** Stream function for `Model<"mock">`. Matches the @prometheus-ai/ai per-provider stream signature. */
235
+ /** Stream function for `Model<"mock">`. Matches the Prometheus AI per-provider stream signature. */
230
236
  export function streamMock(
231
237
  model: Model<Api>,
232
238
  context: Context,
@@ -250,7 +256,7 @@ export function streamMock(
250
256
  }
251
257
 
252
258
  /** Convenience: register the mock provider with the global custom API registry. */
253
- export function registerMockApi(sourceId = "prometheus-ai/mock"): void {
259
+ export function registerMockApi(sourceId = "@prometheus-ai/ai/mock"): void {
254
260
  registerCustomApi(MOCK_API, streamMock, sourceId);
255
261
  }
256
262
 
@@ -388,6 +394,8 @@ async function runMock(
388
394
  const reason: StopReason = response.stopReason ?? (hasToolCall ? ("toolUse" as StopReason) : ("stop" as StopReason));
389
395
 
390
396
  partial.stopReason = reason;
397
+ partial.stopDetails = response.stopDetails;
398
+ partial.errorMessage = response.errorMessage;
391
399
  partial.usage = mergeUsage(response.usage);
392
400
  partial.duration = Date.now() - startedAt;
393
401
 
@@ -1,4 +1,5 @@
1
1
  import { extractHttpStatusFromError, fetchWithRetry } from "@prometheus-ai/utils";
2
+ import { ProviderHttpError } from "../errors";
2
3
  import { getEnvApiKey } from "../stream";
3
4
  import type {
4
5
  Api,
@@ -16,7 +17,8 @@ import type {
16
17
  } from "../types";
17
18
  import { normalizeSystemPrompts } from "../utils";
18
19
  import { AssistantMessageEventStream } from "../utils/event-stream";
19
- import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
20
+ import { type CapturedHttpErrorResponse, finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
21
+ import { getOpenAIStreamFirstEventTimeoutMs, getOpenAIStreamIdleTimeoutMs } from "../utils/idle-iterator";
20
22
  import { parseStreamingJson } from "../utils/json-parse";
21
23
  import { toolWireSchema } from "../utils/schema/wire";
22
24
  import {
@@ -27,8 +29,14 @@ import {
27
29
  } from "../utils/stream-markup-healing";
28
30
  import { transformMessages } from "./transform-messages";
29
31
 
32
+ /** Non-2xx response from the Ollama `/api/chat` endpoint. */
33
+ export class OllamaApiError extends ProviderHttpError {
34
+ override readonly name = "OllamaApiError";
35
+ }
36
+
30
37
  export interface OllamaChatOptions extends StreamOptions {
31
38
  reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
39
+ disableReasoning?: boolean;
32
40
  toolChoice?: ToolChoice;
33
41
  }
34
42
 
@@ -91,7 +99,14 @@ function normalizeBaseUrl(baseUrl?: string): string {
91
99
  return trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
92
100
  }
93
101
 
94
- function mapReasoning(reasoning: OllamaChatOptions["reasoning"]): boolean | "low" | "medium" | "high" | undefined {
102
+ function mapReasoning(
103
+ reasoning: OllamaChatOptions["reasoning"],
104
+ disableReasoning: boolean | undefined,
105
+ modelReasoning: boolean,
106
+ ): boolean | "low" | "medium" | "high" | undefined {
107
+ if (disableReasoning && modelReasoning) {
108
+ return false;
109
+ }
95
110
  switch (reasoning) {
96
111
  case "minimal":
97
112
  case "low":
@@ -258,7 +273,7 @@ function convertTools(tools: Tool[] | undefined): OllamaFunctionTool[] | undefin
258
273
  }
259
274
 
260
275
  function createChatBody(model: Model<"ollama-chat">, context: Context, options: OllamaChatOptions | undefined) {
261
- const think = mapReasoning(options?.reasoning);
276
+ const think = mapReasoning(options?.reasoning, options?.disableReasoning, model.reasoning);
262
277
  const toolChoice = mapToolChoice(options?.toolChoice);
263
278
  const selectedTools = selectToolsForToolChoice(context.tools, options?.toolChoice);
264
279
  const tools = convertTools(selectedTools);
@@ -268,11 +283,32 @@ function createChatBody(model: Model<"ollama-chat">, context: Context, options:
268
283
  ...(tools ? { tools } : {}),
269
284
  ...(think !== undefined ? { think } : {}),
270
285
  ...(toolChoice !== undefined ? { tool_choice: toolChoice } : {}),
271
- ...(options?.maxTokens !== undefined ? { options: { num_predict: options.maxTokens } } : {}),
286
+ ...(options?.maxTokens !== undefined && !model.omitMaxOutputTokens
287
+ ? { options: { num_predict: options.maxTokens } }
288
+ : {}),
272
289
  stream: true,
273
290
  };
274
291
  }
275
292
 
293
+ async function captureHttpErrorResponse(response: Response): Promise<CapturedHttpErrorResponse> {
294
+ let bodyText: string | undefined;
295
+ let bodyJson: unknown;
296
+ try {
297
+ bodyText = await response.text();
298
+ if (bodyText.trim()) {
299
+ try {
300
+ bodyJson = JSON.parse(bodyText) as unknown;
301
+ } catch {}
302
+ }
303
+ } catch {}
304
+ return {
305
+ status: response.status,
306
+ headers: response.headers,
307
+ bodyText,
308
+ bodyJson,
309
+ };
310
+ }
311
+
276
312
  async function* iterateNdjson(stream: ReadableStream<Uint8Array>): AsyncGenerator<OllamaChatChunk> {
277
313
  const reader = stream.getReader();
278
314
  const decoder = new TextDecoder();
@@ -376,6 +412,7 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
376
412
  let firstTokenTime: number | undefined;
377
413
  const output = createEmptyOutput(model);
378
414
  let rawRequestDump: RawHttpRequestDump | undefined;
415
+ let capturedErrorResponse: CapturedHttpErrorResponse | undefined;
379
416
  let activeThinkingIndex: number | undefined;
380
417
  let activeTextIndex: number | undefined;
381
418
  const activeToolIndices = new Set<number>();
@@ -489,6 +526,22 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
489
526
  url: `${baseUrl}/api/chat`,
490
527
  body,
491
528
  };
529
+ // Direct callers that bypass `register-builtins` (which installs
530
+ // the iterator-level watchdog) need a pre-response timer alongside
531
+ // `timeout: false`; otherwise an Ollama server that accepts the
532
+ // POST and never streams headers would hang forever (issue #2422).
533
+ const idleTimeoutMs = options.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs();
534
+ const firstEventTimeoutMs =
535
+ options.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);
536
+ const preResponseWatchdog =
537
+ firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
538
+ ? AbortSignal.timeout(firstEventTimeoutMs)
539
+ : undefined;
540
+ const fetchSignal = preResponseWatchdog
541
+ ? options.signal
542
+ ? AbortSignal.any([options.signal, preResponseWatchdog])
543
+ : preResponseWatchdog
544
+ : options.signal;
492
545
  const response = await fetchWithRetry(`${baseUrl}/api/chat`, {
493
546
  method: "POST",
494
547
  headers: {
@@ -498,12 +551,16 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
498
551
  "Content-Type": "application/json",
499
552
  },
500
553
  body: JSON.stringify(body),
501
- signal: options.signal,
554
+ signal: fetchSignal,
502
555
  defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
503
556
  fetch: options.fetch,
557
+ timeout: false,
504
558
  });
505
559
  if (!response.ok) {
506
- throw new Error(`HTTP ${response.status} from ${baseUrl}/api/chat`);
560
+ capturedErrorResponse = await captureHttpErrorResponse(response);
561
+ throw new OllamaApiError(`HTTP ${response.status} from ${baseUrl}/api/chat`, response.status, {
562
+ headers: response.headers,
563
+ });
507
564
  }
508
565
  if (!response.body) {
509
566
  throw new Error("Ollama returned an empty response body");
@@ -631,7 +688,7 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
631
688
  }
632
689
  output.stopReason = options.signal?.aborted ? "aborted" : "error";
633
690
  output.errorStatus = extractHttpStatusFromError(error);
634
- output.errorMessage = await finalizeErrorMessage(error, rawRequestDump);
691
+ output.errorMessage = await finalizeErrorMessage(error, rawRequestDump, capturedErrorResponse);
635
692
  output.duration = Date.now() - startTime;
636
693
  if (firstTokenTime) {
637
694
  output.ttft = firstTokenTime - startTime;
@@ -8,8 +8,9 @@
8
8
  * here once.
9
9
  */
10
10
 
11
+ import { buildModel } from "@prometheus-ai/catalog/build";
11
12
  import { ANTHROPIC_THINKING } from "../stream";
12
- import type { Context, Model, SimpleStreamOptions } from "../types";
13
+ import type { Context, Model, ModelSpec, SimpleStreamOptions } from "../types";
13
14
  import { AssistantMessageEventStream } from "../utils/event-stream";
14
15
  import { createProviderErrorMessage } from "./error-message";
15
16
  import { streamAnthropic, streamOpenAICompletions } from "./register-builtins";
@@ -44,6 +45,9 @@ export function streamOpenAIAnthropicShim(
44
45
  ): AssistantMessageEventStream {
45
46
  const stream = new AssistantMessageEventStream();
46
47
  const format = options?.format ?? config.defaultFormat;
48
+ // The resolver form of `apiKey` is resolved upstream in `streamSimple`;
49
+ // this shim only ever receives a static bearer string.
50
+ const apiKey = typeof options?.apiKey === "string" ? options.apiKey : undefined;
47
51
 
48
52
  (async () => {
49
53
  try {
@@ -53,7 +57,7 @@ export function streamOpenAIAnthropicShim(
53
57
  };
54
58
 
55
59
  if (format === "anthropic") {
56
- const anthropicModel: Model<"anthropic-messages"> = {
60
+ const anthropicModel = buildModel({
57
61
  id: model.id,
58
62
  name: model.name,
59
63
  api: "anthropic-messages",
@@ -65,7 +69,7 @@ export function streamOpenAIAnthropicShim(
65
69
  reasoning: model.reasoning,
66
70
  input: model.input,
67
71
  cost: model.cost,
68
- };
72
+ } as ModelSpec<"anthropic-messages">);
69
73
 
70
74
  const reasoningEffort = options?.reasoning;
71
75
  const thinkingEnabled = !!reasoningEffort && model.reasoning;
@@ -74,14 +78,14 @@ export function streamOpenAIAnthropicShim(
74
78
  : undefined;
75
79
 
76
80
  const innerStream = streamAnthropic(anthropicModel, context, {
77
- apiKey: options?.apiKey,
81
+ apiKey,
78
82
  temperature: options?.temperature,
79
83
  topP: options?.topP,
80
84
  topK: options?.topK,
81
85
  minP: options?.minP,
82
86
  presencePenalty: options?.presencePenalty,
83
87
  repetitionPenalty: options?.repetitionPenalty,
84
- maxTokens: options?.maxTokens ?? model.maxTokens,
88
+ maxTokens: options?.maxTokens ?? model.maxTokens ?? undefined,
85
89
  signal: options?.signal,
86
90
  headers: mergedHeaders,
87
91
  sessionId: options?.sessionId,
@@ -98,19 +102,24 @@ export function streamOpenAIAnthropicShim(
98
102
  }
99
103
  } else {
100
104
  const openaiModel: Model<"openai-completions"> = config.openaiBaseUrl
101
- ? { ...model, baseUrl: config.openaiBaseUrl, headers: mergedHeaders }
105
+ ? buildModel({
106
+ ...model,
107
+ baseUrl: config.openaiBaseUrl,
108
+ headers: mergedHeaders,
109
+ compat: model.compatConfig,
110
+ } as ModelSpec<"openai-completions">)
102
111
  : model;
103
112
 
104
113
  const reasoningEffort = options?.reasoning;
105
114
  const innerStream = streamOpenAICompletions(openaiModel, context, {
106
- apiKey: options?.apiKey,
115
+ apiKey,
107
116
  temperature: options?.temperature,
108
117
  topP: options?.topP,
109
118
  topK: options?.topK,
110
119
  minP: options?.minP,
111
120
  presencePenalty: options?.presencePenalty,
112
121
  repetitionPenalty: options?.repetitionPenalty,
113
- maxTokens: options?.maxTokens ?? model.maxTokens,
122
+ maxTokens: options?.maxTokens ?? model.maxTokens ?? undefined,
114
123
  signal: options?.signal,
115
124
  headers: mergedHeaders,
116
125
  sessionId: options?.sessionId,
@@ -7,6 +7,8 @@
7
7
  * non-strict defaults (e.g. `stream_options.include_obfuscation`) — does not
8
8
  * trip 400s on shapes we simply ignore.
9
9
  */
10
+
11
+ import { z } from "zod/v4";
10
12
  import type {
11
13
  ChatCompletionContentPart,
12
14
  ChatCompletionCreateParams,
@@ -14,8 +16,7 @@ import type {
14
16
  ChatCompletionMessageToolCall,
15
17
  ChatCompletionTool,
16
18
  ChatCompletionToolChoiceOption,
17
- } from "openai/resources/chat/completions";
18
- import * as z from "zod/v4";
19
+ } from "./openai-chat-wire";
19
20
 
20
21
  // ─── User-message content parts ─────────────────────────────────────────────
21
22
 
@@ -27,7 +28,7 @@ export const textPartSchema = z.object({
27
28
  /**
28
29
  * OpenAI documents `image_url` as either `{ url: string, detail?: ... }` or —
29
30
  * older clients — a bare string. Accept both shapes; downstream we extract a
30
- * URL. `detail` is accepted for forward-compat but currently dropped (@prometheus-ai/ai's
31
+ * URL. `detail` is accepted for forward-compat but currently dropped (Prometheus AI's
31
32
  * `ImageContent` has no detail field — TODO: plumb through if/when added).
32
33
  */
33
34
  export const imagePartSchema = z.object({
@@ -145,6 +146,11 @@ export const assistantMessageSchema = z.object({
145
146
  role: z.literal("assistant"),
146
147
  content: baseContent.optional(),
147
148
  tool_calls: z.array(toolCallSchema).optional(),
149
+ // DeepSeek-style reasoning channel. The gateway emits it on the way out
150
+ // (encodeResponse/encodeStream); accept it back so thinking-mode
151
+ // continuations replay the model's actual reasoning instead of a
152
+ // synthesized placeholder.
153
+ reasoning_content: z.string().nullish(),
148
154
  });
149
155
 
150
156
  export const toolMessageSchema = z.object({