@prometheus-ai/ai 0.5.4 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/dist/types/auth-broker/remote-store.d.ts +2 -1
  2. package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
  3. package/dist/types/auth-gateway/server.d.ts +19 -0
  4. package/dist/types/auth-gateway/types.d.ts +9 -3
  5. package/dist/types/auth-retry.d.ts +119 -0
  6. package/dist/types/auth-storage.d.ts +217 -8
  7. package/dist/types/errors.d.ts +24 -0
  8. package/dist/types/index.d.ts +5 -9
  9. package/dist/types/provider-details.d.ts +1 -1
  10. package/dist/types/providers/amazon-bedrock.d.ts +12 -6
  11. package/dist/types/providers/anthropic-client.d.ts +10 -3
  12. package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
  13. package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
  14. package/dist/types/providers/anthropic-wire.d.ts +3 -3
  15. package/dist/types/providers/anthropic.d.ts +41 -34
  16. package/dist/types/providers/aws-credentials.d.ts +8 -0
  17. package/dist/types/providers/azure-openai-responses.d.ts +1 -0
  18. package/dist/types/providers/google-gemini-cli.d.ts +22 -1
  19. package/dist/types/providers/google-shared.d.ts +22 -0
  20. package/dist/types/providers/google-types.d.ts +13 -1
  21. package/dist/types/providers/mock.d.ts +8 -3
  22. package/dist/types/providers/ollama.d.ts +6 -0
  23. package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
  24. package/dist/types/providers/openai-chat-server.d.ts +3 -3
  25. package/dist/types/providers/openai-chat-wire.d.ts +644 -0
  26. package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
  27. package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
  28. package/dist/types/providers/openai-codex-responses.d.ts +31 -2
  29. package/dist/types/providers/openai-completions-compat.d.ts +2 -25
  30. package/dist/types/providers/openai-completions.d.ts +2 -10
  31. package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
  32. package/dist/types/providers/openai-responses-server.d.ts +2 -2
  33. package/dist/types/providers/openai-responses-shared.d.ts +49 -9
  34. package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
  35. package/dist/types/providers/openai-responses.d.ts +13 -4
  36. package/dist/types/providers/prometheus-native-client.d.ts +9 -0
  37. package/dist/types/providers/prometheus-native-server.d.ts +4 -3
  38. package/dist/types/providers/transform-messages.d.ts +1 -2
  39. package/dist/types/rate-limit-utils.d.ts +3 -2
  40. package/dist/types/registry/aimlapi.d.ts +4 -0
  41. package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
  42. package/dist/types/registry/amazon-bedrock.d.ts +5 -0
  43. package/dist/types/registry/anthropic.d.ts +10 -0
  44. package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
  45. package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
  46. package/dist/types/registry/cerebras.d.ts +7 -0
  47. package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
  48. package/dist/types/registry/cursor.d.ts +7 -0
  49. package/dist/types/registry/deepseek.d.ts +8 -0
  50. package/dist/types/registry/derived.d.ts +5 -0
  51. package/dist/types/registry/firepass.d.ts +16 -0
  52. package/dist/types/registry/fireworks.d.ts +7 -0
  53. package/dist/types/registry/github-copilot.d.ts +7 -0
  54. package/dist/types/registry/gitlab-duo.d.ts +9 -0
  55. package/dist/types/registry/google-antigravity.d.ts +9 -0
  56. package/dist/types/registry/google-gemini-cli.d.ts +9 -0
  57. package/dist/types/registry/google-vertex.d.ts +5 -0
  58. package/dist/types/registry/google.d.ts +4 -0
  59. package/dist/types/registry/groq.d.ts +4 -0
  60. package/dist/types/registry/huggingface.d.ts +7 -0
  61. package/dist/types/registry/index.d.ts +4 -0
  62. package/dist/types/registry/kagi.d.ts +14 -0
  63. package/dist/types/registry/kilo.d.ts +7 -0
  64. package/dist/types/registry/kimi-code.d.ts +7 -0
  65. package/dist/types/registry/litellm.d.ts +13 -0
  66. package/dist/types/registry/lm-studio.d.ts +8 -0
  67. package/dist/types/registry/minimax-code-cn.d.ts +6 -0
  68. package/dist/types/registry/minimax-code.d.ts +6 -0
  69. package/dist/types/registry/minimax.d.ts +4 -0
  70. package/dist/types/registry/mistral.d.ts +4 -0
  71. package/dist/types/registry/moonshot.d.ts +7 -0
  72. package/dist/types/registry/nanogpt.d.ts +7 -0
  73. package/dist/types/registry/nvidia.d.ts +7 -0
  74. package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  75. package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
  76. package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
  77. package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
  78. package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
  79. package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
  80. package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
  81. package/dist/types/registry/ollama-cloud.d.ts +7 -0
  82. package/dist/types/registry/ollama.d.ts +12 -0
  83. package/dist/types/registry/openai-codex-device.d.ts +8 -0
  84. package/dist/types/registry/openai-codex.d.ts +9 -0
  85. package/dist/types/registry/openai.d.ts +4 -0
  86. package/dist/types/registry/opencode-go.d.ts +6 -0
  87. package/dist/types/registry/opencode-zen.d.ts +6 -0
  88. package/dist/types/registry/openrouter.d.ts +13 -0
  89. package/dist/types/registry/parallel.d.ts +14 -0
  90. package/dist/types/registry/perplexity.d.ts +7 -0
  91. package/dist/types/registry/qianfan.d.ts +7 -0
  92. package/dist/types/registry/qwen-portal.d.ts +7 -0
  93. package/dist/types/registry/registry.d.ts +272 -0
  94. package/dist/types/registry/synthetic.d.ts +6 -0
  95. package/dist/types/registry/tavily.d.ts +14 -0
  96. package/dist/types/registry/together.d.ts +6 -0
  97. package/dist/types/registry/types.d.ts +51 -0
  98. package/dist/types/registry/venice.d.ts +13 -0
  99. package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
  100. package/dist/types/registry/vllm.d.ts +7 -0
  101. package/dist/types/registry/wafer-pass.d.ts +6 -0
  102. package/dist/types/registry/wafer-serverless.d.ts +6 -0
  103. package/dist/types/registry/xai-oauth.d.ts +7 -0
  104. package/dist/types/registry/xai.d.ts +4 -0
  105. package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
  106. package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
  107. package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
  108. package/dist/types/registry/xiaomi.d.ts +6 -0
  109. package/dist/types/registry/zai.d.ts +7 -0
  110. package/dist/types/registry/zenmux.d.ts +7 -0
  111. package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
  112. package/dist/types/stream.d.ts +9 -1
  113. package/dist/types/types.d.ts +56 -295
  114. package/dist/types/usage/google-antigravity.d.ts +15 -1
  115. package/dist/types/usage/openai-codex-reset.d.ts +79 -0
  116. package/dist/types/usage/openai-codex.d.ts +1 -0
  117. package/dist/types/usage.d.ts +77 -4
  118. package/dist/types/utils/abort.d.ts +6 -0
  119. package/dist/types/utils/event-stream.d.ts +2 -0
  120. package/dist/types/utils/http-inspector.d.ts +0 -1
  121. package/dist/types/utils/idle-iterator.d.ts +35 -0
  122. package/dist/types/utils/openai-http.d.ts +58 -0
  123. package/dist/types/utils/request-debug.d.ts +3 -0
  124. package/dist/types/utils/retry-after.d.ts +1 -0
  125. package/dist/types/utils/schema/fields.d.ts +5 -0
  126. package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
  127. package/dist/types/utils/schema/stamps.d.ts +7 -15
  128. package/dist/types/utils/sse-debug.d.ts +0 -5
  129. package/dist/types/utils/stream-markup-healing.d.ts +2 -0
  130. package/dist/types/utils.d.ts +1 -5
  131. package/package.json +17 -29
  132. package/src/auth-broker/remote-store.ts +10 -1
  133. package/src/auth-broker/snapshot-cache.ts +1 -1
  134. package/src/auth-broker/wire-schemas.ts +1 -1
  135. package/src/auth-gateway/http.ts +1 -1
  136. package/src/auth-gateway/server.ts +95 -30
  137. package/src/auth-gateway/types.ts +10 -2
  138. package/src/auth-retry.ts +238 -0
  139. package/src/auth-storage.ts +935 -430
  140. package/src/errors.ts +32 -0
  141. package/src/index.ts +9 -14
  142. package/src/provider-details.ts +1 -1
  143. package/src/providers/__tests__/google-auth.test.ts +144 -0
  144. package/src/providers/amazon-bedrock.ts +70 -40
  145. package/src/providers/anthropic-client.ts +15 -13
  146. package/src/providers/anthropic-messages-server-schema.ts +17 -7
  147. package/src/providers/anthropic-messages-server.ts +88 -20
  148. package/src/providers/anthropic-wire.ts +4 -3
  149. package/src/providers/anthropic.ts +1234 -621
  150. package/src/providers/aws-credentials.ts +47 -5
  151. package/src/providers/aws-eventstream.ts +5 -0
  152. package/src/providers/azure-openai-responses.ts +117 -67
  153. package/src/providers/cursor.ts +30 -30
  154. package/src/providers/github-copilot-headers.ts +1 -1
  155. package/src/providers/gitlab-duo.ts +36 -29
  156. package/src/providers/google-auth.ts +71 -8
  157. package/src/providers/google-gemini-cli.ts +118 -22
  158. package/src/providers/google-shared.ts +163 -43
  159. package/src/providers/google-types.ts +10 -1
  160. package/src/providers/kimi.ts +1 -1
  161. package/src/providers/mock.ts +11 -3
  162. package/src/providers/ollama.ts +64 -7
  163. package/src/providers/openai-anthropic-shim.ts +17 -8
  164. package/src/providers/openai-chat-server-schema.ts +9 -3
  165. package/src/providers/openai-chat-server.ts +82 -16
  166. package/src/providers/openai-chat-wire.ts +847 -0
  167. package/src/providers/openai-codex/request-transformer.ts +129 -34
  168. package/src/providers/openai-codex/response-handler.ts +22 -1
  169. package/src/providers/openai-codex-responses.ts +699 -247
  170. package/src/providers/openai-completions-compat.ts +8 -308
  171. package/src/providers/openai-completions.ts +416 -267
  172. package/src/providers/openai-responses-server-schema.ts +15 -9
  173. package/src/providers/openai-responses-server.ts +162 -114
  174. package/src/providers/openai-responses-shared.ts +320 -82
  175. package/src/providers/openai-responses-wire.ts +6391 -0
  176. package/src/providers/openai-responses.ts +382 -176
  177. package/src/providers/prometheus-native-client.ts +27 -11
  178. package/src/providers/prometheus-native-server.ts +44 -17
  179. package/src/providers/transform-messages.ts +311 -120
  180. package/src/providers/vision-guard.ts +5 -3
  181. package/src/rate-limit-utils.ts +13 -3
  182. package/src/registry/aimlapi.ts +6 -0
  183. package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
  184. package/src/registry/amazon-bedrock.ts +22 -0
  185. package/src/registry/anthropic.ts +26 -0
  186. package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
  187. package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
  188. package/src/{utils/oauth → registry}/cerebras.ts +8 -1
  189. package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
  190. package/src/registry/cursor.ts +20 -0
  191. package/src/{utils/oauth → registry}/deepseek.ts +9 -17
  192. package/src/registry/derived.ts +9 -0
  193. package/src/{utils/oauth → registry}/firepass.ts +10 -2
  194. package/src/{utils/oauth → registry}/fireworks.ts +8 -1
  195. package/src/registry/github-copilot.ts +22 -0
  196. package/src/registry/gitlab-duo.ts +19 -0
  197. package/src/registry/google-antigravity.ts +21 -0
  198. package/src/registry/google-gemini-cli.ts +21 -0
  199. package/src/registry/google-vertex.ts +38 -0
  200. package/src/registry/google.ts +6 -0
  201. package/src/registry/groq.ts +6 -0
  202. package/src/{utils/oauth → registry}/huggingface.ts +8 -19
  203. package/src/registry/index.ts +4 -0
  204. package/src/{utils/oauth → registry}/kagi.ts +9 -11
  205. package/src/{utils/oauth → registry}/kilo.ts +11 -6
  206. package/src/registry/kimi-code.ts +17 -0
  207. package/src/{utils/oauth → registry}/litellm.ts +8 -12
  208. package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
  209. package/src/registry/minimax-code-cn.ts +12 -0
  210. package/src/registry/minimax-code.ts +12 -0
  211. package/src/registry/minimax.ts +6 -0
  212. package/src/registry/mistral.ts +6 -0
  213. package/src/{utils/oauth → registry}/moonshot.ts +8 -9
  214. package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
  215. package/src/{utils/oauth → registry}/nvidia.ts +8 -18
  216. package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
  217. package/src/{utils → registry}/oauth/anthropic.ts +38 -17
  218. package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
  219. package/src/registry/oauth/gitlab-duo.ts +198 -0
  220. package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
  221. package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
  222. package/src/registry/oauth/index.ts +164 -0
  223. package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
  224. package/src/{utils → registry}/oauth/types.ts +7 -51
  225. package/src/{utils → registry}/oauth/wafer.ts +1 -1
  226. package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
  227. package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
  228. package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
  229. package/src/{utils/oauth → registry}/ollama.ts +8 -13
  230. package/src/registry/openai-codex-device.ts +18 -0
  231. package/src/registry/openai-codex.ts +19 -0
  232. package/src/registry/openai.ts +6 -0
  233. package/src/registry/opencode-go.ts +12 -0
  234. package/src/registry/opencode-zen.ts +12 -0
  235. package/src/{utils/oauth → registry}/openrouter.ts +10 -2
  236. package/src/{utils/oauth → registry}/parallel.ts +9 -11
  237. package/src/registry/perplexity.ts +13 -0
  238. package/src/{utils/oauth → registry}/qianfan.ts +8 -17
  239. package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
  240. package/src/registry/registry.ts +149 -0
  241. package/src/{utils/oauth → registry}/synthetic.ts +7 -1
  242. package/src/{utils/oauth → registry}/tavily.ts +10 -12
  243. package/src/{utils/oauth → registry}/together.ts +7 -1
  244. package/src/registry/types.ts +56 -0
  245. package/src/{utils/oauth → registry}/venice.ts +8 -12
  246. package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
  247. package/src/{utils/oauth → registry}/vllm.ts +9 -16
  248. package/src/registry/wafer-pass.ts +12 -0
  249. package/src/registry/wafer-serverless.ts +12 -0
  250. package/src/registry/xai-oauth.ts +17 -0
  251. package/src/registry/xai.ts +6 -0
  252. package/src/registry/xiaomi-token-plan-ams.ts +12 -0
  253. package/src/registry/xiaomi-token-plan-cn.ts +12 -0
  254. package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
  255. package/src/registry/xiaomi.ts +12 -0
  256. package/src/{utils/oauth → registry}/zai.ts +10 -22
  257. package/src/{utils/oauth → registry}/zenmux.ts +8 -1
  258. package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
  259. package/src/stream.ts +229 -199
  260. package/src/types.ts +63 -384
  261. package/src/usage/claude.ts +4 -2
  262. package/src/usage/github-copilot.ts +4 -2
  263. package/src/usage/google-antigravity.ts +196 -28
  264. package/src/usage/kimi.ts +1 -1
  265. package/src/usage/minimax-code.ts +5 -6
  266. package/src/usage/openai-codex-reset.ts +174 -0
  267. package/src/usage/openai-codex.ts +19 -2
  268. package/src/usage/zai.ts +2 -1
  269. package/src/usage.ts +93 -4
  270. package/src/utils/abort.ts +14 -0
  271. package/src/utils/event-stream.ts +17 -0
  272. package/src/utils/http-inspector.ts +4 -12
  273. package/src/utils/idle-iterator.ts +250 -79
  274. package/src/utils/openai-http.ts +157 -0
  275. package/src/utils/request-debug.ts +67 -19
  276. package/src/utils/retry-after.ts +1 -1
  277. package/src/utils/retry.ts +23 -2
  278. package/src/utils/schema/CONSTRAINTS.md +4 -2
  279. package/src/utils/schema/fields.ts +16 -0
  280. package/src/utils/schema/json-schema-validator.ts +19 -1
  281. package/src/utils/schema/normalize.ts +80 -8
  282. package/src/utils/schema/stamps.ts +22 -10
  283. package/src/utils/schema/wire.ts +2 -2
  284. package/src/utils/sse-debug.ts +0 -271
  285. package/src/utils/stream-markup-healing.ts +50 -8
  286. package/src/utils/validation.ts +49 -13
  287. package/src/utils.ts +2 -26
  288. package/dist/types/model-cache.d.ts +0 -17
  289. package/dist/types/model-manager.d.ts +0 -64
  290. package/dist/types/model-thinking.d.ts +0 -100
  291. package/dist/types/models.d.ts +0 -12
  292. package/dist/types/provider-models/bundled-references.d.ts +0 -4
  293. package/dist/types/provider-models/descriptors.d.ts +0 -50
  294. package/dist/types/provider-models/google.d.ts +0 -24
  295. package/dist/types/provider-models/index.d.ts +0 -5
  296. package/dist/types/provider-models/ollama.d.ts +0 -7
  297. package/dist/types/provider-models/openai-compat.d.ts +0 -323
  298. package/dist/types/provider-models/special.d.ts +0 -16
  299. package/dist/types/utils/discovery/antigravity.d.ts +0 -61
  300. package/dist/types/utils/discovery/codex.d.ts +0 -38
  301. package/dist/types/utils/discovery/cursor.d.ts +0 -23
  302. package/dist/types/utils/discovery/gemini.d.ts +0 -25
  303. package/dist/types/utils/discovery/index.d.ts +0 -4
  304. package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
  305. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
  306. package/dist/types/utils/oauth/cerebras.d.ts +0 -1
  307. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
  308. package/dist/types/utils/oauth/deepseek.d.ts +0 -10
  309. package/dist/types/utils/oauth/firepass.d.ts +0 -1
  310. package/dist/types/utils/oauth/fireworks.d.ts +0 -1
  311. package/dist/types/utils/oauth/huggingface.d.ts +0 -19
  312. package/dist/types/utils/oauth/kagi.d.ts +0 -17
  313. package/dist/types/utils/oauth/kilo.d.ts +0 -5
  314. package/dist/types/utils/oauth/litellm.d.ts +0 -18
  315. package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
  316. package/dist/types/utils/oauth/moonshot.d.ts +0 -1
  317. package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
  318. package/dist/types/utils/oauth/nvidia.d.ts +0 -18
  319. package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
  320. package/dist/types/utils/oauth/ollama.d.ts +0 -18
  321. package/dist/types/utils/oauth/openrouter.d.ts +0 -1
  322. package/dist/types/utils/oauth/parallel.d.ts +0 -17
  323. package/dist/types/utils/oauth/qianfan.d.ts +0 -17
  324. package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
  325. package/dist/types/utils/oauth/synthetic.d.ts +0 -1
  326. package/dist/types/utils/oauth/tavily.d.ts +0 -17
  327. package/dist/types/utils/oauth/together.d.ts +0 -1
  328. package/dist/types/utils/oauth/venice.d.ts +0 -18
  329. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
  330. package/dist/types/utils/oauth/vllm.d.ts +0 -16
  331. package/dist/types/utils/oauth/zai.d.ts +0 -18
  332. package/dist/types/utils/oauth/zenmux.d.ts +0 -1
  333. package/dist/types/utils/oauth/zhipu.d.ts +0 -18
  334. package/src/model-cache.ts +0 -129
  335. package/src/model-manager.ts +0 -469
  336. package/src/model-thinking.ts +0 -756
  337. package/src/models.json +0 -60287
  338. package/src/models.json.d.ts +0 -9
  339. package/src/models.ts +0 -56
  340. package/src/provider-models/bundled-references.ts +0 -38
  341. package/src/provider-models/descriptors.ts +0 -364
  342. package/src/provider-models/google.ts +0 -88
  343. package/src/provider-models/index.ts +0 -5
  344. package/src/provider-models/ollama.ts +0 -153
  345. package/src/provider-models/openai-compat.ts +0 -2904
  346. package/src/provider-models/special.ts +0 -67
  347. package/src/utils/discovery/antigravity.ts +0 -261
  348. package/src/utils/discovery/codex.ts +0 -371
  349. package/src/utils/discovery/cursor.ts +0 -306
  350. package/src/utils/discovery/gemini.ts +0 -248
  351. package/src/utils/discovery/index.ts +0 -4
  352. package/src/utils/discovery/openai-compatible.ts +0 -224
  353. package/src/utils/oauth/gitlab-duo.ts +0 -123
  354. package/src/utils/oauth/index.ts +0 -502
  355. /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
  356. /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
  357. /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
  358. /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
  359. /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
  360. /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
  361. /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
  362. /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
  363. /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
  364. /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
  365. /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
  366. /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
  367. /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
  368. /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
  369. /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
  370. /package/src/{utils → registry}/oauth/cursor.ts +0 -0
  371. /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
  372. /package/src/{utils → registry}/oauth/kimi.ts +0 -0
  373. /package/src/{utils → registry}/oauth/oauth.html +0 -0
  374. /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
  375. /package/src/{utils → registry}/oauth/opencode.ts +0 -0
  376. /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
  377. /package/src/{utils → registry}/oauth/pkce.ts +0 -0
package/src/stream.ts CHANGED
@@ -1,15 +1,17 @@
1
- import * as fs from "node:fs";
2
- import * as os from "node:os";
3
- import * as path from "node:path";
4
- import { $env, $pickenv, extractHttpStatusFromError } from "@prometheus-ai/utils";
5
- import { getCustomApi } from "./api-registry";
6
- import type { Effort } from "./model-thinking";
1
+ import type { Effort } from "@prometheus-ai/catalog/effort";
2
+ import { isVertexExpressOpenAIUrl, isVertexRawPredictUrl } from "@prometheus-ai/catalog/hosts";
7
3
  import {
8
4
  mapEffortToAnthropicAdaptiveEffort,
9
5
  mapEffortToGoogleThinkingLevel,
10
- modelOmitsReasoningEffort,
6
+ minimumSupportedEffort,
11
7
  requireSupportedEffort,
12
- } from "./model-thinking";
8
+ resolveWireModelId,
9
+ } from "@prometheus-ai/catalog/model-thinking";
10
+ import { CATALOG_PROVIDERS, type ProviderCatalogEntry } from "@prometheus-ai/catalog/provider-models";
11
+ import { $env, $pickenv, extractHttpStatusFromError } from "@prometheus-ai/utils";
12
+ import { getCustomApi } from "./api-registry";
13
+ import { type ApiKeyResolver, AUTH_RETRY_STEPS, isApiKeyResolver, resolveRetryKey } from "./auth-retry";
14
+ import { ProviderHttpError } from "./errors";
13
15
  import type { BedrockOptions } from "./providers/amazon-bedrock";
14
16
  import type { AnthropicOptions } from "./providers/anthropic";
15
17
  import type { CursorOptions } from "./providers/cursor";
@@ -46,6 +48,7 @@ import {
46
48
  import { isSyntheticModel, streamSynthetic } from "./providers/synthetic";
47
49
  import { streamXAIResponses } from "./providers/xai-responses";
48
50
  import { isUsageLimitError } from "./rate-limit-utils";
51
+ import { PROVIDER_REGISTRY } from "./registry";
49
52
  import type {
50
53
  Api,
51
54
  AssistantMessage,
@@ -60,29 +63,13 @@ import type {
60
63
  ToolChoice,
61
64
  } from "./types";
62
65
  import { AssistantMessageEventStream } from "./utils/event-stream";
63
- import { isFoundryEnabled } from "./utils/foundry";
64
66
  import { withRequestDebugFetch } from "./utils/request-debug";
65
67
 
66
- let cachedVertexAdcCredentialsExists: boolean | null = null;
67
-
68
- function hasVertexAdcCredentials(): boolean {
69
- if (cachedVertexAdcCredentialsExists === null) {
70
- const gacPath = $env.GOOGLE_APPLICATION_CREDENTIALS;
71
- if (gacPath) {
72
- cachedVertexAdcCredentialsExists = fs.existsSync(gacPath);
73
- } else {
74
- cachedVertexAdcCredentialsExists = fs.existsSync(
75
- path.join(os.homedir(), ".config", "gcloud", "application_default_credentials.json"),
76
- );
77
- }
78
- }
79
- return cachedVertexAdcCredentialsExists;
80
- }
81
68
  function isGoogleVertexAuthenticatedModel(model: Model<Api>): boolean {
82
69
  return (
83
70
  model.provider === "google-vertex" &&
84
- ((model.api === "openai-completions" && model.baseUrl.includes("/endpoints/openapi")) ||
85
- (model.api === "anthropic-messages" && model.baseUrl.includes(":streamRawPredict")))
71
+ ((model.api === "openai-completions" && isVertexExpressOpenAIUrl(model.baseUrl)) ||
72
+ (model.api === "anthropic-messages" && isVertexRawPredictUrl(model.baseUrl)))
86
73
  );
87
74
  }
88
75
 
@@ -94,7 +81,7 @@ function createVertexAuthenticatedFetch(options: StreamOptions | undefined): Fet
94
81
  headers.set("Authorization", `Bearer ${token}`);
95
82
  const rewritten = resolveVertexRequest(input);
96
83
  const url = rewritten instanceof Request ? rewritten.url : rewritten.toString();
97
- if (isVertexAnthropicRawPredict(url)) {
84
+ if (isVertexRawPredictUrl(url)) {
98
85
  const bodyText = await readVertexRequestBody(rewritten, init);
99
86
  const transformed = transformVertexAnthropicBody(bodyText);
100
87
  return baseFetch(url, {
@@ -109,10 +96,6 @@ function createVertexAuthenticatedFetch(options: StreamOptions | undefined): Fet
109
96
  return Object.assign(vertexFetch, baseFetch.preconnect ? { preconnect: baseFetch.preconnect } : {});
110
97
  }
111
98
 
112
- function isVertexAnthropicRawPredict(url: string): boolean {
113
- return url.includes(":streamRawPredict") || url.includes(":rawPredict");
114
- }
115
-
116
99
  async function readVertexRequestBody(input: string | URL | Request, init: RequestInit | undefined): Promise<string> {
117
100
  if (input instanceof Request) return input.clone().text();
118
101
  const body = init?.body;
@@ -174,101 +157,35 @@ function resolveVertexRequest(input: string | URL | Request): string | URL | Req
174
157
 
175
158
  type KeyResolver = string | (() => string | undefined);
176
159
 
177
- const serviceProviderMap: Record<string, KeyResolver> = {
178
- "alibaba-coding-plan": "ALIBABA_CODING_PLAN_API_KEY",
179
- openai: "OPENAI_API_KEY",
180
- google: "GEMINI_API_KEY",
181
- groq: "GROQ_API_KEY",
182
- cerebras: "CEREBRAS_API_KEY",
183
- xai: "XAI_API_KEY",
184
- "xai-oauth": () => $pickenv("XAI_OAUTH_TOKEN", "XAI_API_KEY"),
185
- fireworks: "FIREWORKS_API_KEY",
186
- firepass: "FIREPASS_API_KEY",
187
- "wafer-pass": "WAFER_PASS_API_KEY",
188
- "wafer-serverless": "WAFER_SERVERLESS_API_KEY",
189
- openrouter: "OPENROUTER_API_KEY",
190
- kilo: "KILO_API_KEY",
191
- "vercel-ai-gateway": "AI_GATEWAY_API_KEY",
192
- zai: "ZAI_API_KEY",
193
- "zhipu-coding-plan": "ZHIPU_API_KEY",
194
- mistral: "MISTRAL_API_KEY",
195
- minimax: "MINIMAX_API_KEY",
196
- "minimax-code": "MINIMAX_CODE_API_KEY",
197
- "minimax-code-cn": "MINIMAX_CODE_CN_API_KEY",
198
- "opencode-go": "OPENCODE_API_KEY",
199
- "opencode-zen": "OPENCODE_API_KEY",
200
- cursor: "CURSOR_ACCESS_TOKEN",
201
- deepseek: "DEEPSEEK_API_KEY",
202
- "openai-codex": "OPENAI_CODEX_OAUTH_TOKEN",
160
+ const LEGACY_ENV_KEYS: Record<string, KeyResolver> = {
161
+ // Non-provider / search-tool keys and API-name keys not modeled as registry provider defs.
203
162
  "azure-openai-responses": "AZURE_OPENAI_API_KEY",
163
+ "llama.cpp": "LLAMA_CPP_API_KEY",
204
164
  exa: "EXA_API_KEY",
205
165
  jina: "JINA_API_KEY",
206
166
  brave: "BRAVE_API_KEY",
207
- perplexity: "PERPLEXITY_API_KEY",
208
- tavily: "TAVILY_API_KEY",
209
- parallel: "PARALLEL_API_KEY",
210
- kagi: "KAGI_API_KEY",
211
- // GitHub Copilot uses GitHub personal access token
212
- "github-copilot": () => $pickenv("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"),
213
- // Foundry mode optionally switches Anthropic auth to enterprise gateway credentials.
214
- anthropic: () =>
215
- isFoundryEnabled()
216
- ? $pickenv("ANTHROPIC_FOUNDRY_API_KEY", "ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY")
217
- : $pickenv("ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY"),
218
- "gitlab-duo": "GITLAB_TOKEN",
219
- // Vertex AI supports either GOOGLE_CLOUD_API_KEY or Application Default Credentials.
220
- "google-vertex": () => {
221
- if ($env.GOOGLE_CLOUD_API_KEY) {
222
- return $env.GOOGLE_CLOUD_API_KEY;
223
- }
224
- const hasCredentials = hasVertexAdcCredentials();
225
- const hasProject = !!($env.GOOGLE_CLOUD_PROJECT || $env.GCP_PROJECT || $env.GCLOUD_PROJECT);
226
- const hasLocation = !!($env.GOOGLE_VERTEX_LOCATION || $env.GOOGLE_CLOUD_LOCATION || $env.VERTEX_LOCATION);
227
- if (hasCredentials && hasProject && hasLocation) {
228
- return "<authenticated>";
229
- }
230
- },
231
- // Amazon Bedrock supports multiple credential sources:
232
- // 1. AWS_BEARER_TOKEN_BEDROCK - Bedrock API keys (bearer token)
233
- // 2. AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY - standard IAM keys
234
- // 3. AWS_PROFILE - named profile from ~/.aws/credentials
235
- // 4. AWS_CONTAINER_CREDENTIALS_* - ECS/Task IAM role credentials
236
- // 5. AWS_WEB_IDENTITY_TOKEN_FILE + AWS_ROLE_ARN - IRSA (EKS) web identity
237
- "amazon-bedrock": () => {
238
- const hasEcsCredentials =
239
- !!$env.AWS_CONTAINER_CREDENTIALS_RELATIVE_URI || !!$env.AWS_CONTAINER_CREDENTIALS_FULL_URI;
240
- const hasWebIdentity = !!$env.AWS_WEB_IDENTITY_TOKEN_FILE && !!$env.AWS_ROLE_ARN;
241
- if (
242
- $env.AWS_PROFILE ||
243
- ($env.AWS_ACCESS_KEY_ID && $env.AWS_SECRET_ACCESS_KEY) ||
244
- $env.AWS_BEARER_TOKEN_BEDROCK ||
245
- hasEcsCredentials ||
246
- hasWebIdentity
247
- ) {
248
- return "<authenticated>";
249
- }
250
- },
251
- synthetic: "SYNTHETIC_API_KEY",
252
- "cloudflare-ai-gateway": "CLOUDFLARE_AI_GATEWAY_API_KEY",
253
- huggingface: () => $pickenv("HUGGINGFACE_HUB_TOKEN", "HF_TOKEN"),
254
- litellm: "LITELLM_API_KEY",
255
- moonshot: "MOONSHOT_API_KEY",
256
- nvidia: "NVIDIA_API_KEY",
257
- nanogpt: "NANO_GPT_API_KEY",
258
- "lm-studio": "LM_STUDIO_API_KEY",
259
- ollama: "OLLAMA_API_KEY",
260
- "ollama-cloud": "OLLAMA_CLOUD_API_KEY",
261
- "llama.cpp": "LLAMA_CPP_API_KEY",
262
- qianfan: "QIANFAN_API_KEY",
263
- "qwen-portal": () => $pickenv("QWEN_OAUTH_TOKEN", "QWEN_PORTAL_API_KEY"),
264
- together: "TOGETHER_API_KEY",
265
- zenmux: "ZENMUX_API_KEY",
266
- venice: "VENICE_API_KEY",
267
- vllm: "VLLM_API_KEY",
268
- xiaomi: "XIAOMI_API_KEY",
269
- "xiaomi-token-plan-sgp": "XIAOMI_TOKEN_PLAN_SGP_API_KEY",
270
- "xiaomi-token-plan-ams": "XIAOMI_TOKEN_PLAN_AMS_API_KEY",
271
- "xiaomi-token-plan-cn": "XIAOMI_TOKEN_PLAN_CN_API_KEY",
167
+ };
168
+
169
+ /**
170
+ * Env fallbacks derived from the catalog table — the single source for plain
171
+ * provider env-var names. Registry defs override with computed resolvers
172
+ * (Foundry/ADC/Bedrock probes); legacy non-provider keys merge last.
173
+ */
174
+ const CATALOG_ENTRY_ENV_KEYS = (CATALOG_PROVIDERS as readonly ProviderCatalogEntry[]).flatMap(provider => {
175
+ const envVars = provider.envVars;
176
+ if (!envVars || envVars.length === 0) return [];
177
+ const resolver: KeyResolver = envVars.length === 1 ? envVars[0] : () => $pickenv(...envVars);
178
+ return [[provider.id, resolver] as [string, KeyResolver]];
179
+ });
180
+
181
+ const serviceProviderMap: Record<string, KeyResolver> = {
182
+ ...Object.fromEntries(CATALOG_ENTRY_ENV_KEYS),
183
+ ...Object.fromEntries(
184
+ PROVIDER_REGISTRY.flatMap(provider =>
185
+ provider.envKeys != null ? [[provider.id, provider.envKeys] as [string, KeyResolver]] : [],
186
+ ),
187
+ ),
188
+ ...LEGACY_ENV_KEYS,
272
189
  };
273
190
 
274
191
  /**
@@ -285,6 +202,18 @@ export function getEnvApiKey(provider: string): string | undefined {
285
202
  return resolver?.();
286
203
  }
287
204
 
205
+ /**
206
+ * Name of the environment variable that backs `getEnvApiKey` for a provider,
207
+ * when that provider maps to a single named variable (e.g. `github-copilot` →
208
+ * `COPILOT_GITHUB_TOKEN`). Returns undefined for providers whose env fallback
209
+ * is computed (multi-var pickers, Vertex ADC / Bedrock probes, …) since no
210
+ * single variable name describes the source.
211
+ */
212
+ export function getEnvApiKeyName(provider: string): string | undefined {
213
+ const resolver = serviceProviderMap[provider];
214
+ return typeof resolver === "string" ? resolver : undefined;
215
+ }
216
+
288
217
  /**
289
218
  * Enumerate every provider that has an env-var fallback for `getEnvApiKey`.
290
219
  * Used by `prometheus auth-broker migrate --include-env` to discover env-sourced keys
@@ -424,11 +353,10 @@ function isRetryableUpstreamError(error: unknown, status: number | undefined, me
424
353
  return !!message && isUsageLimitError(message);
425
354
  }
426
355
 
427
- function createAssistantAuthError(message: AssistantMessage): Error & { status?: number } {
428
- const error: Error & { status?: number } = new Error(message.errorMessage ?? "Provider authentication failed");
356
+ function createAssistantAuthError(message: AssistantMessage): Error {
357
+ const text = message.errorMessage ?? "Provider authentication failed";
429
358
  const status = extractStatusFromAssistantError(message);
430
- if (status !== undefined) error.status = status;
431
- return error;
359
+ return status === undefined ? new Error(text) : new ProviderHttpError(text, status);
432
360
  }
433
361
 
434
362
  function emitBufferedEvents(stream: AssistantMessageEventStream, events: AssistantMessageEvent[]): void {
@@ -437,18 +365,39 @@ function emitBufferedEvents(stream: AssistantMessageEventStream, events: Assista
437
365
  }
438
366
  }
439
367
 
368
+ function createLegacyAuthErrorResolver(
369
+ provider: string,
370
+ initialKey: string,
371
+ onAuthError: NonNullable<SimpleStreamOptions["onAuthError"]>,
372
+ ): ApiKeyResolver {
373
+ let currentKey = initialKey;
374
+ return async ({ error }) => {
375
+ if (error === undefined) return currentKey;
376
+ const nextKey = (await onAuthError(provider, currentKey, error)) || undefined;
377
+ if (nextKey !== undefined) currentKey = nextKey;
378
+ return nextKey;
379
+ };
380
+ }
381
+
440
382
  export function streamSimple<TApi extends Api>(
441
383
  model: Model<TApi>,
442
384
  context: Context,
443
385
  options?: SimpleStreamOptions,
444
386
  ): AssistantMessageEventStream {
445
387
  const requestOptions = withRequestDebugFetch(options);
446
- const retryApiKey = requestOptions?.onAuthError
447
- ? (requestOptions.apiKey ?? getEnvApiKey(model.provider))
448
- : undefined;
449
- if (retryApiKey) {
388
+ const legacyAuthErrorResolver =
389
+ typeof requestOptions?.apiKey === "string" && requestOptions.onAuthError
390
+ ? createLegacyAuthErrorResolver(model.provider, requestOptions.apiKey, requestOptions.onAuthError)
391
+ : undefined;
392
+ const apiKeyResolver = isApiKeyResolver(requestOptions?.apiKey) ? requestOptions.apiKey : legacyAuthErrorResolver;
393
+ if (apiKeyResolver) {
450
394
  const outer = new AssistantMessageEventStream();
451
- const onAuthError = requestOptions!.onAuthError!;
395
+ const signal = requestOptions?.signal;
396
+ // One inner attempt against a resolved string key. When
397
+ // `captureAuthFailure` is set, a retryable auth error that arrives before
398
+ // any replay-unsafe event is buffered and returned (so the caller can
399
+ // retry with a fresh key) instead of surfaced. The terminal attempt
400
+ // clears the flag and emits whatever it gets.
452
401
  const runAttempt = async (apiKey: string, captureAuthFailure: boolean): Promise<AuthRetryFailure | undefined> => {
453
402
  const bufferedEvents: AssistantMessageEvent[] = [];
454
403
  let emittedReplayUnsafeEvent = false;
@@ -458,7 +407,8 @@ export function streamSimple<TApi extends Api>(
458
407
  };
459
408
 
460
409
  try {
461
- const inner = streamSimple(model, context, { ...requestOptions, apiKey, onAuthError: undefined });
410
+ const innerOptions: SimpleStreamOptions = { ...requestOptions, apiKey, onAuthError: undefined };
411
+ const inner = streamSimple(model, context, innerOptions);
462
412
  for await (const event of inner) {
463
413
  if (!emittedReplayUnsafeEvent && event.type === "start") {
464
414
  bufferedEvents.push(event);
@@ -510,19 +460,43 @@ export function streamSimple<TApi extends Api>(
510
460
  };
511
461
 
512
462
  void (async () => {
513
- const failure = await runAttempt(retryApiKey, true);
514
- if (!failure) return;
515
- let nextKey: string | undefined;
463
+ let lastKey: string | undefined;
516
464
  try {
517
- nextKey = await onAuthError(model.provider, retryApiKey, failure.error);
518
- } catch {
519
- nextKey = undefined;
465
+ lastKey = (await apiKeyResolver({ lastChance: false, error: undefined, signal })) || undefined;
466
+ } catch (error) {
467
+ // A thrown resolver is a broker/OAuth/network failure, not a missing
468
+ // key — surface the cause instead of masking it as "No API key".
469
+ outer.fail(
470
+ new Error(
471
+ `Failed to resolve API key for provider ${model.provider}: ${error instanceof Error ? error.message : String(error)}`,
472
+ { cause: error },
473
+ ),
474
+ );
475
+ return;
520
476
  }
521
- if (!nextKey || nextKey === retryApiKey) {
522
- emitFailure(failure);
477
+ if (lastKey === undefined) {
478
+ outer.fail(new Error(`No API key for provider: ${model.provider}`));
523
479
  return;
524
480
  }
525
- await runAttempt(nextKey, false);
481
+ let failure = await runAttempt(lastKey, true);
482
+ if (!failure) return;
483
+ // a/b/c policy: refresh the same account (lastChance=false), then
484
+ // switch to a sibling (lastChance=true). A step is skipped when the
485
+ // resolver yields the same key it just tried or `undefined`; the
486
+ // final step's attempt clears the capture flag so it emits directly.
487
+ for (let step = 0; step < AUTH_RETRY_STEPS.length; step++) {
488
+ // Caller aborted between attempts: don't mint a fresh token or fire
489
+ // another doomed request — emit the captured failure instead.
490
+ if (signal?.aborted) break;
491
+ const nextKey = await resolveRetryKey(apiKeyResolver, AUTH_RETRY_STEPS[step]!, failure.error, signal);
492
+ if (nextKey === undefined || nextKey === lastKey) continue;
493
+ lastKey = nextKey;
494
+ const isLastStep = step === AUTH_RETRY_STEPS.length - 1;
495
+ const next = await runAttempt(nextKey, !isLastStep);
496
+ if (!next) return;
497
+ failure = next;
498
+ }
499
+ emitFailure(failure);
526
500
  })();
527
501
  return outer;
528
502
  }
@@ -553,7 +527,10 @@ export function streamSimple<TApi extends Api>(
553
527
  return stream(model, context, providerOptions);
554
528
  }
555
529
 
556
- const apiKey = requestOptions?.apiKey || getEnvApiKey(model.provider);
530
+ // The resolver form is handled by the wrapper above; only a static string
531
+ // key reaches this point.
532
+ const apiKey =
533
+ (typeof requestOptions?.apiKey === "string" ? requestOptions.apiKey : undefined) || getEnvApiKey(model.provider);
557
534
  if (!apiKey) {
558
535
  throw new Error(`No API key for provider: ${model.provider}`);
559
536
  }
@@ -599,6 +576,16 @@ export async function completeSimple<TApi extends Api>(
599
576
  }
600
577
 
601
578
  const MIN_OUTPUT_TOKENS = 1024;
579
+ // Fallback total output cap for models whose catalog entry has no maxTokens.
580
+ const OUTPUT_CAP_WHEN_UNKNOWN = 64_000;
581
+ function maxTokensWithThinkingBudget(
582
+ baseMaxTokens: number | undefined,
583
+ modelMaxTokens: number | null,
584
+ thinkingBudget: number,
585
+ ): number {
586
+ const uncappedMaxTokens = baseMaxTokens === undefined ? OUTPUT_CAP_WHEN_UNKNOWN : baseMaxTokens + thinkingBudget;
587
+ return Math.min(uncappedMaxTokens, modelMaxTokens ?? Number.POSITIVE_INFINITY);
588
+ }
602
589
  export const OUTPUT_FALLBACK_BUFFER = 4000;
603
590
  const ANTHROPIC_USE_INTERLEAVED_THINKING = Bun.env.PROMETHEUS_NO_INTERLEAVED_THINKING !== "1";
604
591
 
@@ -697,24 +684,53 @@ function resolveOpenAiReasoningEffort<TApi extends Api>(
697
684
  ): Effort | undefined {
698
685
  const reasoning = options?.reasoning;
699
686
  if (!reasoning || !model.reasoning) return undefined;
700
- // Models with compat.supportsReasoningEffort: false reason natively but
701
- // reject the wire effort param. The wire-side omitReasoningEffort gate
702
- // (providers/xai-responses.ts:78) is the actual strip; returning
703
- // undefined here avoids a redundant requireSupportedEffort throw that
704
- // would defeat the gate and surface a confusing
705
- // "Compaction failed: Thinking effort high is not supported by..." to
706
- // the user.
707
- if (modelOmitsReasoningEffort(model)) return undefined;
687
+ // Models that reason natively but expose no effort dial carry
688
+ // `thinking: undefined` (baked at build time from
689
+ // `compat.supportsReasoningEffort: false` on openai-responses*). The
690
+ // wire-side omitReasoningEffort gate (providers/xai-responses.ts:78) is the
691
+ // actual strip; returning undefined here avoids a redundant
692
+ // requireSupportedEffort throw that would defeat the gate and surface a
693
+ // confusing "Compaction failed: Thinking effort high is not supported
694
+ // by..." to the user.
695
+ if (!model.thinking) return undefined;
708
696
  return requireSupportedEffort(model, reasoning);
709
697
  }
710
698
 
711
699
  const castApi = <TApi extends Api>(api: OptionsForApi<TApi>): OptionsForApi<Api> => api as OptionsForApi<Api>;
712
700
 
713
- function mapOptionsForApi<TApi extends Api>(
701
+ /**
702
+ * Mandatory-reasoning endpoints (`thinking.requiresEffort`) reject disabled
703
+ * or omitted thinking ("Reasoning is mandatory for this endpoint and cannot
704
+ * be disabled") — clamp to the lowest supported effort instead.
705
+ * `suppressWhenOff` models handle off provider-side via explicit wire
706
+ * suppression. Collapsed pairs interplay: pair derivation strips member
707
+ * flags (off routes to a bare SKU that CAN disable), while identity backfill
708
+ * re-flags pairs whose logical id is itself mandatory (Gemini 3.x) — there
709
+ * the clamp wins and the floored effort routes to the thinking SKU.
710
+ */
711
+ function normalizeMandatoryReasoningOptions<TApi extends Api>(
714
712
  model: Model<TApi>,
715
713
  options?: SimpleStreamOptions,
714
+ ): SimpleStreamOptions | undefined {
715
+ if (
716
+ !model.reasoning ||
717
+ !model.thinking?.requiresEffort ||
718
+ model.thinking.suppressWhenOff ||
719
+ (options?.reasoning !== undefined && !options.disableReasoning)
720
+ ) {
721
+ return options;
722
+ }
723
+ const floor = minimumSupportedEffort(model);
724
+ if (floor === undefined) return options;
725
+ return { ...options, reasoning: floor, disableReasoning: undefined };
726
+ }
727
+
728
+ function mapOptionsForApi<TApi extends Api>(
729
+ model: Model<TApi>,
730
+ rawOptions?: SimpleStreamOptions,
716
731
  apiKey?: string,
717
732
  ): OptionsForApi<TApi> {
733
+ const options = normalizeMandatoryReasoningOptions(model, rawOptions);
718
734
  const base = {
719
735
  temperature: options?.temperature,
720
736
  topP: options?.topP,
@@ -722,9 +738,9 @@ function mapOptionsForApi<TApi extends Api>(
722
738
  minP: options?.minP,
723
739
  presencePenalty: options?.presencePenalty,
724
740
  repetitionPenalty: options?.repetitionPenalty,
725
- maxTokens: options?.maxTokens ?? model.maxTokens,
741
+ maxTokens: options?.maxTokens ?? model.maxTokens ?? undefined,
726
742
  signal: options?.signal,
727
- apiKey: apiKey || options?.apiKey,
743
+ apiKey: apiKey ?? (typeof options?.apiKey === "string" ? options.apiKey : undefined),
728
744
  cacheRetention: options?.cacheRetention,
729
745
  headers: options?.headers,
730
746
  initiatorOverride: options?.initiatorOverride,
@@ -750,6 +766,7 @@ function mapOptionsForApi<TApi extends Api>(
750
766
  if (!reasoning || !model.reasoning) {
751
767
  return castApi<"anthropic-messages">({
752
768
  ...base,
769
+ requestModelId: resolveWireModelId(model, undefined),
753
770
  thinkingEnabled: false,
754
771
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
755
772
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
@@ -761,6 +778,7 @@ function mapOptionsForApi<TApi extends Api>(
761
778
  if (thinkingBudget <= 0) {
762
779
  return castApi<"anthropic-messages">({
763
780
  ...base,
781
+ requestModelId: resolveWireModelId(model, undefined),
764
782
  thinkingEnabled: false,
765
783
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
766
784
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
@@ -774,6 +792,7 @@ function mapOptionsForApi<TApi extends Api>(
774
792
  const effort = mapEffortToAnthropicAdaptiveEffort(model, reasoning);
775
793
  return castApi<"anthropic-messages">({
776
794
  ...base,
795
+ requestModelId: resolveWireModelId(model, reasoning),
777
796
  thinkingEnabled: true,
778
797
  effort,
779
798
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
@@ -785,6 +804,7 @@ function mapOptionsForApi<TApi extends Api>(
785
804
  if (ANTHROPIC_USE_INTERLEAVED_THINKING) {
786
805
  return castApi<"anthropic-messages">({
787
806
  ...base,
807
+ requestModelId: resolveWireModelId(model, reasoning),
788
808
  thinkingEnabled: true,
789
809
  thinkingBudgetTokens: thinkingBudget,
790
810
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
@@ -793,8 +813,8 @@ function mapOptionsForApi<TApi extends Api>(
793
813
  });
794
814
  }
795
815
 
796
- // Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
797
- const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
816
+ // Caller's maxTokens is desired output, so add thinking budget on top. With no caller/model cap, use a finite total fallback.
817
+ const maxTokens = maxTokensWithThinkingBudget(base.maxTokens, model.maxTokens, thinkingBudget);
798
818
 
799
819
  // If not enough room for thinking + output, reduce thinking budget
800
820
  if (maxTokens <= thinkingBudget) {
@@ -805,6 +825,7 @@ function mapOptionsForApi<TApi extends Api>(
805
825
  if (thinkingBudget <= 0) {
806
826
  return castApi<"anthropic-messages">({
807
827
  ...base,
828
+ requestModelId: resolveWireModelId(model, undefined),
808
829
  thinkingEnabled: false,
809
830
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
810
831
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
@@ -814,6 +835,7 @@ function mapOptionsForApi<TApi extends Api>(
814
835
  return castApi<"anthropic-messages">({
815
836
  ...base,
816
837
  maxTokens,
838
+ requestModelId: resolveWireModelId(model, reasoning),
817
839
  thinkingEnabled: true,
818
840
  thinkingBudgetTokens: thinkingBudget,
819
841
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
@@ -837,10 +859,13 @@ function mapOptionsForApi<TApi extends Api>(
837
859
  }
838
860
  const budgetInfo = resolveBedrockThinkingBudget(model as Model<"bedrock-converse-stream">, options);
839
861
  if (!budgetInfo) return bedrockBase as OptionsForApi<TApi>;
840
- let maxTokens = bedrockBase.maxTokens ?? model.maxTokens;
862
+ let maxTokens = bedrockBase.maxTokens ?? model.maxTokens ?? OUTPUT_CAP_WHEN_UNKNOWN;
841
863
  let thinkingBudgets = bedrockBase.thinkingBudgets;
842
864
  if (maxTokens <= budgetInfo.budget) {
843
- const desiredMaxTokens = Math.min(model.maxTokens, budgetInfo.budget + MIN_OUTPUT_TOKENS);
865
+ const desiredMaxTokens = Math.min(
866
+ model.maxTokens ?? Number.POSITIVE_INFINITY,
867
+ budgetInfo.budget + MIN_OUTPUT_TOKENS,
868
+ );
844
869
  if (desiredMaxTokens > maxTokens) {
845
870
  maxTokens = desiredMaxTokens;
846
871
  }
@@ -912,7 +937,7 @@ function mapOptionsForApi<TApi extends Api>(
912
937
  ...base,
913
938
  thinking: {
914
939
  enabled: true,
915
- level: mapEffortToGoogleThinkingLevel(googleModel, effort),
940
+ level: mapEffortToGoogleThinkingLevel(effort),
916
941
  },
917
942
  toolChoice: mapGoogleToolChoice(options?.toolChoice),
918
943
  });
@@ -930,53 +955,57 @@ function mapOptionsForApi<TApi extends Api>(
930
955
 
931
956
  case "google-gemini-cli": {
932
957
  const reasoning = options?.reasoning;
933
- if (!reasoning || !model.reasoning) {
934
- return castApi<"google-gemini-cli">({
935
- ...base,
936
- thinking: { enabled: false },
937
- toolChoice: mapGoogleToolChoice(options?.toolChoice),
938
- });
939
- }
940
-
941
- const effort = requireSupportedEffort(model, reasoning);
958
+ const toolChoice = mapGoogleToolChoice(options?.toolChoice);
959
+ if (reasoning && model.reasoning) {
960
+ const effort = requireSupportedEffort(model, reasoning);
961
+
962
+ // Gemini 3+ models use thinkingLevel instead of thinkingBudget
963
+ if (model.thinking?.mode === "google-level") {
964
+ return castApi<"google-gemini-cli">({
965
+ ...base,
966
+ requestModelId: resolveWireModelId(model, effort),
967
+ thinking: {
968
+ enabled: true,
969
+ level: mapEffortToGoogleThinkingLevel(effort),
970
+ },
971
+ toolChoice,
972
+ });
973
+ }
942
974
 
943
- // Gemini 3+ models use thinkingLevel instead of thinkingBudget
944
- if (model.thinking?.mode === "google-level") {
945
- return castApi<"google-gemini-cli">({
946
- ...base,
947
- thinking: {
948
- enabled: true,
949
- level: mapEffortToGoogleThinkingLevel(model, effort),
950
- },
951
- toolChoice: mapGoogleToolChoice(options?.toolChoice),
952
- });
953
- }
975
+ let thinkingBudget = options.thinkingBudgets?.[effort] ?? GOOGLE_THINKING[effort];
954
976
 
955
- let thinkingBudget = options.thinkingBudgets?.[effort] ?? GOOGLE_THINKING[effort];
977
+ // Caller's maxTokens is desired output, so add thinking budget on top. With no caller/model cap, use a finite total fallback.
978
+ const maxTokens = maxTokensWithThinkingBudget(base.maxTokens, model.maxTokens, thinkingBudget);
956
979
 
957
- // Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
958
- const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
980
+ // If not enough room for thinking + output, reduce thinking budget
981
+ if (maxTokens <= thinkingBudget) {
982
+ thinkingBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS);
983
+ }
959
984
 
960
- // If not enough room for thinking + output, reduce thinking budget
961
- if (maxTokens <= thinkingBudget) {
962
- thinkingBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS) ?? 0;
985
+ if (thinkingBudget > 0) {
986
+ return castApi<"google-gemini-cli">({
987
+ ...base,
988
+ maxTokens,
989
+ requestModelId: resolveWireModelId(model, effort),
990
+ thinking: { enabled: true, budgetTokens: thinkingBudget },
991
+ toolChoice,
992
+ });
993
+ }
994
+ // Budget clamped to zero — fall through to the thinking-off path.
963
995
  }
964
996
 
965
- // If thinking budget is too low, disable thinking
966
- if (thinkingBudget <= 0) {
967
- return castApi<"google-gemini-cli">({
968
- ...base,
969
- thinking: { enabled: false },
970
- toolChoice: mapGoogleToolChoice(options?.toolChoice),
971
- });
972
- } else {
973
- return castApi<"google-gemini-cli">({
974
- ...base,
975
- maxTokens,
976
- thinking: { enabled: true, budgetTokens: thinkingBudget },
977
- toolChoice: mapGoogleToolChoice(options?.toolChoice),
978
- });
997
+ const thinking: GoogleGeminiCliOptions["thinking"] = { enabled: false };
998
+ if (model.reasoning && model.thinking?.suppressWhenOff) {
999
+ // CCA re-applies the per-id baked server default when the config
1000
+ // is omitted; suppression must be explicit on the wire.
1001
+ thinking.suppress = model.thinking.mode === "google-level" ? { level: "MINIMAL" } : { budget: 0 };
979
1002
  }
1003
+ return castApi<"google-gemini-cli">({
1004
+ ...base,
1005
+ requestModelId: resolveWireModelId(model, undefined),
1006
+ thinking,
1007
+ toolChoice,
1008
+ });
980
1009
  }
981
1010
 
982
1011
  case "google-vertex": {
@@ -999,7 +1028,7 @@ function mapOptionsForApi<TApi extends Api>(
999
1028
  ...base,
1000
1029
  thinking: {
1001
1030
  enabled: true,
1002
- level: mapEffortToGoogleThinkingLevel(geminiModel, effort),
1031
+ level: mapEffortToGoogleThinkingLevel(effort),
1003
1032
  },
1004
1033
  toolChoice: mapGoogleToolChoice(options?.toolChoice),
1005
1034
  });
@@ -1019,6 +1048,7 @@ function mapOptionsForApi<TApi extends Api>(
1019
1048
  return castApi<"ollama-chat">({
1020
1049
  ...base,
1021
1050
  reasoning: resolveOpenAiReasoningEffort(model, options),
1051
+ disableReasoning: options?.disableReasoning,
1022
1052
  toolChoice: options?.toolChoice,
1023
1053
  });
1024
1054