@prometheus-ai/ai 0.5.3 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/dist/types/auth-broker/remote-store.d.ts +2 -1
  2. package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
  3. package/dist/types/auth-gateway/server.d.ts +19 -0
  4. package/dist/types/auth-gateway/types.d.ts +9 -3
  5. package/dist/types/auth-retry.d.ts +119 -0
  6. package/dist/types/auth-storage.d.ts +217 -8
  7. package/dist/types/errors.d.ts +24 -0
  8. package/dist/types/index.d.ts +5 -9
  9. package/dist/types/provider-details.d.ts +1 -1
  10. package/dist/types/providers/amazon-bedrock.d.ts +12 -6
  11. package/dist/types/providers/anthropic-client.d.ts +10 -3
  12. package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
  13. package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
  14. package/dist/types/providers/anthropic-wire.d.ts +3 -3
  15. package/dist/types/providers/anthropic.d.ts +41 -34
  16. package/dist/types/providers/aws-credentials.d.ts +8 -0
  17. package/dist/types/providers/azure-openai-responses.d.ts +1 -0
  18. package/dist/types/providers/google-gemini-cli.d.ts +22 -1
  19. package/dist/types/providers/google-shared.d.ts +22 -0
  20. package/dist/types/providers/google-types.d.ts +13 -1
  21. package/dist/types/providers/mock.d.ts +8 -3
  22. package/dist/types/providers/ollama.d.ts +6 -0
  23. package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
  24. package/dist/types/providers/openai-chat-server.d.ts +3 -3
  25. package/dist/types/providers/openai-chat-wire.d.ts +644 -0
  26. package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
  27. package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
  28. package/dist/types/providers/openai-codex-responses.d.ts +31 -2
  29. package/dist/types/providers/openai-completions-compat.d.ts +2 -25
  30. package/dist/types/providers/openai-completions.d.ts +2 -10
  31. package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
  32. package/dist/types/providers/openai-responses-server.d.ts +2 -2
  33. package/dist/types/providers/openai-responses-shared.d.ts +49 -9
  34. package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
  35. package/dist/types/providers/openai-responses.d.ts +13 -4
  36. package/dist/types/providers/prometheus-native-client.d.ts +9 -0
  37. package/dist/types/providers/prometheus-native-server.d.ts +4 -3
  38. package/dist/types/providers/transform-messages.d.ts +1 -2
  39. package/dist/types/rate-limit-utils.d.ts +3 -2
  40. package/dist/types/registry/aimlapi.d.ts +4 -0
  41. package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
  42. package/dist/types/registry/amazon-bedrock.d.ts +5 -0
  43. package/dist/types/registry/anthropic.d.ts +10 -0
  44. package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
  45. package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
  46. package/dist/types/registry/cerebras.d.ts +7 -0
  47. package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
  48. package/dist/types/registry/cursor.d.ts +7 -0
  49. package/dist/types/registry/deepseek.d.ts +8 -0
  50. package/dist/types/registry/derived.d.ts +5 -0
  51. package/dist/types/registry/firepass.d.ts +16 -0
  52. package/dist/types/registry/fireworks.d.ts +7 -0
  53. package/dist/types/registry/github-copilot.d.ts +7 -0
  54. package/dist/types/registry/gitlab-duo.d.ts +9 -0
  55. package/dist/types/registry/google-antigravity.d.ts +9 -0
  56. package/dist/types/registry/google-gemini-cli.d.ts +9 -0
  57. package/dist/types/registry/google-vertex.d.ts +5 -0
  58. package/dist/types/registry/google.d.ts +4 -0
  59. package/dist/types/registry/groq.d.ts +4 -0
  60. package/dist/types/registry/huggingface.d.ts +7 -0
  61. package/dist/types/registry/index.d.ts +4 -0
  62. package/dist/types/registry/kagi.d.ts +14 -0
  63. package/dist/types/registry/kilo.d.ts +7 -0
  64. package/dist/types/registry/kimi-code.d.ts +7 -0
  65. package/dist/types/registry/litellm.d.ts +13 -0
  66. package/dist/types/registry/lm-studio.d.ts +8 -0
  67. package/dist/types/registry/minimax-code-cn.d.ts +6 -0
  68. package/dist/types/registry/minimax-code.d.ts +6 -0
  69. package/dist/types/registry/minimax.d.ts +4 -0
  70. package/dist/types/registry/mistral.d.ts +4 -0
  71. package/dist/types/registry/moonshot.d.ts +7 -0
  72. package/dist/types/registry/nanogpt.d.ts +7 -0
  73. package/dist/types/registry/nvidia.d.ts +7 -0
  74. package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  75. package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
  76. package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
  77. package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
  78. package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
  79. package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
  80. package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
  81. package/dist/types/registry/ollama-cloud.d.ts +7 -0
  82. package/dist/types/registry/ollama.d.ts +12 -0
  83. package/dist/types/registry/openai-codex-device.d.ts +8 -0
  84. package/dist/types/registry/openai-codex.d.ts +9 -0
  85. package/dist/types/registry/openai.d.ts +4 -0
  86. package/dist/types/registry/opencode-go.d.ts +6 -0
  87. package/dist/types/registry/opencode-zen.d.ts +6 -0
  88. package/dist/types/registry/openrouter.d.ts +13 -0
  89. package/dist/types/registry/parallel.d.ts +14 -0
  90. package/dist/types/registry/perplexity.d.ts +7 -0
  91. package/dist/types/registry/qianfan.d.ts +7 -0
  92. package/dist/types/registry/qwen-portal.d.ts +7 -0
  93. package/dist/types/registry/registry.d.ts +272 -0
  94. package/dist/types/registry/synthetic.d.ts +6 -0
  95. package/dist/types/registry/tavily.d.ts +14 -0
  96. package/dist/types/registry/together.d.ts +6 -0
  97. package/dist/types/registry/types.d.ts +51 -0
  98. package/dist/types/registry/venice.d.ts +13 -0
  99. package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
  100. package/dist/types/registry/vllm.d.ts +7 -0
  101. package/dist/types/registry/wafer-pass.d.ts +6 -0
  102. package/dist/types/registry/wafer-serverless.d.ts +6 -0
  103. package/dist/types/registry/xai-oauth.d.ts +7 -0
  104. package/dist/types/registry/xai.d.ts +4 -0
  105. package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
  106. package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
  107. package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
  108. package/dist/types/registry/xiaomi.d.ts +6 -0
  109. package/dist/types/registry/zai.d.ts +7 -0
  110. package/dist/types/registry/zenmux.d.ts +7 -0
  111. package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
  112. package/dist/types/stream.d.ts +9 -1
  113. package/dist/types/types.d.ts +56 -295
  114. package/dist/types/usage/google-antigravity.d.ts +15 -1
  115. package/dist/types/usage/openai-codex-reset.d.ts +79 -0
  116. package/dist/types/usage/openai-codex.d.ts +1 -0
  117. package/dist/types/usage.d.ts +77 -4
  118. package/dist/types/utils/abort.d.ts +6 -0
  119. package/dist/types/utils/event-stream.d.ts +2 -0
  120. package/dist/types/utils/http-inspector.d.ts +0 -1
  121. package/dist/types/utils/idle-iterator.d.ts +35 -0
  122. package/dist/types/utils/openai-http.d.ts +58 -0
  123. package/dist/types/utils/request-debug.d.ts +3 -0
  124. package/dist/types/utils/retry-after.d.ts +1 -0
  125. package/dist/types/utils/schema/fields.d.ts +5 -0
  126. package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
  127. package/dist/types/utils/schema/stamps.d.ts +7 -15
  128. package/dist/types/utils/sse-debug.d.ts +0 -5
  129. package/dist/types/utils/stream-markup-healing.d.ts +2 -0
  130. package/dist/types/utils.d.ts +1 -5
  131. package/package.json +17 -29
  132. package/src/auth-broker/remote-store.ts +10 -1
  133. package/src/auth-broker/snapshot-cache.ts +1 -1
  134. package/src/auth-broker/wire-schemas.ts +1 -1
  135. package/src/auth-gateway/http.ts +1 -1
  136. package/src/auth-gateway/server.ts +95 -30
  137. package/src/auth-gateway/types.ts +10 -2
  138. package/src/auth-retry.ts +238 -0
  139. package/src/auth-storage.ts +935 -430
  140. package/src/errors.ts +32 -0
  141. package/src/index.ts +9 -14
  142. package/src/provider-details.ts +1 -1
  143. package/src/providers/__tests__/google-auth.test.ts +144 -0
  144. package/src/providers/amazon-bedrock.ts +70 -40
  145. package/src/providers/anthropic-client.ts +15 -13
  146. package/src/providers/anthropic-messages-server-schema.ts +17 -7
  147. package/src/providers/anthropic-messages-server.ts +88 -20
  148. package/src/providers/anthropic-wire.ts +4 -3
  149. package/src/providers/anthropic.ts +1234 -621
  150. package/src/providers/aws-credentials.ts +47 -5
  151. package/src/providers/aws-eventstream.ts +5 -0
  152. package/src/providers/azure-openai-responses.ts +117 -67
  153. package/src/providers/cursor.ts +30 -30
  154. package/src/providers/github-copilot-headers.ts +1 -1
  155. package/src/providers/gitlab-duo.ts +36 -29
  156. package/src/providers/google-auth.ts +71 -8
  157. package/src/providers/google-gemini-cli.ts +118 -22
  158. package/src/providers/google-shared.ts +163 -43
  159. package/src/providers/google-types.ts +10 -1
  160. package/src/providers/kimi.ts +1 -1
  161. package/src/providers/mock.ts +11 -3
  162. package/src/providers/ollama.ts +64 -7
  163. package/src/providers/openai-anthropic-shim.ts +17 -8
  164. package/src/providers/openai-chat-server-schema.ts +9 -3
  165. package/src/providers/openai-chat-server.ts +82 -16
  166. package/src/providers/openai-chat-wire.ts +847 -0
  167. package/src/providers/openai-codex/request-transformer.ts +129 -34
  168. package/src/providers/openai-codex/response-handler.ts +22 -1
  169. package/src/providers/openai-codex-responses.ts +699 -247
  170. package/src/providers/openai-completions-compat.ts +8 -308
  171. package/src/providers/openai-completions.ts +416 -267
  172. package/src/providers/openai-responses-server-schema.ts +15 -9
  173. package/src/providers/openai-responses-server.ts +162 -114
  174. package/src/providers/openai-responses-shared.ts +320 -82
  175. package/src/providers/openai-responses-wire.ts +6391 -0
  176. package/src/providers/openai-responses.ts +382 -176
  177. package/src/providers/prometheus-native-client.ts +27 -11
  178. package/src/providers/prometheus-native-server.ts +44 -17
  179. package/src/providers/transform-messages.ts +311 -120
  180. package/src/providers/vision-guard.ts +5 -3
  181. package/src/rate-limit-utils.ts +13 -3
  182. package/src/registry/aimlapi.ts +6 -0
  183. package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
  184. package/src/registry/amazon-bedrock.ts +22 -0
  185. package/src/registry/anthropic.ts +26 -0
  186. package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
  187. package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
  188. package/src/{utils/oauth → registry}/cerebras.ts +8 -1
  189. package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
  190. package/src/registry/cursor.ts +20 -0
  191. package/src/{utils/oauth → registry}/deepseek.ts +9 -17
  192. package/src/registry/derived.ts +9 -0
  193. package/src/{utils/oauth → registry}/firepass.ts +10 -2
  194. package/src/{utils/oauth → registry}/fireworks.ts +8 -1
  195. package/src/registry/github-copilot.ts +22 -0
  196. package/src/registry/gitlab-duo.ts +19 -0
  197. package/src/registry/google-antigravity.ts +21 -0
  198. package/src/registry/google-gemini-cli.ts +21 -0
  199. package/src/registry/google-vertex.ts +38 -0
  200. package/src/registry/google.ts +6 -0
  201. package/src/registry/groq.ts +6 -0
  202. package/src/{utils/oauth → registry}/huggingface.ts +8 -19
  203. package/src/registry/index.ts +4 -0
  204. package/src/{utils/oauth → registry}/kagi.ts +9 -11
  205. package/src/{utils/oauth → registry}/kilo.ts +11 -6
  206. package/src/registry/kimi-code.ts +17 -0
  207. package/src/{utils/oauth → registry}/litellm.ts +8 -12
  208. package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
  209. package/src/registry/minimax-code-cn.ts +12 -0
  210. package/src/registry/minimax-code.ts +12 -0
  211. package/src/registry/minimax.ts +6 -0
  212. package/src/registry/mistral.ts +6 -0
  213. package/src/{utils/oauth → registry}/moonshot.ts +8 -9
  214. package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
  215. package/src/{utils/oauth → registry}/nvidia.ts +8 -18
  216. package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
  217. package/src/{utils → registry}/oauth/anthropic.ts +38 -17
  218. package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
  219. package/src/registry/oauth/gitlab-duo.ts +198 -0
  220. package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
  221. package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
  222. package/src/registry/oauth/index.ts +164 -0
  223. package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
  224. package/src/{utils → registry}/oauth/types.ts +7 -51
  225. package/src/{utils → registry}/oauth/wafer.ts +1 -1
  226. package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
  227. package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
  228. package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
  229. package/src/{utils/oauth → registry}/ollama.ts +8 -13
  230. package/src/registry/openai-codex-device.ts +18 -0
  231. package/src/registry/openai-codex.ts +19 -0
  232. package/src/registry/openai.ts +6 -0
  233. package/src/registry/opencode-go.ts +12 -0
  234. package/src/registry/opencode-zen.ts +12 -0
  235. package/src/{utils/oauth → registry}/openrouter.ts +10 -2
  236. package/src/{utils/oauth → registry}/parallel.ts +9 -11
  237. package/src/registry/perplexity.ts +13 -0
  238. package/src/{utils/oauth → registry}/qianfan.ts +8 -17
  239. package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
  240. package/src/registry/registry.ts +149 -0
  241. package/src/{utils/oauth → registry}/synthetic.ts +7 -1
  242. package/src/{utils/oauth → registry}/tavily.ts +10 -12
  243. package/src/{utils/oauth → registry}/together.ts +7 -1
  244. package/src/registry/types.ts +56 -0
  245. package/src/{utils/oauth → registry}/venice.ts +8 -12
  246. package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
  247. package/src/{utils/oauth → registry}/vllm.ts +9 -16
  248. package/src/registry/wafer-pass.ts +12 -0
  249. package/src/registry/wafer-serverless.ts +12 -0
  250. package/src/registry/xai-oauth.ts +17 -0
  251. package/src/registry/xai.ts +6 -0
  252. package/src/registry/xiaomi-token-plan-ams.ts +12 -0
  253. package/src/registry/xiaomi-token-plan-cn.ts +12 -0
  254. package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
  255. package/src/registry/xiaomi.ts +12 -0
  256. package/src/{utils/oauth → registry}/zai.ts +10 -22
  257. package/src/{utils/oauth → registry}/zenmux.ts +8 -1
  258. package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
  259. package/src/stream.ts +229 -199
  260. package/src/types.ts +63 -384
  261. package/src/usage/claude.ts +4 -2
  262. package/src/usage/github-copilot.ts +4 -2
  263. package/src/usage/google-antigravity.ts +196 -28
  264. package/src/usage/kimi.ts +1 -1
  265. package/src/usage/minimax-code.ts +5 -6
  266. package/src/usage/openai-codex-reset.ts +174 -0
  267. package/src/usage/openai-codex.ts +19 -2
  268. package/src/usage/zai.ts +2 -1
  269. package/src/usage.ts +93 -4
  270. package/src/utils/abort.ts +14 -0
  271. package/src/utils/event-stream.ts +17 -0
  272. package/src/utils/http-inspector.ts +4 -12
  273. package/src/utils/idle-iterator.ts +250 -79
  274. package/src/utils/openai-http.ts +157 -0
  275. package/src/utils/request-debug.ts +67 -19
  276. package/src/utils/retry-after.ts +1 -1
  277. package/src/utils/retry.ts +23 -2
  278. package/src/utils/schema/CONSTRAINTS.md +4 -2
  279. package/src/utils/schema/fields.ts +16 -0
  280. package/src/utils/schema/json-schema-validator.ts +19 -1
  281. package/src/utils/schema/normalize.ts +80 -8
  282. package/src/utils/schema/stamps.ts +22 -10
  283. package/src/utils/schema/wire.ts +2 -2
  284. package/src/utils/sse-debug.ts +0 -271
  285. package/src/utils/stream-markup-healing.ts +50 -8
  286. package/src/utils/validation.ts +49 -13
  287. package/src/utils.ts +2 -26
  288. package/dist/types/model-cache.d.ts +0 -17
  289. package/dist/types/model-manager.d.ts +0 -64
  290. package/dist/types/model-thinking.d.ts +0 -100
  291. package/dist/types/models.d.ts +0 -12
  292. package/dist/types/provider-models/bundled-references.d.ts +0 -4
  293. package/dist/types/provider-models/descriptors.d.ts +0 -50
  294. package/dist/types/provider-models/google.d.ts +0 -24
  295. package/dist/types/provider-models/index.d.ts +0 -5
  296. package/dist/types/provider-models/ollama.d.ts +0 -7
  297. package/dist/types/provider-models/openai-compat.d.ts +0 -323
  298. package/dist/types/provider-models/special.d.ts +0 -16
  299. package/dist/types/utils/discovery/antigravity.d.ts +0 -61
  300. package/dist/types/utils/discovery/codex.d.ts +0 -38
  301. package/dist/types/utils/discovery/cursor.d.ts +0 -23
  302. package/dist/types/utils/discovery/gemini.d.ts +0 -25
  303. package/dist/types/utils/discovery/index.d.ts +0 -4
  304. package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
  305. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
  306. package/dist/types/utils/oauth/cerebras.d.ts +0 -1
  307. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
  308. package/dist/types/utils/oauth/deepseek.d.ts +0 -10
  309. package/dist/types/utils/oauth/firepass.d.ts +0 -1
  310. package/dist/types/utils/oauth/fireworks.d.ts +0 -1
  311. package/dist/types/utils/oauth/huggingface.d.ts +0 -19
  312. package/dist/types/utils/oauth/kagi.d.ts +0 -17
  313. package/dist/types/utils/oauth/kilo.d.ts +0 -5
  314. package/dist/types/utils/oauth/litellm.d.ts +0 -18
  315. package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
  316. package/dist/types/utils/oauth/moonshot.d.ts +0 -1
  317. package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
  318. package/dist/types/utils/oauth/nvidia.d.ts +0 -18
  319. package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
  320. package/dist/types/utils/oauth/ollama.d.ts +0 -18
  321. package/dist/types/utils/oauth/openrouter.d.ts +0 -1
  322. package/dist/types/utils/oauth/parallel.d.ts +0 -17
  323. package/dist/types/utils/oauth/qianfan.d.ts +0 -17
  324. package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
  325. package/dist/types/utils/oauth/synthetic.d.ts +0 -1
  326. package/dist/types/utils/oauth/tavily.d.ts +0 -17
  327. package/dist/types/utils/oauth/together.d.ts +0 -1
  328. package/dist/types/utils/oauth/venice.d.ts +0 -18
  329. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
  330. package/dist/types/utils/oauth/vllm.d.ts +0 -16
  331. package/dist/types/utils/oauth/zai.d.ts +0 -18
  332. package/dist/types/utils/oauth/zenmux.d.ts +0 -1
  333. package/dist/types/utils/oauth/zhipu.d.ts +0 -18
  334. package/src/model-cache.ts +0 -129
  335. package/src/model-manager.ts +0 -469
  336. package/src/model-thinking.ts +0 -756
  337. package/src/models.json +0 -60287
  338. package/src/models.json.d.ts +0 -9
  339. package/src/models.ts +0 -56
  340. package/src/provider-models/bundled-references.ts +0 -38
  341. package/src/provider-models/descriptors.ts +0 -364
  342. package/src/provider-models/google.ts +0 -88
  343. package/src/provider-models/index.ts +0 -5
  344. package/src/provider-models/ollama.ts +0 -153
  345. package/src/provider-models/openai-compat.ts +0 -2904
  346. package/src/provider-models/special.ts +0 -67
  347. package/src/utils/discovery/antigravity.ts +0 -261
  348. package/src/utils/discovery/codex.ts +0 -371
  349. package/src/utils/discovery/cursor.ts +0 -306
  350. package/src/utils/discovery/gemini.ts +0 -248
  351. package/src/utils/discovery/index.ts +0 -4
  352. package/src/utils/discovery/openai-compatible.ts +0 -224
  353. package/src/utils/oauth/gitlab-duo.ts +0 -123
  354. package/src/utils/oauth/index.ts +0 -502
  355. /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
  356. /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
  357. /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
  358. /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
  359. /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
  360. /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
  361. /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
  362. /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
  363. /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
  364. /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
  365. /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
  366. /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
  367. /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
  368. /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
  369. /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
  370. /package/src/{utils → registry}/oauth/cursor.ts +0 -0
  371. /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
  372. /package/src/{utils → registry}/oauth/kimi.ts +0 -0
  373. /package/src/{utils → registry}/oauth/oauth.html +0 -0
  374. /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
  375. /package/src/{utils → registry}/oauth/opencode.ts +0 -0
  376. /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
  377. /package/src/{utils → registry}/oauth/pkce.ts +0 -0
@@ -17,3 +17,9 @@ export interface AbortSourceTracker {
17
17
  * the UI showing a spinner the user already tried to cancel).
18
18
  */
19
19
  export declare function createAbortSourceTracker(callerSignal?: AbortSignal): AbortSourceTracker;
20
+ /**
21
+ * Race a shared promise against a caller's AbortSignal without coupling the
22
+ * underlying work to that signal. The shared promise keeps running (and caches
23
+ * its result) even when an individual caller bails out.
24
+ */
25
+ export declare function raceWithSignal<T>(promise: Promise<T>, signal: AbortSignal | undefined): Promise<T>;
@@ -7,6 +7,8 @@ export declare class EventStream<T, R = T> implements AsyncIterable<T> {
7
7
  reject: (err: unknown) => void;
8
8
  }>;
9
9
  done: boolean;
10
+ /** True once finalResultPromise has been resolved or rejected. */
11
+ resultSettled: boolean;
10
12
  finalResultPromise: Promise<R>;
11
13
  resolveFinalResult: (result: R) => void;
12
14
  rejectFinalResult: (err: unknown) => void;
@@ -15,7 +15,6 @@ export type CapturedHttpErrorResponse = {
15
15
  };
16
16
  export declare function appendRawHttpRequestDumpFor400(message: string, error: unknown, dump: RawHttpRequestDump | undefined): Promise<string>;
17
17
  export declare function finalizeErrorMessage(error: unknown, rawRequestDump: RawHttpRequestDump | undefined, capturedErrorResponse?: CapturedHttpErrorResponse): Promise<string>;
18
- export declare function withHttpStatus(error: unknown, status: number): Error;
19
18
  /**
20
19
  * Rewrite error message for GitHub Copilot request failures.
21
20
  * Must run AFTER finalizeErrorMessage since it replaces the message entirely.
@@ -76,3 +76,38 @@ export interface IdleTimeoutIteratorOptions {
76
76
  * before any user-visible content has streamed.
77
77
  */
78
78
  export declare function iterateWithIdleTimeout<T>(iterable: AsyncIterable<T>, options: IdleTimeoutIteratorOptions): AsyncGenerator<T>;
79
+ export interface TerminalGraceIteratorOptions {
80
+ /**
81
+ * Epoch-ms timestamp at which the consumer observed a logically terminal
82
+ * item (e.g. a chat-completions chunk carrying `finish_reason`), or
83
+ * `undefined` while the stream is still mid-response. Read before every
84
+ * pull, so the consumer can flip it between yields.
85
+ */
86
+ finishedAtMs: () => number | undefined;
87
+ /**
88
+ * Post-terminal budget: how long after `finishedAtMs()` to keep draining
89
+ * trailing items (e.g. a usage-only chunk or the `[DONE]` sentinel) before
90
+ * ending the iteration cleanly. The deadline is fixed at
91
+ * `finishedAtMs() + graceMs`; trailing items do not extend it, so
92
+ * keepalive-only servers cannot hold the stream open.
93
+ */
94
+ graceMs: number;
95
+ /**
96
+ * Invoked when the grace window closes with the source still open. Use it
97
+ * to abort the underlying request: the source generator is typically parked
98
+ * mid-`next()` (not at a yield), so a queued `.return()` alone cannot reach
99
+ * the transport until that pending read settles.
100
+ */
101
+ onGraceEnd?: () => void;
102
+ }
103
+ /**
104
+ * Yields items from an async iterable until the consumer marks the stream
105
+ * logically finished AND the source stays silent past a short grace window.
106
+ *
107
+ * Misbehaving OpenAI-compatible servers deliver the terminal chunk but never
108
+ * send `[DONE]` nor close the connection; without this guard the consumer
109
+ * hangs on `iterator.next()` until the idle watchdog converts an
110
+ * already-successful turn into a timeout error. Grace expiry is a clean end
111
+ * of iteration, never an error.
112
+ */
113
+ export declare function iterateWithTerminalGrace<T>(iterable: AsyncIterable<T>, options: TerminalGraceIteratorOptions): AsyncGenerator<T>;
@@ -0,0 +1,58 @@
1
+ /**
2
+ * JSON-POST → SSE transport for OpenAI-wire streaming endpoints (chat
3
+ * completions, responses, azure responses). Replaces the `openai` SDK client:
4
+ *
5
+ * - Retries: `fetchWithRetry` (Retry-After/quota-hint aware; 5xx/408/429 and
6
+ * transient network errors). Default 6 total attempts — parity with the
7
+ * SDK's former `maxRetries: 5`.
8
+ * - SSE decode: `readSseJson` (spec-compliant framing, `[DONE]`-aware).
9
+ * `onSseEvent` observers now receive real wire frames instead of events
10
+ * re-synthesized from decoded SDK objects.
11
+ * - Errors: {@link OpenAIHttpError} exposes `status`/`headers`/`code`
12
+ * structurally (ProviderHttpError contract — `extractHttpStatusFromError`,
13
+ * retry-after extraction, copilot transient classification) and carries the
14
+ * captured response body for the strict-tools fallback and the responses
15
+ * chain-state detectors, which regex over `error.message`.
16
+ */
17
+ import { type SseEventObserver } from "@prometheus-ai/utils";
18
+ import { ProviderHttpError } from "../errors";
19
+ import type { FetchImpl } from "../types";
20
+ import type { CapturedHttpErrorResponse } from "./http-inspector";
21
+ /** Non-2xx response from an OpenAI-wire endpoint, with the decoded body attached. */
22
+ export declare class OpenAIHttpError extends ProviderHttpError {
23
+ readonly captured: CapturedHttpErrorResponse;
24
+ constructor(message: string, captured: CapturedHttpErrorResponse, code: string | undefined);
25
+ }
26
+ export interface OpenAIStreamRequestInit {
27
+ url: string;
28
+ headers: Record<string, string>;
29
+ /** JSON request body; serialized once per call (retries resend the same bytes). */
30
+ body: unknown;
31
+ signal: AbortSignal;
32
+ fetch?: FetchImpl;
33
+ /**
34
+ * Total attempts (initial + retries). Defaults to {@link DEFAULT_MAX_ATTEMPTS}.
35
+ * Pass `1` when a first-event watchdog is armed so retries cannot silently
36
+ * extend the caller's deadline (mirrors the old `maxRetries: 0` hint).
37
+ */
38
+ maxAttempts?: number;
39
+ /** Raw wire-frame observer (`onSseEvent` debug pipeline). */
40
+ onSseEvent?: SseEventObserver;
41
+ }
42
+ export interface OpenAIStreamHandle<TEvent> {
43
+ /** Decoded `data:` payloads; terminates on `[DONE]` or stream end. */
44
+ events: AsyncGenerator<TEvent>;
45
+ response: Response;
46
+ /** `x-request-id` response header (the SDK's former `request_id`). */
47
+ requestId: string | null;
48
+ }
49
+ /**
50
+ * POST a JSON body and stream back decoded SSE events.
51
+ *
52
+ * Throws {@link OpenAIHttpError} on a non-2xx terminal response. Aborts on
53
+ * `signal` propagate from `fetchWithRetry`/`readSseJson`; callers own the
54
+ * watchdog timers and abort-reason bookkeeping.
55
+ */
56
+ export declare function postOpenAIStream<TEvent>(init: OpenAIStreamRequestInit): Promise<OpenAIStreamHandle<TEvent>>;
57
+ /** Decode a non-2xx response into an {@link OpenAIHttpError} without consuming it twice. */
58
+ export declare function captureOpenAIHttpError(response: Response): Promise<OpenAIHttpError>;
@@ -22,6 +22,9 @@ export interface RequestDebugSession {
22
22
  wrapResponse(response: Response): Promise<Response>;
23
23
  }
24
24
  export declare function isRequestDebugEnabled(): boolean;
25
+ export declare function setNextRequestDebugPath(requestPath: string): void;
26
+ export declare function clearNextRequestDebugPath(): void;
27
+ export declare function getNextRequestDebugPath(): string | undefined;
25
28
  export declare function wrapFetchForRequestDebug(fetchImpl: FetchImpl): FetchImpl;
26
29
  export declare function withRequestDebugFetch<T extends {
27
30
  fetch?: FetchImpl;
@@ -1,3 +1,4 @@
1
1
  export type HeadersLike = Headers | Record<string, string | undefined> | undefined | null;
2
2
  export declare function formatErrorMessageWithRetryAfter(error: unknown, headers?: HeadersLike): string;
3
3
  export declare function getRetryAfterMsFromHeaders(headers: HeadersLike): number | undefined;
4
+ export declare function getHeadersFromError(error: unknown): HeadersLike;
@@ -30,6 +30,11 @@ export declare const NON_STRUCTURAL_SCHEMA_KEYS: Record<string, true>;
30
30
  * Used when collapsing mixed-type combiner variants for CCA Claude.
31
31
  */
32
32
  export declare const CLOUD_CODE_ASSIST_TYPE_SPECIFIC_KEYS: Record<string, Record<string, true>>;
33
+ /**
34
+ * Flat set of every type-specific key across all CCA types.
35
+ * Used to identify sibling keys that need filtering during mixed-type collapse.
36
+ */
37
+ export declare const ALL_CCA_TYPE_SPECIFIC_KEYS: Record<string, true>;
33
38
  /**
34
39
  * Cloud Code Assist shared schema keys allowed on any type.
35
40
  * Used alongside CLOUD_CODE_ASSIST_TYPE_SPECIFIC_KEYS for CCA combiner collapsing.
@@ -3,6 +3,14 @@ export interface JsonSchemaValidationIssue {
3
3
  message: string;
4
4
  expectedTypes?: string[];
5
5
  keyword?: string;
6
+ /**
7
+ * Marks issues that originate inside a failed `anyOf` / `oneOf` branch.
8
+ * Consumers such as the tool-argument coercion layer use this to avoid
9
+ * applying type repairs (e.g. singleton-array wrapping) that would be
10
+ * authoritative outside of a combinator but are only one candidate
11
+ * branch's expectation here.
12
+ */
13
+ fromUnionBranch?: boolean;
6
14
  }
7
15
  export interface JsonSchemaValidationResult {
8
16
  success: boolean;
@@ -1,17 +1,3 @@
1
- /**
2
- * Symbol-keyed lazy memoization stamped directly onto the host object.
3
- *
4
- * Faster than a module-level `WeakMap` in V8/JSC because the symbol slot is
5
- * resolved through the object's hidden class instead of a side-table hash
6
- * lookup. The slot is defined as a non-enumerable property so the stamp
7
- * does not leak through `{...spread}`, `Object.keys`, `JSON.stringify`, or
8
- * `toEqual`-style deep equality.
9
- *
10
- * Caveats: the stamp lives as long as the host object, even after callers
11
- * release their references to the cached value — only use this for caches
12
- * whose lifetime should match the host. Frozen hosts will throw on write in
13
- * strict mode; callers that may receive frozen input must handle that.
14
- */
15
1
  export declare function stamp<T extends object, V>(target: T, key: symbol, compute: (target: T) => V): V;
16
2
  export declare function epochNext(): number;
17
3
  /**
@@ -20,6 +6,12 @@ export declare function epochNext(): number;
20
6
  * subsequent call within the same epoch.
21
7
  */
22
8
  export declare function once<T extends object>(target: T, epoch: number): boolean;
23
- /** Returns `true` on first entry, `false` if `target` is already on the current path. */
9
+ /**
10
+ * Returns `true` on first entry, `false` if `target` is already on the
11
+ * current path. A `false` return does NOT deepen the counter — callers pair
12
+ * `exit` only with successful enters (`if (!enter(n)) bail; try {…} finally
13
+ * { exit(n); }`), so incrementing on the cycle branch would leak depth and
14
+ * make every later top-level walk of the same object misreport a cycle.
15
+ */
24
16
  export declare function enter<T extends object>(target: T): boolean;
25
17
  export declare function exit<T extends object>(target: T): void;
@@ -1,10 +1,5 @@
1
1
  import type { ServerSentEvent } from "@prometheus-ai/utils";
2
2
  import type { RawSseEvent } from "../types";
3
- type FetchFunction = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
4
- type FetchWithPreconnect = FetchFunction & {
5
- preconnect?: typeof fetch.preconnect;
6
- };
7
3
  type RawSseObserver = (event: RawSseEvent) => void;
8
4
  export declare function notifyRawSseEvent(observer: RawSseObserver | undefined, event: ServerSentEvent | RawSseEvent): void;
9
- export declare function wrapFetchForSseDebug(fetchImpl: FetchWithPreconnect, observer: RawSseObserver | undefined): FetchWithPreconnect;
10
5
  export {};
@@ -75,6 +75,8 @@ export declare class StreamMarkupHealing {
75
75
  export declare function modelMayLeakKimiToolCalls(provider: string, modelId: string): boolean;
76
76
  /** Cheap model/provider gate for DeepSeek DSML envelope leaks. */
77
77
  export declare function modelMayLeakDsmlToolCalls(provider: string, modelId: string): boolean;
78
+ /** Cheap model/provider gate for MiniMax plain thinking tag leaks. */
79
+ export declare function modelMayLeakThinkingTags(provider: string, modelId: string): boolean;
78
80
  export declare function getStreamMarkupHealingPattern(provider: string, modelId: string, options?: {
79
81
  readonly parseThinkingTags?: boolean;
80
82
  }): StreamMarkupHealingPattern | undefined;
@@ -1,10 +1,7 @@
1
- import type { ResponseInput } from "openai/resources/responses/responses";
1
+ import type { ResponseInput } from "./providers/openai-responses-wire";
2
2
  import type { CacheRetention, OpenAIResponsesHistoryPayload, ProviderPayload } from "./types";
3
3
  export { isRecord } from "@prometheus-ai/utils";
4
4
  export declare function normalizeSystemPrompts(systemPrompt: readonly string[] | string | undefined | null): string[];
5
- export declare function toNumber(value: unknown): number | undefined;
6
- export declare function toPositiveNumber(value: unknown, fallback: number): number;
7
- export declare function toBoolean(value: unknown): boolean | undefined;
8
5
  export declare function normalizeToolCallId(id: string): string;
9
6
  type ResponsesToolItemIdPrefix = "fc" | "ctc";
10
7
  export declare function normalizeResponsesToolCallId(id: string, itemPrefix?: ResponsesToolItemIdPrefix): {
@@ -25,4 +22,3 @@ export declare function getOpenAIResponsesHistoryItems(providerPayload: Provider
25
22
  * Defaults to "short" and uses PROMETHEUS_CACHE_RETENTION for backward compatibility.
26
23
  */
27
24
  export declare function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention;
28
- export declare function isAnthropicOAuthToken(key: string): boolean;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@prometheus-ai/ai",
4
- "version": "0.5.3",
4
+ "version": "0.5.8",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://prometheus.trivlab.com",
7
7
  "author": "Uttam Trivedi",
@@ -31,15 +31,15 @@
31
31
  "lint": "biome lint .",
32
32
  "test": "bun test --parallel --timeout=15000",
33
33
  "fix": "biome check --write --unsafe .",
34
- "fmt": "biome format --write .",
35
- "generate-models": "bun scripts/generate-models.ts"
34
+ "fmt": "biome format --write ."
36
35
  },
37
36
  "dependencies": {
38
37
  "@bufbuild/protobuf": "^2.12.0",
39
- "@prometheus-ai/utils": "0.5.3",
38
+ "@prometheus-ai/catalog": "0.5.8",
39
+ "@prometheus-ai/utils": "0.5.8",
40
40
  "openai": "^6.39.0",
41
41
  "partial-json": "^0.1.7",
42
- "zod": "4.4.3"
42
+ "zod": "^4"
43
43
  },
44
44
  "devDependencies": {
45
45
  "@types/bun": "^1.3.14"
@@ -78,18 +78,6 @@
78
78
  "types": "./dist/types/auth-gateway/*.d.ts",
79
79
  "import": "./src/auth-gateway/*.ts"
80
80
  },
81
- "./models.json": {
82
- "types": "./dist/types/models.json.d.d.ts",
83
- "import": "./src/models.json"
84
- },
85
- "./provider-models": {
86
- "types": "./dist/types/provider-models/index.d.ts",
87
- "import": "./src/provider-models/index.ts"
88
- },
89
- "./provider-models/*": {
90
- "types": "./dist/types/provider-models/*.d.ts",
91
- "import": "./src/provider-models/*.ts"
92
- },
93
81
  "./providers/*": {
94
82
  "types": "./dist/types/providers/*.d.ts",
95
83
  "import": "./src/providers/*.ts"
@@ -110,21 +98,21 @@
110
98
  "types": "./dist/types/utils/*.d.ts",
111
99
  "import": "./src/utils/*.ts"
112
100
  },
113
- "./utils/discovery": {
114
- "types": "./dist/types/utils/discovery/index.d.ts",
115
- "import": "./src/utils/discovery/index.ts"
101
+ "./oauth": {
102
+ "types": "./dist/types/registry/oauth/index.d.ts",
103
+ "import": "./src/registry/oauth/index.ts"
116
104
  },
117
- "./utils/discovery/*": {
118
- "types": "./dist/types/utils/discovery/*.d.ts",
119
- "import": "./src/utils/discovery/*.ts"
105
+ "./oauth/*": {
106
+ "types": "./dist/types/registry/oauth/*.d.ts",
107
+ "import": "./src/registry/oauth/*.ts"
120
108
  },
121
- "./utils/oauth": {
122
- "types": "./dist/types/utils/oauth/index.d.ts",
123
- "import": "./src/utils/oauth/index.ts"
109
+ "./registry": {
110
+ "types": "./dist/types/registry/index.d.ts",
111
+ "import": "./src/registry/index.ts"
124
112
  },
125
- "./utils/oauth/*": {
126
- "types": "./dist/types/utils/oauth/*.d.ts",
127
- "import": "./src/utils/oauth/*.ts"
113
+ "./registry/oauth": {
114
+ "types": "./dist/types/registry/oauth/index.d.ts",
115
+ "import": "./src/registry/oauth/index.ts"
128
116
  },
129
117
  "./utils/schema": {
130
118
  "types": "./dist/types/utils/schema/index.d.ts",
@@ -17,9 +17,9 @@ import {
17
17
  REMOTE_REFRESH_SENTINEL,
18
18
  type StoredAuthCredential,
19
19
  } from "../auth-storage";
20
+ import type { OAuthCredentials } from "../registry/oauth/types";
20
21
  import type { Provider } from "../types";
21
22
  import type { UsageReport } from "../usage";
22
- import type { OAuthCredentials } from "../utils/oauth/types";
23
23
  import { type AuthBrokerClient, AuthBrokerStreamUnsupportedError } from "./client";
24
24
  import type { RefresherSchedule, SnapshotEntry, SnapshotResponse, SnapshotStreamEvent } from "./types";
25
25
 
@@ -275,6 +275,15 @@ export class RemoteAuthCredentialStore implements AuthCredentialStore {
275
275
  });
276
276
  }
277
277
 
278
+ async deleteAuthCredentialRemote(id: number, disabledCause: string): Promise<boolean> {
279
+ const found = this.#snapshot.credentials.some(entry => entry.id === id);
280
+ if (!found) return false;
281
+ await this.#client.disableCredential(id, disabledCause);
282
+ this.#removeCredentialById(id);
283
+ this.#maybeRefreshSnapshot("delete credential");
284
+ return true;
285
+ }
286
+
278
287
  tryDisableAuthCredentialIfMatches(id: number, _expectedData: string, disabledCause: string): boolean {
279
288
  const found = this.#snapshot.credentials.find(entry => entry.id === id);
280
289
  if (!found) return false;
@@ -12,7 +12,7 @@ import { isEnoent, logger } from "@prometheus-ai/utils";
12
12
  import type { SnapshotResponse } from "./types";
13
13
  import { snapshotResponseSchema } from "./wire-schemas";
14
14
 
15
- const MAGIC = new Uint8Array([0x50, 0x52, 0x4d, 0x53]); // "PRMS"
15
+ const MAGIC = new Uint8Array([0x4f, 0x4d, 0x50, 0x53]); // "OMPS"
16
16
  const VERSION = 1;
17
17
  const VERSION_OFFSET = MAGIC.byteLength;
18
18
  const IV_OFFSET = VERSION_OFFSET + 1;
@@ -10,7 +10,7 @@
10
10
  * keys are rejected — the previous implementation used a hand-rolled
11
11
  * `hasOnlyFields` allowlist for the same effect.
12
12
  */
13
- import * as z from "zod/v4";
13
+ import { z } from "zod/v4";
14
14
  import { REMOTE_REFRESH_SENTINEL } from "../auth-storage";
15
15
  import { usageReportSchema } from "../usage";
16
16
 
@@ -74,7 +74,7 @@ const PASSTHROUGH_HEADER_NAMES: Record<string, true> = {
74
74
  "openai-organization": true,
75
75
  "openai-project": true,
76
76
  "openai-beta": true,
77
- // Codex / ChatGPT-OAuth backend headers (see openai-codex/constants.ts).
77
+ // Codex / ChatGPT-OAuth backend headers (see @prometheus-ai/catalog/wire/codex).
78
78
  // `session_id` and `conversation_id` thread the upstream session so prompt
79
79
  // caching and per-conversation rate limiting work; `chatgpt-account-id` and
80
80
  // `originator` identify the calling account and client surface.
@@ -17,9 +17,11 @@
17
17
  * POST /v1/messages → Anthropic messages in/out
18
18
  * POST /v1/responses → OpenAI Responses in/out
19
19
  */
20
+
21
+ import { Effort } from "@prometheus-ai/catalog/effort";
20
22
  import { extractRetryHint, logger } from "@prometheus-ai/utils";
23
+ import type { ApiKeyResolver } from "../auth-retry";
21
24
  import type { AuthStorage } from "../auth-storage";
22
- import { Effort } from "../model-thinking";
23
25
  import * as anthropicMessages from "../providers/anthropic-messages-server";
24
26
  import * as openaiChat from "../providers/openai-chat-server";
25
27
  import * as openaiResponses from "../providers/openai-responses-server";
@@ -314,9 +316,10 @@ async function refreshGatewayApiKeyAfterAuthError(
314
316
  const message = error instanceof Error ? error.message : String(error);
315
317
  if (isUsageLimitError(message)) {
316
318
  const retryAfterMs = extractRetryHint(undefined, message);
317
- const switched = await storage.markUsageLimitReached(provider, sessionId, {
319
+ const { switched, retryAtMs } = await storage.markUsageLimitReached(provider, sessionId, {
318
320
  retryAfterMs,
319
321
  baseUrl: model.baseUrl,
322
+ modelId: model.id,
320
323
  signal,
321
324
  });
322
325
  logger.debug("auth-gateway retrying provider request after usage-limit block", {
@@ -325,6 +328,7 @@ async function refreshGatewayApiKeyAfterAuthError(
325
328
  peer,
326
329
  switched,
327
330
  retryAfterMs,
331
+ retryAtMs,
328
332
  error: message,
329
333
  });
330
334
  if (!switched) return undefined;
@@ -340,6 +344,60 @@ async function refreshGatewayApiKeyAfterAuthError(
340
344
  return storage.getApiKey(provider, sessionId, { modelId: model.id, signal });
341
345
  }
342
346
 
347
+ /**
348
+ * Build the {@link ApiKeyResolver} handed to `streamSimple` for a gateway
349
+ * request. Drives the central a/b/c auth-retry policy server-side:
350
+ *
351
+ * - initial resolve → the credential already resolved for this request.
352
+ * - step (b) `!lastChance` → force-refresh the SAME session-sticky credential
353
+ * (a peer/broker may have rotated its token out from under our cached copy).
354
+ * - step (c) `lastChance` → {@link refreshGatewayApiKeyAfterAuthError} switches
355
+ * to a sibling (usage-limit block vs credential invalidation by error class).
356
+ *
357
+ * `lastKey` tracks the most recent bearer so the switch step invalidates the
358
+ * credential that actually failed.
359
+ */
360
+ function buildGatewayApiKeyResolver(
361
+ storage: AuthStorage,
362
+ model: Model<Api>,
363
+ sessionId: string,
364
+ initialKey: string,
365
+ requestSignal: AbortSignal,
366
+ format: string,
367
+ peer: string,
368
+ ): ApiKeyResolver {
369
+ let lastKey = initialKey;
370
+ return async ({ lastChance, error, signal }) => {
371
+ const sig = signal ?? requestSignal;
372
+ if (error === undefined) {
373
+ lastKey = initialKey;
374
+ return initialKey;
375
+ }
376
+ if (!lastChance) {
377
+ const refreshed = await storage.getApiKey(model.provider, sessionId, {
378
+ modelId: model.id,
379
+ signal: sig,
380
+ forceRefresh: true,
381
+ });
382
+ lastKey = refreshed ?? lastKey;
383
+ return refreshed;
384
+ }
385
+ const next = await refreshGatewayApiKeyAfterAuthError(
386
+ storage,
387
+ model,
388
+ sessionId,
389
+ model.provider,
390
+ lastKey,
391
+ error,
392
+ sig,
393
+ format,
394
+ peer,
395
+ );
396
+ lastKey = next ?? lastKey;
397
+ return next;
398
+ };
399
+ }
400
+
343
401
  function clientClosedResponse(route: { module: FormatModule }): Response {
344
402
  return route.module.formatError(499, "request_aborted", "client closed request");
345
403
  }
@@ -447,19 +505,15 @@ async function handleFormatEndpoint(
447
505
  }
448
506
 
449
507
  const streamOpts = buildStreamOptions(parsed, model.api, controller.signal);
450
- streamOpts.apiKey = apiKey;
451
- streamOpts.onAuthError = (provider, oldKey, error) =>
452
- refreshGatewayApiKeyAfterAuthError(
453
- bootOpts.storage,
454
- model,
455
- sessionId,
456
- provider,
457
- oldKey,
458
- error,
459
- controller.signal,
460
- route.label,
461
- peer,
462
- );
508
+ streamOpts.apiKey = buildGatewayApiKeyResolver(
509
+ bootOpts.storage,
510
+ model,
511
+ sessionId,
512
+ apiKey,
513
+ controller.signal,
514
+ route.label,
515
+ peer,
516
+ );
463
517
 
464
518
  logger.info("auth-gateway request", {
465
519
  format: route.label,
@@ -514,7 +568,14 @@ async function handleFormatEndpoint(
514
568
  }
515
569
  if (controller.signal.aborted) return clientClosedResponse(route);
516
570
 
517
- const sseStream = route.module.encodeStream(events, parsed.modelId, parsed.options);
571
+ const sseStream = route.module.encodeStream(events, parsed.modelId, parsed.options, {
572
+ signal: controller.signal,
573
+ onCancel: reason => {
574
+ if (!controller.signal.aborted) {
575
+ controller.abort(reason instanceof Error ? reason : new Error("client closed request"));
576
+ }
577
+ },
578
+ });
518
579
  return new Response(sseStream, {
519
580
  status: 200,
520
581
  headers: {
@@ -532,7 +593,7 @@ async function handleFormatEndpoint(
532
593
  /**
533
594
  * Prometheus-native fast path: `POST /v1/prometheus/stream`. Accepts the canonical Prometheus AI
534
595
  * `Context` directly (no wire-format round-trip) and emits a bandwidth-shrunk
535
- * event stream matching `Prometheus agent`'s `streamProxy`. Skips the OpenAI /
596
+ * event stream matching Prometheus' `streamProxy`. Skips the OpenAI /
536
597
  * Anthropic / Responses translation layers — those exist to bridge foreign
537
598
  * SDKs (llm-git, anthropic-sdk, openai-sdk), and bridging back to prometheus-native
538
599
  * just to bridge forward again is wasted work.
@@ -604,18 +665,15 @@ async function handlePrometheusNative(bootOpts: AuthGatewayBootOptions, req: Req
604
665
  // only inject server-controlled fields. The codex temperature/topP strip
605
666
  // matches `buildStreamOptions` — Codex rejects them with a 400.
606
667
  const streamOpts: SimpleStreamOptions = { ...parsed.options, apiKey, signal: controller.signal };
607
- streamOpts.onAuthError = (provider, oldKey, error) =>
608
- refreshGatewayApiKeyAfterAuthError(
609
- bootOpts.storage,
610
- model,
611
- sessionId,
612
- provider,
613
- oldKey,
614
- error,
615
- controller.signal,
616
- "prometheus-native",
617
- peer,
618
- );
668
+ streamOpts.apiKey = buildGatewayApiKeyResolver(
669
+ bootOpts.storage,
670
+ model,
671
+ sessionId,
672
+ apiKey,
673
+ controller.signal,
674
+ "prometheus-native",
675
+ peer,
676
+ );
619
677
  if (model.api === "openai-codex-responses") {
620
678
  delete streamOpts.temperature;
621
679
  delete streamOpts.topP;
@@ -679,7 +737,14 @@ async function handlePrometheusNative(bootOpts: AuthGatewayBootOptions, req: Req
679
737
  }
680
738
  if (controller.signal.aborted) return aborted();
681
739
 
682
- const sseStream = prometheusNative.encodeStream(events);
740
+ const sseStream = prometheusNative.encodeStream(events, parsed.modelId, parsed.options, {
741
+ signal: controller.signal,
742
+ onCancel: reason => {
743
+ if (!controller.signal.aborted) {
744
+ controller.abort(reason instanceof Error ? reason : new Error("client closed request"));
745
+ }
746
+ },
747
+ });
683
748
  return new Response(sseStream, {
684
749
  status: 200,
685
750
  headers: {
@@ -1,4 +1,4 @@
1
- import type { Effort } from "../model-thinking";
1
+ import type { Effort } from "@prometheus-ai/catalog/effort";
2
2
  import type {
3
3
  AssistantMessage,
4
4
  AssistantMessageEventStream,
@@ -14,7 +14,7 @@ import type {
14
14
  * The gateway sits between unauthenticated clients (containerized prometheus,
15
15
  * llm-git, …) and the broker. It accepts provider-format HTTP requests
16
16
  * (OpenAI chat-completions / Anthropic messages / OpenAI Responses),
17
- * dispatches them through @prometheus-ai/ai's `streamSimple()`, and translates the
17
+ * dispatches them through Prometheus AI's `streamSimple()`, and translates the
18
18
  * canonical event stream back to the matching wire format. The gateway
19
19
  * injects `Authorization` server-side so clients never see access tokens.
20
20
  */
@@ -110,6 +110,13 @@ export interface AuthGatewayParsedRequest {
110
110
  options: AuthGatewayParsedRequestOptions;
111
111
  }
112
112
 
113
+ export interface AuthGatewayStreamControl {
114
+ /** Gateway request signal. Encoders stop producing frames when it aborts. */
115
+ signal?: AbortSignal;
116
+ /** Called when the HTTP response body is cancelled by the client. */
117
+ onCancel?: (reason?: unknown) => void;
118
+ }
119
+
113
120
  export interface AuthGatewayFormatModule {
114
121
  parseRequest(body: unknown, headers?: Headers): AuthGatewayParsedRequest;
115
122
  encodeResponse(message: AssistantMessage, requestedModelId: string): Record<string, unknown>;
@@ -117,6 +124,7 @@ export interface AuthGatewayFormatModule {
117
124
  events: AssistantMessageEventStream,
118
125
  requestedModelId: string,
119
126
  options?: AuthGatewayParsedRequestOptions,
127
+ control?: AuthGatewayStreamControl,
120
128
  ): ReadableStream<Uint8Array>;
121
129
  /**
122
130
  * Emit a protocol-specific error envelope. OpenAI returns