@prometheus-ai/ai 0.5.3 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/dist/types/auth-broker/remote-store.d.ts +2 -1
  2. package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
  3. package/dist/types/auth-gateway/server.d.ts +19 -0
  4. package/dist/types/auth-gateway/types.d.ts +9 -3
  5. package/dist/types/auth-retry.d.ts +119 -0
  6. package/dist/types/auth-storage.d.ts +217 -8
  7. package/dist/types/errors.d.ts +24 -0
  8. package/dist/types/index.d.ts +5 -9
  9. package/dist/types/provider-details.d.ts +1 -1
  10. package/dist/types/providers/amazon-bedrock.d.ts +12 -6
  11. package/dist/types/providers/anthropic-client.d.ts +10 -3
  12. package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
  13. package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
  14. package/dist/types/providers/anthropic-wire.d.ts +3 -3
  15. package/dist/types/providers/anthropic.d.ts +41 -34
  16. package/dist/types/providers/aws-credentials.d.ts +8 -0
  17. package/dist/types/providers/azure-openai-responses.d.ts +1 -0
  18. package/dist/types/providers/google-gemini-cli.d.ts +22 -1
  19. package/dist/types/providers/google-shared.d.ts +22 -0
  20. package/dist/types/providers/google-types.d.ts +13 -1
  21. package/dist/types/providers/mock.d.ts +8 -3
  22. package/dist/types/providers/ollama.d.ts +6 -0
  23. package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
  24. package/dist/types/providers/openai-chat-server.d.ts +3 -3
  25. package/dist/types/providers/openai-chat-wire.d.ts +644 -0
  26. package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
  27. package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
  28. package/dist/types/providers/openai-codex-responses.d.ts +31 -2
  29. package/dist/types/providers/openai-completions-compat.d.ts +2 -25
  30. package/dist/types/providers/openai-completions.d.ts +2 -10
  31. package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
  32. package/dist/types/providers/openai-responses-server.d.ts +2 -2
  33. package/dist/types/providers/openai-responses-shared.d.ts +49 -9
  34. package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
  35. package/dist/types/providers/openai-responses.d.ts +13 -4
  36. package/dist/types/providers/prometheus-native-client.d.ts +9 -0
  37. package/dist/types/providers/prometheus-native-server.d.ts +4 -3
  38. package/dist/types/providers/transform-messages.d.ts +1 -2
  39. package/dist/types/rate-limit-utils.d.ts +3 -2
  40. package/dist/types/registry/aimlapi.d.ts +4 -0
  41. package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
  42. package/dist/types/registry/amazon-bedrock.d.ts +5 -0
  43. package/dist/types/registry/anthropic.d.ts +10 -0
  44. package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
  45. package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
  46. package/dist/types/registry/cerebras.d.ts +7 -0
  47. package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
  48. package/dist/types/registry/cursor.d.ts +7 -0
  49. package/dist/types/registry/deepseek.d.ts +8 -0
  50. package/dist/types/registry/derived.d.ts +5 -0
  51. package/dist/types/registry/firepass.d.ts +16 -0
  52. package/dist/types/registry/fireworks.d.ts +7 -0
  53. package/dist/types/registry/github-copilot.d.ts +7 -0
  54. package/dist/types/registry/gitlab-duo.d.ts +9 -0
  55. package/dist/types/registry/google-antigravity.d.ts +9 -0
  56. package/dist/types/registry/google-gemini-cli.d.ts +9 -0
  57. package/dist/types/registry/google-vertex.d.ts +5 -0
  58. package/dist/types/registry/google.d.ts +4 -0
  59. package/dist/types/registry/groq.d.ts +4 -0
  60. package/dist/types/registry/huggingface.d.ts +7 -0
  61. package/dist/types/registry/index.d.ts +4 -0
  62. package/dist/types/registry/kagi.d.ts +14 -0
  63. package/dist/types/registry/kilo.d.ts +7 -0
  64. package/dist/types/registry/kimi-code.d.ts +7 -0
  65. package/dist/types/registry/litellm.d.ts +13 -0
  66. package/dist/types/registry/lm-studio.d.ts +8 -0
  67. package/dist/types/registry/minimax-code-cn.d.ts +6 -0
  68. package/dist/types/registry/minimax-code.d.ts +6 -0
  69. package/dist/types/registry/minimax.d.ts +4 -0
  70. package/dist/types/registry/mistral.d.ts +4 -0
  71. package/dist/types/registry/moonshot.d.ts +7 -0
  72. package/dist/types/registry/nanogpt.d.ts +7 -0
  73. package/dist/types/registry/nvidia.d.ts +7 -0
  74. package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  75. package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
  76. package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
  77. package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
  78. package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
  79. package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
  80. package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
  81. package/dist/types/registry/ollama-cloud.d.ts +7 -0
  82. package/dist/types/registry/ollama.d.ts +12 -0
  83. package/dist/types/registry/openai-codex-device.d.ts +8 -0
  84. package/dist/types/registry/openai-codex.d.ts +9 -0
  85. package/dist/types/registry/openai.d.ts +4 -0
  86. package/dist/types/registry/opencode-go.d.ts +6 -0
  87. package/dist/types/registry/opencode-zen.d.ts +6 -0
  88. package/dist/types/registry/openrouter.d.ts +13 -0
  89. package/dist/types/registry/parallel.d.ts +14 -0
  90. package/dist/types/registry/perplexity.d.ts +7 -0
  91. package/dist/types/registry/qianfan.d.ts +7 -0
  92. package/dist/types/registry/qwen-portal.d.ts +7 -0
  93. package/dist/types/registry/registry.d.ts +272 -0
  94. package/dist/types/registry/synthetic.d.ts +6 -0
  95. package/dist/types/registry/tavily.d.ts +14 -0
  96. package/dist/types/registry/together.d.ts +6 -0
  97. package/dist/types/registry/types.d.ts +51 -0
  98. package/dist/types/registry/venice.d.ts +13 -0
  99. package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
  100. package/dist/types/registry/vllm.d.ts +7 -0
  101. package/dist/types/registry/wafer-pass.d.ts +6 -0
  102. package/dist/types/registry/wafer-serverless.d.ts +6 -0
  103. package/dist/types/registry/xai-oauth.d.ts +7 -0
  104. package/dist/types/registry/xai.d.ts +4 -0
  105. package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
  106. package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
  107. package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
  108. package/dist/types/registry/xiaomi.d.ts +6 -0
  109. package/dist/types/registry/zai.d.ts +7 -0
  110. package/dist/types/registry/zenmux.d.ts +7 -0
  111. package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
  112. package/dist/types/stream.d.ts +9 -1
  113. package/dist/types/types.d.ts +56 -295
  114. package/dist/types/usage/google-antigravity.d.ts +15 -1
  115. package/dist/types/usage/openai-codex-reset.d.ts +79 -0
  116. package/dist/types/usage/openai-codex.d.ts +1 -0
  117. package/dist/types/usage.d.ts +77 -4
  118. package/dist/types/utils/abort.d.ts +6 -0
  119. package/dist/types/utils/event-stream.d.ts +2 -0
  120. package/dist/types/utils/http-inspector.d.ts +0 -1
  121. package/dist/types/utils/idle-iterator.d.ts +35 -0
  122. package/dist/types/utils/openai-http.d.ts +58 -0
  123. package/dist/types/utils/request-debug.d.ts +3 -0
  124. package/dist/types/utils/retry-after.d.ts +1 -0
  125. package/dist/types/utils/schema/fields.d.ts +5 -0
  126. package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
  127. package/dist/types/utils/schema/stamps.d.ts +7 -15
  128. package/dist/types/utils/sse-debug.d.ts +0 -5
  129. package/dist/types/utils/stream-markup-healing.d.ts +2 -0
  130. package/dist/types/utils.d.ts +1 -5
  131. package/package.json +17 -29
  132. package/src/auth-broker/remote-store.ts +10 -1
  133. package/src/auth-broker/snapshot-cache.ts +1 -1
  134. package/src/auth-broker/wire-schemas.ts +1 -1
  135. package/src/auth-gateway/http.ts +1 -1
  136. package/src/auth-gateway/server.ts +95 -30
  137. package/src/auth-gateway/types.ts +10 -2
  138. package/src/auth-retry.ts +238 -0
  139. package/src/auth-storage.ts +935 -430
  140. package/src/errors.ts +32 -0
  141. package/src/index.ts +9 -14
  142. package/src/provider-details.ts +1 -1
  143. package/src/providers/__tests__/google-auth.test.ts +144 -0
  144. package/src/providers/amazon-bedrock.ts +70 -40
  145. package/src/providers/anthropic-client.ts +15 -13
  146. package/src/providers/anthropic-messages-server-schema.ts +17 -7
  147. package/src/providers/anthropic-messages-server.ts +88 -20
  148. package/src/providers/anthropic-wire.ts +4 -3
  149. package/src/providers/anthropic.ts +1234 -621
  150. package/src/providers/aws-credentials.ts +47 -5
  151. package/src/providers/aws-eventstream.ts +5 -0
  152. package/src/providers/azure-openai-responses.ts +117 -67
  153. package/src/providers/cursor.ts +30 -30
  154. package/src/providers/github-copilot-headers.ts +1 -1
  155. package/src/providers/gitlab-duo.ts +36 -29
  156. package/src/providers/google-auth.ts +71 -8
  157. package/src/providers/google-gemini-cli.ts +118 -22
  158. package/src/providers/google-shared.ts +163 -43
  159. package/src/providers/google-types.ts +10 -1
  160. package/src/providers/kimi.ts +1 -1
  161. package/src/providers/mock.ts +11 -3
  162. package/src/providers/ollama.ts +64 -7
  163. package/src/providers/openai-anthropic-shim.ts +17 -8
  164. package/src/providers/openai-chat-server-schema.ts +9 -3
  165. package/src/providers/openai-chat-server.ts +82 -16
  166. package/src/providers/openai-chat-wire.ts +847 -0
  167. package/src/providers/openai-codex/request-transformer.ts +129 -34
  168. package/src/providers/openai-codex/response-handler.ts +22 -1
  169. package/src/providers/openai-codex-responses.ts +699 -247
  170. package/src/providers/openai-completions-compat.ts +8 -308
  171. package/src/providers/openai-completions.ts +416 -267
  172. package/src/providers/openai-responses-server-schema.ts +15 -9
  173. package/src/providers/openai-responses-server.ts +162 -114
  174. package/src/providers/openai-responses-shared.ts +320 -82
  175. package/src/providers/openai-responses-wire.ts +6391 -0
  176. package/src/providers/openai-responses.ts +382 -176
  177. package/src/providers/prometheus-native-client.ts +27 -11
  178. package/src/providers/prometheus-native-server.ts +44 -17
  179. package/src/providers/transform-messages.ts +311 -120
  180. package/src/providers/vision-guard.ts +5 -3
  181. package/src/rate-limit-utils.ts +13 -3
  182. package/src/registry/aimlapi.ts +6 -0
  183. package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
  184. package/src/registry/amazon-bedrock.ts +22 -0
  185. package/src/registry/anthropic.ts +26 -0
  186. package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
  187. package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
  188. package/src/{utils/oauth → registry}/cerebras.ts +8 -1
  189. package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
  190. package/src/registry/cursor.ts +20 -0
  191. package/src/{utils/oauth → registry}/deepseek.ts +9 -17
  192. package/src/registry/derived.ts +9 -0
  193. package/src/{utils/oauth → registry}/firepass.ts +10 -2
  194. package/src/{utils/oauth → registry}/fireworks.ts +8 -1
  195. package/src/registry/github-copilot.ts +22 -0
  196. package/src/registry/gitlab-duo.ts +19 -0
  197. package/src/registry/google-antigravity.ts +21 -0
  198. package/src/registry/google-gemini-cli.ts +21 -0
  199. package/src/registry/google-vertex.ts +38 -0
  200. package/src/registry/google.ts +6 -0
  201. package/src/registry/groq.ts +6 -0
  202. package/src/{utils/oauth → registry}/huggingface.ts +8 -19
  203. package/src/registry/index.ts +4 -0
  204. package/src/{utils/oauth → registry}/kagi.ts +9 -11
  205. package/src/{utils/oauth → registry}/kilo.ts +11 -6
  206. package/src/registry/kimi-code.ts +17 -0
  207. package/src/{utils/oauth → registry}/litellm.ts +8 -12
  208. package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
  209. package/src/registry/minimax-code-cn.ts +12 -0
  210. package/src/registry/minimax-code.ts +12 -0
  211. package/src/registry/minimax.ts +6 -0
  212. package/src/registry/mistral.ts +6 -0
  213. package/src/{utils/oauth → registry}/moonshot.ts +8 -9
  214. package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
  215. package/src/{utils/oauth → registry}/nvidia.ts +8 -18
  216. package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
  217. package/src/{utils → registry}/oauth/anthropic.ts +38 -17
  218. package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
  219. package/src/registry/oauth/gitlab-duo.ts +198 -0
  220. package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
  221. package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
  222. package/src/registry/oauth/index.ts +164 -0
  223. package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
  224. package/src/{utils → registry}/oauth/types.ts +7 -51
  225. package/src/{utils → registry}/oauth/wafer.ts +1 -1
  226. package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
  227. package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
  228. package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
  229. package/src/{utils/oauth → registry}/ollama.ts +8 -13
  230. package/src/registry/openai-codex-device.ts +18 -0
  231. package/src/registry/openai-codex.ts +19 -0
  232. package/src/registry/openai.ts +6 -0
  233. package/src/registry/opencode-go.ts +12 -0
  234. package/src/registry/opencode-zen.ts +12 -0
  235. package/src/{utils/oauth → registry}/openrouter.ts +10 -2
  236. package/src/{utils/oauth → registry}/parallel.ts +9 -11
  237. package/src/registry/perplexity.ts +13 -0
  238. package/src/{utils/oauth → registry}/qianfan.ts +8 -17
  239. package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
  240. package/src/registry/registry.ts +149 -0
  241. package/src/{utils/oauth → registry}/synthetic.ts +7 -1
  242. package/src/{utils/oauth → registry}/tavily.ts +10 -12
  243. package/src/{utils/oauth → registry}/together.ts +7 -1
  244. package/src/registry/types.ts +56 -0
  245. package/src/{utils/oauth → registry}/venice.ts +8 -12
  246. package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
  247. package/src/{utils/oauth → registry}/vllm.ts +9 -16
  248. package/src/registry/wafer-pass.ts +12 -0
  249. package/src/registry/wafer-serverless.ts +12 -0
  250. package/src/registry/xai-oauth.ts +17 -0
  251. package/src/registry/xai.ts +6 -0
  252. package/src/registry/xiaomi-token-plan-ams.ts +12 -0
  253. package/src/registry/xiaomi-token-plan-cn.ts +12 -0
  254. package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
  255. package/src/registry/xiaomi.ts +12 -0
  256. package/src/{utils/oauth → registry}/zai.ts +10 -22
  257. package/src/{utils/oauth → registry}/zenmux.ts +8 -1
  258. package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
  259. package/src/stream.ts +229 -199
  260. package/src/types.ts +63 -384
  261. package/src/usage/claude.ts +4 -2
  262. package/src/usage/github-copilot.ts +4 -2
  263. package/src/usage/google-antigravity.ts +196 -28
  264. package/src/usage/kimi.ts +1 -1
  265. package/src/usage/minimax-code.ts +5 -6
  266. package/src/usage/openai-codex-reset.ts +174 -0
  267. package/src/usage/openai-codex.ts +19 -2
  268. package/src/usage/zai.ts +2 -1
  269. package/src/usage.ts +93 -4
  270. package/src/utils/abort.ts +14 -0
  271. package/src/utils/event-stream.ts +17 -0
  272. package/src/utils/http-inspector.ts +4 -12
  273. package/src/utils/idle-iterator.ts +250 -79
  274. package/src/utils/openai-http.ts +157 -0
  275. package/src/utils/request-debug.ts +67 -19
  276. package/src/utils/retry-after.ts +1 -1
  277. package/src/utils/retry.ts +23 -2
  278. package/src/utils/schema/CONSTRAINTS.md +4 -2
  279. package/src/utils/schema/fields.ts +16 -0
  280. package/src/utils/schema/json-schema-validator.ts +19 -1
  281. package/src/utils/schema/normalize.ts +80 -8
  282. package/src/utils/schema/stamps.ts +22 -10
  283. package/src/utils/schema/wire.ts +2 -2
  284. package/src/utils/sse-debug.ts +0 -271
  285. package/src/utils/stream-markup-healing.ts +50 -8
  286. package/src/utils/validation.ts +49 -13
  287. package/src/utils.ts +2 -26
  288. package/dist/types/model-cache.d.ts +0 -17
  289. package/dist/types/model-manager.d.ts +0 -64
  290. package/dist/types/model-thinking.d.ts +0 -100
  291. package/dist/types/models.d.ts +0 -12
  292. package/dist/types/provider-models/bundled-references.d.ts +0 -4
  293. package/dist/types/provider-models/descriptors.d.ts +0 -50
  294. package/dist/types/provider-models/google.d.ts +0 -24
  295. package/dist/types/provider-models/index.d.ts +0 -5
  296. package/dist/types/provider-models/ollama.d.ts +0 -7
  297. package/dist/types/provider-models/openai-compat.d.ts +0 -323
  298. package/dist/types/provider-models/special.d.ts +0 -16
  299. package/dist/types/utils/discovery/antigravity.d.ts +0 -61
  300. package/dist/types/utils/discovery/codex.d.ts +0 -38
  301. package/dist/types/utils/discovery/cursor.d.ts +0 -23
  302. package/dist/types/utils/discovery/gemini.d.ts +0 -25
  303. package/dist/types/utils/discovery/index.d.ts +0 -4
  304. package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
  305. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
  306. package/dist/types/utils/oauth/cerebras.d.ts +0 -1
  307. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
  308. package/dist/types/utils/oauth/deepseek.d.ts +0 -10
  309. package/dist/types/utils/oauth/firepass.d.ts +0 -1
  310. package/dist/types/utils/oauth/fireworks.d.ts +0 -1
  311. package/dist/types/utils/oauth/huggingface.d.ts +0 -19
  312. package/dist/types/utils/oauth/kagi.d.ts +0 -17
  313. package/dist/types/utils/oauth/kilo.d.ts +0 -5
  314. package/dist/types/utils/oauth/litellm.d.ts +0 -18
  315. package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
  316. package/dist/types/utils/oauth/moonshot.d.ts +0 -1
  317. package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
  318. package/dist/types/utils/oauth/nvidia.d.ts +0 -18
  319. package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
  320. package/dist/types/utils/oauth/ollama.d.ts +0 -18
  321. package/dist/types/utils/oauth/openrouter.d.ts +0 -1
  322. package/dist/types/utils/oauth/parallel.d.ts +0 -17
  323. package/dist/types/utils/oauth/qianfan.d.ts +0 -17
  324. package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
  325. package/dist/types/utils/oauth/synthetic.d.ts +0 -1
  326. package/dist/types/utils/oauth/tavily.d.ts +0 -17
  327. package/dist/types/utils/oauth/together.d.ts +0 -1
  328. package/dist/types/utils/oauth/venice.d.ts +0 -18
  329. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
  330. package/dist/types/utils/oauth/vllm.d.ts +0 -16
  331. package/dist/types/utils/oauth/zai.d.ts +0 -18
  332. package/dist/types/utils/oauth/zenmux.d.ts +0 -1
  333. package/dist/types/utils/oauth/zhipu.d.ts +0 -18
  334. package/src/model-cache.ts +0 -129
  335. package/src/model-manager.ts +0 -469
  336. package/src/model-thinking.ts +0 -756
  337. package/src/models.json +0 -60287
  338. package/src/models.json.d.ts +0 -9
  339. package/src/models.ts +0 -56
  340. package/src/provider-models/bundled-references.ts +0 -38
  341. package/src/provider-models/descriptors.ts +0 -364
  342. package/src/provider-models/google.ts +0 -88
  343. package/src/provider-models/index.ts +0 -5
  344. package/src/provider-models/ollama.ts +0 -153
  345. package/src/provider-models/openai-compat.ts +0 -2904
  346. package/src/provider-models/special.ts +0 -67
  347. package/src/utils/discovery/antigravity.ts +0 -261
  348. package/src/utils/discovery/codex.ts +0 -371
  349. package/src/utils/discovery/cursor.ts +0 -306
  350. package/src/utils/discovery/gemini.ts +0 -248
  351. package/src/utils/discovery/index.ts +0 -4
  352. package/src/utils/discovery/openai-compatible.ts +0 -224
  353. package/src/utils/oauth/gitlab-duo.ts +0 -123
  354. package/src/utils/oauth/index.ts +0 -502
  355. /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
  356. /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
  357. /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
  358. /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
  359. /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
  360. /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
  361. /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
  362. /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
  363. /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
  364. /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
  365. /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
  366. /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
  367. /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
  368. /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
  369. /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
  370. /package/src/{utils → registry}/oauth/cursor.ts +0 -0
  371. /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
  372. /package/src/{utils → registry}/oauth/kimi.ts +0 -0
  373. /package/src/{utils → registry}/oauth/oauth.html +0 -0
  374. /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
  375. /package/src/{utils → registry}/oauth/opencode.ts +0 -0
  376. /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
  377. /package/src/{utils → registry}/oauth/pkce.ts +0 -0
@@ -1,9 +1,15 @@
1
+ export * from "@prometheus-ai/catalog/effort";
2
+ export * from "@prometheus-ai/catalog/types";
3
+ import type { DeleteArgs, DeleteResult, DiagnosticsArgs, DiagnosticsResult, GrepArgs, GrepResult, LsArgs, LsResult, McpResult, ReadArgs, ReadResult, ShellArgs, ShellResult, WriteArgs, WriteResult } from "@prometheus-ai/catalog/discovery/cursor-gen/agent_pb";
4
+ import type { Effort } from "@prometheus-ai/catalog/effort";
5
+ import type { Api, FetchImpl, Model, Provider, ThinkingBudgets, Usage } from "@prometheus-ai/catalog/types";
1
6
  import type { ZodType, z } from "zod/v4";
7
+ import type { ApiKey } from "./auth-retry";
2
8
  import type { BedrockOptions } from "./providers/amazon-bedrock";
3
9
  import type { AnthropicOptions } from "./providers/anthropic";
10
+ import type { StopDetails } from "./providers/anthropic-wire";
4
11
  import type { AzureOpenAIResponsesOptions } from "./providers/azure-openai-responses";
5
12
  import type { CursorOptions } from "./providers/cursor";
6
- import type { DeleteArgs, DeleteResult, DiagnosticsArgs, DiagnosticsResult, GrepArgs, GrepResult, LsArgs, LsResult, McpResult, ReadArgs, ReadResult, ShellArgs, ShellResult, WriteArgs, WriteResult } from "./providers/cursor/gen/agent_pb";
7
13
  import type { GoogleOptions } from "./providers/google";
8
14
  import type { GoogleGeminiCliOptions } from "./providers/google-gemini-cli";
9
15
  import type { GoogleVertexOptions } from "./providers/google-vertex";
@@ -12,9 +18,20 @@ import type { OpenAICodexResponsesOptions } from "./providers/openai-codex-respo
12
18
  import type { OpenAICompletionsOptions } from "./providers/openai-completions";
13
19
  import type { OpenAIResponsesOptions } from "./providers/openai-responses";
14
20
  import type { AssistantMessageEventStream } from "./utils/event-stream";
21
+ export type { StopDetails } from "./providers/anthropic-wire";
15
22
  export type { AssistantMessageEventStream } from "./utils/event-stream";
16
- export type KnownApi = "openai-completions" | "openai-responses" | "openai-codex-responses" | "azure-openai-responses" | "anthropic-messages" | "bedrock-converse-stream" | "google-generative-ai" | "google-gemini-cli" | "google-vertex" | "ollama-chat" | "cursor-agent";
17
- export type Api = KnownApi | (string & {});
23
+ /**
24
+ * Ceiling on the output-token count prometheus requests from any OpenAI-family endpoint
25
+ * (openai-responses, azure/xai responses, and openai-completions). Mirrors
26
+ * Anthropic's {@link CLAUDE_CODE_MAX_OUTPUT_TOKENS}.
27
+ *
28
+ * Catalog `maxTokens` frequently reflects a model's context window rather than a
29
+ * given upstream's real per-request output cap. OpenRouter, for instance,
30
+ * advertises 131072 output tokens for `z-ai/glm-4.7`, but the Cerebras upstream
31
+ * only allows ~131072 tokens total — so requesting the full ceiling overflows
32
+ * with a 400. Requested output is clamped to this value (and to `model.maxTokens`).
33
+ */
34
+ export declare const OPENAI_MAX_OUTPUT_TOKENS = 64000;
18
35
  export interface ApiOptionsMap {
19
36
  "anthropic-messages": AnthropicOptions;
20
37
  "bedrock-converse-stream": BedrockOptions;
@@ -29,32 +46,6 @@ export interface ApiOptionsMap {
29
46
  "cursor-agent": CursorOptions;
30
47
  }
31
48
  export type OptionsForApi<TApi extends Api> = StreamOptions | (TApi extends keyof ApiOptionsMap ? ApiOptionsMap[TApi] : never);
32
- /** Canonical thinking transport used by a model. */
33
- export type ThinkingControlMode = "effort" | "budget" | "google-level" | "anthropic-adaptive" | "anthropic-budget-effort";
34
- /** Per-model thinking capabilities used to clamp and map user-facing effort levels. */
35
- export interface ThinkingConfig {
36
- /** Least intensive supported user-facing effort level. */
37
- minLevel: Effort;
38
- /** Most intensive supported user-facing effort level. */
39
- maxLevel: Effort;
40
- /**
41
- * Optional explicit list of supported levels. When present, takes precedence over
42
- * the `minLevel`..`maxLevel` range — used to encode discrete sets with gaps
43
- * (e.g. Gemini 3 Pro supports `low` and `high` but not `medium`).
44
- */
45
- levels?: readonly Effort[];
46
- /** Optional default effort applied when this model is selected. Falls back to global default if absent. */
47
- defaultLevel?: Effort;
48
- /** Provider-specific transport used to encode the selected effort. */
49
- mode: ThinkingControlMode;
50
- }
51
- export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "xai-oauth" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "xiaomi-token-plan-sgp" | "xiaomi-token-plan-ams" | "xiaomi-token-plan-cn" | "wafer-pass" | "wafer-serverless" | "zenmux" | "lm-studio";
52
- export type Provider = KnownProvider | string;
53
- import type { Effort } from "./model-thinking";
54
- /** Token budgets for each thinking level (token-based providers only) */
55
- export type ThinkingBudgets = {
56
- [key in Effort]?: number;
57
- };
58
49
  export interface TokenTaskBudget {
59
50
  type: "tokens";
60
51
  total: number;
@@ -129,14 +120,6 @@ export interface RawSseEvent {
129
120
  data: string;
130
121
  raw: string[];
131
122
  }
132
- /**
133
- * `fetch`-compatible function. Accepts any callable matching the standard
134
- * fetch signature; `preconnect` is optional because non-Bun runtimes (browsers,
135
- * test mocks) won't expose it.
136
- */
137
- export type FetchImpl = ((input: string | URL | Request, init?: RequestInit) => Promise<Response>) & {
138
- preconnect?: typeof globalThis.fetch.preconnect;
139
- };
140
123
  export interface StreamOptions {
141
124
  temperature?: number;
142
125
  topP?: number;
@@ -159,12 +142,6 @@ export interface StreamOptions {
159
142
  maxTokens?: number;
160
143
  signal?: AbortSignal;
161
144
  apiKey?: string;
162
- /**
163
- * Called when a provider returns 401 before any replay-unsafe assistant
164
- * event has been emitted. Returning a different key retries the provider
165
- * request once.
166
- */
167
- onAuthError?: (provider: string, apiKey: string, error: unknown) => Promise<string | undefined>;
168
145
  cacheRetention?: CacheRetention;
169
146
  /**
170
147
  * Additional headers to include in provider requests.
@@ -274,7 +251,21 @@ export interface StreamOptions {
274
251
  /** Cursor exec/MCP tool handlers (cursor-agent only). */
275
252
  execHandlers?: CursorExecHandlers;
276
253
  }
277
- export interface SimpleStreamOptions extends StreamOptions {
254
+ export interface SimpleStreamOptions extends Omit<StreamOptions, "apiKey"> {
255
+ /**
256
+ * API key for the request: either a static bearer string, or an
257
+ * {@link ApiKeyResolver} that mints/rotates the key across the central
258
+ * a/b/c auth-retry policy. `streamSimple`/`completeSimple` resolve a
259
+ * resolver to a string before per-provider dispatch, so providers only
260
+ * ever see the resolved {@link StreamOptions.apiKey} string.
261
+ */
262
+ apiKey?: ApiKey;
263
+ /**
264
+ * Legacy static-key retry hook. New callers should pass an {@link ApiKeyResolver}
265
+ * as `apiKey`; `streamSimple` adapts this hook into that resolver shape for
266
+ * older Prometheus runtime paths.
267
+ */
268
+ onAuthError?: (provider: string, oldKey: string, error: unknown) => Promise<string | undefined> | string | undefined;
278
269
  reasoning?: Effort;
279
270
  /**
280
271
  * Force-disable reasoning for the request even when the model supports it.
@@ -345,6 +336,12 @@ export interface ImageContent {
345
336
  type: "image";
346
337
  data: string;
347
338
  mimeType: string;
339
+ /**
340
+ * OpenAI-only resolution hint. `"original"` preserves native resolution
341
+ * (required for snapcompact frames, whose glyphs do not survive the
342
+ * default `auto` downscale). Providers without a detail knob ignore it.
343
+ */
344
+ detail?: "auto" | "low" | "high" | "original";
348
345
  }
349
346
  export interface ToolCall {
350
347
  type: "toolCall";
@@ -362,52 +359,6 @@ export interface ToolCall {
362
359
  */
363
360
  customWireName?: string;
364
361
  }
365
- export interface Usage {
366
- /** Non-cached input tokens (matches the bucket the provider bills as new input). */
367
- input: number;
368
- /** Total output tokens for the turn, including thinking, assistant text, and tool-call argument tokens. */
369
- output: number;
370
- /** Tokens read from the prompt cache. */
371
- cacheRead: number;
372
- /** Tokens written to the prompt cache (cache creation). */
373
- cacheWrite: number;
374
- /** Sum of input + output + cacheRead + cacheWrite. */
375
- totalTokens: number;
376
- /** Copilot premium-request counter, when applicable. */
377
- premiumRequests?: number;
378
- /**
379
- * Reasoning/thinking tokens included in `output`, when the provider reports them
380
- * (OpenAI `output_tokens_details.reasoning_tokens`, Google `thoughtsTokenCount`).
381
- * Always a subset of `output` — non-reasoning output is `output - reasoningTokens`.
382
- *
383
- * Providers that don't expose this leave it undefined rather than guessing;
384
- * `undefined` means unknown, NOT zero.
385
- */
386
- reasoningTokens?: number;
387
- /**
388
- * Cache-write TTL breakdown (Anthropic only). When set, the components sum to
389
- * `cacheWrite`. Absent providers do not populate this.
390
- */
391
- cttl?: {
392
- ephemeral5m?: number;
393
- ephemeral1h?: number;
394
- };
395
- /**
396
- * Server-side tool invocations made during this turn (Anthropic web_search /
397
- * web_fetch, OpenAI built-in tools when reported). Counts requests, not tokens.
398
- */
399
- server?: {
400
- webSearch?: number;
401
- webFetch?: number;
402
- };
403
- cost: {
404
- input: number;
405
- output: number;
406
- cacheRead: number;
407
- cacheWrite: number;
408
- total: number;
409
- };
410
- }
411
362
  export type StopReason = "stop" | "length" | "toolUse" | "error" | "aborted";
412
363
  export interface OpenAIResponsesHistoryPayload {
413
364
  type: "openaiResponsesHistory";
@@ -445,8 +396,17 @@ export interface AssistantMessage {
445
396
  provider: Provider;
446
397
  model: string;
447
398
  responseId?: string;
399
+ /**
400
+ * Name of the upstream provider an aggregator routed this request to, as
401
+ * reported in the response (e.g. OpenRouter's top-level `provider` field:
402
+ * `"OpenAI"`, `"Anthropic"`, `"Together"`). Distinct from `provider`, which
403
+ * is the configured gateway we called (`"openrouter"`). Undefined for direct
404
+ * providers that expose no such field.
405
+ */
406
+ upstreamProvider?: string;
448
407
  usage: Usage;
449
408
  stopReason: StopReason;
409
+ stopDetails?: StopDetails | null;
450
410
  errorMessage?: string;
451
411
  /** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
452
412
  errorStatus?: number;
@@ -475,6 +435,12 @@ export interface ToolResultMessage<TDetails = any> {
475
435
  attribution?: MessageAttribution;
476
436
  /** Timestamp when output was pruned (ms since epoch). Undefined if unpruned. */
477
437
  prunedAt?: number;
438
+ /**
439
+ * Tool-declared: this result carried no information worth retaining once
440
+ * consumed (zero matches, elapsed wait). Compaction passes may elide it.
441
+ * Never set together with isError.
442
+ */
443
+ useless?: boolean;
478
444
  timestamp: number;
479
445
  }
480
446
  export type Message = UserMessage | DeveloperMessage | AssistantMessage | ToolResultMessage;
@@ -612,208 +578,3 @@ export type AssistantMessageEvent = {
612
578
  reason: Extract<StopReason, "aborted" | "error">;
613
579
  error: AssistantMessage;
614
580
  };
615
- /**
616
- * Compatibility settings for openai-completions API.
617
- * Use this to override URL-based auto-detection for custom providers.
618
- */
619
- export interface OpenAICompat {
620
- /** Whether the provider supports the `store` field. Default: auto-detected from URL. */
621
- supportsStore?: boolean;
622
- /** Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. */
623
- supportsDeveloperRole?: boolean;
624
- /**
625
- * Whether the provider's chat-completions endpoint accepts multiple
626
- * leading `system`/`developer` messages. When false, ordered system
627
- * prompts are coalesced into a single message joined by `\n\n` so
628
- * strict chat templates (e.g. Qwen-served via vLLM, MiniMax) accept
629
- * the request. Default: detected per provider/baseUrl. Canonical
630
- * OpenAI/Azure/OpenRouter/Cerebras/Together/Fireworks/Groq/DeepSeek/
631
- * Mistral/xAI/Z.ai/GitHub Copilot/Zenmux are treated as `true`;
632
- * unknown or strict-template hosts default to `false`. Setting this
633
- * to `true` preserves separate blocks, which is preferred for
634
- * KV-cache reuse when the trailing prompt changes between calls.
635
- */
636
- supportsMultipleSystemMessages?: boolean;
637
- /** Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. */
638
- supportsReasoningEffort?: boolean;
639
- /** Optional mapping from Prometheus AI reasoning levels to provider/model-specific `reasoning_effort` values. */
640
- reasoningEffortMap?: Partial<Record<Effort, string>>;
641
- /** Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. */
642
- supportsUsageInStreaming?: boolean;
643
- /** Which field to use for max tokens. Default: auto-detected from URL. */
644
- maxTokensField?: "max_completion_tokens" | "max_tokens";
645
- /** Whether tool results require the `name` field. Default: auto-detected from URL. */
646
- requiresToolResultName?: boolean;
647
- /** Whether a user message after tool results requires an assistant message in between. Default: auto-detected from URL. */
648
- requiresAssistantAfterToolResult?: boolean;
649
- /** Whether thinking blocks must be converted to text blocks with <thinking> delimiters. Default: auto-detected from URL. */
650
- requiresThinkingAsText?: boolean;
651
- /** Whether tool call IDs must be normalized to Mistral format (exactly 9 alphanumeric chars). Default: auto-detected from URL. */
652
- requiresMistralToolIds?: boolean;
653
- /** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "openrouter" uses reasoning: { effort }, "zai" uses thinking: { type: "enabled" | "disabled" } (also used by Moonshot Kimi), "qwen" uses top-level enable_thinking, and "qwen-chat-template" uses chat_template_kwargs.enable_thinking. Default: "openai". */
654
- thinkingFormat?: "openai" | "openrouter" | "zai" | "qwen" | "qwen-chat-template";
655
- /** Optional `thinking.keep` value for Z.ai/Moonshot-style thinking params. Set false to suppress auto-detected keep. Default: auto-detected. */
656
- thinkingKeep?: "all" | false;
657
- /** Which reasoning content field to emit on assistant messages. Default: auto-detected. */
658
- reasoningContentField?: "reasoning_content" | "reasoning" | "reasoning_text";
659
- /** Whether assistant tool-call messages must include reasoning content. Default: false. */
660
- requiresReasoningContentForToolCalls?: boolean;
661
- /** Whether the provider accepts a synthetic placeholder (e.g. ".") for missing reasoning_content on tool-call turns. Default: true. Set to false for providers like DeepSeek that validate the exact reasoning_content value. */
662
- allowsSyntheticReasoningContentForToolCalls?: boolean;
663
- /** Whether assistant tool-call messages must include non-empty content. Default: false. */
664
- requiresAssistantContentForToolCalls?: boolean;
665
- /** Whether the provider supports the `tool_choice` parameter. Default: true. */
666
- supportsToolChoice?: boolean;
667
- /**
668
- * Drop reasoning fields (`reasoning_effort`, OpenRouter `reasoning`) for
669
- * the request when `tool_choice` forces a tool call. Mirrors the Anthropic
670
- * `disableThinkingIfToolChoiceForced` rule for backends like Kimi that
671
- * 400 with `tool_choice 'specified' is incompatible with thinking
672
- * enabled` whenever both are present. Default: auto-detected (Kimi).
673
- */
674
- disableReasoningOnForcedToolChoice?: boolean;
675
- /**
676
- * Drop reasoning fields (`reasoning_effort`, OpenRouter `reasoning`) for
677
- * any request that sends `tool_choice`. Use for providers/models that accept
678
- * tools and `tool_choice`, but reject `tool_choice` while thinking is enabled.
679
- * Default: auto-detected (DeepSeek reasoning models).
680
- */
681
- disableReasoningOnToolChoice?: boolean;
682
- /** OpenRouter-specific routing preferences. Only used when baseUrl points to OpenRouter. */
683
- openRouterRouting?: OpenRouterRouting;
684
- /** Vercel AI Gateway routing preferences. Only used when baseUrl points to Vercel AI Gateway. */
685
- vercelGatewayRouting?: VercelGatewayRouting;
686
- /** Extra fields to include in request body (e.g. gateway routing hints for OpenClaw-style proxies). */
687
- extraBody?: Record<string, unknown>;
688
- /** Whether chat-completions payloads should include provider-specific prompt-cache markers. */
689
- cacheControlFormat?: "anthropic" | undefined;
690
- /** Whether the provider supports the `strict` field in tool definitions. Default: auto-detected per provider/baseUrl (conservative for unknown providers). */
691
- supportsStrictMode?: boolean;
692
- /** Whether tool schemas must be sent either all strict or all non-strict. Undefined keeps the existing per-tool mixed behavior. */
693
- toolStrictMode?: "all_strict" | "none";
694
- }
695
- /**
696
- * Compatibility settings for anthropic-messages API.
697
- * Use this to disable features that strict-by-default Anthropic accepts but
698
- * that proxy gateways (Vertex AI, AWS Bedrock-style fronts, etc.) reject.
699
- */
700
- export interface AnthropicCompat {
701
- /**
702
- * Drop the top-level `strict: true` field on tool definitions. Vertex AI's
703
- * Anthropic-compatible endpoint rejects unknown tool fields with
704
- * `tools.<n>.custom.strict: Extra inputs are not permitted`.
705
- */
706
- disableStrictTools?: boolean;
707
- /**
708
- * Map adaptive thinking (`thinking: { type: "adaptive" }`) to
709
- * `{ type: "enabled", budget_tokens }`. Vertex AI rejects the `adaptive`
710
- * tag with `Input tag 'adaptive' ... does not match any of the expected
711
- * tags: 'disabled', 'enabled'`.
712
- */
713
- disableAdaptiveThinking?: boolean;
714
- /** Whether tools may include Anthropic's per-tool eager_input_streaming flag. Default: true. */
715
- supportsEagerToolInputStreaming?: boolean;
716
- /** Whether long prompt-cache retention (`ttl: "1h"`) is supported. Default: true for canonical Anthropic API. */
717
- supportsLongCacheRetention?: boolean;
718
- /**
719
- * Whether mid-conversation `role: "system"` messages are accepted in the
720
- * `messages` array (Claude Opus 4.8+ on the first-party Claude API and
721
- * Claude Platform on AWS). When unset, auto-detected from the model id and
722
- * base URL. Not available on Bedrock, Vertex AI, or Microsoft Foundry.
723
- */
724
- supportsMidConversationSystem?: boolean;
725
- }
726
- /**
727
- * OpenRouter provider routing preferences.
728
- * Controls which upstream providers OpenRouter routes requests to.
729
- * @see https://openrouter.ai/docs/provider-routing
730
- */
731
- export interface OpenRouterRouting {
732
- /** List of provider slugs to exclusively use for this request (e.g., ["amazon-bedrock", "anthropic"]). */
733
- only?: string[];
734
- /** List of provider slugs to try in order (e.g., ["anthropic", "openai"]). */
735
- order?: string[];
736
- }
737
- /**
738
- * Vercel AI Gateway routing preferences.
739
- * Controls which upstream providers the gateway routes requests to.
740
- * @see https://vercel.com/docs/ai-gateway/models-and-providers/provider-options
741
- */
742
- export interface VercelGatewayRouting {
743
- /** List of provider slugs to exclusively use for this request (e.g., ["bedrock", "anthropic"]). */
744
- only?: string[];
745
- /** List of provider slugs to try in order (e.g., ["anthropic", "openai"]). */
746
- order?: string[];
747
- }
748
- export interface Model<TApi extends Api = any> {
749
- id: string;
750
- name: string;
751
- api: TApi;
752
- provider: Provider;
753
- baseUrl: string;
754
- reasoning: boolean;
755
- input: ("text" | "image")[];
756
- cost: {
757
- input: number;
758
- output: number;
759
- cacheRead: number;
760
- cacheWrite: number;
761
- };
762
- /** Premium Copilot requests charged per user-initiated request (defaults to 1). */
763
- premiumMultiplier?: number;
764
- contextWindow: number;
765
- maxTokens: number;
766
- /**
767
- * When `true`, providers MUST omit `max_output_tokens` (Responses) /
768
- * `max_tokens` / `max_completion_tokens` (Completions) from the outbound
769
- * request and let the upstream API decide the per-response cap. `maxTokens`
770
- * is still used locally for budgeting (compaction, context promotion); only
771
- * the wire field is suppressed.
772
- *
773
- * Use this for proxies (notably Ollama) that forward to a backend whose true
774
- * output limit Prometheus cannot discover — sending the wrong value triggers 400s
775
- * from the upstream provider.
776
- */
777
- omitMaxOutputTokens?: boolean;
778
- headers?: Record<string, string>;
779
- /**
780
- * Streaming transport override. When `"prometheus-native"`, `streamSimple` routes
781
- * the request to the model's `baseUrl` via the auth-gateway's
782
- * `POST /v1/prometheus/stream` endpoint instead of dispatching the per-API
783
- * provider client. The `baseUrl` must point at a `prometheus auth-gateway`
784
- * (or compatible) host; `headers.Authorization` (or `apiKey` resolved by
785
- * the registry) carries the gateway bearer.
786
- *
787
- * Used by containerized prometheus installs (e.g. roboprometheus slots) to route every
788
- * LLM call through a sidecar gateway that holds the real provider
789
- * credentials. The model's other metadata (pricing, context window,
790
- * thinking config, …) still resolves locally; only the streaming
791
- * dispatch is redirected.
792
- */
793
- transport?: "prometheus-native";
794
- /** Hint that websocket transport should be preferred when supported by the provider implementation. */
795
- preferWebsockets?: boolean;
796
- /** Preferred model to switch to when context promotion is triggered (model id or provider/id). */
797
- contextPromotionTarget?: string;
798
- /** Provider-assigned priority value (lower = higher priority). */
799
- priority?: number;
800
- /** Canonical thinking capability metadata for this model. */
801
- thinking?: ThinkingConfig;
802
- /** Compatibility overrides per API. If not set, auto-detected from baseUrl. */
803
- compat?: TApi extends "openai-completions" | "openai-responses" ? OpenAICompat : TApi extends "anthropic-messages" ? AnthropicCompat : never;
804
- /**
805
- * Which shape to use when exposing the Codex `apply_patch` tool to this model.
806
- * Generated catalog policy sets `"freeform"` for first-party GPT-5 Responses
807
- * models that support OpenAI custom tools with a Lark grammar. The freeform
808
- * variant sends a raw patch string with no JSON envelope.
809
- * - `"function"` or undefined: JSON function-tool with `{input: string}` (spec §1.2).
810
- */
811
- applyPatchToolType?: "freeform" | "function";
812
- /**
813
- * Force OAuth-style request shaping for providers whose API key prefix doesn't
814
- * match an OAuth token (e.g. routing Anthropic traffic through a proxy that
815
- * expects Claude Code framing). When true, the streaming layer sets
816
- * `options.isOAuth = true` for the underlying provider call.
817
- */
818
- isOAuth?: boolean;
819
- }
@@ -1,2 +1,16 @@
1
- import type { UsageProvider } from "../usage";
1
+ import type { CredentialRankingStrategy, UsageProvider } from "../usage";
2
2
  export declare const antigravityUsageProvider: UsageProvider;
3
+ /**
4
+ * Antigravity quotas reset daily and are returned per backend counter
5
+ * (Anthropic / Google / OpenAI) without a fixed "primary vs secondary"
6
+ * split. `fetchAntigravityUsage` already sorts `limits` ascending by
7
+ * `remainingFraction`; after model-family scoping, the most-pressured
8
+ * relevant counter is index 0.
9
+ *
10
+ * Leave `secondary` unset: AuthStorage compares secondary metrics before
11
+ * primary metrics, which is correct for providers with explicit long-window
12
+ * limits but wrong here. Ranking Antigravity by the bottleneck counter first
13
+ * avoids preferring an account at 95% Gemini / 0% Claude over one at
14
+ * 80% Gemini / 70% Claude.
15
+ */
16
+ export declare const antigravityRankingStrategy: CredentialRankingStrategy;
@@ -0,0 +1,79 @@
1
+ /**
2
+ * OpenAI Codex "saved rate limit reset" redemption client.
3
+ *
4
+ * OpenAI lets paid Codex accounts bank a usage-window reset and spend it on
5
+ * demand (announced 2026-06-11). The count is surfaced on `/wham/usage` as
6
+ * `rate_limit_reset_credits.available_count` (see `./openai-codex.ts`), but the
7
+ * actual credit objects and the redeem action live on two dedicated routes:
8
+ *
9
+ * GET /wham/rate-limit-reset-credits → list redeemable credits
10
+ * POST /wham/rate-limit-reset-credits/consume → spend one credit
11
+ * body: { credit_id, redeem_request_id }
12
+ *
13
+ * `redeem_request_id` is a client-generated idempotency key (UUID). The consume
14
+ * response carries a `code`: `"reset"` on success, otherwise a business reason
15
+ * (`already_redeemed`, `no_credit`, `nothing_to_reset`).
16
+ *
17
+ * These are thin, dependency-light functions so both the interactive session
18
+ * (the `/usage reset` command + auto-redeem) and any out-of-band tooling can
19
+ * share one wire contract.
20
+ */
21
+ import type { FetchImpl } from "../types";
22
+ /** A single redeemable (or already-spent) saved reset. */
23
+ export interface CodexResetCredit {
24
+ /** Opaque credit id, e.g. `RateLimitResetCredit_…`. Pass to {@link consumeCodexResetCredit}. */
25
+ id: string;
26
+ /** Backend reset family, e.g. `codex_rate_limits`. */
27
+ resetType?: string;
28
+ /** `available`, `redeemed`, … */
29
+ status?: string;
30
+ grantedAt?: string;
31
+ expiresAt?: string;
32
+ redeemStartedAt?: string | null;
33
+ redeemedAt?: string | null;
34
+ /** Human-facing card title, e.g. "One free rate limit reset". */
35
+ title?: string;
36
+ description?: string;
37
+ }
38
+ /** Result of listing an account's saved resets. */
39
+ export interface CodexResetCreditList {
40
+ credits: CodexResetCredit[];
41
+ /** Backend-reported count of credits redeemable right now. */
42
+ availableCount: number;
43
+ }
44
+ /**
45
+ * Consume outcome `code`. `reset` means a window was actually reset; the others
46
+ * are no-op business outcomes the caller should surface verbatim-ish to the user.
47
+ */
48
+ export type CodexResetConsumeCode = "reset" | "already_redeemed" | "no_credit" | "nothing_to_reset" | (string & {});
49
+ export interface CodexResetConsumeResult {
50
+ /** `true` only when `code === "reset"` (a reset was applied). */
51
+ ok: boolean;
52
+ code: CodexResetConsumeCode;
53
+ /** HTTP status of the consume call (for diagnostics). */
54
+ status: number;
55
+ raw?: unknown;
56
+ }
57
+ interface CodexResetAuth {
58
+ accessToken: string;
59
+ accountId?: string;
60
+ /** Provider base URL override; defaults to the Codex backend. */
61
+ baseUrl?: string;
62
+ fetch: FetchImpl;
63
+ signal?: AbortSignal;
64
+ }
65
+ /**
66
+ * List the account's saved rate-limit resets. Returns `null` on transport/auth
67
+ * failure (non-2xx or thrown), letting callers treat it the same as "no data".
68
+ */
69
+ export declare function listCodexResetCredits(auth: CodexResetAuth): Promise<CodexResetCreditList | null>;
70
+ /**
71
+ * Spend one saved reset. `redeemRequestId` is the idempotency key; one is
72
+ * generated when omitted, so retrying with the SAME id is safe and won't
73
+ * double-spend. The returned `code` is `"reset"` on success.
74
+ */
75
+ export declare function consumeCodexResetCredit(auth: CodexResetAuth & {
76
+ creditId: string;
77
+ redeemRequestId?: string;
78
+ }): Promise<CodexResetConsumeResult>;
79
+ export {};
@@ -1,3 +1,4 @@
1
1
  import type { CredentialRankingStrategy, UsageProvider } from "../usage";
2
+ export declare function normalizeCodexBaseUrl(baseUrl?: string): string;
2
3
  export declare const openaiCodexUsageProvider: UsageProvider;
3
4
  export declare const codexRankingStrategy: CredentialRankingStrategy;
@@ -4,8 +4,8 @@
4
4
  * Provides a normalized schema to represent multiple limit windows, model tiers,
5
5
  * and shared quotas across providers.
6
6
  */
7
- import * as z from "zod/v4";
8
- import type { Provider } from "./types";
7
+ import { z } from "zod/v4";
8
+ import type { FetchImpl, Provider } from "./types";
9
9
  export type UsageUnit = "percent" | "tokens" | "requests" | "usd" | "minutes" | "bytes" | "unknown";
10
10
  export type UsageStatus = "ok" | "warning" | "exhausted" | "unknown";
11
11
  /** Time window for a limit (e.g. 5h, 7d, monthly). */
@@ -57,14 +57,64 @@ export interface UsageLimit {
57
57
  status?: UsageStatus;
58
58
  notes?: string[];
59
59
  }
60
+ /**
61
+ * Saved/banked rate-limit resets an account can redeem on demand.
62
+ *
63
+ * Surfaced by providers that let users defer a usage-window reset and spend it
64
+ * later (OpenAI Codex "saved rate limit resets"). The redeem itself is a
65
+ * separate, provider-specific action; this is the read-only count for display.
66
+ */
67
+ export interface UsageResetCredits {
68
+ /** Number of resets available to redeem right now. */
69
+ availableCount: number;
70
+ }
60
71
  /** Aggregated usage report for a provider. */
61
72
  export interface UsageReport {
62
73
  provider: Provider;
63
74
  fetchedAt: number;
64
75
  limits: UsageLimit[];
76
+ /** Saved rate-limit resets the account can redeem, when the provider reports them. */
77
+ resetCredits?: UsageResetCredits;
65
78
  metadata?: Record<string, unknown>;
66
79
  raw?: unknown;
67
80
  }
81
+ /**
82
+ * Resolve a limit's used fraction (0..1; >1 means overage) from whichever
83
+ * amount fields the provider populated. Precedence mirrors the usage UIs:
84
+ * explicit fraction > used/limit > percent-unit used > inverted remaining.
85
+ */
86
+ export declare function resolveUsedFraction(limit: UsageLimit): number | undefined;
87
+ /**
88
+ * One recorded usage-limit snapshot: a single limit window of one account at
89
+ * a point in time. The usage cache itself is latest-snapshot-only; history
90
+ * rows are appended by the auth storage layer whenever a fresh report is
91
+ * fetched, so limit utilization stays inspectable over time.
92
+ */
93
+ export interface UsageHistoryEntry {
94
+ /** Epoch ms the report was fetched. */
95
+ recordedAt: number;
96
+ provider: Provider;
97
+ /** Stable credential identity key (account/email/project derived). */
98
+ accountKey: string;
99
+ email?: string;
100
+ accountId?: string;
101
+ /** {@link UsageLimit.id} of the recorded window. */
102
+ limitId: string;
103
+ /** Human label of the limit. */
104
+ label: string;
105
+ windowLabel?: string;
106
+ /** Used fraction (0..1) when resolvable. */
107
+ usedFraction?: number;
108
+ status?: UsageStatus;
109
+ /** Epoch ms the window resets, when known. */
110
+ resetsAt?: number;
111
+ }
112
+ /** Filter for reading recorded usage history. */
113
+ export interface UsageHistoryQuery {
114
+ provider?: string;
115
+ /** Inclusive lower bound on {@link UsageHistoryEntry.recordedAt} (epoch ms). */
116
+ sinceMs?: number;
117
+ }
68
118
  export declare const usageUnitSchema: z.ZodEnum<{
69
119
  bytes: "bytes";
70
120
  minutes: "minutes";
@@ -155,6 +205,9 @@ export declare const usageLimitSchema: z.ZodObject<{
155
205
  }>>;
156
206
  notes: z.ZodOptional<z.ZodArray<z.ZodString>>;
157
207
  }, z.core.$strip>;
208
+ export declare const usageResetCreditsSchema: z.ZodObject<{
209
+ availableCount: z.ZodNumber;
210
+ }, z.core.$strip>;
158
211
  export declare const usageReportSchema: z.ZodObject<{
159
212
  provider: z.ZodString;
160
213
  fetchedAt: z.ZodNumber;
@@ -201,6 +254,9 @@ export declare const usageReportSchema: z.ZodObject<{
201
254
  }>>;
202
255
  notes: z.ZodOptional<z.ZodArray<z.ZodString>>;
203
256
  }, z.core.$strip>>;
257
+ resetCredits: z.ZodOptional<z.ZodObject<{
258
+ availableCount: z.ZodNumber;
259
+ }, z.core.$strip>>;
204
260
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
205
261
  raw: z.ZodOptional<z.ZodUnknown>;
206
262
  }, z.core.$strip>;
@@ -231,7 +287,7 @@ export interface UsageFetchParams {
231
287
  }
232
288
  /** Shared runtime utilities for fetchers. */
233
289
  export interface UsageFetchContext {
234
- fetch: typeof fetch;
290
+ fetch: FetchImpl;
235
291
  logger?: UsageLogger;
236
292
  retryWait?: (delayMs: number, signal?: AbortSignal) => Promise<void>;
237
293
  }
@@ -243,13 +299,30 @@ export interface UsageProvider {
243
299
  parseRateLimitHeaders?(headers: Record<string, string>, now?: number): UsageReport | null;
244
300
  supports?(params: UsageFetchParams): boolean;
245
301
  }
302
+ /** Request context used when ranking usage for a specific model. */
303
+ export interface CredentialRankingContext {
304
+ /** Provider model id, when the caller is selecting a credential for one model. */
305
+ modelId?: string;
306
+ }
246
307
  /** Strategy for usage-based credential ranking. Providers implement this to opt into smart credential selection. */
247
308
  export interface CredentialRankingStrategy {
248
309
  /** Extract the primary (short) and secondary (long) window limits from a usage report. */
249
- findWindowLimits(report: UsageReport): {
310
+ findWindowLimits(report: UsageReport, context?: CredentialRankingContext): {
250
311
  primary?: UsageLimit;
251
312
  secondary?: UsageLimit;
252
313
  };
314
+ /**
315
+ * Restrict limits to the ones relevant for the requested model before
316
+ * credential-wide exhaustion checks and ranking. Providers with shared
317
+ * account-wide quotas can omit this and use all limits.
318
+ */
319
+ scopeLimits?(report: UsageReport, context?: CredentialRankingContext): UsageLimit[];
320
+ /**
321
+ * Return a provider-local backoff scope for the requested model. Providers
322
+ * with backend-specific quotas use this so one exhausted model family does
323
+ * not block unrelated families on the same OAuth credential.
324
+ */
325
+ blockScope?(context?: CredentialRankingContext): string | undefined;
253
326
  /** Fallback window durations (ms) when limits don't specify durationMs. */
254
327
  windowDefaults: {
255
328
  primaryMs: number;