@prometheus-ai/ai 0.5.4 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377) hide show
  1. package/dist/types/auth-broker/remote-store.d.ts +2 -1
  2. package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
  3. package/dist/types/auth-gateway/server.d.ts +19 -0
  4. package/dist/types/auth-gateway/types.d.ts +9 -3
  5. package/dist/types/auth-retry.d.ts +119 -0
  6. package/dist/types/auth-storage.d.ts +217 -8
  7. package/dist/types/errors.d.ts +24 -0
  8. package/dist/types/index.d.ts +5 -9
  9. package/dist/types/provider-details.d.ts +1 -1
  10. package/dist/types/providers/amazon-bedrock.d.ts +12 -6
  11. package/dist/types/providers/anthropic-client.d.ts +10 -3
  12. package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
  13. package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
  14. package/dist/types/providers/anthropic-wire.d.ts +3 -3
  15. package/dist/types/providers/anthropic.d.ts +41 -34
  16. package/dist/types/providers/aws-credentials.d.ts +8 -0
  17. package/dist/types/providers/azure-openai-responses.d.ts +1 -0
  18. package/dist/types/providers/google-gemini-cli.d.ts +22 -1
  19. package/dist/types/providers/google-shared.d.ts +22 -0
  20. package/dist/types/providers/google-types.d.ts +13 -1
  21. package/dist/types/providers/mock.d.ts +8 -3
  22. package/dist/types/providers/ollama.d.ts +6 -0
  23. package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
  24. package/dist/types/providers/openai-chat-server.d.ts +3 -3
  25. package/dist/types/providers/openai-chat-wire.d.ts +644 -0
  26. package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
  27. package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
  28. package/dist/types/providers/openai-codex-responses.d.ts +31 -2
  29. package/dist/types/providers/openai-completions-compat.d.ts +2 -25
  30. package/dist/types/providers/openai-completions.d.ts +2 -10
  31. package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
  32. package/dist/types/providers/openai-responses-server.d.ts +2 -2
  33. package/dist/types/providers/openai-responses-shared.d.ts +49 -9
  34. package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
  35. package/dist/types/providers/openai-responses.d.ts +13 -4
  36. package/dist/types/providers/prometheus-native-client.d.ts +9 -0
  37. package/dist/types/providers/prometheus-native-server.d.ts +4 -3
  38. package/dist/types/providers/transform-messages.d.ts +1 -2
  39. package/dist/types/rate-limit-utils.d.ts +3 -2
  40. package/dist/types/registry/aimlapi.d.ts +4 -0
  41. package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
  42. package/dist/types/registry/amazon-bedrock.d.ts +5 -0
  43. package/dist/types/registry/anthropic.d.ts +10 -0
  44. package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
  45. package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
  46. package/dist/types/registry/cerebras.d.ts +7 -0
  47. package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
  48. package/dist/types/registry/cursor.d.ts +7 -0
  49. package/dist/types/registry/deepseek.d.ts +8 -0
  50. package/dist/types/registry/derived.d.ts +5 -0
  51. package/dist/types/registry/firepass.d.ts +16 -0
  52. package/dist/types/registry/fireworks.d.ts +7 -0
  53. package/dist/types/registry/github-copilot.d.ts +7 -0
  54. package/dist/types/registry/gitlab-duo.d.ts +9 -0
  55. package/dist/types/registry/google-antigravity.d.ts +9 -0
  56. package/dist/types/registry/google-gemini-cli.d.ts +9 -0
  57. package/dist/types/registry/google-vertex.d.ts +5 -0
  58. package/dist/types/registry/google.d.ts +4 -0
  59. package/dist/types/registry/groq.d.ts +4 -0
  60. package/dist/types/registry/huggingface.d.ts +7 -0
  61. package/dist/types/registry/index.d.ts +4 -0
  62. package/dist/types/registry/kagi.d.ts +14 -0
  63. package/dist/types/registry/kilo.d.ts +7 -0
  64. package/dist/types/registry/kimi-code.d.ts +7 -0
  65. package/dist/types/registry/litellm.d.ts +13 -0
  66. package/dist/types/registry/lm-studio.d.ts +8 -0
  67. package/dist/types/registry/minimax-code-cn.d.ts +6 -0
  68. package/dist/types/registry/minimax-code.d.ts +6 -0
  69. package/dist/types/registry/minimax.d.ts +4 -0
  70. package/dist/types/registry/mistral.d.ts +4 -0
  71. package/dist/types/registry/moonshot.d.ts +7 -0
  72. package/dist/types/registry/nanogpt.d.ts +7 -0
  73. package/dist/types/registry/nvidia.d.ts +7 -0
  74. package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  75. package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
  76. package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
  77. package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
  78. package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
  79. package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
  80. package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
  81. package/dist/types/registry/ollama-cloud.d.ts +7 -0
  82. package/dist/types/registry/ollama.d.ts +12 -0
  83. package/dist/types/registry/openai-codex-device.d.ts +8 -0
  84. package/dist/types/registry/openai-codex.d.ts +9 -0
  85. package/dist/types/registry/openai.d.ts +4 -0
  86. package/dist/types/registry/opencode-go.d.ts +6 -0
  87. package/dist/types/registry/opencode-zen.d.ts +6 -0
  88. package/dist/types/registry/openrouter.d.ts +13 -0
  89. package/dist/types/registry/parallel.d.ts +14 -0
  90. package/dist/types/registry/perplexity.d.ts +7 -0
  91. package/dist/types/registry/qianfan.d.ts +7 -0
  92. package/dist/types/registry/qwen-portal.d.ts +7 -0
  93. package/dist/types/registry/registry.d.ts +272 -0
  94. package/dist/types/registry/synthetic.d.ts +6 -0
  95. package/dist/types/registry/tavily.d.ts +14 -0
  96. package/dist/types/registry/together.d.ts +6 -0
  97. package/dist/types/registry/types.d.ts +51 -0
  98. package/dist/types/registry/venice.d.ts +13 -0
  99. package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
  100. package/dist/types/registry/vllm.d.ts +7 -0
  101. package/dist/types/registry/wafer-pass.d.ts +6 -0
  102. package/dist/types/registry/wafer-serverless.d.ts +6 -0
  103. package/dist/types/registry/xai-oauth.d.ts +7 -0
  104. package/dist/types/registry/xai.d.ts +4 -0
  105. package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
  106. package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
  107. package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
  108. package/dist/types/registry/xiaomi.d.ts +6 -0
  109. package/dist/types/registry/zai.d.ts +7 -0
  110. package/dist/types/registry/zenmux.d.ts +7 -0
  111. package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
  112. package/dist/types/stream.d.ts +9 -1
  113. package/dist/types/types.d.ts +56 -295
  114. package/dist/types/usage/google-antigravity.d.ts +15 -1
  115. package/dist/types/usage/openai-codex-reset.d.ts +79 -0
  116. package/dist/types/usage/openai-codex.d.ts +1 -0
  117. package/dist/types/usage.d.ts +77 -4
  118. package/dist/types/utils/abort.d.ts +6 -0
  119. package/dist/types/utils/event-stream.d.ts +2 -0
  120. package/dist/types/utils/http-inspector.d.ts +0 -1
  121. package/dist/types/utils/idle-iterator.d.ts +35 -0
  122. package/dist/types/utils/openai-http.d.ts +58 -0
  123. package/dist/types/utils/request-debug.d.ts +3 -0
  124. package/dist/types/utils/retry-after.d.ts +1 -0
  125. package/dist/types/utils/schema/fields.d.ts +5 -0
  126. package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
  127. package/dist/types/utils/schema/stamps.d.ts +7 -15
  128. package/dist/types/utils/sse-debug.d.ts +0 -5
  129. package/dist/types/utils/stream-markup-healing.d.ts +2 -0
  130. package/dist/types/utils.d.ts +1 -5
  131. package/package.json +17 -29
  132. package/src/auth-broker/remote-store.ts +10 -1
  133. package/src/auth-broker/snapshot-cache.ts +1 -1
  134. package/src/auth-broker/wire-schemas.ts +1 -1
  135. package/src/auth-gateway/http.ts +1 -1
  136. package/src/auth-gateway/server.ts +95 -30
  137. package/src/auth-gateway/types.ts +10 -2
  138. package/src/auth-retry.ts +238 -0
  139. package/src/auth-storage.ts +935 -430
  140. package/src/errors.ts +32 -0
  141. package/src/index.ts +9 -14
  142. package/src/provider-details.ts +1 -1
  143. package/src/providers/__tests__/google-auth.test.ts +144 -0
  144. package/src/providers/amazon-bedrock.ts +70 -40
  145. package/src/providers/anthropic-client.ts +15 -13
  146. package/src/providers/anthropic-messages-server-schema.ts +17 -7
  147. package/src/providers/anthropic-messages-server.ts +88 -20
  148. package/src/providers/anthropic-wire.ts +4 -3
  149. package/src/providers/anthropic.ts +1234 -621
  150. package/src/providers/aws-credentials.ts +47 -5
  151. package/src/providers/aws-eventstream.ts +5 -0
  152. package/src/providers/azure-openai-responses.ts +117 -67
  153. package/src/providers/cursor.ts +30 -30
  154. package/src/providers/github-copilot-headers.ts +1 -1
  155. package/src/providers/gitlab-duo.ts +36 -29
  156. package/src/providers/google-auth.ts +71 -8
  157. package/src/providers/google-gemini-cli.ts +118 -22
  158. package/src/providers/google-shared.ts +163 -43
  159. package/src/providers/google-types.ts +10 -1
  160. package/src/providers/kimi.ts +1 -1
  161. package/src/providers/mock.ts +11 -3
  162. package/src/providers/ollama.ts +64 -7
  163. package/src/providers/openai-anthropic-shim.ts +17 -8
  164. package/src/providers/openai-chat-server-schema.ts +9 -3
  165. package/src/providers/openai-chat-server.ts +82 -16
  166. package/src/providers/openai-chat-wire.ts +847 -0
  167. package/src/providers/openai-codex/request-transformer.ts +129 -34
  168. package/src/providers/openai-codex/response-handler.ts +22 -1
  169. package/src/providers/openai-codex-responses.ts +699 -247
  170. package/src/providers/openai-completions-compat.ts +8 -308
  171. package/src/providers/openai-completions.ts +416 -267
  172. package/src/providers/openai-responses-server-schema.ts +15 -9
  173. package/src/providers/openai-responses-server.ts +162 -114
  174. package/src/providers/openai-responses-shared.ts +320 -82
  175. package/src/providers/openai-responses-wire.ts +6391 -0
  176. package/src/providers/openai-responses.ts +382 -176
  177. package/src/providers/prometheus-native-client.ts +27 -11
  178. package/src/providers/prometheus-native-server.ts +44 -17
  179. package/src/providers/transform-messages.ts +311 -120
  180. package/src/providers/vision-guard.ts +5 -3
  181. package/src/rate-limit-utils.ts +13 -3
  182. package/src/registry/aimlapi.ts +6 -0
  183. package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
  184. package/src/registry/amazon-bedrock.ts +22 -0
  185. package/src/registry/anthropic.ts +26 -0
  186. package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
  187. package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
  188. package/src/{utils/oauth → registry}/cerebras.ts +8 -1
  189. package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
  190. package/src/registry/cursor.ts +20 -0
  191. package/src/{utils/oauth → registry}/deepseek.ts +9 -17
  192. package/src/registry/derived.ts +9 -0
  193. package/src/{utils/oauth → registry}/firepass.ts +10 -2
  194. package/src/{utils/oauth → registry}/fireworks.ts +8 -1
  195. package/src/registry/github-copilot.ts +22 -0
  196. package/src/registry/gitlab-duo.ts +19 -0
  197. package/src/registry/google-antigravity.ts +21 -0
  198. package/src/registry/google-gemini-cli.ts +21 -0
  199. package/src/registry/google-vertex.ts +38 -0
  200. package/src/registry/google.ts +6 -0
  201. package/src/registry/groq.ts +6 -0
  202. package/src/{utils/oauth → registry}/huggingface.ts +8 -19
  203. package/src/registry/index.ts +4 -0
  204. package/src/{utils/oauth → registry}/kagi.ts +9 -11
  205. package/src/{utils/oauth → registry}/kilo.ts +11 -6
  206. package/src/registry/kimi-code.ts +17 -0
  207. package/src/{utils/oauth → registry}/litellm.ts +8 -12
  208. package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
  209. package/src/registry/minimax-code-cn.ts +12 -0
  210. package/src/registry/minimax-code.ts +12 -0
  211. package/src/registry/minimax.ts +6 -0
  212. package/src/registry/mistral.ts +6 -0
  213. package/src/{utils/oauth → registry}/moonshot.ts +8 -9
  214. package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
  215. package/src/{utils/oauth → registry}/nvidia.ts +8 -18
  216. package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
  217. package/src/{utils → registry}/oauth/anthropic.ts +38 -17
  218. package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
  219. package/src/registry/oauth/gitlab-duo.ts +198 -0
  220. package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
  221. package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
  222. package/src/registry/oauth/index.ts +164 -0
  223. package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
  224. package/src/{utils → registry}/oauth/types.ts +7 -51
  225. package/src/{utils → registry}/oauth/wafer.ts +1 -1
  226. package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
  227. package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
  228. package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
  229. package/src/{utils/oauth → registry}/ollama.ts +8 -13
  230. package/src/registry/openai-codex-device.ts +18 -0
  231. package/src/registry/openai-codex.ts +19 -0
  232. package/src/registry/openai.ts +6 -0
  233. package/src/registry/opencode-go.ts +12 -0
  234. package/src/registry/opencode-zen.ts +12 -0
  235. package/src/{utils/oauth → registry}/openrouter.ts +10 -2
  236. package/src/{utils/oauth → registry}/parallel.ts +9 -11
  237. package/src/registry/perplexity.ts +13 -0
  238. package/src/{utils/oauth → registry}/qianfan.ts +8 -17
  239. package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
  240. package/src/registry/registry.ts +149 -0
  241. package/src/{utils/oauth → registry}/synthetic.ts +7 -1
  242. package/src/{utils/oauth → registry}/tavily.ts +10 -12
  243. package/src/{utils/oauth → registry}/together.ts +7 -1
  244. package/src/registry/types.ts +56 -0
  245. package/src/{utils/oauth → registry}/venice.ts +8 -12
  246. package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
  247. package/src/{utils/oauth → registry}/vllm.ts +9 -16
  248. package/src/registry/wafer-pass.ts +12 -0
  249. package/src/registry/wafer-serverless.ts +12 -0
  250. package/src/registry/xai-oauth.ts +17 -0
  251. package/src/registry/xai.ts +6 -0
  252. package/src/registry/xiaomi-token-plan-ams.ts +12 -0
  253. package/src/registry/xiaomi-token-plan-cn.ts +12 -0
  254. package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
  255. package/src/registry/xiaomi.ts +12 -0
  256. package/src/{utils/oauth → registry}/zai.ts +10 -22
  257. package/src/{utils/oauth → registry}/zenmux.ts +8 -1
  258. package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
  259. package/src/stream.ts +229 -199
  260. package/src/types.ts +63 -384
  261. package/src/usage/claude.ts +4 -2
  262. package/src/usage/github-copilot.ts +4 -2
  263. package/src/usage/google-antigravity.ts +196 -28
  264. package/src/usage/kimi.ts +1 -1
  265. package/src/usage/minimax-code.ts +5 -6
  266. package/src/usage/openai-codex-reset.ts +174 -0
  267. package/src/usage/openai-codex.ts +19 -2
  268. package/src/usage/zai.ts +2 -1
  269. package/src/usage.ts +93 -4
  270. package/src/utils/abort.ts +14 -0
  271. package/src/utils/event-stream.ts +17 -0
  272. package/src/utils/http-inspector.ts +4 -12
  273. package/src/utils/idle-iterator.ts +250 -79
  274. package/src/utils/openai-http.ts +157 -0
  275. package/src/utils/request-debug.ts +67 -19
  276. package/src/utils/retry-after.ts +1 -1
  277. package/src/utils/retry.ts +23 -2
  278. package/src/utils/schema/CONSTRAINTS.md +4 -2
  279. package/src/utils/schema/fields.ts +16 -0
  280. package/src/utils/schema/json-schema-validator.ts +19 -1
  281. package/src/utils/schema/normalize.ts +80 -8
  282. package/src/utils/schema/stamps.ts +22 -10
  283. package/src/utils/schema/wire.ts +2 -2
  284. package/src/utils/sse-debug.ts +0 -271
  285. package/src/utils/stream-markup-healing.ts +50 -8
  286. package/src/utils/validation.ts +49 -13
  287. package/src/utils.ts +2 -26
  288. package/dist/types/model-cache.d.ts +0 -17
  289. package/dist/types/model-manager.d.ts +0 -64
  290. package/dist/types/model-thinking.d.ts +0 -100
  291. package/dist/types/models.d.ts +0 -12
  292. package/dist/types/provider-models/bundled-references.d.ts +0 -4
  293. package/dist/types/provider-models/descriptors.d.ts +0 -50
  294. package/dist/types/provider-models/google.d.ts +0 -24
  295. package/dist/types/provider-models/index.d.ts +0 -5
  296. package/dist/types/provider-models/ollama.d.ts +0 -7
  297. package/dist/types/provider-models/openai-compat.d.ts +0 -323
  298. package/dist/types/provider-models/special.d.ts +0 -16
  299. package/dist/types/utils/discovery/antigravity.d.ts +0 -61
  300. package/dist/types/utils/discovery/codex.d.ts +0 -38
  301. package/dist/types/utils/discovery/cursor.d.ts +0 -23
  302. package/dist/types/utils/discovery/gemini.d.ts +0 -25
  303. package/dist/types/utils/discovery/index.d.ts +0 -4
  304. package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
  305. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
  306. package/dist/types/utils/oauth/cerebras.d.ts +0 -1
  307. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
  308. package/dist/types/utils/oauth/deepseek.d.ts +0 -10
  309. package/dist/types/utils/oauth/firepass.d.ts +0 -1
  310. package/dist/types/utils/oauth/fireworks.d.ts +0 -1
  311. package/dist/types/utils/oauth/huggingface.d.ts +0 -19
  312. package/dist/types/utils/oauth/kagi.d.ts +0 -17
  313. package/dist/types/utils/oauth/kilo.d.ts +0 -5
  314. package/dist/types/utils/oauth/litellm.d.ts +0 -18
  315. package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
  316. package/dist/types/utils/oauth/moonshot.d.ts +0 -1
  317. package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
  318. package/dist/types/utils/oauth/nvidia.d.ts +0 -18
  319. package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
  320. package/dist/types/utils/oauth/ollama.d.ts +0 -18
  321. package/dist/types/utils/oauth/openrouter.d.ts +0 -1
  322. package/dist/types/utils/oauth/parallel.d.ts +0 -17
  323. package/dist/types/utils/oauth/qianfan.d.ts +0 -17
  324. package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
  325. package/dist/types/utils/oauth/synthetic.d.ts +0 -1
  326. package/dist/types/utils/oauth/tavily.d.ts +0 -17
  327. package/dist/types/utils/oauth/together.d.ts +0 -1
  328. package/dist/types/utils/oauth/venice.d.ts +0 -18
  329. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
  330. package/dist/types/utils/oauth/vllm.d.ts +0 -16
  331. package/dist/types/utils/oauth/zai.d.ts +0 -18
  332. package/dist/types/utils/oauth/zenmux.d.ts +0 -1
  333. package/dist/types/utils/oauth/zhipu.d.ts +0 -18
  334. package/src/model-cache.ts +0 -129
  335. package/src/model-manager.ts +0 -469
  336. package/src/model-thinking.ts +0 -756
  337. package/src/models.json +0 -60287
  338. package/src/models.json.d.ts +0 -9
  339. package/src/models.ts +0 -56
  340. package/src/provider-models/bundled-references.ts +0 -38
  341. package/src/provider-models/descriptors.ts +0 -364
  342. package/src/provider-models/google.ts +0 -88
  343. package/src/provider-models/index.ts +0 -5
  344. package/src/provider-models/ollama.ts +0 -153
  345. package/src/provider-models/openai-compat.ts +0 -2904
  346. package/src/provider-models/special.ts +0 -67
  347. package/src/utils/discovery/antigravity.ts +0 -261
  348. package/src/utils/discovery/codex.ts +0 -371
  349. package/src/utils/discovery/cursor.ts +0 -306
  350. package/src/utils/discovery/gemini.ts +0 -248
  351. package/src/utils/discovery/index.ts +0 -4
  352. package/src/utils/discovery/openai-compatible.ts +0 -224
  353. package/src/utils/oauth/gitlab-duo.ts +0 -123
  354. package/src/utils/oauth/index.ts +0 -502
  355. /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
  356. /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
  357. /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
  358. /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
  359. /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
  360. /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
  361. /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
  362. /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
  363. /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
  364. /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
  365. /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
  366. /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
  367. /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
  368. /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
  369. /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
  370. /package/src/{utils → registry}/oauth/cursor.ts +0 -0
  371. /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
  372. /package/src/{utils → registry}/oauth/kimi.ts +0 -0
  373. /package/src/{utils → registry}/oauth/oauth.html +0 -0
  374. /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
  375. /package/src/{utils → registry}/oauth/opencode.ts +0 -0
  376. /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
  377. /package/src/{utils → registry}/oauth/pkce.ts +0 -0
package/src/usage.ts CHANGED
@@ -4,8 +4,8 @@
4
4
  * Provides a normalized schema to represent multiple limit windows, model tiers,
5
5
  * and shared quotas across providers.
6
6
  */
7
- import * as z from "zod/v4";
8
- import type { Provider } from "./types";
7
+ import { z } from "zod/v4";
8
+ import type { FetchImpl, Provider } from "./types";
9
9
  export type UsageUnit = "percent" | "tokens" | "requests" | "usd" | "minutes" | "bytes" | "unknown";
10
10
 
11
11
  export type UsageStatus = "ok" | "warning" | "exhausted" | "unknown";
@@ -63,15 +63,78 @@ export interface UsageLimit {
63
63
  notes?: string[];
64
64
  }
65
65
 
66
+ /**
67
+ * Saved/banked rate-limit resets an account can redeem on demand.
68
+ *
69
+ * Surfaced by providers that let users defer a usage-window reset and spend it
70
+ * later (OpenAI Codex "saved rate limit resets"). The redeem itself is a
71
+ * separate, provider-specific action; this is the read-only count for display.
72
+ */
73
+ export interface UsageResetCredits {
74
+ /** Number of resets available to redeem right now. */
75
+ availableCount: number;
76
+ }
77
+
66
78
  /** Aggregated usage report for a provider. */
67
79
  export interface UsageReport {
68
80
  provider: Provider;
69
81
  fetchedAt: number;
70
82
  limits: UsageLimit[];
83
+ /** Saved rate-limit resets the account can redeem, when the provider reports them. */
84
+ resetCredits?: UsageResetCredits;
71
85
  metadata?: Record<string, unknown>;
72
86
  raw?: unknown;
73
87
  }
74
88
 
89
+ /**
90
+ * Resolve a limit's used fraction (0..1; >1 means overage) from whichever
91
+ * amount fields the provider populated. Precedence mirrors the usage UIs:
92
+ * explicit fraction > used/limit > percent-unit used > inverted remaining.
93
+ */
94
+ export function resolveUsedFraction(limit: UsageLimit): number | undefined {
95
+ const amount = limit.amount;
96
+ if (amount.usedFraction !== undefined) return amount.usedFraction;
97
+ if (amount.used !== undefined && amount.limit !== undefined && amount.limit > 0) {
98
+ return amount.used / amount.limit;
99
+ }
100
+ if (amount.unit === "percent" && amount.used !== undefined) return amount.used / 100;
101
+ if (amount.remainingFraction !== undefined) return Math.max(0, 1 - amount.remainingFraction);
102
+ return undefined;
103
+ }
104
+
105
+ /**
106
+ * One recorded usage-limit snapshot: a single limit window of one account at
107
+ * a point in time. The usage cache itself is latest-snapshot-only; history
108
+ * rows are appended by the auth storage layer whenever a fresh report is
109
+ * fetched, so limit utilization stays inspectable over time.
110
+ */
111
+ export interface UsageHistoryEntry {
112
+ /** Epoch ms the report was fetched. */
113
+ recordedAt: number;
114
+ provider: Provider;
115
+ /** Stable credential identity key (account/email/project derived). */
116
+ accountKey: string;
117
+ email?: string;
118
+ accountId?: string;
119
+ /** {@link UsageLimit.id} of the recorded window. */
120
+ limitId: string;
121
+ /** Human label of the limit. */
122
+ label: string;
123
+ windowLabel?: string;
124
+ /** Used fraction (0..1) when resolvable. */
125
+ usedFraction?: number;
126
+ status?: UsageStatus;
127
+ /** Epoch ms the window resets, when known. */
128
+ resetsAt?: number;
129
+ }
130
+
131
+ /** Filter for reading recorded usage history. */
132
+ export interface UsageHistoryQuery {
133
+ provider?: string;
134
+ /** Inclusive lower bound on {@link UsageHistoryEntry.recordedAt} (epoch ms). */
135
+ sinceMs?: number;
136
+ }
137
+
75
138
  // ─── Zod schemas (wire-shape validation for the broker `/v1/usage` endpoint) ─
76
139
 
77
140
  export const usageUnitSchema = z.enum(["percent", "tokens", "requests", "usd", "minutes", "bytes", "unknown"]);
@@ -114,10 +177,15 @@ export const usageLimitSchema = z.object({
114
177
  notes: z.array(z.string()).optional(),
115
178
  });
116
179
 
180
+ export const usageResetCreditsSchema = z.object({
181
+ availableCount: z.number(),
182
+ });
183
+
117
184
  export const usageReportSchema = z.object({
118
185
  provider: z.string(),
119
186
  fetchedAt: z.number(),
120
187
  limits: z.array(usageLimitSchema),
188
+ resetCredits: usageResetCreditsSchema.optional(),
121
189
  metadata: z.record(z.string(), z.unknown()).optional(),
122
190
  // `raw` is provider-specific and may be anything; the broker strips it before
123
191
  // sending the report over the wire, so accept-but-ignore here.
@@ -154,7 +222,7 @@ export interface UsageFetchParams {
154
222
 
155
223
  /** Shared runtime utilities for fetchers. */
156
224
  export interface UsageFetchContext {
157
- fetch: typeof fetch;
225
+ fetch: FetchImpl;
158
226
  logger?: UsageLogger;
159
227
  retryWait?: (delayMs: number, signal?: AbortSignal) => Promise<void>;
160
228
  }
@@ -168,13 +236,34 @@ export interface UsageProvider {
168
236
  supports?(params: UsageFetchParams): boolean;
169
237
  }
170
238
 
239
+ /** Request context used when ranking usage for a specific model. */
240
+ export interface CredentialRankingContext {
241
+ /** Provider model id, when the caller is selecting a credential for one model. */
242
+ modelId?: string;
243
+ }
244
+
171
245
  /** Strategy for usage-based credential ranking. Providers implement this to opt into smart credential selection. */
172
246
  export interface CredentialRankingStrategy {
173
247
  /** Extract the primary (short) and secondary (long) window limits from a usage report. */
174
- findWindowLimits(report: UsageReport): {
248
+ findWindowLimits(
249
+ report: UsageReport,
250
+ context?: CredentialRankingContext,
251
+ ): {
175
252
  primary?: UsageLimit;
176
253
  secondary?: UsageLimit;
177
254
  };
255
+ /**
256
+ * Restrict limits to the ones relevant for the requested model before
257
+ * credential-wide exhaustion checks and ranking. Providers with shared
258
+ * account-wide quotas can omit this and use all limits.
259
+ */
260
+ scopeLimits?(report: UsageReport, context?: CredentialRankingContext): UsageLimit[];
261
+ /**
262
+ * Return a provider-local backoff scope for the requested model. Providers
263
+ * with backend-specific quotas use this so one exhausted model family does
264
+ * not block unrelated families on the same OAuth credential.
265
+ */
266
+ blockScope?(context?: CredentialRankingContext): string | undefined;
178
267
  /** Fallback window durations (ms) when limits don't specify durationMs. */
179
268
  windowDefaults: {
180
269
  primaryMs: number;
@@ -49,3 +49,17 @@ export function createAbortSourceTracker(callerSignal?: AbortSignal): AbortSourc
49
49
  },
50
50
  };
51
51
  }
52
+
53
+ /**
54
+ * Race a shared promise against a caller's AbortSignal without coupling the
55
+ * underlying work to that signal. The shared promise keeps running (and caches
56
+ * its result) even when an individual caller bails out.
57
+ */
58
+ export function raceWithSignal<T>(promise: Promise<T>, signal: AbortSignal | undefined): Promise<T> {
59
+ if (!signal) return promise;
60
+ if (signal.aborted) return Promise.reject(signal.reason ?? new Error("Request was aborted"));
61
+ const { promise: aborted, reject } = Promise.withResolvers<never>();
62
+ const onAbort = () => reject(signal.reason ?? new Error("Request was aborted"));
63
+ signal.addEventListener("abort", onAbort, { once: true });
64
+ return Promise.race([promise, aborted]).finally(() => signal.removeEventListener("abort", onAbort));
65
+ }
@@ -5,6 +5,8 @@ export class EventStream<T, R = T> implements AsyncIterable<T> {
5
5
  queue: T[] = [];
6
6
  waiting: Array<{ resolve: (value: IteratorResult<T>) => void; reject: (err: unknown) => void }> = [];
7
7
  done = false;
8
+ /** True once finalResultPromise has been resolved or rejected. */
9
+ resultSettled = false;
8
10
  #failed = false;
9
11
  #error: unknown = undefined;
10
12
  finalResultPromise: Promise<R>;
@@ -30,6 +32,7 @@ export class EventStream<T, R = T> implements AsyncIterable<T> {
30
32
 
31
33
  if (this.isComplete(event)) {
32
34
  this.done = true;
35
+ this.resultSettled = true;
33
36
  this.resolveFinalResult(this.extractResult(event));
34
37
  }
35
38
 
@@ -54,7 +57,13 @@ export class EventStream<T, R = T> implements AsyncIterable<T> {
54
57
  end(result?: R): void {
55
58
  this.done = true;
56
59
  if (result !== undefined) {
60
+ this.resultSettled = true;
57
61
  this.resolveFinalResult(result);
62
+ } else if (!this.resultSettled) {
63
+ // end() without a terminal value must still settle result() —
64
+ // otherwise complete()/result() awaits hang forever.
65
+ this.resultSettled = true;
66
+ this.rejectFinalResult(new Error("Stream ended without a final result"));
58
67
  }
59
68
  // Notify all waiting consumers that we're done
60
69
  while (this.waiting.length > 0) {
@@ -75,6 +84,7 @@ export class EventStream<T, R = T> implements AsyncIterable<T> {
75
84
  this.done = true;
76
85
  this.#failed = true;
77
86
  this.#error = err;
87
+ this.resultSettled = true;
78
88
  this.rejectFinalResult(err);
79
89
  while (this.waiting.length > 0) {
80
90
  const waiter = this.waiting.shift()!;
@@ -126,6 +136,7 @@ export class AssistantMessageEventStream extends EventStream<AssistantMessageEve
126
136
  // Completion resolves the final result and still emits the terminal event.
127
137
  if (this.isComplete(event)) {
128
138
  this.done = true;
139
+ this.resultSettled = true;
129
140
  this.resolveFinalResult(this.extractResult(event));
130
141
  }
131
142
 
@@ -135,7 +146,13 @@ export class AssistantMessageEventStream extends EventStream<AssistantMessageEve
135
146
  override end(result?: AssistantMessage): void {
136
147
  this.done = true;
137
148
  if (result !== undefined) {
149
+ this.resultSettled = true;
138
150
  this.resolveFinalResult(result);
151
+ } else if (!this.resultSettled) {
152
+ // Mirror the base class: a result-less end() must not leave
153
+ // result() pending forever.
154
+ this.resultSettled = true;
155
+ this.rejectFinalResult(new Error("Stream ended without a final result"));
139
156
  }
140
157
  this.endWaiting();
141
158
  }
@@ -1,5 +1,5 @@
1
1
  import * as path from "node:path";
2
- import { extractHttpStatusFromError, getLogsDir } from "@prometheus-ai/utils";
2
+ import { extractHttpStatusFromError, getLogsDir, isBunTestRuntime } from "@prometheus-ai/utils";
3
3
  import { isCopilotTransientModelError } from "./retry.js";
4
4
  import { formatErrorMessageWithRetryAfter } from "./retry-after.js";
5
5
 
@@ -20,10 +20,6 @@ export type CapturedHttpErrorResponse = {
20
20
  bodyJson?: unknown;
21
21
  };
22
22
 
23
- type ErrorWithStatus = {
24
- status?: unknown;
25
- };
26
-
27
23
  const SENSITIVE_HEADERS = ["authorization", "x-api-key", "api-key", "cookie", "set-cookie", "proxy-authorization"];
28
24
 
29
25
  export async function appendRawHttpRequestDumpFor400(
@@ -31,7 +27,9 @@ export async function appendRawHttpRequestDumpFor400(
31
27
  error: unknown,
32
28
  dump: RawHttpRequestDump | undefined,
33
29
  ): Promise<string> {
34
- if (!dump || extractHttpStatusFromError(error) !== 400) {
30
+ // Never persist dumps under the test runner: providers exercise the 400 path
31
+ // with mocked fetch responses, which would otherwise litter the real ~/.prometheus logs.
32
+ if (!dump || isBunTestRuntime() || extractHttpStatusFromError(error) !== 400) {
35
33
  return message;
36
34
  }
37
35
 
@@ -65,12 +63,6 @@ export async function finalizeErrorMessage(
65
63
  return appendRawHttpRequestDumpFor400(message, error, rawRequestDump);
66
64
  }
67
65
 
68
- export function withHttpStatus(error: unknown, status: number): Error {
69
- const wrapped = error instanceof Error ? error : new Error(String(error));
70
- (wrapped as ErrorWithStatus).status = status;
71
- return wrapped;
72
- }
73
-
74
66
  /**
75
67
  * Rewrite error message for GitHub Copilot request failures.
76
68
  * Must run AFTER finalizeErrorMessage since it replaces the message entirely.
@@ -2,6 +2,8 @@ import { $env } from "@prometheus-ai/utils";
2
2
 
3
3
  const DEFAULT_STREAM_IDLE_TIMEOUT_MS = 120_000;
4
4
  const DEFAULT_STREAM_FIRST_EVENT_TIMEOUT_MS = 100_000;
5
+ /** Re-mint persistent race promises every N iterations (see hoisted-racer comment). */
6
+ const RACER_REMINT_INTERVAL = 1024;
5
7
 
6
8
  function normalizeIdleTimeoutMs(value: string | undefined, fallback: number): number | undefined {
7
9
  if (value === undefined) return fallback;
@@ -130,8 +132,11 @@ export async function* iterateWithIdleTimeout<T>(
130
132
  firstItemTimeoutMs !== undefined && firstItemTimeoutMs > 0 ? Date.now() + firstItemTimeoutMs : undefined;
131
133
  const abortSignal = options.abortSignal;
132
134
  const iterator = iterable[Symbol.asyncIterator]();
135
+ let iteratorClosed = false;
133
136
 
134
137
  const closeIterator = (): void => {
138
+ if (iteratorClosed) return;
139
+ iteratorClosed = true;
135
140
  const returnPromise = iterator.return?.();
136
141
  if (returnPromise) {
137
142
  void returnPromise.catch(() => {});
@@ -167,100 +172,266 @@ export async function* iterateWithIdleTimeout<T>(
167
172
  (firstItemTimeoutMs === undefined || firstItemTimeoutMs <= 0) &&
168
173
  (options.idleTimeoutMs === undefined || options.idleTimeoutMs <= 0);
169
174
 
170
- while (true) {
171
- let activeTimeoutMs: number | undefined;
172
- if (awaitingFirstItem) {
173
- if (firstItemDeadlineMs !== undefined) {
174
- activeTimeoutMs = firstItemDeadlineMs - Date.now();
175
- if (activeTimeoutMs <= 0) {
176
- options.onFirstItemTimeout?.();
177
- closeIterator();
178
- throw new Error(options.firstItemErrorMessage ?? options.errorMessage);
179
- }
180
- }
181
- } else if (options.idleTimeoutMs !== undefined && options.idleTimeoutMs > 0) {
182
- activeTimeoutMs = options.idleTimeoutMs - (Date.now() - lastProgressAt);
183
- if (activeTimeoutMs <= 0) {
184
- options.onIdle?.();
185
- closeIterator();
186
- throw new Error(options.errorMessage);
187
- }
188
- }
189
-
190
- const nextResultPromise = withRacy(iterator.next());
175
+ // Persistent racers, hoisted out of the per-item loop. The abort promise can
176
+ // only ever resolve once (abort latches), and a timeout resolution always
177
+ // precedes a throw — so neither needs per-item re-creation. This keeps the
178
+ // token hot path free of timer create/destroy and listener churn.
179
+ //
180
+ // Each Promise.race() call still attaches a reaction record to every pending
181
+ // racer, and those records live until the racer settles — so a never-firing
182
+ // abort/timeout promise would accumulate one record per streamed item for
183
+ // the stream's whole life. The loop re-mints both promises every
184
+ // RACER_REMINT_INTERVAL iterations to keep that retention bounded; the
185
+ // listener and timer callbacks resolve through late-bound variables so a
186
+ // re-mint never strands them.
187
+ let abortPromise: Promise<{ kind: "abort" }> | undefined;
188
+ let abortListener: (() => void) | undefined;
189
+ let resolveAbort: ((value: { kind: "abort" }) => void) | undefined;
190
+ if (abortSignal) {
191
+ const { promise, resolve } = Promise.withResolvers<{ kind: "abort" }>();
192
+ resolveAbort = resolve;
193
+ abortListener = () => resolveAbort?.({ kind: "abort" });
194
+ abortSignal.addEventListener("abort", abortListener, { once: true });
195
+ abortPromise = promise;
196
+ }
191
197
 
192
- const racers: Array<
193
- Promise<
194
- | { kind: "next"; result: IteratorResult<T> }
195
- | { kind: "error"; error: unknown }
196
- | { kind: "timeout" }
197
- | { kind: "abort" }
198
- >
199
- > = [nextResultPromise];
198
+ let timeoutPromise: Promise<{ kind: "timeout" }> | undefined;
199
+ let resolveTimeout: ((value: { kind: "timeout" }) => void) | undefined;
200
+ let timeoutFired = false;
201
+ let timer: NodeJS.Timeout | undefined;
202
+ let timerFireAtMs = Infinity;
200
203
 
201
- let timer: NodeJS.Timeout | undefined;
202
- let resolveTimeout: ((value: { kind: "timeout" }) => void) | undefined;
203
- const enforceTimeout = !noTimeoutEnforced && activeTimeoutMs !== undefined && activeTimeoutMs > 0;
204
- if (enforceTimeout) {
204
+ const currentDeadlineMs = (): number | undefined => {
205
+ if (awaitingFirstItem) return firstItemDeadlineMs;
206
+ if (options.idleTimeoutMs !== undefined && options.idleTimeoutMs > 0) {
207
+ return lastProgressAt + options.idleTimeoutMs;
208
+ }
209
+ return undefined;
210
+ };
211
+ const onTimerFire = (): void => {
212
+ timer = undefined;
213
+ timerFireAtMs = Infinity;
214
+ const deadlineMs = currentDeadlineMs();
215
+ if (deadlineMs === undefined) return;
216
+ const remainingMs = deadlineMs - Date.now();
217
+ if (remainingMs > 0) {
218
+ // Progress moved the deadline since this timer was armed — re-arm for
219
+ // the remainder. One stale wake per idle period, not one per item.
220
+ timerFireAtMs = deadlineMs;
221
+ timer = setTimeout(onTimerFire, remainingMs);
222
+ return;
223
+ }
224
+ timeoutFired = true;
225
+ resolveTimeout?.({ kind: "timeout" });
226
+ };
227
+ const armTimer = (deadlineMs: number): void => {
228
+ if (timeoutPromise === undefined || timeoutFired) {
229
+ // A fired-but-unconsumed resolution (the item won the same race) is
230
+ // stale — racing it again would fake a timeout, so mint a fresh one.
205
231
  const { promise, resolve } = Promise.withResolvers<{ kind: "timeout" }>();
232
+ timeoutPromise = promise;
206
233
  resolveTimeout = resolve;
207
- timer = setTimeout(() => resolve({ kind: "timeout" }), activeTimeoutMs);
208
- racers.push(promise);
234
+ timeoutFired = false;
209
235
  }
210
-
211
- let abortListener: (() => void) | undefined;
212
- let resolveAbort: ((value: { kind: "abort" }) => void) | undefined;
213
- if (abortSignal) {
214
- const { promise, resolve } = Promise.withResolvers<{ kind: "abort" }>();
215
- resolveAbort = resolve;
216
- abortListener = () => resolve({ kind: "abort" });
217
- abortSignal.addEventListener("abort", abortListener, { once: true });
218
- racers.push(promise);
236
+ if (timer !== undefined) {
237
+ // An armed timer firing at or before the new deadline re-arms itself.
238
+ if (timerFireAtMs <= deadlineMs) return;
239
+ clearTimeout(timer);
219
240
  }
241
+ timerFireAtMs = deadlineMs;
242
+ timer = setTimeout(onTimerFire, Math.max(0, deadlineMs - Date.now()));
243
+ };
220
244
 
221
- try {
222
- const outcome = await Promise.race(racers);
223
- if (outcome.kind === "abort") {
224
- closeIterator();
225
- throw abortReason(abortSignal!);
245
+ try {
246
+ let raceCount = 0;
247
+ while (true) {
248
+ if (++raceCount % RACER_REMINT_INTERVAL === 0) {
249
+ if (abortPromise !== undefined && !abortSignal!.aborted) {
250
+ const { promise, resolve } = Promise.withResolvers<{ kind: "abort" }>();
251
+ resolveAbort = resolve;
252
+ abortPromise = promise;
253
+ }
254
+ if (timeoutPromise !== undefined && !timeoutFired) {
255
+ const { promise, resolve } = Promise.withResolvers<{ kind: "timeout" }>();
256
+ resolveTimeout = resolve;
257
+ timeoutPromise = promise;
258
+ }
226
259
  }
227
- if (outcome.kind === "timeout") {
228
- if (!awaitingFirstItem) {
260
+ let activeTimeoutMs: number | undefined;
261
+ if (awaitingFirstItem) {
262
+ if (firstItemDeadlineMs !== undefined) {
263
+ activeTimeoutMs = firstItemDeadlineMs - Date.now();
264
+ if (activeTimeoutMs <= 0) {
265
+ options.onFirstItemTimeout?.();
266
+ closeIterator();
267
+ throw new Error(options.firstItemErrorMessage ?? options.errorMessage);
268
+ }
269
+ }
270
+ } else if (options.idleTimeoutMs !== undefined && options.idleTimeoutMs > 0) {
271
+ activeTimeoutMs = options.idleTimeoutMs - (Date.now() - lastProgressAt);
272
+ if (activeTimeoutMs <= 0) {
229
273
  options.onIdle?.();
230
- } else {
231
- options.onFirstItemTimeout?.();
274
+ closeIterator();
275
+ throw new Error(options.errorMessage);
232
276
  }
233
- closeIterator();
234
- throw new Error(
235
- !awaitingFirstItem ? options.errorMessage : (options.firstItemErrorMessage ?? options.errorMessage),
236
- );
237
277
  }
238
- if (outcome.kind === "error") {
239
- throw outcome.error;
278
+
279
+ const nextResultPromise = withRacy(iterator.next());
280
+
281
+ const racers: Array<
282
+ Promise<
283
+ | { kind: "next"; result: IteratorResult<T> }
284
+ | { kind: "error"; error: unknown }
285
+ | { kind: "timeout" }
286
+ | { kind: "abort" }
287
+ >
288
+ > = [nextResultPromise];
289
+
290
+ const enforceTimeout = !noTimeoutEnforced && activeTimeoutMs !== undefined && activeTimeoutMs > 0;
291
+ if (enforceTimeout) {
292
+ armTimer(Date.now() + activeTimeoutMs!);
293
+ racers.push(timeoutPromise!);
294
+ }
295
+ if (abortPromise) {
296
+ racers.push(abortPromise);
240
297
  }
241
- if (outcome.result.done) {
242
- markFirstItemReceived();
243
- return;
298
+
299
+ // Tracks whether this iteration handed an item to the consumer and resumed
300
+ // normally. Any other exit — internal throw, `done` return, or the consumer
301
+ // abandoning us via `.return()`/`.throw()` at the `yield` below — must close
302
+ // the upstream iterator so the underlying SSE body / SDK stream (and its
303
+ // socket) is released instead of being left suspended.
304
+ let continuing = false;
305
+ try {
306
+ const outcome = await Promise.race(racers);
307
+ if (outcome.kind === "abort") {
308
+ closeIterator();
309
+ throw abortReason(abortSignal!);
310
+ }
311
+ if (outcome.kind === "timeout") {
312
+ if (!awaitingFirstItem) {
313
+ options.onIdle?.();
314
+ } else {
315
+ options.onFirstItemTimeout?.();
316
+ }
317
+ closeIterator();
318
+ throw new Error(
319
+ !awaitingFirstItem ? options.errorMessage : (options.firstItemErrorMessage ?? options.errorMessage),
320
+ );
321
+ }
322
+ if (outcome.kind === "error") {
323
+ throw outcome.error;
324
+ }
325
+ if (outcome.result.done) {
326
+ markFirstItemReceived();
327
+ return;
328
+ }
329
+ const item = outcome.result.value;
330
+ // Non-progress items (e.g. provider keepalives, synthetic `start` events that
331
+ // arrive before the model has produced any tokens) MUST NOT flip us out of
332
+ // `awaitingFirstItem`. Otherwise the next iteration switches from the (longer)
333
+ // first-item watchdog to the (shorter) idle watchdog while we're still waiting
334
+ // on the model's first real output.
335
+ if (isProgressItem(item)) {
336
+ markFirstItemReceived();
337
+ lastProgressAt = Date.now();
338
+ }
339
+ yield item;
340
+ continuing = true;
341
+ } finally {
342
+ if (!continuing) closeIterator();
244
343
  }
245
- const item = outcome.result.value;
246
- // Non-progress items (e.g. provider keepalives, synthetic `start` events that
247
- // arrive before the model has produced any tokens) MUST NOT flip us out of
248
- // `awaitingFirstItem`. Otherwise the next iteration switches from the (longer)
249
- // first-item watchdog to the (shorter) idle watchdog while we're still waiting
250
- // on the model's first real output.
251
- if (isProgressItem(item)) {
252
- markFirstItemReceived();
253
- lastProgressAt = Date.now();
344
+ }
345
+ } finally {
346
+ if (timer !== undefined) clearTimeout(timer);
347
+ // Settle the persistent racers so the final Promise.race releases them.
348
+ resolveTimeout?.({ kind: "timeout" });
349
+ if (abortListener && abortSignal) {
350
+ abortSignal.removeEventListener("abort", abortListener);
351
+ }
352
+ resolveAbort?.({ kind: "abort" });
353
+ }
354
+ }
355
+
356
+ export interface TerminalGraceIteratorOptions {
357
+ /**
358
+ * Epoch-ms timestamp at which the consumer observed a logically terminal
359
+ * item (e.g. a chat-completions chunk carrying `finish_reason`), or
360
+ * `undefined` while the stream is still mid-response. Read before every
361
+ * pull, so the consumer can flip it between yields.
362
+ */
363
+ finishedAtMs: () => number | undefined;
364
+ /**
365
+ * Post-terminal budget: how long after `finishedAtMs()` to keep draining
366
+ * trailing items (e.g. a usage-only chunk or the `[DONE]` sentinel) before
367
+ * ending the iteration cleanly. The deadline is fixed at
368
+ * `finishedAtMs() + graceMs`; trailing items do not extend it, so
369
+ * keepalive-only servers cannot hold the stream open.
370
+ */
371
+ graceMs: number;
372
+ /**
373
+ * Invoked when the grace window closes with the source still open. Use it
374
+ * to abort the underlying request: the source generator is typically parked
375
+ * mid-`next()` (not at a yield), so a queued `.return()` alone cannot reach
376
+ * the transport until that pending read settles.
377
+ */
378
+ onGraceEnd?: () => void;
379
+ }
380
+
381
+ /**
382
+ * Yields items from an async iterable until the consumer marks the stream
383
+ * logically finished AND the source stays silent past a short grace window.
384
+ *
385
+ * Misbehaving OpenAI-compatible servers deliver the terminal chunk but never
386
+ * send `[DONE]` nor close the connection; without this guard the consumer
387
+ * hangs on `iterator.next()` until the idle watchdog converts an
388
+ * already-successful turn into a timeout error. Grace expiry is a clean end
389
+ * of iteration, never an error.
390
+ */
391
+ export async function* iterateWithTerminalGrace<T>(
392
+ iterable: AsyncIterable<T>,
393
+ options: TerminalGraceIteratorOptions,
394
+ ): AsyncGenerator<T> {
395
+ const iterator = iterable[Symbol.asyncIterator]();
396
+ try {
397
+ while (true) {
398
+ const finishedAtMs = options.finishedAtMs();
399
+ if (finishedAtMs === undefined) {
400
+ const result = await iterator.next();
401
+ if (result.done) return;
402
+ yield result.value;
403
+ continue;
404
+ }
405
+ const remainingMs = finishedAtMs + options.graceMs - Date.now();
406
+ if (remainingMs <= 0) {
407
+ options.onGraceEnd?.();
408
+ return;
254
409
  }
255
- yield item;
256
- } finally {
257
- if (timer !== undefined) clearTimeout(timer);
258
- // Resolve dangling promises so the racers don't leak (Promise.race is one-shot).
259
- resolveTimeout?.({ kind: "timeout" });
260
- if (abortListener && abortSignal) {
261
- abortSignal.removeEventListener("abort", abortListener);
410
+ const nextPromise = iterator.next();
411
+ let timer: NodeJS.Timeout | undefined;
412
+ const timeoutPromise = new Promise<"timeout">(resolve => {
413
+ timer = setTimeout(() => resolve("timeout"), remainingMs);
414
+ });
415
+ try {
416
+ const outcome = await Promise.race([nextPromise, timeoutPromise]);
417
+ if (outcome === "timeout") {
418
+ // The abandoned read settles (likely rejects) once onGraceEnd
419
+ // aborts the transport — mark it handled so it cannot surface
420
+ // as an unhandled rejection.
421
+ nextPromise.catch(() => {});
422
+ options.onGraceEnd?.();
423
+ return;
424
+ }
425
+ if (outcome.done) return;
426
+ yield outcome.value;
427
+ } finally {
428
+ if (timer !== undefined) clearTimeout(timer);
262
429
  }
263
- resolveAbort?.({ kind: "abort" });
430
+ }
431
+ } finally {
432
+ const returnPromise = iterator.return?.();
433
+ if (returnPromise) {
434
+ void Promise.resolve(returnPromise).catch(() => {});
264
435
  }
265
436
  }
266
437
  }